From 8b7cfc56936dba2b08600bb0b7fcdba7949e95c3 Mon Sep 17 00:00:00 2001 From: Andre Mikulec Date: Fri, 11 Oct 2013 21:03:12 -0500 Subject: [PATCH 1/2] single vector to matrix fix ...plus ... extract html title --- R/fundamental.data.r | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/R/fundamental.data.r b/R/fundamental.data.r index 4e7a6b8..f527ec1 100644 --- a/R/fundamental.data.r +++ b/R/fundamental.data.r @@ -70,7 +70,12 @@ if( len(grep('INDICATORS', txt, ignore.case = T)) == 0 ) { cat('No Data Found for', Symbol, '\n') return(all.data) } - + # extract title from this page + HTMLOPENTITLETAGposStart <- regexpr(pattern="", txt,ignore.case=TRUE)[1] + HTMLCLOSETITLETAGposStart <- regexpr(pattern="",txt,ignore.case=TRUE)[1] + HTMLOPENTITLETAGlength <- nchar("") + HTMLTITLEtext <- substr(txt, HTMLOPENTITLETAGposStart + HTMLOPENTITLETAGlength , HTMLCLOSETITLETAGposStart - 1 ) + # extract table from this page data = extract.table.from.webpage(txt, 'INDICATORS', hasHeader = T) colnames(data) = data[1,] @@ -107,10 +112,35 @@ if( len(grep('INDICATORS', txt, ignore.case = T)) == 0 ) { # remove empty columns all.data = all.data[, colSums(nchar(trim(all.data))) > 0] + # if converted to a vector, then make it a matrix again + if (is.vector(all.data)) { + all.data.temp <- matrix(all.data,nrow=length(all.data)) + rownames(all.data.temp) <- names(all.data) + colnames(all.data.temp) <- all.data.temp[1, ] + all.data <- all.data.temp + all.data.temp <- matrix(nrow=0, ncol=0) + } if( ncol(all.data) > n ) { - return(all.data[,(ncol(all.data)-n+1):ncol(all.data)]) + all.data <- all.data[, (ncol(all.data) - n + 1):ncol(all.data)] + # if converted to a vector, then make it a matrix again + if (is.vector(all.data)) { + all.data.temp <- matrix(all.data,nrow=length(all.data)) + rownames(all.data.temp) <- names(all.data) + colnames(all.data.temp) <- all.data.temp[1, ] + all.data <- all.data.temp + all.data.temp <- matrix(nrow=0, ncol=0) + } + # add a row of the HTMLTITLEtext values + all.data <- rbind(all.data, rep( HTMLTITLEtext, ncol(all.data) ) ) + # to the new 'added row' name it "HTMLTITLEtext" + rownames(all.data)[nrow(all.data)] <- "HTMLTITLEtext" + return(all.data) } else { + # add a row of the HTMLTITLEtext values + all.data <- rbind(all.data, rep( HTMLTITLEtext, ncol(all.data) ) ) + # to the new 'added row' name it "HTMLTITLEtext" + rownames(all.data)[nrow(all.data)] <- "HTMLTITLEtext" return(all.data) } } From 0a14fea6b9cebc59ac698c251b978a8daea56bd8 Mon Sep 17 00:00:00 2001 From: Andre Mikulec <Andre_Mikulec@Hotmail.com> Date: Sun, 13 Oct 2013 11:46:26 -0500 Subject: [PATCH 2/2] added feature keepHTMLTITLEtext --- R/fundamental.data.r | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/R/fundamental.data.r b/R/fundamental.data.r index f527ec1..7ce3c00 100644 --- a/R/fundamental.data.r +++ b/R/fundamental.data.r @@ -31,7 +31,8 @@ fund.data <- function Symbol, # ticker n=10, # number of periods mode=c('quarterly','annual'), # periodicity - max.attempts=5 # maximum number of attempts to download before exiting + max.attempts=5, # maximum number of attempts to download before exiting + keepHTMLTITLEtext = FALSE # last row includes HTML TITLE text ) { all.data = c() @@ -70,11 +71,14 @@ if( len(grep('INDICATORS', txt, ignore.case = T)) == 0 ) { cat('No Data Found for', Symbol, '\n') return(all.data) } - # extract title from this page - HTMLOPENTITLETAGposStart <- regexpr(pattern="<title>", txt,ignore.case=TRUE)[1] - HTMLCLOSETITLETAGposStart <- regexpr(pattern="",txt,ignore.case=TRUE)[1] - HTMLOPENTITLETAGlength <- nchar("") - HTMLTITLEtext <- substr(txt, HTMLOPENTITLETAGposStart + HTMLOPENTITLETAGlength , HTMLCLOSETITLETAGposStart - 1 ) + + if ( keepHTMLTITLEtext == TRUE ) { + # extract title from this page + HTMLOPENTITLETAGposStart <- regexpr(pattern="<title>", txt,ignore.case=TRUE)[1] + HTMLCLOSETITLETAGposStart <- regexpr(pattern="",txt,ignore.case=TRUE)[1] + HTMLOPENTITLETAGlength <- nchar("") + HTMLTITLEtext <- substr(txt, HTMLOPENTITLETAGposStart + HTMLOPENTITLETAGlength , HTMLCLOSETITLETAGposStart - 1 ) + } # extract table from this page data = extract.table.from.webpage(txt, 'INDICATORS', hasHeader = T) @@ -131,22 +135,24 @@ if( len(grep('INDICATORS', txt, ignore.case = T)) == 0 ) { all.data <- all.data.temp all.data.temp <- matrix(nrow=0, ncol=0) } - # add a row of the HTMLTITLEtext values - all.data <- rbind(all.data, rep( HTMLTITLEtext, ncol(all.data) ) ) - # to the new 'added row' name it "HTMLTITLEtext" - rownames(all.data)[nrow(all.data)] <- "HTMLTITLEtext" + if ( keepHTMLTITLEtext == TRUE ) { + # add a row of the HTMLTITLEtext values + all.data <- rbind(all.data, rep( HTMLTITLEtext, ncol(all.data) ) ) + # to the new 'added row' name it "HTMLTITLEtext" + rownames(all.data)[nrow(all.data)] <- "HTMLTITLEtext" + } return(all.data) } else { - # add a row of the HTMLTITLEtext values - all.data <- rbind(all.data, rep( HTMLTITLEtext, ncol(all.data) ) ) - # to the new 'added row' name it "HTMLTITLEtext" - rownames(all.data)[nrow(all.data)] <- "HTMLTITLEtext" + if ( keepHTMLTITLEtext == TRUE ) { + # add a row of the HTMLTITLEtext values + all.data <- rbind(all.data, rep( HTMLTITLEtext, ncol(all.data) ) ) + # to the new 'added row' name it "HTMLTITLEtext" + rownames(all.data)[nrow(all.data)] <- "HTMLTITLEtext" + } return(all.data) } } - - ############################################################################### # determine date when fundamental data is available # use 'date preliminary data loaded' when available