# Scrape the paragraph text of a web page and export it to a CSV file that
# can be opened in Excel.
#
# This is the runnable form of an interactive session: the console prompts
# were removed, and the values the session printed are kept as trailing
# comments next to the calls that produced them.

library(RCurl)
library(XML)

# Request and retrieve the content from the web server ----
# The URL is assembled with paste0() only to keep the lines short; it must
# not contain a line break.
page_url <- paste0(
  "https://www.clickz.com/",
  "search-in-china-how-baidu-is-different-from-google/36812/"
)
# NOTE(review): ssl.verifypeer = FALSE turns off TLS certificate
# verification. It is kept to preserve the original behaviour; confirm it is
# still required before relying on this script.
mine <- getURL(page_url, ssl.verifypeer = FALSE)
class(mine)      # [1] "character"
is.vector(mine)  # [1] TRUE
print(mine)

# Parse the downloaded page so it can be queried with XPath ----
# asText = TRUE: `mine` holds the page content itself, not a file name.
mine.tree <- htmlTreeParse(mine, asText = TRUE, useInternalNodes = TRUE)
print(mine.tree)

# Extract the content of each paragraph ----
mine.tree.parse <- unlist(
  xpathApply(mine.tree, path = "//p", fun = xmlValue)
)
class(mine.tree.parse)  # [1] "character"
print(mine.tree.parse)

# Export to Excel (CSV) ----
# Join every paragraph except the first and the last into a single string.
# The index is built explicitly because 2:(n - 1) would count backwards when
# fewer than three paragraphs were found; in that case the result is "".
n_paragraphs <- length(mine.tree.parse)
if (n_paragraphs >= 3L) {
  body_idx <- seq(2L, n_paragraphs - 1L)
} else {
  body_idx <- integer(0)
}
mine.txt <- paste(mine.tree.parse[body_idx], collapse = "")
is.vector(mine.txt)  # [1] TRUE
length(mine.txt)     # [1] 1
print(mine.txt)

# Write the joined text as a one-row, one-column table.
dt <- data.frame(text = mine.txt, stringsAsFactors = FALSE)
write.table(dt, file = "mydata.csv", sep = ",", row.names = FALSE)