week1
2014-04-13 10:10
645 查看
目录操作
下载文件
读取文件
读取Excel
读取xml
library(XML)
fileUrl <- "http://www.w3schools.com/xml/simple.xml"
# Read XML with xmlTreeParse. The argument is spelled out in full
# (useInternalNodes) rather than relying on partial argument matching.
doc <- xmlTreeParse(fileUrl, useInternalNodes = TRUE)
rootNode <- xmlRoot(doc)
# Name of the root node, then the names of its children.
print(xmlName(rootNode))
print(names(rootNode))
# First child of the root, then the first child of that node.
print(rootNode[[1]])
print(rootNode[[1]][[1]])
# xmlSApply applies xmlValue to every child of the first node,
# extracting all of the values stored under it.
print(xmlSApply(rootNode[[1]], xmlValue))
##################################################
fileUrl <- "http://espn.go.com/nfl/team/_/name/bal/baltimore-ravens"
# Read HTML with htmlTreeParse. The argument is spelled out in full
# (useInternalNodes) rather than relying on partial argument matching.
doc <- htmlTreeParse(fileUrl, useInternalNodes = TRUE)
# XPath quick reference:
#   /node                     top-level node
#   //node                    node at any level
#   node[@attr-name]          node that has the given attribute
#   node[@attr-name='bob']    node whose attribute has the value 'bob'
scores <- xpathSApply(doc, "//li[@class='score']", xmlValue)
teams <- xpathSApply(doc, "//li[@class='team-name']", xmlValue)
scores
读取JSON
library(jsonlite)
# Read JSON: fetch and parse the response of the GitHub repos endpoint.
jsonData <- fromJSON("https://api.github.com/users/jtleek/repos")
names(jsonData)
names(jsonData$owner)
jsonData$owner$login
# Convert a data frame to JSON.
# The toy data frame is no longer called `iris`, so it does not mask the
# built-in iris dataset; TRUE/FALSE replace the reassignable T/F shortcuts.
demo_df <- data.frame(foo = 1:4, bar = c(TRUE, TRUE, FALSE, FALSE))
myjson <- toJSON(demo_df, pretty = TRUE)
cat(myjson)
# Directory operations: show the working directory, move one level up,
# then create a directory if it does not exist yet.
# NOTE(review): `directoryname` is not defined in this snippet; it appears
# to be a placeholder the reader must set before running.
getwd()
setwd("../")
if (!file.exists(directoryname)) {
  dir.create(directoryname)
}
下载文件
# http URLs need no `method`; https on a Mac needs method = "curl".
# NOTE(review): `fileurl` is not defined in this snippet; it appears to be
# a placeholder for the URL to download.
download.file(url = fileurl, destfile = "./camara.csv", method = "curl")
读取文件
# Read the downloaded CSV. read.table's argument is `header` (the original
# `HEAD` is not a valid argument), and the result is stored in `camara`
# so that head() below has something to show.
camara <- read.table("camara.csv", sep = ",", header = TRUE)
# Alternative: camara <- read.csv("camara.csv")
head(camara)
读取Excel
# Use setwd("") to move to this file's directory before running.
# Create the data directory if it does not exist.
if (!file.exists("data")) {
  dir.create("data")
}
mfile <- "./data/cameras.xlsx"
if (!file.exists(mfile)) {
  fileUrl <- "https://data.baltimorecity.gov/api/views/dz54-2aru/rows.xlsx?accessType=DOWNLOAD"
  # See help(download.file).
  # setInternet2() is a Windows-only helper that newer R releases have
  # retired, so it is only called where it exists and a failure is
  # reported instead of aborting the script.
  if (exists("setInternet2")) {
    tryCatch(
      setInternet2(use = TRUE),
      error = function(e) {
        message("setInternet2() unavailable: ", conditionMessage(e))
      }
    )
  }
  # mode = "wb" writes the file in binary mode.
  download.file(fileUrl, destfile = mfile, mode = "wb")
  dateDownloaded <- date()
}
library(xlsx) # Excel package
# Read the first worksheet of the xlsx file.
cameraData <- read.xlsx(mfile, sheetIndex = 1, header = TRUE)
head(cameraData)
# Read only part of the sheet by giving column and row ranges.
colIndex <- 2:3
rowIndex <- 1:4
cameraDataSubset <- read.xlsx(
  mfile,
  sheetIndex = 1,
  colIndex = colIndex,
  rowIndex = rowIndex
)
cameraDataSubset
#########################################################
# 1. To write Excel files, use write.xlsx.
# 2. read.xlsx2 is faster than read.xlsx, but unstable when
#    reading only part of a sheet.
# 3. For heavy Excel reading/writing, use the XLConnect package
#    (the original note says "XLContent", presumably a typo).
#########################################################
读取xml
library(XML)
fileUrl <- "http://www.w3schools.com/xml/simple.xml"
# Read XML with xmlTreeParse. The argument is spelled out in full
# (useInternalNodes) rather than relying on partial argument matching.
doc <- xmlTreeParse(fileUrl, useInternalNodes = TRUE)
rootNode <- xmlRoot(doc)
# Name of the root node, then the names of its children.
print(xmlName(rootNode))
print(names(rootNode))
# First child of the root, then the first child of that node.
print(rootNode[[1]])
print(rootNode[[1]][[1]])
# xmlSApply applies xmlValue to every child of the first node,
# extracting all of the values stored under it.
print(xmlSApply(rootNode[[1]], xmlValue))
##################################################
fileUrl <- "http://espn.go.com/nfl/team/_/name/bal/baltimore-ravens"
# Read HTML with htmlTreeParse. The argument is spelled out in full
# (useInternalNodes) rather than relying on partial argument matching.
doc <- htmlTreeParse(fileUrl, useInternalNodes = TRUE)
# XPath quick reference:
#   /node                     top-level node
#   //node                    node at any level
#   node[@attr-name]          node that has the given attribute
#   node[@attr-name='bob']    node whose attribute has the value 'bob'
scores <- xpathSApply(doc, "//li[@class='score']", xmlValue)
teams <- xpathSApply(doc, "//li[@class='team-name']", xmlValue)
scores
读取JSON
library(jsonlite)
# Read JSON: fetch and parse the response of the GitHub repos endpoint.
jsonData <- fromJSON("https://api.github.com/users/jtleek/repos")
names(jsonData)
names(jsonData$owner)
jsonData$owner$login
# Convert a data frame to JSON.
# The toy data frame is no longer called `iris`, so it does not mask the
# built-in iris dataset; TRUE/FALSE replace the reassignable T/F shortcuts.
demo_df <- data.frame(foo = 1:4, bar = c(TRUE, TRUE, FALSE, FALSE))
myjson <- toJSON(demo_df, pretty = TRUE)
cat(myjson)
相关文章推荐
- Guru of the Week 条款01: 变量的初始化
- Guru of the Week 条款16:具有最大可复用性的通用Containers
- Book Of The Week #20040728 - 《唐老鸭的幸福生活》[Jun.2004/人民邮电出版社]
- A week went again
- BE11d Week (告警、日志与报告部分的增强) 推荐
- in Mar. 2007 ,my work and study plan at the second week
- Week 27 (07.07.02-07.07.06)
- Guru of the Week 条款23:对象的生存期(第二部分)
- DATE_GET_WEEK 和 WEEK_GET_FIRST_DAY
- the aim for next week
- This Week in HTML 5 – Episode 2
- DayOfWeek转换成"日一二三四五六"
- Week 2 - Wed. & Thu.
- 【WSD】the 1st week:WSD's Motivation
- Week 2: Project Planning
- Week 6, SOC
- usc week 4 DP 2012-4-18日 队内练习赛
- (3rd week)
- Python Module of the Week - Python Module of the Week
- 题目7:Day of Week