使用R从CSV文件创建xml文件

时间:2019-12-09 12:47:22

标签: r xml apply lapply

我正在尝试使用R的XML包,从CSV文件创建xml文件。我的CSV文件如下所示:

>head(patient)
  Source         Target             weight
1 Bacteroides   Lachnospiraceae  3.80735493
2 Bacteroides        Klebsiella -1.61890983
3 Bacteroides Lachnoclostridium  3.80735493
4 Bacteroides     Streptococcus -1.77760758
5 Streptococcus   Clostridium    1.19264508
6 Streptococcus [Eubacterium]    5.58496251

我的期望输出xml应该如下所示:

<?xml version="1.0" encoding="iso-8859-1"?>
<gxl>
    <graph id="graph id= ExtendedCallGraph edgeids=true edgemode=undirected">
        <node id="1">
            <attr name="Bacteroides">
            </attr>
        </node>
        <edge from="Bacteroides" to="Lachnospiraceae" isdirected="False" id="1--2">
        </edge>
        <edge from="Bacteroides" to=" Klebsiella" isdirected="False" id="1--2">
        </edge>
        <edge from="Bacteroides" to="Lachnoclostridium" isdirected="False" id="1--3">
        </edge>
        <edge from="Bacteroides" to=" Streptococcus" isdirected="False" id="1--4">
        </edge>
        <node id="2">
            <attr name="Streptococcus">
            </attr>
        </node>
        <edge from="Streptococcus" to="Clostridium" isdirected="False" id="2--3">
        </edge>
          <edge from="Streptococcus" to="Eubacterium" isdirected="False" id="2--4">
        </edge>
        :
        :
        :
        :
    </graph>
 </gxl>

我尝试了以下代码:

# Attempt from the question: build a <gxl> tree inside an xmlHashTree (DD).
DD = xmlHashTree()
top1<-addNode(xmlNode("gxl"), character(), DD)
# NOTE(review): the whole string below (including the id/edgeids/edgemode
# text) is passed as the node *name*; no attrs argument is supplied.
# The returned node is not stored, and every node added further down is
# attached to top1 (the gxl node), not to this graph node.
addNode(xmlNode("graph id= ExtendedCallGraph edgeids=true edgemode=directed"),top1,DD,close=FALSE)

# One <node> per unique Source value, each with a child <attr name=...>.
# NOTE(review): the id is as.integer() of the Source value itself, not a
# running index - presumably NA for character names, or the level code if
# Source is a factor; confirm against the actual column type.
lapply(unique(patient_1$Source),function(x){
  b=addNode(xmlNode("node",attrs = c('id' = as.integer(x))),top1,DD)
  c=addNode(xmlNode("attr",attrs = c('name' = as.character(x))),b,DD)})

   #####I am trying to add edge node from source to Target########
   # NOTE(review): the callback argument x (the current row) is never used;
   # "from"/"to" always read element [1] of the columns, so every edge gets
   # the same values. The column is spelled `target` here while the printed
   # data shows `Target` - verify the real column name.
    apply(unique(patient_1[,1:2]),1,function(x){
  e=addNode(xmlNode("edge",attrs = c("from"= as.character(patient_1$Source[1]), 
                    "to"=as.character(patient_1$target[1]), isdirected="false")),top1,DD)})

但是我现在被困住了。我该如何把 Source 列的值设置为 edge 的 from 属性,把 Target 列的值设置为 to 属性?预先感谢。

2 个答案:

答案 0 :(得分:0)

考虑使用更简单的嵌套for循环:外层遍历每个唯一的 Source,内层遍历该 Source 对应子集中的各行观测。与apply系列函数的方案不同,for循环可以保留迭代序号,用于生成所需的 id 属性,并逐步扩展XML树。另外,请考虑使用newXMLNode函数来构建元素,并通过其attrs参数以c()传入命名向量来设置属性:

# CREATE XML FILE
# Builds a <gxl>/<graph> document from the edge list in `patient`
# (columns Source and Target), prints it and writes it to Output.xml.
library(XML)

doc <- newXMLDoc()
root <- newXMLNode("gxl", doc = doc)
graph <- newXMLNode(
  "graph",
  parent = root,
  attrs = c(id = "ExtendedCallGraph", edgeids = "true", edgemode = "directed")
)

# WRITE XML NODES AND DATA
# Coerce to character up front: if the columns were read as factors,
# combining them with strings inside c() would yield the integer level
# codes instead of the names.
sources <- unique(as.character(patient$Source))

for (i in seq_along(sources)) {
  # NODE nodes: one <node> per unique Source, numbered by loop position,
  # with the Source name as the text of a child <attr>.
  grp_node <- newXMLNode("node", parent = graph, attrs = c(name = i))
  attr_node <- newXMLNode("attr", sources[i], parent = grp_node)

  # Rows of the edge list that start at the current Source.
  sub_df <- subset(patient, as.character(Source) == sources[i])

  # EDGE nodes: one <edge> per row; id is "<source index>--<row index>".
  # seq_len() (not 1:nrow) so an empty subset produces no iterations.
  for (j in seq_len(nrow(sub_df))) {
    edge_node <- newXMLNode(
      "edge",
      parent = graph,
      attrs = c(
        from = sources[i],
        to = as.character(sub_df$Target[j]),
        isdirected = "False",
        id = paste0(i, "--", j)
      )
    )
  }
}

# OUTPUT XML CONTENT TO SCREEN
print(doc)

# OUTPUT XML CONTENT TO FILE
saveXML(doc, file = "Output.xml")

输出

print(doc)    
# <?xml version="1.0"?>
# <gxl>
#   <graph id="ExtendedCallGraph" edgeids="true" edgemode="directed">
#     <node name="1">
#       <attr>Bacteroides</attr>
#     </node>
#     <edge from="Bacteroides" to="Lachnospiraceae" isdirected="False" id="1--1"/>
#     <edge from="Bacteroides" to="Klebsiella" isdirected="False" id="1--2"/>
#     <edge from="Bacteroides" to="Lachnoclostridium" isdirected="False" id="1--3"/>
#     <edge from="Bacteroides" to="Streptococcus" isdirected="False" id="1--4"/>
#     <node name="2">
#       <attr>Streptococcus</attr>
#     </node>
#     <edge from="Streptococcus" to="Clostridium" isdirected="False" id="2--1"/>
#     <edge from="Streptococcus" to="[Eubacterium]" isdirected="False" id="2--2"/>
#   </graph>
# </gxl>

答案 1 :(得分:0)

我也使用了foreach函数来实现。但这需要相当长的时间。

# Builds a GXL document from `patient_1` with foreach: first one <node> per
# (Otu_1, taxonomy.y) pair, then one <edge> per row of the data frame.
library(XML)
library(foreach)

Fin_Doc <- newXMLDoc()
root <- newXMLNode("gxl", doc = Fin_Doc)

graph <- newXMLNode(
  "graph",
  parent = root,
  attrs = c(
    id = "Co-occurance Network",
    edgeids = "true",
    edgemode = "undirected"
  )
)

########## adding the node id and attribute_name ##########
# foreach walks its iterators in lock-step and stops at the shortest one.
# The former third iterator over taxonomy.x was never used in the body and,
# being as long as the data frame, could not be the shortest, so it is
# dropped without changing the number of iterations.
# NOTE(review): the two columns are de-duplicated independently, so w and y
# are paired by position only - confirm they really correspond.
# NOTE(review): every <node> gets the same literal id "_" - confirm intended.
foreach(
  w = as.vector(unique(patient_1$Otu_1)),
  y = as.vector(unique(patient_1$taxonomy.y))
) %do% {
  grp_node <- newXMLNode("node", parent = graph, attrs = c(id = "_"))
  attr_name <- newXMLNode(
    "attr", parent = grp_node, text = "", attrs = c(name = "OTU")
  )
  otu_id <- newXMLNode("int", parent = attr_name, text = "", w)
  bacteria <- newXMLNode(
    "attr", parent = grp_node, text = "", attrs = c(name = "Bacteria")
  )
  string_name <- newXMLNode("string", parent = bacteria, text = "", y)
}

#################### edge from otuids ##########################
# One <edge from=.. to=..> per row, carrying the row's patient1 value as
# <attr name="logratio"><float>..</float></attr>.
foreach(
  w = as.vector(patient_1$Otu_1),
  q = as.vector(patient_1$Otu_2),
  z = as.vector(patient_1$patient1)
) %do% {
  edge_node1 <- newXMLNode(
    "edge", parent = graph, text = "\n", attrs = c(from = w, to = q)
  )
  # attrs must be a *named* vector; the bare "logratio" had no attribute
  # name, unlike the name="OTU" / name="Bacteria" attributes above.
  attrs_node1 <- newXMLNode(
    "attr", parent = edge_node1, text = " ", attrs = c(name = "logratio")
  )
  weight_node1 <- newXMLNode(
    "float", as.character(z), parent = attrs_node1, text = " "
  )
}
相关问题