拆分XML并维护标头

时间:2015-12-08 18:12:22

标签: xml split

我有一个大型XML文件,需要将其拆分成多个文件,并在每个拆分后的文件中保留部分标头信息。请参阅下面的示例。

原始文件:
<ArticleExportPackage>
     <PackageInfo>
          <License>1234</License>
     </PackageInfo>     
     <ItemInfo>
          <ItemID>ABC123</ItemID>
     </ItemInfo>
     <ItemInfo>
          <ItemID>BCD123</ItemID>
     </ItemInfo>
</ArticleExportPackage>

拆分后:

#1
<ArticleExportPackage>
     <PackageInfo>
          <License>1234</License>
     </PackageInfo>     
     <ItemInfo>
          <ItemID>ABC123</ItemID>
     </ItemInfo>
</ArticleExportPackage>

#2
<ArticleExportPackage>
     <PackageInfo>
          <License>1234</License>
     </PackageInfo>     
     <ItemInfo>
          <ItemID>BCD123</ItemID>
     </ItemInfo>
</ArticleExportPackage>

非常感谢任何指导!

1 个答案:

答案 0 :(得分:0)

以下是使用vtd-xml进行拆分的代码:
import com.ximpleware.*;
import java.io.*;
/**
 * Splits {@code input.xml} into one output file per
 * {@code /ArticleExportPackage/ItemInfo} element using VTD-XML.
 *
 * <p>Each output file ({@code file1.xml}, {@code file2.xml}, ...) contains a fresh
 * {@code <ArticleExportPackage>} wrapper, a copy of the {@code PackageInfo} header
 * element (when the input has one), and exactly one {@code ItemInfo} element.
 * Fragments are copied as raw bytes from the parsed document.
 */
public class simpleSplit {
    /**
     * Parses {@code input.xml} from the working directory and writes the split files
     * next to it.
     *
     * @param s command-line arguments (unused)
     * @throws VTDException if navigation or XPath evaluation fails
     * @throws IOException  if an output file cannot be created or written
     */
    public static void main(String[] s) throws VTDException, IOException {
        VTDGen vg = new VTDGen();
        // The second argument switches namespace awareness ON.
        if (!vg.parseFile("input.xml", true)) {
            System.err.println("failed to parse input.xml");
            return;
        }
        VTDNav vn = vg.getNav();

        // Remember where the PackageInfo header lives so it can be copied into
        // every output file.
        long headerFragment = 0;
        boolean hasHeader = vn.toElement(VTDNav.FIRST_CHILD, "PackageInfo");
        if (hasHeader) {
            headerFragment = vn.getElementFragment();
        } else {
            System.out.println("packageInfo not found");
        }

        // The XPath is absolute, so it does not matter that the cursor may now be
        // sitting on PackageInfo.
        AutoPilot ap = new AutoPilot(vn);
        ap.selectXPath("/ArticleExportPackage/ItemInfo");

        // The wrapper markup is pure ASCII; an explicit charset keeps the bytes
        // independent of the platform default.
        // NOTE(review): assumes input.xml uses an ASCII-compatible encoding,
        // because the fragments below are copied as raw bytes — confirm.
        java.nio.charset.Charset cs = java.nio.charset.StandardCharsets.UTF_8;
        byte[] openTag = "<ArticleExportPackage>\n".getBytes(cs);
        byte[] closeTag = "\n</ArticleExportPackage>".getBytes(cs);
        byte[] newline = "\n".getBytes(cs);

        int fileIndex = 1;
        while (ap.evalXPath() != -1) {
            // try-with-resources closes (and thereby flushes) each output file,
            // even if a write throws part-way through.
            try (FileOutputStream out = new FileOutputStream("file" + fileIndex + ".xml")) {
                out.write(openTag);
                if (hasHeader) {
                    vn.dumpFragment(headerFragment, out); // PackageInfo header
                }
                out.write(newline);
                vn.dumpFragment(out); // the ItemInfo element under the cursor
                out.write(closeTag);
            }
            fileIndex++;
        }
    }
}