使用 Java Transformer 与使用命令行处理器时 XSLT 结果的差异

时间:2018-03-15 14:34:50

标签: java xml xslt

在Java中应用XSL对某些数据样本有效,但对另一些数据样本却产生空结果,而命令行处理器对这些样本仍能生成有效结果。下面是一个出现这种差异的例子。给定下面列出的XSL和XML,执行命令行:

saxonb-xslt -s:metsmods_test3.xml -xsl:metsmods2.xsl

返回:

  Warning: at xsl:stylesheet on line 5 column 61 of metsmods2.xsl:
  Running an XSLT 1.0 stylesheet with an XSLT 2.0 processor

        main_label:Wachsende Häuser aus lebenden Bäumen entstehend
        identifier:urn:nbn:de:kobv:83-goobi-3255500
        main_label:II. Teil. Art und Verwendung der Naturbauten.
        identifier:urn:nbn:de:kobv:83-goobi-3255762
        main_label:III. Teil. Erörterung sonstiger Punkte.
        identifier:urn:nbn:de:kobv:83-goobi-3255929
        main_label:
        identifier:urn:nbn:de:kobv:83-goobi-3256094
        main_label:
        identifier:urn:nbn:de:kobv:83-goobi-3256100
        identifier.kobv:990006350260302884
        title:Wachsende Häuser aus lebenden Bäumen entstehend
        title:Wachsende Häuser aus lebenden Bäumen entstehend

但是Java转换器代码片段(snippet,取自https://examples.javacodegeeks.com/core-java/xml/xpath/xpath-xslt-tutorial/ ,并在下面列出)对saxonb-xslt处理的同一组文件却没有任何输出。我认为这与名称空间或对DTD的访问有关,命令行处理器和所选的Transformer对它们的处理方式不同。具体原因到底是什么?为什么在Java中处理时没有抛出异常?

代码:

/**
 * Reproduction case from the question: parses the XML input file into a DOM
 * document, compiles the stylesheet, and writes the transformation result to
 * standard output.
 */
public class Main {

// Holds the parsed XML input; assigned once in main and wrapped in a DOMSource.
private static Document document;

/**
 * Runs the transformation on the two hard-coded files.
 *
 * @param args command-line arguments (not read)
 * @throws Exception any parser or transformer failure is propagated unchanged
 */
public static void main(String[] args) throws Exception {
    // NOTE(review): the factory is used with its defaults; setNamespaceAware(true)
    // is never called. JAXP's DocumentBuilderFactory is not namespace-aware by
    // default, so the DOM built below presumably lacks the namespace information
    // that the stylesheet's mets:/mods: paths rely on, which would explain the
    // empty output without any exception being raised.
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();

    File xml = new File("/home/peter/1stax/src/metsmods_test3.xml");
    File xsl = new File("/home/peter/1stax/src/metsmods2.xsl");


    // Parse the whole input file into an in-memory DOM tree.
    DocumentBuilder builder = factory.newDocumentBuilder();
    document = builder.parse(xml);

    // Use a Transformer for output
    TransformerFactory transformerFactory = TransformerFactory.newInstance();
    StreamSource style = new StreamSource(xsl);
    Transformer transformer = transformerFactory.newTransformer(style);

    // The pre-parsed DOM (not the file) is the transformation input.
    DOMSource source = new DOMSource(document);
    StreamResult result = new StreamResult(System.out);
    transformer.transform(source, result);
}

}

XSL样式表:

<?xml version="1.0" encoding="utf-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:mets="http://www.loc.gov/METS/"
                xmlns:mods="http://www.loc.gov/mods/v3"
                xmlns:goobi="http://meta.goobi.org/v1.5.1/">

    <!-- Extracts labels, identifiers and titles from a METS/MODS document and
         emits them as plain "key:value" text lines. Every path below is
         namespace-qualified (mets:, mods:), so the input must be parsed with
         namespace support for anything to match. -->

    <!-- Plain-text output; no markup is serialized. -->
    <xsl:output method="text" omit-xml-declaration="yes" indent="no" encoding="utf-8"/>

    <!-- ID of the dmdSec to read metadata from, chosen in this order:
         1. the DMDID of a div directly below the LOGICAL structMap,
         2. otherwise the DMDID of a div one level deeper,
         3. otherwise the literal DMDLOG_0000.
         With version="1.0" semantics xsl:value-of yields the string value of
         the first matching attribute only. -->
    <xsl:variable name="dmdsec_id">
        <xsl:choose>
            <xsl:when test="/mets:mets/mets:structMap[@TYPE='LOGICAL']/mets:div/@DMDID">
                <xsl:value-of select="/mets:mets/mets:structMap[@TYPE='LOGICAL']/mets:div/@DMDID"/>
            </xsl:when>
            <xsl:otherwise>
                <xsl:choose>
                    <xsl:when test="/mets:mets/mets:structMap[@TYPE='LOGICAL']/mets:div/mets:div/@DMDID">
                        <xsl:value-of select="/mets:mets/mets:structMap[@TYPE='LOGICAL']/mets:div/mets:div/@DMDID"/>
                    </xsl:when>
                    <xsl:otherwise>
                        <xsl:text>DMDLOG_0000</xsl:text>
                    </xsl:otherwise>
                </xsl:choose>
            </xsl:otherwise>
        </xsl:choose>
    </xsl:variable>

    <!-- Single entry point: everything is pulled from the document root. The
         literal text and whitespace inside each for-each body are copied to
         the output verbatim. -->
    <xsl:template match="/">

        <!-- One main_label/identifier pair per div directly below the LOGICAL
             structMap, taken from its LABEL and CONTENTIDS attributes. -->
        <xsl:for-each select="//mets:structMap[@TYPE='LOGICAL']/mets:div">
            main_label:<xsl:value-of select="@LABEL"/>
            identifier:<xsl:value-of select="@CONTENTIDS"/>
        </xsl:for-each>

        <!-- MODS identifiers of the selected dmdSec, keyed by their type attribute. -->
        <xsl:for-each select="//mets:dmdSec[@ID=$dmdsec_id]/mets:mdWrap/mets:xmlData/mods:mods/mods:identifier">
            identifier.<xsl:value-of select="@type"/>:<xsl:value-of select="."/>
        </xsl:for-each>

        <!-- MODS record identifiers of the selected dmdSec, keyed by their source attribute. -->
        <xsl:for-each select="//mets:dmdSec[@ID=$dmdsec_id]/mets:mdWrap/mets:xmlData/mods:mods/mods:recordInfo/mods:recordIdentifier">
            identifier.<xsl:value-of select="@source"/>:<xsl:value-of select="."/>
        </xsl:for-each>

        <!-- Every title of the selected dmdSec, one line per mods:title element. -->
        <xsl:for-each select="//mets:dmdSec[@ID=$dmdsec_id]/mets:mdWrap/mets:xmlData/mods:mods/mods:titleInfo/mods:title">
            title:<xsl:value-of select="."/>
        </xsl:for-each>


    </xsl:template>

</xsl:stylesheet>

xml数据:

<?xml version="1.0" encoding="UTF-8"?>
<mets:mets OBJID="" xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-5.xsd http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/mets.xsd http://www.loc.gov/standards/premis/ http://www.loc.gov/standards/premis/v2/premis-v2-0.xsd http://www.loc.gov/standards/mix/ http://www.loc.gov/standards/mix/mix.xsd" xmlns:mets="http://www.loc.gov/METS/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
   <mets:metsHdr CREATEDATE="2018-02-27T12:37:35Z">
      <mets:agent OTHERTYPE="SOFTWARE" ROLE="CREATOR" TYPE="OTHER">
         <mets:name>Goobi - ugh-3.0-ugh-2.0.0-29-g3b6efe1 - 21−December−2016</mets:name>
         <mets:note>Goobi</mets:note>
      </mets:agent>
   </mets:metsHdr>
   <mets:dmdSec ID="DMDLOG_0000">
      <mets:mdWrap MDTYPE="MODS">
         <mets:xmlData>
            <mods:mods xmlns:mods="http://www.loc.gov/mods/v3">
               <mods:titleInfo>
                  <mods:title>Wachsende Häuser aus lebenden Bäumen entstehend</mods:title>
               </mods:titleInfo>
               <mods:titleInfo type="uniform">
                  <mods:title>Wachsende Häuser aus lebenden Bäumen entstehend</mods:title>
               </mods:titleInfo>
               <mods:recordInfo>
                  <mods:recordIdentifier source="kobv">990006350260302884</mods:recordIdentifier>
               </mods:recordInfo>
               <mods:classification authority="ivdcc">Deutsche Gartenbaubibliothek#Monographien#Gartenbau</mods:classification>
               <mods:classification authority="ivdcc">Zentralbibliothek#Monographien#Technik, Architektur, Bauwesen</mods:classification>
               <mods:language>
                  <mods:languageTerm authority="iso639-2b" type="code">ger</mods:languageTerm>
               </mods:language>
               <mods:originInfo>
                  <mods:dateIssued encoding="w3cdtf" keyDate="yes">[1926]</mods:dateIssued>
               </mods:originInfo>
               <mods:accessCondition type="use and reproduction">https://creativecommons.org/publicdomain/mark/1.0/</mods:accessCondition>
               <mods:physicalDescription>
                  <mods:extent>320 Seiten</mods:extent>
                  <mods:digitalOrigin>reformatted digital</mods:digitalOrigin>
               </mods:physicalDescription>
               <mods:location>
                  <mods:shelfLocator>8Af6500</mods:shelfLocator>
                  <mods:physicalLocation>Universitätsbibliothek der Technischen Universität Berlin</mods:physicalLocation>
               </mods:location>
               <mods:name authority="gnd" authorityURI="http://d-nb.info/gnd/" type="personal" valueURI="http://d-nb.info/gnd/1065837569">
                  <mods:role>
                     <mods:roleTerm authority="marcrelator" type="code">aut</mods:roleTerm>
                  </mods:role>
                  <mods:namePart type="family">Wiechula</mods:namePart>
                  <mods:namePart type="given">Arthur</mods:namePart>
                  <mods:displayForm>Wiechula, Arthur</mods:displayForm>
               </mods:name>
               <mods:originInfo>
                  <mods:place>
                     <mods:placeTerm type="text">Berlin</mods:placeTerm>
                  </mods:place>
                  <mods:dateCaptured encoding="w3cdtf">2018</mods:dateCaptured>
                  <mods:publisher>Universitätsbibliothek der Technischen Universität Berlin</mods:publisher>
                  <mods:edition>[Electronic ed.]</mods:edition>
               </mods:originInfo>
            </mods:mods>
         </mets:xmlData>
      </mets:mdWrap>
   </mets:dmdSec>

   <mets:fileSec>
      <mets:fileGrp USE="PRESENTATION">
         <mets:file ID="FILE_0001_PRESENTATION" MIMETYPE="image/tif">
            <mets:FLocat LOCTYPE="URL" xlink:href="file:///opt/digiverso/viewer/media/990006350260302884/wiecwach_990006350260302884_0001.tif" xmlns:xlink="http://www.w3.org/1999/xlink"/>
         </mets:file>
         <mets:file ID="FILE_0002_PRESENTATION" MIMETYPE="image/tif">
            <mets:FLocat LOCTYPE="URL" xlink:href="file:///opt/digiverso/viewer/media/990006350260302884/wiecwach_990006350260302884_0002.tif" xmlns:xlink="http://www.w3.org/1999/xlink"/>
         </mets:file>
         <mets:file ID="FILE_0003_PRESENTATION" MIMETYPE="image/tif">
            <mets:FLocat LOCTYPE="URL" xlink:href="file:///opt/digiverso/viewer/media/990006350260302884/wiecwach_990006350260302884_0003.tif" xmlns:xlink="http://www.w3.org/1999/xlink"/>
         </mets:file>
         <mets:file ID="FILE_0004_PRESENTATION" MIMETYPE="image/tif">
            <mets:FLocat LOCTYPE="URL" xlink:href="file:///opt/digiverso/viewer/media/990006350260302884/wiecwach_990006350260302884_0004.tif" xmlns:xlink="http://www.w3.org/1999/xlink"/>
         </mets:file>
         <mets:file ID="FILE_0005_PRESENTATION" MIMETYPE="image/tif">
            <mets:FLocat LOCTYPE="URL" xlink:href="file:///opt/digiverso/viewer/media/990006350260302884/wiecwach_990006350260302884_0005.tif" xmlns:xlink="http://www.w3.org/1999/xlink"/>
         </mets:file>
      </mets:fileGrp>
   </mets:fileSec>
   <mets:structMap TYPE="LOGICAL">
      <mets:div ADMID="AMD" CONTENTIDS="urn:nbn:de:kobv:83-goobi-3255500" DMDID="DMDLOG_0000" ID="LOG_0000" LABEL="Wachsende Häuser aus lebenden Bäumen entstehend" TYPE="Monograph">
         <mets:div CONTENTIDS="urn:nbn:de:kobv:83-goobi-3255512" ID="LOG_0001" TYPE="Cover"/>
         <mets:div CONTENTIDS="urn:nbn:de:kobv:83-goobi-3255526" ID="LOG_0002" TYPE="TitlePage"/>
         <mets:div CONTENTIDS="urn:nbn:de:kobv:83-goobi-3255538" DMDID="DMDLOG_0003" ID="LOG_0003" LABEL="Vorwort." TYPE="Preface"/>
         <mets:div CONTENTIDS="urn:nbn:de:kobv:83-goobi-3255549" DMDID="DMDLOG_0004" ID="LOG_0004" LABEL="Inhalts-Verzeichnis." TYPE="TableOfContents"/>
         <mets:div CONTENTIDS="urn:nbn:de:kobv:83-goobi-3255550" DMDID="DMDLOG_0005" ID="LOG_0005" LABEL="Verzeichnis der Abbildungen." TYPE="ListOfIllustrations"/>
         <mets:div CONTENTIDS="urn:nbn:de:kobv:83-goobi-3255567" DMDID="DMDLOG_0006" ID="LOG_0006" LABEL="Stichwörterverzeichnis." TYPE="OtherDocStrct"/>
         </mets:div>
         <mets:div CONTENTIDS="urn:nbn:de:kobv:83-goobi-3255762" DMDID="DMDLOG_0026" ID="LOG_0026" LABEL="II. Teil. Art und Verwendung der Naturbauten." TYPE="Chapter">
            <mets:div CONTENTIDS="urn:nbn:de:kobv:83-goobi-3255774" DMDID="DMDLOG_0027" ID="LOG_0027" LABEL="18. Zäune." TYPE="Chapter"/>
            <mets:div CONTENTIDS="urn:nbn:de:kobv:83-goobi-3255789" DMDID="DMDLOG_0028" ID="LOG_0028" LABEL="19. Einfache Häuser." TYPE="Chapter"/>
         </mets:div>
         <mets:div CONTENTIDS="urn:nbn:de:kobv:83-goobi-3255929" DMDID="DMDLOG_0042" ID="LOG_0042" LABEL="III. Teil. Erörterung sonstiger Punkte." TYPE="Chapter">
            <mets:div CONTENTIDS="urn:nbn:de:kobv:83-goobi-3255936" DMDID="DMDLOG_0043" ID="LOG_0043" LABEL="33. Die Entstehungsdauer der Naturbauwerke." TYPE="Chapter"/>
            <mets:div CONTENTIDS="urn:nbn:de:kobv:83-goobi-3255949" DMDID="DMDLOG_0044" ID="LOG_0044" LABEL="34. Das Lebensalter der Naturbauwerke." TYPE="Chapter"/>
         </mets:div>
         <mets:div CONTENTIDS="urn:nbn:de:kobv:83-goobi-3256094" ID="LOG_0059" TYPE="Cover"/>
         <mets:div CONTENTIDS="urn:nbn:de:kobv:83-goobi-3256100" ID="LOG_0060" TYPE="ColorChart"/>
   </mets:structMap>
   <mets:structMap TYPE="PHYSICAL">
      <mets:div ID="PHYS_0000" TYPE="physSequence">
         <mets:div CONTENTIDS="urn:nbn:de:kobv:83-goobi-3256112" ID="PHYS_0001" ORDER="1" ORDERLABEL=" - " TYPE="page">
            <mets:fptr FILEID="FILE_0001_PRESENTATION"/>
            <mets:fptr FILEID="FILE_0001_MIN"/>
            <mets:fptr FILEID="FILE_0001_DEFAULT"/>
            <mets:fptr FILEID="FILE_0001_MAX"/>
            <mets:fptr FILEID="FILE_0001_THUMBS"/>
         </mets:div>
         <mets:div CONTENTIDS="urn:nbn:de:kobv:83-goobi-3256122" ID="PHYS_0002" ORDER="2" ORDERLABEL=" - " TYPE="page">
            <mets:fptr FILEID="FILE_0002_PRESENTATION"/>
            <mets:fptr FILEID="FILE_0002_MIN"/>
            <mets:fptr FILEID="FILE_0002_DEFAULT"/>
            <mets:fptr FILEID="FILE_0002_MAX"/>
            <mets:fptr FILEID="FILE_0002_THUMBS"/>
         </mets:div>
         <mets:div CONTENTIDS="urn:nbn:de:kobv:83-goobi-3256138" ID="PHYS_0003" ORDER="3" ORDERLABEL=" - " TYPE="page">
            <mets:fptr FILEID="FILE_0003_PRESENTATION"/>
            <mets:fptr FILEID="FILE_0003_MIN"/>
            <mets:fptr FILEID="FILE_0003_DEFAULT"/>
            <mets:fptr FILEID="FILE_0003_MAX"/>
            <mets:fptr FILEID="FILE_0003_THUMBS"/>
         </mets:div>
      </mets:div>
   </mets:structMap>
   <mets:structLink>
      <mets:smLink xlink:to="PHYS_0001" xlink:from="LOG_0000" xmlns:xlink="http://www.w3.org/1999/xlink"/>
      <mets:smLink xlink:to="PHYS_0002" xlink:from="LOG_0000" xmlns:xlink="http://www.w3.org/1999/xlink"/>
      <mets:smLink xlink:to="PHYS_0003" xlink:from="LOG_0000" xmlns:xlink="http://www.w3.org/1999/xlink"/>
      <mets:smLink xlink:to="PHYS_0004" xlink:from="LOG_0000" xmlns:xlink="http://www.w3.org/1999/xlink"/>
      <mets:smLink xlink:to="PHYS_0005" xlink:from="LOG_0000" xmlns:xlink="http://www.w3.org/1999/xlink"/>
      <mets:smLink xlink:to="PHYS_0006" xlink:from="LOG_0000" xmlns:xlink="http://www.w3.org/1999/xlink"/>
   </mets:structLink>
</mets:mets>

2 个答案:

答案 0 :(得分:2)

看了Martin Honnen的回答之后,我从所用的取自“javacodegeeks”的代码片段(snippet)中删除了完全不必要的“解析为DOM”部分(一定是它把名称空间弄乱了),并采取了阻力最小的做法:直接用StreamSource替换它。现在可以正常工作了!这是新代码:

import org.w3c.dom.Document;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import java.io.File;


/**
 * Applies an XSLT stylesheet to an XML file and writes the result to standard
 * output.
 *
 * <p>The XML input is handed to the transformer as a {@link StreamSource}
 * instead of a pre-parsed DOM, so no {@code DocumentBuilder} is involved and
 * the XSLT processor parses the file itself.
 */
public class Main {

    /**
     * Runs the transformation on the two hard-coded files.
     *
     * @param args command-line arguments (not read)
     * @throws Exception any stylesheet compilation or transformation failure
     *                   is propagated unchanged
     */
    public static void main(String[] args) throws Exception {
        File xml = new File("/home/peter/1stax/src/metsmods_test3.xml");
        File xsl = new File("/home/peter/1stax/src/metsmods2.xsl");

        // Use a Transformer for output
        TransformerFactory transformerFactory = TransformerFactory.newInstance();
        StreamSource style = new StreamSource(xsl);
        Transformer transformer = transformerFactory.newTransformer(style);

        // Stream the file straight into the transformer; no intermediate DOM.
        StreamSource source = new StreamSource(xml);
        StreamResult result = new StreamResult(System.out);
        transformer.transform(source, result);
    }
}

答案 1 :(得分:1)

如果要用Java处理XML和XSLT,请务必显式使用支持名称空间的(namespace-aware)DocumentBuilderFactory,例如在创建DocumentBuilder之前调用factory.setNamespaceAware(true),否则将得不到有意义的结果:XSLT本身就是XML并且依赖名称空间,它处理的任何XML输入也最好在启用名称空间支持的情况下解析。

当然,如果只是把文件中的XML作为输入,您并不需要DocumentBuilder和DOMSource,也可以直接使用StreamSource,这样XSLT处理器会负责以名称空间感知模式解析输入。