使用XSLT从XML文件中删除重复记录

时间:2015-10-26 04:25:06

标签: xml xslt

我有以下XML文件:

<xml xmlns:s='uuid:BDC6E3F0-6DA3-11d1-A2A3-00AA00C14882'
    xmlns:dt='uuid:C2F41010-65B3-11d1-A29F-00AA00C14882'
    xmlns:rs='urn:schemas-microsoft-com:rowset'
    xmlns:z='RowsetSchema'>
<!-- Inline schema: declares the nine attributes carried by each z:row element below. -->
<s:schema id='RowsetSchema'>
    <s:elementType name='row' content='eltOnly'>
        <s:attributeType name='iBookID' rs:number='1' rs:writeunknown='true'>
            <s:datatype dt:type='int' dt:maxLength='4' rs:precision='10' rs:fixedlength='true' rs:maybenull='false'/>
        </s:attributeType>
        <s:attributeType name='vchISBN' rs:number='2' rs:writeunknown='true'>
            <s:datatype dt:type='string' rs:dbtype='str' dt:maxLength='25' rs:maybenull='false'/>
        </s:attributeType>
        <s:attributeType name='vchEAN' rs:number='3' rs:writeunknown='true'>
            <s:datatype dt:type='string' rs:dbtype='str' dt:maxLength='25' rs:maybenull='false'/>
        </s:attributeType>
        <s:attributeType name='vchLCCN' rs:number='4' rs:writeunknown='true'>
            <s:datatype dt:type='string' rs:dbtype='str' dt:maxLength='25' rs:maybenull='false'/>
        </s:attributeType>
        <s:attributeType name='iNumPages' rs:number='5' rs:writeunknown='true'>
            <s:datatype dt:type='int' dt:maxLength='4' rs:precision='10' rs:fixedlength='true' rs:maybenull='false'/>
        </s:attributeType>
        <s:attributeType name='vchPublisherName' rs:number='6' rs:writeunknown='true'>
            <s:datatype dt:type='string' rs:dbtype='str' dt:maxLength='75' rs:maybenull='false'/>
        </s:attributeType>
        <s:attributeType name='iYearPublished' rs:number='7' rs:writeunknown='true'>
            <s:datatype dt:type='int' dt:maxLength='4' rs:precision='10' rs:fixedlength='true' rs:maybenull='false'/>
        </s:attributeType>
        <s:attributeType name='tiPrimaryISBN_Flag' rs:number='8' rs:nullable='true' rs:writeunknown='true'>
            <s:datatype dt:type='ui1' dt:maxLength='1' rs:precision='3' rs:fixedlength='true'/>
        </s:attributeType>
        <s:attributeType name='vchDivision' rs:number='9' rs:nullable='true' rs:writeunknown='true'>
            <s:datatype dt:type='string' rs:dbtype='str' dt:maxLength='10'/>
        </s:attributeType>
        <s:extends type='rs:rowbase'/>
    </s:elementType>
</s:schema>
<!-- Row data: eight z:row elements but only three distinct vchISBN/vchEAN values, so five rows are duplicates. -->
<rs:data>
    <!-- Group 1: vchEAN 9780329059675, two identical rows. -->
    <z:row iBookID='3093' vchISBN='978-0-329-05967-5' vchEAN='9780329059675' vchLCCN='Not Available' iNumPages='317'
         vchPublisherName='FollettBound' iYearPublished='1987' tiPrimaryISBN_Flag='0' vchDivision='PLATALS'/>
    <z:row iBookID='3093' vchISBN='978-0-329-05967-5' vchEAN='9780329059675' vchLCCN='Not Available' iNumPages='317'
         vchPublisherName='FollettBound' iYearPublished='1987' tiPrimaryISBN_Flag='0' vchDivision='PLATALS'/>
    <!-- Group 2: vchEAN 9780329479251, three identical rows. -->
    <z:row iBookID='3093' vchISBN='978-0-329-47925-1' vchEAN='9780329479251' vchLCCN='Not Available' iNumPages='317'
         vchPublisherName='FollettBound' iYearPublished='2006' tiPrimaryISBN_Flag='0' vchDivision='PLATALS'/>
    <z:row iBookID='3093' vchISBN='978-0-329-47925-1' vchEAN='9780329479251' vchLCCN='Not Available' iNumPages='317'
         vchPublisherName='FollettBound' iYearPublished='2006' tiPrimaryISBN_Flag='0' vchDivision='PLATALS'/>
    <z:row iBookID='3093' vchISBN='978-0-329-47925-1' vchEAN='9780329479251' vchLCCN='Not Available' iNumPages='317'
         vchPublisherName='FollettBound' iYearPublished='2006' tiPrimaryISBN_Flag='0' vchDivision='PLATALS'/>
    <!-- Group 3: vchEAN 9780758701718, three identical rows. -->
    <z:row iBookID='3093' vchISBN='978-0-7587-0171-8' vchEAN='9780758701718' vchLCCN='Not Available' iNumPages='317'
         vchPublisherName='FollettBound' iYearPublished='1987' tiPrimaryISBN_Flag='0' vchDivision='PLATALS'/>
    <z:row iBookID='3093' vchISBN='978-0-7587-0171-8' vchEAN='9780758701718' vchLCCN='Not Available' iNumPages='317'
         vchPublisherName='FollettBound' iYearPublished='1987' tiPrimaryISBN_Flag='0' vchDivision='PLATALS'/>
    <z:row iBookID='3093' vchISBN='978-0-7587-0171-8' vchEAN='9780758701718' vchLCCN='Not Available' iNumPages='317'
         vchPublisherName='FollettBound' iYearPublished='1987' tiPrimaryISBN_Flag='0' vchDivision='PLATALS'/>
</rs:data>
</xml>

我一直在尝试从中删除重复的记录(行),但没有成功。我在网上查阅并尝试了几个示例,但始终没能达到目的。如果有人可以帮我纠正我的xsl中的语法,我将非常感激。

这是我的xsl:

<?xml version="1.0" encoding="UTF-8"?>
<!-- Stylesheet from the question: an identity transform plus a z:row override intended to drop duplicate rows. -->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
 <xsl:output omit-xml-declaration="yes" indent="yes"/>
 <xsl:strip-space elements="*"/>

 <!-- Identity template: copies every node and attribute unchanged unless a more specific template matches. -->
 <xsl:template match="node()|@*">
     <xsl:copy>
       <xsl:apply-templates select="node()|@*"/>
     </xsl:copy>
 </xsl:template>

   <!-- z:row override: a row is copied only when NO later z:row sibling exists at all.
        The test does not compare any attribute values, so every row except the last
        one under the same parent is discarded, whether or not it is a duplicate. -->
   <xsl:template match="z:row" xmlns:rs="urn:schemas-microsoft-com:rowset" xmlns:z="RowsetSchema" >
        <xsl:if test="not(following-sibling::z:row)">
            <xsl:copy>
                <xsl:apply-templates select="@*|node()"/>
            </xsl:copy>
        </xsl:if>
    </xsl:template>
</xsl:stylesheet>

我得到的结果只输出了最后一条记录(行),而我期望的是3条记录(行)。

<xml xmlns:dt="uuid:C2F41010-65B3-11d1-A29F-00AA00C14882"
     xmlns:rs="urn:schemas-microsoft-com:rowset"
     xmlns:s="uuid:BDC6E3F0-6DA3-11d1-A2A3-00AA00C14882"
     xmlns:z="RowsetSchema">
   <s:schema id="RowsetSchema">
      <s:elementType content="eltOnly" name="row">
         <s:attributeType name="iBookID" rs:number="1" rs:writeunknown="true">
            <s:datatype dt:maxLength="4" dt:type="int" rs:fixedlength="true" rs:maybenull="false"
                        rs:precision="10"/>
         </s:attributeType>
         <s:attributeType name="vchISBN" rs:number="2" rs:writeunknown="true">
            <s:datatype dt:maxLength="25" dt:type="string" rs:dbtype="str" rs:maybenull="false"/>
         </s:attributeType>
         <s:attributeType name="vchEAN" rs:number="3" rs:writeunknown="true">
            <s:datatype dt:maxLength="25" dt:type="string" rs:dbtype="str" rs:maybenull="false"/>
         </s:attributeType>
         <s:attributeType name="vchLCCN" rs:number="4" rs:writeunknown="true">
            <s:datatype dt:maxLength="25" dt:type="string" rs:dbtype="str" rs:maybenull="false"/>
         </s:attributeType>
         <s:attributeType name="iNumPages" rs:number="5" rs:writeunknown="true">
            <s:datatype dt:maxLength="4" dt:type="int" rs:fixedlength="true" rs:maybenull="false"
                        rs:precision="10"/>
         </s:attributeType>
         <s:attributeType name="vchPublisherName" rs:number="6" rs:writeunknown="true">
            <s:datatype dt:maxLength="75" dt:type="string" rs:dbtype="str" rs:maybenull="false"/>
         </s:attributeType>
         <s:attributeType name="iYearPublished" rs:number="7" rs:writeunknown="true">
            <s:datatype dt:maxLength="4" dt:type="int" rs:fixedlength="true" rs:maybenull="false"
                        rs:precision="10"/>
         </s:attributeType>
         <s:attributeType name="tiPrimaryISBN_Flag" rs:nullable="true" rs:number="8"
                          rs:writeunknown="true">
            <s:datatype dt:maxLength="1" dt:type="ui1" rs:fixedlength="true" rs:precision="3"/>
         </s:attributeType>
         <s:attributeType name="vchDivision" rs:nullable="true" rs:number="9" rs:writeunknown="true">
            <s:datatype dt:maxLength="10" dt:type="string" rs:dbtype="str"/>
         </s:attributeType>
         <s:extends type="rs:rowbase"/>
      </s:elementType>
   </s:schema>
   <rs:data>
      <z:row iBookID="3093" iNumPages="317" iYearPublished="1987" tiPrimaryISBN_Flag="0"
             vchDivision="PLATALS"
             vchEAN="9780758701718"
             vchISBN="978-0-7587-0171-8"
             vchLCCN="Not Available"
             vchPublisherName="FollettBound"/>
   </rs:data>
</xml>

再次说明,我希望在新的xml中消除重复的记录(行)。非常感谢大家提出的任何建议。我使用以下在线工具来测试我的xsl - http://xslttest.appspot.com/

2 个答案:

答案 0 :(得分:2)

假设"重复"由vchEAN属性决定,您需要将:

<xsl:if test="not(following-sibling::z:row)">

为:

<xsl:if test="not(following-sibling::z:row[@vchEAN=current()/@vchEAN])">

不过,我建议您改用Muenchian分组(Muenchian grouping)。

答案 1 :(得分:1)

您可以尝试使用键(xsl:key)来实现,例如:

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Removes duplicate z:row elements using Muenchian grouping.
  Two rows count as duplicates when their vchEAN attributes are equal; the
  first row of each vchEAN group (in document order) is kept and later ones
  are dropped. Everything else in the document is copied unchanged.
-->
<xsl:stylesheet version="1.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:z="RowsetSchema">
 <xsl:output omit-xml-declaration="yes" indent="yes"/>
 <xsl:strip-space elements="*"/>

 <!-- Index every z:row by its vchEAN value so each group can be looked up with key(). -->
 <xsl:key name="DistinctEAN" match="z:row" use="@vchEAN"/>

 <!-- Identity template: copies every node and attribute unless a more specific template matches. -->
 <xsl:template match="node()|@*">
     <xsl:copy>
       <xsl:apply-templates select="node()|@*"/>
     </xsl:copy>
 </xsl:template>

 <!-- Suppress any z:row that is not the first member of its vchEAN group.
      The test lives in the match pattern because XPath 1.0 does not allow a
      predicate on the abbreviated step "." (".[...]" is a syntax error in
      strict XSLT 1.0 processors). Rows that pass fall through to the identity
      template above. A row without a vchEAN attribute has no key entry and is
      therefore suppressed as well. -->
 <xsl:template match="z:row[generate-id() != generate-id(key('DistinctEAN', @vchEAN)[1])]"/>
</xsl:stylesheet>