使用正则表达式查找包含节点的特定文本

时间:2013-05-22 12:03:47

标签: xml regex xslt xslt-2.0

我是XSLT的新手,我不是程序员,对于我可能愚蠢的问题感到抱歉。

我需要找到一些看起来像这样的引用:

BSK StPO-`<emphasis role="smallcaps">Burger,</emphasis>` Art. 4 N 5

包含引文的文本节点可以位于不同的父元素内,例如 para 或 footnote。

我希望将整个引文用refid元素包装,使用部分引文作为id。

<refid multi-idref="K_BSK_STPO-JSTPO_StPO_Art4_5">
    BSK STGB I-`<span class="smallcaps">Burger,</span>` Art. 4 N 5
</refid>

问题出在 emphasis 元素上:我找不到“绕过”它的办法。我找到了一个类似问题的回答(this answer),并尝试把它套用到我的问题上,但没有成功。下面这段脚本找不到任何引文。

这是我的代码的一部分。 $DokumentName是指全局定义的参数。引文中带有罗马数字的部分是可选的:

<!--
  Default-mode template for text nodes that contain a citation lead-in of the
  form: "BSK", whitespace, 2 to 5 letters, whitespace, an optional roman
  numeral (I to VII), then one punctuation character.
  Each regex match is wrapped in a ref-multi-id element; text outside the
  matches is written out unchanged.
  NOTE(review): the whitespace in front of the optional numeral is mandatory in
  this regex, so a lead-in such as "BSK StPO-" (hyphen directly after the
  letters) does not match and the template never fires for it.
-->
<xsl:template match="text()[matches(., 'BSK\s+(\p{L}{2,5})\s+(I|II|III|IV|V|VI|VII)?\p{P}')]">
  <!-- Keep the matched text node: inside xsl:analyze-string the context item
       is a string, so sibling nodes can only be reached through $vCur. -->
  <xsl:variable name="vCur" select="."/>
  <xsl:variable name="pContent" select="string(.)"/>
  <!-- Same pattern as in the match attribute, with braces doubled because the
       regex attribute is an attribute value template; here it is applied
       case-insensitively (flags="i"), unlike the match pattern. -->
  <xsl:analyze-string select="$pContent" regex="BSK\s+(\p{{L}}{{2,5}})\s+(I|II|III|IV|V|VI|VII)?\p{{P}}" flags="i">
    <xsl:matching-substring>
      <!-- First half of the id: $DokumentName, "_", the letter group and, when
           present, the roman numeral. -->
      <xsl:variable name="figureToTargetId">
        <xsl:choose>
          <!-- NOTE(review): the numeral is optional in this test but required
               by the nested regex, so a lead-in without numeral that passes
               the test yields an empty value here. -->
          <xsl:when test="matches(., 'BSK\s+(\p{L}{2,5})\s+(I|II|III|IV|V|VI|VII)?\p{P}')">                  
            <xsl:analyze-string select="." regex="(\p{{L}}{{2,5}})\s+(I|II|III|IV|V|VI|VII)">
              <xsl:matching-substring>
                <xsl:value-of select="concat($DokumentName, '_', regex-group(1), regex-group(2))"/>
              </xsl:matching-substring>
            </xsl:analyze-string>
          </xsl:when>
          <xsl:otherwise>
            <xsl:analyze-string select="." regex="(\p{{L}}{{2,5}})">
              <xsl:matching-substring>
                <xsl:value-of select="concat($DokumentName, '_', regex-group(1))"/>
              </xsl:matching-substring>
            </xsl:analyze-string>
          </xsl:otherwise>
        </xsl:choose>   
      </xsl:variable>
      <!-- Second half of the id ("_Art<n>_<m>"), read from the node after the
           first following emphasis.
           NOTE(review): following-sibling::*[1] selects an element, not the
           text node after the emphasis, and the regex uses d+ (the letter d)
           where \d+ (digits) was presumably intended. -->
      <xsl:variable name="figureFromTargetId">
        <xsl:if test="matches($vCur, 'BSK\s+(\p{L}{2,5})\s+(I|II|III|IV|V|VI|VII)?\p{P}')">
          <xsl:analyze-string select="string($vCur/following-sibling::emphasis[1]/following-sibling::*[1])" regex=",?Art\.\s+(d+)\s+N\s+(d+)">
            <xsl:matching-substring>
              <xsl:value-of
                select="concat('_Art', regex-group(1), '_', regex-group(2))"/>
            </xsl:matching-substring>
          </xsl:analyze-string>
        </xsl:if>
      </xsl:variable>
      <xsl:element name="ref-multi-id">
        <!-- NOTE(review): $figureToTargetId is concatenated with itself;
             $figureFromTargetId is computed above but never used. -->
        <xsl:attribute name="multi-idref">
          <xsl:value-of select="concat($figureToTargetId, $figureToTargetId)"/>
        </xsl:attribute>
        <!-- The matched lead-in text itself. -->
        <xsl:value-of select="."/>
        <xsl:if test="matches($vCur, 'BSK\s+(\p{L}{2,5})\s+(I|II|III|IV|V|VI|VII)?\p{P}')">
          <!-- Pull the following emphasis (converted by the copy-style
               template) and the node after it into the wrapper. -->
          <xsl:apply-templates select="$vCur/following-sibling::emphasis[1]" mode="copy-style"/>
          <xsl:value-of select="$vCur/following-sibling::emphasis[1]/following-sibling::*[1][matches(.,',?Art\.\s+(d+)\s+N\s+(d+)')]"/>
        </xsl:if>
      </xsl:element>
    </xsl:matching-substring>
    <!-- Text around the lead-in is passed through as is. -->
    <xsl:non-matching-substring>
      <xsl:value-of select="."/>
    </xsl:non-matching-substring>
  </xsl:analyze-string>
</xsl:template>

<!-- Mode "copy-style": rewrites a smallcaps emphasis as a span element whose
     class attribute carries the value of the original role attribute. The
     children are processed in the default mode. -->
<xsl:template match="emphasis[@role='smallcaps']" mode="copy-style">
  <xsl:element name="span">
    <xsl:attribute name="class" select="@role"/>
    <xsl:apply-templates select="node()"/>
  </xsl:element>
</xsl:template>

任何帮助都会非常感激!

2 个答案:

答案 0 :(得分:0)

您的正则表达式与您给出的字符串不匹配,因为它要求在实际出现连字符的位置之前必须有空白字符(其中的 \s+ 不是可选的)。看起来

BSK\s+(\p{L}{2,5})\s+(I|II|III|IV|V|VI|VII)?\p{P}

应该是

BSK\s+(\p{L}{2,5})(\s+(I|II|III|IV|V|VI|VII))?\p{P}

答案 1 :(得分:0)

这是最终可用的代码。我只是忘了添加 mode 属性;此外,我还得考虑引文的更多变体,并去掉多余的节点和节点片段。

    <!--
      Mode "copy-style": handles a text node that contains a BSK citation
      lead-in ("BSK", whitespace, a 2 to 5 letter law abbreviation, an optional
      whitespace-separated volume numeral I to VII, then a punctuation
      character).

      For every lead-in found, one ref-multi-id element is written containing
        1. the matched lead-in text,
        2. the first following-sibling emphasis element, processed in mode
           "match",
        3. the leading "[,][ Vor] Art. <n> N <m>" part of the first text node
           that follows that emphasis.
      Its multi-idref attribute is the concatenation of a law part
      ($figureToTargetId) and an article part ($figureFromTargetId).
      Text of the node outside the lead-in is copied through unchanged.
    -->
    <xsl:template match="text()[matches(., 'BSK\s+(\p{L}{2,5})(\s+(I|II|III|IV|V|VI|VII))?\p{P}')]" mode="copy-style">
        <!-- The matched text node. Inside xsl:analyze-string the context item
             is a string, so siblings must be reached through this variable. -->
        <xsl:variable name="vCur" select="."/>
        <xsl:variable name="pContent" select="string(.)"/>
        <!-- Text node after the author emphasis; the empty string when there is
             no such node. Looked up once and used for both the id and the
             copied citation tail. -->
        <xsl:variable name="tailText" select="string($vCur/following-sibling::emphasis[1]/following-sibling::text()[1])"/>
        <xsl:analyze-string select="$pContent" regex="BSK\s+(\p{{L}}{{2,5}})(\s+(I|II|III|IV|V|VI|VII))?\p{{P}}" flags="i">
            <xsl:matching-substring>
                <!-- Law part of the id. -->
                <xsl:variable name="figureToTargetId">
                    <xsl:choose>
                        <!-- Law abbreviation followed by a volume numeral. -->
                        <xsl:when test="matches(., '(BSK)\s+(\p{L}{2,5})\s+(I|II|III|IV|V|VI|VII)\p{P}')">
                            <!-- Alternatives are ordered longest first: nothing
                                 follows the numeral group in this regex, and the
                                 first alternative that matches wins, so the order
                                 I|II|III|... would shorten II, III and IV to "I"
                                 and VI and VII to "V". -->
                            <xsl:analyze-string select="." regex="(\p{{L}}{{2,5}})\s+(VII|VI|V|IV|III|II|I)">
                                <xsl:matching-substring>
                                    <xsl:value-of select="concat('K_BSK_', regex-group(1), regex-group(2), '_', regex-group(1))"/>
                                </xsl:matching-substring>
                            </xsl:analyze-string>
                        </xsl:when>
                        <!-- StPO and JStPO share one commentary id prefix. -->
                        <xsl:when test="matches(., 'BSK\s+J?StPO\p{P}')">
                            <xsl:analyze-string select="." regex="(BSK)\s+(\p{{L}}{{2,5}})">
                                <xsl:matching-substring>
                                    <xsl:value-of select="concat('K_BSK_STPO-JSTPO_', regex-group(2))"/>
                                </xsl:matching-substring>
                            </xsl:analyze-string>
                        </xsl:when>
                        <!-- Any other law abbreviation without volume numeral. -->
                        <xsl:when test="matches(., 'BSK\s+(\p{L}{2,5})\p{P}')">
                            <xsl:analyze-string select="." regex="BSK\s+(\p{{L}}{{2,5}})">
                                <xsl:matching-substring>
                                    <xsl:value-of select="concat('K_BSK_', regex-group(1), '_', regex-group(1))"/>
                                </xsl:matching-substring>
                            </xsl:analyze-string>
                        </xsl:when>
                    </xsl:choose>
                </xsl:variable>
                <!-- Article part of the id: "_Art<n>_<m>" or "_VorArt<n>_<m>",
                     empty when the tail text does not start with a citation
                     tail. Groups 3 and 5 are the article and the margin number. -->
                <xsl:variable name="figureFromTargetId">
                    <xsl:analyze-string select="$tailText" regex="^,?(\s+Vor)?\s+Art\.(\s+|\p{{Zs}})(\p{{N}}{{1,4}})\s+N(\s+|\p{{Zs}})(\p{{N}}{{1,4}})">
                        <xsl:matching-substring>
                            <xsl:choose>
                                <xsl:when test="contains(., 'Vor')">
                                    <xsl:value-of select="concat('_VorArt', regex-group(3), '_', regex-group(5))"/>
                                </xsl:when>
                                <xsl:otherwise>
                                    <xsl:value-of select="concat('_Art', regex-group(3), '_', regex-group(5))"/>
                                </xsl:otherwise>
                            </xsl:choose>
                        </xsl:matching-substring>
                    </xsl:analyze-string>
                </xsl:variable>
                <xsl:element name="ref-multi-id">
                    <xsl:attribute name="multi-idref">
                        <xsl:value-of select="concat($figureToTargetId, $figureFromTargetId)"/>
                    </xsl:attribute>
                    <!-- The lead-in text itself. -->
                    <xsl:value-of select="."/>
                    <!-- The author emphasis; the template's own match condition
                         already guarantees that $vCur contains a lead-in, so no
                         further test is needed here. -->
                    <xsl:apply-templates select="$vCur/following-sibling::emphasis[1]" mode="match"/>
                    <!-- The citation tail; the rest of that text node is left
                         to the template that handles the node itself. -->
                    <xsl:analyze-string select="$tailText" regex="^,?(\s+Vor)?\s+Art\.(\s+|\p{{Zs}})(\p{{N}}{{1,4}})\s+N(\s+|\p{{Zs}})(\p{{N}}{{1,4}})">
                        <xsl:matching-substring>
                            <xsl:value-of select="."/>
                        </xsl:matching-substring>
                    </xsl:analyze-string>
                </xsl:element>
            </xsl:matching-substring>
            <xsl:non-matching-substring>
                <xsl:copy-of select="."/>
            </xsl:non-matching-substring>
        </xsl:analyze-string>
    </xsl:template>
    <!-- Mode "copy-style": a smallcaps emphasis whose nearest preceding sibling
         is NOT a text node containing a BSK lead-in becomes a span whose class
         attribute carries the role value. Children are processed in the default
         mode.
         NOTE(review): within the code shown here, no copy-style template
         matches an emphasis that does follow a lead-in; confirm how that case
         is handled in the rest of the stylesheet. -->
    <xsl:template match="emphasis[@role='smallcaps'][not(preceding-sibling::node()[1][self::text() and matches(., 'BSK\s+(\p{L}{2,5})(\s+(I|II|III|IV|V|VI|VII))?\p{P}')])]" mode="copy-style">
        <xsl:element name="span">
            <xsl:attribute name="class" select="@role"/>
            <xsl:apply-templates select="node()"/>
        </xsl:element>
    </xsl:template>
     <!-- Mode "match": a smallcaps emphasis whose nearest preceding sibling is
          a text node containing a BSK lead-in becomes a span whose class
          attribute carries the role value. Children are processed in the
          default mode. -->
     <xsl:template match="emphasis[@role='smallcaps'][preceding-sibling::node()[1][self::text() and matches(., 'BSK\s+(\p{L}{2,5})(\s+(I|II|III|IV|V|VI|VII))?\p{P}')]]" mode="match">
         <xsl:element name="span">
             <xsl:attribute name="class" select="@role"/>
             <xsl:apply-templates select="node()"/>
         </xsl:element>
     </xsl:template>
    <!-- Mode "copy-style": for a text node that starts with a citation tail
         ("[,][ Vor] Art. <n> N <m>"), has a preceding smallcaps emphasis and
         whose emphasis is preceded by a BSK lead-in text, drop the tail and
         emit only the remainder of the text. -->
    <xsl:template mode="copy-style" match="text()[matches(., '^,?(\s+Vor)?\s+Art\.(\s+|\p{Zs})(\p{N}{1,4})\s+N(\s+|\p{Zs})(\p{N}{1,4})') and preceding-sibling::emphasis[@role='smallcaps'][1] and matches(preceding-sibling::emphasis[1]/preceding-sibling::text()[1], 'BSK\s+(\p{L}{2,5})(\s+(I|II|III|IV|V|VI|VII))?\p{P}')]">
        <xsl:analyze-string select="string(.)" regex="^,?(\s+Vor)?\s+Art\.(\s+|\p{{Zs}})(\p{{N}}{{1,4}})\s+N(\s+|\p{{Zs}})(\p{{N}}{{1,4}})">
            <!-- No xsl:matching-substring: the matched tail produces no output. -->
            <xsl:non-matching-substring>
                <xsl:sequence select="."/>
            </xsl:non-matching-substring>
        </xsl:analyze-string>
    </xsl:template>