检测XML字段中的URL

时间:2013-03-08 01:32:58

标签: xml xslt hyperlink detection

XSLT 能否通过脚本或其他方式,检测消息字段中是否存在如下内容:

"http://..."

<a>"http..."</a> 

并将其转换为链接?我已经做好了最坏的打算!

1 个答案:

答案 0 :(得分:0)

XPath 2.0 提供了强大的正则表达式处理功能。在 XSLT 2.0 及以上版本中,可以这样写:

<!--
  Warns about every child of the document element whose string value
  contains no URL-like substring.

  Input : any XML document; only the children of the document element
          are tested.
  Output: for each failing child, the text "Warning:", a copy of the
          element, and " isn't a valid Uri" followed by a newline.
          Nothing else is written.
-->
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
 xmlns:xs="http://www.w3.org/2001/XMLSchema">
 <xsl:output omit-xml-declaration="yes" indent="yes"/>
 <xsl:strip-space elements="*"/>

 <!--
   Rough URL pattern: one of four schemes, "://", two or more dot-separated
   labels made of letters only, then optionally "/", one letter and a run of
   the punctuation characters listed in the final character class.
   The pattern has no ^ or $ anchors, so matches() succeeds for any string
   that merely CONTAINS such a substring.
 -->
 <xsl:variable name="vurlRegex"
 >(http|https|ftp|mailto)://(\p{L}+\.)+\p{L}+(/\p{L}([./?%&amp;=])*)?</xsl:variable>

 <!--
   The fixed parts of the message are emitted as text nodes (xsl:text), not
   as strings inside a single xsl:sequence. Adjacent atomic strings in a
   result sequence are joined with one space, so with string items every
   warning after the first would begin with a stray blank.
 -->
 <xsl:template match="/*/*[not(matches(.,$vurlRegex))]">
  <xsl:text>Warning:</xsl:text>
  <xsl:copy-of select="."/>
  <xsl:text> isn't a valid Uri&#xA;</xsl:text>
 </xsl:template>

 <!-- Override the built-in text rule so that only warnings are output. -->
 <xsl:template match="text()"/>
</xsl:stylesheet>

对以下XML文档应用此转换时:

<t>
 <a>http://abc.com</a>
 <b>\`\```^^^</b>
</t>

会产生所需的正确结果:

Warning:<b>\`\```^^^</b> isn't a valid Uri

请注意

这里使用的正则表达式未必是匹配 URL 最精确的一个——您可以把它换成更好、更精确的表达式。


可以使用 xsl:function,以更便于重用的形式来表达这种有用的词法检查:

<!--
  Same check as a reusable function: warns about every child of the
  document element for which my:IsValidUrl() returns false.

  Input : any XML document; only the children of the document element
          are tested.
  Output: for each failing child, the text "Warning:", a copy of the
          element, and " isn't a valid Uri" followed by a newline.
          Nothing else is written.
-->
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
 xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:my="my:my">
 <xsl:output omit-xml-declaration="yes" indent="yes"/>
 <xsl:strip-space elements="*"/>

 <!--
   Rough URL pattern: one of four schemes, "://", two or more dot-separated
   labels made of letters only, then optionally "/", one letter and a run of
   the punctuation characters listed in the final character class.
   The pattern has no ^ or $ anchors.
 -->
 <xsl:variable name="vurlRegex"
 >(http|https|ftp|mailto)://(\p{L}+\.)+\p{L}+(/\p{L}([./?%&amp;=])*)?</xsl:variable>

 <!--
   The fixed parts of the message are emitted as text nodes (xsl:text), not
   as strings inside a single xsl:sequence. Adjacent atomic strings in a
   result sequence are joined with one space, so with string items every
   warning after the first would begin with a stray blank.
 -->
 <xsl:template match="/*/*[not(my:IsValidUrl(.))]">
  <xsl:text>Warning:</xsl:text>
  <xsl:copy-of select="."/>
  <xsl:text> isn't a valid Uri&#xA;</xsl:text>
 </xsl:template>

 <!-- Override the built-in text rule so that only warnings are output. -->
 <xsl:template match="text()"/>

 <!--
   my:IsValidUrl($pStr as xs:string) as xs:boolean
   Returns true() when $pStr contains a substring matching $vurlRegex.
   Because the pattern is unanchored, text surrounding the URL is accepted.
 -->
 <xsl:function name="my:IsValidUrl" as="xs:boolean">
  <xsl:param name="pStr" as="xs:string"/>

  <xsl:sequence select="matches($pStr, $vurlRegex)"/>
 </xsl:function>
</xsl:stylesheet>

更新:

这里我使用了 Michael Kay 在评论中引用的正则表达式——它出自 XSD 工作组发布的一份 W3C Note(该 Note 独立于 XSD 规范本身):

<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
 xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:my="my:my">
 <xsl:output omit-xml-declaration="yes" indent="yes"/>
 <xsl:strip-space elements="*"/>

 <!--
   Regular expression describing URI syntax. According to the accompanying
   text it is taken from a W3C Note published by the XSD working group.

   The pattern is laid out over many lines, so the line breaks and the
   indentation are part of this variable's value. It is evaluated with the
   'x' flag of matches() (see my:IsValidUrl), which removes that whitespace
   before matching.

   "&amp;" is the XML escape for a literal ampersand inside the character
   classes. The pattern contains no ^ or $ anchors of its own.
 -->
 <xsl:variable name="vurlRegex"
 >((([A-Za-z])[A-Za-z0-9+\-\.]*)
   :((//(((([A-Za-z0-9\-\._~!$&amp;'()*+,;=:]|(%[0-9A-Fa-f][0-9A-Fa-f]))*@))?
   ((\[(((((([0-9A-Fa-f]){0,4}:)){6}((([0-9A-Fa-f]){0,4}:([0-9A-Fa-f]){0,4})
   |(([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])
   |(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])
   |(1([0-9]){2})|(2[0-4][0-9])
   |(25[0-5]))\.([0-9]|([1-9][0-9])
   |(1([0-9]){2})|(2[0-4][0-9])
   |(25[0-5])))))
   |(::((([0-9A-Fa-f]){0,4}:)){5}((([0-9A-Fa-f]){0,4}:([0-9A-Fa-f]){0,4})
   |(([0-9]|([1-9][0-9])|(1([0-9]){2})
   |(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})
   |(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])
   |(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])
   |(1([0-9]){2})|(2[0-4][0-9])|(25[0-5])))))
   |((([0-9A-Fa-f]){0,4})?::((([0-9A-Fa-f]){0,4}:)){4}((([0-9A-Fa-f]){0,4}:([0-9A-Fa-f]){0,4})
   |(([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]
   |([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])
   |(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])
   |(25[0-5])))))|(((((([0-9A-Fa-f]){0,4}:))?([0-9A-Fa-f]){0,4}))?::((([0-9A-Fa-f]){0,4}:)){3}((([0-9A-Fa-f]){0,4}:([0-9A-Fa-f]){0,4})
   |(([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]
   |([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])
   |(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})
   |(2[0-4][0-9])|(25[0-5])))))|(((((([0-9A-Fa-f]){0,4}:)){0,2}([0-9A-Fa-f]){0,4}))?
   ::((([0-9A-Fa-f]){0,4}:)){2}((([0-9A-Fa-f]){0,4}:([0-9A-Fa-f]){0,4})|
   (([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|
   ([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})
   |(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})
   |(2[0-4][0-9])|(25[0-5])))))|(((((([0-9A-Fa-f]){0,4}:)){0,3}([0-9A-Fa-f]){0,4}))
   ?::([0-9A-Fa-f]){0,4}:((([0-9A-Fa-f]){0,4}:([0-9A-Fa-f]){0,4})|(([0-9]|([1-9][0-9])
   |(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})
   |(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])
   |(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5])))))
   |(((((([0-9A-Fa-f]){0,4}:)){0,4}([0-9A-Fa-f]){0,4}))?::((([0-9A-Fa-f]){0,4}:([0-9A-Fa-f]){0,4})
   |(([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})
   |(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]
   |([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5])))))
   |(((((([0-9A-Fa-f]){0,4}:)){0,5}([0-9A-Fa-f]){0,4}))?::([0-9A-Fa-f]){0,4})
   |(((((([0-9A-Fa-f]){0,4}:)){0,6}([0-9A-Fa-f]){0,4}))?::))|(v([0-9A-Fa-f])+\.(([A-Za-z0-9\-\._~]
   |[!$&amp;'()*+,;=]|:))+))\])|(([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])
   |(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]
   |([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5])))|(([A-Za-z0-9\-\._~]|(%[0-9A-Fa-f][0-9A-Fa-f])
   |[!$&amp;'()*+,;=]))*)((:([0-9])*))?)((/(([A-Za-z0-9\-\._~!$&amp;'()*+,;=:@]|(%[0-9A-Fa-f][0-9A-Fa-f])))*))*)
   |(/(((([A-Za-z0-9\-\._~!$&amp;'()*+,;=:@]|(%[0-9A-Fa-f][0-9A-Fa-f])))+((/(([A-Za-z0-9\-\._~!$&amp;'()*+,;=:@]|(%[0-9A-Fa-f][0-9A-Fa-f])))*))*))?)
   |((([A-Za-z0-9\-\._~!$&amp;'()*+,;=:@]|(%[0-9A-Fa-f][0-9A-Fa-f])))+((/(([A-Za-z0-9\-\._~!$&amp;'()*+,;=:@]|(%[0-9A-Fa-f][0-9A-Fa-f])))*))*)
   |)((\?((([A-Za-z0-9\-\._~!$&amp;'()*+,;=:@]|(%[0-9A-Fa-f][0-9A-Fa-f]))|/
   |\?))*))?((#((([A-Za-z0-9\-\._~!$&amp;'()*+,;=:@]|(%[0-9A-Fa-f][0-9A-Fa-f]))|/|\?))*))?)</xsl:variable>

 <!--
   Emits one warning for each child of the document element for which
   my:IsValidUrl() returns false: the text "Warning:", a copy of the element,
   and " isn't a valid Uri" followed by a newline.

   The fixed parts are emitted as text nodes (xsl:text), not as strings
   inside a single xsl:sequence. Adjacent atomic strings in a result sequence
   are joined with one space, so with string items every warning after the
   first would begin with a stray blank.
 -->
 <xsl:template match="/*/*[not(my:IsValidUrl(.))]">
  <xsl:text>Warning:</xsl:text>
  <xsl:copy-of select="."/>
  <xsl:text> isn't a valid Uri&#xA;</xsl:text>
 </xsl:template>

 <!-- Override the built-in text rule so that only warnings are output. -->
 <xsl:template match="text()"/>

 <!--
   my:IsValidUrl($pStr as xs:string) as xs:boolean

   Returns true() only when the WHOLE of $pStr, ignoring leading and trailing
   whitespace, matches $vurlRegex.

   matches() succeeds as soon as any substring matches. $vurlRegex accepts a
   bare scheme followed by ":" with an empty path, so without anchoring any
   text containing a letter followed by a colon (for example "note: hello")
   would be reported as a valid URI. The pattern is therefore wrapped as
   ^( ... )$ here; the extra group keeps the anchors outside any alternation
   in the pattern.

   normalize-space() removes indentation and line breaks around the element
   text. An interior space is reduced to one space, which the pattern still
   rejects.

   The 'x' flag is required because $vurlRegex is written across several
   indented lines.
 -->
 <xsl:function name="my:IsValidUrl" as="xs:boolean">
  <xsl:param name="pStr" as="xs:string"/>

  <xsl:sequence
   select="matches(normalize-space($pStr), concat('^(', $vurlRegex, ')$'), 'x')"/>
 </xsl:function>
</xsl:stylesheet>