没有System.Web的HTML解码

时间:2013-06-11 17:06:26

标签: c# .net sql-server clr

我正在研究SQL Server 2008 R2的CLR表值函数。我需要在一个点上对一个字符串进行HTMLDecode,但这是有问题的b / c依赖于System.Web,它不是SQL Server的支持程序集。

有人能想出更好的方法来进行HTML解码吗?

仅供参考:SQL Server 2008 CLR最多只支持.NET 3.5,因此system.net.webutility不起作用。

1 个答案:

答案 0 :(得分:1)

此外,您可以使用反射器直接从WebUtility获取代码(请不要因为编码风格,反映的东西而责怪我):

public class WebUtility {
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  private static class HtmlEntities {
  private static string[] _entitiesList = new string[]
        {
            "\"-quot",
            "&-amp",
            "'-apos",
            "<-lt",
            ">-gt",
            "\u00a0-nbsp",
            "¡-iexcl",
            "¢-cent",
            "£-pound",
            "¤-curren",
            "¥-yen",
            "¦-brvbar",
            "§-sect",
            "¨-uml",
            "©-copy",
            "ª-ordf",
            "«-laquo",
            "¬-not",
            "­-shy",
            "®-reg",
            "¯-macr",
            "°-deg",
            "±-plusmn",
            "²-sup2",
            "³-sup3",
            "´-acute",
            "µ-micro",
            "¶-para",
            "·-middot",
            "¸-cedil",
            "¹-sup1",
            "º-ordm",
            "»-raquo",
            "¼-frac14",
            "½-frac12",
            "¾-frac34",
            "¿-iquest",
            "À-Agrave",
            "Á-Aacute",
            "Â-Acirc",
            "Ã-Atilde",
            "Ä-Auml",
            "Å-Aring",
            "Æ-AElig",
            "Ç-Ccedil",
            "È-Egrave",
            "É-Eacute",
            "Ê-Ecirc",
            "Ë-Euml",
            "Ì-Igrave",
            "Í-Iacute",
            "Î-Icirc",
            "Ï-Iuml",
            "Ð-ETH",
            "Ñ-Ntilde",
            "Ò-Ograve",
            "Ó-Oacute",
            "Ô-Ocirc",
            "Õ-Otilde",
            "Ö-Ouml",
            "×-times",
            "Ø-Oslash",
            "Ù-Ugrave",
            "Ú-Uacute",
            "Û-Ucirc",
            "Ü-Uuml",
            "Ý-Yacute",
            "Þ-THORN",
            "ß-szlig",
            "à-agrave",
            "á-aacute",
            "â-acirc",
            "ã-atilde",
            "ä-auml",
            "å-aring",
            "æ-aelig",
            "ç-ccedil",
            "è-egrave",
            "é-eacute",
            "ê-ecirc",
            "ë-euml",
            "ì-igrave",
            "í-iacute",
            "î-icirc",
            "ï-iuml",
            "ð-eth",
            "ñ-ntilde",
            "ò-ograve",
            "ó-oacute",
            "ô-ocirc",
            "õ-otilde",
            "ö-ouml",
            "÷-divide",
            "ø-oslash",
            "ù-ugrave",
            "ú-uacute",
            "û-ucirc",
            "ü-uuml",
            "ý-yacute",
            "þ-thorn",
            "ÿ-yuml",
            "Œ-OElig",
            "œ-oelig",
            "Š-Scaron",
            "š-scaron",
            "Ÿ-Yuml",
            "ƒ-fnof",
            "ˆ-circ",
            "˜-tilde",
            "Α-Alpha",
            "Β-Beta",
            "Γ-Gamma",
            "Δ-Delta",
            "Ε-Epsilon",
            "Ζ-Zeta",
            "Η-Eta",
            "Θ-Theta",
            "Ι-Iota",
            "Κ-Kappa",
            "Λ-Lambda",
            "Μ-Mu",
            "Ν-Nu",
            "Ξ-Xi",
            "Ο-Omicron",
            "Π-Pi",
            "Ρ-Rho",
            "Σ-Sigma",
            "Τ-Tau",
            "Υ-Upsilon",
            "Φ-Phi",
            "Χ-Chi",
            "Ψ-Psi",
            "Ω-Omega",
            "α-alpha",
            "β-beta",
            "γ-gamma",
            "δ-delta",
            "ε-epsilon",
            "ζ-zeta",
            "η-eta",
            "θ-theta",
            "ι-iota",
            "κ-kappa",
            "λ-lambda",
            "μ-mu",
            "ν-nu",
            "ξ-xi",
            "ο-omicron",
            "π-pi",
            "ρ-rho",
            "ς-sigmaf",
            "σ-sigma",
            "τ-tau",
            "υ-upsilon",
            "φ-phi",
            "χ-chi",
            "ψ-psi",
            "ω-omega",
            "ϑ-thetasym",
            "ϒ-upsih",
            "ϖ-piv",
            "\u2002-ensp",
            "\u2003-emsp",
            "\u2009-thinsp",
            "‌-zwnj",
            "‍-zwj",
            "‎-lrm",
            "‏-rlm",
            "–-ndash",
            "—-mdash",
            "‘-lsquo",
            "’-rsquo",
            "‚-sbquo",
            "“-ldquo",
            "”-rdquo",
            "„-bdquo",
            "†-dagger",
            "‡-Dagger",
            "•-bull",
            "…-hellip",
            "‰-permil",
            "′-prime",
            "″-Prime",
            "‹-lsaquo",
            "›-rsaquo",
            "‾-oline",
            "⁄-frasl",
            "€-euro",
            "ℑ-image",
            "℘-weierp",
            "ℜ-real",
            "™-trade",
            "ℵ-alefsym",
            "←-larr",
            "↑-uarr",
            "→-rarr",
            "↓-darr",
            "↔-harr",
            "↵-crarr",
            "⇐-lArr",
            "⇑-uArr",
            "⇒-rArr",
            "⇓-dArr",
            "⇔-hArr",
            "∀-forall",
            "∂-part",
            "∃-exist",
            "∅-empty",
            "∇-nabla",
            "∈-isin",
            "∉-notin",
            "∋-ni",
            "∏-prod",
            "∑-sum",
            "−-minus",
            "∗-lowast",
            "√-radic",
            "∝-prop",
            "∞-infin",
            "∠-ang",
            "∧-and",
            "∨-or",
            "∩-cap",
            "∪-cup",
            "∫-int",
            "∴-there4",
            "∼-sim",
            "≅-cong",
            "≈-asymp",
            "≠-ne",
            "≡-equiv",
            "≤-le",
            "≥-ge",
            "⊂-sub",
            "⊃-sup",
            "⊄-nsub",
            "⊆-sube",
            "⊇-supe",
            "⊕-oplus",
            "⊗-otimes",
            "⊥-perp",
            "⋅-sdot",
            "⌈-lceil",
            "⌉-rceil",
            "⌊-lfloor",
            "⌋-rfloor",
            "〈-lang",
            "〉-rang",
            "◊-loz",
            "♠-spades",
            "♣-clubs",
            "♥-hearts",
            "♦-diams"
        };
  private static Dictionary<string, char> _lookupTable = WebUtility.HtmlEntities.GenerateLookupTable();
  private static Dictionary<string, char> GenerateLookupTable() {
    Dictionary<string, char> dictionary = new Dictionary<string, char>(StringComparer.Ordinal);
    string[] entitiesList = WebUtility.HtmlEntities._entitiesList;
    for (int i = 0; i < entitiesList.Length; i++) {
      string text = entitiesList[i];
      dictionary.Add(text.Substring(2), text[0]);
    }
    return dictionary;
  }
  public static char Lookup(string entity) {
    char result;
    WebUtility.HtmlEntities._lookupTable.TryGetValue(entity, out result);
    return result;
  }
}
  private enum UnicodeDecodingConformance {
    Auto,
    Strict,
    Compat,
    Loose
  }

  private static char[] _htmlEntityEndingChars = new char[] { ';', '&' };
  private static readonly UnicodeDecodingConformance _htmlDecodeConformance = UnicodeDecodingConformance.Auto;

  public static string HtmlDecode(string value) {
    if (string.IsNullOrEmpty(value)) {
      return value;
    }
    if (!WebUtility.StringRequiresHtmlDecoding(value)) {
      return value;
    }
    StringWriter stringWriter = new StringWriter(CultureInfo.InvariantCulture);
    WebUtility.HtmlDecode(value, stringWriter);
    return stringWriter.ToString();
  }

  private static bool StringRequiresHtmlDecoding(string s) {
    if (WebUtility._htmlDecodeConformance == UnicodeDecodingConformance.Compat) {
      return s.IndexOf('&') >= 0;
    }
    for (int i = 0; i < s.Length; i++) {
      char c = s[i];
      if (c == '&' || char.IsSurrogate(c)) {
        return true;
      }
    }
    return false;
  }

  private static void ConvertSmpToUtf16(uint smpChar, out char leadingSurrogate, out char trailingSurrogate) {
    int num = (int)(smpChar - 65536u);
    leadingSurrogate = (char)(num / 1024 + 55296);
    trailingSurrogate = (char)(num % 1024 + 56320);
  }

  public static void HtmlDecode(string value, TextWriter output) {
    if (value == null) {
      return;
    }
    if (output == null) {
      throw new ArgumentNullException("output");
    }
    if (!WebUtility.StringRequiresHtmlDecoding(value)) {
      output.Write(value);
      return;
    }
    int length = value.Length;
    int i = 0;
    while (i < length) {
      char c = value[i];
      if (c != '&') {
        goto IL_1B6;
      }
      int num = value.IndexOfAny(WebUtility._htmlEntityEndingChars, i + 1);
      if (num <= 0 || value[num] != ';') {
        goto IL_1B6;
      }
      string text = value.Substring(i + 1, num - i - 1);
      if (text.Length > 1 && text[0] == '#') {
        uint num2;
        bool flag;
        if (text[1] == 'x' || text[1] == 'X') {
          flag = uint.TryParse(text.Substring(2), NumberStyles.AllowHexSpecifier, NumberFormatInfo.InvariantInfo, out num2);
        } else {
          flag = uint.TryParse(text.Substring(1), NumberStyles.Integer, NumberFormatInfo.InvariantInfo, out num2);
        }
        if (flag) {
          switch (WebUtility._htmlDecodeConformance) {
            case UnicodeDecodingConformance.Strict:
              flag = (num2 < 55296u || (57343u < num2 && num2 <= 1114111u));
              break;
            case UnicodeDecodingConformance.Compat:
              flag = (0u < num2 && num2 <= 65535u);
              break;
            case UnicodeDecodingConformance.Loose:
              flag = (num2 <= 1114111u);
              break;
            default:
              flag = false;
              break;
          }
        }
        if (!flag) {
          goto IL_1B6;
        }
        if (num2 <= 65535u) {
          output.Write((char)num2);
        } else {
          char value2;
          char value3;
          WebUtility.ConvertSmpToUtf16(num2, out value2, out value3);
          output.Write(value2);
          output.Write(value3);
        }
        i = num;
      } else {
        i = num;
        char c2 = WebUtility.HtmlEntities.Lookup(text);
        if (c2 != '\0') {
          c = c2;
          goto IL_1B6;
        }
        output.Write('&');
        output.Write(text);
        output.Write(';');
      }
    IL_1BD:
      i++;
      continue;
    IL_1B6:
      output.Write(c);
      goto IL_1BD;
    }
  }
}