使用区分文化的比较从字符串中获取子字符串

时间:2016-02-18 15:31:53

标签: c# .net string

有没有办法使用区分文化的相等比较,从字符串中取出实际匹配的子字符串?例如,在 en-US 文化下,æ 和 ae 被认为是相等的。"Encyclopædia".IndexOf("aed") 的结果为 8,表示存在匹配;但是,有没有办法在不逐字符遍历源字符串的情况下,提取出实际匹配的子串 æd?请注意,要查找的子串和实际匹配的子串的长度可能相差若干个字符。

1 个答案:

答案 0 :(得分:2)

我最终是这样解决的:先调用 IndexOf 得到匹配的起始位置,然后通过迭代尝试确定匹配的长度。我针对热路径(即匹配长度与指定子字符串长度相同的情况)做了优化;在这种情况下,只需进行一次比较。

/// <summary>
/// Extension methods that search a string for a substring under a given
/// <see cref="StringComparison"/> and report both where the match starts and how many
/// characters of the source string it spans.
/// </summary>
/// <remarks>
/// The matched span is not assumed to have the same length as the substring searched for:
/// once <c>IndexOf</c> has located the start, candidate lengths are compared against the
/// substring (under the same comparison) until one compares as equal.
/// </remarks>
public static class StringExtensions
{
    /// <summary>
    /// Searches all of <paramref name="source"/> for <paramref name="substring"/>.
    /// </summary>
    /// <param name="source">The string to search in.</param>
    /// <param name="substring">The string to search for.</param>
    /// <param name="comparisonType">The comparison rules used for both locating and measuring the match.</param>
    /// <param name="matchIndex">Receives the start index of the match, or -1 when there is none.</param>
    /// <param name="matchLength">Receives the number of characters of <paramref name="source"/> covered by the match, or -1 when there is none.</param>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> or <paramref name="substring"/> is null.</exception>
    public static void Find(this string source, string substring, StringComparison comparisonType, out int matchIndex, out int matchLength)
    {
        // Guard here as well: source.Length below would otherwise fail with a NullReferenceException.
        if (source == null)
        {
            throw new ArgumentNullException(nameof(source));
        }

        Find(source, substring, 0, source.Length, comparisonType, out matchIndex, out matchLength);
    }

    /// <summary>
    /// Searches <paramref name="source"/> for <paramref name="substring"/>, starting at
    /// <paramref name="searchIndex"/> and continuing to the end of the string.
    /// </summary>
    /// <param name="source">The string to search in.</param>
    /// <param name="substring">The string to search for.</param>
    /// <param name="searchIndex">The index in <paramref name="source"/> at which the search starts.</param>
    /// <param name="comparisonType">The comparison rules used for both locating and measuring the match.</param>
    /// <param name="matchIndex">Receives the start index of the match, or -1 when there is none.</param>
    /// <param name="matchLength">Receives the number of characters of <paramref name="source"/> covered by the match, or -1 when there is none.</param>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> or <paramref name="substring"/> is null.</exception>
    public static void Find(this string source, string substring, int searchIndex, StringComparison comparisonType, out int matchIndex, out int matchLength)
    {
        // Guard here as well: source.Length below would otherwise fail with a NullReferenceException.
        if (source == null)
        {
            throw new ArgumentNullException(nameof(source));
        }

        Find(source, substring, searchIndex, source.Length - searchIndex, comparisonType, out matchIndex, out matchLength);
    }

    /// <summary>
    /// Searches the range of <paramref name="source"/> that starts at <paramref name="searchIndex"/>
    /// and spans <paramref name="searchLength"/> characters for <paramref name="substring"/>.
    /// </summary>
    /// <param name="source">The string to search in.</param>
    /// <param name="substring">The string to search for.</param>
    /// <param name="searchIndex">The index in <paramref name="source"/> at which the search starts.</param>
    /// <param name="searchLength">The number of characters of <paramref name="source"/> to examine.</param>
    /// <param name="comparisonType">The comparison rules used for both locating and measuring the match.</param>
    /// <param name="matchIndex">Receives the start index of the match, or -1 when there is none.</param>
    /// <param name="matchLength">Receives the number of characters of <paramref name="source"/> covered by the match, or -1 when there is none.</param>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> or <paramref name="substring"/> is null.</exception>
    /// <remarks>
    /// Range and comparison-type validation is left to <c>String.IndexOf</c>, which is called
    /// with the arguments unchanged.
    /// </remarks>
    public static void Find(this string source, string substring, int searchIndex, int searchLength, StringComparison comparisonType, out int matchIndex, out int matchLength)
    {
        if (source == null)
        {
            throw new ArgumentNullException(nameof(source));
        }

        if (substring == null)
        {
            throw new ArgumentNullException(nameof(substring));
        }

        matchIndex = source.IndexOf(substring, searchIndex, searchLength, comparisonType);
        if (matchIndex == -1)
        {
            matchLength = -1;
            return;
        }

        matchLength = FindMatchLength(source, substring, searchIndex, searchLength, comparisonType, matchIndex);

        // Defensive: IndexOf reported a match, so a length is expected to be found.
        // If none is, report "not found" consistently through both out parameters.
        if (matchLength == -1)
        {
            matchIndex = -1;
        }
    }

    /// <summary>
    /// Determines how many characters of <paramref name="source"/>, starting at
    /// <paramref name="matchIndex"/>, compare as equal to <paramref name="substring"/>.
    /// </summary>
    /// <returns>The match length, or -1 if no candidate length compares as equal.</returns>
    private static int FindMatchLength(string source, string substring, int searchIndex, int searchLength, StringComparison comparisonType, int matchIndex)
    {
        // The comparer and options depend only on comparisonType, so resolve them once
        // instead of on every probe of the search below.
        CompareInfo compareInfo;
        CompareOptions compareOptions;
        ResolveComparison(comparisonType, out compareInfo, out compareOptions);

        // The match may not extend past the end of the searched range.
        int matchLengthMaximum = searchLength - (matchIndex - searchIndex);
        int matchLengthInitial = Math.Min(substring.Length, matchLengthMaximum);

        // Hot path: match length is the same as the substring length (a single comparison).
        if (compareInfo.Compare(source, matchIndex, matchLengthInitial, substring, 0, substring.Length, compareOptions) == 0)
        {
            return matchLengthInitial;
        }

        // Probe outwards from the initial guess, trying the shorter candidate before the
        // longer one at each distance, until both directions are exhausted.
        for (int distance = 1;
             matchLengthInitial - distance >= 0 || matchLengthInitial + distance <= matchLengthMaximum;
             distance++)
        {
            int shorter = matchLengthInitial - distance;
            if (shorter >= 0 &&
                compareInfo.Compare(source, matchIndex, shorter, substring, 0, substring.Length, compareOptions) == 0)
            {
                return shorter;
            }

            int longer = matchLengthInitial + distance;
            if (longer <= matchLengthMaximum &&
                compareInfo.Compare(source, matchIndex, longer, substring, 0, substring.Length, compareOptions) == 0)
            {
                return longer;
            }
        }

        // Should never happen
        return -1;
    }

    /// <summary>
    /// Maps a <see cref="StringComparison"/> value to the <see cref="CompareInfo"/> and
    /// <see cref="CompareOptions"/> pair used to compare string ranges.
    /// </summary>
    /// <exception cref="ArgumentException"><paramref name="comparisonType"/> is not one of the handled values.</exception>
    private static void ResolveComparison(StringComparison comparisonType, out CompareInfo compareInfo, out CompareOptions compareOptions)
    {
        switch (comparisonType)
        {
            case StringComparison.CurrentCulture:
                compareInfo = CultureInfo.CurrentCulture.CompareInfo;
                compareOptions = CompareOptions.None;
                break;

            case StringComparison.CurrentCultureIgnoreCase:
                compareInfo = CultureInfo.CurrentCulture.CompareInfo;
                compareOptions = CompareOptions.IgnoreCase;
                break;

            case StringComparison.InvariantCulture:
                compareInfo = CultureInfo.InvariantCulture.CompareInfo;
                compareOptions = CompareOptions.None;
                break;

            case StringComparison.InvariantCultureIgnoreCase:
                compareInfo = CultureInfo.InvariantCulture.CompareInfo;
                compareOptions = CompareOptions.IgnoreCase;
                break;

            case StringComparison.Ordinal:
                compareInfo = CultureInfo.InvariantCulture.CompareInfo;
                compareOptions = CompareOptions.Ordinal;
                break;

            case StringComparison.OrdinalIgnoreCase:
                compareInfo = CultureInfo.InvariantCulture.CompareInfo;
                compareOptions = CompareOptions.OrdinalIgnoreCase;
                break;

            default:
                throw new ArgumentException("The string comparison type passed in is currently not supported.", nameof(comparisonType));
        }
    }
}

使用示例:

// Locate `remove` inside `source` using the current culture's comparison rules.
int index;
int length;
source.Find(remove, StringComparison.CurrentCulture, out index, out length);

// A negative index means nothing matched, so the source is kept as-is;
// otherwise exactly the matched span is removed.
string clean = index >= 0 ? source.Remove(index, length) : source;
相关问题