按长度分割字符串,但只在最近的空格处分割

时间:2013-08-23 06:26:40

标签: c# string linq ienumerable

我正在处理如下文本:

// Sample input for the question: one long string whose words are separated by spaces.
// NOTE(review): the text looks like it is stored in a legacy, non-Unicode font encoding — TODO confirm; do not "repair" the characters.
var data = "âô¢¬ôè÷¢ : ªîø¢è¤ô¢ - ã¿ñ¬ô ñèù¢ ªð¼ñ£÷¢ ï¤ôñ¢,«ñø¢è¤ô¢ - ªð¼ñ£÷¢ ñèù¢ ÝÁºèñ¢ ï¤ô袰ñ¢ ñ¤ì¢ì£ Üò¢òñ¢ ªð¼ñ£ñ¢ð좮 è¤ó£ñ âô¢¬ô袰ñ¢,õìè¢è¤ô¢ - ÝÁºèñ¢ ï¤ôñ¢,è¤öè¢è¤ô¢ - ô좲ñ¤ ï¤ôñ¢ ñø¢Áñ¢ 1,22 ªê ï¤ôñ¢ ð£î¢î¤òñ¢";

我正在使用扩展方法来拆分字符串

/// <summary>
/// Cuts <paramref name="s"/> into consecutive pieces of exactly
/// <paramref name="length"/> characters; the last piece holds whatever is
/// left over and may be shorter. Word boundaries are not taken into account.
/// </summary>
/// <param name="s">The string to cut up.</param>
/// <param name="length">Number of characters per piece; must be positive.</param>
/// <returns>
/// A lazily evaluated sequence of the pieces in their original order
/// (empty when <paramref name="s"/> is empty).
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="length"/> is zero or negative.
/// </exception>
public static IEnumerable<string> EnumByLength(this string s, int length)
{
    // Validate here rather than inside the iterator: iterator bodies only run
    // on first enumeration, which would delay the error to an unrelated call site.
    if (s == null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    // A zero length would never advance the loop index and would yield empty
    // strings forever; a negative one would walk the index backwards.
    if (length <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(length), "Chunk length must be positive.");
    }

    return EnumByLengthIterator(s, length);
}

// Lazily yields the fixed-size pieces for EnumByLength once the arguments are known to be valid.
private static IEnumerable<string> EnumByLengthIterator(string s, int length)
{
    for (int i = 0; i < s.Length; i += length)
    {
        // The final piece is clipped to the characters that are actually left.
        yield return s.Substring(i, Math.Min(length, s.Length - i));
    }
}
/// <summary>
/// Splits <paramref name="s"/> into an array of pieces that are each
/// <paramref name="maxLen"/> characters long (the last one may be shorter).
/// This is the materialised form of <c>EnumByLength</c>.
/// </summary>
/// <param name="s">The string to split.</param>
/// <param name="maxLen">Number of characters per piece.</param>
/// <returns>The pieces in their original order.</returns>
public static string[] SplitByLength(this string s, int maxLen)
{
    // EnumByLength is an iterator method, so it never returns null; a null
    // check on its result can never fire and only forces a second call.
    // Enumerate once and materialise the result.
    return s.EnumByLength(maxLen).ToArray();
}

现在我的问题是

要按最大长度 150 拆分此字符串,并且只能在最近的空格处拆分(该空格可以在第 150 个字符之前,也可以在其之后),不能在单词中间拆分。

该如何实现?

5 个答案:

答案 0 :(得分:3)

我的版本:

/// <summary>
/// Lazily splits <paramref name="value"/> at spaces. For every chunk the split
/// point is the space whose distance from the chunk start is closest to
/// <paramref name="length"/>, so a chunk may be shorter or longer than
/// <paramref name="length"/>. When two spaces are equally close, the earlier
/// one is used. The space used as a split point is not part of any chunk.
/// </summary>
/// <example>
/// For length = 3:
/// "abcd efghihjkl m n p qrstsf" -> "abcd", "efghihjkl", "m n", "p", "qrstsf"
/// </example>
/// <param name="value">Text to split; null or empty yields nothing.</param>
/// <param name="length">Desired chunk length that split points should be close to.</param>
/// <returns>The chunks in their original order.</returns>
public static IEnumerable<String> EnumByNearestSpace(this String value, int length) {
  if (String.IsNullOrEmpty(value))
    yield break;

  int bestDelta = int.MaxValue; // distance of the best candidate space from the ideal length
  int bestSplit = -1;           // index of the best candidate space, -1 while there is none

  int from = 0;                 // start index of the chunk currently being built

  for (int i = 0; i < value.Length; ++i) {
    if (value[i] != ' ')
      continue;

    int size = (i - from);
    int delta = (size - length > 0) ? size - length : length - size;

    if ((bestSplit < 0) || (delta < bestDelta)) {
      // This space is the first candidate or strictly closer to the ideal length.
      bestSplit = i;
      bestDelta = delta;
    }
    else {
      // Candidates are getting worse: cut at the best one found so far.
      yield return value.Substring(from, bestSplit - from);

      // Rewind to the split point so the spaces after it are re-evaluated
      // relative to the new chunk start (the loop's ++i steps past the space).
      i = bestSplit;

      from = i + 1;
      bestSplit = -1;
      bestDelta = int.MaxValue;
    }
  }

  // String's tail
  if (from < value.Length) {
    // Cut at the pending candidate only when it is accepted; otherwise the
    // remainder is emitted whole. The chunk start must advance only together
    // with the yield, or the text before the candidate would be lost.
    if ((bestSplit >= 0) && (bestDelta < value.Length - from)) {
      yield return value.Substring(from, bestSplit - from);

      from = bestSplit + 1;
    }

    if (from < value.Length)
      yield return value.Substring(from);
  }
}

...

// Usage: split the sample text at the spaces nearest to each 150-character mark and materialise the chunks.
var list = data.EnumByNearestSpace(150).ToList();

答案 1 :(得分:1)

我的版本

var data = "âô¢¬ôè÷¢ : ªîø¢è¤ô¢ - ã¿ñ¬ô ñèù¢ ªð¼ñ£÷¢ ï¤ôñ¢,«ñø¢è¤ô¢ - ªð¼ñ£÷¢ ñèù¢ ÝÁºèñ¢ ï¤ô袰ñ¢ ñ¤ì¢ì£ Üò¢òñ¢ ªð¼ñ£ñ¢ð좮 è¤ó£ñ âô¢¬ô袰ñ¢,õìè¢è¤ô¢ - ÝÁºèñ¢ ï¤ôñ¢,è¤öè¢è¤ô¢ - ô좲ñ¤ ï¤ôñ¢ ñø¢Áñ¢ 1,22 ªê ï¤ôñ¢ ð£î¢î¤òñ¢";

// Maximum number of characters wanted in one chunk.
const int maxLength = 150;

// Receives the chunks in their original order; split spaces are not kept.
var newList = new List<string>();

// Index of the first character that has not been put into a chunk yet.
int start = 0;

// Keep cutting while the unprocessed remainder is too long for one chunk.
while (data.Length - start > maxLength)
{
    // Last space inside the window [start, start + maxLength]; a space sitting
    // exactly at start + maxLength still gives a chunk of maxLength characters.
    int split = data.LastIndexOf(' ', start + maxLength, maxLength + 1);

    if (split <= start)
    {
        // No usable space inside the window (one very long word): cut at the
        // first space after it instead of breaking the word.
        split = data.IndexOf(' ', start + maxLength + 1);
        if (split < 0)
        {
            break; // no space left at all: the remainder becomes the last chunk
        }
    }

    newList.Add(data.Substring(start, split - start));
    start = split + 1; // skip the space that was used as the split point
}

// Whatever is left fits into one chunk, or cannot be split any further.
if (start < data.Length)
{
    newList.Add(data.Substring(start));
}

newList 中包含拆分后的各个字符串。

答案 2 :(得分:0)

给你:

 // Loop body for an iterator method that has a string `s` and an int `length`
 // in scope. Yields chunks of at most `length` characters, cut at the last
 // space that still fits; the space itself is not part of any chunk. A word
 // longer than `length` is kept whole and cut at the next space after it.
 // NOTE(review): assumes the enclosing method has verified length >= 0.
 for (int i = 0; i < s.Length; )
    {
        // The rest fits into one chunk: emit it and stop.
        if (s.Length - i <= length)
        {
            yield return s.Substring(i);
            break;
        }

        // Last space inside the window [i, i + length]. LastIndexOf searches
        // backwards from its start index, so the window end is passed first.
        int index = s.LastIndexOf(' ', i + length, length + 1);

        if (index <= i)
        {
            // No usable space in the window: fall forward to the next space.
            index = s.IndexOf(' ', i + length + 1);
        }

        if (index == -1)
        {
            // No space left anywhere: the remainder is the final chunk.
            yield return s.Substring(i);
            break;
        }

        yield return s.Substring(i, index - i);
        i = index + 1; // continue right after the space used as the split point
    }

答案 3 :(得分:0)

这是个老话题,但我刚好遇到了同样的问题,并尝试自己解决。这是我的方法:如果任何一个单词的长度超过当前限制,它会输出错误信息并返回 null。

// Demo entry point: wraps a sample sentence at 20 characters, prints each
// line together with its length, then waits for Enter before exiting.
static void Main(string[] args)
{
    string veryLongText = @"Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua.";

    List<string> result = SplitString(veryLongText, 20);

    // A null result is skipped, so nothing is printed in that case.
    if (result != null)
    {
        result.ForEach(t => Console.WriteLine($"{t.Length, 3} : '{t}'"));
    }

    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}

/// <summary>
/// Wraps <paramref name="data"/> into lines of at most <paramref name="length"/>
/// characters, breaking only at spaces. Each line takes as many words as fit;
/// the space used as a break is not part of any line.
/// </summary>
/// <param name="data">The text to wrap; words are separated by single spaces.</param>
/// <param name="length">Maximum number of characters per line.</param>
/// <returns>
/// The lines in their original order (an empty list for empty input), or
/// <c>null</c> — after writing an error message to the console — when a single
/// word is longer than <paramref name="length"/> and so cannot be placed.
/// </returns>
private static List<string> SplitString(string data, int length)
{
    List<string> result = new List<string>();

    if (data.Split(' ').Any(x => x.Length > length))
    {
        Console.WriteLine("ERROR, SINGLE WORD EXCEED THE CURRENT LIMIT!");
        return null;
    }

    // Index of the first character that has not been put on a line yet.
    int newLinePos = 0;

    // Cut a line off the front while the remainder does not fit on one line.
    // Driving the loop by the remainder (not by scanning for the next space)
    // guarantees the tail is emitted even when no space follows the last word.
    while (data.Length - newLinePos > length)
    {
        // The window [newLinePos, newLinePos + length] holds length + 1
        // characters; as no word is longer than length, it contains a space.
        int lastSpace = data.LastIndexOf(' ', newLinePos + length, length + 1);

        result.Add(data.Substring(newLinePos, lastSpace - newLinePos));
        newLinePos = lastSpace + 1;
    }

    // Whatever is left fits on one line.
    if (newLinePos < data.Length)
    {
        result.Add(data.Substring(newLinePos));
    }

    return result;
}

答案 4 :(得分:-1)

试试这个:这段代码会把长句子按单词拆分成若干行,使每一行的长度都小于或等于 chunkSize:

    /// <summary>
    /// Breaks <paramref name="toSplit"/> into lines of at most
    /// <paramref name="chunkSize"/> characters without cutting words. Words are
    /// separated by spaces or tabs in the input and joined with one space in
    /// the output; the joining spaces count towards the line length.
    /// </summary>
    /// <param name="toSplit">The text to break into lines.</param>
    /// <param name="chunkSize">Maximum number of characters per line.</param>
    /// <returns>
    /// The lines in their original order; empty when the input has no words.
    /// A word longer than <paramref name="chunkSize"/> is placed alone on a
    /// line of its own, which is the only case where a line exceeds the limit.
    /// </returns>
    private List<string> splitIntoChunks(string toSplit, int chunkSize)
    {
        List<string> splittedLines = new List<string>();

        string[] words = toSplit.Split(new char[] { ' ', '\t' }, StringSplitOptions.RemoveEmptyEntries);

        // Line being assembled; empty only before the first word is seen,
        // because RemoveEmptyEntries guarantees that every word is non-empty.
        string line = "";

        foreach (string word in words)
        {
            if (line.Length == 0)
            {
                // A line always accepts its first word, even an over-long one;
                // refusing it would never consume the word and loop forever.
                line = word;
            }
            else if (line.Length + 1 + word.Length <= chunkSize)
            {
                // The "+ 1" accounts for the space that joins the two words.
                line += " " + word;
            }
            else
            {
                // The word does not fit: close this line and start the next with it.
                splittedLines.Add(line);
                line = word;
            }
        }

        if (line.Length > 0)
        {
            splittedLines.Add(line);
        }

        return splittedLines;
    }