我的字符串是这样的:
// Sample input: the word to remove appears as the first word ("Psppsp") and as the last word ("psppsp").
string str = "Psppsp palm springs airport, 3400 e tahquitz canyon way, Palm springs, CA, US, 92262-6966 psppsp";
我会单独拿到字符串" psppsp",需要将它与str中的第一个和最后一个单词进行比较;如果它与第一个或最后一个单词匹配,就需要将该单词从str中删除。
我需要知道最佳和最快的方法。
答案 0 :(得分:0)
最快的方式是O(n)。下面是一个代码示例,还可以进一步改进。
string str = "Psppsp palm springs airport, 3400 e tahquitz canyon way, Palm springs, CA, US, 92262-6966 psppsp";
string word = "psppsp";
// Compare ordinally but ignore case: the sample starts with "Psppsp" while the
// search word is "psppsp", so a case-sensitive check would never strip the first word.
StringComparison comparison = StringComparison.OrdinalIgnoreCase;
// An empty word has nothing to remove; skipping it also keeps the index math below valid.
if (word.Length > 0)
{
    // Check first word in str: str must start with word AND the character right after it
    // must be a space, so a longer word that merely begins with it is left alone.
    if (str.Length > word.Length
        && str[word.Length] == ' '
        && str.StartsWith(word, comparison))
    {
        // Drop the word together with the space that separated it from the rest.
        str = str.Substring(word.Length + 1);
    }
    // Check last word in str: str must end with word AND the character right before it
    // must be a space, so a longer word that merely ends with it is left alone.
    if (str.Length > word.Length
        && str[str.Length - word.Length - 1] == ' '
        && str.EndsWith(word, comparison))
    {
        // Drop the separating space together with the word.
        str = str.Substring(0, str.Length - word.Length - 1);
    }
    // Check if str and word are equal (either originally, or after the removals above
    // left only the word itself, e.g. "psppsp psppsp").
    if (string.Equals(str, word, comparison))
    {
        str = "";
    }
}
答案 1 :(得分:0)
有几种方法可以做到这一点。这是使用正则表达式的一种方式。您可以预编译正则表达式,如果您在许多字符串上执行此操作,将加快速度:
string str = "Psppsp palm springs airport, 3400 e tahquitz canyon way, Palm springs, CA, US, 92262-6966 psppsp";
string match = "psppsp";
// Escape the word so that any regex metacharacters in it are matched literally
// instead of being interpreted as pattern syntax.
string escaped = Regex.Escape(match);
// Build 2 re-usable regexes
// Leading occurrence: the word must be followed by whitespace or the end of the string,
// so a longer first word that merely starts with it is not touched.
string pattern1 = "^" + escaped + "(?:\\s+|$)";
// Trailing occurrence: the word must be preceded by whitespace or the start of the string.
string pattern2 = "(?:^|\\s+)" + escaped + "$";
Regex rgx1 = new Regex(pattern1, RegexOptions.Compiled | RegexOptions.IgnoreCase);
Regex rgx2 = new Regex(pattern2, RegexOptions.Compiled | RegexOptions.IgnoreCase);
// Apply the 2 regexes (trailing occurrence first, then leading occurrence)
str = rgx1.Replace(rgx2.Replace(str, ""), "");
如果要匹配的单词不会出现在字符串的其他位置,则可以使用LINQ风格的列表操作。这需要先将Split返回的数组转换为列表:
// Split on whitespace and keep only the tokens that differ from match (ignoring case)
string[] keptTokens = Array.FindAll(
    str.Split(),
    token => !string.Equals(token, match, StringComparison.OrdinalIgnoreCase));
// Glue the surviving tokens back together with single spaces
str = string.Join(" ", keptTokens);
或者,更简单的方法
// Ordinal (ignore-case) matching is used because the matched text is cut out by
// match.Length; a culture-sensitive match is not guaranteed to cover exactly that many chars.
// The extra boundary test makes sure match is the whole first word, not just a prefix of it.
if (str.StartsWith(match, StringComparison.OrdinalIgnoreCase)
    && (str.Length == match.Length || char.IsWhiteSpace(str[match.Length]))) {
    str = str.Substring(match.Length);
}
// Same for the end: match must be the whole last word, not just a suffix of it.
if (str.EndsWith(match, StringComparison.OrdinalIgnoreCase)
    && (str.Length == match.Length || char.IsWhiteSpace(str[str.Length - match.Length - 1]))) {
    str = str.Substring(0, str.Length - match.Length);
}
// Remove the separator whitespace left behind by the removals above.
str = str.Trim();
不确定哪一种(如果有的话)是"最好的"。我喜欢最后一种。
答案 2 :(得分:0)
你可以使用str.StartsWith(x),str.EndsWith(x),str.Contains(x),str.IndexOf(x)来查找和定位你的搜索字符串和str.Substring(start,len)来改变字符串。有许多方法可以实现这种字符串操作,但是你要求...
最好,最快:让我们使用一些完全安全的“不安全”代码,这样我们就可以使用指针。
/// <summary>
/// Returns a copy of <paramref name="source"/> with every non-overlapping occurrence of
/// <paramref name="remove"/> deleted. Occurrences are located case-insensitively (both
/// strings are lower-cased with the invariant culture for comparison only); all characters
/// that are kept retain their original casing.
/// </summary>
/// <remarks>
/// This is an extension method, so it must be declared inside a static class. It uses
/// pointers and therefore requires compiling with unsafe code enabled.
/// </remarks>
/// <param name="source">The string to remove occurrences from.</param>
/// <param name="remove">The text to remove. An empty string removes nothing.</param>
/// <returns>The resulting string; <paramref name="source"/> itself when nothing can match.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="source"/> or <paramref name="remove"/> is null.
/// </exception>
public unsafe static string RemoveCaseInsensitive(this string source, string remove)
{
    if (source == null)
    {
        throw new ArgumentNullException("source");
    }
    if (remove == null)
    {
        throw new ArgumentNullException("remove");
    }

    int srcLen = source.Length;
    int rmvLen = remove.Length;
    // Nothing to do for an empty pattern or one longer than the source. This guard also
    // guarantees that every array pinned below has at least one element.
    if (rmvLen == 0 || rmvLen > srcLen)
    {
        return source;
    }

    // Lower-case BOTH sides so the comparison is really case-insensitive. The lower-cased
    // copies are indexed with the same positions as the originals, as the original code did.
    string sourceLower = source.ToLowerInvariant();
    string removeLower = remove.ToLowerInvariant();

    // fallback[i] = length of the longest proper prefix of removeLower[0..i] that is also
    // a suffix of it. After a mismatch the matcher resumes from that prefix instead of
    // restarting at 0, so a match that begins inside a failed partial match is not missed
    // (e.g. removing "ab" from "aab").
    int[] fallback = new int[rmvLen];
    for (int i = 1, k = 0; i < rmvLen; i++)
    {
        while (k > 0 && removeLower[i] != removeLower[k])
        {
            k = fallback[k - 1];
        }
        if (removeLower[i] == removeLower[k])
        {
            k++;
        }
        fallback[i] = k;
    }

    int dstPos = 0;
    int rmvPos = 0;
    // The result can never be longer than the source.
    char[] destChar = new char[srcLen];
    fixed (char* srcPtr = source, srcLwrPtr = sourceLower, rmvPtr = removeLower, dstPtr = destChar)
    fixed (int* fallbackPtr = fallback)
    {
        for (int srcPos = 0; srcPos < srcLen; srcPos++)
        {
            // Copy the original (case-preserved) char and advance the write position.
            *(dstPtr + dstPos) = *(srcPtr + srcPos);
            dstPos++;

            // Compare using the lower-cased source against the lower-cased pattern.
            char current = *(srcLwrPtr + srcPos);
            while (rmvPos > 0 && current != *(rmvPtr + rmvPos))
            {
                rmvPos = *(fallbackPtr + rmvPos - 1);
            }
            if (current == *(rmvPtr + rmvPos))
            {
                rmvPos++;
                if (rmvPos == rmvLen)
                {
                    // Whole pattern matched: the last rmvLen chars written are exactly the
                    // occurrence, so rewind the write position over them.
                    dstPos -= rmvLen;
                    rmvPos = 0;
                }
            }
        }
    }

    return new string(destChar, 0, dstPos);
}
用法:
// Strings are immutable: RemoveCaseInsensitive returns a new string and leaves str untouched,
// so the result has to be captured. Each line below is an independent example.
string withoutAny = str.RemoveCaseInsensitive("Psppsp"); // this will remove all instances throughout the string
string withoutFirst = str.RemoveCaseInsensitive("Psppsp "); // space included at the end so in your example will remove the first instance and trailing space.
string withoutLast = str.RemoveCaseInsensitive(" psppsp"); // space included at the start so in your example will remove the final instance and leading space.
您可能会问,为什么要使用不安全代码?处理数组时,每次访问数组中的元素都会进行边界检查,所以str[1]、str[2]、str[3]等每一次访问都有开销。当您要对数千个字符进行这样的检查时,开销就会累积起来。使用不安全代码可以通过指针直接访问内存,没有边界检查,也没有其他拖慢操作的因素。性能差异很大。
作为性能差异的一个例子,我创建了两个版本:一个是使用标准字符串索引访问的安全版本,另一个是使用指针的不安全版本。我通过反复追加数千个需要保留和需要删除的字符串,创建了一个很长的测试字符串。结果很清楚:不安全版本只用了安全版本大约一半的时间。除了安全与不安全的访问方式之外,这两个方法是相同的。
/// <summary>
/// Case-insensitive substring removal, implemented twice with the same algorithm:
/// once with pointers (<see cref="RemoveUnsafe"/>) and once with indexers
/// (<see cref="RemoveSafe"/>), so the two access styles can be compared.
/// </summary>
public static class StringExtensions
{
    /// <summary>
    /// Returns a copy of <paramref name="source"/> with every non-overlapping occurrence of
    /// <paramref name="remove"/> deleted, matching case-insensitively and preserving the
    /// original casing of the characters that are kept. Pointer-based implementation;
    /// requires compiling with unsafe code enabled.
    /// </summary>
    /// <param name="source">The string to remove occurrences from.</param>
    /// <param name="remove">The text to remove. An empty string removes nothing.</param>
    /// <returns>The resulting string; <paramref name="source"/> itself when nothing can match.</returns>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    public unsafe static string RemoveUnsafe(this string source, string remove)
    {
        if (source == null)
        {
            throw new ArgumentNullException("source");
        }
        if (remove == null)
        {
            throw new ArgumentNullException("remove");
        }

        int srcLen = source.Length;
        int rmvLen = remove.Length;
        // Empty or over-long pattern: nothing can match. This guard also guarantees that
        // every array pinned below has at least one element.
        if (rmvLen == 0 || rmvLen > srcLen)
        {
            return source;
        }

        // Lower-case BOTH sides (RemoveSafe always did; this method previously did not,
        // so a mixed-case pattern never matched).
        string sourceLower = source.ToLowerInvariant();
        string removeLower = remove.ToLowerInvariant();
        int[] fallback = BuildFallbackTable(removeLower);

        int dstPos = 0;
        int rmvPos = 0;
        // The result can never be longer than the source.
        char[] destChar = new char[srcLen];
        fixed (char* srcPtr = source, srcLwrPtr = sourceLower, rmvPtr = removeLower, dstPtr = destChar)
        fixed (int* fallbackPtr = fallback)
        {
            for (int srcPos = 0; srcPos < srcLen; srcPos++)
            {
                // Copy the original (case-preserved) char and advance the write position.
                *(dstPtr + dstPos) = *(srcPtr + srcPos);
                dstPos++;

                // Compare using the lower-cased source against the lower-cased pattern;
                // on mismatch fall back to the longest prefix that still matches.
                char current = *(srcLwrPtr + srcPos);
                while (rmvPos > 0 && current != *(rmvPtr + rmvPos))
                {
                    rmvPos = *(fallbackPtr + rmvPos - 1);
                }
                if (current == *(rmvPtr + rmvPos))
                {
                    rmvPos++;
                    if (rmvPos == rmvLen)
                    {
                        // Whole pattern matched: rewind the write position over it.
                        dstPos -= rmvLen;
                        rmvPos = 0;
                    }
                }
            }
        }

        return new string(destChar, 0, dstPos);
    }

    /// <summary>
    /// Same contract and algorithm as <see cref="RemoveUnsafe"/>, but using bounds-checked
    /// string and array indexers instead of pointers.
    /// </summary>
    /// <param name="source">The string to remove occurrences from.</param>
    /// <param name="remove">The text to remove. An empty string removes nothing.</param>
    /// <returns>The resulting string; <paramref name="source"/> itself when nothing can match.</returns>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    public static string RemoveSafe(this string source, string remove)
    {
        if (source == null)
        {
            throw new ArgumentNullException("source");
        }
        if (remove == null)
        {
            throw new ArgumentNullException("remove");
        }

        int srcLen = source.Length;
        int rmvLen = remove.Length;
        // Empty pattern previously threw IndexOutOfRangeException on removeLower[0].
        if (rmvLen == 0 || rmvLen > srcLen)
        {
            return source;
        }

        string sourceLower = source.ToLowerInvariant();
        string removeLower = remove.ToLowerInvariant();
        int[] fallback = BuildFallbackTable(removeLower);

        int dstPos = 0;
        int rmvPos = 0;
        char[] destChar = new char[srcLen];
        for (int srcPos = 0; srcPos < srcLen; srcPos++)
        {
            // Copy the original (case-preserved) char and advance the write position.
            destChar[dstPos] = source[srcPos];
            dstPos++;

            char current = sourceLower[srcPos];
            while (rmvPos > 0 && current != removeLower[rmvPos])
            {
                rmvPos = fallback[rmvPos - 1];
            }
            if (current == removeLower[rmvPos])
            {
                rmvPos++;
                if (rmvPos == rmvLen)
                {
                    // Whole pattern matched: rewind the write position over it.
                    dstPos -= rmvLen;
                    rmvPos = 0;
                }
            }
        }

        return new string(destChar, 0, dstPos);
    }

    // Builds the prefix-function table for pattern: entry i is the length of the longest
    // proper prefix of pattern[0..i] that is also a suffix of it. The matchers use it to
    // resume after a mismatch without missing a match that starts inside a failed partial
    // match (e.g. removing "ab" from "aab").
    private static int[] BuildFallbackTable(string pattern)
    {
        int[] table = new int[pattern.Length];
        for (int i = 1, k = 0; i < pattern.Length; i++)
        {
            while (k > 0 && pattern[i] != pattern[k])
            {
                k = table[k - 1];
            }
            if (pattern[i] == pattern[k])
            {
                k++;
            }
            table[i] = k;
        }
        return table;
    }
}
这是基准测试:
/// <summary>
/// Micro-benchmark that times RemoveSafe against RemoveUnsafe on a large generated string
/// and prints the elapsed milliseconds of each to the console.
/// </summary>
internal static class StringRemoveTests
{
    // Builds the benchmark input: 1,000,000 chunks, every third one being "GoodBye"
    // (the text to remove) and the others a run of 20 'x' characters (text to keep).
    private static string CreateString()
    {
        string x = "xxxxxxxxxxxxxxxxxxxx";
        string y = "GoodBye";
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < 1000000; i++)
        {
            sb.Append(i % 3 == 0 ? y : x);
        }
        return sb.ToString();
    }

    // Times a single invocation of removal on a freshly built input and returns the
    // elapsed whole milliseconds. Input construction is deliberately outside the timed region.
    private static int Measure(Func<string, string> removal)
    {
        string str = CreateString();
        // Stopwatch is a monotonic, high-resolution timer; DateTime.Now is a wall clock
        // with coarse resolution and can jump when the system time is adjusted.
        System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
        string str2 = removal(str);
        timer.Stop();
        // Reference the result after the timed region so it is clearly used.
        GC.KeepAlive(str2);
        return (int)timer.ElapsedMilliseconds;
    }

    private static int RunBenchMarkUnsafe()
    {
        return Measure(s => s.RemoveUnsafe("goodBYE"));
    }

    private static int RunBenchMarkSafe()
    {
        return Measure(s => s.RemoveSafe("goodBYE"));
    }

    /// <summary>Runs both benchmarks and writes their timings (in ms) to the console.</summary>
    public static void RunBenchmarks()
    {
        // Untimed warm-up so one-time costs (e.g. JIT compilation of each method on its
        // first call) are not included in the reported figures.
        RunBenchMarkSafe();
        RunBenchMarkUnsafe();

        Console.WriteLine("Safe version: " + RunBenchMarkSafe());
        Console.WriteLine("Unsafe version: " + RunBenchMarkUnsafe());
    }
}
// Console entry point for the benchmark sample.
class Program
{
// Runs the benchmarks and then waits for a line of input before exiting.
static void Main(string[] args)
{
// Writes the "Safe version" / "Unsafe version" timings to the console.
StringRemoveTests.RunBenchmarks();
// Blocks until a line is read — presumably to keep the console window open so the output can be read.
Console.ReadLine();
}
}
输出:(结果是毫秒)
// 1st run
Safe version: 569
Unsafe version: 260
// 2nd run
Safe version: 709
Unsafe version: 329
// 3rd run
Safe version: 486
Unsafe version: 279