用空格替换字符串中的多个非ASCII字符

时间:2019-04-18 22:06:15

标签: sql tsql

我试图用一个空格替换多个非ascii字符,或者只是删除T-SQL中的字符。

'øsmeøø' string should be replaced by 'sme'

我尝试使用以下内容

 -- Asker's attempt: delete the first character of the literal that falls in the
 -- CHAR(127)-CHAR(255) range. The binary collation makes the [x-y] range compare
 -- by code point. The pattern literal must be closed on the same line: a line
 -- break inside it becomes part of the pattern and prevents any match.
 -- PATINDEX reports only the first match, so exactly one character is removed
 -- per call; the expression has to be repeated until PATINDEX returns 0.
 SELECT STUFF('smeøø string',
              PATINDEX('%[' + CHAR(127) + '-' + CHAR(255) + ']%' COLLATE Latin1_General_100_BIN2,
                       'smeøø string'),
              1,
              '')

这将返回以下内容,但我希望它继续直到Patindex为0

 smeø string

http://www.sqlfiddle.com/#!18/9eecb/41689

1 个答案:

答案 0 :(得分:0)

于20190419更新,以演示不需要您创建NGrams8K函数的解决方案

首先获取NGrams8K的副本

删除“不良”字符:

-- Sample input mixing plain letters with 'ø' characters.
DECLARE @string VARCHAR(1000) = 'øsmøeøø' COLLATE Latin1_General_100_BIN2;

-- Remove the "bad" characters.
-- dbo.ngrams8k is defined outside this snippet; it is used here as a source of
-- (position, token) rows for @string (second argument 1 is presumably the
-- token length). Tokens whose ASCII() code is below 127 are kept and glued
-- back together in position order through FOR XML PATH('').
SELECT
(
  SELECT   ch.token + ''   -- the expression has no column name, so FOR XML PATH emits bare text
  FROM     dbo.ngrams8k(@string, 1) AS ch
  WHERE    ASCII(ch.token) <= 126
  ORDER BY ch.position
  FOR XML PATH(''), TYPE
).value('(text())[1]', 'VARCHAR(8000)');

用空格替换“坏”字符:

-- Replace the "bad" characters with a space.
-- Expects @string to be declared earlier in the same batch. Every token from
-- dbo.ngrams8k is emitted: tokens whose ASCII() code is below 127 pass through
-- unchanged, anything else becomes a single space. The pieces are concatenated
-- in position order through FOR XML PATH('').
SELECT
(
  SELECT   CASE
             WHEN ASCII(ch.token) < 127 THEN ch.token
             ELSE ' '
           END + ''
  FROM     dbo.ngrams8k(@string, 1) AS ch
  ORDER BY ch.position
  FOR XML PATH(''), TYPE
).value('(text())[1]', 'VARCHAR(8000)');

……如果您运行的是 SQL Server 2017 或更高版本,并且希望代码更简洁:

-- Remove bad characters
-- Expects @string to be declared earlier in the same batch. Keeps only the
-- tokens whose ASCII() code is below 127 and concatenates them, ordered by
-- position, with an empty separator.
SELECT   STRING_AGG(ch.token, '') WITHIN GROUP (ORDER BY ch.position)
FROM     dbo.ngrams8k(@string, 1) AS ch
WHERE    ASCII(ch.token) <= 126;

-- Replace bad characters
-- Expects @string to be declared earlier in the same batch. Every token is
-- aggregated in position order; tokens whose ASCII() code is not below 127
-- are swapped for a single space first.
SELECT   STRING_AGG(
           CASE WHEN ASCII(ch.token) < 127 THEN ch.token ELSE ' ' END,
           ''
         ) WITHIN GROUP (ORDER BY ch.position)
FROM     dbo.ngrams8k(@string, 1) AS ch;

使用NGrams8K逻辑转换为子查询的更新解决方案

请留意查询中我的注释……

-- Sample input and token length shared by the statements of this batch.
DECLARE @string VARCHAR(1000) = 'øsmøeøø' COLLATE Latin1_General_100_BIN2, @N INT = 1;

-- Remove bad characters
-- The dbo.NGrams8K logic is written out inline as CTEs, so no function has to
-- be created.
WITH ninety(x) AS
(
  -- 90 "dummy" rows; cross-joined with itself this yields 90*90 = 8100 rows,
  -- enough for varchar(8000).
  SELECT 1
  FROM (VALUES
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
       ) AS v(one)
),
iTally(N) AS
(
  -- Sequential numbers 1..(DATALENGTH(@string) - (@N - 1)): one per token start.
  SELECT TOP (ABS(CONVERT(BIGINT, DATALENGTH(ISNULL(@string, '')) - (ISNULL(@N, 1) - 1), 0)))
         ROW_NUMBER() OVER (ORDER BY (SELECT NULL))   -- order by a constant to avoid a sort
  FROM       ninety AS a
  CROSS JOIN ninety AS b
),
ng(position, token) AS
(
  -- One row per start position: the @N-character slice of @string found there.
  SELECT N,
         SUBSTRING(@string, CAST(N AS int), @N)
  FROM   iTally
  WHERE  @N > 0 AND @N <= DATALENGTH(@string)
)
SELECT
(
  -- Keep tokens whose ASCII() code is below 127 and concatenate them in order.
  SELECT   ng.token + ''
  FROM     ng
  WHERE    ASCII(ng.token) < 127
  ORDER BY ng.position
  FOR XML PATH(''), TYPE
).value('(text())[1]', 'VARCHAR(8000)');

-- Replace bad characters
-- Expects @string and @N to be declared earlier in the same batch. The CTEs
-- below stand in for dbo.ngrams8k(@string, 1), so no function is required.
WITH ninety(x) AS
(
  -- 90 "dummy" rows; cross-joined with itself this yields 90*90 = 8100 rows,
  -- enough for varchar(8000).
  SELECT 1
  FROM (VALUES
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
       ) AS v(one)
),
iTally(N) AS
(
  -- Sequential numbers 1..(DATALENGTH(@string) - (@N - 1)): one per token start.
  SELECT TOP (ABS(CONVERT(BIGINT, DATALENGTH(ISNULL(@string, '')) - (ISNULL(@N, 1) - 1), 0)))
         ROW_NUMBER() OVER (ORDER BY (SELECT NULL))   -- order by a constant to avoid a sort
  FROM       ninety AS a
  CROSS JOIN ninety AS b
),
ng(position, token) AS
(
  -- One row per start position: the @N-character slice of @string found there.
  SELECT N,
         SUBSTRING(@string, CAST(N AS int), @N)
  FROM   iTally
  WHERE  @N > 0 AND @N <= DATALENGTH(@string)
)
SELECT
(
  -- Tokens whose ASCII() code is below 127 pass through; the rest become a space.
  SELECT   CASE
             WHEN ASCII(ng.token) < 127 THEN ng.token
             ELSE ' '
           END + ''
  FROM     ng
  ORDER BY ng.position
  FOR XML PATH(''), TYPE
).value('(text())[1]', 'VARCHAR(8000)');

-- Remove bad characters using STRING_AGG (SQL 2017++)
-- Expects @string and @N to be declared earlier in the same batch. The CTEs
-- below stand in for dbo.ngrams8k(@string, 1), so no function is required.
WITH ninety(x) AS
(
  -- 90 "dummy" rows; cross-joined with itself this yields 90*90 = 8100 rows,
  -- enough for varchar(8000).
  SELECT 1
  FROM (VALUES
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
       ) AS v(one)
),
iTally(N) AS
(
  -- Sequential numbers 1..(DATALENGTH(@string) - (@N - 1)): one per token start.
  SELECT TOP (ABS(CONVERT(BIGINT, DATALENGTH(ISNULL(@string, '')) - (ISNULL(@N, 1) - 1), 0)))
         ROW_NUMBER() OVER (ORDER BY (SELECT NULL))   -- order by a constant to avoid a sort
  FROM       ninety AS a
  CROSS JOIN ninety AS b
),
ng(position, token) AS
(
  -- One row per start position: the @N-character slice of @string found there.
  SELECT N,
         SUBSTRING(@string, CAST(N AS int), @N)
  FROM   iTally
  WHERE  @N > 0 AND @N <= DATALENGTH(@string)
)
-- Keep tokens whose ASCII() code is below 127 and concatenate them in order.
SELECT   STRING_AGG(ng.token, '') WITHIN GROUP (ORDER BY ng.position)
FROM     ng
WHERE    ASCII(ng.token) < 127;

-- Replace bad characters using STRING_AGG (SQL 2017++)
-- Expects @string and @N to be declared earlier in the same batch. The CTEs
-- below stand in for dbo.ngrams8k(@string, 1), so no function is required.
WITH ninety(x) AS
(
  -- 90 "dummy" rows; cross-joined with itself this yields 90*90 = 8100 rows,
  -- enough for varchar(8000).
  SELECT 1
  FROM (VALUES
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
          (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
       ) AS v(one)
),
iTally(N) AS
(
  -- Sequential numbers 1..(DATALENGTH(@string) - (@N - 1)): one per token start.
  SELECT TOP (ABS(CONVERT(BIGINT, DATALENGTH(ISNULL(@string, '')) - (ISNULL(@N, 1) - 1), 0)))
         ROW_NUMBER() OVER (ORDER BY (SELECT NULL))   -- order by a constant to avoid a sort
  FROM       ninety AS a
  CROSS JOIN ninety AS b
),
ng(position, token) AS
(
  -- One row per start position: the @N-character slice of @string found there.
  SELECT N,
         SUBSTRING(@string, CAST(N AS int), @N)
  FROM   iTally
  WHERE  @N > 0 AND @N <= DATALENGTH(@string)
)
-- Tokens whose ASCII() code is below 127 pass through; the rest become a space.
SELECT   STRING_AGG(
           CASE WHEN ASCII(ng.token) < 127 THEN ng.token ELSE ' ' END,
           ''
         ) WITHIN GROUP (ORDER BY ng.position)
FROM     ng;