我如何克服substr和clob限制?

时间:2014-02-19 12:30:24

标签: oracle plsql substr clob

我有一个存储在clob中的XML。我需要运行各种substr和instr函数,以便显示XML中的特定数据。

我当前的代码使用varchar2(32000)变量处理它,如下所示:

-- Fragment (declarations not shown): pulls the <SummaryParameters> element out of the
-- <DataSheet> section of the XML stored in test.test_data_v6.xml_data.
-- r1 is not defined in this fragment; presumably a cursor-loop record from the enclosing code.
select xml_data into xmlclob
                 from test.test_data_v6 
                 where test_id =r1.test_id;

                 -- First character after the opening tag: '<DataSheet>' is 11 characters long.
                 start_position := dbms_lob.instr(xmlclob,'<DataSheet>',1,1)+11;
                 -- Position of the closing tag, searched from start_position onwards.
                 end_position := dbms_lob.instr(xmlclob,'</DataSheet>',start_position,1);                         
                 -- NOTE(review): dbms_lob.substr on a CLOB returns a VARCHAR2, so a range longer than the
                 -- VARCHAR2 maximum cannot come back through this call - confirm against the DBMS_LOB docs.
                 l_str := (dbms_lob.substr(xmlclob, end_position - start_position, start_position));                   
                 -- Keep from the start of '<SummaryParameters>' through the end of the closing tag;
                 -- +20 is the length of '</SummaryParameters>'.
                 l_str := substr(l_str, instr(l_str, '<SummaryParameters>'),instr(l_str, '</SummaryParameters>')-instr(l_str, '<SummaryParameters>')+20);

l_str是varchar2。

如果长度大于32000,则不显示子串,我假设是varchar的限制。所以我将l_str更改为clob变量并执行以下操作:

-- Fragment (declarations not shown): same extraction as before, but reading from
-- test.test_data_v6_temp and, per the surrounding text, with l_str declared as a CLOB.
-- r1 is not defined in this fragment; presumably a cursor-loop record from the enclosing code.
select xml_data into xmlclob
                 from test.test_data_v6_temp 
                 where test_id =r1.test_id;

                 -- First character after the 11-character '<DataSheet>' opening tag.
                 start_position := dbms_lob.instr(xmlclob,'<DataSheet>',1,1)+11;
                 -- Position of the closing tag, searched from start_position onwards.
                 end_position := dbms_lob.instr(xmlclob,'</DataSheet>',start_position,1);                         
                 -- NOTE(review): dbms_lob.substr still returns a VARCHAR2 for a CLOB input, so changing the
                 -- type of the target variable does not lift the size limit - confirm against the DBMS_LOB docs.
                 l_str := dbms_lob.substr(xmlclob, end_position - start_position, start_position);
                 -- Keep from the start of '<SummaryParameters>' through the end of the closing tag;
                 -- +20 is the length of '</SummaryParameters>'. The result goes to l_str1.
                 l_str1 := substr(l_str, instr(l_str, '<SummaryParameters>'), instr(l_str, '</SummaryParameters>')-instr(l_str, '<SummaryParameters>')+20);

如果某个XML的end_position - start_position超过32000,结果同样为null。

我编写了一个匿名块来测试子字符串的限制,如下所示:

-- Anonymous test block: reads one row's XML CLOB, locates the <DataSheet> section,
-- takes a fixed-length slice of it with dbms_lob.substr and stores that slice in test.temp.
declare
-- error_reason, l_str and e_clob are declared but not referenced in the body below.
error_reason varchar2(32000);
start_position number;
end_position number;
str_length number;
str_length2 number;
l_clob2 clob;
l_str varchar2(32000);
e_clob clob;
temp_clob clob;
begin
           -- "(hard coded the test id)" is the author's placeholder for a literal id, not valid PL/SQL.
           select xml_data into l_clob2 from test.test_data_v6_temp where test_id=(hard coded the test id);

           -- First character after the 11-character '<DataSheet>' opening tag.
           start_position := dbms_lob.instr(l_clob2,'<DataSheet>',1,1)+11;
           dbms_output.put_line('startposition='||start_position);
           -- Position of the closing tag, searched from start_position onwards.
           end_position := dbms_lob.instr(l_clob2,'</DataSheet>',start_position,1);
           dbms_output.put_line('end position='||end_position);
           -- Real length of the section; it is only printed, not used for the extraction below.
           str_length := end_position-start_position;
           -- Fixed experimental length; this is the value actually passed to dbms_lob.substr.
           str_length2 := 30000;
           dbms_output.put_line('str_length='||str_length);
           dbms_output.put_line('str_length2='||str_length2);
           -- Repeats the str_length print from three lines above.
           dbms_output.put_line('str_length='||str_length);

                        -- Slice of str_length2 characters starting at start_position.
                        temp_clob := dbms_lob.substr(l_clob2, str_length2, start_position);
                   --dbms_output.put_line('temp_clob1='||temp_clob);
                  --temp_clob := substr(temp_clob, instr(temp_clob, '<SummaryParameters>'), instr(temp_clob, '</SummaryParameters>')-instr(temp_clob, '<SummaryParameters>')+20);
                  -- Persist the slice so it can be inspected with a query on test.temp.
                  insert into test.temp values(temp_clob);
                 commit;

end;

--select * from test.temp

我注释掉了第二个substr调用,只是为了看看表中是否会写入数据;如果我将str_length更改为低于32000的任何值,表中确实会写入数据。

这是我的输出

startposition=202134
end position=237746
str_length=35612
str_length2=30000

如何克服substr和clob的限制以获取正确的数据?

收到答案后编辑: 我尝试了使用复制(COPY)的解决方案,但遇到的问题是SummaryParameters位于XML的DataSheet标记内部。把它放进循环后,针对SummaryParameters的第二次迭代找不到对应的结束标记。另外,在XML的extract函数中我使用了getStringVal()方法,它返回的同样是有大小限制的varchar。我不知道如何使用复制功能来获取DataSheet标签内的SummaryParameters(我无法直接查找SummaryParameters标签,因为DataSheet标签之外还有其他标签)。这是我的代码:

-- Anonymous block: copies the <DataSheet> section of one row's XML CLOB into temp_clob in
-- chunks, then tries to copy the <SummaryParameters> part of temp_clob into temp_clob2 and
-- stores temp_clob2 in test.temp.
declare
start_position number;
end_position number;
str_length number;
str_length2 number;
sp2 number;
ep2 number;
str_len number;
 str_len2 number;
 l_clob2 clob;
temp_clob clob;
temp_clob2 clob;
 begin
               select xml_data into l_clob2 from test.test_data_v6_temp where test_id=211230309;

             -- First character after the 11-character '<DataSheet>' opening tag.
             start_position := dbms_lob.instr(l_clob2,'<DataSheet>',1,1)+11;
            dbms_output.put_line('startposition='||start_position);
           -- Position of the closing tag, searched from start_position onwards.
           end_position := dbms_lob.instr(l_clob2,'</DataSheet>',start_position,1);
               dbms_output.put_line('end position='||end_position);
               -- Number of characters still to be copied in the first loop.
               str_length := end_position-start_position;

            dbms_output.put_line('Original clob size: ' || dbms_lob.getlength(l_clob2));
            dbms_lob.createtemporary(temp_clob, false);
            -- Append the section to temp_clob in chunks of at most 32768 characters.
            while str_length > 0 
            loop
            str_length2 := least(str_length, 32768);
            str_length := str_length - str_length2;
            dbms_lob.copy(temp_clob, l_clob2, str_length2,dbms_lob.getlength(temp_clob) + 1, start_position);
            -- NOTE(review): this advances the source offset by str_length (what is still left)
            -- rather than str_length2 (the chunk just copied), so every chunk after the first
            -- is read from the wrong place even though the total copied size comes out right.
            start_position := start_position + str_length;
              end loop;
             dbms_output.put_line('Copied clob 1 size: ' || dbms_lob.getlength(temp_clob));
             -- NOTE(review): '<SummaryParameters>' is 19 characters long, so +20 lands one
             -- character past the end of the opening tag.
             sp2 := dbms_lob.instr(temp_clob,'<SummaryParameters>')+20;
                dbms_output.put_line('sp2 = '|| sp2);
              -- Searches temp_clob from its beginning; yields 0 when the closing tag is not present.
              ep2 := instr(temp_clob, '</SummaryParameters>');
                dbms_output.put_line('ep2 = '|| ep2);

              -- Negative when ep2 is 0, in which case the loop below never runs.
              str_len :=ep2 - sp2;
              dbms_lob.createtemporary(temp_clob2, false);
             -- Append the range sp2 .. ep2-1 of temp_clob to temp_clob2 in chunks of at most 32768.
             while str_len > 0 
              loop
                str_len2 := least(str_len, 32768);
                str_len := str_len - str_len2;
                dbms_lob.copy(temp_clob2, temp_clob, str_len2,dbms_lob.getlength(temp_clob2) + 1, sp2);
                -- NOTE(review): same pattern as the first loop - the offset advances by the
                -- remaining length (str_len) instead of the chunk just copied (str_len2).
                sp2 := sp2 + str_len;
              end loop; 


            dbms_output.put_line('Copied 2 clob size: ' || dbms_lob.getlength(temp_clob2));

 -- Persist the result so it can be inspected with a query on test.temp.
 insert into test.temp values(temp_clob2);
                  commit;
end;

这是我的输出:

startposition=202134
end position=237746
Original clob size: 1723831
Copied clob 1 size: 35612
sp2 = 703
ep2 = 0
Copied 2 clob size: 0

2 个答案:

答案 0 :(得分:1)

以下是使用DBMS_LOB.COPY过程、从而避免转换为VARCHAR2的示例:

   -- Builds temp_clob as a CLOB-to-CLOB copy of str_length characters of l_clob2,
   -- starting at start_position, so the data never passes through a VARCHAR2.
   dbms_output.put_line('Original clob size: ' || dbms_lob.getlength(l_clob2));
   dbms_lob.createtemporary(lob_loc => temp_clob, cache => false);
   while str_length > 0 loop
     -- Size of this pass: whatever is left, capped at 32768 characters.
     str_length2 := least(str_length, 32768);
     -- Append the chunk to the current end of temp_clob.
     dbms_lob.copy(
       dest_lob    => temp_clob,
       src_lob     => l_clob2,
       amount      => str_length2,
       dest_offset => dbms_lob.getlength(temp_clob) + 1,
       src_offset  => start_position);
     -- Move the read position forward by the chunk just copied and shrink the remainder.
     start_position := start_position + str_length2;
     str_length     := str_length - str_length2;
   end loop;
   dbms_output.put_line('Copied clob size: ' || dbms_lob.getlength(temp_clob));

我用填充数据加长了'XML'值,得到的输出如下:

startposition=17
end position=40456
str_length=40439
str_length2=30000
str_length=40439
Original clob size: 40473
Copied clob size: 40439

所以temp_clob包含原始CLOB的40k +'子串'。


@user272735说得对:如果该列保存的是XML数据,就应该按XML来处理(最好也按XML来存储,即使用XMLType,而不是像现在看起来的那样使用CLOB)。

您可以通过以下方式从XML中提取相关数据来避免这项工作:

-- Returns the child nodes of <SummaryParameters> under <DataSheet> as XML content.
-- xml_data is converted to XMLType first; "from ..." stands for the table and filter to use.
select xmlquery('*/DataSheet/SummaryParameters/*'
  passing xmltype(xml_data) returning content)
from ...

或旧版本(11gR2中弃用):

-- Legacy equivalent of the XMLQuery form: applies the same path to xml_data after
-- converting it to XMLType; "from ..." stands for the table and filter to use.
select
  extract(
    xmltype(xml_data),
    '*/DataSheet/SummaryParameters/*'
  )
from ...

其中任何一个都会为您提供<SummaryParameters>...</SummaryParameters>内的部分。从您的问题中看不出DataSheet是否是顶层元素,也看不出SummaryParameters是否直接位于该元素内部,因此您可能需要自行试验通配符应该放在哪里。有关XMLQuery的更多信息,请参阅Oracle文档。


使用两个复制语句可以获取内部SummaryParameters部分:

-- Extracts the inside of <SummaryParameters> from the <DataSheet> section of the XML held
-- in t42 (row id = 1) using CLOB-to-CLOB copies only, and stores it as a new row (id 2).
declare
  -- Tag literals; their lengths are used as offsets to step past the opening tags.
  c_sheet_open    constant varchar2(30) := '<DataSheet>';
  c_sheet_close   constant varchar2(30) := '</DataSheet>';
  c_summary_open  constant varchar2(30) := '<SummaryParameters>';
  c_summary_close constant varchar2(30) := '</SummaryParameters>';

  start_position number;
  end_position   number;
  l_clob2        clob;
  temp_clob      clob;
  temp_clob2     clob;

  -- Appends p_count characters of p_source, starting at p_first, to the end of p_target,
  -- in passes of at most 32768 characters. Does nothing when p_count is not positive.
  procedure append_range(
    p_target in out nocopy clob,
    p_source in clob,
    p_first  in number,
    p_count  in number
  ) is
    l_left   number := p_count;
    l_offset number := p_first;
    l_take   number;
  begin
    while l_left > 0 loop
      l_take := least(l_left, 32768);
      dbms_lob.copy(p_target, p_source, l_take,
        dbms_lob.getlength(p_target) + 1, l_offset);
      l_left   := l_left - l_take;
      l_offset := l_offset + l_take;
    end loop;
  end append_range;
begin
  select xml_data into l_clob2 from t42 where id = 1;

  -- Stage 1: everything between <DataSheet> and </DataSheet> goes into temp_clob.
  start_position := dbms_lob.instr(l_clob2, c_sheet_open, 1, 1) + length(c_sheet_open);
  dbms_output.put_line('startposition='||start_position);
  end_position := dbms_lob.instr(l_clob2, c_sheet_close, start_position, 1);
  dbms_output.put_line('end position='||end_position);

  dbms_lob.createtemporary(temp_clob, false);
  append_range(temp_clob, l_clob2, start_position, end_position - start_position);
  dbms_output.put_line('temp_clob length: ' || dbms_lob.getlength(temp_clob));

  -- Stage 2: within temp_clob, everything between the SummaryParameters tags goes into temp_clob2.
  start_position := dbms_lob.instr(temp_clob, c_summary_open, 1, 1) + length(c_summary_open);
  dbms_output.put_line('startposition='||start_position);
  end_position := dbms_lob.instr(temp_clob, c_summary_close, start_position, 1);
  dbms_output.put_line('end position='||end_position);

  dbms_lob.createtemporary(temp_clob2, false);
  append_range(temp_clob2, temp_clob, start_position, end_position - start_position);
  dbms_output.put_line('temp_clob2 length: ' || dbms_lob.getlength(temp_clob2));

  insert into t42 values (2, temp_clob2);
end;
/

给出(用我的虚拟数据):

startposition=17
end position=40456
temp_clob length: 40439
startposition=220
end position=420
temp_clob2 length: 200

答案 1 :(得分:0)

您是否尝试过使用XML函数(例如用UPDATEXML修改XML),而不是在CLOB上使用纯字符串函数?

也许是这样的:

-- Sketch: load a row's XML into an XMLType variable and rewrite part of it with UPDATEXML
-- instead of using string functions on the CLOB.
DECLARE
   xmlval XMLType;
BEGIN

-- NOTE(review): r1 is not declared in this block; presumably the cursor-loop record from the
-- question's code. The question describes xml_data as a CLOB, so selecting it into an
-- XMLType variable may need an explicit XMLTYPE(xml_data) conversion - confirm.
SELECT xml_data INTO xmlval 
FROM TEST.test_data_v6_temp 
WHERE test_id =r1.test_id;

-- Arguments are (document, XPath, replacement value); the XPath and value here look like
-- placeholders to be replaced with the real expression.
-- NOTE(review): UPDATEXML is documented as a SQL function; a direct PL/SQL assignment may
-- need to be written as SELECT UPDATEXML(...) INTO xmlval FROM dual - confirm.
xmlval := UPDATEXML(xmlval, '/DataSheet', 'SummaryParameters');

END;

您只需找到正确的XPath表达式并进行替换。

其他XML函数

DELETEXML 
INSERTCHILDXML  
INSERTCHILDXMLAFTER
INSERTCHILDXMLBEFORE
INSERTXMLAFTER 
INSERTXMLBEFORE 
XMLELEMENT
XMLCONCAT

也许其中一个可以帮助你。

相关问题