
时间:2013-05-23 07:36:43

标签: javascript html stripping

我想使用javascript从字符串中删除除<br><br/>标记之外的所有html标记。 我看过很多像这样的问题,但他们的答案会删除所有的html标签,包括<br><br/>标签。


8 个答案:

答案 0 :(得分:44)


/**
 * Remove every HTML tag from a string except line-break tags.
 *
 * A tag is treated as a line break only when its name is exactly "br"
 * (any letter case), i.e. "br" is immediately followed by whitespace, "/" or ">".
 * This keeps <br>, <br/>, <br /> and <BR>, while tags that merely start with
 * "br" (for example <break>) are removed like any other tag.
 *
 * @param {string} input - Markup to clean.
 * @returns {string} The input with all tags other than <br> removed.
 */
function stripTagsExceptBr(input) {
    // The inner lookahead is the name boundary: without it the pattern would
    // degrade to "anything starting with br" and keep unrelated tags.
    return input.replace(/<(?!br(?=[\s\/>]))[^>]+>/gi, '');
}

var html = 'this is my <b>string</b> and it\'s pretty cool<br />isn\'t it?<br>Yep, it is. <strong>More HTML tags</strong>';
html = stripTagsExceptBr(html);

// html is now: this is my string and it's pretty cool<br />isn't it?<br>Yep, it is. More HTML tags


答案 1 :(得分:8)


 /**
  * Strip all HTML tags from a string but keep line breaks.
  *
  * Every <br> variant (<br>, <br/>, <br />, any letter case) is first swapped
  * for a text placeholder, the remaining markup is reduced to its text content
  * through a temporary DIV element, and the placeholders are then turned back
  * into "<br>".
  *
  * Requires a global `document` (browser environment).
  * NOTE: literal "||br||" text in the input will also come back as "<br>".
  * NOTE(review): the markup is assigned to innerHTML; do not call this with
  * untrusted input without confirming that is acceptable for your page.
  *
  * @param {string} html - Markup to clean.
  * @returns {string} Text content of the markup with line breaks as "<br>".
  */
 function remove_tags(html) {
   var BR_TOKEN = "||br||";
   // A regex with the g flag masks every break; a string pattern would only
   // replace the first occurrence.
   var masked = html.replace(/<br\s*\/?>/gi, BR_TOKEN);
   var tmp = document.createElement("DIV");
   tmp.innerHTML = masked;
   // Fall back to "" so an empty result never leaves us with undefined.
   var text = tmp.textContent || tmp.innerText || "";
   return text.split(BR_TOKEN).join("<br>");
 }

答案 2 :(得分:5)


/**
 * Remove HTML tags from a string, optionally keeping a list of tags.
 *
 * Every argument after the first names a tag to keep; it may be written as
 * "b", "<b>" or "</b>". Kept tags are matched by their exact name (case
 * insensitive), in both opening and closing form, and keep their attributes.
 * With no tag names given, every tag is removed.
 *
 * @param {string} _html - Markup to clean.
 * @returns {string} The cleaned markup, or "" when _html is not a string.
 */
function strip_tags( _html /*you can put each single tag per argument*/ ) {
    if ( typeof _html !== "string" && !( _html instanceof String ) ) return "";

    var _tags = [];
    for ( var _a = 1 ; _a < arguments.length ; _a++ ) {
        if ( typeof arguments[_a] !== "string" ) continue;
        var _tag = arguments[_a].replace( /[<>\/]/g, "" ).trim();
        // Test the cleaned name: an argument such as "<>" must not add an
        // empty alternative, which would make the pattern keep every tag.
        // Escape regex metacharacters so the name is matched literally.
        if ( _tag.length > 0 ) _tags.push( _tag.replace( /[.*+?^${}()|[\]\\]/g, "\\$&" ) );
    }

    if ( _tags.length === 0 ) return _html.replace( /<[^>]+>/g, "" );

    // Backslashes are doubled because the pattern is built from a string.
    // The inner lookahead is the name boundary, so keeping "b" does not also
    // keep <br> or <bol>.
    var _re = new RegExp( "<(?!\\/?(?:" + _tags.join( "|" ) + ")(?=[\\s\\/>]))[^>]+>", "gi" );
    return _html.replace( _re, "" );
}

// Demo: writes the sample markup to the page, followed by the result of
// strip_tags() for several different sets of tags to keep.
var _html = "<b>Just</b> some <i>tags</i> and text to test <u>this code</u>";
(function ( emit ) {
    emit( "This is the original html code including some tags<br>" );
    emit( _html + "<br><br>" ); // the untouched markup

    emit( "Now we remove all tags (plain text)<br>" );
    emit( strip_tags( _html ) + "<br><br>" ); // no tag names: strip everything

    emit( "Only the bold tag is kept<br>" );
    emit( strip_tags( _html, "b" ) + "<br><br>" ); // <b> survives

    emit( "Only the underline tag is kept<br>" );
    emit( strip_tags( _html, "u" ) + "<br><br>" ); // <u> survives

    emit( "Only the italic tag is kept<br>" );
    emit( strip_tags( _html, "<i>" ) + "<br><br>" ); // <i> survives, name given with brackets

    emit( "Keeping both italic and underline<br>" );
    emit( strip_tags( _html, "i", "u" ) ); // <i> and <u> survive
})( function ( chunk ) { document.write( chunk ); } );

答案 3 :(得分:2)



答案 4 :(得分:1)

这是一个古老但排名较高的问题,所以我想我会提供一个更通用的 ES6 解决方案。


如果您想处理 Paste 事件并简化 HTML,这将特别有用。

它还删除 HTML 注释,因为有时复制/粘贴包括 <!--StartFragment--> 等。

  /**
   * Simplify HTML: drop every tag that is not in the allow-list, reduce the
   * allowed tags to their bare form (attributes are discarded), and remove
   * HTML comments.
   *
   * Tag names are compared case-insensitively; a kept tag is emitted with the
   * letter case it had in the input. A ">" inside an attribute value is not
   * handled and ends the tag early.
   *
   * @param {string} html - Markup to simplify.
   * @param {...string} args - Names of the tags to keep, e.g. 'b', 'br'.
   * @returns {string} The simplified markup.
   */
  function strip_tags(html, ...args) {
    const allowed = new Set(args.map((name) => String(name).toLowerCase()));
    return html
      .replace(/<(\/?)(\w+)[^>]*>/g, (_, endMark, tag) =>
        (allowed.has(tag.toLowerCase()) ? `<${endMark}${tag}>` : ''))
      // [\s\S] instead of "." so comments spanning several lines are removed too.
      .replace(/<!--[\s\S]*?-->/g, '');
  }



// Example call: strip every tag except basic formatting (<b>, <i>, <u>),
// paragraphs (<p>) and line breaks (<br>).
// NOTE(review): `html` is not defined in this snippet — presumably the markup
// taken from the paste event; confirm against the caller.
const h = strip_tags(html, 'b', 'i', 'u', 'p', 'br');

这是我用来处理简单 HTML 编辑器的粘贴事件的方法。它并不完美,因为它不能处理像 ">" 嵌入在标签属性中的奇怪情况,但这似乎不太可能发生。


答案 5 :(得分:1)

基于 h2oooooooo 的回答,适用于您这个问题的正确正则表达式是:


该解决方案甚至适用于 Wolfie 和 Olivier 提到的案例。


答案 6 :(得分:0)

我修改了 Sandro Rosa 的函数,以解决 Nikita 提到的问题:

/**
 * Remove HTML tags from a string, optionally keeping a list of tags.
 *
 * Every argument after the first names a tag to keep; it may be written as
 * "b", "<b>" or "</b>". Only the bare forms of a kept tag survive
 * (<b>, </b>, <b/>, optionally with whitespace, any letter case). A kept tag
 * that carries attributes is still removed, and tags that merely start with a
 * kept name (<br>, <bol> when keeping "b") are removed as well.
 * With no tag names given, every tag is removed.
 *
 * @param {string} _html - Markup to clean.
 * @returns {string} The cleaned markup, or "" when _html is not a string.
 */
function strip_tags( _html /*you can put each single tag per argument*/ ) {
    var _isText = typeof _html === "string" || _html instanceof String;
    if ( !_isText ) return "";

    var _tags = [], _tag = "";
    for ( var _a = 1 ; _a < arguments.length ; _a++ ) {
        if ( typeof arguments[_a] !== "string" ) continue;
        _tag = arguments[_a].replace( /[<>\/]/g, "" ).trim();
        // Skip names that are empty after cleaning (e.g. "<>"); an empty
        // alternative in the pattern would keep tags such as "<>".
        if ( _tag.length === 0 ) continue;
        // Escape regex metacharacters so the name is matched literally.
        _tags.push( _tag.replace( /[.*+?^${}()|[\]\\]/g, "\\$&" ) );
    }

    if ( _tags.length === 0 ) return _html.replace( /<\s*\/?[^>]+>/g, "" );

    // The lookahead must reach the closing ">" right after the name, which is
    // what makes the match exact instead of prefix-based.
    var _re = new RegExp( "<(?!\\s*\\/?(?:" + _tags.join( "|" ) + ")\\s*\\/?>)[^>]*>", "gi" );
    return _html.replace( _re, "" );
}

// Demo: logs the sample markup and then the result of strip_tags() for
// several different sets of tags to keep.
var _html = "<b>Just</b> some <i>tags</i> and text to test <u>this code</u>";
(function () {
    // Print a caption, then the value produced for it.
    function show( caption, produce ) {
        console.log( caption );
        console.log( produce() );
    }

    show( "This is the original html code including some tags", function () { return _html; } );
    show( "Now we remove all tags (plain text)", function () { return strip_tags( _html ); } );
    show( "Only the bold tag is kept", function () { return strip_tags( _html, "b" ); } );
    show( "Only the underline tag is kept", function () { return strip_tags( _html, "u" ); } );
    show( "Only the italic tag is kept", function () { return strip_tags( _html, "<i>" ); } );
    show( "Keeping both italic and underline", function () { return strip_tags( _html, "i", "u" ); } );

    // Second sample: tags whose names merely start with "b" must not survive.
    _html = "this is my <b>string</b> and it's pretty cool<br />isn't it?<br>Yep, it is.<strong>More HTML tags</strong><span></span><bol>";
    show( "Keeping just the bold tag", function () { return strip_tags( _html, "b" ); } ); // <b> stays, <br> and <bol> go
})();

答案 7 :(得分:0)

你的观点非常好,那么整个方法应该改变,因为没有办法结合正则表达式边界和负前瞻。 因此,我重写了整个函数,它似乎与建议的示例一起工作得很好。 我还扩展了对嵌入属性的标签的删除。

基本上,这只是一种粗暴的方法:预先扫描输入文本,先找出其中出现的所有标签。凡是不在输入参数(即要保留的标签)之中的标签,都会从整个输入文本中删除。 我还保留了一些被注释掉的行,它们在调试时可能有用。

<SCRIPT LANGUAGE="javascript" TYPE="text/javascript">
/**
 * Remove HTML tags from a string, optionally keeping a list of tags.
 *
 * Every argument after the first names a tag to keep; it may be written as
 * "b", "<b>" or "</b>". Tag names are compared case-insensitively and by
 * exact name. Kept tags are left untouched, attributes included; every other
 * opening, closing or self-closing tag is removed regardless of its
 * attributes. With no tag names given, every tag is removed.
 *
 * In keep mode only constructs that start with "<" or "</" directly followed
 * by a tag name are treated as tags; comments and doctype declarations are
 * left in place.
 *
 * @param {string} _html - Markup to clean.
 * @returns {string} The cleaned markup, or "" when _html is not a string.
 */
function strip_tags( _html /*you can put each single tag per argument*/ ) {
    if ( typeof _html !== "string" && !( _html instanceof String ) ) return "" ;

    // Lower-cased names of the tags to keep; a Set de-duplicates them.
    const _keep = new Set();
    for ( let _a = 1 ; _a < arguments.length ; _a++ ) {
        if ( typeof arguments[_a] !== "string" ) continue;
        const _tag = arguments[_a].replace( /[<>\/]/g, '' ).trim().toLowerCase();
        if ( _tag.length > 0 ) _keep.add( _tag );
    }

    if ( _keep.size === 0 ) return _html.replace( /<(\s*\/?)[^>]+>/g, "" ) ;

    // One pass over every tag: the captured name decides whether the whole tag
    // is kept as written or dropped. The lookahead after the name is the name
    // boundary, and digits are allowed so that tags like <h1> are recognised.
    return _html.replace( /<\/?([A-Za-z][A-Za-z0-9:-]*)(?=[\s\/>])[^>]*>/g, function( _match, _name ) {
        return _keep.has( _name.toLowerCase() ) ? _match : "" ;
    } );
}

// Demo: logs the sample markup and then the result of strip_tags() for
// several different sets of tags to keep. Each "GOAL" line states what the
// following call is expected to leave in place.
var _html = "<b>Just</b> some <i STYLE=\"color:#323232;\">tags</i> and <pre>text</pre> to <b>test</b> <u>this code</u>" ;
console.log( "This is the original html code including some tag" );
console.log( _html ); // original html code
console.log( "GOAL: remove all tags (plain text)" );
console.log( strip_tags( _html ) ); // remove all tags
console.log( "GOAL: only the bold tag is kept" );
console.log( strip_tags( _html, "b" ) ); // keep <b> only
// The call below keeps "b" and "i", so the label names the italic tag
// (it previously said "underline", which did not match the call).
console.log( "GOAL: only the bold and the italic tags are kept" );
console.log( strip_tags( _html, "b", "i" ) ); // keep <b> and <i>
console.log( "GOAL: only the italic tag is kept" );
console.log( strip_tags( _html, "i" ) ); // keep <i>