如何在 Python 中使用替换字典查找并替换区分大小写的整个单词

时间:2018-05-25 01:05:40

标签: python regex replace

考虑下面的mcve:

import re
import textwrap

import traceback
import unittest


def replace_words(content, replacements):
    """Replace whole, case-sensitive words in *content* using *replacements*.

    A "word" is an identifier-like token: a letter or underscore followed by
    any number of word characters. Each token found in *content* is looked up
    in *replacements*; tokens without an entry are left untouched.

    Args:
        content: The text to process.
        replacements: Mapping from exact word to its replacement string.

    Returns:
        A copy of *content* with every mapped word replaced.
    """
    word_pattern = re.compile(r"[A-Za-z_]\w*")

    def translate(match):
        word = match.group(0)
        return replacements.get(word, word)

    # Pattern.sub's third positional parameter is `count`, not `flags`.
    # Passing re.IGNORECASE | re.MULTILINE there (== 10) silently limited the
    # substitution to the first 10 matches. No flags are needed: the lookup is
    # meant to be case-sensitive and the pattern has no anchors.
    return word_pattern.sub(translate, content)


class class_name(unittest.TestCase):
    """Fixture-driven checks for a whole-word replacement function.

    Two scenarios are prepared in ``setUp``: a grammar-like text whose
    operator token names are swapped for quoted symbols, and a short text
    whose keywords are swapped for other words.
    """

    def setUp(self):
        """Create the replacement tables, the input texts and expected outputs."""
        operator_tokens = {
            "PLUS": '"+"',
            "DASH": '"-"',
            "BANG": '"!"',
            "TILDE": '"~"',
            "STAR": '"*"',
            "SLASH": '"/"',
            "PERCENT": '"%"',
            "LEFT_PAREN": '"("',
            "RIGHT_PAREN": '")"',
        }
        keyword_tokens = {
            "IF": "fi",
            "FOO": "oof",
            "BAR": "rab",
            "OP_FOO": "oof_op",
        }
        self.replacements = [operator_tokens, keyword_tokens]

        # NOTE: the literals below keep their exact indentation on purpose.
        # Where a line ends in a backslash, the spaces before the closing
        # quotes become trailing whitespace of the resulting string.
        self.texts = [
            textwrap.dedent("""\
                variable_identifier :
                    IDENTIFIER
                primary_expression :
                    foo1
                    foo2
                    foo3
                    LEFT_PAREN expression RIGHT_PAREN
                unary_operator :
                    PLUS
                    DASH
                    BANG
                    TILDE
                multiplicative_expression :
                    unary_expression
                    multiplicative_expression STAR unary_expression
                    multiplicative_expression SLASH unary_expression
                    multiplicative_expression PERCENT unary_expression\
            """),
            textwrap.dedent("""\
                IF identifier IDENTIFIER FOO BAR BARycentric
                OP_FOO
            """)
        ]
        self.expected_results = [
            textwrap.dedent("""\
                variable_identifier :
                    IDENTIFIER
                primary_expression :
                    foo1
                    foo2
                    foo3
                    "(" expression ")"
                unary_operator :
                    "+"
                    "-"
                    "!"
                    "~"
                multiplicative_expression :
                    unary_expression
                    multiplicative_expression "*" unary_expression
                    multiplicative_expression "/" unary_expression
                    multiplicative_expression "%" unary_expression\
            """),
            textwrap.dedent("""\
                fi identifier IDENTIFIER oof rab BARycentric
                oof_op
            """)
        ]

    def _tester(self, f):
        """Run *f* on each (text, table) pair and compare with the expected text.

        Args:
            f: Callable taking ``(text, replacements)`` and returning a string.
        """
        scenarios = zip(self.texts, self.replacements, self.expected_results)
        for source_text, table, wanted in scenarios:
            self.assertEqual(f(source_text, table), wanted)

    # Exercises the module-level replace_words implementation.
    def test_replace_words(self):
        self._tester(replace_words)


# Run the unittest test runner when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()

replace_words 函数试图使用上面代码中的替换字典,在给定文本中搜索并替换区分大小写的整个单词,但它在 self.assertEqual(f(texts[0], replacements[0]), expected_results[0]) 这一行失败了,而我不知道原因。

所以问题是:如何在 Python 中使用替换字典查找并替换区分大小写的整个单词?

1 个答案:

答案 0 :(得分:3)

您可以使用 re.sub 和 re.findall:

import re
def regex_string(d, to_lower = False):
    """Build an alternation regex that matches each key of *d* as a whole word.

    Every key is wrapped in ``\\b`` word-boundary assertions and the pieces are
    joined with ``|`` in the mapping's iteration order.

    Args:
        d: Mapping whose keys are the words to match.
        to_lower: When true, each key contributes two alternatives: its
            lower-cased form followed by the key itself.

    Returns:
        The pattern as a string (empty when *d* has no keys).
    """
    # re.escape keeps keys that contain regex metacharacters (".", "+", ...)
    # literal instead of letting them change what the pattern matches.
    if not to_lower:
        return '|'.join(r'\b{}\b'.format(re.escape(key)) for key in d)
    return '|'.join(
        r'\b{}\b'.format(re.escape(variant))
        for key in d
        for variant in (key.lower(), key)
    )

# Token name -> quoted symbol that should replace it.
replacements = {
    'PLUS': '"+"',
    'DASH': '"-"',
    'BANG': '"!"',
    'TILDE': '"~"',
    'STAR': '"*"',
    'SLASH': '"/"',
    'PERCENT': '"%"',
    'LEFT_PAREN': '"("',
    'RIGHT_PAREN': '")"'
}
# NOTE: `content` (the text to process) is not defined in this snippet and
# must be supplied by the surrounding code.

# Build the pattern once; it is used both to insert placeholders and to
# collect the matched words.
word_pattern = regex_string(replacements, True)
# Double any literal braces first so that str.format below treats only the
# '{}' placeholders inserted by re.sub as fields. Without this, text that
# contains '{' or '}' raises or is mangled by format().
replaced = re.sub(word_pattern, '{}', content.replace('{', '{{').replace('}', '}}'))
# Fill the placeholders, in order, with the replacement for each matched word
# (lower-cased matches have no entry and are put back unchanged).
final_result = replaced.format(*[replacements.get(i, i) for i in re.findall(word_pattern, content)])

输出(case 1):

variable_identifier :
   IDENTIFIER
primary_expression :
   foo1
   foo2
   foo3
   "(" expression ")"
unary_operator :
   "+"
   "-"
   "!"
   "~"
multiplicative_expression :
   unary_expression
   multiplicative_expression "*" unary_expression
   multiplicative_expression "/" unary_expression
   multiplicative_expression "%" unary_expression  

输出(case 2):

fi identifier IDENTIFIER oof rab BARycentric
oof_op

或者,甚至更短:

# Single pass: each matched word is swapped for its mapped value, or kept
# as-is when the mapping has no entry for it.
replaced = re.sub(
    regex_string(replacements, True),
    lambda found: replacements.get(found.group(), found.group()),
    content,
)