re.sub 不替换数据

时间:2017-03-01 16:26:01

标签: python regex python-2.7

我的程序又遇到了一个新问题,并且我发现之前的方法(见此处)并不理想。

多亏了下面的帮助,我现在已经让 CRYPTO_RE 这个正则表达式正常工作了。但是现在 get_hostname_from_file 不返回主机名。是正则表达式有问题,还是我读取这些行的方式有问题?

更新:下面的代码现在可以正常工作了。如果有人能建议更高效或更简洁的写法,请告诉我。我还在学习中! :)

完整代码:

import hashlib
import os
import re
import sys


# REGEX CONSTANTS
# Matches the literal word "hostname" followed by one or more spaces and
# captures the next run of non-whitespace characters as group 1.
HOSTNAME_RE = re.compile(r'hostname +(\S+)')
# Matches "Current configuration" (starting at a word boundary) together with
# the remainder of that line and up to five following lines.
# NOTE(review): not referenced by any function visible in this file.
REMOVE_RE = re.compile(r"((?:\bCurrent configuration)(?:.*\n?){6})")
# Earlier attempt kept for reference: a greedy, dot-matches-newline pattern
# running from "crypto pki certificate " to the last "quit" in the text.
#CRYPTO_RE = r'(?s)crypto pki certificate (.*)quit'
# Matches text starting at "crypto" (only when immediately preceded by a line
# consisting of "!") up to the first following "quit". The match is lazy; the
# caller must pass re.DOTALL for "." to cross line breaks.
CRYPTO_RE = r"(?<=!\n)(crypto.*?quit)"
# Replacement text used when substituting CRYPTO_RE matches (empty string,
# i.e. the matched block is deleted).
subst = ''

def hostname_parse(directory):
    """Collect file info for every recognised file inside *directory*.

    Each directory entry is joined onto *directory* and handed to
    ``get_file_info``; entries for which it returns ``None`` are skipped.

    Returns a dict mapping the first element of each info tuple to the
    whole tuple. A later entry with the same key replaces an earlier one.
    """
    parsed = (
        get_file_info(os.path.join(directory, entry))
        for entry in os.listdir(directory)
    )
    return {info[0]: info for info in parsed if info is not None}

def get_file_info(filename):
    """Read *filename* and return its cleaned-up info, or ``None``.

    Only paths ending in ``.cfg``, ``.startup`` or ``.confg`` are read;
    any other path yields ``None`` without touching the filesystem.
    For accepted paths the result is whatever ``clean_file`` returns for
    the file's full text.
    """
    recognised_suffixes = ('.cfg', '.startup', '.confg')
    if not filename.endswith(recognised_suffixes):
        return None
    with open(filename, "r") as handle:
        contents = handle.read()
    return clean_file(filename, contents)

def clean_file(filename, file_lines):
    """Strip CRYPTO_RE matches from *file_lines* and return the file's info.

    Every match of ``CRYPTO_RE`` (applied with ``re.DOTALL`` so the match
    can span several lines) is replaced by ``subst``. The cleaned text is
    echoed to stdout and then passed, together with *filename*, to
    ``get_hostname_from_file``, whose result is returned unchanged.
    """
    file_lines = re.sub(CRYPTO_RE, subst, file_lines, flags=re.DOTALL)
    # Parenthesised single-argument form prints the same text under both the
    # Python 2 print statement and the Python 3 print function, and matches
    # the print(...) calls used elsewhere in this script.
    print(file_lines)
    return get_hostname_from_file(filename, file_lines)

def get_hostname_from_file(filename, file_lines):
    """Return ``(hostname, filename, digest)`` for the given file text.

    *hostname* is group 1 of the first ``HOSTNAME_RE`` match found in
    *file_lines*, or ``None`` when there is no match. *digest* is the
    value ``hash_file`` returns for *file_lines*.
    """
    found = HOSTNAME_RE.search(file_lines)
    hostname = found.group(1) if found else None
    digest = hash_file(file_lines)
    return hostname, filename, digest

def hash_file(file_lines):
    """Return the hexadecimal SHA-1 digest of *file_lines*.

    *file_lines* may be text or a byte string. Text is encoded as UTF-8
    before hashing. Byte strings are hashed as they are: calling
    ``.encode`` on them would raise (``AttributeError`` for Python 3
    ``bytes``, ``UnicodeDecodeError`` for a Python 2 ``str`` holding
    non-ASCII bytes, which is what reading a file in text mode yields
    there).
    """
    if isinstance(file_lines, bytes):
        data = file_lines
    else:
        data = file_lines.encode('utf-8')
    return hashlib.sha1(data).hexdigest()


# Directories whose configuration files are compared against each other.
testpath1 = 'test1'
testpath2 = 'test2'

results1 = hostname_parse(testpath1)
results2 = hostname_parse(testpath2)

# Report every key of results1 that also appears in results2 and whose
# recorded hash differs between the two, along with both file paths.
for hostname, filename, filehash in results1.values():
    if hostname not in results2:
        continue
    _, filename2, filehash2 = results2[hostname]
    if filehash == filehash2:
        continue
    print ("%s has a change (%s, %s)" % (
        hostname, filehash, filehash2))
    print(filename)
    print(filename2)

测试文件1:

!
hostname ROUTERORSWITCHNAME
!
crypto pki certificate chain **Censored***
 certificate self-signed 01
  3123445D 308201C6 A0030201 02020101 300D0609 2A864886 F70D0101 04050030 
  30312E30 2C060355 04031325 494F532D 53656C66 2D536967 6E65642D 43657274 
  69666963 6174652D 36363235 39393933 36301E17 0D393330 33303130 30303235 
  385A170D 32303031 11324564 30303030 5A303031 2E302C06 03550403 1325494F 
  532D5365 61265613 69676E65 642D4365 76472425 69636174 652D3636 32353939 
  39333630 819F300D 06092A86 4886F70D 01010105 0003818D 00308189 02818100 
  F4799A10 F1DB65EA FACC214B 88185DA2 133DBE2D B7DFFDB0 85DB1F87 FC111628
  quit
!
!
!

测试文件2: 直接复制文件1并更改了HEX数字。

期望的结果:

!
hostname ROUTERORSWITCHNAME
!

!
!
!

正如你所看到的,我需要整个HEX块消失。

1 个答案:

答案 0 :(得分:2)

尝试以下内容:

import re
# Lazily matches from "crypto" (only when the preceding line is "!") up to
# the first following "quit"; requires re.DOTALL so "." can cross newlines.
regex = r"(?<=!\n)(crypto.*?quit)"
# Sample configuration text. Named config_text rather than ``str`` so the
# builtin ``str`` type is not shadowed.
config_text = """!
hostname ROUTERORSWITCHNAME
!
crypto pki certificate chain **Censored***
 certificate self-signed 01
  3123445D 308201C6 A0030201 02020101 300D0609 2A864886 F70D0101 04050030 
  30312E30 2C060355 04031325 494F532D 53656C66 2D536967 6E65642D 43657274 
  69666963 6174652D 36363235 39393933 36301E17 0D393330 33303130 30303235 
  385A170D 32303031 11324564 30303030 5A303031 2E302C06 03550403 1325494F 
  532D5365 61265613 69676E65 642D4365 76472425 69636174 652D3636 32353939 
  39333630 819F300D 06092A86 4886F70D 01010105 0003818D 00308189 02818100 
  F4799A10 F1DB65EA FACC214B 88185DA2 133DBE2D B7DFFDB0 85DB1F87 FC111628
  quit
!
!
!"""
# Replacement text: the matched block is removed entirely.
subst = ""
# Pass flags by keyword: the fourth positional argument of re.sub is count,
# so positional flags are easy to misplace.
result = re.sub(regex, subst, config_text, flags=re.DOTALL)
if result:
    print (result)

DEMO