在文本文件中获取起始和结束关键字之间的线,然后使用python

时间:2017-01-24 05:54:10

标签: python regex python-2.7

我有一个文本文件,我想提取起始(Socket :)和结束关键字(Socket :)之间的界限,然后进行处理。

输入:

Socket: 1
Device ID: 0x0B028041 0xCC344007 0x10000834 0x00000011
CB: 3/ID: 0x445DDC13
BIBID: 0x65C     

A:0xB0000190 D:0x310020FF
A:0xB0000194 D:0x00000000
A:0xB0000198 D:0x31002010
A:0xB000019C D:0x00000017
A:0xB00001A0 D:0x31002020
A:0xB00001A4 D:0x00000017
A:0xB00001A8 D:0x31002040
A:0xB00001AC D:0x00000000
A:0xB00001B0 D:0x31001000
ART: 0xB0000800 DRT: 0xB0000000
ART: 0xB0000804 DRT: 0xB0000000
ART: 0xB0000808 DRT: 0xB0000000
ART: 0xB000080C DRT: 0xB0000000
ART: 0xB0000810 DRT: 0xB0000000
ART: 0xB0000814 DRT: 0xB0000000
ART: 0xB0000818 DRT: 0xB0000000
ART: 0xB000081C DRT: 0xB0000000
ART: 0xB0000820 DRT: 0xB0000000
ART: 0xB0000824 DRT: 0xB0000000
ART: 0xB0000828 DRT: 0xB0000000
ART: 0xB000082C DRT: 0xB0000000
ART: 0xB0000830 DRT: 0xB0000000
ART: 0xB0000834 DRT: 0xB0000000
ART: 0xB0000838 DRT: 0xB0000000
ART: 0xB000083C DRT: 0xB0000000
ART: 0xB0000840 DRT: 0xB0000000
ART: 0xB0000844 DRT: 0xB0000000
ART: 0xB0000848 DRT: 0xB0000000
ART: 0xB000084C DRT: 0xB0000000
ART: 0xB0000850 DRT: 0xB0000000
ART: 0xB0000854 DRT: 0xB0000000
ART: 0xB0000858 DRT: 0xB0000000
ART: 0xB000085C DRT: 0xB0000000
ART: 0xB0000860 DRT: 0xB0000000
ART: 0xB0000864 DRT: 0xB0000000
ART: 0xB0000868 DRT: 0xB0000000
ART: 0xB000086C DRT: 0xB0000000
ART: 0xB0000870 DRT: 0xB0000000
ART: 0xB0000874 DRT: 0xB0000000
ART: 0xB0000878 DRT: 0xB0000000
ART: 0xB000087C DRT: 0xB0000000
...
Socket:2
...

当前代码:

    import re

    from collections import defaultdict
    dict2=defaultdict(list)
    dict3=defaultdict(list)
    socket_position_status = False
    dev_id_status = False
    CB_noS_status = False
    trf_val_flag = False
    dict1=defaultdict(list)
    pattern="QWL"
    rd_case= "Digital"
    setup_temp = "0C"
    readout_temp = "0C"
    address=[]

    def tde_file():
        with open(r'C:\Gert_batch file\DOE_parsing\Thebe\DOE 4 - 5K\NEW SFR\PF\tde\MRB_QWL_0c_Digital_PS60c_TC1798.tde', 'rb') as f:
            for line in f:
                pattern_tde = ":TEST_RESULT"
                if pattern_tde in line:

                    tde_addr = ':TEST_RESULT (\d+); addr: ([0-9A-Za-z]{10})'
                    x0 =re.match(tde_addr, line)

                    if x0:
                        address_tde = x0.group(2)
                        tde_addr_1 = 'cfp_(vqs|vcs)_m(\d+) \// HB05_SB255'
                        x1 = re.search(tde_addr_1,line)

                        tde_addr_2 = 'cfp_(vqs|vcs)_m\dm\d_(vth\d.\d) \// HB05_SB255'
                        x2 = re.search(tde_addr_2,line)

                        tde_addrs_1 = '(DTS_)value_(before|after)_test_(start|finish)'
                        y0 = re.search(tde_addrs_1,line)

                        if x1:
                            hlp_s = x1.group(2).zfill(2)
                            identifier =  x1.group(1)+"_m"+hlp_s

                        if x2:
                            identifier =  x2.group(1)+"_m"+x2.group(2)

                        try:
                            tde_addr_3 = '(SBE|DBE)|(Number of (\ds) bit fail) \// HB05_SB255'
                            hlp_s = re.search(tde_addr_3,line).group(1)
                            if hlp_s:
                                dict1[pattern,rd_case,address_tde]=identifier+"_"+hlp_s

                        except AttributeError:pass

                        try:
                            tde_addr_3 = '(SBE|DBE)|(Number of (\ds) bit fail) \// HB05_SB255'
                            hlp_s = re.search(tde_addr_3,line).group(3)
                            if hlp_s:
                                dict1[pattern,rd_case,address_tde]=identifier+"_"+hlp_s

                        except AttributeError: pass

                        try:
                            if y0.group(1) and y0.group(3):
                                dict1[pattern,rd_case,address_tde]=y0.group(1)+y0.group(3)+"_temp"
                        except AttributeError: pass
        #print dict1
        #print len(dict1.keys())
        #for k,v in sorted(dict1.items()):
            #print k,v


    def evaluate_lot_wxy(trf_dev_id_pattern):
        import re
        binary_value = ""

        line = trf_dev_id_pattern
        dev_id = 'Device ID: ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10})$'
        hex_inp1 = re.search(dev_id,line)
        #print hex_inp1.group()
        hex_inp2 = hex_inp1.group(4)+hex_inp1.group(3)+hex_inp1.group(2)+hex_inp1.group(1)
        hex_inp3 =  re.sub('0x', '', hex_inp2)
        #print hex_inp3
        for i,val in enumerate(str(hex_inp3)):
            binary_value1=str(bin(int(val,16))[2:]).zfill(4)
            binary_value = str(binary_value) + str(binary_value1)
        #print binary_value

        wafer_val = binary_value[90:96]
        wafer = int(wafer_val,2)
        y_pos_val = binary_value[106:113]
        y_pos = int(y_pos_val,2)
        x_pos_val = binary_value[98:105]
        x_pos = int(x_pos_val,2)
        year_val = binary_value[63:67]
        year = int(year_val,2)
        production_week_val = binary_value[67:73]
        production_week = int(production_week_val,2)
        serial_no_val = binary_value[73:83]
        serial_no=int(serial_no_val,2)
        lot ="ZA"+str(year)+str(production_week)+str(serial_no)
        if (1 <= wafer <= 25) and (1<= x_pos <= 65) and (1 <= y_pos <= 65):
            dev_id_status = True
        return lot,wafer,x_pos,y_pos,dev_id_status

    tde_file()

    with open(r"C:\Gert_batch file\DOE_parsing\Thebe\DOE 4 - 5K\NEW SFR\PF\1kCycling\Results_452_13384\Result     Files\temp\452_20170111_021021_TC1798_MRB_QWL_0c_Digital_PS60c_1021002999.trf", "rt") as f1:
    lines = f1.read()
    print lines
    #for lines in f1.read():

    match = re.search(r'Socket:(.*?)Socket:', lines, flags=re.DOTALL)
    #print match.group()
    for line in match.group().splitlines():
        if "Socket:" in line:
            trf_addr = 'Socket: (\d+)$'
            x0 =re.match(trf_addr, line)
            try:
                if x0.group(1).zfill(3):
                    socket_position = x0.group(1).zfill(3)
                    socket_position_status = True
                    #print socket_position


            except AttributeError: pass

        elif "Device ID:" in line:
            dev_id = 'Device ID: ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10})$'
            x1 = re.search(dev_id,line)
            try:
                if  x1.group(1) and x1.group(1) and x1.group(1) and x1.group(1):
                    trf_dev_id_pattern = x1.group()#x1.group(4)+x1.group(3)+x1.group(2)+x1.group(1)
                    lot_wafer_x_y = evaluate_lot_wxy(trf_dev_id_pattern)
                    dev_id_status = True
                    #print lot_wafer_x_y

            except AttributeError: pass

        elif "CB:" in line:
            CB_pat = 'CB: (\d+)\/'
            x2 = re.search(CB_pat,line)
            try:
                if  x2.group(1):
                    CB_noS_status = True
                    #print CB_noS_status
            except AttributeError: pass

        elif"ART:" in line:
            regex = re.search("ART: ([0-9A-Za-z]{10}) DRT: ([0-9A-Za-z]{10})",line)
            #print line
            try:
                if regex.group(1) and regex.group(2):
                    for key1,val1 in dict1.iteritems():
                        if regex.group(1) in key1:
                            #print "Address:"+regex.group(1)
                            hlp_a = val1
                            hlp_b = hlp_a.split("_")
                            identifier = hlp_b[0]
                            fail_class = hlp_b[1]
                            key_addtional = hlp_b[2]
                            val = regex.group(2)
                            value = int(val[3:],16)
                            dict2[rd_case,pattern,setup_temp,readout_temp,socket_position,fail_class,identifier,key_addtional]= value        

            except AttributeError: pass

    for k,v in sorted(dict2.items()):
        print k,v

当前输出:

目前代码打印第一场比赛的输出,我想获得输入文件中整个比赛的输出。

('Digital', 'QWL', '0C', '0C', '001', 'finish', 'DTS', 'temp') 16
('Digital', 'QWL', '0C', '0C', '001', 'm02', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm02', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm02', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm02', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm03', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm03', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm03', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm03', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm04', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm04', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm04', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm04', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm05', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm05', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm05', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm05', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm06', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm06', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm06', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm06', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm07', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm07', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm07', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm07', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm08', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm08', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm08', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm08', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm09', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm09', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm09', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm09', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm10', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm10', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm10', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm10', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm11', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm11', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm11', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm11', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm12', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm12', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm12', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm12', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm13', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm13', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm13', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm13', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm14', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm14', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm14', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm14', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm15', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm15', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm15', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm15', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'start', 'DTS', 'temp') 14

以上是第一次匹配的输出,但我想得到文件中每个匹配的输出。有人可以帮助我吗?提前谢谢。

2 个答案:

答案 0 :(得分:1)

试试这个:

TagPickerViewController

代码输出:

import re
txt = '''Test_Socket: 1

TestA ID: 0x0B028041 0xCC344007 0x10000834 0x00000011

TestA_CB: 3/ID: 0x445DDC13

TESTA_BD: 0x65C

A:0xB0000190 D:0x310020FF

ART: 0xB0000878 DRT: 0xB0000000

ART: 0xB000087C DRT: 0xB0000000 ... Test_Socket:2'''

match = re.search(r'Test_Socket:(.*?)Test_Socket:', txt, flags=re.DOTALL)
print(match.group(1))

提取行之后,您可以迭代它们或运行另一个正则表达式来从这些行中获取所需的内容。

TestA ID: 0x0B028041 0xCC344007 0x10000834 0x00000011

TestA_CB: 3/ID: 0x445DDC13

TESTA_BD: 0x65C

A:0xB0000190 D:0x310020FF

ART: 0xB0000878 DRT: 0xB0000000

ART: 0xB000087C DRT: 0xB0000000 ... 

答案 1 :(得分:0)

我找到了一个解决方案,我在regex中尝试了re.finditer(),它按预期工作。请找到我的代码,如果有比这更好的方法,请告诉我。我要感谢大家。

CODE:

import re

from collections import defaultdict
dict2=defaultdict(list)
dict3=defaultdict(list)
dict1=defaultdict(list)
pattern="QWL"
rd_case= "Digital"
setup_temp = "0C"
readout_temp = "0C"
address=[]

def tde_file():
  with open(r'C:\Gert_batch file\DOE_parsing\Thebe\DOE 4 - 5K\NEW SFR\PF\tde\MRB_QWL_0c_Digital_PS60c_TC1798.tde', 'rb') as f:
    for line in f:
        pattern_tde = ":TEST_RESULT"
        if pattern_tde in line:

            tde_addr = ':TEST_RESULT (\d+); addr: ([0-9A-Za-z]{10})'
            x0 =re.match(tde_addr, line)

            if x0:
                address_tde = x0.group(2)
                tde_addr_1 = 'cfp_(vqs|vcs)_m(\d+) \// HB05_SB255'
                x1 = re.search(tde_addr_1,line)

                tde_addr_2 = 'cfp_(vqs|vcs)_m\dm\d_(vth\d.\d) \// HB05_SB255'
                x2 = re.search(tde_addr_2,line)

                tde_addrs_1 = '(DTS_)value_(before|after)_test_(start|finish)'
                y0 = re.search(tde_addrs_1,line)

                if x1:
                    hlp_s = x1.group(2).zfill(2)
                    identifier =  x1.group(1)+"_m"+hlp_s

                if x2:
                    identifier =  x2.group(1)+"_m"+x2.group(2)

                try:
                    tde_addr_3 = '(SBE|DBE)|(Number of (\ds) bit fail) \// HB05_SB255'
                    hlp_s = re.search(tde_addr_3,line).group(1)
                    if hlp_s:
                              dict1[pattern,rd_case,address_tde]=identifier+"_"+hlp_s

                except AttributeError:pass

                try:
                    tde_addr_3 = '(SBE|DBE)|(Number of (\ds) bit fail) \// HB05_SB255'
                    hlp_s = re.search(tde_addr_3,line).group(3)
                    if hlp_s:
                        dict1[pattern,rd_case,address_tde]=identifier+"_"+hlp_s

                except AttributeError: pass

                try:
                    if y0.group(1) and y0.group(3):
                        dict1[pattern,rd_case,address_tde]=y0.group(1)+y0.group(3)+"_temp"
                except AttributeError: pass

def evaluate_lot_wxy(trf_dev_id_pattern):
  import re
  binary_value = ""
  dev_id_status = False

  line = trf_dev_id_pattern
  dev_id = 'Device ID: ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10})$'
  hex_inp1 = re.search(dev_id,line)
  #print hex_inp1.group()
  hex_inp2 = hex_inp1.group(4)+hex_inp1.group(3)+hex_inp1.group(2)+hex_inp1.group(1)
  hex_inp3 =  re.sub('0x', '', hex_inp2)
  #print hex_inp3
  for i,val in enumerate(str(hex_inp3)):
     binary_value1=str(bin(int(val,16))[2:]).zfill(4)
     binary_value = str(binary_value) + str(binary_value1)
     #print binary_value

  wafer_val = binary_value[90:96]
  wafer = int(wafer_val,2)
  y_pos_val = binary_value[106:113]
  y_pos = int(y_pos_val,2)
  x_pos_val = binary_value[98:105]
  x_pos = int(x_pos_val,2)
  year_val = binary_value[63:67]
  year = int(year_val,2)
  production_week_val = binary_value[67:73]
  production_week = int(production_week_val,2)
  serial_no_val = binary_value[73:83]
  serial_no=int(serial_no_val,2)
  lot ="ZA"+str(year)+str(production_week)+str(serial_no)
  if (1 <= wafer <= 25) and (1<= x_pos <= 65) and (1 <= y_pos <= 65):
    dev_id_status = True
  return lot,wafer,x_pos,y_pos,dev_id_status

tde_file()
with open(r"C:\Gert_batch file\DOE_parsing\Thebe\DOE 4 - 5K\NEW SFR\PF\1k Cycling\Results_452_13384\Result Files\452_20170111_021021_TC1798_MRB_QWL_0c_Digital_PS60c_1021002999.trf") as f1:
        lines = f1.read()
        socket_position_status = False
        dev_id_status = False
        CB_noS_status = False
        trf_val_flag = False
        for m in re.finditer(r'Socket:(.*?)ART: 0xB00017EC DRT: 0x00000000\n', lines,flags=re.DOTALL):
            x1 = ('%s' % (m.group(0)))

            for line in x1.splitlines():
                if "Socket:" in line:
                    trf_addr = 'Socket: (\d+)$'
                    x0 =re.match(trf_addr, line)
                    try:
                        if x0.group(1).zfill(3):
                            socket_position = x0.group(1).zfill(3)
                            socket_position_status = True
                            #print socket_position


                    except AttributeError: pass

                elif "Device ID:" in line:
                    dev_id = 'Device ID: ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10})$'
                    x1 = re.search(dev_id,line)
                    try:
                        if  x1.group(1) and x1.group(1) and x1.group(1) and x1.group(1):
                            trf_dev_id_pattern = x1.group()#x1.group(4)+x1.group(3)+x1.group(2)+x1.group(1)
                            lot_wafer_x_y = evaluate_lot_wxy(trf_dev_id_pattern)
                            dev_id_status = lot_wafer_x_y[4]
                            #print lot_wafer_x_y

                    except AttributeError: pass

                elif "CB:" in line:
                    CB_pat = 'CB: (\d+)\/'
                    x2 = re.search(CB_pat,line)
                    try:
                        if  x2.group(1):
                            CB_no = x2.group(1)
                            CB_noS_status = True
                            #print CB_noS_status
                    except AttributeError: pass

                elif"ART:" in line:
                    regex = re.search("ART: ([0-9A-Za-z]{10}) DRT: ([0-9A-Za-z]{10}$)",line)
                    #print line
                    try:
                        if regex.group(1) and regex.group(2):
                            for key1,val1 in dict1.iteritems():
                                if regex.group(1) in key1:
                                    #print "Address:"+regex.group(1)
                                    hlp_a = val1
                                    hlp_b = hlp_a.split("_")
                                    identifier = hlp_b[0]
                                    fail_class = hlp_b[1]
                                    key_addtional = hlp_b[2]
                                    val = regex.group(2)
                                    value = int(val[3:],16)
                                    trf_val_flag = True
                                    if dev_id_status and trf_val_flag and CB_noS_status:
                                        dict3[rd_case,pattern,setup_temp,readout_temp,CB_no,socket_position,fail_class,identifier,key_addtional]= value
                                        continue


                    except AttributeError: pass

    #elif socket_position_status and dev_id_status and CB_noS_status and trf_val_flag:
        #dict3[rd_case,pattern,setup_temp,readout_temp,socket_position,fail_class,identifier]= value

print len(dict3.keys())