Question

更新

一个简单的问题。假设有一个名为“unpack.py”的脚本，在执行以下命令时会把“inputfile.gz”的内容写到stdout，而不是创建“output.gz”：

python unpack.py inputfile.gz

是否可以通过使用以下命令重定向stdout来创建“output.gz”？

python unpack.py inputfile.gz > outputfile.gz

重定向之后，"inputfile.gz" 与 "outputfile.gz" 的内容是否完全相同？

UPDATE2
它似乎确实创建“outputfile.gz”。经过file检查，之前和之后似乎相同。

$ file core.gz 
core.gz: gzip compressed data, was "core.cpio", from Unix, last modified: Sat Feb 18 22:22:16 2012, max compression
[sundar@localhost multibootusb]$ file core-redirect.gz 
core-redirect.gz: gzip compressed data, was "core.cpio", from Unix, last modified: Sat Feb 18 22:22:16 2012, max compression

但是，当我尝试再次提取这两个文件时，重定向的文件似乎没有按预期工作并引发错误gzip: core-redirect.gz: unexpected end of file

$ gunzip core-redirect.gz 
gzip: core-redirect.gz: unexpected end of file
$ gunzip core.gz
$

我的实际问题如下......

我一直在尝试使用barneygale编写的github上的python脚本列出并提取iso。实际的脚本在这里： -

import urllib
import struct

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

class ISO9660IOError(IOError):
    """Raised when a requested path cannot be found on the image.

    The offending path is kept on ``self.path`` and is also forwarded to the
    base class so that ``args`` (and therefore ``repr`` and pickling) carry it.
    """

    def __init__(self, path):
        # Without this call the base class leaves ``args`` empty.
        super(ISO9660IOError, self).__init__(path)
        self.path = path

    def __str__(self):
        # Wrap in a tuple so a tuple-valued path is formatted as one value.
        return "Path not found: %s" % (self.path,)

class ISO9660(object):
    def __init__(self, url):
        self._buff  = None #input buffer
        self._root  = None #root node
        self._pvd   = {}   #primary volume descriptor
        self._paths = []   #path table

        self._url   = url
        self._get_sector = self._get_sector_url if url.startswith('http') else self._get_sector_file

        ### Volume Descriptors
        sector = 0x10
        while True:
            self._get_sector(sector, 2048)
            sector += 1
            ty = self._unpack('B')

            if ty == 1:
                self._unpack_pvd()
            elif ty == 255:
                break
            else:
                continue

        ### Path table
        l0 = self._pvd['path_table_size']
        self._get_sector(self._pvd['path_table_l_loc'], l0)

        while l0 > 0:
            p = {}
            l1 = self._unpack('B')
            l2 = self._unpack('B')
            p['ex_loc'] = self._unpack('<I')
            p['parent'] = self._unpack('<H')
            p['name']   = self._unpack_string(l1)

            if l1%2 == 1:
                self._unpack('B')

            self._paths.append(p)

            l0 -= 8 + l1 + (l1 % 2)

        assert l0 == 0

    ##
    ## Generator listing available files/folders
    ##

    def tree(self, get_files = True):
        if get_files:
            gen = self._tree_node(self._root)
        else:
            gen = self._tree_path('', 1)

        yield '/'
        for i in gen:
            yield i

    def _tree_path(self, name, index):
        spacer = lambda s: "%s/%s" % (name, s)
        for i, c in enumerate(self._paths):
            if c['parent'] == index and c['name'] not in '\x00\x01':
                yield spacer(c['name'])
                for d in self._tree_path(spacer(c['name']), i+1):
                    yield d

    def _tree_node(self, node):
        spacer = lambda s: "%s/%s" % (node['name'].lstrip('\x00\x01'), s)
        for c in list(self._unpack_dir_children(node)):
            yield spacer(c['name'])
            if c['flags'] & 2:
                for d in self._tree_node(c):
                    yield spacer(d)

    ##
    ## Retrieve file contents as a string
    ##

    def get_file(self, path):
        path = path.upper().strip('/').split('/')
        path, filename = path[:-1], path[-1]

        if len(path)==0:
            parent_dir = self._root
        else:
            try:
                parent_dir = self._dir_record_by_table(path)
            except ISO9660IOError:
                parent_dir = self._dir_record_by_root(path)

        f = self._search_dir_children(parent_dir, filename)

        self._get_sector(f['ex_loc'], f['ex_len'])
        return self._unpack_raw(f['ex_len'])

    ##
    ## Methods for retrieving partial contents
    ##

    def _get_sector_url(self, sector, length):
        """Point the input buffer at `length` bytes of the remote image.

        The bytes start at 2048-byte sector number `sector` and are requested
        with an HTTP Range header. A previously opened buffer is closed first.
        """
        if self._buff:
            self._buff.close()

        first_byte = sector * 2048
        last_byte = first_byte + length - 1

        opener = urllib.FancyURLopener()
        # The 206 handler is replaced by a no-op returning None; presumably so a
        # "Partial Content" reply is not handled as an error -- confirm against urllib.
        opener.http_error_206 = lambda *a, **k: None
        opener.addheader("Range", "bytes=%d-%d" % (first_byte, last_byte))
        self._buff = opener.open(self._url)

    def _get_sector_file(self, sector, length):
        with open(self._url, 'rb') as f:
            f.seek(sector*2048)
            self._buff = StringIO(f.read(length))

    ##
    ## Return the record for final directory in a path
    ##

    def _dir_record_by_table(self, path):
        for e in self._paths[::-1]:
            search = list(path)
            f = e
            while f['name'] == search[-1]:
                search.pop()
                f = self._paths[f['parent']-1]
                if f['parent'] == 1:
                    e['ex_len'] = 2048 #TODO
                    return e

        raise ISO9660IOError(path)

    def _dir_record_by_root(self, path):
        current = self._root
        remaining = list(path)

        while remaining:
            current = self._search_dir_children(current, remaining[0])

            remaining.pop(0)

        return current

    ##
    ## Unpack the Primary Volume Descriptor
    ##

    def _unpack_pvd(self):
        self._pvd['type_code']                     = self._unpack_string(5)
        self._pvd['standard_identifier']           = self._unpack('B')
        self._unpack_raw(1)                        #discard 1 byte
        self._pvd['system_identifier']             = self._unpack_string(32)
        self._pvd['volume_identifier']             = self._unpack_string(32)
        self._unpack_raw(8)                        #discard 8 bytes
        self._pvd['volume_space_size']             = self._unpack_both('i')
        self._unpack_raw(32)                       #discard 32 bytes
        self._pvd['volume_set_size']               = self._unpack_both('h')
        self._pvd['volume_seq_num']                = self._unpack_both('h')
        self._pvd['logical_block_size']            = self._unpack_both('h')
        self._pvd['path_table_size']               = self._unpack_both('i')
        self._pvd['path_table_l_loc']              = self._unpack('<i')
        self._pvd['path_table_opt_l_loc']          = self._unpack('<i')
        self._pvd['path_table_m_loc']              = self._unpack('>i')
        self._pvd['path_table_opt_m_loc']          = self._unpack('>i')
        d, self._root = self._unpack_record()      #root directory record
        self._pvd['volume_set_identifer']          = self._unpack_string(128)
        self._pvd['publisher_identifier']          = self._unpack_string(128)
        self._pvd['data_preparer_identifier']      = self._unpack_string(128)
        self._pvd['application_identifier']        = self._unpack_string(128)
        self._pvd['copyright_file_identifier']     = self._unpack_string(38)
        self._pvd['abstract_file_identifier']      = self._unpack_string(36)
        self._pvd['bibliographic_file_identifier'] = self._unpack_string(37)
        self._pvd['volume_datetime_created']       = self._unpack_vd_datetime()
        self._pvd['volume_datetime_modified']      = self._unpack_vd_datetime()
        self._pvd['volume_datetime_expires']       = self._unpack_vd_datetime()
        self._pvd['volume_datetime_effective']     = self._unpack_vd_datetime()
        self._pvd['file_structure_version']        = self._unpack('B')

    ##
    ## Unpack a directory record (a listing of a file or folder)
    ##

    def _unpack_record(self):
        l0 = self._unpack('B')

        if l0 == 0: return 1, None

        l1 = self._unpack('B')

        d = dict()
        d['ex_loc']               = self._unpack_both('I')
        d['ex_len']               = self._unpack_both('I')
        d['datetime']             = self._unpack_dir_datetime()
        d['flags']                = self._unpack('B')
        d['interleave_unit_size'] = self._unpack('B')
        d['interleave_gap_size']  = self._unpack('B')
        d['volume_sequence']      = self._unpack_both('h')

        l2 = self._unpack('B')
        d['name'] = self._unpack_string(l2).split(';')[0]
        if l2 % 2 == 0:
            self._unpack('B')

        t = 34 + l2 - (l2 % 2)

        e = l0-t
        if e>0:
            extra = self._unpack_raw(e)

        return l0, d

    #Assuming d is a directory record, this generator yields its children
    def _unpack_dir_children(self, d):
        read = 0
        self._get_sector(d['ex_loc'], d['ex_len'])
        while read < d['ex_len']: #Iterate over files in the directory
            data, e = self._unpack_record()
            read += data

            if data == 1: #end of directory listing
                break
            if e['name'] not in '\x00\x01':
                yield e

    #Search for one child amongst the children
    def _search_dir_children(self, d, term):
        for e in self._unpack_dir_children(d):
            if e['name'] == term:
                return e

        raise ISO9660IOError(term)
    ##
    ## Datatypes
    ##

    def _unpack_raw(self, l):
        return self._buff.read(l)

    #both-endian
    def _unpack_both(self, st):
        a = self._unpack('<'+st)
        b = self._unpack('>'+st)
        assert a == b
        return a

    def _unpack_string(self, l):
        return self._buff.read(l).rstrip(' ')

    def _unpack(self, st):
        if st[0] not in ('<','>'):
            st = '<' + st
        d = struct.unpack(st, self._buff.read(struct.calcsize(st)))
        if len(st) == 2:
            return d[0]
        else:
            return d

    def _unpack_vd_datetime(self):
        return self._unpack_raw(17) #TODO

    def _unpack_dir_datetime(self):
        return self._unpack_raw(7) #TODO


# Command-line entry point:
#   python iso9660.py isourl          -> list every path on the image
#   python iso9660.py isourl path     -> write that file's contents to stdout
if __name__ == '__main__':
    import sys

    if len(sys.argv) < 2:
        print("usage: python iso9660.py isourl [path]")
    else:
        iso_path = sys.argv[1]
        ret_path = sys.argv[2] if len(sys.argv) > 2 else None
        cd = ISO9660(iso_path)
        if ret_path:
            # Emit the file contents verbatim. `print` would append a newline,
            # so a redirected copy (e.g. `> core.gz`) would not be byte-identical.
            if sys.platform == 'win32':
                # Keep Windows from translating "\n" in binary data.
                import msvcrt
                import os
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            # Python 3 exposes the byte stream as sys.stdout.buffer.
            out = getattr(sys.stdout, 'buffer', sys.stdout)
            out.write(cd.get_file(ret_path))
            out.flush()
        else:
            for path in cd.tree():
                print(path)

脚本几乎接近我想要的。它可以列出一个iso。例如： -

$ python ./iso9660.py aTinyCore-4.3.iso 
/
/BOOT
/BOOT/CORE.GZ
/BOOT/ISOLINUX
/BOOT/ISOLINUX/BOOT.CAT
/BOOT/ISOLINUX/BOOT.MSG
/BOOT/ISOLINUX/F2.
/BOOT/ISOLINUX/F3.
/CDE/XBASE.LST

但是，它不支持直接提取iso文件。相反，如果提供了驻留在iso中的文件路径，它会将所有数据/二进制文件写入stdout。

例如： -

$ python ./iso9660.py aTinyCore-4.3.iso /BOOT/ISOLINUX/ISOLINUX.CFG
display boot.msg
default tc

label tc
    kernel /boot/vmlinuz
    append initrd=/boot/core.gz quiet cde

F4 f4

我现在的问题是：能否把这些stdout输出原样写入文件（无论stdout的内容是普通文件、数据还是二进制），从而把内容从iso中提取出来？Python对普通文件和二进制文件的stdout处理方式是否相同？

非常感谢任何有助于解决这个问题的线索。

将数据从stdout写入文件/二进制/数据

0 个答案: