1 Star 0 Fork 0

个人收藏室 / r0capture

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
myhexdump.py 15.01 KB
一键复制 编辑 原始数据 按行查看 历史
# -*- coding: utf-8 -*-
# !/usr/bin/env python
# -*- coding: latin-1 -*-
# <-- removing this magic comment breaks Python 3.4 on Windows
"""
1. Dump binary data to the following text format:
00000000: 00 00 00 5B 68 65 78 64 75 6D 70 5D 00 00 00 00 ...[hexdump]....
00000010: 00 11 22 33 44 55 66 77 88 99 AA BB CC DD EE FF .."3DUfw........
It is similar to the one used by:
Scapy
00 00 00 5B 68 65 78 64 75 6D 70 5D 00 00 00 00 ...[hexdump]....
00 11 22 33 44 55 66 77 88 99 AA BB CC DD EE FF .."3DUfw........
Far Manager
000000000: 00 00 00 5B 68 65 78 64 ¦ 75 6D 70 5D 00 00 00 00 [hexdump]
000000010: 00 11 22 33 44 55 66 77 ¦ 88 99 AA BB CC DD EE FF ?"3DUfwˆ™ª»ÌÝîÿ
2. Restore binary data from the formats above as well
as from less exotic strings of raw hex
"""
__version__ = '3.3'
__author__ = 'anatoly techtonik <techtonik@gmail.com>'
__license__ = 'Public Domain'
__history__ = \
"""
3.3 (2015-01-22)
* accept input from sys.stdin if "-" is specified
for both dump and restore (issue #1)
* new normalize_py() helper to set sys.stdout to
binary mode on Windows
3.2 (2015-07-02)
* hexdump is now packaged as .zip on all platforms
(on Linux created archive was tar.gz)
* .zip is executable! try `python hexdump-3.2.zip`
* dump() now accepts configurable separator, patch
by Ian Land (PR #3)
3.1 (2014-10-20)
* implemented workaround against mysterious coding
issue with Python 3 (see revision 51302cf)
* fix Python 3 installs for systems where UTF-8 is
not default (Windows), thanks to George Schizas
(the problem was caused by reading of README.txt)
3.0 (2014-09-07)
* remove unused int2byte() helper
* add dehex(text) helper to convert hex string
to binary data
* add 'size' argument to dump() helper to specify
length of chunks
2.0 (2014-02-02)
* add --restore option to command line mode to get
binary data back from hex dump
* support saving test output with `--test logfile`
* restore() from hex strings without spaces
* restore() now raises TypeError if input data is
not string
* hexdump() and dumpgen() now don't return unicode
strings in Python 2.x when generator is requested
1.0 (2013-12-30)
* length of address is reduced from 10 to 8
* hexdump() got new 'result' keyword argument, it
can be either 'print', 'generator' or 'return'
* actual dumping logic is now in new dumpgen()
generator function
* new dump(binary) function that takes binary data
and returns string like "66 6F 72 6D 61 74"
* new genchunks(mixed, size) function that chunks
both sequences and file like objects
0.5 (2013-06-10)
* hexdump is now also a command line utility (no
restore yet)
0.4 (2013-06-09)
* fix installation with Python 3 for non English
versions of Windows, thanks to George Schizas
0.3 (2013-04-29)
* fully Python 3 compatible
0.2 (2013-04-28)
* restore() to recover binary data from a hex dump in
native, Far Manager and Scapy text formats (others
might work as well)
* restore() is Python 3 compatible
0.1 (2013-04-28)
* working hexdump() function for Python 2
"""
import binascii # binascii is required for Python 3
import sys
# --- constants
PY3K = sys.version_info >= (3, 0)
# --- workaround against Python consistency issues
def normalize_py():
''' Problem 001 - sys.stdout in Python is by default opened in
text mode, and writes to this stdout produce corrupted binary
data on Windows
python -c "import sys; sys.stdout.write('_\n_')" > file
python -c "print(repr(open('file', 'rb').read()))"
'''
if sys.platform == "win32":
# set sys.stdout to binary mode on Windows
import os, msvcrt
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
# --- - chunking helpers
def chunks(seq, size):
'''Generator that cuts sequence (bytes, memoryview, etc.)
into chunks of given size. If `seq` length is not multiply
of `size`, the lengh of the last chunk returned will be
less than requested.
>>> list( chunks([1,2,3,4,5,6,7], 3) )
[[1, 2, 3], [4, 5, 6], [7]]
'''
d, m = divmod(len(seq), size)
for i in range(d):
yield seq[i * size:(i + 1) * size]
if m:
yield seq[d * size:]
def chunkread(f, size):
'''Generator that reads from file like object. May return less
data than requested on the last read.'''
c = f.read(size)
while len(c):
yield c
c = f.read(size)
def genchunks(mixed, size):
'''Generator to chunk binary sequences or file like objects.
The size of the last chunk returned may be less than
requested.'''
if hasattr(mixed, 'read'):
return chunkread(mixed, size)
else:
return chunks(mixed, size)
# --- - /chunking helpers
def dehex(hextext):
"""
Convert from hex string to binary data stripping
whitespaces from `hextext` if necessary.
"""
if PY3K:
return bytes.fromhex(hextext)
else:
hextext = "".join(hextext.split())
return hextext.decode('hex')
def dump(binary, size=2, sep=' '):
'''
Convert binary data (bytes in Python 3 and str in
Python 2) to hex string like '00 DE AD BE EF'.
`size` argument specifies length of text chunks
and `sep` sets chunk separator.
'''
hexstr = binascii.hexlify(binary)
if PY3K:
hexstr = hexstr.decode('ascii')
return sep.join(chunks(hexstr.upper(), size))
def dumpgen(data, only_str):
'''
Generator that produces strings:
'00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................'
'''
generator = genchunks(data, 16)
for addr, d in enumerate(generator):
line = ""
if not only_str:
# 00000000:
line = '%08X: ' % (addr * 16)
# 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
dumpstr = dump(d)
line += dumpstr[:8 * 3]
if len(d) > 8: # insert separator if needed
line += ' ' + dumpstr[8 * 3:]
# ................
# calculate indentation, which may be different for the last line
pad = 2
if len(d) < 16:
pad += 3 * (16 - len(d))
if len(d) <= 8:
pad += 1
line += ' ' * pad
for byte in d:
# printable ASCII range 0x20 to 0x7E
if not PY3K:
byte = ord(byte)
if 0x20 <= byte <= 0x7E:
line += chr(byte)
else:
line += '.'
yield line
def hexdump(data, result='print', only_str=False):
'''
Transform binary data to the hex dump text format:
00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
[x] data argument as a binary string
[x] data argument as a file like object
Returns result depending on the `result` argument:
'print' - prints line by line
'return' - returns single string
'generator' - returns generator that produces lines
'''
if PY3K and type(data) == str:
raise TypeError('Abstract unicode data (expected bytes sequence)')
gen = dumpgen(data, only_str=only_str)
if result == 'generator':
return gen
elif result == 'return':
return '\n'.join(gen)
elif result == 'print':
for line in gen:
print(line)
else:
raise ValueError('Unknown value of `result` argument')
def restore(dump):
'''
Restore binary data from a hex dump.
[x] dump argument as a string
[ ] dump argument as a line iterator
Supported formats:
[x] hexdump.hexdump
[x] Scapy
[x] Far Manager
'''
minhexwidth = 2 * 16 # minimal width of the hex part - 00000... style
bytehexwidth = 3 * 16 - 1 # min width for a bytewise dump - 00 00 ... style
result = bytes() if PY3K else ''
if type(dump) != str:
raise TypeError('Invalid data for restore')
text = dump.strip() # ignore surrounding empty lines
for line in text.split('\n'):
# strip address part
addrend = line.find(':')
if 0 < addrend < minhexwidth: # : is not in ascii part
line = line[addrend + 1:]
line = line.lstrip()
# check dump type
if line[2] == ' ': # 00 00 00 ... type of dump
# check separator
sepstart = (2 + 1) * 7 + 2 # ('00'+' ')*7+'00'
sep = line[sepstart:sepstart + 3]
if sep[:2] == ' ' and sep[2:] != ' ': # ...00 00 00 00...
hexdata = line[:bytehexwidth + 1]
elif sep[2:] == ' ': # ...00 00 | 00 00... - Far Manager
hexdata = line[:sepstart] + line[sepstart + 3:bytehexwidth + 2]
else: # ...00 00 00 00... - Scapy, no separator
hexdata = line[:bytehexwidth]
line = hexdata
result += dehex(line)
return result
def runtest(logfile=None):
'''Run hexdump tests. Requires hexfile.bin to be in the same
directory as hexdump.py itself'''
class TeeOutput(object):
def __init__(self, stream1, stream2):
self.outputs = [stream1, stream2]
# -- methods from sys.stdout / sys.stderr
def write(self, data):
for stream in self.outputs:
if PY3K:
if 'b' in stream.mode:
data = data.encode('utf-8')
stream.write(data)
stream.flush()
def tell(self):
raise IOError
def flush(self):
for stream in self.outputs:
stream.flush()
# --/ sys.stdout
if logfile:
openlog = open(logfile, 'wb')
# copy stdout and stderr streams to log file
savedstd = sys.stderr, sys.stdout
sys.stderr = TeeOutput(sys.stderr, openlog)
sys.stdout = TeeOutput(sys.stdout, openlog)
def echo(msg, linefeed=True):
sys.stdout.write(msg)
if linefeed:
sys.stdout.write('\n')
expected = '''\
00000000: 00 00 00 5B 68 65 78 64 75 6D 70 5D 00 00 00 00 ...[hexdump]....
00000010: 00 11 22 33 44 55 66 77 88 99 0A BB CC DD EE FF .."3DUfw........\
'''
# get path to hexfile.bin
# this doesn't work from .zip
# import os.path as osp
# hexfile = osp.dirname(osp.abspath(__file__)) + '/hexfile.bin'
# this doesn't work either
# hexfile = osp.dirname(sys.modules[__name__].__file__) + '/hexfile.bin'
# this works
import pkgutil
bin = pkgutil.get_data('hexdump', 'data/hexfile.bin')
# varios length of input data
hexdump(b'zzzz' * 12)
hexdump(b'o' * 17)
hexdump(b'p' * 24)
hexdump(b'q' * 26)
# allowable character set filter
hexdump(b'line\nfeed\r\ntest')
hexdump(b'\x00\x00\x00\x5B\x68\x65\x78\x64\x75\x6D\x70\x5D\x00\x00\x00\x00'
b'\x00\x11\x22\x33\x44\x55\x66\x77\x88\x99\x0A\xBB\xCC\xDD\xEE\xFF')
print('---')
# dumping file-like binary object to screen (default behavior)
hexdump(bin)
print('return output')
hexout = hexdump(bin, result='return')
assert hexout == expected, 'returned hex didn\'t match'
print('return generator')
hexgen = hexdump(bin, result='generator')
assert next(hexgen) == expected.split('\n')[0], 'hex generator 1 didn\'t match'
assert next(hexgen) == expected.split('\n')[1], 'hex generator 2 didn\'t match'
# binary restore test
bindata = restore(
'''
00000000: 00 00 00 5B 68 65 78 64 75 6D 70 5D 00 00 00 00 ...[hexdump]....
00000010: 00 11 22 33 44 55 66 77 88 99 0A BB CC DD EE FF .."3DUfw........
''')
echo('restore check ', linefeed=False)
assert bin == bindata, 'restore check failed'
echo('passed')
far = \
'''
000000000: 00 00 00 5B 68 65 78 64 ¦ 75 6D 70 5D 00 00 00 00 [hexdump]
000000010: 00 11 22 33 44 55 66 77 ¦ 88 99 0A BB CC DD EE FF ?"3DUfwˆ™ª»ÌÝîÿ
'''
echo('restore far format ', linefeed=False)
assert bin == restore(far), 'far format check failed'
echo('passed')
scapy = '''\
00 00 00 5B 68 65 78 64 75 6D 70 5D 00 00 00 00 ...[hexdump]....
00 11 22 33 44 55 66 77 88 99 0A BB CC DD EE FF .."3DUfw........
'''
echo('restore scapy format ', linefeed=False)
assert bin == restore(scapy), 'scapy format check failed'
echo('passed')
if not PY3K:
assert restore('5B68657864756D705D') == '[hexdump]', 'no space check failed'
assert dump('\\\xa1\xab\x1e', sep='').lower() == '5ca1ab1e'
else:
assert restore('5B68657864756D705D') == b'[hexdump]', 'no space check failed'
assert dump(b'\\\xa1\xab\x1e', sep='').lower() == '5ca1ab1e'
print('---[test file hexdumping]---')
import os
import tempfile
hexfile = tempfile.NamedTemporaryFile(delete=False)
try:
hexfile.write(bin)
hexfile.close()
hexdump(open(hexfile.name, 'rb'))
finally:
os.remove(hexfile.name)
if logfile:
sys.stderr, sys.stdout = savedstd
openlog.close()
def main():
from optparse import OptionParser
parser = OptionParser(usage='''
%prog [binfile|-]
%prog -r hexfile
%prog --test [logfile]''', version=__version__)
parser.add_option('-r', '--restore', action='store_true',
help='restore binary from hex dump')
parser.add_option('--test', action='store_true', help='run hexdump sanity checks')
options, args = parser.parse_args()
if options.test:
if args:
runtest(logfile=args[0])
else:
runtest()
elif not args or len(args) > 1:
parser.print_help()
sys.exit(-1)
else:
## dump file
if not options.restore:
# [x] memory effective dump
if args[0] == '-':
if not PY3K:
hexdump(sys.stdin)
else:
hexdump(sys.stdin.buffer)
else:
hexdump(open(args[0], 'rb'))
## restore file
else:
# prepare input stream
if args[0] == '-':
instream = sys.stdin
else:
if PY3K:
instream = open(args[0])
else:
instream = open(args[0], 'rb')
# output stream
# [ ] memory efficient restore
if PY3K:
sys.stdout.buffer.write(restore(instream.read()))
else:
# Windows - binary mode for sys.stdout to prevent data corruption
normalize_py()
sys.stdout.write(restore(instream.read()))
if __name__ == '__main__':
main()
# [x] file restore from command line utility
# [ ] write dump with LF on Windows for consistency
# [ ] encoding param for hexdump()ing Python 3 str if anybody requests that
# [ ] document chunking API
# [ ] document hexdump API
# [ ] blog about sys.stdout text mode problem on Windows
1
https://gitee.com/software-reverse-engineering/r0capture.git
git@gitee.com:software-reverse-engineering/r0capture.git
software-reverse-engineering
r0capture
r0capture
main

搜索帮助