aboutsummaryrefslogtreecommitdiff
path: root/devtools/tasmrecover/tasm
diff options
context:
space:
mode:
authorVladimir2011-06-05 15:05:12 +0400
committerAlyssa Milburn2011-06-15 17:29:44 +0200
commit9cf2a7ba0e686b35a4f1041f67c51cb9ef2fa867 (patch)
tree6d7d41f4443878cb323717b2ef0a0ee246f1e40e /devtools/tasmrecover/tasm
parent1f063c947b34ba9e56fabcbdf838094da80b3727 (diff)
downloadscummvm-rg350-9cf2a7ba0e686b35a4f1041f67c51cb9ef2fa867.tar.gz
scummvm-rg350-9cf2a7ba0e686b35a4f1041f67c51cb9ef2fa867.tar.bz2
scummvm-rg350-9cf2a7ba0e686b35a4f1041f67c51cb9ef2fa867.zip
DREAMWEB: added tasm-recover tool
Diffstat (limited to 'devtools/tasmrecover/tasm')
-rw-r--r--devtools/tasmrecover/tasm/__init__.py0
-rw-r--r--devtools/tasmrecover/tasm/__init__.pycbin0 -> 137 bytes
-rw-r--r--devtools/tasmrecover/tasm/cpp.py544
-rw-r--r--devtools/tasmrecover/tasm/cpp.pycbin0 -> 22205 bytes
-rw-r--r--devtools/tasmrecover/tasm/lex.py52
-rw-r--r--devtools/tasmrecover/tasm/lex.pycbin0 -> 1058 bytes
-rw-r--r--devtools/tasmrecover/tasm/op.py394
-rw-r--r--devtools/tasmrecover/tasm/op.pycbin0 -> 32291 bytes
-rw-r--r--devtools/tasmrecover/tasm/parser.py248
-rw-r--r--devtools/tasmrecover/tasm/parser.pycbin0 -> 8391 bytes
-rw-r--r--devtools/tasmrecover/tasm/proc.py84
-rw-r--r--devtools/tasmrecover/tasm/proc.pycbin0 -> 3100 bytes
12 files changed, 1322 insertions, 0 deletions
diff --git a/devtools/tasmrecover/tasm/__init__.py b/devtools/tasmrecover/tasm/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/devtools/tasmrecover/tasm/__init__.py
diff --git a/devtools/tasmrecover/tasm/__init__.pyc b/devtools/tasmrecover/tasm/__init__.pyc
new file mode 100644
index 0000000000..8582b7b592
--- /dev/null
+++ b/devtools/tasmrecover/tasm/__init__.pyc
Binary files differ
diff --git a/devtools/tasmrecover/tasm/cpp.py b/devtools/tasmrecover/tasm/cpp.py
new file mode 100644
index 0000000000..53338c1d02
--- /dev/null
+++ b/devtools/tasmrecover/tasm/cpp.py
@@ -0,0 +1,544 @@
+import op, traceback, re, proc
+from copy import copy
+proc_module = proc
+
+class CrossJump(Exception):
+ pass
+
+def parse_bin(s):
+ b = s.group(1)
+ v = hex(int(b, 2))
+ #print "BINARY: %s -> %s" %(b, v)
+ return v
+
+class cpp:
+ def __init__(self, context, namespace, skip_first = 0, blacklist = []):
+ self.namespace = namespace
+ fname = namespace + ".cpp"
+ header = namespace + ".h"
+ self.fd = open(fname, "wt")
+ self.hd = open(header, "wt")
+ self.context = context
+ self.data_seg = context.binary_data
+ self.procs = context.proc_list
+ self.skip_first = skip_first
+ self.proc_queue = []
+ self.proc_done = []
+ self.blacklist = blacklist
+ self.failed = list(blacklist)
+ self.translated = []
+ self.proc_addr = []
+ self.forwards = []
+ self.fd.write("""#include \"%s\"
+
+namespace %s {
+
+""" %(header, namespace))
+
+ def expand_cb(self, match):
+ name = match.group(0).lower()
+ if len(name) == 2 and \
+ ((name[0] in ['a', 'b', 'c', 'd'] and name[1] in ['h', 'x', 'l']) or name in ['si', 'di', 'es', 'ds', 'cs']):
+ return "context.%s" %name
+
+ if self.indirection == -1:
+ try:
+ offset,p,p = self.context.get_offset(name)
+ print "OFFSET = %d" %offset
+ self.indirection = 0
+ return str(offset)
+ except:
+ pass
+
+ g = self.context.get_global(name)
+ if isinstance(g, op.const):
+ value = self.expand(g.value)
+ elif isinstance(g, proc.proc):
+ if self.indirection != -1:
+ raise Exception("invalid proc label usage")
+ value = str(g.offset)
+ self.indirection = 0
+ else:
+ size = g.size
+ if size == 0:
+ raise Exception("invalid var '%s' size %u" %(name, size))
+ if self.indirection == 0:
+ value = "context.data.%s(%d)" %("byte" if size == 1 else "word", g.offset)
+ elif self.indirection == -1:
+ value = "%s" %g.offset
+ self.indirection = 0
+ else:
+ raise Exception("invalid indirection %d" %self.indirection)
+ return value
+
+ def get_size(self, expr):
+ #print 'get_size("%s")' %expr
+ try:
+ v = self.context.parse_int(expr)
+ return 1 if v < 256 else 2
+ except:
+ pass
+
+ if re.match(r'byte\s+ptr\s', expr) is not None:
+ return 1
+
+ if re.match(r'word\s+ptr\s', expr) is not None:
+ return 2
+
+ if len(expr) == 2 and expr[0] in ['a', 'b', 'c', 'd'] and expr[1] in ['h', 'l']:
+ return 1
+ if expr in ['ax', 'bx', 'cx', 'dx', 'si', 'di', 'sp', 'bp', 'ds', 'cs', 'es', 'fs']:
+ return 2
+
+ m = re.match(r'[a-zA-Z_]\w*', expr)
+ if m is not None:
+ name = m.group(0)
+ try:
+ g = self.context.get_global(name)
+ return g.size
+ except:
+ pass
+
+ return 0
+
+ def expand(self, expr, def_size = 0):
+ #print "EXPAND \"%s\"" %expr
+ size = self.get_size(expr) if def_size == 0 else def_size
+ indirection = 0
+ seg = None
+ reg = True
+
+ m = re.match(r'seg\s+(.*?)$', expr)
+ if m is not None:
+ return "context.ds"
+
+ match_id = True
+ m = re.match(r'offset\s+(.*?)$', expr)
+ if m is not None:
+ indirection -= 1
+ expr = m.group(1).strip()
+
+ m = re.match(r'byte\s+ptr\s+(.*?)$', expr)
+ if m is not None:
+ expr = m.group(1).strip()
+
+ m = re.match(r'word\s+ptr\s+(.*?)$', expr)
+ if m is not None:
+ expr = m.group(1).strip()
+
+ m = re.match(r'\[(.*)\]$', expr)
+ if m is not None:
+ indirection += 1
+ expr = m.group(1).strip()
+
+ m = re.match(r'(\w{2,2}):(.*)$', expr)
+ if m is not None:
+ seg = m.group(1)
+ expr = m.group(2).strip()
+ print "SEGMENT %s, remains: %s" %(seg, expr)
+
+ m = re.match(r'(([abcd][xhl])|si|di|bp|sp)([\+-].*)?$', expr)
+ if m is not None:
+ reg = m.group(1)
+ plus = m.group(3)
+ if plus is not None:
+ plus = self.expand(plus)
+ else:
+ plus = ""
+ match_id = False
+ #print "COMMON_REG: ", reg, plus
+ expr = "context.%s%s" %(reg, plus)
+
+ expr = re.sub(r'\b([0-9][a-fA-F0-9]*)h', '0x\\1', expr)
+ expr = re.sub(r'\b([0-1]+)b', parse_bin, expr)
+ expr = re.sub(r'"(.)"', '\'\\1\'', expr)
+ if match_id:
+ #print "BEFORE: %d" %indirection
+ self.indirection = indirection
+ expr = re.sub(r'\b[a-zA-Z_][a-zA-Z0-9_]+\b', self.expand_cb, expr)
+ indirection = self.indirection
+ #print "AFTER: %d" %indirection
+
+ if indirection == 1:
+ if size == 1:
+ expr = "context.data.byte(%s)" %expr
+ elif size == 2:
+ expr = "context.data.word(%s)" %expr
+ else:
+ expr = "@invalid size 0"
+ elif indirection == 0:
+ pass
+ elif indirection == -1:
+ expr = "&%s" %expr
+ else:
+ raise Exception("invalid indirection %d" %indirection)
+ return expr
+
+ def mangle_label(self, name):
+ name = name.lower()
+ return re.sub(r'\$', '_tmp', name)
+
+ def resolve_label(self, name):
+ name = name.lower()
+ if not name in self.proc.labels:
+ try:
+ offset, proc, pos = self.context.get_offset(name)
+ except:
+ print "no label %s, trying procedure" %name
+ proc = self.context.get_global(name)
+ pos = 0
+ if not isinstance(proc, proc_module.proc):
+ raise CrossJump("cross-procedure jump to non label and non procedure %s" %(name))
+ self.proc.labels.add(name)
+ for i in xrange(0, len(self.unbounded)):
+ u = self.unbounded[i]
+ if u[1] == proc:
+ if pos < u[2]:
+ self.unbounded[i] = (name, proc, pos)
+ return self.mangle_label(name)
+ self.unbounded.append((name, proc, pos))
+
+ return self.mangle_label(name)
+
+ def jump_to_label(self, name):
+ jump_proc = False
+ if name in self.blacklist:
+ jump_proc = True
+
+ if self.context.has_global(name) :
+ g = self.context.get_global(name)
+ if isinstance(g, proc_module.proc):
+ jump_proc = True
+
+ if jump_proc:
+ self.add_forward(name)
+ return "{ %s(context); return; }" %name
+ else:
+ return "goto %s" %self.resolve_label(name)
+
+ def _label(self, name):
+ self.body += "%s:\n" %self.mangle_label(name)
+
+ def schedule(self, name):
+ name = name.lower()
+ if name in self.proc_queue or name in self.proc_done or name in self.failed:
+ return
+ print "+scheduling function %s..." %name
+ self.proc_queue.append(name)
+
+ def add_forward(self, name):
+ if name not in self.forwards and name not in self.failed:
+ self.forwards.append(name)
+
+ def _call(self, name):
+ name = name.lower()
+ if name == 'ax':
+ self.body += "\t__dispatch_call(context, %s);\n" %self.expand('ax', 2)
+ return
+ self.body += "\t%s(context);\n" %name
+ self.add_forward(name);
+ self.schedule(name)
+
+ def _ret(self):
+ self.body += "\treturn;\n"
+
+ def parse2(self, dst, src):
+ dst_size, src_size = self.get_size(dst), self.get_size(src)
+ if dst_size == 0:
+ if src_size == 0:
+ raise Exception("both sizes are 0")
+ dst_size = src_size
+ if src_size == 0:
+ src_size = dst_size
+
+ dst = self.expand(dst, dst_size)
+ src = self.expand(src, src_size)
+ return dst, src
+
+ def _mov(self, dst, src):
+ self.body += "\t%s = %s;\n" %self.parse2(dst, src)
+
+ def _add(self, dst, src):
+ self.body += "\tcontext._add(%s, %s);\n" %self.parse2(dst, src)
+
+ def _sub(self, dst, src):
+ self.body += "\tcontext._sub(%s, %s);\n" %self.parse2(dst, src)
+
+ def _and(self, dst, src):
+ self.body += "\tcontext._and(%s, %s);\n" %self.parse2(dst, src)
+
+ def _or(self, dst, src):
+ self.body += "\tcontext._or(%s, %s);\n" %self.parse2(dst, src)
+
+ def _xor(self, dst, src):
+ self.body += "\tcontext._xor(%s, %s);\n" %self.parse2(dst, src)
+
+ def _neg(self, dst):
+ dst = self.expand(dst)
+ self.body += "\tcontext._neg(%s);\n" %(dst)
+
+ def _cbw(self):
+ self.body += "\tcontext.ax.cbw();\n"
+
+ def _shr(self, dst, src):
+ self.body += "\tcontext._shr(%s, %s);\n" %self.parse2(dst, src)
+
+ def _shl(self, dst, src):
+ self.body += "\tcontext._shl(%s, %s);\n" %self.parse2(dst, src)
+
+ def _sar(self, dst, src):
+ self.body += "\tcontext._sar(%s%s);\n" %self.parse2(dst, src)
+
+ def _sal(self, dst, src):
+ self.body += "\tcontext._sal(%s, %s);\n" %self.parse2(dst, src)
+
+ def _rcl(self, dst, src):
+ self.body += "\tcontext._rcl(%s, %s);\n" %self.parse2(dst, src)
+
+ def _rcr(self, dst, src):
+ self.body += "\tcontext._rcr(%s, %s);\n" %self.parse2(dst, src)
+
+ def _mul(self, src):
+ src = self.expand(src)
+ self.body += "\tcontext._mul(%s);\n" %(src)
+
+ def _div(self, src):
+ src = self.expand(src)
+ self.body += "\tcontext._div(%s);\n" %(src)
+
+ def _inc(self, dst):
+ dst = self.expand(dst)
+ self.body += "\tcontext._add(%s, 1);\n" %(dst)
+
+ def _dec(self, dst):
+ dst = self.expand(dst)
+ self.body += "\tcontext._sub(%s, 1);\n" %(dst)
+
+ def _cmp(self, a, b):
+ self.body += "\tcontext._cmp(%s, %s);\n" %self.parse2(a, b)
+
+ def _test(self, a, b):
+ self.body += "\tcontext._test(%s, %s);\n" %self.parse2(a, b)
+
+ def _js(self, label):
+ self.body += "\tif (context.flags.s()) %s;\n" %(self.jump_to_label(label))
+
+ def _jns(self, label):
+ self.body += "\tif (!context.flags.s()) %s;\n" %(self.jump_to_label(label))
+
+ def _jz(self, label):
+ self.body += "\tif (context.flags.z()) %s;\n" %(self.jump_to_label(label))
+
+ def _jnz(self, label):
+ self.body += "\tif (!context.flags.z()) %s;\n" %(self.jump_to_label(label))
+
+ def _jl(self, label):
+ self.body += "\tif (context.flags.l()) %s;\n" %(self.jump_to_label(label))
+
+ def _jg(self, label):
+ self.body += "\tif (!context.flags.g()) %s;\n" %(self.jump_to_label(label))
+
+ def _jle(self, label):
+ self.body += "\tif (context.flags.le()) %s;\n" %(self.jump_to_label(label))
+
+ def _jge(self, label):
+ self.body += "\tif (!context.flags.ge()) %s;\n" %(self.jump_to_label(label))
+
+ def _jc(self, label):
+ self.body += "\tif (context.flags.c()) %s;\n" %(self.jump_to_label(label))
+
+ def _jnc(self, label):
+ self.body += "\tif (!context.flags.c()) %s;\n" %(self.jump_to_label(label))
+
+ def _xchg(self, dst, src):
+ self.body += "\tcontext._xchg(%s, %s);\n" %self.parse2(dst, src)
+
+ def _jmp(self, label):
+ self.body += "\t%s;\n" %(self.jump_to_label(label))
+
+ def _loop(self, label):
+ self.body += "\tif (--context.cx) %s;\n" %self.jump_to_label(label)
+
+ def _push(self, regs):
+ p = str();
+ for r in regs:
+ r = self.expand(r)
+ p += "\tcontext.push(%s);\n" %(r)
+ self.body += p
+
+ def _pop(self, regs):
+ p = str();
+ for r in regs:
+ self.temps_count -= 1
+ i = self.temps_count
+ r = self.expand(r)
+ p += "\t%s = context.pop();\n" %r
+ self.body += p
+
+ def _rep(self):
+ self.body += "\twhile(--context.cx) ";
+
+ def _lodsb(self):
+ self.body += "\tcontext._lodsb();\n";
+
+ def _lodsw(self):
+ self.body += "\tcontext._lodsw();\n";
+
+ def _stosb(self):
+ self.body += "\tcontext._stosb();\n";
+
+ def _stosw(self):
+ self.body += "\tcontext._stosw();\n";
+
+ def _movsb(self):
+ self.body += "\tcontext._movsb();\n ";
+
+ def _movsw(self):
+ self.body += "\tcontext._movsw();\n ";
+
+ def __proc(self, name, def_skip = 0):
+ try:
+ skip = def_skip
+ self.temps_count = 0
+ self.temps_max = 0
+ if self.context.has_global(name):
+ self.proc = self.context.get_global(name)
+ else:
+ print "No procedure named %s, trying label" %name
+ off, src_proc, skip = self.context.get_offset(name)
+
+ self.proc = proc_module.proc(name)
+ self.proc.stmts = copy(src_proc.stmts)
+ self.proc.labels = copy(src_proc.labels)
+ #for p in xrange(skip, len(self.proc.stmts)):
+ # s = self.proc.stmts[p]
+ # if isinstance(s, op.basejmp):
+ # o, p, s = self.context.get_offset(s.label)
+ # if p == src_proc and s < skip:
+ # skip = s
+
+
+ self.proc_addr.append((name, self.proc.offset))
+ self.body = str()
+ self.body += "static void %s(Context & context) {\n" %name;
+ self.proc.optimize()
+ self.unbounded = []
+ self.proc.visit(self, skip)
+
+ #adding remaining labels:
+ for i in xrange(0, len(self.unbounded)):
+ u = self.unbounded[i]
+ print "UNBOUNDED: ", u
+ proc = u[1]
+ for p in xrange(u[2], len(proc.stmts)):
+ s = proc.stmts[p]
+ if isinstance(s, op.basejmp):
+ self.resolve_label(s.label)
+
+ #adding statements
+ for label, proc, offset in self.unbounded:
+ self.body += "/*continuing to unbounded code: %s from %s:%d-%d*/\n" %(label, proc.name, offset, len(proc.stmts))
+ start = len(self.proc.stmts)
+ self.proc.add_label(label)
+ for s in proc.stmts[offset:]:
+ if isinstance(s, op.label):
+ self.proc.labels.add(s.name)
+ self.proc.stmts.append(s)
+ self.proc.add("ret")
+ print "skipping %d instructions, todo: %d" %(start, len(self.proc.stmts) - start)
+ self.proc.visit(self, start)
+ self.body += "}\n";
+ self.translated.insert(0, self.body)
+ self.proc = None
+ if self.temps_count > 0:
+ raise Exception("temps count == %d at the exit of proc" %self.temps_count);
+ return True
+ except (CrossJump, op.Unsupported) as e:
+ print "%s: ERROR: %s" %(name, e)
+ self.failed.append(name)
+ except:
+ raise
+
+ def get_type(self, width):
+ return "uint%d_t" %(width * 8)
+
+ def write_stubs(self, fname, procs):
+ fd = open(fname, "wt")
+ fd.write("namespace %s {\n" %self.namespace)
+ for p in procs:
+ fd.write("void %s(Context &context) {\n\t::error(\"%s\");\n}\n\n" %(p, p))
+ fd.write("} /*namespace %s */\n" %self.namespace)
+ fd.close()
+
+
+ def generate(self, start):
+ #print self.prologue()
+ #print context
+ self.proc_queue.append(start)
+ while len(self.proc_queue):
+ name = self.proc_queue.pop()
+ if name in self.failed or name in self.proc_done:
+ continue
+ if len(self.proc_queue) == 0 and len(self.procs) > 0:
+ print "queue's empty, adding remaining procs:"
+ for p in self.procs:
+ self.schedule(p)
+ self.procs = []
+ print "continuing on %s" %name
+ self.proc_done.append(name)
+ self.__proc(name)
+ self.write_stubs("_stubs.cpp", self.failed)
+ done, failed = len(self.proc_done), len(self.failed)
+ for f in self.forwards:
+ if f not in self.failed:
+ self.fd.write("static void %s(Context &context);\n" %f)
+ self.fd.write("\n")
+ self.fd.write("\n".join(self.translated))
+ self.fd.write("\n\n")
+ print "%d ok, %d failed of %d, %.02g%% translated" %(done, failed, done + failed, 100.0 * done / (done + failed))
+ print "\n".join(self.failed)
+ data_decl = "struct Data : public Segment {\n\t\tData();\n"
+ data_impl = "Data::Data() {\n"
+ data_bin = self.data_seg
+ data_impl += "\tstatic const uint8 src[] = {\n\t\t"
+ n = 0
+ for v in data_bin:
+ data_impl += "%s, " %v
+ n += 1
+ if (n & 0xf) == 0:
+ data_impl += "\n\t\t"
+ data_impl += "};\ndata.assign(src, src + sizeof(src));\n"
+ hid = "TASMRECOVER_%s_STUBS_H__" %self.namespace.upper()
+ data_decl += "\t};\n\n"
+ data_impl += "\t};\n\n"
+ self.hd.write("""#ifndef %s
+#define %s
+""" %(hid, hid))
+ self.hd.write(
+"""\n#\tinclude "runtime.h"
+
+namespace %s {
+
+ %s
+ typedef RegisterContext<Data> Context;
+
+ void __dispatch_call(Context &context, unsigned addr);
+ void __start(Context &context);
+
+""" %(self.namespace, data_decl))
+ self.fd.write(data_impl)
+
+ for f in self.failed:
+ self.hd.write("\tvoid %s(Context &context);\n" %f)
+ self.hd.write("\n}\n\n#endif\n")
+ self.hd.close()
+
+ self.fd.write("\nvoid __start(Context &context) { %s(context); }\n" %start)
+
+ self.fd.write("\nvoid __dispatch_call(Context &context, unsigned addr) {\n\tswitch(addr) {\n")
+ self.proc_addr.sort(cmp = lambda x, y: x[1] - y[1])
+ for name,addr in self.proc_addr:
+ self.fd.write("\t\tcase 0x%04x: %s(context); break;\n" %(addr, name))
+ self.fd.write("\n\t}\n}\n\n} /*namespace*/\n")
+
+ self.fd.close()
diff --git a/devtools/tasmrecover/tasm/cpp.pyc b/devtools/tasmrecover/tasm/cpp.pyc
new file mode 100644
index 0000000000..b0406b3252
--- /dev/null
+++ b/devtools/tasmrecover/tasm/cpp.pyc
Binary files differ
diff --git a/devtools/tasmrecover/tasm/lex.py b/devtools/tasmrecover/tasm/lex.py
new file mode 100644
index 0000000000..ba66611bce
--- /dev/null
+++ b/devtools/tasmrecover/tasm/lex.py
@@ -0,0 +1,52 @@
+def parse_args(text):
+ #print "parsing: [%s]" %text
+ escape = False
+ string = False
+ result = []
+ token = str()
+ value = 0;
+ for c in text:
+ #print "[%s]%s: %s: %s" %(token, c, escape, string)
+ if c == '\\':
+ escape = True
+ continue
+
+ if escape:
+ if not string:
+ raise SyntaxError("escape found in no string: %s" %text);
+
+ #print "escaping[%s]" %c
+ escape = False
+ token += c
+ continue
+
+ if string:
+ if c == '\'' or c == '"':
+ string = False
+
+ token += c
+ continue
+
+ if c == '\'' or c == '"':
+ string = True
+ token += c
+ continue
+
+ if c == ',':
+ result.append(token.strip())
+ token = str()
+ continue
+
+ if c == ';': #comment, bailing out
+ break
+
+ token += c
+ token = token.strip()
+ if len(token):
+ result.append(token)
+ #print result
+ return result
+
+def compile(width, data):
+ print data
+ return data
diff --git a/devtools/tasmrecover/tasm/lex.pyc b/devtools/tasmrecover/tasm/lex.pyc
new file mode 100644
index 0000000000..aaab26e8cf
--- /dev/null
+++ b/devtools/tasmrecover/tasm/lex.pyc
Binary files differ
diff --git a/devtools/tasmrecover/tasm/op.py b/devtools/tasmrecover/tasm/op.py
new file mode 100644
index 0000000000..c77eda2b7e
--- /dev/null
+++ b/devtools/tasmrecover/tasm/op.py
@@ -0,0 +1,394 @@
+import re
+import lex
+
+class Unsupported(Exception):
+ pass
+
+class var:
+ def __init__(self, size, offset):
+ self.size = size
+ self.offset = offset
+
+class const:
+ def __init__(self, value):
+ self.value = value
+
+class reg:
+ def __init__(self, name):
+ self.name = name
+ def size(self):
+ return 2 if self.name[1] == 'x' else 1
+ def __str__(self):
+ return "<register %s>" %self.name
+
+class unref:
+ def __init__(self, exp):
+ self.exp = exp
+ def __str__(self):
+ return "<unref %s>" %self.exp
+
+class ref:
+ def __init__(self, name):
+ self.name = name
+ def __str__(self):
+ return "<ref %s>" %self.name
+
+class glob:
+ def __init__(self, name):
+ self.name = name
+ def __str__(self):
+ return "<global %s>" %self.name
+
+class segment:
+ def __init__(self, name):
+ self.name = name
+ def __str__(self):
+ return "<segment %s>" %self.name
+
+class baseop(object):
+ def parse_arg(self, arg):
+ return arg
+
+ def split(self, text):
+ a, b = lex.parse_args(text)
+ return self.parse_arg(a), self.parse_arg(b)
+ def __str__(self):
+ return str(self.__class__)
+
+class basejmp(baseop):
+ pass
+
+class _call(baseop):
+ def __init__(self, arg):
+ self.name = arg
+ def visit(self, visitor):
+ visitor._call(self.name)
+ def __str__(self):
+ return "call(%s)" %self.name
+
+class _rep(baseop):
+ def __init__(self, arg):
+ pass
+ def visit(self, visitor):
+ visitor._rep()
+
+class _mov(baseop):
+ def __init__(self, arg):
+ self.dst, self.src = self.split(arg)
+ def visit(self, visitor):
+ visitor._mov(self.dst, self.src)
+ def __str__(self):
+ return "mov(%s, %s)" %(self.dst, self.src)
+
+class _mov2(baseop):
+ def __init__(self, dst, src):
+ self.dst, self.src = dst, src
+ def visit(self, visitor):
+ visitor._mov(self.dst, self.src)
+
+class _shr(baseop):
+ def __init__(self, arg):
+ self.dst, self.src = self.split(arg)
+ def visit(self, visitor):
+ visitor._shr(self.dst, self.src)
+
+class _shl(baseop):
+ def __init__(self, arg):
+ self.dst, self.src = self.split(arg)
+ def visit(self, visitor):
+ visitor._shl(self.dst, self.src)
+
+class _ror(baseop):
+ def __init__(self, arg):
+ pass
+
+class _rol(baseop):
+ def __init__(self, arg):
+ pass
+
+class _sar(baseop):
+ def __init__(self, arg):
+ self.dst, self.src = self.split(arg)
+ def visit(self, visitor):
+ visitor._sar(self.dst, self.src)
+
+class _sal(baseop):
+ def __init__(self, arg):
+ self.dst, self.src = self.split(arg)
+ def visit(self, visitor):
+ visitor._sal(self.dst, self.src)
+
+class _rcl(baseop):
+ def __init__(self, arg):
+ self.dst, self.src = self.split(arg)
+ def visit(self, visitor):
+ visitor._rcl(self.dst, self.src)
+
+class _rcr(baseop):
+ def __init__(self, arg):
+ self.dst, self.src = self.split(arg)
+ def visit(self, visitor):
+ visitor._rcr(self.dst, self.src)
+
+class _neg(baseop):
+ def __init__(self, arg):
+ self.arg = arg
+ def visit(self, visitor):
+ visitor._neg(self.arg)
+
+class _dec(baseop):
+ def __init__(self, arg):
+ self.dst = arg
+ def visit(self, visitor):
+ visitor._dec(self.dst)
+
+class _inc(baseop):
+ def __init__(self, arg):
+ self.dst = arg
+ def visit(self, visitor):
+ visitor._inc(self.dst)
+
+class _add(baseop):
+ def __init__(self, arg):
+ self.dst, self.src = self.split(arg)
+ def visit(self, visitor):
+ visitor._add(self.dst, self.src)
+
+class _sub(baseop):
+ def __init__(self, arg):
+ self.dst, self.src = self.split(arg)
+ def visit(self, visitor):
+ visitor._sub(self.dst, self.src)
+
+class _mul(baseop):
+ def __init__(self, arg):
+ self.arg = self.parse_arg(arg)
+ def visit(self, visitor):
+ visitor._mul(self.arg)
+
+class _div(baseop):
+ def __init__(self, arg):
+ self.arg = self.parse_arg(arg)
+ def visit(self, visitor):
+ visitor._div(self.arg)
+
+class _and(baseop):
+ def __init__(self, arg):
+ self.dst, self.src = self.split(arg)
+ def visit(self, visitor):
+ visitor._and(self.dst, self.src)
+
+class _xor(baseop):
+ def __init__(self, arg):
+ self.dst, self.src = self.split(arg)
+ def visit(self, visitor):
+ visitor._xor(self.dst, self.src)
+
+class _or(baseop):
+ def __init__(self, arg):
+ self.dst, self.src = self.split(arg)
+ def visit(self, visitor):
+ visitor._or(self.dst, self.src)
+
+class _cmp(baseop):
+ def __init__(self, arg):
+ self.a, self.b = self.split(arg)
+ def visit(self, visitor):
+ visitor._cmp(self.a, self.b)
+
+class _test(baseop):
+ def __init__(self, arg):
+ self.a, self.b = self.split(arg)
+ def visit(self, visitor):
+ visitor._test(self.a, self.b)
+
+class _xchg(baseop):
+ def __init__(self, arg):
+ self.a, self.b = self.split(arg)
+ def visit(self, visitor):
+ visitor._xchg(self.a, self.b)
+
+class _jnz(basejmp):
+ def __init__(self, label):
+ self.label = label
+ def visit(self, visitor):
+ visitor._jnz(self.label)
+
+class _jz(basejmp):
+ def __init__(self, label):
+ self.label = label
+ def visit(self, visitor):
+ visitor._jz(self.label)
+
+class _jc(basejmp):
+ def __init__(self, label):
+ self.label = label
+ def visit(self, visitor):
+ visitor._jc(self.label)
+
+class _jnc(basejmp):
+ def __init__(self, label):
+ self.label = label
+ def visit(self, visitor):
+ visitor._jnc(self.label)
+
+class _js(basejmp):
+ def __init__(self, label):
+ self.label = label
+ def visit(self, visitor):
+ visitor._js(self.label)
+
+class _jns(basejmp):
+ def __init__(self, label):
+ self.label = label
+ def visit(self, visitor):
+ visitor._jns(self.label)
+
+class _jl(basejmp):
+ def __init__(self, label):
+ self.label = label
+ def visit(self, visitor):
+ visitor._jl(self.label)
+
+class _jg(basejmp):
+ def __init__(self, label):
+ self.label = label
+ def visit(self, visitor):
+ visitor._jg(self.label)
+
+class _jle(basejmp):
+ def __init__(self, label):
+ self.label = label
+ def visit(self, visitor):
+ visitor._jle(self.label)
+
+class _jge(basejmp):
+ def __init__(self, label):
+ self.label = label
+ def visit(self, visitor):
+ visitor._jge(self.label)
+
+class _jmp(basejmp):
+ def __init__(self, label):
+ self.label = label
+ def visit(self, visitor):
+ visitor._jmp(self.label)
+
+class _loop(basejmp):
+ def __init__(self, label):
+ self.label = label
+ def visit(self, visitor):
+ visitor._loop(self.label)
+
+class _push(baseop):
+ def __init__(self, arg):
+ self.regs = []
+ for r in arg.split():
+ self.regs.append(self.parse_arg(r))
+ def visit(self, visitor):
+ visitor._push(self.regs)
+
+class _pop(baseop):
+ def __init__(self, arg):
+ self.regs = []
+ for r in arg.split():
+ self.regs.append(self.parse_arg(r))
+ def visit(self, visitor):
+ visitor._pop(self.regs)
+
+class _ret(baseop):
+ def __init__(self, arg):
+ pass
+ def visit(self, visitor):
+ visitor._ret()
+
+class _lodsb(baseop):
+ def __init__(self, arg):
+ pass
+ def visit(self, visitor):
+ visitor._lodsb()
+
+class _lodsw(baseop):
+ def __init__(self, arg):
+ pass
+ def visit(self, visitor):
+ visitor._lodsw()
+
+class _stosw(baseop):
+ def __init__(self, arg):
+ pass
+ def visit(self, visitor):
+ visitor._stosw()
+
+class _stosb(baseop):
+ def __init__(self, arg):
+ pass
+ def visit(self, visitor):
+ visitor._stosb()
+
+class _movsw(baseop):
+ def __init__(self, arg):
+ pass
+ def visit(self, visitor):
+ visitor._movsw()
+
+class _movsb(baseop):
+ def __init__(self, arg):
+ pass
+ def visit(self, visitor):
+ visitor._movsb()
+
+class _in(baseop):
+ def __init__(self, arg):
+ self.arg = arg
+ def visit(self, visitor):
+ raise Unsupported("input from port: %s" %self.arg)
+
+class _out(baseop):
+ def __init__(self, arg):
+ self.arg = arg
+ def visit(self, visitor):
+ raise Unsupported("out to port: %s" %self.arg)
+
+class _cli(baseop):
+ def __init__(self, arg):
+ pass
+ def visit(self, visitor):
+ raise Unsupported("cli")
+
+class _sti(baseop):
+ def __init__(self, arg):
+ pass
+ def visit(self, visitor):
+ raise Unsupported("sli")
+
+class _int(baseop):
+ def __init__(self, arg):
+ self.arg = arg
+ def visit(self, visitor):
+ raise Unsupported("interrupt: %s" %self.arg)
+
+class _iret(baseop):
+ def __init__(self, arg):
+ pass
+ def visit(self, visitor):
+ raise Unsupported("interrupt return")
+
+class _cbw(baseop):
+ def __init__(self, arg):
+ pass
+ def visit(self, visitor):
+ visitor._cbw()
+
+class _nop(baseop):
+ def __init__(self, arg):
+ pass
+ def visit(self, visitor):
+ pass
+
+class label(baseop):
+ def __init__(self, name):
+ self.name = name
+ def visit(self, visitor):
+ visitor._label(self.name)
+
diff --git a/devtools/tasmrecover/tasm/op.pyc b/devtools/tasmrecover/tasm/op.pyc
new file mode 100644
index 0000000000..2e212ae64c
--- /dev/null
+++ b/devtools/tasmrecover/tasm/op.pyc
Binary files differ
diff --git a/devtools/tasmrecover/tasm/parser.py b/devtools/tasmrecover/tasm/parser.py
new file mode 100644
index 0000000000..8dbf54c5a3
--- /dev/null
+++ b/devtools/tasmrecover/tasm/parser.py
@@ -0,0 +1,248 @@
+import os, re
+from proc import proc
+import lex
+import op
+
+class parser:
+ def __init__(self):
+ self.strip_path = 0
+ self.__globals = {}
+ self.__offsets = {}
+ self.__stack = []
+ self.proc = None
+ self.proc_list = []
+ self.binary_data = []
+
+ self.symbols = []
+ self.link_later = []
+
+ def visible(self):
+ for i in self.__stack:
+ if not i or i == 0:
+ return False
+ return True
+
+ def push_if(self, text):
+ value = self.eval(text)
+ #print "if %s -> %s" %(text, value)
+ self.__stack.append(value)
+
+ def push_else(self):
+ #print "else"
+ self.__stack[-1] = not self.__stack[-1]
+
+ def pop_if(self):
+ #print "endif"
+ return self.__stack.pop()
+
+ def set_global(self, name, value):
+ if len(name) == 0:
+ raise Exception("empty name is not allowed")
+ name = name.lower()
+ #print "adding global %s -> %s" %(name, value)
+ if self.__globals.has_key(name):
+ raise Exception("global %s was already defined", name)
+ self.__globals[name] = value
+
+ def get_global(self, name):
+ name = name.lower()
+ return self.__globals[name]
+
+ def has_global(self, name):
+ name = name.lower()
+ return self.__globals.has_key(name)
+
+ def set_offset(self, name, value):
+ if len(name) == 0:
+ raise Exception("empty name is not allowed")
+ name = name.lower()
+ #print "adding global %s -> %s" %(name, value)
+ if self.__offsets.has_key(name):
+ raise Exception("global %s was already defined", name)
+ self.__offsets[name] = value
+
+ def get_offset(self, name):
+ name = name.lower()
+ return self.__offsets[name]
+
+ def include(self, basedir, fname):
+ path = fname.split('\\')[self.strip_path:]
+ path = os.path.join(basedir, os.path.pathsep.join(path))
+ #print "including %s" %(path)
+
+ self.parse(path)
+
+ def eval(self, stmt):
+ try:
+ return self.parse_int(stmt)
+ except:
+ pass
+ value = self.__globals[stmt.lower()].value
+ return int(value)
+
+ def expr_callback(self, match):
+ name = match.group(1).lower()
+ g = self.get_global(name)
+ if isinstance(g, op.const):
+ return g.value
+ else:
+ return "0x%04x" %g.offset
+
+ def eval_expr(self, expr):
+ n = 1
+ while n > 0:
+ expr, n = re.subn(r'\b([a-zA-Z_]+[a-zA-Z0-9_]*)', self.expr_callback, expr)
+ return eval(expr)
+
+ def expand_globals(self, text):
+ return text
+
+ def fix_dollar(self, v):
+ return re.sub(r'\$', "%d" %len(self.binary_data), v)
+
+ def parse_int(self, v):
+ if re.match(r'[01]+b$', v):
+ v = int(v[:-1], 2)
+ if re.match(r'[\+-]?[0-9a-f]+h$', v):
+ v = int(v[:-1], 16)
+ return int(v)
+
+ def compact_data(self, width, data):
+ #print "COMPACTING %d %s" %(width, data)
+ r = []
+ base = 0x100 if width == 1 else 0x10000
+ for v in data:
+ if v[0] == '"':
+ if v[-1] != '"':
+ raise Exception("invalid string %s" %v)
+ if width == 2:
+ raise Exception("string with data width more than 1") #we could allow it :)
+ for i in xrange(1, len(v) - 1):
+ r.append(ord(v[i]))
+ continue
+
+ m = re.match(r'(\w+)\s+dup\s+\((\s*\S+\s*)\)', v)
+ if m is not None:
+ #we should parse that
+ n = self.parse_int(m.group(1))
+ if m.group(2) != '?':
+ value = self.parse_int(m.group(2))
+ else:
+ value = 0
+ for i in xrange(0, n):
+ v = value
+ for b in xrange(0, width):
+ r.append(v & 0xff);
+ v >>= 8
+ continue
+
+ try:
+ v = self.parse_int(v)
+ if v < 0:
+ v += base
+ except:
+ #global name
+ print "global/expr: %s" %v
+ try:
+ g = self.get_global(v)
+ v = g.offset
+ except:
+ print "unknown address %s" %(v)
+ self.link_later.append((len(self.binary_data) + len(r), v))
+ v = 0
+
+ for b in xrange(0, width):
+ r.append(v & 0xff);
+ v >>= 8
+ #print r
+ return r
+
+ def parse(self, fname):
+# print "opening file %s..." %(fname, basedir)
+ fd = open(fname, 'rb')
+ for line in fd:
+ line = line.strip()
+ if len(line) == 0 or line[0] == ';' or line[0] == chr(0x1a):
+ continue
+
+ #print cmd
+ m = re.match('(\w+)\s*?:', line)
+ if m is not None:
+ line = line[len(m.group(0)):]
+ if self.visible():
+ name = m.group(1)
+ if self.proc is not None:
+ self.proc.add_label(name)
+ self.set_offset(name, (len(self.binary_data), self.proc, len(self.proc.stmts) if self.proc is not None else 0))
+
+ cmd = line.split()
+ if len(cmd) == 0:
+ continue
+
+ cmd0 = str(cmd[0])
+ if cmd0 == 'if':
+ self.push_if(cmd[1])
+ continue
+ elif cmd0 == 'else':
+ self.push_else()
+ continue
+ elif cmd0 == 'endif':
+ self.pop_if()
+ continue
+
+ if not self.visible():
+ continue
+
+ if cmd0 == 'db' or cmd0 == 'dw' or cmd0 == 'dd':
+ arg = " ".join(cmd[1:])
+ binary_width = {'b': 1, 'w': 2, 'd': 4}[cmd0[1]]
+ self.binary_data += self.compact_data(binary_width, lex.parse_args(arg))
+ continue
+ elif cmd0 == 'include':
+ self.include(os.path.dirname(fname), cmd[1])
+ continue
+ elif cmd0 == 'endp':
+ self.proc = None
+ continue
+ elif cmd0 == 'assume':
+ print "skipping: %s" %line
+ continue
+ elif cmd0 == 'rep':
+ self.proc.add(cmd0)
+ self.proc.add(" ".join(cmd[1:]))
+ continue
+
+ if len(cmd) >= 3:
+ cmd1 = cmd[1]
+ if cmd1 == 'equ':
+ v = cmd[2]
+ self.set_global(cmd0, op.const(self.fix_dollar(v)))
+ elif cmd1 == 'db' or cmd1 == 'dw' or cmd1 == 'dd':
+ binary_width = {'b': 1, 'w': 2, 'd': 4}[cmd1[1]]
+ self.binary_data += self.compact_data(binary_width, lex.parse_args(" ".join(cmd[2:])))
+ self.set_global(cmd0.lower(), op.var(binary_width, len(self.binary_data)))
+ continue
+ elif cmd1 == 'proc':
+ name = cmd0.lower()
+ self.proc = proc(name)
+ print "procedure %s, #%d" %(name, len(self.proc_list))
+ self.proc_list.append(name)
+ self.set_global(name, self.proc)
+ continue
+ if (self.proc):
+ self.proc.add(line)
+ else:
+ #print line
+ pass
+
+ fd.close()
+ return self
+
+ def link(self):
+ for addr, expr in self.link_later:
+ v = self.eval_expr(expr)
+ print "link: patching %04x -> %04x" %(addr, v)
+ while v != 0:
+ self.binary_data[addr] = v & 0xff
+ addr += 1
+ v >>= 8
diff --git a/devtools/tasmrecover/tasm/parser.pyc b/devtools/tasmrecover/tasm/parser.pyc
new file mode 100644
index 0000000000..d7049c84b4
--- /dev/null
+++ b/devtools/tasmrecover/tasm/parser.pyc
Binary files differ
diff --git a/devtools/tasmrecover/tasm/proc.py b/devtools/tasmrecover/tasm/proc.py
new file mode 100644
index 0000000000..4337d4c936
--- /dev/null
+++ b/devtools/tasmrecover/tasm/proc.py
@@ -0,0 +1,84 @@
+import re
+import op
+
+class proc:
+ last_addr = 0xc000
+
+ def __init__(self, name):
+ self.name = name
+ self.calls = []
+ self.stmts = []
+ self.labels = set()
+ self.__label_re = re.compile(r'^(\S+):(.*)$')
+ self.offset = proc.last_addr
+ proc.last_addr += 4
+
+ def add_label(self, label):
+ self.stmts.append(op.label(label))
+ self.labels.add(label)
+
+ def optimize(self):
+ print "optimizing..."
+ #trivial simplifications, removing last ret
+ while len(self.stmts) and isinstance(self.stmts[-1], op.label):
+ print "stripping last label"
+ self.stmts.pop()
+ #if isinstance(self.stmts[-1], op._ret) and (len(self.stmts) < 2 or not isinstance(self.stmts[-2], op.label)):
+ # print "stripping last ret"
+ # self.stmts.pop()
+ #merging push ax pop bx constructs
+ i = 0
+ while i + 1 < len(self.stmts):
+ a, b = self.stmts[i], self.stmts[i + 1]
+ if isinstance(a, op._push) and isinstance(b, op._pop):
+ ar, br = a.regs, b.regs
+ movs = []
+ while len(ar) and len(br):
+ src = ar.pop()
+ dst = br.pop(0)
+ movs.append(op._mov2(dst, src))
+ if len(br) == 0:
+ self.stmts.pop(i + 1)
+ print "merging %d push-pops into movs" %(len(movs))
+ for m in movs:
+ print "\t%s <- %s" %(m.dst, m.src)
+ self.stmts[i + 1:i + 1] = movs
+ if len(ar) == 0:
+ self.stmts.pop(i)
+ else:
+ i += 1
+ #fixme: add local?
+
+ def add(self, stmt):
+ #print stmt
+ comment = stmt.rfind(';')
+ if comment >= 0:
+ stmt = stmt[:comment]
+ stmt = stmt.strip()
+
+ r = self.__label_re.search(stmt)
+ if r is not None:
+ #label
+ self.add_label(r.group(1).lower())
+ #print "remains: %s" %r.group(2)
+ stmt = r.group(2).strip()
+
+ if len(stmt) == 0:
+ return
+
+ s = stmt.split(None)
+ cmd = s[0]
+ cl = getattr(op, '_' + cmd)
+ arg = " ".join(s[1:]) if len(s) > 1 else str()
+ o = cl(arg)
+ self.stmts.append(o)
+
+ def __str__(self):
+ r = []
+ for i in self.stmts:
+ r.append(i.__str__())
+ return "\n".join(r)
+
+ def visit(self, visitor, skip = 0):
+ for i in xrange(skip, len(self.stmts)):
+ self.stmts[i].visit(visitor)
diff --git a/devtools/tasmrecover/tasm/proc.pyc b/devtools/tasmrecover/tasm/proc.pyc
new file mode 100644
index 0000000000..cd8f967509
--- /dev/null
+++ b/devtools/tasmrecover/tasm/proc.pyc
Binary files differ