-
Notifications
You must be signed in to change notification settings - Fork 20
/
simple_jack.py
212 lines (150 loc) · 6.02 KB
/
simple_jack.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
"""
Simple Jack symbol porting tool by goose ([email protected]).
This tool exists because for some reason diaphora does not assign
enough priority to "perfect" matches. The main idea is to get as many
symbols right as possible, so that later on you can run diaphora and iterate.
The drill is simple: hash the bytes of all the functions in a db
and save them to a file. We will call this the 'primary' database.
Do the same for another DB (we call this the 'secondary' database)
and compare both. We only import identical matches and the only
info we import from the 'primary' is the function name.
This script has two modes:
SCRIPT_MODE_DUMP:
Used to create the primary and secondary database.
You should run the script twice: first on the primary
binary, which is your symbolicated binary, and then on
the secondary binary, which is the one that will
receive the information imported from the primary.
SCRIPT_MODE_DIFF:
Used once you've generated both the primary and secondary
databases. It will read them both and perform the diffing.
Once it finds matches, it will import the function name
from the primary into the secondary.
This mode is destructive, that is, it will change your
IDB. Only run it if you are positive that you like the
results.
"""
import pickle
import hashlib
import idaapi
from idc import *
from idaapi import *
from idautils import *
# Available modes, pick one:
#   SCRIPT_MODE_DUMP -> the script calls do_save() (build and store a db).
#   SCRIPT_MODE_DIFF -> the script calls do_diff() (compare two dbs and rename).
SCRIPT_MODE_DUMP = 0
SCRIPT_MODE_DIFF = 1
# IMPORTANT: there is no prompt for the mode; edit this assignment by hand
# to select the mode you need before running the script.
CURRENT_SCRIPT_MODE = SCRIPT_MODE_DUMP
# Set this to True to make build_db() log one line of details per function.
GLOBAL_DEBUG = False
def hash_bytes(bytes):
    """Return the MD5 hex digest of a byte string.

    The digest is only used as a content fingerprint to detect identical
    function bodies; it is not used for anything security related.

    Args:
        bytes: The raw bytes to hash. (The parameter name shadows the
            builtin, it is kept for compatibility with existing callers.)

    Returns:
        The 32-character lowercase hexadecimal MD5 digest.
    """
    return hashlib.md5(bytes).hexdigest()
def log(msg):
    """Print a timestamped message through IDA's Message().

    Args:
        msg: The text to print; it is prefixed with the current local time
            and terminated with a newline.
    """
    # Imported here because this file never imports 'time' explicitly; the
    # name was only reachable as a side effect of the wildcard imports.
    import time

    Message("[%s] %s\n" % (time.asctime(), msg))
def load_db(db_name):
    """Load a database that was previously written with save_db().

    Args:
        db_name: Path of the pickle file to read.

    Returns:
        The unpickled object. For files produced by do_save() this is the
        dictionary returned by build_db().

    NOTE(security): unpickling can execute arbitrary code. Only load
    database files that you generated yourself or that you fully trust.
    """
    log("Loading DB from %s" % db_name)

    with open(db_name, 'rb') as db_file:
        return pickle.load(db_file)
def save_db(db, db_name):
    """Serialize a database to disk with pickle.

    Args:
        db: The object to store (do_save() passes the dictionary returned
            by build_db()).
        db_name: Path of the file to write; it is created or overwritten.
    """
    log("Saving DB to %s" % db_name)

    # HIGHEST_PROTOCOL is the most compact format the running interpreter
    # supports; load_db() detects the protocol automatically when reading.
    with open(db_name, 'wb') as db_file:
        pickle.dump(db, db_file, pickle.HIGHEST_PROTOCOL)
def build_db():
    """Build the hash database for the functions of the current IDB.

    Every function found in every segment is hashed (see hash_bytes()) over
    the bytes in [startEA, endEA). Functions whose bytes hash to the same
    value are ambiguous, so every colliding hash is dropped from the result.

    Returns:
        A dictionary mapping the hash of a function's bytes to a tuple
        (name, n_ins, startEA), where 'name' is the function name, 'n_ins'
        the number of heads counted over its flow chart blocks and
        'startEA' its start address.
    """
    # Collect the start address of every function, segment by segment.
    func_list = []
    for seg_ea in Segments():
        func_list.extend(Functions(seg_ea, SegEnd(seg_ea)))

    log("Total number of functions to export: %u" % len(func_list))

    functions_db = {}
    collision_keys = set()

    for f in func_list:
        # Get the function for this address.
        func = get_func(f)
        if not func:
            log("Cannot get a function object for 0x%x" % f)
            continue

        # Get the number of instructions.
        n_ins = 0
        for block in FlowChart(func):
            n_ins += sum(1 for _ in Heads(block.startEA, block.endEA))

        # Get the name of the function without demangling.
        name = GetFunctionName(f)

        # Calculate the size of the function.
        size = func.endEA - func.startEA

        # Do some sanity checks.
        assert (size == func.size()), "Invalid size."
        assert (func.startEA < func.endEA), "Invalid startEA / endEA values."

        # Read the bytes of the function. A failed read yields None, which
        # cannot be hashed, so skip the function instead of aborting the
        # whole export.
        func_bytes = GetManyBytes(func.startEA, size)
        if func_bytes is None:
            log("Cannot read the bytes of function @ 0x%.8x, skipping" %
                func.startEA)
            continue

        # Get the hash of the function.
        ins_hash = hash_bytes(func_bytes)

        # Check if we collide with another entry.
        if ins_hash in functions_db:
            log("Function @ 0x%.8x collides with function @ 0x%.8x" %
                (func.startEA, functions_db[ins_hash][2]))

            # Keep track of the collision, the entry is removed below.
            collision_keys.add(ins_hash)
            continue

        # Create an entry in the DB.
        functions_db[ins_hash] = (name, n_ins, func.startEA)

        if GLOBAL_DEBUG:
            log("Function name:%s start:0x%.8x end:0x%.8x size:%u n_ins:%u hash:%s" %
                (name, func.startEA, func.endEA, size, n_ins, ins_hash))

    # Delete the collisions otherwise we may match functions incorrectly.
    for collision_key in collision_keys:
        del functions_db[collision_key]

    return functions_db
def do_diff():
    """Import function names from the primary db into the current IDB.

    Asks for the primary and the secondary db files, loads both and, for
    every hash that is present in both, renames the function at the address
    recorded in the secondary db to the name recorded in the primary db.
    Entries whose names are already equal are skipped.

    This modifies the current IDB.

    Returns:
        False if one of the file dialogs was cancelled. Otherwise the
        function ends without an explicit return value (None).
    """
    primary_db_path = AskFile(0, "primary.db", "Select the primary db file.")
    if primary_db_path is None:
        log("No file selected, exiting")
        return False

    secondary_db_path = AskFile(
        0, "secondary.db", "Select the secondary db file.")
    if secondary_db_path is None:
        log("No file selected, exiting")
        return False

    # Load the databases.
    primary_db = load_db(primary_db_path)
    secondary_db = load_db(secondary_db_path)

    log("Diffing ...")

    # Proceed with the diffing. Each db value is a tuple laid out as
    # (name, n_ins, start address), see build_db().
    matches = 0
    for primary_hash, primary_val in primary_db.items():
        # Check if 'primary_hash' from the primary is present in the secondary.
        secondary_val = secondary_db.get(primary_hash)
        if secondary_val is None:
            continue

        # Hashes match. Only handle functions with a different name.
        if primary_val[0] == secondary_val[0]:
            continue

        function_ea = secondary_val[2]
        function_name_old = secondary_val[0]
        function_name_new = primary_val[0]

        # Logged unconditionally (not gated on GLOBAL_DEBUG) so that every
        # rename attempted on the IDB shows up in the output.
        log("Function @ 0x%.8x -> From '%s' to '%s'" %
            (function_ea, function_name_old, function_name_new))

        # Set the secondary function name.
        if not MakeNameEx(function_ea, function_name_new, SN_NOWARN | SN_NOCHECK):
            log("Error setting function name to '%s'" % (function_name_new))

        # Counted even when the rename failed: the hashes still matched.
        matches += 1

    log("Number of matches: %u" % matches)
def do_save():
    """Build the db for the current IDB and store it in a user chosen file.

    Returns:
        False if the file dialog was cancelled. Otherwise the function
        ends without an explicit return value (None).
    """
    db_path = AskFile(1, "*.db", "Select the file to store the db.")
    if db_path is None:
        log("No file selected, exiting")
        return False

    # Build the db for the current IDB.
    db = build_db()
    log("Number of entries in the DB: %u" % len(db))

    # Write the DB to disk.
    save_db(db, db_path)
# Script entry point: run the action selected by CURRENT_SCRIPT_MODE.
if CURRENT_SCRIPT_MODE == SCRIPT_MODE_DUMP:
    do_save()
elif CURRENT_SCRIPT_MODE == SCRIPT_MODE_DIFF:
    do_diff()
else:
    log("Invalid script mode")