-
Notifications
You must be signed in to change notification settings - Fork 4
/
usnjrnl_rewind.py
369 lines (320 loc) · 14.7 KB
/
usnjrnl_rewind.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
"""
(c) 2024 Yogesh Khatri (@Swiftforensics), CyberCX
Script to process USNJRNL data output from MFTEcmd
and add the correct full path information by
rewinding the journal entries one by one.
By doing so, this provides the correct paths for
files and folders as they existed when the journal
event occurred.
Author : Yogesh Khatri
License : MIT
"""
import argparse
import csv
import os
import random
import sqlite3
import time
version = 0.6
from csv_to_sqlite import import_csv, sanitize_remove_nulls
from enum import IntFlag
from string import ascii_uppercase
class Reason(IntFlag):
    """USN journal update-reason flags (USN_REASON_* bitmask values).

    Mirrors the Windows USN_RECORD Reason field; UNK_* members cover bits
    with no documented meaning so unknown values still render cleanly.
    """
    DataOverwrite = 0x00000001
    DataExtend = 0x00000002
    DataTruncation = 0x00000004
    UNK_0x8 = 0x00000008
    NamedDataOverwrite = 0x00000010
    NamedDataExtend = 0x00000020
    NamedDataTruncation = 0x00000040
    UNK_0x80 = 0x00000080
    FileCreate = 0x00000100
    FileDelete = 0x00000200
    EaChange = 0x00000400
    SecurityChange = 0x00000800
    RenameOldName = 0x00001000
    RenameNewName = 0x00002000
    IndexableChange = 0x00004000
    BasicInfoChange = 0x00008000
    HardLinkChange = 0x00010000
    CompressionChange = 0x00020000
    EncryptionChange = 0x00040000
    ObjectIdChange = 0x00080000
    ReparsePointChange = 0x00100000
    StreamChange = 0x00200000
    TransactedChange = 0x00400000
    IntegrityChange = 0x00800000
    UNK_0x1000000 = 0x01000000
    UNK_0x2000000 = 0x02000000
    UNK_0x4000000 = 0x04000000
    UNK_0x8000000 = 0x08000000
    UNK_0x10000000 = 0x10000000
    UNK_0x20000000 = 0x20000000
    UNK_0x40000000 = 0x40000000
    Close = 0x80000000

    def __repr__(self):
        # Render as pipe-separated flag names, matching MFTEcmd's style.
        set_flags = [member.name for member in Reason if self.value & member]
        return '|'.join(set_flags)
def get_time_taken_string(start_time, end_time):
    """Return the elapsed time between two epoch timestamps as 'HH:MM:SS'.

    Falls back to a diagnostic string containing the raw delta if the
    value cannot be formatted (e.g. negative or out-of-range deltas).
    """
    elapsed = end_time - start_time
    try:
        return time.strftime('%H:%M:%S', time.gmtime(elapsed))
    except (OSError, ValueError) as ex:
        print('[!] Failed to calc time string', str(ex))
        return f'-failed-to-calc , time_taken={elapsed}'
def add_to_sqlite(path, sqlite_db_path, table_name, perform_cleaning=True):
    """Import a csv file into a table of the given sqlite database.

    When perform_cleaning is True, a null-stripped temporary copy of the
    csv is created alongside the original, imported, then deleted.
    Returns the result of import_csv() (truthy on success).
    """
    temp_folder = os.path.dirname(os.path.realpath(path))
    if perform_cleaning:
        csv_to_import = sanitize_remove_nulls(path, temp_folder)
    else:
        csv_to_import = path
    success = import_csv(csv_to_import, sqlite_db_path, table_name, guess_column_types=True)
    if csv_to_import != path:
        # Best-effort removal of the sanitized temp copy.
        try:
            os.remove(csv_to_import)
        except OSError as ex:
            print(f'[!] Failed to remove temp file : {csv_to_import} Error was:', str(ex))
    return success
def create_sqlitedb(output_path, mft_csv_path, usnjrnl_csv_path):
    '''Create db and return path if successful, else return empty string'''
    start = time.time()
    db_path = os.path.join(output_path, 'NTFS.sqlite')
    if os.path.exists(db_path):
        # Don't clobber an existing db - append a random 4-letter tag instead.
        tag = ''.join(random.choice(ascii_uppercase) for _ in range(4))
        db_path = os.path.join(output_path, f'NTFS_{tag}.sqlite')
    print(f'[.] Creating an SQLite database here: {db_path}')
    print('[.] Adding MFT data to database..')
    if not add_to_sqlite(mft_csv_path, db_path, 'MFT'):
        print('[!] Failed to add to sqlite.')
        return ''
    print('[.] Adding USNJRNL:$J data to database..')
    if not add_to_sqlite(usnjrnl_csv_path, db_path, 'USNJRNL'):
        print('[!] Failed to add to sqlite.')
        return ''
    print(f'[.] Database creation time: {get_time_taken_string(start, time.time())}')
    return db_path
def rewind(output_path, mft_csv_path, usnjrnl_csv_path):
    """Top-level pipeline: build the sqlite db from the two MFTEcmd csvs,
    compute the rewound full-path csv, then import that csv back into the db.
    """
    overall_start = time.time()
    db_path = create_sqlitedb(output_path, mft_csv_path, usnjrnl_csv_path)
    if not db_path:
        return
    full_paths_csv = os.path.join(output_path, 'USNJRNL.fullPaths.csv')
    print('[.] ..Rewinding journal and computing the full paths now..')
    create_journal_rewind_csv(db_path, full_paths_csv, 'MFT', 'USNJRNL')
    print(f'[.] Created the USNJRNL full path csv here: {full_paths_csv}')
    print('[.] Adding full path data to database..')
    if not add_to_sqlite(full_paths_csv, db_path, 'USNJRNL_FullPaths', perform_cleaning=False):
        print('[!] Failed to add csv to sqlite.')
    print(f'[.] Finished in total time: {get_time_taken_string(overall_start, time.time())}')
def get_full_path(entry, lookup_dict, path, _visited=None):
    """Recursively resolve the full path of an MFT entry.

    entry       : 'EntryNumber-SequenceNumber' key to resolve
    lookup_dict : { entry : (file_name, parent_entry, parent_name), .. }
    path        : name to append below the resolved parent path ('' to get
                  just the parent path)
    _visited    : internal set of entries already seen on this resolution
                  chain; guards against cyclic parent references in corrupt
                  or ambiguous lookup data, which previously caused an
                  unbounded recursion (RecursionError).

    Returns a backslash-joined path rooted at '.', with '<UNKNOWN>' used
    for any ancestor that cannot be resolved.
    """
    if _visited is None:
        _visited = set()
    parent_path = "<UNKNOWN>"
    if entry in lookup_dict and entry not in _visited:
        _visited.add(entry)
        file_name, parent_entry, parent_name = lookup_dict[entry]
        if parent_entry == "5-5":
            # Entry 5 is the NTFS root directory; anchor the path at '.'
            parent_path = "."
        else:
            parent_path = get_full_path(parent_entry, lookup_dict, parent_name, _visited)
    if path:
        # If parent is number, python may treat as int, not str, hence explicit
        # conversion to string below
        return str(parent_path) + '\\' + str(path)
    return parent_path
def clean_reasons_string(reasons):
    '''
    Return 'reasons' as a readable flags string.

    MFTEcmd normally emits a readable reasons string, but when it encounters
    unknown flag bits it writes the raw integer value instead; that integer
    is converted here to its Reason flags equivalent. A None value (NULL in
    the database) is returned as '' instead of raising TypeError from int(),
    so downstream 'x in reasons' membership tests keep working. Any other
    non-numeric value is returned unchanged.
    '''
    if reasons is None:
        # NULL UpdateReasons column - treat as "no reasons recorded".
        return ''
    try:
        value = int(reasons)
    except (ValueError, TypeError):
        # Already a readable string - nothing to convert.
        return reasons
    return repr(Reason(value))
def create_journal_rewind_csv(sqlite_db_path, out_csv_path, mft_table_name, usn_table_name):
    '''
    Rewind the USN journal and write a csv with correct full parent paths.

    Walks the journal newest-to-oldest (ORDER BY UpdateTimestamp DESC) while
    maintaining a lookup of entry -> (name, parent entry, parent name). The
    lookup is seeded from the MFT and then mutated as RenameOldName and
    FileDelete events are encountered, so every record gets the parent path
    as it existed at the moment of the event.

    sqlite_db_path : database produced by create_sqlitedb()
    out_csv_path   : destination csv file (overwritten if present)
    mft_table_name : table holding MFTEcmd's $MFT output
    usn_table_name : table holding MFTEcmd's $J output

    Returns True on success, False if the db cannot be opened or a query fails.
    '''
    try:
        db = sqlite3.connect(sqlite_db_path)
        db.row_factory = sqlite3.Row  # enables access to columns by name
    except sqlite3.Error:  # was a bare except; narrowed to sqlite errors
        print(f"[!] Failed to open db at {sqlite_db_path}")
        return False
    # The below query will convert all -ve ParentSequenceNumbers to +ve
    # This occurs due to a bug in older MFTEcmd (fixed on 9 Mar 2024).
    query_bugfix = '''
    UPDATE {MFT_TABLE} SET ParentSequenceNumber=ParentSequenceNumber&65535
    where ParentSequenceNumber < 0
    '''
    try:
        mft_query = query_bugfix.format(MFT_TABLE=mft_table_name)
        results = db.execute(mft_query)
        if results.rowcount:
            print(f'[.] Buggy values fixed for {results.rowcount} rows')
        db.commit()
    except sqlite3.Error as ex:
        print("[!] Failed query. Exception was " + str(ex))
        print(f"[!] Query was {mft_query}")
        db.close()
        return False
    # Seed the parent lookup from the MFT. For entries no longer in use, the
    # on-disk SequenceNumber was incremented at deletion, hence the
    # (SequenceNumber - 1) variants to recover the entry as it was when live.
    query = '''
    SELECT m1.EntryNumber || '-' || m1.SequenceNumber as Entry, m1.FileName,
    m1.ParentEntryNumber || '-' || m1.ParentSequenceNumber as ParentEntry,
    ifnull(m2.FileName, '') as ParentName
    FROM {MFT_TABLE} m1 LEFT JOIN {MFT_TABLE} m2
    ON m1.ParentEntryNumber = m2.EntryNumber AND m1.ParentSequenceNumber = m2.SequenceNumber
    WHERE m1.InUse = "True"
    UNION ALL
    SELECT m1.EntryNumber || '-' || (m1.SequenceNumber - 1) as Entry, m1.FileName,
    m1.ParentEntryNumber || '-' || m1.ParentSequenceNumber as ParentEntry,
    ifnull(m2.FileName, '') as ParentName
    FROM {MFT_TABLE} m1 LEFT JOIN {MFT_TABLE} m2
    ON m1.ParentEntryNumber = m2.EntryNumber AND m1.ParentSequenceNumber = m2.SequenceNumber
    WHERE m1.InUse = "False" and m2.Inuse = "True"
    UNION ALL
    SELECT m1.EntryNumber || '-' || (m1.SequenceNumber - 1) as Entry, m1.FileName,
    m1.ParentEntryNumber || '-' || m1.ParentSequenceNumber as ParentEntry,
    ifnull(m2.FileName, '') as ParentName
    FROM {MFT_TABLE} m1 LEFT JOIN {MFT_TABLE} m2
    ON m1.ParentEntryNumber = m2.EntryNumber AND m1.ParentSequenceNumber = (m2.SequenceNumber - 1)
    WHERE m1.InUse = "False" and m2.Inuse = "False"
    '''
    # The above query produces multiple output for entry when ADS are encountered:
    # EG: 10-10 $UpCase 5-5 .
    # 10-10 $UpCase:$Info 5-5 .
    try:
        mft_query = query.format(MFT_TABLE=mft_table_name)
        results = db.execute(mft_query)
    except sqlite3.Error as ex:
        print("[!] Failed query. Exception was " + str(ex))
        print(f"[!] Query was {mft_query}")
        db.close()
        return False
    parent_lookup = {} # { Entry : (EntryName, ParentEntry, ParentName), .. }
    for result in results:
        parent_lookup[result['Entry']] = (result['FileName'], result['ParentEntry'], result['ParentName'])
    query = '''
    SELECT EntryNumber, ParentEntryNumber, Name, ParentPath,
    UpdateReasons, UpdateTimestamp, ParentSequenceNumber, SequenceNumber,
    FileAttributes, Extension, UpdateSequenceNumber, OffsetToData, SourceFile,
    EntryNumber||'-'||SequenceNumber as Entry,
    ParentEntryNumber||'-'||ParentSequenceNumber as ParentEntry
    FROM {USNJRNL_TABLE}
    ORDER BY UpdateTimestamp DESC, UpdateSequenceNumber DESC
    '''
    field_names = ('Name', 'Extension', 'EntryNumber', 'SequenceNumber', 'ParentEntryNumber',
                   'ParentSequenceNumber', 'ParentPath', 'UpdateSequenceNumber', 'UpdateTimestamp',
                   'UpdateReasons', 'FileAttributes', 'OffsetToData', 'SourceFile')
    with open(out_csv_path, 'w', encoding='utf8', newline='', buffering=50000) as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=field_names)
        writer.writeheader()
        items_to_write = []
        # NOTE(fix): the original code opened a second connection to the same
        # db here, leaking the first (still open) connection. The existing
        # connection is reused instead.
        try:
            usn_query = query.format(USNJRNL_TABLE=usn_table_name, MFT_TABLE=mft_table_name)
            results = db.execute(usn_query)
        except sqlite3.Error as ex:
            print("[!] Failed query. Exception was " + str(ex))
            print(f"[!] Query was {usn_query}")
            db.close()
            return False
        last_item = {}  # caches the previously resolved path to skip re-resolving
        for result in results:
            name = result['Name']
            entry_num = int(result['EntryNumber'])
            parent_entry_num = int(result['ParentEntryNumber'])
            ts = result['UpdateTimestamp']
            reasons = result['UpdateReasons']
            attributes = result['FileAttributes']
            extension = result['Extension']
            seq_num = result['SequenceNumber']
            parent_seq_num = result['ParentSequenceNumber']
            update_seq_number = result['UpdateSequenceNumber']
            off_to_data = result['OffsetToData']
            source_file = result['SourceFile']
            entry = result['Entry']
            parent_entry = result['ParentEntry']
            path_changed = False
            reasons = clean_reasons_string(reasons)
            if "RenameOldName" in reasons:
                # Replace entry in lookup dict, need parent name for this
                parent_name = parent_lookup.get(parent_entry, ('','',''))[0]
                # Replace with new parent entry & parent name
                parent_lookup[entry] = name, parent_entry, parent_name
                path_changed = True
            elif "FileDelete" in reasons:
                # Check if it currently exits. If not, add to parent_lookup
                if entry not in parent_lookup:
                    # try to lookup parent name
                    p_name = parent_lookup.get(parent_entry, ('','',''))[0]
                    parent_lookup[entry] = (name, parent_entry, p_name)
            if parent_entry == "5-5":
                path_prefix = '.' # parent is the volume root; nothing to do
            # see if prev computed full path can be reused (same parent & timestamp)
            elif last_item and path_changed == False and \
                    last_item['timestamp'] == ts and \
                    last_item['entry'] == entry and \
                    last_item['parent_entry'] == parent_entry:
                path_prefix = last_item['path_prefix']
            elif parent_entry in parent_lookup:
                parent_file_name, _, _ = parent_lookup[parent_entry]
                path_prefix = get_full_path(parent_entry, parent_lookup, parent_file_name)
            else:
                # unknown - parent entry never seen in MFT or journal
                path_prefix = "<UNKNOWN>"
                print(f'[!] Error: Encountered an UNKNOWN path, report this to the developer! Item update_seq_number={update_seq_number}')
            item = {'Name': name,
                    'Extension': extension,
                    'EntryNumber': entry_num,
                    'SequenceNumber': seq_num,
                    'ParentEntryNumber': parent_entry_num,
                    'ParentSequenceNumber': parent_seq_num,
                    'ParentPath': path_prefix,
                    'UpdateSequenceNumber': update_seq_number,
                    'UpdateTimestamp': ts,
                    'UpdateReasons': reasons,
                    'FileAttributes': attributes,
                    'OffsetToData': off_to_data,
                    'SourceFile': source_file
                    }
            items_to_write.append(item)
            last_item['entry'] = entry
            last_item['parent_entry'] = parent_entry
            last_item['timestamp'] = ts
            last_item['path_prefix'] = path_prefix
            # Flush in batches to keep memory bounded on large journals.
            if len(items_to_write) >= 50000:
                writer.writerows(items_to_write)
                items_to_write = []
        if items_to_write:
            writer.writerows(items_to_write)
    db.close()
    return True
def main():
    """Command-line entry point: parse and validate arguments, then run the rewind."""
    usage = '''(c) 2024 Yogesh Khatri, CyberCX. \n\n
This tool needs the output of Mftecmd for both USN and MFT
(no need to process both together when processing the USN in mftecmd)\n '''
    parser = argparse.ArgumentParser(description=f'USN full path builder v{version}', epilog=usage,
                                     formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('-m', '--mft_processed_csv_file', help='processed $MFT csv from MFTECMD (required)', required=True)
    parser.add_argument('-u', '--usnjrnl_processed_csv_file', help='processed $Usnjrnl:$J csv from MFTECMD (required)', required=True)
    parser.add_argument('output_path', help='Output folder path (will create if non-existent)')
    args = parser.parse_args()

    mft_csv = args.mft_processed_csv_file
    usn_csv = args.usnjrnl_processed_csv_file
    destination = args.output_path

    # Both input csvs must already exist; bail out with a message otherwise.
    if not os.path.exists(mft_csv):
        print('[!] Error: Need to specify processed $MFT\'s csv path to proceed')
        return
    if not os.path.exists(usn_csv):
        print('[!] Error: Need to specify processed $Usnjrnl:$J\'s csv path to proceed')
        return
    if not os.path.exists(destination):
        os.makedirs(destination)
    rewind(destination, mft_csv, usn_csv)
if __name__ == "__main__":
main()