Commit bc23518 (1 parent: f27acbf)

chg: [module extractor] signal timeout global extraction + reduce regex and yara timeout

1 file changed: bin/lib/module_extractor.py (+45 -31 lines)
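Note on the change: the whole of extract() is now guarded by a 60-second SIGALRM alarm, with TimeoutException (imported from lib.exceptions) raised by the signal handler and caught so a timed-out extraction simply returns an empty result. A minimal, self-contained sketch of that pattern follows; the run_with_timeout() wrapper, the job argument, and the locally defined TimeoutException are illustrative names rather than code from the repository, and SIGALRM only works on Unix in the main thread.

import signal


class TimeoutException(Exception):
    # stand-in for lib.exceptions.TimeoutException used in the repo
    pass


def timeout_handler(signum, frame):
    # called when SIGALRM fires: abort whatever the main thread is doing
    raise TimeoutException


signal.signal(signal.SIGALRM, timeout_handler)  # SIGALRM: Unix-only, main thread only


def run_with_timeout(job, seconds=60):
    # Run job(); if it runs longer than `seconds`, return an empty result.
    result = []
    signal.alarm(seconds)        # arm the alarm
    try:
        result = job()
    except TimeoutException:
        result = []              # timed out: drop any partial work
    else:
        signal.alarm(0)          # finished in time: disarm the alarm
    return result

The same try/except/else shape appears in the diff below: the alarm is only disarmed on the success path, and a timeout yields an empty extraction list.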
@@ -17,6 +17,7 @@
 from lib.ail_users import get_user_org
 from lib.objects import ail_objects
 from lib.objects.Titles import Title
+from lib.exceptions import TimeoutException
 from lib import correlations_engine
 from lib import regex_helper
 from lib.ConfigLoader import ConfigLoader
@@ -38,6 +39,15 @@
 
 r_key = regex_helper.generate_redis_cache_key('extractor')
 
+
+# SIGNAL ALARM
+import signal
+def timeout_handler(signum, frame):
+    raise TimeoutException
+
+
+signal.signal(signal.SIGALRM, timeout_handler)
+
 # TODO UI Link
 
 CORRELATION_TO_EXTRACT = {
@@ -98,7 +108,7 @@ def get_correl_match(extract_type, obj, content):
         sha256_val = sha256(value.encode()).hexdigest()
         map_value_id[sha256_val] = value
     if to_extract:
-        objs = regex_helper.regex_finditer(r_key, '|'.join(to_extract), obj.get_global_id(), content)
+        objs = regex_helper.regex_finditer(r_key, '|'.join(to_extract), obj.get_global_id(), content, max_time=5)
         if extract_type == 'title' and objs:
             objs = [objs[0]]
         for ob in objs:
@@ -154,13 +164,13 @@ def get_tracker_match(user_org, user_id, obj, content):
         # print(tracker_type)
         tracked = tracker.get_tracked()
         if tracker_type == 'regex':  # TODO Improve word detection -> word delimiter
-            regex_match = regex_helper.regex_finditer(r_key, tracked, obj_gid, content)
+            regex_match = regex_helper.regex_finditer(r_key, tracked, obj_gid, content, max_time=5)
             for match in regex_match:
                 extracted.append([int(match[0]), int(match[1]), match[2], f'tracker:{tracker.uuid}'])
         elif tracker_type == 'yara':
             rule = tracker.get_rule()
             rule.match(data=content.encode(), callback=_get_yara_match,
-                       which_callbacks=yara.CALLBACK_MATCHES, timeout=30)
+                       which_callbacks=yara.CALLBACK_MATCHES, timeout=5)
             yara_match = r_cache.smembers(f'extractor:yara:match:{r_key}')
             r_cache.delete(f'extractor:yara:match:{r_key}')
             extracted = []
@@ -176,7 +186,7 @@ def get_tracker_match(user_org, user_id, obj, content):
                 words = [tracked]
             for word in words:
                 regex = _get_word_regex(word)
-                regex_match = regex_helper.regex_finditer(r_key, regex, obj_gid, content)
+                regex_match = regex_helper.regex_finditer(r_key, regex, obj_gid, content, max_time=5)
                 # print(regex_match)
                 for match in regex_match:
                     extracted.append([int(match[0]), int(match[1]), match[2], f'tracker:{tracker.uuid}'])
@@ -194,7 +204,7 @@ def get_tracker_match(user_org, user_id, obj, content):
         retro_hunt.delete_objs()
 
         rule.match(data=content.encode(), callback=_get_yara_match,
-                   which_callbacks=yara.CALLBACK_MATCHES, timeout=30)
+                   which_callbacks=yara.CALLBACK_MATCHES, timeout=5)
         yara_match = r_cache.smembers(f'extractor:yara:match:{r_key}')
         r_cache.delete(f'extractor:yara:match:{r_key}')
         extracted = []
@@ -234,35 +244,39 @@ def extract(user_id, obj_type, subtype, obj_id, content=None):
         r_cache.expire(f'extractor:cache:{obj_gid}:{user_org}:{user_id}', 300)
         return json.loads(cached)
 
-    if not content:
-        content = obj.get_content()
-
-    extracted = get_tracker_match(user_org, user_id, obj, content)
-
-    # print(item.get_tags())
-    for tag in obj.get_tags():
-        if MODULES.get(tag):
-            # print(tag)
-            module = MODULES.get(tag)
-            matches = module.extract(obj, content, tag)
+    signal.alarm(60)
+    try:
+        if not content:
+            content = obj.get_content()
+        extracted = get_tracker_match(user_org, user_id, obj, content)
+        # print(item.get_tags())
+        for tag in obj.get_tags():
+            if MODULES.get(tag):
+                # print(tag)
+                module = MODULES.get(tag)
+                matches = module.extract(obj, content, tag)
+                if matches:
+                    extracted = extracted + matches
+
+        for obj_t in CORRELATION_TO_EXTRACT[obj.type]:
+            matches = get_correl_match(obj_t, obj, content)
             if matches:
                 extracted = extracted + matches
 
-    for obj_t in CORRELATION_TO_EXTRACT[obj.type]:
-        matches = get_correl_match(obj_t, obj, content)
-        if matches:
-            extracted = extracted + matches
-
-    # SORT By Start Pos
-    if extracted:
-        extracted = sorted(extracted, key=itemgetter(0))
-        extracted = merge_overlap(extracted)
-
-    # Save In Cache
-    if extracted:
-        extracted_dump = json.dumps(extracted)
-        r_cache.set(f'extractor:cache:{obj_gid}:{user_org}:{user_id}', extracted_dump)
-        r_cache.expire(f'extractor:cache:{obj_gid}:{user_org}:{user_id}', 300)  # TODO Reduce CACHE ???????????????
+        # SORT By Start Pos
+        if extracted:
+            extracted = sorted(extracted, key=itemgetter(0))
+            extracted = merge_overlap(extracted)
+
+        # Save In Cache
+        if extracted:
+            extracted_dump = json.dumps(extracted)
+            r_cache.set(f'extractor:cache:{obj_gid}:{user_org}:{user_id}', extracted_dump)
+            r_cache.expire(f'extractor:cache:{obj_gid}:{user_org}:{user_id}', 300)  # TODO Reduce CACHE ???????????????
+    except TimeoutException:
+        extracted = []
+    else:
+        signal.alarm(0)
 
     return extracted
 
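For context on the lowered yara timeout: in yara-python, Rules.match() takes its timeout in seconds and raises yara.TimeoutError when a scan exceeds it, so the new 5-second budget bounds each rule scan independently of the 60-second global alarm above. The max_time=5 passed to regex_helper.regex_finditer presumably applies a similar per-search bound on the regex side (an assumption based on the parameter name). A minimal illustration of the yara behaviour; the rule and payload below are placeholders, not taken from the repository:

import yara

# Placeholder rule and payload, only to show the timeout behaviour.
rules = yara.compile(source='rule demo { strings: $a = "secret" condition: $a }')

try:
    # timeout is in seconds; a scan that runs longer raises yara.TimeoutError
    matches = rules.match(data=b'... some content ...', timeout=5)
except yara.TimeoutError:
    matches = []  # treat an over-budget scan as "no matches"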
