Skip to content

Commit 59b2745

Browse files
authored
Merge pull request #449 from CIRCL/tags_v2
Tags v2 - Tagging system refactoring
2 parents a4dd224 + 5f8b81f commit 59b2745

32 files changed

+1247
-345
lines changed

OVERVIEW.md

+8-3
Original file line numberDiff line numberDiff line change
@@ -198,22 +198,27 @@ Redis and ARDB overview
198198
##### Hset:
199199
| Key | Field | Value |
200200
| ------ | ------ | ------ |
201-
| per_paste_**epoch** | **term** | **nb_seen** |
202-
| | |
203201
| tag_metadata:**tag** | first_seen | **date** |
204202
| tag_metadata:**tag** | last_seen | **date** |
205203

206204
##### Set:
207205
| Key | Value |
208206
| ------ | ------ |
209207
| list_tags | **tag** |
208+
| list_tags:**object_type** | **tag** |
209+
| list_tags:domain | **tag** |
210+
||
210211
| active_taxonomies | **taxonomy** |
211212
| active_galaxies | **galaxy** |
212213
| active_tag_**taxonomy or galaxy** | **tag** |
213214
| synonym_tag_misp-galaxy:**galaxy** | **tag synonym** |
214215
| list_export_tags | **user_tag** |
216+
||
215217
| **tag**:**date** | **paste** |
216-
218+
| **object_type**:**tag** | **object_id** |
219+
||
220+
| DB7 |
221+
| tag:**object_id** | **tag** |
217222

218223
##### old:
219224
| Key | Value |

bin/Tags.py

+4-53
Original file line numberDiff line numberDiff line change
@@ -8,29 +8,11 @@
88
This module creates tags.
99
1010
"""
11-
import redis
12-
1311
import time
14-
import datetime
1512

1613
from pubsublogger import publisher
1714
from Helper import Process
18-
from packages import Paste
19-
from packages import Item
20-
21-
22-
def get_item_date(item_filename):
23-
l_directory = item_filename.split('/')
24-
return '{}{}{}'.format(l_directory[-4], l_directory[-3], l_directory[-2])
25-
26-
def set_tag_metadata(tag, date):
27-
# First time we see this tag ## TODO: filter paste from the paste ?
28-
if not server.hexists('tag_metadata:{}'.format(tag), 'first_seen'):
29-
server.hset('tag_metadata:{}'.format(tag), 'first_seen', date)
30-
# Check and Set tag last_seen
31-
last_seen = server.hget('tag_metadata:{}'.format(tag), 'last_seen')
32-
if last_seen is None or date > last_seen:
33-
server.hset('tag_metadata:{}'.format(tag), 'last_seen', date)
15+
from packages import Tag
3416

3517
if __name__ == '__main__':
3618

@@ -45,18 +27,6 @@ def set_tag_metadata(tag, date):
4527
# Setup the I/O queues
4628
p = Process(config_section)
4729

48-
server = redis.StrictRedis(
49-
host=p.config.get("ARDB_Tags", "host"),
50-
port=p.config.get("ARDB_Tags", "port"),
51-
db=p.config.get("ARDB_Tags", "db"),
52-
decode_responses=True)
53-
54-
server_metadata = redis.StrictRedis(
55-
host=p.config.get("ARDB_Metadata", "host"),
56-
port=p.config.get("ARDB_Metadata", "port"),
57-
db=p.config.get("ARDB_Metadata", "db"),
58-
decode_responses=True)
59-
6030
# Send a description of the module to the logger
6131
publisher.info("Tags module started")
6232

@@ -71,27 +41,8 @@ def set_tag_metadata(tag, date):
7141
continue
7242

7343
else:
74-
tag, path = message.split(';')
75-
# add the tag to the tags word_list
76-
res = server.sadd('list_tags', tag)
77-
if res == 1:
78-
print("new tags added : {}".format(tag))
79-
# add the path to the tag set
80-
#curr_date = datetime.date.today().strftime("%Y%m%d")
81-
item_date = get_item_date(path)
82-
res = server.sadd('{}:{}'.format(tag, item_date), path)
83-
if res == 1:
84-
print("new paste: {}".format(path))
85-
print(" tagged: {}".format(tag))
86-
set_tag_metadata(tag, item_date)
87-
server_metadata.sadd('tag:{}'.format(path), tag)
88-
89-
# Domain Object
90-
if Item.is_crawled(path) and tag!='infoleak:submission="crawler"':
91-
domain = Item.get_item_domain(path)
92-
server_metadata.sadd('tag:{}'.format(domain), tag)
93-
server.sadd('domain:{}:{}'.format(tag, item_date), domain)
44+
print(message)
45+
tag, item_id = message.split(';')
9446

95-
curr_date = datetime.date.today().strftime("%Y%m%d")
96-
server.hincrby('daily_tags:{}'.format(item_date), tag, 1)
47+
Tag.add_tag("item", tag, item_id)
9748
p.populate_set_out(message, 'MISP_The_Hive_feeder')

bin/lib/Correlate_object.py

+23-3
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,15 @@
2323
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
2424
config_loader = None
2525

26+
def is_valid_object_type(object_type):
27+
if object_type in ['domain', 'item', 'image']:
28+
return True
29+
else:
30+
return False
31+
32+
def get_all_objects():
33+
return ['domain', 'paste', 'pgp', 'cryptocurrency', 'decoded', 'screenshot']
34+
2635
def get_all_correlation_names():
2736
'''
2837
Return a list of all available correlations
@@ -178,11 +187,21 @@ def get_item_url(correlation_name, value, correlation_type=None):
178187
elif correlation_name == 'domain':
179188
endpoint = 'crawler_splash.showDomain'
180189
url = url_for(endpoint, domain=value)
181-
elif correlation_name == 'paste':
190+
elif correlation_name == 'item':
191+
endpoint = 'showsavedpastes.showsavedpaste'
192+
url = url_for(endpoint, paste=value)
193+
elif correlation_name == 'paste': ### # TODO: remove me
182194
endpoint = 'showsavedpastes.showsavedpaste'
183195
url = url_for(endpoint, paste=value)
184196
return url
185197

198+
def get_obj_tag_table_keys(object_type):
199+
'''
200+
Warning: use only in flask (dynamic templates)
201+
'''
202+
if object_type=="domain":
203+
return ['id', 'first_seen', 'last_check', 'status'] # # TODO: add root screenshot
204+
186205

187206
def create_graph_links(links_set):
188207
graph_links_list = []
@@ -310,6 +329,7 @@ def get_graph_node_object_correlation(object_type, root_value, mode, correlation
310329

311330

312331
######## API EXPOSED ########
313-
314-
332+
def sanitize_object_type(object_type):
333+
if not is_valid_object_type(object_type):
334+
return ({'status': 'error', 'reason': 'Incorrect object_type'}, 400)
315335
######## ########

bin/lib/Domain.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ def get_domain_items_crawled(domain, domain_type, port, epoch=None, items_link=F
292292
if item_screenshot:
293293
dict_item['screenshot'] = Item.get_item_screenshot(item)
294294
if item_tag:
295-
dict_item['tags'] = Tag.get_item_tags_minimal(item)
295+
dict_item['tags'] = Tag.get_obj_tags_minimal(item)
296296
item_crawled['items'].append(dict_item)
297297
return item_crawled
298298

@@ -365,7 +365,7 @@ def get_domain_tags(domain):
365365
366366
:param domain: crawled domain
367367
'''
368-
return Tag.get_item_tags(domain)
368+
return Tag.get_obj_tag(domain)
369369

370370
def get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True, status=True, ports=True, tags=False):
371371
'''

bin/lib/Screenshot.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,16 @@ def get_screenshot_items_list(sha256_string):
4343
else:
4444
return []
4545

46+
def get_item_screenshot(item_id):
47+
return r_serv_metadata.hget('paste_metadata:{}'.format(item_id), 'screenshot')
48+
4649
def get_item_screenshot_list(item_id):
4750
'''
4851
Return the screenshot of a given item id, as a list.
4952
5053
:param item_id: item id
5154
'''
52-
screenshot = r_serv_metadata.hget('paste_metadata:{}'.format(item_id), 'screenshot')
55+
screenshot = get_item_screenshot(item_id)
5356
if screenshot:
5457
return [screenshot]
5558
else:

bin/packages/Date.py

+3
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,9 @@ def substract_day(self, numDay):
7979
comp_day = str(computed_date.day).zfill(2)
8080
return comp_year + comp_month + comp_day
8181

82+
def get_today_date_str():
83+
return datetime.date.today().strftime("%Y%m%d")
84+
8285
def date_add_day(date, num_day=1):
8386
new_date = datetime.date(int(date[0:4]), int(date[4:6]), int(date[6:8])) + datetime.timedelta(num_day)
8487
new_date = str(new_date).replace('-', '')

bin/packages/Item.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def get_item(request_dict):
104104
dict_item['date'] = get_item_date(item_id, add_separator=add_separator)
105105
tags = request_dict.get('tags', True)
106106
if tags:
107-
dict_item['tags'] = Tag.get_item_tags(item_id)
107+
dict_item['tags'] = Tag.get_obj_tag(item_id)
108108

109109
size = request_dict.get('size', False)
110110
if size:
@@ -242,7 +242,7 @@ def get_item_pgp_correlation(item_id):
242242
def get_item_list_desc(list_item_id):
243243
desc_list = []
244244
for item_id in list_item_id:
245-
desc_list.append( {'id': item_id, 'date': get_item_date(item_id), 'tags': Tag.get_item_tags(item_id)} )
245+
desc_list.append( {'id': item_id, 'date': get_item_date(item_id), 'tags': Tag.get_obj_tag(item_id)} )
246246
return desc_list
247247

248248
# # TODO: add an option to check the tag

0 commit comments

Comments
 (0)