@@ -62,7 +62,7 @@ def sanitize_domain_type(domain_type):
62
62
return 'regular'
63
63
64
64
######## DOMAINS ########
65
def get_all_domains_up(domain_type, r_list=True):
    '''
    Get all domain up (at least one time)

    :param domain_type: domain type
    :type domain_type: str
    :param r_list: when True (default) the result is converted to a list,
        otherwise the value returned by ``smembers`` is passed through as is
    :type r_list: bool

    :return: list of domain
    :rtype: list (when ``r_list`` is True; presumably a set otherwise --
        depends on the redis client, to be confirmed)
    '''
    domains = r_serv_onion.smembers("full_{}_up".format(domain_type))
    if r_list:
        # list() builds a new object, so the result has to be rebound;
        # calling it without assignment would hand the raw value back.
        domains = list(domains) if domains else []
    return domains
76
82
77
83
def get_domains_up_by_month (date_year_month , domain_type , rlist = False ):
78
84
'''
@@ -128,6 +134,64 @@ def get_domains_up_by_daterange(date_from, date_to, domain_type):
128
134
domains_up = []
129
135
return domains_up
130
136
137
def paginate_iterator(iter_elems, nb_obj=50, page=1):
    '''
    Extract one page of elements from a sized collection

    :param iter_elems: collection to paginate; it must support ``len()`` and
        iteration (indexing is not required)
    :param nb_obj: number of elements per page, expected to be a positive integer
    :type nb_obj: int
    :param page: requested page number (1-based); values outside
        ``1..nb_pages`` are clamped to the nearest valid page
    :type page: int

    :return: dict with the keys ``nb_all_elem`` (total number of elements),
        ``list_elem`` (elements of the selected page), ``page`` (page really
        used, 0 when the collection is empty), ``nb_pages``,
        ``nb_first_elem`` and ``nb_last_elem`` (1-based positions of the
        first and last element of the page)
    :rtype: dict
    '''
    from itertools import islice

    nb_all_elem = len(iter_elems)
    # Integer ceiling division: exact for any size, no float round-trip.
    nb_pages = -(-nb_all_elem // nb_obj)

    # Clamp the low bound first so that an empty collection still ends up
    # with page == nb_pages == 0.
    page = min(max(page, 1), nb_pages)

    if nb_pages > 1:
        start = nb_obj * (page - 1)
        stop = min(nb_obj * page, nb_all_elem)
        # islice works on any iterable, including non-indexable ones.
        list_elem = list(islice(iter_elems, start, stop))
    else:
        # zero or one page: everything fits
        start = 0
        stop = nb_all_elem
        list_elem = list(iter_elems)

    dict_page = {}
    dict_page['nb_all_elem'] = nb_all_elem
    dict_page['list_elem'] = list_elem
    dict_page['page'] = page
    dict_page['nb_pages'] = nb_pages
    # UI
    dict_page['nb_first_elem'] = start + 1
    dict_page['nb_last_elem'] = stop
    return dict_page
174
+
175
def domains_up_by_page(domain_type, nb_obj=28, page=1):
    '''
    Get one page of domains up (alpha sorted), each described by its metadata

    :param domain_type: domain type
    :type domain_type: str
    :param nb_obj: number of domains per page
    :type nb_obj: int
    :param page: page number
    :type page: int

    :return: pagination dict produced by paginate_iterator, whose
        'list_elem' entry is replaced by the metadata of each domain of the page
    :rtype: dict
    '''
    all_domains_up = get_all_domains_up(domain_type, r_list=False)
    page_content = paginate_iterator(sorted(all_domains_up), nb_obj=nb_obj, page=page)

    # # TODO: get tags + root_screenshot + metadata
    page_content['list_elem'] = [
        get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True,
                            status=True, ports=True, tags=True, screenshot=True)
        for domain in page_content['list_elem']
    ]
    return page_content
194
+
131
195
######## DOMAIN ########
132
196
133
197
def get_domain_type (domain ):
@@ -367,7 +431,15 @@ def get_domain_tags(domain):
367
431
'''
368
432
return Tag .get_obj_tag (domain )
369
433
370
- def get_domain_metadata (domain , domain_type , first_seen = True , last_ckeck = True , status = True , ports = True , tags = False ):
434
def get_domain_random_screenshot(domain):
    '''
    Return a screenshot of a domain, as selected by
    ``Screenshot.get_randon_domain_screenshot``.

    NOTE(review): the previous docstring said "last screenshot (core item)"
    while the function name says random -- confirm against the Screenshot module.

    :param domain: crawled domain

    :return: whatever ``Screenshot.get_randon_domain_screenshot`` returns
    '''
    screenshot = Screenshot.get_randon_domain_screenshot(domain)
    return screenshot
441
+
442
+ def get_domain_metadata (domain , domain_type , first_seen = True , last_ckeck = True , status = True , ports = True , tags = False , screenshot = False ):
371
443
'''
372
444
Get Domain basic metadata
373
445
@@ -384,6 +456,7 @@ def get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True, s
384
456
:rtype: dict
385
457
'''
386
458
dict_metadata = {}
459
+ dict_metadata ['id' ] = domain
387
460
if first_seen :
388
461
res = get_domain_first_seen (domain , domain_type = domain_type )
389
462
if res is not None :
@@ -398,6 +471,8 @@ def get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True, s
398
471
dict_metadata ['ports' ] = get_domain_all_ports (domain , domain_type )
399
472
if tags :
400
473
dict_metadata ['tags' ] = get_domain_tags (domain )
474
+ if screenshot :
475
+ dict_metadata ['screenshot' ] = get_domain_random_screenshot (domain )
401
476
return dict_metadata
402
477
403
478
def get_domain_metadata_basic (domain , domain_type = None ):
0 commit comments