|
# Number of CSV rows ingested per parse chunk (2 ** 10 == 1024 rows).
CSV_ROWS_PER_PARSE = 2 ** 10
# Delay between periodic logger updates; presumably seconds -- TODO confirm.
LOGGER_TIME_DELAY = 5.0
# Whole-globe extent; presumably [min_lng, min_lat, max_lng, max_lat] -- confirm against usage.
INITIAL_BOUNDING_BOX = [-180, -90, 180, 90]
# Max points within an AOI bounding box before rejecting the AOI.
MAX_ALLOWABLE_QUERY = 30_000_000

Pyro5.config.SERIALIZER = 'marshal'  # lets us pass null bytes in strings
|
49 | 51 |
|
@@ -85,7 +87,8 @@ def __init__(
|
85 | 87 | raw_csv_filename=None,
|
86 | 88 | quadtree_pickle_filename=None,
|
87 | 89 | max_points_per_node=GLOBAL_MAX_POINTS_PER_NODE,
|
88 |
| - max_depth=GLOBAL_DEPTH, dataset_name='flickr'): |
| 90 | + max_depth=GLOBAL_DEPTH, dataset_name='flickr', |
| 91 | + max_allowable_query=MAX_ALLOWABLE_QUERY): |
89 | 92 | """Initialize RecModel object.
|
90 | 93 |
|
91 | 94 | The object can be initialized either with a path to a CSV file
|
@@ -156,6 +159,7 @@ def __init__(
|
156 | 159 | # self.global_cache_dir = global_cache
|
157 | 160 | self.min_year = min_year
|
158 | 161 | self.max_year = max_year
|
| 162 | + self.max_allowable_query = max_allowable_query |
159 | 163 | self.acronym = 'PUD' if dataset_name == 'flickr' else 'TUD'
|
160 | 164 |
|
161 | 165 | def get_valid_year_range(self):
|
@@ -198,6 +202,9 @@ def fetch_workspace_aoi(self, workspace_id): # pylint: disable=no-self-use
|
198 | 202 | with open(out_zip_file_path, 'rb') as out_zipfile:
|
199 | 203 | return out_zipfile.read()
|
200 | 204 |
|
| 205 | + def get_aoi_query_size(self): |
| 206 | + return (50_000_000, self.max_allowable_query) |
| 207 | + |
201 | 208 | # @_try_except_wrapper("exception in calc_user_days_in_aoi")
|
202 | 209 | def calc_user_days_in_aoi(
|
203 | 210 | self, zip_file_binary, date_range, out_vector_filename):
|
@@ -982,25 +989,33 @@ class RecManager(object):
|
    def __init__(self, servers_dict):
        """Initialize the manager with its backing servers.

        Args:
            servers_dict (dict): maps a dataset key (e.g. ``'flickr'``) to the
                server object queried for that dataset; values must expose
                ``get_valid_year_range()`` and ``get_aoi_query_size()``.
        """
        self.servers = servers_dict
|
| 992 | + def get_valid_year_range(self, dataset): |
| 993 | + server = self.servers[dataset] |
| 994 | + return server.get_valid_year_range() |
| 995 | + |
| 996 | + def get_aoi_query_size(self, dataset): |
| 997 | + server = self.servers[dataset] |
| 998 | + return server.get_aoi_query_size() |
| 999 | + |
985 | 1000 | @_try_except_wrapper("calculate_userdays exited while multiprocessing.")
|
986 | 1001 | def calculate_userdays(self, zip_file_binary, start_year, end_year, dataset_list):
|
987 | 1002 | results = {}
|
988 | 1003 | with concurrent.futures.ProcessPoolExecutor(max_workers=2) as executor:
|
989 | 1004 | future_to_label = {}
|
990 | 1005 | for dataset in dataset_list:
|
991 | 1006 | server = self.servers[dataset]
|
992 |
| - # validate available year range |
993 |
| - min_year, max_year = server.get_valid_year_range() |
994 |
| - LOGGER.info( |
995 |
| - f"Server supports year queries between {min_year} and {max_year}") |
996 |
| - if not min_year <= int(start_year) <= max_year: |
997 |
| - raise ValueError( |
998 |
| - f"Start year must be between {min_year} and {max_year}.\n" |
999 |
| - f" User input: ({start_year})") |
1000 |
| - if not min_year <= int(end_year) <= max_year: |
1001 |
| - raise ValueError( |
1002 |
| - f"End year must be between {min_year} and {max_year}.\n" |
1003 |
| - f" User input: ({end_year})") |
| 1007 | + # # validate available year range |
| 1008 | + # min_year, max_year = server.get_valid_year_range() |
| 1009 | + # LOGGER.info( |
| 1010 | + # f"Server supports year queries between {min_year} and {max_year}") |
| 1011 | + # if not min_year <= int(start_year) <= max_year: |
| 1012 | + # raise ValueError( |
| 1013 | + # f"Start year must be between {min_year} and {max_year}.\n" |
| 1014 | + # f" User input: ({start_year})") |
| 1015 | + # if not min_year <= int(end_year) <= max_year: |
| 1016 | + # raise ValueError( |
| 1017 | + # f"End year must be between {min_year} and {max_year}.\n" |
| 1018 | + # f" User input: ({end_year})") |
1004 | 1019 |
|
1005 | 1020 | # append jan 1 to start and dec 31 to end
|
1006 | 1021 | date_range = (str(start_year)+'-01-01',
|
|
0 commit comments