-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #24 from e-lie/http_dl_improvment
Fix samples download and prepare for future asset collection download
- Loading branch information
Showing
11 changed files
with
174 additions
and
222 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,157 @@ | ||
import os | ||
import requests | ||
import time | ||
from datetime import datetime | ||
from renardo_gatherer.config_dir import SAMPLES_DIR_PATH | ||
from urllib.parse import urljoin, urlparse | ||
from concurrent.futures import ThreadPoolExecutor, as_completed | ||
|
||
# Base URL of the remote server hosting downloadable sample collections.
SAMPLES_DOWNLOAD_SERVER = 'https://collections.renardo.org/samples'
# DEFAULT_SAMPLES_PACK_NAME = '0_foxdot_default_testing'
# Name of the sample pack fetched by download_default_sample_pack().
DEFAULT_SAMPLES_PACK_NAME = '0_foxdot_default'
# Subdirectory (inside a sample pack) holding loop samples.
LOOP_SUBDIR = '_loop_'
|
||
|
||
# Maps non-alphabetic sample symbols to the directory names used on disk,
# since most punctuation characters cannot be used as folder names directly.
# Digits map to themselves. NOTE(review): "asterix" looks like a typo for
# "asterisk", but these values must match the pack's on-disk / server-side
# directory layout — confirm before renaming.
nonalpha = {"&": "ampersand",
            "*": "asterix",
            "@": "at",
            "\\": "backslash",
            "|": "bar",
            "^": "caret",
            ":": "colon",
            "$": "dollar",
            "=": "equals",
            "!": "exclamation",
            "/": "forwardslash",
            "#": "hash",
            "-": "hyphen",
            "<": "lessthan",
            "%": "percent",
            "+": "plus",
            "?": "question",
            ";": "semicolon",
            "~": "tilde",
            ",": "comma",
            "0": "0",
            "1": "1",
            "2": "2",
            "3": "3",
            "4": "4",
            "5": "5",
            "6": "6",
            "7": "7",
            "8": "8",
            "9": "9"}
|
||
|
||
def ensure_renardo_samples_directory():
    """Create the samples directory (and any missing parents) if absent."""
    # mkdir(exist_ok=True) already tolerates an existing directory, so no
    # separate exists() check is needed.
    SAMPLES_DIR_PATH.mkdir(parents=True, exist_ok=True)
|
||
def download_file_in_pool(url, dest_path, retries=5, delay=1, logger=None):
    """Download *url* to *dest_path*, retrying on network errors.

    Designed to run as a task inside a thread pool (see
    download_files_from_json_index_concurrent).

    Args:
        url: Full URL of the file to fetch.
        dest_path: Local filesystem path the file is written to.
        retries: Maximum number of attempts before giving up.
        delay: Seconds to sleep between failed attempts.
        logger: Optional object exposing a ``write_line(str)`` method.

    Returns:
        True on success, False once all retries are exhausted.
    """
    filename = os.path.basename(urlparse(url).path)
    for attempt in range(retries):
        try:
            # stream=True fetches the body in chunks so large samples never
            # have to fit in memory; `with` releases the connection promptly.
            with requests.get(url, stream=True) as response:
                response.raise_for_status()
                with open(dest_path, "wb") as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
            if logger:
                # Bug fix: the original logged the literal text "(unknown)"
                # and never used the computed filename.
                logger.write_line(f"Downloaded {filename} to {dest_path}")
            return True
        except requests.RequestException as e:
            if logger:
                logger.write_line(f"Error downloading {url}: {e}")
            if attempt < retries - 1:
                if logger:
                    logger.write_line(f"Retrying ({attempt + 1}/{retries})...")
                time.sleep(delay)
            else:
                if logger:
                    logger.write_line(f"Failed to download {url} after {retries} attempts")
                return False
|
||
|
||
def download_files_from_json_index_concurrent(json_url, download_dir, max_workers=3, logger=None):
    """Download every file listed in a JSON collection index, concurrently.

    The index is a tree of nodes: a node carrying a "url" key describes a
    file; a node carrying "children" describes a directory whose "path"
    basename is recreated locally under *download_dir*.

    Args:
        json_url: URL of the collection_index.json describing the file tree.
        download_dir: Local root directory the remote tree is mirrored into.
        max_workers: Number of parallel download threads.
        logger: Optional object exposing a ``write_line(str)`` method.
    """
    def download_json_index_from_url(url, logger=logger):
        # Fetch and parse the JSON index; returns None on any network error.
        try:
            response = requests.get(url)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            # Bug fix: the original called logger.write_line unconditionally,
            # crashing when logger is None (its documented default).
            if logger:
                logger.write_line(f"Error downloading collection JSON index: {e}")
            return None

    def process_node(node, base_url="", current_dir=""):
        # Walk the index tree, returning (file_url, local_path) tasks and
        # creating the matching local directories as it goes.
        tasks = []
        if "url" in node:
            # Full file download URL
            file_url = urljoin(base_url, node["url"])
            # Full local path including any subdirectory structure
            file_path = os.path.join(download_dir, current_dir, os.path.basename(node["path"]))
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            tasks.append((file_url, file_path))
        if "children" in node:
            # child_dir is invariant across children, so compute it once.
            child_dir = os.path.join(current_dir, os.path.basename(node["path"]))
            # Bug fix: the original created child_dir relative to the current
            # working directory; mirrored directories belong under download_dir.
            os.makedirs(os.path.join(download_dir, child_dir), exist_ok=True)
            for child in node["children"]:
                tasks.extend(process_node(child, base_url, child_dir))
        return tasks

    # Ensure the download directory exists
    os.makedirs(download_dir, exist_ok=True)

    # Download JSON content from URL
    file_tree = download_json_index_from_url(json_url)
    if file_tree is None:
        # Bug fix: the original passed None straight into process_node,
        # raising a TypeError instead of failing gracefully.
        if logger:
            logger.write_line("Could not retrieve the collection index; aborting download.")
        return

    # Generate list of all files to download
    download_tasks = process_node(file_tree, json_url)

    # Use ThreadPoolExecutor to download files concurrently
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(download_file_in_pool, url, path, 5, 1, logger)
            for url, path in download_tasks
        ]
        for future in as_completed(futures):
            # Surface (but do not raise on) individual download failures.
            if not future.result():
                if logger:
                    logger.write_line("A download failed.")
|
||
|
||
def download_default_sample_pack(logger=None):
    """Download the default sample pack and stamp it as downloaded.

    Fetches every file of DEFAULT_SAMPLES_PACK_NAME from
    SAMPLES_DOWNLOAD_SERVER into SAMPLES_DIR_PATH, then writes a
    'downloaded_at.txt' marker consulted by is_default_spack_initialized().

    Args:
        logger: Optional object exposing a ``write_line(str)`` method.
    """
    # Bug fix: logger defaults to None, so guard before using it — the
    # original crashed with AttributeError when called without a logger.
    if logger:
        logger.write_line(f"Downloading Default Sample Pack {DEFAULT_SAMPLES_PACK_NAME} from {SAMPLES_DOWNLOAD_SERVER}\n")
    download_files_from_json_index_concurrent(
        json_url=f'{SAMPLES_DOWNLOAD_SERVER}/{DEFAULT_SAMPLES_PACK_NAME}/collection_index.json',
        download_dir=SAMPLES_DIR_PATH,
        logger=logger
    )

    # Record the download time. Best-effort: failing to write the marker
    # should not abort after an otherwise successful download.
    try:
        with open(SAMPLES_DIR_PATH / DEFAULT_SAMPLES_PACK_NAME / 'downloaded_at.txt', mode="w") as file:
            file.write(str(datetime.now()))
    except Exception as e:
        print(e)
|
||
|
||
def is_default_spack_initialized():
    """Return True if the default sample pack has already been downloaded."""
    # The marker file is written by download_default_sample_pack() on success.
    marker_file = SAMPLES_DIR_PATH / DEFAULT_SAMPLES_PACK_NAME / 'downloaded_at.txt'
    return marker_file.exists()
|
||
def sample_path_from_symbol(symbol, spack_path=SAMPLES_DIR_PATH/DEFAULT_SAMPLES_PACK_NAME):
    """ Return the sample search directory for a symbol """
    # Letters live under their lowercase name, split by letter case.
    if symbol.isalpha():
        case_dirname = 'upper' if symbol.isupper() else 'lower'
        return spack_path / symbol.lower() / case_dirname
    # Digits and punctuation live under '_/<longname>'.
    if symbol in nonalpha:
        return spack_path / '_' / nonalpha[symbol]
    # Unknown symbols have no sample directory.
    return None
|
||
def default_loop_path():
    """Return the directory holding the default pack's loop samples."""
    return SAMPLES_DIR_PATH / DEFAULT_SAMPLES_PACK_NAME / LOOP_SUBDIR
Oops, something went wrong.