-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathramm_scraper.py
36 lines (30 loc) · 1.12 KB
/
ramm_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import os
import requests
import time
from bs4 import BeautifulSoup
import argparse
from scripts.functions import *
parser = argparse.ArgumentParser(description='RammsteinShop scraper.')
parser.add_argument('--start', type=int, default=0, help='Starting id. Default: 0')
parser.add_argument('--end', type=int, default=10000, help='Ending id. Default: 10.000')
parser.add_argument('--new', action='store_true', help='Enable scraping new items.')
args = vars(parser.parse_args())
start_id = args['start']
end_id = args['end']
scrape_new = args['new']
BASE_URL = 'https://shop.rammstein.de/en/catalog/product_info.php?products_id='
WHATS_NEW_URL = 'https://shop.rammstein.de/en/catalog/whats-new.html'
if scrape_new:
pass
else:
for current_id in range(start_id, end_id+1):
time.sleep(1)
url = BASE_URL + str(current_id)
response = requests.get(url)
soup = BeautifulSoup(response.content, "html.parser")
if response.status_code == 200:
print(f'{current_id} ok')
print(save_page(soup, url, current_id))
else:
print(f'{current_id} empty')
exit()