-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathseniorly_main.py
67 lines (57 loc) · 3.12 KB
/
seniorly_main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import seniorly_scraper as scraper
import pandas as pd
import csv
# Read the input CSV and collect the community page URLs to scrape.
# test.csv must have a column named 'urls'.
df = pd.read_csv('test.csv')
urls = df['urls'].tolist()

# Parallel accumulator lists: index i in every list below describes the
# i-th successfully scraped community page. They are appended to in
# lockstep by the scrape loop and zipped back together by saveData().
pageTitle = []
metaDescription = []
communityName = []
communityStreetAddres = []  # NOTE: original spelling kept ("Addres") — other code references it
communityZipCode = []
communityState = []
images = []
content = []
careTypesProvided = []
AmenitiesProvided = []
pricingStartsFrom = []
pricingByRoomType = []
url = []
def saveData(u, pageTitle, metaDescription, communityName, communityStreetAddres,
             communityZipCode, communityState, images, content, careTypesProvided,
             AmenitiesProvided, pricingStartsFrom, pricingByRoomType):
    """Write all scraped records to sample.csv, overwriting any previous file.

    Each argument is a list; element i of every list belongs to the same
    scraped page. Rows are produced by zipping the lists in lockstep, so
    output stops at the shortest list.

    Fix: the original zipped the module-global ``url`` instead of the first
    parameter ``u``, leaving the parameter unused and tying the function to
    a global. At the existing call site (which passes the global ``url``
    positionally) the output is identical.
    """
    fieldnames = ['url', 'pageTitle', 'metaDescription', 'communityName',
                  'communityStreetAddress', 'communityZipCode', 'communityState',
                  'images', 'content', 'careTypes', 'Amenities',
                  'pricingStartsFrom', 'pricingByRoomType']
    with open("sample.csv", "w", newline='', encoding="utf-8") as csvFile:
        writer = csv.DictWriter(csvFile, fieldnames=fieldnames)
        writer.writeheader()
        # One tuple per record; dict(zip(...)) maps it onto the header names,
        # avoiding the original's shadowing of all thirteen parameters.
        for row in zip(u, pageTitle, metaDescription, communityName,
                       communityStreetAddres, communityZipCode, communityState,
                       images, content, careTypesProvided, AmenitiesProvided,
                       pricingStartsFrom, pricingByRoomType):
            writer.writerow(dict(zip(fieldnames, row)))
# Main scrape loop: fetch each URL, extract every field, and rewrite
# sample.csv after each page so progress survives a crash mid-run.
for i in urls:
    try:
        print("Scraping No:", i)
        soup = scraper.pageRequests(i)
        # Extract the street address first; only after a successful fetch
        # do we append anything, keeping the parallel lists in lockstep.
        ad = scraper.getCommunityStreetAddress(soup)
        communityStreetAddres.append(ad)
        url.append(i)
        pageTitle.append(scraper.getPageTitle(soup))
        metaDescription.append(scraper.getMetaDescription(soup))
        communityName.append(scraper.getCommunityName(soup))
        communityZipCode.append(scraper.getCommunityZipCode(soup))
        communityState.append(scraper.getCommunityState(soup))
        images.append(scraper.getImages(soup))
        content.append(scraper.getCommunityContent(soup))
        careTypesProvided.append(scraper.getCareTypesProvided(soup))
        AmenitiesProvided.append(scraper.getAmenitiesProvided(soup))
        pricingStartsFrom.append(scraper.getPricingStartsFrom(soup))
        pricingByRoomType.append(scraper.getPricingByRoomType(soup))
        saveData(url, pageTitle, metaDescription, communityName,
                 communityStreetAddres, communityZipCode, communityState,
                 images, content, careTypesProvided, AmenitiesProvided,
                 pricingStartsFrom, pricingByRoomType)
    except Exception as e:
        # Best-effort scraping: skip pages that fail, but report the error
        # instead of the original bare `except:` which silently swallowed
        # everything (including KeyboardInterrupt / SystemExit).
        print(f"Failed to scrape {i}: {e}")
        continue