-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDownloadLibraryHistory_selenium.py
98 lines (88 loc) · 3.26 KB
/
DownloadLibraryHistory_selenium.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#Logs in to a library account with Selenium and saves the reading-history table to a CSV file
#Requires Selenium to be installed, with the browser's webdriver on the PATH. Instructions at http://selenium-python.readthedocs.io/installation.html
#Pre-load stuff
import html
import os
import time

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
#User-supplied settings
#UserInfo.txt is parsed as a headerless CSV; only the first column is read,
#one setting per row, in the fixed order used below.
#NOTE(review): because the file goes through a CSV parser, a value that
#contains a comma will not arrive intact in column 0 - confirm the settings
#file never needs one.
UserInputs = pd.read_csv("UserInfo.txt", header=None)
settingsColumn = UserInputs[0]
#Rows 0 and 1: login values (card number, then PIN)
cardNum = settingsColumn[0]
pin = settingsColumn[1]
#Row 2: directory to switch into; a relative output file name resolves against it
savedir = settingsColumn[2]
os.chdir(savedir)
#Row 3: file name the results are written to
outFile = settingsColumn[3]
#Start Driver
driver = webdriver.Firefox()
#Get to Starting URL (row 4 of the settings column)
url = UserInputs[0][4]
driver.get(url)
#Row 5 holds text that must appear in the start page's title; stop early if
#it does not. An explicit check replaces `assert` because assert statements
#are removed when Python runs with -O, which would silently skip this guard.
expectedTitle = UserInputs[0][5]
pageTitle = driver.title
if expectedTitle not in pageTitle:
    #Close the browser first so a failed start does not leave Firefox running
    driver.quit()
    raise RuntimeError(
        "Unexpected page title %r at %s (expected it to contain %r)"
        % (pageTitle, url, expectedTitle)
    )
#Navigate to Account Login Page
#Elements are located with find_element(By.<strategy>, value): the
#find_element_by_* shortcuts were removed in Selenium 4.3, while this form
#is available in both older and current Selenium releases.
accountButton = driver.find_element(By.CSS_SELECTOR, "a[href*='myaccount']")
#Load the link's target directly rather than clicking the link
driver.get(accountButton.get_property('href'))
time.sleep(2)  #fixed pause before the login form is looked up
#Input and execute Login
cardInput = driver.find_element(By.ID, "code")
cardInput.send_keys(cardNum)
pinInput = driver.find_element(By.ID, "pin")
pinInput.send_keys(pin)
submitButton = driver.find_element(By.CSS_SELECTOR, "span.buttonSpriteSpan2")
submitButton.click()
time.sleep(4)  #fixed pause after submitting the login form
#Navigate to Book List
#Elements are located with find_element(By.<strategy>, value): the
#find_element_by_* shortcuts were removed in Selenium 4.3, while this form
#is available in both older and current Selenium releases.
patronLink = driver.find_element(By.CSS_SELECTOR, "a.myAccountLink")
patronLink.click()
time.sleep(2)  #fixed pause before the next link is looked up
historyLink = driver.find_element(By.ID, "webpacFuncDirectLinkComponent_1")
historyLink.click()
time.sleep(2)
#Move to Table Storage Page and get Table Limits
#Load the iframe's own URL in the main window so later lookups run directly
#against that document.
historyFrame = driver.find_element(By.CSS_SELECTOR, "iframe#accountContentIframe")
driver.get(historyFrame.get_attribute('src'))
#The page count is the number of links inside the first pager cell.
#NOTE(review): presumably the pager shows one link per other page plus a
#'Next' link, which makes the link count equal the page count - confirm
#against the catalogue's markup.
pagerCells = driver.find_elements(By.CSS_SELECTOR, 'td.browsePager')
if pagerCells:
    pageElements = pagerCells[0].find_elements(By.CSS_SELECTOR, 'a')
else:
    #No pager cell on the page: fall back to a single page instead of
    #failing with NoSuchElementException
    pageElements = []
#Always read at least the page that is currently displayed
numPages = max(1, len(pageElements))
#Extract Data page by page
def _leadingText(tag):
    """Return the text between a tag's opening markup and the next '<'.

    The tag is serialised with str() and cut at the first '>' and the
    following '<', so only text placed directly after the opening tag is
    returned. Serialising writes '&', '<' and '>' as HTML entities, so the
    result is unescaped to give the characters as they appear on the page.
    """
    afterOpeningTag = str(tag).split('>')[1]
    return html.unescape(afterOpeningTag.split('<')[0])


#One (title, author, date, details) tuple per table row, across all pages
bookHistory = []
for pg in range(numPages):
    #find_element(By.<strategy>, value) is used because the
    #find_element_by_* shortcuts were removed in Selenium 4.3
    tableBody = driver.find_element(By.CSS_SELECTOR, 'tbody')
    pgSoup = BeautifulSoup(tableBody.get_attribute('outerHTML'), 'lxml')  #Extract HTML
    titleList = pgSoup.find_all(class_='patFuncTitleMain')
    authorList = pgSoup.find_all(class_='patFuncAuthor')
    dateList = pgSoup.find_all(class_='patFuncDate')
    detailList = pgSoup.find_all(class_='patFuncDetails')
    #Loop through each list; the four lists are paired up by position
    for (tit, au, dt, det) in zip(titleList, authorList, dateList, detailList):
        author = _leadingText(au)
        date = _leadingText(dt)
        rawDetails = _leadingText(det)
        rawTitle = _leadingText(tit)
        #Split at the FIRST '/' only: the part before it is the title and
        #everything after it (including any further '/') is kept and
        #appended to the details, so no text is dropped.
        title, slash, titleDeets = rawTitle.partition('/')
        if slash:
            details = rawDetails + ' / ' + titleDeets
        else:
            details = rawDetails
        bookHistory.append((title, author, date, details))
    #Every page except the last is followed by a click on 'Next'
    if pg < (numPages-1):
        nextButton = driver.find_element(By.XPATH, "//*[contains(text(), 'Next')]")
        nextButton.click()  #Click to move to next page
        time.sleep(2)  #fixed pause before the next page's table is read
#Build a DataFrame from the collected rows and write it to outFile as CSV
historyTable = pd.DataFrame.from_records(
    bookHistory,
    columns=['Title', 'Author', 'Date', 'Details'],
)
historyTable.to_csv(outFile)
#Shut down the browser session
driver.quit()