Skip to content

Commit 16347e7

Browse files
committed
Fix URLs for getaggregate and getstructure
1 parent 7866306 commit 16347e7

File tree

1 file changed

+31
-28
lines changed

1 file changed

+31
-28
lines changed

hathitrust_api/data_api.py

+31-28
Original file line numberDiff line numberDiff line change
@@ -15,53 +15,56 @@ def __init__(self, client_key, client_secret, secure=True):
1515
client_secret: secret OAuth key
1616
secure: toggles http/https session. Defaults to
1717
https (secure=True), which is needed for access to restricted content; pass secure=False for plain http.
18-
Initializes a persistent Requests session and attaches
19-
OAuth credentials to the session. All queries are performed as
18+
Initializes a persistent Requests session and attaches
19+
OAuth credentials to the session. All queries are performed as
2020
method calls on the HTDataInterface object.
2121
For now, all queries return the raw content string, rather than
2222
processing the json or xml structures.
2323
"""
2424

2525
self.client_key = client_key
2626
self.client_secret = client_secret
27-
self.oauth = OAuth1(client_key=client_key,
28-
client_secret=client_secret,
27+
self.oauth = OAuth1(client_key=client_key,
28+
client_secret=client_secret,
2929
signature_type='query')
3030

3131
self.rsession = requests.Session()
3232
self.rsession.auth = self.oauth
3333

3434
if secure:
3535
self.baseurl = SECURE_DATA_BASEURL
36-
else:
36+
else:
3737
self.baseurl = DATA_BASEURL
3838

3939

40-
def _makerequest(self, resource, doc_id, doc_type='volume', sequence=None,
40+
def _makerequest(self, resource, doc_id, doc_type='volume', sequence=None,
4141
v=2, json=False, callback=None):
4242
""" Construct and perform URI request.
4343
Args:
4444
resource: resource type
4545
doc_id: document identifier of target
4646
doc_type: type of document: volume or article
4747
sequence: page number for single page resources
48-
v: API version
48+
v: API version
4949
json: if json=True, the json representation of
50-
the resource is returned. Only valid for resources that
50+
the resource is returned. Only valid for resources that
5151
are xml or xml+atom by default.
52-
callback: optional javascript callback function,
52+
callback: optional javascript callback function,
5353
which only has an effect if json=True.
54-
Return:
54+
Return:
5555
content of the response, in bytes
56-
Note there's not much error checking on url construction,
57-
but errors do get raised after badly formed requests.
58-
To do: implement some exception checking here, and identify
59-
what sort of errors are being returned (eg. BadRequest,
60-
Unauthorized, NotFound, etc.)
56+
Note there's not much error checking on url construction,
57+
but errors do get raised after badly formed requests.
58+
To do: implement some exception checking here, and identify
59+
what sort of errors are being returned (eg. BadRequest,
60+
Unauthorized, NotFound, etc.)
6161
"""
6262

63-
url = "".join([self.baseurl, doc_type, '/', resource, '/', doc_id])
64-
63+
if doc_type:
64+
doc_type = '%s/' % doc_type
65+
66+
url = "".join([self.baseurl, doc_type, resource, '/', doc_id])
67+
6568
if sequence:
6669
url += '/' + str(sequence)
6770

@@ -70,7 +73,7 @@ def _makerequest(self, resource, doc_id, doc_type='volume', sequence=None,
7073
params['format'] = 'json'
7174
if callback:
7275
params['callback'] = callback
73-
76+
7477
r = self.rsession.get(url, params=params)
7578
r.raise_for_status()
7679

@@ -82,19 +85,19 @@ def getmeta(self, doc_id, doc_type='volume', json=False):
8285
Args:
8386
doc_id: document identifier
8487
json: if json=True, the json representation of
85-
the resource is returned, otherwise efaults to an atom+xml
88+
the resource is returned, otherwise defaults to an atom+xml
8689
format.
87-
Return:
90+
Return:
8891
xml or json string
8992
"""
9093
return self._makerequest('meta', doc_id, doc_type=doc_type, json=json)
9194

9295

93-
def getstructure(self, doc_id, doc_type='volume', json=False):
96+
def getstructure(self, doc_id, doc_type='', json=False):
9497
""" Retrieve a METS document.
9598
Args:
9699
doc_id: target document
97-
json: toggles json/xml
100+
json: toggles json/xml
98101
Return:
99102
xml or json string
100103
"""
@@ -106,9 +109,9 @@ def getpagemeta(self, doc_id, seq, doc_type='volume', json=False):
106109
return self._makerequest('pagemeta', doc_id, doc_type=doc_type, sequence=seq, json=json)
107110

108111

109-
def getaggregate(self, doc_id, doc_type='volume'):
110-
""" Get aggregate record data.
111-
Return:
112+
def getaggregate(self, doc_id, doc_type=''):
113+
""" Get aggregate record data.
114+
Return:
112115
zip content that contains tiff/jp2/jpeg, .txt OCR files,
113116
+ Source METS (not the same as Hathi METS)
114117
"""
@@ -152,7 +155,7 @@ def getdocumentocr(self, doc_id, start_page = 1, end_page = 1e5 , doc_type = 'vo
152155
except:
153156
break
154157
return outPages
155-
158+
156159
def getdocumentcoordocr(self, doc_id, start_page = 1, end_page = 1e5 , doc_type = 'volume'):
157160
""" Get coordinate OCR for an entire document.
158161
Return:
@@ -167,6 +170,6 @@ def getdocumentcoordocr(self, doc_id, start_page = 1, end_page = 1e5 , doc_type
167170
except:
168171
break
169172
return outPages
170-
171173

172-
174+
175+

0 commit comments

Comments
 (0)