@@ -15,53 +15,56 @@ def __init__(self, client_key, client_secret, secure=True):
15
15
client_secret: secret OAuth key
16
16
secure: toggles http/https session. Defaults to
17
17
http, use https for access to restricted content.
18
- Initializes a persistent Requests session and attaches
19
- OAuth credentials to the session. All queries are performed as
18
+ Initializes a persistent Requests session and attaches
19
+ OAuth credentials to the session. All queries are performed as
20
20
method calls on the HTDataInterface object.
21
21
For now, all queries return the raw content string, rather than
22
22
processing the json or xml structures.
23
23
"""
24
24
25
25
self .client_key = client_key
26
26
self .client_secret = client_secret
27
- self .oauth = OAuth1 (client_key = client_key ,
28
- client_secret = client_secret ,
27
+ self .oauth = OAuth1 (client_key = client_key ,
28
+ client_secret = client_secret ,
29
29
signature_type = 'query' )
30
30
31
31
self .rsession = requests .Session ()
32
32
self .rsession .auth = self .oauth
33
33
34
34
if secure :
35
35
self .baseurl = SECURE_DATA_BASEURL
36
- else :
36
+ else :
37
37
self .baseurl = DATA_BASEURL
38
38
39
39
40
- def _makerequest (self , resource , doc_id , doc_type = 'volume' , sequence = None ,
40
+ def _makerequest (self , resource , doc_id , doc_type = 'volume' , sequence = None ,
41
41
v = 2 , json = False , callback = None ):
42
42
""" Construct and perform URI request.
43
43
Args:
44
44
resource: resource type
45
45
doc_id: document identifier of target
46
46
doc_type: type of document: volume or article
47
47
sequence: page number for single page resources
48
- v: API version
48
+ v: API version
49
49
json: if json=True, the json representation of
50
- the resource is returned. Only valid for resources that
50
+ the resource is returned. Only valid for resources that
51
51
are xml or xml+atom by default.
52
- callback: optional javascript callback function,
52
+ callback: optional javascript callback function,
53
53
which only has an effect if json=True.
54
- Return:
54
+ Return:
55
55
content of the response, in bytes
56
- Note there's not much error checking on url construction,
57
- but errors do get raised after badly formed requests.
58
- To do: implement some exception checking here, and identify
59
- what sort of errors are being returned (eg. BadRequest,
60
- Unauthorized, NotFound, etc.)
56
+ Note there's not much error checking on url construction,
57
+ but errors do get raised after badly formed requests.
58
+ To do: implement some exception checking here, and identify
59
+ what sort of errors are being returned (eg. BadRequest,
60
+ Unauthorized, NotFound, etc.)
61
61
"""
62
62
63
- url = "" .join ([self .baseurl , doc_type , '/' , resource , '/' , doc_id ])
64
-
63
+ if doc_type :
64
+ doc_type = '%s/' % doc_type
65
+
66
+ url = "" .join ([self .baseurl , doc_type , resource , '/' , doc_id ])
67
+
65
68
if sequence :
66
69
url += '/' + str (sequence )
67
70
@@ -70,7 +73,7 @@ def _makerequest(self, resource, doc_id, doc_type='volume', sequence=None,
70
73
params ['format' ] = 'json'
71
74
if callback :
72
75
params ['callback' ] = callback
73
-
76
+
74
77
r = self .rsession .get (url , params = params )
75
78
r .raise_for_status ()
76
79
@@ -82,19 +85,19 @@ def getmeta(self, doc_id, doc_type='volume', json=False):
82
85
Args:
83
86
doc_id: document identifier
84
87
json: if json=True, the json representation of
85
- the resource is returned, otherwise efaults to an atom+xml
88
+ the resource is returned, otherwise defaults to an atom+xml
86
89
format.
87
- Return:
90
+ Return:
88
91
xml or json string
89
92
"""
90
93
return self ._makerequest ('meta' , doc_id , doc_type = doc_type , json = json )
91
94
92
95
93
- def getstructure (self , doc_id , doc_type = 'volume ' , json = False ):
96
+ def getstructure (self , doc_id , doc_type = '' , json = False ):
94
97
""" Retrieve a METS document.
95
98
Args:
96
99
doc_id: target document
97
- json: toggles json/xml
100
+ json: toggles json/xml
98
101
Return:
99
102
xml or json string
100
103
"""
@@ -106,9 +109,9 @@ def getpagemeta(self, doc_id, seq, doc_type='volume', json=False):
106
109
return self ._makerequest ('pagemeta' , doc_id , doc_type = doc_type , sequence = seq , json = json )
107
110
108
111
109
- def getaggregate (self , doc_id , doc_type = 'volume ' ):
110
- """ Get aggregate record data.
111
- Return:
112
+ def getaggregate (self , doc_id , doc_type = '' ):
113
+ """ Get aggregate record data.
114
+ Return:
112
115
zip content that contains tiff/jp2/jpeg, .txt OCR files,
113
116
+ Source METS (not the same as Hathi METS)
114
117
"""
@@ -152,7 +155,7 @@ def getdocumentocr(self, doc_id, start_page = 1, end_page = 1e5 , doc_type = 'vo
152
155
except :
153
156
break
154
157
return outPages
155
-
158
+
156
159
def getdocumentcoordocr (self , doc_id , start_page = 1 , end_page = 1e5 , doc_type = 'volume' ):
157
160
""" Get coordinate OCR for an entire document.
158
161
Return:
@@ -167,6 +170,6 @@ def getdocumentcoordocr(self, doc_id, start_page = 1, end_page = 1e5 , doc_type
167
170
except :
168
171
break
169
172
return outPages
170
-
171
173
172
-
174
+
175
+
0 commit comments