-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathxkcd.py
116 lines (95 loc) · 3.59 KB
/
xkcd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# -*- coding: utf-8 -*-
import json
import os
import urllib2, urllib
import requests
# Load the comic dump once at import time; the export loop under
# ``__main__`` iterates over ``backup``.  The ``with`` block guarantees the
# file is closed even when the JSON is malformed and json.load() raises.
with open("backup.json", "r") as fp:
    backup = json.load(fp)
# Text skeleton for one comic page: a run of "Key: value" header lines, an
# empty line, then the transcription.  It is filled in with str.format();
# ``{num:0>4}`` left-pads the comic number with zeros to four characters.
# Every header line deliberately keeps its single space before the newline.
TEMPLATE = (
    u"Title: {title} \n"
    u"Slug: {num} \n"
    u"Category: xkcd \n"
    u"Date: {date} \n"
    u"SourceNum: {num} \n"
    u"SourceTitle: {sourcetitle} \n"
    u"Image: /comics/{num:0>4}{ext} \n"
    u"MicroImage: /comics/{num:0>4}_micro{uext} \n"
    u"MiniImage: /comics/{num:0>4}_mini{mext} \n"
    u"Description: {description} \n"
    u"\n"
    u"{transcription}"
)
# Pick the fastest StringIO implementation that is available: the C
# version first, then the pure-Python module, and finally io.StringIO on
# interpreters that ship neither of the legacy modules.
try:
    from cStringIO import StringIO
except ImportError:
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO
class RangeError(IOError):
    """Signals that a requested byte range cannot be satisfied."""
class HTTPRangeHandler(urllib2.BaseHandler):
    """urllib2 handler that hands Range-related responses back to the caller.

    Byte ranges are a plain HTTP feature, so nothing about the request is
    rewritten here.  The handler only intercepts the "206 Partial Content"
    and "416 Requested Range Not Satisfiable" statuses and returns each as
    a response object with ``code`` and ``msg`` filled in, leaving it to
    the caller to inspect the status.

    Example:
        import urllib2
        range_handler = HTTPRangeHandler()
        opener = urllib2.build_opener(range_handler)
        # install it
        urllib2.install_opener(opener)
        # create Request and set Range header
        req = urllib2.Request('http://www.python.org/')
        req.add_header('Range', 'bytes=30-50')
        f = urllib2.urlopen(req)
    """

    def _wrap(self, req, fp, code, msg, hdrs):
        """Build a response around *fp* carrying the given status."""
        response = urllib.addinfourl(fp, hdrs, req.get_full_url())
        response.code, response.msg = code, msg
        return response

    def http_error_206(self, req, fp, code, msg, hdrs):
        # Partial Content: the server honoured the Range header.
        return self._wrap(req, fp, code, msg, hdrs)

    def http_error_416(self, req, fp, code, msg, hdrs):
        # Range Not Satisfiable: returned as a response, not raised.
        return self._wrap(req, fp, code, msg, hdrs)
def download_partial(url, target):
    """Download *url* into *target*, resuming a partial file when possible.

    If *target* already holds data, a ``Range: bytes=<size>-`` header asks
    the server for the remainder only.  What happens next depends on the
    status of the response:

    * 206 - the server sent the missing tail; it is appended to *target*.
    * 200 - the server sent the complete body (no Range was requested, or
      the server ignored it); *target* is rewritten from scratch.
    * 416 - the requested range starts at or past the end of the resource;
      nothing is written.  This is what a server answers when the local
      file is already complete, but a local file that is longer than the
      remote one is not told apart from that case.

    As a side effect an opener containing HTTPRangeHandler is installed
    globally via urllib2.install_opener().

    Args:
        url: address of the resource to fetch.
        target: path of the local file to create or extend.

    Raises:
        IOError: the server answered with a status other than 200, 206 or
            416 (urllib2 itself raises HTTPError, an IOError subclass, for
            statuses it treats as errors).
    """
    # HTTPRangeHandler makes urlopen() return 416 responses, so the status
    # can be examined below.
    opener = urllib2.build_opener(HTTPRangeHandler())
    urllib2.install_opener(opener)

    request = urllib2.Request(url)
    try:
        size = os.path.getsize(target)
    except OSError:
        # No usable local file yet: start from the beginning.
        size = 0
    if size:
        request.add_header("Range", "bytes={}-".format(size))

    response = urllib2.urlopen(request)
    try:
        status = response.getcode()
        if status == 416:
            return
        if status == 206:
            mode = 'ab'
        elif status == 200:
            mode = 'wb'
        else:
            # Refuse to touch the local file for a status we do not
            # understand rather than risk truncating good data.
            raise IOError(
                "unexpected HTTP status {} for {}".format(status, url))
        # Binary mode: the payload must reach the disk byte-for-byte.
        with open(target, mode) as out:
            for chunk in iter(lambda: response.read(64 * 1024), b''):
                out.write(chunk)
    finally:
        response.close()
if __name__ == "__main__":
    # For every published comic in the dump: print the wget commands that
    # would fetch its images (they are only printed, not executed), look up
    # the original title on xkcd.com and write the rendered page to
    # content/xkcd/<number>.md.
    for comic in backup:
        c = comic["fields"]
        cid = c["cid"]
        published = c["published"]
        title = c["title"]
        text = c["text"]
        image = c["image"]
        thumb = c["thumbnail"]
        transcription = c["transcription"]

        # Entries without a publication date are skipped.
        if published is None:
            continue

        ext = str(os.path.splitext(image)[1])
        print(u"wget -c http://xkcd.ru/{img} -O content/comics/{num:0>4}{ext}".format(img=image, num=cid, ext=ext))
        uext = str(os.path.splitext(thumb)[1])
        print(u"wget -c http://xkcd.ru/{img} -O content/comics/{num:0>4}_micro{ext}".format(img=thumb, num=cid, ext=uext))

        print(u"Get xkcd.com/{}".format(cid))
        response = requests.get("http://xkcd.com/{num}/info.0.json".format(num=cid))
        # Fail with the HTTP status instead of an opaque JSON decode error
        # when xkcd.com has no metadata for this number.
        response.raise_for_status()
        sourcetitle = response.json()["title"]

        # The mini image has no extension of its own in the dump; it reuses
        # the extension of the full-size image (mext=ext).
        page = TEMPLATE.format(
            title=title,
            num=cid,
            date=published,
            sourcetitle=sourcetitle,
            description=text,
            transcription=transcription,
            ext=ext,
            mext=ext,
            uext=uext,
        )
        # ``with`` closes the file even if the write fails.
        with open(u"content/xkcd/{num:0>4}.md".format(num=cid), "w") as fp:
            fp.write(page.encode('utf-8'))
        # Report success only after the page is actually on disk.
        print(u"Written content/xkcd/{num:0>4}.md".format(num=cid))