-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnotifier.py
executable file
·110 lines (91 loc) · 3.03 KB
/
notifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import argparse
import logging
import sqlite3
import sys
import time
from urllib.error import URLError
from sms.notifier import (
diff_bytes,
fetch_page,
generate_hash,
is_modified,
load_configuration,
notify,
)
def main() -> None:
    """
    Main script entry point.

    Loads configuration from config.json in the current directory, checks
    the configured pages one by one and sends a notification for every page
    whose contents changed since the last check.

    The last known hash and contents of each page are kept in the
    `sms_hashes` table of the SQLite database sms.db (opened relative to the
    current directory). This function does not create that table; it must
    already exist.

    Command line arguments:
        -v, --verbose   log at DEBUG level instead of INFO

    Exits with status 1 if the configuration cannot be loaded, is not a
    dictionary, or does not contain a "pages" entry.
    """
    # Load user configuration
    config = load_configuration("config.json")
    if config is None:
        print("Error: Could not load config file", file=sys.stderr)
        sys.exit(1)

    if not isinstance(config, dict):
        # Adjacent string literals are concatenated verbatim, so the
        # separating space has to be part of one of them.
        print(
            "Error: Config file is not valid. The top-level structure "
            "must be a dictionary",
            file=sys.stderr,
        )
        sys.exit(1)

    if "pages" not in config:
        # Report a missing key the same way as the other config problems
        # instead of crashing with a bare KeyError traceback.
        print(
            "Error: Config file is not valid. The \"pages\" entry is missing",
            file=sys.stderr,
        )
        sys.exit(1)

    watched_pages = config["pages"]

    # Parse command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("-v", "--verbose", action="store_true")
    args = parser.parse_args()

    logging.basicConfig(
        datefmt="%H:%M:%S",
        format="[%(asctime)s] %(levelname)s - %(message)s",
        level=logging.DEBUG if args.verbose else logging.INFO,
    )
    logger = logging.getLogger()

    db = sqlite3.connect("sms.db")
    try:
        for page in watched_pages:
            logger.info("Downloading %s ...", page["url"])
            try:
                contents = fetch_page(page["url"])
            except URLError as exc:
                # A page that cannot be fetched must not abort the run;
                # log it and move on to the next one.
                logger.error("HTTP error occurred: %s", exc)
                continue

            new_hash = generate_hash(contents)

            cur = db.cursor()
            cur.execute(
                "SELECT `hash`, `old_text` FROM `sms_hashes` WHERE `url`=?",
                (page["url"],),
            )
            res = cur.fetchone()

            # Create a record for the page if it hasn't been scraped yet
            if not res or not res[0]:
                logger.debug("New page, saving...")
                cur.execute(
                    "INSERT INTO `sms_hashes` (`url`, `hash`, "
                    "`old_text`) VALUES(?, ?, ?)",
                    (page["url"], new_hash, contents),
                )
                db.commit()
                logger.debug("Page saved")
                continue

            # Check if the page has changed. is_modified() is expected to
            # return True when the contents no longer match the stored
            # hash, so only the negated result means "nothing to do".
            old_hash = res[0]
            if not is_modified(contents, old_hash):
                logger.info("Page not modified")
                continue

            # Update the database first
            cur.execute(
                "UPDATE `sms_hashes` SET `hash` = ?, `old_text` = ? "
                "WHERE `url` = ?",
                (new_hash, contents, page["url"]),
            )
            db.commit()

            # Compile and send the message. A record without stored text
            # is diffed against a single empty line.
            oldlines = [b""] if res[1] is None else res[1].splitlines()
            newlines = contents.splitlines()
            logger.debug("Page modified, sending notification...")
            notify(config, diff_bytes(oldlines, newlines), page)
            logger.debug("Notification sent")

            # Short pause after a notification before the next page
            time.sleep(0.5)
    finally:
        # Close the connection even when a page check raises, so the
        # database handle is never leaked.
        db.close()

    logger.info("All pages checked")
if __name__ == "__main__":
    # Run the page check only when this file is executed as a script,
    # not when it is imported as a module.
    main()