Skip to content

Commit 33491bd

Browse files
#681 Refactored and simplified Dropbox sync mechanism
1 parent 707f369 commit 33491bd

File tree

15 files changed

+324
-68
lines changed

15 files changed

+324
-68
lines changed

.coveragerc

+1
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@ omit = ./*/__init__.py
77
./*/migrations/*
88
./dsmr_backend/mixins.py
99
./dsmr_plugins/modules/*
10+
./dsmr_dropbox/dropboxinc/*
-40 Bytes
Binary file not shown.

docs/locale/nl/LC_MESSAGES/changelog.po

-2
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@ msgid ""
88
msgstr ""
99
"Project-Id-Version: DSMR Reader v1.x\n"
1010
"Report-Msgid-Bugs-To: Dennis Siemensma <github@dennissiemensma.nl>\n"
11-
"POT-Creation-Date: 2019-08-11 15:09+0200\n"
12-
"PO-Revision-Date: 2019-08-11 15:13+0200\n"
1311
"Last-Translator: Dennis Siemensma <github@dennissiemensma.nl>\n"
1412
"Language: nl\n"
1513
"Language-Team: Dennis Siemensma <github@dennissiemensma.nl>\n"

dsmr_backup/services/backup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def get_backup_directory():
4848
if backup_directory.startswith('/'):
4949
return os.path.abspath(backup_directory)
5050
else:
51-
return os.path.join(settings.BASE_DIR, '..', backup_directory)
51+
return os.path.abspath(os.path.join(settings.BASE_DIR, '..', backup_directory))
5252

5353

5454
def create():

dsmr_backup/tests/test_backup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ def test_get_backup_directory(self):
9393
# Default.
9494
self.assertEqual(
9595
dsmr_backup.services.backup.get_backup_directory(),
96-
os.path.join(settings.BASE_DIR, '..', 'backups/')
96+
os.path.abspath(os.path.join(settings.BASE_DIR, '..', 'backups/'))
9797
)
9898

9999
# Custom.

dsmr_dropbox/dropboxinc/__init__.py

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
"""
2+
Copyright (c) 2017 Dropbox, Inc.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
"""
16+
from __future__ import absolute_import, division, print_function, unicode_literals
17+
18+
import hashlib
19+
import six
20+
21+
22+
class DropboxContentHasher(object):
    """
    Computes a hash using the same algorithm that the Dropbox API uses for
    the "content_hash" metadata field.

    The data is split into blocks of BLOCK_SIZE bytes, each block is hashed
    with SHA-256, and the concatenation of those block digests is hashed with
    SHA-256 again to produce the final hash.

    The digest() method returns a raw binary representation of the hash. The
    hexdigest() convenience method returns a hexadecimal-encoded version, which
    is what the "content_hash" metadata field uses.

    This class has the same interface as the hashers in the standard 'hashlib'
    package, with one difference: the object can no longer be used after
    digest() or hexdigest() has been called.

    Example:

        hasher = DropboxContentHasher()
        with open('some-file', 'rb') as f:
            while True:
                chunk = f.read(1024)  # or whatever chunk size you want
                if len(chunk) == 0:
                    break
                hasher.update(chunk)
        print(hasher.hexdigest())
    """

    BLOCK_SIZE = 4 * 1024 * 1024

    def __init__(self):
        self._overall_hasher = hashlib.sha256()
        self._block_hasher = hashlib.sha256()
        # Number of bytes fed into the current block hasher so far.
        self._block_pos = 0

        self.digest_size = self._overall_hasher.digest_size
        # hashlib classes also define 'block_size', but I don't know how people use that value

    def update(self, new_data):
        """
        Feeds a byte string into the hasher. May be called repeatedly with chunks of any size.

        Raises AssertionError when the hasher was already finished, or when new_data is not a byte string.
        """
        if self._overall_hasher is None:
            raise AssertionError(
                "can't use this object anymore; you already called digest()")

        # Raised explicitly (instead of using an 'assert' statement) so the check survives 'python -O'.
        if not isinstance(new_data, bytes):
            raise AssertionError(
                "Expecting a byte string, got {!r}".format(new_data))

        new_data_pos = 0
        while new_data_pos < len(new_data):
            # A full block is only flushed once more data arrives, so an input of exactly
            # N full blocks is finalized by _finish() rather than leaving an empty block behind.
            if self._block_pos == self.BLOCK_SIZE:
                self._overall_hasher.update(self._block_hasher.digest())
                self._block_hasher = hashlib.sha256()
                self._block_pos = 0

            space_in_block = self.BLOCK_SIZE - self._block_pos
            part = new_data[new_data_pos:(new_data_pos + space_in_block)]
            self._block_hasher.update(part)

            self._block_pos += len(part)
            new_data_pos += len(part)

    def _finish(self):
        """ Flushes the pending block and returns the overall hasher, invalidating this object. """
        if self._overall_hasher is None:
            raise AssertionError(
                "can't use this object anymore; you already called digest() or hexdigest()")

        if self._block_pos > 0:
            self._overall_hasher.update(self._block_hasher.digest())
            self._block_hasher = None
        h = self._overall_hasher
        self._overall_hasher = None  # Make sure we can't use this object anymore.
        return h

    def digest(self):
        """ Returns the raw binary hash. The object cannot be used afterwards. """
        return self._finish().digest()

    def hexdigest(self):
        """ Returns the hexadecimal-encoded hash. The object cannot be used afterwards. """
        return self._finish().hexdigest()

    def copy(self):
        """
        Returns an independent copy of this hasher, including the data hashed so far.

        Raises AssertionError when the hasher was already finished.
        """
        if self._overall_hasher is None:
            raise AssertionError(
                "can't use this object anymore; you already called digest() or hexdigest()")

        c = DropboxContentHasher.__new__(DropboxContentHasher)
        c._overall_hasher = self._overall_hasher.copy()
        c._block_hasher = self._block_hasher.copy()
        c._block_pos = self._block_pos
        # __init__ is bypassed above, so the public attribute must be carried over by hand.
        c.digest_size = self.digest_size
        return c
102+
103+
104+
class StreamHasher(object):
105+
"""
106+
A wrapper around a file-like object (either for reading or writing)
107+
that hashes everything that passes through it. Can be used with
108+
DropboxContentHasher or any 'hashlib' hasher.
109+
110+
Example:
111+
112+
hasher = DropboxContentHasher()
113+
with open('some-file', 'rb') as f:
114+
wrapped_f = StreamHasher(f, hasher)
115+
response = some_api_client.upload(wrapped_f)
116+
117+
locally_computed = hasher.hexdigest()
118+
assert response.content_hash == locally_computed
119+
"""
120+
121+
def __init__(self, f, hasher):
122+
self._f = f
123+
self._hasher = hasher
124+
125+
def close(self):
126+
return self._f.close()
127+
128+
def flush(self):
129+
return self._f.flush()
130+
131+
def fileno(self):
132+
return self._f.fileno()
133+
134+
def tell(self):
135+
return self._f.tell()
136+
137+
def read(self, *args):
138+
b = self._f.read(*args)
139+
self._hasher.update(b)
140+
return b
141+
142+
def write(self, b):
143+
self._hasher.update(b)
144+
return self._f.write(b)
145+
146+
def next(self):
147+
b = self._f.next()
148+
self._hasher.update(b)
149+
return b
150+
151+
def readline(self, *args):
152+
b = self._f.readline(*args)
153+
self._hasher.update(b)
154+
return b
155+
156+
def readlines(self, *args):
157+
bs = self._f.readlines(*args)
158+
for b in bs:
159+
self._hasher.update(b)
160+
return b

dsmr_dropbox/management/__init__.py

Whitespace-only changes.

dsmr_dropbox/management/commands/__init__.py

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
from django.core.management.base import BaseCommand
2+
from django.utils.translation import ugettext as _
3+
from django.utils import timezone
4+
5+
from dsmr_backup.models.settings import DropboxSettings
6+
import dsmr_dropbox.services
7+
8+
9+
class Command(BaseCommand):
    """ Management command that marks the Dropbox sync as due right now and then runs it. """
    help = _('Forces Dropbox sync.')

    def handle(self, **options):
        # Move the scheduled moment of the next sync to the present for all settings rows...
        right_now = timezone.now()
        DropboxSettings.objects.all().update(next_sync=right_now)

        # ... and trigger the regular sync service.
        dsmr_dropbox.services.sync()

dsmr_dropbox/services.py

+51-26
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import dropbox
88

99
from dsmr_backup.models.settings import DropboxSettings
10+
from dsmr_dropbox.dropboxinc.dropbox_content_hasher import DropboxContentHasher
1011
from dsmr_frontend.models.message import Notification
1112
import dsmr_backup.services.backup
1213

@@ -26,11 +27,14 @@ def sync():
2627

2728
backup_directory = dsmr_backup.services.backup.get_backup_directory()
2829

29-
# Just check for modified files since the last sync.
30-
for (_, _, filenames) in os.walk(backup_directory):
30+
# Sync each file, recursively.
31+
for (root, dirs, filenames) in os.walk(backup_directory):
3132
for current_file in filenames:
32-
current_file_path = os.path.join(backup_directory, current_file)
33-
check_synced_file(file_path=current_file_path, dropbox_settings=dropbox_settings)
33+
sync_file(
34+
dropbox_settings=dropbox_settings,
35+
local_root_dir=backup_directory,
36+
abs_file_path=os.path.abspath(os.path.join(root, current_file))
37+
)
3438

3539
# Try again in a while.
3640
DropboxSettings.objects.update(
@@ -41,24 +45,37 @@ def sync():
4145
)
4246

4347

44-
def check_synced_file(file_path, dropbox_settings):
45-
file_stats = os.stat(file_path)
48+
def sync_file(dropbox_settings, local_root_dir, abs_file_path):
49+
# The path we use in our Dropbox app folder.
50+
relative_file_path = abs_file_path.replace(local_root_dir, '')
4651

4752
# Ignore empty files.
48-
if file_stats.st_size == 0:
53+
if os.stat(abs_file_path).st_size == 0:
4954
return
5055

51-
last_modified = timezone.datetime.fromtimestamp(file_stats.st_mtime)
52-
last_modified = timezone.make_aware(last_modified)
53-
last_modified = timezone.localtime(last_modified)
54-
latest_sync = dropbox_settings.latest_sync
56+
# Check whether the file is already at Dropbox, if so, check its hash.
57+
dbx = dropbox.Dropbox(dropbox_settings.access_token)
5558

56-
# Ignore when file was not altered since last sync.
57-
if latest_sync and last_modified < timezone.localtime(latest_sync):
58-
return
59+
try:
60+
dropbox_meta = dbx.files_get_metadata(relative_file_path)
61+
except dropbox.exceptions.ApiError as exception:
62+
error_message = str(exception.error)
63+
dropbox_meta = None
64+
65+
# Unexpected.
66+
if 'not_found' not in error_message:
67+
return logger.error(' - Dropbox error: %s', error_message)
68+
69+
# Calculate local hash and compare with remote. Ignore if the remote file is exactly the same.
70+
if dropbox_meta and calculate_content_hash(abs_file_path) == dropbox_meta.content_hash:
71+
return logger.debug(' - Dropbox content hash is the same, skipping: %s', relative_file_path)
5972

6073
try:
61-
upload_chunked(file_path=file_path)
74+
upload_chunked(
75+
dropbox_settings=dropbox_settings,
76+
local_file_path=abs_file_path,
77+
remote_file_path=relative_file_path
78+
)
6279
except dropbox.exceptions.DropboxException as exception:
6380
error_message = str(exception.error)
6481
logger.error(' - Dropbox error: %s', error_message)
@@ -94,28 +111,23 @@ def check_synced_file(file_path, dropbox_settings):
94111
raise
95112

96113

97-
def upload_chunked(file_path):
114+
def upload_chunked(dropbox_settings, local_file_path, remote_file_path):
98115
""" Uploads a file in chunks to Dropbox, allowing it to resume on (connection) failure. """
99-
# For backend logging in Supervisor.
100-
logger.info(' - Uploading file to Dropbox: %s', file_path)
101-
102-
dropbox_settings = DropboxSettings.get_solo()
103-
file_name = os.path.split(file_path)[-1]
104-
dest_path = '/{}'.format(file_name) # The slash indicates it's relative to the root of app folder.
116+
logger.info(' - Syncing file with Dropbox: %s', remote_file_path)
105117

106118
dbx = dropbox.Dropbox(dropbox_settings.access_token)
107119
write_mode = dropbox.files.WriteMode.overwrite
108120

109-
file_handle = open(file_path, 'rb')
110-
file_size = os.path.getsize(file_path)
121+
file_handle = open(local_file_path, 'rb')
122+
file_size = os.path.getsize(local_file_path)
111123

112124
# Many thanks to https://stackoverflow.com/documentation/dropbox-api/409/uploading-a-file/1927/uploading-a-file-usin
113125
# g-the-dropbox-python-sdk#t=201610181733061624381
114126
CHUNK_SIZE = 2 * 1024 * 1024
115127

116128
# Small uploads should be transferred in one go.
117129
if file_size <= CHUNK_SIZE:
118-
dbx.files_upload(file_handle.read(), dest_path, mode=write_mode)
130+
dbx.files_upload(file_handle.read(), remote_file_path, mode=write_mode)
119131

120132
# Large uploads can be sent in chunks, by creating a session allowing multiple separate uploads.
121133
else:
@@ -125,7 +137,7 @@ def upload_chunked(file_path):
125137
session_id=upload_session_start_result.session_id,
126138
offset=file_handle.tell()
127139
)
128-
commit = dropbox.files.CommitInfo(path=dest_path, mode=write_mode)
140+
commit = dropbox.files.CommitInfo(path=remote_file_path, mode=write_mode)
129141

130142
# We keep sending the data in chunks, until we reach the last one, then we instruct Dropbox to finish the upload
131143
# by combining all the chunks sent previously.
@@ -137,3 +149,16 @@ def upload_chunked(file_path):
137149
cursor.offset = file_handle.tell()
138150

139151
file_handle.close()
152+
153+
154+
def calculate_content_hash(file_path):
    """ Returns the hex-encoded Dropbox hash of a file: https://www.dropbox.com/developers/reference/content-hash """
    content_hasher = DropboxContentHasher()
    read_size = DropboxContentHasher.BLOCK_SIZE

    with open(file_path, 'rb') as file_handle:
        # iter() with a sentinel keeps reading blocks until an empty byte string signals the end of the file.
        for block in iter(lambda: file_handle.read(read_size), b''):
            content_hasher.update(block)

    return content_hasher.hexdigest()

dsmr_dropbox/tests/dummy.txt

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Lorem ipsum dolor sit amet, consectetur adipiscing elit.

0 commit comments

Comments
 (0)