Skip to content

Commit 68a1e08

Browse files
authored
File Sizes cleanup (#1752)
* Sizes converted from KiB to bytes; size-formatting functions cleaned up
* Added a space between size and unit; removed factor multiplication from size calculation
* file_size stored in bytes in SubmissionDetails
* Added missing migration
* Resolved migration conflict
1 parent 2883349 commit 68a1e08

22 files changed

+158
-159
lines changed

compute_worker/compute_worker.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ def get_folder_size_in_gb(folder):
161161
total_size += os.path.getsize(path)
162162
elif os.path.isdir(path):
163163
total_size += get_folder_size_in_gb(path)
164-
return total_size / 1024 / 1024 / 1024
164+
return total_size / 1000 / 1000 / 1000 # GB: decimal system (1000^3)
165165

166166

167167
def delete_files_in_folder(folder):
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Generated by Django 2.2.28 on 2025-02-18 11:43
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
('analytics', '0001_initial'),
10+
]
11+
12+
operations = [
13+
migrations.AlterField(
14+
model_name='adminstoragedatapoint',
15+
name='backups_total',
16+
field=models.DecimalField(blank=True, decimal_places=2, max_digits=20, null=True),
17+
),
18+
]

src/apps/analytics/models.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ class UserStorageDataPoint(models.Model):
4848

4949
class AdminStorageDataPoint(models.Model):
5050
backups_total = models.DecimalField(
51-
max_digits=14, decimal_places=2, null=True, blank=True
52-
)
51+
max_digits=20, decimal_places=2, null=True, blank=True
52+
) # stores bytes
5353
at_date = models.DateTimeField()
5454
created_at = models.DateTimeField(auto_now_add=True)

src/apps/analytics/tasks.py

+12-12
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@ def create_storage_analytics_snapshot():
4343
for dataset in Data.objects.filter(Q(file_size__isnull=True) | Q(file_size__lt=0)):
4444
try:
4545
dataset.file_size = Decimal(
46-
dataset.data_file.size / 1024
47-
) # file_size is in KiB
46+
dataset.data_file.size
47+
) # file_size is in Bytes
4848
except Exception:
4949
dataset.file_size = Decimal(-1)
5050
finally:
@@ -56,8 +56,8 @@ def create_storage_analytics_snapshot():
5656
):
5757
try:
5858
submission.prediction_result_file_size = Decimal(
59-
submission.prediction_result.size / 1024
60-
) # prediction_result_file_size is in KiB
59+
submission.prediction_result.size
60+
) # prediction_result_file_size is in Bytes
6161
except Exception:
6262
submission.prediction_result_file_size = Decimal(-1)
6363
finally:
@@ -68,8 +68,8 @@ def create_storage_analytics_snapshot():
6868
):
6969
try:
7070
submission.scoring_result_file_size = Decimal(
71-
submission.scoring_result.size / 1024
72-
) # scoring_result_file_size is in KiB
71+
submission.scoring_result.size
72+
) # scoring_result_file_size is in Bytes
7373
except Exception:
7474
submission.scoring_result_file_size = Decimal(-1)
7575
finally:
@@ -80,8 +80,8 @@ def create_storage_analytics_snapshot():
8080
):
8181
try:
8282
submission.detailed_result_file_size = Decimal(
83-
submission.detailed_result.size / 1024
84-
) # detailed_result_file_size is in KiB
83+
submission.detailed_result.size
84+
) # detailed_result_file_size is in Bytes
8585
except Exception:
8686
submission.detailed_result_file_size = Decimal(-1)
8787
finally:
@@ -92,8 +92,8 @@ def create_storage_analytics_snapshot():
9292
):
9393
try:
9494
submissiondetails.file_size = Decimal(
95-
submissiondetails.data_file.size / 1024
96-
) # file_size is in KiB
95+
submissiondetails.data_file.size
96+
) # file_size is in Bytes
9797
except Exception:
9898
submissiondetails.file_size = Decimal(-1)
9999
finally:
@@ -277,7 +277,7 @@ def create_storage_analytics_snapshot():
277277
admin_storage_at_date[date] += size
278278

279279
for date in admin_storage_day_range:
280-
defaults = {"backups_total": admin_storage_at_date[date] / 1024.0}
280+
defaults = {"backups_total": admin_storage_at_date[date]}
281281
lookup_params = {"at_date": date}
282282
AdminStorageDataPoint.objects.update_or_create(
283283
defaults=defaults, **lookup_params
@@ -528,7 +528,7 @@ def create_storage_analytics_snapshot():
528528
)
529529
admin_data_point = AdminStorageDataPoint.objects.filter(at_date=date).first()
530530
admin_usage = (admin_data_point.backups_total or 0) if admin_data_point else 0
531-
orphaned_file_usage = Decimal(orphaned_files_size_per_date[date] / 1024)
531+
orphaned_file_usage = Decimal(orphaned_files_size_per_date[date])
532532
total_usage = (
533533
users_usage + admin_usage + orphaned_file_usage
534534
) # competitions_usage is included inside users_usage

src/apps/api/tests/test_datasets.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
class DatasetAPITests(APITestCase):
1313
def setUp(self):
1414
self.creator = UserFactory(username='creator', password='creator')
15-
self.existing_dataset = DataFactory(created_by=self.creator, name="Test!", file_size=1024)
15+
self.existing_dataset = DataFactory(created_by=self.creator, name="Test!", file_size=1000)
1616

1717
def test_dataset_api_checks_duplicate_names_for_same_user(self):
1818
self.client.login(username='creator', password='creator')
@@ -23,7 +23,7 @@ def test_dataset_api_checks_duplicate_names_for_same_user(self):
2323
'type': Data.COMPETITION_BUNDLE,
2424
'request_sassy_file_name': faker.file_name(),
2525
'file_name': faker.file_name(),
26-
'file_size': 1024,
26+
'file_size': 1000,
2727
})
2828

2929
assert resp.status_code == 400
@@ -34,7 +34,7 @@ def test_dataset_api_checks_duplicate_names_for_same_user(self):
3434
'name': 'Test!',
3535
'type': Data.COMPETITION_BUNDLE,
3636
'request_sassy_file_name': faker.file_name(),
37-
'file_size': 1024,
37+
'file_size': 1000,
3838
})
3939
assert resp.status_code == 200
4040

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Generated by Django 2.2.28 on 2025-02-18 11:51
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
('competitions', '0052_auto_20250129_1058'),
10+
]
11+
12+
operations = [
13+
migrations.AlterField(
14+
model_name='submission',
15+
name='detailed_result_file_size',
16+
field=models.DecimalField(blank=True, decimal_places=2, max_digits=15, null=True),
17+
),
18+
migrations.AlterField(
19+
model_name='submission',
20+
name='prediction_result_file_size',
21+
field=models.DecimalField(blank=True, decimal_places=2, max_digits=15, null=True),
22+
),
23+
migrations.AlterField(
24+
model_name='submission',
25+
name='scoring_result_file_size',
26+
field=models.DecimalField(blank=True, decimal_places=2, max_digits=15, null=True),
27+
),
28+
]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Generated by Django 2.2.28 on 2025-03-24 06:22
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
('competitions', '0053_auto_20250218_1151'),
10+
]
11+
12+
operations = [
13+
migrations.AlterField(
14+
model_name='submissiondetails',
15+
name='file_size',
16+
field=models.DecimalField(blank=True, decimal_places=2, max_digits=15, null=True),
17+
),
18+
]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Generated by Django 2.2.28 on 2025-03-24 06:50
2+
3+
from django.db import migrations
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
('competitions', '0054_auto_20250324_0622'),
10+
('competitions', '0053_competition_forum_enabled'),
11+
]
12+
13+
operations = [
14+
]

src/apps/competitions/models.py

+6-10
Original file line numberDiff line numberDiff line change
@@ -466,18 +466,16 @@ class SubmissionDetails(models.Model):
466466
]
467467
name = models.CharField(max_length=50)
468468
data_file = models.FileField(upload_to=PathWrapper('submission_details'), storage=BundleStorage)
469-
file_size = models.DecimalField(max_digits=10, decimal_places=2, null=True, blank=True) # in KiB
469+
file_size = models.DecimalField(max_digits=15, decimal_places=2, null=True, blank=True) # in Bytes
470470
submission = models.ForeignKey('Submission', on_delete=models.CASCADE, related_name='details')
471471
is_scoring = models.BooleanField(default=False)
472472

473473
def save(self, *args, **kwargs):
474474
if self.data_file and (not self.file_size or self.file_size == -1):
475475
try:
476-
# save file size as KiB
477476
# self.data_file.size returns bytes
478-
self.file_size = self.data_file.size / 1024
477+
self.file_size = self.data_file.size
479478
except TypeError:
480-
# file returns a None size, can't divide None / 1024
481479
# -1 indicates an error
482480
self.file_size = -1
483481
except botocore.exceptions.ClientError:
@@ -528,9 +526,9 @@ class Submission(ChaHubSaveMixin, models.Model):
528526
detailed_result = models.FileField(upload_to=PathWrapper('detailed_result'), null=True, blank=True,
529527
storage=BundleStorage)
530528

531-
prediction_result_file_size = models.DecimalField(max_digits=10, decimal_places=2, null=True, blank=True) # in KiB
532-
scoring_result_file_size = models.DecimalField(max_digits=10, decimal_places=2, null=True, blank=True) # in KiB
533-
detailed_result_file_size = models.DecimalField(max_digits=10, decimal_places=2, null=True, blank=True) # in KiB
529+
prediction_result_file_size = models.DecimalField(max_digits=15, decimal_places=2, null=True, blank=True) # in Bytes
530+
scoring_result_file_size = models.DecimalField(max_digits=15, decimal_places=2, null=True, blank=True) # in Bytes
531+
detailed_result_file_size = models.DecimalField(max_digits=15, decimal_places=2, null=True, blank=True) # in Bytes
534532

535533
secret = models.UUIDField(default=uuid.uuid4)
536534
celery_task_id = models.UUIDField(null=True, blank=True)
@@ -644,11 +642,9 @@ def save(self, ignore_submission_limit=False, **kwargs):
644642
for file_path_attr, file_size_attr in files_and_sizes_dict.items():
645643
if getattr(self, file_path_attr) and (not getattr(self, file_size_attr) or getattr(self, file_size_attr) == -1):
646644
try:
647-
# save file size as KiB
648645
# self.data_file.size returns bytes
649-
setattr(self, file_size_attr, getattr(self, file_path_attr).size / 1024)
646+
setattr(self, file_size_attr, getattr(self, file_path_attr).size)
650647
except TypeError:
651-
# file returns a None size, can't divide None / 1024
652648
# -1 indicates an error
653649
setattr(self, file_size_attr, Decimal(-1))
654650
except botocore.exceptions.ClientError:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Generated by Django 2.2.28 on 2025-02-18 11:00
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
('datasets', '0009_merge_20241203_1313'),
10+
]
11+
12+
operations = [
13+
migrations.AlterField(
14+
model_name='data',
15+
name='file_size',
16+
field=models.DecimalField(blank=True, decimal_places=2, max_digits=15, null=True),
17+
),
18+
]

src/apps/datasets/models.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ class Data(ChaHubSaveMixin, models.Model):
5959
key = models.UUIDField(default=uuid.uuid4, blank=True, unique=True)
6060
is_public = models.BooleanField(default=False)
6161
upload_completed_successfully = models.BooleanField(default=False)
62-
file_size = models.DecimalField(max_digits=10, decimal_places=2, null=True, blank=True) # in KiB
62+
file_size = models.DecimalField(max_digits=15, decimal_places=2, null=True, blank=True) # in Bytes
6363

6464
# This is true if the Data model was created as part of unpacking a competition. Competition bundles themselves
6565
# are NOT marked True, since they are not created by unpacking!
@@ -74,11 +74,10 @@ def get_download_url(self):
7474
def save(self, *args, **kwargs):
7575
if self.data_file and (not self.file_size or self.file_size == -1):
7676
try:
77-
# save file size as KiB
77+
# save file size in bytes
7878
# self.data_file.size returns bytes
79-
self.file_size = self.data_file.size / 1024
79+
self.file_size = self.data_file.size
8080
except TypeError:
81-
# file returns a None size, can't divide None / 1024
8281
# -1 indicates an error
8382
self.file_size = Decimal(-1)
8483
except botocore.exceptions.ClientError:

src/apps/pages/views.py

+3-19
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from announcements.models import Announcement, NewsPost
77

88
from django.shortcuts import render
9+
from utils.data import pretty_bytes
910

1011

1112
class HomeView(TemplateView):
@@ -89,9 +90,9 @@ def get_context_data(self, *args, **kwargs):
8990
for submission in context['submissions']:
9091
# Get filesize from each submissions's data
9192
if submission.data:
92-
submission.file_size = self.format_file_size(submission.data.file_size)
93+
submission.file_size = pretty_bytes(submission.data.file_size)
9394
else:
94-
submission.file_size = self.format_file_size(0)
95+
submission.file_size = pretty_bytes(0)
9596

9697
# Get queue from each submission
9798
queue_name = ""
@@ -110,23 +111,6 @@ def get_context_data(self, *args, **kwargs):
110111

111112
return context
112113

113-
def format_file_size(self, file_size):
114-
"""
115-
A custom function to convert file size to KB, MB, GB and return with the unit
116-
"""
117-
try:
118-
n = float(file_size)
119-
except Exception:
120-
return ""
121-
122-
units = ['KB', 'MB', 'GB']
123-
i = 0
124-
while n >= 1000 and i < len(units) - 1:
125-
n /= 1000
126-
i += 1
127-
128-
return f"{n:.1f} {units[i]}"
129-
130114

131115
class MonitorQueuesView(TemplateView):
132116
template_name = 'pages/monitor_queues.html'

src/apps/profiles/models.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,6 @@ def get_used_storage_space(self, binary=False):
159159
Returns in bytes
160160
"""
161161

162-
factor = 1024 if binary else 1000
163162
from datasets.models import Data
164163
from competitions.models import Submission, SubmissionDetails
165164

@@ -170,7 +169,7 @@ def get_used_storage_space(self, binary=False):
170169
created_by_id=self.id, file_size__gt=0, file_size__isnull=False
171170
).aggregate(Sum("file_size"))["file_size__sum"]
172171

173-
storage_used += users_datasets * factor if users_datasets else 0
172+
storage_used += users_datasets if users_datasets else 0
174173

175174
# Submissions
176175
users_submissions = Submission.objects.filter(owner_id=self.id).aggregate(
@@ -202,14 +201,14 @@ def get_used_storage_space(self, binary=False):
202201
)
203202
)
204203

205-
storage_used += users_submissions["size"] * factor if users_submissions["size"] else 0
204+
storage_used += users_submissions["size"] if users_submissions["size"] else 0
206205

207206
# Submissions details
208207
users_submissions_details = SubmissionDetails.objects.filter(
209208
submission__owner_id=self.id, file_size__gt=0, file_size__isnull=False
210209
).aggregate(Sum("file_size"))["file_size__sum"]
211210

212-
storage_used += users_submissions_details * factor if users_submissions_details else 0
211+
storage_used += users_submissions_details if users_submissions_details else 0
213212

214213
return storage_used
215214

src/static/js/ours/utils.js

+9-2
Original file line numberDiff line numberDiff line change
@@ -90,16 +90,23 @@ function pretty_date(date_string) {
9090
}
9191

9292
function pretty_bytes(bytes, decimalPlaces = 1, suffix = "B", binary = false) {
93+
94+
// Ensure bytes is a valid number
95+
bytes = parseFloat(bytes)
96+
if (isNaN(bytes) || bytes < 0) {
97+
return "" // Return empty string for invalid or negative values
98+
}
99+
93100
const factor = binary ? 1024.0 : 1000.0;
94101
const units = binary ? ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi'] : ['', 'k', 'M', 'G', 'T', 'P', 'E', 'Z'];
95102

96103
for (const unit of units) {
97104
if (Math.abs(bytes) < factor || unit === units[units.length - 1]) {
98-
return bytes.toFixed(decimalPlaces) + unit + suffix;
105+
return bytes.toFixed(decimalPlaces) + ' ' + unit + suffix;
99106
}
100107
bytes /= factor;
101108
}
102-
return bytes.toFixed(decimalPlaces) + units[units.length - 1] + suffix;
109+
return bytes.toFixed(decimalPlaces) + ' ' + units[units.length - 1] + suffix;
103110
}
104111

105112
/* ----------------------------------------------------------------------------

0 commit comments

Comments
 (0)