Skip to content

Commit 469e75f

Browse files
authored
Merge pull request #422 from aiven/alex-add-delta-stats-to-local-tar-backup
Add basebackup mode with delta statistics [BF-356] #422
2 parents 2d160a4 + a5b2d60 commit 469e75f

File tree

6 files changed

+137
-12
lines changed

6 files changed

+137
-12
lines changed

.pylintrc

+1-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ disable=
2828

2929
[FORMAT]
3030
max-line-length=125
31-
max-module-lines=1000
31+
max-module-lines=1100
3232

3333
[REPORTS]
3434
output-format=text

README.rst

+10
Original file line numberDiff line numberDiff line change
@@ -690,6 +690,16 @@ tablespaces.
690690
Note that the ``local-tar`` backup mode can not be used on replica servers
691691
prior to PostgreSQL 9.6 unless the pgespresso extension is installed.
692692

693+
When using ``delta`` mode, only changed files are uploaded into the storage.
694+
On every backup, a snapshot of the data files is taken; this results in a manifest file
695+
describing the hashes of all the files that need to be backed up.
696+
New hashes are uploaded to the storage and used together with complementary
697+
manifest from the control file for restoration.
698+
In order to properly assess the efficiency of ``delta`` mode in comparison with
699+
``local-tar``, one can use ``local-tar-delta-stats`` mode, which behaves the same as
700+
``local-tar``, but also collects the metrics as if it were ``delta`` mode. It can help
701+
in deciding whether to switch to ``delta`` mode.
702+
693703
``basebackup_threads`` (default ``1``)
694704

695705
How many threads to use for tar, compress and encrypt tasks. Only applies for

pghoard/basebackup.py

+121-10
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
See LICENSE for details
66
"""
77
import datetime
8+
import hashlib
89
import io
910
import logging
1011
import os
@@ -18,7 +19,7 @@
1819
from queue import Empty, Queue
1920
from tempfile import NamedTemporaryFile
2021
from threading import Thread
21-
from typing import Optional
22+
from typing import Dict, Optional
2223

2324
import psycopg2
2425

@@ -29,11 +30,12 @@
2930
from . import common, version, wal
3031
from .basebackup_delta import DeltaBaseBackup
3132
from .common import (
32-
BackupFailure, BaseBackupFormat, BaseBackupMode, connection_string_using_pgpass,
33+
BackupFailure, BaseBackupFormat, BaseBackupMode, connection_string_using_pgpass, extract_pghoard_bb_v2_metadata,
3334
replication_connection_string_and_slot_using_pgpass, set_stream_nonblocking,
3435
set_subprocess_stdout_and_stderr_nonblocking, terminate_subprocess
3536
)
3637
from .patchedtarfile import tarfile
38+
from .rohmu.delta.common import EMBEDDED_FILE_SIZE
3739

3840
BASEBACKUP_NAME = "pghoard_base_backup"
3941
EMPTY_DIRS = [
@@ -82,6 +84,26 @@ def from_config(config) -> "CompressionData":
8284
return CompressionData(algorithm=algorithm, level=level)
8385

8486

87+
class HashFile:
88+
def __init__(self, *, path):
89+
self._file = open(path, "rb")
90+
self.hash = hashlib.blake2s()
91+
92+
def __enter__(self):
93+
return self
94+
95+
def __exit__(self, t, v, tb):
96+
self._file.close()
97+
98+
def read(self, n=None):
99+
data = self._file.read(n)
100+
self.hash.update(data)
101+
return data
102+
103+
def __getattr__(self, attr):
104+
return getattr(self._file, attr)
105+
106+
85107
class PGBaseBackup(Thread):
86108
def __init__(
87109
self,
@@ -126,6 +148,8 @@ def run(self):
126148
self.run_local_tar_basebackup()
127149
elif basebackup_mode == BaseBackupMode.delta:
128150
self.run_local_tar_basebackup(delta=True)
151+
elif basebackup_mode == BaseBackupMode.local_tar_delta_stats:
152+
self.run_local_tar_basebackup(with_delta_stats=True)
129153
elif basebackup_mode == BaseBackupMode.pipe:
130154
self.run_piped_basebackup()
131155
else:
@@ -409,7 +433,7 @@ def get_control_entries_for_tar(self, *, metadata, pg_control, backup_label):
409433
ti.mtime = mtime
410434
yield ti, None, False
411435

412-
def write_files_to_tar(self, *, files, tar):
436+
def write_files_to_tar(self, *, files, tar, delta_stats=None):
413437
for archive_path, local_path, missing_ok in files:
414438
if not self.running:
415439
raise BackupFailure("thread termination requested")
@@ -419,7 +443,18 @@ def write_files_to_tar(self, *, files, tar):
419443
continue
420444

421445
try:
422-
tar.add(local_path, arcname=archive_path, recursive=False)
446+
if delta_stats is None:
447+
tar.add(local_path, arcname=archive_path, recursive=False)
448+
else:
449+
if os.path.isdir(local_path):
450+
tar.add(local_path, arcname=archive_path, recursive=False)
451+
else:
452+
with HashFile(path=local_path) as fileobj:
453+
ti = tar.gettarinfo(name=local_path, arcname=archive_path)
454+
tar.addfile(ti, fileobj=fileobj)
455+
if ti.size > EMBEDDED_FILE_SIZE:
456+
# Tiny files are not uploaded separately, they are embed into the manifest, so skip them
457+
delta_stats[fileobj.hash.hexdigest()] = ti.size
423458
except (FileNotFoundError if missing_ok else NoException):
424459
self.log.warning("File %r went away while writing to tar, ignoring", local_path)
425460

@@ -508,7 +543,15 @@ def compression_data(self) -> CompressionData:
508543
return CompressionData.from_config(self.config)
509544

510545
def tar_one_file(
511-
self, *, temp_dir, chunk_path, files_to_backup, callback_queue, filetype="basebackup_chunk", extra_metadata=None
546+
self,
547+
*,
548+
temp_dir,
549+
chunk_path,
550+
files_to_backup,
551+
callback_queue,
552+
filetype="basebackup_chunk",
553+
extra_metadata=None,
554+
delta_stats=None
512555
):
513556
start_time = time.monotonic()
514557

@@ -522,7 +565,7 @@ def tar_one_file(
522565
fileobj=raw_output_obj
523566
) as output_obj:
524567
with tarfile.TarFile(fileobj=output_obj, mode="w") as output_tar:
525-
self.write_files_to_tar(files=files_to_backup, tar=output_tar)
568+
self.write_files_to_tar(files=files_to_backup, tar=output_tar, delta_stats=delta_stats)
526569

527570
input_size = output_obj.tell()
528571

@@ -585,13 +628,14 @@ def wait_for_chunk_transfer_to_complete(self, chunk_count, upload_results, chunk
585628
)
586629
return False
587630

588-
def handle_single_chunk(self, *, chunk_callback_queue, chunk_path, chunks, index, temp_dir):
631+
def handle_single_chunk(self, *, chunk_callback_queue, chunk_path, chunks, index, temp_dir, delta_stats=None):
589632
one_chunk_files = chunks[index]
590633
chunk_name, input_size, result_size = self.tar_one_file(
591634
callback_queue=chunk_callback_queue,
592635
chunk_path=chunk_path,
593636
temp_dir=temp_dir,
594637
files_to_backup=one_chunk_files,
638+
delta_stats=delta_stats,
595639
)
596640
self.log.info(
597641
"Queued backup chunk %r for transfer, chunks on disk (including partial): %r, current: %r, total chunks: %r",
@@ -604,7 +648,9 @@ def handle_single_chunk(self, *, chunk_callback_queue, chunk_path, chunks, index
604648
"files": [chunk[0] for chunk in one_chunk_files]
605649
}
606650

607-
def create_and_upload_chunks(self, chunks, data_file_format, temp_base_dir):
651+
def create_and_upload_chunks(
652+
self, chunks, data_file_format, temp_base_dir, delta_stats: Optional[Dict[str, int]] = None
653+
):
608654
start_time = time.monotonic()
609655
chunk_files = []
610656
upload_results = []
@@ -633,6 +679,7 @@ def create_and_upload_chunks(self, chunks, data_file_format, temp_base_dir):
633679
chunks=chunks,
634680
index=i,
635681
temp_dir=temp_base_dir,
682+
delta_stats=delta_stats,
636683
)
637684
pending_compress_and_encrypt_tasks.append(task)
638685
self.chunks_on_disk += 1
@@ -650,7 +697,31 @@ def create_and_upload_chunks(self, chunks, data_file_format, temp_base_dir):
650697

651698
return chunk_files
652699

653-
def run_local_tar_basebackup(self, delta=False):
700+
def fetch_all_data_files_hashes(self):
701+
hashes: Dict[str, int] = {}
702+
703+
for backup in self.get_remote_basebackups_info(self.site):
704+
if backup["metadata"].get("format") != BaseBackupFormat.v2:
705+
continue
706+
707+
key = os.path.join(self.site_config["prefix"], "basebackup", backup["name"])
708+
bmeta_compressed = self.storage.get_contents_to_string(key)[0]
709+
710+
with rohmufile.file_reader(
711+
fileobj=io.BytesIO(bmeta_compressed),
712+
metadata=backup["metadata"],
713+
key_lookup=lambda key_id: self.site_config["encryption_keys"][key_id]["private"]
714+
) as input_obj:
715+
meta = extract_pghoard_bb_v2_metadata(input_obj)
716+
717+
if "delta_stats" not in meta:
718+
continue
719+
720+
hashes.update(meta["delta_stats"]["hashes"])
721+
722+
return hashes
723+
724+
def run_local_tar_basebackup(self, delta=False, with_delta_stats=False):
654725
control_files_metadata_extra = {}
655726
pgdata = self.site_config["pg_data_directory"]
656727
if not os.path.isdir(pgdata):
@@ -756,13 +827,53 @@ def run_local_tar_basebackup(self, delta=False):
756827
pgdata=pgdata, tablespaces=tablespaces, target_chunk_size=target_chunk_size
757828
)
758829
chunks_count = len(chunks)
830+
831+
delta_stats: Optional[Dict[str, int]] = None
832+
if with_delta_stats:
833+
delta_stats = {}
834+
759835
# Tar up the chunks and submit them for upload; note that we start from chunk 1 here; chunk 0
760836
# is reserved for special files and metadata and will be generated last.
761-
chunk_files = self.create_and_upload_chunks(chunks, data_file_format, temp_base_dir)
837+
chunk_files = self.create_and_upload_chunks(
838+
chunks, data_file_format, temp_base_dir, delta_stats=delta_stats
839+
)
762840

763841
total_size_plain = sum(item["input_size"] for item in chunk_files)
764842
total_size_enc = sum(item["result_size"] for item in chunk_files)
765843

844+
if with_delta_stats:
845+
control_files_metadata_extra["delta_stats"] = {"hashes": delta_stats}
846+
847+
existing_hashes = self.fetch_all_data_files_hashes()
848+
new_hashes = {k: delta_stats[k] for k in set(delta_stats).difference(set(existing_hashes))}
849+
850+
planned_upload_size = sum(new_hashes.values())
851+
planned_upload_count = len(new_hashes)
852+
853+
if existing_hashes:
854+
# Send ratio metrics for every backup except for the first one
855+
planned_total_size = sum(delta_stats.values())
856+
planned_total_count = len(delta_stats)
857+
if planned_total_count:
858+
self.metrics.gauge(
859+
"pghoard.planned_delta_backup_changed_data_files_ratio",
860+
planned_upload_count / planned_total_count
861+
)
862+
if planned_total_size:
863+
self.metrics.gauge(
864+
"pghoard.planned_delta_backup_changed_data_size_ratio",
865+
planned_upload_size / planned_total_size
866+
)
867+
self.metrics.gauge(
868+
"pghoard.planned_delta_backup_remained_data_size_raw",
869+
planned_total_size - planned_upload_size,
870+
)
871+
872+
self.metrics.increase("pghoard.planned_delta_backup_total_size", inc_value=planned_upload_size)
873+
self.metrics.gauge("pghoard.planned_delta_backup_upload_size", planned_upload_size)
874+
self.metrics.increase("pghoard.planned_delta_backup_total_files", inc_value=planned_upload_count)
875+
self.metrics.gauge("pghoard.planned_delta_backup_upload_files", planned_upload_count)
876+
766877
control_files_metadata_extra["chunks"] = chunk_files
767878

768879
# Everything is now tarred up, grab the latest pg_control and stop the backup process

pghoard/common.py

+1
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ class BaseBackupMode(StrEnum):
4242
basic = "basic"
4343
delta = "delta"
4444
local_tar = "local-tar"
45+
local_tar_delta_stats = "local-tar-delta-stats"
4546
pipe = "pipe"
4647

4748

requirements.dev.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Use pip for build requirements to harmonize between OS versions
22
mock
3-
pylint>=2.4.3
3+
pylint>=2.4.3,<=2.7.2
44
pylint-quotes
55
pytest
66
pytest-mock

test/test_basebackup.py

+3
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,9 @@ def test_basebackups_basic_lzma(self, capsys, db, pghoard_lzma, tmpdir):
362362
def test_basebackups_delta(self, capsys, db, pghoard, tmpdir):
363363
self._test_basebackups(capsys, db, pghoard, tmpdir, BaseBackupMode.delta)
364364

365+
def test_basebackups_local_tar_with_delta_stats(self, capsys, db, pghoard, tmpdir):
366+
self._test_basebackups(capsys, db, pghoard, tmpdir, BaseBackupMode.local_tar_delta_stats)
367+
365368
def test_basebackups_local_tar_nonexclusive(self, capsys, db, pghoard, tmpdir):
366369
if db.pgver < "9.6":
367370
pytest.skip("PostgreSQL 9.6+ required for non-exclusive backups")

0 commit comments

Comments
 (0)