Skip to content

Commit 78ce24f

Browse files
Merge pull request #552 from aiven/alex-fix-hardcoded-wal-type
Fix handling of compressed timeline files on startup [BF-1391]
2 parents f0d3221 + 7e713c6 commit 78ce24f

File tree

3 files changed

+32
-14
lines changed

3 files changed

+32
-14
lines changed

pghoard/pghoard.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -601,11 +601,13 @@ def startup_walk_for_missed_files(self):
601601
with open(metadata_path, "r") as fp:
602602
metadata = json.load(fp)
603603

604+
file_type = FileType.Wal if is_xlog else FileType.Timeline
605+
604606
transfer_event = UploadEvent(
605-
file_type=FileType.Wal,
607+
file_type=file_type,
606608
backup_site_name=site,
607609
file_size=os.path.getsize(full_path),
608-
file_path=FileTypePrefixes[FileType.Wal] / filename,
610+
file_path=FileTypePrefixes[file_type] / filename,
609611
source_data=Path(full_path),
610612
callback_queue=None,
611613
metadata=metadata

test/test_pghoard.py

+21-12
Original file line numberDiff line numberDiff line change
@@ -529,27 +529,34 @@ def test_startup_walk_for_missed_uncompressed_files(self):
529529
assert self.pghoard.compression_queue.qsize() == 2
530530
assert self.pghoard.transfer_queue.qsize() == 0
531531

532-
def test_startup_walk_for_missed_uncompressed_files_timeline(self):
532+
@pytest.mark.parametrize(
533+
"file_type, file_name", [(FileType.Wal, "000000010000000000000004"), (FileType.Timeline, "00000002.history")]
534+
)
535+
def test_startup_walk_for_missed_uncompressed_file_type(self, file_type: FileType, file_name: str):
533536
compressed_wal_path, _ = self.pghoard.create_backup_site_paths(self.test_site)
534537
uncompressed_wal_path = compressed_wal_path + "_incoming"
535-
with open(os.path.join(uncompressed_wal_path, "00000002.history"), "wb") as fp:
538+
with open(os.path.join(uncompressed_wal_path, file_name), "wb") as fp:
536539
fp.write(b"foo")
537540
self.pghoard.startup_walk_for_missed_files()
538541
assert self.pghoard.compression_queue.qsize() == 1
539542
assert self.pghoard.transfer_queue.qsize() == 0
540543
compress_event = self.pghoard.compression_queue.get(timeout=1.0)
541-
assert compress_event.file_type == FileType.Timeline
544+
assert compress_event.file_type == file_type
542545

543-
def test_startup_walk_for_missed_uncompressed_files_wal(self):
546+
@pytest.mark.parametrize(
547+
"file_type, file_name", [(FileType.Wal, "000000010000000000000005"), (FileType.Timeline, "00000003.history")]
548+
)
549+
def test_startup_walk_for_missed_compressed_file_type(self, file_type: FileType, file_name: str):
544550
compressed_wal_path, _ = self.pghoard.create_backup_site_paths(self.test_site)
545-
uncompressed_wal_path = compressed_wal_path + "_incoming"
546-
with open(os.path.join(uncompressed_wal_path, "000000010000000000000004"), "wb") as fp:
551+
with open(os.path.join(compressed_wal_path, file_name), "wb") as fp:
547552
fp.write(b"foo")
553+
with open(os.path.join(compressed_wal_path, f"{file_name}.metadata"), "wb") as fp:
554+
fp.write(b"{}")
548555
self.pghoard.startup_walk_for_missed_files()
549-
assert self.pghoard.compression_queue.qsize() == 1
550-
assert self.pghoard.transfer_queue.qsize() == 0
551-
compress_event = self.pghoard.compression_queue.get(timeout=1.0)
552-
assert compress_event.file_type == FileType.Wal
556+
assert self.pghoard.compression_queue.qsize() == 0
557+
assert self.pghoard.transfer_queue.qsize() == 1
558+
upload_event = self.pghoard.transfer_queue.get(timeout=1.0)
559+
assert upload_event.file_type == file_type
553560

554561

555562
class TestPGHoardWithPG:
@@ -597,8 +604,6 @@ def test_pause_on_disk_full(self, db, pghoard_separate_volume, caplog):
597604
# MiB so if logic for automatically suspending pg_receive(xlog|wal) wasn't working the volume
598605
# would certainly fill up and the files couldn't be processed. Now this should work fine.
599606
for _ in range(16):
600-
# Note: do not combine two function call in one select, PG executes it differently and
601-
# sometimes looks like it generates less WAL files than we wanted
602607
switch_wal(conn)
603608
conn.close()
604609

@@ -625,6 +630,10 @@ def test_surviving_pg_receivewal_hickup(self, db, pghoard):
625630
if pghoard.receivexlogs[pghoard.test_site].is_alive():
626631
pghoard.receivexlogs[pghoard.test_site].join()
627632
del pghoard.receivexlogs[pghoard.test_site]
633+
# stopping the thread is not enough, it's possible that killed receiver will leave incomplete partial files
634+
# around, pghoard is capable of cleaning those up but needs to be restarted, for the test it should be OK
635+
# just to call startup_walk_for_missed_files, so it takes care of cleaning up
636+
pghoard.startup_walk_for_missed_files()
628637

629638
n_xlogs = pghoard.transfer_agent_state[pghoard.test_site]["upload"]["xlog"]["xlogs_since_basebackup"]
630639

test/util.py

+7
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,16 @@ def switch_wal(connection):
2424
cur = connection.cursor()
2525
# Force allocating a XID, otherwise if there was no activity we will
2626
# stay on the same WAL
27+
# Note: do not combine two function call in one select, PG executes it differently and
28+
# sometimes looks like it generates less WAL files than we wanted
2729
cur.execute("SELECT txid_current()")
2830
if connection.server_version >= 100000:
2931
cur.execute("SELECT pg_switch_wal()")
3032
else:
3133
cur.execute("SELECT pg_switch_xlog()")
34+
# This should fix flaky tests, which expect a specific number of WAL files which never arrive.
35+
# Quite often the last WAL would not be finalized by walreceiver unless there is some extra activity after
36+
# switching, the bug should be fixed in PG 15
37+
# https://github.com/postgres/postgres/commit/596ba75cb11173a528c6b6ec0142a282e42b69ec
38+
cur.execute("SELECT txid_current()")
3239
cur.close()

0 commit comments

Comments
 (0)