From 25bbf017ab4027ea3b90caff611b6629a86995e2 Mon Sep 17 00:00:00 2001 From: Jimmy Ma Date: Thu, 1 Sep 2022 14:33:59 -0700 Subject: [PATCH 1/5] Add airbyte-cron to bumpversion --- .bumpversion.cfg | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index bbc158631598f..430da1b8c707e 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -14,6 +14,8 @@ serialize = [bumpversion:file:airbyte-container-orchestrator/Dockerfile] +[bumpversion:file:airbyte-cron/Dockerfile] + [bumpversion:file:airbyte-metrics/reporter/Dockerfile] [bumpversion:file:airbyte-server/Dockerfile] From 77a178087a06f340a18c88dcf34aa22ebe67bb66 Mon Sep 17 00:00:00 2001 From: Jimmy Ma Date: Thu, 1 Sep 2022 14:35:45 -0700 Subject: [PATCH 2/5] Update airbyte-cron version to current --- airbyte-cron/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-cron/Dockerfile b/airbyte-cron/Dockerfile index 01398125e71b6..9a60b94e5c28d 100644 --- a/airbyte-cron/Dockerfile +++ b/airbyte-cron/Dockerfile @@ -2,7 +2,7 @@ ARG JDK_VERSION=19-slim-bullseye ARG JDK_IMAGE=openjdk:${JDK_VERSION} FROM ${JDK_IMAGE} AS cron -ARG VERSION=0.40.0-alpha +ARG VERSION=0.40.3 ENV APPLICATION airbyte-cron ENV VERSION ${VERSION} From af1c2204b35dbc9678c40ae676f0361fb119c179 Mon Sep 17 00:00:00 2001 From: Jimmy Ma Date: Thu, 1 Sep 2022 15:47:31 -0700 Subject: [PATCH 3/5] Add workspace clean up job --- airbyte-cron/build.gradle | 1 + .../cron/selfhealing/WorkspaceCleaner.java | 66 +++++++++++++++++++ docker-compose.yaml | 3 + 3 files changed, 70 insertions(+) create mode 100644 airbyte-cron/src/main/java/io/airbyte/cron/selfhealing/WorkspaceCleaner.java diff --git a/airbyte-cron/build.gradle b/airbyte-cron/build.gradle index df9d61e101545..74c2500681591 100644 --- a/airbyte-cron/build.gradle +++ b/airbyte-cron/build.gradle @@ -3,6 +3,7 @@ plugins { } dependencies { + implementation project(':airbyte-config:config-models') implementation project(':airbyte-workers') runtimeOnly 'io.micronaut:micronaut-http-server-netty:3.6.0' diff --git a/airbyte-cron/src/main/java/io/airbyte/cron/selfhealing/WorkspaceCleaner.java b/airbyte-cron/src/main/java/io/airbyte/cron/selfhealing/WorkspaceCleaner.java new file mode 100644 index 0000000000000..e966c72aeb236 --- /dev/null +++ b/airbyte-cron/src/main/java/io/airbyte/cron/selfhealing/WorkspaceCleaner.java @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.cron.selfhealing; + +import io.airbyte.config.Configs; +import io.airbyte.config.EnvConfigs; +import io.micronaut.scheduling.annotation.Scheduled; +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.time.LocalDateTime; +import java.time.OffsetDateTime; +import java.util.Date; +import javax.inject.Singleton; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.filefilter.AgeFileFilter; + +@Singleton +@Slf4j +public class WorkspaceCleaner { + + private final Path workspaceRoot; + private final long maxAgeFilesInDays; + + WorkspaceCleaner() { + // TODO Configs should get injected through micronaut + final Configs configs = new EnvConfigs(); + + this.workspaceRoot = configs.getWorkspaceRoot(); + // We align max file age on temporal for history consistency + // It might make sense configure this independently in the future + this.maxAgeFilesInDays = configs.getTemporalRetentionInDays(); + } + + /* + * Delete files older than maxAgeFilesInDays from the workspace + * + * NOTE: this is currently only intended to work for docker + */ + @Scheduled(fixedRate = "1d") + public void deleteOldFiles() throws IOException { + final Date oldestAllowed = getDateFromDaysAgo(maxAgeFilesInDays); + log.info("Deleting files older than {} days ({})", maxAgeFilesInDays, oldestAllowed); + + Files.walk(workspaceRoot) + .map(Path::toFile) + .filter(f -> new AgeFileFilter(oldestAllowed).accept(f)) + .forEach(file -> { + log.info("Deleting file: " + file.toString()); + FileUtils.deleteQuietly(file); + final File parentDir = file.getParentFile(); + if (parentDir.isDirectory() && parentDir.listFiles().length == 0) { + FileUtils.deleteQuietly(parentDir); + } + }); + } + + private static Date getDateFromDaysAgo(final long daysAgo) { + return Date.from(LocalDateTime.now().minusDays(daysAgo).toInstant(OffsetDateTime.now().getOffset())); + } + +} diff --git a/docker-compose.yaml b/docker-compose.yaml index a7935832e8f3f..d746cc3cba12d 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -187,6 +187,9 @@ services: - POSTGRES_PWD=${DATABASE_PASSWORD} - POSTGRES_SEEDS=${DATABASE_HOST} - POSTGRES_USER=${DATABASE_USER} + - WORKSPACE_ROOT=${WORKSPACE_ROOT} + volumes: + - workspace:${WORKSPACE_ROOT} volumes: workspace: name: ${WORKSPACE_DOCKER_MOUNT} From 9dc9579c5ade24cef130e6155ea1837b5e1c54ad Mon Sep 17 00:00:00 2001 From: Jimmy Ma Date: Thu, 1 Sep 2022 15:55:45 -0700 Subject: [PATCH 4/5] Add missing env var to docker-compose --- docker-compose.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/docker-compose.yaml b/docker-compose.yaml index d746cc3cba12d..4a3d55c5dd007 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -187,6 +187,7 @@ services: - POSTGRES_PWD=${DATABASE_PASSWORD} - POSTGRES_SEEDS=${DATABASE_HOST} - POSTGRES_USER=${DATABASE_USER} + - TEMPORAL_HISTORY_RETENTION_IN_DAYS=${TEMPORAL_HISTORY_RETENTION_IN_DAYS} - WORKSPACE_ROOT=${WORKSPACE_ROOT} volumes: - workspace:${WORKSPACE_ROOT} From 3358efa5475ef0f1a4bd7e4acb497fb84f2fd77d Mon Sep 17 00:00:00 2001 From: Jimmy Ma Date: Fri, 2 Sep 2022 10:06:47 -0700 Subject: [PATCH 5/5] Update file deletion logging --- .../java/io/airbyte/cron/selfhealing/WorkspaceCleaner.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/airbyte-cron/src/main/java/io/airbyte/cron/selfhealing/WorkspaceCleaner.java b/airbyte-cron/src/main/java/io/airbyte/cron/selfhealing/WorkspaceCleaner.java index e966c72aeb236..574574fe59824 100644 --- a/airbyte-cron/src/main/java/io/airbyte/cron/selfhealing/WorkspaceCleaner.java +++ b/airbyte-cron/src/main/java/io/airbyte/cron/selfhealing/WorkspaceCleaner.java @@ -14,6 +14,7 @@ import java.time.LocalDateTime; import java.time.OffsetDateTime; import java.util.Date; +import java.util.concurrent.atomic.AtomicInteger; import javax.inject.Singleton; import lombok.extern.slf4j.Slf4j; import org.apache.commons.io.FileUtils; @@ -46,17 +47,20 @@ public void deleteOldFiles() throws IOException { final Date oldestAllowed = getDateFromDaysAgo(maxAgeFilesInDays); log.info("Deleting files older than {} days ({})", maxAgeFilesInDays, oldestAllowed); + final AtomicInteger counter = new AtomicInteger(0); Files.walk(workspaceRoot) .map(Path::toFile) .filter(f -> new AgeFileFilter(oldestAllowed).accept(f)) .forEach(file -> { - log.info("Deleting file: " + file.toString()); + log.debug("Deleting file: " + file.toString()); FileUtils.deleteQuietly(file); + counter.incrementAndGet(); final File parentDir = file.getParentFile(); if (parentDir.isDirectory() && parentDir.listFiles().length == 0) { FileUtils.deleteQuietly(parentDir); } }); + log.info("deleted {} files", counter.get()); } private static Date getDateFromDaysAgo(final long daysAgo) {