diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 941b8f7e1bf67..0642342ce4d42 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -14,6 +14,8 @@ serialize = [bumpversion:file:airbyte-container-orchestrator/Dockerfile] +[bumpversion:file:airbyte-cron/Dockerfile] + [bumpversion:file:airbyte-metrics/reporter/Dockerfile] [bumpversion:file:airbyte-server/Dockerfile] diff --git a/airbyte-cron/Dockerfile b/airbyte-cron/Dockerfile index 01398125e71b6..9a60b94e5c28d 100644 --- a/airbyte-cron/Dockerfile +++ b/airbyte-cron/Dockerfile @@ -2,7 +2,7 @@ ARG JDK_VERSION=19-slim-bullseye ARG JDK_IMAGE=openjdk:${JDK_VERSION} FROM ${JDK_IMAGE} AS cron -ARG VERSION=0.40.0-alpha +ARG VERSION=0.40.3 ENV APPLICATION airbyte-cron ENV VERSION ${VERSION} diff --git a/airbyte-cron/build.gradle b/airbyte-cron/build.gradle index df9d61e101545..74c2500681591 100644 --- a/airbyte-cron/build.gradle +++ b/airbyte-cron/build.gradle @@ -3,6 +3,7 @@ plugins { } dependencies { + implementation project(':airbyte-config:config-models') implementation project(':airbyte-workers') runtimeOnly 'io.micronaut:micronaut-http-server-netty:3.6.0' diff --git a/airbyte-cron/src/main/java/io/airbyte/cron/selfhealing/WorkspaceCleaner.java b/airbyte-cron/src/main/java/io/airbyte/cron/selfhealing/WorkspaceCleaner.java new file mode 100644 index 0000000000000..574574fe59824 --- /dev/null +++ b/airbyte-cron/src/main/java/io/airbyte/cron/selfhealing/WorkspaceCleaner.java @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
/*
 * Copyright (c) 2022 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.cron.selfhealing;

import io.airbyte.config.Configs;
import io.airbyte.config.EnvConfigs;
import io.micronaut.scheduling.annotation.Scheduled;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.LocalDateTime;
import java.time.OffsetDateTime;
import java.util.Date;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Stream;
import javax.inject.Singleton;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.AgeFileFilter;

/**
 * Scheduled job that removes stale files from the workspace directory.
 *
 * <p>
 * The workspace root and the maximum file age are both read from {@link EnvConfigs} when the bean
 * is constructed; the maximum age reuses the temporal retention setting.
 */
@Singleton
@Slf4j
public class WorkspaceCleaner {

  private final Path workspaceRoot;
  private final long maxAgeFilesInDays;

  WorkspaceCleaner() {
    // TODO Configs should get injected through micronaut
    final Configs configs = new EnvConfigs();

    this.workspaceRoot = configs.getWorkspaceRoot();
    // We align max file age on temporal for history consistency
    // It might make sense to configure this independently in the future
    this.maxAgeFilesInDays = configs.getTemporalRetentionInDays();
  }

  /**
   * Deletes regular files older than {@code maxAgeFilesInDays} from the workspace, then removes any
   * directories that became empty as a result. The workspace root itself is never removed.
   *
   * <p>
   * Only regular files are matched against the age cut-off. Directories are never deleted because
   * of their own timestamp, since a recursive delete of an old directory would also destroy newer
   * files stored inside it.
   *
   * <p>
   * NOTE: this is currently only intended to work for docker
   *
   * @throws IOException if the workspace root cannot be opened for traversal
   */
  @Scheduled(fixedRate = "1d")
  public void deleteOldFiles() throws IOException {
    final Date oldestAllowed = getDateFromDaysAgo(maxAgeFilesInDays);
    log.info("Deleting files older than {} days ({})", maxAgeFilesInDays, oldestAllowed);

    // The cut-off is fixed for the whole run, so one filter instance is enough.
    final AgeFileFilter olderThanCutoff = new AgeFileFilter(oldestAllowed);
    final AtomicInteger counter = new AtomicInteger(0);

    // Files.walk holds open directory handles until the stream is closed.
    try (final Stream<Path> paths = Files.walk(workspaceRoot)) {
      paths
          .filter(Files::isRegularFile)
          .map(Path::toFile)
          .filter(candidate -> olderThanCutoff.accept(candidate))
          .forEach(file -> {
            log.debug("Deleting file: {}", file);
            // Only count (and clean up after) deletions that actually happened.
            if (FileUtils.deleteQuietly(file)) {
              counter.incrementAndGet();
              pruneEmptyParents(file.getParentFile());
            }
          });
    }
    log.info("deleted {} files", counter.get());
  }

  /**
   * Removes {@code startDir} and then each of its ancestors for as long as they are empty, stopping
   * at (and never deleting) the workspace root.
   *
   * @param startDir directory that just had a file removed from it; may be {@code null}
   */
  private void pruneEmptyParents(final File startDir) {
    File dir = startDir;
    while (dir != null) {
      final Path dirPath = dir.toPath();
      // Stay strictly inside the workspace: never touch the root or anything outside of it.
      if (dirPath.equals(workspaceRoot) || !dirPath.startsWith(workspaceRoot)) {
        return;
      }
      // File#delete refuses to remove a non-empty directory, so a file written concurrently by
      // another process is never lost (unlike a recursive delete after an emptiness check).
      if (!dir.delete()) {
        return;
      }
      log.debug("Deleted empty directory: {}", dir);
      dir = dir.getParentFile();
    }
  }

  /**
   * Computes the point in time {@code daysAgo} days before now.
   *
   * @param daysAgo number of days to subtract from the current time
   * @return the resulting instant as a {@link Date}
   */
  private static Date getDateFromDaysAgo(final long daysAgo) {
    // Read the clock once so the local time and the offset come from the same snapshot.
    final OffsetDateTime now = OffsetDateTime.now();
    final LocalDateTime cutoff = now.toLocalDateTime().minusDays(daysAgo);
    return Date.from(cutoff.toInstant(now.getOffset()));
  }

}