Skip to content

Commit

Permalink
Add scheduled task to clean up old files from workspace (#16247)
Browse files Browse the repository at this point in the history
* Add airbyte-cron to bumpversion

* Update airbyte-cron version to current

* Add workspace clean up job

* Add missing env var to docker-compose

* Update file deletion logging
  • Loading branch information
gosusnp authored Sep 2, 2022
1 parent e831a71 commit bf48791
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 1 deletion.
2 changes: 2 additions & 0 deletions .bumpversion.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ serialize =

[bumpversion:file:airbyte-container-orchestrator/Dockerfile]

[bumpversion:file:airbyte-cron/Dockerfile]

[bumpversion:file:airbyte-metrics/reporter/Dockerfile]

[bumpversion:file:airbyte-server/Dockerfile]
Expand Down
2 changes: 1 addition & 1 deletion airbyte-cron/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ ARG JDK_VERSION=19-slim-bullseye
ARG JDK_IMAGE=openjdk:${JDK_VERSION}
FROM ${JDK_IMAGE} AS cron

ARG VERSION=0.40.0-alpha
ARG VERSION=0.40.3

ENV APPLICATION airbyte-cron
ENV VERSION ${VERSION}
Expand Down
1 change: 1 addition & 0 deletions airbyte-cron/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ plugins {
}

dependencies {
implementation project(':airbyte-config:config-models')
implementation project(':airbyte-workers')

runtimeOnly 'io.micronaut:micronaut-http-server-netty:3.6.0'
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
* Copyright (c) 2022 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.cron.selfhealing;

import io.airbyte.config.Configs;
import io.airbyte.config.EnvConfigs;
import io.micronaut.scheduling.annotation.Scheduled;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.LocalDateTime;
import java.time.OffsetDateTime;
import java.util.Date;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Stream;
import javax.inject.Singleton;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.AgeFileFilter;

/**
 * Scheduled job that removes old entries from the workspace directory.
 *
 * <p>
 * The workspace location and the maximum age (in days) are both read from {@link EnvConfigs} when
 * the bean is constructed; the maximum age reuses the temporal retention setting.
 */
@Singleton
@Slf4j
public class WorkspaceCleaner {

  private final Path workspaceRoot;
  private final long maxAgeFilesInDays;

  WorkspaceCleaner() {
    // TODO Configs should get injected through micronaut
    final Configs configs = new EnvConfigs();

    this.workspaceRoot = configs.getWorkspaceRoot();
    // We align max file age on temporal for history consistency
    // It might make sense to configure this independently in the future
    this.maxAgeFilesInDays = configs.getTemporalRetentionInDays();
  }

  /**
   * Delete files older than maxAgeFilesInDays from the workspace, removing a parent directory as well
   * once it has been emptied.
   *
   * <p>
   * NOTE: this is currently only intended to work for docker
   *
   * <p>
   * NOTE(review): the walk also yields directories (including the workspace root itself) and
   * {@code FileUtils.deleteQuietly} removes a directory together with its content, so a directory
   * whose own timestamp is older than the cutoff is removed recursively -- confirm this is intended.
   *
   * @throws IOException if the workspace root cannot be accessed to start the traversal
   */
  @Scheduled(fixedRate = "1d")
  public void deleteOldFiles() throws IOException {
    final Date oldestAllowed = getDateFromDaysAgo(maxAgeFilesInDays);
    log.info("Deleting files older than {} days ({})", maxAgeFilesInDays, oldestAllowed);

    // The cutoff is fixed for the whole run, so a single filter instance is enough.
    final AgeFileFilter olderThanCutoff = new AgeFileFilter(oldestAllowed);
    final AtomicInteger counter = new AtomicInteger(0);

    // Files.walk holds open directory handles; try-with-resources releases them even on failure.
    try (final Stream<Path> paths = Files.walk(workspaceRoot)) {
      paths
          .map(Path::toFile)
          .filter(candidate -> olderThanCutoff.accept(candidate))
          .forEach(file -> {
            log.debug("Deleting file: {}", file);
            // Only count entries that were actually removed so the summary log is accurate.
            if (FileUtils.deleteQuietly(file)) {
              counter.incrementAndGet();
            }
            deleteIfEmptyDirectory(file.getParentFile());
          });
    }
    log.info("deleted {} files", counter.get());
  }

  /**
   * Removes {@code dir} when it is a directory without any remaining entries.
   *
   * @param dir directory to inspect; may be {@code null} when the deleted path had no parent
   */
  private static void deleteIfEmptyDirectory(final File dir) {
    if (dir == null) {
      return;
    }
    // listFiles() returns null when dir is not a directory or cannot be read (e.g. already gone).
    final File[] remaining = dir.listFiles();
    if (remaining != null && remaining.length == 0) {
      FileUtils.deleteQuietly(dir);
    }
  }

  /**
   * Computes the point in time {@code daysAgo} days before now.
   *
   * <p>
   * The local date-time is converted using the system's current UTC offset.
   *
   * @param daysAgo number of days to subtract from the current local date-time
   * @return the resulting instant as a legacy {@link Date}, as required by {@link AgeFileFilter}
   */
  private static Date getDateFromDaysAgo(final long daysAgo) {
    return Date.from(LocalDateTime.now().minusDays(daysAgo).toInstant(OffsetDateTime.now().getOffset()));
  }

}
4 changes: 4 additions & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,10 @@ services:
- POSTGRES_PWD=${DATABASE_PASSWORD}
- POSTGRES_SEEDS=${DATABASE_HOST}
- POSTGRES_USER=${DATABASE_USER}
- TEMPORAL_HISTORY_RETENTION_IN_DAYS=${TEMPORAL_HISTORY_RETENTION_IN_DAYS}
- WORKSPACE_ROOT=${WORKSPACE_ROOT}
volumes:
- workspace:${WORKSPACE_ROOT}
volumes:
workspace:
name: ${WORKSPACE_DOCKER_MOUNT}
Expand Down

0 comments on commit bf48791

Please sign in to comment.