-
Notifications
You must be signed in to change notification settings - Fork 4.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add scheduled task to clean up old files from workspace #16247
Changes from 5 commits
25bbf01
77a1780
af1c220
9dc9579
ddcc293
3358efa
3f22f28
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
/* | ||
* Copyright (c) 2022 Airbyte, Inc., all rights reserved. | ||
*/ | ||
|
||
package io.airbyte.cron.selfhealing; | ||
|
||
import io.airbyte.config.Configs;
import io.airbyte.config.EnvConfigs;
import io.micronaut.scheduling.annotation.Scheduled;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.LocalDateTime;
import java.time.OffsetDateTime;
import java.util.Date;
import java.util.stream.Stream;
import javax.inject.Singleton;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.AgeFileFilter;
|
||
@Singleton | ||
@Slf4j | ||
public class WorkspaceCleaner { | ||
|
||
private final Path workspaceRoot; | ||
private final long maxAgeFilesInDays; | ||
|
||
WorkspaceCleaner() { | ||
// TODO Configs should get injected through micronaut | ||
final Configs configs = new EnvConfigs(); | ||
|
||
this.workspaceRoot = configs.getWorkspaceRoot(); | ||
// We align max file age on temporal for history consistency | ||
// It might make sense configure this independently in the future | ||
this.maxAgeFilesInDays = configs.getTemporalRetentionInDays(); | ||
} | ||
|
||
/* | ||
* Delete files older than maxAgeFilesInDays from the workspace | ||
* | ||
* NOTE: this is currently only intended to work for docker | ||
*/ | ||
@Scheduled(fixedRate = "1d") | ||
public void deleteOldFiles() throws IOException { | ||
final Date oldestAllowed = getDateFromDaysAgo(maxAgeFilesInDays); | ||
log.info("Deleting files older than {} days ({})", maxAgeFilesInDays, oldestAllowed); | ||
|
||
Files.walk(workspaceRoot) | ||
.map(Path::toFile) | ||
.filter(f -> new AgeFileFilter(oldestAllowed).accept(f)) | ||
.forEach(file -> { | ||
log.info("Deleting file: " + file.toString()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: this should probably be lowered to debug, as I suspect it will cause log file spam when we actually delete stuff. Perhaps replace it with a counter and a log message to indicate how many files were cleaned at the end. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
FileUtils.deleteQuietly(file); | ||
final File parentDir = file.getParentFile(); | ||
if (parentDir.isDirectory() && parentDir.listFiles().length == 0) { | ||
FileUtils.deleteQuietly(parentDir); | ||
} | ||
}); | ||
} | ||
|
||
private static Date getDateFromDaysAgo(final long daysAgo) { | ||
return Date.from(LocalDateTime.now().minusDays(daysAgo).toInstant(OffsetDateTime.now().getOffset())); | ||
} | ||
|
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just curious if we know why this didn't get bumped by @evantahler's PR. Do we need to add `airbyte-cron` to a script somewhere to ensure it gets updated?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@gosusnp found the problem! `.bumpversion.cfg` is what the publish command uses to know which files to bump. This wasn't in there.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, that should be the issue.