Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check ignore matches before Bucket item downloads #337

Merged
merged 3 commits into the base branch from the source branch (branch names not captured in this page extract)
Apr 14, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/actions/run-tests/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM golang:1.15-alpine
FROM golang:1.16-alpine

# Add any build or testing essential system packages
RUN apk add --no-cache build-base git pkgconf
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Docker buildkit multi-arch build requires golang alpine
FROM golang:1.15-alpine as builder
FROM golang:1.16-alpine as builder

RUN apk add gcc pkgconfig libc-dev
RUN apk add --no-cache musl~=1.2 libgit2-dev~=1.1
Expand Down
2 changes: 1 addition & 1 deletion api/go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module github.com/fluxcd/source-controller/api

go 1.15
go 1.16

require (
github.com/fluxcd/pkg/apis/meta v0.8.0
Expand Down
28 changes: 26 additions & 2 deletions controllers/bucket_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ import (
"github.com/fluxcd/pkg/runtime/predicates"

sourcev1 "github.com/fluxcd/source-controller/api/v1beta1"
"github.com/fluxcd/source-controller/pkg/sourceignore"
)

// +kubebuilder:rbac:groups=source.toolkit.fluxcd.io,resources=buckets,verbs=get;list;watch;create;update;patch;delete
Expand Down Expand Up @@ -202,6 +203,25 @@ func (r *BucketReconciler) reconcile(ctx context.Context, bucket sourcev1.Bucket
return sourcev1.BucketNotReady(bucket, sourcev1.BucketOperationFailedReason, err.Error()), err
}

// Look for file with ignore rules first
// NB: S3 has flat filepath keys making it impossible to look
// for files in "subdirectories" without building up a tree first.
path := filepath.Join(tempDir, sourceignore.IgnoreFile)
if err := s3Client.FGetObject(ctxTimeout, bucket.Spec.BucketName, sourceignore.IgnoreFile, path, minio.GetObjectOptions{}); err != nil {
if resp, ok := err.(minio.ErrorResponse); ok && resp.Code != "NoSuchKey" {
return sourcev1.BucketNotReady(bucket, sourcev1.BucketOperationFailedReason, err.Error()), err
}
}
ps, err := sourceignore.ReadIgnoreFile(path, nil)
if err != nil {
return sourcev1.BucketNotReady(bucket, sourcev1.BucketOperationFailedReason, err.Error()), err
}
// In-spec patterns take precedence
if bucket.Spec.Ignore != nil {
ps = append(ps, sourceignore.ReadPatterns(strings.NewReader(*bucket.Spec.Ignore), nil)...)
}
matcher := sourceignore.NewMatcher(ps)

// download bucket content
for object := range s3Client.ListObjects(ctxTimeout, bucket.Spec.BucketName, minio.ListObjectsOptions{
Recursive: true,
Expand All @@ -212,7 +232,11 @@ func (r *BucketReconciler) reconcile(ctx context.Context, bucket sourcev1.Bucket
return sourcev1.BucketNotReady(bucket, sourcev1.BucketOperationFailedReason, err.Error()), err
}

if strings.HasSuffix(object.Key, "/") {
if strings.HasSuffix(object.Key, "/") || object.Key == sourceignore.IgnoreFile {
continue
}

if matcher.Match([]string{object.Key}, false) {
continue
}

Expand Down Expand Up @@ -255,7 +279,7 @@ func (r *BucketReconciler) reconcile(ctx context.Context, bucket sourcev1.Bucket
defer unlock()

// archive artifact and check integrity
if err := r.Storage.Archive(&artifact, tempDir, bucket.Spec.Ignore); err != nil {
if err := r.Storage.Archive(&artifact, tempDir, nil); err != nil {
err = fmt.Errorf("storage archive error: %w", err)
return sourcev1.BucketNotReady(bucket, sourcev1.StorageOperationFailedReason, err.Error()), err
}
Expand Down
12 changes: 11 additions & 1 deletion controllers/gitrepository_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"fmt"
"io/ioutil"
"os"
"strings"
"time"

"github.com/go-logr/logr"
Expand All @@ -45,6 +46,7 @@ import (
sourcev1 "github.com/fluxcd/source-controller/api/v1beta1"
"github.com/fluxcd/source-controller/pkg/git"
"github.com/fluxcd/source-controller/pkg/git/strategy"
"github.com/fluxcd/source-controller/pkg/sourceignore"
)

// +kubebuilder:rbac:groups=source.toolkit.fluxcd.io,resources=gitrepositories,verbs=get;list;watch;create;update;patch;delete
Expand Down Expand Up @@ -270,7 +272,15 @@ func (r *GitRepositoryReconciler) reconcile(ctx context.Context, repository sour
defer unlock()

// archive artifact and check integrity
if err := r.Storage.Archive(&artifact, tmpGit, repository.Spec.Ignore); err != nil {
ps, err := sourceignore.LoadIgnorePatterns(tmpGit, nil)
if err != nil {
err = fmt.Errorf(".sourceignore error: %w", err)
return sourcev1.GitRepositoryNotReady(repository, sourcev1.StorageOperationFailedReason, err.Error()), err
}
if repository.Spec.Ignore != nil {
ps = append(ps, sourceignore.ReadPatterns(strings.NewReader(*repository.Spec.Ignore), nil)...)
}
if err := r.Storage.Archive(&artifact, tmpGit, SourceIgnoreFilter(ps, nil)); err != nil {
err = fmt.Errorf("storage archive error: %w", err)
return sourcev1.GitRepositoryNotReady(repository, sourcev1.StorageOperationFailedReason, err.Error()), err
}
Expand Down
169 changes: 62 additions & 107 deletions controllers/storage.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ package controllers

import (
"archive/tar"
"bufio"
"bytes"
"compress/gzip"
"crypto/sha1"
"fmt"
Expand All @@ -39,14 +37,7 @@ import (

sourcev1 "github.com/fluxcd/source-controller/api/v1beta1"
"github.com/fluxcd/source-controller/internal/fs"
)

const (
excludeFile = ".sourceignore"
excludeVCS = ".git/,.gitignore,.gitmodules,.gitattributes"
excludeExt = "*.jpg,*.jpeg,*.gif,*.png,*.wmv,*.flv,*.tar.gz,*.zip"
excludeCI = ".github/,.circleci/,.travis.yml,.gitlab-ci.yml,appveyor.yml,.drone.yml,cloudbuild.yaml,codeship-services.yml,codeship-steps.yml"
excludeExtra = "**/.goreleaser.yml,**/.sops.yaml,**/.flux.yaml"
"github.com/fluxcd/source-controller/pkg/sourceignore"
)

// Storage manages artifacts
Expand Down Expand Up @@ -151,19 +142,35 @@ func (s *Storage) ArtifactExist(artifact sourcev1.Artifact) bool {
return fi.Mode().IsRegular()
}

// Archive atomically archives the given directory as a tarball to the given v1beta1.Artifact
// path, excluding any VCS specific files and directories, or any of the excludes defined in
// the excludeFiles. If successful, it sets the checksum and last update time on the artifact.
func (s *Storage) Archive(artifact *sourcev1.Artifact, dir string, ignore *string) (err error) {
if f, err := os.Stat(dir); os.IsNotExist(err) || !f.IsDir() {
return fmt.Errorf("invalid dir path: %s", dir)
// ArchiveFileFilter must return true if a file should not be included
// in the archive after inspecting the given path and/or os.FileInfo.
type ArchiveFileFilter func(p string, fi os.FileInfo) bool

// SourceIgnoreFilter returns an ArchiveFileFilter that filters out
// files matching sourceignore.VCSPatterns and any of the provided
// patterns. If an empty gitignore.Pattern slice is given, the matcher
// is set to sourceignore.NewDefaultMatcher.
func SourceIgnoreFilter(ps []gitignore.Pattern, domain []string) ArchiveFileFilter {
matcher := sourceignore.NewDefaultMatcher(ps, domain)
if len(ps) > 0 {
ps = append(sourceignore.VCSPatterns(domain), ps...)
matcher = sourceignore.NewMatcher(ps)
}
return func(p string, fi os.FileInfo) bool {
// The directory is always false as the archiver does already skip
// directories.
return matcher.Match(strings.Split(p, string(filepath.Separator)), false)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I somehow went looking for this method but could not find it :-S. Thanks!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or no, it was not. I came across this method as well but it splits them by filepath.ListSeparator, resulting in e.g. [/a/b/c, /d/f/g] instead of the [a, b, c] we are after for the domain.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Curses! I have made exactly this mistake three or four times now aaaaa

}
}

ps, err := loadExcludePatterns(dir, ignore)
if err != nil {
return err
// Archive atomically archives the given directory as a tarball to the
// given v1beta1.Artifact path, excluding directories and any
// ArchiveFileFilter matches. If successful, it sets the checksum and
// last update time on the artifact.
func (s *Storage) Archive(artifact *sourcev1.Artifact, dir string, filter ArchiveFileFilter) (err error) {
if f, err := os.Stat(dir); os.IsNotExist(err) || !f.IsDir() {
return fmt.Errorf("invalid dir path: %s", dir)
}
matcher := gitignore.NewMatcher(ps)

localPath := s.LocalPath(*artifact)
tf, err := ioutil.TempFile(filepath.Split(localPath))
Expand All @@ -182,43 +189,7 @@ func (s *Storage) Archive(artifact *sourcev1.Artifact, dir string, ignore *strin

gw := gzip.NewWriter(mw)
tw := tar.NewWriter(gw)
if err := writeToArchiveExcludeMatches(dir, matcher, tw); err != nil {
tw.Close()
gw.Close()
tf.Close()
return err
}

if err := tw.Close(); err != nil {
gw.Close()
tf.Close()
return err
}
if err := gw.Close(); err != nil {
tf.Close()
return err
}
if err := tf.Close(); err != nil {
return err
}

if err := os.Chmod(tmpName, 0644); err != nil {
return err
}

if err := fs.RenameWithFallback(tmpName, localPath); err != nil {
return err
}

artifact.Checksum = fmt.Sprintf("%x", h.Sum(nil))
artifact.LastUpdateTime = metav1.Now()
return nil
}

// writeToArchiveExcludeMatches walks over the given dir and writes any regular file that does
// not match the given gitignore.Matcher.
func writeToArchiveExcludeMatches(dir string, matcher gitignore.Matcher, writer *tar.Writer) error {
fn := func(p string, fi os.FileInfo, err error) error {
if err := filepath.Walk(dir, func(p string, fi os.FileInfo, err error) error {
if err != nil {
return err
}
Expand All @@ -228,8 +199,8 @@ func writeToArchiveExcludeMatches(dir string, matcher gitignore.Matcher, writer
return nil
}

// Ignore excluded extensions and files
if matcher.Match(strings.Split(p, "/"), false) {
// Skip filtered files
if filter != nil && filter(p, fi) {
return nil
}

Expand All @@ -249,7 +220,7 @@ func writeToArchiveExcludeMatches(dir string, matcher gitignore.Matcher, writer
}
header.Name = relFilePath

if err := writer.WriteHeader(header); err != nil {
if err := tw.WriteHeader(header); err != nil {
return err
}

Expand All @@ -258,13 +229,42 @@ func writeToArchiveExcludeMatches(dir string, matcher gitignore.Matcher, writer
f.Close()
return err
}
if _, err := io.Copy(writer, f); err != nil {
if _, err := io.Copy(tw, f); err != nil {
f.Close()
return err
}
return f.Close()
}); err != nil {
tw.Close()
gw.Close()
tf.Close()
return err
}

if err := tw.Close(); err != nil {
gw.Close()
tf.Close()
return err
}
if err := gw.Close(); err != nil {
tf.Close()
return err
}
if err := tf.Close(); err != nil {
return err
}
return filepath.Walk(dir, fn)

if err := os.Chmod(tmpName, 0644); err != nil {
return err
}

if err := fs.RenameWithFallback(tmpName, localPath); err != nil {
return err
}

artifact.Checksum = fmt.Sprintf("%x", h.Sum(nil))
artifact.LastUpdateTime = metav1.Now()
return nil
}

// AtomicWriteFile atomically writes the io.Reader contents to the v1beta1.Artifact path.
Expand Down Expand Up @@ -400,51 +400,6 @@ func (s *Storage) LocalPath(artifact sourcev1.Artifact) string {
return filepath.Join(s.BasePath, artifact.Path)
}

// getPatterns collects ignore patterns from the given reader and returns them
// as a gitignore.Pattern slice.
func getPatterns(reader io.Reader, path []string) []gitignore.Pattern {
var ps []gitignore.Pattern
scanner := bufio.NewScanner(reader)

for scanner.Scan() {
s := scanner.Text()
if !strings.HasPrefix(s, "#") && len(strings.TrimSpace(s)) > 0 {
ps = append(ps, gitignore.ParsePattern(s, path))
}
}

return ps
}

// loadExcludePatterns loads the excluded patterns from sourceignore or other
// sources.
func loadExcludePatterns(dir string, ignore *string) ([]gitignore.Pattern, error) {
path := strings.Split(dir, "/")

var ps []gitignore.Pattern
for _, p := range strings.Split(excludeVCS, ",") {
ps = append(ps, gitignore.ParsePattern(p, path))
}

if ignore == nil {
all := strings.Join([]string{excludeExt, excludeCI, excludeExtra}, ",")
for _, p := range strings.Split(all, ",") {
ps = append(ps, gitignore.ParsePattern(p, path))
}

if f, err := os.Open(filepath.Join(dir, excludeFile)); err == nil {
defer f.Close()
ps = append(ps, getPatterns(f, path)...)
} else if !os.IsNotExist(err) {
return nil, err
}
} else {
ps = append(ps, getPatterns(bytes.NewBufferString(*ignore), path)...)
}

return ps, nil
}

// newHash returns a new SHA1 hash.
func newHash() hash.Hash {
return sha1.New()
Expand Down
Loading