From 361d0fad31ee62c2cce4712dd11d5f15c7c3475c Mon Sep 17 00:00:00 2001 From: Scott Balmos <399112+sbalmos@users.noreply.github.com> Date: Wed, 19 Feb 2025 11:47:13 -0500 Subject: [PATCH 1/5] feat(aws_s3 source) Support vhost-style S3 bucket addressing --- changelog.d/s3-source-vhosts.feature.md | 3 +++ src/sources/aws_s3/mod.rs | 9 +++++++-- website/cue/reference/components/sources/base/aws_s3.cue | 9 +++++++++ 3 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 changelog.d/s3-source-vhosts.feature.md diff --git a/changelog.d/s3-source-vhosts.feature.md b/changelog.d/s3-source-vhosts.feature.md new file mode 100644 index 0000000000000..26cdb1b97706f --- /dev/null +++ b/changelog.d/s3-source-vhosts.feature.md @@ -0,0 +1,3 @@ +Adds a `force_path_style` option to the `aws_s3` source, matching support added in the `aws_s3` sink previously, that allows users to configure usage of virtual host-style bucket addressing. The value defaults to `true` to maintain existing (path-based addressing) behavior. + +Authors: sbalmos diff --git a/src/sources/aws_s3/mod.rs b/src/sources/aws_s3/mod.rs index 69631176fea46..79c3a7a034090 100644 --- a/src/sources/aws_s3/mod.rs +++ b/src/sources/aws_s3/mod.rs @@ -130,6 +130,12 @@ pub struct AwsS3Config { #[serde(default = "default_decoding")] #[derivative(Default(value = "default_decoding()"))] pub decoding: DeserializerConfig, + + /// Specifies which addressing style to use. + /// + /// This controls if the bucket name is in the hostname or part of the URL. + #[serde(default = "crate::serde::default_true")] + pub force_path_style: bool, } const fn default_framing() -> FramingConfig { @@ -230,11 +236,10 @@ impl AwsS3Config { ) -> crate::Result { let region = self.region.region(); let endpoint = self.region.endpoint(); - let force_path_style_value: bool = true; let s3_client = create_client::( &S3ClientBuilder { - force_path_style: Some(force_path_style_value), + force_path_style: Some(self.force_path_style), }, &self.auth, region.clone(), diff --git a/website/cue/reference/components/sources/base/aws_s3.cue b/website/cue/reference/components/sources/base/aws_s3.cue index befaf245e2110..9186f37e83ce4 100644 --- a/website/cue/reference/components/sources/base/aws_s3.cue +++ b/website/cue/reference/components/sources/base/aws_s3.cue @@ -410,6 +410,15 @@ base: components: sources: aws_s3: configuration: { required: false type: string: examples: ["http://127.0.0.0:5000/path/to/service"] } + force_path_style: { + description: """ + Specifies which addressing style to use. + + This controls if the bucket name is in the hostname or part of the URL. + """ + required: false + type: bool: default: true + } framing: { description: """ Framing configuration. From 8220df03981a155280d7927b1c4f10e118ccc1b0 Mon Sep 17 00:00:00 2001 From: Scott Balmos <399112+sbalmos@users.noreply.github.com> Date: Sat, 22 Feb 2025 11:55:08 -0500 Subject: [PATCH 2/5] Update website/cue/reference/components/sources/base/aws_s3.cue Co-authored-by: Bryce Eadie --- website/cue/reference/components/sources/base/aws_s3.cue | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/cue/reference/components/sources/base/aws_s3.cue b/website/cue/reference/components/sources/base/aws_s3.cue index 9186f37e83ce4..3d318a9af852c 100644 --- a/website/cue/reference/components/sources/base/aws_s3.cue +++ b/website/cue/reference/components/sources/base/aws_s3.cue @@ -414,7 +414,7 @@ base: components: sources: aws_s3: configuration: { description: """ Specifies which addressing style to use. - This controls if the bucket name is in the hostname or part of the URL. + This controls whether the bucket name is in the hostname, or part of the URL. """ required: false type: bool: default: true From 3113e44ebd5302d6165df1f2a2e85e8f94a9c078 Mon Sep 17 00:00:00 2001 From: Pavlos Rontidis Date: Wed, 26 Feb 2025 14:37:13 -0500 Subject: [PATCH 3/5] fix spellchecker --- .github/actions/spelling/allow.txt | 1 + changelog.d/s3-source-vhosts.feature.md | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/actions/spelling/allow.txt b/.github/actions/spelling/allow.txt index 5416eb8fa9366..8fe4f184ebae1 100644 --- a/.github/actions/spelling/allow.txt +++ b/.github/actions/spelling/allow.txt @@ -523,3 +523,4 @@ requestline jetbrains JetBrains gifs +vhosts diff --git a/changelog.d/s3-source-vhosts.feature.md b/changelog.d/s3-source-vhosts.feature.md index 26cdb1b97706f..56ae48ad934c4 100644 --- a/changelog.d/s3-source-vhosts.feature.md +++ b/changelog.d/s3-source-vhosts.feature.md @@ -1,3 +1,3 @@ Adds a `force_path_style` option to the `aws_s3` source, matching support added in the `aws_s3` sink previously, that allows users to configure usage of virtual host-style bucket addressing. The value defaults to `true` to maintain existing (path-based addressing) behavior. -Authors: sbalmos +authors: sbalmos From 6dd70ef42a7dc6ece5582a19c95e55fcee12174e Mon Sep 17 00:00:00 2001 From: Pavlos Rontidis Date: Wed, 26 Feb 2025 17:00:31 -0500 Subject: [PATCH 4/5] fix bad rebase --- .github/actions/spelling/allow.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/actions/spelling/allow.txt b/.github/actions/spelling/allow.txt index b51543ed54a94..8ef994585355e 100644 --- a/.github/actions/spelling/allow.txt +++ b/.github/actions/spelling/allow.txt @@ -441,6 +441,7 @@ scriptblock servlet Sinjo sublocation +shannon sundar svcb snyk From 6a24b425cabc39a462a448d9ae82837453ebe41d Mon Sep 17 00:00:00 2001 From: Scott Balmos <399112+sbalmos@users.noreply.github.com> Date: Wed, 26 Feb 2025 22:24:27 -0500 Subject: [PATCH 5/5] Fixup param default to ensure proper website docs generation --- src/sources/aws_s3/mod.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/sources/aws_s3/mod.rs b/src/sources/aws_s3/mod.rs index 79c3a7a034090..f0b89d1629477 100644 --- a/src/sources/aws_s3/mod.rs +++ b/src/sources/aws_s3/mod.rs @@ -133,8 +133,9 @@ pub struct AwsS3Config { /// Specifies which addressing style to use. /// - /// This controls if the bucket name is in the hostname or part of the URL. - #[serde(default = "crate::serde::default_true")] + /// This controls whether the bucket name is in the hostname, or part of the URL. + #[serde(default = "default_true")] + #[derivative(Default(value = "default_true()"))] pub force_path_style: bool, } @@ -145,6 +146,10 @@ const fn default_framing() -> FramingConfig { }) } +const fn default_true() -> bool { + true +} + impl_generate_config_from_default!(AwsS3Config); #[async_trait::async_trait]