Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(override-base-url): add overrideBaseURL in site.config.fetchConfig #633

Merged
merged 9 commits into from
Feb 28, 2025
Merged
890 changes: 40 additions & 850 deletions package-lock.json

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion packages/spacecat-shared-data-access/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
"access": "public"
},
"dependencies": {
"@adobe/spacecat-shared-utils": "1.26.4",
"@adobe/spacecat-shared-utils": "1.33.1",
"@aws-sdk/client-dynamodb": "3.751.0",
"@aws-sdk/lib-dynamodb": "3.751.0",
"@types/joi": "17.2.3",
Expand All @@ -48,6 +48,7 @@
"chai": "5.2.0",
"chai-as-promised": "8.0.1",
"dynamo-db-local": "9.4.0",
"nock": "14.0.1",
"sinon": "19.0.2",
"sinon-chai": "4.0.0"
}
Expand Down
11 changes: 8 additions & 3 deletions packages/spacecat-shared-data-access/src/models/site/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,10 @@ export const IMPORT_TYPE_SCHEMAS = {
[IMPORT_TYPES.ORGANIC_KEYWORDS]: Joi.object({
type: Joi.string().valid(IMPORT_TYPES.ORGANIC_KEYWORDS).required(),
...IMPORT_BASE_KEYS,
limit: Joi.number().integer().min(1).max(100),
pageUrl: Joi.string().uri(),
geo: Joi.string().optional(),
limit: Joi.number().integer().min(1).max(100)
.optional(),
pageUrl: Joi.string().uri().optional(),
}),
[IMPORT_TYPES.ORGANIC_TRAFFIC]: Joi.object({
type: Joi.string().valid(IMPORT_TYPES.ORGANIC_TRAFFIC).required(),
Expand All @@ -49,7 +51,9 @@ export const IMPORT_TYPE_SCHEMAS = {
[IMPORT_TYPES.TOP_PAGES]: Joi.object({
type: Joi.string().valid(IMPORT_TYPES.TOP_PAGES).required(),
...IMPORT_BASE_KEYS,
geo: Joi.string(),
geo: Joi.string().optional(),
limit: Joi.number().integer().min(1).max(2000)
.optional(),
}),
};

Expand Down Expand Up @@ -86,6 +90,7 @@ export const configSchema = Joi.object({
),
fetchConfig: Joi.object({
headers: Joi.object().pattern(Joi.string(), Joi.string()),
overrideBaseURL: Joi.string().uri().optional(),
}).optional(),
handlers: Joi.object().pattern(Joi.string(), Joi.object({
mentions: Joi.object().pattern(Joi.string(), Joi.array().items(Joi.string())),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ export interface ImportConfig {
enabled: boolean;
pageUrl?: string;
geo?: string;
limit?: number;
}

export interface SiteConfig {
Expand Down Expand Up @@ -83,6 +84,7 @@ export interface SiteConfig {
}>;
fetchConfig?: {
headers?: Record<string, string>;
overrideBaseURL?: string;
};
};
getSlackConfig(): { workspace?: string; channel?: string; invitedUserCount?: number };
Expand All @@ -102,7 +104,7 @@ export interface SiteConfig {
getGroupedURLs(type: string): Array<{ name: string; pattern: string }> | undefined;
getLatestMetrics(type: string):
{ pageViewsChange: number; ctrChange: number; projectedTrafficValue: number } | undefined;
getFetchConfig(): { headers?: Record<string, string> } | undefined;
getFetchConfig(): { headers?: Record<string, string>, overrideBaseURL?: string } | undefined;
}

export interface Site extends BaseModel {
Expand Down
27 changes: 27 additions & 0 deletions packages/spacecat-shared-data-access/src/models/site/site.model.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
* governing permissions and limitations under the License.
*/

import { composeAuditURL, hasText, isValidUrl } from '@adobe/spacecat-shared-utils';
import BaseModel from '../base/base.model.js';

/**
Expand All @@ -32,6 +33,32 @@ class Site extends BaseModel {
this.setIsLive(newIsLive);
return this;
}

/**
* Resolves the site's base URL to a final URL by fetching the URL,
* following the redirects and returning the final URL.
*
* If the site has a configured overrideBaseURL, that one will be returned.
* Otherwise, the site's base URL will be used.
*
* If the site has a configured User-Agent, it will be used to resolve the URL.
*
* @returns a promise that resolves the final URL.
* @throws {Error} if the final URL cannot be resolved.
*/
async resolveFinalURL() {
const overrideBaseURL = this.getConfig()?.getFetchConfig()?.overrideBaseURL;
if (isValidUrl(overrideBaseURL)) {
return overrideBaseURL.replace(/^https?:\/\//, '');
}

const userAgentConfigured = this.getConfig()?.getFetchConfig()?.headers?.['User-Agent'];
if (hasText(userAgentConfigured)) {
return composeAuditURL(this.getBaseURL(), userAgentConfigured);
}

return composeAuditURL(this.getBaseURL());
}
}

export default Site;
2 changes: 2 additions & 0 deletions packages/spacecat-shared-data-access/test/setup-env.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
* governing permissions and limitations under the License.
*/
// Environment flags applied for the test run. Each console.log needs its own
// eslint directive: `eslint-disable-next-line` only covers the single line after it.

// eslint-disable-next-line no-console
console.log('Forcing HTTP/1.1 for Adobe Fetch');
process.env.HELIX_FETCH_FORCE_HTTP1 = 'true';

// eslint-disable-next-line no-console
console.log('Disabling AWS XRay');
process.env.AWS_XRAY_SDK_ENABLED = 'false';
process.env.AWS_XRAY_CONTEXT_MISSING = 'IGNORE_ERROR';
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ describe('Config Tests', () => {
headers: {
'User-Agent': 'custom-agent',
},
overrideBaseURL: 'https://example.com',
};
config.updateFetchConfig(fetchConfig);
expect(config.getFetchConfig()).to.deep.equal(fetchConfig);
Expand Down Expand Up @@ -580,6 +581,8 @@ describe('Config Tests', () => {
sources: ['ahrefs'],
pageUrl: 'https://example.com',
enabled: false,
geo: 'us',
limit: 5,
},
{
type: 'organic-traffic',
Expand All @@ -593,12 +596,14 @@ describe('Config Tests', () => {
sources: ['ahrefs'],
enabled: true,
geo: 'us',
limit: 100,
},
],
fetchConfig: {
headers: {
'User-Agent': 'test-agent',
},
overrideBaseURL: 'https://example.com',
},
};
const validated = validateConfiguration(config);
Expand Down Expand Up @@ -702,7 +707,7 @@ describe('Config Tests', () => {
});
});

it('throws error for invalid fetchConfig', () => {
it('throws error for invalid fetchConfig headers', () => {
const config = {
fetchConfig: {
headers: 'not-an-object',
Expand All @@ -712,6 +717,16 @@ describe('Config Tests', () => {
.to.throw('Configuration validation error: "fetchConfig.headers" must be of type object');
});

// Title uses the exact property name (`overrideBaseURL`) so the test is found when
// searching for the config key.
it('throws error for invalid fetchConfig overrideBaseURL', () => {
  const config = {
    fetchConfig: {
      // Not a URI, so schema validation is expected to reject it.
      overrideBaseURL: 'not-a-url',
    },
  };
  expect(() => validateConfiguration(config))
    .to.throw('Configuration validation error: "fetchConfig.overrideBaseURL" must be a valid uri');
});

it('validates multiple import types with different configurations', () => {
const config = {
imports: [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import { expect, use as chaiUse } from 'chai';
import chaiAsPromised from 'chai-as-promised';
import { stub } from 'sinon';
import sinonChai from 'sinon-chai';
import nock from 'nock';

import Site from '../../../../src/models/site/site.model.js';
import siteFixtures from '../../../fixtures/sites.fixture.js';
Expand Down Expand Up @@ -162,4 +163,55 @@ describe('SiteModel', () => {
expect(instance.getIsLive()).to.equal(true);
});
});

describe('resolveFinalURL', () => {
  const siteBaseURL = 'https://spacecat.com';

  // Swaps the fetch-config getter on the instance's config for a test-controlled one.
  const stubFetchConfig = (getter) => {
    const siteConfig = instance.getConfig();
    siteConfig.getFetchConfig = getter;
  };

  afterEach(() => {
    // Drop interceptors registered by the previous test.
    nock.cleanAll();
  });

  it('resolves the final URL using the base URL', async () => {
    instance.setBaseURL(siteBaseURL);
    stubFetchConfig(() => ({}));

    // The base URL answers with a redirect; the redirect target answers 200.
    const redirectTarget = 'https://redirected.com';
    nock(instance.getBaseURL()).get('/').reply(301, undefined, { Location: redirectTarget });
    nock(redirectTarget).get('/').reply(200);

    const resolved = await instance.resolveFinalURL();

    expect(resolved).to.equal('redirected.com');
  });

  it('resolves the final URL using the overrideBaseURL', async () => {
    stubFetchConfig(() => ({ overrideBaseURL: 'http://override.com' }));

    const resolved = await instance.resolveFinalURL();

    expect(resolved).to.equal('override.com');
  });

  it('resolves the final URL using the User-Agent header', async () => {
    instance.setBaseURL(siteBaseURL);
    const agent = 'Mozilla/5.0';
    stubFetchConfig(() => ({ headers: { 'User-Agent': agent } }));

    // The interceptor only matches requests that carry the configured User-Agent.
    const requiredHeaders = { reqheaders: { 'User-Agent': agent } };
    nock(instance.getBaseURL(), requiredHeaders).get('/').reply(200);

    const resolved = await instance.resolveFinalURL();
    const schemelessBase = instance.getBaseURL().replace(/^https?:\/\//, '');

    expect(resolved).to.equal(schemelessBase);
  });
});
});