diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/eb4c9e00-db83-4d63-a386-39cfa91012a8.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/eb4c9e00-db83-4d63-a386-39cfa91012a8.json index 4402251686201..034d07ac76926 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/eb4c9e00-db83-4d63-a386-39cfa91012a8.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/eb4c9e00-db83-4d63-a386-39cfa91012a8.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "eb4c9e00-db83-4d63-a386-39cfa91012a8", "name": "Google Search Console", "dockerRepository": "airbyte/source-google-search-console", - "dockerImageTag": "0.1.9", + "dockerImageTag": "0.1.11", "documentationUrl": "https://docs.airbyte.io/integrations/sources/google-search-console", "icon": "googlesearchconsole.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index d3cd840965729..06ea8ac2abcbb 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -2458,7 +2458,7 @@ - - "client_secret" oauthFlowOutputParameters: - - "refresh_token" -- dockerImage: "airbyte/source-google-search-console:0.1.10" +- dockerImage: "airbyte/source-google-search-console:0.1.11" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/google-search-console" connectionSpecification: @@ -2475,6 +2475,7 @@ type: "array" items: type: "string" + title: "Site URLs" description: "Website URLs property; do not include the domain-level property\ \ in the list" examples: @@ -2483,16 +2484,18 @@ order: 0 start_date: type: "string" - description: "The date from which you'd like to replicate data in the format\ - \ YYYY-MM-DD." + title: "Start Date" + description: "UTC date in the format 2017-01-25. 
Any data before this date\ + \ will not be replicated." examples: - "2021-01-01" pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" order: 1 end_date: type: "string" - description: "The date from which you'd like to replicate data in the format\ - \ YYYY-MM-DD. Must be greater or equal start_date field" + title: "End Date" + description: "UTC date in the format 2017-01-25. Any data after this date\ + \ will not be replicated. Must be greater or equal to the Start Date field." examples: - "2021-12-12" pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" @@ -2520,24 +2523,24 @@ client_id: title: "Client ID" type: "string" - description: "The Client ID of your developer application" + description: "The Client ID of your Google Search Console developer\ + \ application." airbyte_secret: true client_secret: title: "Client Secret" type: "string" - description: "The client secret of your developer application" + description: "The Client Secret of your Google Search Console developer\ + \ application." airbyte_secret: true access_token: title: "Access Token" type: "string" - description: "An access token generated using the above client ID\ - \ and secret" + description: "Access Token for making authenticated requests." airbyte_secret: true refresh_token: title: "Refresh Token" type: "string" - description: "A refresh token generated using the above client ID\ - \ and secret" + description: "The token for obtaining new access token." 
airbyte_secret: true - type: "object" title: "Service Account Key Authentication" diff --git a/airbyte-integrations/connectors/source-google-search-console/source_google_search_console/streams.py b/airbyte-integrations/connectors/source-google-search-console/source_google_search_console/streams.py index 1649ba58e0a6b..afc05f754b53d 100755 --- a/airbyte-integrations/connectors/source-google-search-console/source_google_search_console/streams.py +++ b/airbyte-integrations/connectors/source-google-search-console/source_google_search_console/streams.py @@ -236,6 +236,24 @@ def get_updated_state( """ With the existing nested loop implementation, we have to store a `cursor_field` for each `site_url` and `searchType`. This functionality is placed in `get_update_state`. + + { + "stream": { + "http://domain1.com": { + "web": {"date": "2022-01-03"}, + "news": {"date": "2022-01-03"}, + "image": {"date": "2022-01-03"}, + "video": {"date": "2022-01-03"} + }, + "http://domain2.com": { + "web": {"date": "2022-01-03"}, + "news": {"date": "2022-01-03"}, + "image": {"date": "2022-01-03"}, + "video": {"date": "2022-01-03"} + }, + "date": "2022-01-03", + } + } """ latest_benchmark = latest_record[self.cursor_field] @@ -243,16 +261,10 @@ def get_updated_state( site_url = latest_record.get("site_url") search_type = latest_record.get("search_type") - if current_stream_state.get(site_url, {}).get(search_type): - current_stream_state[site_url][search_type] = { - self.cursor_field: max(latest_benchmark, current_stream_state[site_url][search_type][self.cursor_field]) - } - - elif current_stream_state.get(site_url): - current_stream_state[site_url][search_type] = {self.cursor_field: latest_benchmark} - - else: - current_stream_state = {site_url: {search_type: {self.cursor_field: latest_benchmark}}} + value = current_stream_state.get(site_url, {}).get(search_type, {}).get(self.cursor_field) + if value: + latest_benchmark = max(latest_benchmark, value) + 
current_stream_state.setdefault(site_url, {}).setdefault(search_type, {})[self.cursor_field] = latest_benchmark # we need to get the max date over all searchTypes but the current acceptance test YAML format doesn't # support that diff --git a/airbyte-integrations/connectors/source-google-search-console/unit_tests/unit_test.py b/airbyte-integrations/connectors/source-google-search-console/unit_tests/unit_test.py index 9273554a9a40f..2f1c14841292c 100755 --- a/airbyte-integrations/connectors/source-google-search-console/unit_tests/unit_test.py +++ b/airbyte-integrations/connectors/source-google-search-console/unit_tests/unit_test.py @@ -89,3 +89,19 @@ def test_state(current_stream_state, latest_record, expected): value = stream.get_updated_state(current_stream_state, latest_record) assert value == expected + + +def test_updated_state(): + stream = SearchAnalyticsByDate(NoAuth(), ["https://domain1.com", "https://domain2.com"], "start_date", "end_date") + + state = {} + record = {"site_url": "https://domain1.com", "search_type": "web", "date": "2022-01-01"} + state = stream.get_updated_state(state, record) + record = {"site_url": "https://domain2.com", "search_type": "web", "date": "2022-01-01"} + state = stream.get_updated_state(state, record) + + assert state == { + "https://domain1.com": {"web": {"date": "2022-01-01"}}, + "https://domain2.com": {"web": {"date": "2022-01-01"}}, + "date": "2022-01-01", + } diff --git a/docs/integrations/sources/google-search-console.md b/docs/integrations/sources/google-search-console.md index 6a0eb2da4c490..5ce46c7935bd9 100644 --- a/docs/integrations/sources/google-search-console.md +++ b/docs/integrations/sources/google-search-console.md @@ -96,7 +96,7 @@ You should now be ready to use the Google Workspace Admin Reports API connector | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | -| `0.1.11` | 2022-01-05 | [9186](https://github.com/airbytehq/airbyte/pull/9186) | Update titles and descriptions | +| 
`0.1.11` | 2022-01-05 | [9186](https://github.com/airbytehq/airbyte/pull/9186) [9194](https://github.com/airbytehq/airbyte/pull/9194) | Update titles and descriptions. Fix incremental sync: keep all URLs in state object | | `0.1.10` | 2021-12-23 | [9073](https://github.com/airbytehq/airbyte/pull/9073) | Add slicing by date range | | `0.1.9` | 2021-12-22 | [9047](https://github.com/airbytehq/airbyte/pull/9047) | Add 'order' to spec.json props | | `0.1.8` | 2021-12-21 | [8248](https://github.com/airbytehq/airbyte/pull/8248) | Enable Sentry for performance and errors tracking |