From cdde769d33bba5e38d7a3e05064002b5cd473371 Mon Sep 17 00:00:00 2001 From: Sergey Chvalyuk Date: Wed, 29 Dec 2021 19:22:52 +0200 Subject: [PATCH 1/7] bugfix: keep all urls in state object Signed-off-by: Sergey Chvalyuk --- .../eb4c9e00-db83-4d63-a386-39cfa91012a8.json | 2 +- .../main/resources/seed/source_definitions.yaml | 2 +- .../source-google-search-console/Dockerfile | 2 +- .../source_google_search_console/streams.py | 14 ++++---------- 4 files changed, 7 insertions(+), 13 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/eb4c9e00-db83-4d63-a386-39cfa91012a8.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/eb4c9e00-db83-4d63-a386-39cfa91012a8.json index 4402251686201..582b6d6830028 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/eb4c9e00-db83-4d63-a386-39cfa91012a8.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/eb4c9e00-db83-4d63-a386-39cfa91012a8.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "eb4c9e00-db83-4d63-a386-39cfa91012a8", "name": "Google Search Console", "dockerRepository": "airbyte/source-google-search-console", - "dockerImageTag": "0.1.9", + "dockerImageTag": "0.1.10", "documentationUrl": "https://docs.airbyte.io/integrations/sources/google-search-console", "icon": "googlesearchconsole.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index d60297abbe21b..c0af7e6182b19 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -252,7 +252,7 @@ - name: Google Search Console sourceDefinitionId: eb4c9e00-db83-4d63-a386-39cfa91012a8 dockerRepository: airbyte/source-google-search-console - dockerImageTag: 0.1.9 + dockerImageTag: 0.1.10 documentationUrl: 
https://docs.airbyte.io/integrations/sources/google-search-console icon: googlesearchconsole.svg sourceType: api diff --git a/airbyte-integrations/connectors/source-google-search-console/Dockerfile b/airbyte-integrations/connectors/source-google-search-console/Dockerfile index 7a392def2eb18..c14914f5360ea 100755 --- a/airbyte-integrations/connectors/source-google-search-console/Dockerfile +++ b/airbyte-integrations/connectors/source-google-search-console/Dockerfile @@ -13,5 +13,5 @@ ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENV SENTRY_DSN "https://d4b03de0c4574c78999b8d58e55243dc@o1009025.ingest.sentry.io/6102835" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.9 +LABEL io.airbyte.version=0.1.10 LABEL io.airbyte.name=airbyte/source-google-search-console diff --git a/airbyte-integrations/connectors/source-google-search-console/source_google_search_console/streams.py b/airbyte-integrations/connectors/source-google-search-console/source_google_search_console/streams.py index 6b448248215a0..c535c7e8c2b5a 100755 --- a/airbyte-integrations/connectors/source-google-search-console/source_google_search_console/streams.py +++ b/airbyte-integrations/connectors/source-google-search-console/source_google_search_console/streams.py @@ -226,16 +226,10 @@ def get_updated_state( site_url = latest_record.get("site_url") search_type = latest_record.get("search_type") - if current_stream_state.get(site_url, {}).get(search_type): - current_stream_state[site_url][search_type] = { - self.cursor_field: max(latest_benchmark, current_stream_state[site_url][search_type][self.cursor_field]) - } - - elif current_stream_state.get(site_url): - current_stream_state[site_url][search_type] = {self.cursor_field: latest_benchmark} - - else: - current_stream_state = {site_url: {search_type: {self.cursor_field: latest_benchmark}}} + value = current_stream_state.get(site_url, {}).get(search_type, {}).get(self.cursor_field) + if value: + 
latest_benchmark = max(latest_benchmark, value) + current_stream_state.setdefault(site_url, {}).setdefault(search_type, {})[self.cursor_field] = latest_benchmark # we need to get the max date over all searchTypes but the current acceptance test YAML format doesn't # support that From d76d4e110185a1b0d1f23444bc59fe0bc5e09fe7 Mon Sep 17 00:00:00 2001 From: Sergey Chvalyuk Date: Wed, 29 Dec 2021 19:27:41 +0200 Subject: [PATCH 2/7] google-search-console.md updated Signed-off-by: Sergey Chvalyuk --- docs/integrations/sources/google-search-console.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/integrations/sources/google-search-console.md b/docs/integrations/sources/google-search-console.md index 86d74155b401b..d936f55a31f53 100644 --- a/docs/integrations/sources/google-search-console.md +++ b/docs/integrations/sources/google-search-console.md @@ -96,6 +96,7 @@ You should now be ready to use the Google Workspace Admin Reports API connector | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| `0.1.10` | 2021-12-29 | [9194](https://github.com/airbytehq/airbyte/pull/9194) | Fix incremental sync: keep all urls in state object | | `0.1.9` | 2021-12-22 | [9047](https://github.com/airbytehq/airbyte/pull/9047) | Add 'order' to spec.json props | | `0.1.8` | 2021-12-21 | [8248](https://github.com/airbytehq/airbyte/pull/8248) | Enable Sentry for performance and errors tracking | | `0.1.7` | 2021-11-26 | [7431](https://github.com/airbytehq/airbyte/pull/7431) | Add default `end_date` param value | From a2f65327bbdd0dd2af38676a80e4a256bc045bed Mon Sep 17 00:00:00 2001 From: Sergey Chvalyuk Date: Fri, 31 Dec 2021 13:41:44 +0200 Subject: [PATCH 3/7] fix 0.1.10 -> 0.1.11 Signed-off-by: Sergey Chvalyuk --- .../init/src/main/resources/seed/source_definitions.yaml | 2 +- .../connectors/source-google-search-console/Dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index c094e496eb8c3..6a11494b7fd11 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -252,7 +252,7 @@ - name: Google Search Console sourceDefinitionId: eb4c9e00-db83-4d63-a386-39cfa91012a8 dockerRepository: airbyte/source-google-search-console - dockerImageTag: 0.1.10 + dockerImageTag: 0.1.11 documentationUrl: https://docs.airbyte.io/integrations/sources/google-search-console icon: googlesearchconsole.svg sourceType: api diff --git a/airbyte-integrations/connectors/source-google-search-console/Dockerfile b/airbyte-integrations/connectors/source-google-search-console/Dockerfile index c14914f5360ea..ac0985737b872 100755 --- a/airbyte-integrations/connectors/source-google-search-console/Dockerfile +++ b/airbyte-integrations/connectors/source-google-search-console/Dockerfile @@ -13,5 +13,5 @@ ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENV SENTRY_DSN "https://d4b03de0c4574c78999b8d58e55243dc@o1009025.ingest.sentry.io/6102835" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.10 +LABEL io.airbyte.version=0.1.11 LABEL io.airbyte.name=airbyte/source-google-search-console From cbdacae3c59d3453c369b8c706e484e8fa2d96ba Mon Sep 17 00:00:00 2001 From: Sergey Chvalyuk Date: Fri, 31 Dec 2021 13:42:23 +0200 Subject: [PATCH 4/7] 0.1.10 -> 0.1.11 (fix) Signed-off-by: Sergey Chvalyuk --- .../eb4c9e00-db83-4d63-a386-39cfa91012a8.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/eb4c9e00-db83-4d63-a386-39cfa91012a8.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/eb4c9e00-db83-4d63-a386-39cfa91012a8.json index 582b6d6830028..034d07ac76926 100644 --- 
a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/eb4c9e00-db83-4d63-a386-39cfa91012a8.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/eb4c9e00-db83-4d63-a386-39cfa91012a8.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "eb4c9e00-db83-4d63-a386-39cfa91012a8", "name": "Google Search Console", "dockerRepository": "airbyte/source-google-search-console", - "dockerImageTag": "0.1.10", + "dockerImageTag": "0.1.11", "documentationUrl": "https://docs.airbyte.io/integrations/sources/google-search-console", "icon": "googlesearchconsole.svg" } From aa3e3492c80d69e31446d2efc067b84e7ef17482 Mon Sep 17 00:00:00 2001 From: Sergey Chvalyuk Date: Fri, 31 Dec 2021 14:24:21 +0200 Subject: [PATCH 5/7] test_updated_state added --- .../unit_tests/unit_test.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/airbyte-integrations/connectors/source-google-search-console/unit_tests/unit_test.py b/airbyte-integrations/connectors/source-google-search-console/unit_tests/unit_test.py index 9273554a9a40f..2f1c14841292c 100755 --- a/airbyte-integrations/connectors/source-google-search-console/unit_tests/unit_test.py +++ b/airbyte-integrations/connectors/source-google-search-console/unit_tests/unit_test.py @@ -89,3 +89,19 @@ def test_state(current_stream_state, latest_record, expected): value = stream.get_updated_state(current_stream_state, latest_record) assert value == expected + + +def test_updated_state(): + stream = SearchAnalyticsByDate(NoAuth(), ["https://domain1.com", "https://domain2.com"], "start_date", "end_date") + + state = {} + record = {"site_url": "https://domain1.com", "search_type": "web", "date": "2022-01-01"} + state = stream.get_updated_state(state, record) + record = {"site_url": "https://domain2.com", "search_type": "web", "date": "2022-01-01"} + state = stream.get_updated_state(state, record) + + assert state == { + "https://domain1.com": {"web": {"date": "2022-01-01"}}, + "https://domain2.com": 
{"web": {"date": "2022-01-01"}}, + "date": "2022-01-01", + } From b9b30e0b93564b5a7e720cf9b80a4f3510e3b48b Mon Sep 17 00:00:00 2001 From: Sergey Chvalyuk Date: Wed, 5 Jan 2022 17:00:04 +0200 Subject: [PATCH 6/7] add example of state structure Signed-off-by: Sergey Chvalyuk --- .../source_google_search_console/streams.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/airbyte-integrations/connectors/source-google-search-console/source_google_search_console/streams.py b/airbyte-integrations/connectors/source-google-search-console/source_google_search_console/streams.py index 947dc9a26ed66..afc05f754b53d 100755 --- a/airbyte-integrations/connectors/source-google-search-console/source_google_search_console/streams.py +++ b/airbyte-integrations/connectors/source-google-search-console/source_google_search_console/streams.py @@ -236,6 +236,24 @@ def get_updated_state( """ With the existing nested loop implementation, we have to store a `cursor_field` for each `site_url` and `searchType`. This functionality is placed in `get_update_state`. 
+ + { + "stream": { + "http://domain1.com": { + "web": {"date": "2022-01-03"}, + "news": {"date": "2022-01-03"}, + "image": {"date": "2022-01-03"}, + "video": {"date": "2022-01-03"} + }, + "http://domain2.com": { + "web": {"date": "2022-01-03"}, + "news": {"date": "2022-01-03"}, + "image": {"date": "2022-01-03"}, + "video": {"date": "2022-01-03"} + }, + "date": "2022-01-03", + } + } """ latest_benchmark = latest_record[self.cursor_field] From 614d31163b4632bf5e2df862197452a9abe75c44 Mon Sep 17 00:00:00 2001 From: Sergey Chvalyuk Date: Wed, 5 Jan 2022 18:39:16 +0200 Subject: [PATCH 7/7] source_specs.yaml updated Signed-off-by: Sergey Chvalyuk --- .../src/main/resources/seed/source_specs.yaml | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index d3cd840965729..06ea8ac2abcbb 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -2458,7 +2458,7 @@ - - "client_secret" oauthFlowOutputParameters: - - "refresh_token" -- dockerImage: "airbyte/source-google-search-console:0.1.10" +- dockerImage: "airbyte/source-google-search-console:0.1.11" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/google-search-console" connectionSpecification: @@ -2475,6 +2475,7 @@ type: "array" items: type: "string" + title: "Site URLs" description: "Website URLs property; do not include the domain-level property\ \ in the list" examples: @@ -2483,16 +2484,18 @@ order: 0 start_date: type: "string" - description: "The date from which you'd like to replicate data in the format\ - \ YYYY-MM-DD." + title: "Start Date" + description: "UTC date in the format 2017-01-25. Any data before this date\ + \ will not be replicated." 
examples: - "2021-01-01" pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" order: 1 end_date: type: "string" - description: "The date from which you'd like to replicate data in the format\ - \ YYYY-MM-DD. Must be greater or equal start_date field" + title: "End Date" + description: "UTC date in the format 2017-01-25. Any data after this date\ + \ will not be replicated. Must be greater than or equal to the Start Date field." examples: - "2021-12-12" pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" @@ -2520,24 +2523,24 @@ client_id: title: "Client ID" type: "string" - description: "The Client ID of your developer application" + description: "The Client ID of your Google Search Console developer\ + \ application." airbyte_secret: true client_secret: title: "Client Secret" type: "string" - description: "The client secret of your developer application" + description: "The Client Secret of your Google Search Console developer\ + \ application." airbyte_secret: true access_token: title: "Access Token" type: "string" - description: "An access token generated using the above client ID\ - \ and secret" + description: "Access Token for making authenticated requests." airbyte_secret: true refresh_token: title: "Refresh Token" type: "string" - description: "A refresh token generated using the above client ID\ - \ and secret" + description: "The token for obtaining a new access token." airbyte_secret: true - type: "object" title: "Service Account Key Authentication"