5
5
import time
6
6
from abc import ABC
7
7
from datetime import datetime
8
+ from enum import Enum
8
9
from typing import Any , Iterable , List , Mapping , MutableMapping , Optional , Tuple
9
10
from urllib .parse import parse_qsl , urlparse
10
11
13
14
from airbyte_cdk .sources import AbstractSource
14
15
from airbyte_cdk .sources .streams import Stream
15
16
from airbyte_cdk .sources .streams .http import HttpStream
16
- from airbyte_cdk .sources .streams .http .auth import HttpAuthenticator , TokenAuthenticator
17
+ from airbyte_cdk .sources .streams .http .requests_native_auth import TokenAuthenticator
18
+ from requests .auth import AuthBase
17
19
18
20
19
21
class IntercomStream (HttpStream , ABC ):
@@ -27,14 +29,24 @@ class IntercomStream(HttpStream, ABC):
27
29
28
30
def __init__(
    self,
    authenticator: AuthBase,
    start_date: Optional[str] = None,
    **kwargs,
):
    """
    :param authenticator: requests-native auth object; the parent HttpStream
        constructor attaches it to the HTTP session.
    :param start_date: replication start date string; None means no lower bound.

    Extra keyword arguments are accepted but ignored, so callers can construct
    any stream subclass with a uniform argument set.
    """
    self.start_date = start_date

    super().__init__(authenticator=authenticator)
37
39
40
@property
def authenticator(self):
    """
    Return the auth object actually attached to the HTTP session when one is set.

    Works around a CDK bug: when the authenticator is a requests-native
    AuthBase instance, the default base-class property reports an incorrect
    authenticator value.
    """
    session_auth = self._session.auth
    return session_auth if session_auth else super().authenticator
38
50
def next_page_token (self , response : requests .Response ) -> Optional [Mapping [str , Any ]]:
39
51
"""
40
52
Abstract method of HttpStream - should be overwritten.
@@ -95,7 +107,7 @@ def filter_by_state(self, stream_state: Mapping[str, Any] = None, record: Mappin
95
107
during the slicing.
96
108
"""
97
109
98
- if not stream_state or record [self .cursor_field ] >= stream_state .get (self .cursor_field ):
110
+ if not stream_state or record [self .cursor_field ] > stream_state .get (self .cursor_field ):
99
111
yield record
100
112
101
113
def parse_response (self , response : requests .Response , stream_state : Mapping [str , Any ], ** kwargs ) -> Iterable [Mapping ]:
@@ -124,10 +136,12 @@ class ChildStreamMixin:
124
136
parent_stream_class : Optional [IntercomStream ] = None
125
137
126
138
def stream_slices(self, sync_mode, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]:
    """
    Yield one {"id": ...} slice per record of the parent stream.

    The parent stream's own slices are iterated first so a sliced parent is
    read completely.
    """
    parent = self.parent_stream_class(authenticator=self.authenticator, start_date=self.start_date)
    for parent_slice in parent.stream_slices(sync_mode=sync_mode):
        # stream_slice must be passed to read_records: passing it to the
        # stream constructor is silently swallowed by **kwargs and the
        # slice would never be applied.
        for record in parent.read_records(sync_mode=sync_mode, stream_slice=parent_slice):
            yield {"id": record["id"]}
131
145
132
146
133
147
class Admins (IntercomStream ):
@@ -144,24 +158,55 @@ def path(self, **kwargs) -> str:
144
158
145
159
class Companies (IncrementalIntercomStream ):
146
160
"""Return list of all companies.
147
- API Docs: https://developers.intercom.com/intercom-api-reference/reference#iterating-over-all-companies
148
- Endpoint: https://api.intercom.io/companies/scroll
161
+ The Intercom API provides 2 similar endpoint for loading of companies:
162
+ 1) "standard" - https://developers.intercom.com/intercom-api-reference/reference#list-companies.
163
+ But this endpoint does not work well for huge datasets and can have performance problems.
164
+ 2) "scroll" - https://developers.intercom.com/intercom-api-reference/reference#iterating-over-all-companies
165
+ It has good performance but at same time only one script/client can use it across the client's entire account.
166
+
167
+ According to above circumstances no one endpoint can't be used permanently. That's why this stream tries can
168
+ apply both endpoints according to the following logic:
169
+ 1) By default the stream tries to load data by "scroll" endpoint.
170
+ 2) Try to wait a "scroll" request within a minute (3 attempts with delay 20,5 seconds)
171
+ if a "stroll" is busy by another script
172
+ 3) Switch to using of the "standard" endpoint.
149
173
"""
150
174
175
class EndpointType(Enum):
    """Relative API path for each of the two company-listing endpoints."""

    scroll = "companies/scroll"
    standard = "companies"
179
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    # Number of scroll-endpoint backoffs so far; after the limit is exceeded
    # the stream gives up on scrolling (see can_use_scroll()).
    self._backoff_count = 0
    # Endpoint currently used by path(); starts with the faster "scroll" API.
    self._endpoint_type = self.EndpointType.scroll
    self._total_count = None  # saved once from the first response that carries total_count
151
185
def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
    """Return the token for the next page, or None on the last page.

    Scroll mode: keep iterating with the returned scroll_param until the
    response stops carrying data — otherwise one must wait ~1 minute for the
    scroll to expire before a new scroll (e.g. for company segments) can start.
    Standard mode: delegate to the base pagination unless the response
    reports errors.
    """
    data = response.json()
    # Capture (and log) the account-wide company count from the first
    # response that includes it.
    if self._total_count is None and data.get("total_count"):
        self._total_count = data["total_count"]
        self.logger.info(f"found {self._total_count} companies")
    if self.can_use_scroll():

        scroll_param = data.get("scroll_param")

        # this stream always has only one data field
        data_field = self.data_fields[0]
        # Continue scrolling only while the response still contains records.
        if scroll_param and data.get(data_field):
            return {"scroll_param": scroll_param}
    elif not data.get("errors"):
        # Standard endpoint: fall back to the default cursor pagination.
        return super().next_page_token(response)
    return None
157
203
158
- # this stream always has only one data field
159
- data_field = self .data_fields [0 ]
160
- if scroll_param and data .get (data_field ):
161
- return {"scroll_param" : scroll_param }
204
def can_use_scroll(self):
    """Whether the scroll endpoint is still usable (at most 3 backoffs so far)."""
    return self._backoff_count < 4
162
207
163
208
def path(self, **kwargs) -> str:
    """Relative API path — depends on which endpoint mode the stream is in."""
    endpoint = self._endpoint_type
    return endpoint.value
165
210
166
211
@classmethod
167
212
def check_exists_scroll (cls , response : requests .Response ) -> bool :
@@ -174,8 +219,25 @@ def check_exists_scroll(cls, response: requests.Response) -> bool:
174
219
175
220
return False
176
221
222
@property
def raise_on_http_errors(self) -> bool:
    """
    Suppress raising on HTTP errors only while a failed scroll is being handed
    over to the standard endpoint; raise in every other situation.
    """
    scroll_exhausted = self._endpoint_type == self.EndpointType.scroll and not self.can_use_scroll()
    return not scroll_exhausted
227
+
228
def stream_slices(self, sync_mode, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]:
    """Yield a first slice for the scroll attempt; if scrolling became unusable
    while that slice was read, yield a second slice for the standard endpoint.

    NOTE: this is a generator, so the can_use_scroll() check runs only after
    the caller has fully consumed the first slice — i.e. after any scroll
    backoffs have already been counted by should_retry().
    """
    yield None
    if not self.can_use_scroll():
        # Scroll gave up: switch path() to the standard endpoint and re-read.
        self._endpoint_type = self.EndpointType.standard
        yield None
233
+
177
234
def should_retry(self, response: "requests.Response") -> bool:
    """Retry scroll conflicts up to the backoff limit, then give up on scroll.

    A response saying the scroll is occupied by another client increments the
    backoff counter; once can_use_scroll() turns False, return False so the
    stream can switch to the standard endpoint instead of retrying forever.
    All other responses use the default retry policy.
    """
    if self.check_exists_scroll(response):
        self._backoff_count += 1
        if not self.can_use_scroll():
            self.logger.error("Can't create a new scroll request within a minute. Let's try to use a standard non-scroll endpoint.")
            return False
        return True
    return super().should_retry(response)
181
243
@@ -186,6 +248,13 @@ def backoff_time(self, response: requests.Response) -> Optional[float]:
186
248
return 20.5
187
249
return super ().backoff_time (response )
188
250
251
def parse_response(self, response: requests.Response, stream_state: Mapping[str, Any], **kwargs) -> Iterable[Mapping]:
    """Parse a page of records; while in the scroll-fallback state
    (raise_on_http_errors is False) an error payload yields nothing
    instead of raising.
    """
    if not self.raise_on_http_errors:
        data = response.json()
        # A payload with "errors" carries no records — emit nothing and let
        # the stream move on to the standard endpoint.
        if data.get("errors"):
            return
    yield from super().parse_response(response, stream_state=stream_state, **kwargs)
257
+
189
258
190
259
class CompanySegments (ChildStreamMixin , IncrementalIntercomStream ):
191
260
"""Return list of all company segments.
0 commit comments