4
4
5
5
import base64
6
6
import logging
7
- import urllib .parse as urlparse
8
7
from abc import ABC
9
8
from datetime import datetime
10
- from typing import Any , Iterable , Iterator , List , Mapping , MutableMapping , Sequence , TYPE_CHECKING , Optional
9
+ from typing import Any , Iterable , Iterator , List , Mapping , MutableMapping , TYPE_CHECKING , Optional
11
10
12
11
import pendulum
13
12
import requests
14
13
from airbyte_cdk .models import SyncMode
15
14
from airbyte_cdk .sources .streams import Stream
16
15
from airbyte_cdk .sources .utils .transform import TransformConfig , TypeTransformer
17
16
from cached_property import cached_property
17
+ from facebook_business .adobjects .abstractobject import AbstractObject
18
18
from facebook_business .api import FacebookAdsApiBatch , FacebookRequest , FacebookResponse
19
- from .common import batch , deep_merge
19
+ from .common import deep_merge
20
20
21
21
if TYPE_CHECKING :
22
22
from source_facebook_marketing .api import API
23
23
24
24
logger = logging .getLogger ("airbyte" )
25
25
26
26
27
- def remove_params_from_url (url : str , params : List [str ]) -> str :
28
- """
29
- Parses a URL and removes the query parameters specified in params
30
- :param url: URL
31
- :param params: list of query parameters
32
- :return: URL with params removed
33
- """
34
- parsed = urlparse .urlparse (url )
35
- query = urlparse .parse_qs (parsed .query , keep_blank_values = True )
36
- filtered = dict ((k , v ) for k , v in query .items () if k not in params )
37
- return urlparse .urlunparse (
38
- [parsed .scheme , parsed .netloc , parsed .path , parsed .params , urlparse .urlencode (filtered , doseq = True ), parsed .fragment ]
39
- )
40
-
41
-
42
27
def fetch_thumbnail_data_url (url : str ) -> Optional [str ]:
43
28
"""Request thumbnail image and return it embedded into the data-link"""
44
29
try :
@@ -47,8 +32,10 @@ def fetch_thumbnail_data_url(url: str) -> Optional[str]:
47
32
_type = response .headers ["content-type" ]
48
33
data = base64 .b64encode (response .content )
49
34
return f"data:{ _type } ;base64,{ data .decode ('ascii' )} "
50
- except requests .exceptions .RequestException :
51
- pass
35
+ else :
36
+ logger .warning (f"Got { repr (response )} while requesting thumbnail image." )
37
+ except requests .exceptions .RequestException as exc :
38
+ logger .warning (f"Got { str (exc )} while requesting thumbnail image." )
52
39
return None
53
40
54
41
@@ -59,10 +46,13 @@ class FBMarketingStream(Stream, ABC):
59
46
transformer : TypeTransformer = TypeTransformer (TransformConfig .DefaultSchemaNormalization )
60
47
61
48
page_size = 100
49
+ use_batch = False
62
50
63
51
enable_deleted = False
64
52
entity_prefix = None
65
53
54
+ MAX_BATCH_SIZE = 50
55
+
66
56
def __init__ (self , api : 'API' , include_deleted : bool = False , ** kwargs ):
67
57
super ().__init__ (** kwargs )
68
58
self ._api = api
@@ -73,26 +63,36 @@ def fields(self) -> List[str]:
73
63
"""List of fields that we want to query, for now just all properties from stream's schema"""
74
64
return list (self .get_json_schema ().get ("properties" , {}).keys ())
75
65
76
- def execute_in_batch (self , requests : Iterable [FacebookRequest ]) -> Sequence [MutableMapping [str , Any ]]:
66
+ def _execute_batch (self , batch ):
67
+ """Execute batch, retry in case of failures"""
68
+ while batch :
69
+ batch = batch .execute ()
70
+ if batch :
71
+ logger .info ("Retry failed requests in batch" )
72
+
73
+ def execute_in_batch (self , pending_requests : Iterable [FacebookRequest ]) -> Iterable [MutableMapping [str , Any ]]:
77
74
"""Execute list of requests in batches"""
78
75
records = []
79
76
80
77
def success (response : FacebookResponse ):
81
78
records .append (response .json ())
82
79
83
80
def failure (response : FacebookResponse ):
84
- logger .info (f"Request failed with response: { response .body ()} " )
81
+ # FIXME: stop sync or retry
82
+ logger .warning (f"Request failed with response: { response .body ()} " )
85
83
86
84
api_batch : FacebookAdsApiBatch = self ._api .api .new_batch ()
87
- for request in requests :
85
+ for request in pending_requests :
88
86
api_batch .add_request (request , success = success , failure = failure )
87
+ if len (api_batch ) == self .MAX_BATCH_SIZE :
88
+ self ._execute_batch (api_batch )
89
89
90
- while api_batch :
91
- api_batch = api_batch .execute ()
92
- if api_batch :
93
- logger .info ("Retry failed requests in batch" )
90
+ yield from records
91
+ api_batch : FacebookAdsApiBatch = self ._api .api .new_batch ()
92
+ records = []
94
93
95
- return records
94
+ self ._execute_batch (api_batch )
95
+ yield from records
96
96
97
97
def read_records (
98
98
self ,
@@ -102,19 +102,23 @@ def read_records(
102
102
stream_state : Mapping [str , Any ] = None ,
103
103
) -> Iterable [Mapping [str , Any ]]:
104
104
"""Main read method used by CDK"""
105
- for record in self ._read_records (params = self .request_params (stream_state = stream_state )):
106
- yield self ._extend_record (record , fields = self .fields )
105
+ records_iter = self ._read_records (params = self .request_params (stream_state = stream_state ))
106
+ loaded_records_iter = (record .api_get (fields = self .fields , pending = self .use_batch ) for record in records_iter )
107
+ if self .use_batch :
108
+ loaded_records_iter = self .execute_in_batch (loaded_records_iter )
109
+
110
+ for record in loaded_records_iter :
111
+ if isinstance (record , AbstractObject ):
112
+ yield record .export_all_data ()
113
+ else :
114
+ yield record
107
115
108
116
def _read_records (self , params : Mapping [str , Any ]) -> Iterable :
109
117
"""Wrapper around query to backoff errors.
110
118
We have default implementation because we still can override read_records so this method is not mandatory.
111
119
"""
112
120
return []
113
121
114
- def _extend_record (self , obj : Any , ** kwargs ):
115
- """Wrapper around api_get to backoff errors"""
116
- return obj .api_get (** kwargs ).export_all_data ()
117
-
118
122
def request_params (self , ** kwargs ) -> MutableMapping [str , Any ]:
119
123
"""Parameters that should be passed to query_records method"""
120
124
params = {"limit" : self .page_size }
@@ -205,39 +209,30 @@ class AdCreatives(FBMarketingStream):
205
209
"""
206
210
207
211
entity_prefix = "adcreative"
208
- batch_size = 50
212
+ use_batch = True
209
213
210
214
def __init__ (self , fetch_thumbnail_images : bool = False , ** kwargs ):
211
215
super ().__init__ (** kwargs )
212
216
self ._fetch_thumbnail_images = fetch_thumbnail_images
213
217
218
+ @cached_property
219
+ def fields (self ) -> List [str ]:
220
+ """ Remove "thumbnail_data_url" field because it is computed field and it's not a field that we can request from Facebook
221
+ """
222
+ return [f for f in super ().fields if f != "thumbnail_data_url" ]
223
+
214
224
def read_records (
215
225
self ,
216
226
sync_mode : SyncMode ,
217
227
cursor_field : List [str ] = None ,
218
228
stream_slice : Mapping [str , Any ] = None ,
219
229
stream_state : Mapping [str , Any ] = None ,
220
230
) -> Iterable [Mapping [str , Any ]]:
221
- """Read records using batch API"""
222
- records = self ._read_records (params = self .request_params (stream_state = stream_state ))
223
- # "thumbnail_data_url" is a field in our stream's schema because we
224
- # output it (see fix_thumbnail_urls below), but it's not a field that
225
- # we can request from Facebook
226
- request_fields = [f for f in self .fields if f != "thumbnail_data_url" ]
227
- requests = [record .api_get (fields = request_fields , pending = True ) for record in records ]
228
- for requests_batch in batch (requests , size = self .batch_size ):
229
- for record in self .execute_in_batch (requests_batch ):
230
- yield self .fix_thumbnail_urls (record )
231
-
232
- def fix_thumbnail_urls (self , record : MutableMapping [str , Any ]) -> MutableMapping [str , Any ]:
233
- """Cleans and, if enabled, fetches thumbnail URLs for each creative."""
234
- # The thumbnail_url contains some extra query parameters that don't affect the validity of the URL, but break SAT
235
- thumbnail_url = record .get ("thumbnail_url" )
236
- if thumbnail_url :
237
- record ["thumbnail_url" ] = remove_params_from_url (thumbnail_url , ["_nc_hash" , "d" ])
231
+ """Read with super method and append thumbnail_data_url if enabled"""
232
+ for record in super ().read_records (sync_mode , cursor_field , stream_slice , stream_state ):
238
233
if self ._fetch_thumbnail_images :
239
- record ["thumbnail_data_url" ] = fetch_thumbnail_data_url (thumbnail_url )
240
- return record
234
+ record ["thumbnail_data_url" ] = fetch_thumbnail_data_url (record . get ( " thumbnail_url" ) )
235
+ yield record
241
236
242
237
def _read_records (self , params : Mapping [str , Any ]) -> Iterator :
243
238
return self ._api .account .get_ad_creatives (params = params )
0 commit comments