Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Kasiah/community dot com connector #1112

Merged
merged 34 commits into from
Aug 12, 2024
Merged
Show file tree
Hide file tree
Changes from 32 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
7f4241f
build out community.com connector
KasiaHinkson Aug 5, 2024
277b159
fix url
KasiaHinkson Aug 5, 2024
439d053
add to init
KasiaHinkson Aug 5, 2024
bc49be2
get not post
KasiaHinkson Aug 5, 2024
efbf3d4
small edits to fix connector
KasiaHinkson Aug 5, 2024
657b86e
Update community.py
KasiaHinkson Aug 6, 2024
528e2df
change some names and add test
KasiaHinkson Aug 8, 2024
27dbb09
fix param name
KasiaHinkson Aug 8, 2024
d0a31bd
fix url
KasiaHinkson Aug 8, 2024
bd593ce
wrong url again
KasiaHinkson Aug 8, 2024
c8438e9
maybe this will magically work
KasiaHinkson Aug 8, 2024
bbc1518
add a log for test
KasiaHinkson Aug 8, 2024
71489d9
another log
KasiaHinkson Aug 8, 2024
48fb07a
another log
KasiaHinkson Aug 8, 2024
98fec87
trying again
KasiaHinkson Aug 8, 2024
5b9e0d6
will this pass
KasiaHinkson Aug 8, 2024
ed8e712
more stuff
KasiaHinkson Aug 8, 2024
b62a481
change test response
KasiaHinkson Aug 8, 2024
fc2551e
content not json
KasiaHinkson Aug 8, 2024
4f27edc
another log
KasiaHinkson Aug 8, 2024
36b0651
more logs
KasiaHinkson Aug 8, 2024
d052f40
different test values
KasiaHinkson Aug 8, 2024
83f8674
shorten the test string
KasiaHinkson Aug 8, 2024
126453a
values don't matter
KasiaHinkson Aug 8, 2024
8877778
cleanup
KasiaHinkson Aug 8, 2024
2ae666f
docs
KasiaHinkson Aug 8, 2024
d868766
fix link
KasiaHinkson Aug 8, 2024
ab5b88f
remove commented code
KasiaHinkson Aug 8, 2024
d07daab
don't need these
KasiaHinkson Aug 8, 2024
96af8fa
remove another thing
KasiaHinkson Aug 8, 2024
82576c6
linting
KasiaHinkson Aug 8, 2024
0e6667d
incorporate sharine's fix with an extra try except
KasiaHinkson Aug 9, 2024
22f8841
update links
KasiaHinkson Aug 12, 2024
5d6c17c
links
KasiaHinkson Aug 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions parsons/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
("parsons.catalist.catalist", "CatalistMatch"),
("parsons.census.census", "Census"),
("parsons.civis.civisclient", "CivisClient"),
("parsons.community.community", "Community"),
("parsons.controlshift.controlshift", "Controlshift"),
("parsons.copper.copper", "Copper"),
("parsons.crowdtangle.crowdtangle", "CrowdTangle"),
Expand Down
3 changes: 3 additions & 0 deletions parsons/community/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from parsons.community.community import Community

# Public API of the parsons.community package: only the Community connector.
__all__ = ["Community"]
101 changes: 101 additions & 0 deletions parsons/community/community.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import logging
from parsons.utilities.api_connector import APIConnector
from parsons.utilities import check_env
from parsons.etl import Table

# Module-level logger for the Community connector.
logger = logging.getLogger(__name__)

# Base URL of Community.com's Data Export (file download) API.
# NOTE: this string already ends with a trailing slash.
COMMUNITY_API_ENDPOINT = "https://dl.community.com/download/v1/files/"


class Community(object):
    """
    Connector for the Community.com Data Export API.

    `Args:`
        community_client_id: str
            The Community provided Client ID. Not required if ``COMMUNITY_CLIENT_ID`` env
            variable set.
        community_access_token: str
            The Community provided access token. Not required if ``COMMUNITY_ACCESS_TOKEN`` env
            variable set.
        community_url: str
            The URL to access the API. Not required, default is
            https://dl.community.com/download/v1/files/. You can set a ``COMMUNITY_URL`` env
            variable or use this URL parameter if a different endpoint is necessary.

    API Documentation: https://developer.community.com/reference/data-export-api-downloading-data
    """

    def __init__(self, community_client_id=None, community_access_token=None, community_url=None):
        self.community_client_id = check_env.check("community_client_id", community_client_id)
        self.community_access_token = check_env.check(
            "community_access_token", community_access_token
        )
        # Build the base URI from the resolved client id (self.community_client_id),
        # not the raw argument, so env-var-only configuration does not produce
        # ".../None/". COMMUNITY_API_ENDPOINT already ends in "/", so no extra
        # separator is inserted.
        self.uri = (
            check_env.check("COMMUNITY_URL", community_url, optional=True)
            or f"{COMMUNITY_API_ENDPOINT}{self.community_client_id}/"
        )
        self.headers = {
            "Authorization": f"Bearer {self.community_access_token}",
        }
        self.client = APIConnector(
            self.uri,
            headers=self.headers,
        )

    def get_request(self, filename):
        """
        GET request to Community.com API to get the CSV data.

        `Args:`
            filename: str
                Data filename you are requesting.
                Options:
                    'campaigns': Campaign Performance data
                    'outbound_message_type_usage': Message Segment Usage data
                    'campaign_links': Campaign Link Performance data
                    'members': Member Details data
                    'member_state_changes': Member Subscription Status data
                    'custom_member_data': Custom Member Data
                    'communities': Communities data
                    'member_communities': Member Communities data

        `Returns:`
            Response of GET request; a successful response returns the CSV formatted data
            as raw bytes (gzip-compressed CSV content).
        """

        logger.info(f"Requesting {filename}")
        # The message-segment-usage export lives under an extra sub-path; every
        # other export is fetched as a plain "<filename>.csv.gz" resource.
        url = (
            f"{filename}.csv.gz"
            if filename != "outbound_message_type_usage"
            else f"{filename}.csv.gz/segment-based-subscription"
        )
        response = self.client.get_request(url=url, return_format="content")
        return response

    def get_data_export(self, filename):
        """
        Get specified data from Community.com API as Parsons table.

        `Args:`
            filename: str
                Data filename you are requesting.
                Options:
                    'campaigns': Campaign Performance data
                    'outbound_message_type_usage': Message Segment Usage data
                    'campaign_links': Campaign Link Performance data
                    'members': Member Details data
                    'member_state_changes': Member Subscription Status data
                    'custom_member_data': Custom Member Data
                    'communities': Communities data
                    'member_communities': Member Communities data

        `Returns:`
            Contents of the requested export CSV as a Parsons table.
        """

        # The API returns raw bytes; decode to text before CSV parsing.
        get_request_response = self.get_request(filename=filename)
        response_string = get_request_response.decode("utf-8")
        table = Table.from_csv_string(response_string)
        return table
37 changes: 12 additions & 25 deletions parsons/google/google_cloud_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,7 @@ class GoogleCloudStorage(object):

def __init__(self, app_creds=None, project=None):
env_credentials_path = str(uuid.uuid4())
setup_google_application_credentials(
app_creds, target_env_var_name=env_credentials_path
)
setup_google_application_credentials(app_creds, target_env_var_name=env_credentials_path)
credentials = load_google_application_credentials(env_credentials_path)
self.project = project

Expand Down Expand Up @@ -297,9 +295,7 @@ def delete_blob(self, bucket_name, blob_name):
blob.delete()
logger.info(f"{blob_name} blob in {bucket_name} bucket deleted.")

def upload_table(
self, table, bucket_name, blob_name, data_type="csv", default_acl=None
):
def upload_table(self, table, bucket_name, blob_name, data_type="csv", default_acl=None):
"""
Load the data from a Parsons table into a blob.

Expand Down Expand Up @@ -327,17 +323,18 @@ def upload_table(
# CSVView. Once any transformations are made, the Table.table
# becomes a different petl class
if isinstance(table.table, petl.io.csv_py3.CSVView):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change looks okay to me, I'm curious about the reasoning? That will help me understand how big the breaking change would be - right now this code is so similar that it's hard to tell how it might cause problems for downstream users.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reasoning for the change is that something about how our Parsons operator is working is changing the attributes of the CSVView, and we just need to do the to_csv instead of getting the filename. It would presumably break for someone if they want it to fail when encountering an Attribute Error. I assume this is unlikely and that this probably wouldn't really break anything for anyone, but technically if someone is depending on getting an AttributeError in some case, this would mean they instead get an actual table. Not sure why that would be a problem, but trying to be proactive!

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay gotcha, that makes sense. Yeah, it's hard to imagine someone depending on getting an error there - there's no reason to call the function at all if you error out there. It hasn't uploaded the data to the blob yet, and it's not saving anything on the object that you can access, so there's no reason to call the function and catch the attribute error unless you're checking if table.table.source.filename exists, doing something, then calling the function again. But that would be a very weird pattern, like you could literally just check if table.table.source.filename exists directly. So yes, this is technically a breaking change but I think the chances of anyone being bothered by it are very very small so we are okay to merge it into main instead of major release.

local_file = table.table.source.filename
try:
local_file = table.table.source.filename
except AttributeError:
local_file = table.to_csv()
else:
local_file = table.to_csv()
content_type = "text/csv"
elif data_type == "json":
local_file = table.to_json()
content_type = "application/json"
else:
raise ValueError(
f"Unknown data_type value ({data_type}): must be one of: csv or json"
)
raise ValueError(f"Unknown data_type value ({data_type}): must be one of: csv or json")

try:
blob.upload_from_filename(
Expand Down Expand Up @@ -407,9 +404,7 @@ def copy_bucket_to_gcs(
Secret key to authenticate storage transfer
"""
if source not in ["gcs", "s3"]:
raise ValueError(
f"Blob transfer only supports gcs and s3 sources [source={source}]"
)
raise ValueError(f"Blob transfer only supports gcs and s3 sources [source={source}]")
if source_path and source_path[-1] != "/":
raise ValueError("Source path much end in a '/'")

Expand Down Expand Up @@ -596,13 +591,9 @@ def unzip_blob(
}

file_extension = compression_params[compression_type]["file_extension"]
compression_function = compression_params[compression_type][
"compression_function"
]
compression_function = compression_params[compression_type]["compression_function"]

compressed_filepath = self.download_blob(
bucket_name=bucket_name, blob_name=blob_name
)
compressed_filepath = self.download_blob(bucket_name=bucket_name, blob_name=blob_name)

decompressed_filepath = compressed_filepath.replace(file_extension, "")
decompressed_blob_name = (
Expand Down Expand Up @@ -634,9 +625,7 @@ def __gzip_decompress_and_write_to_gcs(self, **kwargs):
bucket_name = kwargs.pop("bucket_name")

with gzip.open(compressed_filepath, "rb") as f_in:
logger.debug(
f"Uploading uncompressed file to GCS: {decompressed_blob_name}"
)
logger.debug(f"Uploading uncompressed file to GCS: {decompressed_blob_name}")
bucket = self.get_bucket(bucket_name=bucket_name)
blob = storage.Blob(name=decompressed_blob_name, bucket=bucket)
blob.upload_from_file(file_obj=f_in, rewind=True, timeout=3600)
Expand All @@ -656,9 +645,7 @@ def __zip_decompress_and_write_to_gcs(self, **kwargs):
with zipfile.ZipFile(compressed_filepath) as path_:
# Open the underlying file
with path_.open(decompressed_blob_in_archive) as f_in:
logger.debug(
f"Uploading uncompressed file to GCS: {decompressed_blob_name}"
)
logger.debug(f"Uploading uncompressed file to GCS: {decompressed_blob_name}")
bucket = self.get_bucket(bucket_name=bucket_name)
blob = storage.Blob(name=decompressed_blob_name, bucket=bucket)
blob.upload_from_file(file_obj=f_in, rewind=True, timeout=3600)
11 changes: 8 additions & 3 deletions parsons/utilities/api_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def request(self, url, req_type, json=None, data=None, params=None):
params=params,
)

def get_request(self, url, params=None):
def get_request(self, url, params=None, return_format="json"):
"""
Make a GET request.

Expand All @@ -96,9 +96,14 @@ def get_request(self, url, params=None):

r = self.request(url, "GET", params=params)
self.validate_response(r)
logger.debug(r.json())

return r.json()
if return_format == "json":
logger.debug(r.json())
return r.json()
elif return_format == "content":
return r.content
else:
raise RuntimeError(f"{return_format} is not a valid format, change to json or content")

def post_request(
self, url, params=None, data=None, json=None, success_codes=[200, 201, 202, 204]
Expand Down
40 changes: 40 additions & 0 deletions test/test_community/test_community.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import unittest
import requests_mock
from parsons import Community


# Fake credentials for instantiating the connector in tests.
TEST_CLIENT_ID = "someuuid"
TEST_CLIENT_TOKEN = "somesecret"

# Export filename under test and the full URL the connector is expected to hit.
TEST_FILENAME = "campaigns"
TEST_URI = f"https://faketestingurl.com/{TEST_CLIENT_ID}"
TEST_FULL_URL = f"{TEST_URI}/{TEST_FILENAME}.csv.gz"

# Minimal CSV payload (as bytes) returned by the mocked endpoint.
TEST_GET_RESPONSE_CSV_STRING = b'"CAMPAIGN_ID","LEADER_ID"\n"0288","6e83b"\n'

# Column names expected after parsing the payload above into a Parsons table.
TEST_EXPECTED_COLUMNS = [
    "CAMPAIGN_ID",
    "LEADER_ID",
]


class TestCommunity(unittest.TestCase):
    """Unit tests for the Community connector, with HTTP traffic mocked out."""

    @requests_mock.Mocker()
    def setUp(self, m):
        # Point the connector at a fake endpoint; construction makes no request.
        self.com = Community(TEST_CLIENT_ID, TEST_CLIENT_TOKEN, TEST_URI)

    @requests_mock.Mocker()
    def test_successful_get_request(self, m):
        # The raw GET should return the mocked payload bytes unchanged.
        m.get(TEST_FULL_URL, content=TEST_GET_RESPONSE_CSV_STRING)
        response = self.com.get_request(filename=TEST_FILENAME)
        self.assertEqual(response, TEST_GET_RESPONSE_CSV_STRING)

    @requests_mock.Mocker()
    def test_successful_get_data_export(self, m):
        # The export helper should parse the CSV payload into a Parsons table.
        m.get(TEST_FULL_URL, content=TEST_GET_RESPONSE_CSV_STRING)
        table = self.com.get_data_export(TEST_FILENAME)
        self.assertEqual(table.columns, TEST_EXPECTED_COLUMNS)
Loading