Skip to content

Commit

Permalink
Fixes Python string interpolation in SQL 😱 to use proper psycopg tools.
Browse files Browse the repository at this point in the history
  • Loading branch information
BielStela authored and alexeh committed May 31, 2023
1 parent 9d12ac7 commit 07ea0fc
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 19 deletions.
62 changes: 46 additions & 16 deletions data/h3_data_importer/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
from re import sub

import jsonschema
import pandas as pd
import psycopg
from jsonschema import ValidationError
from psycopg2 import sql
from psycopg2.extensions import connection

log = logging.getLogger(__name__) # here we can use __name__ because it is an imported module
Expand Down Expand Up @@ -62,30 +64,42 @@ def insert_to_h3_data_and_contextual_layer_tables(
with connection:
with connection.cursor() as cursor:
# remove existing entries
cursor.execute(f"""DELETE FROM "h3_data" WHERE "h3tableName" = '{table}';""")
cursor.execute(f"""DELETE FROM "contextual_layer" WHERE "name" = '{dataset}'""")
cursor.execute('DELETE FROM "h3_data" WHERE "h3tableName" = (%s)', (table,))
cursor.execute('DELETE FROM "contextual_layer" WHERE "name" = (%s)', (dataset,))

# insert new entries
log.info("Inserting record into h3_data table...")

cursor.execute(
f"""INSERT INTO "h3_data" ("h3tableName", "h3columnName", "h3resolution", "year")
VALUES ('{table}', '{column}', {h3_res}, {year});"""
h3_data_query = sql.SQL(
"""
INSERT INTO "h3_data" ("h3tableName", "h3columnName", "h3resolution", "year")
VALUES ({table}, {column}, {h3_res}, {year})
"""
).format(
table=sql.Identifier(table),
column=sql.Identifier(column),
h3_res=sql.Literal(h3_res),
year=sql.Literal(year),
)
cursor.execute(h3_data_query)

log.info("Inserting record into contextual_layer table...")
cursor.execute(
f"""INSERT INTO "contextual_layer" ("name", "metadata", "category")
VALUES ('{dataset}', '{json.dumps(get_metadata(table))}', '{category}')
RETURNING id;
metadata = json.dumps(get_metadata(table))
insert_query = sql.SQL(
"""
)
INSERT INTO "contextual_layer" ("name", "metadata", "category")
VALUES ({dataset}, {metadata}, {category})
RETURNING id;
"""
).format(dataset=sql.Literal(dataset), metadata=sql.Literal(metadata), category=sql.Literal(category))
cursor.execute(insert_query)
contextual_data_id = cursor.fetchall()[0][0]
# insert contextual_layer entry id into h3_table
cursor.execute(
f"""update "h3_data" set "contextualLayerId" = '{contextual_data_id}'
where "h3tableName" = '{table}';"""
)
update_query = sql.SQL(
"""
UPDATE "h3_data" SET "contextualLayerId" = {contextual_data_id}
WHERE "h3tableName" = {table};
"""
).format(contextual_data_id=sql.Literal(contextual_data_id), table=sql.Identifier(table))
cursor.execute(update_query)


def get_metadata(table: str) -> dict:
Expand Down Expand Up @@ -137,3 +151,19 @@ def get_connection_info() -> str:
user=os.getenv("API_POSTGRES_USERNAME"),
password=os.getenv("API_POSTGRES_PASSWORD"),
)


def h3_table_schema(df: pd.DataFrame) -> sql.Composable:
    """Build the SQL column-definition list for an H3 table from a pandas DataFrame.

    The schema starts with the ``h3index`` primary-key column, followed by one
    column per DataFrame column, with the PostgreSQL type taken from the
    ``DTYPES_TO_PG`` dtype mapping. Column names are slugified so they stay
    consistent with tables created by the previous inline schema construction,
    which used ``slugify(col)``.

    Args:
        df: DataFrame whose columns and dtypes define the table schema.

    Returns:
        A ``sql.Composable`` column-definition list, ready to be spliced into a
        ``CREATE TABLE`` statement.

    TODO: use this function everywhere and be careful with the psycopg version.

    Examples:
        >>> schema = h3_table_schema(df)
        >>> sql.SQL("CREATE TABLE {} ({})").format(sql.Identifier(table), schema)
    """
    index = [sql.SQL("h3index h3index PRIMARY KEY")]
    extra = [
        # slugify keeps generated column names identical to those produced by the
        # pre-refactor f'"{slugify(col)}" {DTYPES_TO_PG[...]}' schema strings.
        sql.SQL("{} {}").format(sql.Identifier(slugify(col)), sql.SQL(DTYPES_TO_PG[str(dtype)]))
        for col, dtype in zip(df.columns, df.dtypes)
    ]
    return sql.SQL(", ").join(index + extra)
8 changes: 5 additions & 3 deletions data/h3_data_importer/vector_folder_to_h3_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@
import pandas as pd
import psycopg2
from h3ronpy import vector
from psycopg2 import sql
from psycopg2.pool import ThreadedConnectionPool

from data.h3_data_importer.utils import h3_table_schema
from utils import insert_to_h3_data_and_contextual_layer_tables, link_to_indicator_table, slugify

DTYPES_TO_PG = {
Expand Down Expand Up @@ -115,12 +118,11 @@ def create_h3_grid_table(
):
"""Creates the h3 data table (like `h3_grid_nio_global`) with the correct data types"""
dtypes = df.dtypes.to_dict()
schema = ", ".join([f'"{slugify(col)}" {DTYPES_TO_PG[str(dtype)]}' for col, dtype in dtypes.items()])
cursor = connection.cursor()
if drop_if_exists:
cursor.execute(f"DROP TABLE IF EXISTS {table_name};")
cursor.execute(sql.SQL("DROP TABLE IF EXISTS {};").format(sql.Identifier(table_name)))
log.info(f"Dropped table {table_name}")
cursor.execute(f"CREATE TABLE {table_name} (h3index h3index PRIMARY KEY, {schema});")
cursor.execute(sql.SQL("CREATE TABLE {} ({})").format(sql.Identifier(table_name), h3_table_schema(df)))
log.info(f"Created table {table_name} with columns {', '.join(dtypes.keys())}")
connection.commit()
cursor.close()
Expand Down

0 comments on commit 07ea0fc

Please sign in to comment.