cancervariants · jsstevenson · Jan 4, 2025 · Jan 4, 2025 · Feb 14, 2025 · Feb 14, 2025
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -12,7 +12,7 @@ repos:
       - id: mixed-line-ending
         args: [ --fix=lf ]
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.5.0  # ruff version
+    rev: v0.8.6  # ruff version
     hooks:
       - id: ruff-format
       - id: ruff

diff --git a/pyproject.toml b/pyproject.toml
@@ -46,7 +46,13 @@ etl = [
     "pyyaml"
 ]
 tests = ["pytest", "pytest-cov", "pytest-mock", "isodate"]
-dev = ["pre-commit>=3.7.1", "ruff==0.5.0", "lxml", "xmlformatter", "types-pyyaml"]
+dev = [
+    "pre-commit>=3.7.1",
+    "ruff==0.8.6",
+    "lxml",
+    "xmlformatter",
+    "types-pyyaml",
+]
 
 [project.urls]
 Homepage = "https://github.com/cancervariants/therapy-normalization"
@@ -80,6 +86,7 @@ branch = true
 
 [tool.ruff]
 src = ["src"]
+exclude = ["analysis", "scripts"]
 
 [tool.ruff.lint]
 select = [
@@ -108,10 +115,15 @@ select = [
     "RSE",  # https://docs.astral.sh/ruff/rules/#flake8-raise-rse
     "RET",  # https://docs.astral.sh/ruff/rules/#flake8-return-ret
     "SLF",  # https://docs.astral.sh/ruff/rules/#flake8-self-slf
+    "SLOT",  # https://docs.astral.sh/ruff/rules/#flake8-slots-slot
     "SIM",  # https://docs.astral.sh/ruff/rules/#flake8-simplify-sim
     "ARG",  # https://docs.astral.sh/ruff/rules/#flake8-unused-arguments-arg
     "PTH",  # https://docs.astral.sh/ruff/rules/#flake8-use-pathlib-pth
     "PGH",  # https://docs.astral.sh/ruff/rules/#pygrep-hooks-pgh
+    "FAST",  # https://docs.astral.sh/ruff/rules/#fastapi-fast
+    "PLC",  # https://docs.astral.sh/ruff/rules/#convention-c
+    "PLE",  # https://docs.astral.sh/ruff/rules/#error-e_1
+    "TRY",  # https://docs.astral.sh/ruff/rules/#tryceratops-try
     "PERF",  # https://docs.astral.sh/ruff/rules/#perflint-perf
     "FURB",  # https://docs.astral.sh/ruff/rules/#refurb-furb
     "RUF",  # https://docs.astral.sh/ruff/rules/#ruff-specific-rules-ruf
@@ -130,13 +142,15 @@ fixable = [
     "PT",
     "RSE",
     "SIM",
+    "FAST",
+    "PLC",
+    "PLE",
+    "TRY",
     "PERF",
     "FURB",
     "RUF"
 ]
 # ANN003 - missing-type-kwargs
-# ANN101 - missing-type-self
-# ANN102 - missing-type-cls
 # D203 - one-blank-line-before-class
 # D205 - blank-line-after-summary
 # D206 - indent-with-spaces*
@@ -151,20 +165,21 @@ fixable = [
 # PGH003 - blanket-type-ignore
 # W191 - tab-indentation*
 # S321 - suspicious-ftp-lib-usage
+# PLC0206 - dict-index-missing-items
 # *ignored for compatibility with formatter
 ignore = [
-    "ANN003", "ANN101", "ANN102",
+    "ANN003",
     "D203", "D205", "D206", "D213", "D300", "D400", "D415",
     "E111", "E114", "E117", "E501",
     "PGH003",
     "W191",
     "S321",
+    "PLC0206",
 ]
 
 [tool.ruff.lint.per-file-ignores]
 # ANN001 - missing-type-function-argument
 # ANN2 - missing-return-type
-# ANN102 - missing-type-cls
 # D100 - undocumented-public-module
 # D102 - undocumented-public-class
 # S101 - assert
@@ -174,10 +189,10 @@ ignore = [
 # INP001 - implicit-namespace-package
 # SLF001 - private-member-access
 # PERF401 - manual-list-comprehension
+# S608 - hardcoded-sql-expression
 "tests/*" = [
     "ANN001",
     "ANN2",
-    "ANN102",
     "D100",
     "D102",
     "S101",
@@ -187,6 +202,7 @@ ignore = [
     "PERF401"
 ]
 "tests/unit/test_emit_warnings.py" = ["RUF001"]
+"tests/scripts/build_chembl_data.py" = ["S608"]
 "src/therapy/schemas.py" = ["ANN001", "ANN201", "N805"]
 
 [tool.ruff.lint.flake8-annotations]

diff --git a/src/therapy/cli.py b/src/therapy/cli.py
@@ -103,13 +103,13 @@ def _load_source(
             f"Encountered ModuleNotFoundError attempting to import {e.name}. Are ETL dependencies installed?"
         )
         click.get_current_context().exit()
-    SourceClass = eval(name.value)  # noqa: N806 PGH001 S307
+    SourceClass = eval(name.value)  # noqa: N806, S307
 
     source = SourceClass(database=db, silent=False)
     try:
         processed_ids += source.perform_etl(use_existing)
     except EtlError as e:
-        _logger.error(e)
+        _logger.exception("Encountered ETL error while loading source %s", name)
         click.echo(f"Encountered error while loading {name}: {e}.")
         click.get_current_context().exit()
     end_load = timer()
@@ -260,13 +260,13 @@ def update_normalizer_db(
 
         if len(sources_split) == 0:
             msg = "Must enter 1 or more source names to update"
-            raise Exception(msg)
+            raise ValueError(msg)
 
         non_sources = set(sources_split) - set(SOURCES)
 
         if len(non_sources) != 0:
             msg = f"Not valid source(s): {non_sources}"
-            raise Exception(msg)
+            raise ValueError(msg)
 
         parsed_source_names = {SourceName(SOURCES[s]) for s in sources_split}
         if (

diff --git a/src/therapy/database/dynamodb.py b/src/therapy/database/dynamodb.py
@@ -110,8 +110,8 @@ def drop_db(self) -> None:
         try:
             if not self._check_delete_okay():
                 return
-        except DatabaseWriteError as e:
-            raise e
+        except DatabaseWriteError:  # noqa: TRY203
+            raise
 
         if self.therapy_table in self.list_tables():
             self.dynamodb.Table(self.therapy_table).delete()
@@ -239,30 +239,32 @@ def get_record_by_id(
             otherwise.
         :return: complete therapy record, if match is found; None otherwise
         """
+        if merge:
+            pk = f"{concept_id.lower()}##{RecordType.MERGER.value}"
+        else:
+            pk = f"{concept_id.lower()}##{RecordType.IDENTITY.value}"
         try:
-            if merge:
-                pk = f"{concept_id.lower()}##{RecordType.MERGER.value}"
-            else:
-                pk = f"{concept_id.lower()}##{RecordType.IDENTITY.value}"
             if case_sensitive:
                 match = self.therapies.get_item(
                     Key={"label_and_type": pk, "concept_id": concept_id}
                 )
                 return match["Item"]
-            exp = Key("label_and_type").eq(pk)
-            response = self.therapies.query(KeyConditionExpression=exp)
-            record = response["Items"][0]
-            del record["label_and_type"]
-            return record
+            response = self.therapies.query(
+                KeyConditionExpression=Key("label_and_type").eq(pk)
+            )
         except ClientError as e:
-            _logger.error(
+            _logger.exception(
                 "boto3 client error on get_records_by_id for search term %s: %s",
                 concept_id,
                 e.response["Error"]["Message"],
             )
             return None
         except (KeyError, IndexError):  # record doesn't exist
             return None
+        else:
+            record = response["Items"][0]
+            del record["label_and_type"]
+            return record
 
     def get_refs_by_type(self, search_term: str, ref_type: RefType) -> list[str]:
         """Retrieve concept IDs for records matching the user's query. Other methods
@@ -278,7 +280,7 @@ def get_refs_by_type(self, search_term: str, ref_type: RefType) -> list[str]:
             matches = self.therapies.query(KeyConditionExpression=filter_exp)
             return [m["concept_id"] for m in matches.get("Items", None)]
         except ClientError as e:
-            _logger.error(
+            _logger.exception(
                 "boto3 client error on get_refs_by_type for search term %s: %s",
                 search_term,
                 e.response["Error"]["Message"],
@@ -296,8 +298,8 @@ def get_rxnorm_id_by_brand(self, brand_id: str) -> str | None:
         try:
             matches = self.therapies.query(KeyConditionExpression=filter_exp)
         except ClientError as e:
-            _logger.error(
-                "boto3 client error on rx_brand fetch for brand ID {brand_id}: {e.response['Error']['Message']}",
+            _logger.exception(
+                "boto3 client error on rx_brand fetch for brand ID %s: %s",
                 brand_id,
                 e.response["Error"]["Message"],
             )
@@ -428,7 +430,7 @@ def add_rxnorm_brand(self, brand_id: str, record_id: str) -> None:
         try:
             self.batch.put_item(Item=item)
         except ClientError as e:
-            _logger.error(
+            _logger.exception(
                 "boto3 client error on add_rxnorm_brand for %s -> %s: %s",
                 brand_id,
                 record_id,
@@ -449,7 +451,7 @@ def add_record(self, record: dict, src_name: SourceName) -> None:
         try:
             self.batch.put_item(Item=record)
         except ClientError as e:
-            _logger.error(
+            _logger.exception(
                 "boto3 client error on add_record for %s: %s",
                 concept_id,
                 e.response["Error"]["Message"],
@@ -482,7 +484,7 @@ def add_merged_record(self, record: dict) -> None:
         try:
             self.batch.put_item(Item=record)
         except ClientError as e:
-            _logger.error(
+            _logger.exception(
                 "boto3 client error on add_record for %s: %s",
                 concept_id,
                 e.response["Error"]["Message"],
@@ -509,7 +511,7 @@ def _add_ref_record(
         try:
             self.batch.put_item(Item=record)
         except ClientError as e:
-            _logger.error(
+            _logger.exception(
                 "boto3 client error adding reference %s for %s with match type %s: %s",
                 term,
                 concept_id,
@@ -541,7 +543,7 @@ def update_merge_ref(self, concept_id: str, merge_ref: str) -> None:
             if code == "ConditionalCheckFailedException":
                 msg = f"No such record exists for keys {label_and_type}, {concept_id}"
                 raise DatabaseWriteError(msg) from e
-            _logger.error(
+            _logger.exception(
                 "boto3 client error in `database.update_record()`: %s",
                 e.response["Error"]["Message"],
             )

diff --git a/src/therapy/etl/__init__.py b/src/therapy/etl/__init__.py
@@ -13,11 +13,11 @@
 from .wikidata import Wikidata
 
 __all__ = [
-    "EtlError",
     "ChEMBL",
     "ChemIDplus",
     "DrugBank",
     "DrugsAtFDA",
+    "EtlError",
     "GuideToPHARMACOLOGY",
     "HemOnc",
     "Merge",

diff --git a/src/therapy/etl/base.py b/src/therapy/etl/base.py
@@ -244,9 +244,9 @@ def _load_therapy(self, therapy: dict) -> None:
         """
         try:
             Therapy(**therapy)
-        except ValidationError as e:
-            _logger.error("Attempted to load invalid therapy: %s", therapy)
-            raise e
+        except ValidationError:
+            _logger.exception("Attempted to load invalid therapy: %s", therapy)
+            raise
 
         therapy = self._rules.apply_rules_to_therapy(therapy)
         therapy = self._process_searchable_attributes(therapy)

diff --git a/src/therapy/etl/hemonc.py b/src/therapy/etl/hemonc.py
@@ -147,7 +147,7 @@ def _get_rels(self, therapies: dict, brand_names: dict, conditions: dict) -> dic
                     try:
                         year = self._id_to_yr(row[1])
                     except TypeError:
-                        _logger.error(
+                        _logger.exception(
                             "Failed parse of FDA approval year ID %s for HemOnc ID %s",
                             row[1],
                             row[0],
@@ -168,7 +168,7 @@ def _get_rels(self, therapies: dict, brand_names: dict, conditions: dict) -> dic
                         label = conditions[row[1]]
                     except KeyError:
                         # concept is deprecated or otherwise unavailable
-                        _logger.error(
+                        _logger.exception(
                             "Unable to process relation with indication %s -- deprecated?",
                             row[0],
                         )

diff --git a/src/therapy/etl/merge.py b/src/therapy/etl/merge.py
@@ -75,15 +75,17 @@ def create_merged_concepts(self, record_ids: set[str]) -> None:
                 merge_ref = merged_record["concept_id"]
                 try:
                     self.database.update_merge_ref(concept_id, merge_ref)
-                except DatabaseWriteError as dw:
-                    if str(dw).startswith("No such record exists"):
-                        logger.error(
+                except DatabaseWriteError as e:
+                    if str(e).startswith("No such record exists"):
+                        logger.exception(
                             "Updating nonexistent record: %s for merge ref to %s",
                             concept_id,
                             merge_ref,
                         )
                     else:
-                        logger.error(str(dw))
+                        logger.exception(
+                            "Unrecognized database write error encountered"
+                        )
             uploaded_ids |= group
         self.database.complete_write_transaction()
         logger.info("Merged concept generation successful.")

diff --git a/src/therapy/etl/rules.py b/src/therapy/etl/rules.py
@@ -23,6 +23,7 @@ class Rules:
 
     def __init__(self, source_name: SourceName) -> None:
         """Initialize rules class.
+
         :param source_name: name of source to use, for filtering unneeded rules
         """
         rules_path = APP_ROOT / "etl" / "rules.csv"
@@ -40,6 +41,7 @@ def __init__(self, source_name: SourceName) -> None:
     def apply_rules_to_therapy(self, therapy: dict) -> dict:
         """Apply all rules to therapy. First find relevant rules, then call the
         apply method.
+
         :param therapy: therapy object from ETL base
         :return: processed therapy object
         """
@@ -52,14 +54,16 @@ def _apply_rule_to_field(
         self, therapy: dict, field: str, value: str | list | dict | int | float
     ) -> dict:
         """Given a (field, value) rule, apply it to the given therapy object.
+
         :param therapy: therapy object ready to load to DB
         :param field: name of object property field to check
         :param value: value to remove from field, if possible
         :return: therapy object with rule applied
+        :raise NotImplementedError: if unsupported field attempted
         """
         if field not in {"aliases", "trade_names", "xrefs", "associated_with"}:
             msg = "Non-scalar fields currently not implemented"
-            raise Exception(msg)
+            raise NotImplementedError(msg)
         field_data = set(therapy.get(field, []))
         if value in field_data:
             field_data.remove(value)