Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

style: update ruff #454

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ repos:
- id: mixed-line-ending
args: [ --fix=lf ]
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.5.0 # ruff version
rev: v0.8.6 # ruff version
hooks:
- id: ruff-format
- id: ruff
Expand Down
28 changes: 22 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,13 @@ etl = [
"pyyaml"
]
tests = ["pytest", "pytest-cov", "pytest-mock", "isodate"]
dev = ["pre-commit>=3.7.1", "ruff==0.5.0", "lxml", "xmlformatter", "types-pyyaml"]
dev = [
"pre-commit>=3.7.1",
"ruff==0.8.6",
"lxml",
"xmlformatter",
"types-pyyaml",
]

[project.urls]
Homepage = "https://github.com/cancervariants/therapy-normalization"
Expand Down Expand Up @@ -80,6 +86,7 @@ branch = true

[tool.ruff]
src = ["src"]
exclude = ["analysis", "scripts"]

[tool.ruff.lint]
select = [
Expand Down Expand Up @@ -108,10 +115,15 @@ select = [
"RSE", # https://docs.astral.sh/ruff/rules/#flake8-raise-rse
"RET", # https://docs.astral.sh/ruff/rules/#flake8-return-ret
"SLF", # https://docs.astral.sh/ruff/rules/#flake8-self-slf
"SLOT", # https://docs.astral.sh/ruff/rules/#flake8-slots-slot
"SIM", # https://docs.astral.sh/ruff/rules/#flake8-simplify-sim
"ARG", # https://docs.astral.sh/ruff/rules/#flake8-unused-arguments-arg
"PTH", # https://docs.astral.sh/ruff/rules/#flake8-use-pathlib-pth
"PGH", # https://docs.astral.sh/ruff/rules/#pygrep-hooks-pgh
"FAST", # https://docs.astral.sh/ruff/rules/#fastapi-fast
"PLC", # https://docs.astral.sh/ruff/rules/#convention-c
"PLE", # https://docs.astral.sh/ruff/rules/#error-e_1
"TRY", # https://docs.astral.sh/ruff/rules/#tryceratops-try
"PERF", # https://docs.astral.sh/ruff/rules/#perflint-perf
"FURB", # https://docs.astral.sh/ruff/rules/#refurb-furb
"RUF", # https://docs.astral.sh/ruff/rules/#ruff-specific-rules-ruf
Expand All @@ -130,13 +142,15 @@ fixable = [
"PT",
"RSE",
"SIM",
"FAST",
"PLC",
"PLE",
"TRY",
"PERF",
"FURB",
"RUF"
]
# ANN003 - missing-type-kwargs
# ANN101 - missing-type-self
# ANN102 - missing-type-cls
# D203 - one-blank-line-before-class
# D205 - blank-line-after-summary
# D206 - indent-with-spaces*
Expand All @@ -151,20 +165,21 @@ fixable = [
# PGH003 - blanket-type-ignore
# W191 - tab-indentation*
# S321 - suspicious-ftp-lib-usage
# PLC0206 - dict-index-missing-items
# *ignored for compatibility with formatter
ignore = [
"ANN003", "ANN101", "ANN102",
"ANN003",
"D203", "D205", "D206", "D213", "D300", "D400", "D415",
"E111", "E114", "E117", "E501",
"PGH003",
"W191",
"S321",
"PLC0206",
]

[tool.ruff.lint.per-file-ignores]
# ANN001 - missing-type-function-argument
# ANN2 - missing-return-type
# ANN102 - missing-type-cls
# D100 - undocumented-public-module
# D102 - undocumented-public-method
# S101 - assert
Expand All @@ -174,10 +189,10 @@ ignore = [
# INP001 - implicit-namespace-package
# SLF001 - private-member-access
# PERF401 - manual-list-comprehension
# S608 - hardcoded-sql-expression
"tests/*" = [
"ANN001",
"ANN2",
"ANN102",
"D100",
"D102",
"S101",
Expand All @@ -187,6 +202,7 @@ ignore = [
"PERF401"
]
"tests/unit/test_emit_warnings.py" = ["RUF001"]
"tests/scripts/build_chembl_data.py" = ["S608"]
"src/therapy/schemas.py" = ["ANN001", "ANN201", "N805"]

[tool.ruff.lint.flake8-annotations]
Expand Down
8 changes: 4 additions & 4 deletions src/therapy/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,13 @@ def _load_source(
f"Encountered ModuleNotFoundError attempting to import {e.name}. Are ETL dependencies installed?"
)
click.get_current_context().exit()
SourceClass = eval(name.value) # noqa: N806 PGH001 S307
SourceClass = eval(name.value) # noqa: N806, S307

source = SourceClass(database=db, silent=False)
try:
processed_ids += source.perform_etl(use_existing)
except EtlError as e:
_logger.error(e)
_logger.exception("Encountered ETL error while loading source %s", name)
click.echo(f"Encountered error while loading {name}: {e}.")
click.get_current_context().exit()
end_load = timer()
Expand Down Expand Up @@ -260,13 +260,13 @@ def update_normalizer_db(

if len(sources_split) == 0:
msg = "Must enter 1 or more source names to update"
raise Exception(msg)
raise ValueError(msg)

non_sources = set(sources_split) - set(SOURCES)

if len(non_sources) != 0:
msg = f"Not valid source(s): {non_sources}"
raise Exception(msg)
raise ValueError(msg)

parsed_source_names = {SourceName(SOURCES[s]) for s in sources_split}
if (
Expand Down
42 changes: 22 additions & 20 deletions src/therapy/database/dynamodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,8 @@ def drop_db(self) -> None:
try:
if not self._check_delete_okay():
return
except DatabaseWriteError as e:
raise e
except DatabaseWriteError: # noqa: TRY203
raise

if self.therapy_table in self.list_tables():
self.dynamodb.Table(self.therapy_table).delete()
Expand Down Expand Up @@ -239,30 +239,32 @@ def get_record_by_id(
otherwise.
:return: complete therapy record, if match is found; None otherwise
"""
if merge:
pk = f"{concept_id.lower()}##{RecordType.MERGER.value}"
else:
pk = f"{concept_id.lower()}##{RecordType.IDENTITY.value}"
try:
if merge:
pk = f"{concept_id.lower()}##{RecordType.MERGER.value}"
else:
pk = f"{concept_id.lower()}##{RecordType.IDENTITY.value}"
if case_sensitive:
match = self.therapies.get_item(
Key={"label_and_type": pk, "concept_id": concept_id}
)
return match["Item"]
exp = Key("label_and_type").eq(pk)
response = self.therapies.query(KeyConditionExpression=exp)
record = response["Items"][0]
del record["label_and_type"]
return record
response = self.therapies.query(
KeyConditionExpression=Key("label_and_type").eq(pk)
)
except ClientError as e:
_logger.error(
_logger.exception(
"boto3 client error on get_records_by_id for search term %s: %s",
concept_id,
e.response["Error"]["Message"],
)
return None
except (KeyError, IndexError): # record doesn't exist
return None
else:
record = response["Items"][0]
del record["label_and_type"]
return record

def get_refs_by_type(self, search_term: str, ref_type: RefType) -> list[str]:
"""Retrieve concept IDs for records matching the user's query. Other methods
Expand All @@ -278,7 +280,7 @@ def get_refs_by_type(self, search_term: str, ref_type: RefType) -> list[str]:
matches = self.therapies.query(KeyConditionExpression=filter_exp)
return [m["concept_id"] for m in matches.get("Items", None)]
except ClientError as e:
_logger.error(
_logger.exception(
"boto3 client error on get_refs_by_type for search term %s: %s",
search_term,
e.response["Error"]["Message"],
Expand All @@ -296,8 +298,8 @@ def get_rxnorm_id_by_brand(self, brand_id: str) -> str | None:
try:
matches = self.therapies.query(KeyConditionExpression=filter_exp)
except ClientError as e:
_logger.error(
"boto3 client error on rx_brand fetch for brand ID {brand_id}: {e.response['Error']['Message']}",
_logger.exception(
"boto3 client error on rx_brand fetch for brand ID %s: %s",
brand_id,
e.response["Error"]["Message"],
)
Expand Down Expand Up @@ -428,7 +430,7 @@ def add_rxnorm_brand(self, brand_id: str, record_id: str) -> None:
try:
self.batch.put_item(Item=item)
except ClientError as e:
_logger.error(
_logger.exception(
"boto3 client error on add_rxnorm_brand for %s -> %s: %s",
brand_id,
record_id,
Expand All @@ -449,7 +451,7 @@ def add_record(self, record: dict, src_name: SourceName) -> None:
try:
self.batch.put_item(Item=record)
except ClientError as e:
_logger.error(
_logger.exception(
"boto3 client error on add_record for %s: %s",
concept_id,
e.response["Error"]["Message"],
Expand Down Expand Up @@ -482,7 +484,7 @@ def add_merged_record(self, record: dict) -> None:
try:
self.batch.put_item(Item=record)
except ClientError as e:
_logger.error(
_logger.exception(
"boto3 client error on add_record for %s: %s",
concept_id,
e.response["Error"]["Message"],
Expand All @@ -509,7 +511,7 @@ def _add_ref_record(
try:
self.batch.put_item(Item=record)
except ClientError as e:
_logger.error(
_logger.exception(
"boto3 client error adding reference %s for %s with match type %s: %s",
term,
concept_id,
Expand Down Expand Up @@ -541,7 +543,7 @@ def update_merge_ref(self, concept_id: str, merge_ref: str) -> None:
if code == "ConditionalCheckFailedException":
msg = f"No such record exists for keys {label_and_type}, {concept_id}"
raise DatabaseWriteError(msg) from e
_logger.error(
_logger.exception(
"boto3 client error in `database.update_record()`: %s",
e.response["Error"]["Message"],
)
Expand Down
2 changes: 1 addition & 1 deletion src/therapy/etl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@
from .wikidata import Wikidata

__all__ = [
"EtlError",
"ChEMBL",
"ChemIDplus",
"DrugBank",
"DrugsAtFDA",
"EtlError",
"GuideToPHARMACOLOGY",
"HemOnc",
"Merge",
Expand Down
6 changes: 3 additions & 3 deletions src/therapy/etl/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,9 +244,9 @@ def _load_therapy(self, therapy: dict) -> None:
"""
try:
Therapy(**therapy)
except ValidationError as e:
_logger.error("Attempted to load invalid therapy: %s", therapy)
raise e
except ValidationError:
_logger.exception("Attempted to load invalid therapy: %s", therapy)
raise

therapy = self._rules.apply_rules_to_therapy(therapy)
therapy = self._process_searchable_attributes(therapy)
Expand Down
4 changes: 2 additions & 2 deletions src/therapy/etl/hemonc.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def _get_rels(self, therapies: dict, brand_names: dict, conditions: dict) -> dic
try:
year = self._id_to_yr(row[1])
except TypeError:
_logger.error(
_logger.exception(
"Failed parse of FDA approval year ID %s for HemOnc ID %s",
row[1],
row[0],
Expand All @@ -168,7 +168,7 @@ def _get_rels(self, therapies: dict, brand_names: dict, conditions: dict) -> dic
label = conditions[row[1]]
except KeyError:
# concept is deprecated or otherwise unavailable
_logger.error(
_logger.exception(
"Unable to process relation with indication %s -- deprecated?",
row[0],
)
Expand Down
10 changes: 6 additions & 4 deletions src/therapy/etl/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,15 +75,17 @@ def create_merged_concepts(self, record_ids: set[str]) -> None:
merge_ref = merged_record["concept_id"]
try:
self.database.update_merge_ref(concept_id, merge_ref)
except DatabaseWriteError as dw:
if str(dw).startswith("No such record exists"):
logger.error(
except DatabaseWriteError as e:
if str(e).startswith("No such record exists"):
logger.exception(
"Updating nonexistent record: %s for merge ref to %s",
concept_id,
merge_ref,
)
else:
logger.error(str(dw))
logger.exception(
"Unrecognized database write error encountered"
)
uploaded_ids |= group
self.database.complete_write_transaction()
logger.info("Merged concept generation successful.")
Expand Down
6 changes: 5 additions & 1 deletion src/therapy/etl/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class Rules:

def __init__(self, source_name: SourceName) -> None:
"""Initialize rules class.

:param source_name: name of source to use, for filtering unneeded rules
"""
rules_path = APP_ROOT / "etl" / "rules.csv"
Expand All @@ -40,6 +41,7 @@ def __init__(self, source_name: SourceName) -> None:
def apply_rules_to_therapy(self, therapy: dict) -> dict:
"""Apply all rules to therapy. First find relevant rules, then call the
apply method.

:param therapy: therapy object from ETL base
:return: processed therapy object
"""
Expand All @@ -52,14 +54,16 @@ def _apply_rule_to_field(
self, therapy: dict, field: str, value: str | list | dict | int | float
) -> dict:
"""Given a (field, value) rule, apply it to the given therapy object.

:param therapy: therapy object ready to load to DB
:param field: name of object property field to check
:param value: value to remove from field, if possible
:return: therapy object with rule applied
:raise NotImplementedError: if unsupported field attempted
"""
if field not in {"aliases", "trade_names", "xrefs", "associated_with"}:
msg = "Non-scalar fields currently not implemented"
raise Exception(msg)
raise NotImplementedError(msg)
field_data = set(therapy.get(field, []))
if value in field_data:
field_data.remove(value)
Expand Down
Loading
Loading