Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨ New sample-relationships API #665

Merged
merged 15 commits into from
May 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions dataservice/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
from dataservice.api.family_relationship import FamilyRelationshipListAPI
from dataservice.api.sample import SampleAPI
from dataservice.api.sample import SampleListAPI
from dataservice.api.sample_relationship import SampleRelationshipAPI
from dataservice.api.sample_relationship import SampleRelationshipListAPI
from dataservice.api.biospecimen import BiospecimenAPI
from dataservice.api.biospecimen import BiospecimenListAPI
from dataservice.api.diagnosis import DiagnosisAPI
Expand Down
6 changes: 6 additions & 0 deletions dataservice/api/sample_relationship/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from dataservice.api.sample_relationship.resources import (
SampleRelationshipAPI
)
from dataservice.api.sample_relationship.resources import (
SampleRelationshipListAPI
)
165 changes: 165 additions & 0 deletions dataservice/api/sample_relationship/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
from sqlalchemy import event, or_


from dataservice.extensions import db
from dataservice.api.common.model import Base, KfId
from dataservice.api.sample.models import Sample


class SampleRelationship(db.Model, Base):
"""
Represents a relationship between two samples.

The relationship table represents a tree.

:param kf_id: Primary key given by the Kid's First DCC
:param created_at: Time of object creation
:param modified_at: Last time of object modification
:param parent_id: Kids first id of the parent Sample in the
relationship
:param child_id: Kids first id of the child Sample
in the relationship
:param external_parent_id: Name given to parent sample by contributor
:param external_child_id: Name given to child sample by contributor
Comment on lines +22 to +23
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this being populated in the relationship table? Shouldn't this just be coming from the sample?

Copy link
Member Author

@znatty22 znatty22 May 23, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is a bit redundant, but I think it's a tradeoff between ease of implementation and the complexity of the query when fetching/searching for relationships by the external sample IDs.

If we didn't have these additional columns on the relationship table, then in order to get all relationships or search for specific relationships using the external parent or child sample IDs, we'd have to join the relationships table with the sample table on both the parent_id and child_id FK columns.

We'd also have to break the pattern we follow in the schema layer of the API in order to return the parent Sample.external_id and the child Sample.external_id along with each sample relationship. We could do this, but I didn't think it was worth figuring out given the old documentation and the time constraints.

:param notes: Text notes from source describing the sample relationship
"""
# Table configuration
__tablename__ = 'sample_relationship'
# NOTE(review): presumably the prefix used when generating kf_ids for this
# table - confirm against the Base/KfId helpers
__prefix__ = 'SR'
# child_id is unique across the table, so a sample can be the child in at
# most one relationship (consistent with the table representing a tree)
__table_args__ = (db.UniqueConstraint('child_id',),)

# Identifier for the relationship itself as given by the contributor
external_id = db.Column(db.Text(),
doc='external id used by contributor')
# Required foreign key to the parent sample's kf_id
parent_id = db.Column(
KfId(),
db.ForeignKey('sample.kf_id'),
nullable=False,
doc='kf_id of one sample in the relationship')

# Required foreign key to the child sample's kf_id
child_id = db.Column(
KfId(),
db.ForeignKey('sample.kf_id'),
nullable=False,
doc='kf_id of the other sample in the relationship')

# Contributor-given name of the parent sample, stored on this table
external_parent_id = db.Column(db.Text())

# Contributor-given name of the child sample, stored on this table
external_child_id = db.Column(db.Text())

notes = db.Column(
db.Text(),
doc='Text notes describing the relationship'
)

# Parent sample, joined on parent_id. The backref adds
# Sample.outgoing_sample_relationships and configures the
# 'all, delete-orphan' cascade on that Sample-side collection
parent = db.relationship(
Sample,
primaryjoin=parent_id == Sample.kf_id,
backref=db.backref('outgoing_sample_relationships',
cascade='all, delete-orphan'))

# Child sample, joined on child_id. The backref adds
# Sample.incoming_sample_relationships and configures the
# 'all, delete-orphan' cascade on that Sample-side collection
child = db.relationship(
Sample,
primaryjoin=child_id == Sample.kf_id,
backref=db.backref('incoming_sample_relationships',
cascade='all, delete-orphan'))

@classmethod
def query_all_relationships(cls, sample_kf_id=None,
                            model_filter_params=None):
    """
    Find all sample relationships for a sample

    :param sample_kf_id: Kids First ID of the sample. When not given
    (None or empty), relationships are not restricted to a sample
    :param model_filter_params: Filter parameters to the query. These are
    passed to filter_by as keyword arguments. Defaults to no filters
    :returns: the query for the matching sample relationships

    Given a sample's kf_id, return all of the immediate/direct sample
    relationships of the sample.

    We cannot return all samples in the tree bc this would require
    a recursive query which Dataservice would likely need to do in a
    longer-running task. The service is not setup for this
    """
    # Apply model property filter params
    if model_filter_params is None:
        model_filter_params = {}
    q = SampleRelationship.query.filter_by(**model_filter_params)

    # Get sample relationships and join with sample
    q = q.join(Sample, or_(SampleRelationship.parent,
                           SampleRelationship.child))

    # Only restrict to a sample when one was requested. The sample may be
    # on either side of the relationship
    if sample_kf_id:
        q = q.filter(or_(
            SampleRelationship.parent_id == sample_kf_id,
            SampleRelationship.child_id == sample_kf_id))

    # Don't want duplicates - return unique sample relationships
    q = q.group_by(SampleRelationship.kf_id)

    return q

def __repr__(self):
    """
    Describe the relationship using the kf_ids of the parent and child
    samples
    """
    parent_kf_id = self.parent.kf_id
    child_kf_id = self.child.kf_id
    return f"{parent_kf_id} parent of {child_kf_id}"


def validate_sample_relationship(target):
    """
    Validate a sample relationship before it is written to the database

    Ensure that both the parent and the child sample exist
    Ensure that the reverse relationship does not already exist
    Ensure that the parent != child

    If these are not the case then raise DatabaseValidationError

    :param target: the sample_relationship being validated
    :type target: SampleRelationship
    :raises DatabaseValidationError: if any of the checks above fail
    """
    # NOTE(review): imported locally rather than at module level,
    # presumably to avoid a circular import - confirm
    from dataservice.api.errors import DatabaseValidationError

    # Return if sample_relationship is None
    if not target:
        return

    # Get samples in relationship by id
    parent = Sample.query.get(target.parent_id)
    child = Sample.query.get(target.child_id)

    # Check that both are existing samples
    if not (parent and child):
        raise DatabaseValidationError(
            SampleRelationship.__tablename__,
            "modify",
            "Either parent sample or child sample or both does not exist"
        )

    # Check for reverse relation
    sr = SampleRelationship.query.filter_by(
        parent_id=child.kf_id,
        child_id=parent.kf_id,
    ).first()

    if sr:
        # Build the message from the samples fetched above instead of
        # target.parent / target.child: those relationship attributes may
        # not be populated when the object was created from the
        # parent_id / child_id columns alone
        raise DatabaseValidationError(
            SampleRelationship.__tablename__,
            "modify",
            f"Reverse relationship, Parent: {parent.kf_id} -> Child: "
            f"{child.kf_id}, not allowed since the SampleRelationship, "
            f"Parent: {sr.parent_id} -> Child: {sr.child_id}, already exists"
        )

    # Check for parent = child
    if target.parent_id == target.child_id:
        raise DatabaseValidationError(
            SampleRelationship.__tablename__,
            "modify",
            "Cannot create Sample relationship where parent sample is the"
            " same as the child sample"
        )


@event.listens_for(SampleRelationship, 'before_insert')
@event.listens_for(SampleRelationship, 'before_update')
def relationship_on_insert_or_update(mapper, connection, target):
    """
    Validate a sample relationship right before it is inserted or updated

    :param mapper: mapper passed in by the event, not used here
    :param connection: connection passed in by the event, not used here
    :param target: the SampleRelationship about to be written
    """
    # All checks live in the validator; it raises if the target is invalid
    validate_sample_relationship(target)
176 changes: 176 additions & 0 deletions dataservice/api/sample_relationship/resources.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
from flask import abort, request
from marshmallow import ValidationError
from webargs.flaskparser import use_args

from dataservice.extensions import db
from dataservice.api.common.pagination import paginated, Pagination
from dataservice.api.sample_relationship.models import SampleRelationship
from dataservice.api.sample_relationship.schemas import (
SampleRelationshipSchema,
SampleRelationshipFilterSchema
)
from dataservice.api.common.views import CRUDView
from dataservice.api.common.schemas import filter_schema_factory


class SampleRelationshipListAPI(CRUDView):
    """
    SampleRelationship REST API
    """
    # NOTE(review): the method docstrings below contain a spec template after
    # the '---' marker which looks like it is parsed to build the API docs -
    # keep that text verbatim
    endpoint = 'sample_relationships_list'
    rule = '/sample-relationships'
    schemas = {'SampleRelationship': SampleRelationshipSchema}

    @paginated
    @use_args(filter_schema_factory(SampleRelationshipFilterSchema),
              locations=('query',))
    def get(self, filter_params, after, limit):
        """
        Get all sample_relationships
        ---
        description: Get all sample_relationships
        template:
          path:
            get_list.yml
          properties:
            resource:
              SampleRelationship
        """
        # study_id and sample_id are not model properties, so take them out
        # of the filter params before those are applied to the model
        study_id = filter_params.pop('study_id', None)
        sample_id = filter_params.pop('sample_id', None)

        # Sample relationships joined with samples, optionally restricted
        # to a single sample
        query = SampleRelationship.query_all_relationships(
            sample_kf_id=sample_id,
            model_filter_params=filter_params
        )

        # Restrict to a study through the sample's participant
        if study_id:
            from dataservice.api.participant.models import Participant
            from dataservice.api.sample.models import Sample
            query = query.join(Sample.participant)
            query = query.filter(Participant.study_id == study_id)

        page = Pagination(query, after, limit)
        return SampleRelationshipSchema(many=True).jsonify(page)

    def post(self):
        """
        Create a new sample_relationship
        ---
        template:
          path:
            new_resource.yml
          properties:
            resource:
              SampleRelationship
        """
        payload = request.get_json(force=True)

        # Deserialize, rejecting the request if the body is not valid
        try:
            relationship = (SampleRelationshipSchema(strict=True)
                            .load(payload)
                            .data)
        except ValidationError as err:
            abort(400, 'could not create sample_relationship: {}'
                  .format(err.messages))

        # Add to and save in database
        db.session.add(relationship)
        db.session.commit()

        message = 'sample_relationship {} created'.format(relationship.kf_id)
        response = SampleRelationshipSchema(201, message).jsonify(relationship)
        return response, 201


class SampleRelationshipAPI(CRUDView):
    """
    SampleRelationship REST API
    """
    # NOTE(review): the method docstrings below contain a spec template after
    # the '---' marker which looks like it is parsed to build the API docs -
    # keep that text verbatim
    endpoint = 'sample_relationships'
    rule = '/sample-relationships/<string:kf_id>'
    schemas = {'SampleRelationship': SampleRelationshipSchema}

    @staticmethod
    def _get_or_404(kf_id):
        # Look up a sample_relationship by kf_id, aborting with a 404 when
        # it does not exist
        relationship = SampleRelationship.query.get(kf_id)
        if relationship is None:
            abort(404, 'could not find {} `{}`'
                  .format('sample_relationship', kf_id))
        return relationship

    def get(self, kf_id):
        """
        Get a sample_relationship by id
        ---
        template:
          path:
            get_by_id.yml
          properties:
            resource:
              SampleRelationship
        """
        relationship = self._get_or_404(kf_id)
        return SampleRelationshipSchema().jsonify(relationship)

    def patch(self, kf_id):
        """
        Update an existing sample_relationship.

        Allows partial update of resource
        ---
        template:
          path:
            update_by_id.yml
          properties:
            resource:
              SampleRelationship
        """
        relationship = self._get_or_404(kf_id)

        # Partial update - validate but allow missing required fields
        payload = request.get_json(force=True) or {}
        try:
            relationship = (SampleRelationshipSchema(strict=True)
                            .load(payload, instance=relationship,
                                  partial=True)
                            .data)
        except ValidationError as err:
            abort(400, 'could not update sample_relationship: {}'
                  .format(err.messages))

        db.session.add(relationship)
        db.session.commit()

        message = 'sample_relationship {} updated'.format(relationship.kf_id)
        response = SampleRelationshipSchema(200, message).jsonify(relationship)
        return response, 200

    def delete(self, kf_id):
        """
        Delete sample_relationship by id

        Deletes a sample_relationship given a Kids First id
        ---
        template:
          path:
            delete_by_id.yml
          properties:
            resource:
              SampleRelationship
        """
        # Check if sample_relationship exists
        relationship = self._get_or_404(kf_id)

        # Remove from the database
        db.session.delete(relationship)
        db.session.commit()

        message = 'sample_relationship {} deleted'.format(relationship.kf_id)
        response = SampleRelationshipSchema(200, message).jsonify(relationship)
        return response, 200
Loading