Skip to content

Commit

Permalink
feat: only grab the mate cigar if absolutely necessary
Browse files Browse the repository at this point in the history
  • Loading branch information
clintval committed Dec 27, 2024
1 parent 4e64644 commit 8c4ce6f
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 41 deletions.
26 changes: 17 additions & 9 deletions fgpyo/sam/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -622,7 +622,7 @@ class PairOrientation(enum.Enum):
"""A pair orientation for tandem (forward-forward or reverse-reverse) reads."""

@classmethod
def build(
def from_recs( # noqa: C901 # `from_recs` is too complex (11 > 10)
cls, rec1: AlignedSegment, rec2: Optional[AlignedSegment] = None
) -> Optional["PairOrientation"]:
"""Returns the pair orientation if both reads are mapped to the same reference sequence.
Expand All @@ -646,22 +646,30 @@ def build(
return None

if rec2 is None:
if not rec1.has_tag("MC"):
raise ValueError('Cannot determine proper pair status without R2\'s cigar ("MC")!')
rec2_cigar = Cigar.from_cigarstring(str(rec1.get_tag("MC")))
rec2_is_forward = rec1.mate_is_forward
rec2_reference_start = rec1.next_reference_start
rec2_reference_end = rec1.next_reference_start + rec2_cigar.length_on_target()
else:
rec2_is_forward = rec2.is_forward
rec2_reference_start = rec2.reference_start
rec2_reference_end = rec2.reference_end

if rec1.is_forward is rec2_is_forward:
return PairOrientation.TANDEM
elif rec1.is_forward and rec1.reference_start < rec2_reference_end:
if rec1.is_forward and rec1.reference_start <= rec2_reference_start:
return PairOrientation.FR
if rec1.is_reverse and rec2_reference_start < rec1.reference_end:
return PairOrientation.FR
elif rec1.is_reverse and rec2_reference_start < rec1.reference_end:
if rec1.is_reverse and rec2_reference_start >= rec1.reference_end:
return PairOrientation.RF

if rec2 is None:
if not rec1.has_tag("MC"):
raise ValueError('Cannot determine proper pair status without R2\'s cigar ("MC")!')
rec2_cigar = Cigar.from_cigarstring(str(rec1.get_tag("MC")))
rec2_reference_end = rec1.next_reference_start + rec2_cigar.length_on_target()
else:
rec2_reference_end = rec2.reference_end

Check warning on line 670 in fgpyo/sam/__init__.py

View check run for this annotation

Codecov / codecov/patch

fgpyo/sam/__init__.py#L670

Added line #L670 was not covered by tests

if rec1.reference_start < rec2_reference_end:
return PairOrientation.FR

Check warning on line 673 in fgpyo/sam/__init__.py

View check run for this annotation

Codecov / codecov/patch

fgpyo/sam/__init__.py#L673

Added line #L673 was not covered by tests
else:
return PairOrientation.RF
Expand Down Expand Up @@ -705,7 +713,7 @@ def is_proper_pair(
rec1.is_mapped
and rec2_is_mapped
and rec1.reference_id == rec2_reference_id
and PairOrientation.build(rec1=rec1, rec2=rec2) in orientations
and PairOrientation.from_recs(rec1=rec1, rec2=rec2) in orientations
# TODO: consider replacing with `abs(isize(r1, r2)) <= max_insert_size`
# which can only be done if isize() is modified to allow for an optional R2.
and 0 < abs(rec1.template_length) <= max_insert_size
Expand Down
86 changes: 54 additions & 32 deletions tests/fgpyo/sam/test_sam.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,39 +306,60 @@ def test_pair_orientation_build_with_r2() -> None:
"""Test that we can build all pair orientations with R1 and R2."""
builder = SamBuilder()
r1, r2 = builder.add_pair(chrom="chr1", start1=100, cigar1="115M", start2=250, cigar2="40M")
assert PairOrientation.build(r1, r2) is PairOrientation.FR
assert PairOrientation.from_recs(r1, r2) is PairOrientation.FR
assert PairOrientation.from_recs(r1) is PairOrientation.FR
assert PairOrientation.from_recs(r2) is PairOrientation.FR

r1, r2 = builder.add_pair(chrom="chr1", start1=100, cigar1="115M", start2=250, cigar2="40M")
r1.is_forward = False
r2.is_forward = True
sam.set_pair_info(r1, r2)
assert PairOrientation.build(r1, r2) is PairOrientation.RF
assert PairOrientation.from_recs(r1, r2) is PairOrientation.RF
assert PairOrientation.from_recs(r1) is PairOrientation.RF
assert PairOrientation.from_recs(r2) is PairOrientation.RF

r1, r2 = builder.add_pair(chrom="chr1", start1=100, cigar1="115M", start2=250, cigar2="40M")
r1.is_forward = True
r2.is_forward = True
sam.set_pair_info(r1, r2)
assert PairOrientation.build(r1, r2) is PairOrientation.TANDEM
assert PairOrientation.from_recs(r1, r2) is PairOrientation.TANDEM
assert PairOrientation.from_recs(r1) is PairOrientation.TANDEM
assert PairOrientation.from_recs(r2) is PairOrientation.TANDEM

r1, r2 = builder.add_pair(chrom="chr1", start1=100, cigar1="115M", start2=250, cigar2="40M")
r1.is_forward = False
r2.is_forward = False
sam.set_pair_info(r1, r2)
assert PairOrientation.build(r1, r2) is PairOrientation.TANDEM
assert PairOrientation.from_recs(r1, r2) is PairOrientation.TANDEM
assert PairOrientation.from_recs(r1) is PairOrientation.TANDEM
assert PairOrientation.from_recs(r2) is PairOrientation.TANDEM


def test_pair_orientation_is_fr_if_opposite_directions_and_overlapping() -> None:
"""Test the pair orientation is always FR if the reads overlap and are oriented opposite."""
builder = SamBuilder()
r1, r2 = builder.add_pair(chrom="chr1", start1=100, cigar1="10M", start2=100, cigar2="10M")
assert PairOrientation.build(r1, r2) is PairOrientation.FR
assert PairOrientation.from_recs(r1, r2) is PairOrientation.FR
assert PairOrientation.from_recs(r1) is PairOrientation.FR
assert PairOrientation.from_recs(r2) is PairOrientation.FR

builder = SamBuilder()
r1, r2 = builder.add_pair(chrom="chr1", start1=100, cigar1="10M", start2=100, cigar2="10M")
r1.is_reverse = True
r2.is_reverse = False
sam.set_pair_info(r1, r2)
assert PairOrientation.build(r1, r2) is PairOrientation.FR
assert PairOrientation.from_recs(r1, r2) is PairOrientation.FR
assert PairOrientation.from_recs(r1) is PairOrientation.FR
assert PairOrientation.from_recs(r2) is PairOrientation.FR


def test_a_single_bp_alignment_at_end_of_rec_one_is_still_fr_orientations() -> None:
"""Test a single bp alignment at the end of a mate's alignment is still FR based on rec1."""
builder = SamBuilder()
r1, r2 = builder.add_pair(chrom="chr1", start1=5, cigar1="5M", start2=5, cigar2="1M")
assert PairOrientation.from_recs(r1, r2) is PairOrientation.FR
assert PairOrientation.from_recs(r1) is PairOrientation.FR
assert PairOrientation.from_recs(r2) is PairOrientation.FR


def test_pair_orientation_build_with_either_unmapped() -> None:
Expand All @@ -347,75 +368,76 @@ def test_pair_orientation_build_with_either_unmapped() -> None:
r1, r2 = builder.add_pair()
assert r1.is_unmapped
assert r2.is_unmapped
assert PairOrientation.build(r1, r2) is None
assert PairOrientation.from_recs(r1, r2) is None
assert PairOrientation.from_recs(r1) is None
assert PairOrientation.from_recs(r2) is None

r1, r2 = builder.add_pair(chrom="chr1", start1=100)
assert r1.is_mapped
assert r2.is_unmapped
assert PairOrientation.build(r1, r2) is None
assert PairOrientation.from_recs(r1, r2) is None
assert PairOrientation.from_recs(r1) is None
assert PairOrientation.from_recs(r2) is None

r1, r2 = builder.add_pair(chrom="chr1", start2=100)
assert r1.is_unmapped
assert r2.is_mapped
assert PairOrientation.build(r1, r2) is None
assert PairOrientation.from_recs(r1, r2) is None
assert PairOrientation.from_recs(r1) is None
assert PairOrientation.from_recs(r2) is None


def test_pair_orientation_build_with_no_r2_but_r2_mapped() -> None:
"""Test that we can build all pair orientations with R1 and no R2, but R2 is mapped."""
builder = SamBuilder()
r1, r2 = builder.add_pair(chrom="chr1", start1=100, cigar1="115M", start2=250, cigar2="40M")
assert PairOrientation.build(r1) is PairOrientation.FR
assert PairOrientation.from_recs(r1) is PairOrientation.FR

r1, r2 = builder.add_pair(chrom="chr1", start1=100, cigar1="115M", start2=250, cigar2="40M")
r1.is_forward = False
r2.is_forward = True
sam.set_pair_info(r1, r2)
assert PairOrientation.build(r1) is PairOrientation.RF
assert PairOrientation.from_recs(r1) is PairOrientation.RF

r1, r2 = builder.add_pair(chrom="chr1", start1=100, cigar1="115M", start2=250, cigar2="40M")
r1.is_forward = True
r2.is_forward = True
sam.set_pair_info(r1, r2)
assert PairOrientation.build(r1) is PairOrientation.TANDEM
assert PairOrientation.from_recs(r1) is PairOrientation.TANDEM

r1, r2 = builder.add_pair(chrom="chr1", start1=100, cigar1="115M", start2=250, cigar2="40M")
r1.is_forward = False
r2.is_forward = False
sam.set_pair_info(r1, r2)
assert PairOrientation.build(r1) is PairOrientation.TANDEM
assert PairOrientation.from_recs(r1) is PairOrientation.TANDEM


def test_pair_orientation_build_with_either_unmapped_but_no_r2() -> None:
"""Test that we can return None with either R1 and R2 unmapped (or both), but no R2."""
def test_pair_orientation_build_raises_if_it_cant_find_mate_cigar_tag_positive_fr() -> None:
"""Test that an exception is raised if we cannot find the mate cigar tag."""
builder = SamBuilder()
r1, r2 = builder.add_pair()
assert r1.is_unmapped
assert r2.is_unmapped
r1, r2 = builder.add_pair(chrom="chr1", start1=16, cigar1="10M", start2=15, cigar2="10M")
sam.set_pair_info(r1, r2)
assert PairOrientation.build(r1) is None
r1.set_tag("MC", None) # Clear out the MC tag.
r2.set_tag("MC", None) # Clear out the MC tag.

r1, r2 = builder.add_pair(chrom="chr1", start1=100)
assert r1.is_mapped
assert r2.is_unmapped
sam.set_pair_info(r1, r2)
assert PairOrientation.build(r1) is None
with pytest.raises(ValueError):
PairOrientation.from_recs(r1)

r1, r2 = builder.add_pair(chrom="chr1", start2=100)
assert r1.is_unmapped
assert r2.is_mapped
sam.set_pair_info(r1, r2)
assert PairOrientation.build(r1) is None
assert PairOrientation.from_recs(r2) is PairOrientation.FR


def test_pair_orientation_build_raises_if_it_cant_find_mate_cigar_tag() -> None:
def test_pair_orientation_build_raises_if_it_cant_find_mate_cigar_tag_positive_rf() -> None:
"""Test that an exception is raised if we cannot find the mate cigar tag."""
builder = SamBuilder()
r1, r2 = builder.add_pair(chrom="chr1", start1=10, start2=30)
r1, r2 = builder.add_pair(chrom="chr1", start1=16, cigar1="1M", start2=15, cigar2="1M")
sam.set_pair_info(r1, r2)
r1.set_tag("MC", None) # Clear out the MC tag.
r2.set_tag("MC", None) # Clear out the MC tag.

with pytest.raises(ValueError):
PairOrientation.build(r1)
PairOrientation.from_recs(r1)

assert PairOrientation.from_recs(r2) is PairOrientation.RF


def test_is_proper_pair_when_actually_proper() -> None:
Expand Down

0 comments on commit 8c4ce6f

Please sign in to comment.