-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdraw-operons.py
193 lines (148 loc) · 4.63 KB
/
draw-operons.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
import argparse
import io
from collections import defaultdict
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Tuple
from BCBio import GFF
from dna_features_viewer import BiopythonTranslator
from matplotlib import pyplot as plt
@dataclass
class InputArgs:
annotation: Path
#
output: Path
label: str
no_colors: bool
size: Tuple[int, int]
def parse_gff(gff_file: io.FileIO):
return GFF.parse(gff_file)
def qual(feature, qualifier_key, default=None):
return feature.qualifiers.get(qualifier_key, [default])[0]
def draw_region(
record,
*,
start: int,
end: int,
title: str,
args: InputArgs,
):
"""
Function produces schematic plot of DNA region specified with coordinates;
visualizes only gene feature types and labels them
"""
labels = [
"label",
"Name",
"name",
"product",
"source",
"locus_tag",
"note",
]
if args.label:
labels = [args.label] + labels
translator = BiopythonTranslator()
translator.ignored_features_types = ["region", "remark"]
translator.label_fields = labels
graphic_record = translator.translate_record(record)
operon = graphic_record.crop((start, end))
operon.plot(elevate_outline_annotations=False)
ax = plt.gca()
ax.set_title(title)
if args.size:
fig = plt.gcf()
fig.set_size_inches(*args.size)
plt.savefig(args.output / f"{title}.png")
def group_by_operons(features):
groups: Dict[str, Any] = defaultdict(list)
for feature in features:
if operon := qual(feature, "operon"):
groups[operon].append(feature)
return groups
def filter_by_antigens(features):
for feature in features:
if qual(feature, "antigen") == "True":
return qual(feature, "antigen_type")
return None
def antigen_operons(operons):
for operon, features in operons.items():
if type := filter_by_antigens(features):
yield operon, features, type
def boundaries(features):
start = None
end = None
for feature in features:
loc = feature.location
if start is None and end is None:
start = loc.start
end = loc.end
start = min(start, loc.start)
end = max(end, loc.end)
return start, end
def all_antigen_operons(records):
for record in records:
operon_groups = group_by_operons(record.features)
for operon, features, type in antigen_operons(operon_groups):
yield record, operon, boundaries(features), type
def colorize(records):
for record in records:
for feature in record.features:
if qual(feature, "antigen") == "True":
feature.qualifiers["color"] = "#dc6678"
yield record
def main(args: InputArgs):
with args.annotation.open() as annotation:
ann = parse_gff(annotation)
if not args.no_colors:
ann = colorize(ann)
for record, operon, (start, end), type in all_antigen_operons(ann):
draw_region(
record,
start=start,
end=end,
title=f"{type}-antigen, Contig: {record.id}, Operon: {operon}",
args=args,
)
def parse_args(args: list[str] = None):
arp = argparse.ArgumentParser(
"draw-operons",
description="Draws antigenes from gff generated by merge-operons.py",
)
g = arp.add_argument_group("Gff input")
g.add_argument(
"annotation",
type=Path,
help="gff annotation from the output of the merge-operons.py",
)
g = arp.add_argument_group("Output options")
g.add_argument("-o", "--output", type=Path, help="output dir path")
g.add_argument(
"-l",
"--label",
default="gene",
help="which label for the gene to use (from gff qualifiers)",
)
g.add_argument(
"--no-colors",
action="store_true",
help="do not highlight antigen genes",
)
g.add_argument(
"--size",
nargs=2,
type=int,
default=[],
metavar=("WIDTH", "HEIGHT"),
help="size of the figures in inches (example `... --size 10 5`)",
)
###########################################################
parsed = InputArgs(**arp.parse_args(args=args).__dict__)
if parsed.output is None:
prefix = "pictures-"
out_name = prefix + parsed.annotation.name
parsed.output = parsed.annotation.parent / out_name
parsed.output.mkdir(exist_ok=True, parents=True)
return parsed
if __name__ == "__main__":
main(parse_args())