Skip to content

Commit 2062c5a

Browse files
Merge pull request #19 from moka-guys/development
Development (#19) Co-Authored-By: rebeccahaines1 <84131466+rebeccahaines1@users.noreply.github.com> Co-Authored-By: Graeme <graeme_c_smith@hotmail.com>
2 parents d83c995 + 381ddcb commit 2062c5a

10 files changed

+123
-61
lines changed

README.md

+4-4
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ Runs a series of checks on the sample sheet, collects any errors identified. Che
1919
* Pan numbers are in the list of allowed pan numbers supplied to the script
2020
* Samplesheet contains any TSO samples
2121

22-
If samplesheet contains an input dev_panno, the package will skip samplesheet chcks for the samplesheet.
22+
If the samplesheet contains any of the supplied development pan numbers (dev_pannos), the package skips the samplesheet checks for that samplesheet.
2323

2424
## Usage
2525

@@ -40,7 +40,7 @@ sscheck_obj = SamplesheetCheck(
4040
sequencer_ids, # list
4141
panels, # list
4242
tso_panels, # list
43-
dev_panno, # str
43+
dev_pannos, # list
4444
logdir, # str
4545
)
4646
sscheck_obj.ss_checks() # Carry out samplesheet validation
@@ -73,8 +73,8 @@ options:
7373
Comma separated string of allowed panel numbers
7474
-T TSO_PANELS, --tso_panels TSO_PANELS
7575
Comma separated string of tso panels
76-
-D DEV_PANNO, --dev_panno DEV_PANNO
77-
Development pan number
76+
-D DEV_PANNOS, --dev_pannos DEV_PANNOS
77+
Comma separated string of development pan numbers
7878
-L LOGDIR, --logdir LOGDIR
7979
Directory to save the output logfile to
8080
-NSH NO_STREAM_HANDLER, --no_stream_handler NO_STREAM_HANDLER

pytest.ini

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
[pytest]
2-
addopts = -v --ignore=test/data/ --ignore=test/temp/ --cov=. --cov-report term-missing --sequencer_ids=NB551068,NB552085,M02353,M02631,A01229 --tso_panels=Pan4969,Pan5085,Pan5112,Pan5114 --dev_panno=Pan5180 --panels=Pan5180,Pan4009,Pan2835,Pan4940,Pan4396,Pan5113,Pan5115,Pan4969,Pan5085,Pan5112,Pan5114,Pan5007,Pan5008,Pan5009,Pan5010,Pan5011,Pan5012,Pan5013,Pan5014,Pan5015,Pan5016,Pan4119,Pan4121,Pan4122,Pan4125,Pan4126,Pan4974,Pan4975,Pan4976,Pan4977,Pan4978,Pan4979,Pan4980,Pan4981,Pan4982,Pan4983,Pan4984,Pan4821,Pan4822,Pan4823,Pan4824,Pan4825,Pan4149,Pan4150,Pan4129,Pan4964,Pan4130,Pan5121,Pan5185,Pan5186,Pan5143,Pan5147,Pan4816,Pan4817,Pan5122,Pan5144,Pan5148,Pan4819,Pan4820,Pan4145,Pan4146,Pan4132,Pan4134,Pan4136,Pan4137,Pan4138,Pan4143,Pan4144,Pan4151,Pan4314,Pan4351,Pan4387,Pan4390,Pan4826,Pan4827,Pan4828,Pan4829,Pan4830,Pan4831,Pan4832,Pan4833,Pan4834,Pan4835,Pan4836 --logdir=.
2+
addopts = -v --ignore=test/data/ --ignore=test/temp/ --cov=. --cov-report term-missing --sequencer_ids=NB551068,NB552085,M02353,M02631,A01229 --tso_panels=Pan5085,Pan5112,Pan5114 --dev_pannos=Pan5180,Pan5227 --panels=Pan5180,Pan4009,Pan2835,Pan4940,Pan4396,Pan5113,Pan5115,Pan5226,Pan5085,Pan5112,Pan5114,Pan5007,Pan5008,Pan5009,Pan5010,Pan5011,Pan5012,Pan5013,Pan5014,Pan5015,Pan5016,Pan4119,Pan4121,Pan4122,Pan4125,Pan4126,Pan4974,Pan4975,Pan4976,Pan4977,Pan4978,Pan4979,Pan4980,Pan4981,Pan4982,Pan4983,Pan4984,Pan4821,Pan4822,Pan4823,Pan4824,Pan4825,Pan4149,Pan4150,Pan4129,Pan4964,Pan4130,Pan5121,Pan5185,Pan5186,Pan5143,Pan5147,Pan4816,Pan4817,Pan5122,Pan5144,Pan5148,Pan4819,Pan4820,Pan4145,Pan4146,Pan4132,Pan4134,Pan4136,Pan4137,Pan4138,Pan4143,Pan4144,Pan4151,Pan4314,Pan4351,Pan4387,Pan4390,Pan4826,Pan4827,Pan4828,Pan4829,Pan4830,Pan4831,Pan4832,Pan4833,Pan4834,Pan4835,Pan4836 --logdir=.

samplesheet_validator/__main__.py

+4-6
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,10 @@ def get_arguments():
5050
)
5151
parser.add_argument(
5252
"-D",
53-
"--dev_panno",
53+
"--dev_pannos",
5454
type=str,
5555
required=True,
56-
help="Development pan number",
56+
help="Comma separated string of development pan numbers",
5757
)
5858
parser.add_argument(
5959
"-L",
@@ -65,7 +65,7 @@ def get_arguments():
6565
parser.add_argument(
6666
"-NSH",
6767
"--no_stream_handler",
68-
action='store_true',
68+
action="store_true",
6969
required=False,
7070
help=(
7171
"Provide flag when we don't want a stream handler (prevents duplication of log messages "
@@ -101,7 +101,6 @@ def is_valid_dir(parser: argparse.ArgumentParser, dir: str) -> str:
101101
return dir
102102

103103

104-
105104
if __name__ == "__main__":
106105
parsed_args = get_arguments()
107106
logger = set_root_logger(parsed_args.no_stream_handler)
@@ -110,8 +109,7 @@ def is_valid_dir(parser: argparse.ArgumentParser, dir: str) -> str:
110109
parsed_args.sequencer_ids,
111110
parsed_args.panels,
112111
parsed_args.tso_panels,
113-
parsed_args.dev_panno,
112+
parsed_args.dev_pannos,
114113
parsed_args.logdir,
115114
)
116115
sscheck_obj.ss_checks() # Carry out samplesheeet validation
117-

samplesheet_validator/git_tag.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,11 @@ def git_tag() -> str:
1111
cmd = f"git -C {filepath} describe --tags"
1212

1313
proc = subprocess.Popen(
14-
[cmd], stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True,
14+
[cmd],
15+
stderr=subprocess.PIPE,
16+
stdout=subprocess.PIPE,
17+
shell=True,
1518
)
1619
out, _ = proc.communicate()
1720
# Return standard out, removing any new line characters
18-
return out.rstrip().decode("utf-8")
21+
return out.rstrip().decode("utf-8")

samplesheet_validator/samplesheet_validator.py

+30-15
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
import re
1515
import logging
1616
from typing import Union
17-
from . import config
1817
from .ss_logger import SSLogger
1918
from seglh_naming.sample import Sample
2019
from seglh_naming.samplesheet import Samplesheet
@@ -45,7 +44,7 @@ class SamplesheetCheck:
4544
development_panels (list): Development pan numbers
4645
runfolder_name (str): Name of runfolder
4746
logfile_path (str): Path to use for logfile
48-
logger (logging.Logger): Logger
47+
logger (logging.Logger): Logger
4948
5049
Methods:
5150
get_logger()
@@ -91,7 +90,7 @@ def __init__(
9190
sequencer_ids: list,
9291
panels: list,
9392
tso_panels: list,
94-
dev_panno: str,
93+
dev_pannos: list,
9594
logdir: str,
9695
):
9796
"""
@@ -100,7 +99,7 @@ def __init__(
10099
:param sequencer_ids (list): Allowed sequencer IDs
101100
:param panels (list): Allowed pan numbers
102101
:param tso_panels (list): TSO500 pan numbers
103-
:param dev_panno (str): Development pan number
102+
:param dev_pannos (list): Development pan numbers
104103
:param logdir (str): Log file directory
105104
"""
106105
self.samplesheet_path = samplesheet_path
@@ -118,20 +117,22 @@ def __init__(
118117
self.sequencer_ids = sequencer_ids
119118
self.panels = panels
120119
self.tso_panels = tso_panels
121-
self.dev_panno = dev_panno
122-
self.runfolder_name = (self.samplesheet_path.split("/")[-1]).split("_SampleSheet.csv")[0]
123-
self.logfile_path = f"{os.path.join(logdir, self.runfolder_name)}_samplesheet_validator.log"
120+
self.dev_pannos = dev_pannos
121+
self.runfolder_name = (self.samplesheet_path.split("/")[-1]).split(
122+
"_SampleSheet.csv"
123+
)[0]
124+
self.logfile_path = (
125+
f"{os.path.join(logdir, self.runfolder_name)}_samplesheet_validator.log"
126+
)
124127
self.logger = self.get_logger()
125128

126-
127129
def get_logger(self) -> logging.Logger:
128130
"""
129131
Get logger for the class
130132
:return (object): Logger
131133
"""
132134
return SSLogger(self.logfile_path, self.runfolder_name).get_logger(__name__)
133135

134-
135136
def ss_checks(self) -> None:
136137
"""
137138
Run checks at samplesheet and sample level. Performs required extra checks for
@@ -222,7 +223,11 @@ def development_run(self) -> Union[bool, None]:
222223
:return True | None: True if contains dev pan numbers, None if does not
223224
"""
224225
strings_to_check = self.samples["Sample_ID"] + self.samples["Sample_Name"]
225-
if any(self.dev_panno in sample_name for sample_name in strings_to_check):
226+
227+
if any(
228+
any(dev_panno in sample_string for sample_string in strings_to_check)
229+
for dev_panno in self.dev_pannos
230+
):
226231
self.logger.info(
227232
self.logger.log_msgs["dev_run"],
228233
self.samplesheet_path,
@@ -286,7 +291,9 @@ def get_data_section(self) -> None:
286291
self.extract_headers(line, line_index)
287292
break
288293
elif len(line.split(",")[0]) < 2:
289-
self.logger.info(self.logger.log_msgs["found_empty_line"], line_index)
294+
self.logger.info(
295+
self.logger.log_msgs["found_empty_line"], line_index
296+
)
290297
pass # Skip empty lines
291298
else: # Contains sample
292299
self.extract_sample_name_id(line, line_index)
@@ -307,7 +314,8 @@ def extract_headers(self, line: str, line_index: int) -> None:
307314
)
308315
self.add_msg_to_error_dict(
309316
"Error extracting headers",
310-
self.logger.log_msgs["error_extracting_headers"] % (line_index, exception),
317+
self.logger.log_msgs["error_extracting_headers"]
318+
% (line_index, exception),
311319
)
312320

313321
def extract_sample_name_id(self, line: str, line_index: int) -> None:
@@ -324,11 +332,16 @@ def extract_sample_name_id(self, line: str, line_index: int) -> None:
324332
except Exception as exception:
325333
self.errors = True
326334
self.logger.warning(
327-
self.logger.log_msgs["col_extraction_error"], col_name, line_index, line, exception,
335+
self.logger.log_msgs["col_extraction_error"],
336+
col_name,
337+
line_index,
338+
line,
339+
exception,
328340
)
329341
self.add_msg_to_error_dict(
330342
"Error extracting sample name and ID",
331-
self.logger.log_msgs["col_extraction_error"] % (col_name, line_index, line, exception)
343+
self.logger.log_msgs["col_extraction_error"]
344+
% (col_name, line_index, line, exception),
332345
)
333346

334347
def check_expected_headers(self) -> None:
@@ -340,7 +353,9 @@ def check_expected_headers(self) -> None:
340353
if not all(
341354
header in self.data_headers for header in self.expected_data_headers
342355
):
343-
self.missing_headers = list(set(self.expected_data_headers).difference(self.data_headers))
356+
self.missing_headers = list(
357+
set(self.expected_data_headers).difference(self.data_headers)
358+
)
344359
self.errors = True
345360
self.add_msg_to_error_dict(
346361
"Missing headers",

samplesheet_validator/ss_logger.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
33
Class used to create Samplesheet Validator logfiles
44
"""
5+
56
import sys
67
from . import config
78
import logging
@@ -30,7 +31,6 @@ def set_root_logger(no_stream_handler: bool):
3031
return logger
3132

3233

33-
3434
class SSLogger:
3535
"""
3636
Creates a python logging object with a file handler and syslog handler

settings.json

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
{
2+
"python.testing.pytestArgs": [
3+
"."
4+
],
5+
"python.testing.unittestEnabled": false,
6+
"python.testing.pytestEnabled": true,
7+
"python.envFile": "${workspaceFolder}/.venv",
8+
"python.analysis.extraPaths": [
9+
],
10+
"editor.formatOnSaveMode": "file",
11+
"editor.formatOnSave": true,
12+
"editor.codeActionsOnSave": {
13+
"source.organizeImports": "explicit"
14+
},
15+
"[python]": {
16+
"editor.defaultFormatter": "ms-python.black-formatter",
17+
"editor.formatOnSave": true,
18+
"editor.codeActionsOnSave": {
19+
"source.organizeImports": "explicit"
20+
}
21+
},
22+
"isort.args": [
23+
"--profile",
24+
"black"
25+
],
26+
"flake8.args": [
27+
"--max-line-length=120"
28+
],
29+
"pylint.args": [
30+
"--max-line-length=120"
31+
],
32+
"black-formatter.args": [
33+
"--line-length",
34+
"120"
35+
],
36+
"python.analysis.typeCheckingMode": "basic"
37+
}

test/conftest.py

+6-31
Original file line numberDiff line numberDiff line change
@@ -13,36 +13,11 @@
1313

1414

1515
def pytest_addoption(parser):
16-
parser.addoption(
17-
"--sequencer_ids",
18-
action="store",
19-
type=str,
20-
required=True,
21-
)
22-
parser.addoption(
23-
"--panels",
24-
action="store",
25-
type=str,
26-
required=True,
27-
)
28-
parser.addoption(
29-
"--tso_panels",
30-
action="store",
31-
type=str,
32-
required=True,
33-
)
34-
parser.addoption(
35-
"--dev_panno",
36-
action="store",
37-
type=str,
38-
required=True
39-
)
40-
parser.addoption(
41-
"--logdir",
42-
action="store",
43-
type=str,
44-
required=True,
45-
)
16+
parser.addoption("--sequencer_ids", action="store", type=str, required=True)
17+
parser.addoption("--panels", action="store", type=str, required=True)
18+
parser.addoption("--tso_panels", action="store", type=str, required=True)
19+
parser.addoption("--dev_pannos", action="store", type=str, required=True)
20+
parser.addoption("--logdir", action="store", type=str, required=True)
4621

4722

4823
def pytest_configure(config):
@@ -52,7 +27,7 @@ def pytest_configure(config):
5227
os.environ["sequencer_ids"] = config.getoption("sequencer_ids")
5328
os.environ["panels"] = config.getoption("panels")
5429
os.environ["tso_panels"] = config.getoption("tso_panels")
55-
os.environ["dev_panno"] = config.getoption("dev_panno")
30+
os.environ["dev_pannos"] = config.getoption("dev_pannos")
5631
data_dir = os.path.abspath("test/data/")
5732
os.environ["samplesheet_dir"] = f'{os.path.join(data_dir, "samplesheets")}'
5833
# Temporary directories to copy test files into and to contain outputs
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
[Header],,,,,,,,,
2+
IEMFileVersion,4,,,,,,,,
3+
Investigator Name,DEV01,,,,,,,,
4+
Experiment Name,DEV01,,,,,,,,
5+
Date,12/10/2023,,,,,,,,
6+
Workflow,GenerateFASTQ,,,,,,,,
7+
Application,FASTQ Only,,,,,,,,
8+
Assay,Nextera XT,,,,,,,,
9+
Description,DEV01,,,,,,,,
10+
Chemistry,Amplicon,,,,,,,,
11+
,,,,,,,,,
12+
[Reads],,,,,,,,,
13+
251,,,,,,,,,
14+
251,,,,,,,,,
15+
,,,,,,,,,
16+
[Settings],,,,,,,,,
17+
ReverseComplement,0,,,,,,,,
18+
Adapter,AGATCGGAAGAGCACACGTCTGAACTCCAGTCA,,,,,,,,
19+
AdapterRead2,AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT,,,,,,,,
20+
,,,,,,,,,
21+
[Data],,,,,,,,,
22+
Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,I5_Index_ID,index2,Sample_Project,Description
23+
DEV01_01_000000_0000000_Pan5227,DEV01_01_000000_0000000_Pan5227,,,IDT8_UDI_1_2,CTGATCGT,IDT8_UDI_1_1,ATATGCGC,,
24+
DEV01_02_000000_0000000_Pan5227,DEV01_02_000000_0000000_Pan5227,,,IDT8_UDI_2_2,ACTCTCGA,IDT8_UDI_2_1,TGGTACAG,,
25+
DEV01_03_000000_NTC0000_Pan5227,DEV01_03_000000_NTC0000_Pan5227,,,IDT8_UDI_3_2,TGAGCTAG,IDT8_UDI_3_1,AACCGTTC,,
26+
DEV01_04_000000_0000000_Pan5227,DEV01_04_000000_0000000_Pan5227,,,IDT8_UDI_4_2,GAGACGAT,IDT8_UDI_4_1,TAACCGGT,,
27+
DEV01_05_000000_0000000_Pan5227,DEV01_05_000000_0000000_Pan5227,,,IDT8_UDI_5_2,CTTGTCGA,IDT8_UDI_5_1,GAACATCG,,

test/test_samplesheet_validator.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
from samplesheet_validator.__main__ import is_valid_dir, is_valid_file
1111

1212

13+
# TODO add second dev pan number in
14+
1315
def shutdown_logs(logger: object) -> None:
1416
"""
1517
To prevent duplicate filehandlers and system handlers close
@@ -33,7 +35,7 @@ def get_sscheck_obj(samplesheet: str) -> object:
3335
os.getenv("sequencer_ids").split(","),
3436
os.getenv("panels").split(","),
3537
os.getenv("tso_panels").split(","),
36-
os.getenv("dev_panno"),
38+
os.getenv("dev_pannos").split(","),
3739
os.getenv("temp_dir"),
3840
)
3941
sscheck_obj.ss_checks()
@@ -66,6 +68,11 @@ def valid_dev_samplesheet():
6668
os.getenv("samplesheet_dir"),
6769
"valid",
6870
"231012_M02631_0285_000000000-LBGMH_SampleSheet.csv",
71+
),
72+
os.path.join(
73+
os.getenv("samplesheet_dir"),
74+
"valid",
75+
"231012_M02631_0285_000000000-ERTFB_SampleSheet.csv",
6976
)
7077
]
7178

0 commit comments

Comments
 (0)