Skip to content

[CI] OASIS-to-BIDS : Add option --subjects-list and its functionality for oasis-to-bids converter #1474

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Mar 25, 2025
17 changes: 17 additions & 0 deletions clinica/converters/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
"validate_input_path",
"viscode_to_session",
"load_clinical_csv",
"get_subjects_list_from_file",
]


Expand Down Expand Up @@ -571,3 +572,19 @@ def load_clinical_csv(clinical_dir: Path, filename: str) -> pd.DataFrame:
f"File {str(files_matching_pattern[0])} was found but could not "
"be loaded as a DataFrame. Please check your data."
)


def get_subjects_list_from_file(subjects_list_path: Path) -> list[str]:
    """Gets the list of subjects from a subjects list file.

    The file is expected to contain one subject identifier per line.
    Surrounding whitespace is stripped from each line and blank lines
    are ignored, so that a stray trailing space or empty line does not
    produce an identifier that can never match a subject folder.

    Parameters
    ----------
    subjects_list_path : Path
        The path to the subjects list file.

    Returns
    -------
    list[str] :
        List of subjects, in the order in which they appear in the file.
    """
    stripped_lines = (
        line.strip() for line in subjects_list_path.read_text().splitlines()
    )
    # Drop lines that were empty or contained only whitespace.
    return [line for line in stripped_lines if line]
8 changes: 3 additions & 5 deletions clinica/converters/ixi_to_bids/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,6 @@ def _get_subjects_list_from_data(data_directory: Path) -> List[str]:
)


def _get_subjects_list_from_file(subjs_list_path: Path) -> List[str]:
    """Return the lines of the subjects list file, without line endings."""
    file_content = subjs_list_path.read_text()
    return file_content.splitlines()


def define_participants(
data_directory: Path,
subjs_list_path: Optional[Path] = None,
Expand All @@ -65,11 +61,13 @@ def define_participants(

"""

from .._utils import get_subjects_list_from_file

list_from_data = _get_subjects_list_from_data(data_directory)
if subjs_list_path is None:
return sorted(list_from_data)
cprint("Loading a subjects list provided by the user...")
list_from_file = _get_subjects_list_from_file(subjs_list_path)
list_from_file = get_subjects_list_from_file(subjs_list_path)
list_filtered = [subject for subject in list_from_file if subject in list_from_data]
invalid_subjects = list(set(list_from_file) - set(list_filtered))
if invalid_subjects:
Expand Down
32 changes: 18 additions & 14 deletions clinica/converters/oasis_to_bids/_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,13 @@ def convert(
bids_dir = validate_input_path(bids_dir, check_exist=False)
path_to_clinical = validate_input_path(path_to_clinical)
if subjects:
cprint(
(
f"Subject filtering is not yet implemented in {get_converter_name(StudyName.OASIS)} converter. "
"All subjects available will be converted."
),
lvl="warning",
)
subjects = validate_input_path(subjects)

OasisToBids().convert(
path_to_dataset,
bids_dir,
path_to_clinical,
subjects=subjects,
n_procs=n_procs,
)

Expand All @@ -51,10 +47,13 @@ def convert(
source_dir: Path,
destination_dir: Path,
clinical_data_dir: Path,
subjects: Optional[Path] = None,
n_procs: Optional[int] = 1,
):
self._create_modality_agnostic_files(destination_dir)
self.convert_images(source_dir, destination_dir, n_procs=n_procs)
self.convert_images(
source_dir, destination_dir, subjects=subjects, n_procs=n_procs
)
self.convert_clinical_data(clinical_data_dir, destination_dir)

def convert_clinical_data(self, clinical_data_dir: Path, bids_dir: Path):
Expand Down Expand Up @@ -183,7 +182,11 @@ def convert_single_subject(subj_folder: Path, dest_dir: Path):
)

def convert_images(
self, source_dir: Path, dest_dir: Path, n_procs: Optional[int] = 1
self,
source_dir: Path,
dest_dir: Path,
subjects: Optional[Path] = None,
n_procs: Optional[int] = 1,
):
"""Convert T1w images to BIDS.

Expand All @@ -193,6 +196,8 @@ def convert_images(

dest_dir: path to the BIDS directory

subjects: path to list of subjects to process

n_procs : int, optional
The requested number of processes.
If specified, it should be between 1 and the number of available CPUs.
Expand All @@ -206,14 +211,13 @@ def convert_images(
from functools import partial
from multiprocessing import Pool

from ._utils import get_subjects_list

if not dest_dir.exists():
dest_dir.mkdir(parents=True)

subjects_folders = [
path
for path in source_dir.rglob("OAS1_*")
if path.is_dir() and path.name.endswith("_MR1")
]
subjects_folders = get_subjects_list(source_dir, subjects)

func = partial(self.convert_single_subject, dest_dir=dest_dir)
# If n_procs==1 do not rely on a Process Pool to enable classical debugging
if n_procs == 1:
Expand Down
58 changes: 57 additions & 1 deletion clinica/converters/oasis_to_bids/_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from pathlib import Path
from typing import Iterable, Union
from typing import Iterable, Optional, Union

import nibabel as nb
import numpy as np
Expand All @@ -8,6 +8,7 @@
from clinica.converters.study_models import StudyName, bids_id_factory

__all__ = [
"get_subjects_list",
"create_sessions_df",
"write_sessions_tsv",
"write_scans_tsv",
Expand All @@ -16,6 +17,61 @@
]


def _get_subjects_list_from_data(source_dir: Path) -> list[str]:
    """Lists the names of the non-hidden entries found in the dataset folder.

    Parameters
    ----------
    source_dir : Path
        The path to the input dataset folder.

    Returns
    -------
    list[str] :
        Sorted names of every entry of ``source_dir`` whose name does not
        start with a dot. Both files and folders are listed here.
    """
    # Path.iterdir() yields entries in arbitrary, filesystem-dependent order;
    # sorting makes the resulting subject list reproducible across runs.
    return sorted(
        entry.name
        for entry in source_dir.iterdir()
        if not entry.name.startswith(".")
    )


def _filter_oasis_subjects(source_dir: Path, subjects_list: list[str]) -> list[Path]:
    """Keeps the subjects that are valid OASIS folders of the dataset.

    A subject is kept when its name fully matches ``OAS1_<4 digits>_MR1``
    and ``source_dir / name`` is an existing directory.

    Parameters
    ----------
    source_dir : Path
        The path to the input dataset folder.

    subjects_list : list[str]
        The candidate subject names.

    Returns
    -------
    list[Path] :
        Paths to the retained subject folders, in the order of ``subjects_list``.
    """
    import re

    oasis_id_pattern = re.compile(r"OAS1_\d{4}_MR1")

    selected_folders = []
    for subject_name in subjects_list:
        if not oasis_id_pattern.fullmatch(subject_name):
            continue
        candidate = source_dir / subject_name
        if candidate.is_dir():
            selected_folders.append(candidate)
    return selected_folders


def get_subjects_list(
    source_dir: Path, subjs_list_path: Optional[Path] = None
) -> list[Path]:
    """Gets the list of paths to the subjects folders.

    Candidate subject names are read from the subjects list file when one
    is provided, and from the content of the dataset folder otherwise.
    In both cases the candidates are then filtered with
    ``_filter_oasis_subjects``.

    Parameters
    ----------
    source_dir : Path
        The path to the input dataset folder.

    subjs_list_path : Optional[Path]
        The path to the subjects list file.

    Returns
    -------
    list[Path] :
        List of paths to the subjects folders.
    """
    from .._utils import get_subjects_list_from_file

    candidate_names = (
        get_subjects_list_from_file(subjs_list_path)
        if subjs_list_path
        else _get_subjects_list_from_data(source_dir)
    )
    return _filter_oasis_subjects(source_dir, candidate_names)


def _convert_cdr_to_diagnosis(cdr: Union[int, str]) -> str:
if cdr == 0:
return "CN"
Expand Down
4 changes: 4 additions & 0 deletions clinica/converters/oasis_to_bids/cli.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from pathlib import Path
from typing import Optional

import click
Expand All @@ -10,12 +11,14 @@
@cli_param.dataset_directory
@cli_param.clinical_data_directory
@cli_param.bids_directory
@cli_param.subjects_list
@option.global_option_group
@option.n_procs
def cli(
dataset_directory: str,
clinical_data_directory: str,
bids_directory: str,
subjects_list: Optional[Path] = None,
n_procs: Optional[int] = None,
) -> None:
"""OASIS to BIDS converter.
Expand All @@ -30,6 +33,7 @@ def cli(
dataset_directory,
bids_directory,
clinical_data_directory,
subjects=subjects_list,
n_procs=n_procs,
)

Expand Down
12 changes: 12 additions & 0 deletions test/unittests/converters/test_converter_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,3 +413,15 @@ def test_viscode_to_session_with_custom_baseline_identifiers():
assert viscode_to_session("foo", baseline_identifiers={"base", "foo"}) == "ses-M000"
with pytest.raises(ValueError, match="The viscode bl is not correctly formatted."):
viscode_to_session("bl", baseline_identifiers={"base", "foo"})


def test_get_subjects_list_from_file(tmp_path):
    """Each line of the subjects file should become one list entry."""
    from clinica.converters._utils import get_subjects_list_from_file

    subjects_file = tmp_path / "subjects.txt"
    subjects_file.write_text("IXI123\nIXI001")
    expected_subjects = ["IXI123", "IXI001"]

    assert get_subjects_list_from_file(subjects_file) == expected_subjects
12 changes: 0 additions & 12 deletions test/unittests/converters/test_ixi_to_bids_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,6 @@ def test_get_subjects_list_from_data(tmp_path):
assert _get_subjects_list_from_data(tmp_path) == ["IXI123"]


def test_get_subjects_list_from_file(tmp_path):
    """Each line of the subjects file should become one list entry."""
    from clinica.converters.ixi_to_bids._utils import _get_subjects_list_from_file

    subjects_file = tmp_path / "subjects.txt"
    subjects_file.write_text("IXI123\nIXI001")
    expected_subjects = ["IXI123", "IXI001"]

    assert _get_subjects_list_from_file(subjects_file) == expected_subjects


def test_define_participants_filter(tmp_path):
from clinica.converters.ixi_to_bids._utils import define_participants

Expand Down
61 changes: 58 additions & 3 deletions test/unittests/converters/test_oasis_to_bids_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ def test_write_scans_tsv(tmp_path, bids_dir: Path) -> None:
assert file.empty


def test_get_first_image(tmp_path: Path) -> None:
def test_get_first_image(tmp_path) -> None:
from clinica.converters.oasis_to_bids._utils import get_first_image

folder_path = tmp_path / "folder"
Expand All @@ -256,7 +256,7 @@ def test_get_first_image(tmp_path: Path) -> None:
assert get_first_image(folder_path) == (folder_path / "file_2.img")


def test_get_first_image_not_found_error(tmp_path: Path) -> None:
def test_get_first_image_not_found_error(tmp_path) -> None:
from clinica.converters.oasis_to_bids._utils import get_first_image

folder_path = tmp_path / "folder"
Expand All @@ -270,7 +270,7 @@ def test_get_first_image_not_found_error(tmp_path: Path) -> None:
get_first_image(folder_path)


def test_get_image_with_good_orientation(tmp_path: Path) -> None:
def test_get_image_with_good_orientation(tmp_path) -> None:
from clinica.converters.oasis_to_bids._utils import get_image_with_good_orientation

folder_path = tmp_path / "folder"
Expand Down Expand Up @@ -310,6 +310,61 @@ def test_get_image_with_good_orientation(tmp_path: Path) -> None:
assert len(image.shape) == 3 # the image dimension should be 3D


def test_get_subjects_list_from_data(tmp_path) -> None:
    """Without a subjects file, only valid OASIS folders of the dataset are returned."""
    from clinica.converters.oasis_to_bids._utils import get_subjects_list

    source_dir = tmp_path / "dataset"
    source_dir.mkdir()

    # Only the first name matches the OAS1_<4 digits>_MR1 pattern.
    folder_names = [
        "OAS1_0001_MR1",
        "OAS2_0002_MR1",
        "OAS1_0003_MR2",
        "OAS1_004_MR1",
        "foo",
    ]
    for folder_name in folder_names:
        (source_dir / folder_name).mkdir()

    # Valid name, but a regular file rather than a folder.
    (source_dir / "OAS1_0005_MR1").touch()

    expected = [source_dir / "OAS1_0001_MR1"]
    assert get_subjects_list(source_dir) == expected


def test_get_subjects_list_from_file(tmp_path) -> None:
    """With a subjects file, only listed names that are valid OASIS folders are returned."""
    from clinica.converters.oasis_to_bids._utils import get_subjects_list

    source_dir = tmp_path / "dataset"
    source_dir.mkdir()

    # Only the first name matches the OAS1_<4 digits>_MR1 pattern.
    subject_names = [
        "OAS1_0001_MR1",
        "OAS2_0002_MR1",
        "OAS1_0003_MR2",
        "OAS1_004_MR1",
        "foo",
    ]
    for subject_name in subject_names:
        (source_dir / subject_name).mkdir()

    # Valid name listed in the file, but present as a regular file in the dataset.
    (source_dir / "OAS1_0005_MR1").touch()
    subject_names.append("OAS1_0005_MR1")

    subjects_list_dir = tmp_path / "subjects_list_dir"
    subjects_list_dir.mkdir()
    subjects_list = subjects_list_dir / "subjects_list.txt"
    subjects_list.write_text("".join(f"{name}\n" for name in subject_names))

    expected = [source_dir / "OAS1_0001_MR1"]
    assert get_subjects_list(source_dir, subjects_list) == expected


@pytest.mark.parametrize(
"cdr,diagnosis",
[
Expand Down