Skip to content

Commit

Permalink
feat: allow lineterminator specification in MetricWriter (#220)
Browse files Browse the repository at this point in the history
I stumbled upon the fact that `csv.DictWriter` uses Windows line
endings (`\r\n`) by default. This PR lets you change that when using
`MetricWriter`.

I'm arguing for the default to be `"\n"` instead of `os.linesep`
because:
- [Python has universal newline
support](https://peps.python.org/pep-0278/) (so Python itself doesn't
care which line terminator we write)
- If you're working on a shared analysis and checking in files to
version control, you don't want to worry about whether you're running on
the same OS as your collaborator
- In my experience, more bioinformatics tools have problems with Windows
line endings than with Unix line endings
  • Loading branch information
znorgaard authored Feb 14, 2025
1 parent 857ebbd commit 03bb237
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 0 deletions.
4 changes: 4 additions & 0 deletions fgpyo/util/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,7 @@ def __init__(
delimiter: str = "\t",
include_fields: Optional[List[str]] = None,
exclude_fields: Optional[List[str]] = None,
lineterminator: str = "\n",
) -> None:
"""
Args:
Expand All @@ -464,6 +465,8 @@ def __init__(
exclude_fields: If specified, any listed fieldnames will be excluded when writing
records to file.
May not be used together with `include_fields`.
lineterminator: The string used to terminate lines produced by the MetricWriter.
Default = "\n".
Raises:
TypeError: If the provided metric class is not a dataclass- or attr-decorated
Expand Down Expand Up @@ -506,6 +509,7 @@ def __init__(
f=self._fout,
fieldnames=self._fieldnames,
delimiter=delimiter,
lineterminator=lineterminator,
)

# If we aren't appending to an existing file, write the header before any rows
Expand Down
17 changes: 17 additions & 0 deletions tests/fgpyo/util/test_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,23 @@ def test_writer_from_str(tmp_path: Path) -> None:
next(f)


@pytest.mark.parametrize("lineterminator", ["\n", "\r", "\r\n"])
def test_writer_lineterminator(tmp_path: Path, lineterminator: str) -> None:
    """Check that MetricWriter ends every line with the requested terminator."""
    out_path = tmp_path / "test.txt"

    writer_cm = MetricWriter(
        filename=out_path,
        append=False,
        metric_class=FakeMetric,
        lineterminator=lineterminator,
    )
    with writer_cm as metric_writer:
        metric_writer.write(FakeMetric(foo="abc", bar=1))

    # Reading in text mode with the default `newline` setting translates every
    # terminator to "\n", while `newlines` reports the terminator actually
    # encountered in the file -- which is what this test is pinning down.
    with out_path.open("r") as handle:
        header_line = next(handle)
        assert header_line == "foo\tbar\n"
        assert repr(handle.newlines) == repr(lineterminator)

        record_line = next(handle)
        assert record_line == "abc\t1\n"

        # Only the header and a single record should have been written.
        with pytest.raises(StopIteration):
            next(handle)


def test_writer_writeall(tmp_path: Path) -> None:
fpath = tmp_path / "test.txt"

Expand Down

0 comments on commit 03bb237

Please sign in to comment.