Skip to content

Commit 58d4fc7

Browse files
authored
Merge pull request #3637 from zachlindsey/improve_csv_reader_quote_handling_and_separators
FIX: Parse commas in CSV fields
2 parents f277d18 + f746c34 commit 58d4fc7

File tree

3 files changed

+51
-8
lines changed

3 files changed

+51
-8
lines changed

nipype/interfaces/utility/csv.py

+7 -8
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
# vi: set ft=python sts=4 ts=4 sw=4 et:
33
"""CSV Handling utilities
44
"""
5+
import csv
56
from ..base import traits, TraitedSpec, DynamicTraitedSpec, File, BaseInterface
67
from ..io import add_traits
78

@@ -13,6 +14,7 @@ class CSVReaderInputSpec(DynamicTraitedSpec, TraitedSpec):
1314
header = traits.Bool(
1415
False, usedefault=True, desc="True if the first line is a column header"
1516
)
17+
delimiter = traits.String(",", usedefault=True, desc="Delimiter to use.")
1618

1719

1820
class CSVReader(BaseInterface):
@@ -52,14 +54,11 @@ def _append_entry(self, outputs, entry):
5254
outputs[key].append(value)
5355
return outputs
5456

55-
def _parse_line(self, line):
56-
line = line.replace("\n", "")
57-
entry = [x.strip() for x in line.split(",")]
58-
return entry
59-
6057
def _get_outfields(self):
6158
with open(self.inputs.in_file) as fid:
62-
entry = self._parse_line(fid.readline())
59+
reader = csv.reader(fid, delimiter=self.inputs.delimiter)
60+
61+
entry = next(reader)
6362
if self.inputs.header:
6463
self._outfields = tuple(entry)
6564
else:
@@ -82,10 +81,10 @@ def _list_outputs(self):
8281
for key in self._outfields:
8382
outputs[key] = [] # initialize outfields
8483
with open(self.inputs.in_file) as fid:
85-
for line in fid.readlines():
84+
reader = csv.reader(fid, delimiter=self.inputs.delimiter)
85+
for entry in reader:
8686
if self.inputs.header and isHeader: # skip header line
8787
isHeader = False
8888
continue
89-
entry = self._parse_line(line)
9089
outputs = self._append_entry(outputs, entry)
9190
return outputs

nipype/interfaces/utility/tests/test_auto_CSVReader.py

+3
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,9 @@
44

55
def test_CSVReader_inputs():
66
input_map = dict(
7+
delimiter=dict(
8+
usedefault=True,
9+
),
710
header=dict(
811
usedefault=True,
912
),

nipype/interfaces/utility/tests/test_csv.py

+41
Original file line number | Diff line number | Diff line change
@@ -26,3 +26,44 @@ def test_csvReader(tmpdir):
2626
assert out.outputs.column_0 == ["foo", "bar", "baz"]
2727
assert out.outputs.column_1 == ["hello", "world", "goodbye"]
2828
assert out.outputs.column_2 == ["300.1", "5", "0.3"]
29+
30+
31+
def test_csvReader_quoted(tmpdir):
32+
header = "files,labels,erosion\n"
33+
lines = ['foo,"hello, world",300.1\n']
34+
35+
name = tmpdir.join("testfile.csv").strpath
36+
with open(name, "w") as fid:
37+
reader = utility.CSVReader()
38+
fid.writelines(lines)
39+
fid.flush()
40+
reader.inputs.in_file = name
41+
out = reader.run()
42+
43+
assert out.outputs.column_0 == ["foo"]
44+
assert out.outputs.column_1 == ["hello, world"]
45+
assert out.outputs.column_2 == ["300.1"]
46+
47+
48+
def test_csvReader_tabs(tmpdir):
49+
header = "files\tlabels\terosion\n"
50+
lines = ["foo\thello\t300.1\n", "bar\tworld\t5\n", "baz\tgoodbye\t0.3\n"]
51+
for x in range(2):
52+
name = tmpdir.join("testfile.csv").strpath
53+
with open(name, "w") as fid:
54+
reader = utility.CSVReader(delimiter="\t")
55+
if x % 2 == 0:
56+
fid.write(header)
57+
reader.inputs.header = True
58+
fid.writelines(lines)
59+
fid.flush()
60+
reader.inputs.in_file = name
61+
out = reader.run()
62+
if x % 2 == 0:
63+
assert out.outputs.files == ["foo", "bar", "baz"]
64+
assert out.outputs.labels == ["hello", "world", "goodbye"]
65+
assert out.outputs.erosion == ["300.1", "5", "0.3"]
66+
else:
67+
assert out.outputs.column_0 == ["foo", "bar", "baz"]
68+
assert out.outputs.column_1 == ["hello", "world", "goodbye"]
69+
assert out.outputs.column_2 == ["300.1", "5", "0.3"]

0 commit comments

Comments
 (0)