Skip to content

Commit f24d247

Browse files
committed
Update pdftotext Docstring
1 parent cd44595 commit f24d247

File tree

1 file changed

+11
-3
lines changed

1 file changed

+11
-3
lines changed

src/invoice2data/input/pdftotext.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,17 @@ def to_text(path: str, area_details: Optional[Dict[str, Any]] = None) -> str:
1111
1212
Args:
1313
path (str): Path to the PDF file.
14-
area_details (Optional[Dict[str, Any]], optional):
14+
area_details (Optional[Dict[str, Any]], optional):
1515
Specific area in the PDF to extract text from.
1616
Defaults to None (extract from the entire page).
17+
If provided, should be a dictionary with the following keys:
18+
- "f": First page to extract from
19+
- "l": Last page to extract from
20+
- "x": x-coordinate of the top-left corner of the area to extract (in pixels)
21+
- "y": y-coordinate of the top-left corner of the area to extract (in pixels)
22+
- "W": Width of the area to extract (in pixels)
23+
- "H": Height of the area to extract (in pixels)
24+
- "r": Resolution used for the extraction (in DPI)
1725
1826
Returns:
1927
str: The extracted text.
@@ -32,8 +40,8 @@ def to_text(path: str, area_details: Optional[Dict[str, Any]] = None) -> str:
3240
if area_details is not None:
3341
# An area was specified
3442
# Validate the required keys were provided
35-
assert "f" in area_details, "Area r details missing"
36-
assert "l" in area_details, "Area r details missing"
43+
assert "f" in area_details, "Area f details missing"
44+
assert "l" in area_details, "Area l details missing"
3745
assert "r" in area_details, "Area r details missing"
3846
assert "x" in area_details, "Area x details missing"
3947
assert "y" in area_details, "Area y details missing"

0 commit comments

Comments
 (0)