@@ -11,9 +11,17 @@ def to_text(path: str, area_details: Optional[Dict[str, Any]] = None) -> str:
11
11
12
12
Args:
13
13
path (str): Path to the PDF file.
14
- area_details (Optional[Dict[str, Any]], optional):
14
+ area_details (Optional[Dict[str, Any]], optional):
15
15
Specific area in the PDF to extract text from.
16
16
Defaults to None (extract from the entire page).
17
+ If provided, must be a dictionary with the following keys:
18
+ - "f": First page to extract from
19
+ - "l": Last page to extract from
20
+ - "x": x-coordinate of the top-left corner of the area to extract (in pixels)
21
+ - "y": y-coordinate of the top-left corner of the area to extract (in pixels)
22
+ - "W": Width of the area to extract (in pixels)
23
+ - "H": Height of the area to extract (in pixels)
24
+ - "r": Resolution to use (in DPI)
17
25
18
26
Returns:
19
27
str: The extracted text.
@@ -32,8 +40,8 @@ def to_text(path: str, area_details: Optional[Dict[str, Any]] = None) -> str:
32
40
if area_details is not None :
33
41
# An area was specified
34
42
# Validate the required keys were provided
35
- assert "f" in area_details , "Area r details missing"
36
- assert "l" in area_details , "Area r details missing"
43
+ assert "f" in area_details , "Area f details missing"
44
+ assert "l" in area_details , "Area l details missing"
37
45
assert "r" in area_details , "Area r details missing"
38
46
assert "x" in area_details , "Area x details missing"
39
47
assert "y" in area_details , "Area y details missing"
0 commit comments