Skip to content

Commit

Permalink
Merge pull request #740 from sbrunner/auto-mask
Browse files Browse the repository at this point in the history
Auto mask
  • Loading branch information
sbrunner authored Aug 11, 2022
2 parents 49b8ca9 + 6d80d26 commit 7e18414
Show file tree
Hide file tree
Showing 12 changed files with 298 additions and 24 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ export DOCKER_BUILDKIT=1

.PHONY: jsonschema
jsonschema:
ci/schema-copy
jsonschema2md scan_to_paperless/config_schema.json config.md
jsonschema2md scan_to_paperless/process_schema.json process.md
c2cciutils-checks --fix --check=prettier
Expand Down
18 changes: 18 additions & 0 deletions ci/schema-copy
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env python3

import json


def _main() -> None:
    """Synchronise the ``args`` properties of the process schema with the config schema.

    Both schema files are loaded, the ``definitions.args.properties`` object of the
    process schema is replaced by the one found in the config schema, and the process
    schema file is rewritten in place (2 spaces indentation).
    """
    source_path = "scan_to_paperless/config_schema.json"
    target_path = "scan_to_paperless/process_schema.json"

    with open(source_path, encoding="utf-8") as source_file:
        source_schema = json.load(source_file)
    with open(target_path, encoding="utf-8") as target_file:
        target_schema = json.load(target_file)

    # The config schema is the reference: only the properties are copied, the other
    # keys of the process schema "args" definition are left untouched.
    shared_properties = source_schema["definitions"]["args"]["properties"]
    target_schema["definitions"]["args"]["properties"] = shared_properties

    with open(target_path, "w", encoding="utf-8") as target_file:
        json.dump(target_schema, target_file, indent=2)


# Run the schema copy only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    _main()
9 changes: 9 additions & 0 deletions config.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,12 @@
- **`run_ps2pdf`** _(boolean)_: Run the ps2pdf optimizer (=> JPEG). Default: `False`.
- **`jpeg`** _(boolean)_: Convert images to JPEG. Default: `False`.
- **`jpeg_quality`** _(integer)_: The JPEG quality. Default: `90`.
- **`auto_mask`** _(object)_: The auto mask configuration.
  - **`lower_hsv_color`** _(array)_: The lower color in HSV representation. Default: `[0, 0, 108]`.
    - **Items** _(integer)_
  - **`upper_hsv_color`** _(array)_: The upper color in HSV representation. Default: `[255, 10, 148]`.
    - **Items** _(integer)_
  - **`de_noise_size`** _(integer)_: The size of the artifacts that will be de-noised. Default: `20`.
  - **`de_noise_level`** _(integer)_: The threshold level used for de-noising on the blurred image. Default: `220`.
  - **`buffer_size`** _(integer)_: The size of the buffer added to the mask. Default: `100`.
  - **`buffer_level`** _(integer)_: The threshold level used for the buffer on the blurred image. Default: `20`.
9 changes: 9 additions & 0 deletions process.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,12 @@
- **`run_ps2pdf`** _(boolean)_: Run the ps2pdf optimizer (=> JPEG). Default: `False`.
- **`jpeg`** _(boolean)_: Convert images to JPEG. Default: `False`.
- **`jpeg_quality`** _(integer)_: The JPEG quality. Default: `90`.
- **`auto_mask`** _(object)_: The auto mask configuration.
  - **`lower_hsv_color`** _(array)_: The lower color in HSV representation. Default: `[0, 0, 108]`.
    - **Items** _(integer)_
  - **`upper_hsv_color`** _(array)_: The upper color in HSV representation. Default: `[255, 10, 148]`.
    - **Items** _(integer)_
  - **`de_noise_size`** _(integer)_: The size of the artifacts that will be de-noised. Default: `20`.
  - **`de_noise_level`** _(integer)_: The threshold level used for de-noising on the blurred image. Default: `220`.
  - **`buffer_size`** _(integer)_: The size of the buffer added to the mask. Default: `100`.
  - **`buffer_level`** _(integer)_: The threshold level used for the buffer on the blurred image. Default: `20`.
42 changes: 42 additions & 0 deletions scan_to_paperless/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,9 @@
#
# default: 90
"jpeg_quality": int,
# WARNING: The required are not correctly taken in account,
# See: https://github.com/camptocamp/jsonschema-gentypes/issues/6
"auto_mask": "_ArgumentsAutoMask",
},
total=False,
)
Expand Down Expand Up @@ -192,6 +195,45 @@
)


class _ArgumentsAutoMask(TypedDict, total=False):
    """The auto mask configuration (every key is optional)."""

    # Lower color of the range, in HSV representation (default: [0, 0, 108])
    lower_hsv_color: List[int]
    # Upper color of the range, in HSV representation (default: [255, 10, 148])
    upper_hsv_color: List[int]
    # Size of the artifacts that will be de-noised (default: 20)
    de_noise_size: int
    # Threshold level used for de-noising on the blurred image (default: 220)
    de_noise_level: int
    # Size of the buffer added to the mask (default: 100)
    buffer_size: int
    # Threshold level used for the buffer on the blurred image (default: 20)
    buffer_level: int


_ConfigurationModesAdditionalproperties = TypedDict(
"_ConfigurationModesAdditionalproperties",
{
Expand Down
42 changes: 42 additions & 0 deletions scan_to_paperless/config_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,48 @@
"type": "integer",
"default": 90,
"description": "The JPEG quality"
},
"auto_mask": {
"type": "object",
"description": "The auto mask configuration",
"properties": {
"lower_hsv_color": {
"type": "array",
"description": "The lower color in HSV representation",
"default": [0, 0, 108],
"items": {
"type": "integer"
}
},
"upper_hsv_color": {
"type": "array",
"description": "The upper color in HSV representation",
"default": [255, 10, 148],
"items": {
"type": "integer"
}
},
"de_noise_size": {
"type": "integer",
"description": "The size of the artifacts that will be de-noised",
"default": 20
},
"de_noise_level": {
"type": "integer",
"description": "The threshold level used for de-noising on the blurred image",
"default": 220
},
"buffer_size": {
"type": "integer",
"description": "The size of the buffer added to the mask",
"default": 100
},
"buffer_level": {
"type": "integer",
"description": "The threshold level used for the buffer on the blurred image",
"default": 20
}
}
}
}
}
Expand Down
71 changes: 57 additions & 14 deletions scan_to_paperless/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,13 +102,57 @@ def __init__( # pylint: disable=too-many-arguments

def init_mask(self) -> None:
    """Compute or load the mask, then prepare it at the size of the image.

    When ``auto_mask`` is present in ``self.config["args"]`` the mask is computed from
    the image: the pixels inside the configured HSV range are selected, the small areas
    are removed (de noise), the remaining areas are grown (buffer) and the inverted
    result is stored in ``self.mask``.

    Otherwise, when ``self.root_folder`` is set, ``mask.png`` is read from that folder
    or, if it does not exist there, from its parent folder. If neither file exists the
    method returns without modifying ``self.mask`` or ``self.mask_ready``.

    Finally, when both the image and the mask are available, ``self.mask_ready``
    receives the single channel mask resized to the image size.

    Raises:
        Exception: if ``auto_mask`` is configured but no image is loaded.
    """
    # NOTE(review): the nesting of the branches below was reconstructed from a flattened
    # diff (``elif`` paired with the ``auto_mask`` test) - confirm against the repository.
    args = self.config["args"]
    if "auto_mask" in args:
        # Fail with an explicit message instead of an OpenCV error on a missing image.
        if self.image is None:
            raise Exception("The image is None")

        auto_mask_config = args["auto_mask"]
        de_noise_size = auto_mask_config.get("de_noise_size", 20)
        buffer_size = auto_mask_config.get("buffer_size", 100)

        # Select the pixels that are inside the configured HSV range
        # (defaults: any hue, saturation <= 10, value between 108 and 148).
        hsv = cv2.cvtColor(self.image, cv2.COLOR_BGR2HSV)
        lower_val = np.array(auto_mask_config.get("lower_hsv_color", [0, 0, 108]))
        upper_val = np.array(auto_mask_config.get("upper_hsv_color", [255, 10, 148]))
        mask = cv2.inRange(hsv, lower_val, upper_val)

        # Add a border filled with 255 (same value as the selected pixels) so that
        # the blurs below treat the outside of the image as selected.
        mask = cv2.copyMakeBorder(
            mask,
            de_noise_size,
            de_noise_size,
            de_noise_size,
            de_noise_size,
            cv2.BORDER_CONSTANT,
            value=255,
        )

        # De noise: blur, then keep only the pixels above ``de_noise_level``.
        blur = cv2.blur(mask, (de_noise_size, de_noise_size))
        _, thresh1 = cv2.threshold(
            blur, auto_mask_config.get("de_noise_level", 220), 255, cv2.THRESH_BINARY
        )

        # Buffer: blur again, then keep every pixel above ``buffer_level``.
        blur = cv2.blur(thresh1, (buffer_size, buffer_size))
        _, mask = cv2.threshold(blur, auto_mask_config.get("buffer_level", 20), 255, cv2.THRESH_BINARY)

        # Remove the border added above and invert the mask.
        self.mask = 255 - mask[de_noise_size:-de_noise_size, de_noise_size:-de_noise_size]

        if os.environ.get("PROGRESS", "FALSE") == "TRUE" and self.root_folder:
            cv2.imwrite(os.path.join(self.root_folder, "mask.png"), self.mask)
    elif self.root_folder:
        mask_file = os.path.join(self.root_folder, "mask.png")
        if not os.path.exists(mask_file):
            # Fall back to a mask stored in the parent folder.
            base_folder = os.path.dirname(self.root_folder)
            assert base_folder
            mask_file = os.path.join(base_folder, "mask.png")
            if not os.path.exists(mask_file):
                return

        self.mask = cv2.imread(mask_file)

    if self.image is not None and self.mask is not None:
        # The computed mask has already one channel, a mask read from a file is converted.
        maskbw = self.mask if len(self.mask.shape) == 2 else cv2.cvtColor(self.mask, cv2.COLOR_BGR2GRAY)
        self.mask_ready = cv2.resize(maskbw, (self.image.shape[1], self.image.shape[0]))

def get_process_count(self) -> int:
"""Get the step number."""
Expand Down Expand Up @@ -230,9 +274,11 @@ def image_diff(image1: NpNdarrayInt, image2: NpNdarrayInt) -> Tuple[float, NpNda
height = max(image1.shape[0], image2.shape[0])
image1 = cv2.resize(image1, (width, height))
image2 = cv2.resize(image2, (width, height))
score, diff = structural_similarity(
cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY), cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY), full=True
)

image1 = image1 if len(image1.shape) == 2 else cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY)
image2 = image2 if len(image2.shape) == 2 else cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY)

score, diff = structural_similarity(image1, image2, full=True)
diff = (255 - diff * 255).astype("uint8")
return score, diff

Expand Down Expand Up @@ -812,10 +858,7 @@ def transform(
image_status = image_config.setdefault("status", {})
assert context.image is not None
image_status["size"] = list(context.image.shape[:2][::-1])
mask_file = os.path.join(os.path.dirname(root_folder), "mask.png")
if os.path.exists(mask_file):
context.mask = cv2.imread(mask_file)
context.init_mask()
context.init_mask()
level(context)
deskew(context)
docrop(context)
Expand Down
56 changes: 52 additions & 4 deletions scan_to_paperless/process_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"additionalProperties": false,
"definitions": {
"args": {
"Editor note": "The properties of this object should be modified in the config_schema.json file",
"type": "object",
"title": "Arguments",
"properties": {
Expand Down Expand Up @@ -176,7 +177,9 @@
"pngquant_options": {
"type": "array",
"description": "The pngquant options",
"items": { "type": "string" }
"items": {
"type": "string"
}
},
"run_exiftool": {
"type": "boolean",
Expand All @@ -197,14 +200,58 @@
"type": "integer",
"default": 90,
"description": "The JPEG quality"
},
"auto_mask": {
"type": "object",
"description": "The auto mask configuration",
"properties": {
"lower_hsv_color": {
"type": "array",
"description": "The lower color in HSV representation",
"default": [0, 0, 108],
"items": {
"type": "integer"
}
},
"upper_hsv_color": {
"type": "array",
"description": "The upper color in HSV representation",
"default": [255, 10, 148],
"items": {
"type": "integer"
}
},
"de_noise_size": {
"type": "integer",
"description": "The size of the artifacts that will be de-noised",
"default": 20
},
"de_noise_level": {
"type": "integer",
"description": "The threshold level used for de-noising on the blurred image",
"default": 220
},
"buffer_size": {
"type": "integer",
"description": "The size of the buffer added to the mask",
"default": 100
},
"buffer_level": {
"type": "integer",
"description": "The threshold level used for the buffer on the blurred image",
"default": 20
}
}
}
}
}
},
"properties": {
"images": {
"type": "array",
"items": { "type": "string" },
"items": {
"type": "string"
},
"description": "The images"
},
"args": {
Expand Down Expand Up @@ -235,7 +282,9 @@
"sources": {
"type": "array",
"description": "The images obtain after the current step",
"items": { "type": "string" }
"items": {
"type": "string"
}
},
"process_count": {
"type": "integer",
Expand All @@ -248,7 +297,6 @@
"type": "array",
"items": {
"title": "Assisted split",

"type": "object",
"additionalProperties": false,
"description": "Assisted split configuration",
Expand Down
Loading

0 comments on commit 7e18414

Please sign in to comment.