Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automatically creates a mask from a color range of the source image #740

Merged
merged 1 commit into from
Aug 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ export DOCKER_BUILDKIT=1

.PHONY: jsonschema
jsonschema:
ci/schema-copy
jsonschema2md scan_to_paperless/config_schema.json config.md
jsonschema2md scan_to_paperless/process_schema.json process.md
c2cciutils-checks --fix --check=prettier
Expand Down
18 changes: 18 additions & 0 deletions ci/schema-copy
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env python3

import json


def _main(
    config_schema_path: str = "scan_to_paperless/config_schema.json",
    process_schema_path: str = "scan_to_paperless/process_schema.json",
) -> None:
    """Copy the ``args`` properties of the config schema into the process schema.

    The ``definitions.args.properties`` object of the process schema is replaced
    by the one found in the config schema, then the process schema file is
    rewritten in place (2-space indented JSON, newline terminated).

    Args:
        config_schema_path: JSON schema the properties are read from.
        process_schema_path: JSON schema that is updated in place.

    Raises:
        OSError: if one of the files cannot be opened.
        json.JSONDecodeError: if one of the files is not valid JSON.
        KeyError: if ``definitions.args`` is missing from one of the schemas.
    """
    with open(config_schema_path, encoding="utf-8") as config_schema_file:
        config_schema = json.load(config_schema_file)
    with open(process_schema_path, encoding="utf-8") as process_schema_file:
        process_schema = json.load(process_schema_file)

    process_schema["definitions"]["args"]["properties"] = config_schema["definitions"]["args"]["properties"]

    with open(process_schema_path, "w", encoding="utf-8") as process_schema_file:
        json.dump(process_schema, process_schema_file, indent=2)
        # json.dump() does not terminate the last line; add the newline so the
        # generated file is a well-formed text file on its own.
        process_schema_file.write("\n")


if __name__ == "__main__":
    # Only synchronise the schemas when the file is executed as a script.
    _main()
9 changes: 9 additions & 0 deletions config.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,12 @@
- **`run_ps2pdf`** _(boolean)_: Run the ps2pdf optimizer (=> JPEG). Default: `False`.
- **`jpeg`** _(boolean)_: Convert images to JPEG. Default: `False`.
- **`jpeg_quality`** _(integer)_: The JPEG quality. Default: `90`.
- **`auto_mask`** _(object)_: The auto mask configuration.
- **`lower_hsv_color`** _(array)_: The lower color in HSV representation. Default: `[0, 0, 108]`.
- **Items** _(integer)_
- **`upper_hsv_color`** _(array)_: The upper color in HSV representation. Default: `[255, 10, 148]`.
- **Items** _(integer)_
- **`de_noise_size`** _(integer)_: The size of the artifacts that will be de-noised. Default: `20`.
- **`de_noise_level`** _(integer)_: The threshold level used by the de-noising on the blurred image. Default: `220`.
- **`buffer_size`** _(integer)_: The size of the buffer added to the mask. Default: `100`.
- **`buffer_level`** _(integer)_: The threshold level used by the buffer on the blurred image. Default: `20`.
9 changes: 9 additions & 0 deletions process.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,12 @@
- **`run_ps2pdf`** _(boolean)_: Run the ps2pdf optimizer (=> JPEG). Default: `False`.
- **`jpeg`** _(boolean)_: Convert images to JPEG. Default: `False`.
- **`jpeg_quality`** _(integer)_: The JPEG quality. Default: `90`.
- **`auto_mask`** _(object)_: The auto mask configuration.
- **`lower_hsv_color`** _(array)_: The lower color in HSV representation. Default: `[0, 0, 108]`.
- **Items** _(integer)_
- **`upper_hsv_color`** _(array)_: The upper color in HSV representation. Default: `[255, 10, 148]`.
- **Items** _(integer)_
- **`de_noise_size`** _(integer)_: The size of the artifacts that will be de-noised. Default: `20`.
- **`de_noise_level`** _(integer)_: The threshold level used by the de-noising on the blurred image. Default: `220`.
- **`buffer_size`** _(integer)_: The size of the buffer added to the mask. Default: `100`.
- **`buffer_level`** _(integer)_: The threshold level used by the buffer on the blurred image. Default: `20`.
42 changes: 42 additions & 0 deletions scan_to_paperless/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,9 @@
#
# default: 90
"jpeg_quality": int,
# WARNING: The required properties are not correctly taken into account,
# See: https://github.com/camptocamp/jsonschema-gentypes/issues/6
"auto_mask": "_ArgumentsAutoMask",
},
total=False,
)
Expand Down Expand Up @@ -192,6 +195,45 @@
)


# The auto mask configuration
#
# Options used to create a mask from a color range of the source image.
# Every key is optional (total=False); the defaults listed below are the ones
# documented for each option.
_ArgumentsAutoMask = TypedDict(
"_ArgumentsAutoMask",
{
# The lower color in HSV representation (lower bound of the color range)
#
# default:
# - 0
# - 0
# - 108
"lower_hsv_color": List[int],
# The upper color in HSV representation (upper bound of the color range)
#
# default:
# - 255
# - 10
# - 148
"upper_hsv_color": List[int],
# The size of the artifacts that will be de-noised
#
# default: 20
"de_noise_size": int,
# The threshold level used by the de-noising on the blurred image
#
# default: 220
"de_noise_level": int,
# The size of the buffer added to the mask
#
# default: 100
"buffer_size": int,
# The threshold level used by the buffer on the blurred image
#
# default: 20
"buffer_level": int,
},
total=False,
)


_ConfigurationModesAdditionalproperties = TypedDict(
"_ConfigurationModesAdditionalproperties",
{
Expand Down
42 changes: 42 additions & 0 deletions scan_to_paperless/config_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,48 @@
"type": "integer",
"default": 90,
"description": "The JPEG quality"
},
"auto_mask": {
"type": "object",
"description": "The auto mask configuration",
"properties": {
"lower_hsv_color": {
"type": "array",
"description": "The lower color in HSV representation",
"default": [0, 0, 108],
"items": {
"type": "integer"
}
},
"upper_hsv_color": {
"type": "array",
"description": "The upper color in HSV representation",
"default": [255, 10, 148],
"items": {
"type": "integer"
}
},
"de_noise_size": {
"type": "integer",
"description": "The size of the artifacts that will be de-noised",
"default": 20
},
"de_noise_level": {
"type": "integer",
"description": "The threshold level used by the de-noising on the blurred image",
"default": 220
},
"buffer_size": {
"type": "integer",
"description": "The size of the buffer added to the mask",
"default": 100
},
"buffer_level": {
"type": "integer",
"description": "The threshold level used by the buffer on the blurred image",
"default": 20
}
}
}
}
}
Expand Down
71 changes: 57 additions & 14 deletions scan_to_paperless/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,13 +102,57 @@ def __init__( # pylint: disable=too-many-arguments

def init_mask(self) -> None:
    """Init the mask.

    The mask is obtained in one of two ways:

    * when ``auto_mask`` is present in ``self.config["args"]``, it is computed
      from ``self.image``: the pixels inside the configured HSV color range are
      selected, then the selection is de-noised and buffered;
    * otherwise, when ``self.root_folder`` is set, it is read from ``mask.png``
      in that folder or, if absent, in its parent folder. When no such file
      exists the method returns without touching ``self.mask``.

    When both an image and a mask are available, ``self.mask_ready`` is set to
    the single-channel mask resized to the size of the image.

    Raises:
        Exception: if ``auto_mask`` is configured but ``self.image`` is None.
    """
    if "auto_mask" in self.config["args"]:
        if self.image is None:
            # Fail with an explicit message instead of an error raised from
            # inside OpenCV when it receives None.
            raise Exception("The image is None")

        auto_mask_config = self.config["args"]["auto_mask"]
        hsv = cv2.cvtColor(self.image, cv2.COLOR_BGR2HSV)

        # Select the pixels whose HSV value lies inside the configured range.
        lower_val = np.array(auto_mask_config.get("lower_hsv_color", [0, 0, 108]))
        upper_val = np.array(auto_mask_config.get("upper_hsv_color", [255, 10, 148]))
        mask = cv2.inRange(hsv, lower_val, upper_val)

        # Add a constant border (value 255) of ``de_noise_size`` pixels on every
        # side before blurring; it is cropped away again below.
        de_noise_size = auto_mask_config.get("de_noise_size", 20)
        mask = cv2.copyMakeBorder(
            mask,
            de_noise_size,
            de_noise_size,
            de_noise_size,
            de_noise_size,
            cv2.BORDER_CONSTANT,
            value=255,
        )

        # De-noise: blur, then keep what is above the de-noise level.
        blur = cv2.blur(mask, (de_noise_size, de_noise_size))
        _, thresh1 = cv2.threshold(
            blur, auto_mask_config.get("de_noise_level", 220), 255, cv2.THRESH_BINARY
        )

        # Buffer: blur again with the buffer size, then keep what is above the
        # buffer level.
        buffer_size = auto_mask_config.get("buffer_size", 100)
        blur = cv2.blur(thresh1, (buffer_size, buffer_size))
        _, mask = cv2.threshold(blur, auto_mask_config.get("buffer_level", 20), 255, cv2.THRESH_BINARY)

        # Crop the border added above and invert the result.
        self.mask = 255 - mask[de_noise_size:-de_noise_size, de_noise_size:-de_noise_size]

        # With PROGRESS=TRUE the computed mask is also saved in the root folder.
        if os.environ.get("PROGRESS", "FALSE") == "TRUE" and self.root_folder:
            cv2.imwrite(os.path.join(self.root_folder, "mask.png"), self.mask)
    elif self.root_folder:
        # Look for the mask file in the root folder first, then in its parent.
        mask_file = os.path.join(self.root_folder, "mask.png")
        if not os.path.exists(mask_file):
            base_folder = os.path.dirname(self.root_folder)
            assert base_folder
            mask_file = os.path.join(base_folder, "mask.png")
            if not os.path.exists(mask_file):
                return

        self.mask = cv2.imread(mask_file)

    if self.image is not None and self.mask is not None:
        # A two-dimensional mask is already single-channel; otherwise convert
        # it from BGR to gray before resizing it to the image size.
        maskbw = self.mask if len(self.mask.shape) == 2 else cv2.cvtColor(self.mask, cv2.COLOR_BGR2GRAY)
        self.mask_ready = cv2.resize(maskbw, (self.image.shape[1], self.image.shape[0]))

def get_process_count(self) -> int:
"""Get the step number."""
Expand Down Expand Up @@ -230,9 +274,11 @@ def image_diff(image1: NpNdarrayInt, image2: NpNdarrayInt) -> Tuple[float, NpNda
height = max(image1.shape[0], image2.shape[0])
image1 = cv2.resize(image1, (width, height))
image2 = cv2.resize(image2, (width, height))
score, diff = structural_similarity(
cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY), cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY), full=True
)

image1 = image1 if len(image1.shape) == 2 else cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY)
image2 = image2 if len(image2.shape) == 2 else cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY)

score, diff = structural_similarity(image1, image2, full=True)
diff = (255 - diff * 255).astype("uint8")
return score, diff

Expand Down Expand Up @@ -812,10 +858,7 @@ def transform(
image_status = image_config.setdefault("status", {})
assert context.image is not None
image_status["size"] = list(context.image.shape[:2][::-1])
mask_file = os.path.join(os.path.dirname(root_folder), "mask.png")
if os.path.exists(mask_file):
context.mask = cv2.imread(mask_file)
context.init_mask()
context.init_mask()
level(context)
deskew(context)
docrop(context)
Expand Down
56 changes: 52 additions & 4 deletions scan_to_paperless/process_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"additionalProperties": false,
"definitions": {
"args": {
"Editor note": "The properties of this object should be modified in the config_schema.json file",
"type": "object",
"title": "Arguments",
"properties": {
Expand Down Expand Up @@ -176,7 +177,9 @@
"pngquant_options": {
"type": "array",
"description": "The pngquant options",
"items": { "type": "string" }
"items": {
"type": "string"
}
},
"run_exiftool": {
"type": "boolean",
Expand All @@ -197,14 +200,58 @@
"type": "integer",
"default": 90,
"description": "The JPEG quality"
},
"auto_mask": {
"type": "object",
"description": "The auto mask configuration",
"properties": {
"lower_hsv_color": {
"type": "array",
"description": "The lower color in HSV representation",
"default": [0, 0, 108],
"items": {
"type": "integer"
}
},
"upper_hsv_color": {
"type": "array",
"description": "The upper color in HSV representation",
"default": [255, 10, 148],
"items": {
"type": "integer"
}
},
"de_noise_size": {
"type": "integer",
"description": "The size of the artifacts that will be de-noised",
"default": 20
},
"de_noise_level": {
"type": "integer",
"description": "The threshold level used by the de-noising on the blurred image",
"default": 220
},
"buffer_size": {
"type": "integer",
"description": "The size of the buffer added to the mask",
"default": 100
},
"buffer_level": {
"type": "integer",
"description": "The threshold level used by the buffer on the blurred image",
"default": 20
}
}
}
}
}
},
"properties": {
"images": {
"type": "array",
"items": { "type": "string" },
"items": {
"type": "string"
},
"description": "The images"
},
"args": {
Expand Down Expand Up @@ -235,7 +282,9 @@
"sources": {
"type": "array",
"description": "The images obtain after the current step",
"items": { "type": "string" }
"items": {
"type": "string"
}
},
"process_count": {
"type": "integer",
Expand All @@ -248,7 +297,6 @@
"type": "array",
"items": {
"title": "Assisted split",

"type": "object",
"additionalProperties": false,
"description": "Assisted split configuration",
Expand Down
Loading