import glob, os, cv2
import numpy as np
import matplotlib.pyplot as plt
import random
from skimage.io import imread,imsave
######################################################################################
# #
# The following code expects you to use the #
# histopathologic-cancer-detection TRAINING dataset from kaggle #
# Source https://www.kaggle.com/c/histopathologic-cancer-detection/data #
# #
######################################################################################
######################################################################################
# #
# BEFORE YOU BEGIN #
# #
######################################################################################
######################################################################################
# #
# IMPORTANT PRE-PROCESSING STEP #
# #
######################################################################################
######################################################################################
# #
# RENAMING FILES #
# #
######################################################################################
# Please make sure that the file names are numbered in ascending order #
# For example 1.tif, 2.tif, 3.tif, 4.tif, 5.tif, 6.tif, and so on #
# If you do not have the files named this way, please use my #
# "renaming_files_ascending_order" snippet in the 'Utils' folder #
######################################################################################
######################################################################################
# #
# !!!WARNING!!! #
# #
######################################################################################
# The following code will create a training dataset with #
# image pairs (clean, noise); it will be stored in your #
# selected "outputfolder" below. #
# If you use the TRAINING kaggle dataset, #
# it will create a total of 440050 images: #
# 220025 clean images and 220025 noise images, #
# leaving your initial training folder untouched #
######################################################################################
# #
# REMINDER #
# #
######################################################################################
# The following code expects the image files to be numbered #
# in ascending order #
# For example 1.tif, 2.tif, 3.tif, 4.tif, 5.tif, 6.tif, and so on #
# If you do not have the files named this way, please use my #
# "renaming_files_ascending_order" snippet in the 'Utils' folder #
######################################################################################
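#Optional: the commented-out block below is only a minimal, illustrative sketch of
#one possible way to rename files into ascending numeric order; it is NOT the Utils
#snippet itself. The folder path "my_unrenamed_folder/" is a placeholder, and the
#sketch assumes the existing names do not already collide with the numeric targets.
#for idx, name in enumerate(sorted(os.listdir("my_unrenamed_folder/")), start=1):
#    if name.endswith(".tif"):
#        os.rename(os.path.join("my_unrenamed_folder/", name),
#                  os.path.join("my_unrenamed_folder/", "{}.tif".format(idx)))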
#Please state the folder path for the TRAINING dataset from kaggle
#Default name: << there is no default folder name >> THE USER MUST SPECIFY IT.
inputfolder = ""
#The shape of the input image; leave it as it is,
#because the images from the kaggle dataset have a fixed shape of (96, 96, 3)
shape = (96,96,3)
#List in which we are going to save the noise maps created
maps_of_noise = []
#List of possible corruption standard deviations, ranging from 0 to 50
noise_standard_deviation_list = np.arange(51)
#For each standard deviation we create a noise map
#and store it in our list of noise maps, the variable "maps_of_noise"
for noise_standard_deviation in noise_standard_deviation_list:
    #For fair comparison and reproducibility we use seed(0)
    np.random.seed(0)
    #Generating the noise map with:
    #loc = 0 ----> mean = 0
    #scale = noise_standard_deviation/255 ----> the standard deviation, divided by 255 to match the normalized image range
    #size = shape ----> dimensions of the image, which are (96, 96, 3)
    maps_of_noise.append(np.random.normal(loc=0, scale=noise_standard_deviation/255, size=shape))
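#Optional sanity check (not needed for dataset creation): the commented-out lines
#below sketch how one of the generated noise maps could be inspected with matplotlib.
#The index 25 is an arbitrary example (noise map for standard deviation 25).
#plt.imshow(maps_of_noise[25][:, :, 0], cmap="gray")
#plt.title("Noise map, standard deviation 25/255")
#plt.colorbar()
#plt.show()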
#Inside the for loop below we increase the standard deviation selector by one
#after every image, cycling through the noise maps for standard deviations 0 to 50
#Once the selector passes 50 it is reset to 0
#Over the 220025 training images this gives roughly 4300 images for each
#standard deviation level (0, 1, 2, ..., 50), repeating the cycle
#until all 220025 images have been processed
standard_deviation_selector = 0
#Outputfolder for the training dataset that will be created
#Remember we need image pairs (clean,noise) to train the network
#Defaults to the "user_self_created_dataset/" folder (this is a folder name, don't get confused)
outputfolder = "user_self_created_dataset/"
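#Note (added for convenience, assuming write permission in the working directory):
#skimage's imsave will fail if the output folder does not exist yet,
#so we create it here; this has no effect if the folder already exists
os.makedirs(outputfolder, exist_ok=True)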
#The TRAINING dataset from kaggle has a total of 220025 images
#This is where the upper bound of the for loop comes from (range's end is exclusive, hence 220026)
#Feel free to modify it in case you are using fewer images
for i in range(1, 220026):
    if standard_deviation_selector == 51:
        standard_deviation_selector = 0
    img = imread(inputfolder + "{}.tif".format(i))
    img = img / 255.
    #Clipping the image to better approximate real-world noise
    #According to: J.-S. Lee, "Refined filtering of image noise using local statistics," Computer Graphics and Image Processing, vol. 15, no. 4, pp. 380-389, 1981.
    noise = np.clip((img + maps_of_noise[standard_deviation_selector]), 0, 1)
    noise = noise.astype(np.float32)
    noise = noise * 255
    img = img * 255
    imsave(outputfolder + "{}_noise.tif".format(i), noise.astype(np.uint8))
    imsave(outputfolder + "{}_clean.tif".format(i), img.astype(np.uint8))
    print("Current standard_deviation_selector: ", standard_deviation_selector)
    standard_deviation_selector += 1
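#Optional check after the run (commented out): load one (clean, noise) pair back
#and confirm both were written with the expected shape and dtype.
#The pair index 1 is just an arbitrary example.
#clean_check = imread(outputfolder + "1_clean.tif")
#noise_check = imread(outputfolder + "1_noise.tif")
#print(clean_check.shape, clean_check.dtype, noise_check.shape, noise_check.dtype)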
######################################################################################
# #
# Your dataset has been created #
# and is ready to be used for training #
# #
######################################################################################