-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathimage_preprocessor.py
More file actions
100 lines (68 loc) · 2.89 KB
/
Copy pathimage_preprocessor.py
File metadata and controls
100 lines (68 loc) · 2.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import cv2
import numpy as np
from PIL import Image
def extract_digits(image):
    """Split a canvas image into one padded crop per detected contour.

    Args:
        image: A numpy array holding the canvas image
    Returns:
        List of individual digit crops from the canvas, ordered left to right
    """
    # Binarise the canvas; contours are searched on this mask and the crops
    # are cut from it as well
    binary = preprocess_image(image)
    # RETR_EXTERNAL requests only the outermost contours
    outlines, _hierarchy = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # One (x, y, w, h) bounding box per contour; no area or aspect-ratio
    # filtering is applied, so every contour found is kept
    boxes = [cv2.boundingRect(outline) for outline in outlines]
    # Order the boxes by their left edge so the crops come out left to right
    boxes.sort(key=lambda box: box[0])
    # Cut each box out of the mask and hand it to image_padding
    return [
        image_padding(binary[top:top + height, left:left + width], width, height)
        for left, top, width, height in boxes
    ]
def preprocess_image(image):
    """Convert an image to grayscale and apply an inverted Otsu threshold.

    Args:
        image: A numpy array. Either a multi-channel image, which is converted
            with cv2.COLOR_BGR2GRAY, or a 2-D array that is already
            single-channel grayscale.
    Returns:
        thresh: A numpy array holding the binary mask (values 0 and 255)
    """
    # cv2.cvtColor with COLOR_BGR2GRAY rejects single-channel input, so a 2-D
    # array is treated as already grayscale and used as-is
    if getattr(image, "ndim", None) == 2:
        gray = image
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # THRESH_OTSU chooses the threshold automatically (the 0 passed here is
    # ignored); THRESH_BINARY_INV maps pixels above it to 0 and the rest to
    # 255, so dark strokes on a light canvas become the white foreground
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    return thresh
def image_padding(image, w, h, margin=10):
    """Pad an image with zero-valued pixels until it is square, plus a margin.

    The shorter side is extended to match the longer one, with the extra
    pixels split as evenly as possible between its two edges, and ``margin``
    further pixels are then added on every side. An image that is already
    square only receives the margin.

    Args:
        image: A numpy array whose first two axes are rows and columns
        w: width of image (int)
        h: height of image (int)
        margin: number of border pixels added on every side (int)
    Returns:
        padded_image: A new numpy array with the same dtype as ``image``
    """
    diff = int(abs(w - h))
    # An odd difference cannot be halved exactly; the leftover pixel goes to
    # the far edge so the two halves always add up to the full difference
    near = diff // 2
    far = diff - near
    if w > h:
        # Wider than tall: grow the rows (top / bottom)
        rows = (near + margin, far + margin)
        cols = (margin, margin)
    else:
        # Taller than wide, or already square (diff == 0, margin only)
        rows = (margin, margin)
        cols = (near + margin, far + margin)
    # Any axes after the first two (e.g. colour channels) are left unpadded
    pad_width = [rows, cols] + [(0, 0)] * (image.ndim - 2)
    # Constant zero fill, i.e. a black border
    padded_image = np.pad(image, pad_width, mode="constant", constant_values=0)
    return padded_image
def invert_colors_opencv(image):
    """Return the colour-inverted version of an image.

    Args:
        image: A PIL image or a numpy array. A PIL image is first turned into
            a numpy array and converted with cv2.COLOR_RGB2BGR.
    Returns:
        The result of ``255 - image`` (assumes uint8 pixel values)
    """
    pixels = image
    if isinstance(image, Image.Image):
        # PIL input: materialise as an array, then swap the channel order
        rgb = np.array(image)
        pixels = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
    # Subtract every pixel value from 255 to invert it
    return 255 - pixels