Handwritten-Digit-Recognition-from-MNIST-Dataset/image_preprocessor.py at main · IIsameerII/Handwritten-Digit-Recognition-from-MNIST-Dataset · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import cv2
import numpy as np
from PIL import Image

def extract_digits(image):
    """Split a canvas image into one padded crop per detected outer contour.

    The image is binarised with ``preprocess_image``, its external contours
    are located, and the bounding box of every contour is cut out of the
    binarised image and handed to ``image_padding``. No contour is filtered
    out, so every external contour found yields one entry.

    Args:
        image: A numpy array accepted by ``preprocess_image``

    Returns:
        List with the result of ``image_padding`` for each contour, ordered
        by the x coordinate of the contour's bounding box (left to right)
    """

    binary = preprocess_image(image)

    # Only outermost contours are requested, so holes inside a shape
    # (RETR_EXTERNAL) do not produce separate entries.
    outlines, _hierarchy = cv2.findContours(
        binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    # sorted() is stable, so boxes sharing the same x keep the order in which
    # their contours were reported.
    boxes = sorted(
        (cv2.boundingRect(outline) for outline in outlines),
        key=lambda box: box[0],
    )

    return [
        image_padding(binary[top:top + height, left:left + width], width, height)
        for left, top, width, height in boxes
    ]


def preprocess_image(image):
    """Convert an image to grayscale and binarise it with an inverted Otsu threshold.

    Args:
        image: A numpy array that ``cv2.cvtColor`` can convert with
            ``COLOR_BGR2GRAY``

    Returns:
        thresh: A numpy array holding the thresholded image (values 0 or 255)
    """

    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # THRESH_OTSU lets OpenCV pick the threshold itself (the 0 passed below is
    # only a placeholder); THRESH_BINARY_INV maps pixels above that threshold
    # to 0 and the rest to 255.
    mode = cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    binary = cv2.threshold(grayscale, 0, 255, mode)[1]

    return binary

def image_padding(image, w, h, margin=10):
    """Pad an image with zeros so it becomes square, then add a margin on every side.

    The shorter dimension is extended by the difference between ``w`` and
    ``h``, split as evenly as possible between its two sides (the extra pixel
    of an odd difference goes to the bottom/right). ``margin`` zero-valued
    pixels are then added on all four sides. When ``w == h`` only the margin
    is added.

    Args:
        image: A numpy array with rows on axis 0 and columns on axis 1
        w: width of image (int)
        h: height of image (int)
        margin: margin for the image at the end (int)

    Returns:
        padded_image: A new numpy array; it is square whenever ``w`` and ``h``
        match the width and height of ``image``
    """

    image = np.asarray(image)

    # Split the difference into two integer parts that always sum to exactly
    # |w - h|. Rounding half of it (as round() would) pads an odd difference
    # by one pixel too many or too few and leaves the result non-square.
    diff = abs(w - h)
    first_half = diff // 2
    second_half = diff - first_half

    if w > h:
        # Wider than tall: grow the height.
        pad_rows = (first_half + margin, second_half + margin)
        pad_cols = (margin, margin)
    else:
        # Taller than wide, or already square (diff == 0): grow the width.
        pad_rows = (margin, margin)
        pad_cols = (first_half + margin, second_half + margin)

    # Leave any trailing axes (e.g. colour channels) unpadded.
    pad_width = [pad_rows, pad_cols] + [(0, 0)] * (image.ndim - 2)

    padded_image = np.pad(image, pad_width, mode="constant", constant_values=0)
    return padded_image


def invert_colors_opencv(image):
    """Return the colour-inverted version of an image.

    A PIL image is first turned into a numpy array and converted from RGB to
    BGR channel order; any other input is used as given.

    Args:
        image: A PIL ``Image.Image`` or a numpy array

    Returns:
        inverted_image: ``255 - image``, i.e. each value subtracted from 255
        (this is a true inversion only for 8-bit data)
    """
    if isinstance(image, Image.Image):
        rgb_pixels = np.array(image)
        image = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)

    return 255 - image