Digit Classification with Nearest Neighbors

Chris Tralie

Today we talked about supervised learning, which is the process of learning from a set of labeled examples, or examples that have been sorted into different classes beforehand. We're not always fortunate enough to have data that's labeled like this, but when we do, there are a variety of techniques we can use to learn models of the different classes. Actually, everything we've done in this class so far can be considered supervised learning, from Markov chains trained on text to the Naive Bayes bag of words model and Gaussian Naive Bayes. In every application we looked at with these techniques, we trained on our labeled examples and then tested on some new unseen data that wasn't included in the training set.

In this exercise today, we explore a new supervised learning technique known as nearest neighbors. If we have a way of measuring a distance between two different data points, then we can apply this technique. For example, let's suppose we had a labeled set of data points in two classes: red circles and blue squares. Then, let's say we wanted to guess which of the two classes some new data point was in. We'll depict this new data point as a black triangle, as shown below.

The K-nearest neighbors technique simply finds the K closest labeled examples, as measured by the distance, and uses them to vote on the class identity of this new point. In the above example, we choose K = 5 for the 5 nearest neighbors, and we happen to get 4 votes for a red circle and 1 vote for a blue square, so we would label this new data point as a red circle.
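To make the voting concrete, here's a minimal sketch in numpy on some made-up 2D points; the coordinates, labels, and the choice of Euclidean distance here are just for illustration, not part of the digits example below.

```python
import numpy as np

# Made-up labeled examples: class 0 = "red circle", class 1 = "blue square"
X = np.array([[1.0, 1.2], [0.8, 2.0], [1.5, 1.7], [2.0, 1.0], [1.2, 0.5],
              [2.3, 1.6], [4.5, 3.8], [3.9, 4.9]])
y = np.array([0, 0, 0, 0, 0, 1, 1, 1])

query = np.array([1.6, 1.4])  # the new "black triangle" point
K = 5

# Euclidean distance from the query to every labeled example
dists = np.sqrt(np.sum((X - query)**2, axis=1))

# Indices of the K closest examples, then a majority vote over their labels
neighbors = np.argsort(dists)[:K]
votes = np.bincount(y[neighbors])
print(np.argmax(votes))  # 0: four red circles outvote one blue square
```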

Overall, we can think of nearest neighbors as a supervised learning technique that memorizes examples. This means it's only as good as the examples, and it will do better with a higher number and variety of examples, which we don't always have access to. By contrast, other learning techniques will try to better generalize some knowledge to new examples. But we'll start with this "memorizer" first.

As simple as this technique may seem, it can work very well in practice. Below we'll show K-nearest neighbors on an example of 28x28 images of drawn digits, where the labeled examples are obtained from the MNIST database. In this case, there are 10 unique classes for the digits between 0 and 9, inclusive. Let's first load in our imports and load in all of the MNIST digits.
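Below is a minimal sketch of one way to do this. It assumes MNIST is downloaded with scikit-learn's fetch_openml (the original data may be loaded from a different source); the pixels are scaled to [0, 1] and the images are grouped by label.

```python
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml  # assumption: MNIST pulled from OpenML

# Download MNIST: 70,000 28x28 grayscale images, scaled to [0, 1]
mnist = fetch_openml("mnist_784", version=1, as_frame=False)
X = mnist.data.reshape(-1, 28, 28) / 255.0
y = mnist.target.astype(int)

# Group the images by label so that digits[i][j] is the jth example of digit i
digits = [[img for img in X[y == i]] for i in range(10)]
```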

The digits are set up in a 2D array so that digits[i][j] gives the $j^{\text{th}}$ example of digit $i$. Each digit is itself a $28 \times 28$ 2D array of grayscale values between 0 and 1.
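As a quick sanity check on that layout (assuming the digits structure built above):

```python
print(len(digits))         # 10 lists, one per digit class 0-9
print(len(digits[0]))      # number of examples of the digit 0
print(digits[0][0].shape)  # (28, 28)
```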

Next, let's try to think about how to define a distance between two digit images. First, let's look at the range of values in a digit. We'll pick out the first 0 as an example.
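Here's a sketch of how we might inspect it, assuming the imports and digits structure from above:

```python
I = digits[0][0]  # the first example of the digit 0

# The grayscale values should live between 0 and 1
print("min:", I.min(), "max:", I.max())

plt.imshow(I, cmap='gray')
plt.colorbar()
plt.show()
```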

Sam had the idea in class that we might try to compare which pixels were black in one image compared to another. To do this, we can mark all pixels that are under a certain threshold. This is called binary quantization. For example, let's suppose the threshold is 0.4. Here's how we might do this for the image above.
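A sketch with explicit loops over the pixels (the image I and the 0.4 threshold come from above):

```python
thresh = 0.4
B = np.zeros(I.shape)

# Mark every pixel whose grayscale value is under the threshold
for i in range(I.shape[0]):
    for j in range(I.shape[1]):
        if I[i][j] < thresh:
            B[i][j] = 1

plt.imshow(B, cmap='gray')
plt.show()
```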

Below is a simpler way to accomplish thresholding with a single line of code using numpy broadcasting. Not only is the code shorter, but if you can stick to numpy, the code will run much faster because it's compiled C/Fortran under the hood.
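Something like the following, assuming the same image I as before:

```python
# The comparison broadcasts over every pixel at once
B = I < thresh

plt.imshow(B, cmap='gray')
plt.show()
```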

To compare two digits, we can threshold both of them and compute the Hamming distance between them: the number of corresponding pixels that differ after thresholding. The Hamming distance between a 2D array $X$ and a 2D array $Y$ is defined as

$\sum_{i, j} |X[i, j] - Y[i, j]|$

Let's compare the first 1 to the first 7 this way, using numpy subtract broadcasting and np.sum to avoid a loop.
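A sketch, assuming the digits structure and the 0.4 threshold from above:

```python
# Threshold the first example of a 1 and the first example of a 7
X = (digits[1][0] < 0.4).astype(int)
Y = (digits[7][0] < 0.4).astype(int)

# Hamming distance: elementwise subtraction via broadcasting, then sum the mismatches
print(np.sum(np.abs(X - Y)))
```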

Now we're finally ready to apply this to K-nearest neighbors. We'll define a function handle for this distance, as well as another distance that I had in mind, the Euclidean distance, which is defined as

$\sqrt{\sum_{i, j} (X[i, j] - Y[i, j])^2}$

You'll see that each of these works pretty well if you try them out below, but the Euclidean distance has the advantage that we don't have to choose a threshold as a parameter. We'll compare them both below.
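Here's one way the two function handles might look (the names hamming and euclidean are my own):

```python
def hamming(X, Y, thresh=0.4):
    """Number of pixels that differ after thresholding both images."""
    XB = (X < thresh).astype(int)
    YB = (Y < thresh).astype(int)
    return np.sum(np.abs(XB - YB))

def euclidean(X, Y):
    """Euclidean distance between two images, treated as flat vectors."""
    return np.sqrt(np.sum((X - Y)**2))
```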

We'll set up a little interactive canvas where we can draw digits and retrieve their K nearest neighbors. We'll use np.argsort to help us find the nearest neighbors. Try it out for yourself!
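The canvas code itself is omitted here, but the sketch below shows the lookup it would run on a drawn 28x28 query image; the function name classify_digit and the default K = 5 are my own choices.

```python
def classify_digit(query, digits, dist_fn, K=5):
    """Guess the class of a 28x28 query image by a vote of its K nearest neighbors."""
    dists = []
    labels = []
    # Brute force: measure the distance from the query to every labeled example
    # (slow over the full dataset; subsample the examples for an interactive demo)
    for label in range(10):
        for example in digits[label]:
            dists.append(dist_fn(query, example))
            labels.append(label)
    # np.argsort gives the indices of the K smallest distances
    neighbors = np.argsort(np.array(dists))[:K]
    votes = np.bincount(np.array(labels)[neighbors], minlength=10)
    return np.argmax(votes)

# Example: classify one of the labeled 3s (it will trivially match itself first)
print(classify_digit(digits[3][1], digits, euclidean, K=5))
```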

Finally, it's worth noting that the above approach is a brute force nearest neighbors approach that uses sorting. There are tons of ways to improve this. One of them is to use a data structure known as a KD Tree, which is able to home in on the region of the space that contains the nearest neighbors much more quickly, without checking every example. It is roughly analogous to binary search performed spatially. Sadly, KD Trees do suffer from what's known as the "curse of dimensionality," so in high dimensions one often uses an approximate nearest neighbors scheme instead.
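For an exact (not approximate) tree-based version, scipy happens to provide a KD Tree; a rough sketch on the flattened digits is below. Whether this actually beats brute force on 784-dimensional vectors is exactly where the curse of dimensionality bites.

```python
from scipy.spatial import KDTree

# Flatten every 28x28 image into a 784-dimensional vector
data = np.array([img.flatten() for label in range(10) for img in digits[label]])
labels = np.array([label for label in range(10) for _ in digits[label]])

tree = KDTree(data)

# Query the 5 nearest neighbors of a digit under the Euclidean distance
dists, idx = tree.query(digits[3][1].flatten(), k=5)
print(labels[idx])
```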