-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathslides.tex
More file actions
126 lines (105 loc) · 4.13 KB
/
slides.tex
File metadata and controls
126 lines (105 loc) · 4.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
\documentclass{beamer}
\usetheme{default}
\begin{document}
\begin{frame}{Introduction}
\begin{center}
An Image Classifier\\
Brendan Miller
\end{center}
\begin{itemize}
\item An image classifier based on the generative/discriminative framework
presented in Yi Li's paper.
\item A ``bag of words'' classifier.
\item Uses SIFT key-point descriptors and color.
\end{itemize}
\end{frame}
\begin{frame}{Basic Idea}
A quick high-level summary of Li's classification framework:
\begin{itemize}
\item Build a mixture model of the features in images containing a class of objects.
\item Determine how well each image in a training set matches each component of the model.
\item Build a vector of these match values (joint probabilities).
\item Use these values as the inputs to train a classifier (a neural network).
\end{itemize}
\end{frame}
\begin{frame}{Generative/discriminative model part 1}
Generative model.
\begin{itemize}
\item Gaussian mixture model of the distribution of features, constructed using the EM algorithm.
\begin{equation*}
P(X^a \mid o) = \sum_{m=1}^{M^a} w^a_m N(X^a; \mu^a_m, \Sigma^a_m)
\end{equation*}
\item Joint probability of image features with each model component is calculated.
\begin{equation*}
P(X^a_{i,r}, m^a) = w^a_m N(X^a_{i,r}; \mu^a_m, \Sigma^a_m)
\end{equation*}
\item For each image and Gaussian component, the maximum joint probability is found.
\begin{equation*}
P(I_i, m^a) = \max\{P(X^a_{i,r}, m^a) \mid r \in \mbox{features of type $a$
in the $i$th image}\}
\end{equation*}
\end{itemize}
\end{frame}
\begin{frame}{Generative/discriminative model part 2}
Discriminative classifier.
\begin{itemize}
\item The maximum joint probabilities are used as inputs to train a neural network.
\item The neural network acts as a classifier.
\item The classifier understands not just how well an example image
fits the (mixture of Gaussians) model, but how well it fits
\emph{each component} of the model.
\item Some components of the model found during the generative phase
may also be present in images which do not contain the object we
are looking for. Some components may be necessary but not
sufficient to establish the presence of an object. The
discriminative phase deals with these problems.
\end{itemize}
\end{frame}
\begin{frame}{Features}
\begin{itemize}
\item The original paper used regional features, e.g.\ color and texture.
\item My project also uses key-point descriptors, i.e.\ SIFT.
\item I also keep color, which is mainly useful for improving accuracy on
face detection, where color can capture the distinctive skin tone.
\item Color is not that useful for classifying cars, planes, or
motorcycles, which may be painted any color.
\end{itemize}
\end{frame}
\begin{frame}{SIFT}
\begin{itemize}
\item SIFT alone produces good results.
\item Increasing the number of Gaussian components (50+) helps accuracy.
\item Objects are made of many parts. Ideally each Gaussian
component matches a part.
\end{itemize}
\end{frame}
\begin{frame}{Correct classification}
\includegraphics[width=100mm]{images/plane_true_positive.png}
\end{frame}
\begin{frame}{False Negative}
Face detection is more reliant on color than the other classes.
\includegraphics[width=100mm]{images/face_false_negative.png}
\end{frame}
\begin{frame}{Accuracy}
\begin{tabular}{| l | l | l | l |}
\hline
Image Type & Accuracy & False Positives & False Negatives \\
Cars & 0.94 & 0.035 & 0.025 \\
Planes & 0.78 & 0.11 & 0.11 \\
Faces & 0.9 & 0.055 & 0.045 \\
Bikes & 0.805 & 0.08 & 0.115 \\
\hline
\end{tabular}
\end{frame}
\begin{frame}{Future work}
\begin{itemize}
\item One idea: How can we find which parts of an image contain the classified object?
\item Given a classified image, it would be nice to be able to work
backwards and find the features which contributed the most to the
classification.
\item Given the most important features, a bounding box could be created.
\item A 3-layer neural network might not be ideal for this. It would
be easier to work backwards through a simpler classifier.
\end{itemize}
\end{frame}
\end{document}