Natural_Language_Processing_with_Deep_Learning_CS224n/a3/parser_model.py at master · kaushikacharya/Natural_Language_Processing_with_Deep_Learning_CS224n · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
CS224N 2018-19: Homework 3
parser_model.py: Feed-Forward Neural Network for Dependency Parsing
Sahil Chopra <schopra8@stanford.edu>
"""
import pickle
import os
import time

import torch
import torch.nn as nn
import torch.nn.functional as F

class ParserModel(nn.Module):
    """ Feedforward neural network with an embedding layer and single hidden layer.
    The ParserModel will predict which transition should be applied to a
    given partial parse configuration.

    PyTorch Notes:
        - Note that "ParserModel" is a subclass of the "nn.Module" class. In PyTorch all neural networks
            are a subclass of this "nn.Module".
        - The "__init__" method is where you define all the layers and their respective parameters
            (embedding layers, linear layers, dropout layers, etc.).
        - "__init__" gets automatically called when you create a new instance of your class, e.g.
            when you write "m = ParserModel()".
        - Other methods of ParserModel can access variables that have "self." prefix. Thus,
            you should add the "self." prefix layers, values, etc. that you want to utilize
            in other ParserModel methods.
        - For further documentation on "nn.Module" please see https://pytorch.org/docs/stable/nn.html.
    """
    def __init__(self, embeddings, n_features=36,
        hidden_size=200, n_classes=3, dropout_prob=0.5):
        """ Initialize the parser model.

        @param embeddings (Tensor): word embeddings (num_words, embedding_size)
        @param n_features (int): number of input features
        @param hidden_size (int): number of hidden units
        @param n_classes (int): number of output classes
        @param dropout_prob (float): dropout probability
        """
        super(ParserModel, self).__init__()
        self.n_features = n_features
        self.n_classes = n_classes
        self.dropout_prob = dropout_prob
        self.embed_size = embeddings.shape[1]
        self.hidden_size = hidden_size
        self.pretrained_embeddings = nn.Embedding(embeddings.shape[0], self.embed_size)
        self.pretrained_embeddings.weight = nn.Parameter(torch.tensor(embeddings))

        ### YOUR CODE HERE (~5 Lines)
        ### TODO:
        ###     1) Construct `self.embed_to_hidden` linear layer, initializing the weight matrix
        ###         with the `nn.init.xavier_uniform_` function with `gain = 1` (default)
        ###     2) Construct `self.dropout` layer.
        ###     3) Construct `self.hidden_to_logits` linear layer, initializing the weight matrix
        ###         with the `nn.init.xavier_uniform_` function with `gain = 1` (default)
        ###
        ### Note: Here, we use Xavier Uniform Initialization for our Weight initialization.
        ###         It has been shown empirically, that this provides better initial weights
        ###         for training networks than random uniform initialization.
        ###         For more details checkout this great blogpost:
        ###             http://andyljones.tumblr.com/post/110998971763/an-explanation-of-xavier-initialization
        ### Hints:
        ###     - After you create a linear layer you can access the weight
        ###       matrix via:
        ###         linear_layer.weight
        ###
        ### Please see the following docs for support:
        ###     Linear Layer: https://pytorch.org/docs/stable/nn.html#torch.nn.Linear
        ###     Xavier Init: https://pytorch.org/docs/stable/nn.html#torch.nn.init.xavier_uniform_
        ###     Dropout: https://pytorch.org/docs/stable/nn.html#torch.nn.Dropout

        # print("embeddings.shape: {} :: self.pretrained_embeddings.weight.shape: {}".format(embeddings.shape, self.pretrained_embeddings.weight.shape))
        # Note: learnable weights of the module of shape (out_features,in_features)
        #       Note that out_features comes first.

        # Construct embed_to_hidden linear layer
        self.embed_to_hidden = nn.Linear(in_features=n_features*self.embed_size, out_features=hidden_size)
        # Initialize weight matrix with xavier uniform
        self.embed_to_hidden.weight = nn.parameter.Parameter(nn.init.xavier_uniform_(tensor=torch.empty(hidden_size, n_features*self.embed_size)))

        # Dropout layer
        self.dropout = nn.Dropout(p=self.dropout_prob)

        # Construct hidden_to_logits linear layer
        self.hidden_to_logits = nn.Linear(in_features=hidden_size, out_features=n_classes)
        # Initialize weight matrix with xavier uniform
        self.hidden_to_logits.weight = nn.parameter.Parameter(nn.init.xavier_uniform_(tensor=torch.empty(n_classes, hidden_size)))
        ### END YOUR CODE

    def embedding_lookup(self, t):
        """ Utilize `self.pretrained_embeddings` to map input `t` from input tokens (integers)
            to embedding vectors.

            PyTorch Notes:
                - `self.pretrained_embeddings` is a torch.nn.Embedding object that we defined in __init__
                - Here `t` is a tensor where each row represents a list of features. Each feature is represented by an integer (input token).
                - In PyTorch the Embedding object, e.g. `self.pretrained_embeddings`, allows you to
                    go from an index to embedding. Please see the documentation (https://pytorch.org/docs/stable/nn.html#torch.nn.Embedding)
                    to learn how to use `self.pretrained_embeddings` to extract the embeddings for your tensor `t`.

            @param t (Tensor): input tensor of tokens (batch_size, n_features)

            @return x (Tensor): tensor of embeddings for words represented in t
                                (batch_size, n_features * embed_size)
        """
        ### YOUR CODE HERE (~1-3 Lines)
        ### TODO:
        ###     1) Use `self.pretrained_embeddings` to lookup the embeddings for the input tokens in `t`.
        ###     2) After you apply the embedding lookup, you will have a tensor shape (batch_size, n_features, embedding_size).
        ###         Use the tensor `view` method to reshape the embeddings tensor to (batch_size, n_features * embedding_size)
        ###
        ### Note: In order to get batch_size, you may need use the tensor .size() function:
        ###         https://pytorch.org/docs/stable/tensors.html#torch.Tensor.size
        ###
        ###  Please see the following docs for support:
        ###     Embedding Layer: https://pytorch.org/docs/stable/nn.html#torch.nn.Embedding
        ###     View: https://pytorch.org/docs/stable/tensors.html#torch.Tensor.view

        # lookup the embeddings
        x = self.pretrained_embeddings(t)

        # reshape the embeddings
        x = x.view(x.shape[0], x.shape[1]*x.shape[2])

        ### END YOUR CODE
        return x


    def forward(self, t):
        """ Run the model forward.

            Note that we will not apply the softmax function here because it is included in the loss function nn.CrossEntropyLoss

            PyTorch Notes:
                - Every nn.Module object (PyTorch model) has a `forward` function.
                - When you apply your nn.Module to an input tensor `t` this function is applied to the tensor.
                    For example, if you created an instance of your ParserModel and applied it to some `t` as follows,
                    the `forward` function would called on `t` and the result would be stored in the `output` variable:
                        model = ParserModel()
                        output = model(t) # this calls the forward function
                - For more details checkout: https://pytorch.org/docs/stable/nn.html#torch.nn.Module.forward

        @param t (Tensor): input tensor of tokens (batch_size, n_features)

        @return logits (Tensor): tensor of predictions (output after applying the layers of the network)
                                 without applying softmax (batch_size, n_classes)
        """
        ###  YOUR CODE HERE (~3-5 lines)
        ### TODO:
        ###     1) Apply `self.embedding_lookup` to `t` to get the embeddings
        ###     2) Apply `embed_to_hidden` linear layer to the embeddings
        ###     3) Apply relu non-linearity to the output of step 2 to get the hidden units.
        ###     4) Apply dropout layer to the output of step 3.
        ###     5) Apply `hidden_to_logits` layer to the output of step 4 to get the logits.
        ###
        ### Note: We do not apply the softmax to the logits here, because
        ### the loss function (torch.nn.CrossEntropyLoss) applies it more efficiently.
        ###
        ### Please see the following docs for support:
        ###     ReLU: https://pytorch.org/docs/stable/nn.html?highlight=relu#torch.nn.functional.relu

        # print("t.shape: {}".format(t.shape))

        # embedding lookup
        x = self.embedding_lookup(t=t)

        # print("x.shape: {} :: embed_to_hidden.weight.shape: {} :: embed_to_hidden.bias.shape: {}".format(x.shape, self.embed_to_hidden.weight.shape, self.embed_to_hidden.bias.shape))

        # Apply embed_to_hidden linear layer to the embeddings
        z = self.embed_to_hidden(x)

        # Apply relu non-linearity
        h = F.relu(input=z)

        # Apply dropout layer and hidden_to_logits to get the logits
        logits = self.hidden_to_logits(self.dropout(h))

        ### END YOUR CODE
        return logits