In this lecture:
nn.Sequential objects

Thus far, we have created neural networks using the nn.Module base class, where every layer is contained in its own attribute of the class. We may further abstract the creation of deep nets using the nn.Sequential class. The nn.Sequential class allows us to bundle a collection of layers that will be called sequentially. Thus, we can create entire blocks of layers that are assigned as their own attribute of the object. Let's look at an example CNN below.
This CNN will be a 6-layer model where the convolutional layers are organized into three blocks with a global average pooling and fully connected layer for classification. Each block will contain two convolutional layers, necessary activation layers, and a pooling layer.
import torch
import torch.nn as nn
class ExampleCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.block1 = nn.Sequential(nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1, stride=1),
                                    nn.ReLU(),
                                    nn.Conv2d(in_channels=16, out_channels=16, kernel_size=3, padding=1, stride=1),
                                    nn.ReLU(),
                                    nn.MaxPool2d(stride=2, kernel_size=2)  # 2x2 maxpooling
                                    )
        self.block2 = nn.Sequential(nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1, stride=1),
                                    nn.ReLU(),
                                    nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, padding=1, stride=1),
                                    nn.ReLU(),
                                    nn.MaxPool2d(stride=2, kernel_size=2)  # 2x2 maxpooling
                                    )
        self.block3 = nn.Sequential(nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1, stride=1),
                                    nn.ReLU(),
                                    nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1, stride=1),
                                    nn.ReLU(),
                                    nn.MaxPool2d(stride=2, kernel_size=2)  # 2x2 maxpooling
                                    )
        self.gap = nn.AdaptiveAvgPool2d((1, 1))  # global average pooling
        self.fc = nn.Linear(64, 10)  # assume ten classes for this example

    def forward(self, x):
        B = x.size(0)  # batch size
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.gap(x).view(B, -1)  # flatten to vector for features of each input in the batch
        z = self.fc(x)
        return z
model = ExampleCNN()
print(model)
ExampleCNN(
  (block1): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block3): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (gap): AdaptiveAvgPool2d(output_size=(1, 1))
  (fc): Linear(in_features=64, out_features=10, bias=True)
)
We see above that the use of nn.Sequential objects simplifies the constructor and forward methods of our deep learning models. While not required, nn.Sequential objects are often good practice for PyTorch coding.
Convolutional neural networks may be applied to many different tasks beyond simple image classification. Consider the tasks of image denoising, image super-resolution (improving the resolution of images), semantic segmentation (labeling the class of every pixel in an image), and object detection (placing bounding boxes and classifying objects in an image). The first three tasks, in particular, require producing another image. Consider the below figure depicting a semantic segmentation example.
Each pixel in this image is assigned its own label, like how we provide an image-level label for image classification.
To accomplish such a task, we often use encoder-decoder CNN model architectures also known as autoencoders. The encoder stage proceeds like a normal CNN where successive convolutional layers and pooling layers reduce the spatial resolution of feature maps. The encoder and decoder stages meet at a bottleneck where the decoder stage begins upsampling the feature maps back towards the desired output resolution. Where the encoder stage applies pooling or strided convolution, the decoder stage performs upsampling or transposed convolution to increase the spatial resolution of feature maps. Finally, once the desired resolution is reached, we may use a $1\times 1$ 2D convolution layer to combine the feature maps in the last layer to provide final class scores at each pixel like how a fully connected layer does for regular image classification.
Such CNNs have no fully-connected layers (and in fact may avoid all pooling layers as well) and thus we refer to them as fully convolutional networks (FCN). An example autoencoder model is shown below.
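In addition to the figure, here is a minimal code sketch of an encoder-decoder FCN for segmentation. The channel widths, two downsampling stages, and the assumption of 10 output classes are illustrative choices and not taken from any particular published architecture:

# A minimal encoder-decoder FCN sketch (illustrative channel widths, 10 assumed classes)
import torch
import torch.nn as nn

class TinySegNet(nn.Module):
    def __init__(self, num_classes=10):
        super().__init__()
        # Encoder: strided convolutions halve the spatial resolution twice
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1), nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1), nn.ReLU(),
        )
        # Decoder: transposed convolutions upsample back to the input resolution
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(32, 16, kernel_size=2, stride=2), nn.ReLU(),
            nn.ConvTranspose2d(16, 16, kernel_size=2, stride=2), nn.ReLU(),
        )
        # 1x1 convolution produces per-pixel class scores
        self.classifier = nn.Conv2d(16, num_classes, kernel_size=1)

    def forward(self, x):
        return self.classifier(self.decoder(self.encoder(x)))

x = torch.randn(2, 3, 64, 64)   # batch of 2 RGB images
print(TinySegNet()(x).shape)    # torch.Size([2, 10, 64, 64]): class scores at every pixel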
Thus far, we have discussed the need for machine learning models to generalize to unseen validation or testing data after being fit to provided training data. We have only considered how parameters of the learning algorithm (e.g., learning rate, weight decay, momentum) or, more recently, the model architecture (e.g., number of layers, width of layers, choice of pooling) may be selected or tuned in search of improved model generalization. None of these changes is inherently aimed at improving generalization; rather, they are design choices we may iterate on in hopes of gains in validation/testing performance. Next, we consider two techniques and one more common deep learning layer that help with generalization.
One potential bottleneck that degrades model generalization is the availability of data. Deep learning models learn by recognizing common patterns in training data. The hope is that these patterns translate well to new, unseen data. With scarce data, this is quite challenging, while abundant data helps capture the general distribution of information the model is trained on and thus evaluated on.
A highly popular technique for improving model generalization and robustness is the use of augmentations, more specifically image augmentations for training CNNs and other vision models. Image augmentations apply simple transformations to input data that do not affect the underlying ground-truth of the data. For example, for the task of image classification, flipping an image of a cat will not change that the image is of a cat. However, shuffling all the pixels randomly makes the image of a cat indiscernible to humans and thus not a useful image augmentation. Some common image augmentations are described below.
Example image augmentations are depicted below.
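In code, such augmentations are commonly composed with torchvision's transforms module. The sketch below uses illustrative transforms and parameter values; the exact choices depend on the dataset and task:

# Illustrative training-time augmentation pipeline using torchvision (parameters are example values)
import torchvision.transforms as T

train_transform = T.Compose([
    T.RandomHorizontalFlip(p=0.5),                    # flip left-right half the time
    T.RandomRotation(degrees=10),                     # small random rotations
    T.ColorJitter(brightness=0.2, contrast=0.2),      # mild photometric changes
    T.RandomResizedCrop(size=224, scale=(0.8, 1.0)),  # random crop and resize
    T.ToTensor(),                                     # PIL image -> float tensor in [0, 1]
])
# The transform is typically passed to a dataset object, e.g.
# dataset = torchvision.datasets.CIFAR10(root="data", train=True, transform=train_transform)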
Deep learning models are often seen as over-parameterized, meaning that they have many more parameters than example data to train on. Informally speaking, this can give models a tendency to overfit to training data and memorize patterns in the data. The technique of dropout seeks to remove this dependency on memorization during training. For a given probability $p\in(0, 1)$, a dropout layer will mute or set to zero each entry in an input features map with probability $p$. Dropout seeks to reduce a model's dependency on very specific features for certain training examples and encourage more robust and generalizable feature representations.
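A short sketch of dropout behavior on a toy feature tensor (the sizes here are arbitrary). In PyTorch, surviving entries are also rescaled by 1/(1-p) during training, and the layer is an identity in evaluation mode:

# Dropout zeroes entries with probability p during training only (toy feature tensor)
import torch
import torch.nn as nn

drop = nn.Dropout(p=0.5)
features = torch.ones(1, 8)

drop.train()             # training mode: random entries are zeroed, survivors scaled by 1/(1-p)
print(drop(features))    # e.g., tensor([[2., 0., 2., 2., 0., 0., 2., 0.]]) (random)

drop.eval()              # evaluation mode: dropout is disabled (identity)
print(drop(features))    # tensor([[1., 1., 1., 1., 1., 1., 1., 1.]])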
The activations following non-linearities in a deep net are not guaranteed to follow any "nice" distribution of values. For example, a deep net with ReLU activations will only produce positive values that may be any arbitrarily large positive number. Batch normalization layers attempt to rectify the distribution of activations by applying a standard normalization. During training, the activations of a given layer $Z\in\mathbb{R}^{N\times C\times H \times W}$ for $N$ examples in a batch with $C$ feature channels each of size $H\times W$ are normalized by computing the per-channel mean and standard deviation within the batch. For feature channel $c$, let the mean and standard deviation be denoted by $\mu_c$ and $\sigma_c$. We apply batch normalization elementwise for feature map $Z_c\in\mathbb{R}^{N\times H\times W}$ by
$$ \textrm{BatchNorm}(Z_c) = \frac{Z_c-\mu_c}{\sigma_c}*\gamma_c+\beta_c. $$
Above, $\gamma_c$ and $\beta_c$ are learnable parameters for channel $c$ that rescale and shift the normalized activations away from a standard normal distribution as necessary during training. Thus, each channel has its own parameters and a batch norm layer will have $2C$ learnable parameters. During training, the model uses the batch mean and standard deviation to apply the normalization while also keeping running statistics for the dataset mean and standard deviation. At evaluation/testing time, we use the overall running statistics instead of the mini-batch statistics, which are susceptible to stochastic batch effects. In practice, batch norm has been shown to give strong improvements to CNNs and other deep net architectures, in particular for natural image datasets.
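A small sketch of the corresponding PyTorch layer (channel count chosen arbitrarily): nn.BatchNorm2d(C) holds the $2C$ learnable parameters ($\gamma$ as weight, $\beta$ as bias) plus the running statistics used at evaluation time.

# BatchNorm2d over a toy batch: per-channel normalization with 2C learnable parameters
import torch
import torch.nn as nn

C = 16
bn = nn.BatchNorm2d(C)
z = torch.randn(8, C, 32, 32)          # N=8 examples, C=16 channels, 32x32 feature maps

out = bn(z)                            # training mode: uses batch statistics, updates running stats
print(bn.weight.shape, bn.bias.shape)  # torch.Size([16]) torch.Size([16])  -> 2C parameters
print(bn.running_mean.shape)           # torch.Size([16]) running statistics used in eval mode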
.eval() and .train() Methods

The above dropout and batch norm layers clearly have behavior that is unique to training versus model evaluation and testing. For example, we do not want to randomly remove a fraction of features at a particular layer when evaluating our model; this is only helpful during model training. The nn.Module class provides a simple interface to toggle our deep nets between training and evaluation mode. For a deep net model named my_model, we may set all layers into training mode, e.g. activate dropout, by calling my_model.train(). Then, when evaluating the model, we simply call my_model.eval(). We alternate between these modes when training by backpropagation and when evaluating a deep net.
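A minimal sketch of how these mode switches typically appear in a training/validation loop (my_model, the data loaders, criterion, and optimizer are placeholders, not defined here):

# Toggling training vs. evaluation behavior (my_model, loaders, etc. are placeholders)
my_model.train()                  # enable dropout, use batch statistics in batch norm
for x, y in train_loader:
    optimizer.zero_grad()
    loss = criterion(my_model(x), y)
    loss.backward()
    optimizer.step()

my_model.eval()                   # disable dropout, use running statistics in batch norm
with torch.no_grad():             # no gradients needed for evaluation
    for x, y in val_loader:
        predictions = my_model(x)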
The first task to greatly benefit from the use of convolutional neural networks (CNN) was image classification. We have already seen image classification multiple times in this course. In brief, image classification takes an input image and seeks to assign one of a finite collection of labels to the image. For example, we may seek to classify images of animals as being one of: cat, dog, horse, frog, giraffe, or cow for a 6-class classification problem. The below timeline provides an incomplete set of events that greatly advanced the use of CNNs and deep learning in general.
The first major practical success was the LeNet (specifically LeNet-5) model developed by Yann LeCun, et al. at Bell Laboratories. LeNet was developed for handwritten digit classification on the MNIST dataset and helped automate the processing of checks and mail zip codes.
The ImageNet Dataset, developed by Fei-Fei Li, et al., is credited with accelerating the development of image classification models and consequently CNNs. The dataset contains multiple different tasks and has grown in size since its introduction in 2009. The most famous application of ImageNet was for the "ImageNet Large Scale Visual Recognition Challenge" (ILSVRC). While this challenge has evolved over the years, the most popularly cited version is ImageNet1k which refers to the challenge with 1,000 classes across 1,281,167 images. Example images may be easily viewed in this GitHub repository.
Prior to 2012, CNNs had not gained widespread popularity in the computer vision and machine learning community in spite of the success of LeNet back in 1998. Limited compute and a lack of specialized hardware posed a serious roadblock for developing CNNs, and as such, more classical models like SVMs remained the dominant methods in the machine learning community. In 2012, AlexNet became the first CNN to win the ILSVRC competition at the CVPR 2012 conference with a top-5 error rate of 15.3%, more than 10.8 percentage points better than the runner-up. Developed by Alex Krizhevsky, Ilya Sutskever (co-founder of OpenAI), and Geoffrey Hinton (recent Nobel Prize winner), AlexNet represented a large shift in attention for researchers towards deep learning for computer vision.
The key contributions of AlexNet were the use of deeper and larger CNNs (eight layers), ReLU non-linearity, and graphics processing units (GPU) to help with the computational burden of training such a large model on a similarly large dataset. The below figure depicts the model architectures of LeNet and AlexNet side-by-side.
Following the success of AlexNet, there was increased interest in developing deeper CNN models. However, these deeper models are of course exceptionally expensive to train. The VGG-16 and VGG-19 models developed by Karen Simonyan and Andrew Zisserman of the Visual Geometry Group at University of Oxford gave a quite simple technique for effectively training deeper CNNs: just use smaller kernels! The VGG-16 and VGG-19 models only use $3\times 3$ convolutional kernels where previous models often made use of multiple larger kernel sizes.
The downside of smaller kernels is that they see less of the input image or feature map and thus have a narrower receptive field on their own. However, Simonyan and Zisserman showed that multiple $3\times 3$ kernel layers may actually achieve the same receptive field size as a larger kernel size with many fewer parameters. For example, consider a single convolutional layer with a $7\times 7$ kernel. For simplicity, also assume the number of input and output channels $C$ is kept the same. Thus, this layer requires $7\times 7\times C\times C=49C^2$ learnable parameters. Instead, we may stack three successive convolutional layers each with $3\times 3$ kernels (and activation functions in between). The first layer has a receptive field size of $3\times 3$. The second layer increases this receptive field to $5\times 5$, while the third layer reaches the $7\times 7$ size of the larger-kernel layer. Each of these smaller convolutional layers only requires $3\times 3\times C\times C=9C^2$ parameters. Therefore, the entire stack of three layers takes only $27C^2$ parameters!
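We can verify this parameter arithmetic directly in PyTorch; the snippet below uses an arbitrary channel count of C = 64 and ignores bias terms for simplicity:

# Comparing parameter counts: one 7x7 conv vs. a stack of three 3x3 convs (C = 64, no biases)
import torch.nn as nn

C = 64
big = nn.Conv2d(C, C, kernel_size=7, padding=3, bias=False)
stack = nn.Sequential(
    nn.Conv2d(C, C, kernel_size=3, padding=1, bias=False), nn.ReLU(),
    nn.Conv2d(C, C, kernel_size=3, padding=1, bias=False), nn.ReLU(),
    nn.Conv2d(C, C, kernel_size=3, padding=1, bias=False),
)

count = lambda m: sum(p.numel() for p in m.parameters())
print(count(big))    # 200704 = 49 * C^2
print(count(stack))  # 110592 = 27 * C^2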
This simple but effective insight allows us to build deeper models with a lower burden on fitting parameters while maintaining the same ability to build feature representations at growing scales through the image. The below tables are taken from the original VGG paper.
As researchers made CNN models deeper, a new phenomenon was observed. While depth was helpful, there reached a point where performance would not only saturate but actually get worse! The below figure is taken from the original ResNet paper and demonstrates this.
This phenomenon is counter-intuitive since an $N+L$-layer model should be at least as good as an $N$-layer model. A simple thought experiment is to imagine the added $L$ layers as all performing an identity mapping, thus making the deeper and shallower models behave the same. In 2015, He, et al. argued that deeper models struggle to learn this potential identity mapping where performance begins to saturate and thus additional layers may not be necessary. This inspired the creation of the residual unit and the resulting ResNet model built from these residual units. Mathematically, let $\mathcal{F}(x)$ represent some function applied to input $x$. The authors claim that in deeper layers, it is quite difficult for the model (starting from some random initialization or learned set of values) to converge to an identity mapping for such an $\mathcal{F}(x)$. Instead, the authors propose the residual unit $\mathcal{H}(x)=\mathcal{F}(x)+x$ where a skip connection combines the input to the unit with the result of the learnable weight layers of the unit applied to that input. In this way, an identity mapping may be learned by $\mathcal{F}(x)=0$, which is easier to optimize than the very particular identity mapping. The below figure from the ResNet paper shows this residual block.
The use of residual blocks allowed dramatically deeper models to be trained, e.g. the ResNet-34 shown below from the original paper, as well as ResNet-50 and ResNet-101 at the time. Today, residual blocks are at the heart of numerous successful deep learning models and are critical to the ability to train exceptionally deep and large models.
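In code, a simplified residual block might look like the following sketch. This version keeps the channel count and spatial size fixed; the blocks in the actual ResNet paper also use batch normalization and downsampling variants:

# A simplified residual block: output = ReLU(F(x) + x), with F two 3x3 convolutions
import torch
import torch.nn as nn

class ResidualBlock(nn.Module):
    def __init__(self, channels):
        super().__init__()
        self.conv1 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.relu = nn.ReLU()

    def forward(self, x):
        residual = x                                 # skip connection
        out = self.conv2(self.relu(self.conv1(x)))   # F(x): learnable weight layers
        return self.relu(out + residual)             # H(x) = F(x) + x

block = ResidualBlock(64)
x = torch.randn(1, 64, 32, 32)
print(block(x).shape)  # torch.Size([1, 64, 32, 32])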
The deep net architectures we have explored thus far, both MLPs and CNNs, are referred to as feed-forward neural networks since inputs are passed to a deep net and the computation simply proceeds by a single forward pass through the model.
The model has no memory or state from the previous input when receiving the next input. This may be undesirable when processing sequence data wherein the result at one time step clearly depends on previous time steps.
We would like a neural network architecture that has some notion of memory/state, since processing at one time step may influence processing at later time steps.
Let $x\in\mathbb{R}^{T\times d}$ be an example of sequence data where $x=\{x_0, x_1, x_2, \ldots, x_{T-1}\}$ and $x_t\in\mathbb{R}^d$. For each time step, our model $f$ takes input $x_t$ and the hidden state of computation from previous time step $t-1$, $h_{t-1}\in\mathbb{R}^{H}$. Thus, $$ f(x_t, h_{t-1}) = h_t. $$
Such a model is referred to as a recurrent neural network (RNN) since the model's computation forms a recurrence, i.e. output at each time depends on outputs at previous times.
For whatever task of interest, we have a hidden state of the model at each time step that is an $H$-dimensional vector. We may then apply a fully-connected layer to these each hidden state vector for an appropriate regression or classification problem.
The left diagram is the simplest form of RNN model. You have an input, an output, and a hidden state that is updated at every timestep.
There are many other, more complex RNN models but let's stick with this one for a minute.
Problem: Given a name (e.g. "John", "Sebastian", "Yuri", etc.), we want to classify that name according to one of several categories. (adapted from [4])
Example: We have a dataset (https://download.pytorch.org/tutorial/data.zip) of collections of names from 18 nationalities. Examples include:
Arabic | German | Irish | Japanese | Russian |
---|---|---|---|---|
Nahas | Faust | Bran | Akimoto | Babinov |
Baz | Heinrich | Cassidy | Kira | Golovach |
Amari | Tresler | Rory | Sakagami | Tsalko |
Strategy: We are going to pass a name into the RNN one character at a time.
How do we pass a letter into a model whose inputs must be numbers/vectors?
A one-hot vector is filled with 0s except for a 1 at the index of the current letter.
All characters:
["a", "b", "c", "d", "e"]
Character | One-hot Vector |
---|---|
"a" |
[ 1, 0, 0, 0, 0 ] |
"b" |
[ 0, 1, 0, 0, 0 ] |
"c" |
[ 0, 0, 1, 0, 0 ] |
"d" |
[ 0, 0, 0, 1, 0 ] |
"e" |
[ 0, 0, 0, 0, 1 ] |
# credit to [1]
# data: https://download.pytorch.org/tutorial/data.zip
import io
import os
import unicodedata
import string
import glob
import torch
import random
# alphabet small + capital letters + " .,;'"
ALL_LETTERS = string.ascii_letters + " .,;'"
N_LETTERS = len(ALL_LETTERS)
# Turn a Unicode string to plain ASCII, thanks to https://stackoverflow.com/a/518232/2809427
def unicode_to_ascii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
        and c in ALL_LETTERS
    )

def load_data():
    # Build the category_lines dictionary, a list of names per language
    category_lines = {}
    all_categories = []

    def find_files(path):
        return glob.glob(path)

    # Read a file and split into lines
    def read_lines(filename):
        lines = io.open(filename, encoding='utf-8').read().strip().split('\n')
        return [unicode_to_ascii(line) for line in lines]

    for filename in find_files('data/names/*.txt'):
        category = os.path.splitext(os.path.basename(filename))[0]
        all_categories.append(category)
        lines = read_lines(filename)
        category_lines[category] = lines

    return category_lines, all_categories
"""
To represent a single letter, we use a “one-hot vector” of
size <1 x n_letters>. A one-hot vector is filled with 0s
except for a 1 at index of the current letter, e.g. "b" = <0 1 0 0 0 ...>.
To make a word we join a bunch of those into a
2D matrix <line_length x 1 x n_letters>.
That extra 1 dimension is because PyTorch assumes
everything is in batches - we’re just using a batch size of 1 here.
"""
# Find letter index from all_letters, e.g. "a" = 0
def letter_to_index(letter):
    return ALL_LETTERS.find(letter)

# Just for demonstration, turn a letter into a <1 x n_letters> Tensor
def letter_to_tensor(letter):
    tensor = torch.zeros(1, N_LETTERS)
    tensor[0][letter_to_index(letter)] = 1
    return tensor

# Turn a line into a <line_length x 1 x n_letters>,
# or an array of one-hot letter vectors
def line_to_tensor(line):
    tensor = torch.zeros(len(line), 1, N_LETTERS)
    for i, letter in enumerate(line):
        tensor[i][0][letter_to_index(letter)] = 1
    return tensor

def random_training_example(category_lines, all_categories):
    def random_choice(a):
        random_idx = random.randint(0, len(a) - 1)
        return a[random_idx]

    category = random_choice(all_categories)
    line = random_choice(category_lines[category])
    category_tensor = torch.tensor([all_categories.index(category)], dtype=torch.long)
    line_tensor = line_to_tensor(line)
    return category, line, category_tensor, line_tensor

if __name__ == '__main__':
    print(ALL_LETTERS)
    print(unicode_to_ascii('Ślusàrski'))

    category_lines, all_categories = load_data()
    print(category_lines['Italian'][:5])

    print(letter_to_tensor('J'))  # [1, 57]
    print(line_to_tensor('Jones').size())  # [5, 1, 57]
abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ .,;'
Slusarski
['Abandonato', 'Abatangelo', 'Abatantuono', 'Abate', 'Abategiovanni']
tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])
torch.Size([5, 1, 57])
Next let's actually code up the RNN:
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
Let's recall the RNN model that we are trying to implement:
class RNN(nn.Module):
    # implement RNN from scratch rather than using nn.RNN
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.Lh = nn.Linear(input_size + hidden_size, hidden_size)
        self.Lo = nn.Linear(input_size + hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input_tensor, hidden_tensor):
        combined = torch.cat((input_tensor, hidden_tensor), 1)
        hidden = self.Lh(combined)
        output = self.Lo(combined)
        output = self.softmax(output)
        return output, hidden

    def init_hidden(self):
        return torch.zeros(1, self.hidden_size)
#code adapted from [1]
category_lines, all_categories = load_data()
n_categories = len(all_categories)
n_hidden = 128
rnn = RNN(N_LETTERS, n_hidden, n_categories)
# one step
input_tensor = letter_to_tensor('A')
hidden_tensor = rnn.init_hidden()
output, next_hidden = rnn(input_tensor, hidden_tensor)
#print(output.size())
#print(next_hidden.size())
# whole sequence/name
input_tensor = line_to_tensor('Albert')
hidden_tensor = rnn.init_hidden()
output, next_hidden = rnn(input_tensor[0], hidden_tensor)
#print(output.size())
#print(next_hidden.size())
#
def category_from_output(output):
    category_idx = torch.argmax(output).item()
    return all_categories[category_idx]
print(category_from_output(output))
criterion = nn.NLLLoss()
learning_rate = 0.005
optimizer = torch.optim.SGD(rnn.parameters(), lr=learning_rate)
def train(line_tensor, category_tensor):
    hidden = rnn.init_hidden()

    for i in range(line_tensor.size()[0]):
        output, hidden = rnn(line_tensor[i], hidden)

    loss = criterion(output, category_tensor)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    return output, loss.item()
current_loss = 0
all_losses = []
plot_steps, print_steps = 1000, 5000
n_iters = 100000
for i in range(n_iters):
    category, line, category_tensor, line_tensor = random_training_example(category_lines, all_categories)

    output, loss = train(line_tensor, category_tensor)
    current_loss += loss

    if (i+1) % plot_steps == 0:
        all_losses.append(current_loss / plot_steps)
        current_loss = 0

    if (i+1) % print_steps == 0:
        guess = category_from_output(output)
        correct = "CORRECT" if guess == category else f"WRONG ({category})"
        print(f"{i+1} {(i+1)/n_iters*100} {loss:.4f} {line} / {guess} {correct}")
plt.figure()
plt.plot(all_losses)
plt.show()
def predict(input_line):
    print(f"\n> {input_line}")
    with torch.no_grad():
        line_tensor = line_to_tensor(input_line)

        hidden = rnn.init_hidden()

        for i in range(line_tensor.size()[0]):
            output, hidden = rnn(line_tensor[i], hidden)

        guess = category_from_output(output)
        print(guess)
Spanish
5000 5.0 2.7458 Maurice / Italian WRONG (Irish)
10000 10.0 2.7139 Rowan / Irish WRONG (English)
15000 15.0 1.0818 Zong / Chinese CORRECT
20000 20.0 3.0011 Okeefe / Dutch WRONG (English)
25000 25.0 3.1092 Salazar / Arabic WRONG (Portuguese)
30000 30.0 1.9620 Moreno / Spanish WRONG (Portuguese)
35000 35.0 1.2889 Wang / Chinese WRONG (Korean)
40000 40.0 4.7081 Schallom / Scottish WRONG (Czech)
45000 45.0 3.8346 Georgijev / Russian WRONG (Czech)
50000 50.0 0.7720 An / Vietnamese CORRECT
55000 55.00000000000001 1.2099 Edmond / English CORRECT
60000 60.0 0.3003 Anselmetti / Italian CORRECT
65000 65.0 0.3678 Pesaresi / Italian CORRECT
70000 70.0 2.7452 Muhlbauer / German WRONG (Czech)
75000 75.0 1.2330 Sada / Japanese CORRECT
80000 80.0 0.0209 Sokolowski / Polish CORRECT
85000 85.0 0.0851 Dalianis / Greek CORRECT
90000 90.0 1.3073 Smets / Scottish WRONG (Dutch)
95000 95.0 1.0115 Slootmaekers / Greek WRONG (Dutch)
100000 100.0 0.7751 Liu / Chinese CORRECT
As you probably guessed from the torch.nn library, PyTorch has a built-in RNN module:
#refer to https://github.com/patrickloeber/pytorch-examples/blob/master/rnn-lstm-gru/main.py for full test of this model/code
#[1]
# a device assumption so this snippet is self-contained when run outside the linked script
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        # batch_first=True lets us batch our sequences
        # -> x needs to be: (batch_size, seq, input_size)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # Set initial hidden state (and cell state for LSTM)
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)

        # Forward propagate RNN
        out, _ = self.rnn(x, h0)

        # Depending on your problem, you have to choose whether you return the last hidden state or a combination.
        # Here we decode the hidden state of the last time step only.
        out = out[:, -1, :]

        # Pass the last hidden state to a fully connected layer to produce class scores (logits)
        out = self.fc(out)
        return out
One problem with forward-only RNNs is that the prediction for the current word/token can only depend on the previous words/tokens. But, in many cases, a sentence contains relevant information after the word. Consider the following sentences:
If we only evaluate the sentence left to right, "apple" will be evaluated roughly the same despite referring to two vastly different entities.
Enter the bidirectional RNN/LSTM. To calculate the output $y$, we run one pass over the sequence left to right and a second pass right to left, then combine the forward and backward hidden states at each time step (e.g., by concatenation).
Calculation is not too difficult, but PyTorch makes it super easy:
nn.RNN(input_size, hidden_size, bidirectional=True)
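A quick sketch of what bidirectional=True changes in practice (the sizes below are arbitrary): the output features at each time step are the concatenation of the forward and backward hidden states, so downstream layers should expect 2 * hidden_size features.

# Bidirectional RNN: output features are 2 * hidden_size (forward + backward); sizes are arbitrary
import torch
import torch.nn as nn

rnn = nn.RNN(input_size=57, hidden_size=128, batch_first=True, bidirectional=True)
x = torch.randn(4, 10, 57)       # (batch_size, seq_length, input_size)
out, h_n = rnn(x)
print(out.shape)                 # torch.Size([4, 10, 256]) -> 2 * hidden_size per time step
print(h_n.shape)                 # torch.Size([2, 4, 128]) -> one final state per direction

num_classes = 10                 # arbitrary number of classes for this sketch
fc = nn.Linear(2 * 128, num_classes)  # classifier over the concatenated states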
Highly complex sentences require highly complex models to keep track of the different phrase components. For that we have stacked RNNs:
[From [7]] The reason why stacking multiple layers on top of each other is useful is for the same reason that non-linearities proved useful in the standard neural networks introduced in Section 5: they are able to progressively extract more abstract features of the current words or sentences. For example, Xing et al (EMNLP 2016) finds evidence that in a two-layer stacked LSTM, the first layer tends to learn granular features of words such as part of speech tags, while the second layer learns more abstract features of the sentence such as voice or tense.
Stacked RNNs are similarly simple to implement with PyTorch:
nn.RNN(input_size, hidden_size, num_layers=...[default=1])
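As a small sketch (layer count and sizes chosen arbitrarily), stacking only changes the num_layers argument; the per-step output still has hidden_size features, coming from the top layer of the stack.

# Stacked (2-layer) RNN: hidden states of layer 1 feed layer 2; sizes are arbitrary
import torch
import torch.nn as nn

stacked = nn.RNN(input_size=57, hidden_size=128, num_layers=2, batch_first=True)
x = torch.randn(4, 10, 57)   # (batch_size, seq_length, input_size)
out, h_n = stacked(x)
print(out.shape)             # torch.Size([4, 10, 128]) -> outputs from the top layer
print(h_n.shape)             # torch.Size([2, 4, 128]) -> final hidden state of each layer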
There has been much development over the years of various fundamental layers/units that form the $f(x_t, h_{t-1})$ shown above for RNNs. The most popular such architecture is the long short-term memory (LSTM) RNN. There are many pieces of computation that constitute one LSTM layer or cell: hidden state, cell state, input gate, forget gate, cell gate, and output gate.
At time step $t$, the LSTM has input $x_t$, previous hidden state $h_{t-1}$, and cell state at previous time step $c_{t-1}$. The LSTM cell computes the following non-linear activation values [10]: $$ \begin{align} i_t &= \sigma(W_{ii}x_t +W_{hi}h_{t-1})\in[0, 1],\quad&\textrm{Input Gate Activation}\\ f_t &= \sigma(W_{if}x_t+W_{hf}h_{t-1})\in[0, 1],\quad&\textrm{Forget Gate Activation}\\ g_t &= \textrm{tanh}(W_{ig}x_t+W_{hg}h_{t-1})\in[-1, 1],\quad&\textrm{Cell Gate Activation}\\ o_t &= \sigma(W_{io}x_t + W_{ho}h_{t-1})\in[0, 1],\quad&\textrm{Output Gate Activation} \end{align} $$ Above, $\sigma$ denotes the sigmoid function and $\textrm{tanh}$ is the hyperbolic tangent function. The matrices $W_{ii}, W_{hi}, W_{if}, W_{hf}, W_{ig}, W_{hg}, W_{io}, W_{ho}$ represent the learnable weights in the LSTM cell. These respective gate activations control how the cell state and hidden state at the current time-step are updated. We obtain the present cell state and hidden state via:
$$ \begin{align} c_t &= f_t\odot c_{t-1}+i_t\odot g_t\\ h_t &= o_t\odot\textrm{tanh}(c_t) \end{align} $$where $\odot$ represents the element-wise (Hadamard) product. Intuitively, the forget gate $f_t$ allows us to remove or keep values in the cell state when entries are close to 0 or 1, respectively. The input gate activation $i_t$ represents an encoding of which entries in the cell state we may modify and to what degree since these values are between 0 and 1. Together with the cell gate activation $g_t$, which has values from -1 to 1 due to the hyperbolic tangent activation, the element-wise product $i_t\odot g_t$ allows us to increase or decrease each value in the cell state. Altogether, these mechanisms allow us to retain, forget, or modify information in the cell state, whether that information arises from short-term or long-term dependencies in the data. Lastly, the hidden state $h_t$ uses the cell state projected between -1 and 1 by another hyperbolic tangent with one more gate activation $o_t$ to again modify or rescale the information in the cell state. It is important to note that the same weight matrices are used at each time-step.
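To make the gate equations concrete, here is a from-scratch sketch of a single LSTM cell step implementing the formulas above (bias terms are omitted to match the equations, and all sizes are arbitrary):

# A single LSTM cell step implementing the gate equations above (no biases, arbitrary sizes)
import torch
import torch.nn as nn

class LSTMCellSketch(nn.Module):
    def __init__(self, d, H):
        super().__init__()
        # Input-to-hidden and hidden-to-hidden weights for each gate
        self.W_ii, self.W_hi = nn.Linear(d, H, bias=False), nn.Linear(H, H, bias=False)
        self.W_if, self.W_hf = nn.Linear(d, H, bias=False), nn.Linear(H, H, bias=False)
        self.W_ig, self.W_hg = nn.Linear(d, H, bias=False), nn.Linear(H, H, bias=False)
        self.W_io, self.W_ho = nn.Linear(d, H, bias=False), nn.Linear(H, H, bias=False)

    def forward(self, x_t, h_prev, c_prev):
        i_t = torch.sigmoid(self.W_ii(x_t) + self.W_hi(h_prev))   # input gate
        f_t = torch.sigmoid(self.W_if(x_t) + self.W_hf(h_prev))   # forget gate
        g_t = torch.tanh(self.W_ig(x_t) + self.W_hg(h_prev))      # cell gate
        o_t = torch.sigmoid(self.W_io(x_t) + self.W_ho(h_prev))   # output gate
        c_t = f_t * c_prev + i_t * g_t                            # new cell state
        h_t = o_t * torch.tanh(c_t)                               # new hidden state
        return h_t, c_t

cell = LSTMCellSketch(d=57, H=128)
h, c = torch.zeros(1, 128), torch.zeros(1, 128)
h, c = cell(torch.randn(1, 57), h, c)
print(h.shape, c.shape)   # torch.Size([1, 128]) torch.Size([1, 128])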
As you probably guessed, PyTorch makes implementing LSTMs trivial:
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTM, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # Set initial hidden and cell states
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)

        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))

        # Decode the hidden state of the last time step
        out = out[:, -1, :]
        out = self.fc(out)
        return out
For more discussion of LSTMs, the following GitHub contains an excellent tutorial. PyTorch also has an easy interface for implementing LSTM models using the nn.LSTM module.
The GRU (gated recurrent unit) is a simplification of the standard LSTM. It differs from the LSTM in how the memory/state is maintained: the GRU merges the cell state and hidden state and uses only two gates, a reset gate and an update gate.
The reset gate determines how much of the previous hidden state to forget, while the update gate determines how much of the candidate activation vector to incorporate into the new hidden state. Read more about the specific mathematics of the GRU in [9].
And again, implementing the GRU in PyTorch follows the same pattern as the others.
class GRU(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(GRU, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # Set initial hidden state (the GRU has no separate cell state)
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)

        out, _ = self.gru(x, h0)
        out = out[:, -1, :]
        out = self.fc(out)
        return out
Most problems can be reformulated as recurrent neural network problems!
[3]
Some examples of each category type:
[1] Patrick Loeber's tutorials on RNNs - https://www.youtube.com/watch?app=desktop&v=WEV61GmmPrk
[2] Roger Grosse CSC321 lectures - https://www.cs.toronto.edu/~rgrosse/courses/csc321_2018/
[3] Andrej Karpathy, "The Unreasonable Effectiveness of Recurrent Neural Networks" https://karpathy.github.io/2015/05/21/rnn-effectiveness/
[4] "NLP From Scratch: Classifying Names with a Character-Level RNN" - https://pytorch.org/tutorials/intermediate/char_rnn_classification_tutorial.html
[5] Learn with Jay "Bidirectional RNN | Deep Learning | In-depth Explanation & Equations" - https://www.youtube.com/watch?v=f6DwK5Kuv3E
[6] Colah's blog "Neural Networks, Types, and Functional Programming" - https://colah.github.io/posts/2015-09-NN-Types-FP/
[7] Phontron "Language Models 4: Recurrent Neural Network Language Models" - https://www.phontron.com/class/mtandseq2seq2018/assets/slides/mt-fall2018.chapter6.pdf
[8] Ivan Vasilev "Advanced deep learning with Python" - https://www.oreilly.com/library/view/advanced-deep-learning/9781789956177/8ad9dc41-3237-483e-8f6b-7e5f653dc693.xhtml
[9] Chung et al. "Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling" - https://arxiv.org/abs/1412.3555
[10] DagsHub "Complete Guide to RNN, LSTM, and Bidirectional LSTM" - https://dagshub.com/blog/rnn-lstm-bidirectional-lstm/