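"""From-scratch LSTM building blocks in PyTorch: a single LSTMCell, a
unidirectional LSTMModel, and bidirectional variants (BiLSTM / BiLSTMModel)
combined with an embedding layer and a linear head for binary classification."""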
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.distributed as dist
import math
class LSTMCell(nn.Module):
    """A single LSTM cell implemented from scratch with two linear maps."""

    def __init__(self, input_size, hidden_size, bias=True):
        super(LSTMCell, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.bias = bias
        # One linear layer each for the input and the recurrent hidden state;
        # both produce the four gate pre-activations at once (4 * hidden_size).
        self.x2h = nn.Linear(input_size, 4 * hidden_size, bias=bias)
        self.h2h = nn.Linear(hidden_size, 4 * hidden_size, bias=bias)
        self.reset_parameters()

    def reset_parameters(self):
        std = 1.0 / math.sqrt(self.hidden_size)
        for w in self.parameters():
            w.data.uniform_(-std, std)

    def forward(self, x, hidden):
        hx, cx = hidden
        x = x.view(-1, x.size(1))
        gates = self.x2h(x) + self.h2h(hx)  # (batch_size, 4 * hidden_size)
        ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1)
        ingate = torch.sigmoid(ingate)
        forgetgate = torch.sigmoid(forgetgate)
        cellgate = torch.tanh(cellgate)
        outgate = torch.sigmoid(outgate)
        # Standard LSTM state update.
        cy = forgetgate * cx + ingate * cellgate
        hy = outgate * torch.tanh(cy)
        return (hy, cy)
class LSTMModel(nn.Module):
    """Unidirectional LSTM built on LSTMCell, followed by a linear read-out."""

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim, bias=True):
        super(LSTMModel, self).__init__()
        # Hidden dimensions
        self.hidden_dim = hidden_dim
        # Number of hidden layers (only the first layer's state is used below)
        self.layer_dim = layer_dim
        self.lstm = LSTMCell(input_dim, hidden_dim, bias)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # x: (batch_size, seq_len, input_dim)
        # Initialize hidden and cell states with zeros on the input's device.
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim, device=x.device)
        c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim, device=x.device)

        outs = []
        hn = h0[0, :, :]
        cn = c0[0, :, :]
        # Unroll the cell over the time dimension.
        for seq in range(x.size(1)):
            hn, cn = self.lstm(x[:, seq, :], (hn, cn))
            outs.append(hn)

        # Use the hidden state of the last time step for the read-out.
        out = outs[-1]
        out = self.fc(out)  # (batch_size, output_dim)
        return out
class LSTM_model(nn.Module):
    """Embedding + LSTM encoder + linear head for binary classification."""

    def __init__(self, vocab_size, n_hidden):
        super(LSTM_model, self).__init__()
        self.embedding = nn.Embedding(vocab_size, n_hidden)
        # A single recurrent layer mapping n_hidden -> n_hidden features.
        self.lstm = LSTMModel(n_hidden, n_hidden, 1, n_hidden)
        self.fc_output = nn.Linear(n_hidden, 1)
        self.loss = nn.BCEWithLogitsLoss()

    def forward(self, X, t, train=True):
        embed = self.embedding(X)   # (batch_size, time_steps, n_hidden)
        fc_out = self.lstm(embed)   # (batch_size, n_hidden)
        h = self.fc_output(fc_out)  # (batch_size, 1)
        return self.loss(h[:, 0], t), h[:, 0]
class BiLSTM(nn.Module):
    """Bidirectional LSTM built from two independent LSTMCells."""

    def __init__(self, input_size, hidden_size, bias=True):
        super(BiLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.forward_cell = LSTMCell(input_size, hidden_size, bias)
        self.backward_cell = LSTMCell(input_size, hidden_size, bias)

    def forward(self, input_seq):
        # input_seq: (batch_size, seq_len, input_size)
        batch_size = input_seq.size(0)
        device = input_seq.device
        forward_hidden = (torch.zeros(batch_size, self.hidden_size, device=device),
                          torch.zeros(batch_size, self.hidden_size, device=device))
        backward_hidden = (torch.zeros(batch_size, self.hidden_size, device=device),
                           torch.zeros(batch_size, self.hidden_size, device=device))

        forward_outputs = []
        backward_outputs = []
        # Forward pass over the sequence.
        for t in range(input_seq.size(1)):
            forward_hidden = self.forward_cell(input_seq[:, t], forward_hidden)
            forward_outputs.append(forward_hidden[0])
        # Backward pass over the reversed sequence.
        for t in range(input_seq.size(1) - 1, -1, -1):
            backward_hidden = self.backward_cell(input_seq[:, t], backward_hidden)
            backward_outputs.append(backward_hidden[0])
        # The backward outputs were collected in reverse time order; flip them so
        # that position t holds the backward state for time step t.
        backward_outputs.reverse()

        forward_outputs = torch.stack(forward_outputs, dim=1)
        backward_outputs = torch.stack(backward_outputs, dim=1)
        # (batch_size, seq_len, 2 * hidden_size)
        outputs = torch.cat((forward_outputs, backward_outputs), dim=2)
        return outputs
class BiLSTMModel(nn.Module):
    """Embedding + bidirectional LSTM encoder + linear head for binary classification."""

    def __init__(self, vocab_size, n_hidden):
        super(BiLSTMModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, n_hidden)
        self.n_hidden = n_hidden
        self.bilstm = BiLSTM(n_hidden, n_hidden)
        self.fc_output = nn.Linear(2 * n_hidden, 1)
        self.loss = nn.BCEWithLogitsLoss()

    def forward(self, X, t, train=True):
        embed = self.embedding(X)        # (batch_size, time_steps, n_hidden)
        bilstm_out = self.bilstm(embed)  # (batch_size, time_steps, 2 * n_hidden)
        # Summary vector: the forward state after the last token and the backward
        # state after the first token, each of which has seen the whole sequence.
        summary = torch.cat((bilstm_out[:, -1, :self.n_hidden],
                             bilstm_out[:, 0, self.n_hidden:]), dim=1)
        h = self.fc_output(summary)      # (batch_size, 1)
        return self.loss(h[:, 0], t), h[:, 0]
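
# A minimal smoke test, assuming the classes above are used as-is. The vocabulary
# size, hidden size, batch size, and sequence length below are illustrative values,
# not taken from any particular training setup.
if __name__ == "__main__":
    vocab_size, n_hidden = 1000, 32  # assumed hyperparameters
    batch_size, seq_len = 4, 10      # assumed batch shape

    X = torch.randint(0, vocab_size, (batch_size, seq_len))  # dummy token ids
    t = torch.randint(0, 2, (batch_size,)).float()           # dummy binary targets

    lstm_model = LSTM_model(vocab_size, n_hidden)
    loss, logits = lstm_model(X, t)
    print("LSTM_model loss:", loss.item(), "logits shape:", tuple(logits.shape))

    bilstm_model = BiLSTMModel(vocab_size, n_hidden)
    loss, logits = bilstm_model(X, t)
    print("BiLSTMModel loss:", loss.item(), "logits shape:", tuple(logits.shape))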