GPT-2 classification head: use last 5 hidden states and add a new dense layer

2023-02-12 23:38:32 +01:00 · 2023-02-12 23:38:32 +01:00 · 858ef8fc58
commit 858ef8fc58
parent 5acc5cd96a
1 changed file with 7 additions and 11 deletions
--- a/gpt2.py
+++ b/gpt2.py
@@ -60,18 +60,16 @@ class GPT2ClassificationHeadCustom(nn.Module):
        hidden_size = config.n_embd
        self.dense_1_input = nn.Linear(hidden_size, 2 * hidden_size)
        self.dense_1_hidden = nn.Linear(hidden_size, 2 * hidden_size)
-        self.dense_2 = nn.Linear(4 * hidden_size, hidden_size)
+        self.dense_2 = nn.Linear(4 * hidden_size, 2 * hidden_size)
+        self.dense_3 = nn.Linear(2 * hidden_size, hidden_size)
        self.dropout = nn.Dropout(config.resid_pdrop)
        self.out_proj = nn.Linear(hidden_size, config.num_labels, bias=False)

    def forward(self, x, **kwargs):
        if 'hidden_states' in kwargs and kwargs['hidden_states'] is not None:
-            # Get hidden states from second from the end
-            print('Hidden states found!')
-            print(len(kwargs['hidden_states']))
-            hidden = kwargs['hidden_states'][-2]
+            # Get last 5 hidden states from the end
+            hidden = kwargs['hidden_states'][-5:]
        else:
-            print('no hidden states :(')
            hidden = torch.zeros(x.size(), dtype=x.dtype, device=x.device)

        x = self.dense_1_input(x)
@@ -86,13 +84,11 @@ class GPT2ClassificationHeadCustom(nn.Module):
        x = self.dense_2(x)
        x = torch.relu(x)
        x = self.dropout(x)
-        
+
+        x = self.dense_3(x)
        x = torch.relu(x)
        x = self.dropout(x)
-        
-        x = torch.relu(x)
-        x = self.dropout(x)
-	
+
        x = self.out_proj(x)
        return x