diff --git a/roberta.py b/roberta.py
index c581d18..be4fda1 100644
--- a/roberta.py
+++ b/roberta.py
@@ -53,7 +53,7 @@ class RobertaForSequenceClassificationCustomSimple(RobertaForSequenceClassificat
 
 # Version with custom forward 1
 #
-
+####### EDITED #######
 class RobertaClassificationHeadCustom(nn.Module):
     """Head for sentence-level classification tasks."""
 
@@ -70,6 +70,7 @@ class RobertaClassificationHeadCustom(nn.Module):
             config.classifier_dropout if config.classifier_dropout is not None else config.hidden_dropout_prob
         )
         self.dropout = nn.Dropout(classifier_dropout)
+        self.leaky_relu = nn.LeakyReLU()
         self.out_proj = nn.Linear(hidden_size, config.num_labels)
 
     def forward(self, features, **kwargs):
@@ -98,17 +99,16 @@ class RobertaClassificationHeadCustom(nn.Module):
             )
 
         x = self.dense_1(x)
-        x = torch.relu(x)
+        x = self.leaky_relu(x)
         x = self.dropout(x)
         x = self.dense_2(x)
-        x = torch.relu(x)
+        x = self.leaky_relu(x)
         x = self.dropout(x)
         x = self.out_proj(x)
         return x
 
 
 
-
 class RobertaForSequenceClassificationCustom(RobertaForSequenceClassification):
     _keys_to_ignore_on_load_missing = [r"position_ids"]
 
@@ -118,6 +118,7 @@ class RobertaForSequenceClassificationCustom(RobertaForSequenceClassification):
         self.config = config
 
         self.roberta = RobertaModel(config, add_pooling_layer=False)
+        ####### EDITED #######
         self.classifier = RobertaClassificationHeadCustom(config)
 
         # Initialize weights and apply final processing
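
For reference, a minimal self-contained sketch of what the edited head computes after this patch (dense_1 -> LeakyReLU -> dropout -> dense_2 -> LeakyReLU -> dropout -> out_proj). The hidden size, label count, dropout probability, and the <s>-token pooling are assumptions for illustration; the real class reads these from the RobertaConfig and builds its input inside the custom forward shown in the diff.

# Sketch only: mirrors the post-edit activation path, not the full roberta.py head.
import torch
import torch.nn as nn


class ClassificationHeadSketch(nn.Module):
    """Two dense layers with LeakyReLU and dropout, then a projection to num_labels."""

    def __init__(self, hidden_size=768, num_labels=2, dropout_prob=0.1):
        super().__init__()
        self.dense_1 = nn.Linear(hidden_size, hidden_size)
        self.dense_2 = nn.Linear(hidden_size, hidden_size)
        self.dropout = nn.Dropout(dropout_prob)
        self.leaky_relu = nn.LeakyReLU()  # replaces the earlier torch.relu calls
        self.out_proj = nn.Linear(hidden_size, num_labels)

    def forward(self, features):
        x = features[:, 0, :]  # assumption: pool the <s> token as the stock RoBERTa head does
        x = self.dropout(x)
        x = self.dense_1(x)
        x = self.leaky_relu(x)
        x = self.dropout(x)
        x = self.dense_2(x)
        x = self.leaky_relu(x)
        x = self.dropout(x)
        x = self.out_proj(x)
        return x


# Smoke test on random data: batch of 4 sequences, 16 tokens, hidden size 768.
logits = ClassificationHeadSketch()(torch.randn(4, 16, 768))
print(logits.shape)  # torch.Size([4, 2])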