1

I'm trying to create a custom model that takes sentence and POS tag as input as well, but the model is predicting the same label over and over for each token. Well, I tried different parameters (e.g., for learning rate, I tried 1e-5 to even 1), but it's still acting the same. `

class CustomDebertaEmbeddings(DebertaV2Embeddings):
    def __init__(self, config):
        super().__init__(config)
        self.num_pos_tags = config.num_pos_tags
        self.num_constituency_labels = config.num_constituency_labels
    self.projection = nn.Linear(
        config.hidden_size + config.num_pos_tags + config.num_constituency_labels,
        config.hidden_size
    )

def forward(self, input_ids, token_type_ids=None, position_ids=None, pos_tags1=None, constituency_labels1=None):
    embeddings = super().forward(input_ids, token_type_ids, position_ids)

    pos_one_hot1 = F.one_hot(pos_tags1, num_classes=self.num_pos_tags).float()
    const_one_hot1 = F.one_hot(constituency_labels1, num_classes=self.num_constituency_labels).float()




    embeddings = torch.cat([embeddings, pos_one_hot1, const_one_hot1,], dim=-1)
    embeddings = self.projection(embeddings)


    # embeddings = self.LayerNorm(embeddings)
    embeddings = self.dropout(embeddings)
    return embeddings

# Custom Model for Token Classification
class CustomModel(nn.Module):
    def __init__(self, model_name, num_labels, num_pos_tags=pos, num_constituency_labels=cos):
        super().__init__()
        model_path = model_mapping.get(model_name, 'xlm-roberta-base')


        self.config = AutoConfig.from_pretrained(model_path, num_labels=num_labels)

        self.config.num_pos_tags = num_pos_tags
        self.config.num_constituency_labels = num_constituency_labels
        self.num_labels = num_labels
        self.dropout = nn.Dropout()
        self.model = AutoModelForTokenClassification.from_pretrained(model_path, config=self.config)
        self.loss_fn = nn.CrossEntropyLoss(ignore_index=-100)
        self.model.deberta.embeddings = CustomDebertaEmbeddings(self.config)


    def forward(self, input_ids, attention_mask=None, token_type_ids=None, position_ids=None,
                pos_tags1=None, constituency_labels1=None, labels=None):


        embeddings = self.model.deberta.embeddings(input_ids, token_type_ids, position_ids,pos_tags1,constituency_labels1,pos_tags2, constituency_labels2,pos_tags3, constituency_labels3)
        encoder_outputs = self.model.deberta.encoder(embeddings, attention_mask=attention_mask)


        sequence_output = encoder_outputs[0]
        sequence_output = self.dropout(sequence_output)
        logits = self.model.classifier(sequence_output)

        loss = None
        if labels is not None:
            loss = self.loss_fn(logits.view(-1, self.num_labels), labels.view(-1))

        return {"loss": loss, "logits": logits} if loss is not None else {"logits": logits}

Finally, I even tried commenting out the concatenating part (I thought it would just behave like Deberta model and give similar scores to what I got without using this custom model); the model is still performing the same even after removing the concatenating part, i.e., still predicting every label as 1.

embeddings = torch.cat([embeddings, pos_one_hot1, const_one_hot1,], dim=-1)
embeddings = torch.cat([embeddings, pos_one_hot1, const_one_hot1,], dim=-1)      
embeddings = self.projection(embeddings)
embeddings = self.dropout(embeddings)```
Animy
  • 11
  • 1

0 Answers0