I'm trying to build a custom token classification model that takes POS tags and constituency labels as extra inputs alongside the sentence, but it predicts the same label for every token. I've tried different hyperparameters (for the learning rate, everything from 1e-5 all the way up to 1), and the behavior doesn't change.

```
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoConfig, AutoModelForTokenClassification
from transformers.models.deberta_v2.modeling_deberta_v2 import DebertaV2Embeddings


class CustomDebertaEmbeddings(DebertaV2Embeddings):
    def __init__(self, config):
        super().__init__(config)
        self.num_pos_tags = config.num_pos_tags
        self.num_constituency_labels = config.num_constituency_labels
        # Project [token embedding | POS one-hot | constituency one-hot]
        # back down to hidden_size so the encoder sees the expected width.
        self.projection = nn.Linear(
            config.hidden_size + config.num_pos_tags + config.num_constituency_labels,
            config.hidden_size,
        )

    def forward(self, input_ids, token_type_ids=None, position_ids=None,
                pos_tags1=None, constituency_labels1=None):
        embeddings = super().forward(input_ids, token_type_ids, position_ids)
        pos_one_hot1 = F.one_hot(pos_tags1, num_classes=self.num_pos_tags).float()
        const_one_hot1 = F.one_hot(constituency_labels1, num_classes=self.num_constituency_labels).float()
        embeddings = torch.cat([embeddings, pos_one_hot1, const_one_hot1], dim=-1)
        embeddings = self.projection(embeddings)
        # embeddings = self.LayerNorm(embeddings)
        embeddings = self.dropout(embeddings)
        return embeddings
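
# Shape check for the concatenation step above (sizes are hypothetical,
# just to illustrate what torch.cat + self.projection do):
#   embeddings      : (batch, seq_len, hidden_size)              e.g. (8, 128, 768)
#   pos_one_hot1    : (batch, seq_len, num_pos_tags)             e.g. (8, 128, 18)
#   const_one_hot1  : (batch, seq_len, num_constituency_labels)  e.g. (8, 128, 26)
#   after cat       : (8, 128, 768 + 18 + 26) -> projection -> (8, 128, 768)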

# Custom model for token classification
class CustomModel(nn.Module):
    # `model_mapping`, `pos`, and `cos` are defined earlier in my script.
    def __init__(self, model_name, num_labels, num_pos_tags=pos, num_constituency_labels=cos):
        super().__init__()
        model_path = model_mapping.get(model_name, 'xlm-roberta-base')
        self.config = AutoConfig.from_pretrained(model_path, num_labels=num_labels)
        self.config.num_pos_tags = num_pos_tags
        self.config.num_constituency_labels = num_constituency_labels
        self.num_labels = num_labels
        self.dropout = nn.Dropout()
        self.model = AutoModelForTokenClassification.from_pretrained(model_path, config=self.config)
        self.loss_fn = nn.CrossEntropyLoss(ignore_index=-100)
        # Swap in the custom embeddings (this assumes a DeBERTa checkpoint,
        # since other backbones have no `.deberta` attribute).
        self.model.deberta.embeddings = CustomDebertaEmbeddings(self.config)

    def forward(self, input_ids, attention_mask=None, token_type_ids=None, position_ids=None,
                pos_tags1=None, constituency_labels1=None, labels=None):
        embeddings = self.model.deberta.embeddings(
            input_ids, token_type_ids, position_ids, pos_tags1, constituency_labels1
        )
        encoder_outputs = self.model.deberta.encoder(embeddings, attention_mask=attention_mask)
        sequence_output = encoder_outputs[0]
        sequence_output = self.dropout(sequence_output)
        logits = self.model.classifier(sequence_output)
        loss = None
        if labels is not None:
            loss = self.loss_fn(logits.view(-1, self.num_labels), labels.view(-1))
        return {"loss": loss, "logits": logits} if loss is not None else {"logits": logits}
```
Finally, I even tried commenting out the concatenation and projection (I expected the model to then behave like plain DeBERTa and give scores similar to what I got without this custom wrapper), but it performs the same even with those lines removed, i.e., it still predicts every token as label 1:

```
# embeddings = torch.cat([embeddings, pos_one_hot1, const_one_hot1], dim=-1)
# embeddings = self.projection(embeddings)
embeddings = self.dropout(embeddings)
```
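
For comparison, the baseline that gave me reasonable scores is just the stock token classification model. A minimal sketch, using the same hypothetical checkpoint and inputs as the smoke test above:

```
from transformers import AutoModelForTokenClassification

# Plain DeBERTa baseline (hypothetical checkpoint name, as above).
baseline = AutoModelForTokenClassification.from_pretrained(
    "microsoft/mdeberta-v3-base", num_labels=5
)
baseline.eval()

with torch.no_grad():
    baseline_out = baseline(input_ids=input_ids, attention_mask=attention_mask)

# Same inputs as the smoke test above; this setup trained normally for me.
print(baseline_out.logits.argmax(dim=-1))
```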