This is the code I use to train a reward model:
import os
import torch
from datasets import load_dataset, Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
    logging,
    HfArgumentParser
)
import pandas as pd
from peft import LoraConfig, TaskType
from trl import RewardConfig, RewardTrainer
df = pd.read_csv('data.csv')
raw_dataset = Dataset.from_pandas(df[:3])
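# data.csv is expected to contain "prompt", "chosen" and "rejected" columns
# (these are the fields used in formatting_func below).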
model_id = 'meta-llama/Llama-2-7b-hf'
compute_dtype = getattr(torch, "float16")
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False,
)
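# Note: quant_config above is defined but not passed to from_pretrained below;
# hf_auth is my Hugging Face access token (defined elsewhere in the notebook).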
model = AutoModelForCausalLM.from_pretrained(model_id, use_auth_token=hf_auth)
model.config.use_cache = False
model.config.pretraining_tp = 1
tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=hf_auth)
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
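# A new [PAD] token is added to the vocabulary here. Note that the script never
# calls model.resize_token_embeddings(len(tokenizer)) afterwards, which is the
# step usually paired with adding new special tokens.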
def formatting_func(examples):
    kwargs = {
        "padding": "max_length",
        "truncation": True,
        "max_length": 256,
        "return_tensors": "pt"
    }
    # Prepend the prompt and a line break to the chosen and rejected responses.
    prompt_plus_chosen_response = examples["prompt"] + "\n" + examples["chosen"]
    prompt_plus_rejected_response = examples["prompt"] + "\n" + examples["rejected"]
    # Tokenize the modified fields.
    tokens_chosen = tokenizer.encode_plus(prompt_plus_chosen_response, **kwargs)
    tokens_rejected = tokenizer.encode_plus(prompt_plus_rejected_response, **kwargs)
    return {
        "input_ids": tokens_chosen["input_ids"][0],
        "attention_mask": tokens_chosen["attention_mask"][0],
        "labels": tokens_rejected["input_ids"][0],  # Use rejected as labels for causal LM
        "input_ids_chosen": tokens_chosen["input_ids"][0],
        "attention_mask_chosen": tokens_chosen["attention_mask"][0],
        "input_ids_rejected": tokens_rejected["input_ids"][0],
        "attention_mask_rejected": tokens_rejected["attention_mask"][0],
    }
raw_datasets = raw_dataset.map(formatting_func)
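# As far as I understand, RewardTrainer only reads the input_ids_chosen /
# attention_mask_chosen / input_ids_rejected / attention_mask_rejected columns;
# the plain input_ids / attention_mask / labels keys are extra.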
OUTPUT_DIR = "/kaggle/working/"
training_args = RewardConfig(
    output_dir=OUTPUT_DIR,
    num_train_epochs=10,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_steps=25,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant",
    no_cuda=False,
    report_to="wandb",
    run_name="reward_model",
)
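# Note: fp16 and bf16 are both disabled here, even though the BitsAndBytesConfig
# above sets bnb_4bit_compute_dtype to float16.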
peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    inference_mode=False,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
)
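# Note: the LoRA config uses TaskType.SEQ_CLS, but the base model above was loaded
# with AutoModelForCausalLM; the TRL reward-modeling examples I have seen load
# AutoModelForSequenceClassification with num_labels=1 instead.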
trainer = RewardTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=raw_datasets,
    peft_config=peft_config,
    # max_length=None
)
trainer.train()
This code gives IndexError: index out of range in self on Google Colab. I am also using Kaggle notebooks with two T4 GPUs, and I cannot load the model on either setup. Can anyone tell me what the issue is?
On Kaggle I get:
RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with TORCH_USE_CUDA_DSA to enable device-side assertions.
When I load the model on the CPU without quantization, it then shows:
IndexError: index out of range in self
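One thing I suspect is that the [PAD] token added above gets id 32000 (Llama-2's vocabulary has 32000 tokens, ids 0-31999), while the model's embedding matrix is never resized, so any sequence containing the pad id triggers an out-of-range embedding lookup. That would explain both errors: on CPU the lookup raises IndexError: index out of range in self, and on CUDA it surfaces as a device-side assert. Below is a minimal check I can run with the model and tokenizer defined above; this is just a sketch of my guess, not a confirmed fix:

# Compare the tokenizer's vocabulary size with the model's embedding table.
num_embeddings = model.get_input_embeddings().weight.shape[0]
print("embedding rows:", num_embeddings)        # 32000 for Llama-2-7b
print("len(tokenizer):", len(tokenizer))        # 32001 after adding [PAD]
print("pad_token_id:", tokenizer.pad_token_id)  # 32000 -> one past the last row

# If the tokenizer produces ids >= num_embeddings, the embedding lookup fails
# ("index out of range in self" on CPU, device-side assert on CUDA).
if len(tokenizer) > num_embeddings:
    model.resize_token_embeddings(len(tokenizer))

Is this the actual cause, or is something else wrong with how I set up RewardTrainer?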