
This is the code I use to train a reward model:

import os
import torch
from datasets import load_dataset,Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
    logging,
    HfArgumentParser
)
import pandas as pd
from peft import LoraConfig, TaskType
from trl import RewardConfig, RewardTrainer

df = pd.read_csv('data.csv')
raw_dataset = Dataset.from_pandas(df[:3])

model_id = 'meta-llama/Llama-2-7b-hf'

compute_dtype = getattr(torch, "float16")

quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False,
)

model = AutoModelForCausalLM.from_pretrained(model_id, use_auth_token=hf_auth)
model.config.use_cache = False
model.config.pretraining_tp = 1

tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=hf_auth)
tokenizer.add_special_tokens({'pad_token': '[PAD]'})

def formatting_func(examples):
    kwargs = {
        "padding": "max_length",
        "truncation": True,
        "max_length": 256,
        "return_tensors": "pt"
    }

    # Prepend the prompt and a line break to the chosen and rejected responses.
    prompt_plus_chosen_response = examples["prompt"] + "\n" + examples["chosen"]
    prompt_plus_rejected_response = examples["prompt"] + "\n" + examples["rejected"]

    # Tokenize the modified fields.
    tokens_chosen = tokenizer.encode_plus(prompt_plus_chosen_response, **kwargs)
    tokens_rejected = tokenizer.encode_plus(prompt_plus_rejected_response, **kwargs)

    return {
        "input_ids": tokens_chosen["input_ids"][0],
        "attention_mask": tokens_chosen["attention_mask"][0],
        "labels": tokens_rejected["input_ids"][0],  # Use rejected as labels for causal LM
        "input_ids_chosen": tokens_chosen["input_ids"][0],
        "attention_mask_chosen": tokens_chosen["attention_mask"][0],
        "input_ids_rejected": tokens_rejected["input_ids"][0],
        "attention_mask_rejected": tokens_rejected["attention_mask"][0],
    }

raw_datasets = raw_dataset.map(formatting_func)

OUTPUT_DIR = "/kaggle/working/"

training_args = RewardConfig(
    output_dir=OUTPUT_DIR,
    num_train_epochs=10,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_steps=25,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant",
    no_cuda=False,
    report_to="wandb",
    run_name="reward_model",
)

peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    inference_mode=False,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
)

trainer = RewardTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=raw_datasets,
    peft_config=peft_config,
    # max_length=None
)

trainer.train()

This code gives "IndexError: index out of range in self" in Google Colab. I am also using Kaggle notebooks with 2x T4 GPUs, and I cannot load this model on either GPU. Can anyone tell me what the issue is?

Kaggle: RuntimeError: CUDA error: device-side assert triggered CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. For debugging consider passing CUDA_LAUNCH_BLOCKING=1. Compile with TORCH_USE_CUDA_DSA to enable device-side assertions.
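As the Kaggle error message itself suggests, a more precise stacktrace can be obtained by setting CUDA_LAUNCH_BLOCKING=1 before any CUDA work runs. A minimal sketch (this has to go at the very top of the notebook, before the model is loaded):

import os

# Force synchronous CUDA kernel launches so the failing call shows up in the stacktrace.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"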

When I load the model on the CPU without quantization, it shows:

IndexError: index out of range in self
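For context, "index out of range in self" is what torch.nn.Embedding raises when a token id falls outside the embedding table. A hypothetical CPU-side check (not part of the original code) that compares the largest tokenized id against the model's embedding size:

# Hypothetical diagnostic: compare the largest token id in one mapped example
# against the number of rows in the model's input embedding table.
sample = raw_datasets[0]
max_id = max(sample["input_ids_chosen"] + sample["input_ids_rejected"])
embedding_rows = model.get_input_embeddings().num_embeddings
print(max_id, embedding_rows)  # max_id >= embedding_rows reproduces this IndexError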

1 Answer


The problem is solved.

The issue is max_length. When a lower value is used for max_length, the error does not occur, which means a 30 GB GPU is not enough for this process at the original max_length.
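For anyone hitting the same error, the only change is the max_length in the kwargs passed to tokenizer.encode_plus inside formatting_func. A minimal sketch assuming 128 as the lower value (the exact number is a choice, not something the answer prescribes):

# Same kwargs as in formatting_func above, with only max_length reduced.
kwargs = {
    "padding": "max_length",
    "truncation": True,
    "max_length": 128,   # was 256; lower values reduce peak memory per sequence
    "return_tensors": "pt"
}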