diff --git a/backend/generate_answer.py b/backend/generate_answer.py
index 2dde683..4bce9b6 100644
--- a/backend/generate_answer.py
+++ b/backend/generate_answer.py
@@ -80,20 +80,28 @@ def generate_note(page_content, note_content, model, tokenizer):
         state = model.get_state(page_content, note_content)
         outputs = model.act(state)
         return tokenizer.decode(outputs[0], skip_special_tokens=True)
-    else:
-        # fallback to original implementation
-        inputs = tokenizer(f"I did not like this note: {note_content}. Generate new notes for the given content: {page_content}", return_tensors="pt")
-        # move inputs to same device as model
-        inputs = {k: v.to(device) for k, v in inputs.items()}
-
-        outputs = model.generate(
-            **inputs,
-            max_length=2048,
-            num_return_sequences=1,
-            pad_token_id=tokenizer.eos_token_id,
-            temperature=0.7,
-        )
-        return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+    # fallback to original implementation
+    inputs = tokenizer(f"I did not like this note: {note_content}. Generate new notes for the given content: {page_content}", return_tensors="pt")
+    # move inputs to same device as model
+    inputs = {k: v.to(device) for k, v in inputs.items()}
+
+    outputs = model.generate(
+        **inputs,
+        max_length=2048,
+        pad_token_id=tokenizer.eos_token_id,
+        num_return_sequences=1,
+        temperature=0.7,
+        output_scores=False,  # Exclude unnecessary scores
+        return_dict_in_generate=True,  # Return generation metadata
+    )
+
+    # Extract the generated tokens beyond the input tokens
+    generated_tokens = outputs.sequences[0][inputs['input_ids'].shape[-1]:]
+
+    # Decode the generated tokens
+    final_output = tokenizer.decode(generated_tokens, skip_special_tokens=True)
+    return final_output
 
 
 def generate_quiz_review(origin_content, wrong_questions, student_history):
     rl_model = RLModel(Config.MODEL_NAME)
diff --git a/backend/lora.py b/backend/lora.py
index 04fdd59..e495882 100644
--- a/backend/lora.py
+++ b/backend/lora.py
@@ -9,6 +9,7 @@ def fine_tune_and_save_lora_weights(model_name, data, output_dir="./lora_weights
     """
     Fine-tunes the model using the given dataset and saves the LoRA weights.
""" + device = "cuda" if torch.cuda.is_available() else "cpu" dataset = Dataset.from_list(data) # Use GPU if available @@ -21,14 +22,15 @@ def fine_tune_and_save_lora_weights(model_name, data, output_dir="./lora_weights ) # Load tokenizer and model - tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="right", truncation_side="right") + tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="right", truncation_side="right", max_next_tokens=300, token=Config.HUGGINGFACE_ACCESS_TOKEN) tokenizer.pad_token = tokenizer.eos_token model = AutoModelForCausalLM.from_pretrained( model_name, device_map="auto", quantization_config=bnb_config, - torch_dtype=torch.float16 + torch_dtype=torch.float16, + token=Config.HUGGINGFACE_ACCESS_TOKEN ).to(device) model.config.pad_token_id = tokenizer.pad_token_id model.config.use_cache = False @@ -125,12 +127,13 @@ def apply_lora_weights_to_model(base_model_name, lora_weights_dir): ) # Load tokenizer and base model - tokenizer = AutoTokenizer.from_pretrained(base_model_name, padding_side="right", truncation_side="right") + tokenizer = AutoTokenizer.from_pretrained(base_model_name, padding_side="right", truncation_side="right", token=Config.HUGGINGFACE_ACCESS_TOKEN, max_new_tokens=300) base_model = AutoModelForCausalLM.from_pretrained( base_model_name, device_map="auto", quantization_config=bnb_config, torch_dtype=torch.float16, + token=Config.HUGGINGFACE_ACCESS_TOKEN, ).to(device) # Apply LoRA weights