import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Initialize the tokenizer and model (T5 is an encoder-decoder, so
# T5ForConditionalGeneration is the class with a language-modeling head;
# there is no T5ForCausalLM in transformers)
tokenizer = T5Tokenizer.from_pretrained("t5-base")
model = T5ForConditionalGeneration.from_pretrained("t5-base")

# Read in your dataset, one training example per line
with open("cyberpunk_lore.txt", "r") as f:
    lines = [line.strip() for line in f if line.strip()]

# Tokenize the dataset (the tokenizer expects a list of strings, not one blob)
input_ids = tokenizer(lines, padding=True, truncation=True, return_tensors="pt")["input_ids"]

# Mask out padding so it does not contribute to the loss
labels = input_ids.clone()
labels[labels == tokenizer.pad_token_id] = -100

# Fine-tune the model with a plain PyTorch loop
# (transformers models have no Keras-style .fit method)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
model.train()
for epoch in range(5):
    optimizer.zero_grad()
    loss = model(input_ids=input_ids, labels=labels).loss
    loss.backward()
    optimizer.step()
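
# Quick smoke test after fine-tuning: generate a continuation from the
# tuned model. This is a minimal sketch; the prompt string below is a
# made-up example, not part of the original script.
model.eval()
prompt_ids = tokenizer("Night City", return_tensors="pt")["input_ids"]
output_ids = model.generate(prompt_ids, max_new_tokens=50)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))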