"""Fine-tune a T5 model on a plain-text corpus.

Fixes over the original one-liner:
  * T5 is an encoder-decoder; ``T5ForCausalLM`` does not exist in
    ``transformers`` -- use ``T5ForConditionalGeneration``.
  * Hugging Face PyTorch models have no Keras-style ``.fit`` -- run an
    explicit training loop with an optimizer.
  * ``batch_encode_plus`` on a bare ``str`` tokenizes it character by
    character; encode the whole text once and split into fixed-length
    chunks instead.
"""
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer

MODEL_NAME = "t5-base"
DATA_PATH = "cyberpunk_lore.txt"
LEARNING_RATE = 1e-5
NUM_EPOCHS = 5
MAX_LENGTH = 512  # T5's conventional maximum input length


def main() -> None:
    """Fine-tune ``t5-base`` on the text stored at ``DATA_PATH``."""
    tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)
    model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME)

    with open(DATA_PATH, "r", encoding="utf-8") as f:
        text = f.read()

    # Encode the corpus once, then slice into MAX_LENGTH-token training
    # chunks so no example exceeds the model's input limit.
    token_ids = tokenizer.encode(text, add_special_tokens=False)
    chunks = [
        token_ids[i : i + MAX_LENGTH]
        for i in range(0, len(token_ids), MAX_LENGTH)
    ]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.train()
    optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE)

    for epoch in range(NUM_EPOCHS):
        total_loss = 0.0
        for chunk in chunks:
            input_ids = torch.tensor([chunk], dtype=torch.long, device=device)
            # NOTE(review): mirrors the original's intent of using the input
            # as its own target (identity reconstruction). Confirm whether a
            # span-corruption / denoising objective was intended for T5.
            outputs = model(input_ids=input_ids, labels=input_ids)
            optimizer.zero_grad()
            outputs.loss.backward()
            optimizer.step()
            total_loss += outputs.loss.item()
        print(f"epoch {epoch + 1}/{NUM_EPOCHS} - mean loss "
              f"{total_loss / max(len(chunks), 1):.4f}")


if __name__ == "__main__":
    main()