hdallatorre committed on
Commit
6fdc78c
1 Parent(s): f1ff02b

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +0 -5
README.md CHANGED
@@ -42,11 +42,6 @@ import torch
42
  tokenizer = AutoTokenizer.from_pretrained("InstaDeepAI/nucleotide-transformer-v2-50m-multi-species", trust_remote_code=True)
43
  model = AutoModelForMaskedLM.from_pretrained("InstaDeepAI/nucleotide-transformer-v2-50m-multi-species", trust_remote_code=True)
44
 
45
- # Choose the length to which the input sequences are padded. By default, the
46
- # model max length is chosen, but feel free to decrease it as the time taken to
47
- # obtain the embeddings increases significantly with it.
48
- max_length = tokenizer.model_max_length
49
-
50
  # Create a dummy dna sequence and tokenize it
51
  sequences = ["ATTCCGATTCCGATTCCG", "ATTTCTCTCTCTCTCTGAGATCGATCGATCGAT"]
52
  tokens_ids = tokenizer.batch_encode_plus(sequences, return_tensors="pt", padding="max_length", max_length = max_length)["input_ids"]
 
42
  tokenizer = AutoTokenizer.from_pretrained("InstaDeepAI/nucleotide-transformer-v2-50m-multi-species", trust_remote_code=True)
43
  model = AutoModelForMaskedLM.from_pretrained("InstaDeepAI/nucleotide-transformer-v2-50m-multi-species", trust_remote_code=True)
44
 
 
 
 
 
 
45
  # Create a dummy dna sequence and tokenize it
46
  sequences = ["ATTCCGATTCCGATTCCG", "ATTTCTCTCTCTCTCTGAGATCGATCGATCGAT"]
47
  tokens_ids = tokenizer.batch_encode_plus(sequences, return_tensors="pt", padding="max_length", max_length = max_length)["input_ids"]