hdallatorre
committed on
Commit
•
6fdc78c
1
Parent(s):
f1ff02b
Update README.md
Browse files
README.md
CHANGED
@@ -42,11 +42,6 @@ import torch
|
|
42 |
tokenizer = AutoTokenizer.from_pretrained("InstaDeepAI/nucleotide-transformer-v2-50m-multi-species", trust_remote_code=True)
|
43 |
model = AutoModelForMaskedLM.from_pretrained("InstaDeepAI/nucleotide-transformer-v2-50m-multi-species", trust_remote_code=True)
|
44 |
|
45 |
-
# Choose the length to which the input sequences are padded. By default, the
|
46 |
-
# model max length is chosen, but feel free to decrease it as the time taken to
|
47 |
-
# obtain the embeddings increases significantly with it.
|
48 |
-
max_length = tokenizer.model_max_length
|
49 |
-
|
50 |
# Create a dummy dna sequence and tokenize it
|
51 |
sequences = ["ATTCCGATTCCGATTCCG", "ATTTCTCTCTCTCTCTGAGATCGATCGATCGAT"]
|
52 |
tokens_ids = tokenizer.batch_encode_plus(sequences, return_tensors="pt", padding="max_length", max_length = max_length)["input_ids"]
|
|
|
42 |
tokenizer = AutoTokenizer.from_pretrained("InstaDeepAI/nucleotide-transformer-v2-50m-multi-species", trust_remote_code=True)
|
43 |
model = AutoModelForMaskedLM.from_pretrained("InstaDeepAI/nucleotide-transformer-v2-50m-multi-species", trust_remote_code=True)
|
44 |
|
|
|
|
|
|
|
|
|
|
|
45 |
# Create a dummy DNA sequence and tokenize it
|
46 |
sequences = ["ATTCCGATTCCGATTCCG", "ATTTCTCTCTCTCTCTGAGATCGATCGATCGAT"]
|
47 |
tokens_ids = tokenizer.batch_encode_plus(sequences, return_tensors="pt", padding="max_length", max_length=tokenizer.model_max_length)["input_ids"]
|