hdallatorre
committed on
Commit
•
6fdc78c
1
Parent(s):
f1ff02b
Update README.md
Browse files
README.md
CHANGED
@@ -42,11 +42,6 @@ import torch
|
|
42 |
tokenizer = AutoTokenizer.from_pretrained("InstaDeepAI/nucleotide-transformer-v2-50m-multi-species", trust_remote_code=True)
|
43 |
model = AutoModelForMaskedLM.from_pretrained("InstaDeepAI/nucleotide-transformer-v2-50m-multi-species", trust_remote_code=True)
|
44 |
|
45 |
-
# Choose the length to which the input sequences are padded. By default, the
|
46 |
-
# model max length is chosen, but feel free to decrease it as the time taken to
|
47 |
-
# obtain the embeddings increases significantly with it.
|
48 |
-
max_length = tokenizer.model_max_length
|
49 |
-
|
50 |
# Create a dummy dna sequence and tokenize it
|
51 |
sequences = ["ATTCCGATTCCGATTCCG", "ATTTCTCTCTCTCTCTGAGATCGATCGATCGAT"]
|
52 |
tokens_ids = tokenizer.batch_encode_plus(sequences, return_tensors="pt", padding="max_length", max_length = max_length)["input_ids"]
|
|
|
42 |
tokenizer = AutoTokenizer.from_pretrained("InstaDeepAI/nucleotide-transformer-v2-50m-multi-species", trust_remote_code=True)
|
43 |
model = AutoModelForMaskedLM.from_pretrained("InstaDeepAI/nucleotide-transformer-v2-50m-multi-species", trust_remote_code=True)
|
44 |
|
|
|
|
|
|
|
|
|
|
|
45 |
# Create a dummy DNA sequence and tokenize it
|
46 |
sequences = ["ATTCCGATTCCGATTCCG", "ATTTCTCTCTCTCTCTGAGATCGATCGATCGAT"]
|
47 |
tokens_ids = tokenizer.batch_encode_plus(sequences, return_tensors="pt", padding="max_length", max_length=tokenizer.model_max_length)["input_ids"]
|