"Unable to load vocabulary from file. "

#2
by navins - opened

I'm trying to use CrossEncoder('cross-encoder/nli-deberta-v3-xsmall'), but I'm getting the following error:


TypeError Traceback (most recent call last)
Cell In[8], line 2
1 from sentence_transformers import CrossEncoder
----> 2 model = CrossEncoder('cross-encoder/nli-deberta-v3-xsmall')
3 scores = model.predict([('A man is eating pizza', 'A man eats something'), ('A black race car starts up in front of a crowd of people.', 'A man is driving down a lonely road.')])
5 #Convert scores to labels

File ~\AppData\Roaming\Python\Python311\site-packages\sentence_transformers\cross_encoder\CrossEncoder.py:51, in CrossEncoder.__init__(self, model_name, num_labels, max_length, device, tokenizer_args, automodel_args, default_activation_function)
48 self.config.num_labels = num_labels
50 self.model = AutoModelForSequenceClassification.from_pretrained(model_name, config=self.config, **automodel_args)
---> 51 self.tokenizer = AutoTokenizer.from_pretrained(model_name, **tokenizer_args)
52 self.max_length = max_length
54 if device is None:

File ~\AppData\Roaming\Python\Python311\site-packages\transformers\models\auto\tokenization_auto.py:702, in AutoTokenizer.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
698 if tokenizer_class is None:
699 raise ValueError(
700 f"Tokenizer class {tokenizer_class_candidate} does not exist or is not currently imported."
701 )
--> 702 return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
704 # Otherwise we have to be creative.
705 # if model is an encoder decoder, the encoder tokenizer class is used by default
706 if isinstance(config, EncoderDecoderConfig):

File ~\AppData\Roaming\Python\Python311\site-packages\transformers\tokenization_utils_base.py:1811, in PreTrainedTokenizerBase.from_pretrained(cls, pretrained_model_name_or_path, *init_inputs, **kwargs)
1808 else:
1809 logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
-> 1811 return cls._from_pretrained(
1812 resolved_vocab_files,
1813 pretrained_model_name_or_path,
1814 init_configuration,
1815 *init_inputs,
1816 use_auth_token=use_auth_token,
1817 cache_dir=cache_dir,
1818 local_files_only=local_files_only,
1819 _commit_hash=commit_hash,
1820 **kwargs,
1821 )

File ~\AppData\Roaming\Python\Python311\site-packages\transformers\tokenization_utils_base.py:1841, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, use_auth_token, cache_dir, local_files_only, _commit_hash, *init_inputs, **kwargs)
1839 has_tokenizer_file = resolved_vocab_files.get("tokenizer_file", None) is not None
1840 if (from_slow or not has_tokenizer_file) and cls.slow_tokenizer_class is not None:
-> 1841 slow_tokenizer = (cls.slow_tokenizer_class)._from_pretrained(
1842 copy.deepcopy(resolved_vocab_files),
1843 pretrained_model_name_or_path,
1844 copy.deepcopy(init_configuration),
1845 *init_inputs,
1846 use_auth_token=use_auth_token,
1847 cache_dir=cache_dir,
1848 local_files_only=local_files_only,
1849 _commit_hash=_commit_hash,
1850 **(copy.deepcopy(kwargs)),
1851 )
1852 else:
1853 slow_tokenizer = None

File ~\AppData\Roaming\Python\Python311\site-packages\transformers\tokenization_utils_base.py:1965, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, use_auth_token, cache_dir, local_files_only, _commit_hash, *init_inputs, **kwargs)
1963 # Instantiate tokenizer.
1964 try:
-> 1965 tokenizer = cls(*init_inputs, **init_kwargs)
1966 except OSError:
1967 raise OSError(
1968 "Unable to load vocabulary from file. "
1969 "Please check that the provided vocabulary is accessible and not corrupted."
1970 )

File ~\AppData\Roaming\Python\Python311\site-packages\transformers\models\deberta_v2\tokenization_deberta_v2.py:142, in DebertaV2Tokenizer.__init__(self, vocab_file, do_lower_case, split_by_punct, bos_token, eos_token, unk_token, sep_token, pad_token, cls_token, mask_token, sp_model_kwargs, **kwargs)
126 self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
128 super().__init__(
129 do_lower_case=do_lower_case,
130 bos_token=bos_token,
(...)
139 **kwargs,
140 )
--> 142 if not os.path.isfile(vocab_file):
143 raise ValueError(
144 f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained"
145 " model use tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)"
146 )
147 self.do_lower_case = do_lower_case

File <frozen genericpath>:30, in isfile(path)

TypeError: stat: path should be string, bytes, os.PathLike or integer, not NoneType

Kindly help with this as soon as possible.

Sign up or log in to comment