SpirinEgor TatonkaHF committed on
Commit
7452992
1 Parent(s): 568d9dd

Tokenizer fix (#1)

Browse files

- Tokenizer fix (2bc192c0384d35cb1821ba2f14f8d33f7135f02f)


Co-authored-by: Boris Malashenko <[email protected]>

Files changed (1) hide show
  1. tokenizer.json +5 -5
tokenizer.json CHANGED
@@ -85,8 +85,8 @@
85
  "pre_tokenizer": {
86
  "type": "Metaspace",
87
  "replacement": "▁",
88
- "prepend_scheme": "always",
89
- "split": true
90
  },
91
  "post_processor": {
92
  "type": "TemplateProcessing",
@@ -172,8 +172,8 @@
172
  "decoder": {
173
  "type": "Metaspace",
174
  "replacement": "▁",
175
- "prepend_scheme": "always",
176
- "split": true
177
  },
178
  "model": {
179
  "type": "Unigram",
@@ -184846,4 +184846,4 @@
184846
  ],
184847
  "byte_fallback": false
184848
  }
184849
- }
 
85
  "pre_tokenizer": {
86
  "type": "Metaspace",
87
  "replacement": "▁",
88
+ "add_prefix_space": true,
89
+ "prepend_scheme": "always"
90
  },
91
  "post_processor": {
92
  "type": "TemplateProcessing",
 
172
  "decoder": {
173
  "type": "Metaspace",
174
  "replacement": "▁",
175
+ "add_prefix_space": true,
176
+ "prepend_scheme": "always"
177
  },
178
  "model": {
179
  "type": "Unigram",
 
184846
  ],
184847
  "byte_fallback": false
184848
  }
184849
+ }