KurtMica committed on
Commit
11118de
1 Parent(s): b0f8c26

Model files.

Browse files
.gitattributes CHANGED
@@ -25,3 +25,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ *.th filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - allennlp
4
+ ---
5
+
6
+ # TODO: Fill this model card
best.th ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bbc539b197a21a12e9776bac545da7cc54b3b7eb2bfec740980c8c21dc602df
3
+ size 504187727
config.json ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_reader": {
3
+ "type": "multitask",
4
+ "readers": {
5
+ "pos": {
6
+ "type": "mlrs_pos",
7
+ "language_specific": true,
8
+ "token_indexers": {
9
+ "transformer": {
10
+ "type": "pretrained_transformer_mismatched",
11
+ "max_length": 512,
12
+ "model_name": "MLRS/BERTu"
13
+ }
14
+ }
15
+ }
16
+ }
17
+ },
18
+ "model": {
19
+ "type": "multitask",
20
+ "arg_name_mapping": {
21
+ "backbone": {
22
+ "tokens": "text",
23
+ "words": "text"
24
+ }
25
+ },
26
+ "backbone": {
27
+ "type": "embedder_and_mask",
28
+ "text_field_embedder": {
29
+ "token_embedders": {
30
+ "transformer": {
31
+ "type": "pretrained_transformer_mismatched_with_dropout",
32
+ "last_layer_only": false,
33
+ "layer_dropout": 0.1,
34
+ "max_length": 512,
35
+ "model_name": "MLRS/BERTu",
36
+ "tokenizer_kwargs": {},
37
+ "train_parameters": true
38
+ }
39
+ }
40
+ }
41
+ },
42
+ "heads": {
43
+ "pos": {
44
+ "type": "linear_tagger",
45
+ "dropout": 0.3,
46
+ "encoder": {
47
+ "type": "pass_through",
48
+ "input_dim": 768
49
+ },
50
+ "initializer": {
51
+ "regexes": [
52
+ [
53
+ ".*projection.*weight",
54
+ {
55
+ "type": "xavier_uniform"
56
+ }
57
+ ],
58
+ [
59
+ ".*projection.*bias",
60
+ {
61
+ "type": "zero"
62
+ }
63
+ ],
64
+ [
65
+ ".*tag_bilinear.*weight",
66
+ {
67
+ "type": "xavier_uniform"
68
+ }
69
+ ],
70
+ [
71
+ ".*tag_bilinear.*bias",
72
+ {
73
+ "type": "zero"
74
+ }
75
+ ],
76
+ [
77
+ ".*weight_ih.*",
78
+ {
79
+ "type": "xavier_uniform"
80
+ }
81
+ ],
82
+ [
83
+ ".*weight_hh.*",
84
+ {
85
+ "type": "orthogonal"
86
+ }
87
+ ],
88
+ [
89
+ ".*bias_ih.*",
90
+ {
91
+ "type": "zero"
92
+ }
93
+ ],
94
+ [
95
+ ".*bias_hh.*",
96
+ {
97
+ "type": "lstm_hidden_bias"
98
+ }
99
+ ]
100
+ ]
101
+ }
102
+ }
103
+ }
104
+ },
105
+ "train_data_path": {
106
+ "pos": "MLRS POS Gold/mlrs_pos-train.tsv"
107
+ },
108
+ "validation_data_path": {
109
+ "pos": "MLRS POS Gold/mlrs_pos-dev.tsv"
110
+ },
111
+ "trainer": {
112
+ "callbacks": [
113
+ {
114
+ "tensorboard_writer": {
115
+ "should_log_learning_rate": true,
116
+ "should_log_parameter_statistics": true
117
+ },
118
+ "type": "tensorboard"
119
+ }
120
+ ],
121
+ "cuda_device": 0,
122
+ "grad_norm": 5,
123
+ "learning_rate_scheduler": {
124
+ "type": "ulmfit_sqrt",
125
+ "affected_group_count": 2,
126
+ "decay_factor": 0.05,
127
+ "discriminative_fine_tuning": true,
128
+ "factor": 5,
129
+ "gradual_unfreezing": true,
130
+ "model_size": 1,
131
+ "start_step": 39,
132
+ "warmup_steps": 39
133
+ },
134
+ "num_epochs": 200,
135
+ "optimizer": {
136
+ "type": "huggingface_adamw",
137
+ "betas": [
138
+ 0.9,
139
+ 0.999
140
+ ],
141
+ "correct_bias": false,
142
+ "lr": 0.0005,
143
+ "parameter_groups": [
144
+ [
145
+ [
146
+ "text_field_embedder.*transformer_model.embeddings.*_embeddings.*",
147
+ "text_field_embedder.*transformer_model.encoder.*.(key|query|value|dense).weight"
148
+ ],
149
+ {}
150
+ ],
151
+ [
152
+ [
153
+ "text_field_embedder.*transformer_model.embeddings.LayerNorm.*",
154
+ "text_field_embedder.*transformer_model.encoder.*.output.LayerNorm.*",
155
+ "text_field_embedder.*transformer_model.encoder.*.(key|query|value|dense).bias",
156
+ "text_field_embedder.*transformer_model.pooler.dense.bias"
157
+ ],
158
+ {
159
+ "weight_decay": 0
160
+ }
161
+ ],
162
+ [
163
+ [
164
+ "text_field_embedder.*._scalar_mix.*",
165
+ "text_field_embedder.*transformer_model.pooler.dense.weight",
166
+ "_head_sentinel",
167
+ "head_arc_feedforward._linear_layers.*.weight",
168
+ "child_arc_feedforward._linear_layers.*.weight",
169
+ "head_tag_feedforward._linear_layers.*.weight",
170
+ "child_tag_feedforward._linear_layers.*.weight",
171
+ "arc_attention._weight_matrix",
172
+ "tag_bilinear.weight",
173
+ "tag_projection_layer._module.weight",
174
+ "crf",
175
+ "linear.weight",
176
+ "tagger_linear.weight"
177
+ ],
178
+ {}
179
+ ],
180
+ [
181
+ [
182
+ "head_arc_feedforward._linear_layers.*.bias",
183
+ "child_arc_feedforward._linear_layers.*.bias",
184
+ "head_tag_feedforward._linear_layers.*.bias",
185
+ "child_tag_feedforward._linear_layers.*.bias",
186
+ "arc_attention._bias",
187
+ "tag_bilinear.bias",
188
+ "tag_projection_layer._module.bias",
189
+ "linear.bias",
190
+ "tagger_linear.bias"
191
+ ],
192
+ {
193
+ "weight_decay": 0
194
+ }
195
+ ]
196
+ ],
197
+ "weight_decay": 0.01
198
+ },
199
+ "patience": 20,
200
+ "validation_metric": [
201
+ "+pos_accuracy"
202
+ ]
203
+ },
204
+ "data_loader": {
205
+ "type": "multitask",
206
+ "scheduler": {
207
+ "type": "unbalanced_homogeneous_roundrobin",
208
+ "batch_size": 128,
209
+ "dataset_sizes": {
210
+ "pos": 4935
211
+ }
212
+ },
213
+ "shuffle": true
214
+ },
215
+ "numpy_seed": 1337,
216
+ "pytorch_seed": 133,
217
+ "random_seed": 13370,
218
+ "validation_data_loader": {
219
+ "type": "multitask",
220
+ "scheduler": {
221
+ "type": "homogeneous_roundrobin",
222
+ "batch_size": 128
223
+ },
224
+ "shuffle": true
225
+ }
226
+ }
log/train/events.out.tfevents ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7aee610111be68267533e274ec2043701c0551d621dbcd1584fb94e9b48d0f63
3
+ size 2612432
log/validation/events.out.tfevents ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9179d2186c523786665d18865ba96699609822380230c906b79816a7231081b5
3
+ size 4630
metrics.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_epoch": 10,
3
+ "peak_worker_0_memory_MB": 4651.65234375,
4
+ "peak_gpu_0_memory_MB": 20491.86572265625,
5
+ "training_duration": "0:05:16.923942",
6
+ "training_start_epoch": 0,
7
+ "training_epochs": 29,
8
+ "epoch": 29,
9
+ "training_pos_accuracy": 0.9999208681890119,
10
+ "training_pos_accuracy_words_only": 0.9999208681890119,
11
+ "training_loss": 0.0004258193467439224,
12
+ "training_worker_0_memory_MB": 4651.65234375,
13
+ "training_gpu_0_memory_MB": 20491.86572265625,
14
+ "validation_pos_accuracy": 0.9899515204936096,
15
+ "validation_pos_accuracy_words_only": 0.9899515204936096,
16
+ "validation_loss": 0.08878542482852936,
17
+ "best_validation_pos_accuracy": 0.990304098721904,
18
+ "best_validation_pos_accuracy_words_only": 0.990304098721904,
19
+ "best_validation_loss": 0.06906605958938598
20
+ }
vocabulary/.lock ADDED
File without changes
vocabulary/non_padded_namespaces.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *labels
2
+ *tags
vocabulary/pos.txt ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @@UNKNOWN@@
2
+ NOUN
3
+ X_PUN
4
+ VERB
5
+ DEF
6
+ PREP
7
+ ADJ
8
+ NOUN_PROP
9
+ CONJ_CORD
10
+ COMP
11
+ PREP_DEF
12
+ ADV
13
+ KIEN
14
+ GEN_DEF
15
+ CONJ_SUB
16
+ GEN
17
+ PRON_INT
18
+ PRON_DEM
19
+ PART_PASS
20
+ VERB_PSEU
21
+ NEG
22
+ QUAN
23
+ PRON_PERS
24
+ X_DIG
25
+ X_ABV
26
+ PREP_PRON
27
+ GEN_PRON
28
+ FOC
29
+ PROG
30
+ PRON_INDEF
31
+ X_ENG
32
+ HEMM
33
+ FUT
34
+ NUM_CRD
35
+ LIL_DEF
36
+ PRON_PERS_NEG
37
+ PART_ACT
38
+ NUM_WHD
39
+ LIL
40
+ NUM_ORD
41
+ INT
42
+ X_BOR
43
+ PRON_REF
44
+ X_FOR
45
+ LIL_PRON
46
+ PRON_DEM_DEF
47
+ NUM_FRC
48
+ PRON_REC