Upload folder using huggingface_hub

Browse files

Files changed (9) hide show

README.md +13 -13
assets/confusion_matrix_GGU.png +0 -0
assets/confusion_matrix_sentiment.png +0 -0
assets/loss_plot_GGU.png +0 -0
assets/loss_plot_sentiment.png +0 -0
heads/GGU.pth +1 -1
heads/sentiment.pth +1 -1
multi-head-sequence-classification-model-model.pth +1 -1
train.py +12 -4

README.md CHANGED Viewed

@@ -42,7 +42,7 @@ The model is a simple sequence classification model based on hidden output layer
 The backbone of the model is BAAI/bge-m3, which has a hidden size of 1024.
-An additional layer of (GGU: 3) is added to the output of the backbone to classify the input sequence.
 Using the provided implementation (in repository) of `MultiHeadClassificationTrainer`.
@@ -231,20 +231,20 @@ def _eval_model(self, dataloader, label_map, sample_key, label_key):
 For evaluation, we used the following metrics: accuracy, precision, recall, f1-score. You can find a detailed classification report here:
 **GGU:**
-|    | index        |   precision |   recall |   f1-score |   support |
-|---:|:-------------|------------:|---------:|-----------:|----------:|
-|  0 | Greeting     |    0.278481 | 0.709677 |   0.4      |        31 |
-|  1 | Gratitude    |    0.428571 | 0.176471 |   0.25     |        34 |
-|  2 | Other        |    0.25     | 0.075    |   0.115385 |        40 |
-|  3 | macro avg    |    0.319017 | 0.320383 |   0.255128 |       105 |
-|  4 | weighted avg |    0.316232 | 0.295238 |   0.243004 |       105 |
 **sentiment:**
 |    | index        |   precision |   recall |   f1-score |   support |
 |---:|:-------------|------------:|---------:|-----------:|----------:|
-|  0 | Positive     |    0.568182 | 0.714286 |   0.632911 |        35 |
-|  1 | Negative     |    0.605263 | 0.821429 |   0.69697  |        28 |
-|  2 | Neutral      |    0.869565 | 0.47619  |   0.615385 |        42 |
-|  3 | macro avg    |    0.681003 | 0.670635 |   0.648422 |       105 |
-|  4 | weighted avg |    0.698624 | 0.647619 |   0.642983 |       105 |

 The backbone of the model is BAAI/bge-m3, which has a hidden size of 1024.
+An additional linear classification head per task (GGU: 3 classes, sentiment: 3 classes) is added on top of the backbone output to classify the input sequence.
 Using the provided implementation (in repository) of `MultiHeadClassificationTrainer`.
 For evaluation, we used the following metrics: accuracy, precision, recall, f1-score. You can find a detailed classification report here:
 **GGU:**
+|    | index        |   precision |    recall |   f1-score |   support |
+|---:|:-------------|------------:|----------:|-----------:|----------:|
+|  0 | Greeting     |   0.0555556 | 0.03125   |   0.04     |        32 |
+|  1 | Gratitude    |   0.320513  | 0.892857  |   0.471698 |        28 |
+|  2 | Other        |   0.111111  | 0.0222222 |   0.037037 |        45 |
+|  3 | macro avg    |   0.162393  | 0.315443  |   0.182912 |       105 |
+|  4 | weighted avg |   0.15002   | 0.257143  |   0.15385  |       105 |
 **sentiment:**
 |    | index        |   precision |   recall |   f1-score |   support |
 |---:|:-------------|------------:|---------:|-----------:|----------:|
+|  0 | Positive     |    0.653846 | 0.586207 |   0.618182 |        29 |
+|  1 | Negative     |    0.777778 | 0.736842 |   0.756757 |        38 |
+|  2 | Neutral      |    0.72093  | 0.815789 |   0.765432 |        38 |
+|  3 | macro avg    |    0.717518 | 0.712946 |   0.713457 |       105 |
+|  4 | weighted avg |    0.722976 | 0.72381  |   0.721623 |       105 |

assets/confusion_matrix_GGU.png CHANGED Viewed

assets/confusion_matrix_sentiment.png CHANGED Viewed

assets/loss_plot_GGU.png CHANGED Viewed

assets/loss_plot_sentiment.png CHANGED Viewed

heads/GGU.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7addbfb97d15aa2703e981078bd21c32b5f2d1783d3b7227e412301eff6f796e
 size 7552

 version https://git-lfs.github.com/spec/v1
+oid sha256:ff2b644eeb54e3b01ec332e5979b5e98477b4a29464b8db4e2beb29fe1548f27
 size 7552

heads/sentiment.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:964c8089b6a5252343f3d372a8ffb81a545d2c83ba7d00a5a567ff07083e5118
 size 7652

 version https://git-lfs.github.com/spec/v1
+oid sha256:446843989a3b3ca5b766463a54e00f443a928a6a131e7d6e51e5238657b62758
 size 7652

multi-head-sequence-classification-model-model.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:23318136d699081f48df309d2daa21a9b5bcd6381f483c6682f609597a094f6a
 size 1135701541

 version https://git-lfs.github.com/spec/v1
+oid sha256:6400c69f1f1efffd158930fc7be49cf7147fe2fba2764bdf3f12d39235060521
 size 1135701541

train.py CHANGED Viewed

@@ -167,14 +167,19 @@ class MultiHeadClassification(nn.Module):
         Returns:
             None
         """
         if head_name in self.heads:
-            self.heads[head_name].load_state_dict(torch.load(path))
-            self.to(self.device)
             return
-        model = torch.load(path)
         assert model['weight'].shape[1] == self.backbone.config.hidden_size
-        self.heads[head_name] = nn.Linear(self.backbone.config.hidden_size, model['weight'].shape[0])
         self.heads[head_name].load_state_dict(model)
         self.to(self.torch_dtype).to(self.device)
@@ -286,6 +291,7 @@ class MultiHeadClassification(nn.Module):
         """
         self.heads[head_name] = nn.Linear(self.backbone.config.hidden_size, num_classes)
         self.heads[head_name].to(self.torch_dtype).to(self.device)
     def remove_head(self, head_name):
         """
@@ -294,6 +300,7 @@ class MultiHeadClassification(nn.Module):
         if head_name not in self.heads:
             raise ValueError(f'Head {head_name} not found')
         del self.heads[head_name]
     @classmethod
     def from_pretrained(cls, model_name, head_config=None, dropout=0.1, l2_reg=0.01):
@@ -331,6 +338,7 @@ class MultiHeadClassification(nn.Module):
         backbone = AutoModel.from_pretrained(os.path.join(model_path, 'pretrained/backbone.pth'))
         instance = cls(backbone, head_config, dropout, l2_reg)
         instance.load(os.path.join(model_path, 'pretrained/model.pth'))
         return instance
 class MultiHeadClassificationTrainer:

         Returns:
             None
         """
+        model = torch.load(path)
         if head_name in self.heads:
+            num_classes = model['weight'].shape[0]
+            self.heads[head_name].load_state_dict(model)
+            self.to(self.torch_dtype).to(self.device)
+            self.head_config[head_name] = num_classes
             return
         assert model['weight'].shape[1] == self.backbone.config.hidden_size
+        num_classes = model['weight'].shape[0]
+        self.heads[head_name] = nn.Linear(self.backbone.config.hidden_size, num_classes)
         self.heads[head_name].load_state_dict(model)
+        self.head_config[head_name] = num_classes
         self.to(self.torch_dtype).to(self.device)
         """
         self.heads[head_name] = nn.Linear(self.backbone.config.hidden_size, num_classes)
         self.heads[head_name].to(self.torch_dtype).to(self.device)
+        self.head_config[head_name] = num_classes
     def remove_head(self, head_name):
         """
         if head_name not in self.heads:
             raise ValueError(f'Head {head_name} not found')
         del self.heads[head_name]
+        del self.head_config[head_name]
     @classmethod
     def from_pretrained(cls, model_name, head_config=None, dropout=0.1, l2_reg=0.01):
         backbone = AutoModel.from_pretrained(os.path.join(model_path, 'pretrained/backbone.pth'))
         instance = cls(backbone, head_config, dropout, l2_reg)
         instance.load(os.path.join(model_path, 'pretrained/model.pth'))
+        instance.head_config = {k: v. instance.heads}
         return instance
 class MultiHeadClassificationTrainer: