Merge pull request #27 from KDT-AiVENGERS/chore/#26
[#26]CHORE: fix done
dkqp authored Jul 28, 2023
2 parents 26afd10 + e5049f1 commit e64686c
Showing 10 changed files with 271 additions and 41 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -4,6 +4,7 @@ baselines/log
dataset
experiment/*/checkpoints
experiment/*/wandb
+experiment/*/configs
Reference_MNIST/checkpoints
Reference_MNIST/wandb
baselines/wandb
27 changes: 18 additions & 9 deletions baselines/Bert_embedding.ipynb
@@ -279,7 +279,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
@@ -389,7 +389,7 @@
" print('No predict_model or predict_target_cols available!')\n",
"\n",
" outputs = self.predict_model(**batch)\n",
" pooler_outputs = outputs['pooler_output'] # these are the sentence embedding vectors (768 dim each)\n",
" pooler_outputs = outputs[1] # these are the sentence embedding vectors (768 dim each)\n",
" outputs_concated = []\n",
" for i in range(int(len(pooler_outputs) / len(self.predict_target_cols))):\n",
" outputs_concated.append(torch.concat(list(pooler_outputs[i * len(self.predict_target_cols):(i + 1) * len(self.predict_target_cols)])))\n",
@@ -403,16 +403,18 @@
},
{
"cell_type": "code",
"execution_count": 172,
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias']\n",
"Some weights of the model checkpoint at dkqp/AiVENGERS_BERT_FineTuned were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight']\n",
"- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
"Some weights of BertModel were not initialized from the model checkpoint at dkqp/AiVENGERS_BERT_FineTuned and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
"GPU available: True (mps), used: True\n",
"TPU available: False, using: 0 TPU cores\n",
"IPU available: False, using: 0 IPUs\n",
@@ -423,7 +425,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Predicting DataLoader 0: 100%|██████████| 334/334 [03:25<00:00, 1.62it/s]\n"
"Predicting DataLoader 0: 100%|██████████| 334/334 [03:13<00:00, 1.73it/s]\n"
]
}
],
@@ -432,14 +434,14 @@
"\n",
"predict_target_cols = ['자격요건', '우대조건', '복지', '회사소개', '주요업무']\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained('bert-base-multilingual-cased')\n",
"tokenizer = AutoTokenizer.from_pretrained('dkqp/AiVENGERS_BERT_FineTuned')\n",
"data_module = HFBertDataModule(\n",
" tokenizer=tokenizer,\n",
" max_batch_size=15,\n",
" predict_target_cols=predict_target_cols,\n",
")\n",
"\n",
"model = AutoModel.from_pretrained('bert-base-multilingual-cased')\n",
"model = AutoModel.from_pretrained('dkqp/AiVENGERS_BERT_FineTuned', torchscript=True)\n",
"task = HFBertTask(tokenizer=tokenizer, predict_model=model, predict_target_cols=predict_target_cols)\n",
"\n",
"trainer = pl.Trainer()\n",
@@ -450,7 +452,7 @@
},
{
"cell_type": "code",
"execution_count": 175,
"execution_count": 10,
"metadata": {},
"outputs": [
{
Expand All @@ -459,7 +461,7 @@
"torch.Size([1000, 3840])"
]
},
"execution_count": 175,
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
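The torch.Size([1000, 3840]) above follows from the concat loop in the predict step: each of the 5 target columns yields a 768-dim pooler vector, and the 5 vectors belonging to one posting are concatenated into a single 5 x 768 = 3840-dim embedding. A minimal sketch of that grouping with dummy tensors:

    import torch

    n_cols, hidden = 5, 768
    pooler_outputs = torch.randn(15, hidden)   # one batch: 3 postings x 5 columns

    grouped = [
        torch.concat(list(pooler_outputs[i * n_cols:(i + 1) * n_cols]))
        for i in range(len(pooler_outputs) // n_cols)
    ]
    print(torch.stack(grouped).shape)          # -> torch.Size([3, 3840])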
@@ -734,6 +736,13 @@
"\n",
"hf_trainer.save_model('../models')"
]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": []
}
],
"metadata": {
4 changes: 2 additions & 2 deletions experiment/ch/Bert_embedding_new.ipynb
@@ -410,7 +410,7 @@
"\n",
" outputs = self.predict_model(**batch)\n",
" # these are the sentence embedding vectors (768 dim each)\n",
" pooler_outputs = outputs['pooler_output']\n",
" pooler_outputs = outputs[1]\n",
" outputs_concated = []\n",
" for i in range(int(len(pooler_outputs) / len(self.predict_target_cols))):\n",
" outputs_concated.append(torch.concat(list(\n",
@@ -656,7 +656,7 @@
" tokenizer=tokenizer\n",
")\n",
"\n",
"hf_trainer.save_model('../models')\n"
"hf_trainer.save_model('../../models')\n"
]
}
],
4 changes: 2 additions & 2 deletions experiment/ej/Bert_embedding_new.ipynb
@@ -410,7 +410,7 @@
"\n",
" outputs = self.predict_model(**batch)\n",
" # these are the sentence embedding vectors (768 dim each)\n",
" pooler_outputs = outputs['pooler_output']\n",
" pooler_outputs = outputs[1]\n",
" outputs_concated = []\n",
" for i in range(int(len(pooler_outputs) / len(self.predict_target_cols))):\n",
" outputs_concated.append(torch.concat(list(\n",
@@ -656,7 +656,7 @@
" tokenizer=tokenizer\n",
")\n",
"\n",
"hf_trainer.save_model('../models')\n"
"hf_trainer.save_model('../../models')\n"
]
}
],
4 changes: 2 additions & 2 deletions experiment/ih/Bert_embedding_new.ipynb
@@ -410,7 +410,7 @@
"\n",
" outputs = self.predict_model(**batch)\n",
" # these are the sentence embedding vectors (768 dim each)\n",
" pooler_outputs = outputs['pooler_output']\n",
" pooler_outputs = outputs[1]\n",
" outputs_concated = []\n",
" for i in range(int(len(pooler_outputs) / len(self.predict_target_cols))):\n",
" outputs_concated.append(torch.concat(list(\n",
@@ -656,7 +656,7 @@
" tokenizer=tokenizer\n",
")\n",
"\n",
"hf_trainer.save_model('../models')\n"
"hf_trainer.save_model('../../models')\n"
]
}
],
4 changes: 2 additions & 2 deletions experiment/jh/Bert_embedding_new.ipynb
@@ -410,7 +410,7 @@
"\n",
" outputs = self.predict_model(**batch)\n",
" # these are the sentence embedding vectors (768 dim each)\n",
" pooler_outputs = outputs['pooler_output']\n",
" pooler_outputs = outputs[1]\n",
" outputs_concated = []\n",
" for i in range(int(len(pooler_outputs) / len(self.predict_target_cols))):\n",
" outputs_concated.append(torch.concat(list(\n",
@@ -656,7 +656,7 @@
" tokenizer=tokenizer\n",
")\n",
"\n",
"hf_trainer.save_model('../models')\n"
"hf_trainer.save_model('../../models')\n"
]
}
],
4 changes: 2 additions & 2 deletions experiment/sl/Bert_embedding_new.ipynb
@@ -410,7 +410,7 @@
"\n",
" outputs = self.predict_model(**batch)\n",
" # these are the sentence embedding vectors (768 dim each)\n",
" pooler_outputs = outputs['pooler_output']\n",
" pooler_outputs = outputs[1]\n",
" outputs_concated = []\n",
" for i in range(int(len(pooler_outputs) / len(self.predict_target_cols))):\n",
" outputs_concated.append(torch.concat(list(\n",
@@ -656,7 +656,7 @@
" tokenizer=tokenizer\n",
")\n",
"\n",
"hf_trainer.save_model('../models')\n"
"hf_trainer.save_model('../../models')\n"
]
}
],
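The recurring save-path fix in the five experiment notebooks above ('../models' -> '../../models') matches their location in the tree: experiment/<name>/ sits two levels below the repository root, so reaching a top-level models/ directory takes two '..' hops, while baselines/Bert_embedding.ipynb keeps '../models' because it is only one level deep. A quick sanity check of the relative paths, assuming they are resolved from the repository root:

    import posixpath

    notebook_dir = 'experiment/ch'   # any of the five experiment notebooks
    print(posixpath.normpath(posixpath.join(notebook_dir, '../models')))     # -> experiment/models (wrong)
    print(posixpath.normpath(posixpath.join(notebook_dir, '../../models')))  # -> models (intended)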