diff --git a/.gitignore b/.gitignore index 8361080..2109ef7 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ baselines/log dataset experiment/*/checkpoints experiment/*/wandb +experiment/*/configs Reference_MNIST/checkpoints Reference_MNIST/wandb baselines/wandb diff --git a/baselines/Bert_embedding.ipynb b/baselines/Bert_embedding.ipynb index 5fffe09..1cd5762 100644 --- a/baselines/Bert_embedding.ipynb +++ b/baselines/Bert_embedding.ipynb @@ -279,7 +279,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -389,7 +389,7 @@ " print('No predict_model or predict_target_cols available!')\n", "\n", " outputs = self.predict_model(**batch)\n", - " pooler_outputs = outputs['pooler_output'] # these are the sentence embedding vectors (768 dim each)\n", + " pooler_outputs = outputs[1] # these are the sentence embedding vectors (768 dim each)\n", " outputs_concated = []\n", " for i in range(int(len(pooler_outputs) / len(self.predict_target_cols))):\n", " outputs_concated.append(torch.concat(list(pooler_outputs[i * len(self.predict_target_cols):(i + 1) * len(self.predict_target_cols)])))\n", @@ -403,16 +403,18 @@ }, { "cell_type": "code", - "execution_count": 172, + "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias']\n", + "Some weights of the model checkpoint at dkqp/AiVENGERS_BERT_FineTuned were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight']\n", "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of BertModel were not initialized from the model checkpoint at dkqp/AiVENGERS_BERT_FineTuned and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", "GPU available: True (mps), used: True\n", "TPU available: False, using: 0 TPU cores\n", "IPU available: False, using: 0 IPUs\n", @@ -423,7 +425,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Predicting DataLoader 0: 100%|██████████| 334/334 [03:25<00:00, 1.62it/s]\n" + "Predicting DataLoader 0: 100%|██████████| 334/334 [03:13<00:00, 1.73it/s]\n" ] } ], @@ -432,14 +434,14 @@ "\n", "predict_target_cols = ['자격요건', '우대조건', '복지', '회사소개', '주요업무']\n", "\n", - "tokenizer = AutoTokenizer.from_pretrained('bert-base-multilingual-cased')\n", + "tokenizer = AutoTokenizer.from_pretrained('dkqp/AiVENGERS_BERT_FineTuned')\n", "data_module = HFBertDataModule(\n", " tokenizer=tokenizer,\n", " max_batch_size=15,\n", " predict_target_cols=predict_target_cols,\n", ")\n", "\n", - "model = AutoModel.from_pretrained('bert-base-multilingual-cased')\n", + "model = AutoModel.from_pretrained('dkqp/AiVENGERS_BERT_FineTuned', torchscript=True)\n", "task = HFBertTask(tokenizer=tokenizer, predict_model=model, predict_target_cols=predict_target_cols)\n", "\n", "trainer = pl.Trainer()\n", @@ -450,7 +452,7 @@ }, { "cell_type": "code", - "execution_count": 175, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -459,7 +461,7 @@ "torch.Size([1000, 3840])" ] }, - "execution_count": 175, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -734,6 +736,13 @@ "\n", "hf_trainer.save_model('../models')" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/experiment/ch/Bert_embedding_new.ipynb b/experiment/ch/Bert_embedding_new.ipynb index a1efac2..db5b4c1 100644 --- a/experiment/ch/Bert_embedding_new.ipynb +++ b/experiment/ch/Bert_embedding_new.ipynb @@ -410,7 +410,7 @@ "\n", " outputs = self.predict_model(**batch)\n", " # these are the sentence embedding vectors (768 dim each)\n", - " pooler_outputs = outputs['pooler_output']\n", + " pooler_outputs = outputs[1]\n", " outputs_concated = []\n", " for i in range(int(len(pooler_outputs) / len(self.predict_target_cols))):\n", " outputs_concated.append(torch.concat(list(\n", @@ -656,7 +656,7 @@ " tokenizer=tokenizer\n", ")\n", "\n", - "hf_trainer.save_model('../models')\n" + "hf_trainer.save_model('../../models')\n" ] } ], diff --git a/experiment/ej/Bert_embedding_new.ipynb b/experiment/ej/Bert_embedding_new.ipynb index a1efac2..db5b4c1 100644 --- a/experiment/ej/Bert_embedding_new.ipynb +++ b/experiment/ej/Bert_embedding_new.ipynb @@ -410,7 +410,7 @@ "\n", " outputs = self.predict_model(**batch)\n", " # these are the sentence embedding vectors (768 dim each)\n", - " pooler_outputs = outputs['pooler_output']\n", + " pooler_outputs = outputs[1]\n", " outputs_concated = []\n", " for i in range(int(len(pooler_outputs) / len(self.predict_target_cols))):\n", " outputs_concated.append(torch.concat(list(\n", @@ -656,7 +656,7 @@ " 
tokenizer=tokenizer\n", ")\n", "\n", - "hf_trainer.save_model('../models')\n" + "hf_trainer.save_model('../../models')\n" ] } ], diff --git a/experiment/ih/Bert_embedding_new.ipynb b/experiment/ih/Bert_embedding_new.ipynb index a1efac2..db5b4c1 100644 --- a/experiment/ih/Bert_embedding_new.ipynb +++ b/experiment/ih/Bert_embedding_new.ipynb @@ -410,7 +410,7 @@ "\n", " outputs = self.predict_model(**batch)\n", " # these are the sentence embedding vectors (768 dim each)\n", - " pooler_outputs = outputs['pooler_output']\n", + " pooler_outputs = outputs[1]\n", " outputs_concated = []\n", " for i in range(int(len(pooler_outputs) / len(self.predict_target_cols))):\n", " outputs_concated.append(torch.concat(list(\n", @@ -656,7 +656,7 @@ " tokenizer=tokenizer\n", ")\n", "\n", - "hf_trainer.save_model('../models')\n" + "hf_trainer.save_model('../../models')\n" ] } ], diff --git a/experiment/jh/Bert_embedding_new.ipynb b/experiment/jh/Bert_embedding_new.ipynb index a1efac2..db5b4c1 100644 --- a/experiment/jh/Bert_embedding_new.ipynb +++ b/experiment/jh/Bert_embedding_new.ipynb @@ -410,7 +410,7 @@ "\n", " outputs = self.predict_model(**batch)\n", " # these are the sentence embedding vectors (768 dim each)\n", - " pooler_outputs = outputs['pooler_output']\n", + " pooler_outputs = outputs[1]\n", " outputs_concated = []\n", " for i in range(int(len(pooler_outputs) / len(self.predict_target_cols))):\n", " outputs_concated.append(torch.concat(list(\n", @@ -656,7 +656,7 @@ " tokenizer=tokenizer\n", ")\n", "\n", - "hf_trainer.save_model('../models')\n" + "hf_trainer.save_model('../../models')\n" ] } ], diff --git a/experiment/sl/Bert_embedding_new.ipynb b/experiment/sl/Bert_embedding_new.ipynb index a1efac2..db5b4c1 100644 --- a/experiment/sl/Bert_embedding_new.ipynb +++ b/experiment/sl/Bert_embedding_new.ipynb @@ -410,7 +410,7 @@ "\n", " outputs = self.predict_model(**batch)\n", " # these are the sentence embedding vectors (768 dim each)\n", - " pooler_outputs = outputs['pooler_output']\n", + " pooler_outputs = outputs[1]\n", " outputs_concated = []\n", " for i in range(int(len(pooler_outputs) / len(self.predict_target_cols))):\n", " outputs_concated.append(torch.concat(list(\n", @@ -656,7 +656,7 @@ " tokenizer=tokenizer\n", ")\n", "\n", - "hf_trainer.save_model('../models')\n" + "hf_trainer.save_model('../../models')\n" ] } ], diff --git a/experiment/tg/Bert_embedding_new.ipynb b/experiment/tg/Bert_embedding_new.ipynb index a1efac2..0abc09e 100644 --- a/experiment/tg/Bert_embedding_new.ipynb +++ b/experiment/tg/Bert_embedding_new.ipynb @@ -18,9 +18,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/tglim/miniforge3/envs/aiinfra/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], "source": [ "# Modules About Hydra\n", "import os\n", @@ -71,7 +80,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -101,7 +110,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -293,7 +302,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -429,7 +438,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -458,7 +467,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -470,9 +479,216 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mltg0513\u001b[0m (\u001b[33maivengersteam\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" + ] + }, + { + "data": { + "text/html": [ + "wandb version 0.15.7 is available! To upgrade, please run:\n", + " $ pip install wandb --upgrade" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Tracking run with wandb version 0.15.5" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Run data is saved locally in ./wandb/run-20230728_111907-cl4bt2gr" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Syncing run v5_s0 to Weights & Biases (docs)
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View project at https://wandb.ai/aivengersteam/wandb_test" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View run at https://wandb.ai/aivengersteam/wandb_test/runs/cl4bt2gr" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']\n", + "- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']\n", + "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "GPU available: True (mps), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n", + "Token indices sequence length is longer than the specified maximum sequence length for this model (595 > 512). Running this sequence through the model will result in indexing errors\n", + "\n", + " | Name | Type | Params\n", + "--------------------------------------------------\n", + "0 | predict_model | BertModel | 177 M \n", + "1 | train_model | BertForMaskedLM | 177 M \n", + "--------------------------------------------------\n", + "355 M Trainable params\n", + "0 Non-trainable params\n", + "355 M Total params\n", + "1,423.312 Total estimated model params size (MB)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 0: 100%|██████████| 181/181 [04:50<00:00, 1.61s/it, v_num=t2gr, train_loss=0.100] \n", + "Epoch 0, Avg. Training Loss: 0.058 Avg. 
Validation Loss: 0.046\n", + "Epoch 0: 100%|██████████| 181/181 [05:10<00:00, 1.72s/it, v_num=t2gr, train_loss=0.100, val_loss=0.0455]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "`Trainer.fit` stopped: `max_epochs=1` reached.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 0: 100%|██████████| 181/181 [05:18<00:00, 1.76s/it, v_num=t2gr, train_loss=0.100, val_loss=0.0455]\n", + "Testing DataLoader 0: 100%|██████████| 25/25 [00:14<00:00, 1.76it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
+       "┃        Test metric               DataLoader 0        ┃\n",
+       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
+       "│         test_loss             0.04557517170906067    │\n",
+       "└───────────────────────────┴───────────────────────────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test_loss \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.04557517170906067 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Waiting for W&B process to finish... (success)." + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "

Run history:

    epoch                ▁▁▁▁█
    test_loss
    train_avg_loss
    train_loss           █▅▁
    trainer/global_step  ▁▄▆██
    val_avg_loss
    val_loss

Run summary:

    epoch                1
    test_loss            0.04558
    train_avg_loss       0.0584
    train_loss           0.03422
    trainer/global_step  181
    val_avg_loss         0.04584
    val_loss             0.04552

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View run v5_s0 at: https://wandb.ai/aivengersteam/wandb_test/runs/cl4bt2gr
Synced 6 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Find logs at: ./wandb/run-20230728_111907-cl4bt2gr/logs" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "def generate_train_func(cfg):\n", " def find_key(cfg, query, new_value):\n", @@ -639,7 +855,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -656,7 +872,7 @@ " tokenizer=tokenizer\n", ")\n", "\n", - "hf_trainer.save_model('../models')\n" + "hf_trainer.save_model('../../models')\n" ] } ], diff --git a/experiment/tg/config.yaml b/experiment/tg/config.yaml index d6f1802..1d9e629 100644 --- a/experiment/tg/config.yaml +++ b/experiment/tg/config.yaml @@ -1,4 +1,4 @@ -member_name: eui-jin +member_name: tae-geun universal: tokenizer: @@ -7,14 +7,17 @@ universal: train_target_cols: - 자격요건 - - 직무내용 + - 우대조건 + - 복지 + - 회사소개 + - 주요업무 predict_target_cols: - 자격요건 - - 직무내용 + - 우대조건 data: data_dir: "../../dataset/pre_result_2.csv" - max_batch_size: 64 + max_batch_size: 4 train_test_ratio: 0.9 train_val_ratio: 0.8 sliding_window_interval: 200 @@ -33,7 +36,7 @@ models: task: optimizer: _target_: "torch.optim.AdamW" - lr: 2e-5 + lr: 1e-5 lr_scheduler: scheduler: _target_: "torch.optim.lr_scheduler.CosineAnnealingWarmRestarts" @@ -47,14 +50,14 @@ train: callbacks: checkpoint_callback: monitor: "val_avg_loss" - filename: "best-model-{epoch:02d}-{val_acc:.2f}" - save_top_k: 1 + filename: "best-model-{epoch:02d}-{val_avg_loss:.3f}" + save_top_k: 3 save_last: True mode: "min" early_stop_callback: monitor: "val_avg_loss" min_delta: 0.01 - patience: 3 + patience: 5 verbose: False mode: "min" @@ -69,7 +72,7 @@ train: project: wandb_test trainer: - max_epochs: 1 + max_epochs: 5 sweep: method: "bayes" @@ -78,7 +81,8 @@ sweep: goal: "minimize" parameters: max_batch_size: - values: [4] + values: [8, 16, 32] + lr: [1e-3, 1e-4, 1e-5, 1e-6] early_terminate: type: "hyperband" min_iter: 1 diff --git a/experiment/tg/global.yaml b/experiment/tg/global.yaml index fac8eda..42ac78e 100644 --- a/experiment/tg/global.yaml +++ b/experiment/tg/global.yaml @@ -1,2 +1,2 @@ next_sweep_count: 0 -next_version_count: 0 +next_version_count: 6
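
Note on the recurring outputs['pooler_output'] -> outputs[1] change across the embedding notebooks: loading the checkpoint with torchscript=True (as the updated baselines/Bert_embedding.ipynb cell does) forces return_dict=False in transformers, so BertModel.forward returns a plain tuple (last_hidden_state, pooler_output) instead of a keyed ModelOutput. A minimal sketch of the resulting access pattern, assuming the Hugging Face transformers API; the sample sentence and batch setup below are illustrative and not taken from the notebooks:

    # Sketch only (not part of the patch): why the notebooks index the model
    # output by position after switching to torchscript=True.
    import torch
    from transformers import AutoModel, AutoTokenizer

    model_name = 'dkqp/AiVENGERS_BERT_FineTuned'
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # torchscript=True forces return_dict=False, so forward() returns a tuple
    # (last_hidden_state, pooler_output) rather than a BaseModelOutputWithPooling.
    model = AutoModel.from_pretrained(model_name, torchscript=True)
    model.eval()

    batch = tokenizer(['예시 문장입니다.'], return_tensors='pt', padding=True)  # placeholder sentence
    with torch.no_grad():
        outputs = model(**batch)

    last_hidden_state = outputs[0]  # (batch, seq_len, 768)
    pooler_output = outputs[1]      # (batch, 768) sentence embedding; was outputs['pooler_output']

Keyed access raises a TypeError once the model returns a tuple, so positional indexing is the safe pattern whenever the model is loaded with torchscript=True.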
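
For the experiment/tg/config.yaml callback changes, a rough sketch of how those fields would typically map onto PyTorch Lightning callbacks; the notebook's actual wiring through its config loader may differ, so treat the construction below as illustrative only. The point of the diff is that the checkpoint filename template now interpolates the metric that is actually monitored (val_avg_loss) rather than val_acc:

    # Illustrative mapping of the updated config.yaml values onto Lightning callbacks.
    from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping

    checkpoint_callback = ModelCheckpoint(
        monitor='val_avg_loss',
        filename='best-model-{epoch:02d}-{val_avg_loss:.3f}',  # placeholders filled from logged metrics
        save_top_k=3,
        save_last=True,
        mode='min',
    )

    early_stop_callback = EarlyStopping(
        monitor='val_avg_loss',
        min_delta=0.01,
        patience=5,
        verbose=False,
        mode='min',
    )

    # trainer = pl.Trainer(max_epochs=5, callbacks=[checkpoint_callback, early_stop_callback])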