Commit c1e18b9: update cfgs
LZHgrla committed Oct 9, 2023
1 parent eae7da8
Showing 234 changed files with 1,025 additions and 561 deletions.
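
Each config shown below receives the same three-part update: SYSTEM_TEMPLATE is added to the xtuner.utils import, the task-specific prompt template (alpaca, title, coder, colorist, lawyer, openassistant) is replaced by the model's chat template PROMPT_TEMPLATE.baichuan2_chat, and the task-specific text moves into an explicit SYSTEM variable that EvaluateChatHook now receives via system= and prompt_template=, replacing the former instruction=prompt_template.INSTRUCTION_START argument. A minimal sketch of the hook wiring after the change (a config fragment, not a standalone script; tokenizer, evaluation_freq, and evaluation_inputs are defined elsewhere in each config):

```python
from xtuner.engine import DatasetInfoHook, EvaluateChatHook
from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE

prompt_template = PROMPT_TEMPLATE.baichuan2_chat  # model-specific chat format
SYSTEM = SYSTEM_TEMPLATE.alpaca  # task-specific system prompt

custom_hooks = [
    dict(type=DatasetInfoHook, tokenizer=tokenizer),
    dict(
        type=EvaluateChatHook,
        tokenizer=tokenizer,
        every_n_iters=evaluation_freq,
        evaluation_inputs=evaluation_inputs,
        system=SYSTEM,  # new: replaces instruction=prompt_template.INSTRUCTION_START
        prompt_template=prompt_template)
]
```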
```diff
@@ -15,7 +15,7 @@
 from xtuner.dataset.map_fns import alpaca_map_fn, template_map_fn_factory
 from xtuner.engine import DatasetInfoHook, EvaluateChatHook
 from xtuner.model import SupervisedFinetune
-from xtuner.utils import PROMPT_TEMPLATE
+from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE
 
 #######################################################################
 #                          PART 1  Settings                           #
@@ -25,7 +25,7 @@
 
 # Data
 alpaca_en_path = 'tatsu-lab/alpaca'
-prompt_template = PROMPT_TEMPLATE.alpaca
+prompt_template = PROMPT_TEMPLATE.baichuan2_chat
 max_length = 2048
 pack_to_max_length = True
 
@@ -42,6 +42,7 @@
 
 # Evaluate the generation performance during the training
 evaluation_freq = 500
+SYSTEM = SYSTEM_TEMPLATE.alpaca
 evaluation_inputs = [
     '请给我介绍五个上海的景点', 'Please tell me five scenic spots in Shanghai'
 ]
@@ -137,7 +138,8 @@
         tokenizer=tokenizer,
         every_n_iters=evaluation_freq,
         evaluation_inputs=evaluation_inputs,
-        instruction=prompt_template.INSTRUCTION_START)
+        system=SYSTEM,
+        prompt_template=prompt_template)
 ]
 
 # configure default hooks
```
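
The change splits prompt construction into two concerns: PROMPT_TEMPLATE.baichuan2_chat carries the model-specific turn formatting, while SYSTEM_TEMPLATE.alpaca carries the task description injected into the system slot. A minimal sketch of how a single-turn evaluation prompt could be assembled under this split, assuming each PROMPT_TEMPLATE entry is a dict whose SYSTEM and INSTRUCTION fields contain {system} and {input} placeholders (the exact keys and placeholder names are an assumption, not confirmed by this diff):

```python
from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE

prompt_template = PROMPT_TEMPLATE.baichuan2_chat
system = SYSTEM_TEMPLATE.alpaca

def build_eval_prompt(user_input: str) -> str:
    # Assumed layout: system text first (skipped when empty), then the
    # templated user turn. The real assembly lives inside EvaluateChatHook.
    prompt = prompt_template.SYSTEM.format(system=system) if system else ''
    prompt += prompt_template.INSTRUCTION.format(input=user_input)
    return prompt

print(build_eval_prompt('Please tell me five scenic spots in Shanghai'))
```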
```diff
@@ -16,7 +16,7 @@
                                     template_map_fn_factory)
 from xtuner.engine import DatasetInfoHook, EvaluateChatHook
 from xtuner.model import SupervisedFinetune
-from xtuner.utils import PROMPT_TEMPLATE
+from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE
 
 #######################################################################
 #                          PART 1  Settings                           #
@@ -27,7 +27,7 @@
 # Data
 alpaca_zh_path = 'silk-road/alpaca-data-gpt4-chinese'
 alpaca_en_path = 'tatsu-lab/alpaca'
-prompt_template = PROMPT_TEMPLATE.alpaca
+prompt_template = PROMPT_TEMPLATE.baichuan2_chat
 max_length = 2048
 pack_to_max_length = True
 
@@ -44,6 +44,7 @@
 
 # Evaluate the generation performance during the training
 evaluation_freq = 500
+SYSTEM = SYSTEM_TEMPLATE.alpaca
 evaluation_inputs = [
     '请给我介绍五个上海的景点', 'Please tell me five scenic spots in Shanghai'
 ]
@@ -155,7 +156,8 @@
         tokenizer=tokenizer,
         every_n_iters=evaluation_freq,
         evaluation_inputs=evaluation_inputs,
-        instruction=prompt_template.INSTRUCTION_START)
+        system=SYSTEM,
+        prompt_template=prompt_template)
 ]
 
 # configure default hooks
```
```diff
@@ -16,7 +16,7 @@
                                     oasst1_map_fn, template_map_fn_factory)
 from xtuner.engine import DatasetInfoHook, EvaluateChatHook
 from xtuner.model import SupervisedFinetune
-from xtuner.utils import PROMPT_TEMPLATE
+from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE
 
 #######################################################################
 #                          PART 1  Settings                           #
@@ -28,7 +28,7 @@
 alpaca_zh_path = 'silk-road/alpaca-data-gpt4-chinese'
 alpaca_en_path = 'tatsu-lab/alpaca'
 oasst1_path = 'timdettmers/openassistant-guanaco'
-prompt_template = PROMPT_TEMPLATE.alpaca
+prompt_template = PROMPT_TEMPLATE.baichuan2_chat
 max_length = 2048
 pack_to_max_length = True
 
@@ -45,6 +45,7 @@
 
 # Evaluate the generation performance during the training
 evaluation_freq = 500
+SYSTEM = SYSTEM_TEMPLATE.alpaca
 evaluation_inputs = [
     '请给我介绍五个上海的景点', 'Please tell me five scenic spots in Shanghai'
 ]
@@ -168,7 +169,8 @@
         tokenizer=tokenizer,
         every_n_iters=evaluation_freq,
         evaluation_inputs=evaluation_inputs,
-        instruction=prompt_template.INSTRUCTION_START)
+        system=SYSTEM,
+        prompt_template=prompt_template)
 ]
 
 # configure default hooks
```
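
The two mixed-data variants above combine silk-road/alpaca-data-gpt4-chinese, tatsu-lab/alpaca, and timdettmers/openassistant-guanaco. As a standalone illustration of what that mixing starts from (xtuner wires the sources through its own dataset wrappers and the map_fns named in the imports; this snippet only fetches the raw sources):

```python
from datasets import load_dataset

# The three Hugging Face sources named in the config above.
alpaca_zh = load_dataset('silk-road/alpaca-data-gpt4-chinese', split='train')
alpaca_en = load_dataset('tatsu-lab/alpaca', split='train')
oasst1 = load_dataset('timdettmers/openassistant-guanaco', split='train')

# Each source has its own schema; in the config they are normalized by
# alpaca_zh_map_fn, alpaca_map_fn, and oasst1_map_fn before being merged.
print(len(alpaca_zh), len(alpaca_en), len(oasst1))
```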
```diff
@@ -15,7 +15,7 @@
 from xtuner.dataset.map_fns import alpaca_zh_map_fn, template_map_fn_factory
 from xtuner.engine import DatasetInfoHook, EvaluateChatHook
 from xtuner.model import SupervisedFinetune
-from xtuner.utils import PROMPT_TEMPLATE
+from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE
 
 #######################################################################
 #                          PART 1  Settings                           #
@@ -25,7 +25,7 @@
 
 # Data
 alpaca_zh_path = 'silk-road/alpaca-data-gpt4-chinese'
-prompt_template = PROMPT_TEMPLATE.alpaca
+prompt_template = PROMPT_TEMPLATE.baichuan2_chat
 max_length = 2048
 pack_to_max_length = True
 
@@ -42,6 +42,7 @@
 
 # Evaluate the generation performance during the training
 evaluation_freq = 500
+SYSTEM = SYSTEM_TEMPLATE.alpaca
 evaluation_inputs = [
     '请给我介绍五个上海的景点', 'Please tell me five scenic spots in Shanghai'
 ]
@@ -137,7 +138,8 @@
         tokenizer=tokenizer,
         every_n_iters=evaluation_freq,
         evaluation_inputs=evaluation_inputs,
-        instruction=prompt_template.INSTRUCTION_START)
+        system=SYSTEM,
+        prompt_template=prompt_template)
 ]
 
 # configure default hooks
```
```diff
@@ -15,7 +15,7 @@
 from xtuner.dataset.map_fns import arxiv_map_fn, template_map_fn_factory
 from xtuner.engine import DatasetInfoHook, EvaluateChatHook
 from xtuner.model import SupervisedFinetune
-from xtuner.utils import PROMPT_TEMPLATE
+from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE
 
 #######################################################################
 #                          PART 1  Settings                           #
@@ -27,7 +27,7 @@
 # 1. Download data from https://kaggle.com/datasets/Cornell-University/arxiv
 # 2. Process data by `xtuner preprocess arxiv ${DOWNLOADED_DATA} ./data/arxiv_data.json [optional arguments]`  # noqa: E501
 data_path = './data/arxiv_data.json'
-prompt_template = PROMPT_TEMPLATE.title
+prompt_template = PROMPT_TEMPLATE.baichuan2_chat
 max_length = 2048
 pack_to_max_length = True
 
@@ -44,6 +44,7 @@
 
 # Evaluate the generation performance during the training
 evaluation_freq = 500
+SYSTEM = SYSTEM_TEMPLATE.arxiv_gentile
 evaluation_inputs = [
     ('We present InternLM, a multilingual foundational language '
      'model with 104B parameters. InternLM is pre-trained on a large '
@@ -172,7 +173,8 @@
         tokenizer=tokenizer,
         every_n_iters=evaluation_freq,
         evaluation_inputs=evaluation_inputs,
-        instruction=prompt_template.INSTRUCTION_START)
+        system=SYSTEM,
+        prompt_template=prompt_template)
 ]
 
 # configure default hooks
```
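
Unlike the hub-hosted datasets, this config reads a locally preprocessed file. A quick sanity check before training, assuming the JSON written by `xtuner preprocess arxiv` is a top-level list of records (the field names are whatever the preprocessor emits; print one record to confirm they match what arxiv_map_fn expects):

```python
import json

# Produced by: xtuner preprocess arxiv ${DOWNLOADED_DATA} ./data/arxiv_data.json
with open('./data/arxiv_data.json', encoding='utf-8') as f:
    records = json.load(f)

print(f'{len(records)} records loaded')
print(records[0])  # inspect the schema before pointing data_path at the file
```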
```diff
@@ -15,7 +15,7 @@
 from xtuner.dataset.map_fns import code_alpaca_map_fn, template_map_fn_factory
 from xtuner.engine import DatasetInfoHook, EvaluateChatHook
 from xtuner.model import SupervisedFinetune
-from xtuner.utils import PROMPT_TEMPLATE
+from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE
 
 #######################################################################
 #                          PART 1  Settings                           #
@@ -25,7 +25,7 @@
 
 # Data
 data_path = 'HuggingFaceH4/CodeAlpaca_20K'
-prompt_template = PROMPT_TEMPLATE.coder
+prompt_template = PROMPT_TEMPLATE.baichuan2_chat
 max_length = 2048
 pack_to_max_length = True
 
@@ -42,6 +42,7 @@
 
 # Evaluate the generation performance during the training
 evaluation_freq = 100
+SYSTEM = SYSTEM_TEMPLATE.coder
 evaluation_inputs = [
     ('写一个Python函数,将十六进制颜色代码(如#0066ee)转换为对应的'
      '红、绿、蓝(RGB)三个颜色分量值,并以元组的形式返回。'),
@@ -141,7 +142,8 @@
         tokenizer=tokenizer,
         every_n_iters=evaluation_freq,
         evaluation_inputs=evaluation_inputs,
-        instruction=prompt_template.INSTRUCTION_START)
+        system=SYSTEM,
+        prompt_template=prompt_template)
 ]
 
 # configure default hooks
```
```diff
@@ -15,7 +15,7 @@
 from xtuner.dataset.map_fns import colors_map_fn, template_map_fn_factory
 from xtuner.engine import DatasetInfoHook, EvaluateChatHook
 from xtuner.model import SupervisedFinetune
-from xtuner.utils import PROMPT_TEMPLATE
+from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE
 
 #######################################################################
 #                          PART 1  Settings                           #
@@ -25,7 +25,7 @@
 
 # Data
 data_path = 'burkelibbey/colors'
-prompt_template = PROMPT_TEMPLATE.colorist
+prompt_template = PROMPT_TEMPLATE.baichuan2_chat
 max_length = 2048
 pack_to_max_length = True
 
@@ -42,6 +42,7 @@
 
 # Evaluate the generation performance during the training
 evaluation_freq = 200
+SYSTEM = SYSTEM_TEMPLATE.colorist
 evaluation_inputs = [
     '请给我一个像天空一样清澈透明的蓝色。', 'Please give me a clear blue like the sky.'
 ]
@@ -137,7 +138,8 @@
         tokenizer=tokenizer,
         every_n_iters=evaluation_freq,
         evaluation_inputs=evaluation_inputs,
-        instruction=prompt_template.INSTRUCTION_START)
+        system=SYSTEM,
+        prompt_template=prompt_template)
 ]
 
 # configure default hooks
```
```diff
@@ -17,7 +17,7 @@
                                     template_map_fn_factory)
 from xtuner.engine import DatasetInfoHook, EvaluateChatHook
 from xtuner.model import SupervisedFinetune
-from xtuner.utils import PROMPT_TEMPLATE
+from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE
 
 #######################################################################
 #                          PART 1  Settings                           #
@@ -29,7 +29,7 @@
 # download data from https://github.com/LiuHC0428/LAW-GPT
 crime_kg_assitant_path = './data/CrimeKgAssitant清洗后_52k.json'
 law_reference_data_path = './data/训练数据_带法律依据_92k.json'
-prompt_template = PROMPT_TEMPLATE.lawyer
+prompt_template = PROMPT_TEMPLATE.baichuan2_chat
 max_length = 2048
 pack_to_max_length = True
 
@@ -46,6 +46,7 @@
 
 # Evaluate the generation performance during the training
 evaluation_freq = 500
+SYSTEM = SYSTEM_TEMPLATE.lawyer
 evaluation_inputs = ['请问离婚需要准备什么材料?', '销售鳄鱼皮包违法吗?']
 
 #######################################################################
@@ -163,7 +164,8 @@
         tokenizer=tokenizer,
         every_n_iters=evaluation_freq,
         evaluation_inputs=evaluation_inputs,
-        instruction=prompt_template.INSTRUCTION_START)
+        system=SYSTEM,
+        prompt_template=prompt_template)
 ]
 
 # configure default hooks
```
```diff
@@ -25,7 +25,7 @@
 
 # Data
 data_path = 'timdettmers/openassistant-guanaco'
-prompt_template = PROMPT_TEMPLATE.openassistant
+prompt_template = PROMPT_TEMPLATE.baichuan2_chat
 max_length = 512
 pack_to_max_length = False
 
@@ -42,6 +42,7 @@
 
 # Evaluate the generation performance during the training
 evaluation_freq = 500
+SYSTEM = ''
 evaluation_inputs = [
     '请给我介绍五个上海的景点', 'Please tell me five scenic spots in Shanghai'
 ]
@@ -137,7 +138,8 @@
         tokenizer=tokenizer,
         every_n_iters=evaluation_freq,
         evaluation_inputs=evaluation_inputs,
-        instruction=prompt_template.INSTRUCTION_START)
+        system=SYSTEM,
+        prompt_template=prompt_template)
 ]
 
 # configure default hooks
```
```diff
@@ -25,7 +25,7 @@
 
 # Data
 data_path = 'timdettmers/openassistant-guanaco'
-prompt_template = PROMPT_TEMPLATE.openassistant
+prompt_template = PROMPT_TEMPLATE.baichuan2_chat
 max_length = 2048
 pack_to_max_length = True
 
@@ -42,6 +42,7 @@
 
 # Evaluate the generation performance during the training
 evaluation_freq = 500
+SYSTEM = ''
 evaluation_inputs = [
     '请给我介绍五个上海的景点', 'Please tell me five scenic spots in Shanghai'
 ]
@@ -137,7 +138,8 @@
         tokenizer=tokenizer,
         every_n_iters=evaluation_freq,
         evaluation_inputs=evaluation_inputs,
-        instruction=prompt_template.INSTRUCTION_START)
+        system=SYSTEM,
+        prompt_template=prompt_template)
 ]
 
 # configure default hooks
```
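
Both openassistant-guanaco configs set SYSTEM = '', since the dataset supplies full conversations rather than a task with a fixed system prompt; they differ in sequence handling, with one training on max_length = 512 without packing and the other packing samples to max_length = 2048. A toy sketch of what pack_to_max_length = True means conceptually (an illustration, not xtuner's implementation):

```python
def pack(samples: list[list[int]], max_length: int) -> list[list[int]]:
    """Concatenate tokenized samples into fixed-length blocks instead of
    padding each short sample individually, improving token throughput."""
    packed, buf = [], []
    for ids in samples:
        buf.extend(ids)
        while len(buf) >= max_length:
            packed.append(buf[:max_length])
            buf = buf[max_length:]
    if buf:
        packed.append(buf)  # trailing partial block; left for the collator to pad
    return packed

print(pack([[1, 2, 3], [4, 5], [6, 7, 8, 9]], max_length=4))
# -> [[1, 2, 3, 4], [5, 6, 7, 8], [9]]
```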
```diff
@@ -15,7 +15,7 @@
 from xtuner.dataset.map_fns import alpaca_map_fn, template_map_fn_factory
 from xtuner.engine import DatasetInfoHook, EvaluateChatHook
 from xtuner.model import SupervisedFinetune
-from xtuner.utils import PROMPT_TEMPLATE
+from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE
 
 #######################################################################
 #                          PART 1  Settings                           #
@@ -25,7 +25,7 @@
 
 # Data
 data_path = 'garage-bAInd/Open-Platypus'
-prompt_template = PROMPT_TEMPLATE.alpaca
+prompt_template = PROMPT_TEMPLATE.baichuan2_chat
 max_length = 2048
 pack_to_max_length = True
 
@@ -42,6 +42,7 @@
 
 # Evaluate the generation performance during the training
 evaluation_freq = 500
+SYSTEM = SYSTEM_TEMPLATE.alpaca
 evaluation_inputs = [
     '请给我介绍五个上海的景点', 'Please tell me five scenic spots in Shanghai'
 ]
@@ -137,7 +138,8 @@
         tokenizer=tokenizer,
         every_n_iters=evaluation_freq,
         evaluation_inputs=evaluation_inputs,
-        instruction=prompt_template.INSTRUCTION_START)
+        system=SYSTEM,
+        prompt_template=prompt_template)
 ]
 
 # configure default hooks
```