Commit c1e18b9: update cfgs
LZHgrla committed Oct 9, 2023
1 parent eae7da8
Showing 234 changed files with 1,025 additions and 561 deletions.
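
Each config shown below receives the same three-part update: SYSTEM_TEMPLATE is added to the xtuner.utils import, the task-specific prompt template (alpaca, title, coder, colorist, lawyer, openassistant) is replaced by the model's chat template PROMPT_TEMPLATE.baichuan2_chat, and the task-specific text moves into an explicit SYSTEM variable that EvaluateChatHook now receives via system= and prompt_template=, replacing the former instruction=prompt_template.INSTRUCTION_START argument. A minimal sketch of the hook wiring after the change (a config fragment, not a standalone script; tokenizer, evaluation_freq, and evaluation_inputs are defined elsewhere in each config):

```python
from xtuner.engine import DatasetInfoHook, EvaluateChatHook
from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE

prompt_template = PROMPT_TEMPLATE.baichuan2_chat  # model-specific chat format
SYSTEM = SYSTEM_TEMPLATE.alpaca  # task-specific system prompt

custom_hooks = [
    dict(type=DatasetInfoHook, tokenizer=tokenizer),
    dict(
        type=EvaluateChatHook,
        tokenizer=tokenizer,
        every_n_iters=evaluation_freq,
        evaluation_inputs=evaluation_inputs,
        system=SYSTEM,  # new: replaces instruction=prompt_template.INSTRUCTION_START
        prompt_template=prompt_template)
]
```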
```diff
@@ -15,7 +15,7 @@
 from xtuner.dataset.map_fns import alpaca_map_fn, template_map_fn_factory
 from xtuner.engine import DatasetInfoHook, EvaluateChatHook
 from xtuner.model import SupervisedFinetune
-from xtuner.utils import PROMPT_TEMPLATE
+from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE
 
 #######################################################################
 #                          PART 1  Settings                           #
@@ -25,7 +25,7 @@
 
 # Data
 alpaca_en_path = 'tatsu-lab/alpaca'
-prompt_template = PROMPT_TEMPLATE.alpaca
+prompt_template = PROMPT_TEMPLATE.baichuan2_chat
 max_length = 2048
 pack_to_max_length = True
 
@@ -42,6 +42,7 @@
 
 # Evaluate the generation performance during the training
 evaluation_freq = 500
+SYSTEM = SYSTEM_TEMPLATE.alpaca
 evaluation_inputs = [
     '请给我介绍五个上海的景点', 'Please tell me five scenic spots in Shanghai'
 ]
@@ -137,7 +138,8 @@
         tokenizer=tokenizer,
         every_n_iters=evaluation_freq,
         evaluation_inputs=evaluation_inputs,
-        instruction=prompt_template.INSTRUCTION_START)
+        system=SYSTEM,
+        prompt_template=prompt_template)
 ]
 
 # configure default hooks
```
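
The change splits prompt construction into two concerns: PROMPT_TEMPLATE.baichuan2_chat carries the model-specific turn formatting, while SYSTEM_TEMPLATE.alpaca carries the task description injected into the system slot. A minimal sketch of how a single-turn evaluation prompt could be assembled under this split, assuming each PROMPT_TEMPLATE entry is a dict whose SYSTEM and INSTRUCTION fields contain {system} and {input} placeholders (the exact keys and placeholder names are an assumption, not confirmed by this diff):

```python
from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE

prompt_template = PROMPT_TEMPLATE.baichuan2_chat
system = SYSTEM_TEMPLATE.alpaca

def build_eval_prompt(user_input: str) -> str:
    # Assumed layout: system text first (skipped when empty), then the
    # templated user turn. The real assembly lives inside EvaluateChatHook.
    prompt = prompt_template.SYSTEM.format(system=system) if system else ''
    prompt += prompt_template.INSTRUCTION.format(input=user_input)
    return prompt

print(build_eval_prompt('Please tell me five scenic spots in Shanghai'))
```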
```diff
@@ -16,7 +16,7 @@
                                     template_map_fn_factory)
 from xtuner.engine import DatasetInfoHook, EvaluateChatHook
 from xtuner.model import SupervisedFinetune
-from xtuner.utils import PROMPT_TEMPLATE
+from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE
 
 #######################################################################
 #                          PART 1  Settings                           #
@@ -27,7 +27,7 @@
 # Data
 alpaca_zh_path = 'silk-road/alpaca-data-gpt4-chinese'
 alpaca_en_path = 'tatsu-lab/alpaca'
-prompt_template = PROMPT_TEMPLATE.alpaca
+prompt_template = PROMPT_TEMPLATE.baichuan2_chat
 max_length = 2048
 pack_to_max_length = True
 
@@ -44,6 +44,7 @@
 
 # Evaluate the generation performance during the training
 evaluation_freq = 500
+SYSTEM = SYSTEM_TEMPLATE.alpaca
 evaluation_inputs = [
     '请给我介绍五个上海的景点', 'Please tell me five scenic spots in Shanghai'
 ]
@@ -155,7 +156,8 @@
         tokenizer=tokenizer,
         every_n_iters=evaluation_freq,
         evaluation_inputs=evaluation_inputs,
-        instruction=prompt_template.INSTRUCTION_START)
+        system=SYSTEM,
+        prompt_template=prompt_template)
 ]
 
 # configure default hooks
```
```diff
@@ -16,7 +16,7 @@
                                     oasst1_map_fn, template_map_fn_factory)
 from xtuner.engine import DatasetInfoHook, EvaluateChatHook
 from xtuner.model import SupervisedFinetune
-from xtuner.utils import PROMPT_TEMPLATE
+from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE
 
 #######################################################################
 #                          PART 1  Settings                           #
@@ -28,7 +28,7 @@
 alpaca_zh_path = 'silk-road/alpaca-data-gpt4-chinese'
 alpaca_en_path = 'tatsu-lab/alpaca'
 oasst1_path = 'timdettmers/openassistant-guanaco'
-prompt_template = PROMPT_TEMPLATE.alpaca
+prompt_template = PROMPT_TEMPLATE.baichuan2_chat
 max_length = 2048
 pack_to_max_length = True
 
@@ -45,6 +45,7 @@
 
 # Evaluate the generation performance during the training
 evaluation_freq = 500
+SYSTEM = SYSTEM_TEMPLATE.alpaca
 evaluation_inputs = [
     '请给我介绍五个上海的景点', 'Please tell me five scenic spots in Shanghai'
 ]
@@ -168,7 +169,8 @@
         tokenizer=tokenizer,
         every_n_iters=evaluation_freq,
         evaluation_inputs=evaluation_inputs,
-        instruction=prompt_template.INSTRUCTION_START)
+        system=SYSTEM,
+        prompt_template=prompt_template)
 ]
 
 # configure default hooks
```
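
The two mixed-data variants above combine silk-road/alpaca-data-gpt4-chinese, tatsu-lab/alpaca, and timdettmers/openassistant-guanaco. As a standalone illustration of what that mixing starts from (xtuner wires the sources through its own dataset wrappers and the map_fns named in the imports; this snippet only fetches the raw sources):

```python
from datasets import load_dataset

# The three Hugging Face sources named in the config above.
alpaca_zh = load_dataset('silk-road/alpaca-data-gpt4-chinese', split='train')
alpaca_en = load_dataset('tatsu-lab/alpaca', split='train')
oasst1 = load_dataset('timdettmers/openassistant-guanaco', split='train')

# Each source has its own schema; in the config they are normalized by
# alpaca_zh_map_fn, alpaca_map_fn, and oasst1_map_fn before being merged.
print(len(alpaca_zh), len(alpaca_en), len(oasst1))
```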
```diff
@@ -15,7 +15,7 @@
 from xtuner.dataset.map_fns import alpaca_zh_map_fn, template_map_fn_factory
 from xtuner.engine import DatasetInfoHook, EvaluateChatHook
 from xtuner.model import SupervisedFinetune
-from xtuner.utils import PROMPT_TEMPLATE
+from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE
 
 #######################################################################
 #                          PART 1  Settings                           #
@@ -25,7 +25,7 @@
 
 # Data
 alpaca_zh_path = 'silk-road/alpaca-data-gpt4-chinese'
-prompt_template = PROMPT_TEMPLATE.alpaca
+prompt_template = PROMPT_TEMPLATE.baichuan2_chat
 max_length = 2048
 pack_to_max_length = True
 
@@ -42,6 +42,7 @@
 
 # Evaluate the generation performance during the training
 evaluation_freq = 500
+SYSTEM = SYSTEM_TEMPLATE.alpaca
 evaluation_inputs = [
     '请给我介绍五个上海的景点', 'Please tell me five scenic spots in Shanghai'
 ]
@@ -137,7 +138,8 @@
         tokenizer=tokenizer,
         every_n_iters=evaluation_freq,
         evaluation_inputs=evaluation_inputs,
-        instruction=prompt_template.INSTRUCTION_START)
+        system=SYSTEM,
+        prompt_template=prompt_template)
 ]
 
 # configure default hooks
```
```diff
@@ -15,7 +15,7 @@
 from xtuner.dataset.map_fns import arxiv_map_fn, template_map_fn_factory
 from xtuner.engine import DatasetInfoHook, EvaluateChatHook
 from xtuner.model import SupervisedFinetune
-from xtuner.utils import PROMPT_TEMPLATE
+from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE
 
 #######################################################################
 #                          PART 1  Settings                           #
@@ -27,7 +27,7 @@
 # 1. Download data from https://kaggle.com/datasets/Cornell-University/arxiv
 # 2. Process data by `xtuner preprocess arxiv ${DOWNLOADED_DATA} ./data/arxiv_data.json [optional arguments]`  # noqa: E501
 data_path = './data/arxiv_data.json'
-prompt_template = PROMPT_TEMPLATE.title
+prompt_template = PROMPT_TEMPLATE.baichuan2_chat
 max_length = 2048
 pack_to_max_length = True
 
@@ -44,6 +44,7 @@
 
 # Evaluate the generation performance during the training
 evaluation_freq = 500
+SYSTEM = SYSTEM_TEMPLATE.arxiv_gentile
 evaluation_inputs = [
     ('We present InternLM, a multilingual foundational language '
      'model with 104B parameters. InternLM is pre-trained on a large '
@@ -172,7 +173,8 @@
         tokenizer=tokenizer,
         every_n_iters=evaluation_freq,
         evaluation_inputs=evaluation_inputs,
-        instruction=prompt_template.INSTRUCTION_START)
+        system=SYSTEM,
+        prompt_template=prompt_template)
 ]
 
 # configure default hooks
```
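
Unlike the hub-hosted datasets, this config reads a locally preprocessed file. A quick sanity check before training, assuming the JSON written by `xtuner preprocess arxiv` is a top-level list of records (the field names are whatever the preprocessor emits; print one record to confirm they match what arxiv_map_fn expects):

```python
import json

# Produced by: xtuner preprocess arxiv ${DOWNLOADED_DATA} ./data/arxiv_data.json
with open('./data/arxiv_data.json', encoding='utf-8') as f:
    records = json.load(f)

print(f'{len(records)} records loaded')
print(records[0])  # inspect the schema before pointing data_path at the file
```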
```diff
@@ -15,7 +15,7 @@
 from xtuner.dataset.map_fns import code_alpaca_map_fn, template_map_fn_factory
 from xtuner.engine import DatasetInfoHook, EvaluateChatHook
 from xtuner.model import SupervisedFinetune
-from xtuner.utils import PROMPT_TEMPLATE
+from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE
 
 #######################################################################
 #                          PART 1  Settings                           #
@@ -25,7 +25,7 @@
 
 # Data
 data_path = 'HuggingFaceH4/CodeAlpaca_20K'
-prompt_template = PROMPT_TEMPLATE.coder
+prompt_template = PROMPT_TEMPLATE.baichuan2_chat
 max_length = 2048
 pack_to_max_length = True
 
@@ -42,6 +42,7 @@
 
 # Evaluate the generation performance during the training
 evaluation_freq = 100
+SYSTEM = SYSTEM_TEMPLATE.coder
 evaluation_inputs = [
     ('写一个Python函数,将十六进制颜色代码(如#0066ee)转换为对应的'
      '红、绿、蓝(RGB)三个颜色分量值,并以元组的形式返回。'),
@@ -141,7 +142,8 @@
         tokenizer=tokenizer,
         every_n_iters=evaluation_freq,
         evaluation_inputs=evaluation_inputs,
-        instruction=prompt_template.INSTRUCTION_START)
+        system=SYSTEM,
+        prompt_template=prompt_template)
 ]
 
 # configure default hooks
```
```diff
@@ -15,7 +15,7 @@
 from xtuner.dataset.map_fns import colors_map_fn, template_map_fn_factory
 from xtuner.engine import DatasetInfoHook, EvaluateChatHook
 from xtuner.model import SupervisedFinetune
-from xtuner.utils import PROMPT_TEMPLATE
+from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE
 
 #######################################################################
 #                          PART 1  Settings                           #
@@ -25,7 +25,7 @@
 
 # Data
 data_path = 'burkelibbey/colors'
-prompt_template = PROMPT_TEMPLATE.colorist
+prompt_template = PROMPT_TEMPLATE.baichuan2_chat
 max_length = 2048
 pack_to_max_length = True
 
@@ -42,6 +42,7 @@
 
 # Evaluate the generation performance during the training
 evaluation_freq = 200
+SYSTEM = SYSTEM_TEMPLATE.colorist
 evaluation_inputs = [
     '请给我一个像天空一样清澈透明的蓝色。', 'Please give me a clear blue like the sky.'
 ]
@@ -137,7 +138,8 @@
         tokenizer=tokenizer,
         every_n_iters=evaluation_freq,
         evaluation_inputs=evaluation_inputs,
-        instruction=prompt_template.INSTRUCTION_START)
+        system=SYSTEM,
+        prompt_template=prompt_template)
 ]
 
 # configure default hooks
```
```diff
@@ -17,7 +17,7 @@
                                     template_map_fn_factory)
 from xtuner.engine import DatasetInfoHook, EvaluateChatHook
 from xtuner.model import SupervisedFinetune
-from xtuner.utils import PROMPT_TEMPLATE
+from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE
 
 #######################################################################
 #                          PART 1  Settings                           #
@@ -29,7 +29,7 @@
 # download data from https://github.com/LiuHC0428/LAW-GPT
 crime_kg_assitant_path = './data/CrimeKgAssitant清洗后_52k.json'
 law_reference_data_path = './data/训练数据_带法律依据_92k.json'
-prompt_template = PROMPT_TEMPLATE.lawyer
+prompt_template = PROMPT_TEMPLATE.baichuan2_chat
 max_length = 2048
 pack_to_max_length = True
 
@@ -46,6 +46,7 @@
 
 # Evaluate the generation performance during the training
 evaluation_freq = 500
+SYSTEM = SYSTEM_TEMPLATE.lawyer
 evaluation_inputs = ['请问离婚需要准备什么材料?', '销售鳄鱼皮包违法吗?']
 
 #######################################################################
@@ -163,7 +164,8 @@
         tokenizer=tokenizer,
         every_n_iters=evaluation_freq,
         evaluation_inputs=evaluation_inputs,
-        instruction=prompt_template.INSTRUCTION_START)
+        system=SYSTEM,
+        prompt_template=prompt_template)
 ]
 
 # configure default hooks
```
```diff
@@ -25,7 +25,7 @@
 
 # Data
 data_path = 'timdettmers/openassistant-guanaco'
-prompt_template = PROMPT_TEMPLATE.openassistant
+prompt_template = PROMPT_TEMPLATE.baichuan2_chat
 max_length = 512
 pack_to_max_length = False
 
@@ -42,6 +42,7 @@
 
 # Evaluate the generation performance during the training
 evaluation_freq = 500
+SYSTEM = ''
 evaluation_inputs = [
     '请给我介绍五个上海的景点', 'Please tell me five scenic spots in Shanghai'
 ]
@@ -137,7 +138,8 @@
         tokenizer=tokenizer,
         every_n_iters=evaluation_freq,
         evaluation_inputs=evaluation_inputs,
-        instruction=prompt_template.INSTRUCTION_START)
+        system=SYSTEM,
+        prompt_template=prompt_template)
 ]
 
 # configure default hooks
```
```diff
@@ -25,7 +25,7 @@
 
 # Data
 data_path = 'timdettmers/openassistant-guanaco'
-prompt_template = PROMPT_TEMPLATE.openassistant
+prompt_template = PROMPT_TEMPLATE.baichuan2_chat
 max_length = 2048
 pack_to_max_length = True
 
@@ -42,6 +42,7 @@
 
 # Evaluate the generation performance during the training
 evaluation_freq = 500
+SYSTEM = ''
 evaluation_inputs = [
     '请给我介绍五个上海的景点', 'Please tell me five scenic spots in Shanghai'
 ]
@@ -137,7 +138,8 @@
         tokenizer=tokenizer,
         every_n_iters=evaluation_freq,
         evaluation_inputs=evaluation_inputs,
-        instruction=prompt_template.INSTRUCTION_START)
+        system=SYSTEM,
+        prompt_template=prompt_template)
 ]
 
 # configure default hooks
```
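
Both openassistant-guanaco configs set SYSTEM = '', since the dataset supplies full conversations rather than a task with a fixed system prompt; they differ in sequence handling, with one training on max_length = 512 without packing and the other packing samples to max_length = 2048. A toy sketch of what pack_to_max_length = True means conceptually (an illustration, not xtuner's implementation):

```python
def pack(samples: list[list[int]], max_length: int) -> list[list[int]]:
    """Concatenate tokenized samples into fixed-length blocks instead of
    padding each short sample individually, improving token throughput."""
    packed, buf = [], []
    for ids in samples:
        buf.extend(ids)
        while len(buf) >= max_length:
            packed.append(buf[:max_length])
            buf = buf[max_length:]
    if buf:
        packed.append(buf)  # trailing partial block; left for the collator to pad
    return packed

print(pack([[1, 2, 3], [4, 5], [6, 7, 8, 9]], max_length=4))
# -> [[1, 2, 3, 4], [5, 6, 7, 8], [9]]
```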
```diff
@@ -15,7 +15,7 @@
 from xtuner.dataset.map_fns import alpaca_map_fn, template_map_fn_factory
 from xtuner.engine import DatasetInfoHook, EvaluateChatHook
 from xtuner.model import SupervisedFinetune
-from xtuner.utils import PROMPT_TEMPLATE
+from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE
 
 #######################################################################
 #                          PART 1  Settings                           #
@@ -25,7 +25,7 @@
 
 # Data
 data_path = 'garage-bAInd/Open-Platypus'
-prompt_template = PROMPT_TEMPLATE.alpaca
+prompt_template = PROMPT_TEMPLATE.baichuan2_chat
 max_length = 2048
 pack_to_max_length = True
 
@@ -42,6 +42,7 @@
 
 # Evaluate the generation performance during the training
 evaluation_freq = 500
+SYSTEM = SYSTEM_TEMPLATE.alpaca
 evaluation_inputs = [
     '请给我介绍五个上海的景点', 'Please tell me five scenic spots in Shanghai'
 ]
@@ -137,7 +138,8 @@
         tokenizer=tokenizer,
         every_n_iters=evaluation_freq,
         evaluation_inputs=evaluation_inputs,
-        instruction=prompt_template.INSTRUCTION_START)
+        system=SYSTEM,
+        prompt_template=prompt_template)
 ]
 
 # configure default hooks
```