File tree: 2 files changed, +10 -3 lines changed
docs/examples/mix_datasets
2 files changed, +10 -3 lines changed
Original file line number | Diff line number | Diff line change
9
9
output_dir = Path (__file__ ).parent .joinpath ("output" )
10
10
output_dir .mkdir (exist_ok = True )
11
11
12
+ system_prompt = "You are a helpful assistant."
13
+
12
14
concatenate_recipe_yaml = Path (__file__ ).parent .joinpath ("concatenate_recipe.yaml" )
13
15
concatenated_output_jsonl = output_dir .joinpath ("concatenated.jsonl" )
14
- mix_datasets (concatenate_recipe_yaml , concatenated_output_jsonl )
16
+ mix_datasets (
17
+ concatenate_recipe_yaml , concatenated_output_jsonl , system_prompt = system_prompt
18
+ )
15
19
16
20
weighted_recipe_yaml = Path (__file__ ).parent .joinpath ("weighted_recipe.yaml" )
17
21
weighted_output_jsonl = output_dir .joinpath ("weighted.jsonl" )
18
- mix_datasets (weighted_recipe_yaml , weighted_output_jsonl )
22
+ mix_datasets (weighted_recipe_yaml , weighted_output_jsonl , system_prompt = system_prompt )
Original file line number | Diff line number | Diff line change
@@ -603,8 +603,9 @@ def mix_datasets(
603
603
recipe_file : str ,
604
604
output_file : str ,
605
605
num_proc : Optional [int ] = 8 ,
606
+ system_prompt : Optional [str ] = None ,
606
607
):
607
- recipe = Recipe (recipe_file )
608
+ recipe = Recipe (recipe_file , system_prompt )
608
609
if recipe .datasets :
609
610
recipe .save_mixed_dataset (output_file , num_proc )
610
611
else :
@@ -719,10 +720,12 @@ def generate_data(
719
720
mix_datasets (
720
721
recipe_file = f"{ output_dir } /skills_recipe_{ date_suffix } .yaml" ,
721
722
output_file = f"{ output_dir } /skills_train_msgs_{ date_suffix } .jsonl" ,
723
+ system_prompt = system_prompt ,
722
724
)
723
725
mix_datasets (
724
726
recipe_file = f"{ output_dir } /knowledge_recipe_{ date_suffix } .yaml" ,
725
727
output_file = f"{ output_dir } /knowledge_train_msgs_{ date_suffix } .jsonl" ,
728
+ system_prompt = system_prompt ,
726
729
)
727
730
728
731
generate_duration = time .time () - generate_start
You can’t perform that action at this time.
0 commit comments