Add tests for no_trainer and fix existing examples #16656

Merged · 9 commits · Apr 8, 2022
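This PR applies the same three fixes to each no_trainer example and wires them up for CI testing: accelerator.save_state checkpoints are written under --output_dir instead of the current working directory, epoch-level checkpointing is added where it was missing, and the final evaluation metrics are dumped to output_dir/all_results.json. The test file that consumes these dumps is not shown in this diff view; the snippet below is only an illustrative sketch of the pattern the metric dumps enable, with the model name, dataset, and threshold invented for the example.

# Hypothetical sketch -- the actual test file is not part of this diff view.
# Each test runs an example end to end on a tiny setup, then asserts on the
# metrics the script now writes to <output_dir>/all_results.json.
import json
import os
import subprocess
import sys
import tempfile
import unittest


class NoTrainerExampleTest(unittest.TestCase):
    def test_run_clm_no_trainer(self):
        with tempfile.TemporaryDirectory() as tmp_dir:
            subprocess.run(
                [
                    sys.executable,
                    "examples/pytorch/language-modeling/run_clm_no_trainer.py",
                    "--model_name_or_path", "sshleifer/tiny-gpt2",  # illustrative tiny model
                    "--dataset_name", "wikitext",
                    "--dataset_config_name", "wikitext-2-raw-v1",
                    "--num_train_epochs", "1",
                    "--output_dir", tmp_dir,
                ],
                check=True,
            )
            with open(os.path.join(tmp_dir, "all_results.json")) as f:
                results = json.load(f)
            self.assertLess(results["perplexity"], 500)  # loose smoke-test bound


if __name__ == "__main__":
    unittest.main()

Writing metrics to a stable, machine-readable location is what lets a test treat each example as a black box instead of parsing its logs.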
1 change: 1 addition & 0 deletions .circleci/config.yml
@@ -587,6 +587,7 @@ jobs:
       - run: pip install --upgrade pip
       - run: pip install .[sklearn,torch,sentencepiece,testing,torch-speech]
       - run: pip install -r examples/pytorch/_tests_requirements.txt
+      - run: pip install git+https://github.com/huggingface/accelerate
       - save_cache:
           key: v0.4-torch_examples-{{ checksum "setup.py" }}
           paths:
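Installing Accelerate from the main branch rather than a PyPI release suggests the updated examples depend on save_state/load_state behavior that had not yet shipped in a stable version at the time of this PR; once it has, this line can presumably pin a released version instead.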
14 changes: 12 additions & 2 deletions examples/pytorch/language-modeling/run_clm_no_trainer.py
@@ -23,6 +23,7 @@
 # You can also adapt this script on your own causal language modeling task. Pointers for this are left as comments.

 import argparse
+import json
 import logging
 import math
 import os
@@ -537,7 +538,10 @@ def group_texts(examples):

     if isinstance(checkpointing_steps, int):
         if completed_steps % checkpointing_steps == 0:
-            accelerator.save_state(f"step_{completed_steps}")
+            output_dir = f"step_{completed_steps}"
+            if args.output_dir is not None:
+                output_dir = os.path.join(args.output_dir, output_dir)
+            accelerator.save_state(output_dir)

     if completed_steps >= args.max_train_steps:
         break
@@ -581,7 +585,10 @@ def group_texts(examples):
     )

     if args.checkpointing_steps == "epoch":
-        accelerator.save_state(f"epoch_{epoch}")
+        output_dir = f"epoch_{epoch}"
+        if args.output_dir is not None:
+            output_dir = os.path.join(args.output_dir, output_dir)
+        accelerator.save_state(output_dir)

     if args.output_dir is not None:
         accelerator.wait_for_everyone()
@@ -592,6 +599,9 @@ def group_texts(examples):
         if args.push_to_hub:
             repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)

+        with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
+            json.dump({"perplexity": perplexity}, f)


 if __name__ == "__main__":
     main()
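The resolve-then-save pattern above is repeated verbatim for step and epoch checkpoints in every script in this PR. It stays inlined in each example, since the examples are deliberately self-contained, but it is equivalent to a small helper along these lines (illustrative only, not part of the PR):

import os

def checkpoint_dir(output_dir, tag):
    # Resolve where accelerator.save_state should write, e.g. tag="step_500"
    # or tag="epoch_3". Without --output_dir, fall back to the bare tag,
    # i.e. a directory relative to the current working directory.
    return tag if output_dir is None else os.path.join(output_dir, tag)

# Usage: accelerator.save_state(checkpoint_dir(args.output_dir, f"epoch_{epoch}"))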
22 changes: 17 additions & 5 deletions examples/pytorch/language-modeling/run_mlm_no_trainer.py
@@ -23,6 +23,7 @@
 # You can also adapt this script on your own mlm task. Pointers for this are left as comments.

 import argparse
+import json
 import logging
 import math
 import os
@@ -457,9 +458,11 @@ def group_texts(examples):
     train_dataset = tokenized_datasets["train"]
     eval_dataset = tokenized_datasets["validation"]

-    # Log a few random samples from the training set:
-    for index in random.sample(range(len(train_dataset)), 3):
-        logger.info(f"Sample {index} of the training set: {train_dataset[index]}.")
+    # Conditional for small test subsets
+    if len(train_dataset) > 3:
+        # Log a few random samples from the training set:
+        for index in random.sample(range(len(train_dataset)), 3):
+            logger.info(f"Sample {index} of the training set: {train_dataset[index]}.")

     # Data collator
     # This one will take care of randomly masking the tokens.
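The new guard matters because random.sample raises a ValueError when the requested sample is larger than the population, which the tiny datasets used by the tests would otherwise trigger:

import random

random.sample(range(2), 3)  # ValueError: Sample larger than population or is negative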
@@ -581,7 +584,10 @@ def group_texts(examples):

     if isinstance(checkpointing_steps, int):
         if completed_steps % checkpointing_steps == 0:
-            accelerator.save_state(f"step_{completed_steps}")
+            output_dir = f"step_{completed_steps}"
+            if args.output_dir is not None:
+                output_dir = os.path.join(args.output_dir, output_dir)
+            accelerator.save_state(output_dir)

     if completed_steps >= args.max_train_steps:
         break
@@ -625,7 +631,10 @@ def group_texts(examples):
     )

     if args.checkpointing_steps == "epoch":
-        accelerator.save_state(f"epoch_{epoch}")
+        output_dir = f"epoch_{epoch}"
+        if args.output_dir is not None:
+            output_dir = os.path.join(args.output_dir, output_dir)
+        accelerator.save_state(output_dir)

     if args.output_dir is not None:
         accelerator.wait_for_everyone()
@@ -636,6 +645,9 @@ def group_texts(examples):
         if args.push_to_hub:
             repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)

+        with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
+            json.dump({"perplexity": perplexity}, f)


 if __name__ == "__main__":
     main()
14 changes: 13 additions & 1 deletion examples/pytorch/multiple-choice/run_swag_no_trainer.py
@@ -19,6 +19,7 @@
 # You can also adapt this script on your own multiple choice task. Pointers for this are left as comments.

 import argparse
+import json
 import logging
 import math
 import os
@@ -540,7 +541,10 @@ def preprocess_function(examples):

     if isinstance(checkpointing_steps, int):
         if completed_steps % checkpointing_steps == 0:
-            accelerator.save_state(f"step_{completed_steps}")
+            output_dir = f"step_{completed_steps}"
+            if args.output_dir is not None:
+                output_dir = os.path.join(args.output_dir, output_dir)
+            accelerator.save_state(output_dir)

     if completed_steps >= args.max_train_steps:
         break
@@ -578,6 +582,12 @@ def preprocess_function(examples):
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
)

if args.checkpointing_steps == "epoch":
output_dir = f"epoch_{epoch}"
if args.output_dir is not None:
output_dir = os.path.join(args.output_dir, output_dir)
accelerator.save_state(output_dir)

if args.output_dir is not None:
accelerator.wait_for_everyone()
unwrapped_model = accelerator.unwrap_model(model)
@@ -586,6 +596,8 @@ def preprocess_function(examples):
         tokenizer.save_pretrained(args.output_dir)
         if args.push_to_hub:
             repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
+        with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
+            json.dump({"eval_accuracy": eval_metric["accuracy"]}, f)


 if __name__ == "__main__":
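Unlike the language-modeling scripts, run_swag_no_trainer.py previously had no epoch-level save_state call at all, so the epoch block here is a pure addition rather than a path fix; the file's single deletion is the old step-checkpoint line.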
17 changes: 13 additions & 4 deletions examples/pytorch/question-answering/run_qa_no_trainer.py
@@ -19,6 +19,7 @@
 # You can also adapt this script on your own question answering task. Pointers for this are left as comments.

 import argparse
+import json
 import logging
 import math
 import os
@@ -783,11 +784,20 @@ def create_and_fill_np_array(start_or_end_logits, dataset, max_len):

         if isinstance(checkpointing_steps, int):
             if completed_steps % checkpointing_steps == 0:
-                accelerator.save_state(f"step_{completed_steps}")
+                output_dir = f"step_{completed_steps}"
+                if args.output_dir is not None:
+                    output_dir = os.path.join(args.output_dir, output_dir)
+                accelerator.save_state(output_dir)

         if completed_steps >= args.max_train_steps:
             break

+    if args.checkpointing_steps == "epoch":
+        output_dir = f"epoch_{epoch}"
+        if args.output_dir is not None:
+            output_dir = os.path.join(args.output_dir, output_dir)
+        accelerator.save_state(output_dir)
+
     if args.push_to_hub and epoch < args.num_train_epochs - 1:
         accelerator.wait_for_everyone()
         unwrapped_model = accelerator.unwrap_model(model)
@@ -879,9 +889,6 @@ def create_and_fill_np_array(start_or_end_logits, dataset, max_len):

         accelerator.log(log, step=completed_steps)

-    if args.checkpointing_steps == "epoch":
-        accelerator.save_state(f"epoch_{epoch}")
-
     if args.output_dir is not None:
         accelerator.wait_for_everyone()
         unwrapped_model = accelerator.unwrap_model(model)
@@ -890,6 +897,8 @@ def create_and_fill_np_array(start_or_end_logits, dataset, max_len):
         tokenizer.save_pretrained(args.output_dir)
         if args.push_to_hub:
             repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
+        with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
+            json.dump({"eval_f1": eval_metric["f1"], "eval_exact": eval_metric["exact"]}, f)


 if __name__ == "__main__":
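Beyond the shared path fix, run_qa_no_trainer.py also relocates the epoch checkpoint: the second hunk deletes the old save_state after evaluation and logging, and the first hunk re-adds it immediately after the training loop, so state is saved before the comparatively slow QA post-processing and metric computation run.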
21 changes: 19 additions & 2 deletions examples/pytorch/summarization/run_summarization_no_trainer.py
@@ -19,6 +19,7 @@
 # You can also adapt this script on your own summarization task. Pointers for this are left as comments.

 import argparse
+import json
 import logging
 import math
 import os
@@ -602,7 +603,10 @@ def postprocess_text(preds, labels):

     if isinstance(checkpointing_steps, int):
         if completed_steps % checkpointing_steps == 0:
-            accelerator.save_state(f"step_{completed_steps}")
+            output_dir = f"step_{completed_steps}"
+            if args.output_dir is not None:
+                output_dir = os.path.join(args.output_dir, output_dir)
+            accelerator.save_state(output_dir)

     if completed_steps >= args.max_train_steps:
         break
@@ -669,7 +673,10 @@ def postprocess_text(preds, labels):
     )

     if args.checkpointing_steps == "epoch":
-        accelerator.save_state(f"epoch_{epoch}")
+        output_dir = f"epoch_{epoch}"
+        if args.output_dir is not None:
+            output_dir = os.path.join(args.output_dir, output_dir)
+        accelerator.save_state(output_dir)

     if args.output_dir is not None:
         accelerator.wait_for_everyone()
@@ -679,6 +686,16 @@ def postprocess_text(preds, labels):
         tokenizer.save_pretrained(args.output_dir)
         if args.push_to_hub:
             repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
+        with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
+            json.dump(
+                {
+                    "eval_rouge1": result["rouge1"],
+                    "eval_rouge2": result["rouge2"],
+                    "eval_rougeL": result["rougeL"],
+                    "eval_rougeLsum": result["rougeLsum"],
+                },
+                f,
+            )


 if __name__ == "__main__":