From 696f30581e82a02d931812cc1a6b1e7faa0b613d Mon Sep 17 00:00:00 2001
From: DesmonDay <908660116@qq.com>
Date: Tue, 26 Mar 2024 16:45:10 +0800
Subject: [PATCH 1/2] try except sp

---
 paddlenlp/transformers/__init__.py         | 24 ++++++++++++++----------
 paddlenlp/transformers/gpt/modeling.py     | 18 +++++++++++-------
 paddlenlp/transformers/gpt/modeling_pp.py  | 10 +++++++---
 paddlenlp/transformers/llama/modeling.py   | 17 ++++++++++-------
 paddlenlp/transformers/mixtral/modeling.py | 17 ++++++++++-------
 5 files changed, 52 insertions(+), 34 deletions(-)

diff --git a/paddlenlp/transformers/__init__.py b/paddlenlp/transformers/__init__.py
index f58b3d837191..918d2c640aad 100644
--- a/paddlenlp/transformers/__init__.py
+++ b/paddlenlp/transformers/__init__.py
@@ -29,16 +29,20 @@
 from .feature_extraction_utils import BatchFeature, FeatureExtractionMixin
 from .image_processing_utils import ImageProcessingMixin
 from .attention_utils import create_bigbird_rand_mask_idx_list
-from paddle.distributed.fleet.utils.sequence_parallel_utils import (
-    GatherOp,
-    ScatterOp,
-    AllGatherOp,
-    ReduceScatterOp,
-    ColumnSequenceParallelLinear,
-    RowSequenceParallelLinear,
-    mark_as_sequence_parallel_parameter,
-    register_sequence_parallel_allreduce_hooks,
-)
+
+try:
+    from paddle.distributed.fleet.utils.sequence_parallel_utils import (
+        GatherOp,
+        ScatterOp,
+        AllGatherOp,
+        ReduceScatterOp,
+        ColumnSequenceParallelLinear,
+        RowSequenceParallelLinear,
+        mark_as_sequence_parallel_parameter,
+        register_sequence_parallel_allreduce_hooks,
+    )
+except ImportError:
+    pass
 from .export import export_model
 
 # isort: split
diff --git a/paddlenlp/transformers/gpt/modeling.py b/paddlenlp/transformers/gpt/modeling.py
index 167879c46256..47a4a9cb5261 100644
--- a/paddlenlp/transformers/gpt/modeling.py
+++ b/paddlenlp/transformers/gpt/modeling.py
@@ -29,13 +29,17 @@
 from paddle.distributed import fleet
 from paddle.distributed.fleet.meta_parallel import get_rng_state_tracker
 from paddle.distributed.fleet.utils import recompute
-from paddle.distributed.fleet.utils.sequence_parallel_utils import (
-    ColumnSequenceParallelLinear,
-    GatherOp,
-    RowSequenceParallelLinear,
-    ScatterOp,
-    mark_as_sequence_parallel_parameter,
-)
+
+try:
+    from paddle.distributed.fleet.utils.sequence_parallel_utils import (
+        ColumnSequenceParallelLinear,
+        GatherOp,
+        RowSequenceParallelLinear,
+        ScatterOp,
+        mark_as_sequence_parallel_parameter,
+    )
+except ImportError:
+    pass
 from paddle.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
 
 from ...utils.converter import StateDictNameMapping
diff --git a/paddlenlp/transformers/gpt/modeling_pp.py b/paddlenlp/transformers/gpt/modeling_pp.py
index fb4946febc46..fc2f50c1857f 100644
--- a/paddlenlp/transformers/gpt/modeling_pp.py
+++ b/paddlenlp/transformers/gpt/modeling_pp.py
@@ -20,9 +20,13 @@
     SharedLayerDesc,
 )
 from paddle.distributed.fleet.utils import recompute
-from paddle.distributed.fleet.utils.sequence_parallel_utils import (
-    mark_as_sequence_parallel_parameter,
-)
+
+try:
+    from paddle.distributed.fleet.utils.sequence_parallel_utils import (
+        mark_as_sequence_parallel_parameter,
+    )
+except ImportError:
+    pass
 
 from paddlenlp.transformers.model_utils import PipelinePretrainedModel
diff --git a/paddlenlp/transformers/llama/modeling.py b/paddlenlp/transformers/llama/modeling.py
index 77ea2929a31d..000e0d6f2c34 100755
--- a/paddlenlp/transformers/llama/modeling.py
+++ b/paddlenlp/transformers/llama/modeling.py
@@ -44,13 +44,16 @@ def swiglu(x, y=None):
         return F.silu(x) * y
 
-from paddle.distributed.fleet.utils.sequence_parallel_utils import (
-    ColumnSequenceParallelLinear,
-    GatherOp,
-    RowSequenceParallelLinear,
-    ScatterOp,
-    mark_as_sequence_parallel_parameter,
-)
+try:
+    from paddle.distributed.fleet.utils.sequence_parallel_utils import (
+        ColumnSequenceParallelLinear,
+        GatherOp,
+        RowSequenceParallelLinear,
+        ScatterOp,
+        mark_as_sequence_parallel_parameter,
+    )
+except ImportError:
+    pass
 
 from paddle.utils import try_import
 
 from paddlenlp.transformers.conversion_utils import (
diff --git a/paddlenlp/transformers/mixtral/modeling.py b/paddlenlp/transformers/mixtral/modeling.py
index 592f9a47847a..7a8254d6877c 100644
--- a/paddlenlp/transformers/mixtral/modeling.py
+++ b/paddlenlp/transformers/mixtral/modeling.py
@@ -33,13 +33,16 @@
 except ImportError:
     fused_rotary_position_embedding = None
 
-from paddle.distributed.fleet.utils.sequence_parallel_utils import (
-    ColumnSequenceParallelLinear,
-    GatherOp,
-    RowSequenceParallelLinear,
-    ScatterOp,
-    mark_as_sequence_parallel_parameter,
-)
+try:
+    from paddle.distributed.fleet.utils.sequence_parallel_utils import (
+        ColumnSequenceParallelLinear,
+        GatherOp,
+        RowSequenceParallelLinear,
+        ScatterOp,
+        mark_as_sequence_parallel_parameter,
+    )
+except ImportError:
+    pass
 
 from paddlenlp.transformers.conversion_utils import (
     StateDictNameMapping,

From cef772f01fb58ea35a5132be6147da0ea290595f Mon Sep 17 00:00:00 2001
From: DesmonDay <908660116@qq.com>
Date: Fri, 12 Apr 2024 15:31:03 +0800
Subject: [PATCH 2/2] fix sp import

---
 .../models/language_model/gpt/dygraph/hybrid_model.py | 17 ++++++++++-------
 .../models/language_model/language_module.py          |  9 ++++++---
 paddlenlp/transformers/gpt/modeling_auto.py           | 12 ++++++++----
 .../transformers/mc2_seqence_parallel_linear.py       | 12 ++++++++----
 4 files changed, 32 insertions(+), 18 deletions(-)

diff --git a/model_zoo/gpt-3/ppfleetx/models/language_model/gpt/dygraph/hybrid_model.py b/model_zoo/gpt-3/ppfleetx/models/language_model/gpt/dygraph/hybrid_model.py
index f47d800c5f15..f4c1ee8d46a7 100644
--- a/model_zoo/gpt-3/ppfleetx/models/language_model/gpt/dygraph/hybrid_model.py
+++ b/model_zoo/gpt-3/ppfleetx/models/language_model/gpt/dygraph/hybrid_model.py
@@ -48,13 +48,16 @@
     MinLengthLogitsProcessor,
     RepetitionPenaltyLogitsProcessor,
 )
-from paddle.distributed.fleet.utils.sequence_parallel_utils import (
-    ColumnSequenceParallelLinear,
-    GatherOp,
-    RowSequenceParallelLinear,
-    ScatterOp,
-    mark_as_sequence_parallel_parameter,
-)
+try:
+    from paddle.distributed.fleet.utils.sequence_parallel_utils import (
+        ColumnSequenceParallelLinear,
+        GatherOp,
+        RowSequenceParallelLinear,
+        ScatterOp,
+        mark_as_sequence_parallel_parameter,
+    )
+except ImportError:
+    pass
 
 from paddlenlp.transformers.segment_parallel_utils import ReshardLayer
 
diff --git a/model_zoo/gpt-3/ppfleetx/models/language_model/language_module.py b/model_zoo/gpt-3/ppfleetx/models/language_model/language_module.py
index 1a73a35982ff..c86fa300e352 100644
--- a/model_zoo/gpt-3/ppfleetx/models/language_model/language_module.py
+++ b/model_zoo/gpt-3/ppfleetx/models/language_model/language_module.py
@@ -24,9 +24,12 @@
 from ppfleetx.core.module.basic_module import BasicModule
 from ppfleetx.data.tokenizers import GPTTokenizer
 from ppfleetx.distributed.apis import env
-from paddle.distributed.fleet.utils.sequence_parallel_utils import (
-    register_sequence_parallel_allreduce_hooks,
-)
+try:
+    from paddle.distributed.fleet.utils.sequence_parallel_utils import (
+        register_sequence_parallel_allreduce_hooks,
+    )
+except ImportError:
+    pass
 from ppfleetx.utils.log import logger
 
 # TODO(haohongxiang): to solve the problem of cross-reference
diff --git a/paddlenlp/transformers/gpt/modeling_auto.py b/paddlenlp/transformers/gpt/modeling_auto.py
index 255763be395f..2e508339ab39 100644
--- a/paddlenlp/transformers/gpt/modeling_auto.py
+++ b/paddlenlp/transformers/gpt/modeling_auto.py
@@ -30,10 +30,14 @@
 from paddle.distributed import fleet
 from paddle.distributed.fleet.meta_parallel import get_rng_state_tracker
 from paddle.distributed.fleet.utils import recompute
-from paddle.distributed.fleet.utils.sequence_parallel_utils import (
-    ScatterOp,
-    mark_as_sequence_parallel_parameter,
-)
+
+try:
+    from paddle.distributed.fleet.utils.sequence_parallel_utils import (
+        ScatterOp,
+        mark_as_sequence_parallel_parameter,
+    )
+except ImportError:
+    pass
 
 from ...utils.converter import StateDictNameMapping
 from .. import PretrainedModel, register_base_model
diff --git a/paddlenlp/transformers/mc2_seqence_parallel_linear.py b/paddlenlp/transformers/mc2_seqence_parallel_linear.py
index 7d669833e690..c39a78cc6252 100644
--- a/paddlenlp/transformers/mc2_seqence_parallel_linear.py
+++ b/paddlenlp/transformers/mc2_seqence_parallel_linear.py
@@ -23,10 +23,14 @@
 
 from paddle import distributed as dist
 from paddle.autograd import PyLayer
-from paddle.distributed.fleet.utils.sequence_parallel_utils import (
-    ColumnSequenceParallelLinear,
-    RowSequenceParallelLinear,
-)
+
+try:
+    from paddle.distributed.fleet.utils.sequence_parallel_utils import (
+        ColumnSequenceParallelLinear,
+        RowSequenceParallelLinear,
+    )
+except ImportError:
+    pass
 
 __all_gather_recomputation__ = False
 if int(os.getenv("MC2_Recompute", 0)):
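
Note on the pattern: both commits apply the same guarded-import idiom.
paddle.distributed.fleet.utils.sequence_parallel_utils only exists in newer
Paddle releases, so each import is wrapped in try/except ImportError and the
package keeps importing on older builds. A minimal standalone sketch of the
idiom follows; the _SP_AVAILABLE flag and the None fallbacks are illustrative
additions, not part of the patches above:

    # Guarded import: sequence_parallel_utils is absent from older Paddle
    # releases, so degrade gracefully instead of failing at import time.
    try:
        from paddle.distributed.fleet.utils.sequence_parallel_utils import (
            ColumnSequenceParallelLinear,
            GatherOp,
            RowSequenceParallelLinear,
            ScatterOp,
            mark_as_sequence_parallel_parameter,
        )

        _SP_AVAILABLE = True  # illustrative flag, not in the patches
    except ImportError:
        # Illustrative fallbacks: keep the names defined so modules that
        # never touch sequence parallelism still import cleanly; any real
        # use of these symbols on an old Paddle build fails loudly.
        ColumnSequenceParallelLinear = None
        GatherOp = None
        RowSequenceParallelLinear = None
        ScatterOp = None
        mark_as_sequence_parallel_parameter = None
        _SP_AVAILABLE = False

Catching ImportError specifically, rather than using a bare except, keeps
KeyboardInterrupt, SystemExit, and genuine errors raised while initializing
the imported module from being silently swallowed.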