[BugFix] Try except sequence parallel utils #8189

Merged 3 commits on Apr 12, 2024
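Every hunk below applies the same guard: the import of `paddle.distributed.fleet.utils.sequence_parallel_utils` is wrapped in `try`/`except` so the surrounding module still imports on Paddle builds that do not ship the sequence-parallel utilities. A minimal sketch of the idiom as merged (symbol names are the ones used in the diff; note the PR uses a bare `except`, which also swallows errors other than `ImportError`):

```python
# Guarded import, as applied across the files in this PR: on a Paddle build
# that predates sequence_parallel_utils, the names are simply left undefined
# and sequence parallelism is unavailable.
try:
    from paddle.distributed.fleet.utils.sequence_parallel_utils import (
        ColumnSequenceParallelLinear,
        GatherOp,
        RowSequenceParallelLinear,
        ScatterOp,
        mark_as_sequence_parallel_parameter,
    )
except:  # noqa: E722 -- bare except, as in the merged change
    pass
```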
@@ -48,13 +48,16 @@
     MinLengthLogitsProcessor,
     RepetitionPenaltyLogitsProcessor,
 )
-from paddle.distributed.fleet.utils.sequence_parallel_utils import (
-    ColumnSequenceParallelLinear,
-    GatherOp,
-    RowSequenceParallelLinear,
-    ScatterOp,
-    mark_as_sequence_parallel_parameter,
-)
+try:
+    from paddle.distributed.fleet.utils.sequence_parallel_utils import (
+        ColumnSequenceParallelLinear,
+        GatherOp,
+        RowSequenceParallelLinear,
+        ScatterOp,
+        mark_as_sequence_parallel_parameter,
+    )
+except:
+    pass
 
 from paddlenlp.transformers.segment_parallel_utils import ReshardLayer
 
@@ -24,9 +24,12 @@
 from ppfleetx.core.module.basic_module import BasicModule
 from ppfleetx.data.tokenizers import GPTTokenizer
 from ppfleetx.distributed.apis import env
-from paddle.distributed.fleet.utils.sequence_parallel_utils import (
-    register_sequence_parallel_allreduce_hooks,
-)
+try:
+    from paddle.distributed.fleet.utils.sequence_parallel_utils import (
+        register_sequence_parallel_allreduce_hooks,
+    )
+except:
+    pass
 from ppfleetx.utils.log import logger
 
 # TODO(haohongxiang): to solve the problem of cross-reference
paddlenlp/transformers/__init__.py (14 additions, 10 deletions)
@@ -29,16 +29,20 @@
 from .feature_extraction_utils import BatchFeature, FeatureExtractionMixin
 from .image_processing_utils import ImageProcessingMixin
 from .attention_utils import create_bigbird_rand_mask_idx_list
-from paddle.distributed.fleet.utils.sequence_parallel_utils import (
-    GatherOp,
-    ScatterOp,
-    AllGatherOp,
-    ReduceScatterOp,
-    ColumnSequenceParallelLinear,
-    RowSequenceParallelLinear,
-    mark_as_sequence_parallel_parameter,
-    register_sequence_parallel_allreduce_hooks,
-)
+
+try:
+    from paddle.distributed.fleet.utils.sequence_parallel_utils import (
+        GatherOp,
+        ScatterOp,
+        AllGatherOp,
+        ReduceScatterOp,
+        ColumnSequenceParallelLinear,
+        RowSequenceParallelLinear,
+        mark_as_sequence_parallel_parameter,
+        register_sequence_parallel_allreduce_hooks,
+    )
+except:
+    pass
 from .export import export_model
 
 # isort: split
paddlenlp/transformers/gpt/modeling.py (11 additions, 7 deletions)
@@ -29,13 +29,17 @@
 from paddle.distributed import fleet
 from paddle.distributed.fleet.meta_parallel import get_rng_state_tracker
 from paddle.distributed.fleet.utils import recompute
-from paddle.distributed.fleet.utils.sequence_parallel_utils import (
-    ColumnSequenceParallelLinear,
-    GatherOp,
-    RowSequenceParallelLinear,
-    ScatterOp,
-    mark_as_sequence_parallel_parameter,
-)
+
+try:
+    from paddle.distributed.fleet.utils.sequence_parallel_utils import (
+        ColumnSequenceParallelLinear,
+        GatherOp,
+        RowSequenceParallelLinear,
+        ScatterOp,
+        mark_as_sequence_parallel_parameter,
+    )
+except:
+    pass
 from paddle.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
 from paddle.utils import try_import
 
paddlenlp/transformers/gpt/modeling_auto.py (8 additions, 4 deletions)
@@ -30,10 +30,14 @@
 from paddle.distributed import fleet
 from paddle.distributed.fleet.meta_parallel import get_rng_state_tracker
 from paddle.distributed.fleet.utils import recompute
-from paddle.distributed.fleet.utils.sequence_parallel_utils import (
-    ScatterOp,
-    mark_as_sequence_parallel_parameter,
-)
+
+try:
+    from paddle.distributed.fleet.utils.sequence_parallel_utils import (
+        ScatterOp,
+        mark_as_sequence_parallel_parameter,
+    )
+except:
+    pass
 
 from ...utils.converter import StateDictNameMapping
 from .. import PretrainedModel, register_base_model
paddlenlp/transformers/gpt/modeling_pp.py (7 additions, 3 deletions)
@@ -19,9 +19,13 @@
     SharedLayerDesc,
 )
 from paddle.distributed.fleet.utils import recompute
-from paddle.distributed.fleet.utils.sequence_parallel_utils import (
-    mark_as_sequence_parallel_parameter,
-)
+
+try:
+    from paddle.distributed.fleet.utils.sequence_parallel_utils import (
+        mark_as_sequence_parallel_parameter,
+    )
+except:
+    pass
 
 from paddlenlp.transformers.model_utils import PipelinePretrainedModel
 
paddlenlp/transformers/llama/modeling.py (10 additions, 7 deletions)
@@ -45,13 +45,16 @@ def swiglu(x, y=None):
     return F.silu(x) * y
 
 
-from paddle.distributed.fleet.utils.sequence_parallel_utils import (
-    ColumnSequenceParallelLinear,
-    GatherOp,
-    RowSequenceParallelLinear,
-    ScatterOp,
-    mark_as_sequence_parallel_parameter,
-)
+try:
+    from paddle.distributed.fleet.utils.sequence_parallel_utils import (
+        ColumnSequenceParallelLinear,
+        GatherOp,
+        RowSequenceParallelLinear,
+        ScatterOp,
+        mark_as_sequence_parallel_parameter,
+    )
+except:
+    pass
 from paddle.utils import try_import
 
 from paddlenlp.transformers.conversion_utils import (
paddlenlp/transformers/mc2_seqence_parallel_linear.py (8 additions, 4 deletions)
@@ -23,10 +23,14 @@
 
 from paddle import distributed as dist
 from paddle.autograd import PyLayer
-from paddle.distributed.fleet.utils.sequence_parallel_utils import (
-    ColumnSequenceParallelLinear,
-    RowSequenceParallelLinear,
-)
+
+try:
+    from paddle.distributed.fleet.utils.sequence_parallel_utils import (
+        ColumnSequenceParallelLinear,
+        RowSequenceParallelLinear,
+    )
+except:
+    pass
 
 __all_gather_recomputation__ = False
 if int(os.getenv("MC2_Recompute", 0)):
paddlenlp/transformers/mixtral/modeling.py (10 additions, 7 deletions)
@@ -33,13 +33,16 @@
 except ImportError:
     fused_rotary_position_embedding = None
 
-from paddle.distributed.fleet.utils.sequence_parallel_utils import (
-    ColumnSequenceParallelLinear,
-    GatherOp,
-    RowSequenceParallelLinear,
-    ScatterOp,
-    mark_as_sequence_parallel_parameter,
-)
+try:
+    from paddle.distributed.fleet.utils.sequence_parallel_utils import (
+        ColumnSequenceParallelLinear,
+        GatherOp,
+        RowSequenceParallelLinear,
+        ScatterOp,
+        mark_as_sequence_parallel_parameter,
+    )
+except:
+    pass
 
 from paddlenlp.transformers.conversion_utils import (
     StateDictNameMapping,
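Because a bare `except: pass` leaves the guarded names undefined rather than bound to a sentinel, call sites must only touch them when sequence parallelism is actually enabled. A stricter variant of the same idea, shown here purely as an illustration and not part of this PR, narrows the handler to `ImportError` and records availability in a flag (`SEQUENCE_PARALLEL_AVAILABLE` and `maybe_scatter` are hypothetical names):

```python
# Illustrative variant (not from this PR): catch only ImportError and keep
# an explicit availability flag that downstream code can branch on.
try:
    from paddle.distributed.fleet.utils.sequence_parallel_utils import (
        GatherOp,
        ScatterOp,
    )

    SEQUENCE_PARALLEL_AVAILABLE = True
except ImportError:
    SEQUENCE_PARALLEL_AVAILABLE = False


def maybe_scatter(hidden_states):
    """Scatter along the sequence axis when the op exists, else pass through."""
    if SEQUENCE_PARALLEL_AVAILABLE:
        # ScatterOp is a PyLayer in sequence_parallel_utils, invoked via .apply().
        return ScatterOp.apply(hidden_states)
    return hidden_states
```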