From bb804d853cd7213e0666fe628871b0b72cf0f72b Mon Sep 17 00:00:00 2001
From: Lev Kurilenko
Date: Wed, 8 Jan 2025 21:48:31 +0000
Subject: [PATCH 1/2] Add position_ids arg to OPTEmbedding forward function

---
 deepspeed/module_inject/layers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deepspeed/module_inject/layers.py b/deepspeed/module_inject/layers.py
index 22d24820d404..722ba413a671 100644
--- a/deepspeed/module_inject/layers.py
+++ b/deepspeed/module_inject/layers.py
@@ -191,7 +191,7 @@ def __init__(self, weight_shape=None, weight=None, bias=None):
         self.offset = 2
         super().__init__(weight_shape, weight=weight)
 
-    def forward(self, attention_mask: torch.LongTensor, past_key_values_length: int = 0):
+    def forward(self, attention_mask: torch.LongTensor, past_key_values_length: int = 0, position_ids: int = 0):
         """`input_ids_shape` is expected to be [bsz x seqlen]."""
         attention_mask = attention_mask.long()
 

From d6669705608cabe18d567234b37e391bbb551309 Mon Sep 17 00:00:00 2001
From: Logan Adams
Date: Thu, 9 Jan 2025 09:25:25 -0800
Subject: [PATCH 2/2] Unpin transformers

---
 .github/workflows/nv-ds-chat.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/nv-ds-chat.yml b/.github/workflows/nv-ds-chat.yml
index 329a1060f5eb..7e209cbe4397 100644
--- a/.github/workflows/nv-ds-chat.yml
+++ b/.github/workflows/nv-ds-chat.yml
@@ -43,7 +43,7 @@ jobs:
 
       - name: Install deepspeed
         run: |
-          pip install transformers==4.45.2
+          pip install transformers
           pip install .[dev]
           ds_report
 