Add text generation #253

Merged
merged 23 commits on Feb 2, 2023
6 changes: 6 additions & 0 deletions optimum/graphcore/__init__.py
@@ -16,6 +16,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import poptorch

from .ipu_configuration import IPUConfig
from .models.bart import PipelinedBartForConditionalGeneration, PipelinedBartForSequenceClassification
from .models.bert import (
@@ -57,3 +59,7 @@
from .training_args import IPUTrainingArguments
from .training_args_seq2seq import IPUSeq2SeqTrainingArguments
from .version import __version__


# Disable poptorch compiler warnings by default
poptorch.setLogLevel("ERR")
2,531 changes: 358 additions & 2,173 deletions optimum/graphcore/generation_utils.py

Large diffs are not rendered by default.

38 changes: 13 additions & 25 deletions optimum/graphcore/modeling_utils.py
@@ -125,6 +125,7 @@ def from_transformers(cls, model: PreTrainedModel, ipu_config: IPUConfig):
pipelined_model = cls(config)
pipelined_model.load_state_dict(model.state_dict())
pipelined_model.ipu_config = copy.deepcopy(ipu_config)
pipelined_model.training = model.training
return pipelined_model

@classmethod
@@ -224,29 +225,6 @@ def num_parameters(self, only_trainable: bool = False, exclude_embeddings: bool
return sum(p.numel() for p in self.parameters() if p.requires_grad or not only_trainable)


class GenerationMethodsMixin:
def get_encoder(
self,
device_iterations: Optional[int] = None,
replication_factor: Optional[int] = None,
for_inference: bool = True,
):
if not hasattr(self, "_wrapped_encoder"):
encoder = super().get_encoder()
if self.ipu_config.execute_encoder_on_cpu_for_generation:
self._wrapped_encoder = encoder.to(torch.float32)
else:
self.eval_opts = self.ipu_config.to_options(for_inference=True)
self._wrapped_encoder = poptorch.inferenceModel(
encoder, options=self.ipu_config.to_options(for_inference=True)
)
return self._wrapped_encoder

def prepare_inputs_for_generation(self, input_ids: torch.LongTensor, **kwargs) -> Dict[str, Any]:
inputs = super().prepare_inputs_for_generation(input_ids, **kwargs)
return {k: v for k, v in inputs.items() if k in signature(self._forward_for_generate).parameters}


def get_layer_ipu(layers_per_ipu: List[int], target_number_of_layers: Optional[Union[int, List]] = None):
# List of the IPU Id for each encoder layer
layer_ipu: List[int] = []
@@ -448,8 +426,18 @@ def forward(
# decoder_inputs_embeds = self.shared(decoder_input_ids) * decoder_embed_scale
# combined, n1, n2 = self._combine_inputs(input_ids, decoder_input_ids)
# encoder_inputs_embeds, decoder_inputs_embeds = self._separate_inputs(self.shared(combined), n1, n2)
idx, combined = self._combine_inputs(input_ids, decoder_input_ids)
encoder_inputs_embeds, decoder_inputs_embeds = self._separate_inputs(idx, self.shared(combined))
encoder_inputs_embeds, decoder_inputs_embeds = None, None
if input_ids is None:
# call on decoder_input_ids only
decoder_inputs_embeds = self.shared(decoder_input_ids)
elif decoder_input_ids is None:
# call on input_ids only
encoder_inputs_embeds = self.shared(input_ids)
else:
# Combined case: both input_ids and decoder_input_ids are present,
# so embed them with a single call to the shared embedding and split the result
idx, combined = self._combine_inputs(input_ids, decoder_input_ids)
encoder_inputs_embeds, decoder_inputs_embeds = self._separate_inputs(idx, self.shared(combined))

if encoder_embed_scale:
encoder_inputs_embeds = encoder_inputs_embeds * encoder_embed_scale
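The new branches keep the tied encoder/decoder embedding to a single call when both id tensors are present: the ids are concatenated, embedded once, and split back. The real helpers are _combine_inputs/_separate_inputs on the shared embedding; the sketch below is only a guess at their shape, assuming concatenation along the sequence dimension:

import torch

def combine_inputs(input_ids: torch.Tensor, decoder_input_ids: torch.Tensor):
    # Remember where the encoder ids end so the joint embedding can be split back.
    split_idx = input_ids.size(1)
    combined = torch.cat([input_ids, decoder_input_ids], dim=1)
    return split_idx, combined

def separate_inputs(split_idx: int, combined_embeds: torch.Tensor):
    # Undo the concatenation on the embedded output.
    return combined_embeds[:, :split_idx], combined_embeds[:, split_idx:]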
21 changes: 11 additions & 10 deletions optimum/graphcore/models/bart/modeling_bart.py
@@ -38,7 +38,6 @@

from ...generation_utils import IPUGenerationMixin
from ...modeling_utils import (
GenerationMethodsMixin,
PipelineMixin,
SerializedLinear,
SharedEmbedding,
@@ -413,7 +412,7 @@ def forward(
raise ValueError("You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time")
elif input_ids is not None:
input = input_ids
input_shape = input.shape()
input_shape = input.shape
input_ids = input_ids.view(-1, input_shape[-1])
elif inputs_embeds is not None:
input_shape = inputs_embeds.size()[:-1]
@@ -648,11 +647,15 @@ def forward(
)
# If the user passed a tuple for encoder_outputs, we wrap it in a BaseModelOutput when return_dict=True
elif return_dict and not isinstance(encoder_outputs, BaseModelOutput):
encoder_outputs = BaseModelOutput(
last_hidden_state=encoder_outputs[0],
hidden_states=encoder_outputs[1] if len(encoder_outputs) > 1 else None,
attentions=encoder_outputs[2] if len(encoder_outputs) > 2 else None,
)
# BUG: the BaseModelOutput type appears to be lost by poptorch, leaving a plain dict
if isinstance(encoder_outputs, dict):
encoder_outputs = BaseModelOutput(encoder_outputs)
else:
encoder_outputs = BaseModelOutput(
last_hidden_state=encoder_outputs[0],
hidden_states=encoder_outputs[1] if len(encoder_outputs) > 1 else None,
attentions=encoder_outputs[2] if len(encoder_outputs) > 2 else None,
)

# decoder outputs consists of (dec_features, past_key_value, dec_hidden, dec_attn)
decoder_outputs = self.decoder(
@@ -686,9 +689,7 @@ def forward(


@register(BartForConditionalGeneration)
class PipelinedBartForConditionalGeneration(
GenerationMethodsMixin, BartForConditionalGeneration, PipelineMixin, IPUGenerationMixin
):
class PipelinedBartForConditionalGeneration(BartForConditionalGeneration, PipelineMixin, IPUGenerationMixin):
def parallelize(self):
"""
Transform the model to run in an IPU pipeline.
7 changes: 4 additions & 3 deletions optimum/graphcore/models/gpt2/modeling_gpt2.py
@@ -24,6 +24,7 @@
from transformers.modeling_outputs import CausalLMOutputWithCrossAttentions, SequenceClassifierOutputWithPast
from transformers.models.gpt2.modeling_gpt2 import GPT2Attention

from ...generation_utils import IPUGenerationMixin
from ...modeling_utils import (
PipelineMixin,
SerializedEmbedding,
@@ -107,7 +108,7 @@ def deparallelize(self):


@register(GPT2LMHeadModel)
class PipelinedGPT2LMHeadModel(GPT2LMHeadModel, PipelineMixin):
class PipelinedGPT2LMHeadModel(GPT2LMHeadModel, PipelineMixin, IPUGenerationMixin):
def parallelize(self):
"""
Transform the model to run in an IPU pipeline.
@@ -150,8 +151,8 @@ def parallelize(self):
logger.info("-------------------- Device Allocation --------------------")
logger.info("Token Embedding --> IPU 0")
self.transformer.wte = poptorch.BeginBlock(self.transformer.wte, "Token embedding", ipu_id=0)
logger.info("Position Embedding --> IPU 1")
self.transformer.wpe = poptorch.BeginBlock(self.transformer.wpe, "Position embedding", ipu_id=1)
logger.info("Position Embedding --> IPU 0")
self.transformer.wpe = poptorch.BeginBlock(self.transformer.wpe, "Position embedding", ipu_id=0)
hs = outline_attribute(self.transformer.ln_f, "LayerNorm")
self._hooks.extend(hs)

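Adding IPUGenerationMixin to PipelinedGPT2LMHeadModel is what lets the pipelined model expose generate(). A minimal usage sketch, assuming the from_transformers/parallelize workflow from modeling_utils.py above; the checkpoint name, the IPUConfig fields, and the generation arguments are illustrative rather than taken from this PR:

from transformers import GPT2LMHeadModel, GPT2Tokenizer
from optimum.graphcore import IPUConfig
from optimum.graphcore.models.gpt2.modeling_gpt2 import PipelinedGPT2LMHeadModel

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

# Illustrative configuration: all 12 GPT-2 blocks on a single IPU.
ipu_config = IPUConfig(layers_per_ipu=[12])

pipelined = PipelinedGPT2LMHeadModel.from_transformers(model, ipu_config)
pipelined.parallelize()  # assumes parallelize() annotates the model in place
pipelined.eval()

inputs = tokenizer("Text generation on IPU", return_tensors="pt")
output_ids = pipelined.generate(**inputs, max_length=20)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))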
19 changes: 10 additions & 9 deletions optimum/graphcore/models/t5/modeling_t5.py
@@ -27,7 +27,6 @@

from ...generation_utils import IPUGenerationMixin
from ...modeling_utils import (
GenerationMethodsMixin,
PipelineMixin,
SerializedLinear,
SharedEmbedding,
@@ -152,9 +151,7 @@ def invert_attention_mask(self, encoder_attention_mask: Tensor) -> Tensor:


@register(T5ForConditionalGeneration)
class PipelinedT5ForConditionalGeneration(
GenerationMethodsMixin, T5ForConditionalGeneration, PipelineMixin, IPUGenerationMixin
):
class PipelinedT5ForConditionalGeneration(T5ForConditionalGeneration, PipelineMixin, IPUGenerationMixin):
@property
def is_encoder_and_decoder_embeddings_computation_shared(self):
return isinstance(self.shared, SharedEmbedding)
@@ -375,11 +372,15 @@ def forward(
return_dict=return_dict,
)
elif return_dict and not isinstance(encoder_outputs, BaseModelOutput):
encoder_outputs = BaseModelOutput(
last_hidden_state=encoder_outputs[0],
hidden_states=encoder_outputs[1] if len(encoder_outputs) > 1 else None,
attentions=encoder_outputs[2] if len(encoder_outputs) > 2 else None,
)
# BUG: the BaseModelOutput type appears to be lost by poptorch, leaving a plain dict
if isinstance(encoder_outputs, dict):
encoder_outputs = BaseModelOutput(encoder_outputs)
else:
encoder_outputs = BaseModelOutput(
last_hidden_state=encoder_outputs[0],
hidden_states=encoder_outputs[1] if len(encoder_outputs) > 1 else None,
attentions=encoder_outputs[2] if len(encoder_outputs) > 2 else None,
)

hidden_states = encoder_outputs[0]

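The same encoder_outputs normalization now appears in both the BART and T5 forward passes: the poptorch-wrapped encoder seems to return a plain dict (or tuple) instead of a BaseModelOutput, so the code re-wraps it before indexing. A hedged sketch of that pattern factored into a shared helper; the helper itself is not part of this PR:

from transformers.modeling_outputs import BaseModelOutput

def ensure_base_model_output(encoder_outputs):
    # Normalize whatever the (possibly poptorch-wrapped) encoder returned.
    if isinstance(encoder_outputs, BaseModelOutput):
        return encoder_outputs
    if isinstance(encoder_outputs, dict):
        # transformers' ModelOutput unpacks a dict passed as its first field.
        return BaseModelOutput(encoder_outputs)
    return BaseModelOutput(
        last_hidden_state=encoder_outputs[0],
        hidden_states=encoder_outputs[1] if len(encoder_outputs) > 1 else None,
        attentions=encoder_outputs[2] if len(encoder_outputs) > 2 else None,
    )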