From e64a6fa4fbe3bc695807f766e0885718d91d8c16 Mon Sep 17 00:00:00 2001
From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Date: Fri, 29 Apr 2022 17:42:15 -0400
Subject: [PATCH] Result of new doc style with fixes (#17015)

* Result of new doc style with fixes

* Add last two files

* Bump hf-doc-builder
---
 docs/source/en/model_doc/bert-generation.mdx  |  4 ++--
 docs/source/en/model_doc/luke.mdx             |  2 +-
 setup.py                                      |  2 +-
 src/transformers/dependency_versions_table.py |  2 +-
 .../modeling_encoder_decoder.py               |  2 +-
 .../modeling_tf_encoder_decoder.py            |  2 +-
 .../models/gpt2/modeling_tf_gpt2.py           |  2 +-
 .../models/imagegpt/modeling_imagegpt.py      |  2 +-
 .../models/longformer/modeling_longformer.py  |  8 +++----
 src/transformers/models/luke/modeling_luke.py |  6 ++---
 .../models/openai/modeling_openai.py          |  2 +-
 .../models/openai/modeling_tf_openai.py       |  4 ++--
 .../models/prophetnet/modeling_prophetnet.py  |  6 ++---
 src/transformers/models/rag/modeling_rag.py   |  4 ++--
 src/transformers/models/rag/retrieval_rag.py  |  2 +-
 .../models/realm/modeling_realm.py            |  2 +-
 .../modeling_tf_speech_to_text.py             |  2 +-
 .../models/t5/modeling_flax_t5.py             |  2 +-
 src/transformers/models/t5/modeling_t5.py     |  6 ++---
 src/transformers/models/t5/modeling_tf_t5.py  |  6 ++---
 .../models/tapas/modeling_tapas.py            |  2 +-
 .../models/tapas/modeling_tf_tapas.py         |  2 +-
 .../modeling_tf_vision_encoder_decoder.py     |  2 +-
 .../models/wav2vec2/modeling_flax_wav2vec2.py |  4 ++--
 src/transformers/models/xlm/modeling_xlm.py   |  2 +-
 .../xlm_prophetnet/modeling_xlm_prophetnet.py |  4 ++--
 .../models/xlnet/modeling_tf_xlnet.py         |  8 +++----
 .../models/xlnet/modeling_xlnet.py            | 24 +++++++++----------
 28 files changed, 58 insertions(+), 58 deletions(-)

diff --git a/docs/source/en/model_doc/bert-generation.mdx b/docs/source/en/model_doc/bert-generation.mdx
index 3c6c229b6a59..e300917ea5e6 100644
--- a/docs/source/en/model_doc/bert-generation.mdx
+++ b/docs/source/en/model_doc/bert-generation.mdx
@@ -49,7 +49,7 @@ Usage:

 >>> input_ids = tokenizer(
 ... "This is a long article to summarize", add_special_tokens=False, return_tensors="pt"
->>> ).input_ids
+... ).input_ids
 >>> labels = tokenizer("This is a short summary", return_tensors="pt").input_ids

 >>> # train...
@@ -67,7 +67,7 @@ Usage:

 >>> input_ids = tokenizer(
 ... "This is the first sentence. This is the second sentence.", add_special_tokens=False, return_tensors="pt"
->>> ).input_ids
+... ).input_ids

 >>> outputs = sentence_fuser.generate(input_ids)

diff --git a/docs/source/en/model_doc/luke.mdx b/docs/source/en/model_doc/luke.mdx
index 1bbf6acb8454..6900367bb836 100644
--- a/docs/source/en/model_doc/luke.mdx
+++ b/docs/source/en/model_doc/luke.mdx
@@ -97,7 +97,7 @@ Example:
 >>> entities = [
 ... "Beyoncé",
 ... "Los Angeles",
->>> ] # Wikipedia entity titles corresponding to the entity mentions "Beyoncé" and "Los Angeles"
+... ] # Wikipedia entity titles corresponding to the entity mentions "Beyoncé" and "Los Angeles"
 >>> entity_spans = [(0, 7), (17, 28)] # character-based entity spans corresponding to "Beyoncé" and "Los Angeles"
 >>> inputs = tokenizer(text, entities=entities, entity_spans=entity_spans, add_prefix_space=True, return_tensors="pt")
 >>> outputs = model(**inputs)
diff --git a/setup.py b/setup.py
index e62d7d4d0197..c8f959e43e58 100644
--- a/setup.py
+++ b/setup.py
@@ -111,7 +111,7 @@
 "ftfy",
 "fugashi>=1.0",
 "GitPython<3.1.19",
- "hf-doc-builder>=0.2.0",
+ "hf-doc-builder>=0.3.0",
 "huggingface-hub>=0.1.0,<1.0",
 "importlib_metadata",
 "ipadic>=1.0.0,<2.0",
diff --git a/src/transformers/dependency_versions_table.py b/src/transformers/dependency_versions_table.py
index 2468fe9b6e1d..2cd160757448 100644
--- a/src/transformers/dependency_versions_table.py
+++ b/src/transformers/dependency_versions_table.py
@@ -18,7 +18,7 @@
 "ftfy": "ftfy",
 "fugashi": "fugashi>=1.0",
 "GitPython": "GitPython<3.1.19",
- "hf-doc-builder": "hf-doc-builder>=0.2.0",
+ "hf-doc-builder": "hf-doc-builder>=0.3.0",
 "huggingface-hub": "huggingface-hub>=0.1.0,<1.0",
 "importlib_metadata": "importlib_metadata",
 "ipadic": "ipadic>=1.0.0,<2.0",
diff --git a/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py b/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py
index 972b80db7b4d..6eae4c876f44 100644
--- a/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py
+++ b/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py
@@ -457,7 +457,7 @@ def forward(
 >>> tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
 >>> model = EncoderDecoderModel.from_encoder_decoder_pretrained(
 ... "bert-base-uncased", "bert-base-uncased"
- >>> ) # initialize Bert2Bert from pre-trained checkpoints
+ ... ) # initialize Bert2Bert from pre-trained checkpoints

 >>> # training
 >>> model.config.decoder_start_token_id = tokenizer.cls_token_id
diff --git a/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py b/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py
index 9e92e767b1b8..96c93d31cac2 100644
--- a/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py
+++ b/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py
@@ -528,7 +528,7 @@ def call(
 >>> # forward
 >>> input_ids = tokenizer.encode(
 ... "Hello, my dog is cute", add_special_tokens=True, return_tensors="tf"
- >>> ) # Batch size 1
+ ... ) # Batch size 1
 >>> outputs = model(input_ids=input_ids, decoder_input_ids=input_ids)

 >>> # training
diff --git a/src/transformers/models/gpt2/modeling_tf_gpt2.py b/src/transformers/models/gpt2/modeling_tf_gpt2.py
index 3a11f46bdb3b..45d29b6779ee 100644
--- a/src/transformers/models/gpt2/modeling_tf_gpt2.py
+++ b/src/transformers/models/gpt2/modeling_tf_gpt2.py
@@ -1061,7 +1061,7 @@ def call(
 >>> embedding_layer = model.resize_token_embeddings(
 ... len(tokenizer)
- >>> ) # Update the model embeddings with the new vocabulary size
+ ... ) # Update the model embeddings with the new vocabulary size

 >>> choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"]
 >>> encoded_choices = [tokenizer.encode(s) for s in choices]
diff --git a/src/transformers/models/imagegpt/modeling_imagegpt.py b/src/transformers/models/imagegpt/modeling_imagegpt.py
index 5866744bd8d0..22186a6159e3 100755
--- a/src/transformers/models/imagegpt/modeling_imagegpt.py
+++ b/src/transformers/models/imagegpt/modeling_imagegpt.py
@@ -1000,7 +1000,7 @@ def forward(
 >>> samples = output[:, 1:].cpu().detach().numpy()
 >>> samples_img = [
 ... np.reshape(np.rint(127.5 * (clusters[s] + 1.0)), [n_px, n_px, 3]).astype(np.uint8) for s in samples
- >>> ] # convert color cluster tokens back to pixels
+ ... ] # convert color cluster tokens back to pixels
 >>> f, axes = plt.subplots(1, batch_size, dpi=300)
 >>> for img, ax in zip(samples_img, axes):
diff --git a/src/transformers/models/longformer/modeling_longformer.py b/src/transformers/models/longformer/modeling_longformer.py
index 647bb8fb7319..6d18e11a2b40 100755
--- a/src/transformers/models/longformer/modeling_longformer.py
+++ b/src/transformers/models/longformer/modeling_longformer.py
@@ -1634,10 +1634,10 @@ def forward(
 >>> attention_mask = torch.ones(
 ... input_ids.shape, dtype=torch.long, device=input_ids.device
- >>> ) # initialize to local attention
+ ... ) # initialize to local attention
 >>> global_attention_mask = torch.zeros(
 ... input_ids.shape, dtype=torch.long, device=input_ids.device
- >>> ) # initialize to global attention to be deactivated for all tokens
+ ... ) # initialize to global attention to be deactivated for all tokens
 >>> global_attention_mask[
 ... :,
 ... [
 ... 1,
@@ -1645,7 +1645,7 @@
 ... 4,
 ... 21,
 ... ],
- >>> ] = 1 # Set global attention to random tokens for the sake of this example
+ ... ] = 1 # Set global attention to random tokens for the sake of this example
 >>> # Usually, set global attention based on the task. For example,
 >>> # classification: the <s> token
 >>> # QA: question tokens
@@ -2025,7 +2025,7 @@ def forward(
 >>> answer_tokens = all_tokens[torch.argmax(start_logits) : torch.argmax(end_logits) + 1]
 >>> answer = tokenizer.decode(
 ... tokenizer.convert_tokens_to_ids(answer_tokens)
- >>> ) # remove space prepending space token
+ ... ) # remove space prepending space token
 ```"""
 return_dict = return_dict if return_dict is not None else self.config.use_return_dict
diff --git a/src/transformers/models/luke/modeling_luke.py b/src/transformers/models/luke/modeling_luke.py
index cd5a53ddae30..7388e2031bab 100644
--- a/src/transformers/models/luke/modeling_luke.py
+++ b/src/transformers/models/luke/modeling_luke.py
@@ -953,11 +953,11 @@ def forward(
 >>> entities = [
 ... "Beyoncé",
 ... "Los Angeles",
- >>> ] # Wikipedia entity titles corresponding to the entity mentions "Beyoncé" and "Los Angeles"
+ ... ] # Wikipedia entity titles corresponding to the entity mentions "Beyoncé" and "Los Angeles"
 >>> entity_spans = [
 ... (0, 7),
 ... (17, 28),
- >>> ] # character-based entity spans corresponding to "Beyoncé" and "Los Angeles"
+ ... ] # character-based entity spans corresponding to "Beyoncé" and "Los Angeles"

 >>> encoding = tokenizer(
 ... text, entities=entities, entity_spans=entity_spans, add_prefix_space=True, return_tensors="pt"
@@ -1435,7 +1435,7 @@ def forward(
 >>> entity_spans = [
 ... (0, 7),
 ... (17, 28),
- >>> ] # character-based entity spans corresponding to "Beyoncé" and "Los Angeles"
+ ... ] # character-based entity spans corresponding to "Beyoncé" and "Los Angeles"
 >>> inputs = tokenizer(text, entity_spans=entity_spans, return_tensors="pt")
 >>> outputs = model(**inputs)
 >>> logits = outputs.logits
diff --git a/src/transformers/models/openai/modeling_openai.py b/src/transformers/models/openai/modeling_openai.py
index 8ded535cef10..2262db9aa8cf 100644
--- a/src/transformers/models/openai/modeling_openai.py
+++ b/src/transformers/models/openai/modeling_openai.py
@@ -674,7 +674,7 @@ def forward(
 >>> model = OpenAIGPTDoubleHeadsModel.from_pretrained("openai-gpt")
 >>> tokenizer.add_special_tokens(
 ... {"cls_token": "[CLS]"}
- >>> ) # Add a [CLS] to the vocabulary (we should train it also!)
+ ... ) # Add a [CLS] to the vocabulary (we should train it also!)
 >>> model.resize_token_embeddings(len(tokenizer))

 >>> choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"]
diff --git a/src/transformers/models/openai/modeling_tf_openai.py b/src/transformers/models/openai/modeling_tf_openai.py
index 5215ad7c2ff6..24a7935eb005 100644
--- a/src/transformers/models/openai/modeling_tf_openai.py
+++ b/src/transformers/models/openai/modeling_tf_openai.py
@@ -693,9 +693,9 @@ def call(
 >>> inputs = {k: tf.expand_dims(v, 0) for k, v in encoding.items()}
 >>> inputs["mc_token_ids"] = tf.constant(
 ... [inputs["input_ids"].shape[-1] - 1, inputs["input_ids"].shape[-1] - 1]
- >>> )[
+ ... )[
 ... None, :
- >>> ] # Batch size 1
+ ... ] # Batch size 1
 >>> outputs = model(inputs)
 >>> lm_prediction_scores, mc_prediction_scores = outputs[:2]
 ```"""
diff --git a/src/transformers/models/prophetnet/modeling_prophetnet.py b/src/transformers/models/prophetnet/modeling_prophetnet.py
index 84fb75f3f334..c869d6373bc5 100644
--- a/src/transformers/models/prophetnet/modeling_prophetnet.py
+++ b/src/transformers/models/prophetnet/modeling_prophetnet.py
@@ -1813,7 +1813,7 @@ def forward(

 >>> input_ids = tokenizer(
 ... "Studies have been shown that owning a dog is good for you", return_tensors="pt"
- >>> ).input_ids # Batch size 1
+ ... ).input_ids # Batch size 1
 >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1

 >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
@@ -1935,7 +1935,7 @@ def forward(

 >>> input_ids = tokenizer(
 ... "Studies have been shown that owning a dog is good for you", return_tensors="pt"
- >>> ).input_ids # Batch size 1
+ ... ).input_ids # Batch size 1
 >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1

 >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
@@ -2202,7 +2202,7 @@ def forward(
 >>> input_ids = tokenizer_enc(ARTICLE, return_tensors="pt").input_ids
 >>> labels = tokenizer_dec(
 ... "us rejects charges against its ambassador in bolivia", return_tensors="pt"
- >>> ).input_ids
+ ... ).input_ids

 >>> outputs = model(input_ids=input_ids, decoder_input_ids=labels[:, :-1], labels=labels[:, 1:])
 >>> loss = outputs.loss
diff --git a/src/transformers/models/rag/modeling_rag.py b/src/transformers/models/rag/modeling_rag.py
index 642b13c580c0..205e825cbc1f 100644
--- a/src/transformers/models/rag/modeling_rag.py
+++ b/src/transformers/models/rag/modeling_rag.py
@@ -826,7 +826,7 @@ def forward(
 >>> docs_dict = retriever(input_ids.numpy(), question_hidden_states.detach().numpy(), return_tensors="pt")
 >>> doc_scores = torch.bmm(
 ... question_hidden_states.unsqueeze(1), docs_dict["retrieved_doc_embeds"].float().transpose(1, 2)
- >>> ).squeeze(1)
+ ... ).squeeze(1)
 >>> # 3. Forward to generator
 >>> outputs = model(
 ... context_input_ids=docs_dict["context_input_ids"],
@@ -1293,7 +1293,7 @@ def forward(
 >>> docs_dict = retriever(input_ids.numpy(), question_hidden_states.detach().numpy(), return_tensors="pt")
 >>> doc_scores = torch.bmm(
 ... question_hidden_states.unsqueeze(1), docs_dict["retrieved_doc_embeds"].float().transpose(1, 2)
- >>> ).squeeze(1)
+ ... ).squeeze(1)
 >>> # 3. Forward to generator
 >>> outputs = model(
 ... context_input_ids=docs_dict["context_input_ids"],
diff --git a/src/transformers/models/rag/retrieval_rag.py b/src/transformers/models/rag/retrieval_rag.py
index f39fc48d27c8..7a3c5635f24f 100644
--- a/src/transformers/models/rag/retrieval_rag.py
+++ b/src/transformers/models/rag/retrieval_rag.py
@@ -354,7 +354,7 @@ class RagRetriever:

 >>> dataset = (
 ... ...
- >>> ) # dataset must be a datasets.Datasets object with columns "title", "text" and "embeddings", and it must have a faiss index
+ ... ) # dataset must be a datasets.Datasets object with columns "title", "text" and "embeddings", and it must have a faiss index
 >>> retriever = RagRetriever.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base", indexed_dataset=dataset)

 >>> # To load your own indexed dataset built with the datasets library that was saved on disk. More info in examples/rag/use_own_knowledge_dataset.py
diff --git a/src/transformers/models/realm/modeling_realm.py b/src/transformers/models/realm/modeling_realm.py
index eec4fb2b7deb..f8849a7878db 100644
--- a/src/transformers/models/realm/modeling_realm.py
+++ b/src/transformers/models/realm/modeling_realm.py
@@ -1782,7 +1782,7 @@ def forward(
 ... add_special_tokens=False,
 ... return_token_type_ids=False,
 ... return_attention_mask=False,
- >>> ).input_ids
+ ... ).input_ids

 >>> reader_output, predicted_answer_ids = model(**question_ids, answer_ids=answer_ids, return_dict=False)
 >>> predicted_answer = tokenizer.decode(predicted_answer_ids)
diff --git a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py
index 8980636c3b32..c78d19056bd3 100755
--- a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py
+++ b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py
@@ -1387,7 +1387,7 @@ def call(

 >>> input_features = processor(
 ... ds["speech"][0], sampling_rate=16000, return_tensors="tf"
- >>> ).input_features # Batch size 1
+ ... ).input_features # Batch size 1
 >>> generated_ids = model.generate(input_features)

 >>> transcription = processor.batch_decode(generated_ids)
diff --git a/src/transformers/models/t5/modeling_flax_t5.py b/src/transformers/models/t5/modeling_flax_t5.py
index 263412578c23..767caea3eb38 100644
--- a/src/transformers/models/t5/modeling_flax_t5.py
+++ b/src/transformers/models/t5/modeling_flax_t5.py
@@ -1344,7 +1344,7 @@ class FlaxT5Model(FlaxT5PreTrainedModel):

 >>> input_ids = tokenizer(
 ... "Studies have been shown that owning a dog is good for you", return_tensors="np"
- >>> ).input_ids
+ ... ).input_ids
 >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="np").input_ids

 >>> # forward pass
diff --git a/src/transformers/models/t5/modeling_t5.py b/src/transformers/models/t5/modeling_t5.py
index 6d06f910df40..68b40c5fd45e 100644
--- a/src/transformers/models/t5/modeling_t5.py
+++ b/src/transformers/models/t5/modeling_t5.py
@@ -1375,7 +1375,7 @@ def forward(

 >>> input_ids = tokenizer(
 ... "Studies have been shown that owning a dog is good for you", return_tensors="pt"
- >>> ).input_ids # Batch size 1
+ ... ).input_ids # Batch size 1
 >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1

 >>> # forward pass
@@ -1583,7 +1583,7 @@ def forward(
 >>> # inference
 >>> input_ids = tokenizer(
 ... "summarize: studies have shown that owning a dog is good for you", return_tensors="pt"
- >>> ).input_ids # Batch size 1
+ ... ).input_ids # Batch size 1
 >>> outputs = model.generate(input_ids)
 >>> print(tokenizer.decode(outputs[0], skip_special_tokens=True))
 >>> # studies have shown that owning a dog is good for you.
@@ -1831,7 +1831,7 @@ def forward(
 >>> model = T5EncoderModel.from_pretrained("t5-small")
 >>> input_ids = tokenizer(
 ... "Studies have been shown that owning a dog is good for you", return_tensors="pt"
- >>> ).input_ids # Batch size 1
+ ... ).input_ids # Batch size 1
 >>> outputs = model(input_ids=input_ids)
 >>> last_hidden_states = outputs.last_hidden_state
 ```"""
diff --git a/src/transformers/models/t5/modeling_tf_t5.py b/src/transformers/models/t5/modeling_tf_t5.py
index 2e48174a9048..3434a6ea4f37 100644
--- a/src/transformers/models/t5/modeling_tf_t5.py
+++ b/src/transformers/models/t5/modeling_tf_t5.py
@@ -1165,7 +1165,7 @@ def call(

 >>> input_ids = tokenizer(
 ... "Studies have been shown that owning a dog is good for you", return_tensors="tf"
- >>> ).input_ids # Batch size 1
+ ... ).input_ids # Batch size 1
 >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="tf").input_ids # Batch size 1

 >>> # forward pass
@@ -1353,7 +1353,7 @@ def call(
 >>> # inference
 >>> inputs = tokenizer(
 ... "summarize: studies have shown that owning a dog is good for you", return_tensors="tf"
- >>> ).input_ids # Batch size 1
+ ... ).input_ids # Batch size 1
 >>> outputs = model.generate(inputs)
 >>> print(tokenizer.decode(outputs[0], skip_special_tokens=True))
 >>> # studies have shown that owning a dog is good for you
@@ -1642,7 +1642,7 @@ def call(

 >>> input_ids = tokenizer(
 ... "Studies have been shown that owning a dog is good for you", return_tensors="tf"
- >>> ).input_ids # Batch size 1
+ ... ).input_ids # Batch size 1
 >>> outputs = model(input_ids)
 ```"""
diff --git a/src/transformers/models/tapas/modeling_tapas.py b/src/transformers/models/tapas/modeling_tapas.py
index e34c1abb57ec..ceb250f5161f 100644
--- a/src/transformers/models/tapas/modeling_tapas.py
+++ b/src/transformers/models/tapas/modeling_tapas.py
@@ -1068,7 +1068,7 @@ def forward(
 ... )
 >>> labels = tokenizer(
 ... table=table, queries="How many movies has George Clooney played in?", return_tensors="pt"
- >>> )["input_ids"]
+ ... )["input_ids"]

 >>> outputs = model(**inputs, labels=labels)
 >>> logits = outputs.logits
diff --git a/src/transformers/models/tapas/modeling_tf_tapas.py b/src/transformers/models/tapas/modeling_tf_tapas.py
index e91baaab8edb..d2da0644627a 100644
--- a/src/transformers/models/tapas/modeling_tf_tapas.py
+++ b/src/transformers/models/tapas/modeling_tf_tapas.py
@@ -1095,7 +1095,7 @@ def call(
 ... )
 >>> labels = tokenizer(
 ... table=table, queries="How many movies has George Clooney played in?", return_tensors="tf"
- >>> )["input_ids"]
+ ... )["input_ids"]

 >>> outputs = model(**inputs, labels=labels)
 >>> logits = outputs.logits
diff --git a/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py b/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py
index edc2973a0734..6bbf51409103 100644
--- a/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py
+++ b/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py
@@ -326,7 +326,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):

 >>> output_ids = model.generate(
 ... pixel_values, max_length=16, num_beams=4, return_dict_in_generate=True
- >>> ).sequences
+ ... ).sequences

 >>> preds = decoder_tokenizer.batch_decode(output_ids, skip_special_tokens=True)
 >>> preds = [pred.strip() for pred in preds]
diff --git a/src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py
index 1386ca37b075..7709e43ab955 100644
--- a/src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py
+++ b/src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py
@@ -1081,7 +1081,7 @@ class FlaxWav2Vec2Model(FlaxWav2Vec2PreTrainedModel):

 >>> input_values = processor(
 ... ds["speech"][0], sampling_rate=16_000, return_tensors="np"
- >>> ).input_values # Batch size 1
+ ... ).input_values # Batch size 1
 >>> hidden_states = model(input_values).last_hidden_state
 ```
 """
@@ -1200,7 +1200,7 @@ class FlaxWav2Vec2ForCTC(FlaxWav2Vec2PreTrainedModel):

 >>> input_values = processor(
 ... ds["speech"][0], sampling_rate=16_000, return_tensors="np"
- >>> ).input_values # Batch size 1
+ ... ).input_values # Batch size 1
 >>> logits = model(input_values).logits
 >>> predicted_ids = jnp.argmax(logits, axis=-1)
diff --git a/src/transformers/models/xlm/modeling_xlm.py b/src/transformers/models/xlm/modeling_xlm.py
index ed0817afbb89..ebb3c503475c 100755
--- a/src/transformers/models/xlm/modeling_xlm.py
+++ b/src/transformers/models/xlm/modeling_xlm.py
@@ -1039,7 +1039,7 @@ def forward(

 >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(
 ... 0
- >>> ) # Batch size 1
+ ... ) # Batch size 1
 >>> start_positions = torch.tensor([1])
 >>> end_positions = torch.tensor([3])
diff --git a/src/transformers/models/xlm_prophetnet/modeling_xlm_prophetnet.py b/src/transformers/models/xlm_prophetnet/modeling_xlm_prophetnet.py
index dfb7b394915b..8961fbbfc374 100644
--- a/src/transformers/models/xlm_prophetnet/modeling_xlm_prophetnet.py
+++ b/src/transformers/models/xlm_prophetnet/modeling_xlm_prophetnet.py
@@ -98,7 +98,7 @@ class XLMProphetNetModel(ProphetNetModel):

 >>> input_ids = tokenizer(
 ... "Studies have been shown that owning a dog is good for you", return_tensors="pt"
- >>> ).input_ids # Batch size 1
+ ... ).input_ids # Batch size 1
 >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1

 >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
@@ -124,7 +124,7 @@ class XLMProphetNetForConditionalGeneration(ProphetNetForConditionalGeneration):

 >>> input_ids = tokenizer(
 ... "Studies have been shown that owning a dog is good for you", return_tensors="pt"
- >>> ).input_ids # Batch size 1
+ ... ).input_ids # Batch size 1
 >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1

 >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
diff --git a/src/transformers/models/xlnet/modeling_tf_xlnet.py b/src/transformers/models/xlnet/modeling_tf_xlnet.py
index f5a1cba3c837..df4111d26317 100644
--- a/src/transformers/models/xlnet/modeling_tf_xlnet.py
+++ b/src/transformers/models/xlnet/modeling_tf_xlnet.py
@@ -1281,17 +1281,17 @@ def call(
 >>> # We show how to setup inputs to predict a next token using a bi-directional context.
 >>> input_ids = tf.constant(tokenizer.encode("Hello, my dog is very <mask>", add_special_tokens=True))[
 ... None, :
- >>> ] # We will predict the masked token
+ ... ] # We will predict the masked token
 >>> perm_mask = np.zeros((1, input_ids.shape[1], input_ids.shape[1]))
 >>> perm_mask[:, :, -1] = 1.0 # Previous tokens don't see last token

 >>> target_mapping = np.zeros(
 ... (1, 1, input_ids.shape[1])
- >>> ) # Shape [1, 1, seq_length] => let's predict one token
+ ... ) # Shape [1, 1, seq_length] => let's predict one token
 >>> target_mapping[
 ... 0, 0, -1
- >>> ] = 1.0 # Our first (and only) prediction will be the last token of the sequence (the masked token)
+ ... ] = 1.0 # Our first (and only) prediction will be the last token of the sequence (the masked token)

 >>> outputs = model(
 ... input_ids,
@@ -1301,7 +1301,7 @@ def call(

 >>> next_token_logits = outputs[
 ... 0
- >>> ] # Output has shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size]
+ ... ] # Output has shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size]
 ```"""
 transformer_outputs = self.transformer(
 input_ids=input_ids,
diff --git a/src/transformers/models/xlnet/modeling_xlnet.py b/src/transformers/models/xlnet/modeling_xlnet.py
index 079c636628f2..dc7f78eeb8e2 100755
--- a/src/transformers/models/xlnet/modeling_xlnet.py
+++ b/src/transformers/models/xlnet/modeling_xlnet.py
@@ -1400,47 +1400,47 @@ def forward(
 >>> # We show how to setup inputs to predict a next token using a bi-directional context.
 >>> input_ids = torch.tensor(
 ... tokenizer.encode("Hello, my dog is very <mask>", add_special_tokens=False)
- >>> ).unsqueeze(
+ ... ).unsqueeze(
 ... 0
- >>> ) # We will predict the masked token
+ ... ) # We will predict the masked token
 >>> perm_mask = torch.zeros((1, input_ids.shape[1], input_ids.shape[1]), dtype=torch.float)
 >>> perm_mask[:, :, -1] = 1.0 # Previous tokens don't see last token

 >>> target_mapping = torch.zeros(
 ... (1, 1, input_ids.shape[1]), dtype=torch.float
- >>> ) # Shape [1, 1, seq_length] => let's predict one token
+ ... ) # Shape [1, 1, seq_length] => let's predict one token
 >>> target_mapping[
 ... 0, 0, -1
- >>> ] = 1.0 # Our first (and only) prediction will be the last token of the sequence (the masked token)
+ ... ] = 1.0 # Our first (and only) prediction will be the last token of the sequence (the masked token)

 >>> outputs = model(input_ids, perm_mask=perm_mask, target_mapping=target_mapping)
 >>> next_token_logits = outputs[
 ... 0
- >>> ] # Output has shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size]
+ ... ] # Output has shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size]

 >>> # The same way can the XLNetLMHeadModel be used to be trained by standard auto-regressive language modeling.
 >>> input_ids = torch.tensor(
 ... tokenizer.encode("Hello, my dog is very <mask>", add_special_tokens=False)
- >>> ).unsqueeze(
+ ... ).unsqueeze(
 ... 0
- >>> ) # We will predict the masked token
+ ... ) # We will predict the masked token
 >>> labels = torch.tensor(tokenizer.encode("cute", add_special_tokens=False)).unsqueeze(0)
 >>> assert labels.shape[0] == 1, "only one word will be predicted"
 >>> perm_mask = torch.zeros((1, input_ids.shape[1], input_ids.shape[1]), dtype=torch.float)
 >>> perm_mask[
 ... :, :, -1
- >>> ] = 1.0 # Previous tokens don't see last token as is done in standard auto-regressive lm training
+ ... ] = 1.0 # Previous tokens don't see last token as is done in standard auto-regressive lm training
 >>> target_mapping = torch.zeros(
 ... (1, 1, input_ids.shape[1]), dtype=torch.float
- >>> ) # Shape [1, 1, seq_length] => let's predict one token
+ ... ) # Shape [1, 1, seq_length] => let's predict one token
 >>> target_mapping[
 ... 0, 0, -1
- >>> ] = 1.0 # Our first (and only) prediction will be the last token of the sequence (the masked token)
+ ... ] = 1.0 # Our first (and only) prediction will be the last token of the sequence (the masked token)

 >>> outputs = model(input_ids, perm_mask=perm_mask, target_mapping=target_mapping, labels=labels)
 >>> loss = outputs.loss
 >>> next_token_logits = (
 ... outputs.logits
- >>> ) # Logits have shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size]
+ ... ) # Logits have shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size]
 ```"""
 return_dict = return_dict if return_dict is not None else self.config.use_return_dict
@@ -1980,7 +1980,7 @@ def forward(

 >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(
 ... 0
- >>> ) # Batch size 1
+ ... ) # Batch size 1
 >>> start_positions = torch.tensor([1])
 >>> end_positions = torch.tensor([3])
 >>> outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)