Commit 78a5876

Merge remote-tracking branch 'upstream/main' into chat_templates

zucchini-nlp committed Jul 16, 2024
2 parents 7c215bd + 6fbea6d
Showing 18 changed files with 148 additions and 183 deletions.
19 changes: 16 additions & 3 deletions .github/workflows/build-ci-docker-images.yml
@@ -27,10 +27,10 @@ jobs:
strategy:
matrix:
file: ["quality", "consistency", "custom-tokenizers", "torch-light", "tf-light", "exotic-models", "torch-tf-light", "torch-jax-light", "jax-light", "examples-torch", "examples-tf"]
continue-on-error: true
continue-on-error: true

steps:
-
-
name: Set tag
run: |
if ${{contains(github.event.head_commit.message, '[build-ci-image]')}}; then
@@ -61,4 +61,17 @@ jobs:
REF=${{ github.sha }}
file: "./docker/${{ matrix.file }}.dockerfile"
push: ${{ contains(github.event.head_commit.message, 'ci-image]') || github.event_name == 'schedule' }}
tags: ${{ env.TAG }}
tags: ${{ env.TAG }}

notify:
runs-on: ubuntu-22.04
if: ${{ contains(github.event.head_commit.message, '[build-ci-image]') || contains(github.event.head_commit.message, '[push-ci-image]') && '!cancelled()' || github.event_name == 'schedule' }}
steps:
- name: Post to Slack
if: ${{ contains(github.event.head_commit.message, '[push-ci-image]') && github.event_name != 'schedule' }}
uses: huggingface/hf-workflows/.github/actions/post-slack@main
with:
slack_channel: "#transformers-ci-circleci-images"
title: 🤗 New docker images for CircleCI are pushed.
status: ${{ job.status }}
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
2 changes: 1 addition & 1 deletion src/transformers/commands/user.py
@@ -185,7 +185,7 @@ def run(self):
print("Abort")
exit()
try:
url = create_repo(token, name=self.args.name, organization=self.args.organization)
url = create_repo(repo_id=full_name, token=token)
except HTTPError as e:
print(e)
print(ANSI.red(e.response.text))
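The updated call follows the current `huggingface_hub` convention of addressing the repository with a single `repo_id` and passing the token as a keyword argument. A minimal sketch of the new usage; the repo id and token below are hypothetical placeholders:

from huggingface_hub import create_repo

token = "hf_xxx"  # hypothetical user access token with write scope
url = create_repo(repo_id="my-username/my-new-model", token=token)
print(url)  # e.g. https://huggingface.co/my-username/my-new-model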
22 changes: 11 additions & 11 deletions src/transformers/generation/configuration_utils.py
@@ -113,10 +113,10 @@ class GenerationConfig(PushToHubMixin):
heuristic is applied and the generation stops when is it very unlikely to find better candidates;
`"never"`, where the beam search procedure only stops when there cannot be better candidates (canonical
beam search algorithm).
max_time(`float`, *optional*):
max_time (`float`, *optional*):
The maximum amount of time you allow the computation to run for in seconds. generation will still finish
the current pass after allocated time has been passed.
stop_strings(`str or List[str]`, *optional*):
stop_strings (`str or List[str]`, *optional*):
A string or a list of strings that should terminate generation if the model outputs them.
> Parameters that control the generation strategy used
@@ -181,10 +181,10 @@ class GenerationConfig(PushToHubMixin):
`length_penalty` < 0.0 encourages shorter sequences.
no_repeat_ngram_size (`int`, *optional*, defaults to 0):
If set to int > 0, all ngrams of that size can only occur once.
bad_words_ids(`List[List[int]]`, *optional*):
bad_words_ids (`List[List[int]]`, *optional*):
List of list of token ids that are not allowed to be generated. Check
[`~generation.NoBadWordsLogitsProcessor`] for further documentation and examples.
force_words_ids(`List[List[int]]` or `List[List[List[int]]]`, *optional*):
force_words_ids (`List[List[int]]` or `List[List[List[int]]]`, *optional*):
List of token ids that must be generated. If given a `List[List[int]]`, this is treated as a simple list of
words that must be included, the opposite to `bad_words_ids`. If given `List[List[List[int]]]`, this
triggers a [disjunctive constraint](https://github.com/huggingface/transformers/issues/14081), where one
@@ -200,7 +200,7 @@ class GenerationConfig(PushToHubMixin):
The id of the token to force as the first generated token after the `decoder_start_token_id`. Useful for
multilingual models like [mBART](../model_doc/mbart) where the first generated token needs to be the target
language token.
forced_eos_token_id (`Union[int, List[int]]`, *optional*, defaults to `model.config.forced_eos_token_id`):
forced_eos_token_id (`int` or `List[int]`, *optional*, defaults to `model.config.forced_eos_token_id`):
The id of the token to force as the last generated token when `max_length` is reached. Optionally, use a
list to set multiple *end-of-sequence* tokens.
remove_invalid_values (`bool`, *optional*, defaults to `model.config.remove_invalid_values`):
@@ -210,7 +210,7 @@ class GenerationConfig(PushToHubMixin):
This Tuple adds an exponentially increasing length penalty, after a certain amount of tokens have been
generated. The tuple shall consist of: `(start_index, decay_factor)` where `start_index` indicates where
penalty starts and `decay_factor` represents the factor of exponential decay
suppress_tokens (`List[int]`, *optional*):
suppress_tokens (`List[int]`, *optional*):
A list of tokens that will be suppressed at generation. The `SupressTokens` logit processor will set their
log probs to `-inf` so that they are not sampled.
begin_suppress_tokens (`List[int]`, *optional*):
@@ -234,7 +234,7 @@ class GenerationConfig(PushToHubMixin):
low_memory (`bool`, *optional*):
Switch to sequential beam search and sequential topk for contrastive search to reduce peak memory.
Used with beam search and contrastive search.
watermarking_config (Union[`WatermarkingConfig`, `dict`], *optional*):
watermarking_config (`WatermarkingConfig` or `dict`, *optional*):
Arguments used to watermark the model outputs by adding a small bias to randomly selected set of "green" tokens.
If passed as `Dict`, it will be converted to a `WatermarkingConfig` internally.
See [this paper](https://arxiv.org/abs/2306.04634) for more details. Accepts the following keys:
@@ -249,12 +249,12 @@ class GenerationConfig(PushToHubMixin):
- "lefthash" (default): "green" tokens selection depend on the last token (Algorithm 2 from the paper)
- "selfhash": "green" tokens selection depends on the current token itself (Algorithm 3 from the paper)
The downside of this scheme is that it considers all possible next tokens and can be slower than "lefthash".
- context_width(`int`):
- context_width (`int`):
The context length of previous tokens to use in seeding. Higher context length makes watermarking more robust.
> Parameters that define the output variables of generate
num_return_sequences(`int`, *optional*, defaults to 1):
num_return_sequences (`int`, *optional*, defaults to 1):
The number of independently computed returned sequences for each element in the batch.
output_attentions (`bool`, *optional*, defaults to `False`):
Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
@@ -284,7 +284,7 @@ class GenerationConfig(PushToHubMixin):
encoder_no_repeat_ngram_size (`int`, *optional*, defaults to 0):
If set to int > 0, all ngrams of that size that occur in the `encoder_input_ids` cannot occur in the
`decoder_input_ids`.
decoder_start_token_id (`Union[int, List[int]]`, *optional*):
decoder_start_token_id (`int` or `List[int]`, *optional*):
If an encoder-decoder model starts decoding with a different token than *bos*, the id of that token or a list of length
`batch_size`. Indicating a list enables different start ids for each element in the batch
(e.g. multilingual models with different target languages in one batch)
@@ -323,7 +323,7 @@ class GenerationConfig(PushToHubMixin):
cache_implementation (`str`, *optional*, default to `None`):
Cache class that should be used when generating.
cache_config (`Union[CacheConfig, dict]`, *optional*, default to `None`):
cache_config (`CacheConfig` or `dict`, *optional*, default to `None`):
Arguments used in the key-value cache class can be passed in `cache_config`. Can be passed as a `Dict` and
it will be converted to its repsective `CacheConfig` internally.
Otherwise can be passed as a `CacheConfig` class matching the indicated `cache_implementation`.
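As a quick illustration of several fields documented above, the sketch below builds a `GenerationConfig` with a handful of them set. The values are hypothetical and only meant to show the expected types; in particular, `watermarking_config` may be passed as a plain dict, which is converted to a `WatermarkingConfig` internally:

from transformers import GenerationConfig

# Illustrative values only; nothing here is a recommended default.
generation_config = GenerationConfig(
    max_time=30.0,                # seconds; the current pass still finishes after the limit
    stop_strings=["###"],         # string(s) that terminate generation when emitted
    no_repeat_ngram_size=3,
    forced_eos_token_id=[2],      # a list sets multiple end-of-sequence tokens
    num_return_sequences=1,
    watermarking_config={         # dict form, converted to WatermarkingConfig internally
        "bias": 2.0,
        "seeding_scheme": "lefthash",
        "context_width": 1,
    },
)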
6 changes: 3 additions & 3 deletions src/transformers/generation/utils.py
@@ -2219,7 +2219,7 @@ def _dola_decoding(
generation_config: GenerationConfig,
synced_gpus: bool,
streamer: "BaseStreamer",
logits_warper: LogitsProcessorList,
logits_warper: Optional[LogitsProcessorList],
**model_kwargs,
) -> Union[GenerateNonBeamOutput, torch.LongTensor]:
r"""
@@ -2826,7 +2826,7 @@ def _sample(
generation_config: GenerationConfig,
synced_gpus: bool,
streamer: Optional["BaseStreamer"],
logits_warper: LogitsProcessorList,
logits_warper: Optional[LogitsProcessorList],
**model_kwargs,
) -> Union[GenerateNonBeamOutput, torch.LongTensor]:
r"""
@@ -3033,7 +3033,7 @@ def _beam_search(
stopping_criteria: StoppingCriteriaList,
generation_config: GenerationConfig,
synced_gpus: bool,
logits_warper: LogitsProcessorList,
logits_warper: Optional[LogitsProcessorList],
**model_kwargs,
) -> Union[GenerateBeamOutput, torch.LongTensor]:
r"""
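The `Optional[LogitsProcessorList]` annotation reflects that the logits warper is only prepared when sampling is enabled; under greedy decoding these helpers now receive `None`. A minimal, user-facing sketch of the two paths (the checkpoint name is only an example):

from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")
inputs = tokenizer("Hello", return_tensors="pt")

# Greedy decoding: no warper is built internally (logits_warper is None).
greedy = model.generate(**inputs, do_sample=False, max_new_tokens=20)

# Sampling: a warper (temperature / top-k / top-p) is prepared and passed through.
sampled = model.generate(**inputs, do_sample=True, top_p=0.9, max_new_tokens=20)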
80 changes: 17 additions & 63 deletions src/transformers/models/musicgen/modeling_musicgen.py
@@ -26,7 +26,7 @@
from torch.nn import CrossEntropyLoss

from ...activations import ACT2FN
from ...generation.configuration_utils import GenerationConfig
from ...generation.configuration_utils import GenerationConfig, GenerationMode
from ...generation.logits_process import ClassifierFreeGuidanceLogitsProcessor, LogitsProcessorList
from ...generation.stopping_criteria import StoppingCriteriaList
from ...modeling_attn_mask_utils import (
@@ -1618,16 +1618,7 @@ def generate(
model_kwargs["delay_pattern_mask"] = delay_pattern_mask

# 7. determine generation mode
is_greedy_gen_mode = (
(generation_config.num_beams == 1)
and (generation_config.num_beam_groups == 1)
and generation_config.do_sample is False
)
is_sample_gen_mode = (
(generation_config.num_beams == 1)
and (generation_config.num_beam_groups == 1)
and generation_config.do_sample is True
)
generation_mode = generation_config.get_generation_mode()

# 8. prepare batched CFG externally (to enable coexistance with the unbatched CFG)
if generation_config.guidance_scale is not None and generation_config.guidance_scale > 1:
@@ -1649,27 +1640,13 @@ def generate(
generation_config=generation_config, stopping_criteria=stopping_criteria
)

if is_greedy_gen_mode:
if generation_config.num_return_sequences > 1:
raise ValueError(
"num_return_sequences has to be 1 when doing greedy search, "
f"but is {generation_config.num_return_sequences}."
)

# 11. run greedy search
outputs = self._sample(
input_ids,
logits_processor=logits_processor,
stopping_criteria=stopping_criteria,
generation_config=generation_config,
synced_gpus=synced_gpus,
streamer=streamer,
**model_kwargs,
)

elif is_sample_gen_mode:
if generation_mode in (GenerationMode.SAMPLE, GenerationMode.GREEDY_SEARCH):
# 11. prepare logits warper
logits_warper = self._get_logits_warper(generation_config, device=input_ids.device)
prepared_logits_warper = (
self._get_logits_warper(generation_config, device=input_ids.device)
if generation_config.do_sample
else None
)

# expand input_ids with `num_return_sequences` additional sequences per batch
input_ids, model_kwargs = self._expand_inputs_for_generation(
@@ -1682,7 +1659,7 @@ def generate(
outputs = self._sample(
input_ids,
logits_processor=logits_processor,
logits_warper=logits_warper,
logits_warper=prepared_logits_warper,
stopping_criteria=stopping_criteria,
generation_config=generation_config,
synced_gpus=synced_gpus,
@@ -2714,16 +2691,7 @@ def generate(
streamer.put(input_ids.cpu())

# 7. determine generation mode
is_greedy_gen_mode = (
(generation_config.num_beams == 1)
and (generation_config.num_beam_groups == 1)
and generation_config.do_sample is False
)
is_sample_gen_mode = (
(generation_config.num_beams == 1)
and (generation_config.num_beam_groups == 1)
and generation_config.do_sample is True
)
generation_mode = generation_config.get_generation_mode()

# 8. prepare batched CFG externally (to enable coexistance with the unbatched CFG)
if generation_config.guidance_scale is not None and generation_config.guidance_scale > 1:
@@ -2745,27 +2713,13 @@ def generate(
generation_config=generation_config, stopping_criteria=stopping_criteria
)

if is_greedy_gen_mode:
if generation_config.num_return_sequences > 1:
raise ValueError(
"num_return_sequences has to be 1 when doing greedy search, "
f"but is {generation_config.num_return_sequences}."
)

# 11. run greedy search
outputs = self._sample(
input_ids,
logits_processor=logits_processor,
stopping_criteria=stopping_criteria,
generation_config=generation_config,
synced_gpus=synced_gpus,
streamer=streamer,
**model_kwargs,
)

elif is_sample_gen_mode:
if generation_mode in (GenerationMode.SAMPLE, GenerationMode.GREEDY_SEARCH):
# 11. prepare logits warper
logits_warper = self._get_logits_warper(generation_config, device=input_ids.device)
prepared_logits_warper = (
self._get_logits_warper(generation_config, device=input_ids.device)
if generation_config.do_sample
else None
)

# expand input_ids with `num_return_sequences` additional sequences per batch
input_ids, model_kwargs = self._expand_inputs_for_generation(
@@ -2779,7 +2733,7 @@ def generate(
outputs = self._sample(
input_ids,
logits_processor=logits_processor,
logits_warper=logits_warper,
logits_warper=prepared_logits_warper,
stopping_criteria=stopping_criteria,
generation_config=generation_config,
synced_gpus=synced_gpus,
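The refactor in both MusicGen `generate()` methods replaces the hand-rolled `is_greedy_gen_mode` / `is_sample_gen_mode` flags with `GenerationConfig.get_generation_mode()`, so greedy search and sampling share a single `_sample` path. A small sketch of the mapping, assuming the `GenerationMode` enum imported in the diff above:

from transformers import GenerationConfig
from transformers.generation.configuration_utils import GenerationMode

greedy_cfg = GenerationConfig(do_sample=False, num_beams=1)
sample_cfg = GenerationConfig(do_sample=True, num_beams=1)
beam_cfg = GenerationConfig(do_sample=False, num_beams=4)

assert greedy_cfg.get_generation_mode() == GenerationMode.GREEDY_SEARCH
assert sample_cfg.get_generation_mode() == GenerationMode.SAMPLE
assert beam_cfg.get_generation_mode() == GenerationMode.BEAM_SEARCH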
(Diffs for the remaining 13 changed files are not shown.)
