From 454bc14d90ddf8eca69c6721a2dbd0af834804d0 Mon Sep 17 00:00:00 2001 From: Sai-Suraj-27 Date: Mon, 15 Jul 2024 14:35:08 +0530 Subject: [PATCH 1/7] fix: Removed a wrong key-word argument in `sigmoid_focal_loss()` function call (#31951) Removed a wrong key-word argument in sigmoid_focal_loss() function call. --- src/transformers/models/rt_detr/modeling_rt_detr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/rt_detr/modeling_rt_detr.py b/src/transformers/models/rt_detr/modeling_rt_detr.py index 850b8dc2f627b3..e61521d8880077 100644 --- a/src/transformers/models/rt_detr/modeling_rt_detr.py +++ b/src/transformers/models/rt_detr/modeling_rt_detr.py @@ -2163,7 +2163,7 @@ def loss_labels_focal(self, outputs, targets, indices, num_boxes, log=True): target_classes[idx] = target_classes_original target = F.one_hot(target_classes, num_classes=self.num_classes + 1)[..., :-1] - loss = sigmoid_focal_loss(src_logits, target, self.alpha, self.gamma, reduction="none") + loss = sigmoid_focal_loss(src_logits, target, self.alpha, self.gamma) loss = loss.mean(1).sum() * src_logits.shape[1] / num_boxes return {"loss_focal": loss} From 907500423d240cd660944960dd32a6d56d4ca693 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Mon, 15 Jul 2024 11:07:53 +0100 Subject: [PATCH 2/7] Generate: handle `logits_warper` update in models with custom generate fn (#31957) handle logits_warper update in models with custom generate fn --- src/transformers/generation/utils.py | 6 +- .../models/musicgen/modeling_musicgen.py | 80 ++++--------------- .../modeling_musicgen_melody.py | 80 ++++--------------- src/transformers/models/rag/modeling_rag.py | 2 + 4 files changed, 39 insertions(+), 129 deletions(-) diff --git a/src/transformers/generation/utils.py b/src/transformers/generation/utils.py index 970475c98879a8..9ce16f7a395e0b 100644 --- a/src/transformers/generation/utils.py +++ b/src/transformers/generation/utils.py @@ -2219,7 +2219,7 @@ def _dola_decoding( generation_config: GenerationConfig, synced_gpus: bool, streamer: "BaseStreamer", - logits_warper: LogitsProcessorList, + logits_warper: Optional[LogitsProcessorList], **model_kwargs, ) -> Union[GenerateNonBeamOutput, torch.LongTensor]: r""" @@ -2826,7 +2826,7 @@ def _sample( generation_config: GenerationConfig, synced_gpus: bool, streamer: Optional["BaseStreamer"], - logits_warper: LogitsProcessorList, + logits_warper: Optional[LogitsProcessorList], **model_kwargs, ) -> Union[GenerateNonBeamOutput, torch.LongTensor]: r""" @@ -3033,7 +3033,7 @@ def _beam_search( stopping_criteria: StoppingCriteriaList, generation_config: GenerationConfig, synced_gpus: bool, - logits_warper: LogitsProcessorList, + logits_warper: Optional[LogitsProcessorList], **model_kwargs, ) -> Union[GenerateBeamOutput, torch.LongTensor]: r""" diff --git a/src/transformers/models/musicgen/modeling_musicgen.py b/src/transformers/models/musicgen/modeling_musicgen.py index 5101fef3df4e4e..0102d1c267c7ad 100644 --- a/src/transformers/models/musicgen/modeling_musicgen.py +++ b/src/transformers/models/musicgen/modeling_musicgen.py @@ -26,7 +26,7 @@ from torch.nn import CrossEntropyLoss from ...activations import ACT2FN -from ...generation.configuration_utils import GenerationConfig +from ...generation.configuration_utils import GenerationConfig, GenerationMode from ...generation.logits_process import ClassifierFreeGuidanceLogitsProcessor, LogitsProcessorList from ...generation.stopping_criteria import StoppingCriteriaList from ...modeling_attn_mask_utils import ( @@ 
-1618,16 +1618,7 @@ def generate( model_kwargs["delay_pattern_mask"] = delay_pattern_mask # 7. determine generation mode - is_greedy_gen_mode = ( - (generation_config.num_beams == 1) - and (generation_config.num_beam_groups == 1) - and generation_config.do_sample is False - ) - is_sample_gen_mode = ( - (generation_config.num_beams == 1) - and (generation_config.num_beam_groups == 1) - and generation_config.do_sample is True - ) + generation_mode = generation_config.get_generation_mode() # 8. prepare batched CFG externally (to enable coexistance with the unbatched CFG) if generation_config.guidance_scale is not None and generation_config.guidance_scale > 1: @@ -1649,27 +1640,13 @@ def generate( generation_config=generation_config, stopping_criteria=stopping_criteria ) - if is_greedy_gen_mode: - if generation_config.num_return_sequences > 1: - raise ValueError( - "num_return_sequences has to be 1 when doing greedy search, " - f"but is {generation_config.num_return_sequences}." - ) - - # 11. run greedy search - outputs = self._sample( - input_ids, - logits_processor=logits_processor, - stopping_criteria=stopping_criteria, - generation_config=generation_config, - synced_gpus=synced_gpus, - streamer=streamer, - **model_kwargs, - ) - - elif is_sample_gen_mode: + if generation_mode in (GenerationMode.SAMPLE, GenerationMode.GREEDY_SEARCH): # 11. prepare logits warper - logits_warper = self._get_logits_warper(generation_config, device=input_ids.device) + prepared_logits_warper = ( + self._get_logits_warper(generation_config, device=input_ids.device) + if generation_config.do_sample + else None + ) # expand input_ids with `num_return_sequences` additional sequences per batch input_ids, model_kwargs = self._expand_inputs_for_generation( @@ -1682,7 +1659,7 @@ def generate( outputs = self._sample( input_ids, logits_processor=logits_processor, - logits_warper=logits_warper, + logits_warper=prepared_logits_warper, stopping_criteria=stopping_criteria, generation_config=generation_config, synced_gpus=synced_gpus, @@ -2714,16 +2691,7 @@ def generate( streamer.put(input_ids.cpu()) # 7. determine generation mode - is_greedy_gen_mode = ( - (generation_config.num_beams == 1) - and (generation_config.num_beam_groups == 1) - and generation_config.do_sample is False - ) - is_sample_gen_mode = ( - (generation_config.num_beams == 1) - and (generation_config.num_beam_groups == 1) - and generation_config.do_sample is True - ) + generation_mode = generation_config.get_generation_mode() # 8. prepare batched CFG externally (to enable coexistance with the unbatched CFG) if generation_config.guidance_scale is not None and generation_config.guidance_scale > 1: @@ -2745,27 +2713,13 @@ def generate( generation_config=generation_config, stopping_criteria=stopping_criteria ) - if is_greedy_gen_mode: - if generation_config.num_return_sequences > 1: - raise ValueError( - "num_return_sequences has to be 1 when doing greedy search, " - f"but is {generation_config.num_return_sequences}." - ) - - # 11. run greedy search - outputs = self._sample( - input_ids, - logits_processor=logits_processor, - stopping_criteria=stopping_criteria, - generation_config=generation_config, - synced_gpus=synced_gpus, - streamer=streamer, - **model_kwargs, - ) - - elif is_sample_gen_mode: + if generation_mode in (GenerationMode.SAMPLE, GenerationMode.GREEDY_SEARCH): # 11. 
prepare logits warper - logits_warper = self._get_logits_warper(generation_config, device=input_ids.device) + prepared_logits_warper = ( + self._get_logits_warper(generation_config, device=input_ids.device) + if generation_config.do_sample + else None + ) # expand input_ids with `num_return_sequences` additional sequences per batch input_ids, model_kwargs = self._expand_inputs_for_generation( @@ -2779,7 +2733,7 @@ def generate( outputs = self._sample( input_ids, logits_processor=logits_processor, - logits_warper=logits_warper, + logits_warper=prepared_logits_warper, stopping_criteria=stopping_criteria, generation_config=generation_config, synced_gpus=synced_gpus, diff --git a/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py b/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py index 9a120dc3294f4c..3140b9f286448f 100644 --- a/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py +++ b/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py @@ -26,7 +26,7 @@ from torch.nn import CrossEntropyLoss from ...activations import ACT2FN -from ...generation.configuration_utils import GenerationConfig +from ...generation.configuration_utils import GenerationConfig, GenerationMode from ...generation.logits_process import ClassifierFreeGuidanceLogitsProcessor, LogitsProcessorList from ...generation.stopping_criteria import StoppingCriteriaList from ...modeling_attn_mask_utils import _prepare_4d_causal_attention_mask, _prepare_4d_causal_attention_mask_for_sdpa @@ -1539,16 +1539,7 @@ def generate( model_kwargs["delay_pattern_mask"] = delay_pattern_mask # 7. determine generation mode - is_greedy_gen_mode = ( - (generation_config.num_beams == 1) - and (generation_config.num_beam_groups == 1) - and generation_config.do_sample is False - ) - is_sample_gen_mode = ( - (generation_config.num_beams == 1) - and (generation_config.num_beam_groups == 1) - and generation_config.do_sample is True - ) + generation_mode = generation_config.get_generation_mode() # 8. prepare batched CFG externally (to enable coexistance with the unbatched CFG) if generation_config.guidance_scale is not None and generation_config.guidance_scale > 1: @@ -1570,27 +1561,13 @@ def generate( generation_config=generation_config, stopping_criteria=stopping_criteria ) - if is_greedy_gen_mode: - if generation_config.num_return_sequences > 1: - raise ValueError( - "num_return_sequences has to be 1 when doing greedy search, " - f"but is {generation_config.num_return_sequences}." - ) - - # 11. run greedy search - outputs = self._sample( - input_ids, - logits_processor=logits_processor, - stopping_criteria=stopping_criteria, - generation_config=generation_config, - synced_gpus=synced_gpus, - streamer=streamer, - **model_kwargs, - ) - - elif is_sample_gen_mode: + if generation_mode in (GenerationMode.SAMPLE, GenerationMode.GREEDY_SEARCH): # 11. 
prepare logits warper - logits_warper = self._get_logits_warper(generation_config, device=input_ids.device) + prepared_logits_warper = ( + self._get_logits_warper(generation_config, device=input_ids.device) + if generation_config.do_sample + else None + ) # expand input_ids with `num_return_sequences` additional sequences per batch input_ids, model_kwargs = self._expand_inputs_for_generation( @@ -1603,7 +1580,7 @@ def generate( outputs = self._sample( input_ids, logits_processor=logits_processor, - logits_warper=logits_warper, + logits_warper=prepared_logits_warper, stopping_criteria=stopping_criteria, generation_config=generation_config, synced_gpus=synced_gpus, @@ -2557,16 +2534,7 @@ def generate( streamer.put(input_ids.cpu()) # 7. determine generation mode - is_greedy_gen_mode = ( - (generation_config.num_beams == 1) - and (generation_config.num_beam_groups == 1) - and generation_config.do_sample is False - ) - is_sample_gen_mode = ( - (generation_config.num_beams == 1) - and (generation_config.num_beam_groups == 1) - and generation_config.do_sample is True - ) + generation_mode = generation_config.get_generation_mode() # 8. prepare batched CFG externally (to enable coexistance with the unbatched CFG) if generation_config.guidance_scale is not None and generation_config.guidance_scale > 1: @@ -2588,27 +2556,13 @@ def generate( generation_config=generation_config, stopping_criteria=stopping_criteria ) - if is_greedy_gen_mode: - if generation_config.num_return_sequences > 1: - raise ValueError( - "num_return_sequences has to be 1 when doing greedy search, " - f"but is {generation_config.num_return_sequences}." - ) - - # 11. run greedy search - outputs = self._sample( - input_ids, - logits_processor=logits_processor, - stopping_criteria=stopping_criteria, - generation_config=generation_config, - synced_gpus=synced_gpus, - streamer=streamer, - **model_kwargs, - ) - - elif is_sample_gen_mode: + if generation_mode in (GenerationMode.SAMPLE, GenerationMode.GREEDY_SEARCH): # 11. 
prepare logits warper - logits_warper = self._get_logits_warper(generation_config, device=input_ids.device) + prepared_logits_warper = ( + self._get_logits_warper(generation_config, device=input_ids.device) + if generation_config.do_sample + else None + ) # expand input_ids with `num_return_sequences` additional sequences per batch input_ids, model_kwargs = self._expand_inputs_for_generation( @@ -2622,7 +2576,7 @@ def generate( outputs = self._sample( input_ids, logits_processor=logits_processor, - logits_warper=logits_warper, + logits_warper=prepared_logits_warper, stopping_criteria=stopping_criteria, generation_config=generation_config, synced_gpus=synced_gpus, diff --git a/src/transformers/models/rag/modeling_rag.py b/src/transformers/models/rag/modeling_rag.py index 4f6c8dc384266c..5b170bde8a3343 100644 --- a/src/transformers/models/rag/modeling_rag.py +++ b/src/transformers/models/rag/modeling_rag.py @@ -1558,6 +1558,7 @@ def extend_enc_output(tensor, num_beams=None): generation_config=generation_config, synced_gpus=False, streamer=None, + logits_warper=None, **model_kwargs, ) elif generation_config.num_beams > 1: @@ -1579,6 +1580,7 @@ def extend_enc_output(tensor, num_beams=None): stopping_criteria=prepared_stopping_criteria, generation_config=generation_config, synced_gpus=False, + logits_warper=None, **model_kwargs, ) else: From 556a4205f00594a852bdda237211a24cb09715a6 Mon Sep 17 00:00:00 2001 From: Sai-Suraj-27 Date: Mon, 15 Jul 2024 20:26:17 +0530 Subject: [PATCH 3/7] fix: Fixed the arguments in `create_repo()` function call (#31947) * Fixed the arguments in create_repo() function call. * Formatted the code properly using ruff. * Formatted the code more clearly. --- src/transformers/commands/user.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/commands/user.py b/src/transformers/commands/user.py index 938f4c8ea8b616..bf4072ce04689b 100644 --- a/src/transformers/commands/user.py +++ b/src/transformers/commands/user.py @@ -185,7 +185,7 @@ def run(self): print("Abort") exit() try: - url = create_repo(token, name=self.args.name, organization=self.args.organization) + url = create_repo(repo_id=full_name, token=token) except HTTPError as e: print(e) print(ANSI.red(e.response.text)) From 11efb4fc093683b3a057a213463ccc7beb544b56 Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Mon, 15 Jul 2024 17:16:36 +0200 Subject: [PATCH 4/7] Notify new docker images built for circleci (#31701) * hello * hello * hello * hello * hello * hello * hello * notify * trigger * use new channel --------- Co-authored-by: ydshieh --- .github/workflows/build-ci-docker-images.yml | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-ci-docker-images.yml b/.github/workflows/build-ci-docker-images.yml index 6f29df82769d82..a07b99af65d0b6 100644 --- a/.github/workflows/build-ci-docker-images.yml +++ b/.github/workflows/build-ci-docker-images.yml @@ -27,10 +27,10 @@ jobs: strategy: matrix: file: ["quality", "consistency", "custom-tokenizers", "torch-light", "tf-light", "exotic-models", "torch-tf-light", "torch-jax-light", "jax-light", "examples-torch", "examples-tf"] - continue-on-error: true + continue-on-error: true steps: - - + - name: Set tag run: | if ${{contains(github.event.head_commit.message, '[build-ci-image]')}}; then @@ -61,4 +61,17 @@ jobs: REF=${{ github.sha }} file: "./docker/${{ matrix.file }}.dockerfile" push: ${{ contains(github.event.head_commit.message, 'ci-image]') 
|| github.event_name == 'schedule' }} - tags: ${{ env.TAG }} \ No newline at end of file + tags: ${{ env.TAG }} + + notify: + runs-on: ubuntu-22.04 + if: ${{ contains(github.event.head_commit.message, '[build-ci-image]') || contains(github.event.head_commit.message, '[push-ci-image]') && '!cancelled()' || github.event_name == 'schedule' }} + steps: + - name: Post to Slack + if: ${{ contains(github.event.head_commit.message, '[push-ci-image]') && github.event_name != 'schedule' }} + uses: huggingface/hf-workflows/.github/actions/post-slack@main + with: + slack_channel: "#transformers-ci-circleci-images" + title: 🤗 New docker images for CircleCI are pushed. + status: ${{ job.status }} + slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} \ No newline at end of file From a1a34657d41627b21dddf2bf9cc55941329a60b6 Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Mon, 15 Jul 2024 17:56:24 +0200 Subject: [PATCH 5/7] Avoid race condition (#31973) * [test_all] hub * remove delete * remove delete * remove delete * remove delete * remove delete * remove delete * [test_all] * [test_all] * [test_all] * [test_all] * [test_all] * [test_all] --------- Co-authored-by: ydshieh --- tests/generation/test_configuration_utils.py | 14 ++++++++++---- tests/utils/test_configuration_utils.py | 14 ++++++++++---- tests/utils/test_feature_extraction_utils.py | 14 ++++++++++---- tests/utils/test_image_processing_utils.py | 14 ++++++++++---- tests/utils/test_modeling_flax_utils.py | 14 ++++++++++---- tests/utils/test_modeling_tf_utils.py | 14 ++++++++++---- tests/utils/test_modeling_utils.py | 14 ++++++++++---- tests/utils/test_tokenization_utils.py | 14 ++++++++++---- 8 files changed, 80 insertions(+), 32 deletions(-) diff --git a/tests/generation/test_configuration_utils.py b/tests/generation/test_configuration_utils.py index ece3f33a06070c..26b8d092fdcd2f 100644 --- a/tests/generation/test_configuration_utils.py +++ b/tests/generation/test_configuration_utils.py @@ -253,8 +253,11 @@ def test_push_to_hub(self): if k != "transformers_version": self.assertEqual(v, getattr(new_config, k)) - # Reset repo - delete_repo(token=self._token, repo_id="test-generation-config") + try: + # Reset repo + delete_repo(token=self._token, repo_id="test-generation-config") + except: # noqa E722 + pass # Push to hub via save_pretrained with tempfile.TemporaryDirectory() as tmp_dir: @@ -278,8 +281,11 @@ def test_push_to_hub_in_organization(self): if k != "transformers_version": self.assertEqual(v, getattr(new_config, k)) - # Reset repo - delete_repo(token=self._token, repo_id="valid_org/test-generation-config-org") + try: + # Reset repo + delete_repo(token=self._token, repo_id="valid_org/test-generation-config-org") + except: # noqa E722 + pass # Push to hub via save_pretrained with tempfile.TemporaryDirectory() as tmp_dir: diff --git a/tests/utils/test_configuration_utils.py b/tests/utils/test_configuration_utils.py index 6809b3a2ce5f0c..15adb213079e50 100644 --- a/tests/utils/test_configuration_utils.py +++ b/tests/utils/test_configuration_utils.py @@ -126,8 +126,11 @@ def test_push_to_hub(self): if k != "transformers_version": self.assertEqual(v, getattr(new_config, k)) - # Reset repo - delete_repo(token=self._token, repo_id="test-config") + try: + # Reset repo + delete_repo(token=self._token, repo_id="test-config") + except: # noqa E722 + pass # Push to hub via save_pretrained with tempfile.TemporaryDirectory() as tmp_dir: @@ -149,8 +152,11 @@ def test_push_to_hub_in_organization(self): if k != 
"transformers_version": self.assertEqual(v, getattr(new_config, k)) - # Reset repo - delete_repo(token=self._token, repo_id="valid_org/test-config-org") + try: + # Reset repo + delete_repo(token=self._token, repo_id="valid_org/test-config-org") + except: # noqa E722 + pass # Push to hub via save_pretrained with tempfile.TemporaryDirectory() as tmp_dir: diff --git a/tests/utils/test_feature_extraction_utils.py b/tests/utils/test_feature_extraction_utils.py index d88fcb276056d7..0e68addb2adceb 100644 --- a/tests/utils/test_feature_extraction_utils.py +++ b/tests/utils/test_feature_extraction_utils.py @@ -85,8 +85,11 @@ def test_push_to_hub(self): for k, v in feature_extractor.__dict__.items(): self.assertEqual(v, getattr(new_feature_extractor, k)) - # Reset repo - delete_repo(token=self._token, repo_id="test-feature-extractor") + try: + # Reset repo + delete_repo(token=self._token, repo_id="test-feature-extractor") + except: # noqa E722 + pass # Push to hub via save_pretrained with tempfile.TemporaryDirectory() as tmp_dir: @@ -106,8 +109,11 @@ def test_push_to_hub_in_organization(self): for k, v in feature_extractor.__dict__.items(): self.assertEqual(v, getattr(new_feature_extractor, k)) - # Reset repo - delete_repo(token=self._token, repo_id="valid_org/test-feature-extractor") + try: + # Reset repo + delete_repo(token=self._token, repo_id="valid_org/test-feature-extractor") + except: # noqa E722 + pass # Push to hub via save_pretrained with tempfile.TemporaryDirectory() as tmp_dir: diff --git a/tests/utils/test_image_processing_utils.py b/tests/utils/test_image_processing_utils.py index 4b2586a634d8a2..3681d1d1e1365b 100644 --- a/tests/utils/test_image_processing_utils.py +++ b/tests/utils/test_image_processing_utils.py @@ -96,8 +96,11 @@ def test_push_to_hub(self): for k, v in image_processor.__dict__.items(): self.assertEqual(v, getattr(new_image_processor, k)) - # Reset repo - delete_repo(token=self._token, repo_id="test-image-processor") + try: + # Reset repo + delete_repo(token=self._token, repo_id="test-image-processor") + except: # noqa E722 + pass # Push to hub via save_pretrained with tempfile.TemporaryDirectory() as tmp_dir: @@ -117,8 +120,11 @@ def test_push_to_hub_in_organization(self): for k, v in image_processor.__dict__.items(): self.assertEqual(v, getattr(new_image_processor, k)) - # Reset repo - delete_repo(token=self._token, repo_id="valid_org/test-image-processor") + try: + # Reset repo + delete_repo(token=self._token, repo_id="valid_org/test-image-processor") + except: # noqa E722 + pass # Push to hub via save_pretrained with tempfile.TemporaryDirectory() as tmp_dir: diff --git a/tests/utils/test_modeling_flax_utils.py b/tests/utils/test_modeling_flax_utils.py index 0309a3bd8f8ce0..5011c240cc9282 100644 --- a/tests/utils/test_modeling_flax_utils.py +++ b/tests/utils/test_modeling_flax_utils.py @@ -83,8 +83,11 @@ def test_push_to_hub(self): max_diff = (base_params[key] - new_params[key]).sum().item() self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical") - # Reset repo - delete_repo(token=self._token, repo_id="test-model-flax") + try: + # Reset repo + delete_repo(token=self._token, repo_id="test-model-flax") + except: # noqa E722 + pass # Push to hub via save_pretrained with tempfile.TemporaryDirectory() as tmp_dir: @@ -115,8 +118,11 @@ def test_push_to_hub_in_organization(self): max_diff = (base_params[key] - new_params[key]).sum().item() self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical") - # Reset repo - delete_repo(token=self._token, 
repo_id="valid_org/test-model-flax-org") + try: + # Reset repo + delete_repo(token=self._token, repo_id="valid_org/test-model-flax-org") + except: # noqa E722 + pass # Push to hub via save_pretrained with tempfile.TemporaryDirectory() as tmp_dir: diff --git a/tests/utils/test_modeling_tf_utils.py b/tests/utils/test_modeling_tf_utils.py index 8a281761333dc3..6332df014d57a1 100644 --- a/tests/utils/test_modeling_tf_utils.py +++ b/tests/utils/test_modeling_tf_utils.py @@ -723,8 +723,11 @@ def test_push_to_hub(self): break self.assertTrue(models_equal) - # Reset repo - delete_repo(token=self._token, repo_id="test-model-tf") + try: + # Reset repo + delete_repo(token=self._token, repo_id="test-model-tf") + except: # noqa E722 + pass # Push to hub via save_pretrained with tempfile.TemporaryDirectory() as tmp_dir: @@ -786,8 +789,11 @@ def test_push_to_hub_in_organization(self): break self.assertTrue(models_equal) - # Reset repo - delete_repo(token=self._token, repo_id="valid_org/test-model-tf-org") + try: + # Reset repo + delete_repo(token=self._token, repo_id="valid_org/test-model-tf-org") + except: # noqa E722 + pass # Push to hub via save_pretrained with tempfile.TemporaryDirectory() as tmp_dir: diff --git a/tests/utils/test_modeling_utils.py b/tests/utils/test_modeling_utils.py index 83c8ec8499bfe4..ed540fd5e59b84 100644 --- a/tests/utils/test_modeling_utils.py +++ b/tests/utils/test_modeling_utils.py @@ -1847,8 +1847,11 @@ def test_push_to_hub(self): for p1, p2 in zip(model.parameters(), new_model.parameters()): self.assertTrue(torch.equal(p1, p2)) - # Reset repo - delete_repo(token=self._token, repo_id="test-model") + try: + # Reset repo + delete_repo(token=self._token, repo_id="test-model") + except: # noqa E722 + pass # Push to hub via save_pretrained with tempfile.TemporaryDirectory() as tmp_dir: @@ -1887,8 +1890,11 @@ def test_push_to_hub_in_organization(self): for p1, p2 in zip(model.parameters(), new_model.parameters()): self.assertTrue(torch.equal(p1, p2)) - # Reset repo - delete_repo(token=self._token, repo_id="valid_org/test-model-org") + try: + # Reset repo + delete_repo(token=self._token, repo_id="valid_org/test-model-org") + except: # noqa E722 + pass # Push to hub via save_pretrained with tempfile.TemporaryDirectory() as tmp_dir: diff --git a/tests/utils/test_tokenization_utils.py b/tests/utils/test_tokenization_utils.py index 5b2f2021565812..0df86dc3cc658d 100644 --- a/tests/utils/test_tokenization_utils.py +++ b/tests/utils/test_tokenization_utils.py @@ -146,8 +146,11 @@ def test_push_to_hub(self): new_tokenizer = BertTokenizer.from_pretrained(f"{USER}/test-tokenizer") self.assertDictEqual(new_tokenizer.vocab, tokenizer.vocab) - # Reset repo - delete_repo(token=self._token, repo_id="test-tokenizer") + try: + # Reset repo + delete_repo(token=self._token, repo_id="test-tokenizer") + except: # noqa E722 + pass # Push to hub via save_pretrained with tempfile.TemporaryDirectory() as tmp_dir: @@ -167,8 +170,11 @@ def test_push_to_hub_in_organization(self): new_tokenizer = BertTokenizer.from_pretrained("valid_org/test-tokenizer-org") self.assertDictEqual(new_tokenizer.vocab, tokenizer.vocab) - # Reset repo - delete_repo(token=self._token, repo_id="valid_org/test-tokenizer-org") + try: + # Reset repo + delete_repo(token=self._token, repo_id="valid_org/test-tokenizer-org") + except: # noqa E722 + pass # Push to hub via save_pretrained with tempfile.TemporaryDirectory() as tmp_dir: From e4682de6358f9b9cefb73683588e588e4d9154f7 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Mon, 15 Jul 
2024 18:49:37 +0100 Subject: [PATCH 6/7] Masking: remove flakiness from test (#31939) --- tests/models/whisper/test_modeling_whisper.py | 3 --- tests/test_modeling_common.py | 3 --- 2 files changed, 6 deletions(-) diff --git a/tests/models/whisper/test_modeling_whisper.py b/tests/models/whisper/test_modeling_whisper.py index dcb495d95a6e4d..5fc66f9a20551d 100644 --- a/tests/models/whisper/test_modeling_whisper.py +++ b/tests/models/whisper/test_modeling_whisper.py @@ -1571,9 +1571,6 @@ def test_custom_4d_attention_mask(self): out_last_tokens = logits[:, -1, :] # last tokens in each batch line out_shared_prefix_last_tokens = logits_shared_prefix[0, -3:, :] # last three tokens - # comparing greedily-chosen tokens: - assert torch.equal(out_last_tokens.max(axis=1).indices, out_shared_prefix_last_tokens.max(axis=1).indices) - # comparing softmax-normalized logits: normalized_0 = torch.nn.functional.softmax(out_last_tokens) normalized_1 = torch.nn.functional.softmax(out_shared_prefix_last_tokens) diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py index 0ed3cee3c57a53..a73417e4164821 100755 --- a/tests/test_modeling_common.py +++ b/tests/test_modeling_common.py @@ -4486,9 +4486,6 @@ def test_custom_4d_attention_mask(self): out_last_tokens = logits[:, -1, :] # last tokens in each batch line out_shared_prefix_last_tokens = logits_shared_prefix[0, -3:, :] # last three tokens - # comparing greedily-chosen tokens: - assert torch.equal(out_last_tokens.max(axis=1).indices, out_shared_prefix_last_tokens.max(axis=1).indices) - # comparing softmax-normalized logits: normalized_0 = F.softmax(out_last_tokens) normalized_1 = F.softmax(out_shared_prefix_last_tokens) From 6fbea6d237cbdfc3c229cdadfa3c968cfb2d5142 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Mon, 15 Jul 2024 19:59:20 +0100 Subject: [PATCH 7/7] Generate: doc nits (#31982) nits --- .../generation/configuration_utils.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/transformers/generation/configuration_utils.py b/src/transformers/generation/configuration_utils.py index dcdccad23a54c1..c7e626f1a7c284 100644 --- a/src/transformers/generation/configuration_utils.py +++ b/src/transformers/generation/configuration_utils.py @@ -113,10 +113,10 @@ class GenerationConfig(PushToHubMixin): heuristic is applied and the generation stops when is it very unlikely to find better candidates; `"never"`, where the beam search procedure only stops when there cannot be better candidates (canonical beam search algorithm). - max_time(`float`, *optional*): + max_time (`float`, *optional*): The maximum amount of time you allow the computation to run for in seconds. generation will still finish the current pass after allocated time has been passed. - stop_strings(`str or List[str]`, *optional*): + stop_strings (`str or List[str]`, *optional*): A string or a list of strings that should terminate generation if the model outputs them. > Parameters that control the generation strategy used @@ -181,10 +181,10 @@ class GenerationConfig(PushToHubMixin): `length_penalty` < 0.0 encourages shorter sequences. no_repeat_ngram_size (`int`, *optional*, defaults to 0): If set to int > 0, all ngrams of that size can only occur once. - bad_words_ids(`List[List[int]]`, *optional*): + bad_words_ids (`List[List[int]]`, *optional*): List of list of token ids that are not allowed to be generated. Check [`~generation.NoBadWordsLogitsProcessor`] for further documentation and examples. 
- force_words_ids(`List[List[int]]` or `List[List[List[int]]]`, *optional*): + force_words_ids (`List[List[int]]` or `List[List[List[int]]]`, *optional*): List of token ids that must be generated. If given a `List[List[int]]`, this is treated as a simple list of words that must be included, the opposite to `bad_words_ids`. If given `List[List[List[int]]]`, this triggers a [disjunctive constraint](https://github.com/huggingface/transformers/issues/14081), where one @@ -200,7 +200,7 @@ class GenerationConfig(PushToHubMixin): The id of the token to force as the first generated token after the `decoder_start_token_id`. Useful for multilingual models like [mBART](../model_doc/mbart) where the first generated token needs to be the target language token. - forced_eos_token_id (`Union[int, List[int]]`, *optional*, defaults to `model.config.forced_eos_token_id`): + forced_eos_token_id (`int` or List[int]`, *optional*, defaults to `model.config.forced_eos_token_id`): The id of the token to force as the last generated token when `max_length` is reached. Optionally, use a list to set multiple *end-of-sequence* tokens. remove_invalid_values (`bool`, *optional*, defaults to `model.config.remove_invalid_values`): @@ -210,7 +210,7 @@ class GenerationConfig(PushToHubMixin): This Tuple adds an exponentially increasing length penalty, after a certain amount of tokens have been generated. The tuple shall consist of: `(start_index, decay_factor)` where `start_index` indicates where penalty starts and `decay_factor` represents the factor of exponential decay - suppress_tokens (`List[int]`, *optional*): + suppress_tokens (`List[int]`, *optional*): A list of tokens that will be suppressed at generation. The `SupressTokens` logit processor will set their log probs to `-inf` so that they are not sampled. begin_suppress_tokens (`List[int]`, *optional*): @@ -234,7 +234,7 @@ class GenerationConfig(PushToHubMixin): low_memory (`bool`, *optional*): Switch to sequential beam search and sequential topk for contrastive search to reduce peak memory. Used with beam search and contrastive search. - watermarking_config (Union[`WatermarkingConfig`, `dict`], *optional*): + watermarking_config (`WatermarkingConfig` or `dict`, *optional*): Arguments used to watermark the model outputs by adding a small bias to randomly selected set of "green" tokens. If passed as `Dict`, it will be converted to a `WatermarkingConfig` internally. See [this paper](https://arxiv.org/abs/2306.04634) for more details. Accepts the following keys: @@ -249,12 +249,12 @@ class GenerationConfig(PushToHubMixin): - "lefthash" (default): "green" tokens selection depend on the last token (Algorithm 2 from the paper) - "selfhash": "green" tokens selection depends on the current token itself (Algorithm 3 from the paper) The downside of this scheme is that it considers all possible next tokens and can be slower than "lefthash". - - context_width(`int`): + - context_width (`int`): The context length of previous tokens to use in seeding. Higher context length makes watermarking more robust. > Parameters that define the output variables of generate - num_return_sequences(`int`, *optional*, defaults to 1): + num_return_sequences (`int`, *optional*, defaults to 1): The number of independently computed returned sequences for each element in the batch. output_attentions (`bool`, *optional*, defaults to `False`): Whether or not to return the attentions tensors of all attention layers. 
See `attentions` under returned @@ -284,7 +284,7 @@ class GenerationConfig(PushToHubMixin): encoder_no_repeat_ngram_size (`int`, *optional*, defaults to 0): If set to int > 0, all ngrams of that size that occur in the `encoder_input_ids` cannot occur in the `decoder_input_ids`. - decoder_start_token_id (`Union[int, List[int]]`, *optional*): + decoder_start_token_id (`int` or `List[int]`, *optional*): If an encoder-decoder model starts decoding with a different token than *bos*, the id of that token or a list of length `batch_size`. Indicating a list enables different start ids for each element in the batch (e.g. multilingual models with different target languages in one batch) @@ -323,7 +323,7 @@ class GenerationConfig(PushToHubMixin): cache_implementation (`str`, *optional*, default to `None`): Cache class that should be used when generating. - cache_config (`Union[CacheConfig, dict]`, *optional*, default to `None`): + cache_config (`CacheConfig` or `dict`, *optional*, default to `None`): Arguments used in the key-value cache class can be passed in `cache_config`. Can be passed as a `Dict` and it will be converted to its repsective `CacheConfig` internally. Otherwise can be passed as a `CacheConfig` class matching the indicated `cache_implementation`.
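
Note on the generation-mode refactor in PATCH 2/7: the hand-rolled `is_greedy_gen_mode` / `is_sample_gen_mode` checks are replaced by `generation_config.get_generation_mode()`, and a logits warper is prepared only when `generation_config.do_sample` is true, so greedy decoding now reaches `_sample()` with `logits_warper=None` (the RAG change in the same patch passes `logits_warper=None` explicitly for the same reason). A minimal sketch of that behaviour is below; the config values are illustrative assumptions and do not come from the patches:

    # Sketch only: illustrates GenerationConfig.get_generation_mode() as used in PATCH 2/7.
    from transformers import GenerationConfig
    from transformers.generation.configuration_utils import GenerationMode

    sampling_config = GenerationConfig(do_sample=True, num_beams=1, temperature=0.7)
    greedy_config = GenerationConfig(do_sample=False, num_beams=1)

    assert sampling_config.get_generation_mode() == GenerationMode.SAMPLE
    assert greedy_config.get_generation_mode() == GenerationMode.GREEDY_SEARCH

    # Mirrors the musicgen/musicgen_melody change: a warper is needed only for sampling;
    # greedy search now calls _sample() with logits_warper=None.
    needs_logits_warper = sampling_config.do_sample  # True here, False for greedy_config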