============================= test session starts ============================== platform linux -- Python 3.10.12, pytest-7.4.4, pluggy-1.4.0 -- /datadisk1/av11/coding/github/transformers/venv/bin/python cachedir: .pytest_cache hypothesis profile 'default' -> database=DirectoryBasedExampleDatabase(PosixPath('/datadisk1/av11/coding/github/transformers/.hypothesis/examples')) rootdir: /datadisk1/av11/coding/github/transformers configfile: pyproject.toml plugins: anyio-4.3.0, hypothesis-6.98.18, picked-0.5.0, rich-0.1.1, timeout-2.3.1, xdist-3.5.0, dash-2.16.1 collecting ... collected 170 items tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_assisted_decoding_matches_greedy_search_0_random <- src/transformers/testing_utils.py PASSED [ 0%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_assisted_decoding_matches_greedy_search_1_same <- src/transformers/testing_utils.py PASSED [ 1%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_assisted_decoding_sample <- tests/generation/test_utils.py PASSED [ 1%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_attention_outputs <- tests/test_modeling_common.py PASSED [ 2%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_batch_generation PASSED [ 2%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_batch_generation_2heads PASSED [ 3%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_batching_equivalence <- tests/test_modeling_common.py PASSED [ 4%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_beam_sample_generate <- tests/generation/test_utils.py PASSED [ 4%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_beam_sample_generate_dict_output <- tests/generation/test_utils.py PASSED [ 5%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_beam_search_generate <- tests/generation/test_utils.py PASSED [ 5%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_beam_search_generate_dict_output <- tests/generation/test_utils.py PASSED [ 6%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_beam_search_generate_dict_outputs_use_cache <- tests/generation/test_utils.py PASSED [ 7%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_beam_search_low_memory <- tests/generation/test_utils.py PASSED [ 7%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_can_use_safetensors <- tests/test_modeling_common.py PASSED [ 8%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_config PASSED [ 8%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_constrained_beam_search_generate <- tests/generation/test_utils.py PASSED [ 9%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_constrained_beam_search_generate_dict_output <- tests/generation/test_utils.py PASSED [ 10%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_contrastive_generate <- tests/generation/test_utils.py PASSED [ 10%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_contrastive_generate_dict_outputs_use_cache <- tests/generation/test_utils.py PASSED [ 11%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_contrastive_generate_low_memory <- tests/generation/test_utils.py PASSED [ 11%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_correct_missing_keys <- tests/test_modeling_common.py PASSED [ 12%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_cpu_offload <- tests/test_modeling_common.py PASSED [ 12%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_custom_4d_attention_mask <- tests/test_modeling_common.py SKIPPEDks) [ 13%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_determinism <- tests/test_modeling_common.py PASSED [ 14%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_disk_offload_bin <- tests/test_modeling_common.py PASSED [ 14%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_disk_offload_safetensors <- tests/test_modeling_common.py PASSED [ 15%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_eager_matches_sdpa_generate <- tests/test_modeling_common.py PASSED [ 15%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_eager_matches_sdpa_inference_0_float16 <- tests/test_modeling_common.py PASSED [ 16%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_eager_matches_sdpa_inference_1_bfloat16 <- tests/test_modeling_common.py PASSED [ 17%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_eager_matches_sdpa_inference_2_float32 <- tests/test_modeling_common.py PASSED [ 17%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_equivalence_flax_to_pt <- tests/test_modeling_common.py PASSED [ 18%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_equivalence_pt_to_flax <- tests/test_modeling_common.py PASSED [ 18%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_fast_init_context_manager <- tests/test_modeling_common.py PASSED [ 19%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_fast_init_tied_embeddings <- tests/test_modeling_common.py PASSED [ 20%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_feed_forward_chunking <- tests/test_modeling_common.py PASSED [ 20%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_flash_attn_2_conversion <- tests/test_modeling_common.py SKIPPED [ 21%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_flash_attn_2_fp32_ln <- tests/test_modeling_common.py SKIPPED [ 21%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_flash_attn_2_from_config <- tests/test_modeling_common.py SKIPPED [ 22%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_flash_attn_2_generate_left_padding <- src/transformers/testing_utils.py SKIPPED [ 22%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_flash_attn_2_generate_padding_right <- src/transformers/testing_utils.py SKIPPED [ 23%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_flash_attn_2_generate_use_cache <- tests/test_modeling_common.py SKIPPED [ 24%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_flash_attn_2_inference_equivalence <- src/transformers/testing_utils.py SKIPPED [ 24%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_flash_attn_2_inference_equivalence_right_padding <- src/transformers/testing_utils.py SKIPPED [ 25%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_flax_from_pt_safetensors <- tests/test_modeling_common.py PASSED [ 25%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_forward_signature <- tests/test_modeling_common.py PASSED [ 26%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_from_pretrained_no_checkpoint <- tests/test_modeling_common.py PASSED [ 27%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_generate_continue_from_past_key_values <- tests/generation/test_utils.py PASSED [ 27%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_generate_from_inputs_embeds_decoder_only <- tests/generation/test_utils.py PASSED [ 28%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_generate_with_head_masking <- tests/generation/test_utils.py PASSED [ 28%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_generate_with_quant_cache <- tests/generation/test_utils.py SKIPPED [ 29%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_generate_without_input_ids <- tests/generation/test_utils.py PASSED [ 30%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_gpt2_double_lm_head_model PASSED [ 30%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_gpt2_gradient_checkpointing PASSED [ 31%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_gpt2_lm_head_model PASSED [ 31%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_gpt2_model PASSED [ 32%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_gpt2_model_att_mask_past PASSED [ 32%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_gpt2_model_past PASSED [ 33%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_gpt2_model_past_large_inputs PASSED [ 34%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_gpt2_question_answering_model PASSED [ 34%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_gpt2_reorder_and_upcast_attn PASSED [ 35%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_gpt2_scale_attn_by_inverse_layer_idx PASSED [ 35%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_gpt2_sequence_classification_model PASSED [ 36%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_gpt2_token_classification_model PASSED [ 37%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_gpt2_weight_initialization PASSED [ 37%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_gradient_checkpointing_backward_compatibility <- tests/test_modeling_common.py PASSED [ 38%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_gradient_checkpointing_enable_disable <- tests/test_modeling_common.py PASSED [ 38%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_greedy_generate <- tests/generation/test_utils.py PASSED [ 39%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_greedy_generate_dict_outputs <- tests/generation/test_utils.py PASSED [ 40%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_greedy_generate_dict_outputs_use_cache <- tests/generation/test_utils.py PASSED [ 40%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_group_beam_search_generate <- tests/generation/test_utils.py PASSED [ 41%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_group_beam_search_generate_dict_output <- tests/generation/test_utils.py PASSED [ 41%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_head_pruning <- tests/test_modeling_common.py PASSED [ 42%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_head_pruning_integration <- tests/test_modeling_common.py PASSED [ 42%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_head_pruning_save_load_from_config_init <- tests/test_modeling_common.py PASSED [ 43%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_head_pruning_save_load_from_pretrained <- tests/test_modeling_common.py PASSED [ 44%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_headmasking <- tests/test_modeling_common.py PASSED [ 44%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_hidden_states_output <- tests/test_modeling_common.py PASSED [ 45%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_initialization <- tests/test_modeling_common.py PASSED [ 45%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_inputs_embeds <- tests/test_modeling_common.py PASSED [ 46%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_inputs_embeds_matches_input_ids <- tests/test_modeling_common.py PASSED [ 47%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_keep_in_fp32_modules <- tests/test_modeling_common.py PASSED [ 47%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_left_padding_compatibility <- tests/generation/test_utils.py PASSED [ 48%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_load_save_without_tied_weights <- tests/test_modeling_common.py PASSED [ 48%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_load_with_mismatched_shapes <- tests/test_modeling_common.py PASSED [ 49%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_matched_shapes_have_loaded_weights_when_some_mismatched_shapes_exist <- tests/test_modeling_common.py PASSED [ 50%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_mismatched_shapes_have_properly_initialized_weights <- tests/test_modeling_common.py PASSED [ 50%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_model_common_attributes <- tests/test_modeling_common.py PASSED [ 51%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_model_from_pretrained PASSED [ 51%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_model_is_small <- tests/test_modeling_common.py PASSED [ 52%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_model_main_input_name <- tests/test_modeling_common.py PASSED [ 52%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_model_outputs_equivalence <- tests/test_modeling_common.py PASSED [ 53%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_model_parallel_beam_search <- tests/generation/test_utils.py SKIPPED [ 54%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_model_parallel_equal_results <- tests/test_modeling_common.py SKIPPED [ 54%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_model_parallelism <- tests/test_modeling_common.py SKIPPED [ 55%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_model_parallelization <- tests/test_modeling_common.py SKIPPED [ 55%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_model_weights_reload_no_missing_tied_weights <- tests/test_modeling_common.py PASSED [ 56%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_multi_gpu_data_parallel_forward <- tests/test_modeling_common.py SKIPPED [ 57%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_new_cache_format_0 <- tests/generation/test_utils.py SKIPPED [ 57%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_new_cache_format_1 <- tests/generation/test_utils.py SKIPPED [ 58%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_new_cache_format_2 <- tests/generation/test_utils.py SKIPPED [ 58%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_past_key_values_format <- tests/generation/test_utils.py PASSED [ 59%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_audio_classification <- tests/test_pipeline_mixin.py SKIPPED `GPT2ModelTest`.) [ 60%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_automatic_speech_recognition <- tests/test_pipeline_mixin.py SKIPPEDng` for `GPT2ModelTest`.) [ 60%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_conversational <- tests/test_pipeline_mixin.py SKIPPEDodelTest`.) [ 61%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_depth_estimation <- tests/test_pipeline_mixin.py SKIPPEDT2ModelTest`.) [ 61%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_document_question_answering <- tests/test_pipeline_mixin.py SKIPPED [ 62%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_feature_extraction <- tests/test_pipeline_mixin.py PASSED [ 62%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_fill_mask <- tests/test_pipeline_mixin.py SKIPPEDest`.) [ 63%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_image_classification <- tests/test_pipeline_mixin.py SKIPPED `GPT2ModelTest`.) [ 64%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_image_feature_extraction <- tests/test_pipeline_mixin.py SKIPPEDfor `GPT2ModelTest`.) [ 64%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_image_segmentation <- tests/test_pipeline_mixin.py SKIPPEDGPT2ModelTest`.) [ 65%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_image_to_text <- tests/test_pipeline_mixin.py SKIPPEDdelTest`.) [ 65%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_mask_generation <- tests/test_pipeline_mixin.py SKIPPED [ 66%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_object_detection <- tests/test_pipeline_mixin.py SKIPPEDT2ModelTest`.) [ 67%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_question_answering <- tests/test_pipeline_mixin.py PASSED [ 67%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_summarization <- tests/test_pipeline_mixin.py SKIPPEDdelTest`.) [ 68%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_table_question_answering <- tests/test_pipeline_mixin.py SKIPPEDfor `GPT2ModelTest`.) [ 68%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_text2text_generation <- tests/test_pipeline_mixin.py SKIPPED`GPT2ModelTest`.) [ 69%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_text_classification <- tests/test_pipeline_mixin.py PASSED [ 70%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_text_generation <- tests/test_pipeline_mixin.py PASSED [ 70%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_text_to_audio <- tests/test_pipeline_mixin.py SKIPPEDdelTest`.) [ 71%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_token_classification <- tests/test_pipeline_mixin.py PASSED [ 71%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_translation <- tests/test_pipeline_mixin.py SKIPPEDlTest`.) [ 72%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_video_classification <- tests/test_pipeline_mixin.py SKIPPED `GPT2ModelTest`.) [ 72%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_visual_question_answering <- tests/test_pipeline_mixin.py SKIPPED for `GPT2ModelTest`.) [ 73%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_zero_shot <- tests/test_pipeline_mixin.py PASSED [ 74%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_zero_shot_audio_classification <- tests/test_pipeline_mixin.py SKIPPEDping` for `GPT2ModelTest`.) [ 74%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_zero_shot_image_classification <- tests/test_pipeline_mixin.py SKIPPEDping` for `GPT2ModelTest`.) [ 75%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_zero_shot_object_detection <- tests/test_pipeline_mixin.py SKIPPED` for `GPT2ModelTest`.) [ 75%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_problem_types <- tests/test_modeling_common.py PASSED [ 76%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_prompt_lookup_decoding_matches_greedy_search <- tests/generation/test_utils.py PASSED [ 77%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pt_tf_model_equivalence <- tests/test_modeling_common.py PASSED [ 77%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_resize_embeddings_untied <- tests/test_modeling_common.py PASSED [ 78%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_resize_position_vector_embeddings <- tests/test_modeling_common.py PASSED [ 78%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_resize_tokens_embeddings <- tests/test_modeling_common.py PASSED [ 79%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_retain_grad_hidden_states_attentions <- tests/test_modeling_common.py PASSED [ 80%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_sample_generate <- tests/generation/test_utils.py PASSED [ 80%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_sample_generate_dict_output <- tests/generation/test_utils.py PASSED [ 81%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_save_load <- tests/test_modeling_common.py PASSED [ 81%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_save_load_fast_init_from_base <- tests/test_modeling_common.py PASSED [ 82%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_save_load_fast_init_to_base <- tests/test_modeling_common.py PASSED [ 82%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_save_load_keys_to_ignore_on_save <- tests/test_modeling_common.py PASSED [ 83%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_save_load_low_cpu_mem_usage <- tests/test_modeling_common.py PASSED [ 84%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_save_load_low_cpu_mem_usage_checkpoints <- tests/test_modeling_common.py PASSED [ 84%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_save_load_low_cpu_mem_usage_no_safetensors <- tests/test_modeling_common.py PASSED [ 85%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_sdpa_can_compile_dynamic <- tests/test_modeling_common.py PASSED [ 85%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_sdpa_can_dispatch_on_flash <- tests/test_modeling_common.py PASSED [ 86%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_sdpa_matches_eager_sliding_window <- tests/test_modeling_common.py SKIPPED [ 87%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_tf_from_pt_safetensors <- tests/test_modeling_common.py PASSED [ 87%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_tie_model_weights <- tests/test_modeling_common.py PASSED [ 88%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_tied_weights_keys <- tests/test_modeling_common.py PASSED [ 88%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_torch_compile <- tests/test_modeling_common.py SKIPPED [ 89%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_torch_fx <- tests/test_modeling_common.py PASSED [ 90%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_torch_fx_output_loss <- tests/test_modeling_common.py PASSED [ 90%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_torch_save_load <- tests/test_modeling_common.py PASSED [ 91%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_torchscript_output_attentions <- tests/test_modeling_common.py SKIPPED [ 91%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_torchscript_output_hidden_state <- tests/test_modeling_common.py SKIPPED [ 92%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_torchscript_simple <- tests/test_modeling_common.py SKIPPED [ 92%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_training <- tests/test_modeling_common.py PASSED [ 93%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_training_gradient_checkpointing SKIPPED check: https://github.com/huggingface/transformers/pull/27124) [ 94%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_training_gradient_checkpointing_use_reentrant SKIPPED check: https://github.com/huggingface/transformers/pull/27124) [ 94%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_training_gradient_checkpointing_use_reentrant_false SKIPPED check: https://github.com/huggingface/transformers/pull/27124) [ 95%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelLanguageGenerationTest::test_contrastive_search_gpt2 FAILED [ 95%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelLanguageGenerationTest::test_flash_attn_2_generate_padding_left SKIPPED [ 96%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelLanguageGenerationTest::test_gpt2_sample FAILED [ 97%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelLanguageGenerationTest::test_gpt2_sample_max_time FAILED [ 97%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelLanguageGenerationTest::test_lm_generate_gpt2 PASSED [ 98%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelLanguageGenerationTest::test_lm_generate_gpt2_with_gradient_checkpointing PASSED [ 98%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelLanguageGenerationTest::test_lm_generate_gpt2_with_reorder_and_upcast_attn PASSED [ 99%] tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelLanguageGenerationTest::test_lm_generate_gpt2_with_scale_attn_by_inverse_layer_idx PASSED [100%] =================================== FAILURES =================================== _________ GPT2ModelLanguageGenerationTest.test_contrastive_search_gpt2 _________ self = @slow def test_contrastive_search_gpt2(self): article = ( "DeepMind Technologies is a British artificial intelligence subsidiary of Alphabet Inc. and research " "laboratory founded in 2010. DeepMind was acquired by Google in 2014. The company is based" ) gpt2_tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2-large") > gpt2_model = GPT2LMHeadModel.from_pretrained("openai-community/gpt2-large").to(torch_device) tests/models/gpt2/test_modeling_gpt2.py:845: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ src/transformers/modeling_utils.py:2727: in to return super().to(*args, **kwargs) venv/lib/python3.10/site-packages/torch/nn/modules/module.py:1152: in to return self._apply(convert) venv/lib/python3.10/site-packages/torch/nn/modules/module.py:802: in _apply module._apply(fn) venv/lib/python3.10/site-packages/torch/nn/modules/module.py:802: in _apply module._apply(fn) venv/lib/python3.10/site-packages/torch/nn/modules/module.py:802: in _apply module._apply(fn) venv/lib/python3.10/site-packages/torch/nn/modules/module.py:802: in _apply module._apply(fn) venv/lib/python3.10/site-packages/torch/nn/modules/module.py:802: in _apply module._apply(fn) venv/lib/python3.10/site-packages/torch/nn/modules/module.py:825: in _apply param_applied = fn(param) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ t = Parameter containing: tensor([[ 0.0393, -0.0687, 0.0226, ..., 0.0320, 0.0051, 0.0316], [ 0.0369, -0.0285,...7, 0.0118, 0.0310], [-0.0308, -0.0016, -0.0493, ..., 0.0289, 0.0172, 0.0127]], requires_grad=True) def convert(t): if convert_to_format is not None and t.dim() in (4, 5): return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking, memory_format=convert_to_format) > return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking) E torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 26.00 MiB. GPU 0 has a total capacity of 15.73 GiB of which 15.00 MiB is free. Including non-PyTorch memory, this process has 15.29 GiB memory in use. Of the allocated memory 1.46 GiB is allocated by PyTorch, and 40.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) venv/lib/python3.10/site-packages/torch/nn/modules/module.py:1150: OutOfMemoryError _______________ GPT2ModelLanguageGenerationTest.test_gpt2_sample _______________ self = @slow def test_gpt2_sample(self): tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2") model = GPT2LMHeadModel.from_pretrained("openai-community/gpt2") > model.to(torch_device) tests/models/gpt2/test_modeling_gpt2.py:772: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ src/transformers/modeling_utils.py:2727: in to return super().to(*args, **kwargs) venv/lib/python3.10/site-packages/torch/nn/modules/module.py:1152: in to return self._apply(convert) venv/lib/python3.10/site-packages/torch/nn/modules/module.py:802: in _apply module._apply(fn) venv/lib/python3.10/site-packages/torch/nn/modules/module.py:802: in _apply module._apply(fn) venv/lib/python3.10/site-packages/torch/nn/modules/module.py:825: in _apply param_applied = fn(param) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ t = Parameter containing: tensor([[-0.1101, -0.0393, 0.0331, ..., -0.1364, 0.0151, 0.0453], [ 0.0403, -0.0486,...3, 0.0785, -0.0225], [ 0.0514, -0.0277, 0.0499, ..., 0.0070, 0.1552, 0.1207]], requires_grad=True) def convert(t): if convert_to_format is not None and t.dim() in (4, 5): return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking, memory_format=convert_to_format) > return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking) E torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacity of 15.73 GiB of which 15.00 MiB is free. Including non-PyTorch memory, this process has 15.29 GiB memory in use. Of the allocated memory 1.46 GiB is allocated by PyTorch, and 40.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) venv/lib/python3.10/site-packages/torch/nn/modules/module.py:1150: OutOfMemoryError __________ GPT2ModelLanguageGenerationTest.test_gpt2_sample_max_time ___________ self = @slow def test_gpt2_sample_max_time(self): tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2") model = GPT2LMHeadModel.from_pretrained("openai-community/gpt2") model.to(torch_device) torch.manual_seed(0) tokenized = tokenizer("Today is a nice day and", return_tensors="pt", return_token_type_ids=True) input_ids = tokenized.input_ids.to(torch_device) MAX_TIME = 0.5 start = datetime.datetime.now() model.generate(input_ids, do_sample=True, max_time=MAX_TIME, max_length=256) duration = datetime.datetime.now() - start self.assertGreater(duration, datetime.timedelta(seconds=MAX_TIME)) self.assertLess(duration, datetime.timedelta(seconds=1.5 * MAX_TIME)) start = datetime.datetime.now() model.generate(input_ids, do_sample=False, max_time=MAX_TIME, max_length=256) duration = datetime.datetime.now() - start self.assertGreater(duration, datetime.timedelta(seconds=MAX_TIME)) self.assertLess(duration, datetime.timedelta(seconds=1.5 * MAX_TIME)) start = datetime.datetime.now() model.generate(input_ids, do_sample=False, num_beams=2, max_time=MAX_TIME, max_length=256) duration = datetime.datetime.now() - start self.assertGreater(duration, datetime.timedelta(seconds=MAX_TIME)) self.assertLess(duration, datetime.timedelta(seconds=1.5 * MAX_TIME)) start = datetime.datetime.now() model.generate(input_ids, do_sample=True, num_beams=2, max_time=MAX_TIME, max_length=256) duration = datetime.datetime.now() - start self.assertGreater(duration, datetime.timedelta(seconds=MAX_TIME)) self.assertLess(duration, datetime.timedelta(seconds=1.5 * MAX_TIME)) start = datetime.datetime.now() model.generate(input_ids, do_sample=False, max_time=None, max_length=256) duration = datetime.datetime.now() - start > self.assertGreater(duration, datetime.timedelta(seconds=1.5 * MAX_TIME)) E AssertionError: datetime.timedelta(microseconds=615467) not greater than datetime.timedelta(microseconds=750000) tests/models/gpt2/test_modeling_gpt2.py:835: AssertionError ----------------------------- Captured stderr call ----------------------------- The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation. The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation. The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation. The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation. The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation. =============================== warnings summary =============================== venv/lib/python3.10/site-packages/tensorflow/__init__.py:30 /datadisk1/av11/coding/github/transformers/venv/lib/python3.10/site-packages/tensorflow/__init__.py:30: DeprecationWarning: The distutils package is deprecated and slated for removal in Python 3.12. Use setuptools or check PEP 632 for potential alternatives import distutils as _distutils venv/lib/python3.10/site-packages/_pytest/config/__init__.py:1373 /datadisk1/av11/coding/github/transformers/venv/lib/python3.10/site-packages/_pytest/config/__init__.py:1373: PytestConfigWarning: Unknown config option: doctest_glob self._warn_or_fail_if_strict(f"Unknown config option: {key}\n") src/transformers/deepspeed.py:24 /datadisk1/av11/coding/github/transformers/src/transformers/deepspeed.py:24: FutureWarning: transformers.deepspeed module is deprecated and will be removed in a future version. Please import deepspeed modules directly from transformers.integrations warnings.warn( tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_batch_generation tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_batch_generation_2heads tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_generate_from_inputs_embeds_decoder_only tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_text_generation tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelLanguageGenerationTest::test_lm_generate_gpt2 tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelLanguageGenerationTest::test_lm_generate_gpt2_with_gradient_checkpointing tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelLanguageGenerationTest::test_lm_generate_gpt2_with_reorder_and_upcast_attn tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelLanguageGenerationTest::test_lm_generate_gpt2_with_scale_attn_by_inverse_layer_idx /datadisk1/av11/coding/github/transformers/src/transformers/generation/utils.py:1201: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation. warnings.warn( tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_equivalence_flax_to_pt /datadisk1/av11/coding/github/transformers/src/transformers/modeling_flax_pytorch_utils.py:459: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:206.) pt_model_dict[flax_key] = torch.from_numpy(flax_tensor) tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_generate_continue_from_past_key_values tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_generate_from_inputs_embeds_decoder_only /datadisk1/av11/coding/github/transformers/src/transformers/generation/configuration_utils.py:512: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pas_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values. warnings.warn( tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_token_classification /datadisk1/av11/coding/github/transformers/src/transformers/pipelines/token_classification.py:392: UserWarning: Tokenizer does not support real words, using fallback heuristic warnings.warn( tests/models/gpt2/test_modeling_gpt2.py: 525 warnings /datadisk1/av11/coding/github/transformers/tests/test_modeling_common.py:507: DeprecationWarning: Please use assertEqual instead. self.assertEquals(p1.data.ne(p2.data).sum(), 0) tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_torchscript_output_attentions tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_torchscript_output_hidden_state tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_torchscript_simple /datadisk1/av11/coding/github/transformers/src/transformers/modeling_utils.py:4492: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( -- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html =========================== short test summary info ============================ FAILED tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelLanguageGenerationTest::test_contrastive_search_gpt2 FAILED tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelLanguageGenerationTest::test_gpt2_sample FAILED tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelLanguageGenerationTest::test_gpt2_sample_max_time ====== 3 failed, 118 passed, 49 skipped, 543 warnings in 74.04s (0:01:14) ======