============================= test session starts ==============================
platform linux -- Python 3.10.12, pytest-7.4.4, pluggy-1.4.0 -- /datadisk1/av11/coding/github/transformers/venv/bin/python
cachedir: .pytest_cache
hypothesis profile 'default' -> database=DirectoryBasedExampleDatabase(PosixPath('/datadisk1/av11/coding/github/transformers/.hypothesis/examples'))
rootdir: /datadisk1/av11/coding/github/transformers
configfile: pyproject.toml
plugins: anyio-4.3.0, hypothesis-6.98.18, picked-0.5.0, rich-0.1.1, timeout-2.3.1, xdist-3.5.0, dash-2.16.1
collecting ... collected 170 items

tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_assisted_decoding_matches_greedy_search_0_random <- src/transformers/testing_utils.py PASSED [  0%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_assisted_decoding_matches_greedy_search_1_same <- src/transformers/testing_utils.py PASSED [  1%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_assisted_decoding_sample <- tests/generation/test_utils.py PASSED [  1%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_attention_outputs <- tests/test_modeling_common.py PASSED [  2%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_batch_generation PASSED [  2%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_batch_generation_2heads PASSED [  3%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_batching_equivalence <- tests/test_modeling_common.py PASSED [  4%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_beam_sample_generate <- tests/generation/test_utils.py PASSED [  4%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_beam_sample_generate_dict_output <- tests/generation/test_utils.py PASSED [  5%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_beam_search_generate <- tests/generation/test_utils.py PASSED [  5%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_beam_search_generate_dict_output <- tests/generation/test_utils.py PASSED [  6%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_beam_search_generate_dict_outputs_use_cache <- tests/generation/test_utils.py PASSED [  7%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_beam_search_low_memory <- tests/generation/test_utils.py PASSED [  7%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_can_use_safetensors <- tests/test_modeling_common.py PASSED [  8%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_config PASSED [  8%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_constrained_beam_search_generate <- tests/generation/test_utils.py PASSED [  9%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_constrained_beam_search_generate_dict_output <- tests/generation/test_utils.py PASSED [ 10%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_contrastive_generate <- tests/generation/test_utils.py PASSED [ 10%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_contrastive_generate_dict_outputs_use_cache <- tests/generation/test_utils.py PASSED [ 11%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_contrastive_generate_low_memory <- tests/generation/test_utils.py PASSED [ 11%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_correct_missing_keys <- tests/test_modeling_common.py PASSED [ 12%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_cpu_offload <- tests/test_modeling_common.py PASSED [ 12%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_custom_4d_attention_mask <- tests/test_modeling_common.py SKIPPEDks) [ 13%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_determinism <- tests/test_modeling_common.py PASSED [ 14%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_disk_offload_bin <- tests/test_modeling_common.py PASSED [ 14%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_disk_offload_safetensors <- tests/test_modeling_common.py PASSED [ 15%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_eager_matches_sdpa_generate <- tests/test_modeling_common.py PASSED [ 15%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_eager_matches_sdpa_inference_0_float16 <- tests/test_modeling_common.py PASSED [ 16%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_eager_matches_sdpa_inference_1_bfloat16 <- tests/test_modeling_common.py PASSED [ 17%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_eager_matches_sdpa_inference_2_float32 <- tests/test_modeling_common.py PASSED [ 17%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_equivalence_flax_to_pt <- tests/test_modeling_common.py PASSED [ 18%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_equivalence_pt_to_flax <- tests/test_modeling_common.py PASSED [ 18%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_fast_init_context_manager <- tests/test_modeling_common.py PASSED [ 19%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_fast_init_tied_embeddings <- tests/test_modeling_common.py PASSED [ 20%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_feed_forward_chunking <- tests/test_modeling_common.py PASSED [ 20%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_flash_attn_2_conversion <- tests/test_modeling_common.py SKIPPED [ 21%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_flash_attn_2_fp32_ln <- tests/test_modeling_common.py SKIPPED [ 21%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_flash_attn_2_from_config <- tests/test_modeling_common.py SKIPPED [ 22%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_flash_attn_2_generate_left_padding <- src/transformers/testing_utils.py SKIPPED [ 22%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_flash_attn_2_generate_padding_right <- src/transformers/testing_utils.py SKIPPED [ 23%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_flash_attn_2_generate_use_cache <- tests/test_modeling_common.py SKIPPED [ 24%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_flash_attn_2_inference_equivalence <- src/transformers/testing_utils.py SKIPPED [ 24%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_flash_attn_2_inference_equivalence_right_padding <- src/transformers/testing_utils.py SKIPPED [ 25%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_flax_from_pt_safetensors <- tests/test_modeling_common.py PASSED [ 25%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_forward_signature <- tests/test_modeling_common.py PASSED [ 26%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_from_pretrained_no_checkpoint <- tests/test_modeling_common.py PASSED [ 27%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_generate_continue_from_past_key_values <- tests/generation/test_utils.py PASSED [ 27%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_generate_from_inputs_embeds_decoder_only <- tests/generation/test_utils.py PASSED [ 28%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_generate_with_head_masking <- tests/generation/test_utils.py PASSED [ 28%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_generate_with_quant_cache <- tests/generation/test_utils.py SKIPPED [ 29%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_generate_without_input_ids <- tests/generation/test_utils.py PASSED [ 30%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_gpt2_double_lm_head_model PASSED [ 30%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_gpt2_gradient_checkpointing PASSED [ 31%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_gpt2_lm_head_model PASSED [ 31%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_gpt2_model PASSED [ 32%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_gpt2_model_att_mask_past PASSED [ 32%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_gpt2_model_past PASSED [ 33%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_gpt2_model_past_large_inputs PASSED [ 34%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_gpt2_question_answering_model PASSED [ 34%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_gpt2_reorder_and_upcast_attn PASSED [ 35%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_gpt2_scale_attn_by_inverse_layer_idx PASSED [ 35%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_gpt2_sequence_classification_model PASSED [ 36%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_gpt2_token_classification_model PASSED [ 37%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_gpt2_weight_initialization PASSED [ 37%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_gradient_checkpointing_backward_compatibility <- tests/test_modeling_common.py PASSED [ 38%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_gradient_checkpointing_enable_disable <- tests/test_modeling_common.py PASSED [ 38%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_greedy_generate <- tests/generation/test_utils.py PASSED [ 39%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_greedy_generate_dict_outputs <- tests/generation/test_utils.py PASSED [ 40%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_greedy_generate_dict_outputs_use_cache <- tests/generation/test_utils.py PASSED [ 40%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_group_beam_search_generate <- tests/generation/test_utils.py PASSED [ 41%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_group_beam_search_generate_dict_output <- tests/generation/test_utils.py PASSED [ 41%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_head_pruning <- tests/test_modeling_common.py PASSED [ 42%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_head_pruning_integration <- tests/test_modeling_common.py PASSED [ 42%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_head_pruning_save_load_from_config_init <- tests/test_modeling_common.py PASSED [ 43%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_head_pruning_save_load_from_pretrained <- tests/test_modeling_common.py PASSED [ 44%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_headmasking <- tests/test_modeling_common.py PASSED [ 44%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_hidden_states_output <- tests/test_modeling_common.py PASSED [ 45%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_initialization <- tests/test_modeling_common.py PASSED [ 45%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_inputs_embeds <- tests/test_modeling_common.py PASSED [ 46%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_inputs_embeds_matches_input_ids <- tests/test_modeling_common.py PASSED [ 47%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_keep_in_fp32_modules <- tests/test_modeling_common.py PASSED [ 47%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_left_padding_compatibility <- tests/generation/test_utils.py PASSED [ 48%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_load_save_without_tied_weights <- tests/test_modeling_common.py PASSED [ 48%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_load_with_mismatched_shapes <- tests/test_modeling_common.py PASSED [ 49%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_matched_shapes_have_loaded_weights_when_some_mismatched_shapes_exist <- tests/test_modeling_common.py PASSED [ 50%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_mismatched_shapes_have_properly_initialized_weights <- tests/test_modeling_common.py PASSED [ 50%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_model_common_attributes <- tests/test_modeling_common.py PASSED [ 51%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_model_from_pretrained PASSED [ 51%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_model_is_small <- tests/test_modeling_common.py PASSED [ 52%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_model_main_input_name <- tests/test_modeling_common.py PASSED [ 52%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_model_outputs_equivalence <- tests/test_modeling_common.py PASSED [ 53%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_model_parallel_beam_search <- tests/generation/test_utils.py SKIPPED [ 54%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_model_parallel_equal_results <- tests/test_modeling_common.py SKIPPED [ 54%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_model_parallelism <- tests/test_modeling_common.py SKIPPED [ 55%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_model_parallelization <- tests/test_modeling_common.py SKIPPED [ 55%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_model_weights_reload_no_missing_tied_weights <- tests/test_modeling_common.py PASSED [ 56%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_multi_gpu_data_parallel_forward <- tests/test_modeling_common.py SKIPPED [ 57%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_new_cache_format_0 <- tests/generation/test_utils.py SKIPPED [ 57%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_new_cache_format_1 <- tests/generation/test_utils.py SKIPPED [ 58%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_new_cache_format_2 <- tests/generation/test_utils.py SKIPPED [ 58%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_past_key_values_format <- tests/generation/test_utils.py PASSED [ 59%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_audio_classification <- tests/test_pipeline_mixin.py SKIPPED
`GPT2ModelTest`.)                                                        [ 60%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_automatic_speech_recognition <- tests/test_pipeline_mixin.py SKIPPEDng`
for `GPT2ModelTest`.)                                                    [ 60%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_conversational <- tests/test_pipeline_mixin.py SKIPPEDodelTest`.) [ 61%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_depth_estimation <- tests/test_pipeline_mixin.py SKIPPEDT2ModelTest`.) [ 61%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_document_question_answering <- tests/test_pipeline_mixin.py SKIPPED [ 62%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_feature_extraction <- tests/test_pipeline_mixin.py PASSED [ 62%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_fill_mask <- tests/test_pipeline_mixin.py SKIPPEDest`.) [ 63%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_image_classification <- tests/test_pipeline_mixin.py SKIPPED
`GPT2ModelTest`.)                                                        [ 64%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_image_feature_extraction <- tests/test_pipeline_mixin.py SKIPPEDfor
`GPT2ModelTest`.)                                                        [ 64%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_image_segmentation <- tests/test_pipeline_mixin.py SKIPPEDGPT2ModelTest`.) [ 65%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_image_to_text <- tests/test_pipeline_mixin.py SKIPPEDdelTest`.) [ 65%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_mask_generation <- tests/test_pipeline_mixin.py SKIPPED [ 66%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_object_detection <- tests/test_pipeline_mixin.py SKIPPEDT2ModelTest`.) [ 67%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_question_answering <- tests/test_pipeline_mixin.py PASSED [ 67%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_summarization <- tests/test_pipeline_mixin.py SKIPPEDdelTest`.) [ 68%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_table_question_answering <- tests/test_pipeline_mixin.py SKIPPEDfor
`GPT2ModelTest`.)                                                        [ 68%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_text2text_generation <- tests/test_pipeline_mixin.py SKIPPED`GPT2ModelTest`.) [ 69%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_text_classification <- tests/test_pipeline_mixin.py PASSED [ 70%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_text_generation <- tests/test_pipeline_mixin.py PASSED [ 70%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_text_to_audio <- tests/test_pipeline_mixin.py SKIPPEDdelTest`.) [ 71%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_token_classification <- tests/test_pipeline_mixin.py PASSED [ 71%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_translation <- tests/test_pipeline_mixin.py SKIPPEDlTest`.) [ 72%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_video_classification <- tests/test_pipeline_mixin.py SKIPPED
`GPT2ModelTest`.)                                                        [ 72%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_visual_question_answering <- tests/test_pipeline_mixin.py SKIPPED for
`GPT2ModelTest`.)                                                        [ 73%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_zero_shot <- tests/test_pipeline_mixin.py PASSED [ 74%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_zero_shot_audio_classification <- tests/test_pipeline_mixin.py SKIPPEDping`
for `GPT2ModelTest`.)                                                    [ 74%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_zero_shot_image_classification <- tests/test_pipeline_mixin.py SKIPPEDping`
for `GPT2ModelTest`.)                                                    [ 75%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_zero_shot_object_detection <- tests/test_pipeline_mixin.py SKIPPED` for
`GPT2ModelTest`.)                                                        [ 75%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_problem_types <- tests/test_modeling_common.py PASSED [ 76%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_prompt_lookup_decoding_matches_greedy_search <- tests/generation/test_utils.py PASSED [ 77%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pt_tf_model_equivalence <- tests/test_modeling_common.py PASSED [ 77%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_resize_embeddings_untied <- tests/test_modeling_common.py PASSED [ 78%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_resize_position_vector_embeddings <- tests/test_modeling_common.py PASSED [ 78%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_resize_tokens_embeddings <- tests/test_modeling_common.py PASSED [ 79%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_retain_grad_hidden_states_attentions <- tests/test_modeling_common.py PASSED [ 80%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_sample_generate <- tests/generation/test_utils.py PASSED [ 80%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_sample_generate_dict_output <- tests/generation/test_utils.py PASSED [ 81%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_save_load <- tests/test_modeling_common.py PASSED [ 81%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_save_load_fast_init_from_base <- tests/test_modeling_common.py PASSED [ 82%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_save_load_fast_init_to_base <- tests/test_modeling_common.py PASSED [ 82%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_save_load_keys_to_ignore_on_save <- tests/test_modeling_common.py PASSED [ 83%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_save_load_low_cpu_mem_usage <- tests/test_modeling_common.py PASSED [ 84%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_save_load_low_cpu_mem_usage_checkpoints <- tests/test_modeling_common.py PASSED [ 84%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_save_load_low_cpu_mem_usage_no_safetensors <- tests/test_modeling_common.py PASSED [ 85%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_sdpa_can_compile_dynamic <- tests/test_modeling_common.py PASSED [ 85%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_sdpa_can_dispatch_on_flash <- tests/test_modeling_common.py PASSED [ 86%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_sdpa_matches_eager_sliding_window <- tests/test_modeling_common.py SKIPPED [ 87%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_tf_from_pt_safetensors <- tests/test_modeling_common.py PASSED [ 87%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_tie_model_weights <- tests/test_modeling_common.py PASSED [ 88%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_tied_weights_keys <- tests/test_modeling_common.py PASSED [ 88%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_torch_compile <- tests/test_modeling_common.py SKIPPED [ 89%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_torch_fx <- tests/test_modeling_common.py PASSED [ 90%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_torch_fx_output_loss <- tests/test_modeling_common.py PASSED [ 90%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_torch_save_load <- tests/test_modeling_common.py PASSED [ 91%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_torchscript_output_attentions <- tests/test_modeling_common.py SKIPPED [ 91%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_torchscript_output_hidden_state <- tests/test_modeling_common.py SKIPPED [ 92%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_torchscript_simple <- tests/test_modeling_common.py SKIPPED [ 92%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_training <- tests/test_modeling_common.py PASSED [ 93%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_training_gradient_checkpointing SKIPPED check:
https://github.com/huggingface/transformers/pull/27124)                  [ 94%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_training_gradient_checkpointing_use_reentrant SKIPPED check:
https://github.com/huggingface/transformers/pull/27124)                  [ 94%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_training_gradient_checkpointing_use_reentrant_false SKIPPED check:
https://github.com/huggingface/transformers/pull/27124)                  [ 95%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelLanguageGenerationTest::test_contrastive_search_gpt2 FAILED [ 95%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelLanguageGenerationTest::test_flash_attn_2_generate_padding_left SKIPPED [ 96%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelLanguageGenerationTest::test_gpt2_sample FAILED [ 97%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelLanguageGenerationTest::test_gpt2_sample_max_time FAILED [ 97%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelLanguageGenerationTest::test_lm_generate_gpt2 PASSED [ 98%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelLanguageGenerationTest::test_lm_generate_gpt2_with_gradient_checkpointing PASSED [ 98%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelLanguageGenerationTest::test_lm_generate_gpt2_with_reorder_and_upcast_attn PASSED [ 99%]
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelLanguageGenerationTest::test_lm_generate_gpt2_with_scale_attn_by_inverse_layer_idx PASSED [100%]

=================================== FAILURES ===================================
_________ GPT2ModelLanguageGenerationTest.test_contrastive_search_gpt2 _________

self = <tests.models.gpt2.test_modeling_gpt2.GPT2ModelLanguageGenerationTest testMethod=test_contrastive_search_gpt2>

    @slow
    def test_contrastive_search_gpt2(self):
        article = (
            "DeepMind Technologies is a British artificial intelligence subsidiary of Alphabet Inc. and research "
            "laboratory founded in 2010. DeepMind was acquired by Google in 2014. The company is based"
        )
    
        gpt2_tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2-large")
>       gpt2_model = GPT2LMHeadModel.from_pretrained("openai-community/gpt2-large").to(torch_device)

tests/models/gpt2/test_modeling_gpt2.py:845: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
src/transformers/modeling_utils.py:2727: in to
    return super().to(*args, **kwargs)
venv/lib/python3.10/site-packages/torch/nn/modules/module.py:1152: in to
    return self._apply(convert)
venv/lib/python3.10/site-packages/torch/nn/modules/module.py:802: in _apply
    module._apply(fn)
venv/lib/python3.10/site-packages/torch/nn/modules/module.py:802: in _apply
    module._apply(fn)
venv/lib/python3.10/site-packages/torch/nn/modules/module.py:802: in _apply
    module._apply(fn)
venv/lib/python3.10/site-packages/torch/nn/modules/module.py:802: in _apply
    module._apply(fn)
venv/lib/python3.10/site-packages/torch/nn/modules/module.py:802: in _apply
    module._apply(fn)
venv/lib/python3.10/site-packages/torch/nn/modules/module.py:825: in _apply
    param_applied = fn(param)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

t = Parameter containing:
tensor([[ 0.0393, -0.0687,  0.0226,  ...,  0.0320,  0.0051,  0.0316],
        [ 0.0369, -0.0285,...7,  0.0118,  0.0310],
        [-0.0308, -0.0016, -0.0493,  ...,  0.0289,  0.0172,  0.0127]],
       requires_grad=True)

    def convert(t):
        if convert_to_format is not None and t.dim() in (4, 5):
            return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None,
                        non_blocking, memory_format=convert_to_format)
>       return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking)
E       torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 26.00 MiB. GPU 0 has a total capacity of 15.73 GiB of which 15.00 MiB is free. Including non-PyTorch memory, this process has 15.29 GiB memory in use. Of the allocated memory 1.46 GiB is allocated by PyTorch, and 40.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

venv/lib/python3.10/site-packages/torch/nn/modules/module.py:1150: OutOfMemoryError
_______________ GPT2ModelLanguageGenerationTest.test_gpt2_sample _______________

self = <tests.models.gpt2.test_modeling_gpt2.GPT2ModelLanguageGenerationTest testMethod=test_gpt2_sample>

    @slow
    def test_gpt2_sample(self):
        tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
        model = GPT2LMHeadModel.from_pretrained("openai-community/gpt2")
>       model.to(torch_device)

tests/models/gpt2/test_modeling_gpt2.py:772: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
src/transformers/modeling_utils.py:2727: in to
    return super().to(*args, **kwargs)
venv/lib/python3.10/site-packages/torch/nn/modules/module.py:1152: in to
    return self._apply(convert)
venv/lib/python3.10/site-packages/torch/nn/modules/module.py:802: in _apply
    module._apply(fn)
venv/lib/python3.10/site-packages/torch/nn/modules/module.py:802: in _apply
    module._apply(fn)
venv/lib/python3.10/site-packages/torch/nn/modules/module.py:825: in _apply
    param_applied = fn(param)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

t = Parameter containing:
tensor([[-0.1101, -0.0393,  0.0331,  ..., -0.1364,  0.0151,  0.0453],
        [ 0.0403, -0.0486,...3,  0.0785, -0.0225],
        [ 0.0514, -0.0277,  0.0499,  ...,  0.0070,  0.1552,  0.1207]],
       requires_grad=True)

    def convert(t):
        if convert_to_format is not None and t.dim() in (4, 5):
            return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None,
                        non_blocking, memory_format=convert_to_format)
>       return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking)
E       torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacity of 15.73 GiB of which 15.00 MiB is free. Including non-PyTorch memory, this process has 15.29 GiB memory in use. Of the allocated memory 1.46 GiB is allocated by PyTorch, and 40.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

venv/lib/python3.10/site-packages/torch/nn/modules/module.py:1150: OutOfMemoryError
__________ GPT2ModelLanguageGenerationTest.test_gpt2_sample_max_time ___________

self = <tests.models.gpt2.test_modeling_gpt2.GPT2ModelLanguageGenerationTest testMethod=test_gpt2_sample_max_time>

    @slow
    def test_gpt2_sample_max_time(self):
        tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
        model = GPT2LMHeadModel.from_pretrained("openai-community/gpt2")
        model.to(torch_device)
    
        torch.manual_seed(0)
        tokenized = tokenizer("Today is a nice day and", return_tensors="pt", return_token_type_ids=True)
        input_ids = tokenized.input_ids.to(torch_device)
    
        MAX_TIME = 0.5
    
        start = datetime.datetime.now()
        model.generate(input_ids, do_sample=True, max_time=MAX_TIME, max_length=256)
        duration = datetime.datetime.now() - start
        self.assertGreater(duration, datetime.timedelta(seconds=MAX_TIME))
        self.assertLess(duration, datetime.timedelta(seconds=1.5 * MAX_TIME))
    
        start = datetime.datetime.now()
        model.generate(input_ids, do_sample=False, max_time=MAX_TIME, max_length=256)
        duration = datetime.datetime.now() - start
        self.assertGreater(duration, datetime.timedelta(seconds=MAX_TIME))
        self.assertLess(duration, datetime.timedelta(seconds=1.5 * MAX_TIME))
    
        start = datetime.datetime.now()
        model.generate(input_ids, do_sample=False, num_beams=2, max_time=MAX_TIME, max_length=256)
        duration = datetime.datetime.now() - start
        self.assertGreater(duration, datetime.timedelta(seconds=MAX_TIME))
        self.assertLess(duration, datetime.timedelta(seconds=1.5 * MAX_TIME))
    
        start = datetime.datetime.now()
        model.generate(input_ids, do_sample=True, num_beams=2, max_time=MAX_TIME, max_length=256)
        duration = datetime.datetime.now() - start
        self.assertGreater(duration, datetime.timedelta(seconds=MAX_TIME))
        self.assertLess(duration, datetime.timedelta(seconds=1.5 * MAX_TIME))
    
        start = datetime.datetime.now()
        model.generate(input_ids, do_sample=False, max_time=None, max_length=256)
        duration = datetime.datetime.now() - start
>       self.assertGreater(duration, datetime.timedelta(seconds=1.5 * MAX_TIME))
E       AssertionError: datetime.timedelta(microseconds=615467) not greater than datetime.timedelta(microseconds=750000)

tests/models/gpt2/test_modeling_gpt2.py:835: AssertionError
----------------------------- Captured stderr call -----------------------------
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
=============================== warnings summary ===============================
venv/lib/python3.10/site-packages/tensorflow/__init__.py:30
  /datadisk1/av11/coding/github/transformers/venv/lib/python3.10/site-packages/tensorflow/__init__.py:30: DeprecationWarning: The distutils package is deprecated and slated for removal in Python 3.12. Use setuptools or check PEP 632 for potential alternatives
    import distutils as _distutils

venv/lib/python3.10/site-packages/_pytest/config/__init__.py:1373
  /datadisk1/av11/coding/github/transformers/venv/lib/python3.10/site-packages/_pytest/config/__init__.py:1373: PytestConfigWarning: Unknown config option: doctest_glob
  
    self._warn_or_fail_if_strict(f"Unknown config option: {key}\n")

src/transformers/deepspeed.py:24
  /datadisk1/av11/coding/github/transformers/src/transformers/deepspeed.py:24: FutureWarning: transformers.deepspeed module is deprecated and will be removed in a future version. Please import deepspeed modules directly from transformers.integrations
    warnings.warn(

tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_batch_generation
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_batch_generation_2heads
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_generate_from_inputs_embeds_decoder_only
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_text_generation
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelLanguageGenerationTest::test_lm_generate_gpt2
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelLanguageGenerationTest::test_lm_generate_gpt2_with_gradient_checkpointing
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelLanguageGenerationTest::test_lm_generate_gpt2_with_reorder_and_upcast_attn
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelLanguageGenerationTest::test_lm_generate_gpt2_with_scale_attn_by_inverse_layer_idx
  /datadisk1/av11/coding/github/transformers/src/transformers/generation/utils.py:1201: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation.
    warnings.warn(

tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_equivalence_flax_to_pt
  /datadisk1/av11/coding/github/transformers/src/transformers/modeling_flax_pytorch_utils.py:459: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:206.)
    pt_model_dict[flax_key] = torch.from_numpy(flax_tensor)

tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_generate_continue_from_past_key_values
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_generate_from_inputs_embeds_decoder_only
  /datadisk1/av11/coding/github/transformers/src/transformers/generation/configuration_utils.py:512: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pas_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.
    warnings.warn(

tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_pipeline_token_classification
  /datadisk1/av11/coding/github/transformers/src/transformers/pipelines/token_classification.py:392: UserWarning: Tokenizer does not support real words, using fallback heuristic
    warnings.warn(

tests/models/gpt2/test_modeling_gpt2.py: 525 warnings
  /datadisk1/av11/coding/github/transformers/tests/test_modeling_common.py:507: DeprecationWarning: Please use assertEqual instead.
    self.assertEquals(p1.data.ne(p2.data).sum(), 0)

tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_torchscript_output_attentions
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_torchscript_output_hidden_state
tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_torchscript_simple
  /datadisk1/av11/coding/github/transformers/src/transformers/modeling_utils.py:4492: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead
    warnings.warn(

-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
=========================== short test summary info ============================
FAILED tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelLanguageGenerationTest::test_contrastive_search_gpt2
FAILED tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelLanguageGenerationTest::test_gpt2_sample
FAILED tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelLanguageGenerationTest::test_gpt2_sample_max_time
====== 3 failed, 118 passed, 49 skipped, 543 warnings in 74.04s (0:01:14) ======