Merge pull request #340 from NVIDIA/add_more_nlp_ci_tests
Add more nlp ci tests
ekmb authored Feb 7, 2020
2 parents 4f299f4 + adb82a0 commit c6a3cdd
Showing 17 changed files with 118 additions and 6,564 deletions.
63 changes: 60 additions & 3 deletions Jenkinsfile
@@ -29,6 +29,7 @@ pipeline {
sh './reinstall.sh && python -m unittest tests/*.py'
}
}

stage('Unittests ASR') {
steps {
sh 'python -m unittest tests/asr/*.py'
@@ -61,6 +62,26 @@ pipeline {
}
}

stage('Parallel NLP-BERT pretraining') {
failFast true
parallel {
stage('BERT on the fly preprocessing') {
steps {
sh 'cd examples/nlp/language_modeling && CUDA_VISIBLE_DEVICES=0 python bert_pretraining.py --amp_opt_level O1 --data_dir /home/mrjenkins/TestData/nlp/wikitext-2 --dataset_name wikitext-2 --work_dir outputs/bert_lm/wikitext2 --batch_size 64 --lr 0.01 --lr_policy CosineAnnealing --lr_warmup_proportion 0.05 --tokenizer sentence-piece --vocab_size 3200 --hidden_size 768 --intermediate_size 3072 --num_hidden_layers 6 --num_attention_heads 12 --hidden_act "gelu" --save_step_freq 200 --sample_size 10000000 --mask_probability 0.15 --short_seq_prob 0.1 --max_steps=300'
sh 'cd examples/nlp/language_modeling && LOSS=$(cat outputs/bert_lm/wikitext2/log_globalrank-0_localrank-0.txt | grep "Loss" |tail -n 1| awk \'{print \$7}\' | egrep -o "[0-9.]+" ) && echo $LOSS && if [ $(echo "$LOSS < 8.0" | bc -l) -eq 1 ]; then echo "SUCCESS" && exit 0; else echo "FAILURE" && exit 1; fi'
sh 'rm -rf examples/nlp/language_modeling/outputs/wikitext2'
}
}
stage('BERT offline preprocessing') {
steps {
sh 'cd examples/nlp/language_modeling && CUDA_VISIBLE_DEVICES=1 python bert_pretraining.py --amp_opt_level O1 --data_dir /home/mrjenkins/TestData/nlp/wiki_book_mini --work_dir outputs/bert_lm/wiki_book --batch_size 8 --config_file /home/mrjenkins/TestData/nlp/bert_configs/uncased_L-12_H-768_A-12.json --save_step_freq 200 --max_steps 300 --num_gpus 1 --batches_per_step 1 --lr_policy SquareRootAnnealing --beta2 0.999 --beta1 0.9 --lr_warmup_proportion 0.01 --optimizer adam_w --weight_decay 0.01 --lr 0.875e-4 --preprocessed_data '
sh 'cd examples/nlp/language_modeling && LOSS=$(cat outputs/bert_lm/wiki_book/log_globalrank-0_localrank-0.txt | grep "Loss" |tail -n 1| awk \'{print \$7}\' | egrep -o "[0-9.]+" ) && echo $LOSS && if [ $(echo "$LOSS < 15.0" | bc -l) -eq 1 ]; then echo "SUCCESS" && exit 0; else echo "FAILURE" && exit 1; fi'
sh 'rm -rf examples/nlp/language_modeling/outputs/wiki_book'
}
}
}
}
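
Both pretraining branches follow the same gating pattern: the run is capped with --max_steps, the final "Loss" entry is read back from the rank-0 log, and the stage fails unless the value is below a fixed threshold (8.0 for the on-the-fly branch, 15.0 for the offline one). A minimal standalone sketch of that check, assuming the same log layout as above (the loss sits in the seventh whitespace-separated field of the last "Loss" line); the log path and threshold are placeholders:

# Hypothetical helper reproducing the CI loss gate above.
LOG=outputs/bert_lm/wikitext2/log_globalrank-0_localrank-0.txt   # rank-0 training log
THRESHOLD=8.0                                                    # stage-specific upper bound
LOSS=$(grep "Loss" "$LOG" | tail -n 1 | awk '{print $7}' | egrep -o "[0-9.]+")
echo "final loss: $LOSS"
if [ "$(echo "$LOSS < $THRESHOLD" | bc -l)" -eq 1 ]; then
  echo "SUCCESS"; exit 0
else
  echo "FAILURE"; exit 1
fi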

stage('Parallel NLP Examples 1') {
failFast true
parallel {
@@ -85,6 +106,7 @@ pipeline {
}
}


stage('Parallel NLP Examples 2') {
failFast true
parallel {
@@ -105,7 +127,42 @@ pipeline {
}
}

stage('Intent Detection/SLot Tagging Examples - Multi-GPU') {
stage('Parallel NLP-Squad') {
failFast true
parallel {
stage('Squad v1.1') {
steps {
sh 'cd examples/nlp/question_answering && CUDA_VISIBLE_DEVICES=0 python question_answering_squad.py --amp_opt_level O1 --train_file /home/mrjenkins/TestData/nlp/squad_mini/v1.1/train-v1.1.json --dev_file /home/mrjenkins/TestData/nlp/squad_mini/v1.1/dev-v1.1.json --work_dir outputs/squadv1 --batch_size 8 --save_step_freq 300 --num_epochs 3 --lr_policy WarmupAnnealing --lr 3e-5 --do_lower_case'
sh 'cd examples/nlp/question_answering && FSCORE=$(cat outputs/squadv1/log_globalrank-0_localrank-0.txt | grep "f1" |tail -n 1 |egrep -o "[0-9.]+"|tail -n 1 ) && echo $FSCORE && if [ $(echo "$FSCORE > 50.0" | bc -l) -eq 1 ]; then echo "SUCCESS" && exit 0; else echo "FAILURE" && exit 1; fi'
sh 'rm -rf examples/nlp/question_answering/outputs/squadv1 && rm -rf /home/mrjenkins/TestData/nlp/squad_mini/v1.1/*cache*'
}
}
stage('Squad v2.0') {
steps {
sh 'cd examples/nlp/question_answering && CUDA_VISIBLE_DEVICES=1 python question_answering_squad.py --amp_opt_level O1 --train_file /home/mrjenkins/TestData/nlp/squad_mini/v2.0/train-v2.0.json --dev_file /home/mrjenkins/TestData/nlp/squad_mini/v2.0/dev-v2.0.json --work_dir outputs/squadv2 --batch_size 8 --save_step_freq 300 --num_epochs 3 --lr_policy WarmupAnnealing --lr 3e-5 --do_lower_case --version_2_with_negative'
sh 'cd examples/nlp/question_answering && FSCORE=$(cat outputs/squadv2/log_globalrank-0_localrank-0.txt | grep "f1" |tail -n 1 |egrep -o "[0-9.]+"|tail -n 1 ) && echo $FSCORE && if [ $(echo "$FSCORE > 50.0" | bc -l) -eq 1 ]; then echo "SUCCESS" && exit 0; else echo "FAILURE" && exit 1; fi'
sh 'rm -rf examples/nlp/question_answering/outputs/squadv2 && rm -rf /home/mrjenkins/TestData/nlp/squad_mini/v2.0/*cache*'
}
}
}
}
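
The two SQuAD branches use the same gate, but on the F1 score and with the comparison reversed, since a higher F1 is better; both also delete the work directory and the cached preprocessed features so the next run on the mini dataset starts clean. A sketch of the F1 check under the same assumptions about the log format, with a placeholder log path:

# Hypothetical F1 gate: the last number on the last "f1" line must exceed the floor.
LOG=outputs/squadv1/log_globalrank-0_localrank-0.txt
FSCORE=$(grep "f1" "$LOG" | tail -n 1 | egrep -o "[0-9.]+" | tail -n 1)
echo "final F1: $FSCORE"
if [ "$(echo "$FSCORE > 50.0" | bc -l)" -eq 1 ]; then
  echo "SUCCESS"; exit 0
else
  echo "FAILURE"; exit 1
fi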



stage('NLP-ASR processing') {
failFast true
parallel {
stage('asr_processing') {
steps {
sh 'cd examples/nlp/asr_postprocessor && CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --nproc_per_node=2 asr_postprocessor.py --data_dir=/home/mrjenkins/TestData/nlp/asr_postprocessor/pred_real --restore_from=/home/mrjenkins/TestData/nlp/asr_postprocessor/bert-base-uncased_decoder.pt --max_steps=50 --batch_size=512'
sh 'cd examples/nlp/asr_postprocessor && WER=$(cat outputs/asr_postprocessor/log_globalrank-0_localrank-0.txt | grep "Validation WER" | tail -n 1 | egrep -o "[0-9.]+" | tail -n 1) && echo $WER && if [ $(echo "$WER < 2.0" | bc -l) -eq 1 ]; then echo "SUCCESS" && exit 0; else echo "FAILURE" && exit 1; fi'
sh 'rm -rf examples/nlp/asr_postprocessor/outputs'
}
}
}
}
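
The ASR-postprocessor test, like the intent/slot and Tacotron 2 stages further down, exercises the two-GPU path by launching the example through torch.distributed.launch with one process per visible GPU. The same command as in the stage above, reflowed for readability (data and checkpoint paths are specific to the CI machine):

# Two-GPU smoke run of the ASR postprocessor, mirroring the CI stage above.
cd examples/nlp/asr_postprocessor
CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --nproc_per_node=2 \
  asr_postprocessor.py \
  --data_dir=/home/mrjenkins/TestData/nlp/asr_postprocessor/pred_real \
  --restore_from=/home/mrjenkins/TestData/nlp/asr_postprocessor/bert-base-uncased_decoder.pt \
  --max_steps=50 --batch_size=512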

stage('NLP-Intent Detection/SLot Tagging Examples - Multi-GPU') {
failFast true
steps {
sh 'cd examples/nlp/intent_detection_slot_tagging && CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --nproc_per_node=2 joint_intent_slot_with_bert.py --num_gpus=2 --num_epochs=1 --max_seq_length=50 --dataset_name=jarvis-retail --data_dir=/home/mrjenkins/TestData/nlp/retail/ --eval_file_prefix=eval --batch_size=10 --num_train_samples=-1 --do_lower_case --shuffle_data --work_dir=outputs'
@@ -115,7 +172,7 @@ pipeline {
}
}

stage('NMT Example') {
stage('NLP-NMT Example') {
failFast true
steps {
sh 'cd examples/nlp/neural_machine_translation/ && CUDA_VISIBLE_DEVICES=0 python machine_translation_tutorial.py --max_steps 100'
@@ -170,7 +227,7 @@ pipeline {
failFast true
steps {
sh 'cd examples/tts && CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --nproc_per_node=2 tacotron2.py --max_steps=51 --model_config=configs/tacotron2.yaml --train_dataset=/home/mrjenkins/TestData/an4_dataset/an4_train.json --amp_opt_level=O1 --eval_freq=50'
sh 'cd examples/tts && TTS_CHECKPOINT_DIR=$(ls | grep "Tacotron2") && echo $TTS_CHECKPOINT_DIR && LOSS=$(cat $TTS_CHECKPOINT_DIR/log_globalrank-0_localrank-0.txt | grep -o -E "Loss[ :0-9.]+" | grep -o -E "[0-9.]+" | tail -n 1) && echo $LOSS && if [ $(echo "$LOSS > 3.0" | bc -l) -eq 1 ]; then echo "FAILURE" && exit 1; else echo "SUCCESS"; fi'
sh 'cd examples/tts && TTS_CHECKPOINT_DIR=$(ls | grep "Tacotron2") && echo $TTS_CHECKPOINT_DIR && LOSS=$(cat $TTS_CHECKPOINT_DIR/log_globalrank-0_localrank-0.txt | grep -o -E "Loss[ :0-9.]+" | grep -o -E "[0-9.]+" | tail -n 1) && echo $LOSS && if [ $(echo "$LOSS < 3.0" | bc -l) -eq 1 ]; then echo "SUCCESS" && exit 0; else echo "FAILURE" && exit 1; fi'
// sh 'cd examples/tts && TTS_CHECKPOINT_DIR=$(ls | grep "Tacotron2") && cp ../asr/multi_gpu/checkpoints/* $TTS_CHECKPOINT_DIR/checkpoints'
// sh 'CUDA_VISIBLE_DEVICES=0 python tacotron2_an4_test.py --model_config=configs/tacotron2.yaml --eval_dataset=/home/mrjenkins/TestData/an4_dataset/an4_train.json --jasper_model_config=../asr/configs/jasper_an4.yaml --load_dir=$TTS_CHECKPOINT_DIR/checkpoints'
}
(another changed file)
@@ -66,7 +66,7 @@
parser.add_argument("--beam_size", default=4, type=int)
parser.add_argument("--len_pen", default=0.0, type=float)
parser.add_argument(
"--restore_from", dest="restore_from", type=str, default="../../scripts/bert-base-uncased_decoder.pt"
"--restore_from", dest="restore_from", type=str, default="../../../scripts/bert-base-uncased_decoder.pt"
)
args = parser.parse_args()

@@ -207,6 +207,11 @@ def print_loss(x):
callbacks=callbacks,
optimizer=args.optimizer,
lr_policy=lr_policy,
optimization_params={"num_epochs": 300, "lr": args.lr, "weight_decay": args.weight_decay},
optimization_params={
"num_epochs": 300,
"max_steps": args.max_steps,
"lr": args.lr,
"weight_decay": args.weight_decay,
},
batches_per_step=args.iter_per_step,
)
(another changed file)
@@ -51,7 +51,7 @@
See the list of pretrained models, call:
nemo_nlp.huggingface.BERT.list_pretrained_models()
"""
pretrained_bert_model = nemo.collections.nlp.nm.trainables.common.huggingface.BERT(
pretrained_bert_model = nemo.collections.nlp.nm.trainables.huggingface.BERT(
pretrained_model_name=args.pretrained_bert_model
)
hidden_size = pretrained_bert_model.hidden_size
(another changed file)
@@ -87,12 +87,10 @@
nemo_nlp.huggingface.BERT.list_pretrained_models()
"""
if args.bert_checkpoint and args.bert_config:
pretrained_bert_model = nemo.collections.nlp.nm.trainables.common.huggingface.BERT(
config_filename=args.bert_config
)
pretrained_bert_model = nemo.collections.nlp.nm.trainables.huggingface.BERT(config_filename=args.bert_config)
pretrained_bert_model.restore_from(args.bert_checkpoint)
else:
pretrained_bert_model = nemo.collections.nlp.nm.trainables.common.huggingface.BERT(
pretrained_bert_model = nemo.collections.nlp.nm.trainables.huggingface.BERT(
pretrained_model_name=args.pretrained_bert_model
)

3 changes: 1 addition & 2 deletions examples/nlp/language_modeling/bert_pretraining.py
@@ -146,7 +146,7 @@
log_dir=args.work_dir,
create_tb_writer=True,
files_to_copy=[__file__],
add_time_to_log_dir=True,
add_time_to_log_dir=False,
)

if args.config_file is not None:
@@ -318,7 +318,6 @@ def create_pipeline(data_file, batch_size, preprocessed_data=False, batches_per_
optimization_params['num_epochs'] = args.num_epochs
else:
optimization_params['max_steps'] = args.max_steps

nf.train(
tensors_to_optimize=[train_loss],
lr_policy=lr_policy_fn,
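
The hunk above shows how bert_pretraining.py sizes a run: optimization_params gets either a num_epochs entry or a max_steps entry, never both. The CI stages rely on the step-based mode so the smoke test finishes within 300 steps. Two hedged example invocations; flags other than the budget are taken from the Jenkins stage above, the data path is a placeholder, and --num_epochs is assumed from args.num_epochs in the hunk:

# Step-bounded smoke run (what the CI uses).
python bert_pretraining.py --data_dir /path/to/wikitext-2 --dataset_name wikitext-2 \
  --tokenizer sentence-piece --vocab_size 3200 --batch_size 64 --max_steps 300

# Epoch-bounded run for longer training (hypothetical epoch count).
python bert_pretraining.py --data_dir /path/to/wikitext-2 --dataset_name wikitext-2 \
  --tokenizer sentence-piece --vocab_size 3200 --batch_size 64 --num_epochs 2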
42 changes: 22 additions & 20 deletions examples/nlp/question_answering/question_answering_squad.py
@@ -24,7 +24,8 @@
To finetune Squad v1.1 on pretrained BERT large uncased on 1 GPU:
python question_answering_squad.py
--data_dir /path_to_data_dir/squad/v1.1
--train_file /path_to_data_dir/squad/v1.1/train-v1.1.json
--dev_file /path_to_data_dir/squad/v1.1/dev-v1.1.json
--work_dir /path_to_output_folder
--bert_checkpoint /path_to_bert_checkpoint
--amp_opt_level "O1"
@@ -43,7 +44,8 @@
To finetune Squad v1.1 on pretrained BERT large uncased on 8 GPU:
python -m torch.distributed.launch --nproc_per_node=8 question_answering_squad.py
--amp_opt_level "O1"
--data_dir /path_to_data_dir/squad/v1.1
--train_file /path_to_data_dir/squad/v1.1/train-v1.1.json
--dev_file /path_to_data_dir/squad/v1.1/dev-v1.1.json
--bert_checkpoint /path_to_bert_checkpoint
--batch_size 3
--num_gpus 8
@@ -74,12 +76,10 @@
def parse_args():
parser = argparse.ArgumentParser(description="Squad_with_pretrained_BERT")
parser.add_argument(
"--data_dir",
type=str,
required=True,
help="The input data dir. Should contain "
"train.*.json, dev.*.json files "
"(or other data files) for the task.",
"--train_file", type=str, help="The training data file. Should be *.json",
)
parser.add_argument(
"--dev_file", type=str, required=True, help="The evaluation data file. Should be *.json",
)
parser.add_argument(
"--pretrained_bert_model", default="bert-base-uncased", type=str, help="Name of the pre-trained model"
@@ -202,7 +202,7 @@ def parse_args():


def create_pipeline(
data_dir,
data_file,
model,
head,
loss_fn,
@@ -220,7 +220,7 @@
version_2_with_negative=version_2_with_negative,
batch_size=batch_size,
tokenizer=tokenizer,
data_dir=data_dir,
data_file=data_file,
max_query_length=max_query_length,
max_seq_length=max_seq_length,
doc_stride=doc_stride,
@@ -248,13 +248,14 @@

if __name__ == "__main__":
args = parse_args()
if not os.path.exists(args.data_dir):
raise FileNotFoundError("SQUAD datasets not found. Datasets can be " "obtained using scripts/get_squad.py")

if not args.version_2_with_negative:
args.work_dir = f'{args.work_dir}/squad1.1'
else:
args.work_dir = f'{args.work_dir}/squad2.0'
if not os.path.exists(args.dev_file):
raise FileNotFoundError(
"eval data not found. Datasets can be " "obtained using examples/nlp/scripts/get_squad.py"
)
if not args.evaluation_only and not os.path.exists(args.train_file):
raise FileNotFoundError(
"train data not found. Datasets can be " "obtained using examples/nlp/scripts/get_squad.py"
)

# Instantiate neural factory with supported backend
nf = nemo_core.NeuralModuleFactory(
@@ -264,7 +265,7 @@ def create_pipeline(
log_dir=args.work_dir,
create_tb_writer=True,
files_to_copy=[__file__],
add_time_to_log_dir=True,
add_time_to_log_dir=False,
)

if args.tokenizer == "sentencepiece":
@@ -303,7 +304,7 @@

if not args.evaluation_only:
train_loss, train_steps_per_epoch, _, _ = create_pipeline(
data_dir=args.data_dir,
data_file=args.train_file,
model=model,
head=qa_head,
loss_fn=squad_loss,
@@ -316,8 +317,9 @@
batches_per_step=args.batches_per_step,
mode="train",
)
logging.info(f"training step per epoch: {train_steps_per_epoch}")
_, _, eval_output, eval_data_layer = create_pipeline(
data_dir=args.data_dir,
data_file=args.dev_file,
model=model,
head=qa_head,
loss_fn=squad_loss,
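
With --data_dir replaced by explicit --train_file and --dev_file arguments, a single-GPU fine-tuning run now looks like the following; this mirrors the updated docstring and the SQuAD v1.1 Jenkins stage, with placeholder paths:

# SQuAD v1.1 fine-tuning with the new file-based arguments (paths are placeholders).
cd examples/nlp/question_answering
python question_answering_squad.py \
  --train_file /path/to/squad/v1.1/train-v1.1.json \
  --dev_file /path/to/squad/v1.1/dev-v1.1.json \
  --work_dir /path/to/output_folder \
  --amp_opt_level "O1" \
  --batch_size 8 --num_epochs 3 --lr_policy WarmupAnnealing --lr 3e-5 --do_lower_case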