Delete compute #12

Closed
wants to merge 11 commits into from
3 changes: 2 additions & 1 deletion README.md
@@ -13,7 +13,8 @@ This model is then compared to an Azure AutoML run.


## Summary
**In 1-2 sentences, explain the problem statement: e.g "This dataset contains data about... we seek to predict..."**
The dataset contains data about the direct marketing campaigns of a banking institution.
Our goal is to predict whether a client will subscribe to a term deposit.

**In 1-2 sentences, explain the solution: e.g. "The best performing model was a ..."**

1 change: 1 addition & 0 deletions model.pkl
Binary file not shown.
Binary file added outputs/automl_model.onnx
Binary file not shown.
Binary file added outputs/model.pkl
Binary file not shown.
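Reviewer note: a minimal sketch of how the added outputs/automl_model.onnx could be inspected outside Azure ML. It assumes onnxruntime is installed and is not part of this PR.

```python
# Sketch only: open the ONNX model added in this PR and list its expected inputs.
# AutoML ONNX models take the raw, named feature columns, so real scoring would
# need those columns supplied by name; onnxruntime is assumed to be installed.
import onnxruntime as rt

sess = rt.InferenceSession("outputs/automl_model.onnx")
for inp in sess.get_inputs():
    print(inp.name, inp.type, inp.shape)
```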
20 changes: 13 additions & 7 deletions train.py
@@ -8,7 +8,9 @@
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
from azureml.core.run import Run
from azureml.core import Workspace, Dataset
from azureml.data.dataset_factory import TabularDatasetFactory
import joblib

def clean_data(data):
    # Dict for cleaning data
@@ -42,31 +44,35 @@ def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('--C', type=float, default=1.0, help="Inverse of regularization strength. Smaller values cause stronger regularization")
    parser.add_argument('--max_iter', type=int, default=100, help="Maximum number of iterations to converge")
    parser.add_argument('--max_iter', type=int, default=1000, help="Maximum number of iterations to converge")

    args = parser.parse_args()

    run = Run.get_context()

    run.log("Regularization Strength:", np.float(args.C))
    run.log("Max iterations:", np.int(args.max_iter))
    run.log("Regularization Strength:", float(args.C))
    run.log("Max iterations:", int(args.max_iter))

    # TODO: Create TabularDataset using TabularDatasetFactory
    # Data is located at:
    # "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
    url = "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

    ds = ### YOUR CODE HERE ###

    ds = TabularDatasetFactory.from_delimited_files(url)

    x, y = clean_data(ds)

    # TODO: Split data into train and test sets.

    ### YOUR CODE HERE ###a
    x_train, x_test, y_train, y_test = train_test_split(x, y)

    model = LogisticRegression(C=args.C, max_iter=args.max_iter).fit(x_train, y_train)

    os.makedirs('outputs', exist_ok=True)
    joblib.dump(model, "outputs/model.pkl")

    accuracy = model.score(x_test, y_test)
    run.log("Accuracy", np.float(accuracy))
    run.log("Accuracy", float(accuracy))

if __name__ == '__main__':
    main()
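Reviewer note: with the new joblib.dump call, the trained estimator can be reloaded outside the run. A minimal sketch, assuming a hypothetical holdout CSV that has already been passed through the same clean_data() encoding used in train.py:

```python
# Sketch only: reload the LogisticRegression model that train.py now saves and
# score held-out rows. "bankmarketing_holdout_clean.csv" is a hypothetical file
# whose columns already match the one-hot encoding produced by clean_data().
import joblib
import pandas as pd

model = joblib.load("outputs/model.pkl")
holdout = pd.read_csv("bankmarketing_holdout_clean.csv")
print(model.predict(holdout.head()))
```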
783 changes: 525 additions & 258 deletions udacity-project.ipynb

Large diffs are not rendered by default.

512 changes: 512 additions & 0 deletions udacity-project.ipynb.amltmp

Large diffs are not rendered by default.