diff --git a/mnist_cnn_f32/Makefile b/mnist_cnn_f32/Makefile
new file mode 100644
index 00000000..e9a58a4c
--- /dev/null
+++ b/mnist_cnn_f32/Makefile
@@ -0,0 +1,43 @@
+# This is a simple Makefile used to build the example source code.
+# This example might require some modifications in order to work correctly on
+# your system.
+# If you're not using the Armadillo wrapper, replace `armadillo` with linker commands
+# for the BLAS and LAPACK libraries that you are using.
+
+TARGET := mnist_cnn_f32
+SRC := mnist_cnn_f32.cpp
+LIBS_NAME := armadillo
+
+CXX := g++
+CXXFLAGS += -std=c++14 -Wall -Wextra -O3 -DNDEBUG -fopenmp
+# Use these CXXFLAGS instead if you want to compile with debugging symbols and
+# without optimizations.
+# CXXFLAGS += -std=c++14 -Wall -Wextra -g -O0
+LDFLAGS += -fopenmp
+LDFLAGS += -L . # /path/to/mlpack/library/ # if installed locally.
+# Add header directories for any includes that aren't on the
+# default compiler search path.
+INCLFLAGS := -I .
+# If you have mlpack or ensmallen installed somewhere nonstandard, uncomment and
+# update the lines below.
+# INCLFLAGS += -I/path/to/mlpack/include/
+# INCLFLAGS += -I/path/to/ensmallen/include/
+CXXFLAGS += $(INCLFLAGS)
+
+OBJS := $(SRC:.cpp=.o)
+LIBS := $(addprefix -l,$(LIBS_NAME))
+CLEAN_LIST := $(TARGET) $(OBJS)
+
+# default rule
+default: all
+
+$(TARGET): $(OBJS)
+	$(CXX) $(CXXFLAGS) $(OBJS) -o $(TARGET) $(LDFLAGS) $(LIBS)
+
+.PHONY: all
+all: $(TARGET)
+
+.PHONY: clean
+clean:
+	@echo CLEAN $(CLEAN_LIST)
+	@rm -f $(CLEAN_LIST)
diff --git a/mnist_cnn_f32/mnist_cnn_f32.cpp b/mnist_cnn_f32/mnist_cnn_f32.cpp
new file mode 100644
index 00000000..c106d6c8
--- /dev/null
+++ b/mnist_cnn_f32/mnist_cnn_f32.cpp
@@ -0,0 +1,234 @@
+/**
+ * An example of using a Convolutional Neural Network (CNN) for
+ * solving the Digit Recognizer problem from the Kaggle website.
+ *
+ * The full description of the problem as well as the datasets for training
+ * and testing are available here: https://www.kaggle.com/c/digit-recognizer.
+ *
+ * This example is similar to mnist_cnn. The main difference is that this one
+ * loads the dataset as float32 and creates a float32 model.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license. You should have received a copy of the
+ * 3-clause BSD license along with mlpack. If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ *
+ * @author Daivik Nema
+ */
+#define MLPACK_ENABLE_ANN_SERIALIZATION
+#include <mlpack.hpp>
+
+#if ((ENS_VERSION_MAJOR < 2) || \
+     ((ENS_VERSION_MAJOR == 2) && (ENS_VERSION_MINOR < 13)))
+  #error "need ensmallen version 2.13.0 or later"
+#endif
+
+using namespace arma;
+using namespace mlpack;
+using namespace std;
+
+CEREAL_REGISTER_MLPACK_LAYERS(arma::fmat);
+
+Row<size_t> getLabels(const arma::fmat& predOut)
+{
+  Row<size_t> predLabels(predOut.n_cols);
+  for (uword i = 0; i < predOut.n_cols; ++i)
+  {
+    predLabels(i) = predOut.col(i).index_max();
+  }
+  return predLabels;
+}
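+
+// A note on the loop above: it is a per-column argmax over the network
+// output. Armadillo can express the same thing directly; a minimal
+// equivalent sketch, assuming the same one-column-per-data-point layout,
+// would be:
+//
+//   arma::urowvec labels = arma::index_max(predOut, 0);
+//
+// where index_max(predOut, 0) returns, for every column, the row index of
+// its maximum element, i.e. the predicted class.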
+
+int main()
+{
+  // The dataset is randomly split into validation
+  // and training parts with the following ratio.
+  constexpr double RATIO = 0.1;
+
+  // Allow 60 passes over the training data, unless we are stopped early by
+  // EarlyStopAtMinLoss.
+  const int EPOCHS = 60;
+
+  // Number of data points in each iteration of SGD.
+  const int BATCH_SIZE = 50;
+
+  // Step size of the optimizer.
+  const double STEP_SIZE = 1.2e-3;
+
+  cout << "Reading data ..." << endl;
+
+  // The labeled dataset that contains the training data is loaded from a CSV
+  // file. Rows represent features, columns represent data points.
+  arma::fmat dataset;
+
+  // The original file can be downloaded from
+  // https://www.kaggle.com/c/digit-recognizer/data
+  data::Load("../data/mnist_train.csv", dataset, true);
+
+  // Split the dataset into training and validation sets.
+  arma::fmat train, valid;
+  data::Split(dataset, train, valid, RATIO);
+
+  // The train and valid datasets contain both the features and the class
+  // labels. Split these into separate matrices.
+  const arma::fmat trainX =
+      train.submat(1, 0, train.n_rows - 1, train.n_cols - 1) / 256.0;
+  const arma::fmat validX =
+      valid.submat(1, 0, valid.n_rows - 1, valid.n_cols - 1) / 256.0;
+
+  // Labels should specify the class of a data point and be in the interval
+  // [0, numClasses).
+
+  // Create labels for the training and validation datasets.
+  const arma::fmat trainY = train.row(0);
+  const arma::fmat validY = valid.row(0);
+
+  // Specify the NN model. NegativeLogLikelihood is the output layer that is
+  // used for classification problems. RandomInitialization means that the
+  // initial weights are generated randomly in the interval from -1 to 1.
+  FFN<NegativeLogLikelihoodType<arma::fmat>, RandomInitialization,
+      arma::fmat> model;
+
+  // Specify the model architecture.
+  // In this example, the CNN architecture is chosen to be similar to LeNet-5.
+  // The architecture follows a Conv-ReLU-Pool-Conv-ReLU-Pool-Dense schema. We
+  // have used leaky ReLU activation instead of vanilla ReLU. Standard
+  // max-pooling has been used for pooling. The first convolution uses 6
+  // filters of size 5x5 (and a stride of 1). The second convolution uses 16
+  // filters of size 5x5 (stride = 1). The final dense layer is connected to a
+  // log-softmax to ensure that we get a valid probability distribution over
+  // the output classes.
+
+  // Layers schema.
+  // 28x28x1 --- conv (6 filters of size 5x5. stride = 1) ---> 24x24x6
+  // 24x24x6 --------------- Leaky ReLU ---------------------> 24x24x6
+  // 24x24x6 --- max pooling (over 2x2 fields. stride = 2) --> 12x12x6
+  // 12x12x6 --- conv (16 filters of size 5x5. stride = 1) --> 8x8x16
+  // 8x8x16 ---------------- Leaky ReLU ---------------------> 8x8x16
+  // 8x8x16 ---- max pooling (over 2x2 fields. stride = 2) --> 4x4x16
+  // 4x4x16 ------------------- Dense -----------------------> 10
+
+  // Add the first convolution layer.
+  model.Add<ConvolutionType<NaiveConvolution<ValidConvolution>,
+                            NaiveConvolution<FullConvolution>,
+                            NaiveConvolution<ValidConvolution>,
+                            arma::fmat>>(6,  // Number of output activation maps.
+                                         5,  // Filter width.
+                                         5,  // Filter height.
+                                         1,  // Stride along width.
+                                         1,  // Stride along height.
+                                         0,  // Padding width.
+                                         0   // Padding height.
+                                         );
+
+  // Add the first ReLU.
+  model.Add<LeakyReLUType<arma::fmat>>();
+
+  // Add the first pooling layer. Pools over 2x2 fields in the input.
+  model.Add<MaxPoolingType<arma::fmat>>(2,  // Width of field.
+                                        2,  // Height of field.
+                                        2,  // Stride along width.
+                                        2,  // Stride along height.
+                                        true);
+
+  // Add the second convolution layer.
+  model.Add<ConvolutionType<NaiveConvolution<ValidConvolution>,
+                            NaiveConvolution<FullConvolution>,
+                            NaiveConvolution<ValidConvolution>,
+                            arma::fmat>>(16, // Number of output activation maps.
+                                         5,  // Filter width.
+                                         5,  // Filter height.
+                                         1,  // Stride along width.
+                                         1,  // Stride along height.
+                                         0,  // Padding width.
+                                         0   // Padding height.
+                                         );
+
+  // Add the second ReLU.
+  model.Add<LeakyReLUType<arma::fmat>>();
+
+  // Add the second pooling layer.
+  model.Add<MaxPoolingType<arma::fmat>>(2, 2, 2, 2, true);
+
+  // Add the final dense layer and the log-softmax output.
+  model.Add<LinearType<arma::fmat>>(10);
+  model.Add<LogSoftMaxType<arma::fmat>>();
+
+  model.InputDimensions() = vector<size_t>({ 28, 28 });
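+
+  // A worked check of the schema above (a sketch added for clarity, not part
+  // of the original example): a "valid" convolution or pooling with filter
+  // size F, stride S and no padding maps an input of width W to
+  // (W - F) / S + 1 outputs, so
+  //   conv1: (28 - 5) / 1 + 1 = 24   -> 24x24x6
+  //   pool1: (24 - 2) / 2 + 1 = 12   -> 12x12x6
+  //   conv2: (12 - 5) / 1 + 1 = 8    ->  8x8x16
+  //   pool2: ( 8 - 2) / 2 + 1 = 4    ->  4x4x16
+  // and the final dense layer therefore sees 4 * 4 * 16 = 256 inputs.
+  static_assert((28 - 5) / 1 + 1 == 24 && (24 - 2) / 2 + 1 == 12 &&
+                (12 - 5) / 1 + 1 == 8 && (8 - 2) / 2 + 1 == 4,
+                "LeNet-style dimension arithmetic in the comments above");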
+
+  cout << "Start training ..." << endl;
+
+  // Set parameters for the Adam optimizer.
+  ens::Adam optimizer(
+      STEP_SIZE,  // Step size of the optimizer.
+      BATCH_SIZE, // Batch size. Number of data points that are used in each
+                  // iteration.
+      0.9,   // Exponential decay rate for the first moment estimates.
+      0.999, // Exponential decay rate for the second moment estimates.
+      1e-8,  // Value used to initialise the mean squared gradient parameter.
+      EPOCHS * trainX.n_cols, // Max number of iterations.
+      1e-8,  // Tolerance.
+      true);
+
+  // Train the CNN model. If this is the first iteration, weights are
+  // randomly initialized between -1 and 1. Otherwise, the values of the
+  // weights from the previous iteration are used.
+  model.Train(trainX,
+              trainY,
+              optimizer,
+              ens::PrintLoss(),
+              ens::ProgressBar(),
+              // Stop training early when the validation loss stops improving.
+              ens::EarlyStopAtMinLossType<arma::fmat>(
+                  [&](const arma::fmat& /* param */)
+                  {
+                    double validationLoss = model.Evaluate(validX, validY);
+                    cout << "Validation loss: " << validationLoss << "."
+                        << endl;
+                    return validationLoss;
+                  }));
+
+  // Matrix to store the predictions on the train and validation datasets.
+  arma::fmat predOut;
+  // Get predictions on the training data points.
+  model.Predict(trainX, predOut);
+  // Calculate accuracy on the training data points.
+  Row<size_t> predLabels = getLabels(predOut);
+  double trainAccuracy =
+      accu(predLabels == trainY) / (double) trainY.n_elem * 100;
+
+  // Get predictions on the validation data points.
+  model.Predict(validX, predOut);
+  predLabels = getLabels(predOut);
+  // Calculate accuracy on the validation data points.
+  double validAccuracy =
+      accu(predLabels == validY) / (double) validY.n_elem * 100;
+
+  cout << "Accuracy: train = " << trainAccuracy << "%,"
+       << "\t valid = " << validAccuracy << "%" << endl;
+
+  data::Save("model.bin", "model", model, false);
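+
+  // The serialized model can later be reloaded for inference with the
+  // matching Load call; a minimal sketch, assuming the same FFN type
+  // spelling as above (loadedModel is an illustrative name, not part of the
+  // original example):
+  //
+  //   FFN<NegativeLogLikelihoodType<arma::fmat>, RandomInitialization,
+  //       arma::fmat> loadedModel;
+  //   data::Load("model.bin", "model", loadedModel, true);
+  //
+  // The MLPACK_ENABLE_ANN_SERIALIZATION define and the
+  // CEREAL_REGISTER_MLPACK_LAYERS(arma::fmat) call at the top of this file
+  // are what make this round trip work for float32 layers.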
+
+  cout << "Predicting on the test set ..." << endl;
+
+  // Get predictions on the test data points.
+  // The original file can be downloaded from
+  // https://www.kaggle.com/c/digit-recognizer/data
+  data::Load("../data/mnist_test.csv", dataset, true);
+  const arma::fmat testX =
+      dataset.submat(1, 0, dataset.n_rows - 1, dataset.n_cols - 1) / 256.0;
+  const arma::fmat testY = dataset.row(0);
+  model.Predict(testX, predOut);
+  // Calculate accuracy on the test data points.
+  predLabels = getLabels(predOut);
+  double testAccuracy =
+      accu(predLabels == testY) / (double) testY.n_elem * 100;
+
+  cout << "Accuracy: test = " << testAccuracy << "%" << endl;
+
+  cout << "Saving predicted labels to \"results.csv\" ..." << endl;
+  // Save the results into a Kaggle-compatible CSV file.
+  predLabels.save("results.csv", arma::csv_ascii);
+
+  cout << "Neural network model is saved to \"model.bin\"" << endl;
+  cout << "Finished" << endl;
+}
diff --git a/mnist_simple_f32/Makefile b/mnist_simple_f32/Makefile
new file mode 100644
index 00000000..278fa2b1
--- /dev/null
+++ b/mnist_simple_f32/Makefile
@@ -0,0 +1,43 @@
+# This is a simple Makefile used to build the example source code.
+# This example might require some modifications in order to work correctly on
+# your system.
+# If you're not using the Armadillo wrapper, replace `armadillo` with linker commands
+# for the BLAS and LAPACK libraries that you are using.
+
+TARGET := mnist_simple_f32
+SRC := mnist_simple_f32.cpp
+LIBS_NAME := armadillo
+
+CXX := g++
+CXXFLAGS += -std=c++14 -Wall -Wextra -O3 -DNDEBUG -fopenmp
+# Use these CXXFLAGS instead if you want to compile with debugging symbols and
+# without optimizations.
+# CXXFLAGS += -std=c++14 -Wall -Wextra -g -O0
+
+LDFLAGS += -fopenmp
+# Add header directories for any includes that aren't on the
+# default compiler search path.
+INCLFLAGS := -I .
+# If you have mlpack or ensmallen installed somewhere nonstandard, uncomment and
+# update the lines below.
+# INCLFLAGS += -I/path/to/mlpack/include/
+# INCLFLAGS += -I/path/to/ensmallen/include/
+CXXFLAGS += $(INCLFLAGS)
+
+OBJS := $(SRC:.cpp=.o)
+LIBS := $(addprefix -l,$(LIBS_NAME))
+CLEAN_LIST := $(TARGET) $(OBJS)
+
+# default rule
+default: all
+
+$(TARGET): $(OBJS)
+	$(CXX) $(OBJS) -o $(TARGET) $(LDFLAGS) $(LIBS)
+
+.PHONY: all
+all: $(TARGET)
+
+.PHONY: clean
+clean:
+	@echo CLEAN $(CLEAN_LIST)
+	@rm -f $(CLEAN_LIST)
diff --git a/mnist_simple_f32/mnist_simple_f32.cpp b/mnist_simple_f32/mnist_simple_f32.cpp
new file mode 100644
index 00000000..76954cc8
--- /dev/null
+++ b/mnist_simple_f32/mnist_simple_f32.cpp
@@ -0,0 +1,189 @@
+/**
+ * An example of using a Feed Forward Neural Network (FFN) for
+ * solving the Digit Recognizer problem from the Kaggle website.
+ *
+ * The full description of the problem as well as the datasets for training
+ * and testing are available here: https://www.kaggle.com/c/digit-recognizer.
+ *
+ * This example is similar to mnist_simple. The main difference is that this
+ * one loads the dataset as float32 and creates a float32 model.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license. You should have received a copy of the
+ * 3-clause BSD license along with mlpack. If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ *
+ * @author Eugene Freyman
+ * @author Omar Shrit
+ */
+#define MLPACK_ENABLE_ANN_SERIALIZATION
+#include <mlpack.hpp>
+
+#if ((ENS_VERSION_MAJOR < 2) || \
+     ((ENS_VERSION_MAJOR == 2) && (ENS_VERSION_MINOR < 13)))
+  #error "need ensmallen version 2.13.0 or later"
+#endif
+
+using namespace mlpack;
+using namespace std;
+
+CEREAL_REGISTER_MLPACK_LAYERS(arma::fmat);
+
+arma::Row<size_t> getLabels(arma::fmat predOut)
+{
+  arma::Row<size_t> predLabels(predOut.n_cols);
+  for (arma::uword i = 0; i < predOut.n_cols; ++i)
+  {
+    predLabels(i) = predOut.col(i).index_max();
+  }
+  return predLabels;
+}
+
+int main()
+{
+  // The dataset is randomly split into validation
+  // and training parts in the following ratio.
+  constexpr double RATIO = 0.1;
+  // The number of neurons in the first layer.
+  constexpr int H1 = 200;
+  // The number of neurons in the second layer.
+  constexpr int H2 = 100;
+  // Step size of the optimizer.
+  const double STEP_SIZE = 5e-3;
+  // Number of data points in each iteration of SGD.
+  const size_t BATCH_SIZE = 64;
+  // Allow up to 50 epochs, unless we are stopped early by EarlyStopAtMinLoss.
+  const int EPOCHS = 50;
+
+  // The labeled dataset that contains the training data is loaded from a CSV
+  // file; rows represent features, columns represent data points.
+  arma::fmat dataset;
+  data::Load("../data/mnist_train.csv", dataset, true);
+
+  // The original Kaggle dataset CSV file has a header, so it is necessary to
+  // get rid of that row; in the Armadillo representation it is the first
+  // column.
+  arma::fmat headerLessDataset =
+      dataset.submat(0, 1, dataset.n_rows - 1, dataset.n_cols - 1);
+
+  // Split the dataset into training and validation parts.
+  arma::fmat train, valid;
+  data::Split(headerLessDataset, train, valid, RATIO);
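+
+  // A quick sanity check (a sketch added for illustration, not in the
+  // original example): data::Split shuffles the columns before splitting,
+  // and with RATIO = 0.1 the validation part should hold roughly 10% of
+  // them.
+  cout << "Training columns: " << train.n_cols
+       << ", validation columns: " << valid.n_cols << endl;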
+
+  // Get the training and validation datasets with features only, and
+  // normalize the pixel values to the range [0, 1].
+  const arma::fmat trainX =
+      train.submat(1, 0, train.n_rows - 1, train.n_cols - 1) / 255.0;
+  const arma::fmat validX =
+      valid.submat(1, 0, valid.n_rows - 1, valid.n_cols - 1) / 255.0;
+
+  // Labels should specify the class of a data point and be in the interval
+  // [0, numClasses).
+
+  // Create labels for the training and validation datasets.
+  const arma::fmat trainY = train.row(0);
+  const arma::fmat validY = valid.row(0);
+
+  // Specify the NN model. NegativeLogLikelihood is the output layer that is
+  // used for classification problems. GlorotInitialization draws the initial
+  // weights from a distribution scaled by the number of input and output
+  // units of each layer (Xavier initialization).
+  FFN<NegativeLogLikelihoodType<arma::fmat>, GlorotInitialization,
+      arma::fmat> model;
+  // This intermediate layer connects the input data to the following ReLU
+  // layer; its parameter specifies the number of neurons in the next layer.
+  model.Add<LinearType<arma::fmat>>(H1);
+  // The first ReLU layer.
+  model.Add<ReLUType<arma::fmat>>();
+  // Intermediate layer between the ReLU layers.
+  model.Add<LinearType<arma::fmat>>(H2);
+  // The second ReLU layer.
+  model.Add<ReLUType<arma::fmat>>();
+  // Dropout layer for regularization. The parameter is the probability of
+  // setting a specific value to 0.
+  model.Add<DropoutType<arma::fmat>>(0.2);
+  // Intermediate layer.
+  model.Add<LinearType<arma::fmat>>(10);
+  // The LogSoftMax layer is used together with NegativeLogLikelihood to map
+  // the output values to log-probabilities of being a specific class.
+  model.Add<LogSoftMaxType<arma::fmat>>();
+
+  cout << "Start training ..." << endl;
+
+  // Set parameters for the Adam optimizer.
+  ens::Adam optimizer(
+      STEP_SIZE,  // Step size of the optimizer.
+      BATCH_SIZE, // Batch size. Number of data points that are used in each
+                  // iteration.
+      0.9,   // Exponential decay rate for the first moment estimates.
+      0.999, // Exponential decay rate for the second moment estimates.
+      1e-8,  // Value used to initialise the mean squared gradient parameter.
+      EPOCHS * trainX.n_cols, // Max number of iterations.
+      1e-8,  // Tolerance.
+      true);
+
+  // Declare a callback to store the best training weights.
+  ens::StoreBestCoordinates<arma::fmat> bestCoordinates;
+
+  // Train the neural network. If this is the first iteration, weights are
+  // random; otherwise the current values of the weights are used as the
+  // starting point.
+  model.Train(trainX,
+              trainY,
+              optimizer,
+              ens::PrintLoss(),
+              ens::ProgressBar(),
+              // Stop training early when the validation loss stops improving.
+              ens::EarlyStopAtMinLossType<arma::fmat>(
+                  [&](const arma::fmat& /* param */)
+                  {
+                    double validationLoss = model.Evaluate(validX, validY);
+                    cout << "Validation loss: " << validationLoss << "."
+                        << endl;
+                    return validationLoss;
+                  }),
+              // Store the best coordinates (neural network weights).
+              bestCoordinates);
+
+  // Save the best training weights into the model.
+  model.Parameters() = bestCoordinates.BestCoordinates();
+
+  arma::fmat predOut;
+  // Get predictions on the training data points.
+  model.Predict(trainX, predOut);
+  // Calculate accuracy on the training data points.
+  arma::Row<size_t> predLabels = getLabels(predOut);
+  double trainAccuracy =
+      arma::accu(predLabels == trainY) / (double) trainY.n_elem * 100;
+  // Get predictions on the validation data points.
+  model.Predict(validX, predOut);
+  // Calculate accuracy on the validation data points.
+  predLabels = getLabels(predOut);
+  double validAccuracy =
+      arma::accu(predLabels == validY) / (double) validY.n_elem * 100;
+
+  cout << "Accuracy: train = " << trainAccuracy << "%,"
+       << "\t valid = " << validAccuracy << "%" << endl;
+
+  data::Save("model.bin", "model", model, false);
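+
+  // A short usage sketch (added for illustration; oneImage and onePred are
+  // our names, not part of the original example): predicting a single digit
+  // with the trained model. Each column is one 28x28 image flattened to 784
+  // values in [0, 1]; here the first validation column stands in for new
+  // input.
+  arma::fmat oneImage = validX.col(0);
+  arma::fmat onePred;
+  model.Predict(oneImage, onePred);
+  cout << "Predicted digit for the first validation image: "
+       << onePred.index_max() << endl;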
+
+  // Load the test dataset (the one whose predicted labels
+  // should be submitted to the Kaggle website).
+  data::Load("../data/mnist_test.csv", dataset, true);
+  arma::fmat testY = dataset.row(dataset.n_rows - 1);
+  dataset.shed_row(dataset.n_rows - 1); // Strip the labels before predicting.
+
+  cout << "Predicting on the test set ..." << endl;
+  arma::fmat testPredOut;
+  // Get predictions on the test data points.
+  model.Predict(dataset, testPredOut);
+  // Generate labels for the test dataset.
+  arma::Row<size_t> testPred = getLabels(testPredOut);
+
+  double testAccuracy = arma::accu(testPred == testY) /
+      (double) testY.n_elem * 100;
+  cout << "Accuracy: test = " << testAccuracy << "%" << endl;
+
+  cout << "Saving predicted labels to \"results.csv\" ..." << endl;
+  testPred.save("results.csv", arma::csv_ascii);
+
+  cout << "Neural network model is saved to \"model.bin\"" << endl;
+  cout << "Finished" << endl;
+}