diff --git a/source/Samples/ApproveMultiSequenceLearning/HelperMethods.cs b/source/Samples/ApproveMultiSequenceLearning/HelperMethods.cs index 84225d7f8..acaa4da22 100644 --- a/source/Samples/ApproveMultiSequenceLearning/HelperMethods.cs +++ b/source/Samples/ApproveMultiSequenceLearning/HelperMethods.cs @@ -15,266 +15,128 @@ namespace MultiSequenceLearning { public class HelperMethods { - public HelperMethods() - { - //needs no implementation - } + // Constants for default settings + private const int DefaultRandomSeed = 42; + private const double MaxScalarValue = 20.0; + + // Avoid magic numbers in your code + private const int DefaultCellsPerColumn = 25; + private const double DefaultGlobalInhibitionDensity = 0.02; + private const double DefaultPotentialRadiusFactor = 0.15; + private const double DefaultMaxSynapsesPerSegmentFactor = 0.02; + private const double DefaultMaxBoost = 10.0; + private const int DefaultDutyCyclePeriod = 25; + private const double DefaultMinPctOverlapDutyCycles = 0.75; + private const int DefaultActivationThreshold = 15; + private const double DefaultConnectedPermanence = 0.5; + private const double DefaultPermanenceDecrement = 0.25; + private const double DefaultPermanenceIncrement = 0.15; + private const double DefaultPredictedSegmentDecrement = 0.1; /// /// HTM Config for creating Connections /// - /// input bits - /// number of columns - /// Object of HTMConfig public static HtmConfig FetchHTMConfig(int inputBits, int numColumns) { - HtmConfig cfg = new HtmConfig(new int[] { inputBits }, new int[] { numColumns }) + return new HtmConfig(new int[] { inputBits }, new int[] { numColumns }) { - Random = new ThreadSafeRandom(42), - - CellsPerColumn = 25, + Random = new ThreadSafeRandom(DefaultRandomSeed), + CellsPerColumn = DefaultCellsPerColumn, GlobalInhibition = true, LocalAreaDensity = -1, - NumActiveColumnsPerInhArea = 0.02 * numColumns, - PotentialRadius = (int)(0.15 * inputBits), - //InhibitionRadius = 15, - - MaxBoost = 10.0, - DutyCyclePeriod = 25, - MinPctOverlapDutyCycles = 0.75, - MaxSynapsesPerSegment = (int)(0.02 * numColumns), - - ActivationThreshold = 15, - ConnectedPermanence = 0.5, - - // Learning is slower than forgetting in this case. - PermanenceDecrement = 0.25, - PermanenceIncrement = 0.15, - - // Used by punishing of segments. - PredictedSegmentDecrement = 0.1, - - //NumInputs = 88 + NumActiveColumnsPerInhArea = DefaultGlobalInhibitionDensity * numColumns, + PotentialRadius = (int)(DefaultPotentialRadiusFactor * inputBits), + MaxBoost = DefaultMaxBoost, + DutyCyclePeriod = DefaultDutyCyclePeriod, + MinPctOverlapDutyCycles = DefaultMinPctOverlapDutyCycles, + MaxSynapsesPerSegment = (int)(DefaultMaxSynapsesPerSegmentFactor * numColumns), + ActivationThreshold = DefaultActivationThreshold, + ConnectedPermanence = DefaultConnectedPermanence, + PermanenceDecrement = DefaultPermanenceDecrement, + PermanenceIncrement = DefaultPermanenceIncrement, + PredictedSegmentDecrement = DefaultPredictedSegmentDecrement }; - - return cfg; - } - - /// - /// Takes in user input and return encoded SDR for prediction - /// - /// - /// - public static int[] EncodeSingleInput(string userInput) - { - int[] sdr = new int[0]; - - //needs no implementation - - return sdr; } /// /// Get the encoder with settings /// - /// input bits - /// Object of EncoderBase public static EncoderBase GetEncoder(int inputBits) { - double max = 20; - - Dictionary settings = new Dictionary() + var settings = new Dictionary { - { "W", 15}, - { "N", inputBits}, - { "Radius", -1.0}, - { "MinVal", 0.0}, - { "Periodic", false}, - { "Name", "scalar"}, - { "ClipInput", false}, - { "MaxVal", max} + { "W", 15 }, + { "N", inputBits }, + { "Radius", -1.0 }, + { "MinVal", 0.0 }, + { "Periodic", false }, + { "Name", "scalar" }, + { "ClipInput", false }, + { "MaxVal", MaxScalarValue } }; - EncoderBase encoder = new ScalarEncoder(settings); - - return encoder; + return new ScalarEncoder(settings); } /// /// Reads dataset from the file /// - /// full path of the file - /// Object of list of Sequence public static List ReadDataset(string path) { Console.WriteLine("Reading Sequence..."); - String lines = File.ReadAllText(path); - //var sequence = JsonConvert.DeserializeObject(lines); - List sequence = System.Text.Json.JsonSerializer.Deserialize>(lines); - - return sequence; - } - - /// - /// Creates list of Sequence as per configuration - /// - /// Object of list of Sequence - public static List CreateDataset() - { - int numberOfSequence = 30; - int size = 12; - int startVal = 0; - int endVal = 15; - Console.WriteLine("Creating Sequence..."); - List sequence = HelperMethods.CreateSequences(numberOfSequence, size, startVal, endVal); - - return sequence; - } - - /// - /// Saves the dataset in 'dataset' folder in BasePath of application - /// - /// Object of list of Sequence - /// Full path of the dataset - public static string SaveDataset(List sequences) - { - string BasePath = AppDomain.CurrentDomain.BaseDirectory; - string reportFolder = Path.Combine(BasePath, "dataset"); - if (!Directory.Exists(reportFolder)) - Directory.CreateDirectory(reportFolder); - string reportPath = Path.Combine(reportFolder, $"dataset_{DateTime.Now.Ticks}.json"); - - Console.WriteLine("Saving dataset..."); - - if (!File.Exists(reportPath)) + try { - using (StreamWriter sw = File.CreateText(reportPath)) - { - /*sw.WriteLine("name, data"); - foreach (Sequence sequence in sequences) - { - sw.WriteLine($"{sequence.name}, {string.Join(",", sequence.data)}"); - }*/ - //sw.WriteLine(System.Text.Json.JsonSerializer.Serialize>(sequences)); - sw.WriteLine(JsonConvert.SerializeObject(sequences)); - } + string fileContent = File.ReadAllText(path); + return JsonConvert.DeserializeObject>(fileContent); } - - return reportPath; - } - - /// - /// Creats multiple sequences as per parameters - /// - /// Number of sequences to be created - /// Size of each sequence - /// Minimum value of item in a sequence - /// Maximum value of item in a sequence - /// Object of list of Sequence - public static List CreateSequences(int count, int size, int startVal, int stopVal) - { - List dataset = new List(); - - for (int i = 0; i < count; i++) + catch (Exception ex) { - Sequence sequence = new Sequence(); - sequence.name = $"S{i+1}"; - sequence.data = getSyntheticData(size, startVal, stopVal); - dataset.Add(sequence); + Console.WriteLine($"Failed to read the dataset: {ex.Message}"); + return new List(); // Return an empty list in case of failure } - - return dataset; } /// - /// Creates a sequence of given size-3 and range + /// Saves the dataset in 'dataset' folder in BasePath of application /// - /// Size of list - /// Min range of the list - /// Max range of the list - /// - private static int[] getSyntheticData(int size, int startVal, int stopVal) + public static string SaveDataset(List sequences) { - int[] data = new int[size]; - - data = randomRemoveDouble(randomDouble(size, startVal, stopVal), 3); + string basePath = AppDomain.CurrentDomain.BaseDirectory; + string datasetFolder = Path.Combine(basePath, "dataset"); + Directory.CreateDirectory(datasetFolder); // CreateDirectory is safe to call if directory exists + string datasetPath = Path.Combine(datasetFolder, $"dataset_{DateTime.Now.Ticks}.json"); - return data; + Console.WriteLine("Saving dataset..."); + File.WriteAllText(datasetPath, JsonConvert.SerializeObject(sequences)); + return datasetPath; } /// - /// Creates a sorted list of array with given paramerters + /// Creates multiple sequences as per parameters /// - /// Size of array - /// Min range of the list - /// Max range of the list - /// - private static int[] randomDouble(int size, int startVal, int stopVal) + public static List CreateSequences(int count, int size, int startVal, int stopVal) { - int[] array = new int[size]; - List list = new List(); - int number = 0; - Random r = new Random(Guid.NewGuid().GetHashCode()); - while(list.Count < size) - { - number = r.Next(startVal,stopVal); - if (!list.Contains(number)) + return Enumerable.Range(1, count).Select(i => + new Sequence { - if(number >= startVal && number <= stopVal) - list.Add(number); - } - } - - array = list.ToArray(); - Array.Sort(array); - - return array; + name = $"S{i}", + data = GenerateRandomSequence(size, startVal, stopVal) + }) + .ToList(); } - /// - /// Randomly remove less number of items from array - /// - /// array to processed - /// number of removals to be done - /// array with less numbers - private static int[] randomRemoveDouble(int[] array, int less) + private static int[] GenerateRandomSequence(int size, int startVal, int stopVal) { - int[] temp = new int[array.Length - less]; - Random random = new Random(Guid.NewGuid().GetHashCode()); - int number = 0; - List list = new List(); + var rnd = new Random(); + var sequence = new HashSet(); - while (list.Count < (array.Length - less)) + while (sequence.Count < size) { - number = array[random.Next(0, (array.Length))]; - if (!list.Contains(number)) - list.Add(number); + int number = rnd.Next(startVal, stopVal + 1); + sequence.Add(number); } - temp = list.ToArray(); - Array.Sort(temp); - - return temp; - } - - private static int getDigits(int n) - { - if (n >= 0) - { - if (n < 100) return 2; - if (n < 1000) return 3; - if (n < 10000) return 4; - if (n < 100000) return 5; - if (n < 1000000) return 6; - if (n < 10000000) return 7; - if (n < 100000000) return 8; - if (n < 1000000000) return 9; - return 10; - } - else - { - return 2; - } + return sequence.OrderBy(n => n).ToArray(); } } } \ No newline at end of file diff --git a/source/Samples/ApproveMultiSequenceLearning/Program.cs b/source/Samples/ApproveMultiSequenceLearning/Program.cs index 1f1d3bd67..6c6b427df 100644 --- a/source/Samples/ApproveMultiSequenceLearning/Program.cs +++ b/source/Samples/ApproveMultiSequenceLearning/Program.cs @@ -11,218 +11,142 @@ namespace MultiSequenceLearning { class Program { - /// - /// This sample shows a typical experiment code for SP and TM. - /// You must start this code in debugger to follow the trace. - /// and TM. - /// - /// + private const string DatasetFolder = "dataset"; + private const string ReportFolder = "report"; + private const string DatasetFileName = "dataset_03.json"; + private const string TestsetFileName = "test_01.json"; + static void Main(string[] args) { - - //to create synthetic dataset - /*string path = HelperMethods.SaveDataset(HelperMethods.CreateDataset()); - Console.WriteLine($"Dataset saved: {path}");*/ - - //to read dataset - string BasePath = AppDomain.CurrentDomain.BaseDirectory; - string datasetPath = Path.Combine(BasePath, "dataset", "dataset_03.json"); - Console.WriteLine($"Reading Dataset: {datasetPath}"); - List sequences = HelperMethods.ReadDataset(datasetPath); - + //to read Input Dataset + string basePath = AppDomain.CurrentDomain.BaseDirectory; + List sequences = ReadDataset(Path.Combine(basePath, DatasetFolder, DatasetFileName)); //to read test dataset - string testsetPath = Path.Combine(BasePath, "dataset", "test_01.json"); - Console.WriteLine($"Reading Testset: {testsetPath}"); - List sequencesTest = HelperMethods.ReadDataset(testsetPath); - - //run learing only - //RunSimpleMultiSequenceLearningExperiment(sequences); + List sequencesTest = ReadDataset(Path.Combine(basePath, DatasetFolder, TestsetFileName)); - //run learning + prediction and generates report for results List reports = RunMultiSequenceLearningExperiment(sequences, sequencesTest); + WriteReport(reports, basePath); + } - WriteReport(sequences, reports); - - Console.WriteLine("Done..."); - + private static List ReadDataset(string datasetPath) + { + try + { + Console.WriteLine($"Reading Dataset: {datasetPath}"); + return JsonConvert.DeserializeObject>(File.ReadAllText(datasetPath)); + } + catch (Exception ex) + { + Console.WriteLine($"Error reading dataset: {ex.Message}"); + return new List(); + } } - /// - /// write and formats data in report object to a file - /// - /// input sequence - /// object of report - private static void WriteReport(List sequences, List reports) + private static void WriteReport(List reports, string basePath) { - string BasePath = AppDomain.CurrentDomain.BaseDirectory; - string reportFolder = Path.Combine(BasePath, "report"); - if (!Directory.Exists(reportFolder)) - Directory.CreateDirectory(reportFolder); + string reportFolder = EnsureDirectory(Path.Combine(basePath, ReportFolder)); string reportPath = Path.Combine(reportFolder, $"report_{DateTime.Now.Ticks}.txt"); - if (!File.Exists(reportPath)) + using (StreamWriter sw = File.CreateText(reportPath)) { - using (StreamWriter sw = File.CreateText(reportPath)) + foreach (Report report in reports) { - sw.WriteLine("------------------------------"); - foreach (Sequence sequence in sequences) - { - sw.WriteLine($"Sequence: {sequence.name} -> {string.Join("-",sequence.data)}"); - } - sw.WriteLine("------------------------------"); - foreach (Report report in reports) - { - sw.WriteLine($"Using test sequence: {report.SequenceName} -> {string.Join("-",report.SequenceData)}"); - foreach (string log in report.PredictionLog) - { - sw.WriteLine($"\t{log}"); - } - sw.WriteLine($"\tAccuracy: {report.Accuracy}%"); - sw.WriteLine("------------------------------"); - } + WriteReportContent(sw, report); } } - } - /// - /// takes input data set and runs the alogrithm - /// - /// input test dataset - private static void RunSimpleMultiSequenceLearningExperiment(List sequences) + private static string EnsureDirectory(string path) { - // - // Prototype for building the prediction engine. - MultiSequenceLearning experiment = new MultiSequenceLearning(); - var predictor = experiment.Run(sequences); + if (!Directory.Exists(path)) + Directory.CreateDirectory(path); + return path; } + private static void WriteReportContent(StreamWriter sw, Report report) + { + sw.WriteLine("------------------------------"); + sw.WriteLine($"Using test sequence: {report.SequenceName} -> {string.Join("-", report.SequenceData)}"); + foreach (string log in report.PredictionLog) + { + sw.WriteLine($"\t{log}"); + } + sw.WriteLine($"\tAccuracy: {report.Accuracy}%"); + sw.WriteLine("------------------------------"); + } - /// - /// This example demonstrates how to learn two sequences and how to use the prediction mechanism. - /// First, two sequences are learned. - /// Second, three short sequences with three elements each are created und used for prediction. The predictor used by experiment privides to the HTM every element of every predicting sequence. - /// The predictor tries to predict the next element. - /// - /// input dataset - /// input test dataset - /// list of Report per sequence private static List RunMultiSequenceLearningExperiment(List sequences, List sequencesTest) { - List reports = new List(); - Report report = new Report(); - - // Prototype for building the prediction engine. - MultiSequenceLearning experiment = new MultiSequenceLearning(); + var reports = new List(); + var experiment = new MultiSequenceLearning(); var predictor = experiment.Run(sequences); - // These list are used to see how the prediction works. - // Predictor is traversing the list element by element. - // By providing more elements to the prediction, the predictor delivers more precise result. - foreach (Sequence item in sequencesTest) { - report.SequenceName = item.name; - Debug.WriteLine($"Using test sequence: {item.name}"); - Console.WriteLine("------------------------------"); - Console.WriteLine($"Using test sequence: {item.name}"); - predictor.Reset(); - report.SequenceData = item.data; - var accuracy = PredictNextElement(predictor, item.data, report); + var report = new Report + { + SequenceName = item.name, + SequenceData = item.data + }; + + double accuracy = PredictNextElement(predictor, item.data, report); + report.Accuracy = accuracy; reports.Add(report); + Console.WriteLine($"Accuracy for {item.name} sequence: {accuracy}%"); } return reports; - } - /// - /// Takes predicted model, subsequence and generates report stating accuracy - /// - /// Object of Predictor - /// sub-sequence to be tested - /// accuracy of predicting elements in % private static double PredictNextElement(Predictor predictor, int[] list, Report report) { - int matchCount = 0; - int predictions = 0; - double accuracy = 0.0; + int matchCount = 0, predictions = 0; List logs = new List(); - Console.WriteLine("------------------------------"); - - int prev = -1; - bool first = true; - - /* - * Pseudo code for calculating accuracy: - * - * 1. loop for each element in the sub-sequence - * 1.1 if the element is first element do nothing and save the element as 'previous' for further comparision - * 1.2 take previous element and predict the next element - * 1.2.1 get the predicted element with highest similarity and compare with 'next' element - * 1.2.1.1 if predicted element matches the next element increment the counter of matched elements - * 1.2.2 increment the count for number of calls made to predict an element - * 1.2 update the 'previous' element with 'next' element - * 2. calculate the accuracy as (number of matched elements)/(total number of calls for prediction) * 100 - * 3. end the loop - */ - - foreach (var next in list) + + predictor.Reset(); + + for (int i = 0; i < list.Length - 1; i++) { - if(first) - { - first = false; - } - else - { - Console.WriteLine($"Input: {prev}"); - var res = predictor.Predict(prev); - string log = ""; - if (res.Count > 0) - { - foreach (var pred in res) - { - Debug.WriteLine($"Predicted Input: {pred.PredictedInput} - Similarity: {pred.Similarity}%"); - } - - var sequence = res.First().PredictedInput.Split('_'); - var prediction = res.First().PredictedInput.Split('-'); - Console.WriteLine($"Predicted Sequence: {sequence.First()} - Predicted next element: {prediction.Last()}"); - log = $"Input: {prev}, Predicted Sequence: {sequence.First()}, Predicted next element: {prediction.Last()}"; - //compare current element with prediction of previous element - if(next == Int32.Parse(prediction.Last())) - { - matchCount++; - } - } - else - { - Console.WriteLine("Nothing predicted :("); - log = $"Input: {prev}, Nothing predicted"; - } - - logs.Add(log); - predictions++; - } + int current = list[i]; + int next = list[i + 1]; - //save previous element to compare with upcoming element - prev = next; + logs.Add(PredictElement(predictor, current, next, ref matchCount)); + predictions++; } report.PredictionLog = logs; + return CalculateAccuracy(matchCount, predictions); + } - /* - * Accuracy is calculated as number of matching predictions made - * divided by total number of prediction made for an element in subsequence - * - * accuracy = number of matching predictions/total number of prediction * 100 - */ - accuracy = (double)matchCount / predictions * 100; - report.Accuracy = accuracy; - Console.WriteLine("------------------------------"); - - return accuracy; + private static string PredictElement(Predictor predictor, int current, int next, ref int matchCount) + { + Console.WriteLine($"Input: {current}"); + var predictions = predictor.Predict(current); + if (predictions.Any()) + { + var highestPrediction = predictions.OrderByDescending(p => p.Similarity).First(); + string predictedSequence = highestPrediction.PredictedInput.Split('-').First(); + int predictedNext = int.Parse(highestPrediction.PredictedInput.Split('-').Last()); + + Console.WriteLine($"Predicted Sequence: {predictedSequence} - Predicted next element: {predictedNext}"); + if (predictedNext == next) + matchCount++; + + return $"Input: {current}, Predicted Sequence: {predictedSequence}, Predicted next element: {predictedNext}"; + } + else + { + Console.WriteLine("Nothing predicted"); + return $"Input: {current}, Nothing predicted"; + } + } + + private static double CalculateAccuracy(int matchCount, int predictions) + { + return (double)matchCount / predictions * 100; } } -} + + +} \ No newline at end of file