From c6c7825bb98a3350f02b6b3ae4bbffc303b4f36a Mon Sep 17 00:00:00 2001 From: Singari Sahith Kumar Date: Sun, 21 Jan 2024 19:27:17 +0100 Subject: [PATCH] Approve Multisequence Learning --- source/NeoCortexApi.sln | 10 +- .../ApproveMultiSequenceLearning.csproj | 39 +++ .../DataSet/dataset_01.json | 1 + .../DataSet/dataset_02.json | 1 + .../DataSet/dataset_03.json | 1 + .../DataSet/dataset_04.json | 1 + .../DataSet/test_01.json | 18 + .../HelperMethods.cs | 280 +++++++++++++++ .../MultiSequenceLearning.csproj | 20 ++ .../MultisequenceLearning.cs | 321 ++++++++++++++++++ .../ApproveMultiSequenceLearning/Program.cs | 228 +++++++++++++ .../ApproveMultiSequenceLearning/Report.cs | 20 ++ .../ApproveMultiSequenceLearning/Sequence.cs | 14 + .../report/report_638172569568213902.txt | 33 ++ .../report/report_638172649322130486.txt | 60 ++++ 15 files changed, 1045 insertions(+), 2 deletions(-) create mode 100644 source/Samples/ApproveMultiSequenceLearning/ApproveMultiSequenceLearning.csproj create mode 100644 source/Samples/ApproveMultiSequenceLearning/DataSet/dataset_01.json create mode 100644 source/Samples/ApproveMultiSequenceLearning/DataSet/dataset_02.json create mode 100644 source/Samples/ApproveMultiSequenceLearning/DataSet/dataset_03.json create mode 100644 source/Samples/ApproveMultiSequenceLearning/DataSet/dataset_04.json create mode 100644 source/Samples/ApproveMultiSequenceLearning/DataSet/test_01.json create mode 100644 source/Samples/ApproveMultiSequenceLearning/HelperMethods.cs create mode 100644 source/Samples/ApproveMultiSequenceLearning/MultiSequenceLearning.csproj create mode 100644 source/Samples/ApproveMultiSequenceLearning/MultisequenceLearning.cs create mode 100644 source/Samples/ApproveMultiSequenceLearning/Program.cs create mode 100644 source/Samples/ApproveMultiSequenceLearning/Report.cs create mode 100644 source/Samples/ApproveMultiSequenceLearning/Sequence.cs create mode 100644 source/Samples/ApproveMultiSequenceLearning/report/report_638172569568213902.txt create mode 100644 source/Samples/ApproveMultiSequenceLearning/report/report_638172649322130486.txt diff --git a/source/NeoCortexApi.sln b/source/NeoCortexApi.sln index a1e4a135e..424f487d3 100644 --- a/source/NeoCortexApi.sln +++ b/source/NeoCortexApi.sln @@ -1,4 +1,3 @@ - Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio Version 17 VisualStudioVersion = 17.0.32014.148 @@ -55,7 +54,9 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "GridCell", "GridCell\GridCe EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "NeoCortexApiAnomaly", "Samples\NeoCortexApiAnomaly\NeoCortexApiAnomaly.csproj", "{7F272910-3A59-4BBB-8888-9A7F695CA754}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "NeocortexApiLLMSample", "NeocortexApiLLMSample\NeocortexApiLLMSample.csproj", "{8D1F778F-AF73-44CF-965E-C061C07B99A7}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "NeocortexApiLLMSample", "NeocortexApiLLMSample\NeocortexApiLLMSample.csproj", "{8D1F778F-AF73-44CF-965E-C061C07B99A7}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ApproveMultiSequenceLearning", "Samples\ApproveMultiSequenceLearning\ApproveMultiSequenceLearning.csproj", "{D56D5B70-0094-41F0-9FFD-BCAB50BA7737}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -123,6 +124,10 @@ Global {8D1F778F-AF73-44CF-965E-C061C07B99A7}.Debug|Any CPU.Build.0 = Debug|Any CPU {8D1F778F-AF73-44CF-965E-C061C07B99A7}.Release|Any CPU.ActiveCfg = Release|Any CPU 
{8D1F778F-AF73-44CF-965E-C061C07B99A7}.Release|Any CPU.Build.0 = Release|Any CPU + {D56D5B70-0094-41F0-9FFD-BCAB50BA7737}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {D56D5B70-0094-41F0-9FFD-BCAB50BA7737}.Debug|Any CPU.Build.0 = Debug|Any CPU + {D56D5B70-0094-41F0-9FFD-BCAB50BA7737}.Release|Any CPU.ActiveCfg = Release|Any CPU + {D56D5B70-0094-41F0-9FFD-BCAB50BA7737}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -133,6 +138,7 @@ Global {AE2CF42C-DC7D-4F0A-8B43-84A5D81E1D72} = {99AE2313-7D07-4905-A0BB-52D304FCC2F7} {7F272910-3A59-4BBB-8888-9A7F695CA754} = {99AE2313-7D07-4905-A0BB-52D304FCC2F7} {8D1F778F-AF73-44CF-965E-C061C07B99A7} = {99AE2313-7D07-4905-A0BB-52D304FCC2F7} + {D56D5B70-0094-41F0-9FFD-BCAB50BA7737} = {99AE2313-7D07-4905-A0BB-52D304FCC2F7} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {71185BAD-1342-4836-BCDA-6EC97118F92D} diff --git a/source/Samples/ApproveMultiSequenceLearning/ApproveMultiSequenceLearning.csproj b/source/Samples/ApproveMultiSequenceLearning/ApproveMultiSequenceLearning.csproj new file mode 100644 index 000000000..9835eb800 --- /dev/null +++ b/source/Samples/ApproveMultiSequenceLearning/ApproveMultiSequenceLearning.csproj @@ -0,0 +1,39 @@ + + + + Exe + net8.0 + enable + enable + + + + + + + + + + + + + + + + Always + + + Always + + + Always + + + Always + + + Always + + + + diff --git a/source/Samples/ApproveMultiSequenceLearning/DataSet/dataset_01.json b/source/Samples/ApproveMultiSequenceLearning/DataSet/dataset_01.json new file mode 100644 index 000000000..fd4a66239 --- /dev/null +++ b/source/Samples/ApproveMultiSequenceLearning/DataSet/dataset_01.json @@ -0,0 +1 @@ +[{"name":"S1","data":[0,2,5,7,8,11,13,14,17,21,23,24,25,26,27,28,29]},{"name":"S2","data":[0,1,2,3,4,6,7,9,14,15,17,22,23,24,25,27,28]},{"name":"S3","data":[2,3,5,9,10,11,12,13,17,19,20,21,23,25,26,27,29]}] diff --git a/source/Samples/ApproveMultiSequenceLearning/DataSet/dataset_02.json b/source/Samples/ApproveMultiSequenceLearning/DataSet/dataset_02.json new file mode 100644 index 000000000..c694ac433 --- /dev/null +++ b/source/Samples/ApproveMultiSequenceLearning/DataSet/dataset_02.json @@ -0,0 +1 @@ +[{"name":"S1","data":[0,2,4,5,6,7,8,9,11,13,14,15,16,19]},{"name":"S2","data":[0,1,2,4,5,10,12,13,14,15,16,17,18,19]},{"name":"S3","data":[0,1,3,4,6,7,8,10,11,13,16,17,18,19]}] diff --git a/source/Samples/ApproveMultiSequenceLearning/DataSet/dataset_03.json b/source/Samples/ApproveMultiSequenceLearning/DataSet/dataset_03.json new file mode 100644 index 000000000..6698e3ffe --- /dev/null +++ b/source/Samples/ApproveMultiSequenceLearning/DataSet/dataset_03.json @@ -0,0 +1 @@ 
+[{"name":"S1","data":[0,1,4,5,6,7,8,13,14]},{"name":"S2","data":[0,1,2,3,4,5,7,8,11]},{"name":"S3","data":[0,1,2,3,4,5,6,9,13]},{"name":"S4","data":[1,3,5,8,9,10,11,12,14]},{"name":"S5","data":[0,3,4,6,7,10,12,13,14]},{"name":"S6","data":[2,3,4,5,7,8,9,10,14]},{"name":"S7","data":[0,2,3,4,5,6,8,10,13]},{"name":"S8","data":[1,2,4,5,6,10,11,12,13]},{"name":"S9","data":[0,1,2,3,4,8,10,12,13]},{"name":"S10","data":[0,1,2,4,5,6,7,9,11]},{"name":"S11","data":[2,3,4,9,10,11,12,13,14]},{"name":"S12","data":[0,2,3,4,5,6,7,12,13]},{"name":"S13","data":[1,2,4,6,8,9,11,12,14]},{"name":"S14","data":[2,4,6,8,9,10,11,12,13]},{"name":"S15","data":[0,1,3,6,8,9,10,13,14]},{"name":"S16","data":[1,2,3,8,9,10,11,12,14]},{"name":"S17","data":[0,1,3,4,6,7,9,10,13]},{"name":"S18","data":[1,3,5,6,8,9,10,12,13]},{"name":"S19","data":[1,2,4,5,6,10,11,12,14]},{"name":"S20","data":[0,2,4,5,6,9,10,13,14]},{"name":"S21","data":[0,4,5,6,7,8,11,13,14]},{"name":"S22","data":[0,3,5,6,7,8,9,10,13]},{"name":"S23","data":[0,1,2,3,5,7,9,10,13]},{"name":"S24","data":[0,3,4,6,8,9,10,13,14]},{"name":"S25","data":[0,1,2,3,5,8,10,12,13]},{"name":"S26","data":[0,3,5,6,8,9,10,12,13]},{"name":"S27","data":[1,3,4,5,6,8,11,12,14]},{"name":"S28","data":[0,3,5,6,7,9,11,12,13]},{"name":"S29","data":[0,2,3,4,5,6,11,12,14]},{"name":"S30","data":[0,1,2,3,4,6,8,9,12]}] diff --git a/source/Samples/ApproveMultiSequenceLearning/DataSet/dataset_04.json b/source/Samples/ApproveMultiSequenceLearning/DataSet/dataset_04.json new file mode 100644 index 000000000..9ec97ca11 --- /dev/null +++ b/source/Samples/ApproveMultiSequenceLearning/DataSet/dataset_04.json @@ -0,0 +1 @@ +[{"name":"S1","data":[0,2,5,6,7,8,10,11,13]},{"name":"S2","data":[1,2,3,4,6,11,12,13,14]},{"name":"S3","data":[1,2,3,4,7,8,10,12,14]}] diff --git a/source/Samples/ApproveMultiSequenceLearning/DataSet/test_01.json b/source/Samples/ApproveMultiSequenceLearning/DataSet/test_01.json new file mode 100644 index 000000000..3785c6d06 --- /dev/null +++ b/source/Samples/ApproveMultiSequenceLearning/DataSet/test_01.json @@ -0,0 +1,18 @@ +[ + { + "name": "T1", + "data": [ 5, 6, 7, 8 ] + }, + { + "name": "T2", + "data": [ 6, 11, 12, 13 ] + }, + { + "name": "T3", + "data": [ 1, 2, 3, 4 ] + }, + { + "name": "T4", + "data": [ 3, 4, 7, 8, 10 ] + } +] diff --git a/source/Samples/ApproveMultiSequenceLearning/HelperMethods.cs b/source/Samples/ApproveMultiSequenceLearning/HelperMethods.cs new file mode 100644 index 000000000..84225d7f8 --- /dev/null +++ b/source/Samples/ApproveMultiSequenceLearning/HelperMethods.cs @@ -0,0 +1,280 @@ +using System; +using System.Globalization; +using System.IO; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using NeoCortexApi; +using NeoCortexApi.Encoders; +using NeoCortexApi.Entities; +using MultiSequenceLearning; +using Newtonsoft.Json; + +namespace MultiSequenceLearning +{ + public class HelperMethods + { + public HelperMethods() + { + //needs no implementation + } + + /// + /// HTM Config for creating Connections + /// + /// input bits + /// number of columns + /// Object of HTMConfig + public static HtmConfig FetchHTMConfig(int inputBits, int numColumns) + { + HtmConfig cfg = new HtmConfig(new int[] { inputBits }, new int[] { numColumns }) + { + Random = new ThreadSafeRandom(42), + + CellsPerColumn = 25, + GlobalInhibition = true, + LocalAreaDensity = -1, + NumActiveColumnsPerInhArea = 0.02 * numColumns, + PotentialRadius = (int)(0.15 * inputBits), + //InhibitionRadius = 15, + + MaxBoost = 10.0, + 
DutyCyclePeriod = 25, + MinPctOverlapDutyCycles = 0.75, + MaxSynapsesPerSegment = (int)(0.02 * numColumns), + + ActivationThreshold = 15, + ConnectedPermanence = 0.5, + + // Learning is slower than forgetting in this case. + PermanenceDecrement = 0.25, + PermanenceIncrement = 0.15, + + // Used by punishing of segments. + PredictedSegmentDecrement = 0.1, + + //NumInputs = 88 + }; + + return cfg; + } + + /// + /// Takes in user input and return encoded SDR for prediction + /// + /// + /// + public static int[] EncodeSingleInput(string userInput) + { + int[] sdr = new int[0]; + + //needs no implementation + + return sdr; + } + + /// + /// Get the encoder with settings + /// + /// input bits + /// Object of EncoderBase + public static EncoderBase GetEncoder(int inputBits) + { + double max = 20; + + Dictionary settings = new Dictionary() + { + { "W", 15}, + { "N", inputBits}, + { "Radius", -1.0}, + { "MinVal", 0.0}, + { "Periodic", false}, + { "Name", "scalar"}, + { "ClipInput", false}, + { "MaxVal", max} + }; + + EncoderBase encoder = new ScalarEncoder(settings); + + return encoder; + } + + /// + /// Reads dataset from the file + /// + /// full path of the file + /// Object of list of Sequence + public static List ReadDataset(string path) + { + Console.WriteLine("Reading Sequence..."); + String lines = File.ReadAllText(path); + //var sequence = JsonConvert.DeserializeObject(lines); + List sequence = System.Text.Json.JsonSerializer.Deserialize>(lines); + + return sequence; + } + + /// + /// Creates list of Sequence as per configuration + /// + /// Object of list of Sequence + public static List CreateDataset() + { + int numberOfSequence = 30; + int size = 12; + int startVal = 0; + int endVal = 15; + Console.WriteLine("Creating Sequence..."); + List sequence = HelperMethods.CreateSequences(numberOfSequence, size, startVal, endVal); + + return sequence; + } + + /// + /// Saves the dataset in 'dataset' folder in BasePath of application + /// + /// Object of list of Sequence + /// Full path of the dataset + public static string SaveDataset(List sequences) + { + string BasePath = AppDomain.CurrentDomain.BaseDirectory; + string reportFolder = Path.Combine(BasePath, "dataset"); + if (!Directory.Exists(reportFolder)) + Directory.CreateDirectory(reportFolder); + string reportPath = Path.Combine(reportFolder, $"dataset_{DateTime.Now.Ticks}.json"); + + Console.WriteLine("Saving dataset..."); + + if (!File.Exists(reportPath)) + { + using (StreamWriter sw = File.CreateText(reportPath)) + { + /*sw.WriteLine("name, data"); + foreach (Sequence sequence in sequences) + { + sw.WriteLine($"{sequence.name}, {string.Join(",", sequence.data)}"); + }*/ + //sw.WriteLine(System.Text.Json.JsonSerializer.Serialize>(sequences)); + sw.WriteLine(JsonConvert.SerializeObject(sequences)); + } + } + + return reportPath; + } + + /// + /// Creats multiple sequences as per parameters + /// + /// Number of sequences to be created + /// Size of each sequence + /// Minimum value of item in a sequence + /// Maximum value of item in a sequence + /// Object of list of Sequence + public static List CreateSequences(int count, int size, int startVal, int stopVal) + { + List dataset = new List(); + + for (int i = 0; i < count; i++) + { + Sequence sequence = new Sequence(); + sequence.name = $"S{i+1}"; + sequence.data = getSyntheticData(size, startVal, stopVal); + dataset.Add(sequence); + } + + return dataset; + } + + /// + /// Creates a sequence of given size-3 and range + /// + /// Size of list + /// Min range of the list + /// Max range 
of the list + /// + private static int[] getSyntheticData(int size, int startVal, int stopVal) + { + int[] data = new int[size]; + + data = randomRemoveDouble(randomDouble(size, startVal, stopVal), 3); + + return data; + } + + /// + /// Creates a sorted list of array with given paramerters + /// + /// Size of array + /// Min range of the list + /// Max range of the list + /// + private static int[] randomDouble(int size, int startVal, int stopVal) + { + int[] array = new int[size]; + List list = new List(); + int number = 0; + Random r = new Random(Guid.NewGuid().GetHashCode()); + while(list.Count < size) + { + number = r.Next(startVal,stopVal); + if (!list.Contains(number)) + { + if(number >= startVal && number <= stopVal) + list.Add(number); + } + } + + array = list.ToArray(); + Array.Sort(array); + + return array; + } + + /// + /// Randomly remove less number of items from array + /// + /// array to processed + /// number of removals to be done + /// array with less numbers + private static int[] randomRemoveDouble(int[] array, int less) + { + int[] temp = new int[array.Length - less]; + Random random = new Random(Guid.NewGuid().GetHashCode()); + int number = 0; + List list = new List(); + + while (list.Count < (array.Length - less)) + { + number = array[random.Next(0, (array.Length))]; + if (!list.Contains(number)) + list.Add(number); + } + + temp = list.ToArray(); + Array.Sort(temp); + + return temp; + } + + private static int getDigits(int n) + { + if (n >= 0) + { + if (n < 100) return 2; + if (n < 1000) return 3; + if (n < 10000) return 4; + if (n < 100000) return 5; + if (n < 1000000) return 6; + if (n < 10000000) return 7; + if (n < 100000000) return 8; + if (n < 1000000000) return 9; + return 10; + } + else + { + return 2; + } + } + } +} \ No newline at end of file diff --git a/source/Samples/ApproveMultiSequenceLearning/MultiSequenceLearning.csproj b/source/Samples/ApproveMultiSequenceLearning/MultiSequenceLearning.csproj new file mode 100644 index 000000000..f1e8cb154 --- /dev/null +++ b/source/Samples/ApproveMultiSequenceLearning/MultiSequenceLearning.csproj @@ -0,0 +1,20 @@ + + + + Exe + net6.0 + enable + enable + + + + + + + + + + + + + diff --git a/source/Samples/ApproveMultiSequenceLearning/MultisequenceLearning.cs b/source/Samples/ApproveMultiSequenceLearning/MultisequenceLearning.cs new file mode 100644 index 000000000..6759ae060 --- /dev/null +++ b/source/Samples/ApproveMultiSequenceLearning/MultisequenceLearning.cs @@ -0,0 +1,321 @@ +using System; +using System.Globalization; +using System.IO; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using NeoCortexApi; +using NeoCortexApi.Classifiers; +using NeoCortexApi.Encoders; +using NeoCortexApi.Entities; +using NeoCortexApi.Classifiers; +using NeoCortexApi.Network; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; + + +namespace MultiSequenceLearning +{ + /// + /// Implements an experiment that demonstrates how to learn sequences. + /// + public class MultiSequenceLearning + { + /// + /// Runs the learning of sequences. + /// + /// Dictionary of sequences. KEY is the sewuence name, the VALUE is th elist of element of the sequence. + public Predictor Run(List sequences) + { + Console.WriteLine($"Hello NeocortexApi! 
Experiment {nameof(MultiSequenceLearning)}");
+
+            int inputBits = 100;
+            int numColumns = 1024;
+
+            HtmConfig cfg = HelperMethods.FetchHTMConfig(inputBits, numColumns);
+
+            EncoderBase encoder = HelperMethods.GetEncoder(inputBits);
+
+            return RunExperiment(inputBits, cfg, encoder, sequences);
+        }
+
+        /// <summary>
+        /// Runs the experiment: trains the SP in the newborn stage, then trains SP+TM on all sequences and returns the Predictor.
+        /// </summary>
+        private Predictor RunExperiment(int inputBits, HtmConfig cfg, EncoderBase encoder, List<Sequence> sequences)
+        {
+            Stopwatch sw = new Stopwatch();
+            sw.Start();
+
+            int maxMatchCnt = 0;
+
+            var mem = new Connections(cfg);
+
+            bool isInStableState = false;
+
+            HtmClassifier<string, ComputeCycle> cls = new HtmClassifier<string, ComputeCycle>();
+
+            var numUniqueInputs = GetNumberOfInputs(sequences);
+
+            CortexLayer<object, object> layer1 = new CortexLayer<object, object>("L1");
+
+            TemporalMemory tm = new TemporalMemory();
+
+            Console.WriteLine("------------ START ------------");
+
+            // For more information see the following paper: https://www.scitepress.org/Papers/2021/103142/103142.pdf
+            HomeostaticPlasticityController hpc = new HomeostaticPlasticityController(mem, numUniqueInputs * 150, (isStable, numPatterns, actColAvg, seenInputs) =>
+            {
+                if (isStable)
+                    // Event should be fired when entering the stable state.
+                    Debug.WriteLine($"STABLE: Patterns: {numPatterns}, Inputs: {seenInputs}, iteration: {seenInputs / numPatterns}");
+                else
+                    // An ideal SP should never enter the unstable state after the stable state.
+                    Debug.WriteLine($"UNSTABLE: Patterns: {numPatterns}, Inputs: {seenInputs}, iteration: {seenInputs / numPatterns}");
+
+                // We do not learn in the unstable state.
+                isInStableState = isStable;
+
+                // Clear active and predictive cells.
+                //tm.Reset(mem);
+            }, numOfCyclesToWaitOnChange: 50);
+
+
+            SpatialPoolerMT sp = new SpatialPoolerMT(hpc);
+            sp.Init(mem);
+            tm.Init(mem);
+
+            // Please note that the TM is not added to the layer here.
+            // This is omitted for practical reasons, because we first enter the newborn stage of the algorithm.
+            // In this stage the SP should get boosted and see all elements before we start learning with the TM.
+            // Everything would also work with the TM in the layer, but it would be much slower.
+            // So, to speed up the experiment, we first omit the TM and add it to the layer after the newborn stage.
+            layer1.HtmModules.Add("encoder", encoder);
+            layer1.HtmModules.Add("sp", sp);
+
+            //double[] inputs = inputValues.ToArray();
+            int[] prevActiveCols = new int[0];
+
+            int cycle = 0;
+            int matches = 0;
+
+            var lastPredictedValues = new List<string>(new string[] { "0" });
+
+            int maxCycles = 3500;
+
+            //
+            // Training SP to get stable. New-born stage.
+            //
+
+            for (int i = 0; i < maxCycles && isInStableState == false; i++)
+            {
+                matches = 0;
+
+                cycle++;
+
+                Debug.WriteLine($"-------------- Newborn SP Cycle {cycle} ---------------");
+                Console.WriteLine($"-------------- Newborn SP Cycle {cycle} ---------------");
+
+                foreach (var inputs in sequences)
+                {
+                    foreach (var input in inputs.data)
+                    {
+                        Debug.WriteLine($" -- {inputs.name} - {input} --");
+
+                        var lyrOut = layer1.Compute(input, true);
+
+                        if (isInStableState)
+                            break;
+                    }
+
+                    if (isInStableState)
+                        break;
+                }
+            }
+
+            // Clear all learned patterns in the classifier.
+            cls.ClearState();
+
+            // We activate here the Temporal Memory algorithm.
+            layer1.HtmModules.Add("tm", tm);
+
+            //
+            // Loop over all sequences.
+            foreach (var sequenceKeyPair in sequences)
+            {
+                Debug.WriteLine($"-------------- Sequences {sequenceKeyPair.name} ---------------");
+                Console.WriteLine($"-------------- Sequences {sequenceKeyPair.name} ---------------");
+
+                int maxPrevInputs = sequenceKeyPair.data.Length - 1;
+
+                List<string> previousInputs = new List<string>();
+
+                previousInputs.Add("-1");
+
+                //
+                // Now training with SP+TM. SP is pretrained on the given input pattern set.
+                for (int i = 0; i < maxCycles; i++)
+                {
+                    matches = 0;
+
+                    cycle++;
+
+                    Debug.WriteLine("");
+
+                    Debug.WriteLine($"-------------- Cycle SP+TM {cycle} ---------------");
+                    Console.WriteLine($"-------------- Cycle SP+TM {cycle} ---------------");
+
+                    foreach (var input in sequenceKeyPair.data)
+                    {
+                        Debug.WriteLine($"-------------- {input} ---------------");
+
+                        var lyrOut = layer1.Compute(input, true) as ComputeCycle;
+
+                        var activeColumns = layer1.GetResult("sp") as int[];
+
+                        previousInputs.Add(input.ToString());
+                        if (previousInputs.Count > (maxPrevInputs + 1))
+                            previousInputs.RemoveAt(0);
+
+                        // With the SP pretrained by the HPC, the TM will quickly learn cells for patterns.
+                        // In that case the starting sequence 4-5-6 might have the same SDR as 1-2-3-4-5-6,
+                        // which will result in returning 4-5-6 instead of 1-2-3-4-5-6.
+                        // The HtmClassifier always returns the first matching sequence. Because 4-5-6 is
+                        // memorized first, it will match as the first one.
+                        if (previousInputs.Count < maxPrevInputs)
+                            continue;
+
+                        string key = GetKey(previousInputs, input, sequenceKeyPair.name);
+
+                        List<Cell> actCells;
+
+                        if (lyrOut.ActiveCells.Count == lyrOut.WinnerCells.Count)
+                        {
+                            actCells = lyrOut.ActiveCells;
+                        }
+                        else
+                        {
+                            actCells = lyrOut.WinnerCells;
+                        }
+
+                        cls.Learn(key, actCells.ToArray());
+
+                        Debug.WriteLine($"Col SDR: {Helpers.StringifyVector(lyrOut.ActivColumnIndicies)}");
+                        Debug.WriteLine($"Cell SDR: {Helpers.StringifyVector(actCells.Select(c => c.Index).ToArray())}");
+
+                        //
+                        // If the list of predicted values from the previous step contains the currently presented value,
+                        // we have a match.
+                        if (lastPredictedValues.Contains(key))
+                        {
+                            matches++;
+                            Debug.WriteLine($"Match. Actual value: {key} - Predicted value: {lastPredictedValues.FirstOrDefault(key)}.");
+                        }
+                        else
+                            Debug.WriteLine($"Mismatch! Actual value: {key} - Predicted values: {String.Join(',', lastPredictedValues)}");
+
+                        if (lyrOut.PredictiveCells.Count > 0)
+                        {
+                            //var predictedInputValue = cls.GetPredictedInputValue(lyrOut.PredictiveCells.ToArray());
+                            var predictedInputValues = cls.GetPredictedInputValues(lyrOut.PredictiveCells.ToArray(), 3);
+
+                            foreach (var item in predictedInputValues)
+                            {
+                                Debug.WriteLine($"Current Input: {input} \t| Predicted Input: {item.PredictedInput} - {item.Similarity}");
+                            }
+
+                            lastPredictedValues = predictedInputValues.Select(v => v.PredictedInput).ToList();
+                        }
+                        else
+                        {
+                            Debug.WriteLine($"NO CELLS PREDICTED for next cycle.");
+                            lastPredictedValues = new List<string>();
+                        }
+                    }
+
+                    // The first element (a single element) in the sequence cannot be predicted.
+                    double maxPossibleAccuracy = (double)((double)sequenceKeyPair.data.Length - 1) / (double)sequenceKeyPair.data.Length * 100.0;
+
+                    double accuracy = (double)matches / (double)sequenceKeyPair.data.Length * 100.0;
+
+                    Debug.WriteLine($"Cycle: {cycle}\tMatches={matches} of {sequenceKeyPair.data.Length}\t {accuracy}%");
+                    Console.WriteLine($"Cycle: {cycle}\tMatches={matches} of {sequenceKeyPair.data.Length}\t {accuracy}%");
+
+                    if (accuracy >= maxPossibleAccuracy)
+                    {
+                        maxMatchCnt++;
+                        Debug.WriteLine($"100% accuracy reached {maxMatchCnt} times.");
+
+                        //
+                        // The experiment is completed when the accuracy stays at 100% for 30 cycles.
+                        if (maxMatchCnt >= 30)
+                        {
+                            sw.Stop();
+                            Debug.WriteLine($"Sequence learned. The algorithm is in the stable state after 30 repeats with accuracy {accuracy} of maximum possible {maxPossibleAccuracy}. Elapsed sequence {sequenceKeyPair.name} learning time: {sw.Elapsed}.");
+                            break;
+                        }
+                    }
+                    else if (maxMatchCnt > 0)
+                    {
+                        Debug.WriteLine($"After {maxMatchCnt} repeats at 100% accuracy we got a drop to accuracy {accuracy}. This indicates an unstable state. Learning will be continued.");
+                        maxMatchCnt = 0;
+                    }
+
+                    // This resets the learned state, so the sequence always starts from the first element.
+                    tm.Reset(mem);
+                }
+            }
+
+            Debug.WriteLine("------------ END ------------");
+
+            return new Predictor(layer1, mem, cls);
+        }
+
+
+        /// <summary>
+        /// Gets the number of all unique inputs.
+        /// </summary>
+        /// <param name="sequences">All sequences.</param>
+        /// <returns></returns>
+        private int GetNumberOfInputs(List<Sequence> sequences)
+        {
+            int num = 0;
+
+            foreach (var inputs in sequences)
+            {
+                //num += inputs.Value.Distinct().Count();
+                num += inputs.data.Length;
+            }
+
+            return num;
+        }
+
+
+        /// <summary>
+        /// Constructs the unique key of an element of a sequence. This key is used as input for the HtmClassifier.
+        /// It makes sure that all elements that belong to the same sequence are prefixed with the sequence name.
+        /// The prediction code can then extract the sequence prefix from the predicted element.
+        /// </summary>
+        /// <param name="prevInputs"></param>
+        /// <param name="input"></param>
+        /// <param name="sequence"></param>
+        /// <returns></returns>
+        private static string GetKey(List<string> prevInputs, double input, string sequence)
+        {
+            string key = String.Empty;
+
+            for (int i = 0; i < prevInputs.Count; i++)
+            {
+                if (i > 0)
+                    key += "-";
+
+                key += (prevInputs[i]);
+            }
+            //Console.WriteLine($"GetKey={sequence}_{key}");
+            return $"{sequence}_{key}";
+        }
+    }
+}
diff --git a/source/Samples/ApproveMultiSequenceLearning/Program.cs b/source/Samples/ApproveMultiSequenceLearning/Program.cs
new file mode 100644
index 000000000..1f1d3bd67
--- /dev/null
+++ b/source/Samples/ApproveMultiSequenceLearning/Program.cs
@@ -0,0 +1,228 @@
+using NeoCortexApi;
+using NeoCortexApi.Encoders;
+using Newtonsoft.Json;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using static MultiSequenceLearning.MultiSequenceLearning;
+
+namespace MultiSequenceLearning
+{
+    class Program
+    {
+        /// <summary>
+        /// This sample shows a typical experiment code for SP and TM.
+        /// You must start this code in the debugger to follow the trace of the SP
+        /// and TM.
+        /// </summary>
+        /// <param name="args"></param>
+        static void Main(string[] args)
+        {
+
+            //to create a synthetic dataset
+            /*string path = HelperMethods.SaveDataset(HelperMethods.CreateDataset());
+            Console.WriteLine($"Dataset saved: {path}");*/
+
+            //to read the dataset
+            string BasePath = AppDomain.CurrentDomain.BaseDirectory;
+            string datasetPath = Path.Combine(BasePath, "dataset", "dataset_03.json");
+            Console.WriteLine($"Reading Dataset: {datasetPath}");
+            List<Sequence> sequences = HelperMethods.ReadDataset(datasetPath);
+
+            //to read the test dataset
+            string testsetPath = Path.Combine(BasePath, "dataset", "test_01.json");
+            Console.WriteLine($"Reading Testset: {testsetPath}");
+            List<Sequence> sequencesTest = HelperMethods.ReadDataset(testsetPath);
+
+            //run learning only
+            //RunSimpleMultiSequenceLearningExperiment(sequences);
+
+            //run learning + prediction and generate a report of the results
+            List<Report> reports = RunMultiSequenceLearningExperiment(sequences, sequencesTest);
+
+            WriteReport(sequences, reports);
+
+            Console.WriteLine("Done...");
+
+        }
+
+        /// <summary>
+        /// Writes and formats the data in the report objects to a file.
+        /// </summary>
+        /// <param name="sequences">input sequences</param>
+        /// <param name="reports">report objects</param>
+        private static void WriteReport(List<Sequence> sequences, List<Report> reports)
+        {
+            string BasePath = AppDomain.CurrentDomain.BaseDirectory;
+            string reportFolder = Path.Combine(BasePath, "report");
+            if (!Directory.Exists(reportFolder))
+                Directory.CreateDirectory(reportFolder);
+            string reportPath = Path.Combine(reportFolder, $"report_{DateTime.Now.Ticks}.txt");
+
+            if (!File.Exists(reportPath))
+            {
+                using (StreamWriter sw = File.CreateText(reportPath))
+                {
+                    sw.WriteLine("------------------------------");
+                    foreach (Sequence sequence in sequences)
+                    {
+                        sw.WriteLine($"Sequence: {sequence.name} -> {string.Join("-",sequence.data)}");
+                    }
+                    sw.WriteLine("------------------------------");
+                    foreach (Report report in reports)
+                    {
+                        sw.WriteLine($"Using test sequence: {report.SequenceName} -> {string.Join("-",report.SequenceData)}");
+                        foreach (string log in report.PredictionLog)
+                        {
+                            sw.WriteLine($"\t{log}");
+                        }
+                        sw.WriteLine($"\tAccuracy: {report.Accuracy}%");
+                        sw.WriteLine("------------------------------");
+                    }
+                }
+            }
+
+        }
+
+        /// <summary>
+        /// Takes the input dataset and runs the algorithm (learning only).
+        /// </summary>
+        /// <param name="sequences">input dataset</param>
+        private static void RunSimpleMultiSequenceLearningExperiment(List<Sequence> sequences)
+        {
+            //
+            // Prototype for building the prediction engine.
+            MultiSequenceLearning experiment = new MultiSequenceLearning();
+            var predictor = experiment.Run(sequences);
+        }
+
+
+        /// <summary>
+        /// This example demonstrates how to learn multiple sequences and how to use the prediction mechanism.
+        /// First, the sequences of the input dataset are learned.
+        /// Second, short test subsequences are used for prediction. The predictor used by the experiment provides to the HTM every element of every test subsequence.
+        /// The predictor then tries to predict the next element.
+        /// </summary>
+        /// <param name="sequences">input dataset</param>
+        /// <param name="sequencesTest">input test dataset</param>
+        /// <returns>list of Report per test sequence</returns>
+        private static List<Report> RunMultiSequenceLearningExperiment(List<Sequence> sequences, List<Sequence> sequencesTest)
+        {
+            List<Report> reports = new List<Report>();
+
+            // Prototype for building the prediction engine.
+            MultiSequenceLearning experiment = new MultiSequenceLearning();
+            var predictor = experiment.Run(sequences);
+
+            // These lists are used to see how the prediction works.
+            // The predictor traverses the list element by element.
+            // By providing more elements to the prediction, the predictor delivers a more precise result.
+
+            foreach (Sequence item in sequencesTest)
+            {
+                // Create a new Report per test sequence, so every entry in the result list is independent.
+                Report report = new Report();
+                report.SequenceName = item.name;
+                Debug.WriteLine($"Using test sequence: {item.name}");
+                Console.WriteLine("------------------------------");
+                Console.WriteLine($"Using test sequence: {item.name}");
+                predictor.Reset();
+                report.SequenceData = item.data;
+                var accuracy = PredictNextElement(predictor, item.data, report);
+                reports.Add(report);
+                Console.WriteLine($"Accuracy for {item.name} sequence: {accuracy}%");
+            }
+
+            return reports;
+
+        }
+
+        /// <summary>
+        /// Takes the trained model and a subsequence and generates a report stating the accuracy.
+        /// </summary>
+        /// <param name="predictor">Object of Predictor</param>
+        /// <param name="list">sub-sequence to be tested</param>
+        /// <returns>accuracy of predicting elements in %</returns>
+        private static double PredictNextElement(Predictor predictor, int[] list, Report report)
+        {
+            int matchCount = 0;
+            int predictions = 0;
+            double accuracy = 0.0;
+            List<string> logs = new List<string>();
+            Console.WriteLine("------------------------------");
+
+            int prev = -1;
+            bool first = true;
+
+            /*
+             * Pseudo code for calculating accuracy:
+             *
+             * 1. loop over each element in the sub-sequence
+             *    1.1 if the element is the first element, do nothing and save it as 'previous' for further comparison
+             *    1.2 take the previous element and predict the next element
+             *        1.2.1 get the predicted element with the highest similarity and compare it with the 'next' element
+             *              1.2.1.1 if the predicted element matches the next element, increment the counter of matched elements
+             *        1.2.2 increment the count of calls made to predict an element
+             *    1.3 update the 'previous' element with the 'next' element
+             * 2. calculate the accuracy as (number of matched elements)/(total number of calls for prediction) * 100
+             * 3.
end the loop + */ + + foreach (var next in list) + { + if(first) + { + first = false; + } + else + { + Console.WriteLine($"Input: {prev}"); + var res = predictor.Predict(prev); + string log = ""; + if (res.Count > 0) + { + foreach (var pred in res) + { + Debug.WriteLine($"Predicted Input: {pred.PredictedInput} - Similarity: {pred.Similarity}%"); + } + + var sequence = res.First().PredictedInput.Split('_'); + var prediction = res.First().PredictedInput.Split('-'); + Console.WriteLine($"Predicted Sequence: {sequence.First()} - Predicted next element: {prediction.Last()}"); + log = $"Input: {prev}, Predicted Sequence: {sequence.First()}, Predicted next element: {prediction.Last()}"; + //compare current element with prediction of previous element + if(next == Int32.Parse(prediction.Last())) + { + matchCount++; + } + } + else + { + Console.WriteLine("Nothing predicted :("); + log = $"Input: {prev}, Nothing predicted"; + } + + logs.Add(log); + predictions++; + } + + //save previous element to compare with upcoming element + prev = next; + } + + report.PredictionLog = logs; + + /* + * Accuracy is calculated as number of matching predictions made + * divided by total number of prediction made for an element in subsequence + * + * accuracy = number of matching predictions/total number of prediction * 100 + */ + accuracy = (double)matchCount / predictions * 100; + report.Accuracy = accuracy; + Console.WriteLine("------------------------------"); + + return accuracy; + } + } +} diff --git a/source/Samples/ApproveMultiSequenceLearning/Report.cs b/source/Samples/ApproveMultiSequenceLearning/Report.cs new file mode 100644 index 000000000..0bc901651 --- /dev/null +++ b/source/Samples/ApproveMultiSequenceLearning/Report.cs @@ -0,0 +1,20 @@ +using Org.BouncyCastle.Asn1.Mozilla; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace MultiSequenceLearning +{ + public class Report + { + public Report() { } + + public string SequenceName { get; set; } + public int[] SequenceData { get; set; } + public List PredictionLog { get; set; } + public double Accuracy { get; set; } + + } +} diff --git a/source/Samples/ApproveMultiSequenceLearning/Sequence.cs b/source/Samples/ApproveMultiSequenceLearning/Sequence.cs new file mode 100644 index 000000000..d8fcb7732 --- /dev/null +++ b/source/Samples/ApproveMultiSequenceLearning/Sequence.cs @@ -0,0 +1,14 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace MultiSequenceLearning +{ + public class Sequence + { + public String name { get; set; } + public int[] data { get; set; } + } +} diff --git a/source/Samples/ApproveMultiSequenceLearning/report/report_638172569568213902.txt b/source/Samples/ApproveMultiSequenceLearning/report/report_638172569568213902.txt new file mode 100644 index 000000000..ee1234df5 --- /dev/null +++ b/source/Samples/ApproveMultiSequenceLearning/report/report_638172569568213902.txt @@ -0,0 +1,33 @@ +------------------------------ +Sequence: S1 -> 0-2-5-6-7-8-10-11-13 +Sequence: S2 -> 1-2-3-4-6-11-12-13-14 +Sequence: S3 -> 1-2-3-4-7-8-10-12-14 +------------------------------ +Using test sequence: T4 -> 3-4-7-8-10 + Input: 3, Predicted Sequence: S2, Predicted next element: 4 + Input: 4, Predicted Sequence: S3, Predicted next element: 7 + Input: 7, Predicted Sequence: S3, Predicted next element: 8 + Input: 8, Predicted Sequence: S3, Predicted next element: 10 + Accuracy: 100% 
+------------------------------ +Using test sequence: T4 -> 3-4-7-8-10 + Input: 3, Predicted Sequence: S2, Predicted next element: 4 + Input: 4, Predicted Sequence: S3, Predicted next element: 7 + Input: 7, Predicted Sequence: S3, Predicted next element: 8 + Input: 8, Predicted Sequence: S3, Predicted next element: 10 + Accuracy: 100% +------------------------------ +Using test sequence: T4 -> 3-4-7-8-10 + Input: 3, Predicted Sequence: S2, Predicted next element: 4 + Input: 4, Predicted Sequence: S3, Predicted next element: 7 + Input: 7, Predicted Sequence: S3, Predicted next element: 8 + Input: 8, Predicted Sequence: S3, Predicted next element: 10 + Accuracy: 100% +------------------------------ +Using test sequence: T4 -> 3-4-7-8-10 + Input: 3, Predicted Sequence: S2, Predicted next element: 4 + Input: 4, Predicted Sequence: S3, Predicted next element: 7 + Input: 7, Predicted Sequence: S3, Predicted next element: 8 + Input: 8, Predicted Sequence: S3, Predicted next element: 10 + Accuracy: 100% +------------------------------ diff --git a/source/Samples/ApproveMultiSequenceLearning/report/report_638172649322130486.txt b/source/Samples/ApproveMultiSequenceLearning/report/report_638172649322130486.txt new file mode 100644 index 000000000..89d715b70 --- /dev/null +++ b/source/Samples/ApproveMultiSequenceLearning/report/report_638172649322130486.txt @@ -0,0 +1,60 @@ +------------------------------ +Sequence: S1 -> 0-1-4-5-6-7-8-13-14 +Sequence: S2 -> 0-1-2-3-4-5-7-8-11 +Sequence: S3 -> 0-1-2-3-4-5-6-9-13 +Sequence: S4 -> 1-3-5-8-9-10-11-12-14 +Sequence: S5 -> 0-3-4-6-7-10-12-13-14 +Sequence: S6 -> 2-3-4-5-7-8-9-10-14 +Sequence: S7 -> 0-2-3-4-5-6-8-10-13 +Sequence: S8 -> 1-2-4-5-6-10-11-12-13 +Sequence: S9 -> 0-1-2-3-4-8-10-12-13 +Sequence: S10 -> 0-1-2-4-5-6-7-9-11 +Sequence: S11 -> 2-3-4-9-10-11-12-13-14 +Sequence: S12 -> 0-2-3-4-5-6-7-12-13 +Sequence: S13 -> 1-2-4-6-8-9-11-12-14 +Sequence: S14 -> 2-4-6-8-9-10-11-12-13 +Sequence: S15 -> 0-1-3-6-8-9-10-13-14 +Sequence: S16 -> 1-2-3-8-9-10-11-12-14 +Sequence: S17 -> 0-1-3-4-6-7-9-10-13 +Sequence: S18 -> 1-3-5-6-8-9-10-12-13 +Sequence: S19 -> 1-2-4-5-6-10-11-12-14 +Sequence: S20 -> 0-2-4-5-6-9-10-13-14 +Sequence: S21 -> 0-4-5-6-7-8-11-13-14 +Sequence: S22 -> 0-3-5-6-7-8-9-10-13 +Sequence: S23 -> 0-1-2-3-5-7-9-10-13 +Sequence: S24 -> 0-3-4-6-8-9-10-13-14 +Sequence: S25 -> 0-1-2-3-5-8-10-12-13 +Sequence: S26 -> 0-3-5-6-8-9-10-12-13 +Sequence: S27 -> 1-3-4-5-6-8-11-12-14 +Sequence: S28 -> 0-3-5-6-7-9-11-12-13 +Sequence: S29 -> 0-2-3-4-5-6-11-12-14 +Sequence: S30 -> 0-1-2-3-4-6-8-9-12 +------------------------------ +Using test sequence: T4 -> 3-4-7-8-10 + Input: 3, Predicted Sequence: S24, Predicted next element: 4 + Input: 4, Predicted Sequence: S30, Predicted next element: 6 + Input: 7, Predicted Sequence: S17, Predicted next element: 9 + Input: 8, Predicted Sequence: S30, Predicted next element: 9 + Accuracy: 25% +------------------------------ +Using test sequence: T4 -> 3-4-7-8-10 + Input: 3, Predicted Sequence: S24, Predicted next element: 4 + Input: 4, Predicted Sequence: S30, Predicted next element: 6 + Input: 7, Predicted Sequence: S17, Predicted next element: 9 + Input: 8, Predicted Sequence: S30, Predicted next element: 9 + Accuracy: 25% +------------------------------ +Using test sequence: T4 -> 3-4-7-8-10 + Input: 3, Predicted Sequence: S24, Predicted next element: 4 + Input: 4, Predicted Sequence: S30, Predicted next element: 6 + Input: 7, Predicted Sequence: S17, Predicted next element: 9 + Input: 8, Predicted Sequence: S30, 
Predicted next element: 9 + Accuracy: 25% +------------------------------ +Using test sequence: T4 -> 3-4-7-8-10 + Input: 3, Predicted Sequence: S24, Predicted next element: 4 + Input: 4, Predicted Sequence: S30, Predicted next element: 6 + Input: 7, Predicted Sequence: S17, Predicted next element: 9 + Input: 8, Predicted Sequence: S30, Predicted next element: 9 + Accuracy: 25% +------------------------------
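
Usage sketch (illustrative only, not part of the applied patch): assuming the types introduced above (Sequence, HelperMethods, MultiSequenceLearning, Predictor) and code placed inside the MultiSequenceLearning namespace as in Program.cs, the pieces fit together roughly as follows. Paths, the dataset file name, and the encoder range (0-20, set in HelperMethods.GetEncoder) are taken from the code in this patch.

    // Minimal sketch mirroring Program.cs from this patch.
    var sequences = HelperMethods.ReadDataset(
        Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "dataset", "dataset_03.json"));

    var experiment = new MultiSequenceLearning();
    Predictor predictor = experiment.Run(sequences);   // newborn SP stage, then SP+TM learning

    predictor.Reset();                                 // clear the TM state before feeding a new subsequence
    foreach (var res in predictor.Predict(3))          // PredictedInput encodes sequence and elements, e.g. "S24_...-3-4"
        Console.WriteLine($"{res.PredictedInput} - {res.Similarity}");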