Hi,
I am new to HTM and a slow learner. I tried to run the code from the htm.core package on my own dataset and got the results below. It prints “claLearningPeriod is deprecated, use learningPeriod instead.”, and I am not sure what that means. Besides, the Predictive Error (RMS) values for 1 step and 5 steps ahead are quite high.
Below are the outputs:
(base) szekhai@Lims-MacBook-Air-2 HTM.CORE % python3.7 HTM_model.py
claLearningPeriod is deprecated, use learningPeriod instead.
Encoded Input SDR( 1462 )
Sparsity Min/Mean/Std/Max 0.0430917 / 0.0444279 / 0.000144373 / 0.0444596
Activation Frequency Min/Mean/Std/Max 0 / 0.0444283 / 0.183626 / 1
Entropy 0.187375
Overlap Min/Mean/Std/Max 0.769231 / 0.939829 / 0.0933824 / 1
Spatial Pooler Mini-Columns SDR( 1638 )
Sparsity Min/Mean/Std/Max 0.043956 / 0.0439559 / 2.07252e-07 / 0.043956
Activation Frequency Min/Mean/Std/Max 0 / 0.0439561 / 0.0513646 / 0.144476
Entropy 0.785945
Overlap Min/Mean/Std/Max 0 / 0.52718 / 0.376997 / 1
Spatial Pooler Connections:
Inputs (1462) ~> Outputs (1638) via Segments (1638)
Segments on Cell Min/Mean/Max 1 / 1 / 1
Potential Synapses on Segment Min/Mean/Max 1243 / 1243 / 1243
Connected Synapses on Segment Min/Mean/Max 54 / 381.315 / 667
Synapses Dead (0.406734%) Saturated (0.0180277%)
Synapses pruned (0%) Segments pruned (0%)
Temporal Memory Cells SDR( 21294 )
Sparsity Min/Mean/Std/Max 0.00338123 / 0.0154787 / 0.0120684 / 0.043956
Activation Frequency Min/Mean/Std/Max 0 / 0.0154787 / 0.0196881 / 0.0993554
Entropy 0.824147
Overlap Min/Mean/Std/Max 0 / 0.27518 / 0.273641 / 1
Temporal Memory Connections:
Inputs (9178) ~> Outputs (21294) via Segments (28776)
Segments on Cell Min/Mean/Max 0 / 1.35137 / 5
Potential Synapses on Segment Min/Mean/Max 32 / 48.4458 / 64
Connected Synapses on Segment Min/Mean/Max 26 / 41.222 / 64
Synapses Dead (0%) Saturated (0.0643774%)
Synapses pruned (0.742965%) Segments pruned (0%)
Predictive Error (RMS) 1 steps ahead: 328.1907669220488
Predictive Error (RMS) 5 steps ahead: 363.2411137647489
Anomaly Mean 0.2447766174083469
Anomaly Std 0.3195513115490765
(base) szekhai@Lims-MacBook-Air-2 HTM.CORE % python3.7 HTM_model.py
claLearningPeriod is deprecated, use learningPeriod instead.
Encoded Input SDR( 1462 )
Sparsity Min/Mean/Std/Max 0.0437757 / 0.0443148 / 0.000279587 / 0.0444596
Activation Frequency Min/Mean/Std/Max 0 / 0.0443146 / 0.18363 / 1
Entropy 0.186139
Overlap Min/Mean/Std/Max 0.769231 / 0.939756 / 0.0934862 / 1
Spatial Pooler Mini-Columns SDR( 1638 )
Sparsity Min/Mean/Std/Max 0.043956 / 0.0439559 / 2.07252e-07 / 0.043956
Activation Frequency Min/Mean/Std/Max 0 / 0.0439561 / 0.05077 / 0.141809
Entropy 0.789527
Overlap Min/Mean/Std/Max 0 / 0.518605 / 0.381331 / 1
Spatial Pooler Connections:
Inputs (1462) ~> Outputs (1638) via Segments (1638)
Segments on Cell Min/Mean/Max 1 / 1 / 1
Potential Synapses on Segment Min/Mean/Max 1243 / 1243 / 1243
Connected Synapses on Segment Min/Mean/Max 54 / 377.346 / 667
Synapses Dead (0.41289%) Saturated (0.0181451%)
Synapses pruned (0%) Segments pruned (0%)
Temporal Memory Cells SDR( 21294 )
Sparsity Min/Mean/Std/Max 0.00338123 / 0.015889 / 0.0122165 / 0.043956
Activation Frequency Min/Mean/Std/Max 0 / 0.015889 / 0.0199118 / 0.0940209
Entropy 0.826371
Overlap Min/Mean/Std/Max 0 / 0.267222 / 0.272855 / 1
Temporal Memory Connections:
Inputs (9352) ~> Outputs (21294) via Segments (30410)
Segments on Cell Min/Mean/Max 0 / 1.4281 / 5
Potential Synapses on Segment Min/Mean/Max 32 / 48.3886 / 64
Connected Synapses on Segment Min/Mean/Max 28 / 41.3068 / 64
Synapses Dead (0%) Saturated (0.0565118%)
Synapses pruned (0.733056%) Segments pruned (0%)
Predictive Error (RMS) 1 steps ahead: 323.27186728500817
Predictive Error (RMS) 5 steps ahead: 352.34799316694006
Anomaly Mean 0.2499197356808193
Anomaly Std 0.32531603728098846
This is the code I used; most of it is from the hotgym example:
import csv
from datetime import datetime
import os
import numpy as np
import random
import math
import matplotlib.pyplot as plt
from htm.bindings.sdr import SDR, Metrics
from htm.encoders.rdse import RDSE, RDSE_Parameters
from htm.encoders.date import DateEncoder
from htm.bindings.algorithms import SpatialPooler
from htm.bindings.algorithms import TemporalMemory
from htm.algorithms.anomaly_likelihood import AnomalyLikelihood
from htm.bindings.algorithms import Predictor
# Directory containing this script; the input CSV is expected to sit next to it.
_EXAMPLE_DIR = os.path.dirname(
    os.path.abspath(__file__)
)
_INPUT_FILE_PATH = os.path.join(_EXAMPLE_DIR, "networkTraffic.csv")

# Default model configuration consumed by main(). Each top-level key groups
# the settings for one component of the pipeline.
default_parameters = {
    # Encoders: "value" configures the scalar RDSE, "time" configures the
    # DateEncoder (time-of-day and weekend fields).
    "enc": {
        "value": {
            "resolution": 0.88,
            "size": 700,
            "sparsity": 0.02,
        },
        "time": {
            "timeOfDay": (30, 1),
            "weekend": 21,
        },
    },
    # Learning rate handed to the Predictor as `alpha`.
    "predictor": {
        "sdrc_alpha": 0.1,
    },
    # Spatial Pooler settings.
    "sp": {
        "boostStrength": 3.0,
        "columnCount": 1638,
        "localAreaDensity": 0.04395604395604396,
        "potentialPct": 0.85,
        "synPermActiveInc": 0.04,
        "synPermConnected": 0.13999999999999999,
        "synPermInactiveDec": 0.006,
    },
    # Temporal Memory settings.
    "tm": {
        "activationThreshold": 17,
        "cellsPerColumn": 13,
        "initialPerm": 0.21,
        "maxSegmentsPerCell": 128,
        "maxSynapsesPerSegment": 64,
        "minThreshold": 10,
        "newSynapseCount": 32,
        "permanenceDec": 0.1,
        "permanenceInc": 0.1,
    },
    # Period handed to AnomalyLikelihood.
    "anomaly": {
        "period": 1000,
    },
}
def _read_records(path):
    """Return the data rows of the CSV file at *path* as lists of strings.

    The first three rows are treated as headers and skipped; ``StopIteration``
    propagates if the file has fewer than three rows.
    """
    # newline="" is how the csv module expects its files to be opened.
    with open(path, "r", newline="") as fin:
        reader = csv.reader(fin)
        for _ in range(3):
            next(reader)
        return list(reader)


def _rms_errors(inputs, predictions):
    """Return ``{n_steps: RMS error}`` of *predictions* against *inputs*.

    *predictions* maps a step count to a list already aligned with *inputs*.
    NaN entries (no prediction available) are ignored. A horizon without any
    usable prediction yields NaN instead of raising ``ZeroDivisionError``.
    """
    errors = {}
    for n_steps, predicted in predictions.items():
        squared = [
            (actual - guess) ** 2
            for actual, guess in zip(inputs, predicted)
            if not math.isnan(guess)
        ]
        errors[n_steps] = (sum(squared) / len(squared)) ** .5 if squared else float('nan')
    return errors


def _write_results(path, records, predictions, anomaly, anomaly_prob):
    """Write one CSV row per record with its predictions and anomaly values."""
    header_row = [
        'timestamp', 'network_flow', 'first_prediction', 'five_prediction', 'n_prediction',
        'anomaly_score', 'anomaly_likelihood'
    ]
    # 'n_prediction' holds the longest prediction horizon. The original code
    # read it through a leaked loop variable that happened to equal 5; the
    # column is kept so the output format stays the same.
    longest_horizon = max(predictions)
    # The context manager closes the file even if a row fails to serialize;
    # newline="" prevents blank lines between rows on Windows.
    with open(path, "w", newline="") as output_file:
        writer = csv.writer(output_file)
        writer.writerow(header_row)
        rows = zip(records, predictions[1], predictions[5],
                   predictions[longest_horizon], anomaly, anomaly_prob)
        for record, one_step, five_step, n_step, score, likelihood in rows:
            writer.writerow([record[0], record[1], one_step, five_step,
                             n_step, score, likelihood])


def main(parameters=default_parameters, argv=None, verbose=True):
    """Run the HTM model over the network-traffic CSV and report the results.

    Every record read from ``_INPUT_FILE_PATH`` is encoded (timestamp + value),
    passed through the Spatial Pooler and Temporal Memory, and used to train a
    Predictor for 1 and 5 steps ahead. Afterwards the function prints model
    statistics and prediction errors, shows two matplotlib plots, and writes
    the per-record results to ``output4.csv`` in the working directory.

    Args:
        parameters: Nested dict with the same layout as ``default_parameters``.
        argv: Currently unused.
        verbose: Currently unused; the statistics are always printed.

    Returns:
        Tuple ``(records, predictions, anomaly, anomalyProb)``: the raw CSV
        rows, the predictions per step count (shifted to line up with the
        input they predict, NaN where unavailable), the raw anomaly scores,
        and the anomaly likelihoods.
    """
    # Read the input file.
    records = _read_records(_INPUT_FILE_PATH)

    # Make the Encoders. These will convert input data into binary representations.
    dateEncoder = DateEncoder(timeOfDay=parameters["enc"]["time"]["timeOfDay"],
                              weekend=parameters["enc"]["time"]["weekend"])

    scalarEncoderParams = RDSE_Parameters()
    scalarEncoderParams.size = parameters["enc"]["value"]["size"]
    scalarEncoderParams.sparsity = parameters["enc"]["value"]["sparsity"]
    scalarEncoderParams.resolution = parameters["enc"]["value"]["resolution"]
    scalarEncoder = RDSE(scalarEncoderParams)
    encodingWidth = (dateEncoder.size + scalarEncoder.size)
    enc_info = Metrics([encodingWidth], 999999999)

    # Make the HTM. SpatialPooler & TemporalMemory & associated tools.
    spParams = parameters["sp"]
    sp = SpatialPooler(
        inputDimensions=(encodingWidth,),
        columnDimensions=(spParams["columnCount"],),
        potentialPct=spParams["potentialPct"],
        potentialRadius=encodingWidth,
        globalInhibition=True,
        localAreaDensity=spParams["localAreaDensity"],
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        boostStrength=spParams["boostStrength"],
        wrapAround=True
    )
    sp_info = Metrics(sp.getColumnDimensions(), 999999999)

    tmParams = parameters["tm"]
    tm = TemporalMemory(
        columnDimensions=(spParams["columnCount"],),
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        connectedPermanence=spParams["synPermConnected"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"]
    )
    tm_info = Metrics([tm.numberOfCells()], 999999999)

    # Pass the period by keyword: given positionally it is taken as the
    # deprecated `claLearningPeriod` argument, which is what prints
    # "claLearningPeriod is deprecated, use learningPeriod instead.".
    anomaly_history = AnomalyLikelihood(learningPeriod=parameters["anomaly"]["period"])

    predictor = Predictor(steps=[1, 5], alpha=parameters["predictor"]['sdrc_alpha'])
    # Width of one predictor bucket, in units of the input value.
    predictor_resolution = 1

    # Iterate through every datum in the dataset, record the inputs & outputs.
    inputs = []
    anomaly = []
    anomalyProb = []
    predictions = {1: [], 5: []}
    for count, record in enumerate(records):

        # Convert the time-of-day string into a Python datetime object.
        timestamp = datetime.strptime(record[0], "%H:%M:%S.%f")
        # Convert data value string into float.
        networkFlow = float(record[1])
        inputs.append(networkFlow)

        # Call the encoders to create bit representations for each value. These are SDR objects.
        dateBits = dateEncoder.encode(timestamp)
        networkFlowBits = scalarEncoder.encode(networkFlow)

        # Concatenate all these encodings into one large encoding for Spatial Pooling.
        encoding = SDR(encodingWidth).concatenate([networkFlowBits, dateBits])
        enc_info.addData(encoding)

        # Create an SDR to represent active columns. This will be populated by the
        # compute method below. It must have the same dimensions as the Spatial Pooler.
        activeColumns = SDR(sp.getColumnDimensions())

        # Execute Spatial Pooling algorithm over input space.
        sp.compute(encoding, True, activeColumns)
        sp_info.addData(activeColumns)

        # Execute Temporal Memory algorithm over active mini-columns.
        tm.compute(activeColumns, learn=True)
        tm_info.addData(tm.getActiveCells().flatten())

        # Predict what will happen, and then train the predictor based on what just happened.
        pdf = predictor.infer(tm.getActiveCells())
        for n in (1, 5):
            if pdf[n]:
                predictions[n].append(np.argmax(pdf[n]) * predictor_resolution)
            else:
                predictions[n].append(float('nan'))

        anomaly.append(tm.anomaly)
        # Feed the numeric value (not the raw CSV row) to the likelihood model.
        anomalyLikelihood = anomaly_history.anomalyProbability(networkFlow, tm.anomaly)
        anomalyProb.append(anomalyLikelihood)

        predictor.learn(count, tm.getActiveCells(), int(networkFlow / predictor_resolution))

    # Print information & statistics about the state of the HTM.
    print("Encoded Input", enc_info)
    print("")
    print("Spatial Pooler Mini-Columns", sp_info)
    print(str(sp))
    print("")
    print("Temporal Memory Cells", tm_info)
    print(str(tm))
    print("")

    # Shift the predictions so that they are aligned with the input they predict:
    # prepend n NaNs and drop the overflow so each list keeps its length.
    for n_steps, pred_list in predictions.items():
        original_length = len(pred_list)
        pred_list[:0] = [float('nan')] * n_steps
        del pred_list[original_length:]

    # Calculate the predictive accuracy, Root-Mean-Squared
    accuracy = _rms_errors(inputs, predictions)
    for n in sorted(accuracy):
        print("Predictive Error (RMS)", n, "steps ahead:", accuracy[n])

    # Show info about the anomaly (mean & std)
    print("Anomaly Mean", np.mean(anomaly))
    print("Anomaly Std ", np.std(anomaly))

    # Plot the Predictions and Anomalies.
    time_axis = np.arange(len(inputs))
    plt.subplot(2, 1, 1)
    plt.title("Predictions")
    plt.xlabel("Time")
    plt.ylabel("Network Traffic Flow")
    plt.plot(time_axis, inputs, 'red',
             time_axis, predictions[1], 'blue',
             time_axis, predictions[5], 'green',)
    plt.legend(labels=('Input', '1 Step Prediction, Shifted 1 step', '5 Step Prediction, Shifted 5 steps'))

    plt.subplot(2, 1, 2)
    plt.title("Anomaly Score")
    plt.xlabel("Time")
    plt.ylabel("Network Traffic Flow")
    # Scale the input by its maximum so it shares an axis with the anomaly values.
    normalized_inputs = np.array(inputs) / max(inputs)
    plt.plot(time_axis, normalized_inputs, 'black',
             time_axis, anomaly, 'blue',
             time_axis, anomalyProb, 'red',)
    plt.legend(labels=('Input', 'Instantaneous Anomaly', 'Anomaly Likelihood'))
    plt.show()

    # Output to a csv file
    _write_results("output4.csv", records, predictions, anomaly, anomalyProb)

    return records, predictions, anomaly, anomalyProb
# Run the model with the default parameters when this file is executed as a
# script; importing it as a module does not trigger a run.
if __name__ == "__main__":
    main()