All my code is here: https://github.com/JonnoFTW/htm-models-adelaide/tree/master/engine
That code is actually quite old, though; parameters in later work were selected by distributed (read: I took over my school’s computer lab) hyper-parameter optimisation using TPE (Tree-structured Parzen Estimator), not the swarming methods that come with nupic.
The best encoding for my data is:
- DateEncoder for weekend, timeofday, dayofweek, holidays (using the improved holiday extensions I pushed to nupic)
- RDSE (RandomDistributedScalarEncoder) for flow, with
resolution = max(0.001, (max_flow - 1) / flow_buckets)
My best model had an RMSE of 9.31. I optimised over this space:
columnCount = 2048
max_flow = _max_flow
flow_buckets = {{quniform(1, 40, 1)}}
synPermConnected = {{uniform(0.05, 0.25)}}
activeColumns = {{quniform(20, 64, 1)}}
synPermInactiveDec = {{uniform(0.0003, 0.1)}}
synPermActiveInc = {{uniform(0.001, 0.1)}}
potentialPct = {{uniform(0.2, 0.85)}}
activationThreshold = {{quniform(5, 20, 1)}}
pamLength = {{quniform(1, 10, 1)}}
cellsPerColumn = {{quniform(8, 32, 2)}}
minThreshold = {{quniform(4, 32, 1)}}
alpha = {{uniform(0.0001, 0.2)}}
boost = {{uniform(0.0, 0.1)}}
tmPermanenceInc = {{uniform(0.05, 0.2)}}
maxSynapsesPerSegment = {{quniform(28, 72, 2)}}
newSynapseRatio = {{uniform(0.4, 0.8)}}
newSynapseCount = maxSynapsesPerSegment * newSynapseRatio
initialPerm = {{uniform(0.1, 0.33)}}
maxSegmentsPerCell = {{quniform(32, 66, 2)}}
permanenceDec = {{uniform(0.01, 0.2)}}
weekend_width = {{quniform(30, 150, 2)}}
weekend_width = int(1+weekend_width)
timeOfDay_width = {{quniform(16, 201, 2)}}
timeOfDay_width = int(1 + timeOfDay_width)
dayOfWeek_width = {{quniform(20, 201, 2)}}
dayOfWeek_width = int(1 + dayOfWeek_width)
# widths must always be odd: quniform(..., 2) yields even values, so 1 is added
holiday_width = {{quniform(16,201,2)}}
holiday_width = int(1 + holiday_width)
dayOfWeek_radius = {{uniform(6, 15)}}
timeOfDay_radius = {{uniform(6, 15)}}
weekend_radius = {{uniform(6, 15)}}
I also optimised over 512, 1024 and 2048 columns; basically, with fewer columns you only get better model runtimes at the cost of a slightly worse RMSE score (the best was 10.4 for both 1024 and 512). I honestly think there’s a limit to how well you can predict traffic flow without a literal time machine.
Anyway, the best model params for my problem are below, shown in MongoDB shell notation, where NumberInt(n) is simply the integer n (I hope someone can learn something here from the chosen parameters, because it’s insanely difficult to tell which parameters, or which combinations of them, have the most significant impact on performance beyond column count):
{
"aggregationInfo" : {
"seconds" : NumberInt(0),
"fields" : [
],
"months" : NumberInt(0),
"days" : NumberInt(0),
"years" : NumberInt(0),
"hours" : NumberInt(0),
"microseconds" : NumberInt(0),
"weeks" : NumberInt(0),
"minutes" : NumberInt(0),
"milliseconds" : NumberInt(0)
},
"model" : "HTMPrediction",
"version" : NumberInt(1),
"predictAheadTime" : null,
"modelParams" : {
"sensorParams" : {
"verbosity" : NumberInt(0),
"encoders" : {
"datetime_timeOfDay" : {
"type" : "DateEncoder",
"timeOfDay" : [
NumberInt(55),
11.389925472016568
],
"fieldname" : "datetime",
"name" : "datetime_timeOfDay"
},
"flow" : {
"type" : "RandomDistributedScalarEncoder",
"resolution" : 89.25,
"fieldname" : "flow",
"name" : "flow"
},
"datetime_weekend" : {
"weekend" : [
NumberInt(127),
12.654411787919452
],
"fieldname" : "datetime",
"name" : "datetime_weekend",
"type" : "DateEncoder"
},
"datetime_holiday" : {
"type" : "DateEncoder",
"holiday" : NumberInt(133),
"fieldname" : "datetime",
"name" : "datetime_holiday",
"holidays" : [
[
NumberInt(2015),
NumberInt(12),
NumberInt(25)
],
[
NumberInt(2016),
NumberInt(12),
NumberInt(31)
],
[
NumberInt(2015),
NumberInt(10),
NumberInt(5)
],
[
NumberInt(2015),
NumberInt(4),
NumberInt(6)
],
[
NumberInt(2017),
NumberInt(1),
NumberInt(26)
],
[
NumberInt(2016),
NumberInt(12),
NumberInt(27)
],
[
NumberInt(2016),
NumberInt(4),
NumberInt(25)
],
[
NumberInt(2015),
NumberInt(6),
NumberInt(8)
],
[
NumberInt(2015),
NumberInt(4),
NumberInt(4)
],
[
NumberInt(2015),
NumberInt(3),
NumberInt(9)
],
[
NumberInt(2015),
NumberInt(1),
NumberInt(1)
],
[
NumberInt(2017),
NumberInt(4),
NumberInt(14)
],
[
NumberInt(2017),
NumberInt(4),
NumberInt(17)
],
[
NumberInt(2015),
NumberInt(1),
NumberInt(26)
],
[
NumberInt(2016),
NumberInt(3),
NumberInt(14)
],
[
NumberInt(2017),
NumberInt(10),
NumberInt(2)
],
[
NumberInt(2017),
NumberInt(12),
NumberInt(31)
],
[
NumberInt(2017),
NumberInt(6),
NumberInt(12)
],
[
NumberInt(2017),
NumberInt(1),
NumberInt(1)
],
[
NumberInt(2017),
NumberInt(12),
NumberInt(25)
],
[
NumberInt(2016),
NumberInt(10),
NumberInt(3)
],
[
NumberInt(2015),
NumberInt(4),
NumberInt(25)
],
[
NumberInt(2017),
NumberInt(3),
NumberInt(13)
],
[
NumberInt(2017),
NumberInt(4),
NumberInt(25)
],
[
NumberInt(2017),
NumberInt(12),
NumberInt(24)
],
[
NumberInt(2015),
NumberInt(4),
NumberInt(3)
],
[
NumberInt(2016),
NumberInt(3),
NumberInt(26)
],
[
NumberInt(2016),
NumberInt(12),
NumberInt(24)
],
[
NumberInt(2016),
NumberInt(3),
NumberInt(25)
],
[
NumberInt(2016),
NumberInt(3),
NumberInt(28)
],
[
NumberInt(2016),
NumberInt(12),
NumberInt(26)
],
[
NumberInt(2015),
NumberInt(12),
NumberInt(31)
],
[
NumberInt(2017),
NumberInt(4),
NumberInt(15)
],
[
NumberInt(2016),
NumberInt(1),
NumberInt(26)
],
[
NumberInt(2016),
NumberInt(1),
NumberInt(1)
],
[
NumberInt(2015),
NumberInt(12),
NumberInt(24)
],
[
NumberInt(2017),
NumberInt(1),
NumberInt(2)
],
[
NumberInt(2016),
NumberInt(6),
NumberInt(13)
],
[
NumberInt(2015),
NumberInt(12),
NumberInt(28)
],
[
NumberInt(2017),
NumberInt(12),
NumberInt(26)
]
]
},
"datetime_dayOfWeek" : {
"dayOfWeek" : [
NumberInt(133),
11.099369381130948
],
"type" : "DateEncoder",
"fieldname" : "datetime",
"name" : "datetime_dayOfWeek"
}
},
"sensorAutoReset" : null
},
"anomalyParams" : {
"anomalyCacheRecords" : null,
"autoDetectThreshold" : null,
"autoDetectWaitRecords" : null
},
"spParams" : {
"columnCount" : NumberInt(2048),
"spVerbosity" : NumberInt(0),
"spatialImp" : "cpp",
"inputWidth" : NumberInt(0),
"synPermInactiveDec" : 0.06321542545086611,
"synPermConnected" : 0.05762102903677198,
"synPermActiveInc" : 0.08360575341845242,
"seed" : NumberInt(1956),
"numActiveColumnsPerInhArea" : NumberInt(56),
"boostStrength" : 0.007220744757166206,
"globalInhibition" : NumberInt(1),
"potentialPct" : 0.7573713625971066
},
"trainSPNetOnlyIfRequested" : false,
"clParams" : {
"alpha" : 0.04423326056698039,
"verbosity" : NumberInt(0),
"steps" : "1",
"regionName" : "SDRClassifierRegion"
},
"tmParams" : {
"columnCount" : NumberInt(2048),
"activationThreshold" : NumberInt(8),
"pamLength" : NumberInt(5),
"cellsPerColumn" : NumberInt(14),
"permanenceInc" : 0.05875552578940036,
"minThreshold" : NumberInt(31),
"verbosity" : NumberInt(0),
"maxSynapsesPerSegment" : NumberInt(50),
"outputType" : "normal",
"globalDecay" : 0.0,
"initialPerm" : 0.11619888545169564,
"permanenceDec" : 0.05872729523847874,
"seed" : NumberInt(1960),
"maxAge" : NumberInt(0),
"newSynapseCount" : NumberInt(33),
"maxSegmentsPerCell" : NumberInt(44),
"temporalImp" : "cpp",
"inputWidth" : NumberInt(2048)
},
"tmEnable" : true,
"clEnable" : true,
"spEnable" : true,
"inferenceType" : "TemporalMultiStep"
}
}