Hello,
I’m trying to implement the example given in https://nupic.docs.numenta.org/0.8.0.dev0/quick-start/algorithms.html#encoding-data, but with generated sinusoidal data.
Code:
import csv
import datetime
import numpy
import os
import yaml
import matplotlib.pyplot as plot
from nupic.algorithms.sdr_classifier_factory import SDRClassifierFactory
from nupic.algorithms.spatial_pooler import SpatialPooler
from nupic.algorithms.temporal_memory import TemporalMemory
from nupic.encoders.date import DateEncoder
from nupic.encoders.random_distributed_scalar import \
RandomDistributedScalarEncoder
# Number of CSV rows to feed through the model before stopping.
_NUM_RECORDS = 3000
# Directory containing this script; data/params paths below are relative to it.
_EXAMPLE_DIR = os.path.dirname(os.path.abspath(__file__))
# Input CSV with (timestamp, signal) rows -- generated sinusoidal data.
_INPUT_FILE_PATH = os.path.join(_EXAMPLE_DIR, "data", "shortwave2.csv")
# YAML model-parameter file; only its "modelParams" section is read.
_PARAMS_PATH = os.path.join(_EXAMPLE_DIR, "params", "model.yaml")
def runTest(numRecords):
    """Encode a (timestamp, signal) CSV through SP -> TM -> SDRClassifier.

    Reads model parameters from _PARAMS_PATH, streams up to `numRecords`
    rows from _INPUT_FILE_PATH, prints the best 1-step prediction per row,
    and plots the confidence of the last 500 predictions.

    :param numRecords: maximum number of CSV rows to process.
    """
    print(_PARAMS_PATH)
    with open(_PARAMS_PATH, "r") as f:
        modelParams = yaml.safe_load(f)["modelParams"]
    enParams = modelParams["sensorParams"]["encoders"]
    spParams = modelParams["spParams"]
    tmParams = modelParams["tmParams"]

    # NOTE(review): DateEncoder(forced=True) enables no sub-encoders
    # (no timeOfDay/weekend/season arguments), so this encoder may contribute
    # no useful date information; the tutorial passes e.g. timeOfDay=(21, 1).
    # TODO confirm against the DateEncoder API.
    timeOfDayEncoder = DateEncoder(forced=True)
    scalarEncoder = RandomDistributedScalarEncoder(
        enParams["signal"]["resolution"])
    # Total width of the concatenated input the Spatial Pooler is sized for.
    encodingWidth = timeOfDayEncoder.getWidth() + scalarEncoder.getWidth()

    sp = SpatialPooler(
        inputDimensions=(encodingWidth,),
        columnDimensions=(spParams["columnCount"],),
        potentialPct=spParams["potentialPct"],
        potentialRadius=encodingWidth,
        globalInhibition=spParams["globalInhibition"],
        localAreaDensity=spParams["localAreaDensity"],
        numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        boostStrength=spParams["boostStrength"],
        seed=spParams["seed"],
        wrapAround=True
    )
    tm = TemporalMemory(
        columnDimensions=(tmParams["columnCount"],),
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        connectedPermanence=spParams["synPermConnected"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
        seed=tmParams["seed"]
    )
    # NOTE(review): SDRClassifierFactory.create() uses factory defaults and
    # ignores the clParams section of the YAML (alpha, steps, ...) -- confirm
    # this is intended.
    classifier = SDRClassifierFactory.create()

    results = []
    with open(_INPUT_FILE_PATH, "r") as fin:
        reader = csv.reader(fin)
        # BUG FIX: reader.next() is Python-2-only; next(reader) works on both.
        headers = next(reader)
        for count, record in enumerate(reader):
            if count >= numRecords:
                break
            # Parse the timestamp string into a datetime for the DateEncoder.
            timestamp = datetime.datetime.strptime(
                record[0], "%Y-%m-%d %H:%M:%S")
            # Convert the data value string into a float.
            sig = float(record[1])

            # Zero-filled arrays for the encoders to populate in place.
            timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
            sigBits = numpy.zeros(scalarEncoder.getWidth())
            timeOfDayEncoder.encodeIntoArray(timestamp, timeOfDayBits)
            scalarEncoder.encodeIntoArray(sig, sigBits)

            # Concatenate into the full encoding the SP was sized for.
            # BUG FIX: the original then did `encoding = sigBits`, which threw
            # away the date bits and fed the SP an input narrower than its
            # configured inputDimensions (encodingWidth).
            encoding = numpy.concatenate([timeOfDayBits, sigBits])

            # Active-column output buffer; must match the SP's column count.
            activeColumns = numpy.zeros(spParams["columnCount"])
            # Execute Spatial Pooling over the input space (learning on).
            sp.compute(encoding, True, activeColumns)
            activeColumnIndices = numpy.nonzero(activeColumns)[0]

            # Execute Temporal Memory over the active mini-columns.
            tm.compute(activeColumnIndices, learn=True)
            activeCells = tm.getActiveCells()

            # Bucket index of this input value, needed for classification.
            bucketIdx = scalarEncoder.getBucketIndices(sig)[0]
            # Translate active cells back to a scalar prediction.
            classifierResult = classifier.compute(
                recordNum=count,
                patternNZ=activeCells,
                classification={
                    "bucketIdx": bucketIdx,
                    "actValue": sig
                },
                learn=True,
                infer=True
            )
            # Best prediction for 1 step out: pair each confidence with its
            # value and take the highest-confidence pair.
            oneStepConfidence, oneStep = sorted(
                zip(classifierResult[1], classifierResult["actualValues"]),
                reverse=True
            )[0]
            print("1-step: {:16} ({:4.4}%)".format(
                oneStep, oneStepConfidence * 100))
            # NOTE(review): this records the *confidence* of the best 1-step
            # prediction, not the predicted value itself -- confidences can
            # jump around even when the predicted values are good, which may
            # explain the "random-looking" plot.
            results.append(oneStepConfidence * 100)

    # Plot the last 500 confidences (works for any numRecords; the original
    # hard-coded results[2500:3000], which is the same slice for 3000 rows).
    sub = results[-500:]
    plot.plot(sub)
    plot.grid(True)
    plot.show()
if __name__ == "__main__":
    # Run the demo over the configured number of records.
    runTest(_NUM_RECORDS)
Config file:
# HTM prediction model parameters (restored to conventional nesting -- the
# pasted version had all indentation stripped, which makes the YAML
# unparseable as the intended nested modelParams structure).
model: HTMPrediction
version: 1
predictAheadTime: null
modelParams:
  inferenceType: TemporalNextStep
  sensorParams:
    verbosity: 0
    encoders:
      signal:
        fieldname: signal
        name: signal
        resolution: 0.88
        seed: 1
        type: RandomDistributedScalarEncoder
      # NOTE(review): no timeOfDay/weekend parameters are given here, and the
      # script constructs DateEncoder(forced=True) without reading this
      # section at all -- confirm the date encoder is configured as intended.
      datetime:
        fieldname: datetime
        name: datetime
        type: DateEncoder
    sensorAutoReset: null
  spEnable: true
  spParams:
    inputWidth: 946
    columnCount: 2048
    spVerbosity: 0
    spatialImp: cpp
    globalInhibition: 1
    localAreaDensity: -1.0
    numActiveColumnsPerInhArea: 40
    seed: 1956
    potentialPct: 0.85
    synPermConnected: 0.1
    synPermActiveInc: 0.04
    synPermInactiveDec: 0.005
    boostStrength: 3.0
  tmEnable: true
  tmParams:
    verbosity: 0
    columnCount: 2048
    cellsPerColumn: 32
    inputWidth: 2048
    seed: 1960
    temporalImp: cpp
    newSynapseCount: 20
    initialPerm: 0.21
    permanenceInc: 0.1
    permanenceDec: 0.1
    maxAge: 0
    globalDecay: 0.0
    maxSynapsesPerSegment: 32
    maxSegmentsPerCell: 128
    minThreshold: 12
    activationThreshold: 16
    outputType: normal
    pamLength: 1
  # NOTE(review): the script uses SDRClassifierFactory.create() with default
  # arguments, so this clParams section (alpha, steps, ...) is never read.
  clParams:
    verbosity: 0
    regionName: SDRClassifierRegion
    alpha: 0.1
    steps: '1,5'
    maxCategoryCount: 1000
    implementation: cpp
  trainSPNetOnlyIfRequested: false
Example output:
1-step: -0.804489083283 (51.25%)
1-step: 0.93655602968 (47.5%)
1-step: 0.187003990938 (76.98%)
1-step: 0.929403642176 (5.918%)
1-step: 0.187003990938 (36.48%)
1-step: -0.788550323498 (0.8647%)
1-step: 0.128416748256 (27.99%)
1-step: 0.901671961023 (92.11%)
1-step: 0.901671961023 (0.6367%)
1-step: 0.859828313216 (21.62%)
1-step: -0.835778050704 (66.3%)
1-step: -0.835778050704 (22.05%)
1-step: 0.116447583296 (56.18%)
1-step: 0.116447583296 (6.337%)
1-step: 0.888842026851 (59.55%)
1-step: 0.888842026851 (52.27%)
1-step: 0.922165177596 (43.04%)
1-step: -0.713035043798 (74.61%)
1-step: -0.627426267459 (42.81%)
1-step: 0.331628494255 (79.58%)
1-step: 0.922165177596 (19.21%)
1-step: 0.922165177596 (56.07%)
Does anyone know why the resulting 1-step prediction confidences are so random? What am I doing wrong?