Here you are!
The code is strongly inspired by
Launcher:
import ntpath
import os
import simplejson as json
import numpy as np
import pandas as pd
import time
from datetime import datetime
from nupic.algorithms import anomaly_likelihood
from nupic.data.inference_shifter import InferenceShifter
from nupic.frameworks.opf.modelfactory import ModelFactory
DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
def getDataFrame(dataFilePath, columns=None, headerRows=3):
    """Load the input CSV into a pandas DataFrame.

    The leading ``headerRows`` lines of the file are skipped and the
    columns are named explicitly instead of being taken from the file.

    :param dataFilePath: path of the CSV file to read.
    :param columns: optional sequence of column names to assign. When
        omitted, the names used by the rest of this script are applied
        (``timestamp``, ``avg`` and the seven ``reqNNN`` fields).
    :param headerRows: number of leading lines to skip before the data
        starts. Defaults to 3.
    :returns: a ``pandas.DataFrame`` with one row per data line.
    """
    defaultColumns = (
        'timestamp',
        'avg',
        'req301',
        'req302',
        'req303',
        'req304',
        'req305',
        'req309',
        'req310',
    )
    names = list(defaultColumns if columns is None else columns)
    return pd.read_csv(dataFilePath, skiprows=headerRows, names=names)
def runDataThroughModel(model, dataFrame):
    """Run every row of ``dataFrame`` through ``model`` and collect the output.

    Each row is converted into a record (``timestamp`` parsed with
    ``DATE_FORMAT``, every other field cast to ``float``), passed to
    ``model.run``, then through ``InferenceShifter.shift`` and finally
    through ``convertToWritableOutput``.

    :param model: object exposing ``run(record)``, as returned by
        ``createAnomalyDetectionModel``.
    :param dataFrame: frame with a ``timestamp`` column plus the numeric
        columns listed in ``valueFields`` below.
    :returns: a ``pandas.DataFrame`` built from the per-row output dicts.
    """
    valueFields = (
        "avg",
        "req301",
        "req302",
        "req303",
        "req304",
        "req305",
        "req309",
        "req310",
    )
    shifter = InferenceShifter()
    anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood()
    out = []
    for index, row in dataFrame.iterrows():
        record = {
            "timestamp": datetime.strptime(row["timestamp"], DATE_FORMAT),
        }
        for field in valueFields:
            record[field] = float(row[field])
        result = model.run(record)
        if index % 100 == 0:
            # A single parenthesised argument prints identically under the
            # Python 2 print statement and is also valid Python 3 syntax.
            print(time.strftime("%d %b %Y %H:%M:%S", time.localtime())
                  + " Read %i lines..." % index)
        result = shifter.shift(result)
        out.append(convertToWritableOutput(result, anomalyLikelihood))
    return pd.DataFrame(out)
def convertToWritableOutput(result, anomalyLikelihood):
    """Flatten one model result into a dict that can be written as a CSV row.

    :param result: object with a ``rawInput`` mapping (must contain
        ``timestamp`` and ``avg``) and an ``inferences`` mapping.
    :param anomalyLikelihood: object exposing
        ``anomalyProbability(value, anomalyScore, timestamp)``.
    :returns: dict with ``timestamp`` and ``avg``; when the inferences
        carry a non-``None`` ``anomalyScore`` the dict also contains
        ``anomalyScore`` and ``anomalyLikelihood``.
    """
    timestamp = result.rawInput["timestamp"]
    avg = result.rawInput["avg"]
    output = {
        "timestamp": timestamp,
        "avg": avg,
    }
    anomalyScore = result.inferences.get("anomalyScore")
    if anomalyScore is not None:
        output["anomalyScore"] = anomalyScore
        # The likelihood used to be computed and then discarded; store it so
        # it ends up in the output alongside the raw score.
        output["anomalyLikelihood"] = anomalyLikelihood.anomalyProbability(
            avg, anomalyScore, timestamp)
    return output
def createAnomalyDetectionModel(dataFrame):
    """Create the model described by the JSON file at ``MODEL_PARAMS_PATH``.

    ``MODEL_PARAMS_PATH`` is read as a module-level name, so it must be
    assigned before this function is called. The ``dataFrame`` argument is
    accepted but not used.

    :param dataFrame: unused.
    :returns: the model built by ``ModelFactory.create`` with inference
        enabled for the predicted field ``req301``.
    """
    with open(MODEL_PARAMS_PATH, "r") as paramsFile:
        rawParams = paramsFile.read()
    params = json.loads(rawParams)
    anomalyModel = ModelFactory.create(params)
    anomalyModel.enableInference({"predictedField": "req301"})
    return anomalyModel
def main(inputPath):
    """Run the whole pipeline for one input CSV and write the result.

    Loads ``inputPath``, creates the model, runs the data through it and
    saves the output frame to ``data/anomaly_<input file name>`` without
    the index column.

    :param inputPath: path of the CSV file to process.
    """
    # ntpath.basename keeps only the file name part of the input path.
    outputPath = os.path.join('data', "anomaly_" + ntpath.basename(inputPath))
    sourceFrame = getDataFrame(inputPath)
    anomalyModel = createAnomalyDetectionModel(sourceFrame)
    results = runDataThroughModel(anomalyModel, sourceFrame)
    results.to_csv(outputPath, index=False)
if __name__ == "__main__":
    # Script entry point. MODEL_PARAMS_PATH becomes a module-level name here;
    # createAnomalyDetectionModel reads it when main() runs.
    MODEL_PARAMS_PATH = 'model_params/multivariable.json'
    # Input CSV to process.
    dataPath = 'data/dataset.csv'
    main(dataPath)
Model:
{
"aggregationInfo": {
"days": 0,
"fields": [],
"hours": 0,
"microseconds": 0,
"milliseconds": 0,
"minutes": 0,
"months": 0,
"seconds": 0,
"weeks": 0,
"years": 0
},
"predictAheadTime": null,
"version": 1,
"model": "CLA",
"modelParams": {
"anomalyParams": {
"anomalyCacheRecords": null,
"autoDetectThreshold": null,
"autoDetectWaitRecords": 1000
},
"clEnable": true,
"clParams": {
"implementation": "cpp",
"alpha": 0.1,
"verbosity": 0,
"regionName": "SDRClassifierRegion",
"steps": "1"
},
"inferenceType": "TemporalAnomaly",
"sensorParams": {
"encoders": {
"timestamp_timeOfDay": null,
"timestamp_dayOfWeek": null,
"timestamp_weekend": null,
"avg": { "name": "avg", "fieldname": "avg", "resolution": 2.00,"type": "RandomDistributedScalarEncoder" },
"delta": { "name": "delta", "fieldname": "avg", "clipInput": true, "forced": true, "w": 41, "n": 2048, "type": "DeltaEncoder" },
"req301": { "name": "req301", "fieldname": "req301", "resolution": 2.00,"type": "RandomDistributedScalarEncoder" },
"delta301": { "name": "delta301", "fieldname": "req301", "clipInput": true, "forced": true, "w": 41, "n": 2048, "type": "DeltaEncoder" },
"req302": { "name": "req302", "fieldname": "req302", "resolution": 2.00,"type": "RandomDistributedScalarEncoder" },
"delta302": { "name": "delta302", "fieldname": "req302", "clipInput": true, "forced": true, "w": 41, "n": 2048, "type": "DeltaEncoder" },
"req303": { "name": "req303", "fieldname": "req303", "resolution": 2.00,"type": "RandomDistributedScalarEncoder" },
"delta303": { "name": "delta303", "fieldname": "req303", "clipInput": true, "forced": true, "w": 41, "n": 2048, "type": "DeltaEncoder" },
"req304": { "name": "req304", "fieldname": "req304", "resolution": 2.00,"type": "RandomDistributedScalarEncoder" },
"delta304": { "name": "delta304", "fieldname": "req304", "clipInput": true, "forced": true, "w": 41, "n": 2048, "type": "DeltaEncoder" },
"req305": { "name": "req305", "fieldname": "req305", "resolution": 2.00,"type": "RandomDistributedScalarEncoder" },
"delta305": { "name": "delta305", "fieldname": "req305", "clipInput": true, "forced": true, "w": 41, "n": 2048, "type": "DeltaEncoder" },
"req309": { "name": "req309", "fieldname": "req309", "resolution": 2.00,"type": "RandomDistributedScalarEncoder" },
"delta309": { "name": "delta309", "fieldname": "req309", "clipInput": true, "forced": true, "w": 41, "n": 2048, "type": "DeltaEncoder" },
"req310": { "name": "req310", "fieldname": "req310", "resolution": 2.00,"type": "RandomDistributedScalarEncoder" },
"delta310": { "name": "delta310", "fieldname": "req310", "clipInput": true, "forced": true, "w": 41, "n": 2048, "type": "DeltaEncoder" }
},
"sensorAutoReset": null,
"verbosity": 0
},
"spEnable": true,
"spParams": {
"potentialPct": 0.8,
"columnCount": 2048,
"globalInhibition": 1,
"inputWidth": 0,
"maxBoost": 1.0,
"numActiveColumnsPerInhArea": 40,
"seed": 1956,
"spVerbosity": 0,
"spatialImp": "cpp",
"synPermActiveInc": 0.003,
"synPermConnected": 0.2,
"synPermInactiveDec": 0.0005
},
"tpEnable": true,
"tpParams": {
"activationThreshold": 13,
"cellsPerColumn": 32,
"columnCount": 2048,
"globalDecay": 0.0,
"initialPerm": 0.21,
"inputWidth": 2048,
"maxAge": 0,
"maxSegmentsPerCell": 128,
"maxSynapsesPerSegment": 32,
"minThreshold": 10,
"newSynapseCount": 20,
"outputType": "normal",
"pamLength": 3,
"permanenceDec": 0.1,
"permanenceInc": 0.1,
"seed": 1960,
"temporalImp": "cpp",
"verbosity": 0
},
"trainSPNetOnlyIfRequested": false
}
}
Dataset (only a few rows, for confidentiality reasons):
timestamp,avg,req301,req302,req303,req304,req305,req309,req310
datetime,float,float,float,float,float,float,float,float
T,,,,,,,,
1970-11-05 17:15:00,7625.71,7484,8034,8036,7412,7647,7592,7175
1970-11-05 17:20:00,8048.86,8151,7976,8638,8201,8261,7890,7225
I hope it can help.