My company is a semiconductor manufacturing factory. In semiconductor manufacturing, there is a step called diffusion.
During this process, the wafers are kept in a diffusion furnace. The furnace ramps up from 500 °C to 1200 °C over two hours, holds at that temperature for 20 hours, and finally ramps back down to 500 °C to finish the process. I would like to detect temperature anomalies during this process. I have a lot of batch data for it. Please see the attached picture. My idea is to calculate the time offset (elapsed time since batch start) and encode it together with the temperature, but the results seem bad. I also attached the Jupyter notebook. Please shed some light! Thanks a lot!
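To make the encoding idea concrete, here is a minimal sketch of it using the same htm.core classes as in my notebook below. The sizes and resolutions are only illustrative guesses, and this sketch uses a separate RDSE per quantity (my actual code below shares one encoder for both):

from htm.bindings.sdr import SDR
from htm.encoders.rdse import RDSE, RDSE_Parameters

# One RDSE per quantity, so each gets a resolution suited to its own range.
# All sizes/resolutions below are illustrative, not tuned values.
ttParams = RDSE_Parameters()
ttParams.size = 800
ttParams.sparsity = 0.02
ttParams.resolution = 60        # ~1-minute buckets for the time offset (seconds)
ttEncoder = RDSE(ttParams)

tempParams = RDSE_Parameters()
tempParams.size = 800
tempParams.sparsity = 0.02
tempParams.resolution = 1.0     # ~1 degree C buckets for the temperature
tempEncoder = RDSE(tempParams)

encodingWidth = ttEncoder.size + tempEncoder.size

def encode(tt, temp):
    # Concatenate both encodings into one SDR for the Spatial Pooler.
    return SDR(encodingWidth).concatenate([tempEncoder.encode(temp),
                                           ttEncoder.encode(tt)])

example = encode(3600, 850.0)   # one hour into the ramp, mid-ramp temperature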
My code was modified from the hotgym example:
import csv
import datetime
import os
import numpy as np
import random
import math
import pandas as pd
import pprint
from htm.bindings.sdr import SDR, Metrics
from htm.encoders.rdse import RDSE, RDSE_Parameters
from htm.encoders.date import DateEncoder
from htm.bindings.algorithms import SpatialPooler
from htm.bindings.algorithms import TemporalMemory
from htm.algorithms.anomaly_likelihood import AnomalyLikelihood #FIXME use TM.anomaly instead, but it gives worse results than the py.AnomalyLikelihood now
from htm.bindings.algorithms import Predictor
_EXAMPLE_DIR = os.path.dirname(os.path.abspath('htm.core'))
_INPUT_FILE_PATH = os.path.join(_EXAMPLE_DIR, "gymdata.csv")
class AdParam:
    default_parameters = {
        # there are 2 (3) encoders: "value" (RDSE) & "time" (DateTime weekend, timeOfDay)
        'enc': {
            "value":
                {'resolution': 0.88, 'size': 800, 'sparsity': 0.01},
            "time":
                {'timeOfDay': (30, 1), 'weekend': 21}
        },
        'predictor': {'sdrc_alpha': 0.1},
        'sp': {'boostStrength': 3.0,
               'columnCount': 2000,
               'localAreaDensity': 0.04395604395604396,
               'potentialPct': 0.85,
               'synPermActiveInc': 0.04,
               'synPermConnected': 0.13999999999999999,
               'synPermInactiveDec': 0.006},
        'tm': {'activationThreshold': 17,
               'cellsPerColumn': 20,
               'initialPerm': 0.21,
               'maxSegmentsPerCell': 128,
               'maxSynapsesPerSegment': 64,
               'minThreshold': 10,
               'newSynapseCount': 32,
               'permanenceDec': 0.1,
               'permanenceInc': 0.1},
        'anomaly': {
            'likelihood': {
                # 'learningPeriod': int(math.floor(self.probationaryPeriod / 2.0)),
                # 'probationaryPeriod': self.probationaryPeriod - default_parameters["anomaly"]["likelihood"]["learningPeriod"],
                'probationaryPct': 0.01,
                'reestimationPeriod': 100}  # these settings are copied from NAB
        }
    }

    def __init__(self):
        self.param = self.default_parameters

    def getParam(self):
        return self.param
class CsvPreparation:
    def __init__(self, fn, hdrLines):
        self.records = []
        with open(fn, "r") as fin:
            reader = csv.reader(fin)
            ix = 0
            while ix < hdrLines:
                self.headers = next(reader)
                ix = ix + 1
            for record in reader:
                self.records.append(record)

    def getRecords(self):
        return self.records

    def getRecordsDataframe(self):
        self.pdList = pd.DataFrame(data=self.records, columns=self.headers)
        return self.pdList
def determine(tt, temp, encodingWidth, sp, tm, scalarEncoder, enc_info, sp_info, tm_info, predictor, anomaly_history):
    ttbits = scalarEncoder.encode(tt)
    tempbits = scalarEncoder.encode(temp)
    encoding = SDR(encodingWidth).concatenate([tempbits, ttbits])
    enc_info.addData(encoding)
    # Create an SDR to represent active columns. This will be populated by the
    # compute method below. It must have the same dimensions as the Spatial Pooler.
    activeColumns = SDR(sp.getColumnDimensions())
    # Execute Spatial Pooling algorithm over input space.
    sp.compute(encoding, True, activeColumns)
    sp_info.addData(activeColumns)
    # Execute Temporal Memory algorithm over active mini-columns.
    tm.compute(activeColumns, learn=False)
    tm_info.addData(tm.getActiveCells().flatten())
    # Predict what will happen, and then train the predictor based on what just happened.
    pdf = predictor.infer(tm.getActiveCells())
    # for n in (1, 5):
    #     if pdf[n]:
    #         predictions[n].append(np.argmax(pdf[n]) * predictor_resolution)
    #     else:
    #         predictions[n].append(float('nan'))
    anomalyLikelihood = anomaly_history.anomalyProbability(temp, tm.anomaly)
    print("anomalyLikelihood = {0}".format(anomalyLikelihood))
    if anomalyLikelihood > 0.9:
        print("Fault Data TT {0}, Temp {1}".format(tt, temp))
def main(argv=None, verbose=True):
    debug = False
    _EXAMPLE_DIR = os.path.dirname(os.path.abspath('htm.core'))
    fn = os.path.join(_EXAMPLE_DIR, "sim1b.csv")
    print(fn)
    cp = CsvPreparation(fn, 1)
    records = cp.getRecords()
    df = cp.getRecordsDataframe()
    df["sensorvalues"] = pd.to_numeric(df["sensorvalues"], downcast="float")
    df["timestamp"] = pd.to_numeric(df["timestamp"], downcast="integer")
    df["batchid"] = pd.to_numeric(df["batchid"], downcast="integer")
    df["tsdelta"] = df["timestamp"] - df.iloc[0].timestamp
    df['delta'] = df.sensorvalues.diff().shift(-1)
    pdList = []
    batches = df.batchid.unique()
    parameters = AdParam().getParam()
    for bid in batches:
        newPd = df[df['batchid'] == bid]
        pdList.append(newPd)
    #### now set up the HTM ####
    # Make the Encoders. These will convert input data into binary representations.
    dateEncoder = DateEncoder(timeOfDay=parameters["enc"]["time"]["timeOfDay"],
                              weekend=parameters["enc"]["time"]["weekend"])
    scalarEncoderParams = RDSE_Parameters()
    scalarEncoderParams.size = parameters["enc"]["value"]["size"]
    scalarEncoderParams.sparsity = parameters["enc"]["value"]["sparsity"]
    scalarEncoderParams.resolution = parameters["enc"]["value"]["resolution"]
    scalarEncoder = RDSE(scalarEncoderParams)
    encodingWidth = scalarEncoder.size + scalarEncoder.size
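    # NOTE: the same RDSE instance, and therefore the same resolution (0.88),
    # encodes both the time offset (seconds) and the temperature (deg C),
    # even though the two quantities span very different ranges.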
    enc_info = Metrics([encodingWidth], 999999999)
    # Make the HTM: SpatialPooler & TemporalMemory & associated tools.
    spParams = parameters["sp"]
    sp = SpatialPooler(
        inputDimensions=(encodingWidth,),
        columnDimensions=(spParams["columnCount"],),
        potentialPct=spParams["potentialPct"],
        potentialRadius=encodingWidth,
        globalInhibition=True,
        localAreaDensity=spParams["localAreaDensity"],
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        boostStrength=spParams["boostStrength"],
        wrapAround=True
    )
    sp_info = Metrics(sp.getColumnDimensions(), 999999999)
    tmParams = parameters["tm"]
    tm = TemporalMemory(
        columnDimensions=(spParams["columnCount"],),
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        connectedPermanence=spParams["synPermConnected"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"]
    )
    tm_info = Metrics([tm.numberOfCells()], 999999999)
    # Set up the anomaly likelihood; these settings are used in NAB.
    anParams = parameters["anomaly"]["likelihood"]
    probationaryPeriod = int(math.floor(float(anParams["probationaryPct"]) * len(records) / 9))  # 9 batches
    learningPeriod = int(math.floor(probationaryPeriod / 2.0))
    anomaly_history = AnomalyLikelihood(learningPeriod=learningPeriod,
                                        estimationSamples=probationaryPeriod - learningPeriod,
                                        historicWindowSize=40000,
                                        reestimationPeriod=anParams["reestimationPeriod"])
    predictor = Predictor(steps=[1, 5], alpha=parameters["predictor"]['sdrc_alpha'])
    predictor_resolution = 1
    # Now iterate through every datum in the dataset, recording the inputs & outputs.
    inputs = []
    anomaly = []
    anomalyProb = []
    predictions = {1: [], 5: []}
    idx = 0
    dtList = []
    for abatch in pdList:
        df = abatch.copy()
        df["tsdelta"] = df["timestamp"] - df.iloc[0].timestamp
        df['delta'] = df.sensorvalues.diff().shift(-1)
        df = df.fillna(method='ffill')
        # df.sort_values(by=['batchid', 'timestamp'], inplace=True)
        # df = df.reset_index()
        # print(df)
        dtList.append(df)
        # print(df["sensorvalues"].describe())
        # records = df.values.tolist()
    for df in dtList:
        # now learn one batch
        now = datetime.datetime.now()
        print("batch data num {0} start at {1}".format(len(df), now))
        if debug:
            print(df)
            print("-------------------------------------")
        index = 0  # init 0 for a batch
        for inx, row in df.iterrows():
            if debug:
                print("---------current row---------")
                print(row)
            tt = row.tsdelta
            temp = row.sensorvalues
            inputs.append(temp)
            ttbits = scalarEncoder.encode(tt)
            tempbits = scalarEncoder.encode(temp)
            encoding = SDR(encodingWidth).concatenate([tempbits, ttbits])
            enc_info.addData(encoding)
            # Create an SDR to represent active columns. This will be populated by the
            # compute method below. It must have the same dimensions as the Spatial Pooler.
            activeColumns = SDR(sp.getColumnDimensions())
            # Execute Spatial Pooling algorithm over input space.
            sp.compute(encoding, True, activeColumns)
            sp_info.addData(activeColumns)
            # Execute Temporal Memory algorithm over active mini-columns.
            tm.compute(activeColumns, learn=True)
            tm_info.addData(tm.getActiveCells().flatten())
            # Predict what will happen, and then train the predictor based on what just happened.
            pdf = predictor.infer(tm.getActiveCells())
            for n in (1, 5):
                if pdf[n]:
                    predictions[n].append(np.argmax(pdf[n]) * predictor_resolution)
                else:
                    predictions[n].append(float('nan'))
            anomalyLikelihood = anomaly_history.anomalyProbability(temp, tm.anomaly)
            anomaly.append(tm.anomaly)
            anomalyProb.append(anomalyLikelihood)
            count = idx * len(df) + index
            if count % 1000 == 0:
                print("count {0} tt {1} temp {2}, amllh {3}".format(count, tt, temp, anomalyLikelihood))
            index = index + 1
            predictor.learn(count, tm.getActiveCells(), int(temp / predictor_resolution))
        idx = idx + 1
        if idx == 3:
            break  # only the first 3 batches are used
    ###################
    # Now print the results.
    # Print information & statistics about the state of the HTM.
    print("Encoded Input", enc_info)
    print("")
    print("Spatial Pooler Mini-Columns", sp_info)
    print(str(sp))
    print("")
    print("Temporal Memory Cells", tm_info)
    print(str(tm))
    print("")
    # Shift the predictions so that they are aligned with the input they predict.
    for n_steps, pred_list in predictions.items():
        for x in range(n_steps):
            pred_list.insert(0, float('nan'))
            pred_list.pop()
    # Calculate the predictive accuracy (root-mean-squared error).
    accuracy = {1: 0, 5: 0}
    accuracy_samples = {1: 0, 5: 0}
    for idx, inp in enumerate(inputs):
        for n in predictions:  # for each [N]umber of time steps ahead which was predicted
            val = predictions[n][idx]
            if not math.isnan(val):
                accuracy[n] += (inp - val) ** 2
                accuracy_samples[n] += 1
    for n in sorted(predictions):
        accuracy[n] = (accuracy[n] / accuracy_samples[n]) ** .5
        print("Predictive Error (RMS)", n, "steps ahead:", accuracy[n])
    # Show info about the anomaly (mean & std).
    print("Anomaly Mean", np.mean(anomaly))
    print("Anomaly Std ", np.std(anomaly))
    print("-------------------- try a data point to make a decision ------------------")
    print("Expect FAULT: 3, 1260")
    determine(3, 1260, encodingWidth, sp, tm, scalarEncoder, enc_info, sp_info, tm_info, predictor, anomaly_history)
    print("Expect NORMAL: 10, 490")
    determine(10, 490, encodingWidth, sp, tm, scalarEncoder, enc_info, sp_info, tm_info, predictor, anomaly_history)
    print("Expect NORMAL: 10000, 1240")
    determine(10000, 1240, encodingWidth, sp, tm, scalarEncoder, enc_info, sp_info, tm_info, predictor, anomaly_history)
    return
main()
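For reference, sim1b.csv has the three columns used above: timestamp, sensorvalues, batchid. The first rows look roughly like this (the values here are made up for illustration; timestamps are plain numeric seconds):

timestamp,sensorvalues,batchid
0,500.0,1
60,505.8,1
120,511.7,1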