Tim McNamara - A Look at NuPIC - A Self-Learning AI Engine
Tim McNamara
Disclaimers:
More concretely:
import csv
import datetime

from nupic.frameworks.opf.modelfactory import ModelFactory

import model_params

# Build the model from the parameter dictionary shown below and tell it
# which field to predict.
model = ModelFactory.create(model_params.MODEL_PARAMS)
model.enableInference({'predictedField': 'consumption'})

# _DATA_PATH points at the input CSV (defined elsewhere in the example).
reader = csv.reader(open(_DATA_PATH))
headers = reader.next()  # NuPIC runs on Python 2
for i, record in enumerate(reader, start=1):
    modelInput = dict(zip(headers, record))
    modelInput["consumption"] = float(modelInput["consumption"])
    modelInput["timestamp"] = datetime.datetime.strptime(
        modelInput["timestamp"], "%m/%d/%y %H:%M")
    result = model.run(modelInput)

Each call to model.run() returns a ModelResult:
ModelResult(
    inferences={
        'multiStepPredictions': {
            1: {
                5.2825868514199987: 0.69999516634971859,
                10.699999999999999: 0.07601257054965195,
                22.100000000000001: 0.055294648127235196,
                22.899999999999999: 0.052690624183750749,
            },
            5: {
                38.188079999999999: 0.2275438176777452,
                47.359999999999992: 0.19538808382423584,
                37.399999999999999: 0.12597931862094047,
                45.399999999999999: 0.099123261272031596,
                37.089999999999996: 0.082913215936932752,
                39.280000000000001: 0.077935781935515161,
                43.629999999999995: 0.076405289164189288
            }
        },
        'multiStepBestPredictions': {
            1: 5.2825868514199987,
            5: 38.188079999999999
        }
    }
    ...
)
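
The inferences dictionary maps each prediction horizon (1 and 5 steps ahead
here) to candidate values and their likelihoods. A minimal sketch of reading
it back out, using only the structure visible above:

# Best single guess, one step ahead.
best = result.inferences['multiStepBestPredictions'][1]
print "1 step ahead: %.2f" % best

# Full distribution of candidates for the same horizon, most likely first.
candidates = result.inferences['multiStepPredictions'][1]
for value, likelihood in sorted(candidates.items(),
                                key=lambda kv: kv[1], reverse=True):
    print "  %.2f with likelihood %.3f" % (value, likelihood)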
MODEL_PARAMS = {
    # Type of model that the rest of these parameters apply to.
    'model': "CLA",

    'predictAheadTime': None,

    # Model parameter dictionary.
    'modelParams': {
        # The type of inference that this model will perform.
        'inferenceType': 'TemporalMultiStep',

        'sensorParams': {
            # Sensor diagnostic output verbosity control;
            # if > 0: sensor region will print out on screen what it's sensing
            # at each step; 0: silent; >=1: some info; >=2: more info;
            # >=3: even more info (see compute() in py/regions/RecordSensor.py)
            'verbosity': 0,

            # Example:
            #   dsEncoderSchema = [
            #       DeferredDictLookup('__field_name_encoder'),
            #   ],
            #
            # (value generated from DS_ENCODER_SCHEMA)
            'encoders': {
                'consumption': {
                    'clipInput': True,
                    'fieldname': u'consumption',
                    'n': 100,
                    'name': u'consumption',
                    'type': 'AdaptiveScalarEncoder',
                    'w': 21
                },
                'timestamp_dayOfWeek': {
                    'dayOfWeek': (21, 1),
                    'fieldname': u'timestamp',
                    'name': u'timestamp_dayOfWeek',
                    'type': 'DateEncoder'
                },
                'timestamp_timeOfDay': {
                    'fieldname': u'timestamp',
                    'name': u'timestamp_timeOfDay',
                    'timeOfDay': (21, 1),
                    'type': 'DateEncoder'
                },
                'timestamp_weekend': {
                    'fieldname': u'timestamp',
                    'name': u'timestamp_weekend',
                    'type': 'DateEncoder',
                    'weekend': 21
                }
            },
        },

        'spParams': {
            # SP diagnostic output verbosity control;
            # 0: silent; >=1: some info; >=2: more info;
            'spVerbosity': 0,
            'globalInhibition': 1,
            # Number of cell columns in the cortical region (same number for
            # SP and TP)
            # (see also tpNCellsPerCol)
            'columnCount': 2048,
            'inputWidth': 0,
            # SP inhibition control (absolute value);
            # Maximum number of active columns in the SP region's output (when
            # there are more, the weaker ones are suppressed)
            'numActivePerInhArea': 40,
            'seed': 1956,
            # coincInputPoolPct
            # What percent of the columns's receptive field is available
            # for potential synapses. At initialization time, we will
            # choose coincInputPoolPct * (2*coincInputRadius+1)^2
            'coincInputPoolPct': 0.5,
            # The default connected threshold. Any synapse whose
            # permanence value is above the connected threshold is
            # a "connected synapse", meaning it can contribute to the
            # cell's firing. Typical value is 0.10. Cells whose activity
            # level before inhibition falls below minDutyCycleBeforeInh
            # will have their own internal synPermConnectedCell
            # threshold set below this default value.
            # (This concept applies to both SP and TP and so 'cells'
            # is correct here as opposed to 'columns')
            'synPermConnected': 0.1,
            'synPermActiveInc': 0.1,
            'synPermInactiveDec': 0.01,
        },

        # Controls whether TP is enabled or disabled;
        # TP is necessary for making temporal predictions, such as predicting
        # the next inputs. Without TP, the model is only capable of
        # reconstructing missing sensor inputs (via SP).
        'tpEnable': True,

        'tpParams': {
            # TP diagnostic output verbosity control;
            # 0: silent; [1..6]: increasing levels of verbosity
            # (see verbosity in nta/trunk/py/nupic/research/TP.py and TP10X*.py)
            'verbosity': 0,
            'inputWidth': 2048,
            'seed': 1960,
            # Permanence Decrement
            # If set to None, will automatically default to tpPermanenceInc
            # value.
            'permanenceDec': 0.1,
            'globalDecay': 0.0,
            'maxAge': 0,
            'outputType': 'normal',
            # "Pay Attention Mode" length. This tells the TP how many new
            # elements to append to the end of a learned sequence at a time.
            # Smaller values are better for datasets with short sequences,
            # higher values are better for datasets with long sequences.
            'pamLength': 1,
        },

        'clParams': {
            'regionName': 'CLAClassifierRegion',
            # Classifier diagnostic output verbosity control;
            # 0: silent; [1..6]: increasing levels of verbosity
            'clVerbosity': 0,
            'trainSPNetOnlyIfRequested': False,
        },
    }
}
The interesting parameters fall into four groups:

sensorParams (input encoding)
spParams (spatial pooling)
tpParams (temporal pooling)
clParams (CLA classifier)
Zooming in on the encoders, which live under sensorParams:

'sensorParams': {
    ...
    'encoders': {
        'consumption': {
            'clipInput': True,
            'fieldname': u'consumption',
            'n': 100,
            'name': u'consumption',
            'type': 'AdaptiveScalarEncoder',
            'w': 21
        },
        'timestamp_dayOfWeek': {
            'dayOfWeek': (21, 1),
            'fieldname': u'timestamp',
            'name': u'timestamp_dayOfWeek',
            'type': 'DateEncoder'
        },
        'timestamp_timeOfDay': {
            'fieldname': u'timestamp',
            'name': u'timestamp_timeOfDay',
            'timeOfDay': (21, 1),
            'type': 'DateEncoder'
        },
        'timestamp_weekend': {
            'fieldname': u'timestamp',
            'name': u'timestamp_weekend',
            'type': 'DateEncoder',
            'weekend': 21
        }
    },
    ...
}
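
To get a feel for what an encoder does, here is a minimal sketch that drives
NuPIC's ScalarEncoder directly. The n=100/w=21 sizing mirrors the consumption
encoder above; the minval/maxval range is an assumption made up for
illustration.

from nupic.encoders import ScalarEncoder

# 100 output bits, 21 of them active for any given input value.
# minval/maxval here are illustrative, not taken from the model.
enc = ScalarEncoder(n=100, w=21, minval=0.0, maxval=100.0)

# Nearby scalars yield heavily overlapping bit patterns, which is
# exactly the property the spatial pooler exploits.
print enc.encode(5.3)
print enc.encode(5.5)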
As you can see, there are lots of knobs.
NuPIC's treat: swarming.

Swarming automates the tuning: it runs a swarm of candidate models over a
sample of your data, searches the parameter space, and keeps the combination
that predicts best. You drive it with a description like this:
{
  "includedFields": [
    { "fieldName": "timestamp", "fieldType": "datetime" },
    { "fieldName": "consumption", "fieldType": "float" }
  ],
  "streamDef": {
    "info": "test",
    "version": 1,
    "streams": [
      {
        "info": "hotGym.csv",
        "source": "file://extra/hotgym/hotgym.csv",
        "columns": [ "*" ],
        "last_record": 100
      }
    ],
    "aggregation": {
      "years": 0, "months": 0, "weeks": 0, "days": 0,
      "hours": 1, "minutes": 0, "seconds": 0,
      "microseconds": 0, "milliseconds": 0,
      "fields": [
        [ "consumption", "sum" ],
        [ "gym", "first" ],
        [ "timestamp", "first" ]
      ]
    }
  },
  "inferenceType": "MultiStep",
  "inferenceArgs": {
    "predictionSteps": [ 1 ],
    "predictedField": "consumption"
  },
  "iterationCount": -1,
  "swarmSize": "medium"
}
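
Kicking off a swarm from Python looks roughly like this. A minimal sketch,
assuming the description above is held in a dict named SWARM_DESCRIPTION and
that your NuPIC version ships permutations_runner (the module the official
hot gym tutorial uses); option names may vary between releases.

from nupic.swarming import permutations_runner

# SWARM_DESCRIPTION is the dict shown above. The swarm returns the
# best-performing model parameters, ready for ModelFactory.create().
best_model_params = permutations_runner.runWithConfig(
    SWARM_DESCRIPTION,
    {"maxWorkers": 4, "overwrite": True})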
GNS Science GeoNet
- comprehensive, open data on NZ earthquakes
- accessible via an easy, flexible, unauthenticated HTTP API
  (see the sketch below)
- includes ~50 variables per quake
- includes "felt reports"
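
A sketch of pulling recent quakes over that API. The endpoint, query
parameter, and response fields here are assumptions based on GeoNet's public
GeoJSON feed; check the GeoNet documentation for the current shapes.

import requests

# Hypothetical illustration: quakes felt at Modified Mercalli intensity 3+.
response = requests.get("http://api.geonet.org.nz/quake",
                        params={"MMI": 3})
for quake in response.json()["features"]:
    props = quake["properties"]
    print props["magnitude"], props["time"]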
Can we predict the likely human impact of a quake
based purely on sensor data?
...
Sadly, I don't know yet.
Problems
- swarming takes a lot of time
- predictedField is singular: one model predicts one field
  (see the workaround sketch below)
- wanted to predict likely counts of felt reports at each
  Modified Mercalli intensity from 0-10
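
Since predictedField takes a single field, one workaround is to run one model
per intensity band. A sketch using only the APIs shown earlier; the
felt_mmi_* field names are invented for illustration:

# One model per Modified Mercalli band (hypothetical field names).
models = {}
for mmi in range(11):
    field = "felt_mmi_%d" % mmi
    m = ModelFactory.create(model_params.MODEL_PARAMS)
    m.enableInference({'predictedField': field})
    models[field] = m

# Feed each incoming quake record to all eleven models.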
NIWA CliFlo
- comprehensive, open(ish) data on NZ weather
- accessible via an easy(ish) HTTP API
- real-time(ish)
Can we predict the likelihood of a severe weather event
based on these input streams?
...
I believe so, but...
Problems
• regional council flood-level data is harder
  to access than I had anticipated
• some licence uncertainty around CliFlo reuse
Consider these applications a work in progress!