import numpy as np
import pandas as pd
import json
from pandas.io.json import json_normalize
from mapr_streams_python import Consumer, KafkaError
import random
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

Pull test data from OpenTSDB

import base64
import requests
import yaml
# Query the OpenTSDB HTTP API for the last 16 hours of the Panel2Power metric
# (requested with the `avg` aggregator).
url="http://nodeb:4242/api/query?start=16h-ago&m=avg:Panel2Power"
# This is a plain GET against OpenTSDB, so ask for JSON rather than sending a
# Kafka REST proxy content type.
headers = {'Accept': 'application/json'}
# A timeout keeps the notebook from hanging forever if the node is unreachable.
r = requests.get(url, headers=headers, timeout=30)
# Fail loudly on HTTP errors instead of trying to parse an error payload.
r.raise_for_status()
# The response body is JSON; parse it as JSON rather than through a YAML loader.
results = r.json()
if not results or not results[0].get('dps'):
    raise ValueError("OpenTSDB returned no data points for query: " + url)
# 'dps' maps epoch-second timestamps (as JSON string keys) to metric values.
d = results[0]['dps']
df3 = pd.DataFrame(list(d.items()), columns=['timestamp', 'value'])
# JSON object keys are strings; convert to integers before interpreting them
# as seconds since the epoch.
df3['timestamp'] = pd.to_datetime(df3['timestamp'].astype('int64'), unit='s')
# The mapping is not guaranteed to be in time order, so sort explicitly.
df3 = df3.sort_values(by=['timestamp'])
ts = pd.Series(df3['value'].values, index=df3['timestamp'])
ts.plot(c='b', title="Time series data")
plt.show()
ts.head(10)

timestamp
2018-01-30 16:24:02    34.7
2018-01-30 16:25:11    36.4
2018-01-30 16:26:02    28.5
2018-01-30 16:27:01    36.3
2018-01-30 16:28:02    38.3
2018-01-30 16:29:02    38.9
2018-01-30 16:30:02    30.4
2018-01-30 16:31:01    33.4
2018-01-30 16:32:02    23.1
2018-01-30 16:33:01    34.5
dtype: float64

Segment the OpenTSDB data into windows

We'll train the RNN model on a narrow segment of the data recorded in OpenTSDB.

# Train on the first 801 samples of the series only.
TS = np.array(ts[0:801])
num_periods = 20   # number of time steps in each training window
f_horizon = 1      # how many steps ahead the targets are shifted

# Largest sample count that splits into whole windows while still leaving
# f_horizon trailing samples for the shifted targets. Without reserving them,
# a series whose length is an exact multiple of num_periods yields a target
# array that is too short to reshape.
usable = ((len(TS) - f_horizon) // num_periods) * num_periods
if usable <= 0:
    raise ValueError(
        "Need at least %d samples to build one training window, got %d"
        % (num_periods + f_horizon, len(TS)))

# Inputs: consecutive windows of num_periods samples, one feature each.
x_data = TS[:usable]
x_batches = x_data.reshape(-1, num_periods, 1)

# Targets: the same windows shifted forward by f_horizon samples.
y_data = TS[f_horizon:usable + f_horizon]
y_batches = y_data.reshape(-1, num_periods, 1)
print (len(x_batches))
print (x_batches.shape)
print (y_batches.shape)

40
(40, 20, 1)
(40, 20, 1)

def test_data(series,forecast,num_periods):
    """Build one hold-out input/target window pair from the end of *series*.

    Args:
        series: 1-D sequence of samples (array, list or pandas Series).
        forecast: Number of steps the target window is shifted ahead of the
            input window.
        num_periods: Number of time steps in each window.

    Returns:
        A ``(testX, testY)`` tuple of arrays shaped ``(1, num_periods, 1)``.
        ``testY`` is the last ``num_periods`` samples of *series*; ``testX``
        is the window that ends ``forecast`` samples earlier.

    Raises:
        ValueError: If ``num_periods`` is not positive or *series* has fewer
            than ``num_periods + forecast`` samples.
    """
    if num_periods <= 0:
        raise ValueError("num_periods must be positive")
    # Work on the argument that was passed in, not on a notebook-level global.
    values = np.asarray(series)
    needed = num_periods + forecast
    if len(values) < needed:
        raise ValueError(
            "series has %d samples but %d are required" % (len(values), needed))
    test_x_setup = values[-needed:]
    # Reshape with num_periods so window lengths other than 20 work too.
    testX = test_x_setup[:num_periods].reshape(-1, num_periods, 1)
    testY = values[-num_periods:].reshape(-1, num_periods, 1)
    return testX,testY


# The name starts with "test_" but this is a data helper, not a test case;
# keep test collectors such as pytest from picking it up.
test_data.__test__ = False
# Carve the hold-out input/target windows off the end of the training series
# and report the input shape.
X_test, Y_test = test_data(series=TS, forecast=f_horizon, num_periods=num_periods)
print(X_test.shape)

(1, 20, 1)

import tensorflow as tf
import os
import shutil
import tensorflow.contrib.learn as tflearn
import tensorflow.contrib.layers as tflayers
from tensorflow.contrib.learn.python.learn import learn_runner
import tensorflow.contrib.metrics as metrics
import tensorflow.contrib.rnn as rnn

/usr/local/lib/python3.5/dist-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters

Build the RNN model in Tensorflow

# Define the TensorFlow 1.x graph: a single ReLU RNN layer followed by a dense
# projection that emits one prediction per time step.

# Clear the default graph before defining the model.
tf.reset_default_graph()
# num_periods (the window length) is reused from the windowing step, where it is 20.
inputs = 1      # features per time step fed to the network
hidden = 100    # number of units in the recurrent cell
output = 1      # values predicted per time step

# Placeholders for input and target windows; the leading (batch) dimension is
# left unspecified.
x = tf.placeholder(tf.float32, [None, num_periods, inputs])
y = tf.placeholder(tf.float32, [None, num_periods, output])

# Basic recurrent cell with ReLU activation, run over the time axis of x.
basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=hidden, activation=tf.nn.relu)
rnn_output, states = tf.nn.dynamic_rnn(basic_cell, x, dtype=tf.float32)
learning_rate = 0.001
# Flatten to rows of `hidden` values so one dense layer can be applied to
# every time step, then restore the (batch, num_periods, output) layout.
stacked_rnn_output = tf.reshape(rnn_output, [-1, hidden])
stacked_outputs = tf.layers.dense(stacked_rnn_output, output)
outputs = tf.reshape(stacked_outputs, [-1, num_periods, output])

# Loss is the SUM of squared errors over all fed elements (reduce_sum, not a
# mean), so its magnitude grows with the amount of data fed.
loss = tf.reduce_sum(tf.square(outputs - y))
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)
training_op = optimizer.minimize(loss)
# Op that initializes all graph variables; it must be run before training.
init = tf.global_variables_initializer()

# Train on the full set of windows for a fixed number of epochs, reporting the
# loss every 100 epochs, then predict the hold-out window in the same session.
epochs = 1000
train_feed = {x: x_batches, y: y_batches}

with tf.Session() as sess:
    sess.run(init)
    for ep in range(epochs):
        sess.run(training_op, feed_dict=train_feed)
        if ep % 100 != 0:
            continue
        # Evaluated after the update step for this epoch. The label says
        # "MSE", but `loss` is built with reduce_sum, i.e. a sum of squares.
        mse = sess.run(loss, feed_dict=train_feed)
        print (ep, "\tMSE:", mse)

    # Forecast for the hold-out input window using the trained weights.
    y_pred = sess.run(outputs, feed_dict={x: X_test})

0 	MSE: 407618.75
100 	MSE: 13907.389
200 	MSE: 7875.666
300 	MSE: 5748.4287
400 	MSE: 5132.593
500 	MSE: 4703.6787
600 	MSE: 4322.6396
700 	MSE: 4031.6401
800 	MSE: 3641.7007
900 	MSE: 3333.899

Compare forecasts with ground truth

# Plot the hold-out targets against the model's forecast on a shared time axis.
plt.title("Forecast vs Actual", fontsize = 14)
actual = np.ravel(Y_test)
forecast = np.ravel(y_pred)
# Y_test holds the LAST samples of TS, and TS is only the leading slice of ts.
# Take the timestamps from the matching positions in ts; using the tail of the
# full query result would mislabel the x-axis whenever the query returns more
# samples than were kept in TS.
timestamps = ts.index[len(TS) - len(actual):len(TS)].values
# Each series is drawn twice: once as a line, once as markers carrying the
# legend label.
plt.plot(timestamps, actual, "b-", markersize = 10)
plt.plot(timestamps, actual, "bo", markersize = 10, label="Actual")
plt.plot(timestamps, forecast, "r-", markersize = 10)
plt.plot(timestamps, forecast, "r.", markersize = 10, label="Forecast")
plt.legend(loc="upper left")
plt.xlabel("Time")

plt.show()

Predicting Time-Series data from OpenTSDB with RNNs in Tensorflow

OpenTSDB REST API examples

Forecasting time-series data with RNNs in Tensorflow

Jupyter Notebook

Pull test data from OpenTSDB

Segment the OpenTSDB data into windows

Build the RNN model in Tensorflow

Compare forecasts with ground truth

Hope that helped!