# Simulate 10 pairs (x1, x2) of pseudo-random draws from independent Uniform[0, 1) random variables.
Xex = np.random.rand(10, 2) # Xex has 10 rows (one per point) and 2 columns (the x1 and x2 coordinates)
Xex # view these as 10 ordered pairs, i.e. points in the plane whose two axes are the predictors x1 and x2
Out[3]: array([[0.08072034, 0.26406061],
[0.39905512, 0.15924347],
[0.75927922, 0.18110477],
[0.835967 , 0.63886164],
[0.02655583, 0.42198268],
[0.70918178, 0.08333002],
[0.65725057, 0.76160834],
[0.62877755, 0.71599742],
[0.53159113, 0.53431959],
[0.76756446, 0.45007016]])
# Simulated response: a linear combination of the two predictor columns of Xex plus an intercept,
# with an independent Gaussian noise term (standard normal scaled by 0.2) added to each of the 10 entries.
Yex = np.matmul(Xex, [REAL_SLOPE_X1, REAL_SLOPE_X2]) + REAL_INTERCEPT + 0.2 * np.random.randn(10)
Yex # note how each entry in Yex is jiggled independently a bit by 0.2 * np.random.randn()
Out[7]: array([6.04882653, 6.27953302, 7.3613603 , 8.31650424, 6.42681496,
6.67349851, 8.13663555, 7.94619623, 7.47473459, 7.90535507])
make_mini_batch() # our mini-batch of Xs and Ys, displayed as a pair of arrays (predictors, responses)
Out[8]: (array([[0.95512791, 0.07560272],
[0.92905928, 0.17473683],
[0.46518843, 0.70432958],
[0.1921237 , 0.8437294 ],
[0.14006183, 0.07956913],
[0.38009314, 0.07700915],
[0.31010924, 0.44697019],
[0.34576102, 0.14892159],
[0.15583036, 0.08141529],
[0.9498036 , 0.46449701]]), array([[6.75473895],
[7.63846734],
[7.80724397],
[7.8060772 ],
[5.59210548],
[6.20482347],
[6.58383517],
[6.19242001],
[5.76035242],
[8.39372949]]))
import tensorflow as tf
# Fit the two-predictor linear model  y ~ w1*x1 + w2*x2 + b  by mini-batch
# gradient descent using the TensorFlow 1.x graph/session API.
batch = 10  # number of rows in each mini-batch
tf.reset_default_graph()  # clear the default graph before building a new model

# We work in single floating point precision throughout; this is what the
# tf.float32 argument given to each placeholder specifies.
x = tf.placeholder(tf.float32, shape=(batch, 2))  # the (x1, x2) predictor pairs, one row per example
# Append a column of ones (concatenation along axis 1) so that the intercept
# is learned as a third weight next to the two slopes.
x_aug = tf.concat((x, tf.ones((batch, 1))), 1)
y = tf.placeholder(tf.float32, shape=(batch, 1))  # the univariate response: batch rows, 1 column
model_params = tf.get_variable("model_params", [3, 1])  # x1 slope, x2 slope and intercept (3 rows, 1 column)
y_model = tf.matmul(x_aug, model_params)  # model prediction for every row of the batch

# Note that the noise is formally part of the model; what we are actually
# modelling here is the mean response.
# Mean squared error: sum of squared residuals divided by the batch size.
error = tf.reduce_sum(tf.square(y - y_model)) / batch
train_op = tf.train.GradientDescentOptimizer(0.02).minimize(error)  # learning rate is 0.02
init = tf.global_variables_initializer()  # op that initializes model_params when run in a session

errors = []  # training error recorded at every iteration

with tf.Session() as session:
    session.run(init)
    for i in range(1000):
        # Simulate a fresh mini-batch of predictors and noisy responses.
        x_data, y_data = make_mini_batch(batch)
        # One gradient-descent step; also fetch the error for this batch.
        _, error_val = session.run([train_op, error], feed_dict={x: x_data, y: y_data})
        errors.append(error_val)
    # Read the fitted parameters while the session is still open.
    out = session.run(model_params)
    print(out)
WARNING:tensorflow:From /databricks/python/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
[[2.0491152]
[3.0319374]
[4.961138 ]]
ScaDaMaLe Course site and book
This is a 2019-2021 augmentation and update of Adam Breindel's initial notebooks.
Thanks to Christian von Koch and William Anzén for their contributions towards making these materials Spark 3.0.1 and Python 3+ compliant.