# Simulate 10 ordered pairs (x1, x2): each entry is an independent
# pseudo-random draw from the Uniform[0, 1) distribution.
Xex = np.random.rand(10, 2)

# Bare expression so the notebook displays the array; visualize the rows as
# 10 ordered pairs of points in the x-y plane (our x1 and x2 axes).
Xex
Out[10]:
array([[0.21757443, 0.01815727],
[0.55624387, 0.51717902],
[0.50736386, 0.81707665],
[0.85695071, 0.08247471],
[0.45759568, 0.75775923],
[0.47513327, 0.35025263],
[0.80005626, 0.33802171],
[0.21462401, 0.14219813],
[0.11143285, 0.89456028],
[0.89588183, 0.95661233]])
# Simulated responses: the linear model applied to each row of Xex, plus an
# independent Gaussian perturbation (standard normal scaled by 0.2) per entry.
Yex = (
    np.matmul(Xex, [REAL_SLOPE_X1, REAL_SLOPE_X2])
    + REAL_INTERCEPT
    + 0.2 * np.random.randn(10)
)

# Bare expression so the notebook displays the array; note how each entry in
# Yex is jiggled independently a bit by 0.2 * np.random.randn().
Yex
Out[14]:
array([5.51604324, 7.77768412, 8.44899552, 7.20338209, 8.40246384,
7.07592727, 7.26620664, 5.85385249, 8.18290919, 9.69088502])
# Draw one simulated mini-batch and display it. The output shown below is a
# pair: a (10, 2) array of predictors and a (10, 1) array of responses.
make_mini_batch() # our mini-batch of Xs and Ys
Out[15]:
(array([[0.53055592, 0.62512968],
[0.71641671, 0.16258358],
[0.98501818, 0.3434015 ],
[0.00145872, 0.42206563],
[0.13963167, 0.49958068],
[0.58965079, 0.69253778],
[0.1125337 , 0.7821038 ],
[0.53812365, 0.72680835],
[0.0228825 , 0.48261083],
[0.83198857, 0.64067338]]), array([[7.72924566],
[6.83058688],
[8.17826698],
[6.21726091],
[6.66331251],
[8.48564693],
[7.59010849],
[7.87368412],
[6.70226285],
[8.55898236]]))
import tensorflow as tf

batch = 10  # size of batch

tf.reset_default_graph()  # this is important to do before you do something new in TF

# We work with single floating point precision; this is specified by the
# tf.float32 type argument to each tf object/method.

# Placeholder node for the pairs of x variables (predictors), fed in batches
# of size `batch`.
x = tf.placeholder(tf.float32, shape=(batch, 2))

# Append a column of 1's to x (concatenation along axis 1) so that the
# intercept can be fitted as a third coefficient.
x_aug = tf.concat((x, tf.ones((batch, 1))), 1)

# Placeholder node for the univariate response y: `batch` rows and 1 column.
y = tf.placeholder(tf.float32, shape=(batch, 1))

# The x1 slope, x2 slope and the intercept (3 rows and 1 column).
model_params = tf.get_variable("model_params", [3, 1])

# Our two-factor regression model is defined by this matrix multiplication.
# Note that the noise is formally part of the model and what we are actually
# modeling is the mean response.
y_model = tf.matmul(x_aug, model_params)

# Mean square error: squared residuals summed by a reduce call, then divided
# by the batch size.
error = tf.reduce_sum(tf.square(y - y_model)) / batch

# Plain gradient descent on the error; learning rate is set to 0.02.
train_op = tf.train.GradientDescentOptimizer(0.02).minimize(error)

init = tf.global_variables_initializer()  # our way into running the TF session

errors = []  # list to track errors over iterations

with tf.Session() as session:
    session.run(init)
    for i in range(1000):
        # Simulate the mini-batch of data x1, x2 and response y with noise.
        x_data, y_data = make_mini_batch(batch)
        # One optimizer step; also fetch the current error for tracking.
        _, error_val = session.run(
            [train_op, error], feed_dict={x: x_data, y: y_data}
        )
        errors.append(error_val)

    # Read the fitted parameters while the session is still open.
    out = session.run(model_params)
    print(out)
[[1.9413265]
[2.8955398]
[5.085459 ]]
SDS-2.x, Scalable Data Engineering Science
This is a 2019 augmentation and update of Adam Breindel's initial notebooks.
Last refresh: Never