import tensorflow as tf

from liner_regression.fashion_mnist_dataset import *
# read data
batch_size = 256
train_data_iter, test_data_iter = load_data_fashion_mnist(batch_size)
# initialize model parameters
# each image in the dataset is a 28 * 28 image; in this section we flatten each image
# and treat it as a vector of length 784,
# so X's size is 256 * 784, W's size is 784 * 10, b is a vector of length 10
# (broadcast across all 256 rows), and y's size is 256 * 10 (y = softmax(XW + b))
num_inputs = 28 * 28
num_outputs = 10
W = tf.Variable(tf.random.normal(shape=(num_inputs, num_outputs), mean=0, stddev=0.01))
b = tf.Variable(tf.zeros(num_outputs))
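# quick sanity check (illustrative) of the parameter shapes described above
print(W.shape, b.shape)  # (784, 10) (10,)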
# define the softmax operation
def softmax(linear_result: tf.Tensor):
    # if linear_result is an n * m matrix,
    # exped is an n * m matrix
    exped = tf.exp(linear_result)
    # sum_of_each_line is an n * 1 matrix; with keepdims=False it would be a
    # vector of length n, which would not broadcast row-wise as we need
    sum_of_each_line = tf.reduce_sum(exped, 1, keepdims=True)
    return exped / sum_of_each_line
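# the training loop below calls net(x, W, b), which this section never defines;
# here is a minimal sketch consistent with the comment above (y = softmax(XW + b)),
# assuming x arrives already flattened to n * 784
def net(x, W, b):
    return softmax(tf.matmul(x, W) + b)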
# define the loss: cross-entropy
def cross_entropy(predicted_y, label_y):
    # predicted_y is an n * m matrix and label_y is a vector of length n holding class indices
    # in this example, predicted_y is 256 * 10 and label_y has length 256
    # tf.one_hot marks each row's true class, and tf.boolean_mask picks out the
    # predicted probability of that class
    return -tf.math.log(tf.boolean_mask(predicted_y, tf.one_hot(label_y, depth=predicted_y.shape[-1])))
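# worked example (illustrative): with 3 classes and labels [2, 0], tf.one_hot
# marks entries (0, 2) and (1, 0), tf.boolean_mask picks out 0.6 and 0.3,
# and the loss is [-log(0.6), -log(0.3)] ~= [0.51, 1.20]
demo_pred = tf.constant([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
demo_labels = tf.constant([2, 0])
print(cross_entropy(demo_pred, demo_labels))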
# define the optimizer
def stochastic_gradient_descent(params, gradients, batch_size, learning_rate: float):
    # because our loss is calculated as a sum over the mini-batch of examples,
    # we normalize the step size by the batch size (batch_size),
    # so that the magnitude of a typical step does not depend heavily on our choice of batch size
    for param, grad in zip(params, gradients):
        param.assign_sub(grad * learning_rate / batch_size)
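# quick check (illustrative) of the update rule: one scalar parameter, gradient
# 2.0 summed over a batch of 4, learning rate 0.1 -> step of 0.1 * 2.0 / 4 = 0.05
demo_param = tf.Variable(1.0)
stochastic_gradient_descent([demo_param], [tf.constant(2.0)], batch_size=4, learning_rate=0.1)
print(demo_param.numpy())  # 0.95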
# classification accuracy
def accuracy(predicted_y, label_y):
    # predicted_y is an n * m matrix and label_y is a vector of length n
    # in this example, predicted_y is 256 * 10 and label_y has length 256
    # tf.argmax returns the index with the largest value along the given axis of a tensor
    predicted_y = tf.argmax(predicted_y, axis=1)
    # cmp is a length-n boolean tensor
    cmp = tf.cast(predicted_y, label_y.dtype) == label_y
    # return the number of correct predictions and the total number of predictions
    return tf.reduce_sum(tf.cast(cmp, label_y.dtype)), label_y.shape[0]
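# small example (illustrative): rows 0 and 1 predict the correct class, row 2 does not
demo_scores = tf.constant([[0.1, 0.9], [0.8, 0.2], [0.4, 0.6]])
demo_targets = tf.constant([1, 0, 0])
num_right, num_total = accuracy(demo_scores, demo_targets)
print(int(num_right), num_total)  # 2 3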
# training
def train():
    for _ in range(3):
        num_right_predictions = 0
        num_total_predictions = 0
        for x, y in train_data_iter:
            with tf.GradientTape() as g:
                x = tf.reshape(x, shape=(x.shape[0], -1))
                y_hat = net(x, W, b)
                l = cross_entropy(y_hat, y)
            # the loss is a vector of per-example losses; sum it before
            # differentiating, matching the batch-size normalization in the optimizer
            grads = g.gradient(tf.reduce_sum(l), [W, b])
            # learning rate 0.1 is an assumed hyperparameter, not given in this section
            stochastic_gradient_descent([W, b], grads, x.shape[0], learning_rate=0.1)
            right, total = accuracy(y_hat, y)
            num_right_predictions += right
            num_total_predictions += total
        print('train accuracy:', float(num_right_predictions) / num_total_predictions)
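# entry point: run the three training epochs defined above
if __name__ == '__main__':
    train()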