Reinforcement learning: exploring reinforcement learning through the evolution of the Q-Learner
Create Date: August 21, 2019 at 06:00 PM         | Tag: MACHINE LEARNING         | Author Name: Sun, Charles |
Evan Hennis @ Google user group
Environment setup
TensorFlow 2.0.0 beta1
Jupyter Notebooks
Bellman's Equation
Value iteration
- instead of the optimal policy we find the maximum value
- value function
Policy Iteration
- update the policy directly
Q-Learning
deep Q-Network
google colab
New Comment
Data analytics
Create Date: June 21, 2018 at 11:41 PM         | Tag: DATA MINING         | Author Name: Sun, Charles |
Four areas of product development:
- Engagement
- growth
- utility
- core business
skills:
- product/business sense
- visualizing and communicating data effectively
- applied statistics
- experimentation/testing
- resourceful
- focused on results
technologies:
- hive/hadoop
- mysql/oracle
- python/php
- excel/R
- presto, scuba (open source)
Typical questions:
- what happens when everyone in the world is on a smartphone?
- what should we build next?
- how does seasonality affect usage?
- what types of people use this feature?
- why are people posting 5% more status updates this week?
- should we launch this new feature?
- interpreting the data with scientific rigor
- understanding what the metrics tell us about people’s experiences
- making trade-offs with conflicting results
- can we identify “bad” content by how people engage with it?
Recommender Systems Python 3 code
Create Date: April 27, 2018 at 11:49 PM         | Tag: MACHINE LEARNING         | Author Name: Sun, Charles |
class:
'''
Created on Apr 26, 2018

@author: charles
'''
import numpy as np


class Recommender:
    """Collaborative-filtering cost, gradient and data helpers.

    Model parameters travel "unrolled": a 1-D array holding the movie
    feature matrix X (num_movies x num_features) followed by the user
    parameter matrix theta (num_users x num_features), each flattened in
    NumPy's default row-major order.
    """

    def __init__(self):
        pass

    @staticmethod
    def _unroll(params, num_users, num_movies, num_features):
        """Split the flat parameter vector back into (X, theta)."""
        split = num_movies * num_features
        X = params[:split].reshape(num_movies, num_features)
        theta = params[split:].reshape(num_users, num_features)
        return X, theta

    @staticmethod
    def _cost(X, theta, err, Lambda):
        """Half the squared error on rated entries plus the L2 penalty."""
        return 1 / 2 * np.sum(err ** 2) + \
            Lambda / 2 * (np.sum(theta ** 2) + np.sum(X ** 2))

    @staticmethod
    def _gradient(X, theta, err, Lambda):
        """Unrolled gradient of the cost with respect to X, then theta."""
        X_grad = err.dot(theta) + Lambda * X
        theta_grad = err.T.dot(X) + Lambda * theta
        return np.append(X_grad.flatten(), theta_grad.flatten(), 0)

    def CheckCostFunction(self, Lambda=None):
        """Compare the analytical gradient with a numerical estimate.

        Builds a small random problem, prints both gradients side by side
        and the relative difference between them.

        Lambda: regularization strength; None is treated as 0.
        Returns the relative difference (small when the gradient is right).
        """
        if Lambda is None:
            Lambda = 0

        # Ratings generated from known low-rank factors.
        X_t = np.random.rand(4, 3)
        theta_t = np.random.rand(5, 3)
        Y = X_t.dot(theta_t.T)
        # Drop individual ratings at random; the mask must have Y's full
        # shape, otherwise whole rows are zeroed instead of single entries.
        Y[np.random.rand(*np.shape(Y)) > 0.5] = 0
        R = np.zeros(np.shape(Y))
        R[Y != 0] = 1

        # Random starting point at which the gradients are compared.
        X = np.random.randn(*np.shape(X_t))
        theta = np.random.randn(*np.shape(theta_t))
        num_movies, num_users = np.shape(Y)
        num_features = np.shape(theta_t)[1]
        params = np.append(X.flatten(), theta.flatten(), 0)

        def J(t):
            return self.CofiCostFunc(t, Y, R, num_users, num_movies,
                                     num_features, Lambda)

        numgrad = self.ComputeNumericalGradient(J, params)
        cost, grad = J(params)

        # Two columns, matching the message printed below.
        print(np.column_stack((numgrad, grad)))
        print('The above two columns you get should be very similar.')
        print('(Left-Your Numerical Gradient, Right-Analytical Gradient)')
        diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
        print('If your backpropagation implementation is correct, then '
              'the relative difference will be small (less than 1e-9). '
              'Relative Difference: ', diff)
        return diff

    def CofiCost(self, params, Y, R, num_users, num_movies, num_features,
                 Lambda):
        """Return only the regularized cost (for scipy.optimize `fun`)."""
        X, theta = self._unroll(params, num_users, num_movies, num_features)
        err = (X.dot(theta.T) - Y) * R
        return self._cost(X, theta, err, Lambda)

    def CofiGradient(self, params, Y, R, num_users, num_movies, num_features,
                     Lambda):
        """Return only the unrolled gradient (for scipy.optimize `jac`).

        Both halves are computed: the X part must not be left at zero, or
        an optimizer driven by this gradient never moves the movie features.
        """
        X, theta = self._unroll(params, num_users, num_movies, num_features)
        err = (X.dot(theta.T) - Y) * R
        return self._gradient(X, theta, err, Lambda)

    def CofiCostFunc(self, params, Y, R, num_users, num_movies, num_features,
                     Lambda):
        """Return [J, grad]: the regularized cost and its unrolled gradient.

        params: flat array, X (num_movies x num_features) then theta
            (num_users x num_features).
        Y: num_movies x num_users ratings.
        R: same shape as Y, 1 where a rating exists and 0 elsewhere.
        Lambda: regularization strength.
        """
        X, theta = self._unroll(params, num_users, num_movies, num_features)
        # Only entries that were actually rated contribute to the error.
        err = (X.dot(theta.T) - Y) * R
        J = self._cost(X, theta, err, Lambda)
        grad = self._gradient(X, theta, err, Lambda)
        return [J, grad]

    def ComputeNumericalGradient(self, J, theta):
        """Estimate the gradient of J at theta by central differences.

        J: callable returning a (cost, gradient) pair; only the cost is used.
        theta: 1-D parameter array.
        """
        numgrad = np.zeros(np.shape(theta))
        perturb = np.zeros(np.shape(theta))
        e = 1e-4
        for p in range(np.size(theta)):
            perturb[p] = e
            loss1, _ = J(theta - perturb)
            loss2, _ = J(theta + perturb)
            numgrad[p] = (loss2 - loss1) / (2 * e)
            perturb[p] = 0
        return numgrad

    def LoadMovieList(self):
        """Read 'movie_ids.txt' into a dict of {0-based index: title}.

        Each line is expected to look like '<id> <title>'. The file is read
        as text so titles come back clean (no bytes repr, no newline).
        """
        movielist = {}
        counter = 0
        # ISO-8859-1 decodes any byte sequence, so reading cannot fail on
        # non-ASCII titles. NOTE(review): confirm the file's real encoding.
        with open('movie_ids.txt', encoding='ISO-8859-1') as fid:
            for line in fid:
                line = line.strip()
                if not line:
                    continue
                movielist[counter] = line.split(' ', 1)[1]
                counter += 1
        return movielist

    def NormalizeRatings(self, Y, R):
        """Subtract each movie's mean rating, using rated entries only.

        Returns [Ynorm, Ymean]. Ynorm is 0 wherever R is not 1. A movie
        with no ratings keeps a mean of 0 instead of becoming NaN.
        """
        m, n = np.shape(Y)
        Ymean = np.zeros((m, 1))
        Ynorm = np.zeros((m, n))
        for i in range(m):
            rated = R[i, :] == 1
            if not rated.any():
                continue
            Ymean[i] = np.mean(Y[i, rated])
            Ynorm[i, rated] = Y[i, rated] - Ymean[i]
        return [Ynorm, Ymean]
Unit test: not sure why the result is very different compared to the course. I will keep digging
'''
Created on Apr 26, 2018

@author: charles
'''
import unittest

import numpy as np

from recommender import Recommender


class Test(unittest.TestCase):
    """Checks Recommender.CofiCostFunc on a small fixed example."""

    def setUp(self):
        self.r = Recommender()

    def tearDown(self):
        self.r = None

    def test_cofiCostFunc(self):
        num_users, num_movies, num_features = 3, 4, 2

        # 14 parameters: 4x2 movie features followed by 3x2 user parameters.
        # NOTE(review): CofiCostFunc unrolls these with NumPy's default
        # row-major reshape; Octave/MATLAB reshape is column-major, which
        # may be why the cost differs from the course's value - confirm.
        params = np.arange(1, 15) / 10.

        ratings = np.sin(np.array([[16, 2, 3, 13],
                                   [5, 11, 10, 8],
                                   [9, 7, 6, 12],
                                   [4, 14, 15, 1]]))
        Y = ratings[:, 0:3]
        rated = np.array([[1, 0, 1],
                          [1, 1, 1],
                          [0, 0, 1],
                          [1, 1, 0]])
        R = (rated > 0.5).astype(int)

        J, grad = self.r.CofiCostFunc(params, Y, R,
                                      num_users, num_movies,
                                      num_features, 0)
        print(J)
        print(grad)
        self.assertAlmostEqual(8.513, J, delta=1e-2)


if __name__ == "__main__":
    unittest.main()
Integration check:
'''
Created on Apr 27, 2018

@author: charles
'''
import scipy.io
import matplotlib.pyplot as plt
import numpy as np
import scipy.optimize as opt
from recommender import Recommender


def print_ratings(ratings, movielist):
    """Print every movie in `ratings` (a column vector) rated above 0."""
    for i in np.flatnonzero(ratings > 0):
        print('Rated {} for {}'.format(ratings[i], movielist[i]))


r = Recommender()

# ---- Load and inspect the ratings -------------------------------------
print('Loading movie ratings dataset.')
data = scipy.io.loadmat('ex8_movies.mat')
R_full = data['R']
Y_full = data['Y']
print('Average rating for movie 1 (Toy Story): %8.8f/5 '
      % np.mean(Y_full[0, R_full[0, :] == 1]))
plt.figure(figsize=(5, 5))
plt.imshow(Y_full)
plt.show()

# ---- Cost and gradient checks on a reduced problem --------------------
data1 = scipy.io.loadmat('ex8_movieParams.mat')

# Reduce the data set size so that this runs faster
num_users = 4
num_movies = 5
num_features = 3
X = data1['X'][0:num_movies, 0:num_features]
theta = data1['Theta'][0:num_users, 0:num_features]
Y = Y_full[0:num_movies, 0:num_users]
R = R_full[0:num_movies, 0:num_users]
loaded_params = np.append(X.flatten(), theta.flatten(), 0)

J, grad = r.CofiCostFunc(loaded_params, Y, R,
                         num_users, num_movies, num_features, 0)
print('Cost at loaded parameters: %2.2f (this value should be about 22.22)'
      % J)
print('Checking Gradients (without regularization) ...')
r.CheckCostFunction()

J, grad = r.CofiCostFunc(loaded_params, Y, R,
                         num_users, num_movies, num_features, 1.5)
print('Cost at loaded parameters (lambda = 1.5): %2.2f '
      '(this value should be about 31.34)' % J)
print('Checking Gradients (with regularization) ...')
r.CheckCostFunction(1.5)

# ---- Ratings for a new user -------------------------------------------
movielist = r.LoadMovieList()
my_ratings = np.zeros((1682, 1))
# Check the file movie_ids.txt for the id of each movie in our dataset.
# For example, Toy Story (1995) has ID 1, so to rate it "4" you can set
# my_ratings[0] = 4. Or suppose you did not enjoy Silence of the Lambs
# (1991), you can set my_ratings[97] = 2.
# rate other movies
my_ratings[6] = 3
my_ratings[11] = 5
my_ratings[53] = 4
my_ratings[63] = 5
my_ratings[65] = 3
my_ratings[68] = 5
my_ratings[182] = 4
my_ratings[225] = 5
my_ratings[354] = 5
print('New user ratings:')
print_ratings(my_ratings, movielist)

# ---- Train on the full dataset with the new user as column 0 -----------
print('Training collaborative filtering...')
Y = np.append(my_ratings, Y_full, 1)
R = np.append((my_ratings != 0).astype(int), R_full, 1)
Ynorm, Ymean = r.NormalizeRatings(Y, R)
num_movies, num_users = np.shape(Y)
num_features = 10
X = np.random.randn(num_movies, num_features)
theta = np.random.randn(num_users, num_features)
initial_parameters = np.append(X.flatten(), theta.flatten(), 0)
Lambda = 10
# Fit the mean-normalized ratings: Ymean is added back to the predictions
# below, so training on raw Y would count each movie's mean twice.
result = opt.minimize(fun=r.CofiCost, x0=initial_parameters,
                      args=(Ynorm, R, num_users, num_movies, num_features,
                            Lambda),
                      method='CG', jac=r.CofiGradient,
                      options={'maxiter': 100})

# Unfold the returned parameters back into X and theta
learned = result.x
X = learned[0:num_movies * num_features].reshape(num_movies, num_features)
theta = learned[num_movies * num_features:].reshape(num_users, num_features)
print('Recommender system learning completed.')

# ---- Recommend ---------------------------------------------------------
p = X.dot(theta.T)
# Column 0 is the new user; restore the per-movie mean removed before fitting.
my_predictions = p[:, 0] + Ymean.flatten()
ix = np.argsort(my_predictions, kind='mergesort')[::-1]
print('Top recommendations for you:')
for j in ix[:10]:
    print('Predicting rating %1.1f for movie %s'
          % (my_predictions[j], movielist[j]))
print('Original ratings provided:')
print_ratings(my_ratings, movielist)
New Comment