1 2 3 4 5 6 7 8 9 10 Next »

Reinforcement learning: exploring reinforcement learning through the evolution of the Q-Learner

Create Date: August 21, 2019 at 06:00 PM         Tag: MACHINE LEARNING         Author Name: Sun, Charles

Evan Hennis @ Google user group

Environment setup

TensorFlow 2.0.0 beta1

Jupyter Notebooks

Bellman's Equation

Value iteration

Policy Iteration

Q-Learning

Deep Q-Network

Google Colab

New Comment

Data analytics

Create Date: June 21, 2018 at 11:41 PM         Tag: DATA MINING         Author Name: Sun, Charles

Four areas of product development:

skills:

technologies:

Typical questions:

New Comment

Recommender Systems Python 3 code

Create Date: April 27, 2018 at 11:49 PM         Tag: MACHINE LEARNING         Author Name: Sun, Charles

class:

'''
Created on Apr 26, 2018

@author: charles
'''

import numpy as np

class Recommender:
    """Collaborative-filtering cost, gradients, gradient check and data helpers.

    Parameters are exchanged with optimizers as ONE flat vector: the movie
    feature matrix X (num_movies x num_features) flattened row-major,
    followed by the user parameter matrix theta (num_users x num_features)
    flattened row-major.  Y holds ratings (movies x users) and R is the
    0/1 indicator of which entries of Y are actual ratings.
    """

    def __init__(self):
        pass

    @staticmethod
    def _unpack(params, num_users, num_movies, num_features):
        """Split the flat parameter vector into the (X, theta) matrices."""
        split = num_movies * num_features
        X = params[:split].reshape(num_movies, num_features)
        theta = params[split:].reshape(num_users, num_features)
        return X, theta

    @staticmethod
    def _residual(X, theta, Y, R):
        """Return prediction minus rating, zeroed wherever R marks 'not rated'."""
        return X.dot(theta.T) * R - Y * R

    def CheckCostFunction(self, Lambda=None):
        """Compare the analytical gradient with a numerical one on a random problem.

        Builds a small random rating matrix, evaluates CofiCostFunc's gradient
        and a central-difference estimate at a random point, and prints both
        together with their relative difference.

        Lambda: regularization strength; None is treated as 0.
        """
        if Lambda is None:
            Lambda = 0
        X_t = np.random.rand(4, 3)
        theta_t = np.random.rand(5, 3)

        # Random low-rank ratings with roughly half of the individual
        # entries dropped (an element-wise mask, not whole rows).
        Y = X_t.dot(theta_t.T)
        Y[np.random.rand(*np.shape(Y)) > 0.5] = 0
        R = np.zeros(np.shape(Y))
        R[Y != 0] = 1

        # Random evaluation point with the same shapes as the generators.
        X = np.random.randn(*np.shape(X_t))
        theta = np.random.randn(*np.shape(theta_t))
        num_movies, num_users = np.shape(Y)
        num_features = np.shape(theta_t)[1]
        point = np.append(X.flatten(), theta.flatten(), 0)

        def J(t):
            return self.CofiCostFunc(t, Y, R, num_users, num_movies,
                                     num_features, Lambda)

        numgrad = self.ComputeNumericalGradient(J, point)
        cost, grad = J(point)
        print(numgrad, grad)
        print('The above two columns you get should be very similar.')
        print('(Left-Your Numerical Gradient, Right-Analytical Gradient)')
        diff = np.linalg.norm(numgrad-grad)/np.linalg.norm(numgrad+grad)
        print('If your backpropagation implementation is correct, then \
               the relative difference will be small (less than 1e-9).\
               Relative Difference: ', diff)

    def CofiCost(self, params, Y, R, num_users, num_movies, num_features, Lambda):
        """Return the regularized squared-error cost for the flat `params`.

        Only entries with R == 1 contribute to the error term; both X and
        theta are penalized with Lambda/2 * (sum of squares).
        """
        X, theta = self._unpack(params, num_users, num_movies, num_features)
        err = self._residual(X, theta, Y, R)
        return 1/2*np.sum(err**2) + \
            Lambda/2*((np.sum(theta**2)) + np.sum(X**2))

    def CofiGradient(self, params, Y, R, num_users, num_movies, num_features, Lambda):
        """Return the gradient of CofiCost as a flat vector laid out like `params`.

        Both halves are filled in: the X part first, then the theta part.
        (Leaving the X part at zero would tell an optimizer that the movie
        features never need to move.)
        """
        X, theta = self._unpack(params, num_users, num_movies, num_features)
        err = self._residual(X, theta, Y, R)
        X_grad = err.dot(theta) + Lambda*X
        theta_grad = err.T.dot(X) + Lambda*theta
        return np.append(X_grad.flatten(), theta_grad.flatten(), 0)

    def CofiCostFunc(self, params, Y, R, num_users, num_movies, num_features, Lambda):
        """Return [cost, gradient] for the flat `params` in a single pass.

        The cost equals CofiCost(...) and the gradient equals
        CofiGradient(...) for the same arguments.
        """
        X, theta = self._unpack(params, num_users, num_movies, num_features)
        err = self._residual(X, theta, Y, R)
        J = 1/2*np.sum(err**2) + \
            Lambda/2*((np.sum(theta**2)) + np.sum(X**2))
        X_grad = err.dot(theta) + Lambda*X
        theta_grad = err.T.dot(X) + Lambda*theta
        grad = np.append(X_grad.flatten(), theta_grad.flatten(), 0)
        return [J, grad]

    def ComputeNumericalGradient(self, J, theta):
        """Estimate the gradient of J at `theta` by central differences.

        J: callable returning a (cost, gradient) pair; only the cost is used.
        theta: 1-D array at which to evaluate.
        Returns an array shaped like `theta`.
        """
        numgrad = np.zeros(np.shape(theta))
        perturb = np.zeros(np.shape(theta))
        e = 1e-4
        for p in range(theta.size):
            # Nudge one coordinate at a time in both directions.
            perturb[p] = e
            loss1 = J(theta - perturb)[0]
            loss2 = J(theta + perturb)[0]
            numgrad[p] = (loss2 - loss1)/(2*e)
            perturb[p] = 0
        return numgrad

    def LoadMovieList(self):
        """Read 'movie_ids.txt' and return {zero-based line index: title}.

        Each line is expected to look like '<id> <title>'; everything after
        the first space is the title, with surrounding whitespace removed.
        Blank lines are skipped (their index is simply left out).
        """
        movielist = {}
        # Text mode, so titles are real strings rather than the repr of a
        # bytes object.  ISO-8859-1 maps every byte to a character, so the
        # decode cannot fail for any 8-bit encoding.
        # NOTE(review): confirm the actual encoding of movie_ids.txt.
        with open('movie_ids.txt', encoding='ISO-8859-1') as fid:
            for counter, line in enumerate(fid):
                if not line.strip():
                    continue
                movielist[counter] = line.partition(' ')[2].strip()
        return movielist

    def NormalizeRatings(self, Y, R):
        """Subtract each movie's mean rating, using only rated entries.

        Returns [Ynorm, Ymean]: Ynorm has the shape of Y with unrated entries
        left at 0; Ymean is a column vector of per-movie means.  A movie with
        no ratings at all gets mean 0 instead of NaN.
        """
        m, n = np.shape(Y)
        Ymean = np.zeros((m, 1))
        Ynorm = np.zeros((m, n))
        for i in range(m):
            rated = R[i, :] == 1
            if not rated.any():
                # Nothing to average; keep the zero defaults.
                continue
            Ymean[i] = np.mean(Y[i, rated])
            Ynorm[i, rated] = Y[i, rated] - Ymean[i]
        return [Ynorm, Ymean]

Unit test: I am not sure why the result is very different from the course's. I will keep digging.

'''
Created on Apr 26, 2018

@author: charles
'''
import unittest
import numpy as np
from recommender import Recommender

class Test(unittest.TestCase):
    """Checks Recommender.CofiCostFunc on a small, fully specified problem."""

    def setUp(self):
        # A fresh recommender for every test method.
        self.r = Recommender()

    def tearDown(self):
        self.r = None

    def test_cofiCostFunc(self):
        """Unregularized cost for 4 movies, 3 users and 2 features."""
        n_users, n_movies, n_features = 3, 4, 2

        # 14 parameters: 4*2 movie features followed by 3*2 user weights.
        params = np.array(range(1, 15))/10.

        square = np.array([[16, 2, 3, 13],
                           [5, 11, 10, 8],
                           [9, 7, 6, 12],
                           [4, 14, 15, 1]])
        Y = np.sin(square)[:, 0:3]

        rated = np.array([[1, 0, 1], [1, 1, 1], [0, 0, 1], [1, 1, 0]])
        R = (rated > 0.5).astype(int)

        J, grad = self.r.CofiCostFunc(params, Y, R,
                                      n_users, n_movies, n_features, 0)
        print(J)
        print(grad)
        self.assertAlmostEqual(8.513, J, delta=1e-2)


if __name__ == "__main__":
    # Executed as a script: discover and run every test case in this module.
    unittest.main()

Integration check:

'''
Created on Apr 27, 2018

@author: charles
'''
import scipy.io
import matplotlib.pyplot as plt
import numpy as np
import scipy.optimize as opt
from recommender import Recommender

r = Recommender()

# ---- Load the ratings data (movies x users) ----
print('Loading movie ratings dataset.')
data = scipy.io.loadmat('ex8_movies.mat')
R = data['R']
Y = data['Y']
print('Average rating for movie 1 (Toy Story): %8.8f/5 ' \
        % np.mean(Y[0, R[0, :] == 1]))

plt.figure(figsize=(5, 5))
plt.imshow(Y)
plt.show()

# ---- Cost check on a reduced problem with pre-trained parameters ----
data1 = scipy.io.loadmat('ex8_movieParams.mat')
# Reduce the data set size so that this runs faster
num_users = 4
num_movies = 5
num_features = 3

X_small = data1['X'][0:num_movies, 0:num_features]
theta_small = data1['Theta'][0:num_users, 0:num_features]
Y_small = Y[0:num_movies, 0:num_users]
R_small = R[0:num_movies, 0:num_users]
small_params = np.append(X_small.flatten(), theta_small.flatten(), 0)

J, grad = r.CofiCostFunc(small_params, Y_small, R_small,
                         num_users, num_movies, num_features, 0)
print('Cost at loaded parameters: %2.2f (this value should be about 22.22)' %J)

print('Checking Gradients (without regularization) ...')
r.CheckCostFunction()

J, grad = r.CofiCostFunc(small_params, Y_small, R_small,
                         num_users, num_movies, num_features, 1.5)
print('Cost at loaded parameters (lambda = 1.5): %2.2f \
        (this value should be about 31.34)' %J)

print('Checking Gradients (with regularization) ...')
r.CheckCostFunction(1.5)

# ---- Enter ratings for a new user ----
movielist = r.LoadMovieList()
my_ratings = np.zeros((1682, 1))

# Check the file movie_ids.txt for the id of each movie in our dataset.
# For example, Toy Story (1995) has ID 1, so to rate it "4", you can set
my_ratings[0] = 4

# Or suppose you did not enjoy Silence of the Lambs (1991); you can set
my_ratings[97] = 2
# rate other movies
my_ratings[6] = 3
my_ratings[11] = 5
my_ratings[53] = 4
my_ratings[63] = 5
my_ratings[65] = 3
my_ratings[68] = 5
my_ratings[182] = 4
my_ratings[225] = 5
my_ratings[354] = 5

# Row indices of the movies the new user actually rated.
rated_rows = np.where(my_ratings[:, 0] > 0)[0]

print('New user ratings:')
for i in rated_rows:
    # Index the scalar so the rating prints as a number, not as a 1-element array.
    print('Rated {} for {}'.format(my_ratings[i, 0], movielist[i]))

# ---- Train collaborative filtering on the full data plus the new user ----
print('Training collaborative filtering...')
# The new user becomes column 0 of both matrices.
Y = np.append(my_ratings, Y, 1)
R = np.append((my_ratings != 0) + 0, R, 1)

Ynorm, Ymean = r.NormalizeRatings(Y, R)
num_users = np.shape(Y)[1]
num_movies = np.shape(Y)[0]
num_features = 10

X = np.random.randn(num_movies, num_features)
theta = np.random.randn(num_users, num_features)

initial_parameters = np.append(X.flatten(), theta.flatten(), 0)
Lambda = 10

# Fit the mean-normalized ratings: the per-movie mean is added back to the
# predictions below, so training on the raw Y would count the mean twice.
result = opt.minimize(fun = r.CofiCost, x0 = initial_parameters,\
         args = (Ynorm, R, num_users, num_movies, num_features, Lambda), \
         method = 'CG', jac = r.CofiGradient, \
         options = {'maxiter':100})

params = result.x

# Unfold the returned flat vector back into X and theta
X = params[0:num_movies*num_features].reshape(num_movies, num_features)
theta = params[num_movies*num_features:].reshape(num_users, num_features)

print('Recommender system learning completed.')

# ---- Recommend movies for the new user (column 0) ----
p = X.dot(theta.T)
my_predictions = p[:, 0] + Ymean[:, 0]

# Stable sort, then reversed, so the highest predictions come first.
order = np.argsort(my_predictions, kind='mergesort')[::-1]

print('Top recommendations for you:')
for j in order[:10]:
    print('Predicting rating %1.1f for movie %s' %(my_predictions[j],\
                        movielist[j]))

print('Original ratings provided:')
for i in rated_rows:
    print('Rated {} for {}'.format(my_ratings[i, 0], movielist[i]))
        

 

New Comment
1 2 3 4 5 6 7 8 9 10 Next »