# This is a library for measuring distances
Pkg.add("Distances")
using Distances
# Toy data.
# Utility matrix: one row per user, one column per item; 0 means "not rated".
um = [4 0 0 5 1 0 0;
      5 5 4 0 0 0 0;
      0 0 0 2 4 5 0;
      0 3 0 0 0 0 3]
# Item profiles: one row per item, one column per (binary) feature.
ip = [1 1 0 0 0 0 0;
      1 1 0 0 0 0 0;
      1 1 0 0 0 0 0;
      0 0 1 0 0 1 0;
      0 0 0 1 1 0 1;
      0 0 0 1 1 0 1;
      0 0 0 1 1 0 1]
# User profiles: one row per user, one column per feature. Sizes are derived
# from the data so the block keeps working if the toy matrices are edited.
up = zeros(size(um, 1), size(ip, 2))
# A user's profile is the mean item profile over the items that user rated.
for u in 1:size(um, 1)
    # Indices of the items this user rated. Computed once per user: it does
    # not depend on the feature being filled in.
    rated = (1:size(um, 2))[um[u, :] .!= 0]
    # A user without ratings keeps the all-zero profile instead of 0/0 = NaN.
    isempty(rated) && continue
    for f in 1:size(ip, 2)
        up[u, f] = sum(ip[rated, f]) / length(rated)
    end
end
# Hand-computed Euclidean distance between each of the 4 toy user profiles
# (rows of `up`) and each of the 7 toy item profiles (rows of `ip`);
# entry (u, item) is the distance from user u to that item.
sim = [sqrt(sum((up[u, :] - ip[item, :]) .^ 2)) for u in 1:4, item in 1:7]
# Distance between every item profile and every user profile, computed by
# Distances.jl on the transposed matrices and transposed back, so that rows
# follow `up` (users) and columns follow `ip` (items).
R = pairwise(Euclidean(), ip', up')'
# Every (user, item) cell with a non-zero rating in `um` is set to Inf so it
# can never be the minimum-distance candidate for that user.
for cell in eachindex(um)
    if um[cell] != 0
        R[cell] = Inf
    end
end
R
#R[1,:]'
# ============ constructing utility matrix ============
# (movies.csv is not read here: it is loaded where the item profiles are
# built, and nothing in this section uses it.)
# Each data row of ratings.csv supplies a row index (column 1), a column
# index (column 2) and the value stored there (column 3). The first row is
# skipped -- presumably a header line; confirm against the file.
rating = readcsv("ratings.csv")
# NOTE(review): dimensions are hard-coded (presumably number of users x
# largest rated movie id for this dataset); as a dense Float64 matrix this is
# roughly 880 MB -- confirm that is acceptable.
um = zeros(671,163949)
for r in 2:size(rating,1)
    um[rating[r,1], rating[r,2]] = rating[r,3]
end
# ============ end constructing utility ===============
# ============ constructing item profiles ===============
# movies.csv: the first row is dropped by the slices below (presumably a
# header); column 1 is used as the movie id and column 3 as a '|'-separated
# genre list.
mov = readcsv("movies.csv")
genre = mov[2:end,3]
movid = mov[2:end,1]
# Collect the distinct genre labels; their count is the number of features.
# `union!` mutates the set in place, so the loop never rebinds the global `s`.
s = Set{String}()
for i in 1:length(genre)
    union!(s, split(genre[i], '|'))
end
# Feature map: genre label -> feature (column) number.
# A Set cannot be indexed (`s[i]` is a MethodError), so enumerate its
# elements instead; sorting first makes the numbering reproducible from run
# to run rather than dependent on hash order.
fm = Dict{String,Int}()
for (col, label) in enumerate(sort!(collect(s)))
    fm[label] = col
end
# Coordinate lists for the item-profile matrix: one (movie id, feature number)
# pair per genre of every movie. Typed as Int so `sparse` receives integer
# index vectors rather than Vector{Any}.
movids = Int[]
genres = Int[]
for i in 1:length(movid)
    for label in split(genre[i], '|')
        push!(movids, movid[i])
        push!(genres, fm[label])
    end
end
# Rows are addressed directly by movie id and columns by feature number; each
# listed pair contributes a 1.
ip = sparse(movids, genres, ones(length(movids)))
ip = full(ip) # converting sparse matrix to full matrix
# User profiles: one row per user (row of `um`), one column per feature
# (column of `ip`). The shape is taken from the data so it always agrees with
# the loop bounds and with `ip` in the distance computation that follows.
up = zeros(size(um, 1), size(ip, 2))
# A user's profile is the mean item profile over the items that user rated.
for u in 1:size(um, 1)
    # Column indices of the non-zero ratings of this user. Computed once per
    # user: scanning the whole row again for every feature is wasted work.
    rated = (1:size(um, 2))[um[u, :] .!= 0]
    # A user without ratings keeps the all-zero profile instead of 0/0 = NaN.
    isempty(rated) && continue
    for f in 1:size(ip, 2)
        up[u, f] = sum(ip[rated, f]) / length(rated)
    end
end
# Distance between every item profile and every user profile, transposed so
# that rows follow `up` (users) and columns follow the rows of `ip`.
R = pairwise(Euclidean(),ip',up')'
# What's the top movie to recommend? The smallest distance is the best match.
auser = 2 # active user
scores = R[auser, :] # slice the active user's row once
topScore = minimum(scores)
# Every column tied for the best score.
movie_id = [k for k in eachindex(scores) if scores[k] == topScore]
# The second tied candidate is used when there is one (as before); with a
# single best match there is no second entry, so fall back to the first
# instead of failing with a BoundsError.
# NOTE(review): why the second candidate is preferred is not evident from the
# code -- confirm the intent.
pick = movie_id[min(2, length(movie_id))]
# Finding the movie's name: locate the chosen id in the id column ...
movie_idx = [k for k in eachindex(movid) if movid[k] == pick]
println(movie_idx)
# ... and shift by one because `movid` was sliced from row 2 of `mov`.
mov[movie_idx .+ 1, 2]