In [ ]:
# This is a library for measuring distances
using Distances

In [ ]:
# toy data

# Utility matrix: one row per user, one column per item.
# An entry is the user's rating of the item; 0 means "not rated".
um = [
    4 0 0 5 1 0 0
    5 5 4 0 0 0 0
    0 0 0 2 4 5 0
    0 3 0 0 0 0 3
]

# Item profiles: one row per item, one column per feature (1 = item has it).
ip = [
    1 1 0 0 0 0 0
    1 1 0 0 0 0 0
    1 1 0 0 0 0 0
    0 0 1 0 0 1 0
    0 0 0 1 1 0 1
    0 0 0 1 1 0 1
    0 0 0 1 1 0 1
]

# User profiles: one row per user, one column per feature. Sized from the
# data so the loops below cannot drift out of sync with the matrices.
up = zeros(size(um, 1), size(ip, 2))

# A user's profile is the average item profile over the items they rated.
for i = 1:size(um, 1)
    # Which items this user rated. Computed once per user: it does not
    # depend on the feature index.
    rated = um[i, :] .!= 0
    nrated = sum(rated)
    # A user with no ratings keeps an all-zero profile rather than 0/0 = NaN.
    nrated == 0 && continue
    for j = 1:size(ip, 2)
        up[i, j] = sum(ip[rated, j]) / nrated
    end
end

# Hand-computed Euclidean distance between each user profile (row of up)
# and each item profile (row of ip). Despite the name, sim[user, item]
# holds a distance: smaller means a closer match.
sim = zeros(4, 7)
for user in 1:4, item in 1:7
    gap = up[user, :] - ip[item, :]
    sim[user, item] = sqrt(sum(gap .^ 2))
end

# Euclidean distance between every user profile and every item profile
# (a distance, not a similarity: smaller means a closer match).
# Both matrices are transposed going in and the result is transposed back,
# which leaves R with the same shape as um (it is indexed with a mask built
# from um just below).
# NOTE(review): this relies on Distances' pairwise comparing columns — confirm
# against the installed Distances version.
R = pairwise(Euclidean(),ip',up')'

# Every (user, item) pair the user has already rated gets an infinite distance.
# NOTE(review): presumably so rated items are never recommended — confirm.
R[um .!= 0] = Inf

R
#R[1,:]'

In [32]:
# Movie catalogue. The cells below skip row 1 (they read rows 2:end) and use
# column 1 as the movie id, column 2 as the title and column 3 as the
# '|'-separated genre string.
mov = readcsv("movies.csv");

In [38]:
# ============ constructing utility matrix ============
# Dense users x movies matrix of ratings; an entry left at 0 means "not rated".
# Row 1 of `rating` is skipped; columns 1, 2 and 3 are used as the row index,
# column index and stored value respectively.
# NOTE(review): `rating` is not defined in the cells visible here — presumably
# it is loaded from a ratings CSV the same way as `mov`; confirm.
# NOTE(review): the 671 x 163949 shape is hard-coded, while the item-profile
# matrix printed below has 164979 rows — confirm the two are meant to differ.
um = zeros(671, 163949)
for row in 2:size(rating, 1)
    user, movie, score = rating[row, 1], rating[row, 2], rating[row, 3]
    um[user, movie] = score
end
# ============ end constructing utility ===============

# ============ constructing item profiles ===============
genre = mov[2:end,3]   # '|'-separated genre string per movie (row 1 skipped)
movid = mov[2:end,1]   # movie id per movie, same order as `genre`

# s      : distinct genre names, in order of first appearance
# fm     : feature map, genre name -> feature (column) index into `s`
# movids : row index (movie id) of every (movie, genre) pair
# genres : column index (feature) of every (movie, genre) pair
#
# The genres are numbered as they are first seen while collecting the pairs,
# in a single pass. `s` is an ordered Vector rather than a Set: a Set cannot
# be indexed, so numbering it by position only works on Julia versions where
# union(::Set, ::Array) happens to return a Vector.
s = String[]
fm = Dict{String,Int}()
movids = Int[]
genres = Int[]
for (id, gstr) in zip(movid, genre)
    for g in split(gstr, '|')
        if !haskey(fm, g)
            push!(s, g)
            fm[g] = length(s)
        end
        push!(movids, id)
        push!(genres, fm[g])
    end
end

# Item profiles: rows are indexed by movie id, columns by feature index;
# a stored 1.0 marks "this movie has this genre".
ip = sparse(movids, genres, ones(length(movids)))

Out[38]:
164979×21 SparseMatrixCSC{Float64,Int64} with 20337 stored entries:
[1     ,      1]  =  1.0
[2     ,      1]  =  1.0
[8     ,      1]  =  1.0
[10    ,      1]  =  1.0
[13    ,      1]  =  1.0
[15    ,      1]  =  1.0
[29    ,      1]  =  1.0
[44    ,      1]  =  1.0
[53    ,      1]  =  1.0
[60    ,      1]  =  1.0
⋮
[132549,     21]  =  1.0
[132952,     21]  =  1.0
[134025,     21]  =  1.0
[136592,     21]  =  1.0
[140753,     21]  =  1.0
[140763,     21]  =  1.0
[141866,     21]  =  1.0
[143410,     21]  =  1.0
[149532,     21]  =  1.0
[151307,     21]  =  1.0
[160590,     21]  =  1.0
In [39]:
ip = full(ip)  # converting sparse matrix to full matrix

# User profiles: one row per user, one column per feature. Sized from the
# data, matching the loop bounds below.
up = zeros(size(um, 1), size(ip, 2))

# A user's profile is the average item profile over the movies they rated.
for i = 1:size(um, 1) # num of users
    # si is the set of rated movies (column indices of um). Computed once per
    # user: it does not depend on the feature index. Integer indices are used
    # rather than a logical mask because um has fewer columns than ip has rows.
    si = find(um[i, :] .!= 0)
    # A user with no ratings keeps an all-zero profile rather than 0/0 = NaN.
    isempty(si) && continue
    for j = 1:size(ip, 2) # num of features
        up[i, j] = sum(ip[si, j]) / length(si)
    end
end

In [49]:
# Euclidean distance between every user profile and every item profile;
# R[user, movie id], smaller means a closer match.
# NOTE(review): unlike the toy example, movies the user has already rated are
# not set to Inf here, so they can still come out on top — confirm intent.
R = pairwise(Euclidean(),ip',up')'

# what's the top movie to recommend ?
auser = 2 # active user
topScore = minimum(R[auser,:])
movie_id = find(R[auser,:] .== topScore) # every movie id tied for the best score

# Which of the tied movies to show. The original picked the 2nd one
# unconditionally, which throws a BoundsError when the best score is unique;
# clamp to the number of ties instead.
tie_rank = 2
picked = movie_id[min(tie_rank, length(movie_id))]

# finding movies name
# movie_idx indexes `movid`, which starts at row 2 of `mov`, hence the + 1.
# It is empty when `picked` is an id that does not occur in `movid`.
movie_idx = find(movid .== picked)
println(movie_idx)
mov[movie_idx .+ 1, 2]

[26]

Out[49]:
1-element Array{Any,1}:
"Othello (1995)"