# This is a library for measuring distances
Pkg.add("Distances")
using Distances

# toy data

# Utility matrix: one row per user, one column per item.
# um[u, m] is the value user u gave item m; 0 marks an item the user has not voted on.
um = [4 0 0 5 1 0 0;
      5 5 4 0 0 0 0;
      0 0 0 2 4 5 0;
      0 3 0 0 0 0 3]

# Item profiles: one row per item, one column per feature.
# ip[m, f] is 1 when item m carries feature f, otherwise 0.
ip = [1 1 0 0 0 0 0;
      1 1 0 0 0 0 0;
      1 1 0 0 0 0 0;
      0 0 1 0 0 1 0;
      0 0 0 1 1 0 1;
      0 0 0 1 1 0 1;
      0 0 0 1 1 0 1]

# Empty user profiles: one row per user, one column per feature.
# Sized from the matrices above so the loop cannot drift out of step with the data.
up = zeros(size(um, 1), size(ip, 2))

# building user profile:
# up[u, f] = (number of items voted on by user u that carry feature f)
#            / (number of items voted on by user u)
for i = 1:size(um, 1)           # users
    rated = um[i, :] .!= 0      # mask of the items this user has voted on
    nrated = count(rated)       # computed once per user, not once per feature
    # A user without any votes keeps an all-zero profile instead of 0/0 = NaN.
    nrated == 0 && continue
    for j = 1:size(ip, 2)       # features
        up[i, j] = sum(ip[rated, j]) / nrated
    end
end

# Hand-computed Euclidean distance between every user profile (row of `up`)
# and every item profile (row of `ip`): sim[u, m] = sqrt(sum((up[u,:] - ip[m,:]).^2)).
# Built as a 4×7 matrix (4 users, 7 items) in one comprehension.
sim = [sqrt(sum((up[u, :] - ip[m, :]) .^ 2)) for u = 1:4, m = 1:7]

# measuring similarity between items and users
# Distances.jl's `pairwise` compares columns, so both profile matrices are
# transposed on the way in (features become rows) and the result is transposed
# back, leaving one row per user and one column per item.
R = transpose(pairwise(Euclidean(), transpose(ip), transpose(up)))

# Items a user has already voted on get an infinite distance so that they can
# never come out as the closest (recommended) item.
R[um .!= 0] = Inf

# Bare expression: shows the resulting matrix when run interactively.
R
#R[1,:]'

mov = readcsv("movies.csv");

# ============ constructing utility matrix ============
# Each row of ratings.csv from the second one on is read as
# (row index into um, column index into um, value); row 1 is skipped
# (presumably a header line — confirm against the file).
rating = readcsv("ratings.csv")

# Size the matrix from the largest ids actually present in the file rather than
# from numbers that only fit one particular snapshot of the data set; a file
# with a larger id would otherwise fail with a BoundsError in the loop below.
um = zeros(maximum(rating[2:end, 1]), maximum(rating[2:end, 2]))
for i = 2:size(rating, 1)
    um[rating[i, 1], rating[i, 2]] = rating[i, 3]
end
# ============ end constructing utility ===============


# ============ constructing item profiles ===============
# Row 1 of movies.csv is skipped; column 1 holds the movie id and column 3 a
# '|'-separated list of genre names.
mov   = readcsv("movies.csv")
genre = mov[2:end, 3]
movid = mov[2:end, 1]

# constructing feature map and the (movie id, feature index) pairs in one pass.
# fm maps a genre name to its feature (column) index. Indices are handed out in
# first-seen order, so the numbering is reproducible from the file alone.
# (The previous version indexed a Set with s[i], which is not supported, and
# split every genre cell twice.)
fm = Dict{String,Int}()
movids = Int[]   # row index (movie id) of every stored entry
genres = Int[]   # column index (feature) of every stored entry
for i = 1:length(movid)
    for g in split(genre[i], '|')
        # get! returns the existing index, or registers the next free one.
        col = get!(fm, g, length(fm) + 1)
        push!(movids, movid[i])
        push!(genres, col)
    end
end

# Set of all genre names, kept under its original name for any later use.
s = Set(keys(fm))

# ip[m, f] == 1.0 when the movie with id m has genre f. Rows are indexed by
# movie id, so ids that do not occur in movies.csv stay all-zero rows.
ip = sparse(movids, genres, ones(length(movids)))

# Output shown for the sparse item-profile matrix `ip`:
#
# 164979×21 SparseMatrixCSC{Float64,Int64} with 20337 stored entries:
#   [1     ,      1]  =  1.0
#   [2     ,      1]  =  1.0
#   [8     ,      1]  =  1.0
#   [10    ,      1]  =  1.0
#   [13    ,      1]  =  1.0
#   [15    ,      1]  =  1.0
#   [29    ,      1]  =  1.0
#   [44    ,      1]  =  1.0
#   [53    ,      1]  =  1.0
#   [60    ,      1]  =  1.0
#   ⋮
#   [132549,     21]  =  1.0
#   [132952,     21]  =  1.0
#   [134025,     21]  =  1.0
#   [136592,     21]  =  1.0
#   [140753,     21]  =  1.0
#   [140763,     21]  =  1.0
#   [141866,     21]  =  1.0
#   [143410,     21]  =  1.0
#   [149532,     21]  =  1.0
#   [151307,     21]  =  1.0
#   [160590,     21]  =  1.0

ip = full(ip)  # converting sparse matrix to full matrix

# User profiles: one row per user (row of `um`), one column per feature
# (column of `ip`). Sized from those matrices instead of hard-coded counts.
up = zeros(size(um, 1), size(ip, 2))

# up[u, f] = (number of items voted on by user u that carry feature f)
#            / (number of items voted on by user u)
for i = 1:size(um, 1)           # users
    # si is the set of voted items for this user. It does not depend on the
    # feature, so it is computed once per user rather than once per
    # (user, feature) pair. Integer positions are used because they are
    # applied as row indices into `ip`.
    si = find(um[i, :] .!= 0)
    # A user without any votes keeps an all-zero profile instead of 0/0 = NaN.
    isempty(si) && continue
    for j = 1:size(ip, 2)       # features
        up[i, j] = sum(ip[si, j]) / length(si)
    end
end

# Euclidean distance from every user profile to every item profile.
# `pairwise` compares columns, hence the transposes; the result has one row
# per user and one column per row of `ip`.
R = transpose(pairwise(Euclidean(), transpose(ip), transpose(up)))

# what's the top movie to recommend ?
auser = 2 # active user
# The smallest distance in the active user's row is the best score ...
topScore = minimum(R[auser, :])
# ... and these are all the column positions that reach it (ties included).
movie_id = find(d -> d == topScore, R[auser, :])

# finding movies name
# NOTE(review): the SECOND tied position is used here; this throws a
# BoundsError when only one item reaches the best score, and already-voted
# items are not excluded before the search — confirm both are intended.
movie_idx = find(id -> id == movie_id[2], movid)
println(movie_idx)
# movid was taken from mov[2:end, 1], so positions in movid are one row above
# the matching rows of mov; column 2 of mov is what gets displayed.
mov[movie_idx .+ 1, 2]

# Output:
#
# [26]
#
# 1-element Array{Any,1}:
#  "Othello (1995)"