Skip to content
Snippets Groups Projects
Commit e841c770 authored by Israel Barreto Sant'Anna's avatar Israel Barreto Sant'Anna
Browse files

Implemented functions to get objects from tag cluster

parent 5343b99a
No related branches found
No related tags found
No related merge requests found
......@@ -7,63 +7,63 @@ namespace :tag do
hash = {}
edges_total = 0
graphPath = Rails.root.join(outDir, fileName + ".net")
# Attempts to exchange two values, but it only rebinds its own parameters
# `a` and `b`. Ruby passes object references by value, so the variables the
# caller handed in are NOT exchanged; calling this has no visible effect on
# them. The return value is the object originally passed as `a` (the result
# of the final assignment).
def swap(a, b)
tmp = a
a = b
b = tmp
end
LearningObject.all.each do |lo|
# for each lo, count tags and tag pairs and add to hash
# if id1 <= id2
# hash[id1][id2] will equal how many times tags with id1 and id2 appear together on a LO
lo.tags.each.with_index do |t, i|
hash[t.id] = {} if hash[t.id].nil?
hash[t.id][t.id] = 0 if hash[t.id][t.id].nil?
hash[t.id][t.id] += 1
lo.tags.drop(i+1).each do |t2|
if t.id > t2.id
swap(t, t2)
hash[t.id] = {} if hash[t.id].nil?
end
if hash[t.id][t2.id].nil?
hash[t.id][t2.id] = 0
end
hash[t2.id] = {} if hash[t2.id].nil?
# hash[t2.id][t2.id] = 0 if hash[t2.id][t2.id].nil?
# hash[t2.id][t2.id] += 1
# hash[t.id][t.id] = 0 if hash[t.id][t.id].nil?
# hash[t.id][t.id] += 1
hash[t.id][t2.id] += 1
end
end
end
File.open(graphPath, "w+") do |f|
f << "*Vertices #{Tag.all.size}\n"
# tags = Tag.all.to_ary
tag_index = {}
Tag.all.each_with_index do |t,i|
f << "#{i+1} \"#{t.name}\"\n"
tag_index[t.id] = i+1
end
f << "*Edges #{edges_total}\n"
hash.each do |id1, ids2Hash|
ids2Hash.each do |id2, value|
if id1 != id2
f << "#{tag_index[id1]} #{tag_index[id2]} #{hash[id1][id2].to_f/(Math.sqrt(hash[id1][id1]*hash[id2][id2]))}\n"
end
end
end
end
system("infomap --ftree #{graphPath} #{Rails.root.join(outDIR)}")
graphPath = Rails.root.join(outDIR, fileName + ".net")
#
# def swap(a, b)
# tmp = a
# a = b
# b = tmp
# end
#
# LearningObject.all.each do |lo|
# # for each lo, count tags and tag pairs and add to hash
# # if id1 <= id2
# # hash[id1][id2] will equal how many times tags with id1 and id2 appear together on a LO
# lo.tags.each.with_index do |t, i|
# hash[t.id] = {} if hash[t.id].nil?
# hash[t.id][t.id] = 0 if hash[t.id][t.id].nil?
# hash[t.id][t.id] += 1
# lo.tags.drop(i+1).each do |t2|
# if t.id > t2.id
# swap(t, t2)
# hash[t.id] = {} if hash[t.id].nil?
# end
# if hash[t.id][t2.id].nil?
# hash[t.id][t2.id] = 0
# end
# hash[t2.id] = {} if hash[t2.id].nil?
#
# # hash[t2.id][t2.id] = 0 if hash[t2.id][t2.id].nil?
# # hash[t2.id][t2.id] += 1
# # hash[t.id][t.id] = 0 if hash[t.id][t.id].nil?
# # hash[t.id][t.id] += 1
#
# hash[t.id][t2.id] += 1
# end
# end
# end
#
# File.open(graphPath, "w+") do |f|
# f << "*Vertices #{Tag.all.size}\n"
# # tags = Tag.all.to_ary
# tag_index = {}
# Tag.all.each_with_index do |t,i|
# f << "#{i+1} \"#{t.name}\"\n"
# tag_index[t.id] = i+1
# end
#
# f << "*Edges #{edges_total}\n"
#
# hash.each do |id1, ids2Hash|
# ids2Hash.each do |id2, value|
# if id1 != id2
# f << "#{tag_index[id1]} #{tag_index[id2]} #{hash[id1][id2].to_f/(Math.sqrt(hash[id1][id1]*hash[id2][id2]))}\n"
# end
# end
# end
# end
#
# system("infomap --ftree #{graphPath} #{Rails.root.join(outDIR)}")
clusters = {childs: [], parent: nil}
tags = {}
......@@ -84,8 +84,7 @@ namespace :tag do
it = clusters
ftree.each do |clusterId|
# p it
clusterId = clusterId.to_i
clusterId = clusterId.to_i - 1
if it[:childs][clusterId].nil?
it[:childs][clusterId] = {childs: [], parent: nil}
it[:childs][clusterId][:parent] = it
......@@ -93,9 +92,127 @@ namespace :tag do
it = it[:childs][clusterId]
end
it[:childs][leafId] = {id: tagId, rank: rank, name: name, parent: it}
tags[tagId] = it
it[:childs][leafId-1] = {id: tagId, rank: rank, name: name, parent: it}
tags[tagId] = it[:childs][leafId-1]
end
end
# Scores an object against a list of weighted tags.
#
# For every tag on +lo+, the :rank of each entry in +close_tags+ whose :id
# equals that tag's id is added to the score, in iteration order.
#
# lo         - object responding to #tags; each tag responds to #id
# close_tags - array of hashes with :id and :rank keys
#
# Returns the accumulated rank, or 0 when nothing matches.
def calculate_relevance(lo, close_tags)
  lo.tags.inject(0) do |score, tag|
    close_tags.inject(score) do |running, candidate|
      candidate[:id] == tag.id ? running + candidate[:rank] : running
    end
  end
end
# Gathers the entries that share a parent with +tagId+ and lie close to it in
# rank, then hands them to #normalize.
#
# Closeness is the absolute difference between the log2 of the two :rank
# values; only entries with a difference below 4 are kept.
#
# tagId - key into +tags+
# tags  - hash whose values are node hashes read here via :rank, :id and
#         :parent (the parent exposes its children under :childs)
#
# Returns the result of #normalize applied to an array of
# { id:, rank: } hashes, where :rank holds the log2 distance.
def closest(tagId, tags)
  target = tags[tagId]
  siblings = target[:parent][:childs]
  nearby = siblings.each_with_object([]) do |sibling, kept|
    distance = (Math.log2(target[:rank]) - Math.log2(sibling[:rank])).abs
    kept << { id: sibling[:id], rank: distance } if distance < 4
  end
  normalize(nearby)
end
# Rescales the :rank of every entry, in place, so that smaller input ranks
# map to larger output values: rank becomes 1 - rank / (max * 1.05), where
# max is the largest :rank in the list (never less than 0).
#
# When the largest rank is zero (for instance a list holding a single entry
# at distance 0.0) the division would be 0/0 and produce NaN, which cannot be
# sorted or compared later. In that case every entry gets 1.0, the same
# value a zero rank receives in the regular case.
#
# tags - array of hashes with a numeric :rank key; mutated in place
#
# Returns the same array object that was passed in.
def normalize(tags)
  max = tags.inject(0) { |highest, t| t[:rank] > highest ? t[:rank] : highest }

  if max.zero?
    tags.each { |t| t[:rank] = 1.0 }
  else
    # The 1.05 factor keeps the entry with the largest rank slightly above 0.
    scale = max * 1.05
    tags.each { |t| t[:rank] = 1 - (t[:rank] / scale) }
  end

  tags
end
# Ranks every LearningObject by its relevance to +tagId+ and returns the top
# 25, printing diagnostics along the way.
#
# Steps: compute the close tags via #closest, print one line per close tag,
# score each LearningObject with #calculate_relevance, sort ascending by
# score, keep the last 25 and reverse them so the highest score comes first.
#
# tagId - key into +tags+
# tags  - hash of tag id => node hash (read here via :rank)
#
# Returns an array of [learning_object_id, relevance] pairs, best first.
def find_relevant_results(tagId, tags)
  close_tags = closest(tagId, tags)

  divider = "==============="
  p divider
  p "close_tags"
  close_tags.each do |ct|
    tag_name = Tag.find(ct[:id]).name
    p tag_name+" | "+ct[:id].to_s+" | "+ct[:rank].to_s+" | "+tags[ct[:id]][:rank].to_s
  end
  p divider

  # The result is not used by the active scoring below; it is only needed by
  # the frequency_rank_global alternative, which is currently disabled.
  cluster_frequency(close_tags)

  scores = LearningObject.all.each_with_object({}) do |lo, by_id|
    by_id[lo.id] = calculate_relevance(lo, close_tags)
  end

  top = scores.sort_by { |_lo_id, relevance| relevance }.last(25).reverse
  top.each { |lo_id, relevance| puts "#{lo_id}: #{relevance}" }
  top
end
# Scores +lo+ against +close_tags+ with a frequency-weighted formula:
#
#   (1 / (log2(number of tags on lo) + 1)) *
#     sum over matching tags of rank * (log2(total tags / taggings of tag) + 1)
#
# The ratio inside the logarithm is computed with float division. Integer
# division would truncate it (10 / 4 => 2) and collapse to 0 whenever a tag
# has more taggings than there are tags, making log2 return -Infinity.
#
# lo         - object responding to #tags; each tag responds to #id and
#              #taggings
# close_tags - array of hashes with :id and :rank keys
#
# Returns the score; 0 when +lo+ has no tags.
def frequency_rank(lo, close_tags)
  tag_count = lo.tags.size
  return 0 if tag_count.zero?

  wdf = 1 / (Math.log2(tag_count) + 1)
  # Loop-invariant: look the total up once instead of per matching pair.
  total_tags = Tag.all.size.to_f

  itf_sum = 0
  lo.tags.each do |t|
    close_tags.each do |cloT|
      next unless cloT[:id] == t.id

      itf_sum += cloT[:rank] * (Math.log2(total_tags / t.taggings.size) + 1)
    end
  end

  wdf * itf_sum
end
# Adds up how many taggings the tags of a cluster have in total.
#
# cluster - array of hashes with an :id key; each id is looked up with
#           Tag.find and its #taggings are counted
#
# Returns the summed count (0 for an empty cluster).
def cluster_frequency(cluster)
  cluster.inject(0) do |total, member|
    total + Tag.find(member[:id]).taggings.size
  end
end
# Scores +lo+ by how many of its tags fall inside the cluster, weighted by
# how frequent the cluster is overall:
#
#   (log2(freq + 1) / (log2(number of tags on lo) + 1)) *
#     (log2(total tags / freq_cluster) + 1)
#
# where freq is the number of (tag on lo, close tag) pairs with equal ids.
#
# The ratio inside the second logarithm uses float division. Integer division
# would truncate it to 0 whenever freq_cluster exceeds the number of tags
# (log2 then returns -Infinity) and would raise ZeroDivisionError for a
# freq_cluster of 0.
#
# lo           - object responding to #tags; each tag responds to #id
# close_tags   - array of hashes with an :id key
# freq_cluster - numeric total of taggings in the cluster
#
# Returns the score as a Float; 0.0 when no tag of +lo+ is in +close_tags+
# (this includes the case where +lo+ has no tags at all).
def frequency_rank_global(lo, close_tags, freq_cluster)
  close_ids = close_tags.map { |cloT| cloT[:id] }
  freq = lo.tags.inject(0) { |hits, t| hits + close_ids.count(t.id) }
  # Without overlap the first factor is 0, so skip the remaining maths
  # (and the division by freq_cluster) altogether.
  return 0.0 if freq.zero?

  wdf = Math.log2(freq + 1) / (Math.log2(lo.tags.size) + 1)
  itf = Math.log2(Tag.all.size.to_f / freq_cluster) + 1
  wdf * itf
end
# Driver: ranks learning objects for the hard-coded tag id 22794 and prints,
# for each result, its id, score and name followed by every tag on it with
# that tag's rank from +tags+.
find_relevant_results(22794, tags).each do |lo_id, score|
  lo = LearningObject.find(lo_id)
  puts "-----"
  p lo.id.to_s+": "+score.to_s+" | "+lo.name
  lo.tags.each do |t|
    print t.name+" - "+tags[t.id][:rank].to_s+" | "
  end
  puts ""
end
end
end
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment