From e841c770f1132da7116f741b0745c29f166a7311 Mon Sep 17 00:00:00 2001
From: Israel Barreto Sant'Anna <ibsa14@inf.ufpr.br>
Date: Thu, 18 May 2017 10:11:08 -0300
Subject: [PATCH] Implemented functions to get objects from tag cluster

---
Reviewer note: the line breaks of this patch had been lost. Indentation and
the position of blank context lines are reconstructed, so apply it with
whitespace-tolerant options if it does not match the target file exactly.

 lib/tasks/tag_clustering.rake | 270 ++++++++++++++++++++++++++--------
 1 file changed, 209 insertions(+), 61 deletions(-)

diff --git a/lib/tasks/tag_clustering.rake b/lib/tasks/tag_clustering.rake
index e527c2d9..4828780a 100644
--- a/lib/tasks/tag_clustering.rake
+++ b/lib/tasks/tag_clustering.rake
@@ -7,63 +7,63 @@ namespace :tag do
 
     hash = {}
     edges_total = 0
-    graphPath = Rails.root.join(outDir, fileName + ".net")
-
-    def swap(a, b)
-      tmp = a
-      a = b
-      b = tmp
-    end
-
-    LearningObject.all.each do |lo|
-      # for each lo, count tags and tag pairs and add to hash
-      # if id1 <= id2
-      # hash[id1][id2] will equal how many times tags with id1 and id2 appear together on a LO
-      lo.tags.each.with_index do |t, i|
-        hash[t.id] = {} if hash[t.id].nil?
-        hash[t.id][t.id] = 0 if hash[t.id][t.id].nil?
-        hash[t.id][t.id] += 1
-        lo.tags.drop(i+1).each do |t2|
-          if t.id > t2.id
-            swap(t, t2)
-            hash[t.id] = {} if hash[t.id].nil?
-          end
-          if hash[t.id][t2.id].nil?
-            hash[t.id][t2.id] = 0
-          end
-          hash[t2.id] = {} if hash[t2.id].nil?
-
-          # hash[t2.id][t2.id] = 0 if hash[t2.id][t2.id].nil?
-          # hash[t2.id][t2.id] += 1
-          # hash[t.id][t.id] = 0 if hash[t.id][t.id].nil?
-          # hash[t.id][t.id] += 1
-
-          hash[t.id][t2.id] += 1
-        end
-      end
-    end
-
-    File.open(graphPath, "w+") do |f|
-      f << "*Vertices #{Tag.all.size}\n"
-      # tags = Tag.all.to_ary
-      tag_index = {}
-      Tag.all.each_with_index do |t,i|
-        f << "#{i+1} \"#{t.name}\"\n"
-        tag_index[t.id] = i+1
-      end
-
-      f << "*Edges #{edges_total}\n"
-
-      hash.each do |id1, ids2Hash|
-        ids2Hash.each do |id2, value|
-          if id1 != id2
-            f << "#{tag_index[id1]} #{tag_index[id2]} #{hash[id1][id2].to_f/(Math.sqrt(hash[id1][id1]*hash[id2][id2]))}\n"
-          end
-        end
-      end
-    end
-
-    system("infomap --ftree #{graphPath} #{Rails.root.join(outDIR)}")
+    graphPath = Rails.root.join(outDIR, fileName + ".net")
+    #
+    # def swap(a, b)
+    #   tmp = a
+    #   a = b
+    #   b = tmp
+    # end
+    #
+    # LearningObject.all.each do |lo|
+    #   # for each lo, count tags and tag pairs and add to hash
+    #   # if id1 <= id2
+    #   # hash[id1][id2] will equal how many times tags with id1 and id2 appear together on a LO
+    #   lo.tags.each.with_index do |t, i|
+    #     hash[t.id] = {} if hash[t.id].nil?
+    #     hash[t.id][t.id] = 0 if hash[t.id][t.id].nil?
+    #     hash[t.id][t.id] += 1
+    #     lo.tags.drop(i+1).each do |t2|
+    #       if t.id > t2.id
+    #         swap(t, t2)
+    #         hash[t.id] = {} if hash[t.id].nil?
+    #       end
+    #       if hash[t.id][t2.id].nil?
+    #         hash[t.id][t2.id] = 0
+    #       end
+    #       hash[t2.id] = {} if hash[t2.id].nil?
+    #
+    #       # hash[t2.id][t2.id] = 0 if hash[t2.id][t2.id].nil?
+    #       # hash[t2.id][t2.id] += 1
+    #       # hash[t.id][t.id] = 0 if hash[t.id][t.id].nil?
+    #       # hash[t.id][t.id] += 1
+    #
+    #       hash[t.id][t2.id] += 1
+    #     end
+    #   end
+    # end
+    #
+    # File.open(graphPath, "w+") do |f|
+    #   f << "*Vertices #{Tag.all.size}\n"
+    #   # tags = Tag.all.to_ary
+    #   tag_index = {}
+    #   Tag.all.each_with_index do |t,i|
+    #     f << "#{i+1} \"#{t.name}\"\n"
+    #     tag_index[t.id] = i+1
+    #   end
+    #
+    #   f << "*Edges #{edges_total}\n"
+    #
+    #   hash.each do |id1, ids2Hash|
+    #     ids2Hash.each do |id2, value|
+    #       if id1 != id2
+    #         f << "#{tag_index[id1]} #{tag_index[id2]} #{hash[id1][id2].to_f/(Math.sqrt(hash[id1][id1]*hash[id2][id2]))}\n"
+    #       end
+    #     end
+    #   end
+    # end
+    #
+    # system("infomap --ftree #{graphPath} #{Rails.root.join(outDIR)}")
 
     clusters = {childs: [], parent: nil}
     tags = {}
@@ -84,8 +84,7 @@ namespace :tag do
 
         it = clusters
         ftree.each do |clusterId|
-          # p it
-          clusterId = clusterId.to_i
+          clusterId = clusterId.to_i - 1
           if it[:childs][clusterId].nil?
             it[:childs][clusterId] = {childs: [], parent: nil}
             it[:childs][clusterId][:parent] = it
@@ -93,9 +92,158 @@ namespace :tag do
           it = it[:childs][clusterId]
         end
 
-        it[:childs][leafId] = {id: tagId, rank: rank, name: name, parent: it}
-        tags[tagId] = it
+        it[:childs][leafId-1] = {id: tagId, rank: rank, name: name, parent: it}
+        tags[tagId] = it[:childs][leafId-1]
       end
     end
+
+    # Sums the weight of every close tag that is attached to +lo+.
+    #
+    # lo         - object whose #tags respond to #id
+    # close_tags - Array of { id:, rank: } hashes, as returned by #closest
+    #
+    # Returns the accumulated weight (0 when no tag matches).
+    def calculate_relevance(lo, close_tags)
+      rel = 0
+      lo.tags.each do |t|
+        close_tags.each do |cloT|
+          rel += cloT[:rank] if cloT[:id] == t.id
+        end
+      end
+      rel
+    end
+
+    # Collects the tags that share a parent cluster with +tagId+ (itself
+    # included) and whose log2 rank differs from its own by less than 4.
+    #
+    # tagId - id of the query tag
+    # tags  - Hash mapping tag id to its leaf node in the cluster tree
+    #
+    # Returns an Array of { id:, rank: } hashes weighted by #normalize.
+    # Raises ArgumentError when +tagId+ has no node in +tags+.
+    def closest(tagId, tags)
+      node = tags[tagId]
+      raise ArgumentError, "tag #{tagId} is not in the cluster tree" if node.nil?
+
+      base = Math.log2(node[:rank])
+      clos = []
+      node[:parent][:childs].each do |t1|
+        # :childs is filled by index and may contain nil gaps; entries
+        # without a :rank are sub-clusters, not tags.
+        next if t1.nil? || t1[:rank].nil?
+
+        rank = (base - Math.log2(t1[:rank])).abs
+        clos << { id: t1[:id], rank: rank } if rank < 4
+      end
+      normalize(clos)
+    end
+
+    # Turns the :rank distance of each entry into a weight, in place:
+    # 1 - rank / (max * 1.05), so distance 0 weighs 1 and the farthest entry
+    # keeps a small positive weight.
+    #
+    # tags - Array of { id:, rank: } hashes with non-negative :rank
+    #
+    # Returns the same Array.
+    def normalize(tags)
+      max = tags.map { |t| t[:rank] }.max
+      if max.nil? || max.zero?
+        # Every distance is 0 (e.g. the tag is alone in its cluster): dividing
+        # by max would produce NaN weights, so weigh every entry 1.
+        tags.each { |t| t[:rank] = 1.0 }
+      else
+        tags.each { |t| t[:rank] = 1 - (t[:rank] / (max * 1.05)) }
+      end
+      tags
+    end
+
+    # Ranks every LearningObject against the tags close to +tagId+ and prints
+    # the 25 best matches.
+    #
+    # Returns an Array of [learning_object_id, relevance] pairs, best first.
+    def find_relevant_results(tagId, tags)
+      close_tags = closest(tagId, tags)
+
+      p "==============="
+      p "close_tags"
+      close_tags.each {|ct| p Tag.find(ct[:id]).name+" | "+ct[:id].to_s+" | "+ct[:rank].to_s+" | "+tags[ct[:id]][:rank].to_s}
+      p "==============="
+
+      los = {}
+      # freq = cluster_frequency(close_tags) # only needed by frequency_rank_global
+      # find_each loads the records in batches instead of all at once.
+      LearningObject.find_each do |lo|
+        los[lo.id] = calculate_relevance(lo, close_tags)
+        # los[lo.id] = frequency_rank_global(lo, close_tags, freq)
+        # los[lo.id] = frequency_rank(lo, close_tags)
+      end
+
+      lol = los.sort_by { |_id, rel| rel }.last(25).reverse
+      lol.each do |key, value|
+        puts "#{key}: #{value}"
+      end
+      lol
+    end
+
+    # Scores +lo+ by the weights of its close tags, each scaled by how rare the
+    # tag is (log2(total tags / taggings of the tag) + 1) and damped by the
+    # number of tags on +lo+.
+    #
+    # Returns 0 when +lo+ has no tags.
+    def frequency_rank(lo, close_tags)
+      return 0 if lo.tags.size == 0
+
+      wdf = 1 / (Math.log2(lo.tags.size) + 1)
+      # Float on purpose: integer division truncates the ratio and yields
+      # log2(0) = -Infinity once a tag has more taggings than there are tags.
+      total_tags = Tag.all.size.to_f
+
+      itf_sum = 0
+      lo.tags.each do |t|
+        close_tags.each do |cloT|
+          next unless cloT[:id] == t.id
+
+          itf_sum += cloT[:rank] * (Math.log2(total_tags / t.taggings.size) + 1)
+        end
+      end
+      wdf * itf_sum
+    end
+
+    # Returns the total number of taggings over the tags in +cluster+.
+    def cluster_frequency(cluster)
+      cluster.inject(0) { |total, t| total + Tag.find(t[:id]).taggings.size }
+    end
+
+    # Scores +lo+ by how many of its tags are close tags, scaled by how rare
+    # the whole cluster is (log2(total tags / freq_cluster) + 1).
+    #
+    # freq_cluster - tagging count of the cluster, see #cluster_frequency
+    #
+    # Returns 0 when +lo+ has no tags or the cluster has no taggings.
+    def frequency_rank_global(lo, close_tags, freq_cluster)
+      return 0 if lo.tags.size == 0 || freq_cluster == 0
+
+      close_ids = close_tags.map { |cloT| cloT[:id] }
+      freq = lo.tags.inject(0) { |hits, t| hits + close_ids.count(t.id) }
+
+      wdf = Math.log2(freq + 1) / (Math.log2(lo.tags.size) + 1)
+      # Float division: the integer quotient is 0 whenever the cluster has
+      # more taggings than there are tags, and log2(0) is -Infinity.
+      itf = Math.log2(Tag.all.size.to_f / freq_cluster) + 1
+      wdf * itf
+    end
+
+    # Query tag; set TAG_ID to override the default.
+    tag_id = ENV.fetch("TAG_ID", 22794).to_i
+    lol = find_relevant_results(tag_id, tags)
+
+    lol.each do |id, rank|
+      lo = LearningObject.find(id)
+      puts "-----"
+      p lo.id.to_s+": "+rank.to_s+" | "+lo.name
+      # A tag may be missing from the cluster tree; print an empty rank then.
+      lo.tags.each {|t| print t.name+" - "+(tags[t.id] || {})[:rank].to_s+" | "}
+      puts ""
+    end
   end
 end
-- 
GitLab