diff --git a/lib/tasks/tag_clustering.rake b/lib/tasks/tag_clustering.rake index 8a7e942f40dfa8051652f96eebecb19faf7a5387..94a14841a0724faa01b92f53ac22339f86eaa637 100644 --- a/lib/tasks/tag_clustering.rake +++ b/lib/tasks/tag_clustering.rake @@ -25,6 +25,7 @@ namespace :tag do clusters = {childs: [], parent: nil} tags = {} + countClusters = {} File.open(Rails.root.join(outDIR, fileName + ".ftree"), "r") do |f| f.gets @@ -49,28 +50,36 @@ namespace :tag do end it = it[:childs][clusterId] end - + countClusters[it] = 1 it[:childs][leafId-1] = {id: tagId, flow: flow, name: name, parent: it} tags[tagId] = it[:childs][leafId-1] end end - $PAD = 0.05 + puts "\nNumber of clusters found: #{countClusters.size}\n" - # max flow lg distance to consider tag close - $MAX_DIST = 4 + $PAD = 0.05 # number of results to return - $N_LOS = 25 + $N_LOS = 50 + + # searched tag id + $TAG = Tag.find_by_name("Molécula") + + puts "\nStarted Calculating relevant results for tag #{$TAG.name}\n" - relevant_los = find_relevant_results(22794, tags) + relevant_los = find_relevant_results($TAG.id, tags) - relevant_los.each do |id, rank| - lo = LearningObject.find(id) - puts "-----" - p lo.id.to_s+": "+rank.to_s+" | "+lo.name - lo.tags.each {|t| print t.name+" - "+tags[t.id][:rank].to_s+" | "} - puts "" + relevant_los.each do |type| + puts "\n============ Learning Objects ===============\n" + puts "ID: Ranking | Name | Tags\n\n" + type.each do |id, rank| + lo = LearningObject.find(id) + puts "-----" + p lo.id.to_s+": "+rank.to_s+" | "+lo.try(:name) + lo.tags.each {|t| print t.name+" | "} + puts "" + end end end # task @@ -154,9 +163,7 @@ namespace :tag do lg_dist = ( Math.log2(tags[tagId][:flow]) - Math.log2(t1[:flow]) ).abs # include only if close enough - if lg_dist < $MAX_DIST - close_tags << { id: t1[:id], rank: lg_dist} - end + close_tags << { id: t1[:id], rank: lg_dist} end return normalize_complement_close(close_tags) @@ -182,37 +189,50 @@ namespace :tag do end def find_relevant_results(tagId, tags) - puts "\nStarted Calculating relevant results for tag of Id #{tagId}\n" + los_ranked = [{},{}] - los_ranked = {} - - puts "\nGetting tags from the same cluster, only close ones, MAX_DIST = #{$MAX_DIST}\n" + puts "\nGetting tags from the same cluster\n" puts "Normalization with padding = #{$PAD}\n" close_tags = ranked_close_tags(tagId, tags) - puts "============\n" - p "close_tags" - close_tags.each {|ct| p Tag.find(ct[:id]).name+" | "+ct[:id].to_s+" | "+ct[:rank].to_s+" | "+tags[ct[:id]][:rank].to_s} - puts "============\n" - puts "\nStarted Ranking LOs...\n" - # ranks each LO - # freq_cluster = cluster_frequency(close_tags) - LearningObject.all.each do |lo| - los_ranked[lo.id] = relevance_frequency_rank(lo, close_tags) - # los_ranked[lo.id] = relevance_frequency_rank_global(lo, close_tags, freq_cluster) - # los_ranked[lo.id] = relevance_raw_rank(lo, close_tags) + + puts "\n====== Close Tags =========\n" + puts "Name | ID | Normalized Ranking\n" + close_tags.each do |ct| + tag = Tag.find(ct[:id]) + p tag.name+" | "+ct[:id].to_s+" | "+ct[:rank].to_s+" | "+tags[ct[:id]][:rank].to_s + tag.taggings.where(taggable_type: "LearningObject").each do |tagging| + lo = tagging.taggable + if los_ranked[0][lo.id].nil? + los_ranked[0][lo.id] = relevance_frequency_rank(lo, close_tags) + # los_ranked[lo.id] = relevance_frequency_rank_global(lo, close_tags, freq_cluster) + los_ranked[1][lo.id] = relevance_raw_rank(lo, close_tags) + end + end end + puts "============\n" + + # # ranks each LO + # # freq_cluster = cluster_frequency(close_tags) + # close_tags + # LearningObject.all.each do |lo| + # los_ranked[0][lo.id] = relevance_frequency_rank(lo, close_tags) + # # los_ranked[lo.id] = relevance_frequency_rank_global(lo, close_tags, freq_cluster) + # los_ranked[1][lo.id] = relevance_raw_rank(lo, close_tags) + # end puts "\nSorting LOs...\n" # sorts by its ranking - los_ranked = los_ranked.sort_by { |id, rank| rank } + los_ranked[0] = los_ranked[0].sort_by { |id, rank| rank } + los_ranked[1] = los_ranked[1].sort_by { |id, rank| rank } # get highest ranks - los_ranked = los_ranked.last($N_LOS).reverse + los_ranked[0] = los_ranked[0].last($N_LOS).reverse + los_ranked[1] = los_ranked[1].last($N_LOS).reverse - los_ranked.each do |key, value| - puts "#{key}: #{value}" - end + # los_ranked.each do |key, value| + # puts "#{key}: #{value}" + # end return los_ranked end