From b645338e430e13872c14708732adfc7c81c9afe9 Mon Sep 17 00:00:00 2001 From: Israel Barreto Sant'Anna <ibsa14@inf.ufpr.br> Date: Thu, 1 Jun 2017 11:53:43 -0300 Subject: [PATCH] Implemented merged search with elasticsearch --- Gemfile | 2 +- lib/tasks/tag_clustering.rake | 112 ++++++++++++++++++++++++++-------- 2 files changed, 88 insertions(+), 26 deletions(-) diff --git a/Gemfile b/Gemfile index cfcb968b..e5e60aa5 100644 --- a/Gemfile +++ b/Gemfile @@ -134,7 +134,7 @@ gem 'gitlab' gem 'pundit' # elasticsearch integration -gem 'searchkick', '~> 1.3.6' +gem 'searchkick' # enable/disable features by enviroments gem 'feature' diff --git a/lib/tasks/tag_clustering.rake b/lib/tasks/tag_clustering.rake index 94a14841..04a5ac57 100644 --- a/lib/tasks/tag_clustering.rake +++ b/lib/tasks/tag_clustering.rake @@ -8,13 +8,13 @@ namespace :tag do # edges_total = 0 $graphPath = Rails.root.join(outDIR, fileName + ".net") - hash = create_hash() - - create_pajek_net_graph(hash, $graphPath) - - puts "\nCalling infomap with the pajek graph\n\n" - - system("infomap --ftree #{$graphPath} #{Rails.root.join(outDIR)}") + # hash = create_hash() + # + # create_pajek_net_graph(hash, $graphPath) + # + # puts "\nCalling infomap with the pajek graph\n\n" + # + # system("infomap --ftree #{$graphPath} #{Rails.root.join(outDIR)}") # create cluster tree from ftree @@ -63,29 +63,91 @@ namespace :tag do # number of results to return $N_LOS = 50 - # searched tag id - $TAG = Tag.find_by_name("Molécula") + # searched tag + $TAG = Tag.find_by_name("DNA") puts "\nStarted Calculating relevant results for tag #{$TAG.name}\n" relevant_los = find_relevant_results($TAG.id, tags) - relevant_los.each do |type| - puts "\n============ Learning Objects ===============\n" - puts "ID: Ranking | Name | Tags\n\n" - type.each do |id, rank| - lo = LearningObject.find(id) - puts "-----" - p lo.id.to_s+": "+rank.to_s+" | "+lo.try(:name) - lo.tags.each {|t| print t.name+" | "} - puts "" - end + # relevant_los.each do |type| + # puts "\n============ Learning Objects ===============\n" + # puts "ID: Ranking | Name | Tags\n\n" + # type.each do |id, rank| + # lo = LearningObject.find(id) + # puts "-----" + # p lo.id.to_s+": "+rank.to_s+" | "+lo.try(:name) + # lo.tags.each {|t| print t.name+" | "} + # puts "" + # end + # end + + search_los = LearningObject.search $TAG.name, limit: $N_LOS + puts "\n============ Learning Objects Searched ===============\n" + search_los.each do |lo| + puts "#{lo.id}: #{lo.search_hit['_score']}" + end + puts "ID: Ranking | Name | Tags\n\n" + search_los.each do |lo| + puts "-----" + p lo.id.to_s+": "+lo.search_hit["_score"].to_s+" | "+lo.try(:name) + lo.tags.each {|t| print t.name+" | "} + puts "" + end + + + merged_los = merge(search_los, relevant_los[0]) + + puts "\n============ Learning Objects Merged ===============\n" + merged_los.each do |id, rank| + puts "#{id}: #{rank}" + end + puts "ID: Ranking | Name | Tags\n\n" + merged_los.each do |id, rank| + lo = LearningObject.find(id) + puts "-----" + p lo.id.to_s+": "+rank.to_s+" | "+lo.try(:name) + lo.tags.each {|t| print t.name+" | "} + puts "" end end # task private + def merge(search_los, relevant_los) + merged_los = [] + + max = search_los.first.search_hit['_score'] + # min = search_los[search_los.size-1].search_hit['_score'] + min = 0 + + search_los.each do |slo| + slo.search_hit['_score'] = (slo.search_hit['_score']-min)/(max-min) + end + + max = relevant_los.first[1] + # min = relevant_los.last[1] + + relevant_los.each do |rlo| + rlo[1] = (rlo[1]-min)/(max-min) + end + + search_los.each do |slo| + relevant_los.each_with_index do |rlo, index| + if slo.id == rlo[0] + slo.search_hit['_score'] = rlo[1]*(1+slo.search_hit['_score']) + relevant_los.delete_at(index) + end + end + merged_los << [slo.id, slo.search_hit['_score']] + end + + merged_los.push(*relevant_los) + merged_los = merged_los.sort_by { |lo| lo[1] } + return merged_los.reverse.first(50) + end + # hash[id1][id2] will equal how many times tags with id1 and id2 appear together on a LO def create_hash() puts "\nCreating hash of tag concurrences #{path}\n" @@ -198,7 +260,7 @@ namespace :tag do puts "\nStarted Ranking LOs...\n" puts "\n====== Close Tags =========\n" - puts "Name | ID | Normalized Ranking\n" + puts "Name | ID | Normalized Ranking\n\n" close_tags.each do |ct| tag = Tag.find(ct[:id]) p tag.name+" | "+ct[:id].to_s+" | "+ct[:rank].to_s+" | "+tags[ct[:id]][:rank].to_s @@ -227,12 +289,12 @@ namespace :tag do los_ranked[0] = los_ranked[0].sort_by { |id, rank| rank } los_ranked[1] = los_ranked[1].sort_by { |id, rank| rank } # get highest ranks - los_ranked[0] = los_ranked[0].last($N_LOS).reverse - los_ranked[1] = los_ranked[1].last($N_LOS).reverse + los_ranked[0] = los_ranked[0].reverse.first($N_LOS) + los_ranked[1] = los_ranked[1].reverse.first($N_LOS) - # los_ranked.each do |key, value| - # puts "#{key}: #{value}" - # end + los_ranked[0].each do |key, value| + puts "#{key}: #{value}" + end return los_ranked end -- GitLab