From b645338e430e13872c14708732adfc7c81c9afe9 Mon Sep 17 00:00:00 2001
From: Israel Barreto Sant'Anna <ibsa14@inf.ufpr.br>
Date: Thu, 1 Jun 2017 11:53:43 -0300
Subject: [PATCH] Implemented merged search with elasticsearch

---
 Gemfile                       |   2 +-
 lib/tasks/tag_clustering.rake | 112 ++++++++++++++++++++++++++--------
 2 files changed, 88 insertions(+), 26 deletions(-)

diff --git a/Gemfile b/Gemfile
index cfcb968b..e5e60aa5 100644
--- a/Gemfile
+++ b/Gemfile
@@ -134,7 +134,7 @@ gem 'gitlab'
 gem 'pundit'
 
 # elasticsearch integration
-gem 'searchkick', '~> 1.3.6'
+gem 'searchkick'
 
 # enable/disable features by enviroments
 gem 'feature'
diff --git a/lib/tasks/tag_clustering.rake b/lib/tasks/tag_clustering.rake
index 94a14841..04a5ac57 100644
--- a/lib/tasks/tag_clustering.rake
+++ b/lib/tasks/tag_clustering.rake
@@ -8,13 +8,13 @@ namespace :tag do
     # edges_total = 0
     $graphPath = Rails.root.join(outDIR, fileName + ".net")
 
-    hash = create_hash()
-
-    create_pajek_net_graph(hash, $graphPath)
-
-    puts "\nCalling infomap with the pajek graph\n\n"
-
-    system("infomap --ftree #{$graphPath} #{Rails.root.join(outDIR)}")
+    # hash = create_hash()
+    #
+    # create_pajek_net_graph(hash, $graphPath)
+    #
+    # puts "\nCalling infomap with the pajek graph\n\n"
+    #
+    # system("infomap --ftree #{$graphPath} #{Rails.root.join(outDIR)}")
 
     # create cluster tree from ftree
 
@@ -63,29 +63,91 @@ namespace :tag do
     # number of results to return
     $N_LOS = 50
 
-    # searched tag id
-    $TAG = Tag.find_by_name("Molécula")
+    # searched tag
+    $TAG = Tag.find_by_name("DNA")
 
     puts "\nStarted Calculating relevant results for tag #{$TAG.name}\n"
 
     relevant_los = find_relevant_results($TAG.id, tags)
 
-    relevant_los.each do |type|
-      puts "\n============ Learning Objects ===============\n"
-      puts "ID: Ranking | Name | Tags\n\n"
-      type.each do |id, rank|
-        lo = LearningObject.find(id)
-        puts "-----"
-        p lo.id.to_s+": "+rank.to_s+" | "+lo.try(:name)
-        lo.tags.each {|t| print t.name+" | "}
-        puts ""
-      end
+    # relevant_los.each do |type|
+    #   puts "\n============ Learning Objects ===============\n"
+    #   puts "ID: Ranking | Name | Tags\n\n"
+    #   type.each do |id, rank|
+    #     lo = LearningObject.find(id)
+    #     puts "-----"
+    #     p lo.id.to_s+": "+rank.to_s+" | "+lo.try(:name)
+    #     lo.tags.each {|t| print t.name+" | "}
+    #     puts ""
+    #   end
+    # end
+
+    search_los = LearningObject.search $TAG.name, limit: $N_LOS
+    puts "\n============ Learning Objects Searched ===============\n"
+    search_los.each do |lo|
+      puts "#{lo.id}: #{lo.search_hit['_score']}"
+    end
+    puts "ID: Ranking | Name | Tags\n\n"
+    search_los.each do |lo|
+      puts "-----"
+      p lo.id.to_s+": "+lo.search_hit["_score"].to_s+" | "+lo.try(:name)
+      lo.tags.each {|t| print t.name+" | "}
+      puts ""
+    end
+
+
+    merged_los = merge(search_los, relevant_los[0])
+
+    puts "\n============ Learning Objects Merged ===============\n"
+    merged_los.each do |id, rank|
+      puts "#{id}: #{rank}"
+    end
+    puts "ID: Ranking | Name | Tags\n\n"
+    merged_los.each do |id, rank|
+      lo = LearningObject.find(id)
+      puts "-----"
+      p lo.id.to_s+": "+rank.to_s+" | "+lo.try(:name)
+      lo.tags.each {|t| print t.name+" | "}
+      puts ""
     end
 
   end # task
 
   private
 
+  # Blends Elasticsearch hits with the tag-cluster ranking into one result list.
+  #
+  # search_los   - records responding to #id and #search_hit (Hash with '_score').
+  # relevant_los - Array of [id, rank] pairs.
+  #
+  # Each list is scaled by dividing by its own maximum value. An id found in
+  # both lists scores rank * (1 + score); any other id keeps its scaled value.
+  # Neither argument is modified.
+  #
+  # Returns up to $N_LOS (50 when unset) [id, score] pairs, best score first.
+  def merge(search_los, relevant_los)
+    # fdiv avoids Integer truncation; an empty or all-zero list scales to 0.0
+    # instead of raising on nil or dividing by zero.
+    normalize = lambda do |pairs|
+      max = pairs.map { |_id, value| value }.max.to_f
+      pairs.map { |id, value| [id, max.zero? ? 0.0 : value.fdiv(max)] }
+    end
+
+    scores = normalize.call(search_los.map { |lo| [lo.id, lo.search_hit['_score']] })
+    ranks = normalize.call(relevant_los).to_h
+
+    # Hash#delete consumes each matched rank, so what is left over afterwards is
+    # exactly the set of LOs that only the cluster ranking produced.
+    merged_los = scores.map do |id, score|
+      rank = ranks.delete(id)
+      [id, rank ? rank * (1 + score) : score]
+    end
+
+    # cluster-only LOs compete with their scaled rank alone
+    merged_los.concat(ranks.to_a)
+    merged_los.sort_by { |_id, score| score }.reverse.first($N_LOS || 50)
+  end
+
   # hash[id1][id2] will equal how many times tags with id1 and id2 appear together on a LO
   def create_hash()
     puts "\nCreating hash of tag concurrences #{path}\n"
@@ -198,7 +260,7 @@ namespace :tag do
     puts "\nStarted Ranking LOs...\n"
 
     puts "\n====== Close Tags =========\n"
-    puts "Name | ID | Normalized Ranking\n"
+    puts "Name | ID | Normalized Ranking\n\n"
     close_tags.each do |ct|
       tag = Tag.find(ct[:id])
       p tag.name+" | "+ct[:id].to_s+" | "+ct[:rank].to_s+" | "+tags[ct[:id]][:rank].to_s
@@ -227,12 +289,12 @@ namespace :tag do
     los_ranked[0] = los_ranked[0].sort_by { |id, rank| rank }
     los_ranked[1] = los_ranked[1].sort_by { |id, rank| rank }
     # get highest ranks
-    los_ranked[0] = los_ranked[0].last($N_LOS).reverse
-    los_ranked[1] = los_ranked[1].last($N_LOS).reverse
+    los_ranked[0] = los_ranked[0].reverse.first($N_LOS)
+    los_ranked[1] = los_ranked[1].reverse.first($N_LOS)
 
-    # los_ranked.each do |key, value|
-    #   puts "#{key}: #{value}"
-    # end
+    los_ranked[0].each do |key, value|
+      puts "#{key}: #{value}"
+    end
 
     return los_ranked
   end
-- 
GitLab