Merge branch 'tag-frequency-task' of gitlab.c3sl.ufpr.br:portalmec/portalmec...

Merge branch 'tag-frequency-task' of gitlab.c3sl.ufpr.br:portalmec/portalmec into tag-frequency-task

Merge branch 'tag-frequency-task' of gitlab.c3sl.ufpr.br:portalmec/portalmec...
2e4b88fe · bfs15 · 6590821a · e4603611 · 2e4b88fe · 2e4b88fe
Commit 2e4b88fe authored 7 years ago by bfs15
--- a/app/controllers/concerns/paginator.rb
+++ b/app/controllers/concerns/paginator.rb
@@ -29,13 +29,15 @@ module Paginator
  private

  def limit
-    return 12 if params[:limit].blank?
-    params[:limit].to_i
+    return params[:limit].to_i if !params[:limit].blank?
+    return params[:results_per_page].to_i if !params[:results_per_page].blank?
+    12
  end

  def offset
-    return 0 if params[:offset].blank?
-    params[:offset].to_i
+    return params[:offset].to_i if !params[:offset].blank?
+    return params[:page].to_i*params[:results_per_page].to_i if !params[:page].blank? && !params[:results_per_page].blank?
+    return 0
  end

  def total_count(model)

--- a/app/controllers/v1/search_controller.rb
+++ b/app/controllers/v1/search_controller.rb
@@ -18,7 +18,7 @@
 # along with portalmec.  If not, see <http://www.gnu.org/licenses/>.

 class V1::SearchController < ApplicationController
-  before_action :set_search
+  before_action :set_search, except: [:tag]

  # GET v1/search
  # GET v1/search.json
@@ -38,6 +38,13 @@ class V1::SearchController < ApplicationController
    render json: @search.errors, status: :bad_request
  end

+  # GET v1/search/tag
+  # GET v1/search/tag.json
+  def tag
+    results = TagSearchService.search(Tag.find(tag_search_params[:tag]))
+    render json: paginate results
+  end
+
  private

  def set_search
@@ -48,4 +55,8 @@ class V1::SearchController < ApplicationController
  # Strong parameters for the search actions: permits the scalar keys page,
  # results_per_page, order, query and search_class, plus the array-valued
  # keys tags, subjects, educational_stages and object_types.
  def search_params
    params.permit(:page, :results_per_page, :order, :query, :search_class, tags: [], subjects: [], educational_stages: [], object_types: [])
  end
+
+  def tag_search_params
+    params.permit(:page, :results_per_page, :tag)
+  end
 end
--- a/app/services/tag_search_service.rb
+++ b/app/services/tag_search_service.rb
+
+# Copyright (C) 2015 Centro de Computacao Cientifica e Software Livre
+# Departamento de Informatica - Universidade Federal do Parana
+#
+# This file is part of portalmec.
+#
+# portalmec is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# portalmec is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with portalmec.  If not, see <http://www.gnu.org/licenses/>.
+
+module TagSearchService
+  CACHE_KEY = "tag_clusters".freeze
+  PAD = 1
+  PADM = 0.05
+
+  def root_dir
+    Rails.root.join("tmp").to_s
+  end
+
+  def file_path
+    Rails.root.join(root_dir, "tags").to_s
+  end
+
+  def tags_cluster
+    Rails.cache.fetch(CACHE_KEY) do
+      parseTree(file_path+".tree")
+    end
+  end
+
+  def search(tag, limit = -1)
+    # create cluster tree from ftree
+    tags = tags_cluster()
+
+    # puts "\nStarted Calculating relevant results for tag #{tag.name}\n"
+
+    relevant_los = find_relevant_results(tag.id, tags, limit)
+
+    # puts "\n============ Learning Objects - Cluster Search ===============\n"
+    # puts "ID: Ranking | Name | Tags\n\n"
+    # relevant_los.each do |id, rank|
+    #   lo = LearningObject.find(id)
+    #   # puts "-----"
+    #   p lo.id.to_s+": "+rank.to_s+" | "+lo.try(:name)
+    #   lo.tags.each {|t| print t.name+" | "}
+    #   # puts ""
+    # end
+
+    search_los = LearningObject.search tag.name, limit: limit, explain:true
+
+    # puts "\n============ Learning Objects - Elasticsearch ===============\n"
+    # search_los.each do |lo|
+    #   # puts "#{lo.id}: #{lo.search_hit['_score']}"
+    # end
+    # puts "ID: Ranking | Name | Tags\n\n"
+    # search_los.each do |lo|
+    #   # puts "-----"
+    #   p lo.id.to_s+": "+lo.search_hit["_score"].to_s+" | "+lo.try(:name)
+    #   lo.tags.each {|t| print t.name+" | "}
+    #   # puts ""
+    # end
+
+    merged_los = merge(search_los, relevant_los)
+
+    # puts "\n============ Learning Objects - Merged ===============\n"
+    # merged_los.each do |id, rank|
+    #   # puts "#{id}: #{rank}"
+    # end
+    # # puts "ID: Ranking | Name | Tags\n\n"
+    # merged_los.each do |id, rank|
+    #   lo = LearningObject.find(id)
+    #   # puts "-----"
+    #   p lo.id.to_s+": "+rank.to_s+" | "+lo.try(:name)
+    #   lo.tags.each {|t| print t.name+" | "}
+    #   # puts ""
+    # end
+    merged_los
+  end
+
+  def merge(search_los, relevant_los)
+    # puts "\n---------------------- MERGING -----------------------"
+    merged_los = []
+
+    max = search_los.first.search_hit['_score']
+    min = search_los[search_los.size-1].search_hit['_score']
+    # min = 0
+    max_boost = 0
+    response = search_los.response['hits']['hits']
+    search_los.each_with_index do |slo, i|
+      detail = response[i]['_explanation']['details'][0]
+      while detail['description'] != "boost"
+        detail = detail['details'][0]
+      end
+      boost = detail['value']
+      max_boost = boost if boost > max_boost
+
+      slo.search_hit['_score'] = boost*(slo.search_hit['_score']*(1+PADM)-min)/(max-min)
+    end
+
+    max = relevant_los.first[1]
+    min = relevant_los.last[1]
+    # puts "\nMax boost found: "+max_boost.to_s
+    relevant_los.each do |rlo|
+      rlo[1] = max_boost*(rlo[1]*(1+PADM)-min)/(max-min)
+    end
+
+    search_los.each do |slo|
+      relevant_los.each_with_index do |rlo, index|
+        if slo.id == rlo[0]
+          slo.search_hit['_score'] = slo.search_hit['_score'] + rlo[1]
+          relevant_los.delete_at(index)
+        end
+      end
+      merged_los << [slo.id, slo.search_hit['_score']]
+    end
+
+    merged_los.push(*relevant_los)
+    merged_los = merged_los.sort_by { |lo| lo[1]*-1 }
+    return merged_los.first(limit) if limit != -1
+
+    merged_los
+  end
+
+  def find_relevant_results(tagId, tags, limit)
+    los_ranked = {}
+
+    # puts "\nGetting tags from the same cluster\n"
+    # puts "Normalization with padding = #{PAD}\n"
+    close_tags = ranked_close_tags(tagId, tags)
+
+    # Uncomment the next line if you want to sort by global tag frequency
+    # freq_cluster = cluster_frequency(close_tags)
+
+    # puts "\nStarted Ranking LOs...\n"
+
+    # puts "\n====== Close Tags =========\n"
+    # puts "Name | ID | Normalized Ranking\n\n"
+    close_tags.each do |ct|
+      tag = Tag.find(ct[:id])
+      # p tag.name+" | "+ct[:id].to_s+" | "+ct[:rank].to_s+" | "+tags[ct[:id]][:rank].to_s
+      tag.taggings.where(taggable_type: "LearningObject").each do |tagging|
+        lo = tagging.taggable
+        if los_ranked[lo.id].nil?
+          # Uncomment the next line if you want to sort by local tag frequency
+          # los_ranked[lo.id] = relevance_frequency_rank(lo, close_tags)
+
+          # Uncomment the next line if you want to sort by global tag frequency
+          # los_ranked[lo.id] = relevance_frequency_rank_global(lo, close_tags, freq_cluster)
+
+          # Uncomment the next line if you want to sort by tag cluster rank
+          los_ranked[lo.id] = relevance_raw_rank(lo, close_tags)
+        end
+      end
+    end
+    # puts "============\n"
+
+    # puts "\nSorting LOs...\n"
+    # sorts by its ranking
+    los_ranked = los_ranked.sort_by { |id, rank| rank*-1 }
+    # get highest ranks
+    return los_ranked.first(limit) if limit != -1
+
+    # los_ranked.each do |key, value|
+    #   # puts "#{key}: #{value}"
+    # end
+
+    los_ranked
+  end
+
+  # ranking #
+  def ranked_close_tags(tagId, tags)
+    close_tags = []
+
+    tags[tagId][:parent][:childs].each do |t1|
+      # calculate logarithmic distance between tag flows
+      # lower value, closer, more relevant
+      # the tag you are searching for will be at distance 0 of itself
+      lg_dist = ( Math.log2(tags[tagId][:flow]) - Math.log2(t1[:flow]) ).abs
+
+      close_tags << { id: t1[:id], rank: lg_dist}
+    end
+
+    normalize_complement_close(close_tags)
+  end
+
+  # normalizes and complements, 0 distance will be 1,
+  # max dist will be closest to 0
+  def normalize_complement_close(tags)
+    max = 0
+
+    # find max rank
+    tags.each do |t|
+      max = t[:rank] if t[:rank] > max
+    end
+
+    # normalize, min will always be 0
+    tags.each do |t|
+      # increase max by $PAD so its rank isn't 0
+      t[:rank] = 1 - (t[:rank]/(max*(1 + PAD)))
+    end
+
+    tags
+  end
+
+  def relevance_frequency_rank(lo, close_tags)
+    itf_sum = 0
+
+    wdf = 0
+    wdf = 1/(Math.log2(lo.tags.size)+1) if lo.tags.size != 0
+
+    lo.tags.each do |t|
+      close_tags.each do |tag|
+        if tag[:id] == t.id
+          itf_sum += tag[:rank]*(Math.log2(Tag.all.size/t.taggings.size)+1)
+        end
+      end
+    end
+
+    wdf*itf_sum
+  end
+
+  # returns the sum of how many times each tag in cluster appears in space
+  def cluster_frequency(cluster)
+    freq_cluster = 0
+    cluster.each do |t|
+      freq_cluster += Tag.find(t[:id]).taggings.size
+    end
+
+    freq_cluster
+  end
+
+  def relevance_frequency_rank_global(lo, close_tags, freq_cluster)
+    # for each tag in LO that is in the cluster, accumulate it's rank
+    rank_sum = 1
+    lo.tags.each do |t|
+      close_tags.each do |tag|
+        if tag[:id] == t.id
+          rank_sum += tag[:rank]
+        end
+      end
+    end
+
+    wdf = 0
+    wdf = (Math.log2(rank_sum)/(Math.log2(lo.tags.size)+1)) if lo.tags.size != 0
+
+    itf = Math.log2(Tag.all.size/freq_cluster)+1
+
+    wdf*itf
+  end
+
+  # returns the rank sum of the tags in the LO
+  def relevance_raw_rank(lo, close_tags)
+    # for each tag in LO that is in the cluster, accumulate it's rank
+    rank_sum = 0
+    lo.tags.each do |t|
+      close_tags.each do |tag|
+        if tag[:id] == t.id
+          rank_sum += tag[:rank]
+        end
+      end
+    end
+
+    rank_sum
+  end
+
+  def parseTree(path)
+    # parse .ftree into a 'tree', leafs are tags with flow number, branches are the clustes
+    # create tags list, tags[tagId] == the tag leaf inside the cluster tree
+
+    # puts "\nParsing .ftree output into a 'tree'\n"
+
+    clusters = {childs: [], parent: nil}
+    tags = {}
+    countClusters = {}
+
+    File.open(path+".ftree", "r") do |f|
+      f.gets
+      f.gets
+
+      while line = f.gets
+        break if !line.include? ':'
+
+        tmp = line.split(' ')
+        # tmp[0] format like: 2:7:5:4, 2 an 7 have clusters as childs, 5 has leafs, 4 is one of them (tag)
+        # get id of each level of the tree
+        ftree = tmp[0].split(':')[0..-2]
+        # last number of the sequence is the leaf Id
+        leafId = tmp[0].split(':')[-1].to_i
+
+        # last number on the line is the tag Id
+        tagId = tmp[-1].to_i
+        # second number on the line
+        flow = tmp[1].to_f
+        # between the third and second to last is where the name lies
+        name = tmp[2..-2].join(' ')[1..-2]
+
+        # iterate through the levels of the tree
+        it = clusters # start at the root
+        ftree.each do |clusterId|
+          clusterId = clusterId.to_i - 1 # on the file they start at 1, we want 0
+          if it[:childs][clusterId].nil? # if this id doesn't exist, create it as child of 'it'
+            it[:childs][clusterId] = {childs: [], parent: nil}
+            it[:childs][clusterId][:parent] = it
+          end
+          # go down a level
+          it = it[:childs][clusterId]
+        end
+        countClusters[it] = 1 # set this cluster in this hash, for counting purposes
+        # 'it' is the cluster leafId is a child of, so add it
+        it[:childs][leafId-1] = {id: tagId, flow: flow, name: name, parent: it}
+        # put the leaf on this hash for easy acess by the tagId
+        tags[tagId] = it[:childs][leafId-1]
+      end
+    end
+    # puts "\nNumber of clusters found: #{countClusters.size}\n"
+
+    tags
+  end
+
+end
--- a/config/routes.rb
+++ b/config/routes.rb
@@ -133,6 +133,7 @@ Rails.application.routes.draw do
    resources :search, only: :index do
      collection do
        get :autocomplete
+        get :tag
      end
    end