diff --git a/app/controllers/concerns/paginator.rb b/app/controllers/concerns/paginator.rb
index f855a13d2d4d6dcee9099538260257fb1b0dd0e9..187b2d04fe6c093ffc70ab0ff602fca4d54e47b0 100644
--- a/app/controllers/concerns/paginator.rb
+++ b/app/controllers/concerns/paginator.rb
@@ -29,13 +29,15 @@ module Paginator
   private
 
   def limit
-    return 12 if params[:limit].blank?
-    params[:limit].to_i
+    return params[:limit].to_i if !params[:limit].blank?
+    return params[:results_per_page].to_i if !params[:results_per_page].blank?
+    12
   end
 
   def offset
-    return 0 if params[:offset].blank?
-    params[:offset].to_i
+    return params[:offset].to_i if !params[:offset].blank?
+    return params[:page].to_i*params[:results_per_page].to_i if !params[:page].blank? && !params[:results_per_page].blank?
+    return 0
   end
 
   def total_count(model)
diff --git a/app/controllers/v1/search_controller.rb b/app/controllers/v1/search_controller.rb
index adb52cd86a94961a117e6bc888f14a5c7b780fd9..7cacad4fb237608b0cebc1e0c5027a6d3cdc44a9 100644
--- a/app/controllers/v1/search_controller.rb
+++ b/app/controllers/v1/search_controller.rb
@@ -18,7 +18,7 @@
 # along with portalmec. If not, see <http://www.gnu.org/licenses/>.
 
 class V1::SearchController < ApplicationController
-  before_action :set_search
+  before_action :set_search, except: [:tag]
 
   # GET v1/search
   # GET v1/search.json
@@ -38,6 +38,13 @@ class V1::SearchController < ApplicationController
     render json: @search.errors, status: :bad_request
   end
 
+  # GET v1/search/tag
+  # GET v1/search/tag.json
+  def tag
+    results = TagSearchService.search(Tag.find(tag_search_params[:tag]))
+    render json: paginate results
+  end
+
   private
 
   def set_search
@@ -48,4 +55,8 @@ class V1::SearchController < ApplicationController
   def search_params
     params.permit(:page, :results_per_page, :order, :query, :search_class, tags: [], subjects: [], educational_stages: [], object_types: [])
   end
+
+  def tag_search_params
+    params.permit(:page, :results_per_page, :tag)
+  end
 end
diff --git a/app/services/tag_search_service.rb b/app/services/tag_search_service.rb
new file mode 100644
index 0000000000000000000000000000000000000000..1e67e2e502dbb09631111d431f168851057585da
--- /dev/null
+++ b/app/services/tag_search_service.rb
@@ -0,0 +1,328 @@
+
+# Copyright (C) 2015 Centro de Computacao Cientifica e Software Livre
+# Departamento de Informatica - Universidade Federal do Parana
+#
+# This file is part of portalmec.
+#
+# portalmec is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# portalmec is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with portalmec. If not, see <http://www.gnu.org/licenses/>.
+
+module TagSearchService
+  extend self # let the controller call TagSearchService.search directly on the module
+  CACHE_KEY = "tag_clusters".freeze
+  PAD = 1
+  PADM = 0.05
+
+  def root_dir
+    Rails.root.join("tmp").to_s
+  end
+
+  def file_path
+    Rails.root.join(root_dir, "tags").to_s
+  end
+
+  def tags_cluster
+    Rails.cache.fetch(CACHE_KEY) do
+      parseTree(file_path+".tree")
+    end
+  end
+
+  def search(tag, limit = -1)
+    # create cluster tree from ftree
+    tags = tags_cluster()
+
+    # puts "\nStarted Calculating relevant results for tag #{tag.name}\n"
+
+    relevant_los = find_relevant_results(tag.id, tags, limit)
+
+    # puts "\n============ Learning Objects - Cluster Search ===============\n"
+    # puts "ID: Ranking | Name | Tags\n\n"
+    # relevant_los.each do |id, rank|
+    #   lo = LearningObject.find(id)
+    #   # puts "-----"
+    #   p lo.id.to_s+": "+rank.to_s+" | "+lo.try(:name)
+    #   lo.tags.each {|t| print t.name+" | "}
+    #   # puts ""
+    # end
+
+    search_los = LearningObject.search tag.name, limit: limit, explain: true
+
+    # puts "\n============ Learning Objects - Elasticsearch ===============\n"
+    # search_los.each do |lo|
+    #   # puts "#{lo.id}: #{lo.search_hit['_score']}"
+    # end
+    # puts "ID: Ranking | Name | Tags\n\n"
+    # search_los.each do |lo|
+    #   # puts "-----"
+    #   p lo.id.to_s+": "+lo.search_hit["_score"].to_s+" | "+lo.try(:name)
+    #   lo.tags.each {|t| print t.name+" | "}
+    #   # puts ""
+    # end
+
+    merged_los = merge(search_los, relevant_los, limit)
+
+    # puts "\n============ Learning Objects - Merged ===============\n"
+    # merged_los.each do |id, rank|
+    #   # puts "#{id}: #{rank}"
+    # end
+    # # puts "ID: Ranking | Name | Tags\n\n"
+    # merged_los.each do |id, rank|
+    #   lo = LearningObject.find(id)
+    #   # puts "-----"
+    #   p lo.id.to_s+": "+rank.to_s+" | "+lo.try(:name)
+    #   lo.tags.each {|t| print t.name+" | "}
+    #   # puts ""
+    # end
+    merged_los
+  end
+
+  def merge(search_los, relevant_los, limit = -1)
+    # puts "\n---------------------- MERGING -----------------------"
+    merged_los = []
+
+    max = search_los.first.search_hit['_score']
+    min = search_los[search_los.size-1].search_hit['_score']
+    # min = 0
+    max_boost = 0
+    response = search_los.response['hits']['hits']
+    search_los.each_with_index do |slo, i|
+      detail = response[i]['_explanation']['details'][0]
+      while detail['description'] != "boost"
+        detail = detail['details'][0]
+      end
+      boost = detail['value']
+      max_boost = boost if boost > max_boost
+
+      slo.search_hit['_score'] = boost*(slo.search_hit['_score']*(1+PADM)-min)/(max-min)
+    end
+
+    max = relevant_los.first[1]
+    min = relevant_los.last[1]
+    # puts "\nMax boost found: "+max_boost.to_s
+    relevant_los.each do |rlo|
+      rlo[1] = max_boost*(rlo[1]*(1+PADM)-min)/(max-min)
+    end
+
+    search_los.each do |slo|
+      relevant_los.each_with_index do |rlo, index|
+        if slo.id == rlo[0]
+          slo.search_hit['_score'] = slo.search_hit['_score'] + rlo[1]
+          relevant_los.delete_at(index)
+        end
+      end
+      merged_los << [slo.id, slo.search_hit['_score']]
+    end
+
+    merged_los.push(*relevant_los)
+    merged_los = merged_los.sort_by { |lo| lo[1]*-1 }
+    return merged_los.first(limit) if limit != -1
+
+    merged_los
+  end
+
+  def find_relevant_results(tagId, tags, limit)
+    los_ranked = {}
+
+    # puts "\nGetting tags from the same cluster\n"
+    # puts "Normalization with padding = #{PAD}\n"
+    close_tags = ranked_close_tags(tagId, tags)
+
+    # Uncomment the next line if you want to sort by global tag frequency
+    # freq_cluster = cluster_frequency(close_tags)
+
+    # puts "\nStarted Ranking LOs...\n"
+
+    # puts "\n====== Close Tags =========\n"
+    # puts "Name | ID | Normalized Ranking\n\n"
+    close_tags.each do |ct|
+      tag = Tag.find(ct[:id])
+      # p tag.name+" | "+ct[:id].to_s+" | "+ct[:rank].to_s+" | "+tags[ct[:id]][:rank].to_s
+      tag.taggings.where(taggable_type: "LearningObject").each do |tagging|
+        lo = tagging.taggable
+        if los_ranked[lo.id].nil?
+          # Uncomment the next line if you want to sort by local tag frequency
+          # los_ranked[lo.id] = relevance_frequency_rank(lo, close_tags)
+
+          # Uncomment the next line if you want to sort by global tag frequency
+          # los_ranked[lo.id] = relevance_frequency_rank_global(lo, close_tags, freq_cluster)
+
+          # Uncomment the next line if you want to sort by tag cluster rank
+          los_ranked[lo.id] = relevance_raw_rank(lo, close_tags)
+        end
+      end
+    end
+    # puts "============\n"
+
+    # puts "\nSorting LOs...\n"
+    # sorts by its ranking
+    los_ranked = los_ranked.sort_by { |id, rank| rank*-1 }
+    # get highest ranks
+    return los_ranked.first(limit) if limit != -1
+
+    # los_ranked.each do |key, value|
+    #   # puts "#{key}: #{value}"
+    # end
+
+    los_ranked
+  end
+
+  # ranking #
+  def ranked_close_tags(tagId, tags)
+    close_tags = []
+
+    tags[tagId][:parent][:childs].each do |t1|
+      # calculate logarithmic distance between tag flows
+      # lower value, closer, more relevant
+      # the tag you are searching for will be at distance 0 of itself
+      lg_dist = ( Math.log2(tags[tagId][:flow]) - Math.log2(t1[:flow]) ).abs
+
+      close_tags << { id: t1[:id], rank: lg_dist}
+    end
+
+    normalize_complement_close(close_tags)
+  end
+
+  # normalizes and complements, 0 distance will be 1,
+  # max dist will be closest to 0
+  def normalize_complement_close(tags)
+    max = 0
+
+    # find max rank
+    tags.each do |t|
+      max = t[:rank] if t[:rank] > max
+    end
+
+    # normalize, min will always be 0
+    tags.each do |t|
+      # pad max by PAD so the most distant tag's rank isn't 0
+      t[:rank] = 1 - (t[:rank]/(max*(1 + PAD)))
+    end
+
+    tags
+  end
+
+  def relevance_frequency_rank(lo, close_tags)
+    itf_sum = 0
+
+    wdf = 0
+    wdf = 1/(Math.log2(lo.tags.size)+1) if lo.tags.size != 0
+
+    lo.tags.each do |t|
+      close_tags.each do |tag|
+        if tag[:id] == t.id
+          itf_sum += tag[:rank]*(Math.log2(Tag.all.size/t.taggings.size)+1)
+        end
+      end
+    end
+
+    wdf*itf_sum
+  end
+
+  # returns the sum of how many times each tag in the cluster appears in the space
+  def cluster_frequency(cluster)
+    freq_cluster = 0
+    cluster.each do |t|
+      freq_cluster += Tag.find(t[:id]).taggings.size
+    end
+
+    freq_cluster
+  end
+
+  def relevance_frequency_rank_global(lo, close_tags, freq_cluster)
+    # for each tag in the LO that is in the cluster, accumulate its rank
+    rank_sum = 1
+    lo.tags.each do |t|
+      close_tags.each do |tag|
+        if tag[:id] == t.id
+          rank_sum += tag[:rank]
+        end
+      end
+    end
+
+    wdf = 0
+    wdf = (Math.log2(rank_sum)/(Math.log2(lo.tags.size)+1)) if lo.tags.size != 0
+
+    itf = Math.log2(Tag.all.size/freq_cluster)+1
+
+    wdf*itf
+  end
+
+  # returns the rank sum of the tags in the LO
+  def relevance_raw_rank(lo, close_tags)
+    # for each tag in the LO that is in the cluster, accumulate its rank
+    rank_sum = 0
+    lo.tags.each do |t|
+      close_tags.each do |tag|
+        if tag[:id] == t.id
+          rank_sum += tag[:rank]
+        end
+      end
+    end
+
+    rank_sum
+  end
+
+  def parseTree(path)
+    # parse the .ftree into a 'tree': leafs are tags with flow numbers, branches are the clusters
+    # create tags list, tags[tagId] == the tag leaf inside the cluster tree
+
+    # puts "\nParsing .ftree output into a 'tree'\n"
+
+    clusters = {childs: [], parent: nil}
+    tags = {}
+    countClusters = {}
+
+    File.open(path+".ftree", "r") do |f|
+      f.gets
+      f.gets
+
+      while line = f.gets
+        break if !line.include? ':'
+        tmp = line.split(' ')
+        # tmp[0] has a format like 2:7:5:4, where 2 and 7 have clusters as childs, 5 has leafs, and 4 is one of them (a tag)
+        # get the id of each level of the tree
+        ftree = tmp[0].split(':')[0..-2]
+        # the last number of the sequence is the leaf id
+        leafId = tmp[0].split(':')[-1].to_i
+
+        # the last number on the line is the tag id
+        tagId = tmp[-1].to_i
+        # the second number on the line is the flow
+        flow = tmp[1].to_f
+        # between the third and the second to last is where the name lies
+        name = tmp[2..-2].join(' ')[1..-2]
+
+        # iterate through the levels of the tree
+        it = clusters # start at the root
+        ftree.each do |clusterId|
+          clusterId = clusterId.to_i - 1 # in the file they start at 1, we want 0
+          if it[:childs][clusterId].nil? # if this id doesn't exist yet, create it as a child of 'it'
+            it[:childs][clusterId] = {childs: [], parent: nil}
+            it[:childs][clusterId][:parent] = it
+          end
+          # go down a level
+          it = it[:childs][clusterId]
+        end
+        countClusters[it] = 1 # record this cluster in this hash, for counting purposes
+        # 'it' is the cluster leafId is a child of, so add the leaf to it
+        it[:childs][leafId-1] = {id: tagId, flow: flow, name: name, parent: it}
+        # put the leaf in this hash for easy access by tagId
+        tags[tagId] = it[:childs][leafId-1]
+      end
+    end
+    # puts "\nNumber of clusters found: #{countClusters.size}\n"
+
+    tags
+  end
+
+end
diff --git a/config/routes.rb b/config/routes.rb
index 4be4bc61253723249c0416e1b6beb9ce464b1e81..e4af450d887d33df75c8570f23dc7e3a1652e229 100644
--- a/config/routes.rb
+++ b/config/routes.rb
@@ -133,6 +133,7 @@ Rails.application.routes.draw do
     resources :search, only: :index do
       collection do
         get :autocomplete
+        get :tag
       end
     end
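Reviewer note, appended after the patch rather than inside it: the service can be smoke-tested from a Rails console before hitting the new GET v1/search/tag route. This is only a sketch; the tag id below is a placeholder, and it assumes the tags .ftree clusters file that parseTree reads has already been generated under tmp/.

    tag = Tag.find(42)                      # 42 is a hypothetical tag id
    results = TagSearchService.search(tag)  # => [[learning_object_id, score], ...], sorted by score
    results.first(12)                       # roughly the page GET /v1/search/tag?tag=42&results_per_page=12 would return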