Skip to content
Snippets Groups Projects
Commit 2e4b88fe authored by bfs15's avatar bfs15
Browse files

Merge branch 'tag-frequency-task' of gitlab.c3sl.ufpr.br:portalmec/portalmec...

Merge branch 'tag-frequency-task' of gitlab.c3sl.ufpr.br:portalmec/portalmec into tag-frequency-task
parents 6590821a e4603611
No related branches found
No related tags found
No related merge requests found
......@@ -29,13 +29,15 @@ module Paginator
private
# Page size for the current request.
#
# Resolution order:
#   1. the explicit `limit` param,
#   2. the `results_per_page` param,
#   3. the default of 12.
#
# The previous early return on a blank `limit` made the `results_per_page`
# fallback unreachable, so it is dropped here.
#
# @return [Integer] number of records to return
def limit
  return params[:limit].to_i unless params[:limit].blank?
  return params[:results_per_page].to_i unless params[:results_per_page].blank?

  12
end
# Number of records to skip for the current request.
#
# Resolution order:
#   1. the explicit `offset` param,
#   2. `page * results_per_page` when both params are present
#      (so page 0 maps to offset 0),
#   3. 0.
#
# The previous early return on a blank `offset` made the page-based
# computation unreachable, so it is dropped here.
#
# @return [Integer] number of records to skip
def offset
  return params[:offset].to_i unless params[:offset].blank?

  unless params[:page].blank? || params[:results_per_page].blank?
    return params[:page].to_i * params[:results_per_page].to_i
  end

  0
end
def total_count(model)
......
......@@ -18,7 +18,7 @@
# along with portalmec. If not, see <http://www.gnu.org/licenses/>.
class V1::SearchController < ApplicationController
before_action :set_search
before_action :set_search, except: [:tag]
# GET v1/search
# GET v1/search.json
......@@ -38,6 +38,13 @@ class V1::SearchController < ApplicationController
render json: @search.errors, status: :bad_request
end
# GET v1/search/tag
# GET v1/search/tag.json
# Looks up the tag named by the permitted `tag` param, runs the
# tag search for it and renders the paginated result as JSON.
def tag
  searched_tag = Tag.find(tag_search_params[:tag])
  ranked_results = TagSearchService.search(searched_tag)
  render(json: paginate(ranked_results))
end
private
def set_search
......@@ -48,4 +55,8 @@ class V1::SearchController < ApplicationController
# Whitelists the request parameters accepted by the search endpoints:
# scalar options plus the array-valued filters.
def search_params
  scalar_keys = [:page, :results_per_page, :order, :query, :search_class]
  array_filters = { tags: [], subjects: [], educational_stages: [], object_types: [] }
  params.permit(*scalar_keys, array_filters)
end
# Whitelists the request parameters accepted by the tag search endpoint.
def tag_search_params
  allowed_keys = [:page, :results_per_page, :tag]
  params.permit(*allowed_keys)
end
end
# Copyright (C) 2015 Centro de Computacao Cientifica e Software Livre
# Departamento de Informatica - Universidade Federal do Parana
#
# This file is part of portalmec.
#
# portalmec is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# portalmec is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with portalmec. If not, see <http://www.gnu.org/licenses/>.
# Tag-based learning-object search that blends two rankings:
#
# * a "cluster" ranking: learning objects tagged with tags that sit in the same
#   leaf cluster of a pre-computed tag tree (parsed from a `.ftree` file), and
# * the full-text ranking returned by `LearningObject.search` for the tag name.
#
# Both rankings are min-max normalised, scaled by the search boost and summed
# per learning object (see #merge).
module TagSearchService
  # The controller calls `TagSearchService.search(...)`, so every method must be
  # reachable on the module itself. `extend self` provides that while keeping
  # the methods available to anything that includes the module.
  extend self

  CACHE_KEY = "tag_clusters".freeze
  # Padding used when normalising cluster distances: the largest distance is
  # divided by max * (1 + PAD), so the farthest tag keeps a non-zero rank.
  PAD = 1
  # Multiplicative padding applied to scores before min-max normalisation in
  # #merge, so the lowest positive score does not normalise to exactly 0.
  PADM = 0.05

  # Directory that holds the cluster files.
  def root_dir
    Rails.root.join("tmp").to_s
  end

  # Base path (without extension) of the cluster files.
  def file_path
    Rails.root.join(root_dir, "tags").to_s
  end

  # Returns the parsed tag tree, cached under CACHE_KEY.
  #
  # NOTE(review): #parseTree appends ".ftree" itself, so the file opened here is
  # "<file_path>.tree.ftree" - confirm that is the name the generating task
  # really writes.
  def tags_cluster
    Rails.cache.fetch(CACHE_KEY) do
      parseTree(file_path+".tree")
    end
  end

  # Runs both rankings for +tag+ and returns them merged.
  #
  # tag   - object responding to #id and #name.
  # limit - maximum number of results, or -1 for no limit.
  #
  # Returns an Array of [learning_object_id, score] pairs, best first.
  def search(tag, limit = -1)
    tags = tags_cluster
    relevant_los = find_relevant_results(tag.id, tags, limit)

    # `explain: true` is required: #merge reads the boost out of each hit's
    # `_explanation`.
    # NOTE(review): with the default limit of -1 this forwards `limit: -1` to the
    # search backend - confirm the backend accepts a negative limit.
    search_los = LearningObject.search tag.name, limit: limit, explain: true

    merge(search_los, relevant_los, limit)
  end

  # Merges the full-text results with the cluster ranking.
  #
  # search_los   - search result set; must respond to #response (raw response
  #                hash with ['hits']['hits'][i]['_explanation']) and enumerate
  #                records responding to #id and #search_hit.
  # relevant_los - Array of [learning_object_id, rank] pairs, best first.
  #                It is not modified.
  # limit        - maximum number of results, or -1 for no limit. (Previously
  #                this method read an undefined `limit` and raised NameError.)
  #
  # Each list is min-max normalised (first element taken as max, last as min),
  # search scores are multiplied by their own boost, cluster ranks by the
  # largest boost seen, and scores of objects present in both lists are summed.
  #
  # Returns an Array of [learning_object_id, score] pairs sorted by descending
  # score.
  def merge(search_los, relevant_los, limit = -1)
    hits = search_los.response['hits']['hits']
    raw_scores = search_los.map { |slo| slo.search_hit['_score'] }
    top_score = raw_scores.first
    bottom_score = raw_scores.last
    max_boost = 0

    # id => score; insertion order keeps search hits first, like the original.
    merged = {}
    search_los.each_with_index do |slo, i|
      boost = explanation_boost(hits[i]['_explanation'])
      max_boost = boost if boost > max_boost
      merged[slo.id] = boost * padded_normalize(raw_scores[i], bottom_score, top_score)
    end

    # Without any search hit there is no boost to scale by; use a neutral factor
    # so the cluster ranking keeps its relative order instead of collapsing to 0.
    scale = raw_scores.empty? ? 1 : max_boost

    unless relevant_los.empty?
      top_rank = relevant_los.first[1]
      bottom_rank = relevant_los.last[1]
      relevant_los.each do |id, rank|
        merged[id] = merged.fetch(id, 0) + scale * padded_normalize(rank, bottom_rank, top_rank)
      end
    end

    ranked = merged.sort_by { |_id, score| -score }
    return ranked.first(limit) if limit != -1
    ranked
  end

  # Ranks the learning objects tagged with any tag from the same cluster as
  # +tagId+.
  #
  # tagId - id of the searched tag.
  # tags  - Hash of tag id => leaf node, as returned by #parseTree.
  # limit - maximum number of results, or -1 for no limit.
  #
  # Cluster tags that no longer exist in the database and taggings whose
  # learning object is gone are skipped rather than aborting the search.
  #
  # Returns an Array of [learning_object_id, rank] pairs, best first.
  def find_relevant_results(tagId, tags, limit)
    los_ranked = {}
    close_tags = ranked_close_tags(tagId, tags)

    # Uncomment the next line if you want to sort by global tag frequency
    # freq_cluster = cluster_frequency(close_tags)

    close_tags.each do |ct|
      tag = Tag.find_by(id: ct[:id])
      next if tag.nil?

      tag.taggings.where(taggable_type: "LearningObject").each do |tagging|
        lo = tagging.taggable
        next if lo.nil? || los_ranked.key?(lo.id)

        # Uncomment the next line if you want to sort by local tag frequency
        # los_ranked[lo.id] = relevance_frequency_rank(lo, close_tags)

        # Uncomment the next line if you want to sort by global tag frequency
        # los_ranked[lo.id] = relevance_frequency_rank_global(lo, close_tags, freq_cluster)

        # Uncomment the next line if you want to sort by tag cluster rank
        los_ranked[lo.id] = relevance_raw_rank(lo, close_tags)
      end
    end

    # sort by rank, highest first
    los_ranked = los_ranked.sort_by { |_id, rank| -rank }
    return los_ranked.first(limit) if limit != -1
    los_ranked
  end

  # ranking #

  # Returns the tags that share a cluster with +tagId+, each with a normalised
  # closeness rank.
  #
  # The distance between two tags is the absolute difference of the log2 of
  # their flows: lower is closer, and the searched tag is at distance 0 from
  # itself. Returns [] when +tagId+ is not present in +tags+.
  #
  # Returns an Array of { id:, rank: } hashes.
  def ranked_close_tags(tagId, tags)
    leaf = tags[tagId]
    return [] if leaf.nil?

    own_log_flow = Math.log2(leaf[:flow])
    close_tags = []
    leaf[:parent][:childs].each do |sibling|
      # skip holes in the child array and children that are not tag leaves
      next if sibling.nil? || sibling[:flow].nil?

      lg_dist = (own_log_flow - Math.log2(sibling[:flow])).abs
      close_tags << { id: sibling[:id], rank: lg_dist }
    end
    normalize_complement_close(close_tags)
  end

  # Normalises distances and complements them, in place: distance 0 becomes
  # rank 1 and the largest distance becomes 1 - 1/(1 + PAD).
  #
  # When every distance is 0 (e.g. a cluster with a single tag) all ranks are
  # 1.0; dividing by the zero maximum used to produce NaN.
  #
  # tags - Array of { id:, rank: } hashes whose :rank is a distance >= 0.
  #
  # Returns the same Array.
  def normalize_complement_close(tags)
    # find max distance
    max = tags.reduce(0) { |current, t| t[:rank] > current ? t[:rank] : current }
    # increase max by PAD so the farthest tag's rank isn't 0
    divisor = (max * (1 + PAD)).to_f

    tags.each do |t|
      t[:rank] = divisor.zero? ? 1.0 : 1 - (t[:rank] / divisor)
    end
    tags
  end

  # Ranks +lo+ by local tag frequency: the rank of each of its tags that is in
  # the cluster, weighted by log2(total tags / taggings of that tag) + 1, and
  # scaled down by the number of tags the object has.
  #
  # The ratio is computed in floating point; integer division used to truncate
  # it before taking the logarithm.
  def relevance_frequency_rank(lo, close_tags)
    lo_tags = lo.tags
    return 0 if lo_tags.size == 0

    wdf = 1 / (Math.log2(lo_tags.size) + 1)
    total_tags = Tag.all.size.to_f
    itf_sum = 0
    lo_tags.each do |t|
      close_tags.each do |tag|
        next unless tag[:id] == t.id
        itf_sum += tag[:rank] * (Math.log2(total_tags / t.taggings.size) + 1)
      end
    end
    wdf * itf_sum
  end

  # Returns the sum of how many times each tag in the cluster is used.
  # Cluster tags that no longer exist in the database count as 0.
  def cluster_frequency(cluster)
    cluster.reduce(0) do |total, t|
      tag = Tag.find_by(id: t[:id])
      tag.nil? ? total : total + tag.taggings.size
    end
  end

  # Ranks +lo+ by global tag frequency: log2 of (1 + its raw cluster rank),
  # scaled down by its number of tags and weighted by
  # log2(total tags / freq_cluster) + 1.
  #
  # Returns 0.0 when the object has no tags or the cluster is never used
  # (the latter used to raise ZeroDivisionError).
  def relevance_frequency_rank_global(lo, close_tags, freq_cluster)
    return 0.0 if lo.tags.size == 0 || freq_cluster == 0

    rank_sum = 1 + relevance_raw_rank(lo, close_tags)
    wdf = Math.log2(rank_sum) / (Math.log2(lo.tags.size) + 1)
    itf = Math.log2(Tag.all.size.to_f / freq_cluster) + 1
    wdf * itf
  end

  # Returns the sum of the ranks of the tags of +lo+ that are in the cluster.
  def relevance_raw_rank(lo, close_tags)
    rank_sum = 0
    lo.tags.each do |t|
      close_tags.each do |tag|
        rank_sum += tag[:rank] if tag[:id] == t.id
      end
    end
    rank_sum
  end

  # Parses "<path>.ftree" into a tree whose leaves are tags (with their flow)
  # and whose branches are clusters.
  #
  # The first two lines of the file are skipped; parsing stops at the first
  # later line that contains no ':'. Each parsed line looks like
  #
  #   2:7:5:4 <flow> "<name>" <tag id>
  #
  # where 2, 7 and 5 are (1-based) cluster indexes from the root down and 4 is
  # the (1-based) index of the tag inside cluster 5.
  #
  # Returns a Hash of tag id => leaf node. A leaf is
  # { id:, flow:, name:, parent: } and a cluster is { childs: [], parent: }.
  def parseTree(path)
    root = { childs: [], parent: nil }
    tags = {}

    File.open(path+".ftree", "r") do |f|
      2.times { f.gets } # header lines
      while (line = f.gets)
        break unless line.include?(':')

        fields = line.split(' ')
        # every index but the last is a cluster level, the last one is the leaf
        *cluster_path, leaf_index = fields[0].split(':')
        tag_id = fields[-1].to_i
        flow = fields[1].to_f
        # the name lies between the flow and the tag id, wrapped in quotes
        name = fields[2..-2].join(' ')[1..-2]

        # walk down from the root, creating missing clusters on the way
        cluster = root
        cluster_path.each do |cluster_index|
          position = cluster_index.to_i - 1 # the file is 1-based, arrays are 0-based
          cluster[:childs][position] ||= { childs: [], parent: cluster }
          cluster = cluster[:childs][position]
        end

        leaf = { id: tag_id, flow: flow, name: name, parent: cluster }
        cluster[:childs][leaf_index.to_i - 1] = leaf
        # index the leaf for easy access by tag id
        tags[tag_id] = leaf
      end
    end

    tags
  end

  private

  # Follows the first child of each explanation node until the node described
  # as "boost" and returns its value, or 1 (neutral) when there is none.
  def explanation_boost(explanation)
    detail = Array(explanation['details']).first
    while detail && detail['description'] != "boost"
      detail = Array(detail['details']).first
    end
    detail ? detail['value'] : 1
  end

  # Min-max normalises +value+ after padding it by PADM. Returns 1.0 when
  # min == max (a single result, or all scores equal) instead of dividing by 0.
  def padded_normalize(value, min, max)
    span = (max - min).to_f
    return 1.0 if span.zero?
    (value * (1 + PADM) - min) / span
  end
end
......@@ -133,6 +133,7 @@ Rails.application.routes.draw do
# Search endpoints: the index plus two collection-level GET routes.
resources :search, only: :index do
  get :autocomplete, on: :collection
  get :tag, on: :collection
end
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment