Skip to content
Snippets Groups Projects
Commit 4a76d518 authored by bfs15's avatar bfs15
Browse files

Added new way of finding LOs in the same cluster & changed names

parent 39702fa5
No related branches found
No related tags found
No related merge requests found
......@@ -19,7 +19,8 @@
module TagSearchService
# DEBUG = true
CACHE_KEY = "tag_clusters".freeze
FTREE_CACHE_KEY = "ftree".freeze
LO_CLUSTER_CACHE_KEY = "lo_cluster".freeze
PAD = 1
PADM = 0.05
......@@ -31,19 +32,32 @@ module TagSearchService
Rails.root.join(root_dir, "tags").to_s
end
def tags_cluster
Rails.cache.fetch(CACHE_KEY) do
parseFtree(file_path)
# Returns the parsed .ftree structure, memoized in the Rails cache under
# FTREE_CACHE_KEY. On a cache miss the file at file_path is parsed with
# parse_ftree and the result is stored before being returned.
def parse_ftree_cache
  Rails.cache.fetch(FTREE_CACHE_KEY) { parse_ftree(file_path) }
end
# Returns the tag-id => learning-object-ids mapping, memoized in the Rails
# cache under LO_CLUSTER_CACHE_KEY.
#
# tag_clusters - the list of tag clusters, as returned by parse_ftree under
#                the :tag_clusters key.
#
# On a cache miss the mapping is built by lo_clusters; the previous code
# called a non-existent `lo_cluster` method and raised NoMethodError.
def lo_cluster_cache(tag_clusters)
  Rails.cache.fetch(LO_CLUSTER_CACHE_KEY) do
    lo_clusters(tag_clusters)
  end
end
def search(tag, limit = -1)
# Create clusters from ftree
tags = tags_cluster()
ftree = parse_ftree_cache()
tag_cluster_hash = ftree[:tag_cluster_hash]
# NEW: Uncomment to test this, see if it takes too long for its benefits
# if it's good, put it in the tag_clustering.rake
# lo_cluster = lo_cluster_cache(ftree[:tag_clusters])
# puts "\nStarted Calculating relevant results for tag #{tag.name}\n" if DEBUG
relevant_los = find_relevant_results(tag.id, tags, limit)
relevant_los = find_relevant_results(tag.id, tag_cluster_hash, limit)
# relevant_los = find_relevant_results_v2(tag.id, tag_cluster_hash, lo_cluster, limit) ##
# NEW: Uncomment to test this, see if it takes too long for its benefits
# puts "\n============ Learning Objects - Cluster Search ===============\n"
# puts "ID: Ranking | Name | Tags\n\n"
......@@ -138,12 +152,12 @@ module TagSearchService
return merged_los
end
def find_relevant_results(tagId, tags, limit)
def find_relevant_results(tagId, tag_cluster_hash, limit)
los_ranked = {}
# puts "\nGetting tags from the same cluster\n" if DEBUG
# puts "Normalization with padding = #{PAD}\n" if DEBUG
close_tags = ranked_close_tags(tagId, tags)
close_tags = ranked_close_tags(tagId, tag_cluster_hash)
# Uncomment the next line if you want to sort by global tag frequency
# freq_cluster = cluster_frequency(close_tags)
......@@ -154,7 +168,7 @@ module TagSearchService
# puts "Name | ID | Normalized Ranking\n\n" if DEBUG
close_tags.each do |ct|
tag = Tag.find(ct[:id])
# puts tag.name+" | "+ct[:id].to_s+" | "+ct[:rank].to_s+" | "+tags[ct[:id]][:rank].to_s if DEBUG
# puts tag.name+" | "+ct[:id].to_s+" | "+ct[:rank].to_s+" | "+tag_cluster_hash[ct[:id]][:rank].to_s if DEBUG
tag.taggings.where(taggable_type: "LearningObject").each do |tagging|
lo = tagging.taggable
if los_ranked[lo].nil?
......@@ -179,15 +193,15 @@ module TagSearchService
end
# ranking #
def ranked_close_tags(tagId, tags)
def ranked_close_tags(tagId, tag_cluster_hash)
# puts "Rank close tags" if DEBUG
close_tags = []
tags[tagId][:parent][:childs].each do |t|
tag_cluster_hash[tagId][:parent][:childs].each do |t|
# calculate logarithmic distance between tag flows
# lower value, closer, more relevant
# the tag you are searching for will be at distance 0 of itself
lg_dist = ( Math.log2(tags[tagId][:flow]) - Math.log2(t[:flow]) ).abs
lg_dist = ( Math.log2(tag_cluster_hash[tagId][:flow]) - Math.log2(t[:flow]) ).abs
close_tags << { id: t[:id], rank: lg_dist}
# puts "Rank for tag_id=#{close_tags[close_tags.length - 1][:id]}: #{close_tags[close_tags.length - 1][:rank]}" if DEBUG
......@@ -281,15 +295,15 @@ module TagSearchService
return rank_sum
end
def parseFtree(path)
def parse_ftree(path)
# parse .ftree into a 'tree': leaves are tags with a flow number, branches are the clusters
# build tag_cluster_hash, where tag_cluster_hash[tagId] == the tag leaf inside the cluster tree
# puts "\nParsing .ftree output into a 'tree'\n" if DEBUG
clusters = {childs: [], parent: nil}
tags = {}
countClusters = {}
root = {childs: [], parent: nil}
tag_cluster_hash = {}
tag_clusters = []
File.open(path+".ftree", "r") do |f|
f.gets
......@@ -313,7 +327,7 @@ module TagSearchService
name = tmp[2..-2].join(' ')[1..-2]
# iterate through the levels of the tree
it = clusters # start at the root
it = root # restart at the root
ftree.each do |clusterId|
clusterId = clusterId.to_i - 1 # on the file they start at 1, we want 0
if it[:childs][clusterId].nil? # if this id doesn't exist, create it as child of 'it'
......@@ -323,16 +337,83 @@ module TagSearchService
# go down a level
it = it[:childs][clusterId]
end
countClusters[it] = 1 # set this cluster in this hash, for counting purposes
tag_clusters << it
# 'it' is the cluster leafId is a child of, so add it
it[:childs][leafId-1] = {id: tagId, flow: flow, name: name, parent: it}
# put the leaf on this hash for easy access by the tagId
tags[tagId] = it[:childs][leafId-1]
tag_cluster_hash[tagId] = it[:childs][leafId-1]
end
end
# puts "\nNumber of clusters found: #{countClusters.size}\n" if DEBUG
# puts "\nNumber of clusters found: #{tag_clusters.size}\n" if DEBUG
return tags
return {tag_cluster_hash: tag_cluster_hash, tag_clusters: tag_clusters}
end
# NEW (experimental): still to be evaluated - check whether its cost is
# worth its benefits before switching `search` over to it.
#
# Ranks the learning objects listed for the searched tag in lo_cluster.
#
# tagId            - id of the tag being searched.
# tag_cluster_hash - hash of tag id => tag leaf, handed to ranked_close_tags.
# lo_cluster       - hash of tag id => array of learning object ids.
# limit            - maximum number of results; a value <= 0 means no limit.
#
# Returns an array of [learning_object, rank] pairs, highest rank first.
def find_relevant_results_v2(tagId, tag_cluster_hash, lo_cluster, limit)
  close_tags = ranked_close_tags(tagId, tag_cluster_hash)

  # To sort by global tag frequency, also compute:
  # freq_cluster = cluster_frequency(close_tags)

  scored = lo_cluster[tagId].each_with_object({}) do |lo_id, ranks|
    learning_object = LearningObject.find(lo_id)

    # Alternative ranking strategies:
    #   local tag frequency:
    #     ranks[learning_object] = relevance_frequency_rank(learning_object, close_tags)
    #   global tag frequency:
    #     ranks[learning_object] = relevance_frequency_rank_global(learning_object, close_tags, freq_cluster)
    # Active strategy: tag cluster rank.
    ranks[learning_object] = relevance_raw_rank(learning_object, close_tags)
  end

  # Negating the rank puts the highest-ranked objects first.
  ordered = scored.sort_by { |_lo, rank| -rank }
  limit > 0 ? ordered.first(limit) : ordered
end
# Builds a hash keyed by tag id whose value is an array with the ids of all
# learning objects tagged by any tag of that tag's cluster. Tags of the same
# cluster share the very same array instance.
#
# tag_clusters - array of cluster nodes; each node's :childs holds the tag
#                leaves (hashes with an :id). parse_ftree appends the owning
#                cluster once per tag, so the same node normally shows up
#                several times in this list.
#
# Returns the tag id => learning object ids hash.
def lo_clusters(tag_clusters)
  clusters_by_tag = {}

  # Process each cluster object once: repeated entries would only redo the
  # same queries and yield an identical list. Identity is used because the
  # nodes are mutually recursive hashes (parent <-> childs).
  tag_clusters.uniq(&:object_id).each do |tag_cluster|
    lo_ids = []
    seen = {}

    # :childs is filled by index assignment, so it may contain nil holes.
    tag_cluster[:childs].compact.each do |leaf|
      tag_id = leaf[:id]
      # every tag of the cluster points at the same (still growing) list
      clusters_by_tag[tag_id] = lo_ids

      Tag.find(tag_id).taggings.where(taggable_type: "LearningObject").each do |tagging|
        lo_id = tagging.taggable.id
        next if seen[lo_id]

        seen[lo_id] = true
        lo_ids << lo_id
      end
    end
  end

  clusters_by_tag
end
end
......@@ -33,10 +33,8 @@ namespace :tag do
infomap_ftree(graph_path, TagSearchService.root_dir)
# Cluster needs to be read from disk again, so clear cache of TagSearchService
Rails.cache.delete(TagSearchService::CACHE_KEY)
Rails.cache.fetch(TagSearchService::CACHE_KEY) do
parseFtree(file_path)
end
Rails.cache.delete(TagSearchService::FTREE_CACHE_KEY)
TagSearchService::parse_ftree_cache()
end # task
private
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment