From 23dc88bab4209b8ae2d673f62c1111fc8bfca752 Mon Sep 17 00:00:00 2001 From: Bruno Nocera Zanette <bnzanette@inf.ufpr.br> Date: Sat, 10 Oct 2015 17:31:19 -0300 Subject: [PATCH] Add DuplicatedItems verification task --- lib/tasks/dbInfo.rake | 74 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) diff --git a/lib/tasks/dbInfo.rake b/lib/tasks/dbInfo.rake index 19e00c350..7eaaaee1e 100644 --- a/lib/tasks/dbInfo.rake +++ b/lib/tasks/dbInfo.rake @@ -43,7 +43,7 @@ namespace :dbinfo do end bitstreams_formats = bitstreams_formats_hash.sort_by {|key, value| value} - + puts "\n\n" puts "---------------------------------" puts "---- BITSTREAMS FORMATS LIST ----" @@ -60,4 +60,76 @@ namespace :dbinfo do end + task verify_duplicated_items: :environment do + desc "Verify duplicated items" + + include RepositoriesProxy + + # Quantity of LearningObjects fetched on each iteration + limit = 1000 + # Starting point from where LearningObjects will be fetched + offset = 0 + + lo_hash = Hash.new + + loop do + print "\r -> Analysing LearningObjects from #{offset} to #{offset+limit}" + + begin + # Get LearningObjects from OrientDB (from offset to offset+limit) + learning_objects = learning_object_repository.all_from_offset_to_limit(offset,limit) + rescue + # Sleeps for a while to wait database's recovery + sleep(30.seconds) + # Goes to next iteration to retry + next + else + # Terminate loop if there are no more LearningObjects + break if learning_objects.empty? + + learning_objects.each do |lo| + # Verify duplicity using LO's unique attributes + # item_key = encode_hash_from learning_object_unique_attributes(lo) + lo_key = encode_hash_from lo.name + if lo_hash[lo_key].nil? + lo_hash[lo_key] = Array.new + end + lo_hash[lo_key] << lo.id + end + + offset += limit + end + end + + duplicated_lo = lo_hash.select {|key,value| value.length > 1} + + puts "\n\n" + puts "---------------------------------" + puts "----- DUPLICATED ITEMS LIST -----" + puts "---------------------------------" + puts "TOTAL CASES: #{duplicated_lo.length}" + puts "---------------------------------" + duplicated_lo.each do |key, value| + puts "#{value.to_s}" + end + puts "---------------------------------" + + end + + private + + def learning_object_unique_attributes(lo) + unique_attributes = Array.new + unique_attributes << lo.name + unique_attributes << lo.author + unique_attributes << lo.description + unique_attributes << lo.type + unique_attributes << lo.metadata + unique_attributes.to_s + end + + def encode_hash_from(object) + Digest::SHA1.hexdigest object + end + end -- GitLab