diff --git a/lib/tasks/dbInfo.rake b/lib/tasks/dbInfo.rake
index 19e00c350d61bce751d8c2c1c52519909dd6e3a6..7eaaaee1eb5f6fc5b8463ffeb5b678653aca786c 100644
--- a/lib/tasks/dbInfo.rake
+++ b/lib/tasks/dbInfo.rake
@@ -43,7 +43,7 @@ namespace :dbinfo do
     end
 
     bitstreams_formats = bitstreams_formats_hash.sort_by {|key, value| value}
-
+
     puts "\n\n"
     puts "---------------------------------"
     puts "---- BITSTREAMS FORMATS LIST ----"
@@ -60,4 +60,76 @@ namespace :dbinfo do
     puts "---------------------------------"
 
   end
+
+  desc "Verify duplicated items"
+  task verify_duplicated_items: :environment do
+
+    include RepositoriesProxy
+
+    # Quantity of LearningObjects fetched on each iteration
+    limit = 1000
+    # Starting point from where LearningObjects will be fetched
+    offset = 0
+
+    # Maps SHA1 digest of the LO's name to the list of LO ids sharing it
+    lo_hash = Hash.new
+
+    loop do
+      print "\r -> Analysing LearningObjects from #{offset} to #{offset+limit}"
+
+      begin
+        # Get LearningObjects from OrientDB (from offset to offset+limit)
+        learning_objects = learning_object_repository.all_from_offset_to_limit(offset,limit)
+      rescue
+        # NOTE(review): bare rescue retries forever; sleeps to wait database's recovery
+        sleep(30.seconds)
+        # Goes to next iteration to retry
+        next
+      else
+        # Terminate loop if there are no more LearningObjects
+        break if learning_objects.empty?
+
+        learning_objects.each do |lo|
+          # Verify duplicity using LO's unique attributes
+          # lo_key = encode_hash_from learning_object_unique_attributes(lo)
+          lo_key = encode_hash_from lo.name
+          (lo_hash[lo_key] ||= []) << lo.id
+        end
+
+        offset += limit
+      end
+    end
+
+    duplicated_lo = lo_hash.select { |_key, ids| ids.length > 1 }
+
+    puts "\n\n"
+    puts "---------------------------------"
+    puts "----- DUPLICATED ITEMS LIST -----"
+    puts "---------------------------------"
+    puts "TOTAL CASES: #{duplicated_lo.length}"
+    puts "---------------------------------"
+    duplicated_lo.each do |_key, ids|
+      puts ids.to_s
+    end
+    puts "---------------------------------"
+
+  end
+
+  private
+
+  # Concatenates the LO attributes that should make it unique
+  def learning_object_unique_attributes(lo)
+    unique_attributes = Array.new
+    unique_attributes << lo.name
+    unique_attributes << lo.author
+    unique_attributes << lo.description
+    unique_attributes << lo.type
+    unique_attributes << lo.metadata
+    unique_attributes.to_s
+  end
+
+  # SHA1 digest used as a compact, fixed-length hash key
+  def encode_hash_from(object)
+    Digest::SHA1.hexdigest object
+  end
 end