# import.rake
# Forked from PortalMEC / portalmec (1868 commits behind the upstream repository).
# Author: Giovanne Marcelo
require 'rainbow'
# Portal MEC data importers: batch-import learning objects from DSpace and
# derive Topic/Publisher relations from their metadata.
namespace :import do
  desc "Create Topics relations based on its metadata"
  task :topics, [:log] => [:environment] do |t, args|
    include Log::Logging
    args.with_defaults(log: STDOUT)
    Log::Logging.logger = Log::DatabaseLogger.new(args.log)
    logger.level = Log::DatabaseLogger::CREATE
    # Quantity of items fetched on each iteration
    limit = 500
    # Start point from where items will be fetched
    offset = 0
    loop do
      logger.info " --> Creating Topics relations from #{offset} to #{offset + limit}"
      begin
        # Load the batch eagerly (.to_a) so a database failure raises here,
        # inside the begin block; a lazy relation would only hit the
        # database later, outside the rescue.
        learning_objects = LearningObject.limit(limit).offset(offset).to_a
      rescue => e
        logger.warn "Database error, going to sleep"
        logger.error e
        # Sleeps for a while to wait database's recovery
        sleep(10.seconds)
        # Goes to next iteration to retry the same offset
        next
      else
        # Terminate loop if there are no more learning_objects to import
        break if learning_objects.empty?
        TopicsImporter.new(learning_objects).import
      end
      offset += limit
    end
    logger.close
  end

  desc "Create Publisher relations based on its metadata"
  task :institution, [:log] => [:environment] do |t, args|
    include Log::Logging
    args.with_defaults(log: STDOUT)
    Log::Logging.logger = Log::DatabaseLogger.new(args.log)
    logger.level = Log::DatabaseLogger::CREATE
    # Quantity of items fetched on each iteration
    limit = 500
    # Start point from where items will be fetched
    offset = 0
    # A single importer instance is reused across batches
    importer = InstitutionImporter.new
    loop do
      logger.info " --> Creating Publisher relations from #{offset} to #{offset + limit}"
      begin
        # Eager load (.to_a) so database errors are caught by this rescue
        learning_objects = LearningObject.limit(limit).offset(offset).to_a
      rescue => e
        logger.warn "Database error, going to sleep"
        logger.error e
        # Sleeps for a while to wait database's recovery
        sleep(10.seconds)
        # Goes to next iteration to retry the same offset
        next
      else
        # Terminate loop if there are no more learning_objects to import
        break if learning_objects.empty?
        importer.items = learning_objects
        importer.import
      end
      offset += limit
    end
    logger.close
  end

  desc "Importing Learning Objects from Dspace items"
  task :learning_objects, [:log] => [:environment] do |t, args|
    args.with_defaults(log: STDOUT)
    logger = Log::DatabaseLogger.new(args.log)
    logger.level = Log::DatabaseLogger::CREATE
    # Quantity of items fetched on each iteration
    limit = 500
    # Start point from where items will be fetched
    offset = 0
    loop do
      logger.info " --> importing items from #{offset} to #{offset + limit}"
      begin
        # Get items from dspace (from offset to offset+limit)
        dspace_items = get_items_from_dspace(limit, offset, ['metadata', 'bitstreams'])
      rescue => e
        logger.warn "Database error, going to sleep"
        logger.error e
        # Sleeps for a while to wait database's recovery
        sleep(10.seconds)
        # Goes to next iteration to retry the same offset
        next
      else
        # Terminate loop if there are no more items to import
        break if dspace_items.empty?
        # NOTE(review): the DSpace client may return an error string instead
        # of an item collection; retry without advancing the offset.
        if dspace_items.is_a? String
          logger.warn "Received a string instead of items: #{dspace_items}"
          next
        end
        # Increment offset, to get new items on next iteration
        offset += limit
        # Import the batch: verifies whether each item already exists in the
        # repository, builds new LearningObjects and persists them.
        Dspace::LearningObjectImporter.new(dspace_items).import
      end
    end
    logger.close
  end

  private

  # Fetch one page of items from the DSpace REST API.
  #
  # limit  - Integer page size.
  # offset - Integer start position.
  # expand - Array of extra resource names to embed (joined with commas).
  def get_items_from_dspace(limit, offset, expand = [])
    client = DspaceService.create_client
    client.items.all(
      expand: expand.join(','),
      limit: limit, offset: offset
    )
  end
end