# import.rake
# Forked from PortalMEC / portalmec (1868 commits behind the upstream repository).
require 'rainbow'

namespace :import do
  desc "Portal MEC data importer"

  task :topics, [:log] => [:environment] do |t, args|
    # Creates Topic relations for each LearningObject, based on its metadata.
    # Optional :log arg is the DatabaseLogger destination (defaults to STDOUT).
    #
    # NOTE: the original body called `desc` here; inside a task block `desc`
    # runs at execution time and would mislabel whichever task is defined
    # next, so it was removed (the namespace-level desc documents this file).
    include Log::Logging

    args.with_defaults(:log => STDOUT)

    Log::Logging.logger = Log::DatabaseLogger.new(args.log)
    logger.level = Log::DatabaseLogger::CREATE


    # Quantity of items fetched on each iteration
    limit = 500
    # Start point from where items will be fetched
    offset = 0

    loop do
      logger.info " --> Creating Topics relations from #{offset} to #{offset+limit}"

      begin
        # Get learning_objects (from offset to offset+limit).
        # `.to_a` forces the lazy ActiveRecord relation to hit the database
        # *inside* this begin block — otherwise the query would only run
        # later (at `empty?`) and the rescue below could never catch it.
        learning_objects = LearningObject.limit(limit).offset(offset).to_a

      rescue => e
        logger.warn "Database error, going to sleep"
        logger.error e
        # Sleeps for a while to wait database's recovery
        sleep(10.seconds)
        # Goes to next iteration to retry the same batch (offset unchanged)
        next
      else
        # Terminate loop if there are no more learning_objects to import
        break if learning_objects.empty?

        # No space before the argument parens: `new (...)` is a Ruby warning.
        importer = TopicsImporter.new(learning_objects)
        importer.import

      end
      offset += limit
    end
    logger.close
  end

  task :institution, [:log] => [:environment] do |t, args|
    # Creates Publisher relations for each LearningObject, based on its
    # metadata. Optional :log arg is the DatabaseLogger destination
    # (defaults to STDOUT).
    #
    # NOTE: the original body called `desc` here; inside a task block `desc`
    # runs at execution time and would mislabel whichever task is defined
    # next, so it was removed.
    include Log::Logging

    args.with_defaults(:log => STDOUT)

    Log::Logging.logger = Log::DatabaseLogger.new(args.log)
    logger.level = Log::DatabaseLogger::CREATE


    # Quantity of items fetched on each iteration
    limit = 500
    # Start point from where items will be fetched
    offset = 0
    # Single importer instance reused across batches (items are re-assigned).
    importer = InstitutionImporter.new

    loop do
      logger.info " --> Creating Publisher relations from #{offset} to #{offset+limit}"

      begin
        # Get learning_objects (from offset to offset+limit).
        # `.to_a` forces the lazy ActiveRecord relation to execute *inside*
        # this begin block — otherwise the query would only run later (at
        # `empty?`) and the rescue below could never catch the DB error.
        learning_objects = LearningObject.limit(limit).offset(offset).to_a
      rescue => e
        logger.warn "Database error, going to sleep"
        logger.error e
        # Sleeps for a while to wait database's recovery
        sleep(10.seconds)
        # Goes to next iteration to retry the same batch (offset unchanged)
        next
      else
        # Terminate loop if there are no more learning_objects to import
        break if learning_objects.empty?

        importer.items = learning_objects
        importer.import

      end
      offset += limit
    end
    logger.close
  end

  task :learning_objects, [:log] => [:environment] do |t, args|
    # Imports Learning Objects from DSpace items, page by page.
    # Optional :log arg is the DatabaseLogger destination (defaults to STDOUT).
    #
    # NOTE: the original body called `desc` here; inside a task block `desc`
    # runs at execution time and would mislabel whichever task is defined
    # next, so it was removed.

    args.with_defaults(:log => STDOUT)
    logger = Log::DatabaseLogger.new(args.log)
    logger.level = Log::DatabaseLogger::CREATE

    # Quantity of items fetched on each iteration
    limit = 500

    # Start point from where items will be fetched
    offset = 0

    loop do

      logger.info " --> importing items from #{offset} to #{offset+limit}"

      begin
        # Get items from dspace (from offset to offset+limit)
        dspace_items = get_items_from_dspace(limit, offset, ['metadata', 'bitstreams'])
      rescue => e
        logger.warn "Database error, going to sleep"
        logger.error e
        # Sleeps for a while to wait database's recovery
        sleep(10.seconds)
        # Goes to next iteration to retry
        next
      else
        # Terminate loop if there are no more items to import
        break if dspace_items.empty?

        # Retry if we receive an error string instead of an item collection.
        # Back off before retrying — the original `next` without a sleep
        # busy-looped forever while the service kept returning the string.
        if dspace_items.is_a? String
          logger.warn "Received a string instead of items: #{dspace_items}"
          sleep(10.seconds)
          next
        end

        # Increment offset, to get new items on next iteration
        offset = offset + limit

        # Iterates through items
        # Verifies if item is already on repository
        # Initializes new LearningObjects
        # and saves them on LearningObjects's repository
        importer = Dspace::LearningObjectImporter.new(dspace_items)
        importer.import
      end
    end
    logger.close
  end

  private

  # Fetches one page of items from the DSpace repository.
  #
  # limit  - maximum number of items to return in this page
  # offset - index of the first item to fetch
  # expand - optional Array of extra fields to embed; sent comma-joined
  #
  # Returns whatever the DSpace client's `items.all` yields (the item
  # collection, or an error String on failure — callers check for both).
  def get_items_from_dspace(limit, offset, expand=[])
    DspaceService.create_client.items.all(
      expand: expand.join(','),
      limit: limit,
      offset: offset
    )
  end

end