Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions app/models/harvesting/example.rb
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,11 @@ def for_graphql(generic: false, metadata_format: nil, protocol: nil)

base_scope.all
end

# Looks up one record through +find+ and hands back its extraction mapping template.
#
# @param id identifier passed straight through to +find+
# @return [String]
def template_for(id)
  record = find(id)

  record.extraction_mapping_template
end
end
end
end
5 changes: 4 additions & 1 deletion app/operations/journal_sources/parse.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,17 @@ class Parse

include MeruAPI::Deps[
parse_full: "journal_sources.parsers.full",
parse_issue_only: "journal_sources.parsers.issue_only",
parse_volume_only: "journal_sources.parsers.volume_only",
parse_fallback: "journal_sources.parsers.fallback",
]

def call(*inputs)
parse_full.(*inputs).or do
parse_volume_only.(*inputs).or do
parse_fallback.(*inputs)
parse_issue_only.(*inputs).or do
parse_fallback.(*inputs)
end
end
end.value_or do
JournalSources::Parsed::Unknown.new
Expand Down
15 changes: 13 additions & 2 deletions app/operations/journal_sources/parsers/full.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,21 @@ class Full < JournalSources::Parsers::Abstract
/\A.+?, vol (?<volume>\d+), iss (?<issue>\d+)\z/,
].freeze

ENDS_WITH_SINGLE_PAGE = /; \d+\z/

# Tries to parse +input+ via `try_parsing_with_patterns!` and `try_anystyle!`; inside the
# conditional, `ENDS_WITH_SINGLE_PAGE` decides which of the two is attempted first.
#
# NOTE(review): this listing looks like a rendered diff without +/- markers. The unconditional
# `try_parsing_with_patterns!` call directly under `def` and the bare `try_anystyle!` call that
# precedes the `:nocov:` region are presumably removed lines — confirm against the merged file.
#
# @param input value tested against `ENDS_WITH_SINGLE_PAGE` and forwarded to both helpers
def try_parsing!(input)
try_parsing_with_patterns!(input)
if ENDS_WITH_SINGLE_PAGE.match?(input)
try_parsing_with_patterns!(input)

try_anystyle!(input)
# :nocov:
# A fallback for inputs that do not match any of the patterns
try_anystyle!(input)
# :nocov:
else
try_anystyle!(input)

try_parsing_with_patterns!(input)
end
end

# @param [String] input
Expand Down
13 changes: 13 additions & 0 deletions app/operations/journal_sources/parsers/issue_only.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# frozen_string_literal: true

module JournalSources
  module Parsers
    # Parser that declares {JournalSources::Parsed::IssueOnly} as its `parsed_klass`.
    class IssueOnly < JournalSources::Parsers::Abstract
      parsed_klass JournalSources::Parsed::IssueOnly

      # Forwards the input to `try_anystyle!`; no other strategy is attempted here.
      #
      # @param input the value to parse, passed along unchanged
      def try_parsing!(input) = try_anystyle!(input)
    end
  end
end
7 changes: 7 additions & 0 deletions app/services/harvesting/extraction/common_filters.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,14 @@

module Harvesting
module Extraction
# @see LiquidExt::CommonFilters
module CommonFilters
# Stringifies +input+ and returns the result of calling `parameterize` on that string.
#
# @param [#to_s] input
# @return [String]
def parameterize(input)
  text = input.to_s

  text.parameterize
end

# @param [#to_s] input
# @return [ActiveSupport::SafeBuffer]
def unescape_html(input)
Expand Down
4 changes: 4 additions & 0 deletions app/services/harvesting/extraction/context.rb
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ def parse_mapping_template!
# :nocov:
end
end

# :nocov:
raise Harvesting::Error, "Invalid extraction mapping template: #{@mapping_errors.join(', ')}" if @mapping.nil? && @mapping_errors.any?
# :nocov:
end
end
end
Expand Down
2 changes: 2 additions & 0 deletions app/services/harvesting/extraction/environment_builder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def call
# @return [Liquid::Environment]
def build_liquid_environment
Liquid::Environment.build(error_mode: :strict) do |env|
env.register_filter LiquidExt::CommonFilters
env.register_filter Harvesting::Extraction::CommonFilters

env.file_system = Harvesting::Extraction::FileSystem.new
Expand All @@ -56,6 +57,7 @@ def build_liquid_environment
# @return [void]
def configure_common_tags!(env)
  # Register each LiquidExt tag class under the tag name it is invoked by.
  env.register_tag("ifpresent", ::LiquidExt::Tags::IfPresent)
  env.register_tag("ifinteger", ::LiquidExt::Tags::IfInteger)
end

# @param [Liquid::Environment] env
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def render_child_structs_for(render_context, parent:)
if respond_to?(:items)
structs.concat(items.map { _1.render_struct_for(render_context, parent:) })
end
end
end.compact # remove any child structs whose requirements were not met
end

def requirements_met?(render_context)
Expand Down
41 changes: 28 additions & 13 deletions app/services/harvesting/records/entities_extractor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ class EntitiesExtractor < Support::HookBased::Actor

standard_execution!

define_model_callbacks :entities_extraction

# @return [<String>]
attr_reader :existing_contributor_ids

Expand Down Expand Up @@ -58,27 +60,31 @@ class EntitiesExtractor < Support::HookBased::Actor

delegate :metadata_mappings, :use_metadata_mappings?, to: :extraction_context

around_execute :provide_harvest_configuration!
around_execute :provide_harvest_source!
around_execute :provide_harvest_mapping!
around_execute :provide_harvest_attempt!
around_execute :provide_harvest_record!
around_entities_extraction :provide_harvest_configuration!
around_entities_extraction :provide_harvest_source!
around_entities_extraction :provide_harvest_mapping!
around_entities_extraction :provide_harvest_attempt!
around_entities_extraction :provide_harvest_record!
around_entities_extraction :capture_harvest_errors!

around_execute :provide_metadata_format!

# @return [Dry::Monads::Success(HarvestRecord)]
def call
yield set_up!
run_callbacks :entities_extraction do
yield set_up!

run_callbacks :execute do
yield prepare!
run_callbacks :execute do
yield prepare!

yield extract!
yield extract!

yield prune!
yield prune!

yield mark_active!
yield mark_active!

yield update_entity_count!
yield update_entity_count!
end
end

link.try(:transition_to, "extracted")
Expand Down Expand Up @@ -336,7 +342,7 @@ def track_skip!(skipped)

harvest_record.update_columns columns

harvest_record.harvest_entities.delete_all
harvest_record.harvest_entities.destroy_all

harvest_record.reload

Expand All @@ -351,6 +357,15 @@ def watch_extraction!

track_skip!(result) if skipped
end

# Wraps the yielded work so that a raised {Harvesting::Error} is logged at fatal level
# and then handed to `skip!` with the `:entity_extraction_failure` code. Any other
# exception class is not rescued here.
#
# @yield the work to guard
# @return [void]
def capture_harvest_errors!
  yield
rescue Harvesting::Error => error
  logger.fatal(
    error.message,
    tag: %i[entity_extraction_failure],
    exception_klass: error.class.name,
    backtrace: error.backtrace
  )

  skip!(error.message, code: :entity_extraction_failure)
end
end
end
end
40 changes: 40 additions & 0 deletions app/services/journal_sources/drop.rb
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
# frozen_string_literal: true

module JournalSources
# A Liquid drop for an instance of {JournalSources::Parsed::Abstract}
class Drop < ::Liquid::Drop
include Dry::Matcher.for(:match_journal_source, with: ::JournalSources::Matcher)

# @param [JournalSources::Parsed::Abstract] journal_source
def initialize(journal_source = nil)
@journal_source = journal_source

@mode = JournalSources::Types::LiquidMode[@journal_source.try(:mode)]

@exists = @journal_source.try(:known?).present?

match_journal_source do |m|
Expand All @@ -21,6 +24,11 @@ def initialize(journal_source = nil)
@issue = nil
end

m.issue_only do |parsed|
@volume = nil
@issue = parsed.issue
end

m.unknown do
@volume = @issue = nil
end
Expand All @@ -30,12 +38,44 @@ def initialize(journal_source = nil)
# @return [Boolean]
attr_reader :exists

alias known exists

alias exists? exists

alias known? known

# @!attribute [r] full
#   True when {#mode} equals `"full"`.
#   @return [Boolean]
def full
  mode == "full"
end

alias full? full

# @return [String, nil]
attr_reader :issue

# @!attribute [r] issue_only
#   True when {#mode} equals `"issue_only"`.
#   @return [Boolean]
def issue_only
  mode == "issue_only"
end

alias issue_only? issue_only

# @return [JournalSources::Types::LiquidMode]
attr_reader :mode

# @!attribute [r] unknown
#   True when {#mode} equals `"unknown"`.
#   @return [Boolean]
def unknown
  mode == "unknown"
end

alias unknown? unknown

# @return [String, nil]
attr_reader :volume

# @!attribute [r] volume_only
#   True when {#mode} equals `"volume_only"`.
#   @return [Boolean]
def volume_only
  mode == "volume_only"
end

alias volume_only? volume_only

private

def match_journal_source
Expand Down
11 changes: 11 additions & 0 deletions app/services/journal_sources/matcher.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,16 @@ module JournalSources
end
end

# Matcher case that resolves to the parsed value only when it is a
# {JournalSources::Parsed::Abstract} reporting both `issue_only?` and `known?`;
# otherwise it yields `Dry::Matcher::Undefined`.
IssueOnlyCase = Dry::Matcher::Case.new do |parsed, *|
  issue_only_match = parsed.kind_of?(JournalSources::Parsed::Abstract) && parsed.issue_only? && parsed.known?

  # :nocov:
  next Dry::Matcher::Undefined unless issue_only_match
  # :nocov:

  parsed
end

# @api private
UnknownCase = Dry::Matcher::Case.new do |parsed, *|
if parsed.kind_of?(JournalSources::Parsed::Abstract) && parsed.known?
Expand All @@ -37,6 +47,7 @@ module JournalSources
Matcher = Dry::Matcher.new(
full: FullCase,
volume_only: VolumeOnlyCase,
issue_only: IssueOnlyCase,
unknown: UnknownCase,
)
end
45 changes: 15 additions & 30 deletions app/services/journal_sources/parsed/abstract.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,51 +29,36 @@ class Abstract < ::Support::FlexibleStruct
attribute? :fpage, Types::OptionalInteger
attribute? :lpage, Types::OptionalInteger

validates :volume, presence: true, comparison: { other_than: UNKNOWN }, if: :has_expected_volume?
validates :volume, presence: true, comparison: { other_than: UNKNOWN }, if: :has_required_volume?

validates :issue, presence: true, comparison: { other_than: UNKNOWN }, if: :has_expected_issue?
validates :issue, presence: true, comparison: { other_than: UNKNOWN }, if: :has_required_issue?

# @return [JournalSources::Drop]
def to_liquid
JournalSources::Drop.new(self)
end
def to_liquid = JournalSources::Drop.new(self)

# @return [Dry::Monads::Some(JournalSources::Parsed::Abstract), Dry::Monads::None]
def to_monad
valid? ? Some(self) : None()
end
def to_monad = valid? ? Some(self) : None()

# @!group Mode Logic

def full?
mode == :full
end
def full? = mode == :full

def known?
!unknown? && valid?
end
def known? = !unknown? && valid?

def mode
self.class.mode
end
def has_required_issue? = full? || issue_only?

def has_expected_volume?
full? || volume_only?
end
def has_required_volume? = full? || volume_only?

def has_expected_issue?
full?
end
def issue_only? = mode == :issue_only

def unknown?
mode == :unknown || invalid?
end
# @return [JournalSources::Types::Mode]
def mode = self.class.mode

def volume_only?
mode == :volume_only
end
def unknown? = mode == :unknown || invalid?

# @!endgroup
def volume_only? = mode == :volume_only

# @!endgroup Mode Logic
end
end
end
10 changes: 10 additions & 0 deletions app/services/journal_sources/parsed/issue_only.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# frozen_string_literal: true

module JournalSources
module Parsed
# Parsed journal source whose class-level mode is `:issue_only`.
#
# @see JournalSources::Parsers::IssueOnly
class IssueOnly < ::JournalSources::Parsed::Abstract
mode :issue_only
end
end
end
Loading