diff --git a/app/models/harvesting/example.rb b/app/models/harvesting/example.rb index cb26665a..ee1b2212 100644 --- a/app/models/harvesting/example.rb +++ b/app/models/harvesting/example.rb @@ -110,6 +110,11 @@ def for_graphql(generic: false, metadata_format: nil, protocol: nil) base_scope.all end + + # @return [String] + def template_for(id) + find(id).extraction_mapping_template + end end end end diff --git a/app/operations/journal_sources/parse.rb b/app/operations/journal_sources/parse.rb index aa91fd83..4357ae91 100644 --- a/app/operations/journal_sources/parse.rb +++ b/app/operations/journal_sources/parse.rb @@ -7,6 +7,7 @@ class Parse include MeruAPI::Deps[ parse_full: "journal_sources.parsers.full", + parse_issue_only: "journal_sources.parsers.issue_only", parse_volume_only: "journal_sources.parsers.volume_only", parse_fallback: "journal_sources.parsers.fallback", ] @@ -14,7 +15,9 @@ class Parse def call(*inputs) parse_full.(*inputs).or do parse_volume_only.(*inputs).or do - parse_fallback.(*inputs) + parse_issue_only.(*inputs).or do + parse_fallback.(*inputs) + end end end.value_or do JournalSources::Parsed::Unknown.new diff --git a/app/operations/journal_sources/parsers/full.rb b/app/operations/journal_sources/parsers/full.rb index 961e64ea..6e847eb0 100644 --- a/app/operations/journal_sources/parsers/full.rb +++ b/app/operations/journal_sources/parsers/full.rb @@ -13,10 +13,21 @@ class Full < JournalSources::Parsers::Abstract /\A.+?, vol (?\d+), iss (?\d+)\z/, ].freeze + ENDS_WITH_SINGLE_PAGE = /; \d+\z/ + def try_parsing!(input) - try_parsing_with_patterns!(input) + if ENDS_WITH_SINGLE_PAGE.match?(input) + try_parsing_with_patterns!(input) - try_anystyle!(input) + # :nocov: + # A fallback for inputs that do not match any of the patterns + try_anystyle!(input) + # :nocov: + else + try_anystyle!(input) + + try_parsing_with_patterns!(input) + end end # @param [String] input diff --git a/app/operations/journal_sources/parsers/issue_only.rb b/app/operations/journal_sources/parsers/issue_only.rb new file mode 100644 index 00000000..9b156bd9 --- /dev/null +++ b/app/operations/journal_sources/parsers/issue_only.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +module JournalSources + module Parsers + class IssueOnly < JournalSources::Parsers::Abstract + parsed_klass JournalSources::Parsed::IssueOnly + + def try_parsing!(input) + try_anystyle! input + end + end + end +end diff --git a/app/services/harvesting/extraction/common_filters.rb b/app/services/harvesting/extraction/common_filters.rb index 8f6cfb7a..b6fcb11f 100644 --- a/app/services/harvesting/extraction/common_filters.rb +++ b/app/services/harvesting/extraction/common_filters.rb @@ -2,7 +2,14 @@ module Harvesting module Extraction + # @see LiquidExt::CommonFilters module CommonFilters + # @param [#to_s] input + # @return [String] + def parameterize(input) + input.to_s.parameterize + end + # @param [#to_s] input # @return [ActiveSupport::SafeBuffer] def unescape_html(input) diff --git a/app/services/harvesting/extraction/context.rb b/app/services/harvesting/extraction/context.rb index fca5888f..6a0c9e19 100644 --- a/app/services/harvesting/extraction/context.rb +++ b/app/services/harvesting/extraction/context.rb @@ -80,6 +80,10 @@ def parse_mapping_template! # :nocov: end end + + # :nocov: + raise Harvesting::Error, "Invalid extraction mapping template: #{@mapping_errors.join(', ')}" if @mapping.nil? && @mapping_errors.any? + # :nocov: end end end diff --git a/app/services/harvesting/extraction/environment_builder.rb b/app/services/harvesting/extraction/environment_builder.rb index 3f85b1ca..db08dcaa 100644 --- a/app/services/harvesting/extraction/environment_builder.rb +++ b/app/services/harvesting/extraction/environment_builder.rb @@ -36,6 +36,7 @@ def call # @return [Liquid::Environment] def build_liquid_environment Liquid::Environment.build(error_mode: :strict) do |env| + env.register_filter LiquidExt::CommonFilters env.register_filter Harvesting::Extraction::CommonFilters env.file_system = Harvesting::Extraction::FileSystem.new @@ -56,6 +57,7 @@ def build_liquid_environment # @return [void] def configure_common_tags!(env) env.register_tag "ifpresent", ::LiquidExt::Tags::IfPresent + env.register_tag "ifinteger", ::LiquidExt::Tags::IfInteger end # @param [Liquid::Environment] env diff --git a/app/services/harvesting/extraction/mappings/entities/abstract.rb b/app/services/harvesting/extraction/mappings/entities/abstract.rb index 4d44dd86..4f9f38c3 100644 --- a/app/services/harvesting/extraction/mappings/entities/abstract.rb +++ b/app/services/harvesting/extraction/mappings/entities/abstract.rb @@ -118,7 +118,7 @@ def render_child_structs_for(render_context, parent:) if respond_to?(:items) structs.concat(items.map { _1.render_struct_for(render_context, parent:) }) end - end + end.compact # remove any child structs whose requirements were not met end def requirements_met?(render_context) diff --git a/app/services/harvesting/records/entities_extractor.rb b/app/services/harvesting/records/entities_extractor.rb index 05fb7491..abd2e3b0 100644 --- a/app/services/harvesting/records/entities_extractor.rb +++ b/app/services/harvesting/records/entities_extractor.rb @@ -26,6 +26,8 @@ class EntitiesExtractor < Support::HookBased::Actor standard_execution! + define_model_callbacks :entities_extraction + # @return [] attr_reader :existing_contributor_ids @@ -58,27 +60,31 @@ class EntitiesExtractor < Support::HookBased::Actor delegate :metadata_mappings, :use_metadata_mappings?, to: :extraction_context - around_execute :provide_harvest_configuration! - around_execute :provide_harvest_source! - around_execute :provide_harvest_mapping! - around_execute :provide_harvest_attempt! - around_execute :provide_harvest_record! + around_entities_extraction :provide_harvest_configuration! + around_entities_extraction :provide_harvest_source! + around_entities_extraction :provide_harvest_mapping! + around_entities_extraction :provide_harvest_attempt! + around_entities_extraction :provide_harvest_record! + around_entities_extraction :capture_harvest_errors! + around_execute :provide_metadata_format! # @return [Dry::Monads::Success(HarvestRecord)] def call - yield set_up! + run_callbacks :entities_extraction do + yield set_up! - run_callbacks :execute do - yield prepare! + run_callbacks :execute do + yield prepare! - yield extract! + yield extract! - yield prune! + yield prune! - yield mark_active! + yield mark_active! - yield update_entity_count! + yield update_entity_count! + end end link.try(:transition_to, "extracted") @@ -336,7 +342,7 @@ def track_skip!(skipped) harvest_record.update_columns columns - harvest_record.harvest_entities.delete_all + harvest_record.harvest_entities.destroy_all harvest_record.reload @@ -351,6 +357,15 @@ def watch_extraction! track_skip!(result) if skipped end + + # @return [void] + def capture_harvest_errors! + yield + rescue Harvesting::Error => e + logger.fatal e.message, tag: %i[entity_extraction_failure], exception_klass: e.class.name, backtrace: e.backtrace + + skip!(e.message, code: :entity_extraction_failure) + end end end end diff --git a/app/services/journal_sources/drop.rb b/app/services/journal_sources/drop.rb index 6a318585..082a9062 100644 --- a/app/services/journal_sources/drop.rb +++ b/app/services/journal_sources/drop.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true module JournalSources + # A Liquid drop for an instance of {JournalSources::Parsed::Abstract} class Drop < ::Liquid::Drop include Dry::Matcher.for(:match_journal_source, with: ::JournalSources::Matcher) @@ -8,6 +9,8 @@ class Drop < ::Liquid::Drop def initialize(journal_source = nil) @journal_source = journal_source + @mode = JournalSources::Types::LiquidMode[@journal_source.try(:mode)] + @exists = @journal_source.try(:known?).present? match_journal_source do |m| @@ -21,6 +24,11 @@ def initialize(journal_source = nil) @issue = nil end + m.issue_only do |parsed| + @volume = nil + @issue = parsed.issue + end + m.unknown do @volume = @issue = nil end @@ -30,12 +38,44 @@ def initialize(journal_source = nil) # @return [Boolean] attr_reader :exists + alias known exists + + alias exists? exists + + alias known? known + + # @!attribute [r] full + # @return [Boolean] + def full = mode == "full" + + alias full? full + # @return [String, nil] attr_reader :issue + # @!attribute [r] issue_only + # @return [Boolean] + def issue_only = mode == "issue_only" + + alias issue_only? issue_only + + # @return [JournalSources::Types::LiquidMode] + attr_reader :mode + + # @!attribute [r] unknown + # @return [Boolean] + def unknown = mode == "unknown" + + alias unknown? unknown + # @return [String, nil] attr_reader :volume + # @return [Boolean] + def volume_only = mode == "volume_only" + + alias volume_only? volume_only + private def match_journal_source diff --git a/app/services/journal_sources/matcher.rb b/app/services/journal_sources/matcher.rb index 531a83e7..2a249e41 100644 --- a/app/services/journal_sources/matcher.rb +++ b/app/services/journal_sources/matcher.rb @@ -23,6 +23,16 @@ module JournalSources end end + IssueOnlyCase = Dry::Matcher::Case.new do |parsed, *| + if parsed.kind_of?(JournalSources::Parsed::Abstract) && parsed.issue_only? && parsed.known? + parsed + else + # :nocov: + Dry::Matcher::Undefined + # :nocov: + end + end + # @api private UnknownCase = Dry::Matcher::Case.new do |parsed, *| if parsed.kind_of?(JournalSources::Parsed::Abstract) && parsed.known? @@ -37,6 +47,7 @@ module JournalSources Matcher = Dry::Matcher.new( full: FullCase, volume_only: VolumeOnlyCase, + issue_only: IssueOnlyCase, unknown: UnknownCase, ) end diff --git a/app/services/journal_sources/parsed/abstract.rb b/app/services/journal_sources/parsed/abstract.rb index 09d78ce9..f9b88886 100644 --- a/app/services/journal_sources/parsed/abstract.rb +++ b/app/services/journal_sources/parsed/abstract.rb @@ -29,51 +29,36 @@ class Abstract < ::Support::FlexibleStruct attribute? :fpage, Types::OptionalInteger attribute? :lpage, Types::OptionalInteger - validates :volume, presence: true, comparison: { other_than: UNKNOWN }, if: :has_expected_volume? + validates :volume, presence: true, comparison: { other_than: UNKNOWN }, if: :has_required_volume? - validates :issue, presence: true, comparison: { other_than: UNKNOWN }, if: :has_expected_issue? + validates :issue, presence: true, comparison: { other_than: UNKNOWN }, if: :has_required_issue? # @return [JournalSources::Drop] - def to_liquid - JournalSources::Drop.new(self) - end + def to_liquid = JournalSources::Drop.new(self) # @return [Dry::Monads::Some(JournalSources::Parsed::Abstract), Dry::Monads::None] - def to_monad - valid? ? Some(self) : None() - end + def to_monad = valid? ? Some(self) : None() # @!group Mode Logic - def full? - mode == :full - end + def full? = mode == :full - def known? - !unknown? && valid? - end + def known? = !unknown? && valid? - def mode - self.class.mode - end + def has_required_issue? = full? || issue_only? - def has_expected_volume? - full? || volume_only? - end + def has_required_volume? = full? || volume_only? - def has_expected_issue? - full? - end + def issue_only? = mode == :issue_only - def unknown? - mode == :unknown || invalid? - end + # @return [JournalSources::Types::Mode] + def mode = self.class.mode - def volume_only? - mode == :volume_only - end + def unknown? = mode == :unknown || invalid? - # @!endgroup + def volume_only? = mode == :volume_only + + # @!endgroup Mode Logic end end end diff --git a/app/services/journal_sources/parsed/issue_only.rb b/app/services/journal_sources/parsed/issue_only.rb new file mode 100644 index 00000000..c10ea03e --- /dev/null +++ b/app/services/journal_sources/parsed/issue_only.rb @@ -0,0 +1,10 @@ +# frozen_string_literal: true + +module JournalSources + module Parsed + # @see JournalSources::ParseIssueOnly + class IssueOnly < ::JournalSources::Parsed::Abstract + mode :issue_only + end + end +end diff --git a/app/services/journal_sources/parsers/abstract.rb b/app/services/journal_sources/parsers/abstract.rb index f3371fe8..72a7d659 100644 --- a/app/services/journal_sources/parsers/abstract.rb +++ b/app/services/journal_sources/parsers/abstract.rb @@ -70,9 +70,11 @@ def check_parsed!(**attrs) # @!attribute [r] parsed_klass # @return [Class] - def parsed_klass - self.class.parsed_klass - end + def parsed_klass = self.class.parsed_klass + + delegate :mode, to: :parsed_klass + + def issue_only? = mode == :issue_only # @!group Extraction Helpers @@ -89,7 +91,7 @@ def extract_year(input) extract.year(input).value_or(nil) end - # @!endgroup + # @!endgroup Extraction Helpers # @!group AnyStyle parsing @@ -145,7 +147,13 @@ def anystyle_process_type!(result, target, type: target[:type]) in date: [String => date, *], title: [String => title, *] target[:volume] ||= ("%s %s" % { date:, title:, }).strip in title: [String => title, *] + # :nocov: + # Not sure any examples exist of this case alone, + # but it was something that existed in the original journal source parser. target[:volume] ||= ("%<title>s" % { title: }).strip + # :nocov: + in note: [/Issue/i => note, *] if looks_like_issue_only?(result, target) + target[:issue] = maybe_combine_for_issue_only(note, **result) else # Intentionally left blank end @@ -166,7 +174,30 @@ def normalize_anystyle(result) end end - # @!endgroup + # @param [Hash] hash + def has_issue_or_volume?(hash) + hash[:issue].present? || hash[:volume].present? + end + + # @param [Hash] result + # @param [Hash] target + def looks_like_issue_only?(result, target) + issue_only? && !has_issue_or_volume?(target) && !has_issue_or_volume?(result) + end + + # @param [<String>] note + # @param [<String>, nil] publisher + # @param [<String>, nil] location + # @return [String] + def maybe_combine_for_issue_only(note, publisher: nil, location: nil, **) + n, p, l = [note, publisher, location].map { extract_first_string(_1) } + + suffix = [p, l].compact.join(", ") + + [n, suffix].compact_blank.join(" ").squish + end + + # @!endgroup AnyStyle parsing end end end diff --git a/app/services/journal_sources/types.rb b/app/services/journal_sources/types.rb index 10a880f8..ab3f6585 100644 --- a/app/services/journal_sources/types.rb +++ b/app/services/journal_sources/types.rb @@ -10,7 +10,9 @@ module Types KnowableString = String.default(UNKNOWN).fallback(UNKNOWN) - Mode = Symbol.enum(:unknown, :full, :volume_only) + Mode = Symbol.enum(:unknown, :full, :volume_only, :issue_only) + + LiquidMode = Coercible::String.enum(*Mode.values.map(&:to_s)).fallback("unknown") OptionalInteger = Coercible::Integer.optional.fallback(nil) diff --git a/app/services/liquid_ext/common_filters.rb b/app/services/liquid_ext/common_filters.rb new file mode 100644 index 00000000..9f100555 --- /dev/null +++ b/app/services/liquid_ext/common_filters.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +module LiquidExt + # Filters common to multiple Liquid contexts + module CommonFilters + WHOLE_NUMBER_MATCH = /\A-?\d+\z/ + + # @param [Integer, String] input + # @return [Boolean] + def is_whole_number(input) + case input + when Integer, WHOLE_NUMBER_MATCH then true + else + false + end + end + + # @param [#to_s] input + # @return [String] + def parameterize(input) + input.to_s.parameterize + end + end +end diff --git a/app/services/liquid_ext/tags/abstract_if_block.rb b/app/services/liquid_ext/tags/abstract_if_block.rb new file mode 100644 index 00000000..41abe854 --- /dev/null +++ b/app/services/liquid_ext/tags/abstract_if_block.rb @@ -0,0 +1,233 @@ +# frozen_string_literal: true + +module LiquidExt + module Tags + # @abstract An abstract Liquid tag for conditional blocks. + class AbstractIfBlock < Liquid::Block + extend Dry::Core::ClassAttributes + + DEFAULT_ELSE_TAG_NAMES = %w[ + $never_match$ + $always_fail$ + ].freeze + + # @!attribute [r] if_tag_name + # @!scope class + # The name of the "if" tag for this conditional tag. + # @return [String] + # @!attribute [r] elsif_tag_name + # @!scope class + # The name of the "elsif" tag for this conditional tag. + # @return [String] + defines :if_tag_name, :elsif_tag_name, type: LiquidExt::Types::String + + # @!attribute [r] else_tag_names + # @!scope class + # The names of the "else" tags for this conditional tag. + # @return [LiquidExt::Types::TagNames] + defines :else_tag_names, type: LiquidExt::Types::ElseTagNames + + if_tag_name "$if_never_match$" + + elsif_tag_name "$els_never_match$" + + else_tag_names DEFAULT_ELSE_TAG_NAMES.freeze + + Syntax = /(#{Liquid::QuotedFragment})\s*([=!<>a-z_]+)?\s*(#{Liquid::QuotedFragment})?/o + + ExpressionsAndOperators = /(?:\b(?:\s?and\s?|\s?or\s?)\b|(?:\s*(?!\b(?:\s?and\s?|\s?or\s?)\b)(?:#{Liquid::QuotedFragment}|\S+)\s*)+)/o + + BOOLEAN_OPERATORS = %w[and or].freeze + + # The blocks / conditions for this tag. + # @return [<Liquid::Condition, Liquid::ElseCondition>] + attr_reader :blocks + + # @param [String] tag_name + # @param [String] markup + # @param [Liquid::ParseContext] options + def initialize(tag_name, markup, options) + super + @blocks = [] + push_block("if", markup) + end + + # @api private + def nodelist + # :nocov: + @blocks.map(&:attachment) + # :nocov: + end + + # @api private + def parse(tokens) + while parse_body(@blocks.last.attachment, tokens); end + + @blocks.reverse_each do |block| + # :nocov: + block.attachment.remove_blank_strings if blank? + # :nocov: + block.attachment.freeze + end + end + + # @api private + # @note A hook provided by Liquid::Block + def unknown_tag(tag, markup, tokens) + if else_tag?(tag) + push_block(tag, markup) + else + # :nocov: + super + # :nocov: + end + end + + # @api private + def render_to_output_buffer(context, output) + @blocks.each do |block| + result = Liquid::Utils.to_liquid_value( + block.evaluate(context), + ) + rescue Liquid::UndefinedVariable, Liquid::UndefinedDropMethod + # We allow undefined variables and drop method calls to occur here, regardless of strict variables. + next + else + if evaluate_if_branch?(block, result) + return block.attachment.render_to_output_buffer(context, output) + end + end + + # :nocov: + output + # :nocov: + end + + # @param [String, Symbol] name + def else_tag?(name) = name.in?(else_tag_names) + + # @!attribute [r] else_tag_names + # The names of the "else" tags for this conditional tag. + # @return [<String>] + def else_tag_names = self.class.else_tag_names + + private + + # @abstract Override in subclasses to customize truthiness evaluation. + # @note Defer to `super` for else logic. + # @param [Liquid::Condition, Liquid::ElseCondition] block + # @param [Object] result + def evaluate_if_branch?(block, result) + block.kind_of?(Liquid::ElseCondition) && result.present? + end + + def push_block(tag, markup) + block = + if tag == "else" + Liquid::ElseCondition.new + else + parse_with_selected_parser(markup) + end + + @blocks.push(block) + + block.attach(new_body) + end + + def parse_expression(markup) + Liquid::Condition.parse_expression(parse_context, markup) + end + + def lax_parse(markup) + expressions = markup.scan(ExpressionsAndOperators) + + # :nocov: + raise Liquid::SyntaxError, options[:locale].t("errors.syntax.if") unless expressions.pop =~ Syntax + # :nocov: + + condition = Liquid::Condition.new(parse_expression(Regexp.last_match(1)), Regexp.last_match(2), parse_expression(Regexp.last_match(3))) + + until expressions.empty? + operator = expressions.pop.to_s.strip + + # :nocov: + raise Liquid::SyntaxError, options[:locale].t("errors.syntax.if") unless expressions.pop.to_s =~ Syntax + # :nocov: + + new_condition = Liquid::Condition.new(parse_expression(Regexp.last_match(1)), Regexp.last_match(2), parse_expression(Regexp.last_match(3))) + + # :nocov: + raise Liquid::SyntaxError, options[:locale].t("errors.syntax.if") unless BOOLEAN_OPERATORS.include?(operator) + # :nocov: + + new_condition.send(operator, condition) + + condition = new_condition + end + + condition + end + + def strict_parse(markup) + p = @parse_context.new_parser(markup) + + condition = parse_binary_comparisons(p) + + p.consume(:end_of_string) + + condition + end + + def parse_binary_comparisons(p) + condition = parse_comparison(p) + + first_condition = condition + + while (op = p.id?("and") || p.id?("or")) + child_condition = parse_comparison(p) + condition.send(op, child_condition) + condition = child_condition + end + + first_condition + end + + def parse_comparison(p) + a = parse_expression(p.expression) + + if (op = p.consume?(:comparison)) + # :nocov: + b = parse_expression(p.expression) + Liquid::Condition.new(a, op, b) + # :nocov: + else + Liquid::Condition.new(a) + end + end + + class << self + # Define the tag names for this conditional block. + # + # @return [void] + def if_tag!(name, elsif_tag: "els#{name}") + if_tag_name name.freeze + + elsif_tag_name elsif_tag.freeze + + else_tag_names [elsif_tag_name, "else"].freeze + end + end + + # @api private + # Required for introspection in Liquid + class ParseTreeVisitor < Liquid::ParseTreeVisitor + # @return [<Liquid::Condition, Liquid::ElseCondition>] + def children + # :nocov: + @node.blocks + # :nocov: + end + end + end + end +end diff --git a/app/services/liquid_ext/tags/if_integer.rb b/app/services/liquid_ext/tags/if_integer.rb new file mode 100644 index 00000000..8001884c --- /dev/null +++ b/app/services/liquid_ext/tags/if_integer.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +module LiquidExt + module Tags + # Liquid lacks the ability to test if a value is a whole number (integer). + # This tag implements that functionality, along with Rails' `present?` logic. + class IfInteger < LiquidExt::Tags::AbstractIfBlock + INTEGER_MATCH = /\A-?\d+\z/ + + if_tag! "ifinteger" + + private + + # @param [Liquid::Condition, Liquid::ElseCondition] block + # @param [Object] result + def evaluate_if_branch?(block, result) + case result + when Integer, INTEGER_MATCH then true + else + super + end + end + end + end +end diff --git a/app/services/liquid_ext/tags/if_present.rb b/app/services/liquid_ext/tags/if_present.rb index 6e87966c..2d6054f1 100644 --- a/app/services/liquid_ext/tags/if_present.rb +++ b/app/services/liquid_ext/tags/if_present.rb @@ -5,165 +5,18 @@ module Tags # We'd like the ability to check for the presence of an entire chain call in Liquid # without having to enumerate. In addition, we'd like Rails `present?` logic instead # of the Rubyish truthiness checks. This tag accomplishes that. - class IfPresent < Liquid::Block - Syntax = /(#{Liquid::QuotedFragment})\s*([=!<>a-z_]+)?\s*(#{Liquid::QuotedFragment})?/o - - ExpressionsAndOperators = /(?:\b(?:\s?and\s?|\s?or\s?)\b|(?:\s*(?!\b(?:\s?and\s?|\s?or\s?)\b)(?:#{Liquid::QuotedFragment}|\S+)\s*)+)/o - - BOOLEAN_OPERATORS = %w[and or].freeze - - ELSE_TAG_NAMES = %w[elsifpresent else].freeze - - private_constant :ELSE_TAG_NAMES - - attr_reader :blocks - - def initialize(tag_name, markup, options) - super - @blocks = [] - push_block("if", markup) - end - - def nodelist - # :nocov: - @blocks.map(&:attachment) - # :nocov: - end - - def parse(tokens) - while parse_body(@blocks.last.attachment, tokens); end - - @blocks.reverse_each do |block| - # :nocov: - block.attachment.remove_blank_strings if blank? - # :nocov: - block.attachment.freeze - end - end - - def unknown_tag(tag, markup, tokens) - if ELSE_TAG_NAMES.include?(tag) - push_block(tag, markup) - else - # :nocov: - super - # :nocov: - end - end - - def render_to_output_buffer(context, output) - @blocks.each do |block| - result = Liquid::Utils.to_liquid_value( - block.evaluate(context), - ) - rescue Liquid::UndefinedVariable, Liquid::UndefinedDropMethod - # We allow undefined variables and drop method calls to occur here, regardless of strict variables. - next - else - if check_presence?(result) - return block.attachment.render_to_output_buffer(context, output) - end - end - - # :nocov: - output - # :nocov: - end - - private + class IfPresent < LiquidExt::Tags::AbstractIfBlock + if_tag! "ifpresent" + # @param [Liquid::Condition, Liquid::ElseCondition] block # @param [Object] result - def check_presence?(result) + # @return [Boolean] + def evaluate_if_branch?(block, result) case result when ::LiquidExt::Behavior::BlankAndPresent result.is_present else - result.present? - end - end - - def push_block(tag, markup) - block = - if tag == "else" - Liquid::ElseCondition.new - else - parse_with_selected_parser(markup) - end - - @blocks.push(block) - - block.attach(new_body) - end - - def parse_expression(markup) - Liquid::Condition.parse_expression(parse_context, markup) - end - - def lax_parse(markup) - expressions = markup.scan(ExpressionsAndOperators) - - # :nocov: - raise Liquid::SyntaxError, options[:locale].t("errors.syntax.if") unless expressions.pop =~ Syntax - # :nocov: - - condition = Liquid::Condition.new(parse_expression(Regexp.last_match(1)), Regexp.last_match(2), parse_expression(Regexp.last_match(3))) - - until expressions.empty? - operator = expressions.pop.to_s.strip - - # :nocov: - raise Liquid::SyntaxError, options[:locale].t("errors.syntax.if") unless expressions.pop.to_s =~ Syntax - # :nocov: - - new_condition = Liquid::Condition.new(parse_expression(Regexp.last_match(1)), Regexp.last_match(2), parse_expression(Regexp.last_match(3))) - - # :nocov: - raise Liquid::SyntaxError, options[:locale].t("errors.syntax.if") unless BOOLEAN_OPERATORS.include?(operator) - # :nocov: - - new_condition.send(operator, condition) - - condition = new_condition - end - - condition - end - - def strict_parse(markup) - p = @parse_context.new_parser(markup) - condition = parse_binary_comparisons(p) - p.consume(:end_of_string) - condition - end - - def parse_binary_comparisons(p) - condition = parse_comparison(p) - first_condition = condition - while (op = p.id?("and") || p.id?("or")) - child_condition = parse_comparison(p) - condition.send(op, child_condition) - condition = child_condition - end - first_condition - end - - def parse_comparison(p) - a = parse_expression(p.expression) - if (op = p.consume?(:comparison)) - # :nocov: - b = parse_expression(p.expression) - Liquid::Condition.new(a, op, b) - # :nocov: - else - Liquid::Condition.new(a) - end - end - - class ParseTreeVisitor < Liquid::ParseTreeVisitor - def children - # :nocov: - @node.blocks - # :nocov: + result.present? || super end end end diff --git a/app/services/liquid_ext/types.rb b/app/services/liquid_ext/types.rb index 777d8df9..c4fb7a07 100644 --- a/app/services/liquid_ext/types.rb +++ b/app/services/liquid_ext/types.rb @@ -9,5 +9,11 @@ module Types ArgName = Coercible::Symbol.constrained(filled: true) ArgNames = Coercible::Array.of(ArgName) + + TagName = String.constrained(filled: true) + + TagNames = Array.of(TagName) + + ElseTagNames = TagNames.constrained(min_size: 2) end end diff --git a/app/services/templates/filters/common_filters.rb b/app/services/templates/filters/common_filters.rb index b2d11c7c..60c69e7f 100644 --- a/app/services/templates/filters/common_filters.rb +++ b/app/services/templates/filters/common_filters.rb @@ -2,6 +2,7 @@ module Templates module Filters + # @see LiquidExt::CommonFilters module CommonFilters CC_LICENSE_URL_MAPPING = { "CC BY" => "https://creativecommons.org/licenses/by/2.5/", diff --git a/app/services/templates/slots/environment_builder.rb b/app/services/templates/slots/environment_builder.rb index d052a79e..1bc93198 100644 --- a/app/services/templates/slots/environment_builder.rb +++ b/app/services/templates/slots/environment_builder.rb @@ -71,7 +71,9 @@ def configure_common!(env) env.register_tag "copylink", Templates::Tags::Blocks::CopyLink env.register_tag "entitylink", Templates::Tags::Blocks::EntityLink env.register_tag "ifpresent", ::LiquidExt::Tags::IfPresent + env.register_tag "ifinteger", ::LiquidExt::Tags::IfInteger + env.register_filter LiquidExt::CommonFilters env.register_filter Templates::Filters::CommonFilters register_dot_list! env diff --git a/lib/frozen_record/harvesting/examples.yml b/lib/frozen_record/harvesting/examples.yml index 71e5cdf2..fd99272f 100644 --- a/lib/frozen_record/harvesting/examples.yml +++ b/lib/frozen_record/harvesting/examples.yml @@ -63,6 +63,19 @@ the liquid environment. These are available on the following assigns in any OAIDC environment: `journal`, `creators`, `contributors`, and `pdf`. +- id: oaidc_journal_special_issues + name: OAIDC Journal Article (Special Issues) + default: false + protocol_name: "oai" + metadata_format_name: "oaidc" + description: > + When fetching an OAIDC source that represents a journal article + contained within special issues (i.e., issues that do not have + volume/issue numbers, but rather special issue names), + the data needs to be parsed using special helpers we provide in + the liquid environment. These are available on the following + assigns in any OAIDC environment: `journal`, `creators`, `contributors`, + and `pdf`. - id: empty name: "Barebones Template" default: false diff --git a/lib/harvesting/examples/oaidc_journal_special_issues/template.xml b/lib/harvesting/examples/oaidc_journal_special_issues/template.xml new file mode 100644 index 00000000..1677f77c --- /dev/null +++ b/lib/harvesting/examples/oaidc_journal_special_issues/template.xml @@ -0,0 +1,114 @@ +<?xml version="1.0" encoding="UTF-8"?> +<mapping> + <assigns> + <assign name="journal_volume">{{journal.volume | default: "special"}}</assign> + <assign name="journal_volume_identifier"> + {% if journal.volume %} + volume-{{journal.volume}} + {% else %} + special-issues + {% endif %} + </assign> + <assign name="journal_volume_sortable_number"> + {% if journal.volume %} + {{journal.volume}} + {% else %} + 100 + {% endif %} + </assign> + <assign name="journal_volume_title"> + {% if journal.volume %} + Volume {{journal.volume}} + {% else %} + Special Issues + {% endif %} + </assign> + <assign name="journal_issue"> + {% ifinteger journal.issue %} + {{journal.issue}} + {% else %} + {{journal.issue | parameterize}} + {% endifinteger %} + </assign> + <assign name="journal_issue_title"> + {% ifinteger journal.issue %} + Issue {{journal.issue}} + {% else %} + {{journal.issue}} + {% endifinteger %} + </assign> + <assign name="journal_issue_sortable_number"> + {% ifinteger journal.issue %} + {{journal.issue}} + {% else %} + 100 + {% endifinteger %} + </assign> + </assigns> + <entities> + <collection schema="nglp:journal_volume"> + <requires> + <expr reason="must have a volume">{{journal_volume}}</expr> + </requires> + <identifier>{{journal_volume_identifier}}</identifier> + <title>{{journal_volume_title}} + + {{journal_volume}} + {{journal_volume_sortable_number}} + + + + {{journal_issue}} + + issue-{{journal_issue}} + {{journal_issue_title}} + + {{journal_issue}} + {{journal_issue_sortable_number}} + + + {{record.identifier}} + {{oaidc.title | first}} + {{doi}} + + {% for creator in creators %} + {% contribution role: "aut" %} + {% person %} + {% given_name creator.given %} + {% family_name creator.family %} + {% endperson %} + {% endcontribution %} + {% endfor %} + {% for contributor in contributors %} + {% contribution role: "ctb" %} + {% person %} + {% given_name contributor.given %} + {% family_name contributor.family %} + {% endperson %} + {% endcontribution %} + {% endfor %} + + + + {% for subject in oaidc.subject %} + {% tag %}{{subject}}{% endtag %} + {% endfor %} + + + pdf + application/pdf + PDF Version + + {% if pdf.url contains "article/view" %} + {{ pdf.url | replace_first: "article/view", "article/download" }} + {% else %} + {{ pdf.url }} + {% endif %} + + + + + + + + diff --git a/spec/operations/journal_sources/parse_spec.rb b/spec/operations/journal_sources/parse_spec.rb index 4573c54b..93e3ddfe 100644 --- a/spec/operations/journal_sources/parse_spec.rb +++ b/spec/operations/journal_sources/parse_spec.rb @@ -1,21 +1,27 @@ # frozen_string_literal: true RSpec.describe JournalSources::Parse, type: :operation do + let(:input) { nil } + + subject(:parsed) { operation.(input) } + context "with a well-structured citation" do - let(:multiple_pages) do - "Some Journal; Vol. 18 No. 5 (2022); 106-112" - end + context "with multiple pages" do + let(:input) do + "Some Journal; Vol. 18 No. 5 (2022); 106-112" + end - let(:single_page) do - "Some Journal; Vol. 5 No. 12 (2009); 3" + it { is_expected.to be_full.and be_known } + it { is_expected.to have_attributes(volume: "18", issue: ?5, year: 2022, fpage: 106, lpage: 112) } end - it "handles a range of pages" do - expect_calling_with(multiple_pages).to be_full.and(be_known).and(have_attributes(volume: "18", issue: ?5, year: 2022, fpage: 106, lpage: 112)) - end + context "with a single page" do + let(:input) do + "Some Journal; Vol. 5 No. 12 (2009); 3" + end - it "handles a single page" do - expect_calling_with(single_page).to have_attributes(volume: ?5, issue: "12", year: 2009, fpage: 3, lpage: nil) + it { is_expected.to be_full.and be_known } + it { is_expected.to have_attributes(volume: ?5, issue: "12", year: 2009, fpage: 3, lpage: nil) } end end @@ -24,9 +30,9 @@ "Journal of Some Unknown Studies; Vol 6, No 2: Fall/Winter 2011" end - it "parses correctly" do - expect_calling_with(input).to be_full.and(be_known).and(have_attributes(volume: ?6, issue: ?2, year: 2011)) - end + it { is_expected.to be_full.and be_known } + + it { is_expected.to have_attributes(volume: ?6, issue: ?2, year: 2011) } end context "when dealing with just a volume" do @@ -34,30 +40,72 @@ "Some Journal; Vol. 13 (1999)" end - it "can handle just a volume" do - expect_calling_with(input).to have_attributes(volume: "13", issue: "UNKNOWN", year: 1999).and(be_known).and(be_volume_only) + it { is_expected.to be_volume_only.and be_known } + + it { is_expected.to have_attributes(volume: "13", issue: "UNKNOWN", year: 1999) } + end + + context "with an oddly structured volume" do + let(:input) do + "Journal of Oddities; 2020; Widgets" end + + it { is_expected.to be_volume_only.and be_known } + it { is_expected.to have_attributes(volume: "2020 Widgets", issue: "UNKNOWN", year: 2020) } + end + + context "when dealing with just an issue" do + let(:input) do + "Journal of Testing, Magic, and Journaling; 2025: Special Issue: Let's Test Issue Only Parsing" + end + + it { is_expected.to be_issue_only.and be_known } + + it { is_expected.to have_attributes(volume: "UNKNOWN", issue: "Special Issue: Let's Test Issue Only Parsing", year: 2025) } + end + + context "when dealing with an oddly structured issue" do + let(:input) do + "Journal of Textile and Apparel, Technology and Management; 2019: Special Issue - ITMA Show, Barcelona, Spain" + end + + it { is_expected.to be_issue_only.and be_known } + + it { is_expected.to have_attributes(volume: "UNKNOWN", issue: "Special Issue - ITMA Show, Barcelona, Spain", year: 2019) } end context "with auto_create_volumes_and_issues set" do include Dry::Effects::Handler.Resolve + let(:input) { "literally anything" } + around do |example| provide auto_create_volumes_and_issues: true do example.run end end - it "defaults to volume 1 and issue 1" do - expect_calling_with("literally anything").to have_attributes(volume: ?1, issue: ?1).and be_known - end + it { is_expected.to be_full.and be_known } + + it { is_expected.to have_attributes(volume: ?1, issue: ?1) } end - it "handles a truncated format" do - expect_calling_with(nil, "Some Journal, vol 12, iss 103").to have_attributes(volume: "12", issue: "103", fpage: nil) + context "with unparseable input" do + let(:input) { "some random text" } + + it { is_expected.to be_unknown } end - it "handles non-matching input" do - expect_calling_with("some random text").to be_unknown + context "with a truncated format as the second argument" do + let(:input) do + [ + nil, + "Some Journal, vol 12, iss 103" + ] + end + + it { is_expected.to be_full.and be_known } + + it { is_expected.to have_attributes(volume: "12", issue: "103") } end end diff --git a/spec/services/journal_sources/parsed/full_spec.rb b/spec/services/journal_sources/parsed/full_spec.rb new file mode 100644 index 00000000..92577a7e --- /dev/null +++ b/spec/services/journal_sources/parsed/full_spec.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +RSpec.describe JournalSources::Parsed::Full, type: :parsed_journal_source do + context "with valid attributes" do + let(:volume) { "10" } + let(:issue) { "2" } + + it_behaves_like "a valid parsed journal source", :full + end + + context "with a missing volume" do + let(:volume) { nil } + let(:issue) { "3" } + + it_behaves_like "an invalid parsed journal source" + end +end diff --git a/spec/services/journal_sources/parsed/issue_only_spec.rb b/spec/services/journal_sources/parsed/issue_only_spec.rb new file mode 100644 index 00000000..e14d2202 --- /dev/null +++ b/spec/services/journal_sources/parsed/issue_only_spec.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +RSpec.describe JournalSources::Parsed::IssueOnly, type: :parsed_journal_source do + context "with valid attributes" do + let(:issue) { "10" } + let(:volume) { "2" } + + it_behaves_like "a valid parsed journal source", :issue_only + end + + context "with a missing issue" do + let(:issue) { nil } + + it_behaves_like "an invalid parsed journal source" + end +end diff --git a/spec/services/journal_sources/parsed/unknown_spec.rb b/spec/services/journal_sources/parsed/unknown_spec.rb new file mode 100644 index 00000000..199fa172 --- /dev/null +++ b/spec/services/journal_sources/parsed/unknown_spec.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +RSpec.describe JournalSources::Parsed::Unknown, type: :parsed_journal_source do + context "with no attributes provided" do + it_behaves_like "a valid parsed journal source", :unknown + end +end diff --git a/spec/services/journal_sources/parsed/volume_only_spec.rb b/spec/services/journal_sources/parsed/volume_only_spec.rb new file mode 100644 index 00000000..e09dfd14 --- /dev/null +++ b/spec/services/journal_sources/parsed/volume_only_spec.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +RSpec.describe JournalSources::Parsed::VolumeOnly, type: :parsed_journal_source do + context "with valid attributes" do + let(:volume) { "10" } + let(:issue) { "2" } + + it_behaves_like "a valid parsed journal source", :volume_only + end + + context "with a missing volume" do + let(:volume) { nil } + + it_behaves_like "an invalid parsed journal source" + end +end diff --git a/spec/services/liquid_ext/tags/if_integer_spec.rb b/spec/services/liquid_ext/tags/if_integer_spec.rb new file mode 100644 index 00000000..f0d4dc98 --- /dev/null +++ b/spec/services/liquid_ext/tags/if_integer_spec.rb @@ -0,0 +1,105 @@ +# frozen_string_literal: true + +RSpec.describe LiquidExt::Tags::IfInteger do + include_context "liquid tag testing" + + def augment_liquid_environment!(env) + env.register_tag "ifinteger", described_class + end + + let(:firstish) { 123 } + let(:secondish) { nil } + + let(:assigns) do + { + "firstish" => firstish, + "secondish" => secondish, + } + end + + let(:template_body) do + <<~LIQUID + {% ifinteger firstish %} + firstish is integer + {% elsifinteger secondish %} + secondish is integer + {% else %} + nothing is integer + {% endifinteger %} + LIQUID + end + + shared_examples_for "a firstish match" do + it "renders the first block" do + expect_rendering_with(assigns).to eq "firstish is integer" + + expect(template.warnings).to be_blank + expect(template.errors).to be_blank + end + end + + shared_examples_for "a secondish match" do + it "renders the second block" do + expect_rendering_with(assigns).to eq "secondish is integer" + + expect(template.warnings).to be_blank + expect(template.errors).to be_blank + end + end + + shared_examples_for "no matches" do + it "renders the else block" do + expect_rendering_with(assigns).to eq "nothing is integer" + + expect(template.warnings).to be_blank + expect(template.errors).to be_blank + end + end + + shared_examples_for "rendering tests" do + context "when firstish is an actual integer" do + let(:firstish) { 42 } + let(:secondish) { "not an integer" } + + it_behaves_like "a firstish match" + end + + context "when firstish is a string representation of an integer" do + let(:firstish) { "-7" } + + it_behaves_like "a firstish match" + end + + context "when secondish is an integer but firstish is not" do + let(:firstish) { "not an integer" } + let(:secondish) { 0 } + + it_behaves_like "a secondish match" + end + + context "when neither firstish nor secondish are integers" do + let(:firstish) { "foo" } + let(:secondish) { "bar" } + + it_behaves_like "no matches" + end + + context "when assigns are empty" do + let(:assigns) { {} } + + it_behaves_like "no matches" + end + end + + context "when in a strict environment" do + let(:environment) { strict_environment } + + include_examples "rendering tests" + end + + context "when in a lax environment" do + let(:environment) { lax_environment } + + include_examples "rendering tests" + end +end diff --git a/spec/services/liquid_ext/tags/if_present_tag_spec.rb b/spec/services/liquid_ext/tags/if_present_spec.rb similarity index 71% rename from spec/services/liquid_ext/tags/if_present_tag_spec.rb rename to spec/services/liquid_ext/tags/if_present_spec.rb index e6c4edd7..395fc7be 100644 --- a/spec/services/liquid_ext/tags/if_present_tag_spec.rb +++ b/spec/services/liquid_ext/tags/if_present_spec.rb @@ -1,16 +1,10 @@ # frozen_string_literal: true RSpec.describe LiquidExt::Tags::IfPresent do - let_it_be(:strict_environment) do - Liquid::Environment.build(error_mode: :strict) do |env| - env.register_tag "ifpresent", described_class - end - end + include_context "liquid tag testing" - let_it_be(:lax_environment) do - Liquid::Environment.build(error_mode: :lax) do |env| - env.register_tag "ifpresent", described_class - end + def augment_liquid_environment!(env) + env.register_tag "ifpresent", described_class end let_it_be(:root_drop) { LiquidTesting::Drops::RootDrop.new } @@ -21,8 +15,6 @@ } end - let(:environment) { strict_environment } - let(:template_body) do <<~LIQUID {% ifpresent root.always_blank %} @@ -43,16 +35,6 @@ LIQUID end - let(:template) do - Liquid::Template.parse(template_body, environment:) - end - - def expect_rendering_with(render_assigns) - result = template.render(render_assigns, strict_variables: true).strip - - expect(result) - end - shared_examples_for "rendering tests" do it "renders as expected with the right assigns" do expect_rendering_with(assigns).to eq "GRANDCHILD FOUND" diff --git a/spec/support/contexts/liquid_tag_testing.rb b/spec/support/contexts/liquid_tag_testing.rb new file mode 100644 index 00000000..c12cdfb4 --- /dev/null +++ b/spec/support/contexts/liquid_tag_testing.rb @@ -0,0 +1,43 @@ +# frozen_string_literal: true + +RSpec.shared_context "liquid tag testing" do + # @abstract + # Override in including spec + # @param [Liquid::Environment] env + # @return [void] + def augment_liquid_environment!(env); end + + let_it_be(:strict_environment) do + Liquid::Environment.build(error_mode: :strict) do |env| + augment_liquid_environment!(env) + end + end + + let_it_be(:lax_environment) do + Liquid::Environment.build(error_mode: :lax) do |env| + augment_liquid_environment!(env) + end + end + + let(:assigns) { {} } + + let(:environment) { strict_environment } + + let(:template_body) { "" } + + let(:template) do + Liquid::Template.parse(template_body, environment:) + end + + def rendering_with(render_assigns, strict_variables: true, strict_filters: true) + template.render( + render_assigns, + strict_variables:, + strict_filters:, + ).strip + end + + def expect_rendering_with(...) + expect(rendering_with(...)) + end +end diff --git a/spec/support/contexts/parsed_journal_source.rb b/spec/support/contexts/parsed_journal_source.rb new file mode 100644 index 00000000..e9893246 --- /dev/null +++ b/spec/support/contexts/parsed_journal_source.rb @@ -0,0 +1,94 @@ +# frozen_string_literal: true + +RSpec.shared_context "parsed journal source" do + let(:input) { nil } + let(:volume) { nil } + let(:issue) { nil } + let(:year) { nil } + let(:fpage) { nil } + let(:lpage) { nil } + + let(:parsed_attrs) do + { + volume:, + issue:, + year:, + fpage:, + lpage:, + } + end + + let(:instance) { described_class.new(**parsed_attrs) } + + subject { instance } + + shared_examples_for "a valid parsed journal source" do |mode| + it { is_expected.to be_valid } + + case mode + when :full + it { is_expected.to be_full } + it { is_expected.not_to be_issue_only } + it { is_expected.not_to be_volume_only } + when :issue_only + it { is_expected.not_to be_full } + it { is_expected.to be_issue_only } + it { is_expected.not_to be_volume_only } + when :volume_only + it { is_expected.not_to be_full } + it { is_expected.not_to be_issue_only } + it { is_expected.to be_volume_only } + else + it { is_expected.not_to be_known } + it { is_expected.to be_unknown } + end + + it "produces the expected monad" do + expect(instance.to_monad).to be_some + end + + context "when parsing its liquid drop" do + let(:drop) { instance.to_liquid } + + subject { drop } + + case mode + when :full + it { is_expected.to be_exists } + it { is_expected.to be_known } + it { is_expected.to be_full } + it { is_expected.not_to be_issue_only } + it { is_expected.not_to be_volume_only } + when :issue_only + it { is_expected.to be_exists } + it { is_expected.to be_known } + it { is_expected.not_to be_full } + it { is_expected.to be_issue_only } + it { is_expected.not_to be_volume_only } + when :volume_only + it { is_expected.to be_exists } + it { is_expected.to be_known } + it { is_expected.not_to be_full } + it { is_expected.not_to be_issue_only } + it { is_expected.to be_volume_only } + else + it { is_expected.not_to be_exists } + it { is_expected.not_to be_known } + it { is_expected.to be_unknown } + end + end + end + + shared_examples_for "an invalid parsed journal source" do + it { is_expected.to be_invalid } + it { is_expected.not_to be_known } + + it "produces the expected monad" do + expect(instance.to_monad).to be_none + end + end +end + +RSpec.configure do |rspec| + rspec.include_context "parsed journal source", type: :parsed_journal_source +end