From 64ca6c8311daa7c5b1eaa24717525ed7b4d1ac33 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Thu, 1 Mar 2018 16:19:11 -0800 Subject: [PATCH 01/15] Keep a cache of found readers, so not constantly re-initializing. --- lib/rdf/rdfa/reader.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/rdf/rdfa/reader.rb b/lib/rdf/rdfa/reader.rb index a44b0236..fef85970 100644 --- a/lib/rdf/rdfa/reader.rb +++ b/lib/rdf/rdfa/reader.rb @@ -414,7 +414,9 @@ def extract_script(el, input, type, options, &block) rescue LoadError end - if reader = RDF::Reader.for(content_type: type.to_s) + @readers ||= {} + reader = @readers[type.to_s] = RDF::Reader.for(content_type: type.to_s) unless @readers.has_key?(type.to_s) + if reader = @readers[type.to_s] add_debug(el, "=> reader #{reader.to_sym}") # Wrap input in a RemoteDocument with appropriate content-type and base doc = if input.is_a?(String) From a43c395d3ec8a5f4d858ce69cdeaa398844c38ae Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Tue, 7 Aug 2018 16:58:54 -0700 Subject: [PATCH 02/15] Remove gemspec deprecations. --- rdf-rdfa.gemspec | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/rdf-rdfa.gemspec b/rdf-rdfa.gemspec index 5d0e3e78..22948f82 100755 --- a/rdf-rdfa.gemspec +++ b/rdf-rdfa.gemspec @@ -17,7 +17,6 @@ Gem::Specification.new do |gem| gem.platform = Gem::Platform::RUBY gem.files = %w(AUTHORS README.md UNLICENSE VERSION) + Dir.glob('lib/**/*.rb') gem.require_paths = %w(lib) - gem.has_rdoc = false gem.required_ruby_version = '>= 2.2.2' gem.requirements = [] @@ -25,8 +24,7 @@ Gem::Specification.new do |gem| gem.add_runtime_dependency 'rdf', '~> 3.0' gem.add_runtime_dependency 'haml', '~> 5.0' gem.add_runtime_dependency 'rdf-xsd', '~> 3.0' - #gem.add_runtime_dependency 'rdf-aggregate-repo', '~> 3.0' - gem.add_runtime_dependency 'rdf-aggregate-repo', '>= 2.2', '< 4.0' + gem.add_runtime_dependency 'rdf-aggregate-repo', '~> 3.0' gem.add_runtime_dependency 'htmlentities', '~> 4.3' #gem.add_development_dependency 'json-ld', '~> 3.0' @@ -41,8 +39,7 @@ Gem::Specification.new do |gem| gem.add_development_dependency 'rdf-tabular', '>= 2.2', '< 4.0' #gem.add_development_dependency 'rdf-rdfxml', '~> 3.0' gem.add_development_dependency 'rdf-rdfxml', '>= 2.2', '< 4.0' - #gem.add_development_dependency 'sparql', '~> 3.0' - gem.add_development_dependency 'sparql', '>= 2.2', '< 4.0' + gem.add_development_dependency 'sparql', '~> 3.0' gem.add_development_dependency 'yard' , '~> 0.9.12' gem.post_install_message = nil From e6d38f2b270ebf04a7a0ac60d44aa9b7c43b97e7 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Sat, 11 Aug 2018 14:57:27 -0700 Subject: [PATCH 03/15] Remove gemspec deprecations. --- rdf-rdfa.gemspec | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rdf-rdfa.gemspec b/rdf-rdfa.gemspec index 22948f82..d04b392f 100755 --- a/rdf-rdfa.gemspec +++ b/rdf-rdfa.gemspec @@ -24,7 +24,8 @@ Gem::Specification.new do |gem| gem.add_runtime_dependency 'rdf', '~> 3.0' gem.add_runtime_dependency 'haml', '~> 5.0' gem.add_runtime_dependency 'rdf-xsd', '~> 3.0' - gem.add_runtime_dependency 'rdf-aggregate-repo', '~> 3.0' + #gem.add_runtime_dependency 'rdf-aggregate-repo', '~> 3.0' + gem.add_runtime_dependency 'rdf-aggregate-repo', '>= 2.2', '< 4.0' gem.add_runtime_dependency 'htmlentities', '~> 4.3' #gem.add_development_dependency 'json-ld', '~> 3.0' From 408dbc5627da53e9ae527528a121acc5461336f1 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Mon, 20 Aug 2018 17:50:33 -0700 Subject: [PATCH 04/15] Update language and base attribute detection. --- lib/rdf/rdfa/reader/nokogiri.rb | 14 +++++--------- lib/rdf/rdfa/reader/rexml.rb | 10 +++++----- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/lib/rdf/rdfa/reader/nokogiri.rb b/lib/rdf/rdfa/reader/nokogiri.rb index 2ac2bded..16dec850 100644 --- a/lib/rdf/rdfa/reader/nokogiri.rb +++ b/lib/rdf/rdfa/reader/nokogiri.rb @@ -35,12 +35,10 @@ def initialize(node, parent = nil) # @return [String] def language language = case - when @node.document.is_a?(::Nokogiri::HTML::Document) && @node.attributes["xml:lang"] - @node.attributes["xml:lang"].to_s - when @node.document.is_a?(::Nokogiri::HTML::Document) && @node.attributes["lang"] - @node.attributes["lang"].to_s when @node.attribute_with_ns("lang", RDF::XML.to_s) @node.attribute_with_ns("lang", RDF::XML.to_s) + when @node.attribute("xml:lang") + @node.attribute("xml:lang").to_s when @node.attribute("lang") @node.attribute("lang").to_s end @@ -51,7 +49,7 @@ def language # # @return [String] def base - @node.attribute_with_ns("base", RDF::XML.to_s) + @node.attribute_with_ns("base", RDF::XML.to_s) || @node.attribute('xml:base') end def display_path @@ -202,8 +200,6 @@ def detect_host_language_version(input, options) doc_type_string = input.children.detect {|c| c.is_a?(::Nokogiri::XML::DTD)} version_attr = input.root && input.root.attribute("version").to_s root_element = input.root.name.downcase - root_namespace = input.root.namespace.to_s - root_attrs = input.root.attributes content_type = case when root_element == "html" && input.is_a?(::Nokogiri::HTML::Document) "text/html" @@ -226,7 +222,7 @@ def detect_host_language_version(input, options) doc_type_string = head.match(%r(]*>)m).to_s root = head.match(%r(<[^!\?>]*>)m).to_s root_element = root.match(%r(^<(\S+)[ >])) ? $1 : "" - version_attr = root.match(/version\s+=\s+(\S+)[\s">]/m) ? $1 : "" + version_attr = root.match(/version\s*=\s*"([^"]+)"/m) ? $1 : "" head_element = head.match(%r()mi) head_doc = ::Nokogiri::HTML.parse(head_element.to_s) @@ -308,7 +304,7 @@ def doc_base(base) base_el = @doc.at_css("html>head>base") base = base.join(base_el.attribute("href").to_s.split("#").first) if base_el else - xml_base = root.attribute_with_ns("base", RDF::XML.to_s) if root + xml_base = root.attribute_with_ns("base", RDF::XML.to_s) || root.attribute('xml:base') if root base = base.join(xml_base) if xml_base end diff --git a/lib/rdf/rdfa/reader/rexml.rb b/lib/rdf/rdfa/reader/rexml.rb index cf317739..43c701c8 100644 --- a/lib/rdf/rdfa/reader/rexml.rb +++ b/lib/rdf/rdfa/reader/rexml.rb @@ -39,6 +39,8 @@ def language language = case when @node.attribute("lang", RDF::XML.to_s) @node.attribute("lang", RDF::XML.to_s) + when @node.attribute("xml:lang") + @node.attribute("xml:lang").to_s when @node.attribute("lang") @node.attribute("lang").to_s end @@ -49,7 +51,7 @@ def language # # @return [String] def base - @node.attribute("base", RDF::XML.to_s) + @node.attribute("base", RDF::XML.to_s) || @node.attribute('xml:base') end def display_path @@ -238,8 +240,6 @@ def detect_host_language_version(input, options) doc_type_string = input.doctype.to_s version_attr = input.root && input.root.attribute("version").to_s root_element = input.root.name.downcase - root_namespace = input.root.namespace.to_s - root_attrs = input.root.attributes content_type = "application/xhtml+html" # FIXME: what about other possible XML types? else content_type = input.content_type if input.respond_to?(:content_type) @@ -257,7 +257,7 @@ def detect_host_language_version(input, options) doc_type_string = head.match(%r(]*>)m).to_s root = head.match(%r(<[^!\?>]*>)m).to_s root_element = root.match(%r(^<(\S+)[ >])) ? $1 : "" - version_attr = root.match(/version\s+=\s+(\S+)[\s">]/m) ? $1 : "" + version_attr = root.match(/version\s*=\s*"([^"]+)"/m) ? $1 : "" head_element = head.match(%r()mi) head_doc = ::REXML::Document.new(head_element.to_s) @@ -331,7 +331,7 @@ def doc_base(base) base_el = ::REXML::XPath.first(@doc, "/html/head/base") rescue nil base = base.join(base_el.attribute("href").to_s.split("#").first) if base_el else - xml_base = root.attribute("base", RDF::XML.to_s) if root + xml_base = root.attribute("base", RDF::XML.to_s) || root.attribute('xml:base') if root base = base.join(xml_base) if xml_base end From c0ab882a5ebe4bec2b26ddc189c97e2ab1999374 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Mon, 20 Aug 2018 17:51:35 -0700 Subject: [PATCH 05/15] Do script extraction from within traverse so that base (and potentially language) can be passed in as an option. --- lib/rdf/rdfa/reader.rb | 89 ++++++++++++++++++------------------- spec/reader_spec.rb | 99 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 141 insertions(+), 47 deletions(-) diff --git a/lib/rdf/rdfa/reader.rb b/lib/rdf/rdfa/reader.rb index fef85970..b36fce77 100644 --- a/lib/rdf/rdfa/reader.rb +++ b/lib/rdf/rdfa/reader.rb @@ -381,6 +381,38 @@ def initialize(input = $stdin, options = {}, &block) end end + ## + # Extracts RDF from script element, or embeded RDF/XML + def extract_script(el, input, type, **options, &block) + add_debug(el, "script element of type #{type}") + begin + # Formats don't exist unless they've been required + case type.to_s + when 'application/csvm+json' then require 'rdf/tabular' + when 'application/ld+json' then require 'json/ld' + when 'application/rdf+xml' then require 'rdf/rdfxml' + when 'text/ntriples' then require 'rdf/ntriples' + when 'text/turtle' then require 'rdf/turtle' + end + rescue LoadError + end + + @readers ||= {} + reader = @readers[type.to_s] = RDF::Reader.for(content_type: type.to_s) unless @readers.has_key?(type.to_s) + if reader = @readers[type.to_s] + add_debug(el, "=> reader #{reader.to_sym}") + # Wrap input in a RemoteDocument with appropriate content-type and base + doc = if input.is_a?(String) + RDF::Util::File::RemoteDocument.new(input, content_type: type.to_s, **options) + else + input + end + reader.new(doc, options).each(&block) + else + add_debug(el, "=> no reader found") + end + end + ## # Iterates the given block for each RDF statement in the input. # @@ -400,54 +432,9 @@ def each_statement(&block) # parse parse_whole_document(@doc, RDF::URI(base_uri)) - def extract_script(el, input, type, options, &block) - add_debug(el, "script element of type #{type}") - begin - # Formats don't exist unless they've been required - case type.to_s - when 'application/csvm+json' then require 'rdf/tabular' - when 'application/ld+json' then require 'json/ld' - when 'application/rdf+xml' then require 'rdf/rdfxml' - when 'text/ntriples' then require 'rdf/ntriples' - when 'text/turtle' then require 'rdf/turtle' - end - rescue LoadError - end - - @readers ||= {} - reader = @readers[type.to_s] = RDF::Reader.for(content_type: type.to_s) unless @readers.has_key?(type.to_s) - if reader = @readers[type.to_s] - add_debug(el, "=> reader #{reader.to_sym}") - # Wrap input in a RemoteDocument with appropriate content-type and base - doc = if input.is_a?(String) - RDF::Util::File::RemoteDocument.new(input, - options.merge( - content_type: type.to_s, - base_uri: base_uri - )) - else - input - end - reader.new(doc, options).each(&block) - else - add_debug(el, "=> no reader found") - end - end - # Look for Embedded RDF/XML unless @root.xpath("//rdf:RDF", "rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#").empty? - extract_script(@root, @doc, "application/rdf+xml", @options) do |statement| - @repository << statement - end - end - - # Look for Embedded scripts - @root.css("script[type]").each do |el| - type = el.attribute("type") - - text = el.inner_html.sub(%r(\A\s*\\s*\Z)m, '') - - extract_script(el, text, type, @options) do |statement| + extract_script(@root, @doc, "application/rdf+xml", @options.merge(base_uri: base_uri)) do |statement| @repository << statement end end @@ -777,6 +764,7 @@ def traverse(element, evaluation_context) rev role src + type typeof value vocab @@ -832,6 +820,15 @@ def traverse(element, evaluation_context) language = nil if language.to_s.empty? add_debug(element) {"HTML5 [3.2.3.3] lang: #{language.inspect}"} if language + # Embedded scripts + if element.name == 'script' + text = element.inner_html.sub(%r(\A\s*\\s*\Z)m, '') + + extract_script(element, text, attrs[:type], @options.merge(base_uri: base)) do |statement| + @repository << statement + end + end + # From HTML5, if the property attribute and the rel and/or rev attribute exists on the same element, the non-CURIE and non-URI rel and rev values are ignored. If, after this, the value of rel and/or rev becomes empty, then the processor must act as if the respective attribute is not present. if [:html5, :xhtml5].include?(@host_language) && attrs[:property] && (attrs[:rel] || attrs[:rev]) old_rel, old_rev = attrs[:rel], attrs[:rev] diff --git a/spec/reader_spec.rb b/spec/reader_spec.rb index 36075246..2852cde3 100644 --- a/spec/reader_spec.rb +++ b/spec/reader_spec.rb @@ -1681,6 +1681,103 @@ gr:hasLegalName "Hepp Industries Ltd."^^xsd:string . ) ], + "application/ld+json with doc base" => [ + %q( + + ), + %q( + @prefix foo: . + @prefix gr: . + @prefix xsd: . + @prefix rdfs: . + + + a gr:BusinessEntity ; + rdfs:seeAlso ; + gr:hasLegalName "Hepp Industries Ltd."^^xsd:string . + ) + ], + "application/ld+json with base element" => [ + %q( + + + + + + + ), + %q( + @prefix foo: . + @prefix gr: . + @prefix xsd: . + @prefix rdfs: . + + + a gr:BusinessEntity ; + rdfs:seeAlso ; + gr:hasLegalName "Hepp Industries Ltd."^^xsd:string . + ) + ], + "application/ld+json with @xml:base" => [ + %q( + + + + + + + ), + %q( + @prefix foo: . + @prefix gr: . + @prefix xsd: . + @prefix rdfs: . + + + a gr:BusinessEntity ; + rdfs:seeAlso ; + gr:hasLegalName "Hepp Industries Ltd."^^xsd:string . + ) + ], "application/ld+json with junk" => [ %q( + ), @@ -2662,9 +2662,9 @@ end end - def parse(input, options = {}) + def parse(input, **options) graph = RDF::Graph.new - RDF::RDFa::Reader.new(input, options.merge(logger: logger, library: @library)).each do |statement| + RDF::RDFa::Reader.new(input, logger: logger, library: @library, **options).each do |statement| graph << statement rescue fail "SPEC: #{$!}" end graph diff --git a/spec/suite_helper.rb b/spec/suite_helper.rb index 950d09a5..f39b5817 100644 --- a/spec/suite_helper.rb +++ b/spec/suite_helper.rb @@ -22,7 +22,7 @@ class << self # HTTP Request headers. # @return [IO] File stream # @yield [IO] File stream - def self.open_file(filename_or_url, options = {}, &block) + def self.open_file(filename_or_url, **options, &block) case when filename_or_url.to_s =~ /^file:/ path = filename_or_url[5..-1] @@ -57,14 +57,14 @@ def self.open_file(filename_or_url, options = {}, &block) # For overriding content type from test data document_options[:headers][:content_type] = options[:contentType] if options[:contentType] - remote_document = RDF::Util::File::RemoteDocument.new(response.read, document_options) + remote_document = RDF::Util::File::RemoteDocument.new(response.read, **document_options) if block_given? yield remote_document else remote_document end else - original_open_file(filename_or_url, options, &block) + original_open_file(filename_or_url, **options, &block) end end end diff --git a/spec/suite_spec.rb b/spec/suite_spec.rb index 7e46cfc5..a7dde27b 100644 --- a/spec/suite_spec.rb +++ b/spec/suite_spec.rb @@ -32,7 +32,7 @@ validate = %w(0239 0279 0295 0284).none? {|n| t.input(host_language, version).to_s.include?(n)} graph = RDF::Repository.new - RDF::Reader.open(t.input(host_language, version), options.merge(validate: validate)) do |reader| + RDF::Reader.open(t.input(host_language, version), **options.merge(validate: validate)) do |reader| expect(reader).to be_a RDF::RDFa::Reader # Some allowances for REXML diff --git a/spec/writer_spec.rb b/spec/writer_spec.rb index 4c804c74..b94bd04b 100644 --- a/spec/writer_spec.rb +++ b/spec/writer_spec.rb @@ -564,7 +564,7 @@ class EX < RDF::Vocabulary("http://example/"); end logger.info result.force_encoding("utf-8") graph2 = parse(result, format: :rdfa, logger: logger) # Need to put this in to avoid problems with added markup - statements = graph2.query(object: RDF::URI("http://rdf.kellogg-assoc.com/css/distiller.css")).to_a + statements = graph2.query({object: RDF::URI("http://rdf.kellogg-assoc.com/css/distiller.css")}).to_a statements.each {|st| graph2.delete(st)} #puts graph2.dump(:ttl) expect(graph2).to be_equivalent_graph(@graph, logger: logger) @@ -575,20 +575,20 @@ class EX < RDF::Vocabulary("http://example/"); end end unless ENV['CI'] # Not for continuous integration require 'rdf/turtle' - def parse(input, options = {}) + def parse(input, **options) reader_class = RDF::Reader.for(options[:format]) if options[:format] reader_class ||= options.fetch(:reader, RDF::Reader.for(detect_format(input))) graph = RDF::Repository.new - reader_class.new(input, options).each do |statement| + reader_class.new(input, **options).each do |statement| graph << statement end graph end # Serialize @graph to a string and compare against regexps - def serialize(options = {}) - result = RDF::RDFa::Writer.buffer({logger: logger, standard_prefixes: true}.merge(options)) do |writer| + def serialize(**options) + result = RDF::RDFa::Writer.buffer(logger: logger, standard_prefixes: true, **options) do |writer| writer << @graph end require 'cgi' From d0647b943589e0db38776eb9f72e194208438ec1 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Sat, 14 Dec 2019 10:39:57 -0800 Subject: [PATCH 14/15] Fix bundler args in travis. --- .travis.yml | 1 - Gemfile | 4 ++-- spec/suite_helper.rb | 12 ++++++------ 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/.travis.yml b/.travis.yml index 2b531323..55c863b3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,4 @@ language: ruby -bundler_args: --without debug script: "bundle exec rspec spec" env: - CI=true diff --git a/Gemfile b/Gemfile index 3195fd90..2b3781a6 100644 --- a/Gemfile +++ b/Gemfile @@ -25,8 +25,8 @@ group :development, :test do end group :test do - gem 'simplecov', require: false - gem 'coveralls', require: false + gem 'simplecov', platforms: :mri + gem 'coveralls', '~> 0.8', platforms: :mri gem 'psych', platforms: [:mri, :rbx] end diff --git a/spec/suite_helper.rb b/spec/suite_helper.rb index f39b5817..4d75f853 100644 --- a/spec/suite_helper.rb +++ b/spec/suite_helper.rb @@ -45,12 +45,12 @@ def self.open_file(filename_or_url, **options, &block) document_options[:headers][:content_type] = case filename_or_url.to_s when /\.html$/ then 'text/html' when /\.xhtml$/ then 'application/xhtml+xml' - when /\.xml$/ then 'application/xml' - when /\.svg$/ then 'image/svg+xml' - when /\.ttl$/ then 'text/turtle' - when /\.ttl$/ then 'text/turtle' - when /\.jsonld$/ then 'application/ld+json' - else 'unknown' + when /\.xml$/ then 'application/xml' + when /\.svg$/ then 'image/svg+xml' + when /\.ttl$/ then 'text/turtle' + when /\.ttl$/ then 'text/turtle' + when /\.jsonld$/ then 'application/ld+json' + else 'unknown' end document_options[:headers][:content_type] = response.content_type if response.respond_to?(:content_type) From 8b6dab15e0574a26b0f2858d2725ee01a6be1800 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Mon, 16 Dec 2019 13:31:56 -0800 Subject: [PATCH 15/15] Update dependencies. --- rdf-rdfa.gemspec | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/rdf-rdfa.gemspec b/rdf-rdfa.gemspec index dc55b932..96540ace 100755 --- a/rdf-rdfa.gemspec +++ b/rdf-rdfa.gemspec @@ -23,22 +23,20 @@ Gem::Specification.new do |gem| gem.add_runtime_dependency 'rdf', '~> 3.1' gem.add_runtime_dependency 'haml', '~> 5.1' - gem.add_runtime_dependency 'rdf-xsd', '~> 3.0' + gem.add_runtime_dependency 'rdf-xsd', '~> 3.1' gem.add_runtime_dependency 'rdf-aggregate-repo', '~> 3.1' gem.add_runtime_dependency 'htmlentities', '~> 4.3' - gem.add_development_dependency 'json-ld', '>= 3.1' + gem.add_development_dependency 'json-ld', '~> 3.1' gem.add_development_dependency 'rspec', '~> 3.9' gem.add_development_dependency 'rspec-its', '~> 1.3' gem.add_development_dependency 'rdf-spec', '~> 3.1' gem.add_development_dependency 'rdf-turtle', '~> 3.1' - gem.add_development_dependency 'rdf-vocab', '~> 3.0' - gem.add_development_dependency 'rdf-isomorphic', '~> 3.0' - #gem.add_development_dependency 'rdf-tabular', '~> 3.0' - gem.add_development_dependency 'rdf-tabular', '>= 2.2', '< 4.0' - #gem.add_development_dependency 'rdf-rdfxml', '~> 3.0' - gem.add_development_dependency 'rdf-rdfxml', '>= 2.2', '< 4.0' - gem.add_development_dependency 'sparql', '~> 3.0' + gem.add_development_dependency 'rdf-vocab', '~> 3.1' + gem.add_development_dependency 'rdf-isomorphic', '~> 3.1' + gem.add_development_dependency 'rdf-tabular', '~> 3.1' + gem.add_development_dependency 'rdf-rdfxml', '~> 3.1' + gem.add_development_dependency 'sparql', '~> 3.1' gem.add_development_dependency 'yard' , '~> 0.9.20' gem.post_install_message = nil