diff --git a/.travis.yml b/.travis.yml index 9e1f468..83a126b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,18 +1,16 @@ language: ruby -bundler_args: --without debug script: "bundle exec rspec spec" env: - CI=true rvm: - - 2.2 - - 2.3 - 2.4 - - jruby-9 - - rbx-3 + - 2.5 + - 2.6 + - 2.7 + - jruby cache: bundler sudo: false matrix: allow_failures: - - rvm: jruby-9 - - rvm: rbx-3 + - rvm: jruby dist: trusty diff --git a/Gemfile b/Gemfile index 10db0a6..c4cd63b 100644 --- a/Gemfile +++ b/Gemfile @@ -5,11 +5,12 @@ gemspec gem "rdf", git: "https://github.com/ruby-rdf/rdf", branch: "develop" gem "rdf-rdfa", git: "https://github.com/ruby-rdf/rdf-rdfa", branch: "develop" gem "rdf-xsd", git: "https://github.com/ruby-rdf/rdf-xsd", branch: "develop" -gem "nokogumbo", '~> 1.4' +gem "nokogumbo", '~> 2.0' group :development do - gem 'linkeddata' - gem 'ebnf', git: "https://github.com/gkellogg/ebnf", branch: "develop" + gem "json-ld", git: "https://github.com/ruby-rdf/json-ld", branch: "develop" + #gem 'linkeddata' + gem 'ebnf', git: "https://github.com/dryruby/ebnf", branch: "develop" gem 'rdf-aggregate-repo', git: "https://github.com/ruby-rdf/rdf-aggregate-repo", branch: "develop" gem 'rdf-isomorphic', git: "https://github.com/ruby-rdf/rdf-isomorphic", branch: "develop" gem "rdf-spec", git: "https://github.com/ruby-rdf/rdf-spec", branch: "develop" @@ -20,9 +21,3 @@ end group :debug do gem "byebug", platform: :mri end - -platforms :rbx do - gem 'rubysl', '~> 2.0' - gem 'rubinius', '~> 2.0' - gem 'json' -end diff --git a/README.md b/README.md index c8d5da1..7f95f2d 100755 --- a/README.md +++ b/README.md @@ -45,11 +45,11 @@ GRDDL-type triple generation, such as for html>head>title anchor tags. If the `RDFa` parser is available, {RDF::Microdata::Format} will not assert content type `text/html` or file extension `.html`, as this is also asserted by RDFa. Instead, the RDFa reader will invoke the microdata reader if an `@itemscope` attribute is detected. ## Dependencies -* [RDF.rb](http://rubygems.org/gems/rdf) (>= 2.0) -* [RDF::XSD](http://rubygems.org/gems/rdf-xsd) (>= 2.0) +* [RDF.rb](http://rubygems.org/gems/rdf) (>= 3.1) +* [RDF::XSD](http://rubygems.org/gems/rdf-xsd) (>= 3.1) * [HTMLEntities](https://rubygems.org/gems/htmlentities) ('>= 4.3.0') -* [Nokogiri](http://rubygems.org/gems/nokogiri) (>= 1.7.1) -* Soft dependency on [Nokogumbo](https://github.com/rubys/nokogumbo) (>= 1.4.10) +* [Nokogiri](http://rubygems.org/gems/nokogiri) (>= 1.10) +* Soft dependency on [Nokogumbo](https://github.com/rubys/nokogumbo) (~> 2.0) ## Documentation Full documentation available on [Rubydoc.info][Microdata doc] @@ -68,13 +68,6 @@ use {RDF::Microdata::RdfaReader} directly. The reader exposes a `#rdfa` method, which can be used to retrieve the transformed HTML+RDFa -### JSON-lD-based Reader -There is an experimental reader based on transforming Microdata to JSON-LD. To invoke -this, add the `jsonld: true` option to the {RDF::Microdata::Reader.new}, or -use {RDF::Microdata::JsonLdReader} directly. - -The reader exposes a `#json` method, which can be used to retrieve the generated JSON-LD - ## Resources * [RDF.rb][RDF.rb] * [Documentation](http://www.rubydoc.info/github/ruby-rdf/rdf-microdata/) diff --git a/VERSION b/VERSION index 5859406..fd2a018 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.2.3 +3.1.0 diff --git a/lib/rdf/microdata.rb b/lib/rdf/microdata.rb index 98b386b..c74bfd4 100644 --- a/lib/rdf/microdata.rb +++ b/lib/rdf/microdata.rb @@ -26,7 +26,6 @@ module Microdata require 'rdf/microdata/format' require 'rdf/microdata/vocab' autoload :Expansion, 'rdf/microdata/expansion' - autoload :JsonLdReader, 'rdf/microdata/jsonld_reader' autoload :Profile, 'rdf/microdata/profile' autoload :RdfaReader, 'rdf/microdata/rdfa_reader' autoload :Reader, 'rdf/microdata/reader' diff --git a/lib/rdf/microdata/expansion.rb b/lib/rdf/microdata/expansion.rb index 93efe13..ad05ff4 100644 --- a/lib/rdf/microdata/expansion.rb +++ b/lib/rdf/microdata/expansion.rb @@ -26,7 +26,6 @@ def expand repo = RDF::Repository.new repo << self # Add default graph - count = repo.count log_debug("expand") {"Loaded #{repo.size} triples into default graph"} repo = owl_entailment(repo) @@ -38,7 +37,7 @@ def expand end def rule(name, &block) - Rule.new(name, @options, block) + Rule.new(name, **@options, &block) end ## @@ -72,7 +71,7 @@ class Rule # r.execute(queryable) {|statement| puts statement.inspect} # # @param [String] name - def initialize(name, options = {}, &block) + def initialize(name, **options, &block) @antecedents = [] @consequents = [] @options = options.dup diff --git a/lib/rdf/microdata/format.rb b/lib/rdf/microdata/format.rb index 722da35..ee7211c 100644 --- a/lib/rdf/microdata/format.rb +++ b/lib/rdf/microdata/format.rb @@ -55,7 +55,7 @@ def self.cli_commands format: :microdata }, option_use: {output_format: :disabled}, - lambda: ->(files, options) do + lambda: ->(files, **options) do out = options[:output] || $stdout xsl = Nokogiri::XSLT(%( @@ -107,7 +107,7 @@ def self.cli_commands # If files are empty, either use options[::evaluate] input = options[:evaluate] ? StringIO.new(options[:evaluate]) : STDIN input.set_encoding(options.fetch(:encoding, Encoding::UTF_8)) - RDF::Microdata::Reader.new(input, options.merge(rdfa: true)) do |reader| + RDF::Microdata::Reader.new(input, **options.merge(rdfa: true)) do |reader| reader.rdfa.xpath("//text()").each do |txt| txt.content = txt.content.to_s.strip end @@ -115,7 +115,7 @@ def self.cli_commands end else files.each do |file| - RDF::Microdata::Reader.open(file, options.merge(rdfa: true)) do |reader| + RDF::Microdata::Reader.open(file, **options.merge(rdfa: true)) do |reader| reader.rdfa.xpath("//text()").each do |txt| txt.content = txt.content.to_s.strip end @@ -125,32 +125,6 @@ def self.cli_commands end end }, - "to-jsonld": { - description: "Transform HTML+Microdata into JSON-LD", - parse: false, - help: "to-jsonld files ...\nTransform HTML+Microdata into JSON-LD", - filter: { - format: :microdata - }, - option_use: {output_format: :disabled}, - lambda: ->(files, options) do - out = options[:output] || $stdout - if files.empty? - # If files are empty, either use options[::evaluate] - input = options[:evaluate] ? StringIO.new(options[:evaluate]) : STDIN - input.set_encoding(options.fetch(:encoding, Encoding::UTF_8)) - RDF::Microdata::Reader.new(input, options.merge(jsonld: true)) do |reader| - out.puts reader.jsonld.to_json(::JSON::LD::JSON_STATE) - end - else - files.each do |file| - RDF::Microdata::Reader.open(file, options.merge(jsonld: true)) do |reader| - out.puts reader.jsonld.to_json(::JSON::LD::JSON_STATE) - end - end - end - end - }, } end end diff --git a/lib/rdf/microdata/jsonld_reader.rb b/lib/rdf/microdata/jsonld_reader.rb deleted file mode 100644 index c11427d..0000000 --- a/lib/rdf/microdata/jsonld_reader.rb +++ /dev/null @@ -1,251 +0,0 @@ -require 'json/ld' -require 'nokogumbo' - -module RDF::Microdata - ## - # Update DOM to turn Microdata into JSON-LD and parse using the JSON-LD Reader - class JsonLdReader < JSON::LD::Reader - # The resulting JSON-LD - # @return [Hash] - attr_reader :jsonld - - def self.format(klass = nil) - if klass.nil? - RDF::Microdata::Format - else - super - end - end - - ## - # Initializes the JsonLdReader instance. - # - # @param [IO, File, String] input - # the input stream to read - # @param [Hash{Symbol => Object}] options - # any additional options (see `RDF::Reader#initialize`) - # @return [reader] - # @yield [reader] `self` - # @yieldparam [RDF::Reader] reader - # @yieldreturn [void] ignored - # @raise [RDF::ReaderError] if _validate_ - def initialize(input = $stdin, options = {}, &block) - @options = options - log_info('', "using JSON-LD transformation reader") - - input = case input - when ::Nokogiri::XML::Document, ::Nokogiri::HTML::Document then input - else - # Try to detect charset from input - options[:encoding] ||= input.charset if input.respond_to?(:charset) - - # Otherwise, default is utf-8 - options[:encoding] ||= 'utf-8' - options[:encoding] = options[:encoding].to_s if options[:encoding] - input = input.read if input.respond_to?(:read) - ::Nokogiri::HTML5(input.force_encoding(options[:encoding])) - end - - # Load registry - begin - registry_uri = options[:registry] || RDF::Microdata::DEFAULT_REGISTRY - log_debug('', "registry = #{registry_uri.inspect}") - Registry.load_registry(registry_uri) - rescue JSON::ParserError => e - log_fatal("Failed to parse registry: #{e.message}", exception: RDF::ReaderError) if (root.nil? && validate?) - end - - @jsonld = {'@graph' => []} - - # Start with all top-level items - input.css("[itemscope]").each do |item| - next if item['itemprop'] # Only top-level items - jsonld['@graph'] << get_object(item) - end - - log_debug('', "Transformed document: #{jsonld.to_json(JSON::LD::JSON_STATE)}") - - # Rely on RDFa reader - super(jsonld.to_json, options, &block) - end - - private - # Return JSON-LD representation of an item - # @param [Nokogiri::XML::Element] item - # @param [Hash{Nokogiri::XML::Node => Hash}] - # @return [Hash] - def get_object(item, memory = {}) - if result = memory[item] - # Result is a reference to that item; assign a blank-node identifier if necessary - result['@id'] ||= alloc_bnode - return result - end - - result = {} - memory[item] = result - - # If the item has a global identifier, add an entry to result called "@id" whose value is the global identifier of item. - result['@id'] = item['itemid'].to_s if item['itemid'] - - # If the item has any item types, add an entry to result called "@type" whose value is an array listing the item types of item, in the order they were specified on the itemtype attribute. - if item['itemtype'] - # Only absolute URLs - types = item.attribute('itemtype'). - remove. - to_s. - split(/\s+/). - select {|t| RDF::URI(t).absolute?} - if vocab = types.first - vocab = Registry.find(vocab) || begin - type_vocab = vocab.to_s.sub(/([\/\#])[^\/\#]*$/, '\1') unless vocab.nil? - Registry.new(type_vocab) if type_vocab - end - (result['@context'] = {})['@vocab'] = vocab.uri.to_s if vocab - result['@type'] = types unless types.empty? - end - end - - # For each element element that has one or more property names and is one of the properties of the item item, in the order those elements are given by the algorithm that returns the properties of an item, run the following substeps - item_properties(item).each do |element| - value = if element['itemscope'] - get_object(element, memory) - else - property_value(element) - end - element['itemprop'].to_s.split(/\s+/).each do |prop| - result[prop] ||= [] << value - end - end - - result - end - - ## - # - # @param [Nokogiri::XML::Element] item - # @return [Array] - # List of property elements for an item - def item_properties(item) - results, memory, pending = [], [item], item.children.select(&:element?) - log_debug(item, "item_properties") - - # If root has an itemref attribute, split the value of that itemref attribute on spaces. For each resulting token ID, if there is an element in the document whose ID is ID, then add the first such element to pending. - item['itemref'].to_s.split(/\s+/).each do |ref| - if referenced = referenced = item.at_css("##{ref}") - pending << referenced - end - end - - while !pending.empty? - current = pending.shift - # Error - break if memory.include?(current) - memory << current - - # If current does not have an itemscope attribute, then: add all the child elements of current to pending. - pending += current.children.select(&:element?) unless current['itemscope'] - - # If current has an itemprop attribute specified and has one or more property names, then add current to results. - results << current unless current['itemprop'].to_s.split(/\s+/).empty? - end - - results - end - - ## - # - def property_value(element) - base = element.base || base_uri - log_debug(element) {"property_value(#{element.name}): base #{base.inspect}"} - value = case - when element.has_attribute?('itemscope') - {} - when element.has_attribute?('content') - if element.language - {"@value" => element['content'].to_s.strip, language: element.language} - else - element['content'].to_s.strip - end - when %w(data meter).include?(element.name) && element.attribute('value') - # XXX parse as number? - {"@value" => element['value'].to_s.strip} - when %w(audio embed iframe img source track video).include?(element.name) - {"@id" => uri(element.attribute('src'), base).to_s} - when %w(a area link).include?(element.name) - {"@id" => uri(element.attribute('href'), base).to_s} - when %w(object).include?(element.name) - {"@id" => uri(element.attribute('data'), base).to_s} - when %w(time).include?(element.name) - # use datatype? - (element.attribute('datetime') || element.text).to_s.strip - else - if element.language - {"@value" => element.inner_text.to_s.strip, language: element.language} - else - element.inner_text.to_s.strip - end - end - log_debug(element) {" #{value.inspect}"} - value - end - - # Allocate a new blank node identifier - # @return [String] - def alloc_bnode - @bnode_base ||= "_:a" - res = @bnode_base - @bnode_base = res.succ - res - end - - # Fixme, what about xml:base relative to element? - def uri(value, base = nil) - value = if base - base = uri(base) unless base.is_a?(RDF::URI) - base.join(value.to_s) - else - RDF::URI(value.to_s) - end - value.validate! if validate? - value.canonicalize! if canonicalize? - value = RDF::URI.intern(value) if intern? - value - end - end -end - -# Monkey Patch Nokogiri -module Nokogiri::XML - class Element - - ## - # Get any xml:base in effect for this element - def base - if @base.nil? - @base = attributes['xml:base'] || - (parent && parent.element? && parent.base) || - false - end - - @base == false ? nil : @base - end - - - ## - # Get any xml:lang or lang in effect for this element - def language - if @language.nil? - language = case - when self["xml:lang"] - self["xml:lang"].to_s - when self["lang"] - self["lang"].to_s - else - parent && parent.element? && parent.language - end - end - @language == false ? nil : @language - end - - end -end diff --git a/lib/rdf/microdata/rdfa_reader.rb b/lib/rdf/microdata/rdfa_reader.rb index d58d39a..3157af9 100644 --- a/lib/rdf/microdata/rdfa_reader.rb +++ b/lib/rdf/microdata/rdfa_reader.rb @@ -29,7 +29,7 @@ def self.format(klass = nil) # @yieldparam [RDF::Reader] reader # @yieldreturn [void] ignored # @raise [RDF::ReaderError] if _validate_ - def initialize(input = $stdin, options = {}, &block) + def initialize(input = $stdin, **options, &block) @options = options log_debug('', "using RDFa transformation reader") @@ -46,15 +46,6 @@ def initialize(input = $stdin, options = {}, &block) ::Nokogiri::HTML5(input.force_encoding(options[:encoding])) end - # Load registry - begin - registry_uri = options[:registry] || RDF::Microdata::DEFAULT_REGISTRY - log_debug('', "registry = #{registry_uri.inspect}") - Registry.load_registry(registry_uri) - rescue JSON::ParserError => e - log_fatal("Failed to parse registry: #{e.message}", exception: RDF::ReaderError) if (root.nil? && validate?) - end - # For all members having @itemscope input.css("[itemscope]").each do |item| # Get @itemtypes to create @type and @vocab @@ -69,8 +60,8 @@ def initialize(input = $stdin, options = {}, &block) item['typeof'] = types.join(' ') unless types.empty? if vocab = types.first - vocab = Registry.find(vocab) || begin - type_vocab = vocab.to_s.sub(/([\/\#])[^\/\#]*$/, '\1') unless vocab.nil? + vocab = begin + type_vocab = vocab.to_s.sub(/([\/\#])[^\/\#]*$/, '\1') Registry.new(type_vocab) if type_vocab end item['vocab'] = vocab.uri.to_s if vocab @@ -124,7 +115,7 @@ def initialize(input = $stdin, options = {}, &block) version: :"rdfa1.1") # Rely on RDFa reader - super(input, options, &block) + super(input, **options, &block) end end end \ No newline at end of file diff --git a/lib/rdf/microdata/reader.rb b/lib/rdf/microdata/reader.rb index 4c1ee6a..06e3151 100644 --- a/lib/rdf/microdata/reader.rb +++ b/lib/rdf/microdata/reader.rb @@ -54,7 +54,7 @@ def self.options # Redirect for RDFa Reader given `:rdfa` option # # @private - def self.new(input = nil, options = {}, &block) + def self.new(input = nil, **options, &block) klass = if options[:rdfa] # Requires rdf-rdfa gem to be loaded begin @@ -63,19 +63,11 @@ def self.new(input = nil, options = {}, &block) raise ReaderError, "Use of RDFa-based reader requires rdf-rdfa gem" end RdfaReader - elsif options[:jsonld] - # Requires rdf-rdfa gem to be loaded - begin - require 'json/ld' - rescue LoadError - raise ReaderError, "Use of JSON-LD-based reader requires json-ld gem" - end - JsonLdReader else self end reader = klass.allocate - reader.send(:initialize, input, options, &block) + reader.send(:initialize, input, **options, &block) reader end @@ -102,7 +94,7 @@ def self.new(input = nil, options = {}, &block) # @yieldparam [RDF::Reader] reader # @yieldreturn [void] ignored # @raise [Error] Raises `RDF::ReaderError` when validating - def initialize(input = $stdin, options = {}, &block) + def initialize(input = $stdin, **options, &block) super do @library = :nokogiri @@ -111,7 +103,7 @@ def initialize(input = $stdin, options = {}, &block) self.extend(@implementation) input.rewind if input.respond_to?(:rewind) - initialize_html(input, options) rescue log_fatal($!.message, exception: RDF::ReaderError) + initialize_html(input, **options) rescue log_fatal($!.message, exception: RDF::ReaderError) log_error("Empty document") if root.nil? log_error(doc_errors.map(&:message).uniq.join("\n")) if !doc_errors.empty? diff --git a/lib/rdf/microdata/reader/nokogiri.rb b/lib/rdf/microdata/reader/nokogiri.rb index f148516..3b7cabb 100644 --- a/lib/rdf/microdata/reader/nokogiri.rb +++ b/lib/rdf/microdata/reader/nokogiri.rb @@ -178,7 +178,7 @@ def method_missing(method, *args) # # @param [Hash{Symbol => Object}] options # @return [void] - def initialize_html(input, options = {}) + def initialize_html(input, **options) require 'nokogiri' unless defined?(::Nokogiri) @doc = case input when ::Nokogiri::XML::Document @@ -194,7 +194,7 @@ def initialize_html(input, options = {}) begin require 'nokogumbo' unless defined?(::Nokogumbo) input = input.read if input.respond_to?(:read) - ::Nokogiri::HTML5(input.force_encoding(options[:encoding])) + ::Nokogiri::HTML5(input.force_encoding(options[:encoding]), max_parse_errors: 1000) rescue LoadError ::Nokogiri::HTML.parse(input, base_uri.to_s, options[:encoding]) end diff --git a/rdf-microdata.gemspec b/rdf-microdata.gemspec index ff94ced..183e1ad 100755 --- a/rdf-microdata.gemspec +++ b/rdf-microdata.gemspec @@ -17,39 +17,24 @@ Gem::Specification.new do |gem| gem.platform = Gem::Platform::RUBY gem.files = %w(AUTHORS README.md UNLICENSE VERSION) + Dir.glob('lib/**/*.rb') + Dir.glob('etc/*') gem.require_paths = %w(lib) - gem.extensions = %w() - gem.test_files = %w() - gem.has_rdoc = false - gem.required_ruby_version = '>= 2.2.2' + gem.required_ruby_version = '>= 2.4' gem.requirements = [] - #gem.add_runtime_dependency 'rdf', '~> 2.2', '>= 2.2.8' - #gem.add_runtime_dependency 'rdf-xsd', '~> 2.2' - gem.add_runtime_dependency 'rdf', '>= 2.2.8', '< 4.0' - gem.add_runtime_dependency 'rdf-xsd', '>= 2.2', '< 4.0' + gem.add_runtime_dependency 'rdf', '~> 3.1' + gem.add_runtime_dependency 'rdf-xsd', '~> 3.1' gem.add_runtime_dependency 'htmlentities', '~> 4.3' - gem.add_runtime_dependency 'nokogiri' , '~> 1.8' + gem.add_runtime_dependency 'nokogiri' , '~> 1.10' gem.add_development_dependency 'equivalent-xml' , '~> 0.6' - gem.add_development_dependency 'yard' , '~> 0.9' - gem.add_development_dependency 'rspec', '~> 3.6' - gem.add_development_dependency 'rspec-its', '~> 1.2' + gem.add_development_dependency 'yard' , '~> 0.9.20' + gem.add_development_dependency 'rspec', '~> 3.9' + gem.add_development_dependency 'rspec-its', '~> 1.3' - #gem.add_development_dependency 'json-ld', '~> 2.1' - #gem.add_development_dependency 'rdf-spec', '~> 2.2' - #gem.add_development_dependency 'rdf-rdfa', '~> 2.2' - #gem.add_development_dependency 'rdf-turtle', '~> 2.2' - #gem.add_development_dependency 'rdf-isomorphic', '~> 2.2' - gem.add_development_dependency 'json-ld', '>= 2.1', '< 4.0' - gem.add_development_dependency 'rdf-spec', '>= 2.2', '< 4.0' - gem.add_development_dependency 'rdf-rdfa', '>= 2.2', '< 4.0' - gem.add_development_dependency 'rdf-turtle', '>= 2.2', '< 4.0' - gem.add_development_dependency 'rdf-isomorphic', '>= 2.2', '< 4.0' - - # Rubinius has it's own dependencies - if RUBY_ENGINE == "rbx" && RUBY_VERSION >= "2.1.0" - gem.add_runtime_dependency "racc" - end + gem.add_development_dependency 'rdf-spec', '~> 3.1' + gem.add_development_dependency 'rdf-turtle', '>= 3.1' + gem.add_development_dependency 'rdf-isomorphic', '~> 3.1' + gem.add_development_dependency 'json-ld', '~> 3.1' + gem.add_development_dependency 'rdf-rdfa', '~> 3.1' gem.post_install_message = nil end diff --git a/script/parse b/script/parse index c7bc0de..77cb7c2 100755 --- a/script/parse +++ b/script/parse @@ -82,10 +82,6 @@ def run(input, options) end options[:output].puts xsl.apply_to(reader.rdfa).to_s end - when :jsonld - reader_class.new(input, options.merge(jsonld: true)) do |reader| - options[:output].puts reader.jsonld.to_json(::JSON::LD::JSON_STATE) - end when :inspect reader_class.new(input, options).each do |statement| num += 1 diff --git a/spec/expansion_spec.rb b/spec/expansion_spec.rb index d772a82..804546d 100644 --- a/spec/expansion_spec.rb +++ b/spec/expansion_spec.rb @@ -8,6 +8,7 @@ class ExpansionTester include RDF::Util::Logger attr_reader :id, :repo, :action, :result, :options + attr_accessor :format def initialize(name) @id = name diff --git a/spec/jsonld_reader_spec.rb b/spec/jsonld_reader_spec.rb deleted file mode 100644 index d341b71..0000000 --- a/spec/jsonld_reader_spec.rb +++ /dev/null @@ -1,888 +0,0 @@ -# coding: utf-8 -$:.unshift "." -require 'spec_helper' -require 'rdf/spec/reader' - -describe RDF::Microdata::JsonLdReader do - let!(:doap) {File.expand_path("../../etc/doap.html", __FILE__)} - let!(:doap_nt) {File.expand_path("../../etc/doap.nt", __FILE__)} - let!(:registry_path) {File.expand_path("../test-files/test-registry.json", __FILE__)} - before :each do - @reader = RDF::Microdata::JsonLdReader.new(StringIO.new("")) - end - - context :interface do - subject {%( -
-

My name is Elizabeth.

-
- )} - - it "should yield reader" do - inner = double("inner") - expect(inner).to receive(:called).with(RDF::Microdata::JsonLdReader) - RDF::Microdata::JsonLdReader.new(subject, base_uri: 'http://example/') do |reader| - inner.called(reader.class) - end - end - - it "should return reader" do - expect(RDF::Microdata::JsonLdReader.new(subject, base_uri: 'http://example/')).to be_a(RDF::Microdata::JsonLdReader) - end - - it "should not raise errors" do - expect { - RDF::Microdata::JsonLdReader.new(subject, validate: true, base_uri: 'http://example/') - }.not_to raise_error - end - - it "should yield statements" do - inner = double("inner") - expect(inner).to receive(:called).with(RDF::Statement).at_least(2) - RDF::Microdata::JsonLdReader.new(subject, base_uri: 'http://example/').each_statement do |statement| - inner.called(statement.class) - end - end - - it "should yield triples" do - inner = double("inner") - expect(inner).to receive(:called).at_least(2) - RDF::Microdata::JsonLdReader.new(subject, base_uri: 'http://example/').each_triple do |subject, predicate, object| - inner.called(subject.class, predicate.class, object.class) - end - end - - context "Microdata Reader with :jsonld option" do - it "returns a JsonLdReader instance" do - r = RDF::Microdata::Reader.new(StringIO.new(""), jsonld: true) - expect(r).to be_a(RDF::Microdata::JsonLdReader) - end - end - end - - context :parsing do - before :each do - @md_ctx = %q( -
- %s -
- ) - @nt_ctx = %q( - _:a . - %s - ) - end - - it "parses a simple graph" do - md = %q(

My name is Gregg Kellogg.

) - nt = %q(_:a "Gregg Kellogg" .) - expect(parse(@md_ctx % md)).to be_equivalent_graph(@nt_ctx % nt, logger: @logger) - end - - context "values" do - [ - [ - %q(

My name is Gregg Kellogg

), - %q(_:a "Gregg Kellogg" .) - ], - [ - %q( -

My name is Gregg

-

My name is Kellogg

- ), - %q(_:a "Gregg", "Kellogg" .) - ], - [ - %q(

My name is Gregg Kellogg

), - %q( - _:a "Gregg Kellogg" . - _:a "Gregg Kellogg" . - ) - ], - [ - %q(

My name is Gregg Kellogg

), - %q(_:a "Gregg Kellogg" .) - ], - [ - %q(), - %q(_:a "foo" .) - ], - [ - %q(Bar), - %q(_:a "foo" .) - ], - [ - %q(), - %q(_:a .) - ], - [ - %q(), - %q(_:a .) - ], - [ - %q(), - %q(_:a .) - ], - [ - %q(), - %q(_:a .) - ], - [ - %q(), - %q(_:a .) - ], - [ - %q(), - %q(_:a .) - ], - [ - %q(), - %q(_:a .) - ], - [ - %q(), - %q(_:a .) - ], - [ - %q(), - %q(_:a .) - ], - [ - %q(), - %q(_:a .) - ], - [ - %q(), - %q(_:a .) - ], - [ - %q(), - %q(_:a "2011-06-28Z"^^ .) - ], - [ - %q(), - %q(_:a "00:00:00Z"^^ .) - ], - [ - %q(), - %q(_:a "2011-06-28T00:00:00Z"^^ .) - ], - [ - %q(), - %q(_:a "P2011Y06M28DT00H00M00S"^^ .) - ], - [ - %q(), - %q(_:a "foo" .) - ], - [ - %q(), - %q(_:a _:b .) - ], - [ - %q(), - %q(_:a "1"^^ .) - ], - [ - %q(), - %q(_:a "1.1"^^ .) - ], - [ - %q(), - %q(_:a "1.1e1"^^ .) - ], - [ - %q(), - %q(_:a "foo" .) - ], - [ - %q(), - %q(_:a "foo" .) - ], - [ - %q(), - %q(_:a "1"^^ .) - ], - [ - %q(), - %q(_:a "1.1"^^ .) - ], - [ - %q(), - %q(_:a "1.1e1"^^ .) - ], - [ - %q(), - %q(_:a "foo" .) - ], - [ - %q(), - %q(_:a "foo" .) - ], - ].each do |(md, nt)| - it "parses #{md}" do - pending if [ - '', - '', - ].include?(md) - expect(parse(@md_ctx % md)).to be_equivalent_graph(@nt_ctx % nt, logger: @logger) - end - end - end - - context "base_uri" do - before :each do - @nt_ctx = %q( - _:a . - %s - ) - end - - [ - [ - %q(), - %q(_:a .) - ], - [ - %q(), - %q(_:a .) - ], - [ - %q(), - %q(_:a .) - ], - [ - %q(), - %q(_:a .) - ], - [ - %q(), - %q(_:a .) - ], - [ - %q(), - %q(_:a .) - ], - [ - %q(), - %q(_:a .) - ], - [ - %q(), - %q(_:a .) - ], - [ - %q(), - %q(_:a .) - ], - [ - %q(), - %q(_:a .) - ], - [ - %q(Stéphane Corlosquet), - %q(_:a .) - ], - ].each do |(md, nt)| - it "parses #{md}" do - expect(parse(@md_ctx % md, base_uri: 'http://example.com/')).to be_equivalent_graph(@nt_ctx % nt, logger: @logger) - end - end - end - - context "itemid" do - before :each do - @md_ctx = %q( -
- %s -
- ) - @nt_ctx = %q( - . - %s - ) - end - - [ - [ - %q(

My name is Gregg Kellogg

), - %q( "Gregg Kellogg" .) - ], - [ - %q(), - %q( "foo" .) - ], - [ - %q(), - %q( .) - ], - [ - %q(), - %q( .) - ], - [ - %q(), - %q( .) - ], - [ - %q(), - %q( .) - ], - [ - %q(), - %q( .) - ], - [ - %q(), - %q( .) - ], - [ - %q(), - %q( .) - ], - [ - %q(), - %q( .) - ], - [ - %q(), - %q( .) - ], - [ - %q(), - %q( .) - ], - [ - %q(), - %q( .) - ], - [ - %q(), - %q( "2011-06-28T00:00:00Z"^^ .) - ], - [ - %q(), - %q( .) - ], - ].each do |(md, nt)| - it "parses #{md}" do - expect(parse(@md_ctx % md)).to be_equivalent_graph(@nt_ctx % nt, logger: @logger) - end - end - end - - context "itemtype" do - { - "with no type and token property" => [ - %q( -
-
-

Name: Amanda

-
-
- ), - %q() - ], - "with empty type and token property" => [ - %q( -
-
-

Name: Amanda

-
-
- ), - %q() - ], - "with relative type and token property" => [ - %q( -
-
-

Name: Amanda

-
-
- ), - %q() - ], - "with single type and token property" => [ - %q( -
-
-

Name: Amanda

-
-
- ), - %q( - [ a ; - "Amanda" ; - ] . - ) - ], - "with multipe types and token property" => [ - %q( -
-
-

Name: Amanda

-
-
- ), - %q( - [ a , ; - "Amanda" ; - ] . - ) - ], - #"with no type and URI property" => [ - # %q( - #
- #
- #

Name: Amanda

- #
- #
- # ), - # %q( - # [ "Amanda" ] . - # ) - #], - #"with empty type and URI property" => [ - # %q( - #
- #
- #

Name: Amanda

- #
- #
- # ), - # %q( - # [ "Amanda" ] . - # ) - #], - #"with relative type and URI property" => [ - # %q( - #
- #
- #

Name: Amanda

- #
- #
- # ), - # %q( - # [ "Amanda" ] . - # ) - #], - "with single type and URI property" => [ - %q( -
-
-

Name: Amanda

-
-
- ), - %q( - [ a ; - "Amanda" ; - ] . - ) - ], - "with multipe types and URI property" => [ - %q( -
-
-

Name: Amanda

-
-
- ), - %q( - [ a , ; - "Amanda" ; - ] . - ) - ], - "with inherited type and token property" => [ - %q( -
-

Name: Gregg

-
-

Name: Jeni

-
-
- ), - %q( - @prefix md: . - @prefix schema: . - [ a schema:Person ; - schema:name "Gregg" ; - schema:knows [ schema:name "Jeni" ] - ] . - ) - ] - }.each do |name, (md, nt)| - it "#{name}" do - expect(parse(md)).to be_equivalent_graph(nt, logger: @logger) - end - end - end - - context "itemref" do - { - "to single id" => - [ - %q( -
-
-

Name: Amanda

-
- ), - %q( - [ a ; - "Amanda" ; - ] . - ) - ], - "to generate listed property values" => - [ - %q( -
-
-

My name is Gregg

-
-

My name is Kellogg

-
- ), - %q( - [ a ; - "Gregg", "Kellogg" ; - ] . - ) - ], - #"to single id with different types" => - #[ - # %q( - #
- #
- #
- #

Name: Amanda

- #
- # ), - # %q( - # [ a ; - # "Amanda" ; - # ] . - # [ a ; - # "Amanda" ; - # ] . - # ) - #], - "to multiple ids" => - [ - %q( -
-
-

Name: Amanda

-

Jazz Band

-
- ), - %q( - [ a ; - "Amanda" ; - "Jazz Band" ; - ] . - ) - ], - "with chaining" => - [ - %q( -
-
-

Name: Amanda

-
-
-

Band: Jazz Band

-

Size: 12 players

-
-
- ), - %q( - [ a ; - "Amanda" ; - [ - a ; - "Jazz Band"; - "12" - ] - ] . - ) - ], - "shared" => - [ - %q( -
-
-
-
- Amanda -
-
- ), - %q( - [ a ; _:a ] . - [ a ; _:a ] . - _:a "Amanda" . - ) - - ], - }.each do |name, (md, nt)| - it "parses #{name}" do - expect(parse(md)).to be_equivalent_graph(nt, logger: @logger) - end - end - - it "catches infinite recursion", pending: true do - md = %( - - -
-
-
friend1
-
-
friend2
-
-
-
-
- - ) - expect {parse(md, validate: true)}.to raise_error(RDF::ReaderError) - expect(@logger.to_s).to include("itemref recursion") - end - end - - context "propertyURI" do - context "no expansion" do - { - "http://foo/bar + baz => http://foo/baz" => - [ - %q( -
-

FooBar

-
- ), - %q( - [ a ; "FooBar" ] . - ) - ], - "http://foo#bar + baz => http://foo#baz" => - [ - %q( -
-

FooBar

-
- ), - %q( - [ a ; "FooBar" ] . - ) - ], - "http://foo#Type + bar + baz => http://foo#baz" => - [ - %q( -
-

Baz

-
- ), - %q( - [ a ; - [ "Baz"]] . - ) - ], - }.each do |name, (md, nt)| - it "expands #{name}" do - expect(parse(md)).to be_equivalent_graph(nt, logger: @logger) - end - end - end - - context "default propertyURI generation" do - { - "http://foo/bar + baz => http://foo/baz" => - [ - %q( -
-

FooBar

-
- ), - %q( - [ a ; "FooBar" ] . - ) - ], - "http://foo#bar + baz => http://foo#baz" => - [ - %q( -
-

FooBar

-
- ), - %q( - [ a ; "FooBar" ] . - ) - ], - "http://foo#Type + bar + baz => http://foo#baz" => - [ - %q( -
-

Baz

-
- ), - %q( - [ a ; - [ "Baz"]] . - ) - ], - }.each do |name, (md, nt)| - it "expands #{name}" do - expect(parse(md)).to be_equivalent_graph(nt, logger: @logger) - end - end - end - end - - context "itemprop-reverse", skip: true do - { - "link" => [ - %q( -
- William Shakespeare - -
- ), - %q( - [ - a ; - "William Shakespeare" - ] . - ) - ], - "itemscope" => [ - %q( -
- The ACME Shopping Mall on Structured Data Avenue - The ACME Shopping Mall is your one-stop paradise for all data-related shopping needs, from schemas to instance data -

Here is a list of shops inside:

-
- Dan Brickley's Data Restaurant -
-
- Ramanathan Guha's Meta Content Framework Bakery -
-
- ), - %q( - _:a a ; - "The ACME Shopping Mall on Structured Data Avenue"; - "The ACME Shopping Mall is your one-stop paradise for all data-related shopping needs, from schemas to instance data" . - _:b a ; - "Dan Brickley's Data Restaurant"; - _:a . - _:c a ; - "Ramanathan Guha's Meta Content Framework Bakery"; - _:a . - ) - ], - "literal" => [ - %q( -
- William Shakespeare - -
- ), - %q( - _:a a ; - "William Shakespeare" . - ) - ], - "itemprop and itemprop-reverse" => [ - %q( -
- Cryptography Users -
-
- Alice -
- 1977 -
-
- ), - %q( - @prefix schema: . - @prefix md: . - - _:a a schema:Organization; - schema:name "Cryptography Users"; - schema:member _:b . - _:b a schema:OrganizationRole; - schema:startDate "1977"; - schema:member _:c; - schema:memberOf _:a . - _:c a schema:Person; - schema:name "Alice"; - schema:memberOf _:b . - ) - ], - }.each do |name, (md, nt)| - it "expands #{name}" do - expect(parse(md)).to be_equivalent_graph(nt, logger: @logger) - end - end - end - - context "vocabulary expansion", pending: true do - it "always expands" do - md = %q( -
- -
- ) - ttl = %q( - [ a , ; - - ] . - ) - - expect(parse(md, vocab_expansion: true)).to be_equivalent_graph(ttl, logger: @logger) - end - end - - context "test-files", skip: true do - Dir.glob(File.join(File.expand_path(File.dirname(__FILE__)), "test-files", "*.html")).each do |md| - it "parses #{md}" do - test_file(md) - end - end - end - end - - def parse(input, options = {}) - @logger = RDF::Spec.logger - graph = options[:graph] || RDF::Graph.new - RDF::Microdata::Reader.new(input, { - logger: @logger, - rdfa: true, - validate: false, - base_uri: "http://example/", - registry: registry_path, - canonicalize: false}.merge(options)).each do |statement| - graph << statement - end - - # Remove any rdfa:usesVocabulary statements - graph.query(predicate: RDF::RDFA.usesVocabulary).each do |stmt| - graph.delete(stmt) - end - graph - end - - def test_file(filepath, options = {}) - graph = parse(File.open(filepath), options) - - ttl_string = File.read(filepath.sub('.html', '.ttl')) - expect(graph).to be_equivalent_graph(ttl_string, logger: @logger) - end -end diff --git a/spec/rdfa_reader_spec.rb b/spec/rdfa_reader_spec.rb index 18a7139..a9a43cb 100644 --- a/spec/rdfa_reader_spec.rb +++ b/spec/rdfa_reader_spec.rb @@ -862,25 +862,27 @@ def parse(input, options = {}) @logger = RDF::Spec.logger graph = options[:graph] || RDF::Graph.new - RDF::Microdata::Reader.new(input, { - logger: @logger, - rdfa: true, - validate: false, - base_uri: "http://example/", - registry: registry_path, - canonicalize: false}.merge(options)).each do |statement| + RDF::Microdata::Reader.new(input, + logger: @logger, + rdfa: true, + validate: false, + base_uri: "http://example/", + registry: registry_path, + canonicalize: false, + **options + ).each do |statement| graph << statement end # Remove any rdfa:usesVocabulary statements - graph.query(predicate: RDF::RDFA.usesVocabulary).each do |stmt| + graph.query({predicate: RDF::RDFA.usesVocabulary}).each do |stmt| graph.delete(stmt) end graph end - def test_file(filepath, options = {}) - graph = parse(File.open(filepath), options) + def test_file(filepath, **options) + graph = parse(File.open(filepath), **options) ttl_string = File.read(filepath.sub('.html', '.ttl')) expect(graph).to be_equivalent_graph(ttl_string, logger: @logger) diff --git a/spec/reader_spec.rb b/spec/reader_spec.rb index 236d2f3..b56ed18 100644 --- a/spec/reader_spec.rb +++ b/spec/reader_spec.rb @@ -872,18 +872,20 @@ def parse(input, options = {}) @logger = RDF::Spec.logger graph = options[:graph] || RDF::Graph.new - RDF::Microdata::Reader.new(input, { - logger: @logger, - validate: false, - registry: registry_path, - canonicalize: false}.merge(options)).each do |statement| + RDF::Microdata::Reader.new(input, + logger: @logger, + validate: false, + registry: registry_path, + canonicalize: false, + **options + ).each do |statement| graph << statement end graph end def test_file(filepath, options = {}) - graph = parse(File.open(filepath), options) + graph = parse(File.open(filepath), **options) ttl_string = File.read(filepath.sub('.html', '.ttl')) expect(graph).to be_equivalent_graph(ttl_string, logger: @logger) diff --git a/spec/suite_helper.rb b/spec/suite_helper.rb index 80850f7..e47d415 100644 --- a/spec/suite_helper.rb +++ b/spec/suite_helper.rb @@ -9,6 +9,10 @@ module File REMOTE_PATH = "http://w3c.github.io/microdata-rdf/tests/" LOCAL_PATH = ::File.expand_path("../spec-tests", __FILE__) + '/' + class << self + alias_method :original_open_file, :open_file + end + ## # Override to use Patron for http and https, Kernel.open otherwise. # @@ -18,50 +22,50 @@ module File # HTTP Request headers. # @return [IO] File stream # @yield [IO] File stream - def self.open_file(filename_or_url, options = {}, &block) - options = options[:headers] || {} if filename_or_url.start_with?('http') - case filename_or_url.to_s - when /^file:/ + def self.open_file(filename_or_url, **options, &block) + case + when filename_or_url.to_s =~ /^file:/ path = filename_or_url[5..-1] - Kernel.open(path.to_s, &block) - when 'http://www.w3.org/ns/md' - Kernel.open(RDF::Microdata::DEFAULT_REGISTRY, &block) - when /^#{REMOTE_PATH}/ - begin - #puts "attempt to open #{filename_or_url} locally" - if response = ::File.open(filename_or_url.to_s.sub(REMOTE_PATH, LOCAL_PATH)) - #puts "use #{filename_or_url} locally" - case filename_or_url.to_s - when /\.html$/ - def response.content_type; 'text/html'; end - when /\.ttl$/ - def response.content_type; 'text/turtle'; end - when /\.json$/ - def response.content_type; 'application/json'; end - when /\.jsonld$/ - def response.content_type; 'application/ld+json'; end - else - def response.content_type; 'unknown'; end - end - - if block_given? - begin - yield response - ensure - response.close - end - else - response - end - else - Kernel.open(filename_or_url.to_s, options, &block) - end - rescue Errno::ENOENT - # Not there, don't run tests - StringIO.new("") + Kernel.open(path.to_s, options, &block) + when (filename_or_url.to_s =~ %r{^#{REMOTE_PATH}} && Dir.exist?(LOCAL_PATH)) + #puts "attempt to open #{filename_or_url} locally" + localpath = filename_or_url.to_s.sub(REMOTE_PATH, LOCAL_PATH) + response = begin + ::File.open(localpath) + rescue Errno::ENOENT => e + raise IOError, e.message + end + document_options = { + base_uri: RDF::URI(filename_or_url), + charset: Encoding::UTF_8, + code: 200, + headers: {} + } + #puts "use #{filename_or_url} locally" + document_options[:headers][:content_type] = case filename_or_url.to_s + when /\.html$/ then 'text/html' + when /\.xhtml$/ then 'application/xhtml+xml' + when /\.xml$/ then 'application/xml' + when /\.svg$/ then 'image/svg+xml' + when /\.ttl$/ then 'text/turtle' + when /\.ttl$/ then 'text/turtle' + when /\.jsonld$/ then 'application/ld+json' + when /\.json$/ then 'application/json' + else 'unknown' + end + + document_options[:headers][:content_type] = response.content_type if response.respond_to?(:content_type) + # For overriding content type from test data + document_options[:headers][:content_type] = options[:contentType] if options[:contentType] + + remote_document = RDF::Util::File::RemoteDocument.new(response.read, **document_options) + if block_given? + yield remote_document + else + remote_document end else - Kernel.open(filename_or_url.to_s, options, &block) + original_open_file(filename_or_url, **options, &block) end end end @@ -126,9 +130,7 @@ def self.open(file) # @param [Hash] json framed JSON-LD # @return [Array] def self.from_jsonld(json) - json['@graph']. - select {|m| m['@type'] == 'mf:Manifest'}. - map {|e| Manifest.new(e)} + Manifest.new(json) end def entries diff --git a/spec/suite_spec.rb b/spec/suite_spec.rb index ffccf93..3a7219b 100644 --- a/spec/suite_spec.rb +++ b/spec/suite_spec.rb @@ -7,7 +7,7 @@ require 'suite_helper' MANIFEST = Fixtures::SuiteTest::BASE + "manifest.jsonld" - {native: :native, RDFa: :rdfa, "JSON-LD": :jsonld}.each do |w, sym| + {native: :native, RDFa: :rdfa}.each do |w, sym| describe w do Fixtures::SuiteTest::Manifest.open(MANIFEST).each do |m| describe m.comment do @@ -17,6 +17,15 @@ t.logger.info t.inspect t.logger.info "source:\n#{t.input}" + if sym == :rdfa + %w(0002 0003 0052 0053 0054 0067).include?(t.name.split.last) && skip("Not valid test for RDFa") + %w(0026 0044).include?(t.name.split.last) && skip("Difference in subject for head/body elements") + %w(0071 0073 0074).include?(t.name.split.last) && skip("No vocabulary expansion") + %w(0075 0078).include?(t.name.split.last) && skip("Differences in number parsing") + %w(0081 0082 0084).include?(t.name.split.last) && skip("No @itemprop-reverse") + %w(0064).include?(t.name.split.last) && pending("Double use of itemref with different vocabularies") + end + reader = RDF::Microdata::Reader.open(t.action, base_uri: t.action, strict: true,