From 0e157d92fda0b2a84e3786c2561317f331a6bf58 Mon Sep 17 00:00:00 2001
From: Gregg Kellogg <gregg@greggkellogg.net>
Date: Fri, 30 Jun 2017 16:44:05 -0700
Subject: [PATCH 01/12] Update based on simplified spec proposed for Third
 Edition. This version continues to create document-relative property URIs.

---
 lib/rdf/microdata/reader.rb          | 99 ++++++++++++----------------
 lib/rdf/microdata/reader/nokogiri.rb |  6 ++
 spec/suite_helper.rb                 |  4 ++
 3 files changed, 52 insertions(+), 57 deletions(-)

diff --git a/lib/rdf/microdata/reader.rb b/lib/rdf/microdata/reader.rb
index d33b253..e820387 100644
--- a/lib/rdf/microdata/reader.rb
+++ b/lib/rdf/microdata/reader.rb
@@ -20,10 +20,12 @@ class Reader < RDF::Reader
     # @private
     class CrawlFailure < StandardError; end
 
-    # @!attribute [r] implementation
     # @return [Module] Returns the HTML implementation module for this reader instance.
     attr_reader :implementation
 
+    # @return [Hash{Object => Hash}] maps item nodes to per-item state; the `:subject` entry holds the item's RDF::Resource
+    attr_reader :memory
+
     ##
     # Returns the base URI determined by this reader.
     #
@@ -92,16 +94,16 @@ def self.find(type)
       # @param [#to_s] name
       # @param [Hash{}] ec Evaluation Context
       # @return [RDF::URI]
-      def predicateURI(name, ec)
+      def predicateURI(name, base_uri)
         u = RDF::URI(name)
         # 1) If _name_ is an _absolute URL_, return _name_ as a _URI reference_
         return u if u.absolute?
         
         n = frag_escape(name)
-        if ec[:current_type].nil?
-          # 2) If current type from context is null, there can be no current vocabulary.
+        if uri.nil?
+          # 2) If current vocabulary from context is null, there can be no current vocabulary.
           #    Return the URI reference that is the document base with its fragment set to the fragment-escaped value of name
-          u = RDF::URI(ec[:document_base].to_s)
+          u = RDF::URI(base_uri.to_s)
           u.fragment = frag_escape(name)
           u
         else
@@ -178,12 +180,12 @@ def initialize(input = $stdin, options = {}, &block)
         log_error("Empty document") if root.nil?
         log_error(doc_errors.map(&:message).uniq.join("\n")) if !doc_errors.empty?
 
-        log_debug(@doc, "library = #{@library}")
+        log_debug('', "library = #{@library}")
 
         # Load registry
         begin
           registry_uri = options[:registry] || DEFAULT_REGISTRY
-          log_debug(@doc, "registry = #{registry_uri.inspect}")
+          log_debug('', "registry = #{registry_uri.inspect}")
           Registry.load_registry(registry_uri)
         rescue JSON::ParserError => e
           log_fatal("Failed to parse registry: #{e.message}", exception: RDF::ReaderError) if (root.nil? && validate?)
@@ -270,6 +272,7 @@ def add_triple(node, subject, predicate, object)
     # Parsing a Microdata document (this is *not* the recursive method)
     def parse_whole_document(doc, base)
       base = doc_base(base)
+      @memory = {}
       options[:base_uri] = if (base)
         # Strip any fragment from base
         base = base.to_s.split('#').first
@@ -280,15 +283,9 @@ def parse_whole_document(doc, base)
       
       log_info(nil) {"parse_whole_doc: base='#{base}'"}
 
-      ec = {
-        memory:             {},
-        current_type:       nil,
-        current_vocabulary: nil,
-        document_base:      base,
-      }
       # 1) For each element that is also a top-level item, Generate the triples for that item using the evaluation context.
       getItems.each do |el|
-        log_depth {generate_triples(el, ec)}
+        log_depth {generate_triples(el, Registry.new(nil))}
       end
 
       log_info(doc, "parse_whole_doc: traversal complete")
@@ -298,12 +295,11 @@ def parse_whole_document(doc, base)
     # Generate triples for an item
     #
     # @param [RDF::Resource] item
-    # @param [Hash{Symbol => Object}] ec
+    # @param [Registry] vocab
     # @option ec [Hash{Nokogiri::XML::Element} => RDF::Resource] memory
-    # @option ec [RDF::Resource] :current_type
+    # @option ec [RDF::Resource] :current_vocabulary
     # @return [RDF::Resource]
-    def generate_triples(item, ec = {})
-      memory = ec[:memory]
+    def generate_triples(item, vocab)
       # 1) If there is an entry for item in memory, then let subject be the subject of that entry. Otherwise, if item has a global identifier and that global identifier is an absolute URL, let subject be that global identifier. Otherwise, let subject be a new blank node.
       subject = if memory.include?(item.node)
         memory[item.node][:subject]
@@ -312,12 +308,13 @@ def generate_triples(item, ec = {})
       end || RDF::Node.new
       memory[item.node] ||= {}
 
-      log_debug(item) {"gentrips(2): subject=#{subject.inspect}, current_type: #{ec[:current_type]}"}
+      log_debug(item) {"gentrips(2): subject=#{subject.inspect}, vocab: #{vocab.inspect}"}
 
       # 2) Add a mapping from item to subject in memory, if there isn't one already.
       memory[item.node][:subject] ||= subject
       
       # 3) For each type returned from element.itemType of the element defining the item.
+      # 4) Set vocab to the first value returned from element.itemType of the element defining the item.
       type = nil
       item.attribute('itemtype').to_s.split(' ').map{|n| uri(n)}.select(&:absolute?).each do |t|
         #   3.1. If type is an absolute URL, generate the following triple:
@@ -325,36 +322,26 @@ def generate_triples(item, ec = {})
         add_triple(item, subject, RDF.type, t)
       end
 
-      # 4) Set type to the first value returned from element.itemType of the element defining the item.
-
-      # 5) Otherwise, set type to current type from the Evaluation Context if not empty.
-      type ||= ec[:current_type]
-      log_debug(item)  {"gentrips(5): type=#{type.inspect}"}
-
-      # 6) If the registry contains a URI prefix that is a character for character match of type up to the length of the URI prefix, set vocab as that URI prefix.
-      vocab = Registry.find(type)
-
-      # 7) Otherwise, if type is not empty, construct vocab by removing everything following the last SOLIDUS U+002F ("/") or NUMBER SIGN U+0023 ("#") from the path component of type.
-      vocab ||= begin
-        type_vocab = type.to_s.sub(/([\/\#])[^\/\#]*$/, '\1')
-        log_debug(item)  {"gentrips(7): type_vocab=#{type_vocab.inspect}"}
-        Registry.new(type_vocab)
+      # 6) If the registry contains a URI prefix that is a character for character match of vocab up to the length of the URI prefix, set vocab as that URI prefix.
+      if type || vocab.nil?
+        vocab = Registry.find(type) || begin
+          type_vocab = type.to_s.sub(/([\/\#])[^\/\#]*$/, '\1') unless type.nil?
+          log_debug(item)  {"gentrips(7): type_vocab=#{type_vocab.inspect}"}
+          Registry.new(type_vocab)
+        end
       end
 
-      # 8) Update evaluation context setting current vocabulary to vocab.
-      ec[:current_vocabulary] = vocab
+      # Otherwise, use vocab from evaluation context
+      log_debug(item) {"gentrips(8): vocab: #{vocab.inspect}"}
 
       # 9. For each element _element_ that has one or more property names and is one of the properties of the item _item_, run the following substep:
       props = item_properties(item)
       # 9.1. For each name name in element's property names, run the following substeps:
       props.each do |element|
         element.attribute('itemprop').to_s.split(' ').compact.each do |name|
-          log_debug(item) {"gentrips(9.1): name=#{name.inspect}, type=#{type}"}
-          # 9.1.1) Let context be a copy of evaluation context with current type set to type and current vocabulary set to vocab.
-          ec_new = ec.merge({current_type: type, current_vocabulary: vocab})
-          
+          log_debug(item) {"gentrips(9.1): name=#{name.inspect}, vocab=#{vocab.inspect}"}
           # 9.1.2) Let predicate be the result of generate predicate URI using context and name. Update context by setting current name to predicate.
-          predicate = vocab.predicateURI(name, ec_new)
+          predicate = vocab.predicateURI(name, base_uri)
 
           # 9.1.3) Let value be the property value of element.
           value = property_value(element)
@@ -362,7 +349,7 @@ def generate_triples(item, ec = {})
           
           # 9.1.4) If value is an item, then generate the triples for value context. Replace value by the subject returned from those steps.
           if value.is_a?(Hash)
-            value = generate_triples(element, ec_new) 
+            value = generate_triples(element, vocab) 
             log_debug(item) {"gentrips(9.1.4): value=#{value.inspect}"}
           end
 
@@ -384,11 +371,9 @@ def generate_triples(item, ec = {})
       props.each do |element|
         element.attribute('itemprop-reverse').to_s.split(' ').compact.each do |name|
           log_debug(item) {"gentrips(10.1): name=#{name.inspect}"}
-          # 10.1.1) Let context be a copy of evaluation context with current type set to type and current vocabulary set to vocab.
-          ec_new = ec.merge({current_type: type, current_vocabulary: vocab})
           
           # 10.1.2) Let predicate be the result of generate predicate URI using context and name. Update context by setting current name to predicate.
-          predicate = vocab.predicateURI(name, ec_new)
+          predicate = vocab.predicateURI(name, base_uri)
           
           # 10.1.3) Let value be the property value of element.
           value = property_value(element)
@@ -396,7 +381,7 @@ def generate_triples(item, ec = {})
 
           # 10.1.4) If value is an item, then generate the triples for value context. Replace value by the subject returned from those steps.
           if value.is_a?(Hash)
-            value = generate_triples(element, ec_new) 
+            value = generate_triples(element, vocab) 
             log_debug(item) {"gentrips(10.1.4): value=#{value.inspect}"}
           elsif value.is_a?(RDF::Literal)
             # 10.1.5) Otherwise, if value is a literal, ignore the value and continue to the next name; it is an error for the value of @itemprop-reverse to be a literal
@@ -432,13 +417,13 @@ def item_properties(item, reverse = false)
     # To crawl the properties of an element root with a list memory, the user agent must run the following steps. These steps either fail or return a list with a count of errors. The count of errors is used as part of the authoring conformance criteria below.
     #
     # @param [Nokogiri::XML::Element] root
-    # @param [Array<Nokokogiri::XML::Element>] memory
+    # @param [Array<Nokogiri::XML::Element>] memo
     # @param [Boolean] reverse crawl reverse properties
     # @return [Array<Nokogiri::XML::Element>]
     #   Resultant elements
-    def crawl_properties(root, memory, reverse)
-      # 1. If root is in memory, then the algorithm fails; abort these steps.
-      raise CrawlFailure, "crawl_props mem already has #{root.inspect}" if memory.include?(root)
+    def crawl_properties(root, memo, reverse)
+      # 1. If root is in memo, then the algorithm fails; abort these steps.
+      raise CrawlFailure, "crawl_props mem already has #{root.inspect}" if memo.include?(root)
       
       # 2. Collect all the elements in the item root; let results be the resulting list of elements, and errors be the resulting count of errors.
       results = elements_in_item(root)
@@ -447,13 +432,13 @@ def crawl_properties(root, memory, reverse)
       # 3. Remove any elements from results that do not have an @itemprop (@itemprop-reverse) attribute specified.
       results = results.select {|e| e.has_attribute?(reverse ? 'itemprop-reverse' : 'itemprop')}
       
-      # 4. Let new memory be a new list consisting of the old list memory with the addition of root.
-      raise CrawlFailure, "itemref recursion" if memory.detect {|n| root.node.object_id == n.node.object_id}
-      new_memory = memory + [root]
+      # 4. Let new memo be a new list consisting of the old list memo with the addition of root.
+      raise CrawlFailure, "itemref recursion" if memo.detect {|n| root.node.object_id == n.node.object_id}
+      new_memo = memo + [root]
       
-      # 5. For each element in results that has an @itemscope attribute specified, crawl the properties of the element, with new memory as the memory.
+      # 5. For each element in results that has an @itemscope attribute specified, crawl the properties of the element, with new memo as the memo.
       results.select {|e| e.has_attribute?('itemscope')}.each do |element|
-        log_depth {crawl_properties(element, new_memory, reverse)}
+        log_depth {crawl_properties(element, new_memo, reverse)}
       end
       
       results
@@ -469,7 +454,7 @@ def crawl_properties(root, memory, reverse)
     def elements_in_item(root)
       # Let results and pending be empty lists of elements.
       # Let errors be zero.
-      results, memory, errors = [], [], 0
+      results, memo, errors = [], [], 0
       
       # Add all the children elements of root to pending.
       pending = root.elements
@@ -487,13 +472,13 @@ def elements_in_item(root)
 
       # Loop: Remove an element from pending and let current be that element.
       while current = pending.shift
-        if memory.include?(current)
+        if memo.include?(current)
           raise CrawlFailure, "elements_in_item: results already includes #{current.inspect}"
         elsif !current.has_attribute?('itemscope')
           # If current is not already in results and current does not have an itemscope attribute, then: add all the child elements of current to pending.
           pending += current.elements
         end
-        memory << current
+        memo << current
         
         # If current is not already in results, then: add current to results.
         results << current unless results.include?(current)
diff --git a/lib/rdf/microdata/reader/nokogiri.rb b/lib/rdf/microdata/reader/nokogiri.rb
index a77bb30..f148516 100644
--- a/lib/rdf/microdata/reader/nokogiri.rb
+++ b/lib/rdf/microdata/reader/nokogiri.rb
@@ -103,6 +103,12 @@ def elements
           NodeSetProxy.new(@node.elements, self)
         end
 
+        ##
+        # Rational debug output
+        def to_str
+          @node.path
+        end
+
         ##
         # Proxy for everything else to @node
         def method_missing(method, *args)
diff --git a/spec/suite_helper.rb b/spec/suite_helper.rb
index 7ac224a..5d14456 100644
--- a/spec/suite_helper.rb
+++ b/spec/suite_helper.rb
@@ -145,6 +145,10 @@ def action
         BASE.join(property('action'))
       end
 
+      def input
+        RDF::Util::File.open_file(action).read
+      end
+
       def registry
         reg = property('registry') ||
           BASE + "test-registry.json"

From a44ea71dfd53f6f6db9ea09efbf9829da7363155 Mon Sep 17 00:00:00 2001
From: Gregg Kellogg <gregg@greggkellogg.net>
Date: Wed, 19 Jul 2017 16:24:21 -0700
Subject: [PATCH 02/12] Extract the Registry class from the reader.

---
 lib/rdf/microdata.rb          |   2 +
 lib/rdf/microdata/reader.rb   | 108 +--------------------------------
 lib/rdf/microdata/registry.rb | 109 ++++++++++++++++++++++++++++++++++
 3 files changed, 112 insertions(+), 107 deletions(-)
 create mode 100644 lib/rdf/microdata/registry.rb

diff --git a/lib/rdf/microdata.rb b/lib/rdf/microdata.rb
index e86650a..387dc2f 100644
--- a/lib/rdf/microdata.rb
+++ b/lib/rdf/microdata.rb
@@ -21,12 +21,14 @@ module RDF
   # @author [Gregg Kellogg](http://greggkellogg.net/)
   module Microdata
     USES_VOCAB = RDF::URI("http://www.w3.org/ns/rdfa#usesVocabulary")
+    DEFAULT_REGISTRY = File.expand_path("../../../etc/registry.json", __FILE__)
 
     require 'rdf/microdata/format'
     require 'rdf/microdata/vocab'
     autoload :Expansion,  'rdf/microdata/expansion'
     autoload :Profile,    'rdf/microdata/profile'
     autoload :Reader,     'rdf/microdata/reader'
+    autoload :Registry,   'rdf/microdata/registry'
     autoload :VERSION,    'rdf/microdata/version'
   end
 end
diff --git a/lib/rdf/microdata/reader.rb b/lib/rdf/microdata/reader.rb
index e820387..c78a0c1 100644
--- a/lib/rdf/microdata/reader.rb
+++ b/lib/rdf/microdata/reader.rb
@@ -15,7 +15,6 @@ class Reader < RDF::Reader
     include Expansion
     include RDF::Util::Logger
     URL_PROPERTY_ELEMENTS = %w(a area audio embed iframe img link object source track video)
-    DEFAULT_REGISTRY = File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "..", "etc", "registry.json"))
 
     # @private
     class CrawlFailure < StandardError; end
@@ -38,111 +37,6 @@ def base_uri
       @options[:base_uri]
     end
 
-    # Interface to registry
-    class Registry
-      # @return [RDF::URI] Prefix of vocabulary
-      attr_reader :uri
-
-      # @return [Hash] properties
-      attr_reader :properties
-
-      ##
-      # Initialize the registry from a URI or file path
-      #
-      # @param [String] registry_uri
-      def self.load_registry(registry_uri)
-        return if @registry_uri == registry_uri
-
-        json = RDF::Util::File.open_file(registry_uri) { |f| JSON.load(f) }
-
-        @prefixes = {}
-        json.each do |prefix, elements|
-          next unless elements.is_a?(Hash)
-          properties = elements.fetch("properties", {})
-          @prefixes[prefix] = Registry.new(prefix, properties)
-        end
-        @registry_uri = registry_uri
-      end
-
-      ##
-      # Initialize registry for a particular prefix URI
-      #
-      # @param [RDF::URI] prefixURI
-      # @param [Hash] properties ({})
-      def initialize(prefixURI, properties = {})
-        @uri = prefixURI
-        @properties = properties
-        @property_base = prefixURI.to_s
-        # Append a '#' for fragment if necessary
-        @property_base += '#' unless %w(/ #).include?(@property_base[-1,1])
-      end
-
-      ##
-      # Find a registry entry given a type URI
-      #
-      # @param [RDF::URI] type
-      # @return [Registry]
-      def self.find(type) 
-        @prefixes ||= {}
-        k = @prefixes.keys.detect {|key| type.to_s.index(key) == 0 }
-        @prefixes[k] if k
-      end
-      
-      ##
-      # Generate a predicateURI given a `name`
-      #
-      # @param [#to_s] name
-      # @param [Hash{}] ec Evaluation Context
-      # @return [RDF::URI]
-      def predicateURI(name, base_uri)
-        u = RDF::URI(name)
-        # 1) If _name_ is an _absolute URL_, return _name_ as a _URI reference_
-        return u if u.absolute?
-        
-        n = frag_escape(name)
-        if uri.nil?
-          # 2) If current vocabulary from context is null, there can be no current vocabulary.
-          #    Return the URI reference that is the document base with its fragment set to the fragment-escaped value of name
-          u = RDF::URI(base_uri.to_s)
-          u.fragment = frag_escape(name)
-          u
-        else
-          # 4) If scheme is vocabulary return the URI reference constructed by appending the fragment escaped value of name to current vocabulary, separated by a U+0023 NUMBER SIGN character (#) unless the current vocabulary ends with either a U+0023 NUMBER SIGN character (#) or SOLIDUS U+002F (/).
-          RDF::URI(@property_base + n)
-        end
-      end
-
-      ##
-      # Yield a equivalentProperty or subPropertyOf if appropriate
-      #
-      # @param [RDF::URI] predicateURI
-      # @yield equiv
-      # @yieldparam [RDF::URI] equiv
-      def expand(predicateURI)
-        tok = tokenize(predicateURI)
-        if @properties[tok].is_a?(Hash)
-          value = @properties[tok].fetch("subPropertyOf", nil)
-          value ||= @properties[tok].fetch("equivalentProperty", nil)
-
-          Array(value).each {|equiv| yield RDF::URI(equiv)}
-        end
-      end
-
-      ##
-      # Turn a predicateURI into a simple token
-      # @param [RDF::URI] predicateURI
-      # @return [String]
-      def tokenize(predicateURI)
-        predicateURI.to_s.sub(@property_base, '')
-      end
-
-      ##
-      # Fragment escape a name
-      def frag_escape(name)
-        name.to_s.gsub(/["#%<>\[\\\]^{|}]/) {|c| '%' + c.unpack('H2' * c.bytesize).join('%').upcase}
-      end
-    end
-
     ##
     # Initializes the Microdata reader instance.
     #
@@ -184,7 +78,7 @@ def initialize(input = $stdin, options = {}, &block)
 
         # Load registry
         begin
-          registry_uri = options[:registry] || DEFAULT_REGISTRY
+          registry_uri = options[:registry] || RDF::Microdata::DEFAULT_REGISTRY
           log_debug('', "registry = #{registry_uri.inspect}")
           Registry.load_registry(registry_uri)
         rescue JSON::ParserError => e
diff --git a/lib/rdf/microdata/registry.rb b/lib/rdf/microdata/registry.rb
new file mode 100644
index 0000000..f7940f8
--- /dev/null
+++ b/lib/rdf/microdata/registry.rb
@@ -0,0 +1,109 @@
+require 'json'
+module RDF::Microdata
+
+  # Interface to registry
+  class Registry
+    # @return [RDF::URI] Prefix of vocabulary
+    attr_reader :uri
+
+    # @return [Hash] properties
+    attr_reader :properties
+
+    ##
+    # Initialize the registry from a URI or file path
+    #
+    # @param [String] registry_uri
+    def self.load_registry(registry_uri)
+      return if @registry_uri == registry_uri
+
+      json = RDF::Util::File.open_file(registry_uri) { |f| ::JSON.load(f) }
+
+      @prefixes = {}
+      json.each do |prefix, elements|
+        next unless elements.is_a?(Hash)
+        properties = elements.fetch("properties", {})
+        @prefixes[prefix] = Registry.new(prefix, properties)
+      end
+      @registry_uri = registry_uri
+    end
+
+    ##
+    # Initialize registry for a particular prefix URI
+    #
+    # @param [RDF::URI] prefixURI
+    # @param [Hash] properties ({})
+    def initialize(prefixURI, properties = {})
+      @uri = prefixURI
+      @properties = properties
+      @property_base = prefixURI.to_s
+      # Append a '#' for fragment if necessary
+      @property_base += '#' unless %w(/ #).include?(@property_base[-1,1])
+    end
+
+    ##
+    # Find a registry entry given a type URI
+    #
+    # @param [RDF::URI] type
+    # @return [Registry]
+    def self.find(type) 
+      @prefixes ||= {}
+      k = @prefixes.keys.detect {|key| type.to_s.index(key) == 0 }
+      @prefixes[k] if k
+    end
+    
+    ##
+    # Generate a predicateURI given a `name`
+    #
+    # @param [#to_s] name
+    # @param [#to_s] base_uri document base, used when this registry has no vocabulary URI
+    # @return [RDF::URI]
+    def predicateURI(name, base_uri)
+      u = RDF::URI(name)
+      # 1) If _name_ is an _absolute URL_, return _name_ as a _URI reference_
+      return u if u.absolute?
+      
+      n = frag_escape(name)
+      if uri.nil?
+        # 2) If current vocabulary from context is null, there can be no current vocabulary.
+        #    Return the URI reference that is the document base with its fragment set to the fragment-escaped value of name
+        u = RDF::URI(base_uri.to_s)
+        u.fragment = frag_escape(name)
+        u
+      else
+        # 4) If scheme is vocabulary return the URI reference constructed by appending the fragment escaped value of name to current vocabulary, separated by a U+0023 NUMBER SIGN character (#) unless the current vocabulary ends with either a U+0023 NUMBER SIGN character (#) or SOLIDUS U+002F (/).
+        RDF::URI(@property_base + n)
+      end
+    end
+
+    ##
+    # Yield a equivalentProperty or subPropertyOf if appropriate
+    #
+    # @param [RDF::URI] predicateURI
+    # @yield equiv
+    # @yieldparam [RDF::URI] equiv
+    def expand(predicateURI)
+      tok = tokenize(predicateURI)
+      if @properties[tok].is_a?(Hash)
+        value = @properties[tok].fetch("subPropertyOf", nil)
+        value ||= @properties[tok].fetch("equivalentProperty", nil)
+
+        Array(value).each {|equiv| yield RDF::URI(equiv)}
+      end
+    end
+
+    ##
+    # Turn a predicateURI into a simple token
+    # @param [RDF::URI] predicateURI
+    # @return [String]
+    def tokenize(predicateURI)
+      predicateURI.to_s.sub(@property_base, '')
+    end
+
+    ##
+    # Fragment escape a name
+    def frag_escape(name)
+      name.to_s.gsub(/["#%<>\[\\\]^{|}]/) {|c| '%' + c.unpack('H2' * c.bytesize).join('%').upcase}
+    end
+  end
+
+end
\ No newline at end of file

From e791cd1e6be37d8d28c7a2e6905bc7bab376d00d Mon Sep 17 00:00:00 2001
From: Gregg Kellogg <gregg@greggkellogg.net>
Date: Wed, 19 Jul 2017 16:24:44 -0700
Subject: [PATCH 03/12] Add experimental RdfaReader based on DOM transformation
 from Microdata to RDFa.

---
 README.md                        |   6 +-
 lib/rdf/microdata.rb             |   1 +
 lib/rdf/microdata/rdfa_reader.rb | 125 +++++
 lib/rdf/microdata/reader.rb      |  15 +
 spec/rdfa_reader_spec.rb         | 888 +++++++++++++++++++++++++++++++
 spec/suite_helper.rb             |   2 +-
 6 files changed, 1035 insertions(+), 2 deletions(-)
 create mode 100644 lib/rdf/microdata/rdfa_reader.rb
 create mode 100644 spec/rdfa_reader_spec.rb

diff --git a/README.md b/README.md
index 2cec89a..d00e533 100755
--- a/README.md
+++ b/README.md
@@ -60,7 +60,11 @@ Full documentation available on [Rubydoc.info][Microdata doc]
 * {RDF::Microdata::Reader}
   * {RDF::Microdata::Reader::Nokogiri}
 
-### Additional vocabularies
+
+### RDFa-based Reader
+There is an experimental reader based on transforming Microdata to RDFa within the DOM. To invoke
+it, pass the `rdfa: true` option to {RDF::Microdata::Reader.new}, or
+use {RDF::Microdata::RdfaReader} directly.
 
 ## Resources
 * [RDF.rb][RDF.rb]
diff --git a/lib/rdf/microdata.rb b/lib/rdf/microdata.rb
index 387dc2f..34e77fb 100644
--- a/lib/rdf/microdata.rb
+++ b/lib/rdf/microdata.rb
@@ -28,6 +28,7 @@ module Microdata
     autoload :Expansion,  'rdf/microdata/expansion'
     autoload :Profile,    'rdf/microdata/profile'
     autoload :Reader,     'rdf/microdata/reader'
+    autoload :RdfaReader, 'rdf/microdata/rdfa_reader'
     autoload :Registry,   'rdf/microdata/registry'
     autoload :VERSION,    'rdf/microdata/version'
   end
diff --git a/lib/rdf/microdata/rdfa_reader.rb b/lib/rdf/microdata/rdfa_reader.rb
new file mode 100644
index 0000000..14e1681
--- /dev/null
+++ b/lib/rdf/microdata/rdfa_reader.rb
@@ -0,0 +1,125 @@
+require 'rdf/rdfa'
+require 'nokogumbo'
+
+module RDF::Microdata
+  ##
+  # Update DOM to turn Microdata into RDFa and parse using the RDFa Reader
+  class RdfaReader < RDF::RDFa::Reader
+
+    def self.format(klass = nil)
+      if klass.nil?
+        RDF::Microdata::Format
+      else
+        super
+      end
+    end
+
+    ##
+    # Initializes the RdfaReader instance.
+    #
+    # @param  [IO, File, String] input
+    #   the input stream to read
+    # @param  [Hash{Symbol => Object}] options
+    #   any additional options (see `RDF::Reader#initialize`)
+    # @return [reader]
+    # @yield  [reader] `self`
+    # @yieldparam  [RDF::Reader] reader
+    # @yieldreturn [void] ignored
+    # @raise [RDF::ReaderError] if _validate_
+    def initialize(input = $stdin, options = {}, &block)
+
+      input = case input
+      when ::Nokogiri::XML::Document, ::Nokogiri::HTML::Document then input
+      else
+        # Try to detect charset from input
+        options[:encoding] ||= input.charset if input.respond_to?(:charset)
+        
+        # Otherwise, default is utf-8
+        options[:encoding] ||= 'utf-8'
+        options[:encoding] = options[:encoding].to_s if options[:encoding]
+        input = input.read if input.respond_to?(:read)
+        ::Nokogiri::HTML5(input.force_encoding(options[:encoding]))
+      end
+
+
+      # Load registry
+      begin
+        registry_uri = options[:registry] || RDF::Microdata::DEFAULT_REGISTRY
+        log_debug('', "registry = #{registry_uri.inspect}")
+        Registry.load_registry(registry_uri)
+      rescue JSON::ParserError => e
+        log_fatal("Failed to parse registry: #{e.message}", exception: RDF::ReaderError) if (root.nil? && validate?)
+      end
+
+      # For all members having @itemscope
+      input.css("[itemscope]").each do |item|
+        # Get @itemtypes to create @type and @vocab
+        item.attribute('itemscope').remove
+        if item['itemtype']
+          # Only absolute URLs
+          types = item.attribute('itemtype').
+            remove.
+            to_s.
+            split(/\s+/).
+            select {|t| RDF::URI(t).absolute?}
+
+          item['typeof'] = types.join(' ') unless types.empty?
+          if vocab = types.first
+            vocab = Registry.find(vocab) || begin
+              type_vocab = vocab.to_s.sub(/([\/\#])[^\/\#]*$/, '\1') unless vocab.nil?
+              Registry.new(type_vocab) if type_vocab
+            end
+            item['vocab'] = vocab.uri.to_s if vocab
+          end
+        end
+
+        # Change each itemid attribute to a resource attribute (or an about attribute when the item has no itemprop) with the same value
+        if item['itemid']
+          id = item.attribute('itemid').remove
+          item[item['itemprop'] ? 'resource' : 'about'] = id
+        else
+          # Otherwise, ensure that @typeof has at least an empty value
+          item['typeof'] ||= ''
+        end
+      end
+
+      # Add @resource for all itemprop values of object based on a @data value
+      input.css("object[itemprop][data]").each do |item|
+        item['resource'] ||= item['data']
+      end
+
+      # Replace all @itemprop values with @property
+      input.css("[itemprop]").each {|item| item['property'] = item.attribute('itemprop').remove}
+
+      # Wrap all @itemref properties
+      input.css("[itemref]").each do |item|
+        item_vocab = item['vocab'] || item.ancestors.detect {|a| a.attribute('vocab')}
+        item_vocab = item_vocab.to_s if item_vocab
+
+        item.attribute('itemref').remove.to_s.split(/\s+/).each do |ref|
+          if referenced = input.css("##{ref}")
+            # Add @vocab to referenced using the closest ancestor having @vocab of item.
+            # If the element with id reference has no resource attribute, add a resource attribute whose value is a NUMBER SIGN U+0023 followed by reference to the element.
+            # If the element with id reference has no typeof attribute, add a typeof="rdfa:Pattern" attribute to the element.
+            referenced.wrap(%(<div vocab="#{item_vocab}" resource="##{ref}" typeof="rdfa:Pattern"))
+
+            # Add a link child element to the element that represents the item, with a rel="rdfa:copy" attribute and an href attribute whose value is a NUMBER SIGN U+0023 followed by reference
+            link = ::Nokogiri::XML::Node.new('link', input)
+            link['rel'] = 'rdfa:copy'
+            link['href'] = "##{ref}"
+            item << link
+          end
+        end
+      end
+
+      options = options.merge(
+        library: :nokogiri,
+        reference_folding: true,
+        host_language: :html5,
+        version: :"rdfa1.1")
+
+      # Rely on RDFa reader
+      super(input, options, &block)
+    end
+  end
+end
\ No newline at end of file
diff --git a/lib/rdf/microdata/reader.rb b/lib/rdf/microdata/reader.rb
index c78a0c1..49bef8c 100644
--- a/lib/rdf/microdata/reader.rb
+++ b/lib/rdf/microdata/reader.rb
@@ -37,6 +37,21 @@ def base_uri
       @options[:base_uri]
     end
 
+    ##
+    # Redirect for RDFa Reader given `:rdfa` option
+    #
+    # @private
+    def self.new(input = nil, options = {}, &block)
+      klass = if options[:rdfa]
+        RdfaReader
+      else
+        self
+      end
+      reader = klass.allocate
+      reader.send(:initialize, input, options, &block)
+      reader
+    end
+
     ##
     # Initializes the Microdata reader instance.
     #
diff --git a/spec/rdfa_reader_spec.rb b/spec/rdfa_reader_spec.rb
new file mode 100644
index 0000000..18a7139
--- /dev/null
+++ b/spec/rdfa_reader_spec.rb
@@ -0,0 +1,888 @@
+# coding: utf-8
+$:.unshift "."
+require 'spec_helper'
+require 'rdf/spec/reader'
+
+describe RDF::Microdata::RdfaReader do
+  let!(:doap) {File.expand_path("../../etc/doap.html", __FILE__)}
+  let!(:doap_nt) {File.expand_path("../../etc/doap.nt", __FILE__)}
+  let!(:registry_path) {File.expand_path("../test-files/test-registry.json", __FILE__)}
+  before :each do
+    @reader = RDF::Microdata::RdfaReader.new(StringIO.new("<html></html>"))
+  end
+
+  context :interface do
+    subject {%(
+      <div itemscope itemtype="http://schema.org/">
+       <p>My name is <span itemprop="name">Elizabeth</span>.</p>
+      </div>
+    )}
+    
+    it "should yield reader" do
+      inner = double("inner")
+      expect(inner).to receive(:called).with(RDF::Microdata::RdfaReader)
+      RDF::Microdata::RdfaReader.new(subject, base_uri: 'http://example/') do |reader|
+        inner.called(reader.class)
+      end
+    end
+    
+    it "should return reader" do
+      expect(RDF::Microdata::RdfaReader.new(subject, base_uri: 'http://example/')).to be_a(RDF::Microdata::RdfaReader)
+    end
+    
+    it "should not raise errors" do
+      expect {
+        RDF::Microdata::RdfaReader.new(subject, validate:  true, base_uri: 'http://example/')
+      }.not_to raise_error
+    end
+
+    it "should yield statements" do
+      inner = double("inner")
+      expect(inner).to receive(:called).with(RDF::Statement).at_least(2)
+      RDF::Microdata::RdfaReader.new(subject, base_uri: 'http://example/').each_statement do |statement|
+        inner.called(statement.class)
+      end
+    end
+    
+    it "should yield triples" do
+      inner = double("inner")
+      expect(inner).to receive(:called).at_least(2)
+      RDF::Microdata::RdfaReader.new(subject, base_uri: 'http://example/').each_triple do |subject, predicate, object|
+        inner.called(subject.class, predicate.class, object.class)
+      end
+    end
+
+    context "Microdata Reader with :rdfa option" do
+      it "returns a RdfaReader instance" do
+        r = RDF::Microdata::Reader.new(StringIO.new(""), rdfa:  true)
+        expect(r).to be_a(RDF::Microdata::RdfaReader)
+      end
+    end
+  end
+
+  context :parsing do
+    before :each do 
+      @md_ctx = %q(
+        <div itemscope='' itemtype="http://schema.org/Person">
+         %s
+        </div>
+      )
+      @nt_ctx = %q(
+      _:a <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
+      %s
+      )
+    end
+
+    it "parses a simple graph" do
+      md = %q(<p>My name is <span itemprop="name">Gregg Kellogg</span>.</p>)
+      nt = %q(_:a <http://schema.org/name> "Gregg Kellogg" .)
+      expect(parse(@md_ctx % md)).to be_equivalent_graph(@nt_ctx % nt, logger: @logger)
+    end
+
+    context "values" do
+      [
+        [
+          %q(<p>My name is <span itemprop="name">Gregg Kellogg</span></p>),
+          %q(_:a <http://schema.org/name> "Gregg Kellogg" .)
+        ],
+        [
+          %q(
+          <p>My name is <span itemprop="name">Gregg</span></p>
+          <p>My name is <span itemprop="name">Kellogg</span></p>
+          ),
+          %q(_:a <http://schema.org/name> "Gregg", "Kellogg" .)
+        ],
+        [
+          %q(<p>My name is <span itemprop="name fullName">Gregg Kellogg</span></p>),
+          %q(
+            _:a <http://schema.org/name> "Gregg Kellogg" .
+            _:a <http://schema.org/fullName> "Gregg Kellogg" .
+          )
+        ],
+        [
+          %q(<p>My name is <span itemprop="http://schema.org/name">Gregg Kellogg</span></p>),
+          %q(_:a <http://schema.org/name> "Gregg Kellogg" .)
+        ],
+        [
+          %q(<meta itemprop="meta" content="foo"/>),
+          %q(_:a <http://schema.org/meta> "foo" .)
+        ],
+        [
+          %q(<span itemprop="span" content="foo">Bar</span>),
+          %q(_:a <http://schema.org/span> "foo" .)
+        ],
+        [
+          %q(<audio itemprop="audio" src="foo"></audio>),
+          %q(_:a <http://schema.org/audio> <http://example/foo> .)
+        ],
+        [
+          %q(<embed itemprop="embed" src="foo"></embed>),
+          %q(_:a <http://schema.org/embed> <http://example/foo> .)
+        ],
+        [
+          %q(<iframe itemprop="iframe" src="foo"></iframe>),
+          %q(_:a <http://schema.org/iframe> <http://example/foo> .)
+        ],
+        [
+          %q(<img itemprop="img" src="foo"/>),
+          %q(_:a <http://schema.org/img> <http://example/foo> .)
+        ],
+        [
+          %q(<source itemprop="source" src="foo"/>),
+          %q(_:a <http://schema.org/source> <http://example/foo> .)
+        ],
+        [
+          %q(<track itemprop="track" src="foo"/>),
+          %q(_:a <http://schema.org/track> <http://example/foo> .)
+        ],
+        [
+          %q(<video itemprop="video" src="foo"></video>),
+          %q(_:a <http://schema.org/video> <http://example/foo> .)
+        ],
+        [
+          %q(<a itemprop="a" href="foo"></a>),
+          %q(_:a <http://schema.org/a> <http://example/foo> .)
+        ],
+        [
+          %q(<area itemprop="area" href="foo"/>),
+          %q(_:a <http://schema.org/area> <http://example/foo> .)
+        ],
+        [
+          %q(<link itemprop="link" href="foo"/>),
+          %q(_:a <http://schema.org/link> <http://example/foo> .)
+        ],
+        [
+          %q(<object itemprop="object" data="foo"/>),
+          %q(_:a <http://schema.org/object> <http://example/foo> .)
+        ],
+        [
+          %q(<time itemprop="time" datetime="2011-06-28Z">28 June 2011</time>),
+          %q(_:a <http://schema.org/time> "2011-06-28Z"^^<http://www.w3.org/2001/XMLSchema#date> .)
+        ],
+        [
+          %q(<time itemprop="time" datetime="00:00:00Z">midnight</time>),
+          %q(_:a <http://schema.org/time> "00:00:00Z"^^<http://www.w3.org/2001/XMLSchema#time> .)
+        ],
+        [
+          %q(<time itemprop="time" datetime="2011-06-28T00:00:00Z">28 June 2011 at midnight</time>),
+          %q(_:a <http://schema.org/time> "2011-06-28T00:00:00Z"^^<http://www.w3.org/2001/XMLSchema#dateTime> .)
+        ],
+        [
+          %q(<time itemprop="time" datetime="P2011Y06M28DT00H00M00S">2011 years 6 months 28 days</time>),
+          %q(_:a <http://schema.org/time> "P2011Y06M28DT00H00M00S"^^<http://www.w3.org/2001/XMLSchema#duration> .)
+        ],
+        [
+          %q(<time itemprop="time" datetime="foo">28 June 2011</time>),
+          %q(_:a <http://schema.org/time> "foo" .)
+        ],
+        [
+          %q(<div itemprop="knows" itemscope=''><a href="http://manu.sporny.org/">Manu</a></div>),
+          %q(_:a <http://schema.org/knows> _:b .)
+        ],
+        [
+          %q(<data itemprop="data" value="1"/>),
+          %q(_:a <http://schema.org/data> "1"^^<http://www.w3.org/2001/XMLSchema#integer> .)
+        ],
+        [
+          %q(<data itemprop="data" value="1.1"/>),
+          %q(_:a <http://schema.org/data> "1.1"^^<http://www.w3.org/2001/XMLSchema#double> .)
+        ],
+        [
+          %q(<data itemprop="data" value="1.1e1"/>),
+          %q(_:a <http://schema.org/data> "1.1e1"^^<http://www.w3.org/2001/XMLSchema#double> .)
+        ],
+        [
+          %q(<data itemprop="data" value="foo"/>),
+          %q(_:a <http://schema.org/data> "foo" .)
+        ],
+        [
+          %q(<data itemprop="data" lang="en" value="foo"/>),
+          %q(_:a <http://schema.org/data> "foo" .)
+        ],
+        [
+          %q(<meter itemprop="meter" value="1"/>),
+          %q(_:a <http://schema.org/meter> "1"^^<http://www.w3.org/2001/XMLSchema#integer> .)
+        ],
+        [
+          %q(<meter itemprop="meter" value="1.1"/>),
+          %q(_:a <http://schema.org/meter> "1.1"^^<http://www.w3.org/2001/XMLSchema#double> .)
+        ],
+        [
+          %q(<meter itemprop="meter" value="1.1e1"/>),
+          %q(_:a <http://schema.org/meter> "1.1e1"^^<http://www.w3.org/2001/XMLSchema#double> .)
+        ],
+        [
+          %q(<meter itemprop="meter" value="foo"/>),
+          %q(_:a <http://schema.org/meter> "foo" .)
+        ],
+        [
+          %q(<meter itemprop="meter" lang="en" value="foo"/>),
+          %q(_:a <http://schema.org/meter> "foo" .)
+        ],
+      ].each do |(md, nt)|
+        it "parses #{md}" do
+          pending if [
+            '<data itemprop="data" value="1.1"/>',
+            '<meter itemprop="meter" value="1.1"/>',
+          ].include?(md)
+          expect(parse(@md_ctx % md)).to be_equivalent_graph(@nt_ctx % nt, logger: @logger)
+        end
+      end
+    end
+
+    context "base_uri" do
+      before :each do 
+        @nt_ctx = %q(
+        _:a <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
+        %s
+        )
+      end
+
+      [
+        [
+          %q(<audio itemprop="audio" src="foo"></audio>),
+          %q(_:a <http://schema.org/audio> <http://example.com/foo> .)
+        ],
+        [
+          %q(<embed itemprop="embed" src="foo"></embed>),
+          %q(_:a <http://schema.org/embed> <http://example.com/foo> .)
+        ],
+        [
+          %q(<iframe itemprop="iframe" src="foo"></iframe>),
+          %q(_:a <http://schema.org/iframe> <http://example.com/foo> .)
+        ],
+        [
+          %q(<img itemprop="img" src="foo"/>),
+          %q(_:a <http://schema.org/img> <http://example.com/foo> .)
+        ],
+        [
+          %q(<source itemprop="source" src="foo"/>),
+          %q(_:a <http://schema.org/source> <http://example.com/foo> .)
+        ],
+        [
+          %q(<track itemprop="track" src="foo"/>),
+          %q(_:a <http://schema.org/track> <http://example.com/foo> .)
+        ],
+        [
+          %q(<video itemprop="video" src="foo"></video>),
+          %q(_:a <http://schema.org/video> <http://example.com/foo> .)
+        ],
+        [
+          %q(<a itemprop="a" href="foo"></a>),
+          %q(_:a <http://schema.org/a> <http://example.com/foo> .)
+        ],
+        [
+          %q(<area itemprop="area" href="foo"/>),
+          %q(_:a <http://schema.org/area> <http://example.com/foo> .)
+        ],
+        [
+          %q(<link itemprop="link" href="foo"/>),
+          %q(_:a <http://schema.org/link> <http://example.com/foo> .)
+        ],
+        [
+          %q(<a itemprop="knows" href="scor">Stéphane Corlosquet</a>),
+          %q(_:a <http://schema.org/knows> <http://example.com/scor> .)
+        ],
+      ].each do |(md, nt)|
+        it "parses #{md}" do
+          expect(parse(@md_ctx % md, base_uri: 'http://example.com/')).to be_equivalent_graph(@nt_ctx % nt, logger: @logger)
+        end
+      end
+    end
+
+    context "itemid" do
+      before :each do 
+        @md_ctx = %q(
+          <div itemid="subj" itemscope='' itemtype="http://schema.org/Person">
+           %s
+          </div>
+        )
+        @nt_ctx = %q(
+        <http://example/subj> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
+        %s
+        )
+      end
+
+      [
+        [
+          %q(<p>My name is <span itemprop="name">Gregg Kellogg</span></p>),
+          %q(<http://example/subj> <http://schema.org/name> "Gregg Kellogg" .)
+        ],
+        [
+          %q(<meta itemprop="meta" content="foo"/>),
+          %q(<http://example/subj> <http://schema.org/meta> "foo" .)
+        ],
+        [
+          %q(<audio itemprop="audio" src="foo"></audio>),
+          %q(<http://example/subj> <http://schema.org/audio> <http://example/foo> .)
+        ],
+        [
+          %q(<embed itemprop="embed" src="foo"></embed>),
+          %q(<http://example/subj> <http://schema.org/embed> <http://example/foo> .)
+        ],
+        [
+          %q(<iframe itemprop="iframe" src="foo"></iframe>),
+          %q(<http://example/subj> <http://schema.org/iframe> <http://example/foo> .)
+        ],
+        [
+          %q(<img itemprop="img" src="foo"/>),
+          %q(<http://example/subj> <http://schema.org/img> <http://example/foo> .)
+        ],
+        [
+          %q(<source itemprop="source" src="foo"/>),
+          %q(<http://example/subj> <http://schema.org/source> <http://example/foo> .)
+        ],
+        [
+          %q(<track itemprop="track" src="foo"/>),
+          %q(<http://example/subj> <http://schema.org/track> <http://example/foo> .)
+        ],
+        [
+          %q(<video itemprop="video" src="foo"></video>),
+          %q(<http://example/subj> <http://schema.org/video> <http://example/foo> .)
+        ],
+        [
+          %q(<a itemprop="a" href="foo"></a>),
+          %q(<http://example/subj> <http://schema.org/a> <http://example/foo> .)
+        ],
+        [
+          %q(<area itemprop="area" href="foo"/>),
+          %q(<http://example/subj> <http://schema.org/area> <http://example/foo> .)
+        ],
+        [
+          %q(<link itemprop="link" href="foo"/>),
+          %q(<http://example/subj> <http://schema.org/link> <http://example/foo> .)
+        ],
+        [
+          %q(<object itemprop="object" data="foo"/>),
+          %q(<http://example/subj> <http://schema.org/object> <http://example/foo> .)
+        ],
+        [
+          %q(<time itemprop="time" datetime="2011-06-28T00:00:00Z">28 June 2011</time>),
+          %q(<http://example/subj> <http://schema.org/time> "2011-06-28T00:00:00Z"^^<http://www.w3.org/2001/XMLSchema#dateTime> .)
+        ],
+        [
+          %q(<div itemprop="knows" itemscope='' itemid="obj"><a href="http://manu.sporny.org/">Manu</a></div>),
+          %q(<http://example/subj> <http://schema.org/knows> <http://example/obj> .)
+        ],
+      ].each do |(md, nt)|
+        it "parses #{md}" do
+          expect(parse(@md_ctx % md)).to be_equivalent_graph(@nt_ctx % nt, logger: @logger)
+        end
+      end
+    end
+
+    context "itemtype" do
+      {
+        "with no type and token property" => [
+          %q(
+            <div>
+              <div itemscope=''>
+              <p id="a">Name: <span itemprop="name">Amanda</span></p>
+              </div>
+            </div>
+          ),
+          %q()
+        ],
+        "with empty type and token property" => [
+          %q(
+            <div>
+              <div itemscope='' itemtype="">
+              <p id="a">Name: <span itemprop="name">Amanda</span></p>
+              </div>
+            </div>
+          ),
+          %q()
+        ],
+        "with relative type and token property" => [
+          %q(
+            <div>
+              <div itemscope='' itemtype="Person">
+              <p id="a">Name: <span itemprop="name">Amanda</span></p>
+              </div>
+            </div>
+          ),
+          %q()
+        ],
+        "with single type and token property" => [
+          %q(
+            <div>
+              <div itemscope='' itemtype="http://schema.org/Person">
+              <p id="a">Name: <span itemprop="name">Amanda</span></p>
+              </div>
+            </div>
+          ),
+          %q(
+          [ a <http://schema.org/Person> ;
+            <http://schema.org/name> "Amanda" ;
+          ] .
+          )
+        ],
+        "with multipe types and token property" => [
+          %q(
+            <div>
+              <div itemscope='' itemtype="http://schema.org/Person http://xmlns.com/foaf/0.1/Person">
+              <p id="a">Name: <span itemprop="name">Amanda</span></p>
+              </div>
+            </div>
+          ),
+          %q(
+          [ a <http://schema.org/Person>, <http://xmlns.com/foaf/0.1/Person> ;
+            <http://schema.org/name> "Amanda" ;
+          ] .
+          )
+        ],
+        #"with no type and URI property" => [
+        #  %q(
+        #    <div>
+        #      <div itemscope=''>
+        #      <p id="a">Name: <span itemprop="http://schema.org/name">Amanda</span></p>
+        #      </div>
+        #    </div>
+        #  ),
+        #  %q(
+        #    [ <http://schema.org/name> "Amanda" ] .
+        #  )
+        #],
+        #"with empty type and URI property" => [
+        #  %q(
+        #    <div>
+        #      <div itemscope='' itemtype="">
+        #      <p id="a">Name: <span itemprop="http://schema.org/name">Amanda</span></p>
+        #      </div>
+        #    </div>
+        #  ),
+        #  %q(
+        #  [ <http://schema.org/name> "Amanda" ] .
+        #  )
+        #],
+        #"with relative type and URI property" => [
+        #  %q(
+        #    <div>
+        #      <div itemscope='' itemtype="Person">
+        #      <p id="a">Name: <span itemprop="http://schema.org/name">Amanda</span></p>
+        #      </div>
+        #    </div>
+        #  ),
+        #  %q(
+        #  [ <http://schema.org/name> "Amanda" ] .
+        #  )
+        #],
+        "with single type and URI property" => [
+          %q(
+            <div>
+              <div itemscope='' itemtype="http://schema.org/Person">
+              <p id="a">Name: <span itemprop="http://schema.org/name">Amanda</span></p>
+              </div>
+            </div>
+          ),
+          %q(
+          [ a <http://schema.org/Person> ;
+            <http://schema.org/name> "Amanda" ;
+          ] .
+          )
+        ],
+        "with multipe types and URI property" => [
+          %q(
+            <div>
+              <div itemscope='' itemtype="http://schema.org/Person http://xmlns.com/foaf/0.1/Person">
+              <p id="a">Name: <span itemprop="http://schema.org/name">Amanda</span></p>
+              </div>
+            </div>
+          ),
+          %q(
+          [ a <http://schema.org/Person>, <http://xmlns.com/foaf/0.1/Person> ;
+            <http://schema.org/name> "Amanda" ;
+          ] .
+          )
+        ],
+        "with inherited type and token property" => [
+          %q(
+            <div itemscope=''  itemtype="http://schema.org/Person">
+              <p>Name: <span itemprop="name">Gregg</span></p>
+              <div itemprop="knows" itemscope="">
+                <p id="a">Name: <span itemprop="name">Jeni</span></p>
+              </div>
+            </div>
+          ),
+          %q(
+          @prefix md: <http://www.w3.org/ns/md#> .
+          @prefix schema: <http://schema.org/> .
+          [ a schema:Person ;
+            schema:name "Gregg" ;
+            schema:knows [ schema:name "Jeni" ]
+          ] .
+          )
+        ]
+      }.each do |name, (md, nt)|
+        it "#{name}" do
+          expect(parse(md)).to be_equivalent_graph(nt, logger: @logger)
+        end
+      end
+    end
+
+    context "itemref" do
+      {
+        "to single id" =>
+        [
+          %q(
+            <div>
+              <div itemscope='' itemtype="http://schema.org/Person" id="amanda" itemref="a"></div>
+              <p id="a">Name: <span itemprop="name">Amanda</span></p>
+            </div>
+          ),
+          %q(
+            [ a <http://schema.org/Person> ;
+              <http://schema.org/name> "Amanda" ;
+            ] .
+          )
+        ],
+        "to generate listed property values" =>
+        [
+          %q(
+          <div>
+            <div itemscope='' itemtype="http://schema.org/Person" itemref="surname">
+              <p>My name is <span itemprop="name">Gregg</span></p>
+            </div>
+            <p id="surname">My name is <span itemprop="name">Kellogg</span></p>
+          </div>
+          ),
+          %q(
+            [ a <http://schema.org/Person> ;
+              <http://schema.org/name> "Gregg", "Kellogg" ;
+            ] .
+          )
+        ],
+        #"to single id with different types" =>
+        #[
+        #  %q(
+        #    <div>
+        #      <div itemscope='' itemtype="http://xmlns.com/foaf/0.1/Person" id="amanda" itemref="a"></div>
+        #      <div itemscope='' itemtype="http://schema.org/Person" id="amanda" itemref="a"></div>
+        #      <p id="a">Name: <span itemprop="name">Amanda</span></p>
+        #    </div>
+        #  ),
+        #  %q(
+        #  [ a <http://schema.org/Person> ;
+        #    <http://schema.org/name> "Amanda" ;
+        #  ] .
+        #  [ a <http://xmlns.com/foaf/0.1/Person> ;
+        #    <http://xmlns.com/foaf/0.1/name> "Amanda" ;
+        #  ] .
+        #  )
+        #],
+        "to multiple ids" =>
+        [
+          %q(
+            <div>
+              <div itemscope='' itemtype="http://schema.org/Person" id="amanda" itemref="a b"></div>
+              <p id="a">Name: <span itemprop="name">Amanda</span></p>
+              <p id="b" itemprop="band">Jazz Band</p>
+            </div>
+          ),
+          %q(
+            [ a <http://schema.org/Person> ;
+              <http://schema.org/name> "Amanda" ;
+              <http://schema.org/band> "Jazz Band" ;
+            ] .
+          )
+        ],
+        "with chaining" =>
+        [
+          %q(
+            <div>
+              <div itemscope='' itemtype="http://schema.org/Person" id="amanda" itemref="a b"></div>
+              <p id="a">Name: <span itemprop="name">Amanda</span></p>
+              <div id="b" itemprop="band" itemscope='' itemtype="http://schema.org/MusicGroup" itemref="c"></div>
+              <div id="c">
+               <p>Band: <span itemprop="name">Jazz Band</span></p>
+               <p>Size: <span itemprop="size">12</span> players</p>
+              </div>
+            </div>
+          ),
+          %q(
+            [ a <http://schema.org/Person> ;
+              <http://schema.org/name> "Amanda" ;
+              <http://schema.org/band> [
+                a <http://schema.org/MusicGroup> ;
+                <http://schema.org/name> "Jazz Band";
+                <http://schema.org/size> "12"
+              ]
+            ] .
+          )
+        ],
+        "shared" =>
+        [
+          %q(
+            <div>
+              <div itemscope='' itemref="a" itemtype="http://schema.org/Person"></div>
+              <div itemscope='' itemref="a"itemtype="http://schema.org/Person"></div>
+              <div id="a" itemprop="refers-to" itemscope=''>
+                <span itemprop="name">Amanda</span>
+              </div>
+            </div>
+          ),
+          %q(
+            [ a <http://schema.org/Person>; <http://schema.org/refers-to> _:a ] .
+            [ a <http://schema.org/Person>; <http://schema.org/refers-to> _:a ] .
+            _:a <http://schema.org/name> "Amanda" .
+          )
+      
+        ],
+      }.each do |name, (md, nt)|
+        it "parses #{name}" do
+          expect(parse(md)).to be_equivalent_graph(nt, logger: @logger)
+        end
+      end
+
+      it "catches infinite recursion", pending: true do
+        md = %(
+        <!DOCTYPE html>
+        <html><body>
+        <div itemscope>
+          <div id="ref">
+            <div itemprop="name">friend1</div>
+            <div itemprop="friend" itemscope>
+              <div itemprop="name">friend2</div>
+              <div itemprop="friend" itemref="ref" itemscope></div>
+            </div>
+          </div>
+        </div>
+        </body></html>
+        )
+        expect {parse(md, validate: true)}.to raise_error(RDF::ReaderError)
+        expect(@logger.to_s).to include("itemref recursion")
+      end
+    end
+
+    context "propertyURI" do
+      context "no expansion" do
+        {
+          "http://foo/bar + baz => http://foo/baz" =>
+          [
+            %q(
+              <div itemscope='' itemtype='http://foo/bar'>
+                <p itemprop='baz'>FooBar</p>
+              </div>
+            ),
+            %q(
+              [ a <http://foo/bar>; <http://foo/baz> "FooBar" ] .
+            )
+          ],
+          "http://foo#bar + baz => http://foo#baz" =>
+          [
+            %q(
+              <div itemscope='' itemtype='http://foo#bar'>
+                <p itemprop='baz'>FooBar</p>
+              </div>
+            ),
+            %q(
+              [ a <http://foo#bar>; <http://foo#baz> "FooBar" ] .
+            )
+          ],
+          "http://foo#Type + bar + baz => http://foo#baz" =>
+          [
+            %q(
+              <div itemscope='' itemtype='http://foo#Type'>
+                <p itemscope='' itemprop='bar'><span itemprop='baz'>Baz</span></p>
+              </div>
+            ),
+            %q(
+              [ a <http://foo#Type>;
+                <http://foo#bar> [ <http://foo#baz> "Baz"]] .
+            )
+          ],
+        }.each do |name, (md, nt)|
+          it "expands #{name}" do
+            expect(parse(md)).to be_equivalent_graph(nt, logger: @logger)
+          end
+        end
+      end
+
+      context "default propertyURI generation" do
+        {
+          "http://foo/bar + baz => http://foo/baz" =>
+          [
+            %q(
+              <div itemscope='' itemtype='http://foo/bar'>
+                <p itemprop='baz'>FooBar</p>
+              </div>
+            ),
+            %q(
+              [ a <http://foo/bar>; <http://foo/baz> "FooBar" ] .
+            )
+          ],
+          "http://foo#bar + baz => http://foo#baz" =>
+          [
+            %q(
+              <div itemscope='' itemtype='http://foo#bar'>
+                <p itemprop='baz'>FooBar</p>
+              </div>
+            ),
+            %q(
+              [ a <http://foo#bar>; <http://foo#baz> "FooBar" ] .
+            )
+          ],
+          "http://foo#Type + bar + baz => http://foo#baz" =>
+          [
+            %q(
+              <div itemscope='' itemtype='http://foo#Type'>
+                <p itemscope='' itemprop='bar'><span itemprop='baz'>Baz</span></p>
+              </div>
+            ),
+            %q(
+              [ a <http://foo#Type>;
+                <http://foo#bar> [ <http://foo#baz> "Baz"]] .
+            )
+          ],
+        }.each do |name, (md, nt)|
+          it "expands #{name}" do
+            expect(parse(md)).to be_equivalent_graph(nt, logger: @logger)
+          end
+        end
+      end
+    end
+
+    context "itemprop-reverse", skip: true do
+      {
+        "link" => [
+          %q(
+            <div itemscope itemtype="http://schema.org/Person">
+              <span itemprop="name">William Shakespeare</span>
+              <link itemprop-reverse="creator" href="http://www.freebase.com/m/0yq9mqd">
+            </div>
+          ),
+          %q(
+            <http://www.freebase.com/m/0yq9mqd> <http://schema.org/creator> [
+              a <http://schema.org/Person>;
+              <http://schema.org/name> "William Shakespeare"
+            ] .
+          )
+        ],
+        "itemscope" => [
+          %q(
+            <div itemscope itemtype="http://schema.org/ShoppingCenter">
+              <span itemprop="name">The ACME Shopping Mall on Structured Data Avenue</span>
+              <span itemprop="description">The ACME Shopping Mall is your one-stop paradise for all data-related shopping needs, from schemas to instance data</span>
+              <p>Here is a list of shops inside:</p>
+              <div itemprop-reverse="containedIn" itemscope itemtype="http://schema.org/Restaurant">
+                <span itemprop="name">Dan Brickley's Data Restaurant</span>
+              </div>
+              <div itemprop-reverse="containedIn" itemscope itemtype="http://schema.org/Bakery">
+                <span itemprop="name">Ramanathan Guha's Meta Content Framework Bakery</span>
+              </div>
+            </div>
+          ),
+          %q(
+            _:a a <http://schema.org/ShoppingCenter>;
+                <http://schema.org/name> "The ACME Shopping Mall on Structured Data Avenue";
+                <http://schema.org/description> "The ACME Shopping Mall is your one-stop paradise for all data-related shopping needs, from schemas to instance data" .
+            _:b a <http://schema.org/Restaurant>;
+                <http://schema.org/name> "Dan Brickley's Data Restaurant";
+                <http://schema.org/containedIn> _:a .
+            _:c a <http://schema.org/Bakery>;
+                <http://schema.org/name> "Ramanathan Guha's Meta Content Framework Bakery";
+                <http://schema.org/containedIn> _:a .
+          )
+        ],
+        "literal" => [
+          %q(
+            <div itemscope itemtype="http://schema.org/Person">
+              <span itemprop="name">William Shakespeare</span>
+              <meta itemprop-reverse="creator" content="foo">
+            </div>
+          ),
+          %q(
+            _:a a <http://schema.org/Person>;
+                <http://schema.org/name> "William Shakespeare" .
+          )
+        ],
+        "itemprop and itemprop-reverse" => [
+          %q(
+            <div itemscope itemtype="http://schema.org/Organization">
+              <span itemprop="name">Cryptography Users</span>
+              <div itemprop-reverse="memberOf" itemprop="member" itemscope
+                    itemtype="http://schema.org/OrganizationRole">
+                <div itemprop-reverse="memberOf" itemprop="member" itemscope
+                        itemtype="http://schema.org/Person">
+                  <span itemprop="name">Alice</span>
+                </div>
+                <span itemprop="startDate">1977</span>
+              </div>
+            </div>
+          ),
+          %q(
+            @prefix schema: <http://schema.org/> .
+            @prefix md: <http://www.w3.org/ns/md#> .
+
+            _:a a schema:Organization;
+                schema:name "Cryptography Users";
+                schema:member _:b .
+            _:b a schema:OrganizationRole;
+                schema:startDate "1977";
+                schema:member _:c;
+                schema:memberOf _:a .
+            _:c a schema:Person;
+                schema:name "Alice";
+                schema:memberOf _:b .
+          )
+        ],
+      }.each do |name, (md, nt)|
+        it "expands #{name}" do
+          expect(parse(md)).to be_equivalent_graph(nt, logger: @logger)
+        end
+      end
+    end
+
+    context "vocabulary expansion", pending: true do
+      it "always expands" do
+        md = %q(
+          <div itemscope='' itemtype='http://schema.org/Person'>
+            <link itemprop='additionalType' href='http://xmlns.com/foaf/0.1/Person' />
+          </div>
+        )
+        ttl = %q(
+          [ a <http://schema.org/Person>, <http://xmlns.com/foaf/0.1/Person>;
+            <http://schema.org/additionalType> <http://xmlns.com/foaf/0.1/Person>
+          ] .
+        )
+
+        expect(parse(md, vocab_expansion: true)).to be_equivalent_graph(ttl, logger: @logger)
+      end
+    end
+
+    context "test-files", skip: true do
+      Dir.glob(File.join(File.expand_path(File.dirname(__FILE__)), "test-files", "*.html")).each do |md|
+        it "parses #{md}" do
+          test_file(md)
+        end
+      end
+    end
+  end
+  
+  def parse(input, options = {})
+    # Runs `input` through the reader with `rdfa: true` and returns the resulting graph,
+    # reusing options[:graph] when given (a new RDF::Graph otherwise).
+    @logger = RDF::Spec.logger
+    graph = options[:graph] || RDF::Graph.new
+
+    # Spec defaults; keys supplied by the caller in `options` override them.
+    reader_options = {
+      logger: @logger, rdfa: true, validate: false,
+      base_uri: "http://example/", registry: registry_path, canonicalize: false
+    }.merge(options)
+    reader = RDF::Microdata::Reader.new(input, reader_options)
+    reader.each {|statement| graph << statement}
+
+    # Drop every rdfa:usesVocabulary statement before handing the graph back.
+    vocabulary_uses = graph.query(predicate: RDF::RDFA.usesVocabulary)
+    vocabulary_uses.each {|stmt| graph.delete(stmt)}
+    graph
+  end
+
+  def test_file(filepath, options = {})
+    # Parse inside a block so the handle is closed; compare against the sibling .ttl fixture.
+    graph = File.open(filepath) {|file| parse(file, options)}
+    ttl_string = File.read(filepath.sub(/\.html\z/, '.ttl'))  # anchored: only the extension changes
+    expect(graph).to be_equivalent_graph(ttl_string, logger: @logger)
+  end
+end
diff --git a/spec/suite_helper.rb b/spec/suite_helper.rb
index 5d14456..80850f7 100644
--- a/spec/suite_helper.rb
+++ b/spec/suite_helper.rb
@@ -25,7 +25,7 @@ def self.open_file(filename_or_url, options = {}, &block)
         path = filename_or_url[5..-1]
         Kernel.open(path.to_s, &block)
       when 'http://www.w3.org/ns/md'
-        Kernel.open(RDF::Microdata::Reader::DEFAULT_REGISTRY, &block)
+        Kernel.open(RDF::Microdata::DEFAULT_REGISTRY, &block)
       when /^#{REMOTE_PATH}/
         begin
           #puts "attempt to open #{filename_or_url} locally"

From f0e408d1311394eb611cb27bdfa0fdf6e021f6fc Mon Sep 17 00:00:00 2001
From: Gregg Kellogg <gregg@greggkellogg.net>
Date: Thu, 20 Jul 2017 14:23:28 -0700
Subject: [PATCH 04/12] Add `RdfaReader#rdfa` to retrieve converted RDFa
 document after reader initialization.

---
 Gemfile                          |  1 +
 examples/itemref.html            |  7 +++
 examples/locomotive.html         | 11 +++++
 lib/rdf/microdata/format.rb      | 82 ++++++++++++++++++++++++++++++++
 lib/rdf/microdata/rdfa_reader.rb |  9 +++-
 lib/rdf/microdata/reader.rb      | 13 +++++
 script/parse                     | 61 +++++++++++++++++++++++-
 7 files changed, 181 insertions(+), 3 deletions(-)
 create mode 100644 examples/itemref.html
 create mode 100644 examples/locomotive.html

diff --git a/Gemfile b/Gemfile
index 48f46d8..e63746f 100644
--- a/Gemfile
+++ b/Gemfile
@@ -8,6 +8,7 @@ gem "rdf-xsd",        github: "ruby-rdf/rdf-xsd",  branch: "develop"
 gem "nokogumbo",      '~> 1.4'
 
 group :development do
+  gem 'linkeddata'
   gem 'ebnf',               github: "gkellogg/ebnf",                branch: "develop"
   gem 'rdf-aggregate-repo', github: "ruby-rdf/rdf-aggregate-repo",  branch: "develop"
   gem 'rdf-isomorphic',     github: "ruby-rdf/rdf-isomorphic",      branch: "develop"
diff --git a/examples/itemref.html b/examples/itemref.html
new file mode 100644
index 0000000..b961767
--- /dev/null
+++ b/examples/itemref.html
@@ -0,0 +1,7 @@
+<div id="x">
+ <p itemprop="a">1</p>
+</div>
+<div itemscope itemref="x" itemtype="http://schema.org/Thing">
+ <p itemprop="b">test</p>
+ <p itemprop="a">2</p>
+</div>
diff --git a/examples/locomotive.html b/examples/locomotive.html
new file mode 100644
index 0000000..e976ef9
--- /dev/null
+++ b/examples/locomotive.html
@@ -0,0 +1,11 @@
+<dl itemscope itemtype="http://md.example.com/loco
+                        http://md.example.com/lighting">
+ <dt>Name:
+ <dd itemprop="name">Tank Locomotive (DB 80)
+ <dt>Product code:
+ <dd itemprop="product-code">33041
+ <dt>Scale:
+ <dd itemprop="scale">HO
+ <dt>Digital:
+ <dd itemprop="digital">Delta
+</dl>
diff --git a/lib/rdf/microdata/format.rb b/lib/rdf/microdata/format.rb
index a11dca1..2377030 100644
--- a/lib/rdf/microdata/format.rb
+++ b/lib/rdf/microdata/format.rb
@@ -41,5 +41,87 @@ class Format < RDF::Format
     def self.detect(sample)
       !!sample.match(/<[^>]*(itemprop|itemtype|itemref|itemscope|itemid)[^>]*>/m)
     end
+
+    ##
+    # Hash of CLI commands appropriate for this format
+    # @return [Hash{Symbol => Hash}]
+    def self.cli_commands
+      {
+        "to-rdfa": {
+          description: "Transform HTML+Microdata into HTML+RDFa",
+          parse: false,
+          help: "to-rdfa files ...",
+          lambda: ->(files, options) do
+            out = options[:output] || $stdout
+            xsl = Nokogiri::XSLT(%(<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+              <xsl:param name="indent-increment" select="'  '"/>
+              <xsl:output method="html" doctype-system="about:legacy-compat"/>
+ 
+              <xsl:template name="newline">
+                <xsl:text disable-output-escaping="yes">
+            </xsl:text>
+              </xsl:template>
+ 
+              <xsl:template match="comment() | processing-instruction()">
+                <xsl:param name="indent" select="''"/>
+                <xsl:call-template name="newline"/>
+                <xsl:value-of select="$indent"/>
+                <xsl:copy />
+              </xsl:template>
+ 
+              <xsl:template match="text()">
+                <xsl:param name="indent" select="''"/>
+                <xsl:call-template name="newline"/>
+                <xsl:value-of select="$indent"/>
+                <xsl:value-of select="normalize-space(.)"/>
+              </xsl:template>
+ 
+              <xsl:template match="text()[normalize-space(.)='']"/>
+ 
+              <xsl:template match="*">
+                <xsl:param name="indent" select="''"/>
+                <xsl:call-template name="newline"/>
+                <xsl:value-of select="$indent"/>
+                  <xsl:choose>
+                   <xsl:when test="count(child::*) > 0">
+                    <xsl:copy>
+                     <xsl:copy-of select="@*"/>
+                     <xsl:apply-templates select="*|text()">
+                       <xsl:with-param name="indent" select="concat ($indent, $indent-increment)"/>
+                     </xsl:apply-templates>
+                     <xsl:call-template name="newline"/>
+                     <xsl:value-of select="$indent"/>
+                    </xsl:copy>
+                   </xsl:when>
+                   <xsl:otherwise>
+                    <xsl:copy-of select="."/>
+                   </xsl:otherwise>
+                 </xsl:choose>
+              </xsl:template>
+            </xsl:stylesheet>).gsub(/^            /, ''))
+            if files.empty?
+              # With no files, read from options[:evaluate] when given, otherwise from STDIN
+              input = options[:evaluate] ? StringIO.new(options[:evaluate]) : STDIN
+              input.set_encoding(options.fetch(:encoding, Encoding::UTF_8))
+              RDF::Microdata::Reader.new(input, options.merge(rdfa: true)) do |reader|
+                reader.rdfa.xpath("//text()").each do |txt|
+                  txt.content = txt.content.to_s.strip
+                end
+                out.puts xsl.apply_to(reader.rdfa).to_s
+              end
+            else
+              files.each do |file|
+                RDF::Microdata::Reader.open(file, options.merge(rdfa: true)) do |reader|
+                  reader.rdfa.xpath("//text()").each do |txt|
+                    txt.content = txt.content.to_s.strip
+                  end
+                  out.puts xsl.apply_to(reader.rdfa).to_s
+                end
+              end
+            end
+          end
+        },
+      }
+    end
   end
 end
diff --git a/lib/rdf/microdata/rdfa_reader.rb b/lib/rdf/microdata/rdfa_reader.rb
index 14e1681..9ca15d4 100644
--- a/lib/rdf/microdata/rdfa_reader.rb
+++ b/lib/rdf/microdata/rdfa_reader.rb
@@ -5,6 +5,9 @@ module RDF::Microdata
   ##
   # Update DOM to turn Microdata into RDFa and parse using the RDFa Reader
   class RdfaReader < RDF::RDFa::Reader
+    # The transformed DOM using RDFa
+    # @return [RDF::HTML::Document]
+    attr_reader :rdfa
 
     def self.format(klass = nil)
       if klass.nil?
@@ -27,6 +30,8 @@ def self.format(klass = nil)
     # @yieldreturn [void] ignored
     # @raise [RDF::ReaderError] if _validate_
     def initialize(input = $stdin, options = {}, &block)
+      @options = options
+      log_debug('', "using RDFa transformation reader")
 
       input = case input
       when ::Nokogiri::XML::Document, ::Nokogiri::HTML::Document then input
@@ -41,7 +46,6 @@ def initialize(input = $stdin, options = {}, &block)
         ::Nokogiri::HTML5(input.force_encoding(options[:encoding]))
       end
 
-
       # Load registry
       begin
         registry_uri = options[:registry] || RDF::Microdata::DEFAULT_REGISTRY
@@ -112,6 +116,9 @@ def initialize(input = $stdin, options = {}, &block)
         end
       end
 
+      @rdfa = input
+      log_debug('', "Transformed document: #{input.to_html}")
+
       options = options.merge(
         library: :nokogiri,
         reference_folding: true,
diff --git a/lib/rdf/microdata/reader.rb b/lib/rdf/microdata/reader.rb
index 49bef8c..2889dfb 100644
--- a/lib/rdf/microdata/reader.rb
+++ b/lib/rdf/microdata/reader.rb
@@ -37,6 +37,19 @@ def base_uri
       @options[:base_uri]
     end
 
+    ##
+    # Reader options
+    # @see http://www.rubydoc.info/github/ruby-rdf/rdf/RDF/Reader#options-class_method
+    def self.options
+      super + [
+        RDF::CLI::Option.new(
+          symbol: :rdfa,
+          datatype: TrueClass,
+          on: ["--rdfa"],
+          description: "Transform and parse as RDFa.") {true},
+      ]
+    end
+
     ##
     # Redirect for RDFa Reader given `:rdfa` option
     #
diff --git a/script/parse b/script/parse
index 5eccebc..a4f4349 100755
--- a/script/parse
+++ b/script/parse
@@ -19,7 +19,8 @@ def run(input, options)
   start = Time.new
   num = 0
 
-  if options[:output_format] == :ntriples || options[:quiet]
+  case options[:output_format]
+  when :ntriples, :quiet
     reader_class.new(input, options).each do |statement|
       num += 1
       if options[:quiet]
@@ -28,7 +29,60 @@ def run(input, options)
         options[:output].puts statement.to_ntriples
       end
     end
-  elsif options[:output_format] == :inspect
+  when :rdfa
+    xsl = Nokogiri::XSLT(%(<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+      <xsl:param name="indent-increment" select="'  '"/>
+      <xsl:output method="html" doctype-system="about:legacy-compat"/>
+ 
+      <xsl:template name="newline">
+        <xsl:text disable-output-escaping="yes">
+    </xsl:text>
+      </xsl:template>
+ 
+      <xsl:template match="comment() | processing-instruction()">
+        <xsl:param name="indent" select="''"/>
+        <xsl:call-template name="newline"/>
+        <xsl:value-of select="$indent"/>
+        <xsl:copy />
+      </xsl:template>
+ 
+      <xsl:template match="text()">
+        <xsl:param name="indent" select="''"/>
+        <xsl:call-template name="newline"/>
+        <xsl:value-of select="$indent"/>
+        <xsl:value-of select="normalize-space(.)"/>
+      </xsl:template>
+ 
+      <xsl:template match="text()[normalize-space(.)='']"/>
+ 
+      <xsl:template match="*">
+        <xsl:param name="indent" select="''"/>
+        <xsl:call-template name="newline"/>
+        <xsl:value-of select="$indent"/>
+          <xsl:choose>
+           <xsl:when test="count(child::*) > 0">
+            <xsl:copy>
+             <xsl:copy-of select="@*"/>
+             <xsl:apply-templates select="*|text()">
+               <xsl:with-param name="indent" select="concat ($indent, $indent-increment)"/>
+             </xsl:apply-templates>
+             <xsl:call-template name="newline"/>
+             <xsl:value-of select="$indent"/>
+            </xsl:copy>
+           </xsl:when>
+           <xsl:otherwise>
+            <xsl:copy-of select="."/>
+           </xsl:otherwise>
+         </xsl:choose>
+      </xsl:template>
+    </xsl:stylesheet>).gsub(/^    /, ''))
+    reader_class.new(input, options.merge(rdfa: true)) do |reader|
+      reader.rdfa.xpath("//text()").each do |txt|
+        txt.content = txt.content.to_s.strip
+      end
+      options[:output].puts xsl.apply_to(reader.rdfa).to_s
+    end
+  when :inspect
     reader_class.new(input, options).each do |statement|
       num += 1
       options[:output].puts statement.inspect
@@ -55,6 +109,7 @@ logger.formatter = lambda {|severity, datetime, progname, msg| "#{severity}: #{m
 options = {
   verbose: false,
   validate: false,
+  rdfa: false,
   logger: logger,
   output: STDOUT,
   output_format: :turtle,
@@ -72,6 +127,7 @@ opts = GetoptLong.new(
   ["--output", "-o", GetoptLong::REQUIRED_ARGUMENT],
   ["--quiet", GetoptLong::NO_ARGUMENT],
   ["--registry", GetoptLong::REQUIRED_ARGUMENT],
+  ["--rdfa", GetoptLong::NO_ARGUMENT],
   ["--template", GetoptLong::REQUIRED_ARGUMENT],
   ["--uri", GetoptLong::REQUIRED_ARGUMENT],
   ["--validate", GetoptLong::NO_ARGUMENT],
@@ -82,6 +138,7 @@ opts.each do |opt, arg|
   when '--debug'        then logger.level = Logger::DEBUG
   when '--execute'      then input = arg
   when '--format'       then options[:output_format] = arg.to_sym
+  when '--rdfa'         then options[:rdfa] = true
   when '--input-format' then options[:input_format] = arg.to_sym
   when '--quiet'
     options[:quiet] = options[:quiet].to_i + 1

From ce70b8a21a16f594edf1112e3cc0922e62f47ada Mon Sep 17 00:00:00 2001
From: Gregg Kellogg <gregg@greggkellogg.net>
Date: Thu, 20 Jul 2017 16:41:14 -0700
Subject: [PATCH 05/12] Add JSON-LD-based reader, invoked by passing `jsonld:
 true` when instantiating the reader.

---
 README.md                          |   9 +
 lib/rdf/microdata.rb               |  13 +-
 lib/rdf/microdata/format.rb        |  24 +-
 lib/rdf/microdata/jsonld_reader.rb | 251 ++++++++
 lib/rdf/microdata/reader.rb        |  14 +
 rdf-microdata.gemspec              |   1 +
 script/parse                       |   6 +-
 spec/jsonld_reader_spec.rb         | 888 +++++++++++++++++++++++++++++
 8 files changed, 1197 insertions(+), 9 deletions(-)
 create mode 100644 lib/rdf/microdata/jsonld_reader.rb
 create mode 100644 spec/jsonld_reader_spec.rb

diff --git a/README.md b/README.md
index d00e533..7a696c2 100755
--- a/README.md
+++ b/README.md
@@ -66,6 +66,15 @@ There is an experimental reader based on transforming Microdata to RDFa within t
 this, add the `rdfa: true` option to the {RDF::Microdata::Reader.new}, or
 use {RDF::Microdata::RdfaReader} directly.
 
+The reader exposes a `#rdfa` method, which can be used to retrieve the transformed HTML+RDFa
+
+### JSON-LD-based Reader
+There is an experimental reader based on transforming Microdata to JSON-LD. To invoke
+this, add the `jsonld: true` option to the {RDF::Microdata::Reader.new}, or
+use {RDF::Microdata::JsonLdReader} directly.
+
+The reader exposes a `#jsonld` method, which can be used to retrieve the generated JSON-LD.
+
 ## Resources
 * [RDF.rb][RDF.rb]
 * [Documentation](http://rdf.rubyforge.org/microdata)
diff --git a/lib/rdf/microdata.rb b/lib/rdf/microdata.rb
index 34e77fb..101f09a 100644
--- a/lib/rdf/microdata.rb
+++ b/lib/rdf/microdata.rb
@@ -25,11 +25,12 @@ module Microdata
 
     require 'rdf/microdata/format'
     require 'rdf/microdata/vocab'
-    autoload :Expansion,  'rdf/microdata/expansion'
-    autoload :Profile,    'rdf/microdata/profile'
-    autoload :Reader,     'rdf/microdata/reader'
-    autoload :RdfaReader, 'rdf/microdata/rdfa_reader'
-    autoload :Registry,   'rdf/microdata/registry'
-    autoload :VERSION,    'rdf/microdata/version'
+    autoload :Expansion,    'rdf/microdata/expansion'
+    autoload :JsonLdReader, 'rdf/microdata/jsonld_reader'
+    autoload :Profile,      'rdf/microdata/profile'
+    autoload :RdfaReader,   'rdf/microdata/rdfa_reader'
+    autoload :Reader,       'rdf/microdata/reader'
+    autoload :Registry,     'rdf/microdata/registry'
+    autoload :VERSION,      'rdf/microdata/version'
   end
 end
diff --git a/lib/rdf/microdata/format.rb b/lib/rdf/microdata/format.rb
index 2377030..193bb52 100644
--- a/lib/rdf/microdata/format.rb
+++ b/lib/rdf/microdata/format.rb
@@ -50,7 +50,7 @@ def self.cli_commands
         "to-rdfa": {
           description: "Transform HTML+Microdata into HTML+RDFa",
           parse: false,
-          help: "to-rdfa files ...",
+          help: "to-rdfa files ...\nTransform HTML+Microdata into HTML+RDFa",
           lambda: ->(files, options) do
             out = options[:output] || $stdout
             xsl = Nokogiri::XSLT(%(<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
@@ -121,6 +121,28 @@ def self.cli_commands
             end
           end
         },
+        "to-jsonld": {
+          description: "Transform HTML+Microdata into JSON-LD",
+          parse: false,
+          help: "to-jsonld files ...\nTransform HTML+Microdata into JSON-LD",
+          lambda: ->(files, options) do
+            out = options[:output] || $stdout
+            if files.empty?
+              # With no files, read from options[:evaluate] when given, otherwise from STDIN
+              input = options[:evaluate] ? StringIO.new(options[:evaluate]) : STDIN
+              input.set_encoding(options.fetch(:encoding, Encoding::UTF_8))
+              RDF::Microdata::Reader.new(input, options.merge(jsonld: true)) do |reader|
+                out.puts reader.jsonld.to_json(::JSON::LD::JSON_STATE)
+              end
+            else
+              files.each do |file|
+                RDF::Microdata::Reader.open(file, options.merge(jsonld: true)) do |reader|
+                  out.puts reader.jsonld.to_json(::JSON::LD::JSON_STATE)
+                end
+              end
+            end
+          end
+        },
       }
     end
   end
diff --git a/lib/rdf/microdata/jsonld_reader.rb b/lib/rdf/microdata/jsonld_reader.rb
new file mode 100644
index 0000000..5bf6d3d
--- /dev/null
+++ b/lib/rdf/microdata/jsonld_reader.rb
@@ -0,0 +1,251 @@
+require 'json/ld'
+require 'nokogumbo'
+
+module RDF::Microdata
+  ##
+  # Update DOM to turn Microdata into JSON-LD and parse using the JSON-LD Reader
+  class JsonLdReader < JSON::LD::Reader
+    # The resulting JSON-LD
+    # @return [Hash]
+    attr_reader :jsonld
+
+    def self.format(klass = nil)
+      if klass.nil?
+        RDF::Microdata::Format
+      else
+        super
+      end
+    end
+
+    ##
+    # Initializes the JsonLdReader instance.
+    #
+    # @param  [IO, File, String] input
+    #   the input stream to read
+    # @param  [Hash{Symbol => Object}] options
+    #   any additional options (see `RDF::Reader#initialize`)
+    # @return [reader]
+    # @yield  [reader] `self`
+    # @yieldparam  [RDF::Reader] reader
+    # @yieldreturn [void] ignored
+    # @raise [RDF::ReaderError] if _validate_
+    def initialize(input = $stdin, options = {}, &block)
+      @options = options
+      log_debug('', "using JSON-LD transformation reader")
+
+      input = case input
+      when ::Nokogiri::XML::Document, ::Nokogiri::HTML::Document then input
+      else
+        # Try to detect charset from input
+        options[:encoding] ||= input.charset if input.respond_to?(:charset)
+        
+        # Otherwise, default is utf-8
+        options[:encoding] ||= 'utf-8'
+        options[:encoding] = options[:encoding].to_s if options[:encoding]
+        input = input.read if input.respond_to?(:read)
+        ::Nokogiri::HTML5(input.force_encoding(options[:encoding]))
+      end
+
+      # Load registry
+      begin
+        registry_uri = options[:registry] || RDF::Microdata::DEFAULT_REGISTRY
+        log_debug('', "registry = #{registry_uri.inspect}")
+        Registry.load_registry(registry_uri)
+      rescue JSON::ParserError => e
+        log_fatal("Failed to parse registry: #{e.message}", exception: RDF::ReaderError) if (root.nil? && validate?)
+      end
+
+      @jsonld = {'@graph' => []}
+
+      # Start with all top-level items
+      input.css("[itemscope]").each do |item|
+        next if item['itemprop']  # Only top-level items
+        jsonld['@graph'] << get_object(item)
+      end
+
+      log_debug('', "Transformed document: #{jsonld.to_json(JSON::LD::JSON_STATE)}")
+
+      # Rely on JSON-LD reader
+      super(jsonld.to_json, options, &block)
+    end
+
+    private
+    # Return the JSON-LD representation of an item, recursing into nested items
+    # @param [Nokogiri::XML::Element] item
+    # @param [Hash{Nokogiri::XML::Node => Hash}] memory results already built, keyed by item element
+    # @return [Hash]
+    def get_object(item, memory = {})
+      if result = memory[item]
+        # Result is a reference to that item; assign a blank-node identifier if necessary
+        result['@id'] ||= alloc_bnode
+        return result
+      end
+
+      result = {}
+      memory[item] = result
+
+      # If the item has a global identifier, add an entry to result called "@id" whose value is the global identifier of item.
+      result['@id'] = item['itemid'].to_s if item['itemid']
+
+      # If the item has any item types, add an entry to result called "@type" whose value is an array listing the item types of item, in the order they were specified on the itemtype attribute.
+      if item['itemtype']
+        # Only absolute URLs
+        types = item.attribute('itemtype').
+          remove.
+          to_s.
+          split(/\s+/).
+          select {|t| RDF::URI(t).absolute?}
+        if vocab = types.first
+          vocab = Registry.find(vocab) || begin
+            type_vocab = vocab.to_s.sub(/([\/\#])[^\/\#]*$/, '\1') unless vocab.nil?
+            Registry.new(type_vocab) if type_vocab
+          end
+          (result['@context'] = {})['@vocab'] = vocab.uri.to_s if vocab
+          result['@type'] = types unless types.empty?
+        end
+      end
+
+      # For each element element that has one or more property names and is one of the properties of the item item, in the order those elements are given by the algorithm that returns the properties of an item, run the following substeps
+      item_properties(item).each do |element|
+        value = if element['itemscope']
+          get_object(element, memory)
+        else
+          property_value(element)
+        end
+        element['itemprop'].to_s.split(/\s+/).each do |prop|
+          (result[prop] ||= []) << value
+        end
+      end
+
+      result
+    end
+
+    ##
+    # Crawl the property elements of an item, including elements reached through `itemref`
+    # @param [Nokogiri::XML::Element] item
+    # @return [Array<Nokogiri::XML::Element>]
+    #   List of property elements for an item
+    def item_properties(item)
+      results, memory, pending = [], [item], item.children.select(&:element?)
+      log_debug(item, "item_properties")
+
+      # If root has an itemref attribute, split the value of that itemref attribute on spaces. For each resulting token ID, if there is an element in the document whose ID is ID, then add the first such element to pending.
+      item['itemref'].to_s.split(/\s+/).each do |ref|
+        if referenced = item.document.at_css("##{ref}")
+          pending << referenced
+        end
+      end
+
+      while !pending.empty?
+        current = pending.shift
+        # Error: element was already visited; skip it but keep crawling the rest of pending
+        next if memory.include?(current)
+        memory << current
+
+        # If current does not have an itemscope attribute, then: add all the child elements of current to pending.
+        pending += current.children.select(&:element?) unless current['itemscope']
+
+        # If current has an itemprop attribute specified and has one or more property names, then add current to results.
+        results << current unless current['itemprop'].to_s.split(/\s+/).empty?
+      end
+
+      results
+    end
+
+    ##
+    # Determine the JSON-LD value of a property element from its attributes or text content
+    def property_value(element)
+      base = element.base || base_uri
+      log_debug(element) {"property_value(#{element.name}): base #{base.inspect}"}
+      value = case
+      when element.has_attribute?('itemscope')
+        {}
+      when element.has_attribute?('content')
+        if element.language
+          {"@value" => element['content'].to_s.strip, "@language" => element.language}
+        else
+          element['content'].to_s.strip
+        end
+      when %w(data meter).include?(element.name) && element.attribute('value')
+        # XXX parse as number?
+        {"@value" => element['value'].to_s.strip}
+      when %w(audio embed iframe img source track video).include?(element.name)
+        {"@id" => uri(element.attribute('src'), base).to_s}
+      when %w(a area link).include?(element.name)
+        {"@id" => uri(element.attribute('href'), base).to_s}
+      when %w(object).include?(element.name)
+        {"@id" => uri(element.attribute('data'), base).to_s}
+      when %w(time).include?(element.name)
+        # use datatype?
+        (element.attribute('datetime') || element.text).to_s.strip
+      else
+        if element.language
+          {"@value" => element.inner_text.to_s.strip, "@language" => element.language}
+        else
+          element.inner_text.to_s.strip
+        end
+      end
+      log_debug(element) {"  #{value.inspect}"}
+      value
+    end
+
+    # Allocate a new blank node identifier
+    # @return [String]
+    def alloc_bnode
+      @bnode_base ||= "_:a"
+      res = @bnode_base
+      @bnode_base = res.succ
+      res
+    end
+
+    # Fixme, what about xml:base relative to element?
+    def uri(value, base = nil)
+      value = if base
+        base = uri(base) unless base.is_a?(RDF::URI)
+        base.join(value.to_s)
+      else
+        RDF::URI(value.to_s)
+      end
+      value.validate! if validate?
+      value.canonicalize! if canonicalize?
+      value = RDF::URI.intern(value) if intern?
+      value
+    end
+  end
+end
+
+# Monkey Patch Nokogiri
+module Nokogiri::XML
+  class Element
+
+    ##
+    # Get any xml:base in effect for this element
+    def base
+      if @base.nil?
+        @base = attributes['xml:base'] ||
+        (parent && parent.element? && parent.base) ||
+        false
+      end
+
+      @base == false ? nil : @base
+    end
+
+
+    ##
+    # Get any xml:lang or lang in effect for this element (memoized; falls back to the parent element)
+    def language
+      if @language.nil?
+        @language = case
+        when self["xml:lang"]
+          self["xml:lang"].to_s
+        when self["lang"]
+          self["lang"].to_s
+        else # false records "none found" so the lookup is not repeated on later calls
+          (parent && parent.element? && parent.language) || false
+        end
+      end
+      @language == false ? nil : @language
+    end
+
+  end
+end
diff --git a/lib/rdf/microdata/reader.rb b/lib/rdf/microdata/reader.rb
index 2889dfb..4c1ee6a 100644
--- a/lib/rdf/microdata/reader.rb
+++ b/lib/rdf/microdata/reader.rb
@@ -56,7 +56,21 @@ def self.options
     # @private
     def self.new(input = nil, options = {}, &block)
       klass = if options[:rdfa]
+        # Requires rdf-rdfa gem to be loaded
+        begin
+          require 'rdf/rdfa'
+        rescue LoadError
+          raise ReaderError, "Use of RDFa-based reader requires rdf-rdfa gem"
+        end
         RdfaReader
+      elsif options[:jsonld]
+        # Requires json-ld gem to be loaded
+        begin
+          require 'json/ld'
+        rescue LoadError
+          raise ReaderError, "Use of JSON-LD-based reader requires json-ld gem"
+        end
+        JsonLdReader
       else
         self
       end
diff --git a/rdf-microdata.gemspec b/rdf-microdata.gemspec
index 459fc28..623d957 100755
--- a/rdf-microdata.gemspec
+++ b/rdf-microdata.gemspec
@@ -34,6 +34,7 @@ Gem::Specification.new do |gem|
   gem.add_development_dependency 'rspec',           '~> 3.5'
   gem.add_development_dependency 'rspec-its',       '~> 1.2'
   
+  gem.add_development_dependency 'json-ld',         '~> 2.1'
   gem.add_development_dependency 'rdf-spec',        '~> 2.2'
   gem.add_development_dependency 'rdf-rdfa',        '~> 2.2'
   gem.add_development_dependency 'rdf-turtle',      '~> 2.2'
diff --git a/script/parse b/script/parse
index a4f4349..c7bc0de 100755
--- a/script/parse
+++ b/script/parse
@@ -82,6 +82,10 @@ def run(input, options)
       end
       options[:output].puts xsl.apply_to(reader.rdfa).to_s
     end
+  when :jsonld
+    reader_class.new(input, options.merge(jsonld: true)) do |reader|
+      options[:output].puts reader.jsonld.to_json(::JSON::LD::JSON_STATE)
+    end
   when :inspect
     reader_class.new(input, options).each do |statement|
       num += 1
@@ -127,7 +131,6 @@ opts = GetoptLong.new(
   ["--output", "-o", GetoptLong::REQUIRED_ARGUMENT],
   ["--quiet", GetoptLong::NO_ARGUMENT],
   ["--registry", GetoptLong::REQUIRED_ARGUMENT],
-  ["--rdfa", GetoptLong::NO_ARGUMENT],
   ["--template", GetoptLong::REQUIRED_ARGUMENT],
   ["--uri", GetoptLong::REQUIRED_ARGUMENT],
   ["--validate", GetoptLong::NO_ARGUMENT],
@@ -138,7 +141,6 @@ opts.each do |opt, arg|
   when '--debug'        then logger.level = Logger::DEBUG
   when '--execute'      then input = arg
   when '--format'       then options[:output_format] = arg.to_sym
-  when '--rdfa'         then options[:rdfa] = true
   when '--input-format' then options[:input_format] = arg.to_sym
   when '--quiet'
     options[:quiet] = options[:quiet].to_i + 1
diff --git a/spec/jsonld_reader_spec.rb b/spec/jsonld_reader_spec.rb
new file mode 100644
index 0000000..d341b71
--- /dev/null
+++ b/spec/jsonld_reader_spec.rb
@@ -0,0 +1,888 @@
+# coding: utf-8
+$:.unshift "."
+require 'spec_helper'
+require 'rdf/spec/reader'
+
+describe RDF::Microdata::JsonLdReader do
+  let!(:doap) {File.expand_path("../../etc/doap.html", __FILE__)}
+  let!(:doap_nt) {File.expand_path("../../etc/doap.nt", __FILE__)}
+  let!(:registry_path) {File.expand_path("../test-files/test-registry.json", __FILE__)}
+  before :each do
+    @reader = RDF::Microdata::JsonLdReader.new(StringIO.new("<html></html>"))
+  end
+
+  context :interface do
+    subject {%(
+      <div itemscope itemtype="http://schema.org/">
+       <p>My name is <span itemprop="name">Elizabeth</span>.</p>
+      </div>
+    )}
+    
+    it "should yield reader" do
+      inner = double("inner")
+      expect(inner).to receive(:called).with(RDF::Microdata::JsonLdReader)
+      RDF::Microdata::JsonLdReader.new(subject, base_uri: 'http://example/') do |reader|
+        inner.called(reader.class)
+      end
+    end
+    
+    it "should return reader" do
+      expect(RDF::Microdata::JsonLdReader.new(subject, base_uri: 'http://example/')).to be_a(RDF::Microdata::JsonLdReader)
+    end
+    
+    it "should not raise errors" do
+      expect {
+        RDF::Microdata::JsonLdReader.new(subject, validate:  true, base_uri: 'http://example/')
+      }.not_to raise_error
+    end
+
+    it "should yield statements" do
+      inner = double("inner")
+      expect(inner).to receive(:called).with(RDF::Statement).at_least(2)
+      RDF::Microdata::JsonLdReader.new(subject, base_uri: 'http://example/').each_statement do |statement|
+        inner.called(statement.class)
+      end
+    end
+    
+    it "should yield triples" do
+      inner = double("inner")
+      expect(inner).to receive(:called).at_least(2)
+      RDF::Microdata::JsonLdReader.new(subject, base_uri: 'http://example/').each_triple do |subject, predicate, object|
+        inner.called(subject.class, predicate.class, object.class)
+      end
+    end
+
+    context "Microdata Reader with :jsonld option" do
+      it "returns a JsonLdReader instance" do
+        r = RDF::Microdata::Reader.new(StringIO.new(""), jsonld:  true)
+        expect(r).to be_a(RDF::Microdata::JsonLdReader)
+      end
+    end
+  end
+
+  context :parsing do
+    before :each do 
+      @md_ctx = %q(
+        <div itemscope='' itemtype="http://schema.org/Person">
+         %s
+        </div>
+      )
+      @nt_ctx = %q(
+      _:a <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
+      %s
+      )
+    end
+
+    it "parses a simple graph" do
+      md = %q(<p>My name is <span itemprop="name">Gregg Kellogg</span>.</p>)
+      nt = %q(_:a <http://schema.org/name> "Gregg Kellogg" .)
+      expect(parse(@md_ctx % md)).to be_equivalent_graph(@nt_ctx % nt, logger: @logger)
+    end
+
+    context "values" do
+      [
+        [
+          %q(<p>My name is <span itemprop="name">Gregg Kellogg</span></p>),
+          %q(_:a <http://schema.org/name> "Gregg Kellogg" .)
+        ],
+        [
+          %q(
+          <p>My name is <span itemprop="name">Gregg</span></p>
+          <p>My name is <span itemprop="name">Kellogg</span></p>
+          ),
+          %q(_:a <http://schema.org/name> "Gregg", "Kellogg" .)
+        ],
+        [
+          %q(<p>My name is <span itemprop="name fullName">Gregg Kellogg</span></p>),
+          %q(
+            _:a <http://schema.org/name> "Gregg Kellogg" .
+            _:a <http://schema.org/fullName> "Gregg Kellogg" .
+          )
+        ],
+        [
+          %q(<p>My name is <span itemprop="http://schema.org/name">Gregg Kellogg</span></p>),
+          %q(_:a <http://schema.org/name> "Gregg Kellogg" .)
+        ],
+        [
+          %q(<meta itemprop="meta" content="foo"/>),
+          %q(_:a <http://schema.org/meta> "foo" .)
+        ],
+        [
+          %q(<span itemprop="span" content="foo">Bar</span>),
+          %q(_:a <http://schema.org/span> "foo" .)
+        ],
+        [
+          %q(<audio itemprop="audio" src="foo"></audio>),
+          %q(_:a <http://schema.org/audio> <http://example/foo> .)
+        ],
+        [
+          %q(<embed itemprop="embed" src="foo"></embed>),
+          %q(_:a <http://schema.org/embed> <http://example/foo> .)
+        ],
+        [
+          %q(<iframe itemprop="iframe" src="foo"></iframe>),
+          %q(_:a <http://schema.org/iframe> <http://example/foo> .)
+        ],
+        [
+          %q(<img itemprop="img" src="foo"/>),
+          %q(_:a <http://schema.org/img> <http://example/foo> .)
+        ],
+        [
+          %q(<source itemprop="source" src="foo"/>),
+          %q(_:a <http://schema.org/source> <http://example/foo> .)
+        ],
+        [
+          %q(<track itemprop="track" src="foo"/>),
+          %q(_:a <http://schema.org/track> <http://example/foo> .)
+        ],
+        [
+          %q(<video itemprop="video" src="foo"></video>),
+          %q(_:a <http://schema.org/video> <http://example/foo> .)
+        ],
+        [
+          %q(<a itemprop="a" href="foo"></a>),
+          %q(_:a <http://schema.org/a> <http://example/foo> .)
+        ],
+        [
+          %q(<area itemprop="area" href="foo"/>),
+          %q(_:a <http://schema.org/area> <http://example/foo> .)
+        ],
+        [
+          %q(<link itemprop="link" href="foo"/>),
+          %q(_:a <http://schema.org/link> <http://example/foo> .)
+        ],
+        [
+          %q(<object itemprop="object" data="foo"/>),
+          %q(_:a <http://schema.org/object> <http://example/foo> .)
+        ],
+        [
+          %q(<time itemprop="time" datetime="2011-06-28Z">28 June 2011</time>),
+          %q(_:a <http://schema.org/time> "2011-06-28Z"^^<http://www.w3.org/2001/XMLSchema#date> .)
+        ],
+        [
+          %q(<time itemprop="time" datetime="00:00:00Z">midnight</time>),
+          %q(_:a <http://schema.org/time> "00:00:00Z"^^<http://www.w3.org/2001/XMLSchema#time> .)
+        ],
+        [
+          %q(<time itemprop="time" datetime="2011-06-28T00:00:00Z">28 June 2011 at midnight</time>),
+          %q(_:a <http://schema.org/time> "2011-06-28T00:00:00Z"^^<http://www.w3.org/2001/XMLSchema#dateTime> .)
+        ],
+        [
+          %q(<time itemprop="time" datetime="P2011Y06M28DT00H00M00S">2011 years 6 months 28 days</time>),
+          %q(_:a <http://schema.org/time> "P2011Y06M28DT00H00M00S"^^<http://www.w3.org/2001/XMLSchema#duration> .)
+        ],
+        [
+          %q(<time itemprop="time" datetime="foo">28 June 2011</time>),
+          %q(_:a <http://schema.org/time> "foo" .)
+        ],
+        [
+          %q(<div itemprop="knows" itemscope=''><a href="http://manu.sporny.org/">Manu</a></div>),
+          %q(_:a <http://schema.org/knows> _:b .)
+        ],
+        [
+          %q(<data itemprop="data" value="1"/>),
+          %q(_:a <http://schema.org/data> "1"^^<http://www.w3.org/2001/XMLSchema#integer> .)
+        ],
+        [
+          %q(<data itemprop="data" value="1.1"/>),
+          %q(_:a <http://schema.org/data> "1.1"^^<http://www.w3.org/2001/XMLSchema#double> .)
+        ],
+        [
+          %q(<data itemprop="data" value="1.1e1"/>),
+          %q(_:a <http://schema.org/data> "1.1e1"^^<http://www.w3.org/2001/XMLSchema#double> .)
+        ],
+        [
+          %q(<data itemprop="data" value="foo"/>),
+          %q(_:a <http://schema.org/data> "foo" .)
+        ],
+        [
+          %q(<data itemprop="data" lang="en" value="foo"/>),
+          %q(_:a <http://schema.org/data> "foo" .)
+        ],
+        [
+          %q(<meter itemprop="meter" value="1"/>),
+          %q(_:a <http://schema.org/meter> "1"^^<http://www.w3.org/2001/XMLSchema#integer> .)
+        ],
+        [
+          %q(<meter itemprop="meter" value="1.1"/>),
+          %q(_:a <http://schema.org/meter> "1.1"^^<http://www.w3.org/2001/XMLSchema#double> .)
+        ],
+        [
+          %q(<meter itemprop="meter" value="1.1e1"/>),
+          %q(_:a <http://schema.org/meter> "1.1e1"^^<http://www.w3.org/2001/XMLSchema#double> .)
+        ],
+        [
+          %q(<meter itemprop="meter" value="foo"/>),
+          %q(_:a <http://schema.org/meter> "foo" .)
+        ],
+        [
+          %q(<meter itemprop="meter" lang="en" value="foo"/>),
+          %q(_:a <http://schema.org/meter> "foo" .)
+        ],
+      ].each do |(md, nt)|
+        it "parses #{md}" do
+          pending if [
+            '<data itemprop="data" value="1.1"/>',
+            '<meter itemprop="meter" value="1.1"/>',
+          ].include?(md)
+          expect(parse(@md_ctx % md)).to be_equivalent_graph(@nt_ctx % nt, logger: @logger)
+        end
+      end
+    end
+
+    context "base_uri" do
+      before :each do 
+        @nt_ctx = %q(
+        _:a <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
+        %s
+        )
+      end
+
+      [
+        [
+          %q(<audio itemprop="audio" src="foo"></audio>),
+          %q(_:a <http://schema.org/audio> <http://example.com/foo> .)
+        ],
+        [
+          %q(<embed itemprop="embed" src="foo"></embed>),
+          %q(_:a <http://schema.org/embed> <http://example.com/foo> .)
+        ],
+        [
+          %q(<iframe itemprop="iframe" src="foo"></iframe>),
+          %q(_:a <http://schema.org/iframe> <http://example.com/foo> .)
+        ],
+        [
+          %q(<img itemprop="img" src="foo"/>),
+          %q(_:a <http://schema.org/img> <http://example.com/foo> .)
+        ],
+        [
+          %q(<source itemprop="source" src="foo"/>),
+          %q(_:a <http://schema.org/source> <http://example.com/foo> .)
+        ],
+        [
+          %q(<track itemprop="track" src="foo"/>),
+          %q(_:a <http://schema.org/track> <http://example.com/foo> .)
+        ],
+        [
+          %q(<video itemprop="video" src="foo"></video>),
+          %q(_:a <http://schema.org/video> <http://example.com/foo> .)
+        ],
+        [
+          %q(<a itemprop="a" href="foo"></a>),
+          %q(_:a <http://schema.org/a> <http://example.com/foo> .)
+        ],
+        [
+          %q(<area itemprop="area" href="foo"/>),
+          %q(_:a <http://schema.org/area> <http://example.com/foo> .)
+        ],
+        [
+          %q(<link itemprop="link" href="foo"/>),
+          %q(_:a <http://schema.org/link> <http://example.com/foo> .)
+        ],
+        [
+          %q(<a itemprop="knows" href="scor">Stéphane Corlosquet</a>),
+          %q(_:a <http://schema.org/knows> <http://example.com/scor> .)
+        ],
+      ].each do |(md, nt)|
+        it "parses #{md}" do
+          expect(parse(@md_ctx % md, base_uri: 'http://example.com/')).to be_equivalent_graph(@nt_ctx % nt, logger: @logger)
+        end
+      end
+    end
+
+    context "itemid" do
+      before :each do 
+        @md_ctx = %q(
+          <div itemid="subj" itemscope='' itemtype="http://schema.org/Person">
+           %s
+          </div>
+        )
+        @nt_ctx = %q(
+        <http://example/subj> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
+        %s
+        )
+      end
+
+      [
+        [
+          %q(<p>My name is <span itemprop="name">Gregg Kellogg</span></p>),
+          %q(<http://example/subj> <http://schema.org/name> "Gregg Kellogg" .)
+        ],
+        [
+          %q(<meta itemprop="meta" content="foo"/>),
+          %q(<http://example/subj> <http://schema.org/meta> "foo" .)
+        ],
+        [
+          %q(<audio itemprop="audio" src="foo"></audio>),
+          %q(<http://example/subj> <http://schema.org/audio> <http://example/foo> .)
+        ],
+        [
+          %q(<embed itemprop="embed" src="foo"></embed>),
+          %q(<http://example/subj> <http://schema.org/embed> <http://example/foo> .)
+        ],
+        [
+          %q(<iframe itemprop="iframe" src="foo"></iframe>),
+          %q(<http://example/subj> <http://schema.org/iframe> <http://example/foo> .)
+        ],
+        [
+          %q(<img itemprop="img" src="foo"/>),
+          %q(<http://example/subj> <http://schema.org/img> <http://example/foo> .)
+        ],
+        [
+          %q(<source itemprop="source" src="foo"/>),
+          %q(<http://example/subj> <http://schema.org/source> <http://example/foo> .)
+        ],
+        [
+          %q(<track itemprop="track" src="foo"/>),
+          %q(<http://example/subj> <http://schema.org/track> <http://example/foo> .)
+        ],
+        [
+          %q(<video itemprop="video" src="foo"></video>),
+          %q(<http://example/subj> <http://schema.org/video> <http://example/foo> .)
+        ],
+        [
+          %q(<a itemprop="a" href="foo"></a>),
+          %q(<http://example/subj> <http://schema.org/a> <http://example/foo> .)
+        ],
+        [
+          %q(<area itemprop="area" href="foo"/>),
+          %q(<http://example/subj> <http://schema.org/area> <http://example/foo> .)
+        ],
+        [
+          %q(<link itemprop="link" href="foo"/>),
+          %q(<http://example/subj> <http://schema.org/link> <http://example/foo> .)
+        ],
+        [
+          %q(<object itemprop="object" data="foo"/>),
+          %q(<http://example/subj> <http://schema.org/object> <http://example/foo> .)
+        ],
+        [
+          %q(<time itemprop="time" datetime="2011-06-28T00:00:00Z">28 June 2011</time>),
+          %q(<http://example/subj> <http://schema.org/time> "2011-06-28T00:00:00Z"^^<http://www.w3.org/2001/XMLSchema#dateTime> .)
+        ],
+        [
+          %q(<div itemprop="knows" itemscope='' itemid="obj"><a href="http://manu.sporny.org/">Manu</a></div>),
+          %q(<http://example/subj> <http://schema.org/knows> <http://example/obj> .)
+        ],
+      ].each do |(md, nt)|
+        it "parses #{md}" do
+          expect(parse(@md_ctx % md)).to be_equivalent_graph(@nt_ctx % nt, logger: @logger)
+        end
+      end
+    end
+
+    context "itemtype" do
+      {
+        "with no type and token property" => [
+          %q(
+            <div>
+              <div itemscope=''>
+              <p id="a">Name: <span itemprop="name">Amanda</span></p>
+              </div>
+            </div>
+          ),
+          %q()
+        ],
+        "with empty type and token property" => [
+          %q(
+            <div>
+              <div itemscope='' itemtype="">
+              <p id="a">Name: <span itemprop="name">Amanda</span></p>
+              </div>
+            </div>
+          ),
+          %q()
+        ],
+        "with relative type and token property" => [
+          %q(
+            <div>
+              <div itemscope='' itemtype="Person">
+              <p id="a">Name: <span itemprop="name">Amanda</span></p>
+              </div>
+            </div>
+          ),
+          %q()
+        ],
+        "with single type and token property" => [
+          %q(
+            <div>
+              <div itemscope='' itemtype="http://schema.org/Person">
+              <p id="a">Name: <span itemprop="name">Amanda</span></p>
+              </div>
+            </div>
+          ),
+          %q(
+          [ a <http://schema.org/Person> ;
+            <http://schema.org/name> "Amanda" ;
+          ] .
+          )
+        ],
+        "with multiple types and token property" => [
+          %q(
+            <div>
+              <div itemscope='' itemtype="http://schema.org/Person http://xmlns.com/foaf/0.1/Person">
+              <p id="a">Name: <span itemprop="name">Amanda</span></p>
+              </div>
+            </div>
+          ),
+          %q(
+          [ a <http://schema.org/Person>, <http://xmlns.com/foaf/0.1/Person> ;
+            <http://schema.org/name> "Amanda" ;
+          ] .
+          )
+        ],
+        #"with no type and URI property" => [
+        #  %q(
+        #    <div>
+        #      <div itemscope=''>
+        #      <p id="a">Name: <span itemprop="http://schema.org/name">Amanda</span></p>
+        #      </div>
+        #    </div>
+        #  ),
+        #  %q(
+        #    [ <http://schema.org/name> "Amanda" ] .
+        #  )
+        #],
+        #"with empty type and URI property" => [
+        #  %q(
+        #    <div>
+        #      <div itemscope='' itemtype="">
+        #      <p id="a">Name: <span itemprop="http://schema.org/name">Amanda</span></p>
+        #      </div>
+        #    </div>
+        #  ),
+        #  %q(
+        #  [ <http://schema.org/name> "Amanda" ] .
+        #  )
+        #],
+        #"with relative type and URI property" => [
+        #  %q(
+        #    <div>
+        #      <div itemscope='' itemtype="Person">
+        #      <p id="a">Name: <span itemprop="http://schema.org/name">Amanda</span></p>
+        #      </div>
+        #    </div>
+        #  ),
+        #  %q(
+        #  [ <http://schema.org/name> "Amanda" ] .
+        #  )
+        #],
+        "with single type and URI property" => [
+          %q(
+            <div>
+              <div itemscope='' itemtype="http://schema.org/Person">
+              <p id="a">Name: <span itemprop="http://schema.org/name">Amanda</span></p>
+              </div>
+            </div>
+          ),
+          %q(
+          [ a <http://schema.org/Person> ;
+            <http://schema.org/name> "Amanda" ;
+          ] .
+          )
+        ],
+        "with multiple types and URI property" => [
+          %q(
+            <div>
+              <div itemscope='' itemtype="http://schema.org/Person http://xmlns.com/foaf/0.1/Person">
+              <p id="a">Name: <span itemprop="http://schema.org/name">Amanda</span></p>
+              </div>
+            </div>
+          ),
+          %q(
+          [ a <http://schema.org/Person>, <http://xmlns.com/foaf/0.1/Person> ;
+            <http://schema.org/name> "Amanda" ;
+          ] .
+          )
+        ],
+        "with inherited type and token property" => [
+          %q(
+            <div itemscope=''  itemtype="http://schema.org/Person">
+              <p>Name: <span itemprop="name">Gregg</span></p>
+              <div itemprop="knows" itemscope="">
+                <p id="a">Name: <span itemprop="name">Jeni</span></p>
+              </div>
+            </div>
+          ),
+          %q(
+          @prefix md: <http://www.w3.org/ns/md#> .
+          @prefix schema: <http://schema.org/> .
+          [ a schema:Person ;
+            schema:name "Gregg" ;
+            schema:knows [ schema:name "Jeni" ]
+          ] .
+          )
+        ]
+      }.each do |name, (md, nt)|
+        it "#{name}" do
+          expect(parse(md)).to be_equivalent_graph(nt, logger: @logger)
+        end
+      end
+    end
+
+    context "itemref" do
+      {
+        "to single id" =>
+        [
+          %q(
+            <div>
+              <div itemscope='' itemtype="http://schema.org/Person" id="amanda" itemref="a"></div>
+              <p id="a">Name: <span itemprop="name">Amanda</span></p>
+            </div>
+          ),
+          %q(
+            [ a <http://schema.org/Person> ;
+              <http://schema.org/name> "Amanda" ;
+            ] .
+          )
+        ],
+        "to generate listed property values" =>
+        [
+          %q(
+          <div>
+            <div itemscope='' itemtype="http://schema.org/Person" itemref="surname">
+              <p>My name is <span itemprop="name">Gregg</span></p>
+            </div>
+            <p id="surname">My name is <span itemprop="name">Kellogg</span></p>
+          </div>
+          ),
+          %q(
+            [ a <http://schema.org/Person> ;
+              <http://schema.org/name> "Gregg", "Kellogg" ;
+            ] .
+          )
+        ],
+        #"to single id with different types" =>
+        #[
+        #  %q(
+        #    <div>
+        #      <div itemscope='' itemtype="http://xmlns.com/foaf/0.1/Person" id="amanda" itemref="a"></div>
+        #      <div itemscope='' itemtype="http://schema.org/Person" id="amanda" itemref="a"></div>
+        #      <p id="a">Name: <span itemprop="name">Amanda</span></p>
+        #    </div>
+        #  ),
+        #  %q(
+        #  [ a <http://schema.org/Person> ;
+        #    <http://schema.org/name> "Amanda" ;
+        #  ] .
+        #  [ a <http://xmlns.com/foaf/0.1/Person> ;
+        #    <http://xmlns.com/foaf/0.1/name> "Amanda" ;
+        #  ] .
+        #  )
+        #],
+        "to multiple ids" =>
+        [
+          %q(
+            <div>
+              <div itemscope='' itemtype="http://schema.org/Person" id="amanda" itemref="a b"></div>
+              <p id="a">Name: <span itemprop="name">Amanda</span></p>
+              <p id="b" itemprop="band">Jazz Band</p>
+            </div>
+          ),
+          %q(
+            [ a <http://schema.org/Person> ;
+              <http://schema.org/name> "Amanda" ;
+              <http://schema.org/band> "Jazz Band" ;
+            ] .
+          )
+        ],
+        "with chaining" =>
+        [
+          %q(
+            <div>
+              <div itemscope='' itemtype="http://schema.org/Person" id="amanda" itemref="a b"></div>
+              <p id="a">Name: <span itemprop="name">Amanda</span></p>
+              <div id="b" itemprop="band" itemscope='' itemtype="http://schema.org/MusicGroup" itemref="c"></div>
+              <div id="c">
+               <p>Band: <span itemprop="name">Jazz Band</span></p>
+               <p>Size: <span itemprop="size">12</span> players</p>
+              </div>
+            </div>
+          ),
+          %q(
+            [ a <http://schema.org/Person> ;
+              <http://schema.org/name> "Amanda" ;
+              <http://schema.org/band> [
+                a <http://schema.org/MusicGroup> ;
+                <http://schema.org/name> "Jazz Band";
+                <http://schema.org/size> "12"
+              ]
+            ] .
+          )
+        ],
+        "shared" =>
+        [
+          %q(
+            <div>
+              <div itemscope='' itemref="a" itemtype="http://schema.org/Person"></div>
+              <div itemscope='' itemref="a" itemtype="http://schema.org/Person"></div>
+              <div id="a" itemprop="refers-to" itemscope=''>
+                <span itemprop="name">Amanda</span>
+              </div>
+            </div>
+          ),
+          %q(
+            [ a <http://schema.org/Person>; <http://schema.org/refers-to> _:a ] .
+            [ a <http://schema.org/Person>; <http://schema.org/refers-to> _:a ] .
+            _:a <http://schema.org/name> "Amanda" .
+          )
+      
+        ],
+      }.each do |name, (md, nt)|
+        it "parses #{name}" do
+          expect(parse(md)).to be_equivalent_graph(nt, logger: @logger)
+        end
+      end
+
+      it "catches infinite recursion", pending: true do
+        md = %(
+        <!DOCTYPE html>
+        <html><body>
+        <div itemscope>
+          <div id="ref">
+            <div itemprop="name">friend1</div>
+            <div itemprop="friend" itemscope>
+              <div itemprop="name">friend2</div>
+              <div itemprop="friend" itemref="ref" itemscope></div>
+            </div>
+          </div>
+        </div>
+        </body></html>
+        )
+        expect {parse(md, validate: true)}.to raise_error(RDF::ReaderError)
+        expect(@logger.to_s).to include("itemref recursion")
+      end
+    end
+
+    context "propertyURI" do
+      context "no expansion" do
+        {
+          "http://foo/bar + baz => http://foo/baz" =>
+          [
+            %q(
+              <div itemscope='' itemtype='http://foo/bar'>
+                <p itemprop='baz'>FooBar</p>
+              </div>
+            ),
+            %q(
+              [ a <http://foo/bar>; <http://foo/baz> "FooBar" ] .
+            )
+          ],
+          "http://foo#bar + baz => http://foo#baz" =>
+          [
+            %q(
+              <div itemscope='' itemtype='http://foo#bar'>
+                <p itemprop='baz'>FooBar</p>
+              </div>
+            ),
+            %q(
+              [ a <http://foo#bar>; <http://foo#baz> "FooBar" ] .
+            )
+          ],
+          "http://foo#Type + bar + baz => http://foo#baz" =>
+          [
+            %q(
+              <div itemscope='' itemtype='http://foo#Type'>
+                <p itemscope='' itemprop='bar'><span itemprop='baz'>Baz</span></p>
+              </div>
+            ),
+            %q(
+              [ a <http://foo#Type>;
+                <http://foo#bar> [ <http://foo#baz> "Baz"]] .
+            )
+          ],
+        }.each do |name, (md, nt)|
+          it "expands #{name}" do
+            expect(parse(md)).to be_equivalent_graph(nt, logger: @logger)
+          end
+        end
+      end
+
+      context "default propertyURI generation" do
+        {
+          "http://foo/bar + baz => http://foo/baz" =>
+          [
+            %q(
+              <div itemscope='' itemtype='http://foo/bar'>
+                <p itemprop='baz'>FooBar</p>
+              </div>
+            ),
+            %q(
+              [ a <http://foo/bar>; <http://foo/baz> "FooBar" ] .
+            )
+          ],
+          "http://foo#bar + baz => http://foo#baz" =>
+          [
+            %q(
+              <div itemscope='' itemtype='http://foo#bar'>
+                <p itemprop='baz'>FooBar</p>
+              </div>
+            ),
+            %q(
+              [ a <http://foo#bar>; <http://foo#baz> "FooBar" ] .
+            )
+          ],
+          "http://foo#Type + bar + baz => http://foo#baz" =>
+          [
+            %q(
+              <div itemscope='' itemtype='http://foo#Type'>
+                <p itemscope='' itemprop='bar'><span itemprop='baz'>Baz</span></p>
+              </div>
+            ),
+            %q(
+              [ a <http://foo#Type>;
+                <http://foo#bar> [ <http://foo#baz> "Baz"]] .
+            )
+          ],
+        }.each do |name, (md, nt)|
+          it "expands #{name}" do
+            expect(parse(md)).to be_equivalent_graph(nt, logger: @logger)
+          end
+        end
+      end
+    end
+
+    context "itemprop-reverse", skip: true do
+      {
+        "link" => [
+          %q(
+            <div itemscope itemtype="http://schema.org/Person">
+              <span itemprop="name">William Shakespeare</span>
+              <link itemprop-reverse="creator" href="http://www.freebase.com/m/0yq9mqd">
+            </div>
+          ),
+          %q(
+            <http://www.freebase.com/m/0yq9mqd> <http://schema.org/creator> [
+              a <http://schema.org/Person>;
+              <http://schema.org/name> "William Shakespeare"
+            ] .
+          )
+        ],
+        "itemscope" => [
+          %q(
+            <div itemscope itemtype="http://schema.org/ShoppingCenter">
+              <span itemprop="name">The ACME Shopping Mall on Structured Data Avenue</span>
+              <span itemprop="description">The ACME Shopping Mall is your one-stop paradise for all data-related shopping needs, from schemas to instance data</span>
+              <p>Here is a list of shops inside:</p>
+              <div itemprop-reverse="containedIn" itemscope itemtype="http://schema.org/Restaurant">
+                <span itemprop="name">Dan Brickley's Data Restaurant</span>
+              </div>
+              <div itemprop-reverse="containedIn" itemscope itemtype="http://schema.org/Bakery">
+                <span itemprop="name">Ramanathan Guha's Meta Content Framework Bakery</span>
+              </div>
+            </div>
+          ),
+          %q(
+            _:a a <http://schema.org/ShoppingCenter>;
+                <http://schema.org/name> "The ACME Shopping Mall on Structured Data Avenue";
+                <http://schema.org/description> "The ACME Shopping Mall is your one-stop paradise for all data-related shopping needs, from schemas to instance data" .
+            _:b a <http://schema.org/Restaurant>;
+                <http://schema.org/name> "Dan Brickley's Data Restaurant";
+                <http://schema.org/containedIn> _:a .
+            _:c a <http://schema.org/Bakery>;
+                <http://schema.org/name> "Ramanathan Guha's Meta Content Framework Bakery";
+                <http://schema.org/containedIn> _:a .
+          )
+        ],
+        "literal" => [
+          %q(
+            <div itemscope itemtype="http://schema.org/Person">
+              <span itemprop="name">William Shakespeare</span>
+              <meta itemprop-reverse="creator" content="foo">
+            </div>
+          ),
+          %q(
+            _:a a <http://schema.org/Person>;
+                <http://schema.org/name> "William Shakespeare" .
+          )
+        ],
+        "itemprop and itemprop-reverse" => [
+          %q(
+            <div itemscope itemtype="http://schema.org/Organization">
+              <span itemprop="name">Cryptography Users</span>
+              <div itemprop-reverse="memberOf" itemprop="member" itemscope
+                    itemtype="http://schema.org/OrganizationRole">
+                <div itemprop-reverse="memberOf" itemprop="member" itemscope
+                        itemtype="http://schema.org/Person">
+                  <span itemprop="name">Alice</span>
+                </div>
+                <span itemprop="startDate">1977</span>
+              </div>
+            </div>
+          ),
+          %q(
+            @prefix schema: <http://schema.org/> .
+            @prefix md: <http://www.w3.org/ns/md#> .
+
+            _:a a schema:Organization;
+                schema:name "Cryptography Users";
+                schema:member _:b .
+            _:b a schema:OrganizationRole;
+                schema:startDate "1977";
+                schema:member _:c;
+                schema:memberOf _:a .
+            _:c a schema:Person;
+                schema:name "Alice";
+                schema:memberOf _:b .
+          )
+        ],
+      }.each do |name, (md, nt)|
+        it "expands #{name}" do
+          expect(parse(md)).to be_equivalent_graph(nt, logger: @logger)
+        end
+      end
+    end
+
+    context "vocabulary expansion", pending: true do
+      it "always expands" do
+        md = %q(
+          <div itemscope='' itemtype='http://schema.org/Person'>
+            <link itemprop='additionalType' href='http://xmlns.com/foaf/0.1/Person' />
+          </div>
+        )
+        ttl = %q(
+          [ a <http://schema.org/Person>, <http://xmlns.com/foaf/0.1/Person>;
+            <http://schema.org/additionalType> <http://xmlns.com/foaf/0.1/Person>
+          ] .
+        )
+
+        expect(parse(md, vocab_expansion: true)).to be_equivalent_graph(ttl, logger: @logger)
+      end
+    end
+
+    context "test-files", skip: true do
+      Dir.glob(File.join(File.expand_path(File.dirname(__FILE__)), "test-files", "*.html")).each do |md|
+        it "parses #{md}" do
+          test_file(md)
+        end
+      end
+    end
+  end
+  
+  # Parses Microdata +input+ through the JSON-LD based reader (selected by the
+  # +jsonld+ option) and returns the resulting graph; +options+ override defaults.
+  def parse(input, options = {})
+    @logger = RDF::Spec.logger
+    graph = options[:graph] || RDF::Graph.new
+    RDF::Microdata::Reader.new(input, {
+        logger: @logger,
+        jsonld: true,
+        validate: false,
+        base_uri: "http://example/",
+        registry: registry_path,
+        canonicalize: false}.merge(options)).each do |statement|
+      graph << statement
+    end
+
+    # Remove any rdfa:usesVocabulary statements
+    graph.query(predicate: RDF::RDFA.usesVocabulary).each {|stmt| graph.delete(stmt)}
+    graph
+  end
+
+  # Parses the HTML fixture at +filepath+ and compares it with its sibling .ttl file.
+  def test_file(filepath, options = {})
+    graph = File.open(filepath) {|file| parse(file, options)}
+    ttl_string = File.read(filepath.sub(/\.html\z/, '.ttl'))
+    expect(graph).to be_equivalent_graph(ttl_string, logger: @logger)
+  end
+end

From e6265437316b081c7978bf1751c41b324653d205 Mon Sep 17 00:00:00 2001
From: Gregg Kellogg <gregg@greggkellogg.net>
Date: Wed, 16 Aug 2017 15:10:48 -0700
Subject: [PATCH 06/12] Update CLI commands.

---
 examples/blog_posting.html  | 34 ++++++++++++++++++++++++++++++++++
 lib/rdf/microdata/format.rb | 10 ++++++++++
 2 files changed, 44 insertions(+)
 create mode 100644 examples/blog_posting.html

diff --git a/examples/blog_posting.html b/examples/blog_posting.html
new file mode 100644
index 0000000..9676054
--- /dev/null
+++ b/examples/blog_posting.html
@@ -0,0 +1,34 @@
+<!DOCTYPE HTML>
+<title>My Blog</title>
+<article itemscope itemtype="https://schema.org/BlogPosting">
+ <header>
+  <h1 itemprop="headline">Progress report</h1>
+  <p><time itemprop="datePublished" datetime="2013-08-29">today</time></p>
+  <link itemprop="url" href="?comments=0">
+ </header>
+ <p>All in all, he's doing well with his swim lessons. The biggest thing was he had trouble
+ putting his head in, but we got it down.</p>
+ <section>
+  <h1>Comments</h1>
+  <article itemprop="comment" itemscope itemtype="https://schema.org/Comment" id="c1">
+   <link itemprop="url" href="#c1">
+   <footer>
+    <p>Posted by: <span itemprop="creator" itemscope itemtype="https://schema.org/Person">
+     <span itemprop="name">Greg</span>
+    </span></p>
+    <p><time itemprop="dateCreated" datetime="2013-08-29">15 minutes ago</time></p>
+   </footer>
+   <p>Ha!</p>
+  </article>
+  <article itemprop="comment" itemscope itemtype="https://schema.org/Comment" id="c2">
+   <link itemprop="url" href="#c2">
+   <footer>
+    <p>Posted by: <span itemprop="creator" itemscope itemtype="https://schema.org/Person">
+     <span itemprop="name">Charlotte</span>
+    </span></p>
+    <p><time itemprop="dateCreated" datetime="2013-08-29">5 minutes ago</time></p>
+   </footer>
+   <p>When you say "we got it down"...</p>
+  </article>
+ </section>
+</article>
diff --git a/lib/rdf/microdata/format.rb b/lib/rdf/microdata/format.rb
index 193bb52..7384868 100644
--- a/lib/rdf/microdata/format.rb
+++ b/lib/rdf/microdata/format.rb
@@ -51,6 +51,11 @@ def self.cli_commands
           description: "Transform HTML+Microdata into HTML+RDFa",
           parse: false,
           help: "to-rdfa files ...\nTransform HTML+Microdata into HTML+RDFa",
+          filter: {
+            format: :microdata,
+            output_format: :jsonld
+          },
+          option_use: {output_format: :disabled},
           lambda: ->(files, options) do
             out = options[:output] || $stdout
             xsl = Nokogiri::XSLT(%(<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
@@ -125,6 +130,11 @@ def self.cli_commands
           description: "Transform HTML+Microdata into JSON-LD",
           parse: false,
           help: "to-jsonld files ...\nTransform HTML+Microdata into JSON-LD",
+          filter: {
+            format: :microdata,
+            output_format: :rdfa
+          },
+          option_use: {output_format: :disabled},
           lambda: ->(files, options) do
             out = options[:output] || $stdout
             if files.empty?

From 4930bf181421f4513b7d423d5e4b27b101ee3de9 Mon Sep 17 00:00:00 2001
From: Gregg Kellogg <gregg@greggkellogg.net>
Date: Thu, 17 Aug 2017 09:05:20 -0700
Subject: [PATCH 07/12] Update rdf dependency.

---
 rdf-microdata.gemspec | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rdf-microdata.gemspec b/rdf-microdata.gemspec
index 623d957..a192559 100755
--- a/rdf-microdata.gemspec
+++ b/rdf-microdata.gemspec
@@ -24,7 +24,7 @@ Gem::Specification.new do |gem|
 
   gem.required_ruby_version = '>= 2.2.2'
   gem.requirements          = []
-  gem.add_runtime_dependency     'rdf',             '~> 2.2'
+  gem.add_runtime_dependency     'rdf',             '~> 2.2', '>= 2.2.8'
   gem.add_runtime_dependency     'rdf-xsd',         '~> 2.1'
   gem.add_runtime_dependency     'htmlentities',    '~> 4.3'
   gem.add_runtime_dependency     'nokogiri' ,       '~> 1.7'

From 9128422ada958b4593901eb827a47ad543db0aba Mon Sep 17 00:00:00 2001
From: Gregg Kellogg <gregg@kellogg-assoc.com>
Date: Mon, 21 Aug 2017 10:03:50 -0600
Subject: [PATCH 08/12] Update rubyforge references.

---
 README.md            | 2 +-
 lib/rdf/microdata.rb | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 7a696c2..c8d5da1 100755
--- a/README.md
+++ b/README.md
@@ -77,7 +77,7 @@ The reader exposes a `#json` method, which can be used to retrieve the generated
 
 ## Resources
 * [RDF.rb][RDF.rb]
-* [Documentation](http://rdf.rubyforge.org/microdata)
+* [Documentation](http://www.rubydoc.info/github/ruby-rdf/rdf-microdata/)
 * [History](file:History.md)
 * [Microdata][]
 * [Microdata RDF][]
diff --git a/lib/rdf/microdata.rb b/lib/rdf/microdata.rb
index 101f09a..98b386b 100644
--- a/lib/rdf/microdata.rb
+++ b/lib/rdf/microdata.rb
@@ -15,7 +15,7 @@ module RDF
   #     end
   #   end
   #
-  # @see http://rdf.rubyforge.org/
+  # @see http://www.rubydoc.info/github/ruby-rdf/rdf/
   # @see http://www.w3.org/TR/2011/WD-microdata-20110525/
   #
   # @author [Gregg Kellogg](http://greggkellogg.net/)

From 735ed0245b16d4625172ea9470903f0c3009c4fd Mon Sep 17 00:00:00 2001
From: Gregg Kellogg <gregg@kellogg-assoc.com>
Date: Mon, 21 Aug 2017 16:14:41 -0600
Subject: [PATCH 09/12] Remove rubyforge reference.

---
 rdf-microdata.gemspec | 1 -
 1 file changed, 1 deletion(-)

diff --git a/rdf-microdata.gemspec b/rdf-microdata.gemspec
index a192559..283f2c4 100755
--- a/rdf-microdata.gemspec
+++ b/rdf-microdata.gemspec
@@ -10,7 +10,6 @@ Gem::Specification.new do |gem|
   gem.license               = 'Unlicense'
   gem.summary               = "Microdata reader for Ruby."
   gem.description           = 'Reads HTML Microdata as RDF.'
-  gem.rubyforge_project     = 'rdf-microdata'
 
   gem.authors               = %w(Gregg Kellogg)
   gem.email                 = 'public-rdf-ruby@w3.org'

From cdcea807f01d066dff2f2b46eb1a072411f80964 Mon Sep 17 00:00:00 2001
From: Gregg Kellogg <gregg@greggkellogg.net>
Date: Tue, 10 Oct 2017 16:14:20 -0700
Subject: [PATCH 10/12] Don't restrict rdfa and jsonld reader output on output
 format.

---
 Gemfile                     | 18 +++++++++---------
 examples/to_jsonld.html     | 34 ++++++++++++++++++++++++++++++++++
 lib/rdf/microdata/format.rb |  6 ++----
 rdf-microdata.gemspec       |  6 +++---
 4 files changed, 48 insertions(+), 16 deletions(-)
 create mode 100644 examples/to_jsonld.html

diff --git a/Gemfile b/Gemfile
index e63746f..10db0a6 100644
--- a/Gemfile
+++ b/Gemfile
@@ -2,19 +2,19 @@ source "http://rubygems.org"
 
 gemspec
 
-gem "rdf",            github: "ruby-rdf/rdf",      branch: "develop"
-gem "rdf-rdfa",       github: "ruby-rdf/rdf-rdfa", branch: "develop"
-gem "rdf-xsd",        github: "ruby-rdf/rdf-xsd",  branch: "develop"
+gem "rdf",            git: "https://github.com/ruby-rdf/rdf",      branch: "develop"
+gem "rdf-rdfa",       git: "https://github.com/ruby-rdf/rdf-rdfa", branch: "develop"
+gem "rdf-xsd",        git: "https://github.com/ruby-rdf/rdf-xsd",  branch: "develop"
 gem "nokogumbo",      '~> 1.4'
 
 group :development do
   gem 'linkeddata'
-  gem 'ebnf',               github: "gkellogg/ebnf",                branch: "develop"
-  gem 'rdf-aggregate-repo', github: "ruby-rdf/rdf-aggregate-repo",  branch: "develop"
-  gem 'rdf-isomorphic',     github: "ruby-rdf/rdf-isomorphic",      branch: "develop"
-  gem "rdf-spec",           github: "ruby-rdf/rdf-spec",            branch: "develop"
-  gem 'rdf-turtle',         github: "ruby-rdf/rdf-turtle",          branch: "develop"
-  gem 'sxp',                github: "dryruby/sxp.rb",               branch: "develop"
+  gem 'ebnf',               git: "https://github.com/gkellogg/ebnf",                branch: "develop"
+  gem 'rdf-aggregate-repo', git: "https://github.com/ruby-rdf/rdf-aggregate-repo",  branch: "develop"
+  gem 'rdf-isomorphic',     git: "https://github.com/ruby-rdf/rdf-isomorphic",      branch: "develop"
+  gem "rdf-spec",           git: "https://github.com/ruby-rdf/rdf-spec",            branch: "develop"
+  gem 'rdf-turtle',         git: "https://github.com/ruby-rdf/rdf-turtle",          branch: "develop"
+  gem 'sxp',                git: "https://github.com/dryruby/sxp.rb",               branch: "develop"
 end
 
 group :debug do
diff --git a/examples/to_jsonld.html b/examples/to_jsonld.html
new file mode 100644
index 0000000..9676054
--- /dev/null
+++ b/examples/to_jsonld.html
@@ -0,0 +1,34 @@
+<!DOCTYPE HTML>
+<title>My Blog</title>
+<article itemscope itemtype="https://schema.org/BlogPosting">
+ <header>
+  <h1 itemprop="headline">Progress report</h1>
+  <p><time itemprop="datePublished" datetime="2013-08-29">today</time></p>
+  <link itemprop="url" href="?comments=0">
+ </header>
+ <p>All in all, he's doing well with his swim lessons. The biggest thing was he had trouble
+ putting his head in, but we got it down.</p>
+ <section>
+  <h1>Comments</h1>
+  <article itemprop="comment" itemscope itemtype="https://schema.org/Comment" id="c1">
+   <link itemprop="url" href="#c1">
+   <footer>
+    <p>Posted by: <span itemprop="creator" itemscope itemtype="https://schema.org/Person">
+     <span itemprop="name">Greg</span>
+    </span></p>
+    <p><time itemprop="dateCreated" datetime="2013-08-29">15 minutes ago</time></p>
+   </footer>
+   <p>Ha!</p>
+  </article>
+  <article itemprop="comment" itemscope itemtype="https://schema.org/Comment" id="c2">
+   <link itemprop="url" href="#c2">
+   <footer>
+    <p>Posted by: <span itemprop="creator" itemscope itemtype="https://schema.org/Person">
+     <span itemprop="name">Charlotte</span>
+    </span></p>
+    <p><time itemprop="dateCreated" datetime="2013-08-29">5 minutes ago</time></p>
+   </footer>
+   <p>When you say "we got it down"...</p>
+  </article>
+ </section>
+</article>
diff --git a/lib/rdf/microdata/format.rb b/lib/rdf/microdata/format.rb
index 7384868..9eef5aa 100644
--- a/lib/rdf/microdata/format.rb
+++ b/lib/rdf/microdata/format.rb
@@ -52,8 +52,7 @@ def self.cli_commands
           parse: false,
           help: "to-rdfa files ...\nTransform HTML+Microdata into HTML+RDFa",
           filter: {
-            format: :microdata,
-            output_format: :jsonld
+            format: :microdata
           },
           option_use: {output_format: :disabled},
           lambda: ->(files, options) do
@@ -131,8 +130,7 @@ def self.cli_commands
           parse: false,
           help: "to-jsonld files ...\nTransform HTML+Microdata into JSON-LD",
           filter: {
-            format: :microdata,
-            output_format: :rdfa
+            format: :microdata
           },
           option_use: {output_format: :disabled},
           lambda: ->(files, options) do
diff --git a/rdf-microdata.gemspec b/rdf-microdata.gemspec
index 283f2c4..a9cf6f9 100755
--- a/rdf-microdata.gemspec
+++ b/rdf-microdata.gemspec
@@ -24,13 +24,13 @@ Gem::Specification.new do |gem|
   gem.required_ruby_version = '>= 2.2.2'
   gem.requirements          = []
   gem.add_runtime_dependency     'rdf',             '~> 2.2', '>= 2.2.8'
-  gem.add_runtime_dependency     'rdf-xsd',         '~> 2.1'
+  gem.add_runtime_dependency     'rdf-xsd',         '~> 2.2'
   gem.add_runtime_dependency     'htmlentities',    '~> 4.3'
-  gem.add_runtime_dependency     'nokogiri' ,       '~> 1.7'
+  gem.add_runtime_dependency     'nokogiri' ,       '~> 1.8'
 
   gem.add_development_dependency 'equivalent-xml' , '~> 0.6'
   gem.add_development_dependency 'yard' ,           '~> 0.9'
-  gem.add_development_dependency 'rspec',           '~> 3.5'
+  gem.add_development_dependency 'rspec',           '~> 3.6'
   gem.add_development_dependency 'rspec-its',       '~> 1.2'
   
   gem.add_development_dependency 'json-ld',         '~> 2.1'

From 570547cdc1a6145d1a04a7517e94f88a845418e0 Mon Sep 17 00:00:00 2001
From: Gregg Kellogg <gregg@greggkellogg.net>
Date: Tue, 10 Oct 2017 16:25:48 -0700
Subject: [PATCH 11/12] Fix to-rdfa and to-jsonld access of `options`.

---
 lib/rdf/microdata/format.rb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/rdf/microdata/format.rb b/lib/rdf/microdata/format.rb
index 9eef5aa..722da35 100644
--- a/lib/rdf/microdata/format.rb
+++ b/lib/rdf/microdata/format.rb
@@ -107,7 +107,7 @@ def self.cli_commands
               # If files are empty, either use options[::evaluate]
               input = options[:evaluate] ? StringIO.new(options[:evaluate]) : STDIN
               input.set_encoding(options.fetch(:encoding, Encoding::UTF_8))
-              RDF::Microdata::Reader.new(input, options(rdfa: true)) do |reader|
+              RDF::Microdata::Reader.new(input, options.merge(rdfa: true)) do |reader|
                 reader.rdfa.xpath("//text()").each do |txt|
                   txt.content = txt.content.to_s.strip
                 end
@@ -139,7 +139,7 @@ def self.cli_commands
               # If files are empty, either use options[::evaluate]
               input = options[:evaluate] ? StringIO.new(options[:evaluate]) : STDIN
               input.set_encoding(options.fetch(:encoding, Encoding::UTF_8))
-              RDF::Microdata::Reader.new(input, options(jsonld: true)) do |reader|
+              RDF::Microdata::Reader.new(input, options.merge(jsonld: true)) do |reader|
                 out.puts reader.jsonld.to_json(::JSON::LD::JSON_STATE)
               end
             else

From 5181927a6486aa5f6f10c2d4126b7d3f3868a2c1 Mon Sep 17 00:00:00 2001
From: Gregg Kellogg <gregg@greggkellogg.net>
Date: Tue, 10 Oct 2017 16:28:12 -0700
Subject: [PATCH 12/12] Version 2.2.2.

---
 VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VERSION b/VERSION
index c043eea..b1b25a5 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.2.1
+2.2.2