diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7eb055f..32255be 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,7 +36,7 @@ jobs: - name: Run tests run: ruby --version; bundle exec rspec spec || $ALLOW_FAILURES - name: Coveralls GitHub Action - uses: coverallsapp/github-action@v1.1.2 + uses: coverallsapp/github-action@v2 if: ${{ matrix.ruby == '3.0' && matrix.gemfile == 'Gemfile' }} with: github-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/Gemfile b/Gemfile index 0c72d41..000e71a 100644 --- a/Gemfile +++ b/Gemfile @@ -3,7 +3,6 @@ source "https://rubygems.org" gemspec gem 'rdf', github: "ruby-rdf/rdf", branch: "develop" -gem 'rdf-rdfa', github: "ruby-rdf/rdf-rdfa", branch: "develop" gem "nokogiri", '~> 1.13', '>= 1.13.4', platforms: [:mri, :jruby] group :development do diff --git a/Gemfile-pure b/Gemfile-pure index c44310f..bbb117c 100644 --- a/Gemfile-pure +++ b/Gemfile-pure @@ -4,7 +4,6 @@ source "https://rubygems.org" gemspec gem 'rdf', github: "ruby-rdf/rdf", branch: "develop" -gem 'rdf-rdfa', github: "ruby-rdf/rdf-rdfa", branch: "develop" group :development do gem 'ebnf', github: "dryruby/ebnf", branch: "develop" diff --git a/README.md b/README.md index 9c13f56..ace7513 100755 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [RDF/XML][] reader/writer for [RDF.rb][]. -[![Gem Version](https://badge.fury.io/rb/rdf-rdfxml.png)](https://badge.fury.io/rb/rdf-rdfxml) +[![Gem Version](https://badge.fury.io/rb/rdf-rdfxml.svg)](https://badge.fury.io/rb/rdf-rdfxml) [![Build Status](https://github.com/ruby-rdf/rdf-rdfxml/workflows/CI/badge.svg?branch=develop)](https://github.com/ruby-rdf/rdf-rdfxml/actions?query=workflow%3ACI) [![Coverage Status](https://coveralls.io/repos/ruby-rdf/rdf-rdfxml/badge.svg?branch=develop)](https://coveralls.io/github/ruby-rdf/rdf-rdfxml?branch=develop) [![Gitter chat](https://badges.gitter.im/ruby-rdf/rdf.png)](https://gitter.im/ruby-rdf/rdf) @@ -43,8 +43,8 @@ Write a graph to a file: ## Dependencies * [RDF.rb](https://rubygems.org/gems/rdf) (~> 3.2) -* [Haml](https://rubygems.org/gems/haml) (~>- 5.2) -* Soft dependency on [Nokogiri](https://rubygems.org/gems/nokogiri) (>= 1.12) +* [Builder](https://rubygems.org/gems/builder) (~>- 3.2) +* Soft dependency on [Nokogiri](https://rubygems.org/gems/nokogiri) (>= 1.13) ## Documentation Full documentation available on [Rubydoc.info][RDF/XML doc]) diff --git a/VERSION b/VERSION index e4604e3..be94e6f 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.2.1 +3.2.2 diff --git a/lib/rdf/rdfxml.rb b/lib/rdf/rdfxml.rb index dab0040..003c4e0 100644 --- a/lib/rdf/rdfxml.rb +++ b/lib/rdf/rdfxml.rb @@ -2,7 +2,8 @@ require 'rdf' module RDF - autoload :XML, 'rdf/rdfa/vocab' + XML = Class.new(Vocabulary("http://www.w3.org/XML/1998/namespace")) + ## # **`RDF::RDFXML`** is an RDF/XML extension for RDF.rb. # diff --git a/lib/rdf/rdfxml/extensions.rb b/lib/rdf/rdfxml/extensions.rb new file mode 100644 index 0000000..645ebd2 --- /dev/null +++ b/lib/rdf/rdfxml/extensions.rb @@ -0,0 +1,54 @@ +# Extend builder to allow for better control of whitespace in XML Literals + +require 'builder' + +module Builder + # Extends XmlMarkup#tag! to better control whitespace when adding content from a block + # + class RdfXml < Builder::XmlMarkup + # Create a tag named +sym+. Other than the first argument which + # is the tag name, the arguments are the same as the tags + # implemented via method_missing. + # + # @see https://github.com/jimweirich/builder/blob/master/lib/builder/xmlbase.rb + def tag!(sym, *args, &block) + text = nil + attrs = args.last.is_a?(::Hash) ? args.last : {} + return super unless block && attrs[:no_whitespace] + attrs.delete(:no_whitespace) + + sym = "#{sym}:#{args.shift}".to_sym if args.first.kind_of?(::Symbol) + + args.each do |arg| + case arg + when ::Hash + attrs.merge!(arg) + when nil + attrs.merge!({:nil => true}) if explicit_nil_handling? + else + text ||= '' + text << arg.to_s + end + end + + unless text.nil? + ::Kernel::raise ::ArgumentError, + "XmlMarkup cannot mix a text argument with a block" + end + + # Indent + _indent + #unless @indent == 0 || @level == 0 + # text!(" " * (@level * @indent)) + #end + + _start_tag(sym, attrs) + begin + _nested_structures(block) + ensure + _end_tag(sym) + _newline + end + end + end +end diff --git a/lib/rdf/rdfxml/patches/nokogiri_hacks.rb b/lib/rdf/rdfxml/patches/nokogiri_hacks.rb deleted file mode 100644 index 8136b3a..0000000 --- a/lib/rdf/rdfxml/patches/nokogiri_hacks.rb +++ /dev/null @@ -1,23 +0,0 @@ -require 'nokogiri' -class Nokogiri::XML::Node - # URI of namespace + node_name - def uri - ns = self.namespace ? self.namespace.href : RDF::XML.to_s - RDF::URI.intern(ns + self.node_name) - end - - alias_method :attribute_with_ns_without_ffi_null, :attribute_with_ns - ## - # Monkey patch attribute_with_ns, to insure nil is returned for #null? - # - # Get the attribute node with name and namespace - # - # @param [String] name - # @param [String] namespace - # @return [Nokogiri::XML::Attr] - def attribute_with_ns(name, namespace) - a = attribute_with_ns_without_ffi_null(name, namespace) - - (a.respond_to?(:null?) && a.null?) ? nil : a # to ensure FFI Pointer compatibility - end -end diff --git a/lib/rdf/rdfxml/reader.rb b/lib/rdf/rdfxml/reader.rb index 23dadf4..baf52c2 100644 --- a/lib/rdf/rdfxml/reader.rb +++ b/lib/rdf/rdfxml/reader.rb @@ -1,6 +1,6 @@ begin require 'nokogiri' -rescue LoadError => e +rescue LoadError :rexml end require 'rdf/xsd' @@ -91,7 +91,7 @@ def extract_mappings(element, &cb) # Produce the next list entry for this context def li_next @li_counter += 1 - predicate = RDF["_#{@li_counter}"] + RDF["_#{@li_counter}"] end # Set XML base. Ignore any fragment @@ -328,7 +328,6 @@ def nodeElement(el, ec) end # Handle the propertyEltList children events in document order - li_counter = 0 # this will increase for each li we iterate through el.children.each do |child| log_fatal "child must be a proxy not a #{child.class}" unless child.is_a?(@implementation::NodeProxy) next unless child.element? diff --git a/lib/rdf/rdfxml/writer.rb b/lib/rdf/rdfxml/writer.rb index b1ce122..79e2520 100644 --- a/lib/rdf/rdfxml/writer.rb +++ b/lib/rdf/rdfxml/writer.rb @@ -1,4 +1,4 @@ -require 'rdf/rdfa' +require_relative 'extensions' module RDF::RDFXML ## @@ -47,12 +47,22 @@ module RDF::RDFXML # end # # @author [Gregg Kellogg](http://greggkellogg.net/) - class Writer < RDF::RDFa::Writer + class Writer < RDF::Writer format RDF::RDFXML::Format include RDF::Util::Logger VALID_ATTRIBUTES = [:none, :untyped, :typed] + # Defines rdf:type of subjects to be emitted at the beginning of the document. + # @return [Array] + attr :top_classes + + # @return [Graph] Graph of statements serialized + attr_accessor :graph + + # @return [RDF::URI] Base URI used for relativizing URIs + attr_accessor :base_uri + ## # RDF/XML Writer options # @see https://ruby-rdf.github.io/rdf/RDF/Writer#options-class_method @@ -71,8 +81,8 @@ def self.options RDF::CLI::Option.new( symbol: :lang, datatype: String, - on: ["--lang"], - description: "Output as root @lang attribute, and avoid generation _@lang_ where possible."), + on: ["--lang LANG", :REQUIRED], + description: "Output as root xml:lang attribute, and avoid generation xml:lang, where possible.") {|arg| RDF::URI(arg)}, RDF::CLI::Option.new( symbol: :max_depth, datatype: Integer, @@ -93,71 +103,152 @@ def self.options # the output stream # @param [Hash{Symbol => Object}] options # any additional options - # @option options [Boolean] :canonicalize (false) - # whether to canonicalize literals when serializing - # @option options [Hash] :prefixes (Hash.new) - # the prefix mappings to use (not supported by all writers) + # @option options [Symbol] :attributes (nil) + # How to use XML attributes when serializing, one of :none, :untyped, :typed. The default is :none. # @option options [#to_s] :base_uri (nil) # the base URI to use when constructing relative URIs - # @option options [Integer] :max_depth (10) - # Maximum depth for recursively defining resources + # @option options [Boolean] :canonicalize (false) + # whether to canonicalize literals when serializing + # @option options [String] :default_namespace (nil) + # URI to use as default namespace, same as prefix(nil) # @option options [#to_s] :lang (nil) # Output as root xml:lang attribute, and avoid generation _xml:lang_ where possible - # @option options [Symbol] :attributes (nil) - # How to use XML attributes when serializing, one of :none, :untyped, :typed. The default is :none. + # @option options [Integer] :max_depth (10) + # Maximum depth for recursively defining resources + # @option options [Hash] :prefixes (Hash.new) + # the prefix mappings to use (not supported by all writers) # @option options [Boolean] :standard_prefixes (false) # Add standard prefixes to _prefixes_, if necessary. - # @option options [String] :default_namespace (nil) - # URI to use as default namespace, same as prefix(nil) # @option options [String] :stylesheet (nil) # URI to use as @href for output stylesheet processing instruction. + # @option options [Array] :top_classes ([RDF::RDFS.Class]) + # Defines rdf:type of subjects to be emitted at the beginning of the document. # @yield [writer] # @yieldparam [RDF::Writer] writer def initialize(output = $stdout, **options, &block) - super - end + super do + @graph = RDF::Graph.new + @uri_to_prefix = {} + @uri_to_qname = {} + @top_classes = options[:top_classes] || [RDF::RDFS.Class] - # @return [Hash String>] - def haml_template - return @haml_template if @haml_template - case @options[:haml] - when Hash then @options[:haml] - else DEFAULT_HAML + block.call(self) if block_given? end end + ## + # Addes a triple to be serialized + # @param [RDF::Resource] subject + # @param [RDF::URI] predicate + # @param [RDF::Value] object + # @return [void] + # @raise [NotImplementedError] unless implemented in subclass + # @abstract + # @raise [RDF::WriterError] if validating and attempting to write an invalid {RDF::Term}. + def write_triple(subject, predicate, object) + @graph.insert(RDF::Statement(subject, predicate, object)) + end + def write_epilogue - @force_RDF_about = {} @max_depth = @options.fetch(:max_depth, 10) @attributes = @options.fetch(:attributes, :none) + @base_uri = RDF::URI(@options[:base_uri]) if @options[:base_uri] + @lang = @options[:lang] + self.reset + + log_debug {"\nserialize: graph size: #{@graph.size}"} + + preprocess + # Prefixes + prefix = prefixes.keys.map {|pk| "#{pk}: #{prefixes[pk]}"}.sort.join(" ") unless prefixes.empty? + log_debug {"\nserialize: prefixes: #{prefix.inspect}"} + + @subjects = order_subjects + + # Generate document + doc = render_document(@subjects, + lang: @lang, + base: base_uri, + prefix: prefix, + stylesheet: @options[:stylesheet]) do |s| + subject(s) + end + @output.write(doc) super end protected + + # Reset parser to run again + def reset + @options[:log_depth] = 0 + @references = {} + @serialized = {} + @subjects = {} + end + + # Render document using `haml_template[:doc]`. Yields each subject to be rendered separately. + # + # @param [Array] subjects + # Ordered list of subjects. Template must yield to each subject, which returns + # the serialization of that subject (@see #subject_template) + # @param [Hash{Symbol => Object}] options Rendering options passed to Haml render. + # @option options [RDF::URI] base (nil) + # Base URI added to document, used for shortening URIs within the document. + # @option options [Symbol, String] language (nil) + # Value of @lang attribute in document, also allows included literals to omit + # an @lang attribute if it is equivalent to that of the document. + # @option options [String] title (nil) + # Value of html>head>title element. + # @option options [String] prefix (nil) + # Value of @prefix attribute. + # @option options [String] haml (haml_template[:doc]) + # Haml template to render. + # @yield [subject] + # Yields each subject + # @yieldparam [RDF::URI] subject + # @yieldparam [Builder::RdfXml] builder + # @yieldreturn [:ignored] + # @return String + # The rendered document is returned as a string + def render_document(subjects, lang: nil, base: nil, **options, &block) + builder = Builder::RdfXml.new(indent: 2) + builder.instruct! :xml, :version=>"1.0", :encoding=>"UTF-8" + builder.instruct! :'xml-stylesheet', type: 'text/xsl', href: options[:stylesheet] if options[:stylesheet] + attrs = prefix_attrs + attrs[:"xml:lang"] = lang if lang + attrs[:"xml:base"] = base if base + + builder.rdf(:RDF, **attrs) do |b| + subjects.each do |subject| + render_subject(subject, b, **options) + end + end + end + # Render a subject using `haml_template[:subject]`. # # The _subject_ template may be called either as a top-level element, or recursively under another element if the _rel_ local is not nil. # # For RDF/XML, removes from predicates those that can be rendered as attributes, and adds the `:attr_props` local for the Haml template, which includes all attributes to be rendered as properties. # - # Yields each predicate/property to be rendered separately (@see #render_property_value and `#render_property_values`). + # Yields each property to be rendered separately. # # @param [Array] subject # Subject to render - # @param [Array] predicates - # Predicates of subject. Each property is yielded for separate rendering. + # @param [Builder::RdfXml] builder # @param [Hash{Symbol => Object}] options Rendering options passed to Haml render. # @option options [String] about (nil) - # About description, a CURIE, URI or Node definition. + # About description, a QName, URI or Node definition. # May be nil if no @about is rendered (e.g. unreferenced Nodes) # @option options [String] resource (nil) - # Resource description, a CURIE, URI or Node definition. + # Resource description, a QName, URI or Node definition. # May be nil if no @resource is rendered # @option options [String] rel (nil) - # Optional @rel property description, a CURIE, URI or Node definition. + # Optional @rel property description, a QName, URI or Node definition. # @option options [String] typeof (nil) - # RDF type as a CURIE, URI or Node definition. + # RDF type as a QName, URI or Node definition. # If :about is nil, this defaults to the empty string (""). # @option options [:li, nil] element (nil) # Render with <li>, otherwise with template default. @@ -167,102 +258,54 @@ def write_epilogue # Yields each predicate # @yieldparam [RDF::URI] predicate # @yieldreturn [:ignored] - # @return String - # The rendered document is returned as a string - # Return Haml template for document from `haml_template[:subject]` - def render_subject(subject, predicates, **options, &block) - # extract those properties that can be rendered as attributes - attr_props = if [:untyped, :typed].include?(@attributes) - options[:property_values].inject({}) do |memo, (prop, values)| - object = values.first - if values.length == 1 && - object.literal? && - (object.plain? || @attributes == :typed) && - get_lang(object).nil? + # @return Builder::RdfXml + def render_subject(subject, builder, **options, &block) + return nil if is_done?(subject) + + attr_props, embed_props, types = prop_partition(properties_for_subject(subject)) + + # The first type is used for + first_type = types.shift + type_qname = get_qname(first_type) if first_type && !first_type.node? + type_qname = nil unless type_qname.is_a?(String) + types.unshift(first_type) if first_type && !type_qname + type_qname ||= "rdf:Description" + + attr_props = attr_props.merge("rdf:nodeID": subject.id) if subject.node? && ref_count(subject) >= 1 + attr_props = attr_props.merge("rdf:about": subject.relativize(base_uri)) if subject.uri? + + log_debug {"render_subject(#{subject.inspect})"} + subject_done(subject) - memo[get_qname(RDF::URI(prop))] = object.value + builder.tag!(type_qname, **attr_props) do |b| + types.each do |type| + if type.node? + b.tag!("rdf:type", "rdf:nodeID": type.id) + else + b.tag!("rdf:type", "rdf:resource": type.to_s) end - memo end - else - {} - end - - predicates -= attr_props.keys.map {|k| expand_curie(k).to_s} - super(subject, predicates, **options.merge(attr_props: attr_props), &block) - end - # See if we can serialize as attribute. - # * untyped attributes that aren't duplicated where xml:lang == @lang - # * typed attributes that aren't duplicated if @dt_as_attr is true - # * rdf:type - def predicate_as_attribute?(prop, object) - [:untyped, :typed].include?(@attributes) && ( - prop == RDF.type || - [:typed].include?(@attributes) && object.literal? && object.typed? || - (object.literal? && object.simple? || @lang && object.language.to_s == @lang.to_s) - ) - end - # Render document using `haml_template[:doc]`. Yields each subject to be rendered separately. - # - # For RDF/XML pass along a stylesheet option. - # - # @param [Array] subjects - # Ordered list of subjects. Template must yield to each subject, which returns - # the serialization of that subject (@see #subject_template) - # @param [Hash{Symbol => Object}] options Rendering options passed to Haml render. - # @option options [RDF::URI] base (nil) - # Base URI added to document, used for shortening URIs within the document. - # @option options [Symbol, String] language (nil) - # Value of @lang attribute in document, also allows included literals to omit - # an @lang attribute if it is equivalent to that of the document. - # @option options [String] title (nil) - # Value of html>head>title element. - # @option options [String] prefix (nil) - # Value of @prefix attribute. - # @option options [String] haml (haml_template[:doc]) - # Haml template to render. - # @yield [subject] - # Yields each subject - # @yieldparam [RDF::URI] subject - # @yieldreturn [:ignored] - # @return String - # The rendered document is returned as a string - def render_document(subjects, **options, &block) - super(subjects, **options.merge(stylesheet: @options[:stylesheet]), &block) + log_depth do + embed_props.each do |p, objects| + render_property(p, objects, b, **options) + end + end + end end - # Render a single- or multi-valued predicate using `haml_template[:property_value]` or `haml_template[:property_values]`. Yields each object for optional rendering. The block should only render for recursive subject definitions (i.e., where the object is also a subject and is rendered underneath the first referencing subject). - # - # For RDF/XML, pass the `:no_list_literals` option onto the `RDFa` implementation because of special considerations for lists in RDF/XML. + # Render a single- or multi-valued property. Yields each object for optional rendering. The block should only render for recursive subject definitions (i.e., where the object is also a subject and is rendered underneath the first referencing subject). # # If a multi-valued property definition is not found within the template, the writer will use the single-valued property definition multiple times. # - # @param [Array] predicate - # Predicate to render. + # @param [String] property + # Property to render, already in QName form. # @param [Array] objects # List of objects to render. If the list contains only a single element, the :property_value template will be used. Otherwise, the :property_values template is used. + # @param [Builder::RdfXml] builder # @param [Hash{Symbol => Object}] options Rendering options passed to Haml render. - # @option options [String] :haml (haml_template[:property_value], haml_template[:property_values]) - # Haml template to render. Otherwise, uses `haml_template[:property_value] or haml_template[:property_values]` - # depending on the cardinality of objects. - # @option options [Boolean] :no_list_literals - # Do not serialize as a list if any elements are literal. - # @yield object, inlist - # Yields object and if it is contained in a list. - # @yieldparam [RDF::Resource] object - # @yieldparam [Boolean] inlist - # @yieldreturn [String, nil] - # The block should only return a string for recursive object definitions. - # @return String - # The rendered document is returned as a string - def render_property(predicate, objects, **options, &block) - log_debug {"render_property(#{predicate}): #{objects.inspect}, #{options.inspect}"} - # If there are multiple objects, and no :property_values is defined, call recursively with - # each object - - template = options[:haml] - template ||= haml_template[:property_value] + def render_property(property, objects, builder, **options) + log_debug {"render_property(#{property}): #{objects.inspect}"} # Separate out the objects which are lists and render separately lists = objects. @@ -270,38 +313,57 @@ def render_property(predicate, objects, **options, &block) map {|o| RDF::List.new(subject: o, graph: @graph)}. select {|l| l.valid? && l.none?(&:literal?)} + objects = objects - lists.map(&:subject) + unless lists.empty? # Render non-list objects - log_debug {"properties with lists: #{lists} non-lists: #{objects - lists.map(&:subject)}"} - nl = log_depth {render_property(predicate, objects - lists.map(&:subject), options, &block)} unless objects == lists.map(&:subject) - return nl.to_s + lists.map do |list| + log_debug(depth: log_depth + 1) {"properties with lists: #{lists} non-lists: #{objects - lists.map(&:subject)}"} + + unless objects.empty? + render_property(property, objects, builder, **options) + end + + # Render each list + lists.each do |list| # Render each list as multiple properties and set :inlist to true list.each_statement {|st| subject_done(st.subject)} - log_debug {"list: #{list.inspect} #{list.to_a}"} log_depth do - render_collection(predicate, list, **options) do |object| - yield(object, true) if block_given? - end + log_debug {"list: #{list.inspect} #{list.to_a}"} + render_collection(property, list, builder, **options) end - end.join(" ") + end end - if objects.length > 1 - # Render each property using property_value template - objects.map do |object| - log_depth {render_property(predicate, [object], **options, &block)} - end.join(" ") + if objects.length == 1 + recurse = log_depth <= @max_depth + object = objects.first + + if recurse && !is_done?(object) + builder.tag!(property) do |b| + render_subject(object, b, **options) + end + elsif object.literal? && object.datatype == RDF.XMLLiteral + builder.tag!(property, "rdf:parseType": "Literal", no_whitespace: true) do |b| + b << object.value + end + elsif object.literal? + attrs = {} + attrs[:"xml:lang"] = object.language if object.language? + attrs[:"rdf:datatype"] = object.datatype if object.datatype? + builder.tag!(property, object.value.to_s, **attrs) + elsif object.node? + builder.tag!(property, "rdf:nodeID": object.id) + else + builder.tag!(property, "rdf:resource": object.relativize(base_uri)) + end else - log_fatal("Missing property template", exception: RDF::WriterError) if template.nil? - - options = { - object: objects.first, - predicate: predicate, - property: get_qname(predicate), - recurse: log_depth <= @max_depth - }.merge(options) - hamlify(template, **options, &block) + # Render each property using property_value template + objects.each do |object| + log_depth do + render_property(property, [object], builder, **options) + end + end end end @@ -309,27 +371,27 @@ def render_property(predicate, objects, **options, &block) # Render a collection, which may be included in a property declaration, or # may be recursive within another collection # - # @param [RDF::URI] predicate + # @param [String] property in QName form # @param [RDF::List] list + # @param [Builder::RdfXml] builder # @param [Hash{Symbol => Object}] options - # @yield object - # Yields object, unless it is an included list - # @yieldparam [RDF::Resource] object - # @yieldreturn [String, nil] - # The block should only return a string for recursive object definitions. # @return String # The rendered collection is returned as a string - def render_collection(predicate, list, **options, &block) - template = options[:haml] || haml_template[:collection] - - options = { - list: list, - predicate: predicate, - property: get_qname(predicate), - recurse: log_depth <= @max_depth, - }.merge(options) - hamlify(template, **options) do |object| - yield object + def render_collection(property, list, builder, **options, &block) + builder.tag!(property, "rdf:parseType": "Collection") do |b| + list.each do |object| + if log_depth <= @max_depth && !is_done?(object) + render_subject(object, b) + elsif object.node? + if ref_count(object) > 1 + b.tag!("rdf:Description", "rdf:nodeID": object.id) + else + b.tag!("rdf:Description") + end + else + b.tag!("rdf:Description", "rdf:about": object.relativize(base_uri)) + end + end end end @@ -337,7 +399,7 @@ def render_collection(predicate, list, **options, &block) # @return [Hash{String => String}] def prefix_attrs prefixes.inject({}) do |memo, (k, v)| - memo[k ? "xmlns:#{k}" : "xmlns"] = v.to_s + memo[(k ? "xmlns:#{k}" : "xmlns").to_sym] = v.to_s memo end end @@ -363,32 +425,174 @@ def preprocess prefix(nil, @options[:default_namespace]) end - # Process each statement to establish CURIEs and Terms + # Process each statement to establish QNames and Terms @graph.each {|statement| preprocess_statement(statement)} end - ## - # Turn CURIE into a QNAME - def get_qname(uri) - curie = get_curie(uri) - curie.start_with?(":") ? curie[1..-1] : curie - end - # Perform any statement preprocessing required. This is used to perform reference counts and determine required prefixes. # - # For RDF/XML, make sure that all predicates have CURIEs + # For RDF/XML, make sure that all predicates have QNames # @param [Statement] statement def preprocess_statement(statement) - super + #log_debug {"preprocess: #{statement.inspect}"} + bump_reference(statement.object) + @subjects[statement.subject] = true + get_qname(statement.subject) + ensure_qname(statement.predicate) + statement.predicate == RDF.type && statement.object.uri? ? ensure_qname(statement.object) : get_qname(statement.object) + get_qname(statement.object.datatype) if statement.object.literal? && statement.object.datatype? + end + + private - # Invent a prefix for the predicate, if necessary - ensure_curie(statement.predicate) - ensure_curie(statement.object) if statement.predicate == RDF.type + # Order subjects for output. Override this to output subjects in another order. + # + # Uses #top_classes and #base_uri. + # @return [Array] Ordered list of subjects + def order_subjects + seen = {} + subjects = [] + + # Start with base_uri + if base_uri && @subjects.keys.include?(base_uri) + subjects << base_uri + seen[base_uri] = true + end + + # Add distinguished classes + top_classes. + select {|s| !seen.include?(s)}. + each do |class_uri| + graph.query({predicate: "rdf:type", object: class_uri}).map {|st| st.subject}.sort.uniq.each do |subject| + #log_debug {"order_subjects: #{subject.inspect}"} + subjects << subject + seen[subject] = true + end + end + + # Sort subjects by resources over nodes, ref_counts and the subject URI itself + recursable = @subjects.keys. + select {|s| !seen.include?(s)}. + map {|r| [r.is_a?(RDF::Node) ? 1 : 0, ref_count(r), r]}. + sort + + log_debug {"order_subjects: #{recursable.inspect}"} + + subjects += recursable.map{|r| r.last} + end + + # @param [RDF::Resource] subject + # @return [Hash{String => Object}] + def properties_for_subject(subject) + properties = {} + @graph.query({subject: subject}) do |st| + key = get_qname(st.predicate.to_s) + properties[key] ||= [] + properties[key] << st.object + end + properties + end + + # Partition properties into attributed, embedded, and types + # + # @param [Hash{String => Array] + def prop_partition(properties) + attr_props, embed_props = {}, {} + + type_prop = "rdf:type" + types = properties.delete(type_prop) + + # extract those properties that can be rendered as attributes + if [:untyped, :typed].include?(@attributes) + properties.each do |prop, values| + object = values.first + if values.length == 1 && + object.literal? && + (object.plain? || @attributes == :typed) && + get_lang(object).nil? + + attr_props[prop.to_sym] = values.first.to_s + else + embed_props[prop] = values + end + end + else + embed_props = properties + end + + [attr_props, embed_props, Array(types)] + end + + # Return language for literal, if there is no language, or it is the same as the document, return nil + # + # @param [RDF::Literal] literal + # @return [Symbol, nil] + # @raise [RDF::WriterError] + def get_lang(literal) + if literal.is_a?(RDF::Literal) + literal.language if literal.literal? && literal.language && literal.language.to_s != @lang.to_s + else + log_error("Getting language for #{literal.inspect}, which must be a literal") + nil + end end - # Make sure a CURIE is defined - def ensure_curie(resource) - if get_curie(resource) == resource.to_s || get_curie(resource).split(':', 2).last =~ /[\.#]/ + # Return appropriate, term, QName or URI for the given resource. + # + # @param [RDF::Value, String] resource + # @return [String] value to use to identify URI + # @raise [RDF::WriterError] + def get_qname(resource) + return @uri_to_qname[resource] if resource.is_a?(String) && @uri_to_qname.key?(resource) + + case resource + when RDF::URI + begin + uri = resource.to_s + + qname = case + when @uri_to_qname.key?(uri) + @uri_to_qname[uri] + when base_uri && uri.index(base_uri.to_s) == 0 + #log_debug {"get_qname(#{uri}): base_uri (#{uri.sub(base_uri.to_s, "")})"} + uri.sub(base_uri.to_s, "") + when u = @uri_to_prefix.keys.detect {|u| uri.index(u.to_s) == 0} + #log_debug {"get_qname(#{uri}): uri_to_prefix"} + # Use a defined prefix + prefix = @uri_to_prefix[u] + prefix(prefix, u) # Define for output + uri.sub(u.to_s, "#{prefix}:") + when @options[:standard_prefixes] && vocab = RDF::Vocabulary.detect {|v| uri.index(v.to_uri.to_s) == 0} + #log_debug {"get_qname(#{uri}): standard_prefixes"} + prefix = vocab.__name__.to_s.split('::').last.downcase + prefix(prefix, vocab.to_uri) # Define for output + uri.sub(vocab.to_uri.to_s, "#{prefix}:") + end + + # Don't define ill-formed qnames + @uri_to_qname[uri] = if qname.nil? || qname == ':' + resource + elsif qname.start_with?(':') + qname[1..-1] + else + qname + end + rescue ArgumentError => e + log_error("Invalid URI #{uri.inspect}: #{e.message}") + nil + end + when RDF::Node then resource.to_s + when RDF::Literal then nil + else + log_error("Getting QName for #{resource.inspect}, which must be a resource") + nil + end + end + + # Make sure a QName is defined + def ensure_qname(resource) + if get_qname(resource) == resource.to_s || get_qname(resource).split(':', 2).last =~ /[\.#]/ uri = resource.to_s # No vocabulary found, invent one # Add bindings for predicates not already having bindings @@ -399,29 +603,40 @@ def ensure_curie(resource) base_uri = uri.to_s[0..separation] suffix = uri.to_s[separation+1..-1] @gen_prefix = @gen_prefix ? @gen_prefix.succ : "ns0" - log_debug {"ensure_curie: generated prefix #{@gen_prefix} for #{base_uri}"} + log_debug {"ensure_qname: generated prefix #{@gen_prefix} for #{base_uri}"} @uri_to_prefix[base_uri] = @gen_prefix - @uri_to_term_or_curie[uri] = "#{@gen_prefix}:#{suffix}" + @uri_to_qname[uri] = "#{@gen_prefix}:#{suffix}" prefix(@gen_prefix, base_uri) - get_curie(resource) + get_qname(resource) end end - - # If base_uri is defined, use it to try to make uri relative - # @param [#to_s] uri - # @return [String] - def relativize(uri) - uri = expand_curie(uri.to_s) - base_uri ? uri.sub(base_uri.to_s, "") : uri + + # Mark a subject as done. + # @param [RDF::Resource] subject + # @return [Boolean] + def subject_done(subject) + @serialized[subject] = true end - # Undo CURIE - # @return [RDF::URI] - def expand_curie(curie) - pfx, suffix = curie.split(":", 2) - prefix(pfx) ? prefix(pfx) + suffix : curie + # Determine if the subject has been completed + # @param [RDF::Resource] subject + # @return [Boolean] + def is_done?(subject) + @serialized.include?(subject) || !@subjects.include?(subject) + end + + # Increase the reference count of this resource + # @param [RDF::Resource] resource + # @return [Integer] resulting reference count + def bump_reference(resource) + @references[resource] = ref_count(resource) + 1 + end + + # Return the number of times this node has been referenced in the object position + # @param [RDF::Node] node + # @return [Boolean] + def ref_count(node) + @references.fetch(node, 0) end end end - -require 'rdf/rdfxml/writer/haml_templates' diff --git a/lib/rdf/rdfxml/writer/haml_templates.rb b/lib/rdf/rdfxml/writer/haml_templates.rb deleted file mode 100644 index a981926..0000000 --- a/lib/rdf/rdfxml/writer/haml_templates.rb +++ /dev/null @@ -1,86 +0,0 @@ -# Default HAML templates used for generating RDF/XML output from the writer -module RDF::RDFXML - class Writer - # The default set of HAML templates used for RDFa code generation - BASE_HAML = { - identifier: "base", - # Document - # Locals: lang, title, prefix, base, subjects - # Yield: subjects.each - doc: %q( - = %() - - if stylesheet - = %() - %rdf:RDF{prefix_attrs.merge("xml:lang" => lang, "xml:base" => base)} - - subjects.each do |subject| - != yield(subject) - ), - - # Output for non-leaf resources - # Note that @about may be omitted for Nodes that are not referenced - # - # If _rel_ and _resource_ are not nil, the tag will be written relative - # to a previous subject. If _element_ is :li, the tag will be written - # with
  • instead of
    . - # - # Locals: subject, typeof, predicates, rel, element, inlist, attr_props - # Yield: predicates.each - subject: %q( - - first_type, *types = typeof.to_s.split(' ') - - (types.unshift(first_type); first_type = nil) if first_type && (first_type.include?('/') || first_type.start_with?('_:')) - - first_type ||= get_qname(RDF.Description) - - first_type = first_type[1..-1] if first_type.to_s.start_with?(":") - - attr_props = attr_props.merge(get_qname(RDF.nodeID) => subject.id) if subject.node? && ref_count(subject) >= 1 - - attr_props = attr_props.merge(get_qname(RDF.about) => relativize(subject)) if subject.uri? - - haml_tag(first_type, attr_props) do - - types.each do |type| - - expanded_type = expand_curie(type) - - if expanded_type.start_with?('_:') - - haml_tag(get_qname(RDF.type), "rdf:nodeID" => expanded_type[2..-1]) - -else - - haml_tag(get_qname(RDF.type), "rdf:resource" => expanded_type) - - predicates.each do |p| - = yield(p) - ), - - # Output for single-valued properties - # Locals: predicate, object, inlist - # Yields: object - # If nil is returned, render as a leaf - # Otherwise, render result - property_value: %q( - - if recurse && res = yield(object) - - haml_tag(property) do - = res - - elsif object.literal? && object.datatype == RDF.XMLLiteral - - haml_tag(property, :"<", "rdf:parseType" => "Literal") do - = object.value - - elsif object.literal? - - haml_tag(property, :"<", "xml:lang" => object.language, "rdf:datatype" => (object.datatype unless object.plain?)) do - = object.value.to_s.encode(xml: :text) - - elsif object.node? - - haml_tag(property, :"/", "rdf:nodeID" => object.id) - - else - - haml_tag(property, :"/", "rdf:resource" => relativize(object)) - ), - - # Outpust for a list - # Locals: predicate, list - # Yields: object - # If nil is returned, render as a leaf - # Otherwise, render result - collection: %q( - - haml_tag(property, get_qname(RDF.parseType) => "Collection") do - - list.each do |object| - - if recurse && res = yield(object) - = res - - elsif object.node? - - haml_tag(get_qname(RDF.Description), :"/", "rdf:nodeID" => (object.id if ref_count(object) > 1)) - - else - - haml_tag(get_qname(RDF.Description), :"/", "rdf:about" => relativize(object)) - ), - } - HAML_TEMPLATES = {base: BASE_HAML} - DEFAULT_HAML = BASE_HAML - end -end \ No newline at end of file diff --git a/rdf-rdfxml.gemspec b/rdf-rdfxml.gemspec index 586b641..016c070 100755 --- a/rdf-rdfxml.gemspec +++ b/rdf-rdfxml.gemspec @@ -30,9 +30,8 @@ Gem::Specification.new do |gem| gem.add_runtime_dependency 'rdf', '~> 3.2' gem.add_runtime_dependency 'rdf-xsd', '~> 3.2' - gem.add_runtime_dependency 'rdf-rdfa', '~> 3.2' gem.add_runtime_dependency 'htmlentities', '~> 4.3' - gem.add_runtime_dependency 'haml', '~> 5.2' # Until workaround for haml_tag is found. + gem.add_runtime_dependency 'builder', '~> 3.2' gem.add_development_dependency 'json-ld', '>= 3.2' gem.add_development_dependency 'rspec', '~> 3.10' diff --git a/script/parse b/script/parse index 112df5f..9697894 100755 --- a/script/parse +++ b/script/parse @@ -5,7 +5,6 @@ require "bundler/setup" require 'rdf/rdfxml' require 'rdf/turtle' require 'rdf/ntriples' -require 'rdf/ordered_repo' require 'getoptlong' require 'logger' @@ -34,7 +33,7 @@ def run(input, **options) end else r = reader_class.new(input, **options[:parser_options]) - g = RDF::OrderedRepo.new << r + g = RDF::Repository.new << r num = g.count options[:output].puts g.dump(options[:output_format], prefixes: r.prefixes, **options[:writer_options]) end diff --git a/script/tc b/script/tc index 6656c89..ed28deb 100755 --- a/script/tc +++ b/script/tc @@ -31,21 +31,23 @@ def earl_preamble(options) end def run_tc(tc, **options) - STDERR.write "run #{tc.name}" + STDERR.write "run #{tc.name} " if options[:verbose] puts "\nTestCase: #{tc.inspect}" - puts "\nInput:\n" + tc.input.read - puts "\nExpected:\n" + tc.expected.read - tc.input.rewind - tc.expected.rewind + puts "\nInput:\n" + tc.input + puts "\nExpected:\n" + tc.expected end begin puts "open #{tc.action}" if options[:verbose] - options = {base_uri: tc.base}.merge(options) + options = { + base_uri: tc.base, + validate: tc.syntax?, + logger: tc.logger + }.merge(options) - reader = RDF::Reader.for(tc.action).new(tc.input, base_uri: tc.base, validate: tc.syntax?, logger: options[:logger]) + reader = RDF::Reader.for(tc.action).new(tc.input, **options) graph = RDF::Repository.new result = nil @@ -86,7 +88,7 @@ def run_tc(tc, **options) end end - options[:output].puts("\nOutput:\n" + graph.dump(:ntriples, validate: false)) unless options[:quiet] + options[:output].puts("\nOutput:\n" + graph.dump(:ntriples, validate: false)) if options[:verbose] if options[:earl] options[:output].puts %{ @@ -149,16 +151,20 @@ opts.each do |opt, arg| end end -manifest = Fixtures::SuiteTest::BASE + "manifest.ttl" +manifests = [Fixtures::SuiteTest::BASE + "rdf11/rdf-xml/"].map {|m| "#{m}manifest.ttl"} earl_preamble(options) if options[:earl] result_count = {} -Fixtures::SuiteTest::Manifest.open(manifest) do |m| - m.entries.each do |tc| - next unless ARGV.empty? || ARGV.any? {|n| tc.name.match(/#{n}/)} - run_tc(tc, options.merge(result_count: result_count)) +begin + manifests.each do |manifest| + Fixtures::SuiteTest::Manifest.open(manifest) do |m| + m.entries.each do |tc| + next unless ARGV.empty? || ARGV.any? {|n| tc.name.match(/#{n}/)} + run_tc(tc, result_count: result_count, **options) + end + end end end diff --git a/spec/.gitignore b/spec/.gitignore index a4cc2ca..1dbd86d 100644 --- a/spec/.gitignore +++ b/spec/.gitignore @@ -1 +1,2 @@ /uri-cache/ +/w3c-rdf-tests diff --git a/spec/reader_spec.rb b/spec/reader_spec.rb index 769f787..f14ab1d 100644 --- a/spec/reader_spec.rb +++ b/spec/reader_spec.rb @@ -242,7 +242,7 @@ ) expect do - graph = parse(sampledoc, base_uri: "http://example.com", validate: true) + parse(sampledoc, base_uri: "http://example.com", validate: true) end.to raise_error(RDF::ReaderError) expect(logger.to_s).to match(/Obsolete attribute .*aboutEach/) end @@ -264,7 +264,7 @@ ) expect do - graph = parse(sampledoc, base_uri: "http://example.com", validate: true) + parse(sampledoc, base_uri: "http://example.com", validate: true) end.to raise_error(RDF::ReaderError) expect(logger.to_s).to match(/Obsolete attribute .*aboutEachPrefix/) end @@ -276,7 +276,7 @@ ) expect do - graph = parse(sampledoc, base_uri: "http://example.com", validate: true) + parse(sampledoc, base_uri: "http://example.com", validate: true) end.to raise_error(RDF::ReaderError) expect(logger.to_s).to match(/ID addtribute '.*' must be a NCName/) end @@ -291,7 +291,7 @@ ) expect do - graph = parse(sampledoc, base_uri: "http://example.com", validate: true) + parse(sampledoc, base_uri: "http://example.com", validate: true) end.to raise_error(RDF::ReaderError) expect(logger.to_s).to match(/ID addtribute '.*' must be a NCName/) end @@ -304,7 +304,7 @@ ) expect do - graph = parse(sampledoc, base_uri: "http://example.com", validate: true) + parse(sampledoc, base_uri: "http://example.com", validate: true) end.to raise_error(RDF::ReaderError) expect(logger.to_s).to include("ID addtribute 'a/b' must be a NCName") end @@ -316,7 +316,7 @@ ) expect do - graph = parse(sampledoc, base_uri: "http://example.com", validate: true) + parse(sampledoc, base_uri: "http://example.com", validate: true) end.to raise_error(RDF::ReaderError) expect(logger.to_s).to match(/Obsolete attribute .*bagID/) end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index bc905ac..e57f387 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -11,7 +11,7 @@ require 'matchers' begin require 'nokogiri' -rescue LoadError => e +rescue LoadError :rexml end begin @@ -29,11 +29,10 @@ ]) SimpleCov.start do add_filter "/spec/" - add_filter "/lib/rdf/rdfa/reader/rexml.rb" - add_filter "/lib/rdf/rdfa/context.rb" end rescue LoadError end + require 'rdf/rdfxml' ::RSpec.configure do |c| diff --git a/spec/suite_helper.rb b/spec/suite_helper.rb index 6fda920..ef76ac7 100644 --- a/spec/suite_helper.rb +++ b/spec/suite_helper.rb @@ -6,8 +6,8 @@ # For now, override RDF::Utils::File.open_file to look for the file locally before attempting to retrieve it module RDF::Util module File - REMOTE_PATH = "http://www.w3.org/2013/RDFXMLTests/" - LOCAL_PATH = ::File.expand_path("../w3c-rdf/rdf-xml", __FILE__) + '/' + REMOTE_PATH = "https://w3c.github.io/rdf-tests/rdf/" + LOCAL_PATH = ::File.expand_path("../w3c-rdf-tests/rdf/", __FILE__) + '/' class << self alias_method :original_open_file, :open_file @@ -72,7 +72,7 @@ def self.open_file(filename_or_url, **options, &block) module Fixtures module SuiteTest - BASE = "http://www.w3.org/2013/RDFXMLTests/" + BASE = "https://w3c.github.io/rdf-tests/rdf/" FRAME = JSON.parse(%q({ "@context": { "xsd": "http://www.w3.org/2001/XMLSchema#", @@ -124,7 +124,7 @@ class Entry < JSON::LD::Resource attr_accessor :logger def base - "http://www.w3.org/2013/RDFXMLTests/" + action.split('/')[-2,2].join("/") + RDF::URI(action) end # Alias data and query diff --git a/spec/suite_spec.rb b/spec/suite_spec.rb index 7cb5803..8993e41 100644 --- a/spec/suite_spec.rb +++ b/spec/suite_spec.rb @@ -6,7 +6,7 @@ describe "w3c rdfcore tests" do require_relative 'suite_helper' - %w(manifest.ttl).each do |man| + %w(rdf11/rdf-xml/manifest.ttl).each do |man| Fixtures::SuiteTest::Manifest.open(Fixtures::SuiteTest::BASE + man) do |m| describe m.comment do m.entries.each do |t| diff --git a/spec/writer_spec.rb b/spec/writer_spec.rb index e5da3c3..95ab7ac 100644 --- a/spec/writer_spec.rb +++ b/spec/writer_spec.rb @@ -8,7 +8,7 @@ class FOO < RDF::Vocabulary("http://foo/"); end describe "RDF::RDFXML::Writer" do let(:logger) {RDF::Spec.logger} it_behaves_like 'an RDF::Writer' do - let(:writer) {RDF::RDFXML::Writer.new} + let(:writer) {RDF::RDFXML::Writer.new(::StringIO.new)} end describe "#buffer" do @@ -517,13 +517,13 @@ class FOO < RDF::Vocabulary("http://foo/"); end end end - specify { expect(subject).to match / "http://foo/"}) + serialize(nt, prefixes: {"" => "http://foo/"}) end { @@ -534,8 +534,8 @@ class FOO < RDF::Vocabulary("http://foo/"); end end end - specify { expect(subject).to match /