Skip to content

Commit

Permalink
Improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
gkellogg committed May 16, 2023
1 parent cb297e3 commit d28856b
Show file tree
Hide file tree
Showing 2 changed files with 131 additions and 149 deletions.
275 changes: 128 additions & 147 deletions lib/rdf/rdfxml/writer.rb
Expand Up @@ -167,9 +167,10 @@ def write_epilogue

# Generate document
doc = render_document(@subjects,
lang: @lang,
base: base_uri,
prefix: prefix) do |s|
lang: @lang,
base: base_uri,
prefix: prefix,
stylesheet: @options[:stylesheet]) do |s|
subject(s)
end
@output.write(doc)
Expand All @@ -179,51 +180,6 @@ def write_epilogue

protected

# Return the writer to a pristine state so it can be run again.
#
# Zeroes the logging depth, clears the prefix map through its setter and
# replaces the reference, serialization and subject tables with new, empty
# hashes (each table gets its own Hash instance).
def reset
  @options[:log_depth] = 0
  self.prefixes = {}
  @references, @serialized = {}, {}
  @subjects = {}
end

# Compute the order in which subjects are emitted. Subclasses may override
# this to choose a different ordering.
#
# The result is built in three stages:
# 1. the base URI, when it is itself one of the recorded subjects;
# 2. for every entry of #top_classes, the sorted, de-duplicated subjects
#    returned by the type query for that class;
# 3. every remaining recorded subject, sorted so that non-node resources
#    precede RDF::Node instances, then by reference count, then by the
#    subject itself.
#
# Uses #top_classes and #base_uri.
# @return [Array<Resource>] Ordered list of subjects
def order_subjects
  visited = {}
  ordered = []

  # Stage 1: the document base, if it is a subject
  if base_uri && @subjects.keys.include?(base_uri)
    ordered << base_uri
    visited[base_uri] = true
  end

  # Stage 2: instances of the distinguished classes
  top_classes.reject { |klass| visited.include?(klass) }.each do |class_uri|
    instances = graph.query({predicate: "rdf:type", object: class_uri}).map(&:subject).sort.uniq
    instances.each do |subject|
      ordered << subject
      visited[subject] = true
    end
  end

  # Stage 3: everything else, as sortable [node-flag, ref_count, subject] triples
  remaining = @subjects.keys.reject { |s| visited.include?(s) }
  recursable = remaining.map { |r| [r.is_a?(RDF::Node) ? 1 : 0, ref_count(r), r] }.sort

  log_debug {"order_subjects: #{recursable.inspect}"}

  ordered + recursable.map(&:last)
end

# Render document using `haml_template[:doc]`. Yields each subject to be rendered separately.
#
# @param [Array<RDF::Resource>] subjects
Expand Down Expand Up @@ -251,8 +207,8 @@ def order_subjects
def render_document(subjects, lang: nil, base: nil, **options, &block)
builder = Builder::XmlMarkup.new(indent: 2)
builder.instruct! :xml, :version=>"1.0", :encoding=>"UTF-8"
builder.instruct! :'xml-stylesheet', type: 'text/xsd', href: options[:stylesheet] if options[:stylesheet]
attrs = prefix_attrs.dup
builder.instruct! :'xml-stylesheet', type: 'text/xsl', href: options[:stylesheet] if options[:stylesheet]
attrs = prefix_attrs
attrs["xml:lang"] = lang if lang
attrs["xml:base"] = base if base

Expand Down Expand Up @@ -307,8 +263,9 @@ def render_subject(subject, builder, **options, &block)
type_qname ||= "rdf:Description"

attr_props = attr_props.merge("rdf:nodeID" => subject.id) if subject.node? && ref_count(subject) >= 1
attr_props = attr_props.merge("rdf:about" => relativize(subject)) if subject.uri?
attr_props = attr_props.merge("rdf:about" => subject.relativize(base_uri)) if subject.uri?

log_debug {"render_subject(#{subject.inspect})"}
subject_done(subject)

builder.tag!(type_qname, **attr_props) do |b|
Expand All @@ -328,61 +285,6 @@ def render_subject(subject, builder, **options, &block)
end
end

# Collect the objects of every statement about +subject+, grouped under the
# QName (as produced by #get_qname) of the statement's predicate.
#
# @param [RDF::Resource] subject
# @return [Hash{String => Object}] objects grouped by predicate QName, in the
#   order the graph query yields them
def properties_for_subject(subject)
  by_predicate = {}
  @graph.query({subject: subject}) do |statement|
    (by_predicate[get_qname(statement.predicate)] ||= []) << statement.object
  end
  by_predicate
end

# Split a subject's properties into those rendered as XML attributes, those
# rendered as child elements, and the subject's types.
#
# The "rdf:type" entry is removed from +properties+ (the argument is
# mutated) and returned separately. When @attributes is neither :untyped
# nor :typed, nothing becomes an attribute and the remaining +properties+
# hash is returned as the embedded set. Otherwise a property becomes an
# attribute only if it has exactly one value, that value is a literal which
# is plain (or @attributes is :typed), and #get_lang returns nil for it.
#
# @param [Hash{String => Array<RDF::Resource>}] properties
# @return [Hash, Hash, Array<RDF::Resource>]
#   attribute properties, embedded properties, types
def prop_partition(properties)
  types = properties.delete("rdf:type")

  # Attribute rendering disabled: everything left is embedded
  return [{}, properties, Array(types)] unless [:untyped, :typed].include?(@attributes)

  attr_props = {}
  embed_props = {}
  properties.each do |prop, values|
    object = values.first
    as_attribute = values.length == 1 &&
      object.literal? &&
      (object.plain? || @attributes == :typed) &&
      get_lang(object).nil?

    if as_attribute
      attr_props[prop] = object.to_s
    else
      embed_props[prop] = values
    end
  end

  [attr_props, embed_props, Array(types)]
end

# Decide whether a property value may be written as an XML attribute.
#
# Never the case unless @attributes is :untyped or :typed. Given that, the
# result is the first truthy value among:
# * +prop+ being rdf:type;
# * @attributes being :typed and +object+ being a typed literal;
# * +object+ being a simple literal, or @lang being set and equal (as a
#   string) to the object's language.
#
# The value returned is whatever the deciding sub-expression evaluated to,
# so it can be nil or false when no rule matches.
#
# @param [Object] prop predicate, compared against RDF.type
# @param [Object] object value being considered
# @return [Boolean, nil]
def property_as_attribute?(prop, object)
  return false unless [:untyped, :typed].include?(@attributes)

  is_type = prop == RDF.type
  return is_type if is_type

  typed_literal = [:typed].include?(@attributes) && object.literal? && object.typed?
  return typed_literal if typed_literal

  (object.literal? && object.simple?) || (@lang && object.language.to_s == @lang.to_s)
end

# Render a single- or multi-valued property. Yields each object for optional rendering. The block should only render for recursive subject definitions (i.e., where the object is also a subject and is rendered underneath the first referencing subject).
#
# If a multi-valued property definition is not found within the template, the writer will use the single-valued property definition multiple times.
Expand All @@ -393,45 +295,38 @@ def property_as_attribute?(prop, object)
# List of objects to render. If the list contains only a single element, the :property_value template will be used. Otherwise, the :property_values template is used.
# @param [Builder::XmlMarkup] builder
# @param [Hash{Symbol => Object}] options Rendering options passed to Haml render.
# @return String
# The rendered document is returned as a string
def render_property(property, objects, builder, **options)
log_debug {"render_property(#{property}): #{objects.inspect}, #{options.inspect}"}
log_debug {"render_property(#{property}): #{objects.inspect}"}

# Separate out the objects which are lists and render separately
lists = objects.
select(&:node?).
map {|o| RDF::List.new(subject: o, graph: @graph)}.
select {|l| l.valid? && l.none?(&:literal?)}

objects = objects - lists.map(&:subject)

unless lists.empty?
# Render non-list objects
log_debug {"properties with lists: #{lists} non-lists: #{objects - lists.map(&:subject)}"}
log_debug(depth: log_depth + 1) {"properties with lists: #{lists} non-lists: #{objects - lists.map(&:subject)}"}

unless objects == lists.map(&:subject)
render_property(property, objects - lists.map(&:subject), builder, **options)
unless objects.empty?
render_property(property, objects, builder, **options)
end

# Render each list
lists.each do |list|
# Render each list as multiple properties and set :inlist to true
list.each_statement {|st| subject_done(st.subject)}

log_debug {"list: #{list.inspect} #{list.to_a}"}
log_depth do
log_debug {"list: #{list.inspect} #{list.to_a}"}
render_collection(property, list, builder, **options)
end
end
end

if objects.length > 1
# Render each property using property_value template
objects.map do |object|
log_depth do
render_property(property, [object], builder, **options)
end
end.join(" ")
else
if objects.length == 1
recurse = log_depth <= @max_depth
object = objects.first

Expand All @@ -443,13 +338,20 @@ def render_property(property, objects, builder, **options)
builder.tag!(property, object.value, "rdf:parseType" => "Literal")
elsif object.literal?
attrs = {}
attrs[:"xml:lang"] = object.language if object.language?
attrs[:"xml:datatype"] = object.datatype if object.datatype?
attrs["xml:lang"] = object.language if object.language?
attrs["rdf:datatype"] = object.datatype if object.datatype?
builder.tag!(property, object.value.to_s, **attrs)
elsif object.node?
builder.tag!(property, "rdf:nodeID": object.id)
else
builder.tag!(property, "rdf:resource": relativize(object))
builder.tag!(property, "rdf:resource": object.relativize(base_uri))
end
else
# Render each property using property_value template
objects.each do |object|
log_depth do
render_property(property, [object], builder, **options)
end
end
end
end
Expand All @@ -476,7 +378,7 @@ def render_collection(property, list, builder, **options, &block)
b.tag!("rdf:Description")
end
else
b.tag!("rdf:Description", "rdf:about" => relativize(object))
b.tag!("rdf:Description", "rdf:about" => object.relativize(base_uri))
end
end
end
Expand Down Expand Up @@ -516,19 +418,6 @@ def preprocess
@graph.each {|statement| preprocess_statement(statement)}
end

##
# Turn a URI into a QName by way of its CURIE.
#
# When #get_curie yields nil the URI is returned unchanged. A CURIE that
# begins with ":" is returned without that leading colon; any other CURIE
# is returned as-is.
def get_qname(uri)
  curie = get_curie(uri)
  return uri if curie.nil?

  curie.start_with?(":") ? curie[1..-1] : curie
end

# Perform any statement preprocessing required. This is used to perform reference counts and determine required prefixes.
#
# For RDF/XML, make sure that all predicates have CURIEs
Expand All @@ -540,23 +429,102 @@ def preprocess_statement(statement)
get_curie(statement.subject)
get_curie(statement.predicate)
get_curie(statement.object)
get_curie(statement.object.datatype) if statement.object.literal? && statement.object.has_datatype?
get_curie(statement.object.datatype) if statement.object.literal? && statement.object.datatype?

# Invent a prefix for the predicate, if necessary
ensure_curie(statement.predicate)
ensure_curie(statement.object) if statement.predicate == RDF.type
end

# If base_uri is defined, use it to try to make uri relative.
#
# The argument is first expanded with #expand_curie. When a base URI is set
# and the expanded URI begins with it, that leading base is removed. The
# match is anchored at the start of the string, so a URI that merely
# contains the base somewhere else (for example inside a query string) is
# returned intact rather than having that inner occurrence cut out.
#
# @param [#to_s] uri
# @return [String]
def relativize(uri)
  uri = expand_curie(uri.to_s)
  return uri unless base_uri

  # Regexp.escape keeps the base literal; \A restricts removal to a prefix
  uri.sub(/\A#{Regexp.escape(base_uri.to_s)}/, "")
end

private

# Return the writer to a pristine state so it can be run again.
#
# Zeroes the logging depth and replaces the reference, serialization and
# subject tables with new, empty hashes (each table gets its own Hash
# instance).
def reset
  @options[:log_depth] = 0
  @references, @serialized = {}, {}
  @subjects = {}
end

# Compute the order in which subjects are emitted. Subclasses may override
# this to choose a different ordering.
#
# The result is built in three stages:
# 1. the base URI, when it is itself one of the recorded subjects;
# 2. for every entry of #top_classes, the sorted, de-duplicated subjects
#    returned by the type query for that class;
# 3. every remaining recorded subject, sorted so that non-node resources
#    precede RDF::Node instances, then by reference count, then by the
#    subject itself.
#
# Uses #top_classes and #base_uri.
# @return [Array<Resource>] Ordered list of subjects
def order_subjects
  visited = {}
  ordered = []

  # Stage 1: the document base, if it is a subject
  if base_uri && @subjects.keys.include?(base_uri)
    ordered << base_uri
    visited[base_uri] = true
  end

  # Stage 2: instances of the distinguished classes
  top_classes.reject { |klass| visited.include?(klass) }.each do |class_uri|
    instances = graph.query({predicate: "rdf:type", object: class_uri}).map(&:subject).sort.uniq
    instances.each do |subject|
      ordered << subject
      visited[subject] = true
    end
  end

  # Stage 3: everything else, as sortable [node-flag, ref_count, subject] triples
  remaining = @subjects.keys.reject { |s| visited.include?(s) }
  recursable = remaining.map { |r| [r.is_a?(RDF::Node) ? 1 : 0, ref_count(r), r] }.sort

  log_debug {"order_subjects: #{recursable.inspect}"}

  ordered + recursable.map(&:last)
end

# Collect the objects of every statement about +subject+, grouped under the
# QName (as produced by #get_qname) of the statement's predicate.
#
# @param [RDF::Resource] subject
# @return [Hash{String => Object}] objects grouped by predicate QName, in the
#   order the graph query yields them
def properties_for_subject(subject)
  by_predicate = {}
  @graph.query({subject: subject}) do |statement|
    (by_predicate[get_qname(statement.predicate)] ||= []) << statement.object
  end
  by_predicate
end

# Split a subject's properties into those rendered as XML attributes, those
# rendered as child elements, and the subject's types.
#
# The "rdf:type" entry is removed from +properties+ (the argument is
# mutated) and returned separately. When @attributes is neither :untyped
# nor :typed, nothing becomes an attribute and the remaining +properties+
# hash is returned as the embedded set. Otherwise a property becomes an
# attribute only if it has exactly one value, that value is a literal which
# is plain (or @attributes is :typed), and #get_lang returns nil for it.
#
# @param [Hash{String => Array<RDF::Resource>}] properties
# @return [Hash, Hash, Array<RDF::Resource>]
#   attribute properties, embedded properties, types
def prop_partition(properties)
  types = properties.delete("rdf:type")

  # Attribute rendering disabled: everything left is embedded
  return [{}, properties, Array(types)] unless [:untyped, :typed].include?(@attributes)

  attr_props = {}
  embed_props = {}
  properties.each do |prop, values|
    object = values.first
    as_attribute = values.length == 1 &&
      object.literal? &&
      (object.plain? || @attributes == :typed) &&
      get_lang(object).nil?

    if as_attribute
      attr_props[prop] = object.to_s
    else
      embed_props[prop] = values
    end
  end

  [attr_props, embed_props, Array(types)]
end

# Return language for literal, if there is no language, or it is the same as the document, return nil
#
# @param [RDF::Literal] literal
Expand Down Expand Up @@ -647,6 +615,19 @@ def expand_curie(curie)
prefix(pfx) ? prefix(pfx) + suffix : curie
end

##
# Turn a URI into a QName by way of its CURIE.
#
# When #get_curie yields nil the URI is returned unchanged. A CURIE that
# begins with ":" is returned without that leading colon; any other CURIE
# is returned as-is.
def get_qname(uri)
  curie = get_curie(uri)
  return uri if curie.nil?

  curie.start_with?(":") ? curie[1..-1] : curie
end

# Mark a subject as done.
# @param [RDF::Resource] subject
# @return [Boolean]
Expand Down

0 comments on commit d28856b

Please sign in to comment.