-
Notifications
You must be signed in to change notification settings - Fork 4
/
format.rb
157 lines (148 loc) · 6.09 KB
/
format.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# Attempt to load RDF::RDFa first, so that RDF::Format.for(:rdfa) is defined
begin
require 'rdf/rdfa'
rescue LoadError
# Soft error: RDF::RDFa is optional, so a failed require is deliberately ignored
end
module RDF::Microdata
##
# Microdata format specification.
#
# @example Obtaining a Microdata format class
# RDF::Format.for(:microdata) #=> RDF::Microdata::Format
# RDF::Format.for("etc/foaf.html")
# RDF::Format.for(:file_name => "etc/foaf.html")
# RDF::Format.for(file_extension: "html")
# RDF::Format.for(:content_type => "text/html")
#
# @example Obtaining serialization format MIME types
# RDF::Format.content_types #=> {"text/html" => [RDF::Microdata::Format]}
#
# @see https://www.w3.org/TR/microdata-rdf/
class Format < RDF::Format
# Declare the content encoding for this format.
content_encoding 'utf-8'
# Only define content type if RDFa is not available.
# The Microdata processor will be launched from there
# otherwise.
# The check runs once, when this class body is evaluated, which is why
# 'rdf/rdfa' is required (if installed) before this file's module body.
content_type 'text/html;q=0.5', extension: :html unless RDF::Format.for(:rdfa)
# The reader class is given as a block, so the RDF::Microdata::Reader constant
# is only resolved when the block is called.
reader { RDF::Microdata::Reader }
##
# Sample detection to see if it matches Microdata (not RDF/XML or RDFa)
#
# Use a text sample to detect the format of an input file. Sub-classes implement
# a matcher sufficient to detect probable format matches, including disambiguating
# between other similar formats.
#
# The sample is considered Microdata when any tag in it mentions one of the
# Microdata attribute names (itemprop, itemtype, itemref, itemscope, itemid).
#
# @param [String] sample Beginning several bytes (~ 1K) of input.
# @return [Boolean]
def self.detect(sample)
  # A sample is a prefix of the input and may end in the middle of a multi-byte
  # character. Matching a regexp against such a string can raise ArgumentError,
  # so replace invalid byte sequences first. Valid samples are used as-is.
  text = sample.valid_encoding? ? sample : sample.scrub

  # match? returns a plain boolean without building a MatchData object.
  text.match?(/<[^>]*(itemprop|itemtype|itemref|itemscope|itemid)[^>]*>/m)
end
##
# Hash of CLI commands appropriate for this format.
#
# Defines two commands, `to-rdfa` and `to-jsonld`. Each entry carries a
# description, help text, a filter restricting it to the :microdata format,
# and a lambda taking `(files, options)` that performs the transformation.
# Both entries set `parse: false` and mark the `output_format` option as
# disabled.
#
# Each lambda reads from the given files or, when no files are given, from
# `options[:evaluate]` (if set) or STDIN, and writes its result to
# `options[:output]` (falling back to `$stdout`).
#
# NOTE(review): the exact meaning of `parse`, `filter` and `option_use` is
# defined by the CLI that consumes this hash, not by this file -- confirm
# against the RDF CLI documentation.
#
# @return [Hash{Symbol => Hash}]
def self.cli_commands
{
# Re-serialize HTML+Microdata input as pretty-printed HTML+RDFa.
"to-rdfa": {
description: "Transform HTML+Microdata into HTML+RDFa",
parse: false,
help: "to-rdfa files ...\nTransform HTML+Microdata into HTML+RDFa",
filter: {
format: :microdata
},
option_use: {output_format: :disabled},
lambda: ->(files, options) do
# Destination stream: caller-supplied output, otherwise standard output.
out = options[:output] || $stdout
# Stylesheet used to re-indent the generated document:
# - every comment, processing instruction, text node and element is
#   preceded by a newline and the current indent string;
# - whitespace-only text nodes are dropped, other text is space-normalized;
# - elements with child elements are copied with their attributes and their
#   element/text children are processed with the indent extended by the
#   `indent-increment` parameter; elements without child elements are
#   copied verbatim.
# The trailing gsub removes one leading space from each line of the
# stylesheet source before it is compiled.
xsl = Nokogiri::XSLT(%(<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:param name="indent-increment" select="' '"/>
<xsl:output method="html" doctype-system="about:legacy-compat"/>
<xsl:template name="newline">
<xsl:text disable-output-escaping="yes">
</xsl:text>
</xsl:template>
<xsl:template match="comment() | processing-instruction()">
<xsl:param name="indent" select="''"/>
<xsl:call-template name="newline"/>
<xsl:value-of select="$indent"/>
<xsl:copy />
</xsl:template>
<xsl:template match="text()">
<xsl:param name="indent" select="''"/>
<xsl:call-template name="newline"/>
<xsl:value-of select="$indent"/>
<xsl:value-of select="normalize-space(.)"/>
</xsl:template>
<xsl:template match="text()[normalize-space(.)='']"/>
<xsl:template match="*">
<xsl:param name="indent" select="''"/>
<xsl:call-template name="newline"/>
<xsl:value-of select="$indent"/>
<xsl:choose>
<xsl:when test="count(child::*) > 0">
<xsl:copy>
<xsl:copy-of select="@*"/>
<xsl:apply-templates select="*|text()">
<xsl:with-param name="indent" select="concat ($indent, $indent-increment)"/>
</xsl:apply-templates>
<xsl:call-template name="newline"/>
<xsl:value-of select="$indent"/>
</xsl:copy>
</xsl:when>
<xsl:otherwise>
<xsl:copy-of select="."/>
</xsl:otherwise>
</xsl:choose>
</xsl:template>
</xsl:stylesheet>).gsub(/^ /, ''))
if files.empty?
# No files given: read from options[:evaluate] when present, otherwise STDIN.
input = options[:evaluate] ? StringIO.new(options[:evaluate]) : STDIN
# Use the requested encoding for the input stream, defaulting to UTF-8.
input.set_encoding(options.fetch(:encoding, Encoding::UTF_8))
# NOTE(review): `rdfa: true` presumably makes the reader build the document
# exposed as `reader.rdfa` -- confirm against RDF::Microdata::Reader.
RDF::Microdata::Reader.new(input, options.merge(rdfa: true)) do |reader|
# Trim surrounding whitespace from every text node before pretty-printing.
reader.rdfa.xpath("//text()").each do |txt|
txt.content = txt.content.to_s.strip
end
out.puts xsl.apply_to(reader.rdfa).to_s
end
else
# One transformed document is written per input file, in the order given.
files.each do |file|
RDF::Microdata::Reader.open(file, options.merge(rdfa: true)) do |reader|
# Trim surrounding whitespace from every text node before pretty-printing.
reader.rdfa.xpath("//text()").each do |txt|
txt.content = txt.content.to_s.strip
end
out.puts xsl.apply_to(reader.rdfa).to_s
end
end
end
end
},
# Re-serialize HTML+Microdata input as JSON-LD.
"to-jsonld": {
description: "Transform HTML+Microdata into JSON-LD",
parse: false,
help: "to-jsonld files ...\nTransform HTML+Microdata into JSON-LD",
filter: {
format: :microdata
},
option_use: {output_format: :disabled},
lambda: ->(files, options) do
# Destination stream: caller-supplied output, otherwise standard output.
out = options[:output] || $stdout
if files.empty?
# No files given: read from options[:evaluate] when present, otherwise STDIN.
input = options[:evaluate] ? StringIO.new(options[:evaluate]) : STDIN
# Use the requested encoding for the input stream, defaulting to UTF-8.
input.set_encoding(options.fetch(:encoding, Encoding::UTF_8))
# NOTE(review): `jsonld: true` presumably makes the reader build the structure
# exposed as `reader.jsonld` -- confirm against RDF::Microdata::Reader.
RDF::Microdata::Reader.new(input, options.merge(jsonld: true)) do |reader|
# NOTE(review): JSON_STATE comes from the JSON::LD library and is passed as
# the generator state to to_json; presumably it controls formatting -- confirm.
out.puts reader.jsonld.to_json(::JSON::LD::JSON_STATE)
end
else
# One JSON-LD document is written per input file, in the order given.
files.each do |file|
RDF::Microdata::Reader.open(file, options.merge(jsonld: true)) do |reader|
out.puts reader.jsonld.to_json(::JSON::LD::JSON_STATE)
end
end
end
end
},
}
end
end
end