-
Notifications
You must be signed in to change notification settings - Fork 27
/
nokogiri.rb
151 lines (134 loc) · 3.8 KB
/
nokogiri.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
module JSON::LD
class API
##
# Nokogiri implementation of an HTML parser.
#
# @see http://nokogiri.org/
module Nokogiri
##
# Identifies the XML/HTML backend wrapped by this module.
#
# @return [Symbol] always `:nokogiri`
class << self
  def library
    :nokogiri
  end
end
# Proxy class to implement uniform element accessors
#
# Wraps a single node together with the proxy of its parent, so that
# paths and ancestor chains can be computed on the proxy side. Any method
# not defined here is forwarded to the wrapped node.
class NodeProxy
  attr_reader :node, :parent

  ##
  # @param node [Object] the node being wrapped
  # @param parent [NodeProxy, nil] proxy of the enclosing node, if any
  def initialize(node, parent = nil)
    @node = node
    @parent = parent
  end

  ##
  # Return xml:base on element, if defined
  #
  # Tries a namespace-aware lookup first and falls back to the literal
  # `xml:base` attribute name.
  #
  # @return [Object, nil] whatever the node's attribute lookup returns
  #   (presumably an attribute node rather than a plain String -- confirm)
  def base
    @node.attribute_with_ns("base", RDF::XML.to_s) || @node.attribute('xml:base')
  end

  ##
  # Path of this node for display purposes: the parent's path (when there
  # is a parent) followed by this node's name. The separator is "/" for
  # elements, "@" for attributes and "?" for anything else. Memoized.
  #
  # @return [String]
  def display_path
    @display_path ||= begin
      segments = []
      segments << parent.display_path if parent
      segments << @node.name
      case @node
      when ::Nokogiri::XML::Element then segments.join("/")
      when ::Nokogiri::XML::Attr then segments.join("@")
      else segments.join("?")
      end
    end
  end

  ##
  # Return true if all child nodes are text (also true when there are no
  # children at all).
  #
  # @return [Boolean]
  def text_content?
    @node.children.all?(&:text?)
  end

  ##
  # Children of this node
  #
  # @return [NodeSetProxy]
  def children
    NodeSetProxy.new(@node.children, self)
  end

  ##
  # Ancestors of this element, in order (outermost first, direct parent
  # last). Memoized.
  #
  # @return [Array<NodeProxy>]
  def ancestors
    @ancestors ||= parent ? parent.ancestors + [parent] : []
  end

  ##
  # Inner text of an element. Decode Entities
  #
  # @return [String]
  #def inner_text
  #  coder = HTMLEntities.new
  #  coder.decode(@node.inner_text)
  #end

  ##
  # Attribute nodes of this node, wrapped with this proxy as their parent.
  # Memoized.
  #
  # @return [NodeSetProxy]
  def attribute_nodes
    @attribute_nodes ||= NodeSetProxy.new(@node.attribute_nodes, self)
  end

  ##
  # Run an XPath query against the wrapped node and wrap every match.
  #
  # The parent chain of each result is rebuilt from the match's own
  # ancestors rather than from this proxy.
  #
  # @param args arguments passed through to the node's `xpath`
  # @return [Array<NodeProxy>]
  def xpath(*args)
    @node.xpath(*args).map do |match|
      # Walk the reversed ancestor list, wrapping each ancestor so that its
      # proxy parent is the proxy built in the previous step.
      enclosing = match.ancestors.reverse.inject(nil) do |outer, ancestor|
        NodeProxy.new(ancestor, outer)
      end
      NodeProxy.new(match, enclosing)
    end
  end

  ##
  # Proxy for everything else to @node
  #
  # The caller's block is forwarded as well; without it, calls such as
  # `proxy.map { ... }` would silently ignore the block.
  def method_missing(method, *args, &block)
    @node.send(method, *args, &block)
  end
  # Keep keyword arguments as keywords when delegating (Ruby 2.7+/3.x);
  # guarded so older interpreters without ruby2_keywords still load this.
  ruby2_keywords(:method_missing) if respond_to?(:ruby2_keywords, true)

  ##
  # Report forwarded methods through respond_to? so it agrees with what
  # method_missing can actually dispatch.
  #
  # @return [Boolean]
  def respond_to_missing?(method, include_private = false)
    @node.respond_to?(method, include_private) || super
  end
end
##
# NodeSet proxy
#
# Wraps a node set together with the proxy that owns it. Only `each` is
# defined here and yields NodeProxy objects; every other method is
# forwarded to the underlying set and therefore works on its raw members.
class NodeSetProxy
  attr_reader :node_set, :parent

  ##
  # @param node_set [#each] the wrapped collection of nodes
  # @param parent [NodeProxy] proxy used as parent for each yielded child
  def initialize(node_set, parent)
    @node_set = node_set
    @parent = parent
  end

  ##
  # Return a proxy for each child
  #
  # @yield child
  # @yieldparam [NodeProxy]
  # @return [Enumerator] when called without a block; otherwise whatever
  #   the underlying set's `each` returns
  def each
    # Without this guard a block-less call fails with LocalJumpError as
    # soon as the set has a member.
    return enum_for(:each) unless block_given?

    @node_set.each do |member|
      yield NodeProxy.new(member, parent)
    end
  end

  ##
  # Proxy for everything else to @node_set
  #
  # The caller's block is forwarded too, so `set.map { ... }` runs the
  # block instead of returning a bare Enumerator.
  def method_missing(method, *args, &block)
    @node_set.send(method, *args, &block)
  end
  # Keep keyword arguments as keywords when delegating (Ruby 2.7+/3.x);
  # guarded so older interpreters without ruby2_keywords still load this.
  ruby2_keywords(:method_missing) if respond_to?(:ruby2_keywords, true)

  ##
  # Report forwarded methods through respond_to? so it agrees with what
  # method_missing can actually dispatch.
  #
  # @return [Boolean]
  def respond_to_missing?(method, include_private = false)
    @node_set.respond_to?(method, include_private) || super
  end
end
##
# Initializes the underlying XML library.
#
# Accepts an already-parsed Nokogiri HTML/XML document, a String of
# markup, or any object responding to `read` (its contents are read
# first). Markup is parsed with Nokogiri's HTML5 parser; if that raises
# LoadError or NoMethodError the HTML4 parser is used instead.
#
# @param [Nokogiri::HTML::Document, Nokogiri::XML::Document, String, #read] input
# @param [Hash{Symbol => Object}] options
#   not consulted by this method
# @return [NodeProxy] of root element, or nil when the document has no root
def initialize_html_nokogiri(input, options = {})
  require 'nokogiri' unless defined?(::Nokogiri)
  doc = case input
        when ::Nokogiri::HTML::Document, ::Nokogiri::XML::Document
          input
        else
          begin
            input = input.read if input.respond_to?(:read)
            # force_encoding modifies its receiver and raises FrozenError on a
            # frozen String (e.g. a literal under frozen_string_literal), which
            # the rescue below does not cover. Work on a copy in that case;
            # unfrozen strings are still re-tagged in place as before.
            input = input.dup if input.is_a?(String) && input.frozen?
            ::Nokogiri::HTML5(input.force_encoding('utf-8'), max_parse_errors: 1000)
          rescue LoadError, NoMethodError
            # NOTE(review): `base_uri` is not defined in the visible part of
            # this file; presumably it is provided by the object this module
            # is mixed into -- confirm, otherwise this fallback raises NameError.
            ::Nokogiri::HTML.parse(input, base_uri.to_s, 'utf-8')
          end
        end
  NodeProxy.new(doc.root) if doc && doc.root
end
alias_method :initialize_html, :initialize_html_nokogiri
end
end
end