Finish 3.0.10

ruby-rdf · Feb 12, 2019 · caf2895 · caf2895
2 parents 0dee10d + 00d0776
commit caf2895
Show file tree

Hide file tree

Showing 23 changed files with 733 additions and 224 deletions.
diff --git a/Gemfile b/Gemfile
@@ -22,7 +22,6 @@ group :debug do
   gem 'psych', platforms: [:mri, :rbx]
   gem "redcarpet", platforms: :ruby
   gem "byebug", platforms: :mri
-  gem 'ruby-debug', platform: :jruby
   gem 'guard-rspec'
 end
 

diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-3.0.9
+3.0.10
diff --git a/lib/rdf/model/literal/datetime.rb b/lib/rdf/model/literal/datetime.rb
@@ -7,7 +7,7 @@ module RDF; class Literal
   class DateTime < Literal
     DATATYPE = RDF::URI("http://www.w3.org/2001/XMLSchema#dateTime")
     GRAMMAR  = %r(\A(-?(?:\d{4}|[1-9]\d{4,})-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?)((?:[\+\-]\d{2}:\d{2})|UTC|GMT|Z)?\Z).freeze
-    FORMAT   = '%Y-%m-%dT%H:%M:%S%:z'.freeze
+    FORMAT   = '%Y-%m-%dT%H:%M:%S.%L%:z'.freeze
 
     ##
     # @param  [DateTime] value
@@ -31,9 +31,9 @@ def initialize(value, datatype: nil, lexical: nil, **options)
     def canonicalize!
       if self.valid?
         @string = if has_timezone?
-          @object.new_offset.new_offset.strftime(FORMAT[0..-4] + 'Z')
+          @object.new_offset.new_offset.strftime(FORMAT[0..-4] + 'Z').sub('.000', '')
         else
-          @object.strftime(FORMAT[0..-4])
+          @object.strftime(FORMAT[0..-4]).sub('.000', '')
         end
       end
       self
@@ -80,6 +80,16 @@ def valid?
       super && object && value !~ %r(\A0000)
     end
 
+    ##
+    # Does the literal representation include milliseconds?
+    #
+    # @return [Boolean]
+    # @since 1.1.6
+    def has_milliseconds?
+      self.format("%L").to_i > 0
+    end
+    alias_method :has_ms?, :has_milliseconds?
+
     ##
     # Does the literal representation include a timezone? Note that this is only possible if initialized using a string, or `:lexical` option.
     #
@@ -98,7 +108,7 @@ def has_timezone?
     #
     # @return [String]
     def to_s
-      @string || @object.strftime(FORMAT).sub("+00:00", 'Z')
+      @string || @object.strftime(FORMAT).sub("+00:00", 'Z').sub('.000', '')
     end
 
     ##

diff --git a/lib/rdf/model/literal/time.rb b/lib/rdf/model/literal/time.rb
@@ -12,7 +12,7 @@ module RDF; class Literal
   class Time < Literal
     DATATYPE = RDF::URI("http://www.w3.org/2001/XMLSchema#time")
     GRAMMAR  = %r(\A(\d{2}:\d{2}:\d{2}(?:\.\d+)?)((?:[\+\-]\d{2}:\d{2})|UTC|GMT|Z)?\Z).freeze
-    FORMAT   = '%H:%M:%S%:z'.freeze
+    FORMAT   = '%H:%M:%S.%L%:z'.freeze
 
     ##
     # @param  [String, DateTime, #to_datetime] value
@@ -43,9 +43,9 @@ def initialize(value, datatype: nil, lexical: nil, **options)
     def canonicalize!
       if self.valid?
         @string = if has_timezone?
-          @object.new_offset.new_offset.strftime(FORMAT[0..-4] + 'Z')
+          @object.new_offset.new_offset.strftime(FORMAT[0..-4] + 'Z').sub('.000', '')
         else
-          @object.strftime(FORMAT[0..-4])
+          @object.strftime(FORMAT[0..-4]).sub('.000', '')
         end
       end
       self
@@ -91,7 +91,7 @@ def has_timezone?
     #
     # @return [String]
     def to_s
-      @string || @object.strftime(FORMAT).sub("+00:00", 'Z')
+      @string || @object.strftime(FORMAT).sub("+00:00", 'Z').sub('.000', '')
     end
 
     ##
@@ -122,7 +122,7 @@ def ==(other)
         return super unless other.valid?
         # Compare as strings, as time includes a date portion, and adjusting for UTC
         # can create a mismatch in the date portion.
-        self.object.new_offset.strftime('%H%M%S') == other.object.new_offset.strftime('%H%M%S')
+        self.object.new_offset.strftime('%H%M%S.%L') == other.object.new_offset.strftime('%H%M%S.%L')
       when Literal::DateTime, Literal::Date
         false
       else

diff --git a/lib/rdf/model/statement.rb b/lib/rdf/model/statement.rb
@@ -57,6 +57,9 @@ def self.from(statement, graph_name: nil, **options)
     # @return [RDF::Term]
     attr_accessor :object
 
+    # @return [Hash{Symbol => Object}]
+    attr_accessor :options
+
     ##
     # @overload initialize(**options)
     #   @param  [Hash{Symbol => Object}] options

diff --git a/lib/rdf/model/uri.rb b/lib/rdf/model/uri.rb
@@ -169,8 +169,7 @@ def self.parse(str)
     def self.normalize_path(path)
       output, input = "", path.to_s
       if input.encoding != Encoding::ASCII_8BIT
-        input = input.dup if input.frozen?
-        input = input.force_encoding(Encoding::ASCII_8BIT)
+        input = input.dup.force_encoding(Encoding::ASCII_8BIT)
       end
       until input.empty?
         if input.match(RDS_2A)
@@ -228,8 +227,7 @@ def initialize(*args, validate: false, canonicalize: false, **options)
       if uri
         @value = uri.to_s
         if @value.encoding != Encoding::UTF_8
-          @value = @value.dup if @value.frozen?
-          @value.force_encoding(Encoding::UTF_8)
+          @value.dup.force_encoding(Encoding::UTF_8)
           @value.freeze
         end
       else
@@ -842,16 +840,16 @@ def parse(value)
         user, password = userinfo.to_s.split(':', 2)
         host, port = hostport.to_s.split(':', 2)
 
-        parts[:scheme] = (scheme.force_encoding(Encoding::UTF_8) if scheme)
-        parts[:authority] = (authority.force_encoding(Encoding::UTF_8) if authority)
-        parts[:userinfo] = (userinfo.force_encoding(Encoding::UTF_8) if userinfo)
-        parts[:user] = (user.force_encoding(Encoding::UTF_8) if user)
-        parts[:password] = (password.force_encoding(Encoding::UTF_8) if password)
-        parts[:host] = (host.force_encoding(Encoding::UTF_8) if host)
+        parts[:scheme] = (scheme.dup.force_encoding(Encoding::UTF_8) if scheme)
+        parts[:authority] = (authority.dup.force_encoding(Encoding::UTF_8) if authority)
+        parts[:userinfo] = (userinfo.dup.force_encoding(Encoding::UTF_8) if userinfo)
+        parts[:user] = (user.dup.force_encoding(Encoding::UTF_8) if user)
+        parts[:password] = (password.dup.force_encoding(Encoding::UTF_8) if password)
+        parts[:host] = (host.dup.force_encoding(Encoding::UTF_8) if host)
         parts[:port] = (::URI.decode(port).to_i if port)
-        parts[:path] = (path.to_s.force_encoding(Encoding::UTF_8) unless path.empty?)
-        parts[:query] = (query[1..-1].force_encoding(Encoding::UTF_8) if query)
-        parts[:fragment] = (fragment[1..-1].force_encoding(Encoding::UTF_8) if fragment)
+        parts[:path] = (path.to_s.dup.force_encoding(Encoding::UTF_8) unless path.empty?)
+        parts[:query] = (query[1..-1].dup.force_encoding(Encoding::UTF_8) if query)
+        parts[:fragment] = (fragment[1..-1].dup.force_encoding(Encoding::UTF_8) if fragment)
       end
 
       parts
@@ -869,7 +867,7 @@ def scheme
     # @param [String, #to_s] value
     # @return [RDF::URI] self
     def scheme=(value)
-      object[:scheme] = (value.to_s.force_encoding(Encoding::UTF_8) if value)
+      object[:scheme] = (value.to_s.dup.force_encoding(Encoding::UTF_8) if value)
       @value = nil
       self
     end
@@ -893,7 +891,7 @@ def user
     # @param [String, #to_s] value
     # @return [RDF::URI] self
     def user=(value)
-      object[:user] = (value.to_s.force_encoding(Encoding::UTF_8) if value)
+      object[:user] = (value.to_s.dup.force_encoding(Encoding::UTF_8) if value)
       @object[:userinfo] = format_userinfo("")
       @object[:authority] = format_authority
       @value = nil
@@ -919,7 +917,7 @@ def password
     # @param [String, #to_s] value
     # @return [RDF::URI] self
     def password=(value)
-      object[:password] = (value.to_s.force_encoding(Encoding::UTF_8) if value)
+      object[:password] = (value.to_s.dup.force_encoding(Encoding::UTF_8) if value)
       @object[:userinfo] = format_userinfo("")
       @object[:authority] = format_authority
       @value = nil
@@ -947,7 +945,7 @@ def host
     # @param [String, #to_s] value
     # @return [RDF::URI] self
     def host=(value)
-      object[:host] = (value.to_s.force_encoding(Encoding::UTF_8) if value)
+      object[:host] = (value.to_s.dup.force_encoding(Encoding::UTF_8) if value)
       @object[:authority] = format_authority
       @value = nil
       self
@@ -1010,7 +1008,7 @@ def path=(value)
       if value
         # Always lead with a slash
         value = "/#{value}" if host && value.to_s.match?(/^[^\/]/)
-        object[:path] = value.to_s.force_encoding(Encoding::UTF_8)
+        object[:path] = value.to_s.dup.force_encoding(Encoding::UTF_8)
       else
         object[:path] = nil
       end
@@ -1069,7 +1067,7 @@ def query
     # @param [String, #to_s] value
     # @return [RDF::URI] self
     def query=(value)
-      object[:query] = (value.to_s.force_encoding(Encoding::UTF_8) if value)
+      object[:query] = (value.to_s.dup.force_encoding(Encoding::UTF_8) if value)
       @value = nil
       self
     end
@@ -1093,7 +1091,7 @@ def fragment
     # @param [String, #to_s] value
     # @return [RDF::URI] self
     def fragment=(value)
-      object[:fragment] = (value.to_s.force_encoding(Encoding::UTF_8) if value)
+      object[:fragment] = (value.to_s.dup.force_encoding(Encoding::UTF_8) if value)
       @value = nil
       self
     end
@@ -1118,7 +1116,7 @@ def authority
     # @return [RDF::URI] self
     def authority=(value)
       object.delete_if {|k, v| [:user, :password, :host, :port, :userinfo].include?(k)}
-      object[:authority] = (value.to_s.force_encoding(Encoding::UTF_8) if value)
+      object[:authority] = (value.to_s.dup.force_encoding(Encoding::UTF_8) if value)
       user; password; userinfo; host; port
       @value = nil
       self
@@ -1148,7 +1146,7 @@ def userinfo
     # @return [RDF::URI] self
     def userinfo=(value)
       object.delete_if {|k, v| [:user, :password, :authority].include?(k)}
-      object[:userinfo] = (value.to_s.force_encoding(Encoding::UTF_8) if value)
+      object[:userinfo] = (value.to_s.dup.force_encoding(Encoding::UTF_8) if value)
       user; password; authority
       @value = nil
       self
@@ -1263,6 +1261,26 @@ def request_uri
       return res
     end
 
+    ##
+    # Dump of data needed to reconstitute this object using Marshal.load
+    # This override is needed to avoid serializing @mutex.
+    #
+    # @param [Integer] level The maximum depth of objects to dump.
+    # @return [String] The dump of data needed to reconstitute this object.
+    def _dump(level)
+      value
+    end
+
+    ##
+    # Load dumped data to reconstitute marshaled object
+    # This override is needed to avoid serializing @mutex.
+    #
+    # @param [String] data The dump of data needed to reconstitute this object.
+    # @return [RDF::URI] The reconstituted object.
+    def self._load(data)
+      new(data)
+    end
+
   private
 
     ##
@@ -1274,8 +1292,7 @@ def request_uri
     # @return [String]
     def normalize_segment(value, expr, downcase = false)
       if value
-        value = value.dup if value.frozen?
-        value = value.force_encoding(Encoding::UTF_8)
+        value = value.dup.force_encoding(Encoding::UTF_8)
         decoded = ::URI.decode(value)
         decoded.downcase! if downcase
         ::URI.encode(decoded, /[^(?:#{expr})]/)

diff --git a/lib/rdf/ntriples/reader.rb b/lib/rdf/ntriples/reader.rb
@@ -32,7 +32,7 @@ class Reader < RDF::Reader
     format RDF::NTriples::Format
 
     # @see http://www.w3.org/TR/rdf-testcases/#ntrip_strings
-    ESCAPE_CHARS    = ["\b", "\f", "\t", "\n", "\r", "\"", "\\"].freeze
+    ESCAPE_CHARS    = ["\b", "\f", "\t", "\n", "\r", "\"", "'", "\\"].freeze
     UCHAR4          = /\\u([0-9A-Fa-f]{4,4})/.freeze
     UCHAR8          = /\\U([0-9A-Fa-f]{8,8})/.freeze
     UCHAR           = Regexp.union(UCHAR4, UCHAR8).freeze
@@ -60,7 +60,7 @@ class Reader < RDF::Reader
     # 166s
     PN_CHARS             = /-|[0-9]|#{PN_CHARS_U}|#{U_CHARS2}/.freeze
     # 159s
-    ECHAR                = /\\[tbnrf\\"]/.freeze
+    ECHAR                = /\\[tbnrf"'\\]/.freeze
     # 18
     IRIREF               = /<((?:#{IRI_RANGE}|#{UCHAR})*)>/.freeze
     # 141s
@@ -135,7 +135,6 @@ def self.parse_node(input, **options)
     # @return [RDF::URI]
     def self.parse_uri(input, intern: false, **options)
       if input =~ URIREF
-        uri_str = unescape($1)
         RDF::URI.send(intern ? :intern : :new, unescape($1))
       end
     end
@@ -155,9 +154,16 @@ def self.parse_literal(input, **options)
     end
 
     # cache constants to optimize escaping the escape chars in self.unescape
-    ESCAPE_CHARS_ESCAPED = ESCAPE_CHARS.each_with_object({}) do |escape, memo|
-      memo[escape.inspect[1...-1]] = escape
-    end.freeze
+    ESCAPE_CHARS_ESCAPED = {
+      "\\b"   =>  "\b",
+      "\\f"   =>  "\f",
+      "\\t"   =>  "\t",
+      "\\n"   =>  "\n",
+      "\\r"   =>  "\r",
+      "\\\""  =>  "\"",
+      "\\'"   =>  "'",
+      "\\\\"  =>  "\\"
+    } .freeze
     ESCAPE_CHARS_ESCAPED_REGEXP = Regexp.union(
       ESCAPE_CHARS_ESCAPED.keys
     ).freeze
@@ -171,26 +177,23 @@ def self.parse_literal(input, **options)
     def self.unescape(string)
       # Note: avoiding copying the input string when no escaping is needed
       # greatly reduces the number of allocations and the processing time.
-      unless string.encoding == Encoding::UTF_8
-        string = string.dup.force_encoding(Encoding::UTF_8)
-      end
-
-      has_escape_chars = ESCAPE_CHARS_ESCAPED_REGEXP.match?(string)
-      has_uchar = UCHAR.match?(string)
-
-      string = string.dup if has_escape_chars || has_uchar
+      string = string.dup.force_encoding(Encoding::UTF_8) unless string.encoding == Encoding::UTF_8
+      scanner = StringScanner.new(string)
 
-      # Decode \t|\n|\r|\"|\\ character escapes using Regexp:
-      string.gsub!(ESCAPE_CHARS_ESCAPED_REGEXP) do
-        ESCAPE_CHARS_ESCAPED.fetch($~[0])
-      end if has_escape_chars
+      buffer = ""
 
-      # Decode \uXXXX and \UXXXXXXXX code points:
-      string.gsub!(UCHAR) do
-        [($1 || $2).hex].pack('U*')
-      end if has_uchar
+      while !scanner.eos?
+        buffer << if scanner.scan(ESCAPE_CHARS_ESCAPED_REGEXP)
+          ESCAPE_CHARS_ESCAPED[scanner.matched]
+        elsif scanner.scan(UCHAR)
+          scanner.matched.sub(UCHAR) {[($1 || $2).hex].pack('U*')}
+        else
+          # Scan one character
+          scanner.getch
+        end
+      end
 
-      string
+      buffer
     end
 
     ##
@@ -250,15 +253,15 @@ def read_uriref(intern: false, **options)
         uri.canonicalize! if canonicalize?
         uri
       end
-    rescue ArgumentError => e
+    rescue ArgumentError
       log_error("Invalid URI (found: \"<#{uri_str}>\")", lineno: lineno, token: "<#{uri_str}>", exception: RDF::ReaderError)
     end
 
     ##
     # @return [RDF::Node]
     # @see    http://www.w3.org/TR/rdf-testcases/#ntrip_grammar (nodeID)
     def read_node
-       if node_id = match(NODEID)
+      if node_id = match(NODEID)
         @nodes ||= {}
         @nodes[node_id] ||= RDF::Node.new(node_id)
       end