From b72416e98d1baa213c7143b862c35f3a1fa07f86 Mon Sep 17 00:00:00 2001
From: Gregg Kellogg <gregg@greggkellogg.net>
Date: Fri, 6 Aug 2021 12:46:38 -0700
Subject: [PATCH 1/8] Remove unused travis config.

---
 .travis.yml | 20 --------------------
 1 file changed, 20 deletions(-)
 delete mode 100644 .travis.yml
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 35096bf..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,20 +0,0 @@
-language: ruby
-script: "bundle exec rspec spec"
-env:
-  - CI=true
-  global:
-    - NOKOGIRI_USE_SYSTEM_LIBRARIES=true
-rvm:
-  - 2.4
-  - 2.5
-  - 2.6
-  - 2.7
-  - ruby-head
-  - jruby
-cache: bundler
-sudo: false
-matrix:
-  allow_failures:
-    - rvm: jruby
-    - rvm: ruby-head
-dist: trusty

From 7178b6516ddbd26c74f0ececc585f4fd6425fba3 Mon Sep 17 00:00:00 2001
From: Gregg Kellogg <gregg@greggkellogg.net>
Date: Fri, 6 Aug 2021 12:46:55 -0700
Subject: [PATCH 2/8] Don't use Nokogumbo directly, as it's included in
 Nokogiri >= 1.12.

---
 Gemfile             | 1 -
 README.md           | 2 +-
 lib/ebnf/writer.rb  | 3 +--
 spec/spec_helper.rb | 8 +-------
 4 files changed, 3 insertions(+), 11 deletions(-)

diff --git a/Gemfile b/Gemfile
index 38930f0..54adbe4 100644
--- a/Gemfile
+++ b/Gemfile
@@ -6,7 +6,6 @@ gem 'rdf',            github: "ruby-rdf/rdf",       branch: "develop"
 
 group :development do
   gem 'rdf-spec',   github: "ruby-rdf/rdf-spec",    branch: "develop"
-  gem "nokogumbo",  platforms: :mri
   gem "byebug",     platforms: :mri
   gem 'psych',      platforms: [:mri, :rbx]
   gem "redcarpet",  platforms: :mri
diff --git a/README.md b/README.md
index b64e254..8c79112 100644
--- a/README.md
+++ b/README.md
@@ -93,7 +93,7 @@ Inevitably while implementing a parser for some specific grammar, a developer wi
 
 The {EBNF::Writer} class can be used to write parsed grammars out, either as formatted text, or HTML. Because grammars are written from the Abstract Syntax Tree, represented as [S-Expressions][S-Expression], this provides a means of transforming between grammar formats (e.g., W3C [EBNF][] to [ABNF][]), although with some potential loss in semantic fidelity (case-insensitive string matching vs. case-sensitive matching).
 
-The formatted HTML results are designed to be appropriate for including in specifications. If the [Nokogumbo](https://rubygems.org/gems/nokogumbo) gem list available, the resulting HTML encoded grammar will also be validated.
+The formatted HTML results are designed to be appropriate for including in specifications.
 
 ### Parser Errors
 On a parsing failure, and exception is raised with information that may be useful in determining the source of the error.
diff --git a/lib/ebnf/writer.rb b/lib/ebnf/writer.rb
index 5cb7e97..df83056 100644
--- a/lib/ebnf/writer.rb
+++ b/lib/ebnf/writer.rb
@@ -181,12 +181,11 @@ def initialize(rules, out: $stdout, html: false, format: :ebnf, validate: false,
 
           if validate
             begin
-              require 'nokogumbo'
               # Validate the output HTML
               doc = Nokogiri::HTML5("<!DOCTYPE html>" + html_result, max_errors: 10)
               raise EncodingError, "Errors found in generated HTML:\n  " +
                 doc.errors.map(&:to_s).join("\n  ") unless doc.errors.empty?
-            rescue LoadError
+            rescue LoadError, NoMethodError
               # Skip
             end
           end
diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb
index a609097..1ff2ac4 100644
--- a/spec/spec_helper.rb
+++ b/spec/spec_helper.rb
@@ -9,12 +9,6 @@
 require 'rspec/matchers'
 require 'rspec/its'
 require 'matchers'
-begin
-  have_nokogumbo = true
-  require 'nokogumbo'
-rescue LoadError
-  have_nokogumbo = false
-end
 
 begin
   require 'simplecov'
@@ -47,7 +41,7 @@
 
 RSpec::Matchers.define :be_valid_html do
   match do |actual|
-    return true unless have_nokogumbo
+    return true unless Nokogiri.const_defined?(:HTML5)
     root = Nokogiri::HTML5("<!DOCTYPE html>" + actual, max_parse_errors: 1000)
     @errors = Array(root && root.errors.map(&:to_s))
     @errors.empty?

From a09ca6dcb2108b8cb19b38624ba620f06aae9565 Mon Sep 17 00:00:00 2001
From: Gregg Kellogg <gregg@greggkellogg.net>
Date: Wed, 11 Aug 2021 12:28:54 -0700
Subject: [PATCH 3/8] Update CI for coveralls.

---
 .github/workflows/ci.yml | 10 +++++++---
 Gemfile                  |  4 ++--
 spec/spec_helper.rb      | 15 +++++++++++----
 3 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 0e1a059..3475da9 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -15,11 +15,11 @@ jobs:
     runs-on: ubuntu-latest
     env:
       CI: true
+      ALLOW_FAILURES: ${{ endsWith(matrix.ruby, 'head') }}
     strategy:
       fail-fast: false
       matrix:
         ruby:
-          - 2.4
           - 2.5
           - 2.6
           - 2.7
@@ -36,5 +36,9 @@ jobs:
       - name: Install dependencies
         run: bundle install --jobs 4 --retry 3
       - name: Run tests
-        run: bundle exec rspec spec
- 
+        run: bundle exec rspec spec || $ALLOW_FAILURES
+      - name: Coveralls GitHub Action
+        uses: coverallsapp/github-action@v1.1.2
+        if: "matrix.ruby == '3.0'"
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/Gemfile b/Gemfile
index 54adbe4..97c28cb 100644
--- a/Gemfile
+++ b/Gemfile
@@ -14,6 +14,6 @@ group :development do
 end
 
 group :development, :test do
-  gem 'simplecov',  platforms: :mri
-  gem 'coveralls',  '~> 0.8', platforms: :mri
+  gem 'simplecov', '~> 0.21',  platforms: :mri
+  gem 'simplecov-lcov', '~> 0.8',  platforms: :mri
 end
diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb
index 1ff2ac4..c0d22a4 100644
--- a/spec/spec_helper.rb
+++ b/spec/spec_helper.rb
@@ -12,10 +12,17 @@
 
 begin
   require 'simplecov'
-  require 'coveralls'
+  require 'simplecov-lcov'
+
+  SimpleCov::Formatter::LcovFormatter.config do |config|
+    # Write coverage as a single lcov file for upload by the Coveralls CI step
+    config.report_with_single_file = true
+    config.single_report_path = 'coverage/lcov.info'
+  end
+
   SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter.new([
     SimpleCov::Formatter::HTMLFormatter,
-    Coveralls::SimpleCov::Formatter
+    SimpleCov::Formatter::LcovFormatter
   ])
   SimpleCov.start do
     add_filter "/spec/"
@@ -24,6 +31,8 @@
   STDERR.puts "Coverage Skipped: #{e.message}"
 end
 
+require 'ebnf'
+
 ::RSpec.configure do |c|
   c.filter_run focus: true
   c.run_all_when_everything_filtered = true
@@ -52,6 +61,4 @@
   end
 end
 
-require 'ebnf'
-
 PARSED_EBNF_GRAMMAR = EBNF.parse(File.open(File.expand_path("../../etc/ebnf.ebnf", __FILE__)), format: :native).freeze
\ No newline at end of file

From 6d2e4bbbe58de63579b89d899786e661019ecf81 Mon Sep 17 00:00:00 2001
From: Gregg Kellogg <gregg@greggkellogg.net>
Date: Tue, 24 Aug 2021 11:25:58 -0700
Subject: [PATCH 4/8] Move string unescaping from LL1::Lexer to new Unescape
 module.

---
 lib/ebnf.rb            |  1 +
 lib/ebnf/ll1/lexer.rb  | 52 ++---------------------------
 lib/ebnf/native.rb     |  4 +--
 lib/ebnf/unescape.rb   | 62 +++++++++++++++++++++++++++++++++++
 spec/ll1/lexer_spec.rb | 69 ---------------------------------------
 spec/unescape_spec.rb  | 74 ++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 141 insertions(+), 121 deletions(-)
 create mode 100644 lib/ebnf/unescape.rb
 create mode 100644 spec/unescape_spec.rb

diff --git a/lib/ebnf.rb b/lib/ebnf.rb
index dce92bd..eb00a0c 100755
--- a/lib/ebnf.rb
+++ b/lib/ebnf.rb
@@ -9,6 +9,7 @@ module EBNF
   autoload :PEG,      "ebnf/peg"
   autoload :Rule,     "ebnf/rule"
   autoload :Terminals,"ebnf/terminals"
+  autoload :Unescape, "ebnf/unescape"
   autoload :Writer,   "ebnf/writer"
   autoload :VERSION,  "ebnf/version"
 
diff --git a/lib/ebnf/ll1/lexer.rb b/lib/ebnf/ll1/lexer.rb
index 19be66c..65520d4 100644
--- a/lib/ebnf/ll1/lexer.rb
+++ b/lib/ebnf/ll1/lexer.rb
@@ -32,60 +32,12 @@ module EBNF::LL1
   # @see https://en.wikipedia.org/wiki/Lexical_analysis
   class Lexer
     include Enumerable
-
-    ESCAPE_CHARS         = {
-      '\\t'   => "\t",  # \u0009 (tab)
-      '\\n'   => "\n",  # \u000A (line feed)
-      '\\r'   => "\r",  # \u000D (carriage return)
-      '\\b'   => "\b",  # \u0008 (backspace)
-      '\\f'   => "\f",  # \u000C (form feed)
-      '\\"'  => '"',    # \u0022 (quotation mark, double quote mark)
-      "\\'"  => '\'',   # \u0027 (apostrophe-quote, single quote mark)
-      '\\\\' => '\\'    # \u005C (backslash)
-    }.freeze
-    ESCAPE_CHAR4        = /\\u(?:[0-9A-Fa-f]{4,4})/u.freeze    # \uXXXX
-    ESCAPE_CHAR8        = /\\U(?:[0-9A-Fa-f]{8,8})/u.freeze    # \UXXXXXXXX
-    ECHAR               = /\\./u.freeze                        # More liberal unescaping
-    UCHAR               = /#{ESCAPE_CHAR4}|#{ESCAPE_CHAR8}/n.freeze
+    include ::EBNF::Unescape
 
     ##
     # @return [Regexp] defines whitespace, including comments, otherwise whitespace must be explicit in terminals
     attr_reader :whitespace
 
-    ##
-    # Returns a copy of the given `input` string with all `\uXXXX` and
-    # `\UXXXXXXXX` Unicode codepoint escape sequences replaced with their
-    # unescaped UTF-8 character counterparts.
-    #
-    # @param  [String] string
-    # @return [String]
-    # @see    https://www.w3.org/TR/rdf-sparql-query/#codepointEscape
-    def self.unescape_codepoints(string)
-      string = string.dup
-      string.force_encoding(Encoding::ASCII_8BIT) if string.respond_to?(:force_encoding)
-
-      # Decode \uXXXX and \UXXXXXXXX code points:
-      string = string.gsub(UCHAR) do |c|
-        s = [(c[2..-1]).hex].pack('U*')
-        s.respond_to?(:force_encoding) ? s.force_encoding(Encoding::ASCII_8BIT) : s
-      end
-
-      string.force_encoding(Encoding::UTF_8) if string.respond_to?(:force_encoding) 
-      string
-    end
-
-    ##
-    # Returns a copy of the given `input` string with all string escape
-    # sequences (e.g. `\n` and `\t`) replaced with their unescaped UTF-8
-    # character counterparts.
-    #
-    # @param  [String] input
-    # @return [String]
-    # @see    https://www.w3.org/TR/rdf-sparql-query/#grammarEscapes
-    def self.unescape_string(input)
-      input.gsub(ECHAR) { |escaped| ESCAPE_CHARS[escaped] || escaped[1..-1]}
-    end
-
     ##
     # Tokenizes the given `input` string or stream.
     #
@@ -338,7 +290,7 @@ def ==(other)
       # @return [String]
       def unescape(string)
         if @options[:unescape]
-          Lexer.unescape_string(Lexer.unescape_codepoints(string))
+          EBNF::Unescape.unescape(string)
         else
           string
         end
diff --git a/lib/ebnf/native.rb b/lib/ebnf/native.rb
index 664d01c..c3fbb43 100644
--- a/lib/ebnf/native.rb
+++ b/lib/ebnf/native.rb
@@ -287,10 +287,10 @@ def terminal(s)
       case m = s[0,1]
       when '"', "'" # STRING1 or STRING2
         l, s = s[1..-1].split(m.rstrip, 2)
-        [LL1::Lexer.unescape_string(l), s]
+        [Unescape.unescape_string(l), s]
       when '[' # RANGE, O_RANGE
         l, s = s[1..-1].split(/(?<=[^\\])\]/, 2)
-        [[:range, LL1::Lexer.unescape_string(l)], s]
+        [[:range, Unescape.unescape_string(l)], s]
       when '#' # HEX
         s.match(/(#x\h+)(.*)$/)
         l, s = $1, $2
diff --git a/lib/ebnf/unescape.rb b/lib/ebnf/unescape.rb
new file mode 100644
index 0000000..e501608
--- /dev/null
+++ b/lib/ebnf/unescape.rb
@@ -0,0 +1,62 @@
+# encoding: utf-8
+# Unescape strings
+module EBNF::Unescape
+  ESCAPE_CHARS         = {
+    '\\t'   => "\t",  # \u0009 (tab)
+    '\\n'   => "\n",  # \u000A (line feed)
+    '\\r'   => "\r",  # \u000D (carriage return)
+    '\\b'   => "\b",  # \u0008 (backspace)
+    '\\f'   => "\f",  # \u000C (form feed)
+    '\\"'  => '"',    # \u0022 (quotation mark, double quote mark)
+    "\\'"  => '\'',   # \u0027 (apostrophe-quote, single quote mark)
+    '\\\\' => '\\'    # \u005C (backslash)
+  }.freeze
+  ESCAPE_CHAR4        = /\\u(?:[0-9A-Fa-f]{4,4})/u.freeze    # \uXXXX
+  ESCAPE_CHAR8        = /\\U(?:[0-9A-Fa-f]{8,8})/u.freeze    # \UXXXXXXXX
+  ECHAR               = /\\./u.freeze                        # More liberal unescaping
+  UCHAR               = /#{ESCAPE_CHAR4}|#{ESCAPE_CHAR8}/n.freeze
+
+  ##
+  # Returns a copy of the given `input` string with all `\uXXXX` and
+  # `\UXXXXXXXX` Unicode codepoint escape sequences replaced with their
+  # unescaped UTF-8 character counterparts.
+  #
+  # @param  [String] string
+  # @return [String]
+  # @see    https://www.w3.org/TR/rdf-sparql-query/#codepointEscape
+  def unescape_codepoints(string)
+    string = string.dup
+    string.force_encoding(Encoding::ASCII_8BIT) if string.respond_to?(:force_encoding)
+
+    # Decode \uXXXX and \UXXXXXXXX code points:
+    string = string.gsub(UCHAR) do |c|
+      s = [(c[2..-1]).hex].pack('U*')
+      s.respond_to?(:force_encoding) ? s.force_encoding(Encoding::ASCII_8BIT) : s
+    end
+
+    string.force_encoding(Encoding::UTF_8) if string.respond_to?(:force_encoding) 
+    string
+  end
+  module_function :unescape_codepoints
+
+  ##
+  # Returns a copy of the given `input` string with all string escape
+  # sequences (e.g. `\n` and `\t`) replaced with their unescaped UTF-8
+  # character counterparts.
+  #
+  # @param  [String] input
+  # @return [String]
+  # @see    https://www.w3.org/TR/rdf-sparql-query/#grammarEscapes
+  def unescape_string(input)
+    input.gsub(ECHAR) { |escaped| ESCAPE_CHARS[escaped] || escaped[1..-1]}
+  end
+  module_function :unescape_string
+
+  # Perform both codepoint and string unescaping on the given string
+  # @param [String] string
+  # @return [String]
+  def unescape(string)
+    unescape_string(unescape_codepoints(string))
+  end
+  module_function :unescape
+end
\ No newline at end of file
diff --git a/spec/ll1/lexer_spec.rb b/spec/ll1/lexer_spec.rb
index 76354f9..845feb0 100644
--- a/spec/ll1/lexer_spec.rb
+++ b/spec/ll1/lexer_spec.rb
@@ -25,75 +25,6 @@
     :STRING_LITERAL_QUOTE, :STRING_LITERAL_SINGLE_QUOTE,
     :STRING_LITERAL_LONG_SINGLE_QUOTE, :STRING_LITERAL_LONG_QUOTE
   ]}
-  
-  describe ".unescape_codepoints" do
-    # @see https://www.w3.org/TR/rdf-sparql-query/#codepointEscape
-
-    it "unescapes \\uXXXX codepoint escape sequences" do
-      inputs = {
-        %q(\\u0020)       => %q( ),
-        %q(<ab\\u00E9xy>) => %Q(<ab\xC3\xA9xy>),
-        %q(\\u03B1:a)     => %Q(\xCE\xB1:a),
-        %q(a\\u003Ab)     => %Q(a\x3Ab),
-      }
-      inputs.each do |input, output|
-        output.force_encoding(Encoding::UTF_8)
-        expect(EBNF::LL1::Lexer.unescape_codepoints(input)).to eq output
-      end
-    end
-
-    it "unescapes \\UXXXXXXXX codepoint escape sequences" do
-      inputs = {
-        %q(\\U00000020)   => %q( ),
-        %q(\\U00010000)   => %Q(\xF0\x90\x80\x80),
-        %q(\\U000EFFFF)   => %Q(\xF3\xAF\xBF\xBF),
-      }
-      inputs.each do |input, output|
-        output.force_encoding(Encoding::UTF_8)
-        expect(EBNF::LL1::Lexer.unescape_codepoints(input)).to eq output
-      end
-    end
-
-    context "escaped strings" do
-      {
-        'Dürst' => 'D\\u00FCrst',
-        "é" => '\\u00E9',
-        "€" => '\\u20AC',
-        "resumé" => 'resum\\u00E9',
-      }.each_pair do |unescaped, escaped|
-        it "unescapes #{unescaped.inspect}" do
-          expect(EBNF::LL1::Lexer.unescape_codepoints(escaped)).to eq unescaped
-        end
-      end
-    end
-  end
-
-  describe ".unescape_string" do
-    # @see https://www.w3.org/TR/rdf-sparql-query/#grammarEscapes
-
-    context "escape sequences" do
-      EBNF::LL1::Lexer::ESCAPE_CHARS.each do |escaped, unescaped|
-        it "unescapes #{unescaped.inspect}" do
-          expect(EBNF::LL1::Lexer.unescape_string(escaped)).to eq unescaped
-        end
-      end
-    end
-    
-    context "escaped strings" do
-      {
-        'simple literal' => 'simple literal',
-        'backslash:\\' => 'backslash:\\\\',
-        'dquote:"' => 'dquote:\\"',
-        "newline:\n" => 'newline:\\n',
-        "return\r" => 'return\\r',
-        "tab:\t" => 'tab:\\t',
-      }.each_pair do |unescaped, escaped|
-        it "unescapes #{unescaped.inspect}" do
-          expect(EBNF::LL1::Lexer.unescape_string(escaped)).to eq unescaped
-        end
-      end
-    end
-  end
 
   describe ".tokenize" do
     context "numeric literals" do
diff --git a/spec/unescape_spec.rb b/spec/unescape_spec.rb
new file mode 100644
index 0000000..11416bb
--- /dev/null
+++ b/spec/unescape_spec.rb
@@ -0,0 +1,74 @@
+# coding: utf-8
+$:.unshift ".."
+require 'spec_helper'
+require 'ebnf'
+
+describe EBNF::Unescape do
+  
+  describe ".unescape_codepoints" do
+    # @see https://www.w3.org/TR/rdf-sparql-query/#codepointEscape
+
+    it "unescapes \\uXXXX codepoint escape sequences" do
+      inputs = {
+        %q(\\u0020)       => %q( ),
+        %q(<ab\\u00E9xy>) => %Q(<ab\xC3\xA9xy>),
+        %q(\\u03B1:a)     => %Q(\xCE\xB1:a),
+        %q(a\\u003Ab)     => %Q(a\x3Ab),
+      }
+      inputs.each do |input, output|
+        expect(EBNF::Unescape.unescape_codepoints(input)).to eq output
+      end
+    end
+
+    it "unescapes \\UXXXXXXXX codepoint escape sequences" do
+      inputs = {
+        %q(\\U00000020)   => %q( ),
+        %q(\\U00010000)   => %Q(\xF0\x90\x80\x80),
+        %q(\\U000EFFFF)   => %Q(\xF3\xAF\xBF\xBF),
+      }
+      inputs.each do |input, output|
+        expect(EBNF::Unescape.unescape_codepoints(input)).to eq output
+      end
+    end
+
+    context "escaped strings" do
+      {
+        'Dürst' => 'D\\u00FCrst',
+        "é" => '\\u00E9',
+        "€" => '\\u20AC',
+        "resumé" => 'resum\\u00E9',
+      }.each_pair do |unescaped, escaped|
+        it "unescapes #{unescaped.inspect}" do
+          expect(EBNF::Unescape.unescape_codepoints(escaped)).to eq unescaped
+        end
+      end
+    end
+  end
+
+  describe ".unescape_string" do
+    # @see https://www.w3.org/TR/rdf-sparql-query/#grammarEscapes
+
+    context "escape sequences" do
+      EBNF::Unescape::ESCAPE_CHARS.each do |escaped, unescaped|
+        it "unescapes #{unescaped.inspect}" do
+          expect(EBNF::Unescape.unescape_string(escaped)).to eq unescaped
+        end
+      end
+    end
+    
+    context "escaped strings" do
+      {
+        'simple literal' => 'simple literal',
+        'backslash:\\' => 'backslash:\\\\',
+        'dquote:"' => 'dquote:\\"',
+        "newline:\n" => 'newline:\\n',
+        "return\r" => 'return\\r',
+        "tab:\t" => 'tab:\\t',
+      }.each_pair do |unescaped, escaped|
+        it "unescapes #{unescaped.inspect}" do
+          expect(EBNF::Unescape.unescape_string(escaped)).to eq unescaped
+        end
+      end
+    end
+  end
+end
\ No newline at end of file

From 26e69d8dd7e0c048bdcf79fbd0bf360ad83469d9 Mon Sep 17 00:00:00 2001
From: Gregg Kellogg <gregg@greggkellogg.net>
Date: Tue, 24 Aug 2021 12:59:31 -0700
Subject: [PATCH 5/8] Implement :map and :unescape options to PEG terminals.

---
 lib/ebnf/peg/parser.rb | 21 ++++++++++++++++++++-
 lib/ebnf/peg/rule.rb   | 15 +++++++++++++--
 spec/peg/rule_spec.rb  |  2 +-
 3 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/lib/ebnf/peg/parser.rb b/lib/ebnf/peg/parser.rb
index 95f635f..39bc140 100644
--- a/lib/ebnf/peg/parser.rb
+++ b/lib/ebnf/peg/parser.rb
@@ -55,6 +55,7 @@ def start_options; (@start_hoptions ||= {}); end
       def production_handlers; (@production_handlers ||= {}); end
       def terminal_handlers; (@terminal_handlers ||= {}); end
       def terminal_regexps; (@terminal_regexps ||= {}); end
+      def terminal_options; (@terminal_options ||= {}); end
 
       ##
       # Defines the pattern for a terminal node and a block to be invoked
@@ -75,6 +76,8 @@ def terminal_regexps; (@terminal_regexps ||= {}); end
       # @option options [Hash{String => String}] :map ({})
       #   A mapping from terminals, in lower-case form, to
       #   their canonical value
+      # @option options [Boolean] :unescape
+      #   Cause strings and codepoints to be unescaped.
       # @yield [value, prod]
       # @yieldparam [String] value
       #   The scanned terminal value.
@@ -83,9 +86,11 @@ def terminal_regexps; (@terminal_regexps ||= {}); end
       # @yieldparam [Proc] block
       #   Block passed to initialization for yielding to calling parser.
       #   Should conform to the yield specs for #initialize
+      # @todo FIXME implement map and unescape
       def terminal(term, regexp = nil, **options, &block)
         terminal_regexps[term] = regexp if regexp
         terminal_handlers[term] = block if block_given?
+        terminal_options[term] = options
       end
 
       ##
@@ -180,8 +185,12 @@ def method_missing(method, *args, &block)
     #   Identify the symbol of the starting rule with `start`.
     # @param  [Hash{Symbol => Object}] options
     # @option options[Integer] :high_water passed to lexer
+    # @option options[:upper, :lower] :insensitive_strings
+    #   Perform case-insensitive match of strings not defined as terminals, and map to either upper or lower case.
     # @option options [Logger] :logger for errors/progress/debug.
     # @option options[Integer] :low_water passed to lexer
+    # @option options[Boolean] :seq_hash (false)
+    #   If `true`, sets the default for the value sent to a production handler that is for a `seq` to a hash composed of the flattened consitutent hashes that are otherwise provided.
     # @option options [Symbol, Regexp] :whitespace 
     #   Symbol of whitespace rule (defaults to `@pass`), or a regular expression
     #   for eating whitespace between non-terminal rules (strongly encouraged).
@@ -195,6 +204,7 @@ def method_missing(method, *args, &block)
     # @raise [Exception] Raises exceptions for parsing errors
     #   or errors raised during processing callbacks. Internal
     #   errors are raised using {Error}.
+    # @todo FIXME implement insensitive_strings and seq_hash
     def parse(input = nil, start = nil, rules = nil, **options, &block)
       start ||= options[:start]
       rules ||= options[:rules] || []
@@ -467,10 +477,19 @@ def find_rule(sym)
     #
     # @param [Symbol] sym
     # @return [Regexp]
-    def find_terminal_regexp(sym)
+    def terminal_regexp(sym)
       self.class.terminal_regexps[sym]
     end
 
+    ##
+    # Find the options defined for a terminal
+    #
+    # @param [Symbol] sym
+    # @return [Hash] options given when the terminal was defined
+    def terminal_options(sym)
+      self.class.terminal_options[sym]
+    end
+
     ##
     # Record furthest failure.
     #
diff --git a/lib/ebnf/peg/rule.rb b/lib/ebnf/peg/rule.rb
index 115ba51..3626464 100644
--- a/lib/ebnf/peg/rule.rb
+++ b/lib/ebnf/peg/rule.rb
@@ -1,6 +1,8 @@
 module EBNF::PEG
   # Behaviior for parsing a PEG rule
   module Rule
+    include ::EBNF::Unescape
+
     ##
     # Initialized by parser when loading rules.
     # Used for finding rules and invoking elements of the parse process.
@@ -45,9 +47,18 @@ def parse(input)
         # If the terminal is defined with a regular expression,
         # use that to match the input,
         # otherwise,
-        if regexp = parser.find_terminal_regexp(sym)
-          matched = input.scan(regexp)
+        if regexp = parser.terminal_regexp(sym)
+          term_opts = parser.terminal_options(sym)
+          if matched = input.scan(regexp)
+            # Optionally map matched
+            matched = term_opts.fetch(:map, {}).fetch(matched.downcase, matched)
+
+            # Optionally unescape matched
+            matched = unescape(matched) if term_opts[:unescape]
+          end
+
           result = parser.onTerminal(sym, (matched ? matched : :unmatched))
+
           # Update furthest failure for strings and terminals
           parser.update_furthest_failure(input.pos, input.lineno, sym) if result == :unmatched
           parser.packrat[sym][pos] = {
diff --git a/spec/peg/rule_spec.rb b/spec/peg/rule_spec.rb
index 783f887..edd86d7 100644
--- a/spec/peg/rule_spec.rb
+++ b/spec/peg/rule_spec.rb
@@ -449,7 +449,7 @@
           expect(parser).to receive(:onStart).with(Symbol).and_return({})
           expect(parser).to receive(:onFinish).with(params[:expect]).and_return(params[:expect])
           expect(parser).not_to receive(:onTerminal)
-          expect(parser).to receive(:find_terminal_regexp).with(:rule)
+          expect(parser).to receive(:terminal_regexp).with(:rule)
 
           expect(rule.parse(EBNF::LL1::Scanner.new(params[:input]))).to eql(params[:expect])
         end

From 343951cccc20ed6cf82df3a164359315c751078a Mon Sep 17 00:00:00 2001
From: Gregg Kellogg <gregg@greggkellogg.net>
Date: Tue, 24 Aug 2021 13:49:16 -0700
Subject: [PATCH 6/8] Allow start_production for PEG to specify case
 insensitive_strings, mapping to either :upper or :lower.

---
 lib/ebnf/peg/parser.rb  | 10 +++-------
 lib/ebnf/peg/rule.rb    | 20 +++++++++++---------
 spec/peg/parser_spec.rb | 24 ++++++++++++++++++++++++
 3 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/lib/ebnf/peg/parser.rb b/lib/ebnf/peg/parser.rb
index 39bc140..65307a7 100644
--- a/lib/ebnf/peg/parser.rb
+++ b/lib/ebnf/peg/parser.rb
@@ -73,9 +73,6 @@ def terminal_options; (@terminal_options ||= {}); end
       #   defaults to the expression defined in the associated rule.
       #   If unset, the terminal rule is used for matching.
       # @param [Hash] options
-      # @option options [Hash{String => String}] :map ({})
-      #   A mapping from terminals, in lower-case form, to
-      #   their canonical value
       # @option options [Boolean] :unescape
       #   Cause strings and codepoints to be unescaped.
       # @yield [value, prod]
@@ -86,7 +83,6 @@ def terminal_options; (@terminal_options ||= {}); end
       # @yieldparam [Proc] block
       #   Block passed to initialization for yielding to calling parser.
       #   Should conform to the yield specs for #initialize
-      # @todo FIXME implement map and unescape
       def terminal(term, regexp = nil, **options, &block)
         terminal_regexps[term] = regexp if regexp
         terminal_handlers[term] = block if block_given?
@@ -105,6 +101,8 @@ def terminal(term, regexp = nil, **options, &block)
       #   Options which are returned from {Parser#onStart}.
       # @option options [Boolean] :as_hash (false)
       #   If the production is a `seq`, causes the value to be represented as a single hash, rather than an array of individual hashes for each sub-production. Note that this is not always advisable due to the possibility of repeated productions within the sequence.
+      # @option options[:upper, :lower] :insensitive_strings
+      #   Perform case-insensitive match of strings not defined as terminals, and map to either upper or lower case.
       # @yield [data, block]
       # @yieldparam [Hash] data
       #   A Hash defined for the current production, during :start
@@ -185,8 +183,6 @@ def method_missing(method, *args, &block)
     #   Identify the symbol of the starting rule with `start`.
     # @param  [Hash{Symbol => Object}] options
     # @option options[Integer] :high_water passed to lexer
-    # @option options[:upper, :lower] :insensitive_strings
-    #   Perform case-insensitive match of strings not defined as terminals, and map to either upper or lower case.
     # @option options [Logger] :logger for errors/progress/debug.
     # @option options[Integer] :low_water passed to lexer
     # @option options[Boolean] :seq_hash (false)
@@ -204,7 +200,7 @@ def method_missing(method, *args, &block)
     # @raise [Exception] Raises exceptions for parsing errors
     #   or errors raised during processing callbacks. Internal
     #   errors are raised using {Error}.
-    # @todo FIXME implement insensitive_strings and seq_hash
+    # @todo FIXME implement seq_hash
     def parse(input = nil, start = nil, rules = nil, **options, &block)
       start ||= options[:start]
       rules ||= options[:rules] || []
diff --git a/lib/ebnf/peg/rule.rb b/lib/ebnf/peg/rule.rb
index 3626464..dec0921 100644
--- a/lib/ebnf/peg/rule.rb
+++ b/lib/ebnf/peg/rule.rb
@@ -72,6 +72,7 @@ def parse(input)
         eat_whitespace(input)
       end
       start_options = parser.onStart(sym)
+      string_regexp_opts = start_options[:insensitive_strings] ? Regexp::IGNORECASE : 0
 
       result = case expr.first
       when :alt
@@ -85,7 +86,7 @@ def parse(input)
             raise "No rule found for #{prod}" unless rule
             rule.parse(input)
           when String
-            input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
+            input.scan(Regexp.new(Regexp.quote(prod), string_regexp_opts)) || :unmatched
           end
           if alt == :unmatched
             # Update furthest failure for strings and terminals
@@ -123,7 +124,7 @@ def parse(input)
           raise "No rule found for #{prod}" unless rule
           rule.parse(input)
         when String
-          input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
+          input.scan(Regexp.new(Regexp.quote(prod), string_regexp_opts)) || :unmatched
         end
         if res != :unmatched
           # Update furthest failure for terminals
@@ -134,7 +135,7 @@ def parse(input)
         end
       when :opt
         # Result is the matched value or nil
-        opt = rept(input, 0, 1, expr[1])
+        opt = rept(input, 0, 1, expr[1], string_regexp_opts)
 
         # Update furthest failure for strings and terminals
         parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
@@ -142,7 +143,7 @@ def parse(input)
       when :plus
         # Result is an array of all expressions while they match,
         # at least one must match
-        plus = rept(input, 1, '*', expr[1])
+        plus = rept(input, 1, '*', expr[1], string_regexp_opts)
 
         # Update furthest failure for strings and terminals
         parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
@@ -157,7 +158,7 @@ def parse(input)
       when :rept
         # Result is an array of all expressions while they match,
         # an empty array of none match
-        rept = rept(input, expr[1], expr[2], expr[3])
+        rept = rept(input, expr[1], expr[2], expr[3], string_regexp_opts)
 
         # # Update furthest failure for strings and terminals
         parser.update_furthest_failure(input.pos, input.lineno, expr[3]) if terminal?
@@ -172,7 +173,7 @@ def parse(input)
             raise "No rule found for #{prod}" unless rule
             rule.parse(input)
           when String
-            input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
+            input.scan(Regexp.new(Regexp.quote(prod), string_regexp_opts)) || :unmatched
           end
           if res == :unmatched
             # Update furthest failure for strings and terminals
@@ -193,7 +194,7 @@ def parse(input)
       when :star
         # Result is an array of all expressions while they match,
         # an empty array of none match
-        star = rept(input, 0, '*', expr[1])
+        star = rept(input, 0, '*', expr[1], string_regexp_opts)
 
         # Update furthest failure for strings and terminals
         parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
@@ -225,8 +226,9 @@ def parse(input)
     # @param [Integer] max
     #   If it is an integer, it stops matching after max entries.
     # @param [Symbol, String] prod
+    # @param [Integer] string_regexp_opts
     # @return [:unmatched, Array]
-    def rept(input, min, max, prod)
+    def rept(input, min, max, prod, string_regexp_opts)
       result = []
 
       case prod
@@ -238,7 +240,7 @@ def rept(input, min, max, prod)
           result << res
         end
       when String
-        while (res = input.scan(Regexp.new(Regexp.quote(prod)))) && (max == '*' || result.length < max)
+        while (res = input.scan(Regexp.new(Regexp.quote(prod), string_regexp_opts))) && (max == '*' || result.length < max)
           eat_whitespace(input) unless terminal?
           result << res
         end
diff --git a/spec/peg/parser_spec.rb b/spec/peg/parser_spec.rb
index 5fffbc5..74507fd 100644
--- a/spec/peg/parser_spec.rb
+++ b/spec/peg/parser_spec.rb
@@ -11,6 +11,8 @@ class PegParserTest
   before(:all) {
     PegParserTest.start_production(:term) {"foo"}
     PegParserTest.production(:term) {"foo"}
+    PegParserTest.start_production(:toLower) {|value| value}
+    PegParserTest.start_production(:toUpper) {|value| value}
     PegParserTest.terminal(:escape, /escape/) {"foo"}
     PegParserTest.terminal(:unescape, /unescape/, unescape: true) {"foo"}
   }
@@ -94,6 +96,28 @@ class PegParserTest
       end
     end
 
+    context "case insensitive string matching" do
+      let(:start) {:expression}
+      let(:grammar) {%{(
+        (rule expression "1" (alt upper lower))
+        (rule upper "2" (seq "uPpEr"))
+        (rule lower "3" (seq "LoWeR"))
+      )}}
+      let(:rules) {EBNF.parse(grammar, format: :sxp).make_peg.ast}
+
+      {
+        "UPPER" => "UPPER",
+        "upper" => "UPPER",
+        "LOWER" => "lower",
+        "lower" => "lower",
+      }.each do |input, expected|
+        it "parses #{input.inspect} to #{expected.inspect}" do
+          output = PegParserTest.new.parse(input, start, rules, debug: 3, logger: logger)
+          expect(output).to produce(expected, logger)
+        end
+      end
+    end
+
     context "with backtracking" do
       let(:start) {:expression}
       let(:grammar) {%{(

From cf89c77f444ef0599ffb58539d02e6204375178b Mon Sep 17 00:00:00 2001
From: Gregg Kellogg <gregg@greggkellogg.net>
Date: Wed, 25 Aug 2021 14:33:58 -0700
Subject: [PATCH 7/8] Actually do transformation of matched string when
 insensitive_strings is set.

---
 lib/ebnf/peg/parser.rb  |  2 +-
 lib/ebnf/peg/rule.rb    | 24 +++++++++++++++++++-----
 spec/peg/parser_spec.rb | 20 ++++++++++----------
 3 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/lib/ebnf/peg/parser.rb b/lib/ebnf/peg/parser.rb
index 65307a7..79a7270 100644
--- a/lib/ebnf/peg/parser.rb
+++ b/lib/ebnf/peg/parser.rb
@@ -86,7 +86,7 @@ def terminal_options; (@terminal_options ||= {}); end
       def terminal(term, regexp = nil, **options, &block)
         terminal_regexps[term] = regexp if regexp
         terminal_handlers[term] = block if block_given?
-        terminal_options[term] = options
+        terminal_options[term] = options.freeze
       end
 
       ##
diff --git a/lib/ebnf/peg/rule.rb b/lib/ebnf/peg/rule.rb
index dec0921..305543a 100644
--- a/lib/ebnf/peg/rule.rb
+++ b/lib/ebnf/peg/rule.rb
@@ -86,7 +86,12 @@ def parse(input)
             raise "No rule found for #{prod}" unless rule
             rule.parse(input)
           when String
-            input.scan(Regexp.new(Regexp.quote(prod), string_regexp_opts)) || :unmatched
+            s = input.scan(Regexp.new(Regexp.quote(prod), string_regexp_opts))
+            case start_options[:insensitive_strings]
+            when :lower then s && s.downcase
+            when :upper then s && s.upcase
+            else s
+            end || :unmatched
           end
           if alt == :unmatched
             # Update furthest failure for strings and terminals
@@ -135,7 +140,7 @@ def parse(input)
         end
       when :opt
         # Result is the matched value or nil
-        opt = rept(input, 0, 1, expr[1], string_regexp_opts)
+        opt = rept(input, 0, 1, expr[1], string_regexp_opts, **start_options)
 
         # Update furthest failure for strings and terminals
         parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
@@ -173,7 +178,12 @@ def parse(input)
             raise "No rule found for #{prod}" unless rule
             rule.parse(input)
           when String
-            input.scan(Regexp.new(Regexp.quote(prod), string_regexp_opts)) || :unmatched
+            s = input.scan(Regexp.new(Regexp.quote(prod), string_regexp_opts))
+            case start_options[:insensitive_strings]
+            when :lower then s && s.downcase
+            when :upper then s && s.upcase
+            else s
+            end || :unmatched
           end
           if res == :unmatched
             # Update furthest failure for strings and terminals
@@ -228,7 +238,7 @@ def parse(input)
     # @param [Symbol, String] prod
     # @param [Integer] string_regexp_opts
     # @return [:unmatched, Array]
-    def rept(input, min, max, prod, string_regexp_opts)
+    def rept(input, min, max, prod, string_regexp_opts, **options)
       result = []
 
       case prod
@@ -242,7 +252,11 @@ def rept(input, min, max, prod, string_regexp_opts)
       when String
         while (res = input.scan(Regexp.new(Regexp.quote(prod), string_regexp_opts))) && (max == '*' || result.length < max)
           eat_whitespace(input) unless terminal?
-          result << res
+          result << case options[:insensitive_strings]
+          when :lower then res.downcase
+          when :upper then res.upcase
+          else res
+          end
         end
       end
 
diff --git a/spec/peg/parser_spec.rb b/spec/peg/parser_spec.rb
index 74507fd..9d31d3d 100644
--- a/spec/peg/parser_spec.rb
+++ b/spec/peg/parser_spec.rb
@@ -11,8 +11,8 @@ class PegParserTest
   before(:all) {
     PegParserTest.start_production(:term) {"foo"}
     PegParserTest.production(:term) {"foo"}
-    PegParserTest.start_production(:toLower) {|value| value}
-    PegParserTest.start_production(:toUpper) {|value| value}
+    PegParserTest.start_production(:toLower, insensitive_strings: :lower) {|value| value}
+    PegParserTest.start_production(:toUpper, insensitive_strings: :upper) {|value| value}
     PegParserTest.terminal(:escape, /escape/) {"foo"}
     PegParserTest.terminal(:unescape, /unescape/, unescape: true) {"foo"}
   }
@@ -24,7 +24,7 @@ class PegParserTest
   describe "ClassMethods" do
     describe "production" do
       it "adds as a start_handler" do
-        expect(PegParserTest.start_handlers.keys).to eq [:term]
+        expect(PegParserTest.start_handlers.keys).to eq [:term, :toLower, :toUpper]
         expect(PegParserTest.start_handlers[:term]).to be_a(Proc)
       end
       it "adds as a production_handler" do
@@ -99,17 +99,17 @@ class PegParserTest
     context "case insensitive string matching" do
       let(:start) {:expression}
       let(:grammar) {%{(
-        (rule expression "1" (alt upper lower))
-        (rule upper "2" (seq "uPpEr"))
-        (rule lower "3" (seq "LoWeR"))
+        (rule expression "1" (alt toUpper toLower))
+        (rule toUpper "2" (seq "uPpEr"))
+        (rule toLower "3" (seq "LoWeR"))
       )}}
       let(:rules) {EBNF.parse(grammar, format: :sxp).make_peg.ast}
 
       {
-        "UPPER" => "UPPER",
-        "upper" => "UPPER",
-        "LOWER" => "lower",
-        "lower" => "lower",
+        "UPPER" => [{uPpEr: "UPPER"}],
+        "upper" => [{uPpEr: "UPPER"}],
+        "LOWER" => [{LoWeR: "lower"}],
+        "lower" => [{LoWeR: "lower"}],
       }.each do |input, expected|
         it "parses #{input.inspect} to #{expected.inspect}" do
           output = PegParserTest.new.parse(input, start, rules, debug: 3, logger: logger)

From f1c081921786b2b2ef0255ee145b6361bd49c848 Mon Sep 17 00:00:00 2001
From: Gregg Kellogg <gregg@greggkellogg.net>
Date: Wed, 25 Aug 2021 14:38:28 -0700
Subject: [PATCH 8/8] Version 2.2.0.

---
 VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VERSION b/VERSION
index ac2cdeb..ccbccc3 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.1.3
+2.2.0