diff --git a/.coveralls.yml b/.coveralls.yml new file mode 100644 index 0000000..a67b3d9 --- /dev/null +++ b/.coveralls.yml @@ -0,0 +1 @@ +repo_token: nZpCGeEQmrpsuh3Er4qfKarr00VUGU5Lx diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..0e1a059 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,40 @@ +# This workflow runs continuous CI across different versions of ruby on all branches and pull requests to develop. + +name: CI +on: + push: + branches: [ '**' ] + pull_request: + branches: [ develop ] + workflow_dispatch: + +jobs: + tests: + name: Ruby ${{ matrix.ruby }} + if: "contains(github.event.commits[0].message, '[ci skip]') == false" + runs-on: ubuntu-latest + env: + CI: true + strategy: + fail-fast: false + matrix: + ruby: + - 2.4 + - 2.5 + - 2.6 + - 2.7 + - 3.0 + - ruby-head + - jruby + steps: + - name: Clone repository + uses: actions/checkout@v2 + - name: Set up Ruby + uses: ruby/setup-ruby@v1 + with: + ruby-version: ${{ matrix.ruby }} + - name: Install dependencies + run: bundle install --jobs 4 --retry 3 + - name: Run tests + run: bundle exec rspec spec + diff --git a/.travis.yml b/.travis.yml index 46eb519..35096bf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,10 +9,12 @@ rvm: - 2.5 - 2.6 - 2.7 + - ruby-head - jruby cache: bundler sudo: false matrix: allow_failures: - rvm: jruby + - rvm: ruby-head dist: trusty diff --git a/Gemfile b/Gemfile index 16b7c06..38930f0 100644 --- a/Gemfile +++ b/Gemfile @@ -6,6 +6,7 @@ gem 'rdf', github: "ruby-rdf/rdf", branch: "develop" group :development do gem 'rdf-spec', github: "ruby-rdf/rdf-spec", branch: "develop" + gem "nokogumbo", platforms: :mri gem "byebug", platforms: :mri gem 'psych', platforms: [:mri, :rbx] gem "redcarpet", platforms: :mri @@ -16,5 +17,4 @@ end group :development, :test do gem 'simplecov', platforms: :mri gem 'coveralls', '~> 0.8', platforms: :mri - gem 'awesome_print', github: 'MatthiasWinkelmann/awesome_print' end diff --git a/README.md 
b/README.md index 8a02e3a..b64e254 100644 --- a/README.md +++ b/README.md @@ -3,8 +3,9 @@ [EBNF][] parser and generic parser generator. [![Gem Version](https://badge.fury.io/rb/ebnf.png)](https://badge.fury.io/rb/ebnf) -[![Build Status](https://secure.travis-ci.org/dryruby/ebnf.png?branch=master)](https://travis-ci.org/dryruby/ebnf) -[![Coverage Status](https://coveralls.io/repos/dryruby/ebnf/badge.svg)](https://coveralls.io/r/dryruby/ebnf) +[![Build Status](https://github.com/dryruby/ebnf/workflows/CI/badge.svg?branch=develop)](https://github.com/dryruby/ebnf/actions?query=workflow%3ACI) +[![Coverage Status](https://coveralls.io/repos/dryruby/ebnf/badge.svg?branch=develop)](https://coveralls.io/r/dryruby/ebnf?branch=develop) +[![Gitter chat](https://badges.gitter.im/ruby-rdf/rdf.png)](https://gitter.im/ruby-rdf/rdf) ## Description This is a [Ruby][] implementation of an [EBNF][] and [BNF][] parser and parser generator. @@ -92,7 +93,7 @@ Inevitably while implementing a parser for some specific grammar, a developer wi The {EBNF::Writer} class can be used to write parsed grammars out, either as formatted text, or HTML. Because grammars are written from the Abstract Syntax Tree, represented as [S-Expressions][S-Expression], this provides a means of transforming between grammar formats (e.g., W3C [EBNF][] to [ABNF][]), although with some potential loss in semantic fidelity (case-insensitive string matching vs. case-sensitive matching). -The formatted HTML results are designed to be appropriate for including in specifications. +The formatted HTML results are designed to be appropriate for including in specifications. If the [Nokogumbo](https://rubygems.org/gems/nokogumbo) gem is available, the resulting HTML encoded grammar will also be validated. ### Parser Errors On a parsing failure, and exception is raised with information that may be useful in determining the source of the error. 
diff --git a/Rakefile b/Rakefile old mode 100644 new mode 100755 diff --git a/VERSION b/VERSION index eca07e4..ac2cdeb 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.1.2 +2.1.3 diff --git a/bin/ebnf b/bin/ebnf index ece8d9a..fca115d 100755 --- a/bin/ebnf +++ b/bin/ebnf @@ -34,7 +34,7 @@ OPT_ARGS = [ ["--prefix", "-p", GetoptLong::REQUIRED_ARGUMENT,"Prefix to use when generating Turtle"], ["--progress", "-v", GetoptLong::NO_ARGUMENT, "Detail on execution"], ["--renumber", GetoptLong::NO_ARGUMENT, "Renumber parsed reules"], - ["--validate", GetoptLong::NO_ARGUMENT, "Validate grammar"], + ["--validate", GetoptLong::NO_ARGUMENT, "Validate grammar and any generated HTML"], ["--help", "-?", GetoptLong::NO_ARGUMENT, "This message"] ] def usage @@ -67,7 +67,7 @@ opts.each do |opt, arg| end options[:format] = arg.to_sym when '--format' - unless %w(abnf abnfh ebnf html isoebnf isoebnfh rb sxp).include?(arg) + unless %w(abnf abnfh ebnf html isoebnf isoebnfh rb sxp ttl).include?(arg) STDERR.puts("unrecognized output format #{arg}") usage end @@ -99,11 +99,11 @@ ebnf.renumber! 
if options[:renumber] res = case options[:output_format] when :abnf then ebnf.to_s(format: :abnf) -when :abnfh then ebnf.to_html(format: :abnf) +when :abnfh then ebnf.to_html(format: :abnf, validate: options[:validate]) when :ebnf then ebnf.to_s -when :html then ebnf.to_html +when :html then ebnf.to_html(validate: options[:validate]) when :isoebnf then ebnf.to_s(format: :isoebnf) -when :isoebnfh then ebnf.to_html(format: :isoebnf) +when :isoebnfh then ebnf.to_html(format: :isoebnf, validate: options[:validate]) when :sxp then ebnf.to_sxp when :ttl then ebnf.to_ttl(options[:prefix], options[:namespace]) when :rb then ebnf.to_ruby(out, grammarFile: ARGV[0], **options) diff --git a/ebnf.gemspec b/ebnf.gemspec index 31dd18d..f1e2ecd 100755 --- a/ebnf.gemspec +++ b/ebnf.gemspec @@ -8,7 +8,7 @@ Gem::Specification.new do |gem| gem.name = "ebnf" gem.homepage = "https://github.com/dryruby/ebnf" gem.license = 'Unlicense' - gem.summary = "EBNF parser and parser generator." + gem.summary = "EBNF parser and parser generator in Ruby." gem.description = %q{EBNF is a Ruby parser for W3C EBNF and a parser generator for PEG and LL(1). 
Also includes parsing modes for ISO EBNF and ABNF.} gem.authors = ['Gregg Kellogg'] @@ -28,11 +28,13 @@ Gem::Specification.new do |gem| gem.add_runtime_dependency 'scanf', '~> 1.0' gem.add_runtime_dependency 'rdf', '~> 3.1' # Required by sxp gem.add_runtime_dependency 'htmlentities', '~> 4.3' + gem.add_runtime_dependency 'unicode-types', '~> 1.6' + gem.add_runtime_dependency 'amazing_print', '~> 1.2' gem.add_development_dependency 'rdf-spec', '~> 3.1' gem.add_development_dependency 'rdf-turtle', '~> 3.1' gem.add_development_dependency 'nokogiri', '~> 1.10' gem.add_development_dependency 'erubis', '~> 2.7' - gem.add_development_dependency 'rspec', '~> 3.9' + gem.add_development_dependency 'rspec', '~> 3.10' gem.add_development_dependency 'rspec-its', '~> 1.3' gem.add_development_dependency 'yard', '~> 0.9' gem.add_development_dependency 'rake', '~> 13.0' diff --git a/etc/doap.ttl b/etc/doap.ttl index 5a4f5c6..ae73f46 100644 --- a/etc/doap.ttl +++ b/etc/doap.ttl @@ -12,7 +12,7 @@ doap:name "ebnf" ; doap:homepage ; doap:license ; - doap:shortdesc "EBNF parser and parser generator"@en ; + doap:shortdesc "EBNF parser and parser generator in Ruby."@en ; doap:description "EBNF is a Ruby parser for W3C EBNF and a parser generator for PEG and LL(1). Also includes parsing modes for ISO EBNF and ABNF."@en ; doap:created "2011-08-29"^^xsd:date ; doap:programming-language "Ruby" ; @@ -34,7 +34,4 @@ doap:maintainer ; doap:documenter ; foaf:maker ; - dc:title "ebnf" ; - dc:description "EBNF is a Ruby parser for W3C EBNF and a parser generator for PEG and LL(1). Also includes parsing modes for ISO EBNF and ABNF."@en ; - dc:date "2011-08-29"^^xsd:date ; dc:creator . 
diff --git a/etc/ebnf.html b/etc/ebnf.html index d1c4316..da753ae 100644 --- a/etc/ebnf.html +++ b/etc/ebnf.html @@ -11,7 +11,7 @@ [2] declaration ::= - "@terminals" | pass + "@terminals" | pass [3] @@ -53,61 +53,24 @@ [9] primary ::= - HEX - - - [9] - - | - SYMBOL - - - [9] - - | - O_RANGE - - - [9] - - | - RANGE - - - [9] - - | - STRING1 - - - [9] - - | - STRING2 - - - [9] - - | - ( "(" expression ")") + HEX | SYMBOL | O_RANGE | RANGE | STRING1 | STRING2 | ( "(" expression ")") [10] pass ::= - "@pass" expression + "@pass" expression - - @terminals - + + @terminals - Productions for terminals + # Productions for terminals [11] LHS ::= - ( "[" SYMBOL "]" #x20+ ) ? SYMBOL #x20* "::=" + ( "[" SYMBOL "]" #x20+ ) ? SYMBOL #x20* "::=" [12] @@ -119,91 +82,37 @@ [13] HEX ::= - "#x" ( [ a-f] | [ A-F] | [ 0-9] ) + + "#x" ( [ a-f] | [ A-F] | [ 0-9] ) + [14] RANGE ::= - "[" - - - [14] - - - ( ( R_CHAR "-" R_CHAR)( HEX "-" HEX) | R_CHAR | HEX) + - - - [14] - - - "-"? - - - [14] - - - ( "]" - LHS) + "[" ( ( R_CHAR "-" R_CHAR) | ( HEX "-" HEX) | R_CHAR | HEX) + "-"? ( "]" - LHS) [15] O_RANGE ::= - "[^" - - - [15] - - - ( ( R_CHAR "-" R_CHAR)( HEX "-" HEX) | R_CHAR | HEX) + - - - [15] - - - "-"? - - - [15] - - - "]" + "[^" ( ( R_CHAR "-" R_CHAR) | ( HEX "-" HEX) | R_CHAR | HEX) + "-"? "]" [16] STRING1 ::= - '"' ( CHAR - '"') * '"' + '"' ( CHAR - '"') * '"' [17] STRING2 ::= - "'" ( CHAR - "'") * "'" + "'" ( CHAR - "'") * "'" [18] CHAR ::= - [ #x09#x0A#x0D] - - - [18] - - | - [ #x20-#xD7FF] - - - [18] - - | - [ #xE000-#xFFFD] - - - [18] - - | - [ #x00010000-#x0010FFFF] + [ #x09#x0A#x0D] | [ #x20-#xD7FF] | [ #xE000-#xFFFD] | [ #x00010000-#x0010FFFF] [19] @@ -224,28 +133,24 @@ [ #x09#x0A#x0D#x20] - [21] - + | - ( ( ( "#" - "#x") | "//") [ ^#x0A#x0D] * ) + ( ( ( "#" - "#x") | "//") [ ^#x0A#x0D] * ) - [21] - + | - ( "/*" ( ( "*" [ ^/] ) ? | [ ^*] ) * "*/") + ( "/*" ( ( "*" [ ^/] ) ? | [ ^*] ) * "*/") - [21] - + | - ( "(*" ( ( "*" [ ^)] ) ? | [ ^*] ) * "*)") + ( "(*" ( ( "*" [ ^)] ) ? 
| [ ^*] ) * "*)") - - @pass - - + + @pass + PASS diff --git a/etc/ebnf.ll1.rb b/etc/ebnf.ll1.rb index 9dc00c1..8e40933 100644 --- a/etc/ebnf.ll1.rb +++ b/etc/ebnf.ll1.rb @@ -1,4 +1,4 @@ -# This file is automatically generated by ebnf version 2.0.0 +# This file is automatically generated by ebnf version 2.1.2 # Derived from etc/ebnf.ebnf module Meta START = :ebnf diff --git a/etc/ebnf.peg.rb b/etc/ebnf.peg.rb index 7492826..617ba15 100644 --- a/etc/ebnf.peg.rb +++ b/etc/ebnf.peg.rb @@ -1,4 +1,4 @@ -# This file is automatically generated by ebnf version 2.0.0 +# This file is automatically generated by ebnf version 2.1.2 # Derived from etc/ebnf.ebnf module EBNFMeta RULES = [ diff --git a/lib/ebnf/base.rb b/lib/ebnf/base.rb index 90d8f71..df84bc7 100644 --- a/lib/ebnf/base.rb +++ b/lib/ebnf/base.rb @@ -220,9 +220,10 @@ def to_s(format: :ebnf) # Output formatted EBNF as HTML # # @param [:abnf, :ebnf, :isoebnf] format (:ebnf) + # @param [Boolean] validate (false) validate generated HTML. # @return [String] - def to_html(format: :ebnf) - Writer.html(*ast, format: format) + def to_html(format: :ebnf, validate: false) + Writer.html(*ast, format: format, validate: validate) end ## diff --git a/lib/ebnf/writer.rb b/lib/ebnf/writer.rb index 8b5b66b..5cb7e97 100644 --- a/lib/ebnf/writer.rb +++ b/lib/ebnf/writer.rb @@ -2,6 +2,7 @@ require 'rdf' require 'strscan' unless defined?(StringScanner) require "ostruct" +require 'unicode/types' ## # Serialize ruleset back to EBNF @@ -86,22 +87,23 @@ def self.write(out, *rules, format: :ebnf) # # @param [Array] rules # @param [:abnf, :ebnf, :isoebnf] format (:ebnf) + # @param [Boolean] validate (false) validate generated HTML. 
# @return [Object] - def self.html(*rules, format: :ebnf) + def self.html(*rules, format: :ebnf, validate: false) require 'stringio' unless defined?(StringIO) buf = StringIO.new - Writer.new(rules, out: buf, html: true, format: format) + Writer.new(rules, out: buf, html: true, format: format, validate: validate) buf.string end ## # @param [Array] rules + # @param [:abnf, :ebnf, :isoebnf] format (:ebnf) + # @param [Boolean] html (false) generate HTML output + # @param [Boolean] validate (false) validate generated HTML. # @param [Hash{Symbol => Object}] options # @param [#write] out ($stdout) - # @param [:abnf, :ebnf, :isoebnf] format (:ebnf) - # @option options [Symbol] format - # @option options [Boolean] html (false) - def initialize(rules, out: $stdout, html: false, format: :ebnf, **options) + def initialize(rules, out: $stdout, html: false, format: :ebnf, validate: false, **options) @options = options.merge(html: html) return if rules.empty? @@ -174,7 +176,22 @@ def initialize(rules, out: $stdout, html: false, format: :ebnf, **options) end end end.flatten - out.write eruby.evaluate(format: format, rules: formatted_rules) + + html_result = eruby.evaluate(format: format, rules: formatted_rules) + + if validate + begin + require 'nokogumbo' + # Validate the output HTML + doc = Nokogiri::HTML5("" + html_result, max_errors: 10) + raise EncodingError, "Errors found in generated HTML:\n " + + doc.errors.map(&:to_s).join("\n ") unless doc.errors.empty? 
+ rescue LoadError + # Skip + end + end + + out.write html_result return rescue LoadError $stderr.puts "Generating HTML requires erubis and htmlentities gems to be loaded" @@ -347,16 +364,20 @@ def escape_ebnf_hex(u) end char = fmt % u.ord if @options[:html] - if u.ord <= 0x20 - char = %(#{@coder.encode char}) + char = if u.ord <= 0x20 + %(#{@coder.encode char}) + elsif u.ord == 0x22 + %(>") elsif u.ord < 0x7F - char = %(#{@coder.encode char}) + %(#{@coder.encode char}) elsif u.ord == 0x7F - char = %(#{@coder.encode char}) + %(#{@coder.encode char}) elsif u.ord <= 0xFF - char = %(#{char}) + %(#{char}) + elsif (%w(Control Private-use Surrogate Noncharacter Reserved) - ::Unicode::Types.of(u)).empty? + %(#{char}) else - char = %(#{char}) + %(#{char}) end %(#{char}) else @@ -455,7 +476,7 @@ def format_abnf(expr, sep: nil, embedded: false, sensitive: true) # Format a single-character string, prefering hex for non-main ASCII def format_abnf_char(c) if /[\x20-\x21\x23-\x7E]/.match?(c) - c.inspect + @options[:html] ? 
%("#{@coder.encode c}") : c.inspect else escape_abnf_hex(c) end @@ -536,14 +557,16 @@ def escape_abnf_hex(u) if @options[:html] if u.ord <= 0x20 char = %(#{@coder.encode char}) - elsif u.ord <= 0x7F + elsif u.ord == 0x22 + %(>") + elsif u.ord < 0x7F char = %(#{@coder.encode char}) elsif u.ord == 0x7F char = %(#{@coder.encode char}) elsif u.ord <= 0xFF char = %(#{char}) else - char = %(#{char}) + char = %(#{char}) end %(#{char}) else @@ -686,7 +709,7 @@ def format_isoebnf_range(string) <% for rule in @rules %> - > + > <% if rule.id %> ><%= rule.id %> <% end %> diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index e1a4b1a..a609097 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -2,11 +2,20 @@ $:.unshift File.dirname(__FILE__) require 'bundler/setup' +require 'amazing_print' require 'rdf/spec' require 'rdf/spec/matchers' require 'rspec' +require 'rspec/matchers' require 'rspec/its' require 'matchers' +begin + have_nokogumbo = true + require 'nokogumbo' +rescue LoadError + have_nokogumbo = false +end + begin require 'simplecov' require 'coveralls' @@ -36,6 +45,19 @@ end end +RSpec::Matchers.define :be_valid_html do + match do |actual| + return true unless have_nokogumbo + root = Nokogiri::HTML5("" + actual, max_parse_errors: 1000) + @errors = Array(root && root.errors.map(&:to_s)) + @errors.empty? 
+ end + + failure_message do |actual| + "expected no errors, was #{@errors.join("\n")}\n" + actual + end +end + require 'ebnf' PARSED_EBNF_GRAMMAR = EBNF.parse(File.open(File.expand_path("../../etc/ebnf.ebnf", __FILE__)), format: :native).freeze \ No newline at end of file diff --git a/spec/writer_spec.rb b/spec/writer_spec.rb index 1798fd8..e5891f4 100644 --- a/spec/writer_spec.rb +++ b/spec/writer_spec.rb @@ -90,6 +90,9 @@ }.each do |title, (grammar, xpaths)| context title do subject {EBNF::Writer.html(*EBNF::Base.new(grammar, format: :native).ast)} + + specify {is_expected.to be_valid_html} + xpaths.each do |path, value| specify {is_expected.to have_xpath(path, value)} end @@ -250,7 +253,9 @@ expect(EBNF.parse(File.read(file)).to_sxp).to produce(File.read(file.sub('.ebnf', '.sxp'))) end it "outputs grammar as html" do - expect {EBNF.parse(File.read(file)).to_html}.to_not raise_error + html = nil + expect {html = EBNF.parse(File.read(file)).to_html}.to_not raise_error + expect(html).to be_valid_html end end end @@ -412,7 +417,9 @@ expect {EBNF.parse(File.read(file), format: :abnf).to_s(format: :abnf)}.to_not raise_error end it "outputs grammar as html" do - expect {EBNF.parse(File.read(file), format: :abnf).to_html(format: :abnf)}.to_not raise_error + html = nil + expect {html = EBNF.parse(File.read(file), format: :abnf).to_html(format: :abnf)}.to_not raise_error + expect(html).to be_valid_html end end end @@ -570,7 +577,9 @@ expect {EBNF.parse(File.read(file), format: :isoebnf).to_s(format: :isoebnf)}.to_not raise_error end it "outputs grammar as html" do - expect {EBNF.parse(File.read(file), format: :isoebnf).to_html(format: :isoebnf)}.to_not raise_error + html = nil + expect {html = EBNF.parse(File.read(file), format: :isoebnf).to_html(format: :isoebnf)}.to_not raise_error + expect(html).to be_valid_html end end end