Ambiguity in Nearley.JS

File ambiguous.ne

See folder ULL-ESIT-PL/learning-nearley/tree/main/examples/ambiguous (opens in a new tab)

➜  ambiguous git:(main) ✗ npm i
➜  ambiguous git:(main) ✗ cat ambiguous.ne

Here you can find the grammar ambiguous.ne (opens in a new tab):

learning-nearley/examples/ambiguous/ambiguous.ne

@{%
const { makeLexer } = require("moo-ignore");
const tokens = require('./tokens.js')
 
let lexer = makeLexer(tokens);
lexer.ignore("ws", "comment");
%}
 
@lexer lexer
 
main ->  e       {% id %}
 
e -> e %plus e   {% ([f,_,s]) => { return f+s; } %}
   | e %minus e  {% ([f,_,s]) => { return f-s; } %}
   | e %mul e    {% ([f,_,s]) => { return f*s; } %}
   | e %div e    {% ([f,_,s]) => { return f/s; } %}
   | %number     {% id %}

That can be compiled with:

➜  ambiguous git:(main) ✗ npm run compile

> parser-example@1.0.0 compile
> nearleyc ambiguous.ne -o ambiguous.js

use-ambiguous.js

The program use-ambiguous.js (opens in a new tab)

learning-nearley/examples/ambiguous/use-ambiguous.js

#!/usr/bin/env node
 
const nearley = require("nearley");
const grammar = require("./ambiguous.js");
 
const tests = (process.argv.length > 2)? process.argv.slice(2): [
        "3\n - /* comment */ 2\n-\n1" ,
        "2-2/2"
    ];
 
try {
    for (let expression of tests) { 
        const parser = new nearley.Parser(nearley.Grammar.fromCompiled(grammar));
        parser.feed(expression);
        
        console.log(parser.results); // parser.results is an array of possible parsings
    }   
} catch(e) {
    console.error("Found an error: "+e.message);
}

loads the grammar const grammar = require("./ambiguous.js");,
gets the inputs from the command line,
creates a parser object with new nearley.Parser(nearley.Grammar.fromCompiled(grammar))
feeds to the parser object the input.

➜  ambiguous git:(main) ✗ ./use-ambiguous.js '4/2/2'
[ 1, 4 ]

Notice how two AST are built and the semantic values for both are computed. Again:

➜  ambiguous git:(main) ✗ ./use-ambiguous.js '4*2*2'
[ 16, 16 ]

tokens.js

Here is the file tokens.js (opens in a new tab):

learning-nearley/examples/ambiguous/tokens.js

const ws = /\s+/; /* whitespace */
const comment = /\/\*.*?\*\//;
const number = /\d+/;
const minus = "-";
const plus = "+";
const mul = "*";
const div = "/";
 
const tokens = {
  ws: {match: ws, lineBreaks: true },
  comment,
  number: {match: number, value: s => parseInt(s)},
  minus,
  plus,
  mul,
  div
};
 
module.exports = tokens;

Removing Ambiguity

File ULL-ESIT-PL/learning-nearley/blob/main/examples/calculator/arithmetic.ne (opens in a new tab):

➜  calculator git:(main) ✗ cat arithmetic.ne

# This is a nice little grammar to familiarize yourself
# with the nearley syntax.
 
# `main` is the nonterminal that nearley tries to parse, so
# we define it first.
# The _'s are defined as whitespace below. This is a mini-
# -idiom.
 
main -> null | _ AS _ {% function(d) {return d[1]; } %}
 
# Addition and subtraction
AS -> AS _ "+" _ MD {% function(d) {return d[0]+d[4]; } %}
    | AS _ "-" _ MD {% function(d) {return d[0]-d[4]; } %}
    | MD            {% id %}
 
# PEMDAS!
# We define each level of precedence as a nonterminal.
 
# Parentheses
P -> "(" _ AS _ ")" {% function(d) {return d[2]; } %}
    | N             {% id %}
 
# Factorial 
F ->  P "!"          {% function(d) {
                          const fac = n => (n===0)?1:n*fac(n-1);
                          return fac(d[0]); 
                        } 
                     %}
    | P              {% id %} 
 
# Exponents
E -> F _ "^" _ E    {% function(d) {return Math.pow(d[0], d[4]); } %}
    | F             {% id %}
 
# Multiplication and division
MD -> MD _ "*" _ E  {% function(d) {return d[0]*d[4]; } %}
    | MD _ "/" _ E  {% function(d) {return d[0]/d[4]; } %}
    | E             {% id %}
 
 
# A number or a function of a number
N -> float          {% id %}
    | "sin" _ P     {% function(d) {return Math.sin(d[2]); } %}
    | "cos" _ P     {% function(d) {return Math.cos(d[2]); } %}
    | "tan" _ P     {% function(d) {return Math.tan(d[2]); } %}
    
    | "asin" _ P    {% function(d) {return Math.asin(d[2]); } %}
    | "acos" _ P    {% function(d) {return Math.acos(d[2]); } %}
    | "atan" _ P    {% function(d) {return Math.atan(d[2]); } %}
 
    | "pi"          {% function(d) {return Math.PI; } %}
    | "e"           {% function(d) {return Math.E; } %}
    | "sqrt" _ P    {% function(d) {return Math.sqrt(d[2]); } %}
    | "ln" _ P      {% function(d) {return Math.log(d[2]); }  %}
 
# I use `float` to basically mean a number with a decimal point in it
float ->
      int "." int   {% function(d) {return parseFloat(d[0] + d[1] + d[2])} %}
        | int           {% function(d) {return parseInt(d[0])} %}
 
int -> [0-9]:+        {% function(d) {return d[0].join(""); } %}
 
# Whitespace. The important thing here is that the postprocessor
# is a null-returning function. This is a memory efficiency trick.
_ -> [\s]:*     {% function(d) {return null; } %}

Ejecución

➜  calculator git:(main) ✗ npm test                  

> calculator@1.0.0 test
> npm run compile && export NODE_PATH=$NODE_PATH:`npm root -g` && node calculator.js


> calculator@1.0.0 compile
> nearleyc arithmetic.ne -o grammar.js

> 2+3*4
14
> sin(pi/2)
1
> sinpi
1.2246467991473532e-16
>

References

The lexer generator Moo Importing Grammars