%{ # Copyright © 2006, 2007, 2008, 2009, 2010, 2011, 2012 Casiano Rodriguez-Leon. # Copyright © 2017 William N. Braswell, Jr. # All Rights Reserved. # # Based on Parse::Yapp. # # Parse::Yapp is Copyright © 1998, 1999, 2000, 2001, Francois Desarmenien. # Parse::Yapp is Copyright © 2017 William N. Braswell, Jr. # All Rights Reserved. %} %{ require 5.004; use Carp; my ( $input, $lexlevel, # Used by the lexical analyzer. Controls in which section we are: # head (0), body(1) or tail (2) @lineno, # Used by the lexical analyzer. $lineno[0] is the lione number for # the beginning of the token, $lineno[1] the end $nberr, # Number of errors up to now $prec, $labelno); my $syms; my $head; # list of texts inside header sections my $tail; my $token; my $term; # hash ref. key: token, value: an array describing the assoc and priority { '-' => [ 'LEFT' 1 ], '*' => [ 'LEFT' 2 ], } my $termdef; # token definitions. key is token, value is regexp my $whites; # string with the code for white spaces (when automatic generated lexer) my $lexer; # boolean: true if %lexer was used my $incremental; # build an incremental lexer: one that reads in chunks from $self->YYInputFile my $nterm; my $rules; my $precterm; # hash ref. key token used in %prec. value: priority my $start; my $nullable; my $semantic; # hash ref. Keys are the tokens. Value: 0 = syntactic 1 = semantic my $dummy = []; # array ref. the dummy tokens my ($expect); my $namingscheme; my $defaultaction; my $filename; my $tree = 0; # true if %tree or %metatree my $metatree = 0; my $flatlists = 0; # true if flat list semantic for * + and ? operators my $bypass = 0; my $prefix = ''; # yyprefix my $buildingtree = 0; my $alias = 0; my $accessors = {}; # Hash for named accessors when %tree or %metatree is active { exp::left => 0 } my $strict = 0; # When true, all tokens must be declared or a warning will be issued my $nocompact; # Do not compact action tables. No DEFAULT field for "STATES" my %nondeclared; # Potential non declared token identifiers appearing in the program my %conflict; # Hash of conflict name => { codeh => 'code handler', line => #line, #prodnumber1 => [pos1, pos2], #prodnumber2 => [pos1,pos2,pos3], ... } sub not_an_id { my $id = shift; !defined($id) or $id !~ m/^[a-zA-Z_][[a-zA-Z_0-9]*$/; } # When using %metatree, i.e. generating a Translation Scheme # returns true if $code was preceded by a %begin directive sub is_begin_code { my $code = shift; return (UNIVERSAL::isa($code, 'ARRAY') and exists($code->[2]) and $code->[2] eq 'BEGINCODE'); } # Produces the text containing the declarations # and initializations of the associated variables sub prefixcode { my %index = @_; # When TS var $lhs refers to the father node my $text = ($metatree)? 'my $lhs = $_[0]; ' : ''; # No identifiers were associated with the attributes if %index is empty return $text unless %index; $text .= join "", (map { "my \$$_ = \$_[$index{$_}]; " } (keys(%index))); # The former line produces the code for initialization of the attribute # variables so that a production like: # exp: VAR.left '='.op exp.right { ... semantic action } # will produce s.t. like: # sub { # my $left = $_[1]; my $right = $_[3]; my $op = $_[2]; # ... semantic action # } return $text; } # Computes the hash %index used in sub 'prefixcode' # $index{a} is the index of the symbol associated with 'a' in the right hand side # of the production. For example in # R: B.b A.a # $index{a} will be 2. sub symbol_index { my $rhs = shift || []; my $position = shift || @$rhs; my %index; local $_ = 0; for my $value (@{$rhs}) { $_++ unless (($value->[0] eq 'CODE') and $metatree) or ($value->[0] eq 'CONFLICTHANDLER'); my $id = $value->[1][2]; if (defined($id)) { _SyntaxError( 2, "Error: attribute variable '\$$id' appears more than once", $value->[1][1]) if exists($index{$id}); $index{$id} = $_; } last if $_ >= $position; } return %index; } # Computes the hash %index holding the position in the generated # AST (as it is build by YYBuildAST) of the node associated with # the identifier. For ex. in "E: E.left '+' E.right" # $index{right} will be 1 (remember that '+' is a syntactic token) sub child_index_in_AST { my $rhs = shift || []; my %index; local $_ = 0; for my $value (@{$rhs}) { my ($symb, $line, $id) = @{$value->[1]}; # Accessors will be build only for explictly named attributes # Hal Finkel's patch next unless $$semantic{$symb}; $index{$id} = $_ if defined($id); $_++ ; } return %index; } # This sub gives support to the "%tree alias" directive. # Expands the 'accessors' hash relation # for the current production. Uses 'child_index_in_AST' # to build the mapping between names and indices sub make_accessors { my $name = shift; return unless ($tree and $alias and defined($name) and $name->[0] =~m{^[a-zA-Z_]\w*$}); my $rhs = shift; my %index = child_index_in_AST($rhs); for (keys(%index)) { $accessors->{"$name->[0]::$_"} = $index{$_}; } } # Gives support to %metatree sub insert_delaying_code { my $code = shift; # If %begin the code will be executed at "tree time construction" return if is_begin_code($$code); if ($$code) { $$code = [ # The user code is substituted by a builder of a node referencing the # actual sub "push \@_, sub { $$code->[0] }; goto &Parse::Eyapp::Driver::YYBuildTS; ", $$code->[1] ]; } else { $$code = [ ' goto &Parse::Eyapp::Driver::YYBuildTS ', $lineno[0]] } } # Called only from _AddRules sub process_production { my ($rhs) = @_; my $position = $#$rules; my @newrhs = (); my $m = 0; for my $s (0..$#$rhs) { my($what,$value)=@{$$rhs[$s]}; if ($what eq 'CODE') { # TODO: modify name scheme: RULE_POSITION my($tmplhs)='@'.$position."-$s"; if ($value) { # The auxiliary production generated for # intermediate actions has access to the # attributes of the symbols to its left # Not needed if generating a TS my @optarg = $metatree? () : ($s+1); # Variable declarations my %index = symbol_index($rhs, @optarg); $value->[0] = prefixcode(%index).$value->[0]; } insert_delaying_code(\$value) if $metatree; # rhs prec name code push(@$rules,[ $tmplhs, [], undef, undef, $value ]); push(@newrhs, $tmplhs); next; } elsif ($what eq 'CONFLICTHANDLER') { my $ch = $value->[0]; push @{$conflict{$ch}{production}{-$position}}, $m; next; } # elsif ($what eq 'CONFLICTVIEWPOINT') { # } push(@newrhs, $$value[0]); $m++; } return \@newrhs; } # Receives a specification of the RHS of a production like in: # rhs([ $A, $val], name => $_[2], code => $code_rec, prec => $prec) # Returns the data structure used to represent the RHS: # [ @rhs, $arg{prec}, $arg{name}, $arg{code}] sub rhs { my @rhs = @{shift()}; my %arg = @_; $arg{prec} = exists($arg{prec})? token($arg{prec}): undef; $arg{name} = undef unless exists($arg{name}); $arg{code} = exists($arg{code})? token($arg{code}): undef; @rhs = map { ['SYMB', $_] } @rhs; return [ @rhs, $arg{prec}, $arg{name}, $arg{code}]; } sub token { my $value = shift; return [ $value, $lineno[0]]; } sub symbol { my $id = shift; return ['SYMB', $id]; } # To be used with the %lexer directive sub make_lexer { my ($code, $line) = @_; my $errline = $line + ($code =~ tr/\n//); my $lexertemplate = << 'ENDOFLEXER'; __PACKAGE__->YYLexer( sub { # lexical analyzer my $self = $_[0]; for (${$self->input()}) { # contextualize #line <> "<>" <> <> return ('', undef) if ($_ eq '') || (defined(pos($_)) && (pos($_) >= length($_))); die("Error inside the lexical analyzer. Line: <>. File: <>. No regexp matched.\n"); } } # end lexical analyzer ); ENDOFLEXER $lexertemplate =~ s/<>/$code/g; $lexertemplate =~ s/<>/$line/g; $lexertemplate =~ s/<>/$errline/g; $lexertemplate =~ s/<>/$filename/g; $lexertemplate =~ s/<>/################ @@@@@@@@@ End of User Code @@@@@@@@@ ###################/g; return $lexertemplate; } sub explorer_handler { my ($name, $code) = @_; my ($cn, $line) = @$name; my ($c, $li) = @$code; # TODO: this must be in Output my $conflict_header = <<"CONFLICT_EXPLORER"; my \$self = \$_[0]; for (\${\$self->input()}) { #line $li "$filename" CONFLICT_EXPLORER $c =~ s/^/$conflict_header/; # } # { # follows the closing curly bracket of the for .. to contextualize!!!!!! v $c =~ s/$/\n################ @@@@@@@@@ End of User Code @@@@@@@@@ ###################\n }\n/; #$code->[0] = $c; $conflict{$cn}{explorer} = $c; $conflict{$cn}{explorerline} = $line; # TODO: error control. Factorize!!!!! $$syms{$cn} = $line; $$nterm{$cn} = undef; undef; } %} %token ASSOC /* is %(left|right|nonassoc) */ %token BEGINCODE /* is %begin { Perl code ... } */ %token CODE /* is { Perl code ... } */ %token CONFLICT /* is %conflict */ %token DEFAULTACTION /* is %defaultaction */ %token EXPECT /* is %expect */ %token HEADCODE /* is %{ Perl code ... %} */ %token IDENT /* is [A-Za-z_][A-Za-z0-9_]* */ %token LABEL /* is :[A-Za-z0-9_]+ */ %token LITERAL /* is a string literal like 'hello' */ %token METATREE /* is %metatree */ %token NAME /* is %name */ %token NAMINGSCHEME /* is %namingscheme */ %token NOCOMPACT /* is %nocompact */ %token NUMBER /* is \d+ */ %token OPTION /* is (%name\s*([A-Za-z_]\w*)\s*)?\? */ %token PLUS /* is (%name\s*([A-Za-z_]\w*)\s*)?\+ */ %token PREC /* is %prec */ %token PREFIX /* is %prefix\s+([A-Za-z_][A-Za-z0-9_:]*::) */ %token SEMANTIC /* is %semantic\s+token */ %token STAR /* is (%name\s*([A-Za-z_]\w*)\s*)?\* */ %token START /* is %start */ %token STRICT /* is %strict */ %token SYNTACTIC /* is %syntactic\s+token */ %token TAILCODE /* is { Perl code ... } */ %token TOKEN /* is %token */ %token TREE /* is %tree */ %token TYPE /* is %type */ %token UNION /* is %union */ %start eyapp %% # Main rule eyapp: head body tail ; #Common rules: symbol: LITERAL { my($symbol,$lineno)=@{$_[1]}; exists($$syms{$symbol}) or do { $$syms{$symbol} = $lineno; $$term{$symbol} = undef; # Warning! $$semantic{$symbol} = 0 unless exists($$semantic{$symbol}); }; $_[1] } | ident #default action ; ident: IDENT { my($symbol,$lineno)=@{$_[1]}; exists($$syms{$symbol}) or do { $$syms{$symbol} = $lineno; $$term{$symbol} = undef; # Warning! $$semantic{$symbol} = 1 unless exists($$semantic{$symbol}); # Not declared identifier? $nondeclared{$symbol} = 1 unless (exists($$nterm{$symbol}) or $$term{$symbol}); }; $_[1] } ; prodname: IDENT | LABEL | IDENT LABEL { $_[1][0] .= $_[2][0]; $_[1]; } ; # Head section: head: headsec '%%' ; #perlidents: /* empty */ # | perlidents perlident #; perlident: IDENT | perlident '::' IDENT { $_[1][0] .= "::".$_[3][0]; $_[1]; } ; headsec: #empty #default action | decls #default action ; decls: decls decl #default action | decl #default action ; decl: '\n' #default action | SEMANTIC typedecl toklist '\n' { for (@{$_[3]}) { my($symbol,$lineno, $def)=@$_; # exists($$token{$symbol}) #and do { # _SyntaxError(0, # "Token $symbol redefined: ". # "Previously defined line $$syms{$symbol}", # $lineno); # next; #}; $$token{$symbol}=$lineno; $$term{$symbol} = [ ]; $$semantic{$symbol} = 1; $$termdef{$symbol} = $def if $def; } undef } | SYNTACTIC typedecl toklist '\n' { for (@{$_[3]}) { my($symbol,$lineno, $def)=@$_; # exists($$token{$symbol}) #and do { # _SyntaxError(0, # "Token $symbol redefined: ". # "Previously defined line $$syms{$symbol}", # $lineno); # next; #}; $$token{$symbol}=$lineno; $$term{$symbol} = [ ]; $$semantic{$symbol} = 0; $$termdef{$symbol} = $def if $def; } undef } | DUMMY typedecl toklist '\n' { for (@{$_[3]}) { my($symbol,$lineno, $def)=@$_; $$token{$symbol}=$lineno; $$term{$symbol} = [ ]; $$semantic{$symbol} = 0; push @$dummy, $symbol; $$termdef{$symbol} = $def if $def; } undef } | TOKEN typedecl toklist '\n' { for (@{$_[3]}) { my($symbol,$lineno, $def)=@$_; exists($$token{$symbol}) and do { _SyntaxError(0, "Token $symbol redefined: ". "Previously defined line $$syms{$symbol}", $lineno); next; }; $$token{$symbol}=$lineno; $$term{$symbol} = [ ]; $$termdef{$symbol} = $def if $def; } undef } | ASSOC typedecl symlist '\n' { for (@{$_[3]}) { my($symbol,$lineno)=@$_; defined($$term{$symbol}[0]) and do { _SyntaxError(1, "Precedence for symbol $symbol redefined: ". "Previously defined line $$syms{$symbol}", $lineno); next; }; $$token{$symbol}=$lineno; $$term{$symbol} = [ $_[1][0], $prec ]; } ++$prec; undef } | START ident '\n' { $start=$_[2][0] unless $start; undef } | PREFIX '\n' { # TODO: Instead of ident has to be a prefix!!! $prefix=$_[1][0]; undef } | WHITES CODE '\n' { push @{$_[2]}, 'CODE'; $whites = $_[2]; } | WHITES REGEXP '\n' { push @{$_[2]}, 'REGEXP'; $whites = $_[2]; } | WHITES '=' CODE '\n' { push @{$_[3]}, 'CODE'; $whites = $_[3]; } | WHITES '=' REGEXP '\n' { push @{$_[3]}, 'REGEXP'; $whites = $_[3]; } | NAMINGSCHEME CODE '\n' { $namingscheme = $_[2]; undef } | HEADCODE '\n' { push(@$head,$_[1]); undef } | UNION CODE '\n' { undef } #ignore | DEFAULTACTION CODE '\n' { $defaultaction = $_[2]; undef } | INCREMENTAL '\n' { $incremental = ''; undef } | INCREMENTAL LITERAL '\n' { $incremental = $_[2][0]; undef } | LEXER CODE '\n' { my ($code, $line) = @{$_[2]}; push @$head, [ make_lexer($code, $line), $line]; $lexer = 1; undef } | TREE '\n' { $tree = $buildingtree = 1; $bypass = ($_[1][0] =~m{bypass})? 1 : 0; $alias = ($_[1][0] =~m{alias})? 1 : 0; $defaultaction = [ ' goto &Parse::Eyapp::Driver::YYBuildAST ', $lineno[0]]; undef } | METATREE '\n' { $metatree = $tree = $buildingtree = 1; undef } | STRICT '\n' { $strict = 1; undef } | NOCOMPACT '\n' { $nocompact = 1; undef } | TYPE typedecl identlist '\n' { for ( @{$_[3]} ) { my($symbol,$lineno)=@$_; exists($$nterm{$symbol}) and do { _SyntaxError(0, "Non-terminal $symbol redefined: ". "Previously defined line $$syms{$symbol}", $lineno); next; }; delete($$term{$symbol}); #not a terminal $$nterm{$symbol}=undef; #is a non-terminal } } | CONFLICT ident CODE '\n' { my ($name, $code) = @_[2,3]; my ($cn, $line) = @$name; my ($c, $li) = @$code; # TODO: this must be in Output my $conflict_header = <<"CONFLICT_HEADER"; my \$self = \$_[0]; for (\${\$self->input()}) { #line $li "$filename" CONFLICT_HEADER $c =~ s/^/$conflict_header/; # } # { # follows the closing curly bracket of the for .. to contextualize!!!!!! v $c =~ s/$/\n################ @@@@@@@@@ End of User Code @@@@@@@@@ ###################\n }\n/; #$code->[0] = $c; $conflict{$cn}{codeh} = $c; $conflict{$cn}{line} = $line; $$syms{$cn} = $line; #$$nterm{$cn} = undef; undef; } # conflict syntacticvariable? prodname : prodname | CONFLICT ident perlident '?' prodname ':' prodname '\n' { #print "<@{$_[2]} @{$_[3]} @{$_[5]} @{$_[7]}>\n"; my $conflict = $_[2]; my ($startsymbol, $line) = @{$_[3]}; my @prodname = ($_[5][0], $_[7][0]); my $cn = $conflict->[0]; my $c = <<"CONFLICT_HEADER"; my \$self = \$_[0]; for (\${\$self->input()}) { #line $line "$filename" \$self->YYIf('$startsymbol', '$prodname[0]', '$prodname[1]'); ################ @@@@@@@@@ End of User Code @@@@@@@@@ ################### } CONFLICT_HEADER $conflict{$cn}{codeh} = $c; $conflict{$cn}{line} = $line; $$syms{$cn} = $line; $$nterm{$cn} = undef; #$$nterm{$startsymbol} = undef; #delete $$syms{$startsymbol}; if ($startsymbol eq 'EMPTY') { $c = <<"NESTEDPARSING"; { \$self->YYIs('EMPTY', 1); } NESTEDPARSING } else { $c = <<"NESTEDPARSING"; { \$self->YYNestedParse('$startsymbol'); } NESTEDPARSING } explorer_handler($conflict, [$c, $line]); undef; } | CONFLICT ident neg REGEXP '?' prodname ':' prodname '\n' { my $conflict = $_[2]; my $neg = $_[3]; my ($regexp, $line) = @{$_[4]}; my @prodname = ($_[6][0], $_[8][0]); my $cn = $conflict->[0]; my $c = <<"CONFLICT_HEADER"; my \$self = \$_[0]; for (\${\$self->input()}) { #line $line "$filename" \$self->YYIf('.regexp', '$prodname[0]', '$prodname[1]'); ################ @@@@@@@@@ End of User Code @@@@@@@@@ ################### } CONFLICT_HEADER $conflict{$cn}{codeh} = $c; $conflict{$cn}{line} = $line; $$syms{$cn} = $line; $$nterm{$cn} = undef; $regexp = substr($regexp,1,-1); if (!$neg) { $regexp = "\\G(?=$regexp)"; } else { $regexp = "\\G(?!$regexp)"; } $c = <<"NESTEDPARSING"; { \$self->YYNestedRegexp('$regexp'); } NESTEDPARSING explorer_handler($conflict, [$c, $line]); undef; } /*********************************************/ | EXPLORER ident CODE '\n' # research testing { my ($name, $code) = @_[2,3]; explorer_handler($name, $code); } | EXPLORER ident LITERAL '\n' # { my ($name, $startsymbol) = @_[2,3]; my $c = <<"NESTEDPARSING"; { \$self->YYNestedParse($startsymbol->[0]); } NESTEDPARSING my $li = $startsymbol->[1]; explorer_handler($name, [$c, $li]); } | EXPLORER ident perlident '\n' # research testing: copy paste: factorize! { my ($name, $startsymbol) = @_[2,3]; my $c = <<"NESTEDPARSING"; { \$self->YYNestedParse('$startsymbol->[0]'); } NESTEDPARSING my $li = $startsymbol->[1]; explorer_handler($name, [$c, $li]); } | EXPLORER ident perlident LITERAL '\n' # research testing: copy paste: factorize! { my ($name, $startsymbol, $file) = @_[2,4]; my $c = <<"NESTEDPARSING"; { \$self->YYNestedParse('$startsymbol->[0]', $file->[0]); } NESTEDPARSING my $li = $startsymbol->[1]; explorer_handler($name, [$c, $li]); } /*********************************************/ | EXPECT NUMBER '\n' { $expect=$_[2][0]; undef } | EXPECT NUMBER NUMBER '\n' { $expect= [ $_[2][0], $_[3][0]]; undef } | EXPECTRR NUMBER '\n' { $expect = 0 unless defined($expect); croak "Number of reduce-reduce conflicts is redefined (line $_[2][1], file: $filename)\n" if ref($expect); $expect= [ $expect, $_[2][0]]; undef } | error '\n' { $_[0]->YYErrok } ; neg: /* empty */ {} | '!' { 1; } ; typedecl: #empty | '<' IDENT '>' ; symlist: symlist symbol { push(@{$_[1]},$_[2]); $_[1] } | symbol { [ $_[1] ] } ; toklist: toklist tokendef { push(@{$_[1]},$_[2]); $_[1] } | tokendef { [ $_[1] ] } ; tokendef: ident '=' REGEXP { push @{$_[3]}, 'REGEXP'; push @{$_[1]}, $_[3]; $_[1] } | ident '=' '%' REGEXP { push @{$_[4]}, 'CONTEXTUAL_REGEXP'; push @{$_[1]}, $_[4]; $_[1] } | ident '=' '%' REGEXP '=' IDENT { push @{$_[4]}, 'CONTEXTUAL_REGEXP_MATCH'; push @{$_[4]}, $_[6]; push @{$_[1]}, $_[4]; $_[1] } | ident '=' '%' REGEXP '!' IDENT { push @{$_[4]}, 'CONTEXTUAL_REGEXP_NOMATCH'; push @{$_[4]}, $_[6]; push @{$_[1]}, $_[4]; $_[1] } | ident '=' CODE { push @{$_[3]}, 'CODE'; push @{$_[1]}, $_[3]; $_[1] } | symbol { push @{$_[1]}, [ @{$_[1]}, 'LITERAL']; $_[1]; } ; identlist: identlist ident { push(@{$_[1]},$_[2]); $_[1] } | ident { [ $_[1] ] } ; # Rule section body: rulesec '%%' { $start or $start=$$rules[1][0]; ref($$nterm{$start}) or _SyntaxError(2,"Start symbol $start not found ". "in rules section",$_[2][1]); # Add conflict handlers # [ left hand side, right hand side, precedence, rulename, code, ] for my $A (keys %conflict) { if (defined($conflict{$A}{explorer})) { if (!$conflict{$A}{totalviewpoint}) { my $code = $conflict{$A}{codeh}; $conflict{$A}{codeh} = "{ $conflict{$A}{explorer} }\n{ $code }"; delete $$syms{$A}; delete $$nterm{$A}; delete $$term{$A}; delete $conflict{$A}{explorer}; } else { my $lhs = [$A, $conflict{$A}{explorerline}]; my $code = $conflict{$A}{explorer}; my $rhss = [ rhs([], name => $lhs, code => $code), ]; _AddRules($lhs, $rhss); delete $conflict{$A}{explorer}; } } else { delete $$syms{$A}; delete $$nterm{$A}; delete $$term{$A}; } } # # If exists an @identifiers that is not a nterm and not a term is a warn if ($strict) { for (keys %nondeclared) { warn "Warning! Non declared token $_ at line $$syms{$_} of $filename\n" unless ($_ eq 'error' || $$term{$_} || exists($$nterm{$_}) || exists($conflict{$_})); } } # Superstart rule # [ left hand side, right hand side, precedence, rulename, code, ] $$rules[0]=[ '$start', [ $start, chr(0) ], undef, undef, undef,]; } | '%%' { _SyntaxError(2,"No rules in input grammar",$_[1][1]); } ; rulesec: rulesec rules #default action | startrules #default action ; startrules: IDENT ':' { $start = $_[1][0] unless $start; } rhss ';' { _AddRules($_[1],$_[4]); undef } | error ';' { $_[0]->YYErrok } ; rules: IDENT ':' rhss ';' { _AddRules($_[1],$_[3]); undef } | error ';' { $_[0]->YYErrok } ; rhss: rhss '|' rule { push(@{$_[1]},$_[3]); $_[1] } | rule { [ $_[1] ] } ; rule: optname rhs prec epscode { my ($name, $rhs, $prec, $code) = @_[1..4]; my %index = symbol_index($rhs); $code->[0] = prefixcode(%index).$code->[0] if ($code); insert_delaying_code(\$code) if $metatree; make_accessors($name, $rhs); push(@{$rhs}, $prec, $name, $code); # only three???? what with prefixofcode? $rhs } | optname rhs { my ($name, $rhs) = @_[1, 2]; my $code; # Be careful: $defaultaction must be replicated per action # to emulate "yacc/yapp" true behavior. # There was a previous bug when %metatree and %defaultaction # were activated ------------------V $code = $defaultaction && [ @$defaultaction ]; defined($rhs) and $rhs->[-1][0] eq 'CODE' and $code = ${pop(@{$rhs})}[1]; my %index = symbol_index($rhs); $code->[0] = prefixcode(%index).$code->[0] if ($code); make_accessors($name, $rhs); insert_delaying_code(\$code) if $metatree; push(@{$rhs}, undef, $name, $code); $rhs } ; rhs: #empty #default action (will return undef) | rhselts #default action ; rhselts: rhselts rhseltwithid { push(@{$_[1]},$_[2]); $_[1] } | rhseltwithid { [ $_[1] ] } ; rhseltwithid : rhselt '.' IDENT { push @{$_[1][1]}, $_[3][0]; $_[1] } | '$' rhselt { # check that is an identifier _SyntaxError(2,"\$ is allowed for identifiers only (Use dot notation instead)",$lineno[0]) if not_an_id($_[2][1][0]); push @{$_[2][1]}, $_[2][1][0]; $_[2] } | '$' error { _SyntaxError(2,"\$ is allowed for identifiers only",$lineno[0]) } | rhselt { $_[1]; } ; rhselt: symbol { [ 'SYMB', $_[1] ] } | code { [ 'CODE', $_[1] ] } | DPREC ident { my $cname = $_[2][0]; $conflict{$cname}{total}++; [ 'CONFLICTHANDLER', $_[2] ] } #************** research *****************# | VIEWPOINT { $conflict{$_[1][0]}{totalviewpoint}++; [ 'CONFLICTVIEWPOINT', $_[1] ] } #************** research *****************# | '(' optname rhs ')' { my ($name, $rhs) = @_[2, 3]; my $code = $defaultaction && [ @$defaultaction ]; $code =[ ' goto &Parse::Eyapp::Driver::YYActionforParenthesis', $lineno[0]] unless $metatree; defined($rhs) and $rhs->[-1][0] eq 'CODE' and $code = ${pop(@$rhs)}[1]; my %index = symbol_index($rhs); $code->[0] = prefixcode(%index).$code->[0] if ($code); insert_delaying_code(\$code) if $metatree; my $A = token('PAREN-'.++$labelno); _AddRules($A, [[@$rhs, undef, $name, $code]]); [ 'SYMB', $A] } | rhselt STAR { my ($what, $val) = @{$_[1]}; _SyntaxError(1, "Star(*) operator can't be applied to an action", $lineno[0]) if $what eq 'CODE'; my $A = token('STAR-'.++$labelno); my $code_rec = ' goto &Parse::Eyapp::Driver::YYActionforT_TX1X2 '; my $code_empty = ' goto &Parse::Eyapp::Driver::YYActionforT_empty '; my $rhss = [ rhs([ $A, $val], name => $_[2], code => $code_rec), rhs([], name => $_[2], code => $code_empty), ]; _AddRules($A, $rhss); [ 'SYMB', $A] } | rhselt '<' STAR symbol '>' { my ($what, $val) = @{$_[1]}; _SyntaxError(1, "Star(*) operator can't be applied to an action", $lineno[0]) if $what eq 'CODE'; my $B = token('STAR-'.++$labelno); my $code_rec = ' goto &Parse::Eyapp::Driver::YYActionforT_TX1X2 '; my $code_single = ' goto &Parse::Eyapp::Driver::YYActionforT_single '; my $rhss = [#rhs [token , [value, line]] ..., prec, name, code ] rhs([ $B, $_[4], $val], name => $_[3], code => $code_rec), rhs([ $val], name => $_[3], code => $code_single), ]; _AddRules($B, $rhss); my $A = token('STAR-'.++$labelno); my $code_empty = ' goto &Parse::Eyapp::Driver::YYActionforT_empty '; $code_single = ' { $_[1] } # optimize '."\n"; $rhss = [ rhs([ $B ], name => $_[3], code => $code_single ), rhs([], name => $_[3], code => $code_empty), ]; _AddRules($A, $rhss); [ 'SYMB', $A ] } | rhselt OPTION { my ($what, $val) = @{$_[1]}; _SyntaxError(1, "Question(?) operator can't be applied to an action", $lineno[0]) if $what eq 'CODE'; my $A = token('OPTIONAL-'.++$labelno); my $code_single = ' goto &Parse::Eyapp::Driver::YYActionforT_single '; my $code_empty = ' goto &Parse::Eyapp::Driver::YYActionforT_empty '; my $rhss = [ rhs([ $val], name => $_[2], code => $code_single), rhs([], name => $_[2], code => $code_empty), ]; _AddRules($A, $rhss); [ 'SYMB', $A] } | rhselt '<' PLUS symbol '>' { my ($what, $val) = @{$_[1]}; _SyntaxError(1, "Plus(+) operator can't be applied to an action", $lineno[0]) if $what eq 'CODE'; my $A = token('PLUS-'.++$labelno); my $code_rec = ' goto &Parse::Eyapp::Driver::YYActionforT_TX1X2 '; my $code_single = ' goto &Parse::Eyapp::Driver::YYActionforT_single '; my $rhss = [ rhs([$A, $_[4], $val], name => $_[3], code => $code_rec), rhs([$val], name => $_[3], code => $code_single), ]; _AddRules($A, $rhss); [ 'SYMB', $A] } | rhselt PLUS { my ($what, $val) = @{$_[1]}; _SyntaxError(1, "Plus(+) operator can't be applied to an action", $lineno[0]) if $what eq 'CODE'; my $A = token('PLUS-'.++$labelno); my $code_rec = ' goto &Parse::Eyapp::Driver::YYActionforT_TX1X2 '; my $code_single = ' goto &Parse::Eyapp::Driver::YYActionforT_single '; my $rhss = [ rhs([$A, $val], name => $_[2], code => $code_rec), rhs([$val], name => $_[2], code => $code_single) ]; _AddRules($A, $rhss); [ 'SYMB', $A] } ; optname: /* empty */ | NAME IDENT { # save bypass status $_[2][2] = $_[1][0]; $_[2] } | NAME IDENT LABEL { # LABELs are used for dynamic conflict resolution # save bypass status $_[2][2] = $_[1][0]; # 0: identifier 1: line number 2: bypass # concat the label to the name $_[2][0] .= "$_[3][0]"; $_[2] } | NAME LABEL { # LABELs are used for dynamic conflict resolution # save bypass status $_[2][2] = $_[1][0]; $_[2] } ; prec: PREC symbol { defined($$term{$_[2][0]}) or do { _SyntaxError(1,"No precedence for symbol $_[2][0]", $_[2][1]); return undef; }; ++$$precterm{$_[2][0]}; $$term{$_[2][0]}[1]; } ; epscode: { $defaultaction } | code { $_[1] } ; code: CODE { $_[1] } | BEGINCODE { _SyntaxError(2, "%begin code is allowed only when metatree is active\n", $lineno[0]) unless $metatree; my $code = $_[1]; push @$code, 'BEGINCODE'; return $code; } ; # Tail section: tail: /*empty*/ | TAILCODE { $tail=$_[1] } ; %% sub _Error { my($value)=$_[0]->YYCurval; my $token = $$value[0]; my($what)= $token ? "input: '$token'" : "symbol"; _SyntaxError(1,"Unexpected $what",$$value[1]); } sub slurp_perl_code { my($level,$from,$code); $from=pos($$input); $level=1; while($$input=~/([{}])/gc) { substr($$input,pos($$input)-1,1) eq '\\' #Quoted and next; $level += ($1 eq '{' ? 1 : -1) or last; } $level and _SyntaxError(2,"Unmatched { opened line $lineno[0]",-1); $code = substr($$input,$from,pos($$input)-$from-1); $lineno[1]+= $code=~tr/\n//; return [ $code, $lineno[0] ]; } my %headertoken = ( start => 'START', expect => 'EXPECT', token => 'TOKEN', strict => 'STRICT', type => 'TYPE', union => 'UNION', namingscheme => 'NAMINGSCHEME', metatree => 'METATREE', nocompact => 'NOCOMPACT', conflict => 'CONFLICT', whites => 'WHITES', ); # Used for <%name LIST_of_STH +>, <%name OPT_STH ?> my %listtoken = ( '*' => 'STAR', '+' => 'PLUS', '?' => 'OPTION', ); my $ID = qr{[A-Za-z_][A-Za-z0-9_]*}; my $LABEL = qr{:[A-Za-z0-9_]+}; my $STRING = qr { ' # opening apostrophe (?:[^'\\]| # an ordinary character \\\\| # escaped \ i.e. \\ \\'| # escaped apostrophe i.e. \' \\ # escape i.e. \ )*? # non greedy repetitions ' # closing apostrophe }x; # Head section: \n separates declarations my $HEADERWHITESPACES = qr{ (?: [\t\ ]+ # Any white space char but \n | \#[^\n]* # Perl like comments | /\*.*?\*/ # C like comments )+ }xs; # Head section: \n is not significant my $BODYWHITESPACES = qr{ (?: \s+ # Any white space char, including \n | \#[^\n]* # Perl like comments | /\*.*?\*/ # C like comments )+ }xs; my $REGEXP = qr{ / # opening slash (?:[^/\\]| # an ordinary character \\\\| # escaped \ i.e. \\ \\/| # escaped slash i.e. \/ \\ # escape i.e. \ )*? # non greedy repetitions / # closing slash }xs; sub _Lexer { #At EOF pos($$input) >= length($$input) and return('',[ undef, -1 ]); #In TAIL section $lexlevel > 1 and do { my($pos)=pos($$input); $lineno[0]=$lineno[1]; $lineno[1]=-1; pos($$input)=length($$input); return('TAILCODE',[ substr($$input,$pos), $lineno[0] ]); }; #Skip blanks $lexlevel == 0 ? $$input=~m{\G($HEADERWHITESPACES)}gc : $$input=~m{\G($BODYWHITESPACES)}gc and do { my($blanks)=$1; #Maybe At EOF pos($$input) >= length($$input) and return('',[ undef, -1 ]); $lineno[1]+= $blanks=~tr/\n//; }; $lineno[0]=$lineno[1]; $$input=~/\G($LABEL)/gc and return('LABEL',[ $1, $lineno[0] ]); $$input=~/\G($ID)/gc and return('IDENT',[ $1, $lineno[0] ]); $$input=~/\G($STRING)/gc and do { my $string = $1; # The string 'error' is reserved for the special token 'error' $string eq "'error'" and do { _SyntaxError(0,"Literal 'error' ". "will be treated as error token",$lineno[0]); return('IDENT',[ 'error', $lineno[0] ]); }; my $lines = $string =~ tr/\n//; _SyntaxError(2, "Constant string $string contains newlines",$lineno[0]) if $lines; $lineno[1] += $lines; $string = chr(0) if $string eq "''"; return('LITERAL',[ $string, $lineno[0] ]); }; # New section: body or tail $$input=~/\G(%%)/gc and do { ++$lexlevel; return($1, [ $1, $lineno[0] ]); }; $$input=~/\G\s*\{/gc and return ('CODE', &slurp_perl_code()); # } if($lexlevel == 0) {# In head section $$input=~/\G%(left|right|nonassoc)/gc and return('ASSOC',[ uc($1), $lineno[0] ]); $$input=~/\G%\{/gc and do { my($code); $$input=~/\G(.*?)%}/sgc or _SyntaxError(2,"Unmatched %{ opened line $lineno[0]",-1); $code=$1; $lineno[1]+= $code=~tr/\n//; return('HEADCODE',[ $code, $lineno[0] ]); }; $$input=~/\G%prefix\s+([A-Za-z_][A-Za-z0-9_:]*::)/gc and return('PREFIX',[ $1, $lineno[0] ]); $$input=~/\G%(tree((?:\s+(?:bypass|alias)){0,2}))/gc and do { my $treeoptions = defined($2)? $2 : ''; return('TREE',[ $treeoptions, $lineno[0] ]) }; $$input=~/\G%(?:(semantic|syntactic|dummy)(?:\s+token)?)\b/gc and return(uc($1),[ undef, $lineno[0] ]); $$input=~/\G%(?:(incremental)(?:\s+lexer)?)\b/gc and return(uc($1),[ undef, $lineno[0] ]); $$input=~/\G%(lexer|defaultaction|union)\b\s*/gc and return(uc($1),[ undef, $lineno[0] ]); $$input=~/\G([0-9]+)/gc and return('NUMBER',[ $1, $lineno[0] ]); $$input=~/\G%expect-rr/gc and return('EXPECTRR',[ undef, $lineno[0] ]); $$input=~/\G%(explorer)/gc and return('EXPLORER',[ undef, $lineno[0] ]); $$input=~/\G%($ID)/gc and return($headertoken{$1},[ undef, $lineno[0] ]); $$input=~/\G($REGEXP)/gc and return('REGEXP',[ $1, $lineno[0] ]); $$input=~/\G::/gc and return('::',[ undef, $lineno[0] ]); } else { # In rule section # like in <%name LIST_of_STH *> # like in <%name LIST_of_STH +> # like in <%name OPT_STH ?> # returns STAR or PLUS or OPTION $$input=~/\G(?:<\s*%name\s*($ID)\s*)?([*+?])\s*>/gc and return($listtoken{$2},[ $1, $lineno[0] ]); # like in %name LIST_of_STH * # like in %name LIST_of_STH + # like in %name OPT_STH ? # returns STAR or PLUS or OPTION $$input=~/\G(?:%name\s*($ID)\s*)?([*+?])/gc and return($listtoken{$2},[ $1, $lineno[0] ]); $$input=~/\G%no\s+bypass/gc and do { #my $bp = defined($1)?0:1; return('NAME',[ 0, $lineno[0] ]); }; $$input=~/\G%(prec)/gc and return('PREC',[ undef, $lineno[0] ]); $$input=~/\G%(PREC)/gc and return('DPREC',[ undef, $lineno[0] ]); $$input=~/\G%name/gc and do { # return current bypass status return('NAME',[ $bypass, $lineno[0] ]); }; # Now label is returned in the "common" area # $$input=~/\G($LABEL)/gc # and return('LABEL',[ $1, $lineno[0] ]); $$input=~/\G%begin\s*{/gc # } and return ('BEGINCODE', &slurp_perl_code()); #********** research *************# $$input=~/\G%([a-zA-Z_]\w*)\?/gc and return('VIEWPOINT',[ $1, $lineno[0] ]); } #Always return something $$input=~/\G(.)/sg or die "Parse::Eyapp::Grammar::Parse: Match (.) failed: report as a BUG"; my $char = $1; $char =~ s/\cM/\n/; # dos to unix $char eq "\n" and ++$lineno[1]; ( $char ,[ $char, $lineno[0] ]); } sub _SyntaxError { my($level,$message,$lineno)=@_; $message= "*". [ 'Warning', 'Error', 'Fatal' ]->[$level]. "* $message, at ". ($lineno < 0 ? "eof" : "line $lineno")." at file $filename\n"; $level > 1 and die $message; warn $message; $level > 0 and ++$nberr; $nberr == 20 and die "*Fatal* Too many errors detected.\n" } # _AddRules # There was a serious error I introduced between versions 171 and 172 (subversion # numbers). I delayed the instruction # my ($tmprule)=[ $lhs, [], splice(@$rhs,-3)]; # with catastrophic consequences for the resulting # LALR tables. # The splice of the ($precedence, $name, $code) # must be done before this line, if not the counts of nullables # will no work! # @$rhs # or do { # ++$$nullable{$lhs}; # ++$epsrules; # }; sub _AddRules { my($lhs,$lineno)=@{$_[0]}; my($rhss)=$_[1]; ref($$nterm{$lhs}) and do { _SyntaxError(1,"Non-terminal $lhs redefined: ". "Previously declared line $$syms{$lhs}",$lineno); return; }; ref($$term{$lhs}) and do { my($where) = exists($$token{$lhs}) ? $$token{$lhs} : $$syms{$lhs}; _SyntaxError(1,"Non-terminal $lhs previously ". "declared as token line $where",$lineno); return; }; ref($$nterm{$lhs}) #declared through %type or do { $$syms{$lhs}=$lineno; #Say it's declared here delete($$term{$lhs}); #No more a terminal }; $$nterm{$lhs}=[]; #It's a non-terminal now # Hal Finkel's patch: a non terminal is a semantic child $$semantic{$lhs} = 1; my($epsrules)=0; #To issue a warning if more than one epsilon rule for my $rhs (@$rhss) { # ($precedence, $name, $code) my ($tmprule)=[ $lhs, [], splice(@$rhs,-3)]; # Warning! the splice of the ($precedence, $name, $code) # must be done before this line, if not the counts of nullables # will no work! @$rhs or do { ++$$nullable{$lhs}; ++$epsrules; }; # Reserve position for current rule push(@$rules, undef); my $position = $#$rules; # Expand to auxiliary productions all the intermediate codes $tmprule->[1] = process_production($rhs); $$rules[$position] = $tmprule; push(@{$$nterm{$lhs}},$position); } $epsrules > 1 and _SyntaxError(0,"More than one empty rule for symbol $lhs",$lineno); } # This sub is called fro Parse::Eyapp::Grammar::new # 0 1 2 3 4 5 6 7 8 # Args: object, input, firstline, filename, tree, nocompact, lexerisdefined, acceptinputprefix, start # See the call to thsi sub 'Parse' inside sub new in module Grammar.pm sub Parse { my($self)=shift; @_ > 0 or croak("No input grammar\n"); my($parsed)={}; $input=\$_[0]; # we did a shift for $self, one less $lexlevel=0; my $firstline = $_[1]; $filename = $_[2] or croak "Unknown input file"; @lineno= $firstline? ($firstline, $firstline) : (1,1); $tree = $_[3]; if ($tree) { # factorize! $buildingtree = 1; $bypass = 0; $alias = 0; $defaultaction = [ ' goto &Parse::Eyapp::Driver::YYBuildAST ', 0]; $namingscheme = [ '\&give_rhs_name', 0]; } $nocompact = $_[4]; $nberr=0; $prec=0; $labelno=0; $head=[]; $tail=""; $syms={}; $token={}; $term={}; $termdef={}; $nterm={}; $rules=[ undef ]; #reserve slot 0 for start rule $precterm={}; $start=""; $start = $_[7] if ($_[7]); $nullable={}; $expect=0; $semantic = {}; $strict = 0; pos($$input)=0; $self->YYParse(yylex => \&_Lexer, yyerror => \&_Error); #??? $nberr and _SyntaxError(2,"Errors detected: No output",-1); @$parsed{ 'HEAD', 'TAIL', 'RULES', 'NTERM', 'TERM', 'NULL', 'PREC', 'SYMS', 'START', 'EXPECT', 'SEMANTIC', 'BYPASS', 'ACCESSORS', 'BUILDINGTREE', 'PREFIX', 'NAMINGSCHEME', 'NOCOMPACT', 'CONFLICTHANDLERS', 'TERMDEF', 'WHITES', 'LEXERISDEFINED', 'INCREMENTAL', 'STRICT', 'DUMMY', } = ( $head, $tail, $rules, $nterm, $term, $nullable, $precterm, $syms, $start, $expect, $semantic, $bypass, $accessors, $buildingtree, $prefix, $namingscheme, $nocompact, \%conflict, $termdef, $whites, $lexer, $incremental, $strict, $dummy, ); undef($input); undef($lexlevel); undef(@lineno); undef($nberr); undef($prec); undef($labelno); undef($incremental); undef($head); undef($tail); undef($syms); undef($token); undef($term); undef($termdef); undef($whites); undef($nterm); undef($rules); undef($precterm); undef($start); undef($nullable); undef($expect); undef($defaultaction); undef($semantic); undef($buildingtree); undef($strict); $parsed }