001 /* 002 // $Id: Grammar.java 3 2009-05-11 08:11:57Z jhyde $ 003 // Clapham generates railroad diagrams to represent computer language grammars. 004 // Copyright (C) 2008-2009 Julian Hyde 005 // Copyright (c) 2005 Stefan Schoergenhumer, Markus Dopler 006 // 007 // This program is free software; you can redistribute it and/or modify it 008 // under the terms of the GNU General Public License as published by the Free 009 // Software Foundation; either version 2 of the License, or (at your option) 010 // any later version approved by The Eigenbase Project. 011 // 012 // This program is distributed in the hope that it will be useful, 013 // but WITHOUT ANY WARRANTY; without even the implied warranty of 014 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 015 // GNU General Public License for more details. 016 // 017 // You should have received a copy of the GNU General Public License 018 // along with this program; if not, write to the Free Software 019 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 020 */ 021 package net.hydromatic.clapham.graph; 022 023 import java.util.*; 024 import java.util.List; 025 import java.io.PrintStream; 026 027 /** 028 * TODO: 029 * 030 * @author jhyde 031 * @version $Id: Grammar.java 3 2009-05-11 08:11:57Z jhyde $ 032 * @since Aug 26, 2008 033 */ 034 public class Grammar { 035 public final Map<String,Symbol> symbolMap = new HashMap<String, Symbol>(); 036 public final Map<Symbol, Graph> ruleMap = new HashMap<Symbol, Graph>(); 037 final List<Node> nodes = new ArrayList<Node>(); 038 039 public static boolean TRACE = false; 040 041 // enable optimizations? 042 private boolean optimizeGraph = true; 043 044 public final List<Symbol> terminals = new ArrayList<Symbol>(); 045 public final List<Symbol> nonterminals = new ArrayList<Symbol>(); 046 047 private static int ptr(Node p, boolean up) { 048 if (p == null) { 049 return 0; 050 } else if (up) { 051 return -p.n; 052 } else { 053 return p.n; 054 } 055 } 056 057 public void setOptimizeGraph(boolean value) { 058 this.optimizeGraph = value; 059 } 060 061 public boolean setOptimizeGraph() { 062 return optimizeGraph; 063 } 064 065 boolean compare(Node n1, Node n2) { 066 if (n1.typ == n2.typ) { 067 if (n1.typ == NodeType.NONTERM || n1.typ == NodeType.TERM) { 068 if (!n1.sym.name.equals(n2.sym.name)) { 069 return false; 070 } 071 } 072 return true; 073 } 074 return false; 075 } 076 077 boolean deepCompare(Node n1, Node n2, boolean untilIter) { 078 boolean samelevel = true; 079 Node identifier = n2; // helps to identify the relevant iter node 080 while (n1 != null && samelevel) { 081 //just compare nodes until the iter node 082 if (untilIter) { 083 if (n1.typ == NodeType.ITER && n1.sub == identifier) { 084 if (n1 == n2) { //last iter node's next points to the iter 085 if (TRACE) { 086 System.out.println( 087 "true: iter node reached, graphs match"); 088 } 089 return true; 090 } else { 091 if (TRACE) { 092 System.out.println( 093 "false: iter node reached, graphs do not match"); 094 } 095 return false; 096 } 097 } 098 } 099 if (n2 == null) { 100 if (TRACE) { 101 System.out.println( 102 "false: second enclosing substructure ended before first"); 103 } 104 return false; 105 } 106 if (!compare(n1, n2)) { 107 if (TRACE) { 108 System.out.println("false: node not same type/content"); 109 } 110 return false; 111 } 112 //--> t,nt,eps is ok, go to next 113 114 if (n1.typ == NodeType.OPT 115 || n1.typ == NodeType.ITER 116 || n1.typ == NodeType.RERUN) { 117 if (!deepCompare(n1.sub, n2.sub, false)) { 118 if (TRACE) { 119 System.out 120 .println( 121 "false: false in subelem of iter,opt or rerun"); 122 } 123 return false; 124 } 125 if (n1.typ == NodeType.RERUN 126 && !deepCompare(n1.itergraph, n2.itergraph, false)) { 127 if (TRACE) { 128 System.out.println( 129 "false: itergraph of rerun doesn't match"); 130 } 131 return false; 132 } 133 } else if (n1.typ == NodeType.ALT) { 134 Node a1 = n1; 135 Node a2 = n2; 136 while (a1 != null) { 137 if (a2 == null) { 138 if (TRACE) { 139 System.out 140 .println( 141 "false: false in subalt, second node null"); 142 } 143 return false; 144 } 145 146 if (!deepCompare(a1.sub, a2.sub, false)) { 147 if (TRACE) { 148 System.out 149 .println("false: false in subelem of subalt"); 150 } 151 return false; 152 } 153 a1 = a1.down; 154 a2 = a2.down; 155 } 156 if (a2 != null) { 157 if (TRACE) { 158 System.out 159 .println("false: second alt has more alternatives"); 160 } 161 return false; 162 } 163 } 164 if (n1.up) { 165 if (!n2.up) { 166 if (TRACE) { 167 System.out.println( 168 "false: second has not finished enclosing structure"); 169 } 170 return false; 171 } 172 samelevel = false; 173 } 174 n1 = n1.next; 175 n2 = n2.next; 176 } 177 if (n1 == null && n2 != null) { 178 if (TRACE) { 179 System.out.println( 180 "false: first enclosing substructure ended before second"); 181 } 182 return false; 183 } 184 return true; 185 } 186 187 /** calls all methods which optimize the graphs */ 188 public void optimize() { 189 for (Symbol s : nonterminals) { 190 removeWrongLinebreaks(s.graph.l, null, s); 191 if (optimizeGraph) { 192 //remove redundant iter/opts 193 removeRedundancy(s.graph.l, null, s); 194 } 195 if (optimizeGraph) { 196 //remove eps nodes and redundant eps nodes in alternatives 197 removeEps(s.graph.l, null, s); 198 } 199 if (optimizeGraph) { 200 optimizeIter(s.graph.l, null, s); 201 } 202 } 203 } 204 205 /** 206 * Removes all unnecessary and wrong linebreaks (wrap-nodes) from the 207 * graph. 208 */ 209 void removeWrongLinebreaks(Node n, Node parent, Symbol s) { 210 boolean samelevel = true; 211 Node i = n; 212 while (i != null && samelevel) { 213 if (i.typ == NodeType.WRAP) { 214 //if in outer structure, just remove multiple wraps 215 if (parent == null) { 216 while (i.next != null && i.next.typ == NodeType.WRAP) { 217 i.next = i.next.next; 218 } 219 } //if in inner structure remove it 220 else { 221 //if \n is first element of substructure 222 if (n == i) { 223 //parent==null doesn't occur 224 225 //if \n is the only subelement 226 if (i.up || i.next == null) { 227 Node eps = new Node(this, NodeType.EPS, null); 228 parent.sub = eps; 229 eps.up = i.up; 230 eps.next = i.next; 231 n = eps; 232 } else { 233 parent.sub = i.next; 234 n = parent.sub; 235 } 236 } else { //if within substructure 237 Node j = n; 238 while (j.next != i) { 239 j = j.next; 240 } 241 j.next = i.next; 242 j.up = i.up; 243 } 244 } 245 } else if (i.typ == NodeType.OPT 246 || i.typ == NodeType.ITER 247 || i.typ == NodeType 248 .RERUN) { 249 removeWrongLinebreaks(i.sub, i, s); 250 } else if (i.typ == NodeType.ALT) { 251 Node a = i; 252 while (a != null) { 253 removeWrongLinebreaks(a.sub, a, s); 254 a = a.down; 255 } 256 } 257 258 if (i.up) { 259 samelevel = false; 260 } 261 i = i.next; 262 } 263 } 264 265 void removeRedundancy(Node n, Node parent, Symbol s) { 266 boolean samelevel = true; //next node in same level? 267 Node begin = n; 268 while (n != null && samelevel) { 269 if (n.typ == NodeType.ALT) { 270 Node a = n; 271 while (a != null) { 272 removeRedundancy(a.sub, a, s); 273 a = a.down; 274 } 275 } else if (n.typ == NodeType.ITER) { 276 while ((n.sub.typ == NodeType.ITER 277 || n.sub.typ == NodeType.OPT) && n.sub.up) { 278 //EbnfForm.WriteLine("Rendundant "+Node.nTyp[n.sub.typ]+" Node removed (iter)."); 279 n.sub = n.sub.sub; 280 Node i = n.sub; 281 while (!i.up) { 282 i = i.next; 283 } 284 i.next = n; 285 } 286 removeRedundancy(n.sub, n, s); 287 } else if (n.typ == NodeType.OPT) { 288 boolean containsIter = false; 289 while ((n.sub.typ == NodeType.OPT 290 && (n.sub.up || n.sub.next == null)) 291 || (n.sub.typ == NodeType.ITER 292 && (n.sub.up || n.sub.next == null))) { 293 //if(n.sub.typ==Node.opt || containsIter) EbnfForm.WriteLine("Rendundant "+Node.nTyp[n.sub.typ]+" Node removed (opt)."); 294 if (n.sub.typ == NodeType.ITER) { 295 containsIter = true; 296 } 297 n.sub = n.sub.sub; 298 } 299 if (containsIter) { 300 Node iter = new Node(this, NodeType.ITER, n.sub); 301 iter.next = n.next; 302 if (n == begin) { 303 if (parent == null) { 304 s.graph.l = iter; 305 } else { 306 parent.sub = iter; 307 } 308 } else { 309 Node j = begin; 310 while (j.next != n) { 311 j = j.next; 312 } 313 j.next = iter; 314 } 315 n = iter; 316 317 //set correct next pointer of last subelement of new iter 318 Node i = iter.sub; 319 while (i.next != null && !i.up) { 320 i = i.next; 321 } 322 i.next = iter; 323 } 324 removeRedundancy(n.sub, n, s); 325 } 326 if (n.up) { 327 samelevel = false; 328 } 329 n = n.next; 330 } 331 } 332 333 /** 334 * Removes all empty ('epsilon') iter/opt nodes in alternatives, as well as 335 * multiple epsilon nodes at the beginning. 336 */ 337 void removeEps(Node n, Node parent, Symbol s) { 338 boolean samelevel = true; //next node in same level? 339 Node begin = n; 340 while (n != null && samelevel) { 341 342 if (n.typ == NodeType.EPS) { 343 if (n == begin) { 344 if (parent == null) { 345 //if the graph only consists of an eps, let it live 346 if (n.next != null) { 347 s.graph.l = n.next; 348 begin = n.next; 349 } 350 } //else: at beginning of substructure not required (iter/opt/alt subnodes were already handled) 351 } else { 352 Node i = begin; 353 while (i.next != n) { 354 i = i.next; 355 } 356 i.next = n.next; 357 i.up = n.up; 358 } 359 } else if (n.typ == NodeType.ITER || n.typ == NodeType.OPT) { 360 if (n.sub.typ == NodeType.EPS 361 && (n.sub.next == null 362 || n.sub.up)) { 363 if (n == begin) { 364 if (parent == null) { //beginning of graph 365 //if graph only consists of this iter/opt, then replace it with an eps node 366 if (n.next == null) { 367 Node eps = new Node(this, NodeType.EPS, null); 368 s.graph.l = eps; 369 s.graph.r = eps; 370 } else { //remove that node 371 s.graph.l = n.next; 372 begin = n.next; 373 } 374 } //else: at beginning of substructure not required (iter/opt/alt subnodes were already handled) 375 } else { //within substructure 376 Node i = begin; 377 while (i.next != n) { 378 i = i.next; 379 } 380 if (n.up) { 381 i.up = true; 382 } 383 i.next = n.next; 384 } 385 } else { 386 removeEps(n.sub, n, s); 387 } 388 } else if (n.typ == NodeType.ALT) { 389 Node a = n; 390 //count number of eps 391 int numOfEps = 0; 392 while (a != null) { 393 //checkSubAlts(a); 394 if (a.sub.typ == NodeType.EPS 395 && (a.sub.next == null 396 || a.sub.up)) { 397 numOfEps++; 398 } 399 a = a.down; 400 } 401 a = n; 402 while (numOfEps > 1) { 403 if (n != a && a.sub.typ == NodeType.EPS && (a.sub 404 .next 405 == null || a.sub.up)) { 406 Node i = n; 407 while (i.down != a) { 408 i = i.down; 409 } 410 i.down = a.down; 411 numOfEps--; 412 } 413 a = a.down; 414 } 415 removeSameAlts(n); 416 putEpsAtBeginningOfAlt(begin, n, parent, s); 417 //optimize subcomponents 418 a = n; 419 while (a != null) { 420 //if not the left eps node 421 if (!(a.sub.typ == NodeType.EPS 422 && (a.sub.next == null 423 || a.sub.up))) { 424 removeEps(a.sub, a, s); 425 } 426 a = a.down; 427 } 428 } 429 if (n.up) { 430 samelevel = false; 431 } 432 n = n.next; 433 } 434 } 435 436 //they would bug a condition in removeEps 437 public void checkSubAlts(Node alt) { 438 439 //remove all empty iter/opts 440 //make sure, that at least one eps Node will exist 441 Node eps = new Node(this, NodeType.EPS, null); 442 eps.next = alt.sub; 443 alt.sub = eps; 444 Node i = alt.sub; 445 boolean samelevel = true; 446 while (i != null && samelevel) { 447 //if empty iter/opt 448 if ((i.typ == NodeType.ITER 449 || i.typ == NodeType.OPT) 450 && i.sub.typ == NodeType.EPS 451 && (i.sub.next == null 452 || i.sub.up)) { 453 //case i==alt.sub not possible 454 Node a = alt.sub; 455 while (a.next != i) { 456 a = a.next; 457 } 458 a.next = i.next; 459 } 460 if (i.up) { 461 samelevel = false; 462 } 463 i = i.next; 464 } 465 466 i = alt.sub; 467 //remove multiple eps nodes at the beginning 468 if (i.typ == NodeType.EPS) { 469 while (i.next != null 470 && !i.next.up 471 && i.next.typ == NodeType.EPS) { 472 i.next = i.next.next; 473 } 474 } 475 } 476 477 void removeSameAlts(Node alt) { 478 Node a = alt; 479 while (a != null) { 480 Node i = a.down; 481 while (i != null) { 482 if (deepCompare(a.sub, i.sub, false)) { 483 Node n = a; 484 while (n.down != i) { 485 n = n.down; 486 } 487 n.down = i.down; 488 } 489 i = i.down; 490 } 491 a = a.down; 492 } 493 } 494 495 void putEpsAtBeginningOfAlt(Node n, Node alt, Node parent, Symbol s) { 496 Node a = alt; 497 boolean containsEps = false; 498 499 //determine if eps is contained 500 while (a != null) { 501 //if eps node 502 if (a.sub.typ == NodeType.EPS && 503 (a.sub.next == null 504 || a.sub.up)) { 505 containsEps = true; 506 } 507 a = a.down; 508 } 509 if (containsEps) { 510 //remove eps node 511 a = alt; 512 while (a != null) { 513 //if eps node 514 if (a.sub.typ == NodeType.EPS && (a.sub.next == null 515 || a.sub 516 .up)) { 517 //remove eps only if within alternatives 518 if (a != alt) { 519 Node i = alt; 520 while (i.down != a) { 521 i = i.down; 522 } 523 i.down = a.down; 524 } 525 break; //there can be only one eps in the alts because same nodes have already been removed 526 } 527 a = a.down; 528 } 529 //insert eps, if first alt isn't eps 530 531 if (!(alt.sub.typ == NodeType.EPS 532 && (alt.sub.next == null || alt.sub.up))) { 533 Node eps = new Node(this, NodeType.EPS, null); 534 eps.next = alt.next; 535 eps.up = true; 536 AltNode a1 = new AltNode(this, eps); 537 a1.down = alt; 538 if (alt == n) { 539 if (parent == null) { 540 s.graph.l = a1; 541 } else { 542 parent.sub = a1; 543 } 544 } else { 545 Node i = n; 546 while (i.next != alt) { 547 i = i.next; 548 } 549 i.next = a1; 550 } 551 a1.next = alt.next; 552 a1.up = alt.up; 553 alt.next = null; 554 } 555 } 556 } 557 558 //optimizes enclosing structures and recursively its substructures 559 void optimizeIter(Node n, Node parent, Symbol s) { 560 boolean samelevel = true; //next node in same level? 561 Node i = n; 562 563 while (i != null && samelevel) { 564 if (i.typ == NodeType.OPT) { 565 optimizeIter(i.sub, i, s); 566 } else if (i.typ == NodeType.ALT) { 567 Node a = i; 568 while (a != null) { 569 optimizeIter(a.sub, a, s); 570 a = a.down; 571 } 572 } else if (i.typ == NodeType.ITER) { 573 //first optimize the iter substructure 574 optimizeIter(i.sub, i, s); 575 576 //while loop to deepCompare from every node until the iter node 577 Node j = n; 578 boolean matchFound = false; 579 while (j != i && !matchFound) { 580 Node k = i.sub; 581 boolean samelevel2 = true; 582 while (k != null && samelevel2 && !matchFound) { 583 if (deepCompare(j, k, true)) { 584 //EbnfForm.WriteLine("Iter node optimized."); 585 matchFound = true; 586 //replace the iter node and the nodes before by the rerun node 587 Node re = new Node(this, NodeType.RERUN, k); 588 if (j == n) { 589 590 if (parent == null) { 591 s.graph.l = re; 592 n = re; 593 } else { 594 parent.sub = re; 595 n = re; 596 } 597 } else { 598 Node l = n; 599 while (l.next != j) { 600 l = l.next; 601 } 602 l.next = re; 603 } 604 605 //if a {b a} isolate b 606 if (k != i.sub) { 607 re.itergraph = i.sub; 608 Node temp = re.itergraph; 609 while (temp.next != k) { 610 temp = temp.next; 611 } 612 temp.next = null; 613 } 614 615 re.next = i.next; 616 re.up = i.up; 617 i = re; 618 } 619 if (k.up) { 620 samelevel2 = false; 621 } 622 k = k.next; 623 } 624 625 j = j.next; 626 } 627 } 628 if (i.up) { 629 samelevel = false; 630 } 631 i = i.next; 632 } 633 } 634 635 public void makeEpsilon(Graph g) { 636 g.l = new Node(this, NodeType.EPS, null); 637 g.r = g.l; 638 } 639 640 public void makeFirstAlt(Graph g) { 641 g.l = new AltNode(this, g.l); 642 // g.l.next = g.r; 643 g.r = g.l; 644 } 645 646 public void makeAlternative(Graph g1, Graph g2) { 647 g2.l = new AltNode(this, g2.l); 648 Node p = g1.l; 649 while (p.down != null) { 650 p = p.down; 651 } 652 p.down = g2.l; 653 p = g1.r; 654 while (p.next != null) { 655 p = p.next; 656 } 657 // p.next = g2.r; 658 } 659 660 public void makeSequence(Graph g1, Graph g2) { 661 if (g1.l == null && g1.r == null) {/*case: g1 is empty */ 662 g1.l = g2.l; 663 g1.r = g2.r; 664 } else { 665 Node p = g1.r.next; 666 g1.r.next = g2.l; // link head node 667 while (p != null) { // link substructure 668 Node q = p.next; 669 p.next = g2.l; 670 p.up = true; 671 p = q; 672 } 673 g1.r = g2.r; 674 } 675 } 676 677 public void makeIteration(Graph g) { 678 g.l = new Node(this, NodeType.ITER, g.l); 679 Node p = g.r; 680 g.r = g.l; 681 while (p != null) { 682 Node q = p.next; 683 p.next = g.l; 684 p.up = true; 685 p = q; 686 } 687 } 688 689 public void makeOption(Graph g) { 690 g.l = new Node(this, NodeType.OPT, g.l); 691 g.l.next = g.r; 692 g.r = g.l; 693 } 694 695 public enum Direction { 696 LEFT, 697 RIGHT, 698 UP, 699 DOWN 700 } 701 702 /** 703 * Finds a terminal or non-terminal with a given name. 704 * 705 * @param name Name of symbol 706 * @return terminal or non-terminal 707 */ 708 public Node find(String name) { 709 for (Node n : nodes) { 710 if (n.sym != null && n.sym.name.equals(name)) { 711 return n; 712 } 713 } 714 return null; 715 } 716 717 /** 718 * Converts the terminal with a given name to a non-terminal. 719 * 720 * @param name Name of non-terminal. 721 */ 722 public void terminalToNt(String name) { 723 for (Node n : nodes) { 724 if (n.sym != null && n.sym.name.equals(name)) { 725 n.typ = NodeType.NONTERM; 726 } 727 } 728 for (Symbol s : terminals) { 729 if (s.name.equals(name)) { 730 nonterminals.add(s); 731 terminals.remove(s); 732 break; 733 } 734 } 735 } 736 737 public void printNodes(PrintStream out) { 738 out.println("Graph nodes:"); 739 out.println("(S...Starting nodes)"); 740 out.println("--------------------------------------------"); 741 out.println("S n type name next down sub "); 742 out.println("--------------------------------------------"); 743 744 for (Node p : nodes) { 745 boolean nt = false; 746 for (Symbol s : nonterminals) { 747 if (s.graph.l.n == p.n) { 748 out.print("*"); 749 nt = true; 750 } 751 } 752 if (!nt) { 753 out.print(" "); 754 } 755 756 out.format("%1$-4s %2$-4s ", p.n, p.typ.name()); 757 758 if (p.sym != null) { 759 out.format("%1$-12s ", p.sym.name); 760 } else { 761 out.print(" "); 762 } 763 764 out.format("%1$5s ", Grammar.ptr(p.next, p.up)); 765 766 switch (p.typ) { 767 case ALT: 768 case ITER: 769 case OPT: 770 case RERUN: 771 out.format( 772 "%1$5d %2$5d", 773 Grammar.ptr(p.down, false), 774 Grammar.ptr(p.sub, false)); 775 break; 776 case EPS: 777 out.print(" "); 778 break; 779 } 780 out.println(); 781 } 782 out.println(); 783 784 for (Symbol symbol : symbolMap.values()) { 785 final StringBuffer buf = new StringBuffer(); 786 out.println(symbol.name + " ::="); 787 symbol.graph.l.unparse(buf); 788 out.println(" " + buf); 789 } 790 out.println(); 791 } 792 793 // starts to draw the rule at the given symbol s 794 795 /* 796 * compare two graphs on the basis of structure and value 797 * if untilIter is set, n1 and n2 are treated in a different way: 798 * the graph n1 to the iter node is compared to the iter subnode 799 * params: n1 must be the node before iter if untilIter==true 800 * params: n2 must be the first subnode of iter if untilIter==true 801 */ 802 } 803 804 // End Grammar.java