LICENSE, MINES
[Faustine.git] / interpreter / preprocessor / faust-0.9.47mr3 / compiler / patternmatcher / patternmatcher.cpp
1
2 /* compiler/patternmatcher/patternmatcher.cpp: implementation of the Faust
3 rewriting engine */
4
5 #include "tlib.hh"
6 #include "list.hh"
7 #include "boxes.hh"
8 #include "ppbox.hh"
9 #include "eval.hh"
10 #include "patternmatcher.hh"
11
12 using namespace std;
13 #include <vector>
14 #include <list>
15 #include <set>
16 #include <utility>
17
18 /* Uncomment for debugging output. */
19 //#define DEBUG
20
21 /* Additional Tree deconstruction operations. */
22
23 /* Check for cons (nonempty list) nodes. */
24
25 static inline bool isCons(Tree x, Tree& h, Tree& t)
26 {
27 if (isList(x)) {
28 h = hd(x); t = tl(x);
29 return true;
30 } else
31 return false;
32 }
33
34 /* Deconstruct a (BDA) op pattern (YO). */
35
36 static inline bool isBoxPatternOp(Tree box, Node& n, Tree& t1, Tree& t2)
37 {
38 if ( isBoxPar(box, t1, t2) ||
39 isBoxSeq(box, t1, t2) ||
40 isBoxSplit(box, t1, t2) ||
41 isBoxMerge(box, t1, t2) ||
42 isBoxHGroup(box, t1, t2) ||
43 isBoxVGroup(box, t1, t2) ||
44 isBoxTGroup(box, t1, t2) ||
45 isBoxRec(box, t1, t2) )
46 {
47 n = box->node();
48 return true;
49 } else {
50 return false;
51 }
52 }
53
54 /* TA data structures. */
55
56 /* subterm paths */
57
58 typedef vector<int> Path;
59
60 /* Subterm at given path in given term tree. */
61
62 static Tree subtree(Tree X, int i, const Path& p)
63 {
64 int n = p.size();
65 Node op(0);
66 Tree x0, x1;
67 if (i < n && isBoxPatternOp(X, op, x0, x1))
68 return subtree((p[i]==0)?x0:x1, i+1, p);
69 else
70 return X;
71 }
72
73 /* rule markers */
74
75 struct Rule {
76 int r; // rule number
77 Tree id; // matched variable (NULL if none)
78 Path p; // subterm path indicating where variable value is found
79
80 Rule(int _r, Tree _id) : r(_r), id(_id), p(Path()) {}
81 Rule(int _r, Tree _id, const Path& _p) : r(_r), id(_id), p(_p) {}
82 Rule(const Rule& rule) : r(rule.r), id(rule.id), p(rule.p) {}
83
84 Rule& operator = (const Rule& rule)
85 { r = rule.r; id = rule.id; p = rule.p; return *this; }
86
87 bool operator == (const Rule& rule) const
88 { return r == rule.r; }
89 bool operator < (const Rule& rule) const
90 { return r < rule.r; }
91
92 #ifdef DEBUG
93 ostream& print(ostream& fout) const;
94 #endif
95 };
96
97 struct State;
98
99 /* transitions */
100
101 struct Trans {
102 Tree x; // symbol or constant (NULL for variable)
103 Node n; // operator symbol (if arity>0)
104 int arity; // symbol arity
105 State *state; // successor state
106
107 Trans(Tree _x);
108 Trans(const Node& _n, int _arity);
109 Trans(const Trans& trans);
110 ~Trans();
111
112 Trans& operator = (const Trans& trans);
113
114 bool is_var_trans() const { return arity == 0 && x == NULL; }
115 bool is_cst_trans(Tree &_x) const { _x = x; return arity == 0 && x != NULL; }
116 bool is_op_trans(Node &_n) const { _n = n; return arity > 0; }
117
118 bool operator == (const Trans& trans) const
119 { return arity == trans.arity && x == trans.x && n == trans.n; }
120 bool operator < (const Trans& trans) const
121 { return (arity < trans.arity) ? 1 :
122 (arity > trans.arity) ? 0 :
123 (arity == 0) ? (x < trans.x) :
124 (n.getSym() < trans.n.getSym()); }
125
126 #ifdef DEBUG
127 ostream& print(ostream& fout) const;
128 #endif
129 };
130
131 /* states */
132
133 struct State {
134 int s; // state number
135 bool match_num; // whether state has a transition on a numeric constant
136 list<Rule> rules; // rule markers
137 list<Trans> trans; // transitions (1st transition is on variable if available)
138 State() :
139 s(0), match_num(false), rules(list<Rule>()), trans(list<Trans>()) {}
140 State(const State& state) :
141 s(state.s), match_num(state.match_num),
142 rules(state.rules), trans(state.trans) {}
143
144 State& operator = (const State& state)
145 { s = state.s; match_num = state.match_num;
146 rules = state.rules; trans = state.trans;
147 return *this;
148 }
149
150 #ifdef DEBUG
151 ostream& print(ostream& fout) const;
152 #endif
153 };
154
155 // these need to come here so that the storage size of struct State is known
156
157 Trans::Trans(Tree _x) :
158 x(_x), n(0), arity(0), state(new State)
159 {
160 }
161
162 Trans::Trans(const Node& _n, int _arity) :
163 x(NULL), n(_n), arity(_arity), state(new State)
164 {
165 }
166
167 Trans::Trans(const Trans& trans) :
168 x(trans.x), n(trans.n), arity(trans.arity)
169 {
170 state = new State(*trans.state);
171 }
172
173 Trans::~Trans()
174 {
175 delete state;
176 }
177
178 Trans& Trans::operator = (const Trans& trans)
179 {
180 x = trans.x; n = trans.n; arity = trans.arity;
181 state = new State(*trans.state);
182 return *this;
183 }
184
185 /* the automaton */
186
187 struct Automaton {
188 vector<State*> state;
189 vector<Tree> rhs;
190
191 Automaton() : state(vector<State*>()), rhs(vector<Tree>()), s(0) {}
192
193 // number of rules
194 int n_rules() { return rhs.size(); }
195 // markers of rules still active in state s
196 const list<Rule>& rules(int s) { return state[s]->rules; }
197 // transitions in state s
198 const list<Trans>& trans(int s) { return state[s]->trans; }
199 // is s a final state?
200 bool final(int s) { return trans(s).empty(); }
201
202 // assign state numbers and build the state table
203 int s;
204 void build(State *st);
205
206 #ifdef DEBUG
207 ostream& print(ostream& fout) const;
208 #endif
209 };
210
211 void Automaton::build(State *st)
212 {
213 state.push_back(st);
214 st->s = s++;
215 list<Trans>::const_iterator t;
216 for (t = st->trans.begin(); t != st->trans.end(); t++) {
217 Tree x;
218 double f;
219 int i;
220 if (t->is_cst_trans(x) &&
221 (isBoxInt(x, &i) || isBoxReal(x, &f)))
222 st->match_num = true;
223 build(t->state);
224 }
225 }
226
227 /* Debugging output. */
228
229 #ifdef DEBUG
230 inline ostream& operator << (ostream& s, const Rule& x)
231 { return x.print(s); }
232 inline ostream& operator << (ostream& s, const Trans& x)
233 { return x.print(s); }
234 inline ostream& operator << (ostream& s, const State& x)
235 { return x.print(s); }
236 inline ostream& operator << (ostream& s, const Automaton& x)
237 { return x.print(s); }
238
239 ostream& Rule::print(ostream& fout) const
240 {
241 if (id != NULL)
242 fout << "#" << r << "(" << *id << ")";
243 else
244 fout << "#" << r;
245 return fout;
246 }
247
248 ostream& Trans::print(ostream& fout) const
249 {
250 if (arity > 0)
251 fout << "\top " << n << ": state " << state->s << endl;
252 else if (x == NULL)
253 fout << "\tvar _: state " << state->s << endl;
254 else
255 fout << "\tcst " << *x << ": state " << state->s << endl;
256 return fout;
257 }
258
259 ostream& State::print(ostream& fout) const
260 {
261 fout << "state " << s << ":";
262 list<Rule>::const_iterator r;
263 for (r = rules.begin(); r != rules.end(); r++)
264 fout << " " << *r;
265 fout << endl;
266 list<Trans>::const_iterator t;
267 for (t = trans.begin(); t != trans.end(); t++)
268 fout << *t;
269 return fout;
270 }
271
272 ostream& Automaton::print(ostream& fout) const
273 {
274 int i, n = rhs.size();
275 for (i = 0; i < n; i++)
276 fout << "rule #" << i << ": " << *rhs[i] << endl;
277 n = state.size();
278 for (i = 0; i < n; i++)
279 fout << *state[i];
280 return fout;
281 }
282 #endif
283
284 /* Construction algorithm for the pattern matching automaton.
285 *
286 * We employ the incremental technique described in Graef: Left-To-Right Tree
287 * Pattern Matching, Proc. RTA 1991, Springer 1991 (LNCS 488) to construct a
288 * tree automaton (TA) for the given patterns. The basic outline of the
289 * technique is as follows. Initially, the automaton is empty. From each
290 * pattern we produce a trie (considering the pattern as a string of variable
291 * and function symbols and constants). This trie is then merged with the TA
292 * obtained so far. The latter process is similar to merging two deterministic
293 * finite automata, but it also takes into account the variables (see the
294 * merge_state() routine below).
295 */
296
297 /* Construct a trie from a term tree. Takes the "start" and returns the "end"
298 state of the (sub-)trie. */
299
300 static State *make_state(State *state, int r, Tree x, Path& p)
301 {
302 Tree id, x0, x1;
303 Node op(0);
304 if (isBoxPatternVar(x, id)) {
305 /* variable */
306 Rule rule(r, id, p);
307 state->rules.push_back(rule);
308 Trans trans(NULL);
309 state->trans.push_back(trans);
310 return state->trans.begin()->state;
311 } else if (isBoxPatternOp(x, op, x0, x1)) {
312 /* composite pattern */
313 Rule rule(r, NULL);
314 state->rules.push_back(rule);
315 Trans trans(op, 2);
316 state->trans.push_back(trans);
317 State *next = state->trans.begin()->state;
318 p.push_back(0);
319 next = make_state(next, r, x0, p);
320 p.pop_back();
321 p.push_back(1);
322 next = make_state(next, r, x1, p);
323 p.pop_back();
324 return next;
325 } else {
326 /* constant */
327 Rule rule(r, NULL);
328 state->rules.push_back(rule);
329 Trans trans(x);
330 state->trans.push_back(trans);
331 return state->trans.begin()->state;
332 }
333 }
334
335 /* Take a copy of a state and prefix it with n variable transitions. */
336
337 static State *make_var_state(int n, State *state)
338 {
339 if (n <= 0)
340 return new State(*state);
341 list<Rule>rules = state->rules;
342 list<Rule>::iterator r;
343 for (r = rules.begin(); r != rules.end(); r++) {
344 r->id = NULL; r->p = Path();
345 }
346 State *prefix = new State, *current = prefix;
347 while (n-- > 0) {
348 current->rules = rules;
349 Trans trans(NULL);
350 current->trans.push_back(trans);
351 current = current->trans.begin()->state;
352 }
353 *current = *state;
354 return prefix;
355 }
356
357 /* Merge two tree automata. Merges the tree automaton rooted at state2 into
358 the automaton rooted at state1. We assume that state2 is in "trie" form,
359 i.e., each state has at most one transition, which is always guaranteed
360 here and simplifies the algorithm. */
361
362 static void merge_state(State *state1, State *state2);
363
364 static void inline merge_rules(list<Rule>& rules1, list<Rule>& rules2)
365 {
366 list<Rule> cprules2 = rules2;
367 rules1.merge(cprules2);
368 }
369
370 static void merge_trans_var(list<Trans>& trans, State *state)
371 {
372 if (!trans.begin()->is_var_trans()) {
373 /* If we don't have a variable transition in this state yet then create a
374 new one. */
375 Trans tr(NULL);
376 trans.push_front(tr);
377 }
378 list<Trans>::const_iterator t;
379 Tree x;
380 Node op(0);
381 for (t = trans.begin(); t != trans.end(); t++) {
382 if (t->is_var_trans())
383 merge_state(t->state, state);
384 else if (t->is_cst_trans(x)) {
385 /* add the completion of the given state for a constant */
386 merge_state(t->state, state);
387 } else if (t->is_op_trans(op)) {
388 /* add the completion of the given state for an arity>0 op */
389 State *state1 = make_var_state(t->arity, state);
390 merge_state(t->state, state1);
391 delete state1;
392 }
393 }
394 }
395
396 static void merge_trans_cst(list<Trans>& trans, Tree x, State *state)
397 {
398 list<Trans>::iterator t0 = trans.begin(), t1 = t0, t;
399 Tree x1;
400 if (t0->is_var_trans()) t1++;
401 for (t = t1; t != trans.end(); t++) {
402 if (t->is_cst_trans(x1)) {
403 if (x == x1) {
404 merge_state(t->state, state);
405 return;
406 } else if (x < x1)
407 break;
408 }
409 }
410 /* no matching transition has been found; add a new one */
411 Trans tr(x);
412 trans.insert(t, tr); t--;
413 State *state1 = t->state;
414 *state1 = *state;
415 if (t1 != t0) {
416 /* if we have a variable transition in the current state, we also need to
417 merge its completion for the current symbol/constant */
418 merge_state(state1, t0->state);
419 }
420 }
421
422 static void merge_trans_op(list<Trans>& trans, const Node& op,
423 int arity, State *state)
424 {
425 /* analogous to merge_trans_cst above, but handles the arity>0 case */
426 list<Trans>::iterator t0 = trans.begin(), t1 = t0, t;
427 Node op1(0);
428 if (t0->is_var_trans()) t1++;
429 for (t = t1; t != trans.end(); t++) {
430 if (t->is_op_trans(op1)) {
431 if (op == op1) {
432 merge_state(t->state, state);
433 return;
434 } else if (op.getSym() < op1.getSym())
435 break;
436 }
437 }
438 Trans tr(op, arity);
439 trans.insert(t, tr); t--;
440 State *state1 = t->state;
441 *state1 = *state;
442 if (t1 != t0) {
443 State *state2 = make_var_state(arity, t0->state);
444 merge_state(state1, state2);
445 delete state2;
446 }
447 }
448
449 static void merge_trans(list<Trans>& trans1, list<Trans>& trans2)
450 {
451 Tree x;
452 Node op(0);
453 if (trans2.empty())
454 ;
455 else if (trans1.empty()) {
456 list<Trans> cptrans2 = trans2;
457 /* append a copy of trans2 to trans1 */
458 trans1.splice(trans1.end(), cptrans2);
459 } else if (trans2.begin()->is_var_trans())
460 /* merge a variable transition */
461 merge_trans_var(trans1, trans2.begin()->state);
462 else if (trans2.begin()->is_cst_trans(x))
463 /* merge a constant transition */
464 merge_trans_cst(trans1, x, trans2.begin()->state);
465 else if (trans2.begin()->is_op_trans(op))
466 /* merge a BDA op transition */
467 merge_trans_op(trans1, op, trans2.begin()->arity, trans2.begin()->state);
468 }
469
470 static void merge_state(State *state1, State *state2)
471 {
472 merge_rules(state1->rules, state2->rules);
473 merge_trans(state1->trans, state2->trans);
474 }
475
476 /* Take the rules of a BoxCase expression and return a pointer to the
477 corresponding TA automaton (interface operation). */
478
479 Automaton *make_pattern_matcher(Tree R)
480 /* Tree R encodes the rules of a case box expressions as a Tree object, as
481 follows:
482
483 Rules ::= cons Rule (cons Rule ... nil)
484 Rule ::= cons Lhs Rhs
485 Lhs ::= cons Pattern (cons Pattern ... nil)
486 Pattern ::= Tree (parameter pattern)
487 Rhs ::= Tree
488
489 NOTE: The lists of rules and patterns are actually delivered in reverse
490 order by the parser, so we have to reverse them on the fly. */
491 {
492 Automaton *A = new Automaton;
493 int n = len(R), r = n;
494 State *start = new State;
495 Tree rule, rest;
496 vector<Tree> rules(n, (Tree)NULL);
497 vector< vector<Tree> > testpats(n);
498 while (isCons(R, rule, rest)) {
499 rules[--r] = rule;
500 R = rest;
501 }
502 for (r = 0; r < n; r++) {
503 Tree lhs, rhs;
504 if (isCons(rules[r], lhs, rhs)) {
505 Tree pat, rest;
506 int m = len(lhs), i = m;
507 vector<Tree> pats(len(lhs), (Tree)NULL);
508 State *state0 = new State, *state = state0;
509 A->rhs.push_back(rhs);
510 while (isCons(lhs, pat, rest)) {
511 pats[--i] = pat;
512 lhs = rest;
513 }
514 testpats[r] = pats;
515 for (i = 0; i < m; i++) {
516 Path p;
517 state = make_state(state, r, pats[i], p);
518 }
519 Rule rule(r, NULL);
520 state->rules.push_back(rule);
521 merge_state(start, state0);
522 delete state0;
523 }
524 }
525 A->build(start);
526 /* Check for shadowed rules. Note that because of potential nonlinearities
527 it is *not* enough to just check the rule lists of final states and
528 determine whether they have multiple matched rules. */
529 for (r = 0; r < n; r++) {
530 int s = 0, m = testpats[r].size();
531 Tree C;
532 vector<Tree> E(n, nil);
533 /* try to match the lhs of rule #r */
534 for (int i = 0; i < m; i++) {
535 s = apply_pattern_matcher(A, s, testpats[r][i], C, E);
536 if (s < 0) break;
537 }
538 if (A->final(s)) {
539 list<Rule>::const_iterator ru;
540 for (ru = A->rules(s).begin(); ru != A->rules(s).end(); ru++)
541 if (!isBoxError(E[ru->r]))
542 if (ru->r < r) {
543 /* Lhs of rule #r matched a higher-priority rule, so rule #r may
544 be shadowed. */
545 Tree lhs1, rhs1, lhs2, rhs2;
546 if (isCons(rules[ru->r], lhs1, rhs1) && isCons(rules[r], lhs2, rhs2)) {
547 cerr << "WARNING : shadowed pattern-matching rule: "
548 << boxpp(reverse(lhs2)) << " => " << boxpp(rhs2) << ";"
549 << " previous rule was: "
550 << boxpp(reverse(lhs1)) << " => " << boxpp(rhs1) << ";"
551 << endl;
552 } else {
553 cerr << "INTERNAL ERROR : " << __FILE__ << ":" << __LINE__ << endl;
554 exit(1);
555 }
556 } else if (ru->r >= r)
557 break;
558 }
559 }
560 #ifdef DEBUG
561 cerr << "automaton " << A << endl << *A << "end automaton" << endl;
562 #endif
563 return A;
564 }
565
566 /* Helper type to represent variable substitutions which are recorded during
567 matching. Each variable is associated with the path pointing at the subterm
568 of the argument where the substitution of the matched variable is to be
569 found. */
570
571 struct Assoc {
572 Tree id;
573 Path p;
574 Assoc(Tree _id, const Path& _p) : id(_id), p(_p) {}
575 };
576 typedef list<Assoc> Subst;
577
578 /* add all substitutions for a given state */
579
580 static void add_subst(vector<Subst>& subst, Automaton *A, int s)
581 {
582 list<Rule> rules = A->rules(s);
583 list<Rule>::const_iterator r;
584 for (r = rules.begin(); r != rules.end(); r++)
585 if (r->id != NULL)
586 subst[r->r].push_back(Assoc(r->id, r->p));
587 }
588
589 /* Process a given term tree X starting from state s, modify variable
590 substitutions accordingly. Returns the resulting state, or -1 if no
591 match. This does all the grunt work of matching. */
592
593 static int apply_pattern_matcher_internal(Automaton *A, int s, Tree X,
594 vector<Subst>& subst)
595 {
596 /* FIXME: rewrite this non-recursively? */
597 if (s >= 0) {
598 list<Trans>::const_iterator t;
599 if (A->state[s]->match_num)
600 /* simplify possible numeric argument on the fly */
601 X = simplifyPattern(X);
602 /* first check for applicable non-variable transitions */
603 for (t = A->trans(s).begin(); t != A->trans(s).end(); t++) {
604 Tree x;
605 Node op(0), op1(0);
606 if (t->is_var_trans())
607 continue;
608 else if (t->is_cst_trans(x)) {
609 if (X==x) {
610 /* transition on constant */
611 #ifdef DEBUG
612 cerr << "state " << s << ", " << *x << ": goto state " << t->state->s << endl;
613 #endif
614 add_subst(subst, A, s);
615 s = t->state->s;
616 return s;
617 }
618 } else if (t->is_op_trans(op)) {
619 Tree x0, x1;
620 if (isBoxPatternOp(X, op1, x0, x1) && op == op1) {
621 /* transition on operation symbol */
622 #ifdef DEBUG
623 cerr << "state " << s << ", " << op << ": goto state " << t->state->s << endl;
624 #endif
625 add_subst(subst, A, s);
626 s = t->state->s;
627 if (s >= 0)
628 s = apply_pattern_matcher_internal(A, s, x0, subst);
629 if (s >= 0)
630 s = apply_pattern_matcher_internal(A, s, x1, subst);
631 return s;
632 }
633 }
634 }
635 /* check for variable transition (is always first in the list of
636 transitions) */
637 t = A->trans(s).begin();
638 if (t->is_var_trans()) {
639 #ifdef DEBUG
640 cerr << "state " << s << ", _: goto state " << t->state->s << endl;
641 #endif
642 add_subst(subst, A, s);
643 s = t->state->s;
644 } else {
645 #ifdef DEBUG
646 cerr << "state " << s << ", *** match failed ***" << endl;
647 #endif
648 s = -1;
649 }
650 }
651 return s;
652 }
653
654 /* Apply the pattern matcher to a single argument, starting from a given state
655 (interface operation). Returns the resulting state, modifies the variable
656 bindings E accordingly, and sets C to the resulting closure if a final
657 state is reached. Result will be -1 to indicate a matching failure, and C
658 will be set to nil if no final state has been reached yet. */
659
660 int apply_pattern_matcher(Automaton *A, // automaton
661 int s, // start state
662 Tree X, // arg to be matched
663 Tree& C, // output closure (if any)
664 vector<Tree>& E) // modified output environments
665 {
666 int n = A->n_rules();
667 vector<Subst> subst(n, Subst());
668 /* perform matching, record variable substitutions */
669 #ifdef DEBUG
670 cerr << "automaton " << A << ", state " << s << ", start match on arg: " << *X << endl;
671 #endif
672 s = apply_pattern_matcher_internal(A, s, X, subst);
673 C = nil;
674 if (s < 0)
675 /* failed match */
676 return s;
677 /* process variable substitutions */
678 list<Rule>::const_iterator r;
679 for (r = A->rules(s).begin(); r != A->rules(s).end(); r++) {
680 // all rules still active in state s
681 if (!isBoxError(E[r->r])) { // and still viable
682 Subst::const_iterator assoc;
683 for (assoc = subst[r->r].begin(); assoc != subst[r->r].end(); assoc++) {
684 Tree Z, Z1 = subtree(X, 0, assoc->p);
685 if (searchIdDef(assoc->id, Z, E[r->r])) {
686 if (Z != Z1) {
687 /* failed nonlinearity, add to the set of nonviable rules */
688 #ifdef DEBUG
689 cerr << "state " << s << ", rule #" << r->r << ": " <<
690 *assoc->id << " := " << *Z1 << " *** failed *** old value: " <<
691 *Z << endl;
692 #endif
693 E[r->r] = boxError();
694 }
695 } else {
696 /* bind a variable for the current rule */
697 #ifdef DEBUG
698 cerr << "state " << s << ", rule #" << r->r << ": " <<
699 *assoc->id << " := " << *Z1 << endl;
700 #endif
701 E[r->r] = pushValueDef(assoc->id, Z1, E[r->r]);
702 }
703 }
704 }
705 }
706 if (A->final(s)) {
707 /* if in a final state then return the right-hand side together with the
708 corresponding variable environment */
709 for (r = A->rules(s).begin(); r != A->rules(s).end(); r++) // all rules matched in state s
710 if (!isBoxError(E[r->r])) { // and still viable
711 /* return the rhs of the matched rule */
712 C = closure(A->rhs[r->r], nil, nil, E[r->r]);
713 #ifdef DEBUG
714 cerr << "state " << s << ", complete match yields rhs #" << r->r <<
715 ": " << *A->rhs[r->r] << endl;
716 #endif
717 return s;
718 }
719 /* if none of the rules were matched then declare a failed match */
720 #ifdef DEBUG
721 cerr << "state " << s << ", *** match failed ***" << endl;
722 #endif
723 return -1;
724 }
725 #ifdef DEBUG
726 cerr << "state " << s << ", successful incomplete match" << endl;
727 #endif
728 return s;
729 }