Вот пример с активным синтаксическим деревом транслятора Рефала-0.
RocotStructs - реализация общих древовидных выражений, т.е. списков, термы которых могут быть тоже списками (типа объектных выражений Рефала, в общем).
Соответственно, описаны разные типы узлов синтаксического дерева языка, в комментарии в теле RECORD - структура динамически размещаемых подузлов, если есть.
Пример, конечно, простенький. У Info21, как я понял из некоторых его рассказов, узлы нижних уровней могут отвечать на запросы верхних (типа, а знает ли функция точно свою производную и т.п.).
Код:
MODULE RocotRefal0Front;
(*
Syntax specification in EBNF:
Program = { Function [ ";" ] } .
Function = FuncName "{" Sentence { ";" Sentence } [";"] "}" .
Sentence = Pattern {"," Condition} "=" Expression .
Pattern = [Template] [FreeVar] [Template] [FreeVar] [Template] .
Template = (String | CharVar) {Template} .
Condition = "<"FuncName (CharVar | FreeVar) ">" ":" ("T" OR "F") .
Expression = (String | CharVar | FreeVar | FunctionCall) .
FunctionCall = "<"FuncName Expression ">" .
FuncName = LatinLetter { LatinLetter | Digit | "_" | "-" } .
Identifier = { LatinLetter | Digit } .
CharVar = "s"Identifier .
FreeVar = "e"Identifier .
String = "'"{ NormalChar | "\"SpecialChar | CharSig | CharCode | lineBreak } "'" .
NormalChar = ~SpecialChar .
SpecialChar = "\" | """ | '"" .
CharSig = "\n" | "\t" .
CharCode = "\"Digit{Digit} .
*)
IMPORT Structs := RocotStructs, Strings := RocotStrings, Text := RocotText, Log;
TYPE
Name* = ARRAY 256 OF CHAR;
Program* = POINTER TO EXTENSIBLE RECORD (Structs.Expr)
(* SymTab { Function } *)
END;
Function* = POINTER TO EXTENSIBLE RECORD (Structs.Expr)
name*: Name;
(* Sentence { Sentence } *)
END;
Sentence* = POINTER TO EXTENSIBLE RECORD (Structs.Expr)
(* SymTab Pattern { Condition } Expression *)
END;
Pattern* = POINTER TO EXTENSIBLE RECORD (Structs.Expr)
(* { Template | FreeVar } *)
END;
Condition* = POINTER TO EXTENSIBLE RECORD (Structs.Expr)
predicate*: Name;
expect*: BOOLEAN
(* VarRef *)
END;
Template* = POINTER TO EXTENSIBLE RECORD (Structs.Expr)
(* (Char | CharVar | VarRef) { Char | CharVar | VarRef } *)
END;
Char* = POINTER TO EXTENSIBLE RECORD (Structs.Term)
c*: CHAR;
encode*: BOOLEAN
END;
Variable* = POINTER TO ABSTRACT RECORD (Structs.Term)
name*: Name
END;
FreeVar* = POINTER TO EXTENSIBLE RECORD (Variable) END;
CharVar* = POINTER TO EXTENSIBLE RECORD (Variable) END;
VarRef* = POINTER TO EXTENSIBLE RECORD (Structs.Term)
var*: Variable
END;
Expression* = POINTER TO EXTENSIBLE RECORD (Structs.Expr)
traits*: SET
(* { Char | VarRef | FuncCall } *)
END;
FuncCall* = POINTER TO EXTENSIBLE RECORD (Structs.Expr)
funcName*: Name;
(* Expression *)
END;
Message = ABSTRACT RECORD (Structs.Message) error: INTEGER END;
NewFuncMsg = RECORD (Message) END;
FuncNameMsg = RECORD (Message) name: Name END;
NewSentMsg = RECORD (Message) END;
BeginExprMsg = RECORD (Message) END;
EndExprMsg = RECORD (Message) END;
CharMsg = RECORD (Message) char: CHAR; encode: BOOLEAN END;
VarMsg = ABSTRACT RECORD (Message) symTab: Structs.SymTab; name: Name END;
CharVarMsg = RECORD (VarMsg) END;
FreeVarMsg = RECORD (VarMsg) END;
NewCondMsg = RECORD (Message) END;
(* ... опущены всякие вещи ... *)
PROCEDURE (t: Template) HandleMsg* (VAR msg: Structs.Message);
VAR c: Char;
lastVar: ANYPTR;
var: CharVar;
ref: VarRef;
BEGIN
WITH msg: CharMsg DO
NEW(c); c.c := msg.char; c.encode := msg.encode;
t.Insert(c, t.last)
| msg: CharVarMsg DO
lastVar := msg.symTab.Look(msg.name);
IF ~ ((lastVar = NIL) OR (lastVar IS CharVar)) THEN
msg.error := sameNameForDifVars
ELSIF lastVar = NIL THEN
NEW(var);
var.name := msg.name$;
msg.symTab.Add(msg.name, var);
t.Insert(var, t.last)
ELSE
NEW(ref); ref.var := lastVar(Variable);
t.Insert(ref, t.last)
END
ELSE END
END HandleMsg;
PROCEDURE (p: Pattern) HandleMsg* (VAR msg: Structs.Message);
VAR tem: Template;
lastVar: ANYPTR;
var: FreeVar;
ref: VarRef;
BEGIN
IF (msg IS CharMsg) OR (msg IS CharVarMsg) THEN
IF (p.last = NIL) OR ~(p.last IS Template) THEN
NEW(tem);
p.Insert(tem, p.last)
END;
p.last(Template).HandleMsg(msg)
END;
WITH msg: FreeVarMsg DO
lastVar := msg.symTab.Look(msg.name);
IF (p.last # NIL) & (p.last IS FreeVar) THEN
msg.error := unexpectedFreeVar
ELSIF lastVar # NIL THEN
IF lastVar IS FreeVar THEN
msg.error := duplicatedFreeVar;
ELSE
msg.error := sameNameForDifVars
END
ELSE
NEW(var); var.name := msg.name$;
p.Insert(var, p.last); msg.symTab.Add(var.name, var)
END
ELSE END
END HandleMsg;
PROCEDURE (c: Condition) HandleMsg* (VAR msg: Structs.Message);
VAR lastVar: ANYPTR;
ref: VarRef;
BEGIN
WITH msg: FuncNameMsg DO
c.predicate := msg.name$
| msg: CharVarMsg DO
lastVar := msg.symTab.Look(msg.name);
IF ~( (lastVar # NIL) & (lastVar IS CharVar) ) THEN
msg.error := unknownVariable
ELSE
NEW(ref); ref.var := lastVar(Variable)(CharVar);
c.Insert(ref, c.last)
END
| msg: CharMsg DO
CASE msg.char OF
| 'T': c.expect := TRUE
| 'F': c.expect := FALSE
END
ELSE END
END HandleMsg;
PROCEDURE (fc: FuncCall) HandleMsg* (VAR msg: Structs.Message);
BEGIN
WITH msg: FuncNameMsg DO
fc.funcName := msg.name
ELSE END
END HandleMsg;
PROCEDURE ^ (s: Sentence) GetFreeVars* (OUT e1, e2: FreeVar), NEW;
PROCEDURE (e: Expression) CheckFreeVar (VAR msg: FreeVarMsg), NEW;
VAR e1, e2: FreeVar;
BEGIN
UpSentence(e).GetFreeVars(e1, e2);
IF (e1 # NIL) & (msg.name = e1.name) THEN
IF e.traits * {exprFirstFreeVar, exprSecondFreeVar} # {} THEN
msg.error := wrongFreeVarOrder
ELSE
INCL(e.traits, exprFirstFreeVar)
END
ELSIF (e2 # NIL) & (msg.name = e2.name) THEN
IF exprSecondFreeVar IN e.traits THEN
msg.error := wrongFreeVarOrder
ELSE
INCL(e.traits, exprSecondFreeVar)
END
ELSE
msg.error := unknownVariable
END
END CheckFreeVar;
PROCEDURE (e: Expression) HandleMsg* (VAR msg: Structs.Message);
VAR c: Char;
lastVar: ANYPTR;
ref: VarRef;
fc: FuncCall;
fe: Expression;
base: Structs.Expr;
BEGIN
IF msg IS FreeVarMsg THEN e.CheckFreeVar(msg(FreeVarMsg)) END;
IF (msg IS Message) & (msg(Message).error = 0) THEN
IF (e.last # NIL) & (e.last IS Expression) THEN
e.last.HandleMsg(msg)
ELSE
WITH msg: CharMsg DO
NEW(c); c.c := msg.char; c.encode := msg.encode;
e.Insert(c, e.last)
| msg: CharVarMsg DO
lastVar := msg.symTab.Look(msg.name);
IF ~ ( (lastVar # NIL) & (lastVar IS CharVar) ) THEN
msg.error := unknownVariable
ELSE
NEW(ref); ref.var := lastVar(CharVar);
e.Insert(ref, e.last)
END
| msg: FreeVarMsg DO
lastVar := msg.symTab.First(msg.name); msg.symTab.End;
IF ~ ( (lastVar # NIL) & (lastVar IS FreeVar) ) THEN
msg.error := unknownVariable
ELSE
NEW(ref); ref.var := lastVar(FreeVar);
e.Insert(ref, e.last)
END
| msg: BeginExprMsg DO
NEW(fc);
e.Insert(fc, e.last)
| msg: FuncNameMsg DO
e.last(FuncCall).HandleMsg(msg);
NEW(fe);
e.Insert(fe, e.last)
| msg: EndExprMsg DO
base := e.Expr();
WITH base: Expression DO
base.last(Expression).prev(FuncCall).Paste(base, e, e, NIL)
| base: Sentence DO
(* nothing *)
END
| msg: Message DO
e.last.HandleMsg(msg)
ELSE END
END
END
END HandleMsg;
PROCEDURE (s: Sentence) HandleMsg* (VAR msg: Structs.Message);
VAR cond: Condition;
e: Expression;
BEGIN
WITH msg: CharMsg DO
s.last.HandleMsg(msg)
| msg: VarMsg DO
msg.symTab := s.first(Structs.SymTab);
s.last.HandleMsg(msg)
| msg: NewCondMsg DO
NEW(cond);
s.Insert(cond, s.last)
| msg: BeginExprMsg DO
IF (s.last # NIL) & (s.last IS Expression) THEN
s.last.HandleMsg(msg)
ELSE
NEW(e); s.Insert(e, s.last)
END
| msg: EndExprMsg DO
s.last(Expression).HandleMsg(msg)
| msg: Message DO
s.last.HandleMsg(msg)
ELSE END
END HandleMsg;
PROCEDURE (s: Sentence) GetFreeVars* (OUT e1, e2: FreeVar), NEW;
VAR t: Structs.Term;
BEGIN
e1 := NIL; e2 := NIL;
t := s.first(Structs.SymTab).next(Pattern).first;
WHILE (t # NIL) & ~(t IS FreeVar) DO t := t.next END;
IF t # NIL THEN
e1 := t(FreeVar);
t := t.next;
WHILE (t # NIL) & ~(t IS FreeVar) DO t := t.next END;
IF t # NIL THEN e2 := t(FreeVar) END
END
END GetFreeVars;
PROCEDURE (f: Function) HandleMsg* (VAR msg: Structs.Message);
VAR s: Sentence;
nsm: NewSentMsg;
symTab: Structs.SymTab;
pat: Pattern;
ok: BOOLEAN;
BEGIN
WITH msg: NewSentMsg DO
NEW(s); symTab := Structs.dir.NewSymTab(7); s.Insert(symTab, s.last);
NEW(pat); s.Insert(pat, s.last);
f.Insert(s, f.last)
| msg: FuncNameMsg DO
IF f.first = NIL THEN
f.name := msg.name$;
f.HandleMsg(nsm);
f.Expr()(Program).first(Structs.SymTab).AddUnicum(f.name, f, ok);
IF ~ok THEN
msg.error := duplicatedFuncName
END
ELSE
f.last.HandleMsg(msg)
END
| msg: Message DO
f.last.HandleMsg(msg)
ELSE END
END HandleMsg;
PROCEDURE (p: Program) HandleMsg* (VAR msg: Structs.Message);
VAR f: Function;
BEGIN
WITH msg: NewFuncMsg DO
msg.error := 0;
NEW(f);
p.Insert(f, p.last)
| msg: Message DO
msg.error := 0;
p.last.HandleMsg(msg)
ELSE END
END HandleMsg;
А вот связующий слой, который получает вызовы-события от синт. анализатора и трансформирует соотв. фрагмент исходника в сообщение, которое посылает корню дерева:
Код:
PROCEDURE EmitNewFunc (VAR par: ANYREC; VAR in: ARRAY OF CHAR; VAR p: INTEGER; q: INTEGER; VAR out: ARRAY OF CHAR; VAR r: INTEGER; OUT error: INTEGER);
VAR msg: NewFuncMsg;
BEGIN
par(Parser).program.HandleMsg(msg);
error := msg.error
END EmitNewFunc;
PROCEDURE EmitFuncName (VAR par: ANYREC; VAR in: ARRAY OF CHAR; VAR p: INTEGER; q: INTEGER; VAR out: ARRAY OF CHAR; VAR r: INTEGER; OUT error: INTEGER);
VAR msg: FuncNameMsg;
BEGIN
Strings.Extract(in, p, q - p, msg.name);
par(Parser).program.HandleMsg(msg);
IF msg.error = 0 THEN p := q ELSE error := msg.error END
END EmitFuncName;
PROCEDURE EmitNewSent (VAR par: ANYREC; VAR in: ARRAY OF CHAR; VAR p: INTEGER; q: INTEGER; VAR out: ARRAY OF CHAR; VAR r: INTEGER; OUT error: INTEGER);
VAR msg: NewSentMsg;
BEGIN
par(Parser).program.HandleMsg(msg);
error := msg.error
END EmitNewSent;
PROCEDURE EmitBeginExpr (VAR par: ANYREC; VAR in: ARRAY OF CHAR; VAR p: INTEGER; q: INTEGER; VAR out: ARRAY OF CHAR; VAR r: INTEGER; OUT error: INTEGER);
VAR msg: BeginExprMsg;
BEGIN
par(Parser).program.HandleMsg(msg);
error := msg.error
END EmitBeginExpr;
PROCEDURE EmitEndExpr (VAR par: ANYREC; VAR in: ARRAY OF CHAR; VAR p: INTEGER; q: INTEGER; VAR out: ARRAY OF CHAR; VAR r: INTEGER; OUT error: INTEGER);
VAR msg: EndExprMsg;
BEGIN
par(Parser).program.HandleMsg(msg);
error := msg.error
END EmitEndExpr;
PROCEDURE EmitChar (VAR par: ANYREC; VAR in: ARRAY OF CHAR; VAR p: INTEGER; q: INTEGER; VAR out: ARRAY OF CHAR; VAR r: INTEGER; OUT error: INTEGER);
VAR msg: CharMsg;
BEGIN
msg.char := in[p]; msg.encode := (msg.char = 0DX) OR (msg.char = 09X);
par(Parser).program.HandleMsg(msg);
IF msg.error = 0 THEN p := q ELSE error := msg.error END
END EmitChar;
PROCEDURE EmitInvalidChar (VAR par: ANYREC; VAR in: ARRAY OF CHAR; VAR p: INTEGER; q: INTEGER; VAR out: ARRAY OF CHAR; VAR r: INTEGER; OUT error: INTEGER);
BEGIN
error := unexpectedChar
END EmitInvalidChar;
PROCEDURE EmitCharCode (VAR par: ANYREC; VAR in: ARRAY OF CHAR; VAR p: INTEGER; q: INTEGER; VAR out: ARRAY OF CHAR; VAR r: INTEGER; OUT error: INTEGER);
VAR xs: ARRAY 16 OF CHAR;
x, res: INTEGER;
msg: CharMsg;
BEGIN
Strings.Extract(in, p, q-p, xs);
Strings.StringToInt(xs, x, res);
IF ~( (res = 0) OR (x < 65536) ) THEN
error := tooLongCharCode
ELSE
msg.char := CHR(SHORT(x)); msg.encode := TRUE;
par(Parser).program.HandleMsg(msg);
IF msg.error = 0 THEN p := q ELSE error := msg.error END
END
END EmitCharCode;
PROCEDURE EmitCharVar (VAR par: ANYREC; VAR in: ARRAY OF CHAR; VAR p: INTEGER; q: INTEGER; VAR out: ARRAY OF CHAR; VAR r: INTEGER; OUT error: INTEGER);
VAR msg: CharVarMsg;
BEGIN
Strings.Extract(in, p, q - p, msg.name);
par(Parser).program.HandleMsg(msg);
IF msg.error = 0 THEN p := q ELSE error := msg.error END
END EmitCharVar;
PROCEDURE EmitFreeVar (VAR par: ANYREC; VAR in: ARRAY OF CHAR; VAR p: INTEGER; q: INTEGER; VAR out: ARRAY OF CHAR; VAR r: INTEGER; OUT error: INTEGER);
VAR msg: FreeVarMsg;
BEGIN
Strings.Extract(in, p, q - p, msg.name);
par(Parser).program.HandleMsg(msg);
IF msg.error = 0 THEN p := q ELSE error := msg.error END
END EmitFreeVar;
PROCEDURE EmitNewCond (VAR par: ANYREC; VAR in: ARRAY OF CHAR; VAR p: INTEGER; q: INTEGER; VAR out: ARRAY OF CHAR; VAR r: INTEGER; OUT error: INTEGER);
VAR msg: NewCondMsg;
BEGIN
par(Parser).program.HandleMsg(msg);
error := msg.error
END EmitNewCond;
Процедурки получаются маленькие, обозримые, очевидные... Не сравнить с тем, как обычно в ручную написанных на императиве трансляторах...
Ну и для полноты вот сам синт. анализатор Рефала-0, раскрученный на нём самом - он обращается к вышепоказанным процедурам Emit...
Код:
RocotRefal0 (parserParam = TRUE; funcPrefix = 'Parse'; exceptProc = 'SyntaxError');
Preprocess {
'\n' e1 = ' ' <Preprocess e1>;
'\t' e1 = ' ' <Preprocess e1>;
'\'' e1 = '\'' <PreString e1>;
'/*' e1 = ' ' <PreComment e1>;
s1 e2 = s1 <Preprocess e2>;
e1 = e1 /* '' = '' */
}
PreComment {
'*/' e1 = ' ' <Preprocess e1>;
s1 e2 = ' ' <PreComment e2>
}
PreString {
'\'' e1 = '\'' <Preprocess e1>;
'\\\\' e1 = '\\\\' <PreString e1>;
'\\\'' e1 = '\\\'' <PreString e1>;
s1 e2 = s1 <PreString e2>
}
Expect {
sC sC e1 = e1;
sC ' ' e1 = <Expect sC e1>
}
Optional {
sC sC e1 = e1;
sC ' ' e1 = <Optional sC e1>;
sC e1 = e1
}
Main {
e1 = <Program <Preprocess e1>>
}
Program {
/* Program = { Function [";"] } . */
'' = '';
' ' e1 = <Program e1>;
e1 = <Program <Optional ';' <Function e1>>>
}
Function {
/* Function = FuncName "{" Sentence { ";" Sentence } [";"] "}" . */
e1 = <EmitNewFunc> <Expect '}' <Block <Sentence <Expect '{' <FuncName e1>>>>>
}
FuncName {
s1 eName s2 e3, <IsFirstFuncChar s1>: 'T', <IsFuncChar s2>: 'F' =
<EmitFuncName s1 eName> s2 e3
}
Block {
' ' e1 = <Block e1>;
';' e1 = <Block <EmitNewSent><Sentence e1>>;
e1 = e1
}
/* Sentence & sentence pattern */
Sentence {
/* Sentence = Pattern {"," Condition} "=" Expression . */
e1 = <Expression
<Expect '=' <CondSection <Pattern e1>>><EmitBeginExpr>>
<EmitEndExpr>
}
Pattern {
/* Pattern = | Template | [Template] FreeVar [Template] |
[Template] FreeVar Template FreeVar [Template] . */
e1 = <Template<FreePart<Template<FreePart<Template e1>>>>>
}
FreePart {
'e' e1 = <FreeVar 'e' e1>;
e1 = e1
}
Template {
/* Template = (String | CharVar) {Template} . */
' ' e1 = <Template e1>;
'\'' e1 = <Template <String e1>>;
's' e1 = <Template <CharVar 's' e1>>;
e1 = e1
}
/* Pattern elements */
String {
'\'' e1 = e1;
'\\\'' e1 = <EmitChar '\''> <String e1>;
'\\\"' e1 = <EmitChar '\"'> <String e1>;
'\\\\' e1 = <EmitChar '\\'> <String e1>;
'\\\n' e1 = <String e1>; /* \ to skip end of line; else it be included in string directly */
'\\n' e1 = <EmitChar '\n'> <String e1>;
'\\t' e1 = <EmitChar '\t'> <String e1>;
'\\' s1 e2, <IsDigit s1>: 'T'
= <String <CharCode s1 e2>>;
'\"' e1 = <EmitInvalidChar> e1;
'\\' e1 = <EmitInvalidChar> e1;
s1 e2 = <EmitChar s1> <String e2>
}
CharCode {
eCode s1 e2, <IsDigit s1>: 'F' = <EmitCharCode eCode> s1 e2
}
CharVar {
' ' e1 = <CharVar e1>;
's' s1 eIndex s2 e3, <IsIndexChar s1>: 'T', <IsIndexChar s2>: 'F' =
<EmitCharVar s1 eIndex> s2 e3
}
FreeVar {
' ' e1 = <FreeVar e1>;
'e' s1 eIndex s2 e3, <IsIndexChar s1>: 'T', <IsIndexChar s2>: 'F' =
<EmitFreeVar s1 eIndex> s2 e3
}
/* Condition list */
CondSection {
/* CondSection = {"," Condition} . */
' ' e1 = <CondSection e1>;
',' e1 = <CondSection <EmitNewCond> <Condition e1> >;
e1 = e1
}
Condition {
/* Condition = "<"Function Charvar ">" ":" ("T" OR "F") . */
' ' e1 = <Condition e1>;
'<' e1 = <CondConst <Expect ':' <Expect '>' <CharVar <FuncName e1>>>>>
}
CondConst {
' ' e1 = <CondConst e1>;
'\'T\'' e1 = <EmitChar 'T'> e1;
'\'F\'' e1 = <EmitChar 'F'> e1
}
/* Substitute expression */
Expression {
/* Expression = {String | CharVar | FreeVar | FunctionCall}. */
' ' e1 = <Expression e1>;
'\'' e1 = <Expression <String e1>>;
's' e1 = <Expression <CharVar 's' e1>>;
'e' e1 = <Expression <FreeVar 'e' e1>>;
'<' e1 = <Expression <Expect '>' <EmitBeginExpr>
<Expression <FuncName e1>>
<EmitEndExpr> >
>;
e1 = e1
}