// SDLang-D // Written in the D programming language. module sdlang.parser; import std.file; import libInputVisitor; import sdlang.ast; import sdlang.exception; import sdlang.lexer; import sdlang.symbol; import sdlang.token; import sdlang.util; /// Returns root tag. Tag parseFile(string filename) { auto source = cast(string)read(filename); return parseSource(source, filename); } /// Returns root tag. The optional 'filename' parameter can be included /// so that the SDL document's filename (if any) can be displayed with /// any syntax error messages. Tag parseSource(string source, string filename=null) { auto lexer = new Lexer(source, filename); auto parser = DOMParser(lexer); return parser.parseRoot(); } /++ Parses an SDL document using StAX/Pull-style. Returns an InputRange with element type ParserEvent. The pullParseFile version reads a file and parses it, while pullParseSource parses a string passed in. The optional 'filename' parameter in pullParseSource can be included so that the SDL document's filename (if any) can be displayed with any syntax error messages. Warning! The FileStartEvent and FileEndEvent events *might* be removed later. See $(LINK https://github.com/Abscissa/SDLang-D/issues/17) Example: ------------------ parent 12 attr="q" { childA 34 childB 56 } lastTag ------------------ The ParserEvent sequence emitted for that SDL document would be as follows (indented for readability): ------------------ FileStartEvent TagStartEvent (parent) ValueEvent (12) AttributeEvent (attr, "q") TagStartEvent (childA) ValueEvent (34) TagEndEvent TagStartEvent (childB) ValueEvent (56) TagEndEvent TagEndEvent TagStartEvent (lastTag) TagEndEvent FileEndEvent ------------------ Example: ------------------ foreach(event; pullParseFile("stuff.sdl")) { import std.stdio; if(event.peek!FileStartEvent()) writeln("FileStartEvent, starting! "); else if(event.peek!FileEndEvent()) writeln("FileEndEvent, done! "); else if(auto e = event.peek!TagStartEvent()) writeln("TagStartEvent: ", e.namespace, ":", e.name, " @ ", e.location); else if(event.peek!TagEndEvent()) writeln("TagEndEvent"); else if(auto e = event.peek!ValueEvent()) writeln("ValueEvent: ", e.value); else if(auto e = event.peek!AttributeEvent()) writeln("AttributeEvent: ", e.namespace, ":", e.name, "=", e.value); else // Shouldn't happen throw new Exception("Received unknown parser event"); } ------------------ +/ auto pullParseFile(string filename) { auto source = cast(string)read(filename); return parseSource(source, filename); } ///ditto auto pullParseSource(string source, string filename=null) { auto lexer = new Lexer(source, filename); auto parser = PullParser(lexer); return inputVisitor!ParserEvent( parser ); } /// The element of the InputRange returned by pullParseFile and pullParseSource: alias ParserEvent = std.variant.Algebraic!( FileStartEvent, FileEndEvent, TagStartEvent, TagEndEvent, ValueEvent, AttributeEvent, ); /// Event: Start of file struct FileStartEvent { Location location; } /// Event: End of file struct FileEndEvent { Location location; } /// Event: Start of tag struct TagStartEvent { Location location; string namespace; string name; } /// Event: End of tag struct TagEndEvent { //Location location; } /// Event: Found a Value in the current tag struct ValueEvent { Location location; Value value; } /// Event: Found an Attribute in the current tag struct AttributeEvent { Location location; string namespace; string name; Value value; } // The actual pull parser private struct PullParser { private Lexer lexer; private struct IDFull { string namespace; string name; } private void error(string msg) { error(lexer.front.location, msg); } private void error(Location loc, string msg) { throw new SDLangParseException(loc, "Error: "~msg); } private InputVisitor!(PullParser, ParserEvent) v; void visit(InputVisitor!(PullParser, ParserEvent) v) { this.v = v; parseRoot(); } private void emit(Event)(Event event) { v.yield( ParserEvent(event) ); } /// <Root> ::= <Tags> EOF (Lookaheads: Anything) private void parseRoot() { //trace("Starting parse of file: ", lexer.filename); //trace(__FUNCTION__, ": <Root> ::= <Tags> EOF (Lookaheads: Anything)"); auto startLocation = Location(lexer.filename, 0, 0, 0); emit( FileStartEvent(startLocation) ); parseTags(); auto token = lexer.front; if(!token.matches!"EOF"()) error("Expected end-of-file, not " ~ token.symbol.name); emit( FileEndEvent(token.location) ); } /// <Tags> ::= <Tag> <Tags> (Lookaheads: Ident Value) /// | EOL <Tags> (Lookaheads: EOL) /// | {empty} (Lookaheads: Anything else, except '{') void parseTags() { //trace("Enter ", __FUNCTION__); while(true) { auto token = lexer.front; if(token.matches!"Ident"() || token.matches!"Value"()) { //trace(__FUNCTION__, ": <Tags> ::= <Tag> <Tags> (Lookaheads: Ident Value)"); parseTag(); continue; } else if(token.matches!"EOL"()) { //trace(__FUNCTION__, ": <Tags> ::= EOL <Tags> (Lookaheads: EOL)"); lexer.popFront(); continue; } else if(token.matches!"{"()) { error("Anonymous tags must have at least one value. They cannot just have children and attributes only."); } else { //trace(__FUNCTION__, ": <Tags> ::= {empty} (Lookaheads: Anything else, except '{')"); break; } } } /// <Tag> /// ::= <IDFull> <Values> <Attributes> <OptChild> <TagTerminator> (Lookaheads: Ident) /// | <Value> <Values> <Attributes> <OptChild> <TagTerminator> (Lookaheads: Value) void parseTag() { auto token = lexer.front; if(token.matches!"Ident"()) { //trace(__FUNCTION__, ": <Tag> ::= <IDFull> <Values> <Attributes> <OptChild> <TagTerminator> (Lookaheads: Ident)"); //trace("Found tag named: ", tag.fullName); auto id = parseIDFull(); emit( TagStartEvent(token.location, id.namespace, id.name) ); } else if(token.matches!"Value"()) { //trace(__FUNCTION__, ": <Tag> ::= <Value> <Values> <Attributes> <OptChild> <TagTerminator> (Lookaheads: Value)"); //trace("Found anonymous tag."); emit( TagStartEvent(token.location, null, null) ); } else error("Expected tag name or value, not " ~ token.symbol.name); if(lexer.front.matches!"="()) error("Anonymous tags must have at least one value. They cannot just have attributes and children only."); parseValues(); parseAttributes(); parseOptChild(); parseTagTerminator(); emit( TagEndEvent() ); } /// <IDFull> ::= Ident <IDSuffix> (Lookaheads: Ident) IDFull parseIDFull() { auto token = lexer.front; if(token.matches!"Ident"()) { //trace(__FUNCTION__, ": <IDFull> ::= Ident <IDSuffix> (Lookaheads: Ident)"); lexer.popFront(); return parseIDSuffix(token.data); } else { error("Expected namespace or identifier, not " ~ token.symbol.name); assert(0); } } /// <IDSuffix> /// ::= ':' Ident (Lookaheads: ':') /// ::= {empty} (Lookaheads: Anything else) IDFull parseIDSuffix(string firstIdent) { auto token = lexer.front; if(token.matches!":"()) { //trace(__FUNCTION__, ": <IDSuffix> ::= ':' Ident (Lookaheads: ':')"); lexer.popFront(); token = lexer.front; if(token.matches!"Ident"()) { lexer.popFront(); return IDFull(firstIdent, token.data); } else { error("Expected name, not " ~ token.symbol.name); assert(0); } } else { //trace(__FUNCTION__, ": <IDSuffix> ::= {empty} (Lookaheads: Anything else)"); return IDFull("", firstIdent); } } /// <Values> /// ::= Value <Values> (Lookaheads: Value) /// | {empty} (Lookaheads: Anything else) void parseValues() { while(true) { auto token = lexer.front; if(token.matches!"Value"()) { //trace(__FUNCTION__, ": <Values> ::= Value <Values> (Lookaheads: Value)"); parseValue(); continue; } else { //trace(__FUNCTION__, ": <Values> ::= {empty} (Lookaheads: Anything else)"); break; } } } /// Handle Value terminals that aren't part of an attribute void parseValue() { auto token = lexer.front; if(token.matches!"Value"()) { //trace(__FUNCTION__, ": (Handle Value terminals that aren't part of an attribute)"); auto value = token.value; //trace("In tag '", parent.fullName, "', found value: ", value); emit( ValueEvent(token.location, value) ); lexer.popFront(); } else error("Expected value, not "~token.symbol.name); } /// <Attributes> /// ::= <Attribute> <Attributes> (Lookaheads: Ident) /// | {empty} (Lookaheads: Anything else) void parseAttributes() { while(true) { auto token = lexer.front; if(token.matches!"Ident"()) { //trace(__FUNCTION__, ": <Attributes> ::= <Attribute> <Attributes> (Lookaheads: Ident)"); parseAttribute(); continue; } else { //trace(__FUNCTION__, ": <Attributes> ::= {empty} (Lookaheads: Anything else)"); break; } } } /// <Attribute> ::= <IDFull> '=' Value (Lookaheads: Ident) void parseAttribute() { //trace(__FUNCTION__, ": <Attribute> ::= <IDFull> '=' Value (Lookaheads: Ident)"); auto token = lexer.front; if(!token.matches!"Ident"()) error("Expected attribute name, not "~token.symbol.name); auto id = parseIDFull(); token = lexer.front; if(!token.matches!"="()) error("Expected '=' after attribute name, not "~token.symbol.name); lexer.popFront(); token = lexer.front; if(!token.matches!"Value"()) error("Expected attribute value, not "~token.symbol.name); //trace("In tag '", parent.fullName, "', found attribute '", attr.fullName, "'"); emit( AttributeEvent(token.location, id.namespace, id.name, token.value) ); lexer.popFront(); } /// <OptChild> /// ::= '{' EOL <Tags> '}' (Lookaheads: '{') /// | {empty} (Lookaheads: Anything else) void parseOptChild() { auto token = lexer.front; if(token.matches!"{") { //trace(__FUNCTION__, ": <OptChild> ::= '{' EOL <Tags> '}' (Lookaheads: '{')"); lexer.popFront(); token = lexer.front; if(!token.matches!"EOL"()) error("Expected newline or semicolon after '{', not "~token.symbol.name); lexer.popFront(); parseTags(); token = lexer.front; if(!token.matches!"}"()) error("Expected '}' after child tags, not "~token.symbol.name); lexer.popFront(); } else { //trace(__FUNCTION__, ": <OptChild> ::= {empty} (Lookaheads: Anything else)"); // Do nothing, no error. } } /// <TagTerminator> /// ::= EOL (Lookahead: EOL) /// | {empty} (Lookahead: EOF) void parseTagTerminator() { auto token = lexer.front; if(token.matches!"EOL") { //trace(__FUNCTION__, ": <TagTerminator> ::= EOL (Lookahead: EOL)"); lexer.popFront(); } else if(token.matches!"EOF") { //trace(__FUNCTION__, ": <TagTerminator> ::= {empty} (Lookahead: EOF)"); // Do nothing } else error("Expected end of tag (newline, semicolon or end-of-file), not " ~ token.symbol.name); } } private struct DOMParser { Lexer lexer; Tag parseRoot() { auto currTag = new Tag(null, null, "root"); currTag.location = Location(lexer.filename, 0, 0, 0); auto parser = PullParser(lexer); auto eventRange = inputVisitor!ParserEvent( parser ); foreach(event; eventRange) { if(auto e = event.peek!TagStartEvent()) { auto newTag = new Tag(currTag, e.namespace, e.name); newTag.location = e.location; currTag = newTag; } else if(event.peek!TagEndEvent()) { currTag = currTag.parent; if(!currTag) parser.error("Internal Error: Received an extra TagEndEvent"); } else if(auto e = event.peek!ValueEvent()) { currTag.add(e.value); } else if(auto e = event.peek!AttributeEvent()) { auto attr = new Attribute(e.namespace, e.name, e.value, e.location); currTag.add(attr); } else if(event.peek!FileStartEvent()) { // Do nothing } else if(event.peek!FileEndEvent()) { // There shouldn't be another parent. if(currTag.parent) parser.error("Internal Error: Unexpected end of file, not enough TagEndEvent"); } else parser.error("Internal Error: Received unknown parser event"); } return currTag; } } // Other parser tests are part of the AST's tests over in the ast module. // Regression test, issue #16: https://github.com/Abscissa/SDLang-D/issues/16 version(sdlangUnittest) unittest { import std.stdio; writeln("parser: Regression test issue #16..."); stdout.flush(); // Shouldn't crash foreach(event; pullParseSource(`tag "data"`)) { event.peek!FileStartEvent(); } } // Regression test, issue #31: https://github.com/Abscissa/SDLang-D/issues/31 // "Escape sequence results in range violation error" version(sdlangUnittest) unittest { import std.stdio; writeln("parser: Regression test issue #31..."); stdout.flush(); // Shouldn't get a Range violation parseSource(`test "\"foo\""`); }