// SDLang-D
// Written in the D programming language.

module sdlang.parser;

import std.file;

import libInputVisitor;

import sdlang.ast;
import sdlang.exception;
import sdlang.lexer;
import sdlang.symbol;
import sdlang.token;
import sdlang.util;

/// Returns root tag.
Tag parseFile(string filename)
{
	auto source = cast(string)read(filename);
	return parseSource(source, filename);
}

/// Returns root tag. The optional 'filename' parameter can be included
/// so that the SDL document's filename (if any) can be displayed with
/// any syntax error messages.
Tag parseSource(string source, string filename=null)
{
	auto lexer = new Lexer(source, filename);
	auto parser = DOMParser(lexer);
	return parser.parseRoot();
}

/++
Parses an SDL document using StAX/Pull-style. Returns an InputRange with
element type ParserEvent.

The pullParseFile version reads a file and parses it, while pullParseSource
parses a string passed in. The optional 'filename' parameter in pullParseSource
can be included so that the SDL document's filename (if any) can be displayed
with any syntax error messages.

Warning! The FileStartEvent and FileEndEvent events *might* be removed later.
See $(LINK https://github.com/Abscissa/SDLang-D/issues/17)

Example:
------------------
parent 12 attr="q" {
	childA 34
	childB 56
}
lastTag
------------------

The ParserEvent sequence emitted for that SDL document would be as
follows (indented for readability):
------------------
FileStartEvent
	TagStartEvent (parent)
		ValueEvent (12)
		AttributeEvent (attr, "q")
		TagStartEvent (childA)
			ValueEvent (34)
		TagEndEvent
		TagStartEvent (childB)
			ValueEvent (56)
		TagEndEvent
	TagEndEvent
	TagStartEvent  (lastTag)
	TagEndEvent
FileEndEvent
------------------

Example:
------------------
foreach(event; pullParseFile("stuff.sdl"))
{
	import std.stdio;

	if(event.peek!FileStartEvent())
		writeln("FileStartEvent, starting! ");

	else if(event.peek!FileEndEvent())
		writeln("FileEndEvent, done! ");

	else if(auto e = event.peek!TagStartEvent())
		writeln("TagStartEvent: ", e.namespace, ":", e.name, " @ ", e.location);

	else if(event.peek!TagEndEvent())
		writeln("TagEndEvent");

	else if(auto e = event.peek!ValueEvent())
		writeln("ValueEvent: ", e.value);

	else if(auto e = event.peek!AttributeEvent())
		writeln("AttributeEvent: ", e.namespace, ":", e.name, "=", e.value);

	else // Shouldn't happen
		throw new Exception("Received unknown parser event");
}
------------------
+/
auto pullParseFile(string filename)
{
	auto source = cast(string)read(filename);
	return parseSource(source, filename);
}

///ditto
auto pullParseSource(string source, string filename=null)
{
	auto lexer = new Lexer(source, filename);
	auto parser = PullParser(lexer);
	return inputVisitor!ParserEvent( parser );
}

/// The element of the InputRange returned by pullParseFile and pullParseSource:
alias ParserEvent = std.variant.Algebraic!(
	FileStartEvent,
	FileEndEvent,
	TagStartEvent,
	TagEndEvent,
	ValueEvent,
	AttributeEvent,
);

/// Event: Start of file
struct FileStartEvent
{
	Location location;
}

/// Event: End of file
struct FileEndEvent
{
	Location location;
}

/// Event: Start of tag
struct TagStartEvent
{
	Location location;
	string namespace;
	string name;
}

/// Event: End of tag
struct TagEndEvent
{
	//Location location;
}

/// Event: Found a Value in the current tag
struct ValueEvent
{
	Location location;
	Value value;
}

/// Event: Found an Attribute in the current tag
struct AttributeEvent
{
	Location location;
	string namespace;
	string name;
	Value value;
}

// The actual pull parser
private struct PullParser
{
	private Lexer lexer;
	
	private struct IDFull
	{
		string namespace;
		string name;
	}
	
	private void error(string msg)
	{
		error(lexer.front.location, msg);
	}

	private void error(Location loc, string msg)
	{
		throw new SDLangParseException(loc, "Error: "~msg);
	}
	
	private InputVisitor!(PullParser, ParserEvent) v;
	
	void visit(InputVisitor!(PullParser, ParserEvent) v)
	{
		this.v = v;
		parseRoot();
	}
	
	private void emit(Event)(Event event)
	{
		v.yield( ParserEvent(event) );
	}
	
	/// <Root> ::= <Tags> EOF  (Lookaheads: Anything)
	private void parseRoot()
	{
		//trace("Starting parse of file: ", lexer.filename);
		//trace(__FUNCTION__, ": <Root> ::= <Tags> EOF  (Lookaheads: Anything)");

		auto startLocation = Location(lexer.filename, 0, 0, 0);
		emit( FileStartEvent(startLocation) );

		parseTags();
		
		auto token = lexer.front;
		if(!token.matches!"EOF"())
			error("Expected end-of-file, not " ~ token.symbol.name);
		
		emit( FileEndEvent(token.location) );
	}

	/// <Tags> ::= <Tag> <Tags>  (Lookaheads: Ident Value)
	///        |   EOL   <Tags>  (Lookaheads: EOL)
	///        |   {empty}       (Lookaheads: Anything else, except '{')
	void parseTags()
	{
		//trace("Enter ", __FUNCTION__);
		while(true)
		{
			auto token = lexer.front;
			if(token.matches!"Ident"() || token.matches!"Value"())
			{
				//trace(__FUNCTION__, ": <Tags> ::= <Tag> <Tags>  (Lookaheads: Ident Value)");
				parseTag();
				continue;
			}
			else if(token.matches!"EOL"())
			{
				//trace(__FUNCTION__, ": <Tags> ::= EOL <Tags>  (Lookaheads: EOL)");
				lexer.popFront();
				continue;
			}
			else if(token.matches!"{"())
			{
				error("Anonymous tags must have at least one value. They cannot just have children and attributes only.");
			}
			else
			{
				//trace(__FUNCTION__, ": <Tags> ::= {empty}  (Lookaheads: Anything else, except '{')");
				break;
			}
		}
	}

	/// <Tag>
	///     ::= <IDFull> <Values> <Attributes> <OptChild> <TagTerminator>  (Lookaheads: Ident)
	///     |   <Value>  <Values> <Attributes> <OptChild> <TagTerminator>  (Lookaheads: Value)
	void parseTag()
	{
		auto token = lexer.front;
		if(token.matches!"Ident"())
		{
			//trace(__FUNCTION__, ": <Tag> ::= <IDFull> <Values> <Attributes> <OptChild> <TagTerminator>  (Lookaheads: Ident)");
			//trace("Found tag named: ", tag.fullName);
			auto id = parseIDFull();
			emit( TagStartEvent(token.location, id.namespace, id.name) );
		}
		else if(token.matches!"Value"())
		{
			//trace(__FUNCTION__, ": <Tag> ::= <Value>  <Values> <Attributes> <OptChild> <TagTerminator>  (Lookaheads: Value)");
			//trace("Found anonymous tag.");
			emit( TagStartEvent(token.location, null, null) );
		}
		else
			error("Expected tag name or value, not " ~ token.symbol.name);

		if(lexer.front.matches!"="())
			error("Anonymous tags must have at least one value. They cannot just have attributes and children only.");

		parseValues();
		parseAttributes();
		parseOptChild();
		parseTagTerminator();
		
		emit( TagEndEvent() );
	}

	/// <IDFull> ::= Ident <IDSuffix>  (Lookaheads: Ident)
	IDFull parseIDFull()
	{
		auto token = lexer.front;
		if(token.matches!"Ident"())
		{
			//trace(__FUNCTION__, ": <IDFull> ::= Ident <IDSuffix>  (Lookaheads: Ident)");
			lexer.popFront();
			return parseIDSuffix(token.data);
		}
		else
		{
			error("Expected namespace or identifier, not " ~ token.symbol.name);
			assert(0);
		}
	}

	/// <IDSuffix>
	///     ::= ':' Ident  (Lookaheads: ':')
	///     ::= {empty}    (Lookaheads: Anything else)
	IDFull parseIDSuffix(string firstIdent)
	{
		auto token = lexer.front;
		if(token.matches!":"())
		{
			//trace(__FUNCTION__, ": <IDSuffix> ::= ':' Ident  (Lookaheads: ':')");
			lexer.popFront();
			token = lexer.front;
			if(token.matches!"Ident"())
			{
				lexer.popFront();
				return IDFull(firstIdent, token.data);
			}
			else
			{
				error("Expected name, not " ~ token.symbol.name);
				assert(0);
			}
		}
		else
		{
			//trace(__FUNCTION__, ": <IDSuffix> ::= {empty}  (Lookaheads: Anything else)");
			return IDFull("", firstIdent);
		}
	}

	/// <Values>
	///     ::= Value <Values>  (Lookaheads: Value)
	///     |   {empty}         (Lookaheads: Anything else)
	void parseValues()
	{
		while(true)
		{
			auto token = lexer.front;
			if(token.matches!"Value"())
			{
				//trace(__FUNCTION__, ": <Values> ::= Value <Values>  (Lookaheads: Value)");
				parseValue();
				continue;
			}
			else
			{
				//trace(__FUNCTION__, ": <Values> ::= {empty}  (Lookaheads: Anything else)");
				break;
			}
		}
	}

	/// Handle Value terminals that aren't part of an attribute
	void parseValue()
	{
		auto token = lexer.front;
		if(token.matches!"Value"())
		{
			//trace(__FUNCTION__, ": (Handle Value terminals that aren't part of an attribute)");
			auto value = token.value;
			//trace("In tag '", parent.fullName, "', found value: ", value);
			emit( ValueEvent(token.location, value) );
			
			lexer.popFront();
		}
		else
			error("Expected value, not "~token.symbol.name);
	}

	/// <Attributes>
	///     ::= <Attribute> <Attributes>  (Lookaheads: Ident)
	///     |   {empty}                   (Lookaheads: Anything else)
	void parseAttributes()
	{
		while(true)
		{
			auto token = lexer.front;
			if(token.matches!"Ident"())
			{
				//trace(__FUNCTION__, ": <Attributes> ::= <Attribute> <Attributes>  (Lookaheads: Ident)");
				parseAttribute();
				continue;
			}
			else
			{
				//trace(__FUNCTION__, ": <Attributes> ::= {empty}  (Lookaheads: Anything else)");
				break;
			}
		}
	}

	/// <Attribute> ::= <IDFull> '=' Value  (Lookaheads: Ident)
	void parseAttribute()
	{
		//trace(__FUNCTION__, ": <Attribute> ::= <IDFull> '=' Value  (Lookaheads: Ident)");
		auto token = lexer.front;
		if(!token.matches!"Ident"())
			error("Expected attribute name, not "~token.symbol.name);
		
		auto id = parseIDFull();
		
		token = lexer.front;
		if(!token.matches!"="())
			error("Expected '=' after attribute name, not "~token.symbol.name);
		
		lexer.popFront();
		token = lexer.front;
		if(!token.matches!"Value"())
			error("Expected attribute value, not "~token.symbol.name);
		
		//trace("In tag '", parent.fullName, "', found attribute '", attr.fullName, "'");
		emit( AttributeEvent(token.location, id.namespace, id.name, token.value) );
		
		lexer.popFront();
	}

	/// <OptChild>
	///      ::= '{' EOL <Tags> '}'  (Lookaheads: '{')
	///      |   {empty}             (Lookaheads: Anything else)
	void parseOptChild()
	{
		auto token = lexer.front;
		if(token.matches!"{")
		{
			//trace(__FUNCTION__, ": <OptChild> ::= '{' EOL <Tags> '}'  (Lookaheads: '{')");
			lexer.popFront();
			token = lexer.front;
			if(!token.matches!"EOL"())
				error("Expected newline or semicolon after '{', not "~token.symbol.name);
			
			lexer.popFront();
			parseTags();
			
			token = lexer.front;
			if(!token.matches!"}"())
				error("Expected '}' after child tags, not "~token.symbol.name);
			lexer.popFront();
		}
		else
		{
			//trace(__FUNCTION__, ": <OptChild> ::= {empty}  (Lookaheads: Anything else)");
			// Do nothing, no error.
		}
	}
	
	/// <TagTerminator>
	///     ::= EOL      (Lookahead: EOL)
	///     |   {empty}  (Lookahead: EOF)
	void parseTagTerminator()
	{
		auto token = lexer.front;
		if(token.matches!"EOL")
		{
			//trace(__FUNCTION__, ": <TagTerminator> ::= EOL  (Lookahead: EOL)");
			lexer.popFront();
		}
		else if(token.matches!"EOF")
		{
			//trace(__FUNCTION__, ": <TagTerminator> ::= {empty}  (Lookahead: EOF)");
			// Do nothing
		}
		else
			error("Expected end of tag (newline, semicolon or end-of-file), not " ~ token.symbol.name);
	}
}

private struct DOMParser
{
	Lexer lexer;
	
	Tag parseRoot()
	{
		auto currTag = new Tag(null, null, "root");
		currTag.location = Location(lexer.filename, 0, 0, 0);
		
		auto parser = PullParser(lexer);
		auto eventRange = inputVisitor!ParserEvent( parser );
		foreach(event; eventRange)
		{
			if(auto e = event.peek!TagStartEvent())
			{
				auto newTag = new Tag(currTag, e.namespace, e.name);
				newTag.location = e.location;
				
				currTag = newTag;
			}
			else if(event.peek!TagEndEvent())
			{
				currTag = currTag.parent;

				if(!currTag)
					parser.error("Internal Error: Received an extra TagEndEvent");
			}
			else if(auto e = event.peek!ValueEvent())
			{
				currTag.add(e.value);
			}
			else if(auto e = event.peek!AttributeEvent())
			{
				auto attr = new Attribute(e.namespace, e.name, e.value, e.location);
				currTag.add(attr);
			}
			else if(event.peek!FileStartEvent())
			{
				// Do nothing
			}
			else if(event.peek!FileEndEvent())
			{
				// There shouldn't be another parent.
				if(currTag.parent)
					parser.error("Internal Error: Unexpected end of file, not enough TagEndEvent");
			}
			else
				parser.error("Internal Error: Received unknown parser event");
		}
		
		return currTag;
	}
}

// Other parser tests are part of the AST's tests over in the ast module.

// Regression test, issue #16: https://github.com/Abscissa/SDLang-D/issues/16
version(sdlangUnittest)
unittest
{
	import std.stdio;
	writeln("parser: Regression test issue #16...");
	stdout.flush();

	// Shouldn't crash
	foreach(event; pullParseSource(`tag "data"`))
	{
		event.peek!FileStartEvent();
	}
}

// Regression test, issue #31: https://github.com/Abscissa/SDLang-D/issues/31
// "Escape sequence results in range violation error"
version(sdlangUnittest)
unittest
{
	import std.stdio;
	writeln("parser: Regression test issue #31...");
	stdout.flush();

	// Shouldn't get a Range violation
	parseSource(`test "\"foo\""`);
}