Parsing variable declarations

This commit is contained in:
Robert Bendun 2022-05-16 16:58:31 +02:00
parent 0b9e7e8f4a
commit 0f9c46dffb
6 changed files with 109 additions and 31 deletions

View File

@ -69,6 +69,7 @@ std::ostream& operator<<(std::ostream& os, Error const& err)
case errors::Unrecognized_Character:
return err.message.empty() ? os << "unrecognized character\n" : os << err.message;
case errors::Expected_Keyword:
case errors::Function_Not_Defined:
case errors::Unexpected_Token_Type:
case errors::Unresolved_Operator:
@ -163,6 +164,15 @@ Error errors::unresolved_operator(Token const& op)
return err;
}
// Builds an error of type Expected_Keyword, located at the offending token.
// The message names the keyword that was required and the token found instead.
Error errors::expected_keyword(Token const& unexpected, std::string_view keyword)
{
	Error result;
	result.location = unexpected.location;
	result.type = errors::Expected_Keyword;
	result.message = format("Expected keyword '", keyword, "', but found ", unexpected);
	return result;
}
void errors::all_tokens_were_not_parsed(std::span<Token> tokens)
{
error_heading(std::cerr, std::nullopt, Error_Level::Bug);

View File

@ -74,7 +74,7 @@ auto Lexer::next_token() -> Result<Token>
// is operator, then this character is part of operator sequence.
// Additionally we explicitly allow for `|foo|=0` here
if (Valid_Operator_Chars.find(peek()) == std::string_view::npos || peek() == '=')
return { Token::Type::Variable_Separator, finish(), token_location };
return { Token::Type::Parameter_Separator, finish(), token_location };
}
// Number literals like .75
@ -251,16 +251,16 @@ std::ostream& operator<<(std::ostream& os, Token const& token)
// Prints the upper-case, human readable name of a token type.
// Every enumerator has exactly one case label: repeated labels make the switch
// ill-formed, and the "|" token type is spelled Parameter_Separator.
std::ostream& operator<<(std::ostream& os, Token::Type type)
{
	switch (type) {
	case Token::Type::Open_Block:           return os << "OPEN BLOCK";
	case Token::Type::Close_Block:          return os << "CLOSE BLOCK";
	case Token::Type::Open_Paren:           return os << "OPEN PAREN";
	case Token::Type::Close_Paren:          return os << "CLOSE PAREN";
	case Token::Type::Parameter_Separator:  return os << "PARAMETER SEPARATOR";
	case Token::Type::Chord:                return os << "CHORD";
	case Token::Type::Numeric:              return os << "NUMERIC";
	case Token::Type::Symbol:               return os << "SYMBOL";
	case Token::Type::Operator:             return os << "OPERATOR";
	case Token::Type::Expression_Separator: return os << "EXPRESSION SEPARATOR";
	}
	// Reached only when `type` holds a value outside of the enumerators listed above
	unreachable();
}

View File

@ -46,7 +46,8 @@ namespace errors
Failed_Numeric_Parsing,
Function_Not_Defined,
Unresolved_Operator
Unresolved_Operator,
Expected_Keyword,
};
}
@ -218,7 +219,7 @@ struct Token
Numeric,
// "|" separates arguments from block body, and provides variable introduction syntax
Variable_Separator,
Parameter_Separator,
// ";" separates expressions. Used to separate calls, like `foo 1 2; bar 3 4`
Expression_Separator,
@ -292,19 +293,21 @@ struct Lexer
struct Ast
{
// Named constructors of AST structure
static Ast literal(Token);
static Ast binary(Token, Ast lhs, Ast rhs);
static Ast call(std::vector<Ast> call);
static Ast sequence(std::vector<Ast> call);
static Ast block(Location location, Ast seq = sequence({}), std::vector<Ast> parameters = {});
static Ast call(std::vector<Ast> call);
static Ast literal(Token);
static Ast sequence(std::vector<Ast> call);
static Ast variable_declaration(Location loc, std::vector<Ast> lvalues, std::optional<Ast> rvalue);
enum class Type
{
Literal, // Compile time known constant like `c` or `1`
Binary, // Binary operator application like `1` + `2`
Call, // Function call application like `print 42`
Sequence, // Several expressions sequences like `42`, `42; 32`
Block, // Block expressions like `[42; hello]`
Binary, // Binary operator application like `1` + `2`
Block, // Block expressions like `[42; hello]`
Call, // Function call application like `print 42`
Literal, // Compile time known constant like `c` or `1`
Sequence, // Several expressions sequences like `42`, `42; 32`
Variable_Declaration, // Declaration of a variable with optional value assignment like `var x = 10` or `var y`
};
Type type;
@ -331,6 +334,9 @@ struct Parser
Result<Ast> parse_expression();
Result<Ast> parse_infix_expression();
Result<Ast> parse_atomic_expression();
Result<Ast> parse_variable_declaration();
Result<Ast> parse_identifier_with_trailing_separators();
Result<Ast> parse_identifier();
Result<Token> peek() const;
@ -339,6 +345,7 @@ struct Parser
// Tests if current token has given type
bool expect(Token::Type type) const;
bool expect(Token::Type type, std::string_view lexeme) const;
// Ensures that current token has one of types given.
// Otherwise returns error
@ -440,6 +447,7 @@ namespace errors
Error function_not_defined(Value const& v);
Error unresolved_operator(Token const& op);
Error expected_keyword(Token const& unexpected, std::string_view keyword);
[[noreturn]]
void all_tokens_were_not_parsed(std::span<Token>);

View File

@ -47,7 +47,27 @@ Result<Ast> Parser::parse_sequence()
// Parses a single expression: a variable declaration when the current token
// is the symbol `var`, an infix expression otherwise.
Result<Ast> Parser::parse_expression()
{
	// Dispatch on the current token instead of attempting the declaration parser
	// and falling back when it fails. parse_variable_declaration() consumes `var`
	// before it can fail, so a fallback would drop its error and start the infix
	// parser in the middle of the declaration.
	if (expect(Token::Type::Symbol, "var"))
		return parse_variable_declaration();

	return parse_infix_expression();
}
// Parses `var <identifier>+ [= <expression>]`.
// Yields an Expected_Keyword error when the current token is not the symbol `var`.
Result<Ast> Parser::parse_variable_declaration()
{
	if (!expect(Token::Type::Symbol, "var")) {
		auto const unexpected = Try(peek());
		return errors::expected_keyword(unexpected, "var");
	}

	// Location of the `var` keyword becomes location of the whole declaration
	auto const keyword = consume();
	auto names = Try(parse_many(*this, &Parser::parse_identifier, std::nullopt, At_Least::One));

	// Initializer is present only when identifiers are followed by `=`
	std::optional<Ast> initializer;
	if (expect(Token::Type::Operator, "=")) {
		consume();
		initializer = Try(parse_expression());
	}

	return Ast::variable_declaration(keyword.location, std::move(names), std::move(initializer));
}
Result<Ast> Parser::parse_infix_expression()
@ -83,7 +103,7 @@ Result<Ast> Parser::parse_atomic_expression()
auto start = token_id;
std::vector<Ast> parameters;
if (auto p = parse_many(*this, &Parser::parse_identifier, std::nullopt, At_Least::One); p && expect(Token::Type::Variable_Separator)) {
if (auto p = parse_many(*this, &Parser::parse_identifier_with_trailing_separators, std::nullopt, At_Least::One); p && expect(Token::Type::Parameter_Separator)) {
consume();
parameters = std::move(p).value();
} else {
@ -112,7 +132,7 @@ Result<Ast> Parser::parse_atomic_expression()
}
}
Result<Ast> Parser::parse_identifier()
Result<Ast> Parser::parse_identifier_with_trailing_separators()
{
Try(ensure(Token::Type::Symbol));
auto lit = Ast::literal(consume());
@ -120,7 +140,11 @@ Result<Ast> Parser::parse_identifier()
return lit;
}
// Parses a single symbol token into a literal AST node.
Result<Ast> Parser::parse_identifier()
{
	// Check the token type before consuming the token
	Try(ensure(Token::Type::Symbol));

	auto symbol = consume();
	return Ast::literal(std::move(symbol));
}
static Result<std::vector<Ast>> parse_many(
Parser &p,
@ -173,6 +197,11 @@ bool Parser::expect(Token::Type type) const
return token_id < tokens.size() && tokens[token_id].type == type;
}
// Tests if current token has given type and its source text equals `lexeme`.
// Returns false when there is no current token.
bool Parser::expect(Token::Type type, std::string_view lexeme) const
{
	if (token_id >= tokens.size())
		return false;

	auto const& current = tokens[token_id];
	return current.type == type && current.source == lexeme;
}
Result<void> Parser::ensure(Token::Type type) const
{
return token_id >= tokens.size()
@ -234,6 +263,18 @@ Ast Ast::block(Location location, Ast seq, std::vector<Ast> parameters)
return ast;
}
// Named constructor of a Variable_Declaration node.
// Declared names come first in `arguments`; the initializer, when present,
// is appended as the last argument.
// NOTE(review): nothing set here marks whether the last argument is a name or
// an initializer, so `var x y` and `var x = y` look alike in `arguments` - confirm
// that consumers of this node can tell them apart.
Ast Ast::variable_declaration(Location loc, std::vector<Ast> lvalues, std::optional<Ast> rvalue)
{
	Ast node;
	node.type = Type::Variable_Declaration;
	node.location = loc;
	node.arguments = std::move(lvalues);

	if (rvalue.has_value())
		node.arguments.emplace_back(std::move(rvalue).value());

	return node;
}
Ast wrap_if_several(std::vector<Ast> &&ast, Ast(*wrapper)(std::vector<Ast>))
{
if (ast.size() == 1)
@ -260,6 +301,7 @@ bool operator==(Ast const& lhs, Ast const& rhs)
case Ast::Type::Block:
case Ast::Type::Call:
case Ast::Type::Sequence:
case Ast::Type::Variable_Declaration:
return lhs.arguments.size() == rhs.arguments.size()
&& std::equal(lhs.arguments.begin(), lhs.arguments.end(), rhs.arguments.begin());
}
@ -270,11 +312,12 @@ bool operator==(Ast const& lhs, Ast const& rhs)
// Prints the upper-case name of an AST node type.
// Every enumerator has exactly one case label: repeated labels make the switch ill-formed.
std::ostream& operator<<(std::ostream& os, Ast::Type type)
{
	switch (type) {
	case Ast::Type::Binary:               return os << "BINARY";
	case Ast::Type::Block:                return os << "BLOCK";
	case Ast::Type::Call:                 return os << "CALL";
	case Ast::Type::Literal:              return os << "LITERAL";
	case Ast::Type::Sequence:             return os << "SEQUENCE";
	case Ast::Type::Variable_Declaration: return os << "VAR";
	}
	// Reached only when `type` holds a value outside of the enumerators listed above
	unreachable();
}

View File

@ -107,7 +107,7 @@ suite lexer_test = [] {
expect_token_type(Token::Type::Close_Paren, ")");
expect_token_type(Token::Type::Open_Block, "[");
expect_token_type(Token::Type::Open_Paren, "(");
expect_token_type(Token::Type::Variable_Separator, "|");
expect_token_type(Token::Type::Parameter_Separator, "|");
expect_token_type(Token::Type::Expression_Separator, ";");
};

View File

@ -169,4 +169,21 @@ suite parser_test = [] {
Ast::literal({ Token::Type::Symbol, "k", {} })
}));
};
"Variable declarations"_test = [] {
expect_ast("var x = 10", Ast::variable_declaration(
{},
{ Ast::literal({ Token::Type::Symbol, "x", {} }) },
Ast::literal({ Token::Type::Numeric, "10", {} })));
expect_ast("var x", Ast::variable_declaration(
{},
{ Ast::literal({ Token::Type::Symbol, "x", {} }) },
std::nullopt));
expect_ast("var x y", Ast::variable_declaration(
{},
{Ast::literal({ Token::Type::Symbol, "x", {} }), Ast::literal({ Token::Type::Symbol, "y", {} })},
std::nullopt));
};
};