Parsing variable declarations
This commit is contained in:
parent
0b9e7e8f4a
commit
0f9c46dffb
@ -69,6 +69,7 @@ std::ostream& operator<<(std::ostream& os, Error const& err)
|
||||
case errors::Unrecognized_Character:
|
||||
return err.message.empty() ? os << "unrecognized character\n" : os << err.message;
|
||||
|
||||
case errors::Expected_Keyword:
|
||||
case errors::Function_Not_Defined:
|
||||
case errors::Unexpected_Token_Type:
|
||||
case errors::Unresolved_Operator:
|
||||
@ -163,6 +164,15 @@ Error errors::unresolved_operator(Token const& op)
|
||||
return err;
|
||||
}
|
||||
|
||||
/// Builds the error reported when a specific keyword was required but a
/// different token was found.
///
/// @param unexpected  token that was found instead; its location becomes the error location
/// @param keyword     keyword that was required at this position
/// @return error of type `errors::Expected_Keyword` with a formatted message
Error errors::expected_keyword(Token const& unexpected, std::string_view keyword)
{
	Error failure;
	failure.location = unexpected.location;
	failure.type = errors::Expected_Keyword;
	failure.message = format("Expected keyword '", keyword, "', but found ", unexpected);
	return failure;
}
|
||||
|
||||
void errors::all_tokens_were_not_parsed(std::span<Token> tokens)
|
||||
{
|
||||
error_heading(std::cerr, std::nullopt, Error_Level::Bug);
|
||||
|
22
src/lexer.cc
22
src/lexer.cc
@ -74,7 +74,7 @@ auto Lexer::next_token() -> Result<Token>
|
||||
// is operator, then this character is part of operator sequence.
|
||||
// Additionally we explicitly allow for `|foo|=0` here
|
||||
if (Valid_Operator_Chars.find(peek()) == std::string_view::npos || peek() == '=')
|
||||
return { Token::Type::Variable_Separator, finish(), token_location };
|
||||
return { Token::Type::Parameter_Separator, finish(), token_location };
|
||||
}
|
||||
|
||||
// Number literals like .75
|
||||
@ -251,16 +251,16 @@ std::ostream& operator<<(std::ostream& os, Token const& token)
|
||||
std::ostream& operator<<(std::ostream& os, Token::Type type)
|
||||
{
|
||||
switch (type) {
|
||||
case Token::Type::Open_Block: return os << "OPEN BLOCK";
|
||||
case Token::Type::Close_Block: return os << "CLOSE BLOCK";
|
||||
case Token::Type::Open_Paren: return os << "OPEN PAREN";
|
||||
case Token::Type::Close_Paren: return os << "CLOSE PAREN";
|
||||
case Token::Type::Variable_Separator: return os << "VARIABLE SEPARATOR";
|
||||
case Token::Type::Chord: return os << "CHORD";
|
||||
case Token::Type::Numeric: return os << "NUMERIC";
|
||||
case Token::Type::Symbol: return os << "SYMBOL";
|
||||
case Token::Type::Operator: return os << "OPERATOR";
|
||||
case Token::Type::Expression_Separator: return os << "EXPRESSION SEPARATOR";
|
||||
case Token::Type::Open_Block: return os << "OPEN BLOCK";
|
||||
case Token::Type::Close_Block: return os << "CLOSE BLOCK";
|
||||
case Token::Type::Open_Paren: return os << "OPEN PAREN";
|
||||
case Token::Type::Close_Paren: return os << "CLOSE PAREN";
|
||||
case Token::Type::Parameter_Separator: return os << "PARAMETER SEPARATOR";
|
||||
case Token::Type::Chord: return os << "CHORD";
|
||||
case Token::Type::Numeric: return os << "NUMERIC";
|
||||
case Token::Type::Symbol: return os << "SYMBOL";
|
||||
case Token::Type::Operator: return os << "OPERATOR";
|
||||
case Token::Type::Expression_Separator: return os << "EXPRESSION SEPARATOR";
|
||||
}
|
||||
unreachable();
|
||||
}
|
||||
|
@ -46,7 +46,8 @@ namespace errors
|
||||
Failed_Numeric_Parsing,
|
||||
|
||||
Function_Not_Defined,
|
||||
Unresolved_Operator
|
||||
Unresolved_Operator,
|
||||
Expected_Keyword,
|
||||
};
|
||||
}
|
||||
|
||||
@ -218,7 +219,7 @@ struct Token
|
||||
Numeric,
|
||||
|
||||
// "|" separates arguments from block body, and provides variable introduction syntax
|
||||
Variable_Separator,
|
||||
Parameter_Separator,
|
||||
|
||||
// ";" separates expressions. Used to separate calls, like `foo 1 2; bar 3 4`
|
||||
Expression_Separator,
|
||||
@ -292,19 +293,21 @@ struct Lexer
|
||||
struct Ast
|
||||
{
|
||||
// Named constructors of AST structure
|
||||
static Ast literal(Token);
|
||||
static Ast binary(Token, Ast lhs, Ast rhs);
|
||||
static Ast call(std::vector<Ast> call);
|
||||
static Ast sequence(std::vector<Ast> call);
|
||||
static Ast block(Location location, Ast seq = sequence({}), std::vector<Ast> parameters = {});
|
||||
static Ast call(std::vector<Ast> call);
|
||||
static Ast literal(Token);
|
||||
static Ast sequence(std::vector<Ast> call);
|
||||
static Ast variable_declaration(Location loc, std::vector<Ast> lvalues, std::optional<Ast> rvalue);
|
||||
|
||||
enum class Type
|
||||
{
|
||||
Literal, // Compile time known constant like `c` or `1`
|
||||
Binary, // Binary operator application like `1` + `2`
|
||||
Call, // Function call application like `print 42`
|
||||
Sequence, // Several expressions sequences like `42`, `42; 32`
|
||||
Block, // Block expressions like `[42; hello]`
|
||||
Binary, // Binary operator application like `1` + `2`
|
||||
Block, // Block expressions like `[42; hello]`
|
||||
Call, // Function call application like `print 42`
|
||||
Literal, // Compile time known constant like `c` or `1`
|
||||
Sequence, // Several expressions sequences like `42`, `42; 32`
|
||||
Variable_Declaration, // Declaration of a variable with optional value assigment like `var x = 10` or `var y`
|
||||
};
|
||||
|
||||
Type type;
|
||||
@ -331,6 +334,9 @@ struct Parser
|
||||
Result<Ast> parse_expression();
|
||||
Result<Ast> parse_infix_expression();
|
||||
Result<Ast> parse_atomic_expression();
|
||||
Result<Ast> parse_variable_declaration();
|
||||
|
||||
Result<Ast> parse_identifier_with_trailing_separators();
|
||||
Result<Ast> parse_identifier();
|
||||
|
||||
Result<Token> peek() const;
|
||||
@ -339,6 +345,7 @@ struct Parser
|
||||
|
||||
// Tests if current token has given type
|
||||
bool expect(Token::Type type) const;
|
||||
bool expect(Token::Type type, std::string_view lexeme) const;
|
||||
|
||||
// Ensures that current token has one of types given.
|
||||
// Otherwise returns error
|
||||
@ -440,6 +447,7 @@ namespace errors
|
||||
|
||||
Error function_not_defined(Value const& v);
|
||||
Error unresolved_operator(Token const& op);
|
||||
Error expected_keyword(Token const& unexpected, std::string_view keyword);
|
||||
|
||||
[[noreturn]]
|
||||
void all_tokens_were_not_parsed(std::span<Token>);
|
||||
|
@ -47,7 +47,27 @@ Result<Ast> Parser::parse_sequence()
|
||||
|
||||
/// Parses a single expression: either a variable declaration introduced by
/// the `var` symbol, or an infix expression.
///
/// The choice is made by looking at the current token instead of attempting
/// a declaration and falling back on any failure. parse_variable_declaration
/// consumes `var` before it can fail on the names or the initializer, so a
/// fallback would resume infix parsing at the wrong token and would discard
/// the error that describes the malformed declaration.
///
/// @return parsed declaration or infix expression, or the error produced by
///         whichever of the two parsers was selected
Result<Ast> Parser::parse_expression()
{
	if (expect(Token::Type::Symbol, "var"))
		return parse_variable_declaration();

	return parse_infix_expression();
}
|
||||
|
||||
/// Parses `var <identifier>... [= <expression>]`.
///
/// At least one identifier must follow `var`. When the `=` operator comes
/// next, the expression after it is parsed and attached as the initial value.
///
/// @return variable declaration node located at the `var` token, or an error
///         when the current token is not `var` or a nested parse fails
Result<Ast> Parser::parse_variable_declaration()
{
	// Not a declaration at all: report what was found in place of `var`
	if (!expect(Token::Type::Symbol, "var"))
		return errors::expected_keyword(Try(peek()), "var");

	auto keyword = consume();
	auto names = Try(parse_many(*this, &Parser::parse_identifier, std::nullopt, At_Least::One));

	// Declaration without initial value, like `var x y`
	if (!expect(Token::Type::Operator, "="))
		return Ast::variable_declaration(keyword.location, std::move(names), std::nullopt);

	// Skip `=`, then everything that follows is the initial value
	consume();
	auto initial_value = Try(parse_expression());
	return Ast::variable_declaration(keyword.location, std::move(names), std::move(initial_value));
}
|
||||
|
||||
Result<Ast> Parser::parse_infix_expression()
|
||||
@ -83,7 +103,7 @@ Result<Ast> Parser::parse_atomic_expression()
|
||||
auto start = token_id;
|
||||
std::vector<Ast> parameters;
|
||||
|
||||
if (auto p = parse_many(*this, &Parser::parse_identifier, std::nullopt, At_Least::One); p && expect(Token::Type::Variable_Separator)) {
|
||||
if (auto p = parse_many(*this, &Parser::parse_identifier_with_trailing_separators, std::nullopt, At_Least::One); p && expect(Token::Type::Parameter_Separator)) {
|
||||
consume();
|
||||
parameters = std::move(p).value();
|
||||
} else {
|
||||
@ -112,7 +132,7 @@ Result<Ast> Parser::parse_atomic_expression()
|
||||
}
|
||||
}
|
||||
|
||||
Result<Ast> Parser::parse_identifier()
|
||||
Result<Ast> Parser::parse_identifier_with_trailing_separators()
|
||||
{
|
||||
Try(ensure(Token::Type::Symbol));
|
||||
auto lit = Ast::literal(consume());
|
||||
@ -120,7 +140,11 @@ Result<Ast> Parser::parse_identifier()
|
||||
return lit;
|
||||
}
|
||||
|
||||
|
||||
/// Parses exactly one symbol token and wraps it in a literal node.
///
/// @return literal node holding the consumed symbol, or the error produced by
///         `ensure` when the current token is not a symbol
Result<Ast> Parser::parse_identifier()
{
	Try(ensure(Token::Type::Symbol));

	auto symbol = consume();
	return Ast::literal(std::move(symbol));
}
|
||||
|
||||
static Result<std::vector<Ast>> parse_many(
|
||||
Parser &p,
|
||||
@ -173,6 +197,11 @@ bool Parser::expect(Token::Type type) const
|
||||
return token_id < tokens.size() && tokens[token_id].type == type;
|
||||
}
|
||||
|
||||
/// Tests if current token has given type and given source text.
///
/// @param type    required token type
/// @param lexeme  required token source
/// @return true only when a current token exists and matches both; false at end of input
bool Parser::expect(Token::Type type, std::string_view lexeme) const
{
	// No current token left to compare against
	if (token_id >= tokens.size())
		return false;

	auto const& current = tokens[token_id];
	return current.type == type && current.source == lexeme;
}
|
||||
|
||||
Result<void> Parser::ensure(Token::Type type) const
|
||||
{
|
||||
return token_id >= tokens.size()
|
||||
@ -234,6 +263,18 @@ Ast Ast::block(Location location, Ast seq, std::vector<Ast> parameters)
|
||||
return ast;
|
||||
}
|
||||
|
||||
/// Named constructor for a variable declaration node.
///
/// The declared names are stored first in `arguments`; the initial value,
/// when present, is appended after them as the last element.
///
/// @param loc      location recorded on the node
/// @param lvalues  nodes naming the declared variables
/// @param rvalue   optional initial value
/// @return node of type `Type::Variable_Declaration`
Ast Ast::variable_declaration(Location loc, std::vector<Ast> lvalues, std::optional<Ast> rvalue)
{
	Ast declaration;
	declaration.location = loc;
	declaration.type = Type::Variable_Declaration;
	declaration.arguments = std::move(lvalues);

	if (rvalue.has_value())
		declaration.arguments.push_back(std::move(*rvalue));

	return declaration;
}
|
||||
|
||||
Ast wrap_if_several(std::vector<Ast> &&ast, Ast(*wrapper)(std::vector<Ast>))
|
||||
{
|
||||
if (ast.size() == 1)
|
||||
@ -260,6 +301,7 @@ bool operator==(Ast const& lhs, Ast const& rhs)
|
||||
case Ast::Type::Block:
|
||||
case Ast::Type::Call:
|
||||
case Ast::Type::Sequence:
|
||||
case Ast::Type::Variable_Declaration:
|
||||
return lhs.arguments.size() == rhs.arguments.size()
|
||||
&& std::equal(lhs.arguments.begin(), lhs.arguments.end(), rhs.arguments.begin());
|
||||
}
|
||||
@ -270,11 +312,12 @@ bool operator==(Ast const& lhs, Ast const& rhs)
|
||||
std::ostream& operator<<(std::ostream& os, Ast::Type type)
|
||||
{
|
||||
switch (type) {
|
||||
case Ast::Type::Binary: return os << "BINARY";
|
||||
case Ast::Type::Block: return os << "BLOCK";
|
||||
case Ast::Type::Call: return os << "CALL";
|
||||
case Ast::Type::Literal: return os << "LITERAL";
|
||||
case Ast::Type::Sequence: return os << "SEQUENCE";
|
||||
case Ast::Type::Binary: return os << "BINARY";
|
||||
case Ast::Type::Block: return os << "BLOCK";
|
||||
case Ast::Type::Call: return os << "CALL";
|
||||
case Ast::Type::Literal: return os << "LITERAL";
|
||||
case Ast::Type::Sequence: return os << "SEQUENCE";
|
||||
case Ast::Type::Variable_Declaration: return os << "VAR";
|
||||
}
|
||||
unreachable();
|
||||
}
|
||||
|
@ -107,7 +107,7 @@ suite lexer_test = [] {
|
||||
expect_token_type(Token::Type::Close_Paren, ")");
|
||||
expect_token_type(Token::Type::Open_Block, "[");
|
||||
expect_token_type(Token::Type::Open_Paren, "(");
|
||||
expect_token_type(Token::Type::Variable_Separator, "|");
|
||||
expect_token_type(Token::Type::Parameter_Separator, "|");
|
||||
expect_token_type(Token::Type::Expression_Separator, ";");
|
||||
};
|
||||
|
||||
|
@ -169,4 +169,21 @@ suite parser_test = [] {
|
||||
Ast::literal({ Token::Type::Symbol, "k", {} })
|
||||
}));
|
||||
};
|
||||
|
||||
"Variable declarations"_test = [] {
|
||||
expect_ast("var x = 10", Ast::variable_declaration(
|
||||
{},
|
||||
{ Ast::literal({ Token::Type::Symbol, "x", {} }) },
|
||||
Ast::literal({ Token::Type::Numeric, "10", {} })));
|
||||
|
||||
expect_ast("var x", Ast::variable_declaration(
|
||||
{},
|
||||
{ Ast::literal({ Token::Type::Symbol, "x", {} }) },
|
||||
std::nullopt));
|
||||
|
||||
expect_ast("var x y", Ast::variable_declaration(
|
||||
{},
|
||||
{Ast::literal({ Token::Type::Symbol, "x", {} }), Ast::literal({ Token::Type::Symbol, "y", {} })},
|
||||
std::nullopt));
|
||||
};
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user