Added expression separator tokenization

Closes #5
This commit is contained in:
Robert Bendun 2022-05-07 18:20:22 +02:00
parent 3b4c306199
commit df1f90d827
3 changed files with 71 additions and 19 deletions

View File

@ -60,13 +60,17 @@ auto Lexer::next_token() -> Result<Token>
}
switch (peek()) {
case '(': consume(); return { Token::Type::Open_Paren, finish(), token_location };
case ')': consume(); return { Token::Type::Close_Paren, finish(), token_location };
case '[': consume(); return { Token::Type::Open_Block, finish(), token_location };
case ']': consume(); return { Token::Type::Close_Block, finish(), token_location };
case '(': consume(); return { Token::Type::Open_Paren, finish(), token_location };
case ')': consume(); return { Token::Type::Close_Paren, finish(), token_location };
case '[': consume(); return { Token::Type::Open_Block, finish(), token_location };
case ']': consume(); return { Token::Type::Close_Block, finish(), token_location };
case ';': consume(); return { Token::Type::Expression_Separator, finish(), token_location };
case '|':
consume();
// We explicitly allow for `|foo|=0` here
// `|` may be part of an operator, like `||`, so we need to check what follows. If the next
// character is an operator character, then this `|` is part of an operator sequence.
// Additionally, we explicitly allow for `|foo|=0` here.
if (Valid_Operator_Chars.find(peek()) == std::string_view::npos || peek() == '=')
return { Token::Type::Variable_Separator, finish(), token_location };
}
@ -218,15 +222,16 @@ std::ostream& operator<<(std::ostream& os, Token const&)
std::ostream& operator<<(std::ostream& os, Token::Type type)
{
switch (type) {
case Token::Type::Open_Block: return os << "OPEN BLOCK";
case Token::Type::Close_Block: return os << "CLOSE BLOCK";
case Token::Type::Open_Paren: return os << "OPEN PAREN";
case Token::Type::Close_Paren: return os << "CLOSE PAREN";
case Token::Type::Variable_Separator: return os << "VARIABLE SEPARATOR";
case Token::Type::Chord: return os << "CHORD";
case Token::Type::Numeric: return os << "NUMERIC";
case Token::Type::Symbol: return os << "SYMBOL";
case Token::Type::Operator: return os << "OPERATOR";
case Token::Type::Open_Block: return os << "OPEN BLOCK";
case Token::Type::Close_Block: return os << "CLOSE BLOCK";
case Token::Type::Open_Paren: return os << "OPEN PAREN";
case Token::Type::Close_Paren: return os << "CLOSE PAREN";
case Token::Type::Variable_Separator: return os << "VARIABLE SEPARATOR";
case Token::Type::Chord: return os << "CHORD";
case Token::Type::Numeric: return os << "NUMERIC";
case Token::Type::Symbol: return os << "SYMBOL";
case Token::Type::Operator: return os << "OPERATOR";
case Token::Type::Expression_Separator: return os << "EXPRESSION SEPARATOR";
}
assert(false && "exhaustive handling of Token::Type enumeration");

View File

@ -162,6 +162,9 @@ struct Token
// "|" separates arguments from block body, and provides variable introduction syntax
Variable_Separator,
// ";" separates expressions. Used to separate calls, like `foo 1 2; bar 3 4`
Expression_Separator,
// "[" and "]", delimit anonymous block of code (potentially a function)
Open_Block,
Close_Block,

View File

@ -63,6 +63,29 @@ static void expect_empty_file(
}
}
/// Lexes `source` and checks that it yields exactly `expected_tokens`, in order.
///
/// For every expected token, the next result of `Lexer::next_token` must hold a value whose
/// `type`, `source` and `location` all compare equal to the expected token's fields. After the
/// last expected token, one more call to `Lexer::next_token` must produce no value.
///
/// @param source           text handed to the lexer
/// @param expected_tokens  tokens that the lexer is expected to produce, in order
/// @param sl               source location forwarded to every expectation; defaults to the
///                         caller's location so that all checks in this helper are attributed
///                         to the same place
template<auto N>
static void expect_token_sequence(
	std::string_view source,
	std::array<Token, N> const& expected_tokens,
	reflection::source_location const& sl = reflection::source_location::current())
{
	Lexer lexer{source};

	for (Token const& expected : expected_tokens) {
		auto const result = lexer.next_token();
		expect(result.has_value(), sl) << "expected token, received nothing";

		// Field comparisons only make sense when the lexer actually produced a token;
		// dereferencing an empty result must be avoided.
		if (result.has_value()) {
			// `sl` is passed here as well, consistently with the `has_value` checks, so a
			// mismatch is not attributed to this helper's own lines.
			expect(eq(result->type, expected.type), sl) << "different token type then expected";
			expect(eq(result->source, expected.source), sl) << "different token source then expected";
			expect(eq(result->location, expected.location), sl) << "different token location then expected";
		}
	}

	// The lexer must be exhausted once all expected tokens were consumed.
	auto const result = lexer.next_token();
	expect(not result.has_value(), sl) << "more tokens then expected";
}
suite lexer_test = [] {
"Empty file"_test = [] {
expect_empty_file("");
@ -80,11 +103,12 @@ suite lexer_test = [] {
};
"Simple token types"_test = [] {
expect_token_type(Token::Type::Close_Block, "]");
expect_token_type(Token::Type::Close_Paren, ")");
expect_token_type(Token::Type::Open_Block, "[");
expect_token_type(Token::Type::Open_Paren, "(");
expect_token_type(Token::Type::Variable_Separator, "|");
expect_token_type(Token::Type::Close_Block, "]");
expect_token_type(Token::Type::Close_Paren, ")");
expect_token_type(Token::Type::Open_Block, "[");
expect_token_type(Token::Type::Open_Paren, "(");
expect_token_type(Token::Type::Variable_Separator, "|");
expect_token_type(Token::Type::Expression_Separator, ";");
};
"Numeric tokens"_test = [] {
@ -143,4 +167,24 @@ suite lexer_test = [] {
expect_token_type_and_value(Token::Type::Operator, "v");
expect_token_type_and_value(Token::Type::Operator, "%");
};
"Multiple tokens"_test = [] {
	Location loc;

	// Number, operator and symbol, each separated by a single space.
	auto const arithmetic_tokens = std::array {
		Token { Token::Type::Numeric, "1", loc.at(1, 1) },
		Token { Token::Type::Operator, "+", loc.at(1, 3) },
		Token { Token::Type::Symbol, "foo", loc.at(1, 5) }
	};
	expect_token_sequence("1 + foo", arithmetic_tokens);

	// Two calls split by `;`, which directly follows the `2` without whitespace.
	auto const separated_calls_tokens = std::array {
		Token { Token::Type::Symbol, "foo", loc.at(1, 1) },
		Token { Token::Type::Numeric, "1", loc.at(1, 5) },
		Token { Token::Type::Numeric, "2", loc.at(1, 7) },
		Token { Token::Type::Expression_Separator, ";", loc.at(1, 8) },
		Token { Token::Type::Symbol, "bar", loc.at(1, 10) },
		Token { Token::Type::Numeric, "3", loc.at(1, 14) },
		Token { Token::Type::Numeric, "4", loc.at(1, 16) }
	};
	expect_token_sequence("foo 1 2; bar 3 4", separated_calls_tokens);
};
};