parent
3b4c306199
commit
df1f90d827
33
src/lexer.cc
33
src/lexer.cc
@ -60,13 +60,17 @@ auto Lexer::next_token() -> Result<Token>
|
||||
}
|
||||
|
||||
switch (peek()) {
|
||||
case '(': consume(); return { Token::Type::Open_Paren, finish(), token_location };
|
||||
case ')': consume(); return { Token::Type::Close_Paren, finish(), token_location };
|
||||
case '[': consume(); return { Token::Type::Open_Block, finish(), token_location };
|
||||
case ']': consume(); return { Token::Type::Close_Block, finish(), token_location };
|
||||
case '(': consume(); return { Token::Type::Open_Paren, finish(), token_location };
|
||||
case ')': consume(); return { Token::Type::Close_Paren, finish(), token_location };
|
||||
case '[': consume(); return { Token::Type::Open_Block, finish(), token_location };
|
||||
case ']': consume(); return { Token::Type::Close_Block, finish(), token_location };
|
||||
case ';': consume(); return { Token::Type::Expression_Separator, finish(), token_location };
|
||||
|
||||
case '|':
|
||||
consume();
|
||||
// We explicitly allow for `|foo|=0` here
|
||||
// `|` may be part of operator, like `||`. So we need to check what follows. If next char
|
||||
// is operator, then this character is part of operator sequence.
|
||||
// Additionally we explicitly allow for `|foo|=0` here
|
||||
if (Valid_Operator_Chars.find(peek()) == std::string_view::npos || peek() == '=')
|
||||
return { Token::Type::Variable_Separator, finish(), token_location };
|
||||
}
|
||||
@ -218,15 +222,16 @@ std::ostream& operator<<(std::ostream& os, Token const&)
|
||||
std::ostream& operator<<(std::ostream& os, Token::Type type)
|
||||
{
|
||||
switch (type) {
|
||||
case Token::Type::Open_Block: return os << "OPEN BLOCK";
|
||||
case Token::Type::Close_Block: return os << "CLOSE BLOCK";
|
||||
case Token::Type::Open_Paren: return os << "OPEN PAREN";
|
||||
case Token::Type::Close_Paren: return os << "CLOSE PAREN";
|
||||
case Token::Type::Variable_Separator: return os << "VARIABLE SEPARATOR";
|
||||
case Token::Type::Chord: return os << "CHORD";
|
||||
case Token::Type::Numeric: return os << "NUMERIC";
|
||||
case Token::Type::Symbol: return os << "SYMBOL";
|
||||
case Token::Type::Operator: return os << "OPERATOR";
|
||||
case Token::Type::Open_Block: return os << "OPEN BLOCK";
|
||||
case Token::Type::Close_Block: return os << "CLOSE BLOCK";
|
||||
case Token::Type::Open_Paren: return os << "OPEN PAREN";
|
||||
case Token::Type::Close_Paren: return os << "CLOSE PAREN";
|
||||
case Token::Type::Variable_Separator: return os << "VARIABLE SEPARATOR";
|
||||
case Token::Type::Chord: return os << "CHORD";
|
||||
case Token::Type::Numeric: return os << "NUMERIC";
|
||||
case Token::Type::Symbol: return os << "SYMBOL";
|
||||
case Token::Type::Operator: return os << "OPERATOR";
|
||||
case Token::Type::Expression_Separator: return os << "EXPRESSION SEPARATOR";
|
||||
}
|
||||
|
||||
assert(false && "exhaustive handling of Token::Type enumeration");
|
||||
|
@ -162,6 +162,9 @@ struct Token
|
||||
// "|" separates arguments from block body, and provides variable introduction syntax
|
||||
Variable_Separator,
|
||||
|
||||
// ";" separates expressions. Used to separate calls, like `foo 1 2; bar 3 4`
|
||||
Expression_Separator,
|
||||
|
||||
// "[" and "]", delimit anonymous block of code (potentially a function)
|
||||
Open_Block,
|
||||
Close_Block,
|
||||
|
@ -63,6 +63,29 @@ static void expect_empty_file(
|
||||
}
|
||||
}
|
||||
|
||||
template<auto N>
|
||||
static void expect_token_sequence(
|
||||
std::string_view source,
|
||||
std::array<Token, N> const& expected_tokens,
|
||||
reflection::source_location const& sl = reflection::source_location::current())
|
||||
{
|
||||
Lexer lexer{source};
|
||||
|
||||
for (Token const& expected : expected_tokens) {
|
||||
auto const result = lexer.next_token();
|
||||
expect(result.has_value(), sl) << "expected token, received nothing";
|
||||
|
||||
if (result.has_value()) {
|
||||
expect(eq(result->type, expected.type)) << "different token type then expected";
|
||||
expect(eq(result->source, expected.source)) << "different token source then expected";
|
||||
expect(eq(result->location, expected.location)) << "different token location then expected";
|
||||
}
|
||||
}
|
||||
|
||||
auto const result = lexer.next_token();
|
||||
expect(not result.has_value(), sl) << "more tokens then expected";
|
||||
}
|
||||
|
||||
suite lexer_test = [] {
|
||||
"Empty file"_test = [] {
|
||||
expect_empty_file("");
|
||||
@ -80,11 +103,12 @@ suite lexer_test = [] {
|
||||
};
|
||||
|
||||
"Simple token types"_test = [] {
|
||||
expect_token_type(Token::Type::Close_Block, "]");
|
||||
expect_token_type(Token::Type::Close_Paren, ")");
|
||||
expect_token_type(Token::Type::Open_Block, "[");
|
||||
expect_token_type(Token::Type::Open_Paren, "(");
|
||||
expect_token_type(Token::Type::Variable_Separator, "|");
|
||||
expect_token_type(Token::Type::Close_Block, "]");
|
||||
expect_token_type(Token::Type::Close_Paren, ")");
|
||||
expect_token_type(Token::Type::Open_Block, "[");
|
||||
expect_token_type(Token::Type::Open_Paren, "(");
|
||||
expect_token_type(Token::Type::Variable_Separator, "|");
|
||||
expect_token_type(Token::Type::Expression_Separator, ";");
|
||||
};
|
||||
|
||||
"Numeric tokens"_test = [] {
|
||||
@ -143,4 +167,24 @@ suite lexer_test = [] {
|
||||
expect_token_type_and_value(Token::Type::Operator, "v");
|
||||
expect_token_type_and_value(Token::Type::Operator, "%");
|
||||
};
|
||||
|
||||
"Multiple tokens"_test = [] {
|
||||
Location l;
|
||||
|
||||
expect_token_sequence("1 + foo", std::array {
|
||||
Token { Token::Type::Numeric, "1", l.at(1, 1) },
|
||||
Token { Token::Type::Operator, "+", l.at(1, 3) },
|
||||
Token { Token::Type::Symbol, "foo", l.at(1, 5) }
|
||||
});
|
||||
|
||||
expect_token_sequence("foo 1 2; bar 3 4", std::array {
|
||||
Token { Token::Type::Symbol, "foo", l.at(1, 1) },
|
||||
Token { Token::Type::Numeric, "1", l.at(1, 5) },
|
||||
Token { Token::Type::Numeric, "2", l.at(1, 7) },
|
||||
Token { Token::Type::Expression_Separator, ";", l.at(1, 8) },
|
||||
Token { Token::Type::Symbol, "bar", l.at(1, 10) },
|
||||
Token { Token::Type::Numeric, "3", l.at(1, 14) },
|
||||
Token { Token::Type::Numeric, "4", l.at(1, 16) }
|
||||
});
|
||||
};
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user