Operator lexing

This commit is contained in:
Robert Bendun 2022-05-02 19:42:57 +02:00
parent 62c2faa33a
commit 240939c3d6
3 changed files with 49 additions and 9 deletions

View File

@ -1,6 +1,11 @@
#include <musique.hh> #include <musique.hh>
constexpr std::string_view Notes_Symbols = "abcedefgh"; constexpr std::string_view Notes_Symbols = "abcedefgh";
// Set of characters the lexer accepts as part of an operator token.
// The literal is built from adjacent string pieces, one per operator family.
constexpr std::string_view Valid_Operator_Chars {
	"+-*/"  // arithmetic
	"|&^"   // logic & bit operations
	"<>=!"  // comparisons
};
auto Lexer::next_token() -> Result<Token> auto Lexer::next_token() -> Result<Token>
{ {
@ -16,7 +21,11 @@ auto Lexer::next_token() -> Result<Token>
case ')': consume(); return { Token::Type::Close_Paren, finish(), token_location }; case ')': consume(); return { Token::Type::Close_Paren, finish(), token_location };
case '[': consume(); return { Token::Type::Open_Block, finish(), token_location }; case '[': consume(); return { Token::Type::Open_Block, finish(), token_location };
case ']': consume(); return { Token::Type::Close_Block, finish(), token_location }; case ']': consume(); return { Token::Type::Close_Block, finish(), token_location };
case '|': consume(); return { Token::Type::Variable_Separator, finish(), token_location }; case '|':
consume();
// We explicitly allow for `|foo|=0` here
if (Valid_Operator_Chars.find(peek()) == std::string_view::npos || peek() == '=')
return { Token::Type::Variable_Separator, finish(), token_location };
} }
// Number literals like .75 // Number literals like .75
@ -98,6 +107,11 @@ auto Lexer::next_token() -> Result<Token>
return { Token::Type::Symbol, finish(), token_location }; return { Token::Type::Symbol, finish(), token_location };
} }
if (Valid_Operator_Chars.find(peek()) != std::string_view::npos) {
while (consume() && Valid_Operator_Chars.find(peek()) != std::string_view::npos) {}
return { Token::Type::Operator, finish(), token_location };
}
return errors::unrecognized_character(peek(), token_location); return errors::unrecognized_character(peek(), token_location);
} }
@ -156,6 +170,23 @@ std::ostream& operator<<(std::ostream& os, Token const&)
return os; return os;
} }
// Writes an upper-case, human-readable name of `type` to `os`.
// Returns `os` so the call can be chained with further insertions.
std::ostream& operator<<(std::ostream& os, Token::Type type)
{
	switch (type) {
	case Token::Type::Open_Block: return os << "OPEN BLOCK";
	case Token::Type::Close_Block: return os << "CLOSE BLOCK";
	case Token::Type::Open_Paren: return os << "OPEN PAREN";
	case Token::Type::Close_Paren: return os << "CLOSE PAREN";
	case Token::Type::Variable_Separator: return os << "VARIABLE SEPARATOR";
	case Token::Type::Chord: return os << "CHORD";
	case Token::Type::Numeric: return os << "NUMERIC";
	case Token::Type::Symbol: return os << "SYMBOL";
	case Token::Type::Operator: return os << "OPERATOR";
	}
	// Reached only when `type` matches none of the cases above.
	assert(false && "exhaustive handling of Token::Type enumeration");
	// When NDEBUG is defined the assert above compiles to nothing; without this
	// return, control would flow off the end of a non-void function, which is
	// undefined behaviour. The stream is returned unchanged in that case.
	return os;
}
Location Location::advance(u32 rune) Location Location::advance(u32 rune)
{ {
switch (rune) { switch (rune) {

View File

@ -148,6 +148,9 @@ struct Token
// like repeat or choose or chord // like repeat or choose or chord
Symbol, Symbol,
// like + - ++ < >
Operator,
// chord literal, like c125 // chord literal, like c125
Chord, Chord,
@ -172,6 +175,7 @@ struct Token
}; };
std::ostream& operator<<(std::ostream& os, Token const& tok); std::ostream& operator<<(std::ostream& os, Token const& tok);
std::ostream& operator<<(std::ostream& os, Token::Type type);
struct Lexer struct Lexer
{ {

View File

@ -3,11 +3,6 @@
using namespace boost::ut; using namespace boost::ut;
// Converts an enumeration value to a value of its underlying integer type.
// The requires-clause restricts the parameter to enum types.
auto under(auto enumeration) requires std::is_enum_v<decltype(enumeration)>
{
return static_cast<std::underlying_type_t<decltype(enumeration)>>(enumeration);
}
static void expect_token_type( static void expect_token_type(
Token::Type expected_type, Token::Type expected_type,
std::string source, std::string source,
@ -16,7 +11,7 @@ static void expect_token_type(
Lexer lexer{source}; Lexer lexer{source};
auto result = lexer.next_token(); auto result = lexer.next_token();
expect(result.has_value() >> fatal, sl) << "have not parsed any tokens"; expect(result.has_value() >> fatal, sl) << "have not parsed any tokens";
expect(eq(under(result->type), under(expected_type)), sl) << "different token type then expected"; expect(eq(result->type, expected_type), sl) << "different token type then expected";
} }
static void expect_token_type_and_value( static void expect_token_type_and_value(
@ -30,7 +25,7 @@ static void expect_token_type_and_value(
expect(result.has_value(), sl) << "have not parsed any tokens"; expect(result.has_value(), sl) << "have not parsed any tokens";
if (result.has_value()) { if (result.has_value()) {
expect(eq(under(result->type), under(expected_type)), sl) << "different token type then expected"; expect(eq(result->type, expected_type), sl) << "different token type then expected";
expect(eq(result->source, expected), sl) << "tokenized source is not equal to original"; expect(eq(result->source, expected), sl) << "tokenized source is not equal to original";
} }
} }
@ -52,7 +47,7 @@ static void expect_token_type_and_location(
Lexer lexer{source}; Lexer lexer{source};
auto result = lexer.next_token(); auto result = lexer.next_token();
expect(result.has_value() >> fatal, sl) << "have not parsed any tokens"; expect(result.has_value() >> fatal, sl) << "have not parsed any tokens";
expect(eq(under(result->type), under(expected_type)), sl) << "different token type then expected"; expect(eq(result->type, expected_type), sl) << "different token type then expected";
expect(eq(result->location, location), sl) << "tokenized source is at different place then expected"; expect(eq(result->location, location), sl) << "tokenized source is at different place then expected";
} }
@ -110,4 +105,14 @@ suite lexer_test = [] {
expect_token_type_and_value(Token::Type::Symbol, "PascalCase"); expect_token_type_and_value(Token::Type::Symbol, "PascalCase");
expect_token_type_and_value(Token::Type::Symbol, "haskell'"); expect_token_type_and_value(Token::Type::Symbol, "haskell'");
}; };
"Operators"_test = [] {
expect_token_type_and_value(Token::Type::Operator, "+");
expect_token_type_and_value(Token::Type::Operator, "&&");
expect_token_type_and_value(Token::Type::Operator, "||");
expect_token_type_and_value(Token::Type::Operator, "*");
expect_token_type_and_value(Token::Type::Operator, "**");
expect_token_type_and_value(Token::Type::Operator, "=");
expect_token_type_and_value(Token::Type::Operator, "<");
};
}; };