Operator lexing

This commit is contained in:
Robert Bendun 2022-05-02 19:42:57 +02:00
parent 62c2faa33a
commit 240939c3d6
3 changed files with 49 additions and 9 deletions

View File

@ -1,6 +1,11 @@
#include <musique.hh>
// Set of characters treated as note symbols (presumably used when lexing
// chord/note literals - the use site is not visible here).
// NOTE(review): 'e' appears twice ("abcedefgh"). That is harmless for a pure
// membership test, but confirm that no caller relies on a character's index.
constexpr std::string_view Notes_Symbols = "abcedefgh";
// Set of characters that may form an operator token. The lexer checks
// membership with find() and consumes a whole run of these characters as a
// single Operator token.
constexpr std::string_view Valid_Operator_Chars =
"+-*/" // arithmetic
"|&^" // logic & bit operations
"<>=!" // comparisons
;
auto Lexer::next_token() -> Result<Token>
{
@ -16,7 +21,11 @@ auto Lexer::next_token() -> Result<Token>
case ')': consume(); return { Token::Type::Close_Paren, finish(), token_location };
case '[': consume(); return { Token::Type::Open_Block, finish(), token_location };
case ']': consume(); return { Token::Type::Close_Block, finish(), token_location };
case '|': consume(); return { Token::Type::Variable_Separator, finish(), token_location };
case '|':
consume();
// We explicitly allow for `|foo|=0` here
if (Valid_Operator_Chars.find(peek()) == std::string_view::npos || peek() == '=')
return { Token::Type::Variable_Separator, finish(), token_location };
}
// Number literals like .75
@ -98,6 +107,11 @@ auto Lexer::next_token() -> Result<Token>
return { Token::Type::Symbol, finish(), token_location };
}
if (Valid_Operator_Chars.find(peek()) != std::string_view::npos) {
while (consume() && Valid_Operator_Chars.find(peek()) != std::string_view::npos) {}
return { Token::Type::Operator, finish(), token_location };
}
return errors::unrecognized_character(peek(), token_location);
}
@ -156,6 +170,23 @@ std::ostream& operator<<(std::ostream& os, Token const&)
return os;
}
/// Writes the upper-case, human-readable name of a token type to `os`.
///
/// The switch deliberately has no `default` label, so every enumerator of
/// Token::Type is spelled out explicitly and a newly added one is easy to
/// spot as missing.
///
/// @param os   stream to write to
/// @param type token type to describe
/// @return `os`, to allow chaining
std::ostream& operator<<(std::ostream& os, Token::Type type)
{
	switch (type) {
	case Token::Type::Open_Block:         return os << "OPEN BLOCK";
	case Token::Type::Close_Block:        return os << "CLOSE BLOCK";
	case Token::Type::Open_Paren:         return os << "OPEN PAREN";
	case Token::Type::Close_Paren:        return os << "CLOSE PAREN";
	case Token::Type::Variable_Separator: return os << "VARIABLE SEPARATOR";
	case Token::Type::Chord:              return os << "CHORD";
	case Token::Type::Numeric:            return os << "NUMERIC";
	case Token::Type::Symbol:             return os << "SYMBOL";
	case Token::Type::Operator:           return os << "OPERATOR";
	}

	// Reached only when `type` holds a value that matches no case above.
	assert(false && "exhaustive handling of Token::Type enumeration");

	// With NDEBUG the assert compiles to nothing; returning the stream keeps
	// control from flowing off the end of a non-void function, which would be
	// undefined behaviour.
	return os;
}
Location Location::advance(u32 rune)
{
switch (rune) {

View File

@ -148,6 +148,9 @@ struct Token
// like repeat or choose or chord
Symbol,
// like + - ++ < >
Operator,
// chord literal, like c125
Chord,
@ -172,6 +175,7 @@ struct Token
};
std::ostream& operator<<(std::ostream& os, Token const& tok);
std::ostream& operator<<(std::ostream& os, Token::Type type);
struct Lexer
{

View File

@ -3,11 +3,6 @@
using namespace boost::ut;
/// Converts an enumerator to the value of its underlying integral type.
/// Only participates in overload resolution for enumeration types.
auto under(auto enumeration) requires std::is_enum_v<decltype(enumeration)>
{
	using Underlying = std::underlying_type_t<decltype(enumeration)>;
	return static_cast<Underlying>(enumeration);
}
static void expect_token_type(
Token::Type expected_type,
std::string source,
@ -16,7 +11,7 @@ static void expect_token_type(
Lexer lexer{source};
auto result = lexer.next_token();
expect(result.has_value() >> fatal, sl) << "have not parsed any tokens";
expect(eq(under(result->type), under(expected_type)), sl) << "different token type then expected";
expect(eq(result->type, expected_type), sl) << "different token type then expected";
}
static void expect_token_type_and_value(
@ -30,7 +25,7 @@ static void expect_token_type_and_value(
expect(result.has_value(), sl) << "have not parsed any tokens";
if (result.has_value()) {
expect(eq(under(result->type), under(expected_type)), sl) << "different token type then expected";
expect(eq(result->type, expected_type), sl) << "different token type then expected";
expect(eq(result->source, expected), sl) << "tokenized source is not equal to original";
}
}
@ -52,7 +47,7 @@ static void expect_token_type_and_location(
Lexer lexer{source};
auto result = lexer.next_token();
expect(result.has_value() >> fatal, sl) << "have not parsed any tokens";
expect(eq(under(result->type), under(expected_type)), sl) << "different token type then expected";
expect(eq(result->type, expected_type), sl) << "different token type then expected";
expect(eq(result->location, location), sl) << "tokenized source is at different place then expected";
}
@ -110,4 +105,14 @@ suite lexer_test = [] {
expect_token_type_and_value(Token::Type::Symbol, "PascalCase");
expect_token_type_and_value(Token::Type::Symbol, "haskell'");
};
"Operators"_test = [] {
expect_token_type_and_value(Token::Type::Operator, "+");
expect_token_type_and_value(Token::Type::Operator, "&&");
expect_token_type_and_value(Token::Type::Operator, "||");
expect_token_type_and_value(Token::Type::Operator, "*");
expect_token_type_and_value(Token::Type::Operator, "**");
expect_token_type_and_value(Token::Type::Operator, "=");
expect_token_type_and_value(Token::Type::Operator, "<");
};
};