Operator lexing
This commit is contained in:
parent
62c2faa33a
commit
240939c3d6
33
src/lexer.cc
33
src/lexer.cc
@ -1,6 +1,11 @@
|
|||||||
#include <musique.hh>
|
#include <musique.hh>
|
||||||
|
|
||||||
constexpr std::string_view Notes_Symbols = "abcedefgh";
|
constexpr std::string_view Notes_Symbols = "abcedefgh";
|
||||||
|
// Set of characters that the lexer accepts as part of an operator token.
// The literal is assembled from three groups, in this order:
//   "+-*/"  arithmetic
//   "|&^"   logic & bit operations
//   "<>=!"  comparisons
constexpr std::string_view Valid_Operator_Chars { "+-*/" "|&^" "<>=!" };
|
||||||
|
|
||||||
auto Lexer::next_token() -> Result<Token>
|
auto Lexer::next_token() -> Result<Token>
|
||||||
{
|
{
|
||||||
@ -16,7 +21,11 @@ auto Lexer::next_token() -> Result<Token>
|
|||||||
case ')': consume(); return { Token::Type::Close_Paren, finish(), token_location };
|
case ')': consume(); return { Token::Type::Close_Paren, finish(), token_location };
|
||||||
case '[': consume(); return { Token::Type::Open_Block, finish(), token_location };
|
case '[': consume(); return { Token::Type::Open_Block, finish(), token_location };
|
||||||
case ']': consume(); return { Token::Type::Close_Block, finish(), token_location };
|
case ']': consume(); return { Token::Type::Close_Block, finish(), token_location };
|
||||||
case '|': consume(); return { Token::Type::Variable_Separator, finish(), token_location };
|
case '|':
|
||||||
|
consume();
|
||||||
|
// We explicitly allow for `|foo|=0` here
|
||||||
|
if (Valid_Operator_Chars.find(peek()) == std::string_view::npos || peek() == '=')
|
||||||
|
return { Token::Type::Variable_Separator, finish(), token_location };
|
||||||
}
|
}
|
||||||
|
|
||||||
// Number literals like .75
|
// Number literals like .75
|
||||||
@ -98,6 +107,11 @@ auto Lexer::next_token() -> Result<Token>
|
|||||||
return { Token::Type::Symbol, finish(), token_location };
|
return { Token::Type::Symbol, finish(), token_location };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (Valid_Operator_Chars.find(peek()) != std::string_view::npos) {
|
||||||
|
while (consume() && Valid_Operator_Chars.find(peek()) != std::string_view::npos) {}
|
||||||
|
return { Token::Type::Operator, finish(), token_location };
|
||||||
|
}
|
||||||
|
|
||||||
return errors::unrecognized_character(peek(), token_location);
|
return errors::unrecognized_character(peek(), token_location);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -156,6 +170,23 @@ std::ostream& operator<<(std::ostream& os, Token const&)
|
|||||||
return os;
|
return os;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::ostream& operator<<(std::ostream& os, Token::Type type)
|
||||||
|
{
|
||||||
|
switch (type) {
|
||||||
|
case Token::Type::Open_Block: return os << "OPEN BLOCK";
|
||||||
|
case Token::Type::Close_Block: return os << "CLOSE BLOCK";
|
||||||
|
case Token::Type::Open_Paren: return os << "OPEN PAREN";
|
||||||
|
case Token::Type::Close_Paren: return os << "CLOSE PAREN";
|
||||||
|
case Token::Type::Variable_Separator: return os << "VARIABLE SEPARATOR";
|
||||||
|
case Token::Type::Chord: return os << "CHORD";
|
||||||
|
case Token::Type::Numeric: return os << "NUMERIC";
|
||||||
|
case Token::Type::Symbol: return os << "SYMBOL";
|
||||||
|
case Token::Type::Operator: return os << "OPERATOR";
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(false && "exhaustive handling of Token::Type enumeration");
|
||||||
|
}
|
||||||
|
|
||||||
Location Location::advance(u32 rune)
|
Location Location::advance(u32 rune)
|
||||||
{
|
{
|
||||||
switch (rune) {
|
switch (rune) {
|
||||||
|
@ -148,6 +148,9 @@ struct Token
|
|||||||
// like repeat or choose or chord
|
// like repeat or choose or chord
|
||||||
Symbol,
|
Symbol,
|
||||||
|
|
||||||
|
// like + - ++ < >
|
||||||
|
Operator,
|
||||||
|
|
||||||
// chord literal, like c125
|
// chord literal, like c125
|
||||||
Chord,
|
Chord,
|
||||||
|
|
||||||
@ -172,6 +175,7 @@ struct Token
|
|||||||
};
|
};
|
||||||
|
|
||||||
std::ostream& operator<<(std::ostream& os, Token const& tok);
|
std::ostream& operator<<(std::ostream& os, Token const& tok);
|
||||||
|
std::ostream& operator<<(std::ostream& os, Token::Type type);
|
||||||
|
|
||||||
struct Lexer
|
struct Lexer
|
||||||
{
|
{
|
||||||
|
@ -3,11 +3,6 @@
|
|||||||
|
|
||||||
using namespace boost::ut;
|
using namespace boost::ut;
|
||||||
|
|
||||||
auto under(auto enumeration) requires std::is_enum_v<decltype(enumeration)>
|
|
||||||
{
|
|
||||||
return static_cast<std::underlying_type_t<decltype(enumeration)>>(enumeration);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void expect_token_type(
|
static void expect_token_type(
|
||||||
Token::Type expected_type,
|
Token::Type expected_type,
|
||||||
std::string source,
|
std::string source,
|
||||||
@ -16,7 +11,7 @@ static void expect_token_type(
|
|||||||
Lexer lexer{source};
|
Lexer lexer{source};
|
||||||
auto result = lexer.next_token();
|
auto result = lexer.next_token();
|
||||||
expect(result.has_value() >> fatal, sl) << "have not parsed any tokens";
|
expect(result.has_value() >> fatal, sl) << "have not parsed any tokens";
|
||||||
expect(eq(under(result->type), under(expected_type)), sl) << "different token type then expected";
|
expect(eq(result->type, expected_type), sl) << "different token type then expected";
|
||||||
}
|
}
|
||||||
|
|
||||||
static void expect_token_type_and_value(
|
static void expect_token_type_and_value(
|
||||||
@ -30,7 +25,7 @@ static void expect_token_type_and_value(
|
|||||||
expect(result.has_value(), sl) << "have not parsed any tokens";
|
expect(result.has_value(), sl) << "have not parsed any tokens";
|
||||||
|
|
||||||
if (result.has_value()) {
|
if (result.has_value()) {
|
||||||
expect(eq(under(result->type), under(expected_type)), sl) << "different token type then expected";
|
expect(eq(result->type, expected_type), sl) << "different token type then expected";
|
||||||
expect(eq(result->source, expected), sl) << "tokenized source is not equal to original";
|
expect(eq(result->source, expected), sl) << "tokenized source is not equal to original";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -52,7 +47,7 @@ static void expect_token_type_and_location(
|
|||||||
Lexer lexer{source};
|
Lexer lexer{source};
|
||||||
auto result = lexer.next_token();
|
auto result = lexer.next_token();
|
||||||
expect(result.has_value() >> fatal, sl) << "have not parsed any tokens";
|
expect(result.has_value() >> fatal, sl) << "have not parsed any tokens";
|
||||||
expect(eq(under(result->type), under(expected_type)), sl) << "different token type then expected";
|
expect(eq(result->type, expected_type), sl) << "different token type then expected";
|
||||||
expect(eq(result->location, location), sl) << "tokenized source is at different place then expected";
|
expect(eq(result->location, location), sl) << "tokenized source is at different place then expected";
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -110,4 +105,14 @@ suite lexer_test = [] {
|
|||||||
expect_token_type_and_value(Token::Type::Symbol, "PascalCase");
|
expect_token_type_and_value(Token::Type::Symbol, "PascalCase");
|
||||||
expect_token_type_and_value(Token::Type::Symbol, "haskell'");
|
expect_token_type_and_value(Token::Type::Symbol, "haskell'");
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Checks that single-character ("+", "*", "=", "<") and repeated-character
// ("&&", "||", "**") operator spellings are reported as Token::Type::Operator
// by expect_token_type_and_value.
"Operators"_test = [] {
|
||||||
|
expect_token_type_and_value(Token::Type::Operator, "+");
|
||||||
|
expect_token_type_and_value(Token::Type::Operator, "&&");
|
||||||
|
expect_token_type_and_value(Token::Type::Operator, "||");
|
||||||
|
expect_token_type_and_value(Token::Type::Operator, "*");
|
||||||
|
expect_token_type_and_value(Token::Type::Operator, "**");
|
||||||
|
expect_token_type_and_value(Token::Type::Operator, "=");
|
||||||
|
expect_token_type_and_value(Token::Type::Operator, "<");
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
Loading…
Reference in New Issue
Block a user