Infrastructure for note parsing

This commit is contained in:
Robert Bendun 2022-05-24 00:48:15 +02:00
parent 3c476d4629
commit 7d2ba379f7
5 changed files with 77 additions and 49 deletions

View File

@ -46,9 +46,13 @@ void assert(bool condition, std::string message, Location loc)
std::exit(1); std::exit(1);
} }
[[noreturn]] void unimplemented(Location loc) [[noreturn]] void unimplemented(std::string_view message, Location loc)
{ {
error_heading(std::cerr, loc, Error_Level::Bug) << "this part was not implemented yet" << std::endl; if (message.empty()) {
error_heading(std::cerr, loc, Error_Level::Bug) << "this part was not implemented yet" << std::endl;
} else {
error_heading(std::cerr, loc, Error_Level::Bug) << message << std::endl;
}
std::exit(1); std::exit(1);
} }

View File

@ -70,11 +70,11 @@ auto Lexer::next_token() -> Result<Token>
} }
switch (peek()) { switch (peek()) {
case '(': consume(); return { Token::Type::Open_Paren, finish(), token_location }; case '(': consume(); return Token { Token::Type::Open_Paren, finish(), token_location };
case ')': consume(); return { Token::Type::Close_Paren, finish(), token_location }; case ')': consume(); return Token { Token::Type::Close_Paren, finish(), token_location };
case '[': consume(); return { Token::Type::Open_Block, finish(), token_location }; case '[': consume(); return Token { Token::Type::Open_Block, finish(), token_location };
case ']': consume(); return { Token::Type::Close_Block, finish(), token_location }; case ']': consume(); return Token { Token::Type::Close_Block, finish(), token_location };
case ';': consume(); return { Token::Type::Expression_Separator, finish(), token_location }; case ';': consume(); return Token { Token::Type::Expression_Separator, finish(), token_location };
case '|': case '|':
consume(); consume();
@ -82,7 +82,7 @@ auto Lexer::next_token() -> Result<Token>
// is operator, then this character is part of operator sequence. // is operator, then this character is part of operator sequence.
// Additionally we explicitly allow for `|foo|=0` here // Additionally we explicitly allow for `|foo|=0` here
if (Valid_Operator_Chars.find(peek()) == std::string_view::npos || peek() == '=') if (Valid_Operator_Chars.find(peek()) == std::string_view::npos || peek() == '=')
return { Token::Type::Parameter_Separator, finish(), token_location }; return Token { Token::Type::Parameter_Separator, finish(), token_location };
} }
if (consume_if(unicode::is_digit)) { if (consume_if(unicode::is_digit)) {
@ -97,52 +97,28 @@ auto Lexer::next_token() -> Result<Token>
rewind(); rewind();
} }
} }
return { Token::Type::Numeric, finish(), token_location }; return Token { Token::Type::Numeric, finish(), token_location };
} }
// lex chord declaration
if (consume_if(Notes_Symbols)) { if (consume_if(Notes_Symbols)) {
// chord declaration // Allow `c#`
constexpr u8 Expect_Number = 0b01;
constexpr u8 Expect_Move = 0b10;
constexpr u8 Expect_Number_Or_Move = 0b11;
auto current = Expect_Number;
std::string_view accepted_digits = "12357";
usize digit_cursor = 0;
consume_if('#'); consume_if('#');
for (;;) { // Any of the following sequences are allowed
if ((current & Expect_Move) == Expect_Move && consume_if(",'")) { // c,,,,,,,,,,,,,,,,
current = Expect_Number; // c1,,,,2,3212
continue; // c1234'''''
} // during lexing
while (consume_if(",'") || consume_if(unicode::is_digit)) {}
if ((current & Expect_Number) == Expect_Number) {
bool found = false;
for (; digit_cursor < accepted_digits.size(); ++digit_cursor) {
if (consume_if(accepted_digits[digit_cursor])) {
found = true;
break;
}
}
if (found) {
current = digit_cursor < accepted_digits.size()
? Expect_Number_Or_Move
: Expect_Move;
continue;
}
}
break;
}
// If we encounter any letter that is not part of chord declaration,
// then we have symbol, not chord declaration
if (unicode::is_identifier(peek(), unicode::First_Character::No)) { if (unicode::is_identifier(peek(), unicode::First_Character::No)) {
goto symbol_lexing; goto symbol_lexing;
} }
return { Token::Type::Chord, finish(), token_location }; return Token { Token::Type::Chord, finish(), token_location };
} }
using namespace std::placeholders; using namespace std::placeholders;
@ -164,7 +140,7 @@ auto Lexer::next_token() -> Result<Token>
if (consume_if(Valid_Operator_Chars)) { if (consume_if(Valid_Operator_Chars)) {
while (consume_if(Valid_Operator_Chars)) {} while (consume_if(Valid_Operator_Chars)) {}
return { Token::Type::Operator, finish(), token_location }; return Token { Token::Type::Operator, finish(), token_location };
} }
return errors::unrecognized_character(peek(), token_location); return errors::unrecognized_character(peek(), token_location);

View File

@ -100,7 +100,7 @@ std::ostream& operator<<(std::ostream& os, Location const& location);
void assert(bool condition, std::string message, Location loc = Location::caller()); void assert(bool condition, std::string message, Location loc = Location::caller());
// Marks part of code that was not implemented yet // Marks part of code that was not implemented yet
[[noreturn]] void unimplemented(Location loc = Location::caller()); [[noreturn]] void unimplemented(std::string_view message = {}, Location loc = Location::caller());
// Marks location that should not be reached // Marks location that should not be reached
[[noreturn]] void unreachable(Location loc = Location::caller()); [[noreturn]] void unreachable(Location loc = Location::caller());

View File

@ -94,12 +94,16 @@ Result<Ast> Parser::parse_atomic_expression()
{ {
switch (Try(peek_type())) { switch (Try(peek_type())) {
case Token::Type::Keyword: case Token::Type::Keyword:
// Not all keywords are literals. Keywords like `true` can be part of an atomic expression (essentially a single value),
// but keywords like `var` announce a variable declaration, which is handled higher up in expression parsing.
// So we need to explicitly allow only keywords that are also literals.
if (std::find(Literal_Keywords.begin(), Literal_Keywords.end(), peek()->source) == Literal_Keywords.end()) { if (std::find(Literal_Keywords.begin(), Literal_Keywords.end(), peek()->source) == Literal_Keywords.end()) {
return errors::unexpected_token(*peek()); return errors::unexpected_token(*peek());
} }
[[fallthrough]]; [[fallthrough]];
case Token::Type::Symbol: case Token::Type::Chord:
case Token::Type::Numeric: case Token::Type::Numeric:
case Token::Type::Symbol:
return Ast::literal(consume()); return Ast::literal(consume());
case Token::Type::Open_Block: case Token::Type::Open_Block:
@ -233,6 +237,8 @@ Result<void> Parser::ensure(Token::Type type) const
: Result<void>{}; : Result<void>{};
} }
// It is unclear whether deferring the parsing of literal values until value creation (the current approach) is a good idea.
// It may cause unexpected performance degradation during program evaluation.
Ast Ast::literal(Token token) Ast Ast::literal(Token token)
{ {
Ast ast; Ast ast;

View File

@ -1,19 +1,61 @@
#include <musique.hh> #include <musique.hh>
/// Satisfied when a `Container` can be subscripted with a `Key` and the
/// result of that subscript is convertible to `Element`.
template<typename Container, typename Key, typename Element>
concept Indexable = requires(Container container, Key key) {
	{ container[key] } -> std::convertible_to<Element>;
};
/// Create hash out of note literal like `c` or `e#`
///
/// The first character occupies the low byte and the optional second
/// character (the `#`) the high byte. A one-character note hashes with a zero
/// high byte regardless of whether it comes from a NUL-terminated literal or
/// from a sized view.
///
/// For types exposing `.size()` the second character is only read when it
/// exists; an empty sized note hashes to 0. Types without `.size()` (string
/// literals, pointers) are expected to be NUL-terminated so that index 1 is
/// readable.
constexpr u16 hash_note(Indexable<usize, char> auto const& note)
{
	u8 first = 0;
	u8 second = 0;

	if constexpr (requires { note.size(); }) {
		// Sized views (like token source) carry no terminator, so reading
		// past the last character would touch memory outside of the note.
		if (note.size() > 0) first = u8(note[0]);
		if (note.size() > 1) second = u8(note[1]);
	} else {
		first = u8(note[0]);
		second = u8(note[1]);
	}

	// Bytes are converted to unsigned before shifting so a negative `char` never gets shifted
	return u16(first | (second << 8));
}
/// Map note literal like `c` or `e#` to its semitone offset from `c`
///
/// Natural notes map to c=0, d=2, e=4, f=5, g=7, a=9, h=11 and a trailing `#`
/// raises the note by one semitone, so `e#` shares its index with `f` and
/// `h#` yields 12. `b` is accepted as another spelling of `h`.
///
/// Any other input ends in `unreachable()`.
constexpr u8 note_index(Indexable<usize, char> auto const& note)
{
	switch (hash_note(note)) {
	case hash_note("c"):  return 0;
	case hash_note("c#"): return 1;
	case hash_note("d"):  return 2;
	case hash_note("d#"): return 3;
	case hash_note("e"):  return 4;
	case hash_note("e#"): return 5; // one semitone above `e`, like every other sharp
	case hash_note("f"):  return 5;
	case hash_note("f#"): return 6;
	case hash_note("g"):  return 7;
	case hash_note("g#"): return 8;
	case hash_note("a"):  return 9;
	case hash_note("a#"): return 10;
	case hash_note("h"):  return 11;
	case hash_note("b"):  return 11;
	case hash_note("h#"): return 12;
	case hash_note("b#"): return 12;
	}
	// This should be unreachable since the parser limits which characters can pass as notes
	unreachable();
}
Result<Value> Value::from(Token t) Result<Value> Value::from(Token t)
{ {
switch (t.type) { switch (t.type) {
case Token::Type::Numeric: case Token::Type::Numeric:
return Value::number(Try(Number::from(std::move(t)))); return Value::number(Try(Number::from(std::move(t))));
case Token::Type::Symbol:
return Value::symbol(std::string(t.source));
case Token::Type::Keyword: case Token::Type::Keyword:
if (t.source == "false") return Value::boolean(false); if (t.source == "false") return Value::boolean(false);
if (t.source == "nil") return Value{}; if (t.source == "nil") return Value{};
if (t.source == "true") return Value::boolean(true); if (t.source == "true") return Value::boolean(true);
unreachable(); unreachable();
case Token::Type::Symbol: case Token::Type::Chord:
return Value::symbol(std::string(t.source)); if (t.source.size() == 1 || (t.source.size() == 2 && t.source.back() == '#')) {
unimplemented();
}
unimplemented("only simple note values (like c or e#) are supported now");
default: default:
unimplemented(); unimplemented();