Infrastructure for note parsing

2022-05-24 00:48:15 +02:00 · 2022-05-24 00:48:15 +02:00 · 7d2ba379f7
commit 7d2ba379f7
parent 3c476d4629
5 changed files with 77 additions and 49 deletions
--- a/src/errors.cc
+++ b/src/errors.cc
@ -46,9 +46,13 @@ void assert(bool condition, std::string message, Location loc)
 	std::exit(1);
 }
-[[noreturn]] void unimplemented(Location loc)
+[[noreturn]] void unimplemented(std::string_view message, Location loc)
 {
-	error_heading(std::cerr, loc, Error_Level::Bug) << "this part was not implemented yet" << std::endl;
+	if (message.empty()) {
 		error_heading(std::cerr, loc, Error_Level::Bug) << "this part was not implemented yet" << std::endl;
 	} else {
 		error_heading(std::cerr, loc, Error_Level::Bug) << message << std::endl;
 	}
 	std::exit(1);
 }
--- a/src/lexer.cc
+++ b/src/lexer.cc
@ -70,11 +70,11 @@ auto Lexer::next_token() -> Result<Token>
 	}
 	switch (peek()) {
-	case '(': consume(); return { Token::Type::Open_Paren,           finish(), token_location };
+	case '(': consume(); return Token { Token::Type::Open_Paren,           finish(), token_location };
-	case ')': consume(); return { Token::Type::Close_Paren,          finish(), token_location };
+	case ')': consume(); return Token { Token::Type::Close_Paren,          finish(), token_location };
-	case '[': consume(); return { Token::Type::Open_Block,           finish(), token_location };
+	case '[': consume(); return Token { Token::Type::Open_Block,           finish(), token_location };
-	case ']': consume(); return { Token::Type::Close_Block,          finish(), token_location };
+	case ']': consume(); return Token { Token::Type::Close_Block,          finish(), token_location };
-	case ';': consume(); return { Token::Type::Expression_Separator, finish(), token_location };
+	case ';': consume(); return Token { Token::Type::Expression_Separator, finish(), token_location };
 	case '|':
 		consume();
@ -82,7 +82,7 @@ auto Lexer::next_token() -> Result<Token>
 		// is operator, then this character is part of operator sequence.
 		// Additionally we explicitly allow for `|foo|=0` here
 		if (Valid_Operator_Chars.find(peek()) == std::string_view::npos || peek() == '=')
-			return { Token::Type::Parameter_Separator, finish(), token_location };
+			return Token { Token::Type::Parameter_Separator, finish(), token_location };
 	}
 	if (consume_if(unicode::is_digit)) {
@ -97,52 +97,28 @@ auto Lexer::next_token() -> Result<Token>
 				rewind();
 			}
 		}
-		return { Token::Type::Numeric, finish(), token_location };
+		return Token { Token::Type::Numeric, finish(), token_location };
 	}
 	// lex chord declaration
 	if (consume_if(Notes_Symbols)) {
-		// chord declaration
+		// Allow `c#`
 		constexpr u8 Expect_Number         = 0b01;
 		constexpr u8 Expect_Move           = 0b10;
 		constexpr u8 Expect_Number_Or_Move = 0b11;
 		auto current = Expect_Number;
 		std::string_view accepted_digits = "12357";
 		usize digit_cursor = 0;
 		consume_if('#');
-		for (;;) {
+		// Any of the following sequences are allowed
-			if ((current & Expect_Move) == Expect_Move && consume_if(",'")) {
+		// c,,,,,,,,,,,,,,,,
-				current = Expect_Number;
+		// c1,,,,2,3212
-				continue;
+		// c1234'''''
-			}
+		// during lexing
-
+		while (consume_if(",'") || consume_if(unicode::is_digit)) {}
 			if ((current & Expect_Number) == Expect_Number) {
 				bool found = false;
 				for (; digit_cursor < accepted_digits.size(); ++digit_cursor) {
 					if (consume_if(accepted_digits[digit_cursor])) {
 						found = true;
 						break;
 					}
 				}
 				if (found) {
 					current = digit_cursor < accepted_digits.size()
 						? Expect_Number_Or_Move
 						: Expect_Move;
 					continue;
 				}
 			}
 			break;
 		}
 		// If we encounter any letter that is not part of chord declaration,
 		// then we have symbol, not chord declaration
 		if (unicode::is_identifier(peek(), unicode::First_Character::No)) {
 			goto symbol_lexing;
 		}
-		return { Token::Type::Chord, finish(), token_location };
+		return Token { Token::Type::Chord, finish(), token_location };
 	}
 	using namespace std::placeholders;
@ -164,7 +140,7 @@ auto Lexer::next_token() -> Result<Token>
 	if (consume_if(Valid_Operator_Chars)) {
 		while (consume_if(Valid_Operator_Chars)) {}
-		return { Token::Type::Operator, finish(), token_location };
+		return Token { Token::Type::Operator, finish(), token_location };
 	}
 	return errors::unrecognized_character(peek(), token_location);
--- a/src/musique.hh
+++ b/src/musique.hh
@ -100,7 +100,7 @@ std::ostream& operator<<(std::ostream& os, Location const& location);
 void assert(bool condition, std::string message, Location loc = Location::caller());
 // Marks part of code that was not implemented yet
-[[noreturn]] void unimplemented(Location loc = Location::caller());
+[[noreturn]] void unimplemented(std::string_view message = {}, Location loc = Location::caller());
 // Marks location that should not be reached
 [[noreturn]] void unreachable(Location loc = Location::caller());
--- a/src/parser.cc
+++ b/src/parser.cc
@ -94,12 +94,16 @@ Result<Ast> Parser::parse_atomic_expression()
 {
 	switch (Try(peek_type())) {
 	case Token::Type::Keyword:
 		// Not all keywords are literals. Keywords like `true` can be part of an atomic expression (essentially a single value),
 		// but keywords like `var` announce variable declaration which is higher up in expression parsing.
 		// So we need to explicitly allow only keywords that are also literals
 		if (std::find(Literal_Keywords.begin(), Literal_Keywords.end(), peek()->source) == Literal_Keywords.end()) {
 			return errors::unexpected_token(*peek());
 		}
 		[[fallthrough]];
-	case Token::Type::Symbol:
+	case Token::Type::Chord:
 	case Token::Type::Numeric:
 	case Token::Type::Symbol:
 		return Ast::literal(consume());
 	case Token::Type::Open_Block:
@ -233,6 +237,8 @@ Result<void> Parser::ensure(Token::Type type) const
 		: Result<void>{};
 }
 // Don't know if it's a good idea to defer parsing of literal values up to value creation, which is current approach.
 // This may create unexpected performance degradation during program evaluation.
 Ast Ast::literal(Token token)
 {
 	Ast ast;
--- a/src/value.cc
+++ b/src/value.cc
@ -1,19 +1,61 @@
 #include <musique.hh>
 /// Satisfied when a `T` can be subscripted with an `Index`
 /// and the result of that subscript converts to `Expected`.
 template<typename T, typename Index, typename Expected>
 concept Indexable = requires(T container, Index position) {
 	{ container[position] } -> std::convertible_to<Expected>;
 };
 /// Pack the first two characters of a note literal (such as `c` or `e#`)
 /// into a single 16-bit key: character 0 fills the low byte and
 /// character 1 is shifted into the high byte.
 ///
 /// Index 1 is read unconditionally, so the argument must be valid at that
 /// position; a string literal like "c" supplies its terminating NUL there.
 constexpr u16 hash_note(Indexable<usize, char> auto const& note)
 {
 	auto const low_byte  = u8(note[0]);
 	auto const high_part = note[1] << 8;
 	return low_byte | high_part;
 }
 /// Map a note literal (like `c` or `e#`) to its semitone offset counted from `c`.
 ///
 /// Dispatch happens on the two-character key produced by `hash_note`, so the
 /// argument must be indexable at positions 0 and 1. Both `h` and `b` are accepted
 /// as names for the same note (offset 11). A sharp adds exactly one semitone,
 /// which means `e#` shares its offset with `f` and `h#`/`b#` yield 12.
 constexpr u8 note_index(Indexable<usize, char> auto const& note)
 {
 	switch (hash_note(note)) {
 	case hash_note("c"):  return  0;
 	case hash_note("c#"): return  1;
 	case hash_note("d"):  return  2;
 	case hash_note("d#"): return  3;
 	case hash_note("e"):  return  4;
 	case hash_note("e#"): return  5; // one semitone above `e`, enharmonic with `f`
 	case hash_note("f"):  return  5;
 	case hash_note("f#"): return  6;
 	case hash_note("g"):  return  7;
 	case hash_note("g#"): return  8;
 	case hash_note("a"):  return  9;
 	case hash_note("a#"): return 10;
 	case hash_note("h"):  return 11;
 	case hash_note("b"):  return 11;
 	case hash_note("h#"): return 12;
 	case hash_note("b#"): return 12;
 	}
 	// This should be unreachable since the parser limits which characters can pass as notes
 	unreachable();
 }
 Result<Value> Value::from(Token t)
 {
 	switch (t.type) {
 	case Token::Type::Numeric:
 		return Value::number(Try(Number::from(std::move(t))));
 	case Token::Type::Symbol:
 		return Value::symbol(std::string(t.source));
 	case Token::Type::Keyword:
 		if (t.source == "false") return Value::boolean(false);
 		if (t.source == "nil")   return Value{};
 		if (t.source == "true")  return Value::boolean(true);
 		unreachable();
-	case Token::Type::Symbol:
+	case Token::Type::Chord:
-		return Value::symbol(std::string(t.source));
+		if (t.source.size() == 1 || (t.source.size() == 2 && t.source.back() == '#')) {
 			unimplemented();
 		}
 		unimplemented("only simple note values (like c or e#) are supported now");
 	default:
 		unimplemented();