Infrastructure for note parsing

2022-05-24 00:48:15 +02:00 · 2022-05-24 00:48:15 +02:00 · 7d2ba379f7
commit 7d2ba379f7
parent 3c476d4629
5 changed files with 77 additions and 49 deletions
--- a/src/errors.cc
+++ b/src/errors.cc
@@ -46,9 +46,13 @@ void assert(bool condition, std::string message, Location loc)
 	std::exit(1);
 }

-[[noreturn]] void unimplemented(Location loc)
+[[noreturn]] void unimplemented(std::string_view message, Location loc)
 {
-	error_heading(std::cerr, loc, Error_Level::Bug) << "this part was not implemented yet" << std::endl;
+	if (message.empty()) {
+		error_heading(std::cerr, loc, Error_Level::Bug) << "this part was not implemented yet" << std::endl;
+	} else {
+		error_heading(std::cerr, loc, Error_Level::Bug) << message << std::endl;
+	}
 	std::exit(1);
 }

--- a/src/lexer.cc
+++ b/src/lexer.cc
@@ -70,11 +70,11 @@ auto Lexer::next_token() -> Result<Token>
 	}

 	switch (peek()) {
-	case '(': consume(); return { Token::Type::Open_Paren,           finish(), token_location };
-	case ')': consume(); return { Token::Type::Close_Paren,          finish(), token_location };
-	case '[': consume(); return { Token::Type::Open_Block,           finish(), token_location };
-	case ']': consume(); return { Token::Type::Close_Block,          finish(), token_location };
-	case ';': consume(); return { Token::Type::Expression_Separator, finish(), token_location };
+	case '(': consume(); return Token { Token::Type::Open_Paren,           finish(), token_location };
+	case ')': consume(); return Token { Token::Type::Close_Paren,          finish(), token_location };
+	case '[': consume(); return Token { Token::Type::Open_Block,           finish(), token_location };
+	case ']': consume(); return Token { Token::Type::Close_Block,          finish(), token_location };
+	case ';': consume(); return Token { Token::Type::Expression_Separator, finish(), token_location };

 	case '|':
 		consume();
@@ -82,7 +82,7 @@ auto Lexer::next_token() -> Result<Token>
 		// is operator, then this character is part of operator sequence.
 		// Additionally we explicitly allow for `|foo|=0` here
 		if (Valid_Operator_Chars.find(peek()) == std::string_view::npos || peek() == '=')
-			return { Token::Type::Parameter_Separator, finish(), token_location };
+			return Token { Token::Type::Parameter_Separator, finish(), token_location };
 	}

 	if (consume_if(unicode::is_digit)) {
@@ -97,52 +97,28 @@ auto Lexer::next_token() -> Result<Token>
 				rewind();
 			}
 		}
-		return { Token::Type::Numeric, finish(), token_location };
+		return Token { Token::Type::Numeric, finish(), token_location };
 	}

+	// lex chord declaration
 	if (consume_if(Notes_Symbols)) {
-		// chord declaration
-		constexpr u8 Expect_Number         = 0b01;
-		constexpr u8 Expect_Move           = 0b10;
-		constexpr u8 Expect_Number_Or_Move = 0b11;
-
-		auto current = Expect_Number;
-		std::string_view accepted_digits = "12357";
-		usize digit_cursor = 0;
-
+		// Allow `c#`
 		consume_if('#');

-		for (;;) {
-			if ((current & Expect_Move) == Expect_Move && consume_if(",'")) {
-				current = Expect_Number;
-				continue;
-			}
-
-			if ((current & Expect_Number) == Expect_Number) {
-				bool found = false;
-				for (; digit_cursor < accepted_digits.size(); ++digit_cursor) {
-					if (consume_if(accepted_digits[digit_cursor])) {
-						found = true;
-						break;
-					}
-				}
-
-				if (found) {
-					current = digit_cursor < accepted_digits.size()
-						? Expect_Number_Or_Move
-						: Expect_Move;
-					continue;
-				}
-			}
-
-			break;
-		}
+		// Any of the following sequences are allowed
+		// c,,,,,,,,,,,,,,,,
+		// c1,,,,2,3212
+		// c1234'''''
+		// during lexing
+		while (consume_if(",'") || consume_if(unicode::is_digit)) {}

+		// If we encounter any letter that is not part of a chord declaration,
+		// then we have a symbol, not a chord declaration
 		if (unicode::is_identifier(peek(), unicode::First_Character::No)) {
 			goto symbol_lexing;
 		}

-		return { Token::Type::Chord, finish(), token_location };
+		return Token { Token::Type::Chord, finish(), token_location };
 	}

 	using namespace std::placeholders;
@@ -164,7 +140,7 @@ auto Lexer::next_token() -> Result<Token>

 	if (consume_if(Valid_Operator_Chars)) {
 		while (consume_if(Valid_Operator_Chars)) {}
-		return { Token::Type::Operator, finish(), token_location };
+		return Token { Token::Type::Operator, finish(), token_location };
 	}

 	return errors::unrecognized_character(peek(), token_location);
--- a/src/musique.hh
+++ b/src/musique.hh
@@ -100,7 +100,7 @@ std::ostream& operator<<(std::ostream& os, Location const& location);
 void assert(bool condition, std::string message, Location loc = Location::caller());

 // Marks part of code that was not implemented yet
-[[noreturn]] void unimplemented(Location loc = Location::caller());
+[[noreturn]] void unimplemented(std::string_view message = {}, Location loc = Location::caller());

 // Marks location that should not be reached
 [[noreturn]] void unreachable(Location loc = Location::caller());
--- a/src/parser.cc
+++ b/src/parser.cc
@@ -94,12 +94,16 @@ Result<Ast> Parser::parse_atomic_expression()
 {
 	switch (Try(peek_type())) {
 	case Token::Type::Keyword:
+		// Not all keywords are literals. Keywords like `true` can be part of an atomic expression (essentially a single value),
+		// but keywords like `var` announce a variable declaration, which is higher up in expression parsing.
+		// So we need to explicitly allow only keywords that are also literals.
 		if (std::find(Literal_Keywords.begin(), Literal_Keywords.end(), peek()->source) == Literal_Keywords.end()) {
 			return errors::unexpected_token(*peek());
 		}
 		[[fallthrough]];
-	case Token::Type::Symbol:
+	case Token::Type::Chord:
 	case Token::Type::Numeric:
+	case Token::Type::Symbol:
 		return Ast::literal(consume());

 	case Token::Type::Open_Block:
@@ -233,6 +237,8 @@ Result<void> Parser::ensure(Token::Type type) const
 		: Result<void>{};
 }

+// Don't know if it's a good idea to defer parsing of literal values up to value creation, which is the current approach.
+// This may cause unexpected performance degradation during program evaluation.
 Ast Ast::literal(Token token)
 {
 	Ast ast;
--- a/src/value.cc
+++ b/src/value.cc
@@ -1,19 +1,61 @@
 #include <musique.hh>

+template<typename T, typename Index, typename Expected>
+concept Indexable = requires(T t, Index i) {
+	{ t[i] } -> std::convertible_to<Expected>;
+};
+
+/// Create hash out of note literal like `c` or `e#`
+constexpr u16 hash_note(Indexable<usize, char> auto const& note)
+{
+	return u8(note[0]) | (note[1] << 8);
+}
+
+constexpr u8 note_index(Indexable<usize, char> auto const& note)
+{
+	switch (hash_note(note)) {
+	case hash_note("c"):  return  0;
+	case hash_note("c#"): return  1;
+	case hash_note("d"):  return  2;
+	case hash_note("d#"): return  3;
+	case hash_note("e"):  return  4;
+	case hash_note("e#"): return  4;
+	case hash_note("f"):  return  5;
+	case hash_note("f#"): return  6;
+	case hash_note("g"):  return  7;
+	case hash_note("g#"): return  8;
+	case hash_note("a"):  return  9;
+	case hash_note("a#"): return 10;
+	case hash_note("h"):  return 11;
+	case hash_note("b"):  return 11;
+	case hash_note("h#"): return 12;
+	case hash_note("b#"): return 12;
+	}
+	// This should be unreachable since the parser limits which characters can pass as notes
+	unreachable();
+}
+
 Result<Value> Value::from(Token t)
 {
 	switch (t.type) {
 	case Token::Type::Numeric:
 		return Value::number(Try(Number::from(std::move(t))));

+	case Token::Type::Symbol:
+		return Value::symbol(std::string(t.source));
+
 	case Token::Type::Keyword:
 		if (t.source == "false") return Value::boolean(false);
 		if (t.source == "nil")   return Value{};
 		if (t.source == "true")  return Value::boolean(true);
 		unreachable();

-	case Token::Type::Symbol:
-		return Value::symbol(std::string(t.source));
+	case Token::Type::Chord:
+		if (t.source.size() == 1 || (t.source.size() == 2 && t.source.back() == '#')) {
+			unimplemented();
+		}
+
+		unimplemented("only simple note values (like c or e#) are supported now");

 	default:
 		unimplemented();