Parsing symbols

2022-05-02 19:24:29 +02:00 · 2022-05-02 19:24:29 +02:00 · 62c2faa33a
commit 62c2faa33a
parent d6edc2e6e4
4 changed files with 44 additions and 2 deletions
--- a/src/lexer.cc
+++ b/src/lexer.cc
@ -80,13 +80,24 @@ auto Lexer::next_token() -> Result<Token>
 			break;
 		}
-		if (unicode::is_letter(peek())) {
+		if (unicode::is_identifier(peek(), unicode::First_Character::No)) {
-			assert(false && "symbols are not implemented yet");
+			goto symbol_lexing;
 		}
 		return { Token::Type::Chord, finish(), token_location };
 	}
 	using namespace std::placeholders;
 	if (consume_if(std::bind(unicode::is_identifier, _1, unicode::First_Character::Yes))) {
 	symbol_lexing:
 		for (auto predicate = std::bind(unicode::is_identifier, _1, unicode::First_Character::No);
 				consume_if(predicate);
 		) {
 		}
 		return { Token::Type::Symbol, finish(), token_location };
 	}
 	return errors::unrecognized_character(peek(), token_location);
 }
--- a/src/musique.hh
+++ b/src/musique.hh
@ -111,9 +111,21 @@ namespace unicode
 		constexpr u32 Max_Bytes  = 4;
 	}
 	// is_digit returns true if `digit` is an ASCII digit
 	bool is_digit(u32 digit);
 	// is_space returns true if `space` is an ASCII blank character
 	bool is_space(u32 space);
 	// is_letter returns true if `letter` is considered a letter by Unicode
 	bool is_letter(u32 letter);
 	// is_identifier returns true if `letter` is a valid character for an identifier.
 	//
 	// The is_first_character flag modifies which character classes are allowed:
 	// some of them, like digits, are only allowed when NOT at the front of the identifier.
 	//
 	// First_Character states whether the tested character is at the front of the identifier.
 	enum class First_Character : bool { Yes = true, No = false };
 	bool is_identifier(u32 letter, First_Character is_first_character);
 }
 namespace utf8
--- a/src/tests/lex.cc
+++ b/src/tests/lex.cc
@ -98,4 +98,16 @@ suite lexer_test = [] {
 		expect_token_type_and_value(Token::Type::Chord, "f1'2'3'5'7'");
 		expect_token_type_and_value(Token::Type::Chord, "b1,2,5,7,");
 	};
 	// Inputs that are expected to be lexed as Symbol tokens: single letters, plain words,
 	// and identifiers containing digits, '-', '_' or a trailing '\''.
 	// NOTE(review): expect_token_type_and_value is defined elsewhere; presumably it checks
 	// that the whole input is lexed as one token of the given type - confirm against the helper.
 	"Symbol literals"_test = [] {
 		expect_token_type_and_value(Token::Type::Symbol, "i");
 		expect_token_type_and_value(Token::Type::Symbol, "i2");
 		expect_token_type_and_value(Token::Type::Symbol, "example");
 		// NOTE(review): starts like the chord literals tested above ("d1"); presumably
 		// exercises the chord-to-symbol fallback in the lexer - confirm.
 		expect_token_type_and_value(Token::Type::Symbol, "d1envelope");
 		expect_token_type_and_value(Token::Type::Symbol, "kebab-case");
 		expect_token_type_and_value(Token::Type::Symbol, "snake_case");
 		expect_token_type_and_value(Token::Type::Symbol, "camelCase");
 		expect_token_type_and_value(Token::Type::Symbol, "PascalCase");
 		expect_token_type_and_value(Token::Type::Symbol, "haskell'");
 	};
 };
--- a/src/unicode.cc
+++ b/src/unicode.cc
@ -103,3 +103,10 @@ bool unicode::is_letter(u32 letter)
 	// TODO Unicode letters handling
 	return std::isalpha(letter);
 }
 // is_identifier reports whether `letter` may appear in an identifier.
 //
 // Letters (as decided by unicode::is_letter) and '_' are accepted at any position.
 // '-', '\'' and digits (as decided by unicode::is_digit) are accepted only when
 // is_first_character is First_Character::No.
 bool unicode::is_identifier(u32 letter, unicode::First_Character is_first_character)
 {
 	// Characters allowed at every position of the identifier
 	if (unicode::is_letter(letter) || letter == '_') {
 		return true;
 	}

 	// Nothing else may start an identifier
 	if (static_cast<bool>(is_first_character)) {
 		return false;
 	}

 	// Characters allowed only after the first one
 	switch (letter) {
 	case '-':
 	case '\'':
 		return true;
 	default:
 		return unicode::is_digit(letter);
 	}
 }