Parsing symbols

2022-05-02 19:24:29 +02:00 · 2022-05-02 19:24:29 +02:00 · 62c2faa33a
commit 62c2faa33a
parent d6edc2e6e4
4 changed files with 44 additions and 2 deletions
--- a/src/lexer.cc
+++ b/src/lexer.cc
@ -80,13 +80,24 @@ auto Lexer::next_token() -> Result<Token>
 			break;
 		}

-		if (unicode::is_letter(peek())) {
-			assert(false && "symbols are not implemented yet");
+		if (unicode::is_identifier(peek(), unicode::First_Character::No)) {
+			goto symbol_lexing;
 		}

 		return { Token::Type::Chord, finish(), token_location };
 	}

+	using namespace std::placeholders;
+	if (consume_if(std::bind(unicode::is_identifier, _1, unicode::First_Character::Yes))) {
+	symbol_lexing:
+		for (auto predicate = std::bind(unicode::is_identifier, _1, unicode::First_Character::No);
+				consume_if(predicate);
+		) {
+		}
+
+		return { Token::Type::Symbol, finish(), token_location };
+	}
+
 	return errors::unrecognized_character(peek(), token_location);
 }

--- a/src/musique.hh
+++ b/src/musique.hh
@ -111,9 +111,21 @@ namespace unicode
 		constexpr u32 Max_Bytes  = 4;
 	}

+	// is_digit returns true if `digit` is an ASCII digit
 	bool is_digit(u32 digit);
+
+	// is_space returns true if `space` is an ASCII blank character
 	bool is_space(u32 space);
+
+	// is_letter returns true if `letter` is considered a letter by Unicode
 	bool is_letter(u32 letter);
+
+	// is_identifier returns true if `letter` is a valid character for an identifier.
+	//
+	// The result is modified by the is_first_character flag: some character classes,
+	// like digits, are allowed only when NOT at the front of the identifier.
+	enum class First_Character : bool { Yes = true, No = false };
+	bool is_identifier(u32 letter, First_Character is_first_character);
 }

 namespace utf8
--- a/src/tests/lex.cc
+++ b/src/tests/lex.cc
@ -98,4 +98,16 @@ suite lexer_test = [] {
 		expect_token_type_and_value(Token::Type::Chord, "f1'2'3'5'7'");
 		expect_token_type_and_value(Token::Type::Chord, "b1,2,5,7,");
 	};
+
+	"Symbol literals"_test = [] { // NOTE(review): assumes expect_token_type_and_value lexes the string and checks the resulting token's type and text — confirm against the helper
+		expect_token_type_and_value(Token::Type::Symbol, "i"); // single-letter symbol
+		expect_token_type_and_value(Token::Type::Symbol, "i2"); // digit after the first character
+		expect_token_type_and_value(Token::Type::Symbol, "example");
+		expect_token_type_and_value(Token::Type::Symbol, "d1envelope"); // NOTE(review): presumably input that starts like a chord ("d1") and continues as a symbol — confirm
+		expect_token_type_and_value(Token::Type::Symbol, "kebab-case"); // '-' after the first character
+		expect_token_type_and_value(Token::Type::Symbol, "snake_case"); // '_' inside the symbol
+		expect_token_type_and_value(Token::Type::Symbol, "camelCase");
+		expect_token_type_and_value(Token::Type::Symbol, "PascalCase");
+		expect_token_type_and_value(Token::Type::Symbol, "haskell'"); // trailing apostrophe
+	};
 };
--- a/src/unicode.cc
+++ b/src/unicode.cc
@ -103,3 +103,10 @@ bool unicode::is_letter(u32 letter)
 	// TODO Unicode letters handling
 	return std::isalpha(letter);
 }
+
+bool unicode::is_identifier(u32 letter, unicode::First_Character is_first_character) // true if `letter` may appear in an identifier at the given position
+{
+	return (unicode::is_letter(letter) || letter == '_') // anything is_letter accepts, or '_', is valid at any position
+	|| (!bool(is_first_character) && ( // the classes below are valid only after the first character
+			letter == '-' || letter == '_' || letter == '\'' || unicode::is_digit(letter))); // NOTE(review): '_' is already accepted by the first clause, so the check here is redundant
+}