Parsing symbols

This commit is contained in:
Robert Bendun 2022-05-02 19:24:29 +02:00
parent d6edc2e6e4
commit 62c2faa33a
4 changed files with 44 additions and 2 deletions

View File

@ -80,13 +80,24 @@ auto Lexer::next_token() -> Result<Token>
break; break;
} }
if (unicode::is_letter(peek())) { if (unicode::is_identifier(peek(), unicode::First_Character::No)) {
assert(false && "symbols are not implemented yet"); goto symbol_lexing;
} }
return { Token::Type::Chord, finish(), token_location }; return { Token::Type::Chord, finish(), token_location };
} }
using namespace std::placeholders;
if (consume_if(std::bind(unicode::is_identifier, _1, unicode::First_Character::Yes))) {
symbol_lexing:
for (auto predicate = std::bind(unicode::is_identifier, _1, unicode::First_Character::No);
consume_if(predicate);
) {
}
return { Token::Type::Symbol, finish(), token_location };
}
return errors::unrecognized_character(peek(), token_location); return errors::unrecognized_character(peek(), token_location);
} }

View File

@ -111,9 +111,21 @@ namespace unicode
constexpr u32 Max_Bytes = 4; constexpr u32 Max_Bytes = 4;
} }
// is_digit returns true if `digit` is an ASCII digit
bool is_digit(u32 digit); bool is_digit(u32 digit);
// is_space returns true if `space` is an ASCII blank character
bool is_space(u32 space); bool is_space(u32 space);
// is_letter returns true if `letter` is considered a letter by Unicode
bool is_letter(u32 letter); bool is_letter(u32 letter);
// is_identifier returns true if `letter` is a valid character for an identifier.
//
// It's modified by the is_first_character flag, which decides whether some character
// classes are allowed, like digits, which are only allowed NOT at the front of the identifier
enum class First_Character : bool { Yes = true, No = false };
bool is_identifier(u32 letter, First_Character is_first_character);
} }
namespace utf8 namespace utf8

View File

@ -98,4 +98,16 @@ suite lexer_test = [] {
expect_token_type_and_value(Token::Type::Chord, "f1'2'3'5'7'"); expect_token_type_and_value(Token::Type::Chord, "f1'2'3'5'7'");
expect_token_type_and_value(Token::Type::Chord, "b1,2,5,7,"); expect_token_type_and_value(Token::Type::Chord, "b1,2,5,7,");
}; };
// Symbol literals: each input below is expected to be reported as a
// Token::Type::Symbol. expect_token_type_and_value is defined outside this
// view; presumably it also checks that the token text equals the whole
// input - TODO confirm against the helper.
"Symbol literals"_test = [] {
// Plain letters, and a letter followed by a digit.
expect_token_type_and_value(Token::Type::Symbol, "i");
expect_token_type_and_value(Token::Type::Symbol, "i2");
expect_token_type_and_value(Token::Type::Symbol, "example");
// NOTE(review): looks like this targets input that begins like a chord
// (note letter + digit) but continues with identifier characters - confirm
// against the chord branch of Lexer::next_token.
expect_token_type_and_value(Token::Type::Symbol, "d1envelope");
// Common naming conventions: '-' and '_' in the middle, mixed letter case.
expect_token_type_and_value(Token::Type::Symbol, "kebab-case");
expect_token_type_and_value(Token::Type::Symbol, "snake_case");
expect_token_type_and_value(Token::Type::Symbol, "camelCase");
expect_token_type_and_value(Token::Type::Symbol, "PascalCase");
// Trailing apostrophe, as in Haskell-style "primed" names.
expect_token_type_and_value(Token::Type::Symbol, "haskell'");
};
}; };

View File

@ -103,3 +103,10 @@ bool unicode::is_letter(u32 letter)
// TODO Unicode letters handling // TODO Unicode letters handling
return std::isalpha(letter); return std::isalpha(letter);
} }
// Reports whether `letter` may appear in an identifier.
//
// Letters (as decided by unicode::is_letter) and '_' are accepted at any
// position. When `is_first_character` is First_Character::No, the characters
// '-' and '\'' and anything accepted by unicode::is_digit are accepted too.
bool unicode::is_identifier(u32 letter, unicode::First_Character is_first_character)
{
	// Character classes that are valid everywhere in an identifier.
	if (unicode::is_letter(letter)) {
		return true;
	}
	if (letter == '_') {
		return true;
	}

	// Everything below is only permitted after the first character.
	if (static_cast<bool>(is_first_character)) {
		return false;
	}

	switch (letter) {
	case '-':
	case '\'':
		return true;
	default:
		return unicode::is_digit(letter);
	}
}