Parsing symbols
This commit is contained in:
parent
d6edc2e6e4
commit
62c2faa33a
15
src/lexer.cc
15
src/lexer.cc
@ -80,13 +80,24 @@ auto Lexer::next_token() -> Result<Token>
|
||||
break;
|
||||
}
|
||||
|
||||
if (unicode::is_letter(peek())) {
|
||||
assert(false && "symbols are not implemented yet");
|
||||
if (unicode::is_identifier(peek(), unicode::First_Character::No)) {
|
||||
goto symbol_lexing;
|
||||
}
|
||||
|
||||
return { Token::Type::Chord, finish(), token_location };
|
||||
}
|
||||
|
||||
using namespace std::placeholders;
|
||||
if (consume_if(std::bind(unicode::is_identifier, _1, unicode::First_Character::Yes))) {
|
||||
symbol_lexing:
|
||||
for (auto predicate = std::bind(unicode::is_identifier, _1, unicode::First_Character::No);
|
||||
consume_if(predicate);
|
||||
) {
|
||||
}
|
||||
|
||||
return { Token::Type::Symbol, finish(), token_location };
|
||||
}
|
||||
|
||||
return errors::unrecognized_character(peek(), token_location);
|
||||
}
|
||||
|
||||
|
@ -111,9 +111,21 @@ namespace unicode
|
||||
constexpr u32 Max_Bytes = 4;
|
||||
}
|
||||
|
||||
// is_digit returns true if `digit` is an ASCII digit
|
||||
bool is_digit(u32 digit);
|
||||
|
||||
// is_space returns true if `space` is an ASCII blank character
|
||||
bool is_space(u32 space);
|
||||
|
||||
// is_letter returns true if `letter` is considered a letter by Unicode
|
||||
bool is_letter(u32 letter);
|
||||
|
||||
// is_identifier returns true if `letter` is a valid character for an identifier.
|
||||
//
|
||||
// Its result is modified by the is_first_character flag, which controls whether some character
|
||||
// classes (such as digits) are accepted: they are only allowed NOT at the front of the identifier
|
||||
enum class First_Character : bool { Yes = true, No = false };
|
||||
bool is_identifier(u32 letter, First_Character is_first_character);
|
||||
}
|
||||
|
||||
namespace utf8
|
||||
|
@ -98,4 +98,16 @@ suite lexer_test = [] {
|
||||
expect_token_type_and_value(Token::Type::Chord, "f1'2'3'5'7'");
|
||||
expect_token_type_and_value(Token::Type::Chord, "b1,2,5,7,");
|
||||
};
|
||||
|
||||
"Symbol literals"_test = [] {
|
||||
expect_token_type_and_value(Token::Type::Symbol, "i");
|
||||
expect_token_type_and_value(Token::Type::Symbol, "i2");
|
||||
expect_token_type_and_value(Token::Type::Symbol, "example");
|
||||
expect_token_type_and_value(Token::Type::Symbol, "d1envelope");
|
||||
expect_token_type_and_value(Token::Type::Symbol, "kebab-case");
|
||||
expect_token_type_and_value(Token::Type::Symbol, "snake_case");
|
||||
expect_token_type_and_value(Token::Type::Symbol, "camelCase");
|
||||
expect_token_type_and_value(Token::Type::Symbol, "PascalCase");
|
||||
expect_token_type_and_value(Token::Type::Symbol, "haskell'");
|
||||
};
|
||||
};
|
||||
|
@ -103,3 +103,10 @@ bool unicode::is_letter(u32 letter)
|
||||
// TODO Unicode letters handling
|
||||
return std::isalpha(letter);
|
||||
}
|
||||
|
||||
// Reports whether `letter` may appear in an identifier.
//
// Letters (as judged by unicode::is_letter) and '_' are accepted at any position.
// When `is_first_character` is First_Character::No, the characters '-' and '\''
// and digits (as judged by unicode::is_digit) are accepted as well.
bool unicode::is_identifier(u32 letter, unicode::First_Character is_first_character)
{
	// Characters that are valid regardless of their position.
	if (unicode::is_letter(letter) || letter == '_') {
		return true;
	}

	// Everything below is only permitted after the first character.
	if (static_cast<bool>(is_first_character)) {
		return false;
	}

	switch (letter) {
	case '-':
	case '\'':
		return true;
	default:
		return unicode::is_digit(letter);
	}
}
|
||||
|
Loading…
Reference in New Issue
Block a user