Parsing symbols
This commit is contained in:
parent
d6edc2e6e4
commit
62c2faa33a
15
src/lexer.cc
15
src/lexer.cc
@ -80,13 +80,24 @@ auto Lexer::next_token() -> Result<Token>
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (unicode::is_letter(peek())) {
|
if (unicode::is_identifier(peek(), unicode::First_Character::No)) {
|
||||||
assert(false && "symbols are not implemented yet");
|
goto symbol_lexing;
|
||||||
}
|
}
|
||||||
|
|
||||||
return { Token::Type::Chord, finish(), token_location };
|
return { Token::Type::Chord, finish(), token_location };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
using namespace std::placeholders;
|
||||||
|
if (consume_if(std::bind(unicode::is_identifier, _1, unicode::First_Character::Yes))) {
|
||||||
|
symbol_lexing:
|
||||||
|
for (auto predicate = std::bind(unicode::is_identifier, _1, unicode::First_Character::No);
|
||||||
|
consume_if(predicate);
|
||||||
|
) {
|
||||||
|
}
|
||||||
|
|
||||||
|
return { Token::Type::Symbol, finish(), token_location };
|
||||||
|
}
|
||||||
|
|
||||||
return errors::unrecognized_character(peek(), token_location);
|
return errors::unrecognized_character(peek(), token_location);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -111,9 +111,21 @@ namespace unicode
|
|||||||
constexpr u32 Max_Bytes = 4;
|
constexpr u32 Max_Bytes = 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// is_digit returns true if `digit` is an ASCII digit
|
||||||
bool is_digit(u32 digit);
|
bool is_digit(u32 digit);
|
||||||
|
|
||||||
|
// is_space returns true if `space` is an ASCII blank character
|
||||||
bool is_space(u32 space);
|
bool is_space(u32 space);
|
||||||
|
|
||||||
|
// is_letter returns true if `letter` is considered a letter by Unicode
|
||||||
bool is_letter(u32 letter);
|
bool is_letter(u32 letter);
|
||||||
|
|
||||||
|
// is_identifier returns true if `letter` is a valid character for an identifier.
|
||||||
|
//
|
||||||
|
// It's modified by the is_first_character flag to determine some character classes
|
||||||
|
// allowance, like digits, which are allowed only when NOT at the front of the identifier
|
||||||
|
enum class First_Character : bool { Yes = true, No = false };
|
||||||
|
bool is_identifier(u32 letter, First_Character is_first_character);
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace utf8
|
namespace utf8
|
||||||
|
@ -98,4 +98,16 @@ suite lexer_test = [] {
|
|||||||
expect_token_type_and_value(Token::Type::Chord, "f1'2'3'5'7'");
|
expect_token_type_and_value(Token::Type::Chord, "f1'2'3'5'7'");
|
||||||
expect_token_type_and_value(Token::Type::Chord, "b1,2,5,7,");
|
expect_token_type_and_value(Token::Type::Chord, "b1,2,5,7,");
|
||||||
};
|
};
|
||||||
|
|
||||||
|
"Symbol literals"_test = [] {
|
||||||
|
expect_token_type_and_value(Token::Type::Symbol, "i");
|
||||||
|
expect_token_type_and_value(Token::Type::Symbol, "i2");
|
||||||
|
expect_token_type_and_value(Token::Type::Symbol, "example");
|
||||||
|
expect_token_type_and_value(Token::Type::Symbol, "d1envelope");
|
||||||
|
expect_token_type_and_value(Token::Type::Symbol, "kebab-case");
|
||||||
|
expect_token_type_and_value(Token::Type::Symbol, "snake_case");
|
||||||
|
expect_token_type_and_value(Token::Type::Symbol, "camelCase");
|
||||||
|
expect_token_type_and_value(Token::Type::Symbol, "PascalCase");
|
||||||
|
expect_token_type_and_value(Token::Type::Symbol, "haskell'");
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
@ -103,3 +103,10 @@ bool unicode::is_letter(u32 letter)
|
|||||||
// TODO Unicode letters handling
|
// TODO Unicode letters handling
|
||||||
return std::isalpha(letter);
|
return std::isalpha(letter);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool unicode::is_identifier(u32 letter, unicode::First_Character is_first_character)
|
||||||
|
{
|
||||||
|
return (unicode::is_letter(letter) || letter == '_')
|
||||||
|
|| (!bool(is_first_character) && (
|
||||||
|
letter == '-' || letter == '_' || letter == '\'' || unicode::is_digit(letter)));
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user