Solved almost all problems with lexing.
The remaining one is better Unicode support.
This commit is contained in:
parent
e32710da85
commit
22eb0684c2
20
src/lexer.cc
20
src/lexer.cc
@ -2,7 +2,7 @@
|
||||
|
||||
constexpr std::string_view Notes_Symbols = "abcedefgh";
|
||||
constexpr std::string_view Valid_Operator_Chars =
|
||||
"+-*/" // arithmetic
|
||||
"+-*/:%" // arithmetic
|
||||
"|&^" // logic & bit operations
|
||||
"<>=!" // comparisons
|
||||
;
|
||||
@ -51,7 +51,7 @@ auto Lexer::next_token() -> Result<Token>
|
||||
return { Token::Type::Numeric, finish(), token_location };
|
||||
}
|
||||
|
||||
if (consume_if([](u32 ch) { return Notes_Symbols.find(ch) != std::string_view::npos; })) {
|
||||
if (consume_if(Notes_Symbols)) {
|
||||
// chord declaration
|
||||
constexpr u8 Expect_Number = 0b01;
|
||||
constexpr u8 Expect_Move = 0b10;
|
||||
@ -61,10 +61,10 @@ auto Lexer::next_token() -> Result<Token>
|
||||
std::string_view accepted_digits = "12357";
|
||||
usize digit_cursor = 0;
|
||||
|
||||
consume_if('#');
|
||||
|
||||
for (;;) {
|
||||
if ((current & Expect_Move) == Expect_Move
|
||||
&& consume_if([](u32 c) { return c == ',' || c == '\''; })
|
||||
) {
|
||||
if ((current & Expect_Move) == Expect_Move && consume_if(",'")) {
|
||||
current = Expect_Number;
|
||||
continue;
|
||||
}
|
||||
@ -72,7 +72,7 @@ auto Lexer::next_token() -> Result<Token>
|
||||
if ((current & Expect_Number) == Expect_Number) {
|
||||
bool found = false;
|
||||
for (; digit_cursor < accepted_digits.size(); ++digit_cursor) {
|
||||
if (consume_if([&](u32 c) { return u32(accepted_digits[digit_cursor]) == c; })) {
|
||||
if (consume_if(accepted_digits[digit_cursor])) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
@ -104,11 +104,13 @@ auto Lexer::next_token() -> Result<Token>
|
||||
) {
|
||||
}
|
||||
|
||||
return { Token::Type::Symbol, finish(), token_location };
|
||||
Token t = { Token::Type::Symbol, finish(), token_location };
|
||||
if (t.source == "v") t.type = Token::Type::Operator;
|
||||
return t;
|
||||
}
|
||||
|
||||
if (Valid_Operator_Chars.find(peek()) != std::string_view::npos) {
|
||||
while (consume() && Valid_Operator_Chars.find(peek()) != std::string_view::npos) {}
|
||||
if (consume_if(Valid_Operator_Chars)) {
|
||||
while (consume_if(Valid_Operator_Chars)) {}
|
||||
return { Token::Type::Operator, finish(), token_location };
|
||||
}
|
||||
|
||||
|
@ -1,7 +1,9 @@
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <concepts>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <optional>
|
||||
#include <ostream>
|
||||
#include <string_view>
|
||||
@ -202,7 +204,18 @@ struct Lexer
|
||||
|
||||
inline auto consume_if(auto test) -> bool
|
||||
{
|
||||
return test(peek()) && (consume(), true);
|
||||
bool condition;
|
||||
if constexpr (requires { test(peek()) && true; }) {
|
||||
condition = test(peek());
|
||||
} else if constexpr (std::is_integral_v<decltype(test)>) {
|
||||
condition = (u32(test) == peek());
|
||||
} else if constexpr (std::is_convertible_v<decltype(test), char const*>) {
|
||||
auto const end = test + std::strlen(test);
|
||||
condition = std::find(test, end, peek()) != end;
|
||||
} else {
|
||||
condition = std::find(std::begin(test), std::end(test), peek()) != std::end(test);
|
||||
}
|
||||
return condition && (consume(), true);
|
||||
}
|
||||
|
||||
// Goes back last rune
|
||||
|
@ -106,7 +106,7 @@ bool unicode::is_letter(u32 letter)
|
||||
|
||||
bool unicode::is_identifier(u32 letter, unicode::First_Character is_first_character)
|
||||
{
|
||||
return (unicode::is_letter(letter) || letter == '_')
|
||||
return (unicode::is_letter(letter) || letter == '_' || letter == '#' || letter == '$' || letter == '@')
|
||||
|| (!bool(is_first_character) && (
|
||||
letter == '-' || letter == '_' || letter == '\'' || unicode::is_digit(letter)));
|
||||
letter == '-' || letter == '\'' || unicode::is_digit(letter)));
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user