Solved almost all problems with lexing

The one remaining problem is better Unicode support.
This commit is contained in:
Robert Bendun 2022-05-02 20:06:40 +02:00
parent e32710da85
commit 22eb0684c2
3 changed files with 27 additions and 12 deletions

View File

@ -2,7 +2,7 @@
constexpr std::string_view Notes_Symbols = "abcedefgh"; constexpr std::string_view Notes_Symbols = "abcedefgh";
constexpr std::string_view Valid_Operator_Chars = constexpr std::string_view Valid_Operator_Chars =
"+-*/" // arithmetic "+-*/:%" // arithmetic
"|&^" // logic & bit operations "|&^" // logic & bit operations
"<>=!" // comparisons "<>=!" // comparisons
; ;
@ -51,7 +51,7 @@ auto Lexer::next_token() -> Result<Token>
return { Token::Type::Numeric, finish(), token_location }; return { Token::Type::Numeric, finish(), token_location };
} }
if (consume_if([](u32 ch) { return Notes_Symbols.find(ch) != std::string_view::npos; })) { if (consume_if(Notes_Symbols)) {
// chord declaration // chord declaration
constexpr u8 Expect_Number = 0b01; constexpr u8 Expect_Number = 0b01;
constexpr u8 Expect_Move = 0b10; constexpr u8 Expect_Move = 0b10;
@ -61,10 +61,10 @@ auto Lexer::next_token() -> Result<Token>
std::string_view accepted_digits = "12357"; std::string_view accepted_digits = "12357";
usize digit_cursor = 0; usize digit_cursor = 0;
consume_if('#');
for (;;) { for (;;) {
if ((current & Expect_Move) == Expect_Move if ((current & Expect_Move) == Expect_Move && consume_if(",'")) {
&& consume_if([](u32 c) { return c == ',' || c == '\''; })
) {
current = Expect_Number; current = Expect_Number;
continue; continue;
} }
@ -72,7 +72,7 @@ auto Lexer::next_token() -> Result<Token>
if ((current & Expect_Number) == Expect_Number) { if ((current & Expect_Number) == Expect_Number) {
bool found = false; bool found = false;
for (; digit_cursor < accepted_digits.size(); ++digit_cursor) { for (; digit_cursor < accepted_digits.size(); ++digit_cursor) {
if (consume_if([&](u32 c) { return u32(accepted_digits[digit_cursor]) == c; })) { if (consume_if(accepted_digits[digit_cursor])) {
found = true; found = true;
break; break;
} }
@ -104,11 +104,13 @@ auto Lexer::next_token() -> Result<Token>
) { ) {
} }
return { Token::Type::Symbol, finish(), token_location }; Token t = { Token::Type::Symbol, finish(), token_location };
if (t.source == "v") t.type = Token::Type::Operator;
return t;
} }
if (Valid_Operator_Chars.find(peek()) != std::string_view::npos) { if (consume_if(Valid_Operator_Chars)) {
while (consume() && Valid_Operator_Chars.find(peek()) != std::string_view::npos) {} while (consume_if(Valid_Operator_Chars)) {}
return { Token::Type::Operator, finish(), token_location }; return { Token::Type::Operator, finish(), token_location };
} }

View File

@ -1,7 +1,9 @@
#pragma once #pragma once
#include <cassert> #include <cassert>
#include <concepts>
#include <cstdint> #include <cstdint>
#include <cstring>
#include <optional> #include <optional>
#include <ostream> #include <ostream>
#include <string_view> #include <string_view>
@ -202,7 +204,18 @@ struct Lexer
inline auto consume_if(auto test) -> bool inline auto consume_if(auto test) -> bool
{ {
return test(peek()) && (consume(), true); bool condition;
if constexpr (requires { test(peek()) && true; }) {
condition = test(peek());
} else if constexpr (std::is_integral_v<decltype(test)>) {
condition = (u32(test) == peek());
} else if constexpr (std::is_convertible_v<decltype(test), char const*>) {
auto const end = test + std::strlen(test);
condition = std::find(test, end, peek()) != end;
} else {
condition = std::find(std::begin(test), std::end(test), peek()) != std::end(test);
}
return condition && (consume(), true);
} }
// Goes back last rune // Goes back last rune

View File

@ -106,7 +106,7 @@ bool unicode::is_letter(u32 letter)
bool unicode::is_identifier(u32 letter, unicode::First_Character is_first_character) bool unicode::is_identifier(u32 letter, unicode::First_Character is_first_character)
{ {
return (unicode::is_letter(letter) || letter == '_') return (unicode::is_letter(letter) || letter == '_' || letter == '#' || letter == '$' || letter == '@')
|| (!bool(is_first_character) && ( || (!bool(is_first_character) && (
letter == '-' || letter == '_' || letter == '\'' || unicode::is_digit(letter))); letter == '-' || letter == '\'' || unicode::is_digit(letter)));
} }