2022-04-24 15:27:09 +02:00
|
|
|
#pragma once
|
|
|
|
|
2022-04-27 14:58:02 +02:00
|
|
|
#include <cassert>
#include <cstdint>
#include <optional>
#include <ostream>
#include <string_view>
#include <type_traits>
#include <utility>
#include <variant>

#include <tl/expected.hpp>
|
2022-04-24 15:27:09 +02:00
|
|
|
|
|
|
|
// Short names for fixed-width unsigned integers
using u8  = std::uint8_t;
using u16 = std::uint16_t;
using u32 = std::uint32_t;
using u64 = std::uint64_t;

// Short names for fixed-width signed integers
using i8  = std::int8_t;
using i16 = std::int16_t;
using i32 = std::int32_t;
using i64 = std::int64_t;

// Short names for the standard size and pointer-difference types
using usize = std::size_t;
using isize = std::ptrdiff_t;
|
2022-04-24 16:09:55 +02:00
|
|
|
|
2022-04-27 14:37:21 +02:00
|
|
|
// Fun wraps a function name (also an overload set or a function template) into
// a captureless generic lambda, so it can be passed around as a single object.
//
// Arguments are perfectly forwarded to `Function`. The result is returned as
// `decltype(auto)`, so a function returning a reference still returns that
// reference through the wrapper instead of a decayed copy.
#define Fun(Function) ([](auto&& ...args) -> decltype(auto) { return (Function)(std::forward<decltype(args)>(args)...); })
|
|
|
|
|
2022-04-24 16:09:55 +02:00
|
|
|
// Kinds of errors. The enumeration is unscoped, so the enumerators are
// reachable directly as errors::End_Of_File and errors::Unrecognized_Character.
namespace errors
{
	enum Type
	{
		End_Of_File = 0,
		Unrecognized_Character = 1
	};
}
|
|
|
|
|
2022-04-27 14:58:02 +02:00
|
|
|
struct Location
|
|
|
|
{
|
|
|
|
std::string_view filename = "<unnamed>";
|
|
|
|
usize column = 1, line = 1;
|
|
|
|
|
|
|
|
Location advance(u32 rune);
|
|
|
|
|
|
|
|
bool operator==(Location const& rhs) const = default;
|
|
|
|
|
|
|
|
static Location at(usize line, usize column)
|
|
|
|
{
|
|
|
|
Location loc;
|
|
|
|
loc.line = line;
|
|
|
|
loc.column = column;
|
|
|
|
return loc;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
std::ostream& operator<<(std::ostream& os, Location const& location);
|
|
|
|
|
2022-04-24 15:27:09 +02:00
|
|
|
struct Error
|
|
|
|
{
|
2022-04-24 16:09:55 +02:00
|
|
|
errors::Type type;
|
2022-05-02 14:50:04 +02:00
|
|
|
std::optional<Location> location = std::nullopt;
|
|
|
|
u32 invalid_character = 0;
|
2022-04-24 16:09:55 +02:00
|
|
|
|
|
|
|
bool operator==(errors::Type);
|
2022-04-24 15:27:09 +02:00
|
|
|
};
|
|
|
|
|
2022-05-02 15:13:12 +02:00
|
|
|
namespace errors
|
|
|
|
{
|
|
|
|
Error unrecognized_character(u32 invalid_character);
|
|
|
|
Error unrecognized_character(u32 invalid_character, Location location);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2022-04-24 15:27:09 +02:00
|
|
|
template<typename T>
|
2022-04-27 13:48:50 +02:00
|
|
|
struct Result : tl::expected<T, Error>
|
|
|
|
{
|
2022-05-02 15:13:12 +02:00
|
|
|
using Storage = tl::expected<T, Error>;
|
|
|
|
|
2022-04-27 13:48:50 +02:00
|
|
|
constexpr Result() = default;
|
|
|
|
|
2022-05-02 15:13:12 +02:00
|
|
|
constexpr Result(errors::Type error)
|
|
|
|
: Storage(tl::unexpected(Error { error }))
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
constexpr Result(Error error)
|
|
|
|
: Storage(tl::unexpected(std::move(error)))
|
2022-04-27 13:48:50 +02:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
template<typename ...Args>
|
|
|
|
constexpr Result(Args&& ...args)
|
2022-05-02 15:13:12 +02:00
|
|
|
: Storage( T{ std::forward<Args>(args)... } )
|
2022-04-27 13:48:50 +02:00
|
|
|
{
|
|
|
|
}
|
|
|
|
};
|
2022-04-24 15:27:09 +02:00
|
|
|
|
|
|
|
std::ostream& operator<<(std::ostream& os, Error const& err);
|
|
|
|
|
|
|
|
// Try evaluates `Value` once. When it holds no value, the enclosing function
// returns its error wrapped in tl::unexpected; otherwise the whole expression
// yields the held value, moved out of the temporary.
//
// NOTE: Built on statement expressions `({ ... })`, which are a language
// extension and not standard C++. GCC supports them; support in any other
// compiler has to be checked before relying on this macro there.
#define Try(Value) ({ \
	auto try_value = (Value); \
	if (!try_value.has_value()) \
		return tl::unexpected(try_value.error()); \
	*std::move(try_value); \
})
|
|
|
|
|
2022-04-27 14:37:21 +02:00
|
|
|
namespace unicode
|
|
|
|
{
|
|
|
|
inline namespace special_runes
|
|
|
|
{
|
|
|
|
constexpr u32 Rune_Error = 0xfffd;
|
|
|
|
constexpr u32 Rune_Self = 0x80;
|
|
|
|
constexpr u32 Max_Bytes = 4;
|
|
|
|
}
|
|
|
|
|
2022-05-02 19:24:29 +02:00
|
|
|
// is_digit returns true if `digit` is ASCII digit
|
2022-04-27 14:37:21 +02:00
|
|
|
bool is_digit(u32 digit);
|
2022-05-02 19:24:29 +02:00
|
|
|
|
|
|
|
// is_space return true if `space` is ASCII blank character
|
2022-04-27 14:37:21 +02:00
|
|
|
bool is_space(u32 space);
|
2022-05-02 19:24:29 +02:00
|
|
|
|
|
|
|
// is_letter returns true if `letter` is considered a letter by Unicode
|
2022-05-02 19:00:11 +02:00
|
|
|
bool is_letter(u32 letter);
|
2022-05-02 19:24:29 +02:00
|
|
|
|
|
|
|
// is_identifier returns true if `letter` is valid character for identifier.
|
|
|
|
//
|
|
|
|
// It's modifier by is_first_character flag to determine some character classes
|
|
|
|
// allowance like numbers, which are only allowed NOT at the front of the identifier
|
|
|
|
enum class First_Character : bool { Yes = true, No = false };
|
|
|
|
bool is_identifier(u32 letter, First_Character is_first_character);
|
2022-04-27 14:37:21 +02:00
|
|
|
}
|
|
|
|
|
2022-04-27 13:48:50 +02:00
|
|
|
namespace utf8
|
|
|
|
{
|
2022-04-27 14:37:21 +02:00
|
|
|
using namespace unicode::special_runes;
|
2022-04-27 13:48:50 +02:00
|
|
|
|
|
|
|
// Decodes rune and returns remaining string
|
2022-05-02 14:50:04 +02:00
|
|
|
auto decode(std::string_view s) -> std::pair<u32, std::string_view>;
|
|
|
|
auto length(std::string_view s) -> usize;
|
|
|
|
|
|
|
|
struct Print { u32 rune; };
|
2022-04-27 13:48:50 +02:00
|
|
|
}
|
|
|
|
|
2022-05-02 14:50:04 +02:00
|
|
|
std::ostream& operator<<(std::ostream& os, utf8::Print const& print);
|
|
|
|
|
2022-04-24 15:27:09 +02:00
|
|
|
struct Token
|
|
|
|
{
|
|
|
|
enum class Type
|
|
|
|
{
|
|
|
|
// like repeat or choose or chord
|
|
|
|
Symbol,
|
|
|
|
|
2022-05-02 19:42:57 +02:00
|
|
|
// like + - ++ < >
|
|
|
|
Operator,
|
|
|
|
|
2022-04-24 15:27:09 +02:00
|
|
|
// chord literal, like c125
|
|
|
|
Chord,
|
|
|
|
|
|
|
|
// numeric literal (floating point or integer)
|
|
|
|
Numeric,
|
|
|
|
|
|
|
|
// "|" separaters arguments from block body, and provides variable introduction syntax
|
|
|
|
Variable_Separator,
|
|
|
|
|
|
|
|
// "[" and "]", delimit anonymous block of code (potentially a function)
|
|
|
|
Open_Block,
|
|
|
|
Close_Block,
|
|
|
|
|
|
|
|
// "(" and ")", used in arithmetic or as function invocation sarrounding (like in Haskell)
|
|
|
|
Open_Paren,
|
|
|
|
Close_Paren
|
|
|
|
};
|
|
|
|
|
2022-04-24 16:09:55 +02:00
|
|
|
Type type;
|
2022-04-24 15:27:09 +02:00
|
|
|
std::string_view source;
|
2022-04-27 14:58:02 +02:00
|
|
|
Location location;
|
2022-04-24 15:27:09 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
std::ostream& operator<<(std::ostream& os, Token const& tok);
|
2022-05-02 19:42:57 +02:00
|
|
|
std::ostream& operator<<(std::ostream& os, Token::Type type);
|
2022-04-24 15:27:09 +02:00
|
|
|
|
|
|
|
struct Lexer
|
|
|
|
{
|
|
|
|
// Source that is beeing lexed
|
|
|
|
std::string_view source;
|
|
|
|
|
2022-04-27 13:48:50 +02:00
|
|
|
// Used for rewinding
|
|
|
|
u32 last_rune_length = 0;
|
|
|
|
|
2022-04-27 14:37:21 +02:00
|
|
|
char const* token_start = nullptr;
|
|
|
|
usize token_length = 0;
|
2022-04-27 14:58:02 +02:00
|
|
|
Location token_location{};
|
2022-04-27 14:37:21 +02:00
|
|
|
|
2022-04-27 14:58:02 +02:00
|
|
|
Location prev_location{};
|
|
|
|
Location location{};
|
2022-04-24 15:27:09 +02:00
|
|
|
|
|
|
|
auto next_token() -> Result<Token>;
|
2022-04-27 13:48:50 +02:00
|
|
|
|
|
|
|
// Finds next rune in source
|
2022-04-27 14:37:21 +02:00
|
|
|
auto peek() const -> u32;
|
|
|
|
|
|
|
|
// Finds next rune in source and returns it, advancing the string
|
|
|
|
auto consume() -> u32;
|
|
|
|
|
2022-05-02 19:00:11 +02:00
|
|
|
inline auto consume_if(auto test) -> bool
|
2022-04-27 14:37:21 +02:00
|
|
|
{
|
|
|
|
return test(peek()) && (consume(), true);
|
|
|
|
}
|
2022-04-27 13:48:50 +02:00
|
|
|
|
|
|
|
// Goes back last rune
|
|
|
|
void rewind();
|
2022-04-27 14:37:21 +02:00
|
|
|
|
|
|
|
// Marks begin of token
|
|
|
|
void start();
|
|
|
|
|
|
|
|
// Marks end of token and returns it's matching source
|
|
|
|
std::string_view finish();
|
2022-04-24 15:27:09 +02:00
|
|
|
};
|