#pragma once #include #include #include #include #include #include #include #include #include #if defined(__cpp_lib_source_location) #include #endif // To make sure, that we don't collide with macro #ifdef assert #undef assert #endif using u8 = std::uint8_t; using u16 = std::uint16_t; using u32 = std::uint32_t; using u64 = std::uint64_t; using i8 = std::int8_t; using i16 = std::int16_t; using i32 = std::int32_t; using i64 = std::int64_t; using usize = std::size_t; using isize = std::ptrdiff_t; // Error handling mechanism inspired by Andrew Kelly approach, that was implemented // as first class feature in Zig programming language. namespace errors { enum Type { End_Of_File, Unrecognized_Character, Unexpected_Token_Type, Unexpected_Empty_Source, Failed_Numeric_Parsing, }; } /// \brief Location describes code position in `file line column` format. /// It's used both to represent position in source files provided // to interpreter and internal interpreter usage. struct Location { std::string_view filename = ""; ///< File that location is pointing to usize line = 1; ///< Line number (1 based) that location is pointing to usize column = 1; ///< Column number (1 based) that location is pointing to /// Advances line and column numbers based on provided rune /// /// If rune is newline, then column is reset to 1, and line number is incremented. /// Otherwise column number is incremented. /// /// @param rune Rune from which column and line numbers advancements are made. Location& advance(u32 rune); bool operator==(Location const& rhs) const = default; //! Creates location at default filename with specified line and column number static Location at(usize line, usize column); // Used to describe location of function call in interpreter (internal use only) #if defined(__cpp_lib_source_location) static Location caller(std::source_location loc = std::source_location::current()); #elif (__has_builtin(__builtin_FILE) and __has_builtin(__builtin_LINE)) static Location caller(char const* file = __builtin_FILE(), usize line = __builtin_LINE()); #else #error Cannot implement Location::caller function /// Returns location of call in interpreter source code. /// /// Example of reporting where `foo()` was beeing called: /// @code /// void foo(Location loc = Location::caller()) { std::cout << loc << '\n'; } /// @endcode static Location caller(); #endif }; std::ostream& operator<<(std::ostream& os, Location const& location); void assert(bool condition, std::string message, Location loc = Location::caller()); // Marks part of code that was not implemented yet [[noreturn]] void unimplemented(Location loc = Location::caller()); // Marks location that should not be reached [[noreturn]] void unreachable(Location loc = Location::caller()); struct Error { errors::Type type; std::optional location = std::nullopt; std::string message{}; std::errc error_code{}; bool operator==(errors::Type); Error with(Location) &&; }; std::ostream& operator<<(std::ostream& os, Error const& err); template struct [[nodiscard("This value may contain critical error, so it should NOT be ignored")]] Result : tl::expected { using Storage = tl::expected; constexpr Result() = default; template requires (not std::is_void_v) && std::is_constructible_v constexpr Result(Args&& ...args) : Storage( T{ std::forward(args)... } ) { } template requires std::is_constructible_v constexpr Result(Arg &&arg) : Storage(std::forward(arg)) { } constexpr Result(errors::Type error) : Storage(tl::unexpect, Error { error } ) { } inline Result(Error error) : Storage(tl::unexpected(std::move(error))) { } // Internal function used for definition of Try macro inline auto value() && { if constexpr (not std::is_void_v) { return Storage::value(); } } }; // NOTE This implementation requires C++ language extension: statement expressions // It's supported by GCC and Clang, other compilers i don't know // // Inspired by SerenityOS TRY macro #define Try(Value) \ ({ \ auto try_value = (Value); \ if (not try_value.has_value()) [[unlikely]] \ return tl::unexpected(try_value.error()); \ std::move(try_value).value(); \ }) namespace unicode { inline namespace special_runes { [[maybe_unused]] constexpr u32 Rune_Error = 0xfffd; [[maybe_unused]] constexpr u32 Rune_Self = 0x80; [[maybe_unused]] constexpr u32 Max_Bytes = 4; } // is_digit returns true if `digit` is ASCII digit bool is_digit(u32 digit); // is_space return true if `space` is ASCII blank character bool is_space(u32 space); // is_letter returns true if `letter` is considered a letter by Unicode bool is_letter(u32 letter); // is_identifier returns true if `letter` is valid character for identifier. // // It's modifier by is_first_character flag to determine some character classes // allowance like numbers, which are only allowed NOT at the front of the identifier enum class First_Character : bool { Yes = true, No = false }; bool is_identifier(u32 letter, First_Character is_first_character); } namespace utf8 { using namespace unicode::special_runes; // Decodes rune and returns remaining string auto decode(std::string_view s) -> std::pair; auto length(std::string_view s) -> usize; struct Print { u32 rune; }; } std::ostream& operator<<(std::ostream& os, utf8::Print const& print); struct Token { enum class Type { // like repeat or choose or chord Symbol, // like + - ++ < > Operator, // chord literal, like c125 Chord, // numeric literal (floating point or integer) Numeric, // "|" separaters arguments from block body, and provides variable introduction syntax Variable_Separator, // ";" separates expressions. Used to separate calls, like `foo 1 2; bar 3 4` Expression_Separator, // "[" and "]", delimit anonymous block of code (potentially a function) Open_Block, Close_Block, // "(" and ")", used in arithmetic or as function invocation sarrounding (like in Haskell) Open_Paren, Close_Paren }; Type type; std::string_view source; Location location; }; std::ostream& operator<<(std::ostream& os, Token const& tok); std::ostream& operator<<(std::ostream& os, Token::Type type); struct Lexer { // Source that is beeing lexed std::string_view source; // Used for rewinding u32 last_rune_length = 0; char const* token_start = nullptr; usize token_length = 0; Location token_location{}; Location prev_location{}; Location location{}; auto next_token() -> Result; // Utility function for next_token() void skip_whitespace_and_comments(); // Finds next rune in source auto peek() const -> u32; // Finds next rune in source and returns it, advancing the string auto consume() -> u32; // For test beeing // callable, current rune is passed to test // integral, current rune is tested for equality with test // string, current rune is tested for beeing in it // otherwise, current rune is tested for beeing in test // // When testing above yields truth, current rune is consumed. // Returns if rune was consumed auto consume_if(auto test) -> bool; // Consume two runes with given tests otherwise backtrack auto consume_if(auto first, auto second) -> bool; // Goes back last rune void rewind(); // Marks begin of token void start(); // Marks end of token and returns it's matching source std::string_view finish(); }; struct Ast { // Named constructors of AST structure static Ast literal(Token); static Ast binary(Token, Ast lhs, Ast rhs); static Ast call(std::vector call); static Ast sequence(std::vector call); static Ast block(Location location, Ast seq = sequence({}), std::vector parameters = {}); enum class Type { Literal, // Compile time known constant like `c` or `1` Binary, // Binary operator application like `1` + `2` Call, // Function call application like `print 42` Sequence, // Several expressions sequences like `42`, `42; 32` Block, // Block expressions like `[42; hello]` }; Type type; Location location; Token token; std::vector arguments{}; }; bool operator==(Ast const& lhs, Ast const& rhs); std::ostream& operator<<(std::ostream& os, Ast::Type type); std::ostream& operator<<(std::ostream& os, Ast const& tree); void dump(Ast const& ast, unsigned indent = 0); struct Parser { std::vector tokens; unsigned token_id = 0; // Parses whole source code producing Ast or Error // using Parser structure internally static Result parse(std::string_view source, std::string_view filename); Result parse_sequence(); Result parse_expression(); Result parse_infix_expression(); Result parse_atomic_expression(); Result parse_identifier(); Result peek() const; Result peek_type() const; Token consume(); // Tests if current token has given type bool expect(Token::Type type) const; // Ensures that current token has one of types given. // Otherwise returns error Result ensure(Token::Type type) const; }; // Number type supporting integer and fractional constants // Invariant: gcd(num, den) == 1, after any operation struct Number { using value_type = i64; value_type num = 0, den = 1; constexpr Number() = default; constexpr Number(Number const&) = default; constexpr Number(Number &&) = default; constexpr Number& operator=(Number const&) = default; constexpr Number& operator=(Number &&) = default; explicit Number(value_type v); Number(value_type num, value_type den); auto as_int() const -> value_type; // Returns self as int auto simplify() const -> Number; // Returns self, but with gcd(num, den) == 1 void simplify_inplace(); // Update self, to have gcd(num, den) == 1 bool operator==(Number const&) const; bool operator!=(Number const&) const; std::strong_ordering operator<=>(Number const&) const; Number operator+(Number const& rhs) const; Number& operator+=(Number const& rhs); Number operator-(Number const& rhs) const; Number& operator-=(Number const& rhs); Number operator*(Number const& rhs) const; Number& operator*=(Number const& rhs); Number operator/(Number const& rhs) const; Number& operator/=(Number const& rhs); static Result from(Token token); }; std::ostream& operator<<(std::ostream& os, Number const& num); namespace errors { Error unrecognized_character(u32 invalid_character); Error unrecognized_character(u32 invalid_character, Location location); Error unexpected_token(Token::Type expected, Token const& unexpected); Error unexpected_token(Token const& unexpected); Error unexpected_end_of_source(Location location); Error failed_numeric_parsing(Location location, std::errc errc, std::string_view source); [[noreturn]] void all_tokens_were_not_parsed(std::span); }