diff --git a/Makefile b/Makefile index 38423d1..fb28775 100644 --- a/Makefile +++ b/Makefile @@ -4,6 +4,7 @@ CPPFLAGS:=$(CPPFLAGS) -Ilib/expected/ -Ilib/ut/ -Isrc/ Obj=bin/errors.o \ bin/lexer.o \ + bin/parser.o \ bin/unicode.o \ bin/unicode_tables.o diff --git a/src/errors.cc b/src/errors.cc index 75dcf06..6efe905 100644 --- a/src/errors.cc +++ b/src/errors.cc @@ -1,49 +1,112 @@ #include +#include +#include + bool Error::operator==(errors::Type type) { return this->type == type; } -std::ostream& operator<<(std::ostream& os, Error const& err) +Error Error::with(Location loc) && { - if (err.location) { - os << *err.location; + location = loc; + return *this; +} + +enum class Error_Level +{ + Error, + Notice, + Bug +}; + +static void error_heading(std::ostream &os, std::optional location, Error_Level lvl) +{ + if (location) { + os << *location; } else { os << "musique"; } - os << ": error: "; + switch (lvl) { + case Error_Level::Bug: os << ": implementation bug: "; return; + case Error_Level::Error: os << ": error: "; return; + case Error_Level::Notice: os << ": notice: "; return; + } +} + +std::ostream& operator<<(std::ostream& os, Error const& err) +{ + error_heading(os, err.location, Error_Level::Error); switch (err.type) { case errors::End_Of_File: return os << "end of file\n"; case errors::Unrecognized_Character: - if (err.invalid_character) { - return os << "unrecognized charater U+" << std::hex << err.invalid_character - << " (char: '" << utf8::Print{err.invalid_character} << "')\n"; - } else { - return os << "unrecognized character\n"; - } + return err.message.empty() ? os << "unrecognized character\n" : os << err.message; + + case errors::Unexpected_Token_Type: + return os << err.message; + + case errors::Unexpected_Empty_Source: + return os << "unexpected end of input\n"; } return os << "unrecognized error type\n"; } +static std::string format(auto const& ...args) +{ + std::stringstream ss; + (void) (ss << ... << args); + return ss.str(); +} + Error errors::unrecognized_character(u32 invalid_character) { Error err; err.type = errors::Unrecognized_Character; - err.invalid_character = invalid_character; + err.message = format( + "unrecognized charater U+", + std::hex, invalid_character, + " (char: '", utf8::Print{invalid_character}, "')"); + return err; } Error errors::unrecognized_character(u32 invalid_character, Location location) +{ + return unrecognized_character(invalid_character).with(std::move(location)); +} + +Error errors::unexpected_token(Token::Type expected, Token const& unexpected) { Error err; - err.type = errors::Unrecognized_Character; - err.invalid_character = invalid_character; - err.location = std::move(location); + err.type = errors::Unexpected_Token_Type; + err.location = unexpected.location; + err.message = format("expected ", expected, ", but got ", unexpected.type); return err; } + +Error errors::unexpected_end_of_source(Location location) +{ + Error err; + err.type = errors::Unexpected_Empty_Source; + err.location = location; + return err; +} + +void errors::all_tokens_were_not_parsed(std::span tokens) +{ + error_heading(std::cerr, std::nullopt, Error_Level::Bug); + std::cerr << "remaining tokens after parsing. Listing remaining tokens:\n"; + + for (auto const& token : tokens) { + error_heading(std::cerr, token.location, Error_Level::Notice); + std::cerr << token << '\n'; + } + + std::exit(1); +} diff --git a/src/lexer.cc b/src/lexer.cc index aef8ce4..60bd4fe 100644 --- a/src/lexer.cc +++ b/src/lexer.cc @@ -1,5 +1,7 @@ #include +#include + constexpr std::string_view Notes_Symbols = "abcedefgh"; constexpr std::string_view Valid_Operator_Chars = "+-*/:%" // arithmetic @@ -241,10 +243,9 @@ std::string_view Lexer::finish() return result; } -std::ostream& operator<<(std::ostream& os, Token const&) +std::ostream& operator<<(std::ostream& os, Token const& token) { - os << "Token"; - return os; + return os << '{' << token.type << ", " << std::quoted(token.source) << ", " << token.location << '}'; } std::ostream& operator<<(std::ostream& os, Token::Type type) diff --git a/src/main.cc b/src/main.cc index fc6fec5..5a7b759 100644 --- a/src/main.cc +++ b/src/main.cc @@ -1,25 +1,42 @@ #include #include -std::string_view Source = R"musique( - nums = [ 1 2 3 ] - say ( min nums + max nums ) -)musique"; +#include -tl::expected Main() +static std::string_view pop(std::span &span) { - Lexer lexer{Source}; - lexer.location.filename = "example.mq"; - - for (;;) { - auto token = Try(lexer.next_token()); - std::cout << token << '\n'; - } + auto element = span.front(); + span = span.subspan(1); + return element; } -int main() +Result Main(std::span args) { - auto result = Main(); + while (not args.empty()) { + std::string_view arg = pop(args); + + if (arg == "-c" || arg == "--run") { + if (args.empty()) { + std::cerr << "musique: error: option " << arg << " requires an argument" << std::endl; + std::exit(1); + } + + auto const source = pop(args); + Try(Parser::parse(source, "arguments")); + std::cout << "successfully parsed: " << source << " \n"; + continue; + } + + std::cerr << "musique: error: unrecognized command line option: " << arg << std::endl; + std::exit(1); + } + + return {}; +} + +int main(int argc, char const** argv) +{ + auto result = Main(std::span{ argv+1, usize(argc-1) }); if (not result.has_value()) { std::cerr << result.error() << std::flush; return 1; diff --git a/src/musique.hh b/src/musique.hh index 4723359..855f508 100644 --- a/src/musique.hh +++ b/src/musique.hh @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -23,14 +24,21 @@ using i64 = std::int64_t; using usize = std::size_t; using isize = std::ptrdiff_t; +struct Unit {}; + #define Fun(Function) ([](T&& ...args) { return (Function)(std::forward(args)...); }) +// Error handling mechanism inspired by Andrew Kelly approach, that was implemented +// as first class feature in Zig programming language. namespace errors { enum Type { End_Of_File, - Unrecognized_Character + Unrecognized_Character, + + Unexpected_Token_Type, + Unexpected_Empty_Source, }; } @@ -58,36 +66,40 @@ struct Error { errors::Type type; std::optional location = std::nullopt; - u32 invalid_character = 0; + std::string message{}; bool operator==(errors::Type); + Error with(Location) &&; }; -namespace errors -{ - Error unrecognized_character(u32 invalid_character); - Error unrecognized_character(u32 invalid_character, Location location); -} - - template struct Result : tl::expected { using Storage = tl::expected; - constexpr Result() = default; + constexpr Result() = default; + constexpr Result(Result const&) = default; + constexpr Result(Result&&) = default; + constexpr Result& operator=(Result const&) = default; + constexpr Result& operator=(Result&&) = default; constexpr Result(errors::Type error) : Storage(tl::unexpected(Error { error })) { } - constexpr Result(Error error) + inline Result(Error error) : Storage(tl::unexpected(std::move(error))) { } + inline Result(tl::unexpected error) + : Storage(std::move(error)) + { + } + template + requires std::is_constructible_v constexpr Result(Args&& ...args) : Storage( T{ std::forward(args)... } ) { @@ -230,3 +242,55 @@ struct Lexer // Marks end of token and returns it's matching source std::string_view finish(); }; + +struct Ast +{ + // Named constructors of AST structure + static Ast literal(Token); + + enum class Type + { + Literal + }; + + Type type; + Token token; +}; + +template +concept Var_Args = (std::is_same_v && ...) && (sizeof...(T) >= 1); + +struct Parser +{ + std::vector tokens; + unsigned token_id = 0; + + // Parses whole source code producing Ast or Error + // using Parser structure internally + static Result parse(std::string_view source, std::string_view filename); + + Result parse_expression(); + Result parse_binary_operator(); + Result parse_literal(); + + Token consume(); + + // Tests if current token has given type + bool expect(Token::Type type) const; + + // Ensures that current token has one of types given. + // Otherwise returns error + Result ensure(Token::Type type) const; +}; + +namespace errors +{ + Error unrecognized_character(u32 invalid_character); + Error unrecognized_character(u32 invalid_character, Location location); + + Error unexpected_token(Token::Type expected, Token const& unexpected); + Error unexpected_end_of_source(Location location); + + [[noreturn]] + void all_tokens_were_not_parsed(std::span); +} diff --git a/src/parser.cc b/src/parser.cc new file mode 100644 index 0000000..917cd30 --- /dev/null +++ b/src/parser.cc @@ -0,0 +1,67 @@ +#include + +Result Parser::parse(std::string_view source, std::string_view filename) +{ + Lexer lexer{source}; + lexer.location.filename = filename; + Parser parser; + + for (;;) if (auto maybe_token = lexer.next_token(); maybe_token.has_value()) { + parser.tokens.emplace_back(*std::move(maybe_token)); + } else if (maybe_token.error().type == errors::End_Of_File) { + break; + } else { + return std::move(maybe_token).error(); + } + + auto const result = parser.parse_expression(); + + if (parser.token_id < parser.tokens.size()) { + errors::all_tokens_were_not_parsed(std::span(parser.tokens).subspan(parser.token_id)); + } + + return result; +} + +Result Parser::parse_expression() +{ + return parse_binary_operator(); +} + +Result Parser::parse_binary_operator() +{ + return parse_literal(); +} + +Result Parser::parse_literal() +{ + Try(ensure(Token::Type::Numeric)); + return Ast::literal(consume()); +} + +Token Parser::consume() +{ + return std::move(tokens[token_id++]); +} + +bool Parser::expect(Token::Type type) const +{ + return token_id < tokens.size() && tokens[token_id].type == type; +} + +Result Parser::ensure(Token::Type type) const +{ + return token_id >= tokens.size() + ? errors::unexpected_end_of_source(tokens.back().location) + : tokens[token_id].type != type + ? errors::unexpected_token(type, tokens[token_id]) + : Result{}; +} + +Ast Ast::literal(Token token) +{ + Ast ast; + ast.type = Ast::Type::Literal; + ast.token = std::move(token); + return ast; +} diff --git a/src/tests/parser.cc b/src/tests/parser.cc new file mode 100644 index 0000000..2abf360 --- /dev/null +++ b/src/tests/parser.cc @@ -0,0 +1,11 @@ +#include +#include + +using namespace boost::ut; + +suite parser_test = [] { + "Literal parsing"_test = [] { + auto result = Parser::parse("1", "test"); + expect(result.has_value()) << "code was expected to parse, but had not"; + }; +};