Parser started: basic literal parsing

This commit mostly sets up the structure needed for parsing,
not the actual parsing itself.
This commit is contained in:
Robert Bendun 2022-05-07 20:52:09 +02:00
parent ead70bda5d
commit 31262e74a0
7 changed files with 266 additions and 42 deletions

View File

@ -4,6 +4,7 @@ CPPFLAGS:=$(CPPFLAGS) -Ilib/expected/ -Ilib/ut/ -Isrc/
Obj=bin/errors.o \
bin/lexer.o \
bin/parser.o \
bin/unicode.o \
bin/unicode_tables.o

View File

@ -1,49 +1,112 @@
#include <musique.hh>
#include <iostream>
#include <sstream>
// Compares this error's kind against `type`; location and message are ignored.
bool Error::operator==(errors::Type type)
{
	// The parameter shadows the member, so the member is read through `this`.
	errors::Type const own_kind = this->type;
	return own_kind == type;
}
std::ostream& operator<<(std::ostream& os, Error const& err)
Error Error::with(Location loc) &&
{
if (err.location) {
os << *err.location;
location = loc;
return *this;
}
// Selects which label error_heading prints after the location prefix.
enum class Error_Level
{
// Printed as ": error: "
Error,
// Printed as ": notice: "
Notice,
// Printed as ": implementation bug: "
Bug
};
// Writes the common prefix of a diagnostic to `os`:
// the location when one is known (otherwise the program name "musique"),
// followed by the label matching `lvl`.
static void error_heading(std::ostream &os, std::optional<Location> location, Error_Level lvl)
{
	if (location) {
		os << *location;
	} else {
		os << "musique";
	}

	// Exactly one label is printed; it is chosen by severity.
	switch (lvl) {
	case Error_Level::Bug:    os << ": implementation bug: "; return;
	case Error_Level::Error:  os << ": error: ";              return;
	case Error_Level::Notice: os << ": notice: ";             return;
	}
}
// Prints `err` as a single diagnostic line: heading (location + ": error: ")
// followed by a description chosen by the error type.
// Errors that carry a preformatted `message` print that message; every branch
// terminates the line with '\n'.
std::ostream& operator<<(std::ostream& os, Error const& err)
{
	error_heading(os, err.location, Error_Level::Error);

	switch (err.type) {
	case errors::End_Of_File:
		return os << "end of file\n";

	case errors::Unrecognized_Character:
		// The message is filled in by errors::unrecognized_character;
		// fall back to a generic text when the error was built without it.
		if (err.message.empty()) {
			return os << "unrecognized character\n";
		}
		return os << err.message << '\n';

	case errors::Unexpected_Token_Type:
		return os << err.message << '\n';

	case errors::Unexpected_Empty_Source:
		return os << "unexpected end of input\n";
	}

	// Reached only when `type` holds a value outside of the enumerators above.
	return os << "unrecognized error type\n";
}
// Concatenates all arguments into one string by streaming each of them,
// in order, into an in-memory stream. Stream manipulators (e.g. std::hex)
// may be passed as arguments and affect the arguments that follow.
static std::string format(auto const& ...args)
{
	std::ostringstream buffer;
	((buffer << args), ...);
	return buffer.str();
}
// Builds an Unrecognized_Character error for the code point `invalid_character`.
// The human readable message (code point in hex plus the character itself)
// is formatted here; no location is attached.
Error errors::unrecognized_character(u32 invalid_character)
{
	Error err;
	err.type = errors::Unrecognized_Character;
	err.invalid_character = invalid_character;
	err.message = format(
		"unrecognized character U+",
		std::hex, invalid_character,
		" (char: '", utf8::Print{invalid_character}, "')");
	return err;
}
// Same as the single-argument overload, with `location` attached to the error.
Error errors::unrecognized_character(u32 invalid_character, Location location)
{
	Error err = unrecognized_character(invalid_character);
	return std::move(err).with(std::move(location));
}
// Builds an Unexpected_Token_Type error reporting that a token of type
// `expected` was required where `unexpected` was found.
// The error points at the location of the offending token.
Error errors::unexpected_token(Token::Type expected, Token const& unexpected)
{
	Error err;
	err.type = errors::Unexpected_Token_Type;
	err.location = unexpected.location;
	err.message = format("expected ", expected, ", but got ", unexpected.type);
	return err;
}
// Builds an Unexpected_Empty_Source error located at `location`.
Error errors::unexpected_end_of_source(Location location)
{
	Error err;
	err.type = errors::Unexpected_Empty_Source;
	return std::move(err).with(std::move(location));
}
// Reports that the parser finished while `tokens` were still unconsumed.
// Prints a bug heading, then one notice line per remaining token,
// all to standard error, and terminates the process with exit code 1.
void errors::all_tokens_were_not_parsed(std::span<Token> tokens)
{
	auto &err = std::cerr;

	error_heading(err, std::nullopt, Error_Level::Bug);
	err << "remaining tokens after parsing. Listing remaining tokens:\n";

	for (auto it = tokens.begin(); it != tokens.end(); ++it) {
		error_heading(err, it->location, Error_Level::Notice);
		err << *it << '\n';
	}

	std::exit(1);
}

View File

@ -1,5 +1,7 @@
#include <musique.hh>
#include <iomanip>
constexpr std::string_view Notes_Symbols = "abcedefgh";
constexpr std::string_view Valid_Operator_Chars =
"+-*/:%" // arithmetic
@ -241,10 +243,9 @@ std::string_view Lexer::finish()
return result;
}
std::ostream& operator<<(std::ostream& os, Token const&)
std::ostream& operator<<(std::ostream& os, Token const& token)
{
os << "Token";
return os;
return os << '{' << token.type << ", " << std::quoted(token.source) << ", " << token.location << '}';
}
std::ostream& operator<<(std::ostream& os, Token::Type type)

View File

@ -1,25 +1,42 @@
#include <iostream>
#include <musique.hh>
std::string_view Source = R"musique(
nums = [ 1 2 3 ]
say ( min nums + max nums )
)musique";
#include <span>
tl::expected<void, Error> Main()
static std::string_view pop(std::span<char const*> &span)
{
Lexer lexer{Source};
lexer.location.filename = "example.mq";
for (;;) {
auto token = Try(lexer.next_token());
std::cout << token << '\n';
}
auto element = span.front();
span = span.subspan(1);
return element;
}
int main()
Result<Unit> Main(std::span<char const*> args)
{
auto result = Main();
while (not args.empty()) {
std::string_view arg = pop(args);
if (arg == "-c" || arg == "--run") {
if (args.empty()) {
std::cerr << "musique: error: option " << arg << " requires an argument" << std::endl;
std::exit(1);
}
auto const source = pop(args);
Try(Parser::parse(source, "arguments"));
std::cout << "successfully parsed: " << source << " \n";
continue;
}
std::cerr << "musique: error: unrecognized command line option: " << arg << std::endl;
std::exit(1);
}
return {};
}
int main(int argc, char const** argv)
{
auto result = Main(std::span{ argv+1, usize(argc-1) });
if (not result.has_value()) {
std::cerr << result.error() << std::flush;
return 1;

View File

@ -6,6 +6,7 @@
#include <cstring>
#include <optional>
#include <ostream>
#include <span>
#include <string_view>
#include <tl/expected.hpp>
#include <variant>
@ -23,14 +24,21 @@ using i64 = std::int64_t;
using usize = std::size_t;
using isize = std::ptrdiff_t;
struct Unit {};
#define Fun(Function) ([]<typename ...T>(T&& ...args) { return (Function)(std::forward<T>(args)...); })
// Error handling mechanism inspired by Andrew Kelley's approach, which was implemented
// as a first-class feature in the Zig programming language.
namespace errors
{
	// Kinds of errors that can be reported by the lexer and the parser
	enum Type
	{
		// Lexer reached the end of the source
		End_Of_File,
		// Lexer found a character that does not start any token
		Unrecognized_Character,

		// Parser found a token of a different type than required
		Unexpected_Token_Type,
		// Parser needed a token but none was left
		Unexpected_Empty_Source,
	};
}
@ -58,36 +66,40 @@ struct Error
{
errors::Type type;
std::optional<Location> location = std::nullopt;
u32 invalid_character = 0;
std::string message{};
bool operator==(errors::Type);
Error with(Location) &&;
};
namespace errors
{
Error unrecognized_character(u32 invalid_character);
Error unrecognized_character(u32 invalid_character, Location location);
}
template<typename T>
struct Result : tl::expected<T, Error>
{
using Storage = tl::expected<T, Error>;
constexpr Result() = default;
constexpr Result() = default;
constexpr Result(Result const&) = default;
constexpr Result(Result&&) = default;
constexpr Result& operator=(Result const&) = default;
constexpr Result& operator=(Result&&) = default;
constexpr Result(errors::Type error)
: Storage(tl::unexpected(Error { error }))
{
}
constexpr Result(Error error)
inline Result(Error error)
: Storage(tl::unexpected(std::move(error)))
{
}
inline Result(tl::unexpected<Error> error)
: Storage(std::move(error))
{
}
template<typename ...Args>
requires std::is_constructible_v<T, Args...>
constexpr Result(Args&& ...args)
: Storage( T{ std::forward<Args>(args)... } )
{
@ -230,3 +242,55 @@ struct Lexer
// Marks end of token and returns it's matching source
std::string_view finish();
};
// Node of the abstract syntax tree produced by Parser.
// At this stage only literal nodes exist.
struct Ast
{
// Named constructors of AST structure
// Creates a node of type Literal that stores the given token
static Ast literal(Token);
// Kind of the node
enum class Type
{
// Node created by Ast::literal
Literal
};
// Kind of this node
Type type;
// Token stored in this node (for Literal: the token passed to Ast::literal)
Token token;
};
// Satisfied when the pack T contains at least one type
// and every type in it is exactly Expected
template<typename Expected, typename ...T>
concept Var_Args = (std::is_same_v<Expected, T> && ...) && (sizeof...(T) >= 1);
// Holds the tokens of a source together with a cursor marking
// how many of them have been consumed so far
struct Parser
{
// Tokens collected from the Lexer by Parser::parse
std::vector<Token> tokens;
// Index of the current (not yet consumed) token in `tokens`
unsigned token_id = 0;
// Parses whole source code producing Ast or Error
// using Parser structure internally
static Result<Ast> parse(std::string_view source, std::string_view filename);
// Entry point of expression parsing; currently forwards to parse_binary_operator
Result<Ast> parse_expression();
// Currently forwards to parse_literal
Result<Ast> parse_binary_operator();
// Parses a single numeric literal token into a Literal node
Result<Ast> parse_literal();
// Returns the current token and advances to the next one.
// Does not check whether any token is left
Token consume();
// Tests if current token has given type
bool expect(Token::Type type) const;
// Ensures that current token has the given type.
// Otherwise returns error
Result<Unit> ensure(Token::Type type) const;
};
// Named constructors for errors and reporting helpers
namespace errors
{
// Error for a character that the lexer does not recognize (no location attached)
Error unrecognized_character(u32 invalid_character);
// Same as above, with the location of the character attached
Error unrecognized_character(u32 invalid_character, Location location);
// Error for a token whose type differs from the expected one;
// located at the unexpected token
Error unexpected_token(Token::Type expected, Token const& unexpected);
// Error for running out of tokens, located at `location`
Error unexpected_end_of_source(Location location);
// Prints the tokens that were left after parsing to standard error
// and terminates the program with exit code 1
[[noreturn]]
void all_tokens_were_not_parsed(std::span<Token>);
}

67
src/parser.cc Normal file
View File

@ -0,0 +1,67 @@
#include <musique.hh>
// Parses `source` into an Ast.
//
// First the whole source is tokenized: tokens are collected until the lexer
// reports End_Of_File; any other lexer error is returned immediately.
// Then the collected tokens are parsed as one expression.
//
// `filename` is stored in the lexer's location and used only for reporting.
// Returns the parsed Ast, or the first lexer/parser error encountered.
Result<Ast> Parser::parse(std::string_view source, std::string_view filename)
{
	Lexer lexer{source};
	lexer.location.filename = filename;
	Parser parser;

	for (;;) {
		auto maybe_token = lexer.next_token();
		if (maybe_token.has_value()) {
			parser.tokens.emplace_back(*std::move(maybe_token));
		} else if (maybe_token.error().type == errors::End_Of_File) {
			break;
		} else {
			return std::move(maybe_token).error();
		}
	}

	// Not const, so that the result can be moved out on return.
	auto result = parser.parse_expression();

	// Leftover tokens indicate a parser bug only when parsing succeeded.
	// After a parse error, unconsumed tokens are expected and the error itself
	// must reach the caller instead of being replaced by a bug report + exit.
	if (result.has_value() && parser.token_id < parser.tokens.size()) {
		errors::all_tokens_were_not_parsed(std::span(parser.tokens).subspan(parser.token_id));
	}

	return result;
}
// Parses an expression. Currently an expression is whatever
// parse_binary_operator accepts, so this only forwards the call.
Result<Ast> Parser::parse_expression()
{
return parse_binary_operator();
}
// Placeholder for binary operator parsing: no operators are handled yet,
// the call is forwarded to parse_literal.
Result<Ast> Parser::parse_binary_operator()
{
return parse_literal();
}
// Parses a numeric literal: checks that the current token is Numeric,
// then consumes it and wraps it in a Literal node.
Result<Ast> Parser::parse_literal()
{
// NOTE(review): Try is defined outside this view; presumably it returns
// the error from this function when ensure() fails - confirm.
Try(ensure(Token::Type::Numeric));
return Ast::literal(consume());
}
// Hands out the current token (moved out of the token list)
// and advances the cursor by one. No bounds check is performed.
Token Parser::consume()
{
	auto const index = token_id;
	token_id += 1;
	return std::move(tokens[index]);
}
// Returns true when a current token exists and its type equals `type`.
bool Parser::expect(Token::Type type) const
{
	if (token_id >= tokens.size()) {
		return false;
	}
	return tokens[token_id].type == type;
}
// Checks that the current token has type `type`.
//
// Returns an empty success value when it does. Otherwise returns:
//  - Unexpected_Empty_Source when no token is left (located at the last token,
//    or without location when there are no tokens at all),
//  - Unexpected_Token_Type when the current token has a different type.
Result<Unit> Parser::ensure(Token::Type type) const
{
	if (token_id >= tokens.size()) {
		if (tokens.empty()) {
			// There is no last token to borrow a location from;
			// calling back() on an empty vector would be undefined behaviour.
			return Error { errors::Unexpected_Empty_Source };
		}
		return errors::unexpected_end_of_source(tokens.back().location);
	}

	if (tokens[token_id].type != type) {
		return errors::unexpected_token(type, tokens[token_id]);
	}

	return Result<Unit>{};
}
// Named constructor: creates a Literal node that owns `token`.
Ast Ast::literal(Token token)
{
	return Ast { Ast::Type::Literal, std::move(token) };
}

11
src/tests/parser.cc Normal file
View File

@ -0,0 +1,11 @@
#include <boost/ut.hpp>
#include <musique.hh>
using namespace boost::ut;
// Test suite for Parser (boost::ut)
suite parser_test = [] {
// Smoke test: a source made of a single numeric literal must parse successfully
"Literal parsing"_test = [] {
auto result = Parser::parse("1", "test");
expect(result.has_value()) << "code was expected to parse, but had not";
};
};