Parser started: basic literal parsing
This commit mostly implements the scaffolding needed for parsing rather than the actual parsing itself
This commit is contained in:
parent
ead70bda5d
commit
31262e74a0
1
Makefile
1
Makefile
@ -4,6 +4,7 @@ CPPFLAGS:=$(CPPFLAGS) -Ilib/expected/ -Ilib/ut/ -Isrc/
|
||||
|
||||
Obj=bin/errors.o \
|
||||
bin/lexer.o \
|
||||
bin/parser.o \
|
||||
bin/unicode.o \
|
||||
bin/unicode_tables.o
|
||||
|
||||
|
@ -1,49 +1,112 @@
|
||||
#include <musique.hh>
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
// Tells whether this error is of the given kind.
// The parameter shadows the member of the same name, hence the explicit `this->`.
bool Error::operator==(errors::Type type)
{
	errors::Type const own_type = this->type;
	return own_type == type;
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, Error const& err)
|
||||
Error Error::with(Location loc) &&
|
||||
{
|
||||
if (err.location) {
|
||||
os << *err.location;
|
||||
location = loc;
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Severity of a diagnostic; selects the label printed in its heading.
enum class Error_Level
{
	Error,   // printed as ": error: "
	Notice,  // printed as ": notice: "
	Bug      // printed as ": implementation bug: "
};
|
||||
|
||||
static void error_heading(std::ostream &os, std::optional<Location> location, Error_Level lvl)
|
||||
{
|
||||
if (location) {
|
||||
os << *location;
|
||||
} else {
|
||||
os << "musique";
|
||||
}
|
||||
|
||||
os << ": error: ";
|
||||
switch (lvl) {
|
||||
case Error_Level::Bug: os << ": implementation bug: "; return;
|
||||
case Error_Level::Error: os << ": error: "; return;
|
||||
case Error_Level::Notice: os << ": notice: "; return;
|
||||
}
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, Error const& err)
|
||||
{
|
||||
error_heading(os, err.location, Error_Level::Error);
|
||||
|
||||
switch (err.type) {
|
||||
case errors::End_Of_File:
|
||||
return os << "end of file\n";
|
||||
|
||||
case errors::Unrecognized_Character:
|
||||
if (err.invalid_character) {
|
||||
return os << "unrecognized charater U+" << std::hex << err.invalid_character
|
||||
<< " (char: '" << utf8::Print{err.invalid_character} << "')\n";
|
||||
} else {
|
||||
return os << "unrecognized character\n";
|
||||
}
|
||||
return err.message.empty() ? os << "unrecognized character\n" : os << err.message;
|
||||
|
||||
case errors::Unexpected_Token_Type:
|
||||
return os << err.message;
|
||||
|
||||
case errors::Unexpected_Empty_Source:
|
||||
return os << "unexpected end of input\n";
|
||||
}
|
||||
|
||||
return os << "unrecognized error type\n";
|
||||
}
|
||||
|
||||
// Concatenates the streamed form of all arguments, in order, into one string.
// All arguments go through a single stream, so a manipulator such as std::hex
// affects the arguments that follow it.
template<typename ...Args>
static std::string format(Args const& ...args)
{
	std::stringstream buffer;
	((buffer << args), ...);
	return buffer.str();
}
|
||||
|
||||
// Builds an Unrecognized_Character error for the given code point.
// The message is formatted eagerly: it contains the code point in hexadecimal
// and its printed form (via utf8::Print). No location is attached.
Error errors::unrecognized_character(u32 invalid_character)
{
	Error err;
	err.type = errors::Unrecognized_Character;
	err.invalid_character = invalid_character;
	err.message = format(
		"unrecognized character U+",
		std::hex, invalid_character,
		" (char: '", utf8::Print{invalid_character}, "')");

	return err;
}
|
||||
|
||||
// Same as the single-argument overload, with `location` attached to the error.
Error errors::unrecognized_character(u32 invalid_character, Location location)
{
	Error err = unrecognized_character(invalid_character);
	return std::move(err).with(std::move(location));
}
|
||||
|
||||
// Builds an Unexpected_Token_Type error located at the offending token.
// The message names both the expected type and the type actually found.
Error errors::unexpected_token(Token::Type expected, Token const& unexpected)
{
	Error err;
	err.message = format("expected ", expected, ", but got ", unexpected.type);
	err.location = unexpected.location;
	err.type = errors::Unexpected_Token_Type;
	return err;
}
|
||||
|
||||
// Builds an Unexpected_Empty_Source error reported at `location`.
Error errors::unexpected_end_of_source(Location location)
{
	Error err;
	err.location = location;
	err.type = errors::Unexpected_Empty_Source;
	return err;
}
|
||||
|
||||
// Reports on standard error that tokens remained after parsing, lists each
// remaining token with its location as a notice, then exits with status 1.
void errors::all_tokens_were_not_parsed(std::span<Token> tokens)
{
	error_heading(std::cerr, std::nullopt, Error_Level::Bug);
	std::cerr << "remaining tokens after parsing. Listing remaining tokens:\n";

	for (auto it = tokens.begin(); it != tokens.end(); ++it) {
		error_heading(std::cerr, it->location, Error_Level::Notice);
		std::cerr << *it << '\n';
	}

	std::exit(1);
}
|
||||
|
@ -1,5 +1,7 @@
|
||||
#include <musique.hh>
|
||||
|
||||
#include <iomanip>
|
||||
|
||||
constexpr std::string_view Notes_Symbols = "abcedefgh";
|
||||
constexpr std::string_view Valid_Operator_Chars =
|
||||
"+-*/:%" // arithmetic
|
||||
@ -241,10 +243,9 @@ std::string_view Lexer::finish()
|
||||
return result;
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, Token const&)
|
||||
std::ostream& operator<<(std::ostream& os, Token const& token)
|
||||
{
|
||||
os << "Token";
|
||||
return os;
|
||||
return os << '{' << token.type << ", " << std::quoted(token.source) << ", " << token.location << '}';
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, Token::Type type)
|
||||
|
45
src/main.cc
45
src/main.cc
@ -1,25 +1,42 @@
|
||||
#include <iostream>
|
||||
#include <musique.hh>
|
||||
|
||||
std::string_view Source = R"musique(
|
||||
nums = [ 1 2 3 ]
|
||||
say ( min nums + max nums )
|
||||
)musique";
|
||||
#include <span>
|
||||
|
||||
tl::expected<void, Error> Main()
|
||||
static std::string_view pop(std::span<char const*> &span)
|
||||
{
|
||||
Lexer lexer{Source};
|
||||
lexer.location.filename = "example.mq";
|
||||
|
||||
for (;;) {
|
||||
auto token = Try(lexer.next_token());
|
||||
std::cout << token << '\n';
|
||||
}
|
||||
auto element = span.front();
|
||||
span = span.subspan(1);
|
||||
return element;
|
||||
}
|
||||
|
||||
int main()
|
||||
Result<Unit> Main(std::span<char const*> args)
|
||||
{
|
||||
auto result = Main();
|
||||
while (not args.empty()) {
|
||||
std::string_view arg = pop(args);
|
||||
|
||||
if (arg == "-c" || arg == "--run") {
|
||||
if (args.empty()) {
|
||||
std::cerr << "musique: error: option " << arg << " requires an argument" << std::endl;
|
||||
std::exit(1);
|
||||
}
|
||||
|
||||
auto const source = pop(args);
|
||||
Try(Parser::parse(source, "arguments"));
|
||||
std::cout << "successfully parsed: " << source << " \n";
|
||||
continue;
|
||||
}
|
||||
|
||||
std::cerr << "musique: error: unrecognized command line option: " << arg << std::endl;
|
||||
std::exit(1);
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
int main(int argc, char const** argv)
|
||||
{
|
||||
auto result = Main(std::span{ argv+1, usize(argc-1) });
|
||||
if (not result.has_value()) {
|
||||
std::cerr << result.error() << std::flush;
|
||||
return 1;
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <cstring>
|
||||
#include <optional>
|
||||
#include <ostream>
|
||||
#include <span>
|
||||
#include <string_view>
|
||||
#include <tl/expected.hpp>
|
||||
#include <variant>
|
||||
@ -23,14 +24,21 @@ using i64 = std::int64_t;
|
||||
using usize = std::size_t;
|
||||
using isize = std::ptrdiff_t;
|
||||
|
||||
struct Unit {};
|
||||
|
||||
#define Fun(Function) ([]<typename ...T>(T&& ...args) { return (Function)(std::forward<T>(args)...); })
|
||||
|
||||
// Error handling mechanism inspired by Andrew Kelley's approach, which was implemented
|
||||
// as first class feature in Zig programming language.
|
||||
namespace errors
{
	// Discriminates the kinds of Error values used in this project
	enum Type
	{
		End_Of_File,
		Unrecognized_Character,

		Unexpected_Token_Type,
		Unexpected_Empty_Source,
	};
}
|
||||
|
||||
@ -58,36 +66,40 @@ struct Error
|
||||
{
|
||||
errors::Type type;
|
||||
std::optional<Location> location = std::nullopt;
|
||||
u32 invalid_character = 0;
|
||||
std::string message{};
|
||||
|
||||
bool operator==(errors::Type);
|
||||
Error with(Location) &&;
|
||||
};
|
||||
|
||||
namespace errors
|
||||
{
|
||||
Error unrecognized_character(u32 invalid_character);
|
||||
Error unrecognized_character(u32 invalid_character, Location location);
|
||||
}
|
||||
|
||||
|
||||
template<typename T>
|
||||
struct Result : tl::expected<T, Error>
|
||||
{
|
||||
using Storage = tl::expected<T, Error>;
|
||||
|
||||
constexpr Result() = default;
|
||||
constexpr Result(Result const&) = default;
|
||||
constexpr Result(Result&&) = default;
|
||||
constexpr Result& operator=(Result const&) = default;
|
||||
constexpr Result& operator=(Result&&) = default;
|
||||
|
||||
constexpr Result(errors::Type error)
|
||||
: Storage(tl::unexpected(Error { error }))
|
||||
{
|
||||
}
|
||||
|
||||
constexpr Result(Error error)
|
||||
inline Result(Error error)
|
||||
: Storage(tl::unexpected(std::move(error)))
|
||||
{
|
||||
}
|
||||
|
||||
inline Result(tl::unexpected<Error> error)
|
||||
: Storage(std::move(error))
|
||||
{
|
||||
}
|
||||
|
||||
template<typename ...Args>
|
||||
requires std::is_constructible_v<T, Args...>
|
||||
constexpr Result(Args&& ...args)
|
||||
: Storage( T{ std::forward<Args>(args)... } )
|
||||
{
|
||||
@ -230,3 +242,55 @@ struct Lexer
|
||||
// Marks end of token and returns it's matching source
|
||||
std::string_view finish();
|
||||
};
|
||||
|
||||
struct Ast
|
||||
{
|
||||
// Named constructors of AST structure
|
||||
static Ast literal(Token);
|
||||
|
||||
enum class Type
|
||||
{
|
||||
Literal
|
||||
};
|
||||
|
||||
Type type;
|
||||
Token token;
|
||||
};
|
||||
|
||||
template<typename Expected, typename ...T>
|
||||
concept Var_Args = (std::is_same_v<Expected, T> && ...) && (sizeof...(T) >= 1);
|
||||
|
||||
struct Parser
|
||||
{
|
||||
std::vector<Token> tokens;
|
||||
unsigned token_id = 0;
|
||||
|
||||
// Parses whole source code producing Ast or Error
|
||||
// using Parser structure internally
|
||||
static Result<Ast> parse(std::string_view source, std::string_view filename);
|
||||
|
||||
Result<Ast> parse_expression();
|
||||
Result<Ast> parse_binary_operator();
|
||||
Result<Ast> parse_literal();
|
||||
|
||||
Token consume();
|
||||
|
||||
// Tests if current token has given type
|
||||
bool expect(Token::Type type) const;
|
||||
|
||||
// Ensures that current token has one of types given.
|
||||
// Otherwise returns error
|
||||
Result<Unit> ensure(Token::Type type) const;
|
||||
};
|
||||
|
||||
namespace errors
|
||||
{
|
||||
Error unrecognized_character(u32 invalid_character);
|
||||
Error unrecognized_character(u32 invalid_character, Location location);
|
||||
|
||||
Error unexpected_token(Token::Type expected, Token const& unexpected);
|
||||
Error unexpected_end_of_source(Location location);
|
||||
|
||||
[[noreturn]]
|
||||
void all_tokens_were_not_parsed(std::span<Token>);
|
||||
}
|
||||
|
67
src/parser.cc
Normal file
67
src/parser.cc
Normal file
@ -0,0 +1,67 @@
|
||||
#include <musique.hh>
|
||||
|
||||
// Lexes the whole `source` into tokens and parses them into an Ast.
// `filename` is stored as the file name of the lexer's location.
// Returns the first lexer error other than End_Of_File, or the parser's result.
// Tokens left over after a *successful* parse are reported through
// errors::all_tokens_were_not_parsed, which does not return.
Result<Ast> Parser::parse(std::string_view source, std::string_view filename)
{
	Lexer lexer{source};
	lexer.location.filename = filename;
	Parser parser;

	for (;;) {
		auto maybe_token = lexer.next_token();
		if (maybe_token.has_value()) {
			parser.tokens.emplace_back(*std::move(maybe_token));
			continue;
		}
		// End of file is the regular way of finishing tokenization
		if (maybe_token.error().type == errors::End_Of_File) {
			break;
		}
		return std::move(maybe_token).error();
	}

	// Deliberately not const, so that the result is moved rather than copied on return
	auto result = parser.parse_expression();

	// Only a successful parse is expected to consume every token. A failed parse
	// stops at the offending token, so unconsumed tokens are normal then and the
	// parse error itself must reach the caller instead of a bug report.
	if (result.has_value() && parser.token_id < parser.tokens.size()) {
		errors::all_tokens_were_not_parsed(std::span(parser.tokens).subspan(parser.token_id));
	}

	return result;
}
|
||||
|
||||
// Parses an expression; currently this delegates to binary operator parsing.
Result<Ast> Parser::parse_expression()
{
	Result<Ast> expression = parse_binary_operator();
	return expression;
}
|
||||
|
||||
// Parses a binary operator expression; currently this delegates to literal parsing.
Result<Ast> Parser::parse_binary_operator()
{
	Result<Ast> operand = parse_literal();
	return operand;
}
|
||||
|
||||
// Parses a literal: requires the current token to be Numeric,
// then consumes it and wraps it in a literal Ast node.
Result<Ast> Parser::parse_literal()
{
	Try(ensure(Token::Type::Numeric));
	Token numeric = consume();
	return Ast::literal(std::move(numeric));
}
|
||||
|
||||
// Moves the current token out of the token list and advances to the next one.
// No bounds check is performed here.
Token Parser::consume()
{
	Token &current = tokens[token_id];
	++token_id;
	return std::move(current);
}
|
||||
|
||||
// Tells whether there is a current token and it has the given type.
bool Parser::expect(Token::Type type) const
{
	if (token_id >= tokens.size()) {
		return false;
	}
	return tokens[token_id].type == type;
}
|
||||
|
||||
// Ensures that the current token has the given type.
// Returns an empty success value when it does, an Unexpected_Token_Type error
// when the current token has another type, and an Unexpected_Empty_Source error
// when there is no current token (located at the last token, if any exists).
Result<Unit> Parser::ensure(Token::Type type) const
{
	if (token_id < tokens.size()) {
		Token const& current = tokens[token_id];
		if (current.type == type) {
			return Result<Unit>{};
		}
		return errors::unexpected_token(type, current);
	}

	if (tokens.empty()) {
		// No tokens at all (e.g. empty source): calling back() on an empty vector
		// is undefined behavior, so report the error without a location instead.
		Error err;
		err.type = errors::Unexpected_Empty_Source;
		return err;
	}

	return errors::unexpected_end_of_source(tokens.back().location);
}
|
||||
|
||||
// Named constructor: builds a Literal node that owns the given token.
Ast Ast::literal(Token token)
{
	Ast node;
	node.token = std::move(token);
	node.type = Ast::Type::Literal;
	return node;
}
|
11
src/tests/parser.cc
Normal file
11
src/tests/parser.cc
Normal file
@ -0,0 +1,11 @@
|
||||
#include <boost/ut.hpp>
|
||||
#include <musique.hh>
|
||||
|
||||
using namespace boost::ut;
|
||||
|
||||
// Parser test suite: a single numeric literal must parse successfully.
suite parser_test = [] {
	"Literal parsing"_test = [] {
		auto const parsed = Parser::parse("1", "test");
		bool const succeeded = parsed.has_value();
		expect(succeeded) << "code was expected to parse, but had not";
	};
};
|
Loading…
Reference in New Issue
Block a user