Parser started: basic literal parsing
This commit mostly sets up the structure needed for parsing rather than implementing the actual parsing itself.
This commit is contained in:
parent
ead70bda5d
commit
31262e74a0
1
Makefile
1
Makefile
@ -4,6 +4,7 @@ CPPFLAGS:=$(CPPFLAGS) -Ilib/expected/ -Ilib/ut/ -Isrc/
|
|||||||
|
|
||||||
Obj=bin/errors.o \
|
Obj=bin/errors.o \
|
||||||
bin/lexer.o \
|
bin/lexer.o \
|
||||||
|
bin/parser.o \
|
||||||
bin/unicode.o \
|
bin/unicode.o \
|
||||||
bin/unicode_tables.o
|
bin/unicode_tables.o
|
||||||
|
|
||||||
|
@ -1,49 +1,112 @@
|
|||||||
#include <musique.hh>
|
#include <musique.hh>
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <sstream>
|
||||||
|
|
||||||
/// Compares only the error category: true when this error's `type` equals `type`.
/// Location and message are not taken into account.
bool Error::operator==(errors::Type type)
{
	errors::Type const own_type = this->type;
	return own_type == type;
}
|
||||||
|
|
||||||
std::ostream& operator<<(std::ostream& os, Error const& err)
|
/// Attaches `loc` to this (expiring) error and returns the resulting error.
///
/// The function is rvalue-qualified, so the object is about to die anyway:
/// its contents (including the message string) are moved into the result
/// instead of being copied. `*this` is an lvalue even inside a `&&`-qualified
/// member, hence the explicit std::move.
Error Error::with(Location loc) &&
{
	location = std::move(loc);
	return std::move(*this);
}
|
||||||
|
|
||||||
|
/// Severity label printed by error_heading() in front of a diagnostic.
enum class Error_Level
{
	Error,  // printed as ": error: "
	Notice, // printed as ": notice: "
	Bug,    // printed as ": implementation bug: "
};
|
||||||
|
|
||||||
|
static void error_heading(std::ostream &os, std::optional<Location> location, Error_Level lvl)
|
||||||
|
{
|
||||||
|
if (location) {
|
||||||
|
os << *location;
|
||||||
} else {
|
} else {
|
||||||
os << "musique";
|
os << "musique";
|
||||||
}
|
}
|
||||||
|
|
||||||
os << ": error: ";
|
switch (lvl) {
|
||||||
|
case Error_Level::Bug: os << ": implementation bug: "; return;
|
||||||
|
case Error_Level::Error: os << ": error: "; return;
|
||||||
|
case Error_Level::Notice: os << ": notice: "; return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::ostream& operator<<(std::ostream& os, Error const& err)
|
||||||
|
{
|
||||||
|
error_heading(os, err.location, Error_Level::Error);
|
||||||
|
|
||||||
switch (err.type) {
|
switch (err.type) {
|
||||||
case errors::End_Of_File:
|
case errors::End_Of_File:
|
||||||
return os << "end of file\n";
|
return os << "end of file\n";
|
||||||
|
|
||||||
case errors::Unrecognized_Character:
|
case errors::Unrecognized_Character:
|
||||||
if (err.invalid_character) {
|
return err.message.empty() ? os << "unrecognized character\n" : os << err.message;
|
||||||
return os << "unrecognized charater U+" << std::hex << err.invalid_character
|
|
||||||
<< " (char: '" << utf8::Print{err.invalid_character} << "')\n";
|
case errors::Unexpected_Token_Type:
|
||||||
} else {
|
return os << err.message;
|
||||||
return os << "unrecognized character\n";
|
|
||||||
}
|
case errors::Unexpected_Empty_Source:
|
||||||
|
return os << "unexpected end of input\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
return os << "unrecognized error type\n";
|
return os << "unrecognized error type\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Streams all arguments, in order, into a string stream and returns the
/// accumulated text. Stream manipulators (like std::hex) can be passed as
/// arguments and affect the arguments that follow them.
template<typename ...Args>
static std::string format(Args const& ...args)
{
	std::stringstream buffer;
	(void) (buffer << ... << args);
	return buffer.str();
}
|
||||||
|
|
||||||
/// Creates an Unrecognized_Character error without a location.
/// The message contains the code point in hexadecimal and the character itself.
Error errors::unrecognized_character(u32 invalid_character)
{
	Error err;
	err.type = errors::Unrecognized_Character;
	err.message = format(
		"unrecognized character U+",
		std::hex, invalid_character,
		" (char: '", utf8::Print{invalid_character}, "')");

	return err;
}
|
||||||
|
|
||||||
/// Creates an Unrecognized_Character error for `invalid_character`
/// and marks it as found at `location`.
Error errors::unrecognized_character(u32 invalid_character, Location location)
{
	Error err = unrecognized_character(invalid_character);
	err.location = std::move(location);
	return err;
}
|
||||||
|
|
||||||
|
/// Creates an Unexpected_Token_Type error located at the `unexpected` token.
/// The message names both the expected token type and the type that was found.
Error errors::unexpected_token(Token::Type expected, Token const& unexpected)
{
	Error err;
	err.message = format("expected ", expected, ", but got ", unexpected.type);
	err.location = unexpected.location;
	err.type = errors::Unexpected_Token_Type;
	return err;
}
|
||||||
|
|
||||||
|
/// Creates an Unexpected_Empty_Source error pointing at `location`.
Error errors::unexpected_end_of_source(Location location)
{
	Error err;
	err.location = location;
	err.type = errors::Unexpected_Empty_Source;
	return err;
}
|
||||||
|
|
||||||
|
/// Reports on standard error that the parser finished while tokens were still
/// left, lists every remaining token as a notice and terminates the program
/// with exit code 1. Never returns.
void errors::all_tokens_were_not_parsed(std::span<Token> tokens)
{
	std::ostream &out = std::cerr;

	error_heading(out, std::nullopt, Error_Level::Bug);
	out << "remaining tokens after parsing. Listing remaining tokens:\n";

	for (std::size_t i = 0; i < tokens.size(); ++i) {
		Token const& leftover = tokens[i];
		error_heading(out, leftover.location, Error_Level::Notice);
		out << leftover << '\n';
	}

	std::exit(1);
}
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
#include <musique.hh>
|
#include <musique.hh>
|
||||||
|
|
||||||
|
#include <iomanip>
|
||||||
|
|
||||||
// Letters that are recognized as note names. Each letter is listed exactly once.
constexpr std::string_view Notes_Symbols = "abcdefgh";
|
||||||
constexpr std::string_view Valid_Operator_Chars =
|
constexpr std::string_view Valid_Operator_Chars =
|
||||||
"+-*/:%" // arithmetic
|
"+-*/:%" // arithmetic
|
||||||
@ -241,10 +243,9 @@ std::string_view Lexer::finish()
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::ostream& operator<<(std::ostream& os, Token const&)
|
std::ostream& operator<<(std::ostream& os, Token const& token)
|
||||||
{
|
{
|
||||||
os << "Token";
|
return os << '{' << token.type << ", " << std::quoted(token.source) << ", " << token.location << '}';
|
||||||
return os;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::ostream& operator<<(std::ostream& os, Token::Type type)
|
std::ostream& operator<<(std::ostream& os, Token::Type type)
|
||||||
|
45
src/main.cc
45
src/main.cc
@ -1,25 +1,42 @@
|
|||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <musique.hh>
|
#include <musique.hh>
|
||||||
|
|
||||||
std::string_view Source = R"musique(
|
#include <span>
|
||||||
nums = [ 1 2 3 ]
|
|
||||||
say ( min nums + max nums )
|
|
||||||
)musique";
|
|
||||||
|
|
||||||
tl::expected<void, Error> Main()
|
static std::string_view pop(std::span<char const*> &span)
|
||||||
{
|
{
|
||||||
Lexer lexer{Source};
|
auto element = span.front();
|
||||||
lexer.location.filename = "example.mq";
|
span = span.subspan(1);
|
||||||
|
return element;
|
||||||
for (;;) {
|
|
||||||
auto token = Try(lexer.next_token());
|
|
||||||
std::cout << token << '\n';
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int main()
|
/// Processes command line arguments from left to right.
///
/// Supported options:
///   -c SOURCE, --run SOURCE   parse SOURCE and report success on standard output
///
/// An unknown option or a missing option argument prints a message on standard
/// error and terminates the program with exit code 1. A parsing failure is
/// propagated to the caller through the returned Result.
Result<Unit> Main(std::span<char const*> args)
{
	while (not args.empty()) {
		std::string_view const option = pop(args);
		bool const is_run_option = option == "-c" || option == "--run";

		if (not is_run_option) {
			std::cerr << "musique: error: unrecognized command line option: " << option << std::endl;
			std::exit(1);
		}

		if (args.empty()) {
			std::cerr << "musique: error: option " << option << " requires an argument" << std::endl;
			std::exit(1);
		}

		auto const code = pop(args);
		Try(Parser::parse(code, "arguments"));
		std::cout << "successfully parsed: " << code << " \n";
	}

	return {};
}
|
||||||
|
|
||||||
|
int main(int argc, char const** argv)
|
||||||
|
{
|
||||||
|
auto result = Main(std::span{ argv+1, usize(argc-1) });
|
||||||
if (not result.has_value()) {
|
if (not result.has_value()) {
|
||||||
std::cerr << result.error() << std::flush;
|
std::cerr << result.error() << std::flush;
|
||||||
return 1;
|
return 1;
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
#include <ostream>
|
#include <ostream>
|
||||||
|
#include <span>
|
||||||
#include <string_view>
|
#include <string_view>
|
||||||
#include <tl/expected.hpp>
|
#include <tl/expected.hpp>
|
||||||
#include <variant>
|
#include <variant>
|
||||||
@ -23,14 +24,21 @@ using i64 = std::int64_t;
|
|||||||
using usize = std::size_t;
|
using usize = std::size_t;
|
||||||
using isize = std::ptrdiff_t;
|
using isize = std::ptrdiff_t;
|
||||||
|
|
||||||
|
/// Empty type carrying no information. Used as the value type of a Result
/// (`Result<Unit>`) by functions that only report success or failure.
struct Unit
{
};
|
||||||
|
|
||||||
#define Fun(Function) ([]<typename ...T>(T&& ...args) { return (Function)(std::forward<T>(args)...); })
|
#define Fun(Function) ([]<typename ...T>(T&& ...args) { return (Function)(std::forward<T>(args)...); })
|
||||||
|
|
||||||
|
// Error handling mechanism inspired by Andrew Kelley's approach, which was implemented
|
||||||
|
// as a first-class feature in the Zig programming language.
|
||||||
namespace errors
{
	/// Categories of errors reported by the lexer and the parser.
	enum Type
	{
		// Lexer errors
		End_Of_File,            // no more input; the parser treats it as the end of tokens
		Unrecognized_Character, // character that does not start any token

		// Parser errors
		Unexpected_Token_Type,   // current token has a different type than required
		Unexpected_Empty_Source, // a token was required but none was left
	};
}
|
||||||
|
|
||||||
@ -58,36 +66,40 @@ struct Error
|
|||||||
{
|
{
|
||||||
errors::Type type;
|
errors::Type type;
|
||||||
std::optional<Location> location = std::nullopt;
|
std::optional<Location> location = std::nullopt;
|
||||||
u32 invalid_character = 0;
|
std::string message{};
|
||||||
|
|
||||||
bool operator==(errors::Type);
|
bool operator==(errors::Type);
|
||||||
|
Error with(Location) &&;
|
||||||
};
|
};
|
||||||
|
|
||||||
namespace errors
|
|
||||||
{
|
|
||||||
Error unrecognized_character(u32 invalid_character);
|
|
||||||
Error unrecognized_character(u32 invalid_character, Location location);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
struct Result : tl::expected<T, Error>
|
struct Result : tl::expected<T, Error>
|
||||||
{
|
{
|
||||||
using Storage = tl::expected<T, Error>;
|
using Storage = tl::expected<T, Error>;
|
||||||
|
|
||||||
constexpr Result() = default;
|
constexpr Result() = default;
|
||||||
|
constexpr Result(Result const&) = default;
|
||||||
|
constexpr Result(Result&&) = default;
|
||||||
|
constexpr Result& operator=(Result const&) = default;
|
||||||
|
constexpr Result& operator=(Result&&) = default;
|
||||||
|
|
||||||
constexpr Result(errors::Type error)
|
constexpr Result(errors::Type error)
|
||||||
: Storage(tl::unexpected(Error { error }))
|
: Storage(tl::unexpected(Error { error }))
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr Result(Error error)
|
inline Result(Error error)
|
||||||
: Storage(tl::unexpected(std::move(error)))
|
: Storage(tl::unexpected(std::move(error)))
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline Result(tl::unexpected<Error> error)
|
||||||
|
: Storage(std::move(error))
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
template<typename ...Args>
|
template<typename ...Args>
|
||||||
|
requires std::is_constructible_v<T, Args...>
|
||||||
constexpr Result(Args&& ...args)
|
constexpr Result(Args&& ...args)
|
||||||
: Storage( T{ std::forward<Args>(args)... } )
|
: Storage( T{ std::forward<Args>(args)... } )
|
||||||
{
|
{
|
||||||
@ -230,3 +242,55 @@ struct Lexer
|
|||||||
// Marks the end of a token and returns its matching source
|
// Marks the end of a token and returns its matching source
|
||||||
std::string_view finish();
|
std::string_view finish();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct Ast
|
||||||
|
{
|
||||||
|
// Named constructors of AST structure
|
||||||
|
static Ast literal(Token);
|
||||||
|
|
||||||
|
enum class Type
|
||||||
|
{
|
||||||
|
Literal
|
||||||
|
};
|
||||||
|
|
||||||
|
Type type;
|
||||||
|
Token token;
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename Expected, typename ...T>
|
||||||
|
concept Var_Args = (std::is_same_v<Expected, T> && ...) && (sizeof...(T) >= 1);
|
||||||
|
|
||||||
|
struct Parser
|
||||||
|
{
|
||||||
|
std::vector<Token> tokens;
|
||||||
|
unsigned token_id = 0;
|
||||||
|
|
||||||
|
// Parses whole source code producing Ast or Error
|
||||||
|
// using Parser structure internally
|
||||||
|
static Result<Ast> parse(std::string_view source, std::string_view filename);
|
||||||
|
|
||||||
|
Result<Ast> parse_expression();
|
||||||
|
Result<Ast> parse_binary_operator();
|
||||||
|
Result<Ast> parse_literal();
|
||||||
|
|
||||||
|
Token consume();
|
||||||
|
|
||||||
|
// Tests if current token has given type
|
||||||
|
bool expect(Token::Type type) const;
|
||||||
|
|
||||||
|
// Ensures that current token has one of types given.
|
||||||
|
// Otherwise returns error
|
||||||
|
Result<Unit> ensure(Token::Type type) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace errors
|
||||||
|
{
|
||||||
|
Error unrecognized_character(u32 invalid_character);
|
||||||
|
Error unrecognized_character(u32 invalid_character, Location location);
|
||||||
|
|
||||||
|
Error unexpected_token(Token::Type expected, Token const& unexpected);
|
||||||
|
Error unexpected_end_of_source(Location location);
|
||||||
|
|
||||||
|
[[noreturn]]
|
||||||
|
void all_tokens_were_not_parsed(std::span<Token>);
|
||||||
|
}
|
||||||
|
67
src/parser.cc
Normal file
67
src/parser.cc
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
#include <musique.hh>
|
||||||
|
|
||||||
|
/// Parses the whole `source`, producing an Ast or an Error.
///
/// `filename` is only used to label locations in diagnostics.
///
/// All tokens are lexed up front; a lexer error other than End_Of_File is
/// returned immediately. A parser error is returned to the caller as well.
/// Only when parsing succeeded but did not consume every token is the leftover
/// reported as an implementation bug (which terminates the program).
Result<Ast> Parser::parse(std::string_view source, std::string_view filename)
{
	Lexer lexer{source};
	lexer.location.filename = filename;
	Parser parser;

	for (;;) {
		auto maybe_token = lexer.next_token();
		if (maybe_token.has_value()) {
			parser.tokens.emplace_back(*std::move(maybe_token));
			continue;
		}

		// End of file is the normal way of finishing tokenization
		if (maybe_token.error().type == errors::End_Of_File) {
			break;
		}

		return std::move(maybe_token).error();
	}

	// Not const, so the result can be moved out on return
	auto result = parser.parse_expression();

	// A failed parse naturally leaves tokens behind; that is a user error carried
	// by `result`, not a bug in the parser, so the leftover check is skipped then.
	if (result.has_value() && parser.token_id < parser.tokens.size()) {
		errors::all_tokens_were_not_parsed(std::span(parser.tokens).subspan(parser.token_id));
	}

	return result;
}
|
||||||
|
|
||||||
|
/// Parses an expression. Currently an expression is whatever
/// parse_binary_operator() accepts.
Result<Ast> Parser::parse_expression()
{
	Result<Ast> expression = parse_binary_operator();
	return expression;
}
|
||||||
|
|
||||||
|
/// Parses a binary operator expression. Operators are not handled yet,
/// so this only forwards to parse_literal().
Result<Ast> Parser::parse_binary_operator()
{
	Result<Ast> operand = parse_literal();
	return operand;
}
|
||||||
|
|
||||||
|
/// Parses a literal: requires the current token to be Numeric
/// and wraps it in a Literal node.
Result<Ast> Parser::parse_literal()
{
	Try(ensure(Token::Type::Numeric));
	Token numeric = consume();
	return Ast::literal(std::move(numeric));
}
|
||||||
|
|
||||||
|
/// Returns the current token (moved out of the token list) and advances to the
/// next one. Performs no bounds check on `token_id`.
Token Parser::consume()
{
	Token &current = tokens[token_id];
	++token_id;
	return std::move(current);
}
|
||||||
|
|
||||||
|
/// Tests if there is a current token and it has the given type.
bool Parser::expect(Token::Type type) const
{
	if (token_id >= tokens.size()) {
		return false;
	}
	return tokens[token_id].type == type;
}
|
||||||
|
|
||||||
|
/// Ensures that the current token has the given type.
///
/// Returns:
///  - an Unexpected_Empty_Source error when there is no current token; it points
///    at the last token when one exists and carries no location otherwise,
///  - an Unexpected_Token_Type error when the current token has a different type,
///  - an empty success value otherwise.
Result<Unit> Parser::ensure(Token::Type type) const
{
	if (token_id >= tokens.size()) {
		// With no tokens at all there is no last token to point at;
		// calling back() on an empty vector would be undefined behaviour.
		if (tokens.empty()) {
			Error err;
			err.type = errors::Unexpected_Empty_Source;
			return err;
		}
		return errors::unexpected_end_of_source(tokens.back().location);
	}

	if (tokens[token_id].type != type) {
		return errors::unexpected_token(type, tokens[token_id]);
	}

	return Result<Unit>{};
}
|
||||||
|
|
||||||
|
/// Creates a Literal node that takes ownership of the given token.
Ast Ast::literal(Token token)
{
	Ast node;
	node.token = std::move(token);
	node.type = Ast::Type::Literal;
	return node;
}
|
11
src/tests/parser.cc
Normal file
11
src/tests/parser.cc
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
#include <boost/ut.hpp>
|
||||||
|
#include <musique.hh>
|
||||||
|
|
||||||
|
using namespace boost::ut;
|
||||||
|
|
||||||
|
// Tests of the parser's public entry point
suite parser_test = [] {
	// A single numeric literal is the simplest program that must parse
	"Literal parsing"_test = [] {
		auto const parsed = Parser::parse("1", "test");
		bool const succeeded = parsed.has_value();
		expect(succeeded) << "code was expected to parse, but had not";
	};
};
|
Loading…
Reference in New Issue
Block a user