From 8d0507e341d5afb7563f8529ea10603f32a9b6b7 Mon Sep 17 00:00:00 2001 From: Robert Bendun Date: Wed, 27 Apr 2022 14:58:02 +0200 Subject: [PATCH] Token location tracking --- src/lexer.cc | 39 ++++++++++++++++++++++++++++++++------- src/musique.hh | 30 ++++++++++++++++++++++++++---- src/tests/lex.cc | 19 +++++++++++++++++++ 3 files changed, 77 insertions(+), 11 deletions(-) diff --git a/src/lexer.cc b/src/lexer.cc index e80e92c..df5b719 100644 --- a/src/lexer.cc +++ b/src/lexer.cc @@ -11,11 +11,11 @@ auto Lexer::next_token() -> Result } switch (peek()) { - case '(': consume(); return { Token::Type::Open_Paren, finish() }; - case ')': consume(); return { Token::Type::Close_Paren, finish() }; - case '[': consume(); return { Token::Type::Open_Block, finish() }; - case ']': consume(); return { Token::Type::Close_Block, finish() }; - case '|': consume(); return { Token::Type::Variable_Separator, finish() }; + case '(': consume(); return { Token::Type::Open_Paren, finish(), token_location }; + case ')': consume(); return { Token::Type::Close_Paren, finish(), token_location }; + case '[': consume(); return { Token::Type::Open_Block, finish(), token_location }; + case ']': consume(); return { Token::Type::Close_Block, finish(), token_location }; + case '|': consume(); return { Token::Type::Variable_Separator, finish(), token_location }; } // Number literals like .75 @@ -23,7 +23,7 @@ auto Lexer::next_token() -> Result consume(); while (consume_if(unicode::is_digit)) {} if (token_length != 1) - return { Token::Type::Numeric, finish() }; + return { Token::Type::Numeric, finish(), token_location }; } if (consume_if(unicode::is_digit)) { @@ -38,7 +38,7 @@ auto Lexer::next_token() -> Result rewind(); } } - return { Token::Type::Numeric, finish() }; + return { Token::Type::Numeric, finish(), token_location }; } return errors::Unrecognized_Character; @@ -56,11 +56,13 @@ auto Lexer::peek() const -> u32 auto Lexer::consume() -> u32 { + prev_location = location; if (not source.empty()) { if (auto [rune, remaining] = utf8::decode(source); rune != utf8::Rune_Error) { last_rune_length = remaining.data() - source.data(); source = remaining; token_length += last_rune_length; + location.advance(rune); return rune; } } @@ -69,14 +71,18 @@ auto Lexer::consume() -> u32 void Lexer::rewind() { + assert(last_rune_length != 0); source = { source.data() - last_rune_length, source.size() + last_rune_length }; token_length -= last_rune_length; + location = prev_location; + last_rune_length = 0; } void Lexer::start() { token_start = source.data(); token_length = 0; + token_location = location; } std::string_view Lexer::finish() @@ -92,3 +98,22 @@ std::ostream& operator<<(std::ostream& os, Token const&) os << "Token"; return os; } + +Location Location::advance(u32 rune) +{ + switch (rune) { + case '\n': + line += 1; + [[fallthrough]]; + case '\r': + column = 1; + return *this; + } + column += 1; + return *this; +} + +std::ostream& operator<<(std::ostream& os, Location const& location) +{ + return os << location.filename << ':' << location.line << ':' << location.column; +} diff --git a/src/musique.hh b/src/musique.hh index 8572258..852fd14 100644 --- a/src/musique.hh +++ b/src/musique.hh @@ -1,8 +1,9 @@ #pragma once +#include #include -#include #include +#include #include using u8 = std::uint8_t; @@ -29,6 +30,26 @@ namespace errors }; } +struct Location +{ + std::string_view filename = ""; + usize column = 1, line = 1; + + Location advance(u32 rune); + + bool operator==(Location const& rhs) const = default; + + static Location at(usize line, usize column) + { + Location loc; + loc.line = line; + loc.column = column; + return loc; + } +}; + +std::ostream& operator<<(std::ostream& os, Location const& location); + struct Error { errors::Type type; @@ -111,6 +132,7 @@ struct Token Type type; std::string_view source; + Location location; }; std::ostream& operator<<(std::ostream& os, Token const& tok); @@ -125,10 +147,10 @@ struct Lexer char const* token_start = nullptr; usize token_length = 0; + Location token_location{}; - // Determine location of tokens to produce nice errors - std::string_view source_name = ""; - unsigned column = 1, row = 1; + Location prev_location{}; + Location location{}; auto next_token() -> Result; diff --git a/src/tests/lex.cc b/src/tests/lex.cc index 617d78d..2319d11 100644 --- a/src/tests/lex.cc +++ b/src/tests/lex.cc @@ -40,6 +40,18 @@ static void expect_token_type_and_value( expect_token_type_and_value(expected_type, source, source, sl); } +static void expect_token_type_and_location( + Token::Type expected_type, + std::string_view source, + Location location, + reflection::source_location const& sl = reflection::source_location::current()) +{ + Lexer lexer{source}; + auto result = lexer.next_token(); + expect(result.has_value() >> fatal, sl) << "have not parsed any tokens"; + expect(eq(under(result->type), under(expected_type)), sl) << "different token type then expected"; + expect(eq(result->location, location), sl) << "tokenized source is at different place then expected"; +} suite lexer_test = [] { "Empty file"_test = [] { @@ -67,4 +79,11 @@ suite lexer_test = [] { expect_token_type_and_value(Token::Type::Numeric, " 1 ", "1"); expect_token_type_and_value(Token::Type::Numeric, " 123 ", "123"); }; + + "Proper location marking"_test = [] { + expect_token_type_and_location(Token::Type::Numeric, "123", Location::at(1, 1)); + expect_token_type_and_location(Token::Type::Numeric, " 123", Location::at(1, 4)); + expect_token_type_and_location(Token::Type::Numeric, "\n123", Location::at(2, 1)); + expect_token_type_and_location(Token::Type::Numeric, "\n 123", Location::at(2, 3)); + }; };