Token location tracking

This commit is contained in:
Robert Bendun 2022-04-27 14:58:02 +02:00
parent fd4db02e8d
commit 8d0507e341
3 changed files with 77 additions and 11 deletions

View File

@ -11,11 +11,11 @@ auto Lexer::next_token() -> Result<Token>
} }
switch (peek()) { switch (peek()) {
case '(': consume(); return { Token::Type::Open_Paren, finish() }; case '(': consume(); return { Token::Type::Open_Paren, finish(), token_location };
case ')': consume(); return { Token::Type::Close_Paren, finish() }; case ')': consume(); return { Token::Type::Close_Paren, finish(), token_location };
case '[': consume(); return { Token::Type::Open_Block, finish() }; case '[': consume(); return { Token::Type::Open_Block, finish(), token_location };
case ']': consume(); return { Token::Type::Close_Block, finish() }; case ']': consume(); return { Token::Type::Close_Block, finish(), token_location };
case '|': consume(); return { Token::Type::Variable_Separator, finish() }; case '|': consume(); return { Token::Type::Variable_Separator, finish(), token_location };
} }
// Number literals like .75 // Number literals like .75
@ -23,7 +23,7 @@ auto Lexer::next_token() -> Result<Token>
consume(); consume();
while (consume_if(unicode::is_digit)) {} while (consume_if(unicode::is_digit)) {}
if (token_length != 1) if (token_length != 1)
return { Token::Type::Numeric, finish() }; return { Token::Type::Numeric, finish(), token_location };
} }
if (consume_if(unicode::is_digit)) { if (consume_if(unicode::is_digit)) {
@ -38,7 +38,7 @@ auto Lexer::next_token() -> Result<Token>
rewind(); rewind();
} }
} }
return { Token::Type::Numeric, finish() }; return { Token::Type::Numeric, finish(), token_location };
} }
return errors::Unrecognized_Character; return errors::Unrecognized_Character;
@ -56,11 +56,13 @@ auto Lexer::peek() const -> u32
auto Lexer::consume() -> u32 auto Lexer::consume() -> u32
{ {
prev_location = location;
if (not source.empty()) { if (not source.empty()) {
if (auto [rune, remaining] = utf8::decode(source); rune != utf8::Rune_Error) { if (auto [rune, remaining] = utf8::decode(source); rune != utf8::Rune_Error) {
last_rune_length = remaining.data() - source.data(); last_rune_length = remaining.data() - source.data();
source = remaining; source = remaining;
token_length += last_rune_length; token_length += last_rune_length;
location.advance(rune);
return rune; return rune;
} }
} }
@ -69,14 +71,18 @@ auto Lexer::consume() -> u32
void Lexer::rewind() void Lexer::rewind()
{ {
assert(last_rune_length != 0);
source = { source.data() - last_rune_length, source.size() + last_rune_length }; source = { source.data() - last_rune_length, source.size() + last_rune_length };
token_length -= last_rune_length; token_length -= last_rune_length;
location = prev_location;
last_rune_length = 0;
} }
void Lexer::start() void Lexer::start()
{ {
token_start = source.data(); token_start = source.data();
token_length = 0; token_length = 0;
token_location = location;
} }
std::string_view Lexer::finish() std::string_view Lexer::finish()
@ -92,3 +98,22 @@ std::ostream& operator<<(std::ostream& os, Token const&)
os << "Token"; os << "Token";
return os; return os;
} }
// Moves this location past one decoded rune and returns a copy of the
// updated location.
//
// A line feed starts a new line and resets the column to 1. A carriage return
// only resets the column, so "\r\n" is counted as a single line break.
// Every other rune moves the column forward by one.
Location Location::advance(u32 rune)
{
	if (rune == '\n') {
		line += 1;
		column = 1;
	} else if (rune == '\r') {
		column = 1;
	} else {
		column += 1;
	}
	return *this;
}
// Writes a location in the form `filename:line:column` and returns the stream
// so that insertions can be chained.
std::ostream& operator<<(std::ostream& os, Location const& location)
{
	os << location.filename;
	os << ':' << location.line;
	os << ':' << location.column;
	return os;
}

View File

@ -1,8 +1,9 @@
#pragma once #pragma once
#include <cassert>
#include <cstdint> #include <cstdint>
#include <string_view>
#include <ostream> #include <ostream>
#include <string_view>
#include <tl/expected.hpp> #include <tl/expected.hpp>
using u8 = std::uint8_t; using u8 = std::uint8_t;
@ -29,6 +30,26 @@ namespace errors
}; };
} }
// Position inside a named piece of source text.
// Both line and column start counting at 1.
struct Location
{
	// Name of the source. This is a view: the struct does not own the characters.
	std::string_view filename = "<unnamed>";
	usize column = 1;
	usize line = 1;

	// Moves the position past `rune` (defined out of line).
	Location advance(u32 rune);

	// Member-wise equality, filename included.
	bool operator==(Location const& rhs) const = default;

	// Builds a location at the given line and column, keeping the default filename.
	static Location at(usize line, usize column)
	{
		Location result;
		result.column = column;
		result.line = line;
		return result;
	}
};
std::ostream& operator<<(std::ostream& os, Location const& location);
struct Error struct Error
{ {
errors::Type type; errors::Type type;
@ -111,6 +132,7 @@ struct Token
Type type; Type type;
std::string_view source; std::string_view source;
Location location;
}; };
std::ostream& operator<<(std::ostream& os, Token const& tok); std::ostream& operator<<(std::ostream& os, Token const& tok);
@ -125,10 +147,10 @@ struct Lexer
char const* token_start = nullptr; char const* token_start = nullptr;
usize token_length = 0; usize token_length = 0;
Location token_location{};
// Determine location of tokens to produce nice errors Location prev_location{};
std::string_view source_name = "<unnamed>"; Location location{};
unsigned column = 1, row = 1;
auto next_token() -> Result<Token>; auto next_token() -> Result<Token>;

View File

@ -40,6 +40,18 @@ static void expect_token_type_and_value(
expect_token_type_and_value(expected_type, source, source, sl); expect_token_type_and_value(expected_type, source, source, sl);
} }
// Lexes the first token of `source` and checks its type and the location
// recorded for it.
//
// `sl` is forwarded to every expectation, so a failure is reported at the
// call site of this helper rather than inside it.
static void expect_token_type_and_location(
	Token::Type expected_type,
	std::string_view source,
	Location location,
	reflection::source_location const& sl = reflection::source_location::current())
{
	Lexer tokenizer{source};
	auto first_token = tokenizer.next_token();

	// Marked fatal: the checks below dereference the result.
	expect(first_token.has_value() >> fatal, sl) << "have not parsed any tokens";

	expect(eq(under(first_token->type), under(expected_type)), sl) << "different token type then expected";
	expect(eq(first_token->location, location), sl) << "tokenized source is at different place then expected";
}
suite lexer_test = [] { suite lexer_test = [] {
"Empty file"_test = [] { "Empty file"_test = [] {
@ -67,4 +79,11 @@ suite lexer_test = [] {
expect_token_type_and_value(Token::Type::Numeric, " 1 ", "1"); expect_token_type_and_value(Token::Type::Numeric, " 1 ", "1");
expect_token_type_and_value(Token::Type::Numeric, " 123 ", "123"); expect_token_type_and_value(Token::Type::Numeric, " 123 ", "123");
}; };
// Checks that the location stored in a token points at the token's first
// character, including when spaces or a newline come before it.
//
// Each ordinary rune moves the column forward by one, so a token expected at
// column 4 needs three characters before it on its line, and one expected at
// column 3 needs two. The literals below carry exactly that many spaces.
// NOTE(review): assumes the lexer skips leading whitespace before it records
// the token's start location - confirm against Lexer::next_token.
"Proper location marking"_test = [] {
	expect_token_type_and_location(Token::Type::Numeric, "123", Location::at(1, 1));
	expect_token_type_and_location(Token::Type::Numeric, "   123", Location::at(1, 4));
	expect_token_type_and_location(Token::Type::Numeric, "\n123", Location::at(2, 1));
	expect_token_type_and_location(Token::Type::Numeric, "\n  123", Location::at(2, 3));
};
}; };