Token location tracking

Robert Bendun 2022-04-27 14:58:02 +02:00
parent fd4db02e8d
commit 8d0507e341
3 changed files with 77 additions and 11 deletions
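Summary: tokens now carry the source Location at which they start. The Lexer keeps three cursors: location (the current position), prev_location (the position before the last consume(), so rewind() can step back one rune), and token_location (a snapshot taken in start() and attached to the token being built). Location::advance moves the cursor one rune at a time, bumping the line on '\n' and resetting the column on '\n' and '\r'. Below is a condensed sketch of that cycle; Mini_Lexer and the simplified Location are hypothetical stand-ins (ASCII-only, no error handling), not code from this commit.

#include <string_view>

// Simplified stand-in for the Location type added in this commit.
struct Location
{
	std::string_view filename = "<unnamed>";
	unsigned long column = 1, line = 1;

	void advance(char c)
	{
		switch (c) {
		case '\n': line += 1; [[fallthrough]];
		case '\r': column = 1; return;
		}
		column += 1;
	}
};

// Hypothetical Mini_Lexer showing the tracking cycle:
// start() snapshots, consume() advances, rewind() undoes one step.
struct Mini_Lexer
{
	std::string_view source;
	Location location{}, prev_location{}, token_location{};

	void start() { token_location = location; }   // token begins at the current position

	char consume()
	{
		prev_location = location;                  // kept so rewind() can restore it
		char c = source.front();                   // the real lexer decodes a UTF-8 rune here
		source.remove_prefix(1);
		location.advance(c);
		return c;
	}

	void rewind() { location = prev_location; }    // un-consume the last character
};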

View File

@@ -11,11 +11,11 @@ auto Lexer::next_token() -> Result<Token>
}
switch (peek()) {
case '(': consume(); return { Token::Type::Open_Paren, finish() };
case ')': consume(); return { Token::Type::Close_Paren, finish() };
case '[': consume(); return { Token::Type::Open_Block, finish() };
case ']': consume(); return { Token::Type::Close_Block, finish() };
case '|': consume(); return { Token::Type::Variable_Separator, finish() };
case '(': consume(); return { Token::Type::Open_Paren, finish(), token_location };
case ')': consume(); return { Token::Type::Close_Paren, finish(), token_location };
case '[': consume(); return { Token::Type::Open_Block, finish(), token_location };
case ']': consume(); return { Token::Type::Close_Block, finish(), token_location };
case '|': consume(); return { Token::Type::Variable_Separator, finish(), token_location };
}
// Number literals like .75
@@ -23,7 +23,7 @@ auto Lexer::next_token() -> Result<Token>
consume();
while (consume_if(unicode::is_digit)) {}
if (token_length != 1)
return { Token::Type::Numeric, finish() };
return { Token::Type::Numeric, finish(), token_location };
}
if (consume_if(unicode::is_digit)) {
@@ -38,7 +38,7 @@ auto Lexer::next_token() -> Result<Token>
rewind();
}
}
return { Token::Type::Numeric, finish() };
return { Token::Type::Numeric, finish(), token_location };
}
return errors::Unrecognized_Character;
@@ -56,11 +56,13 @@ auto Lexer::peek() const -> u32
auto Lexer::consume() -> u32
{
prev_location = location;
if (not source.empty()) {
if (auto [rune, remaining] = utf8::decode(source); rune != utf8::Rune_Error) {
last_rune_length = remaining.data() - source.data();
source = remaining;
token_length += last_rune_length;
location.advance(rune);
return rune;
}
}
@@ -69,14 +71,18 @@ auto Lexer::consume() -> u32
void Lexer::rewind()
{
assert(last_rune_length != 0);
source = { source.data() - last_rune_length, source.size() + last_rune_length };
token_length -= last_rune_length;
location = prev_location;
last_rune_length = 0;
}
void Lexer::start()
{
token_start = source.data();
token_length = 0;
token_location = location;
}
std::string_view Lexer::finish()
@@ -92,3 +98,22 @@ std::ostream& operator<<(std::ostream& os, Token const&)
os << "Token";
return os;
}
Location Location::advance(u32 rune)
{
switch (rune) {
case '\n':
line += 1;
[[fallthrough]];
case '\r':
column = 1;
return *this;
}
column += 1;
return *this;
}
std::ostream& operator<<(std::ostream& os, Location const& location)
{
return os << location.filename << ':' << location.line << ':' << location.column;
}
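Note: the '\n' case above increments the line and then falls through to the '\r' case, so a newline both bumps the line and resets the column to 1; a bare carriage return only resets the column, and any other rune advances the column by one. A small worked example of the expected cursor movement, assuming the Location definition from the header change below is included:

#include <cassert>

int main()
{
	Location loc;                      // defaults: line 1, column 1
	loc.advance('a');                  // ordinary rune: column 1 -> 2
	assert(loc == Location::at(1, 2));
	loc.advance('\n');                 // newline: line 1 -> 2, column resets to 1
	assert(loc == Location::at(2, 1));
	loc.advance('\r');                 // carriage return: only the column resets
	assert(loc == Location::at(2, 1));
	loc.advance('x');
	loc.advance('y');                  // two more runes: column 1 -> 3
	assert(loc == Location::at(2, 3));
}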

View File

@@ -1,8 +1,9 @@
#pragma once
#include <cassert>
#include <cstdint>
#include <string_view>
#include <ostream>
#include <string_view>
#include <tl/expected.hpp>
using u8 = std::uint8_t;
@@ -29,6 +30,26 @@ namespace errors
};
}
struct Location
{
std::string_view filename = "<unnamed>";
usize column = 1, line = 1;
Location advance(u32 rune);
bool operator==(Location const& rhs) const = default;
static Location at(usize line, usize column)
{
Location loc;
loc.line = line;
loc.column = column;
return loc;
}
};
std::ostream& operator<<(std::ostream& os, Location const& location);
struct Error
{
errors::Type type;
@@ -111,6 +132,7 @@ struct Token
Type type;
std::string_view source;
Location location;
};
std::ostream& operator<<(std::ostream& os, Token const& tok);
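With each Token carrying its starting Location and the stream operator above printing filename:line:column, diagnostics can point at the offending source position. A hypothetical helper for illustration (report is not part of this commit; it assumes the Token and Location declarations above are visible):

#include <iostream>
#include <string_view>

// Hypothetical diagnostic printer, for illustration only.
void report(Token const& token, std::string_view message)
{
	// operator<<(std::ostream&, Location const&) prints "filename:line:column".
	std::cerr << token.location << ": error: " << message << '\n';
	// e.g. "<unnamed>:2:3: error: unrecognized character"
}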
@@ -125,10 +147,10 @@ struct Lexer
char const* token_start = nullptr;
usize token_length = 0;
Location token_location{};
// Determine location of tokens to produce nice errors
std::string_view source_name = "<unnamed>";
unsigned column = 1, row = 1;
Location prev_location{};
Location location{};
auto next_token() -> Result<Token>;

View File

@@ -40,6 +40,18 @@ static void expect_token_type_and_value(
expect_token_type_and_value(expected_type, source, source, sl);
}
static void expect_token_type_and_location(
Token::Type expected_type,
std::string_view source,
Location location,
reflection::source_location const& sl = reflection::source_location::current())
{
Lexer lexer{source};
auto result = lexer.next_token();
expect(result.has_value() >> fatal, sl) << "have not parsed any tokens";
expect(eq(under(result->type), under(expected_type)), sl) << "different token type than expected";
expect(eq(result->location, location), sl) << "tokenized source is at a different place than expected";
}
suite lexer_test = [] {
"Empty file"_test = [] {
@@ -67,4 +79,11 @@ suite lexer_test = [] {
expect_token_type_and_value(Token::Type::Numeric, " 1 ", "1");
expect_token_type_and_value(Token::Type::Numeric, " 123 ", "123");
};
"Proper location marking"_test = [] {
expect_token_type_and_location(Token::Type::Numeric, "123", Location::at(1, 1));
expect_token_type_and_location(Token::Type::Numeric, " 123", Location::at(1, 4));
expect_token_type_and_location(Token::Type::Numeric, "\n123", Location::at(2, 1));
expect_token_type_and_location(Token::Type::Numeric, "\n 123", Location::at(2, 3));
};
};
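The expected positions follow from Location::advance: leading spaces only advance the column, while a leading "\n" moves the token to line 2 and resets the column before any further spaces advance it again. A hypothetical extension one might add inside lexer_test, assuming consecutive newlines are skipped like any other whitespace (not part of this commit):

// Hypothetical extra test case, intended to sit inside lexer_test above.
"Location across several lines"_test = [] {
	expect_token_type_and_location(Token::Type::Numeric, "\n\n123", Location::at(3, 1));
	expect_token_type_and_location(Token::Type::Numeric, "\n \n 123", Location::at(3, 2));
};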