Token location tracking
This commit is contained in:
parent
fd4db02e8d
commit
8d0507e341
39
src/lexer.cc
39
src/lexer.cc
@ -11,11 +11,11 @@ auto Lexer::next_token() -> Result<Token>
|
|||||||
}
|
}
|
||||||
|
|
||||||
switch (peek()) {
|
switch (peek()) {
|
||||||
case '(': consume(); return { Token::Type::Open_Paren, finish() };
|
case '(': consume(); return { Token::Type::Open_Paren, finish(), token_location };
|
||||||
case ')': consume(); return { Token::Type::Close_Paren, finish() };
|
case ')': consume(); return { Token::Type::Close_Paren, finish(), token_location };
|
||||||
case '[': consume(); return { Token::Type::Open_Block, finish() };
|
case '[': consume(); return { Token::Type::Open_Block, finish(), token_location };
|
||||||
case ']': consume(); return { Token::Type::Close_Block, finish() };
|
case ']': consume(); return { Token::Type::Close_Block, finish(), token_location };
|
||||||
case '|': consume(); return { Token::Type::Variable_Separator, finish() };
|
case '|': consume(); return { Token::Type::Variable_Separator, finish(), token_location };
|
||||||
}
|
}
|
||||||
|
|
||||||
// Number literals like .75
|
// Number literals like .75
|
||||||
@ -23,7 +23,7 @@ auto Lexer::next_token() -> Result<Token>
|
|||||||
consume();
|
consume();
|
||||||
while (consume_if(unicode::is_digit)) {}
|
while (consume_if(unicode::is_digit)) {}
|
||||||
if (token_length != 1)
|
if (token_length != 1)
|
||||||
return { Token::Type::Numeric, finish() };
|
return { Token::Type::Numeric, finish(), token_location };
|
||||||
}
|
}
|
||||||
|
|
||||||
if (consume_if(unicode::is_digit)) {
|
if (consume_if(unicode::is_digit)) {
|
||||||
@ -38,7 +38,7 @@ auto Lexer::next_token() -> Result<Token>
|
|||||||
rewind();
|
rewind();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return { Token::Type::Numeric, finish() };
|
return { Token::Type::Numeric, finish(), token_location };
|
||||||
}
|
}
|
||||||
|
|
||||||
return errors::Unrecognized_Character;
|
return errors::Unrecognized_Character;
|
||||||
@ -56,11 +56,13 @@ auto Lexer::peek() const -> u32
|
|||||||
|
|
||||||
auto Lexer::consume() -> u32
|
auto Lexer::consume() -> u32
|
||||||
{
|
{
|
||||||
|
prev_location = location;
|
||||||
if (not source.empty()) {
|
if (not source.empty()) {
|
||||||
if (auto [rune, remaining] = utf8::decode(source); rune != utf8::Rune_Error) {
|
if (auto [rune, remaining] = utf8::decode(source); rune != utf8::Rune_Error) {
|
||||||
last_rune_length = remaining.data() - source.data();
|
last_rune_length = remaining.data() - source.data();
|
||||||
source = remaining;
|
source = remaining;
|
||||||
token_length += last_rune_length;
|
token_length += last_rune_length;
|
||||||
|
location.advance(rune);
|
||||||
return rune;
|
return rune;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -69,14 +71,18 @@ auto Lexer::consume() -> u32
|
|||||||
|
|
||||||
// Undoes the most recent consume(): moves `source` back by last_rune_length
// bytes, shrinks the current token by the same amount and restores the
// location that was saved in prev_location.
void Lexer::rewind()
|
void Lexer::rewind()
|
||||||
{
|
{
|
||||||
|
// last_rune_length is zeroed at the end of this function, so calling
// rewind() again without a successful consume() in between trips this assert.
assert(last_rune_length != 0);
|
||||||
source = { source.data() - last_rune_length, source.size() + last_rune_length };
|
source = { source.data() - last_rune_length, source.size() + last_rune_length };
|
||||||
token_length -= last_rune_length;
|
token_length -= last_rune_length;
|
||||||
|
// prev_location is the snapshot consume() takes before it advances `location`.
location = prev_location;
|
||||||
|
// Mark the rune as already given back; only one step of undo is available.
last_rune_length = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Begins a new token at the current read position: remembers where its text
// starts, resets its byte length and snapshots the current location.
void Lexer::start()
|
void Lexer::start()
|
||||||
{
|
{
|
||||||
token_start = source.data();
|
token_start = source.data();
|
||||||
token_length = 0;
|
token_length = 0;
|
||||||
|
// Captured before any rune of the token is consumed; next_token() attaches
// this value to the tokens it returns.
token_location = location;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string_view Lexer::finish()
|
std::string_view Lexer::finish()
|
||||||
@ -92,3 +98,22 @@ std::ostream& operator<<(std::ostream& os, Token const&)
|
|||||||
os << "Token";
|
os << "Token";
|
||||||
return os;
|
return os;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Moves this location past a single decoded rune and returns a copy of the
// updated value.
//   '\n'  -> next line, column reset to 1
//   '\r'  -> same line, column reset to 1
//   other -> column + 1
Location Location::advance(u32 rune)
{
	if (rune == '\n') {
		line += 1;
		column = 1;
	} else if (rune == '\r') {
		column = 1;
	} else {
		column += 1;
	}
	return *this;
}
|
||||||
|
|
||||||
|
std::ostream& operator<<(std::ostream& os, Location const& location)
|
||||||
|
{
|
||||||
|
return os << location.filename << ':' << location.line << ':' << location.column;
|
||||||
|
}
|
||||||
|
@ -1,8 +1,9 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <string_view>
|
|
||||||
#include <ostream>
|
#include <ostream>
|
||||||
|
#include <string_view>
|
||||||
#include <tl/expected.hpp>
|
#include <tl/expected.hpp>
|
||||||
|
|
||||||
using u8 = std::uint8_t;
|
using u8 = std::uint8_t;
|
||||||
@ -29,6 +30,26 @@ namespace errors
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct Location
|
||||||
|
{
|
||||||
|
std::string_view filename = "<unnamed>";
|
||||||
|
usize column = 1, line = 1;
|
||||||
|
|
||||||
|
Location advance(u32 rune);
|
||||||
|
|
||||||
|
bool operator==(Location const& rhs) const = default;
|
||||||
|
|
||||||
|
static Location at(usize line, usize column)
|
||||||
|
{
|
||||||
|
Location loc;
|
||||||
|
loc.line = line;
|
||||||
|
loc.column = column;
|
||||||
|
return loc;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
std::ostream& operator<<(std::ostream& os, Location const& location);
|
||||||
|
|
||||||
struct Error
|
struct Error
|
||||||
{
|
{
|
||||||
errors::Type type;
|
errors::Type type;
|
||||||
@ -111,6 +132,7 @@ struct Token
|
|||||||
|
|
||||||
Type type;
|
Type type;
|
||||||
std::string_view source;
|
std::string_view source;
|
||||||
|
Location location;
|
||||||
};
|
};
|
||||||
|
|
||||||
std::ostream& operator<<(std::ostream& os, Token const& tok);
|
std::ostream& operator<<(std::ostream& os, Token const& tok);
|
||||||
@ -125,10 +147,10 @@ struct Lexer
|
|||||||
|
|
||||||
char const* token_start = nullptr;
|
char const* token_start = nullptr;
|
||||||
usize token_length = 0;
|
usize token_length = 0;
|
||||||
|
Location token_location{};
|
||||||
|
|
||||||
// Determine location of tokens to produce nice errors
|
Location prev_location{};
|
||||||
std::string_view source_name = "<unnamed>";
|
Location location{};
|
||||||
unsigned column = 1, row = 1;
|
|
||||||
|
|
||||||
auto next_token() -> Result<Token>;
|
auto next_token() -> Result<Token>;
|
||||||
|
|
||||||
|
@ -40,6 +40,18 @@ static void expect_token_type_and_value(
|
|||||||
expect_token_type_and_value(expected_type, source, source, sl);
|
expect_token_type_and_value(expected_type, source, source, sl);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Lexes the first token of `source` and checks both its type and the location
// recorded for it. Failing to produce any token is a fatal expectation.
// `sl` defaults to the call site and is handed to every expect().
static void expect_token_type_and_location(
	Token::Type expected_type,
	std::string_view source,
	Location location,
	reflection::source_location const& sl = reflection::source_location::current())
{
	Lexer lexer{source};
	auto const token = lexer.next_token();

	// Stop here when nothing was lexed; the checks below dereference the token.
	expect(token.has_value() >> fatal, sl) << "have not parsed any tokens";

	auto const actual_type = under(token->type);
	auto const wanted_type = under(expected_type);
	expect(eq(actual_type, wanted_type), sl) << "different token type then expected";

	auto const& actual_location = token->location;
	expect(eq(actual_location, location), sl) << "tokenized source is at different place then expected";
}
|
||||||
|
|
||||||
suite lexer_test = [] {
|
suite lexer_test = [] {
|
||||||
"Empty file"_test = [] {
|
"Empty file"_test = [] {
|
||||||
@ -67,4 +79,11 @@ suite lexer_test = [] {
|
|||||||
expect_token_type_and_value(Token::Type::Numeric, " 1 ", "1");
|
expect_token_type_and_value(Token::Type::Numeric, " 1 ", "1");
|
||||||
expect_token_type_and_value(Token::Type::Numeric, " 123 ", "123");
|
expect_token_type_and_value(Token::Type::Numeric, " 123 ", "123");
|
||||||
};
|
};
|
||||||
|
|
||||||
|
"Proper location marking"_test = [] {
|
||||||
|
expect_token_type_and_location(Token::Type::Numeric, "123", Location::at(1, 1));
|
||||||
|
expect_token_type_and_location(Token::Type::Numeric, " 123", Location::at(1, 4));
|
||||||
|
expect_token_type_and_location(Token::Type::Numeric, "\n123", Location::at(2, 1));
|
||||||
|
expect_token_type_and_location(Token::Type::Numeric, "\n 123", Location::at(2, 3));
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
Loading…
Reference in New Issue
Block a user