From df1f90d8279fde6fddfee48a9540a0f43457a864 Mon Sep 17 00:00:00 2001 From: Robert Bendun Date: Sat, 7 May 2022 18:20:22 +0200 Subject: [PATCH] Added expression separator tokenization Closes #5 --- src/lexer.cc | 33 ++++++++++++++++------------- src/musique.hh | 3 +++ src/tests/lex.cc | 54 +++++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 71 insertions(+), 19 deletions(-) diff --git a/src/lexer.cc b/src/lexer.cc index 0cb1d72..9553d15 100644 --- a/src/lexer.cc +++ b/src/lexer.cc @@ -60,13 +60,17 @@ auto Lexer::next_token() -> Result } switch (peek()) { - case '(': consume(); return { Token::Type::Open_Paren, finish(), token_location }; - case ')': consume(); return { Token::Type::Close_Paren, finish(), token_location }; - case '[': consume(); return { Token::Type::Open_Block, finish(), token_location }; - case ']': consume(); return { Token::Type::Close_Block, finish(), token_location }; + case '(': consume(); return { Token::Type::Open_Paren, finish(), token_location }; + case ')': consume(); return { Token::Type::Close_Paren, finish(), token_location }; + case '[': consume(); return { Token::Type::Open_Block, finish(), token_location }; + case ']': consume(); return { Token::Type::Close_Block, finish(), token_location }; + case ';': consume(); return { Token::Type::Expression_Separator, finish(), token_location }; + case '|': consume(); - // We explicitly allow for `|foo|=0` here + // `|` may be part of operator, like `||`. So we need to check what follows. If next char + // is operator, then this character is part of operator sequence. + // Additionally we explicitly allow for `|foo|=0` here if (Valid_Operator_Chars.find(peek()) == std::string_view::npos || peek() == '=') return { Token::Type::Variable_Separator, finish(), token_location }; } @@ -218,15 +222,16 @@ std::ostream& operator<<(std::ostream& os, Token const&) std::ostream& operator<<(std::ostream& os, Token::Type type) { switch (type) { - case Token::Type::Open_Block: return os << "OPEN BLOCK"; - case Token::Type::Close_Block: return os << "CLOSE BLOCK"; - case Token::Type::Open_Paren: return os << "OPEN PAREN"; - case Token::Type::Close_Paren: return os << "CLOSE PAREN"; - case Token::Type::Variable_Separator: return os << "VARIABLE SEPARATOR"; - case Token::Type::Chord: return os << "CHORD"; - case Token::Type::Numeric: return os << "NUMERIC"; - case Token::Type::Symbol: return os << "SYMBOL"; - case Token::Type::Operator: return os << "OPERATOR"; + case Token::Type::Open_Block: return os << "OPEN BLOCK"; + case Token::Type::Close_Block: return os << "CLOSE BLOCK"; + case Token::Type::Open_Paren: return os << "OPEN PAREN"; + case Token::Type::Close_Paren: return os << "CLOSE PAREN"; + case Token::Type::Variable_Separator: return os << "VARIABLE SEPARATOR"; + case Token::Type::Chord: return os << "CHORD"; + case Token::Type::Numeric: return os << "NUMERIC"; + case Token::Type::Symbol: return os << "SYMBOL"; + case Token::Type::Operator: return os << "OPERATOR"; + case Token::Type::Expression_Separator: return os << "EXPRESSION SEPARATOR"; } assert(false && "exhaustive handling of Token::Type enumeration"); diff --git a/src/musique.hh b/src/musique.hh index 1d600b2..ea59765 100644 --- a/src/musique.hh +++ b/src/musique.hh @@ -162,6 +162,9 @@ struct Token // "|" separaters arguments from block body, and provides variable introduction syntax Variable_Separator, + // ";" separates expressions. Used to separate calls, like `foo 1 2; bar 3 4` + Expression_Separator, + // "[" and "]", delimit anonymous block of code (potentially a function) Open_Block, Close_Block, diff --git a/src/tests/lex.cc b/src/tests/lex.cc index 8a18c8b..50ea1fa 100644 --- a/src/tests/lex.cc +++ b/src/tests/lex.cc @@ -63,6 +63,29 @@ static void expect_empty_file( } } +template +static void expect_token_sequence( + std::string_view source, + std::array const& expected_tokens, + reflection::source_location const& sl = reflection::source_location::current()) +{ + Lexer lexer{source}; + + for (Token const& expected : expected_tokens) { + auto const result = lexer.next_token(); + expect(result.has_value(), sl) << "expected token, received nothing"; + + if (result.has_value()) { + expect(eq(result->type, expected.type)) << "different token type then expected"; + expect(eq(result->source, expected.source)) << "different token source then expected"; + expect(eq(result->location, expected.location)) << "different token location then expected"; + } + } + + auto const result = lexer.next_token(); + expect(not result.has_value(), sl) << "more tokens then expected"; +} + suite lexer_test = [] { "Empty file"_test = [] { expect_empty_file(""); @@ -80,11 +103,12 @@ suite lexer_test = [] { }; "Simple token types"_test = [] { - expect_token_type(Token::Type::Close_Block, "]"); - expect_token_type(Token::Type::Close_Paren, ")"); - expect_token_type(Token::Type::Open_Block, "["); - expect_token_type(Token::Type::Open_Paren, "("); - expect_token_type(Token::Type::Variable_Separator, "|"); + expect_token_type(Token::Type::Close_Block, "]"); + expect_token_type(Token::Type::Close_Paren, ")"); + expect_token_type(Token::Type::Open_Block, "["); + expect_token_type(Token::Type::Open_Paren, "("); + expect_token_type(Token::Type::Variable_Separator, "|"); + expect_token_type(Token::Type::Expression_Separator, ";"); }; "Numeric tokens"_test = [] { @@ -143,4 +167,24 @@ suite lexer_test = [] { expect_token_type_and_value(Token::Type::Operator, "v"); expect_token_type_and_value(Token::Type::Operator, "%"); }; + + "Multiple tokens"_test = [] { + Location l; + + expect_token_sequence("1 + foo", std::array { + Token { Token::Type::Numeric, "1", l.at(1, 1) }, + Token { Token::Type::Operator, "+", l.at(1, 3) }, + Token { Token::Type::Symbol, "foo", l.at(1, 5) } + }); + + expect_token_sequence("foo 1 2; bar 3 4", std::array { + Token { Token::Type::Symbol, "foo", l.at(1, 1) }, + Token { Token::Type::Numeric, "1", l.at(1, 5) }, + Token { Token::Type::Numeric, "2", l.at(1, 7) }, + Token { Token::Type::Expression_Separator, ";", l.at(1, 8) }, + Token { Token::Type::Symbol, "bar", l.at(1, 10) }, + Token { Token::Type::Numeric, "3", l.at(1, 14) }, + Token { Token::Type::Numeric, "4", l.at(1, 16) } + }); + }; };