Added expression separator tokenization

Closes #5
2022-05-07 18:20:22 +02:00 · 2022-05-07 18:20:22 +02:00 · df1f90d827
commit df1f90d827
parent 3b4c306199
3 changed files with 71 additions and 19 deletions
--- a/src/lexer.cc
+++ b/src/lexer.cc
@ -64,9 +64,13 @@ auto Lexer::next_token() -> Result<Token>
 	case ')': consume(); return { Token::Type::Close_Paren,          finish(), token_location };
 	case '[': consume(); return { Token::Type::Open_Block,           finish(), token_location };
 	case ']': consume(); return { Token::Type::Close_Block,          finish(), token_location };
 	case ';': consume(); return { Token::Type::Expression_Separator, finish(), token_location };
 	case '|':
 		consume();
-		// We explicitly allow for `|foo|=0` here
+		// `|` may be part of an operator, like `||`, so we need to check what follows. If the next
 		// char is an operator character, then this character is part of an operator sequence.
 		// Additionally we explicitly allow for `|foo|=0` here
 		if (Valid_Operator_Chars.find(peek()) == std::string_view::npos || peek() == '=')
 			return { Token::Type::Variable_Separator, finish(), token_location };
 	}
@ -227,6 +231,7 @@ std::ostream& operator<<(std::ostream& os, Token::Type type)
 	case Token::Type::Numeric:              return os << "NUMERIC";
 	case Token::Type::Symbol:               return os << "SYMBOL";
 	case Token::Type::Operator:             return os << "OPERATOR";
 	case Token::Type::Expression_Separator: return os << "EXPRESSION SEPARATOR";
 	}
 	assert(false && "exhaustive handling of Token::Type enumeration");
--- a/src/musique.hh
+++ b/src/musique.hh
@ -162,6 +162,9 @@ struct Token
 		// "|" separates arguments from block body, and provides variable introduction syntax
 		Variable_Separator,
 		// ";" separates expressions. Used to separate calls, like `foo 1 2; bar 3 4`
 		Expression_Separator,
 		// "[" and "]", delimit anonymous block of code (potentially a function)
 		Open_Block,
 		Close_Block,
--- a/src/tests/lex.cc
+++ b/src/tests/lex.cc
@ -63,6 +63,29 @@ static void expect_empty_file(
 		}
 }
 // Lexes `source` with a fresh Lexer and checks that it yields exactly the tokens
 // listed in `expected_tokens`, in order.
 //
 // For every expected token the next lexer result must hold a value whose `type`,
 // `source` and `location` compare equal to the expected ones. After the last
 // expected token, one more `next_token()` call must yield no value.
 //
 // Parameters:
 //   source          - text handed to the Lexer
 //   expected_tokens - tokens that the lexer is expected to produce, in order
 //   sl              - source location forwarded to every `expect`, defaulted to
 //                     `source_location::current()` so that failures are attributed
 //                     to the caller of this helper rather than to the helper itself
 template<auto N>
 static void expect_token_sequence(
 		std::string_view source,
 		std::array<Token, N> const& expected_tokens,
 		reflection::source_location const& sl = reflection::source_location::current())
 {
 	Lexer lexer{source};
 	for (Token const& expected : expected_tokens) {
 		auto const result = lexer.next_token();
 		expect(result.has_value(), sl) << "expected token, received nothing";
 		// Field comparisons only make sense when a token was actually produced;
 		// a missing token was already reported above, so keep going with the next one.
 		if (result.has_value()) {
 			// Forward `sl` here as well, so mismatches point at the calling test.
 			expect(eq(result->type, expected.type), sl)         << "different token type then expected";
 			expect(eq(result->source, expected.source), sl)     << "different token source then expected";
 			expect(eq(result->location, expected.location), sl) << "different token location then expected";
 		}
 	}
 	// The lexer must be exhausted once all expected tokens were consumed.
 	auto const trailing = lexer.next_token();
 	expect(not trailing.has_value(), sl) << "more tokens then expected";
 }
 suite lexer_test = [] {
 	"Empty file"_test = [] {
 		expect_empty_file("");
@ -85,6 +108,7 @@ suite lexer_test = [] {
 		expect_token_type(Token::Type::Open_Block,           "[");
 		expect_token_type(Token::Type::Open_Paren,           "(");
 		expect_token_type(Token::Type::Variable_Separator,   "|");
 		expect_token_type(Token::Type::Expression_Separator, ";");
 	};
 	"Numeric tokens"_test = [] {
@ -143,4 +167,24 @@ suite lexer_test = [] {
 		expect_token_type_and_value(Token::Type::Operator, "v");
 		expect_token_type_and_value(Token::Type::Operator, "%");
 	};
 	"Multiple tokens"_test = [] {
 		// Base location used to build every expected token position below.
 		// NOTE(review): `at(a, b)` presumably takes (line, column); in these single-line
 		// inputs `b` matches the 1-based character position of the token's first character.
 		Location loc;
 		// Number, operator and symbol separated by single spaces.
 		expect_token_sequence("1 + foo", std::array {
 			Token { Token::Type::Numeric,  "1",   loc.at(1, 1) },
 			Token { Token::Type::Operator, "+",   loc.at(1, 3) },
 			Token { Token::Type::Symbol,   "foo", loc.at(1, 5) }
 		});
 		// Two calls split by `;`, which sits directly after `2` without whitespace.
 		expect_token_sequence("foo 1 2; bar 3 4", std::array {
 			Token { Token::Type::Symbol,               "foo", loc.at(1,  1) },
 			Token { Token::Type::Numeric,              "1",   loc.at(1,  5) },
 			Token { Token::Type::Numeric,              "2",   loc.at(1,  7) },
 			Token { Token::Type::Expression_Separator, ";",   loc.at(1,  8) },
 			Token { Token::Type::Symbol,               "bar", loc.at(1, 10) },
 			Token { Token::Type::Numeric,              "3",   loc.at(1, 14) },
 			Token { Token::Type::Numeric,              "4",   loc.at(1, 16) }
 		});
 	};
 };