diff --git a/src/lexer.cc b/src/lexer.cc
index 37b50d8..0cb1d72 100644
--- a/src/lexer.cc
+++ b/src/lexer.cc
@@ -7,9 +7,59 @@ constexpr std::string_view Valid_Operator_Chars =
 	"<>=!" // comparisons
 	;
 
+// Skips input that produces no tokens: whitespace, `#!` line comments,
+// `--` line comments and `---` ... `---` block comments.
+// Repeats until one full pass consumes nothing, so any mix of them is skipped.
+void Lexer::skip_whitespace_and_comments()
+{
+	for (;;) {
+		bool done_something = false;
+
+		while (consume_if(unicode::is_space)) {
+			done_something = true;
+		}
+
+		// #! line comments
+		if (consume_if('#', '!')) {
+			done_something = true;
+			while (peek() && peek() != '\n') {
+				consume();
+			}
+		}
+
+		// -- line and multiline comments
+		if (consume_if('-', '-')) {
+			done_something = true;
+			if (consume_if('-')) {
+				// multiline
+				unsigned count = 0;
+				// Stop when peek() reports 0 (the end-of-input check next_token() uses),
+				// so an unterminated block comment cannot scan past the end of the source.
+				while (count < 3 && peek()) {
+					if (consume_if('-')) {
+						++count;
+					} else {
+						consume();
+						count = 0;
+					}
+				}
+				while (consume_if('-')) {}
+			} else {
+				// single line
+				while (peek() && peek() != '\n') {
+					consume();
+				}
+			}
+		}
+
+		if (not done_something)
+			break;
+	}
+}
+
 auto Lexer::next_token() -> Result
 {
-	while (consume_if(unicode::is_space)) {}
+	skip_whitespace_and_comments();
 	start();
 
 	if (peek() == 0) {
diff --git a/src/musique.hh b/src/musique.hh
index 803bba9..1d600b2 100644
--- a/src/musique.hh
+++ b/src/musique.hh
@@ -196,6 +196,9 @@ struct Lexer
 
 	auto next_token() -> Result;
 
+	// Skips whitespace and comments; utility function for next_token()
+	void skip_whitespace_and_comments();
+
 	// Finds next rune in source
 	auto peek() const -> u32;
 
@@ -218,6 +221,20 @@ struct Lexer
 		return condition && (consume(), true);
 	}
 
+	// Returns true when consume_if(first) and then consume_if(second) both succeed.
+	// When only the first succeeds it is undone with rewind() and false is returned.
+	inline auto consume_if(auto first, auto second) -> bool
+	{
+		if (consume_if(first)) {
+			if (consume_if(second)) {
+				return true;
+			} else {
+				rewind();
+			}
+		}
+		return false;
+	}
+
 	// Goes back last rune
 	void rewind();
 
diff --git a/src/tests/lex.cc b/src/tests/lex.cc
index 5ebfa29..8a18c8b 100644
--- a/src/tests/lex.cc
+++ b/src/tests/lex.cc
@@ -51,12 +51,34 @@ static void expect_token_type_and_location(
 	expect(eq(result->location, location), sl) << "tokenized source is at different place then expected";
 }
 
+// Expects that the first next_token() call on `source` fails with errors::End_Of_File.
+static void expect_empty_file(
+	std::string_view source,
+	reflection::source_location const& sl = reflection::source_location::current())
+{
+	Lexer lexer{source};
+	auto result = lexer.next_token();
+	expect(!result.has_value(), sl) << "could not produce any tokens from empty file";
+	if (not result.has_value()) {
+		expect(result.error() == errors::End_Of_File, sl) << "could not produce any tokens from empty file";
+	}
+}
+
 suite lexer_test = [] {
 	"Empty file"_test = [] {
-		Lexer lexer{""};
-		auto result = lexer.next_token();
-		expect(!result.has_value() >> fatal) << "could not produce any tokens from empty file";
-		expect(result.error() == errors::End_Of_File) << "could not produce any tokens from empty file";
+		expect_empty_file("");
+	};
+
+	"Comments"_test = [] {
+		expect_empty_file("#!/bin/sh");
+		expect_empty_file("-- line comment");
+		expect_token_type_and_value(Token::Type::Numeric, "--- block comment --- 0", "0");
+		expect_empty_file(R"musique(
+			--- hello
+			multiline comment
+			---
+		)musique");
+		expect_empty_file("--- unterminated block comment");
 	};
 
 	"Simple token types"_test = [] {