diff --git a/+UnitTest/+tokernizer/TokenizeTests.m b/+UnitTest/+tokernizer/TokenizeTests.m
new file mode 100644
index 0000000..74e7d3b
--- /dev/null
+++ b/+UnitTest/+tokernizer/TokenizeTests.m
@@ -0,0 +1,73 @@
+classdef TokenizeTests < matlab.unittest.TestCase
+    %TOKENIZETESTS Tests for tokenize_code
+
+    methods(Test)
+        function testDoubleQuote(obj)
+            %TESTDOUBLEQUOTE Tests a lone double-quoted string
+
+            % Input data for the test
+            input_str = '"test"'; % String: "test"
+
+            % Construct expected output for comparison
+            expected = Token('string', input_str, 1, 1);
+
+            % Get actual output
+            actual = tokenize_code(input_str);
+
+            % Compare actual output with expected output
+            obj.verifyEqual(actual, expected);
+        end
+
+        function testSoloDoubleQuote(obj)
+            %TESTSOLODOUBLEQUOTE Tests an assigned double-quoted string
+
+            % Input data for the test
+            input_str = 'output = "test"'; % String: output = "test"
+
+            % Construct expected output for comparison
+            expected(1) = Token('identifier', 'output', 1, 1);
+            expected(2) = Token('space', ' ', 1, 7);
+            expected(3) = Token('punctuation', '=', 1, 8);
+            expected(4) = Token('space', ' ', 1, 9);
+            expected(5) = Token('string', '"test"', 1, 10);
+
+            % Get actual output
+            actual = tokenize_code(input_str);
+
+            % Compare actual output with expected output
+            obj.verifyEqual(actual, expected);
+        end
+
+        function testNestedQuote(obj)
+            %TESTNESTEDQUOTE Tests a single quote inside double quotes
+
+            % Input data for the test
+            input_str = '"let''s go"'; % String: "let's go"
+
+            % Construct expected output for comparison
+            expected = Token('string', input_str, 1, 1);
+
+            % Get actual output
+            actual = tokenize_code(input_str);
+
+            % Compare actual output with expected output
+            obj.verifyEqual(actual, expected);
+        end
+
+        function testNestedQuote2(obj)
+            %TESTNESTEDQUOTE2 Tests double quotes inside single quotes
+
+            % Input data for the test
+            input_str = '''He said, "hi"'''; % String: 'He said, "hi"'
+
+            % Construct expected output for comparison
+            expected = Token('string', input_str, 1, 1);
+
+            % Get actual output
+            actual = tokenize_code(input_str);
+
+            % Compare actual output with expected output
+            obj.verifyEqual(actual, expected);
+        end
+    end
+end
\ No newline at end of file
diff --git a/run_unittests.m b/run_unittests.m
new file mode 100644
index 0000000..7f7723d
--- /dev/null
+++ b/run_unittests.m
@@ -0,0 +1,34 @@
+function run_unittests()
+    %RUN_UNITTESTS Runs all unit tests
+    %   Runs every test in the UnitTest package (including subpackages),
+    %   displays the results and raises an error if any test failed or
+    %   was incomplete, or if running the tests itself raised an error.
+
+    import matlab.unittest.TestSuite
+    import matlab.unittest.TestRunner
+
+    try
+        % Create a test suite
+        suite = ...
+            TestSuite.fromPackage('UnitTest', ...
+            'IncludingSubpackages', true);
+
+        % Run all tests
+        runner = TestRunner.withTextOutput;
+        result = runner.run(suite);
+
+        % Display results
+        disp(table(result));
+        disp(result);
+
+        % Throw an error if any test failed
+        if sum([result(:).Failed]) + sum([result(:).Incomplete]) > 0
+            error('There are failing unittests!')
+        end
+    catch err
+        % Show the full report, then propagate the error: swallowing it
+        % here would hide failing tests from the caller.
+        disp(err.getReport)
+        rethrow(err)
+    end
+end
diff --git a/tokenize_code.m b/tokenize_code.m
index fffc0b6..dd5d628 100644
--- a/tokenize_code.m
+++ b/tokenize_code.m
@@ -126,22 +126,35 @@
         % strings and transpose begin with `'`. The `.'` operator has
         % already been handled above:
         elseif letter == ''''
-            is_first_symbol = false;
-            previous = tokenlist(end);
-            % transpose operator:
-            % To differentiate the start of a string from the transpose
-            % operator, we need to check whether the previous token was a
-            % value or an operator. If a value, `'` means transpose. If an
-            % operator, `'` marks the start of a string.
-            if previous.isEqual('pair', {'}' ']' ')'}) || ...
-               previous.hasType({'identifier' 'number' 'property'})
-                pos = pos + 1;
-                add_token('punctuation', letter);
-            % strings:
-            else
-                string = skip_string();
+            % the first symbol cannot be transpose, so must be string
+            if is_first_symbol
+                string = skip_string('''');
                 add_token('string', string);
+            else
+                previous = tokenlist(end);
+
+                % transpose operator:
+                % To differentiate the start of a string from the
+                % transpose operator, we need to check whether the
+                % previous token was a value or an operator. If a value,
+                % `'` means transpose. If an operator, `'` marks the start
+                % of a string.
+                if previous.isEqual('pair', {'}' ']' ')'}) || ...
+                   previous.hasType({'identifier' 'number' 'property'})
+                    pos = pos + 1;
+                    add_token('punctuation', letter);
+                % strings:
+                else
+                    string = skip_string('''');
+                    add_token('string', string);
+                end
             end
+            is_first_symbol = false;
+        % string that starts with double quotes (")
+        elseif letter == '"'
+            is_first_symbol = false;
+            string = skip_string('"');
+            add_token('string', string);
         % we don't make any distinction between different kinds of parens:
         elseif any(letter == open_pairs)
             is_first_symbol = false;
@@ -246,18 +259,20 @@ function add_token(token_type, token_text)
         string = source_code(string_start:pos-1);
     end
 
-    function string = skip_string()
+    function string = skip_string(quote_type)
     %SKIP_STRING skips to the end of the string and returns the STRING
-    %   the STRING includes both quotation marks.
+    %   the STRING includes both quotation marks. QUOTE_TYPE is the
+    %   type of quote character to look for (' or ").
     %   this modifies POS!
 
         string_start = pos;
         while true
-            if source_code(pos) ~= '''' || pos == string_start
+            if source_code(pos) ~= quote_type || pos == string_start
                 pos = pos + 1;
-            elseif length(source_code) > pos && source_code(pos+1) == ''''
+            elseif length(source_code) > pos ...
+                    && source_code(pos+1) == quote_type
                 pos = pos + 2;
-            else % source_code(pos) == ''''
+            else % source_code(pos) == quote_type
                 pos = pos + 1;
                 break;
             end
@@ -302,7 +317,10 @@ function parse_command()
             letter = source_code(pos);
             % commands can contain literal strings:
             if letter == ''''
-                string_literal = skip_string();
+                string_literal = skip_string('''');
+                add_token('string', string_literal);
+            elseif letter == '"'
+                string_literal = skip_string('"');
                 add_token('string', string_literal);
             % commands can contain spaces:
             elseif any(letter == spaces)