Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement token list API #1829

Merged
merged 5 commits into from
May 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions ext/rbs_extension/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -2857,9 +2857,29 @@ rbsparser_parse_signature(VALUE self, VALUE buffer, VALUE end_pos)
return pair;
}

/**
 * Tokenize `buffer` from position 0 up to `end_pos` (a Fixnum) and return
 * every token the lexer produces.
 *
 * The result is a Ruby Array of `[type, location]` pairs: `type` is a Symbol
 * built from `token_type_str`, and `location` is created by
 * `rbs_new_location` from the token range. The pEOF token is appended before
 * the loop condition is re-checked, so it is the last element of the result.
 */
static VALUE
rbsparser_lex(VALUE self, VALUE buffer, VALUE end_pos) {
  lexstate *lexer = alloc_lexer(buffer, 0, FIX2INT(end_pos));
  VALUE tokens = rb_ary_new();
  token tok = NullToken;

  while (tok.type != pEOF) {
    tok = rbsparser_next_token(lexer);

    VALUE type = ID2SYM(rb_intern(token_type_str(tok.type)));
    VALUE location = rbs_new_location(buffer, tok.range);

    /* rb_assoc_new builds the same two-element array as rb_ary_new3(2, ...). */
    rb_ary_push(tokens, rb_assoc_new(type, location));
  }

  /* The lexer state is only needed while scanning; release it before returning. */
  free(lexer);

  return tokens;
}

/**
 * Define the `Parser` class under `RBS` and register its C-implemented
 * singleton methods.
 *
 * The trailing integer of each registration is the number of Ruby arguments
 * the C function takes in addition to `self`.
 */
void rbs__init_parser(void) {
RBS_Parser = rb_define_class_under(RBS, "Parser", rb_cObject);
rb_define_singleton_method(RBS_Parser, "_parse_type", rbsparser_parse_type, 5);
rb_define_singleton_method(RBS_Parser, "_parse_method_type", rbsparser_parse_method_type, 5);
rb_define_singleton_method(RBS_Parser, "_parse_signature", rbsparser_parse_signature, 2);
// `_lex` receives (buffer, end_pos), matching the rbsparser_lex signature.
rb_define_singleton_method(RBS_Parser, "_lex", rbsparser_lex, 2);
}
7 changes: 6 additions & 1 deletion ext/rbs_extension/parserstate.c
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ VALUE comment_to_ruby(comment *com, VALUE buffer) {
);
}

parserstate *alloc_parser(VALUE buffer, int start_pos, int end_pos, VALUE variables) {
lexstate *alloc_lexer(VALUE buffer, int start_pos, int end_pos) {
VALUE string = rb_funcall(buffer, rb_intern("content"), 0);

StringValue(string);
Expand All @@ -290,6 +290,11 @@ parserstate *alloc_parser(VALUE buffer, int start_pos, int end_pos, VALUE variab
lexer->start = lexer->current;
lexer->first_token_of_line = lexer->current.column == 0;

return lexer;
}

parserstate *alloc_parser(VALUE buffer, int start_pos, int end_pos, VALUE variables) {
lexstate *lexer = alloc_lexer(buffer, start_pos, end_pos);
parserstate *parser = calloc(1, sizeof(parserstate));
parser->lexstate = lexer;
parser->buffer = buffer;
Expand Down
9 changes: 9 additions & 0 deletions ext/rbs_extension/parserstate.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,15 @@ void parser_insert_typevar(parserstate *state, ID id);
* */
bool parser_typevar_member(parserstate *state, ID id);

/**
* Allocate new lexstate object.
*
* The source text is obtained by calling `content` on `buffer`.
* `start_pos` and `end_pos` presumably bound the region to be scanned
* (TODO confirm units against the implementation).
*
* rbsparser_lex releases the returned lexstate with `free()` once it is done;
* alloc_parser stores it in `parser->lexstate` instead.
*
* ```
* alloc_lexer(buffer, 0, 31) // New lexstate with buffer
* ```
* */
lexstate *alloc_lexer(VALUE buffer, int start_pos, int end_pos);

/**
* Allocate new parserstate object.
*
Expand Down
15 changes: 15 additions & 0 deletions lib/rbs/parser/lex_result.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# frozen_string_literal: true

module RBS
  class Parser
    # Result of `Parser.lex`: the buffer that was tokenized together with the
    # tokens read from it.
    class LexResult
      # `buffer` is the tokenized buffer; `value` is the list of tokens.
      attr_reader :buffer, :value

      # Both arguments are required keywords and are stored as given.
      def initialize(buffer:, value:)
        @buffer, @value = buffer, value
      end
    end
  end
end
23 changes: 23 additions & 0 deletions lib/rbs/parser/token.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# frozen_string_literal: true

module RBS
  class Parser
    # A single token produced by `Parser.lex`: its type plus the location it
    # was read from.
    class Token
      # `type` is the token type (for example `:kCLASS`); `location` is where
      # the token sits in the source.
      attr_reader :type, :location

      # Both arguments are required keywords and are stored as given.
      def initialize(type:, location:)
        @type, @location = type, location
      end

      # Source text covered by the token's location.
      def value
        location.source
      end

      # True for both comment token types.
      def comment?
        %i[tCOMMENT tLINECOMMENT].include?(type)
      end
    end
  end
end
12 changes: 12 additions & 0 deletions lib/rbs/parser_aux.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# frozen_string_literal: true

require_relative "parser/lex_result"
require_relative "parser/token"

module RBS
class Parser
def self.parse_type(source, range: 0..., variables: [], require_eof: false)
Expand All @@ -19,6 +22,15 @@ def self.parse_signature(source)
[buf, dirs, decls]
end

# Tokenize `source` (anything `buffer` accepts) up to the buffer's last
# position, wrap each `[type, location]` pair from `_lex` in a Token, and
# return the tokens with the buffer as a LexResult.
def self.lex(source)
  buf = buffer(source)
  tokens = _lex(buf, buf.last_position).map { |type, location| Token.new(type: type, location: location) }

  LexResult.new(buffer: buf, value: tokens)
end

def self.buffer(source)
case source
when String
Expand Down
28 changes: 28 additions & 0 deletions sig/parser.rbs
Original file line number Diff line number Diff line change
@@ -1,5 +1,23 @@
module RBS
class Parser
# Result of `Parser.lex`
#
# Holds the buffer that was tokenized and the tokens read from it.
class LexResult
# The buffer passed to the lexer
attr_reader buffer: Buffer
# One Token per `[type, location]` pair returned by `Parser._lex`
attr_reader value: Array[Token]

def initialize: (buffer: Buffer, value: Array[Token]) -> void
end

# Represents a single token in the result of `Parser.lex`.
class Token
# Token type, for example `:kCLASS`, `:tUIDENT`, or `:pEOF`
attr_reader type: Symbol
# Location of the token in the buffer
attr_reader location: Location[untyped, untyped]

def initialize: (type: Symbol, location: Location[untyped, untyped]) -> void

# Returns the source text covered by `location`
def value: () -> String

# Returns `true` if `type` is `:tCOMMENT` or `:tLINECOMMENT`
def comment?: () -> bool
end

# Parse a method type and return it
#
# When `range` keyword is specified, it starts parsing from the `begin` to the `end` of the range.
Expand Down Expand Up @@ -50,6 +68,14 @@ module RBS
#
def self.parse_signature: (Buffer | String) -> [Buffer, Array[AST::Directives::t], Array[AST::Declarations::t]]

# Tokenize the whole RBS source and return the tokens together with the buffer.
#
# No syntax tree is built: the result only lists the tokens.
#
# ```ruby
# RBS::Parser.lex("# Comment\nmodule A\nend\n").value.map(&:type)
# # => [:tLINECOMMENT, :kMODULE, :tUIDENT, :kEND, :pEOF]
# ```
def self.lex: (Buffer | String) -> LexResult

KEYWORDS: Hash[String, bot]

private
Expand All @@ -62,6 +88,8 @@ module RBS

def self._parse_signature: (Buffer, Integer end_pos) -> [Array[AST::Directives::t], Array[AST::Declarations::t]]

# Tokenizes the buffer from its beginning up to `end_pos` and returns one
# `[type, location]` pair per token; the last pair is the `:pEOF` token.
def self._lex: (Buffer, Integer end_pos) -> Array[[Symbol, Location[untyped, untyped]]]

class LocatedValue
end
end
Expand Down
22 changes: 22 additions & 0 deletions test/rbs/parser_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -767,4 +767,26 @@ def test_proc__untyped_function_parse_error
RBS::Parser.parse_type("^(?) { (?) -> void } -> Integer")
end
end

# Checks the type, source text, and range of every token `_lex` returns for a
# small class declaration that contains both comment kinds.
def test__lex
  content = <<~RBS
    # LineComment
    class Foo[T < Integer] < Bar # Comment
    end
  RBS

  # [type, source, range] for each token, in the order the lexer emits them.
  expected = [
    [:tLINECOMMENT, '# LineComment', 0...13],
    [:kCLASS, 'class', 14...19],
    [:tUIDENT, 'Foo', 20...23],
    [:pLBRACKET, '[', 23...24],
    [:tUIDENT, 'T', 24...25],
    [:pLT, '<', 26...27],
    [:tUIDENT, 'Integer', 28...35],
    [:pRBRACKET, ']', 35...36],
    [:pLT, '<', 37...38],
    [:tUIDENT, 'Bar', 39...42],
    [:tCOMMENT, '# Comment', 43...52],
    [:kEND, 'end', 53...56],
    [:pEOF, '', 57...58]
  ]

  tokens = RBS::Parser._lex(buffer(content), content.length)

  expected.each_with_index do |triple, index|
    pair = tokens[index]
    assert_equal triple, [pair[0], pair[1].source, pair[1].range]
  end
end
end