From c2125304986a5e6706b93b3e13ae061e058c95b1 Mon Sep 17 00:00:00 2001 From: Vinicius Stock Date: Tue, 12 Apr 2022 16:27:09 -0400 Subject: [PATCH 1/2] Add column position to Location --- lib/syntax_tree/node.rb | 61 ++++--- lib/syntax_tree/parser.rb | 331 ++++++++++++++++++++++++++------------ test/node_test.rb | 19 ++- 3 files changed, 281 insertions(+), 130 deletions(-) diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index 9081d977..f940499a 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -3,13 +3,15 @@ module SyntaxTree # Represents the location of a node in the tree from the source code. class Location - attr_reader :start_line, :start_char, :end_line, :end_char + attr_reader :start_line, :start_char, :start_column, :end_line, :end_char, :end_column - def initialize(start_line:, start_char:, end_line:, end_char:) + def initialize(start_line:, start_char:, start_column:, end_line:, end_char:, end_column:) @start_line = start_line @start_char = start_char + @start_column = start_column @end_line = end_line @end_char = end_char + @end_column = end_column end def lines @@ -26,22 +28,26 @@ def to(other) Location.new( start_line: start_line, start_char: start_char, + start_column: start_column, end_line: [end_line, other.end_line].max, - end_char: other.end_char + end_char: other.end_char, + end_column: other.end_column ) end - def self.token(line:, char:, size:) + def self.token(line:, char:, column:, size:) new( start_line: line, start_char: char, + start_column: column, end_line: line, - end_char: char + size + end_char: char + size, + end_column: column + size ) end - def self.fixed(line:, char:) - new(start_line: line, start_char: char, end_line: line, end_char: char) + def self.fixed(line:, char:, column:) + new(start_line: line, start_char: char, start_column: column, end_line: line, end_char: char, end_column: column) end end @@ -2047,13 +2053,15 @@ def initialize( @comments = comments end - def bind(start_char, end_char) + def bind(start_char, start_column, end_char, end_column) @location = Location.new( start_line: location.start_line, start_char: start_char, + start_column: start_column, end_line: location.end_line, - end_char: end_char + end_char: end_char, + end_column: end_column ) parts = [rescue_clause, else_clause, ensure_clause] @@ -2062,14 +2070,17 @@ def bind(start_char, end_char) consequent = parts.compact.first statements.bind( start_char, - consequent ? consequent.location.start_char : end_char + start_column, + consequent ? consequent.location.start_char : end_char, + consequent ? consequent.location.start_column : end_column ) # Next we're going to determine the rescue clause if there is one if rescue_clause consequent = parts.drop(1).compact.first rescue_clause.bind_end( - consequent ? consequent.location.start_char : end_char + consequent ? consequent.location.start_char : end_char, + consequent ? consequent.location.start_column : end_column ) end end @@ -8413,20 +8424,22 @@ def initialize( @comments = comments end - def bind_end(end_char) + def bind_end(end_char, end_column) @location = Location.new( start_line: location.start_line, start_char: location.start_char, + start_column: location.start_column, end_line: location.end_line, - end_char: end_char + end_char: end_char, + end_column: end_column ) if consequent - consequent.bind_end(end_char) - statements.bind_end(consequent.location.start_char) + consequent.bind_end(end_char, end_column) + statements.bind_end(consequent.location.start_char, consequent.location.start_column) else - statements.bind_end(end_char) + statements.bind_end(end_char, end_column) end end @@ -8885,13 +8898,15 @@ def initialize(parser, body:, location:, comments: []) @comments = comments end - def bind(start_char, end_char) + def bind(start_char, start_column, end_char, end_column) @location = Location.new( start_line: location.start_line, start_char: start_char, + start_column: start_column, end_line: location.end_line, - end_char: end_char + end_char: end_char, + end_column: end_column ) if body[0].is_a?(VoidStmt) @@ -8900,8 +8915,10 @@ def bind(start_char, end_char) Location.new( start_line: location.start_line, start_char: start_char, + start_column: start_column, end_line: location.end_line, - end_char: start_char + end_char: start_char, + end_column: end_column ) body[0] = VoidStmt.new(location: location) @@ -8910,13 +8927,15 @@ def bind(start_char, end_char) attach_comments(start_char, end_char) end - def bind_end(end_char) + def bind_end(end_char, end_column) @location = Location.new( start_line: location.start_line, start_char: location.start_char, + start_column: location.start_column, end_line: location.end_line, - end_char: end_char + end_char: end_char, + end_column: end_column ) end diff --git a/lib/syntax_tree/parser.rb b/lib/syntax_tree/parser.rb index 5bd89dc2..6bc27d5c 100644 --- a/lib/syntax_tree/parser.rb +++ b/lib/syntax_tree/parser.rb @@ -163,6 +163,12 @@ def char_pos line_counts[lineno - 1][column] end + # This represents the current column we're in relative to the beginning of + # the current line. + def current_column + column - line_counts[lineno - 1].start + end + # As we build up a list of tokens, we'll periodically need to go backwards # and find the ones that we've already hit in order to determine the # location information for nodes that use them. For example, if you have a @@ -251,10 +257,13 @@ def find_next_statement_start(position) def on_BEGIN(statements) lbrace = find_token(LBrace) rbrace = find_token(RBrace) + start_char = find_next_statement_start(lbrace.location.end_char) statements.bind( - find_next_statement_start(lbrace.location.end_char), - rbrace.location.start_char + start_char, + start_char - line_counts[lbrace.location.start_line - 1].start, + rbrace.location.start_char, + rbrace.location.start_column, ) keyword = find_token(Kw, "BEGIN") @@ -271,7 +280,7 @@ def on_BEGIN(statements) def on_CHAR(value) CHAR.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) end @@ -280,10 +289,13 @@ def on_CHAR(value) def on_END(statements) lbrace = find_token(LBrace) rbrace = find_token(RBrace) + start_char = find_next_statement_start(lbrace.location.end_char) statements.bind( - find_next_statement_start(lbrace.location.end_char), - rbrace.location.start_char + start_char, + start_char - line_counts[lbrace.location.start_line - 1].start, + rbrace.location.start_char, + rbrace.location.start_column ) keyword = find_token(Kw, "END") @@ -301,7 +313,7 @@ def on___end__(value) @__end__ = EndContent.new( value: source[(char_pos + value.length)..-1], - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) end @@ -465,7 +477,7 @@ def on_args_forward # :call-seq: # on_args_new: () -> Args def on_args_new - Args.new(parts: [], location: Location.fixed(line: lineno, char: char_pos)) + Args.new(parts: [], location: Location.fixed(line: lineno, column: current_column, char: char_pos)) end # :call-seq: @@ -551,7 +563,7 @@ def on_assoc_splat(value) def on_backref(value) Backref.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) end @@ -561,7 +573,7 @@ def on_backtick(value) node = Backtick.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -592,15 +604,20 @@ def on_begin(bodystmt) PinnedBegin.new(statement: bodystmt, location: location) else keyword = find_token(Kw, "begin") - end_char = + end_location = if bodystmt.rescue_clause || bodystmt.ensure_clause || bodystmt.else_clause - bodystmt.location.end_char + bodystmt.location else - find_token(Kw, "end").location.end_char + find_token(Kw, "end").location end - bodystmt.bind(keyword.location.end_char, end_char) + bodystmt.bind( + keyword.location.end_char, + keyword.location.end_column, + end_location.end_char, + end_location.end_column + ) location = keyword.location.to(bodystmt.location) Begin.new(bodystmt: bodystmt, location: location) @@ -685,7 +702,7 @@ def on_bodystmt(statements, rescue_clause, else_clause, ensure_clause) else_keyword: else_clause && find_token(Kw, "else"), else_clause: else_clause, ensure_clause: ensure_clause, - location: Location.fixed(line: lineno, char: char_pos) + location: Location.fixed(line: lineno, char: char_pos, column: current_column) ) end @@ -697,18 +714,24 @@ def on_bodystmt(statements, rescue_clause, else_clause, ensure_clause) def on_brace_block(block_var, statements) lbrace = find_token(LBrace) rbrace = find_token(RBrace) + location = (block_var || lbrace).location + start_char = find_next_statement_start(location.end_char) statements.bind( - find_next_statement_start((block_var || lbrace).location.end_char), - rbrace.location.start_char + start_char, + start_char - line_counts[location.start_line - 1].start, + rbrace.location.start_char, + rbrace.location.start_column ) location = Location.new( start_line: lbrace.location.start_line, start_char: lbrace.location.start_char, + start_column: lbrace.location.start_column, end_line: [rbrace.location.end_line, statements.location.end_line].max, - end_char: rbrace.location.end_char + end_char: rbrace.location.end_char, + end_column: rbrace.location.end_column ) BraceBlock.new( @@ -782,10 +805,14 @@ def on_case(value, consequent) def on_class(constant, superclass, bodystmt) beginning = find_token(Kw, "class") ending = find_token(Kw, "end") + location = (superclass || constant).location + start_char = find_next_statement_start(location.end_char) bodystmt.bind( - find_next_statement_start((superclass || constant).location.end_char), - ending.location.start_char + start_char, + start_char - line_counts[location.start_line - 1].start, + ending.location.start_char, + ending.location.start_column ) ClassDeclaration.new( @@ -802,7 +829,7 @@ def on_comma(value) node = Comma.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -847,7 +874,7 @@ def on_comment(value) value: value.chomp, inline: value.strip != lines[line - 1].strip, location: - Location.token(line: line, char: char_pos, size: value.size - 1) + Location.token(line: line, char: char_pos, column: current_column, size: value.size - 1) ) @comments << comment @@ -859,7 +886,7 @@ def on_comment(value) def on_const(value) Const.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) end @@ -894,7 +921,7 @@ def on_const_ref(constant) def on_cvar(value) CVar.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) end @@ -918,12 +945,15 @@ def on_def(name, params, bodystmt) # location information if params.is_a?(Params) && params.empty? end_char = name.location.end_char + end_column = name.location.end_column location = Location.new( start_line: params.location.start_line, start_char: end_char, + start_column: end_column, end_line: params.location.end_line, - end_char: end_char + end_char: end_char, + end_column: end_column ) params = Params.new(location: location) @@ -933,9 +963,13 @@ def on_def(name, params, bodystmt) if ending tokens.delete(ending) + start_char = find_next_statement_start(params.location.end_char) + bodystmt.bind( - find_next_statement_start(params.location.end_char), - ending.location.start_char + start_char, + start_char - line_counts[params.location.start_line - 1].start, + ending.location.start_char, + ending.location.start_column ) Def.new( @@ -993,12 +1027,15 @@ def on_defs(target, operator, name, params, bodystmt) # location information if params.is_a?(Params) && params.empty? end_char = name.location.end_char + end_column = name.location.end_column location = Location.new( start_line: params.location.start_line, start_char: end_char, + start_column: end_column, end_line: params.location.end_line, - end_char: end_char + end_char: end_char, + end_column: end_column ) params = Params.new(location: location) @@ -1009,9 +1046,13 @@ def on_defs(target, operator, name, params, bodystmt) if ending tokens.delete(ending) + start_char = find_next_statement_start(params.location.end_char) + bodystmt.bind( - find_next_statement_start(params.location.end_char), - ending.location.start_char + start_char, + start_char - line_counts[params.location.start_line - 1].start, + ending.location.start_char, + ending.location.start_column ) Defs.new( @@ -1043,10 +1084,14 @@ def on_defs(target, operator, name, params, bodystmt) def on_do_block(block_var, bodystmt) beginning = find_token(Kw, "do") ending = find_token(Kw, "end") + location = (block_var || beginning).location + start_char = find_next_statement_start(location.end_char) bodystmt.bind( - find_next_statement_start((block_var || beginning).location.end_char), - ending.location.start_char + start_char, + start_char - line_counts[location.start_line - 1].start, + ending.location.start_char, + ending.location.start_column ) DoBlock.new( @@ -1128,10 +1173,13 @@ def on_else(statements) node = tokens[index] ending = node.value == "end" ? tokens.delete_at(index) : node + start_char = find_next_statement_start(keyword.location.end_char) statements.bind( - find_next_statement_start(keyword.location.end_char), - ending.location.start_char + start_char, + start_char - line_counts[keyword.location.start_line - 1].start, + ending.location.start_char, + ending.location.start_column ) Else.new( @@ -1151,7 +1199,12 @@ def on_elsif(predicate, statements, consequent) beginning = find_token(Kw, "elsif") ending = consequent || find_token(Kw, "end") - statements.bind(predicate.location.end_char, ending.location.start_char) + statements.bind( + predicate.location.end_char, + predicate.location.end_column, + ending.location.start_char, + ending.location.start_column + ) Elsif.new( predicate: predicate, @@ -1174,7 +1227,7 @@ def on_embdoc_beg(value) @embdoc = EmbDoc.new( value: value, - location: Location.fixed(line: lineno, char: char_pos) + location: Location.fixed(line: lineno, column: current_column, char: char_pos) ) end @@ -1189,8 +1242,10 @@ def on_embdoc_end(value) Location.new( start_line: location.start_line, start_char: location.start_char, + start_column: location.start_column, end_line: lineno, - end_char: char_pos + value.length - 1 + end_char: char_pos + value.length - 1, + end_column: current_column + value.length - 1 ) ) @@ -1206,7 +1261,7 @@ def on_embexpr_beg(value) node = EmbExprBeg.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -1219,7 +1274,7 @@ def on_embexpr_end(value) node = EmbExprEnd.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -1232,7 +1287,7 @@ def on_embvar(value) node = EmbVar.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -1247,9 +1302,12 @@ def on_ensure(statements) # We don't want to consume the :@kw event, because that would break # def..ensure..end chains. ending = find_token(Kw, "end", consume: false) + start_char = find_next_statement_start(keyword.location.end_char) statements.bind( - find_next_statement_start(keyword.location.end_char), - ending.location.start_char + start_char, + start_char - line_counts[keyword.location.start_line - 1].start, + ending.location.start_char, + ending.location.start_column ) Ensure.new( @@ -1296,7 +1354,7 @@ def on_field(parent, operator, name) def on_float(value) FloatLiteral.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) end @@ -1341,7 +1399,9 @@ def on_for(index, collection, statements) statements.bind( (keyword || collection).location.end_char, - ending.location.start_char + (keyword || collection).location.end_column, + ending.location.start_char, + ending.location.start_column ) if index.is_a?(MLHS) @@ -1362,7 +1422,7 @@ def on_for(index, collection, statements) def on_gvar(value) GVar.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) end @@ -1383,7 +1443,7 @@ def on_hash(assocs) # on_heredoc_beg: (String value) -> HeredocBeg def on_heredoc_beg(value) location = - Location.token(line: lineno, char: char_pos, size: value.size + 1) + Location.token(line: lineno, char: char_pos, column: current_column, size: value.size + 1) # Here we're going to artificially create an extra node type so that if # there are comments after the declaration of a heredoc, they get printed. @@ -1419,8 +1479,10 @@ def on_heredoc_end(value) Location.new( start_line: heredoc.location.start_line, start_char: heredoc.location.start_char, + start_column: heredoc.location.start_column, end_line: lineno, - end_char: char_pos + end_char: char_pos, + end_column: current_column, ) ) end @@ -1447,7 +1509,7 @@ def on_hshptn(constant, keywords, keyword_rest) def on_ident(value) Ident.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) end @@ -1461,7 +1523,12 @@ def on_if(predicate, statements, consequent) beginning = find_token(Kw, "if") ending = consequent || find_token(Kw, "end") - statements.bind(predicate.location.end_char, ending.location.start_char) + statements.bind( + predicate.location.end_char, + predicate.location.end_column, + ending.location.start_char, + ending.location.start_column + ) If.new( predicate: predicate, @@ -1507,7 +1574,7 @@ def on_if_mod(predicate, statement) def on_imaginary(value) Imaginary.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) end @@ -1531,9 +1598,12 @@ def on_in(pattern, statements, consequent) statements_start = token end + start_char = find_next_statement_start(statements_start.location.end_char) statements.bind( - find_next_statement_start(statements_start.location.end_char), - ending.location.start_char + start_char, + start_char - line_counts[statements_start.location.start_line - 1].start, + ending.location.start_char, + ending.location.start_column ) In.new( @@ -1549,7 +1619,7 @@ def on_in(pattern, statements, consequent) def on_int(value) Int.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) end @@ -1558,7 +1628,7 @@ def on_int(value) def on_ivar(value) IVar.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) end @@ -1568,7 +1638,7 @@ def on_kw(value) node = Kw.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -1589,7 +1659,7 @@ def on_kwrest_param(name) def on_label(value) Label.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) end @@ -1599,7 +1669,7 @@ def on_label_end(value) node = LabelEnd.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -1625,7 +1695,12 @@ def on_lambda(params, statements) closing = find_token(Kw, "end") end - statements.bind(opening.location.end_char, closing.location.start_char) + statements.bind( + opening.location.end_char, + opening.location.end_column, + closing.location.start_char, + closing.location.start_column + ) Lambda.new( params: params, @@ -1640,7 +1715,7 @@ def on_lbrace(value) node = LBrace.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -1653,7 +1728,7 @@ def on_lbracket(value) node = LBracket.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -1666,7 +1741,7 @@ def on_lparen(value) node = LParen.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -1765,7 +1840,7 @@ def on_mlhs_add_star(mlhs, part) # :call-seq: # on_mlhs_new: () -> MLHS def on_mlhs_new - MLHS.new(parts: [], location: Location.fixed(line: lineno, char: char_pos)) + MLHS.new(parts: [], location: Location.fixed(line: lineno, char: char_pos, column: current_column)) end # :call-seq: @@ -1791,10 +1866,13 @@ def on_mlhs_paren(contents) def on_module(constant, bodystmt) beginning = find_token(Kw, "module") ending = find_token(Kw, "end") + start_char = find_next_statement_start(constant.location.end_char) bodystmt.bind( - find_next_statement_start(constant.location.end_char), - ending.location.start_char + start_char, + start_char - line_counts[constant.location.start_line - 1].start, + ending.location.start_char, + ending.location.start_column ) ModuleDeclaration.new( @@ -1807,7 +1885,7 @@ def on_module(constant, bodystmt) # :call-seq: # on_mrhs_new: () -> MRHS def on_mrhs_new - MRHS.new(parts: [], location: Location.fixed(line: lineno, char: char_pos)) + MRHS.new(parts: [], location: Location.fixed(line: lineno, char: char_pos, column: current_column)) end # :call-seq: @@ -1876,7 +1954,7 @@ def on_op(value) node = Op.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -1935,7 +2013,7 @@ def on_params( if parts.any? parts[0].location.to(parts[-1].location) else - Location.fixed(line: lineno, char: char_pos) + Location.fixed(line: lineno, char: char_pos, column: current_column) end Params.new( @@ -1958,12 +2036,15 @@ def on_paren(contents) if contents && contents.is_a?(Params) location = contents.location + start_char = find_next_statement_start(lparen.location.end_char) location = Location.new( start_line: location.start_line, - start_char: find_next_statement_start(lparen.location.end_char), + start_char: start_char, + start_column: start_char - line_counts[lparen.location.start_line - 1].start, end_line: location.end_line, - end_char: rparen.location.start_char + end_char: rparen.location.start_char, + end_column: rparen.location.start_column ) contents = @@ -2001,23 +2082,26 @@ def on_parse_error(error, *) def on_period(value) Period.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) end # :call-seq: # on_program: (Statements statements) -> Program def on_program(statements) + last_column = source.length - line_counts[lines.length - 1].start location = Location.new( start_line: 1, start_char: 0, + start_column: 0, end_line: lines.length, - end_char: source.length + end_char: source.length, + end_column: last_column ) statements.body << @__end__ if @__end__ - statements.bind(0, source.length) + statements.bind(0, 0, source.length, last_column) program = Program.new(statements: statements, location: location) attach_comments(program, @comments) @@ -2130,7 +2214,7 @@ def on_qsymbols_beg(value) node = QSymbolsBeg.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -2165,7 +2249,7 @@ def on_qwords_beg(value) node = QWordsBeg.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -2185,7 +2269,7 @@ def on_qwords_new def on_rational(value) RationalLiteral.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) end @@ -2195,7 +2279,7 @@ def on_rbrace(value) node = RBrace.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -2208,7 +2292,7 @@ def on_rbracket(value) node = RBracket.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -2242,7 +2326,7 @@ def on_regexp_beg(value) node = RegexpBeg.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -2254,7 +2338,7 @@ def on_regexp_beg(value) def on_regexp_end(value) RegexpEnd.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) end @@ -2296,9 +2380,12 @@ def on_rescue(exceptions, variable, statements, consequent) exceptions = exceptions[0] if exceptions.is_a?(Array) last_node = variable || exceptions || keyword + start_char = find_next_statement_start(last_node.location.end_char) statements.bind( - find_next_statement_start(last_node.location.end_char), - char_pos + start_char, + start_char - line_counts[last_node.location.start_line - 1].start, + char_pos, + current_column ) # We add an additional inner node here that ripper doesn't provide so that @@ -2313,8 +2400,10 @@ def on_rescue(exceptions, variable, statements, consequent) Location.new( start_line: keyword.location.start_line, start_char: keyword.location.end_char + 1, + start_column: keyword.location.end_column + 1, end_line: last_node.location.end_line, - end_char: last_node.location.end_char + end_char: last_node.location.end_char, + end_column: last_node.location.end_column ) ) end @@ -2328,8 +2417,10 @@ def on_rescue(exceptions, variable, statements, consequent) Location.new( start_line: keyword.location.start_line, start_char: keyword.location.start_char, + start_column: keyword.location.start_column, end_line: lineno, - end_char: char_pos + end_char: char_pos, + end_column: current_column ) ) end @@ -2388,7 +2479,7 @@ def on_rparen(value) node = RParen.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -2400,10 +2491,13 @@ def on_rparen(value) def on_sclass(target, bodystmt) beginning = find_token(Kw, "class") ending = find_token(Kw, "end") + start_char = find_next_statement_start(target.location.end_char) bodystmt.bind( - find_next_statement_start(target.location.end_char), - ending.location.start_char + start_char, + start_char - line_counts[target.location.start_line - 1].start, + ending.location.start_char, + ending.location.start_column ) SClass.new( @@ -2442,7 +2536,7 @@ def on_stmts_new Statements.new( self, body: [], - location: Location.fixed(line: lineno, char: char_pos) + location: Location.fixed(line: lineno, char: char_pos, column: current_column) ) end @@ -2476,7 +2570,7 @@ def on_string_concat(left, right) def on_string_content StringContent.new( parts: [], - location: Location.fixed(line: lineno, char: char_pos) + location: Location.fixed(line: lineno, char: char_pos, column: current_column) ) end @@ -2499,18 +2593,22 @@ def on_string_embexpr(statements) statements.bind( embexpr_beg.location.end_char, - embexpr_end.location.start_char + embexpr_beg.location.end_column, + embexpr_end.location.start_char, + embexpr_end.location.start_column ) location = Location.new( start_line: embexpr_beg.location.start_line, start_char: embexpr_beg.location.start_char, + start_column: embexpr_beg.location.start_column, end_line: [ embexpr_end.location.end_line, statements.location.end_line ].max, - end_char: embexpr_end.location.end_char + end_char: embexpr_end.location.end_char, + end_column: embexpr_end.location.end_column ) StringEmbExpr.new(statements: statements, location: location) @@ -2538,11 +2636,13 @@ def on_string_literal(string) Location.new( start_line: tstring_beg.location.start_line, start_char: tstring_beg.location.start_char, + start_column: tstring_beg.location.start_column, end_line: [ tstring_end.location.end_line, string.location.end_line ].max, - end_char: tstring_end.location.end_char + end_char: tstring_end.location.end_char, + end_column: tstring_end.location.end_column ) StringLiteral.new( @@ -2571,7 +2671,7 @@ def on_symbeg(value) node = SymBeg.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -2625,7 +2725,7 @@ def on_symbols_beg(value) node = SymbolsBeg.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -2650,7 +2750,7 @@ def on_tlambda(value) node = TLambda.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -2663,7 +2763,7 @@ def on_tlambeg(value) node = TLamBeg.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -2698,7 +2798,7 @@ def on_tstring_beg(value) node = TStringBeg.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -2710,7 +2810,7 @@ def on_tstring_beg(value) def on_tstring_content(value) TStringContent.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) end @@ -2720,7 +2820,7 @@ def on_tstring_end(value) node = TStringEnd.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -2794,7 +2894,12 @@ def on_unless(predicate, statements, consequent) beginning = find_token(Kw, "unless") ending = consequent || find_token(Kw, "end") - statements.bind(predicate.location.end_char, ending.location.start_char) + statements.bind( + predicate.location.end_char, + predicate.location.end_column, + ending.location.start_char, + ending.location.start_column + ) Unless.new( predicate: predicate, @@ -2831,7 +2936,12 @@ def on_until(predicate, statements) end # Update the Statements location information - statements.bind(predicate.location.end_char, ending.location.start_char) + statements.bind( + predicate.location.end_char, + predicate.location.end_column, + ending.location.start_char, + ending.location.start_column + ) Until.new( predicate: predicate, @@ -2875,7 +2985,7 @@ def on_var_field(value) else # You can hit this pattern if you're assigning to a splat using # pattern matching syntax in Ruby 2.7+ - Location.fixed(line: lineno, char: char_pos) + Location.fixed(line: lineno, char: char_pos, column: current_column) end VarField.new(value: value, location: location) @@ -2903,7 +3013,7 @@ def on_vcall(ident) # :call-seq: # on_void_stmt: () -> VoidStmt def on_void_stmt - VoidStmt.new(location: Location.fixed(line: lineno, char: char_pos)) + VoidStmt.new(location: Location.fixed(line: lineno, char: char_pos, column: current_column)) end # :call-seq: @@ -2922,9 +3032,13 @@ def on_when(arguments, statements, consequent) statements_start = token end + start_char = find_next_statement_start(statements_start.location.end_char) + statements.bind( - find_next_statement_start(statements_start.location.end_char), - ending.location.start_char + start_char, + start_char - line_counts[statements_start.location.start_line - 1].start, + ending.location.start_char, + ending.location.start_column ) When.new( @@ -2950,7 +3064,12 @@ def on_while(predicate, statements) end # Update the Statements location information - statements.bind(predicate.location.end_char, ending.location.start_char) + statements.bind( + predicate.location.end_char, + predicate.location.end_column, + ending.location.start_char, + ending.location.start_column + ) While.new( predicate: predicate, @@ -2986,7 +3105,7 @@ def on_word_add(word, part) # :call-seq: # on_word_new: () -> Word def on_word_new - Word.new(parts: [], location: Location.fixed(line: lineno, char: char_pos)) + Word.new(parts: [], location: Location.fixed(line: lineno, char: char_pos, column: current_column)) end # :call-seq: @@ -3005,7 +3124,7 @@ def on_words_beg(value) node = WordsBeg.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node diff --git a/test/node_test.rb b/test/node_test.rb index 9c29f79d..4831081b 100644 --- a/test/node_test.rb +++ b/test/node_test.rb @@ -999,14 +999,26 @@ def test_zsuper assert_node(ZSuper, "zsuper", "super") end + def test_column_positions + source = <<~SOURCE + puts 'Hello' + puts 'Goodbye' + SOURCE + + at = location(lines: 2..2, chars: 13..27, columns: 0..14) + assert_node(Command, "command", source, at: at) + end + private - def location(lines: 1..1, chars: 0..0) + def location(lines: 1..1, chars: 0..0, columns: 0..0) Location.new( start_line: lines.begin, start_char: chars.begin, + start_column: columns.begin, end_line: lines.end, - end_char: chars.end + end_char: chars.end, + end_column: columns.end ) end @@ -1014,7 +1026,8 @@ def assert_node(kind, type, source, at: nil) at ||= location( lines: 1..[1, source.count("\n")].max, - chars: 0..source.chomp.size + chars: 0..source.chomp.size, + columns: 0..source.chomp.size ) # Parse the example, get the outputted parse tree, and assert that it was From 48ce3753499f3ad510c0234539c6c23f4ecfc784 Mon Sep 17 00:00:00 2001 From: Vinicius Stock Date: Tue, 12 Apr 2022 16:44:43 -0400 Subject: [PATCH 2/2] Handle multibyte strings --- lib/syntax_tree/parser.rb | 3 ++- test/node_test.rb | 10 ++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/lib/syntax_tree/parser.rb b/lib/syntax_tree/parser.rb index 6bc27d5c..60923b57 100644 --- a/lib/syntax_tree/parser.rb +++ b/lib/syntax_tree/parser.rb @@ -166,7 +166,8 @@ def char_pos # This represents the current column we're in relative to the beginning of # the current line. def current_column - column - line_counts[lineno - 1].start + line = line_counts[lineno - 1] + line[column].to_i - line.start end # As we build up a list of tokens, we'll periodically need to go backwards diff --git a/test/node_test.rb b/test/node_test.rb index 4831081b..e412d648 100644 --- a/test/node_test.rb +++ b/test/node_test.rb @@ -1009,6 +1009,16 @@ def test_column_positions assert_node(Command, "command", source, at: at) end + def test_multibyte_column_positions + source = <<~SOURCE + puts "Congrats" + puts "🎉 🎉" + SOURCE + + at = location(lines: 2..2, chars: 16..26, columns: 0..10) + assert_node(Command, "command", source, at: at) + end + private def location(lines: 1..1, chars: 0..0, columns: 0..0)