diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index 9081d977..f940499a 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -3,13 +3,15 @@ module SyntaxTree # Represents the location of a node in the tree from the source code. class Location - attr_reader :start_line, :start_char, :end_line, :end_char + attr_reader :start_line, :start_char, :start_column, :end_line, :end_char, :end_column - def initialize(start_line:, start_char:, end_line:, end_char:) + def initialize(start_line:, start_char:, start_column:, end_line:, end_char:, end_column:) @start_line = start_line @start_char = start_char + @start_column = start_column @end_line = end_line @end_char = end_char + @end_column = end_column end def lines @@ -26,22 +28,26 @@ def to(other) Location.new( start_line: start_line, start_char: start_char, + start_column: start_column, end_line: [end_line, other.end_line].max, - end_char: other.end_char + end_char: other.end_char, + end_column: other.end_column ) end - def self.token(line:, char:, size:) + def self.token(line:, char:, column:, size:) new( start_line: line, start_char: char, + start_column: column, end_line: line, - end_char: char + size + end_char: char + size, + end_column: column + size ) end - def self.fixed(line:, char:) - new(start_line: line, start_char: char, end_line: line, end_char: char) + def self.fixed(line:, char:, column:) + new(start_line: line, start_char: char, start_column: column, end_line: line, end_char: char, end_column: column) end end @@ -2047,13 +2053,15 @@ def initialize( @comments = comments end - def bind(start_char, end_char) + def bind(start_char, start_column, end_char, end_column) @location = Location.new( start_line: location.start_line, start_char: start_char, + start_column: start_column, end_line: location.end_line, - end_char: end_char + end_char: end_char, + end_column: end_column ) parts = [rescue_clause, else_clause, ensure_clause] @@ -2062,14 +2070,17 @@ def bind(start_char, end_char) consequent = parts.compact.first statements.bind( start_char, - consequent ? consequent.location.start_char : end_char + start_column, + consequent ? consequent.location.start_char : end_char, + consequent ? consequent.location.start_column : end_column ) # Next we're going to determine the rescue clause if there is one if rescue_clause consequent = parts.drop(1).compact.first rescue_clause.bind_end( - consequent ? consequent.location.start_char : end_char + consequent ? consequent.location.start_char : end_char, + consequent ? consequent.location.start_column : end_column ) end end @@ -8413,20 +8424,22 @@ def initialize( @comments = comments end - def bind_end(end_char) + def bind_end(end_char, end_column) @location = Location.new( start_line: location.start_line, start_char: location.start_char, + start_column: location.start_column, end_line: location.end_line, - end_char: end_char + end_char: end_char, + end_column: end_column ) if consequent - consequent.bind_end(end_char) - statements.bind_end(consequent.location.start_char) + consequent.bind_end(end_char, end_column) + statements.bind_end(consequent.location.start_char, consequent.location.start_column) else - statements.bind_end(end_char) + statements.bind_end(end_char, end_column) end end @@ -8885,13 +8898,15 @@ def initialize(parser, body:, location:, comments: []) @comments = comments end - def bind(start_char, end_char) + def bind(start_char, start_column, end_char, end_column) @location = Location.new( start_line: location.start_line, start_char: start_char, + start_column: start_column, end_line: location.end_line, - end_char: end_char + end_char: end_char, + end_column: end_column ) if body[0].is_a?(VoidStmt) @@ -8900,8 +8915,10 @@ def bind(start_char, end_char) Location.new( start_line: location.start_line, start_char: start_char, + start_column: start_column, end_line: location.end_line, - end_char: start_char + end_char: start_char, + end_column: end_column ) body[0] = VoidStmt.new(location: location) @@ -8910,13 +8927,15 @@ def bind(start_char, end_char) attach_comments(start_char, end_char) end - def bind_end(end_char) + def bind_end(end_char, end_column) @location = Location.new( start_line: location.start_line, start_char: location.start_char, + start_column: location.start_column, end_line: location.end_line, - end_char: end_char + end_char: end_char, + end_column: end_column ) end diff --git a/lib/syntax_tree/parser.rb b/lib/syntax_tree/parser.rb index 5bd89dc2..60923b57 100644 --- a/lib/syntax_tree/parser.rb +++ b/lib/syntax_tree/parser.rb @@ -163,6 +163,13 @@ def char_pos line_counts[lineno - 1][column] end + # This represents the current column we're in relative to the beginning of + # the current line. + def current_column + line = line_counts[lineno - 1] + line[column].to_i - line.start + end + # As we build up a list of tokens, we'll periodically need to go backwards # and find the ones that we've already hit in order to determine the # location information for nodes that use them. For example, if you have a @@ -251,10 +258,13 @@ def find_next_statement_start(position) def on_BEGIN(statements) lbrace = find_token(LBrace) rbrace = find_token(RBrace) + start_char = find_next_statement_start(lbrace.location.end_char) statements.bind( - find_next_statement_start(lbrace.location.end_char), - rbrace.location.start_char + start_char, + start_char - line_counts[lbrace.location.start_line - 1].start, + rbrace.location.start_char, + rbrace.location.start_column, ) keyword = find_token(Kw, "BEGIN") @@ -271,7 +281,7 @@ def on_BEGIN(statements) def on_CHAR(value) CHAR.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) end @@ -280,10 +290,13 @@ def on_CHAR(value) def on_END(statements) lbrace = find_token(LBrace) rbrace = find_token(RBrace) + start_char = find_next_statement_start(lbrace.location.end_char) statements.bind( - find_next_statement_start(lbrace.location.end_char), - rbrace.location.start_char + start_char, + start_char - line_counts[lbrace.location.start_line - 1].start, + rbrace.location.start_char, + rbrace.location.start_column ) keyword = find_token(Kw, "END") @@ -301,7 +314,7 @@ def on___end__(value) @__end__ = EndContent.new( value: source[(char_pos + value.length)..-1], - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) end @@ -465,7 +478,7 @@ def on_args_forward # :call-seq: # on_args_new: () -> Args def on_args_new - Args.new(parts: [], location: Location.fixed(line: lineno, char: char_pos)) + Args.new(parts: [], location: Location.fixed(line: lineno, column: current_column, char: char_pos)) end # :call-seq: @@ -551,7 +564,7 @@ def on_assoc_splat(value) def on_backref(value) Backref.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) end @@ -561,7 +574,7 @@ def on_backtick(value) node = Backtick.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -592,15 +605,20 @@ def on_begin(bodystmt) PinnedBegin.new(statement: bodystmt, location: location) else keyword = find_token(Kw, "begin") - end_char = + end_location = if bodystmt.rescue_clause || bodystmt.ensure_clause || bodystmt.else_clause - bodystmt.location.end_char + bodystmt.location else - find_token(Kw, "end").location.end_char + find_token(Kw, "end").location end - bodystmt.bind(keyword.location.end_char, end_char) + bodystmt.bind( + keyword.location.end_char, + keyword.location.end_column, + end_location.end_char, + end_location.end_column + ) location = keyword.location.to(bodystmt.location) Begin.new(bodystmt: bodystmt, location: location) @@ -685,7 +703,7 @@ def on_bodystmt(statements, rescue_clause, else_clause, ensure_clause) else_keyword: else_clause && find_token(Kw, "else"), else_clause: else_clause, ensure_clause: ensure_clause, - location: Location.fixed(line: lineno, char: char_pos) + location: Location.fixed(line: lineno, char: char_pos, column: current_column) ) end @@ -697,18 +715,24 @@ def on_bodystmt(statements, rescue_clause, else_clause, ensure_clause) def on_brace_block(block_var, statements) lbrace = find_token(LBrace) rbrace = find_token(RBrace) + location = (block_var || lbrace).location + start_char = find_next_statement_start(location.end_char) statements.bind( - find_next_statement_start((block_var || lbrace).location.end_char), - rbrace.location.start_char + start_char, + start_char - line_counts[location.start_line - 1].start, + rbrace.location.start_char, + rbrace.location.start_column ) location = Location.new( start_line: lbrace.location.start_line, start_char: lbrace.location.start_char, + start_column: lbrace.location.start_column, end_line: [rbrace.location.end_line, statements.location.end_line].max, - end_char: rbrace.location.end_char + end_char: rbrace.location.end_char, + end_column: rbrace.location.end_column ) BraceBlock.new( @@ -782,10 +806,14 @@ def on_case(value, consequent) def on_class(constant, superclass, bodystmt) beginning = find_token(Kw, "class") ending = find_token(Kw, "end") + location = (superclass || constant).location + start_char = find_next_statement_start(location.end_char) bodystmt.bind( - find_next_statement_start((superclass || constant).location.end_char), - ending.location.start_char + start_char, + start_char - line_counts[location.start_line - 1].start, + ending.location.start_char, + ending.location.start_column ) ClassDeclaration.new( @@ -802,7 +830,7 @@ def on_comma(value) node = Comma.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -847,7 +875,7 @@ def on_comment(value) value: value.chomp, inline: value.strip != lines[line - 1].strip, location: - Location.token(line: line, char: char_pos, size: value.size - 1) + Location.token(line: line, char: char_pos, column: current_column, size: value.size - 1) ) @comments << comment @@ -859,7 +887,7 @@ def on_comment(value) def on_const(value) Const.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) end @@ -894,7 +922,7 @@ def on_const_ref(constant) def on_cvar(value) CVar.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) end @@ -918,12 +946,15 @@ def on_def(name, params, bodystmt) # location information if params.is_a?(Params) && params.empty? end_char = name.location.end_char + end_column = name.location.end_column location = Location.new( start_line: params.location.start_line, start_char: end_char, + start_column: end_column, end_line: params.location.end_line, - end_char: end_char + end_char: end_char, + end_column: end_column ) params = Params.new(location: location) @@ -933,9 +964,13 @@ def on_def(name, params, bodystmt) if ending tokens.delete(ending) + start_char = find_next_statement_start(params.location.end_char) + bodystmt.bind( - find_next_statement_start(params.location.end_char), - ending.location.start_char + start_char, + start_char - line_counts[params.location.start_line - 1].start, + ending.location.start_char, + ending.location.start_column ) Def.new( @@ -993,12 +1028,15 @@ def on_defs(target, operator, name, params, bodystmt) # location information if params.is_a?(Params) && params.empty? end_char = name.location.end_char + end_column = name.location.end_column location = Location.new( start_line: params.location.start_line, start_char: end_char, + start_column: end_column, end_line: params.location.end_line, - end_char: end_char + end_char: end_char, + end_column: end_column ) params = Params.new(location: location) @@ -1009,9 +1047,13 @@ def on_defs(target, operator, name, params, bodystmt) if ending tokens.delete(ending) + start_char = find_next_statement_start(params.location.end_char) + bodystmt.bind( - find_next_statement_start(params.location.end_char), - ending.location.start_char + start_char, + start_char - line_counts[params.location.start_line - 1].start, + ending.location.start_char, + ending.location.start_column ) Defs.new( @@ -1043,10 +1085,14 @@ def on_defs(target, operator, name, params, bodystmt) def on_do_block(block_var, bodystmt) beginning = find_token(Kw, "do") ending = find_token(Kw, "end") + location = (block_var || beginning).location + start_char = find_next_statement_start(location.end_char) bodystmt.bind( - find_next_statement_start((block_var || beginning).location.end_char), - ending.location.start_char + start_char, + start_char - line_counts[location.start_line - 1].start, + ending.location.start_char, + ending.location.start_column ) DoBlock.new( @@ -1128,10 +1174,13 @@ def on_else(statements) node = tokens[index] ending = node.value == "end" ? tokens.delete_at(index) : node + start_char = find_next_statement_start(keyword.location.end_char) statements.bind( - find_next_statement_start(keyword.location.end_char), - ending.location.start_char + start_char, + start_char - line_counts[keyword.location.start_line - 1].start, + ending.location.start_char, + ending.location.start_column ) Else.new( @@ -1151,7 +1200,12 @@ def on_elsif(predicate, statements, consequent) beginning = find_token(Kw, "elsif") ending = consequent || find_token(Kw, "end") - statements.bind(predicate.location.end_char, ending.location.start_char) + statements.bind( + predicate.location.end_char, + predicate.location.end_column, + ending.location.start_char, + ending.location.start_column + ) Elsif.new( predicate: predicate, @@ -1174,7 +1228,7 @@ def on_embdoc_beg(value) @embdoc = EmbDoc.new( value: value, - location: Location.fixed(line: lineno, char: char_pos) + location: Location.fixed(line: lineno, column: current_column, char: char_pos) ) end @@ -1189,8 +1243,10 @@ def on_embdoc_end(value) Location.new( start_line: location.start_line, start_char: location.start_char, + start_column: location.start_column, end_line: lineno, - end_char: char_pos + value.length - 1 + end_char: char_pos + value.length - 1, + end_column: current_column + value.length - 1 ) ) @@ -1206,7 +1262,7 @@ def on_embexpr_beg(value) node = EmbExprBeg.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -1219,7 +1275,7 @@ def on_embexpr_end(value) node = EmbExprEnd.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -1232,7 +1288,7 @@ def on_embvar(value) node = EmbVar.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -1247,9 +1303,12 @@ def on_ensure(statements) # We don't want to consume the :@kw event, because that would break # def..ensure..end chains. ending = find_token(Kw, "end", consume: false) + start_char = find_next_statement_start(keyword.location.end_char) statements.bind( - find_next_statement_start(keyword.location.end_char), - ending.location.start_char + start_char, + start_char - line_counts[keyword.location.start_line - 1].start, + ending.location.start_char, + ending.location.start_column ) Ensure.new( @@ -1296,7 +1355,7 @@ def on_field(parent, operator, name) def on_float(value) FloatLiteral.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) end @@ -1341,7 +1400,9 @@ def on_for(index, collection, statements) statements.bind( (keyword || collection).location.end_char, - ending.location.start_char + (keyword || collection).location.end_column, + ending.location.start_char, + ending.location.start_column ) if index.is_a?(MLHS) @@ -1362,7 +1423,7 @@ def on_for(index, collection, statements) def on_gvar(value) GVar.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) end @@ -1383,7 +1444,7 @@ def on_hash(assocs) # on_heredoc_beg: (String value) -> HeredocBeg def on_heredoc_beg(value) location = - Location.token(line: lineno, char: char_pos, size: value.size + 1) + Location.token(line: lineno, char: char_pos, column: current_column, size: value.size + 1) # Here we're going to artificially create an extra node type so that if # there are comments after the declaration of a heredoc, they get printed. @@ -1419,8 +1480,10 @@ def on_heredoc_end(value) Location.new( start_line: heredoc.location.start_line, start_char: heredoc.location.start_char, + start_column: heredoc.location.start_column, end_line: lineno, - end_char: char_pos + end_char: char_pos, + end_column: current_column, ) ) end @@ -1447,7 +1510,7 @@ def on_hshptn(constant, keywords, keyword_rest) def on_ident(value) Ident.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) end @@ -1461,7 +1524,12 @@ def on_if(predicate, statements, consequent) beginning = find_token(Kw, "if") ending = consequent || find_token(Kw, "end") - statements.bind(predicate.location.end_char, ending.location.start_char) + statements.bind( + predicate.location.end_char, + predicate.location.end_column, + ending.location.start_char, + ending.location.start_column + ) If.new( predicate: predicate, @@ -1507,7 +1575,7 @@ def on_if_mod(predicate, statement) def on_imaginary(value) Imaginary.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) end @@ -1531,9 +1599,12 @@ def on_in(pattern, statements, consequent) statements_start = token end + start_char = find_next_statement_start(statements_start.location.end_char) statements.bind( - find_next_statement_start(statements_start.location.end_char), - ending.location.start_char + start_char, + start_char - line_counts[statements_start.location.start_line - 1].start, + ending.location.start_char, + ending.location.start_column ) In.new( @@ -1549,7 +1620,7 @@ def on_in(pattern, statements, consequent) def on_int(value) Int.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) end @@ -1558,7 +1629,7 @@ def on_int(value) def on_ivar(value) IVar.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) end @@ -1568,7 +1639,7 @@ def on_kw(value) node = Kw.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -1589,7 +1660,7 @@ def on_kwrest_param(name) def on_label(value) Label.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) end @@ -1599,7 +1670,7 @@ def on_label_end(value) node = LabelEnd.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -1625,7 +1696,12 @@ def on_lambda(params, statements) closing = find_token(Kw, "end") end - statements.bind(opening.location.end_char, closing.location.start_char) + statements.bind( + opening.location.end_char, + opening.location.end_column, + closing.location.start_char, + closing.location.start_column + ) Lambda.new( params: params, @@ -1640,7 +1716,7 @@ def on_lbrace(value) node = LBrace.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -1653,7 +1729,7 @@ def on_lbracket(value) node = LBracket.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -1666,7 +1742,7 @@ def on_lparen(value) node = LParen.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -1765,7 +1841,7 @@ def on_mlhs_add_star(mlhs, part) # :call-seq: # on_mlhs_new: () -> MLHS def on_mlhs_new - MLHS.new(parts: [], location: Location.fixed(line: lineno, char: char_pos)) + MLHS.new(parts: [], location: Location.fixed(line: lineno, char: char_pos, column: current_column)) end # :call-seq: @@ -1791,10 +1867,13 @@ def on_mlhs_paren(contents) def on_module(constant, bodystmt) beginning = find_token(Kw, "module") ending = find_token(Kw, "end") + start_char = find_next_statement_start(constant.location.end_char) bodystmt.bind( - find_next_statement_start(constant.location.end_char), - ending.location.start_char + start_char, + start_char - line_counts[constant.location.start_line - 1].start, + ending.location.start_char, + ending.location.start_column ) ModuleDeclaration.new( @@ -1807,7 +1886,7 @@ def on_module(constant, bodystmt) # :call-seq: # on_mrhs_new: () -> MRHS def on_mrhs_new - MRHS.new(parts: [], location: Location.fixed(line: lineno, char: char_pos)) + MRHS.new(parts: [], location: Location.fixed(line: lineno, char: char_pos, column: current_column)) end # :call-seq: @@ -1876,7 +1955,7 @@ def on_op(value) node = Op.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -1935,7 +2014,7 @@ def on_params( if parts.any? parts[0].location.to(parts[-1].location) else - Location.fixed(line: lineno, char: char_pos) + Location.fixed(line: lineno, char: char_pos, column: current_column) end Params.new( @@ -1958,12 +2037,15 @@ def on_paren(contents) if contents && contents.is_a?(Params) location = contents.location + start_char = find_next_statement_start(lparen.location.end_char) location = Location.new( start_line: location.start_line, - start_char: find_next_statement_start(lparen.location.end_char), + start_char: start_char, + start_column: start_char - line_counts[lparen.location.start_line - 1].start, end_line: location.end_line, - end_char: rparen.location.start_char + end_char: rparen.location.start_char, + end_column: rparen.location.start_column ) contents = @@ -2001,23 +2083,26 @@ def on_parse_error(error, *) def on_period(value) Period.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) end # :call-seq: # on_program: (Statements statements) -> Program def on_program(statements) + last_column = source.length - line_counts[lines.length - 1].start location = Location.new( start_line: 1, start_char: 0, + start_column: 0, end_line: lines.length, - end_char: source.length + end_char: source.length, + end_column: last_column ) statements.body << @__end__ if @__end__ - statements.bind(0, source.length) + statements.bind(0, 0, source.length, last_column) program = Program.new(statements: statements, location: location) attach_comments(program, @comments) @@ -2130,7 +2215,7 @@ def on_qsymbols_beg(value) node = QSymbolsBeg.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -2165,7 +2250,7 @@ def on_qwords_beg(value) node = QWordsBeg.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -2185,7 +2270,7 @@ def on_qwords_new def on_rational(value) RationalLiteral.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) end @@ -2195,7 +2280,7 @@ def on_rbrace(value) node = RBrace.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -2208,7 +2293,7 @@ def on_rbracket(value) node = RBracket.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -2242,7 +2327,7 @@ def on_regexp_beg(value) node = RegexpBeg.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -2254,7 +2339,7 @@ def on_regexp_beg(value) def on_regexp_end(value) RegexpEnd.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) end @@ -2296,9 +2381,12 @@ def on_rescue(exceptions, variable, statements, consequent) exceptions = exceptions[0] if exceptions.is_a?(Array) last_node = variable || exceptions || keyword + start_char = find_next_statement_start(last_node.location.end_char) statements.bind( - find_next_statement_start(last_node.location.end_char), - char_pos + start_char, + start_char - line_counts[last_node.location.start_line - 1].start, + char_pos, + current_column ) # We add an additional inner node here that ripper doesn't provide so that @@ -2313,8 +2401,10 @@ def on_rescue(exceptions, variable, statements, consequent) Location.new( start_line: keyword.location.start_line, start_char: keyword.location.end_char + 1, + start_column: keyword.location.end_column + 1, end_line: last_node.location.end_line, - end_char: last_node.location.end_char + end_char: last_node.location.end_char, + end_column: last_node.location.end_column ) ) end @@ -2328,8 +2418,10 @@ def on_rescue(exceptions, variable, statements, consequent) Location.new( start_line: keyword.location.start_line, start_char: keyword.location.start_char, + start_column: keyword.location.start_column, end_line: lineno, - end_char: char_pos + end_char: char_pos, + end_column: current_column ) ) end @@ -2388,7 +2480,7 @@ def on_rparen(value) node = RParen.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -2400,10 +2492,13 @@ def on_rparen(value) def on_sclass(target, bodystmt) beginning = find_token(Kw, "class") ending = find_token(Kw, "end") + start_char = find_next_statement_start(target.location.end_char) bodystmt.bind( - find_next_statement_start(target.location.end_char), - ending.location.start_char + start_char, + start_char - line_counts[target.location.start_line - 1].start, + ending.location.start_char, + ending.location.start_column ) SClass.new( @@ -2442,7 +2537,7 @@ def on_stmts_new Statements.new( self, body: [], - location: Location.fixed(line: lineno, char: char_pos) + location: Location.fixed(line: lineno, char: char_pos, column: current_column) ) end @@ -2476,7 +2571,7 @@ def on_string_concat(left, right) def on_string_content StringContent.new( parts: [], - location: Location.fixed(line: lineno, char: char_pos) + location: Location.fixed(line: lineno, char: char_pos, column: current_column) ) end @@ -2499,18 +2594,22 @@ def on_string_embexpr(statements) statements.bind( embexpr_beg.location.end_char, - embexpr_end.location.start_char + embexpr_beg.location.end_column, + embexpr_end.location.start_char, + embexpr_end.location.start_column ) location = Location.new( start_line: embexpr_beg.location.start_line, start_char: embexpr_beg.location.start_char, + start_column: embexpr_beg.location.start_column, end_line: [ embexpr_end.location.end_line, statements.location.end_line ].max, - end_char: embexpr_end.location.end_char + end_char: embexpr_end.location.end_char, + end_column: embexpr_end.location.end_column ) StringEmbExpr.new(statements: statements, location: location) @@ -2538,11 +2637,13 @@ def on_string_literal(string) Location.new( start_line: tstring_beg.location.start_line, start_char: tstring_beg.location.start_char, + start_column: tstring_beg.location.start_column, end_line: [ tstring_end.location.end_line, string.location.end_line ].max, - end_char: tstring_end.location.end_char + end_char: tstring_end.location.end_char, + end_column: tstring_end.location.end_column ) StringLiteral.new( @@ -2571,7 +2672,7 @@ def on_symbeg(value) node = SymBeg.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -2625,7 +2726,7 @@ def on_symbols_beg(value) node = SymbolsBeg.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -2650,7 +2751,7 @@ def on_tlambda(value) node = TLambda.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -2663,7 +2764,7 @@ def on_tlambeg(value) node = TLamBeg.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -2698,7 +2799,7 @@ def on_tstring_beg(value) node = TStringBeg.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -2710,7 +2811,7 @@ def on_tstring_beg(value) def on_tstring_content(value) TStringContent.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) end @@ -2720,7 +2821,7 @@ def on_tstring_end(value) node = TStringEnd.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node @@ -2794,7 +2895,12 @@ def on_unless(predicate, statements, consequent) beginning = find_token(Kw, "unless") ending = consequent || find_token(Kw, "end") - statements.bind(predicate.location.end_char, ending.location.start_char) + statements.bind( + predicate.location.end_char, + predicate.location.end_column, + ending.location.start_char, + ending.location.start_column + ) Unless.new( predicate: predicate, @@ -2831,7 +2937,12 @@ def on_until(predicate, statements) end # Update the Statements location information - statements.bind(predicate.location.end_char, ending.location.start_char) + statements.bind( + predicate.location.end_char, + predicate.location.end_column, + ending.location.start_char, + ending.location.start_column + ) Until.new( predicate: predicate, @@ -2875,7 +2986,7 @@ def on_var_field(value) else # You can hit this pattern if you're assigning to a splat using # pattern matching syntax in Ruby 2.7+ - Location.fixed(line: lineno, char: char_pos) + Location.fixed(line: lineno, char: char_pos, column: current_column) end VarField.new(value: value, location: location) @@ -2903,7 +3014,7 @@ def on_vcall(ident) # :call-seq: # on_void_stmt: () -> VoidStmt def on_void_stmt - VoidStmt.new(location: Location.fixed(line: lineno, char: char_pos)) + VoidStmt.new(location: Location.fixed(line: lineno, char: char_pos, column: current_column)) end # :call-seq: @@ -2922,9 +3033,13 @@ def on_when(arguments, statements, consequent) statements_start = token end + start_char = find_next_statement_start(statements_start.location.end_char) + statements.bind( - find_next_statement_start(statements_start.location.end_char), - ending.location.start_char + start_char, + start_char - line_counts[statements_start.location.start_line - 1].start, + ending.location.start_char, + ending.location.start_column ) When.new( @@ -2950,7 +3065,12 @@ def on_while(predicate, statements) end # Update the Statements location information - statements.bind(predicate.location.end_char, ending.location.start_char) + statements.bind( + predicate.location.end_char, + predicate.location.end_column, + ending.location.start_char, + ending.location.start_column + ) While.new( predicate: predicate, @@ -2986,7 +3106,7 @@ def on_word_add(word, part) # :call-seq: # on_word_new: () -> Word def on_word_new - Word.new(parts: [], location: Location.fixed(line: lineno, char: char_pos)) + Word.new(parts: [], location: Location.fixed(line: lineno, char: char_pos, column: current_column)) end # :call-seq: @@ -3005,7 +3125,7 @@ def on_words_beg(value) node = WordsBeg.new( value: value, - location: Location.token(line: lineno, char: char_pos, size: value.size) + location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size) ) tokens << node diff --git a/test/node_test.rb b/test/node_test.rb index 9c29f79d..e412d648 100644 --- a/test/node_test.rb +++ b/test/node_test.rb @@ -999,14 +999,36 @@ def test_zsuper assert_node(ZSuper, "zsuper", "super") end + def test_column_positions + source = <<~SOURCE + puts 'Hello' + puts 'Goodbye' + SOURCE + + at = location(lines: 2..2, chars: 13..27, columns: 0..14) + assert_node(Command, "command", source, at: at) + end + + def test_multibyte_column_positions + source = <<~SOURCE + puts "Congrats" + puts "🎉 🎉" + SOURCE + + at = location(lines: 2..2, chars: 16..26, columns: 0..10) + assert_node(Command, "command", source, at: at) + end + private - def location(lines: 1..1, chars: 0..0) + def location(lines: 1..1, chars: 0..0, columns: 0..0) Location.new( start_line: lines.begin, start_char: chars.begin, + start_column: columns.begin, end_line: lines.end, - end_char: chars.end + end_char: chars.end, + end_column: columns.end ) end @@ -1014,7 +1036,8 @@ def assert_node(kind, type, source, at: nil) at ||= location( lines: 1..[1, source.count("\n")].max, - chars: 0..source.chomp.size + chars: 0..source.chomp.size, + columns: 0..source.chomp.size ) # Parse the example, get the outputted parse tree, and assert that it was