From c2125304986a5e6706b93b3e13ae061e058c95b1 Mon Sep 17 00:00:00 2001
From: Vinicius Stock <vinicius.stock@shopify.com>
Date: Tue, 12 Apr 2022 16:27:09 -0400
Subject: [PATCH 1/2] Add column position to Location

---
 lib/syntax_tree/node.rb   |  61 ++++---
 lib/syntax_tree/parser.rb | 331 ++++++++++++++++++++++++++------------
 test/node_test.rb         |  19 ++-
 3 files changed, 281 insertions(+), 130 deletions(-)

diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb
index 9081d977..f940499a 100644
--- a/lib/syntax_tree/node.rb
+++ b/lib/syntax_tree/node.rb
@@ -3,13 +3,15 @@
 module SyntaxTree
   # Represents the location of a node in the tree from the source code.
   class Location
-    attr_reader :start_line, :start_char, :end_line, :end_char
+    attr_reader :start_line, :start_char, :start_column, :end_line, :end_char, :end_column
 
-    def initialize(start_line:, start_char:, end_line:, end_char:)
+    def initialize(start_line:, start_char:, start_column:, end_line:, end_char:, end_column:)
       @start_line = start_line
       @start_char = start_char
+      @start_column = start_column
       @end_line = end_line
       @end_char = end_char
+      @end_column = end_column
     end
 
     def lines
@@ -26,22 +28,26 @@ def to(other)
       Location.new(
         start_line: start_line,
         start_char: start_char,
+        start_column: start_column,
         end_line: [end_line, other.end_line].max,
-        end_char: other.end_char
+        end_char: other.end_char,
+        end_column: other.end_column
       )
     end
 
-    def self.token(line:, char:, size:)
+    def self.token(line:, char:, column:, size:)
       new(
         start_line: line,
         start_char: char,
+        start_column: column,
         end_line: line,
-        end_char: char + size
+        end_char: char + size,
+        end_column: column + size
       )
     end
 
-    def self.fixed(line:, char:)
-      new(start_line: line, start_char: char, end_line: line, end_char: char)
+    def self.fixed(line:, char:, column:)
+      new(start_line: line, start_char: char, start_column: column, end_line: line, end_char: char, end_column: column)
     end
   end
 
@@ -2047,13 +2053,15 @@ def initialize(
       @comments = comments
     end
 
-    def bind(start_char, end_char)
+    def bind(start_char, start_column, end_char, end_column)
       @location =
         Location.new(
           start_line: location.start_line,
           start_char: start_char,
+          start_column: start_column,
           end_line: location.end_line,
-          end_char: end_char
+          end_char: end_char,
+          end_column: end_column
         )
 
       parts = [rescue_clause, else_clause, ensure_clause]
@@ -2062,14 +2070,17 @@ def bind(start_char, end_char)
       consequent = parts.compact.first
       statements.bind(
         start_char,
-        consequent ? consequent.location.start_char : end_char
+        start_column,
+        consequent ? consequent.location.start_char : end_char,
+        consequent ? consequent.location.start_column : end_column
       )
 
       # Next we're going to determine the rescue clause if there is one
       if rescue_clause
         consequent = parts.drop(1).compact.first
         rescue_clause.bind_end(
-          consequent ? consequent.location.start_char : end_char
+          consequent ? consequent.location.start_char : end_char,
+          consequent ? consequent.location.start_column : end_column
         )
       end
     end
@@ -8413,20 +8424,22 @@ def initialize(
       @comments = comments
     end
 
-    def bind_end(end_char)
+    def bind_end(end_char, end_column)
       @location =
         Location.new(
           start_line: location.start_line,
           start_char: location.start_char,
+          start_column: location.start_column,
           end_line: location.end_line,
-          end_char: end_char
+          end_char: end_char,
+          end_column: end_column
         )
 
       if consequent
-        consequent.bind_end(end_char)
-        statements.bind_end(consequent.location.start_char)
+        consequent.bind_end(end_char, end_column)
+        statements.bind_end(consequent.location.start_char, consequent.location.start_column)
       else
-        statements.bind_end(end_char)
+        statements.bind_end(end_char, end_column)
       end
     end
 
@@ -8885,13 +8898,15 @@ def initialize(parser, body:, location:, comments: [])
       @comments = comments
     end
 
-    def bind(start_char, end_char)
+    def bind(start_char, start_column, end_char, end_column)
       @location =
         Location.new(
           start_line: location.start_line,
           start_char: start_char,
+          start_column: start_column,
           end_line: location.end_line,
-          end_char: end_char
+          end_char: end_char,
+          end_column: end_column
         )
 
       if body[0].is_a?(VoidStmt)
@@ -8900,8 +8915,10 @@ def bind(start_char, end_char)
           Location.new(
             start_line: location.start_line,
             start_char: start_char,
+            start_column: start_column,
             end_line: location.end_line,
-            end_char: start_char
+            end_char: start_char,
+            end_column: start_column
           )
 
         body[0] = VoidStmt.new(location: location)
@@ -8910,13 +8927,15 @@ def bind(start_char, end_char)
       attach_comments(start_char, end_char)
     end
 
-    def bind_end(end_char)
+    def bind_end(end_char, end_column)
       @location =
         Location.new(
           start_line: location.start_line,
           start_char: location.start_char,
+          start_column: location.start_column,
           end_line: location.end_line,
-          end_char: end_char
+          end_char: end_char,
+          end_column: end_column
         )
     end
 
diff --git a/lib/syntax_tree/parser.rb b/lib/syntax_tree/parser.rb
index 5bd89dc2..6bc27d5c 100644
--- a/lib/syntax_tree/parser.rb
+++ b/lib/syntax_tree/parser.rb
@@ -163,6 +163,12 @@ def char_pos
       line_counts[lineno - 1][column]
     end
 
+    # The current column, as a character offset from the start of the current
+    # line: char_pos is absolute in the source, so subtract the line's start.
+    def current_column
+      char_pos - line_counts[lineno - 1].start
+    end
+
     # As we build up a list of tokens, we'll periodically need to go backwards
     # and find the ones that we've already hit in order to determine the
     # location information for nodes that use them. For example, if you have a
@@ -251,10 +257,13 @@ def find_next_statement_start(position)
     def on_BEGIN(statements)
       lbrace = find_token(LBrace)
       rbrace = find_token(RBrace)
+      start_char = find_next_statement_start(lbrace.location.end_char)
 
       statements.bind(
-        find_next_statement_start(lbrace.location.end_char),
-        rbrace.location.start_char
+        start_char,
+        start_char - line_counts[lbrace.location.start_line - 1].start,
+        rbrace.location.start_char,
+        rbrace.location.start_column
       )
 
       keyword = find_token(Kw, "BEGIN")
@@ -271,7 +280,7 @@ def on_BEGIN(statements)
     def on_CHAR(value)
       CHAR.new(
         value: value,
-        location: Location.token(line: lineno, char: char_pos, size: value.size)
+        location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
       )
     end
 
@@ -280,10 +289,13 @@ def on_CHAR(value)
     def on_END(statements)
       lbrace = find_token(LBrace)
       rbrace = find_token(RBrace)
+      start_char = find_next_statement_start(lbrace.location.end_char)
 
       statements.bind(
-        find_next_statement_start(lbrace.location.end_char),
-        rbrace.location.start_char
+        start_char,
+        start_char - line_counts[lbrace.location.start_line - 1].start,
+        rbrace.location.start_char,
+        rbrace.location.start_column
       )
 
       keyword = find_token(Kw, "END")
@@ -301,7 +313,7 @@ def on___end__(value)
       @__end__ =
         EndContent.new(
           value: source[(char_pos + value.length)..-1],
-          location: Location.token(line: lineno, char: char_pos, size: value.size)
+          location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
         )
     end
 
@@ -465,7 +477,7 @@ def on_args_forward
     # :call-seq:
     #   on_args_new: () -> Args
     def on_args_new
-      Args.new(parts: [], location: Location.fixed(line: lineno, char: char_pos))
+      Args.new(parts: [], location: Location.fixed(line: lineno, column: current_column, char: char_pos))
     end
 
     # :call-seq:
@@ -551,7 +563,7 @@ def on_assoc_splat(value)
     def on_backref(value)
       Backref.new(
         value: value,
-        location: Location.token(line: lineno, char: char_pos, size: value.size)
+        location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
       )
     end
 
@@ -561,7 +573,7 @@ def on_backtick(value)
       node =
         Backtick.new(
           value: value,
-          location: Location.token(line: lineno, char: char_pos, size: value.size)
+          location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
         )
 
       tokens << node
@@ -592,15 +604,20 @@ def on_begin(bodystmt)
         PinnedBegin.new(statement: bodystmt, location: location)
       else
         keyword = find_token(Kw, "begin")
-        end_char =
+        end_location =
           if bodystmt.rescue_clause || bodystmt.ensure_clause ||
               bodystmt.else_clause
-            bodystmt.location.end_char
+            bodystmt.location
           else
-            find_token(Kw, "end").location.end_char
+            find_token(Kw, "end").location
           end
 
-        bodystmt.bind(keyword.location.end_char, end_char)
+        bodystmt.bind(
+          keyword.location.end_char,
+          keyword.location.end_column,
+          end_location.end_char,
+          end_location.end_column
+        )
         location = keyword.location.to(bodystmt.location)
 
         Begin.new(bodystmt: bodystmt, location: location)
@@ -685,7 +702,7 @@ def on_bodystmt(statements, rescue_clause, else_clause, ensure_clause)
         else_keyword: else_clause && find_token(Kw, "else"),
         else_clause: else_clause,
         ensure_clause: ensure_clause,
-        location: Location.fixed(line: lineno, char: char_pos)
+        location: Location.fixed(line: lineno, char: char_pos, column: current_column)
       )
     end
 
@@ -697,18 +714,24 @@ def on_bodystmt(statements, rescue_clause, else_clause, ensure_clause)
     def on_brace_block(block_var, statements)
       lbrace = find_token(LBrace)
       rbrace = find_token(RBrace)
+      location = (block_var || lbrace).location
+      start_char = find_next_statement_start(location.end_char)
 
       statements.bind(
-        find_next_statement_start((block_var || lbrace).location.end_char),
-        rbrace.location.start_char
+        start_char,
+        start_char - line_counts[location.start_line - 1].start,
+        rbrace.location.start_char,
+        rbrace.location.start_column
       )
 
       location =
         Location.new(
           start_line: lbrace.location.start_line,
           start_char: lbrace.location.start_char,
+          start_column: lbrace.location.start_column,
           end_line: [rbrace.location.end_line, statements.location.end_line].max,
-          end_char: rbrace.location.end_char
+          end_char: rbrace.location.end_char,
+          end_column: rbrace.location.end_column
         )
 
       BraceBlock.new(
@@ -782,10 +805,14 @@ def on_case(value, consequent)
     def on_class(constant, superclass, bodystmt)
       beginning = find_token(Kw, "class")
       ending = find_token(Kw, "end")
+      location = (superclass || constant).location
+      start_char = find_next_statement_start(location.end_char)
 
       bodystmt.bind(
-        find_next_statement_start((superclass || constant).location.end_char),
-        ending.location.start_char
+        start_char,
+        start_char - line_counts[location.start_line - 1].start,
+        ending.location.start_char,
+        ending.location.start_column
       )
 
       ClassDeclaration.new(
@@ -802,7 +829,7 @@ def on_comma(value)
       node =
         Comma.new(
           value: value,
-          location: Location.token(line: lineno, char: char_pos, size: value.size)
+          location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
         )
 
       tokens << node
@@ -847,7 +874,7 @@ def on_comment(value)
           value: value.chomp,
           inline: value.strip != lines[line - 1].strip,
           location:
-            Location.token(line: line, char: char_pos, size: value.size - 1)
+            Location.token(line: line, char: char_pos, column: current_column, size: value.size - 1)
         )
 
       @comments << comment
@@ -859,7 +886,7 @@ def on_comment(value)
     def on_const(value)
       Const.new(
         value: value,
-        location: Location.token(line: lineno, char: char_pos, size: value.size)
+        location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
       )
     end
 
@@ -894,7 +921,7 @@ def on_const_ref(constant)
     def on_cvar(value)
       CVar.new(
         value: value,
-        location: Location.token(line: lineno, char: char_pos, size: value.size)
+        location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
       )
     end
 
@@ -918,12 +945,15 @@ def on_def(name, params, bodystmt)
       # location information
       if params.is_a?(Params) && params.empty?
         end_char = name.location.end_char
+        end_column = name.location.end_column
         location =
           Location.new(
             start_line: params.location.start_line,
             start_char: end_char,
+            start_column: end_column,
             end_line: params.location.end_line,
-            end_char: end_char
+            end_char: end_char,
+            end_column: end_column
           )
 
         params = Params.new(location: location)
@@ -933,9 +963,13 @@ def on_def(name, params, bodystmt)
 
       if ending
         tokens.delete(ending)
+        start_char = find_next_statement_start(params.location.end_char)
+
         bodystmt.bind(
-          find_next_statement_start(params.location.end_char),
-          ending.location.start_char
+          start_char,
+          start_char - line_counts[params.location.start_line - 1].start,
+          ending.location.start_char,
+          ending.location.start_column
         )
 
         Def.new(
@@ -993,12 +1027,15 @@ def on_defs(target, operator, name, params, bodystmt)
       # location information
       if params.is_a?(Params) && params.empty?
         end_char = name.location.end_char
+        end_column = name.location.end_column
         location =
           Location.new(
             start_line: params.location.start_line,
             start_char: end_char,
+            start_column: end_column,
             end_line: params.location.end_line,
-            end_char: end_char
+            end_char: end_char,
+            end_column: end_column
           )
 
         params = Params.new(location: location)
@@ -1009,9 +1046,13 @@ def on_defs(target, operator, name, params, bodystmt)
 
       if ending
         tokens.delete(ending)
+        start_char = find_next_statement_start(params.location.end_char)
+
         bodystmt.bind(
-          find_next_statement_start(params.location.end_char),
-          ending.location.start_char
+          start_char,
+          start_char - line_counts[params.location.start_line - 1].start,
+          ending.location.start_char,
+          ending.location.start_column
         )
 
         Defs.new(
@@ -1043,10 +1084,14 @@ def on_defs(target, operator, name, params, bodystmt)
     def on_do_block(block_var, bodystmt)
       beginning = find_token(Kw, "do")
       ending = find_token(Kw, "end")
+      location = (block_var || beginning).location
+      start_char = find_next_statement_start(location.end_char)
 
       bodystmt.bind(
-        find_next_statement_start((block_var || beginning).location.end_char),
-        ending.location.start_char
+        start_char,
+        start_char - line_counts[location.start_line - 1].start,
+        ending.location.start_char,
+        ending.location.start_column
       )
 
       DoBlock.new(
@@ -1128,10 +1173,13 @@ def on_else(statements)
 
       node = tokens[index]
       ending = node.value == "end" ? tokens.delete_at(index) : node
+      start_char = find_next_statement_start(keyword.location.end_char)
 
       statements.bind(
-        find_next_statement_start(keyword.location.end_char),
-        ending.location.start_char
+        start_char,
+        start_char - line_counts[keyword.location.start_line - 1].start,
+        ending.location.start_char,
+        ending.location.start_column
       )
 
       Else.new(
@@ -1151,7 +1199,12 @@ def on_elsif(predicate, statements, consequent)
       beginning = find_token(Kw, "elsif")
       ending = consequent || find_token(Kw, "end")
 
-      statements.bind(predicate.location.end_char, ending.location.start_char)
+      statements.bind(
+        predicate.location.end_char,
+        predicate.location.end_column,
+        ending.location.start_char,
+        ending.location.start_column
+      )
 
       Elsif.new(
         predicate: predicate,
@@ -1174,7 +1227,7 @@ def on_embdoc_beg(value)
       @embdoc =
         EmbDoc.new(
           value: value,
-          location: Location.fixed(line: lineno, char: char_pos)
+          location: Location.fixed(line: lineno, column: current_column, char: char_pos)
         )
     end
 
@@ -1189,8 +1242,10 @@ def on_embdoc_end(value)
             Location.new(
               start_line: location.start_line,
               start_char: location.start_char,
+              start_column: location.start_column,
               end_line: lineno,
-              end_char: char_pos + value.length - 1
+              end_char: char_pos + value.length - 1,
+              end_column: current_column + value.length - 1
             )
         )
 
@@ -1206,7 +1261,7 @@ def on_embexpr_beg(value)
       node =
         EmbExprBeg.new(
           value: value,
-          location: Location.token(line: lineno, char: char_pos, size: value.size)
+          location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
         )
 
       tokens << node
@@ -1219,7 +1274,7 @@ def on_embexpr_end(value)
       node =
         EmbExprEnd.new(
           value: value,
-          location: Location.token(line: lineno, char: char_pos, size: value.size)
+          location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
         )
 
       tokens << node
@@ -1232,7 +1287,7 @@ def on_embvar(value)
       node =
         EmbVar.new(
           value: value,
-          location: Location.token(line: lineno, char: char_pos, size: value.size)
+          location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
         )
 
       tokens << node
@@ -1247,9 +1302,12 @@ def on_ensure(statements)
       # We don't want to consume the :@kw event, because that would break
       # def..ensure..end chains.
       ending = find_token(Kw, "end", consume: false)
+      start_char = find_next_statement_start(keyword.location.end_char)
       statements.bind(
-        find_next_statement_start(keyword.location.end_char),
-        ending.location.start_char
+        start_char,
+        start_char - line_counts[keyword.location.start_line - 1].start,
+        ending.location.start_char,
+        ending.location.start_column
       )
 
       Ensure.new(
@@ -1296,7 +1354,7 @@ def on_field(parent, operator, name)
     def on_float(value)
       FloatLiteral.new(
         value: value,
-        location: Location.token(line: lineno, char: char_pos, size: value.size)
+        location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
       )
     end
 
@@ -1341,7 +1399,9 @@ def on_for(index, collection, statements)
 
       statements.bind(
         (keyword || collection).location.end_char,
-        ending.location.start_char
+        (keyword || collection).location.end_column,
+        ending.location.start_char,
+        ending.location.start_column
       )
 
       if index.is_a?(MLHS)
@@ -1362,7 +1422,7 @@ def on_for(index, collection, statements)
     def on_gvar(value)
       GVar.new(
         value: value,
-        location: Location.token(line: lineno, char: char_pos, size: value.size)
+        location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
       )
     end
 
@@ -1383,7 +1443,7 @@ def on_hash(assocs)
     #   on_heredoc_beg: (String value) -> HeredocBeg
     def on_heredoc_beg(value)
       location =
-        Location.token(line: lineno, char: char_pos, size: value.size + 1)
+        Location.token(line: lineno, char: char_pos, column: current_column, size: value.size + 1)
 
       # Here we're going to artificially create an extra node type so that if
       # there are comments after the declaration of a heredoc, they get printed.
@@ -1419,8 +1479,10 @@ def on_heredoc_end(value)
           Location.new(
             start_line: heredoc.location.start_line,
             start_char: heredoc.location.start_char,
+            start_column: heredoc.location.start_column,
             end_line: lineno,
-            end_char: char_pos
+            end_char: char_pos,
+            end_column: current_column
           )
       )
     end
@@ -1447,7 +1509,7 @@ def on_hshptn(constant, keywords, keyword_rest)
     def on_ident(value)
       Ident.new(
         value: value,
-        location: Location.token(line: lineno, char: char_pos, size: value.size)
+        location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
       )
     end
 
@@ -1461,7 +1523,12 @@ def on_if(predicate, statements, consequent)
       beginning = find_token(Kw, "if")
       ending = consequent || find_token(Kw, "end")
 
-      statements.bind(predicate.location.end_char, ending.location.start_char)
+      statements.bind(
+        predicate.location.end_char,
+        predicate.location.end_column,
+        ending.location.start_char,
+        ending.location.start_column
+      )
 
       If.new(
         predicate: predicate,
@@ -1507,7 +1574,7 @@ def on_if_mod(predicate, statement)
     def on_imaginary(value)
       Imaginary.new(
         value: value,
-        location: Location.token(line: lineno, char: char_pos, size: value.size)
+        location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
       )
     end
 
@@ -1531,9 +1598,12 @@ def on_in(pattern, statements, consequent)
         statements_start = token
       end
 
+      start_char = find_next_statement_start(statements_start.location.end_char)
       statements.bind(
-        find_next_statement_start(statements_start.location.end_char),
-        ending.location.start_char
+        start_char,
+        start_char - line_counts[statements_start.location.start_line - 1].start,
+        ending.location.start_char,
+        ending.location.start_column
       )
 
       In.new(
@@ -1549,7 +1619,7 @@ def on_in(pattern, statements, consequent)
     def on_int(value)
       Int.new(
         value: value,
-        location: Location.token(line: lineno, char: char_pos, size: value.size)
+        location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
       )
     end
 
@@ -1558,7 +1628,7 @@ def on_int(value)
     def on_ivar(value)
       IVar.new(
         value: value,
-        location: Location.token(line: lineno, char: char_pos, size: value.size)
+        location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
       )
     end
 
@@ -1568,7 +1638,7 @@ def on_kw(value)
       node =
         Kw.new(
           value: value,
-          location: Location.token(line: lineno, char: char_pos, size: value.size)
+          location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
         )
 
       tokens << node
@@ -1589,7 +1659,7 @@ def on_kwrest_param(name)
     def on_label(value)
       Label.new(
         value: value,
-        location: Location.token(line: lineno, char: char_pos, size: value.size)
+        location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
       )
     end
 
@@ -1599,7 +1669,7 @@ def on_label_end(value)
       node =
         LabelEnd.new(
           value: value,
-          location: Location.token(line: lineno, char: char_pos, size: value.size)
+          location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
         )
 
       tokens << node
@@ -1625,7 +1695,12 @@ def on_lambda(params, statements)
         closing = find_token(Kw, "end")
       end
 
-      statements.bind(opening.location.end_char, closing.location.start_char)
+      statements.bind(
+        opening.location.end_char,
+        opening.location.end_column,
+        closing.location.start_char,
+        closing.location.start_column
+      )
 
       Lambda.new(
         params: params,
@@ -1640,7 +1715,7 @@ def on_lbrace(value)
       node =
         LBrace.new(
           value: value,
-          location: Location.token(line: lineno, char: char_pos, size: value.size)
+          location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
         )
 
       tokens << node
@@ -1653,7 +1728,7 @@ def on_lbracket(value)
       node =
         LBracket.new(
           value: value,
-          location: Location.token(line: lineno, char: char_pos, size: value.size)
+          location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
         )
 
       tokens << node
@@ -1666,7 +1741,7 @@ def on_lparen(value)
       node =
         LParen.new(
           value: value,
-          location: Location.token(line: lineno, char: char_pos, size: value.size)
+          location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
         )
 
       tokens << node
@@ -1765,7 +1840,7 @@ def on_mlhs_add_star(mlhs, part)
     # :call-seq:
     #   on_mlhs_new: () -> MLHS
     def on_mlhs_new
-      MLHS.new(parts: [], location: Location.fixed(line: lineno, char: char_pos))
+      MLHS.new(parts: [], location: Location.fixed(line: lineno, char: char_pos, column: current_column))
     end
 
     # :call-seq:
@@ -1791,10 +1866,13 @@ def on_mlhs_paren(contents)
     def on_module(constant, bodystmt)
       beginning = find_token(Kw, "module")
       ending = find_token(Kw, "end")
+      start_char = find_next_statement_start(constant.location.end_char)
 
       bodystmt.bind(
-        find_next_statement_start(constant.location.end_char),
-        ending.location.start_char
+        start_char,
+        start_char - line_counts[constant.location.start_line - 1].start,
+        ending.location.start_char,
+        ending.location.start_column
       )
 
       ModuleDeclaration.new(
@@ -1807,7 +1885,7 @@ def on_module(constant, bodystmt)
     # :call-seq:
     #   on_mrhs_new: () -> MRHS
     def on_mrhs_new
-      MRHS.new(parts: [], location: Location.fixed(line: lineno, char: char_pos))
+      MRHS.new(parts: [], location: Location.fixed(line: lineno, char: char_pos, column: current_column))
     end
 
     # :call-seq:
@@ -1876,7 +1954,7 @@ def on_op(value)
       node =
         Op.new(
           value: value,
-          location: Location.token(line: lineno, char: char_pos, size: value.size)
+          location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
         )
 
       tokens << node
@@ -1935,7 +2013,7 @@ def on_params(
         if parts.any?
           parts[0].location.to(parts[-1].location)
         else
-          Location.fixed(line: lineno, char: char_pos)
+          Location.fixed(line: lineno, char: char_pos, column: current_column)
         end
 
       Params.new(
@@ -1958,12 +2036,15 @@ def on_paren(contents)
 
       if contents && contents.is_a?(Params)
         location = contents.location
+        start_char = find_next_statement_start(lparen.location.end_char)
         location =
           Location.new(
             start_line: location.start_line,
-            start_char: find_next_statement_start(lparen.location.end_char),
+            start_char: start_char,
+            start_column: start_char - line_counts[lparen.location.start_line - 1].start,
             end_line: location.end_line,
-            end_char: rparen.location.start_char
+            end_char: rparen.location.start_char,
+            end_column: rparen.location.start_column
           )
 
         contents =
@@ -2001,23 +2082,26 @@ def on_parse_error(error, *)
     def on_period(value)
       Period.new(
         value: value,
-        location: Location.token(line: lineno, char: char_pos, size: value.size)
+        location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
       )
     end
 
     # :call-seq:
     #   on_program: (Statements statements) -> Program
     def on_program(statements)
+      last_column = source.length - line_counts[lines.length - 1].start
       location =
         Location.new(
           start_line: 1,
           start_char: 0,
+          start_column: 0,
           end_line: lines.length,
-          end_char: source.length
+          end_char: source.length,
+          end_column: last_column
         )
 
       statements.body << @__end__ if @__end__
-      statements.bind(0, source.length)
+      statements.bind(0, 0, source.length, last_column)
 
       program = Program.new(statements: statements, location: location)
       attach_comments(program, @comments)
@@ -2130,7 +2214,7 @@ def on_qsymbols_beg(value)
       node =
         QSymbolsBeg.new(
           value: value,
-          location: Location.token(line: lineno, char: char_pos, size: value.size)
+          location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
         )
 
       tokens << node
@@ -2165,7 +2249,7 @@ def on_qwords_beg(value)
       node =
         QWordsBeg.new(
           value: value,
-          location: Location.token(line: lineno, char: char_pos, size: value.size)
+          location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
         )
 
       tokens << node
@@ -2185,7 +2269,7 @@ def on_qwords_new
     def on_rational(value)
       RationalLiteral.new(
         value: value,
-        location: Location.token(line: lineno, char: char_pos, size: value.size)
+        location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
       )
     end
 
@@ -2195,7 +2279,7 @@ def on_rbrace(value)
       node =
         RBrace.new(
           value: value,
-          location: Location.token(line: lineno, char: char_pos, size: value.size)
+          location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
         )
 
       tokens << node
@@ -2208,7 +2292,7 @@ def on_rbracket(value)
       node =
         RBracket.new(
           value: value,
-          location: Location.token(line: lineno, char: char_pos, size: value.size)
+          location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
         )
 
       tokens << node
@@ -2242,7 +2326,7 @@ def on_regexp_beg(value)
       node =
         RegexpBeg.new(
           value: value,
-          location: Location.token(line: lineno, char: char_pos, size: value.size)
+          location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
         )
 
       tokens << node
@@ -2254,7 +2338,7 @@ def on_regexp_beg(value)
     def on_regexp_end(value)
       RegexpEnd.new(
         value: value,
-        location: Location.token(line: lineno, char: char_pos, size: value.size)
+        location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
       )
     end
 
@@ -2296,9 +2380,12 @@ def on_rescue(exceptions, variable, statements, consequent)
       exceptions = exceptions[0] if exceptions.is_a?(Array)
 
       last_node = variable || exceptions || keyword
+      start_char = find_next_statement_start(last_node.location.end_char)
       statements.bind(
-        find_next_statement_start(last_node.location.end_char),
-        char_pos
+        start_char,
+        start_char - line_counts[last_node.location.end_line - 1].start,
+        char_pos,
+        current_column
       )
 
       # We add an additional inner node here that ripper doesn't provide so that
@@ -2313,8 +2400,10 @@ def on_rescue(exceptions, variable, statements, consequent)
               Location.new(
                 start_line: keyword.location.start_line,
                 start_char: keyword.location.end_char + 1,
+                start_column: keyword.location.end_column + 1,
                 end_line: last_node.location.end_line,
-                end_char: last_node.location.end_char
+                end_char: last_node.location.end_char,
+                end_column: last_node.location.end_column
               )
           )
         end
@@ -2328,8 +2417,10 @@ def on_rescue(exceptions, variable, statements, consequent)
           Location.new(
             start_line: keyword.location.start_line,
             start_char: keyword.location.start_char,
+            start_column: keyword.location.start_column,
             end_line: lineno,
-            end_char: char_pos
+            end_char: char_pos,
+            end_column: current_column
           )
       )
     end
@@ -2388,7 +2479,7 @@ def on_rparen(value)
       node =
         RParen.new(
           value: value,
-          location: Location.token(line: lineno, char: char_pos, size: value.size)
+          location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
         )
 
       tokens << node
@@ -2400,10 +2491,13 @@ def on_rparen(value)
     def on_sclass(target, bodystmt)
       beginning = find_token(Kw, "class")
       ending = find_token(Kw, "end")
+      start_char = find_next_statement_start(target.location.end_char)
 
       bodystmt.bind(
-        find_next_statement_start(target.location.end_char),
-        ending.location.start_char
+        start_char,
+        start_char - line_counts[target.location.end_line - 1].start,
+        ending.location.start_char,
+        ending.location.start_column
       )
 
       SClass.new(
@@ -2442,7 +2536,7 @@ def on_stmts_new
       Statements.new(
         self,
         body: [],
-        location: Location.fixed(line: lineno, char: char_pos)
+        location: Location.fixed(line: lineno, char: char_pos, column: current_column)
       )
     end
 
@@ -2476,7 +2570,7 @@ def on_string_concat(left, right)
     def on_string_content
       StringContent.new(
         parts: [],
-        location: Location.fixed(line: lineno, char: char_pos)
+        location: Location.fixed(line: lineno, char: char_pos, column: current_column)
       )
     end
 
@@ -2499,18 +2593,22 @@ def on_string_embexpr(statements)
 
       statements.bind(
         embexpr_beg.location.end_char,
-        embexpr_end.location.start_char
+        embexpr_beg.location.end_column,
+        embexpr_end.location.start_char,
+        embexpr_end.location.start_column
       )
 
       location =
         Location.new(
           start_line: embexpr_beg.location.start_line,
           start_char: embexpr_beg.location.start_char,
+          start_column: embexpr_beg.location.start_column,
           end_line: [
             embexpr_end.location.end_line,
             statements.location.end_line
           ].max,
-          end_char: embexpr_end.location.end_char
+          end_char: embexpr_end.location.end_char,
+          end_column: embexpr_end.location.end_column
         )
 
       StringEmbExpr.new(statements: statements, location: location)
@@ -2538,11 +2636,13 @@ def on_string_literal(string)
           Location.new(
             start_line: tstring_beg.location.start_line,
             start_char: tstring_beg.location.start_char,
+            start_column: tstring_beg.location.start_column,
             end_line: [
               tstring_end.location.end_line,
               string.location.end_line
             ].max,
-            end_char: tstring_end.location.end_char
+            end_char: tstring_end.location.end_char,
+            end_column: tstring_end.location.end_column
           )
 
         StringLiteral.new(
@@ -2571,7 +2671,7 @@ def on_symbeg(value)
       node =
         SymBeg.new(
           value: value,
-          location: Location.token(line: lineno, char: char_pos, size: value.size)
+          location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
         )
 
       tokens << node
@@ -2625,7 +2725,7 @@ def on_symbols_beg(value)
       node =
         SymbolsBeg.new(
           value: value,
-          location: Location.token(line: lineno, char: char_pos, size: value.size)
+          location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
         )
 
       tokens << node
@@ -2650,7 +2750,7 @@ def on_tlambda(value)
       node =
         TLambda.new(
           value: value,
-          location: Location.token(line: lineno, char: char_pos, size: value.size)
+          location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
         )
 
       tokens << node
@@ -2663,7 +2763,7 @@ def on_tlambeg(value)
       node =
         TLamBeg.new(
           value: value,
-          location: Location.token(line: lineno, char: char_pos, size: value.size)
+          location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
         )
 
       tokens << node
@@ -2698,7 +2798,7 @@ def on_tstring_beg(value)
       node =
         TStringBeg.new(
           value: value,
-          location: Location.token(line: lineno, char: char_pos, size: value.size)
+          location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
         )
 
       tokens << node
@@ -2710,7 +2810,7 @@ def on_tstring_beg(value)
     def on_tstring_content(value)
       TStringContent.new(
         value: value,
-        location: Location.token(line: lineno, char: char_pos, size: value.size)
+        location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
       )
     end
 
@@ -2720,7 +2820,7 @@ def on_tstring_end(value)
       node =
         TStringEnd.new(
           value: value,
-          location: Location.token(line: lineno, char: char_pos, size: value.size)
+          location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
         )
 
       tokens << node
@@ -2794,7 +2894,12 @@ def on_unless(predicate, statements, consequent)
       beginning = find_token(Kw, "unless")
       ending = consequent || find_token(Kw, "end")
 
-      statements.bind(predicate.location.end_char, ending.location.start_char)
+      statements.bind(
+        predicate.location.end_char,
+        predicate.location.end_column,
+        ending.location.start_char,
+        ending.location.start_column
+      )
 
       Unless.new(
         predicate: predicate,
@@ -2831,7 +2936,12 @@ def on_until(predicate, statements)
       end
 
       # Update the Statements location information
-      statements.bind(predicate.location.end_char, ending.location.start_char)
+      statements.bind(
+        predicate.location.end_char,
+        predicate.location.end_column,
+        ending.location.start_char,
+        ending.location.start_column
+      )
 
       Until.new(
         predicate: predicate,
@@ -2875,7 +2985,7 @@ def on_var_field(value)
         else
           # You can hit this pattern if you're assigning to a splat using
           # pattern matching syntax in Ruby 2.7+
-          Location.fixed(line: lineno, char: char_pos)
+          Location.fixed(line: lineno, char: char_pos, column: current_column)
         end
 
       VarField.new(value: value, location: location)
@@ -2903,7 +3013,7 @@ def on_vcall(ident)
     # :call-seq:
     #   on_void_stmt: () -> VoidStmt
     def on_void_stmt
-      VoidStmt.new(location: Location.fixed(line: lineno, char: char_pos))
+      VoidStmt.new(location: Location.fixed(line: lineno, char: char_pos, column: current_column))
     end
 
     # :call-seq:
@@ -2922,9 +3032,13 @@ def on_when(arguments, statements, consequent)
         statements_start = token
       end
 
+      start_char = find_next_statement_start(statements_start.location.end_char)
+
       statements.bind(
-        find_next_statement_start(statements_start.location.end_char),
-        ending.location.start_char
+        start_char,
+        start_char - line_counts[statements_start.location.end_line - 1].start,
+        ending.location.start_char,
+        ending.location.start_column
       )
 
       When.new(
@@ -2950,7 +3064,12 @@ def on_while(predicate, statements)
       end
 
       # Update the Statements location information
-      statements.bind(predicate.location.end_char, ending.location.start_char)
+      statements.bind(
+        predicate.location.end_char,
+        predicate.location.end_column,
+        ending.location.start_char,
+        ending.location.start_column
+      )
 
       While.new(
         predicate: predicate,
@@ -2986,7 +3105,7 @@ def on_word_add(word, part)
     # :call-seq:
     #   on_word_new: () -> Word
     def on_word_new
-      Word.new(parts: [], location: Location.fixed(line: lineno, char: char_pos))
+      Word.new(parts: [], location: Location.fixed(line: lineno, char: char_pos, column: current_column))
     end
 
     # :call-seq:
@@ -3005,7 +3124,7 @@ def on_words_beg(value)
       node =
         WordsBeg.new(
           value: value,
-          location: Location.token(line: lineno, char: char_pos, size: value.size)
+          location: Location.token(line: lineno, char: char_pos, column: current_column, size: value.size)
         )
 
       tokens << node
diff --git a/test/node_test.rb b/test/node_test.rb
index 9c29f79d..4831081b 100644
--- a/test/node_test.rb
+++ b/test/node_test.rb
@@ -999,14 +999,26 @@ def test_zsuper
       assert_node(ZSuper, "zsuper", "super")
     end
 
+    def test_column_positions
+      source = <<~SOURCE
+        puts 'Hello'
+        puts 'Goodbye'
+      SOURCE
+
+      at = location(lines: 2..2, chars: 13..27, columns: 0..14)
+      assert_node(Command, "command", source, at: at)
+    end
+
     private
 
-    def location(lines: 1..1, chars: 0..0)
+    def location(lines: 1..1, chars: 0..0, columns: 0..0)
       Location.new(
         start_line: lines.begin,
         start_char: chars.begin,
+        start_column: columns.begin,
         end_line: lines.end,
-        end_char: chars.end
+        end_char: chars.end,
+        end_column: columns.end
       )
     end
 
@@ -1014,7 +1026,8 @@ def assert_node(kind, type, source, at: nil)
       at ||=
         location(
           lines: 1..[1, source.count("\n")].max,
-          chars: 0..source.chomp.size
+          chars: 0..source.chomp.size,
+          columns: 0..source.chomp.size
         )
 
       # Parse the example, get the outputted parse tree, and assert that it was

From 48ce3753499f3ad510c0234539c6c23f4ecfc784 Mon Sep 17 00:00:00 2001
From: Vinicius Stock <vinicius.stock@shopify.com>
Date: Tue, 12 Apr 2022 16:44:43 -0400
Subject: [PATCH 2/2] Handle multibyte strings

---
 lib/syntax_tree/parser.rb |  3 ++-
 test/node_test.rb         | 10 ++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/lib/syntax_tree/parser.rb b/lib/syntax_tree/parser.rb
index 6bc27d5c..60923b57 100644
--- a/lib/syntax_tree/parser.rb
+++ b/lib/syntax_tree/parser.rb
@@ -166,7 +166,8 @@ def char_pos
     # This represents the current column we're in relative to the beginning of
     # the current line.
     def current_column
-      column - line_counts[lineno - 1].start
+      line = line_counts[lineno - 1]
+      line[column].to_i - line.start
     end
 
     # As we build up a list of tokens, we'll periodically need to go backwards
diff --git a/test/node_test.rb b/test/node_test.rb
index 4831081b..e412d648 100644
--- a/test/node_test.rb
+++ b/test/node_test.rb
@@ -1009,6 +1009,16 @@ def test_column_positions
       assert_node(Command, "command", source, at: at)
     end
 
+    def test_multibyte_column_positions
+      source = <<~SOURCE
+        puts "Congrats"
+        puts "🎉 🎉"
+      SOURCE
+
+      at = location(lines: 2..2, chars: 16..26, columns: 0..10)
+      assert_node(Command, "command", source, at: at)
+    end
+
     private
 
     def location(lines: 1..1, chars: 0..0, columns: 0..0)