@@ -212,12 +212,15 @@ private function binarySearch(int $offset): int
212212
/**
 * The 0-based column of that offset.
 *
 * Unlike offsets (which are byte-offsets), columns are computed based on Unicode
 * codepoints to provide a better experience.
 *
 * @param int $offset Byte offset into the source string.
 *
 * @return int Number of UTF-8 code points between the start of the line
 *             containing $offset and $offset itself.
 */
public function getColumn(int $offset): int
{
    $line = $this->getLine($offset);
    $lineStart = $this->lineStarts[$line];

    // substr() slices by bytes, so this isolates the bytes from the start of
    // the line up to the offset; mb_strlen() then counts them as code points.
    $prefix = substr($this->string, $lineStart, $offset - $lineStart);

    return mb_strlen($prefix, 'UTF-8');
}
222225
223226 /**
@@ -237,7 +240,17 @@ public function getOffset(int $line, int $column = 0): int
237240 throw new \OutOfRangeException ('Column may not be negative. ' );
238241 }
239242
240- $ result = $ this ->lineStarts [$ line ] + $ column ;
243+ if ($ column === 0 ) {
244+ $ result = $ this ->lineStarts [$ line ];
245+ } else {
246+ $ lineContent = substr ($ this ->string , $ this ->lineStarts [$ line ], $ this ->lineStarts [$ line + 1 ] ?? null );
247+
248+ if ($ column > mb_strlen ($ lineContent , 'UTF-8 ' )) {
249+ throw new \OutOfRangeException ("Line $ line doesn't have $ column columns. " );
250+ }
251+
252+ $ result = $ this ->lineStarts [$ line ] + \strlen (mb_substr ($ lineContent , 0 , $ column , 'UTF-8 ' ));
253+ }
241254
242255 if ($ result > \strlen ($ this ->string ) || ($ line + 1 < \count ($ this ->lineStarts ) && $ result >= $ this ->lineStarts [$ line + 1 ])) {
243256 throw new \OutOfRangeException ("Line $ line doesn't have $ column columns. " );
0 commit comments