From b507f04301019234c50a2bf4b5be5c54deffc72a Mon Sep 17 00:00:00 2001
From: Yew Ming Chen <fusion82@gmail.com>
Date: Wed, 13 Nov 2019 11:23:11 +0800
Subject: [PATCH 1/2] Support very large ctags file

- Use multiprocessing to improve performance.
- Reduce the memory footprint by using VimList to process a:items on the fly instead of loading the whole list into memory.
---
 autoload/pymatcher.py | 122 ++++++++++++++++++++++++++++--------------
 1 file changed, 82 insertions(+), 40 deletions(-)

diff --git a/autoload/pymatcher.py b/autoload/pymatcher.py
index a819dea..d9b9d9a 100644
--- a/autoload/pymatcher.py
+++ b/autoload/pymatcher.py
@@ -1,22 +1,82 @@
-import vim, re
+import vim
+import re
 import heapq
+from multiprocessing import Pool
+import os
 
 _escape = dict((c , "\\" + c) for c in ['^','$','.','{','}','(',')','[',']','\\','/','+'])
 
+class FilenameScore:
+    def __init__(self, prog):
+        self.prog = prog
+
+    def __call__(self, line):
+        fullLine = line  # returned untouched: CtrlP needs the original item, only the basename is scored
+        slashPos = line.rfind('/')
+
+        if slashPos != -1:
+            line = line[slashPos + 1:]
+
+        lineLower = line.casefold()
+        result = self.prog.search(lineLower)
+        if result:
+            score = result.end() - result.start() + 1
+            score = score + ( len(lineLower) + 1 ) / 100.0
+            score = score + ( len(line) + 1 ) / 1000.0
+            return (1000.0 / score, fullLine)  # shorter match span / shorter basename => higher score
+
+        return (0, fullLine)  # 0 marks "no match"; the caller filters these out
+
+class PathScore:  # callable scorer for the full-line and tab-delimited match modes
+    def __init__(self, prog, first_non_tab=False, until_last_tab=False):
+        self.prog = prog  # compiled, already lower-cased pattern
+        self.first_non_tab = first_non_tab  # match only the text before the first tab
+        self.until_last_tab = until_last_tab
+
+    def __call__(self, line):
+        lineLower = line.casefold()
+        if self.first_non_tab:
+            lineLower = lineLower.split('\t')[0]
+        if self.until_last_tab:
+            lineLower = lineLower.rsplit('\t', 1)[0]  # maxsplit=1: keep everything before the LAST tab
+        result = self.prog.search(lineLower)
+        if result:
+            score = result.end() - result.start() + 1
+            score = score + ( len(lineLower) + 1 ) / 100.0
+            return (1000.0 / score, line)  # shorter match span / shorter text => higher score
+
+        return (0, line)  # 0 marks "no match"; the original line is always returned as-is
+
+class VimList:  # lazy, read-only view of a Vim list: elements are fetched from Vim one at a time
+    def __init__(self, name):
+        self.name = name  # Vim expression that names the list, e.g. 'a:items'
+        self.len = int(vim.eval('len({})'.format(self.name)))  # length is evaluated once, at construction
+
+    def __len__(self):
+        return self.len
+
+    def __getitem__(self, index):
+        return vim.eval('{}[{}]'.format(self.name, index))  # one vim.eval call per element access
+
+    def __iter__(self):
+        for i in range(self.len):
+            yield self[i]  # goes through __getitem__, so the whole list is never held in Python at once
+
+
 def CtrlPPyMatch():
-    items = vim.eval('a:items')
+    items = VimList('a:items')
     astr = vim.eval('a:str')
-    lowAstr = astr.lower()
+    lowAstr = astr.casefold()
     limit = int(vim.eval('a:limit'))
     mmode = vim.eval('a:mmode')
     aregex = int(vim.eval('a:regex'))
     crfile = vim.eval('a:crfile')
 
-    if crfile in items and int(vim.eval("pymatcher#ShouldHideCurrentFile(a:ispath, a:crfile)")):
-        items.remove(crfile)
-
     rez = vim.eval('s:rez')
 
+    pool = Pool(max(1, os.cpu_count()-1))
+    chunksize = 4096
+
     regex = ''
     if aregex == 1:
         regex = astr
@@ -33,54 +93,36 @@ def CtrlPPyMatch():
         regex += escaped[-1]
     # because this IGNORECASE flag is extremely expensive we are converting everything to lower case
     # see https://github.com/FelikZ/ctrlp-py-matcher/issues/29
-    regex = regex.lower()
+    regex = regex.casefold()
 
     res = []
     prog = re.compile(regex)
 
-    def filename_score(line):
-        # get filename via reverse find to improve performance
-        slashPos = line.rfind('/')
-
-        if slashPos != -1:
-            line = line[slashPos + 1:]
-
-        lineLower = line.lower()
-        result = prog.search(lineLower)
-        if result:
-            score = result.end() - result.start() + 1
-            score = score + ( len(lineLower) + 1 ) / 100.0
-            score = score + ( len(line) + 1 ) / 1000.0
-            return 1000.0 / score
-
-        return 0
-
-    def path_score(line):
-        lineLower = line.lower()
-        result = prog.search(lineLower)
-        if result:
-            score = result.end() - result.start() + 1
-            score = score + ( len(lineLower) + 1 ) / 100.0
-            return 1000.0 / score
-
-        return 0
-
     if mmode == 'filename-only':
-        res = [(filename_score(line), line) for line in items]
+        filename_score = FilenameScore(prog)
+        res = pool.imap_unordered(filename_score, items, chunksize)
 
     elif mmode == 'first-non-tab':
-        res = [(path_score(line.split('\t')[0]), line) for line in items]
+        path_score = PathScore(prog, first_non_tab=True)
+        res = pool.imap_unordered(path_score, items, chunksize)
 
     elif mmode == 'until-last-tab':
-        res = [(path_score(line.rsplit('\t')[0]), line) for line in items]
+        path_score = PathScore(prog, until_last_tab=True)
+        res = pool.imap_unordered(path_score, items, chunksize)
 
     else:
-        res = [(path_score(line), line) for line in items]
+        path_score = PathScore(prog)
+        res = pool.imap_unordered(path_score, items, chunksize)
+
+    pool.close()
+
+    rez.extend((line for score, line in heapq.nlargest(limit, res) if score != 0))
 
-    rez.extend([line for score, line in heapq.nlargest(limit, res) if score != 0])
+    if int(vim.eval("pymatcher#ShouldHideCurrentFile(a:ispath, a:crfile)")) and crfile in rez:
+        rez.remove(crfile)
 
     # Use double quoted vim strings and escape \
-    vimrez = ['"' + line.replace('\\', '\\\\').replace('"', '\\"') + '"' for line in rez]
+    vimrez = ('"' + line.replace('\\', '\\\\').replace('"', '\\"') + '"' for line in rez)
 
     vim.command("let s:regex = '%s'" % regex)
     vim.command('let s:rez = [%s]' % ','.join(vimrez))

From 0b18c449677fbce0dafd468ed766040eb24452ea Mon Sep 17 00:00:00 2001
From: Chen Yew Ming <fusion82@gmail.com>
Date: Wed, 13 Nov 2019 13:21:05 +0800
Subject: [PATCH 2/2] Support Python2 and Windows

---
 autoload/pymatcher.py | 36 ++++++++++++++++++++++++------------
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/autoload/pymatcher.py b/autoload/pymatcher.py
index d9b9d9a..565911e 100644
--- a/autoload/pymatcher.py
+++ b/autoload/pymatcher.py
@@ -1,11 +1,16 @@
 import vim
 import re
 import heapq
-from multiprocessing import Pool
+from multiprocessing import Pool, cpu_count
 import os
 
 _escape = dict((c , "\\" + c) for c in ['^','$','.','{','}','(',')','[',']','\\','/','+'])
 
+if hasattr(str, 'casefold'):  # use casefold() wherever the running interpreter's str provides it
+    str2lower = lambda s: s.casefold()
+else:  # str has no casefold() here (the Python 2 case this patch targets): fall back to lower()
+    str2lower = lambda s: s.lower()
+
 class FilenameScore:
     def __init__(self, prog):
         self.prog = prog
@@ -17,7 +22,7 @@ def __call_(self, line):
         if slashPos != -1:
             line = line[slashPos + 1:]
 
-        lineLower = line.casefold()
+        lineLower = str2lower(line)
         result = self.prog.search(lineLower)
         if result:
             score = result.end() - result.start() + 1
@@ -34,7 +39,7 @@ def __init__(self, prog, first_non_tab=False, until_last_tab=False):
         self.until_last_tab = until_last_tab
 
     def __call__(self, line):
-        lineLower = line.casefold()
+        lineLower = str2lower(line)
         if self.first_non_tab:
             lineLower = lineLower.split('\t')[0]
         if self.until_last_tab:
@@ -66,7 +71,7 @@ def __iter__(self):
 def CtrlPPyMatch():
     items = VimList('a:items')
     astr = vim.eval('a:str')
-    lowAstr = astr.casefold()
+    lowAstr = str2lower(astr)
     limit = int(vim.eval('a:limit'))
     mmode = vim.eval('a:mmode')
     aregex = int(vim.eval('a:regex'))
@@ -74,8 +79,14 @@ def CtrlPPyMatch():
 
     rez = vim.eval('s:rez')
 
-    pool = Pool(max(1, os.cpu_count()-1))
-    chunksize = 4096
+    pool = Pool(max(1, cpu_count()-1)) if os.name == 'posix' else None
+
+    def pool_map(func, items):  # apply func to every item: via the worker pool if there is one, else in-process
+        chunksize = 4096  # number of items batched into each task submitted to the pool
+        if pool:
+            return pool.imap_unordered(func, items, chunksize)  # results are yielded in arbitrary order
+        else:
+            return (func(i) for i in items)  # lazy generator: nothing is scored until it is consumed
 
     regex = ''
     if aregex == 1:
@@ -93,28 +104,29 @@ def CtrlPPyMatch():
         regex += escaped[-1]
     # because this IGNORECASE flag is extremely expensive we are converting everything to lower case
     # see https://github.com/FelikZ/ctrlp-py-matcher/issues/29
-    regex = regex.casefold()
+    regex = str2lower(regex)
 
     res = []
     prog = re.compile(regex)
 
     if mmode == 'filename-only':
         filename_score = FilenameScore(prog)
-        res = pool.imap_unordered(filename_score, items, chunksize)
+        res = pool_map(filename_score, items)
 
     elif mmode == 'first-non-tab':
         path_score = PathScore(prog, first_non_tab=True)
-        res = pool.imap_unordered(path_score, items, chunksize)
+        res = pool_map(path_score, items)
 
     elif mmode == 'until-last-tab':
         path_score = PathScore(prog, until_last_tab=True)
-        res = pool.imap_unordered(path_score, items, chunksize)
+        res = pool_map(path_score, items)
 
     else:
         path_score = PathScore(prog)
-        res = pool.imap_unordered(path_score, items, chunksize)
+        res = pool_map(path_score, items)
 
-    pool.close()
+    if pool:
+        pool.close()
 
     rez.extend((line for score, line in heapq.nlargest(limit, res) if score != 0))