From 682e9c076b984864bd03973c69299cb673f53034 Mon Sep 17 00:00:00 2001
From: Radek Kowalski <r.kowalski.pyzdry@gmail.com>
Date: Tue, 11 Feb 2025 10:10:48 +0000
Subject: [PATCH 01/32] upgrade

---
 src/tools/rag/write_descriptions.py | 313 ++++++++++++++++++++--------
 1 file changed, 231 insertions(+), 82 deletions(-)

diff --git a/src/tools/rag/write_descriptions.py b/src/tools/rag/write_descriptions.py
index 4518593e..5cb4e1e1 100644
--- a/src/tools/rag/write_descriptions.py
+++ b/src/tools/rag/write_descriptions.py
@@ -1,123 +1,272 @@
+"""Functions to create an index of files for RAG."""
+
+import logging
 import os
+import sys
 from pathlib import Path
+
+import chromadb
+from dotenv import find_dotenv, load_dotenv
 from langchain.prompts import ChatPromptTemplate
 from langchain_core.output_parsers import StrOutputParser
-from dotenv import load_dotenv, find_dotenv
-import chromadb
-import sys
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..')))
-from src.utilities.util_functions import join_paths, read_coderrules
-from src.utilities.start_work_functions import CoderIgnore, file_folder_ignored
+from langchain_core.runnables.base import RunnableSequence
+
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..")))
+from src.utilities.exceptions import MissingEnvironmentVariableError
+from src.utilities.util_functions import join_paths
 from src.utilities.llms import init_llms_mini
 
 
-load_dotenv(find_dotenv())
-work_dir = os.getenv("WORK_DIR")
+## Configure the logging level
+logging.basicConfig(level=logging.INFO)
 
 
-def is_code_file(file_path):
+def relevant_extension(file_path: Path, file_extension_constraint: set[str]) -> bool:
+    """Checker for whether file extension indicates a script."""
     # List of common code file extensions
-    code_extensions = {
-        '.js', '.jsx', '.ts', '.tsx', '.vue', '.py', '.rb', '.php', '.java', '.c', '.cpp', '.cs', '.go', '.swift',
-        '.kt', '.rs', '.htm','.html', '.css', '.scss', '.sass', '.less', '.prompt',
-    }
-    return file_path.suffix.lower() in code_extensions
+    return file_path.suffix.lower() in file_extension_constraint
 
 
 # read file content. place name of file in the top
-def get_content(file_path):
-    with open(file_path, 'r', encoding='utf-8') as file:
+def get_content(file_path: Path) -> str:
+    """Collect file name and content to return them together as string."""
+    with open(file_path, encoding="utf-8") as file:
         content = file.read()
-    content = file_path.name + '\n' + content
-    return content
-
-def collect_file_pathes(subfolders, work_dir):
-    """
-    Collect and return a list of allowed code files from the given subfolders
-    under the work_dir according to is_code_file criteria and .coderignore patterns.
-    """
-    allowed_files = []
-    for folder in subfolders:
-        for root, _, files in os.walk(work_dir + folder):
-            for file in files:
-                file_path = Path(root) / file
-                if not is_code_file(file_path):
-                    continue
-                relative_path_str = file_path.relative_to(work_dir).as_posix()
-                if file_folder_ignored(relative_path_str):
-                    continue
-                allowed_files.append(file_path)
-    return allowed_files
+    return file_path.name + "\n" + content
 
 
-def write_descriptions(subfolders_with_files=['/']):
-    all_files = collect_file_pathes(subfolders_with_files, work_dir)
+def evaluate_file(root: str, file: str, file_extension_constraint: set[str] | None, ignore: set[str]) -> Path | None:
+    """Return file path if the file is to be considered."""
+    file_path = Path(root).joinpath(file)
+    if len(ignore.intersection(file_path.parts)) > 0:
+        return None
+    if file_extension_constraint and relevant_extension(
+        file_path, file_extension_constraint=file_extension_constraint
+    ):
+        return file_path
+    return None
 
-    coderrules = read_coderrules()
 
-    prompt = ChatPromptTemplate.from_template(
-f"""First, get known with info about project (may be useful, may be not):
-
-'''
-{coderrules}
-'''
+def files_in_directory(
+    directories_with_files_to_describe: list[str | Path],
+    file_extension_constraint: set[str] | None,
+    ignore: set[str],
+) -> list[Path]:
+    """Fetch paths of files in directory."""
+    files_to_describe = []
+    for directory in directories_with_files_to_describe:
+        directory_files = [f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))]
+        tmp = [
+            evaluate_file(
+                root=str(directory),
+                file=file,
+                file_extension_constraint=file_extension_constraint,
+                ignore=ignore,
+            )
+            for file in directory_files
+        ]
+        files_to_describe.extend(tmp)
+        for root, _, files in os.walk(directory):
+            tmp = [
+                evaluate_file(
+                    root=root,
+                    file=file,
+                    file_extension_constraint=file_extension_constraint,
+                    ignore=ignore,
+                )
+                for file in files
+            ]
+            files_to_describe.extend(tmp)
+    return files_to_describe
 
-Describe the code in 4 sentences or less, focusing only on important information from integration point of view.
-Write what file is responsible for.
 
-Go traight to the thing in description, without starting sentence.
+def save_file_description(file_path: Path, work_dir: str, description: str, file_description_dir: str) -> None:
+    """Save file description."""
+    file_name = file_path.relative_to(work_dir).as_posix().replace("/", "=")
+    output_path = join_paths(file_description_dir, f"{file_name}.txt")
+    with open(output_path, "w", encoding="utf-8") as out_file:
+        out_file.write(description)
 
-'''
-{{code}}
-'''
-"""
-    )
-    llms = init_llms_mini(tools=[], run_name='File Describer')
-    llm = llms[0]
-    chain = prompt | llm | StrOutputParser()
 
-    description_folder = join_paths(work_dir, '.clean_coder/files_and_folders_descriptions')
-    Path(description_folder).mkdir(parents=True, exist_ok=True)
+def output_descriptions(
+    files_to_describe: list[Path], chain: RunnableSequence, file_description_dir: str, work_dir: str
+) -> None:
+    """Generate & output file descriptions to designated directory in WORK_DIR."""
     # iterate over all files, take 8 files at once
     batch_size = 8
-    for i in range(0, len(all_files), batch_size):
-        files_iteration = all_files[i:i + batch_size]
+    for i in range(0, len(files_to_describe), batch_size):
+        files_iteration = [f for f in files_to_describe[i : i + batch_size] if f is not None]
         descriptions = chain.batch([get_content(file_path) for file_path in files_iteration])
-        print(descriptions)
+        logging.debug(descriptions)
+        [
+            save_file_description(
+                file_path=file_path,
+                work_dir=work_dir,
+                description=description,
+                file_description_dir=file_description_dir,
+            )
+            for file_path, description in zip(files_iteration, descriptions, strict=True)
+        ]
 
-        for file_path, description in zip(files_iteration, descriptions):
-            file_name = file_path.relative_to(work_dir).as_posix().replace('/', '=')
-            output_path = join_paths(description_folder, f"{file_name}.txt")
 
-            with open(output_path, 'w', encoding='utf-8') as out_file:
-                out_file.write(description)
+def produce_descriptions(
+    directories_with_files_to_describe: list[str | Path],
+    file_description_dir: str,
+    work_dir: str,
+    ignore: set[str],
+    file_extension_constraint: set[str] | None = None,
+) -> None:
+    """
+    Produce short descriptions of files. Store the descriptions in .clean_coder folder in WORK_DIR.
 
+    Inputs:
+        directories_with_files_to_describe: directories from which files are to be described.
+        file_description_dir: directory where generated file descriptions are to be saved to.
+        work_dir: project directory worked on with Clean Coder.
+        ignore: files and folders to ignore.
+        file_extension_constraint: The list of file extension types accepted, if it's provided.
 
-def upload_descriptions_to_vdb():
-    chroma_client = chromadb.PersistentClient(path=join_paths(work_dir, '.clean_coder/chroma_base'))
-    collection_name = f"clean_coder_{Path(work_dir).name}_file_descriptions"
+    Example:
+        work_dir = os.getenv("WORK_DIR") # provide your own directory of choice if WORK_DIR is not set.
+        if not work_dir:
+            msg = "WORK_DIR variable not provided. Please add WORK_DIR to .env file"
+            raise MissingEnvironmentVariableError(msg)
+        file_description_dir = join_paths(work_dir, ".clean_coder/workdir_file_descriptions")
+        file_extension_constraint = {
+            ".js", ".jsx", ".ts", ".tsx", ".vue", ".py", ".rb", ".php", ".java", ".c", ".cpp", ".cs", ".go", ".swift",
+            ".kt", ".rs", ".htm",".html", ".css", ".scss", ".sass", ".less", ".prompt",
+        }
+        ignore = {".clean_coder", ".coderrules"}
+        produce_descriptions(directories_with_files_to_describe=[work_dir],
+                        file_description_dir=file_description_dir,
+                        work_dir=work_dir,
+                        file_extension_constraint=file_extension_constraint,
+                        ignore=ignore,
+                        )
+    """
+    files_to_describe = files_in_directory(
+        directories_with_files_to_describe=directories_with_files_to_describe,
+        file_extension_constraint=file_extension_constraint,
+        ignore=ignore,
+    )
 
-    collection = chroma_client.get_or_create_collection(
-        name=collection_name
+    prompt = ChatPromptTemplate.from_template(
+        """Describe the following code in 4 sentences or less, focusing only on important information from integration point of view.
+    Write what file is responsible for.\n\n'''\n{code}'''
+    """,
     )
 
-    # read files and upload to base
-    description_folder = join_paths(work_dir, '.clean_coder/files_and_folders_descriptions')
-    for root, _, files in os.walk(description_folder):
+    llms = init_llms_mini(tools=[], run_name="File Describer")
+    llm = llms[0]
+    chain = prompt | llm | StrOutputParser()
+    Path(file_description_dir).mkdir(parents=True, exist_ok=True)
+    output_descriptions(
+        files_to_describe=files_to_describe, work_dir=work_dir, chain=chain, file_description_dir=file_description_dir
+    )
+
+
+def upload_to_collection(collection: chromadb.PersistentClient, file_description_dir: str) -> None:
+    """Insert file information to chroma database."""
+    for root, _, files in os.walk(file_description_dir):
         for file in files:
             file_path = Path(root) / file
-            with open(file_path, 'r', encoding='utf-8') as file:
-                content = file.read()
+            with open(file_path, encoding="utf-8") as f:
+                content = f.read()
             collection.upsert(
                 documents=[
-                    content
+                    content,
                 ],
-                ids=[file_path.name.replace('=', '/').removesuffix(".txt")],
+                ids=[file_path.name.replace("=", "/").removesuffix(".txt")],
             )
 
 
-if __name__ == '__main__':
-    #provide optionally which subfolders needs to be checked, if you don't want to describe all project folder
-    write_descriptions(subfolders_with_files=['/'])
+def upload_descriptions_to_vdb(
+    chroma_collection_name: str,
+    work_dir: str,
+    file_description_dir: str,
+    vdb_location: str = ".clean_coder/chroma_base",
+) -> None:
+    """
+    Upload file descriptions to chroma database.
+
+    Inputs:
+        chroma_collection_name: name of the collection within Chroma vector database to save file descriptions in.
+        file_description_dir: directory where generated file descriptions are available.
+        work_dir: project directory worked on with Clean Coder.
+        vdb_location: (optional) location for storing the vector database.
+
+    Example:
+        work_dir = os.getenv("WORK_DIR") # provide your own directory of choice if WORK_DIR is not set.
+        if not work_dir:
+            msg = "WORK_DIR variable not provided. Please add WORK_DIR to .env file"
+            raise MissingEnvironmentVariableError(msg)
+        file_description_dir = join_paths(work_dir, ".clean_coder/workdir_file_descriptions")
+        file_extension_constraint = {
+            ".js", ".jsx", ".ts", ".tsx", ".vue", ".py", ".rb", ".php", ".java", ".c", ".cpp", ".cs", ".go", ".swift",
+            ".kt", ".rs", ".htm",".html", ".css", ".scss", ".sass", ".less", ".prompt",
+        }
+        ignore = {".clean_coder", ".coderrules"}
+        produce_descriptions(directories_with_files_to_describe=[work_dir],
+                        file_description_dir=file_description_dir,
+                        work_dir=work_dir,
+                        file_extension_constraint=file_extension_constraint,
+                        ignore=ignore,
+                        )
+        chroma_collection_name = f"clean_coder_{Path(work_dir).name}_file_descriptions"
+        upload_descriptions_to_vdb(chroma_collection_name=chroma_collection_name, work_dir=work_dir, file_description_dir=file_description_dir)
+    """
+    chroma_client = chromadb.PersistentClient(path=join_paths(work_dir, vdb_location))
+    collection = chroma_client.get_or_create_collection(
+        name=chroma_collection_name,
+    )
+
+    # read files and upload to base
+    upload_to_collection(collection=collection, file_description_dir=file_description_dir)
+
 
-    upload_descriptions_to_vdb()
+if __name__ == "__main__":
+    # provide optionally which subfolders needs to be checked, if you don't want to describe all project folder
+    # load environment
+    load_dotenv(find_dotenv())
+    work_dir = os.getenv("WORK_DIR")
+    if not work_dir:
+        msg = "WORK_DIR variable not provided. Please add WORK_DIR to .env file"
+        raise MissingEnvironmentVariableError(msg)
+    file_description_dir = join_paths(work_dir, ".clean_coder/workdir_file_descriptions")
+    file_extension_constraint = {
+        ".js",
+        ".jsx",
+        ".ts",
+        ".tsx",
+        ".vue",
+        ".py",
+        ".rb",
+        ".php",
+        ".java",
+        ".c",
+        ".cpp",
+        ".cs",
+        ".go",
+        ".swift",
+        ".kt",
+        ".rs",
+        ".htm",
+        ".html",
+        ".css",
+        ".scss",
+        ".sass",
+        ".less",
+        ".prompt",
+    }
+    ignore = {".clean_coder", ".coderrules"}
+    produce_descriptions(
+        directories_with_files_to_describe=[work_dir],
+        file_description_dir=file_description_dir,
+        work_dir=work_dir,
+        file_extension_constraint=file_extension_constraint,
+        ignore=ignore,
+    )
+    chroma_collection_name = f"clean_coder_{Path(work_dir).name}_file_descriptions"
+    upload_descriptions_to_vdb(
+        chroma_collection_name=chroma_collection_name, work_dir=work_dir, file_description_dir=file_description_dir
+    )

From 983454560f2c9e80d7c3542c33caf8b042a83d9f Mon Sep 17 00:00:00 2001
From: Radek Kowalski <r.kowalski.pyzdry@gmail.com>
Date: Tue, 18 Feb 2025 16:05:08 +0000
Subject: [PATCH 02/32] custom exception message

---
 src/utilities/exceptions.py | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 src/utilities/exceptions.py

diff --git a/src/utilities/exceptions.py b/src/utilities/exceptions.py
new file mode 100644
index 00000000..e1c8f66d
--- /dev/null
+++ b/src/utilities/exceptions.py
@@ -0,0 +1,4 @@
+"""Custom exception messages."""
+
+class MissingEnvironmentVariableError(Exception):
+    """Enviromental variable missing."""

From aae7de3cb217f4f76ae43dd201ced5c719130109 Mon Sep 17 00:00:00 2001
From: Radek Kowalski <r.kowalski.pyzdry@gmail.com>
Date: Tue, 18 Feb 2025 16:05:25 +0000
Subject: [PATCH 03/32] response to comments

---
 src/tools/rag/write_descriptions.py | 53 +++++++++++++----------------
 1 file changed, 23 insertions(+), 30 deletions(-)

diff --git a/src/tools/rag/write_descriptions.py b/src/tools/rag/write_descriptions.py
index 5cb4e1e1..22a5e3ad 100644
--- a/src/tools/rag/write_descriptions.py
+++ b/src/tools/rag/write_descriptions.py
@@ -13,9 +13,9 @@
 
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..")))
 from src.utilities.exceptions import MissingEnvironmentVariableError
-from src.utilities.util_functions import join_paths
 from src.utilities.llms import init_llms_mini
-
+from src.utilities.start_work_functions import file_folder_ignored
+from src.utilities.util_functions import join_paths
 
 ## Configure the logging level
 logging.basicConfig(level=logging.INFO)
@@ -35,13 +35,16 @@ def get_content(file_path: Path) -> str:
     return file_path.name + "\n" + content
 
 
-def evaluate_file(root: str, file: str, file_extension_constraint: set[str] | None, ignore: set[str]) -> Path | None:
+def add_to_indexing_if_relevant(root: str, file: str, file_extension_constraint: set[str] | None) -> Path | None:
     """Return file path if the file is to be considered."""
     file_path = Path(root).joinpath(file)
-    if len(ignore.intersection(file_path.parts)) > 0:
+    if file_folder_ignored(str(file_path)):
+        # ignore files and folders mentioned in .coderignore
         return None
-    if file_extension_constraint and relevant_extension(
-        file_path, file_extension_constraint=file_extension_constraint
+    if not file_extension_constraint:
+        return file_path
+    if relevant_extension(
+        file_path, file_extension_constraint=file_extension_constraint,
     ):
         return file_path
     return None
@@ -50,29 +53,26 @@ def evaluate_file(root: str, file: str, file_extension_constraint: set[str] | No
 def files_in_directory(
     directories_with_files_to_describe: list[str | Path],
     file_extension_constraint: set[str] | None,
-    ignore: set[str],
 ) -> list[Path]:
     """Fetch paths of files in directory."""
     files_to_describe = []
     for directory in directories_with_files_to_describe:
         directory_files = [f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))]
         tmp = [
-            evaluate_file(
+            add_to_indexing_if_relevant(
                 root=str(directory),
                 file=file,
                 file_extension_constraint=file_extension_constraint,
-                ignore=ignore,
             )
             for file in directory_files
         ]
         files_to_describe.extend(tmp)
         for root, _, files in os.walk(directory):
             tmp = [
-                evaluate_file(
+                add_to_indexing_if_relevant(
                     root=root,
                     file=file,
                     file_extension_constraint=file_extension_constraint,
-                    ignore=ignore,
                 )
                 for file in files
             ]
@@ -80,8 +80,11 @@ def files_in_directory(
     return files_to_describe
 
 
-def save_file_description(file_path: Path, work_dir: str, description: str, file_description_dir: str) -> None:
+def save_file_description(file_path: Path, description: str, file_description_dir: str) -> None:
     """Save file description."""
+    if not work_dir:
+        msg = "WORK_DIR variable not provided. Please add WORK_DIR to .env file"
+        raise MissingEnvironmentVariableError(msg)
     file_name = file_path.relative_to(work_dir).as_posix().replace("/", "=")
     output_path = join_paths(file_description_dir, f"{file_name}.txt")
     with open(output_path, "w", encoding="utf-8") as out_file:
@@ -89,7 +92,7 @@ def save_file_description(file_path: Path, work_dir: str, description: str, file
 
 
 def output_descriptions(
-    files_to_describe: list[Path], chain: RunnableSequence, file_description_dir: str, work_dir: str
+    files_to_describe: list[Path], chain: RunnableSequence, file_description_dir: str,
 ) -> None:
     """Generate & output file descriptions to designated directory in WORK_DIR."""
     # iterate over all files, take 8 files at once
@@ -101,7 +104,6 @@ def output_descriptions(
         [
             save_file_description(
                 file_path=file_path,
-                work_dir=work_dir,
                 description=description,
                 file_description_dir=file_description_dir,
             )
@@ -112,8 +114,6 @@ def output_descriptions(
 def produce_descriptions(
     directories_with_files_to_describe: list[str | Path],
     file_description_dir: str,
-    work_dir: str,
-    ignore: set[str],
     file_extension_constraint: set[str] | None = None,
 ) -> None:
     """
@@ -122,7 +122,6 @@ def produce_descriptions(
     Inputs:
         directories_with_files_to_describe: directories from which files are to be described.
         file_description_dir: directory where generated file descriptions are to be saved to.
-        work_dir: project directory worked on with Clean Coder.
         ignore: files and folders to ignore.
         file_extension_constraint: The list of file extension types accepted, if it's provided.
 
@@ -139,15 +138,12 @@ def produce_descriptions(
         ignore = {".clean_coder", ".coderrules"}
         produce_descriptions(directories_with_files_to_describe=[work_dir],
                         file_description_dir=file_description_dir,
-                        work_dir=work_dir,
                         file_extension_constraint=file_extension_constraint,
-                        ignore=ignore,
                         )
     """
     files_to_describe = files_in_directory(
         directories_with_files_to_describe=directories_with_files_to_describe,
         file_extension_constraint=file_extension_constraint,
-        ignore=ignore,
     )
 
     prompt = ChatPromptTemplate.from_template(
@@ -161,7 +157,7 @@ def produce_descriptions(
     chain = prompt | llm | StrOutputParser()
     Path(file_description_dir).mkdir(parents=True, exist_ok=True)
     output_descriptions(
-        files_to_describe=files_to_describe, work_dir=work_dir, chain=chain, file_description_dir=file_description_dir
+        files_to_describe=files_to_describe, chain=chain, file_description_dir=file_description_dir
     )
 
 
@@ -182,7 +178,6 @@ def upload_to_collection(collection: chromadb.PersistentClient, file_description
 
 def upload_descriptions_to_vdb(
     chroma_collection_name: str,
-    work_dir: str,
     file_description_dir: str,
     vdb_location: str = ".clean_coder/chroma_base",
 ) -> None:
@@ -192,7 +187,6 @@ def upload_descriptions_to_vdb(
     Inputs:
         chroma_collection_name: name of the collection within Chroma vector database to save file descriptions in.
         file_description_dir: directory where generated file descriptions are available.
-        work_dir: project directory worked on with Clean Coder.
         vdb_location: (optional) location for storing the vector database.
 
     Example:
@@ -208,13 +202,15 @@ def upload_descriptions_to_vdb(
         ignore = {".clean_coder", ".coderrules"}
         produce_descriptions(directories_with_files_to_describe=[work_dir],
                         file_description_dir=file_description_dir,
-                        work_dir=work_dir,
                         file_extension_constraint=file_extension_constraint,
-                        ignore=ignore,
                         )
         chroma_collection_name = f"clean_coder_{Path(work_dir).name}_file_descriptions"
-        upload_descriptions_to_vdb(chroma_collection_name=chroma_collection_name, work_dir=work_dir, file_description_dir=file_description_dir)
+        upload_descriptions_to_vdb(chroma_collection_name=chroma_collection_name, file_description_dir=file_description_dir)
     """
+    work_dir = os.getenv("WORK_DIR")
+    if not work_dir:
+        msg = "WORK_DIR variable not provided. Please add WORK_DIR to .env file"
+        raise MissingEnvironmentVariableError(msg)
     chroma_client = chromadb.PersistentClient(path=join_paths(work_dir, vdb_location))
     collection = chroma_client.get_or_create_collection(
         name=chroma_collection_name,
@@ -258,15 +254,12 @@ def upload_descriptions_to_vdb(
         ".less",
         ".prompt",
     }
-    ignore = {".clean_coder", ".coderrules"}
     produce_descriptions(
         directories_with_files_to_describe=[work_dir],
         file_description_dir=file_description_dir,
-        work_dir=work_dir,
         file_extension_constraint=file_extension_constraint,
-        ignore=ignore,
     )
     chroma_collection_name = f"clean_coder_{Path(work_dir).name}_file_descriptions"
     upload_descriptions_to_vdb(
-        chroma_collection_name=chroma_collection_name, work_dir=work_dir, file_description_dir=file_description_dir
+        chroma_collection_name=chroma_collection_name, file_description_dir=file_description_dir,
     )

From 838c88f52f4ea8b04c0e88ae7bb1831546a42a2f Mon Sep 17 00:00:00 2001
From: Radek Kowalski <r.kowalski.pyzdry@gmail.com>
Date: Sat, 22 Feb 2025 12:34:03 +0000
Subject: [PATCH 04/32] remove examples from docstrings. Remove 'if not
 work_dir' from if __name__ == "__main__"

---
 src/tools/rag/write_descriptions.py | 37 -----------------------------
 1 file changed, 37 deletions(-)

diff --git a/src/tools/rag/write_descriptions.py b/src/tools/rag/write_descriptions.py
index 22a5e3ad..b1ebc4b1 100644
--- a/src/tools/rag/write_descriptions.py
+++ b/src/tools/rag/write_descriptions.py
@@ -124,22 +124,6 @@ def produce_descriptions(
         file_description_dir: directory where generated file descriptions are to be saved to.
         ignore: files and folders to ignore.
         file_extension_constraint: The list of file extension types accepted, if it's provided.
-
-    Example:
-        work_dir = os.getenv("WORK_DIR") # provide your own directory of choice if WORK_DIR is not set.
-        if not work_dir:
-            msg = "WORK_DIR variable not provided. Please add WORK_DIR to .env file"
-            raise MissingEnvironmentVariableError(msg)
-        file_description_dir = join_paths(work_dir, ".clean_coder/workdir_file_descriptions")
-        file_extension_constraint = {
-            ".js", ".jsx", ".ts", ".tsx", ".vue", ".py", ".rb", ".php", ".java", ".c", ".cpp", ".cs", ".go", ".swift",
-            ".kt", ".rs", ".htm",".html", ".css", ".scss", ".sass", ".less", ".prompt",
-        }
-        ignore = {".clean_coder", ".coderrules"}
-        produce_descriptions(directories_with_files_to_describe=[work_dir],
-                        file_description_dir=file_description_dir,
-                        file_extension_constraint=file_extension_constraint,
-                        )
     """
     files_to_describe = files_in_directory(
         directories_with_files_to_describe=directories_with_files_to_describe,
@@ -188,29 +172,8 @@ def upload_descriptions_to_vdb(
         chroma_collection_name: name of the collection within Chroma vector database to save file descriptions in.
         file_description_dir: directory where generated file descriptions are available.
         vdb_location: (optional) location for storing the vector database.
-
-    Example:
-        work_dir = os.getenv("WORK_DIR") # provide your own directory of choice if WORK_DIR is not set.
-        if not work_dir:
-            msg = "WORK_DIR variable not provided. Please add WORK_DIR to .env file"
-            raise MissingEnvironmentVariableError(msg)
-        file_description_dir = join_paths(work_dir, ".clean_coder/workdir_file_descriptions")
-        file_extension_constraint = {
-            ".js", ".jsx", ".ts", ".tsx", ".vue", ".py", ".rb", ".php", ".java", ".c", ".cpp", ".cs", ".go", ".swift",
-            ".kt", ".rs", ".htm",".html", ".css", ".scss", ".sass", ".less", ".prompt",
-        }
-        ignore = {".clean_coder", ".coderrules"}
-        produce_descriptions(directories_with_files_to_describe=[work_dir],
-                        file_description_dir=file_description_dir,
-                        file_extension_constraint=file_extension_constraint,
-                        )
-        chroma_collection_name = f"clean_coder_{Path(work_dir).name}_file_descriptions"
-        upload_descriptions_to_vdb(chroma_collection_name=chroma_collection_name, file_description_dir=file_description_dir)
     """
     work_dir = os.getenv("WORK_DIR")
-    if not work_dir:
-        msg = "WORK_DIR variable not provided. Please add WORK_DIR to .env file"
-        raise MissingEnvironmentVariableError(msg)
     chroma_client = chromadb.PersistentClient(path=join_paths(work_dir, vdb_location))
     collection = chroma_client.get_or_create_collection(
         name=chroma_collection_name,

From 41b73dcda559e4fa444abf64b55aa9303ac2ff7e Mon Sep 17 00:00:00 2001
From: Radek Kowalski <r.kowalski.pyzdry@gmail.com>
Date: Sat, 22 Feb 2025 12:36:54 +0000
Subject: [PATCH 05/32] fix for 'if not work_dir'

---
 src/tools/rag/write_descriptions.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/tools/rag/write_descriptions.py b/src/tools/rag/write_descriptions.py
index b1ebc4b1..aedf6c40 100644
--- a/src/tools/rag/write_descriptions.py
+++ b/src/tools/rag/write_descriptions.py
@@ -174,6 +174,9 @@ def upload_descriptions_to_vdb(
         vdb_location: (optional) location for storing the vector database.
     """
     work_dir = os.getenv("WORK_DIR")
+    if not work_dir:
+        msg = "WORK_DIR variable not provided. Please add WORK_DIR to .env file"
+        raise MissingEnvironmentVariableError(msg)
     chroma_client = chromadb.PersistentClient(path=join_paths(work_dir, vdb_location))
     collection = chroma_client.get_or_create_collection(
         name=chroma_collection_name,
@@ -188,9 +191,6 @@ def upload_descriptions_to_vdb(
     # load environment
     load_dotenv(find_dotenv())
     work_dir = os.getenv("WORK_DIR")
-    if not work_dir:
-        msg = "WORK_DIR variable not provided. Please add WORK_DIR to .env file"
-        raise MissingEnvironmentVariableError(msg)
     file_description_dir = join_paths(work_dir, ".clean_coder/workdir_file_descriptions")
     file_extension_constraint = {
         ".js",

From d2f8a67d460ac5ff25e9e1c94e66a67f76bb65f6 Mon Sep 17 00:00:00 2001
From: Radek Kowalski <r.kowalski.pyzdry@gmail.com>
Date: Sat, 22 Feb 2025 12:48:43 +0000
Subject: [PATCH 06/32] rename file_extension_constraint to code_extensions

---
 src/tools/rag/write_descriptions.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/tools/rag/write_descriptions.py b/src/tools/rag/write_descriptions.py
index aedf6c40..a45d28ba 100644
--- a/src/tools/rag/write_descriptions.py
+++ b/src/tools/rag/write_descriptions.py
@@ -21,10 +21,10 @@
 logging.basicConfig(level=logging.INFO)
 
 
-def relevant_extension(file_path: Path, file_extension_constraint: set[str]) -> bool:
+def relevant_extension(file_path: Path, code_extensions: set[str]) -> bool:
     """Checker for whether file extension indicates a script."""
     # List of common code file extensions
-    return file_path.suffix.lower() in file_extension_constraint
+    return file_path.suffix.lower() in code_extensions
 
 
 # read file content. place name of file in the top
@@ -35,16 +35,16 @@ def get_content(file_path: Path) -> str:
     return file_path.name + "\n" + content
 
 
-def add_to_indexing_if_relevant(root: str, file: str, file_extension_constraint: set[str] | None) -> Path | None:
+def add_to_indexing_if_relevant(root: str, file: str, code_extensions: set[str] | None) -> Path | None:
     """Return file path if the file is to be considered."""
     file_path = Path(root).joinpath(file)
     if file_folder_ignored(str(file_path)):
         # ignore files and folders mentioned in .coderignore
         return None
-    if not file_extension_constraint:
+    if not code_extensions:
         return file_path
     if relevant_extension(
-        file_path, file_extension_constraint=file_extension_constraint,
+        file_path, code_extensions=code_extensions,
     ):
         return file_path
     return None
@@ -52,7 +52,7 @@ def add_to_indexing_if_relevant(root: str, file: str, file_extension_constraint:
 
 def files_in_directory(
     directories_with_files_to_describe: list[str | Path],
-    file_extension_constraint: set[str] | None,
+    code_extensions: set[str] | None,
 ) -> list[Path]:
     """Fetch paths of files in directory."""
     files_to_describe = []
@@ -62,7 +62,7 @@ def files_in_directory(
             add_to_indexing_if_relevant(
                 root=str(directory),
                 file=file,
-                file_extension_constraint=file_extension_constraint,
+                code_extensions=code_extensions,
             )
             for file in directory_files
         ]
@@ -72,7 +72,7 @@ def files_in_directory(
                 add_to_indexing_if_relevant(
                     root=root,
                     file=file,
-                    file_extension_constraint=file_extension_constraint,
+                    code_extensions=code_extensions,
                 )
                 for file in files
             ]
@@ -114,7 +114,7 @@ def output_descriptions(
 def produce_descriptions(
     directories_with_files_to_describe: list[str | Path],
     file_description_dir: str,
-    file_extension_constraint: set[str] | None = None,
+    code_extensions: set[str] | None = None,
 ) -> None:
     """
     Produce short descriptions of files. Store the descriptions in .clean_coder folder in WORK_DIR.
@@ -123,11 +123,11 @@ def produce_descriptions(
         directories_with_files_to_describe: directories from which files are to be described.
         file_description_dir: directory where generated file descriptions are to be saved to.
         ignore: files and folders to ignore.
-        file_extension_constraint: The list of file extension types accepted, if it's provided.
+        code_extensions: The list of file extension types accepted, if it's provided.
     """
     files_to_describe = files_in_directory(
         directories_with_files_to_describe=directories_with_files_to_describe,
-        file_extension_constraint=file_extension_constraint,
+        code_extensions=code_extensions,
     )
 
     prompt = ChatPromptTemplate.from_template(
@@ -192,7 +192,7 @@ def upload_descriptions_to_vdb(
     load_dotenv(find_dotenv())
     work_dir = os.getenv("WORK_DIR")
     file_description_dir = join_paths(work_dir, ".clean_coder/workdir_file_descriptions")
-    file_extension_constraint = {
+    code_extensions = {
         ".js",
         ".jsx",
         ".ts",
@@ -220,7 +220,7 @@ def upload_descriptions_to_vdb(
     produce_descriptions(
         directories_with_files_to_describe=[work_dir],
         file_description_dir=file_description_dir,
-        file_extension_constraint=file_extension_constraint,
+        code_extensions=code_extensions,
     )
     chroma_collection_name = f"clean_coder_{Path(work_dir).name}_file_descriptions"
     upload_descriptions_to_vdb(

From 62b86c40c052e64208ca16d556be4ac25321874c Mon Sep 17 00:00:00 2001
From: Radek Kowalski <r.kowalski.pyzdry@gmail.com>
Date: Sat, 22 Feb 2025 12:58:04 +0000
Subject: [PATCH 07/32] make default file extensions as the baseline option

---
 src/tools/rag/write_descriptions.py | 31 ++++-------------------------
 1 file changed, 4 insertions(+), 27 deletions(-)

diff --git a/src/tools/rag/write_descriptions.py b/src/tools/rag/write_descriptions.py
index a45d28ba..5ec0a305 100644
--- a/src/tools/rag/write_descriptions.py
+++ b/src/tools/rag/write_descriptions.py
@@ -4,6 +4,7 @@
 import os
 import sys
 from pathlib import Path
+from typing import Literal
 
 import chromadb
 from dotenv import find_dotenv, load_dotenv
@@ -114,7 +115,7 @@ def output_descriptions(
 def produce_descriptions(
     directories_with_files_to_describe: list[str | Path],
     file_description_dir: str,
-    code_extensions: set[str] | None = None,
+    code_extensions: set[str] | Literal["default"] | None = "default",
 ) -> None:
     """
     Produce short descriptions of files. Store the descriptions in .clean_coder folder in WORK_DIR.
@@ -125,6 +126,8 @@ def produce_descriptions(
         ignore: files and folders to ignore.
         code_extensions: The list of file extension types accepted, if it's provided.
     """
+    if code_extensions == "default":
+        code_extensions = {".py", ".java", ".js", ".ts", ".html", ".css", ".scss", ".sql", ".json", ".xml"}
     files_to_describe = files_in_directory(
         directories_with_files_to_describe=directories_with_files_to_describe,
         code_extensions=code_extensions,
@@ -192,35 +195,9 @@ def upload_descriptions_to_vdb(
     load_dotenv(find_dotenv())
     work_dir = os.getenv("WORK_DIR")
     file_description_dir = join_paths(work_dir, ".clean_coder/workdir_file_descriptions")
-    code_extensions = {
-        ".js",
-        ".jsx",
-        ".ts",
-        ".tsx",
-        ".vue",
-        ".py",
-        ".rb",
-        ".php",
-        ".java",
-        ".c",
-        ".cpp",
-        ".cs",
-        ".go",
-        ".swift",
-        ".kt",
-        ".rs",
-        ".htm",
-        ".html",
-        ".css",
-        ".scss",
-        ".sass",
-        ".less",
-        ".prompt",
-    }
     produce_descriptions(
         directories_with_files_to_describe=[work_dir],
         file_description_dir=file_description_dir,
-        code_extensions=code_extensions,
     )
     chroma_collection_name = f"clean_coder_{Path(work_dir).name}_file_descriptions"
     upload_descriptions_to_vdb(

From 881f300822689f6ddcb50062fdcd2257e8e23de2 Mon Sep 17 00:00:00 2001
From: Radek Kowalski <r.kowalski.pyzdry@gmail.com>
Date: Sat, 22 Feb 2025 14:54:18 +0000
Subject: [PATCH 08/32] improve function names

---
 src/tools/rag/write_descriptions.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/tools/rag/write_descriptions.py b/src/tools/rag/write_descriptions.py
index 5ec0a305..c3cd017b 100644
--- a/src/tools/rag/write_descriptions.py
+++ b/src/tools/rag/write_descriptions.py
@@ -51,7 +51,7 @@ def add_to_indexing_if_relevant(root: str, file: str, code_extensions: set[str]
     return None
 
 
-def files_in_directory(
+def find_files_to_describe(
     directories_with_files_to_describe: list[str | Path],
     code_extensions: set[str] | None,
 ) -> list[Path]:
@@ -92,7 +92,7 @@ def save_file_description(file_path: Path, description: str, file_description_di
         out_file.write(description)
 
 
-def output_descriptions(
+def describe_files(
     files_to_describe: list[Path], chain: RunnableSequence, file_description_dir: str,
 ) -> None:
     """Generate & output file descriptions to designated directory in WORK_DIR."""
@@ -128,7 +128,7 @@ def produce_descriptions(
     """
     if code_extensions == "default":
         code_extensions = {".py", ".java", ".js", ".ts", ".html", ".css", ".scss", ".sql", ".json", ".xml"}
-    files_to_describe = files_in_directory(
+    files_to_describe = find_files_to_describe(
         directories_with_files_to_describe=directories_with_files_to_describe,
         code_extensions=code_extensions,
     )
@@ -143,7 +143,7 @@ def produce_descriptions(
     llm = llms[0]
     chain = prompt | llm | StrOutputParser()
     Path(file_description_dir).mkdir(parents=True, exist_ok=True)
-    output_descriptions(
+    describe_files(
         files_to_describe=files_to_describe, chain=chain, file_description_dir=file_description_dir
     )
 

From d02ea9504291945aeb032e944cdbc9402027542a Mon Sep 17 00:00:00 2001
From: Radek Kowalski <r.kowalski.pyzdry@gmail.com>
Date: Sat, 22 Feb 2025 15:01:12 +0000
Subject: [PATCH 09/32] debug

---
 src/tools/rag/write_descriptions.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/tools/rag/write_descriptions.py b/src/tools/rag/write_descriptions.py
index c3cd017b..d855572f 100644
--- a/src/tools/rag/write_descriptions.py
+++ b/src/tools/rag/write_descriptions.py
@@ -21,6 +21,10 @@
 ## Configure the logging level
 logging.basicConfig(level=logging.INFO)
 
+# load environment
+load_dotenv(find_dotenv())  # load environment variables from .env file
+work_dir = os.getenv("WORK_DIR")
+
 
 def relevant_extension(file_path: Path, code_extensions: set[str]) -> bool:
     """Checker for whether file extension indicates a script."""
@@ -192,8 +196,6 @@ def upload_descriptions_to_vdb(
 if __name__ == "__main__":
     # provide optionally which subfolders needs to be checked, if you don't want to describe all project folder
     # load environment
-    load_dotenv(find_dotenv())
-    work_dir = os.getenv("WORK_DIR")
     file_description_dir = join_paths(work_dir, ".clean_coder/workdir_file_descriptions")
     produce_descriptions(
         directories_with_files_to_describe=[work_dir],

From 563055957753955e1128976a08d6f885ac3b925e Mon Sep 17 00:00:00 2001
From: Radek Kowalski <r.kowalski.pyzdry@gmail.com>
Date: Mon, 24 Feb 2025 10:01:26 +0000
Subject: [PATCH 10/32] remove use of work_dir checks and missing environment
 variable error.

---
 src/tools/rag/write_descriptions.py | 8 +-------
 src/utilities/exceptions.py         | 4 ----
 2 files changed, 1 insertion(+), 11 deletions(-)
 delete mode 100644 src/utilities/exceptions.py

diff --git a/src/tools/rag/write_descriptions.py b/src/tools/rag/write_descriptions.py
index d855572f..5641e4a0 100644
--- a/src/tools/rag/write_descriptions.py
+++ b/src/tools/rag/write_descriptions.py
@@ -13,7 +13,6 @@
 from langchain_core.runnables.base import RunnableSequence
 
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..")))
-from src.utilities.exceptions import MissingEnvironmentVariableError
 from src.utilities.llms import init_llms_mini
 from src.utilities.start_work_functions import file_folder_ignored
 from src.utilities.util_functions import join_paths
@@ -87,9 +86,7 @@ def find_files_to_describe(
 
 def save_file_description(file_path: Path, description: str, file_description_dir: str) -> None:
     """Save file description."""
-    if not work_dir:
-        msg = "WORK_DIR variable not provided. Please add WORK_DIR to .env file"
-        raise MissingEnvironmentVariableError(msg)
+    work_dir = os.getenv("WORK_DIR")
     file_name = file_path.relative_to(work_dir).as_posix().replace("/", "=")
     output_path = join_paths(file_description_dir, f"{file_name}.txt")
     with open(output_path, "w", encoding="utf-8") as out_file:
@@ -181,9 +178,6 @@ def upload_descriptions_to_vdb(
         vdb_location: (optional) location for storing the vector database.
     """
     work_dir = os.getenv("WORK_DIR")
-    if not work_dir:
-        msg = "WORK_DIR variable not provided. Please add WORK_DIR to .env file"
-        raise MissingEnvironmentVariableError(msg)
     chroma_client = chromadb.PersistentClient(path=join_paths(work_dir, vdb_location))
     collection = chroma_client.get_or_create_collection(
         name=chroma_collection_name,
diff --git a/src/utilities/exceptions.py b/src/utilities/exceptions.py
deleted file mode 100644
index e1c8f66d..00000000
--- a/src/utilities/exceptions.py
+++ /dev/null
@@ -1,4 +0,0 @@
-"""Custom exception messages."""
-
-class MissingEnvironmentVariableError(Exception):
-    """Enviromental variable missing."""

From d0d60939f2a9e9e319c8710290a059f9984cc8f0 Mon Sep 17 00:00:00 2001
From: Grigorij Dudnik <dudnikgrv@gmail.com>
Date: Tue, 25 Feb 2025 11:32:30 +0100
Subject: [PATCH 11/32] numpy in req

---
 requirements.txt                          |   3 +-
 src/utilities/syntax_checker_functions.py | 426 ++++------------------
 2 files changed, 82 insertions(+), 347 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index e0e73e24..7dff3c4d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -30,4 +30,5 @@ pyright==1.1.390
 ruff==0.8.2
 httpx==0.27.2
 questionary==2.1.0
-pathspec==0.12.1
\ No newline at end of file
+pathspec==0.12.1
+numpy==1.26.4
\ No newline at end of file
diff --git a/src/utilities/syntax_checker_functions.py b/src/utilities/syntax_checker_functions.py
index d6b778fc..8ca50316 100644
--- a/src/utilities/syntax_checker_functions.py
+++ b/src/utilities/syntax_checker_functions.py
@@ -216,355 +216,89 @@ def parse_yaml(yaml_string):
 
 if __name__ == "__main__":
     code = """
-"use client";
-
-import { useState, useEffect, useRef } from "react";
-import Image from "next/image";
-import { useRouter } from "next/navigation";
-import ProfileCard from "./components/ProfileCard";
-import PopupNotification from "./components/PopupNotification";
-
-interface ProfileItem {
-  uuid: string;
-  full_name: string;
-  short_bio?: string;
-  bio?: string;
-}
-
-export default function Home() {
-  const [activeTab, setActiveTab] = useState<'Explore' | 'Received' | 'Sent' | 'Matches'>('Explore');
-  const [exploreItems, setExploreItems] = useState<ProfileItem[]>([]);
-  const [receivedItems, setReceivedItems] = useState<ProfileItem[]>([]);
-  const [sentItems, setSentItems] = useState<ProfileItem[]>([]);
-  const [matchedItems, setMatchedItems] = useState<ProfileItem[]>([]);
-  const [error, setError] = useState('');
-  const [notification, setNotification] = useState<{ message: string, type: 'positive' | 'negative' } | null>(null);
-  const [loading, setLoading] = useState(false);
-  const [iconLoading, setIconLoading] = useState(true);
-  const [skip, setSkip] = useState(0);
-  const [limit] = useState(10);
-  const [totalExploreItems, setTotalExploreItems] = useState(0);
-  const sentinelRef = useRef<HTMLDivElement | null>(null);
-  const router = useRouter();
-
-  function goToProfile(uuid: string) {
-    const userRole = localStorage.getItem('role');
-    if (userRole === "intern") {
-      router.push(`/campaign/${uuid}`);
-    } else {
-      router.push(`/intern/${uuid}`);
-    }
-  }
-
-  async function handleConnect(uuid: string) {
-    setLoading(true);
-    try {
-      const token = localStorage.getItem('token');
-      if (!token) throw new Error('Authentication token not found');
-      const response = await fetch(`${process.env.NEXT_PUBLIC_API_URL}/invitations/create/${uuid}`, {
-        method: 'POST',
-        headers: {
-          'Content-Type': 'application/json',
-          'Authorization': `Bearer ${token}`,
-        },
-      });
-      if (!response.ok) throw new Error('Failed to create invitation');
-      setNotification({ message: 'Invitation sent successfully', type: 'positive' });
-
-      // Optimistically update the explore list
-      setExploreItems((prevItems) => prevItems.filter(item => item.uuid !== uuid));
-    } catch (err: any) {
-      setNotification({ message: err.message, type: 'negative' });
-    } finally {
-      setLoading(false);
-      setTimeout(() => setNotification(null), 3000);
-    }
-  }
-
-  async function handleAccept(invitationId: string) {
-    setLoading(true);
-    try {
-      const token = localStorage.getItem('token');
-      if (!token) throw new Error('Authentication token not found');
-      const response = await fetch(`${process.env.NEXT_PUBLIC_API_URL}/invitations/accept/${invitationId}`, {
-        method: 'POST',
-        headers: {
-          'Content-Type': 'application/json',
-          'Authorization': `Bearer ${token}`,
-        },
-      });
-      if (!response.ok) throw new Error('Failed to accept invitation');
-      setNotification({ message: 'Invitation accepted successfully', type: 'positive' });
-      setReceivedItems((prevItems) => prevItems.filter(item => item.invitation_id !== invitationId));
-    } catch (err: any) {
-      setNotification({ message: err.message, type: 'negative' });
-    } finally {
-      setLoading(false);
-      setTimeout(() => setNotification(null), 3000);
-    }
-  }
-
-  async function handleReject(invitationId: string) {
-    setLoading(true);
-    try {
-      const token = localStorage.getItem('token');
-      if (!token) throw new Error('Authentication token not found');
-      const response = await fetch(`${process.env.NEXT_PUBLIC_API_URL}/invitations/reject/${invitationId}`, {
-        method: 'POST',
-        headers: {
-          'Content-Type': 'application/json',
-          'Authorization': `Bearer ${token}`,
-        },
-      });
-      if (!response.ok) throw new Error('Failed to reject invitation');
-      setNotification({ message: 'Invitation rejected successfully', type: 'positive' });
-      setReceivedItems((prevItems) => prevItems.filter(item => item.invitation_id !== invitationId));
-    } catch (err: any) {
-      setNotification({ message: err.message, type: 'negative' });
-    } finally {
-      setLoading(false);
-      setTimeout(() => setNotification(null), 3000);
-    }
-  }
-
-  async function handleCancel(invitationId: string) {
-    setLoading(true);
-    try {
-      const token = localStorage.getItem('token');
-      if (!token) throw new Error('Authentication token not found');
-      const response = await fetch(`${process.env.NEXT_PUBLIC_API_URL}/invitations/cancel/${invitationId}`, {
-        method: 'POST',
-        headers: {
-          'Content-Type': 'application/json',
-          'Authorization': `Bearer ${token}`,
-        },
-      });
-      if (!response.ok) throw new Error('Failed to cancel invitation');
-      setNotification({ message: 'Invitation canceled successfully', type: 'positive' });
-      setSentItems((prevItems) => prevItems.filter(item => item.invitation_id !== invitationId));
-    } catch (err: any) {
-      setNotification({ message: err.message, type: 'negative' });
-    } finally {
-      setLoading(false);
-      setTimeout(() => setNotification(null), 3000);
-    }
-  }
-  async function fetchExplore() {
-    try {
-      const userRole = localStorage.getItem('role');
-      const token = localStorage.getItem('token');
-      
-      if (!token) {
-        throw new Error('Authentication token not found');
-      }
-
-      if (!userRole) {
-        throw new Error('User role not found');
-      }
-
-      const url = `${process.env.NEXT_PUBLIC_API_URL}${
-        userRole === "intern"
-          ? '/fetch-campaigns-for-main-page'
-          : '/fetch-interns-for-main-page'
-      }?skip=${skip}&limit=${limit}`;
-
-      console.log('Fetching from URL:', url);
-      
-      const response = await fetch(url, {
-        method: 'GET',
-        headers: {
-          'Content-Type': 'application/json',
-          'Authorization': `Bearer ${token}`,
-        },
-      });
-
-      if (!response.ok) {
-        const errorData = await response.json();
-        throw new Error(errorData.detail || 'Failed to fetch explore items');
+<template>
+  <div class="form-container">
+    <Notification v-show="notificationMessage" :message="notificationMessage" :type="notificationType" />
+    <h1>Change Password</h1>
+    <form @submit.prevent="handleSubmit">
+      <div>
+        <label for="current-password">Current Password:</label>
+        <input type="password" v-model="currentPassword" required />
+      </div>
+      <div>
+        <label for="new-password">New Password:</label>
+        <input type="password" v-model="newPassword" required />
+      </div>
+      <div>
+        <label for="confirm-new-password">Confirm New Password:</label>
+        <input type="password" v-model="confirmNewPassword" required />
+      </div>
+      <button type="submit">Change Password</button>
+    </form>
+  </div>
+</template>
+
+<script>
+import { useAuthStore } from '@/stores/auth';
+import Notification from '@/components/Notification.vue';
+
+export default {
+  components: {
+    Notification,
+  },
+  data() {
+    return {
+      currentPassword: '',
+      newPassword: '',
+      confirmNewPassword: '',
+      apiUrl: import.meta.env.VITE_API_URL,
+      notificationMessage: '',
+      notificationType: 'positive',
+    };
+  },
+  methods: {
+    async handleSubmit() {
+      if (this.newPassword !== this.confirmNewPassword) {
+        this.notificationMessage = 'New passwords do not match';
+        this.notificationType = 'negative';
+        return;
       }
 
-      const data = await response.json();
-      setExploreItems(prev => [...prev, ...(data.items || [])]);
-      setTotalExploreItems(data.total || 0);
-    } catch (err: any) {
-      console.error('Fetch error:', err);
-      setError(err.message);
-      setTimeout(() => setError(''), 3000);
-    }
-  }
-
-  async function fetchReceived() {
-    try {
-      const token = localStorage.getItem('token');
-      const response = await fetch(`${process.env.NEXT_PUBLIC_API_URL}/invitations/received`, {
-        method: 'GET',
-        headers: {
-          'Content-Type': 'application/json',
-          'Authorization': `Bearer ${token}`,
-        },
-      });
-      if (!response.ok) throw new Error('Failed to fetch received invitations');
-      const data = await response.json();
-      setReceivedItems(data.items || []);
-    } catch (err: any) {
-      setError(err.message);
-      setTimeout(() => setError(''), 3000);
-    }
-  }
-
-  async function fetchSent() {
-    try {
-      const token = localStorage.getItem('token');
-      const response = await fetch(`${process.env.NEXT_PUBLIC_API_URL}/invitations/sent`, {
-        method: 'GET',
-        headers: {
-          'Content-Type': 'application/json',
-          'Authorization': `Bearer ${token}`,
-        },
-      });
-      if (!response.ok) throw new Error('Failed to fetch sent invitations');
-      const data = await response.json();
-      setSentItems(data.items || []);
-    } catch (err: any) {
-      setError(err.message);
-      setTimeout(() => setError(''), 3000);
-    }
-  }
-  async function fetchMatches() {
-    try {
-      const token = localStorage.getItem('token');
-      if (!token) throw new Error('Authentication token not found');
-      const response = await fetch(`${process.env.NEXT_PUBLIC_API_URL}/matches`, {
-        method: 'GET',
-        headers: {
-          'Content-Type': 'application/json',
-          'Authorization': `Bearer ${token}`,
-        },
-      });
-      if (!response.ok) throw new Error('Failed to fetch matches');
-      const data = await response.json();
-      setMatchedItems(data.items || []);
-    } catch (err: any) {
-      setError(err.message);
-      setTimeout(() => setError(''), 3000);
-    }
-  }
-
-
-  useEffect(() => {
-    const token = localStorage.getItem('token');
-    if (!token) {
-      setError('Please login first');
-      return;
-    }
-    // Initial load of the first page
-    fetchExplore().then(() => setSkip(prev => prev + limit));
-  }, []);
-
-  // Infinite scroll: Observe the sentinel at the bottom of the Explore list
-  useEffect(() => {
-    const observer = new IntersectionObserver((entries) => {
-      const [entry] = entries;
-      // If sentinel is in view and we have more items to fetch
-      if (entry.isIntersecting && skip < totalExploreItems) {
-        // Fetch the next batch
-        fetchExplore().then(() => {
-          setSkip(prev => prev + limit);
+      const formData = new FormData();
+      formData.append('current_password', this.currentPassword);
+      formData.append('new_password', this.newPassword);
+
+      try {
+        const response = await fetch(this.apiUrl + '/change-password', {
+          method: 'POST',
+          headers: {
+            'Authorization': `Bearer ${localStorage.getItem('token')}`,
+          },
+          body: formData,
         });
+        if (!response.ok) {
+          throw new Error('Password change failed');
+        }
+        this.notificationMessage = 'Password changed successfully';
+        this.notificationType = 'positive';
+        setTimeout(() => {
+          this.notificationMessage = '';
+          this.$router.push('/');
+        }, 2000);
+      } catch (error) {
+        console.error('Error:', error);
+        this.notificationMessage = error.message;
+        this.notificationType = 'negative';
+      } finally {
+        this.currentPassword = '';
+        this.newPassword = '';
+        this.confirmNewPassword = '';
       }
-    });
-
-    if (sentinelRef.current) {
-      observer.observe(sentinelRef.current);
-    }
-
-    // Cleanup
-    return () => {
-      if (sentinelRef.current) {
-        observer.unobserve(sentinelRef.current);
-      }
-    };
-  }, [skip, totalExploreItems, limit]);
-
-  const handleTabClick = (tab: 'Explore' | 'Received' | 'Sent' | 'Matches') => {
-    setActiveTab(tab);
-    if (tab === 'Explore') fetchExplore();
-    if (tab === 'Received') fetchReceived();
-    if (tab === 'Sent') fetchSent();
-    if (tab === 'Matches') fetchMatches();
-  };
-
-  let listToRender: ProfileItem[] = [];
-  if (activeTab === 'Explore') listToRender = exploreItems;
-  if (activeTab === 'Received') listToRender = receivedItems;
-  if (activeTab === 'Sent') listToRender = sentItems;
-  if (activeTab === 'Matches') listToRender = matchedItems;
-  return (
-    <main className="max-w-2xl mx-auto px-4 py-6 sm:px-6 lg:px-8">
-      <header className="flex items-center justify-between mb-8">
-          <button className="w-10 h-10 rounded-full bg-[#EEEEEE] flex items-center justify-center">
-            <Image 
-              src="/profile.svg" 
-              alt="Profile" 
-              width={24} 
-              height={24} 
-              onError={(e) => e.currentTarget.src = '/fallback-icon.svg'} // Fallback icon
-            />
-          </button>
-        <h1 className="text-xl font-semibold">Glovn</h1>
-      </header>
-
-      <nav className="flex p-1 mb-8 justify-center bg-[#F5F5F5]/40 rounded-full max-w-md mx-auto">
-        {["Explore", "Received", "Sent", "Matches"].map((tab) => (
-          <button
-            key={tab}
-            onClick={() => handleTabClick(tab as 'Explore' | 'Received' | 'Sent' | 'Matches')}
-            className={`flex-1 px-6 py-2.5 rounded-full text-sm transition-all duration-300 ${
-              activeTab === tab
-                ? "bg-white font-medium text-black shadow-sm text-[15px]"
-                : "text-gray-400/80 hover:text-gray-500"
-            }`}
-          >
-            {tab}
-          </button>
-        ))}
-      </nav>
-
-      {error && (
-        <div className="mb-6 py-2 px-4 w-full text-center bg-[#FFF2F2] text-[#FF0000] text-[14px] rounded-full">
-          {error}
-        </div>
-      )}
-
-      <section className="space-y-4">
-        {listToRender.length === 0 ? (
-          <div className="text-center py-4 text-gray-500">No items found</div>
-        ) : (
-          listToRender.map((item) => (
-            <ProfileCard
-              key={item.uuid}
-              item={item}
-              onConnect={handleConnect}
-              onAccept={handleAccept}
-              onReject={handleReject}
-              onCancel={handleCancel}
-              activeTab={activeTab}
-            />
-          ))
-        )}
-      </section>
-
-      {/*<div ref={sentinelRef} style={{ height: "1px" }} />*/}
-      {notification && (
-        <PopupNotification
-          message={notification.message}
-          type={notification.type}
-          onClose={() => setNotification(null)}
-        />
-      )}
-    </main>
-  );
-}
+    },
+  },
+};
+</script>
 
+<style scoped src="@/assets/styles/forms.css"></style>
 """
-    print(parse_tsx(code))
\ No newline at end of file
+    print(parse_vue_basic(code))
\ No newline at end of file

From 5660f79ad96b8f3cffd59dae0013f4a84bfa5083 Mon Sep 17 00:00:00 2001
From: Grigorij Dudnik <dudnikgrv@gmail.com>
Date: Tue, 25 Feb 2025 11:53:47 +0100
Subject: [PATCH 12/32] code splitter added

---
 src/tools/rag/code_splitter.py | 54 ++++++++++++++++++++++++++++++----
 1 file changed, 49 insertions(+), 5 deletions(-)

diff --git a/src/tools/rag/code_splitter.py b/src/tools/rag/code_splitter.py
index bfa71650..09bdcfc1 100644
--- a/src/tools/rag/code_splitter.py
+++ b/src/tools/rag/code_splitter.py
@@ -4,9 +4,7 @@
 )
 
 
-python_splitter = RecursiveCharacterTextSplitter.from_language(
-    language=Language.PYTHON, chunk_size=1000, chunk_overlap=0
-)
+
 
 
 code = """
@@ -310,9 +308,55 @@ def load_system_message():
             project_rules=read_coderrules()
     ))
 """
+extension_to_language = {
+    'cpp': 'cpp',
+    'go': 'go',
+    'java': 'java',
+    'kt': 'kotlin',
+    'js': 'js',
+    'jsx': 'js',
+    'vue': 'js',
+    'ts': 'ts',
+    'tsx': 'ts',
+    'mjs': 'js',
+    'cjs': 'js',
+    'php': 'php',
+    'proto': 'proto',
+    'py': 'python',
+    'rst': 'rst',
+    'rb': 'ruby',
+    'rs': 'rust',
+    'scala': 'scala',
+    'swift': 'swift',
+    'md': 'markdown',
+    'tex': 'latex',
+    'html': 'html',
+    'sol': 'sol',
+    'cs': 'csharp',
+    'cob': 'cobol',
+    'c': 'c',
+    'lua': 'lua',
+    'pl': 'perl',
+    'hs': 'haskell',
+    'ex': 'elixir',
+    'ps1': 'powershell',
+    'json': 'json',
+    'xml': 'xml',
+    'bash': 'powershell',
+    'zsh': 'powershell',
+    'sh': 'powershell',
+    'dockerfile': 'proto',
+}
+def split_code(code, extension, chunk_size=1000):
+    language = extension_to_language.get(extension)
+    if not language:
+        return
+    splitter = RecursiveCharacterTextSplitter.from_language(
+        language=Language(language), chunk_size=chunk_size, chunk_overlap=0
+    )
+    return splitter.split_text(code)
 
-splitted = python_splitter.split_text(code)
-print(RecursiveCharacterTextSplitter.get_separators_for_language(Language.PYTHON))
+splitted = split_code(code, "py")
 for doc in splitted:
     print(doc)
     print("###")

From b65614a7200b12e015486b0651ecfce93993ca95 Mon Sep 17 00:00:00 2001
From: Grigorij Dudnik <dudnikgrv@gmail.com>
Date: Tue, 25 Feb 2025 11:58:50 +0100
Subject: [PATCH 13/32] code splitter added

---
 src/tools/rag/code_splitter.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/tools/rag/code_splitter.py b/src/tools/rag/code_splitter.py
index 09bdcfc1..cd8ade29 100644
--- a/src/tools/rag/code_splitter.py
+++ b/src/tools/rag/code_splitter.py
@@ -3,10 +3,6 @@
     RecursiveCharacterTextSplitter,
 )
 
-
-
-
-
 code = """
 from langchain_openai.chat_models import ChatOpenAI
 from langchain_community.chat_models import ChatOllama
@@ -347,7 +343,10 @@ def load_system_message():
     'sh': 'powershell',
     'dockerfile': 'proto',
 }
-def split_code(code, extension, chunk_size=1000):
+
+
+def split_code(code: str, extension: str, chunk_size: int = 1000):
+    """Splits code for smaller elements as functions. That allows to describe functions for semantic retrieval tool."""
     language = extension_to_language.get(extension)
     if not language:
         return
@@ -356,6 +355,7 @@ def split_code(code, extension, chunk_size=1000):
     )
     return splitter.split_text(code)
 
+
 splitted = split_code(code, "py")
 for doc in splitted:
     print(doc)

From 13f5ed1c08a5497fd8139cf8ebd9cace1ab405f1 Mon Sep 17 00:00:00 2001
From: Grigorij Dudnik <dudnikgrv@gmail.com>
Date: Wed, 26 Feb 2025 11:43:59 +0100
Subject: [PATCH 14/32] PR template

---
 .github/PULL_REQUEST_TEMPLATE/template.md | 11 +++++++++++
 1 file changed, 11 insertions(+)
 create mode 100644 .github/PULL_REQUEST_TEMPLATE/template.md

diff --git a/.github/PULL_REQUEST_TEMPLATE/template.md b/.github/PULL_REQUEST_TEMPLATE/template.md
new file mode 100644
index 00000000..cfb25108
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE/template.md
@@ -0,0 +1,11 @@
+## Pull Request Template
+
+### Description
+Please provide a detailed description of the changes made in this pull request.
+
+### Related Issue
+If this pull request addresses an existing issue, please reference it here (e.g., "Fixes #123").
+
+### Checklist
+- [ ] I have tested these changes locally.
+- [ ] I'm making contribution to the `dev` branch. Direct contributions to `master` are not allowed. Don't worry, they will be merged to `master` on the nearest release.

From c17adcba2030f37e432b0a929b447ee6b6b48358 Mon Sep 17 00:00:00 2001
From: Radek Kowalski <r.kowalski.pyzdry@gmail.com>
Date: Wed, 26 Feb 2025 11:25:54 +0000
Subject: [PATCH 15/32] restore previous prompt

---
 src/tools/rag/write_descriptions.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/src/tools/rag/write_descriptions.py b/src/tools/rag/write_descriptions.py
index 5641e4a0..c67c51bc 100644
--- a/src/tools/rag/write_descriptions.py
+++ b/src/tools/rag/write_descriptions.py
@@ -15,7 +15,7 @@
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..")))
 from src.utilities.llms import init_llms_mini
 from src.utilities.start_work_functions import file_folder_ignored
-from src.utilities.util_functions import join_paths
+from src.utilities.util_functions import join_paths, read_coderrules
 
 ## Configure the logging level
 logging.basicConfig(level=logging.INFO)
@@ -133,11 +133,19 @@ def produce_descriptions(
         directories_with_files_to_describe=directories_with_files_to_describe,
         code_extensions=code_extensions,
     )
-
+    coderrules = read_coderrules()
     prompt = ChatPromptTemplate.from_template(
-        """Describe the following code in 4 sentences or less, focusing only on important information from integration point of view.
-    Write what file is responsible for.\n\n'''\n{code}'''
-    """,
+f"""First, get known with info about project (may be useful, may be not):
+'''
+{coderrules}
+'''
+Describe the code in 4 sentences or less, focusing only on important information from integration point of view.
+Write what file is responsible for.
+Go traight to the thing in description, without starting sentence.
+'''
+{{code}}
+'''
+""",
     )
 
     llms = init_llms_mini(tools=[], run_name="File Describer")

From b8352ac677af8004f2c030cc201e46d24250fab2 Mon Sep 17 00:00:00 2001
From: Grigorij Dudnik <dudnikgrv@gmail.com>
Date: Thu, 27 Feb 2025 14:51:43 +0100
Subject: [PATCH 16/32] write descriptions - old code restored

---
 .github/PULL_REQUEST_TEMPLATE/template.md |   3 +
 src/tools/rag/code_splitter.py            |   9 +-
 src/tools/rag/write_descriptions.py       | 297 ++++++++++------------
 3 files changed, 141 insertions(+), 168 deletions(-)

diff --git a/.github/PULL_REQUEST_TEMPLATE/template.md b/.github/PULL_REQUEST_TEMPLATE/template.md
index cfb25108..fb2040e4 100644
--- a/.github/PULL_REQUEST_TEMPLATE/template.md
+++ b/.github/PULL_REQUEST_TEMPLATE/template.md
@@ -3,6 +3,9 @@
 ### Description
 Please provide a detailed description of the changes made in this pull request.
 
+### How to use
+If you created new functionality, please describe how it could be enabled and used.
+
 ### Related Issue
 If this pull request addresses an existing issue, please reference it here (e.g., "Fixes #123").
 
diff --git a/src/tools/rag/code_splitter.py b/src/tools/rag/code_splitter.py
index cd8ade29..c7247c01 100644
--- a/src/tools/rag/code_splitter.py
+++ b/src/tools/rag/code_splitter.py
@@ -356,7 +356,8 @@ def split_code(code: str, extension: str, chunk_size: int = 1000):
     return splitter.split_text(code)
 
 
-splitted = split_code(code, "py")
-for doc in splitted:
-    print(doc)
-    print("###")
+if __name__ == "__main__":
+    splitted = split_code(code, "py")
+    for doc in splitted:
+        print(doc)
+        print("###")
diff --git a/src/tools/rag/write_descriptions.py b/src/tools/rag/write_descriptions.py
index c67c51bc..11b1a631 100644
--- a/src/tools/rag/write_descriptions.py
+++ b/src/tools/rag/write_descriptions.py
@@ -1,209 +1,178 @@
-"""Functions to create an index of files for RAG."""
-
-import logging
 import os
-import sys
 from pathlib import Path
-from typing import Literal
-
-import chromadb
-from dotenv import find_dotenv, load_dotenv
 from langchain.prompts import ChatPromptTemplate
 from langchain_core.output_parsers import StrOutputParser
-from langchain_core.runnables.base import RunnableSequence
-
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..")))
-from src.utilities.llms import init_llms_mini
-from src.utilities.start_work_functions import file_folder_ignored
+from dotenv import load_dotenv, find_dotenv
+import chromadb
+import sys
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..')))
 from src.utilities.util_functions import join_paths, read_coderrules
+from src.utilities.start_work_functions import CoderIgnore, file_folder_ignored
+from src.utilities.llms import init_llms_mini
+from src.tools.rag.code_splitter import split_code
 
-## Configure the logging level
-logging.basicConfig(level=logging.INFO)
 
-# load environment
-load_dotenv(find_dotenv())  # load environment variables from .env file
+load_dotenv(find_dotenv())
 work_dir = os.getenv("WORK_DIR")
 
 
-def relevant_extension(file_path: Path, code_extensions: set[str]) -> bool:
-    """Checker for whether file extension indicates a script."""
+def is_code_file(file_path):
     # List of common code file extensions
+    code_extensions = {
+        '.js', '.jsx', '.ts', '.tsx', '.vue', '.py', '.rb', '.php', '.java', '.c', '.cpp', '.cs', '.go', '.swift',
+        '.kt', '.rs', '.htm','.html', '.css', '.scss', '.sass', '.less', '.prompt',
+    }
     return file_path.suffix.lower() in code_extensions
 
 
 # read file content. place name of file in the top
-def get_content(file_path: Path) -> str:
-    """Collect file name and content to return them together as string."""
-    with open(file_path, encoding="utf-8") as file:
+def get_content(file_path):
+    with open(file_path, 'r', encoding='utf-8') as file:
         content = file.read()
-    return file_path.name + "\n" + content
-
-
-def add_to_indexing_if_relevant(root: str, file: str, code_extensions: set[str] | None) -> Path | None:
-    """Return file path if the file is to be considered."""
-    file_path = Path(root).joinpath(file)
-    if file_folder_ignored(str(file_path)):
-        # ignore files and folders mentioned in .coderignore
-        return None
-    if not code_extensions:
-        return file_path
-    if relevant_extension(
-        file_path, code_extensions=code_extensions,
-    ):
-        return file_path
-    return None
-
-
-def find_files_to_describe(
-    directories_with_files_to_describe: list[str | Path],
-    code_extensions: set[str] | None,
-) -> list[Path]:
-    """Fetch paths of files in directory."""
-    files_to_describe = []
-    for directory in directories_with_files_to_describe:
-        directory_files = [f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))]
-        tmp = [
-            add_to_indexing_if_relevant(
-                root=str(directory),
-                file=file,
-                code_extensions=code_extensions,
-            )
-            for file in directory_files
-        ]
-        files_to_describe.extend(tmp)
-        for root, _, files in os.walk(directory):
-            tmp = [
-                add_to_indexing_if_relevant(
-                    root=root,
-                    file=file,
-                    code_extensions=code_extensions,
-                )
-                for file in files
-            ]
-            files_to_describe.extend(tmp)
-    return files_to_describe
-
-
-def save_file_description(file_path: Path, description: str, file_description_dir: str) -> None:
-    """Save file description."""
-    work_dir = os.getenv("WORK_DIR")
-    file_name = file_path.relative_to(work_dir).as_posix().replace("/", "=")
-    output_path = join_paths(file_description_dir, f"{file_name}.txt")
-    with open(output_path, "w", encoding="utf-8") as out_file:
-        out_file.write(description)
-
-
-def describe_files(
-    files_to_describe: list[Path], chain: RunnableSequence, file_description_dir: str,
-) -> None:
-    """Generate & output file descriptions to designated directory in WORK_DIR."""
-    # iterate over all files, take 8 files at once
-    batch_size = 8
-    for i in range(0, len(files_to_describe), batch_size):
-        files_iteration = [f for f in files_to_describe[i : i + batch_size] if f is not None]
-        descriptions = chain.batch([get_content(file_path) for file_path in files_iteration])
-        logging.debug(descriptions)
-        [
-            save_file_description(
-                file_path=file_path,
-                description=description,
-                file_description_dir=file_description_dir,
-            )
-            for file_path, description in zip(files_iteration, descriptions, strict=True)
-        ]
-
+    content = file_path.name + '\n' + content
+    return content
 
-def produce_descriptions(
-    directories_with_files_to_describe: list[str | Path],
-    file_description_dir: str,
-    code_extensions: set[str] | Literal["default"] | None = "default",
-) -> None:
+def collect_file_pathes(subfolders, work_dir):
     """
-    Produce short descriptions of files. Store the descriptions in .clean_coder folder in WORK_DIR.
-
-    Inputs:
-        directories_with_files_to_describe: directories from which files are to be described.
-        file_description_dir: directory where generated file descriptions are to be saved to.
-        ignore: files and folders to ignore.
-        code_extensions: The list of file extension types accepted, if it's provided.
+    Collect and return a list of allowed code files from the given subfolders
+    under the work_dir according to is_code_file criteria and .coderignore patterns.
     """
-    if code_extensions == "default":
-        code_extensions = {".py", ".java", ".js", ".ts", ".html", ".css", ".scss", ".sql", ".json", ".xml"}
-    files_to_describe = find_files_to_describe(
-        directories_with_files_to_describe=directories_with_files_to_describe,
-        code_extensions=code_extensions,
-    )
+    allowed_files = []
+    for folder in subfolders:
+        for root, _, files in os.walk(work_dir + folder):
+            for file in files:
+                file_path = Path(root) / file
+                if not is_code_file(file_path):
+                    continue
+                relative_path_str = file_path.relative_to(work_dir).as_posix()
+                if file_folder_ignored(relative_path_str):
+                    continue
+                allowed_files.append(file_path)
+    return allowed_files
+
+
+def write_file_descriptions(subfolders_with_files=['/']):
+    all_files = collect_file_pathes(subfolders_with_files, work_dir)
+
     coderrules = read_coderrules()
+
     prompt = ChatPromptTemplate.from_template(
 f"""First, get known with info about project (may be useful, may be not):
+
 '''
 {coderrules}
 '''
+
 Describe the code in 4 sentences or less, focusing only on important information from integration point of view.
 Write what file is responsible for.
-Go traight to the thing in description, without starting sentence.
+
+Go straight to the thing in description, without starting sentence.
+
 '''
 {{code}}
 '''
-""",
+"""
     )
-
-    llms = init_llms_mini(tools=[], run_name="File Describer")
+    llms = init_llms_mini(tools=[], run_name='File Describer')
     llm = llms[0]
     chain = prompt | llm | StrOutputParser()
-    Path(file_description_dir).mkdir(parents=True, exist_ok=True)
-    describe_files(
-        files_to_describe=files_to_describe, chain=chain, file_description_dir=file_description_dir
+
+    description_folder = join_paths(work_dir, '.clean_coder/files_and_folders_descriptions')
+    Path(description_folder).mkdir(parents=True, exist_ok=True)
+    # iterate over all files, take 8 files at once and descrive files in batch
+    batch_size = 8
+    for i in range(0, len(all_files), batch_size):
+        files_iteration = all_files[i:i + batch_size]
+        descriptions = chain.batch([get_content(file_path) for file_path in files_iteration])
+        print(descriptions)
+
+        for file_path, description in zip(files_iteration, descriptions):
+            file_name = file_path.relative_to(work_dir).as_posix().replace('/', '=')
+            output_path = join_paths(description_folder, f"{file_name}.txt")
+
+            with open(output_path, 'w', encoding='utf-8') as out_file:
+                out_file.write(description)
+
+
+
+def write_file_chunks_descriptions(subfolders_with_files=['/']):
+    all_files = collect_file_pathes(subfolders_with_files, work_dir)
+
+    coderrules = read_coderrules()
+
+    prompt = ChatPromptTemplate.from_template(
+f"""First, get known with info about project (may be useful, may be not):
+
+'''
+{coderrules}
+'''
+
+For the reference, you have code of whole file here:
+
+'''
+{{file_code}}
+'''
+
+Describe provided function/file_chunk in 4 sentences or less, focusing only on important information from integration point of view.
+Write what function/file chunk is responsible for.
+
+Go straight to the thing in description, without starting sentence.
+
+Here is file chunk to describe:
+'''
+{{chunk_code}}
+'''
+"""
     )
+    llms = init_llms_mini(tools=[], run_name='File Describer')
+    llm = llms[0]
+    chain = prompt | llm | StrOutputParser()
 
+    description_folder = join_paths(work_dir, '.clean_coder/files_and_folders_descriptions')
+    Path(description_folder).mkdir(parents=True, exist_ok=True)
+    # iterate chunks inside of the file
+    for file_path in all_files:
+        file_content = get_content(file_path)
+        # get file extenstion
+        extension = file_path.split('.')[-1]
+        file_chunks = split_code(file_content, extension)
+        descriptions = chain.batch(file_chunks)
+        print(descriptions)
 
-def upload_to_collection(collection: chromadb.PersistentClient, file_description_dir: str) -> None:
-    """Insert file information to chroma database."""
-    for root, _, files in os.walk(file_description_dir):
-        for file in files:
-            file_path = Path(root) / file
-            with open(file_path, encoding="utf-8") as f:
-                content = f.read()
-            collection.upsert(
-                documents=[
-                    content,
-                ],
-                ids=[file_path.name.replace("=", "/").removesuffix(".txt")],
-            )
+        for file_path, description in zip(files_iteration, descriptions):
+            file_name = file_path.relative_to(work_dir).as_posix().replace('/', '=')
+            output_path = join_paths(description_folder, f"{file_name}.txt")
 
+            with open(output_path, 'w', encoding='utf-8') as out_file:
+                out_file.write(description)
 
-def upload_descriptions_to_vdb(
-    chroma_collection_name: str,
-    file_description_dir: str,
-    vdb_location: str = ".clean_coder/chroma_base",
-) -> None:
-    """
-    Upload file descriptions to chroma database.
 
-    Inputs:
-        chroma_collection_name: name of the collection within Chroma vector database to save file descriptions in.
-        file_description_dir: directory where generated file descriptions are available.
-        vdb_location: (optional) location for storing the vector database.
-    """
-    work_dir = os.getenv("WORK_DIR")
-    chroma_client = chromadb.PersistentClient(path=join_paths(work_dir, vdb_location))
+
+def upload_descriptions_to_vdb():
+    chroma_client = chromadb.PersistentClient(path=join_paths(work_dir, '.clean_coder/chroma_base'))
+    collection_name = f"clean_coder_{Path(work_dir).name}_file_descriptions"
+
     collection = chroma_client.get_or_create_collection(
-        name=chroma_collection_name,
+        name=collection_name
     )
 
     # read files and upload to base
-    upload_to_collection(collection=collection, file_description_dir=file_description_dir)
+    description_folder = join_paths(work_dir, '.clean_coder/files_and_folders_descriptions')
+    for root, _, files in os.walk(description_folder):
+        for file in files:
+            file_path = Path(root) / file
+            with open(file_path, 'r', encoding='utf-8') as file:
+                content = file.read()
+            collection.upsert(
+                documents=[
+                    content
+                ],
+                ids=[file_path.name.replace('=', '/').removesuffix(".txt")],
+            )
 
 
-if __name__ == "__main__":
-    # provide optionally which subfolders needs to be checked, if you don't want to describe all project folder
-    # load environment
-    file_description_dir = join_paths(work_dir, ".clean_coder/workdir_file_descriptions")
-    produce_descriptions(
-        directories_with_files_to_describe=[work_dir],
-        file_description_dir=file_description_dir,
-    )
-    chroma_collection_name = f"clean_coder_{Path(work_dir).name}_file_descriptions"
-    upload_descriptions_to_vdb(
-        chroma_collection_name=chroma_collection_name, file_description_dir=file_description_dir,
-    )
+if __name__ == '__main__':
+    #provide optionally which subfolders needs to be checked, if you don't want to describe all project folder
+    write_file_descriptions(subfolders_with_files=['/'])
+
+    upload_descriptions_to_vdb()
\ No newline at end of file

From 1e0c448f5786477727555bd3d8414a1ecef85d1e Mon Sep 17 00:00:00 2001
From: Grigorij Dudnik <dudnikgrv@gmail.com>
Date: Fri, 28 Feb 2025 09:16:20 +0100
Subject: [PATCH 17/32] write chunk descriptions started to work

---
 src/prompts/describe_file_chunks.prompt | 21 +++++++++++++
 src/tools/rag/write_descriptions.py     | 40 +++++++------------------
 2 files changed, 31 insertions(+), 30 deletions(-)
 create mode 100644 src/prompts/describe_file_chunks.prompt

diff --git a/src/prompts/describe_file_chunks.prompt b/src/prompts/describe_file_chunks.prompt
new file mode 100644
index 00000000..a10f7368
--- /dev/null
+++ b/src/prompts/describe_file_chunks.prompt
@@ -0,0 +1,21 @@
+First, get known with info about project (may be useful, may be not):
+
+'''
+{coderrules}
+'''
+
+For the reference, you have code of whole file here:
+
+'''
+{file_code}
+'''
+
+Describe provided function/file_chunk in 4 sentences or less, focusing only on important information from integration point of view.
+Write what function/file chunk is responsible for.
+
+Go straight to the thing in description, without starting sentence.
+
+Here is file chunk to describe:
+'''
+{chunk_code}
+'''
\ No newline at end of file
diff --git a/src/tools/rag/write_descriptions.py b/src/tools/rag/write_descriptions.py
index 11b1a631..d4cb3351 100644
--- a/src/tools/rag/write_descriptions.py
+++ b/src/tools/rag/write_descriptions.py
@@ -29,7 +29,7 @@ def is_code_file(file_path):
 def get_content(file_path):
     with open(file_path, 'r', encoding='utf-8') as file:
         content = file.read()
-    content = file_path.name + '\n' + content
+    content = file_path.name + '\n\n' + content
     return content
 
 def collect_file_pathes(subfolders, work_dir):
@@ -97,33 +97,13 @@ def write_file_descriptions(subfolders_with_files=['/']):
 
 def write_file_chunks_descriptions(subfolders_with_files=['/']):
     all_files = collect_file_pathes(subfolders_with_files, work_dir)
-
     coderrules = read_coderrules()
 
-    prompt = ChatPromptTemplate.from_template(
-f"""First, get known with info about project (may be useful, may be not):
-
-'''
-{coderrules}
-'''
-
-For the reference, you have code of whole file here:
-
-'''
-{{file_code}}
-'''
+    grandparent_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
+    with open(f"{grandparent_dir}/prompts/describe_file_chunks.prompt", "r") as f:
+        chunks_describe_template = f.read()
 
-Describe provided function/file_chunk in 4 sentences or less, focusing only on important information from integration point of view.
-Write what function/file chunk is responsible for.
-
-Go straight to the thing in description, without starting sentence.
-
-Here is file chunk to describe:
-'''
-{{chunk_code}}
-'''
-"""
-    )
+    prompt = ChatPromptTemplate.from_template(chunks_describe_template)
     llms = init_llms_mini(tools=[], run_name='File Describer')
     llm = llms[0]
     chain = prompt | llm | StrOutputParser()
@@ -134,20 +114,19 @@ def write_file_chunks_descriptions(subfolders_with_files=['/']):
     for file_path in all_files:
         file_content = get_content(file_path)
         # get file extenstion
-        extension = file_path.split('.')[-1]
+        extension = file_path.suffix.lstrip('.')
         file_chunks = split_code(file_content, extension)
-        descriptions = chain.batch(file_chunks)
+        descriptions = chain.batch([{'coderrules': coderrules, 'file_code': file_content, 'chunk_code': chunk} for chunk in file_chunks])
         print(descriptions)
 
-        for file_path, description in zip(files_iteration, descriptions):
-            file_name = file_path.relative_to(work_dir).as_posix().replace('/', '=')
+        for nr, description in enumerate(descriptions):
+            file_name = f"{file_path.relative_to(work_dir).as_posix().replace('/', '=')}_chunk{nr}"
             output_path = join_paths(description_folder, f"{file_name}.txt")
 
             with open(output_path, 'w', encoding='utf-8') as out_file:
                 out_file.write(description)
 
 
-
 def upload_descriptions_to_vdb():
     chroma_client = chromadb.PersistentClient(path=join_paths(work_dir, '.clean_coder/chroma_base'))
     collection_name = f"clean_coder_{Path(work_dir).name}_file_descriptions"
@@ -174,5 +153,6 @@ def upload_descriptions_to_vdb():
 if __name__ == '__main__':
     #provide optionally which subfolders needs to be checked, if you don't want to describe all project folder
     write_file_descriptions(subfolders_with_files=['/'])
+    write_file_chunks_descriptions()
 
     upload_descriptions_to_vdb()
\ No newline at end of file

From 17dbdb3e11d55fa7205a99824cdd6b96c96a581e Mon Sep 17 00:00:00 2001
From: Grigorij Dudnik <dudnikgrv@gmail.com>
Date: Fri, 28 Feb 2025 09:24:38 +0100
Subject: [PATCH 18/32] write chunk descriptions started to work

---
 .github/PULL_REQUEST_TEMPLATE/template.md | 1 +
 src/prompts/describe_file_chunks.prompt   | 2 --
 src/tools/rag/write_descriptions.py       | 3 +++
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/.github/PULL_REQUEST_TEMPLATE/template.md b/.github/PULL_REQUEST_TEMPLATE/template.md
index fb2040e4..0fb59ae7 100644
--- a/.github/PULL_REQUEST_TEMPLATE/template.md
+++ b/.github/PULL_REQUEST_TEMPLATE/template.md
@@ -11,4 +11,5 @@ If this pull request addresses an existing issue, please reference it here (e.g.
 
 ### Checklist
 - [ ] I have tested these changes locally.
+- [ ] I used docstrings on the begin of every function I created to describe it. Both humans and AI will have no problem to understand my code.
 - [ ] I'm making contribution to the `dev` branch. Direct contributions to `master` are not allowed. Don't worry, they will be merged to `master` on the nearest release.
diff --git a/src/prompts/describe_file_chunks.prompt b/src/prompts/describe_file_chunks.prompt
index a10f7368..0ebefc5f 100644
--- a/src/prompts/describe_file_chunks.prompt
+++ b/src/prompts/describe_file_chunks.prompt
@@ -1,11 +1,9 @@
 First, get known with info about project (may be useful, may be not):
-
 '''
 {coderrules}
 '''
 
 For the reference, you have code of whole file here:
-
 '''
 {file_code}
 '''
diff --git a/src/tools/rag/write_descriptions.py b/src/tools/rag/write_descriptions.py
index d4cb3351..0f4a36d8 100644
--- a/src/tools/rag/write_descriptions.py
+++ b/src/tools/rag/write_descriptions.py
@@ -96,6 +96,8 @@ def write_file_descriptions(subfolders_with_files=['/']):
 
 
 def write_file_chunks_descriptions(subfolders_with_files=['/']):
+    """Writes descriptions of whole file chunks in codebase. Gets list of whole files to describe, divides files
+    into chunks and describes each chunk separately."""
     all_files = collect_file_pathes(subfolders_with_files, work_dir)
     coderrules = read_coderrules()
 
@@ -128,6 +130,7 @@ def write_file_chunks_descriptions(subfolders_with_files=['/']):
 
 
 def upload_descriptions_to_vdb():
+    """Uploads descriptions, created by write_file_chunks_descriptions, into vector database."""
     chroma_client = chromadb.PersistentClient(path=join_paths(work_dir, '.clean_coder/chroma_base'))
     collection_name = f"clean_coder_{Path(work_dir).name}_file_descriptions"
 

From 05d5b86a36b5cc0e3cdca7132cb7328c209bc89e Mon Sep 17 00:00:00 2001
From: Grigorij Dudnik <dudnikgrv@gmail.com>
Date: Fri, 28 Feb 2025 10:51:20 +0100
Subject: [PATCH 19/32] repairing chunk describing

---
 src/tools/rag/code_splitter.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/tools/rag/code_splitter.py b/src/tools/rag/code_splitter.py
index c7247c01..a3dd6d90 100644
--- a/src/tools/rag/code_splitter.py
+++ b/src/tools/rag/code_splitter.py
@@ -349,7 +349,8 @@ def split_code(code: str, extension: str, chunk_size: int = 1000):
     """Splits code for smaller elements as functions. That allows to describe functions for semantic retrieval tool."""
     language = extension_to_language.get(extension)
     if not language:
-        return
+        print(f'problem with extension {extension}')
+        return []
     splitter = RecursiveCharacterTextSplitter.from_language(
         language=Language(language), chunk_size=chunk_size, chunk_overlap=0
     )

From d522b677076e61baf198d0feca89b9d30d5751f4 Mon Sep 17 00:00:00 2001
From: Grigorij Dudnik <dudnikgrv@gmail.com>
Date: Mon, 3 Mar 2025 14:45:42 +0100
Subject: [PATCH 20/32] researcher improved

---
 manager.py                           |  4 ++--
 src/agents/debugger_agent.py         |  4 ++--
 src/agents/executor_agent.py         |  4 ++--
 src/agents/frontend_feedback.py      |  4 ++--
 src/agents/planner_agent.py          |  4 ++--
 src/agents/researcher_agent.py       |  6 +++---
 src/prompts/researcher_system.prompt | 31 ++++++++++++++++------------
 src/tools/rag/code_splitter.py       |  1 -
 src/tools/rag/retrieval.py           | 12 +++++++++--
 src/tools/rag/write_descriptions.py  |  3 +++
 src/utilities/llms.py                |  2 +-
 src/utilities/manager_utils.py       |  4 ++--
 12 files changed, 47 insertions(+), 32 deletions(-)

diff --git a/manager.py b/manager.py
index afdbe311..9c60d830 100644
--- a/manager.py
+++ b/manager.py
@@ -20,7 +20,7 @@
 from src.utilities.langgraph_common_functions import call_model, call_tool, multiple_tools_msg, no_tools_msg, empty_message_msg
 from src.utilities.start_project_functions import set_up_dot_clean_coder_dir
 from src.utilities.util_functions import join_paths
-from src.utilities.llms import init_llms
+from src.utilities.llms import init_llms_medium_intelligence
 from src.utilities.print_formatters import print_formatted
 import json
 import os
@@ -36,7 +36,7 @@ def __init__(self):
         self.work_dir = os.getenv("WORK_DIR")
         set_up_dot_clean_coder_dir(self.work_dir)
         self.tools = self.prepare_tools()
-        self.llms = init_llms(tools=self.tools, run_name="Manager")
+        self.llms = init_llms_medium_intelligence(tools=self.tools, run_name="Manager")
         self.manager = self.setup_workflow()
         self.saved_messages_path = join_paths(self.work_dir, ".clean_coder/manager_messages.json")
 
diff --git a/src/agents/debugger_agent.py b/src/agents/debugger_agent.py
index 5adf5741..9dff2782 100644
--- a/src/agents/debugger_agent.py
+++ b/src/agents/debugger_agent.py
@@ -17,7 +17,7 @@
     read_coderrules,
     convert_images,
 )
-from src.utilities.llms import init_llms
+from src.utilities.llms import init_llms_medium_intelligence
 from src.utilities.langgraph_common_functions import (
     call_model, call_tool, ask_human, after_ask_human_condition, multiple_tools_msg, no_tools_msg, agent_looped_human_help,
 )
@@ -50,7 +50,7 @@ class Debugger():
     def __init__(self, files, work_dir, human_feedback, image_paths, playwright_code=None):
         self.work_dir = work_dir
         self.tools = prepare_tools(work_dir)
-        self.llms = init_llms(self.tools, "Debugger")
+        self.llms = init_llms_medium_intelligence(self.tools, "Debugger")
         self.system_message = SystemMessage(
             content=system_prompt_template.format(project_rules=read_coderrules())
         )
diff --git a/src/agents/executor_agent.py b/src/agents/executor_agent.py
index 5b40c6e6..d873ba45 100644
--- a/src/agents/executor_agent.py
+++ b/src/agents/executor_agent.py
@@ -7,7 +7,7 @@
 from langgraph.graph import StateGraph, END
 from dotenv import load_dotenv, find_dotenv
 from langchain.tools import tool
-from src.utilities.llms import init_llms
+from src.utilities.llms import init_llms_medium_intelligence
 from src.utilities.print_formatters import print_formatted, print_error
 from src.utilities.util_functions import (
     check_file_contents, exchange_file_contents, bad_tool_call_looped
@@ -43,7 +43,7 @@ class Executor():
     def __init__(self, files, work_dir):
         self.work_dir = work_dir
         self.tools = prepare_tools(work_dir)
-        self.llms = init_llms(self.tools, "Executor")
+        self.llms = init_llms_medium_intelligence(self.tools, "Executor")
         self.system_message = SystemMessage(
             content=system_prompt_template
         )
diff --git a/src/agents/frontend_feedback.py b/src/agents/frontend_feedback.py
index 430397ba..f4da34cf 100644
--- a/src/agents/frontend_feedback.py
+++ b/src/agents/frontend_feedback.py
@@ -1,6 +1,6 @@
 import os
 from langchain_core.messages import HumanMessage
-from src.utilities.llms import init_llms
+from src.utilities.llms import init_llms_medium_intelligence
 from src.utilities.start_work_functions import read_frontend_feedback_story
 import base64
 import textwrap
@@ -9,7 +9,7 @@
 from pydantic import BaseModel, Field
 
 
-llms = init_llms(run_name="Frontend Feedback")
+llms = init_llms_medium_intelligence(run_name="Frontend Feedback")
 
 llm = llms[0].with_fallbacks(llms[1:])
 
diff --git a/src/agents/planner_agent.py b/src/agents/planner_agent.py
index 15e916f4..007e23bb 100644
--- a/src/agents/planner_agent.py
+++ b/src/agents/planner_agent.py
@@ -7,7 +7,7 @@
 from src.utilities.langgraph_common_functions import after_ask_human_condition
 from src.utilities.user_input import user_input
 from src.utilities.graphics import LoadingAnimation
-from src.utilities.llms import init_llms_high_intelligence, init_llms_mini, init_llms
+from src.utilities.llms import init_llms_high_intelligence, init_llms_mini, init_llms_medium_intelligence
 import os
 
 
@@ -15,7 +15,7 @@
 
 llms_planners = init_llms_high_intelligence(run_name="Planner")
 llm_strong = llms_planners[0].with_fallbacks(llms_planners[1:])
-llms_middle_strength = init_llms(run_name="Plan finalizer")
+llms_middle_strength = init_llms_medium_intelligence(run_name="Plan finalizer")
 llm_middle_strength = llms_middle_strength[0].with_fallbacks(llms_middle_strength[1:])
 llms_controller = init_llms_mini(run_name="Plan Files Controller")
 llm_controller = llms_controller[0].with_fallbacks(llms_controller[1:])
diff --git a/src/agents/researcher_agent.py b/src/agents/researcher_agent.py
index c96ed5df..bdea126a 100644
--- a/src/agents/researcher_agent.py
+++ b/src/agents/researcher_agent.py
@@ -13,7 +13,7 @@
     call_model, call_tool, ask_human, after_ask_human_condition, no_tools_msg
 )
 from src.utilities.print_formatters import print_formatted
-from src.utilities.llms import init_llms_mini
+from src.utilities.llms import init_llms_medium_intelligence
 import os
 
 
@@ -27,7 +27,7 @@
 @tool
 def final_response_researcher(
         files_to_work_on: Annotated[List[str], "List of existing files to potentially introduce changes"],
-        reference_files: Annotated[List[str], "List of code files useful as a reference without images"],
+        reference_files: Annotated[List[str], "List of code files useful as a reference. There are files where similar task been implemented already."],
         template_images: Annotated[List[str], "List of template images"]):
     """That tool outputs list of files programmer will need to change and paths to graphical patterns if some.
     Use that tool only when you 100% sure you found all the files programmer will need to modify.
@@ -65,7 +65,7 @@ def __init__(self, work_dir):
         self.tools = [see_file, list_dir, final_response_researcher]
         if vdb_available():
             self.tools.append(retrieve_files_by_semantic_query)
-        self.llms = init_llms_mini(self.tools, "Researcher")
+        self.llms = init_llms_medium_intelligence(self.tools, "Researcher")
 
         # workflow definition
         researcher_workflow = StateGraph(AgentState)
diff --git a/src/prompts/researcher_system.prompt b/src/prompts/researcher_system.prompt
index bd5d036b..e53aaaa0 100644
--- a/src/prompts/researcher_system.prompt
+++ b/src/prompts/researcher_system.prompt
@@ -1,21 +1,26 @@
-As a curious filesystem researcher, examine files thoroughly, prioritizing comprehensive checks. 
-You checking a lot of different folders looking around for interesting files (hey, you are very curious!) before giving the final answer.
-The more folders/files you will check, the more they will pay you.
-When you discover significant dependencies from one file to another, ensure to inspect both.
-Important: you are can not modify any files! You are reasearching only, but modifications will introduce another guys. Do not execute the task, just prepare ground for it's execution.
-Your final selection should include files needed to be modified or needed as reference for a programmer 
-(for example to see how code in similar file implemented). 
-Avoid recommending unseen or non-existent files in final response.
-
-You need to point out all files programmer needed to see to execute the task and only that task. Task is:
+As a curious filesystem researcher, thoroughly inspect the files for a task by following these steps:
+
+1. Break down the task to identify which parts of the application are responsible for executing it. Identify the root of the problem.
+
+2. Search through various folders to find all necessary files needed to modify for completing the task. Explore numerous folders and files to maximize your understanding.
+
+3. When you find significant dependencies between files, examine both thoroughly.
+
+4. Remember, you are only researching. Do not modify any files; modifications will be handled by others. Just prepare the groundwork for task execution.
+
+5. Also identify files that need to be used as a reference for a programmer. Reference files should include examples where similar tasks have been solved or similar coding tools have been used and can serve as code guidance.
+
+6. Only include files that exist and are necessary for the task. You must not provide information about files you haven’t seen or that don’t exist.
+
+Lastly, list all files the programmer needs to see to execute the task and only include those relevant to this specific task:
+
 '''
 {task}
 '''
 
-Here is some additional info about project:
+Here's some additional information about the project:
 '''
 {project_rules}
 '''
 
-First, provide reasoning about results of your previous action. Think what do you need to find now in order to accomplish the task.
-Next, call tool(s). You can use up to 3 tool cals simultaniousely to speed up research.
\ No newline at end of file
+First, think about what you need to find to accomplish the task based on past actions. Then, use up to 3 tools simultaneously to gather this information.
\ No newline at end of file
diff --git a/src/tools/rag/code_splitter.py b/src/tools/rag/code_splitter.py
index a3dd6d90..9201e389 100644
--- a/src/tools/rag/code_splitter.py
+++ b/src/tools/rag/code_splitter.py
@@ -349,7 +349,6 @@ def split_code(code: str, extension: str, chunk_size: int = 1000):
     """Splits code for smaller elements as functions. That allows to describe functions for semantic retrieval tool."""
     language = extension_to_language.get(extension)
     if not language:
-        print(f'problem with extension {extension}')
         return []
     splitter = RecursiveCharacterTextSplitter.from_language(
         language=Language(language), chunk_size=chunk_size, chunk_overlap=0
diff --git a/src/tools/rag/retrieval.py b/src/tools/rag/retrieval.py
index 603ca265..0c96d3ba 100644
--- a/src/tools/rag/retrieval.py
+++ b/src/tools/rag/retrieval.py
@@ -28,8 +28,16 @@ def vdb_available():
     return True if get_collection() else False
 
 
-def retrieve(question):
-    # collection should be initialized once, in the class init
+def retrieve(question: str) -> str:
+    """
+    Retrieve files descriptions by semantic query.
+
+    Parameters:
+    question (str): The query to retrieve information for.
+
+    Returns:
+    str: A formatted response with file descriptions of found files.
+    """
     collection = get_collection()
     retrieval = collection.query(query_texts=[question], n_results=8)
     reranked_docs = cohere_client.rerank(
diff --git a/src/tools/rag/write_descriptions.py b/src/tools/rag/write_descriptions.py
index 0f4a36d8..2ddac8b4 100644
--- a/src/tools/rag/write_descriptions.py
+++ b/src/tools/rag/write_descriptions.py
@@ -118,6 +118,9 @@ def write_file_chunks_descriptions(subfolders_with_files=['/']):
         # get file extenstion
         extension = file_path.suffix.lstrip('.')
         file_chunks = split_code(file_content, extension)
+        # do not describe chunk of 1-chunk files
+        if len(file_chunks) <= 1:
+            continue
         descriptions = chain.batch([{'coderrules': coderrules, 'file_code': file_content, 'chunk_code': chunk} for chunk in file_chunks])
         print(descriptions)
 
diff --git a/src/utilities/llms.py b/src/utilities/llms.py
index 3885dcc9..81fbf2ac 100644
--- a/src/utilities/llms.py
+++ b/src/utilities/llms.py
@@ -31,7 +31,7 @@ def llm_open_local_hosted(model):
     timeout=90,
 )
 
-def init_llms(tools=None, run_name="Clean Coder", temp=0):
+def init_llms_medium_intelligence(tools=None, run_name="Clean Coder", temp=0):
     llms = []
     if getenv("ANTHROPIC_API_KEY"):
         llms.append(ChatAnthropic(model='claude-3-5-sonnet-20241022', temperature=temp, timeout=60, max_tokens=2048))
diff --git a/src/utilities/manager_utils.py b/src/utilities/manager_utils.py
index a2b2e560..1a2d05cd 100644
--- a/src/utilities/manager_utils.py
+++ b/src/utilities/manager_utils.py
@@ -6,7 +6,7 @@
 from langchain_community.chat_models import ChatOllama
 from langchain_anthropic import ChatAnthropic
 from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage, AIMessage
-from src.utilities.llms import init_llms
+from src.utilities.llms import init_llms_medium_intelligence
 from src.utilities.util_functions import join_paths, read_coderrules, list_directory_tree
 from src.utilities.start_project_functions import create_project_plan_file
 from langchain_core.output_parsers import StrOutputParser
@@ -52,7 +52,7 @@
 with open(f"{parent_dir}/prompts/manager_progress.prompt", "r") as f:
     tasks_progress_template = f.read()
 
-llms = init_llms(run_name="Progress description")
+llms = init_llms_medium_intelligence(run_name="Progress description")
 llm = llms[0].with_fallbacks(llms[1:])
 
 

From 28ff399ead8796ac32ad4a4a775e5ede5b4b1b39 Mon Sep 17 00:00:00 2001
From: Grigorij Dudnik <dudnikgrv@gmail.com>
Date: Tue, 4 Mar 2025 10:45:47 +0100
Subject: [PATCH 21/32] updates

---
 src/tools/rag/retrieval.py          | 9 ++++++++-
 src/tools/rag/write_descriptions.py | 4 +++-
 src/utilities/llms.py               | 7 ++++---
 3 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/src/tools/rag/retrieval.py b/src/tools/rag/retrieval.py
index 0c96d3ba..9d7b7bee 100644
--- a/src/tools/rag/retrieval.py
+++ b/src/tools/rag/retrieval.py
@@ -40,6 +40,13 @@ def retrieve(question: str) -> str:
     """
     collection = get_collection()
     retrieval = collection.query(query_texts=[question], n_results=8)
+    response = ""
+    for i, description in enumerate(retrieval["documents"]):
+        filename = retrieval["ids"][0][i]
+        response += f"{filename}:\n\n{description}\n\n"
+    response += "\n\nRemember to see files before adding to final response!"
+    return response
+
     reranked_docs = cohere_client.rerank(
         query=question,
         documents=retrieval["documents"][0],
@@ -48,7 +55,7 @@ def retrieve(question: str) -> str:
         #return_documents=True,
     )
     reranked_indexes = [result.index for result in reranked_docs.results]
-    response = ""
+
     for index in reranked_indexes:
         filename = retrieval["ids"][0][index]
         description = retrieval["documents"][0][index]
diff --git a/src/tools/rag/write_descriptions.py b/src/tools/rag/write_descriptions.py
index 2ddac8b4..b8b23c33 100644
--- a/src/tools/rag/write_descriptions.py
+++ b/src/tools/rag/write_descriptions.py
@@ -10,6 +10,7 @@
 from src.utilities.start_work_functions import CoderIgnore, file_folder_ignored
 from src.utilities.llms import init_llms_mini
 from src.tools.rag.code_splitter import split_code
+from src.utilities.print_formatters import print_formatted
 
 
 load_dotenv(find_dotenv())
@@ -74,7 +75,7 @@ def write_file_descriptions(subfolders_with_files=['/']):
 """
     )
     llms = init_llms_mini(tools=[], run_name='File Describer')
-    llm = llms[0]
+    llm = llms[0].with_fallbacks(llms[1:])
     chain = prompt | llm | StrOutputParser()
 
     description_folder = join_paths(work_dir, '.clean_coder/files_and_folders_descriptions')
@@ -134,6 +135,7 @@ def write_file_chunks_descriptions(subfolders_with_files=['/']):
 
 def upload_descriptions_to_vdb():
     """Uploads descriptions, created by write_file_chunks_descriptions, into vector database."""
+    print_formatted("Uploading file descriptions to vector storage...", color='magenta')
     chroma_client = chromadb.PersistentClient(path=join_paths(work_dir, '.clean_coder/chroma_base'))
     collection_name = f"clean_coder_{Path(work_dir).name}_file_descriptions"
 
diff --git a/src/utilities/llms.py b/src/utilities/llms.py
index 81fbf2ac..ed81e8e5 100644
--- a/src/utilities/llms.py
+++ b/src/utilities/llms.py
@@ -34,9 +34,9 @@ def llm_open_local_hosted(model):
 def init_llms_medium_intelligence(tools=None, run_name="Clean Coder", temp=0):
     llms = []
     if getenv("ANTHROPIC_API_KEY"):
-        llms.append(ChatAnthropic(model='claude-3-5-sonnet-20241022', temperature=temp, timeout=60, max_tokens=2048))
+        llms.append(ChatAnthropic(model='claude-3-7-sonnet-latest', temperature=temp, timeout=60, max_tokens=2048))
     if getenv("OPENROUTER_API_KEY"):
-        llms.append(llm_open_router("anthropic/claude-3.5-sonnet"))
+        llms.append(llm_open_router("anthropic/claude-3.7-sonnet"))
     if getenv("OPENAI_API_KEY"):
         llms.append(ChatOpenAI(model="gpt-4o", temperature=temp, timeout=60))
     # if os.getenv("GOOGLE_API_KEY"):
@@ -79,12 +79,13 @@ def init_llms_high_intelligence(tools=None, run_name="Clean Coder", temp=0.2):
         llms.append(ChatOpenAI(model="o3-mini", temperature=1, timeout=60, reasoning_effort="high"))
     if os.getenv("OPENAI_API_KEY"):
         llms.append(ChatOpenAI(model="o1", temperature=1, timeout=60))
+
     if os.getenv("OPENROUTER_API_KEY"):
         llms.append(llm_open_router("openai/gpt-4o"))
     if os.getenv("OPENAI_API_KEY"):
         llms.append(ChatOpenAI(model="gpt-4o", temperature=temp, timeout=60))
     if os.getenv("ANTHROPIC_API_KEY"):
-        llms.append(ChatAnthropic(model='claude-3-5-sonnet-20241022', temperature=temp, timeout=60, max_tokens=2048))
+        llms.append(ChatAnthropic(model='claude-3-7-sonnet-latest', temperature=temp, timeout=60, max_tokens=2048))
     # if os.getenv("GOOGLE_API_KEY"):
     #     llms.append(ChatGoogleGenerativeAI(model="gemini-2.0-flash-exp", temperature=temp, timeout=60))
     if os.getenv("OLLAMA_MODEL"):

From 7ea944e30b4554f414be105631bf50b9512eaf24 Mon Sep 17 00:00:00 2001
From: Grigorij Dudnik <dudnikgrv@gmail.com>
Date: Tue, 4 Mar 2025 11:01:53 +0100
Subject: [PATCH 22/32] updating semantic retrieval

---
 src/tools/rag/retrieval.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/tools/rag/retrieval.py b/src/tools/rag/retrieval.py
index 9d7b7bee..1e5d24a6 100644
--- a/src/tools/rag/retrieval.py
+++ b/src/tools/rag/retrieval.py
@@ -41,9 +41,9 @@ def retrieve(question: str) -> str:
     collection = get_collection()
     retrieval = collection.query(query_texts=[question], n_results=8)
     response = ""
-    for i, description in enumerate(retrieval["documents"]):
+    for i, description in enumerate(retrieval["documents"][0]):
         filename = retrieval["ids"][0][i]
-        response += f"{filename}:\n\n{description}\n\n"
+        response += f"{filename}:\n\n{description}\n\n###\n\n"
     response += "\n\nRemember to see files before adding to final response!"
     return response
 
@@ -59,7 +59,7 @@ def retrieve(question: str) -> str:
     for index in reranked_indexes:
         filename = retrieval["ids"][0][index]
         description = retrieval["documents"][0][index]
-        response += f"{filename}:\n\n{description}\n\n"
+        response += f"{filename}:\n{description}\n\n###"
     response += "\n\nRemember to see files before adding to final response!"
 
     return response

From c4dca440e711b4ee148c8c4c2071c2083204be85 Mon Sep 17 00:00:00 2001
From: Grigorij Dudnik <dudnikgrv@gmail.com>
Date: Tue, 4 Mar 2025 12:02:01 +0100
Subject: [PATCH 23/32] working on binary ranker

---
 src/tools/rag/retrieval.py                |  52 ++++++
 src/utilities/syntax_checker_functions.py | 195 +++++++++++++---------
 2 files changed, 172 insertions(+), 75 deletions(-)

diff --git a/src/tools/rag/retrieval.py b/src/tools/rag/retrieval.py
index 1e5d24a6..0c2ced23 100644
--- a/src/tools/rag/retrieval.py
+++ b/src/tools/rag/retrieval.py
@@ -3,6 +3,9 @@
 import chromadb
 from pathlib import Path
 from dotenv import load_dotenv, find_dotenv
+from src.utilities.llms import init_llms_mini
+from langchain.prompts import ChatPromptTemplate
+from langchain_core.output_parsers import StrOutputParser
 
 
 load_dotenv(find_dotenv())
@@ -63,10 +66,59 @@ def retrieve(question: str) -> str:
     response += "\n\nRemember to see files before adding to final response!"
 
     return response
+# New class added for binary ranking with lazy loading.
+class BinaryRanker:
+    def __init__(self):
+        # Lazy-loaded chain; not initialized until rank() is called.
+        self.chain = None
+
+    def initialize_chain(self):
+        if self.chain is None:
+            # Define prompt template for binary ranking.
+            template = (
+                "You are a binary ranker. Evaluate the relevance of a document to a given question.\n"
+                "Question: {question}\n"
+                "Document: {document}\n\n"
+                "If the document is relevant to the question, output only '1'. "
+                "If it may be useful for programmer as contains similar code, but no relevant directly, also output '1'. "
+                "If it is not relevant at all, output only '0'."
+            )
+            prompt = ChatPromptTemplate.from_template(template)
+            # Initialize LLMs with minimal intelligence and set run name to 'BinaryRanker'
+            llms = init_llms_mini(tools=[], run_name='BinaryRanker')
+            llm = llms[0].with_fallbacks(llms[1:])
+            # Build the chain by combining the prompt template, the LLM instance, and StrOutputParser.
+            self.chain = prompt | llm | StrOutputParser()
+
+    def rank(self, question: str, retrieval: dict) -> list:
+        # Ensure the chain is initialized (lazy loading)
+        self.initialize_chain()
+        # Extract list of documents and their ids from the retrieval result.
+        documents_list = retrieval["documents"][0]
+        id_list = retrieval["ids"][0]
+        # Build input for batch processing: list of dicts containing question and document.
+        batch_inputs = []
+        for doc in documents_list:
+            batch_inputs.append({"question": question, "document": doc})
+        # Use the chain batch function to get binary outputs.
+        results = self.chain.batch(batch_inputs)
+        # Pair each document id with its binary ranking result.
+        ranking = []
+        for idx, result in enumerate(results):
+            ranking.append((id_list[idx], result.strip()))
+        return ranking
 
 
 if __name__ == "__main__":
+    # Example usage of BinaryRanker for testing.
     question = "Common styles, used in the main page"
+    collection = get_collection()
+    retrieval = collection.query(query_texts=[question], n_results=8)
+    binary_ranker = BinaryRanker()
+    ranking = binary_ranker.rank(question, retrieval)
+    print("Binary Ranking Results:", ranking)
+    
+    # Test the retrieve function
     results = retrieve(question)
     print("\n\n")
     print("results: ", results)
diff --git a/src/utilities/syntax_checker_functions.py b/src/utilities/syntax_checker_functions.py
index 8ca50316..bc6122a4 100644
--- a/src/utilities/syntax_checker_functions.py
+++ b/src/utilities/syntax_checker_functions.py
@@ -216,89 +216,134 @@ def parse_yaml(yaml_string):
 
 if __name__ == "__main__":
     code = """
-<template>
-  <div class="form-container">
-    <Notification v-show="notificationMessage" :message="notificationMessage" :type="notificationType" />
-    <h1>Change Password</h1>
-    <form @submit.prevent="handleSubmit">
-      <div>
-        <label for="current-password">Current Password:</label>
-        <input type="password" v-model="currentPassword" required />
+'use client';
+import React, { useEffect, useState } from 'react';
+import { useRouter } from 'next/navigation';
+
+// Scale indicator component showing agreement levels from 1-5
+const ScaleIndicator = () => (
+  <div className="flex flex-col items-center mb-12">
+    <div className="w-full max-w-2xl mx-auto">
+      <div className="flex justify-between text-sm text-gray-600 mb-1">
+        <span>Highly disagree</span>
+        <span>Highly agree</span>
       </div>
-      <div>
-        <label for="new-password">New Password:</label>
-        <input type="password" v-model="newPassword" required />
+      <div className="relative w-full h-[2px] bg-gray-200 mb-8">
+        {Array.from({ length: 5 }, (_, i) => i + 1).map((num) => (
+          <div
+            key={num}
+            className="absolute -translate-x-1/2"
+            style={{ left: `${((num - 1) * 100) / 4}%` }}
+          >
+            <div className="absolute -top-3 w-[2px] h-[6px] bg-gray-300" />
+            <div className="absolute top-4 text-sm text-gray-600">
+              {num}
+            </div>
+          </div>
+        ))}
       </div>
-      <div>
-        <label for="confirm-new-password">Confirm New Password:</label>
-        <input type="password" v-model="confirmNewPassword" required />
-      </div>
-      <button type="submit">Change Password</button>
-    </form>
+    </div>
   </div>
-</template>
-
-<script>
-import { useAuthStore } from '@/stores/auth';
-import Notification from '@/components/Notification.vue';
-
-export default {
-  components: {
-    Notification,
-  },
-  data() {
-    return {
-      currentPassword: '',
-      newPassword: '',
-      confirmNewPassword: '',
-      apiUrl: import.meta.env.VITE_API_URL,
-      notificationMessage: '',
-      notificationType: 'positive',
-    };
-  },
-  methods: {
-    async handleSubmit() {
-      if (this.newPassword !== this.confirmNewPassword) {
-        this.notificationMessage = 'New passwords do not match';
-        this.notificationType = 'negative';
-        return;
-      }
+);
+
+function NavHeader() {
+  const router = useRouter();
+  return (
+    <div className="flex flex-col items-center">
+      <div className="flex items-center justify-start w-full mb-2">
+        <button
+          className="text-gray-700 hover:text-gray-900 mr-4"
+          onClick={() => router.back()}
+          aria-label="Go back"
+        >
+          <svg
+            xmlns="http://www.w3.org/2000/svg"
+            fill="none"
+            viewBox="0 0 24 24"
+            strokeWidth={2}
+            stroke="currentColor"
+            className="w-5 h-5"
+          >
+            <path strokeLinecap="round" strokeLinejoin="round" d="M15.75 19.5L8.25 12l7.5-7.5" />
+          </svg>
+        </button>
+        <h1 className="flex-grow text-center text-xl font-bold">Survey Results</h1>
+      </div>
+    </div>
+  );
+}
 
-      const formData = new FormData();
-      formData.append('current_password', this.currentPassword);
-      formData.append('new_password', this.newPassword);
+export default function Page({ params }: { params: Promise<{ uuid: string }> }) {
+  const { uuid } = React.use(params);
+  const [profile, setProfile] = useState<any>(null);
+  const [error, setError] = useState<string>('');
 
+  useEffect(() => {
+    const fetchProfile = async () => {
       try {
-        const response = await fetch(this.apiUrl + '/change-password', {
-          method: 'POST',
-          headers: {
-            'Authorization': `Bearer ${localStorage.getItem('token')}`,
-          },
-          body: formData,
-        });
+        const response = await fetch(
+          `${process.env.NEXT_PUBLIC_API_URL}/profile/${uuid}`
+        );
         if (!response.ok) {
-          throw new Error('Password change failed');
+          throw new Error('Failed to fetch profile data');
         }
-        this.notificationMessage = 'Password changed successfully';
-        this.notificationType = 'positive';
-        setTimeout(() => {
-          this.notificationMessage = '';
-          this.$router.push('/');
-        }, 2000);
-      } catch (error) {
-        console.error('Error:', error);
-        this.notificationMessage = error.message;
-        this.notificationType = 'negative';
-      } finally {
-        this.currentPassword = '';
-        this.newPassword = '';
-        this.confirmNewPassword = '';
+        const data = await response.json();
+        setProfile(data);
+      } catch (err: any) {
+        console.error('Error details:', err);
+        setError(err.message || 'An error occurred');
       }
-    },
-  },
-};
-</script>
+    };
+
+    fetchProfile();
+  }, [uuid]);
+
+  if (error) {
+    return (
+      <div className="p-4 text-red-500">
+        {error}
+      </div>
+    );
+  }
+
+  if (!profile) {
+    return (
+      <div className="p-4">
+        Loading profile data...
+      </div>
+    );
+  }
+
+  if (!profile.survey_data) {
+    return <div className="p-4 text-gray-700">No survey data available.</div>;
+  }
+
+  return (
+    <div className="px-4 py-2 text-gray-900 max-w-4xl mx-auto">
+      <div className="mb-8">
+        <NavHeader />
+        <ScaleIndicator />
+      </div>
 
-<style scoped src="@/assets/styles/forms.css"></style>
+      {profile.survey_data.map((category) => (
+        <div key={category.name} className="mb-10">
+          <h2 className="text-base font-semibold text-gray-800 mb-4">
+            {category.name}
+          </h2>
+          {category.statements.map((statement: any) => (
+            <div key={statement.id} className="flex items-start py-4 border-b border-gray-200 last:border-b-0">
+              <span className="text-2xl font-semibold text-gray-900 w-8 text-center">
+                {statement.value}
+              </span>
+              <p className="text-base text-gray-700 leading-relaxed flex-1 ml-6">
+                {statement.text}
+              </p>
+            </div>
+          ))}
+        </div>
+      ))}
+    </div>
+  );
+}
 """
-    print(parse_vue_basic(code))
\ No newline at end of file
+    print(parse_tsx(code))
\ No newline at end of file

From de89d423037b25f6115aa7c18618e7986586c8c9 Mon Sep 17 00:00:00 2001
From: Grigorij Dudnik <dudnikgrv@gmail.com>
Date: Tue, 4 Mar 2025 12:54:23 +0100
Subject: [PATCH 24/32] binary ranker for semantic retrieval done

---
 src/prompts/binary_ranker.prompt |   8 +++
 src/tools/rag/retrieval.py       | 102 +++++++++++++++++++------------
 2 files changed, 71 insertions(+), 39 deletions(-)
 create mode 100644 src/prompts/binary_ranker.prompt

diff --git a/src/prompts/binary_ranker.prompt b/src/prompts/binary_ranker.prompt
new file mode 100644
index 00000000..b5110a6b
--- /dev/null
+++ b/src/prompts/binary_ranker.prompt
@@ -0,0 +1,8 @@
+You are a binary ranker. Evaluate whether the document may contain the answer to the given question.
+Question: """{question}"""
+Filename: """{filename}"""
+Document: """{document}"""
+
+If the document is relevant to the question, output only '1'. 
+If it is not directly relevant but may be useful to the programmer because it contains similar code, also output only '1'. 
+If it is not relevant at all, output only '0'.
diff --git a/src/tools/rag/retrieval.py b/src/tools/rag/retrieval.py
index 0c2ced23..3498acdb 100644
--- a/src/tools/rag/retrieval.py
+++ b/src/tools/rag/retrieval.py
@@ -43,47 +43,61 @@ def retrieve(question: str) -> str:
     """
     collection = get_collection()
     retrieval = collection.query(query_texts=[question], n_results=8)
+    
+    # Use BinaryRanker to filter relevant documents
+    binary_ranker = BinaryRanker()
+    ranking_results = binary_ranker.rank(question, retrieval)
+
+    # Filter documents that are marked as relevant (score = '1')
     response = ""
-    for i, description in enumerate(retrieval["documents"][0]):
-        filename = retrieval["ids"][0][i]
-        response += f"{filename}:\n\n{description}\n\n###\n\n"
+    for filename, score in ranking_results:
+        if score == '1':
+            # Find the corresponding document in the retrieval results
+            idx = retrieval["ids"][0].index(filename)
+            description = retrieval["documents"][0][idx]
+            response += f"{filename}:\n\n{description}\n\n###\n\n"
+
+    # If no relevant documents found, return a message
+    if not response:
+        return "No relevant documents found for your query."
+
     response += "\n\nRemember to see files before adding to final response!"
     return response
 
-    reranked_docs = cohere_client.rerank(
-        query=question,
-        documents=retrieval["documents"][0],
-        top_n=4,
-        model="rerank-english-v3.0",
-        #return_documents=True,
-    )
-    reranked_indexes = [result.index for result in reranked_docs.results]
-
-    for index in reranked_indexes:
-        filename = retrieval["ids"][0][index]
-        description = retrieval["documents"][0][index]
-        response += f"{filename}:\n{description}\n\n###"
-    response += "\n\nRemember to see files before adding to final response!"
 
-    return response
 # New class added for binary ranking with lazy loading.
 class BinaryRanker:
+    """
+    A binary document ranker that uses LLM to determine document relevance.
+    
+    This class implements lazy loading of the LLM chain, meaning the chain
+    is only initialized when the rank method is called. It evaluates whether
+    each document is relevant to a given question, returning a binary score
+    (0 or 1) for each document.
+    """
     def __init__(self):
+        """
+        Initialize the BinaryRanker with lazy loading.
+        
+        The LLM chain is not created until the rank method is called.
+        """
         # Lazy-loaded chain; not initialized until rank() is called.
         self.chain = None
 
     def initialize_chain(self):
+        """
+        Initialize the LLM chain if it hasn't been initialized yet.
+        
+        This method loads the prompt template from an external file, initializes the LLM,
+        and builds the chain used for binary document ranking.
+        """
         if self.chain is None:
-            # Define prompt template for binary ranking.
-            template = (
-                "You are a binary ranker. Evaluate the relevance of a document to a given question.\n"
-                "Question: {question}\n"
-                "Document: {document}\n\n"
-                "If the document is relevant to the question, output only '1'. "
-                "If it may be useful for programmer as contains similar code, but no relevant directly, also output '1'. "
-                "If it is not relevant at all, output only '0'."
-            )
-            prompt = ChatPromptTemplate.from_template(template)
+            # Load the binary ranker prompt from an external file.
+            grandparent_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
+            file_path = f"{grandparent_dir}/prompts/binary_ranker.prompt"
+            with open(file_path, 'r') as file_handle:
+                template_text = file_handle.read()
+            prompt = ChatPromptTemplate.from_template(template_text)
             # Initialize LLMs with minimal intelligence and set run name to 'BinaryRanker'
             llms = init_llms_mini(tools=[], run_name='BinaryRanker')
             llm = llms[0].with_fallbacks(llms[1:])
@@ -91,32 +105,42 @@ def initialize_chain(self):
             self.chain = prompt | llm | StrOutputParser()
 
     def rank(self, question: str, retrieval: dict) -> list:
+        """
+        Rank documents based on their relevance to the question.
+
+        Parameters:
+        question (str): The query to evaluate document relevance against.
+        retrieval (dict): The retrieval results from a vector database query.
+
+        Returns:
+        list: A list of tuples containing document IDs and their binary relevance scores ('0' or '1').
+        """
         # Ensure the chain is initialized (lazy loading)
         self.initialize_chain()
         # Extract list of documents and their ids from the retrieval result.
         documents_list = retrieval["documents"][0]
-        id_list = retrieval["ids"][0]
-        # Build input for batch processing: list of dicts containing question and document.
+        filenames_list = retrieval["ids"][0]
+        # Build input for batch processing: list of dicts containing question, filename, and document.
         batch_inputs = []
-        for doc in documents_list:
-            batch_inputs.append({"question": question, "document": doc})
+        for idx, doc in enumerate(documents_list):
+            batch_inputs.append({
+                "question": question,
+                "filename": filenames_list[idx],
+                "document": doc
+            })
         # Use the chain batch function to get binary outputs.
         results = self.chain.batch(batch_inputs)
         # Pair each document id with its binary ranking result.
         ranking = []
         for idx, result in enumerate(results):
-            ranking.append((id_list[idx], result.strip()))
+            ranking.append((filenames_list[idx], result.strip()))
         return ranking
 
 
 if __name__ == "__main__":
     # Example usage of BinaryRanker for testing.
-    question = "Common styles, used in the main page"
-    collection = get_collection()
-    retrieval = collection.query(query_texts=[question], n_results=8)
-    binary_ranker = BinaryRanker()
-    ranking = binary_ranker.rank(question, retrieval)
-    print("Binary Ranking Results:", ranking)
+    question = "Some tool that can change files"
+
     
     # Test the retrieve function
     results = retrieve(question)

From 67ec89045a3a9228ea5759372bb6d7682fd1835a Mon Sep 17 00:00:00 2001
From: Grigorij Dudnik <dudnikgrv@gmail.com>
Date: Wed, 5 Mar 2025 10:42:02 +0100
Subject: [PATCH 25/32] questionaru to index added

---
 manager.py                                    |  5 ++-
 single_task_coder.py                          |  8 +++--
 ...riptions.py => index_file_descriptions.py} | 31 ++++++++++++++++---
 src/tools/rag/retrieval.py                    | 22 ++++++-------
 src/utilities/manager_utils.py                |  2 +-
 5 files changed, 47 insertions(+), 21 deletions(-)
 rename src/tools/rag/{write_descriptions.py => index_file_descriptions.py} (86%)

diff --git a/manager.py b/manager.py
index 9c60d830..279d9835 100644
--- a/manager.py
+++ b/manager.py
@@ -34,7 +34,11 @@ class Manager:
     def __init__(self):
         load_dotenv(find_dotenv())
         self.work_dir = os.getenv("WORK_DIR")
+        # initial project setup
         set_up_dot_clean_coder_dir(self.work_dir)
+        setup_todoist_project_if_needed()
+
+
         self.tools = self.prepare_tools()
         self.llms = init_llms_medium_intelligence(tools=self.tools, run_name="Manager")
         self.manager = self.setup_workflow()
@@ -113,7 +117,6 @@ def setup_workflow(self):
 
     def run(self):
         print_formatted("😀 Hello! I'm Manager agent. Let's plan your project together!", color="green")
-        setup_todoist_project_if_needed()
 
         messages = get_manager_messages(self.saved_messages_path)
         inputs = {"messages": messages}
diff --git a/single_task_coder.py b/single_task_coder.py
index 6cfc59bd..1bdd57b7 100644
--- a/single_task_coder.py
+++ b/single_task_coder.py
@@ -17,6 +17,7 @@
 from src.utilities.start_project_functions import set_up_dot_clean_coder_dir
 from src.utilities.util_functions import create_frontend_feedback_story
 from concurrent.futures import ThreadPoolExecutor
+from src.tools.rag.index_file_descriptions import prompt_index_project_files
 
 
 use_frontend_feedback = bool(os.getenv("FRONTEND_URL"))
@@ -54,8 +55,9 @@ def run_clean_coder_pipeline(task: str, work_dir: str, doc_harvest: bool = False
 
 if __name__ == "__main__":
     work_dir = os.getenv("WORK_DIR")
-    set_up_dot_clean_coder_dir(work_dir)
-    task = user_input("Provide task to be executed. ")
     if not work_dir:
         raise Exception("WORK_DIR variable not provided. Please add WORK_DIR to .env file")
-    run_clean_coder_pipeline(task, work_dir)
\ No newline at end of file
+    set_up_dot_clean_coder_dir(work_dir)
+    prompt_index_project_files()
+    task = user_input("Provide task to be executed. ")
+    run_clean_coder_pipeline(task, work_dir)
diff --git a/src/tools/rag/write_descriptions.py b/src/tools/rag/index_file_descriptions.py
similarity index 86%
rename from src/tools/rag/write_descriptions.py
rename to src/tools/rag/index_file_descriptions.py
index b8b23c33..3d31611f 100644
--- a/src/tools/rag/write_descriptions.py
+++ b/src/tools/rag/index_file_descriptions.py
@@ -5,13 +5,15 @@
 from dotenv import load_dotenv, find_dotenv
 import chromadb
 import sys
+import questionary
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..')))
 from src.utilities.util_functions import join_paths, read_coderrules
-from src.utilities.start_work_functions import CoderIgnore, file_folder_ignored
+from src.utilities.start_work_functions import file_folder_ignored
 from src.utilities.llms import init_llms_mini
 from src.tools.rag.code_splitter import split_code
 from src.utilities.print_formatters import print_formatted
-
+from src.tools.rag.retrieval import vdb_available
+from src.utilities.manager_utils import QUESTIONARY_STYLE
 
 load_dotenv(find_dotenv())
 work_dir = os.getenv("WORK_DIR")
@@ -158,9 +160,30 @@ def upload_descriptions_to_vdb():
             )
 
 
-if __name__ == '__main__':
+def prompt_index_project_files():
+    """
+    Checks if the vector database (VDB) is available.
+    If not, prompts the user via questionary to index project files for better search.
+    On a "Yes" answer, triggers write_and_index_descriptions().
+    """
+    if not vdb_available():
+        answer = questionary.select(
+            "Do you want to index your project files for better search?",
+            choices=["Index", "Skip"],
+            style=QUESTIONARY_STYLE,
+            instruction="\nHint: Skip if you're running Clean Coder for the first time and testing, index if you're working on a real project"
+        ).ask()
+        if answer == "Index":
+            write_and_index_descriptions()
+
+
+def write_and_index_descriptions():
     #provide optionally which subfolders needs to be checked, if you don't want to describe all project folder
     write_file_descriptions(subfolders_with_files=['/'])
     write_file_chunks_descriptions()
 
-    upload_descriptions_to_vdb()
\ No newline at end of file
+    upload_descriptions_to_vdb()
+
+
+if __name__ == "__main__":
+    write_and_index_descriptions()
\ No newline at end of file
diff --git a/src/tools/rag/retrieval.py b/src/tools/rag/retrieval.py
index 3498acdb..056fb8ae 100644
--- a/src/tools/rag/retrieval.py
+++ b/src/tools/rag/retrieval.py
@@ -10,21 +10,19 @@
 
 load_dotenv(find_dotenv())
 work_dir = os.getenv("WORK_DIR")
-cohere_key = os.getenv("COHERE_API_KEY")
-if cohere_key:
-    cohere_client = cohere.Client(cohere_key)
+# cohere_key = os.getenv("COHERE_API_KEY")
+# if cohere_key:
+#     cohere_client = cohere.Client(cohere_key)
 collection_name = f"clean_coder_{Path(work_dir).name}_file_descriptions"
 
 
 def get_collection():
-    if cohere_key:
-        chroma_client = chromadb.PersistentClient(path=os.getenv('WORK_DIR') + '/.clean_coder/chroma_base')
-        try:
-            return chroma_client.get_collection(name=collection_name)
-        except:
-            # print("Vector database does not exist. (Optional) create it by running src/tools/rag/write_descriptions.py to improve file research capabilities")
-            return False
-    return False
+    chroma_client = chromadb.PersistentClient(path=os.getenv('WORK_DIR') + '/.clean_coder/chroma_base')
+    try:
+        return chroma_client.get_collection(name=collection_name)
+    except:
+        # print("Vector database does not exist. (Optional) create it by running src/tools/rag/index_file_descriptions.py to improve file research capabilities")
+        return False
 
 
 def vdb_available():
@@ -68,7 +66,7 @@ def retrieve(question: str) -> str:
 # New class added for binary ranking with lazy loading.
 class BinaryRanker:
     """
-    A binary document ranker that uses LLM to determine document relevance.
+    A binary document ranker that uses LLM to determine if a document is relevant.
     
     This class implements lazy loading of the LLM chain, meaning the chain
     is only initialized when the rank method is called. It evaluates whether
diff --git a/src/utilities/manager_utils.py b/src/utilities/manager_utils.py
index 1a2d05cd..364d6c4b 100644
--- a/src/utilities/manager_utils.py
+++ b/src/utilities/manager_utils.py
@@ -41,7 +41,7 @@
     ('highlighted', 'fg:green bold'),   # Highlighted choice
     ('selected', 'fg:green bold'),      # Selected choice
     ('separator', 'fg:magenta'),        # Separator between choices
-    ('instruction', 'fg:white'),        # Additional instructions
+    ('instruction', 'fg:#FFD700'),      # Additional instructions now in golden yellow (hex color)
 ])
 
 

From 0d5190ce735d8627abe47fef9f46343773e41485 Mon Sep 17 00:00:00 2001
From: Grigorij Dudnik <dudnikgrv@gmail.com>
Date: Wed, 5 Mar 2025 10:44:58 +0100
Subject: [PATCH 26/32] questionary to index added

---
 src/tools/rag/index_file_descriptions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tools/rag/index_file_descriptions.py b/src/tools/rag/index_file_descriptions.py
index 3d31611f..14e3b81f 100644
--- a/src/tools/rag/index_file_descriptions.py
+++ b/src/tools/rag/index_file_descriptions.py
@@ -171,7 +171,7 @@ def prompt_index_project_files():
             "Do you want to index your project files for better search?",
             choices=["Index", "Skip"],
             style=QUESTIONARY_STYLE,
-            instruction="\nHint: Skip if you're running Clean Coder for the first time and testing, index if you're working on a real project"
+            instruction="\nHint: Skip for testing Clean Coder; index for real projects."
         ).ask()
         if answer == "Index":
             write_and_index_descriptions()

From 58c267e25c18a34da2cfce735b7cc910fab238e8 Mon Sep 17 00:00:00 2001
From: Grigorij Dudnik <dudnikgrv@gmail.com>
Date: Thu, 6 Mar 2025 01:17:13 +0100
Subject: [PATCH 27/32] p bar

---
 src/tools/rag/index_file_descriptions.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/tools/rag/index_file_descriptions.py b/src/tools/rag/index_file_descriptions.py
index 14e3b81f..339d3f28 100644
--- a/src/tools/rag/index_file_descriptions.py
+++ b/src/tools/rag/index_file_descriptions.py
@@ -6,6 +6,7 @@
 import chromadb
 import sys
 import questionary
+from rich.progress import Progress
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..')))
 from src.utilities.util_functions import join_paths, read_coderrules
 from src.utilities.start_work_functions import file_folder_ignored
@@ -54,9 +55,9 @@ def collect_file_pathes(subfolders, work_dir):
     return allowed_files
 
 
-def write_file_descriptions(subfolders_with_files=['/']):
+def write_file_descriptions(subfolders_with_files=['/']): 
     all_files = collect_file_pathes(subfolders_with_files, work_dir)
-
+    progress = Progress()
     coderrules = read_coderrules()
 
     prompt = ChatPromptTemplate.from_template(
@@ -82,12 +83,13 @@ def write_file_descriptions(subfolders_with_files=['/']):
 
     description_folder = join_paths(work_dir, '.clean_coder/files_and_folders_descriptions')
     Path(description_folder).mkdir(parents=True, exist_ok=True)
-    # iterate over all files, take 8 files at once and descrive files in batch
     batch_size = 8
+    task_progress = progress.add_task("[gold1]Describing files (0/{})".format(len(all_files)), total=len(all_files))
+    progress.start()
+
     for i in range(0, len(all_files), batch_size):
         files_iteration = all_files[i:i + batch_size]
         descriptions = chain.batch([get_content(file_path) for file_path in files_iteration])
-        print(descriptions)
 
         for file_path, description in zip(files_iteration, descriptions):
             file_name = file_path.relative_to(work_dir).as_posix().replace('/', '=')
@@ -95,6 +97,9 @@ def write_file_descriptions(subfolders_with_files=['/']):
 
             with open(output_path, 'w', encoding='utf-8') as out_file:
                 out_file.write(description)
+            files_processed = progress.tasks[task_progress].completed + 1
+            progress.update(task_progress, advance=1, description=f"[gold1]Describing files ({files_processed}/{len(all_files)})")
+    progress.stop()
 
 
 

From fd699831aeddb1b91d41e43a55e1d2797ea2a985 Mon Sep 17 00:00:00 2001
From: Grigorij Dudnik <dudnikgrv@gmail.com>
Date: Thu, 6 Mar 2025 09:02:20 +0100
Subject: [PATCH 28/32] progress bars for indexing done

---
 src/tools/rag/index_file_descriptions.py | 29 ++++++++++++++++--------
 src/utilities/llms.py                    | 19 ++++++----------
 2 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/src/tools/rag/index_file_descriptions.py b/src/tools/rag/index_file_descriptions.py
index 339d3f28..65c70179 100644
--- a/src/tools/rag/index_file_descriptions.py
+++ b/src/tools/rag/index_file_descriptions.py
@@ -6,7 +6,6 @@
 import chromadb
 import sys
 import questionary
-from rich.progress import Progress
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..')))
 from src.utilities.util_functions import join_paths, read_coderrules
 from src.utilities.start_work_functions import file_folder_ignored
@@ -15,10 +14,21 @@
 from src.utilities.print_formatters import print_formatted
 from src.tools.rag.retrieval import vdb_available
 from src.utilities.manager_utils import QUESTIONARY_STYLE
+from tqdm import tqdm
 
 load_dotenv(find_dotenv())
 work_dir = os.getenv("WORK_DIR")
 
+GOLDEN = "\033[38;5;220m"
+MAGENTA = "\033[95m"
+RESET = "\033[0m"
+
+# Customize tqdm's bar format with golden and magenta colors
+bar_format = (
+    f"{GOLDEN}{{desc}}: {MAGENTA}{{percentage:3.0f}}%{GOLDEN}|"
+    f"{{bar}}| {MAGENTA}{{n_fmt}}/{{total_fmt}} files "
+    f"{GOLDEN}[{{elapsed}}<{{remaining}}, {{rate_fmt}}{{postfix}}]{RESET}"
+)
 
 def is_code_file(file_path):
     # List of common code file extensions
@@ -57,7 +67,6 @@ def collect_file_pathes(subfolders, work_dir):
 
 def write_file_descriptions(subfolders_with_files=['/']): 
     all_files = collect_file_pathes(subfolders_with_files, work_dir)
-    progress = Progress()
     coderrules = read_coderrules()
 
     prompt = ChatPromptTemplate.from_template(
@@ -84,8 +93,7 @@ def write_file_descriptions(subfolders_with_files=['/']):
     description_folder = join_paths(work_dir, '.clean_coder/files_and_folders_descriptions')
     Path(description_folder).mkdir(parents=True, exist_ok=True)
     batch_size = 8
-    task_progress = progress.add_task("[gold1]Describing files (0/{})".format(len(all_files)), total=len(all_files))
-    progress.start()
+    pbar = tqdm(total=len(all_files), desc=f"[1/2]Describing files", bar_format=bar_format)
 
     for i in range(0, len(all_files), batch_size):
         files_iteration = all_files[i:i + batch_size]
@@ -97,9 +105,11 @@ def write_file_descriptions(subfolders_with_files=['/']):
 
             with open(output_path, 'w', encoding='utf-8') as out_file:
                 out_file.write(description)
-            files_processed = progress.tasks[task_progress].completed + 1
-            progress.update(task_progress, advance=1, description=f"[gold1]Describing files ({files_processed}/{len(all_files)})")
-    progress.stop()
+
+        # Update by actual number of files processed in this batch
+        pbar.update(len(files_iteration))
+
+    pbar.close()  # Don't forget to close the progress bar when done
 
 
 
@@ -120,8 +130,10 @@ def write_file_chunks_descriptions(subfolders_with_files=['/']):
 
     description_folder = join_paths(work_dir, '.clean_coder/files_and_folders_descriptions')
     Path(description_folder).mkdir(parents=True, exist_ok=True)
+
     # iterate chunks inside of the file
-    for file_path in all_files:
+    for file_path in tqdm(all_files, desc=f"[2/2]Describing file chunks",
+                 bar_format=bar_format):
         file_content = get_content(file_path)
         # get file extenstion
         extension = file_path.suffix.lstrip('.')
@@ -130,7 +142,6 @@ def write_file_chunks_descriptions(subfolders_with_files=['/']):
         if len(file_chunks) <= 1:
             continue
         descriptions = chain.batch([{'coderrules': coderrules, 'file_code': file_content, 'chunk_code': chunk} for chunk in file_chunks])
-        print(descriptions)
 
         for nr, description in enumerate(descriptions):
             file_name = f"{file_path.relative_to(work_dir).as_posix().replace('/', '=')}_chunk{nr}"
diff --git a/src/utilities/llms.py b/src/utilities/llms.py
index ed81e8e5..3c4e15e9 100644
--- a/src/utilities/llms.py
+++ b/src/utilities/llms.py
@@ -34,13 +34,12 @@ def llm_open_local_hosted(model):
 def init_llms_medium_intelligence(tools=None, run_name="Clean Coder", temp=0):
     llms = []
     if getenv("ANTHROPIC_API_KEY"):
-        llms.append(ChatAnthropic(model='claude-3-7-sonnet-latest', temperature=temp, timeout=60, max_tokens=2048))
+        llms.append(ChatAnthropic(model='claude-3-5-sonnet-latest', temperature=temp, timeout=60, max_tokens=2048))
     if getenv("OPENROUTER_API_KEY"):
-        llms.append(llm_open_router("anthropic/claude-3.7-sonnet"))
+        llms.append(llm_open_router("anthropic/claude-3.5-sonnet"))
     if getenv("OPENAI_API_KEY"):
         llms.append(ChatOpenAI(model="gpt-4o", temperature=temp, timeout=60))
-    # if os.getenv("GOOGLE_API_KEY"):
-    #     llms.append(ChatGoogleGenerativeAI(model="gemini-2.0-flash-exp", temperature=temp, timeout=60))
+
     if getenv("OLLAMA_MODEL"):
         llms.append(ChatOllama(model=os.getenv("OLLAMA_MODEL")))
     if getenv("LOCAL_MODEL_API_BASE"):
@@ -75,19 +74,15 @@ def init_llms_mini(tools=None, run_name="Clean Coder", temp=0):
 
 def init_llms_high_intelligence(tools=None, run_name="Clean Coder", temp=0.2):
     llms = []
+    if os.getenv("ANTHROPIC_API_KEY"):
+        llms.append(ChatAnthropic(model='claude-3-7-sonnet-latest', temperature=temp, timeout=60, max_tokens=2048))
+    if getenv("OPENROUTER_API_KEY"):
+        llms.append(llm_open_router("anthropic/claude-3.7-sonnet"))
     if os.getenv("OPENAI_API_KEY"):
         llms.append(ChatOpenAI(model="o3-mini", temperature=1, timeout=60, reasoning_effort="high"))
     if os.getenv("OPENAI_API_KEY"):
         llms.append(ChatOpenAI(model="o1", temperature=1, timeout=60))
 
-    if os.getenv("OPENROUTER_API_KEY"):
-        llms.append(llm_open_router("openai/gpt-4o"))
-    if os.getenv("OPENAI_API_KEY"):
-        llms.append(ChatOpenAI(model="gpt-4o", temperature=temp, timeout=60))
-    if os.getenv("ANTHROPIC_API_KEY"):
-        llms.append(ChatAnthropic(model='claude-3-7-sonnet-latest', temperature=temp, timeout=60, max_tokens=2048))
-    # if os.getenv("GOOGLE_API_KEY"):
-    #     llms.append(ChatGoogleGenerativeAI(model="gemini-2.0-flash-exp", temperature=temp, timeout=60))
     if os.getenv("OLLAMA_MODEL"):
         llms.append(ChatOllama(model=os.getenv("OLLAMA_MODEL")))
     if getenv("LOCAL_MODEL_API_BASE"):

From e912cdae9354f71c82e0ed56bdaf371a804882f1 Mon Sep 17 00:00:00 2001
From: Grigorij Dudnik <dudnikgrv@gmail.com>
Date: Fri, 7 Mar 2025 08:43:17 +0100
Subject: [PATCH 29/32] indexing added to manager, bug with cutting parenthesis
 solved

---
 .env.template                                   | 7 ++-----
 docker-compose.yml                              | 2 --
 manager.py                                      | 3 ++-
 non_src/tests/manual_tests/planer_scenario_1.py | 4 ++--
 requirements.txt                                | 2 --
 src/agents/planner_agent.py                     | 2 +-
 src/prompts/planner_system.prompt               | 8 +++-----
 src/tools/rag/retrieval.py                      | 6 ------
 src/utilities/util_functions.py                 | 4 ++--
 9 files changed, 12 insertions(+), 26 deletions(-)

diff --git a/.env.template b/.env.template
index 4898bdf8..38fb491d 100644
--- a/.env.template
+++ b/.env.template
@@ -11,14 +11,11 @@ OLLAMA_MODEL=
 LOCAL_MODEL_API_BASE=
 LOCAL_MODEL_NAME=
 
-# Optional, but highly recommended
-## For RAG tool of Researcher
-COHERE_API_KEY=
-
-# Optional
 ## For Manager agent
 TODOIST_API_KEY=
 TODOIST_PROJECT_ID=
+
+# Optional
 ## For automatic error check
 LOG_FILE=
 ## Frontend Feedback
diff --git a/docker-compose.yml b/docker-compose.yml
index 9fb66bc0..50f50145 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -10,7 +10,6 @@ services:
       - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
       - TODOIST_API_KEY=${TODOIST_API_KEY}
       - TODOIST_PROJECT_ID=${TODOIST_PROJECT_ID}
-      - COHERE_API_KEY=${COHERE_API_KEY}
       - LOG_FILE=${LOG_FILE:-}
     volumes:
       - .:/Clean_Coder
@@ -29,7 +28,6 @@ services:
       - WORK_DIR=/work_dir
       - OPENAI_API_KEY=${OPENAI_API_KEY}
       - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
-      - COHERE_API_KEY=${COHERE_API_KEY}
       - LOG_FILE=${LOG_FILE:-}
     volumes:
       - .:/Clean_Coder
diff --git a/manager.py b/manager.py
index 279d9835..1cfea36c 100644
--- a/manager.py
+++ b/manager.py
@@ -16,6 +16,7 @@
 from langgraph.graph import StateGraph
 from src.tools.tools_project_manager import add_task, modify_task, finish_project_planning, reorder_tasks
 from src.tools.tools_coder_pipeline import prepare_list_dir_tool, prepare_see_file_tool, ask_human_tool
+from src.tools.rag.index_file_descriptions import prompt_index_project_files
 from src.utilities.manager_utils import actualize_tasks_list_and_progress_description, setup_todoist_project_if_needed, get_manager_messages
 from src.utilities.langgraph_common_functions import call_model, call_tool, multiple_tools_msg, no_tools_msg, empty_message_msg
 from src.utilities.start_project_functions import set_up_dot_clean_coder_dir
@@ -37,7 +38,7 @@ def __init__(self):
         # initial project setup
         set_up_dot_clean_coder_dir(self.work_dir)
         setup_todoist_project_if_needed()
-
+        prompt_index_project_files()
 
         self.tools = self.prepare_tools()
         self.llms = init_llms_medium_intelligence(tools=self.tools, run_name="Manager")
diff --git a/non_src/tests/manual_tests/planer_scenario_1.py b/non_src/tests/manual_tests/planer_scenario_1.py
index df9322f0..bfe7c81e 100644
--- a/non_src/tests/manual_tests/planer_scenario_1.py
+++ b/non_src/tests/manual_tests/planer_scenario_1.py
@@ -11,8 +11,8 @@
 
 load_dotenv(find_dotenv())
 
-folder_with_project_files = repo_directory.joinpath("non_src/tests/manual_tests/projects_files", "debugger_scenario_1_files")
-tmp_folder =  pathlib.Path(__file__).parent.resolve().joinpath("sandbox_work_dir")
+folder_with_project_files = repo_directory.joinpath("non_src/tests/manual_tests/projects_files", "planner_scenario_1_files")
+tmp_folder = pathlib.Path(__file__).parent.resolve().joinpath("sandbox_work_dir")
 setup_work_dir(manual_tests_folder=tmp_folder, test_files_dir=folder_with_project_files)
 
 task = "Make form wider, with green background. Improve styling."
diff --git a/requirements.txt b/requirements.txt
index 32cdcfe1..61f23600 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,8 +10,6 @@ langchain-ollama==0.2.0
 playwright==1.47.0
 libsass==0.23.0
 openai==1.61.1
-cohere==5.10.0
-langchain-cohere==0.3.0
 chromadb==0.4.21
 todoist-api-python==2.1.7
 termcolor==2.4.0
diff --git a/src/agents/planner_agent.py b/src/agents/planner_agent.py
index 007e23bb..408111ab 100644
--- a/src/agents/planner_agent.py
+++ b/src/agents/planner_agent.py
@@ -67,7 +67,7 @@ def call_advanced_planner(state):
     logic_pseudocode = llm_strong.invoke(logic_planner_messages)
     print_formatted("\nIntermediate planning done. Finalizing plan...", color="light_magenta")
     if os.getenv("SHOW_LOGIC_PLAN"):
-        print(logic_pseudocode.content)
+        print_formatted(logic_pseudocode.content, color="light_yellow")
 
     state["plan_finalizer_messages"].append(HumanMessage(content=f"Logic pseudocode plan to follow:\n\n{logic_pseudocode.content}"))
     plan_finalizer_messages = state["plan_finalizer_messages"]
diff --git a/src/prompts/planner_system.prompt b/src/prompts/planner_system.prompt
index 1d6d4165..33496241 100644
--- a/src/prompts/planner_system.prompt
+++ b/src/prompts/planner_system.prompt
@@ -20,16 +20,13 @@ For additional context, here's the directory tree:
 
 Instructions:
 
-1. Plan the logic:
-   Outline the logic algorithm before proposing code changes.
-
-2. Draft a detailed modification plan:
+1. Draft a detailed modification plan:
    - Prioritize readability
    - Follow the DRY (Don't Repeat Yourself) principle
    - Use meaningful variable names
    - Write concise code
 
-3. Format code snippets in your plan properly:
+2. Format code snippets in your plan properly:
    In your code snippets, follow udiff format with filename we working on in the header. For each code modification, use the following structure:
 
    ```filename.extension
@@ -43,3 +40,4 @@ Instructions:
 Remember:
 - If you're unsure how to implement a given task, don't improvise. Simply state that you don't know. Assuming is not allowed - just tell "please provide me with more files" when needed.
 - When adjusting your plan based on user feedback, always provide a complete version of the plan, referenced to original file contents. Don't reference previous plan.
+- Previous plan proposition have not been implemented. Always reference your code changes to code files you have in the context, not to the previous plan proposition.
diff --git a/src/tools/rag/retrieval.py b/src/tools/rag/retrieval.py
index 056fb8ae..5eef5de8 100644
--- a/src/tools/rag/retrieval.py
+++ b/src/tools/rag/retrieval.py
@@ -1,5 +1,4 @@
 import os
-import cohere
 import chromadb
 from pathlib import Path
 from dotenv import load_dotenv, find_dotenv
@@ -10,9 +9,6 @@
 
 load_dotenv(find_dotenv())
 work_dir = os.getenv("WORK_DIR")
-# cohere_key = os.getenv("COHERE_API_KEY")
-# if cohere_key:
-#     cohere_client = cohere.Client(cohere_key)
 collection_name = f"clean_coder_{Path(work_dir).name}_file_descriptions"
 
 
@@ -138,8 +134,6 @@ def rank(self, question: str, retrieval: dict) -> list:
 if __name__ == "__main__":
     # Example usage of BinaryRanker for testing.
     question = "Some tool that can change files"
-
-    
     # Test the retrieve function
     results = retrieve(question)
     print("\n\n")
diff --git a/src/utilities/util_functions.py b/src/utilities/util_functions.py
index 1db64783..57c648e1 100644
--- a/src/utilities/util_functions.py
+++ b/src/utilities/util_functions.py
@@ -63,9 +63,9 @@ def watch_file(filename, work_dir, line_numbers=True):
     except FileNotFoundError:
         return "File not exists."
     if line_numbers:
-        formatted_lines = [f"{i + 1}|{line[:-1]} |{i+1}\n" for i, line in enumerate(lines)]
+        formatted_lines = [f"{i + 1}|{line.rstrip()} |{i+1}\n" for i, line in enumerate(lines)]
     else:
-        formatted_lines = [f"{line[:-1]}\n" for line in lines]
+        formatted_lines = [f"{line.rstrip()}\n" for line in lines]
     file_content = "".join(formatted_lines)
     file_content = filename + ":\n\n" + file_content
 

From ec843aa76acfeaeeeae68f83600dad5c78635576 Mon Sep 17 00:00:00 2001
From: Grigorij Dudnik <dudnikgrv@gmail.com>
Date: Sun, 9 Mar 2025 15:18:33 +0100
Subject: [PATCH 30/32] improvements

---
 .env.template                     | 5 ++---
 src/utilities/llms.py             | 2 +-
 src/utilities/manager_utils.py    | 1 +
 src/utilities/print_formatters.py | 3 +++
 4 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/.env.template b/.env.template
index 38fb491d..9eaf1a43 100644
--- a/.env.template
+++ b/.env.template
@@ -6,7 +6,6 @@ WORK_DIR=
 ANTHROPIC_API_KEY=
 OPENAI_API_KEY=
 OPENROUTER_API_KEY=
-GOOGLE_API_KEY=
 OLLAMA_MODEL=
 LOCAL_MODEL_API_BASE=
 LOCAL_MODEL_NAME=
@@ -25,8 +24,8 @@ EDIT_TRANSCRIPTION=
 ## Show planner intermediate reasoning
 SHOW_LOGIC_PLAN=
 
-# optional - LLM observability
-LANGCHAIN_TRACING_V2=true
+# Optional - LLM observability
+LANGCHAIN_TRACING_V2=
 LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
 LANGCHAIN_API_KEY=
 LANGCHAIN_PROJECT=
\ No newline at end of file
diff --git a/src/utilities/llms.py b/src/utilities/llms.py
index 3c4e15e9..9d6dfeda 100644
--- a/src/utilities/llms.py
+++ b/src/utilities/llms.py
@@ -75,7 +75,7 @@ def init_llms_mini(tools=None, run_name="Clean Coder", temp=0):
 def init_llms_high_intelligence(tools=None, run_name="Clean Coder", temp=0.2):
     llms = []
     if os.getenv("ANTHROPIC_API_KEY"):
-        llms.append(ChatAnthropic(model='claude-3-7-sonnet-latest', temperature=temp, timeout=60, max_tokens=2048))
+        llms.append(ChatAnthropic(model='claude-3-7-sonnet-latest', temperature=temp, timeout=60, max_tokens=4096))
     if getenv("OPENROUTER_API_KEY"):
         llms.append(llm_open_router("anthropic/claude-3.7-sonnet"))
     if os.getenv("OPENAI_API_KEY"):
diff --git a/src/utilities/manager_utils.py b/src/utilities/manager_utils.py
index 364d6c4b..8a6372a3 100644
--- a/src/utilities/manager_utils.py
+++ b/src/utilities/manager_utils.py
@@ -76,6 +76,7 @@ def fetch_epics():
 
 
 def fetch_tasks():
+    print("pies")
     return todoist_api.get_tasks(project_id=os.getenv('TODOIST_PROJECT_ID'))
 
 
diff --git a/src/utilities/print_formatters.py b/src/utilities/print_formatters.py
index 618e0998..e9c8825e 100644
--- a/src/utilities/print_formatters.py
+++ b/src/utilities/print_formatters.py
@@ -10,6 +10,9 @@
 
 
 def print_formatted_content_planner(content):
+    """
+    Prints output of planner module. Highlights code snippets in diff.
+    """
     parts = content.split('```')
     outside_texts = parts[::2]
     code_snippets = parts[1::2]

From f0b3e6e4244747b09b28ba5ba4e654551717b47e Mon Sep 17 00:00:00 2001
From: Grigorij Dudnik <dudnikgrv@gmail.com>
Date: Mon, 10 Mar 2025 17:38:58 +0100
Subject: [PATCH 31/32] 2-step indexing

---
 src/tools/rag/index_file_descriptions.py | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/src/tools/rag/index_file_descriptions.py b/src/tools/rag/index_file_descriptions.py
index 65c70179..a68c1915 100644
--- a/src/tools/rag/index_file_descriptions.py
+++ b/src/tools/rag/index_file_descriptions.py
@@ -180,14 +180,23 @@ def prompt_index_project_files():
     """
     Checks if the vector database (VDB) is available.
     If not, prompts the user via questionary to index project files for better search.
-    On a "Yes" answer, triggers write_and_index_descriptions().
+    If the user proceeds, shows the number of files and asks for confirmation; on "Index", triggers write_and_index_descriptions().
     """
-    if not vdb_available():
+    if vdb_available():
+        return
+    answer = questionary.select(
+        "Do you want to index your project files for improving file search?",
+        choices=["Proceed", "Skip"],
+        style=QUESTIONARY_STYLE,
+        instruction="\nHint: Skip for testing Clean Coder; index for real projects."
+    ).ask()
+    if answer == "Proceed":
+        nr_of_files = len(collect_file_pathes(['/'], work_dir))
         answer = questionary.select(
-            "Do you want to index your project files for better search?",
+            f"Going to index {nr_of_files} files. Indexing could be time-consuming and costly. Are you ready to go?",
             choices=["Index", "Skip"],
             style=QUESTIONARY_STYLE,
-            instruction="\nHint: Skip for testing Clean Coder; index for real projects."
+            instruction="\nHint: Ensure you provided all files and directories you don't want to index in {WORK_DIR}/.clean_coder/.coderignore to avoid describing trashy files."
         ).ask()
         if answer == "Index":
             write_and_index_descriptions()

From 8a522716f9070b03e93561be9f99862b053f36f2 Mon Sep 17 00:00:00 2001
From: Wiktor Balcerzak <33687465+LilKeyboard@users.noreply.github.com>
Date: Fri, 14 Mar 2025 10:59:24 +0100
Subject: [PATCH 32/32] Update user_input.py

I added multiline input feature
---
 src/utilities/user_input.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/utilities/user_input.py b/src/utilities/user_input.py
index 573ce3ed..6d371b6c 100644
--- a/src/utilities/user_input.py
+++ b/src/utilities/user_input.py
@@ -3,13 +3,18 @@
 from src.utilities.voice_utils import VoiceRecorder
 import keyboard
 import readline
+import sys
 
 
 recorder = VoiceRecorder()
 
 
 def user_input(prompt=""):
-    print_formatted(prompt + "Or use (m)icrophone to tell:", color="cyan", bold=True)
+    print_formatted(prompt + "Or use (m)icrophone to tell, or press Enter for multiline input:", color="cyan", bold=True)
+    
+    if not sys.stdin.isatty():
+        return sys.stdin.read().strip()
+        
     user_sentence = input()
     if user_sentence == 'm':
         if not os.getenv("OPENAI_API_KEY"):
@@ -26,7 +31,15 @@ def user_input(prompt=""):
         else:
             print_formatted("Install 'sudo apt-get install libportaudio2' (Linux) or 'brew install portaudio' (Mac) to use microphone feature.", color="red")
             user_sentence = input()
-
+    elif user_sentence == '' or '\n' in user_sentence:  
+        if user_sentence:  
+            return user_sentence
+        print_formatted("Enter your multiline text (end with Ctrl+D on Unix or Ctrl+Z on Windows):", color="green")
+        try:
+            user_sentence = sys.stdin.read().strip()
+        except KeyboardInterrupt:
+            return ""
+        
     return user_sentence