Code comments

blazickjp · blazickjp · commit 3017bf3fa1a7 · 2024-03-21T13:46:59.000-07:00
diff --git a/backend/agent/coding_agent.py b/backend/agent/coding_agent.py
@@ -1,4 +1,8 @@
 # import os
+"""
+This Python module defines the classes and functions used by the coding agent in the backend of an application. The coding agent is responsible for interacting with various components such as the database, memory management system, and external APIs to facilitate code generation, manipulation, and management tasks. It utilizes models for code generation, applies AST (Abstract Syntax Tree) operations to modify code, and manages the working context and system prompts for the user. Additionally, it handles the execution of generated code operations and integrates with external services like OpenAI and AWS for enhanced functionality.
+"""
+
 import re
 import json
 import boto3
@@ -37,6 +41,7 @@ class NestedNamespace(SimpleNamespace):
     """
     A class to convert a dictionary into a nested namespace.
     """
+
     def __init__(self, dictionary, **kwargs):
         if not isinstance(dictionary, dict):
             raise ValueError("Input must be a dictionary")
@@ -85,7 +90,7 @@ def __init__(
 
         self.memory_manager = memory_manager
         self.function_map = function_map
-        self.GPT_MODEL = 'gpt-4-0125-preview'
+        self.GPT_MODEL = "gpt-4-0125-preview"
         self.codebase = codebase
         self.max_tokens = 4000
         self.temperature = 0.75
@@ -200,6 +205,20 @@ def query(self, input: str, command: Optional[str] = None) -> List[str]:
                 yield chunk.choices[0].delta.content
 
     def execute_ops(self, ops: List[dict]):
+        """
+        Executes the operations stored in the instance attribute `self.ops_to_execute`.
+
+        This method iterates over each operation in `ops_to_execute`, applying the necessary changes to the
+        corresponding file. It handles file path normalization, reads the original code, applies the AST changes,
+        computes the diff between the original and transformed code, and finally writes the transformed code back to the file.
+        It accumulates and returns a list of diffs for each operation.
+
+        Args:
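+            ops (List[dict]): Operations to execute. NOTE(review): the loop below iterates `self.ops_to_execute`, not `ops` — confirm whether this argument is actually used.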
+
+        Returns:
+            List[str]: A list of unified diff strings representing the changes made to each file.
+        """
         diffs = []  # List to store the diffs for each operation
 
         for op in self.ops_to_execute:
@@ -279,6 +298,17 @@ def process_json(self, args: str) -> str:
             return json.loads(response_str)
 
     def call_model_streaming(self, command: Optional[str] | None = None, **kwargs):
+        if command:
+            kwargs["prompt"] = command
+        else:
+            kwargs["prompt"] = "Please generate code based on the provided context."
+
+        kwargs["stream"] = True
+        kwargs["max_tokens"] = kwargs.get("max_tokens", 256)
+        kwargs["temperature"] = kwargs.get("temperature", 0.5)
+
+        if "model" not in kwargs:
+            raise ValueError("Model not specified in kwargs")
         print("Calling model streaming")
         print(kwargs["model"])
         if self.GPT_MODEL.startswith("gpt"):
@@ -296,7 +326,6 @@ def call_model_streaming(self, command: Optional[str] | None = None, **kwargs):
                     modelId="anthropic.claude-3-sonnet-20240229-v1:0",
                     body=json.dumps(
                         {
-
                             "messages": kwargs["messages"][1:],
                             "system": self.generate_anthropic_prompt(sys_only=True),
                             "max_tokens": max(kwargs["max_tokens"], 2000),
@@ -345,7 +374,9 @@ def call_model_streaming(self, command: Optional[str] | None = None, **kwargs):
                     print("UnboundLocalError")
                     break
 
-    def generate_anthropic_prompt(self, include_messages: Optional[bool]=True, sys_only: Optional[bool]=None) -> str:
+    def generate_anthropic_prompt(
+        self, include_messages: Optional[bool] = True, sys_only: Optional[bool] = None
+    ) -> str:
         """
         Generates a prompt for the Gaive model.
 
@@ -409,7 +440,7 @@ def generate_anthropic_prompt(self, include_messages: Optional[bool]=True, sys_o
 
         if sys_only:
             return sys_prompt
-        
+
         return (
             "\n\nHuman: The folllowing is your system prompt: "
             + sys_prompt
@@ -421,6 +452,15 @@ def generate_anthropic_prompt(self, include_messages: Optional[bool]=True, sys_o
 
     @staticmethod
     def normalize_path(input_path):
+        """
+        Normalizes a path to be relative to the current working directory.
+
+        Args:
+            input_path (str): The path to normalize.
+
+        Returns:
+            str: The normalized path.
+        """
         # Get the current working directory as a Path object
         working_directory = Path.cwd()
 
diff --git a/backend/database/my_codebase.py b/backend/database/my_codebase.py
@@ -1,3 +1,7 @@
+"""
+This module defines the `MyCodebase` class, which manages the database operations behind codebase tracking. Its functionality includes initializing the database connection, setting up the directory to scan for code, creating the necessary database tables, updating files and embeddings, and removing old files from the database. The class uses the external `tiktoken` library for model-specific token encoding and stores and manages the codebase information in the database.
+"""
+
 import os
 import datetime
 import tiktoken
@@ -97,6 +101,9 @@ def update_file(self, file_path: str) -> None:
         self.conn.commit()
 
     def create_tables(self) -> None:
+        """
+        Creates the necessary tables in the database if they don't exist.
+        """
         try:
             self.cur.execute(
                 """
@@ -127,6 +134,18 @@ def create_tables(self) -> None:
             print(f"Failed to create tables: {e}")
 
     def tree(self) -> str:
+        """
+        Generates a visual representation of the project's directory structure.
+
+        This method fetches the file paths and summaries from the database, constructs a tree
+        structure representing the directory hierarchy, and then generates a string representation
+        of this tree. Each node in the tree represents a directory or file, with directories containing
+        nested dictionaries of their contents.
+
+        Returns:
+            str: A string representation of the directory tree, with each node prefixed by "+--" and
+                 indented to represent its depth in the hierarchy.
+        """
         tree = {}
         start_from = os.path.basename(self.directory)
         print("Start from: ", start_from)
diff --git a/backend/memory/memory_manager.py b/backend/memory/memory_manager.py
@@ -1,3 +1,7 @@
+"""
+This module contains the implementation of the memory management system for the backend. It defines the `MemoryManager` class, which manages interactions with the memory database, including initializing connections, creating tables, and delegating to other classes that interact with the database. It includes the `WorkingContext` class, which is responsible for managing the working context of the user, including the database connection, project directory, and interaction with the OpenAI API client. The module also handles the creation of necessary database tables and provides methods for managing the working context data within the database. Additionally, it integrates with other components such as the system prompt handler and the OpenAI API client to facilitate the generation and management of system prompts and responses.
+"""
+
 import tiktoken
 from typing import Optional, List
 from datetime import datetime
@@ -119,10 +123,6 @@ def execute(self, working_context: WorkingContext) -> None:
 
 
 class MemoryManager:
-    # MemoryManager class manages interactions with the memory database
-    # including initializing connections, creating tables, and
-    # delegating to other classes that interact with the database.
-
     def __init__(
         self,
         model: str = "gpt-3.5-turbo-16k",
@@ -163,6 +163,17 @@ def __init__(
         self.background_tasks = None
 
     def get_messages(self, chat_box: Optional[bool] = None) -> List[dict]:
+        """
+        Fetches messages from the system prompt table.
+
+        This method queries the system prompt table for messages, filtering based on the chat_box flag. If chat_box is True, it fetches messages with a higher token limit to accommodate more verbose interactions typical in a chat interface. Otherwise, it uses the default max_tokens limit defined for the system.
+
+        Args:
+            chat_box (Optional[bool]): A flag indicating whether the messages are being fetched for a chat box interface. Defaults to None.
+
+        Returns:
+            List[dict]: A list of dictionaries, each containing the role and content of a message.
+        """
         self.cur.execute(
             f"""
             SELECT role, content
@@ -218,7 +229,7 @@ def get_messages(self, chat_box: Optional[bool] = None) -> List[dict]:
                 ),
             )
         results = self.cur.fetchall()
-        prev_role = 'assistant'
+        prev_role = "assistant"
         for result in results[::-1]:
             if prev_role == result[0]:
                 continue
@@ -235,6 +246,20 @@ def add_message(
         command: Optional[str] = None,
         function_response: Optional[str] = None,
     ) -> None:
+        """
+        Adds a message to the memory database.
+
+        This method inserts a new message into the memory database with the provided role, content, and optional command and function response. It also calculates the timestamp, the total number of tokens in the message, and optionally summarizes the message if the number of tokens exceeds a certain threshold.
+
+        Args:
+            role (str): The role of the message sender (e.g., "user" or "assistant").
+            content (str): The content of the message.
+            command (Optional[str]): An optional command associated with the message.
+            function_response (Optional[str]): An optional function response associated with the message.
+
+        Returns:
+            None
+        """
         timestamp = datetime.now().isoformat()
         message_tokens = self.get_total_tokens_in_message(content)
         summary, summary_tokens = (
@@ -266,7 +291,15 @@ def add_message(
         return
 
     def get_total_tokens_in_message(self, message: str) -> int:
-        """Returns the number of tokens in a message."""
+        """
+        Counts the tokens in a message using tiktoken's encoding for "gpt-3.5-turbo" (the model name is hard-coded in this method).
+
+        Args:
+            message (str): The message for which to calculate the total number of tokens.
+
+        Returns:
+            int: The total number of tokens in the message.
+        """
         encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
         num_tokens = len(encoding.encode(message))
         return num_tokens
diff --git a/backend/memory/system_prompt_handler.py b/backend/memory/system_prompt_handler.py
@@ -1,6 +1,7 @@
 """
-Methods and CRUD operations for managing system prompts.
+This module contains classes and methods for handling system prompts and logging relevant information within the backend memory system. It includes a `RelevantLogHandler` class for managing logs relevant to the system's operation, focusing on error logs, and a `SystemPromptHandler` class for managing system prompts, including CRUD operations on system prompts stored in a database. These components are essential for maintaining a responsive and informed system environment, aiding in debugging and user interaction management.
 """
+
 from typing import Optional, Dict
 import os
 import logging