AI-Hypercomputer
diff --git a/jetstream/core/orchestrator.py b/jetstream/core/orchestrator.py
Lines changed: 0 additions & 6 deletions
diff --git a/jetstream/core/proto/jetstream_pb2.py b/jetstream/core/proto/jetstream_pb2.py
Lines changed: 3 additions & 5 deletions
diff --git a/jetstream/core/proto/jetstream_pb2_grpc.py b/jetstream/core/proto/jetstream_pb2_grpc.py
Lines changed: 88 additions & 106 deletions
diff --git a/jetstream/core/proto/multi_lora_decoding_pb2.py b/jetstream/core/proto/multi_lora_decoding_pb2.py
Lines changed: 3 additions & 4 deletions
@@ -580,7 +580,6 @@ def _prefill_thread(self, idx: int):
 
       if request is None:
         break
-
       request.metadata.prefill_dequeue_time = time.perf_counter()
       is_bos = True
       logging.info(
@@ -590,7 +589,6 @@ def _prefill_thread(self, idx: int):
           self._prefill_backlog.qsize(),
           is_bos,
       )
-
       # Tokenize and padding the text or token input.
       padded_tokens, true_length = self._process_prefill_content(
           request, tokenizer, is_bos, prefill_engine.max_prefill_length
@@ -703,7 +701,6 @@ def _transfer_thread(self, idx: int):
       # Place the request on the correct generate backlog and block if full.
       new_request.metadata.generate_enqueue_time = time.perf_counter()
       self._generate_backlogs[target_idx].put(new_request, block=True)
-
       logging.info(
           "Successfully transferred prefill "
           "from prefill engine %d to generate engine %d "
@@ -727,7 +724,6 @@ def _generate_thread(self, idx: int):
     decode_state = generate_engine.init_decode_state()
 
     generate_params = self._generate_params[idx]
-
     logging.info("---------Generate params %d loaded.---------", idx)
     time_of_last_generate = time.time()
     time_of_last_print = time.time()
@@ -841,7 +837,6 @@ def _generate_thread(self, idx: int):
           generate_params, decode_state
       )
       sampled_tokens.copy_to_host_async()
-
       # Respond to detokenization backpressure.
       my_detokenize_backlog.put((generate_timestep, sampled_tokens), block=True)
       generate_timestep += 1
@@ -1135,7 +1130,6 @@ async def Decode(  # pylint: disable=invalid-overridden-method
     prefill_content, is_client_side_tokenization = self._get_prefill_content(
         request
     )
-
     # Wrap request as an ActiveRequest.
     active_request = ActiveRequest(
         max_tokens=request.max_tokens,
 
@@ -11,12 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 # -*- coding: utf-8 -*-
 # Generated by the protocol buffer compiler.  DO NOT EDIT!
-# NO CHECKED-IN PROTOBUF GENCODE
 # source: jetstream.proto
-# Protobuf Python Version: 5.29.0
+# Protobuf Python Version: 4.25.1
 """Generated protocol buffer code."""
 from google.protobuf import descriptor as _descriptor
 from google.protobuf import descriptor_pool as _descriptor_pool
@@ -34,8 +32,8 @@
 _globals = globals()
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'jetstream_pb2', _globals)
-if not _descriptor._USE_C_DESCRIPTORS:
-  DESCRIPTOR._loaded_options = None
+if _descriptor._USE_C_DESCRIPTORS == False:
+  DESCRIPTOR._options = None
   _globals['_DECODEREQUEST']._serialized_start=37
   _globals['_DECODEREQUEST']._serialized_end=437
   _globals['_DECODEREQUEST_TEXTCONTENT']._serialized_start=293
 
@@ -15,124 +15,106 @@
 """Client and server classes corresponding to protobuf-defined services."""
 import grpc
 
-from jetstream.core.proto import jetstream_pb2 as jetstream_dot_core_dot_proto_dot_jetstream__pb2
+from jetstream.core.proto import jetstream_pb2 as jetstream__pb2
 
 
 class OrchestratorStub(object):
-  """TODO: Merge this with main JetStream core once we settle on an API."""
+    """TODO: Merge this with main JetStream core once we settle on an API.
 
-  def __init__(self, channel):
-    """Constructor.
-
-    Args:
-        channel: A grpc.Channel.
     """
-    self.Decode = channel.unary_stream(
-        "/jetstream_proto.Orchestrator/Decode",
-        request_serializer=jetstream_dot_core_dot_proto_dot_jetstream__pb2.DecodeRequest.SerializeToString,
-        response_deserializer=jetstream_dot_core_dot_proto_dot_jetstream__pb2.DecodeResponse.FromString,
-    )
-    self.HealthCheck = channel.unary_unary(
-        "/jetstream_proto.Orchestrator/HealthCheck",
-        request_serializer=jetstream_dot_core_dot_proto_dot_jetstream__pb2.HealthCheckRequest.SerializeToString,
-        response_deserializer=jetstream_dot_core_dot_proto_dot_jetstream__pb2.HealthCheckResponse.FromString,
-    )
+
+    def __init__(self, channel):
+        """Constructor.
+
+        Args:
+            channel: A grpc.Channel.
+        """
+        self.Decode = channel.unary_stream(
+                '/jetstream_proto.Orchestrator/Decode',
+                request_serializer=jetstream__pb2.DecodeRequest.SerializeToString,
+                response_deserializer=jetstream__pb2.DecodeResponse.FromString,
+                )
+        self.HealthCheck = channel.unary_unary(
+                '/jetstream_proto.Orchestrator/HealthCheck',
+                request_serializer=jetstream__pb2.HealthCheckRequest.SerializeToString,
+                response_deserializer=jetstream__pb2.HealthCheckResponse.FromString,
+                )
 
 
 class OrchestratorServicer(object):
-  """TODO: Merge this with main JetStream core once we settle on an API."""
+    """TODO: Merge this with main JetStream core once we settle on an API.
 
-  def Decode(self, request, context):
-    """Query LLM to generate text or tokens."""
-    context.set_code(grpc.StatusCode.UNIMPLEMENTED)
-    context.set_details("Method not implemented!")
-    raise NotImplementedError("Method not implemented!")
+    """
 
-  def HealthCheck(self, request, context):
-    """Checks if the model server is live."""
-    context.set_code(grpc.StatusCode.UNIMPLEMENTED)
-    context.set_details("Method not implemented!")
-    raise NotImplementedError("Method not implemented!")
+    def Decode(self, request, context):
+        """Query LLM to generate text or tokens.
+        """
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def HealthCheck(self, request, context):
+        """Checks if the model server is live.
+        """
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
 
 
 def add_OrchestratorServicer_to_server(servicer, server):
-  rpc_method_handlers = {
-      "Decode": grpc.unary_stream_rpc_method_handler(
-          servicer.Decode,
-          request_deserializer=jetstream_dot_core_dot_proto_dot_jetstream__pb2.DecodeRequest.FromString,
-          response_serializer=jetstream_dot_core_dot_proto_dot_jetstream__pb2.DecodeResponse.SerializeToString,
-      ),
-      "HealthCheck": grpc.unary_unary_rpc_method_handler(
-          servicer.HealthCheck,
-          request_deserializer=jetstream_dot_core_dot_proto_dot_jetstream__pb2.HealthCheckRequest.FromString,
-          response_serializer=jetstream_dot_core_dot_proto_dot_jetstream__pb2.HealthCheckResponse.SerializeToString,
-      ),
-  }
-  generic_handler = grpc.method_handlers_generic_handler(
-      "jetstream_proto.Orchestrator", rpc_method_handlers
-  )
-  server.add_generic_rpc_handlers((generic_handler,))
-
-
-# This class is part of an EXPERIMENTAL API.
+    rpc_method_handlers = {
+            'Decode': grpc.unary_stream_rpc_method_handler(
+                    servicer.Decode,
+                    request_deserializer=jetstream__pb2.DecodeRequest.FromString,
+                    response_serializer=jetstream__pb2.DecodeResponse.SerializeToString,
+            ),
+            'HealthCheck': grpc.unary_unary_rpc_method_handler(
+                    servicer.HealthCheck,
+                    request_deserializer=jetstream__pb2.HealthCheckRequest.FromString,
+                    response_serializer=jetstream__pb2.HealthCheckResponse.SerializeToString,
+            ),
+    }
+    generic_handler = grpc.method_handlers_generic_handler(
+            'jetstream_proto.Orchestrator', rpc_method_handlers)
+    server.add_generic_rpc_handlers((generic_handler,))
+
+
+ # This class is part of an EXPERIMENTAL API.
 class Orchestrator(object):
-  """TODO: Merge this with main JetStream core once we settle on an API."""
-
-  @staticmethod
-  def Decode(
-      request,
-      target,
-      options=(),
-      channel_credentials=None,
-      call_credentials=None,
-      insecure=False,
-      compression=None,
-      wait_for_ready=None,
-      timeout=None,
-      metadata=None,
-  ):
-    return grpc.experimental.unary_stream(
-        request,
-        target,
-        "/jetstream_proto.Orchestrator/Decode",
-        jetstream_dot_core_dot_proto_dot_jetstream__pb2.DecodeRequest.SerializeToString,
-        jetstream_dot_core_dot_proto_dot_jetstream__pb2.DecodeResponse.FromString,
-        options,
-        channel_credentials,
-        insecure,
-        call_credentials,
-        compression,
-        wait_for_ready,
-        timeout,
-        metadata,
-    )
-
-  @staticmethod
-  def HealthCheck(
-      request,
-      target,
-      options=(),
-      channel_credentials=None,
-      call_credentials=None,
-      insecure=False,
-      compression=None,
-      wait_for_ready=None,
-      timeout=None,
-      metadata=None,
-  ):
-    return grpc.experimental.unary_unary(
-        request,
-        target,
-        "/jetstream_proto.Orchestrator/HealthCheck",
-        jetstream_dot_core_dot_proto_dot_jetstream__pb2.HealthCheckRequest.SerializeToString,
-        jetstream_dot_core_dot_proto_dot_jetstream__pb2.HealthCheckResponse.FromString,
-        options,
-        channel_credentials,
-        insecure,
-        call_credentials,
-        compression,
-        wait_for_ready,
-        timeout,
-        metadata,
-    )
+    """TODO: Merge this with main JetStream core once we settle on an API.
+
+    """
 
+    @staticmethod
+    def Decode(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_stream(request, target, '/jetstream_proto.Orchestrator/Decode',
+            jetstream__pb2.DecodeRequest.SerializeToString,
+            jetstream__pb2.DecodeResponse.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def HealthCheck(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/jetstream_proto.Orchestrator/HealthCheck',
+            jetstream__pb2.HealthCheckRequest.SerializeToString,
+            jetstream__pb2.HealthCheckResponse.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)