Add a GCP text to image demo for LIT.

llcourage · LIT team · commit f2bde3db2dc5 · 2025-05-14T10:51:04.000-07:00
PiperOrigin-RevId: 758748858
diff --git a/lit_nlp/examples/gcp_text_to_image/datasets.py b/lit_nlp/examples/gcp_text_to_image/datasets.py
@@ -0,0 +1,23 @@
+"""Data loaders for text to image models."""
+
+from lit_nlp.api import dataset as lit_dataset
+from lit_nlp.api import types as lit_types
+
+
+class TextToImageDataset(lit_dataset.Dataset):
+  """Prompts for text-to-image models; accepts a list or a single string."""
+
+  def __init__(self, prompts: "list[str] | str"):
+    # init_spec() exposes `prompts` as one text field, so a loader passes a
+    # single string; wrap it rather than iterating it character by character.
+    if isinstance(prompts, str):
+      prompts = [prompts]
+    self._examples = [{"prompt": prompt} for prompt in prompts]
+
+  @classmethod
+  def init_spec(cls) -> lit_types.Spec:
+    # The key must match the constructor's parameter name.
+    return {"prompts": lit_types.TextSegment(required=True)}
+
+  def spec(self) -> lit_types.Spec:
+    return {"prompt": lit_types.TextSegment()}
diff --git a/lit_nlp/examples/gcp_text_to_image/demo.py b/lit_nlp/examples/gcp_text_to_image/demo.py
@@ -0,0 +1,129 @@
+r"""A blank demo ready to load generative text to image models and datasets.
+
+To use with VertexAI Model Garden models, you must install the following packages:
+  pip install vertexai>=1.49.0
+To run the demo, you must set your GCP project location and project ID.
+
+Currently, the demo only supports the image generation models in the Model
+Garden.
+
+The following command can be used to run the demo:
+  blaze run -c opt examples/gcp_text_to_image:demo -- \
+    --project_id=$GCP_PROJECT_ID \
+    --project_location=$GCP_PROJECT_LOCATION \
+    --alsologtostderr
+Then navigate to localhost:5432 to access the demo UI.
+"""
+
+from collections.abc import Sequence
+import sys
+from typing import Optional
+
+from absl import app
+from absl import flags
+from absl import logging
+import google.auth
+from google.cloud.aiplatform import vertexai
+from lit_nlp import app as lit_app
+from lit_nlp import dev_server
+from lit_nlp import server_flags
+from lit_nlp.api import layout
+from lit_nlp.examples.gcp_text_to_image import datasets as gcp_text_to_image_datasets
+from lit_nlp.examples.gcp_text_to_image import models as gcp_text_to_image_models
+
+
+FLAGS = flags.FLAGS
+# GCP project settings used to initialize Vertex AI; both flags are required.
+LOCATION = flags.DEFINE_string(
+    'project_location',
+    None,
+    'Please enter your GCP project location',
+    required=True,
+)
+PROJECT_ID = flags.DEFINE_string(
+    'project_id',
+    None,
+    'Please enter your project id',
+    required=True,
+)
+
+# Custom layout: data modules above, generated outputs below (api/layout.py).
+_modules = layout.LitModuleName
+_IMAGE_LAYOUT = layout.LitCanonicalLayout(
+    upper={
+        'Main': [
+            _modules.DataTableModule,
+            _modules.DatapointEditorModule,
+        ]
+    },
+    lower={
+        'Predictions': [
+            _modules.GeneratedImageModule,
+            _modules.GeneratedTextModule,
+        ],
+    },
+    description='Custom layout for Text to Image models.',
+)
+
+# The default layouts plus the custom one, registered as '_IMAGE_LAYOUT'.
+CUSTOM_LAYOUTS = layout.DEFAULT_LAYOUTS | {'_IMAGE_LAYOUT': _IMAGE_LAYOUT}
+
+_CANNED_PROMPTS = ['I have a dream', 'I have a shiba dog named cola']
+
+
+def get_wsgi_app() -> Optional[dev_server.LitServerType]:
+  """Returns the WSGI app for container-hosted demos (e.g. under gunicorn)."""
+  FLAGS.set_default('server_type', 'external')
+  FLAGS.set_default('demo_mode', True)
+  # Parse flags without calling app.run(main), to avoid conflict with
+  # gunicorn command line flags.
+  unused = flags.FLAGS(sys.argv, known_only=True)
+  if unused:
+    logging.info(
+        'gcp_text_to_image:get_wsgi_app() called with unused args: %s', unused
+    )
+  return main([])
+
+
+def main(argv: Sequence[str]) -> Optional[dev_server.LitServerType]:
+  """Initializes Vertex AI and serves LIT with text-to-image loaders."""
+  if len(argv) > 1:
+    raise app.UsageError('Too many command-line arguments.')
+
+  creds, _ = google.auth.default(
+      scopes=['https://www.googleapis.com/auth/cloud-platform']
+  )
+  creds = creds.with_quota_project(PROJECT_ID.value)
+  vertexai.init(
+      project=PROJECT_ID.value,
+      location=LOCATION.value,
+      credentials=creds,
+  )
+  model_loaders: lit_app.ModelLoadersMap = {}
+  model_loaders['text_to_image'] = (
+      gcp_text_to_image_models.VertexModelGardenModel,
+      gcp_text_to_image_models.VertexModelGardenModel.init_spec(),
+  )
+
+  datasets = {
+      'prompts': gcp_text_to_image_datasets.TextToImageDataset(_CANNED_PROMPTS)
+  }
+  dataset_loaders: lit_app.DatasetLoadersMap = {}
+  dataset_loaders['text_to_image'] = (
+      gcp_text_to_image_datasets.TextToImageDataset,
+      gcp_text_to_image_datasets.TextToImageDataset.init_spec(),
+  )
+
+  lit_demo = dev_server.Server(
+      models={},  # Nothing preloaded; models come from model_loaders.
+      model_loaders=model_loaders,
+      datasets=datasets,
+      dataset_loaders=dataset_loaders,
+      layouts=CUSTOM_LAYOUTS,
+      **server_flags.get_flags()
+  )
+  return lit_demo.serve()
+
+
+if __name__ == '__main__':
+  app.run(main)  # absl parses the command-line flags, then calls main().
diff --git a/lit_nlp/examples/gcp_text_to_image/models.py b/lit_nlp/examples/gcp_text_to_image/models.py
@@ -0,0 +1,141 @@
+"""Model Wrapper for generative models."""
+
+from collections.abc import Iterable
+import io
+import logging
+import time
+from typing import Literal, Optional, Union
+from vertexai import vision_models
+from lit_nlp.api import model as lit_model
+from lit_nlp.api import types as lit_types
+from lit_nlp.lib import image_utils
+from PIL import Image
+
+_MAX_NUM_RETRIES = 5  # Upper bound on generation attempts per prompt.
+
+_DEFAULT_CANDIDATE_COUNT = 1  # NOTE(review): not used in this module.
+
+_DEFAULT_MAX_OUTPUT_TOKENS = 256  # NOTE(review): not used in this module.
+
+_IMAGE_PREFIX = 'data:image/png;base64,'  # NOTE(review): not used here.
+
+
+class VertexModelGardenModel(lit_model.BatchedRemoteModel):
+  """LIT wrapper around a Vertex AI Model Garden image generation model.
+
+  Args:
+    model_name: The name of the model to load.
+    max_concurrent_requests: The maximum number of concurrent requests to the
+      model.
+    max_qps: The maximum number of queries per second to the model.
+    aspect_ratio: Aspect ratio passed to the model's `generate_images` call.
+    width: Width, in pixels, that each generated image is resized to.
+    height: Height, in pixels, that each generated image is resized to.
+  """
+
+  def __init__(
+      self,
+      model_name: str = 'imagen-3.0-generate-002',
+      max_concurrent_requests: int = 4,
+      max_qps: Union[int, float] = 25,
+      aspect_ratio: Optional[
+          Literal['16:9', '1:1', '3:4', '4:3', '9:16']
+      ] = None,
+      width: int = 256,
+      height: int = 256,
+  ):
+    super().__init__(max_concurrent_requests, max_qps)
+    # Connect to the remote model.
+    self._model = vision_models.ImageGenerationModel.from_pretrained(model_name)
+    self._aspect_ratio = aspect_ratio
+    self._width = width
+    self._height = height
+
+  def query_model(self, prompt: str, **unused_kw) -> list[str]:
+    """Generates images for `prompt`, retrying failed requests with backoff.
+
+    Returns the generated images, each resized to the configured size and
+    encoded as an image string. Raises ValueError if every attempt fails.
+    """
+    num_attempts = 0
+    predictions = None
+    exception = None
+
+    while num_attempts < _MAX_NUM_RETRIES and predictions is None:
+      num_attempts += 1
+      try:
+        predictions = self._model.generate_images(
+            prompt=prompt, aspect_ratio=self._aspect_ratio
+        )
+      except Exception as e:  # pylint: disable=broad-except
+        exception = e
+        if num_attempts < _MAX_NUM_RETRIES:
+          # Back off exponentially, but do not sleep once retries are used up.
+          wait_time = 2**num_attempts
+          logging.warning('Waiting %ds to retry... (%s)', wait_time, e)
+          time.sleep(wait_time)
+
+    if predictions is None:
+      raise ValueError(
+          f'Failed to get predictions. ({exception})'
+      ) from exception
+
+    if not isinstance(predictions, Iterable):
+      raise ValueError(f'Predictions is not an Iterable: {type(predictions)}')
+
+    images = []
+    for image_ in predictions.images:
+      pil_img = Image.open(io.BytesIO(getattr(image_, '_image_bytes')))
+      pil_img = pil_img.resize((self._width, self._height))
+      images.append(image_utils.convert_pil_to_image_str(pil_img))
+    return images
+
+  def predict_minibatch(
+      self, inputs: list[lit_types.JsonDict]
+  ) -> list[lit_types.JsonDict]:
+    """Generates one image for each input prompt.
+
+    The model may return several images per prompt, but only the first is
+    kept because the LIT frontend may only show one.
+
+    Args:
+      inputs: A list of input dictionaries, each containing a 'prompt'.
+
+    Returns:
+      A list of dictionaries, each containing the generated 'image' and the
+      original 'prompt'.
+
+    Raises:
+      ValueError: If no image is generated for a prompt.
+    """
+    results = []
+    for inp in inputs:
+      prompt = inp['prompt']
+      b64_strs = self.query_model(prompt)
+      if not b64_strs:
+        raise ValueError(f'No images generated for prompt: {prompt}')
+      results.append({'image': b64_strs[0], 'prompt': prompt})
+    return results
+
+  @classmethod
+  def init_spec(cls) -> lit_types.Spec:
+    return {
+        'model_name': lit_types.String(
+            default='imagen-3.0-generate-002', required=True
+        ),
+        'aspect_ratio': lit_types.String(default='1:1', required=False),
+        'width': lit_types.Integer(default=256, required=False),
+        'height': lit_types.Integer(default=256, required=False),
+    }
+
+  def input_spec(self) -> lit_types.Spec:
+    return {
+        'prompt': lit_types.TextSegment(),
+    }
+
+  def output_spec(self) -> lit_types.Spec:
+    # NOTE(review): predict_minibatch() emits one image string per example,
+    # not a list — confirm that ImageBytesList is the intended type here.
+    return {
+        'image': lit_types.ImageBytesList(),
+    }
diff --git a/lit_nlp/examples/gcp_text_to_image/models_test.py b/lit_nlp/examples/gcp_text_to_image/models_test.py
@@ -0,0 +1,83 @@
+import base64
+from unittest import mock
+from absl.testing import absltest
+from vertexai import vision_models
+from lit_nlp.examples.gcp_text_to_image import models
+
+
+class MockModel:
+  """Stand-in for ImageGenerationModel that counts generate_images calls."""
+
+  def __init__(
+      self, images=None, raise_exception=False, sample_image_bytes=None
+  ):
+    self.images = images or []
+    self.raise_exception = raise_exception
+    self.call_count = 0
+    self.sample_image_bytes = sample_image_bytes
+
+  def generate_images(self, prompt, aspect_ratio=None):
+    del prompt, aspect_ratio  # Unused by the mock.
+    self.call_count += 1
+    if self.raise_exception:
+      raise ValueError("Mock Model Error")
+
+    generated = []
+    if self.sample_image_bytes:
+      # Autospec'd GeneratedImage carrying the sample bytes.
+      fake_image = mock.create_autospec(
+          vision_models.GeneratedImage, instance=True
+      )
+      fake_image._image_bytes = self.sample_image_bytes
+      generated.append(fake_image)
+    return vision_models.ImageGenerationResponse(images=generated)
+
+
+class ModelsTest(absltest.TestCase):
+  """Tests VertexModelGardenModel with the Vertex AI model mocked out."""
+
+  def setUp(self):
+    super().setUp()
+    # Create a sample image for testing
+    png_base64 = b"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMBAKh72VgAAAAASUVORK5CYII="
+    self.sample_image_bytes = base64.b64decode(png_base64)
+
+  @mock.patch(
+      "vertexai.vision_models.ImageGenerationModel.from_pretrained",
+  )
+  @mock.patch("PIL.Image.open")
+  def test_query_model(self, mock_image_open, mock_from_pretrained):
+    # Have from_pretrained() return a MockModel instead of a real model.
+    mock_model = MockModel(
+        sample_image_bytes=self.sample_image_bytes,
+    )
+    mock_from_pretrained.return_value = mock_model
+
+    model = models.VertexModelGardenModel(model_name="test_model_name")
+    mock_image = mock.Mock()
+
+    mock_image.resize.return_value = mock_image
+    mock_image_open.return_value = mock_image
+
+    output = model.predict_minibatch(
+        inputs=[{"prompt": "I say yes you say no"}]
+    )
+    result = list(output)
+
+    self.assertLen(result, 1)
+    self.assertIn("image", result[0])
+    self.assertIn("prompt", result[0])
+    self.assertEqual(result[0]["prompt"], "I say yes you say no")
+
+    # Validate that the image is a PNG data-URI string
+    self.assertTrue(result[0]["image"].startswith("data:image/png"))
+    self.assertIsInstance(result[0]["image"], str)
+
+    mock_from_pretrained.assert_called_once_with("test_model_name")
+
+    # Assert that generate_images was called exactly once
+    self.assertEqual(mock_model.call_count, 1)
+
+
+if __name__ == "__main__":
+  absltest.main()  # Runs the test cases defined in this module.