|
| 1 | +"""Example use case inspired by Google Gemini. |
| 2 | +
|
| 3 | +The real [Google Gemini](https://deepmind.google/technologies/gemini/) family |
| 4 | +of models is designed to operate in a multimodal setting (text, image, audio, |
| 5 | +and more) while also supporting reasoning and structured output. |
| 6 | +
|
| 7 | +This module implements a light-weight, fully local simulation of a possible |
| 8 | +workflow that such a model could enable: automatically summarising the key |
| 9 | +moments of a meeting by combining textual notes, image observations, and |
| 10 | +structured action items. The goal of the example is educational – showing how |
| 11 | +one could *organise* information for a multimodal model, not to replicate any |
| 12 | +proprietary model behaviour. |
| 13 | +
|
| 14 | +The :class:`GeminiMeetingSummarizer` exposes two public methods: |
| 15 | +
|
| 16 | +``summarize`` |
| 17 | + Produces a human readable summary string. |
| 18 | +
|
| 19 | +``build_structured_report`` |
| 20 | + Produces a JSON-serialisable dictionary emphasising the same information |
| 21 | + but in a machine friendly format. |
| 22 | +
|
| 23 | +Both methods rely on simple, deterministic heuristics to keep the example |
| 24 | +self-contained and easy to test. Nevertheless, the overall flow mirrors a real |
| 25 | +Gemini use case: |
| 26 | +
|
| 27 | +1. Gather information from multiple modalities (textual notes and image |
| 28 | + observations). |
| 29 | +2. Extract salient talking points via keyword scoring. |
| 30 | +3. Merge the salient points with previously computed action items. |
| 31 | +4. Return both natural language and structured artefacts. |
| 32 | +
|
| 33 | +Example |
| 34 | +------- |
| 35 | +
|
| 36 | +>>> documents = [ |
| 37 | +... MeetingDocument( |
| 38 | +... title="Roadmap discussion", |
| 39 | +... content="We reviewed Q3 targets and prioritised the Gemini launch.", |
| 40 | +... ), |
| 41 | +... MeetingDocument( |
| 42 | +... title="Budget", |
| 43 | +... content="Marketing receives additional funds to prepare promo videos.", |
| 44 | +... ), |
| 45 | +... ] |
| 46 | +>>> observations = [ |
| 47 | +... ImageObservation(description="Slide showing Gemini app mock-ups."), |
| 48 | +... ] |
| 49 | +>>> action_items = [ |
| 50 | +... ActionItem(owner="Alex", task="Draft launch blog post", due_date="2024-06-18"), |
| 51 | +... ] |
| 52 | +>>> summarizer = GeminiMeetingSummarizer() |
| 53 | +>>> summarizer.summarize(documents, observations, action_items)  # doctest: +ELLIPSIS |
| 54 | +'Key updates: We reviewed Q3 targets and prioritised the Gemini launch...Action items: Alex to Draft launch blog post by 2024-06-18.' |
| 55 | +
|
| 56 | +Even though the underlying logic is intentionally straightforward, the |
| 57 | +structure of the code demonstrates how a developer might prepare data for a |
| 58 | +multimodal, reasoning-centric assistant such as Gemini. |
| 59 | +""" |
| 60 | + |
| 61 | +from __future__ import annotations |
| 62 | + |
| 63 | +from dataclasses import dataclass |
| 64 | +from typing import Iterable, Sequence |
| 65 | + |
# Common English function words that the keyword heuristics in this module
# skip when scoring sentences and when collecting visual keywords.
STOPWORDS = {
    "a", "an", "and", "are", "as",
    "at", "be", "by", "for", "from",
    "has", "in", "is", "it", "of",
    "on", "that", "the", "to", "we",
}
| 88 | + |
| 89 | + |
@dataclass(frozen=True)
class MeetingDocument:
    """Immutable record of one written artefact from a meeting.

    Attributes:
        title: Short label identifying the document.
        content: Body text of the document.
    """

    title: str
    content: str
| 96 | + |
| 97 | + |
@dataclass(frozen=True)
class ImageObservation:
    """Immutable wrapper around a text description of something seen.

    Attributes:
        description: Short natural language description of the visual.
    """

    description: str
| 103 | + |
| 104 | + |
@dataclass(frozen=True)
class ActionItem:
    """Immutable record of a task agreed during the meeting.

    Attributes:
        owner: Name of whoever is responsible for the task.
        task: Description of the work to be done.
        due_date: Deadline kept as plain text; no date format is enforced.
    """

    owner: str
    task: str
    due_date: str
| 112 | + |
| 113 | + |
class GeminiMeetingSummarizer:
    """Summarises meeting artefacts in the spirit of a Gemini-style workflow.

    The class keeps no state: both public methods derive their output purely
    from the arguments. Highlights consist of one sentence per document
    (picked by ``_first_relevant_sentence``) followed by a single sentence
    covering all image observations (built by ``_summarise_visuals``).
    """

    def summarize(
        self,
        documents: Sequence[MeetingDocument],
        observations: Sequence[ImageObservation],
        action_items: Sequence[ActionItem],
    ) -> str:
        """Return a readable summary of the provided meeting artefacts.

        Args:
            documents: Textual artefacts; each contributes at most one
                highlight sentence.
            observations: Image descriptions; together they contribute one
                highlight sentence when the sequence is non-empty.
            action_items: Tasks rendered as "<owner> to <task> by <due_date>".

        Returns:
            A single string made of a highlights part ("Key updates: ..." or
            "No textual highlights captured.") followed by an action part
            ("Action items: ..." or "No action items captured.").
        """
        highlights = self._generate_highlights(documents, observations)
        actions = self._render_actions(action_items)

        if highlights:
            sentence = "Key updates: " + " ".join(highlights)
        else:
            sentence = "No textual highlights captured."

        if actions:
            sentence += f" Action items: {actions}."
        else:
            sentence += " No action items captured."

        return sentence.strip()

    def build_structured_report(
        self,
        documents: Sequence[MeetingDocument],
        observations: Sequence[ImageObservation],
        action_items: Sequence[ActionItem],
    ) -> dict[str, object]:
        """Return a JSON-serialisable representation of the meeting.

        Args:
            documents: Textual artefacts, echoed under ``source_documents``.
            observations: Image descriptions, echoed under
                ``visual_observations``.
            action_items: Tasks, echoed under ``action_items``.

        Returns:
            A dictionary with the keys ``highlights``, ``action_items``,
            ``source_documents`` and ``visual_observations``. The highlights
            are the same sentences that ``summarize`` joins together.
        """
        return {
            "highlights": self._generate_highlights(documents, observations),
            "action_items": [
                {
                    "owner": item.owner,
                    "task": item.task,
                    "due_date": item.due_date,
                }
                for item in action_items
            ],
            "source_documents": [
                {"title": doc.title, "content": doc.content}
                for doc in documents
            ],
            "visual_observations": [
                observation.description for observation in observations
            ],
        }

    @staticmethod
    def _as_sentence(text: str) -> str:
        """Return *text* closed with a full stop unless it already ends a sentence."""
        if text.endswith((".", "!", "?")):
            return text
        return text + "."

    @staticmethod
    def _generate_highlights(
        documents: Sequence[MeetingDocument],
        observations: Sequence[ImageObservation],
    ) -> list[str]:
        """Collect one highlight per document plus one for all observations.

        Documents whose content yields no sentence are skipped. Document
        highlights are passed through ``_as_sentence`` because the sentence
        picked for a document may arrive without terminal punctuation;
        without that step the joined summary reads as one run-on sentence.
        """
        highlights = []

        for doc in documents:
            summary = _first_relevant_sentence(doc.content)
            if summary:
                highlights.append(GeminiMeetingSummarizer._as_sentence(summary))

        if observations:
            highlights.append(_summarise_visuals(observations))

        return highlights

    @staticmethod
    def _render_actions(action_items: Sequence[ActionItem]) -> str:
        """Render the action items as a "; "-separated phrase ("" when empty)."""
        return "; ".join(
            f"{item.owner} to {item.task} by {item.due_date}"
            for item in action_items
        )
| 196 | + |
| 197 | + |
def _first_relevant_sentence(text: str) -> str:
    """Return the sentence of *text* with the highest keyword score.

    When several sentences share the top score the earliest one wins, so a
    text in which no sentence scores at all yields its first sentence.

    Args:
        text: Free-form text to pick a sentence from.

    Returns:
        The selected sentence, or ``""`` when *text* contains no sentences.
    """
    candidates = _split_into_sentences(text)
    if not candidates:
        return ""

    # max() returns the first of equally ranked items, which gives the
    # "earliest sentence wins" tie-break described above.
    return max(candidates, key=_keyword_score)
| 214 | + |
| 215 | + |
| 216 | +def _split_into_sentences(text: str) -> list[str]: |
| 217 | + candidate = [part.strip() for part in text.replace("\n", " ").split(".")] |
| 218 | + return [sentence for sentence in candidate if sentence] |
| 219 | + |
| 220 | + |
def _keyword_score(sentence: str) -> int:
    """Count the meaningful words in *sentence*.

    Each whitespace-separated word is normalised with ``_normalise_word``;
    it counts when the normalised form is longer than three characters and
    is not listed in ``STOPWORDS``.

    Args:
        sentence: The sentence to score.

    Returns:
        The number of words that passed both checks.
    """
    tokens = map(_normalise_word, sentence.split())
    return sum(
        1 for token in tokens if len(token) > 3 and token not in STOPWORDS
    )
| 228 | + |
| 229 | + |
| 230 | +def _normalise_word(word: str) -> str: |
| 231 | + return "".join(char for char in word.lower() if char.isalpha()) |
| 232 | + |
| 233 | + |
def _summarise_visuals(observations: Iterable[ImageObservation]) -> str:
    """Condense image observations into one sentence naming their keywords.

    Every word of every description is normalised with ``_normalise_word``;
    words that normalise to nothing and words listed in ``STOPWORDS`` are
    dropped. The survivors are ranked by ``_select_top_keywords`` and the
    selected ones are listed in the returned sentence.

    Args:
        observations: Image observations whose descriptions are scanned.

    Returns:
        "Visual assets reviewed from <keywords>." when at least one keyword
        survives, otherwise "Visuals reviewed with no prominent details.".
    """
    keywords = [
        word
        for observation in observations
        for word in map(_normalise_word, observation.description.split())
        if word and word not in STOPWORDS
    ]

    if not keywords:
        return "Visuals reviewed with no prominent details."

    # A non-empty keyword list always produces a non-empty selection with the
    # default limit, so no second fallback is needed after this point.
    primary = _select_top_keywords(keywords)
    return f"Visual assets reviewed from {' '.join(primary)}."
| 250 | + |
| 251 | + |
| 252 | +def _select_top_keywords(words: Iterable[str], limit: int = 3) -> list[str]: |
| 253 | + frequency: dict[str, int] = {} |
| 254 | + for word in words: |
| 255 | + frequency[word] = frequency.get(word, 0) + 1 |
| 256 | + |
| 257 | + ranked = sorted( |
| 258 | + frequency.items(), key=lambda item: (-item[1], item[0]) |
| 259 | + ) |
| 260 | + return [word for word, _ in ranked[:limit]] |
| 261 | + |
| 262 | + |
| 263 | +__all__ = [ |
| 264 | + "ActionItem", |
| 265 | + "GeminiMeetingSummarizer", |
| 266 | + "ImageObservation", |
| 267 | + "MeetingDocument", |
| 268 | +] |
| 269 | + |
0 commit comments