|
1 | | -# === Imports === |
| 1 | +""" |
| 2 | +Main entry point for the Enhanced AGI Pipeline API. |
| 3 | +""" |
| 4 | + |
2 | 5 | import os |
3 | | -import asyncio |
4 | | -import time |
5 | | -from typing import List |
6 | | -import torch |
7 | | -from transformers import T5Tokenizer, T5ForConditionalGeneration |
| 6 | +from io import BytesIO |
| 7 | +from fastapi import FastAPI, UploadFile, File, HTTPException, Depends |
| 8 | +from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials |
8 | 9 | from PIL import Image |
9 | | -from fastapi import FastAPI, UploadFile, Depends, HTTPException, Request |
10 | | -from fastapi.security import OAuth2PasswordBearer |
11 | | -from pydantic import BaseModel, SecretStr |
12 | | -import whisper |
13 | | -from ultralytics import YOLO |
14 | | -import pyttsx3 |
15 | 10 | from loguru import logger |
16 | | -import io |
17 | | -import nest_asyncio |
18 | | -import uvicorn |
19 | | - |
20 | | -# === Logging Setup === |
21 | | -logger.add("pipeline_{time}.log", rotation="1 MB", level="DEBUG", enqueue=True, backtrace=True, diagnose=True) |
22 | | -logger.info("Application startup") |
23 | | - |
24 | | -# === Security Enhancement: Environment Variable for Secure Token === |
25 | | -SECURE_TOKEN = SecretStr(os.getenv("SECURE_TOKEN", "YvZz9Hni0hWJPh_UWW4dQYf9rhIe9nNYcC5ZQTTZz0Q")) |
26 | | - |
27 | | -# === OAuth2PasswordBearer for Authentication === |
28 | | -oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token") |
29 | | - |
30 | | -# === Authentication Function === |
31 | | -def authenticate_user(token: str = Depends(oauth2_scheme)): |
32 | | - if token != SECURE_TOKEN.get_secret_value(): |
33 | | - logger.warning("Authentication failed.") |
34 | | - raise HTTPException(status_code=401, detail="Invalid token") |
35 | | - |
36 | | -# === Request and Response Models (Pydantic) === |
37 | | -class TextRequest(BaseModel): |
38 | | - text: str |
39 | | - |
40 | | -class TextResponse(BaseModel): |
41 | | - response: str |
42 | | - |
43 | | -# === NLP Module (T5 Transformer) === |
44 | | -class NLPModule: |
45 | | - def __init__(self): |
46 | | - model_name = "google/flan-t5-small" |
47 | | - self.tokenizer = T5Tokenizer.from_pretrained(model_name) |
48 | | - self.model = T5ForConditionalGeneration.from_pretrained(model_name) |
49 | | - logger.info("NLP model loaded successfully.") |
50 | | - |
51 | | - def generate_text(self, prompt: str) -> str: |
52 | | - if not prompt.strip(): |
53 | | - raise ValueError("Prompt cannot be empty.") |
54 | | - logger.debug(f"Generating text for prompt: {prompt}") |
55 | | - inputs = self.tokenizer(prompt, return_tensors="pt") |
56 | | - outputs = self.model.generate(inputs["input_ids"], max_length=100) |
57 | | - response = self.tokenizer.decode(outputs[0], skip_special_tokens=True) |
58 | | - logger.info(f"Generated response: {response}") |
59 | | - return response |
60 | | - |
61 | | -# === CV Module (YOLOv8 for Object Detection) === |
62 | | -class CVModule: |
63 | | - def __init__(self): |
64 | | - self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") |
65 | | - self.model = YOLO('yolov8n.pt').to(self.device) |
66 | | - logger.info("CV model loaded successfully.") |
67 | 11 |
|
68 | | - def detect_objects(self, image: Image.Image) -> str: |
69 | | - logger.debug("Detecting objects in the image.") |
70 | | - results = self.model(image) |
71 | | - return results.pandas().xyxy[0].to_json() |
72 | | - |
73 | | -# === Speech Processor (Whisper for Speech-to-Text, PyTTSX3 for Text-to-Speech) === |
74 | | -class SpeechProcessor: |
75 | | - def __init__(self): |
76 | | - self.whisper_model = whisper.load_model("base") |
77 | | - self.tts = pyttsx3.init() |
78 | | - logger.info("Speech processor initialized successfully.") |
| 12 | +from nlp_module import NLPModule |
| 13 | +from cv_module import CVModule |
| 14 | +from speech_processor import SpeechProcessor |
79 | 15 |
|
80 | | - def speech_to_text(self, audio_file: UploadFile) -> str: |
81 | | - with audio_file.file as audio_data: |
82 | | - result = self.whisper_model.transcribe(audio_data) |
83 | | - return result['text'] |
| 16 | +# API Key from environment or default |
| 17 | +VALID_API_KEY = os.getenv("AGI_API_KEY", "YvZz9Hni0hWJPh_UWW4dQYf9rhIe9nNYcC5ZQTTZz0Q") |
84 | 18 |
|
85 | | - def text_to_speech(self, text: str) -> None: |
86 | | - if not text.strip(): |
87 | | - raise ValueError("Text cannot be empty.") |
88 | | - self.tts.say(text) |
89 | | - self.tts.runAndWait() |
| 19 | +security = HTTPBearer() |
90 | 20 |
|
91 | | - def __del__(self): |
92 | | - self.tts.stop() |
93 | 21 |
|
94 | | -# === Enhanced AGI Pipeline === |
95 | 22 | class EnhancedAGIPipeline: |
| 23 | + """ |
| 24 | + A wrapper class that integrates NLP, CV, and Speech modules. |
| 25 | + """ |
| 26 | + |
96 | 27 | def __init__(self): |
| 28 | + """ |
| 29 | + Initializes all pipeline modules. |
| 30 | + """ |
97 | 31 | self.nlp = NLPModule() |
98 | 32 | self.cv = CVModule() |
99 | | - self.speech_processor = SpeechProcessor() |
| 33 | + self.speech = SpeechProcessor() |
| 34 | + |
| 35 | + def process_nlp(self, prompt: str) -> str: |
| 36 | + """ |
| 37 | + Processes text using the NLP module. |
| 38 | + """ |
| 39 | + return self.nlp.generate_text(prompt) |
100 | 40 |
|
101 | | - async def process_nlp(self, text: str) -> str: |
102 | | - return await asyncio.to_thread(self.nlp.generate_text, text) |
| 41 | + def process_cv(self, image: Image.Image) -> str: |
| 42 | + """ |
| 43 | + Processes an image using the CV module. |
| 44 | + """ |
| 45 | + return self.cv.detect_objects(image) |
103 | 46 |
|
104 | | - async def process_cv(self, image: Image.Image) -> str: |
105 | | - return await asyncio.to_thread(self.cv.detect_objects, image) |
| 47 | + def process_stt(self, file: UploadFile) -> str: |
| 48 | + """ |
| 49 | + Processes audio using the STT module. |
| 50 | + """ |
| 51 | + return self.speech.speech_to_text(file) |
106 | 52 |
|
107 | | - async def process_speech_to_text(self, audio_file: UploadFile) -> str: |
108 | | - return await asyncio.to_thread(self.speech_processor.speech_to_text, audio_file) |
| 53 | + def process_tts(self, text: str) -> None: |
| 54 | + """ |
| 55 | + Processes text using the TTS module. |
| 56 | + """ |
| 57 | + self.speech.text_to_speech(text) |
109 | 58 |
|
110 | | - async def process_text_to_speech(self, text: str) -> None: |
111 | | - await asyncio.to_thread(self.speech_processor.text_to_speech, text) |
112 | 59 |
|
113 | | -# === FastAPI Application === |
114 | 60 | app = FastAPI() |
| 61 | +agi = EnhancedAGIPipeline() |
| 62 | + |
| 63 | + |
| 64 | +def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)): |
| 65 | + """ |
| 66 | + Verifies the Bearer token in the Authorization header. |
| 67 | + """ |
| 68 | + if credentials.credentials != VALID_API_KEY: |
| 69 | + raise HTTPException(status_code=403, detail="Forbidden") |
| 70 | + |
| 71 | + |
| 72 | +@app.post("/process-nlp/", dependencies=[Depends(verify_token)]) |
| 73 | +async def process_nlp(data: dict): |
| 74 | + """ |
| 75 | + Endpoint for text generation. |
| 76 | + """ |
| 77 | + try: |
| 78 | + prompt = data.get("text", "") |
| 79 | + return {"response": agi.process_nlp(prompt)} |
| 80 | + except Exception as e: |
| 81 | + logger.error(f"NLP Error: {e}") |
| 82 | + raise HTTPException(status_code=500, detail=str(e)) from e |
| 83 | + |
| 84 | + |
| 85 | +@app.post("/process-cv-detection/", dependencies=[Depends(verify_token)]) |
| 86 | +async def process_cv_detection(file: UploadFile = File(...)): |
| 87 | + """ |
| 88 | + Endpoint for object detection in images. |
| 89 | + """ |
| 90 | + try: |
| 91 | + image_data = await file.read() |
| 92 | + image = Image.open(BytesIO(image_data)) |
| 93 | + return {"detections": agi.process_cv(image)} |
| 94 | + except Exception as e: |
| 95 | + logger.error(f"CV Error: {e}") |
| 96 | + raise HTTPException(status_code=500, detail=str(e)) from e |
| 97 | + |
| 98 | + |
| 99 | +@app.post("/speech-to-text/", dependencies=[Depends(verify_token)]) |
| 100 | +async def speech_to_text(file: UploadFile = File(...)): |
| 101 | + """ |
| 102 | + Endpoint for Speech-to-Text conversion. |
| 103 | + """ |
| 104 | + try: |
| 105 | + return {"response": agi.process_stt(file)} |
| 106 | + except Exception as e: |
| 107 | + logger.error(f"STT Error: {e}") |
| 108 | + raise HTTPException(status_code=500, detail=str(e)) from e |
| 109 | + |
| 110 | + |
| 111 | +@app.post("/text-to-speech/", dependencies=[Depends(verify_token)]) |
| 112 | +async def text_to_speech(data: dict): |
| 113 | + """ |
| 114 | + Endpoint for Text-to-Speech conversion. |
| 115 | + """ |
| 116 | + try: |
| 117 | + text = data.get("text", "") |
| 118 | + agi.process_tts(text) |
| 119 | + return {"response": "Speech synthesis complete."} |
| 120 | + except Exception as e: |
| 121 | + logger.error(f"TTS Error: {e}") |
| 122 | + raise HTTPException(status_code=500, detail=str(e)) from e |
| 123 | + |
115 | 124 |
|
116 | | -pipeline = EnhancedAGIPipeline() |
117 | | - |
118 | | -# === Endpoints === |
119 | | -@app.post("/process-nlp/", response_model=TextResponse, dependencies=[Depends(authenticate_user)]) |
120 | | -async def process_nlp(request: TextRequest): |
121 | | - response = await pipeline.process_nlp(request.text) |
122 | | - return {"response": response} |
123 | | - |
124 | | -@app.post("/process-cv-detection/", dependencies=[Depends(authenticate_user)]) |
125 | | -async def process_cv_detection(file: UploadFile): |
126 | | - image = Image.open(io.BytesIO(await file.read())) |
127 | | - response = await pipeline.process_cv(image) |
128 | | - return {"detections": response} |
129 | | - |
130 | | -@app.post("/batch-cv-detection/", dependencies=[Depends(authenticate_user)]) |
131 | | -async def batch_cv_detection(files: List[UploadFile]): |
132 | | - responses = [] |
133 | | - for file in files: |
134 | | - image = Image.open(io.BytesIO(await file.read())) |
135 | | - response = await pipeline.process_cv(image) |
136 | | - responses.append(response) |
137 | | - return {"batch_detections": responses} |
138 | | - |
139 | | -@app.post("/speech-to-text/", response_model=TextResponse, dependencies=[Depends(authenticate_user)]) |
140 | | -async def speech_to_text(file: UploadFile): |
141 | | - response = await pipeline.process_speech_to_text(file) |
142 | | - return {"response": response} |
143 | | - |
144 | | -@app.post("/text-to-speech/", dependencies=[Depends(authenticate_user)]) |
145 | | -async def text_to_speech(request: TextRequest): |
146 | | - await pipeline.process_text_to_speech(request.text) |
147 | | - return {"response": "Speech synthesis complete."} |
148 | | - |
149 | | -# === Run the Application with HTTPS (uvicorn) === |
150 | 125 | if __name__ == "__main__": |
151 | | - nest_asyncio.apply() |
152 | | - config = uvicorn.Config(app, host="0.0.0.0", port=8000) |
153 | | - server = uvicorn.Server(config) |
154 | | - asyncio.run(server.serve()) |
| 126 | + import uvicorn |
| 127 | + |
| 128 | + uvicorn.run(app, host="127.0.0.1", port=8000) |
0 commit comments