Merging Dev branch into Main (#22)

Mdperez19 · web-flow · commit 7cca46c0da5f · 2024-03-14T00:49:25.000-06:00
# PR: Merging Dev branch into Main

## What has been done?

- #18
- #19
- #20
- #21
diff --git a/.github/workflows/dev_rose-tt-api.yml b/.github/workflows/dev_rose-tt-api.yml
@@ -0,0 +1,77 @@
+# Docs for the Azure Web Apps Deploy action: https://github.com/Azure/webapps-deploy
+# More GitHub Actions for Azure: https://github.com/Azure/actions
+# More info on Python, GitHub Actions, and Azure App Service: https://aka.ms/python-webapps-actions
+
+name: Build and deploy Python app to Azure Web App - Rose-TT-API
+
+on:
+  push:
+    branches:
+      - dev
+  workflow_dispatch:
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python version
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
+      # Every run step starts a new shell, so the venv is activated in the same
+      # step that installs into it; otherwise pip targets the runner's Python.
+      - name: Create virtual environment and install dependencies
+        run: |
+          python -m venv venv
+          source venv/bin/activate
+          pip install -r requirements.txt
+
+      # Optional: Add step to run tests here (PyTest, Django test suites, etc.)
+
+      # Keep the build-time venv out of the archive; only release.zip is uploaded,
+      # so an exclusion pattern on the upload step would have no effect.
+      - name: Zip artifact for deployment
+        run: zip release.zip ./* -r -x 'venv/*'
+
+      - name: Upload artifact for deployment jobs
+        uses: actions/upload-artifact@v4
+        with:
+          name: python-app
+          path: release.zip
+
+  deploy:
+    runs-on: ubuntu-latest
+    needs: build
+    environment:
+      name: 'Production'
+      url: ${{ steps.deploy-to-webapp.outputs.webapp-url }}
+    permissions:
+      id-token: write #This is required for requesting the JWT
+
+    steps:
+      # Must stay on the same major version as upload-artifact in the build job.
+      - name: Download artifact from build job
+        uses: actions/download-artifact@v4
+        with:
+          name: python-app
+
+      - name: Unzip artifact for deployment
+        run: unzip release.zip
+
+      - name: Login to Azure
+        uses: azure/login@v1
+        with:
+          client-id: ${{ secrets.AZUREAPPSERVICE_CLIENTID_036307703B564B82AF74B71826757E2C }}
+          tenant-id: ${{ secrets.AZUREAPPSERVICE_TENANTID_22210C8B765F49FD80C76EBC938F73DD }}
+          subscription-id: ${{ secrets.AZUREAPPSERVICE_SUBSCRIPTIONID_B22146082A7841569867B644CE6A4B1E }}
+
+      - name: 'Deploy to Azure Web App'
+        uses: azure/webapps-deploy@v2
+        id: deploy-to-webapp
+        with:
+          app-name: 'Rose-TT-API'
+          slot-name: 'Production'
diff --git a/app.py b/app.py
@@ -1,9 +1,18 @@
+import os
+
+from dotenv import load_dotenv
 from flask import Flask
+
+# Load .env before importing the blueprint: the controller module builds the
+# OpenAI client at import time, and that client reads OPENAI_API_KEY on creation.
+if os.path.exists('.env'):
+    load_dotenv()
+
 from grammatical_analysis.grammatical_analysis_controller import grammatical_analysis_api
 
 app = Flask(__name__)
 
 app.register_blueprint(grammatical_analysis_api, url_prefix='/grammatical_analysis')
 
 if __name__ == '__main__':
-    app.run(debug=True)
+    app.run(debug=True)
diff --git a/grammatical_analysis/grammatical_analysis_controller.py b/grammatical_analysis/grammatical_analysis_controller.py
@@ -1,13 +1,27 @@
-from flask import Blueprint, jsonify
+from flask import Blueprint, jsonify, Response
 from .grammatical_analysis_service import GrammaticalAnalysisService
+from .natural_language_processing_tools.text_preprocessing.tokenizer.Tokenizer_nltk import TokenizerNltk
+from .natural_language_processing_tools.text_preprocessing.pos_tagger.POS_tagger_nltk import POSTaggerNltk
+from .natural_language_processing_tools.token_processor.GrammarAnalyzerOpenai import GrammarAnalyzerOpenai
+from http import HTTPStatus
+
 grammatical_analysis_api = Blueprint('grammatical_analysis_api', __name__)
 
-@grammatical_analysis_api.route('/')
-def index():
-   return jsonify("Hello World"), 200
+# Shared NLP collaborators, created once when this module is imported and
+# reused by every request.
+tokenizer = TokenizerNltk()
+pos_tagger = POSTaggerNltk()
+grammatical_analyzer = GrammarAnalyzerOpenai()
+
 
-@grammatical_analysis_api.route('/<text>', methods=['GET'])
-def analyze_grammar(text):
-    service = GrammaticalAnalysisService()
+# The `path` converter is used because the default string converter rejects
+# any text containing '/'.
+@grammatical_analysis_api.route('/<path:text>', methods=['GET'])
+def analyze_grammar(text: str) -> tuple[Response, int]:
+    """Run the tokenize / POS-tag / grammar-analysis pipeline on `text`.
+
+    Returns the service's result as a JSON body with HTTP 200.
+    """
+    service = GrammaticalAnalysisService(tokenizer, pos_tagger, grammatical_analyzer)
     result = service.analyze_text_grammatically(text)
-    return jsonify(result), 200
+    return jsonify(result), HTTPStatus.OK
diff --git a/grammatical_analysis/grammatical_analysis_service.py b/grammatical_analysis/grammatical_analysis_service.py
@@ -1,12 +1,20 @@
-from .natural_language_processing_tools.NLPNltkOpenai import NLPNltkOpenai
+from grammatical_analysis.natural_language_processing_tools.text_preprocessing.tokenizer.Tokenizer import Tokenizer
+from grammatical_analysis.natural_language_processing_tools.text_preprocessing.pos_tagger.POSTagger import POSTagger
+from grammatical_analysis.natural_language_processing_tools.token_processor.GrammarAnalyzer import GrammarAnalyzer
 
 
 class GrammaticalAnalysisService:
-    def __init__(self):
-        self.nlp = NLPNltkOpenai()
+    """Chains a sentence tokenizer, a POS tagger and a grammar analyzer."""
+
+    def __init__(self, tokenizer: Tokenizer, pos_tagger: POSTagger, grammar_analyzer: GrammarAnalyzer):
+        """Keep the three collaborators passed in by the caller."""
+        self.tokenizer = tokenizer
+        self.pos_tagger = pos_tagger
+        self.grammar_analyzer = grammar_analyzer
 
-    def analyze_text_grammatically(self, text_to_analyze):
-        tokenized_sentences = self.nlp.tokenize_sentences(text_to_analyze)
-        pos_tagged_sentences = self.nlp.tag_sentences_with_pos(tokenized_sentences)
-        analyzed_text = self.nlp.analyze_grammar(pos_tagged_sentences)
+    def analyze_text_grammatically(self, text_to_analyze: str) -> str:
+        """Tokenize the text by sentences, POS-tag them and return the analyzer's result."""
+        tokenized_sentences = self.tokenizer.tokenize_text_by_sentences(text_to_analyze)
+        pos_tagged_sentences = self.pos_tagger.tag_sentences_with_pos(tokenized_sentences)
+        analyzed_text = self.grammar_analyzer.analyze_grammar(pos_tagged_sentences)
         return analyzed_text
diff --git a/grammatical_analysis/natural_language_processing_tools/NLP.py b/grammatical_analysis/natural_language_processing_tools/NLP.py
diff --git a/grammatical_analysis/natural_language_processing_tools/NLPNltkOpenai.py b/grammatical_analysis/natural_language_processing_tools/NLPNltkOpenai.py
diff --git a/grammatical_analysis/natural_language_processing_tools/text_preprocessing/pos_tagger/POS_tagger_nltk.py b/grammatical_analysis/natural_language_processing_tools/text_preprocessing/pos_tagger/POS_tagger_nltk.py
@@ -1,9 +1,10 @@
 from .POSTagger import POSTagger
 import nltk
+from nltk import UnigramTagger
 from nltk.corpus import cess_esp
 
 
-def get_tagger():
+def get_tagger() -> UnigramTagger:
     patterns = [
         (r".*é$", "VBD"),  # past verb
         (r".*ó$", "VBD"),  # past verb
@@ -36,11 +37,50 @@ class POSTaggerNltk(POSTagger):
     def __init__(self):
         nltk.download('cess_esp')
         nltk.download('universal_tagset')
-    
+        # The tagger is created on first use and then reused: building it
+        # trains on every tagged sentence of the cess_esp corpus.
+        self._unigram_tagger = None
+
     def tag_sentences_with_pos(self, tokenized_sentences: list) -> list:
-        unigram_tagger = get_tagger()
+        """Word-tokenize each sentence and return one list of (token, tag) pairs per sentence."""
+        if self._unigram_tagger is None:
+            self._unigram_tagger = self.get_tagger()
+        unigram_tagger = self._unigram_tagger
         tagged_sentences = [
             unigram_tagger.tag(nltk.word_tokenize(sentence))
             for sentence in tokenized_sentences
         ]
         return tagged_sentences
+
+    @staticmethod
+    def get_tagger() -> nltk.UnigramTagger:
+        """Build the tagger chain: corpus unigrams, then suffix regexes, then NOUN."""
+        patterns = [
+            (r".*é$", "VBD"),  # past verb
+            (r".*ó$", "VBD"),  # past verb
+            # NOTE(review): "-rán" is a future-tense ending but is tagged VBD — confirm.
+            (r".*rán$", "VBD"),
+            (r".*ando$", "VBG"),  # gerund
+            (r".*iendo$", "VBG"),  # gerund
+            (r".*endo$", "VBG"),  # gerund
+            (r".*osa$", "ADJ"),  # adjective
+            (r".*oso$", "ADJ"),  # adjective
+            (r".*o$", "NOUN"),  # noun masculine singular
+            (r".*os$", "NOUN"),  # noun masculine plural
+            (r".*a$", "NOUN"),  # noun feminine singular
+            (r".*as$", "NOUN"),  # noun feminine plural
+        ]
+
+        default_tagger = "NOUN"
+        default = nltk.DefaultTagger(default_tagger)
+
+        # NOTE(review): NLTK corpus readers usually take tagset='universal';
+        # confirm that 'universal_tagset' (the download id) is accepted here.
+        sentences_tagged = []
+        for sentence in cess_esp.tagged_sents(tagset='universal_tagset'):
+            sentences_tagged.append([(word, tag) for (word, tag) in sentence])
+
+        regex_tagger = nltk.RegexpTagger(patterns, backoff=default)
+        unigram_tagger = nltk.UnigramTagger(sentences_tagged, backoff=regex_tagger)
+
+        return unigram_tagger
diff --git a/grammatical_analysis/natural_language_processing_tools/token_processor/GrammarAnalyzerOpenai.py b/grammatical_analysis/natural_language_processing_tools/token_processor/GrammarAnalyzerOpenai.py
@@ -3,23 +3,17 @@
 import os
 
 
-def tagged_sentences_to_string(tagged_sentences):
-    tagged_sentences_strings = []
-    for tagged_sentence in tagged_sentences:
-        sentence_string = " ".join([f"{word}/{tag}" for word, tag in tagged_sentence])
-        tagged_sentences_strings.append(sentence_string)
-    return "\n".join(tagged_sentences_strings)
-
-
 class GrammarAnalyzerOpenai(GrammarAnalyzer):
     def __init__(self):
-        pass
+        """Create the OpenAI client once, using OPENAI_API_KEY from the environment."""
+        openai_key = os.environ.get('OPENAI_API_KEY')
+        self.client = OpenAI(api_key=openai_key)
 
     def analyze_grammar(self, pos_tagged_sentences: list) -> str:
-        prompt = tagged_sentences_to_string(pos_tagged_sentences)
-        openai_key = os.environ.get('OPENAI_API_KEY')
-        client = OpenAI(api_key=openai_key)
-        completion = client.chat.completions.create(
+        """Send the POS-tagged sentences to the chat model and return its reply text."""
+        # Called through self: the formatter is a static method, not a module-level function.
+        text_to_analyze = self.tagged_sentences_to_string(pos_tagged_sentences)
+        completion = self.client.chat.completions.create(
             model="gpt-4",
             messages=[
                 {
@@ -28,11 +19,19 @@ def analyze_grammar(self, pos_tagged_sentences: list) -> str:
                 },
                 {
                     "role": "user",
-                    "content": prompt,
+                    "content": text_to_analyze,
                 },
             ],
         )
 
         response = completion.choices[0].message.content
 
         return response
+
+    @staticmethod
+    def tagged_sentences_to_string(tagged_sentences):
+        """Render each sentence as space-separated word/tag pairs, one sentence per line."""
+        return "\n".join(
+            " ".join(f"{word}/{tag}" for word, tag in tagged_sentence)
+            for tagged_sentence in tagged_sentences
+        )
diff --git a/requirements.txt b/requirements.txt
@@ -23,3 +23,4 @@ sniffio==1.3.0
 tqdm==4.66.2
 typing_extensions==4.9.0
 Werkzeug==3.0.1
+python-dotenv~=1.0.1