Provide controls in the UI and API layers to enable/disable image-processing options.

raaj-akshar · raaj-akshar · commit 4d5f1f55a63a · 2025-06-02T23:33:37.000+05:30
diff --git a/html/index.html b/html/index.html
@@ -18,31 +18,35 @@ <h1 class="text-center mb-4">Extract text from documents</h1>
             <label for="fileInput" class="form-label">Upload an image, a scanned document or a PDF.</label>
             <input class="form-control" type="file" id="fileInput" accept="image/*,.pdf" required>
           </div>
-<div class="accordion mb-3" id="processingOptionsAccordion">
-  <div class="accordion-item border-0">
-    <h2 class="accordion-header" id="headingOptions">
-      <button class="accordion-button collapsed py-2 small" type="button" data-bs-toggle="collapse" data-bs-target="#collapseOptions" aria-expanded="false" aria-controls="collapseOptions">
-        Advanced Options <span class="text-muted ms-2 small">(optional)</span>
-      </button>
-    </h2>
-    <div id="collapseOptions" class="accordion-collapse collapse" aria-labelledby="headingOptions" data-bs-parent="#processingOptionsAccordion">
-      <div class="accordion-body py-2">
-        <div class="form-check small mb-1">
-          <input class="form-check-input" type="checkbox" value="true" id="removeNoise" name="remove_noise">
-          <label class="form-check-label" for="removeNoise">Remove noise and specks (Uses Bilateral Filter)</label>
-        </div>
-        <div class="form-check small mb-1">
-          <input class="form-check-input" type="checkbox" value="true" id="binarization" name="binarize">
-          <label class="form-check-label" for="binarization">Perform binarization (Uses Adaptive Thresholding)</label>
-        </div>
-        <div class="form-check small">
-          <input class="form-check-input" type="checkbox" value="true" id="autoRotate" name="auto_rotate">
-          <label class="form-check-label" for="autoRotate">Auto rotate if needed (Contour Detection and Alignment)</label>
-        </div>
-      </div>
-    </div>
-  </div>
-</div>
+          <div class="accordion mb-3" id="processingOptionsAccordion">
+            <div class="accordion-item border-0">
+              <h2 class="accordion-header" id="headingOptions">
+                <button class="accordion-button collapsed py-2 small" type="button" data-bs-toggle="collapse" data-bs-target="#collapseOptions" aria-expanded="false" aria-controls="collapseOptions">
+                  Advanced Options <span class="text-muted ms-2 small">(optional)</span>
+                </button>
+              </h2>
+              <div id="collapseOptions" class="accordion-collapse collapse" aria-labelledby="headingOptions" data-bs-parent="#processingOptionsAccordion">
+                <div class="accordion-body py-2">
+                  <div class="form-check small mb-1">
+                    <input class="form-check-input" type="checkbox" value="true" id="gray" name="gray" checked>
+                    <label class="form-check-label" for="gray">Perform Grayscale conversion</label>
+                  </div>
+                  <div class="form-check small mb-1">
+                    <input class="form-check-input" type="checkbox" value="true" id="denoise" name="denoise" checked>
+                    <label class="form-check-label" for="denoise">Remove noise and specks (Uses Bilateral Filter)</label>
+                  </div>
+                  <div class="form-check small mb-1">
+                    <input class="form-check-input" type="checkbox" value="true" id="binarize" name="binarize" checked>
+                    <label class="form-check-label" for="binarize">Perform binarization (Uses Adaptive Thresholding)</label>
+                  </div>
+                  <div class="form-check small">
+                    <input class="form-check-input" type="checkbox" value="true" id="autoRotate" name="auto_rotate" checked>
+                    <label class="form-check-label" for="autoRotate">Auto rotate if needed (Contour Detection and Alignment)</label>
+                  </div>
+                </div>
+              </div>
+            </div>
+          </div>
           <div class="d-flex justify-content-between">
             <button type="submit" class="btn btn-outline-success w-50 me-2 d-flex align-items-center justify-content-center gap-2" data-mode="basic">
               Basic OCR (Faster and Recommended)
@@ -87,20 +91,28 @@ <h4>Detected Text:</h4>
 
     form.addEventListener('submit', async (event) => {
       event.preventDefault();
-      // Reset, hide previous alert
-      errorAlert.classList.add('d-none');
-      errorAlert.textContent = '';
-      resultText.textContent = 'OCR and Recognition is computationally expensive, and takes time. Please wait...';
 
       const file = fileInput.files[0];
       if (!file) {
         alert('Please select a file first.');
         return;
       }
 
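+      // Read the gray/denoise/binarize checkbox states. NOTE(review): the auto_rotate checkbox is not read here — confirm whether that is intentional.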
+      const grayChecked = document.getElementById('gray').checked;
+      const denoiseChecked = document.getElementById('denoise').checked;
+      const binarizeChecked = document.getElementById('binarize').checked;
+      // Hide and clear any previous error alert, then show the progress message
+      errorAlert.classList.add('d-none');
+      errorAlert.textContent = '';
+      resultText.textContent = 'OCR and Recognition is computationally expensive, and takes time. Please wait...';
+
       const formData = new FormData();
       formData.append('attachment', file);
       formData.append('synchronous', false);
+      formData.append('gray', grayChecked);
+      formData.append('denoise', denoiseChecked);
+      formData.append('binarize', binarizeChecked);
 
       resultText.textContent = 'OCR and Recognition is computationally expensive, and takes time. Please wait...';
       const BASE_URL = 'http://localhost:8000';
diff --git a/main.py b/main.py
@@ -4,7 +4,7 @@
 from typing import List
 
 from fastapi import FastAPI
-from fastapi import UploadFile
+from fastapi import UploadFile, Form
 from fastapi.exceptions import HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 
@@ -117,7 +117,7 @@ def extract_img_text(attachment: UploadFile):
 
 
 @app.post("/ocr")
-def ocr(attachment: UploadFile):
+def ocr(attachment: UploadFile, gray: bool = Form(True), denoise: bool = Form(True), binarize: bool = Form(True)):
     """
     TODO: Support multiple attachments
     It could pass a PDF or an image.
@@ -133,6 +133,11 @@ def ocr(attachment: UploadFile):
     In all of the above cases, the processing would happen asynchronously.
     The task would be queued and a link would be returned to the user.
     """
+    options = {
+        "gray": gray,
+        "denoise": denoise,
+        "binarize": binarize
+    }
     type_details = identify_file_type(attachment.file)
     if not type_details.mime_type.startswith('image') and not type_details.mime_type.startswith('application/pdf'):
         raise HTTPException(status_code=400, detail="Provide either an image or a PDF")
@@ -145,7 +150,7 @@ def ocr(attachment: UploadFile):
     if type_details.mime_type.startswith('image'):
         # Attempt extraction through Tesseract
         set_object(key=path_hash, field="type", value="image")
-        enqueue_extraction(extraction_function=extract_image_text_and_set_db, file_path=output_filename, key=path_hash)
+        enqueue_extraction(extraction_function=extract_image_text_and_set_db, file_path=output_filename, key=path_hash, options=options)
     elif type_details.mime_type.startswith('application/pdf'):
         # Attempt extracting text using pdfminer.six or else through the image conversion -> OCR pipeline.
         set_object(key=path_hash, field="type", value="pdf")