|
| 1 | +package com.livedetectedges |
| 2 | + |
| 3 | +import android.content.Context |
| 4 | +import android.graphics.PointF |
| 5 | +import androidx.camera.core.ImageProxy |
| 6 | +import org.opencv.core.Core |
| 7 | +import org.opencv.core.CvType |
| 8 | +import org.opencv.core.Mat |
| 9 | +import org.opencv.core.MatOfPoint |
| 10 | +import org.opencv.core.MatOfPoint2f |
| 11 | +import org.opencv.core.Point |
| 12 | +import org.opencv.core.Size |
| 13 | +import org.opencv.imgproc.Imgproc |
| 14 | +import java.util.ArrayList |
| 15 | + |
/**
 * Finds the four corners of a document in a camera frame.
 *
 * Each call to [detect] reads the luminance (Y) plane of the frame, rotates it according to the
 * frame's `rotationDegrees`, and then tries two strategies in order:
 *
 * 1. If a [SegmentationDetector] could be created and reports a loaded model, the frame is
 *    contrast-enhanced and segmented; the resulting mask is binarised at several thresholds and
 *    searched for a quadrilateral.
 * 2. Otherwise, or if no quadrilateral was found, a classic GaussianBlur -> Otsu -> Canny pipeline
 *    is used.
 *
 * The working `Mat`s are reused between frames and the class holds no locks, so a single instance
 * must not be used from several threads at once, and [release] must not run concurrently with
 * [detect].
 *
 * @param context used only to construct the optional [SegmentationDetector]; when `null`, only
 * the Canny pipeline is used.
 */
class DocumentDetector(private val context: Context? = null) {

    // Working buffers reused across frames to avoid reallocating native memory on every call.
    private val srcGray = Mat()
    private val srcBlur = Mat()
    private val srcBinary = Mat()
    private val srcCanny = Mat()
    private val rotatedMat = Mat()
    private val rgbMat = Mat() // RGB frame for segmentation input
    private val enhancedMat = Mat() // RGB frame after contrast enhancement

    // Tightly packed copy of the Y plane (row padding removed); reallocated only when the size changes.
    private var lumaBytes = ByteArray(0)

    // Dimensions of the most recently processed (rotated) frame; 0 until the first detect() call.
    private var processedWidth = 0
    private var processedHeight = 0

    // Created on first use and shared by all frames instead of building a new CLAHE per frame.
    private val clahe by lazy { Imgproc.createCLAHE(2.0, Size(8.0, 8.0)) }

    // Segmentation detector (optional, falls back to Canny if segmentation is not available).
    private val segmentationDetector: SegmentationDetector? = context?.let { ctx ->
        try {
            SegmentationDetector(ctx)
        } catch (e: Exception) {
            // Deliberate best effort: segmentation is not available, the Canny fallback is used.
            null
        }
    }

    /**
     * Detects a document quadrilateral in [image].
     *
     * @param image camera frame; only plane 0 (Y / luminance) is read. The image is not closed here.
     * @return the four corners ordered top-left, top-right, bottom-right, bottom-left, expressed in
     * the coordinate space of the rotated frame (see [getWidth] / [getHeight]), or `null` when no
     * four-point contour passes the filters.
     */
    fun detect(image: ImageProxy): List<PointF>? {
        // 1. Convert ImageProxy (Y plane) to a grayscale Mat.
        loadLumaPlane(image)

        // 2. Handle rotation from the camera sensor (rotateMat returns srcGray itself for 0 degrees).
        val processingMat = rotateMat(srcGray, image.imageInfo.rotationDegrees)
        processedWidth = processingMat.width()
        processedHeight = processingMat.height()

        // 3./4. Try segmentation first if available; on failure fall through to Canny.
        val probMask = computeProbabilityMask(processingMat)
        if (probMask != null) {
            try {
                detectFromProbabilityMask(probMask, processingMat)?.let { return it }
            } finally {
                probMask.release()
            }
        }

        // 5. Fallback: classic OpenCV pipeline – GaussianBlur -> Otsu threshold -> Canny.
        Imgproc.GaussianBlur(processingMat, srcBlur, Size(5.0, 5.0), 0.0)
        Imgproc.threshold(
            srcBlur,
            srcBinary,
            0.0,
            255.0,
            Imgproc.THRESH_BINARY or Imgproc.THRESH_OTSU,
        )
        Imgproc.Canny(srcBinary, srcCanny, 30.0, 100.0) // Lowered thresholds for better detection

        return findDocumentContour(srcCanny, processingMat)
    }

    /**
     * Copies the Y plane of [image] into [srcGray] as a tightly packed width x height image.
     *
     * The plane's row stride may be larger than the image width (rows padded at the end). Copying
     * the buffer verbatim in that case would shift every row and shear the image, so padded rows
     * are repacked one by one.
     */
    private fun loadLumaPlane(image: ImageProxy) {
        val width = image.width
        val height = image.height
        val plane = image.planes[0]
        val buffer = plane.buffer
        val rowStride = plane.rowStride

        val packedSize = width * height
        if (lumaBytes.size != packedSize) {
            lumaBytes = ByteArray(packedSize)
        }

        if (rowStride <= width) {
            // No padding: the buffer already is the packed image.
            buffer.get(lumaBytes, 0, minOf(buffer.remaining(), packedSize))
        } else {
            val base = buffer.position()
            val limit = buffer.limit()
            for (row in 0 until height) {
                val rowStart = base + row * rowStride
                if (rowStart >= limit) break
                // The last row of the buffer may stop right after the visible pixels.
                val count = minOf(width, limit - rowStart)
                buffer.position(rowStart)
                buffer.get(lumaBytes, row * width, count)
            }
        }

        // Mat.create is a no-op when size and type are unchanged, so this only reallocates
        // on the first frame or when the frame dimensions change.
        srcGray.create(height, width, CvType.CV_8UC1)
        srcGray.put(0, 0, lumaBytes)
    }

    /**
     * Runs the segmentation model on a contrast-enhanced copy of [gray].
     *
     * @return the mask produced by the segmentation detector (owned by the caller, who must release
     * it), or `null` when no detector exists, its model is not loaded, or it returns no mask.
     */
    private fun computeProbabilityMask(gray: Mat): Mat? {
        val detector = segmentationDetector ?: return null
        if (detector.isModelLoaded() != true) return null

        // Convert grayscale to RGB for segmentation (the model expects RGB input).
        Imgproc.cvtColor(gray, rgbMat, Imgproc.COLOR_GRAY2RGB)

        // Increase local contrast using CLAHE (Contrast Limited Adaptive Histogram Equalization)
        // on the L channel of the Lab representation.
        val labMat = Mat()
        val labChannels = ArrayList<Mat>()
        try {
            Imgproc.cvtColor(rgbMat, labMat, Imgproc.COLOR_RGB2Lab)
            Core.split(labMat, labChannels)
            clahe.apply(labChannels[0], labChannels[0])
            Core.merge(labChannels, enhancedMat)
            Imgproc.cvtColor(enhancedMat, enhancedMat, Imgproc.COLOR_Lab2RGB)
        } finally {
            // Release temporaries even if an OpenCV call throws.
            labMat.release()
            labChannels.forEach { it.release() }
        }

        return detector.segment(enhancedMat)
    }

    /**
     * Binarises [probMask] at each of [SEGMENTATION_THRESHOLDS] (lowest first), cleans the result
     * morphologically and returns the first quadrilateral found, or `null` if none is found.
     *
     * Trying multiple thresholds (like FairScan) handles perspective cases more robustly.
     * NOTE(review): the threshold values presume the mask holds probabilities in 0..1 — confirm
     * against SegmentationDetector.segment.
     */
    private fun detectFromProbabilityMask(probMask: Mat, processingMat: Mat): List<PointF>? {
        // Larger (7x7) elliptical kernel to better handle tilted shapes; built once for all thresholds.
        val kernel = Imgproc.getStructuringElement(Imgproc.MORPH_ELLIPSE, Size(7.0, 7.0))
        val binaryMask = Mat()
        val uint8Mask = Mat()
        val cleaned = Mat()
        try {
            for (threshold in SEGMENTATION_THRESHOLDS) {
                Imgproc.threshold(probMask, binaryMask, threshold.toDouble(), 255.0, Imgproc.THRESH_BINARY)

                // Convert to uint8, as required by the contour search.
                binaryMask.convertTo(uint8Mask, CvType.CV_8UC1)

                // Clean up the mask so that contours are more stable:
                // - Close (MORPH_CLOSE) to fill small holes.
                // - Open (MORPH_OPEN) to remove small noise blobs.
                Imgproc.morphologyEx(uint8Mask, cleaned, Imgproc.MORPH_CLOSE, kernel)
                Imgproc.morphologyEx(cleaned, cleaned, Imgproc.MORPH_OPEN, kernel)

                findDocumentContour(cleaned, processingMat)?.let { return it }
            }
            return null
        } finally {
            kernel.release()
            binaryMask.release()
            uint8Mask.release()
            cleaned.release()
        }
    }

    /**
     * Searches [contourSource] for the largest external contour that looks like a document and can
     * be approximated by exactly four points.
     *
     * The minimum-area filter is relative to the size of [contourSource]; the returned corners are
     * scaled to the size of [processingMat]. When both have the same size (always the case for the
     * Canny path) the scale is exactly 1 and coordinates are returned unchanged.
     *
     * @return the ordered corners, or `null` if no contour qualifies.
     */
    private fun findDocumentContour(contourSource: Mat, processingMat: Mat): List<PointF>? {
        if (contourSource.empty()) return null

        val contours = ArrayList<MatOfPoint>()
        val hierarchy = Mat()
        try {
            // 6. Find contours (external only, similar to typical document-scanning pipelines).
            Imgproc.findContours(
                contourSource,
                contours,
                hierarchy,
                Imgproc.RETR_EXTERNAL,
                Imgproc.CHAIN_APPROX_SIMPLE,
            )

            val sourceWidth = contourSource.width().toDouble()
            val sourceHeight = contourSource.height().toDouble()
            val minArea = sourceWidth * sourceHeight * AREA_THRESHOLD_RATIO
            val scaleX = processingMat.width() / sourceWidth
            val scaleY = processingMat.height() / sourceHeight

            // Compute each area once, drop contours that are too small, then visit largest first.
            val candidates = contours
                .map { it to Imgproc.contourArea(it) }
                .filter { (_, area) -> area >= minArea }
                .sortedByDescending { (_, area) -> area }

            for ((contour, area) in candidates) {
                if (!hasDocumentLikeBounds(contour, area)) continue
                val corners = approximateQuad(contour) ?: continue
                return sortPoints(corners).map {
                    PointF((it.x * scaleX).toFloat(), (it.y * scaleY).toFloat())
                }
            }

            // No valid quad found
            return null
        } finally {
            hierarchy.release()
            contours.forEach { it.release() }
        }
    }

    /**
     * Filters out very irregular contours using bounding-box heuristics.
     *
     * Filters are relaxed to accept more tilted rectangles:
     * - Aspect ratio 0.25–4.0 to allow more extreme tilt.
     * - Fill ratio (contour area / bounding-box area) of at least 0.35 to allow partially visible documents.
     */
    private fun hasDocumentLikeBounds(contour: MatOfPoint, area: Double): Boolean {
        val rect = Imgproc.boundingRect(contour)
        val aspectRatio = rect.width.toDouble() / rect.height.toDouble()
        if (aspectRatio < MIN_ASPECT_RATIO || aspectRatio > MAX_ASPECT_RATIO) return false

        val rectArea = rect.width.toDouble() * rect.height.toDouble()
        val fillRatio = area / rectArea
        return fillRatio >= MIN_FILL_RATIO
    }

    /**
     * Approximates [contour] by a closed polygon and returns its vertices if there are exactly four.
     *
     * A slightly larger epsilon (2.5 % of the perimeter) is used so that a tilted rectangle is
     * approximated as a 4-point polygon more often.
     */
    private fun approximateQuad(contour: MatOfPoint): List<Point>? {
        val curve = MatOfPoint2f(*contour.toArray())
        val approx = MatOfPoint2f()
        try {
            val perimeter = Imgproc.arcLength(curve, true)
            Imgproc.approxPolyDP(curve, approx, APPROX_EPSILON_RATIO * perimeter, true)
            // toList() copies the points out, so they stay valid after the Mat is released.
            return if (approx.total() == 4L) approx.toList() else null
        } finally {
            curve.release()
            approx.release()
        }
    }

    /**
     * Returns [src] rotated clockwise by [rotationDegrees].
     *
     * For 90, 180 and 270 the result is written to the shared [rotatedMat] and that Mat is returned;
     * for any other value [src] itself is returned unchanged.
     */
    private fun rotateMat(src: Mat, rotationDegrees: Int): Mat = when (rotationDegrees) {
        90 -> {
            // Transpose then mirror around the vertical axis.
            Core.transpose(src, rotatedMat)
            Core.flip(rotatedMat, rotatedMat, 1)
            rotatedMat
        }
        180 -> {
            // Mirror around both axes.
            Core.flip(src, rotatedMat, -1)
            rotatedMat
        }
        270 -> {
            // Transpose then mirror around the horizontal axis.
            Core.transpose(src, rotatedMat)
            Core.flip(rotatedMat, rotatedMat, 0)
            rotatedMat
        }
        else -> src
    }

    /**
     * Orders the corners as TopLeft, TopRight, BottomRight, BottomLeft to provide a consistent
     * ordering for downstream consumers (e.g. perspective correction).
     *
     * The sum/diff method is used first: top-left has the smallest x + y, bottom-right the largest,
     * top-right the smallest y - x and bottom-left the largest. On ties (for example a quad rotated
     * by 45 degrees) that method can select the same point twice; in that case the points are
     * instead ordered clockwise around their centroid, starting from the one with the smallest x + y,
     * so that every input point appears exactly once.
     */
    private fun sortPoints(points: List<Point>): List<Point> {
        val sums = points.map { it.x + it.y }
        val diffs = points.map { it.y - it.x }
        val indices = points.indices

        // minByOrNull / maxByOrNull return the first extreme element, like indexOf(min/max).
        val tlIndex = indices.minByOrNull { sums[it] } ?: return points
        val brIndex = indices.maxByOrNull { sums[it] } ?: return points
        val trIndex = indices.minByOrNull { diffs[it] } ?: return points
        val blIndex = indices.maxByOrNull { diffs[it] } ?: return points

        val picked = listOf(tlIndex, trIndex, brIndex, blIndex)
        if (picked.distinct().size == picked.size) {
            return picked.map { points[it] }
        }

        // Degenerate sum/diff result: fall back to angular ordering. With the y axis pointing
        // down, ascending atan2 angles run clockwise on screen.
        val centerX = points.map { it.x }.average()
        val centerY = points.map { it.y }.average()
        val clockwise = points.sortedBy { Math.atan2(it.y - centerY, it.x - centerX) }
        val start = clockwise.indices.minByOrNull { clockwise[it].x + clockwise[it].y } ?: 0
        return clockwise.indices.map { clockwise[(start + it) % clockwise.size] }
    }

    /** Width of the most recently processed (rotated) frame, or 0 before the first [detect] call. */
    fun getWidth(): Int = processedWidth

    /** Height of the most recently processed (rotated) frame, or 0 before the first [detect] call. */
    fun getHeight(): Int = processedHeight

    /**
     * Releases the segmentation detector and the native memory held by the working buffers.
     *
     * Must not be called while [detect] is running on another thread.
     */
    fun release() {
        segmentationDetector?.release()
        listOf(srcGray, srcBlur, srcBinary, srcCanny, rotatedMat, rgbMat, enhancedMat)
            .forEach { it.release() }
    }

    private companion object {
        // Minimal area ratio of a candidate contour relative to the full frame.
        // Lower value makes it easier to detect smaller/partial documents while still filtering noise.
        const val AREA_THRESHOLD_RATIO = 0.02

        // Bounding-box aspect ratio (width / height) accepted for a candidate contour.
        const val MIN_ASPECT_RATIO = 0.25
        const val MAX_ASPECT_RATIO = 4.0

        // Minimal contour-area / bounding-box-area ratio accepted for a candidate contour.
        const val MIN_FILL_RATIO = 0.35

        // approxPolyDP epsilon as a fraction of the contour perimeter.
        const val APPROX_EPSILON_RATIO = 0.025

        // Thresholds (probability levels) to try, similar to FairScan, to better handle
        // perspective / tilted documents.
        val SEGMENTATION_THRESHOLDS = listOf(0.3f, 0.4f, 0.5f, 0.6f, 0.7f)
    }
}
0 commit comments