Skip to content

Commit 87930ce

Browse files
authored
Merge pull request #1 from loijwdev/feat/android-detect-edges
Feat/android detect edges
2 parents deb5567 + e02efed commit 87930ce

12 files changed

Lines changed: 1009 additions & 22 deletions

File tree

.github/workflows/ci.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ jobs:
116116
yarn turbo run build:android --cache-dir="${{ env.TURBO_CACHE_DIR }}"
117117
118118
build-ios:
119+
if: false # temporarily skip iOS build
119120
runs-on: macos-latest
120121

121122
env:

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ project.xcworkspace
4040
.settings
4141
local.properties
4242
android.iml
43+
*.tflite
4344

4445
# Cocoapods
4546
#

android/build.gradle

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,21 @@ buildscript {
1212
classpath "com.android.tools.build:gradle:8.7.2"
1313
// noinspection DifferentKotlinGradleVersion
1414
classpath "org.jetbrains.kotlin:kotlin-gradle-plugin:${getExtOrDefault('kotlinVersion')}"
15+
classpath "de.undercouch:gradle-download-task:5.6.0"
1516
}
1617
}
1718

1819

1920
apply plugin: "com.android.library"
2021
apply plugin: "kotlin-android"
22+
apply plugin: "de.undercouch.download"
2123

2224
apply plugin: "com.facebook.react"
2325

26+
// Import DownloadModels task
27+
project.ext.set("ASSET_DIR", "$projectDir/src/main/assets")
28+
apply(from: "download_models.gradle")
29+
2430
// Resolves an integer build setting: a value set on the root project's `ext`
// wins (returned as-is); otherwise the "LiveDetectEdges_<name>" project
// property is parsed with toInteger().
def getExtOrIntegerDefault(name) {
  if (rootProject.ext.has(name)) {
    return rootProject.ext.get(name)
  }
  def fallbackProperty = project.properties["LiveDetectEdges_" + name]
  return fallbackProperty.toInteger()
}
@@ -45,6 +51,10 @@ android {
4551
}
4652
}
4753

54+
aaptOptions {
55+
noCompress "tflite"
56+
}
57+
4858
lintOptions {
4959
disable "GradleCompatible"
5060
}
@@ -74,4 +84,20 @@ def kotlin_version = getExtOrDefault("kotlinVersion")
7484
dependencies {
7585
implementation "com.facebook.react:react-android"
7686
implementation "org.jetbrains.kotlin:kotlin-stdlib:$kotlin_version"
87+
88+
def camerax_version = "1.5.2"
89+
implementation "androidx.camera:camera-core:${camerax_version}"
90+
implementation "androidx.camera:camera-camera2:${camerax_version}"
91+
implementation "androidx.camera:camera-lifecycle:${camerax_version}"
92+
implementation "androidx.camera:camera-view:${camerax_version}"
93+
implementation "androidx.camera:camera-extensions:${camerax_version}"
94+
95+
// OpenCV for image processing
96+
implementation 'org.opencv:opencv:4.12.0'
97+
98+
// LiteRT for segmentation model (replaces TensorFlow Lite, supports 16 KB page size)
99+
implementation 'com.google.ai.edge.litert:litert:1.4.0'
100+
implementation 'com.google.ai.edge.litert:litert-api:1.4.0'
101+
implementation 'com.google.ai.edge.litert:litert-support:1.4.0'
102+
implementation 'com.google.ai.edge.litert:litert-metadata:1.4.0'
77103
}

android/download_models.gradle

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
/*
2+
* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
// Fetches the FairScan segmentation model into the module's assets directory
// (project.ext.ASSET_DIR must be set by the applying build script before this
// file is applied).
tasks.register('downloadModelFile', Download) {
  src 'https://github.com/pynicolas/fairscan-segmentation-model/releases/download/v1.1.0/fairscan-segmentation-model.tflite'
  dest project.ext.ASSET_DIR + '/fairscan-segmentation-model.tflite'
  // Skip the download when the file is already present.
  overwrite false
  // Download to a temporary file and move it into place only on success.
  // Without this, an interrupted download leaves a truncated .tflite behind
  // which `overwrite false` would then keep forever.
  tempAndMove true
}

// Make sure the model is in place before any Android build step runs.
preBuild.dependsOn downloadModelFile
Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
package com.livedetectedges
2+
3+
import android.content.Context
4+
import android.graphics.PointF
5+
import androidx.camera.core.ImageProxy
6+
import org.opencv.core.Core
7+
import org.opencv.core.CvType
8+
import org.opencv.core.Mat
9+
import org.opencv.core.MatOfPoint
10+
import org.opencv.core.MatOfPoint2f
11+
import org.opencv.core.Point
12+
import org.opencv.core.Size
13+
import org.opencv.imgproc.Imgproc
14+
import java.util.ArrayList
15+
16+
/**
 * Finds the four corners of a document in a camera frame.
 *
 * Each call to [detect] reads the luminance (Y) plane of the frame, rotates it according to the
 * frame's `rotationDegrees`, and then:
 *  1. if a [SegmentationDetector] was created and reports a loaded model, thresholds its
 *     probability mask at several levels and looks for a quadrilateral contour;
 *  2. otherwise, or if no quadrilateral was found, falls back to GaussianBlur -> Otsu -> Canny.
 *
 * The class keeps reusable native [Mat] buffers between frames; call [release] when the detector
 * is no longer needed. No synchronization is performed here.
 *
 * @param context used to construct the [SegmentationDetector]; when `null`, only the Canny
 * fallback is used.
 */
class DocumentDetector(private val context: Context? = null) {

  private val activeContours = ArrayList<MatOfPoint>()
  private val contourHierarchy = Mat() // reused output of findContours (content is not read)
  private val srcGray = Mat()
  private val srcBlur = Mat()
  private val srcBinary = Mat()
  private val srcCanny = Mat()
  private val rotatedMat = Mat()
  private val rgbMat = Mat() // RGB frame for segmentation input
  private val enhancedMat = Mat() // RGB frame after contrast enhancement

  // Tightly packed copy of the Y plane, reused across frames of the same size.
  private var lumaBytes = ByteArray(0)

  // Dimensions of the (possibly rotated) frame processed by the most recent detect() call.
  private var frameWidth = 0
  private var frameHeight = 0

  // Segmentation detector (optional, falls back to Canny if segmentation is not available)
  private var segmentationDetector: SegmentationDetector? = null

  init {
    // Initialize segmentation detector if context is available
    if (context != null) {
      try {
        segmentationDetector = SegmentationDetector(context)
      } catch (e: Exception) {
        // Best effort: segmentation is optional, but record why it is unavailable
        // instead of swallowing the failure silently.
        android.util.Log.w(TAG, "Segmentation detector unavailable, using Canny fallback", e)
      }
    }
  }

  /**
   * Detects a document quadrilateral in [image].
   *
   * @param image frame whose first plane is read as 8-bit luminance.
   * @return the corners ordered top-left, top-right, bottom-right, bottom-left, or `null` when
   * no 4-point contour passes the area / aspect-ratio / fill-ratio filters.
   */
  fun detect(image: ImageProxy): List<PointF>? {
    // 1. Convert ImageProxy (Y plane) to grayscale Mat
    loadLumaPlane(image)

    // 2. Handle rotation from the camera sensor
    val rotation = image.imageInfo.rotationDegrees
    val processingMat = if (rotation != 0) rotateMat(srcGray, rotation) else srcGray
    frameWidth = processingMat.width()
    frameHeight = processingMat.height()

    // 3./4. Try segmentation first if available; try multiple thresholds on its mask.
    val probMask = computeProbabilityMask(processingMat)
    if (probMask != null) {
      try {
        detectFromProbabilityMask(probMask, processingMat)?.let { return it }
      } finally {
        // The mask is released here on every path, including exceptions.
        probMask.release()
      }
    }

    // 5. Fallback: classic OpenCV pipeline – GaussianBlur -> Otsu threshold -> Canny.
    Imgproc.GaussianBlur(processingMat, srcBlur, Size(5.0, 5.0), 0.0)
    Imgproc.threshold(
      srcBlur,
      srcBinary,
      0.0,
      255.0,
      Imgproc.THRESH_BINARY or Imgproc.THRESH_OTSU
    )
    Imgproc.Canny(srcBinary, srcCanny, 30.0, 100.0) // Lowered thresholds for better detection

    return findDocumentContour(srcCanny, processingMat)
  }

  /**
   * Copies the first plane of [image] into [srcGray] as a width x height CV_8UC1 matrix.
   *
   * Rows are copied one at a time when the plane's row stride differs from the image width, so
   * that per-row padding does not end up in the matrix (which would shear the image).
   */
  private fun loadLumaPlane(image: ImageProxy) {
    val width = image.width
    val height = image.height
    val plane = image.planes[0]
    val buffer = plane.buffer
    val rowStride = plane.rowStride
    // NOTE(review): assumes pixelStride == 1 for this plane (the Y plane of YUV_420_888);
    // confirm the analyzer's output image format.

    val pixelCount = width * height
    if (lumaBytes.size != pixelCount) {
      lumaBytes = ByteArray(pixelCount)
    }

    if (rowStride == width) {
      buffer.get(lumaBytes, 0, minOf(buffer.remaining(), pixelCount))
    } else {
      val base = buffer.position()
      for (row in 0 until height) {
        buffer.position(base + row * rowStride)
        if (buffer.remaining() < width) break // truncated buffer: keep what was copied
        buffer.get(lumaBytes, row * width, width)
      }
    }

    // Re-allocate the Mat if dimensions changed (or first run)
    if (srcGray.width() != width || srcGray.height() != height) {
      srcGray.create(height, width, CvType.CV_8UC1)
    }
    srcGray.put(0, 0, lumaBytes)
  }

  /**
   * Runs the segmentation model on a contrast-enhanced RGB version of [gray].
   *
   * @return whatever [SegmentationDetector.segment] returns (the caller releases it), or `null`
   * when no detector exists or its model is not loaded.
   */
  private fun computeProbabilityMask(gray: Mat): Mat? {
    if (segmentationDetector?.isModelLoaded() != true) return null

    // Convert grayscale to RGB for segmentation (the model expects RGB input).
    Imgproc.cvtColor(gray, rgbMat, Imgproc.COLOR_GRAY2RGB)

    // Increase local contrast using CLAHE (Contrast Limited Adaptive Histogram Equalization)
    // on the L channel of the Lab representation.
    val labMat = Mat()
    val labChannels = ArrayList<Mat>()
    try {
      Imgproc.cvtColor(rgbMat, labMat, Imgproc.COLOR_RGB2Lab)
      Core.split(labMat, labChannels)
      val clahe = Imgproc.createCLAHE(2.0, Size(8.0, 8.0))
      clahe.apply(labChannels[0], labChannels[0])
      Core.merge(labChannels, enhancedMat)
      Imgproc.cvtColor(enhancedMat, enhancedMat, Imgproc.COLOR_Lab2RGB)
    } finally {
      // Cleanup temporary Mats even if one of the OpenCV calls throws.
      labMat.release()
      labChannels.forEach { it.release() }
    }

    return segmentationDetector?.segment(enhancedMat)
  }

  /**
   * Thresholds [probMask] at each level in [SEGMENTATION_THRESHOLDS] (in order), cleans the
   * binary mask with a close followed by an open, and returns the first quadrilateral found.
   */
  private fun detectFromProbabilityMask(probMask: Mat, processingMat: Mat): List<PointF>? {
    // The structuring element is loop-invariant: build it once and release it once.
    val kernel = Imgproc.getStructuringElement(
      Imgproc.MORPH_ELLIPSE,
      Size(7.0, 7.0) // Larger kernel to better handle tilted angles
    )
    val binaryMask = Mat()
    val uint8Mask = Mat()
    val cleaned = Mat()
    try {
      for (threshold in SEGMENTATION_THRESHOLDS) {
        Imgproc.threshold(probMask, binaryMask, threshold.toDouble(), 255.0, Imgproc.THRESH_BINARY)

        // Convert to uint8
        binaryMask.convertTo(uint8Mask, CvType.CV_8UC1)

        // Clean up the mask so that contours are more stable:
        // - Close (MORPH_CLOSE) to fill small holes.
        // - Open (MORPH_OPEN) to remove small noise blobs.
        Imgproc.morphologyEx(uint8Mask, cleaned, Imgproc.MORPH_CLOSE, kernel)
        Imgproc.morphologyEx(cleaned, cleaned, Imgproc.MORPH_OPEN, kernel)

        findDocumentContour(cleaned, processingMat)?.let { return it }
      }
      return null
    } finally {
      kernel.release()
      binaryMask.release()
      uint8Mask.release()
      cleaned.release()
    }
  }

  /**
   * Searches [contourSource] for the largest external contour that approximates to 4 points.
   *
   * Candidates are visited from largest to smallest area and must cover at least
   * [AREA_THRESHOLD_RATIO] of [processingMat]'s area, have a bounding-box aspect ratio within
   * [MIN_ASPECT_RATIO]..[MAX_ASPECT_RATIO], and fill at least [MIN_FILL_RATIO] of that box.
   *
   * NOTE(review): the returned points are in [contourSource] coordinates while the area filter
   * uses [processingMat]; this presumes both have the same size — confirm for the mask
   * returned by SegmentationDetector.segment.
   */
  private fun findDocumentContour(contourSource: Mat, processingMat: Mat): List<PointF>? {
    // 6. Find contours (external only, similar to typical document-scanning pipelines).
    releaseContours()
    Imgproc.findContours(
      contourSource,
      activeContours,
      contourHierarchy,
      Imgproc.RETR_EXTERNAL,
      Imgproc.CHAIN_APPROX_SIMPLE
    )

    try {
      val imageArea = (processingMat.width() * processingMat.height()).toDouble()
      val minArea = imageArea * AREA_THRESHOLD_RATIO

      // Compute each area once, then sort by area (descending) and iterate.
      val candidates = activeContours
        .map { contour -> contour to Imgproc.contourArea(contour) }
        .sortedByDescending { it.second }

      for ((contour, area) in candidates) {
        // Sorted descending, so every remaining contour is too small as well.
        if (area < minArea) break

        // Filter out very irregular contours using bounding-box heuristics.
        val rect = Imgproc.boundingRect(contour)
        val aspectRatio = rect.width.toDouble() / rect.height.toDouble()
        val rectArea = rect.width.toDouble() * rect.height.toDouble()
        val fillRatio = area / rectArea

        if (aspectRatio < MIN_ASPECT_RATIO || aspectRatio > MAX_ASPECT_RATIO) continue
        if (fillRatio < MIN_FILL_RATIO) continue

        val quad = approximateQuad(contour) ?: continue
        return sortPoints(quad).map { PointF(it.x.toFloat(), it.y.toFloat()) }
      }

      // No valid quad found
      return null
    } finally {
      // Contours are native Mats; free them as soon as this pass is done.
      releaseContours()
    }
  }

  /**
   * Approximates [contour] as a closed polygon with epsilon = [APPROX_EPSILON_RATIO] x perimeter.
   *
   * @return the polygon's vertices when it has exactly 4 of them, otherwise `null`.
   */
  private fun approximateQuad(contour: MatOfPoint): List<Point>? {
    val curve = MatOfPoint2f(*contour.toArray())
    val approx = MatOfPoint2f()
    try {
      val perimeter = Imgproc.arcLength(curve, true)
      Imgproc.approxPolyDP(curve, approx, APPROX_EPSILON_RATIO * perimeter, true)
      // toList() copies the vertices into Point objects, so releasing approx below is safe.
      return if (approx.total() == 4L) approx.toList() else null
    } finally {
      curve.release()
      approx.release()
    }
  }

  /** Releases and forgets every contour left in [activeContours]. */
  private fun releaseContours() {
    activeContours.forEach { it.release() }
    activeContours.clear()
  }

  /**
   * Rotates [src] clockwise by [rotationDegrees] into the shared [rotatedMat] buffer.
   *
   * @return [rotatedMat] for 90, 180 and 270 degrees; [src] itself for any other value.
   */
  private fun rotateMat(src: Mat, rotationDegrees: Int): Mat {
    val rotateCode = when (rotationDegrees) {
      90 -> Core.ROTATE_90_CLOCKWISE
      180 -> Core.ROTATE_180
      270 -> Core.ROTATE_90_COUNTERCLOCKWISE
      else -> return src
    }
    Core.rotate(src, rotatedMat, rotateCode)
    return rotatedMat
  }

  /**
   * Orders [points] as TopLeft, TopRight, BottomRight, BottomLeft so downstream consumers
   * (e.g. perspective correction) get a consistent ordering.
   *
   * Uses the usual sum/difference heuristic: min(x+y) is top-left, max(x+y) bottom-right,
   * min(y-x) top-right, max(y-x) bottom-left. For strongly rotated quads one vertex can win two
   * of those extrema, which would yield a duplicated corner; in that case the points are ordered
   * by angle around their centroid instead.
   */
  private fun sortPoints(points: List<Point>): List<Point> {
    val indices = points.indices
    val tlIndex = indices.minByOrNull { points[it].x + points[it].y } ?: return points
    val brIndex = indices.maxByOrNull { points[it].x + points[it].y } ?: return points
    val trIndex = indices.minByOrNull { points[it].y - points[it].x } ?: return points
    val blIndex = indices.maxByOrNull { points[it].y - points[it].x } ?: return points

    val picked = listOf(tlIndex, trIndex, brIndex, blIndex)
    if (picked.toSet().size == points.size) {
      return picked.map { points[it] }
    }

    // Fallback: with the y axis pointing down, ascending atan2 walks the vertices clockwise
    // on screen; start the walk at the vertex with the smallest x + y.
    val centerX = points.map { it.x }.average()
    val centerY = points.map { it.y }.average()
    val clockwise = points.sortedBy { Math.atan2(it.y - centerY, it.x - centerX) }
    val start = clockwise.indices.minByOrNull { clockwise[it].x + clockwise[it].y } ?: 0
    return clockwise.drop(start) + clockwise.take(start)
  }

  /** Width of the (rotated) frame processed by the last [detect] call; 0 before the first call. */
  fun getWidth(): Int = frameWidth

  /** Height of the (rotated) frame processed by the last [detect] call; 0 before the first call. */
  fun getHeight(): Int = frameHeight

  /** Releases the segmentation detector and every native buffer held by this instance. */
  fun release() {
    segmentationDetector?.release()
    releaseContours()
    listOf(
      contourHierarchy,
      srcGray,
      srcBlur,
      srcBinary,
      srcCanny,
      rotatedMat,
      rgbMat,
      enhancedMat
    ).forEach { it.release() }
  }

  private companion object {
    const val TAG = "DocumentDetector"

    // Minimal area ratio of a candidate contour relative to the full frame.
    // Lower value makes it easier to detect smaller/partial documents while still filtering noise.
    const val AREA_THRESHOLD_RATIO = 0.02

    // Bounding-box aspect ratio (width / height) accepted for a candidate; relaxed to
    // 0.25–4.0 to allow more extreme tilt.
    const val MIN_ASPECT_RATIO = 0.25
    const val MAX_ASPECT_RATIO = 4.0

    // Minimal contour-area / bounding-box-area ratio; 0.35 allows partially visible documents.
    const val MIN_FILL_RATIO = 0.35

    // approxPolyDP epsilon as a fraction of the perimeter; slightly large so that a tilted
    // rectangle is approximated as a 4-point polygon more often.
    const val APPROX_EPSILON_RATIO = 0.025

    // Thresholds (probability levels) to try, similar to FairScan, to better handle
    // perspective / tilted documents.
    val SEGMENTATION_THRESHOLDS = listOf(0.3f, 0.4f, 0.5f, 0.6f, 0.7f)
  }
}

0 commit comments

Comments
 (0)