Skip to content

Commit 3e2838a

Browse files
committed
feat: implement image capture with perspective correction and saving processed images
1 parent 2b8a8f3 commit 3e2838a

9 files changed

Lines changed: 437 additions & 85 deletions

File tree

android/build.gradle

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,4 @@ dependencies {
9898
// LiteRT for segmentation model (replaces TensorFlow Lite, supports 16 KB page size)
9999
implementation 'com.google.ai.edge.litert:litert:1.4.0'
100100
implementation 'com.google.ai.edge.litert:litert-api:1.4.0'
101-
implementation 'com.google.ai.edge.litert:litert-support:1.4.0'
102-
implementation 'com.google.ai.edge.litert:litert-metadata:1.4.0'
103101
}

android/src/main/java/com/livedetectedges/DocumentDetector.kt

Lines changed: 83 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -69,12 +69,85 @@ class DocumentDetector(private val context: Context? = null) {
6969
}
7070

7171
// 3. Try segmentation first if available, otherwise fall back to Canny
72+
// Logic refactored to detectInternal for reuse with Bitmap
73+
return detectInternal(processingMat)
74+
}
75+
76+
/**
 * Finds the largest plausible four-corner contour in [contourSource].
 *
 * Only external contours are extracted (RETR_EXTERNAL). Candidates are visited from the
 * largest area to the smallest and must pass three filters: a minimum area relative to
 * [processingMat], a bounding-box aspect ratio in 0.25..4.0, and a fill ratio
 * (contour area / bounding-box area) of at least 0.35. The first candidate whose polygon
 * approximation has exactly four vertices wins.
 *
 * @param contourSource binary/edge image the contours are extracted from.
 * @param processingMat image whose width and height define the reference area for the
 *   minimum-area filter.
 * @return the four corners as ordered by `sortPoints`, or `null` when no candidate
 *   approximates to a quadrilateral.
 */
private fun findDocumentContour(contourSource: Mat, processingMat: Mat): List<PointF>? {
    // The contours of the previous call are about to be dropped from the list; free their
    // native buffers now instead of waiting for the garbage collector to finalize them.
    activeContours.forEach { it.release() }
    activeContours.clear()

    // The hierarchy output is not needed, but findContours requires a Mat for it.
    val hierarchy = Mat()
    try {
        Imgproc.findContours(
            contourSource,
            activeContours,
            hierarchy,
            Imgproc.RETR_EXTERNAL,
            Imgproc.CHAIN_APPROX_SIMPLE
        )
    } finally {
        hierarchy.release()
    }

    val imageArea = (processingMat.width() * processingMat.height()).toDouble()
    val minArea = imageArea * AREA_THRESHOLD_RATIO
    activeContours.sortByDescending { Imgproc.contourArea(it) }

    for (contour in activeContours) {
        val area = Imgproc.contourArea(contour)
        // Contours are sorted by descending area, so every remaining one is too small as well.
        if (area < minArea) break

        // Reject very irregular shapes using bounding-box heuristics. The limits are relaxed
        // (aspect 0.25-4.0, fill >= 0.35) so tilted or partially visible documents still pass.
        val rect = Imgproc.boundingRect(contour)
        val aspectRatio = rect.width.toDouble() / rect.height.toDouble()
        val rectArea = rect.width.toDouble() * rect.height.toDouble()
        val fillRatio = area / rectArea
        if (aspectRatio < 0.25 || aspectRatio > 4.0) continue
        if (fillRatio < 0.35) continue

        val curve = MatOfPoint2f(*contour.toArray())
        val approx = MatOfPoint2f()
        try {
            val perimeter = Imgproc.arcLength(curve, true)
            // A slightly larger epsilon (2.5% of the perimeter) makes a tilted rectangle
            // collapse to a 4-point polygon more often.
            Imgproc.approxPolyDP(curve, approx, 0.025 * perimeter, true)

            if (approx.total() == 4L) {
                // The points are copied out of `approx` before the finally block releases it.
                return sortPoints(approx.toList()).map { PointF(it.x.toFloat(), it.y.toFloat()) }
            }
        } finally {
            curve.release()
            approx.release()
        }
    }

    // No valid quad found
    return null
}
123+
124+
/**
 * Detects a document quadrilateral in a still [bitmap].
 *
 * The bitmap is converted to grayscale into the shared `srcGray` buffer and handed to
 * [detectInternal]. No rotation is applied, so the bitmap must already be oriented the
 * way the caller wants the result expressed.
 *
 * @param bitmap image to analyse; it is neither modified nor recycled.
 * @return whatever [detectInternal] returns for the grayscale image (`null` when no
 *   document is found).
 */
fun detect(bitmap: android.graphics.Bitmap): List<PointF>? {
    // Utils.bitmapToMat only accepts ARGB_8888 and RGB_565 bitmaps. Any other config
    // (e.g. HARDWARE) is converted through a temporary ARGB_8888 copy. If the copy cannot
    // be made we fall back to the original bitmap and let bitmapToMat report the problem.
    val config = bitmap.config
    val isSupported = config == android.graphics.Bitmap.Config.ARGB_8888 ||
        config == android.graphics.Bitmap.Config.RGB_565
    val source = if (isSupported) {
        bitmap
    } else {
        bitmap.copy(android.graphics.Bitmap.Config.ARGB_8888, false) ?: bitmap
    }

    try {
        if (srcGray.width() != source.width || srcGray.height() != source.height) {
            srcGray.create(source.height, source.width, CvType.CV_8UC1)
            enhancedMat.create(source.height, source.width, CvType.CV_8UC3)
            // rgbMat is not pre-allocated here: bitmapToMat below always recreates its
            // output as a CV_8UC4 (RGBA) matrix of the bitmap's size.
        }

        org.opencv.android.Utils.bitmapToMat(source, rgbMat)
        // bitmapToMat yields RGBA, so use the 4-channel conversion code explicitly.
        Imgproc.cvtColor(rgbMat, srcGray, Imgproc.COLOR_RGBA2GRAY)
    } finally {
        // Only the temporary copy is ours to recycle; the caller owns `bitmap`.
        if (source !== bitmap) source.recycle()
    }

    return detectInternal(srcGray)
}
143+
144+
private fun detectInternal(processingMat: Mat): List<PointF>? {
145+
// 3. Try segmentation first if available, otherwise fall back to Canny
72146
val probMask = if (segmentationDetector?.isModelLoaded() == true) {
73-
// Pre-processing: improve contrast so the model is more robust to perspective and low-contrast documents.
74-
// Convert grayscale to RGB for segmentation (the model expects RGB input).
147+
// Pre-processing
75148
Imgproc.cvtColor(processingMat, rgbMat, Imgproc.COLOR_GRAY2RGB)
76149

77-
// Increase local contrast using CLAHE (Contrast Limited Adaptive Histogram Equalization).
150+
// Increase local contrast
78151
val labMat = Mat()
79152
Imgproc.cvtColor(rgbMat, labMat, Imgproc.COLOR_RGB2Lab)
80153
val labChannels = ArrayList<Mat>()
@@ -94,31 +167,21 @@ class DocumentDetector(private val context: Context? = null) {
94167
null
95168
}
96169

97-
// 4. If a probability mask is available, try multiple thresholds; otherwise, use Canny.
170+
// 4. If a probability mask is available...
98171
if (probMask != null) {
99-
// Try multiple thresholds like FairScan to handle perspective cases more robustly.
100172
for (threshold in SEGMENTATION_THRESHOLDS) {
101173
val binaryMask = Mat()
102174
Imgproc.threshold(probMask, binaryMask, threshold.toDouble(), 255.0, Imgproc.THRESH_BINARY)
103-
104-
// Convert to uint8
105175
val uint8Mask = Mat()
106176
binaryMask.convertTo(uint8Mask, CvType.CV_8UC1)
107-
108-
// Clean up the mask so that contours are more stable:
109-
// - Close (MORPH_CLOSE) to fill small holes – larger kernel to better handle tilted shapes.
110-
// - Open (MORPH_OPEN) to remove small noise blobs.
177+
111178
val cleaned = Mat()
112-
val kernel = Imgproc.getStructuringElement(
113-
Imgproc.MORPH_ELLIPSE,
114-
Size(7.0, 7.0) // Larger kernel to better handle tilted angles
115-
)
179+
val kernel = Imgproc.getStructuringElement(Imgproc.MORPH_ELLIPSE, Size(3.0, 3.0))
116180
Imgproc.morphologyEx(uint8Mask, cleaned, Imgproc.MORPH_CLOSE, kernel)
117181
Imgproc.morphologyEx(cleaned, cleaned, Imgproc.MORPH_OPEN, kernel)
118182

119183
val result = findDocumentContour(cleaned, processingMat)
120184

121-
// Cleanup
122185
binaryMask.release()
123186
uint8Mask.release()
124187
cleaned.release()
@@ -131,68 +194,15 @@ class DocumentDetector(private val context: Context? = null) {
131194
probMask.release()
132195
}
133196

134-
// 5. Fallback: classic OpenCV pipeline – GaussianBlur -> Otsu threshold -> Canny.
197+
// 5. Fallback: classic OpenCV pipeline
198+
// Use Canny directly on blurred grayscale image for better edge preservation
135199
Imgproc.GaussianBlur(processingMat, srcBlur, Size(5.0, 5.0), 0.0)
136-
Imgproc.threshold(
137-
srcBlur,
138-
srcBinary,
139-
0.0,
140-
255.0,
141-
Imgproc.THRESH_BINARY or Imgproc.THRESH_OTSU
142-
)
143-
Imgproc.Canny(srcBinary, srcCanny, 30.0, 100.0) // Lowered thresholds for better detection
200+
// Imgproc.threshold(srcBlur, srcBinary, 0.0, 255.0, Imgproc.THRESH_BINARY or Imgproc.THRESH_OTSU)
201+
Imgproc.Canny(srcBlur, srcCanny, 75.0, 200.0)
144202

145203
return findDocumentContour(srcCanny, processingMat)
146204
}
147205

148-
private fun findDocumentContour(contourSource: Mat, processingMat: Mat): List<PointF>? {
149-
// 6. Find contours (external only, similar to typical document-scanning pipelines).
150-
activeContours.clear()
151-
Imgproc.findContours(
152-
contourSource,
153-
activeContours,
154-
Mat(),
155-
Imgproc.RETR_EXTERNAL,
156-
Imgproc.CHAIN_APPROX_SIMPLE
157-
)
158-
159-
// Sort contours by area (descending) and iterate.
160-
val imageArea = (processingMat.width() * processingMat.height()).toDouble()
161-
activeContours.sortByDescending { Imgproc.contourArea(it) }
162-
163-
for (contour in activeContours) {
164-
val area = Imgproc.contourArea(contour)
165-
if (area < imageArea * AREA_THRESHOLD_RATIO) continue
166-
167-
// Filter out very irregular contours using bounding-box heuristics.
168-
val rect = Imgproc.boundingRect(contour)
169-
val aspectRatio = rect.width.toDouble() / rect.height.toDouble()
170-
val rectArea = rect.width.toDouble() * rect.height.toDouble()
171-
val fillRatio = area / rectArea
172-
173-
// Relax filters to accept more tilted rectangles:
174-
// - Aspect ratio: 0.25–4.0 (instead of 0.3–3.5) to allow more extreme tilt.
175-
// - Fill ratio: 0.35 (instead of 0.4) to allow partially visible documents.
176-
if (aspectRatio < 0.25 || aspectRatio > 4.0) continue
177-
if (fillRatio < 0.35) continue
178-
179-
val curve = MatOfPoint2f(*contour.toArray())
180-
val peri = Imgproc.arcLength(curve, true)
181-
val approx = MatOfPoint2f()
182-
// Use a slightly larger epsilon so that a tilted rectangle is approximated as a 4-point polygon more often.
183-
Imgproc.approxPolyDP(curve, approx, 0.025 * peri, true)
184-
185-
if (approx.total() == 4L) {
186-
val points = approx.toList()
187-
val sortedPoints = sortPoints(points)
188-
return sortedPoints.map { PointF(it.x.toFloat(), it.y.toFloat()) }
189-
}
190-
}
191-
192-
// No valid quad found
193-
return null
194-
}
195-
196206
private fun rotateMat(src: Mat, rotationDegrees: Int): Mat {
197207
when (rotationDegrees) {
198208
90 -> {
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
package com.livedetectedges
2+
3+
import android.content.Context
4+
import android.graphics.Bitmap
5+
import android.graphics.BitmapFactory
6+
import android.graphics.PointF
7+
import android.net.Uri
8+
import org.opencv.android.Utils
9+
import org.opencv.core.CvType
10+
import org.opencv.core.Mat
11+
import org.opencv.core.MatOfPoint2f
12+
import org.opencv.core.Point
13+
import org.opencv.core.Size
14+
import org.opencv.imgproc.Imgproc
15+
import java.io.File
16+
import java.io.FileOutputStream
17+
import java.util.UUID
18+
19+
/**
 * Stateless helpers for turning a captured photo into a cropped, perspective-corrected
 * document image and for moving bitmaps to and from storage.
 */
object LiveDetectEdgesImageProcessor {

    /** JPEG quality used by [saveImageToTempFile]. */
    private const val JPEG_QUALITY = 90

    /**
     * Warps the region of [bitmap] enclosed by [quad] into an upright rectangle.
     *
     * The output width is the longer of the top and bottom edges of the quad and the output
     * height is the longer of its left and right edges, both truncated to whole pixels.
     *
     * @param bitmap source image; it must use a config accepted by `Utils.bitmapToMat`.
     * @param quad corners of the document in [bitmap] pixel coordinates, or `null`.
     * @return a new ARGB_8888 bitmap with the corrected document. [bitmap] itself is returned
     *   when [quad] is `null` or so small that the output would be empty.
     */
    fun processImage(bitmap: Bitmap, quad: Quadrilateral?): Bitmap {
        if (quad == null) {
            return bitmap
        }

        val corners = listOf(quad.topLeft, quad.topRight, quad.bottomRight, quad.bottomLeft)
            .map { Point(it.x.toDouble(), it.y.toDouble()) }
        val (topLeft, topRight, bottomRight, bottomLeft) = corners

        // Truncate to whole pixels up front so the destination corners and the output size
        // agree with each other.
        val outWidth = maxOf(edgeLength(topLeft, topRight), edgeLength(bottomLeft, bottomRight)).toInt()
        val outHeight = maxOf(edgeLength(topLeft, bottomLeft), edgeLength(topRight, bottomRight)).toInt()

        // A degenerate quad (collapsed edges or NaN coordinates) would yield an empty output,
        // which makes warpPerspective/createBitmap throw. Treat it like "no quad".
        if (outWidth < 1 || outHeight < 1) {
            return bitmap
        }

        val right = (outWidth - 1).toDouble()
        val bottom = (outHeight - 1).toDouble()

        val srcMat = Mat()
        val dstMat = Mat()
        val srcQuad = MatOfPoint2f(*corners.toTypedArray())
        val dstQuad = MatOfPoint2f(
            Point(0.0, 0.0),
            Point(right, 0.0),
            Point(right, bottom),
            Point(0.0, bottom)
        )
        var transform: Mat? = null

        try {
            Utils.bitmapToMat(bitmap, srcMat)
            transform = Imgproc.getPerspectiveTransform(srcQuad, dstQuad)
            Imgproc.warpPerspective(
                srcMat,
                dstMat,
                transform,
                Size(outWidth.toDouble(), outHeight.toDouble())
            )

            val outBitmap = Bitmap.createBitmap(dstMat.cols(), dstMat.rows(), Bitmap.Config.ARGB_8888)
            Utils.matToBitmap(dstMat, outBitmap)
            return outBitmap
        } finally {
            // Native buffers are released on the failure path too.
            srcMat.release()
            dstMat.release()
            transform?.release()
            srcQuad.release()
            dstQuad.release()
        }
    }

    /**
     * Writes [bitmap] as a JPEG into the app cache directory under a random file name.
     *
     * @return the `file://` URI of the new file as a string, or `null` when encoding or
     *   writing fails. A partially written file is deleted in that case.
     */
    fun saveImageToTempFile(context: Context, bitmap: Bitmap): String? {
        val file = File(context.cacheDir, "${UUID.randomUUID()}.jpg")

        return try {
            // `use` closes the stream even when compress() throws.
            val encoded = FileOutputStream(file).use { out ->
                bitmap.compress(Bitmap.CompressFormat.JPEG, JPEG_QUALITY, out)
            }
            if (encoded) {
                Uri.fromFile(file).toString()
            } else {
                // compress() reports failure through its return value, not an exception.
                file.delete()
                null
            }
        } catch (e: Exception) {
            e.printStackTrace()
            file.delete()
            null
        }
    }

    /**
     * Decodes the image behind [uriString] through the content resolver.
     *
     * @return the decoded bitmap, or `null` when the URI cannot be opened or decoded.
     */
    fun getBitmapFromUri(context: Context, uriString: String): Bitmap? {
        return try {
            // `use` closes the stream once decoding is done, on success and on failure.
            context.contentResolver.openInputStream(Uri.parse(uriString))?.use { stream ->
                BitmapFactory.decodeStream(stream)
            }
        } catch (e: Exception) {
            e.printStackTrace()
            null
        }
    }

    /** Euclidean distance between two points. */
    private fun edgeLength(a: Point, b: Point): Double = Math.hypot(b.x - a.x, b.y - a.y)
}
112+
113+
/**
 * Four corner points of a quadrilateral, named by the position each corner is expected
 * to occupy.
 *
 * @property topLeft corner expected at the top-left.
 * @property topRight corner expected at the top-right.
 * @property bottomRight corner expected at the bottom-right.
 * @property bottomLeft corner expected at the bottom-left.
 */
data class Quadrilateral(
    val topLeft: PointF,
    val topRight: PointF,
    val bottomRight: PointF,
    val bottomLeft: PointF,
)

0 commit comments

Comments
 (0)