Skip to content

Commit c9da4f6

Browse files
authored
Improve perspective correction using projective geometry (#173)
1 parent 01402c5 commit c9da4f6

3 files changed

Lines changed: 140 additions & 16 deletions

File tree

evaluation/src/main/java/org/fairscan/evaluation/DatasetEvaluator.kt

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,9 @@
1515
package org.fairscan.evaluation
1616

1717
import org.fairscan.imageprocessing.Mask
18+
import org.fairscan.imageprocessing.autoColorMode
1819
import org.fairscan.imageprocessing.detectDocumentQuad
1920
import org.fairscan.imageprocessing.extractDocument
20-
import org.fairscan.imageprocessing.autoColorMode
2121
import org.fairscan.imageprocessing.scaledTo
2222
import org.fairscan.imageprocessing.toImageSize
2323
import org.opencv.core.Mat
@@ -53,7 +53,7 @@ object DatasetEvaluator {
5353
?.mapNotNull { img ->
5454
val mask = File(maskDir, img.nameWithoutExtension + ".png")
5555
if (mask.exists()) Entry(img.nameWithoutExtension, img, mask) else null
56-
}
56+
}?.sortedBy { e -> e.name }
5757
?: emptyList()
5858

5959
val htmlFragments = mutableListOf<String>()
@@ -73,18 +73,16 @@ object DatasetEvaluator {
7373
val quad = detectDocumentQuad(mask, originalSize, isLiveAnalysis = false)
7474
?.scaledTo(mask.width, mask.height, inputMat.width(), inputMat.height())
7575

76-
val corrected: Mat? = if (quad != null) {
77-
val colorMode = autoColorMode(inputMat, mask, quad)
78-
extractDocument(inputMat, quad = quad, rotationDegrees = 0, colorMode, 2_000_000)
79-
} else null
76+
if (quad == null) continue
77+
78+
val colorMode = autoColorMode(inputMat, mask, quad)
79+
val corrected = extractDocument(inputMat, quad = quad, rotationDegrees = 0, colorMode, 2_000_000)
8080

8181
val inputOut = File(outputDir, "${e.name}_input.jpg")
8282
Imgcodecs.imwrite(inputOut.absolutePath, inputMat)
8383

8484
val outputOut = File(outputDir, "${e.name}_output.jpg")
85-
if (corrected != null) {
86-
Imgcodecs.imwrite(outputOut.absolutePath, corrected)
87-
}
85+
Imgcodecs.imwrite(outputOut.absolutePath, corrected)
8886

8987
htmlFragments += """
9088
<div class="entry">

imageprocessing/src/main/java/org/fairscan/imageprocessing/DocumentDetection.kt

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -157,13 +157,7 @@ fun extractDocument(
157157
colorMode: ColorMode,
158158
maxPixels: Long,
159159
): Mat {
160-
val widthTop = norm(quad.topLeft, quad.topRight)
161-
val widthBottom = norm(quad.bottomLeft, quad.bottomRight)
162-
val targetWidth = (widthTop + widthBottom) / 2
163-
164-
val heightLeft = norm(quad.topLeft, quad.bottomLeft)
165-
val heightRight = norm(quad.topRight, quad.bottomRight)
166-
val targetHeight = (heightLeft + heightRight) / 2
160+
val (targetWidth, targetHeight) = estimateRealDimensions(quad, inputMat.cols(), inputMat.rows())
167161

168162
val srcPoints = MatOfPoint2f(
169163
quad.topLeft.toCv(),
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
/*
2+
* Copyright 2025-2026 Pierre-Yves Nicolas
3+
*
4+
* This program is free software: you can redistribute it and/or modify it
5+
* under the terms of the GNU General Public License as published by the Free
6+
* Software Foundation, either version 3 of the License, or (at your option)
7+
* any later version.
8+
* This program is distributed in the hope that it will be useful, but WITHOUT
9+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11+
* more details.
12+
* You should have received a copy of the GNU General Public License along with
13+
* this program. If not, see <https://www.gnu.org/licenses/>.
14+
*/
15+
package org.fairscan.imageprocessing
16+
17+
import kotlin.math.absoluteValue
18+
import kotlin.math.max
19+
import kotlin.math.sqrt
20+
21+
/**
 * Immutable vector with three [Double] components.
 *
 * Provides the small set of vector operations needed by the geometry code:
 * subtraction, scalar multiplication, dot product, cross product and
 * Euclidean length.
 */
data class Vector3D(val x: Double, val y: Double, val z: Double) {

    /** Returns the component-wise difference `this - other`. */
    operator fun minus(other: Vector3D): Vector3D {
        return Vector3D(x - other.x, y - other.y, z - other.z)
    }

    /** Returns this vector with every component multiplied by [t]. */
    operator fun times(t: Double): Vector3D {
        return Vector3D(x * t, y * t, z * t)
    }

    /**
     * Returns the scalar (dot) product of this vector and [other].
     *
     * See https://en.wikipedia.org/wiki/Dot_product
     */
    fun dotProduct(other: Vector3D): Double {
        return x * other.x + y * other.y + z * other.z
    }

    /**
     * Returns the cross product `this × other`.
     *
     * See https://en.wikipedia.org/wiki/Cross_product
     */
    fun crossProduct(other: Vector3D): Vector3D {
        val crossX = y * other.z - z * other.y
        val crossY = z * other.x - x * other.z
        val crossZ = x * other.y - y * other.x
        return Vector3D(crossX, crossY, crossZ)
    }

    /** Returns the Euclidean length, i.e. the square root of the vector dotted with itself. */
    fun norm(): Double = sqrt(dotProduct(this))
}
34+
35+
/**
36+
* Estimates the true width and height of the document in the output image,
37+
* correcting for perspective distortion using projective geometry.
38+
*
39+
* Falls back to average side lengths when the geometry is degenerate
40+
* or the perspective is too weak to estimate reliably.
41+
*
42+
* See:
43+
* - https://en.wikipedia.org/wiki/Pinhole_camera_model
44+
* - https://www.robots.ox.ac.uk/~vgg/publications/1999/Criminisi99/criminisi99.pdf
45+
* - https://web.stanford.edu/class/cs231a/course_notes/02-single-view-metrology.pdf
46+
*/
47+
fun estimateRealDimensions(quad: Quad, imageWidth: Int, imageHeight: Int): Pair<Double, Double> {
48+
49+
fun averageSides(): Pair<Double, Double> {
50+
val w = (norm(quad.topLeft, quad.topRight) + norm(quad.bottomLeft, quad.bottomRight)) / 2
51+
val h = (norm(quad.topLeft, quad.bottomLeft) + norm(quad.topRight, quad.bottomRight)) / 2
52+
return Pair(w, h)
53+
}
54+
55+
// Homogeneous 2D point
56+
// https://en.wikipedia.org/wiki/Homogeneous_coordinates#Use_in_computer_graphics_and_computer_vision
57+
fun toH(p: Point) = Vector3D(p.x, p.y, 1.0)
58+
59+
// Line through two points in homogeneous coordinates
60+
fun lineThrough(p1: Point, p2: Point) = toH(p1).crossProduct(toH(p2))
61+
62+
// Vanishing points from pairs of opposite sides
63+
val v1h = lineThrough(quad.topLeft, quad.topRight)
64+
.crossProduct(lineThrough(quad.bottomLeft, quad.bottomRight))
65+
val v2h = lineThrough(quad.topLeft, quad.bottomLeft)
66+
.crossProduct(lineThrough(quad.topRight, quad.bottomRight))
67+
68+
// Degenerate case: one pair of sides is parallel (vanishing point at infinity)
69+
if (v1h.z.absoluteValue < 1e-6 || v2h.z.absoluteValue < 1e-6)
70+
return averageSides()
71+
72+
// Approximate "principal point" as image center (common assumption on mobile cameras)
73+
val cx = imageWidth / 2.0
74+
val cy = imageHeight / 2.0
75+
76+
// Vanishing points in Cartesian coordinates, relative to principal point
77+
val v1 = Point(v1h.x / v1h.z - cx, v1h.y / v1h.z - cy)
78+
val v2 = Point(v2h.x / v2h.z - cx, v2h.y / v2h.z - cy)
79+
80+
// Focal length estimated assuming zero skew and principal point at image center.
81+
// Under these assumptions, the Image of the Absolute Conic (IAC) simplifies,
82+
// and orthogonal directions satisfy v1 · ω · v2 = 0,
83+
// which reduces to: f² = -(v1x·v2x + v1y·v2y)
84+
val f2 = -(v1.x * v2.x + v1.y * v2.y)
85+
if (f2 <= 0)
86+
return averageSides()
87+
val f = sqrt(f2)
88+
89+
// Fall back when f is too large: document nearly fronto-parallel,
90+
// vanishing points are far away, making the focal length estimate unstable.
91+
//
92+
// This threshold is heuristic and tuned for typical smartphone images.
93+
// Note that the estimated f depends on both camera intrinsics and scene geometry,
94+
// so large values usually indicate low perspective rather than an actual large focal length.
95+
//
96+
// In those cases, falling back to average side lengths gives a stable approximation.
97+
if (f > max(imageWidth, imageHeight) * 1.2)
98+
return averageSides()
99+
100+
// 3D directions of each pair of sides, back-projected through K⁻¹
101+
val d1 = Vector3D(v1.x, v1.y, f)
102+
val d2 = Vector3D(v2.x, v2.y, f)
103+
104+
// Document plane normal: perpendicular to both edge directions
105+
val n = d1.crossProduct(d2)
106+
107+
// Camera ray through a corner: K⁻¹ · (u, v, 1)
108+
fun ray(p: Point) = Vector3D((p.x - cx) / f, (p.y - cy) / f, 1.0)
109+
110+
// Intersect ray with document plane: X = t·r where t = 1 / (n·r)
111+
// We assume an arbitrary plane distance (d = 1). Absolute scale is wrong,
112+
// but cancels out when computing length ratios.
113+
fun corner3D(p: Point): Vector3D {
114+
val r = ray(p)
115+
return r * (1.0 / n.dotProduct(r))
116+
}
117+
118+
val xTL = corner3D(quad.topLeft); val xTR = corner3D(quad.topRight)
119+
val xBR = corner3D(quad.bottomRight); val xBL = corner3D(quad.bottomLeft)
120+
121+
// Side lengths in reconstructed 3D space (up to an unknown global scale)
122+
val realW = ((xTR - xTL).norm() + (xBR - xBL).norm()) / 2
123+
val realH = ((xBL - xTL).norm() + (xBR - xTR).norm()) / 2
124+
125+
// Output dimensions: preserve projected area, apply corrected aspect ratio
126+
val ratio = realH / realW
127+
val (projW, projH) = averageSides()
128+
val targetWidth = sqrt(projW * projH / ratio)
129+
val targetHeight = targetWidth * ratio
130+
131+
return Pair(targetWidth, targetHeight)
132+
}

0 commit comments

Comments
 (0)