Add PaddleOCR debug for Aadhaar candidates
All checks were successful
build-and-deploy / build-deploy (push) Successful in 34s

This commit is contained in:
androidlover5842
2026-01-31 22:25:14 +05:30
parent b6d613b743
commit f5c6406e31

View File

@@ -29,6 +29,7 @@ class PaddleOcrClient(
private val minTextLength: Int private val minTextLength: Int
) { ) {
private val logger = LoggerFactory.getLogger(PaddleOcrClient::class.java) private val logger = LoggerFactory.getLogger(PaddleOcrClient::class.java)
private val aadhaarRegex = Regex("\\b(?:\\d[\\s-]?){12}\\b")
fun extract(filePath: String): PaddleOcrResult? { fun extract(filePath: String): PaddleOcrResult? {
if (!enabled) return null if (!enabled) return null
@@ -36,10 +37,23 @@ class PaddleOcrClient(
if (!Files.exists(path)) return null if (!Files.exists(path)) return null
return try { return try {
val sizeBytes = Files.size(path)
logger.debug("PaddleOCR extract path={} sizeBytes={}", path, sizeBytes)
val output = callOcr(path) val output = callOcr(path)
val average = averageScore(output.scores) val average = averageScore(output.scores)
val rawCandidates = extractCandidates(output.texts)
val filtered = filterByScore(output.texts, output.scores, minScore, minTextLength) val filtered = filterByScore(output.texts, output.scores, minScore, minTextLength)
val filteredCandidates = extractCandidates(filtered)
val aadhaar = extractAadhaar(filtered) val aadhaar = extractAadhaar(filtered)
if (rawCandidates.isNotEmpty() || filteredCandidates.isNotEmpty() || aadhaar != null) {
logger.debug(
"PaddleOCR candidates path={} raw={} filtered={} selected={}",
path,
rawCandidates.map { maskAadhaar(it) },
filteredCandidates.map { maskAadhaar(it) },
aadhaar?.let { maskAadhaar(it) }
)
}
val rejected = average != null && average < minAverageScore val rejected = average != null && average < minAverageScore
PaddleOcrResult(filtered, aadhaar, average, rejected) PaddleOcrResult(filtered, aadhaar, average, rejected)
} catch (ex: Exception) { } catch (ex: Exception) {
@@ -83,17 +97,27 @@ class PaddleOcrClient(
} }
/**
 * Picks the Aadhaar number to report from the given OCR text fragments.
 *
 * Candidates come from [extractCandidates]; the first one accepted by
 * [isValidAadhaar] is passed through [formatAadhaar] and returned.
 *
 * @param texts OCR text fragments to scan.
 * @return the formatted first valid candidate, or `null` when no candidate is valid.
 */
private fun extractAadhaar(texts: List<String>): String? =
    extractCandidates(texts)
        .firstOrNull { candidate -> isValidAadhaar(candidate) }
        ?.let { valid -> formatAadhaar(valid) }
/**
 * Collects every 12-digit candidate found in the given OCR text fragments.
 *
 * The fragments are joined with single spaces before matching, so a match may
 * span adjacent fragments. Each match of [aadhaarRegex] is reduced to its digits
 * (separators dropped) and candidates are returned in order of occurrence.
 * No validity check is applied here.
 *
 * @param texts OCR text fragments to scan.
 * @return the digits-only candidates, possibly empty.
 */
private fun extractCandidates(texts: List<String>): List<String> {
    val joined = texts.joinToString(" ")
    return aadhaarRegex.findAll(joined)
        .map { match -> match.value.filter { it.isDigit() } }
        // Guard kept from the original: drops any match that does not reduce to exactly 12 digits.
        .filter { digits -> digits.length == 12 }
        .toList()
}
/**
 * Masks a 12-digit value so that only its last four digits stay readable.
 *
 * Non-digit characters in [value] are ignored when counting digits. If the
 * digit count is not exactly 12, [value] is returned unchanged.
 *
 * @param value a candidate number, with or without separators.
 * @return eight `X` characters followed by the last four digits, or [value] as-is.
 */
private fun maskAadhaar(value: String): String {
    val digitsOnly = value.filter { it.isDigit() }
    return if (digitsOnly.length == 12) {
        "XXXXXXXX" + digitsOnly.takeLast(4)
    } else {
        value
    }
}
} }