diff --git a/src/main/kotlin/com/android/trisolarisserver/component/PaddleOcrClient.kt b/src/main/kotlin/com/android/trisolarisserver/component/PaddleOcrClient.kt index db65ed2..dd5685c 100644 --- a/src/main/kotlin/com/android/trisolarisserver/component/PaddleOcrClient.kt +++ b/src/main/kotlin/com/android/trisolarisserver/component/PaddleOcrClient.kt @@ -29,6 +29,7 @@ class PaddleOcrClient( private val minTextLength: Int ) { private val logger = LoggerFactory.getLogger(PaddleOcrClient::class.java) + private val aadhaarRegex = Regex("\\b(?:\\d[\\s-]?){12}\\b") fun extract(filePath: String): PaddleOcrResult? { if (!enabled) return null @@ -36,10 +37,23 @@ class PaddleOcrClient( if (!Files.exists(path)) return null return try { + val sizeBytes = Files.size(path) + logger.debug("PaddleOCR extract path={} sizeBytes={}", path, sizeBytes) val output = callOcr(path) val average = averageScore(output.scores) + val rawCandidates = extractCandidates(output.texts) val filtered = filterByScore(output.texts, output.scores, minScore, minTextLength) + val filteredCandidates = extractCandidates(filtered) val aadhaar = extractAadhaar(filtered) + if (rawCandidates.isNotEmpty() || filteredCandidates.isNotEmpty() || aadhaar != null) { + logger.debug( + "PaddleOCR candidates path={} raw={} filtered={} selected={}", + path, + rawCandidates.map { maskAadhaar(it) }, + filteredCandidates.map { maskAadhaar(it) }, + aadhaar?.let { maskAadhaar(it) } + ) + } val rejected = average != null && average < minAverageScore PaddleOcrResult(filtered, aadhaar, average, rejected) } catch (ex: Exception) { @@ -83,17 +97,27 @@ class PaddleOcrClient( } private fun extractAadhaar(texts: List): String? { + val candidates = extractCandidates(texts) + val valid = candidates.firstOrNull { isValidAadhaar(it) } ?: return null + return formatAadhaar(valid) + } + + private fun extractCandidates(texts: List): List { val joined = texts.joinToString(" ") val candidates = mutableListOf() - val regex = Regex("\\b(?:\\d[\\s-]?){12}\\b") - regex.findAll(joined).forEach { match -> + aadhaarRegex.findAll(joined).forEach { match -> val digits = match.value.filter { it.isDigit() } if (digits.length == 12) { candidates.add(digits) } } - val valid = candidates.firstOrNull { isValidAadhaar(it) } ?: return null - return formatAadhaar(valid) + return candidates + } + + private fun maskAadhaar(value: String): String { + val digits = value.filter { it.isDigit() } + if (digits.length != 12) return value + return "XXXXXXXX" + digits.takeLast(4) } }