Add PaddleOCR debug for Aadhaar candidates
All checks were successful
build-and-deploy / build-deploy (push) Successful in 34s
All checks were successful
build-and-deploy / build-deploy (push) Successful in 34s
This commit is contained in:
@@ -29,6 +29,7 @@ class PaddleOcrClient(
|
|||||||
private val minTextLength: Int
|
private val minTextLength: Int
|
||||||
) {
|
) {
|
||||||
private val logger = LoggerFactory.getLogger(PaddleOcrClient::class.java)
|
private val logger = LoggerFactory.getLogger(PaddleOcrClient::class.java)
|
||||||
|
// Pattern for a 12-digit number: each digit may be followed by at most one whitespace
// character or hyphen, and the whole match must sit between word boundaries.
// Held as a property so the pattern is not rebuilt on every extractCandidates call.
private val aadhaarRegex = Regex("\\b(?:\\d[\\s-]?){12}\\b")
|
||||||
|
|
||||||
fun extract(filePath: String): PaddleOcrResult? {
|
fun extract(filePath: String): PaddleOcrResult? {
|
||||||
if (!enabled) return null
|
if (!enabled) return null
|
||||||
@@ -36,10 +37,23 @@ class PaddleOcrClient(
|
|||||||
if (!Files.exists(path)) return null
|
if (!Files.exists(path)) return null
|
||||||
|
|
||||||
return try {
|
return try {
|
||||||
|
val sizeBytes = Files.size(path)
|
||||||
|
logger.debug("PaddleOCR extract path={} sizeBytes={}", path, sizeBytes)
|
||||||
val output = callOcr(path)
|
val output = callOcr(path)
|
||||||
val average = averageScore(output.scores)
|
val average = averageScore(output.scores)
|
||||||
|
val rawCandidates = extractCandidates(output.texts)
|
||||||
val filtered = filterByScore(output.texts, output.scores, minScore, minTextLength)
|
val filtered = filterByScore(output.texts, output.scores, minScore, minTextLength)
|
||||||
|
val filteredCandidates = extractCandidates(filtered)
|
||||||
val aadhaar = extractAadhaar(filtered)
|
val aadhaar = extractAadhaar(filtered)
|
||||||
|
if (rawCandidates.isNotEmpty() || filteredCandidates.isNotEmpty() || aadhaar != null) {
|
||||||
|
logger.debug(
|
||||||
|
"PaddleOCR candidates path={} raw={} filtered={} selected={}",
|
||||||
|
path,
|
||||||
|
rawCandidates.map { maskAadhaar(it) },
|
||||||
|
filteredCandidates.map { maskAadhaar(it) },
|
||||||
|
aadhaar?.let { maskAadhaar(it) }
|
||||||
|
)
|
||||||
|
}
|
||||||
val rejected = average != null && average < minAverageScore
|
val rejected = average != null && average < minAverageScore
|
||||||
PaddleOcrResult(filtered, aadhaar, average, rejected)
|
PaddleOcrResult(filtered, aadhaar, average, rejected)
|
||||||
} catch (ex: Exception) {
|
} catch (ex: Exception) {
|
||||||
@@ -83,17 +97,27 @@ class PaddleOcrClient(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Picks the Aadhaar number to report for the given OCR text fragments.
 *
 * Candidates come from [extractCandidates]; the first one accepted by
 * [isValidAadhaar] is passed through [formatAadhaar] and returned.
 *
 * @param texts OCR text fragments to search.
 * @return the formatted first valid candidate, or `null` when no candidate is valid.
 */
private fun extractAadhaar(texts: List<String>): String? =
    extractCandidates(texts)
        .firstOrNull { candidate -> isValidAadhaar(candidate) }
        ?.let { accepted -> formatAadhaar(accepted) }
|
||||||
|
|
||||||
|
/**
 * Collects every 12-digit candidate found in the given text fragments.
 *
 * The fragments are joined with single spaces and scanned with [aadhaarRegex].
 * Each match is reduced to its digit characters and kept only when exactly
 * 12 digits remain.
 *
 * @param texts OCR text fragments to search.
 * @return digit-only candidate strings in match order; empty when nothing matches.
 */
private fun extractCandidates(texts: List<String>): List<String> =
    aadhaarRegex
        .findAll(texts.joinToString(" "))
        // Drop the separators the pattern tolerates so only digits remain.
        .map { hit -> hit.value.filter { ch -> ch.isDigit() } }
        .filter { digitsOnly -> digitsOnly.length == 12 }
        .toList()
|
||||||
|
|
||||||
|
/**
 * Masks an Aadhaar-like value so it can be written to logs.
 *
 * When [value] contains exactly 12 digits, the result is eight `X` characters
 * followed by the last four digits. For any other shape every digit is replaced
 * with `X` and all other characters are kept, so raw digits are never echoed
 * back even when the input is not what the caller expected.
 *
 * @param value the text to mask; separators such as spaces or hyphens are allowed.
 * @return the masked representation of [value].
 */
private fun maskAadhaar(value: String): String {
    val digits = value.filter { it.isDigit() }
    return if (digits.length == 12) {
        "XXXXXXXX" + digits.takeLast(4)
    } else {
        // Unexpected digit count: fail closed rather than returning the input as-is.
        value.map { ch -> if (ch.isDigit()) 'X' else ch }.joinToString("")
    }
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user