ocr: attach high confidence stuff to all images
All checks were successful
build-and-deploy / build-deploy (push) Successful in 32s
All checks were successful
build-and-deploy / build-deploy (push) Successful in 32s
This commit is contained in:
@@ -24,7 +24,9 @@ class PaddleOcrClient(
|
||||
@Value("\${ocr.paddle.minScore:0.9}")
|
||||
private val minScore: Double,
|
||||
@Value("\${ocr.paddle.minAverageScore:0.8}")
|
||||
private val minAverageScore: Double
|
||||
private val minAverageScore: Double,
|
||||
@Value("\${ocr.paddle.minTextLength:4}")
|
||||
private val minTextLength: Int
|
||||
) {
|
||||
private val logger = LoggerFactory.getLogger(PaddleOcrClient::class.java)
|
||||
|
||||
@@ -36,7 +38,7 @@ class PaddleOcrClient(
|
||||
return try {
|
||||
val output = callOcr(path)
|
||||
val average = averageScore(output.scores)
|
||||
val filtered = filterByScore(output.texts, output.scores, minScore)
|
||||
val filtered = filterByScore(output.texts, output.scores, minScore, minTextLength)
|
||||
val aadhaar = extractAadhaar(filtered)
|
||||
val rejected = average != null && average < minAverageScore
|
||||
PaddleOcrResult(filtered, aadhaar, average, rejected)
|
||||
@@ -68,10 +70,10 @@ class PaddleOcrClient(
|
||||
return OcrPayload(parsedTexts, parsedScores)
|
||||
}
|
||||
|
||||
private fun filterByScore(texts: List<String>, scores: List<Double>, min: Double): List<String> {
|
||||
private fun filterByScore(texts: List<String>, scores: List<Double>, min: Double, minLen: Int): List<String> {
|
||||
if (scores.size != texts.size || scores.isEmpty()) return texts
|
||||
return texts.mapIndexedNotNull { index, text ->
|
||||
if (scores[index] >= min) text else null
|
||||
if (scores[index] >= min && text.trim().length >= minLen) text else null
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user