Normalize extracted address fields
All checks were successful
build-and-deploy / build-deploy (push) Successful in 32s
All checks were successful
build-and-deploy / build-deploy (push) Successful in 32s
This commit is contained in:
@@ -244,6 +244,7 @@ class DocumentExtractionService(
|
|||||||
normalizePinCode(results)
|
normalizePinCode(results)
|
||||||
normalizeIdNumber(results)
|
normalizeIdNumber(results)
|
||||||
markAadhaarIfValid(results)
|
markAadhaarIfValid(results)
|
||||||
|
normalizeAddress(results)
|
||||||
applyBookingCityUpdates(document, results)
|
applyBookingCityUpdates(document, results)
|
||||||
results["docType"] = computeDocType(results, handled)
|
results["docType"] = computeDocType(results, handled)
|
||||||
applyGuestUpdates(document, propertyId, results)
|
applyGuestUpdates(document, propertyId, results)
|
||||||
@@ -282,6 +283,13 @@ class DocumentExtractionService(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Rewrites the address entry of [results] in place with its cleaned-up form.
 *
 * The entry is looked up under `DocumentPrompts.ADDRESS.first`. The map is left
 * untouched when `cleanedValue` yields null for the stored value, or when
 * `cleanAddress` yields null for the result of that first step.
 */
private fun normalizeAddress(results: MutableMap<String, String>) {
    val addressKey = DocumentPrompts.ADDRESS.first
    cleanedValue(results[addressKey])
        ?.let { extracted -> cleanAddress(extracted) }
        ?.let { tidied -> results[addressKey] = tidied }
}
|
||||||
|
|
||||||
private fun markAadhaarIfValid(results: MutableMap<String, String>) {
|
private fun markAadhaarIfValid(results: MutableMap<String, String>) {
|
||||||
val idKey = DocumentPrompts.ID_NUMBER.first
|
val idKey = DocumentPrompts.ID_NUMBER.first
|
||||||
val digits = normalizeDigits(cleanedValue(results[idKey]))
|
val digits = normalizeDigits(cleanedValue(results[idKey]))
|
||||||
@@ -519,3 +527,54 @@ private fun isValidPin(value: String?): Boolean {
|
|||||||
if (value.isNullOrBlank()) return false
|
if (value.isNullOrBlank()) return false
|
||||||
return pinCodeRegex.matches(value)
|
return pinCodeRegex.matches(value)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Matches a segment that starts with a relation marker (S/O, D/O, W/O, C/O, H/O, F/O). */
private val addressRelationRegex =
    Regex("^\\s*(S/O|D/O|W/O|C/O|H/O|F/O)\\b", RegexOption.IGNORE_CASE)

/**
 * Field labels stripped from the start of each address segment, applied in list order.
 *
 * Order matters: a more specific label must come before any shorter label that is a
 * prefix of it (e.g. `HOUSE NO` before `HOUSE`, `DISTRICT` before `DIST`), otherwise the
 * shorter label consumes part of the text and the specific one can no longer match.
 *
 * Abbreviations that may carry a trailing dot end in `(?:\.|\b)` rather than `\.?\b`:
 * the latter cannot consume the dot when it is followed by a space ("P.O. Rampur"),
 * which would leave a stray ". " in front of the value.
 */
private val addressPrefixRegexes: List<Regex> = listOf(
    "ADDRESS\\b",
    "HOUSE\\s*/\\s*BLDG\\.?\\s*/\\s*APT(?:\\.|\\b)",
    "HOUSE-?BLDG\\b",
    "HOUSE\\s*NO\\b",
    "HOUSE\\b",
    "H\\s*NO\\b",
    "STREET/ROAD/LANE\\b",
    "STREET\\b",
    "ROAD\\b",
    "LANE\\b",
    "AREA\\b",
    "SECTOR\\b",
    "COLONY\\b",
    "VILLAGE/TOWN/CITY\\b",
    "VILLAGE\\b",
    "TOWN\\b",
    "CITY\\b",
    "P\\.?\\s*O(?:\\.|\\b)",
    "POST\\s*OFFICE\\b",
    "P\\.?\\s*DIST(?:\\.|\\b)",
    "DISTRICT\\b",
    "DIST\\b",
    "STATE\\b",
    "PIN\\s*CODE\\b",
    "PINCODE\\b",
    "PIN\\b",
).map { label -> Regex("^\\s*${label}[:\\-\\s]*", RegexOption.IGNORE_CASE) }

/** Segments that are only a leftover label (compared lowercase) and carry no address data. */
private val addressDropPhrases = setOf(
    "address", "addr", "area", "area was", "street/road/lane", "village/town/city", "colony"
)

/** Matches any run of whitespace, used to collapse it to a single space. */
private val addressWhitespaceRegex = Regex("\\s+")

/**
 * Cleans a raw address string.
 *
 * The input is split on commas and newlines. Each segment is cleaned by
 * [cleanAddressPart], and the surviving segments are joined with ", ".
 *
 * @param raw the address text to clean.
 * @return the cleaned address, or null when no segment survives.
 */
private fun cleanAddress(raw: String): String? {
    val cleanedParts = raw.replace("\n", ",")
        .split(",")
        .mapNotNull { part -> cleanAddressPart(part) }
    return if (cleanedParts.isEmpty()) null else cleanedParts.joinToString(", ")
}

/**
 * Cleans one comma-separated address segment.
 *
 * @return the segment with leading field labels removed and whitespace collapsed, or
 * null when the segment is blank, starts with a relation marker, is shorter than three
 * characters after cleaning, or is only a leftover label.
 */
private fun cleanAddressPart(part: String): String? {
    val trimmed = part.trim()
    if (trimmed.isBlank()) return null
    // Relation segments ("S/O ...") are dropped entirely.
    if (addressRelationRegex.containsMatchIn(trimmed)) return null

    // Every label is tried once, in order, so stacked labels can be peeled off one by one.
    val unlabelled = addressPrefixRegexes.fold(trimmed) { current, prefix ->
        prefix.replace(current, "").trim()
    }
    val value = unlabelled.replace(addressWhitespaceRegex, " ").trim()

    // A blank value has length 0, so the length check also covers the empty case.
    if (value.length < 3) return null
    if (value.lowercase() in addressDropPhrases) return null
    return value
}
|
||||||
|
|||||||
Reference in New Issue
Block a user