diff --git a/codegen/smithy-kotlin-codegen/src/main/kotlin/software/amazon/smithy/kotlin/codegen/utils/CaseUtils.kt b/codegen/smithy-kotlin-codegen/src/main/kotlin/software/amazon/smithy/kotlin/codegen/utils/CaseUtils.kt index 0dd7bef0a6..be77ef55ee 100644 --- a/codegen/smithy-kotlin-codegen/src/main/kotlin/software/amazon/smithy/kotlin/codegen/utils/CaseUtils.kt +++ b/codegen/smithy-kotlin-codegen/src/main/kotlin/software/amazon/smithy/kotlin/codegen/utils/CaseUtils.kt @@ -8,32 +8,99 @@ package software.amazon.smithy.kotlin.codegen.utils * Split a string on word boundaries */ fun String.splitOnWordBoundaries(): List { - // adapted from Java v2 SDK CodegenNamingUtils.splitOnWordBoundaries - var result = this + // This is taken from Rust: https://github.com/awslabs/smithy-rs/pull/3037/files#diff-4175c66ee81a450fcf1cd3e256f36ae2c8e0b30b910be8ca505135fbe215144d + // previously we used the Java v2 implementation https://github.com/aws/aws-sdk-java-v2/blob/2.20.162/utils/src/main/java/software/amazon/awssdk/utils/internal/CodegenNamingUtils.java#L36 + // but this has some edge cases it doesn't handle well + val out = mutableListOf() + // These are whole words but cased differently, e.g. `IPv4`, `MiB`, `GiB`, `TtL` + val completeWords = listOf("ipv4", "ipv6", "sigv4", "mib", "gib", "kib", "ttl") + var currentWord = "" - // all non-alphanumeric characters: "acm-success"-> "acm success" - result = result.replace(Regex("[^A-Za-z0-9+]"), " ") + // emit the current word and update from the next character + val emit = { next: Char -> + if (currentWord.isNotEmpty()) { + out += currentWord.lowercase() + } + currentWord = if (next.isLetterOrDigit()) { + next.toString() + } else { + "" + } + } + val allLowerCase = this.lowercase() == this + this.forEachIndexed { index, nextCharacter -> + val peek = this.getOrNull(index + 1) + val doublePeek = this.getOrNull(index + 2) + val completeWordInProgress = completeWords.any { + (currentWord + this.substring(index)).lowercase().startsWith( + it, + ) + } && !completeWords.contains(currentWord.lowercase()) + when { + // [C] in these docs indicates the value of nextCharacter + // A[_]B + !nextCharacter.isLetterOrDigit() -> emit(nextCharacter) - // if a number has a standalone v in front of it, separate it out - result = result.replace(Regex("([^a-z]{2,})v([0-9]+)"), "$1 v$2 ") // TESTv4 -> "TEST v4 " - .replace(Regex("([^A-Z]{2,})V([0-9]+)"), "$1 V$2 ") // TestV4 -> "Test V4 " + // If we have no letters so far, push the next letter (we already know it's a letter or digit) + currentWord.isEmpty() -> currentWord += nextCharacter.toString() - // add a space between camelCased words - result = result.split(Regex("(?<=[a-z])(?=[A-Z]([a-zA-Z]|[0-9]))")).joinToString(separator = " ") // AcmSuccess -> // "Acm Success" + // Abc[D]ef or Ab2[D]ef + !completeWordInProgress && loweredFollowedByUpper(currentWord, nextCharacter) -> emit(nextCharacter) - // add a space after acronyms - result = result.replace(Regex("([A-Z]+)([A-Z][a-z])"), "$1 $2") // "ACMSuccess" -> "ACM Success" + // s3[k]ey + !completeWordInProgress && allLowerCase && digitFollowedByLower(currentWord, nextCharacter) -> emit( + nextCharacter, + ) - // add space after a number in the middle of a word - result = result.replace(Regex("([0-9])([a-zA-Z])"), "$1 $2") // "s3ec2" -> "s3 ec2" + // DB[P]roxy, or `IAM[U]ser` but not AC[L]s + endOfAcronym(currentWord, nextCharacter, peek, doublePeek) -> emit(nextCharacter) - // remove extra spaces - multiple consecutive ones or those and the beginning/end of words - result = result.replace(Regex("\\s+"), " ") // "Foo Bar" -> "Foo Bar" - .trim() // " Foo " -> "Foo" + // If we haven't found a word boundary, push it and keep going + else -> currentWord += nextCharacter.toString() + } + } + if (currentWord.isNotEmpty()) { + out += currentWord + } + return out +} + +/** + * Handle cases like `DB[P]roxy`, `ARN[S]upport`, `AC[L]s` + */ +private fun endOfAcronym(current: String, nextChar: Char, peek: Char?, doublePeek: Char?): Boolean { + if (!current.last().isUpperCase()) { + // Not an acronym in progress + return false + } + if (!nextChar.isUpperCase()) { + // We aren't at the next word yet + return false + } + + if (peek?.isLowerCase() != true) { + return false + } - return result.split(" ") + // Skip cases like `AR[N]s`, `AC[L]s` but not `IAM[U]ser` + if (peek == 's' && (doublePeek == null || !doublePeek.isLowerCase())) { + return false + } + + // Skip cases like `DynamoD[B]v2` + return !(peek == 'v' && doublePeek?.isDigit() == true) +} + +private fun loweredFollowedByUpper(current: String, nextChar: Char): Boolean { + if (!nextChar.isUpperCase()) { + return false + } + return current.last().isLowerCase() || current.last().isDigit() } +private fun digitFollowedByLower(current: String, nextChar: Char): Boolean = + (current.last().isDigit() && nextChar.isLowerCase()) + /** * Convert a string to `PascalCase` (uppercase start with upper case word boundaries) */ diff --git a/codegen/smithy-kotlin-codegen/src/test/kotlin/software/amazon/smithy/kotlin/codegen/core/NamingTest.kt b/codegen/smithy-kotlin-codegen/src/test/kotlin/software/amazon/smithy/kotlin/codegen/core/NamingTest.kt index 9028c0b689..251d2c5914 100644 --- a/codegen/smithy-kotlin-codegen/src/test/kotlin/software/amazon/smithy/kotlin/codegen/core/NamingTest.kt +++ b/codegen/smithy-kotlin-codegen/src/test/kotlin/software/amazon/smithy/kotlin/codegen/core/NamingTest.kt @@ -119,12 +119,18 @@ class NamingTest { "arm64" to "Arm64", ) + val errors = mutableListOf() tests.forEach { (input, expected) -> // NOTE: a lot of these are not valid names according to the Smithy spec but since // we still allow deriving a name from the enum value we want to verify what _would_ happen // should we encounter these inputs val actual = input.enumVariantName() - assertEquals(expected, actual, "input: $input") + if (expected != actual) { + errors += "expected '$expected' != actual '$actual' for input: $input" + } + } + if (errors.isNotEmpty()) { + fail(errors.joinToString("\n")) } } @@ -174,6 +180,49 @@ class NamingTest { assertNotEquals(all, firstMember) assertNotEquals(firstMember, secondMember) } + + @Test + fun testCamelCase() { + val tests = listOf( + "ACLs" to "acls", + "ACLsUpdateStatus" to "aclsUpdateStatus", + "AllowedAllVPCs" to "allowedAllVpcs", + "BluePrimaryX" to "bluePrimaryX", + "CIDRs" to "cidrs", + "AuthTtL" to "authTtl", + "CNAMEPrefix" to "cnamePrefix", + "S3Location" to "s3Location", + "signatureS" to "signatureS", + "signatureR" to "signatureR", + "M3u8Settings" to "m3u8Settings", + "IAMUser" to "iamUser", + "OtaaV1_0_x" to "otaaV10X", + "DynamoDBv2Action" to "dynamoDbv2Action", + "SessionKeyEmv2000" to "sessionKeyEmv2000", + "SupportsClassB" to "supportsClassB", + "UnassignIpv6AddressesRequest" to "unassignIpv6AddressesRequest", + "TotalGpuMemoryInMiB" to "totalGpuMemoryInMib", + "WriteIOs" to "writeIos", + "dynamoDBv2" to "dynamoDbv2", + "ipv4Address" to "ipv4Address", + "sigv4" to "sigv4", + "s3key" to "s3Key", + "sha256sum" to "sha256Sum", + "Av1QvbrSettings" to "av1QvbrSettings", + "Av1Settings" to "av1Settings", + "AwsElbv2LoadBalancer" to "awsElbv2LoadBalancer", + "SigV4Authorization" to "sigv4Authorization", + "IpV6Address" to "ipv6Address", + "IpV6Cidr" to "ipv6Cidr", + "IpV4Addresses" to "ipv4Addresses", + ) + + tests.forEach { (input, expected) -> + val actual = input.toCamelCase() + assertEquals(expected, actual, "input: $input") + } + } + @Test fun testAllNames() { // Set this to true to write a new test expectation file @@ -186,7 +235,7 @@ class NamingTest { val input = split[0] val expectation = split[1] val actual = input.toCamelCase() - if (input.toCamelCase() != expectation) { + if (actual != expectation) { errors += "$it => $actual (expected $expectation)" } output.appendLine("$input,$actual") @@ -212,7 +261,7 @@ class NamingTest { val input = split[0] val expectation = split[1] val actual = clientName(input) - if (input.toCamelCase() != expectation) { + if (actual != expectation) { errors += "$it => $actual (expected $expectation)" } output.appendLine("$input,$actual")