smithy-lang · aajtodd · Oct 23, 2023 · Oct 10, 2023 · Oct 10, 2023 · Oct 13, 2023
@@ -0,0 +1,5 @@
+{
+    "id": "b707af45-8aa9-4f88-8fce-b8ce51770c25",
+    "type": "misc",
+    "description": "**BREAKING**: refactor CaseUtils to better deal with plurals and other edge cases."
+}
@@ -8,32 +8,109 @@ package software.amazon.smithy.kotlin.codegen.utils
  * Split a string on word boundaries
  */
 fun String.splitOnWordBoundaries(): List<String> {
-    // adapted from Java v2 SDK CodegenNamingUtils.splitOnWordBoundaries
-    var result = this
+    // This is taken from Rust: https://github.com/awslabs/smithy-rs/pull/3037/files#diff-4175c66ee81a450fcf1cd3e256f36ae2c8e0b30b910be8ca505135fbe215144d
+    // with minor changes (s3 and iot as whole words). Previously we used the Java v2 implementation
+    // https://github.com/aws/aws-sdk-java-v2/blob/2.20.162/utils/src/main/java/software/amazon/awssdk/utils/internal/CodegenNamingUtils.java#L36
+    // but this has some edge cases it doesn't handle well
+    val out = mutableListOf<String>()
+    // These are whole words but cased differently, e.g. `IPv4`, `MiB`, `GiB`, `TtL`
+    val completeWords = listOf("ipv4", "ipv6", "sigv4", "mib", "gib", "kib", "ttl", "iot", "s3")
+    var currentWord = ""
 
-    // all non-alphanumeric characters: "acm-success"-> "acm success"
-    result = result.replace(Regex("[^A-Za-z0-9+]"), " ")
+    // emit the current word and update from the next character
+    val emit = { next: Char ->
+        if (currentWord.isNotEmpty()) {
+            out += currentWord.lowercase()
+        }
+        currentWord = if (next.isLetterOrDigit()) {
+            next.toString()
+        } else {
+            ""
+        }
+    }
 
-    // if a number has a standalone v in front of it, separate it out
-    result = result.replace(Regex("([^a-z]{2,})v([0-9]+)"), "$1 v$2 ") // TESTv4 -> "TEST v4 "
-        .replace(Regex("([^A-Z]{2,})V([0-9]+)"), "$1 V$2 ") // TestV4 -> "Test V4 "
+    val allLowerCase = lowercase() == this
+    forEachIndexed { index, nextChar ->
+        val peek = getOrNull(index + 1)
+        val doublePeek = getOrNull(index + 2)
+        val completeWordInProgress = completeWords.any {
+            (currentWord + substring(index)).lowercase().startsWith(it)
+        } && !completeWords.contains(currentWord.lowercase())
 
-    // add a space between camelCased words
-    result = result.split(Regex("(?<=[a-z])(?=[A-Z]([a-zA-Z]|[0-9]))")).joinToString(separator = " ") // AcmSuccess -> // "Acm Success"
+        when {
+            // [C] in these docs indicates the value of nextCharacter
+            // A[_]B
+            !nextChar.isLetterOrDigit() -> emit(nextChar)
 
-    // add a space after acronyms
-    result = result.replace(Regex("([A-Z]+)([A-Z][a-z])"), "$1 $2") // "ACMSuccess" -> "ACM Success"
+            // If we have no letters so far, push the next letter (we already know it's a letter or digit)
+            currentWord.isEmpty() -> currentWord += nextChar.toString()
 
-    // add space after a number in the middle of a word
-    result = result.replace(Regex("([0-9])([a-zA-Z])"), "$1 $2") // "s3ec2" -> "s3 ec2"
+            // Abc[D]ef or Ab2[D]ef
+            !completeWordInProgress && loweredFollowedByUpper(currentWord, nextChar) -> emit(nextChar)
 
-    // remove extra spaces - multiple consecutive ones or those and the beginning/end of words
-    result = result.replace(Regex("\\s+"), " ") // "Foo  Bar" -> "Foo Bar"
-        .trim() // " Foo " -> "Foo"
+            // s3[k]ey
+            !completeWordInProgress && allLowerCase && digitFollowedByLower(currentWord, nextChar) -> emit(nextChar)
 
-    return result.split(" ")
+            // DB[P]roxy, or `IAM[U]ser` but not AC[L]s
+            endOfAcronym(currentWord, nextChar, peek, doublePeek) -> emit(nextChar)
+
+            // emit complete words
+            !completeWordInProgress && completeWords.contains(currentWord.lowercase()) -> emit(nextChar)
+
+            // If we haven't found a word boundary, push it and keep going
+            else -> currentWord += nextChar.toString()
+        }
+    }
+    if (currentWord.isNotEmpty()) {
+        out += currentWord
+    }
+
+    return out
 }
 
+/**
+ * Handle cases like `DB[P]roxy`, `ARN[S]upport`, `AC[L]s`
+ */
+private fun endOfAcronym(current: String, nextChar: Char, peek: Char?, doublePeek: Char?): Boolean {
+    if (!current.last().isUpperCase()) {
+        // Not an acronym in progress
+        return false
+    }
+    if (!nextChar.isUpperCase()) {
+        // We aren't at the next word yet
+        return false
+    }
+
+    if (peek?.isLowerCase() != true) {
+        return false
+    }
+
+    // Skip cases like `AR[N]s`, `AC[L]s` but not `IAM[U]ser`
+    if (peek == 's' && (doublePeek == null || !doublePeek.isLowerCase())) {
+        return false
+    }
+
+    // Skip cases like `DynamoD[B]v2`
+    return !(peek == 'v' && doublePeek?.isDigit() == true)
+}
+
+private fun loweredFollowedByUpper(current: String, nextChar: Char): Boolean {
+    if (!nextChar.isUpperCase()) {
+        return false
+    }
+    return current.last().isLowerCase() || current.last().isDigit()
+}
+
+private fun loweredFollowedByDigit(current: String, nextChar: Char): Boolean {
+    if (!nextChar.isDigit()) {
+        return false
+    }
+    return current.last().isLowerCase()
+}
+
+private fun digitFollowedByLower(current: String, nextChar: Char): Boolean =
+    (current.last().isDigit() && nextChar.isLowerCase())
+
 /**
  * Convert a string to `PascalCase` (uppercase start with upper case word boundaries)
  */

@@ -8,11 +8,14 @@ package software.amazon.smithy.kotlin.codegen.core
 import software.amazon.smithy.kotlin.codegen.model.expectShape
 import software.amazon.smithy.kotlin.codegen.test.prependNamespaceAndService
 import software.amazon.smithy.kotlin.codegen.test.toSmithyModel
+import software.amazon.smithy.kotlin.codegen.utils.toCamelCase
 import software.amazon.smithy.model.shapes.MemberShape
 import software.amazon.smithy.model.shapes.ShapeId
+import java.io.File
 import kotlin.test.Test
 import kotlin.test.assertEquals
 import kotlin.test.assertNotEquals
+import kotlin.test.fail
 
 class NamingTest {
 
@@ -24,7 +27,7 @@ class NamingTest {
         assertEquals("ElastiCache", clientName("ElastiCache"))
         assertEquals("ApiGatewayManagement", clientName("ApiGatewayManagementApi"))
         assertEquals("MigrationHubConfig", clientName("MigrationHub Config"))
-        assertEquals("IoTFleetHub", clientName("IoTFleetHub"))
+        assertEquals("IotFleetHub", clientName("IoTFleetHub"))
         assertEquals("Iot1ClickProjects", clientName("IoT 1Click Projects"))
         assertEquals("DynamoDb", clientName("DynamoDB"))
 
@@ -45,8 +48,8 @@ class NamingTest {
             "ACL" to "acl",
             "ACLList" to "aclList",
             "fooey" to "fooey",
-            "stringA" to "stringa",
-            "StringA" to "stringa",
+            "stringA" to "stringA",
+            "StringA" to "stringA",
         )
 
         tests.forEach { (input, expected) ->
@@ -75,30 +78,30 @@ class NamingTest {
             "application/vnd.amazonaws.card.generic" to "ApplicationVndAmazonawsCardGeneric",
             "IPV4" to "Ipv4",
             "ipv4" to "Ipv4",
-            "IPv4" to "IpV4",
-            "ipV4" to "IpV4",
+            "IPv4" to "Ipv4",
+            "ipV4" to "Ipv4",
             "IPMatch" to "IpMatch",
             "S3" to "S3",
             "EC2Instance" to "Ec2Instance",
             "aws.config" to "AwsConfig",
             "AWS::EC2::CustomerGateway" to "AwsEc2CustomerGateway",
             "application/pdf" to "ApplicationPdf",
             "ADConnector" to "AdConnector",
-            "MS-CHAPv1" to "MsChapV1",
+            "MS-CHAPv1" to "MsChapv1",
             "One-Way: Outgoing" to "OneWayOutgoing",
             "scram_sha_1" to "ScramSha1",
-            "EC_prime256v1" to "EcPrime256V1",
+            "EC_prime256v1" to "EcPrime256v1",
             "EC_PRIME256V1" to "EcPrime256V1",
-            "EC2v11.4" to "Ec2V11_4",
+            "EC2v11.4" to "Ec2v11_4",
             "nodejs4.3-edge" to "Nodejs4_3_Edge",
             "BUILD_GENERAL1_SMALL" to "BuildGeneral1Small",
             "SSE_S3" to "SseS3",
             "http1.1" to "Http1_1",
             "T100" to "T100",
             "s3:ObjectCreated:*" to "S3ObjectCreated",
             "s3:ObjectCreated:Put" to "S3ObjectCreatedPut",
-            "TLSv1" to "TlsV1",
-            "TLSv1.2" to "TlsV1_2",
+            "TLSv1" to "Tlsv1",
+            "TLSv1.2" to "Tlsv1_2",
             "us-east-1" to "UsEast1",
             "io1" to "Io1",
             "testNESTEDAcronym" to "TestNestedAcronym",
@@ -116,12 +119,18 @@ class NamingTest {
             "arm64" to "Arm64",
         )
 
+        val errors = mutableListOf<String>()
         tests.forEach { (input, expected) ->
             // NOTE: a lot of these are not valid names according to the Smithy spec but since
             // we still allow deriving a name from the enum value we want to verify what _would_ happen
             // should we encounter these inputs
             val actual = input.enumVariantName()
-            assertEquals(expected, actual, "input: $input")
+            if (expected != actual) {
+                errors += "expected '$expected' != actual '$actual' for input: $input"
+            }
+        }
+        if (errors.isNotEmpty()) {
+            fail(errors.joinToString("\n"))
         }
     }
 
@@ -171,4 +180,97 @@ class NamingTest {
         assertNotEquals(all, firstMember)
         assertNotEquals(firstMember, secondMember)
     }
+
+    @Test
+    fun testCamelCase() {
+        val tests = listOf(
+            "ACLs" to "acls",
+            "ACLsUpdateStatus" to "aclsUpdateStatus",
+            "AllowedAllVPCs" to "allowedAllVpcs",
+            "BluePrimaryX" to "bluePrimaryX",
+            "CIDRs" to "cidrs",
+            "AuthTtL" to "authTtl",
+            "CNAMEPrefix" to "cnamePrefix",
+            "S3Location" to "s3Location",
+            "signatureS" to "signatureS",
+            "signatureR" to "signatureR",
+            "M3u8Settings" to "m3u8Settings",
+            "IAMUser" to "iamUser",
+            "OtaaV1_0_x" to "otaaV10X",
+            "DynamoDBv2Action" to "dynamoDbv2Action",
+            "SessionKeyEmv2000" to "sessionKeyEmv2000",
+            "SupportsClassB" to "supportsClassB",
+            "UnassignIpv6AddressesRequest" to "unassignIpv6AddressesRequest",
+            "TotalGpuMemoryInMiB" to "totalGpuMemoryInMib",
+            "WriteIOs" to "writeIos",
+            "dynamoDBv2" to "dynamoDbv2",
+            "ipv4Address" to "ipv4Address",
+            "sigv4" to "sigv4",
+            "s3key" to "s3Key",
+            "sha256sum" to "sha256Sum",
+            "Av1QvbrSettings" to "av1QvbrSettings",
+            "Av1Settings" to "av1Settings",
+            "AwsElbv2LoadBalancer" to "awsElbv2LoadBalancer",
+            "SigV4Authorization" to "sigv4Authorization",
+            "IpV6Address" to "ipv6Address",
+            "IpV6Cidr" to "ipv6Cidr",
+            "IpV4Addresses" to "ipv4Addresses",
+        )
+
+        tests.forEach { (input, expected) ->
+            val actual = input.toCamelCase()
+            assertEquals(expected, actual, "input: $input")
+        }
+    }
+
+    @Test
+    fun testAllNames() {
+        // Set this to true to write a new test expectation file
+        val publishUpdate = true
+        val allNames = this::class.java.getResource("/all-names-test-output.txt")?.readText()!!
+        val errors = mutableListOf<String>()
+        val output = StringBuilder()
+        allNames.lines().filter { it.isNotBlank() }.forEach {
+            val split = it.split(',')
+            val input = split[0]
+            val expectation = split[1]
+            val actual = input.toCamelCase()
+            if (actual != expectation) {
+                errors += "$it => $actual (expected $expectation)"
+            }
+            output.appendLine("$input,$actual")
+        }
+        if (publishUpdate) {
+            File("all-names-test-output.txt").writeText(output.toString())
+        }
+        if (errors.isNotEmpty()) {
+            fail(errors.joinToString("\n"))
+        }
+    }
+
+    @Test
+    fun testClientNames() {
+        // jq '.. | select(.sdkId?).sdkId' codegen/sdk/aws-models/*.json > /tmp/sdk-ids.txt
+        // Set this to true to write a new test expectation file
+        val publishUpdate = true
+        val allNames = this::class.java.getResource("/sdk-ids-test-output.txt")?.readText()!!
+        val errors = mutableListOf<String>()
+        val output = StringBuilder()
+        allNames.lines().filter { it.isNotBlank() }.forEach {
+            val split = it.split(',')
+            val input = split[0]
+            val expectation = split[1]
+            val actual = clientName(input)
+            if (actual != expectation) {
+                errors += "$it => $actual (expected $expectation)"
+            }
+            output.appendLine("$input,$actual")
+        }
+        if (publishUpdate) {
+            File("sdk-ids-test-output.txt").writeText(output.toString())
+        }
+        if (errors.isNotEmpty()) {
+            fail(errors.joinToString("\n"))
+        }
+    }
 }