From 1e2f969c069f4c79c97e5ecdccb01230da6cf908 Mon Sep 17 00:00:00 2001
From: bryan newbold <bnewbold@robocracy.org>
Date: Fri, 29 Nov 2024 11:51:55 -0800
Subject: [PATCH] syntax: fast-path for did:plc parsing

The goal is to avoid full regex parsing for this common case.
Profiling showed that the ParseDID regex was burning a lot of CPU
in prod.
---
 atproto/syntax/did.go | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/atproto/syntax/did.go b/atproto/syntax/did.go
index ef9c0e04f..ffa4fc226 100644
--- a/atproto/syntax/did.go
+++ b/atproto/syntax/did.go
@@ -14,8 +14,30 @@ import (
 type DID string
 
 var didRegex = regexp.MustCompile(`^did:[a-z]+:[a-zA-Z0-9._:%-]*[a-zA-Z0-9._-]$`)
+var plcChars = ""
+
+func isASCIIAlphaNum(c rune) bool {
+	if (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') {
+		return true
+	}
+	return false
+}
 
 func ParseDID(raw string) (DID, error) {
+	// fast-path for did:plc, avoiding regex
+	if len(raw) == 32 && strings.HasPrefix(raw, "did:plc:") {
+		// NOTE: this doesn't really check base32, just the broader alphanumeric set. It might pass invalid PLC DIDs, but they still have overall valid DID syntax.
+		isPlc := true
+		for _, c := range raw[8:32] {
+			if !isASCIIAlphaNum(c) {
+				isPlc = false
+				break
+			}
+		}
+		if isPlc {
+			return DID(raw), nil
+		}
+	}
 	if raw == "" {
 		return "", errors.New("expected DID, got empty string")
 	}