fix(glossary): various workflow fixes & latest content (#2664)

* fix: move statelessness.mdx to glossary
* fix: frontmatter indentation & missing fields
* feat: add `categories` & `takeaways` workflows
* feat: add `categories` & `takeaways` from frontmatter
* takeaways is required
* feat: add seo optimized h1
* feat(glossary): Add API Security.mdx to glossary
* fixing duplicate db row creation
* refactor(glossary): align takeaways schema across apps

  - Create strongly-typed Zod schema for takeaways in both billing and www apps
  - Add documentation indicating billing as source of truth
  - Update YAML frontmatter generation to match schema structure
  - Ensure type safety from LLM generation through to content collection

  BREAKING CHANGE: Takeaways schema is now strictly typed and validated. Existing content may need to be updated to match the new schema structure.

  TODO: Extract schema into shared package to avoid duplication
* updates to content workflow
* content
* content
* sitemap
* updated content
* `pnpm fmt`
* update trigger
* - fix workflow to create new PRs if a previous one was closed
  - add slug to frontmatter
* update content for SSO
* no diff?
* final updated content
* [autofix.ci] apply automated fixes

---------

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: chronark <[email protected]>
unkeyed · Nov 18, 2024 · 4eef4a3 · 4eef4a3
1 parent e97c730
commit 4eef4a3
Show file tree

Hide file tree

Showing 24 changed files with 1,240 additions and 345 deletions.
diff --git a/apps/billing/package.json b/apps/billing/package.json
@@ -22,9 +22,9 @@
     "@mendable/firecrawl-js": "^1.5.2",
     "@octokit/rest": "^21.0.2",
     "@planetscale/database": "^1.16.0",
-    "@trigger.dev/nextjs": "3.1.2",
-    "@trigger.dev/sdk": "3.1.2",
-    "@trigger.dev/slack": "3.1.2",
+    "@trigger.dev/nextjs": "3.2.0",
+    "@trigger.dev/sdk": "3.2.0",
+    "@trigger.dev/slack": "3.2.0",
     "@unkey/billing": "workspace:^",
     "@unkey/clickhouse": "workspace:^",
     "@unkey/db": "workspace:^",
@@ -35,6 +35,8 @@
     "ai": "^3.4.7",
     "drizzle-orm": "^0.33.0",
     "drizzle-zod": "^0.5.1",
+    "github-slugger": "^2.0.0",
+    "js-yaml": "^4.1.0",
     "react-dom": "^18",
     "stripe": "^14.23.0",
     "zod": "^3.23.5"

diff --git a/apps/billing/src/lib/db-marketing/schemas/entries.ts b/apps/billing/src/lib/db-marketing/schemas/entries.ts
@@ -1,9 +1,30 @@
 import { relations } from "drizzle-orm";
-import { index, int, mysqlTable, text, timestamp, varchar } from "drizzle-orm/mysql-core";
+import {
+  index,
+  int,
+  json,
+  mysqlEnum,
+  mysqlTable,
+  text,
+  timestamp,
+  varchar,
+} from "drizzle-orm/mysql-core";
 import { createInsertSchema, createSelectSchema } from "drizzle-zod";
-import type { z } from "zod";
+import { z } from "zod";
 import { searchQueries } from "./searchQuery";
 import { sections } from "./sections";
+import type { Takeaways } from "./takeaways-schema";
+
+export const entryStatus = ["ARCHIVED", "PUBLISHED"] as const;
+export type EntryStatus = (typeof entryStatus)[number];
+export const faqSchema = z.array(
+  z.object({
+    question: z.string(),
+    answer: z.string(),
+  }),
+);
+
+export type FAQ = z.infer<typeof faqSchema>;
 
 export const entries = mysqlTable(
   "entries",
@@ -14,6 +35,11 @@ export const entries = mysqlTable(
     dynamicSectionsContent: text("dynamic_sections_content"),
     metaTitle: varchar("meta_title", { length: 255 }),
     metaDescription: varchar("meta_description", { length: 255 }),
+    metaH1: varchar("meta_h1", { length: 255 }),
+    categories: json("linking_categories").$type<string[]>().default([]),
+    status: mysqlEnum("status", entryStatus),
+    takeaways: json("content_takeaways").$type<Takeaways>(),
+    faq: json("content_faq").$type<FAQ>(),
     createdAt: timestamp("created_at").notNull().defaultNow(),
     updatedAt: timestamp("updated_at")
       .notNull()

diff --git a/apps/billing/src/lib/db-marketing/schemas/searchQuery.ts b/apps/billing/src/lib/db-marketing/schemas/searchQuery.ts
@@ -39,12 +39,12 @@ export type NewSearchQueryParams = z.infer<typeof insertSearchQuerySchema>;
 // every searchQuery can have an optional 1:1 serperResult searchResponses associated with it
 // because the fk is stored in the serperResult table, the searchQueries relation have neither fields nor references
 export const searchQueryRelations = relations(searchQueries, ({ one, many }) => ({
-  searchResponses: one(serperSearchResponses, {
+  searchResponse: one(serperSearchResponses, {
     fields: [searchQueries.inputTerm],
     references: [serperSearchResponses.inputTerm],
   }),
   firecrawlResponses: many(firecrawlResponses),
-  entries: one(entries, {
+  entry: one(entries, {
     fields: [searchQueries.inputTerm],
     references: [entries.inputTerm],
   }),

diff --git a/apps/billing/src/lib/db-marketing/schemas/takeaways-schema.ts b/apps/billing/src/lib/db-marketing/schemas/takeaways-schema.ts
@@ -0,0 +1,36 @@
+import { z } from "zod";
+
+/**
+ * @description Schema for glossary entry takeaways
+ * @sourceOfTruth This is the source of truth for the takeaways schema as it's used for database storage
+ * @todo Extract this schema into a shared package to avoid duplication with apps/www
+ */
+export const takeawaysSchema = z.object({
+  tldr: z.string(),
+  definitionAndStructure: z.array(
+    z.object({
+      key: z.string(),
+      value: z.string(),
+    }),
+  ),
+  historicalContext: z.array(
+    z.object({
+      key: z.string(),
+      value: z.string(),
+    }),
+  ),
+  usageInAPIs: z.object({
+    tags: z.array(z.string()),
+    description: z.string(),
+  }),
+  bestPractices: z.array(z.string()),
+  recommendedReading: z.array(
+    z.object({
+      title: z.string(),
+      url: z.string(),
+    }),
+  ),
+  didYouKnow: z.string(),
+});
+
+export type Takeaways = z.infer<typeof takeawaysSchema>;
diff --git a/apps/billing/src/lib/search-query.ts b/apps/billing/src/lib/search-query.ts
@@ -1,30 +1,33 @@
 import { db } from "@/lib/db-marketing/client";
 import { openai } from "@ai-sdk/openai";
 import { generateObject } from "ai";
-import { eq, sql } from "drizzle-orm";
+import { eq } from "drizzle-orm";
 
 import { entries, insertSearchQuerySchema, searchQueries } from "@/lib/db-marketing/schemas";
+import type { CacheStrategy } from "@/trigger/glossary/_generate-glossary-entry";
+import { AbortTaskRunError } from "@trigger.dev/sdk/v3";
 
-export async function getOrCreateSearchQuery(args: { term: string }) {
-  const { term } = args;
+export async function getOrCreateSearchQuery({
+  term,
+  onCacheHit = "stale",
+}: { term: string; onCacheHit: CacheStrategy }) {
   // Try to find existing search query
-  const existingQuery = await db.query.searchQueries.findFirst({
-    where: eq(searchQueries.inputTerm, term),
+  const existing = await db.query.entries.findFirst({
+    where: eq(entries.inputTerm, term),
+    with: {
+      searchQuery: true,
+    },
+    orderBy: (searchQueries, { asc }) => [asc(searchQueries.createdAt)],
   });
 
-  if (existingQuery) {
-    // Ensure entry exists even for existing query
-    await db
-      .insert(entries)
-      .values({
-        inputTerm: term,
-      })
-      .onDuplicateKeyUpdate({
-        set: {
-          updatedAt: sql`now()`,
-        },
-      });
-    return existingQuery;
+  if (existing?.searchQuery && onCacheHit === "revalidate") {
+    return existing;
+  }
+
+  if (!existing) {
+    throw new AbortTaskRunError(
+      `Entry not found for term: ${term}. It's likely that the keyword-research task failed.`,
+    );
   }
 
   // Generate new search query
@@ -48,40 +51,19 @@ Keep the search query as short and as simple as possible, don't use quotes aroun
     schema: insertSearchQuerySchema.omit({ createdAt: true, updatedAt: true }),
   });
 
-  // Create both search query and entry in a transaction
-  await db.transaction(async (tx) => {
-    // Insert search query
-    await tx
-      .insert(searchQueries)
-      .values({
-        ...generatedQuery.object,
-      })
-      .onDuplicateKeyUpdate({
-        set: {
-          updatedAt: sql`now()`,
-        },
-      });
-
-    // Insert entry
-    await tx
-      .insert(entries)
-      .values({
-        inputTerm: term,
-      })
-      .onDuplicateKeyUpdate({
-        set: {
-          updatedAt: sql`now()`,
-        },
-      });
-  });
-
-  const insertedQuery = await db.query.searchQueries.findFirst({
-    where: eq(searchQueries.inputTerm, generatedQuery.object.inputTerm),
-  });
+  // create the search query in the database & connect it to the entry:
+  const [insertedQueryId] = await db
+    .insert(searchQueries)
+    .values(generatedQuery.object)
+    .$returningId();
 
-  if (!insertedQuery) {
+  if (!insertedQueryId) {
     throw new Error("Failed to insert or update search query");
   }
-
-  return insertedQuery;
+  return db.query.entries.findFirst({
+    where: eq(entries.inputTerm, term),
+    with: {
+      searchQuery: true,
+    },
+  });
 }
diff --git a/apps/billing/src/trigger/glossary/_generate-glossary-entry.ts b/apps/billing/src/trigger/glossary/_generate-glossary-entry.ts
@@ -3,8 +3,10 @@ import { entries } from "@/lib/db-marketing/schemas";
 import { task } from "@trigger.dev/sdk/v3";
 import { AbortTaskRunError } from "@trigger.dev/sdk/v3";
 import { eq } from "drizzle-orm";
+import { contentTakeawaysTask } from "./content-takeaways";
 import { createPrTask } from "./create-pr";
 import { draftSectionsTask } from "./draft-sections";
+import { generateFaqsTask } from "./generate-faqs";
 import { generateOutlineTask } from "./generate-outline";
 import { keywordResearchTask } from "./keyword-research";
 import { seoMetaTagsTask } from "./seo-meta-tags";
@@ -18,9 +20,10 @@ export type CacheStrategy = "revalidate" | "stale";
  * This workflow runs multiple steps sequentially:
  * 1. Keyword Research
  * 2. Generate Outline
- * 3. Draft Sections
+ * 3. Draft Sections & Content Takeaways (in parallel)
  * 4. Generate SEO Meta Tags
- * 5. Create PR
+ * 5. Generate FAQs
+ * 6. Create PR
  *
  * Each workflow step generates output that's stored in the database (with the exception of create PR, which stores the MDX output in the GitHub repository).
  * The default behaviour of every task is to always return a cached output if available.
@@ -68,6 +71,11 @@ export const generateGlossaryEntryTask = task({
       };
     }
 
+    if (!existing) {
+      // create the entry in the database if it doesn't exist, so that all other tasks can rely on it existing:
+      await db.insert(entries).values({ inputTerm: term });
+    }
+
     // Step 1: Keyword Research
     console.info("1/5 - Starting keyword research...");
     const keywordResearch = await keywordResearchTask.triggerAndWait({ term, onCacheHit });
@@ -86,13 +94,20 @@ export const generateGlossaryEntryTask = task({
     }
     console.info("✓ Outline generated");
 
-    // Step 3: Draft Sections
-    console.info("3/5 - Drafting sections...");
-    const draftSections = await draftSectionsTask.triggerAndWait({ term, onCacheHit });
+    // Step 3: Draft Sections & Content Takeaways (in parallel)
+    console.info("3/5 - Drafting sections and generating takeaways...");
+    const [draftSections, contentTakeaways] = await Promise.all([
+      draftSectionsTask.triggerAndWait({ term, onCacheHit }),
+      contentTakeawaysTask.triggerAndWait({ term, onCacheHit }),
+    ]);
+
     if (!draftSections.ok) {
       throw new AbortTaskRunError(`Section drafting failed for term: ${term}`);
     }
-    console.info("✓ All sections drafted");
+    if (!contentTakeaways.ok) {
+      throw new AbortTaskRunError(`Content takeaways generation failed for term: ${term}`);
+    }
+    console.info("✓ All sections drafted and takeaways generated");
 
     // Step 4: Generate SEO Meta Tags
     console.info("4/5 - Generating SEO meta tags...");
@@ -102,6 +117,14 @@ export const generateGlossaryEntryTask = task({
     }
     console.info("✓ SEO meta tags generated");
 
+    // Step 4.5: Generate FAQs
+    console.info("4.5/5 - Generating FAQs...");
+    const faqs = await generateFaqsTask.triggerAndWait({ term, onCacheHit });
+    if (!faqs.ok) {
+      throw new AbortTaskRunError(`FAQ generation failed for term: ${term}`);
+    }
+    console.info("✓ FAQs generated");
+
     // Step 5: Create PR
     console.info("5/5 - Creating PR...");
     const pr = await createPrTask.triggerAndWait({ input: term, onCacheHit });