Skip to content

Commit

Permalink
fix(glossary): various workflow fixes & latest content (#2664)
Browse files Browse the repository at this point in the history
* fix: move statelessness.mdx to glossary

* fix: frontmatter indentation & missing fields

* feat: add `categories` & `takeaways` workflows

* feat: add `categories` & `takeaways` from frontmatter

* takeaways is required

* feat: add seo optimized h1

* feat(glossary): Add API Security.mdx to glossary

* fixing duplicate db row creation

* refactor(glossary): align takeaways schema across apps

- Create strongly-typed Zod schema for takeaways in both billing and www apps
- Add documentation indicating billing as source of truth
- Update YAML frontmatter generation to match schema structure
- Ensure type safety from LLM generation through to content collection

BREAKING CHANGE: Takeaways schema is now strictly typed and validated.
Existing content may need to be updated to match the new schema structure.

TODO: Extract schema into shared package to avoid duplication

* updates to content workflow

* content

* content

* sitemap

* updated content

* `pnpm fmt`

* update trigger

* fix workflow to create new PRs if a previous one was closed; add slug to frontmatter

* update content for SSO

* no diff?

* final updated content

* [autofix.ci] apply automated fixes

---------

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: chronark <[email protected]>
  • Loading branch information
3 people authored Nov 18, 2024
1 parent e97c730 commit 4eef4a3
Show file tree
Hide file tree
Showing 24 changed files with 1,240 additions and 345 deletions.
8 changes: 5 additions & 3 deletions apps/billing/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
"@mendable/firecrawl-js": "^1.5.2",
"@octokit/rest": "^21.0.2",
"@planetscale/database": "^1.16.0",
"@trigger.dev/nextjs": "3.1.2",
"@trigger.dev/sdk": "3.1.2",
"@trigger.dev/slack": "3.1.2",
"@trigger.dev/nextjs": "3.2.0",
"@trigger.dev/sdk": "3.2.0",
"@trigger.dev/slack": "3.2.0",
"@unkey/billing": "workspace:^",
"@unkey/clickhouse": "workspace:^",
"@unkey/db": "workspace:^",
Expand All @@ -35,6 +35,8 @@
"ai": "^3.4.7",
"drizzle-orm": "^0.33.0",
"drizzle-zod": "^0.5.1",
"github-slugger": "^2.0.0",
"js-yaml": "^4.1.0",
"react-dom": "^18",
"stripe": "^14.23.0",
"zod": "^3.23.5"
Expand Down
30 changes: 28 additions & 2 deletions apps/billing/src/lib/db-marketing/schemas/entries.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,30 @@
import { relations } from "drizzle-orm";
import { index, int, mysqlTable, text, timestamp, varchar } from "drizzle-orm/mysql-core";
import {
index,
int,
json,
mysqlEnum,
mysqlTable,
text,
timestamp,
varchar,
} from "drizzle-orm/mysql-core";
import { createInsertSchema, createSelectSchema } from "drizzle-zod";
import type { z } from "zod";
import { z } from "zod";
import { searchQueries } from "./searchQuery";
import { sections } from "./sections";
import type { Takeaways } from "./takeaways-schema";

/** Allowed values for the `status` enum column of the `entries` table. */
export const entryStatus = ["ARCHIVED", "PUBLISHED"] as const;

/** Union of the literal values listed in {@link entryStatus}. */
export type EntryStatus = (typeof entryStatus)[number];

/** A single question/answer pair of a glossary entry's FAQ. */
const faqItemSchema = z.object({
  question: z.string(),
  answer: z.string(),
});

/** Schema for the FAQ list stored in the `content_faq` JSON column. */
export const faqSchema = z.array(faqItemSchema);

/** Static type inferred from {@link faqSchema}. */
export type FAQ = z.infer<typeof faqSchema>;

export const entries = mysqlTable(
"entries",
Expand All @@ -14,6 +35,11 @@ export const entries = mysqlTable(
dynamicSectionsContent: text("dynamic_sections_content"),
metaTitle: varchar("meta_title", { length: 255 }),
metaDescription: varchar("meta_description", { length: 255 }),
metaH1: varchar("meta_h1", { length: 255 }),
categories: json("linking_categories").$type<string[]>().default([]),
status: mysqlEnum("status", entryStatus),
takeaways: json("content_takeaways").$type<Takeaways>(),
faq: json("content_faq").$type<FAQ>(),
createdAt: timestamp("created_at").notNull().defaultNow(),
updatedAt: timestamp("updated_at")
.notNull()
Expand Down
4 changes: 2 additions & 2 deletions apps/billing/src/lib/db-marketing/schemas/searchQuery.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,12 @@ export type NewSearchQueryParams = z.infer<typeof insertSearchQuerySchema>;
// every searchQuery can have an optional 1:1 serperResult searchResponses associated with it
// because the fk is stored in the serperResult table, the searchQueries relation have neither fields nor references
export const searchQueryRelations = relations(searchQueries, ({ one, many }) => ({
searchResponses: one(serperSearchResponses, {
searchResponse: one(serperSearchResponses, {
fields: [searchQueries.inputTerm],
references: [serperSearchResponses.inputTerm],
}),
firecrawlResponses: many(firecrawlResponses),
entries: one(entries, {
entry: one(entries, {
fields: [searchQueries.inputTerm],
references: [entries.inputTerm],
}),
Expand Down
36 changes: 36 additions & 0 deletions apps/billing/src/lib/db-marketing/schemas/takeaways-schema.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import { z } from "zod";

/** A labelled fact, rendered as a `key` / `value` pair. */
const keyValueSchema = z.object({
  key: z.string(),
  value: z.string(),
});

/** A link to further reading: display title plus target URL. */
const readingLinkSchema = z.object({
  title: z.string(),
  url: z.string(),
});

/**
 * @description Schema for glossary entry takeaways
 * @sourceOfTruth This is the source of truth for the takeaways schema as it's used for database storage
 * @todo Extract this schema into a shared package to avoid duplication with apps/www
 */
export const takeawaysSchema = z.object({
  // One-paragraph summary of the term.
  tldr: z.string(),
  // Both lists share the same key/value item shape.
  definitionAndStructure: z.array(keyValueSchema),
  historicalContext: z.array(keyValueSchema),
  usageInAPIs: z.object({
    tags: z.array(z.string()),
    description: z.string(),
  }),
  bestPractices: z.array(z.string()),
  recommendedReading: z.array(readingLinkSchema),
  didYouKnow: z.string(),
});

/** Static type inferred from {@link takeawaysSchema}. */
export type Takeaways = z.infer<typeof takeawaysSchema>;
84 changes: 33 additions & 51 deletions apps/billing/src/lib/search-query.ts
Original file line number Diff line number Diff line change
@@ -1,30 +1,33 @@
import { db } from "@/lib/db-marketing/client";
import { openai } from "@ai-sdk/openai";
import { generateObject } from "ai";
import { eq, sql } from "drizzle-orm";
import { eq } from "drizzle-orm";

import { entries, insertSearchQuerySchema, searchQueries } from "@/lib/db-marketing/schemas";
import type { CacheStrategy } from "@/trigger/glossary/_generate-glossary-entry";
import { AbortTaskRunError } from "@trigger.dev/sdk/v3";

export async function getOrCreateSearchQuery(args: { term: string }) {
const { term } = args;
export async function getOrCreateSearchQuery({
term,
onCacheHit = "stale",
}: { term: string; onCacheHit: CacheStrategy }) {
// Try to find existing search query
const existingQuery = await db.query.searchQueries.findFirst({
where: eq(searchQueries.inputTerm, term),
const existing = await db.query.entries.findFirst({
where: eq(entries.inputTerm, term),
with: {
searchQuery: true,
},
orderBy: (searchQueries, { asc }) => [asc(searchQueries.createdAt)],
});

if (existingQuery) {
// Ensure entry exists even for existing query
await db
.insert(entries)
.values({
inputTerm: term,
})
.onDuplicateKeyUpdate({
set: {
updatedAt: sql`now()`,
},
});
return existingQuery;
if (existing?.searchQuery && onCacheHit === "revalidate") {
return existing;
}

if (!existing) {
throw new AbortTaskRunError(
`Entry not found for term: ${term}. It's likely that the keyword-research task failed.`,
);
}

// Generate new search query
Expand All @@ -48,40 +51,19 @@ Keep the search query as short and as simple as possible, don't use quotes aroun
schema: insertSearchQuerySchema.omit({ createdAt: true, updatedAt: true }),
});

// Create both search query and entry in a transaction
await db.transaction(async (tx) => {
// Insert search query
await tx
.insert(searchQueries)
.values({
...generatedQuery.object,
})
.onDuplicateKeyUpdate({
set: {
updatedAt: sql`now()`,
},
});

// Insert entry
await tx
.insert(entries)
.values({
inputTerm: term,
})
.onDuplicateKeyUpdate({
set: {
updatedAt: sql`now()`,
},
});
});

const insertedQuery = await db.query.searchQueries.findFirst({
where: eq(searchQueries.inputTerm, generatedQuery.object.inputTerm),
});
// create the search query in the database & connect it to the entry:
const [insertedQueryId] = await db
.insert(searchQueries)
.values(generatedQuery.object)
.$returningId();

if (!insertedQuery) {
if (!insertedQueryId) {
throw new Error("Failed to insert or update search query");
}

return insertedQuery;
return db.query.entries.findFirst({
where: eq(entries.inputTerm, term),
with: {
searchQuery: true,
},
});
}
35 changes: 29 additions & 6 deletions apps/billing/src/trigger/glossary/_generate-glossary-entry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@ import { entries } from "@/lib/db-marketing/schemas";
import { task } from "@trigger.dev/sdk/v3";
import { AbortTaskRunError } from "@trigger.dev/sdk/v3";
import { eq } from "drizzle-orm";
import { contentTakeawaysTask } from "./content-takeaways";
import { createPrTask } from "./create-pr";
import { draftSectionsTask } from "./draft-sections";
import { generateFaqsTask } from "./generate-faqs";
import { generateOutlineTask } from "./generate-outline";
import { keywordResearchTask } from "./keyword-research";
import { seoMetaTagsTask } from "./seo-meta-tags";
Expand All @@ -18,9 +20,10 @@ export type CacheStrategy = "revalidate" | "stale";
* This workflow runs multiple steps sequentially:
* 1. Keyword Research
* 2. Generate Outline
* 3. Draft Sections
* 3. Draft Sections & Content Takeaways (in parallel)
* 4. Generate SEO Meta Tags
* 5. Create PR
* 5. Generate FAQs
* 6. Create PR
*
* Each workflow step generates output that's stored in the database (with the exception of create PR, which stores the MDX output in the GitHub repository).
* The default behaviour of every task is to always return a cached output if available.
Expand Down Expand Up @@ -68,6 +71,11 @@ export const generateGlossaryEntryTask = task({
};
}

if (!existing) {
// create the entry in the database if it doesn't exist, so that all other tasks can rely on it existing:
await db.insert(entries).values({ inputTerm: term });
}

// Step 1: Keyword Research
console.info("1/5 - Starting keyword research...");
const keywordResearch = await keywordResearchTask.triggerAndWait({ term, onCacheHit });
Expand All @@ -86,13 +94,20 @@ export const generateGlossaryEntryTask = task({
}
console.info("✓ Outline generated");

// Step 3: Draft Sections
console.info("3/5 - Drafting sections...");
const draftSections = await draftSectionsTask.triggerAndWait({ term, onCacheHit });
// Step 3: Draft Sections & Content Takeaways (in parallel)
console.info("3/5 - Drafting sections and generating takeaways...");
const [draftSections, contentTakeaways] = await Promise.all([
draftSectionsTask.triggerAndWait({ term, onCacheHit }),
contentTakeawaysTask.triggerAndWait({ term, onCacheHit }),
]);

if (!draftSections.ok) {
throw new AbortTaskRunError(`Section drafting failed for term: ${term}`);
}
console.info("✓ All sections drafted");
if (!contentTakeaways.ok) {
throw new AbortTaskRunError(`Content takeaways generation failed for term: ${term}`);
}
console.info("✓ All sections drafted and takeaways generated");

// Step 4: Generate SEO Meta Tags
console.info("4/5 - Generating SEO meta tags...");
Expand All @@ -102,6 +117,14 @@ export const generateGlossaryEntryTask = task({
}
console.info("✓ SEO meta tags generated");

// Step 4.5: Generate FAQs
console.info("4.5/5 - Generating FAQs...");
const faqs = await generateFaqsTask.triggerAndWait({ term, onCacheHit });
if (!faqs.ok) {
throw new AbortTaskRunError(`FAQ generation failed for term: ${term}`);
}
console.info("✓ FAQs generated");

// Step 5: Create PR
console.info("5/5 - Creating PR...");
const pr = await createPrTask.triggerAndWait({ input: term, onCacheHit });
Expand Down
Loading

0 comments on commit 4eef4a3

Please sign in to comment.