Skip to content

Commit

Permalink
Add some prebuilt functions
Browse files Browse the repository at this point in the history
  • Loading branch information
pauldambra committed Nov 10, 2024
1 parent 08b6741 commit 54d3031
Show file tree
Hide file tree
Showing 5 changed files with 138 additions and 6 deletions.
12 changes: 9 additions & 3 deletions src/__tests__/posthog-core.beforeCapture.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,20 +48,26 @@ describe('posthog core - before capture', () => {
beforeCapture: rejectingEventFn,
})
;(posthog._send_request as jest.Mock).mockClear()

const capturedData = posthog.capture(eventName, {}, {})

expect(capturedData).toBeUndefined()
expect(posthog._send_request).not.toHaveBeenCalled()
expect(jest.mocked(logger).info).toHaveBeenCalledWith(
`Event '${eventName}' was rejected in beforeCapture function`
)
})

it('can edit an event', () => {
const posthog = posthogWith({
beforeCapture: editingEventFn,
})
;(posthog._send_request as jest.Mock).mockClear()

const capturedData = posthog.capture(eventName, {}, {})

expect(capturedData).toHaveProperty(['properties', 'edited'], true)
expect(capturedData).toHaveProperty(['$set', 'edited'], true)

expect(posthog._send_request).toHaveBeenCalledWith({
batchKey: undefined,
callback: expect.any(Function),
Expand Down Expand Up @@ -112,8 +118,8 @@ describe('posthog core - before capture', () => {

posthog.capture(randomUnsafeEditableEvent, {}, {})

expect(jest.mocked(logger).info).toHaveBeenCalledWith(
`Event '${randomUnsafeEditableEvent}' was rejected. This can cause unexpected behavior.`
expect(jest.mocked(logger).warn).toHaveBeenCalledWith(
`Event '${randomUnsafeEditableEvent}' was rejected in beforeCapture function. This can cause unexpected behavior.`
)
})
})
57 changes: 57 additions & 0 deletions src/__tests__/utils/before-capture-utils.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import { sampleByDistinctId, sampleByEvent, sampleBySessionId } from '../../utils/before-capture.utils'
import { CaptureResult } from '../../types'
import { isNull } from '../../utils/type-utils'

/**
 * Asserts that the number of emitted events is between 40 and 60 inclusive,
 * i.e. roughly half when the caller generated 100 events.
 *
 * @param emittedEvents the events that survived sampling
 */
function expectRoughlyFiftyPercent(emittedEvents: any[]) {
    const emittedCount = emittedEvents.length
    expect(emittedCount).toBeGreaterThanOrEqual(40)
    expect(emittedCount).toBeLessThanOrEqual(60)
}

describe('before capture utils', () => {
    it('can sample by event name', () => {
        // sampleByEvent draws from Math.random. With only 100 real random draws the
        // 40..60 window is missed a few percent of the time, which makes the test flaky.
        // Stub Math.random with an even, deterministic sweep (0.00, 0.01, ... 0.99) instead.
        let randomCalls = 0
        const randomSpy = jest.spyOn(Math, 'random').mockImplementation(() => (randomCalls++ % 100) / 100)
        try {
            const sampleFn = sampleByEvent(['$autocapture'], 50)
            const results: (CaptureResult | null)[] = []
            Array.from({ length: 100 }).forEach(() => {
                const captureResult = { event: '$autocapture' } as unknown as CaptureResult
                results.push(sampleFn(captureResult))
            })
            const emittedEvents = results.filter((r) => !isNull(r))
            expectRoughlyFiftyPercent(emittedEvents)
        } finally {
            // always restore the real Math.random so other tests are unaffected
            randomSpy.mockRestore()
        }
    })

    it('can sample by distinct id', () => {
        // Sampling is keyed on a hash of the distinct id, so each id is either
        // always sent or never sent; the two ids below land on opposite sides at 50%.
        const sampleFn = sampleByDistinctId(50)
        const results = []
        const distinct_id_one = 'user-1'
        const distinct_id_two = 'user-that-hashes-to-no-events'
        Array.from({ length: 100 }).forEach(() => {
            ;[distinct_id_one, distinct_id_two].forEach((distinct_id) => {
                const captureResult = { properties: { distinct_id } } as unknown as CaptureResult
                results.push(sampleFn(captureResult))
            })
        })
        const distinctIdOneEvents = results.filter((r) => !isNull(r) && r.properties.distinct_id === distinct_id_one)
        const distinctIdTwoEvents = results.filter((r) => !isNull(r) && r.properties.distinct_id === distinct_id_two)

        expect(distinctIdOneEvents.length).toBe(100)
        expect(distinctIdTwoEvents.length).toBe(0)
    })

    it('can sample by session id', () => {
        // Same idea as the distinct id test: a session is either fully kept or fully dropped.
        const sampleFn = sampleBySessionId(50)
        const results = []
        const session_id_one = 'a-session-id'
        const session_id_two = 'id-that-hashes-to-not-sending-events'
        Array.from({ length: 100 }).forEach(() => {
            ;[session_id_one, session_id_two].forEach((session_id) => {
                const captureResult = { properties: { $session_id: session_id } } as unknown as CaptureResult
                results.push(sampleFn(captureResult))
            })
        })
        const sessionIdOneEvents = results.filter((r) => !isNull(r) && r.properties.$session_id === session_id_one)
        const sessionIdTwoEvents = results.filter((r) => !isNull(r) && r.properties.$session_id === session_id_two)

        expect(sessionIdOneEvents.length).toBe(100)
        expect(sessionIdTwoEvents.length).toBe(0)
    })
})
5 changes: 4 additions & 1 deletion src/posthog-core.ts
Original file line number Diff line number Diff line change
Expand Up @@ -883,8 +883,11 @@ export class PostHog {
if (!isKnownUnEditableEvent(data.event)) {
const beforeCaptureResult = this.config.beforeCapture(data)
if (isNullish(beforeCaptureResult)) {
const logMessage = `Event '${data.event}' was rejected in beforeCapture function`
if (isKnownUnsafeEditableEvent(data.event)) {
logger.info(`Event '${data.event}' was rejected. This can cause unexpected behavior.`)
logger.warn(`${logMessage}. This can cause unexpected behavior.`)
} else {
logger.info(logMessage)
}
return
} else {
Expand Down
6 changes: 4 additions & 2 deletions src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,11 @@ export const knownUnsafeEditableEvent = [
export type KnownUnsafeEditableEvent = typeof knownUnsafeEditableEvent[number]

/**
* These known events can be processed by the `beforeCapture` function
* These are known PostHog events that can be processed by the `beforeCapture` function
* That means PostHog functionality does not rely on receiving 100% of these for calculations
* So, it is safe to sample them to reduce the volume of events sent to PostHog
*/
type KnownEventName =
export type KnownEventName =
| '$heatmaps_data'
| '$opt_in'
| '$exception'
Expand Down
64 changes: 64 additions & 0 deletions src/utils/before-capture.utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import { clampToRange } from './number-utils'
import { CaptureResult, KnownEventName } from '../types'
import { includes } from './index'

/**
 * Computes a deterministic, non-negative hash of a string.
 *
 * Each UTF-16 code unit is folded in as `hash * 31 + codeUnit`, truncated to a
 * signed 32-bit integer after every step; the absolute value is returned.
 *
 * @param str the string to hash
 * @returns a non-negative integer, 0 for the empty string
 */
function simpleHash(str: string) {
    let acc = 0
    let index = 0
    while (index < str.length) {
        // (acc << 5) - acc is acc * 31; `| 0` wraps the sum back into 32 bits
        acc = ((acc << 5) - acc + str.charCodeAt(index)) | 0
        index += 1
    }
    return acc < 0 ? -acc : acc
}

/**
 * An implementation of sampling that samples based on the distinct ID.
 * Can be used to create a beforeCapture fn for a PostHog instance.
 *
 * The distinct ID is hashed into a bucket from 0 to 99 and the event is kept
 * when the bucket is below `percent`. So roughly `percent`% of distinct ids
 * have all of their events sent, not `percent`% of events for each distinct id.
 *
 * Events whose `distinct_id` is missing or not a string are hashed as the empty
 * string (bucket 0) instead of throwing, so they are kept whenever `percent` is above 0.
 *
 * @param percent a number from 0 to 100 (passed through `clampToRange`), 100 means every event is sent, 0 means no event is sent
 * @returns a function that returns the event unchanged when it is kept, or null when it is dropped
 */
export function sampleByDistinctId(percent: number): (c: CaptureResult) => CaptureResult | null {
    return (captureResult: CaptureResult): CaptureResult | null => {
        const distinctId: unknown = captureResult.properties.distinct_id
        // guard: simpleHash reads `.length`, which throws on null/undefined
        const hash = simpleHash(typeof distinctId === 'string' ? distinctId : '')
        return hash % 100 < clampToRange(percent, 0, 100) ? captureResult : null
    }
}

/**
 * An implementation of sampling that samples based on the session ID.
 * Can be used to create a beforeCapture fn for a PostHog instance.
 *
 * The session ID is hashed into a bucket from 0 to 99 and the event is kept
 * when the bucket is below `percent`. So roughly `percent`% of sessions
 * have all of their events sent, not `percent`% of events for each session.
 *
 * Events whose `$session_id` is missing or not a string are hashed as the empty
 * string (bucket 0) instead of throwing, so they are kept whenever `percent` is above 0.
 *
 * @param percent a number from 0 to 100 (passed through `clampToRange`), 100 means every event is sent, 0 means no event is sent
 * @returns a function that returns the event unchanged when it is kept, or null when it is dropped
 */
export function sampleBySessionId(percent: number): (c: CaptureResult) => CaptureResult | null {
    return (captureResult: CaptureResult): CaptureResult | null => {
        const sessionId: unknown = captureResult.properties.$session_id
        // guard: simpleHash reads `.length`, which throws on null/undefined
        const hash = simpleHash(typeof sessionId === 'string' ? sessionId : '')
        return hash % 100 < clampToRange(percent, 0, 100) ? captureResult : null
    }
}

/**
 * Builds a sampling function keyed on the event name.
 * The result can be used as the beforeCapture fn of a PostHog instance.
 *
 * Events whose name is not in `eventNames` are always passed through untouched.
 * Events whose name is listed are kept when a fresh random draw in [0, 100)
 * falls below `percent`; the draw is shared across all listed names rather
 * than tracked per event name.
 *
 * @param eventNames an array of event names to sample, sampling is applied across events not per event name
 * @param percent a number from 0 to 100, 100 means never sample, 0 means never send the event
 */
export function sampleByEvent(
    eventNames: KnownEventName[],
    percent: number
): (c: CaptureResult) => CaptureResult | null {
    return (captureResult: CaptureResult): CaptureResult | null => {
        const isSampledEvent = includes(eventNames, captureResult.event)
        if (isSampledEvent) {
            const roll = Math.random() * 100
            const threshold = clampToRange(percent, 0, 100)
            if (roll < threshold) {
                return captureResult
            }
            return null
        }

        // not one of the sampled event names: never dropped here
        return captureResult
    }
}

0 comments on commit 54d3031

Please sign in to comment.