Skip to content

Commit

Permalink
add the beta scene analysis
Browse files Browse the repository at this point in the history
  • Loading branch information
jbilcke-hf committed Jul 31, 2024
1 parent 6f671fa commit f426817
Show file tree
Hide file tree
Showing 10 changed files with 323 additions and 65 deletions.
21 changes: 12 additions & 9 deletions src/components/tasks/useTasks.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -103,15 +103,18 @@ export const useTasks = create<TasksStore>((set, get) => ({
}): Task[] => {
const { tasks } = get()

let list = Object.values(tasks)

if (params?.status) {
list = list.filter((t) => t.status === params?.status)
}

if (params?.category) {
list = list.filter((t) => t.category === params?.category)
}
let list = Object.values(tasks).filter((t) => {
if (params?.status && t.status !== params.status) {
return false
}
if (params?.category && t.category !== params.category) {
return false
}
if (params?.visibility && t.visibility !== params.visibility) {
return false
}
return true
})

return list
},
Expand Down
16 changes: 16 additions & 0 deletions src/components/toolbars/top-menu/assistant/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,13 @@ import { useUI } from '@/services/ui'
import { SettingsCategory } from '@aitube/clapper-services'
import { AssistantModelList } from '../lists/AssistantModelList'
import { useVoiceAssistant } from '@/services/assistant/useVoiceAssistant'
import { useAutocomplete } from '@/services/autocomplete/useAutocomplete'

export function TopMenuAssistant() {
const setShowSettings = useUI((s) => s.setShowSettings)
const storyboardsToStory = useAutocomplete((s) => s.storyboardsToStory)

const hasBetaAccess = useUI((s) => s.hasBetaAccess)

// this should only be called on and at only one place in the project!
useVoiceAssistant()
Expand All @@ -36,6 +40,18 @@ export function TopMenuAssistant() {
</MenubarItem>
<MenubarSeparator />
<AssistantModelList />
{hasBetaAccess && (
<>
<MenubarSeparator />
<MenubarItem
onClick={() => {
storyboardsToStory()
}}
>
Storyboards-to-captions (beta, client-side AI)
</MenubarItem>
</>
)}
<MenubarSeparator />
<MenubarItem disabled>Usage and costs: not implemented</MenubarItem>
</MenubarSub>
Expand Down
2 changes: 1 addition & 1 deletion src/lib/core/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
export const HARD_LIMIT_NB_MAX_ASSETS_TO_GENERATE_IN_PARALLEL = 32

export const APP_NAME = 'Clapper.app'
export const APP_REVISION = '20240730+1240'
export const APP_REVISION = '20240731+2141'

export const APP_DOMAIN = 'Clapper.app'
export const APP_LINK = 'https://clapper.app'
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,49 @@ import {
RawImage,
} from '@xenova/transformers'

/**
 * Module-level cache of the in-flight or resolved loading promises.
 *
 * Each slot is optional: it is empty until `loadModel()` fills it,
 * and it is reset to `undefined` by `closeModel()`.
 */
export const cache: Partial<
  Record<'model' | 'processor' | 'tokenizer', Promise<any>>
> = {}

/**
 * Load the Florence-2 model, its processor and its tokenizer, reusing the
 * promises stored in the module-level `cache` when they exist.
 *
 * The three artifacts are awaited one after the other; `onProgress` is called
 * with 0 before loading starts, then with 33, 66 and 100 as each one is ready.
 *
 * A load that rejects is evicted from `cache`, so that a failed attempt does
 * not make every later call fail with the same stale rejection. The rejection
 * itself is still propagated to the caller of this function.
 *
 * NOTE: `cache` has a single slot per artifact and is not keyed by `modelId`:
 * once a slot is filled, `modelId` is ignored for it until `closeModel()`
 * resets the cache.
 *
 * @param modelId - identifier passed to the `from_pretrained` loaders
 * @param onProgress - callback receiving the loading progress (0, 33, 66, 100)
 * @returns the resolved model, processor and tokenizer
 * @throws whatever the underlying `from_pretrained` call rejects with
 */
export async function loadModel(
  modelId: string,
  onProgress: (progress: number) => void
) {
  // Return the promise cached under `key`, or start a new load and cache it.
  const loadCached = (
    key: 'model' | 'processor' | 'tokenizer',
    load: () => Promise<any>
  ): Promise<any> => {
    const cached = cache[key]
    if (cached) {
      return cached
    }

    const pending = load()
    cache[key] = pending

    // Evict the promise if it fails, but only when it is still the cached one
    // (the slot may have been reset or replaced in the meantime).
    pending.catch(() => {
      if (cache[key] === pending) {
        cache[key] = undefined
      }
    })

    return pending
  }

  onProgress(0)

  const model = await loadCached('model', () =>
    Florence2ForConditionalGeneration.from_pretrained(modelId, {
      dtype: 'fp32',
    })
  )

  onProgress(33)

  const processor = await loadCached('processor', () =>
    AutoProcessor.from_pretrained(modelId)
  )

  onProgress(66)

  const tokenizer = await loadCached('tokenizer', () =>
    AutoTokenizer.from_pretrained(modelId)
  )

  onProgress(100)

  return { model, processor, tokenizer }
}

/**
 * Reset every slot of the module-level `cache` to `undefined`, so that the
 * next `loadModel()` call loads the model, processor and tokenizer again.
 */
export function closeModel() {
  for (const slot of ['model', 'processor', 'tokenizer'] as const) {
    cache[slot] = undefined
  }
}

export async function extractCaptionsFromFrames(
images: string[] = [],
onProgress: (
Expand All @@ -31,34 +74,24 @@ Linux experimental support also requires launching the browser with --enable-fea
}

let progress = 0
onProgress(progress, 0, images.length)

// for code example, see:
// https://github.com/xenova/transformers.js/pull/545#issuecomment-2183625876

// Load model, processor, and tokenizer
const model_id = 'onnx-community/Florence-2-base-ft'
const model = await Florence2ForConditionalGeneration.from_pretrained(
model_id,
{
dtype: 'fp32',
}
)

onProgress((progress = 5), 0, images.length)

const processor = await AutoProcessor.from_pretrained(model_id)

onProgress((progress = 10), 0, images.length)

const tokenizer = await AutoTokenizer.from_pretrained(model_id)

onProgress((progress = 15), 0, images.length)
const { model, processor, tokenizer } = await loadModel(model_id, (p) => {
onProgress((progress = p * 15), 0, images.length)
})

// not all prompts will work properly, see the official examples:
// https://huggingface.co/microsoft/Florence-2-base-ft/blob/e7a5acc73559546de6e12ec0319cd7cc1fa2437c/processing_florence2.py#L115-L117

// Prepare text inputs
const prompts = 'Describe with a paragraph what is shown in the image.'
// const prompts = 'Decompose the following video frame into era, genre, location, weather, characters, and action. Give the answer in YAML.'

const text_inputs = tokenizer(prompts)

let i = 1
Expand Down
8 changes: 8 additions & 0 deletions src/services/autocomplete/getDefaultAutocompleteState.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import { AutocompleteState } from './types'

/**
 * Build a fresh autocomplete state holding the default values.
 *
 * A new object is created on every call.
 *
 * @returns a state with `isRunning` set to `false`
 */
export function getDefaultAutocompleteState(): AutocompleteState {
  return { isRunning: false }
}
20 changes: 20 additions & 0 deletions src/services/autocomplete/types.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/**
* State fields of the autocomplete store.
*/
export type AutocompleteState = {
// NOTE(review): presumably true while an autocomplete operation is in progress — confirm against the store implementation
isRunning: boolean
}
/**
* Control functions of the autocomplete store.
*/
export type AutocompleteControls = {
/**
* Take a range of storyboards and infer the corresponding story.
*
* This will directly update the screenplay and timeline,
* creating the appropriate segments, line coordinates etc.
*
* @param params - optional object with optional `startTimeInMs` and
* `endTimeInMs` fields. NOTE(review): presumably the bounds (in milliseconds)
* of the range of storyboards to analyze; the behavior when they are omitted
* is not visible from this type — confirm against the implementation
* @returns a promise that resolves with no value
*/
storyboardsToStory: (params?: {
startTimeInMs?: number
endTimeInMs?: number
}) => Promise<void>
}
/** Complete autocomplete store: the state fields combined with the control functions. */
export type AutocompleteStore = AutocompleteState & AutocompleteControls
Loading

0 comments on commit f426817

Please sign in to comment.