Merge pull request #11 from finegrain-ai/pr/webcam-input

support webcam input
edgarGracia · Aug 1, 2024 · 52500dc · 52500dc
2 parents c5827ff + dd02e2d
commit 52500dc
Show file tree

Hide file tree

Showing 5 changed files with 66 additions and 10 deletions.
diff --git a/backend/gradio_image_annotation/image_annotator.py b/backend/gradio_image_annotation/image_annotator.py
@@ -60,7 +60,11 @@ def __init__(
         image_mode: Literal[
             "1", "L", "P", "RGB", "RGBA", "CMYK", "YCbCr", "LAB", "HSV", "I", "F"
         ] = "RGB",
-        sources: list[Literal["upload", "clipboard"]] | None = ["upload", "clipboard"],
+        sources: list[Literal["upload", "webcam", "clipboard"]] | None = [
+            "upload",
+            "webcam",
+            "clipboard",
+        ],
         image_type: Literal["numpy", "pil", "filepath"] = "numpy",
         label: str | None = None,
         container: bool = True,
@@ -93,7 +97,7 @@ def __init__(
             height: The height of the displayed image, specified in pixels if a number is passed, or in CSS units if a string is passed.
             width: The width of the displayed image, specified in pixels if a number is passed, or in CSS units if a string is passed.
             image_mode: "RGB" if color, or "L" if black and white. See https://pillow.readthedocs.io/en/stable/handbook/concepts.html for other supported image modes and their meaning.
-            sources: List of sources for the image. "upload" creates a box where user can drop an image file, "clipboard" allows users to paste an image from the clipboard. If None, defaults to ["upload", "clipboard"].
+            sources: List of sources for the image. "upload" creates a box where users can drop an image file, "webcam" allows users to take a snapshot from their webcam, "clipboard" allows users to paste an image from the clipboard. If None, defaults to ["upload", "webcam", "clipboard"].
             image_type: The format the image is converted before being passed into the prediction function. "numpy" converts the image to a numpy array with shape (height, width, 3) and values from 0 to 255, "pil" converts the image to a PIL image object, "filepath" passes a str path to a temporary file containing the image. If the image is SVG, the `type` is ignored and the filepath of the SVG is returned.
             label: The label for this component. Appears above the component and is also used as the header if there are a table of examples for this component. If None and used in a `gr.Interface`, the label will be the name of the parameter this component is assigned to.
             container: If True, will place the component in a container - providing some extra padding around the border.
@@ -123,7 +127,7 @@ def __init__(
         self.image_mode = image_mode
 
         self.sources = sources
-        valid_sources = ["upload", "clipboard", None]
+        valid_sources = ["upload", "clipboard", "webcam", None]
         if isinstance(sources, str):
             self.sources = [sources]
         if self.sources is None:

diff --git a/frontend/Index.svelte b/frontend/Index.svelte
@@ -13,7 +13,7 @@
 	import AnnotatedImageData from "./shared/AnnotatedImageData";
 	import ImageAnnotator from "./shared/ImageAnnotator.svelte";
 
-	type sources = "upload" | "clipboard" | null;
+	type sources = "upload" | "webcam" | "clipboard" | null;
 
 	export let elem_id = "";
 	export let elem_classes: string[] = [];
@@ -29,7 +29,11 @@
 	export let scale: number | null = null;
 	export let min_width: number | undefined = undefined;
 	export let loading_status: LoadingStatus;
-	export let sources: ("clipboard" | "upload")[] = ["upload", "clipboard"];
+	export let sources: ("upload" | "webcam" | "clipboard")[] = [
+		"upload",
+		"webcam",
+		"clipboard",
+	];
 	export let show_download_button: boolean;
 	export let show_share_button: boolean;
 	export let show_clear_button: boolean;

diff --git a/frontend/package-lock.json b/frontend/package-lock.json
diff --git a/frontend/package.json b/frontend/package.json
@@ -12,6 +12,7 @@
     "@gradio/icons": "0.4.1",
     "@gradio/statustracker": "0.6.0",
     "@gradio/upload": "0.11.1",
+    "@gradio/image": "0.11.9",
     "@gradio/utils": "0.4.2",
     "@gradio/wasm": "0.10.1",
     "cropperjs": "^1.5.12",

diff --git a/frontend/shared/ImageAnnotator.svelte b/frontend/shared/ImageAnnotator.svelte
@@ -1,22 +1,23 @@
 <script lang="ts">
-	import { createEventDispatcher } from "svelte";
+	import { createEventDispatcher, tick } from "svelte";
 	import { Download, Image as ImageIcon } from "@gradio/icons";
 	import { DownloadLink } from "@gradio/wasm/svelte";
 	import { uploadToHuggingFace } from "@gradio/utils";
 	import { BlockLabel, IconButton, ShareButton, SelectSource} from "@gradio/atoms";
 	import { Upload } from "@gradio/upload";
+	import { Webcam } from "@gradio/image";
 	import type { FileData, Client } from "@gradio/client";
 	import type { I18nFormatter, SelectData } from "@gradio/utils";
 	import { Clear } from "@gradio/icons";
 	import ImageCanvas from "./ImageCanvas.svelte";
 	import AnnotatedImageData from "./AnnotatedImageData";
-	
-	type source_type = "upload" | "clipboard" | null;
+
+	type source_type = "upload" | "webcam" | "clipboard" | null;
 
 	export let value: null | AnnotatedImageData;
 	export let label: string | undefined = undefined;
 	export let show_label: boolean;
-	export let sources: source_type[] = ["upload", "clipboard"];
+	export let sources: source_type[] = ["upload", "webcam", "clipboard"];
 	export let selectable = false;
 	export let root: string;
 	export let interactive: boolean;
@@ -55,6 +56,19 @@
 		dispatch("change");
 	}
 
+	async function handle_save(img_blob: Blob | any): Promise<void> { // Store a captured webcam image as the component value, then emit "change". NOTE(review): `Blob | any` is effectively `any`.
+		const f = await upload.load_files([new File([img_blob], `webcam.png`)]); // wrap the capture in a File named webcam.png and pass it to the bound Upload component's load_files
+		const image = f?.[0] || null; // only the first returned entry is used; a missing or empty result becomes null
+		if (image) {
+			value = new AnnotatedImageData(); // replace the current value with a new AnnotatedImageData holding the image
+			value.image = image;
+		} else {
+			value = null; // load_files returned nothing usable: reset the value
+		}
+		await tick(); // NOTE(review): presumably lets Svelte apply the new `value` before "change" listeners run — confirm
+		dispatch("change");
+	}
+
 	$: if (uploading) clear();
 
 	const dispatch = createEventDispatcher<{
@@ -125,7 +139,7 @@
 <div data-testid="image" class="image-container">
 	<div class="upload-container">
 		<Upload
-			hidden={value !== null}
+			hidden={value !== null || active_source === "webcam"}
 			bind:this={upload}
 			bind:uploading
 			bind:dragging
@@ -142,6 +156,20 @@
 				<slot />
 			{/if}
 		</Upload>
+		{#if value === null && active_source === "webcam"}
+			<Webcam
+				{root}
+				on:capture={(e) => handle_save(e.detail)}
+				on:stream={(e) => handle_save(e.detail)}
+				on:error
+				on:drag
+				on:upload={(e) => handle_save(e.detail)}
+				mode="image"
+				include_audio={false}
+				{i18n}
+				{upload}
+			/>
+		{/if}
 		{#if value !== null}
 			<div class:selectable class="image-frame" >
 				<ImageCanvas