From 98c9520700f318919afd066f6f9dbb4c42418712 Mon Sep 17 00:00:00 2001 From: imradhe Date: Sat, 14 Sep 2024 17:17:43 +0530 Subject: [PATCH] added tooltip to the tabs --- frontend/components/Datasets.tsx | 513 ++++++++++++++++++------------- 1 file changed, 301 insertions(+), 212 deletions(-) diff --git a/frontend/components/Datasets.tsx b/frontend/components/Datasets.tsx index ae3d668..f1bde39 100644 --- a/frontend/components/Datasets.tsx +++ b/frontend/components/Datasets.tsx @@ -2,19 +2,12 @@ import { ReactElement, useState, useEffect } from "react"; import { Box, - SimpleGrid, - Icon, Text, Stack, Flex, Heading, - Container, Card, - CardBody, - useColorModeValue, - SkeletonCircle, HStack, - SkeletonText, Link, Image as ChakraImage, useBreakpointValue, @@ -25,6 +18,7 @@ import { TabList, TabPanels, TabPanel, + Tooltip, } from "@chakra-ui/react"; import Image from "next/image"; import axios from "axios"; @@ -108,11 +102,8 @@ export default function Datasets() { return ( - - - - Early on in our journey, we recognized that advancing Indian technology necessitates large-scale datasets. Thus, building and collecting extensive datasets across multiple verticals has become a critical endeavor at AI4Bharat. Thanks to generous grants from MeitY, we are spearheading pioneering efforts in data collection as part of the Data Management Unit of Bhashini. Our nationwide initiative aims to gather 15,000 hours of transcribed data from over 400 districts, encompassing all 22 scheduled languages of India. In parallel, our in-house team of over 100 translators is diligently creating a parallel corpus with 2.2 million translation pairs across 22 languages. To produce studio-quality data for expressive TTS systems, we have established recording studios in our lab, where professional voice artists contribute their expertise. Additionally, our annotators are meticulously labeling pages for Document Layout Parsing, accommodating the diverse scripts of India. To accelerate the development of Indic Large Language Models (LLMs), we are focused on building pipelines for curating and synthetically generating pre-training data, collecting contextually grounded prompts, and creating evaluation datasets that reflect India’s rich linguistic tapestry. Collecting and annotating data at this scale demands standardization of processes and tools. To meet this challenge, AI4Bharat has invested in developing various open-source data collection and annotation tools, aiming to enhance these efforts not only within India but also in multilingual regions across the globe. + Early on in our journey, we recognized that advancing Indian + technology necessitates large-scale datasets. Thus, building and + collecting extensive datasets across multiple verticals has become a + critical endeavor at AI4Bharat. Thanks to generous grants from + MeitY, we are spearheading pioneering efforts in data collection as + part of the Data Management Unit of Bhashini. Our nationwide + initiative aims to gather 15,000 hours of transcribed data from over + 400 districts, encompassing all 22 scheduled languages of India. In + parallel, our in-house team of over 100 translators is diligently + creating a parallel corpus with 2.2 million translation pairs across + 22 languages. To produce studio-quality data for expressive TTS + systems, we have established recording studios in our lab, where + professional voice artists contribute their expertise. Additionally, + our annotators are meticulously labeling pages for Document Layout + Parsing, accommodating the diverse scripts of India. To accelerate + the development of Indic Large Language Models (LLMs), we are + focused on building pipelines for curating and synthetically + generating pre-training data, collecting contextually grounded + prompts, and creating evaluation datasets that reflect India’s rich + linguistic tapestry. Collecting and annotating data at this scale + demands standardization of processes and tools. To meet this + challenge, AI4Bharat has invested in developing various open-source + data collection and annotation tools, aiming to enhance these + efforts not only within India but also in multilingual regions + across the globe. + + Explore Datasets + - - - - - - - ASR - - - - - - LLM - - - - - - NMT - - - - - - TTS - - - - - - XLIT - - - + + + + + + + + ASR + + + + + + + + LLMs + + + + + + + + NMT + + + + + + + - - {/* ASR Tab Panel */} - - {isLoading ? ( - Loading... - ) : ( - Object.entries(datasetIcons).map(([key, val]) => - key === "asr" ? ( - <> - - {val} - - {datasets.map((dataset) => ( - <> - {dataset.area.toLowerCase() === key ? ( - - - - ) : null} - - ))} - - - - - ) : null - ) - )} - + TTS + + + + + + + + XLIT + + + + - {/* LLM Tab Panel */} - - {isLoading ? ( - Loading... - ) : ( - Object.entries(datasetIcons).map(([key, val]) => - key === "llm" ? ( - <> - - {val} - - {datasets.map((dataset) => ( - <> - {dataset.area.toLowerCase() === key ? ( - - - - ) : null} - - ))} - - - - - ) : null - ) - )} - + + {/* ASR Tab Panel */} + + {isLoading ? ( + Loading... + ) : ( + Object.entries(datasetIcons).map(([key, val]) => + key === "asr" ? ( + <> + + {val} + + {datasets.map((dataset) => ( + <> + {dataset.area.toLowerCase() === key ? ( + + + + ) : null} + + ))} + + + + + ) : null + ) + )} + - - {/* NMT Tab Panel */} - - - {isLoading ? ( - Loading... - ) : ( - Object.entries(datasetIcons).map(([key, val]) => - key === "nmt" ? ( - <> - - {val} - - {datasets.map((dataset) => ( - <> - {dataset.area.toLowerCase() === key ? ( - - - - ) : null} - - ))} - - - - - ) : null - ) - )} - + {/* LLM Tab Panel */} + + {isLoading ? ( + Loading... + ) : ( + Object.entries(datasetIcons).map(([key, val]) => + key === "llm" ? ( + <> + + {val} + + {datasets.map((dataset) => ( + <> + {dataset.area.toLowerCase() === key ? ( + + + + ) : null} + + ))} + + + + + ) : null + ) + )} + - {/* TTS Tab Panel */} - - {isLoading ? ( - Loading... - ) : ( - Object.entries(datasetIcons).map(([key, val]) => - key === "tts" ? ( - <> - - {val} - - {datasets.map((dataset) => ( - <> - {dataset.area.toLowerCase() === key ? ( - - - - ) : null} - - ))} - - - - - ) : null - ) - )} - - - {/* XLIT Tab Panel */} - - {isLoading ? ( - Loading... - ) : ( - Object.entries(datasetIcons).map(([key, val]) => - key === "xlit" ? ( - <> - - {val} - - {datasets.map((dataset) => ( - <> - {dataset.area.toLowerCase() === key ? ( - - - - ) : null} - - ))} - - - - - ) : null - ) - )} - - - - + {/* NMT Tab Panel */} + + + {isLoading ? ( + Loading... + ) : ( + Object.entries(datasetIcons).map(([key, val]) => + key === "nmt" ? ( + <> + + {val} + + {datasets.map((dataset) => ( + <> + {dataset.area.toLowerCase() === key ? ( + + + + ) : null} + + ))} + + + + + ) : null + ) + )} + + + {/* TTS Tab Panel */} + + {isLoading ? ( + Loading... + ) : ( + Object.entries(datasetIcons).map(([key, val]) => + key === "tts" ? ( + <> + + {val} + + {datasets.map((dataset) => ( + <> + {dataset.area.toLowerCase() === key ? ( + + + + ) : null} + + ))} + + + + + ) : null + ) + )} + + + {/* XLIT Tab Panel */} + + {isLoading ? ( + Loading... + ) : ( + Object.entries(datasetIcons).map(([key, val]) => + key === "xlit" ? ( + <> + + {val} + + {datasets.map((dataset) => ( + <> + {dataset.area.toLowerCase() === key ? ( + + + + ) : null} + + ))} + + + + + ) : null + ) + )} + + + + ); }