From 81962fa2313bcc43ae5ac79e45e19185ac4a9aae Mon Sep 17 00:00:00 2001
From: louyk18 <14280048+louyk18@user.noreply.gitee.com>
Date: Wed, 22 Jan 2025 16:48:44 +0800
Subject: [PATCH] update

---
 .../src/components/CypherQuery/index.tsx      |   2 +-
 .../components/Next/Neighbors/TableView.tsx   |  14 +-
 .../src/components/Report/Intention.tsx       |  20 ++-
 .../src/components/Report/Text.tsx            |  31 ++++-
 .../src/components/Report/Write.tsx           | 121 ++++--------------
 .../src/components/Report/const.tsx           |   7 +
 .../src/components/Report/index.tsx           |   5 +
 7 files changed, 89 insertions(+), 111 deletions(-)
 create mode 100644 packages/studio-explore/src/components/Report/const.tsx

diff --git a/packages/studio-explore/src/components/CypherQuery/index.tsx b/packages/studio-explore/src/components/CypherQuery/index.tsx
index 308c41b7..83ea341e 100644
--- a/packages/studio-explore/src/components/CypherQuery/index.tsx
+++ b/packages/studio-explore/src/components/CypherQuery/index.tsx
@@ -25,7 +25,7 @@ const CypherQuery: React.FunctionComponent = props => {
-      {report && {report}}
-
-  );
-};
 const WriteReport: React.FunctionComponent<
   SummaryType & {
     task: string;
   }
 > = props => {
@@ -250,6 +170,7 @@ const WriteReport: React.FunctionComponent<
         content: GET_REPORT_PROMPTS_BY_SECTION_INTRO_EN(task, 50, JSON.stringify(categoriesWithoutChildren)),
       }),
     ]);
+    const intro_text = intro_res.message.content;
 
     let section_no = 0;
     let already_sec = "";
@@ -262,14 +183,15 @@ const WriteReport: React.FunctionComponent<
             task,
             JSON.stringify(category),
             JSON.stringify(section_no),
-            SECTION_CONSTANT_EXAMPLE_EN
+            SECTION_CONSTANT_EXAMPLE_EN,
+            intro_text
           ),
         }),
       ]);
       already_sec = already_sec + '\n' + res.message.content;
     }
-    already_sec = intro_res.message.content + "\n" + already_sec;
+    already_sec = intro_text + "\n" + already_sec;
 
     setState(preState => {
@@ -281,11 +203,22 @@ const WriteReport: React.FunctionComponent<
     });
   };
 
+  const handleDownloadMindmap = ()=>{
+    Utils.createDownload(JSON.stringify(categories,null,2),'mindmap.json')
+  }
+
   return (
-
+
+      {report && }
diff --git a/packages/studio-explore/src/components/Report/const.tsx b/packages/studio-explore/src/components/Report/const.tsx
new file mode 100644
index 00000000..5f2dde21
--- /dev/null
+++ b/packages/studio-explore/src/components/Report/const.tsx
@@ -0,0 +1,7 @@
+export const test_report = `
+## 2.1 Language Model Improvements\n\nRecent advancements in language models have focused on enhancing capabilities through novel training techniques, scaling methods, and performance improvements. Several studies explore efficient text classification methods, demonstrating that simple baselines can rival deep learning classifiers \\cite{360287970189639680,123}. Meanwhile, compact yet powerful models have been developed for deployment on resource-constrained devices like smartphones \\cite{360287970189639681}. These models leverage advanced pretraining datasets and alignment techniques to achieve state-of-the-art results while maintaining small footprints.\n\nEfficient text classification methods have shown that speed and accuracy can coexist. For instance, fastText \\cite{360287970189639680} trains on over one billion words in less than ten minutes using a standard CPU and classifies sentences among hundreds of thousands of classes rapidly. Compact models such as phi-3-mini \\cite{360287970189639681} further push the boundaries by achieving high performance with fewer parameters, enabling deployment on mobile devices.
Both approaches emphasize the importance of scalability without compromising on performance.\n\nIn addition, several works focus on improving few-shot learning and prompt optimization. Fantastically Ordered Prompts \\cite{360287970189639684} addresses the sensitivity of few-shot prompts, showing that the order of provided samples significantly impacts performance. By analyzing permutations of prompts, this study identifies optimal configurations that yield near-state-of-the-art results across various text classification tasks. Similarly, iterative self-refinement techniques \\cite{360287970189639702} allow models to improve outputs through feedback loops, leading to better task performance without additional training data or reinforcement learning.\n\nInstruction tuning has emerged as a critical method for enhancing zero-shot learning capabilities. Finetuned models exhibit substantial improvements on unseen tasks when trained on diverse instruction datasets \\cite{360287970189639771}. This approach not only boosts performance but also ensures robustness across different domains. Hierarchical instruction prioritization \\cite{360287970189639802} further refines this process by introducing an instruction hierarchy that teaches models to prioritize system prompts over user inputs, increasing security and reliability.\n\nMoreover, recent studies delve into the challenges of memorization and bias in large language models. Quantifying memorization \\cite{360287970189639714} highlights the risks associated with model capacity and duplicated training examples, emphasizing the need for active mitigations. Domain-specific data upsampling \\cite{360287970189639711} offers a solution by strategically increasing the representation of domain-specific datasets during training, leading to significant performance gains on challenging benchmarks.\n\nOverall, these advancements underscore the ongoing efforts to make language models more efficient, versatile, and reliable. By addressing key challenges such as scalability, few-shot learning, and ethical considerations, researchers are paving the way for more capable and responsible AI systems. + +## 2.2 Mathematical Reasoning in Language Models\n\nThe mathematical reasoning capabilities of language models have seen significant advancements, particularly through the development of benchmarks, evaluation methods, and techniques to enhance these abilities. Several studies explore how large language models (LLMs) can effectively perform multi-step reasoning tasks, often using chain-of-thought prompting \\cite{360287970189639687}. These approaches enable LLMs to solve complex math problems in multilingual settings, demonstrating strong reasoning abilities even in underrepresented languages such as Bengali and Swahili.\n\nMetacognitive knowledge in LLMs has also been explored, revealing that models can assign skill labels to math questions, improving accuracy when solving problems by identifying relevant skills \\cite{360287970189639692}. This method leverages prompt-guided interaction to make reasoning processes more interpretable. Additionally, iterative preference learning has been introduced to enhance reasoning via Monte Carlo Tree Search, showing substantial improvements in performance on arithmetic and commonsense reasoning tasks \\cite{360287970189639694}.\n\nChain-of-thought prompting has been shown to significantly improve performance on challenging tasks from the BIG-Bench Hard suite, where prior evaluations underestimated model capabilities. 
Applying CoT enables emergent task performance, especially with larger models \\cite{360287970189639699}. Moreover, robustness in mathematical reasoning is evaluated through adversarial datasets like GSM-Plus, which introduce various perturbations to test the consistency of LLMs' problem-solving abilities \\cite{360287970189639708}.\n\nCommon 7B language models already exhibit strong mathematical capabilities without extensive pre-training, highlighting the potential for smaller models to perform well with appropriate fine-tuning and synthetic data \\cite{360287970189639709}. Verifiers have also been developed to judge the correctness of model completions, enhancing performance on grade school math word problems \\cite{360287970189639797}. \n\nEnhancing mathematical reasoning further, WizardMath uses Reinforcement Learning from Evol-Instruct Feedback (RLEIF) to boost performance on mathematical benchmarks without external tools, achieving impressive results \\cite{360287970189639805}. DeepSeekMath, another approach, continues pre-training with math-related tokens and introduces Group Relative Policy Optimization to achieve competitive scores on MATH benchmarks \\cite{360287970189639829}.\n\nSmaller models like Orca-Math demonstrate that high accuracy on GSM8K can be achieved without multiple model calls or external tools, thanks to a high-quality synthetic dataset and iterative learning techniques \\cite{360287970189639842}. The integration of external tools and computation libraries into reasoning agents, as proposed by ToRA, also shows significant improvements in solving complex mathematical problems \\cite{360287970189639874}.\n\nIn multilingual contexts, research has addressed the scarcity of training data by constructing multilingual datasets, leading to models that outperform conventional LLMs and even surpass ChatGPT in few-shot scenarios \\cite{360287970189639896}. Overall, these advancements underscore the growing sophistication of LLMs in handling diverse mathematical reasoning tasks across multiple languages and domains. + +## 2.3 Multimodal Understanding\n\nThe integration of multiple modalities (e.g., text, image, video) in AI models has seen significant advancements, focusing on tasks like visual understanding, audio-visual comprehension, and cross-modal learning. Recent research highlights the importance of these capabilities for enhancing model performance across various applications.\n\nSeveral studies have explored methods to improve visual grounding in large multimodal models (LMMs). One approach uses interactive segmentation models to partition images into regions with varying levels of granularity, overlaying them with marks such as alphanumerics or masks \\cite{360287970189639683}. This method, referred to as Set-of-Mark (SoM) prompting, significantly boosts zero-shot performance on fine-grained vision and multimodal tasks, outperforming fully-finetuned models in referring expression comprehension and segmentation \\cite{360287970189639683}.\n\nAnother key area is the development of multimodal frameworks that empower LLMs with both visual and auditory comprehension capabilities. For instance, a multi-modal framework bootstraps cross-modal training from pre-trained visual and audio encoders, addressing challenges such as capturing temporal changes in visual scenes and integrating audio-visual signals \\cite{360287970189639685}. 
This framework introduces Video Q-former to assemble pre-trained image encoders into a video encoder and leverages ImageBind for reasonable auditory query embeddings, demonstrating strong performance in video comprehension and generating meaningful responses grounded in visual and auditory information \\cite{360287970189639685}.\n\nUnified visual representation learning has also been a focus, where models aim to align visual features into a language feature space to enhance multi-modal interactions. One study establishes a robust baseline by unifying the visual representation of images and videos, mutually benefiting each other within a unified visual representation \\cite{360287970189639704}. This approach achieves superior performance on a broad range of benchmarks, outperforming models designed specifically for images or videos \\cite{360287970189639704}.\n\nBenchmarking multimodal video models is crucial for evaluating their perception and reasoning skills. A novel benchmark, Perception Test, evaluates models on skills such as Memory, Abstraction, Physics, Semantics, and types of reasoning including descriptive, explanatory, predictive, and counterfactual \\cite{360287970189639712}. This benchmark includes densely annotated real-world videos, providing a comprehensive evaluation tool for multimodal models \\cite{360287970189639712}.\n\nVision transformers have shown promise in handling complex visual tasks. Vision Transformer (ViT) applies a pure transformer architecture directly to sequences of image patches, achieving excellent results on image classification tasks while requiring fewer computational resources compared to convolutional networks \\cite{360287970189639700}. ViT's effectiveness is further demonstrated through its superior performance on various mid-sized or small image recognition benchmarks \\cite{360287970189639700}.\n\nEfforts to create versatile multimodal models capable of performing diverse tasks are ongoing. Florence-2, a vision foundation model, uses a unified, prompt-based representation for a variety of computer vision and vision-language tasks \\cite{360287970189639715}. By employing a sequence-to-sequence structure and utilizing large-scale, high-quality annotated data, Florence-2 demonstrates strong zero-shot and fine-tuning capabilities across numerous tasks \\cite{360287970189639715}.\n\nIn summary, recent advancements in multimodal understanding have led to more sophisticated models that can integrate and process multiple forms of data effectively. These developments not only enhance the performance of individual tasks but also pave the way for more generalized and versatile AI systems \\cite{360287970189639683, 360287970189639685, 360287970189639704, 360287970189639712, 360287970189639700, 360287970189639715}. 
+`
\ No newline at end of file
diff --git a/packages/studio-explore/src/components/Report/index.tsx b/packages/studio-explore/src/components/Report/index.tsx
index 94f492cd..cca82559 100644
--- a/packages/studio-explore/src/components/Report/index.tsx
+++ b/packages/studio-explore/src/components/Report/index.tsx
@@ -7,6 +7,8 @@ import { GraphSchema, useContext } from '@graphscope/studio-graph';
 import Intention from './Intention';
 import Setting from '../Copilot/setting';
 import { getPrompt } from './utils';
+import ReportText from './Text'
+import {test_report} from './const'
 
 interface IReportProps {}
 const GET_DATA_FILTER_RULES_EN = (user_query: string, schema: any) => {
@@ -112,10 +114,13 @@ const Report: React.FunctionComponent = props => {
         Input your intention
+        {/* */}
       {intention && }
+
+
     );
 };
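
The Write.tsx hunks above thread the generated introduction (`intro_text`) into every per-section prompt before the sections are concatenated into the final report. The sketch below restates that control flow in isolation; `ChatFn`, `buildIntroPrompt`, and `buildSectionPrompt` are placeholder names standing in for the component's real LLM call and `GET_REPORT_PROMPTS_*` / `SECTION_CONSTANT_EXAMPLE_EN` helpers, which this patch does not show in full, so treat it as a minimal sketch rather than the actual implementation.

```ts
// Minimal sketch of the sectioned-report flow introduced in Write.tsx.
// Category, ChatFn, and the two prompt builders are stand-ins (assumed names).

interface Category {
  name: string;
  children?: Category[];
}

type ChatFn = (
  messages: { role: 'user'; content: string }[],
) => Promise<{ message: { content: string } }>;

const buildIntroPrompt = (task: string, wordLimit: number, outline: string): string =>
  `Write a ~${wordLimit}-word introduction for the report "${task}". Outline: ${outline}`;

const buildSectionPrompt = (
  task: string,
  category: string,
  sectionNo: string,
  example: string,
  introText: string,
): string =>
  `Task: ${task}\nSection ${sectionNo} covers: ${category}\n` +
  `Follow this example:\n${example}\nStay consistent with the introduction:\n${introText}`;

// Generates the intro first, then one section per top-level category,
// passing the intro text into every section prompt (the behaviour the
// intro_text change enables), and concatenates intro + sections.
export async function writeReport(
  chat: ChatFn,
  task: string,
  categories: Category[],
  sectionExample: string,
): Promise<string> {
  // Strip children so the intro prompt only sees the top-level outline.
  const outline = categories.map(({ children, ...rest }) => rest);
  const introRes = await chat([
    { role: 'user', content: buildIntroPrompt(task, 50, JSON.stringify(outline)) },
  ]);
  const introText = introRes.message.content;

  let sectionNo = 0;
  let body = '';
  for (const category of categories) {
    sectionNo += 1;
    const res = await chat([
      {
        role: 'user',
        content: buildSectionPrompt(
          task,
          JSON.stringify(category),
          String(sectionNo),
          sectionExample,
          introText,
        ),
      },
    ]);
    body = body + '\n' + res.message.content;
  }
  return introText + '\n' + body;
}
```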
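The new `handleDownloadMindmap` handler exports the category tree as JSON via `Utils.createDownload(JSON.stringify(categories, null, 2), 'mindmap.json')`. Below is a minimal browser-side sketch of that behaviour, assuming the helper performs a Blob-plus-anchor download; the actual `Utils.createDownload` implementation lives outside this patch and may differ.

```ts
// Hypothetical equivalent of the download step used by handleDownloadMindmap.
export function downloadJson(data: unknown, filename: string): void {
  // Serialize with indentation, matching JSON.stringify(categories, null, 2).
  const blob = new Blob([JSON.stringify(data, null, 2)], { type: 'application/json' });
  const url = URL.createObjectURL(blob);
  const anchor = document.createElement('a');
  anchor.href = url;
  anchor.download = filename;
  document.body.appendChild(anchor);
  anchor.click();
  // Clean up the temporary anchor and object URL.
  anchor.remove();
  URL.revokeObjectURL(url);
}

// Usage mirroring the patch: downloadJson(categories, 'mindmap.json');
```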