From 81962fa2313bcc43ae5ac79e45e19185ac4a9aae Mon Sep 17 00:00:00 2001
From: louyk18 <14280048+louyk18@user.noreply.gitee.com>
Date: Wed, 22 Jan 2025 16:48:44 +0800
Subject: [PATCH] update

---
 .../src/components/CypherQuery/index.tsx      |   2 +-
 .../components/Next/Neighbors/TableView.tsx   |  14 +-
 .../src/components/Report/Intention.tsx       |  20 ++-
 .../src/components/Report/Text.tsx            |  31 ++++-
 .../src/components/Report/Write.tsx           | 121 ++++--------------
 .../src/components/Report/const.tsx           |   7 +
 .../src/components/Report/index.tsx           |   5 +
 7 files changed, 89 insertions(+), 111 deletions(-)
 create mode 100644 packages/studio-explore/src/components/Report/const.tsx

diff --git a/packages/studio-explore/src/components/CypherQuery/index.tsx b/packages/studio-explore/src/components/CypherQuery/index.tsx
index 308c41b7..83ea341e 100644
--- a/packages/studio-explore/src/components/CypherQuery/index.tsx
+++ b/packages/studio-explore/src/components/CypherQuery/index.tsx
@@ -25,7 +25,7 @@ const CypherQuery: React.FunctionComponent = props => {
-      {report && {report}}
-
-  );
-};
 const WriteReport: React.FunctionComponent<
   SummaryType & {
     task: string;
   }
 > = props => {
@@ -250,6 +170,7 @@ const WriteReport: React.FunctionComponent<
         content: GET_REPORT_PROMPTS_BY_SECTION_INTRO_EN(task, 50, JSON.stringify(categoriesWithoutChildren)),
       }),
     ]);
+    const intro_text = intro_res.message.content;
 
     let section_no = 0;
     let already_sec = "";
@@ -262,14 +183,15 @@ const WriteReport: React.FunctionComponent<
             task,
             JSON.stringify(category),
             JSON.stringify(section_no),
-            SECTION_CONSTANT_EXAMPLE_EN
+            SECTION_CONSTANT_EXAMPLE_EN,
+            intro_text
           ),
         }),
       ]);
       already_sec = already_sec + '\n' + res.message.content;
     }
-    already_sec = intro_res.message.content + "\n" + already_sec;
+    already_sec = intro_text + "\n" + already_sec;
 
     setState(preState => {
@@ -281,11 +203,22 @@ const WriteReport: React.FunctionComponent<
     });
   };
 
+  const handleDownloadMindmap = ()=>{
+    Utils.createDownload(JSON.stringify(categories,null,2),'mindmap.json')
+  }
+
   return (
-
+
+      {report && }
diff --git a/packages/studio-explore/src/components/Report/const.tsx b/packages/studio-explore/src/components/Report/const.tsx
new file mode 100644
index 00000000..5f2dde21
--- /dev/null
+++ b/packages/studio-explore/src/components/Report/const.tsx
@@ -0,0 +1,7 @@
+export const test_report = `
+## 2.1 Language Model Improvements\n\nRecent advancements in language models have focused on enhancing capabilities through novel training techniques, scaling methods, and performance improvements. Several studies explore efficient text classification methods, demonstrating that simple baselines can rival deep learning classifiers \\cite{360287970189639680,123}. Meanwhile, compact yet powerful models have been developed for deployment on resource-constrained devices like smartphones \\cite{360287970189639681}. These models leverage advanced pretraining datasets and alignment techniques to achieve state-of-the-art results while maintaining small footprints.\n\nEfficient text classification methods have shown that speed and accuracy can coexist. For instance, fastText \\cite{360287970189639680} trains on over one billion words in less than ten minutes using a standard CPU and classifies sentences among hundreds of thousands of classes rapidly. Compact models such as phi-3-mini \\cite{360287970189639681} further push the boundaries by achieving high performance with fewer parameters, enabling deployment on mobile devices.
Both approaches emphasize the importance of scalability without compromising on performance.\n\nIn addition, several works focus on improving few-shot learning and prompt optimization. Fantastically Ordered Prompts \\cite{360287970189639684} addresses the sensitivity of few-shot prompts, showing that the order of provided samples significantly impacts performance. By analyzing permutations of prompts, this study identifies optimal configurations that yield near-state-of-the-art results across various text classification tasks. Similarly, iterative self-refinement techniques \\cite{360287970189639702} allow models to improve outputs through feedback loops, leading to better task performance without additional training data or reinforcement learning.\n\nInstruction tuning has emerged as a critical method for enhancing zero-shot learning capabilities. Finetuned models exhibit substantial improvements on unseen tasks when trained on diverse instruction datasets \\cite{360287970189639771}. This approach not only boosts performance but also ensures robustness across different domains. Hierarchical instruction prioritization \\cite{360287970189639802} further refines this process by introducing an instruction hierarchy that teaches models to prioritize system prompts over user inputs, increasing security and reliability.\n\nMoreover, recent studies delve into the challenges of memorization and bias in large language models. Quantifying memorization \\cite{360287970189639714} highlights the risks associated with model capacity and duplicated training examples, emphasizing the need for active mitigations. Domain-specific data upsampling \\cite{360287970189639711} offers a solution by strategically increasing the representation of domain-specific datasets during training, leading to significant performance gains on challenging benchmarks.\n\nOverall, these advancements underscore the ongoing efforts to make language models more efficient, versatile, and reliable. By addressing key challenges such as scalability, few-shot learning, and ethical considerations, researchers are paving the way for more capable and responsible AI systems. + +## 2.2 Mathematical Reasoning in Language Models\n\nThe mathematical reasoning capabilities of language models have seen significant advancements, particularly through the development of benchmarks, evaluation methods, and techniques to enhance these abilities. Several studies explore how large language models (LLMs) can effectively perform multi-step reasoning tasks, often using chain-of-thought prompting \\cite{360287970189639687}. These approaches enable LLMs to solve complex math problems in multilingual settings, demonstrating strong reasoning abilities even in underrepresented languages such as Bengali and Swahili.\n\nMetacognitive knowledge in LLMs has also been explored, revealing that models can assign skill labels to math questions, improving accuracy when solving problems by identifying relevant skills \\cite{360287970189639692}. This method leverages prompt-guided interaction to make reasoning processes more interpretable. Additionally, iterative preference learning has been introduced to enhance reasoning via Monte Carlo Tree Search, showing substantial improvements in performance on arithmetic and commonsense reasoning tasks \\cite{360287970189639694}.\n\nChain-of-thought prompting has been shown to significantly improve performance on challenging tasks from the BIG-Bench Hard suite, where prior evaluations underestimated model capabilities. 
Applying CoT enables emergent task performance, especially with larger models \\cite{360287970189639699}. Moreover, robustness in mathematical reasoning is evaluated through adversarial datasets like GSM-Plus, which introduce various perturbations to test the consistency of LLMs' problem-solving abilities \\cite{360287970189639708}.\n\nCommon 7B language models already exhibit strong mathematical capabilities without extensive pre-training, highlighting the potential for smaller models to perform well with appropriate fine-tuning and synthetic data \\cite{360287970189639709}. Verifiers have also been developed to judge the correctness of model completions, enhancing performance on grade school math word problems \\cite{360287970189639797}. \n\nEnhancing mathematical reasoning further, WizardMath uses Reinforcement Learning from Evol-Instruct Feedback (RLEIF) to boost performance on mathematical benchmarks without external tools, achieving impressive results \\cite{360287970189639805}. DeepSeekMath, another approach, continues pre-training with math-related tokens and introduces Group Relative Policy Optimization to achieve competitive scores on MATH benchmarks \\cite{360287970189639829}.\n\nSmaller models like Orca-Math demonstrate that high accuracy on GSM8K can be achieved without multiple model calls or external tools, thanks to a high-quality synthetic dataset and iterative learning techniques \\cite{360287970189639842}. The integration of external tools and computation libraries into reasoning agents, as proposed by ToRA, also shows significant improvements in solving complex mathematical problems \\cite{360287970189639874}.\n\nIn multilingual contexts, research has addressed the scarcity of training data by constructing multilingual datasets, leading to models that outperform conventional LLMs and even surpass ChatGPT in few-shot scenarios \\cite{360287970189639896}. Overall, these advancements underscore the growing sophistication of LLMs in handling diverse mathematical reasoning tasks across multiple languages and domains. + +## 2.3 Multimodal Understanding\n\nThe integration of multiple modalities (e.g., text, image, video) in AI models has seen significant advancements, focusing on tasks like visual understanding, audio-visual comprehension, and cross-modal learning. Recent research highlights the importance of these capabilities for enhancing model performance across various applications.\n\nSeveral studies have explored methods to improve visual grounding in large multimodal models (LMMs). One approach uses interactive segmentation models to partition images into regions with varying levels of granularity, overlaying them with marks such as alphanumerics or masks \\cite{360287970189639683}. This method, referred to as Set-of-Mark (SoM) prompting, significantly boosts zero-shot performance on fine-grained vision and multimodal tasks, outperforming fully-finetuned models in referring expression comprehension and segmentation \\cite{360287970189639683}.\n\nAnother key area is the development of multimodal frameworks that empower LLMs with both visual and auditory comprehension capabilities. For instance, a multi-modal framework bootstraps cross-modal training from pre-trained visual and audio encoders, addressing challenges such as capturing temporal changes in visual scenes and integrating audio-visual signals \\cite{360287970189639685}. 
This framework introduces Video Q-former to assemble pre-trained image encoders into a video encoder and leverages ImageBind for reasonable auditory query embeddings, demonstrating strong performance in video comprehension and generating meaningful responses grounded in visual and auditory information \\cite{360287970189639685}.\n\nUnified visual representation learning has also been a focus, where models aim to align visual features into a language feature space to enhance multi-modal interactions. One study establishes a robust baseline by unifying the visual representation of images and videos, mutually benefiting each other within a unified visual representation \\cite{360287970189639704}. This approach achieves superior performance on a broad range of benchmarks, outperforming models designed specifically for images or videos \\cite{360287970189639704}.\n\nBenchmarking multimodal video models is crucial for evaluating their perception and reasoning skills. A novel benchmark, Perception Test, evaluates models on skills such as Memory, Abstraction, Physics, Semantics, and types of reasoning including descriptive, explanatory, predictive, and counterfactual \\cite{360287970189639712}. This benchmark includes densely annotated real-world videos, providing a comprehensive evaluation tool for multimodal models \\cite{360287970189639712}.\n\nVision transformers have shown promise in handling complex visual tasks. Vision Transformer (ViT) applies a pure transformer architecture directly to sequences of image patches, achieving excellent results on image classification tasks while requiring fewer computational resources compared to convolutional networks \\cite{360287970189639700}. ViT's effectiveness is further demonstrated through its superior performance on various mid-sized or small image recognition benchmarks \\cite{360287970189639700}.\n\nEfforts to create versatile multimodal models capable of performing diverse tasks are ongoing. Florence-2, a vision foundation model, uses a unified, prompt-based representation for a variety of computer vision and vision-language tasks \\cite{360287970189639715}. By employing a sequence-to-sequence structure and utilizing large-scale, high-quality annotated data, Florence-2 demonstrates strong zero-shot and fine-tuning capabilities across numerous tasks \\cite{360287970189639715}.\n\nIn summary, recent advancements in multimodal understanding have led to more sophisticated models that can integrate and process multiple forms of data effectively. These developments not only enhance the performance of individual tasks but also pave the way for more generalized and versatile AI systems \\cite{360287970189639683, 360287970189639685, 360287970189639704, 360287970189639712, 360287970189639700, 360287970189639715}. 
+`
\ No newline at end of file
diff --git a/packages/studio-explore/src/components/Report/index.tsx b/packages/studio-explore/src/components/Report/index.tsx
index 94f492cd..cca82559 100644
--- a/packages/studio-explore/src/components/Report/index.tsx
+++ b/packages/studio-explore/src/components/Report/index.tsx
@@ -7,6 +7,8 @@ import { GraphSchema, useContext } from '@graphscope/studio-graph';
 import Intention from './Intention';
 import Setting from '../Copilot/setting';
 import { getPrompt } from './utils';
+import ReportText from './Text'
+import {test_report} from './const'
 
 interface IReportProps {}
 const GET_DATA_FILTER_RULES_EN = (user_query: string, schema: any) => {
@@ -112,10 +114,13 @@ const Report: React.FunctionComponent = props => {
         Input your intention
+        {/* */}
       {intention && }
+
+
     );
 };
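
The Write.tsx hunks above thread the generated introduction (`intro_text`) into every per-section prompt before the sections are concatenated into the final report. The sketch below restates that control flow in isolation; `ChatFn`, `buildIntroPrompt`, and `buildSectionPrompt` are placeholder names standing in for the component's real LLM call and `GET_REPORT_PROMPTS_*` / `SECTION_CONSTANT_EXAMPLE_EN` helpers, which this patch does not show in full, so treat it as a minimal sketch rather than the actual implementation.

```ts
// Minimal sketch of the sectioned-report flow introduced in Write.tsx.
// Category, ChatFn, and the two prompt builders are stand-ins (assumed names).

interface Category {
  name: string;
  children?: Category[];
}

type ChatFn = (
  messages: { role: 'user'; content: string }[],
) => Promise<{ message: { content: string } }>;

const buildIntroPrompt = (task: string, wordLimit: number, outline: string): string =>
  `Write a ~${wordLimit}-word introduction for the report "${task}". Outline: ${outline}`;

const buildSectionPrompt = (
  task: string,
  category: string,
  sectionNo: string,
  example: string,
  introText: string,
): string =>
  `Task: ${task}\nSection ${sectionNo} covers: ${category}\n` +
  `Follow this example:\n${example}\nStay consistent with the introduction:\n${introText}`;

// Generates the intro first, then one section per top-level category,
// passing the intro text into every section prompt (the behaviour the
// intro_text change enables), and concatenates intro + sections.
export async function writeReport(
  chat: ChatFn,
  task: string,
  categories: Category[],
  sectionExample: string,
): Promise<string> {
  // Strip children so the intro prompt only sees the top-level outline.
  const outline = categories.map(({ children, ...rest }) => rest);
  const introRes = await chat([
    { role: 'user', content: buildIntroPrompt(task, 50, JSON.stringify(outline)) },
  ]);
  const introText = introRes.message.content;

  let sectionNo = 0;
  let body = '';
  for (const category of categories) {
    sectionNo += 1;
    const res = await chat([
      {
        role: 'user',
        content: buildSectionPrompt(
          task,
          JSON.stringify(category),
          String(sectionNo),
          sectionExample,
          introText,
        ),
      },
    ]);
    body = body + '\n' + res.message.content;
  }
  return introText + '\n' + body;
}
```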
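The new `handleDownloadMindmap` handler exports the category tree as JSON via `Utils.createDownload(JSON.stringify(categories, null, 2), 'mindmap.json')`. Below is a minimal browser-side sketch of that behaviour, assuming the helper performs a Blob-plus-anchor download; the actual `Utils.createDownload` implementation lives outside this patch and may differ.

```ts
// Hypothetical equivalent of the download step used by handleDownloadMindmap.
export function downloadJson(data: unknown, filename: string): void {
  // Serialize with indentation, matching JSON.stringify(categories, null, 2).
  const blob = new Blob([JSON.stringify(data, null, 2)], { type: 'application/json' });
  const url = URL.createObjectURL(blob);
  const anchor = document.createElement('a');
  anchor.href = url;
  anchor.download = filename;
  document.body.appendChild(anchor);
  anchor.click();
  // Clean up the temporary anchor and object URL.
  anchor.remove();
  URL.revokeObjectURL(url);
}

// Usage mirroring the patch: downloadJson(categories, 'mindmap.json');
```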