From c5cbb1bd022fba5ddd45f87e9f3d8a81a9a12ba7 Mon Sep 17 00:00:00 2001
From: cfli <545999961@qq.com>
Date: Tue, 26 Nov 2024 11:26:26 +0800
Subject: [PATCH] update examples
---
.../MTEB/AskUbuntuDupQuestions.json | 28 +++++++++++++++----
.../bge-en-icl/MTEB/ClimateFEVER.json | 16 ++++++++---
.../examples/bge-en-icl/MTEB/HotpotQA.json | 12 ++++----
.../bge-en-icl/MTEB/ImdbClassification.json | 10 ++++++-
.../bge-en-icl/MTEB/MedrxivClusteringS2S.json | 20 ++++++-------
.../examples/bge-en-icl/MTEB/NFCorpus.json | 12 ++++----
.../bge-en-icl/MTEB/RedditClustering.json | 4 +++
.../examples/bge-en-icl/MTEB/SCIDOCS.json | 12 +++++---
.../MTEB/StackExchangeClustering.json | 4 +++
.../examples/bge-en-icl/MTEB/SummEval.json | 8 ++----
.../examples/bge-en-icl/MTEB/TRECCOVID.json | 12 +++++---
.../ToxicConversationsClassification.json | 18 ++++++++----
.../MTEB/TwentyNewsgroupsClustering.json | 4 +++
13 files changed, 108 insertions(+), 52 deletions(-)
diff --git a/research/llm_dense_retriever/examples/bge-en-icl/MTEB/AskUbuntuDupQuestions.json b/research/llm_dense_retriever/examples/bge-en-icl/MTEB/AskUbuntuDupQuestions.json
index 9c01a895..dd363268 100644
--- a/research/llm_dense_retriever/examples/bge-en-icl/MTEB/AskUbuntuDupQuestions.json
+++ b/research/llm_dense_retriever/examples/bge-en-icl/MTEB/AskUbuntuDupQuestions.json
@@ -1,14 +1,30 @@
[
{
- "query": "angularjs infinite scroll in a container",
- "response": "AngularJS ng-infinite-scroll not working on a specific container/div"
+ "query": "how to reset forgotten password in ubuntu?",
+ "response": "forgot password on ubuntu - how to reset?"
},
{
- "query": "Java: Efficiently converting an array of longs to an array of bytes",
- "response": "Most Compact way to Serialize an Array of Longs in Java"
+ "query": "ubuntu 18.04 freezes after login screen",
+ "response": "ubuntu freezing after login - how to fix?"
},
{
- "query": "PyVISA missing methods",
- "response": "NI VISA + pyVisa on Mac OS X (Snow Leopard)"
+ "query": "how to install NVIDIA drivers on ubuntu 20.04?",
+ "response": "NVIDIA driver installation guide for ubuntu 20.04?"
+ },
+ {
+ "query": "ubuntu won't boot after installing updates",
+ "response": "stuck at ubuntu boot screen after update - solutions?"
+ },
+ {
+ "query": "setting up SSH keys on ubuntu server",
+ "response": "SSH key authentication setup on ubuntu - step-by-step guide?"
+ },
+ {
+ "query": "how to transfer wine configuration to another machine?",
+ "response": "how to export and import wine settings?"
+ },
+ {
+ "query": "how to install precise pangolin from a DVD?",
+ "response": "steps to install precise pangolin using a DVD"
}
]
\ No newline at end of file
diff --git a/research/llm_dense_retriever/examples/bge-en-icl/MTEB/ClimateFEVER.json b/research/llm_dense_retriever/examples/bge-en-icl/MTEB/ClimateFEVER.json
index 05628a56..c94b3eb9 100644
--- a/research/llm_dense_retriever/examples/bge-en-icl/MTEB/ClimateFEVER.json
+++ b/research/llm_dense_retriever/examples/bge-en-icl/MTEB/ClimateFEVER.json
@@ -1,10 +1,18 @@
[
{
- "query": "Global warming is causing more frequent and intense hurricanes.",
- "response": "Hurricanes and Climate Change Hurricanes, also known as tropical cyclones or typhoons depending on their location, are powerful and destructive weather systems characterized by strong winds, heavy rainfall, and storm surges. The formation and intensity of hurricanes are influenced by a variety of factors, including sea surface temperatures, atmospheric moisture, and wind patterns. Scientific research indicates that global warming is having a significant impact on these factors, leading to changes in hurricane behavior. As sea surface temperatures rise due to increased greenhouse gas emissions, the energy available for hurricane formation and intensification also increases. This has been linked to an increase in the frequency of the most intense hurricanes, categorized as Category 4 and 5 storms. Additionally, warmer air can hold more moisture, leading to heavier rainfall and greater flooding potential during hurricanes. The Intergovernmental Panel on Climate Change (IPCC) reports that while the total number of hurricanes may not be increasing, there is a clear trend towards more intense and damaging storms in a warming world."
+ "query": "There is strong evidence that solar activity is the main driver of recent global warming.",
+ "response": "Solar activity and its influence on Earth's climate Solar activity, including solar radiation and sunspots, has been a topic of interest in climate science for many years. Changes in solar output can influence Earth's climate in various ways. For example, increased solar radiation can lead to higher temperatures, while variations in the number of sunspots can affect weather patterns. However, studies have shown that solar activity alone cannot account for the significant rise in global temperatures observed over the past century. The Intergovernmental Panel on Climate Change (IPCC) states that while solar variability does have an impact on climate, it is not the dominant factor in recent global warming. The IPCC's assessments, based on multiple lines of evidence, indicate that human activities, particularly the burning of fossil fuels and deforestation, are the primary contributors to the observed warming trend. Climate models that include both natural and anthropogenic factors show that the recent increase in global temperatures cannot be explained by natural forces alone, such as solar activity and volcanic eruptions. This consensus is supported by numerous scientific studies and major scientific organizations worldwide, highlighting the critical role of human influence in driving current climate change."
},
{
- "query": "The Arctic sea ice extent has decreased by nearly 40% since the late 1970s due to global warming.",
- "response": "The Arctic sea ice extent refers to the surface area of the Arctic Ocean covered by sea ice . Observations indicate a significant reduction in Arctic sea ice extent over recent decades . Satellite measurements have shown that the minimum sea ice extent , typically occurring in September , has declined by about 40% since the late 1970s . This decrease is largely attributed to rising global temperatures , which have led to warmer ocean waters and higher air temperatures in the Arctic region . Climate models predict that the Arctic could be nearly ice-free during summer within this century if the current rate of warming continues . This decline in sea ice has profound implications for Arctic ecosystems , global weather patterns , and sea levels ."
+ "query": "Renewable energy sources can completely replace fossil fuels to combat climate change.",
+ "response": "Renewable Energy and Climate Change Mitigation Renewable energy sources, such as solar, wind, hydro, and geothermal power, are essential components of strategies to mitigate climate change. These sources produce little to no greenhouse gas emissions during operation, unlike fossil fuels. The transition to renewable energy is necessary to reduce the concentration of CO2 and other greenhouse gases in the atmosphere, which are the primary drivers of global warming. Several studies have shown that it is technically feasible to meet the world's energy needs with 100% renewable energy. For example, a study by Jacobson et al. (2017) demonstrated that a transition to renewable energy could result in significant reductions in air pollution and associated health benefits. However, challenges such as energy storage, grid stability, and economic factors must be addressed to achieve this goal. Despite these challenges, advancements in technology and decreasing costs of renewable energy sources provide optimism for a sustainable energy future. The International Renewable Energy Agency (IRENA) reports that renewable energy could supply up to 86% of global power by 2050, significantly reducing greenhouse gas emissions and combating climate change. While renewable energy alone may not completely replace fossil fuels immediately, it is a crucial step toward a more sustainable and environmentally friendly energy system."
+ },
+ {
+ "query": "Climate change is causing an increase in the frequency and intensity of hurricanes.",
+ "response": "Hurricanes and Climate Change Research indicates that climate change is contributing to an increase in the frequency and intensity of hurricanes. Warmer ocean temperatures, which are a direct result of global warming, provide more energy for storms, making them stronger and potentially more destructive. According to a 2020 study published in the journal Nature, there has been a significant increase in the number of Category 4 and 5 hurricanes since the 1980s. Additionally, the Intergovernmental Panel on Climate Change (IPCC) has reported that the intensity of tropical cyclones has likely increased over the last four decades. The study attributes this trend to rising sea surface temperatures and changes in atmospheric conditions, such as increased moisture content and altered wind patterns. These changes are consistent with the effects of human-induced climate change. Moreover, the National Oceanic and Atmospheric Administration (NOAA) has observed that the season length of hurricanes has extended, with storms forming earlier and lasting longer than in the past. While natural variability also plays a role in hurricane activity, the overwhelming consensus among scientists is that climate change is exacerbating the severity of these events, leading to greater risks and damages in affected regions."
+ },
+ {
+ "query": "Climate change is not significantly affecting global agricultural productivity.",
+ "response": "Climate Change and Agriculture: A Critical Review Contrary to the claim that climate change is not significantly affecting global agricultural productivity, numerous studies have shown that climate change is having a profound impact on agriculture worldwide. According to the Food and Agriculture Organization (FAO), changes in temperature, precipitation patterns, and the increased frequency of extreme weather events are already affecting crop yields. For instance, higher temperatures can accelerate crop maturation, reduce the time available for grain filling, and ultimately decrease yields. A study published in Nature Climate Change found that global wheat and maize yields have been significantly impacted by rising temperatures, with declines of 6% and 7.4% respectively per degree Celsius increase in temperature. Moreover, climate change exacerbates water scarcity in many regions, leading to reduced irrigation potential and further stressing agricultural systems. The Intergovernmental Panel on Climate Change (IPCC) reports that changing precipitation patterns can cause both droughts and floods, disrupting planting and harvesting cycles and damaging crops. In tropical regions, where agriculture is particularly vulnerable, smallholder farmers face increasing challenges in maintaining productivity due to more frequent and severe weather events. In addition to crop production, climate change affects livestock by altering the availability of feed and water, increasing heat stress, and spreading diseases. The economic impact on agriculture is substantial, with the World Bank estimating that climate change could push more than 100 million people into poverty by 2030, primarily through impacts on agriculture and food security. Therefore, the assertion that climate change is not significantly affecting global agricultural productivity is not supported by the extensive body of scientific evidence documenting the adverse effects of climate change on agriculture."
}
]
\ No newline at end of file
diff --git a/research/llm_dense_retriever/examples/bge-en-icl/MTEB/HotpotQA.json b/research/llm_dense_retriever/examples/bge-en-icl/MTEB/HotpotQA.json
index 246a8a3a..c20d536e 100644
--- a/research/llm_dense_retriever/examples/bge-en-icl/MTEB/HotpotQA.json
+++ b/research/llm_dense_retriever/examples/bge-en-icl/MTEB/HotpotQA.json
@@ -1,14 +1,14 @@
[
{
- "query": "Which tennis player Anna-Lena Gr\u00f6nefeld or Mats Wilander turned professional first ?",
- "response": "Anna-Lena Gr\u00f6nefeld Anna-Lena Gr\u00f6nefeld (born 4 June 1985) is a German tennis player. She turned professional in April 2003."
+ "query": "Which city, Kaiping or Ruzhou, has a greater population?",
+ "response": "Ruzhou. Ruzhou () is a city in Henan province, China. It was called Linru County (\u4e34\u6c5d\u53bf) until 1988. It has more than 100,000 inhabitants. The Fengxue Temple of Ruzhou features the Qizu Pagoda, built in 738 during the Tang Dynasty (618\u2013907)."
},
{
- "query": "What South Korean K-pop group has 13 members and their own online TV program?",
- "response": "Seventeen (band) Seventeen (Hangul: \uc138\ube10\ud2f4 ), also stylized as SEVENTEEN or SVT, is a South Korean boy group formed by Pledis Entertainment in 2015. The group consists of thirteen members who are separated into three sub-units, each with different areas of specialization: a 'Hip-Hop Unit', 'Vocal Unit', and 'Performance Unit'. They have released one studio album and four extended plays."
+ "query": "Anniversary Park is located at the south end of a university that was originally founded as what college?",
+ "response": "George Washington University The George Washington University (GW, GWU, or George Washington) is a private research university in Washington, D.C., the capital of the United States. Founded in 1821 as Columbian College, the university has since grown to comprise fourteen undergraduate and graduate colleges and schools, including the School of Media and Public Affairs, Elliott School of International Affairs, Law School, and School of Public Health. George Washington's main campus is located in the Foggy Bottom neighborhood with two satellite campuses located in the Foxhall neighborhood of Washington, D.C. and in Ashburn, Virginia. It is the second oldest and the largest institution of higher education in the District of Columbia."
},
{
- "query": "The game show Keep It in the Family was hosted by an actor that played what role in Coronation Street\"?\"",
- "response": "Keep It in the Family (UK game show) Keep It in the Family is a British game show that aired on ITV from 26 October 2014 to 19 December 2015 and is hosted by Bradley Walsh."
+ "query": "What was the original name of the New Hampshire team whose home ballpark was a stadium constructed in 1937?",
+ "response": "Nashua Pirates The Nashua Pirates were a minor league baseball team, based in Nashua, New Hampshire. The team started in 1983 as the Nashua Angels, an affiliate with the California Angels in the Eastern League. The club changed affiliations in 1984 to the Pittsburgh Pirates and were then renamed the Nashua Pirates. The team's home ballpark was Holman Stadium."
}
]
\ No newline at end of file
diff --git a/research/llm_dense_retriever/examples/bge-en-icl/MTEB/ImdbClassification.json b/research/llm_dense_retriever/examples/bge-en-icl/MTEB/ImdbClassification.json
index 34e69382..35b31642 100644
--- a/research/llm_dense_retriever/examples/bge-en-icl/MTEB/ImdbClassification.json
+++ b/research/llm_dense_retriever/examples/bge-en-icl/MTEB/ImdbClassification.json
@@ -1,6 +1,14 @@
[
{
- "query": "Renny Harlin's first American film was one of the best of a slew of prison-set horror films(like 'Death House' or 'The Chair')in the late 80's.Twenty years before,guard Lane Smith had wrongfully executed a condemned man.Now,he is the warden of the newly re-opened prison,and the man's ghost is back for bloody revenge.This atmospheric and very moody film features lots of gruesome gore and violence.Viggo Mortensen,Tiny Lister,Tom Everett and Kane Hodder are onhand for the entertaining carnage.",
+ "query": "This is the worst thing the TMNT franchise has ever spawned. I was a kid when this came out and I still thought it was deuce, even though I liked the original cartoon.\\n\\nThere's this one scene I remember when the mafia ape guy explains to his minions what rhetorical questions are. It's atrocious. Many fans hate on the series for including a female turtle, but that didn't bother me. So much so that I didn't even remember her until I read about the show recently. All in all, it's miserably forgettable.\\n\\nThe only okay thing was the theme song. Guilty pleasure, they call it... Nananana ninja...",
+ "response": "negative"
+ },
+ {
+ "query": "I saw this movie a few days ago and gamely jumped during the scary parts. I must admit, I found it pretty decent...until I started to THINK about what the characters were saying. Logical problems:\n1. Her boyfriend, who seems to be a pretty fit dude, makes no sound while being killed. Don't you think that he might have at least tried to take the killer?\n2. When the remark is made that the gym teacher is 'SOOOO in love with Lisa,' I almost screamed at the screen. When your best friend's family HAS BEEN KILLED BY A TEACHER WHO WAS IN LOVE WITH HER, you don't make comments like that if you have half of a heart.\n3. As soon as Nash asks the uncle how many exits they have in the house and the uncle catches on that there may be danger ahead, wouldn't the smart thing to do be to get Donna, boyfriend, aunt, and uncle into a car and drive far, far away, then bait the house with the HRT and police force so that the killer has no way to get out?\nI could go on. And on. And on. Basically, the plot was decent, the characters weren't profiled enough for you to actually feel any empathy when they were slaughtered and there were way too many errors.\nHOWEVER.\nThis movie might be good for teenagers, or young couples just looking for a fun night out. If you don't consider all the goofs, it's a mediocre film.",
+ "response": "negative"
+ },
+ {
+ "query": "What a night. Perry Mason then Have Gun, Will Travel followed by Gunsmoke (when it was a half hour) and finally at 10:30PM came 'Sea Hunt' with its wonderful opening theme music and Mike's boat sailing off to a new adventure. Terrific.. Regardless of the story it was the lead character (played by Lloyd Bridges), strong, honest, sincere. A Man's Man and a Boy's Man. This brought on an interest in boats that lasted for years. Why they don't show on cable or make it available on video, no idea.. Too bad.",
"response": "positive"
}
]
\ No newline at end of file
diff --git a/research/llm_dense_retriever/examples/bge-en-icl/MTEB/MedrxivClusteringS2S.json b/research/llm_dense_retriever/examples/bge-en-icl/MTEB/MedrxivClusteringS2S.json
index 3787dc3a..b719ca9c 100644
--- a/research/llm_dense_retriever/examples/bge-en-icl/MTEB/MedrxivClusteringS2S.json
+++ b/research/llm_dense_retriever/examples/bge-en-icl/MTEB/MedrxivClusteringS2S.json
@@ -1,22 +1,22 @@
[
{
- "query": "Evaluating the Efficacy of New Therapeutic Agents in the Management of Hypertension-Induced Kidney Damage",
- "response": "nephrology"
+ "query": "Longitudinal Analysis of SARS-CoV-2 Neutralizing Antibody Titers and Viral Load in Asymptomatic and Symptomatic Patients",
+ "response": "infectious diseases"
},
{
- "query": "Exploring the Relationship Between ICU Staffing Levels and Patient Outcomes in Severe Trauma Cases",
- "response": "intensive care and critical care medicine"
+ "query": "Impact of Public Health Messaging and Community Engagement on Vaccination Uptake During the COVID-19 Pandemic",
+ "response": "epidemiology"
},
{
- "query": "The Impact of Environmental Allergens on Pediatric Asthma and Ear Infections",
- "response": "otolaryngology"
+ "query": "Long-term Effects of Ambient Temperature on COPD Hospitalizations: A Population-based Analysis in Northern Europe",
+ "response": "public and global health"
},
{
- "query": "Patient-Reported Outcomes in Rehabilitation: The Importance of Psychosocial Factors in Recovery",
- "response": "rehabilitation medicine and physical therapy"
+ "query": "Genomic Landscape of Rare Genetic Disorders Revealed through Whole-Exome Sequencing in Pediatric Populations",
+ "response": "genetic and genomic medicine"
},
{
- "query": "The Role of Micronutrients in Supporting Immune Function During Viral Infections",
- "response": "nutrition"
+ "query": "Impact of Gut Microbiota on Neuroinflammation and Cognitive Function in Multiple Sclerosis Patients: A Prospective Study",
+ "response": "neurology"
}
]
\ No newline at end of file
diff --git a/research/llm_dense_retriever/examples/bge-en-icl/MTEB/NFCorpus.json b/research/llm_dense_retriever/examples/bge-en-icl/MTEB/NFCorpus.json
index 25ee39c8..ed0c92bf 100644
--- a/research/llm_dense_retriever/examples/bge-en-icl/MTEB/NFCorpus.json
+++ b/research/llm_dense_retriever/examples/bge-en-icl/MTEB/NFCorpus.json
@@ -1,14 +1,14 @@
[
{
- "query": "lung disease",
- "response": "Hibiscus anthocyanins rich extract-induced apoptotic cell death in human promyelocytic leukemia cells. Hibiscus sabdariffa Linne (Malvaceae), an attractive plant believed to be native to Africa, is cultivated in the Sudan and Eastern Taiwan. Anthocyanins exist widely in many vegetables and fruits. Some reports demonstrated that anthocyanins extracted from H. sabdariffa L., Hibiscus anthocyanins (HAs) (which are a group of natural pigments existing in the dried calyx of H. sabdariffa L.) exhibited antioxidant activity and liver protection. Therefore, in this study, we explored the effect of HAs on human cancer cells. The result showed that HAs could cause cancer cell apoptosis, especially in HL-60 cells. Using flow cytometry, we found that HAs treatment (0-4 mg/ml) markedly induced apoptosis in HL-60 cells in a dose- and time-dependent manner. The result also revealed increased phosphorylation in p38 and c-Jun, cytochrome c release, and expression of tBid, Fas, and FasL in the HAs-treated HL-60 cells. We further used SB203580 (p38 inhibitor), PD98059 (MEK inhibitor), SP600125 (JNK inhibitor), and wortmannin (phosphatidylinositol 3-kinase; PI-3K inhibitor) to evaluate their effect on the HAs-induced HL-60 death. The data showed that only SB203580 had strong potential in inhibiting HL-60 cell apoptosis and related protein expression and phosphorylation. Therefore, we suggested that HAs mediated HL-60 apoptosis via the p38-FasL and Bid pathway. According to these results, HAs could be developed as chemopreventive agents. However, further investigations into the specificity and mechanism(s) of HAs are needed."
+ "query": "Eat Beans to Live Longer",
+ "response": "Beans are rich in protein, fiber, and nutrients, aiding in weight management and reducing the risk of chronic diseases like diabetes and heart ailments. Studies suggest that diets rich in beans promote longevity due to their ability to regulate blood sugar levels and lower cholesterol. Incorporating beans into your diet may enhance overall health and longevity, making them an essential addition to a balanced diet for longevity and well-being."
},
{
- "query": "arthritis",
- "response": "A clustering of immune-mediated polyradiculoneuropathy among swine abattoir workers exposed to aerosolized porcine brains, Indiana, United States. In November 2007 a novel neuropathy, immune-mediated polyradiculoneuropathy (IP), was identified among workers at a Minnesota swine abattoir where a unique compressed air technique was used to remove porcine brains. An epidemiologic investigation at another abattoir in Indiana that also uses this process was launched to evaluate workers self-reporting neurologic illness compatible with IP. A nested case-control study was performed to identify cases and risk factors. Six confirmed, one probable, and three possible IP cases were detected. IP cases were 28-52 years old, of Latino origin, and 62.5% female. Onset dates ranged from April 2005-December 2007; 60% were hospitalized. IP cases at this plant were similar in clinical presentation and exposure risks to those detected in Minnesota. Swine abattoirs using similar brain extraction methods should discontinue this process."
+ "query": "Which Common Fruit Fights Cancer Better?",
+ "response": "Determining which common fruit fights cancer better is complex. However, several fruits show promising anticancer properties due to their high antioxidant content, including berries, citrus fruits like oranges and lemons, and apples. These fruits contain compounds like flavonoids and vitamin C, which may help inhibit cancer cell growth and reduce inflammation. Nonetheless, it's crucial to maintain a varied diet rich in fruits and vegetables to maximize health benefits and potentially lower cancer risk. Consulting with a healthcare professional for personalized advice is advisable."
},
{
- "query": "vitamin C",
- "response": "Which population level environmental factors are associated with asthma, rhinoconjunctivitis and eczema? Review of the ecological analyses of ISAAC Phase One The International Study of Asthma and Allergies in Childhood (ISAAC) Phase One showed large worldwide variations in the prevalence of symptoms of asthma, rhinoconjunctivitis and eczema, up to 10 to 20 fold between countries. Ecological analyses were undertaken with ISAAC Phase One data to explore factors that may have contributed to these variations, and are summarised and reviewed here. In ISAAC Phase One the prevalence of symptoms in the past 12 months of asthma, rhinoconjunctivitis and eczema were estimated from studies in 463,801 children aged 13 - 14 years in 155 centres in 56 countries, and in 257,800 children aged 6-7 years in 91 centres in 38 countries. Ecological analyses were undertaken between symptom prevalence and the following: Gross National Product per capita (GNP), food intake, immunisation rates, tuberculosis notifications, climatic factors, tobacco consumption, pollen, antibiotic sales, paracetamol sales, and outdoor air pollution. Symptom prevalence of all three conditions was positively associated with GNP, trans fatty acids, paracetamol, and women smoking, and inversely associated with food of plant origin, pollen, immunisations, tuberculosis notifications, air pollution, and men smoking. The magnitude of these associations was small, but consistent in direction between conditions. There were mixed associations of climate and antibiotic sales with symptom prevalence. The potential causality of these associations warrant further investigation. Factors which prevent the development of these conditions, or where there is an absence of a positive correlation at a population level may be as important from the policy viewpoint as a focus on the positive risk factors. Interventions based on small associations may have the potential for a large public health benefit."
+ "query": "Raisins vs. Energy Gels for Athletic Performance",
+ "response": "When it comes to athletic performance, the choice between raisins and energy gels depends on individual preferences and nutritional needs. Raisins offer natural sugars, fiber, and antioxidants, providing sustained energy and aiding digestion. On the other hand, energy gels deliver quick-release carbohydrates and electrolytes, ideal for rapid energy boosts during intense exercise. Athletes should experiment to see which option works best for them, considering factors like taste, digestibility, and convenience to optimize performance and fuel their workouts effectively."
}
]
\ No newline at end of file
diff --git a/research/llm_dense_retriever/examples/bge-en-icl/MTEB/RedditClustering.json b/research/llm_dense_retriever/examples/bge-en-icl/MTEB/RedditClustering.json
index f7673bee..c166a597 100644
--- a/research/llm_dense_retriever/examples/bge-en-icl/MTEB/RedditClustering.json
+++ b/research/llm_dense_retriever/examples/bge-en-icl/MTEB/RedditClustering.json
@@ -14,5 +14,9 @@
{
"query": "Coffee Addiction: When It's More Than Just a Drink",
"response": "Coffee.txt"
+ },
+ {
+ "query": "Understanding Boeing's Micro-Missile Capabilities",
+ "response": "aviation.txt"
}
]
\ No newline at end of file
diff --git a/research/llm_dense_retriever/examples/bge-en-icl/MTEB/SCIDOCS.json b/research/llm_dense_retriever/examples/bge-en-icl/MTEB/SCIDOCS.json
index 834bead8..ea85dc4a 100644
--- a/research/llm_dense_retriever/examples/bge-en-icl/MTEB/SCIDOCS.json
+++ b/research/llm_dense_retriever/examples/bge-en-icl/MTEB/SCIDOCS.json
@@ -1,10 +1,14 @@
[
{
- "query": "Enhancing Urban Mobility Through Intelligent Transportation Systems",
- "response": "Intelligent Transportation Systems (ITS) represent a revolutionary approach to urban mobility by leveraging advanced technologies to improve transportation efficiency and safety. This paper explores the integration of real-time traffic monitoring, adaptive signal control, and vehicle-to-infrastructure communication to optimize traffic flow and reduce congestion. The study highlights how data from various sensors, combined with predictive analytics, can lead to smarter decision-making and better management of transportation networks. It also discusses the challenges associated with implementing ITS, including system interoperability and data privacy concerns. The findings suggest that while ITS holds significant promise for enhancing urban mobility, ongoing research and technological advancements are crucial to addressing existing limitations and fully realizing its potential."
+ "query": "A Direct Search Method to solve Economic Dispatch Problem with Valve-Point Effect",
+ "response": "In this study, we propose a novel Direct Search Method (DSM) for solving the Economic Dispatch Problem (EDP) incorporating the Valve-Point Effect (VPE). The DSM efficiently optimizes the allocation of generation among power units considering nonlinear characteristics induced by VPE. Through numerical experiments, we demonstrate the effectiveness and superiority of the DSM in minimizing the total generation cost while satisfying operational constraints. The proposed method presents a promising approach for enhancing the economic operation of power systems amidst valve-point effects."
},
{
- "query": "Efficient Algorithms for Mining Association Rules in Large Databases",
- "response": "Association rule mining is a fundamental problem in data mining, which involves finding interesting relationships or patterns among a set of items in large datasets. Traditional algorithms, such as Apriori, suffer from inefficiencies in handling very large databases due to the high computational cost of candidate generation and frequent itemset counting. This paper introduces a novel algorithm called FP-Growth (Frequent Pattern Growth) that addresses these inefficiencies by using a compact data structure known as the FP-tree. FP-Growth constructs the FP-tree by compressing the database and recursively dividing it into smaller, manageable parts. This approach eliminates the need for candidate generation and significantly reduces the computational overhead. The algorithm is shown to be highly efficient in mining association rules, with substantial improvements in performance and scalability over previous methods. Theoretical analysis and experimental results demonstrate the effectiveness of FP-Growth in handling large-scale datasets and extracting valuable association rules."
+ "query": "Detection of distributed denial of service attacks using machine learning algorithms in software defined networks",
+ "response": "This research investigates the application of machine learning algorithms to detect distributed denial of service (DDoS) attacks within software-defined networks (SDNs). Leveraging the flexibility and programmability of SDNs, we propose a novel approach that integrates machine learning techniques to identify and mitigate DDoS attacks effectively. Through extensive experimentation, we evaluate the performance of various machine learning algorithms in accurately detecting and classifying DDoS attacks. Our findings highlight the efficacy of machine learning-based solutions in enhancing the security of SDNs against malicious cyber threats."
+ },
+ {
+ "query": "Discovering social circles in ego networks",
+ "response": "This study explores methods for discovering social circles within ego networks, focusing on the identification of cohesive groups of individuals and their interconnections. Leveraging graph-theoretical approaches and community detection algorithms, we propose a novel framework for automatically detecting social circles embedded in ego networks. Through empirical analysis on real-world social network datasets, we demonstrate the effectiveness of our approach in accurately uncovering meaningful social structures. Our findings contribute to advancing our understanding of social network dynamics and fostering insights into community formation processes within ego networks."
}
]
\ No newline at end of file
diff --git a/research/llm_dense_retriever/examples/bge-en-icl/MTEB/StackExchangeClustering.json b/research/llm_dense_retriever/examples/bge-en-icl/MTEB/StackExchangeClustering.json
index c3814a2a..f0ab619a 100644
--- a/research/llm_dense_retriever/examples/bge-en-icl/MTEB/StackExchangeClustering.json
+++ b/research/llm_dense_retriever/examples/bge-en-icl/MTEB/StackExchangeClustering.json
@@ -14,5 +14,9 @@
{
"query": "Can amans\" be used as a substantival participle in Latin?\"",
"response": "latin.stackexchange.com.txt"
+ },
+ {
+ "query": "Is it normal for a 2018 Audi A4 to consume coolant frequently?",
+ "response": "mechanics.stackexchange.com.txt"
}
]
\ No newline at end of file
diff --git a/research/llm_dense_retriever/examples/bge-en-icl/MTEB/SummEval.json b/research/llm_dense_retriever/examples/bge-en-icl/MTEB/SummEval.json
index f20b9665..c3c21dc9 100644
--- a/research/llm_dense_retriever/examples/bge-en-icl/MTEB/SummEval.json
+++ b/research/llm_dense_retriever/examples/bge-en-icl/MTEB/SummEval.json
@@ -1,10 +1,6 @@
[
{
- "query": "Luis Su\u00e1rez is reportedly being eyed by Barcelona for a potential return. After a successful spell at Atl\u00e9tico Madrid, the Uruguayan striker has caught the attention of his former club. Barcelona is looking to strengthen their attack and sees Su\u00e1rez as a viable option. The move could see Su\u00e1rez reuniting with Lionel Messi, rekindling their successful partnership.",
- "response": "Barcelona is considering bringing back Luis Su\u00e1rez. Su\u00e1rez, who currently plays for Atl\u00e9tico Madrid, has performed well and attracted interest from his former club. This potential move aims to bolster Barcelona's attack, potentially reuniting Su\u00e1rez with Lionel Messi."
- },
- {
- "query": "The United States has imposed sanctions on several Chinese officials in response to Beijing's actions in Hong Kong. The sanctions target individuals who are seen as responsible for undermining Hong Kong's autonomy. The move comes amid increasing tensions between the US and China over a range of issues, including trade and human rights.",
- "response": "The US has sanctioned Chinese officials for their role in undermining Hong Kong's autonomy. This decision is part of the growing tension between the US and China over various issues such as trade and human rights."
+ "query": "passenger jin pai , 35 , was standing on the rim of a toilet when it collapsed , leaving him hospitalised with deep cuts on his leg and buttocks after he broke a toilet he was squatting on . passenger jin pai , 35 , was standing on the rim of a toilet when it smashed to the ground . according to airport officials he had not wanted to let his bottom touch the seat because he was ' worried it might not be clean ' .",
+ "response": "Jin Pai was standing on rim of a toilet in Hefei Xinqiao International Airport. The porcelain toilet then tipped over and shattered on the floor. The 35-year-old is left with deep cuts to his leg and buttocks."
}
]
\ No newline at end of file
diff --git a/research/llm_dense_retriever/examples/bge-en-icl/MTEB/TRECCOVID.json b/research/llm_dense_retriever/examples/bge-en-icl/MTEB/TRECCOVID.json
index 88689c85..c4eded7d 100644
--- a/research/llm_dense_retriever/examples/bge-en-icl/MTEB/TRECCOVID.json
+++ b/research/llm_dense_retriever/examples/bge-en-icl/MTEB/TRECCOVID.json
@@ -1,10 +1,14 @@
[
{
- "query": "How effective are antiviral drugs like favipiravir and molnupiravir against COVID-19?",
- "response": "The ongoing COVID-19 pandemic has led to extensive research into antiviral drugs to combat the virus. Favipiravir and molnupiravir have emerged as potential treatments. Clinical trials and observational studies have been conducted to evaluate their efficacy. Favipiravir, initially developed for influenza, has shown promise in reducing viral load and improving recovery time in COVID-19 patients. Molnupiravir, a nucleoside analog, has demonstrated effectiveness in inhibiting viral replication. Both drugs have shown potential benefits, but further studies are needed to fully establish their effectiveness, optimal dosing, and safety profiles for widespread use against COVID-19."
+ "query": "How has misinformation impacted public trust in COVID-19 vaccines?",
+ "response": "Infodemic: The Impact of Misinformation on COVID-19 Vaccine Hesitancy Amidst the global fight against COVID-19, misinformation has emerged as a formidable adversary, threatening public health efforts to combat the pandemic. The rapid spread of false claims and conspiracy theories has sowed seeds of doubt and skepticism, particularly regarding the safety and efficacy of COVID-19 vaccines. This infodemic has eroded public trust in vaccination programs, leading to vaccine hesitancy in communities worldwide. In this article, we explore the origins and propagation of COVID-19 vaccine misinformation, analyze its detrimental effects on public perception, and propose strategies to mitigate its influence. As vaccination campaigns continue to be crucial in achieving herd immunity, addressing misinformation remains paramount in safeguarding public health and restoring trust in scientific expertise."
},
{
- "query": "How does COVID-19 affect patients with compromised immune systems?",
- "response": "Antiviral drugs like favipiravir and molnupiravir have been investigated for their effectiveness against COVID-19. Favipiravir, originally developed for influenza treatment, has shown promise in some studies for reducing viral load and improving clinical outcomes in COVID-19 patients, particularly in mild to moderate cases. Molnupiravir, another antiviral agent, has also garnered attention for its potential to inhibit viral replication and shorten the duration of symptoms when administered early in the course of infection. Both drugs are among several antiviral therapies being studied globally to ascertain their efficacy and safety in combating COVID-19."
+ "query": "What are the economic impacts of lockdowns during the COVID-19 pandemic?",
+ "response": "Economic Consequences of Lockdowns: Lessons from the COVID-19 Pandemic The COVID-19 pandemic has necessitated unprecedented measures, including lockdowns, to curb the spread of the virus. While these restrictions have been instrumental in protecting public health, they have also triggered profound economic repercussions globally. This article examines the multifaceted economic impacts of lockdowns, ranging from disruptions in supply chains and plummeting consumer demand to widespread job losses and financial strain on businesses. We delve into case studies of various countries to assess the effectiveness of economic stimulus packages and policy responses in mitigating the adverse effects of lockdowns. Furthermore, we explore lessons learned and propose strategies for fostering economic resilience in the face of future pandemics or crises. Understanding these economic dynamics is crucial for policymakers and businesses alike as they navigate the complexities of post-pandemic recovery."
+ },
+ {
+ "query": "What are the transmission patterns and epidemiological characteristics of COVID-19 in different regions?",
+ "response": "Dancing with the Virus: Unveiling COVID-19's Regional Rhythms COVID-19, caused by the novel coronavirus SARS-CoV-2, has set a complex global stage, revealing intricate regional transmission patterns and epidemiological landscapes. From densely populated urban centers to remote rural communities, the virus orchestrates its spread with varied tempo and intensity. This article delves into the divergent paths traced by COVID-19 across different regions, highlighting how factors such as population density, healthcare infrastructure, and cultural practices influence its transmission dynamics. In urban hubs like New York City and Mumbai, high population density and frequent human interactions have fueled rapid community transmission, prompting stringent containment measures. Contrastingly, in sparsely populated regions like the Nordic countries and rural Africa, slower transmission rates reflect both lower population density and effective early containment strategies. Understanding these diverse epidemiological characteristics not only informs local response strategies but also underscores the global challenge of harmonizing pandemic control efforts. As COVID-19 continues its global tour, the resilience of regional healthcare systems and the adaptability of public health interventions will define our collective ability to sway to this viral rhythm."
}
]
\ No newline at end of file
diff --git a/research/llm_dense_retriever/examples/bge-en-icl/MTEB/ToxicConversationsClassification.json b/research/llm_dense_retriever/examples/bge-en-icl/MTEB/ToxicConversationsClassification.json
index ec617705..8c495728 100644
--- a/research/llm_dense_retriever/examples/bge-en-icl/MTEB/ToxicConversationsClassification.json
+++ b/research/llm_dense_retriever/examples/bge-en-icl/MTEB/ToxicConversationsClassification.json
@@ -1,18 +1,26 @@
[
{
- "query": "Pull your little head out of your big ass areola!",
+ "query": "What is most important is that he be independent and loyal nott o Trump or any president but loyal to the Constitution. Trump is a disgrace and a disaster. He needs to go back to his phony TV show.",
"response": "toxic"
},
{
- "query": "Trudeau will survive this alright as, unfortunately, Canadians are amongst the most dumbed down on the earth thanks to no real media other than his lapdogs.",
- "response": "toxic"
+ "query": "Nobody awake on the bridge, no lookouts ,no radar tech. Never saw the ships lights ???? Oh boy !!",
+ "response": "not toxic"
+ },
+ {
+ "query": "Yes, Hussein Obama was elected. 17 days until the end of an error.",
+ "response": "not toxic"
},
{
- "query": "No evidence of one ever starting a fire. Better ban them.",
+ "query": "Cycling is extremely hazardous for the lungs, with ozone effect, diesel fumes, car, and bus exhaust carrying dust into the air, un-burnt gas, and oil from scooters, the cigarette smoke coming from cars, micro glass from the asphalt, dust from construction, and smog can ruin a ride.",
"response": "not toxic"
},
{
- "query": "What's this about Andrew Jackson and the civil war? And now he is saying he would be honored\" to meet with NK dictator Kim Jung Un. Is the pressure of being president getting to him? I am worried.\"",
+ "query": "So far as I know no one has actually spent a minute trying to defeat this device. But if amateurs never tried to hack security systems, it would wind up being done first by actual criminals.",
"response": "not toxic"
+ },
+ {
+ "query": "Why? Make it legal just like pot. If people OD on it, well that's their own fault for doing something so stupid.",
+ "response": "toxic"
}
]
\ No newline at end of file
diff --git a/research/llm_dense_retriever/examples/bge-en-icl/MTEB/TwentyNewsgroupsClustering.json b/research/llm_dense_retriever/examples/bge-en-icl/MTEB/TwentyNewsgroupsClustering.json
index f29ed112..a776fdc4 100644
--- a/research/llm_dense_retriever/examples/bge-en-icl/MTEB/TwentyNewsgroupsClustering.json
+++ b/research/llm_dense_retriever/examples/bge-en-icl/MTEB/TwentyNewsgroupsClustering.json
@@ -7,6 +7,10 @@
"query": "Bruins' Unstoppable Winning Streak",
"response": "rec.sport.hockey"
},
+ {
+ "query": "Comparing Windows File Systems: NTFS vs. FAT32 vs. exFAT",
+ "response": "comp.os.ms-windows.misc"
+ },
{
"query": "Troubleshooting a Digital Multimeter Calibration Issue",
"response": "sci.electronics"