From d1e80934285a8fdcd625c8e07dd3fec0d5c514d4 Mon Sep 17 00:00:00 2001 From: Rishabh Srivastava Date: Wed, 18 Sep 2024 21:03:44 +0800 Subject: [PATCH] fixed issue where some tables were in all caps in golden queries --- data/questions_gen_mysql.csv | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/data/questions_gen_mysql.csv b/data/questions_gen_mysql.csv index f6f0941..7d03baa 100644 --- a/data/questions_gen_mysql.csv +++ b/data/questions_gen_mysql.csv @@ -1,5 +1,5 @@ db_name,db_type,query_category,query,question,instructions -academic,mysql,group_by,"SELECT author.name FROM author WHERE author.aid IN (SELECT domain_author.aid FROM domain_author WHERE domain_author.did IN (SELECT domain.did FROM DOMAIN WHERE domain.name IN ('Machine Learning', 'Data Science')) GROUP BY 1 HAVING COUNT(DISTINCT domain_author.did) = 2);SELECT author.aid FROM author WHERE author.aid IN (SELECT domain_author.aid FROM domain_author WHERE domain_author.did IN (SELECT domain.did FROM DOMAIN WHERE domain.name IN ('Machine Learning', 'Data Science')) GROUP BY 1 HAVING COUNT(DISTINCT domain_author.did) = 2);SELECT author.name, author.aid FROM author WHERE author.aid IN (SELECT domain_author.aid FROM domain_author WHERE domain_author.did IN (SELECT domain.did FROM DOMAIN WHERE domain.name IN ('Machine Learning', 'Data Science')) GROUP BY 1 HAVING COUNT(DISTINCT domain_author.did) = 2);","Which authors have written publications in both the domain ""Machine Learning"" and the domain ""Data Science""?", +academic,mysql,group_by,"SELECT author.name FROM author WHERE author.aid IN (SELECT domain_author.aid FROM domain_author WHERE domain_author.did IN (SELECT domain.did FROM domain WHERE domain.name IN ('Machine Learning', 'Data Science')) GROUP BY 1 HAVING COUNT(DISTINCT domain_author.did) = 2);SELECT author.aid FROM author WHERE author.aid IN (SELECT domain_author.aid FROM domain_author WHERE domain_author.did IN (SELECT domain.did FROM domain WHERE domain.name IN ('Machine Learning', 'Data Science')) GROUP BY 1 HAVING COUNT(DISTINCT domain_author.did) = 2);SELECT author.name, author.aid FROM author WHERE author.aid IN (SELECT domain_author.aid FROM domain_author WHERE domain_author.did IN (SELECT domain.did FROM domain WHERE domain.name IN ('Machine Learning', 'Data Science')) GROUP BY 1 HAVING COUNT(DISTINCT domain_author.did) = 2);","Which authors have written publications in both the domain ""Machine Learning"" and the domain ""Data Science""?", academic,mysql,group_by,"SELECT author.name, SUM(publication.citation_num) AS total_citations FROM author JOIN writes ON author.aid = writes.aid JOIN publication ON writes.pid = publication.pid GROUP BY author.name ORDER BY total_citations DESC;SELECT author.aid, SUM(publication.citation_num) AS total_citations FROM author JOIN writes ON author.aid = writes.aid JOIN publication ON writes.pid = publication.pid GROUP BY author.aid ORDER BY total_citations DESC;SELECT author.name, author.aid, SUM(publication.citation_num) AS total_citations FROM author JOIN writes ON author.aid = writes.aid JOIN publication ON writes.pid = publication.pid GROUP BY author.name, author.aid ORDER BY total_citations DESC;SELECT a.aid, COUNT(c.cited) AS total_citations FROM author AS a JOIN writes AS w ON a.aid = w.aid JOIN publication AS p ON w.pid = p.pid JOIN cite AS c ON p.pid = c.cited GROUP BY a.aid ORDER BY CASE WHEN total_citations IS NULL THEN 1 ELSE 0 END DESC, total_citations DESC;SELECT a.name, COUNT(c.cited) AS total_citations FROM author AS a JOIN writes AS w ON a.aid = w.aid JOIN publication AS p ON w.pid = p.pid JOIN cite AS c ON p.pid = c.cited GROUP BY a.name ORDER BY CASE WHEN total_citations IS NULL THEN 1 ELSE 0 END DESC, total_citations DESC;SELECT a.aid, a.name, COUNT(c.cited) AS total_citations FROM author AS a JOIN writes AS w ON a.aid = w.aid JOIN publication AS p ON w.pid = p.pid JOIN cite AS c ON p.pid = c.cited GROUP BY a.aid, a.name ORDER BY CASE WHEN total_citations IS NULL THEN 1 ELSE 0 END DESC, total_citations DESC;",What is the total number of citations received by each author?, academic,mysql,group_by,"SELECT publication.year, COUNT(DISTINCT publication.pid) AS total_publications FROM publication GROUP BY publication.year ORDER BY CASE WHEN publication.year IS NULL THEN 1 ELSE 0 END, publication.year;",What is the total number of publications published in each year?, academic,mysql,group_by,"SELECT domain.name, AVG(publication.reference_num) AS average_references FROM domain_publication JOIN publication ON domain_publication.pid = publication.pid JOIN domain ON domain.did = domain_publication.did GROUP BY domain.name;SELECT domain.did, AVG(publication.reference_num) AS average_references FROM domain_publication JOIN publication ON domain_publication.pid = publication.pid JOIN domain ON domain.did = domain_publication.did GROUP BY domain.did;SELECT domain.name, domain.did, AVG(publication.reference_num) AS average_references FROM domain_publication JOIN publication ON domain_publication.pid = publication.pid JOIN domain ON domain.did = domain_publication.did GROUP BY domain.name, domain.did;",What is the average number of references cited by publications in each domain name?, @@ -22,7 +22,7 @@ academic,mysql,table_join,SELECT COUNT(DISTINCT publication.pid) FROM publicatio academic,mysql,instruct,"SELECT DISTINCT organization.name FROM organization JOIN author ON organization.oid = author.oid JOIN writes ON author.aid = writes.aid JOIN domain_publication ON writes.pid = domain_publication.pid JOIN domain ON domain_publication.did = domain.did WHERE domain.name = 'Machine Learning';SELECT DISTINCT organization.oid FROM organization JOIN author ON organization.oid = author.oid JOIN writes ON author.aid = writes.aid JOIN domain_publication ON writes.pid = domain_publication.pid JOIN domain ON domain_publication.did = domain.did WHERE domain.name = 'Machine Learning';SELECT DISTINCT organization.name, organization.oid FROM organization JOIN author ON organization.oid = author.oid JOIN writes ON author.aid = writes.aid JOIN domain_publication ON writes.pid = domain_publication.pid JOIN domain ON domain_publication.did = domain.did WHERE domain.name = 'Machine Learning';","Which organizations have authors who have written publications in the domain ""Machine Learning""?",Always filter names using an exact match academic,mysql,instruct,"SELECT DISTINCT a2.name FROM author AS a1 JOIN domain_author AS da1 ON a1.aid = da1.aid JOIN domain_author AS da2 ON da1.did = da2.did JOIN author AS a2 ON da2.aid = a2.aid WHERE LOWER(LOWER(a1.name)) LIKE LOWER('%martin%');SELECT DISTINCT a2.aid FROM author AS a1 JOIN domain_author AS da1 ON a1.aid = da1.aid JOIN domain_author AS da2 ON da1.did = da2.did JOIN author AS a2 ON da2.aid = a2.aid WHERE LOWER(LOWER(a1.name)) LIKE LOWER('%martin%');SELECT DISTINCT a2.name, a2.aid FROM author AS a1 JOIN domain_author AS da1 ON a1.aid = da1.aid JOIN domain_author AS da2 ON da1.did = da2.did JOIN author AS a2 ON da2.aid = a2.aid WHERE LOWER(LOWER(a1.name)) LIKE LOWER('%martin%');",Which authors belong to the same domain as Martin?,Always filter names using LIKE with percent sign wildcards academic,mysql,instruct,"SELECT DISTINCT name FROM author WHERE oid IS NULL;SELECT DISTINCT aid FROM author WHERE oid IS NULL;SELECT DISTINCT name, aid FROM author WHERE oid IS NULL;",Which authors are not part of any organization?,Always filter names using LIKE -academic,mysql,instruct,"SELECT DISTINCT publication.title FROM DOMAIN JOIN domain_author ON domain.did = domain_author.did JOIN writes ON domain_author.aid = writes.aid JOIN publication ON writes.pid = publication.pid JOIN domain_conference ON domain.did = domain_conference.did WHERE LOWER(domain.name) LIKE LOWER('%Sociology%') AND publication.year = 2020 AND publication.cid = domain_conference.cid;SELECT DISTINCT publication.pid FROM DOMAIN JOIN domain_author ON domain.did = domain_author.did JOIN writes ON domain_author.aid = writes.aid JOIN publication ON writes.pid = publication.pid JOIN domain_conference ON domain.did = domain_conference.did WHERE LOWER(domain.name) LIKE LOWER('%Sociology%') AND publication.year = 2020 AND publication.cid = domain_conference.cid;SELECT DISTINCT publication.title, publication.pid FROM DOMAIN JOIN domain_author ON domain.did = domain_author.did JOIN writes ON domain_author.aid = writes.aid JOIN publication ON writes.pid = publication.pid JOIN domain_conference ON domain.did = domain_conference.did WHERE LOWER(domain.name) LIKE LOWER('%Sociology%') AND publication.year = 2020 AND publication.cid = domain_conference.cid;",What are the publications written by authors from the 'Sociology' domain and presented at conferences in the year 2020?,"To get publications written by authors from a given domain, you would need to join domain, domain_author, author to link the domain to the author first, and then join with write to link with the publication id. Finally, to see which ones were presented at conferences, you must join the domain table with the domain_conference table. You must also filter names using LIKE." +academic,mysql,instruct,"SELECT DISTINCT publication.title FROM domain JOIN domain_author ON domain.did = domain_author.did JOIN writes ON domain_author.aid = writes.aid JOIN publication ON writes.pid = publication.pid JOIN domain_conference ON domain.did = domain_conference.did WHERE LOWER(domain.name) LIKE LOWER('%Sociology%') AND publication.year = 2020 AND publication.cid = domain_conference.cid;SELECT DISTINCT publication.pid FROM domain JOIN domain_author ON domain.did = domain_author.did JOIN writes ON domain_author.aid = writes.aid JOIN publication ON writes.pid = publication.pid JOIN domain_conference ON domain.did = domain_conference.did WHERE LOWER(domain.name) LIKE LOWER('%Sociology%') AND publication.year = 2020 AND publication.cid = domain_conference.cid;SELECT DISTINCT publication.title, publication.pid FROM domain JOIN domain_author ON domain.did = domain_author.did JOIN writes ON domain_author.aid = writes.aid JOIN publication ON writes.pid = publication.pid JOIN domain_conference ON domain.did = domain_conference.did WHERE LOWER(domain.name) LIKE LOWER('%Sociology%') AND publication.year = 2020 AND publication.cid = domain_conference.cid;",What are the publications written by authors from the 'Sociology' domain and presented at conferences in the year 2020?,"To get publications written by authors from a given domain, you would need to join domain, domain_author, author to link the domain to the author first, and then join with write to link with the publication id. Finally, to see which ones were presented at conferences, you must join the domain table with the domain_conference table. You must also filter names using LIKE." academic,mysql,instruct,SELECT DISTINCT author.name FROM author JOIN writes ON author.aid = writes.aid JOIN publication ON writes.pid = publication.pid JOIN domain_publication ON publication.pid = domain_publication.pid JOIN domain ON domain_publication.did = domain.did WHERE LOWER(domain.name) LIKE LOWER('%computer%science%');,"What are the names of the authors who have written publications in the domain ""Computer Science""?","To get publications written by authors from a given domain, you would need to join domain, domain_author, author to link the domain to the author first, and then join with write to link with the publication id. You must also filter names using LIKE." advising,mysql,date_functions,"SELECT DATE_ADD('0000-01-01 00:00:00', INTERVAL (TIMESTAMPDIFF(MONTH, '0000-01-01 00:00:00', s.admit_term)) MONTH) AS month, COUNT(*) AS total_students FROM student AS s GROUP BY MONTH ORDER BY CASE WHEN total_students IS NULL THEN 1 ELSE 0 END DESC, total_students DESC LIMIT 1;SELECT DATE_FORMAT(DATE_ADD('0000-01-01 00:00:00', INTERVAL (TIMESTAMPDIFF(MONTH, '0000-01-01 00:00:00', s.admit_term)) MONTH), '%Y-%m') AS month, COUNT(*) AS total_students FROM student AS s GROUP BY month ORDER BY CASE WHEN total_students IS NULL THEN 1 ELSE 0 END DESC, total_students DESC LIMIT 1;",What month were most students admitted? Return the no. of students and the month as a date, advising,mysql,date_functions,SELECT AVG(predicted_graduation_semester - admit_term) AS average_predicted_time_to_graduation FROM student;,What's the average predicted time to graduation since admission in no. of days?, @@ -121,10 +121,10 @@ geography,mysql,instruct,"SELECT lake_name, area FROM lake WHERE LOWER(state_nam geography,mysql,instruct,"SELECT mountain_name, mountain_altitude FROM mountain WHERE LOWER(country_name) LIKE LOWER('%Nepal%');",What are the names and altitudes of the mountains in Nepal?,Always filter names using LIKE geography,mysql,instruct,"SELECT city_name, population FROM city WHERE LOWER(country_name) LIKE LOWER('%United States%');",Get the cities in the United States and their population,Always filter names using LIKE restaurants,mysql,group_by,"SELECT restaurant.food_type, COUNT(DISTINCT restaurant.id) AS total_number_of_restaurants FROM restaurant GROUP BY restaurant.food_type;",What is the total number of restaurants serving each type of food?, -restaurants,mysql,group_by,"SELECT location.city_name, COUNT(DISTINCT location.restaurant_id) AS total_count FROM LOCATION GROUP BY location.city_name;",What is the total count of restaurants in each city?, +restaurants,mysql,group_by,"SELECT location.city_name, COUNT(DISTINCT location.restaurant_id) AS total_count FROM location GROUP BY location.city_name;",What is the total count of restaurants in each city?, restaurants,mysql,group_by,"SELECT restaurant.food_type, AVG(restaurant.rating) AS average_rating FROM restaurant GROUP BY restaurant.food_type ORDER BY average_rating DESC;",What is the average rating of restaurants serving each type of food?, restaurants,mysql,group_by,"SELECT restaurant.city_name, COUNT(*) AS number_of_restaurants FROM restaurant WHERE LOWER(restaurant.food_type) LIKE LOWER('%Italian%') GROUP BY restaurant.city_name ORDER BY number_of_restaurants DESC;",How many restaurants serve Italian food in each city?, -restaurants,mysql,group_by,"SELECT location.city_name, COUNT(DISTINCT location.restaurant_id) AS number_of_restaurants FROM LOCATION GROUP BY location.city_name ORDER BY number_of_restaurants DESC;",How many restaurants are there in each city? Order the results by the number of restaurants in descending order., +restaurants,mysql,group_by,"SELECT location.city_name, COUNT(DISTINCT location.restaurant_id) AS number_of_restaurants FROM location GROUP BY location.city_name ORDER BY number_of_restaurants DESC;",How many restaurants are there in each city? Order the results by the number of restaurants in descending order., restaurants,mysql,order_by,"SELECT street_name FROM location GROUP BY street_name ORDER BY CASE WHEN COUNT(restaurant_id) IS NULL THEN 1 ELSE 0 END DESC, COUNT(restaurant_id) DESC LIMIT 1;",Which street has the most number of restaurants?, restaurants,mysql,order_by,"SELECT name FROM restaurant WHERE LOWER(food_type) LIKE LOWER('%Italian%') OR LOWER(city_name) LIKE LOWER('%New York%') ORDER BY CASE WHEN name IS NULL THEN 1 ELSE 0 END, name;",Which restaurants serve Italian cuisine or are located in New York? Order the results by the restaurant name., restaurants,mysql,order_by,"SELECT geographic.region, AVG(restaurant.rating) AS average_rating FROM restaurant JOIN geographic ON restaurant.city_name = geographic.city_name GROUP BY geographic.region ORDER BY CASE WHEN geographic.region IS NULL THEN 1 ELSE 0 END, geographic.region;",What is the average rating of restaurants in each region? Order the results by the region name., @@ -136,13 +136,13 @@ restaurants,mysql,ratio,"SELECT CAST(COUNT(CASE WHEN rating > 4 THEN 1 END) AS D restaurants,mysql,ratio,"SELECT CAST(SUM(CASE WHEN LOWER(restaurant.food_type) LIKE '%vegan%' THEN 1 ELSE 0 END) AS DOUBLE) / NULLIF(SUM(CASE WHEN NOT LOWER(restaurant.food_type) LIKE '%vegan%' THEN 1 ELSE 0 END), 0) AS ratio FROM restaurant WHERE LOWER(LOWER(restaurant.city_name)) LIKE LOWER('%san francisco%');",What is the ratio of restaurants serving vegan food to restaurants serving non-vegan food in San Francisco? Match food_type case insensitively, restaurants,mysql,ratio,"SELECT CAST(COUNT(CASE WHEN LOWER(food_type) LIKE LOWER('%Italian%') THEN 1 END) AS DOUBLE) / NULLIF(COUNT(food_type), 0) AS ratio FROM restaurant WHERE LOWER(city_name) LIKE LOWER('%Los Angeles%');",What is the ratio of Italian restaurants out of all restaurants in Los Angeles?, restaurants,mysql,table_join,"SELECT r.city_name, r.name, COUNT(r.id) AS restaurant_count FROM restaurant AS r GROUP BY r.city_name, r.name HAVING COUNT(r.id) > 1;","What cities have more than one restaurants with the same name, and how many of them are there? Return the city name, restaurant name, and restaurant count", -restaurants,mysql,table_join,"SELECT location.city_name, AVG(restaurant.rating) AS average_rating FROM restaurant JOIN LOCATION ON restaurant.id = location.restaurant_id WHERE LOWER(restaurant.food_type) LIKE '%mexican%' GROUP BY location.city_name;",What is the average rating of restaurants that serve Mexican food in each city?, +restaurants,mysql,table_join,"SELECT location.city_name, AVG(restaurant.rating) AS average_rating FROM restaurant JOIN location ON restaurant.id = location.restaurant_id WHERE LOWER(restaurant.food_type) LIKE '%mexican%' GROUP BY location.city_name;",What is the average rating of restaurants that serve Mexican food in each city?, restaurants,mysql,table_join,"SELECT geographic.region, AVG(restaurant.rating) AS average_rating FROM geographic JOIN restaurant ON geographic.city_name = restaurant.city_name GROUP BY 1;",What is the average rating of restaurants in each region?, restaurants,mysql,table_join,"SELECT geographic.region, COUNT(restaurant.id) AS number_of_restaurants FROM restaurant JOIN geographic ON restaurant.city_name = geographic.city_name WHERE LOWER(restaurant.food_type) LIKE '%italian%' GROUP BY geographic.region ORDER BY number_of_restaurants DESC;",How many restaurants serve Italian food in each region?, restaurants,mysql,table_join,"SELECT geographic.region, COUNT(DISTINCT restaurant.id) AS number_of_restaurants FROM geographic JOIN restaurant ON geographic.city_name = restaurant.city_name GROUP BY geographic.region ORDER BY CASE WHEN number_of_restaurants IS NULL THEN 1 ELSE 0 END DESC, number_of_restaurants DESC;SELECT geographic.region, COUNT(DISTINCT restaurant.id) AS number_of_restaurants FROM geographic LEFT JOIN restaurant ON geographic.city_name = restaurant.city_name GROUP BY geographic.region ORDER BY CASE WHEN number_of_restaurants IS NULL THEN 1 ELSE 0 END DESC, number_of_restaurants DESC;",How many restaurants are there in each region?, restaurants,mysql,instruct,SELECT DISTINCT restaurant.city_name FROM restaurant WHERE rating = (SELECT MAX(rating) FROM restaurant);,Which city has the highest-rated restaurant?,Match all strings case-insensitively using wildcard operators restaurants,mysql,instruct,"SELECT restaurant.name, restaurant.rating FROM restaurant WHERE restaurant.rating > 4 AND LOWER(restaurant.city_name) LIKE LOWER('%New York%');",What's the name and rating of all the restaurants that have a rating greater than 4 and are located in the city of New York?,Match all strings case-insensitively using wildcard operators -restaurants,mysql,instruct,"SELECT restaurant.name, restaurant.food_type FROM restaurant JOIN LOCATION ON restaurant.id = location.restaurant_id WHERE LOWER(location.street_name) LIKE LOWER('%Market St%') AND LOWER(location.city_name) LIKE LOWER('%San Francisco%');",What's the name and food type of all the restaurants located on Market St in San Francisco?,Match all strings case-insensitively using wildcard operators +restaurants,mysql,instruct,"SELECT restaurant.name, restaurant.food_type FROM restaurant JOIN location ON restaurant.id = location.restaurant_id WHERE LOWER(location.street_name) LIKE LOWER('%Market St%') AND LOWER(location.city_name) LIKE LOWER('%San Francisco%');",What's the name and food type of all the restaurants located on Market St in San Francisco?,Match all strings case-insensitively using wildcard operators restaurants,mysql,instruct,SELECT restaurant.name FROM restaurant WHERE LOWER(LOWER(restaurant.food_type)) LIKE LOWER('%italian%');,What are the names of the restaurants that serve Italian food?,Match all strings case-insensitively using wildcard operators restaurants,mysql,instruct,"SELECT DISTINCT restaurant.name FROM restaurant WHERE LOWER(restaurant.city_name) LIKE LOWER('%Los Angeles%') AND restaurant.rating > 4 ORDER BY CASE WHEN restaurant.name IS NULL THEN 1 ELSE 0 END, restaurant.name;",What are the names of the restaurants in Los Angeles that have a rating higher than 4?,Match all strings case-insensitively using wildcard operators scholar,mysql,group_by,"SELECT paper.year, COUNT(paper.paperid) AS total_papers FROM paper GROUP BY paper.year ORDER BY CASE WHEN paper.year IS NULL THEN 1 ELSE 0 END, paper.year;",What is the total number of papers published per year?,