diff --git a/data/questions_gen.csv b/data/questions_gen.csv index 33b0b29..2d1921a 100644 --- a/data/questions_gen.csv +++ b/data/questions_gen.csv @@ -29,7 +29,7 @@ Which authors belong to the same domain as Martin?,"SELECT DISTINCT {a2.name, a2 Which authors are not part of any organization?,"SELECT DISTINCT {name, aid} FROM author WHERE oid IS NULL",academic,where What are the publications written by authors from the 'Sociology' domain and presented at conferences in the year 2020?,"SELECT DISTINCT {publication.title, publication.pid} FROM DOMAIN JOIN domain_author ON domain.did = domain_author.did JOIN writes ON domain_author.aid = writes.aid JOIN publication ON writes.pid = publication.pid JOIN domain_conference ON domain.did = domain_conference.did WHERE domain.name ILIKE '%Sociology%' AND publication.year = 2020 AND publication.cid = domain_conference.cid;",academic,where "What are the names of the authors who have written publications in the domain ""Computer Science""?",SELECT DISTINCT author.name FROM author JOIN writes ON author.aid = writes.aid JOIN publication ON writes.pid = publication.pid JOIN domain_publication ON publication.pid = domain_publication.pid JOIN DOMAIN ON domain_publication.did = domain.did WHERE domain.name ilike '%computer%science%';,academic,where -What month were most students admitted?,"SELECT extract(MONTH FROM s.admit_term) AS MONTH, count(*) AS num_students FROM student s GROUP BY MONTH ORDER BY num_students DESC LIMIT 1;",advising,date_functions +What month were most students admitted?,"SELECT date_trunc('month', s.admit_term) AS MONTH, COUNT(*) AS total_students FROM student s GROUP BY MONTH ORDER BY total_students DESC LIMIT 1;",advising,date_functions What's the average predicted time to graduation since admission in no. 
of days?,SELECT avg(predicted_graduation_semester - admit_term) AS average_predicted_time_to_graduation FROM student;,advising,date_functions How many students were predicted to graduate in the last 10 years?,"SELECT count(*) AS num_students_graduated FROM student WHERE predicted_graduation_semester >= DATE_TRUNC('year', CURRENT_DATE) - interval '10 year';",advising,date_functions How long has it been since the last admitted student?,SELECT CURRENT_DATE - max(admit_term) AS duration_since_last_admitted_student FROM student;,advising,date_functions @@ -68,7 +68,7 @@ What is the total cost of round-trip fares for each airline code?,"SELECT fare.f "What is the average cost of round-trip fares from Los Angeles (LAX) to Chicago (ORD) for each airline, sorted in descending order by average cost?","SELECT fare.fare_airline, AVG(fare.round_trip_cost) AS average_cost FROM fare WHERE fare.from_airport = 'LAX' AND fare.to_airport = 'ORD' GROUP BY fare.fare_airline ORDER BY average_cost DESC NULLS LAST;SELECT airline.airline_name, AVG(fare.round_trip_cost) AS avg_round_trip_cost FROM fare JOIN airline ON fare.fare_airline = airline.airline_code WHERE fare.from_airport = 'LAX' AND fare.to_airport = 'ORD' GROUP BY airline.airline_name ORDER BY avg_round_trip_cost DESC;",atis,group_by "What is the average cost of a one-way trip for each fare id, sorted in ascending order?","SELECT fare.fare_id, AVG(fare.one_direction_cost) AS average_cost FROM fare GROUP BY fare.fare_id ORDER BY average_cost ASC NULLS LAST;",atis,group_by "How many meals are served in each compartment, sorted by the number of meals in descending order?","SELECT food_service.compartment, COUNT(food_service.meal_number) AS number_of_meals FROM food_service GROUP BY food_service.compartment ORDER BY number_of_meals DESC NULLS LAST;",atis,group_by -"How many flights depart from each airport code, excluding stopovers?","SELECT airport.airport_code, COUNT(flight.from_airport) AS num_departures FROM airport LEFT JOIN 
flight ON airport.airport_code = flight.from_airport GROUP BY airport.airport_code;SELECT airport.airport_code, COUNT(flight.from_airport) AS num_departures FROM airport JOIN flight ON airport.airport_code = flight.from_airport GROUP BY airport.airport_code;",atis,group_by +"How many flights depart from each airport code, excluding departures from stopovers?","SELECT airport.airport_code, COUNT(flight.from_airport) AS num_departures FROM airport LEFT JOIN flight ON airport.airport_code = flight.from_airport GROUP BY airport.airport_code;SELECT airport.airport_code, COUNT(flight.from_airport) AS num_departures FROM airport JOIN flight ON airport.airport_code = flight.from_airport GROUP BY airport.airport_code;",atis,group_by "Which flight ids to Chicago (ORD) have the longest duration from departure to arrival, sorted in ascending order?","SELECT flight.flight_id, (flight.arrival_time - flight.departure_time) AS duration FROM flight WHERE to_airport = 'ORD' ORDER BY duration ASC NULLS LAST;",atis,order_by "Which airports have the shortest minimum connect time, sorted in ascending order? Show the minimum connect time.","SELECT {airport.airport_name, airport.airport_code}, airport.minimum_connect_time FROM airport ORDER BY airport.minimum_connect_time ASC NULLS LAST;",atis,order_by Which aircraft code can carry the highest weight of cargo that any aircraft can carry?,SELECT aircraft.aircraft_code FROM aircraft ORDER BY pay_load DESC NULLS LAST LIMIT 1;,atis,order_by @@ -83,7 +83,7 @@ Which flights serve meals in first class? 
Give me the flight id and meal descrip Which airlines offer flights with a stopover in Dallas?,"SELECT DISTINCT {airline.airline_name, airline.airline_code} FROM flight_stop JOIN airport ON flight_stop.stop_airport = airport.airport_code JOIN flight ON flight_stop.flight_id = flight.flight_id JOIN airline ON flight.airline_code = airline.airline_code WHERE airport.airport_location ILIKE '%Dallas%';",atis,table_join Which airlines offer flights from LAX to ORD?,"SELECT DISTINCT {airline.airline_name, airline.airline_code} FROM flight JOIN airline ON flight.airline_code = airline.airline_code WHERE flight.from_airport = 'LAX' AND flight.to_airport = 'ORD';",atis,table_join "Which airlines offer flights from Chicago (ORD) to New York (JFK), and how many stops do they have, sorted by number of stops in ascending order?","SELECT {airline.airline_name, airline.airline_code}, flight.stops FROM flight JOIN airline ON flight.airline_code = airline.airline_code WHERE flight.from_airport = 'ORD' AND flight.to_airport = 'JFK' GROUP BY {}, flight.stops ORDER BY flight.stops NULLS LAST;",atis,table_join -"Which airlines do not have any flights that depart or arrive at JFK or have stopovers?","SELECT DISTINCT {airline.airline_name, airline.airline_code} FROM airline WHERE airline.airline_code NOT IN (SELECT flight.airline_code FROM flight WHERE flight.from_airport = 'JFK' OR flight.to_airport = 'JFK' OR flight.stops = 0);",atis,table_join +"Which airlines do not have any flights that depart from or arrive at JFK, and do not have any flights with stopovers?","SELECT DISTINCT {airline.airline_name, airline.airline_code} FROM airline WHERE airline.airline_code NOT IN (SELECT flight.airline_code FROM flight WHERE flight.from_airport = 'JFK' OR flight.to_airport = 'JFK' OR flight.stops > 0);",atis,table_join Which state code is Orlando International Airport in?,SELECT state_code FROM airport WHERE airport_name ILIKE '%Orlando International Airport%';,atis,where Which flights operate on Mondays and 
Wednesdays? Give me the relevant flight numbers,"SELECT {flight.flight_number, flight.flight_id} FROM flight WHERE LOWER(flight.flight_days) LIKE '%mon%' AND LOWER(flight.flight_days) LIKE '%wed%';",atis,where What is the total cost of all round-trip fares from New York (JFK) to Los Angeles?,SELECT SUM(fare.round_trip_cost) AS total_round_trip_cost FROM fare WHERE fare.from_airport = 'JFK' AND fare.to_airport = 'LAX';,atis,where @@ -96,7 +96,7 @@ How many mountains are there in each country?,"SELECT mountain.country_name, COU How many lakes are there in each state?,"SELECT lake.state_name, COUNT(lake.lake_name) AS lake_count FROM lake GROUP BY lake.state_name ORDER BY lake_count DESC;",geography,group_by "Which states have the highest population density in people per square kilometer, ordered from highest to lowest?","SELECT state.state_name, state.density FROM state ORDER BY state.density DESC NULLS LAST;",geography,order_by "Which lakes have the largest areas in square kilometers, ordered from largest to smallest?","SELECT lake.lake_name, lake.area FROM lake ORDER BY lake.area DESC NULLS LAST;",geography,order_by -What are the top 5 cities with the highest population?,SELECT city.city_name FROM city ORDER BY city.population DESC NULLS LAST LIMIT 5;,geography,order_by +What are the top 5 cities with the highest population? 
Give both city names and the population.,"SELECT city.city_name, city.population FROM city ORDER BY city.population DESC NULLS LAST LIMIT 5;",geography,order_by "What are the longest rivers in meters, ordered from longest to shortest?","SELECT river.river_name, river.length FROM river ORDER BY river.length DESC NULLS LAST;",geography,order_by "What are the highest mountains in meters, ordered from highest to lowest altitude?","SELECT mountain.mountain_name, mountain.mountain_altitude FROM mountain ORDER BY mountain.mountain_altitude DESC NULLS LAST;",geography,order_by What is the ratio of the population of the United States to the population of California?,"SELECT CAST(SUM(NULLIF(state.population, 0)) FILTER (WHERE LOWER(state.country_name) LIKE '%united states%') AS FLOAT) / CAST(SUM(NULLIF(state.population, 0)) FILTER (WHERE LOWER(state.state_name) LIKE '%california%') AS FLOAT) AS population_ratio FROM state;",geography,ratio @@ -153,7 +153,7 @@ How many authors have published more than 2 papers?,SELECT COUNT(*) AS number_of "What is the total number of keyphrases associated with each paper, ordered by the paper ID in ascending order?","SELECT paperkeyphrase.paperid, COUNT(paperkeyphrase.keyphraseid) AS total_keyphrases FROM paperkeyphrase GROUP BY paperkeyphrase.paperid ORDER BY paperkeyphrase.paperid ASC NULLS LAST;",scholar,order_by "What are the titles of the papers published in the year 2020, ordered alphabetically?",SELECT paper.title FROM paper WHERE paper.year = 2020 ORDER BY paper.title ASC NULLS LAST;,scholar,order_by "What are the names of the journals in the database, ordered by the length of the journal name from shortest to longest?",SELECT journal.journalname FROM journal ORDER BY LENGTH(journal.journalname) ASC NULLS LAST;,scholar,order_by -"For each paper that cites other papers, how many other papers does it cite? 
Sort by the number of papers cited in descending order","SELECT cite.citingpaperid, COUNT(*) AS citation_count FROM cite GROUP BY cite.citingpaperid ORDER BY citation_count DESC NULLS LAST;",scholar,order_by +"For each paper that cites other papers, how many other papers does it cite? Sort by the number of papers cited in descending order","SELECT cite.citingpaperid, COUNT(*) AS citation_count FROM cite GROUP BY cite.citingpaperid ORDER BY citation_count DESC NULLS LAST;SELECT p.paperid, p.numciting FROM paper p WHERE p.numciting > 0 ORDER BY p.numciting DESC;",scholar,order_by What is the ratio of papers that have more than 1 keyphrases to papers that have 1 keyphrase?,"SELECT CAST(COUNT(DISTINCT CASE WHEN keyphrase_count > 1 THEN subquery.paperid END) AS FLOAT) / NULLIF(COUNT(DISTINCT CASE WHEN keyphrase_count =1 THEN subquery.paperid END), 0) AS ratio FROM (SELECT paperkeyphrase.paperid, COUNT(paperkeyphrase.keyphraseid) AS keyphrase_count FROM paperkeyphrase GROUP BY paperkeyphrase.paperid) AS subquery;",scholar,ratio What is the ratio of papers that have been cited by 2 or more papers to papers that have been cited by less than 2 papers?,"SELECT CAST(COUNT(CASE WHEN paper.numcitedby > 1 THEN 1 END) AS FLOAT) / NULLIF(COUNT(CASE WHEN paper.numcitedby < 2 THEN 1 END), 0) AS ratio FROM paper;",scholar,ratio What is the ratio of papers published in the year 2020 to the total number of papers in the database?,"SELECT CAST(COUNT(CASE WHEN paper.year = 2020 THEN 1 END) AS FLOAT) / NULLIF(COUNT(paper.paperid), 0) AS ratio FROM paper;",scholar,ratio