-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSQL Fundamentals.sql
252 lines (168 loc) · 5.64 KB
/
SQL Fundamentals.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
-- Start ##
SELECT *
FROM recent_grads
-- First Query ##
select * from recent_grads;
-- The LIMIT Clause ##
SELECT *
FROM recent_grads
LIMIT 5;
-- Selecting Specific Columns ##
SELECT Major , ShareWomen
FROM recent_grads
--
SELECT name , tagline , contributed_by
FROM beers
LIMIT 10;
-- Filtering Rows Using WHERE ##
SELECT Major , ShareWomen
FROM recent_grads
WHERE ShareWomen < 0.5
-- Expressing Multiple Filter Criteria Using 'AND' ##
SELECT Major , Major_category , Median,ShareWomen
FROM recent_grads
WHERE ShareWomen > 0.5 AND Median > 50000
-- Returning One of Several Conditions With OR ##
SELECT Major,Median,Unemployed
FROM recent_grads
WHERE Median >= 10000
OR Men > Women
LIMIT 20;
-- Grouping Operators with Parentheses ##
SELECT Major,Major_category,ShareWomen,Unemployment_rate
FROM recent_grads
WHERE Major_category = 'Engineering'
AND (ShareWomen > 0.5
OR Unemployment_rate < 0.051 )
-- Ordering Results Using ORDER BY ##
SELECT Major, ShareWomen, Unemployment_rate
FROM recent_grads
WHERE ShareWomen > 0.3
AND Unemployment_rate < 0.1
ORDER BY ShareWomen DESC
-- Practice Writing a Query ##
SELECT Major_category, Major, Unemployment_rate
FROM recent_grads
WHERE Major_category IN ('Engineering' , 'Physical Sciences')
ORDER BY Unemployment_rate ;
------------------------------------------------------------------------------------------------
-- A Simple Question ##
SELECT MIN(Unemployment_rate)
FROM recent_grads
-- Aggregate Functions ##
SELECT SUM(Total)
FROM recent_grads
-- Order of Execution ##
SELECT COUNT(Major)
FROM recent_grads
WHERE ShareWomen < 0.5
-- Missing Values ##
SELECT COUNT(*),COUNT(Unemployment_rate)
FROM recent_grads
-- Combining Multiple Aggregation Functions ##
SELECT AVG(Total) , MIN(Men) , MAX(Women)
FROM recent_grads
-- Customising the Results ##
SELECT COUNT(*) AS 'Number of Majors' , MAX(Unemployment_rate) AS 'Highest Unemployment Rate'
FROM recent_grads
-- Counting Unique Values ##
SELECT COUNT(DISTINCT Major) AS 'unique_majors' ,
COUNT(DISTINCT Major_category) AS 'unique_major_categories',
COUNT(DISTINCT Major_code) AS 'unique_major_codes'
FROM recent_grads
-- String Functions and Operations ##
SELECT 'Major: ' || LOWER(Major) AS 'Major',Total,Men,Women,Unemployment_rate,LENGTH(Major) AS 'Length_of_name'
FROM recent_grads
ORDER BY Unemployment_rate DESC;
-- Performing Arithmetic in SQL ##
SELECT Major , Major_category , P75th - P25th AS 'quartile_spread'
FROM recent_grads
ORDER BY quartile_spread
LIMIT 20;
----------------------------------------------------------------
-- If/Then in SQL ##
SELECT CASE
WHEN Sample_size < 200 THEN 'Small'
WHEN Sample_size >=200 AND Sample_size < 1000 THEN 'Medium'
WHEN Sample_size >=1000 THEN 'Large'
END AS Sample_category
FROM recent_grads
-- Dissecting CASE ##
SELECT Major , Sample_size ,
CASE
WHEN Sample_size < 200 THEN 'Small'
WHEN Sample_size >= 200 AND Sample_size < 1000 THEN 'Medium'
ELSE 'Large'
END AS Sample_category
FROM recent_grads
-- Calculating Group-Level Summary Statistics ##
SELECT Major_category , SUM(Total) AS Total_graduates
FROM recent_grads
GROUP BY Major_category
-- GROUP BY Visual Breakdown ##
SELECT Major_category , AVG(ShareWomen) AS Average_women
FROM recent_grads
GROUP BY Major_category
-- Multiple Summary Statistics by Group ##
SELECT Major_category , SUM(Women) AS Total_women , AVG(ShareWomen) AS Mean_women , SUM(Total) * AVG(ShareWomen) AS Estimate_women
FROM recent_grads
GROUP BY Major_category
-- Multiple Group Columns ##
SELECT Major_category , Sample_category , AVG(ShareWomen) AS Mean_women , SUM(Total) AS Total_graduates
FROM new_grads
GROUP BY Major_category , Sample_category
-- Querying Virtual Columns With the HAVING Statement ##
SELECT Major_category , AVG(Low_wage_jobs)/AVG(Total) AS Share_low_wage
FROM new_grads
GROUP BY Major_category
HAVING Share_low_wage > .1
-- Rounding Results With the ROUND() Function ##
SELECT ROUND(ShareWomen,4) AS Rounded_women , Major_category
FROM new_grads
LIMIT 10
-- Nesting functions ##
SELECT Major_category, ROUND(AVG(College_jobs) / AVG(Total),3) AS Share_degree_jobs
FROM new_grads
GROUP BY Major_category
HAVING Share_degree_jobs < .3
-- Casting ##
SELECT Major_category,CAST(SUM(Women) AS FLOAT)/CAST(SUM(Total) AS FLOAT) AS SW
FROM new_grads
GROUP BY Major_category
ORDER BY SW
----------------------------------------------------------------
-- Writing More Complex Queries ##
SELECT Major , ShareWomen
FROM recent_grads
WHERE ShareWomen > 0.5225502029537575
-- Subqueries ##
SELECT Major , Unemployment_rate
FROM recent_grads
WHERE Unemployment_rate < (
SELECT AVG(Unemployment_rate)
FROM recent_grads)
-- Subquery in SELECT ##
SELECT CAST(COUNT(*) AS FLOAT) /
CAST((SELECT COUNT(*)
FROM recent_grads
) AS FLOAT) AS proportion_abv_avg
FROM recent_grads
WHERE ShareWomen > (SELECT AVG(ShareWomen)
FROM recent_grads
);
-- The IN Operator ##
SELECT Major_category , Major
FROM recent_grads
WHERE Major_category IN ('Business','Humanities & Liberal Arts','Education')
-- Returning Multiple Results in Subqueries ##
SELECT Major_category, Major
FROM recent_grads
WHERE Major_category IN('Business', 'Humanities & Liberal Arts','Education');
-- Building Complex Subqueries ##
SELECT AVG(CAST(Sample_size AS FLOAT) / CAST(Total AS FLOAT)) AS avg_ratio
FROM recent_grads
-- Practice Integrating A Subquery With The Outer Query ##
SELECT Major , Major_category , (SELECT CAST(Sample_size AS FLOAT) / CAST(Total AS FLOAT)) AS ratio
FROM recent_grads
WHERE ratio > (SELECT AVG(CAST(Sample_size AS FLOAT) / CAST(Total AS FLOAT)) AS avg_ratio
FROM recent_grads)