diff --git a/_preview/468/.buildinfo b/_preview/468/.buildinfo
new file mode 100644
index 000000000..3018697ca
--- /dev/null
+++ b/_preview/468/.buildinfo
@@ -0,0 +1,4 @@
+# Sphinx build info version 1
+# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
+config: 5fc2183660e2f8d0d6e5328072b0d802
+tags: 645f666f9bcd5a90fca523b33c5a78b7
diff --git a/_preview/468/_images/0378ed90474a4a800540a402c9117d63d9a659626d033f81b7dc03f3d88c7684.png b/_preview/468/_images/0378ed90474a4a800540a402c9117d63d9a659626d033f81b7dc03f3d88c7684.png
new file mode 100644
index 000000000..dab5f809b
Binary files /dev/null and b/_preview/468/_images/0378ed90474a4a800540a402c9117d63d9a659626d033f81b7dc03f3d88c7684.png differ
diff --git a/_preview/468/_images/05b6b4fd5dddbf85422158d80ab6879a9f838d28bc5a8cd62819e5de8a708350.png b/_preview/468/_images/05b6b4fd5dddbf85422158d80ab6879a9f838d28bc5a8cd62819e5de8a708350.png
new file mode 100644
index 000000000..ed9e255fc
Binary files /dev/null and b/_preview/468/_images/05b6b4fd5dddbf85422158d80ab6879a9f838d28bc5a8cd62819e5de8a708350.png differ
diff --git a/_preview/468/_images/09939a7670ac947b349b606017ecda396d1f6497bbf80fb5daedfcc0908f4d6f.png b/_preview/468/_images/09939a7670ac947b349b606017ecda396d1f6497bbf80fb5daedfcc0908f4d6f.png
new file mode 100644
index 000000000..804ef3ab7
Binary files /dev/null and b/_preview/468/_images/09939a7670ac947b349b606017ecda396d1f6497bbf80fb5daedfcc0908f4d6f.png differ
diff --git a/_preview/468/_images/0ced9c189d8c7acdecb498c5017267413b814729d35a14030ded486994848e3e.png b/_preview/468/_images/0ced9c189d8c7acdecb498c5017267413b814729d35a14030ded486994848e3e.png
new file mode 100644
index 000000000..671ae3068
Binary files /dev/null and b/_preview/468/_images/0ced9c189d8c7acdecb498c5017267413b814729d35a14030ded486994848e3e.png differ
diff --git a/_preview/468/_images/0eb5a1e40ec32e805469d633b51ec3aa0d297f83004e65925cbcea02bf555092.png b/_preview/468/_images/0eb5a1e40ec32e805469d633b51ec3aa0d297f83004e65925cbcea02bf555092.png
new file mode 100644
index 000000000..2ed71d3a4
Binary files /dev/null and b/_preview/468/_images/0eb5a1e40ec32e805469d633b51ec3aa0d297f83004e65925cbcea02bf555092.png differ
diff --git a/_preview/468/_images/1-gitstatus.png b/_preview/468/_images/1-gitstatus.png
new file mode 100644
index 000000000..8cf630929
Binary files /dev/null and b/_preview/468/_images/1-gitstatus.png differ
diff --git a/_preview/468/_images/10-github-newbranch.png b/_preview/468/_images/10-github-newbranch.png
new file mode 100644
index 000000000..b80884a28
Binary files /dev/null and b/_preview/468/_images/10-github-newbranch.png differ
diff --git a/_preview/468/_images/11-newbranch-contribute.png b/_preview/468/_images/11-newbranch-contribute.png
new file mode 100644
index 000000000..86b558073
Binary files /dev/null and b/_preview/468/_images/11-newbranch-contribute.png differ
diff --git a/_preview/468/_images/11cd773391e2ee63774113ba696093c2dc314f186efc083a780d40ee26dce5e4.png b/_preview/468/_images/11cd773391e2ee63774113ba696093c2dc314f186efc083a780d40ee26dce5e4.png
new file mode 100644
index 000000000..c09b3e8b6
Binary files /dev/null and b/_preview/468/_images/11cd773391e2ee63774113ba696093c2dc314f186efc083a780d40ee26dce5e4.png differ
diff --git a/_preview/468/_images/12-compare.png b/_preview/468/_images/12-compare.png
new file mode 100644
index 000000000..940e45dc0
Binary files /dev/null and b/_preview/468/_images/12-compare.png differ
diff --git a/_preview/468/_images/123efb19160ff65ae76ffd62a6a0e3bf58b8428840ef894ab79754c98ea79214.png b/_preview/468/_images/123efb19160ff65ae76ffd62a6a0e3bf58b8428840ef894ab79754c98ea79214.png
new file mode 100644
index 000000000..307ff2d4e
Binary files /dev/null and b/_preview/468/_images/123efb19160ff65ae76ffd62a6a0e3bf58b8428840ef894ab79754c98ea79214.png differ
diff --git a/_preview/468/_images/13-message.png b/_preview/468/_images/13-message.png
new file mode 100644
index 000000000..857887dbd
Binary files /dev/null and b/_preview/468/_images/13-message.png differ
diff --git a/_preview/468/_images/14-prsummary.png b/_preview/468/_images/14-prsummary.png
new file mode 100644
index 000000000..395783b15
Binary files /dev/null and b/_preview/468/_images/14-prsummary.png differ
diff --git a/_preview/468/_images/14823a00387a43fce8fd7b4035c5b98e68204d306bb0ab6a33c23ad5d09dd244.png b/_preview/468/_images/14823a00387a43fce8fd7b4035c5b98e68204d306bb0ab6a33c23ad5d09dd244.png
new file mode 100644
index 000000000..41a4fd6fb
Binary files /dev/null and b/_preview/468/_images/14823a00387a43fce8fd7b4035c5b98e68204d306bb0ab6a33c23ad5d09dd244.png differ
diff --git a/_preview/468/_images/15-todraft.png b/_preview/468/_images/15-todraft.png
new file mode 100644
index 000000000..e02e9eb9e
Binary files /dev/null and b/_preview/468/_images/15-todraft.png differ
diff --git a/_preview/468/_images/16-draft.png b/_preview/468/_images/16-draft.png
new file mode 100644
index 000000000..e4cb69e14
Binary files /dev/null and b/_preview/468/_images/16-draft.png differ
diff --git a/_preview/468/_images/17-fileschanged.png b/_preview/468/_images/17-fileschanged.png
new file mode 100644
index 000000000..dfcd6a169
Binary files /dev/null and b/_preview/468/_images/17-fileschanged.png differ
diff --git a/_preview/468/_images/1746da75f71a63ce67756bd3b591d6ec008a178e4e5e8cfa3ea22fe94eef1859.png b/_preview/468/_images/1746da75f71a63ce67756bd3b591d6ec008a178e4e5e8cfa3ea22fe94eef1859.png
new file mode 100644
index 000000000..aa4207616
Binary files /dev/null and b/_preview/468/_images/1746da75f71a63ce67756bd3b591d6ec008a178e4e5e8cfa3ea22fe94eef1859.png differ
diff --git a/_preview/468/_images/18-review.png b/_preview/468/_images/18-review.png
new file mode 100644
index 000000000..626716a1a
Binary files /dev/null and b/_preview/468/_images/18-review.png differ
diff --git a/_preview/468/_images/1a998e55dc357396938e4582b50be14d160553e4bf972e6e4c6b9fed7c4b1afc.png b/_preview/468/_images/1a998e55dc357396938e4582b50be14d160553e4bf972e6e4c6b9fed7c4b1afc.png
new file mode 100644
index 000000000..aa3678dba
Binary files /dev/null and b/_preview/468/_images/1a998e55dc357396938e4582b50be14d160553e4bf972e6e4c6b9fed7c4b1afc.png differ
diff --git a/_preview/468/_images/1ae1aba7705f9f4c003ebc48f3b0f687a2031ed7f9d6cb8c8393bf159c6b7188.png b/_preview/468/_images/1ae1aba7705f9f4c003ebc48f3b0f687a2031ed7f9d6cb8c8393bf159c6b7188.png
new file mode 100644
index 000000000..fe080e5d8
Binary files /dev/null and b/_preview/468/_images/1ae1aba7705f9f4c003ebc48f3b0f687a2031ed7f9d6cb8c8393bf159c6b7188.png differ
diff --git a/_preview/468/_images/1b032d9a2feeffd58f4e3df5e582f2f3fc2710e05e1adbc47eb699b687e891d0.png b/_preview/468/_images/1b032d9a2feeffd58f4e3df5e582f2f3fc2710e05e1adbc47eb699b687e891d0.png
new file mode 100644
index 000000000..599675fe3
Binary files /dev/null and b/_preview/468/_images/1b032d9a2feeffd58f4e3df5e582f2f3fc2710e05e1adbc47eb699b687e891d0.png differ
diff --git a/_preview/468/_images/1d51c6d30cd93059141d8982c4f6bce9d3c7c7f9b234e1b359521763688676d8.png b/_preview/468/_images/1d51c6d30cd93059141d8982c4f6bce9d3c7c7f9b234e1b359521763688676d8.png
new file mode 100644
index 000000000..6c77ffb55
Binary files /dev/null and b/_preview/468/_images/1d51c6d30cd93059141d8982c4f6bce9d3c7c7f9b234e1b359521763688676d8.png differ
diff --git a/_preview/468/_images/1dbed8c14d7aad1a5b5210f39f76f6256fdb5b6e3ab323da00548fccb1f7b63d.png b/_preview/468/_images/1dbed8c14d7aad1a5b5210f39f76f6256fdb5b6e3ab323da00548fccb1f7b63d.png
new file mode 100644
index 000000000..5a18bd12a
Binary files /dev/null and b/_preview/468/_images/1dbed8c14d7aad1a5b5210f39f76f6256fdb5b6e3ab323da00548fccb1f7b63d.png differ
diff --git a/_preview/468/_images/1ef40d335857fa8fc92493dab3a12ea9523aac3b4715787e87137171cb91ef73.png b/_preview/468/_images/1ef40d335857fa8fc92493dab3a12ea9523aac3b4715787e87137171cb91ef73.png
new file mode 100644
index 000000000..7aa833de8
Binary files /dev/null and b/_preview/468/_images/1ef40d335857fa8fc92493dab3a12ea9523aac3b4715787e87137171cb91ef73.png differ
diff --git a/_preview/468/_images/2-gitremote.png b/_preview/468/_images/2-gitremote.png
new file mode 100644
index 000000000..004c982f6
Binary files /dev/null and b/_preview/468/_images/2-gitremote.png differ
diff --git a/_preview/468/_images/20-green.png b/_preview/468/_images/20-green.png
new file mode 100644
index 000000000..51068000e
Binary files /dev/null and b/_preview/468/_images/20-green.png differ
diff --git a/_preview/468/_images/20463bc2e4e1ae4668a6d55a52bdefd082e59f2e3bfddfdf24abf0ed52707e5f.png b/_preview/468/_images/20463bc2e4e1ae4668a6d55a52bdefd082e59f2e3bfddfdf24abf0ed52707e5f.png
new file mode 100644
index 000000000..4f3efeff0
Binary files /dev/null and b/_preview/468/_images/20463bc2e4e1ae4668a6d55a52bdefd082e59f2e3bfddfdf24abf0ed52707e5f.png differ
diff --git a/_preview/468/_images/2144c68ce4a8938698bd50dd6069cd783aec23349ffc4df739d1172fab24ba63.png b/_preview/468/_images/2144c68ce4a8938698bd50dd6069cd783aec23349ffc4df739d1172fab24ba63.png
new file mode 100644
index 000000000..7353c8d2d
Binary files /dev/null and b/_preview/468/_images/2144c68ce4a8938698bd50dd6069cd783aec23349ffc4df739d1172fab24ba63.png differ
diff --git a/_preview/468/_images/2205a843b54ab0c35306488c72a99f509358cf60171d0adbd3edb715c2b2772f.png b/_preview/468/_images/2205a843b54ab0c35306488c72a99f509358cf60171d0adbd3edb715c2b2772f.png
new file mode 100644
index 000000000..1e639a43f
Binary files /dev/null and b/_preview/468/_images/2205a843b54ab0c35306488c72a99f509358cf60171d0adbd3edb715c2b2772f.png differ
diff --git a/_preview/468/_images/22d9f31e171ccab942bee3b5ae111fe7b3a71f631f85bc6e456b939f6154758f.png b/_preview/468/_images/22d9f31e171ccab942bee3b5ae111fe7b3a71f631f85bc6e456b939f6154758f.png
new file mode 100644
index 000000000..29fa47408
Binary files /dev/null and b/_preview/468/_images/22d9f31e171ccab942bee3b5ae111fe7b3a71f631f85bc6e456b939f6154758f.png differ
diff --git a/_preview/468/_images/25d0791ad0dcad5bf2085dacc30c1bc926f2020414e24f2891624ddd420504bd.png b/_preview/468/_images/25d0791ad0dcad5bf2085dacc30c1bc926f2020414e24f2891624ddd420504bd.png
new file mode 100644
index 000000000..9d43f0385
Binary files /dev/null and b/_preview/468/_images/25d0791ad0dcad5bf2085dacc30c1bc926f2020414e24f2891624ddd420504bd.png differ
diff --git a/_preview/468/_images/296cbdce5356e063165c603b3530a6c74983f2041817c66b2d110a4483a00d7c.png b/_preview/468/_images/296cbdce5356e063165c603b3530a6c74983f2041817c66b2d110a4483a00d7c.png
new file mode 100644
index 000000000..701bf33d8
Binary files /dev/null and b/_preview/468/_images/296cbdce5356e063165c603b3530a6c74983f2041817c66b2d110a4483a00d7c.png differ
diff --git a/_preview/468/_images/3-gitbranch.png b/_preview/468/_images/3-gitbranch.png
new file mode 100644
index 000000000..96199c3e5
Binary files /dev/null and b/_preview/468/_images/3-gitbranch.png differ
diff --git a/_preview/468/_images/3039ddf02a56f5bb1c4e6145ab688f092cfaf947e48151e521c62c5fd385260d.png b/_preview/468/_images/3039ddf02a56f5bb1c4e6145ab688f092cfaf947e48151e521c62c5fd385260d.png
new file mode 100644
index 000000000..d5d707af9
Binary files /dev/null and b/_preview/468/_images/3039ddf02a56f5bb1c4e6145ab688f092cfaf947e48151e521c62c5fd385260d.png differ
diff --git a/_preview/468/_images/30d02597d36a90a91e936f5c1a842859a912a021928afd0b94ace719550ad001.png b/_preview/468/_images/30d02597d36a90a91e936f5c1a842859a912a021928afd0b94ace719550ad001.png
new file mode 100644
index 000000000..534ae322b
Binary files /dev/null and b/_preview/468/_images/30d02597d36a90a91e936f5c1a842859a912a021928afd0b94ace719550ad001.png differ
diff --git a/_preview/468/_images/320040cb622cfe3ab69bcf2a353ff5004ac957be88c6c3ec2f56d6be0ce3bbb1.png b/_preview/468/_images/320040cb622cfe3ab69bcf2a353ff5004ac957be88c6c3ec2f56d6be0ce3bbb1.png
new file mode 100644
index 000000000..47f7e8248
Binary files /dev/null and b/_preview/468/_images/320040cb622cfe3ab69bcf2a353ff5004ac957be88c6c3ec2f56d6be0ce3bbb1.png differ
diff --git a/_preview/468/_images/355ebe76fcf4c05c3ab90ba24afc728f7f5c48ee2b78d1b32348575b3e793f95.png b/_preview/468/_images/355ebe76fcf4c05c3ab90ba24afc728f7f5c48ee2b78d1b32348575b3e793f95.png
new file mode 100644
index 000000000..11859e20f
Binary files /dev/null and b/_preview/468/_images/355ebe76fcf4c05c3ab90ba24afc728f7f5c48ee2b78d1b32348575b3e793f95.png differ
diff --git a/_preview/468/_images/3b721d9b8404df33609396fee0dfbce4e8e3cd5082dbd28cc3466d32d8994246.png b/_preview/468/_images/3b721d9b8404df33609396fee0dfbce4e8e3cd5082dbd28cc3466d32d8994246.png
new file mode 100644
index 000000000..167665557
Binary files /dev/null and b/_preview/468/_images/3b721d9b8404df33609396fee0dfbce4e8e3cd5082dbd28cc3466d32d8994246.png differ
diff --git a/_preview/468/_images/3cf69c70f2646c79a40649eb5bb76a0ce4260a1fdc632a2828beceed66cdcc1b.png b/_preview/468/_images/3cf69c70f2646c79a40649eb5bb76a0ce4260a1fdc632a2828beceed66cdcc1b.png
new file mode 100644
index 000000000..e5bb707f7
Binary files /dev/null and b/_preview/468/_images/3cf69c70f2646c79a40649eb5bb76a0ce4260a1fdc632a2828beceed66cdcc1b.png differ
diff --git a/_preview/468/_images/4-gitnewbranch.png b/_preview/468/_images/4-gitnewbranch.png
new file mode 100644
index 000000000..2fcb59285
Binary files /dev/null and b/_preview/468/_images/4-gitnewbranch.png differ
diff --git a/_preview/468/_images/40ef0837c9537c236519f677ec0c7a8ee47229dbeb39b83db399efc70db3132a.png b/_preview/468/_images/40ef0837c9537c236519f677ec0c7a8ee47229dbeb39b83db399efc70db3132a.png
new file mode 100644
index 000000000..c6fa48952
Binary files /dev/null and b/_preview/468/_images/40ef0837c9537c236519f677ec0c7a8ee47229dbeb39b83db399efc70db3132a.png differ
diff --git a/_preview/468/_images/470f6f8431715056f1a7646b6b78ab6751eccb25f37d471854e689b222a44d63.png b/_preview/468/_images/470f6f8431715056f1a7646b6b78ab6751eccb25f37d471854e689b222a44d63.png
new file mode 100644
index 000000000..0abb9eb52
Binary files /dev/null and b/_preview/468/_images/470f6f8431715056f1a7646b6b78ab6751eccb25f37d471854e689b222a44d63.png differ
diff --git a/_preview/468/_images/49af1204a7bd9b5c86c97e5c40820421787a4e7d3808520d90d8c1f38bbabe52.png b/_preview/468/_images/49af1204a7bd9b5c86c97e5c40820421787a4e7d3808520d90d8c1f38bbabe52.png
new file mode 100644
index 000000000..6886a30d0
Binary files /dev/null and b/_preview/468/_images/49af1204a7bd9b5c86c97e5c40820421787a4e7d3808520d90d8c1f38bbabe52.png differ
diff --git a/_preview/468/_images/4ab1942c85eedfd46edda089377ea3fb0aeff64b36813c5fe50a9d7190fe57ae.png b/_preview/468/_images/4ab1942c85eedfd46edda089377ea3fb0aeff64b36813c5fe50a9d7190fe57ae.png
new file mode 100644
index 000000000..9b1403666
Binary files /dev/null and b/_preview/468/_images/4ab1942c85eedfd46edda089377ea3fb0aeff64b36813c5fe50a9d7190fe57ae.png differ
diff --git a/_preview/468/_images/4b77a6ed8a47cccc6a2d8d99ba0053be41da83ab475b952f2e7a7c9a68640781.png b/_preview/468/_images/4b77a6ed8a47cccc6a2d8d99ba0053be41da83ab475b952f2e7a7c9a68640781.png
new file mode 100644
index 000000000..5e3190476
Binary files /dev/null and b/_preview/468/_images/4b77a6ed8a47cccc6a2d8d99ba0053be41da83ab475b952f2e7a7c9a68640781.png differ
diff --git a/_preview/468/_images/4cb9cb157df99f05571cbec0e8bbe4a846f0cd4a58d01dcf5ff0e1240d2a3fe4.png b/_preview/468/_images/4cb9cb157df99f05571cbec0e8bbe4a846f0cd4a58d01dcf5ff0e1240d2a3fe4.png
new file mode 100644
index 000000000..f7754469e
Binary files /dev/null and b/_preview/468/_images/4cb9cb157df99f05571cbec0e8bbe4a846f0cd4a58d01dcf5ff0e1240d2a3fe4.png differ
diff --git a/_preview/468/_images/4cf89863004774898bc36fe84f43ab490c68886c3b9c81cc81185dc8b4718652.png b/_preview/468/_images/4cf89863004774898bc36fe84f43ab490c68886c3b9c81cc81185dc8b4718652.png
new file mode 100644
index 000000000..b71464058
Binary files /dev/null and b/_preview/468/_images/4cf89863004774898bc36fe84f43ab490c68886c3b9c81cc81185dc8b4718652.png differ
diff --git a/_preview/468/_images/4e5f13266498d98db134189acbcf7cf9f7244199fc3ae7490abc8491864cd31d.png b/_preview/468/_images/4e5f13266498d98db134189acbcf7cf9f7244199fc3ae7490abc8491864cd31d.png
new file mode 100644
index 000000000..254ef63d6
Binary files /dev/null and b/_preview/468/_images/4e5f13266498d98db134189acbcf7cf9f7244199fc3ae7490abc8491864cd31d.png differ
diff --git a/_preview/468/_images/5-gitcheckout.png b/_preview/468/_images/5-gitcheckout.png
new file mode 100644
index 000000000..fa5871a38
Binary files /dev/null and b/_preview/468/_images/5-gitcheckout.png differ
diff --git a/_preview/468/_images/5208f56fd3906f4ce6c46d551cbad6a93c7bc861a6a1aaa6a112eced706fa28d.png b/_preview/468/_images/5208f56fd3906f4ce6c46d551cbad6a93c7bc861a6a1aaa6a112eced706fa28d.png
new file mode 100644
index 000000000..7e5fc215c
Binary files /dev/null and b/_preview/468/_images/5208f56fd3906f4ce6c46d551cbad6a93c7bc861a6a1aaa6a112eced706fa28d.png differ
diff --git a/_preview/468/_images/54b84e29047a8bc63cad4c91d7cb7552d0c485cc58457a22e870f87124f45200.png b/_preview/468/_images/54b84e29047a8bc63cad4c91d7cb7552d0c485cc58457a22e870f87124f45200.png
new file mode 100644
index 000000000..06db5126d
Binary files /dev/null and b/_preview/468/_images/54b84e29047a8bc63cad4c91d7cb7552d0c485cc58457a22e870f87124f45200.png differ
diff --git a/_preview/468/_images/54c9c2e758d9c9914b4aca68aa5af780cefb566dcbcb5d2f3884af2c6821447d.png b/_preview/468/_images/54c9c2e758d9c9914b4aca68aa5af780cefb566dcbcb5d2f3884af2c6821447d.png
new file mode 100644
index 000000000..7bc6620f2
Binary files /dev/null and b/_preview/468/_images/54c9c2e758d9c9914b4aca68aa5af780cefb566dcbcb5d2f3884af2c6821447d.png differ
diff --git a/_preview/468/_images/57f39e3d0829df5b12fc9298b4722d64bea6b8ef6a3e0b911e26309374691f26.png b/_preview/468/_images/57f39e3d0829df5b12fc9298b4722d64bea6b8ef6a3e0b911e26309374691f26.png
new file mode 100644
index 000000000..2d2c080ce
Binary files /dev/null and b/_preview/468/_images/57f39e3d0829df5b12fc9298b4722d64bea6b8ef6a3e0b911e26309374691f26.png differ
diff --git a/_preview/468/_images/5af13513d573f8e31e87cc16e5c3de53825e3ded07dbe7b6c8b909192973a363.png b/_preview/468/_images/5af13513d573f8e31e87cc16e5c3de53825e3ded07dbe7b6c8b909192973a363.png
new file mode 100644
index 000000000..dc225d373
Binary files /dev/null and b/_preview/468/_images/5af13513d573f8e31e87cc16e5c3de53825e3ded07dbe7b6c8b909192973a363.png differ
diff --git a/_preview/468/_images/5bb7d5a92eb94d357fb3f6fd2ccb81fa5eaefec7c72c07d1647edbd16694de2a.png b/_preview/468/_images/5bb7d5a92eb94d357fb3f6fd2ccb81fa5eaefec7c72c07d1647edbd16694de2a.png
new file mode 100644
index 000000000..d91b58cd5
Binary files /dev/null and b/_preview/468/_images/5bb7d5a92eb94d357fb3f6fd2ccb81fa5eaefec7c72c07d1647edbd16694de2a.png differ
diff --git a/_preview/468/_images/6-samplechange.png b/_preview/468/_images/6-samplechange.png
new file mode 100644
index 000000000..82f395a43
Binary files /dev/null and b/_preview/468/_images/6-samplechange.png differ
diff --git a/_preview/468/_images/63267a3a0fccacf8aafa8eb8bb63695915b0fc12918ed369f769e26f838ce690.png b/_preview/468/_images/63267a3a0fccacf8aafa8eb8bb63695915b0fc12918ed369f769e26f838ce690.png
new file mode 100644
index 000000000..1f11bd30b
Binary files /dev/null and b/_preview/468/_images/63267a3a0fccacf8aafa8eb8bb63695915b0fc12918ed369f769e26f838ce690.png differ
diff --git a/_preview/468/_images/661e7c3c05775e866cd54ae3a6b4e59e1bb4f763fc3b2157d67d727da3ff75be.png b/_preview/468/_images/661e7c3c05775e866cd54ae3a6b4e59e1bb4f763fc3b2157d67d727da3ff75be.png
new file mode 100644
index 000000000..7100258b7
Binary files /dev/null and b/_preview/468/_images/661e7c3c05775e866cd54ae3a6b4e59e1bb4f763fc3b2157d67d727da3ff75be.png differ
diff --git a/_preview/468/_images/675482d9a10440aa4600e522fb10a144d002419b027cdc8870848ca3938f549b.png b/_preview/468/_images/675482d9a10440aa4600e522fb10a144d002419b027cdc8870848ca3938f549b.png
new file mode 100644
index 000000000..5a5f0880d
Binary files /dev/null and b/_preview/468/_images/675482d9a10440aa4600e522fb10a144d002419b027cdc8870848ca3938f549b.png differ
diff --git a/_preview/468/_images/6a-gitadd.png b/_preview/468/_images/6a-gitadd.png
new file mode 100644
index 000000000..7bc8c1a1b
Binary files /dev/null and b/_preview/468/_images/6a-gitadd.png differ
diff --git a/_preview/468/_images/6b-gitlog.png b/_preview/468/_images/6b-gitlog.png
new file mode 100644
index 000000000..3afabd9d8
Binary files /dev/null and b/_preview/468/_images/6b-gitlog.png differ
diff --git a/_preview/468/_images/6b18e8ed4f56996057c7d09f628c27c79dea0092ef2629cb1a8cc8630677a50f.png b/_preview/468/_images/6b18e8ed4f56996057c7d09f628c27c79dea0092ef2629cb1a8cc8630677a50f.png
new file mode 100644
index 000000000..b2df93fe2
Binary files /dev/null and b/_preview/468/_images/6b18e8ed4f56996057c7d09f628c27c79dea0092ef2629cb1a8cc8630677a50f.png differ
diff --git a/_preview/468/_images/6c-gitpush.png b/_preview/468/_images/6c-gitpush.png
new file mode 100644
index 000000000..c42bfefc5
Binary files /dev/null and b/_preview/468/_images/6c-gitpush.png differ
diff --git a/_preview/468/_images/6d-setupstream.png b/_preview/468/_images/6d-setupstream.png
new file mode 100644
index 000000000..f69688df5
Binary files /dev/null and b/_preview/468/_images/6d-setupstream.png differ
diff --git a/_preview/468/_images/6e570983369c5e29eb77847abfb5743a612e8e39cea91602438cd3d031964765.png b/_preview/468/_images/6e570983369c5e29eb77847abfb5743a612e8e39cea91602438cd3d031964765.png
new file mode 100644
index 000000000..0da822587
Binary files /dev/null and b/_preview/468/_images/6e570983369c5e29eb77847abfb5743a612e8e39cea91602438cd3d031964765.png differ
diff --git a/_preview/468/_images/7-github-branchandstatus.png b/_preview/468/_images/7-github-branchandstatus.png
new file mode 100644
index 000000000..7f81cc23b
Binary files /dev/null and b/_preview/468/_images/7-github-branchandstatus.png differ
diff --git a/_preview/468/_images/73dd509e9d84e947d7ce0db4a7d2adf1ab602e09d4451a82ca86dd254e887d18.png b/_preview/468/_images/73dd509e9d84e947d7ce0db4a7d2adf1ab602e09d4451a82ca86dd254e887d18.png
new file mode 100644
index 000000000..ca2e06e4c
Binary files /dev/null and b/_preview/468/_images/73dd509e9d84e947d7ce0db4a7d2adf1ab602e09d4451a82ca86dd254e887d18.png differ
diff --git a/_preview/468/_images/744a94cd827ce2feeb760ab94530647cc9beff3ee90db9bc3a8902c55d59c178.png b/_preview/468/_images/744a94cd827ce2feeb760ab94530647cc9beff3ee90db9bc3a8902c55d59c178.png
new file mode 100644
index 000000000..f4eed5616
Binary files /dev/null and b/_preview/468/_images/744a94cd827ce2feeb760ab94530647cc9beff3ee90db9bc3a8902c55d59c178.png differ
diff --git a/_preview/468/_images/76ce5aaf7d98647199d0c6b86ac85e8e6757e3e9d6742f6343658bcf1a12b8f4.png b/_preview/468/_images/76ce5aaf7d98647199d0c6b86ac85e8e6757e3e9d6742f6343658bcf1a12b8f4.png
new file mode 100644
index 000000000..758101789
Binary files /dev/null and b/_preview/468/_images/76ce5aaf7d98647199d0c6b86ac85e8e6757e3e9d6742f6343658bcf1a12b8f4.png differ
diff --git a/_preview/468/_images/76f463843718aeae7f084aa4667ea7980fb137239e7f5df3fe06388dcfcc3c2c.png b/_preview/468/_images/76f463843718aeae7f084aa4667ea7980fb137239e7f5df3fe06388dcfcc3c2c.png
new file mode 100644
index 000000000..c038f7eb1
Binary files /dev/null and b/_preview/468/_images/76f463843718aeae7f084aa4667ea7980fb137239e7f5df3fe06388dcfcc3c2c.png differ
diff --git a/_preview/468/_images/77258b7d02a7d3df8d52ca4db46041a0f5c50e9bb865bd0f57af0bb44d7aff57.png b/_preview/468/_images/77258b7d02a7d3df8d52ca4db46041a0f5c50e9bb865bd0f57af0bb44d7aff57.png
new file mode 100644
index 000000000..5ee3b4a70
Binary files /dev/null and b/_preview/468/_images/77258b7d02a7d3df8d52ca4db46041a0f5c50e9bb865bd0f57af0bb44d7aff57.png differ
diff --git a/_preview/468/_images/7d81b267a9c45161b7481e72af4f0904352ad1217c6be819ab66de1062d64e79.png b/_preview/468/_images/7d81b267a9c45161b7481e72af4f0904352ad1217c6be819ab66de1062d64e79.png
new file mode 100644
index 000000000..a28859c4d
Binary files /dev/null and b/_preview/468/_images/7d81b267a9c45161b7481e72af4f0904352ad1217c6be819ab66de1062d64e79.png differ
diff --git a/_preview/468/_images/8-github.png b/_preview/468/_images/8-github.png
new file mode 100644
index 000000000..2180ee7ca
Binary files /dev/null and b/_preview/468/_images/8-github.png differ
diff --git a/_preview/468/_images/83addc6377f43aba61e19c6df89c6b42919526ab8a1c2fa5993345fea976dc8c.png b/_preview/468/_images/83addc6377f43aba61e19c6df89c6b42919526ab8a1c2fa5993345fea976dc8c.png
new file mode 100644
index 000000000..95234f3cd
Binary files /dev/null and b/_preview/468/_images/83addc6377f43aba61e19c6df89c6b42919526ab8a1c2fa5993345fea976dc8c.png differ
diff --git a/_preview/468/_images/85725700baea61ddb4dd8d7175b493ae78c68cd51f6205d81a843d2329d07f74.png b/_preview/468/_images/85725700baea61ddb4dd8d7175b493ae78c68cd51f6205d81a843d2329d07f74.png
new file mode 100644
index 000000000..080313381
Binary files /dev/null and b/_preview/468/_images/85725700baea61ddb4dd8d7175b493ae78c68cd51f6205d81a843d2329d07f74.png differ
diff --git a/_preview/468/_images/9-github-seebranches.png b/_preview/468/_images/9-github-seebranches.png
new file mode 100644
index 000000000..ac2096b09
Binary files /dev/null and b/_preview/468/_images/9-github-seebranches.png differ
diff --git a/_preview/468/_images/902761e68b138260acda429d7099911a0461034f02a0772ba3e9656742c693f2.png b/_preview/468/_images/902761e68b138260acda429d7099911a0461034f02a0772ba3e9656742c693f2.png
new file mode 100644
index 000000000..c5ba55aa1
Binary files /dev/null and b/_preview/468/_images/902761e68b138260acda429d7099911a0461034f02a0772ba3e9656742c693f2.png differ
diff --git a/_preview/468/_images/95797f2525a11af3047757c51bd2a0a30bb587e5143e122dd4f6577724098652.png b/_preview/468/_images/95797f2525a11af3047757c51bd2a0a30bb587e5143e122dd4f6577724098652.png
new file mode 100644
index 000000000..30f0608e3
Binary files /dev/null and b/_preview/468/_images/95797f2525a11af3047757c51bd2a0a30bb587e5143e122dd4f6577724098652.png differ
diff --git a/_preview/468/_images/9b1981920854ab74787b0eba8f805517e868b1147a854e1ae7f1884e57027502.png b/_preview/468/_images/9b1981920854ab74787b0eba8f805517e868b1147a854e1ae7f1884e57027502.png
new file mode 100644
index 000000000..c90c9564c
Binary files /dev/null and b/_preview/468/_images/9b1981920854ab74787b0eba8f805517e868b1147a854e1ae7f1884e57027502.png differ
diff --git a/_preview/468/_images/9d3490460bc04608246c1a53e50ccf270dd1958f38ab07b6feef40a79f056cc9.png b/_preview/468/_images/9d3490460bc04608246c1a53e50ccf270dd1958f38ab07b6feef40a79f056cc9.png
new file mode 100644
index 000000000..709e01995
Binary files /dev/null and b/_preview/468/_images/9d3490460bc04608246c1a53e50ccf270dd1958f38ab07b6feef40a79f056cc9.png differ
diff --git a/_preview/468/_images/Anaconda.png b/_preview/468/_images/Anaconda.png
new file mode 100644
index 000000000..7cb61c0f2
Binary files /dev/null and b/_preview/468/_images/Anaconda.png differ
diff --git a/_preview/468/_images/Git-Logo-2Color.png b/_preview/468/_images/Git-Logo-2Color.png
new file mode 100644
index 000000000..18c5b29d7
Binary files /dev/null and b/_preview/468/_images/Git-Logo-2Color.png differ
diff --git a/_preview/468/_images/GitHub-logo.png b/_preview/468/_images/GitHub-logo.png
new file mode 100644
index 000000000..c9904ee88
Binary files /dev/null and b/_preview/468/_images/GitHub-logo.png differ
diff --git a/_preview/468/_images/GitHubContrChecks.png b/_preview/468/_images/GitHubContrChecks.png
new file mode 100644
index 000000000..9677152e9
Binary files /dev/null and b/_preview/468/_images/GitHubContrChecks.png differ
diff --git a/_preview/468/_images/GitHubContrFork.png b/_preview/468/_images/GitHubContrFork.png
new file mode 100644
index 000000000..ac6448e67
Binary files /dev/null and b/_preview/468/_images/GitHubContrFork.png differ
diff --git a/_preview/468/_images/GitHubContrJupyterLab.png b/_preview/468/_images/GitHubContrJupyterLab.png
new file mode 100644
index 000000000..577aaf911
Binary files /dev/null and b/_preview/468/_images/GitHubContrJupyterLab.png differ
diff --git a/_preview/468/_images/GitHubContrPR.png b/_preview/468/_images/GitHubContrPR.png
new file mode 100644
index 000000000..ad4a0cd4f
Binary files /dev/null and b/_preview/468/_images/GitHubContrPR.png differ
diff --git a/_preview/468/_images/GitHubContrXarray.png b/_preview/468/_images/GitHubContrXarray.png
new file mode 100644
index 000000000..54b132484
Binary files /dev/null and b/_preview/468/_images/GitHubContrXarray.png differ
diff --git a/_preview/468/_images/GitHubJoin.png b/_preview/468/_images/GitHubJoin.png
new file mode 100644
index 000000000..b92b9473f
Binary files /dev/null and b/_preview/468/_images/GitHubJoin.png differ
diff --git a/_preview/468/_images/GitHubNumPy.png b/_preview/468/_images/GitHubNumPy.png
new file mode 100644
index 000000000..e14a69c48
Binary files /dev/null and b/_preview/468/_images/GitHubNumPy.png differ
diff --git a/_preview/468/_images/GitHubPythiaDisc.png b/_preview/468/_images/GitHubPythiaDisc.png
new file mode 100644
index 000000000..cf401bc72
Binary files /dev/null and b/_preview/468/_images/GitHubPythiaDisc.png differ
diff --git a/_preview/468/_images/GitHubPythiaDisc156.png b/_preview/468/_images/GitHubPythiaDisc156.png
new file mode 100644
index 000000000..5c367c943
Binary files /dev/null and b/_preview/468/_images/GitHubPythiaDisc156.png differ
diff --git a/_preview/468/_images/GitHubPythiaIssue144.png b/_preview/468/_images/GitHubPythiaIssue144.png
new file mode 100644
index 000000000..90a9e7c87
Binary files /dev/null and b/_preview/468/_images/GitHubPythiaIssue144.png differ
diff --git a/_preview/468/_images/GitHubPythiaIssues.png b/_preview/468/_images/GitHubPythiaIssues.png
new file mode 100644
index 000000000..7f485f97e
Binary files /dev/null and b/_preview/468/_images/GitHubPythiaIssues.png differ
diff --git a/_preview/468/_images/GitHubPythiaIssuesClosed.png b/_preview/468/_images/GitHubPythiaIssuesClosed.png
new file mode 100644
index 000000000..4da651370
Binary files /dev/null and b/_preview/468/_images/GitHubPythiaIssuesClosed.png differ
diff --git a/_preview/468/_images/GitHubPython.png b/_preview/468/_images/GitHubPython.png
new file mode 100644
index 000000000..bc2c17241
Binary files /dev/null and b/_preview/468/_images/GitHubPython.png differ
diff --git a/_preview/468/_images/GitHubXarray.png b/_preview/468/_images/GitHubXarray.png
new file mode 100644
index 000000000..0aceb0f31
Binary files /dev/null and b/_preview/468/_images/GitHubXarray.png differ
diff --git a/_preview/468/_images/GitHub_CodeClone.png b/_preview/468/_images/GitHub_CodeClone.png
new file mode 100644
index 000000000..76f3583b2
Binary files /dev/null and b/_preview/468/_images/GitHub_CodeClone.png differ
diff --git a/_preview/468/_images/GitHub_CodeCloneHTTPS.png b/_preview/468/_images/GitHub_CodeCloneHTTPS.png
new file mode 100644
index 000000000..c3c495a74
Binary files /dev/null and b/_preview/468/_images/GitHub_CodeCloneHTTPS.png differ
diff --git a/_preview/468/_images/GitHub_Fork.png b/_preview/468/_images/GitHub_Fork.png
new file mode 100644
index 000000000..37e867d69
Binary files /dev/null and b/_preview/468/_images/GitHub_Fork.png differ
diff --git a/_preview/468/_images/GitHub_ForkBranch.png b/_preview/468/_images/GitHub_ForkBranch.png
new file mode 100644
index 000000000..46842a6db
Binary files /dev/null and b/_preview/468/_images/GitHub_ForkBranch.png differ
diff --git a/_preview/468/_images/GitHub_ForkDest.png b/_preview/468/_images/GitHub_ForkDest.png
new file mode 100644
index 000000000..6c9903b9a
Binary files /dev/null and b/_preview/468/_images/GitHub_ForkDest.png differ
diff --git a/_preview/468/_images/GitHub_ForkPost.png b/_preview/468/_images/GitHub_ForkPost.png
new file mode 100644
index 000000000..ac91196fd
Binary files /dev/null and b/_preview/468/_images/GitHub_ForkPost.png differ
diff --git a/_preview/468/_images/GitHub_RepoTools.png b/_preview/468/_images/GitHub_RepoTools.png
new file mode 100644
index 000000000..c1382ee3c
Binary files /dev/null and b/_preview/468/_images/GitHub_RepoTools.png differ
diff --git a/_preview/468/_images/GitHub_SandboxRepo.png b/_preview/468/_images/GitHub_SandboxRepo.png
new file mode 100644
index 000000000..23595ba10
Binary files /dev/null and b/_preview/468/_images/GitHub_SandboxRepo.png differ
diff --git a/_preview/468/_images/GitHub_Setup_Advanced_Notification_Filter.png b/_preview/468/_images/GitHub_Setup_Advanced_Notification_Filter.png
new file mode 100644
index 000000000..f62a4feff
Binary files /dev/null and b/_preview/468/_images/GitHub_Setup_Advanced_Notification_Filter.png differ
diff --git a/_preview/468/_images/GitHub_Setup_Advanced_Notification_Settings.png b/_preview/468/_images/GitHub_Setup_Advanced_Notification_Settings.png
new file mode 100644
index 000000000..7bae89e20
Binary files /dev/null and b/_preview/468/_images/GitHub_Setup_Advanced_Notification_Settings.png differ
diff --git a/_preview/468/_images/GitHub_Setup_Advanced_Notifications.png b/_preview/468/_images/GitHub_Setup_Advanced_Notifications.png
new file mode 100644
index 000000000..e797f7720
Binary files /dev/null and b/_preview/468/_images/GitHub_Setup_Advanced_Notifications.png differ
diff --git a/_preview/468/_images/GitHub_Setup_Advanced_Notifications_Browser.png b/_preview/468/_images/GitHub_Setup_Advanced_Notifications_Browser.png
new file mode 100644
index 000000000..38bd48ff2
Binary files /dev/null and b/_preview/468/_images/GitHub_Setup_Advanced_Notifications_Browser.png differ
diff --git a/_preview/468/_images/GitHub_Setup_Advanced_Notifications_Unsubscribe.png b/_preview/468/_images/GitHub_Setup_Advanced_Notifications_Unsubscribe.png
new file mode 100644
index 000000000..b0e220199
Binary files /dev/null and b/_preview/468/_images/GitHub_Setup_Advanced_Notifications_Unsubscribe.png differ
diff --git a/_preview/468/_images/GitHub_Setup_Advanced_Unwatch.png b/_preview/468/_images/GitHub_Setup_Advanced_Unwatch.png
new file mode 100644
index 000000000..dce261fd2
Binary files /dev/null and b/_preview/468/_images/GitHub_Setup_Advanced_Unwatch.png differ
diff --git a/_preview/468/_images/GitHub_Setup_Advanced_Watch.png b/_preview/468/_images/GitHub_Setup_Advanced_Watch.png
new file mode 100644
index 000000000..509121004
Binary files /dev/null and b/_preview/468/_images/GitHub_Setup_Advanced_Watch.png differ
diff --git a/_preview/468/_images/GitHub_Setup_Advanced_Watch_All_Activity.png b/_preview/468/_images/GitHub_Setup_Advanced_Watch_All_Activity.png
new file mode 100644
index 000000000..d6caeefed
Binary files /dev/null and b/_preview/468/_images/GitHub_Setup_Advanced_Watch_All_Activity.png differ
diff --git a/_preview/468/_images/GitHub_Setup_Advanced_https_URL.png b/_preview/468/_images/GitHub_Setup_Advanced_https_URL.png
new file mode 100644
index 000000000..d13cd1b55
Binary files /dev/null and b/_preview/468/_images/GitHub_Setup_Advanced_https_URL.png differ
diff --git a/_preview/468/_images/GitHub_Setup_Advanced_ssh_URL.png b/_preview/468/_images/GitHub_Setup_Advanced_ssh_URL.png
new file mode 100644
index 000000000..43b3767e7
Binary files /dev/null and b/_preview/468/_images/GitHub_Setup_Advanced_ssh_URL.png differ
diff --git a/_preview/468/_images/NSF-NCAR_Lockup-UCAR-Dark_102523.svg b/_preview/468/_images/NSF-NCAR_Lockup-UCAR-Dark_102523.svg
new file mode 100644
index 000000000..538f8ec9f
--- /dev/null
+++ b/_preview/468/_images/NSF-NCAR_Lockup-UCAR-Dark_102523.svg
@@ -0,0 +1 @@
+[SVG markup not shown]
diff --git a/_preview/468/_images/ProjectPythia_Logo_Final-01-Blue.svg b/_preview/468/_images/ProjectPythia_Logo_Final-01-Blue.svg
new file mode 100644
index 000000000..961efc26a
--- /dev/null
+++ b/_preview/468/_images/ProjectPythia_Logo_Final-01-Blue.svg
@@ -0,0 +1 @@
+[SVG markup not shown]
diff --git a/_preview/468/_images/ProjectPythia_Logo_Final-01-Blue1.svg b/_preview/468/_images/ProjectPythia_Logo_Final-01-Blue1.svg
new file mode 100644
index 000000000..961efc26a
--- /dev/null
+++ b/_preview/468/_images/ProjectPythia_Logo_Final-01-Blue1.svg
@@ -0,0 +1 @@
+[SVG markup not shown]
diff --git a/_preview/468/_images/UAlbany-A2-logo-purple-gold.svg b/_preview/468/_images/UAlbany-A2-logo-purple-gold.svg
new file mode 100644
index 000000000..4fdfe3a8e
--- /dev/null
+++ b/_preview/468/_images/UAlbany-A2-logo-purple-gold.svg
@@ -0,0 +1,1125 @@
+[1,125 lines of SVG markup not shown]
diff --git a/_preview/468/_images/Unidata_logo_horizontal_1200x300.svg b/_preview/468/_images/Unidata_logo_horizontal_1200x300.svg
new file mode 100644
index 000000000..0d9fd70fd
--- /dev/null
+++ b/_preview/468/_images/Unidata_logo_horizontal_1200x300.svg
@@ -0,0 +1,891 @@
+[891 lines of SVG markup not shown]
diff --git a/_preview/468/_images/XarrayGithub.png b/_preview/468/_images/XarrayGithub.png
new file mode 100644
index 000000000..d8cc98a66
Binary files /dev/null and b/_preview/468/_images/XarrayGithub.png differ
diff --git a/_preview/468/_images/a1c3799ae40e9796735d776702ceccbc91dfba561a3b9630394b5b85186a488e.png b/_preview/468/_images/a1c3799ae40e9796735d776702ceccbc91dfba561a3b9630394b5b85186a488e.png
new file mode 100644
index 000000000..a37723e2f
Binary files /dev/null and b/_preview/468/_images/a1c3799ae40e9796735d776702ceccbc91dfba561a3b9630394b5b85186a488e.png differ
diff --git a/_preview/468/_images/a39dc4df0a70f1b25137b97d2a2f6f190d7bd31cbf9522d0bd3012b8bef57228.png b/_preview/468/_images/a39dc4df0a70f1b25137b97d2a2f6f190d7bd31cbf9522d0bd3012b8bef57228.png
new file mode 100644
index 000000000..cdaf01864
Binary files /dev/null and b/_preview/468/_images/a39dc4df0a70f1b25137b97d2a2f6f190d7bd31cbf9522d0bd3012b8bef57228.png differ
diff --git a/_preview/468/_images/a83652b1b139a20f0892173c55fac81ee65a385747d9a7a23d087b24d03e2941.png b/_preview/468/_images/a83652b1b139a20f0892173c55fac81ee65a385747d9a7a23d087b24d03e2941.png
new file mode 100644
index 000000000..c0b9c8b34
Binary files /dev/null and b/_preview/468/_images/a83652b1b139a20f0892173c55fac81ee65a385747d9a7a23d087b24d03e2941.png differ
diff --git a/_preview/468/_images/a88ffbf571233e1cab406cf084d24a9ebd7786c66d354437685d413356ae7dc8.png b/_preview/468/_images/a88ffbf571233e1cab406cf084d24a9ebd7786c66d354437685d413356ae7dc8.png
new file mode 100644
index 000000000..7e93ba11b
Binary files /dev/null and b/_preview/468/_images/a88ffbf571233e1cab406cf084d24a9ebd7786c66d354437685d413356ae7dc8.png differ
diff --git a/_preview/468/_images/a8d4a276a13612ceddb7f36f67596005b00ce9a13a49e6bdd2469b33c9b0843b.png b/_preview/468/_images/a8d4a276a13612ceddb7f36f67596005b00ce9a13a49e6bdd2469b33c9b0843b.png
new file mode 100644
index 000000000..f76ba67ad
Binary files /dev/null and b/_preview/468/_images/a8d4a276a13612ceddb7f36f67596005b00ce9a13a49e6bdd2469b33c9b0843b.png differ
diff --git a/_preview/468/_images/a95c64364491de03c669d4e365813e39c907d610292e31c1c911d8278caf3a68.png b/_preview/468/_images/a95c64364491de03c669d4e365813e39c907d610292e31c1c911d8278caf3a68.png
new file mode 100644
index 000000000..380d1a8ec
Binary files /dev/null and b/_preview/468/_images/a95c64364491de03c669d4e365813e39c907d610292e31c1c911d8278caf3a68.png differ
diff --git a/_preview/468/_images/aa812187559e2c138eb60bf62d0e3ffb652cbda1c5c65d3235949bd1e9dc14b1.png b/_preview/468/_images/aa812187559e2c138eb60bf62d0e3ffb652cbda1c5c65d3235949bd1e9dc14b1.png
new file mode 100644
index 000000000..070fed16e
Binary files /dev/null and b/_preview/468/_images/aa812187559e2c138eb60bf62d0e3ffb652cbda1c5c65d3235949bd1e9dc14b1.png differ
diff --git a/_preview/468/_images/af16b2a1f868e02fa589823748113e780755bfed390d2b6c665871f6d1289a6e.png b/_preview/468/_images/af16b2a1f868e02fa589823748113e780755bfed390d2b6c665871f6d1289a6e.png
new file mode 100644
index 000000000..c131d2a84
Binary files /dev/null and b/_preview/468/_images/af16b2a1f868e02fa589823748113e780755bfed390d2b6c665871f6d1289a6e.png differ
diff --git a/_preview/468/_images/array_index.png b/_preview/468/_images/array_index.png
new file mode 100644
index 000000000..d1da26020
Binary files /dev/null and b/_preview/468/_images/array_index.png differ
diff --git a/_preview/468/_images/ba648607e4e0a8597f10c3fe24c4394a2d156e5b5dda85fda98aee901995871d.png b/_preview/468/_images/ba648607e4e0a8597f10c3fe24c4394a2d156e5b5dda85fda98aee901995871d.png
new file mode 100644
index 000000000..3679b9a4e
Binary files /dev/null and b/_preview/468/_images/ba648607e4e0a8597f10c3fe24c4394a2d156e5b5dda85fda98aee901995871d.png differ
diff --git a/_preview/468/_images/bb3882fd0469c9f1b9d26ca957db8f6088c8b757f90980f677c67170d39b99e2.png b/_preview/468/_images/bb3882fd0469c9f1b9d26ca957db8f6088c8b757f90980f677c67170d39b99e2.png
new file mode 100644
index 000000000..0b435d1de
Binary files /dev/null and b/_preview/468/_images/bb3882fd0469c9f1b9d26ca957db8f6088c8b757f90980f677c67170d39b99e2.png differ
diff --git a/_preview/468/_images/bbb3c552653e8e8a68c1b27bae480b9404021d4fc4d0cebbdc172140e80dbd7b.png b/_preview/468/_images/bbb3c552653e8e8a68c1b27bae480b9404021d4fc4d0cebbdc172140e80dbd7b.png
new file mode 100644
index 000000000..0c54b8b20
Binary files /dev/null and b/_preview/468/_images/bbb3c552653e8e8a68c1b27bae480b9404021d4fc4d0cebbdc172140e80dbd7b.png differ
diff --git a/_preview/468/_images/bc1de77ab4b6647d3490d41f003e9efea5a7dce7cc12f152c9b1f1e615daf36c.png b/_preview/468/_images/bc1de77ab4b6647d3490d41f003e9efea5a7dce7cc12f152c9b1f1e615daf36c.png
new file mode 100644
index 000000000..35998426c
Binary files /dev/null and b/_preview/468/_images/bc1de77ab4b6647d3490d41f003e9efea5a7dce7cc12f152c9b1f1e615daf36c.png differ
diff --git a/_preview/468/_images/bc39ec80fee868b87317c071ae9c36dff1c3d39c703a0422d5cf2d7760de77eb.png b/_preview/468/_images/bc39ec80fee868b87317c071ae9c36dff1c3d39c703a0422d5cf2d7760de77eb.png
new file mode 100644
index 000000000..701537682
Binary files /dev/null and b/_preview/468/_images/bc39ec80fee868b87317c071ae9c36dff1c3d39c703a0422d5cf2d7760de77eb.png differ
diff --git a/_preview/468/_images/bda2adbcab335752fa6691a757c16ee3eacbbbdfb0241e2cf6a5a006e8e722be.png b/_preview/468/_images/bda2adbcab335752fa6691a757c16ee3eacbbbdfb0241e2cf6a5a006e8e722be.png
new file mode 100644
index 000000000..4b6ae4800
Binary files /dev/null and b/_preview/468/_images/bda2adbcab335752fa6691a757c16ee3eacbbbdfb0241e2cf6a5a006e8e722be.png differ
diff --git a/_preview/468/_images/be92652e8d668c6cecc563e83a224badac5167a43e15f9df1850340a0c4b1190.png b/_preview/468/_images/be92652e8d668c6cecc563e83a224badac5167a43e15f9df1850340a0c4b1190.png
new file mode 100644
index 000000000..0990660bc
Binary files /dev/null and b/_preview/468/_images/be92652e8d668c6cecc563e83a224badac5167a43e15f9df1850340a0c4b1190.png differ
diff --git a/_preview/468/_images/binder-highlight.png b/_preview/468/_images/binder-highlight.png
new file mode 100644
index 000000000..e52b2ab00
Binary files /dev/null and b/_preview/468/_images/binder-highlight.png differ
diff --git a/_preview/468/_images/branching.gif b/_preview/468/_images/branching.gif
new file mode 100644
index 000000000..31e66bdd0
Binary files /dev/null and b/_preview/468/_images/branching.gif differ
diff --git a/_preview/468/_images/c.png b/_preview/468/_images/c.png
new file mode 100644
index 000000000..913d56f11
Binary files /dev/null and b/_preview/468/_images/c.png differ
diff --git a/_preview/468/_images/c3a5d7cb21215b2af7858c18d99d0f7a103cfbd385ec8abdce018a664f783d96.png b/_preview/468/_images/c3a5d7cb21215b2af7858c18d99d0f7a103cfbd385ec8abdce018a664f783d96.png
new file mode 100644
index 000000000..83364ecbe
Binary files /dev/null and b/_preview/468/_images/c3a5d7cb21215b2af7858c18d99d0f7a103cfbd385ec8abdce018a664f783d96.png differ
diff --git a/_preview/468/_images/c72da04b1bfe3a506395391dafe24ac8521e9cf45f986769fdc135046116258b.png b/_preview/468/_images/c72da04b1bfe3a506395391dafe24ac8521e9cf45f986769fdc135046116258b.png
new file mode 100644
index 000000000..ba53c4add
Binary files /dev/null and b/_preview/468/_images/c72da04b1bfe3a506395391dafe24ac8521e9cf45f986769fdc135046116258b.png differ
diff --git a/_preview/468/_images/caf93b42a7330563cb488018fff85a8cb1fef4526fde9351f6620316599f258c.png b/_preview/468/_images/caf93b42a7330563cb488018fff85a8cb1fef4526fde9351f6620316599f258c.png
new file mode 100644
index 000000000..b9e5b3bb5
Binary files /dev/null and b/_preview/468/_images/caf93b42a7330563cb488018fff85a8cb1fef4526fde9351f6620316599f258c.png differ
diff --git a/_preview/468/_images/cartopy_logo.png b/_preview/468/_images/cartopy_logo.png
new file mode 100644
index 000000000..6533e45d5
Binary files /dev/null and b/_preview/468/_images/cartopy_logo.png differ
diff --git a/_preview/468/_images/cca07a6e3939834018b680d960904b8bd955f56435c9f91392b31351f3b130f4.png b/_preview/468/_images/cca07a6e3939834018b680d960904b8bd955f56435c9f91392b31351f3b130f4.png
new file mode 100644
index 000000000..fdacb4673
Binary files /dev/null and b/_preview/468/_images/cca07a6e3939834018b680d960904b8bd955f56435c9f91392b31351f3b130f4.png differ
diff --git a/_preview/468/_images/cff85267ac822235de65b62d030988c5120d7670de42cf0e19b196cd17e9b95d.png b/_preview/468/_images/cff85267ac822235de65b62d030988c5120d7670de42cf0e19b196cd17e9b95d.png
new file mode 100644
index 000000000..3f0f0c9d8
Binary files /dev/null and b/_preview/468/_images/cff85267ac822235de65b62d030988c5120d7670de42cf0e19b196cd17e9b95d.png differ
diff --git a/_preview/468/_images/codecells.png b/_preview/468/_images/codecells.png
new file mode 100644
index 000000000..f6cd8774d
Binary files /dev/null and b/_preview/468/_images/codecells.png differ
diff --git a/_preview/468/_images/console.png b/_preview/468/_images/console.png
new file mode 100644
index 000000000..41d7e88d8
Binary files /dev/null and b/_preview/468/_images/console.png differ
diff --git a/_preview/468/_images/cyclic.png b/_preview/468/_images/cyclic.png
new file mode 100644
index 000000000..4d2704e5d
Binary files /dev/null and b/_preview/468/_images/cyclic.png differ
diff --git a/_preview/468/_images/d.png b/_preview/468/_images/d.png
new file mode 100644
index 000000000..8045141f2
Binary files /dev/null and b/_preview/468/_images/d.png differ
diff --git a/_preview/468/_images/d220a63f038b9f4b30db4f2ec22815ec0186aa89f941c3332531850b13d0431c.png b/_preview/468/_images/d220a63f038b9f4b30db4f2ec22815ec0186aa89f941c3332531850b13d0431c.png
new file mode 100644
index 000000000..c96de0bdc
Binary files /dev/null and b/_preview/468/_images/d220a63f038b9f4b30db4f2ec22815ec0186aa89f941c3332531850b13d0431c.png differ
diff --git a/_preview/468/_images/da0c0450dfe16164c6f246f4eb896b210f98a2c0ffc8ed8cd55dcd932af3fe09.png b/_preview/468/_images/da0c0450dfe16164c6f246f4eb896b210f98a2c0ffc8ed8cd55dcd932af3fe09.png
new file mode 100644
index 000000000..6e08c6cb8
Binary files /dev/null and b/_preview/468/_images/da0c0450dfe16164c6f246f4eb896b210f98a2c0ffc8ed8cd55dcd932af3fe09.png differ
diff --git a/_preview/468/_images/dask_horizontal.svg b/_preview/468/_images/dask_horizontal.svg
new file mode 100644
index 000000000..868fcfa34
--- /dev/null
+++ b/_preview/468/_images/dask_horizontal.svg
@@ -0,0 +1,13 @@
+[13 lines of SVG markup not shown]
diff --git a/_preview/468/_images/dbdb22ebebfd039cdd491563bb58f0d5d9e864ff0cb846bf696aa84be7cce4c2.png b/_preview/468/_images/dbdb22ebebfd039cdd491563bb58f0d5d9e864ff0cb846bf696aa84be7cce4c2.png
new file mode 100644
index 000000000..bb506c58d
Binary files /dev/null and b/_preview/468/_images/dbdb22ebebfd039cdd491563bb58f0d5d9e864ff0cb846bf696aa84be7cce4c2.png differ
diff --git a/_preview/468/_images/deletingbranch.gif b/_preview/468/_images/deletingbranch.gif
new file mode 100644
index 000000000..f290dd039
Binary files /dev/null and b/_preview/468/_images/deletingbranch.gif differ
diff --git a/_preview/468/_images/diverging.png b/_preview/468/_images/diverging.png
new file mode 100644
index 000000000..887c69121
Binary files /dev/null and b/_preview/468/_images/diverging.png differ
diff --git a/_preview/468/_images/e2c548a09c90fba6075b5ce821a002845513223ba09f7aa7178dc817a1e89ba4.png b/_preview/468/_images/e2c548a09c90fba6075b5ce821a002845513223ba09f7aa7178dc817a1e89ba4.png
new file mode 100644
index 000000000..525617ca2
Binary files /dev/null and b/_preview/468/_images/e2c548a09c90fba6075b5ce821a002845513223ba09f7aa7178dc817a1e89ba4.png differ
diff --git a/_preview/468/_images/e686fffe535db4b54576f28d0dd4eef4cd5dedb9c0c5b9d3390ae7f1d909deba.png b/_preview/468/_images/e686fffe535db4b54576f28d0dd4eef4cd5dedb9c0c5b9d3390ae7f1d909deba.png
new file mode 100644
index 000000000..1c3ae85b7
Binary files /dev/null and b/_preview/468/_images/e686fffe535db4b54576f28d0dd4eef4cd5dedb9c0c5b9d3390ae7f1d909deba.png differ
diff --git a/_preview/468/_images/e80681079b231135b15ac59f11233357b279b29c002d04e31b20fe5d3f75b0e0.png b/_preview/468/_images/e80681079b231135b15ac59f11233357b279b29c002d04e31b20fe5d3f75b0e0.png
new file mode 100644
index 000000000..52b10cd5e
Binary files /dev/null and b/_preview/468/_images/e80681079b231135b15ac59f11233357b279b29c002d04e31b20fe5d3f75b0e0.png differ
diff --git a/_preview/468/_images/e8d7e7d2fcc348a999ece6dd661226910adaeed362e5c4c21f2f6687daa3e77d.png b/_preview/468/_images/e8d7e7d2fcc348a999ece6dd661226910adaeed362e5c4c21f2f6687daa3e77d.png
new file mode 100644
index 000000000..9742ab581
Binary files /dev/null and b/_preview/468/_images/e8d7e7d2fcc348a999ece6dd661226910adaeed362e5c4c21f2f6687daa3e77d.png differ
diff --git a/_preview/468/_images/e9747d6abc546d9b585d536545507a501835a95f1570640178434763126418a1.png b/_preview/468/_images/e9747d6abc546d9b585d536545507a501835a95f1570640178434763126418a1.png
new file mode 100644
index 000000000..8994bbb1c
Binary files /dev/null and b/_preview/468/_images/e9747d6abc546d9b585d536545507a501835a95f1570640178434763126418a1.png differ
diff --git a/_preview/468/_images/e9e07fc7d52a0afdbb4c1769987bdeca7caf514fc915387ff86dd3b58c248a74.png b/_preview/468/_images/e9e07fc7d52a0afdbb4c1769987bdeca7caf514fc915387ff86dd3b58c248a74.png
new file mode 100644
index 000000000..ffca4ea79
Binary files /dev/null and b/_preview/468/_images/e9e07fc7d52a0afdbb4c1769987bdeca7caf514fc915387ff86dd3b58c248a74.png differ
diff --git a/_preview/468/_images/ed8814928ce6dbf5a9d8975e0a62c96a8122736eb1abbc6897539cdd4b5bb76a.png b/_preview/468/_images/ed8814928ce6dbf5a9d8975e0a62c96a8122736eb1abbc6897539cdd4b5bb76a.png
new file mode 100644
index 000000000..06968494c
Binary files /dev/null and b/_preview/468/_images/ed8814928ce6dbf5a9d8975e0a62c96a8122736eb1abbc6897539cdd4b5bb76a.png differ
diff --git a/_preview/468/_images/eec329ef6c890c15078df77034cc328d557fa5ac232e3113f2e4bef281783c95.png b/_preview/468/_images/eec329ef6c890c15078df77034cc328d557fa5ac232e3113f2e4bef281783c95.png
new file mode 100644
index 000000000..e707a7ef3
Binary files /dev/null and b/_preview/468/_images/eec329ef6c890c15078df77034cc328d557fa5ac232e3113f2e4bef281783c95.png differ
diff --git a/_preview/468/_images/extensions.png b/_preview/468/_images/extensions.png
new file mode 100644
index 000000000..e0bdca191
Binary files /dev/null and b/_preview/468/_images/extensions.png differ
diff --git a/_preview/468/_images/f98b90b6eccc24379d8362dfabd794860f1c92c1998078ba525a9e0874d7d542.png b/_preview/468/_images/f98b90b6eccc24379d8362dfabd794860f1c92c1998078ba525a9e0874d7d542.png
new file mode 100644
index 000000000..a1a1f7fae
Binary files /dev/null and b/_preview/468/_images/f98b90b6eccc24379d8362dfabd794860f1c92c1998078ba525a9e0874d7d542.png differ
diff --git a/_preview/468/_images/feef657b828ea7f5bcaa6b7f94aef496c68635c615312e1d8a39e9f9effb337f.png b/_preview/468/_images/feef657b828ea7f5bcaa6b7f94aef496c68635c615312e1d8a39e9f9effb337f.png
new file mode 100644
index 000000000..66c7d5b70
Binary files /dev/null and b/_preview/468/_images/feef657b828ea7f5bcaa6b7f94aef496c68635c615312e1d8a39e9f9effb337f.png differ
diff --git a/_preview/468/_images/foundations_diagram.png b/_preview/468/_images/foundations_diagram.png
new file mode 100644
index 000000000..d0aae8bcf
Binary files /dev/null and b/_preview/468/_images/foundations_diagram.png differ
diff --git a/_preview/468/_images/github-clone-fork.png b/_preview/468/_images/github-clone-fork.png
new file mode 100644
index 000000000..b2cca525c
Binary files /dev/null and b/_preview/468/_images/github-clone-fork.png differ
diff --git a/_preview/468/_images/github-repos.png b/_preview/468/_images/github-repos.png
new file mode 100644
index 000000000..127a2f658
Binary files /dev/null and b/_preview/468/_images/github-repos.png differ
diff --git a/_preview/468/_images/gitworkflow.gif b/_preview/468/_images/gitworkflow.gif
new file mode 100644
index 000000000..af453f033
Binary files /dev/null and b/_preview/468/_images/gitworkflow.gif differ
diff --git a/_preview/468/_images/hsv2gray.png b/_preview/468/_images/hsv2gray.png
new file mode 100644
index 000000000..12287ac17
Binary files /dev/null and b/_preview/468/_images/hsv2gray.png differ
diff --git a/_preview/468/_images/interface_labeled.png b/_preview/468/_images/interface_labeled.png
new file mode 100644
index 000000000..59deea746
Binary files /dev/null and b/_preview/468/_images/interface_labeled.png differ
diff --git a/_preview/468/_images/jupyter_gui.png b/_preview/468/_images/jupyter_gui.png
new file mode 100644
index 000000000..8ac5dfe39
Binary files /dev/null and b/_preview/468/_images/jupyter_gui.png differ
diff --git a/_preview/468/_images/local-execution-model.gif b/_preview/468/_images/local-execution-model.gif
new file mode 100644
index 000000000..795c417e9
Binary files /dev/null and b/_preview/468/_images/local-execution-model.gif differ
diff --git a/_preview/468/_images/m.png b/_preview/468/_images/m.png
new file mode 100644
index 000000000..bd636a62d
Binary files /dev/null and b/_preview/468/_images/m.png differ
diff --git a/_preview/468/_images/magics.png b/_preview/468/_images/magics.png
new file mode 100644
index 000000000..65b74c5e9
Binary files /dev/null and b/_preview/468/_images/magics.png differ
diff --git a/_preview/468/_images/markdown.png b/_preview/468/_images/markdown.png
new file mode 100644
index 000000000..12ad4b4a6
Binary files /dev/null and b/_preview/468/_images/markdown.png differ
diff --git a/_preview/468/_images/markdown_eq.png b/_preview/468/_images/markdown_eq.png
new file mode 100644
index 000000000..9314fb2b3
Binary files /dev/null and b/_preview/468/_images/markdown_eq.png differ
diff --git a/_preview/468/_images/markdown_eq_inline.png b/_preview/468/_images/markdown_eq_inline.png
new file mode 100644
index 000000000..810e931b7
Binary files /dev/null and b/_preview/468/_images/markdown_eq_inline.png differ
diff --git a/_preview/468/_images/misc.png b/_preview/468/_images/misc.png
new file mode 100644
index 000000000..33814e727
Binary files /dev/null and b/_preview/468/_images/misc.png differ
diff --git a/_preview/468/_images/mysci.png b/_preview/468/_images/mysci.png
new file mode 100644
index 000000000..ffd37a733
Binary files /dev/null and b/_preview/468/_images/mysci.png differ
diff --git a/_preview/468/_images/notebook-interface_labeled.png b/_preview/468/_images/notebook-interface_labeled.png
new file mode 100644
index 000000000..0525c649d
Binary files /dev/null and b/_preview/468/_images/notebook-interface_labeled.png differ
diff --git a/_preview/468/_images/perceptually-sequential.png b/_preview/468/_images/perceptually-sequential.png
new file mode 100644
index 000000000..fadd85c28
Binary files /dev/null and b/_preview/468/_images/perceptually-sequential.png differ
diff --git a/_preview/468/_images/pretty-earth.png b/_preview/468/_images/pretty-earth.png
new file mode 100644
index 000000000..96de175fa
Binary files /dev/null and b/_preview/468/_images/pretty-earth.png differ
diff --git a/_preview/468/_images/ps.png b/_preview/468/_images/ps.png
new file mode 100644
index 000000000..381a1a976
Binary files /dev/null and b/_preview/468/_images/ps.png differ
diff --git a/_preview/468/_images/pulling.gif b/_preview/468/_images/pulling.gif
new file mode 100644
index 000000000..20389de00
Binary files /dev/null and b/_preview/468/_images/pulling.gif differ
diff --git a/_preview/468/_images/pullrequest.gif b/_preview/468/_images/pullrequest.gif
new file mode 100644
index 000000000..c8cb2cebb
Binary files /dev/null and b/_preview/468/_images/pullrequest.gif differ
diff --git a/_preview/468/_images/pushing.gif b/_preview/468/_images/pushing.gif
new file mode 100644
index 000000000..a09edf5e2
Binary files /dev/null and b/_preview/468/_images/pushing.gif differ
diff --git a/_preview/468/_images/qualitative.png b/_preview/468/_images/qualitative.png
new file mode 100644
index 000000000..2e0c25793
Binary files /dev/null and b/_preview/468/_images/qualitative.png differ
diff --git a/_preview/468/_images/raw.png b/_preview/468/_images/raw.png
new file mode 100644
index 000000000..9d50a34a8
Binary files /dev/null and b/_preview/468/_images/raw.png differ
diff --git a/_preview/468/_images/remote-execution-model.gif b/_preview/468/_images/remote-execution-model.gif
new file mode 100644
index 000000000..f9cdb7119
Binary files /dev/null and b/_preview/468/_images/remote-execution-model.gif differ
diff --git a/_preview/468/_images/review-approve.png b/_preview/468/_images/review-approve.png
new file mode 100644
index 000000000..d48e19c9a
Binary files /dev/null and b/_preview/468/_images/review-approve.png differ
diff --git a/_preview/468/_images/review-fileschanged.png b/_preview/468/_images/review-fileschanged.png
new file mode 100644
index 000000000..b50c326bd
Binary files /dev/null and b/_preview/468/_images/review-fileschanged.png differ
diff --git a/_preview/468/_images/review-inline.png b/_preview/468/_images/review-inline.png
new file mode 100644
index 000000000..076181e1b
Binary files /dev/null and b/_preview/468/_images/review-inline.png differ
diff --git a/_preview/468/_images/review-request.png b/_preview/468/_images/review-request.png
new file mode 100644
index 000000000..72f035b02
Binary files /dev/null and b/_preview/468/_images/review-request.png differ
diff --git a/_preview/468/_images/running-tabs-kernels.png b/_preview/468/_images/running-tabs-kernels.png
new file mode 100644
index 000000000..f3c63f93d
Binary files /dev/null and b/_preview/468/_images/running-tabs-kernels.png differ
diff --git a/_preview/468/_images/s1.png b/_preview/468/_images/s1.png
new file mode 100644
index 000000000..738f6a059
Binary files /dev/null and b/_preview/468/_images/s1.png differ
diff --git a/_preview/468/_images/s2.png b/_preview/468/_images/s2.png
new file mode 100644
index 000000000..c47fc1361
Binary files /dev/null and b/_preview/468/_images/s2.png differ
diff --git a/_preview/468/_images/sequential.png b/_preview/468/_images/sequential.png
new file mode 100644
index 000000000..5730a3e82
Binary files /dev/null and b/_preview/468/_images/sequential.png differ
diff --git a/_preview/468/_images/sequential2.png b/_preview/468/_images/sequential2.png
new file mode 100644
index 000000000..162545b58
Binary files /dev/null and b/_preview/468/_images/sequential2.png differ
diff --git a/_preview/468/_images/special_vars.png b/_preview/468/_images/special_vars.png
new file mode 100644
index 000000000..72812f4dc
Binary files /dev/null and b/_preview/468/_images/special_vars.png differ
diff --git a/_preview/468/_images/suggestion.png b/_preview/468/_images/suggestion.png
new file mode 100644
index 000000000..fe15a3f03
Binary files /dev/null and b/_preview/468/_images/suggestion.png differ
diff --git a/_preview/468/_images/table-contents.png b/_preview/468/_images/table-contents.png
new file mode 100644
index 000000000..24dbd675b
Binary files /dev/null and b/_preview/468/_images/table-contents.png differ
diff --git a/_preview/468/_images/terminal.png b/_preview/468/_images/terminal.png
new file mode 100644
index 000000000..30693eae2
Binary files /dev/null and b/_preview/468/_images/terminal.png differ
diff --git a/_preview/468/_images/txt-editor.png b/_preview/468/_images/txt-editor.png
new file mode 100644
index 000000000..07ff3d668
Binary files /dev/null and b/_preview/468/_images/txt-editor.png differ
diff --git a/_preview/468/_images/xarray-split-apply-combine.jpeg b/_preview/468/_images/xarray-split-apply-combine.jpeg
new file mode 100644
index 000000000..25d4b84de
Binary files /dev/null and b/_preview/468/_images/xarray-split-apply-combine.jpeg differ
diff --git a/_preview/468/_sources/appendix/how-to-contribute.md b/_preview/468/_sources/appendix/how-to-contribute.md
new file mode 100644
index 000000000..f001d1e8b
--- /dev/null
+++ b/_preview/468/_sources/appendix/how-to-contribute.md
@@ -0,0 +1,70 @@
+# Pythia Foundations Contributor's Guide
+
+```{note}
+This content is under construction!
+```
+
+General information on how to contribute to any Project Pythia repository
+may be found [here][pythia contributor's guide].
+
+This page will eventually contain a full guide to contributing to Project Pythia. As GitHub Pull Requests are an important part of contributing to Pythia, this guide will cross-reference tutorials on GitHub and Pull Requests.
+
+If you would like to comment on anything in Pythia Foundations that you feel needs work, you can use the "open issue" or "suggest edit" buttons at the top of any Pythia Foundations page. These buttons appear when you hover over the GitHub Octocat logo. Clicking one of these buttons takes you to the relevant page on GitHub, where the entirety of the Pythia Foundations material is hosted. To actually suggest changes, you must have a free GitHub account, as described in the GitHub section of Pythia Foundations. This contributor's guide is strictly for Pythia Foundations; for general Project Pythia contribution guidelines, see the main [Project Pythia Contributor's Guide][pythia contributor's guide].
+
+To quickly provide feedback about minor issues without the use of GitHub, you can also use this [Google Form](https://docs.google.com/forms/d/e/1FAIpQLSeVa1TC9xM-dk7qIE2e8bsgSrIP82yYDNw3wew3J46eREJa4w/viewform?usp=sf_link).
+
+## Contributing a new Jupyter Notebook
+
+If you'd like to contribute a Jupyter Notebook to these materials, please reference our [template](template) viewable on the next page. This template is available to you in `appendix/template.ipynb` if you've cloned the [source repository](https://github.com/ProjectPythia/pythia-foundations), or available as a download [directly from GitHub](https://github.com/ProjectPythia/pythia-foundations/raw/main/appendix/template.ipynb).
+
+## Building the site
+
+### Create a conda environment
+
+The first time you check out this repository, run:
+
+```bash
+conda env update -f environment.yml
+```
+
+This will create or update the dev environment (`pythia-book-dev`).
+
+### Install `pre-commit` hooks
+
+This repository includes `pre-commit` hooks (defined in `.pre-commit-config.yaml`). To activate/install these pre-commit hooks, run:
+
+```bash
+conda activate pythia-book-dev
+pre-commit install
+```
+
+This is also a one-time step.
+
+_NOTE_: The `pre-commit` package is already installed via the `pythia-book-dev` conda environment.
+
+### Building the book locally
+
+To build the book locally, run the following:
+
+```bash
+conda activate pythia-book-dev
+jupyter-book build .
+```
+
+Finally, you can view the book by opening the file `_build/html/index.html` with your favorite web browser. On most platforms you can simply run:
+
+```bash
+open _build/html/index.html
+```
+
+### Keeping your dev environment up to date
+
+It's good practice to update the packages in your `pythia-book-dev` conda environment frequently to their latest versions, especially if it's been a while since you used it. If the `jupyter-book build .` command above generates error messages, that is a good indication that your conda environment may be out of date.
+
+To update all packages in the currently activated environment to their latest versions, do this:
+
+```bash
+conda update --all
+```
+
+[pythia contributor's guide]: https://projectpythia.org/contributing.html
diff --git a/_preview/468/_sources/appendix/template.ipynb b/_preview/468/_sources/appendix/template.ipynb
new file mode 100644
index 000000000..4b7942ba1
--- /dev/null
+++ b/_preview/468/_sources/appendix/template.ipynb
@@ -0,0 +1,367 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Project Pythia Notebook Template\n",
+ "\n",
+ "## How to Use This Page\n",
+ "\n",
+ "This page is designed as a template. As such, each section contains instructions for the content added to the equivalent section of a new notebook, with the exception of this section, and the Setting Up a New Notebook section. Because this is not a tutorial, the overall structure of the page does not need to be cohesive.\n",
+ "\n",
+ "## Setting Up a New Notebook\n",
+ "\n",
+ "This section lists the first steps for configuring a Jupyter Notebook for inclusion in Pythia Foundations. First, if you have an image relevant to your notebook, such as a [logo](https://github.com/numpy/numpy/blob/main/doc/source/_static/numpylogo.svg), link to this image at the top of the notebook. The following Markdown example illustrates the correct technique for linking such an image:\n",
+ "\n",
+ "> `![](http://link.com/to/image.png \"image alt text\")`\n",
+ "\n",
+ "You can also use an `img` tag in raw HTML to embed your logo or other image. Second, make sure to add an HTML `alt` tag to any image in your notebook. This includes any type of image, including logos, wherever and however they appear in your notebook. Adding this tag improves accessibility and allows more people to properly access your notebook.\n",
+ "\n",
+ ""
+ ]
+ },
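+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For instance, a logo could be embedded with an `alt` attribute as follows (the path, size, and alt text here are purely illustrative):\n",
+ "\n",
+ "> `<img src=\"images/logo.png\" alt=\"Project logo\" width=\"250\">`"
+ ]
+ },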
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Project Pythia Notebook Template\n",
+ "\n",
+ "Each notebook must be properly titled with a top level Markdown header, i.e., a header title prefixed by a single # mark. Nowhere else in the notebook should you use a top level header. This header will be automatically used by the Pythia book-building process to generate the page title, which will then be added to the navbar, table of contents, etc. As such, the header needs to be short, concise, and descriptive. After the header line, add a separate Jupyter Notebook cell with the text `---`. This adds a separating line used to separate the title from the overview and prerequisites. This technique will also be used later to separate other sections."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Overview\n",
+ "If your notebook contains an introductory paragraph, include the paragraph at the start of this section. Such paragraphs must be short, and relevant to the content of the notebook. After the introductory paragraph, it is required to list the notebook topics, in the format shown below:\n",
+ "\n",
+ "1. This is a numbered list of the specific topics\n",
+ "1. These should map approximately to your main sections of content\n",
+ "1. Or each second-level, `##`, header in your notebook\n",
+ "1. Keep the size and scope of your notebook in check\n",
+ "1. And be sure to let the reader know up front the important concepts they'll be leaving with"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Prerequisites\n",
+ "This part of the Pythia Notebook Template was inspired by another template; in this case, [the template](https://github.com/alan-turing-institute/the-turing-way/blob/master/book/templates/chapter-template/chapter-landing-page.md) for the Jupyter Book known as [The Turing Way](https://the-turing-way.netlify.app).\n",
+ "\n",
+ "Following the overview section, the prerequisites section must enumerate a list of concepts and Python packages. These concepts and packages must comprise the knowledge that readers of your notebook **must know and understand** in order to successfully learn the notebook material. Each concept or package listed must link to a Pythia Foundations tutorial, or to a relevant external resource. To build the prerequisite table, first copy the following Markdown table into your notebook. You must then edit the table to contain your notebook prerequisites. Each row must contain the name of the concept, along with a link to the tutorial, either on Pythia Foundations or a relevant external resource. It must also be noted whether the concept is helpful or necessary.\n",
+ "\n",
+ "\n",
+ "| Concepts | Importance | Notes |\n",
+ "| --- | --- | --- |\n",
+ "| [Intro to Cartopy](../core/cartopy/cartopy) | Necessary | |\n",
+ "| [Understanding of NetCDF](some-link-to-external-resource) | Helpful | Familiarity with metadata structure |\n",
+ "| Project management | Helpful | |\n",
+ "\n",
+ "- **Time to learn:** You must provide an estimate of the total time to learn the listed concepts. The general rule is to estimate 5 minutes for each subsection in each concept, or 10 minutes for especially lengthy subsections. Add the estimates for each subsection to obtain the time to learn. Also, please note that overestimates are better than underestimates.\n",
+ "- **System requirements**: \n",
+ " - If there are any system, version, or non-Python software requirements for the material in your notebook, these must be listed in a system requirement list. \n",
+ " - If your notebook has no extra requirements, the **System Requirements** section should be completely removed. \n",
+ " - Note that Python packages do not count as system requirements; these should be listed in the Imports section."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Imports\n",
+ "Before beginning this section, add a Markdown cell with a `---` divider. This section should list import statements for any Python packages required for your notebook content. Optionally, you can include a description above the code cell as well."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import sys"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Your first content section"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Replace this template section with your first section of tutorial material; all tutorial material should roughly match up with the objectives stated in the Overview section. Your notebook sections should be laid out as a narrative, each containing interspersed Markdown text, images, code cells, and other content as necessary."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Code cells like this are an essential part of your notebook\n",
+ "print(\"Hello world!\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### A content subsection\n",
+ "To provide more detail about concepts in content sections, it is recommended to create content subsections. As shown in this template section, subsections are added through lower-level Markdown headers, and automatically populate navbars, both when viewing the notebook in JupyterLab and when viewing the notebook as a Pythia Foundations tutorial page."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# some subsection code\n",
+ "new = \"helpful information\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Another content subsection\n",
+ "This subsection was created in the same way as the previous subsection. Subsections often contain detailed information relevant to the material. An example relevant to this template is \"Try to avoid using code comments as narrative; instead, let them only exist for brief clarification as needed.\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Your second content section\n",
+ "The second content section should roughly match up with the second learning objective of your notebook. For this template, the objective in question is to learn levels of Markdown headers. Below is a demonstration of Markdown header levels; however, be aware that each new header is incorporated into the navbars."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### This example is\n",
+ "\n",
+ "#### a quick demonstration\n",
+ "\n",
+ "##### of further and further\n",
+ "\n",
+ "###### header levels"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Each section in your notebook can also contain $\\LaTeX$ equations, enabled through MathJax. In the following example, we illustrate some sample MathJax equations. (Rendering instructions, as well as detailed information about MathJax, can be found in [this documentation](https://jupyter-notebook.readthedocs.io/en/stable/examples/Notebook/Typesetting%20Equations.html).)\n",
+ "\n",
+ "\\begin{align}\n",
+ "\\dot{x} & = \\sigma(y-x) \\\\\n",
+ "\\dot{y} & = \\rho x - y - xz \\\\\n",
+ "\\dot{z} & = -\\beta z + xy\n",
+ "\\end{align}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "There are many helpful resources for learning Markdown and customizing Jupyter Markdown cells listed on [this useful guide](https://www.markdownguide.org/basic-syntax/). In addition, there is information on formatting relevant specifically to Jupyter on this [Jupyter documentation page](https://jupyter-notebook.readthedocs.io/en/stable/examples/Notebook/Working%20With%20Markdown%20Cells.html). Finally, perfectionism is encouraged in Pythia Foundations, and there are many available resources for formatting notebooks in a perfectionistic manner."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Last Section\n",
+ "\n",
+ "It is possible to embed raw HTML into Jupyter Markdown cells, as shown above with the Project Pythia logo. This allows for many forms of additional content; the most used form in Pythia is message boxes, as illustrated below. (If you are viewing this page as a Jupyter Notebook, you can also edit the following Markdown cell to view the underlying code.)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
Info
\n",
+ " This is an info box. Info boxes contain additional information about tutorial concepts.\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Making a notebook for Pythia inevitably requires some trial and error for formatting, among other things. If you feel the formatting is lacking in some way, feel free to adjust it in different ways until it is up to your standards. Copying and editing Markdown cells is a good way to try different formatting options.\n",
+ "\n",
+ "In addition, there are other types of boxes, known as `admonitions`, that can be inserted into a tutorial page:"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
Success
\n",
+ " This is a success box. Success boxes are usually placed at the end of a set of examples, and usually show a message relating to the final state of the examples.\n",
+ "
\n",
+ " This is a warning box. Warning boxes are usually used to indicate a situation where making a mistake, such as a typo, can cause issues with the tutorial content.\n",
+ "
\n",
+ " This is a danger box. Danger boxes are usually used to indicate a situation where making a mistake, such as a typo, can cause more serious issues such as loss of data.\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In addition, it is helpful and highly recommended to add cell tags to your Jupyter cells. These tags allow for [customization](https://jupyterbook.org/interactive/hiding.html) of content display, especially for code cells. In addition, cell tags provide a means for [demonstrating errors](https://jupyterbook.org/content/execute.html#dealing-with-code-that-raises-errors) without breaking any production environments. If you are unfamiliar with cell tags, you can review this [brief demonstration](https://jupyterbook.org/content/metadata.html#jupyter-cell-tags) provided by Jupyter Book; this demonstration covers cell tags in Jupyter Notebook and Jupyter Lab, as well as fully manual cell tags."
+ ]
+ },
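+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Tags are normally added through the Jupyter interfaces described in the demonstration linked above. As a purely illustrative sketch (the filename, cell index, and tag below are hypothetical), tags can also be added programmatically with the `nbformat` package:\n",
+ "\n",
+ "```python\n",
+ "import nbformat\n",
+ "\n",
+ "# Read the notebook, tag one cell so its input is hidden, and write it back out\n",
+ "nb = nbformat.read('my-notebook.ipynb', as_version=4)\n",
+ "nb.cells[3].metadata['tags'] = ['hide-input']  # cell index 3 is illustrative\n",
+ "nbformat.write(nb, 'my-notebook.ipynb')\n",
+ "```"
+ ]
+ },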
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Summary\n",
+ "Before adding a summary, you must first add another Markdown cell containing `---`, which marks the end of the content body. A good Summary section contains a brief single paragraph that summarizes the tutorial content. The key content elements and their relation to the tutorial objectives should also be covered. Finally, the most important concepts should be listed again in detail.\n",
+ "\n",
+ "### What's next?\n",
+ "This section should briefly describe the content in the page following your tutorial sequentially. You can find the page sequentially following yours using the Next link at the bottom of the page, or using the sidebar; Jupyter Book should pre-populate this. In addition, if your tutorial leads into other Pythia Foundations content, or tutorials found outside Pythia Foundations, these other tutorials can be linked to as well."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Resources and references\n",
+ "In this section, you must provide detailed citations and references to any external content used in your tutorial. Many types of external content are designed in as much detail as Pythia Foundations pages, and crediting the author is essential. In addition, this section can contain links to additional external content, such as reading, documentation, etc. Once this section is complete, your notebook is finished. After giving your new notebook a quick review, you can request the addition of the notebook to Pythia Foundations by sending the team a GitHub Pull Request. Here are a few final notes pertaining to working with Jupyter and Pythia:\n",
+ " - In order to confirm that your notebook runs from start to finish without errors, hangs, etc., go to the `Kernel` menu in Jupyter Lab and select `Restart Kernel and Run All Cells`.\n",
+ " - In order to prepare your notebook to be committed to Pythia Foundations, go to the `Kernel` menu in Jupyter Lab and select `Restart Kernel and Clear All Outputs`. After the notebook is committed, the Jupyter cells will be run and optimized for Pythia automatically.\n",
+ " - If you wish to take credit for your notebook, you can add contact information in this section; this is completely optional.\n",
+ " - It is very important that any code, information, images, etc. referenced in the above sections of your notebook contains appropriate attribution of authorship in this section.\n",
+ " - Finally, it is imperative that you must have a legal right to use any content included in your notebook. **Do not commit copyright infringement or plagiarism.**\n",
+ " \n",
+ "The Project Pythia team thanks you greatly for contributing to Pythia Foundations."
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.9"
+ },
+ "nbdime-conflicts": {
+ "local_diff": [
+ {
+ "diff": [
+ {
+ "diff": [
+ {
+ "key": 0,
+ "op": "addrange",
+ "valuelist": [
+ "Python 3"
+ ]
+ },
+ {
+ "key": 0,
+ "length": 1,
+ "op": "removerange"
+ }
+ ],
+ "key": "display_name",
+ "op": "patch"
+ }
+ ],
+ "key": "kernelspec",
+ "op": "patch"
+ }
+ ],
+ "remote_diff": [
+ {
+ "diff": [
+ {
+ "diff": [
+ {
+ "key": 0,
+ "op": "addrange",
+ "valuelist": [
+ "Python3"
+ ]
+ },
+ {
+ "key": 0,
+ "length": 1,
+ "op": "removerange"
+ }
+ ],
+ "key": "display_name",
+ "op": "patch"
+ }
+ ],
+ "key": "kernelspec",
+ "op": "patch"
+ }
+ ]
+ },
+ "toc-autonumbering": false
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/_preview/468/_sources/core/cartopy.md b/_preview/468/_sources/core/cartopy.md
new file mode 100644
index 000000000..03a47555a
--- /dev/null
+++ b/_preview/468/_sources/core/cartopy.md
@@ -0,0 +1,20 @@
+# Cartopy
+
+This section contains tutorials on plotting maps with [Cartopy](https://scitools.org.uk/cartopy/docs/latest/); it is cross-referenced with tutorials on [Xarray](xarray) and [Matplotlib](matplotlib).
+
+---
+
+From the [Cartopy website](https://scitools.org.uk/cartopy/docs/latest):
+
+> Cartopy is a Python package designed for geospatial data processing in order to
+> produce maps and other geospatial data analyses.
+>
+> Cartopy makes use of the powerful PROJ.4, NumPy and Shapely libraries and includes a programmatic interface
+> built on top of Matplotlib for the creation of publication quality maps.
+>
+> Key features of Cartopy are its object-oriented [projection definitions](https://scitools.org.uk/cartopy/docs/latest/reference/crs.html#list-of-projections),
+> and its ability to transform points, lines, vectors, polygons and images between those projections.
+
+Before working through the Cartopy notebooks in this section of Pythia Foundations, you should first have a basic knowledge of [Matplotlib](matplotlib).
+
+In addition, please note that Cartopy's geographic-feature functionality makes use of shapefiles served directly by [Natural Earth](https://www.naturalearthdata.com/).
diff --git a/_preview/468/_sources/core/cartopy/cartopy.ipynb b/_preview/468/_sources/core/cartopy/cartopy.ipynb
new file mode 100644
index 000000000..79f686b41
--- /dev/null
+++ b/_preview/468/_sources/core/cartopy/cartopy.ipynb
@@ -0,0 +1,757 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "# Introduction to Cartopy"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "___"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Overview\n",
+ "\n",
+ "The concepts covered in this section include:\n",
+ "\n",
+ "1. Learning core Cartopy concepts: map projections and `GeoAxes`\n",
+ "2. Exploring some of Cartopy's map projections\n",
+ "3. Creating regional maps\n",
+ "\n",
+ "This tutorial will lead you through some basics of creating maps with specified projections using Cartopy, and adding geographical features (like coastlines and borders) to those maps.\n",
+ "\n",
+ "Plotting data on map projections will be covered in later tutorials."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Prerequisites\n",
+ "\n",
+ "| Concepts | Importance | Notes |\n",
+ "| --- | --- | --- |\n",
+ "| [Matplotlib](../matplotlib) | Necessary | |\n",
+ "\n",
+ "- **Time to learn**: 30 minutes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "___"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Imports\n",
+ "\n",
+ "Here, we import the main libraries of Cartopy: crs and feature. In addition, we import numpy, as well as matplotlib's pyplot interface. Finally, we import a library called warnings, and use it to remove extraneous warnings that Cartopy produces in later examples."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import warnings\n",
+ "\n",
+ "import matplotlib.pyplot as plt\n",
+ "import numpy as np\n",
+ "from cartopy import crs as ccrs, feature as cfeature\n",
+ "\n",
+ "# Suppress warnings issued by Cartopy when downloading data files\n",
+ "warnings.filterwarnings('ignore')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "___"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Basic concepts: map projections and `GeoAxes`"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Extend Matplotlib's `axes` into georeferenced `GeoAxes`"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Recall from earlier tutorials that a *figure* in Matplotlib has two elements: a `Figure` object, and a list of one or more `Axes` objects (subplots).\n",
+ "\n",
+ "Since we imported `cartopy.crs`, we now have access to Cartopy's *Coordinate Reference System*, which contains many geographical projections. We can specify one of these projections for an `Axes` object to convert it into a `GeoAxes` object. This will effectively *georeference* the subplot. Examples of converting `Axes` objects into `GeoAxes` objects can be found later in this section."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Create a map with a specified projection\n",
+ "\n",
+ "In this example, we'll create a `GeoAxes` object that uses the `PlateCarree` projection. `PlateCarree` is a global lat-lon map projection in which each point is evenly spaced in terms of degrees. The name \"Plate Carree\" is French for \"flat square\"."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig = plt.figure(figsize=(11, 8.5))\n",
+ "ax = plt.subplot(1, 1, 1, projection=ccrs.PlateCarree(central_longitude=-75))\n",
+ "ax.set_title(\"A Geo-referenced subplot, Plate Carree projection\");"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Although the figure seems empty, it has, in fact, been georeferenced using a map projection; this projection is provided by Cartopy's `crs` (coordinate reference system) class. We can now add in cartographic features, in the form of *shapefiles*, to our subplot. One such cartographic feature is coastlines, which can be added to our subplot using the callable `GeoAxes` method simply called `coastlines`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ax.coastlines()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
Info
\n",
+ " To get the figure to display again with the features that we've added since the original display, just type the name of the Figure object in its own cell.\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Add cartographic features to the map"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Cartopy provides other cartographic features via its `features` class, which was imported at the beginning of this page, under the name `cfeature`. These cartographic features are laid out as data in shapefiles. The shapefiles are downloaded when their cartographic features are used for the first time in a script or notebook, and they are downloaded from https://www.naturalearthdata.com/. Once downloaded, they \"live\" in your `~/.local/share/cartopy` directory (note the `~` represents your home directory)."
+ ]
+ },
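+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As an optional aside, you can check where Cartopy caches these downloaded files on your system. This minimal sketch uses Cartopy's `config` dictionary:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import cartopy\n",
+ "\n",
+ "# Directory where Cartopy stores downloaded Natural Earth shapefiles\n",
+ "print(cartopy.config['data_dir'])"
+ ]
+ },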
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can add these features to our subplot via the `add_feature` method; this method allows the definition of attributes using arguments, similar to Matplotlib's `plot` method. A list of the various Natural Earth shapefiles can be found at https://scitools.org.uk/cartopy/docs/latest/matplotlib/feature_interface.html. In this example, we add borders and U. S. state lines to our subplot:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ax.add_feature(cfeature.BORDERS, linewidth=0.5, edgecolor='black')\n",
+ "ax.add_feature(cfeature.STATES, linewidth=0.3, edgecolor='brown')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Once again, referencing the `Figure` object will re-render the figure in the notebook, now including the two features."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Explore some of Cartopy's map projections"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Mollweide Projection (often used with global satellite mosaics)\n",
+ "\n",
+ "To save typing later, we can define a projection object to store the definition of the map projection. We can then use this object in the `projection` kwarg of the `subplot` method when creating a `GeoAxes` object. This allows us to use this exact projection in later scripts or Jupyter Notebook cells using simply the object name, instead of repeating the same call to `ccrs`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig = plt.figure(figsize=(11, 8.5))\n",
+ "projMoll = ccrs.Mollweide(central_longitude=0)\n",
+ "ax = plt.subplot(1, 1, 1, projection=projMoll)\n",
+ "ax.set_title(\"Mollweide Projection\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Add in the cartographic shapefiles\n",
+ "\n",
+ "This example shows how to add cartographic features to the Mollweide projection defined earlier:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ax.coastlines()\n",
+ "ax.add_feature(cfeature.BORDERS, linewidth=0.5, edgecolor='blue')\n",
+ "fig"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Add a fancy background image to the map.\n",
+ "\n",
+ "We can also use the `stock_img` method to add a pre-created background to a Mollweide-projection plot:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ax.stock_img()\n",
+ "fig"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Lambert Azimuthal Equal Area Projection\n",
+ "\n",
+ "This example is similar to the above example set, except it uses a Lambert azimuthal equal-area projection instead:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig = plt.figure(figsize=(11, 8.5))\n",
+ "projLae = ccrs.LambertAzimuthalEqualArea(central_longitude=0.0, central_latitude=0.0)\n",
+ "ax = plt.subplot(1, 1, 1, projection=projLae)\n",
+ "ax.set_title(\"Lambert Azimuthal Equal Area Projection\")\n",
+ "ax.coastlines()\n",
+ "ax.add_feature(cfeature.BORDERS, linewidth=0.5, edgecolor='blue');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Create regional maps"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Cartopy's `set_extent` method\n",
+ "\n",
+ "For this example, let's create another PlateCarree projection, but this time, we'll use Cartopy's `set_extent` method to restrict the map coverage to a North American view. Let's also choose a lower resolution for coastlines, just to illustrate how one can specify that. In addition, let's also plot the latitude and longitude lines."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Natural Earth defines three resolutions for cartographic features, specified as the strings \"10m\", \"50m\", and \"110m\". Only one resolution can be used at a time, and the higher the number, the less detailed the feature becomes. You can view the documentation for this functionality at the following reference link: https://www.naturalearthdata.com/downloads/ "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "projPC = ccrs.PlateCarree()\n",
+ "lonW = -140\n",
+ "lonE = -40\n",
+ "latS = 15\n",
+ "latN = 65\n",
+ "cLat = (latN + latS) / 2\n",
+ "cLon = (lonW + lonE) / 2\n",
+ "res = '110m'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig = plt.figure(figsize=(11, 8.5))\n",
+ "ax = plt.subplot(1, 1, 1, projection=projPC)\n",
+ "ax.set_title('Plate Carree')\n",
+ "gl = ax.gridlines(\n",
+ " draw_labels=True, linewidth=2, color='gray', alpha=0.5, linestyle='--'\n",
+ ")\n",
+ "ax.set_extent([lonW, lonE, latS, latN], crs=projPC)\n",
+ "ax.coastlines(resolution=res, color='black')\n",
+ "ax.add_feature(cfeature.STATES, linewidth=0.3, edgecolor='brown')\n",
+ "ax.add_feature(cfeature.BORDERS, linewidth=0.5, edgecolor='blue');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
Info
\n",
+ " Please note, even though the calls to the `subplot` method use different projections, the calls to `set_extent` use PlateCarree. This ensures that the values we passed into `set_extent` will be transformed from degrees into the values appropriate for the projection we use for the map.\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The PlateCarree projection exaggerates the spatial extent of regions closer to the poles. In the following examples, we use `set_extent` with stereographic and Lambert-conformal projections, which display polar regions more accurately."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "projStr = ccrs.Stereographic(central_longitude=cLon, central_latitude=cLat)\n",
+ "fig = plt.figure(figsize=(11, 8.5))\n",
+ "ax = plt.subplot(1, 1, 1, projection=projStr)\n",
+ "ax.set_title('Stereographic')\n",
+ "gl = ax.gridlines(\n",
+ " draw_labels=True, linewidth=2, color='gray', alpha=0.5, linestyle='--'\n",
+ ")\n",
+ "ax.set_extent([lonW, lonE, latS, latN], crs=projPC)\n",
+ "ax.coastlines(resolution=res, color='black')\n",
+ "ax.add_feature(cfeature.STATES, linewidth=0.3, edgecolor='brown')\n",
+ "ax.add_feature(cfeature.BORDERS, linewidth=0.5, edgecolor='blue');"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "projLcc = ccrs.LambertConformal(central_longitude=cLon, central_latitude=cLat)\n",
+ "fig = plt.figure(figsize=(11, 8.5))\n",
+ "ax = plt.subplot(1, 1, 1, projection=projLcc)\n",
+ "ax.set_title('Lambert Conformal')\n",
+ "gl = ax.gridlines(\n",
+ " draw_labels=True, linewidth=2, color='gray', alpha=0.5, linestyle='--'\n",
+ ")\n",
+ "ax.set_extent([lonW, lonE, latS, latN], crs=projPC)\n",
+ "ax.coastlines(resolution='110m', color='black')\n",
+ "ax.add_feature(cfeature.STATES, linewidth=0.3, edgecolor='brown')\n",
+ "# End last line with a semicolon to suppress text output to the screen\n",
+ "ax.add_feature(cfeature.BORDERS, linewidth=0.5, edgecolor='blue');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
Info
\n",
+ " Lat/lon labeling for projections other than Mercator and PlateCarree is a recent addition to Cartopy. As you can see, work still needs to be done to improve the placement of labels.\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Create a regional map centered over New York State "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Here we set the domain, which defines the geographical region to be plotted. (This is used in the next section in a `set_extent` call.) Since these coordinates are expressed in degrees, they correspond to a PlateCarree projection, even though the map projection is set to LambertConformal."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
Warning
\n",
+ " Be patient; when plotting a small geographical area, the high-resolution \"10m\" shapefiles are used by default. As a result, these plots take longer to create, especially if the shapefiles are not yet downloaded from Natural Earth. Similar issues can occur whenever a `GeoAxes` object is transformed from one coordinate system to another. (This will be covered in more detail in a subsequent page.)\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "latN = 45.2\n",
+ "latS = 40.2\n",
+ "lonW = -80.0\n",
+ "lonE = -71.5\n",
+ "cLat = (latN + latS) / 2\n",
+ "cLon = (lonW + lonE) / 2\n",
+ "projLccNY = ccrs.LambertConformal(central_longitude=cLon, central_latitude=cLat)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Add some predefined features\n",
+ "\n",
+ "Some cartographical features are predefined as constants in the `cartopy.feature` package. The resolution of these features depends on the amount of geographical area in your map, specified by `set_extent`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig = plt.figure(figsize=(15, 10))\n",
+ "ax = plt.subplot(1, 1, 1, projection=projLccNY)\n",
+ "ax.set_extent([lonW, lonE, latS, latN], crs=projPC)\n",
+ "ax.set_facecolor(cfeature.COLORS['water'])\n",
+ "ax.add_feature(cfeature.LAND)\n",
+ "ax.add_feature(cfeature.COASTLINE)\n",
+ "ax.add_feature(cfeature.BORDERS, linestyle='--')\n",
+ "ax.add_feature(cfeature.LAKES, alpha=0.5)\n",
+ "ax.add_feature(cfeature.STATES)\n",
+ "ax.add_feature(cfeature.RIVERS)\n",
+ "ax.set_title('New York and Vicinity');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
Note:
\n",
+ " For high-resolution Natural Earth shapefiles such as this, while we could add Cartopy's OCEAN feature, it currently takes much longer to render on the plot. You can create your own version of this example, with the OCEAN feature added, to see for yourself how much more rendering time is added. Instead, we take the strategy of first setting the facecolor of the entire subplot to match that of water bodies in Cartopy. When we then layer on the LAND feature, pixels that are not part of the LAND shapefile remain in the water facecolor, which is the same color as the OCEAN.\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Use lower-resolution shapefiles from Natural Earth\n",
+ "\n",
+ "In this example, we create a new map. This map uses lower-resolution shapefiles from Natural Earth, and also eliminates the plotting of country borders.\n",
+ "\n",
+ "This example requires much more code than previous examples on this page. First, we must create new objects associated with lower-resolution shapefiles. This is performed by the `NaturalEarthFeature` method, which is part of the Cartopy `feature` class. Second, we call `add_feature` to add the new objects to our new map."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig = plt.figure(figsize=(15, 10))\n",
+ "ax = plt.subplot(1, 1, 1, projection=projLccNY)\n",
+ "ax.set_extent((lonW, lonE, latS, latN), crs=projPC)\n",
+ "\n",
+ "# The features with names such as cfeature.LAND, cfeature.OCEAN, are higher-resolution (10m)\n",
+ "# shapefiles from the Naturalearth repository. Lower resolution shapefiles (50m, 110m) can be\n",
+ "# used by using the cfeature.NaturalEarthFeature method as illustrated below.\n",
+ "\n",
+ "resolution = '110m'\n",
+ "\n",
+ "land_mask = cfeature.NaturalEarthFeature(\n",
+ " 'physical',\n",
+ " 'land',\n",
+ " scale=resolution,\n",
+ " edgecolor='face',\n",
+ " facecolor=cfeature.COLORS['land'],\n",
+ ")\n",
+ "sea_mask = cfeature.NaturalEarthFeature(\n",
+ " 'physical',\n",
+ " 'ocean',\n",
+ " scale=resolution,\n",
+ " edgecolor='face',\n",
+ " facecolor=cfeature.COLORS['water'],\n",
+ ")\n",
+ "lake_mask = cfeature.NaturalEarthFeature(\n",
+ " 'physical',\n",
+ " 'lakes',\n",
+ " scale=resolution,\n",
+ " edgecolor='face',\n",
+ " facecolor=cfeature.COLORS['water'],\n",
+ ")\n",
+ "state_borders = cfeature.NaturalEarthFeature(\n",
+ " category='cultural',\n",
+ " name='admin_1_states_provinces_lakes',\n",
+ " scale=resolution,\n",
+ " facecolor='none',\n",
+ ")\n",
+ "\n",
+ "ax.add_feature(land_mask)\n",
+ "ax.add_feature(sea_mask)\n",
+ "ax.add_feature(lake_mask)\n",
+ "ax.add_feature(state_borders, linestyle='solid', edgecolor='black')\n",
+ "ax.set_title('New York and Vicinity; lower resolution');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### A figure with two different regional maps\n",
+ "\n",
+ "Finally, let's create a figure with two subplots. On the first subplot, we'll repeat the high-resolution New York State map created earlier; on the second, we'll plot over a different part of the world."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Create the figure object\n",
+ "fig = plt.figure(\n",
+ " figsize=(30, 24)\n",
+ ") # Notice we need a bigger \"canvas\" so these two maps will be of a decent size\n",
+ "\n",
+ "# First subplot\n",
+ "ax = plt.subplot(2, 1, 1, projection=projLccNY)\n",
+ "ax.set_extent([lonW, lonE, latS, latN], crs=projPC)\n",
+ "ax.set_facecolor(cfeature.COLORS['water'])\n",
+ "ax.add_feature(cfeature.LAND)\n",
+ "ax.add_feature(cfeature.COASTLINE)\n",
+ "ax.add_feature(cfeature.BORDERS, linestyle='--')\n",
+ "ax.add_feature(cfeature.LAKES, alpha=0.5)\n",
+ "ax.add_feature(cfeature.STATES)\n",
+ "ax.set_title('New York and Vicinity')\n",
+ "\n",
+ "# Set the domain for defining the second plot region.\n",
+ "latN = 70\n",
+ "latS = 30.2\n",
+ "lonW = -10\n",
+ "lonE = 50\n",
+ "cLat = (latN + latS) / 2\n",
+ "cLon = (lonW + lonE) / 2\n",
+ "\n",
+ "projLccEur = ccrs.LambertConformal(central_longitude=cLon, central_latitude=cLat)\n",
+ "\n",
+ "# Second subplot\n",
+ "ax2 = plt.subplot(2, 1, 2, projection=projLccEur)\n",
+ "ax2.set_extent([lonW, lonE, latS, latN], crs=projPC)\n",
+ "ax2.set_facecolor(cfeature.COLORS['water'])\n",
+ "ax2.add_feature(cfeature.LAND)\n",
+ "ax2.add_feature(cfeature.COASTLINE)\n",
+ "ax2.add_feature(cfeature.BORDERS, linestyle='--')\n",
+ "ax2.add_feature(cfeature.LAKES, alpha=0.5)\n",
+ "ax2.add_feature(cfeature.STATES)\n",
+ "ax2.set_title('Europe');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## An example of plotting data\n",
+ "\n",
+ "First, we'll create a lat-lon grid and define some data on it."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "lon, lat = np.mgrid[-180:181, -90:91]\n",
+ "data = 2 * np.sin(3 * np.deg2rad(lon)) + 3 * np.cos(4 * np.deg2rad(lat))\n",
+ "plt.contourf(lon, lat, data)\n",
+ "plt.colorbar();"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Plotting data on a Cartesian grid is equivalent to plotting data in the PlateCarree projection, where meridians and parallels are all straight lines with constant spacing. As a result of this simplicity, the global datasets we use often begin in the PlateCarree projection.\n",
+ "\n",
+ "Once we create our map again, we can plot these data values as a contour map. We must also specify the `transform` keyword argument. This is an argument to a contour-plotting method that specifies the projection type currently used by our data. The projection type specified by this argument will be transformed into the projection type specified in the `subplot` method. Let's plot our data in the Mollweide projection to see how shapes change under a transformation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig = plt.figure(figsize=(11, 8.5))\n",
+ "ax = plt.subplot(1, 1, 1, projection=projMoll)\n",
+ "ax.coastlines()\n",
+ "dataplot = ax.contourf(lon, lat, data, transform=ccrs.PlateCarree())\n",
+ "plt.colorbar(dataplot, orientation='horizontal');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "___"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Summary"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Cartopy allows for the georeferencing of Matplotlib `Axes` objects.\n",
+ "- Cartopy's `crs` class supports a variety of map projections.\n",
+ "- Cartopy's `feature` class allows for a variety of cartographic features to be overlaid on a georeferenced plot or subplot."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "___"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## What's Next?"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In the next notebook, we will delve further into how one can transform data that is defined in one coordinate reference system (`crs`) so it displays properly on a map that uses a different `crs`."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Resources and References\n",
+ "\n",
+ "1. [Cartopy Documentation](https://scitools.org.uk/cartopy/docs/latest/)\n",
+ "2. [Full list of projections in Cartopy](https://scitools.org.uk/cartopy/docs/latest/reference/crs.html) \n",
+ "3. [Maps with Cartopy (Ryan Abernathey)](https://rabernat.github.io/research_computing_2018/maps-with-cartopy.html)\n",
+ "4. [Map Projections (GeoCAT)](https://geocat-examples.readthedocs.io/en/latest/gallery/index.html#map-projections)\n",
+ "5. [NCAR xdev Cartopy Tutorial Video](https://www.youtube.com/watch?v=ivmd3RluMiw)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/_preview/468/_sources/core/data-formats.md b/_preview/468/_sources/core/data-formats.md
new file mode 100644
index 000000000..ea64ce755
--- /dev/null
+++ b/_preview/468/_sources/core/data-formats.md
@@ -0,0 +1,7 @@
+# Data Formats
+
+```{note}
+This content is under construction!
+```
+
+There are many data file formats used commonly in the geosciences, such as NetCDF and GRIB. This section contains tutorials on how to interact with these files in Python.
diff --git a/_preview/468/_sources/core/data-formats/netcdf-cf.ipynb b/_preview/468/_sources/core/data-formats/netcdf-cf.ipynb
new file mode 100644
index 000000000..2abb46f88
--- /dev/null
+++ b/_preview/468/_sources/core/data-formats/netcdf-cf.ipynb
@@ -0,0 +1,1021 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "![NetCDF Logo](https://www.unidata.ucar.edu/images/logos/netcdf-400x400.png \"NetCDF Logo\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# NetCDF and CF: The Basics\n",
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Overview\n",
+ "This tutorial will begin with an introduction to netCDF. The CF data model will then be covered, and finally, important implementation details for netCDF. The structure of the tutorial is as follows:\n",
+ "\n",
+ "1. Demonstrating gridded data\n",
+ "1. Demonstrating observational data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "## Prerequisites\n",
+ "\n",
+ "| Concepts | Importance | Notes |\n",
+ "| --- | --- | --- |\n",
+ "| [Numpy Basics](../numpy/numpy-basics) | Necessary | |\n",
+ "| [Datetime](../datetime) | Necessary | |\n",
+ "\n",
+ "- **Time to learn**: 50 minutes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Imports\n",
+ "\n",
+ "Some of these imports will be familiar from previous tutorials. However, some of them likely look foreign; these will be covered in detail later in this tutorial."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from datetime import datetime, timedelta\n",
+ "\n",
+ "import numpy as np\n",
+ "from cftime import date2num\n",
+ "from netCDF4 import Dataset\n",
+ "from pyproj import Proj"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "## Gridded Data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's say we're working with some numerical weather forecast model output. First, we need to store the data in the netCDF format. Second, we need to ensure that the metadata follows the Climate and Forecasting conventions. These steps ensure that a dataset is available to as many scientific data tools as is possible. The examples in this section illustrate these steps in detail.\n",
+ "\n",
+ "To start, let's assume the following about our data:\n",
+ "* There are three spatial dimensions (`x`, `y`, and `press`) and one temporal dimension (`times`).\n",
+ "* The native coordinate system of the model is on a regular 3km x 3km grid (`x` and `y`) that represents the Earth on a Lambert conformal projection.\n",
+ "* The vertical dimension (`press`) consists of several discrete pressure levels in units of hPa.\n",
+ "* The time dimension consists of twelve consecutive hours (`times`), beginning at 2200 UTC on the current day.\n",
+ "\n",
+ "The following code generates the dimensional arrays just discussed:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "start = datetime.utcnow().replace(hour=22, minute=0, second=0, microsecond=0)\n",
+ "times = np.array([start + timedelta(hours=h) for h in range(13)])\n",
+ "\n",
+ "x = np.arange(-150, 153, 3)\n",
+ "y = np.arange(-100, 100, 3)\n",
+ "\n",
+ "press = np.array([1000, 925, 850, 700, 500, 300, 250])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In addition to dimensional arrays, we also need a variable of interest, which holds the data values at each unique dimensional index. In these examples, this variable is called `temps`, and holds temperature data. Note that the dimensions correspond to the ones we just created above."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temps = np.random.randn(times.size, press.size, y.size, x.size)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Creating the file and dimensions\n",
+ "\n",
+ "The first step in setting up a new netCDF file is to create a new file in netCDF format and set up the shared dimensions we'll be using in the file. We'll be using the `netCDF4` library to do all of the requisite netCDF API calls."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nc = Dataset('forecast_model.nc', 'w', format='NETCDF4_CLASSIC', diskless=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
Info
\n",
+ "
The netCDF file created in the above example resides in memory, not disk, due to the diskless=True argument. In order to create this file on disk, you must either remove this argument, or add the persist=True argument.
If you open an existing file with 'w' as the second argument, any data already in the file will be overwritten. If you would like to edit the file, or add to it, open it using 'a' as the second argument.
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We start the setup of this new netCDF file by creating and adding global attribute metadata. These particular metadata elements are not required, but are recommended by the CF standard. In addition, adding these elements to the file is simple, and helps users keep track of the data. Therefore, it is helpful to add these metadata elements, as shown below:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nc.Conventions = 'CF-1.7'\n",
+ "nc.title = 'Forecast model run'\n",
+ "nc.institution = 'Unidata'\n",
+ "nc.source = 'WRF-1.5'\n",
+ "nc.history = str(datetime.utcnow()) + ' Python'\n",
+ "nc.references = ''\n",
+ "nc.comment = ''"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This next example shows a plain-text representation of our netCDF file as it exists currently:\n",
+ "```\n",
+ "netcdf forecast_model {\n",
+ " attributes:\n",
+ " :Conventions = \"CF-1.7\" ;\n",
+ " :title = \"Forecast model run\" ;\n",
+ " :institution = \"Unidata\" ;\n",
+ " :source = \"WRF-1.5\" ;\n",
+ " :history = \"2019-07-16 02:21:52.005718 Python\" ;\n",
+ " :references = \"\" ;\n",
+ " :comment = \"\" ;\n",
+ "}\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
Info
\n",
+ " This plain-text representation is known as netCDF Common Data Format Language, or CDL.\n",
+ "
\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Variables are an important part of every netCDF file; they are used to define data fields. However, before we can add any variables to our file, we must first define the dimensions of the data. In this example, we create dimensions called `x`, `y`, and `pressure`, and set the size of each dimension to the size of the corresponding data array. We then create an additional dimension, `forecast_time`, and set the size as None. This defines the dimension as \"unlimited\", meaning that if additional data values are added later, the netCDF file grows along this dimension."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nc.createDimension('forecast_time', None)\n",
+ "nc.createDimension('x', x.size)\n",
+ "nc.createDimension('y', y.size)\n",
+ "nc.createDimension('pressure', press.size)\n",
+ "nc"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "When we view our file's CDL representation now, we can verify that the dimensions were successfully added to the netCDF file:\n",
+ "```\n",
+ "netcdf forecast_model {\n",
+ " dimensions:\n",
+ "    forecast_time = UNLIMITED (currently 0) ;\n",
+ " x = 101 ;\n",
+ " y = 67 ;\n",
+ " pressure = 7 ;\n",
+ " attributes:\n",
+ " :Conventions = \"CF-1.7\" ;\n",
+ " :title = \"Forecast model run\" ;\n",
+ " :institution = \"Unidata\" ;\n",
+ " :source = \"WRF-1.5\" ;\n",
+ " :history = \"2019-07-16 02:21:52.005718 Python\" ;\n",
+ " :references = \"\" ;\n",
+ " :comment = \"\" ;\n",
+ "}\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Creating and filling a variable"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Thus far, we have only added basic information to this netCDF dataset; namely, the dataset dimensions and some broad metadata. As described briefly above, variables are used to define data fields in netCDF files. Here, we create a netCDF4 variable to hold a data field; in this case, the forecast air temperature. In order to create this variable, we must specify the data type of the values in the data field. We must also specify which dimensions contained in the netCDF file are relevant to this data field. Finally, we can specify whether or not to compress the data using `zlib`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temps_var = nc.createVariable(\n",
+ " 'Temperature',\n",
+ " datatype=np.float32,\n",
+ " dimensions=('forecast_time', 'pressure', 'y', 'x'),\n",
+ " zlib=True,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We have now created a netCDF4 variable, but it does not yet define a data field. In this example, we use Python to associate our temperature data with the new variable:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temps_var[:] = temps\n",
+ "temps_var"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can also write data to a variable incrementally, rather than all at once. This example illustrates how to write one time step's worth of data at a time to the variable created earlier:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "for next_slice, temp_slice in enumerate(temps):\n",
+ "    temps_var[next_slice] = temp_slice"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "At this point, this is the CDL representation of our dataset:\n",
+ "```\n",
+ "netcdf forecast_model {\n",
+ " dimensions:\n",
+ " forecast_time = UNLIMITED (currently 13) ;\n",
+ " x = 101 ;\n",
+ " y = 67 ;\n",
+ " pressure = 7 ;\n",
+ " variables:\n",
+ " float Temperature(forecast_time, pressure, y, x) ;\n",
+ " attributes:\n",
+ " :Conventions = \"CF-1.7\" ;\n",
+ " :title = \"Forecast model run\" ;\n",
+ " :institution = \"Unidata\" ;\n",
+ " :source = \"WRF-1.5\" ;\n",
+ " :history = \"2019-07-16 02:21:52.005718 Python\" ;\n",
+ " :references = \"\" ;\n",
+ " :comment = \"\" ;\n",
+ "}\n",
+ "```\n",
+ "We can also define metadata for this variable in the form of attributes; some specific attributes are required by the CF conventions. For example, the CF conventions require a `units` attribute to be set for all variables that represent a dimensional quantity, and the value of this attribute must be parsable by the [UDUNITS](https://www.unidata.ucar.edu/software/udunits/) library. In this example, the temperatures are in Kelvin, so we set the units attribute to `'Kelvin'`.\n",
+ "\n",
+ "Next, we set the `long_name` and `standard_name` attributes, which are recommended for most datasets, but optional. The `long_name` attribute contains a longer and more detailed description of a variable, while the `standard_name` attribute names a variable using descriptive words from a predefined word list contained in the CF conventions. Defining these attributes allows users of your datasets to understand what each variable represents.\n",
+ "\n",
+ "Sometimes, data fields do not have valid data values at every dimension point. In that case, the standard is to use a filler value for the missing data values, and to set the `missing_value` attribute to this filler value. Here, however, there are no missing values, so the `missing_value` attribute can be set to any unused value, or not set at all.\n",
+ "\n",
+ "There are many different sets of recommendations for attributes on netCDF variables. For example, here is NASA's set of recommended attributes:"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "> **NASA Dataset Interoperability Recommendations:**\n",
+ ">\n",
+ "> Section 2.2 - Include Basic CF Attributes\n",
+ ">\n",
+ "> Include where applicable: `units`, `long_name`, `standard_name`, `valid_min` / `valid_max`, `scale_factor` / `add_offset` and others."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temps_var.units = 'Kelvin'\n",
+ "temps_var.standard_name = 'air_temperature'\n",
+ "temps_var.long_name = 'Forecast air temperature'\n",
+ "temps_var.missing_value = -9999\n",
+ "temps_var"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Here is the variable section of our dataset's CDL, with the new attributes added:\n",
+ "```\n",
+ " variables:\n",
+ " float Temperature(forecast_time, pressure, y, x) ;\n",
+ " Temperature:units = \"Kelvin\" ;\n",
+ " Temperature:standard_name = \"air_temperature\" ;\n",
+ " Temperature:long_name = \"Forecast air temperature\" ;\n",
+ " Temperature:missing_value = -9999.0 ;\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Coordinate variables"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Dimensions in a netCDF file only define size and alignment metadata. In order to properly orient data in time and space, it is necessary to create \"coordinate variables\", which define data values along each dimension. A coordinate variable is typically created as a one-dimensional variable, and has the same name as the corresponding dimension.\n",
+ "\n",
+ "To start, we create coordinate variables containing our `x` and `y` coordinate values. It is recommended to include certain attributes for each coordinate variable. First, you should include a `standard_name`, which allows for associating the variable with projections, among other things. (Projections will be covered in detail later in this page.) Second, you can include an `axis` attribute, which clearly defines the spatial or temporal direction referred to by the coordinate variable. This next example demonstrates how to set up these attributes:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x_var = nc.createVariable('x', np.float32, ('x',))\n",
+ "x_var[:] = x\n",
+ "x_var.units = 'km'\n",
+ "x_var.axis = 'X' # Optional\n",
+ "x_var.standard_name = 'projection_x_coordinate'\n",
+ "x_var.long_name = 'x-coordinate in projected coordinate system'\n",
+ "\n",
+ "y_var = nc.createVariable('y', np.float32, ('y',))\n",
+ "y_var[:] = y\n",
+ "y_var.units = 'km'\n",
+ "y_var.axis = 'Y' # Optional\n",
+ "y_var.standard_name = 'projection_y_coordinate'\n",
+ "y_var.long_name = 'y-coordinate in projected coordinate system'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Our dataset contains vertical data of air pressure as well, so we must define a coordinate variable for this axis; we can simply call this new variable `pressure`. Since this axis represents air pressure data, we can set a `standard_name` of `'air_pressure'`. With this `standard_name` attribute set, it should be obvious to users of this dataset that this variable represents a vertical axis, but for extra clarification, we also set the `axis` attribute as `'Z'`. We can also specify one more attribute, called `positive`. This attribute indicates whether the variable values increase or decrease as the dimension values increase. Setting this attribute is optional for some data; air pressure is one example. However, we still set the attribute here, for the sake of completeness."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "press_var = nc.createVariable('pressure', np.float32, ('pressure',))\n",
+ "press_var[:] = press\n",
+ "press_var.units = 'hPa'\n",
+ "press_var.axis = 'Z' # Optional\n",
+ "press_var.standard_name = 'air_pressure'\n",
+ "press_var.positive = 'down' # Optional"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Time coordinates must contain a `units` attribute; this attribute is a string value, and must have a form similar to the string `'seconds since 2019-01-06 12:00:00.00'`. 'seconds', 'minutes', 'hours', and 'days' are the most commonly used time intervals in these strings. It is not recommended to use 'months' or 'years' in time strings, as the length of these time intervals can vary.\n",
+ "\n",
+ "Before we can write data, we need to first convert our list of Python `datetime` objects to numeric values usable in time strings. We can perform this conversion by setting a time string in the format described above, then using the `date2num` method from the `cftime` library. An example of this is shown below:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "time_units = f'hours since {times[0]:%Y-%m-%d 00:00}'\n",
+ "time_vals = date2num(times, time_units)\n",
+ "time_vals"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now that the time string is set up, we have all of the necessary information to set up the attributes for a `forecast_time` coordinate variable. The creation of this variable is shown in the following example:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "time_var = nc.createVariable('forecast_time', np.int32, ('forecast_time',))\n",
+ "time_var[:] = time_vals\n",
+ "time_var.units = time_units\n",
+ "time_var.axis = 'T' # Optional\n",
+ "time_var.standard_name = 'time' # Optional\n",
+ "time_var.long_name = 'time'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This next example shows the CDL representation of the netCDF file's variables at this point. It is clear that much more information is now contained in this representation:\n",
+ "```\n",
+ " dimensions:\n",
+ " forecast_time = UNLIMITED (currently 13) ;\n",
+ " x = 101 ;\n",
+ " y = 67 ;\n",
+ " pressure = 7 ;\n",
+ " variables:\n",
+ " float x(x) ;\n",
+ " x:units = \"km\" ;\n",
+ " x:axis = \"X\" ;\n",
+ " x:standard_name = \"projection_x_coordinate\" ;\n",
+ " x:long_name = \"x-coordinate in projected coordinate system\" ;\n",
+ " float y(y) ;\n",
+ " y:units = \"km\" ;\n",
+ " y:axis = \"Y\" ;\n",
+ " y:standard_name = \"projection_y_coordinate\" ;\n",
+ " y:long_name = \"y-coordinate in projected coordinate system\" ;\n",
+ " float pressure(pressure) ;\n",
+ " pressure:units = \"hPa\" ;\n",
+ " pressure:axis = \"Z\" ;\n",
+ " pressure:standard_name = \"air_pressure\" ;\n",
+ " pressure:positive = \"down\" ;\n",
+ "    int forecast_time(forecast_time) ;\n",
+ " forecast_time:units = \"hours since 2019-07-16 00:00\" ;\n",
+ " forecast_time:axis = \"T\" ;\n",
+ " forecast_time:standard_name = \"time\" ;\n",
+ " forecast_time:long_name = \"time\" ;\n",
+ " float Temperature(forecast_time, pressure, y, x) ;\n",
+ " Temperature:units = \"Kelvin\" ;\n",
+ " Temperature:standard_name = \"air_temperature\" ;\n",
+ " Temperature:long_name = \"Forecast air temperature\" ;\n",
+ " Temperature:missing_value = -9999.0 ;\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Auxiliary Coordinates"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Our data are still not CF-compliant, because they do not contain latitude and longitude information, which is needed to properly locate the data. In order to add location data to a netCDF file, we must create so-called \"auxiliary coordinate variables\" for latitude and longitude. (In this case, the word \"auxiliary\" means that the variables are not simple one-dimensional variables.)\n",
+ "\n",
+ "In this next example, we use the `Proj` function, found in the `pyproj` library, to create projections of our coordinates. We can then use these projections to generate latitude and longitude values for our data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "X, Y = np.meshgrid(x, y)\n",
+ "lcc = Proj({'proj': 'lcc', 'lon_0': -105, 'lat_0': 40, 'a': 6371000.0, 'lat_1': 25})\n",
+ "lon, lat = lcc(X * 1000, Y * 1000, inverse=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now that we have latitude and longitude values, we can create variables for those values. Both of these variables are two-dimensional; the dimensions in question are `y` and `x`. In order to convey that it contains the longitude information, we must set up the longitude variable with a `units` attribute of `'degrees_east'`. In addition, we can provide further clarity by setting a `standard_name` attribute of `'longitude'`. The case is the same for latitude, except the units are `'degrees_north'` and the `standard_name` is `'latitude'`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "lon_var = nc.createVariable('lon', np.float64, ('y', 'x'))\n",
+ "lon_var[:] = lon\n",
+ "lon_var.units = 'degrees_east'\n",
+ "lon_var.standard_name = 'longitude' # Optional\n",
+ "lon_var.long_name = 'longitude coordinate'\n",
+ "\n",
+ "lat_var = nc.createVariable('lat', np.float64, ('y', 'x'))\n",
+ "lat_var[:] = lat\n",
+ "lat_var.units = 'degrees_north'\n",
+ "lat_var.standard_name = 'latitude' # Optional\n",
+ "lat_var.long_name = 'latitude coordinate'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now that the auxiliary coordinate variables are created, we must identify them as coordinates for the `Temperature` variable. In order to identify the variables in this way, we set the `coordinates` attribute of the `Temperature` variable to a space-separated list of variables to identify, as shown below:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temps_var.coordinates = 'lon lat'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The portion of the CDL showing the new latitude and longitude variables, as well as the updated `Temperature` variable, is listed below:\n",
+ "```\n",
+ "    double lon(y, x) ;\n",
+ "      lon:units = \"degrees_east\" ;\n",
+ "      lon:long_name = \"longitude coordinate\" ;\n",
+ "      lon:standard_name = \"longitude\" ;\n",
+ "    double lat(y, x) ;\n",
+ "      lat:units = \"degrees_north\" ;\n",
+ "      lat:long_name = \"latitude coordinate\" ;\n",
+ "      lat:standard_name = \"latitude\" ;\n",
+ "    float Temperature(forecast_time, pressure, y, x) ;\n",
+ "      Temperature:units = \"Kelvin\" ;\n",
+ "      Temperature:standard_name = \"air_temperature\" ;\n",
+ "      Temperature:long_name = \"Forecast air temperature\" ;\n",
+ "      Temperature:missing_value = -9999.0 ;\n",
+ "      Temperature:coordinates = \"lon lat\" ;\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Coordinate System Information"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Since the grid containing our data uses a Lambert conformal projection, adding this information to the dataset's metadata can clear up some possible confusion. We can most easily add this metadata information by making use of a \"grid mapping\" variable. A grid mapping variable is a \"placeholder\" variable containing all required grid-mapping information. Other variables that need to access this information can then reference this placeholder variable in their `grid_mapping` attribute.\n",
+ "\n",
+ "In this example, we create a grid-mapping variable; this new variable is then set up for a Lambert-conformal conic projection on a spherical globe. By setting this variable's `grid_mapping_name` attribute, we can indicate which CF-supported grid mapping this variable refers to. There are additional attributes that can also be set; however, the available options depend on the specific mapping."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "proj_var = nc.createVariable('lambert_projection', np.int32, ())\n",
+ "proj_var.grid_mapping_name = 'lambert_conformal_conic'\n",
+ "proj_var.standard_parallel = 25.0\n",
+ "proj_var.latitude_of_projection_origin = 40.0\n",
+ "proj_var.longitude_of_central_meridian = -105.0\n",
+ "proj_var.semi_major_axis = 6371000.0\n",
+ "proj_var"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now that we have created a grid-mapping variable, we can specify the grid mapping by setting the `grid_mapping` attribute to the variable's name. In this example, we set the `grid_mapping` attribute on the `Temperature` variable:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temps_var.grid_mapping = 'lambert_projection' # or proj_var.name"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Here is the portion of the CDL containing the modified `Temperature` variable, as well as the new grid-mapping `lambert_projection` variable:\n",
+ "```\n",
+ " variables:\n",
+ " int lambert_projection ;\n",
+ "    lambert_projection:grid_mapping_name = \"lambert_conformal_conic\" ;\n",
+ " lambert_projection:standard_parallel = 25.0 ;\n",
+ " lambert_projection:latitude_of_projection_origin = 40.0 ;\n",
+ " lambert_projection:longitude_of_central_meridian = -105.0 ;\n",
+ " lambert_projection:semi_major_axis = 6371000.0 ;\n",
+ " float Temperature(forecast_time, pressure, y, x) ;\n",
+ " Temperature:units = \"Kelvin\" ;\n",
+ " Temperature:standard_name = \"air_temperature\" ;\n",
+ " Temperature:long_name = \"Forecast air temperature\" ;\n",
+ " Temperature:missing_value = -9999.0 ;\n",
+ " Temperature:coordinates = \"lon lat\" ;\n",
+ " Temperature:grid_mapping = \"lambert_projection\" ;\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Cell Bounds\n",
+ "\n",
+ "The use of \"bounds\" attributes is not required, but highly recommended. Here is a relevant excerpt from the NASA Dataset Interoperability Recommendations:\n",
+ "> **NASA Dataset Interoperability Recommendations:**\n",
+ ">\n",
+ "> Section 2.3 - Use CF \"bounds\" attributes\n",
+ ">\n",
+ "> CF conventions state: \"When gridded data does not represent the point values of a field but instead represents some characteristic of the field within cells of finite 'volume,' a complete description of the variable should include metadata that describes the domain or extent of each cell, and the characteristic of the field that the cell values represent.\"\n",
+ "\n",
+ "In this set of examples, consider a rain gauge which is read every three hours, but only dumped every six hours. The netCDF file for this gauge's data readings might look like this:\n",
+ " \n",
+ "```\n",
+ "netcdf precip_bucket_bounds {\n",
+ " dimensions:\n",
+ " lat = 12 ;\n",
+ " lon = 19 ;\n",
+ " time = 8 ;\n",
+ " tbv = 2;\n",
+ " variables:\n",
+ " float lat(lat) ;\n",
+ " float lon(lon) ;\n",
+ " float time(time) ;\n",
+ " time:units = \"hours since 2019-07-12 00:00:00.00\";\n",
+ " time:bounds = \"time_bounds\" ;\n",
+ "    float time_bounds(time, tbv) ;\n",
+ " float precip(time, lat, lon) ;\n",
+ " precip:units = \"inches\" ;\n",
+ " data:\n",
+ " time = 3, 6, 9, 12, 15, 18, 21, 24;\n",
+ " time_bounds = 0, 3, 0, 6, 6, 9, 6, 12, 12, 15, 12, 18, 18, 21, 18, 24;\n",
+ "}\n",
+ "```\n",
+ "\n",
+ "Considering the coordinate variable for time, and the `bounds` attribute set for this variable, the below graph illustrates the times of the gauge's data readings:\n",
+ "```\n",
+ "|---X\n",
+ "|-------X\n",
+ " |---X\n",
+ " |-------X\n",
+ " |---X\n",
+ " |-------X\n",
+ " |---X\n",
+ " |-------X\n",
+ "0 3 6 9 12 15 18 21 24\n",
+ "```"
+ ]
+ },
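+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To make this concrete, here is a minimal sketch of how the `time` and `time_bounds` portion of the CDL above could be written with the same `netCDF4` calls used earlier in this notebook. The file and variable names here are illustrative, and the `lat`, `lon`, and `precip` parts of the file are omitted for brevity:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Sketch only: write the paired (start, end) bounds for each reading\n",
+ "bounds_nc = Dataset('precip_bucket_bounds.nc', 'w', format='NETCDF4_CLASSIC', diskless=True)\n",
+ "bounds_nc.createDimension('time', 8)\n",
+ "bounds_nc.createDimension('tbv', 2)\n",
+ "t_var = bounds_nc.createVariable('time', np.float32, ('time',))\n",
+ "t_var.units = 'hours since 2019-07-12 00:00:00.00'\n",
+ "t_var.bounds = 'time_bounds'\n",
+ "t_var[:] = np.arange(3, 25, 3)  # readings every 3 hours\n",
+ "tb_var = bounds_nc.createVariable('time_bounds', np.float32, ('time', 'tbv'))\n",
+ "tb_var[:] = [[0, 3], [0, 6], [6, 9], [6, 12], [12, 15], [12, 18], [18, 21], [18, 24]]\n",
+ "bounds_nc.close()"
+ ]
+ },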
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "## Observational Data\n",
+ "\n",
+ "Thus far, we have only worked with data arranged on grids. One common type of data, called \"in-situ\" or \"observational\" data, is usually arranged in other ways. The CF conventions for this type of data are called *Conventions for DSG (Discrete Sampling Geometries)*.\n",
+ "\n",
+ "For data that are regularly sampled (e.g., from a vertical profiler site), this is straightforward. For these examples, we will be using vertical profile data from three hypothetical profilers, located in Boulder, Norman, and Albany. These hypothetical profilers report data at ten evenly spaced altitude levels, from 10 m up to 1000 m. This first example illustrates how to set up latitude, longitude, altitude, and other necessary data for these profilers:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "lons = np.array([-105, -97.1, -73.8])\n",
+ "lats = np.array([40, 35.25, 42.75])\n",
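+ "# ten evenly spaced altitude levels, from 10 m to 1000 m inclusive\n",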
+ "heights = np.linspace(10, 1000, 10)\n",
+ "temps = np.random.randn(lats.size, heights.size)\n",
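+ "# four-character station IDs for the three profiler sites\n",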
+ "stids = ['KBOU', 'KOUN', 'KALB']"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Creation and basic setup\n",
+ "First, we create a new netCDF file and define dimensions for it, corresponding to station and altitude; since we are working with observational profile data, we name these dimensions `station` and `heights`. We then set the global `featureType` attribute to `'profile'`, which defines the file as holding profile data. In these examples, the term \"profile data\" is defined as \"an ordered set of data points along a vertical line at a fixed horizontal position and fixed time\". In addition, we define a placeholder dimension called `str_len`, which helps with storing station IDs as fixed-length strings."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nc.close()\n",
+ "nc = Dataset('obs_data.nc', 'w', format='NETCDF4_CLASSIC', diskless=True)\n",
+ "nc.createDimension('station', lats.size)\n",
+ "nc.createDimension('heights', heights.size)\n",
+ "nc.createDimension('str_len', 4)\n",
+ "nc.Conventions = 'CF-1.7'\n",
+ "nc.featureType = 'profile'\n",
+ "nc"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "After this initial setup, the current state of our netCDF file is described in the following CDL:\n",
+ "```\n",
+ "netcdf obs_data {\n",
+ " dimensions:\n",
+ " station = 3 ;\n",
+ " heights = 10 ;\n",
+ " str_len = 4 ;\n",
+ " attributes:\n",
+ " :Conventions = \"CF-1.7\" ;\n",
+ " :featureType = \"profile\" ;\n",
+ "}\n",
+ "```\n",
+ "This example illustrates the setup of coordinate variables for latitude and longitude:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "lon_var = nc.createVariable('lon', np.float64, ('station',))\n",
+ "lon_var.units = 'degrees_east'\n",
+ "lon_var.standard_name = 'longitude'\n",
+ "\n",
+ "lat_var = nc.createVariable('lat', np.float64, ('station',))\n",
+ "lat_var.units = 'degrees_north'\n",
+ "lat_var.standard_name = 'latitude'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "When a coordinate variable refers to an instance of a feature, netCDF standards refer to it as an \"instance variable\". The latitude and longitude coordinate variables declared above are examples of instance variables. In this next example, we create an instance variable for altitude, referred to here as `heights`:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "heights_var = nc.createVariable('heights', np.float32, ('heights',))\n",
+ "heights_var.units = 'meters'\n",
+ "heights_var.standard_name = 'altitude'\n",
+ "heights_var.positive = 'up'\n",
+ "heights_var[:] = heights"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Station IDs\n",
+ "Using the placeholder dimension defined earlier, we can write the profilers' station IDs to a variable as well. The variable used to store these station IDs is two-dimensional: the first dimension is `station`, and the second simply provides the string length, allowing each ID to be stored as a character array. We also assign this variable a `cf_role` attribute with a value of `'profile_id'`; this attribute helps software reading this netCDF file to identify the individual profiles."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
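+ "# store each 4-character station ID along the str_len dimension\n",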
+ "stid_var = nc.createVariable('stid', 'c', ('station', 'str_len'))\n",
+ "stid_var.cf_role = 'profile_id'\n",
+ "stid_var.long_name = 'Station identifier'\n",
+ "stid_var[:] = stids"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "After adding station ID information, our file's updated CDL should resemble this example:\n",
+ "```\n",
+ "netcdf obs_data {\n",
+ " dimensions:\n",
+ " station = 3 ;\n",
+ " heights = 10 ;\n",
+ " str_len = 4 ;\n",
+ " variables:\n",
+ " double lon(station) ;\n",
+ " lon:units = \"degrees_east\" ;\n",
+ " lon:standard_name = \"longitude\" ;\n",
+ " double lat(station) ;\n",
+ " lat:units = \"degrees_north\" ;\n",
+ " lat:standard_name = \"latitude\" ;\n",
+ " float heights(heights) ;\n",
+ " heights:units = \"meters\" ;\n",
+ " heights:standard_name = \"altitude\";\n",
+ " heights:positive = \"up\" ;\n",
+ " char stid(station, str_len) ;\n",
+ " stid:cf_role = \"profile_id\" ;\n",
+ " stid:long_name = \"Station identifier\" ;\n",
+ " attributes:\n",
+ " :Conventions = \"CF-1.7\" ;\n",
+ " :featureType = \"profile\" ;\n",
+ "}\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Writing the field\n",
+ "The final setup step for this netCDF file is to write our actual profile data to the file. We also add a scalar variable, which holds the time of data capture for each profile:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "time_var = nc.createVariable('time', np.float32, ())\n",
+ "time_var.units = 'minutes since 2019-07-16 17:00'\n",
+ "time_var.standard_name = 'time'\n",
+ "time_var[:] = [5.0]\n",
+ "\n",
+ "temp_var = nc.createVariable('temperature', np.float32, ('station', 'heights'))\n",
+ "temp_var.units = 'celsius'\n",
+ "temp_var.standard_name = 'air_temperature'\n",
+ "temp_var.coordinates = 'lon lat heights time'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The auxiliary coordinate variables in this netCDF file, such as `lon`, `lat`, and the scalar `time`, are not proper coordinate variables, since their names do not match a dimension of the data. Therefore, their names must be listed in an attribute called `coordinates` on the data variable. The final CDL of the variables, including the `coordinates` attribute, is shown below:\n",
+ "```\n",
+ " variables:\n",
+ " double lon(station) ;\n",
+ " lon:units = \"degrees_east\" ;\n",
+ " lon:standard_name = \"longitude\" ;\n",
+ " double lat(station) ;\n",
+ " lat:units = \"degrees_north\" ;\n",
+ " lat:standard_name = \"latitude\" ;\n",
+ " float heights(heights) ;\n",
+ " heights:units = \"meters\" ;\n",
+ " heights:standard_name = \"altitude\";\n",
+ " heights:positive = \"up\" ;\n",
+ " char stid(station, str_len) ;\n",
+ " stid:cf_role = \"profile_id\" ;\n",
+ " stid:long_name = \"Station identifier\" ;\n",
+ " float time ;\n",
+ " time:units = \"minutes since 2019-07-16 17:00\" ;\n",
+ " time:standard_name = \"time\" ;\n",
+ " float temperature(station, heights) ;\n",
+ " temperature:units = \"celsius\" ;\n",
+ " temperature:standard_name = \"air_temperature\" ;\n",
+ " temperature:coordinates = \"lon lat heights time\" ;\n",
+ "```\n",
+ "\n",
+ "These standards for storing DSG data in netCDF files can be used for profiler data, as shown in these examples, as well as timeseries and trajectory data, and any combination of these types of data models. You can also use these standards for datasets with differing amounts of data in each feature, using so-called \"ragged\" arrays. For more information on ragged arrays, or other elements of the CF DSG standards, see the [main documentation page](http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/cf-conventions.html#discrete-sampling-geometries), or try some of the [annotated DSG examples](http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/cf-conventions.html#appendix-examples-discrete-geometries)."
+ ]
+ },
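+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a final housekeeping step (an addition for tidiness, not part of the CDL walkthrough above), we can close the `Dataset`; since it was opened with `diskless=True`, nothing is written to disk:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nc.close()"
+ ]
+ },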
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Summary\n",
+ "We have created example **netCDF** `Datasets`, both gridded and in-situ, and discussed their structure. In addition, we covered the Climate and Forecasting (**CF**) Conventions, and the setup of netCDF files that follow these conventions. netCDF `Datasets` are self-describing; in other words, their attributes, or *metadata*, are included. Other libraries in the Python scientific software ecosystem, such as `xarray` and `MetPy`, are therefore easily able to read in, write to, and analyze these `Datasets`.\n",
+ "\n",
+ "### What's Next?\n",
+ "In subsequent notebooks, we will work with netCDF `Datasets` built from actual, non-example data sources, both model and in-situ."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "## Resources and References\n",
+ "\n",
+ "- [CF Conventions doc (1.7)](http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/cf-conventions.html)\n",
+ "- [Jonathan Gregory's old CF presentation](http://cfconventions.org/Data/cf-documents/overview/viewgraphs.pdf)\n",
+ "- [NASA ESDS \"Dataset Interoperability Recommendations for Earth Science\"](https://earthdata.nasa.gov/user-resources/standards-and-references/dataset-interoperability-recommendations-for-earth-science)\n",
+ "- [CF Data Model (cfdm) python package tutorial](https://ncas-cms.github.io/cfdm/tutorial.html)\n",
+ "- [Tim Whiteaker's cfgeom python package (GitHub repo)](https://github.com/twhiteaker/CFGeom) and [(tutorial)]( https://twhiteaker.github.io/CFGeom/tutorial.html)\n",
+ "- [netCDF4 Documentation](https://unidata.github.io/netcdf4-python/)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/_preview/468/_sources/core/datetime.md b/_preview/468/_sources/core/datetime.md
new file mode 100644
index 000000000..185df93ec
--- /dev/null
+++ b/_preview/468/_sources/core/datetime.md
@@ -0,0 +1,14 @@
+# Datetime
+
+```{note}
+This content is under construction!
+```
+
+This section contains tutorials on dealing with times and calendars in scientific Python. The first and most basic of these tutorials covers the standard Python library known as [datetime](https://docs.python.org/3/library/datetime.html).
+
+When this chapter is fully built out, it will include a comprehensive guide to different time libraries, where to use them, and when they might be useful. These libraries include, among others:
+
+- [Numpy `datetime64`](https://numpy.org/doc/stable/reference/arrays.datetime.html) (for efficient vectorized date and time operations)
+- [cftime library](https://unidata.github.io/cftime/) (for dealing with dates and times in non-standard calendars)
+
+These tutorials will be cross-referenced with other tutorials on time-related topics, such as dealing with timeseries data in [Pandas](pandas) and [Xarray](xarray).
diff --git a/_preview/468/_sources/core/datetime/datetime.ipynb b/_preview/468/_sources/core/datetime/datetime.ipynb
new file mode 100644
index 000000000..4ce443e78
--- /dev/null
+++ b/_preview/468/_sources/core/datetime/datetime.ipynb
@@ -0,0 +1,562 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Times and Dates in Python"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Overview\n",
+ "\n",
+ "Time is an essential component of nearly all geoscience data. Timescales commonly used in science can have many different orders of magnitude, from mere microseconds to millions or even billions of years. Some of these magnitudes are listed below:\n",
+ "\n",
+ "- microseconds for lightning\n",
+ "- hours for a supercell thunderstorm\n",
+ "- days for a global weather model\n",
+ "- millennia and beyond for the earth's climate\n",
+ "\n",
+ "To properly analyze geoscience data, you must have a firm understanding of how to handle time in Python. \n",
+ "\n",
+ "In this notebook, we will:\n",
+ "\n",
+ "1. Introduce the [time](https://docs.python.org/3/library/time.html) and [datetime](https://docs.python.org/3/library/datetime.html) modules from the Python Standard Library\n",
+ "1. Look at formatted input and output of dates and times\n",
+ "1. See how we can do simple arithmetic on date and time data, by making use of the `timedelta` object\n",
+ "1. Briefly make use of the [pytz](https://pypi.org/project/pytz/) module to handle some thorny time zone issues in Python."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Prerequisites\n",
+ "\n",
+ "| Concepts | Importance | Notes |\n",
+ "| --- | --- | --- |\n",
+ "| [Python Quickstart](../../foundations/quickstart) | Necessary | Understanding strings |\n",
+ "| Basic Python string formatting | Helpful | Try this [Real Python string formatting tutorial](https://realpython.com/python-string-formatting/) |\n",
+ "\n",
+ "- **Time to learn**: 30 minutes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Imports\n",
+ "\n",
+ "For the examples on this page, we import three modules from the Python Standard Library, as well as one third-party module. The import syntax used here, as well as a discussion on this syntax and an overview of these modules, can be found in the next section."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Python Standard Library packages\n",
+ "# We'll discuss below WHY we alias the packages this way\n",
+ "import datetime as dt\n",
+ "import math\n",
+ "import time as tm\n",
+ "\n",
+ "# Third-party package for time zone handling, we'll discuss below!\n",
+ "import pytz"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## `Time` Versus `Datetime` modules \n",
+ "\n",
+ "### Some core terminology\n",
+ "\n",
+ "Every Python installation comes with a Standard Library, which includes many helpful modules; in these examples, we cover the [time](https://docs.python.org/3/library/time.html) and [datetime](https://docs.python.org/3/library/datetime.html) modules. Unfortunately, the use of dates and times in Python can be disorienting. There are many different terms used in Python relating to dates and times, and many such terms apply to multiple scopes, such as modules, classes, and functions. For example:\n",
+ "\n",
+ "- `datetime` **module** has a `datetime` **class**\n",
+ "- `datetime` **module** has a `time` **class**\n",
+ "- `datetime` **module** has a `date` **class**\n",
+ "- `time` **module** has a `time` function, which returns (almost always) [Unix time](#What-is-Unix-Time?)\n",
+ "- `datetime` **class** has a `date` method, which returns a `date` object\n",
+ "- `datetime` **class** has a `time` method, which returns a `time` object\n",
+ "\n",
+ "This confusion can be partially alleviated by aliasing our imported modules, as we did above:\n",
+ "\n",
+ "```\n",
+ "import datetime as dt\n",
+ "import time as tm\n",
+ "```\n",
+ "\n",
+ "We can now reference the `datetime` module (aliased to `dt`) and `datetime` class unambiguously."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pisecond = dt.datetime(2021, 3, 14, 15, 9, 26)\n",
+ "print(pisecond)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Our variable `pisecond` now stores a particular date and time, which just happens to be $\\pi$-day 2021 down to the nearest second: the date and time 3/14/21 15:09:26 spells out the digits 3.1415926."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "now = tm.time()\n",
+ "print(now)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The variable `now` holds the current time in seconds since January 1, 1970 00:00 UTC. For more information on this important, but seemingly esoteric time format, see the section on this page called \"[What is Unix Time](#What-is-Unix-Time?)\". In addition, if you are not familiar with UTC, there is a section on this page called \"[What is UTC](#What-is-UTC?)\"."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### `time` module\n",
+ "\n",
+ "The `time` module is well-suited for measuring [Unix time](#What-is-Unix-Time?). For example, when you are calculating how long it takes a Python function to run, you can employ the `time()` function, which can be found in the `time` module, to obtain Unix time before and after the function completes. You can then take the difference of those two times to determine how long the function was running. (Measuring the runtime of a block of code this way is known as \"benchmarking\".)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "start = tm.time()\n",
+ "tm.sleep(1) # The sleep function will stop the program for n seconds\n",
+ "end = tm.time()\n",
+ "diff = end - start\n",
+ "print(f\"The benchmark took {diff} seconds\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "<div class=\"admonition alert alert-info\">\n",
+ "    <p class=\"admonition-title\" style=\"font-weight:bold\">Info</p>\n",
+ "    You can use the <code>timeit</code> module and the <code>%%timeit</code> Jupyter magic for more accurate benchmarking; documentation on <code>timeit</code> can be found <a href=\"https://docs.python.org/3/library/timeit.html\">here</a>.\n",
+ "</div>"
+ ]
+ },
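+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a quick sketch of the more robust approach mentioned in the note above, the `timeit` module runs a statement many times and reports the total elapsed time; the statement and repeat count here are arbitrary choices for illustration:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import timeit\n",
+ "\n",
+ "# run the statement one million times and report the total time in seconds\n",
+ "elapsed = timeit.timeit('math.sqrt(2.0)', setup='import math', number=1_000_000)\n",
+ "print(f'1,000,000 calls took {elapsed} seconds')"
+ ]
+ },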
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### What is Unix Time?\n",
+ "\n",
+ "Unix time is an example of system time, which is how a computer tracks the passage of time. Computers do not inherently know human representations of time; as such, they store time as a large binary number, indicating a number of time units after a set date and time. This is much easier for a computer to keep track of. In the case of Unix time, the time unit is seconds, and the set date and time is the epoch, which is defined as January 1, 1970 00:00 [UTC](#What-is-UTC?). This is quite confusing for humans, but again, computers store time in a way that makes sense for them. Under the hood, Unix time is represented as a [floating point number](https://en.wikipedia.org/wiki/Floating_point), which is how computers approximate real (ℝ) numbers."
+ ]
+ },
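+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To see the connection between Unix time and the human-readable calendar, we can convert the `now` timestamp captured earlier back into a `datetime` object (a quick illustration, not part of the original exercise):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# interpret the Unix timestamp as a calendar date and time in UTC\n",
+ "print(dt.datetime.fromtimestamp(now, dt.timezone.utc))"
+ ]
+ },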
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### `datetime` module\n",
+ "\n",
+ "The `datetime` module handles time with the Gregorian calendar (the calendar we, as humans, are familiar with); it is independent of Unix time. The `datetime` module uses an [object-oriented](#Thirty-second-introduction-to-Object-Oriented-programming) approach; it contains the `date`, `time`, `datetime`, `timedelta`, and `tzinfo` classes.\n",
+ "\n",
+ "- `date` class represents the day, month, and year\n",
+ "- `time` class represents the time of day\n",
+ "- `datetime` class is a combination of the `date` and `time` classes\n",
+ "- `timedelta` class represents a time duration\n",
+ "- `tzinfo` class represents time zones, and is an abstract class.\n",
+ "\n",
+ "The `datetime` module is effective for:\n",
+ "\n",
+ "- performing date and time arithmetic and calculating time duration\n",
+ "- reading and writing date and time strings with various formats\n",
+ "- handling time zones (with the help of third-party libraries)\n",
+ "\n",
+ "The `time` and `datetime` modules overlap in functionality, but in your geoscientific work, you will probably be using the `datetime` module more than the `time` module."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We'll delve into more details below, but here's a quick example of writing out our `pisecond` datetime object as a formatted string. Suppose we wanted to write out just the date, and write it in the _month/day/year_ format typically used in the US. We can do this using the `strftime()` method. This method formats datetime objects using format specifiers. An example of its usage is shown below:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print('Pi day occurred on:', pisecond.strftime(format='%m/%d/%Y'))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Reading and writing dates and times\n",
+ "\n",
+ "### Parsing lightning data timestamps with the `datetime.strptime` method\n",
+ "\n",
+ "In this example, we are analyzing [US NLDN lightning data](https://ghrc.nsstc.nasa.gov/uso/ds_docs/vaiconus/vaiconus_dataset.html). Here is a sample row of data:\n",
+ "\n",
+ " 06/27/07 16:18:21.898 18.739 -88.184 0.0 kA 0 1.0 0.4 2.5 8 1.2 13 G\n",
+ "\n",
+ "Part of the task involves parsing the `06/27/07 16:18:21.898` time string into a `datetime` object. (Although it is outside the scope of this page's tutorial, a full description of this lightning data format can be found [here](https://ghrc.nsstc.nasa.gov/uso/ds_docs/vaiconus/vaiconus_dataset.html#a6).) In order to parse this string or others that follow the same format, you will need to employ the [datetime.strptime()](https://docs.python.org/3/library/datetime.html#datetime.datetime.strptime) method from the `datetime` module. This method takes two arguments: \n",
+ "1. the date/time string you wish to parse\n",
+ "2. the format which describes exactly how the date and time are arranged. \n",
+ "\n",
+ "[The full range of formatting options for strftime() and strptime() is described in the Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior). Finding the correct formatting options usually takes some experimentation. This is a situation where Python shines: you can use the IPython interpreter, or a Jupyter notebook, to quickly test numerous formatting options. Beyond the official documentation, Google and Stack Overflow are your friends in this process.\n",
+ "\n",
+ "After some trial and error (as described above), you can find that, in this example, the format string `'%m/%d/%y %H:%M:%S.%f'` will convert the date and time in the data to the correct format."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "strike_time = dt.datetime.strptime('06/27/07 16:18:21.898', '%m/%d/%y %H:%M:%S.%f')\n",
+ "# print strike_time to see if we have properly parsed our time\n",
+ "print(strike_time)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Example usage of the `datetime` object\n",
+ "\n",
+ "Why did we bother doing this? This is a deceptively simple example; it may appear that we only took the string `06/27/07 16:18:21.898` and reformatted it to `2007-06-27 16:18:21.898000`.\n",
+ "\n",
+ "However, our new variable, `strike_time`, is in fact a `datetime` object that we can manipulate in many useful ways. \n",
+ "\n",
+ "Here are a few quick examples of the advantages of a datetime object:"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Controlling the output format with `strftime()`\n",
+ "\n",
+ "The following example shows how to write out the time only, without a date, in a particular format:\n",
+ "```\n",
+ "16h 18m 21s\n",
+ "```\n",
+ "\n",
+ "We can do this with the [datetime.strftime()](https://docs.python.org/3/library/datetime.html#datetime.datetime.strftime) method, which takes a format identical to the one we employed for `strptime()`. After some trial and error from the IPython interpreter, we arrive at `'%Hh %Mm %Ss'`:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(strike_time.strftime(format='%Hh %Mm %Ss'))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### A simple query of just the year:\n",
+ "\n",
+ "Here's a useful shortcut that doesn't even need a format specifier:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "strike_time.year"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This works because the `datetime` object stores the data as individual attributes: \n",
+ "`year`, `month`, `day`, `hour`, `minute`, `second`, `microsecond`."
+ ]
+ },
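+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For instance, we can pull out several of these attributes at once; each one is a plain integer:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# month, day, and hour of the lightning strike, as plain integers\n",
+ "print(strike_time.month, strike_time.day, strike_time.hour)"
+ ]
+ },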
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### See how many days have elapsed since the strike:\n",
+ "\n",
+ "This example shows how to find the number of days since an event; in this case, the lightning strike described earlier:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "(dt.datetime.now() - strike_time).days"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The above example illustrates some simple arithmetic with `datetime` objects. This commonly-used feature will be covered in more detail in the next section."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Calculating coastal tides with the `timedelta` class\n",
+ "\n",
+ "In these examples, we will look at current data pertaining to coastal tides during a [tropical cyclone storm surge](http://www.nhc.noaa.gov/surge/).\n",
+ "\n",
+ "The [lunar day](http://oceanservice.noaa.gov/education/kits/tides/media/supp_tide05.html) is 24 hours and 50 minutes; there are two low tides and two high tides in that time duration. If we know the time of the current high tide, we can easily calculate the occurrence of the next low and high tides by using the [timedelta class](https://docs.python.org/3/library/datetime.html#timedelta-objects). (In reality, the *exact time* of tides is influenced by local coastal effects, in addition to the laws of celestial mechanics, but we will ignore that fact for this exercise.)\n",
+ "\n",
+ "The `timedelta` class is initialized by supplying a time duration, usually via [keyword arguments](https://docs.python.org/3/glossary.html#term-argument), to clearly express the length of time. The `timedelta` class allows you to perform arithmetic with dates and times using standard operators (i.e., `+`, `-`, `*`, `/`). You can combine a `timedelta` object with another `timedelta` object, a datetime object, or a numeric literal to obtain objects representing new dates and times.\n",
+ "\n",
+ "This convenient language feature is known as [operator overloading](https://en.wikipedia.org/wiki/Operator_overloading), and is another example of Python offering built-in functionality to make programming easier. (In some other languages, such as Java, you would have to call a method to perform such operations, which significantly obfuscates the code.) \n",
+ "\n",
+ "In addition, you can use these arithmetic operators with two datetime objects, as shown above with [lightning-strike data](#See-how-many-days-have-elapsed-since-the-strike:), to create `timedelta` objects. Let's examine all these features in the following code block."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "high_tide = dt.datetime(2016, 6, 1, 4, 38, 0)\n",
+ "lunar_day = dt.timedelta(hours=24, minutes=50)\n",
+ "tide_duration = lunar_day / 4 # Here we do some arithmetic on the timedelta object!\n",
+ "next_low_tide = (\n",
+ " high_tide + tide_duration\n",
+ ") # Here we add a timedelta object to a datetime object\n",
+ "next_high_tide = high_tide + (2 * tide_duration) # and so on\n",
+ "tide_length = next_high_tide - high_tide\n",
+ "print(f\"The time between high and low tide is {tide_duration}.\")\n",
+ "print(f\"The current high tide is {high_tide}.\")\n",
+ "print(f\"The next low tide is {next_low_tide}.\")\n",
+ "print(f\"The next high tide is {next_high_tide}.\")\n",
+ "print(f\"The tide length is {tide_length}.\")\n",
+ "print(f\"The type of the 'tide_length' variable is {type(tide_length)}.\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To illustrate that the difference of two times yields a `timedelta` object, we can use a built-in Python function called `type()`, which returns the type of its argument. In the above example, we call `type()` in the last `print` statement, and it returns the type of `timedelta`."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Dealing with Time Zones\n",
+ "\n",
+ "Time zones can be a source of confusion and frustration in geoscientific data, and in computer programming in general. Core date and time libraries in various programming languages, including Python, have design flaws relating to time zones, date and time formatting, and other inherently complex issues. Third-party libraries are often created to work around the limitations of the core libraries, with varying success. To avoid time-zone-related issues, it is best to handle data in UTC; if data cannot be handled in UTC, efforts should be made to consistently use the same time zone for all data. However, this is not always possible: events such as severe weather are expected to be reported in local time, which varies from place to place.\n",
+ "\n",
+ "### What is UTC?\n",
+ "\n",
+ "\"[UTC](https://en.wikipedia.org/wiki/Coordinated_Universal_Time)\" is a combination of the French and English abbreviations for Coordinated Universal Time. It is, in practice, equivalent to Greenwich Mean Time (GMT), the time zone at 0 degrees longitude. (The prime meridian, 0 degrees longitude, runs through Greenwich, a district of London, England.) In geoscientific data, times are often in UTC, although you should always verify that this is actually true to avoid time zone issues.\n",
+ "\n",
+ "### Time Zone Naive Versus Time Zone Aware `datetime` Objects\n",
+ "\n",
+ "When you create `datetime` objects in Python, they are \"time zone naive\", or, if the subject of time zones is assumed, simply \"naive\". This means that they are unaware of the time zone of the date and time they represent; time zone naive is the opposite of time zone aware. In many situations, you can happily go forward without this detail getting in the way of your work. As the [Python documentation states](https://docs.python.org/3/library/datetime.html#aware-and-naive-objects):\n",
+ ">Naive objects are easy to understand and to work with, at the cost of ignoring some aspects of reality. \n",
+ "\n",
+ "However, if you wish to convey time zone information, you will have to make your `datetime` objects time zone aware. The `datetime` library is able to easily convert the time zone to UTC, also converting the object to a time zone aware state, as shown below:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "naive = dt.datetime.now()\n",
+ "aware = dt.datetime.now(dt.timezone.utc)\n",
+ "print(f\"I am time zone naive {naive}.\")\n",
+ "print(f\"I am time zone aware {aware}.\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Notice that `aware` has `+00:00` appended at the end, indicating zero hours offset from UTC.\n",
+ "\n",
+ "Our `naive` object shows the local time on whatever computer was used to run this code. If you're reading this online, chances are the code was executed on a cloud server that already uses UTC. If this is the case, `naive` and `aware` will differ only at the microsecond level, due to round-off error.\n",
+ "\n",
+ "In the code above, we used `dt.timezone.utc` to initialize the UTC timezone for our `aware` object. The Python Standard Library historically did not ship a full time zone database for initializing datetime objects with arbitrary time zones, or for converting between them (the `zoneinfo` module, added in Python 3.9, now fills much of this gap). Third-party libraries have long provided this functionality; one such library is covered below."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Full time zone support with the `pytz` module\n",
+ "\n",
+ "For improved handling of time zones in Python, you will need the third-party [pytz](https://pypi.org/project/pytz/) module, whose classes build upon, or, in object-oriented programming terms, inherit from, classes from the `datetime` module.\n",
+ "\n",
+ "In this next example, we repeat the above exercise, but this time, we use a method from the `pytz` module to initialize the `aware` object in a different time zone:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "naive = dt.datetime.now()\n",
+ "aware = dt.datetime.now(pytz.timezone('US/Mountain'))\n",
+ "print(f\"I am time zone naive: {naive}.\")\n",
+ "print(f\"I am time zone aware: {aware}.\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `pytz.timezone()` method takes a time zone string; if this string is formatted correctly, the method returns a `tzinfo` object, which can be used when making a datetime object time zone aware. This initializes the time zone for the newly aware object to a specific time zone matching the time zone string. The `-06:00` offset indicates that we are now operating in a time zone six hours behind UTC; this is US Mountain Time during daylight saving time (outside of daylight saving, the offset is `-07:00`).\n",
+ "\n",
+ "### Print Time with a Different Time Zone\n",
+ "\n",
+ "If you have data that are in UTC, and wish to convert them to another time zone (in this example, US Mountain Time Zone), you will again need to make use of the `pytz` module.\n",
+ "\n",
+ "First, we will create a new datetime object with the [utcnow()](https://docs.python.org/3/library/datetime.html#datetime.datetime.utcnow) method. Despite the name of this method, the newly created object is time zone naive. Therefore, we must invoke the object's [replace()](https://docs.python.org/3/library/datetime.html#datetime.datetime.replace) method and specify UTC with a `tzinfo` object in order to make the object time zone aware. As described above, we can use the `pytz` module's timezone() method to create a new `tzinfo` object, again using the time zone string 'US/Mountain' (US Mountain Time Zone). To convert the datetime object `utc` from UTC to Mountain Time, we can then run the [astimezone()](https://docs.python.org/3/library/datetime.html#datetime.datetime.astimezone) method."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
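+ "# note: the '%-I' format code (hour without zero padding) works on Linux and macOS, but not on Windows\n",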
+ "utc = dt.datetime.utcnow().replace(tzinfo=pytz.utc)\n",
+ "print(\"The UTC time is {}.\".format(utc.strftime('%B %d, %Y, %-I:%M%p')))\n",
+ "mountaintz = pytz.timezone(\"US/Mountain\")\n",
+ "mountain = utc.astimezone(mountaintz)\n",
+ "print(\"The 'US/Mountain' time is {}.\".format(mountain.strftime('%B %d, %Y, %-I:%M%p')))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In the above example, we also use the `strftime()` method to format the date and time string in a human-friendly format."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Summary\n",
+ "\n",
+ "The Python Standard Library contains several modules for dealing with date and time data. We saw how we can avoid some name ambiguities by aliasing the module names; this can be done with import statements like `import datetime as dt` and `import time as tm`. The `tm.time()` method just returns the current [Unix time](#What-is-Unix-Time?) in seconds -- which can be useful for measuring elapsed time, but not all that useful for working with geophysical data.\n",
+ "\n",
+ "The `datetime` module contains various classes for storing, converting, comparing, and formatting date and time data on the Gregorian calendar. We saw how we can parse data files with date and time strings into `dt.datetime` objects using the `dt.datetime.strptime()` method. We also saw how to perform arithmetic using date and time data; this uses the `dt.timedelta` class to represent intervals of time.\n",
+ "\n",
+ "Finally, we looked at using the third-party [pytz](https://pypi.org/project/pytz/) module to handle time zone awareness and conversions.\n",
+ "\n",
+ "### What's Next?\n",
+ "\n",
+ "In subsequent tutorials, we will dig deeper into different time and date formats, and discuss how they are handled by important Python modules such as Numpy, Pandas, and Xarray."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Resources and References\n",
+ "\n",
+ "This page was based on and adapted from material in [Unidata's Python Training](https://unidata.github.io/python-training/python/times_and_dates/).\n",
+ "\n",
+ "For further reading on these modules, take a look at the official documentation for:\n",
+ "- [time](https://docs.python.org/3/library/time.html)\n",
+ "- [datetime](https://docs.python.org/3/library/datetime.html)\n",
+ "- [pytz](https://pypi.org/project/pytz/)\n",
+ "\n",
+ "For more information on Python string formatting, try:\n",
+ "- [Python string documentation](https://docs.python.org/3/library/string.html)\n",
+ "- RealPython's [string formatting tutorial](https://realpython.com/python-string-formatting/) (nicely written)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/_preview/468/_sources/core/matplotlib.md b/_preview/468/_sources/core/matplotlib.md
new file mode 100644
index 000000000..5312e9bdf
--- /dev/null
+++ b/_preview/468/_sources/core/matplotlib.md
@@ -0,0 +1,29 @@
+![Matplotlib logo](https://matplotlib.org/stable/_images/sphx_glr_logos2_003.png)
+
+# Matplotlib
+
+[Matplotlib](https://matplotlib.org) is the go-to library for plotting within Python. Numerous packages and libraries build off of Matplotlib, making it the de facto standard Python plotting package. If you were to learn a single plotting tool to keep in your toolbox, this is it.
+
+## Why Matplotlib?
+
+Matplotlib is a plotting library for Python and is often the first plotting package Python learners encounter. You may be wondering, "Why learn Matplotlib? Why not [Seaborn](https://seaborn.pydata.org) or another plotting library first?"
+
+The simple answer to the much-asked question of "why Matplotlib?" is that it is extremely popular; in fact, Matplotlib is one of the most popular Python packages. Because of its history as Python's "go-to" plotting package, most other open source plotting libraries, including Seaborn, are built on top of Matplotlib; these more specialized plotting packages inherit some of Matplotlib's capabilities, syntax, and limitations. You will therefore find it useful to be familiar with Matplotlib when learning other plotting libraries.
+
+Matplotlib supports a variety of output formats, chart types, and interactive options, and runs well on most operating systems and graphics backends. Its key strengths are its extensibility and the [extensive documentation](https://matplotlib.org) available to the community. All of this contributes to Matplotlib's popularity, and is why Matplotlib is the first plotting package we introduce in this book.
+
+## In this section
+
+In this section of Pythia Foundations, you will find tutorials on basic plotting with [Matplotlib](https://matplotlib.org).
+
+From the [Matplotlib documentation](https://matplotlib.org), "Matplotlib is a comprehensive library for creating static, animated, and interactive visualizations in Python."
+
+Currently, Pythia Foundations provides a basic introduction to Matplotlib, as well as:
+
+- Histograms
+- Pie charts
+- Animations
+- Annotations
+- Colorbars
+- Contour plots
+- Customizing layouts
diff --git a/_preview/468/_sources/core/matplotlib/annotations-colorbars-layouts.ipynb b/_preview/468/_sources/core/matplotlib/annotations-colorbars-layouts.ipynb
new file mode 100644
index 000000000..691cd8ab9
--- /dev/null
+++ b/_preview/468/_sources/core/matplotlib/annotations-colorbars-layouts.ipynb
@@ -0,0 +1,687 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "a2d0abc7-ffd8-483e-87ae-bb169c5bcecf",
+ "metadata": {},
+ "source": [
+ "![Matplotlib logo](https://matplotlib.org/stable/_images/sphx_glr_logos2_003.png)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2583ef82-33dc-4df5-9f6d-f357d72f0b81",
+ "metadata": {},
+ "source": [
+ "# Annotations, Colorbars, and Advanced Layouts\n",
+ "\n",
+ "---\n",
+ "## Overview\n",
+ "\n",
+ "In this section we explore methods for customizing plots. The following topics will be covered:\n",
+ "\n",
+ "1. Adding annotations\n",
+ "1. Rendering equations\n",
+ "1. Colormap overview \n",
+ "1. Basic colorbars \n",
+ "1. Shared colorbars\n",
+ "1. Custom colorbars\n",
+ "1. Mosaic subplots"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "94250818-a557-4717-ae71-6aa45b9f212b",
+ "metadata": {},
+ "source": [
+ "## Prerequisites\n",
+ "\n",
+ "\n",
+ "| Concepts | Importance |\n",
+ "| --- | --- |\n",
+ "| [NumPy Basics](../numpy/numpy-basics) | Necessary |\n",
+ "| [Matplotlib Basics](matplotlib-basics) | Necessary |\n",
+ "\n",
+ "- **Time to learn**: *30-40 minutes*"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9deb8579-2995-46b4-a82f-1ab79a67155c",
+ "metadata": {},
+ "source": [
+ "## Imports\n",
+ "Here, we import the `matplotlib.pyplot` interface and `numpy`, in addition to the `scipy` statistics package (`scipy.stats`) for generating sample data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "00b72a52-d8e5-48e1-ac4c-35c2e3217de5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "import numpy as np\n",
+ "import scipy.stats as stats\n",
+ "from matplotlib.colors import LinearSegmentedColormap, ListedColormap, Normalize"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a4a423a8-5692-448f-aa78-3d16d3ace19d",
+ "metadata": {},
+ "source": [
+ "## Create Some Sample Data\n",
+ "By using `scipy.stats`, the Scipy statistics package described above, we can easily create a data array containing a normal distribution. We can plot these data points to confirm that the correct distribution was generated. The generated sample data will then be used later in this section. The code and sample plot for this data generation are as follows:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cd97b1e4-0b10-4099-b288-0c9cb7624a11",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "mu = 0\n",
+ "variance = 1\n",
+ "sigma = np.sqrt(variance)\n",
+ "\n",
+ "x = np.linspace(mu - 3 * sigma, mu + 3 * sigma, 200)\n",
+ "pdf = stats.norm.pdf(x, mu, sigma)\n",
+ "\n",
+ "plt.plot(x, pdf);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fbe53c79-98d1-4cda-bea9-4cf532ce835e",
+ "metadata": {},
+ "source": [
+ "## Adding Annotations\n",
+ "A common part of many people's workflows is adding annotations. A rough definition of 'annotation' is 'a note of explanation or comment added to text or a diagram'.\n",
+ "\n",
+ "We can add an annotation to a plot using `plt.text`. This method takes the x and y data coordinates at which to draw the annotation (as floating-point values), and the string containing the annotation text."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1b5598da-caf6-4bc6-aa59-e74954b77bce",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "plt.plot(x, pdf)\n",
+ "plt.text(0, 0.05, 'here is some text!');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1ab0a677-90c8-43f9-9d0d-c2f9de698edb",
+ "metadata": {},
+ "source": [
+ "## Rendering Equations"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9e2bc873-0592-4813-81ab-79e3ea7f5855",
+ "metadata": {},
+ "source": [
+ "We can also add annotations with **equation formatting**, by using LaTeX syntax. The key is to use strings in the following format:\n",
+ "\n",
+ "```python\n",
+ "r'$some_equation$'\n",
+ "```\n",
+ "\n",
+ "Let's run an example that renders the following equation as an annotation:\n",
+ "\n",
+ "$$f(x) = \\frac{1}{\\mu\\sqrt{2\\pi}} e^{-\\frac{1}{2}\\left(\\frac{x-\\mu}{\\sigma}\\right)^2}$$\n",
+ "\n",
+ "The next code block and plot demonstrate rendering this equation as an annotation.\n",
+ "\n",
+ "If you are interested in learning more about LaTeX syntax, check out [their official documentation](https://latex-tutorial.com/tutorials/amsmath/).\n",
+ "\n",
+ "Furthermore, if the code is being executed in a Jupyter notebook run interactively (e.g., on Binder), you can double-click on the cell to see the LaTeX source for the rendered equation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9ae9caab-af46-42f5-ac36-47cafbcdaf68",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "plt.plot(x, pdf)\n",
+ "\n",
+ "plt.text(\n",
+ " -1,\n",
+ " 0.05,\n",
+ " r'$f(x) = \\frac{1}{\\mu\\sqrt{2\\pi}} e^{-\\frac{1}{2}\\left(\\frac{x-\\mu}{\\sigma}\\right)^2}$',\n",
+ ");"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "aad54e23-b488-4437-89ad-55e14e410f90",
+ "metadata": {},
+ "source": [
+ "As you can see, the equation was correctly rendered in the plot above. However, the equation appears quite small. We can increase the size of the text using the `fontsize` keyword argument, and center the equation using the `ha` (horizontal alignment) keyword argument.\n",
+ "\n",
+ "The following example illustrates the use of these keyword arguments, as well as creating a legend containing LaTeX notation:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f915a43f-dd59-462a-a52f-f2d39e53f6cd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fstr = r'$f(x) = \\frac{1}{\\mu\\sqrt{2\\pi}} e^{-\\frac{1}{2}\\left(\\frac{x-\\mu}{\\sigma}\\right)^2}$'\n",
+ "\n",
+ "plt.plot(x, pdf, label=r'$\\mu=0, \\,\\, \\sigma^2 = 1$')\n",
+ "plt.text(0, 0.05, fstr, fontsize=15, ha='center')\n",
+ "plt.legend();"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "79bc5d01-186c-41df-b645-73d49cfc85d1",
+ "metadata": {},
+ "source": [
+ "### Add a Box Around the Text"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "210d8d70-e14c-47fa-a710-1d04e84496f4",
+ "metadata": {},
+ "source": [
+ "To improve readability, we can also add a box around the equation text. This is done using `bbox`.\n",
+ "\n",
+ "`bbox` is a keyword argument in `plt.text` that creates a box around text. It takes a dictionary that specifies options, behaving like additional keyword arguments inside of the `bbox` argument. In this case, we use the following dictionary keys:\n",
+ "* a rounded box style (`boxstyle = 'round'`)\n",
+ "* a light grey facecolor (`fc = 'lightgrey'`)\n",
+ "* a black edgecolor (`ec = 'k'`)\n",
+ "\n",
+ "This example demonstrates the correct use of `bbox`:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3be0cb9a-8058-4357-97e4-089d556b3194",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig = plt.figure(figsize=(10, 8))\n",
+ "plt.plot(x, pdf)\n",
+ "\n",
+ "fstr = r'$f(x) = \\frac{1}{\\mu\\sqrt{2\\pi}} e^{-\\frac{1}{2}\\left(\\frac{x-\\mu}{\\sigma}\\right)^2}$'\n",
+ "plt.text(\n",
+ " 0,\n",
+ " 0.05,\n",
+ " fstr,\n",
+ " fontsize=18,\n",
+ " ha='center',\n",
+ " bbox=dict(boxstyle='round', fc='lightgrey', ec='k'),\n",
+ ")\n",
+ "\n",
+ "plt.xticks(fontsize=16)\n",
+ "plt.yticks(fontsize=16)\n",
+ "\n",
+ "plt.title(\"Normal Distribution with SciPy\", fontsize=24);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "91f4b661-131f-4d29-925f-0028f987be14",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "60564465-4b40-467b-a1a5-d10c4378329c",
+ "metadata": {},
+ "source": [
+ "## Colormap Overview\n",
+ "\n",
+ "Colormaps are a visually appealing method of looking at visualized data in a new and different way. They associate specific values with hues, using color to ease rapid understanding of plotted data; for example, displaying hotter temperatures as red and colder temperatures as blue."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b0179902-36d5-4b51-894c-f8aad2d92ad8",
+ "metadata": {},
+ "source": [
+ "### Classes of colormaps\n",
+ "\n",
+ "There are four different classes of colormaps, and many individual maps are contained in each class. To view some examples for each class, use the dropdown arrow next to the class name below.\n",
+ "\n",
+ "\n",
+ " 1. Sequential: These colormaps incrementally increase or decrease in lightness and/or saturation of color. In general, they work best for ordered data. \n",
+ "\n",
+ "![Perceptually Sequential](images/perceptually-sequential.png)\n",
+ "\n",
+ "![Sequential](images/sequential.png)\n",
+ "\n",
+ "![Sequential2](images/sequential2.png)\n",
+ "\n",
+ "![Perceptually Sequential](images/ps.png)\n",
+ "\n",
+ "![Sequential](images/s1.png)\n",
+ "\n",
+ "![Sequential2](images/s2.png)\n",
+ "\n",
+ "\n",
+ "\n",
+ " 2. Diverging: These colormaps contain two colors that change in lightness and/or saturation in proportion to distance from the middle, and an unsaturated color in the middle. They are almost always used with data containing a natural zero point, such as sea level. \n",
+ "\n",
+ "![Diverging](images/diverging.png)\n",
+ "\n",
+ "![Diverging](images/d.png)\n",
+ "\n",
+ "\n",
+ "\n",
+ " 3. Cyclic: These colormaps have two different colors that change in lightness and meet in the middle, and unsaturated colors at the beginning and end. They are usually best for data values that wrap around, such as longitude. \n",
+ "\n",
+ "![Cyclic](images/cyclic.png)\n",
+ "\n",
+ "![Cyclic](images/c.png)\n",
+ "\n",
+ "\n",
+ "\n",
+ " 4. Qualitative: These colormaps have no pattern, and are mostly bands of miscellaneous colors. You should only use these colormaps for unordered data without relationships. \n",
+ "\n",
+ "![Qualitative](images/qualitative.png)\n",
+ "\n",
+ "![Miscellanous](images/misc.png)\n",
+ "\n",
+ "![Miscellanous](images/m.png)\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5ce3b4c7-1186-4067-9b34-552382bf614e",
+ "metadata": {},
+ "source": [
+ "### Other considerations\n",
+ "\n",
+ "There is a lot of info about choosing colormaps that could be its own tutorial. Two important considerations:\n",
+ "1. Color-blind friendly patterns: By using colormaps that do not contain both red and green, you can help people with the most common form of color blindness read your data plots more easily. The GeoCAT examples gallery has a section about [picking better colormaps](https://geocat-examples.readthedocs.io/en/latest/gallery/index.html#colors) that covers this issue in greater detail.\n",
+ "1. Grayscale conversion: It is not too uncommon for a plot originally rendered in color to be converted to black-and-white (monochrome grayscale). This reduces the usefulness of specific colormaps, as shown below.\n",
+ "\n",
+ "![hsv colormap in grayscale](images/hsv2gray.png)\n",
+ "\n",
+ "- For more information on these concerns, as well as colormap choices in general, see the documentation page [Choosing Colormaps in Matplotlib](https://matplotlib.org/stable/tutorials/colors/colormaps.html). "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "66f8b410-ce74-47ad-99be-0b7c670c6c05",
+ "metadata": {},
+ "source": [
+ "## Basic Colorbars"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cb557718-d4ee-41c9-834c-ceddf2e3329a",
+ "metadata": {},
+ "source": [
+ "Before we look at a colorbar, let's generate some fake X and Y data using `numpy.random`, and set a number of bins for a histogram:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "49665676-e0db-425d-9ac2-9a0cab084297",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "npts = 1000\n",
+ "nbins = 15\n",
+ "\n",
+ "x = np.random.normal(size=npts)\n",
+ "y = np.random.normal(size=npts)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "614c5189-a563-4856-a900-26bf3dcc849a",
+ "metadata": {},
+ "source": [
+ "Now we can use our fake data to plot a 2-D histogram with the number of bins set above. We then add a colorbar to the plot, using the default colormap `viridis`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bf3694f1-e8ed-40de-a50a-7e85b179868c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig = plt.figure()\n",
+ "ax = plt.gca()\n",
+ "\n",
+ "plt.hist2d(x, y, bins=nbins, density=True)\n",
+ "plt.colorbar();"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "dc8e6e2b-3850-4379-bf18-d78ee01739dc",
+ "metadata": {},
+ "source": [
+ "We can change which colormap to use by setting the keyword argument `cmap = 'colormap_name'` in the plotting function call. This sets the colormap not only for the plot, but for the colorbar as well. In this case, we use the `magma` colormap:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "16c61ae2-14f6-4b14-8360-c832a46a42b1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig = plt.figure()\n",
+ "ax = plt.gca()\n",
+ "\n",
+ "plt.hist2d(x, y, bins=nbins, density=True, cmap='magma')\n",
+ "plt.colorbar();"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7c80a5af-364b-4eda-87c0-10a709b1f32b",
+ "metadata": {},
+ "source": [
+ "## Shared Colorbars\n",
+ "Oftentimes, you are plotting multiple subplots, or multiple `Axes` objects, simultaneously. In these scenarios, you can create colorbars that span multiple plots, as shown in the following example:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "affb4b48-1656-4f70-a9d5-e7bfb0002e7b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig, ax = plt.subplots(nrows=1, ncols=2, constrained_layout=True)\n",
+ "\n",
+ "hist1 = ax[0].hist2d(x, y, bins=15, density=True, vmax=0.18)\n",
+ "hist2 = ax[1].hist2d(x, y, bins=30, density=True, vmax=0.18)\n",
+ "\n",
+ "fig.colorbar(hist1[3], ax=ax, location='bottom')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1662bf3c",
+ "metadata": {},
+ "source": [
+ "You may be wondering why the call to `fig.colorbar` uses the argument `hist1[3]`. The explanation is as follows: `hist1` is a tuple returned by `hist2d`, and `hist1[3]` contains a `matplotlib.collections.QuadMesh` that points to the colormap for the first histogram. To make sure that both histograms are using the same colormap with the same range of values, `vmax` is set to 0.18 for both plots. This ensures that both histograms are using colormaps that represent values from 0 (the default for histograms) to 0.18. Because the same data values are used for both plots, it doesn't matter whether we pass in `hist1[3]` or `hist2[3]` to `fig.colorbar`.\n",
+ "You can learn more about this topic by reviewing the [`matplotlib.axes.Axes.hist2d` documentation](https://matplotlib.org/stable/api/_as_gen/matplotlib.axes.Axes.hist2d.html)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "84c50862",
+ "metadata": {},
+ "source": [
+ "In addition, there are many other types of plots that can also share colorbars. An actual use case that is quite common is to use shared colorbars to compare data between filled contour plots. The `vmin` and `vmax` keyword arguments behave the same way for `contourf` as they do for `hist2d`. However, there is a potential downside to using the `vmin` and `vmax` kwargs. When plotting two different datasets, the dataset with the smaller range of values won't show the full range of colors, even though the colormaps are the same. Thus, it can potentially matter which output from `contourf` is used to make a colorbar. The following examples demonstrate general plotting technique for filled contour plots with shared colorbars, as well as best practices for dealing with some of these logistical issues:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "28d4cea3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x2 = y2 = np.arange(-3, 3.01, 0.025)\n",
+ "X2, Y2 = np.meshgrid(x2, y2)\n",
+ "Z = np.sqrt(np.sin(X2) ** 2 + np.sin(Y2) ** 2)\n",
+ "Z2 = np.sqrt(2 * np.cos(X2) ** 2 + 2 * np.cos(Y2) ** 2)\n",
+ "\n",
+ "fig, ax = plt.subplots(nrows=1, ncols=2, constrained_layout=True)\n",
+ "c1 = ax[0].contourf(X2, Y2, Z, vmin=0, vmax=2)\n",
+ "c2 = ax[1].contourf(X2, Y2, Z2, vmin=0, vmax=2)\n",
+ "fig.colorbar(c1, ax=ax[0], location='bottom')\n",
+ "fig.colorbar(c2, ax=ax[1], location='bottom')\n",
+ "\n",
+ "fig.suptitle('Shared colormaps on data with different ranges')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5570ebb7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig, ax = plt.subplots(nrows=1, ncols=2, constrained_layout=True)\n",
+ "c1 = ax[0].contourf(X2, Y2, Z, vmin=0, vmax=2)\n",
+ "c2 = ax[1].contourf(X2, Y2, Z2, vmin=0, vmax=2)\n",
+ "fig.colorbar(c2, ax=ax, location='bottom')\n",
+ "\n",
+ "fig.suptitle('Using the contourf output from the data with a wider range')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "92d072f8-7370-4ea5-92e0-4407cb5905bb",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "## Custom Colorbars\n",
+ "\n",
+ "Despite the availability of a large number of premade colorbar styles, it can still occasionally be helpful to create your own colorbars.\n",
+ "\n",
+ "Below are 2 similar examples of using custom colorbars.\n",
+ "\n",
+ "The first example uses a very discrete list of colors, simply named `colors`, and creates a colormap from this list by using the call `ListedColormap`. \n",
+ "\n",
+ "The second example uses the function `LinearSegmentedColormap` to create a new colormap, using interpolation and the `colors` list defined in the first example."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "308cb21e-7d82-42b9-a02a-0b452d58d4ed",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "colors = [\n",
+ " 'white',\n",
+ " 'pink',\n",
+ " 'red',\n",
+ " 'orange',\n",
+ " 'yellow',\n",
+ " 'green',\n",
+ " 'blue',\n",
+ " 'purple',\n",
+ " 'black',\n",
+ "]\n",
+ "ccmap = ListedColormap(colors)\n",
+ "norm = Normalize(vmin=0, vmax=0.18)\n",
+ "\n",
+ "fig, ax = plt.subplots(nrows=1, ncols=2, constrained_layout=True)\n",
+ "\n",
+ "hist1 = ax[0].hist2d(x, y, bins=15, density=True, cmap=ccmap, norm=norm)\n",
+ "hist2 = ax[1].hist2d(x, y, bins=30, density=True, cmap=ccmap, norm=norm)\n",
+ "\n",
+ "cbar = fig.colorbar(hist1[3], ax=ax, location='bottom')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5c72b622-ba9b-4fdb-be25-27366eca3872",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cbcmap = LinearSegmentedColormap.from_list(\"cbcmap\", colors)\n",
+ "\n",
+ "fig, ax = plt.subplots(nrows=1, ncols=2, constrained_layout=True)\n",
+ "\n",
+ "hist1 = ax[0].hist2d(x, y, bins=15, density=True, cmap=cbcmap, norm=norm)\n",
+ "hist2 = ax[1].hist2d(x, y, bins=30, density=True, cmap=cbcmap, norm=norm)\n",
+ "\n",
+ "cbar = fig.colorbar(hist1[3], ax=ax, location='bottom')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ea7f200f",
+ "metadata": {},
+ "source": [
+ "### The `Normalize` Class\n",
+ "Notice that both of these examples contain plotting functions that make use of the `norm` kwarg. This keyword argument takes an object of the `Normalize` class. A `Normalize` object is constructed with two numeric values, representing the start and end of the data. It then linearly normalizes the data in that range into an interval of [0,1]. If this sounds familiar, it is because this functionality was used in a previous histogram example. Feel free to review any previous examples if you need a refresher on particular topics. In this example, the values of the `vmin` and `vmax` kwargs used in `hist2d` are reused as arguments to the `Normalize` class constructor. This sets the values of `vmin` and `vmax` as the starting and ending data values for our `Normalize` object, which is passed to the `norm` kwarg of `hist2d` to normalize the data. There are many different options for normalizing data, and it is important to explicitly specify how you want your data normalized, especially when making a custom colormap.\n",
+ "\n",
+ "For information on nonlinear and other complex forms of normalization, review this [Colormap Normalization tutorial](https://matplotlib.org/stable/tutorials/colors/colormapnorms.html#)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e41f44e0-2c4f-4ce2-abe6-35d20b8c142e",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "## Mosaic Subplots\n",
+ "One of the helpful features recently added to Matplotlib is the `subplot_mosaic` method. This method allows you to specify the structure of your figure using specially formatted strings, and will generate subplots automatically based on that structure.\n",
+ "\n",
+ "For example, if we wanted two plots on top, and one on the bottom, we can construct them by passing the following string to `subplot_mosaic`:\n",
+ "\n",
+ "```python\n",
+ "\"\"\n",
+ "AB\n",
+ "CC\n",
+ "\"\"\n",
+ "```\n",
+ "\n",
+ "This creates three `Axes` objects corresponding to three subplots. The subplots `A` and `B` are on top of the subplot `C`, and the `C` subplot spans the combined width of `A` and `B`.\n",
+ "\n",
+ "Once we create the subplots, we can access them using the dictionary returned by `subplot_mosaic`. You can specify an `Axes` object (in this example, `your_axis`) in the dictionary (in this example, `axes_dict`) by using the syntax `axes_dict['your_axis']`. A full example of `subplot_mosaic` is as follows:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7e080054-ce5c-451d-81f6-c4791a4b2537",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "axdict = plt.figure(constrained_layout=True).subplot_mosaic(\n",
+ " \"\"\"\n",
+ " AB\n",
+ " CC\n",
+ " \"\"\"\n",
+ ")\n",
+ "\n",
+ "histA = axdict['A'].hist2d(x, y, bins=15, density=True, cmap=cbcmap, norm=norm)\n",
+ "histB = axdict['B'].hist2d(x, y, bins=10, density=True, cmap=cbcmap, norm=norm)\n",
+ "histC = axdict['C'].hist2d(x, y, bins=30, density=True, cmap=cbcmap, norm=norm)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7569067a-7c59-46b0-b283-b108094010f1",
+ "metadata": {},
+ "source": [
+ "You'll notice there is not a colorbar plotted by default. When constructing the colorbar, we need to specify the following:\n",
+ "* Which plot to use for the colormapping (ex. `histA`)\n",
+ "* Which subplots (`Axes` objects) to merge colorbars across (ex. [`histA`, `histB`])\n",
+ "* Where to place the colorbar (ex. `bottom`)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3077f1b6-adba-411d-a5a5-430600f0e2fa",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "axdict = plt.figure(constrained_layout=True).subplot_mosaic(\n",
+ " \"\"\"\n",
+ " AB\n",
+ " CC\n",
+ " \"\"\"\n",
+ ")\n",
+ "\n",
+ "histA = axdict['A'].hist2d(x, y, bins=15, density=True, cmap=cbcmap, norm=norm)\n",
+ "histB = axdict['B'].hist2d(x, y, bins=10, density=True, cmap=cbcmap, norm=norm)\n",
+ "histC = axdict['C'].hist2d(x, y, bins=30, density=True, cmap=cbcmap, norm=norm)\n",
+ "\n",
+ "fig.colorbar(histA[3], ax=[axdict['A'], axdict['B']], location='bottom')\n",
+ "fig.colorbar(histC[3], ax=[axdict['C']], location='right');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "85b884b1-4db7-4d9d-9563-79750dbcfc67",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2f505b91-cb9a-4175-a1b7-91f501c1e2cc",
+ "metadata": {},
+ "source": [
+ "## Summary\n",
+ "* You can use features in Matplotlib to add text annotations to your plots, including equations in mathematical notation\n",
+ "* There are a number of considerations to take into account when choosing your colormap\n",
+ "* You can create your own colormaps with Matplotlib\n",
+ "* Various subplots and corresponding `Axes` objects in a figure can share colorbars\n",
+ " \n",
+ "## Resources and references\n",
+ "- [Matplotlib text documentation](https://matplotlib.org/stable/api/text_api.html#matplotlib.text.Text.set_math_fontfamily)\n",
+ "- [Matplotlib annotation documentation](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.annotate.html)\n",
+ "- [Matplotlib's annotation examples](https://matplotlib.org/stable/tutorials/text/annotations.html)\n",
+ "- [Writing mathematical expressions in Matplotlib](https://matplotlib.org/stable/tutorials/text/mathtext.html)\n",
+ "- [Mathtext Examples](https://matplotlib.org/stable/gallery/text_labels_and_annotations/mathtext_examples.html#sphx-glr-gallery-text-labels-and-annotations-mathtext-examples-py)\n",
+ "- [Drawing fancy boxes with Matplotlib](https://matplotlib.org/stable/gallery/shapes_and_collections/fancybox_demo.html)\n",
+ "- [Plot Types Cheat Sheet](https://lnkd.in/dD5fE8V)\n",
+ "- [Choosing Colormaps in Matplotlib](https://matplotlib.org/stable/tutorials/colors/colormaps.html)\n",
+ "- [Making custom colormaps](https://matplotlib.org/stable/tutorials/colors/colormap-manipulation.html)\n",
+ "- [Complex figure and subplot composition](https://matplotlib.org/stable/tutorials/provisional/mosaic.html#)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "28649673-c9c6-4914-9f60-5c06c25e1e49",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/_preview/468/_sources/core/matplotlib/histograms-piecharts-animation.ipynb b/_preview/468/_sources/core/matplotlib/histograms-piecharts-animation.ipynb
new file mode 100644
index 000000000..1974c8e31
--- /dev/null
+++ b/_preview/468/_sources/core/matplotlib/histograms-piecharts-animation.ipynb
@@ -0,0 +1,462 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "3d9564ec",
+ "metadata": {},
+ "source": [
+ "![Matplotlib logo](https://matplotlib.org/stable/_images/sphx_glr_logos2_003.png)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e9eb4444",
+ "metadata": {},
+ "source": [
+ "# Histograms, Pie Charts, and Animations"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cba92e3a",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "## Overview\n",
+ "\n",
+ "In this section we'll explore some more specialized plot types, including:\n",
+ "\n",
+ "1. Histograms\n",
+ "1. Pie Charts\n",
+ "1. Animations"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "56c73537",
+ "metadata": {},
+ "source": [
+ "## Prerequisites\n",
+ "| Concepts | Importance | Notes |\n",
+ "| --- | --- | --- |\n",
+ "| [NumPy Basics](../numpy/numpy-basics) | Necessary | |\n",
+ "| [Matplotlib Basics](matplotlib-basics) | Necessary | |\n",
+ "\n",
+ "* **Time to Learn**: 30 minutes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4440f2b1",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0702fe7b",
+ "metadata": {},
+ "source": [
+ "## Imports"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "148180d5",
+ "metadata": {},
+ "source": [
+ "Just like in the previous tutorial, we are going to import Matplotlib's `pyplot` interface as `plt`. We must also import `numpy` for working with data arrays."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d16d139c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7310773f",
+ "metadata": {},
+ "source": [
+ "## Histograms\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a5ea8056",
+ "metadata": {},
+ "source": [
+ "We can plot a 1-D histogram using most 1-D data arrays.\n",
+ "\n",
+ "To get the 1-D data array for this example, we generate example data using NumPy’s normal-distribution random-number generator. For demonstration purposes, we've specified the random seed for reproducibility. The code for this number generation is as follows:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "df424130",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "npts = 2500\n",
+ "nbins = 15\n",
+ "\n",
+ "np.random.seed(0)\n",
+ "x = np.random.normal(size=npts)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "32b9f3bd",
+ "metadata": {},
+ "source": [
+ "Now that we have our data array, we can make a histogram using `plt.hist`. In this case, we change the y-axis to represent probability, instead of count; this is performed by setting `density=True`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "dfd7c0cc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "plt.hist(x, bins=nbins, density=True)\n",
+ "plt.title('1D histogram')\n",
+ "plt.xlabel('Data')\n",
+ "plt.ylabel('Probability');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "84fb255f",
+ "metadata": {},
+ "source": [
+ "Similarly, we can make a 2-D histogram, by first generating a second 1-D array, and then calling `plt.hist2d` with both 1-D arrays as arguments:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4ed3325e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "y = np.random.normal(size=npts)\n",
+ "\n",
+ "plt.hist2d(x, y, bins=nbins);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "dd5a6bca",
+ "metadata": {},
+ "source": [
+ "## Pie Charts"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "35bc61ba",
+ "metadata": {},
+ "source": [
+ "Matplotlib also has the capability to plot pie charts, by way of `plt.pie`. The most basic implementation uses a 1-D array of wedge 'sizes' (i.e., percent values), as shown below:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9399feaa",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x = np.array([25, 15, 20, 40])\n",
+ "plt.pie(x);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1cec2e20",
+ "metadata": {},
+ "source": [
+ "Typically, you'll see examples where all of the values in the array `x` will sum to 100, but the data values provided to `plt.pie` do not necessarily have to add up to 100. The sum of the numbers provided will be normalized to 1, and the individual values will thereby be converted to percentages, regardless of the actual sum of the values. If this behavior is unwanted or unneeded, you can set `normalize=False`.\n",
+ "\n",
+ "If you set `normalize=False`, and the sum of the values of x is less than 1, then a partial pie chart is plotted. If the values sum to larger than 1, a `ValueError` will be raised."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "be883e4e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x = np.array([0.25, 0.20, 0.40])\n",
+ "plt.pie(x, normalize=False);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e747e452",
+ "metadata": {},
+ "source": [
+ "Let's do a more complicated example.\n",
+ "\n",
+ "Here we create a pie chart with various sizes associated with each color. Labels are derived by capitalizing each color in the array `colors`. Since colors can be specified by strings corresponding to named colors, this allows both the colors and the labels to be set from the same array, reducing code and effort.\n",
+ "\n",
+ "If you want to offset one or more wedges for effect, you can use the `explode` keyword argument. The value for this argument must be a list of floating-point numbers with the same length as the number of wedges. The numbers indicate the percentage of offset for each wedge. In this example, each wedge is not offset except for the pink (3rd index)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fa9ecaec",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "colors = ['red', 'blue', 'yellow', 'pink', 'green']\n",
+ "labels = [c.capitalize() for c in colors]\n",
+ "\n",
+ "sizes = [1, 3, 5, 7, 9]\n",
+ "explode = (0, 0, 0, 0.1, 0)\n",
+ "\n",
+ "\n",
+ "plt.pie(sizes, labels=labels, explode=explode, colors=colors, autopct='%1.1f%%');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c516ce4b",
+ "metadata": {},
+ "source": [
+ "## Animations"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "46c1acfc",
+ "metadata": {},
+ "source": [
+ "Matplotlib offers a single commonly-used animation tool, `FuncAnimation`. This tool must be imported separately through Matplotlib’s animation package, as shown below. You can find more information on animation with Matplotlib at the [official documentation page](https://matplotlib.org/stable/api/animation_api.html)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3879e346",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from matplotlib.animation import FuncAnimation"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a31e17ed",
+ "metadata": {},
+ "source": [
+ "`FuncAnimation` creates animations by repeatedly calling a function. Using this method involves three main steps:\n",
+ "\n",
+ "1. Create an initial state of the plot\n",
+ "1. Make a function that can \"progress\" the plot to the next frame of the animation\n",
+ "1. Create the animation using FuncAnimation"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e1abc210",
+ "metadata": {},
+ "source": [
+ "For this example, let's create an animated sine wave."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5455b9de",
+ "metadata": {},
+ "source": [
+ "### Step 1: Initial State\n",
+ "In the initial state step, we will define a function called `init`. This function will then create the animation plot in its initial state. However, please note that the successful use of `FuncAnimation` does not technically require such a function; in a later example, creating animations without an initial-state function is demonstrated."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1037bb3b",
+ "metadata": {},
+ "source": [
+ "First, we’ll define `Figure` and `Axes` objects. After that, we can create a line-plot object (referred to here as a line) with `plt.plot`. To create the initialization function, we set the line's data to be empty and then return the line.\n",
+ "\n",
+ "Please note, this code block will display a blank plot when run as a Jupyter notebook cell."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3feef13d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig, ax = plt.subplots()\n",
+ "ax.set_xlim(0, 2 * np.pi)\n",
+ "ax.set_ylim(-1.5, 1.5)\n",
+ "\n",
+ "(line,) = ax.plot([], [])\n",
+ "\n",
+ "\n",
+ "def init():\n",
+ " line.set_data([], [])\n",
+ " return (line,)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "679981fb",
+ "metadata": {},
+ "source": [
+ "### Step 2: Animation Progression Function\n",
+ "For this step, we create a progression function, which takes an index (usually named `n` or `i`), and returns the corresponding (in other words, `n`-th or `i`-th) frame of the animation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c5b606e2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def animate(i):\n",
+ " x = np.linspace(0, 2 * np.pi, 250)\n",
+ "\n",
+ " y = np.sin(2 * np.pi * (x - 0.1 * i))\n",
+ "\n",
+ " line.set_data(x, y)\n",
+ "\n",
+ " return (line,)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6ef0f128",
+ "metadata": {},
+ "source": [
+ "### Step 3: Using `FuncAnimation`\n",
+ "The last step is to feed the parts we created to `FuncAnimation`. Please note, when using the `FuncAnimation` function, it is important to save the output in a variable, even if you do not intend to use this output later. If you do not, Python’s garbage collector may attempt to save memory by deleting the animation data, and it will be unavailable for later use."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8ecb3760",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "anim = FuncAnimation(fig, animate, init_func=init, frames=200, interval=20, blit=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cbc69df4",
+ "metadata": {},
+ "source": [
+ "In order to show the animation in a Jupyter notebook, we have to use the `rc` function. This function must be imported separately, and is used to set specific parameters in Matplotlib. In this case, we need to set the `html` parameter for animation plots to `html5`, instead of the default value of none. The code for this is written as follows:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b464c460",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from matplotlib import rc\n",
+ "\n",
+ "rc('animation', html='html5')\n",
+ "\n",
+ "anim"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8be86c41-2ac0-4385-8b3f-70cf0139b19e",
+ "metadata": {},
+ "source": [
+ "### Saving an Animation\n",
+ "\n",
+ "To save an animation to a file, use the `save()` method of the animation variable, in this case `anim.save()`, as shown below. The arguments are the file name to save the animation to, in this case `animate.gif`, and the writer used to save the file. Here, the animation writer chosen is [Pillow](https://pillow.readthedocs.io/en/stable/index.html), a library for image processing in Python. There are many choices for an animation writer, which are described in detail in the Matplotlib writer documentation. The documentation for the Pillow writer is described on [this page](https://matplotlib.org/stable/api/_as_gen/matplotlib.animation.PillowWriter.html); links to other writer documentation pages are on the left side of the Pillow writer documentation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6a1693bd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "anim.save('animate.gif', writer='pillow');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "85b884b1-4db7-4d9d-9563-79750dbcfc67",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "076b1bf3",
+ "metadata": {},
+ "source": [
+ "## Summary\n",
+ "* Matplotlib supports many different plot types, including the less-commonly-used types described in this section. \n",
+ "* Some of these lesser-used plot types include histograms and pie charts.\n",
+ "* This section also covered animation of Matplotlib plots.\n",
+ "\n",
+ "\n",
+ "## What's Next\n",
+ "The next section introduces [more plotting functionality](annotations-colorbars-layouts), such as annotations, equation rendering, colormaps, and advanced layout.\n",
+ "\n",
+ "## Additional Resources\n",
+ "- [Plot Types Cheat Sheet](https://lnkd.in/dD5fE8V)\n",
+ "- [Matplotlib Documentation: Basic Pie Charts](https://matplotlib.org/stable/gallery/pie_and_polar_charts/pie_features.html)\n",
+ "- [Matplotlib Documentation: Histograms](https://matplotlib.org/stable/gallery/statistics/hist.html)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "370d8045-216c-4150-9ce5-5128721b3962",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/_preview/468/_sources/core/matplotlib/matplotlib-basics.ipynb b/_preview/468/_sources/core/matplotlib/matplotlib-basics.ipynb
new file mode 100644
index 000000000..cf48576ba
--- /dev/null
+++ b/_preview/468/_sources/core/matplotlib/matplotlib-basics.ipynb
@@ -0,0 +1,908 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "![Matplotlib logo](https://matplotlib.org/stable/_images/sphx_glr_logos2_003.png)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Matplotlib Basics"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "## Overview\n",
+ "We will cover the basics of using the Matplotlib library to create plots in Python, including a few different plots available within the library. This page is laid out as follows:\n",
+ "\n",
+ "1. Why Matplotlib?\n",
+ "1. Figure and axes\n",
+ "1. Basic line plots\n",
+ "1. Labels and grid lines\n",
+ "1. Customizing colors\n",
+ "1. Subplots\n",
+ "1. Scatterplots\n",
+ "1. Displaying Images\n",
+ "1. Contour and filled contour plots."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "## Prerequisites\n",
+ "| Concepts | Importance | Notes |\n",
+ "| --- | --- | --- |\n",
+ "| [NumPy Basics](../numpy/numpy-basics) | Necessary | |\n",
+ "| MATLAB plotting experience | Helpful | |\n",
+ "\n",
+ "* **Time to Learn**: 30 minutes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Imports"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's import the Matplotlib library's `pyplot` interface; this interface is the simplest way to create new Matplotlib figures. To shorten this long name, we import it as `plt`; this helps keep things short, but clear."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
Info
\n",
+ " Matplotlib is a Python 2-D plotting library. It is used to produce publication quality figures in a variety of hard-copy formats and interactive environments across platforms.\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Generate test data using `NumPy`"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Here, we generate some test data to use for experimenting with plotting:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "times = np.array(\n",
+ " [\n",
+ " 93.0,\n",
+ " 96.0,\n",
+ " 99.0,\n",
+ " 102.0,\n",
+ " 105.0,\n",
+ " 108.0,\n",
+ " 111.0,\n",
+ " 114.0,\n",
+ " 117.0,\n",
+ " 120.0,\n",
+ " 123.0,\n",
+ " 126.0,\n",
+ " 129.0,\n",
+ " 132.0,\n",
+ " 135.0,\n",
+ " 138.0,\n",
+ " 141.0,\n",
+ " 144.0,\n",
+ " 147.0,\n",
+ " 150.0,\n",
+ " 153.0,\n",
+ " 156.0,\n",
+ " 159.0,\n",
+ " 162.0,\n",
+ " ]\n",
+ ")\n",
+ "temps = np.array(\n",
+ " [\n",
+ " 310.7,\n",
+ " 308.0,\n",
+ " 296.4,\n",
+ " 289.5,\n",
+ " 288.5,\n",
+ " 287.1,\n",
+ " 301.1,\n",
+ " 308.3,\n",
+ " 311.5,\n",
+ " 305.1,\n",
+ " 295.6,\n",
+ " 292.4,\n",
+ " 290.4,\n",
+ " 289.1,\n",
+ " 299.4,\n",
+ " 307.9,\n",
+ " 316.6,\n",
+ " 293.9,\n",
+ " 291.2,\n",
+ " 289.8,\n",
+ " 287.1,\n",
+ " 285.8,\n",
+ " 303.3,\n",
+ " 310.0,\n",
+ " ]\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Figure and Axes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now, let's make our first plot with Matplotlib. Matplotlib has two core objects: the `Figure` and the `Axes`. The `Axes` object is an individual plot, containing an x-axis, a y-axis, labels, etc.; it also contains all of the various methods we might use for plotting. A `Figure` contains one or more `Axes` objects; it also contains methods for saving plots to files (e.g., PNG, SVG), among other similar high-level functionality. You may find the following diagram helpful:\n",
+ "\n",
+ "![anatomy of a figure](https://matplotlib.org/stable/_images/sphx_glr_anatomy_001.png \"anatomy of a figure\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Basic Line Plots\n",
+ "\n",
+ "Let's create a `Figure` whose dimensions, if printed out on hardcopy, would be 10 inches wide and 6 inches long (assuming a landscape orientation). We then create an `Axes` object, consisting of a single subplot, on the `Figure`. After that, we call the `Axes` object's `plot` method, using the `times` array for the data along the x-axis (i.e., the independent values), and the `temps` array for the data along the y-axis (i.e., the dependent values).\n",
+ "\n",
+ "
\n",
+ "
Info
\n",
+ " By default, ax.plot will create a line plot, as seen in the following example: \n",
+ "
\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Create a figure\n",
+ "fig = plt.figure(figsize=(10, 6))\n",
+ "\n",
+ "# Ask, out of a 1x1 grid of plots, the first axes.\n",
+ "ax = fig.add_subplot(1, 1, 1)\n",
+ "\n",
+ "# Plot times as x-variable and temperatures as y-variable\n",
+ "ax.plot(times, temps);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Labels and Grid Lines"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Adding labels to an `Axes` object"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Next, we add x-axis and y-axis labels to our `Axes` object, like this:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Add some labels to the plot\n",
+ "ax.set_xlabel('Time')\n",
+ "ax.set_ylabel('Temperature')\n",
+ "\n",
+ "# Prompt the notebook to re-display the figure after we modify it\n",
+ "fig"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can also add a title to the plot and increase the font size:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ax.set_title('GFS Temperature Forecast', size=16)\n",
+ "\n",
+ "fig"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "There are many other functions and methods associated with `Axes` objects and labels, but they are too numerous to list here.\n",
+ "\n",
+ "Here, we set up another test array of temperature data, to be used later:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temps_1000 = np.array(\n",
+ " [\n",
+ " 316.0,\n",
+ " 316.3,\n",
+ " 308.9,\n",
+ " 304.0,\n",
+ " 302.0,\n",
+ " 300.8,\n",
+ " 306.2,\n",
+ " 309.8,\n",
+ " 313.5,\n",
+ " 313.3,\n",
+ " 308.3,\n",
+ " 304.9,\n",
+ " 301.0,\n",
+ " 299.2,\n",
+ " 302.6,\n",
+ " 309.0,\n",
+ " 311.8,\n",
+ " 304.7,\n",
+ " 304.6,\n",
+ " 301.8,\n",
+ " 300.6,\n",
+ " 299.9,\n",
+ " 306.3,\n",
+ " 311.3,\n",
+ " ]\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Adding labels and a grid"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Here, we call `plot` more than once, in order to plot multiple series of temperature data on the same plot. We also specify the `label` keyword argument to the `plot` method to allow Matplotlib to automatically create legend labels. These legend labels are added via a call to the `legend` method. By utilizing the `grid()` method, we can also add gridlines to our plot."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig = plt.figure(figsize=(10, 6))\n",
+ "ax = fig.add_subplot(1, 1, 1)\n",
+ "\n",
+ "# Plot two series of data\n",
+ "# The label argument is used when generating a legend.\n",
+ "ax.plot(times, temps, label='Temperature (surface)')\n",
+ "ax.plot(times, temps_1000, label='Temperature (1000 mb)')\n",
+ "\n",
+ "# Add labels and title\n",
+ "ax.set_xlabel('Time')\n",
+ "ax.set_ylabel('Temperature')\n",
+ "ax.set_title('Temperature Forecast')\n",
+ "\n",
+ "# Add gridlines\n",
+ "ax.grid(True)\n",
+ "\n",
+ "# Add a legend to the upper left corner of the plot\n",
+ "ax.legend(loc='upper left');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Customizing colors"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We're not restricted to the default look for plot elements. Most plot elements have style attributes, such as `linestyle` and `color`, that can be modified to customize the look of a plot. For example, the `color` attribute can accept a wide array of color options, including keywords (named colors) like `red` or `blue`, or HTML color codes. Here, we use some different shades of red taken from the Tableau colorset in Matplotlib, by using the `tab:red` option for the color attribute."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig = plt.figure(figsize=(10, 6))\n",
+ "ax = fig.add_subplot(1, 1, 1)\n",
+ "\n",
+ "# Specify how our lines should look\n",
+ "ax.plot(times, temps, color='tab:red', label='Temperature (surface)')\n",
+ "ax.plot(\n",
+ " times,\n",
+ " temps_1000,\n",
+ " color='tab:red',\n",
+ " linestyle='--',\n",
+ " label='Temperature (isobaric level)',\n",
+ ")\n",
+ "\n",
+ "# Set the labels and title\n",
+ "ax.set_xlabel('Time')\n",
+ "ax.set_ylabel('Temperature')\n",
+ "ax.set_title('Temperature Forecast')\n",
+ "\n",
+ "# Add the grid\n",
+ "ax.grid(True)\n",
+ "\n",
+ "# Add a legend to the upper left corner of the plot\n",
+ "ax.legend(loc='upper left');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Subplots\n",
+ "\n",
+ "The term \"subplots\" refers to working with multiple plots, or panels, in a figure."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Here, we create yet another set of test data, in this case dew-point data, to be used in later examples:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dewpoint = 0.9 * temps\n",
+ "dewpoint_1000 = 0.9 * temps_1000"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now, we can use subplots to plot this new data alongside the temperature data."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Using add_subplot to create two different subplots within the figure\n",
+ "We can use the `.add_subplot()` method to add subplots to our figure! This method takes the arguments `(rows, columns, subplot_number)`.\n",
+ "\n",
+ "For example, if we want a single row and two columns, we can use the following code block:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig = plt.figure(figsize=(10, 6))\n",
+ "\n",
+ "# Create a plot for temperature\n",
+ "ax = fig.add_subplot(1, 2, 1)\n",
+ "ax.plot(times, temps, color='tab:red')\n",
+ "\n",
+ "# Create a plot for dewpoint\n",
+ "ax2 = fig.add_subplot(1, 2, 2)\n",
+ "ax2.plot(times, dewpoint, color='tab:green');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can also call `plot.subplots()` with the keyword arguments `nrows` (number of rows) and `ncols` (number of columns). This initializes a new `Axes` object, called `ax`, with the specified number of rows and columns. This object also contains a 1-D list of subplots, with a size equal to `nrows` x `ncols`.\n",
+ "\n",
+ "You can index this list, using `ax[0].plot()`, for example, to decide which subplot you're plotting to. Here is some example code for this technique:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 6))\n",
+ "\n",
+ "ax[0].plot(times, temps, color='tab:red')\n",
+ "ax[1].plot(times, dewpoint, color='tab:green');"
+ ]
+ },
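+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a quick added sketch (not part of the original example): when both `nrows` and `ncols` are greater than 1, the returned array of `Axes` objects is 2-D, so you index it with a pair of indices, such as `axs[0, 1]`:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(10, 6))\n",
+ "\n",
+ "# With a 2-D grid of subplots, index the Axes array by [row, col]\n",
+ "axs[0, 0].plot(times, temps, color='tab:red')\n",
+ "axs[0, 1].plot(times, dewpoint, color='tab:green')\n",
+ "axs[1, 0].plot(times, temps_1000, color='tab:red', linestyle='--')\n",
+ "axs[1, 1].plot(times, dewpoint_1000, color='tab:green', linestyle='--');"
+ ]
+ },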
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Adding titles to each subplot\n",
+ "We can add titles to these plots too; notice that these subplots are titled separately, by calling `ax.set_title` after plotting each subplot:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig = plt.figure(figsize=(10, 6))\n",
+ "\n",
+ "# Create a plot for temperature\n",
+ "ax = fig.add_subplot(1, 2, 1)\n",
+ "ax.plot(times, temps, color='tab:red')\n",
+ "ax.set_title('Temperature')\n",
+ "\n",
+ "# Create a plot for dewpoint\n",
+ "ax2 = fig.add_subplot(1, 2, 2)\n",
+ "ax2.plot(times, dewpoint, color='tab:green')\n",
+ "ax2.set_title('Dewpoint');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Using `ax.set_xlim` and `ax.set_ylim` to control the plot boundaries\n",
+ "\n",
+ "It is common when plotting data to set the extent (boundaries) of plots, which can be performed by calling `.set_xlim` and `.set_ylim` on the `Axes` object containing the plot or subplot(s):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig = plt.figure(figsize=(10, 6))\n",
+ "\n",
+ "# Create a plot for temperature\n",
+ "ax = fig.add_subplot(1, 2, 1)\n",
+ "ax.plot(times, temps, color='tab:red')\n",
+ "ax.set_title('Temperature')\n",
+ "ax.set_xlim(110, 130)\n",
+ "ax.set_ylim(290, 315)\n",
+ "\n",
+ "# Create a plot for dewpoint\n",
+ "ax2 = fig.add_subplot(1, 2, 2)\n",
+ "ax2.plot(times, dewpoint, color='tab:green')\n",
+ "ax2.set_title('Dewpoint')\n",
+ "ax2.set_xlim(110, 130);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Using `sharex` and `sharey` to share plot limits\n",
+ "\n",
+ "You may want to have both subplots share the same x/y axis limits. When setting up a new `Axes` object through a method like `add_subplot`, specify the keyword arguments `sharex=ax` and `sharey=ax`, where `ax` is the `Axes` object with which to share axis limits.\n",
+ "\n",
+ "Let's take a look at an example:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig = plt.figure(figsize=(10, 6))\n",
+ "\n",
+ "# Create a plot for temperature\n",
+ "ax = fig.add_subplot(1, 2, 1)\n",
+ "ax.plot(times, temps, color='tab:red')\n",
+ "ax.set_title('Temperature')\n",
+ "ax.set_ylim(260, 320)\n",
+ "\n",
+ "# Create a plot for dewpoint\n",
+ "ax2 = fig.add_subplot(1, 2, 2, sharex=ax, sharey=ax)\n",
+ "ax2.plot(times, dewpoint, color='tab:green')\n",
+ "ax2.set_title('Dewpoint');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Putting this all together"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
Info
\n",
+ " If desired, you can move the location of your legend; to do this, specify the loc keyword argument when calling ax.legend().\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig = plt.figure(figsize=(10, 6))\n",
+ "ax = fig.add_subplot(1, 2, 1)\n",
+ "\n",
+ "# Specify how our lines should look\n",
+ "ax.plot(times, temps, color='tab:red', label='Temperature (surface)')\n",
+ "ax.plot(\n",
+ " times,\n",
+ " temps_1000,\n",
+ " color='tab:red',\n",
+ " linestyle=':',\n",
+ " label='Temperature (isobaric level)',\n",
+ ")\n",
+ "\n",
+ "# Add labels, grid, and legend\n",
+ "ax.set_xlabel('Time')\n",
+ "ax.set_ylabel('Temperature')\n",
+ "ax.set_title('Temperature Forecast')\n",
+ "ax.grid(True)\n",
+ "ax.legend(loc='upper left')\n",
+ "ax.set_ylim(257, 312)\n",
+ "ax.set_xlim(95, 162)\n",
+ "\n",
+ "\n",
+ "# Add our second plot - for dewpoint, changing the colors and labels\n",
+ "ax2 = fig.add_subplot(1, 2, 2, sharex=ax, sharey=ax)\n",
+ "ax2.plot(times, dewpoint, color='tab:green', label='Dewpoint (surface)')\n",
+ "ax2.plot(\n",
+ " times,\n",
+ " dewpoint_1000,\n",
+ " color='tab:green',\n",
+ " linestyle=':',\n",
+ " marker='o',\n",
+ " label='Dewpoint (isobaric level)',\n",
+ ")\n",
+ "\n",
+ "ax2.set_xlabel('Time')\n",
+ "ax2.set_ylabel('Dewpoint')\n",
+ "ax2.set_title('Dewpoint Forecast')\n",
+ "ax2.grid(True)\n",
+ "ax2.legend(loc='upper left');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Scatterplot\n",
+ "Some data cannot be plotted accurately as a line plot. Another type of plot that is popular in science is the marker plot, more commonly known as a scatter plot. A simple scatter plot can be created by setting the `linestyle` to `None`, and specifying a marker type, size, color, etc., like this:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig = plt.figure(figsize=(10, 6))\n",
+ "ax = fig.add_subplot(1, 1, 1)\n",
+ "\n",
+ "# Specify no line with circle markers\n",
+ "ax.plot(temps, temps_1000, linestyle='None', marker='o', markersize=5)\n",
+ "\n",
+ "ax.set_xlabel('Temperature (surface)')\n",
+ "ax.set_ylabel('Temperature (1000 hPa)')\n",
+ "ax.set_title('Temperature Cross Plot')\n",
+ "ax.grid(True);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
Info
\n",
+ " You can also use the scatter method, which is slower, but will give you more control, such as being able to color the points individually based upon a third variable.\n",
+ "
\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig = plt.figure(figsize=(10, 6))\n",
+ "ax = fig.add_subplot(1, 1, 1)\n",
+ "\n",
+ "# Specify no line with circle markers\n",
+ "ax.scatter(temps, temps_1000)\n",
+ "\n",
+ "ax.set_xlabel('Temperature (surface)')\n",
+ "ax.set_ylabel('Temperature (1000 hPa)')\n",
+ "ax.set_title('Temperature Cross Plot')\n",
+ "ax.grid(True);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's put together the following:\n",
+ " * Beginning with our code above, add the `c` keyword argument to the `scatter` call; in this case, to color the points by the difference between the temperature at the surface and the temperature at 1000 hPa.\n",
+ " * Add a 1:1 line to the plot (slope of 1, intercept of zero). Use a black dashed line.\n",
+ " * Change the colormap to one more suited for a temperature-difference plot.\n",
+ " * Add a colorbar to the plot (have a look at the Matplotlib documentation for help)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig = plt.figure(figsize=(10, 6))\n",
+ "ax = fig.add_subplot(1, 1, 1)\n",
+ "\n",
+ "ax.plot([285, 320], [285, 320], color='black', linestyle='--')\n",
+ "s = ax.scatter(temps, temps_1000, c=(temps - temps_1000), cmap='bwr', vmin=-5, vmax=5)\n",
+ "fig.colorbar(s)\n",
+ "\n",
+ "ax.set_xlabel('Temperature (surface)')\n",
+ "ax.set_ylabel('Temperature (1000 hPa)')\n",
+ "ax.set_title('Temperature Cross Plot')\n",
+ "ax.grid(True);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Displaying Images\n",
+ "\n",
+ "`imshow` displays the values in an array as colored pixels, similar to a heat map.\n",
+ "\n",
+ "Here, we declare some fake data in a bivariate normal distribution, to illustrate the `imshow` method:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x = y = np.arange(-3.0, 3.0, 0.025)\n",
+ "X, Y = np.meshgrid(x, y)\n",
+ "Z1 = np.exp(-(X**2) - Y**2)\n",
+ "Z2 = np.exp(-((X - 1) ** 2) - (Y - 1) ** 2)\n",
+ "Z = (Z1 - Z2) * 2"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can now pass this fake data to `imshow` to create a heat map of the distribution:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig, ax = plt.subplots()\n",
+ "im = ax.imshow(\n",
+ " Z, interpolation='bilinear', cmap='RdYlGn', origin='lower', extent=[-3, 3, -3, 3]\n",
+ ")"
+ ]
+ },
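+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The image object returned by `imshow` can be passed to `fig.colorbar` to attach a color scale, just as we did with `scatter` above; here is a minimal sketch (an addition to the original example):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig, ax = plt.subplots()\n",
+ "im = ax.imshow(\n",
+ "    Z, interpolation='bilinear', cmap='RdYlGn', origin='lower', extent=[-3, 3, -3, 3]\n",
+ ")\n",
+ "\n",
+ "# Attach a colorbar keyed to the image's color mapping\n",
+ "fig.colorbar(im, ax=ax);"
+ ]
+ },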
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Contour and Filled Contour Plots\n",
+ "\n",
+ "- `contour` creates contours around data.\n",
+ "- `contourf` creates filled contours around data."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's start with the `contour` method, which, as just mentioned, creates contours around data:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig, ax = plt.subplots()\n",
+ "ax.contour(X, Y, Z);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "After creating contours, we can label the lines using the `clabel` method, like this:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig, ax = plt.subplots()\n",
+ "c = ax.contour(X, Y, Z, levels=np.arange(-2, 2, 0.25))\n",
+ "ax.clabel(c);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As described above, the `contourf` (contour fill) method creates filled contours around data, like this:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig, ax = plt.subplots()\n",
+ "c = ax.contourf(X, Y, Z);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a final example, let's create a heatmap figure with contours using the `contour` and `imshow` methods. First, we use `imshow` to create the heatmap, specifying a colormap using the `cmap` keyword argument. We then call `contour`, specifying black contours and an interval of 0.5. Here is the example code, and resulting figure:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig, ax = plt.subplots()\n",
+ "im = ax.imshow(\n",
+ " Z, interpolation='bilinear', cmap='PiYG', origin='lower', extent=[-3, 3, -3, 3]\n",
+ ")\n",
+ "c = ax.contour(X, Y, Z, levels=np.arange(-2, 2, 0.5), colors='black')\n",
+ "ax.clabel(c);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Summary\n",
+ "* `Matplotlib` can be used to visualize datasets you are working with.\n",
+ "* You can customize various features such as labels and styles.\n",
+ "* There are a wide variety of plotting options available, including (but not limited to):\n",
+ " * Line plots (`plot`)\n",
+ " * Scatter plots (`scatter`)\n",
+ " * Heatmaps (`imshow`)\n",
+ " * Contour line and contour fill plots (`contour`, `contourf`)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## What's Next?\n",
+ "In the next section, [more plotting functionality](histograms-piecharts-animation) is covered, such as histograms, pie charts, and animation."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Resources and References\n",
+ "\n",
+ "The goal of this tutorial is to provide an overview of the use of the Matplotlib library. It covers creating simple line plots, but it is by no means comprehensive. For more information, try looking at the following documentation:\n",
+ "- [Matplotlib documentation](http://matplotlib.org)\n",
+ "- [Matplotlib examples gallery](https://matplotlib.org/stable/gallery/index.html)\n",
+ "- [GeoCAT examples gallery](https://geocat-examples.readthedocs.io/en/latest/gallery/index.html)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/_preview/468/_sources/core/numpy.md b/_preview/468/_sources/core/numpy.md
new file mode 100644
index 000000000..8229b0c8a
--- /dev/null
+++ b/_preview/468/_sources/core/numpy.md
@@ -0,0 +1,13 @@
+
+
+# NumPy
+
+This section contains tutorials on array computing with [NumPy](https://numpy.org).
+
+---
+
+From the [NumPy documentation](https://numpy.org/doc/stable/user/whatisnumpy.html):
+
+> NumPy is the fundamental package for scientific computing in Python. It is a Python library that provides a multidimensional array object, various derived objects (such as masked arrays and matrices), and an assortment of routines for fast operations on arrays, including mathematical, logical, shape manipulation, sorting, selecting, I/O, discrete Fourier transforms, basic linear algebra, basic statistical operations, random simulation, and much more.
+
+NumPy's position at the center of the scientific Python ecosystem means that all users should start here in their learning journey through the core scientific packages.
diff --git a/_preview/468/_sources/core/numpy/intermediate-numpy.ipynb b/_preview/468/_sources/core/numpy/intermediate-numpy.ipynb
new file mode 100644
index 000000000..0a660cf22
--- /dev/null
+++ b/_preview/468/_sources/core/numpy/intermediate-numpy.ipynb
@@ -0,0 +1,676 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "# Intermediate NumPy\n",
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Overview\n",
+ "1. Working with multiple dimensions\n",
+ "1. Subsetting of irregular arrays with booleans\n",
+ "1. Sorting, or indexing with indices"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Prerequisites\n",
+ "\n",
+ "| Concepts | Importance | Notes |\n",
+ "| --- | --- | --- |\n",
+ "| [NumPy Basics](numpy-basics) | Necessary | |\n",
+ "\n",
+ "* **Time to learn**: 20 minutes\n",
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Imports\n",
+ "We will be including [Matplotlib](../matplotlib) to illustrate some of our examples, but you don't need knowledge of it to complete this notebook."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Using axes to slice arrays\n",
+ "\n",
+ "Here we introduce an important concept when working with NumPy: the axis. This indicates the particular dimension along which a function should operate (provided the function does something taking multiple values and converts to a single value). \n",
+ "\n",
+ "Let's look at a concrete example with `sum`:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a = np.arange(12).reshape(3, 4)\n",
+ "a"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This calculates the total of all values in the array."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "np.sum(a)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
Info
\n",
+ " Some of NumPy's functions can be accessed as `ndarray` methods!\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a.sum()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now, with a reminder about how our array is shaped,"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "we can specify `axis` to get _just_ the sum across each of our rows."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "np.sum(a, axis=0)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Or do the same and take the sum across columns:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "np.sum(a, axis=1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "After putting together some data and introducing some more advanced calculations, let's demonstrate a multi-layered example: calculating temperature advection. If you're not familiar with this (don't worry!), we'll be looking to calculate\n",
+ "\n",
+ "\\begin{equation*}\n",
+ "\\text{advection} = -\\vec{v} \\cdot \\nabla T\n",
+ "\\end{equation*}\n",
+ "\n",
+ "and to do so we'll start with some random $T$ and $\\vec{v}$ values,"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temp = np.random.randn(100, 50)\n",
+ "u = np.random.randn(100, 50)\n",
+ "v = np.random.randn(100, 50)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can calculate the `np.gradient` of our new $T(100x50)$ field as two separate component gradients,"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "gradient_x, gradient_y = np.gradient(temp)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In order to calculate $-\\vec{v} \\cdot \\nabla T$, we will use `np.dstack` to turn our two separate component gradient fields into one multidimensional field containing $x$ and $y$ gradients at each of our $100x50$ points,"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "grad_vectors = np.dstack([gradient_x, gradient_y])\n",
+ "print(grad_vectors.shape)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "and then do the same for our separate $u$ and $v$ wind components,"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "wind_vectors = np.dstack([u, v])\n",
+ "print(wind_vectors.shape)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Finally, we can calculate the dot product of these two multidimensional fields of wind and temperature gradient components by hand as an element-wise multiplication, `*`, and then a `sum` of our separate components at each point (i.e., along the last `axis`),"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "advection = (wind_vectors * -grad_vectors).sum(axis=-1)\n",
+ "print(advection.shape)"
+ ]
+ },
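+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As an added aside (not part of the original lesson), the same contraction can be written with `np.einsum`, which multiplies element-wise and sums over the shared last axis in a single call:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 'ijk,ijk->ij': multiply element-wise, then sum over the last axis k\n",
+ "advection_einsum = np.einsum('ijk,ijk->ij', wind_vectors, -grad_vectors)\n",
+ "np.allclose(advection, advection_einsum)"
+ ]
+ },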
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Indexing arrays with boolean values\n",
+ "\n",
+ "### Array comparisons\n",
+ "NumPy can easily create arrays of boolean values and use those to select certain values to extract from an array"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Create some synthetic data representing temperature and wind speed data\n",
+ "np.random.seed(19990503) # Make sure we all have the same data\n",
+ "temp = 20 * np.cos(np.linspace(0, 2 * np.pi, 100)) + 50 + 2 * np.random.randn(100)\n",
+ "speed = np.abs(\n",
+ " 10 * np.sin(np.linspace(0, 2 * np.pi, 100)) + 10 + 5 * np.random.randn(100)\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "plt.plot(temp, 'tab:red')\n",
+ "plt.plot(speed, 'tab:blue');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "By doing a comparison between a NumPy array and a value, we get an\n",
+ "array of values representing the results of the comparison between\n",
+ "each element and the value"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temp > 45"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This, which is its own NumPy array of `boolean` values, can be used as an index to another array of the same size. We can even use it as an index within the original `temp` array we used to compare,"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temp[temp > 45]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
Info
\n",
+ " This only returns the values from our original array meeting the indexing conditions, nothing more! Note the size,\n",
+ "
\n",
+ " Indexing arrays with arrays requires them to be the same size!\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "If we store this array somewhere new,"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temp_45 = temp[temp > 45]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "tags": [
+ "raises-exception"
+ ]
+ },
+ "outputs": [],
+ "source": [
+ "temp_45[temp < 45]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We find that our original `(100,)` shape array is too large to subset our new `(60,)` array."
+ ]
+ },
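+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can confirm those sizes directly:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temp.shape, temp_45.shape"
+ ]
+ },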
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "If their sizes _do_ match, the boolean array can come from a totally different array!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "speed > 10"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temp[speed > 10]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Replacing values\n",
+ "To extend this, we can use this conditional indexing to _assign_ new values to certain positions within our array, somewhat like a masking operation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Make a copy so we don't modify the original data\n",
+ "temp2 = temp.copy()\n",
+ "speed2 = speed.copy()\n",
+ "\n",
+ "# Replace all places where speed is <10 with NaN (not a number)\n",
+ "temp2[speed < 10] = np.nan\n",
+ "speed2[speed < 10] = np.nan"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "plt.plot(temp2, 'tab:red');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "and to put this in context,"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "plt.plot(temp, 'r:')\n",
+ "plt.plot(temp2, 'r')\n",
+ "plt.plot(speed, 'b:')\n",
+ "plt.plot(speed2, 'b');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "If we use parentheses to preserve the order of operations, we can combine these conditions with other bitwise operators like the `&` for `bitwise_and`,"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "multi_mask = (temp < 45) & (speed > 10)\n",
+ "multi_mask"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temp[multi_mask]"
+ ]
+ },
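+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The same pattern works with the other bitwise operators; for example (an added sketch), `|` (`bitwise_or`) selects the values meeting either condition:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temp[(temp < 45) | (speed > 10)]"
+ ]
+ },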
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Heat index is only defined for temperatures >= 80F and relative humidity values >= 40%. Using the data generated below, we can use boolean indexing to extract the data where heat index has a valid value."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Here's the \"data\"\n",
+ "np.random.seed(19990503)\n",
+ "temp = 20 * np.cos(np.linspace(0, 2 * np.pi, 100)) + 80 + 2 * np.random.randn(100)\n",
+ "relative_humidity = np.abs(\n",
+ " 20 * np.cos(np.linspace(0, 4 * np.pi, 100)) + 50 + 5 * np.random.randn(100)\n",
+ ")\n",
+ "\n",
+ "# Create a mask for the two conditions described above\n",
+ "good_heat_index = (temp >= 80) & (relative_humidity >= 0.4)\n",
+ "\n",
+ "# Use this mask to grab the temperature and relative humidity values that together\n",
+ "# will give good heat index values\n",
+ "print(temp[good_heat_index])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Another bitwise operator we can find helpful is Python's `~` complement operator, which can give us the **inverse** of our specific mask to let us assign `np.nan` to every value _not_ satisfied in `good_heat_index`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "plot_temp = temp.copy()\n",
+ "plot_temp[~good_heat_index] = np.nan\n",
+ "plt.plot(plot_temp, 'tab:red');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Indexing using arrays of indices\n",
+ "\n",
+ "You can also use a list or array of indices to extract particular values--this is a natural extension of the regular indexing. For instance, just as we can select the first element:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temp[0]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can also extract the first, fifth, and tenth elements as a list:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temp[[0, 4, 9]]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "One of the ways this comes into play is trying to sort NumPy arrays using `argsort`. This function returns the indices of the array that give the items in sorted order. So for our `temp`,"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "inds = np.argsort(temp)\n",
+ "inds"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "i.e., our lowest value is at index `52`, next `57`, and so on. We can use this array of indices as an index for `temp`,"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temp[inds]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "to get a sorted array back!"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "With some clever slicing, we can pull out the last 10, or 10 highest, values of `temp`,"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ten_highest = inds[-10:]\n",
+ "print(temp[ten_highest])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "There are other NumPy `arg` functions that return indices for operating; check out the [NumPy docs](https://numpy.org/doc/stable/reference/routines.sort.html) on sorting your arrays!"
+ ]
+ },
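+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For instance (a small added sketch), `np.argmax` and `np.argmin` return the index of the largest and smallest values, respectively:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# The index of the warmest value, and the value itself\n",
+ "print(np.argmax(temp), temp[np.argmax(temp)])\n",
+ "print(np.argmin(temp), temp[np.argmin(temp)])"
+ ]
+ },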
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Summary\n",
+ "In this notebook we introduced the power of understanding the dimensions of our data by specifying math along `axis`, used `True` and `False` values to subset our data according to conditions, and used lists of positions within our array to sort our data.\n",
+ "\n",
+ "### What's Next\n",
+ "Taking some time to practice this is valuable to be able to quickly manipulate arrays of information in useful or scientific ways."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Resources and references\n",
+ "The [NumPy Users Guide](https://numpy.org/devdocs/user/quickstart.html#less-basic) expands further on some of these topics, as well as suggests various [Tutorials](https://numpy.org/learn/), lectures, and more at this stage."
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/_preview/468/_sources/core/numpy/numpy-basics.ipynb b/_preview/468/_sources/core/numpy/numpy-basics.ipynb
new file mode 100644
index 000000000..3281deb5b
--- /dev/null
+++ b/_preview/468/_sources/core/numpy/numpy-basics.ipynb
@@ -0,0 +1,1047 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "# NumPy Basics\n",
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Overview\n",
+ "Welcome to your first Python library - NumPy! NumPy is the fundamental package for numerical operations with Python. It contains among other things:\n",
+ "\n",
+ "- a powerful N-dimensional array object\n",
+ "- sophisticated (broadcasting) functions\n",
+ "- useful linear algebra, Fourier transform, and random number capabilities\n",
+ "\n",
+ "Let's get you started with the basics! In this notebook we will cover\n",
+ "\n",
+ "1. Creating an `array`\n",
+ "1. Math and calculations with arrays\n",
+ "1. Inspecting an array with slicing and indexing"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Prerequisites\n",
+ "\n",
+ "| Concepts | Importance | Notes |\n",
+ "| --- | --- | --- |\n",
+ "| [Python Quickstart](../../foundations/quickstart) | Necessary | Lists, indexing, slicing, math |\n",
+ "\n",
+ "* **Time to learn**: 35 minutes\n",
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Imports\n",
+ "A common convention you might encounter is to rename `numpy` to `np` on import to shorten it for the many times we will be calling on `numpy` for functionality."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Create an array of 'data'\n",
+ "\n",
+ "The NumPy array represents a *contiguous* block of memory, holding entries of a given type (and hence fixed size). The entries are laid out in memory according to the shape, or list of dimension sizes. Let's start by creating an array from a list of integers and taking a look at it,"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a = np.array([1, 2, 3])\n",
+ "a"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can inspect the number of dimensions our array is organized along with `ndim`, and how long each of these dimensions are with `shape`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a.ndim"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "So our 1-dimensional array has a shape of `3` along that dimension! Finally we can check out the underlying type of our underlying data,"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a.dtype"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now, let's expand this with a new data type, and by using a list of lists we can grow the dimensions of our array!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])\n",
+ "a"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a.ndim"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a.dtype"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "And as before we can use `ndim`, `shape`, and `dtype` to discover how many dimensions of what lengths are making up our array of floats."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Generation\n",
+ "NumPy also provides helper functions for generating arrays of data to save you typing for regularly spaced data. Don't forget your Python indexing rules!\n",
+ "\n",
+ "* `arange(start, stop, step)` creates a range of values in the interval `[start,stop)` with `step` spacing.\n",
+ "* `linspace(start, stop, num)` creates a range of `num` evenly spaced values over the range `[start,stop]`."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### arange"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a = np.arange(5)\n",
+ "a"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a = np.arange(3, 11)\n",
+ "a"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a = np.arange(1, 10, 2)\n",
+ "a"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### linspace"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "b = np.linspace(0, 4, 5)\n",
+ "b"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "b.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "b = np.linspace(3, 10, 15)\n",
+ "b"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "b = np.linspace(2.5, 10.25, 11)\n",
+ "b"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "b = np.linspace(0, 100, 30)\n",
+ "b"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Perform calculations with NumPy\n",
+ "\n",
+ "### Arithmetic\n",
+ "\n",
+ "In core Python, that is *without* NumPy, creating sequences of values and adding them together requires writing a lot of manual loops, just like one would do in C/C++:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a = list(range(5, 10))\n",
+ "b = [3 + i * 1.5 / 4 for i in range(5)]\n",
+ "\n",
+ "a, b"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "result = []\n",
+ "for x, y in zip(a, b):\n",
+ " result.append(x + y)\n",
+ "print(result)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "That is very verbose and not very intuitive. Using NumPy this becomes:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a = np.arange(5, 10)\n",
+ "b = np.linspace(3, 4.5, 5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a + b"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Many major mathematical operations operate in the same way. They perform an element-by-element calculation of the two arrays."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a - b"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a / b"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a**b"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
Warning
\n",
+ " These arrays must be the same shape!\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "b = np.linspace(3, 4.5, 6)\n",
+ "a.shape, b.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "tags": [
+ "raises-exception"
+ ]
+ },
+ "outputs": [],
+ "source": [
+ "a * b"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Constants\n",
+ "\n",
+ "NumPy provides us access to some useful constants as well - remember you should never be typing these in manually! Other libraries such as SciPy and MetPy have their own set of constants that are more domain specific."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "np.pi"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "np.e"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can use these for classic calculations you might be familiar with! Here we can create a range `t = [0, 2 pi]` by `pi/4`,"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "t = np.arange(0, 2 * np.pi + np.pi / 4, np.pi / 4)\n",
+ "t"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "t / np.pi"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Array math functions\n",
+ "\n",
+ "NumPy also has math functions that can operate on arrays. Similar to the math operations, these greatly simplify and speed up these operations. Let's start with calculating $\\sin(t)$!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sin_t = np.sin(t)\n",
+ "sin_t"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "and clean it up a bit by `round`ing to three decimal places."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "np.round(sin_t, 3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cos_t = np.cos(t)\n",
+ "cos_t"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
Info
\n",
+ " Check out NumPy's list of mathematical functions here!\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can convert between degrees and radians with only NumPy, by hand"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "t / np.pi * 180"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "or with built-in function `rad2deg`,"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "degrees = np.rad2deg(t)\n",
+ "degrees"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We are similarly provided algorithms for operations including integration, bulk summing, and cumulative summing."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sine_integral = np.trapz(sin_t, t)\n",
+ "np.round(sine_integral, 3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cos_sum = np.sum(cos_t)\n",
+ "cos_sum"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cos_csum = np.cumsum(cos_t)\n",
+ "print(cos_csum)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Indexing and subsetting arrays\n",
+ "\n",
+ "### Indexing\n",
+ "\n",
+ "We can use integer indexing to reach into our arrays and pull out individual elements. Let's make a toy 2-d array to explore. Here we create a 12-value `arange` and `reshape` it into a 3x4 array."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a = np.arange(12).reshape(3, 4)\n",
+ "a"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Recall that Python indexing starts at `0`, and we can begin indexing our array with the list-style `list[element]` notation,"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a[0]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "to pull out just our first _row_ of data within `a`. Similarly we can index in reverse with negative indices,"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a[-1]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "to pull out just the last row of data within `a`. This notation extends to as many dimensions as make up our array as `array[m, n, p, ...]`. The following diagram shows these indices for an example, 2-dimensional `6x6` array,"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "![](array_index.png)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For example, let's find the entry in our array corresponding to the 2nd row (`m=1` in Python) and the 3rd column (`n=2` in Python)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a[1, 2]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can again use these negative indices to index backwards,"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a[-1, -1]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "and even mix-and-match along dimensions,"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a[1, -2]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Slices\n",
+ "\n",
+ "Slicing syntax is written as `array[start:stop[:step]]`, where **all numbers are optional**.\n",
+ "- defaults: \n",
+ " - start = 0\n",
+ " - stop = len(dim)\n",
+ " - step = 1\n",
+ "- The second colon is **also optional** if no step is used.\n",
+ "\n",
+ "Let's pull out just the first row, `m=0` of `a` and see how this works!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "b = a[0]\n",
+ "b"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Laying out our default slice to see the entire array explicitly looks something like this,"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "b[0:4:1]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "where again, these default values are optional,"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "b[::]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "and even the second `:` is optional"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "b[:]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now to actually make our own slice, let's select all elements from `m=0` to `m=2`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "b[0:2]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
Warning
\n",
+ " Slice notation is exclusive of the final index.\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This means that slices will include every value **up to** your `stop` index and not this index itself, like a half-open interval `[start, end)`. For example,"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "b[3]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "reveals a different value than"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "b[0:3]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Finally, a few more examples of this notation before reintroducing our 2-d array `a`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "b[2:] # m=2 through the end, can leave off the number"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "b[:3] # similarly, the same as our b[0:3]"
+ ]
+ },
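+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The optional `step` shows up on a 2-D array later in this notebook, but here is a quick added sketch of it in 1-D, including a negative step to reverse the array:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(b[::2])  # every second element\n",
+ "print(b[::-1])  # every element, reversed"
+ ]
+ },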
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Multidimensional slicing\n",
+ "This entire syntax can be extended to each dimension of multidimensional arrays."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "First let's pull out rows `0` through `2`, and then every `:` column for each of those"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a[0:2, :]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Similarly, let's get all rows for just column `2`,"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a[:, 2]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "or just take a look at the full row `:`, for every second column, `::2`,"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a[:, ::2]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For any shape of array, you can use `...` to capture full slices of every non-specified dimension. Consider the 3-D array,"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "c = a.reshape(2, 2, 3)\n",
+ "c"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "c[0, ...]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "and so this is equivalent to"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "c[0, :, :]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "for extracting every dimension across our first row. We can also flip this around,"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "c[..., -1]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "to investigate every preceding dimension along our the last entry of our last axis, the same as `c[:, :, -1]`."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Summary\n",
+ "In this notebook we introduced NumPy and the `ndarray` that is so crucial to the entirety of the scientific Python community ecosystem. We created some arrays, used some of NumPy's own mathematical functions to manipulate them, and then introduced the world of NumPy indexing and selecting for even multi-dimensional arrays.\n",
+ "\n",
+ "### What's next?\n",
+ "This notebook is the gateway to nearly every other Pythia resource here. This information is crucial for understanding SciPy, pandas, xarray, and more. Continue into NumPy to explore some more intermediate and advanced topics!"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Resources and references\n",
+ "- [NumPy User Guide](http://docs.scipy.org/doc/numpy/user/)\n",
+ "- [SciPy Lecture Notes](https://scipy-lectures.org/)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.15"
+ },
+ "toc": {
+ "base_numbering": 1,
+ "nav_menu": {},
+ "number_sections": true,
+ "sideBar": true,
+ "skip_h1_title": false,
+ "title_cell": "Table of Contents",
+ "title_sidebar": "Contents",
+ "toc_cell": false,
+ "toc_position": {},
+ "toc_section_display": true,
+ "toc_window_display": false
+ },
+ "toc-autonumbering": false
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/_preview/468/_sources/core/numpy/numpy-broadcasting.ipynb b/_preview/468/_sources/core/numpy/numpy-broadcasting.ipynb
new file mode 100644
index 000000000..ee6bee2ac
--- /dev/null
+++ b/_preview/468/_sources/core/numpy/numpy-broadcasting.ipynb
@@ -0,0 +1,939 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "# NumPy Broadcasting\n",
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Overview\n",
+ "Before we begin, it is important to know that broadcasting is a valuable part of the power that NumPy provides. However, there's no looking past the fact that broadcasting can be conceptually difficult to digest. This information can be helpful and very powerful, but it may be more prudent to first start learning the other label-based elements of the Python ecosystem, [Pandas](../pandas) and [Xarray](../xarray). This can make understanding NumPy broadcasting easier or simpler when using real-world data. When you are ready to learn about NumPy broadcasting, this section is organized as follows:\n",
+ "\n",
+ "1. An introduction to broadcasting\n",
+ "1. Avoiding loops with vectorization"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Prerequisites\n",
+ "\n",
+ "| Concepts | Importance | Notes |\n",
+ "| --- | --- | --- |\n",
+ "| [NumPy Basics](numpy-basics) | Necessary | |\n",
+ "| [Intermediate NumPy](intermediate-numpy) | Helpful | |\n",
+ "| [Conceptual guide to broadcasting](https://numpy.org/doc/stable/user/theory.broadcasting.html#array-broadcasting-in-numpy) | Helpful | |\n",
+ "\n",
+ "* **Time to learn**: 30 minutes\n",
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Imports\n",
+ "\n",
+ "As always, when working with NumPy, it must be imported first:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Using broadcasting to implicitly loop over data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### What is broadcasting?\n",
+ "Broadcasting is a useful NumPy tool that allows us to perform operations between arrays with different shapes, provided that they are compatible with each other in certain ways. To start, we can create an array below and add 5 to it:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a = np.array([10, 20, 30, 40])\n",
+ "a + 5"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This works even though 5 is not an array. It behaves as expected, adding 5 to each of the elements in `a`. This also works if 5 is an array:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "b = np.array([5])\n",
+ "a + b"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This takes the single element in `b` and adds it to each of the elements in `a`. This won't work for just any `b`, though; for instance, the following won't work:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "tags": [
+ "raises-exception"
+ ]
+ },
+ "outputs": [],
+ "source": [
+ "b = np.array([5, 6, 7])\n",
+ "a + b"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "It does work if `a` and `b` are the same shape:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "b = np.array([5, 5, 10, 10])\n",
+ "a + b"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "What if what we really want is pairwise addition of a and b? Without broadcasting, we could accomplish this by looping:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "b = np.array([1, 2, 3, 4, 5])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "result = np.empty((5, 4), dtype=np.int32)\n",
+ "for row, valb in enumerate(b):\n",
+ " for col, vala in enumerate(a):\n",
+ " result[row, col] = vala + valb\n",
+ "result"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can also do this by manually repeating the arrays to the proper shape for the result, using `np.tile`. This avoids the need to manually loop:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "aa = np.tile(a, (5, 1))\n",
+ "aa"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Turn b into a column array, then tile it\n",
+ "bb = np.tile(b.reshape(5, 1), (1, 4))\n",
+ "bb"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "aa + bb"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Giving NumPy room for broadcasting\n",
+ "We can also do this using broadcasting, which is where NumPy implicitly repeats the array without using additional memory. With broadcasting, NumPy takes care of repeating for you, provided dimensions are \"compatible\". This works as follows:\n",
+ "1. Check the number of dimensions of the arrays. If they are different, *prepend* dimensions of size one until the arrays are the same dimension shape.\n",
+ "2. Check if each of the dimensions are compatible. This works as follows:\n",
+ " - Each dimension is checked.\n",
+ " - If one of the arrays has a size of 1 in the checked dimension, or both arrays have the same size in the checked dimension, the check passes.\n",
+ " - If all dimension checks pass, the dimensions are compatible.\n",
+ "\n",
+ "For example, consider the following arrays:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "b.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Right now, these arrays both have the same number of dimensions. They both have only one dimension, but that dimension is incompatible. We can solve this by appending a dimension using `np.newaxis` when indexing, like this:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "bb = b[:, np.newaxis]\n",
+ "bb.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a + bb"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "(a + bb).shape"
+ ]
+ },
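+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As an added aside (assuming NumPy >= 1.20), `np.broadcast_shapes` reports the shape that broadcasting would produce, without allocating any arrays:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# (4,) broadcast against (5, 1) -> (5, 4)\n",
+ "np.broadcast_shapes(a.shape, bb.shape)"
+ ]
+ },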
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can also make the code more succinct by performing the newaxis and addition operations in a single line, like this:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a + b[:, np.newaxis]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Extending to higher dimensions\n",
+ "The same broadcasting ability and rules also apply for arrays of higher dimensions. Consider the following arrays `x`, `y`, and `z`, which are all different dimensions. We can use newaxis and broadcasting to perform $x^2 + y^2 + z^2$:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x = np.array([1, 2])\n",
+ "y = np.array([3, 4, 5])\n",
+ "z = np.array([6, 7, 8, 9])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "First, we extend the `x` array using newaxis, and then square it. Then, we square `y`, and broadcast it onto the extended `x` array:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "d_2d = x[:, np.newaxis] ** 2 + y**2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "d_2d.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Finally, we further extend this new 2-D array to a 3-D array using newaxis, square the `z` array, and then broadcast `z` onto the newly extended array:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "d_3d = d_2d[..., np.newaxis] + z**2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "d_3d.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As described above, we can also perform these operations in a single line of code, like this:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "h = x[:, np.newaxis, np.newaxis] ** 2 + y[np.newaxis, :, np.newaxis] ** 2 + z**2"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can use the shape method to see the shape of the array created by the single line of code above. As you can see, it matches the shape of the array created by the multi-line process above:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "h.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can also use the all method to confirm that both arrays contain the same data:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "np.all(h == d_3d)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Broadcasting is often useful when you want to do calculations with coordinate values, which are often given as 1-D arrays corresponding to positions along a particular array dimension. For example, we can use broadcasting to help with taking range and azimuth values for radar data (1-D separable polar coordinates) and converting to x,y pairs relative to the radar location."
+ ]
+ },
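+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Here is a minimal sketch of that radar idea, using hypothetical range and azimuth values: broadcasting a column of azimuths against a row of ranges builds the full 2-D grid of x,y locations without a loop."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Hypothetical 1-D coordinates: ranges in meters, azimuths in degrees\n",
+ "rng = np.linspace(0, 1000.0, 5)\n",
+ "az = np.deg2rad(np.array([0.0, 90.0, 180.0, 270.0]))\n",
+ "\n",
+ "# Column of azimuths (4, 1) broadcasts against row of ranges (5,) -> (4, 5)\n",
+ "x_loc = rng * np.sin(az)[:, np.newaxis]\n",
+ "y_loc = rng * np.cos(az)[:, np.newaxis]\n",
+ "x_loc.shape, y_loc.shape"
+ ]
+ },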
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Given the 3-D temperature field and 1-D pressure coordinates below, let's calculate $T * exp(P / 1000)$. We will need to use broadcasting to make the arrays compatible. The following code demonstrates how to use newaxis and broadcasting to perform this calculation:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pressure = np.array([1000, 850, 500, 300])\n",
+ "temps = np.linspace(20, 30, 24).reshape(4, 3, 2)\n",
+ "pressure.shape, temps.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pressure[:, np.newaxis, np.newaxis].shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temps * np.exp(pressure[:, np.newaxis, np.newaxis] / 1000)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Vectorize calculations to avoid explicit loops"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "When working with arrays of data, loops over the individual array elements is a fact of life. However, for improved runtime performance, it is important to avoid performing these loops in Python as much as possible, and let NumPy handle the looping for you. Avoiding these loops frequently, but not always, results in shorter and clearer code as well."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Look ahead/behind\n",
+ "\n",
+ "One common pattern for vectorizing is in converting loops that work over the current point, in addition to the previous point and/or the next point. This comes up when doing finite-difference calculations, e.g., approximating derivatives:\n",
+ "\n",
+ "\\begin{equation*}\n",
+ "f'(x) = f_{i+1} - f_{i}\n",
+ "\\end{equation*}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a = np.linspace(0, 20, 6)\n",
+ "a"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can calculate the forward difference for this array using a manual loop, like this:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "d = np.zeros(a.size - 1)\n",
+ "for i in range(len(a) - 1):\n",
+ " d[i] = a[i + 1] - a[i]\n",
+ "d"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "It would be nice to express this calculation without a loop, if possible. To see how to go about this, let's consider the values that are involved in calculating `d[i]`; in other words, the values `a[i+1]` and `a[i]`. The values over the loop iterations are:\n",
+ "\n",
+ "| i | a[i+1] | a[i] |\n",
+ "| --- | ---- | ---- |\n",
+ "| 0 | 4 | 0 |\n",
+ "| 1 | 8 | 4 |\n",
+ "| 2 | 12 | 8 |\n",
+ "| 3 | 16 | 12 |\n",
+ "| 4 | 20 | 16 |\n",
+ "\n",
+ "We can then express the series of values for `a[i+1]` as follows:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a[1:]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can also express the series of values for `a[i]` as follows:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a[:-1]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This means that we can express the forward difference using the following statement:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a[1:] - a[:-1]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "It should be noted that using slices in this way returns only a **view** on the original array. In other words, you can use the slices to modify the original data, either intentionally or accidentally. Also, this is a quick operation that does not involve a copy and does not bloat memory usage."
+ ]
+ },
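+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Here is a quick demonstration of that view behavior, performed on a copy so that `a` is left untouched for the cells below:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a_demo = a.copy()  # work on a copy so later cells still see the original a\n",
+ "view = a_demo[1:]  # a view, not a copy\n",
+ "view[0] = 99.0  # writes through the view into a_demo[1]\n",
+ "a_demo"
+ ]
+ },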
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### 2nd Derivative\n",
+ " \n",
+ "A finite-difference estimate of the 2nd derivative is given by the following equation (ignoring $\\Delta x$):\n",
+ "\n",
+ "\\begin{equation*}\n",
+ "f''(x) = 2\n",
+ "f_i - f_{i+1} - f_{i-1}\n",
+ "\\end{equation*}\n",
+ "\n",
+ "Let's write some vectorized code to calculate this finite difference for `a`, using slices. Analyze the code below, and compare the result to the values you would expect to see from the 2nd derivative of `a`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "2 * a[1:-1] - a[:-2] - a[2:]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Blocking\n",
+ "\n",
+ "Another application that can become more efficient using vectorization is operating on blocks of data. Let's start by creating some temperature data (rounding to make it easier to see and recognize the values):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temps = np.round(20 + np.random.randn(10) * 5, 1)\n",
+ "temps"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's start by writing a loop to take a 3-point running mean of the data. We'll do this by iterating over all points in the array and averaging the 3 points centered on each point. We'll simplify the problem by avoiding dealing with the cases at the edges of the array:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "avg = np.zeros_like(temps)\n",
+ "for i in range(1, len(temps) - 1):\n",
+ " sub = temps[i - 1 : i + 2]\n",
+ " avg[i] = sub.mean()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "avg"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As with the case of doing finite differences, we can express this using slices of the original array instead of loops:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# i - 1 i i + 1\n",
+ "(temps[:-2] + temps[1:-1] + temps[2:]) / 3"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Another option to solve this type of problem is to use the powerful NumPy tool `as_strided` instead of slicing. This tool can result in some odd behavior, so take care when using it. However, the trade-off is that the `as_strided` tool can be used to perform powerful operations. What we're doing here is altering how NumPy is interpreting the values in the memory that underpins the array. Take this array, for example:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temps"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Using `as_strided`, we can create a view of this array with a new, bigger shape, with rows made up of overlapping values. We do this by specifying a new shape of 8x3. There are 3 columns, for fitting blocks of data containing 3 values each, and 8 rows, to correspond to the 8 blocks of data of that size that are possible in the original 1-D array. We can then use the `strides` argument to control how NumPy walks between items in each dimension. The last item in the strides tuple simply states that the number of bytes to walk between items is just the size of an item. (Increasing this last item would skip items.) The first item says that when we go to a new element (in this example, a new row), only advance the size of a single item. This is what gives us overlapping rows. The code for these operations looks like this:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "block_size = 3\n",
+ "new_shape = (len(temps) - block_size + 1, block_size)\n",
+ "bytes_per_item = temps.dtype.itemsize\n",
+ "temps_strided = np.lib.stride_tricks.as_strided(\n",
+ " temps, shape=new_shape, strides=(bytes_per_item, bytes_per_item)\n",
+ ")\n",
+ "temps_strided"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now that we have this view of the array with the rows representing overlapping blocks, we can operate across the rows with `mean` and the `axis=-1` argument to get our running average:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temps_strided.mean(axis=-1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "It should be noted that there are no copies going on here, so if we change a value at a single indexed location, the change actually shows up in multiple locations:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temps_strided[0, 2] = 2000\n",
+ "temps_strided"
+ ]
+ },
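+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "If you are using NumPy 1.20 or later, `np.lib.stride_tricks.sliding_window_view` builds the same overlapping-blocks view while computing the strides for you, avoiding most of the sharp edges of `as_strided`. A minimal sketch of the same running mean:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Same overlapping 8x3 view, with the stride arithmetic handled for us\n",
+ "np.lib.stride_tricks.sliding_window_view(temps, 3).mean(axis=-1)"
+ ]
+ },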
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Finding the difference between min and max\n",
+ "\n",
+ "Another operation that crops up when slicing and dicing data is trying to identify a set of indices along a particular axis, contained within a larger multidimensional array. For instance, say we have a 3-D array of temperatures, and we want to identify the location of the $-10^oC$ isotherm within each column:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pressure = np.linspace(1000, 100, 25)\n",
+ "temps = np.random.randn(25, 30, 40) * 3 + np.linspace(25, -100, 25).reshape(-1, 1, 1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "NumPy has the function `argmin()`, which returns the index of the minimum value. We can use this to find the minimum absolute difference between the value and -10:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Using axis=0 to tell it to operate along the pressure dimension\n",
+ "inds = np.argmin(np.abs(temps - -10), axis=0)\n",
+ "inds"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "inds.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Great! We now have an array representing the index of the point closest to $-10^oC$ in each column of data. We can use this new array as a lookup index for our pressure coordinate array to find the pressure level for each column:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pressure[inds]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now, we can try to find the closest actual temperature value using the new array:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temps[inds, :, :].shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Unfortunately, this replaced the pressure dimension (size 25) with the shape of our index array (30 x 40), giving us a 30 x 40 x 30 x 40 array. Obviously, if scientifically relevant data values were being used, this result would almost certainly make such data invalid. One solution would be to set up a loop with the `ndenumerate` function, like this:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "output = np.empty(inds.shape, dtype=temps.dtype)\n",
+ "for (i, j), val in np.ndenumerate(inds):\n",
+ " output[i, j] = temps[val, i, j]\n",
+ "output"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Of course, what we really want to do is avoid the explicit loop. Let's temporarily simplify the problem to a single dimension. If we have a 1-D array, we can pass a 1-D array of indices (a full range), and get back the same as the original data array:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pressure[np.arange(pressure.size)]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "np.all(pressure[np.arange(pressure.size)] == pressure)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can use this to select all the indices on the other dimensions of our temperature array. We will also need to use the magic of broadcasting to combine arrays of indices across dimensions."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This can be written as a vectorized solution. For example:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "y_inds = np.arange(temps.shape[1])[:, np.newaxis]\n",
+ "x_inds = np.arange(temps.shape[2])\n",
+ "temps[inds, y_inds, x_inds]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now, we can use this new array to find, for example, the relative humidity at the $-10^oC$ isotherm:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "np.all(output == temps[inds, y_inds, x_inds])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Summary\n",
+ "We've previewed some advanced NumPy capabilities, with a focus on _vectorization_; in other words, using clever broadcasting and data windowing techniques to enhance the speed and readability of our calculation code. By making use of vectorization, you can reduce explicit construction of loops in your code, and improve speed of calculation throughout the execution of such code.\n",
+ "\n",
+ "### What's next\n",
+ "This is an advanced NumPy topic; however, it is important to learn this topic in order to design calculation code that maximizes scalability and speed. If you would like to explore this topic further, please review the links below. We also suggest diving into label-based indexing and subsetting with [Pandas](../pandas) and [Xarray](../xarray), where some of this broadcasting can be simplified, or have added context."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Resources and references\n",
+ "* [NumPy Broadcasting Documentation](https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/_preview/468/_sources/core/overview.md b/_preview/468/_sources/core/overview.md
new file mode 100644
index 000000000..c7b207b76
--- /dev/null
+++ b/_preview/468/_sources/core/overview.md
@@ -0,0 +1,122 @@
+# Overview
+
+As you might know by now, Python is a programming language. To make your job easier, the developers of the language and its ecosystem provide users like you with libraries (or packages). Core libraries help you with fundamental numerical tasks, and high-level libraries help you efficiently analyze and visualize your data. Some of these libraries are used all across the Python community, while others are domain-specific. Read below to learn more about the core, high-level, and domain-specific libraries of the geoscience community. We suggest that new users start with the [Foundational Skills](../foundations/overview) section in order to get the most out of the tutorials below.
+
+## Core libraries
+
+Most geoscience data analysis involves working with numerical arrays.
+The default library for dealing with numerical arrays in Python is [NumPy](http://www.numpy.org/).
+It has some built-in functions for calculating very simple statistics
+(e.g. maximum, mean, standard deviation),
+but for more complex analysis
+(e.g. interpolation, integration, linear algebra)
+the [SciPy](https://scipy.org) library is the default.
+If you’re dealing with particularly large arrays,
+[Dask](https://dask.org/) works with the existing Python ecosystem
+(including NumPy) to scale your analysis
+to multi-core machines and/or distributed clusters (i.e. parallel processing).
+
+Another common feature of geo-data science is time series analysis.
+The Python standard library comes with a [datetime](https://docs.python.org/3/library/datetime.html)
+package for manipulating dates and times.
+NumPy also includes a [datetime64](https://numpy.org/doc/stable/reference/arrays.datetime.html)
+module for efficient vectorized datetime operations
+and the [cftime](https://unidata.github.io/cftime/) library
+is useful for dealing with non-standard calendars.
+
+When it comes to data visualization,
+the default library is [Matplotlib](https://matplotlib.org/).
+As you can see at the [Matplotlib gallery](https://matplotlib.org/stable/gallery/index.html),
+this library is great for any simple (e.g. bar charts, contour plots, line graphs),
+static (e.g. .png, .eps, .pdf) plots.
+The [Cartopy](https://scitools.org.uk/cartopy/docs/latest/) library
+provides additional plotting functionality for common geographic map projections.
+
+## High-level libraries
+
+While pretty much all data analysis and visualization tasks
+could be achieved with a combination of the core libraries,
+their flexible, all-purpose nature means relatively common/simple tasks
+can often require quite a bit of work (i.e. many lines of code).
+To make things more efficient for data scientists,
+the scientific Python community has therefore built a number of libraries on top of the core stack.
+These high-level libraries aren’t as flexible
+– they can’t do _everything_ like the core stack can –
+but they can do common tasks with far less effort.
+
+The most popular high-level data science library is undoubtedly [Pandas](http://pandas.pydata.org/).
+The key advance offered by Pandas is the concept of labeled arrays.
+Rather than referring to the individual elements of a data array using a numeric index
+(as is required with NumPy),
+the actual row and column headings can be used.
+That means information from the cardiac ward on 3 July 2005
+could be obtained from a medical dataset by asking for `data['cardiac'].loc['2005-07-03']`,
+rather than having to remember the numeric index corresponding to that ward and date.
+This labeled array feature,
+combined with a bunch of other features that streamline common statistical and plotting tasks
+traditionally performed with SciPy, datetime and Matplotlib,
+greatly simplifies the code development process (read: fewer lines of code).
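+
+For instance, here is a minimal sketch of that idea
+(the dataset and numbers are hypothetical):
+
+```python
+import pandas as pd
+
+# Hypothetical medical dataset: rows labeled by date, columns by ward
+data = pd.DataFrame(
+    {"cardiac": [12, 15], "neurology": [7, 9]},
+    index=pd.to_datetime(["2005-07-02", "2005-07-03"]),
+)
+data["cardiac"].loc["2005-07-03"]  # returns 15
+```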
+
+One of the limitations of Pandas
+is that it’s only able to handle one- or two-dimensional (i.e. tabular) data arrays.
+The [Xarray](http://xarray.pydata.org/) library was therefore created
+to extend the labeled array concept to N-dimensional arrays.
+Not all of the Pandas functionality is available
+(which is a trade-off associated with being able to handle multi-dimensional arrays),
+but the ability to refer to array elements by their actual latitude (e.g. 20 South),
+longitude (e.g. 50 East), height (e.g. 500 hPa) and time (e.g. 2015-04-27), for example,
+makes the Xarray data array far easier to deal with than the NumPy array.
+As an added bonus,
+Xarray also has built-in functionality for reading/writing specific geoscience file formats
+(e.g., netCDF, GRIB)
+and incorporates Dask under the hood to make dealing with large arrays easier.
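+
+For instance, here is a minimal sketch of selecting by coordinate value
+(the data and coordinates are hypothetical):
+
+```python
+import numpy as np
+import xarray as xr
+
+# Hypothetical labeled array: temperature by time and pressure level
+temps = xr.DataArray(
+    np.random.rand(2, 3),
+    dims=("time", "level"),
+    coords={"time": ["2015-04-27", "2015-04-28"], "level": [1000, 850, 500]},
+)
+temps.sel(level=500)  # select by the actual pressure value, not the position
+```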
+
+You will occasionally find yourself needing to use a core library directly
+(e.g. you might create a plot with Xarray and then call a specific Matplotlib
+function to customize a label on that plot),
+but to avoid re-inventing the wheel your first impulse should always be
+to check whether a high-level library like Pandas or Xarray has the functionality you need.
+Nothing would be more heartbreaking than spending hours writing your own function
+using the netCDF4 library for extracting the metadata contained within a netCDF file,
+for instance,
+only to find that Xarray automatically keeps this information upon reading a netCDF file.
+In this way, a solid working knowledge of the geoscience stack
+can save you a lot of time and effort.
+
+## Domain-specific libraries
+
+So far we’ve considered libraries that do general,
+broad-scale tasks like data input/output, common statistics, visualization, etc.
+Given their large user base,
+these libraries are usually written and supported by large companies/institutions
+(e.g. the Met Office supports Cartopy)
+or the wider PyData community (e.g. NumPy, Pandas, Xarray).
+Within each sub-discipline of the geosciences,
+individuals and research groups take these general libraries
+and apply them to their very specific data analysis tasks.
+Increasingly, these individuals and groups
+are formally packaging and releasing their code for use within their community.
+For instance, Andrew Dawson (an atmospheric scientist at Oxford)
+does a lot of EOF analysis and manipulation of wind data,
+so he has released his [eofs](https://ajdawson.github.io/eofs/latest/)
+and [windspharm](https://ajdawson.github.io/windspharm/latest/) libraries
+(which are able to handle data arrays from NumPy or Xarray).
+Similarly, a group at the Atmospheric Radiation Measurement (ARM) Climate Research Facility
+has released its Python ARM Radar Toolkit ([Py-ART](http://arm-doe.github.io/pyart/))
+for analyzing weather radar data.
+
+There are too many domain-specific libraries to mention here,
+but online resources such as the
+[Python for Atmosphere and Ocean Science (PyAOS) package index](https://pyaos.github.io/packages/)
+attempt to keep track of the domain-specific libraries in their field.
+Also check out the [Pythia Resource Gallery](https://projectpythia.org/resource-gallery.html) and try filtering by domain.
+
+## Tutorials
+
+- [NumPy](numpy): Core package for array computing, the workhorse of the Scientific Python stack
+- [Matplotlib](matplotlib): Basic plotting
+- [Cartopy](cartopy): Plotting on map projections
+- [Datetime](datetime): Dealing with time and calendar data
+- [Pandas](pandas): Working with labeled tabular data
+- [Data formats](data-formats): Working with common geoscience data formats
+- [Xarray](xarray): Working with gridded and labeled N-dimensional data
diff --git a/_preview/468/_sources/core/pandas.md b/_preview/468/_sources/core/pandas.md
new file mode 100644
index 000000000..680210b1b
--- /dev/null
+++ b/_preview/468/_sources/core/pandas.md
@@ -0,0 +1,15 @@
+# Pandas
+
+```{note}
+This content is under construction!
+```
+
+This section will contain tutorials on using [pandas](https://pandas.pydata.org) for labeled tabular data.
+
+---
+
+From the [official documentation](https://pandas.pydata.org/), Pandas "is a fast, powerful, flexible and easy to use open source data analysis and manipulation tool, built on top of the Python programming language."
+
+Pandas is a very powerful library for working with tabular data (e.g., spreadsheets, comma-separated-value files, or database printouts; all of these are quite common for geoscientific data). It allows us to use labels for our data; this, in turn, allows us to write expressive and robust code to manipulate the data.
+
+Key features of Pandas are the abilities to read in tabular data and to slice and dice data, as well as exploratory analysis tools native to the library.
diff --git a/_preview/468/_sources/core/pandas/pandas.ipynb b/_preview/468/_sources/core/pandas/pandas.ipynb
new file mode 100644
index 000000000..21843074a
--- /dev/null
+++ b/_preview/468/_sources/core/pandas/pandas.ipynb
@@ -0,0 +1,1249 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "a132f14e-55b7-4894-8a09-5f08be34e4c7",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "\n",
+ "# Introduction to Pandas\n",
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b96a6aed-52ec-4d0b-bcba-5a8cb1044b5a",
+ "metadata": {},
+ "source": [
+ "## Overview\n",
+ "1. Introduction to pandas data structures\n",
+ "1. How to slice and dice pandas dataframes and dataseries\n",
+ "1. How to use pandas for exploratory data analysis\n",
+ "\n",
+ "## Prerequisites\n",
+ "\n",
+ "| Concepts | Importance | Notes |\n",
+ "| --- | --- | --- |\n",
+ "| [Python Quickstart](../../foundations/quickstart) | Necessary | Intro to `dict` |\n",
+ "| [Numpy Basics](../numpy/numpy-basics) | Necessary | |\n",
+ "\n",
+ "* **Time to learn**: 60 minutes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1c805eb8-a545-4ba2-a3bb-e8e0232da2c9",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "eb2bab73-f28a-4f78-ac03-10f46cf4e2a3",
+ "metadata": {},
+ "source": [
+ "## Imports"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b8627bb7",
+ "metadata": {},
+ "source": [
+ "You will often see the nickname `pd` used as an abbreviation for pandas in the import statement, just like `numpy` is often imported as `np`. We also import the `DATASETS` class from `pythia_datasets`, which allows us to use example datasets created for Pythia."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "daf58736",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "from pythia_datasets import DATASETS"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "dab3da7f",
+ "metadata": {},
+ "source": [
+ "## The pandas [`DataFrame`](https://pandas.pydata.org/docs/user_guide/dsintro.html#dataframe)...\n",
+ "...is a **labeled**, two-dimensional columnar structure, similar to a table, spreadsheet, or the R `data.frame`.\n",
+ "\n",
+ "![dataframe schematic](https://github.com/pandas-dev/pandas/raw/main/doc/source/_static/schemas/01_table_dataframe.svg \"Schematic of a pandas DataFrame\")\n",
+ "\n",
+ "The `columns` that make up our `DataFrame` can be lists, dictionaries, NumPy arrays, pandas `Series`, or many other data types not mentioned here. Within these `columns`, you can have data values of many different data types used in Python and NumPy, including text, numbers, and dates/times. The first column of a `DataFrame`, shown in the image above in dark gray, is uniquely referred to as an `index`; this column contains information characterizing each row of our `DataFrame`. Similar to any other `column`, the `index` can label rows by text, numbers, datetime objects, and many other data types. Datetime objects are a quite popular way to label rows.\n",
+ "\n",
+ "For our first example using Pandas DataFrames, we start by reading in some data in comma-separated value (`.csv`) format. We retrieve this dataset from the Pythia DATASETS class (imported at the top of this page); however, the dataset was originally contained within the NCDC teleconnections database. This dataset contains many types of geoscientific data, including El Nino/Southern Oscillation indices. For more information on this dataset, review the description [here](https://www.ncdc.noaa.gov/teleconnections/enso/indicators/sst/)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0a064237-8e78-4b57-9200-6e97987d3ad8",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
Info
\n",
+ " As described above, we are retrieving the datasets for these examples from Project Pythia's custom library of example data. In order to retrieve datasets from this library, you must use the statement from pythia_datasets import DATASETS. This is shown and described in the Imports section at the top of this page. The fetch() method of the DATASETS class will automatically download the data file specified as a string argument, in this case enso_data.csv, and cache the file locally, assuming the argument corresponds to a valid Pythia example dataset. This is illustrated in the following example.\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0be820cd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "filepath = DATASETS.fetch('enso_data.csv')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "68316c6c",
+ "metadata": {},
+ "source": [
+ "Once we have a valid path to a data file that Pandas knows how to read, we can open it, as shown in the following example:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e99652d5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.read_csv(filepath)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ae9dcbbd",
+ "metadata": {},
+ "source": [
+ "If we print out our `DataFrame`, it will render as text by default, in a tabular-style ASCII output, as shown in the following example. However, if you are using a Jupyter notebook, there exists a better way to print `DataFrames`, as described below."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "25a23571",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(df)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f22bb442",
+ "metadata": {},
+ "source": [
+ "As described above, there is a better way to print Pandas `DataFrames`. If you are using a Jupyter notebook, you can run a code cell containing the `DataFrame` object name, by itself, and it will display a nicely rendered table, as shown below."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b8942e69",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "377d4803",
+ "metadata": {},
+ "source": [
+ "The `DataFrame` index, as described above, contains information characterizing rows; each row has a unique ID value, which is displayed in the index column. By default, the IDs for rows in a `DataFrame` are represented as sequential integers, which start at 0."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cde6999b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.index"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8af49ac9",
+ "metadata": {},
+ "source": [
+ "At the moment, the index column of our `DataFrame` is not very helpful for humans. However, Pandas has clever ways to make index columns more human-readable. The next example demonstrates how to use optional keyword arguments to convert `DataFrame` index IDs to a human-friendly datetime format."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b4657f7e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.read_csv(filepath, index_col=0, parse_dates=True)\n",
+ "\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b0d3ae28",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.index"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8d26ed71",
+ "metadata": {},
+ "source": [
+ "Each of our data rows is now helpfully labeled by a datetime-object-like index value; this means that we can now easily identify data values not only by named columns, but also by date labels on rows. This is a sneak preview of the `DatetimeIndex` functionality of Pandas; this functionality enables a large portion of Pandas' timeseries-related usage. Don't worry; `DatetimeIndex` will be discussed in full detail later on this page. In the meantime, let's look at the columns of data read in from the `.csv` file:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "847347f8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.columns"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "08d3b2fb",
+ "metadata": {},
+ "source": [
+ "## The pandas [`Series`](https://pandas.pydata.org/docs/user_guide/dsintro.html#series)...\n",
+ "\n",
+ "...is essentially any one of the columns of our `DataFrame`. A `Series` also includes the index column from the source `DataFrame`, in order to provide a label for each value in the `Series`.\n",
+ "\n",
+ "![pandas Series](https://github.com/pandas-dev/pandas/raw/main/doc/source/_static/schemas/01_table_series.svg \"Schematic of a pandas Series\")\n",
+ "\n",
+ "The pandas `Series` is a fast and capable 1-dimensional array of nearly any data type we could want, and it can behave very similarly to a NumPy `ndarray` or a Python `dict`. You can take a look at any of the `Series` that make up your `DataFrame`, either by using its column name and the Python `dict` notation, or by using dot-shorthand with the column name:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ee085815",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df[\"Nino34\"]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2fcc97fa",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "Tip: You can also use the dot notation illustrated below to specify a column name, but this syntax is mostly provided for convenience. For the most part, this notation is interchangeable with the dictionary notation; however, if the column name is not a valid Python identifier (e.g., it starts with a number or space), you cannot use dot notation.
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7d46cbb9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.Nino34"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "dfed2a7c-5532-44d4-a2be-f1cc484d842c",
+ "metadata": {},
+ "source": [
+ "## Slicing and Dicing the `DataFrame` and `Series`\n",
+ "\n",
+ "In this section, we will expand on topics covered in the previous sections on this page. One of the most important concepts to learn about Pandas is that it allows you to _**access anything by its associated label**_, regardless of data organization structure."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "59128a2d-f23f-4b1c-93e7-63a85046b881",
+ "metadata": {},
+ "source": [
+ "### Indexing a `Series`\n",
+ "\n",
+ "As a review of previous examples, we'll start our next example by pulling a `Series` out of our `DataFrame` using its column label."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ec9ed333",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nino34_series = df[\"Nino34\"]\n",
+ "\n",
+ "nino34_series"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c84a81b9",
+ "metadata": {},
+ "source": [
+ "You can use syntax similar to that of NumPy `ndarrays` to index, select, and subset with Pandas `Series`, as shown in this example:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "39bb0ae3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nino34_series[3]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a1a55a7c",
+ "metadata": {},
+ "source": [
+ "You can also use labels alongside Python dictionary syntax to perform the same operations:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "62006988",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nino34_series[\"1982-04-01\"]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ce9788fd-a420-4c64-92b2-188818c52cc8",
+ "metadata": {},
+ "source": [
+ "You can probably figure out some ways to extend these indexing methods, as shown in the following examples:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "221e798d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nino34_series[0:12]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8ae4a117-2e37-4e01-bd06-2bef62f83741",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
Info
\n",
+ " Index-based slices are exclusive of the final value, similar to Python's usual indexing rules.\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3cba4b8e-5dbb-4da6-ba5f-bc8a8a726b14",
+ "metadata": {},
+ "source": [
+ "However, there are many more ways to index a `Series`. The following example shows a powerful and useful indexing method:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f7a06967",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nino34_series[\"1982-01-01\":\"1982-12-01\"]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1d6b4d75-b6a5-4960-9f83-8adbff1e2830",
+ "metadata": {},
+ "source": [
+ "This is an example of label-based slicing. With label-based slicing, Pandas will automatically find a range of values based on the labels you specify."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b9c167aa-6b8d-4533-9e89-09d75af76025",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
Info
\n",
+ " As opposed to index-based slices, label-based slices are inclusive of the final value.\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cebfac9a-9e8f-449a-a88d-b1196a49d87d",
+ "metadata": {},
+ "source": [
+ "If you already have some knowledge of xarray, you will quite likely know how to create `slice` objects by hand. This can also be used in pandas, as shown below. If you are completely unfamiliar with xarray, it will be covered on a [later Pythia tutorial page](../xarray)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "771d6f04",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nino34_series[slice(\"1982-01-01\", \"1982-12-01\")]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9798abf4",
+ "metadata": {},
+ "source": [
+ "### Using `.iloc` and `.loc` to index\n",
+ "\n",
+ "In this section, we introduce ways to access data that are preferred by Pandas over the methods listed above. When accessing by label, it is preferred to use the `.loc` method, and when accessing by index, the `.iloc` method is preferred. These methods behave similarly to the notation introduced above, but provide more speed, security, and rigor in your value selection. Using these methods can also help you avoid [chained assignment warnings](https://pandas.pydata.org/docs/user_guide/indexing.html#returning-a-view-versus-a-copy) generated by pandas."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d5eb9de2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nino34_series.iloc[3]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8d0bc3e8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nino34_series.iloc[0:12]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "59a10070",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nino34_series.loc[\"1982-04-01\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3e2b3fc1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nino34_series.loc[\"1982-01-01\":\"1982-12-01\"]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "722e3d11-4c27-4a4c-a31b-2d551587f2b3",
+ "metadata": {},
+ "source": [
+ "### Extending to the `DataFrame`\n",
+ "\n",
+ "These subsetting capabilities can also be used in a full `DataFrame`; however, if you use the same syntax, there are issues, as shown below:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b8971371",
+ "metadata": {
+ "tags": [
+ "raises-exception"
+ ]
+ },
+ "outputs": [],
+ "source": [
+ "df[\"1982-01-01\"]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b89bf013-4492-461f-a1ef-a4f1a3423a4a",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
Danger
\n",
+ " Attempting to use Series subsetting with a DataFrame can crash your program. A proper way to subset a DataFrame is shown below.\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b40c7ace-939b-4997-a185-be1ea8363d06",
+ "metadata": {},
+ "source": [
+ "When indexing a `DataFrame`, pandas will not assume as readily the intention of your code. In this case, using a row label by itself will not work; **with `DataFrames`, labels are used for identifying columns**."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b504ed93-d310-4384-b99b-08d3ddc96bb0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df[\"Nino34\"]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "34196c97-5117-402c-a1d6-c05298ed8500",
+ "metadata": {},
+ "source": [
+ "As shown below, you also cannot subset columns in a `DataFrame` using integer indices:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c393a116-da08-4b99-b87d-de76e2614f00",
+ "metadata": {
+ "tags": [
+ "raises-exception"
+ ]
+ },
+ "outputs": [],
+ "source": [
+ "df[0]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d4e6213d",
+ "metadata": {},
+ "source": [
+ "From earlier examples, we know that we can use an index or label with a `DataFrame` to pull out a column as a `Series`, and we know that we can use an index or label with a `Series` to pull out a single value. Therefore, by chaining brackets, we can pull any individual data value out of the `DataFrame`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c61fa6d4",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "df[\"Nino34\"][\"1982-04-01\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3bd7cf95",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df[\"Nino34\"][3]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "afb0d6ef",
+ "metadata": {},
+ "source": [
+ "However, subsetting data using this chained-bracket technique is not preferred by Pandas. As described above, Pandas prefers us to use the `.loc` and `.iloc` methods for subsetting. In addition, these methods provide a clearer, more efficient way to extract specific data from a `DataFrame`, as illustrated below:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9fb5df7b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.loc[\"1982-04-01\", \"Nino34\"]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "98d6e445-51ed-4128-bfeb-fb82abbe9cb8",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
Info
\n",
+ " When using this syntax to pull individual data values from a DataFrame, make sure to list the row first, and then the column.\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e710252f",
+ "metadata": {},
+ "source": [
+ "The `.loc` and `.iloc` methods also allow us to pull entire rows out of a `DataFrame`, as shown in these examples:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "aad4fde6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.loc[\"1982-04-01\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f93737ba",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.loc[\"1982-01-01\":\"1982-12-01\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6c23cbca",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.iloc[3]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "22c07d7d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.iloc[0:12]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4c2ed15e",
+ "metadata": {},
+ "source": [
+ "In the next example, we illustrate how you can use slices of rows and lists of columns to create a smaller `DataFrame` out of an existing `DataFrame`:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8390a35b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.loc[\n",
+ " \"1982-01-01\":\"1982-12-01\", # slice of rows\n",
+ " [\"Nino12\", \"Nino3\", \"Nino4\", \"Nino34\"], # list of columns\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c128cc18-e433-4060-870d-19835b5e556e",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
Info
\n",
+ " There are certain limitations to these subsetting techniques. For more information on these limitations, as well as a comparison of DataFrame and Series indexing methods, see the Pandas indexing documentation.\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2e2739dc",
+ "metadata": {},
+ "source": [
+ "## Exploratory Data Analysis\n",
+ "\n",
+ "### Get a Quick Look at the Beginning/End of your `DataFrame`\n",
+ "Pandas also gives you a few shortcuts to quickly investigate entire `DataFrames`. The `head` method shows the first five rows of a `DataFrame`, and the `tail` method shows the last five rows of a `DataFrame`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3c11b92a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3bf87294",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.tail()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cba9f221",
+ "metadata": {},
+ "source": [
+ "### Quick Plots of Your Data\n",
+ "A good way to explore your data is by making a simple plot. Pandas contains its own `plot` method; this allows us to plot Pandas series without needing `matplotlib`. In this example, we plot the `Nino34` series of our `df` `DataFrame` in this way:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bf317171",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.Nino34.plot();"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "99c2c7a3",
+ "metadata": {},
+ "source": [
+ "Before, we called `.plot()`, which generated a single line plot. Line plots can be helpful for understanding some types of data, but there are other types of data that can be better understood with different plot types. For example, if your data values form a distribution, you can better understand them using a histogram plot.\n",
+ "\n",
+ "The code for plotting histogram data differs in two ways from the code above for the line plot. First, two series are being used from the `DataFrame` instead of one. Second, after calling the `plot` method, we call an additional method called `hist`, which converts the plot into a histogram."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5f85e2dd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df[['Nino12', 'Nino34']].plot.hist();"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a4e07618",
+ "metadata": {},
+ "source": [
+ "The histogram plot helped us better understand our data; there are clear differences in the distributions. To even better understand this type of data, it may also be helpful to create a box plot. This can be done using the same line of code, with one change: we call the `box` method instead of `hist`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6329d231",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df[['Nino12', 'Nino34']].plot.box();"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c338385b",
+ "metadata": {},
+ "source": [
+ "Just like the histogram plot, this box plot indicates a clear difference in the distributions. Using multiple types of plot in this way can be useful for verifying large datasets. The pandas plotting methods are capable of creating many different types of plots. To see how to use the plotting methods to generate each type of plot, please review the [pandas plot documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.plot.html)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "69fc4078",
+ "metadata": {},
+ "source": [
+ "#### Customize your Plot\n",
+ "The pandas plotting methods are, in fact, wrappers for similar methods in matplotlib. This means that you can customize pandas plots by including keyword arguments to the plotting methods. These keyword arguments, for the most part, are equivalent to their matplotlib counterparts."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "da22f990",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.Nino34.plot(\n",
+ " color='black',\n",
+ " linewidth=2,\n",
+ " xlabel='Year',\n",
+ " ylabel='ENSO34 Index (degC)',\n",
+ " figsize=(8, 6),\n",
+ ");"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e7145ef6",
+ "metadata": {},
+ "source": [
+ "Although plotting data can provide a clear visual picture of data values, sometimes a more quantitative look at data is warranted. As elaborated on in the next section, this can be achieved using the `describe` method. The `describe` method is called on the entire `DataFrame`, and returns various summarized statistics for each column in the `DataFrame`.\n",
+ "### Basic Statistics\n",
+ "\n",
+ "We can garner statistics for a `DataFrame` by using the `describe` method. When this method is called on a `DataFrame`, a set of statistics is returned in tabular format. The columns match those of the `DataFrame`, and the rows indicate different statistics, such as minimum."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3b5c27a8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.describe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a92bb8b3",
+ "metadata": {},
+ "source": [
+ "You can also view specific statistics using corresponding methods. In this example, we look at the mean values in the entire `DataFrame`, using the `mean` method. When such methods are called on the entire `DataFrame`, a `Series` is returned. The indices of this `Series` are the column names in the `DataFrame`, and the values are the calculated values (in this case, mean values) for the `DataFrame` columns."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "db9e4a16",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.mean()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1ff5aec7",
+ "metadata": {},
+ "source": [
+ "If you want a specific statistic for only one column in the `DataFrame`, pull the column out of the `DataFrame` with dot notation, then call the statistic function (in this case, mean) on that column, as shown below:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9aa38e59",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.Nino34.mean()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7295e7b0",
+ "metadata": {},
+ "source": [
+ "### Subsetting Using the Datetime Column\n",
+ "\n",
+ "Slicing is a useful technique for subsetting a `DataFrame`, but there are also other options that can be equally useful. In this section, some of these additional techniques are covered.\n",
+ "\n",
+ "If your `DataFrame` uses datetime values for indices, you can select data from only one month using `df.index.month`. In this example, we specify the number 1, which only selects data from January."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a506724a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Uses the datetime column\n",
+ "df[df.index.month == 1]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "16d4e0e7",
+ "metadata": {},
+ "source": [
+ "This example shows how to create a new column containing the month portion of the datetime index for each data row. The value returned by `df.index.month` is used to obtain the data for this new column:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fe5d5ee4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df['month'] = df.index.month"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8750443f",
+ "metadata": {},
+ "source": [
+ "This next example illustrates how to use the new month column to calculate average monthly values over the other data columns. First, we use the `groupby` method to group the other columns by the month. Second, we take the average (mean) to obtain the monthly averages. Finally, we plot the resulting data as a line plot by simply calling `plot()`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f94c91f9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.groupby('month').mean().plot();"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a0c9481b",
+ "metadata": {},
+ "source": [
+ "### Investigating Extreme Values"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fec15b77",
+ "metadata": {},
+ "source": [
+ "If you need to search for rows that meet a specific criterion, you can use **conditional indexing**. In this example, we search for rows where the Nino34 anomaly value (`Nino34anom`) is greater than 2:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "098fc88d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df[df.Nino34anom > 2]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f26bc439",
+ "metadata": {},
+ "source": [
+ "This example shows how to use the `sort_values` method on a `DataFrame`. This method sorts values in a `DataFrame` by the column specified as an argument."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8051c4f6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.sort_values('Nino34anom')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a293de79",
+ "metadata": {},
+ "source": [
+ "You can also reverse the ordering of the sort by specifying the `ascending` keyword argument as `False`:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "be7ff8ce",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.sort_values('Nino34anom', ascending=False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5504a0da",
+ "metadata": {},
+ "source": [
+ "### Resampling\n",
+ "In these examples, we illustrate a process known as resampling. Using resampling, you can change the frequency of index data values, reducing so-called 'noise' in a data plot. This is especially useful when working with timeseries data; plots can be equally effective with resampled data in these cases. The resampling performed in these examples converts monthly values to yearly averages. This is performed by passing the value '1Y' to the `resample` method."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "597cfeac",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.Nino34.plot();"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9e3ee506",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.Nino34.resample('1Y').mean().plot();"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "16c80788",
+ "metadata": {},
+ "source": [
+ "### Applying operations to a DataFrame\n",
+ "\n",
+ "One of the most commonly used features in Pandas is the performing of calculations to multiple data values in a `DataFrame` simultaneously. Let's first look at a familiar concept: a function that converts single values. The following example uses such a function to convert temperature values from degrees Celsius to Kelvin."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c8afa857",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def convert_degc_to_kelvin(temperature_degc):\n",
+ " \"\"\"\n",
+ " Converts from degrees celsius to Kelvin\n",
+ " \"\"\"\n",
+ "\n",
+ " return temperature_degc + 273.15"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "34892381",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Convert a single value\n",
+ "convert_degc_to_kelvin(0)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "384a0bdb",
+ "metadata": {},
+ "source": [
+ "The following examples instead illustrate a new concept: using such functions with `DataFrames` and `Series`. For the first example, we start by creating a `Series`; in order to do so, we subset the `DataFrame` by the `Nino34` column. This has already been done earlier in this page; we do not need to create this `Series` again. We are using this particular `Series` for a reason: the data values are in degrees Celsius."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f09ee7c6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nino34_series"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "28ac04e8",
+ "metadata": {},
+ "source": [
+ "Here, we look at a portion of an existing `DataFrame` column. Notice that this column portion is a Pandas `Series`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8718a43f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "type(df.Nino12[0:10])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ff1f569f",
+ "metadata": {},
+ "source": [
+ "As shown in the following example, each Pandas `Series` contains a representation of its data in numpy format. Therefore, it is possible to convert a Pandas `Series` into a numpy array; this is done using the `.values` method:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "61a8255f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "type(df.Nino12.values[0:10])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2a5693fe",
+ "metadata": {},
+ "source": [
+ "This example illustrates how to use the temperature-conversion function defined above on a `Series` object. Just as calling the function with a single value returns a single value, calling the function on a `Series` object returns another `Series` object. The function performs the temperature conversion on each data value in the `Series`, and returns a `Series` with all values converted."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ae197a92",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "convert_degc_to_kelvin(nino34_series)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "87871b82",
+ "metadata": {},
+ "source": [
+ "If we call the `.values` method on the `Series` passed to the function, the `Series` is converted to a numpy array, as described above. The function then converts each value in the numpy array, and returns a new numpy array with all values sorted."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "84ec100b-60bd-4cb9-b596-40af2a04b95d",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
Warning
\n",
+ " It is recommended to only convert Series to NumPy arrays when necessary; doing so removes the label information that enables much of the Pandas core functionality.\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "52ae68ee",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "convert_degc_to_kelvin(nino34_series.values)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "65b3cd56",
+ "metadata": {},
+ "source": [
+ "As described above, when our temperature-conversion function accepts a `Series` as an argument, it returns a `Series`. We can directly assign this returned `Series` to a new column in our `DataFrame`, as shown below:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2d84dfe1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df['Nino34_degK'] = convert_degc_to_kelvin(nino34_series)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "dd9a0811",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.Nino34_degK"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a8c6dba3",
+ "metadata": {},
+ "source": [
+ "In this final example, we demonstrate the use of the `to_csv` method to save a `DataFrame` as a `.csv` file. This example also demonstrates the `read_csv` method, which reads `.csv` files into Pandas `DataFrames`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "054428db",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.to_csv('nino_analyzed_output.csv')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3f7d378f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pd.read_csv('nino_analyzed_output.csv', index_col=0, parse_dates=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9327e958",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "## Summary\n",
+ "* Pandas is a very powerful tool for working with tabular (i.e., spreadsheet-style) data\n",
+ "* There are multiple ways of subsetting your pandas dataframe or series\n",
+ "* Pandas allows you to refer to subsets of data by label, which generally makes code more readable and more robust\n",
+ "* Pandas can be helpful for exploratory data analysis, including plotting and basic statistics\n",
+ "* One can apply calculations to pandas dataframes and save the output via `csv` files\n",
+ "\n",
+ "### What's Next?\n",
+ "In the next notebook, we will look more into using pandas for more in-depth data analysis.\n",
+ "\n",
+ "## Resources and References\n",
+ "1. [NOAA NCDC ENSO Dataset Used in this Example](https://www.ncdc.noaa.gov/teleconnections/enso/indicators/sst/)\n",
+ "1. [Getting Started with Pandas](https://pandas.pydata.org/docs/getting_started/index.html#getting-started)\n",
+ "1. [Pandas User Guide](https://pandas.pydata.org/docs/user_guide/index.html#user-guide)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/_preview/468/_sources/core/xarray.md b/_preview/468/_sources/core/xarray.md
new file mode 100644
index 000000000..27e827cc0
--- /dev/null
+++ b/_preview/468/_sources/core/xarray.md
@@ -0,0 +1,19 @@
+![xarray Logo](https://docs.xarray.dev/en/stable/_static/Xarray_Logo_RGB_Final.svg "xarray Logo")
+
+# Xarray
+
+This section contains tutorials on using [Xarray][xarray home]. Xarray is used widely in the geosciences and beyond for analysis of gridded N-dimensional datasets.
+
+---
+
+From the [Xarray website][xarray home]:
+
+> Xarray (formerly Xray) is an open source project and Python package that makes working with labelled multi-dimensional arrays simple, efficient, and fun!
+>
+> Xarray introduces labels in the form of dimensions, coordinates and attributes on top of raw NumPy-like arrays, which allows for a more intuitive, more concise, and less error-prone developer experience. The package includes a large and growing library of domain-agnostic functions for advanced analytics and visualization with these data structures.
+>
+> Xarray is inspired by and borrows heavily from pandas, the popular data analysis package focused on labelled tabular data. It is particularly tailored to working with netCDF files, which were the source of xarray’s data model, and integrates tightly with dask for parallel computing.
+
+You should have a basic familiarity with [Numpy arrays](numpy) prior to working through the Xarray notebooks presented here.
+
+[xarray home]: http://xarray.pydata.org/en/stable/
diff --git a/_preview/468/_sources/core/xarray/computation-masking.ipynb b/_preview/468/_sources/core/xarray/computation-masking.ipynb
new file mode 100644
index 000000000..3e3c29f5b
--- /dev/null
+++ b/_preview/468/_sources/core/xarray/computation-masking.ipynb
@@ -0,0 +1,907 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "7b494629-f859-4586-b235-e61fed184b9a",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "# Computations and Masks with Xarray"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b1fb677c-11f3-4901-a7df-3cd3f9e45b6e",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "79f94c0a-b585-4510-97da-379daea2b873",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "## Overview\n",
+ "\n",
+ "In this tutorial, we will cover the following topics:\n",
+ "\n",
+ "1. Performing basic arithmetic on `DataArrays` and `Datasets`\n",
+ "2. Performing aggregation (i.e., reduction) along single or multiple dimensions of a `DataArray` or `Dataset`\n",
+ "3. Computing climatologies and anomalies of data using Xarray's \"split-apply-combine\" approach, via the `.groupby()` method\n",
+ "4. Performing weighted-reduction operations along single or multiple dimensions of a `DataArray` or `Dataset`\n",
+ "5. Providing a broad overview of Xarray's data-masking capability\n",
+ "6. Using the `.where()` method to mask Xarray data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "921b094b-d556-4a9e-a1bd-7a8560d8b335",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "## Prerequisites\n",
+ "\n",
+ "\n",
+ "| Concepts | Importance | Notes |\n",
+ "| --- | --- | --- |\n",
+ "| [Introduction to Xarray](xarray-intro) | Necessary | |\n",
+ "\n",
+ "\n",
+ "- **Time to learn**: 60 minutes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f6e62752-c323-4e24-8767-0754d1816556",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0af7bee1-3de3-453a-8ae8-bcd7910b4266",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "## Imports\n",
+ "\n",
+ "In order to work with data and plotting, we must import NumPy, Matplotlib, and Xarray. These packages are covered in greater detail in earlier tutorials. We also import a package that allows quick download of Pythia example datasets."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "06073287-7bdb-45b5-9cec-8cdf123adb49",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "import numpy as np\n",
+ "import xarray as xr\n",
+ "from pythia_datasets import DATASETS"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9719db5b-e645-4815-b8df-d454fa7703e7",
+ "metadata": {},
+ "source": [
+ "## Data Setup\n",
+ "\n",
+ "The bulk of the examples in this tutorial make use of a single dataset. This dataset contains monthly sea surface temperature (SST, call 'tos' here) data, and is obtained from the Community Earth System Model v2 (CESM2). (For this tutorial, however, the dataset will be retrieved from the Pythia example data repository.) The following example illustrates the process of retrieving this Global Climate Model dataset:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7837f8bd-da89-4718-ab02-d5107576d2d6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "filepath = DATASETS.fetch('CESM2_sst_data.nc')\n",
+ "ds = xr.open_dataset(filepath)\n",
+ "ds"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b3f4e108-f55e-4c25-a00d-99dc00ba849a",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "## Arithmetic Operations\n",
+ "\n",
+ "In a similar fashion to NumPy arrays, performing an arithmetic operation on a `DataArray` will automatically perform the operation on all array values; this is known as vectorization. To illustrate the process of vectorization, the following example converts the air temperature data from units of degrees Celsius to units of Kelvin:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "09542eab-998d-4b2d-807c-dccd5bd4329e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds.tos + 273.15"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6f35c8d6-b0e6-4371-ad80-e182ffcec51b",
+ "metadata": {},
+ "source": [
+ "In addition, there are many other arithmetic operations that can be performed on `DataArrays`. In this example, we demonstrate squaring the original Celsius values of our air temperature data:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "78c1ffc2-45cb-40cc-962e-c76021d9ab1c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds.tos**2"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0bebb17b-6906-4ba7-a4ff-c07a9206e790",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "## Aggregation Methods \n",
+ "\n",
+ "A common practice in the field of data analysis is aggregation. Aggregation is the process of reducing data through methods such as `sum()`, `mean()`, `median()`, `min()`, and `max()`, in order to gain greater insight into the nature of large datasets. In this set of examples, we demonstrate correct usage of a select group of aggregation methods:"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a4d79093-f013-4821-84f8-3c223141046e",
+ "metadata": {},
+ "source": [
+ "Compute the mean:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "59b84034-7d42-4080-932f-0eefd165953d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds.tos.mean()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a75e0064-4363-4328-9a79-d87475ed1c81",
+ "metadata": {},
+ "source": [
+ "Notice that we did not specify the `dim` keyword argument; this means that the function was applied over all of the dataset's dimensions. In other words, the aggregation method computed the mean of every element of the temperature dataset across every temporal and spatial data point. However, if a dimension name is used with the `dim` keyword argument, the aggregation method computes an aggregation along the given dimension. In this next example, we use aggregation to calculate the temporal mean across all spatial data; this is performed by providing the dimension name `'time'` to the `dim` keyword argument:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a49b957e-ea24-414e-a422-c40a3723fbae",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds.tos.mean(dim='time').plot(size=7);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "109f77cf-54bb-4cac-a667-2afeb2cfef9d",
+ "metadata": {},
+ "source": [
+ "There are many other combinations of aggregation methods and dimensions on which to perform these methods. In this example, we compute the temporal minimum:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ceddf519-7459-4eaf-ae0d-69b2ba135317",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds.tos.min(dim=['time'])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cb5f55c5-95bc-4fe3-a4fc-49958b6cf64c",
+ "metadata": {},
+ "source": [
+ "This example computes the spatial sum. Note that this dataset contains no altitude data; as such, the required spatial dimensions passed to the method consist only of latitude and longitude."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "487cafa8-c1cc-451a-96da-900c6ab961d5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds.tos.sum(dim=['lat', 'lon'])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b613fc00-5c75-4df1-b4e4-d391de84aab2",
+ "metadata": {},
+ "source": [
+ "For the last example in this set of aggregation examples, we compute the temporal median:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e0873a32-de0e-456e-8948-e6516ddb7fd1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds.tos.median(dim='time')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b9790835-51ae-460b-87a4-618e7760d7a7",
+ "metadata": {},
+ "source": [
+ "In addition, there are many other commonly used aggregation methods in Xarray. Some of the more popular aggregation methods are summarized in the following table:\n",
+ "\n",
+ "| Aggregation | Description |\n",
+ "|--------------------------|---------------------------------|\n",
+ "| ``count()`` | Total number of items |\n",
+ "| ``mean()``, ``median()`` | Mean and median |\n",
+ "| ``min()``, ``max()`` | Minimum and maximum |\n",
+ "| ``std()``, ``var()`` | Standard deviation and variance |\n",
+ "| ``prod()`` | Compute product of elements |\n",
+ "| ``sum()`` | Compute sum of elements |\n",
+ "| ``argmin()``, ``argmax()``| Find index of minimum and maximum value |"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8704803f-300d-4631-a2fa-f62d18726d1c",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "## GroupBy: Split, Apply, Combine\n",
+ "\n",
+ "While we can obtain useful summaries of datasets using simple aggregation methods, it is more often the case that aggregation must be performed over coordinate labels or groups. In order to perform this type of aggregation, it is helpful to use the **split-apply-combine** workflow. Fortunately, Xarray provides this functionality for `DataArrays` and `Datasets` by means of the `groupby` operation. The following figure illustrates the split-apply-combine workflow in detail:\n",
+ "\n",
+ "\n",
+ "\n",
+ "Based on the above figure, you can understand the split-apply-combine process performed by `groupby`. In detail, the steps of this process are:\n",
+ "\n",
+ "- The split step involves breaking up and grouping an xarray `Dataset` or `DataArray` depending on the value of the specified group key.\n",
+ "- The apply step involves computing some function, usually an aggregate, transformation, or filtering, within the individual groups.\n",
+ "- The combine step merges the results of these operations into an output xarray `Dataset` or `DataArray`.\n",
+ "\n",
+ "In this set of examples, we will remove the seasonal cycle (also known as a climatology) from our dataset using `groupby`. There are many types of input that can be provided to `groupby`; a full list can be found in [Xarray's `groupby` user guide](https://xarray.pydata.org/en/stable/user-guide/groupby.html)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "713cc8d8-7374-4c5b-be61-aec4b5b0ffe6",
+ "metadata": {},
+ "source": [
+ "In this first example, we plot data to illustrate the annual cycle described above. We first select the grid point closest to a specific latitude-longitude point. Once we have this grid point, we can plot a temporal series of sea-surface temperature (SST) data at that location. Reviewing the generated plot, the annual cycle of the data becomes clear."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c0348ee8-6e9b-4f50-a844-375ae00d2771",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds.tos.sel(lon=310, lat=50, method='nearest').plot();"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d1505625-cbcd-495b-a15f-8824e455415b",
+ "metadata": {},
+ "source": [
+ "### Split\n",
+ "\n",
+ "The first step of the split-apply-combine process is splitting. As described above, this step involves splitting a dataset into groups, with each group matching a group key. In this example, we split the SST data using months as a group key. Therefore, there is one resulting group for January data, one for February data, etc. This code illustrates how to perform such a split:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6e4fb25e-165f-4350-a93d-46a344f2d175",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds.tos.groupby(ds.time.dt.month)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5d176ad8-15f1-4ecc-ab3e-898cef3b4e18",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "\n",
+ "In the above code example, we are extracting components of date/time data by way of the time coordinate's `.dt` attribute. This attribute is a `DatetimeAccessor` object that contains additional attributes for units of time, such as hour, day, and year. Since we are splitting the data into monthly data, we use the `month` attribute of .dt in this example. (In addition, there exists similar functionality in Pandas; see the [official documentation](https://pandas.pydata.org/docs/reference/api/pandas.Series.dt.month.html) for details.)\n",
+ " \n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ad273652-178c-4eda-80b6-6d39a11d6f1e",
+ "metadata": {},
+ "source": [
+ "In addition, there is a more concise syntax that can be used in specific instances. This syntax can be used if the variable on which the grouping is performed is already present in the dataset. The following example illustrates this syntax; it is functionally equivalent to the syntax used in the above example."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c6990393-fb5f-4a10-b8e2-fd9c6917d9d2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds.tos.groupby('time.month')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6b85dbf7-daf1-4889-8b3b-6991d290969f",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "### Apply & Combine \n",
+ "\n",
+ "Now that we have split our data into groups, the next step is to apply a calculation to the groups. There are two types of calculation that can be applied:\n",
+ "\n",
+ "- aggregation: reduces the size of the group\n",
+ "- transformation: preserves the group's full size\n",
+ "\n",
+ "After a calculation is applied to the groups, Xarray will automatically combine the groups back into a single object, completing the split-apply-combine workflow.\n",
+ "\n",
+ "\n",
+ "\n",
+ "#### Compute climatology \n",
+ "\n",
+ "\n",
+ "In this example, we use the split-apply-combine workflow to calculate the monthly climatology at every point in the dataset. Notice that we are using the `month` `DatetimeAccessor`, as described above, as well as the `.mean()` aggregation function:\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7e568c2f-7143-4346-85ce-a430db03316e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tos_clim = ds.tos.groupby('time.month').mean()\n",
+ "tos_clim"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2ef90862-aeb4-45b3-87fb-e9df8f197c81",
+ "metadata": {},
+ "source": [
+ "Now that we have a `DataArray` containing the climatology data, we can plot the data in different ways. In this example, we plot the climatology at a specific latitude-longitude point:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f908c377-67fa-449c-b8d1-82ba6a14baff",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tos_clim.sel(lon=310, lat=50, method='nearest').plot();"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0e5dc34b-99bc-494b-9c04-ed8388ab2e6c",
+ "metadata": {},
+ "source": [
+ "In this example, we plot the zonal mean climatology:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "22c61c11-2a48-4c6c-8009-6f20e0101237",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tos_clim.mean(dim='lon').transpose().plot.contourf(levels=12, cmap='turbo');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3411ebb7-9831-4e52-ab2e-7e4e7a1356ee",
+ "metadata": {},
+ "source": [
+ "Finally, this example calculates and plots the difference between the climatology for January and the climatology for December:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "19a2808b-81f9-40e5-ab31-d63bfce85eae",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "(tos_clim.sel(month=1) - tos_clim.sel(month=12)).plot(size=6, robust=True);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "266b8130-ca7d-4aec-a9a2-d7281ad64425",
+ "metadata": {},
+ "source": [
+ "#### Compute anomaly\n",
+ "\n",
+ "In this example, we compute the anomaly of the original data by removing the climatology from the data values. As shown in previous examples, the climatology is first calculated. The calculated climatology is then removed from the data using arithmetic and Xarray's `groupby` method:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4c9940df-5174-49bf-9117-eef1e14abec0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "gb = ds.tos.groupby('time.month')\n",
+ "tos_anom = gb - gb.mean(dim='time')\n",
+ "tos_anom"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "35fc2054-df48-4ea2-8433-632ba8755c61",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tos_anom.sel(lon=310, lat=50, method='nearest').plot();"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c3c087dc-966d-48a0-bb99-ca63cf20ff05",
+ "metadata": {},
+ "source": [
+ "In this example, we compute and plot our dataset's mean global anomaly over time. In order to specify global data, we must provide both `lat` and `lon` to the `mean()` method's `dim` keyword argument:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5d3abf06-a341-45ac-a3f2-76131016c0b3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "unweighted_mean_global_anom = tos_anom.mean(dim=['lat', 'lon'])\n",
+ "unweighted_mean_global_anom.plot();"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d9f768be-a960-4417-bb1e-9785ca9ca4ea",
+ "metadata": {},
+ "source": [
+ "
\n",
+ " \n",
+ "\n",
+ "Many geoscientific algorithms perform operations over data contained in many different grid cells. However, if the grid cells are not equivalent in size, the operation is not scientifically valid by default. Fortunately, this can be fixed by weighting the data in each grid cell by the size of the cell. Weighting data in Xarray is simple, as Xarray has a built-in weighting method, known as [`.weighted()`](https://xarray.pydata.org/en/stable/user-guide/computation.html#weighted-array-reductions).\n",
+ "\n",
+ "
\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "908bcc38-bf93-478c-99e4-8bbafeec1f21",
+ "metadata": {},
+ "source": [
+ "In this example, we again make use of the Pythia example data library to load a new CESM2 dataset. Contained in this dataset are weights corresponding to the grid cells in our anomaly data:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a5878de6-f3ab-43e0-8f0d-12ab51631450",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "filepath2 = DATASETS.fetch('CESM2_grid_variables.nc')\n",
+ "areacello = xr.open_dataset(filepath2).areacello\n",
+ "areacello"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8a73a748-46b4-4350-b167-32725eebaec8",
+ "metadata": {},
+ "source": [
+ "In a similar fashion to a previous example, this example calculates mean global anomaly. However, this example makes use of the `.weighted()` method and the newly loaded CESM2 dataset to weight the grid cell data as described above:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b8f7e3a5-0748-4395-95b0-0e31d0a5d4d1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "weighted_mean_global_anom = tos_anom.weighted(areacello).mean(dim=['lat', 'lon'])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "17da2e3a-3ca6-41f4-892e-b26021c492e6",
+ "metadata": {},
+ "source": [
+ "This example plots both unweighted and weighted mean data, which illustrates the degree of scientific error with unweighted data:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "802c5e99-7223-49b5-a867-91d943075d52",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "unweighted_mean_global_anom.plot(size=7)\n",
+ "weighted_mean_global_anom.plot()\n",
+ "plt.legend(['unweighted', 'weighted']);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3045c67e-21cd-4ef9-a49f-e12ae7db23cf",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "## Other high level computation functionality\n",
+ "\n",
+ "- `resample`: [This method behaves similarly to groupby, but is specialized for time dimensions, and can perform temporal upsampling and downsampling.](https://xarray.pydata.org/en/stable/user-guide/time-series.html#resampling-and-grouped-operations)\n",
+ "- `rolling`: [This method is used to compute aggregation functions, such as `mean`, on moving windows of data in a dataset.](https://xarray.pydata.org/en/stable/user-guide/computation.html#rolling-window-operations)\n",
+ "- `coarsen`: [This method provides generic functionality for performing downsampling operations on various types of data.](https://xarray.pydata.org/en/stable/user-guide/computation.html#coarsen-large-arrays)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "eaf4dc7d-dfac-419e-a875-fc0c70fcd08c",
+ "metadata": {},
+ "source": [
+ "This example illustrates the resampling of a dataset's time dimension to annual frequency:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a9cfb76b-c4ab-441e-a474-c66b7af944ad",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "r = ds.tos.resample(time='AS')\n",
+ "r"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6927f5be-d313-4d03-bab8-d22b3cb13899",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "r.mean()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "32370e4f-a1c4-4163-a926-b9e3a8d6d5c2",
+ "metadata": {},
+ "source": [
+ "This example illustrates using the `rolling` method to compute averages in a moving window of 5 months of data:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "342acbf1-4eee-4d0d-bb52-b394ffcd556d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "m_avg = ds.tos.rolling(time=5, center=True).mean()\n",
+ "m_avg"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0eb0cc4e-661a-4ab1-96ad-e096917ef104",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "lat = 50\n",
+ "lon = 310\n",
+ "\n",
+ "m_avg.isel(lat=lat, lon=lon).plot(size=6)\n",
+ "ds.tos.isel(lat=lat, lon=lon).plot()\n",
+ "plt.legend(['5-month moving average', 'monthly data']);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "da76db37-e833-42c5-a740-5dcf0877b43c",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "## Masking Data\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8a657ca8-fa2d-409c-9aaf-580828671018",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "Masking of data can be performed in Xarray by providing single or multiple conditions to either Xarray's `.where()` method or a `Dataset` or `DataArray`'s `.where()` method. Data values matching the condition(s) are converted into a single example value, effectively masking them from the scientifically important data. In the following set of examples, we use the `.where()` method to mask various data values in the `tos` `DataArray`."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "59a59bef-b08c-4e0f-a48a-894565a962e7",
+ "metadata": {},
+ "source": [
+ "For reference, we will first print our entire sea-surface temperature (SST) dataset:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "afff083b-6c0b-4756-b1be-716a443d98a2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "386c7a1b-1a47-4f52-a42d-27fa997427d3",
+ "metadata": {},
+ "source": [
+ "### Using `where` with one condition"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a8d48b6b-a40e-469f-861f-83d943d70f03",
+ "metadata": {},
+ "source": [
+ "In this set of examples, we are trying to analyze data at the last temporal value in the dataset. This first example illustrates the use of `.isel()` to perform this analysis:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ac3e42eb-1852-4580-9c52-e7237135ed01",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sample = ds.tos.isel(time=-1)\n",
+ "sample"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ccdd1fa6-93fd-490d-8b05-c222ddcf953a",
+ "metadata": {},
+ "source": [
+ "As shown in the previous example, methods like `.isel()` and `.sel()` return data of a different shape than the original data provided to them. However, `.where()` preserves the shape of the original data by masking the values with a Boolean condition. Data values for which the condition is `True` are returned identical to the values passed in. On the other hand, data values for which the condition is `False` are returned as a preset example value. (This example value defaults to `nan`, but can be set to other values as well.)\n",
+ "\n",
+ "Before testing `.where()`, it is helpful to look at the [official documentation](http://xarray.pydata.org/en/stable/generated/xarray.DataArray.where.html). As stated above, the `.where()` method takes a Boolean condition. (Boolean conditions use operators such as less-than, greater-than, and equal-to, and return a value of `True` or `False`.) Most uses of `.where()` check whether or not specific data values are less than or greater than a constant value. As stated in the documentation, the data values specified in the Boolean condition of `.where()` can be any of the following:\n",
+ "\n",
+ "- a `DataArray`\n",
+ "- a `Dataset`\n",
+ "- a function\n",
+ "\n",
+ "In the following example, we make use of `.where()` to mask data with temperature values greater than `0`. Therefore, values greater than `0` are set to `nan`, as described above. (It is important to note that the Boolean condition matches values to keep, not values to mask out.)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "61abc5b3-aadf-4a96-98a2-c9c36094a863",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "masked_sample = sample.where(sample < 0.0)\n",
+ "masked_sample"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "09aeeee1-3924-4ccd-9b69-1be396c496b9",
+ "metadata": {},
+ "source": [
+ "In this example, we use Matplotlib to plot the original, unmasked data, as well as the masked data created in the previous example."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "91457518-fc38-42e2-8b96-c5786e36f33f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig, axes = plt.subplots(ncols=2, figsize=(19, 6))\n",
+ "sample.plot(ax=axes[0])\n",
+ "masked_sample.plot(ax=axes[1]);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4dd6b000-b079-461c-9a0e-8fd2bced814b",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "### Using `where` with multiple conditions"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "538bd497-3059-4f6e-9c48-5104958f8528",
+ "metadata": {},
+ "source": [
+ "Those familiar with Boolean conditions know that such conditions can be combined by using logical operators. In the case of `.where()`, the relevant logical operators are bitwise or exclusive `'and'` (represented by the `&` symbol) and bitwise or exclusive 'or' (represented by the `|` symbol). This allows multiple masking conditions to be specified in a single use of `.where()`; however, be aware that if multiple conditions are specified in this way, each simple Boolean condition must be enclosed in parentheses. (If you are not familiar with Boolean conditions, or this section is confusing in any way, please review a detailed Boolean expression guide before continuing with the tutorial.) In this example, we provide multiple conditions to `.where()` using a more complex Boolean condition. This allows us to mask locations with temperature values less than 25, as well as locations with temperature values greater than 30. (As stated above, the Boolean condition matches values to keep, and everything else is masked out. Because we are now using more complex Boolean conditions, understanding the following example may be difficult. Please review a Boolean condition guide if needed.)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c9bf1c46-e7ed-43c1-8a45-e03d22295da1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sample.where((sample > 25) & (sample < 30)).plot(size=6);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d1796fc8-039b-4c40-a6f4-b3a00c130770",
+ "metadata": {},
+ "source": [
+ "In addition to using `DataArrays` and `Datasets` in Boolean conditions provided to `.where()`, we can also use coordinate variables. In the following example, we make use of Boolean conditions containing `latitude` and `longitude` coordinates. This greatly simplifies the masking of regions outside of the [Niño 3.4 region](https://www.ncdc.noaa.gov/teleconnections/enso/indicators/sst/):\n",
+ "\n",
+ "![](https://www.ncdc.noaa.gov/monitoring-content/teleconnections/nino-regions.gif)\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "090f1997-8dea-4eed-aa55-ab3a180ecdd5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sample.where(\n",
+ " (sample.lat < 5) & (sample.lat > -5) & (sample.lon > 190) & (sample.lon < 240)\n",
+ ").plot(size=6);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "47ebbbff-2409-43e1-90e9-2cd4c9777bdc",
+ "metadata": {},
+ "source": [
+ "### Using `where` with a custom fill value"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a1b76d95-f8b3-44ef-a7a5-c2028daaf500",
+ "metadata": {},
+ "source": [
+ "In the previous examples that make use of `.where()`, the masked data values are set to `nan`. However, this behavior can be modified by providing a second value, in numeric form, to `.where()`; if this numeric value is provided, it will be used instead of `nan` for masked data values. In this example, masked data values are set to `0` by providing a second value of `0` to the `.where()` method:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "aae12476-4802-42a2-993b-29da6c383535",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sample.where((sample > 25) & (sample < 30), 0).plot(size=6);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5fad83ca-faf6-44c5-8d05-173b425118e1",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d2cb7356-714b-45cd-b109-d0b6965b6a0c",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "## Summary \n",
+ "\n",
+ "- In a similar manner to NumPy arrays, performing arithmetic on a `DataArray` affects all values simultaneously.\n",
+ "- Xarray allows for simple data aggregation, over single or multiple dimensions, by way of built-in methods such as `sum()` and `mean()`.\n",
+ "- Xarray supports the useful split-apply-combine workflow through the `groupby` method.\n",
+ "- Xarray allows replacing (masking) of data matching specific Boolean conditions by means of the `.where()` method.\n",
+ "\n",
+ "### What's next?\n",
+ "\n",
+ "The next tutorial illustrates the use of previously covered Xarray concepts in a geoscientifically relevant example: plotting the [Niño 3.4 Index](https://climatedataguide.ucar.edu/climate-data/nino-sst-indices-nino-12-3-34-4-oni-and-tni)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "374de3a3-807a-47be-9014-e1af98909456",
+ "metadata": {},
+ "source": [
+ "## Resources and References\n",
+ "\n",
+ "- `groupby`: [Useful for binning/grouping data and applying reductions and/or transformations on those groups](https://xarray.pydata.org/en/stable/user-guide/groupby.html)\n",
+ "- `resample`: [Functionality similar to groupby, specialized for time dimensions. Can be used for temporal upsampling and downsampling](https://xarray.pydata.org/en/stable/user-guide/time-series.html#resampling-and-grouped-operations)\n",
+ "- `rolling`: [Useful for computing aggregations on moving windows of your dataset, e.g., computing moving averages](https://xarray.pydata.org/en/stable/user-guide/computation.html#rolling-window-operations)\n",
+ "- `coarsen`: [Generic functionality for downsampling data](https://xarray.pydata.org/en/stable/user-guide/computation.html#coarsen-large-arrays)\n",
+ "\n",
+ "- `weighted`: [Useful for weighting data before applying reductions](https://xarray.pydata.org/en/stable/user-guide/computation.html#weighted-array-reductions)\n",
+ "\n",
+ "- [More xarray tutorials and videos](https://xarray.pydata.org/en/stable/tutorials-and-videos.html)\n",
+ "- [Xarray Documentation - Masking with `where()`](https://xarray.pydata.org/en/stable/user-guide/indexing.html#masking-with-where)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/_preview/468/_sources/core/xarray/dask-arrays-xarray.ipynb b/_preview/468/_sources/core/xarray/dask-arrays-xarray.ipynb
new file mode 100644
index 000000000..4f8e4996f
--- /dev/null
+++ b/_preview/468/_sources/core/xarray/dask-arrays-xarray.ipynb
@@ -0,0 +1,712 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "d59e6a58-b50e-4015-bbd8-b48608d44b26",
+ "metadata": {},
+ "source": [
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "013dde55-1cea-4fd8-b980-0fa06bdd5568",
+ "metadata": {},
+ "source": [
+ "# Dask Arrays with Xarray\n",
+ "\n",
+ "The scientific Python package known as Dask provides Dask Arrays: parallel, larger-than-memory, n-dimensional arrays that make use of blocked algorithms. They are analogous to Numpy arrays, but are distributed. These terms are defined below:\n",
+ "\n",
+ "* **Parallel** code uses many or all of the cores on the computer running the code.\n",
+ "* **Larger-than-memory** refers to algorithms that break up data arrays into small pieces, operate on these pieces in an optimized fashion, and stream data from a storage device. This allows a user or programmer to work with datasets of a size larger than the available memory.\n",
+ "* A **blocked algorithm** speeds up large computations by converting them into a series of smaller computations.\n",
+ "\n",
+ "In this tutorial, we cover the use of Xarray to wrap Dask arrays. By using Dask arrays instead of Numpy arrays in Xarray data objects, it becomes possible to execute analysis code in parallel with much less code and effort.\n",
+ "\n",
+ "\n",
+ "## Learning Objectives\n",
+ "\n",
+ "- Learn the distinction between *eager* and *lazy* execution, and performing both types of execution with Xarray\n",
+ "- Understand key features of Dask Arrays\n",
+ "- Learn to perform operations with Dask Arrays in similar ways to performing operations with NumPy arrays\n",
+ "- Understand the use of Xarray `DataArrays` and `Datasets` as \"Dask collections\", and the use of top-level Dask functions such as `dask.visualize()` on such collections\n",
+ "- Understand the ability to use Dask transparently in all built-in Xarray operations\n",
+ "\n",
+ "## Prerequisites\n",
+ "\n",
+ "\n",
+ "| Concepts | Importance | Notes |\n",
+ "| --- | --- | --- |\n",
+ "| [Introduction to NumPy](../numpy/numpy-basics) | Necessary | Familiarity with Data Arrays |\n",
+ "| [Introduction to Xarray](xarray-intro) | Necessary | Familiarity with Xarray Data Structures |\n",
+ "\n",
+ "\n",
+ "- **Time to learn**: *30-40 minutes*\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "027ccc87-420b-45dd-830a-38766dd6248f",
+ "metadata": {},
+ "source": [
+ "## Imports\n",
+ "\n",
+ "For this tutorial, as we are working with Dask, there are a number of Dask packages that must be imported. Also, this is technically an Xarray tutorial, so Xarray and NumPy must also be imported. Finally, the Pythia datasets package is imported, allowing access to the Project Pythia example data library."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8aa666b9-4af2-41b5-8e77-e28f9cecddd1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import dask\n",
+ "import dask.array as da\n",
+ "import numpy as np\n",
+ "import xarray as xr\n",
+ "from dask.diagnostics import ProgressBar\n",
+ "from dask.utils import format_bytes\n",
+ "from pythia_datasets import DATASETS"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "259bdc60-6e96-4258-8d71-e733ce2d9aca",
+ "metadata": {},
+ "source": [
+ "## Blocked algorithms\n",
+ "\n",
+ "As described above, the definition of \"blocked algorithm\" is an algorithm that replaces a large operation with many small operations. In the case of datasets, this means that a blocked algorithm separates a dataset into chunks, and performs an operation on each.\n",
+ "\n",
+ "As an example of how blocked algorithms work, consider a dataset containing a billion numbers, and assume that the sum of the numbers is needed. Using a non-blocked algorithm, all of the numbers are added in one operation, which is extremely inefficient. However, by using a blocked algorithm, the dataset is broken into chunks. (For the purposes of this example, assume that 1,000 chunks are created, with 1,000,000 numbers each.) The sum of the numbers in each chunk is taken, most likely in parallel, and then each of those sums are summed to obtain the final result.\n",
+ "\n",
+ "By using blocked algorithms, we achieve the result, in this case one sum of one billion numbers, through the results of many smaller operations, in this case one thousand sums of one million numbers each. (Also note that each of the one thousand sums must then be summed, making the total number of sums 1,001.) This allows for a much greater degree of parallelism, potentially speeding up the code execution dramatically."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "53b69958-355f-4121-a644-227adc1b14ef",
+ "metadata": {},
+ "source": [
+ "### `dask.array` contains these algorithms\n",
+ "\n",
+ "The main object type used in Dask is `dask.array`, which implements a subset of the `ndarray` (NumPy array) interface. However, unlike `ndarray`, `dask.array` uses blocked algorithms, which break up the array into smaller arrays, as described above. This allows for the execution of computations on arrays larger than memory, by using parallelism to divide the computation among multiple cores. Dask manages and coordinates blocked algorithms for any given computation by using Dask graphs, which lay out in detail the steps Dask takes to solve a problem. In addition, `dask.array` objects, known as Dask Arrays, are **lazy**; in other words, any computation performed on them is delayed until a specific method is called."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1aef9368-240b-423b-a3c7-0ab7baa8fa13",
+ "metadata": {},
+ "source": [
+ "### Create a `dask.array` object\n",
+ "\n",
+ "As stated earlier, Dask Arrays are loosely based on NumPy arrays. In the next set of examples, we illustrate the main differences between Dask Arrays and NumPy arrays. In order to illustrate the differences, we must have both a Dask Array object and a NumPy array object. Therefore, this first example creates a 3-D NumPy array of random data:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "690bc749-976e-4b78-a801-8d01ee363ad7",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "shape = (600, 200, 200)\n",
+ "arr = np.random.random(shape)\n",
+ "arr"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c36032d1-0fb6-43d2-a188-87e8e00bd5a4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "format_bytes(arr.nbytes)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8e7e54dc-342c-4d31-902a-ece54a813e7e",
+ "metadata": {},
+ "source": [
+ "As shown above, this NumPy array contains about 183 MB of data."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "905dffb1-0879-4842-8b76-2538592f6156",
+ "metadata": {},
+ "source": [
+ "As stated above, we must also create a Dask Array. This next example creates a Dask Array with the same dimension sizes as the existing NumPy array:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "be84728a-7953-4561-aa7d-326f4a45e3aa",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "darr = da.random.random(shape, chunks=(300, 100, 200))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "632e9f08-3142-46d7-b457-e9bde0d1dce9",
+ "metadata": {},
+ "source": [
+ "By specifying values to the `chunks` keyword argument, we can specify the array pieces that Dask's blocked algorithms break the array into; in this case, we specify `(300, 100, 200)`."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ebbafe88-bb79-436c-aa3b-a9c5f31ff1ec",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
Specifying Chunks
\n",
+ " In this tutorial, we specify Dask Array chunks in a block shape. However, there are many additional ways to specify chunks; see this documentation for more details.\n",
+ "\n",
+ "
\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ed467b75-9629-4fbc-a235-866459e4b881",
+ "metadata": {},
+ "source": [
+ "If you are viewing this page as a Jupyter Notebook, the next Jupyter cell will produce a rich information graphic giving in-depth details about the array and each individual chunk."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "42c13417-a5a2-4fd2-8610-a3cd70f4b93a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "darr"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "75988622-8ec1-45b0-a069-29ca74f53836",
+ "metadata": {},
+ "source": [
+ "The above graphic contains a symbolic representation of the array, including `shape`, `dtype`, and `chunksize`. (Your view may be different, depending on how you are accessing this page.) Notice that there is no data shown for this array; this is because Dask Arrays are lazy, as described above. Before we call a compute method for this array, we first illustrate the structure of a Dask graph. In this example, we show the Dask graph by calling `.visualize()` on the array:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "44a7a6e4-bcfc-40c1-a095-8fa315bfef4e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "darr.visualize()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "37f4038c-f2b2-40c3-91cd-af2713dd23df",
+ "metadata": {},
+ "source": [
+ "As shown in the above Dask graph, our array has four chunks, each one created by a call to NumPy's \"random\" method (`np.random.random`). These chunks are concatenated into a single array after the calculation is performed."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c5b29134-62af-4a0b-8e44-9f99b5072b45",
+ "metadata": {},
+ "source": [
+ "### Manipulate a `dask.array` object as you would a numpy array\n",
+ "\n",
+ "\n",
+ "We can perform computations on the Dask Array created above in a similar fashion to NumPy arrays. These computations include arithmetic, slicing, and reductions, among others.\n",
+ "\n",
+ "Although the code for performing these computations is similar between NumPy arrays and Dask Arrays, the process by which they are performed is quite different. For example, it is possible to call `sum()` on both a NumPy array and a Dask Array; however, these two `sum()` calls are definitely not the same, as shown below.\n",
+ "\n",
+ "#### What's the difference?\n",
+ "\n",
+ "When `sum()` is called on a Dask Array, the computation is not performed; instead, an expression of the computation is built. The `sum()` computation, as well as any other computation methods called on the same Dask Array, are not performed until a specific method (known as a compute method) is called on the array. (This is known as **lazy execution**.) On the other hand, calling `sum()` on a NumPy array performs the calculation immediately; this is known as **eager execution**.\n",
+ "\n",
+ "#### Why the difference?\n",
+ "\n",
+ "As described earlier, a Dask Array is divided into chunks. Any computations run on the Dask Array run on each chunk individually. If the result of the computation is obtained before the computation runs through all of the chunks, Dask can stop the computation to save CPU time and memory resources.\n",
+ "\n",
+ "This example illustrates calling `sum()` on a Dask Array; it also includes a demonstration of lazy execution, as well as another Dask graph display:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cf14f2f0-a66e-4578-8a8d-f0c7701beb9c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "total = darr.sum()\n",
+ "total"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "36724be4-63ec-4b1c-9a9e-1a605a12a5a5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "total.visualize()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "09186a60-51fc-49c1-91ac-3614af0202cc",
+ "metadata": {},
+ "source": [
+ "#### Compute the result\n",
+ "\n",
+ "As described above, Dask Array objects make use of lazy execution. Therefore, operations performed on a Dask Array wait to execute until a compute method is called. As more operations are queued in this way, the Dask Array's Dask graph increases in complexity, reflecting the steps Dask will take to perform all of the queued operations. \n",
+ "\n",
+ "In this example, we call a compute method, simply called `.compute()`, to run on the Dask Array all of the stored computations:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "36aff31f-2f06-4703-a2a6-cf692ab5eed3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "total.compute()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "af09c610-0e66-4ce8-a53a-fdd50471271d",
+ "metadata": {},
+ "source": [
+ "### Exercise with `dask.arrays`\n",
+ "\n",
+ "In this section of the page, the examples are hands-on exercises pertaining to Dask Arrays. If these exercises are not interesting to you, this section can be used strictly as examples regardless of how the page is viewed. However, if you wish to participate in the exercises, make sure that you are viewing this page as a Jupyter Notebook.\n",
+ "\n",
+ "For the first exercise, modify the chunk size or shape of the Dask Array created earlier. Call `.sum()` on the modified Dask Array, and visualize the Dask graph to view the changes."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "11922e84-f13e-4db6-a8a0-cf75a5727cfb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "da.random.random(shape, chunks=(50, 200, 400)).sum().visualize()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b275b5cc-51a6-48ff-a0a4-62bdc43e6530",
+ "metadata": {},
+ "source": [
+ "As is obvious from the above exercise, Dask quickly and easily determines a strategy for performing the operations, in this case a sum. This illustrates the appeal of Dask: automatic algorithm generation that scales from simple arithmetic problems to highly complex scientific equations with large datasets and multiple operations."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7a7dcaaa-6a6e-4f58-aa80-2890136158fd",
+ "metadata": {},
+ "source": [
+ "In this next set of examples, we demonstrate that increasing the complexity of the operations performed also increases the complexity of the Dask graph.\n",
+ "\n",
+ "In this example, we use randomly selected functions, arguments and Python slices to create a complex set of operations. We then visualize the Dask graph to illustrate the increased complexity:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e74a6817-8d06-4dd1-afec-98c53a8ae52a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "z = darr.dot(darr.T).mean(axis=0)[::2, :].std(axis=1)\n",
+ "z"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a4ccffad-5bda-4108-a6c8-6628510f8363",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "z.visualize()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "35c59c2f-3e5c-443b-908f-4f14535d2802",
+ "metadata": {},
+ "source": [
+ "### Testing a bigger calculation\n",
+ "\n",
+ "While the earlier examples in this tutorial described well the basics of Dask, the size of the data in those examples, about 180 MB, is far too small for an actual use of Dask.\n",
+ "\n",
+ "In this example, we create a much larger array, more indicative of data actually used in Dask:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "482fb0fe-87d4-46fa-bb9d-ed38cc71d834",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "darr = da.random.random((4000, 100, 4000), chunks=(1000, 100, 500)).astype('float32')\n",
+ "darr"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6c0f8b73-41c1-49fb-9fa4-97cb10ae6f4d",
+ "metadata": {},
+ "source": [
+ "The dataset created in the previous example is much larger, approximately 6 GB. Depending on how many programs are running on your computer, this may be greater than the amount of free RAM on your computer. However, as Dask is larger-than-memory, the amount of free RAM does not impede Dask's ability to work on this dataset.\n",
+ "\n",
+ "In this example, we again perform randomly selected operations, but this time on the much larger dataset. We also visualize the Dask graph, and then run the compute method. However, as computing complex functions on large datasets is inherently time-consuming, we show a progress bar to track the progress of the computation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "51e9addb-cc13-46f5-b542-827f8bdd94b5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "z = (darr + darr.T)[::2, :].mean(axis=2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c44ed57c-2a31-4df1-897b-02b614279755",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "z.visualize()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f5bf25c1-7384-4953-bbb8-be0c3c4e02e9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "with ProgressBar():\n",
+ " computed_ds = z.compute()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "116996d8-cc8a-4201-94d9-8152f4b6aa42",
+ "metadata": {},
+ "source": [
+ "## Dask Arrays with Xarray\n",
+ "\n",
+ "While directly interacting with Dask Arrays can be useful on occasion, more often than not Dask Arrays are interacted with through [Xarray](http://xarray.pydata.org/en/stable/\n",
+ "). Since Xarray wraps NumPy arrays, and Dask Arrays contain most of the functionality of NumPy arrays, Xarray can also wrap Dask Arrays, allowing anyone with knowledge of Xarray to easily start using the Dask interface."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "57d12474-e71a-400f-b103-824f0de7288b",
+ "metadata": {},
+ "source": [
+ "### Reading data with `Dask` and `Xarray`\n",
+ "\n",
+ "As demonstrated in previous examples, a Dask Array consists of many smaller arrays, called chunks:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d6e932e0-ff01-412b-9629-1fb5590ffb0a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "darr"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4d7e0c25-3baa-45aa-bb4d-940480b2fbe9",
+ "metadata": {},
+ "source": [
+ "As shown in the following example, to read data into Xarray as Dask Arrays, simply specify the `chunks` keyword argument when calling the `open_dataset()` function:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3396e4e6-911b-4c40-a3f8-cdccf034a4ee",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds = xr.open_dataset(DATASETS.fetch('CESM2_sst_data.nc'), chunks={})\n",
+ "ds.tos"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f7333413-63ba-41f6-bf6c-8f2046402931",
+ "metadata": {},
+ "source": [
+ "While it is a valid operation to pass an empty list to the `chunks` keyword argument, this technique does not specify how to chunk the data, and therefore the resulting Dask Array contains only one chunk.\n",
+ "\n",
+ "Correct usage of the `chunks` keyword argument specifies how many values in each dimension are contained in a single chunk. In this example, specifying the chunks keyword argument as `chunks={'time':90}` indicates to Xarray and Dask that 90 time slices are allocated to each chunk on the temporal axis.\n",
+ "\n",
+ "Since this dataset contains 180 total time slices, the data variable `tos` (holding the sea surface temperature data) is now split into two chunks in the temporal dimension."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4e7ac6b2-500f-4371-98ca-dc28dfe27648",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds = xr.open_dataset(\n",
+ " DATASETS.fetch('CESM2_sst_data.nc'),\n",
+ " engine=\"netcdf4\",\n",
+ " chunks={\"time\": 90, \"lat\": 180, \"lon\": 360},\n",
+ ")\n",
+ "ds.tos"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8e175e4d-9950-48fb-b81c-4e67fa00b106",
+ "metadata": {},
+ "source": [
+ "It is fairly straightforward to retrieve a list of the chunks and their sizes for each dimension; simply call the `.chunks` method on an Xarray `DataArray`. In this example, we show that the `tos` `DataArray` now contains two chunks on the `time` dimension, with each chunk containing 90 time slices."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4265a298-275c-4f93-992f-6fe8de9a311a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds.tos.chunks"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "54853249-beec-4d74-8881-82d9d253c3b7",
+ "metadata": {},
+ "source": [
+ "### Xarray data structures are first-class dask collections\n",
+ "\n",
+ "If an Xarray `Dataset` or `DataArray` object uses a Dask Array, rather than a NumPy array, it counts as a first-class Dask collection. This means that you can pass such an object to `dask.visualize()` and `dask.compute()`, in the same way as an individual Dask Array.\n",
+ "\n",
+ "In this example, we call `dask.visualize` on our Xarray `DataArray`, displaying a Dask graph for the `DataArray` object:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "333de4bb-db42-42e8-95f9-5db108962ae7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dask.visualize(ds)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d827f1b1-7d13-4707-93c2-45c2f99a7e60",
+ "metadata": {},
+ "source": [
+ "### Parallel and lazy computation using `dask.array` with Xarray\n",
+ "\n",
+ "\n",
+ "As described above, Xarray `Datasets` and `DataArrays` containing Dask Arrays are first-class Dask collections. Therefore, computations performed on such objects are deferred until a compute method is called. (This is the definition of lazy computation.)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a6cc7961-d43e-4dca-84f2-d3f82631e1f0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "z = ds.tos.mean(['lat', 'lon']).dot(ds.tos.T)\n",
+ "z"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "764537d7-b8e2-4f92-9b5e-cf81a1d8baa7",
+ "metadata": {},
+ "source": [
+ "As shown in the above example, the result of the applied operations is an Xarray `DataArray` that contains a Dask Array, an identical object type to the object that the operations were performed on. This is true for any operations that can be applied to Xarray `DataArrays`, including subsetting operations; this next example illustrates this:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "631fb768-f900-448a-a605-a93bea26bdc8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "z.isel(lat=0)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "34679cf2-4907-44d7-b386-0c3cbc7ce6d2",
+ "metadata": {},
+ "source": [
+ "Because the data subset created above is also a first-class Dask collection, we can view its Dask graph using the `dask.visualize()` function, as shown in this example:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7ebf0c9e-ac35-4841-9294-2d9e1b6dbc24",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dask.visualize(z)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "56bf9674-57f2-4b49-a5d5-95a4eafac587",
+ "metadata": {},
+ "source": [
+ "Since this object is a first-class Dask collection, the computations performed on it have been deferred. To run these computations, we must call a compute method, in this case `.compute()`. This example also uses a progress bar to track the computation progress."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "30082ac8-694a-4a48-bf03-840720aaa9b7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "with ProgressBar():\n",
+ " computed_ds = z.compute()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7cf5002f-ed03-4318-935a-b5ce6e57434e",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f58e6da5-1492-4778-8821-aa03721e3db4",
+ "metadata": {},
+ "source": [
+ "## Summary\n",
+ "\n",
+ "This tutorial covered the use of Xarray to access Dask Arrays, and the use of the `chunks` keyword argument to open datasets with Dask data instead of NumPy data. Another important concept introduced in this tutorial is the usage of Xarray `Datasets` and `DataArrays` as Dask collections, allowing Xarray objects to be manipulated in a similar manner to Dask Arrays. Finally, the concepts of larger-than-memory datasets, lazy computation, and parallel computation, and how they relate to Xarray and Dask, were covered.\n",
+ "\n",
+ "### Dask Shortcomings\n",
+ "\n",
+ "Although Dask Arrays and NumPy arrays are generally interchangeable, NumPy offers some functionality that is lacking in Dask Arrays. The usage of Dask Array comes with the following relevant issues:\n",
+ "\n",
+ "1. Operations where the resulting shape depends on the array values can produce erratic behavior, or fail altogether, when used on a Dask Array. If the operation succeeds, the resulting Dask Array will have unknown chunk sizes, which can cause other sections of code to fail.\n",
+ "2. Operations that are by nature difficult to parallelize or less useful on very large datasets, such as `sort`, are not included in the Dask Array interface. Some of these operations have supported versions that are inherently more intuitive to parallelize, such as [`topk`](https://pytorch.org/docs/stable/generated/torch.topk.html).\n",
+ "3. Development of new Dask functionality is only initiated when such functionality is required; therefore, some lesser-used NumPy functions, such as `np.sometrue`, are not yet implemented in Dask. However, many of these functions can be added as community contributions, or have already been added in this manner.\n",
+ "\n",
+ "## Learn More\n",
+ "\n",
+ "For more in-depth information on Dask Arrays, visit the [official documentation page](https://docs.dask.org/en/latest/array.html). In addition, [this screencast](https://youtu.be/9h_61hXCDuI) reinforces the concepts covered in this tutorial. (If you are viewing this page as a Jupyter Notebook, the screencast will appear below as an embedded YouTube video.)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "21fbe02b-6bee-447b-bbc8-2ba8a0b96c87",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from IPython.display import YouTubeVideo\n",
+ "\n",
+ "YouTubeVideo(id=\"9h_61hXCDuI\", width=600, height=300)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c282d878-a11f-41a2-9737-caee406ad5c3",
+ "metadata": {},
+ "source": [
+ "## Resources and references\n",
+ "\n",
+ "* To find specific reference information about Dask and Xarray, see the official documentation pages listed below:\n",
+ " * [Dask Docs](https://dask.org/)\n",
+ " * [Dask Examples](https://examples.dask.org/)\n",
+ " * [Dask Code](https://github.com/dask/dask/)\n",
+ " * [Dask Blog](https://blog.dask.org/)\n",
+ " \n",
+ " * [Xarray Docs](https://xarray.pydata.org/)\n",
+ " \n",
+ "* If you require assistance with a specific issue involving Xarray or Dask, the following resources may be of use:\n",
+ " * Dask tag on StackOverflow, for usage questions\n",
+ " * [github discussions: dask](https://github.com/dask/dask/discussions) for general, non-bug, discussion, and usage questions\n",
+ " * [github issues: dask](https://github.com/dask/dask/issues/new) for bug reports and feature requests\n",
+ " * [github discussions: xarray](https://github.com/pydata/xarray/discussions) for general, non-bug, discussion, and usage questions\n",
+ " * [github issues: xarray](https://github.com/pydata/xarray/issues/new) for bug reports and feature requests\n",
+ " \n",
+ "* Certain sections of this tutorial are adapted from the following existing tutorials:\n",
+ " * [Dask Array Tutorial](https://tutorial.dask.org/02_array.html)\n",
+ " * [Parallel Computing with Xarray and Dask](https://tutorial.xarray.dev/intermediate/xarray_and_dask.html)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d7013ef8-33c4-4bd8-8acc-63cce238acb2",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.9"
+ },
+ "vscode": {
+ "interpreter": {
+ "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/_preview/468/_sources/core/xarray/enso-xarray.ipynb b/_preview/468/_sources/core/xarray/enso-xarray.ipynb
new file mode 100644
index 000000000..a419fd5f3
--- /dev/null
+++ b/_preview/468/_sources/core/xarray/enso-xarray.ipynb
@@ -0,0 +1,405 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "# Calculating ENSO with Xarray\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Overview\n",
+ "\n",
+ "In this tutorial, we perform and demonstrate the following tasks:\n",
+ "\n",
+ "1. Load SST data from the CESM2 model\n",
+ "2. Mask data using `.where()`\n",
+ "3. Compute climatologies and anomalies using `.groupby()`\n",
+ "4. Use `.rolling()` to compute moving average\n",
+ "5. Compute, normalize, and plot the Niño 3.4 Index"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Prerequisites\n",
+ "\n",
+ "\n",
+ "| Concepts | Importance | Notes |\n",
+ "| --- | --- | --- |\n",
+ "| [Introduction to Xarray](xarray-intro) | Necessary | |\n",
+ "| [Computation and Masking](computation-masking) | Necessary | |\n",
+ "\n",
+ "\n",
+ "\n",
+ "- **Time to learn**: 20 minutes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "## Imports \n",
+ "\n",
+ "For this tutorial, we import several Python packages. As plotting ENSO data requires a geographically accurate map, Cartopy is imported to handle geographic features and map projections. Xarray is used to manage raw data, and Matplotlib allows for feature-rich data plotting. Finally, a custom Pythia package is imported, in this case allowing access to the Pythia example data library."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import cartopy.crs as ccrs\n",
+ "import matplotlib.pyplot as plt\n",
+ "import xarray as xr\n",
+ "from pythia_datasets import DATASETS"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "## The Niño 3.4 Index"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "In this tutorial, we combine topics covered in previous Xarray tutorials to demonstrate a real-world example. The real-world scenario demonstrated in this tutorial is the computation of the [Niño 3.4 Index](https://climatedataguide.ucar.edu/climate-data/nino-sst-indices-nino-12-3-34-4-oni-and-tni), as shown in the CESM2 submission for the [CMIP6 project](https://esgf-node.llnl.gov/projects/cmip6/). A rough definition of Niño 3.4, in addition to a definition of Niño data computation, is listed below:\n",
+ "\n",
+ "> Niño 3.4 (5N-5S, 170W-120W): The Niño 3.4 anomalies may be thought of as representing the average equatorial SSTs across the Pacific from about the dateline to the South American coast. The Niño 3.4 index typically uses a 5-month running mean, and El Niño or La Niña events are defined when the Niño 3.4 SSTs exceed +/- 0.4C for a period of six months or more.\n",
+ "\n",
+ "> Niño X Index computation: a) Compute area averaged total SST from Niño X region; b) Compute monthly climatology (e.g., 1950-1979) for area averaged total SST from Niño X region, and subtract climatology from area averaged total SST time series to obtain anomalies; c) Smooth the anomalies with a 5-month running mean; d) Normalize the smoothed values by its standard deviation over the climatological period.\n",
+ "\n",
+ "![](https://www.ncdc.noaa.gov/monitoring-content/teleconnections/nino-regions.gif)\n",
+ "\n",
+ "The overall goal of this tutorial is to produce a plot of ENSO data using Xarray; this plot will resemble the Oceanic Niño Index plot shown below.\n",
+ "\n",
+ "![ONI index plot from NCAR Climate Data Guide](https://climatedataguide.ucar.edu/sites/default/files/styles/extra_large/public/2022-03/indices_oni_2_2_lg.png)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "In this first example, we begin by opening datasets containing the sea-surface temperature (SST) and grid-cell size data. (These datasets are taken from the Pythia example data library, using the Pythia package imported above.) The two datasets are then combined into a single dataset using Xarray's `merge` method."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "filepath = DATASETS.fetch('CESM2_sst_data.nc')\n",
+ "data = xr.open_dataset(filepath)\n",
+ "filepath2 = DATASETS.fetch('CESM2_grid_variables.nc')\n",
+ "areacello = xr.open_dataset(filepath2).areacello\n",
+ "\n",
+ "ds = xr.merge([data, areacello])\n",
+ "ds"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This example uses Matplotlib and Cartopy to plot the first time slice of the dataset on an actual geographic map. By doing so, we verify that the data values fit the pattern of SST data:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig = plt.figure(figsize=(12, 6))\n",
+ "ax = plt.axes(projection=ccrs.Robinson(central_longitude=180))\n",
+ "ax.coastlines()\n",
+ "ax.gridlines()\n",
+ "ds.tos.isel(time=0).plot(\n",
+ " ax=ax, transform=ccrs.PlateCarree(), vmin=-2, vmax=30, cmap='coolwarm'\n",
+ ");"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Select the Niño 3.4 region \n",
+ "\n",
+ "In this set of examples, we demonstrate the selection of data values from a dataset which are located in the Niño 3.4 geographic region. The following example illustrates a selection technique that uses the `sel()` or `isel()` method:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tos_nino34 = ds.sel(lat=slice(-5, 5), lon=slice(190, 240))\n",
+ "tos_nino34"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This example illustrates the alternate technique for selecting Niño 3.4 data, which makes use of the `where()` method:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tos_nino34 = ds.where(\n",
+ " (ds.lat < 5) & (ds.lat > -5) & (ds.lon > 190) & (ds.lon < 240), drop=True\n",
+ ")\n",
+ "tos_nino34"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Finally, we plot the selected region to ensure it fits the definition of the Niño 3.4 region:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig = plt.figure(figsize=(12, 6))\n",
+ "ax = plt.axes(projection=ccrs.Robinson(central_longitude=180))\n",
+ "ax.coastlines()\n",
+ "ax.gridlines()\n",
+ "tos_nino34.tos.isel(time=0).plot(\n",
+ " ax=ax, transform=ccrs.PlateCarree(), vmin=-2, vmax=30, cmap='coolwarm'\n",
+ ")\n",
+ "ax.set_extent((120, 300, 10, -10))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Compute the anomalies\n",
+ "\n",
+ "There are three main steps to obtain the anomalies from the Niño 3.4 dataset created in the previous set of examples. First, we use the `groupby()` method to convert to monthly data. Second, we subtract the mean sea-surface temperature (SST) from the monthly data. Finally, we obtain the anomalies by computing a weighted average. These steps are illustrated in the next example:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "gb = tos_nino34.tos.groupby('time.month')\n",
+ "tos_nino34_anom = gb - gb.mean(dim='time')\n",
+ "index_nino34 = tos_nino34_anom.weighted(tos_nino34.areacello).mean(dim=['lat', 'lon'])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In this example, we smooth the data curve by applying a `mean` function with a 5-month moving window to the anomaly dataset. We then plot the smoothed data against the original data to demonstrate:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "index_nino34_rolling_mean = index_nino34.rolling(time=5, center=True).mean()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "index_nino34.plot(size=8)\n",
+ "index_nino34_rolling_mean.plot()\n",
+ "plt.legend(['anomaly', '5-month running mean anomaly'])\n",
+ "plt.title('SST anomaly over the Niño 3.4 region');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Since the ENSO index conveys deviations from a norm, the calculation of Niño data requires a standard deviation. In this example, we calculate the standard deviation of the SST in the Niño 3.4 region data, across the entire time period of the data array:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "std_dev = tos_nino34.tos.std()\n",
+ "std_dev"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The final step of the Niño 3.4 index calculation involves normalizing the data. In this example, we perform this normalization by dividing the smoothed anomaly data by the standard deviation calculated above:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "normalized_index_nino34_rolling_mean = index_nino34_rolling_mean / std_dev"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Visualize the computed Niño 3.4 index"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In this example, we use Matplotlib to generate a plot of our final Niño 3.4 data. This plot is set up to highlight values above 0.5, corresponding to El Niño (warm) events, and values below -0.5, corresponding to La Niña (cold) events."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig = plt.figure(figsize=(12, 6))\n",
+ "\n",
+ "plt.fill_between(\n",
+ " normalized_index_nino34_rolling_mean.time.data,\n",
+ " normalized_index_nino34_rolling_mean.where(\n",
+ " normalized_index_nino34_rolling_mean >= 0.4\n",
+ " ).data,\n",
+ " 0.4,\n",
+ " color='red',\n",
+ " alpha=0.9,\n",
+ ")\n",
+ "plt.fill_between(\n",
+ " normalized_index_nino34_rolling_mean.time.data,\n",
+ " normalized_index_nino34_rolling_mean.where(\n",
+ " normalized_index_nino34_rolling_mean <= -0.4\n",
+ " ).data,\n",
+ " -0.4,\n",
+ " color='blue',\n",
+ " alpha=0.9,\n",
+ ")\n",
+ "\n",
+ "normalized_index_nino34_rolling_mean.plot(color='black')\n",
+ "plt.axhline(0, color='black', lw=0.5)\n",
+ "plt.axhline(0.4, color='black', linewidth=0.5, linestyle='dotted')\n",
+ "plt.axhline(-0.4, color='black', linewidth=0.5, linestyle='dotted')\n",
+ "plt.title('Niño 3.4 Index');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Summary\n",
+ "\n",
+ "This tutorial covered the use of Xarray features, including selection, grouping, and statistical functions, to compute and visualize a data index important to climate science."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Resources and References\n",
+ "\n",
+ "- [Niño 3.4 index](https://climatedataguide.ucar.edu/climate-data/nino-sst-indices-nino-12-3-34-4-oni-and-tni)\n",
+ "- [Matplotlib's `fill_between` method](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.fill_between.html)\n",
+ "- [Matplotlib's `axhline` method](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.axhline.html) (see also its analogous `axvline` method)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.9"
+ },
+ "toc-autonumbering": false,
+ "toc-showcode": false,
+ "toc-showmarkdowntxt": false,
+ "toc-showtags": false
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/_preview/468/_sources/core/xarray/xarray-intro.ipynb b/_preview/468/_sources/core/xarray/xarray-intro.ipynb
new file mode 100644
index 000000000..bec195c1a
--- /dev/null
+++ b/_preview/468/_sources/core/xarray/xarray-intro.ipynb
@@ -0,0 +1,938 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "![xarray Logo](https://docs.xarray.dev/en/stable/_static/Xarray_Logo_RGB_Final.svg \"xarray Logo\")\n",
+ "\n",
+ "# Introduction to Xarray"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Overview\n",
+ "\n",
+ "The examples in this tutorial focus on the fundamentals of working with gridded, labeled data using Xarray. Xarray works by introducing additional abstractions into otherwise ordinary data arrays. In this tutorial, we demonstrate the usefulness of these abstractions. The examples in this tutorial explain how the proper usage of Xarray abstractions generally leads to simpler, more robust code.\n",
+ "\n",
+ "The following topics will be covered in this tutorial:\n",
+ "\n",
+ "1. Create a `DataArray`, one of the core object types in Xarray\n",
+ "1. Understand how to use named coordinates and metadata in a `DataArray`\n",
+ "1. Combine individual `DataArrays` into a `Dataset`, the other core object type in Xarray\n",
+ "1. Subset, slice, and interpolate the data using named coordinates\n",
+ "1. Open netCDF data using Xarray\n",
+ "1. Basic subsetting and aggregation of a `Dataset`\n",
+ "1. Brief introduction to plotting with Xarray"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Prerequisites\n",
+ "\n",
+ "| Concepts | Importance | Notes |\n",
+ "| --- | --- | --- |\n",
+ "| [NumPy Basics](../numpy/numpy-basics) | Necessary | |\n",
+ "| [Intermediate NumPy](../numpy/intermediate-numpy) | Helpful | Familiarity with indexing and slicing arrays |\n",
+ "| [NumPy Broadcasting](../numpy/numpy-broadcasting) | Helpful | Familiarity with array arithmetic and broadcasting |\n",
+ "| [Introduction to Pandas](../pandas/pandas) | Helpful | Familiarity with labeled data |\n",
+ "| [Datetime](../datetime/datetime) | Helpful | Familiarity with time formats and the `timedelta` object |\n",
+ "| [Understanding of NetCDF](some-link-to-external-resource) | Helpful | Familiarity with metadata structure |\n",
+ "\n",
+ "- **Time to learn**: 40 minutes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Imports"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In earlier tutorials, we explained the abbreviation of commonly used scientific Python package names in import statements. Just as `numpy` is abbreviated `np`, and just as `pandas` is abbreviated `pd`, the name `xarray` is often abbreviated `xr` in import statements. In addition, we also import `pythia_datasets`, which provides sample data used in these examples."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from datetime import timedelta\n",
+ "\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import xarray as xr\n",
+ "from pythia_datasets import DATASETS"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Introducing the `DataArray` and `Dataset`\n",
+ "\n",
+ "As stated in earlier tutorials, NumPy arrays contain many useful features, making NumPy an essential part of the scientific Python stack. Xarray expands on these features, adding streamlined data manipulation capabilities. These capabilities are similar to those provided by Pandas, except that they are focused on gridded N-dimensional data instead of tabular data. Its interface is based largely on the netCDF data model (variables, attributes, and dimensions), but it goes beyond the traditional netCDF interfaces in order to provide additional useful functionality, similar to netCDF-java's [Common Data Model (CDM)](https://docs.unidata.ucar.edu/netcdf-java/current/userguide/common_data_model_overview.html). "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Creation of a `DataArray` object\n",
+ "\n",
+ "The `DataArray` in one of the most basic elements of Xarray; a `DataArray` object is similar to a numpy `ndarray` object. (For more information, see the documentation [here](http://xarray.pydata.org/en/stable/user-guide/data-structures.html#dataarray).) In addition to retaining most functionality from NumPy arrays, Xarray `DataArrays` provide two critical pieces of functionality:\n",
+ "\n",
+ "1. Coordinate names and values are stored with the data, making slicing and indexing much more powerful.\n",
+ "2. Attributes, similar to those in netCDF files, can be stored in a container built into the `DataArray`.\n",
+ "\n",
+ "In these examples, we create a NumPy array, and use it as a wrapper for a new `DataArray` object; we then explore some properties of a `DataArray`."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Generate a random numpy array\n",
+ "\n",
+ "In this first example, we create a numpy array, holding random placeholder data of temperatures in Kelvin:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data = 283 + 5 * np.random.randn(5, 3, 4)\n",
+ "data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Wrap the array: first attempt\n",
+ "\n",
+ "For our first attempt at wrapping a NumPy array into a `DataArray`, we simply use the `DataArray` method of Xarray, passing the NumPy array we just created:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temp = xr.DataArray(data)\n",
+ "temp"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Note two things:\n",
+ "\n",
+ "1. Since NumPy arrays have no dimension names, our new `DataArray` takes on placeholder dimension names, in this case `dim_0`, `dim_1`, and `dim_2`. In our next example, we demonstrate how to add more meaningful dimension names.\n",
+ "2. If you are viewing this page as a Jupyter Notebook, running the above example generates a rich display of the data contained in our `DataArray`. This display comes with many ways to explore the data; for example, clicking the array symbol expands or collapses the data view."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Assign dimension names\n",
+ "\n",
+ "Much of the power of Xarray comes from making use of named dimensions. In order to make full use of this, we need to provide more useful dimension names. We can generate these names when creating a `DataArray` by passing an ordered list of names to the `DataArray` method, using the keyword argument `dims`:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temp = xr.DataArray(data, dims=['time', 'lat', 'lon'])\n",
+ "temp"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This `DataArray` is already an improvement over a NumPy array; the `DataArray` contains names for each of the dimensions (or axes in NumPy parlance). An additional improvement is the association of coordinate-value arrays with data upon creation of a `DataArray`. In the next example, we illustrate the creation of NumPy arrays representing the coordinate values for each dimension of the `DataArray`, and how to associate these coordinate arrays with the data in our `DataArray`."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Create a `DataArray` with named Coordinates\n",
+ "\n",
+ "#### Make time and space coordinates\n",
+ "\n",
+ "In this example, we use [Pandas](../pandas) to create an array of [datetime data](../datetime). This array will be used in a later example to add a named coordinate, called `time`, to a `DataArray`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "times = pd.date_range('2018-01-01', periods=5)\n",
+ "times"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Before associating coordinates with our `DataArray`, we must also create latitude and longitude coordinate arrays. In these examples, we use placeholder data, and create the arrays in NumPy format:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "lons = np.linspace(-120, -60, 4)\n",
+ "lats = np.linspace(25, 55, 3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Initialize the `DataArray` with complete coordinate info\n",
+ "\n",
+ "In this example, we create a new `DataArray`. Similar to an earlier example, we use the `dims` keyword argument to specify the dimension names; however, in this case, we also specify the coordinate arrays using the `coords` keyword argument:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temp = xr.DataArray(data, coords=[times, lats, lons], dims=['time', 'lat', 'lon'])\n",
+ "temp"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Set useful attributes\n",
+ "\n",
+ "As described above, `DataArrays` have a built-in container for attribute metadata. These attributes are similar to those in netCDF files, and are added to a `DataArray` using its `attrs` method:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temp.attrs['units'] = 'kelvin'\n",
+ "temp.attrs['standard_name'] = 'air_temperature'\n",
+ "\n",
+ "temp"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Issues with preservation of attributes\n",
+ "\n",
+ "In this example, we illustrate an important concept relating to attributes. When a mathematical operation is performed on a `DataArray`, all of the coordinate arrays remain attached to the `DataArray`, but any attribute metadata assigned is lost. Attributes are removed in this way due to the fact that they may not convey correct or appropriate metadata after an arbitrary arithmetic operation.\n",
+ "\n",
+ "This example converts our DataArray values from Kelvin to degrees Celsius. Pay attention to the attributes in the Jupyter rich display below. (If you are not viewing this page as a Jupyter notebook, see the Xarray documentation to learn how to display the attributes.)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temp_in_celsius = temp - 273.15\n",
+ "temp_in_celsius"
+ ]
+ },
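+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "If you do need to retain attributes through an operation, you can enable Xarray's `keep_attrs` option, as in the minimal sketch below. Note that the retained `units` attribute (`kelvin`) is no longer accurate for the Celsius values; this is precisely why dropping attributes is the default behavior."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Retain attribute metadata for operations performed inside this context manager\n",
+ "with xr.set_options(keep_attrs=True):\n",
+ "    temp_in_celsius_keeping_attrs = temp - 273.15\n",
+ "temp_in_celsius_keeping_attrs"
+ ]
+ },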
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In addition, if you need more details on how Xarray handles metadata, you can review this [documentation page](http://xarray.pydata.org/en/stable/getting-started-guide/faq.html#approach-to-metadata)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### The `Dataset`: a container for `DataArray`s with shared coordinates\n",
+ "\n",
+ "Along with the `DataArray`, the other main object type in Xarray is the `Dataset`. `Datasets` are containers similar to Python dictionaries; each `Dataset` can hold one or more `DataArrays`. In addition, the `DataArrays` contained in a `Dataset` can share coordinates, although this behavior is optional. (For more information, see the [official documentation page](http://xarray.pydata.org/en/stable/user-guide/data-structures.html#dataset).)\n",
+ "\n",
+ "`Dataset` objects are most often created by loading data from a data file. We will cover this functionality in a later example; in this example, we will create a `Dataset` from two `DataArrays`. We will use our existing temperature `DataArray` for one of these `DataArrays`; the other one is created in the next example.\n",
+ "\n",
+ "In addition, both of these `DataArrays` will share coordinate axes. Therefore, the next example will also illustrate the usage of common coordinate axes across `DataArrays` in a `Dataset`."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Create a pressure `DataArray` using the same coordinates\n",
+ "\n",
+ "In this example, we create a `DataArray` object to hold pressure data. This new `DataArray` is set up in a very similar fashion to the temperature `DataArray` created above."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pressure_data = 1000.0 + 5 * np.random.randn(5, 3, 4)\n",
+ "pressure = xr.DataArray(\n",
+ " pressure_data, coords=[times, lats, lons], dims=['time', 'lat', 'lon']\n",
+ ")\n",
+ "pressure.attrs['units'] = 'hPa'\n",
+ "pressure.attrs['standard_name'] = 'air_pressure'\n",
+ "\n",
+ "pressure"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Create a `Dataset` object\n",
+ "\n",
+ "Before we can create a `Dataset` object, we must first name each of the `DataArray` objects that will be added to the new `Dataset`.\n",
+ "\n",
+ "To name the `DataArrays` that will be added to our `Dataset`, we can set up a Python dictionary as shown in the next example. We can then pass this dictionary to the `Dataset` method using the keyword argument `data_vars`; this creates a new `Dataset` containing both of our `DataArrays`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds = xr.Dataset(data_vars={'Temperature': temp, 'Pressure': pressure})\n",
+ "ds"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As listed in the rich display above, the new `Dataset` object is aware that both `DataArrays` share the same coordinate axes. (Please note that if this page is not run as a Jupyter Notebook, the rich display may be unavailable.)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Access Data variables and Coordinates in a `Dataset`\n",
+ "\n",
+ "This set of examples illustrates different methods for retrieving `DataArrays` from a `Dataset`.\n",
+ "\n",
+ "This first example shows how to retrieve `DataArrays` using the \"dot\" notation:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds.Pressure"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In addition, you can access `DataArrays` through a dictionary syntax, as shown in this example:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds['Pressure']"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "`Dataset` objects are mainly used for loading data from files, which will be covered later in this tutorial."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Subsetting and selection by coordinate values\n",
+ "\n",
+ "Much of the power of labeled coordinates comes from the ability to select data based on coordinate names and values instead of array indices. This functionality will be covered on a basic level in these examples. (Later tutorials will cover this topic in much greater detail.)\n",
+ "\n",
+ "### NumPy-like selection\n",
+ "\n",
+ "In these examples, we are trying to extract all of our spatial data for a single date; in this case, January 2, 2018. For our first example, we retrieve spatial data using index selection, as with a NumPy array:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "indexed_selection = temp[1, :, :] # Index 1 along axis 0 is the time slice we want...\n",
+ "indexed_selection"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This example reveals one of the major shortcomings of index selection. In order to retrieve the correct data using index selection, anyone using a `DataArray` must have precise knowledge of the axes in the `DataArray`, including the order of the axes and the meaning of their indices.\n",
+ "\n",
+ "By using named coordinates, as shown in the next set of examples, we can avoid this cumbersome burden."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Selecting with `.sel()`\n",
+ "\n",
+ "In this example, we show how to select data based on coordinate values, by way of the `.sel()` method. This method takes one or more named coordinates in keyword-argument format, and returns data matching the coordinates."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "named_selection = temp.sel(time='2018-01-02')\n",
+ "named_selection"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This method yields the same result as the index selection, however:\n",
+ "- we didn't have to know anything about how the array was created or stored\n",
+ "- our code is agnostic about how many dimensions we are dealing with\n",
+ "- the intended meaning of our code is much clearer"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Approximate selection and interpolation\n",
+ "\n",
+ "When working with temporal and spatial data, it is a common practice to sample data close to the coordinate points in a dataset. The following set of examples illustrates some common techniques for this practice.\n",
+ "\n",
+ "#### Nearest-neighbor sampling\n",
+ "\n",
+ "In this example, we are trying to sample a temporal data point within 2 days of the date `2018-01-07`. Since the final date on our `DataArray`'s temporal axis is `2018-01-05`, this is an appropriate problem.\n",
+ "\n",
+ "We can use the `.sel()` method to perform nearest-neighbor sampling, by setting the `method` keyword argument to 'nearest'. We can also optionally provide a `tolerance` argument; with temporal data, this is a `timedelta` object."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temp.sel(time='2018-01-07', method='nearest', tolerance=timedelta(days=2))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Using the rich display above, we can see that `.sel` indeed returned the data at the temporal value corresponding to the date `2018-01-05`."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Interpolation\n",
+ "\n",
+ "In this example, we are trying to extract a timeseries for Boulder, CO, which is located at 40°N latitude and 105°W longitude. Our `DataArray` does not contain a longitude data value of -105, so in order to retrieve this timeseries, we must interpolate between data points.\n",
+ "\n",
+ "The `.interp()` method allows us to retrieve data from any latitude and longitude by means of interpolation. This method uses coordinate-value selection, similarly to `.sel()`. (For more information on the `.interp()` method, see the official documentation [here](http://xarray.pydata.org/en/stable/interpolation.html).)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temp.interp(lon=-105, lat=40)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
Info
\n",
+ " In order to interpolate data using Xarray, the SciPy package must be imported. You can learn more about SciPy from the official documentation.\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Slicing along coordinates\n",
+ "\n",
+ "Frequently, it is useful to select a range, or _slice_, of data along one or more coordinates. In order to understand this process, you must first understand Python `slice` objects. If you are unfamiliar with `slice` objects, you should first read the official [Python slice documentation](https://docs.python.org/3/library/functions.html#slice). Once you are proficient using `slice` objects, you can create slices of data by passing `slice` objects to the `.sel` method, as shown below:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temp.sel(\n",
+ " time=slice('2018-01-01', '2018-01-03'), lon=slice(-110, -70), lat=slice(25, 45)\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
Info
\n",
+ " As detailed in the documentation page linked above, the slice function uses the argument order (start, stop[, step]), where step is optional.\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Because we are now working with a slice of data, instead of our full dataset, the lengths of our coordinate axes have been shortened, as shown in the Jupyter rich display above. (You may need to use a different display technique if you are not running this page as a Jupyter Notebook.)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### One more selection method: `.loc`\n",
+ "\n",
+ "In addition to using the `sel()` method to select data from a `DataArray`, you can also use the `.loc` attribute. Every `DataArray` has a `.loc` attribute; in order to leverage this attribute to select data, you can specify a coordinate value in square brackets, as shown below:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temp.loc['2018-01-02']"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This selection technique is similar to NumPy's index-based selection, as shown below:\n",
+ "```\n",
+ "temp[1,:,:]\n",
+ "```\n",
+ "However, this technique also resembles the `.sel()` method's fully label-based selection functionality. The advantages and disadvantages of using the `.loc` attribute are discussed in detail below.\n",
+ "\n",
+ "This example illustrates a significant disadvantage of using the `.loc` attribute. Namely, we specify the values for each coordinate, but cannot specify the dimension names; therefore, the dimensions must be specified in the correct order, and this order must already be known:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temp.loc['2018-01-01':'2018-01-03', 25:45, -110:-70]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In contrast with the previous example, this example shows a useful advantage of using the `.loc` attribute. When using the `.loc` attribute, you can specify data slices using a syntax similar to NumPy in addition to, or instead of, using the slice function. Both of these slicing techniques are illustrated below:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temp.loc['2018-01-01':'2018-01-03', slice(25, 45), -110:-70]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As described above, the arguments to `.loc` must be in the order of the `DataArray`'s dimensions. Attempting to slice data without ordering arguments properly can cause errors, as shown below:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# This will generate an error\n",
+ "# temp.loc[-110:-70, 25:45,'2018-01-01':'2018-01-03']"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Opening netCDF data\n",
+ "\n",
+ "Xarray has close ties to the netCDF data format; as such, netCDF was chosen as the premier data file format for Xarray. Hence, Xarray can easily open netCDF datasets, provided they conform to certain limitations (for example, 1-dimensional coordinates).\n",
+ "\n",
+ "### Access netCDF data with `xr.open_dataset`"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
Info
\n",
+ " The data file for this example, NARR_19930313_0000.nc, is retrieved from Project Pythia's custom example data library. The DATASETS class imported at the top of this page contains a .fetch() method, which retrieves, downloads, and caches a Pythia example data file.\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "filepath = DATASETS.fetch('NARR_19930313_0000.nc')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Once we have a valid path to a data file that Xarray knows how to read, we can open the data file and load it into Xarray; this is done by passing the path to Xarray's `open_dataset` method, as shown below:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds = xr.open_dataset(filepath)\n",
+ "ds"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Subsetting the `Dataset`\n",
+ "\n",
+ "Xarray's `open_dataset()` method, shown in the previous example, returns a `Dataset` object, which must then be assigned to a variable; in this case, we call the variable `ds`. Once the netCDF dataset is loaded into an Xarray `Dataset`, we can pull individual `DataArrays` out of the `Dataset`, using the technique described earlier in this tutorial. In this example, we retrieve isobaric pressure data, as shown below:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds.isobaric1"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "(As described earlier in this tutorial, we can also use dictionary syntax to select specific `DataArrays`; in this case, we would write `ds['isobaric1']`.)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Many of the subsetting operations usable on `DataArrays` can also be used on `Datasets`. However, when used on `Datasets`, these operations are performed on every `DataArray` in the `Dataset`, as shown below:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds_1000 = ds.sel(isobaric1=1000.0)\n",
+ "ds_1000"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As shown above, the subsetting operation performed on the `Dataset` returned a new `Dataset`. If only a single `DataArray` is needed from this new `Dataset`, it can be retrieved using the familiar dot notation:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds_1000.Temperature_isobaric"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Aggregation operations\n",
+ "\n",
+ "As covered earlier in this tutorial, you can use named dimensions in an Xarray `Dataset` to manually slice and index data. However, these dimension names also serve an additional purpose: you can use them to specify dimensions to aggregate on. There are many different aggregation operations available; in this example, we focus on `std` (standard deviation)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "u_winds = ds['u-component_of_wind_isobaric']\n",
+ "u_winds.std(dim=['x', 'y'])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
Info
\n",
+ " Recall from previous tutorials that aggregations in NumPy operate over axes specified by numeric values. However, with Xarray objects, aggregation dimensions are instead specified through a list passed to the dim keyword argument.\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For this set of examples, we will be using the sample dataset defined above. The calculations performed in these examples compute the mean temperature profile, defined as temperature as a function of pressure, over Colorado. For the purposes of these examples, the bounds of Colorado are defined as follows:\n",
+ " * x: -182km to 424km\n",
+ " * y: -1450km to -990km\n",
+ " \n",
+ "This dataset uses a Lambert Conformal projection; therefore, the data values shown above are projected to specific latitude and longitude values. In this example, these latitude and longitude values are 37°N to 41°N and 102°W to 109°W. Using the original data values and the `mean` aggregation function as shown below yields the following mean temperature profile data:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temps = ds.Temperature_isobaric\n",
+ "co_temps = temps.sel(x=slice(-182, 424), y=slice(-1450, -990))\n",
+ "prof = co_temps.mean(dim=['x', 'y'])\n",
+ "prof"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Plotting with Xarray\n",
+ "\n",
+ "As demonstrated earlier in this tutorial, there are many benefits to storing data as Xarray `DataArrays` and `Datasets`. In this section, we will cover another major benefit: Xarray greatly simplifies plotting of data stored as `DataArrays` and `Datasets`. One advantage of this is that many common plot elements, such as axis labels, are automatically generated and optimized for the data being plotted. The next set of examples demonstrates this and provides a general overview of plotting with Xarray.\n",
+ "\n",
+ "### Simple visualization with `.plot()`\n",
+ "\n",
+ "Similarly to [Pandas](../pandas/pandas), Xarray includes a built-in plotting interface, which makes use of [Matplotlib](../matplotlib) behind the scenes. In order to use this interface, you can call the `.plot()` method, which is included in every `DataArray`.\n",
+ "\n",
+ "In this example, we show how to create a basic plot from a `DataArray`. In this case, we are using the `prof` `DataArray` defined above, which contains a Colorado mean temperature profile."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "prof.plot()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In the figure shown above, Xarray has generated a line plot, which uses the mean temperature profile and the `'isobaric'` coordinate variable as axes. In addition, the axis labels and unit information have been read automatically from the `DataArray`'s metadata."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Customizing the plot\n",
+ "\n",
+ "As mentioned above, the `.plot()` method of Xarray `DataArrays` uses Matplotlib behind the scenes. Therefore, knowledge of Matplotlib can help you more easily customize plots generated by Xarray.\n",
+ "\n",
+ "In this example, we need to customize the air temperature profile plot created above. There are two changes that need to be made:\n",
+ "- swap the axes, so that the Y (vertical) axis corresponds to isobaric levels\n",
+ "- invert the Y axis to match the model of air pressure decreasing at higher altitudes\n",
+ "\n",
+ "We can make these changes by adding certain keyword arguments when calling `.plot()`, as shown below:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "prof.plot(y=\"isobaric1\", yincrease=False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Plotting 2-D data\n",
+ "\n",
+ "In the previous example, we used `.plot()` to generate a plot from 1-D data, and the result was a line plot. In this section, we illustrate plotting of 2-D data.\n",
+ "\n",
+ "In this example, we illustrate basic plotting of a 2-D array:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temps.sel(isobaric1=1000).plot()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The figure above is generated by Matplotlib's `pcolormesh` method, which was automatically called by Xarray's `plot` method. This occurred because Xarray recognized that the `DataArray` object calling the `plot` method contained two distinct coordinate variables.\n",
+ "\n",
+ "The plot generated by the above example is a map of air temperatures over North America, on the 1000 hPa isobaric surface. If a different map projection or added geographic features are needed on this plot, the plot can easily be modified using [Cartopy](../cartopy)."
+ ]
+ },
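+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Because `.plot()` passes many keyword arguments through to Matplotlib, the appearance of the figure can be adjusted directly in the call. As a minimal, purely illustrative sketch (the colormap choice here is ours, not part of the original example), we can redraw the same 2-D temperature map with a different colormap:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "temps.sel(isobaric1=1000).plot(cmap=\"coolwarm\")"
+ ]
+ },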
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Summary\n",
+ "\n",
+ "Xarray expands on Pandas' labeled-data functionality, bringing the usefulness of labeled data operations to N-dimensional data. As such, it has become a central workhorse in the geoscience community for the analysis of gridded datasets. Xarray allows us to open self-describing NetCDF files and make full use of the coordinate axes, labels, units, and other metadata. By making use of labeled coordinates, our code is often easier to write, easier to read, and more robust.\n",
+ "\n",
+ "### What's next?\n",
+ "\n",
+ "Additional notebooks to appear in this section will describe the following topics in greater detail:\n",
+ "- performing arithmetic and broadcasting operations with Xarray data structures\n",
+ "- using \"group by\" operations\n",
+ "- remote data access with OPeNDAP\n",
+ "- more advanced visualization, including map integration with Cartopy"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Resources and references\n",
+ "\n",
+ "This tutorial contains content adapted from the material in [Unidata's Python Training](https://unidata.github.io/python-training/workshop/XArray/xarray-and-cf/).\n",
+ "\n",
+ "Most basic questions and issues with Xarray can be resolved with help from the material in the [Xarray documentation](http://xarray.pydata.org/en/stable/). Some of the most popular sections of this documentation are listed below:\n",
+ "- [Why Xarray](http://xarray.pydata.org/en/stable/getting-started-guide/why-xarray.html)\n",
+ "- [Quick overview](http://xarray.pydata.org/en/stable/getting-started-guide/quick-overview.html#)\n",
+ "- [Example gallery](http://xarray.pydata.org/en/stable/gallery.html)\n",
+ "\n",
+ "Another resource you may find useful is this [Xarray Tutorial collection](https://xarray-contrib.github.io/xarray-tutorial/), created from content hosted on GitHub."
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/_preview/468/_sources/foundations/conda.md b/_preview/468/_sources/foundations/conda.md
new file mode 100644
index 000000000..f371876cb
--- /dev/null
+++ b/_preview/468/_sources/foundations/conda.md
@@ -0,0 +1,134 @@
+# Installing and Managing Python with Conda
+
+---
+
+## Overview
+
+Conda is an open-source, cross-platform, language-agnostic package manager and environment management system that allows you to quickly install, run, and update packages within your work environment(s).
+
+Here we will cover:
+
+1. What are packages?
+2. Installing Conda
+3. Creating a Conda environment
+4. Useful Conda commands
+
+## Prerequisites
+
+| Concepts | Importance | Notes |
+| --------------------------------------------------------------------------------------------------------- | ---------- | ----- |
+| [Installing and Running Python](https://foundations.projectpythia.org/foundations/how-to-run-python.html) | Helpful | |
+
+- **Time to learn**: 20 minutes
+
+---
+
+## What are Packages?
+
+A Python package is a collection of modules, which, in turn, are essentially Python scripts that contain published functionality. There are Python packages for data input, data analysis, data visualization, etc. Each package offers a unique toolset and may have its own unique syntax rules.
+
+Package management is useful because you may want to update a package for one of your projects, but keep it at the same version in other projects to ensure that they continue to run as expected.
+
+## Installing Conda
+
+We recommend you install Miniconda. You can do that by following the [instructions for your machine](https://docs.conda.io/en/latest/miniconda.html).
+
+Miniconda only comes with the `conda` package management system; it is a pared-down version of the full Anaconda Python distribution.
+
+[Installing Anaconda](https://docs.anaconda.com/anaconda/install/) takes longer and uses up more disk space, but provides you with more functionality, including Spyder (a Python-specific integrated development environment or IDE) and Jupyter, in addition to other immediately installed packages. Also, the interface of Anaconda is great if you are uncomfortable with the terminal.
+
+We recommend Miniconda for two reasons:
+
+1. It's quicker and takes up less disk space.
+2. It encourages you to install only the packages you need in reproducible isolated environments for specific projects. This is generally a more robust way to work with open source tools.
+
+Once you have `conda` via the Miniconda installer, the next step is to create an environment and install packages.
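+
+Before that, you can confirm the installation worked by checking the version from a terminal (a quick sanity check; your version number will differ):
+
+```
+conda --version
+```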
+
+## Creating a Conda Environment
+
+A Conda environment is an interoperable collection of specific versions of packages or libraries that you install and use for a specific workflow. The Conda package manager takes care of dependencies, so everything works together in a predictable way. One huge advantage of using environments is that any changes you make to one environment will not affect your other environments at all, so you are much less likely to "break" something!
+
+To create a new Conda environment, type `conda create --name` and the name of your environment in your terminal, and then specify any packages that you would like to have installed. For example, to install a Jupyter-ready environment called `sample_environment`, type
+
+```
+conda create --name sample_environment python jupyterlab
+```
+
+Once the environment is created, you need to _activate_ it in the current terminal session (see below).
+
+It is a good idea to create a new environment for every project. Because Python is open source, new versions of the tools are released frequently. Isolated environments help guarantee that your scripts use the same versions of packages and libraries to ensure they run as expected. Similarly, it is best practice to NOT modify your `base` environment.
+
+## Useful Conda commands
+
+Some other Conda commands that you will find useful include:
+
+- Activating a specific environment
+
+```
+conda activate sample_environment
+```
+
+- Deactivating the current environment
+
+```
+conda deactivate
+```
+
+- Checking what packages/versions are installed in the current environment
+
+```
+conda list
+```
+
+- Installing a new package into the current environment
+
+```
+conda install somepackage
+```
+
+- Installing a specific version of a package into the current environment
+
+```
+conda install somepackage=0.17
+```
+
+- Updating all packages in the current environment to the latest versions
+
+```
+conda update --all
+```
+
+- Checking what conda environments you have
+
+```
+conda env list
+```
+
+- Deleting an environment
+
+```
+conda env remove --name sample_environment
+```
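+
+- Saving the packages in the current environment to a file, and recreating the environment from that file elsewhere (the file name `environment.yml` is just a common convention)
+
+```
+conda env export > environment.yml
+conda env create -f environment.yml
+```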
+
+You can find lots more information in the [Conda documentation](https://docs.conda.io/en/latest/) or this handy [Conda cheat sheet](https://docs.conda.io/projects/conda/en/latest/_downloads/843d9e0198f2a193a3484886fa28163c/conda-cheatsheet.pdf).
+
+If you're not a command-line user, the Anaconda Navigator offers GUI functionality for selecting environments and installing packages.
+
+---
+
+## Summary
+
+Conda is a package and environment management system that allows you to quickly install, run, and update packages within your work environment(s). This is important for gathering all of the tools necessary for your workflow. Conda can be managed in the command line or in the Anaconda GUI.
+
+### What's Next?
+
+- [How to Run Python in the Terminal](terminal.md)
+- [How to Run Python in a Jupyter Session](jupyter.md)
+
+## Resources and References
+
+- [Linux commands](https://cheatography.com/davechild/cheat-sheets/linux-command-line/)
+- [Conda documentation](https://docs.conda.io/en/latest/)
+- [Conda cheat sheet](https://docs.conda.io/projects/conda/en/latest/_downloads/843d9e0198f2a193a3484886fa28163c/conda-cheatsheet.pdf)
+- [Anaconda](https://docs.anaconda.com/anaconda/install/)
+- [Miniconda](https://docs.conda.io/en/latest/miniconda.html)
diff --git a/_preview/468/_sources/foundations/getting-started-github.md b/_preview/468/_sources/foundations/getting-started-github.md
new file mode 100644
index 000000000..c87e40b9b
--- /dev/null
+++ b/_preview/468/_sources/foundations/getting-started-github.md
@@ -0,0 +1,23 @@
+```{image} ../images/GitHub-logo.png
+:alt: GitHub Logo
+:width: 600px
+```
+
+# Getting Started with GitHub
+
+Python and Jupyter are cool technologies, but they only scratch the surface of why you might want to adopt Python for your geoscience workflow.
+
+This section will introduce GitHub, the de facto standard platform for collaboration and version control used by the open-source Python community.
+
+We will walk users through these topics:
+
+- [What is GitHub?](github/what-is-github), and how to create your free account
+- [What are GitHub Repositories](github/github-repos), and what are some Python-specific examples?
+- [Issues and Discussions](github/github-issues) on GitHub: what they're for and how to participate
+- [Cloning and Forking a Repository](github/github-cloning-forking) (and what's the difference?)
+- [Detailed GitHub Configuration](github/github-setup-advanced), including how to set up secure permissions and notifications
+- [Basic Version Control with _git_](github/basic-git): why you may need it, and how to get started
+- [What is a git _Branch_?](github/git-branches)
+- [What's a Pull Request](github/github-pull-request), and how do you open one?
+- [GitHub Workflows](github/github-workflows), sets of best practices for collaborative work
+- [Contributing to Project Pythia via GitHub](github/contribute-to-pythia)
diff --git a/_preview/468/_sources/foundations/getting-started-jupyter.ipynb b/_preview/468/_sources/foundations/getting-started-jupyter.ipynb
new file mode 100644
index 000000000..821910630
--- /dev/null
+++ b/_preview/468/_sources/foundations/getting-started-jupyter.ipynb
@@ -0,0 +1,275 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "RYdHzMOHLr1U"
+ },
+ "source": [
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "382TcknGLr1V"
+ },
+ "source": [
+ "# Getting Started with Jupyter"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "FppyUssDLr1W"
+ },
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "R5NbthgvLr1W"
+ },
+ "source": [
+ "## Overview\n",
+ "Project Jupyter is a project and community whose goal is to \"develop open-source software, open-standards, and services for interactive computing across dozens of programming languages\". Jupyter consists of four main components: Jupyter Notebooks, Jupyter Kernels, Jupyter Lab, and Jupyter Hub. Jupyter can be executed locally and remotely.\n",
+ "\n",
+ "1. Jupyter Notebooks\n",
+ "2. Jupyter Kernels\n",
+ "3. Jupyter Lab\n",
+ "4. Jupyter Hub\n",
+ "5. Executing Jupyter"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ZZZEz-GrLr1X"
+ },
+ "source": [
+ "## Prerequisites\n",
+ "\n",
+ "| Concepts | Importance | Notes |\n",
+ "| --- | --- | --- |\n",
+ "| [Installing and Running Python: Python in Jupyter](jupyter) | Helpful | |\n",
+ "\n",
+ "- **Time to learn**: 10 minutes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "MS5w7x7ELr1Y"
+ },
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Sm5RbdhALr1b"
+ },
+ "source": [
+ "## Jupyter Notebooks\n",
+ "\n",
+ "The Jupyter Notebook software is an open-source web application that allows you to create and share Jupyter Notebooks (*.ipynb files). Jupyter Notebooks contain executable code, LaTeX equations, visualizations (e.g., plots, pictures), and narrative text. The code does not have to just be Python, other languages such as Julia or R are supported as well. \n",
+ "\n",
+ "Jupyter Notebooks are celebrated for their interactive output that allows movement between code, code output, explanations, and more code - similar to how scientists think and solve problems. Jupyter Notebooks can be thought of as a living, runnable publication and make for a great presentation platform."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "WlqwpUr4Lr1e"
+ },
+ "source": [
+ "## Jupyter Kernels\n",
+ "Software engines and their environments (e.g., conda environments) that execute the code contained in Jupyter Notebooks."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "F-sZF8KGLr1f"
+ },
+ "source": [
+ "## Jupyter Lab\n",
+ "\n",
+ "A popular web application on which users can create and write their Jupyter Notebooks, as well as explore data, install software, etc.\n",
+ "\n",
+ "You can find more information on running Jupyter Lab [here](jupyterlab)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "EnMNKdOVOSJI"
+ },
+ "source": [
+ "## Jupyter Hub\n",
+ "A web-based platform that authenticates users and launches Jupyter Lab applications for users on remote systems."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "5kfhYv7yOVMh"
+ },
+ "source": [
+ "## Executing Jupyter"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "tQvmXjFtUSJa"
+ },
+ "source": [
+ "### Local Execution Model\n",
+ "\n",
+ "You can launch JupyterLab from a terminal; it will open up in a web browser. The application will then be running in that web browser. When you open a notebook, Jupyter opens a kernel which can be tied to a specific coding language.\n",
+ "\n",
+ "To launch the JupyterLab interface in your browser, follow the instructions in [Installing and Running Python: Python in Jupyter](https://foundations.projectpythia.org/foundations/jupyter.html).\n",
+ "\n",
+ "![Local Execution Model](../images/local-execution-model.gif)"
+ ]
+ },
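+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a quick sketch, assuming JupyterLab is installed in your currently active environment, the launch command from a terminal is simply:\n",
+ "\n",
+ "```bash\n",
+ "jupyter lab\n",
+ "```"
+ ]
+ },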
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "1ymyUHsMUSlT"
+ },
+ "source": [
+ "### Remote Execution Model\n",
+ "\n",
+ "In the remote execution model, you start out in the browser, then navigate to a specific URL that points to a JupyterHub. On JupyterHub, you authenticate on the remote system, and then JupyterLab is launched and redirected back to your browser. The interface appears the same as if you were running Jupyter locally.\n",
+ "\n",
+ "![Remote Execution Model](../images/remote-execution-model.gif)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "kXqWPdonLr1i"
+ },
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "cYUI27u_Lr1i"
+ },
+ "source": [
+ "## Summary\n",
+ "\n",
+ "Jupyter consists of four main components:\n",
+ "- Jupyter Notebooks (the \"*.ipynb\" files),\n",
+ "- Jupyter Kernels (the work environment),\n",
+ "- Jupyter Lab (a popular web application and interface for local execution),\n",
+ "- and Jupyter Hub (an application and launcher for remote execution).\n",
+ "\n",
+ "### What's next?\n",
+ "\n",
+ "- [JupyterLab](jupyterlab)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "yndFhI9VLr1i"
+ },
+ "source": [
+ "## Resources and references\n",
+ "\n",
+ "- [Jupyter Documentation](https://jupyter.org/)\n",
+ "- [Xdev Python Tutorial Seminar Series - Jupyter Notebooks](https://youtu.be/xSzXvwzFsDU)"
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "collapsed_sections": [],
+ "name": "jupyter.ipynb",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.8"
+ },
+ "nbdime-conflicts": {
+ "local_diff": [
+ {
+ "diff": [
+ {
+ "diff": [
+ {
+ "key": 0,
+ "op": "addrange",
+ "valuelist": [
+ "Python 3"
+ ]
+ },
+ {
+ "key": 0,
+ "length": 1,
+ "op": "removerange"
+ }
+ ],
+ "key": "display_name",
+ "op": "patch"
+ }
+ ],
+ "key": "kernelspec",
+ "op": "patch"
+ }
+ ],
+ "remote_diff": [
+ {
+ "diff": [
+ {
+ "diff": [
+ {
+ "key": 0,
+ "op": "addrange",
+ "valuelist": [
+ "Python3"
+ ]
+ },
+ {
+ "key": 0,
+ "length": 1,
+ "op": "removerange"
+ }
+ ],
+ "key": "display_name",
+ "op": "patch"
+ }
+ ],
+ "key": "kernelspec",
+ "op": "patch"
+ }
+ ]
+ },
+ "toc-autonumbering": false
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/_preview/468/_sources/foundations/getting-started-python.md b/_preview/468/_sources/foundations/getting-started-python.md
new file mode 100644
index 000000000..cbdd93bd1
--- /dev/null
+++ b/_preview/468/_sources/foundations/getting-started-python.md
@@ -0,0 +1,8 @@
+# Getting Started with Python
+
+**_New Python users, start here!_**
+
+## Topics
+
+- [Quickstart: Zero to Python](quickstart): For the impatient among us: run your first Python code in the cloud!
+- [Installing and Running Python](how-to-run-python): Detailed instructions for choosing a Python platform and getting up and running on a laptop, including using the conda package manager.
diff --git a/_preview/468/_sources/foundations/github/basic-git.md b/_preview/468/_sources/foundations/github/basic-git.md
new file mode 100644
index 000000000..5b1c9a4f1
--- /dev/null
+++ b/_preview/468/_sources/foundations/github/basic-git.md
@@ -0,0 +1,447 @@
+```{image} ../../images/Git-Logo-2Color.png
+:alt: Git Logo
+:width: 400px
+```
+
+# Basic Version Control with _git_
+
+## Overview:
+
+1. The need for version control
+1. Basic git usage
+1. Making your first git commit
+1. Viewing and comparing across the commit history
+
+## Prerequisites
+
+| Concepts | Importance | Notes |
+| ---------------------------------------------------------- | ----------- | ---------------------------- |
+| [What is GitHub?](what-is-github) | Necessary | GitHub user account required |
+| [GitHub Repositories](github-repos) | Necessary | |
+| [Issues and Discussions](github-issues) | Recommended | |
+| [Cloning and Forking a Repository](github-cloning-forking) | Recommended | |
+| [Configuring your GitHub Account](github-setup-advanced) | Recommended | |
+
+- **Time to learn**: 45 minutes
+
+---
+
+## About version control and git
+
+### What is version control (and why should we care)?
+
+[Version Control](https://en.wikipedia.org/wiki/Version_control) refers generally to systems for managing changes to documents or files. Version control systems let us keep track of what changes were made to a file, when they were made, and by whom. If you've ever used "Tracked changes" on a Word document with multiple authors, then you've seen a form of version control in action (though NOT one that is well suited to working with computer code!).
+
+The need for version control is particularly acute when _working with computer code_, where small changes to the text can have huge impacts on the results of running the code.
+
+Do you have a directory somewhere on your machine right now with five different versions of a Python script like this?
+
+```
+analysis_script_OLD.py
+analysis_script.py
+analysis_script_09122021.py
+analysis_script_09122021_edit.py
+analysis_script_NEW.py
+```
+
+A Version Control System (VCS) like git will replace this mess with a _well-ordered and labelled history_ of edits that you can freely browse through, and will greatly simplify collaborating with other people on writing new code.
+
+### What is git?
+
+#### Git is not GitHub
+
+That's the first thing to understand. GitHub is a web-based platform for hosting code and collaborating with other people. On the other hand, **git is a command-line Version Control System (VCS)** that you can download and install. It runs on your local computer as well as under the hood on GitHub. You need to understand something about version control with git in order to use many of GitHub's collaboration features.
+
+#### A little history and nomenclature
+
+Git has been around [since the mid-2000s](https://en.wikipedia.org/wiki/Git). It was originally written by Linus Torvalds specifically for use in development of the Linux kernel. Git is [FOSS](https://foundations.projectpythia.org/foundations/github/what-is-github.html#free-and-open-source-software-foss) and comes pre-installed on many Linux and Mac OS systems.
+
+There are many other VCSs out there. A few that you might encounter in scientific codebases include [Subversion](https://subversion.apache.org), [Mercurial](https://www.mercurial-scm.org), and [CVS](http://cvs.nongnu.org). However, git is overwhelmingly the VCS of choice for open-source projects in the Scientific Python ecosystem these days (as well as among software developers more generally).
+
+There is no universally agreed-upon meaning of the name "git". From the [git project's own README file](https://github.com/git/git/blob/master/README.md):
+
+> The name "git" was given by Linus Torvalds when he wrote the very first version. He described the tool as "the stupid content tracker" and the name as (depending on your mood):
+>
+> - random three-letter combination that is pronounceable, and not actually used by any common UNIX command. The fact that it is a mispronunciation of "get" may or may not be relevant.
+> - stupid. contemptible and despicable. simple. Take your pick from the dictionary of slang.
+> - "global information tracker": you're in a good mood, and it actually works for you. Angels sing, and a light suddenly fills the room.
+> - "goddamn idiotic truckload of sh\*t": when it breaks
+
+#### Git is a distributed VCS
+
+Aside from being free and widely deployed, an important distinguishing feature of git is that it is a distributed Version Control System. Essentially, this means that every git directory on every computer is a complete, independent repository with its own complete history.
+
+When we cloned the [`github-sandbox`](https://github.com/ProjectPythia/github-sandbox) repository back in the [Cloning and Forking](github-cloning-forking) section, we not only copied the current repository files but also the entire revision history of the repo.
+
+In this section we are going to explore basic git usage _on our local computer_. Nothing that we do here is going to affect other copies of the repositories stored elsewhere. _So don't worry about breaking anything!_
+
+Later, we will explore how to collaborate on code repositories using GitHub. But keep in mind the basic idea that _all git repos are equal and independent_! You will have separate copies of repos stored on your local machine and in your GitHub organization.
+
+Now that we are oriented, let's dive into some basic git usage with the [`github-sandbox`](https://github.com/ProjectPythia/github-sandbox) repository!
+
+## Inspect a git repository with `git status`
+
+First, make sure you followed the steps in the [Cloning a repository](github-cloning-forking) lesson to make a clone of the `github-sandbox` repo on your local computer. Navigate to wherever you saved your copy of the repo.
+
+Now meet your new best friend:
+
+```bash
+git status
+```
+
+which will always give you information about the current git repo. Try it! You should see something like this:
+
+```bash
+On branch main
+Your branch is up to date with 'origin/main'.
+
+nothing to commit, working tree clean
+```
+
+**Two really important things here**:
+
+1. The first line shows you the current _branch_ (here called `main`). We'll cover branching in more detail in the [next lesson](git-branches), but basically each branch is a completely independent version with its own history. When we start making changes to files, we'll have to pay attention to which branch we're currently on.
+1. The last line `nothing to commit, working tree clean` tells us that we haven't made any changes to files.
+
+You'll want to use
+
+```bash
+git status
+```
+
+frequently to keep track of things in your repos.
+
+## Make some changes
+
+Version control is all about keeping track of changes made to files. So let's make some changes!
+
+You may have noticed that the file `sample.txt` in the `github-sandbox` repository contains a typo. Here we're going to fix the error and save it locally.
+
+### Create a new feature branch
+
+Before we start editing files, the first thing to do is to _create a new branch_ where we can safely make any changes we want.
+
+```{tip}
+While there's nothing stopping us from making changes directly to the `main` branch, it's often best to avoid this! The reason is that it makes collaboration trickier. See the [lesson on Pull Requests](github-pull-request).
+```
+
+Let's create and checkout a new branch in one line:
+
+```bash
+git checkout -b fix-typo
+```
+
+Now try your new best friend again:
+
+```bash
+git status
+```
+
+You should see something like this:
+
+```bash
+On branch fix-typo
+nothing to commit, working tree clean
+```
+
+This tells us that we have switched over to a new branch called `fix-typo`, but there are not (yet) any changes to the files in the repo.
+
+### Time to make some changes
+
+Now do the following:
+
+- Using your favorite text editor, open the file `github-sandbox/sample.txt`.
+- Replace the word `Fxing` with the much more satisfying `Fixing`.
+- Save the changes.
+- Revisit your new best friend `git status`. It should now show something like this:
+
+```bash
+On branch fix-typo
+Changes not staged for commit:
+ (use "git add ..." to update what will be committed)
+ (use "git restore ..." to discard changes in working directory)
+ modified: sample.txt
+
+no changes added to commit (use "git add" and/or "git commit -a")
+```
+
+Here `git` is telling us that the file `sample.txt` does _not_ match what's in the repository.
+
+Of course we know what changed in that file because we just finished editing it. But here's a quick and easy way to see the changes:
+
+```bash
+git diff
+```
+
+which should show you something like this:
+
+```bash
+diff --git a/sample.txt b/sample.txt
+index 4bc074c..edc31c0 100644
+--- a/sample.txt
++++ b/sample.txt
+@@ -4,6 +4,6 @@ We can use it to demonstrate making pull requests or raising issues in a GitHub
+
+ One good way to contribute to a project is to make additions and/or edits to documentation!
+
+-Fxing something as simple as a typo is a great way to get started as a contributor!
++Fixing something as simple as a typo is a great way to get started as a contributor!
+
+ Or, consider adding some more content to this file.
+```
+
+We can see here that `git diff` finds the line(s) where our current file differs from what's in the repo, along with a few lines before and after for context.
+
+The next step is to add our changes to the "official" history of our repo. This is a two-step process (staging and committing).
+
+## Stage and commit our changes
+
+The `commit` is the centerpiece of the git workflow. Each commit is a specific set of changes, additions, and/or deletions of files that gets added to the official history of the repository.
+
+### Staging
+
+Before we make a commit, we must first stage our changes. Think of staging simply as "getting ready to commit". The two-step process can help avoid accidentally committing something that wasn't ready.
+
+To stage our changes, we use `git add` like this:
+
+```bash
+git add sample.txt
+```
+
+and now our new best friend tells us
+
+```bash
+On branch fix-typo
+Changes to be committed:
+ (use "git restore --staged ..." to unstage)
+ modified: sample.txt
+
+```
+
+Now we see that all-important line `Changes to be committed`, telling us the contents of our staging area.
+
+If you made a mistake (e.g., staged the wrong file), you can always unstage using `git restore` as shown in the `git status` output. Nothing is permanent until we commit!
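+
+For example, this command (hypothetical here, since we do want our change staged) would move `sample.txt` back out of the staging area:
+
+```bash
+git restore --staged sample.txt
+```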
+
+(And if you accidentally commit the wrong thing? Don't worry, you can always "go back in time" to previous commits -- see below!)
+
+### Committing
+
+It's time to make a commitment. We can now permanently add our edit to the history of our `fix-typo` branch by doing this:
+
+```bash
+git commit -m 'Fix the typo'
+```
+
+```{note}
+Every commit should have a "message" that explains briefly what the commit is for. Here we set the commit message with the `-m` flag and chose some descriptive text. Note, it's critical to have those quotes around `'Fix the typo'`. Otherwise the command shell will misinterpret what you are trying to do.
+```
+
+Now when we do `git status` we see
+
+```bash
+On branch fix-typo
+nothing to commit, working tree clean
+```
+
+And we're back to a clean state! We have now added a new permanent change to the history of our repo (or more specifically, to this _branch_ of the repo).
+
+## Going back in time
+
+Each commit is essentially a snapshot in time of the state of the repo. So how can we look back on that history, or revert back to a previous version of a file?
+
+### Viewing the commit history with `git log`
+
+A simple way to see this history of the current branch is this:
+
+```bash
+git log
+```
+
+You'll see something like this:
+
+```bash
+commit 7dca0292467e4bbd73643556f83fd1c52b5c113c (HEAD -> fix-typo)
+Author: Brian Rose
+Date: Mon Jan 17 11:31:49 2022 -0500
+
+ Fix the typo
+
+commit 35fcbd991f911e170df550db58f74a082ba18b50 (origin/main, origin/HEAD, main)
+Author: Kevin Tyle
+Date: Thu Jan 13 11:29:40 2022 -0500
+
+ Close docstring quote on sample.py
+
+commit e56ea58071f150ec00904a50341a672456cbcb8f
+Author: Kevin Tyle
+Date: Tue Jan 11 14:15:31 2022 -0500
+
+ Create sample.md
+
+commit f98d05e312d19a84b74c45402a2904ab94d86e45
+Author: Kevin Tyle
+Date: Tue Jan 11 13:58:09 2022 -0500
+
+ Create sample.py
+```
+
+which shows the last few commits on this branch, including the commit number, author, timestamp, and commit message. You can page down to see the rest of the history
+or just press `Q` to exit `git log`!
+
+```{note}
+Every commit has a unique hexadecimal checksum code like `7dca0292467e4bbd73643556f83fd1c52b5c113c`. Your history will look a little different from the above!
+```
+
+### Checking out a previous commit
+
+Let's say you want to retrieve the file `sample.txt` from the previous commit. Two possible reasons why:
+
+1. You just want to take a quick look at something in the previous commit, but then go back to the current version. That's what we'll do here.
+2. Maybe you don't like the most recent commit and want to do some new edits _starting from the previous commit_ -- in effect, undoing the most recent commit and going back in time. The simplest way to do this is to _create a new branch_ starting from the previous commit. We'll cover branches more fully in the next lesson.
+
+To retrieve the previous commit, use `git checkout` and the unique checksum code, which you can copy and paste from the `git log` output:
+
+```bash
+git checkout 35fcbd991f911e170df550db58f74a082ba18b50
+```
+
+You may see output that looks like this:
+
+```bash
+Note: switching to '35fcbd991f911e170df550db58f74a082ba18b50'.
+
+You are in 'detached HEAD' state. You can look around, make experimental
+changes and commit them, and you can discard any commits you make in this
+state without impacting any branches by switching back to a branch.
+
+If you want to create a new branch to retain commits you create, you may
+do so (now or later) by using -c with the switch command. Example:
+
+ git switch -c <new-branch-name>
+
+Or undo this operation with:
+
+ git switch -
+
+Turn off this advice by setting config variable advice.detachedHead to false
+
+HEAD is now at 35fcbd9 Close docstring quote on sample.py
+```
+
+(the details may vary depending on what version of git you are running).
+
+By `detached HEAD`, git is telling us that we are NOT on the most recent commit in this branch.
+
+If you inspect `sample.txt` in your editor, you will see that the typo `Fxing` is back!
+
+As the git message above is reminding us, it's possible to create an entirely new branch with changes that we make from this point in the history using `git switch -c`. But for now, let's just go back to the most recent commit on our `fix-typo` branch:
+
+```bash
+git checkout fix-typo
+```
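+
+(Aside: had we wanted to keep working from that older commit instead, we could have created a new branch right there with `git switch -c`; the branch name below is purely illustrative.)
+
+```bash
+git switch -c from-old-commit
+```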
+
+## Comparing versions
+
+We already saw one use of the `git diff` command to look at changes in a repo. By default `git diff` will compare the currently saved files against the most recent commit.
+
+We can also use `git diff` to compare across commits within a branch, or between two different branches. Here are some examples.
+
+### Compare across commits
+
+To compare across any commits in our history, we again use the unique commit checksum that we listed with `git log`:
+
+```bash
+git diff HEAD 35fcbd991f911e170df550db58f74a082ba18b50
+```
+
+gives
+
+```bash
+diff --git a/sample.txt b/sample.txt
+index edc31c0..4bc074c 100644
+--- a/sample.txt
++++ b/sample.txt
+@@ -4,6 +4,6 @@ We can use it to demonstrate making pull requests or raising issues in a GitHub
+
+ One good way to contribute to a project is to make additions and/or edits to documentation!
+
+-Fixing something as simple as a typo is a great way to get started as a contributor!
++Fxing something as simple as a typo is a great way to get started as a contributor!
+
+ Or, consider adding some more content to this file.
+```
+
+```{note}
+Here we use `HEAD` as an alias for the _most recent commit_.
+```
+
+### Compare across branches
+
+Recall that, since we have done all our editing in a new branch, the `main` branch still has the typo!
+
+We can see this with `git diff` using the `..` notation to compare two branches:
+
+```bash
+git diff main..fix-typo
+```
+
+The output is very similar:
+
+```bash
+diff --git a/sample.txt b/sample.txt
+index 4bc074c..edc31c0 100644
+--- a/sample.txt
++++ b/sample.txt
+@@ -4,6 +4,6 @@ We can use it to demonstrate making pull requests or raising issues in a GitHub
+
+ One good way to contribute to a project is to make additions and/or edits to documentation!
+
+-Fxing something as simple as a typo is a great way to get started as a contributor!
++Fixing something as simple as a typo is a great way to get started as a contributor!
+
+ Or, consider adding some more content to this file.
+```
+
+The `git diff` command is a powerful comparison tool (and maybe your second new best friend). For much more detail on its usage, see the [git documentation](https://git-scm.com/docs/git-diff).
+
+## Git commands mini-reference
+
+### Commands we used in this tutorial
+
+- `git status`: see what branch we're on and what state our repo is in.
+- `git checkout`: switch between branches (use the `-b` flag to create a new branch and check it out)
+- `git diff`: compare files between current version and last commit (default), between two commits, or between two branches.
+- `git add`: stage a file for a commit.
+- `git commit`: create a new commit with the staged files.
+- `git log`: see the commit history of our branch.
+
+### Some other git commands you'll want to know
+
+We'll cover many of these in subsequent sections.
+
+- `git branch`: list all the branches in the repo
+- `git mv` and `git rm`: git-enhanced versions of the `mv` (move file) and `rm` (remove file) commands. These will automatically stage the changes in your current branch (see the sketch after this list).
+- `git merge`: merge changes from one branch into another.
+- `git push` and `git pull`: send changes from your local branch to a remote repository (e.g. hosted on GitHub), or retrieve changes from it.
+- `git init`: create a brand-new repo in the current directory
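+
+As a quick illustrative sketch (the file names are stand-ins), `git mv` and `git rm` stage their changes automatically:
+
+```bash
+git mv analysis_script.py analysis.py   # rename the file and stage the rename
+git rm analysis_script_OLD.py           # delete the file and stage the deletion
+git status                              # both appear under "Changes to be committed"
+```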
+
+---
+
+## Summary
+
+- Version control is an important tool for working with code files (or anything that is saved as plain text).
+- git is the most common version control software in use today.
+- `git status` is your new best friend because it gives you a quick view into what's going on in a git repository.
+- Every branch of a git repository has a history, which is a series of uniquely identified and labelled commits.
+- You can view this history with `git log`.
+- Making a new commit is a two-step process with `git add` and `git commit`.
+- Commits are non-destructive, meaning you can always go back in time to previous commits.
+
+### What's Next?
+
+Next we'll explore the concept of branching in git repositories in more detail, including how to merge changes made on one branch into another branch.
+
+## References
+
+1. [Official git documentation](https://git-scm.com/doc)
+1. [The Software Carpentries beginner lessons on git](https://swcarpentry.github.io/git-novice/)
diff --git a/_preview/468/_sources/foundations/github/contribute-to-pythia.md b/_preview/468/_sources/foundations/github/contribute-to-pythia.md
new file mode 100644
index 000000000..313688227
--- /dev/null
+++ b/_preview/468/_sources/foundations/github/contribute-to-pythia.md
@@ -0,0 +1,97 @@
+```{image} ../../images/GitHub-logo.png
+:alt: GitHub Logo
+:width: 400px
+```
+
+# Contribute to Project Pythia via GitHub
+
+## Overview:
+
+1. Suggest a change
+2. Make the edits
+3. Create a Pull Request
+
+## Prerequisites
+
+| Concepts | Importance | Notes |
+| --------------------------------------------- | ----------- | ----- |
+| [What is GitHub](what-is-github) | Necessary | |
+| [GitHub Repositories](github-repos) | Necessary | |
+| [Cloning and Forking](github-cloning-forking) | Necessary | |
+| [Basic Version Control with _git_](basic-git) | Necessary | |
+| [Issues and Discussions](github-issues) | Recommended | |
+| [Branches](git-branches) | Necessary | |
+| [Pull Requests](github-pull-request) | Necessary | |
+| [Reviewing Pull Requests](review-pr) | Recommended | |
+| [GitHub Workflows](git-workflow) | Necessary | |
+
+- **Time to learn**: 30 minutes
+
+---
+
+Now that you have become more familiar with how to use Git and GitHub, you might have an idea or some material that you want to contribute to Project Pythia! The [Project Pythia Contributor's Guide](https://projectpythia.org/contributing.html) describes the steps required to submit a PR to any of Project Pythia's repos. Here, we will go through an example of submitting a PR to `pythia-foundations`.
+
+## Suggest a change
+
+One simple way to contribute is to fix a typo or suggest a change to one of the tutorials. For example, in the [Computations and Masks with Xarray tutorial](https://foundations.projectpythia.org/core/xarray/computation-masking.html), let's suggest a clarification that the sea surface temperature is called `tos` in the dataset we are using.
+
+We could open an issue to suggest this change in order to get feedback on the idea before we take the time to edit files, but since this is such a small change, let's just create a PR.
+
+## Make the edits
+
+We will follow the [Forking Workflow](https://foundations.projectpythia.org/foundations/github/github-workflows.html#forking-workflow) described in the previous section of this tutorial, assuming `pythia-foundations` has already been forked:
+
+- Create a new branch with a descriptive name
+- Make the changes and commit them locally
+- Push to the remote repository
+- Open a PR on GitHub
+
+First, making the new branch,
+
+```bash
+git branch clarify-sst-tos
+git checkout clarify-sst-tos
+```
+
+There are a variety of ways to make changes, depending on the type of file, as well as preference. Here we want to edit a Jupyter Notebook (file extension `.ipynb`), so we can use JupyterLab. We find the file of interest at `/core/xarray/computation-masking.ipynb` and add in some text:
+
+After saving and exiting (and checking for changes with a `git status`), we commit with the following:
+
+```bash
+git add core/xarray/computation-masking.ipynb
+git commit -m 'Mention that SST is called tos in the model'
+```
+
+Then pushing to our remote aliased `origin`:
+
+```bash
+git push origin clarify-sst-tos
+```
+
+## Create a Pull Request
+
+Now, going to our remote repo on GitHub, forked from `pythia-foundations`, we see that recent changes have been made. By clicking on the "Compare & pull request" button, we can open a PR, proposing that our changes be merged into the main branch of `ProjectPythia/pythia-foundations`.
+
+Project Pythia has an automated reviewer system: when a PR is created, two members of the organization will be randomly chosen to review it. If your PR is not immediately ready to be approved and merged, open it as a draft to delay the review process. As shown in this [Git Branches section](https://foundations.projectpythia.org/foundations/github/git-branches.html#merging-branches), the "Draft pull request" button is found using the arrow on the "Create pull request" button.
+
+Let's add the `content` tag and open this one as a draft for now:
+
+For any PR opened in `pythia-foundations`, there will be a few checks that need to pass before merging is allowed. Once the `deploy-book / build` check has completed (which will likely take a few minutes), there will be a Deployment Preview URL commented by the github-actions bot that will take you to a build of the Pythia Foundations book with your edits. There you can ensure your edits show up as expected.
+
+Once it is ready, click "Ready for review" to take it out of draft mode. Now we wait for any comments or reviews!
+
+---
+
+## Summary
+
+- You can contribute to Project Pythia by suggesting edits or adding content with a Pull Request
diff --git a/_preview/468/_sources/foundations/github/git-branches.md b/_preview/468/_sources/foundations/github/git-branches.md
new file mode 100644
index 000000000..5ee0bd1bc
--- /dev/null
+++ b/_preview/468/_sources/foundations/github/git-branches.md
@@ -0,0 +1,304 @@
+```{image} ../../images/Git-Logo-2Color.png
+:alt: Git Logo
+:width: 400px
+```
+
+# Git Branches
+
+Git "branches" are an important component of many Git and GitHub workflows. If you plan to use GitHub to manage your own resources, or contribute to a GitHub hosted project, it is essential to have a basic understanding of what branches are and how to use them. For example, the best practices for a simple workflow for suggesting changes to a GitHub repository are: create your own fork of the repository, make a branch from your fork where your changes are made, and then suggest these changes move to the upstream repository with a Pull Request. This section of the GitHub chapter assumes you have read the prior GitHub sections, are at least somewhat familiar with git commands and the vocabulary ("cloning," "forking," "merging," "Pull Request" etc), and that you have already created your own fork of the [GitHub Sandbox Repository](https://github.com/ProjectPythia/github-sandbox) hosted by Project Pythia.
+
+## Overview:
+
+1. What are Git Branches
+1. Creating a New Branch
+1. Switching Branches
+1. Setting up a Remote Branch
+1. Merging Branches
+1. Deleting Branches
+1. Updating Your Branches
+1. Complete Workflow
+
+## Prerequisites
+
+| Concepts | Importance | Notes |
+| ---------------------------------------------------------- | ----------- | ---------------------------- |
+| [What is GitHub?](what-is-github) | Necessary | GitHub user account required |
+| [GitHub Repositories](github-repos) | Necessary | |
+| [Issues and Discussions](github-issues) | Recommended | |
+| [Cloning and Forking a Repository](github-cloning-forking) | Necessary | |
+| [Configuring your GitHub Account](github-setup-advanced) | Recommended | |
+| [Basic Version Control with _git_](basic-git) | Necessary | |
+
+- **Time to learn**: 30 minutes
+
+---
+
+## What are Git branches?
+
+Git branches allow for non-linear or differing revision histories of a repository. At any point in time, you can split your repository into multiple development paths (branches), make different commits in each, and later merge these branches and their changes back together.
+
+Branching is one of git's methods for helping with collaborative document editing, much like "change tracking" in Google Docs or Microsoft Word. It enables multiple people to edit copies of the same document content, while reducing or managing edit collisions, and with the ultimate aim of merging everyone's changes together later. It also allows the same person to edit multiple copies of the same document, but with different intentions. Some reasons for wanting to split your repository into multiple paths (i.e. branches) are to experiment with different methods of solving a problem (before deciding which method will ultimately be merged) and to work on different problems within the same codebase (without confusing which code changes are relevant to which problem).
+
+These branches can live on your computer (local) or on GitHub (remote). They are brought together through Git _pushes_, _pulls_, _merges_, and _Pull Requests_. _Pushing_ is how you transfer changes from your local repository to a remote repository. _Pulling_ is how you fetch upstream changes into your branch. _Merging_ is how you piece the forked history back together again (i.e. join two branches). And _Pull Requests_ are how you suggest the changes you've made on your branch to the upstream codebase.
+
+```{admonition} Pull Requests
+:class: info
+We will cover [Pull Requests](github-pull-request) in more depth in the next section.
+```
+
+One rule of thumb is for each development feature to have its own development branch until that feature is ready to be added to the upstream (remote) codebase. This allows you to compartmentalize your Pull Requests so that smaller working changes can be merged upstream independently of one another. For example, you might have a complete or near-complete feature on its own branch with an open Pull Request awaiting review. While you wait for feedback from the team before merging it, you can still work on a second feature on a second branch without affecting your first feature's Pull Request. **We encourage you to always do your work in a designated branch.**
+
+## Creating a New Branch
+
+```{admonition} Have you forked the repository?
+:class: info
+Having forked (NOT just cloned) the [GitHub Sandbox Repository](https://github.com/ProjectPythia/github-sandbox) is essential for following the steps in this book chapter. See the chapter on [GitHub Cloning and Forking](github-cloning-forking.md).
+```
+
+![branching](../../images/branching.gif)
+The above flowchart demonstrates forking a remote repository, labeled "Upstream", creating a local copy, labeled "Clone", creating a new branch, "branchA", and adding two commits, C3 and C4, to "branchA" of the local clone of the forked repository. Different commits can be added to different branches in any order without depending on or knowing about each other.
+
+From your terminal, navigate to your local clone of your `github-sandbox` repository fork:
+
+```bash
+cd github-sandbox
+```
+
+Let's begin by checking the status of our repository:
+
+```bash
+git status
+```
+
+![Git Status](../../images/1-gitstatus.png)
+
+You will see that you are already on a branch called "main", that this branch is up to date with "origin/main", and that there is nothing to commit.
+
+```{admonition} The Main Branch
+:class: info
+Historically, the `main` branch was called the `master` branch. The name change was relatively recent, so all of your GitHub repositories may not reflect this yet. See instructions to change your branch name at [Github's Branch Renaming documentation](https://github.com/github/renaming).
+```
+
+Now check the status of your remote repository with
+
+```bash
+git remote -v
+```
+
+![Git Remote](../../images/2-gitremote.png)
+
+We are set up to pull (denoted as 'fetch' in the output above) and push from the same remote repository.
+
+Next, check all of your existing Git branches with:
+
+```bash
+git branch -a
+```
+
+![Git Branch](../../images/3-gitbranch.png)
+
+You will see one local branch (`main`) and your remote branch (`remotes/origin/HEAD` and `remotes/origin/main`, where `HEAD` points to `main`). `HEAD` is the pointer to the current branch reference, or in essence, a pointer to your last commit. More on this in a later section.
+
+Now, before we make some sample changes to our codebase, let's create a new branch where we'll make these changes:
+
+```bash
+git branch branchA
+```
+
+Check that this branch was created with:
+
+```bash
+git branch
+```
+
+![Git NewBranch](../../images/4-gitnewbranch.png)
+
+This will display the current and the new branch. You'll notice that the current or active branch, indicated by the "\*", is still the `main` branch. Thus, any changes we make to the contents of our local repository will still be made on `main`. We will need to switch branches to work in the new branch, `branchA`.
+
+## Switching Branches
+
+To switch branches use the command `git checkout` as in:
+
+```bash
+git checkout branchA
+```
+
+To check your current branch use `git status`:
+
+```bash
+git status
+```
+
+![Git Checkout](../../images/5-gitcheckout.png)
+
+Notice that `git status` doesn't say anything about being up-to-date, as before. This is because this branch only exists locally, not in our upstream GitHub fork.
+
+## Setting up a Remote Branch
+
+While your clone lives locally on your laptop, a remote branch exists on your GitHub server. You have to tell GitHub about your local branch before these changes are reflected remotely in your upstream fork.
+
+![pushing](../../images/pushing.gif)
+The above flowchart demonstrates pushing two new local commits (C3 and C4) to the corresponding remote branch. Before the push, the changes from these commits exist ONLY locally and are not represented on your upstream GitHub repository. After the push, everything is up-to-date.
+
+Before we push this branch upstream, let's make some sample changes (like C3 or C4) by creating a new empty file, with the ending ".py".
+
+```bash
+touch hello.py
+```
+
+![Git Status](../../images/6-samplechange.png)
+
+You can check that this file has been created by comparing an `ls` before and after this command, and also with a `git status` that will show your new untracked file.
+
+`git add` and `git commit` your new file and check the status again.
+
+![Git Add](../../images/6a-gitadd.png)
+
+Your new branch is now one commit ahead of your main branch. You can see this with a `git log`.
+
+![Git Log](../../images/6b-gitlog.png)
+
+In a real workflow, you would continue making edits and git commits on a branch until you are ready to push up to GitHub.
+
+Try to do this with
+
+```bash
+git push
+```
+
+![Git Push](../../images/6c-gitpush.png)
+
+You will get an error message, "fatal: The current branch `branchA` has no upstream branch." So what is the proper method for getting our local branch changes up to GitHub?
+
+First, we need to set an upstream branch to direct our local push to:
+
+```bash
+git push --set-upstream origin branchA
+```
+
+Thankfully, Git provided this command in the previous error message. If you cloned using HTTPS, you will be asked to enter your username and password, as described in [GitHub's PAT Creation page](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token).
+
+![Set Upstream](../../images/6d-setupstream.png)
+
+We can see that this worked by doing a `git branch -a`
+
+Notice the new branch called `remotes/origin/branchA`. And when you do a `git status` you'll see that we are up to date with this new remote branch.
+
+![Git Commit Status](../../images/7-github-branchandstatus.png)
+
+On future commits you will not have to repeat these steps, as your remote branch will already be established. Simply push with `git push` to have your remote branch reflect your future local changes.
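+
+A sketch of that routine cycle on later edits (the file and commit message are just examples):
+
+```bash
+git add hello.py
+git commit -m 'Describe the latest change'
+git push
+```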
+
+## Merging Branches
+
+Merging is how you bring your split branches of a repository back together again.
+
+If you want to merge two _local_ branches together, the steps are as follows:
+
+Let's assume your two branches are named `branchA` and `branchB`, and you want your changes from `branchB` to now be reflected in `branchA`:
+
+1. First checkout the branch you want to merge INTO:
+
+```bash
+git checkout branchA
+```
+
+2. Then execute a `merge`:
+
+```bash
+git merge branchB
+```
+
+If there were competing edits in the 2 branches that Git cannot automatically resolve, a **merge conflict** occurs. This typically happens if edits are to the same line in different commits. Conflicts can be [resolved in the command line](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/addressing-merge-conflicts/resolving-a-merge-conflict-using-the-command-line) or in your GUI of choice (such as Visual Studio Code).
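+
+When a conflict does occur, Git writes both versions into the affected file between conflict markers, roughly like this (the content lines here are illustrative). You resolve it by editing the file to keep what you want, deleting the markers, then staging and committing the result.
+
+```
+<<<<<<< HEAD
+the line as it reads on branchA
+=======
+the line as it reads on branchB
+>>>>>>> branchB
+```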
+
+A **Pull Request** is essentially a merge that happens on an upstream remote. We will continue this demonstration and cover the specifics of merging via a [Pull Request](github-pull-request) more thoroughly in the next section.
+
+![PR](../../images/pullrequest.gif)
+The above flowchart demonstrates a simple Pull Request where the upstream main repository has accepted the changes from the feature branch of your fork. The latest commit to the Upstream Main repository is now C4. Your Feature branch can now be safely deleted.
+
+## Deleting Branches
+
+After the feature you worked on has been completed and merged, you may want to delete your branch.
+![deletebranch](../../images/deletingbranch.gif)
+
+To do this locally, you must first switch back to `main` or any non-target branch. Then you can enter
+
+```bash
+git branch -d <branch-name>
+```
+
+for example
+
+```bash
+git branch -d branchA
+```
+
+To delete the branch remotely, type
+
+```bash
+git push <remote> --delete <branch>
+```
+
+as in
+
+```bash
+git push origin --delete jukent/branchA
+```
+
+## Updating Your Branches
+
+Previously, we showed you how to merge branches together, combining the changes from two different branches into one. Afterwards, you deleted your feature branch `branchA`. Your local clone and your fork of the `main` branch now both need to pull from the upstream repository.
+
+![pull](../../images/pulling.gif)
+The above flowchart demonstrates pulling in the upstream changes from Upstream Main after a Pull Request has been merged, first into your fork and then into your clone. Before continuing to work with new commits on the feature branch, it is best to pull in the upstream changes.
+
+In this example, all of the changes to the branches were local and made by a single person, you. In a collaborative environment, other contributors may be making changes to their own feature branches (or main branch), which will ultimately be pushed up to the remote repository. Either way, your branches will become stale and need to be refreshed. The more time that passes by, the more likely this is to happen, particularly for an active GitHub repository. Here we show you how to sync your branches with the upstream branches.
+
+Once a Pull Request has been merged, you will find that these upstream changes are not automatically included in your fork or your other branches. In order to include the changes from the upstream main branch, you will need to do a `git pull`.
+
+First check if there are any upstream changes:
+
+```bash
+git status
+```
+
+Then, if there are no merge conflicts:
+
+```bash
+git pull
+```
+
+`git pull` is a combination of `git fetch` and `git merge`. That is, it updates the remote tracking branches (`git fetch`) AND updates your current branch with any new commits on the remote tracking branch (`git merge`).
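+
+Written out as the (roughly) equivalent two-step sequence, for a remote named `origin` and its `main` branch:
+
+```bash
+git fetch origin
+git merge origin/main
+```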
+
+This same concept applies to work in a team setting. Multiple authors will have their own feature branches that merge into the same Upstream Main repository via Pull Requests. It is important for each author to do regular `git pull`s to stay up to date with each other's contributions.
+
+## Complete Workflow
+
+All in all your Git Branching workflow should resemble this flow:
+![gitworkflow](../../images/gitworkflow.gif)
+
+1. Forking the upstream repository
+1. Creating a local clone of your upstream fork
+1. Creating a new branch
+1. Switching branches
+1. Making a commit
+1. Setting up a remote branch
+1. Merging branches via a PR
+1. Deleting branches
+1. Pulling from upstream
+
+---
+
+## Summary
+
+- Git Branches allow you to independently work on different features of a project via differing revision histories of a repository.
+- A useful workflow is to create a new branch locally, switch to it and set up a remote branch. During your revision, push to your upstream branch and pull from main as often as necessary. Then suggest your edits via a Pull Request and, if desired, delete your branch after the merge.
+
+### What's Next?
+
+[Opening a Pull Request on GitHub](github-pull-request)
+
+## Resources and references
+
+- [GitHub.com Help Documentation (GitHub Docs)](https://docs.github.com/en)
+- [Xdev Python Tutorial Seminar Series - Github (Kevin Paul)](https://www.youtube.com/watch?v=fYkPn0Nttlg)
+- [Resolving a Merge Conflict Using the Command Line (GitHub Docs)](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/addressing-merge-conflicts/resolving-a-merge-conflict-using-the-command-line)
diff --git a/_preview/468/_sources/foundations/github/github-advanced.md b/_preview/468/_sources/foundations/github/github-advanced.md
new file mode 100644
index 000000000..0e00ec3eb
--- /dev/null
+++ b/_preview/468/_sources/foundations/github/github-advanced.md
@@ -0,0 +1,38 @@
+# Advanced GitHub Topics
+
+```{note}
+This content is under construction!
+```
+
+## Overview:
+
+1. Overview 1
+1. Overview 2
+
+## Prerequisites
+
+| Concepts | Importance | Notes |
+| --------------------- | ---------- | ----- |
+| Prior GitHub Sections | Necessary | |
+
+- **Time to learn**: 30 minutes
+
+---
+
+## Content section
+
+---
+
+## Summary
+
+- Sum 1
+- Sum 2
+
+### What's Next?
+
+End of GitHub content
+
+## References
+
+1. Ref 1
+1. Ref 2
diff --git a/_preview/468/_sources/foundations/github/github-cloning-forking.md b/_preview/468/_sources/foundations/github/github-cloning-forking.md
new file mode 100644
index 000000000..bdb5205f1
--- /dev/null
+++ b/_preview/468/_sources/foundations/github/github-cloning-forking.md
@@ -0,0 +1,236 @@
+```{image} ../../images/GitHub-logo.png
+:alt: GitHub Logo
+:width: 400px
+```
+
+# Cloning and Forking a Repository
+
+## Overview:
+
+1. Cloning and forking a git repository
+1. Cloning a repository
+1. Forking a repository
+
+## Prerequisites
+
+| Concepts | Importance | Notes |
+| --------------------------------------- | ----------- | ---------------------------- |
+| [What is GitHub?](what-is-github) | Necessary | GitHub user account required |
+| [GitHub Repositories](github-repos) | Necessary | |
+| [Issues and Discussions](github-issues) | Recommended | |
+| Command-line shell | Helpful | |
+
+- **Time to learn**: 30 minutes
+
+---
+
+## Cloning and forking
+
+_Cloning_ and _forking_ are two related terms in the GitHub vernacular
+that, unfortunately, are not always used consistently throughout
+the _web-o-sphere_. In Project Pythia we use the term _clone_ to refer to
+making a **local** copy of a **remote** repository; the source for
+the copy is a remote repo, and the destination for the copy is your
+local laptop/desktop. When working with GitHub, a _fork_, on the
+other hand, creates a copy of a GitHub repository on GitHub. In other
+words, both the source and the destination of the _fork_ operation are
+hosted in the cloud on GitHub. Forking is performed via your GitHub
+account. While the original repository may be owned by anyone, the
+newly created fork will be owned by you. Cloning, on the
+other hand, is performed using a Git command. Naturally, since the
+destination of the clone operation is your local computer, you will
+own the cloned contents. In either case, whether you clone or fork,
+any changes you make to the newly created repository will not impact
+the original without taking explicit action (e.g. performing a
+_push_ or submitting a _Pull Request_, the topics of later sections
+in this guide).
+
+Cloning and forking are often used together (more on this later).
+The illustration below demonstrates the operation of a Fork of a
+remote repository (UPSTREAM), followed by a clone of the newly
+created ORIGIN.
+
+![clone-and-fork](../../images/github-clone-fork.png)
+
+## Cloning a repository
+
+Cloning is ideal for the following scenarios:
+
+1. You wish to download, build, and install the latest version of a software package.
+1. You would like to experiment with a repository on your local computer, but do not desire to maintain a separate copy of it (termed a _fork_, to be covered later in this lesson) on your GitHub account.
+1. You have previously _forked_ a repository to your own GitHub account, and now wish to make changes to it for possible incorporation into the original repo, via a _Pull Request_.
+
+Let's consider the 2nd scenario. Say you wish to copy a GitHub repository to a computer you have access to (which could be your own computer, or one you have access to at work or school).
+
+We'll use a very basic repo that is part of the [Project Pythia organization](https://github.com/ProjectPythia) as our example.
+
+First, point your browser to <https://github.com/ProjectPythia/github-sandbox>:
+
+
+
+---
+
+We see that the repository contains five files. Above the list of files is this row:
+
+
+
+---
+
+Click on the green **Code** button to the right:
+
+
+
+---
+
+Select the **HTTPS** option, and click on the copy-to-clipboard icon:
+
+
+
+---
+
+```{tip}
+This link points to where the repository "lives" on GitHub. We will use the term **origin** to refer to this location.
+```
+
+Now, open up a terminal on your local computer, and if desired, `cd` into a directory that you'd like to house whatever repos you clone. Type `git clone`, and then paste in the URL that you copied from GitHub (i.e., the **origin**):
+
+```
+git clone https://github.com/ProjectPythia/github-sandbox.git
+```
+
+You'll see something like the following:
+
+```
+Cloning into 'github-sandbox'...
+remote: Enumerating objects: 15, done.
+remote: Counting objects: 100% (15/15), done.
+remote: Compressing objects: 100% (14/14), done.
+remote: Total 15 (delta 3), reused 0 (delta 0), pack-reused 0
+Receiving objects: 100% (15/15), 7.41 KiB | 2.47 MiB/s, done.
+Resolving deltas: 100% (3/3), done.
+```
+
+```{admonition} Windows users
+:class: info
+While `git` is typically part of a Linux or Mac OS command-line shell, similar functionality must be installed if you are running Windows. Download and install the [Git for Windows](https://gitforwindows.org/) package.
+```
+
+Now, you can `cd` into the `github-sandbox` directory, which has been created and populated with the exact contents of the **origin** repository at the time you cloned it. If you have a Python installation, you could then type
+
+```
+python sample.py
+```
+
+to run the sample Python script. You should see the following output:
+
+```
+Hello, Python learners!
+```
+
+By virtue of cloning the repo, _git_ automatically registers the URL of the **origin**'s repository on GitHub. You can show this by typing the following:
+
+```
+git remote -v
+```
+
+You should see:
+
+```
+origin https://github.com/ProjectPythia/github-sandbox.git (fetch)
+origin https://github.com/ProjectPythia/github-sandbox.git (push)
+```
+
+```{tip}
+We discuss the `git` command-line interface in the [Basic version control with git](basic-git) lesson.
+```
+
+**Congratulations!** You have now cloned a GitHub repository!
+
+Now, let's consider the 3rd scenario for cloning... which involves the related topic of _forking_.
+
+## Forking a repository
+
+Forking is similar to cloning, but involves a somewhat more complex workflow. Scenarios where forking a repo is indicated include the following:
+
+1. You wish to collaborate on projects that are hosted on GitHub, but you are not one of that project's _maintainers_ (i.e., you do not have _write permissions_ on it).
+1. You wish to experiment with changing or adding new features to a project, and do not immediately intend to _merge_ them into the original project's repo (aka, the _upstream_ repository).
+
+In a fork, you create a copy of an existing repository, but store it in your own personal GitHub organization (recall that when you create a GitHub account, the _organization_ name is your GitHub user ID).
+
+Let's say we intend to make some changes to the [Project Pythia Sandbox](https://github.com/ProjectPythia/github-sandbox) repo, that ultimately we'll submit to the original repository as a _Pull request_.
+
+```{note}
+Be sure you have logged into GitHub at this time!
+```
+
+Notice at the top right of the screen, there is a _Fork_ button:
+
+
+
+---
+
+Click on it:
+
+
+
+---
+
+You should see your GitHub user ID (if you administer any other GitHub organizations, you will see them as well). Click on your user ID to complete the _fork_. After a few seconds, your browser will be redirected to the forked repo, now residing in your personal GitHub organization:
+
+
+
+---
+
+Notice that the _Fork_ button on the upper right has incremented by one, and there is also a line relating your fork to the original repo:
+
+
+
+---
+
+```{tip}
+We discuss *branches* in the [Git Branches](git-branches) lesson.
+```
+
+You now have a copy (essentially a clone) of the forked repository, which is now owned by you.
+
+You could, at this point, select one of the files in the repository and use GitHub's built-in editor to make changes to these text-based files. However, the typical use case that leverages the collaborative power of GitHub and its command-line cousin, _git_, involves _cloning_ your _forked_ copy of the repo to your local computer, where you can then perform your edits, and (in the case of software) test them on your system.
+
+Cloning your fork is the same as cloning the original repo. Click on the Code button, select the HTTPS protocol, copy the URL to the clipboard, and then run `git clone <url>` on your local computer. In this case, you will need to either run this command in a different directory, or specify a different destination directory with `git clone <url> <directory-name>`, since by default the clone will use the name of the repo, `github-sandbox`.
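+
+For example (a sketch; `<your-username>` is a placeholder for your GitHub user ID, and `my-sandbox-fork` is an arbitrary directory name):
+
+```
+git clone https://github.com/<your-username>/github-sandbox.git my-sandbox-fork
+```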
+
+```{tip}
+Unlike cloning, forking is not an option supported by the *git* command-line interface. In other words, `git fork` is not a valid command.
+```
+
+Once you've cloned the fork to your local machine, try running `git remote -v` again. You will see that the _origin_ URL now points to your GitHub account or organization.
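+
+The output should resemble the following, with `<your-username>` replaced by your GitHub user ID:
+
+```
+origin  https://github.com/<your-username>/github-sandbox.git (fetch)
+origin  https://github.com/<your-username>/github-sandbox.git (push)
+```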
+
+_The main purpose of cloning and forking a remote repository is so that you can make changes to the contents of those repositories in a safe and version-controlled manner._ The process of making changes and submitting them as _Pull Requests_ to the original repository is covered in our lesson on [Opening a Pull Request on GitHub](github-pull-request), but the workflow is as follows, with a brief command sketch after the list:
+
+1. Edit an existing file or files, and/or create new files.
+1. Stage your changes by running `git add`.
+1. Commit your changes by running `git commit`.
+1. (If you created a fork): Push your changes to your _fork_ by running `git push`.
+1. (If you did not create a fork): Push your changes to the _upstream_ repository by running `git push`. This assumes you have write permissions on the _upstream_ repository.
+1. In GitHub, create a _Pull request_.
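+
+In command form, steps 2 through 5 might look like this sketch (the file name and commit message are placeholders):
+
+```
+git add <edited-file>
+git commit -m "Brief description of the change"
+git push
+```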
+
+---
+
+## Summary
+
+- The process of making a **local** copy of a GitHub repository is called _cloning_. The destination for the cloned copy is whatever machine you ran the `git clone` command from.
+- _Forking_ a repository also makes a copy of a GitHub repo, but places it in your GitHub organization in the GitHub.com cloud.
+- Forking allows you to modify a remote repo, without affecting the original version.
+- After cloning your fork to your local computer, you can make changes to your copy, which you can then submit to the original repo as a [_Pull request_](github-pull-request).
+
+## Things to try
+
+- Clone another GitHub-hosted repository that is of interest to you.
+- Try creating a fork of that repository.
+
+### What's Next?
+
+In the next lesson, you will set some configurations on your GitHub account that enable uploads (aka _pushes_) from your local computer to GitHub. You will also configure notifications on your GitHub account.
+
+## References
+
+1. [Cloning vs Forking (GitHub Support)](https://github.community/t/the-difference-between-forking-and-cloning-a-repository/10189)
+1. [What the Fork?(GitHub Community)](https://github.community/t/what-the-fork/10187)
diff --git a/_preview/468/_sources/foundations/github/github-issues.md b/_preview/468/_sources/foundations/github/github-issues.md
new file mode 100644
index 000000000..d11843cf7
--- /dev/null
+++ b/_preview/468/_sources/foundations/github/github-issues.md
@@ -0,0 +1,100 @@
+```{image} ../../images/GitHub-logo.png
+:alt: GitHub Logo
+:width: 400px
+```
+
+# Issues and Discussions
+
+## Overview:
+
+1. What are Issues and Discussions?
+1. Examine an existing Issue
+1. Examine an existing Discussion
+
+## Prerequisites
+
+| Concepts | Importance | Notes |
+| ----------------------------------- | ---------- | ----- |
+| [What is GitHub?](what-is-github) | Necessary | |
+| [GitHub Repositories](github-repos) | Necessary | |
+
+- **Time to learn**: 5 minutes
+
+---
+
+## What are Issues and Discussions?
+
+GitHub provides two different, but related mechanisms for communicating
+within a repository about a project: _Issues_ and _Discussions_.
+Issues are more like “todo” items; they are task-focused. For example, Issues
+are often used to report and track bugs, request new features, or
+perhaps note a performance problem. Ultimately, the maintainers of
+a project may resolve the issue by fixing the bug, adding the
+feature, etc., and then closing the resolved issue, marking the
+task as completed. GitHub _Discussions_, much like the name implies,
+are more open ended, and may not have a resolution. Asking about a
+topic, discussing the merits of a new feature, or even advertising
+an event, such as a tutorial for your project, are all examples
+of _Discussions_.
+
+In the text below we discuss _Issues_ in more detail, followed by
+a discussion on Discussions. Keep in mind that when initiating a
+conversation on GitHub, it is often unclear whether something is
+more suited as an _Issue_ or a _Discussion_. We, the creators of
+Project Pythia, struggle with this ourselves. If you’re not sure, simply pick
+one. Fortunately, the GitHub developers recognized this dilemma, and
+made it easy to convert _Issues_ into _Discussions_ and vice versa.
+
+## Issues
+
+To get started, let's take a look at the [Issues page](https://github.com/ProjectPythia/pythia-foundations/issues) in Project Pythia's `pythia-foundations` repository:
+
+
+
+By default, it shows all open Issues, but we can see all [closed Issues](https://github.com/ProjectPythia/pythia-foundations/issues?q=is%3Aissue+is%3Aclosed) by clicking "Closed".
+
+
+
+Issues, Discussions, and Pull Requests are all numbered for easy reference. By opening, resolving, and then closing an issue, we are leaving behind a searchable public record of what the issue was, why we thought it was important, and how we resolved it. This is great for project management, since it gets old Issues out of the way without actually deleting them.
+
+Let's now examine [Issue \#144](https://github.com/ProjectPythia/pythia-foundations/issues/144).
+
+
+
+As you can see, some broken links were found in one of the Pythia Foundations tutorials, likely because the site being linked recently had its structure changed. An additional comment was added, as well as a label to help filtering/sorting Issues by topic. We then see that this issue was mentioned (by typing the issue number) elsewhere in the repository. In this case, it was mentioned in [Pull Request \#145](https://github.com/ProjectPythia/pythia-foundations/pull/145), which makes the changes to fix the issue. We can also see that the PR has been merged, which means the changes have been incorporated into the main branch of the code.
+
+Like this example, Issues can notify others of bugs or typos, but they can also be used as "calls to action", whether you plan on addressing the issue yourself, or are hoping that someone else will be interested in making the changes. Issues [\#97](https://github.com/ProjectPythia/pythia-foundations/issues/97) and [\#98](https://github.com/ProjectPythia/pythia-foundations/issues/98) are examples of this, in which ideas for changes are proposed and then addressed at a later time.
+
+A new issue can be opened by pressing the "New issue" button on the top right of the Issues page. Depending on the repository, you may be prompted to choose from a template, or you may just see title and text boxes to fill out.
+
+## Discussions
+
+Discussions, on the other hand, are more open-ended and do not _necessarily_ suggest a change or addition to the repository. Here is the [Discussions page for Pythia Foundations](https://github.com/ProjectPythia/pythia-foundations/discussions):
+
+
+
+Let's take a look at [Discussion \#156](https://github.com/ProjectPythia/pythia-foundations/discussions/156).
+
+
+
+This discussion brings up a resource relevant to the repository that could help others, but it is not suggesting a change like an issue would. Other Discussions might include announcements, Q&A, or general thoughts about the repository.
+
+GitHub also makes it simple to reference a Discussion in an Issue (and vice versa),
+which can help provide background and context for a piece of work.
+
+---
+
+## Summary
+
+- GitHub provides Issues and Discussions to facilitate collaboration.
+- Issues are specific and actionable, while Discussions are open-ended.
+- If you want to discuss a topic and you're not sure if it is an Issue
+ or a Discussion, just pick one. It will be okay. :-)
+
+### What's Next?
+
+We will work through cloning and forking an example repository.
+
+## References
+
+1. [What is GitHub Discussions? A complete guide](https://resources.github.com/devops/process/planning/discussions/)
diff --git a/_preview/468/_sources/foundations/github/github-pull-request.md b/_preview/468/_sources/foundations/github/github-pull-request.md
new file mode 100644
index 000000000..b1b720104
--- /dev/null
+++ b/_preview/468/_sources/foundations/github/github-pull-request.md
@@ -0,0 +1,169 @@
+```{image} ../../images/GitHub-logo.png
+:alt: GitHub Logo
+:width: 400px
+```
+
+# Opening a Pull Request on GitHub
+
+A Pull Request, aka a "merge request," is an event that occurs when a project contributor begins the process of merging new code changes from a feature branch with the main project repository.
+
+## Overview:
+
+1. What is a Pull Request?
+1. Opening a Pull Request
+1. Pull Request Features
+1. GitHub Workflows
+
+## Prerequisites
+
+| Concepts | Importance | Notes |
+| --------------------------------------------- | ----------- | ----- |
+| [What is GitHub](what-is-github) | Necessary | |
+| [GitHub Repositories](github-repos) | Necessary | |
+| [Cloning and Forking](github-cloning-forking) | Necessary | |
+| [Basic Version Control with _git_](basic-git) | Necessary | |
+| [Issues and Discussions](github-issues) | Recommended | |
+| [Branches](git-branches) | Necessary | |
+
+- **Time to learn**: 60 minutes
+
+---
+
+## What is a Pull Request?
+
+A Pull Request (PR) is a formal mechanism for requesting that changes
+that you have made to one repository are integrated (merged) into
+another repository. Typically, the changes are reviewed by the
+maintainers of the destination repository, potentially triggering
+a cycle of revisions, before the PR is “merged”, and your changes
+become part of the destination repo.
+
+Just like Issues, PRs have
+their own discussion forum for communicating about the proposed
+changes. In fact, not only can maintainers or collaborators communicate
+about your PR via GitHub, they can also suggest changes and may
+even be able to make changes of their own by pushing follow-up
+commits. All of the activity, from start to finish, is tracked
+inside of the PR and can be reviewed at any time.
+
+When a contributor to a project creates a PR they are requesting
+that the owners of another destination repository pull a git
+branch from the contributor’s repository and merge the contents of
+the branch into a branch of the destination repository. This means
+that the contributor must provide four pieces of information: the
+contributor’s repository, the contributor’s branch, the destination
+repository, and finally, the destination branch.
+
+A typical sequence of steps consists of the following, sketched in commands after the list:
+
+1. A contributor clones a personal remote repository, creating a local copy
+1. The contributor creates a new branch in their local repository
+1. The contributor makes changes to the branch and commits them to
+ their local repository
+1. The contributor _pushes_ the branch to a remote repository
+1. The contributor submits a PR via GitHub
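+
+In terms of commands, steps 1 through 4 look roughly like this sketch (the repository URL, branch name, and file name are placeholders; step 5 happens on GitHub):
+
+```
+git clone https://github.com/<your-username>/<your-repo>.git
+cd <your-repo>
+git checkout -b my-feature
+# edit files, then stage and commit:
+git add <edited-file>
+git commit -m "Describe the change"
+git push --set-upstream origin my-feature
+```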
+
+After the maintainers or collaborators of the destination review
+the changes, and any suggested revisions are made, the project
+maintainer merges the feature into the destination repository and
+closes the PR.
+
+## Opening a Pull Request
+
+This demonstration is a continuation of the [GitHub Branches chapter](github-branches). Here, we will move from your local terminal to GitHub.
+
+### Navigate to Your Fork
+
+Go to your fork of the [GitHub Sandbox Repository](https://github.com/ProjectPythia/github-sandbox). One fast way to get to your fork is to click the "fork" button and then follow the link underneath the message, "You've already forked github-sandbox."
+
+When you've navigated to your fork, you should see a message box alerting you that your branch `branchA` has had recent changes, along with the option to open a Pull Request. This Pull Request would take the changes from your `branchA` branch and propose them to the original upstream ProjectPythia github-sandbox repository. You'll also notice that you are on branch `main`, but that there are now 2 branches.
+
+![GitHub](../../images/8-github.png)
+
+### Switch Branches
+
+If you click on the branch `main` you'll see the list of these branches.
+
+![GitHub Branches](../../images/9-github-seebranches.png)
+
+There you can click on the branch `branchA` to switch branches.
+
+![New Branch](../../images/10-github-newbranch.png)
+
+Here you will see the message, "This branch is 1 commit ahead of ProjectPythia:main." Next to this message you'll see two options: "Contribute" (which opens a Pull Request) and "Fetch Upstream" (which pulls in changes from the original repository). And just above your files you'll see your most recent commit.
+
+### Open a Draft Pull Request
+
+Click on the "Open pull request" button under the "Contribute" drop-down.
+
+![Contribute](../../images/11-newbranch-contribute.png)
+
+This will send you to a new page. Notice that you are now in "ProjectPythia/github-sandbox" and not your fork.
+
+![Compare](../../images/12-compare.png)
+
+The page will have the two branches you are comparing with an arrow indicating which branch is to be merged into which. Here, `base` is the upstream origin and `head` is your forked repository. If you wanted, you could click on these branches to switch the merge configuration. Underneath that you'll see a green message, "Able to merge. These branches can be automatically merged." This message means that there are no conflicts. We will discuss conflicts in a later chapter.
+
+In a one-commit PR, the PR title defaults to your commit message. You can change this if you'd like. There is also a space to add a description. This is your opportunity to explain your changes to the owners of the upstream repository.
+
+![Message](../../images/13-message.png)
+
+And if you scroll down, you'll see a summary of this PR with every commit and changed file listed.
+
+![Summary](../../images/14-prsummary.png)
+
+Click the arrow next to "Create Pull Request" to change this to a draft PR.
+
+![To Draft](../../images/15-todraft.png)
+
+Once you've clicked "Draft Pull Request," you will be directed to the page of your new PR. Here you can add more comments or request reviews.
+
+![Draft PR](../../images/16-draft.png)
+
+## Pull Request Features
+
+Now let's look at the features and discussions in an open (draft) PR.
+Clicking "Files Changed" allows you to see all of the changes that would be merged with this PR.
+
+![Files](../../images/17-fileschanged.png)
+
+If you are working in a repository that has automatic checks, it is a good idea to wait for these checks to pass successfully before you request reviewers or change to a non-draft PR. Do this by clicking "Ready for Review."
+
+![Review](../../images/18-review.png)
+
+When working on a project with a larger team, do NOT merge your Pull Request before you have the approval of your teammates. Every team has their own requirements and best practice workflows, and will discuss/approve/reject Pull Requests together. We will cover more about the ways to interact with PRs through conversations and reviews in a later section.
+
+To someone with write permissions on the repository, the ability to merge will look like this green button:
+![Green](../../images/20-green.png)
+
+However, this PR will NOT be merged, as the GitHub-Sandbox repository is intended to be static.
+
+## GitHub Workflows
+
+The above demonstration is an example of the Git **Forking Workflow**, because we forked the [GitHub Sandbox repository](https://github.com/ProjectPythia/github-sandbox) before making our feature branches. This is most common when you do NOT have write-access to the upstream repository.
+
+This differs from the **Feature Workflow**, where all contributors work on a single, remote GitHub repository in specific feature branches. This is common when all contributors DO have write-access to the upstream repository.
+
+The steps leading up to creating your PR depend on your workflow. The main difference in creating the PR is that,
+for the Feature Workflow, the contributor navigates to the upstream, remote
+repository, not a personal remote fork, and initiates the PR there.
+
+We will cover [GitHub Workflows](github-workflows) in greater detail in the next chapter.
+
+---
+
+## Summary
+
+- A Pull Request (PR) is a formal mechanism for requesting that changes
+ that you have made to one repository are integrated (merged) into
+ another repository.
+- The steps that lead up to
+  the PR depend on your GitHub Workflow.
+
+### What's Next?
+
+In the next lesson we will learn more about [Reviewing Pull Requests](review-pr).
+
+## References
+
+1. GitHub's [Collaborating with Pull Requests](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests)
diff --git a/_preview/468/_sources/foundations/github/github-repos.md b/_preview/468/_sources/foundations/github/github-repos.md
new file mode 100644
index 000000000..4782dddeb
--- /dev/null
+++ b/_preview/468/_sources/foundations/github/github-repos.md
@@ -0,0 +1,104 @@
+```{image} ../../images/GitHub-logo.png
+:alt: GitHub Logo
+:width: 400px
+```
+
+# GitHub Repositories
+
+## Overview:
+
+1. Explore GitHub Repositories
+
+## Prerequisites
+
+| Concepts | Importance | Notes |
+| ----------------------------------------------------------------------------------------------- | ---------- | ----- |
+| [What is GitHub?](https://foundations.projectpythia.org/foundations/github/what-is-github.html) | Necessary | |
+
+- **Time to learn**: 15 minutes
+
+---
+
+## What is a GitHub repository?
+
+GitHub gives the following explanation of a [repository](https://docs.github.com/en/get-started/quickstart/hello-world):
+
+> A repository is usually used to organize a single project. Repositories can contain folders and files, images, videos, spreadsheets, and data sets -- anything your project needs. Often, repositories include a `README` file, a file with information about your project. GitHub makes it easy to add one at the same time you create your new repository. It also offers other common options such as a license file.
+
+In short, it is a _collection of files_. Each GitHub repository has an _owner_, which could be an individual or an organization. Repositories can also be set to _public_ or _private_, determining who can see and interact with it. While a repository can simply store files, GitHub is designed with **collaboration** in mind. Three key collaborative tools in GitHub are:
+
+1. **Issues**: report a bug, plan improvements, or provide feedback to others working on the repository.
+1. **Discussions**: post ideas or other conversations that are not as specific or actionable as an **Issue**.
+1. **Pull requests**: We will go into the specifics later, but a **Pull request** allows a user to _propose a change_ to any of the files within a repository.
+
+```{admonition} Tip
+:class: tip
+Typically, a GitHub repository will always include the **Issues** and **Pull requests** tabs. **Discussions** are not enabled by default, but are increasingly prevalent.
+```
+
+## What are some examples of repositories?
+
+All of the Python packages covered in this Foundations book (e.g., [NumPy](https://github.com/numpy/numpy) and [Xarray](https://github.com/pydata/xarray)) have associated GitHub repositories, as does [Python itself](https://github.com/python/cpython):
+
+
+
+
+
+
+
+As you can see by the recent timestamps, these repositories are actively changing; this reflects the adaptability of the [open-source software](https://opensource.org/osd) ecosystem surrounding Python.
+
+```{admonition} Tip
+:class: tip
+Notice that each of the three *Repositories* exists as part of its own *Organization*. In other words, the NumPy repository exists within the NumPy organization; the Xarray repo exists within the Pydata org, and so forth.
+
+When you [create your own GitHub account](https://foundations.projectpythia.org/foundations/github/what-is-github.html), your user ID functions as the *organization*. Any repositories you create (and therefore, *own*) will exist within that org.
+```
+
+Another example is this project's [Pythia Foundations repository](https://github.com/ProjectPythia/pythia-foundations), where this tutorial is stored. It is owned by the [Project Pythia organization](https://github.com/ProjectPythia). This organization also owns several other repositories that store the files needed to generate the Project Pythia websites, among other things.
+
+## GitHub's distributed repositories
+
+Finally, we introduce an important concept that is vital to your
+understanding when working with GitHub. It is the source of GitHub's power, as well
+as much of its complexity. GitHub repositories
+are _distributed_; in the general case, there is more than one
+repository for any project. In fact, repositories can come and go
+at any time, created and deleted as need dictates. Creating new
+repositories from existing ones, synchronizing them, and managing them
+are the topics of later sections. For now, it is only important to
+understand that for a GitHub-managed project, there is typically one
+"official" repository, often called the "upstream" repository, and it lives on GitHub.com. There may be any
+number of copies of the "official" repository, known as _forks_ (or _origins_,
+if it is owned by you),
+that also reside on GitHub.com. Repos that are hosted on GitHub.com
+are referred to as _remotes_. In addition to the remotes, there may
+be one or more copies of the remotes on your desktop or laptop
+computer that are referred to as _locals_. A conceptual diagram of
+the various repos is shown in the image below.
+
+![GitHub repositories](../../images/github-repos.png)
+
+---
+
+## Things to try:
+
+1. Browse the [NumPy](https://github.com/numpy/numpy), [Xarray](https://github.com/pydata/xarray), [Python](https://github.com/python/cpython), and [Pythia Foundations](https://github.com/ProjectPythia/pythia-foundations) repos.
+1. Browse the organizations (e.g., [Pydata](https://github.com/pydata)) which house the repos within.
+1. Check out GitHub's ["Create a repo"](https://docs.github.com/en/get-started/quickstart/create-a-repo) tutorial to learn how to create your own repository!
+
+---
+
+## Summary
+
+- GitHub's Repositories are collections of files.
+- Issues, Discussions, and Pull requests can be used to collaborate within a repository.
+- A GitHub _Organization_ contains _Repositories_.
+
+### What's Next?
+
+We will further explore Issues and Discussions.
+
+## References
+
+1. [GitHub's quickstart guide](https://docs.github.com/en/get-started/quickstart)
diff --git a/_preview/468/_sources/foundations/github/github-setup-advanced.md b/_preview/468/_sources/foundations/github/github-setup-advanced.md
new file mode 100644
index 000000000..e66244637
--- /dev/null
+++ b/_preview/468/_sources/foundations/github/github-setup-advanced.md
@@ -0,0 +1,193 @@
+```{image} ../../images/GitHub-logo.png
+:alt: GitHub Logo
+:width: 400px
+```
+
+# Configuring Your GitHub Account
+
+## Overview:
+
+1. Configure your GitHub account for secure logins via ssh and/or https
+1. Set up notifications on repositories you own or follow
+
+## Prerequisites
+
+| Concepts | Importance | Notes |
+| ---------------------------------------------------------- | ----------- | ---------------------------- |
+| [What is GitHub?](what-is-github) | Necessary | GitHub user account required |
+| [GitHub Repositories](github-repos) | Necessary | |
+| [Issues and Discussions](github-issues) | Recommended | |
+| [Cloning and Forking a Repository](github-cloning-forking) | Recommended | |
+
+- **Time to learn**: 35 minutes
+
+---
+
+## GitHub secure key generation
+
+When you signed up for your free account on [GitHub](https://github.com), you established a _user ID_ and its corresponding _password_. Many of the repositories that GitHub serves are readable from anywhere, not even requiring a GitHub account.
+
+However, especially when you use the git command-line interface to access a GitHub-hosted repo, there are cases when you need to provide an additional set of login credentials. Some of these cases are:
+
+1. When you want to clone a _private_, as opposed to _public_ GitHub repository (**read-access**)
+2. When you wish to _push_ to a repo (**write-access**)
+
+For these use-cases, you won't be able to simply type your GitHub user ID and password from the command line. Instead, you need to set up _access tokens_ that live in two places: in your GitHub account, and in your local computer's file system.
+
+GitHub supports two means of key-based access: via _https_, and via _ssh_.
+
+For example, one can clone [Project Pythia's Sandbox repository](https://github.com/ProjectPythia/github-sandbox) using a URL for the https protocol:
+
+```{image} ../../images/GitHub_Setup_Advanced_https_URL.png
+:alt: GitHub Clone https
+:width: 600px
+```
+
+---
+
+The URL in this case is **https://github.com/ProjectPythia/github-sandbox.git**
+
+Similarly, if you click on the **SSH** tab:
+
+```{image} ../../images/GitHub_Setup_Advanced_ssh_URL.png
+:alt: GitHub Clone ssh
+:width: 600px
+```
+
+---
+
+Here, the URL is **git@github.com:ProjectPythia/github-sandbox.git**
+
+## Generate a secure personal access token for https
+
+First, you will create a secure token in your GitHub account settings, and then use the token on your local computer.
+
+Follow the steps with helpful screenshots at [GitHub's PAT Creation](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token) page.
+
+```{admonition} Tip:
+:class: tip
+If using the *https* protocol to *push* to a remote repo, you must have generated and downloaded a personal access token. You *may* also need it when cloning, *if* the remote repo is *not* open to all.
+```
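+
+For example, when you later clone a private repository or push over *https*, git will prompt for credentials; enter your GitHub user ID and paste the token in place of a password. A sketch, with placeholder values:
+
+```
+git clone https://github.com/<your-username>/<private-repo>.git
+Username for 'https://github.com': <your-username>
+Password for 'https://<your-username>@github.com': <paste-your-personal-access-token>
+```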
+
+## Generate an SSH public/private keypair
+
+First, on your local computer, you will create an SSH _public/private keypair_, and then upload the _public key_ to your GitHub account.
+
+Follow the steps with helpful screenshots at [GitHub's Connecting to GitHub with SSH](https://docs.github.com/en/github/authenticating-to-github/connecting-to-github-with-ssh) page.
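+
+For reference, generating the keypair typically begins with a command like the following (the email address is a placeholder; GitHub's guide also covers adding the key to `ssh-agent` and to your account):
+
+```
+ssh-keygen -t ed25519 -C "your_email@example.com"
+```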
+
+```{admonition} Tip:
+:class: tip
+If using the *ssh* protocol to clone *or* push, you *must* have generated and created an ssh key-pair.
+```
+
+---
+
+```{admonition} HTTPS vs SSH: Either is fine!
+:class: note
+Either **https** or **ssh** works fine. Choose whatever you prefer. See [this overview](https://www.toolsqa.com/git/ssh-protocol/) of the pros and cons of each protocol.
+```
+
+---
+
+## GitHub notifications
+
+In keeping with the social network aspect of GitHub, you can _follow_ particular repositories that are of interest to you. Additionally, once you begin contributing to a repository, you may wish to be notified when Pull Requests are made, Issues are posted, your code review is requested, and so on. While it's easy to have GitHub email you at the address you used when you registered for your GitHub account, you may wish to avoid email clutter.
+
+### Email notifications
+
+Let's say you wish to monitor (or _watch_) the Project Pythia GitHub Sandbox repository and receive emails about it.
+
+Click on the **Watch** link near the top of the page:
+
+```{image} ../../images/GitHub_Setup_Advanced_Watch.png
+:alt: GitHub Watch
+:width: 600px
+```
+
+---
+
+You can then select what type of notifications you wish to receive. For example, you may want to receive _all notifications_ related to that repo:
+
+```{image} ../../images/GitHub_Setup_Advanced_Watch_All_Activity.png
+:alt: GitHub Watch All Activity
+:width: 600px
+```
+
+---
+
+You will then receive email at the address you used when you signed up for GitHub whenever activity occurs on that repo.
+
+```{image} ../../images/GitHub_Setup_Advanced_Unwatch.png
+:alt: GitHub Unwatch
+:width: 600px
+```
+
+---
+
+You can stop watching that repo by clicking on the link (now labeled _Unwatch_) again, and choosing _Participating and @mentions_ to return to the default notification level.
+
+## Stop spamming me, GitHub!
+
+It's easy to become overwhelmed with email from one or more repos that you are following and/or participating in! In this case, you may wish to disable email notifications.
+To adjust your notification settings, go to **https://github.com/settings/notifications**. You can, for example, uncheck the **Email** boxes to stop receiving notifications that way:
+
+```{image} ../../images/GitHub_Setup_Advanced_Notification_Settings.png
+:alt: GitHub Notification Settings
+:width: 600px
+```
+
+---
+
+If you turn email notifications off, get in the habit of clicking on the _Notifications_ icon when logged into GitHub:
+
+```{image} ../../images/GitHub_Setup_Advanced_Notifications.png
+:alt: GitHub Notifications
+:width: 600px
+```
+
+---
+
+You can click on the _Notifications_ icon and scroll through all notifications from repos that you opted into receiving notifications from:
+
+```{image} ../../images/GitHub_Setup_Advanced_Notifications_Browser.png
+:alt: GitHub Notification Browser
+:width: 600px
+```
+
+---
+
+Use the _Filter notifications_ control to display only those that meet certain criteria. For example, say you only wanted to view topics related to the _MetPy_ repo:
+
+```{image} ../../images/GitHub_Setup_Advanced_Notification_Filter.png
+:alt: GitHub Notification Filter
+:width: 600px
+```
+
+---
+
+```{admonition} Tip:
+:class: tip
+In the list of notifications, you can unsubscribe as shown below.
+```
+
+```{image} ../../images/GitHub_Setup_Advanced_Notifications_Unsubscribe.png
+:alt: GitHub Notification Unsubscribe
+:width: 600px
+```
+
+---
+
+## Summary
+
+- GitHub uses **secure tokens** to enable _write_ (and sometimes _read_) _access_ to GitHub repositories.
+- You can opt-in to notifications on a repo. The default, which can be easily changed, is to receive email.
+
+### What's Next?
+
+In the next section, we will learn the basics of version control using command-line `git`.
+
+## References
+
+1. [GitHub Personal Access Token (https)](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token)
+1. [GitHub Public/Private Keypair (ssh)](https://docs.github.com/en/github/authenticating-to-github/connecting-to-github-with-ssh)
+1. [Remotes in GitHub (Carpentries Tutorial)](https://swcarpentry.github.io/git-novice/07-github.html)
diff --git a/_preview/468/_sources/foundations/github/github-workflows.md b/_preview/468/_sources/foundations/github/github-workflows.md
new file mode 100644
index 000000000..eb83969f1
--- /dev/null
+++ b/_preview/468/_sources/foundations/github/github-workflows.md
@@ -0,0 +1,469 @@
+```{image} ../../images/Git-Logo-2Color.png
+:alt: Git Logo
+:width: 400px
+```
+
+# GitHub Workflows
+
+A workflow is a series of activities or tasks that must be completed sequentially or in parallel to achieve the desired outcome. Here we outline two different GitHub workflows that take you through the steps leading up to opening a Pull Request.
+
+## Overview:
+
+1. GitHub workflows overview
+1. Git Feature Branch Workflow
+1. Forking workflow
+
+## Prerequisites
+
+| Concepts | Importance | Notes |
+| --------------------------------------------- | ----------- | ----- |
+| [What is GitHub](what-is-github) | Necessary | |
+| [GitHub Repositories](github-repos) | Necessary | |
+| [Cloning and Forking](github-cloning-forking) | Necessary | |
+| [Basic Version Control with _git_](basic-git) | Necessary | |
+| [Issues and Discussions](github-issues) | Recommended | |
+| [Branches](git-branches) | Necessary | |
+| [Pull Requests](github-pull-request) | Necessary | |
+| [Reviewing Pull Requests](review-pr) | Recommended | |
+
+- **Time to learn**: 60 minutes
+
+---
+
+## GitHub workflows
+
+GitHub, together with Git, are powerful tools for managing and
+collaborating on all kinds of digital assets, such as software,
+documentation, and even manuscripts for research papers. Like other
+complex software environments, often these tools can be employed
+in many different ways to accomplish the same goal. In order to
+effectively and consistently use Git and GitHub, over the years a
+variety of best practices have evolved for supporting different
+modes of collaboration. Collectively these different models, or
+recipes, are referred to as _workflows_.
+
+A typical sequence of workflow steps consists of the following:
+
+1. A contributor clones a personal remote repository, creating a local copy
+1. The contributor creates a new branch in their local repository
+1. The contributor makes changes to the branch and commits them to
+ their local repository
+1. The contributor _pushes_ the branch to a remote repository
+1. The contributor submits a PR via GitHub
+
+The sequence of steps
+outlined above provides a general framework for submitting a PR.
+But the precise set of steps is highly dependent on the choice of
+workflow for a given project. In this chapter we describe Pull
+Requests for two commonly used workflows: The **Git Feature Branch
+Workflow** and the **Forking Workflow**. The former is simpler and often
+used by teams when everyone on the team is an authorized contributor
+to the destination repository. That is, all of the contributors have
+write access to the remote repository hosted by GitHub. The latter
+is typically what is needed to contribute to external projects for
+which the contributor is not authorized (i.e. does not have write
+access) to make changes to the destination repository. We briefly
+describe both workflows below, and include the steps necessary to
+make a PR on each.
+
+## Git Feature Branch Workflow
+
+The **Git Feature Branch Workflow** is one of the simplest and oldest
+collaborative workflows that is used for small team projects. The
+key idea behind this workflow, which is also common to the **Forking
+Workflow**, is that all development (all changes) should take place
+on a dedicated Git _feature_ branch, not the _main_ (historically
+referred to as _master_) branch. The motivation behind this is that
+one or more developers can iterate over a feature branch without
+disturbing the contents of the main branch. Consider using the **Git
+Feature Branch Workflow** for GitHub’s most widely used purpose,
+software development. Software modifications are liable to introduce
+bugs. Isolating them to a dedicated branch until they can be fixed
+ensures that a known, or official, version of the software is always
+available and in working order.
+
+```{note}
+Avoiding edits directly on the `main` branch is considered best practice for most workflows and projects!
+```
+
+### Working with the Git Feature Branch Workflow
+
+This model assumes a single, remote GitHub repository with a branch
+named `main`, that contains the official version of all of the digital
+assets, along with a history of all of the changes made. When a
+contributor wishes to make changes to the remote repository, they
+clone the repo and create a descriptively named feature branch,
+such as `my-new-feature` or perhaps `issue-nnn`, where `nnn` is the
+number of an issue opened on the repository that this new feature
+branch will address. Changes by the contributor are then made to
+the feature branch in a local copy of the repository. When ready,
+the new branch is pushed to the remote repository.
+
+At this point,
+the new branch can be viewed, discussed, and even changed by
+contributors with write access to the remote repository. When the
+author of the feature branch thinks the changes are ready to be
+merged into `main` on the remote repository, they create a PR. The
+PR signals the project maintainers that the contributor would like
+to merge their feature branch into `main`, and invites review of the
+changes made in the branch. GitHub simplifies the process of viewing
+the changes by offering a variety of ways to see context differences
+(diffs) between `main` and the feature branch. Discussion between
+the reviewers and the contributor inside a PR discussion forum
+occurs in the same way that discussion over GitHub [Issues](github-issues) takes
+place inside a discussion forum associated with a particular issue.
+If additional changes are requested by the reviewers, these can be
+made by the contributor in their local repository, committed, and
+then pushed to the remote using the same processes they used with
+the initial push. Once reviewers are satisfied with the changes, a
+project maintainer can merge the feature branch with `main`.
+
+##### Cloning the remote repository
+
+If you don’t have a local copy of the remote repository, you’ll want
+to create one by [cloning the
+remote](github-cloning-forking)
+to your local computer. This can be done with the git command line
+tools and the general form of the command looks like this:
+
+```
+git clone repository-url local-directory-name
+```
+
+Where `repository-url` is the URL for the GitHub repo that you want
+to clone, and `local-directory-name` is the directory path on your
+local machine into which you want to create the clone. The local
+directory need not already exist. The clone command will create the
+local directory for you. If you don’t know the URL for your
+repository, navigate your web browser to your GitHub repository,
+and click on the `Code` button. The URL will be displayed.
+
+For example, let's clone the [Project Pythia sandbox repository](https://github.com/ProjectPythia/github-sandbox):
+
+```
+git clone https://github.com/ProjectPythia/github-sandbox.git
+```
+
+Note, we did not specify a `local-directory-name` here, so git will
+use the base name of the `repository-url`, `github-sandbox`, as
+the local directory.
+
+##### Start with the main branch
+
+Continuing with our example above, make sure you are on the main
+branch and that it is up to date with the remote repository main:
+
+```
+cd github-sandbox
+git checkout main
+git pull
+```
+
+You should see output that looks like:
+
+```
+Already on 'main'
+Already up to date.
+```
+
+Remember you can read more about [GitHub branches](github-branches) in our previous chapter.
+
+##### Create a new branch
+
+Create a separate branch for every new capability you work on:
+
+```
+git checkout -b my-new-feature
+```
+
+This command will create a new branch named `my-new-feature`, if it
+doesn’t exist already, or switch to the existing branch if it does.
+Either way, any changes you make will occur in the branch `my-new-feature`,
+not in `main`. The output should look something like:
+
+```
+Switched to a new branch 'my-new-feature'
+```
+
+##### Make changes and commit
+
+Next, we'll make changes and commit them to the `my-new-feature` branch in
+the local git repository.
+
+Use your favorite editor to edit the file "sample.py". Add the line:
+
+```
+print ("Do you like to rock the party?")
+```
+
+after the existing `print` statement in the file.
+
+Run the command `git status` and look at the output. You should see
+something like:
+
+```
+On branch my-new-feature
+Changes not staged for commit:
+  (use "git add <file>..." to update what will be committed)
+  (use "git restore <file>..." to discard changes in working directory)
+ modified: sample.py
+
+no changes added to commit (use "git add" and/or "git commit -a")
+```
+
+Another helpful command is `git diff`, which should give output
+that looks like:
+
+```
+diff --git a/sample.py b/sample.py
+index b2a3b61..bf89419 100644
+--- a/sample.py
++++ b/sample.py
+@@ -1,5 +1,6 @@
+ """This is a text file that contains a sample Python script"""
+ print ("Hello, Python learners!")
++print ("Do you like to rock the party?")
+ a = 2
+ b = 8
+```
+
+It's probably obvious that `git status` will show you which files have been modified and are
+ready to be committed, while `git diff` will show you how your changes
+to `my-new-feature` branch differ from the `main` branch in the local
+repository. Once you are ready, commit your changes to the local
+repository:
+
+```
+git add sample.py
+git commit -m "having fun yet?"
+```
+
+After a successful commit you should see a message like:
+
+```
+[my-new-feature 69162bc] having fun yet?
+ 1 file changed, 1 insertion(+)
+```
+
+##### Push the feature branch to the remote repository
+
+After running `git commit` your changes have been captured in your
+local repository. But most likely only you can see them, and if
+your local file system fails your changes may be lost. To make your
+changes visible to others, and safely stored on your remote GitHub
+repository, you need to push them. However, remember at the beginning
+of this section we said that the **Git Feature Branch Workflow** works
+when you have write access to the remote repository? Unless you are
+a member of Project Pythia you probably don't have write access to
+the `github-sandbox` remote repo. So you won't be able to push your
+changes to it. That's OK. We can still run the `push` command. It won't
+break anything. In the next section on **Forking Workflow** we will
+discuss how to make changes on remote repositories that you do NOT
+have write access to, such as the one we're using in this example. Here
+is the `push` command that we expect to fail:
+
+```
+git push --set-upstream origin my-new-feature
+```
+
+You should get a helpful error message like:
+
+```
+remote: Permission to ProjectPythia/github-sandbox.git denied to clyne.
+fatal: unable to access 'https://github.com/ProjectPythia/github-sandbox.git/': The requested URL returned error: 403
+
+```
+
+The use of the `--set-upstream` option is a one-time operation when
+you push a new branch. Later, if you want to push subsequent changes
+to the remote you can simply do:
+
+```
+git push
+```
+
+If you are feeling unsatisfied about not having `git push` succeed, there
+is a simple solution: create a GitHub repository owned by you. The
+GitHub Quickstart guide provides an excellent [tutorial](https://docs.github.com/en/get-started/quickstart/create-a-repo) on how to
+do this.
+
+##### Making a Pull Request
+
+Finally, after cloning a remote repository, creating a feature
+branch, making your changes, committing them to your local repository,
+and pushing your commits back to the remote repository, you are now
+ready to issue a PR requesting that the remote repository maintainers
+review your changes for potential merger into the main branch on
+the remote. This final action must be performed from within your
+web browser. After
+navigating to your repo do the following:
+
+1. Click on “Pull Requests” in the top navigation bar
+1. Click on “New Pull Request”
+1. Under “Compare changes”, make sure that `base` is set to `main`, and `compare` is set to the name of your feature branch, `my-new-feature`
+1. Click on “Create Pull Request”
+1. A PR window should open up. Provide a descriptive title, and any helpful comments that you want to communicate with the reviewers
+1. Click on “Create Pull Request” in the PR window.
+
+That’s it! You’re done! Sit back and wait for comments from reviewers.
+If changes are requested, simply repeat the steps above. Once your
+PR is merged you’ll receive notification from GitHub.
+
+##### Safety tip on synchronization
+
+Over time your local repository will diverge from the remote. Before
+starting on a new feature, or if the `main` branch on remote may have
+been updated while you were working on `my-new-feature`, it is a good
+idea to periodically sync up with the remote `main`. Make sure all
+of your changes to `my-new-feature` have been committed to the local
+repository, and then do:
+
+```
+git checkout main
+git pull
+git checkout my-new-feature
+git merge main
+```
+
+## Forking Workflow
+
+The **Git Feature Branch Workflow** described above, along with the
+steps needed to submit a PR, work when you have write access to the
+remote repository. But as we saw, if you don't have write access
+you will not be able to push your changes to the remote repo. So,
+if you are contributing to an open source project, such as Project
+Pythia for example, a slightly different workflow is required.
+The **Forking Workflow** is the one most commonly used for public open
+source projects. The primary difference between the **Forking Workflow**
+and the **Git Feature Branch Workflow** is that with the former, two
+remote repositories are involved: one managed by the developers of
+the project that you wish to contribute to, and one owned by you.
+To help keep things clear we will refer to these remotes as the
+upstream repository and the personal repository, respectively. Not
+surprisingly, the personal repository will be a clone of the project
+repository that you own and can push changes to. The personal
+repository must be public, so that the maintainers of the upstream
+repository can pull changes from it. Other than a couple of additional
+steps required at the beginning and the end, the process of submitting
+a PR when using the **Forking Workflow** is identical to that of the
+**Git Feature Branch Workflow**. The basic steps are as follows:
+
+1. A contributor _forks_ the upstream repository, creating a remote clone that is owned by the contributor: the personal repository
+1. The contributor then clones the newly created personal remote repository, creating a local copy. Yup, that is two clones.
+1. The contributor creates a new branch in their local repository
+1. The contributor makes changes to the branch and commits them to their local repository
+1. The contributor pushes the branch to their personal remote repository that was created in step 1
+1. The contributor submits a PR via GitHub to the upstream repository
+
+Note that steps 2 through 5 are identical to steps 1 through 4 for
+the **Git Feature Branch Workflow**. Hence, here we only discuss the
+first step, and last step.
+
+### Forking the upstream repository
+
+GitHub makes it really easy to fork a remote repository. Simply
+navigate your web browser to the upstream repository that you want
+to fork, and click on Fork. GitHub will create a clone of the
+upstream repository in the remote destination selected by you on
+GitHub, and will then redirect your browser to the newly created
+forked, personal repository. The personal repository is owned by
+you. Any changes made here will not impact the upstream repository
+until you are ready to submit a PR. Let's try it. Follow
+the steps under Forking a repository [here](github-cloning-forking).
+
+### Clone, branch, change, commit, push
+
+The next steps are the same as described above for the **Git Feature
+Branch Workflow**. Clone a local copy of the newly created remote,
+personal repository, create a feature branch, make your changes,
+commit your changes, and push the new branch with your commits to your personal repository.
+
+### Making a Pull Request
+
+Once the new feature branch has been pushed to the contributor’s
+personal repository, a PR can be created that asks the maintainers
+of the upstream repository to merge the contents of the feature
+branch on the contributor’s repository into the main branch on the
+upstream repository. This step is remarkably similar to making a
+PR in the **Git Feature Branch Workflow**. The only difference is that
+the contributor navigates their browser to the upstream, remote
+repository, not the personal remote, and initiates the PR there.
+Specifically, the following steps are once again followed, but
+performed on the upstream remote:
+
+1. Click on “Pull Requests” in the top navigation bar
+1. Click on “New Pull Request”
+1. Under “Compare changes”, make sure that `base` is set to `main`, and `compare` is set to the name of your feature branch, `my-new-feature`
+1. Click on “Create Pull Request”
+1. A PR window should open up. Provide a descriptive title, and any helpful comments that you want to communicate with the reviewers
+1. Click on “Create Pull Request” in the PR window.
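+
+As an aside, when the feature branch was first pushed (step 5 of the workflow), Git's output typically includes a convenience link for starting the PR; the output below is a trimmed, hypothetical example:
+
+```
+$ git push -u origin my-new-feature
+remote: Create a pull request for 'my-new-feature' on GitHub by visiting:
+remote:      https://github.com/YOUR_USERNAME/YOUR_FORK/pull/new/my-new-feature
+```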
+
+### Safety tip on synchronization
+
+Just like with the **Git Feature Branch Workflow** model, over time
+your local repository will diverge from the remote(s). Before
+starting on a new feature, or if the main branch on the remote may have
+been updated while you were working on `my-new-feature`, it is a good
+idea to periodically sync up with the remote `main`. When working
+with forks, things get a little more complicated than when only a
+single remote is involved. Before syncing with the upstream remote,
+you must first configure your local repository. Start by listing
+your existing remotes from within your local copy of the repo:
+
+```
+git remote -v
+```
+
+This should produce an output that looks similar to the following:
+
+```
+origin https://github.com/YOUR_USERNAME/YOUR_FORK.git (fetch)
+origin https://github.com/YOUR_USERNAME/YOUR_FORK.git (push)
+```
+
+Next, specify a new remote upstream repository that will be synced with the fork.
+
+```
+git remote add upstream upstream-url
+```
+
+where `upstream-url` is the URL of the upstream repository.
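+
+For example, if your fork came from the Project Pythia foundations repository, the command might look like the following (substitute the actual URL of the project you forked):
+
+```
+git remote add upstream https://github.com/ProjectPythia/pythia-foundations.git
+```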
+
+Finally, rerun the `git remote -v` command and you should see output
+that looks like this:
+
+```
+origin https://github.com/YOUR_USERNAME/YOUR_FORK.git (fetch)
+origin https://github.com/YOUR_USERNAME/YOUR_FORK.git (push)
+upstream https://github.com/ORIGINAL_OWNER/ORIGINAL_REPOSITORY.git (fetch)
+upstream https://github.com/ORIGINAL_OWNER/ORIGINAL_REPOSITORY.git (push)
+```
+
+After performing the above steps, you can then synchronize your
+local repository with the upstream remote by running the following:
+
+```
+git fetch upstream
+git checkout main
+git merge upstream/main
+```
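+
+Once your local `main` is up to date, you may also want to push it to your fork so that all three copies agree:
+
+```
+git push origin main
+```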
+
+---
+
+## Summary
+
+- The steps that lead up to
+ the PR depend on your GitHub workflow.
+- Two commonly used GitHub workflows are the **Git Feature Branch Workflow** and
+ **Forking Workflow**. The former is appropriate for teams of collaborators
+ where everyone has write access to the GitHub repository. The latter
+ is commonly used when a developer wishes to contribute to a public GitHub
+ project for which they do not have write access to the repository.
+
+### What's Next?
+
+In the next lesson we will put the **Forking Workflow** to work and show you
+how to use it to [contribute to Project Pythia](contribute-to-pythia).
+
+## References
+
+1. Atlassian's tutorial on [workflows](https://www.atlassian.com/git/tutorials/comparing-workflows)
+1. GitHub's [Collaborating with Pull Requests](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests)
diff --git a/_preview/468/_sources/foundations/github/review-pr.md b/_preview/468/_sources/foundations/github/review-pr.md
new file mode 100644
index 000000000..52ef0bc91
--- /dev/null
+++ b/_preview/468/_sources/foundations/github/review-pr.md
@@ -0,0 +1,127 @@
+```{image} ../../images/GitHub-logo.png
+:alt: GitHub Logo
+:width: 400px
+```
+
+# Reviewing Pull Requests
+
+Pull Requests (PRs) are typically reviewed by collaborators before being merged into the main project branch. Many people feel overwhelmed, or feel as though their skills are lacking, when asked to perform their first PR review. If you find yourself in this or a similar situation, this tutorial can help: with it, anyone can quickly learn the basics of reviewing PRs, which can boost collaboration and productivity in any project hosted on GitHub. It also contains useful tips on how to effectively review a PR in many different situations.
+
+## Overview:
+
+This tutorial covers the following topics:
+
+1. What is a Pull Request Review?
+1. Requesting Pull Request Reviews
+1. Ways to View a Pull Request
+1. Providing a Pull Request Review
+1. What to Look for When Reviewing
+
+## Prerequisites
+
+| Concepts | Importance | Notes |
+| --------------------------------------------- | ----------- | ----- |
+| [What is GitHub](what-is-github) | Necessary | |
+| [GitHub Repositories](github-repos) | Necessary | |
+| [Cloning and Forking](github-cloning-forking) | Necessary | |
+| [Basic Version Control with _git_](basic-git) | Necessary | |
+| [Issues and Discussions](github-issues) | Recommended | |
+| [Branches](git-branches) | Necessary | |
+| [Pull Requests](github-pull-request) | Necessary | |
+
+- **Time to learn**: 30 minutes
+
+---
+
+## What is a Pull Request Review?
+
+A PR review is an opportunity for a team member to look through proposed file changes and request improvements before those changes are merged into the primary project branch (usually called "main") or another important project branch. The reviewer may attempt to acquire information about the content of the PR by asking precise questions. They may also suggest edits to the content, either explicitly, such as changes to specific lines of code, or implicitly, such as a request for more detailed documentation. Before the PR is merged, the author of the PR content should attempt to satisfy all requests in the review. In fact, if the branch being updated by the PR has active protections, the author may be required to satisfy some of those requests.
+
+## Requesting Pull Request Reviews
+
+People learning GitHub are often unsure about when to request a review on a PR they create. The answer is that a review should be requested when the PR is (or is likely to be) ready to merge into the primary project branch (or another important project branch).
+
+To start the review process, navigate to the right sidebar menu that appears when viewing your PR. Then, under "Reviewers", select the gear icon, and select or enter the GitHub user ID of the person you would like to review your work. If the files listed in the PR are owned or recently edited by specific reviewers, GitHub may automatically suggest the user IDs of those reviewers.
+
+```{admonition} Did you know?
+:class: info
+It is possible to automate this process with a `CODEOWNERS` file and [GitHub actions](https://docs.github.com/en/actions).
+```
+
+To learn more about any topic relating to requesting a PR review, including topics such as CODEOWNERS files, please review the official [Requesting a Pull Request Review Documentation](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/requesting-a-pull-request-review).
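+
+As a rough sketch, a `CODEOWNERS` file maps file patterns to the users or teams whose review should be requested automatically; the paths and names below are hypothetical:
+
+```
+# Request review from the docs team for anything under docs/
+docs/    @our-org/docs-team
+
+# Request review from a specific user for all Python files
+*.py     @some-username
+```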
+
+## Ways to View a Pull Request
+
+If you are unfamiliar with the process of reviewing a PR, this section describes it in detail. The first step in reviewing a PR is to examine the files it changes, and before doing so, it helps to have a meaningful way to view those files.
+
+The first useful way to view changed files in a PR is through the PR's "Files Changed" tab. On this tab, added content is displayed in green, while removed content is displayed in red.
+
+This method of viewing changed files works well for most types of code; however, if the code is designed to be rendered as a webpage, Jupyter Notebook, or other similar format, a different method of viewing is recommended.
+
+There are some standard methods of easily viewing Jupyter Notebooks and rendered webpages in GitHub; these are commonly used by repositories with large amounts of this type of content. GitHub Actions can be used to provide previews of the rendered content, and third-party services, such as [ReviewNB](https://www.reviewnb.com/), also allow for viewing of this content. One caveat: when viewing a preview of webpage content provided by GitHub Actions, clicking any absolute link will take the web browser out of the preview and out of GitHub.
+
+Another popular way to easily view any type of PR content is to locally check out the PR branch. This can be accomplished by cloning the GitHub repo and switching in the local clone to the branch containing the PR. Viewing a PR through a local clone allows the reviewer to use any applications available through the terminal, including code editors, Jupyter applications, and Web browsers, to view the changed files quickly and easily. For more information on this process, please review the [documentation GitHub provides](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/reviewing-changes-in-pull-requests/checking-out-pull-requests-locally) on checking out pull requests locally.
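+
+As a minimal sketch of that process, assuming the repository has already been cloned and the PR number is 42 (a hypothetical example), a reviewer could fetch and switch to the PR branch like this:
+
+```
+git fetch origin pull/42/head:pr-42
+git checkout pr-42
+```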
+
+As described above, there are many ways to view changed files in a GitHub PR, including local clones, GitHub action previews, and services such as ReviewNB. However, these services may not detail the changes to the files listed in the PR; therefore, the "Files Changed" tab should be the main resource for deciding where to focus a review.
+
+## Providing a Pull Request Review
+
+There are many ways to provide a PR review. The most basic of these is to comment on specific lines. This type of review can be performed through the "Files Changed" tab. By clicking on the "+" icon next to a line of code, the reviewer can provide a comment, and either start a new review, or simply link the comment to the line of code.
+
+If the review consists mainly of comments relevant to specific lines of code, this review method is preferred.
+
+If you are the reviewer, and the review consists mainly of small edits that you can perform yourself, this is also the preferred review method. To start one of these small edits, open a comment on the line of code to be edited, as described above. You can then suggest the edit by clicking on the "+-" icon, circled in red in the screenshot below. This icon automatically populates the comment box with the line of code and formats it with Markdown. Replace the line of code in the comment box with the edited version, then link the comment or start a new review as described above.
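+
+For instance, when suggesting a one-line fix, the populated comment box might end up looking something like this (the `suggestion` fence is GitHub's "suggested changes" syntax; the code line itself is a hypothetical example):
+
+````
+```suggestion
+print("Hello, world!")
+```
+````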
+
+If the review is more complex than simple edits to specific lines of code, you can find more detailed reviewing tools in the Review Changes menu in the top right. This menu contains a comment box, as well as options for specific types of review. These options are described in detail after the informational screenshot below.
+
+- The "Comment" option allows the reviewer to provide simple comments or questions on the PR before the review is finished and the PR merged. Please note that comments and questions that may hinder the PR merge process should not be handled in this way.
+
+- The "Approve" option is used to indicate that the reviewer wholeheartedly approves the content changes in the PR, and that these content changes should be merged into an important project branch as quickly as possible. This option is also known as the LGTM (let's get this merged) option.
+
+- The "Request changes" option is used to indicate that the content changes contain one or more elements that require improvement or resolution before the PR can be merged.
+
+After providing review text in the comment box, and selecting a review type, make sure to click on the "Submit Review" button to finish the review.
+
+## What to Look for When Reviewing
+
+There are specific elements of PRs that are more commonly prioritized during a review. To address these elements, most reviewers perform the following tasks:
+
+- Look at the description and linked GitHub issue to make sure the PR addresses the issue
+- Attempt to figure out the details of the content changes in the PR, and the purpose of those changes
+- Look at the content for spelling errors
+- Provide feedback on the code itself
+ - Does the code contain input checks, debug statements, verification, or the like?
+ - If the code contains any of these checks, are they sufficiently robust?
+ - Is the code written in a way that allows for understanding of its purpose?
+ - As the reviewer, are you familiar with a way to simplify the code, or make the code more efficient?
+ - Does the code contain identifiers with conflicting or confusing names that need correcting?
+ - Do you, as the reviewer, notice any other issue with the code that may need to be dealt with in your review?
+- If any of the content changed by the PR is meant to be rendered (e.g., as a webpage or Jupyter Notebook), preview this content to check for issues with design and functionality
+- Finally, try to clearly state not only the changes requested in your review, but also the issues your review does not cover. It is perfectly acceptable not to address every item in this list; however, it is good practice to state which items your review covers and the nature of the requested changes. Most teams that manage a GitHub repository appreciate the inclusion of opinion and detail in a PR review.
+
+---
+
+## Summary
+
+- PR reviews safeguard the primary project branch (and other important project branches) in a GitHub repository. These reviews require contributors in a repository to perform a detailed examination of changes to code and other files. The target branch remains unchanged until these examinations are finished.
+- There exist certain standards pertaining to PR reviews; in addition to following these standards, it is important to provide detail on the basis of your review.
+
+### What's Next?
+
+The next tutorial will cover standards and other details about [GitHub Workflows](github-workflows).
+
+## Resources and References
+
+1. GitHub's tutorial on [Collaborating with Pull Requests](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests)
+2. GitHub's tutorial on [Requesting a Pull Request Review](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/requesting-a-pull-request-review)
+3. GitHub's tutorial on [Checking Out Pull Requests Locally](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/reviewing-changes-in-pull-requests/checking-out-pull-requests-locally)
diff --git a/_preview/468/_sources/foundations/github/what-is-github.md b/_preview/468/_sources/foundations/github/what-is-github.md
new file mode 100644
index 000000000..c09073f22
--- /dev/null
+++ b/_preview/468/_sources/foundations/github/what-is-github.md
@@ -0,0 +1,135 @@
+```{image} ../../images/GitHub-logo.png
+:alt: GitHub Logo
+:width: 400px
+```
+
+# What is GitHub?
+
+## Overview:
+
+1. What is GitHub?
+1. No experience necessary!
+1. Free and open-source software (FOSS)
+1. Version control systems (VCS)
+1. GitHub = FOSS + VCS + Web
+1. Register for a free GitHub account
+
+## Prerequisites
+
+| Concepts | Importance | Notes |
+| -------- | ---------- | ----- |
+| None | | |
+
+- **Time to learn**: 15 minutes
+
+---
+
+## What is GitHub?
+
+[GitHub](https://github.com) is a web-based platform for the dissemination of free and open-source software.
+
+If you are reading this lesson, you are already using GitHub, as that is where Project Pythia hosts its content!
+
+GitHub provides the following:
+
+1. _Version control_ for free and open-source software and other digital assets
+1. Project _discussion forums_
+1. _DevOps_ to facilitate building and testing software
+1. _Bug_ reporting, patching, and tracking
+1. _Documentation_ hosting
+1. An environment that fosters _collaboration_
+
+Although GitHub can host any digital asset, the most common use case for GitHub is for individuals or organizations to house _repositories_ of _free_ and _open-source software_.
+
+## No experience necessary!
+
+You do not need to be an experienced software developer or be proficient in version control to make use of GitHub! Perhaps, though, you have used a particular package (e.g., Xarray or Matplotlib) and have had questions about its usage, noticed a bug, or had an idea for a new feature for the package! You can participate in a project's development via GitHub the same way you might have interacted with its developers via email in the past.
+
+## Free and open-source software (FOSS)
+
+Much of what we term the _scientific Python software ecosystem_ consists of _free and open-source software_. Often abbreviated as **FOSS**, this means:
+
+1. The software is free of charge, and
+1. The various files which contain the _software code_ are publicly available.
+
+```{admonition} Did you know?
+:class: info
+The [Python language](https://python.org) itself is an example of *FOSS*!
+```
+
+FOSS is nothing new. For example, the [Linux kernel source code](https://kernel.org) has been available to download for many years.
+
+```{admonition} Free $\neq$ open source!
+:class: tip
+Just because a software package may be free does not mean that its source code is open! For example, although Nvidia makes its video drivers available for free download, the source code for those drivers is proprietary.
+```
+
+Arguably, the greatest advantage of open-source software is that it enables _collaborative sharing_, and thus community feedback.
+
+Types of community input may include the following:
+
+1. _Issues_: usage questions, bug reports, feature requests
+1. _Pull requests_: a user can ask that their changes/additions be incorporated into the project
+1. _Discussions_: a community forum on the open source project
+
+## Version control systems (VCS)
+
+We will discuss version control in more detail later in this series, but the need to track and manage changes to a project, especially one that involves software, has long been known. Over the years, FOSS developers have used VCS such as _cvs_, _svn_, and most recently, _git_. All of these systems are _command-line tools_.
+
+## FOSS and VCS on the Internet
+
+A successful FOSS project needs to be accessible via the web. As mentioned before, the Linux kernel and the Python language have long been available via first-generation remote access protocols such as FTP, HTTP, and SSH. Later, VCS tools such as cvs and svn established their own TCP protocols for remote access. With the advent of _git_, web-based services supporting HTTP(S) and SSH sprang up. All of these VCSs treat a particular FOSS project as a code repository.
+
+```{admonition} Did you know?
+:class: info
+Linus Torvalds, the original developer (and still the lead maintainer) of **Linux**, is also the original developer of [Git](https://git-scm.com)!
+```
+
+```{admonition} Stay tuned!
+:class: tip
+We will discuss version control and the use of **Git** via the command line later in this series.
+```
+
+## FOSS + VCS + Web = GitHub
+
+Perhaps the most popular web-based platform that uses Git for FOSS VCS is [GitHub](https://github.com). GitHub hosts all of the Python software packages that Project Pythia covers as code repositories (henceforth we'll use the term _Git repo_, or more generally just _repo_, for a GitHub code repository).
+
+For example, here is a screenshot from [Xarray's GitHub](https://github.com/pydata/xarray) Git repo:
+
+```{note}
+The above screenshot is from one moment in time. When you visit the Xarray GitHub link above, it will no doubt look different!
+```
+
+## Register for a free GitHub account
+
+While one can freely browse GitHub repositories such as Xarray anonymously, it's necessary to log into a unique (and free) user account in order to take advantage of GitHub's full capabilities, such as:
+
+1. Opening Issues and Pull Requests
+1. Participating in Discussions
+1. Hosting your own repository
+
+Your next step (if you haven't already) should be to register for your free GitHub account. As with many online services, you will specify a user ID, password, and email address to use with your account.
+
+To do so, simply point your browser to the [GitHub sign-up page](https://github.com/join):
+
+While GitHub offers paid options, a free account is typically all that is needed!
+
+---
+
+## Summary
+
+- GitHub serves as a web-based platform for digital assets, particularly FOSS.
+- GitHub uses Git as its version control system.
+- You can set up a free user account on GitHub.
+
+### What's Next?
+
+In the next lesson, we will explore some GitHub repositories.
+
+## References
+
+1. [GitHub (Wikipedia)](https://en.wikipedia.org/wiki/GitHub)
diff --git a/_preview/468/_sources/foundations/how-to-run-python.md b/_preview/468/_sources/foundations/how-to-run-python.md
new file mode 100644
index 000000000..c0fd2116e
--- /dev/null
+++ b/_preview/468/_sources/foundations/how-to-run-python.md
@@ -0,0 +1,80 @@
+# Installing and Running Python
+
+---
+
+## Overview
+
+This section provides an overview of different ways to run Python code, and quickstart guides for:
+
+1. Choosing a Python platform
+2. Installing and managing Python with Conda
+
+## Prerequisites
+
+| Concepts | Importance | Notes |
+| -------------------------------------------------------------------------------- | ---------- | ----- |
+| [Why Python?](https://foundations.projectpythia.org/foundations/why-python.html) | Helpful | |
+
+- **Time to learn**: 20 minutes
+
+---
+
+## Choosing a Python Platform
+
+There is no single official platform for the Python language. Here we provide a brief rundown of 3 popular platforms:
+
+1. The terminal,
+2. Jupyter notebooks, and
+3. IDEs (integrated development environments).
+
+Here we hope to provide you with enough information to understand the differences and similarities between each platform, so that you can make the best choice for your work environment and follow along effectively, regardless of your Python platform preference.
+
+In general, it is always best to test your programs in the same environment in which they will be run. The biggest factors to consider when choosing your platform are:
+
+- What are you already comfortable with?
+- What are the people around you using (peers, coworkers, instructors, etc.)?
+
+### Terminal
+
+For learners who are familiar with basic [Linux commands](https://cheatography.com/davechild/cheat-sheets/linux-command-line/) and text editors (such as Vim or Nano), running Python in the terminal is the quickest route to learning Python syntax without covering the bells and whistles of a new platform. If you run Python on a supercomputer, via an HTTP request, or through SSH tunneling, you might want to consider learning in the terminal.
+
+[How to Run Python in the Terminal](terminal.md)
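+
+As a quick taste of this workflow, assuming Python is already installed, you can start an interactive session and run a line of code directly in the terminal:
+
+```
+$ python
+>>> print("Hello, world!")
+Hello, world!
+>>> exit()
+```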
+
+### Jupyter Notebooks
+
+We highly encourage the use of Jupyter notebooks: a free, open-source, interactive tool running inside a web browser that allows you to run Python code in "cells." This means that your workflow can alternate between code, output, and even Markdown-formatted explanatory sections that create an easy-to-follow analysis or "computational narrative" from start to finish. Jupyter notebooks are a great option for presentations or learning tools. For these reasons, Jupyter is very popular among scientists. Most lessons in this book will be taught via Jupyter notebooks.
+
+[How to Run Python in a Jupyter Session](jupyter.md)
+
+### Other IDEs
+
+If you code in other languages, you might already have a favorite IDE that will work just as well in Python. [Spyder](https://www.spyder-ide.org) is a Python-specific IDE that comes with the [Anaconda download](https://www.anaconda.com/products/distribution). It is perhaps the most familiar IDE if you are coming from languages such as [Matlab](https://www.mathworks.com/products/matlab.html) that have a language-specific platform and display a list of variables. [PyCharm](https://www.jetbrains.com/pycharm/) and [Visual Studio Code](https://code.visualstudio.com) are also popular IDEs. Many IDEs offer support for terminal execution, scripts, and Jupyter display. To learn about your specific IDE, visit its official documentation.
+
+_We recommend eventually learning how to develop and run Python code in each of these platforms._
+
+## Installing and managing Python with Conda
+
+Conda is an open-source, cross-platform, language-agnostic package manager and environment management system that allows you to quickly install, run, and update packages within your work environment(s). Conda is a vital component of the Python ecosystem. Understanding it is important, regardless of the platform on which you choose to run your Python code.
+
+[Learn more about Conda here](conda.md)
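+
+As a brief preview (covered in depth in the linked Conda content), a typical session that creates and uses an environment might look like this; the environment name and packages are arbitrary examples:
+
+```
+$ conda create --name my-env python=3.11
+$ conda activate my-env
+$ conda install numpy
+```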
+
+---
+
+## Summary
+
+Python can be run on many different platforms. You may choose where to run Python based on a number of factors. The tutorials in this book will be formatted as Jupyter Notebooks.
+
+### What's Next?
+
+- [How to Run Python in the Terminal](terminal.md)
+- [How to Run Python in a Jupyter Session](jupyter.md)
+- [Learn more about Conda here](conda.md)
+
+## Resources and References
+
+- [Linux commands](https://cheatography.com/davechild/cheat-sheets/linux-command-line/)
+- [Spyder](https://www.spyder-ide.org)
+- [Anaconda](https://www.anaconda.com/products/distribution)
+- [Matlab](https://www.mathworks.com/products/matlab.html)
+- [PyCharm](https://www.jetbrains.com/pycharm/)
+- [Visual Studio Code](https://code.visualstudio.com)
diff --git a/_preview/468/_sources/foundations/jupyter.md b/_preview/468/_sources/foundations/jupyter.md
new file mode 100644
index 000000000..9fd45a415
--- /dev/null
+++ b/_preview/468/_sources/foundations/jupyter.md
@@ -0,0 +1,115 @@
+# Python in Jupyter
+
+---
+
+## Overview
+
+You'd like to learn to run Python in a Jupyter session. Here we will cover:
+
+1. Installing Python in Jupyter
+2. Running Python code in Jupyter
+3. Saving your notebook and exiting
+
+## Prerequisites
+
+| Concepts | Importance | Notes |
+| --------------------------------------------------------------------------------------------------------- | ---------- | ----- |
+| [Installing and Running Python](https://foundations.projectpythia.org/foundations/how-to-run-python.html) | Helpful | |
+
+- **Time to learn**: 20 minutes
+
+---
+
+## Installing Python in Jupyter
+
+To run a Jupyter session, you will need to install some necessary packages into your Conda environment.
+
+Install `miniconda` by following the [instructions for your machine](https://docs.conda.io/en/latest/miniconda.html).
+
+[Learn more about Conda here](conda.md)
+
+Next, create a Conda environment with Jupyter Lab installed. In the terminal, type:
+
+```
+$ conda create --name pythia_foundations_env jupyterlab
+```
+
+Test that you have installed everything correctly by first activating your environment and then launching a Jupyter Lab session:
+
+```
+$ conda activate pythia_foundations_env
+$ jupyter lab
+```
+
+Or you can install the full [Anaconda](https://www.anaconda.com/products/distribution), and select **LAUNCH** under the Jupyter panel in the GUI.
+
+![Anaconda Navigator](../images/Anaconda.png)
+
+With either method, a new window should open automatically in your default browser. You can change the browser when launching from the terminal with (for example):
+
+```
+jupyter lab --browser=chrome
+```
+
+## Running Python in Jupyter
+
+1. With your Conda environment activated and Jupyter session launched (see above), create a directory to store your work. Let's call it `pythia-foundations`.
+
+ ![Jupyter GUI](../images/jupyter_gui.png)
+
+ You can do this in the GUI left sidebar by clicking the new-folder icon. If you prefer to use the command line, you can access a terminal by clicking the icon under the "Other" heading in the Launcher.
+
+2. Create a new `mysci.ipynb` file within the `pythia-foundations` folder:
+
+ Do this in the GUI on the left sidebar by clicking the "+" icon.
+
+ This will open a new launcher window where you can select a Python kernel under the "Notebooks" heading for your project. _You should see "Python 3" as in the screenshot above._ Depending on the details of your system, you might see some additional buttons with different kernels.
+
+ Selecting a kernel will open a Jupyter notebook instance and add an untitled file to the left sidebar navigator, which you can then rename to `mysci.ipynb`.
+
+ Select "Python 3" to use the Python version you just installed in the `pythia_foundations_env` conda environment.
+
+3. Change the first notebook cell to include the classic first command: printing "Hello, world!".
+
+ ```python
+ print("Hello, world!")
+ ```
+
+4. Run your cell with {kbd}`Shift`\+{kbd}`Enter` and see that the results are printed below the cell.
+
+ ![Jupyter - Hello World](../images/mysci.png)
+
+**Congratulations!** You have just set up your first Python environment and run your first Python code in a Jupyter notebook.
+
+## Saving your notebook and exiting
+
+When you are done with your work, it is time to save and exit.
+
+To save your file, you can click the disc icon in the upper left Jupyter toolbar or use keyboard shortcuts.
+
+Jupyter allows you to close the browser tab without shutting down the server. When you're done working on your notebook, _it's important to **click the "Shutdown" button** on the dashboard_ to free up memory, especially on a shared system.
+
+Then you can quit Jupyter by:
+
+- clicking the "Quit" button on the top right, or
+- typing `exit` into the terminal
+
+Alternatively, you can simultaneously shut down and exit the Jupyter session by typing
+{kbd}`Ctrl`\+{kbd}`C` in the terminal and confirming that you do want to
+"shutdown this notebook server."
+
+---
+
+## Summary
+
+Jupyter notebooks are a free, open-source, interactive tool running inside a web browser that allows you to run Python code in "cells." To run a Jupyter session you will need to install `jupyterlab` into your Conda environment. Jupyter sessions need to be shut down, not just exited.
+
+### What's Next?
+
+- [How to Run Python in the Terminal](terminal.md)
+- [Learn more about Conda here](conda.md)
+- [Getting Started with Jupyter](getting-started-jupyter)
+
+## Resources and References
+
+- [Anaconda](https://www.anaconda.com/products/distribution)
diff --git a/_preview/468/_sources/foundations/jupyterlab.ipynb b/_preview/468/_sources/foundations/jupyterlab.ipynb
new file mode 100644
index 000000000..1c0446067
--- /dev/null
+++ b/_preview/468/_sources/foundations/jupyterlab.ipynb
@@ -0,0 +1,620 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "hC-731NWXAnQ"
+ },
+ "source": [
+ "# JupyterLab"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "3rIfwtTKpQLf"
+ },
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "MkdbWkKzfegV",
+ "jp-MarkdownHeadingCollapsed": true,
+ "tags": []
+ },
+ "source": [
+ "## Overview\n",
+ "\n",
+ "JupyterLab is a popular web application on which users can create and write their Jupyter Notebooks, as well as explore data, install software, etc. This section will introduce the JupyterLab interface and cover details of JupyterLab Notebooks.\n",
+ "\n",
+ "1. Set Up\n",
+ "2. The JupyterLab Interface\n",
+ "3. Running JupyterLab Notebooks"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "yt-WtTrcpRpo"
+ },
+ "source": [
+ "## Prerequisites\n",
+ "| Concepts | Importance | Notes |\n",
+ "| --- | --- | --- |\n",
+ "| [Getting Started with Jupyter](getting-started-jupyter) | Helpful | |\n",
+ "| [Installing and Running Python: Python in Jupyter](https://foundations.projectpythia.org/foundations/jupyter.html) | Helpful | |\n",
+ "\n",
+ "- **Time to learn**: 50 minutes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "nvLyiIa8pTI5"
+ },
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "eRjK14BjeQXA",
+ "tags": []
+ },
+ "source": [
+ "## Set Up\n",
+ "\n",
+ "To launch the JupyterLab interface in your browser, follow the instructions in [Installing and Running Python: Python in Jupyter](https://foundations.projectpythia.org/foundations/jupyter.html).\n",
+ "\n",
+ "If, instead, you want to follow along using a provided remote JupyterLab instance, launch this notebook via [Binder](https://mybinder.org/) using the launch icon at the top of this page,\n",
+ "\n",
+ "![Binder Launch](../images/binder-highlight.png \"Binder launch button location\")\n",
+ "\n",
+ "and follow along from there! If launching Binder, take note of the `Launcher` tab in the upper-left (see interface below). Click there to find yourself in the same interface moving forward, and feel free to refer back to this tab to follow along."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Qe2fbSa2gS2X"
+ },
+ "source": [
+ "## The JupyterLab Interface\n",
+ "\n",
+ "Go to your browser and take a look at the JupyterLab interface.\n",
+ "\n",
+ "With a base installation of JupyterLab your screen should look similar to the image below.\n",
+ "\n",
+ "Notice:\n",
+ "- The **Menu Bar** at the top of the screen, containing the typical dropdown menus: \"File\", \"Edit\", \"View\", etc.\n",
+ "- Below that is the **Workspace Area** (currently contains the Launcher).\n",
+ "- The **Launcher** is a quick shortcut for accessing the Console/Terminal, for creating new Notebooks, or for creating new Text or Markdown files.\n",
+ "- On the left is a **Collapsible Sidebar**. It currently contains the File Browser, but you can also select the Running Tabs and Kernels, the Table of Contents, and the Extensions Manager.\n",
+ "- Below everything is the **Information Bar**, which is context sensitive. We will touch on this more.\n",
+ "\n",
+ "![Interface](../images/interface_labeled.png)\n",
+ "\n",
+ "We will now take a closer look at certain aspects and features of the JupyterLab Interface."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "zpDZFM3ngsY6"
+ },
+ "source": [
+ "### Left Sidebar\n",
+ "\n",
+ "The Collapsible Left Sidebar is open to the **File Browser Tab** at launch. Clicking the File Browser Tab will collapse the sidebar or reopen it to this tab.\n",
+ "- Within this tab, you will see the \"+\" button, which allows you to create a new launcher. \n",
+ "- Next to that is the \"+ folder\" button which allows you to create a new folder that then appears below \"Name\" in the contents of your directory. Double click the folder to enter it, right click the folder for options, or press the \"root folder\" icon to return to the root directory. The root directory is the directory from which JupyterLab was launched. You cannot go above the root directory. \n",
+ "- The \"upload\" button (looks like an arrow pointing up) allows you to upload files to the current folder. \n",
+ "- The \"refresh\" button refreshes the File Browser.\n",
+ "\n",
+ "Below the File Browser Tab is the **Running Tabs and Kernels Tab**. Currently, this tab doesn't have much in it. We will revisit this when we have running kernels. Remember that Kernels are background processes, so closing a tab (Terminal or Notebook) doesn’t shut down the kernel. You have to do that manually. \n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "The **Table of Contents Tab** is auto-populated based on the headings and subheadings used in the Markdown cells of your notebook. It allows you to quickly jump between sections of the document.\n",
+ "\n",
+ " \n",
+ "\n",
+ "Last is the **Extensions Manager Tab** where you can customize or enhance any part of JupyterLab. These customizations could pertain to themes or keyboard shortcuts, for example. We will not be covering JupyterLab extensions, but you can read more about them [here](https://jupyterlab.readthedocs.io/en/stable/user/extensions.html).\n",
+ "\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "1WPljwIRg1KH"
+ },
+ "source": [
+ "### Terminals\n",
+ " \n",
+ "Let’s select the Running Tabs and Kernels Tab in the Left Sidebar and see how it changes when we've used the Launcher.\n",
+ "\n",
+ "Open a Terminal in the Launcher. It should look very similar to the desktop terminal that you initially launched JupyterLab from, but is running from within JupyterLab, within your existing Conda environment, and within the directory you launched JupyterLab from (the same root folder shown in the File Browser Tab). Notice that there is now a Terminal listed in the Running Terminals Tab.\n",
+ "\n",
+ "In the terminal you can use your usual terminal commands. For example, in the terminal window, run: \n",
+ "\n",
+ "```\n",
+ "$ mkdir test\n",
+ "```\n",
+ "\n",
+ "Select the File Browser Tab, refresh it, and see that your new folder is there.\n",
+ "\n",
+ "In the Terminal Window run:\n",
+ "\n",
+ "```\n",
+ "$ rmdir test\n",
+ "```\n",
+ "Hit refresh in the File Browser again to see that the directory is gone.\n",
+ "\n",
+ "![Terminal](../images/terminal.png)\n",
+ "\n",
+ "Back with the Running Terminals and Kernels Tab open, click the \"X\" in your workspace to close the Terminal window. Notice that the Terminal is still running in the background! Click on the terminal in the Running Terminals and Kernels Tab to reopen it (and hit enter or return to get your prompt back). To truly close it, execute in the Terminal window:\n",
+ "\n",
+ "```\n",
+ "$ exit\n",
+ "```\n",
+ "\n",
+ "OR click the “X” shut down button in the Running Terminals tab.\n",
+ "\n",
+ "Doing so will return you to the Launcher. \n",
+ "\n",
+ "
\n",
+ "
Info
\n",
+ " The terminal is running on the local host when JupyterLab is launched locally, and remote host when invoked through Jupyter Hub.\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "iQAVvbtJg6pi"
+ },
+ "source": [
+ "### Consoles\n",
+ "\n",
+ "Back in the Launcher, click the “Python 3” Console button. There is only one console option right now, but you could install more kernels into your Conda environment. \n",
+ "\n",
+ "There will be three dots while the kernel starts, then what loads looks like an IPython console. This is a place to execute Python commands in a stand alone workspace which is good for testing. Notice that the kernel started in the Running Terminals and Kernels tab!\n",
+ "\n",
+ "Start in a “cell” at the bottom of the Console window. Type:\n",
+ "``` python\n",
+ "i = 5\n",
+ "print(i)\n",
+ "```\n",
+ "To execute the cell, type Shift+Enter. Notice that the console redisplays the code you wrote, labels it with a number, and displays (prints) the output to the screen. \n",
+ "\n",
+ "In the next cell, enter:\n",
+ "``` python\n",
+ "s = 'Hello, World!'\n",
+ "print(f's = {s}')\n",
+ "s\n",
+ "```\n",
+ "\n",
+ "The first line of this code designates a string `s` with the value \"Hello, World!\", the second line uses f-formatting to print the string, and the final line just calls up `s`. The last line in the cell will always be returned (its value displayed) regardless of whether you called `print`. Type Shift+Enter to execute the cell. Again the output is labeled (this time with a 2), and we see the input code, the printed standard-out statement, and the return statement. The “return value” and the values “printed to screen” are different!\n",
+ "\n",
+ "![Console](../images/console.png)\n",
+ "\n",
+ "Close the window and shut down the Console in the Running Kernels tab. We’re back to the Launcher again."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "UDCCs37IhK7S"
+ },
+ "source": [
+ "### Text Editor\n",
+ "\n",
+ "Click on the “Text File” button in the Launcher.\n",
+ "Select the File Browser tab to see the new file `untitled.txt` you created.\n",
+ "\n",
+ "Enter this Python code into the new text file:\n",
+ "``` python\n",
+ "s = 'Hello, World!'\n",
+ "print(s)\n",
+ "```\n",
+ "\n",
+ "You may notice that the file has a dot instead of an \"X\" where you'd close it. This indicates that the file hasn't been saved or has unsaved changes. Save the file (\"command+s\" on Mac, \"control+s\" on Windows, or \"File▶Save Text\").\n",
+ "\n",
+ "Go to the File Browser tab, right-click the new file we created and “Rename” it to `hello.py`. Once the extension changes to `.py`, Jupyter recognizes that this is a Python file, and the text editor highlights the code based on Python syntax.\n",
+ "\n",
+ "Now, click the “+” button in the File Browser to create a new Launcher. In the Launcher tab, click on the “Terminal” button again to create a terminal. Now you have 2 tabs open: a text editor tab and a terminal tab. Drag the Terminal tab to the right side of the main work area to view both windows simultaneously. Click the File Browser tab to collapse the left sidebar and get more real estate! Alternatively, you could stack the windows one on top of the other.\n",
+ "\n",
+ "Run `ls` in the Terminal window to see the text file we just created. Execute `python hello.py` in the Terminal window. See the output in the Terminal window.\n",
+ "\n",
+ "![Text Editor](../images/txt-editor.png)\n",
+ "\n",
+ "Now, let’s close the Terminal tab and shut down the Terminal in the Running Kernels tab (or execute “exit” in the Terminal itself). You should just have the Text editor window open; now we're ready to look at Jupyter Notebooks.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "b0Zv8pF8e7Y0"
+ },
+ "source": [
+ "## Running JupyterLab Notebooks\n",
+ "\n",
+ "There are two ways to open a Jupyter Notebook. One way is to select the File Browser Tab, click the \"New Launcher\" button, and select a Python 3 Notebook from the Launcher. Another way is to go to the top Menu Bar and select \"File▶New▶Notebook\". JupyterLab will prompt you with a dialogue box to select the Kernel you want to use in your Notebook. Select the “Python 3” kernel.\n",
+ "\n",
+ "If you have the File Browser Tab open, notice you just created a file called `Untitled.ipynb.` You will also have a new window open in the main work area for your new Notebook.\n",
+ "\n",
+ "Let’s explore the Notebook interface:\n",
+ " \n",
+ "There is a **Toolbar** at the top with buttons that allow you to Save, Create New Cells, Cut/Paste/Copy Cells, Run the Cell, Stop the Kernel, and Refresh the Kernel. There is also a **dropdown menu** to select the kind of cell (Markdown, Raw, or Code). All the way to the right is the name of your Kernel (which you can click to change Kernels) and a **Kernel Status Icon** that indicates if something is being computed by the Kernel (by a dark circle) or not (by an empty circle).\n",
+ "\n",
+ "Below the Toolbar is the Notebook itself. You should see an empty cell at the top of the Notebook. This should look similar to the layout of the Console.\n",
+ "\n",
+ "The cell can be in 1 of 2 modes: command mode or edit mode.\n",
+ "If your cell is grayed out and you can’t see a blinking cursor in the cell, then the cell is in command mode. We’ll talk about command mode more later. Click inside the box, and the cell will turn white with a blinking cursor inside it; the cell is now in edit mode. The mode is also listed on the info bar at the bottom of the page. The cell is selected if the blue bar is on the left side of the cell.\n",
+ "\n",
+ "![Notebook Interface](../images/notebook-interface_labeled.png)\n",
+ "\n",
+ "You may move the Notebook over so you can see your text file at the same time to compare, resizing the Notebook window as needed."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Dl5dbdlPwmLk"
+ },
+ "source": [
+ "### Code Cells\n",
+ "Click inside the first cell of the Notebook to switch the cell to edit mode.\n",
+ "Enter the following into the cell:\n",
+ "\n",
+ "``` python\n",
+ "print(2+2)\n",
+ "```\n",
+ "\n",
+ "Then type Shift+Enter to execute the cell.\n",
+ "\n",
+ "You'll see the output, `4`, printed directly below your code cell. Executing the cell automatically creates a new cell in edit mode below the first.\n",
+ "\n",
+ "In this new cell, enter:\n",
+ "\n",
+ "``` python\n",
+ "for i in range(4):\n",
+ " print(i) \n",
+ "```\n",
+ "\n",
+ "Execute the cell. A Jupyter code cell can run multiple lines of code; each Jupyter code cell can even contain a complete Python program! \n",
+ "\n",
+ "To demonstrate how to import code that you have written in a `.py` file, enter the following into the next cell:\n",
+ "\n",
+ "``` python\n",
+ "import hello\n",
+ "```\n",
+ "\n",
+ "Then type Shift+Enter. \n",
+ "\n",
+ "This single-line `import` statement runs the contents of your `hello.py` script file, and would do the same for any file regardless of length. \n",
+ "\n",
+ "
\n",
+ "
Warning
\n",
+ " It is generally considered bad practice to include any output in a “.py” file meant to be imported and used within different Python scripts. Such a file should contain only function and class definitions.\n",
+ "
\n",
+ "You've executed the cell with the Python 3 kernel, and it spit out the output, “Hello, World!” Since you've imported the `hello.py` module into the Notebook’s kernel runtime, you can now directly look at the variable “s” in this second cell.\n",
+ "Enter the following in the next cell:\n",
+ "\n",
+ "``` python\n",
+ "hello.s\n",
+ "```\n",
+ "\n",
+ "Hit Shift+Enter to execute.\n",
+ "\n",
+ "Again, it displays the value of the variable `s` from the “hello” module we just created. One difference is that this time the output is given its own label `[2]` matching the input label of the cell (whereas the output from cell `[1]` is not labelled). This is the difference between output sent to the screen vs. the return value of the cell.\n",
+ "\n",
+ "Let’s now import a module from the Python standard library:\n",
+ "\n",
+ "``` python\n",
+ "import time\n",
+ "time.sleep(10)\n",
+ "```\n",
+ "\n",
+ "Again, hit Shift+Enter. \n",
+ "\n",
+ "The `time.sleep(10)` function causes code to wait for 10 seconds, which is plenty of time to notice how the cell changes in time:\n",
+ " - The label of the cell is `[*]`, indicating that the kernel is running that cell \n",
+ " - In the top right corner of the Notebook, the status icon is a filled-in circle, indicating that the kernel is running\n",
+ "\n",
+ "After 10 seconds, the cell completes running, the label is updated to `[3], and the status icon returns to the “empty circle” state. If you rerun the cell, the label will update to `[4]`.\n",
+ "\n",
+ "![Code Cells](../images/codecells.png)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "DTSoIqjAM4sg",
+ "jp-MarkdownHeadingCollapsed": true,
+ "tags": []
+ },
+ "source": [
+ "### Markdown Cells\n",
+ "Now, with the next cell selected (i.e., the blue bar appears to the left of the cell), whether in edit or command mode, go up to the “cell type” dropdown menu above and select “Markdown”.\n",
+ "Notice that the `[ ]` label goes away.\n",
+ "\n",
+ "Markdown is a markup language that allows you to format text in a plain-text editor. Here we will demonstrate some common Markdown syntax. You can learn more at [the Markdown Guide site](https://www.markdownguide.org/) or in our [Getting Started with Jupyter: Markdown](https://foundations.projectpythia.org/foundations/markdown.html) content.\n",
+ "Click on the cell and enter edit mode; we can now type in some markdown text like so:\n",
+ "\n",
+ "```markdown\n",
+ "# This is a heading!\n",
+ "And this is some text.\n",
+ "\n",
+ "## And this is a subheading\n",
+ "with a bulleted list in it:\n",
+ "\n",
+ " - one\n",
+ " - two\n",
+ " - three\n",
+ "```\n",
+ "\n",
+ "Then press Shift+Enter to render the markdown to HTML.\n",
+ "\n",
+ "\n",
+ "\n",
+ "Again, in the next cell, change the cell type to “Markdown”. To demonstrate displaying equations, enter:\n",
+ "\n",
+ "```markdown\n",
+ "## Some math\n",
+ "\n",
+ "And Jupyter’s version of markdown can display LaTeX:\n",
+ "\n",
+ "$$\n",
+ "e^x=\\sum_{i=0}^{\\infty} \\frac{1}{i!}x^i\n",
+ "$$\n",
+ "```\n",
+ "\n",
+ "When you are done, type Shift+Enter to render the markdown document.\n",
+ "\n",
+ "\n",
+ "\n",
+ "You can also do inline equations with a single \"$\", for example\n",
+ "\n",
+ "```markdown\n",
+ "This is an equation: $i^4$.\n",
+ "```\n",
+ "\n",
+ "\n",
+ "\n",
+ "Note that the “markdown” source code is rendered into much prettier text, which we can take advantage of for narrating our work in the Notebook!"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "DPP39honNHSh",
+ "jp-MarkdownHeadingCollapsed": true,
+ "tags": []
+ },
+ "source": [
+ "### Raw Cells\n",
+ "\n",
+ "Now in a new cell selected, select “raw” from the “cell type” dropdown menu. Again, the `[ ]` label goes away, and you can enter the following in the cell:\n",
+ "\n",
+ "```text\n",
+ "i = 8\n",
+ "print(i)\n",
+ "```\n",
+ "\n",
+ "When you Shift+Enter the text isn’t rendered. \n",
+ "\n",
+ "This is a way of entering text/source that you do not want the Notebook to do anything with (i.e., no rendering). \n",
+ "\n",
+ "![Raw](../images/raw.png)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "N89Z7vdgNMKx"
+ },
+ "source": [
+ "### Command Mode Shortcuts\n",
+ "\n",
+ "Now, select the “raw” cell you just created by clicking on the far left of the cell.\n",
+ "\n",
+ "You are now in “command mode”. The up and down arrows move to different cells. Don't hit “enter” which would switch the cell to “edit mode.\" Let’s explore command mode.\n",
+ "\n",
+ "You can change the cell type with `y` for code, `m` for markdown, or `r` for raw.\n",
+ "\n",
+ "You can add a new cell above the selected cell with `a` (or below the selected cell with `b`).\n",
+ "\n",
+ "You can cut (`x`), copy (`c`), and paste (`v`).\n",
+ "\n",
+ "You can move a cell up or down by clicking and dragging. \n",
+ "\n",
+ "
\n",
+ "
Warning
\n",
+ " Cells can be executed in any order you want. You just have to select the cell and Shift+Enter, and select the cells in any order you want. However, if you share your notebook, there is an implicit expectation to execute the cells in the order in which they are presented in the notebook. Be careful with this! If variables are reused or redefined between cells, reordering them could have unintended consequences!\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "KySy3X-aauaR"
+ },
+ "source": [
+ "### Special Variables\n",
+ "\n",
+ "Now, in the empty cell at the end, enter one underscore:\n",
+ "\n",
+ "```\n",
+ "_\n",
+ "```\n",
+ "\n",
+ "This is a special character that means the last cell output. Two underscores means the second to last cell output, and three underscores means the third to last output. You can also refer to the output by label with:\n",
+ "\n",
+ "```\n",
+ "_2\n",
+ "```\n",
+ "\n",
+ "
\n",
+ "
Danger
\n",
+ " If the cell you to refer to does not have a return value, this will raise an error.\n",
+ "