From 66ab16bef676679b99e24c569f1299aa6162ee7a Mon Sep 17 00:00:00 2001 From: Saloni029 <100422560+Saloni029@users.noreply.github.com> Date: Sat, 2 Jul 2022 17:56:30 +0530 Subject: [PATCH 01/13] Update SOLUTION.md --- Minor Project Submissions/SOLUTION.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/Minor Project Submissions/SOLUTION.md b/Minor Project Submissions/SOLUTION.md index 978c4b2..e9dee57 100644 --- a/Minor Project Submissions/SOLUTION.md +++ b/Minor Project Submissions/SOLUTION.md @@ -1,2 +1,23 @@ # SOLUTION +Jupyter Notebook Link: + + +1. +Quantitative variables=Data with numerical variables are considered quantitative data (e.g. how many; how much; or how often) +Example-In the honey production dataset, numcol, yieldpercol, totalprod, stocks, priceperlb and prodvalue are quantitative variables. + +Qualitative variables=Qualitative data are data about categorical variables (e.g. what type). +They may be represented by a name, symbol, or a number code. +Example-In the honey production dataset, state is the qualitative variable. + +2. From the correlation graph of numcol one can infer that as the number of colonies increases, production value also increases over the years. +In case of yieldpercol, one can infer that most of the production value of yield ranges from 40 to 100. +In case of + + +3. There is no state named "US" in the dataset + + +7. From the second-to-last graph you can clearly see that production value is decreasing, and from the last graph you can infer that total production is the main cause of its decline, followed by stocks. 
+ From 141949730ec8df07b68693f210a01d60943af42c Mon Sep 17 00:00:00 2001 From: Saloni029 <100422560+Saloni029@users.noreply.github.com> Date: Sat, 2 Jul 2022 17:57:02 +0530 Subject: [PATCH 02/13] Add files via upload --- .../SaloniChoudhary_Honeyproduction.html | 15700 ++++++++++++++++ 1 file changed, 15700 insertions(+) create mode 100644 Minor Project Submissions/SaloniChoudhary_Honeyproduction.html diff --git a/Minor Project Submissions/SaloniChoudhary_Honeyproduction.html b/Minor Project Submissions/SaloniChoudhary_Honeyproduction.html new file mode 100644 index 0000000..1590db1 --- /dev/null +++ b/Minor Project Submissions/SaloniChoudhary_Honeyproduction.html @@ -0,0 +1,15700 @@ + + +
+ + +import pandas as pd
+import numpy as np
+
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+%matplotlib inline
+import warnings
+warnings.filterwarnings("ignore")
+
data = pd.read_csv('honeyproduction.csv')  # load the dataset from a path relative to the notebook
+data.head(300)  # preview the first 300 rows (the notebook display elides the middle rows)
+
+ | state | +numcol | +yieldpercol | +totalprod | +stocks | +priceperlb | +prodvalue | +year | +
---|---|---|---|---|---|---|---|---|
0 | +AL | +16000.0 | +71 | +1136000.0 | +159000.0 | +0.72 | +818000.0 | +1998 | +
1 | +AZ | +55000.0 | +60 | +3300000.0 | +1485000.0 | +0.64 | +2112000.0 | +1998 | +
2 | +AR | +53000.0 | +65 | +3445000.0 | +1688000.0 | +0.59 | +2033000.0 | +1998 | +
3 | +CA | +450000.0 | +83 | +37350000.0 | +12326000.0 | +0.62 | +23157000.0 | +1998 | +
4 | +CO | +27000.0 | +72 | +1944000.0 | +1594000.0 | +0.70 | +1361000.0 | +1998 | +
... | +... | +... | +... | +... | +... | +... | +... | +... | +
295 | +UT | +24000.0 | +70 | +1680000.0 | +554000.0 | +1.10 | +1848000.0 | +2004 | +
296 | +VT | +6000.0 | +68 | +408000.0 | +192000.0 | +1.51 | +616000.0 | +2004 | +
297 | +VA | +7000.0 | +38 | +266000.0 | +69000.0 | +2.10 | +559000.0 | +2004 | +
298 | +WA | +56000.0 | +63 | +3528000.0 | +1376000.0 | +0.98 | +3457000.0 | +2004 | +
299 | +WV | +9000.0 | +55 | +495000.0 | +183000.0 | +1.41 | +698000.0 | +2004 | +
300 rows × 8 columns
+data.tail()  # last five rows of the dataset
+
+ | state | +numcol | +yieldpercol | +totalprod | +stocks | +priceperlb | +prodvalue | +year | +
---|---|---|---|---|---|---|---|---|
621 | +VA | +4000.0 | +41 | +164000.0 | +23000.0 | +3.77 | +618000.0 | +2012 | +
622 | +WA | +62000.0 | +41 | +2542000.0 | +1017000.0 | +2.38 | +6050000.0 | +2012 | +
623 | +WV | +6000.0 | +48 | +288000.0 | +95000.0 | +2.91 | +838000.0 | +2012 | +
624 | +WI | +60000.0 | +69 | +4140000.0 | +1863000.0 | +2.05 | +8487000.0 | +2012 | +
625 | +WY | +50000.0 | +51 | +2550000.0 | +459000.0 | +1.87 | +4769000.0 | +2012 | +
data.describe()  # count, mean, std, min, quartiles and max of each numeric column
+
+ | numcol | +yieldpercol | +totalprod | +stocks | +priceperlb | +prodvalue | +year | +
---|---|---|---|---|---|---|---|
count | +626.000000 | +626.000000 | +6.260000e+02 | +6.260000e+02 | +626.000000 | +6.260000e+02 | +626.000000 | +
mean | +60284.345048 | +62.009585 | +4.169086e+06 | +1.318859e+06 | +1.409569 | +4.715741e+06 | +2004.864217 | +
std | +91077.087231 | +19.458754 | +6.883847e+06 | +2.272964e+06 | +0.638599 | +7.976110e+06 | +4.317306 | +
min | +2000.000000 | +19.000000 | +8.400000e+04 | +8.000000e+03 | +0.490000 | +1.620000e+05 | +1998.000000 | +
25% | +9000.000000 | +48.000000 | +4.750000e+05 | +1.430000e+05 | +0.932500 | +7.592500e+05 | +2001.000000 | +
50% | +26000.000000 | +60.000000 | +1.533000e+06 | +4.395000e+05 | +1.360000 | +1.841500e+06 | +2005.000000 | +
75% | +63750.000000 | +74.000000 | +4.175250e+06 | +1.489500e+06 | +1.680000 | +4.703250e+06 | +2009.000000 | +
max | +510000.000000 | +136.000000 | +4.641000e+07 | +1.380000e+07 | +4.150000 | +6.961500e+07 | +2012.000000 | +
data.isnull().sum()  # number of missing values per column
+
state 0 +numcol 0 +yieldpercol 0 +totalprod 0 +stocks 0 +priceperlb 0 +prodvalue 0 +year 0 +dtype: int64+
type(data)  # confirm that read_csv produced a DataFrame
+
pandas.core.frame.DataFrame+
data.shape  # (number of rows, number of columns)
+
(626, 8)+
data.columns  # names of all columns
+
Index(['state', 'numcol', 'yieldpercol', 'totalprod', 'stocks', 'priceperlb', + 'prodvalue', 'year'], + dtype='object')+
data.dtypes  # storage type of each column
+
state object +numcol float64 +yieldpercol int64 +totalprod float64 +stocks float64 +priceperlb float64 +prodvalue float64 +year int64 +dtype: object+
data['numcol']  # the numcol column on its own (plotted later as "Number of colonies")
+
0 16000.0 +1 55000.0 +2 53000.0 +3 450000.0 +4 27000.0 + ... +621 4000.0 +622 62000.0 +623 6000.0 +624 60000.0 +625 50000.0 +Name: numcol, Length: 626, dtype: float64+
# 2. Pair plot: one scatter panel for every pair of numeric columns, to look for relationships between them
+sns.pairplot(data);  # the trailing semicolon keeps the notebook from echoing the returned grid object
+
# 2. The same pair plot of the numeric columns, with the points coloured by state
+sns.pairplot(data, hue='state', palette="husl")
+
<seaborn.axisgrid.PairGrid at 0x20455078d90>+
# 3. Correlation plots: numcol against every other numeric column, one panel per pair so that each regression keeps its own y-axis scale and label.
+plt.figure(figsize=(18, 10))
+sns.regplot(x=data["numcol"], y=data["yieldpercol"],color="red", ax=plt.subplot(2, 3, 1))
+sns.regplot(x=data["numcol"], y=data["totalprod"],color="blue", ax=plt.subplot(2, 3, 2))
+sns.regplot(x=data["numcol"], y=data["stocks"],color="pink", ax=plt.subplot(2, 3, 3))
+sns.regplot(x=data["numcol"], y=data["priceperlb"],color="purple", ax=plt.subplot(2, 3, 4))
+sns.regplot(x=data["numcol"], y=data["prodvalue"],color="green", ax=plt.subplot(2, 3, 5))
+sns.regplot(x=data["numcol"], y=data["year"],color="maroon", ax=plt.subplot(2, 3, 6))
+plt.tight_layout()
+
<AxesSubplot:xlabel='numcol', ylabel='year'>+
# yieldpercol against every other numeric column, one panel per pair so that each regression keeps its own y-axis scale and label.
+plt.figure(figsize=(18, 10))
+sns.regplot(x=data["yieldpercol"], y=data["numcol"],color="red", ax=plt.subplot(2, 3, 1))
+sns.regplot(x=data["yieldpercol"], y=data["totalprod"],color="skyblue", ax=plt.subplot(2, 3, 2))
+sns.regplot(x=data["yieldpercol"], y=data["stocks"],color="black", ax=plt.subplot(2, 3, 3))
+sns.regplot(x=data["yieldpercol"], y=data["priceperlb"],color="gold", ax=plt.subplot(2, 3, 4))
+sns.regplot(x=data["yieldpercol"], y=data["prodvalue"],color="orange", ax=plt.subplot(2, 3, 5))
+sns.regplot(x=data["yieldpercol"], y=data["year"],color="green", ax=plt.subplot(2, 3, 6))
+plt.tight_layout()
+
<AxesSubplot:xlabel='yieldpercol', ylabel='year'>+
# totalprod against every other numeric column, one panel per pair so that each regression keeps its own y-axis scale and label.
+plt.figure(figsize=(18, 10))
+sns.regplot(x=data["totalprod"], y=data["numcol"],color="black", ax=plt.subplot(2, 3, 1))
+sns.regplot(x=data["totalprod"], y=data["yieldpercol"],color="red", ax=plt.subplot(2, 3, 2))
+sns.regplot(x=data["totalprod"], y=data["stocks"],color="blue", ax=plt.subplot(2, 3, 3))
+sns.regplot(x=data["totalprod"], y=data["priceperlb"],color="gold", ax=plt.subplot(2, 3, 4))
+sns.regplot(x=data["totalprod"], y=data["prodvalue"],color="green", ax=plt.subplot(2, 3, 5))
+sns.regplot(x=data["totalprod"], y=data["year"],color="purple", ax=plt.subplot(2, 3, 6))
+plt.tight_layout()
+
<AxesSubplot:xlabel='totalprod', ylabel='year'>+
# stocks against every other numeric column, one panel per pair so that each regression keeps its own y-axis scale and label.
+plt.figure(figsize=(18, 10))
+sns.regplot(x=data["stocks"], y=data["numcol"],color="red", ax=plt.subplot(2, 3, 1))
+sns.regplot(x=data["stocks"], y=data["yieldpercol"],color="brown", ax=plt.subplot(2, 3, 2))
+sns.regplot(x=data["stocks"], y=data["totalprod"],color="purple", ax=plt.subplot(2, 3, 3))
+sns.regplot(x=data["stocks"], y=data["priceperlb"],color="black", ax=plt.subplot(2, 3, 4))
+sns.regplot(x=data["stocks"], y=data["prodvalue"],color="green", ax=plt.subplot(2, 3, 5))
+sns.regplot(x=data["stocks"], y=data["year"],color="gold", ax=plt.subplot(2, 3, 6))
+plt.tight_layout()
+
<AxesSubplot:xlabel='stocks', ylabel='year'>+
# priceperlb against every other numeric column, one panel per pair so that each regression keeps its own y-axis scale and label.
+plt.figure(figsize=(18, 10))
+sns.regplot(x=data["priceperlb"], y=data["numcol"],color="gold", ax=plt.subplot(2, 3, 1))
+sns.regplot(x=data["priceperlb"], y=data["yieldpercol"],color="red", ax=plt.subplot(2, 3, 2))
+sns.regplot(x=data["priceperlb"], y=data["totalprod"],color="blue", ax=plt.subplot(2, 3, 3))
+sns.regplot(x=data["priceperlb"], y=data["stocks"],color="purple", ax=plt.subplot(2, 3, 4))
+sns.regplot(x=data["priceperlb"], y=data["prodvalue"],color="maroon", ax=plt.subplot(2, 3, 5))
+sns.regplot(x=data["priceperlb"], y=data["year"],color="green", ax=plt.subplot(2, 3, 6))
+plt.tight_layout()
+
<AxesSubplot:xlabel='priceperlb', ylabel='year'>+
# prodvalue against every other numeric column, one panel per pair so that each regression keeps its own y-axis scale and label.
+plt.figure(figsize=(18, 10))
+sns.regplot(x=data["prodvalue"], y=data["numcol"],color="blue", ax=plt.subplot(2, 3, 1))
+sns.regplot(x=data["prodvalue"], y=data["yieldpercol"],color="red", ax=plt.subplot(2, 3, 2))
+sns.regplot(x=data["prodvalue"], y=data["totalprod"],color="black", ax=plt.subplot(2, 3, 3))
+sns.regplot(x=data["prodvalue"], y=data["priceperlb"],color="purple", ax=plt.subplot(2, 3, 4))
+sns.regplot(x=data["prodvalue"], y=data["stocks"],color="green", ax=plt.subplot(2, 3, 5))
+sns.regplot(x=data["prodvalue"], y=data["year"],color="orange", ax=plt.subplot(2, 3, 6))
+plt.tight_layout()
+
<AxesSubplot:xlabel='prodvalue', ylabel='year'>+
# year against every other numeric column, one panel per pair so that each regression keeps its own y-axis scale and label.
+plt.figure(figsize=(18, 10))
+sns.regplot(x=data["year"], y=data["numcol"],color="blue", ax=plt.subplot(2, 3, 1))
+sns.regplot(x=data["year"], y=data["yieldpercol"],color="red", ax=plt.subplot(2, 3, 2))
+sns.regplot(x=data["year"], y=data["totalprod"],color="purple", ax=plt.subplot(2, 3, 3))
+sns.regplot(x=data["year"], y=data["priceperlb"],color="black", ax=plt.subplot(2, 3, 4))
+sns.regplot(x=data["year"], y=data["stocks"],color="green", ax=plt.subplot(2, 3, 5))
+sns.regplot(x=data["year"], y=data["prodvalue"],color="orange", ax=plt.subplot(2, 3, 6))
+plt.tight_layout()
+
<AxesSubplot:xlabel='year', ylabel='prodvalue'>+
# 3. Print the state code of every row so the codes that occur can be inspected
+# NOTE(review): presumably used to check whether a "US" total row exists - confirm
+a=np.array(data["state"])
+print(a)
+
['AL' 'AZ' 'AR' 'CA' 'CO' 'FL' 'GA' 'HI' 'ID' 'IL' 'IN' 'IA' 'KS' 'KY' + 'LA' 'ME' 'MD' 'MI' 'MN' 'MS' 'MO' 'MT' 'NE' 'NV' 'NJ' 'NM' 'NY' 'NC' + 'ND' 'OH' 'OK' 'OR' 'PA' 'SD' 'TN' 'TX' 'UT' 'VT' 'VA' 'WA' 'WV' 'WI' + 'WY' 'AL' 'AZ' 'AR' 'CA' 'CO' 'FL' 'GA' 'HI' 'ID' 'IL' 'IN' 'IA' 'KS' + 'KY' 'LA' 'ME' 'MD' 'MI' 'MN' 'MS' 'MO' 'MT' 'NE' 'NV' 'NJ' 'NM' 'NY' + 'NC' 'ND' 'OH' 'OK' 'OR' 'PA' 'SD' 'TN' 'TX' 'UT' 'VT' 'VA' 'WA' 'WV' + 'WI' 'WY' 'AL' 'AZ' 'AR' 'CA' 'CO' 'FL' 'GA' 'HI' 'ID' 'IL' 'IN' 'IA' + 'KS' 'KY' 'LA' 'ME' 'MD' 'MI' 'MN' 'MS' 'MO' 'MT' 'NE' 'NV' 'NJ' 'NM' + 'NY' 'NC' 'ND' 'OH' 'OK' 'OR' 'PA' 'SD' 'TN' 'TX' 'UT' 'VT' 'VA' 'WA' + 'WV' 'WI' 'WY' 'AL' 'AZ' 'AR' 'CA' 'CO' 'FL' 'GA' 'HI' 'ID' 'IL' 'IN' + 'IA' 'KS' 'KY' 'LA' 'ME' 'MD' 'MI' 'MN' 'MS' 'MO' 'MT' 'NE' 'NV' 'NJ' + 'NM' 'NY' 'NC' 'ND' 'OH' 'OK' 'OR' 'PA' 'SC' 'SD' 'TN' 'TX' 'UT' 'VT' + 'VA' 'WA' 'WV' 'WI' 'WY' 'AL' 'AZ' 'AR' 'CA' 'CO' 'FL' 'GA' 'HI' 'ID' + 'IL' 'IN' 'IA' 'KS' 'KY' 'LA' 'ME' 'MD' 'MI' 'MN' 'MS' 'MO' 'MT' 'NE' + 'NV' 'NJ' 'NM' 'NY' 'NC' 'ND' 'OH' 'OK' 'OR' 'PA' 'SC' 'SD' 'TN' 'TX' + 'UT' 'VT' 'VA' 'WA' 'WV' 'WI' 'WY' 'AL' 'AZ' 'AR' 'CA' 'CO' 'FL' 'GA' + 'HI' 'ID' 'IL' 'IN' 'IA' 'KS' 'KY' 'LA' 'ME' 'MD' 'MI' 'MN' 'MS' 'MO' + 'MT' 'NE' 'NV' 'NJ' 'NM' 'NY' 'NC' 'ND' 'OH' 'OK' 'OR' 'PA' 'SC' 'SD' + 'TN' 'TX' 'UT' 'VT' 'VA' 'WA' 'WV' 'WI' 'WY' 'AL' 'AZ' 'AR' 'CA' 'CO' + 'FL' 'GA' 'HI' 'ID' 'IL' 'IN' 'IA' 'KS' 'KY' 'LA' 'ME' 'MI' 'MN' 'MS' + 'MO' 'MT' 'NE' 'NV' 'NJ' 'NM' 'NY' 'NC' 'ND' 'OH' 'OR' 'PA' 'SD' 'TN' + 'TX' 'UT' 'VT' 'VA' 'WA' 'WV' 'WI' 'WY' 'AL' 'AZ' 'AR' 'CA' 'CO' 'FL' + 'GA' 'HI' 'ID' 'IL' 'IN' 'IA' 'KS' 'KY' 'LA' 'ME' 'MI' 'MN' 'MS' 'MO' + 'MT' 'NE' 'NV' 'NJ' 'NM' 'NY' 'NC' 'ND' 'OH' 'OR' 'PA' 'SD' 'TN' 'TX' + 'UT' 'VT' 'VA' 'WA' 'WV' 'WI' 'WY' 'AL' 'AZ' 'AR' 'CA' 'CO' 'FL' 'GA' + 'HI' 'ID' 'IL' 'IN' 'IA' 'KS' 'KY' 'LA' 'ME' 'MI' 'MN' 'MS' 'MO' 'MT' + 'NE' 'NV' 'NJ' 'NM' 'NY' 'NC' 'ND' 'OH' 'OR' 'PA' 'SD' 'TN' 'TX' 'UT' + 'VT' 'VA' 'WA' 'WV' 'WI' 'WY' 'AL' 'AZ' 'AR' 'CA' 'CO' 
'FL' 'GA' 'HI' + 'ID' 'IL' 'IN' 'IA' 'KS' 'KY' 'LA' 'ME' 'MI' 'MN' 'MS' 'MO' 'MT' 'NE' + 'NV' 'NJ' 'NM' 'NY' 'NC' 'ND' 'OH' 'OR' 'PA' 'SD' 'TN' 'TX' 'UT' 'VT' + 'VA' 'WA' 'WV' 'WI' 'WY' 'AL' 'AZ' 'AR' 'CA' 'CO' 'FL' 'GA' 'HI' 'ID' + 'IL' 'IN' 'IA' 'KS' 'KY' 'LA' 'ME' 'MI' 'MN' 'MS' 'MO' 'MT' 'NE' 'NV' + 'NJ' 'NM' 'NY' 'NC' 'ND' 'OH' 'OR' 'PA' 'SD' 'TN' 'TX' 'UT' 'VT' 'VA' + 'WA' 'WV' 'WI' 'WY' 'AL' 'AZ' 'AR' 'CA' 'CO' 'FL' 'GA' 'HI' 'ID' 'IL' + 'IN' 'IA' 'KS' 'KY' 'LA' 'ME' 'MI' 'MN' 'MS' 'MO' 'MT' 'NE' 'NJ' 'NM' + 'NY' 'NC' 'ND' 'OH' 'OR' 'PA' 'SD' 'TN' 'TX' 'UT' 'VT' 'VA' 'WA' 'WV' + 'WI' 'WY' 'AL' 'AZ' 'AR' 'CA' 'CO' 'FL' 'GA' 'HI' 'ID' 'IL' 'IN' 'IA' + 'KS' 'KY' 'LA' 'ME' 'MI' 'MN' 'MS' 'MO' 'MT' 'NE' 'NJ' 'NM' 'NY' 'NC' + 'ND' 'OH' 'OR' 'PA' 'SD' 'TN' 'TX' 'UT' 'VT' 'VA' 'WA' 'WV' 'WI' 'WY' + 'AL' 'AZ' 'AR' 'CA' 'CO' 'FL' 'GA' 'HI' 'ID' 'IL' 'IN' 'IA' 'KS' 'KY' + 'LA' 'ME' 'MI' 'MN' 'MS' 'MO' 'MT' 'NE' 'NJ' 'NM' 'NY' 'NC' 'ND' 'OH' + 'OR' 'PA' 'SD' 'TN' 'TX' 'UT' 'VT' 'VA' 'WA' 'WV' 'WI' 'WY' 'AL' 'AZ' + 'AR' 'CA' 'CO' 'FL' 'GA' 'HI' 'ID' 'IL' 'IN' 'IA' 'KS' 'KY' 'LA' 'ME' + 'MI' 'MN' 'MS' 'MO' 'MT' 'NE' 'NJ' 'NM' 'NY' 'NC' 'ND' 'OH' 'OR' 'PA' + 'SD' 'TN' 'TX' 'UT' 'VT' 'VA' 'WA' 'WV' 'WI' 'WY'] ++
#5. Variation in number of colonies over the year
+#This graph shows maximum no of colonies were in 2010 and the steep line between each year i.e between 1998 to 1999 then between
+#1999 to 2000 shows that they also decrease a little bit between every year.
+plt.plot(data['year'],data['numcol'],color='blue')
+plt.ylabel("Number of colonies")
+plt.xlabel("Year")
+plt.show()
+
#Total honey production per year, summed over all states (a histogram of `year` would only count how many rows each year has).
+data.groupby("year")["totalprod"].sum().plot(kind="bar", color="skyblue")
+
<AxesSubplot:xlabel='year', ylabel='Count'>+
#This graph shows variation in number of colonies after every 2 years.And It shows that in most of the year number of colonies
+#increase mostly from a range of 0 to 100000. They also increase till 500000 but its very less in ratio.
+sns.scatterplot(data['year'],data['numcol'])
+
<AxesSubplot:xlabel='year', ylabel='numcol'>+
#Highest yield per colony reported by any state in each year. Drawing the raw rows stacks every state's bar at the same year, so only this maximum would be visible; it is not a minimum or an average.
+data.groupby('year')['yieldpercol'].max().plot(kind='bar', width=0.5)
+plt.xlabel("Year")
+plt.ylabel("Yield per colony")
+plt.show()
+
# Mean yield per colony across states for each year (there is one row per state per year, so the raw rows cannot be joined into a single trend line).
+data.groupby('year')['yieldpercol'].mean().plot(color='red')
+plt.ylabel("Yield per colony")
+plt.xlabel("Year")
+plt.show()
+
# Total production value per year, summed over all states: the year is on the x-axis and the summed value on the y-axis.
+data.groupby('year')['prodvalue'].sum().plot(color='skyblue')
+plt.xlabel("Year")
+plt.ylabel("Production value")
+plt.show()
+
#7. Distribution of prodvalue over all rows (every state in every year); this histogram has no time axis.
+sns.histplot(data, x="prodvalue", color="teal")
+
<AxesSubplot:xlabel='prodvalue', ylabel='Count'>+
# prodvalue against every other numeric column, one panel per pair so that each regression keeps its own y-axis scale and label.
+plt.figure(figsize=(18, 10))
+sns.regplot(x=data["prodvalue"], y=data["numcol"],color="blue", ax=plt.subplot(2, 3, 1))
+sns.regplot(x=data["prodvalue"], y=data["yieldpercol"],color="red", ax=plt.subplot(2, 3, 2))
+sns.regplot(x=data["prodvalue"], y=data["totalprod"],color="black", ax=plt.subplot(2, 3, 3))
+sns.regplot(x=data["prodvalue"], y=data["priceperlb"],color="purple", ax=plt.subplot(2, 3, 4))
+sns.regplot(x=data["prodvalue"], y=data["stocks"],color="green", ax=plt.subplot(2, 3, 5))
+sns.regplot(x=data["prodvalue"], y=data["year"],color="orange", ax=plt.subplot(2, 3, 6))
+plt.tight_layout()
+
<AxesSubplot:xlabel='prodvalue', ylabel='year'>+
+
#v)>VJkP
zX%dMUVh_}Y%!Ez4>~W7y_pY9iXYHjz 2gZ$&3>kH9r_8tZ jF^Z*@r`5_OG|~ gm!n#&cjAti-(@m{48s*
ztk99sW?Zr)>~`T6y`j~_W1&u`<^vq4QKVO>bUN8!Yzjp~o%QEZ_&2e%l73VVeeD(s
zN|%i~N!WHKmz#{ikXlN8=S5Yqsb9$x@2Al*CM$On!_vue7fmxPZO%o1)TDWZE9ERt?IT>?*xPLpcp
zx9K-ch6y5TPHm!vm1TnPA{MJw@iyg|uB4-&kCXLo2sYW?@O{!%cL`9sUvO0;n{7?5
zCT_
+^$Q-Y5Nu6_v=k{Cl;V5hX(4VweVL-ABHvg>VU32FI;<2?xLm>w2A
zOR~4H>pJMbg
nF^`h=GkgA-Rs`AAY?_+r&h=xXdNl6=d5y+
s
zKbG{EO?x70{rM41jpv8R^Vldk4pUW_;AveMo
zOt()J08i@mbkMnM`%%o|7VutZiz(cLWEtdK#0mXr8AF&+P(DQ>wyvY@^v_r5|0pbX
z;VNYl;JA&t=YrR}X5;B*el@Xs>K4ypxg@_6Uaym`fvv01IjuRoMwgjlBcI$?OoL@3
zOn;bcjC}mE)Ba
LvVLM|@JA@2#qf
zIcCO-$?sH9NBZznQK^M|_>VD#(&~PKq-}p2ge;U3?F0JP0lfezikIPE+cDKW)jTSt
zx!|%8m>ai)GaKZ1)0V5uvPbU2!}ZTY$)`djgujBe5lM`YH}%Y+^g6i&U%LhvyW`Fn
z#kl!*a=+cu|3GH><(OtE_|eIEQ*qt5uE?gU5fayQ+kJUSPHA_JPE`m!yjAIts2#fw
zw+HJpl9dq;rR!$7cKGy9#w+yS7)SZaC&2bE!%xHlWCYNR=O