import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

/var/folders/gc/0752xrm56pnf0r0dsrn5370c0000gr/T/ipykernel_58267/1020607637.py:1: DeprecationWarning: 
Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd

# Load the white- and red-wine tables; both files use ';' as the field delimiter.
white = pd.read_csv('winequality-white.csv', sep=';')
red = pd.read_csv('winequality-red.csv', sep=';')

def remove_outliers(df, whisker=1.5):
    """Drop every row that has an outlier in at least one numeric column.

    A value counts as an outlier when it lies outside the interval
    ``[Q1 - whisker * IQR, Q3 + whisker * IQR]`` of its own column, where
    Q1 and Q3 are the column's 25th and 75th percentiles and
    ``IQR = Q3 - Q1``.

    Args:
        df: DataFrame to filter. Only numeric columns are tested; any
            other columns are carried through unchanged.
        whisker: Multiplier applied to the IQR to place the fences.
            Defaults to 1.5.

    Returns:
        A new DataFrame (an independent copy, with the original row labels)
        containing only the rows in which no numeric value is outside its
        column's fences. Comparisons against NaN are False, so missing
        values never cause a row to be dropped here.
    """
    # Restrict the test to numeric columns so label columns neither break
    # quantile() nor take part in the comparison.
    numeric = df.select_dtypes(include='number')

    q1 = numeric.quantile(0.25)
    q3 = numeric.quantile(0.75)
    iqr = q3 - q1
    lower_fence = q1 - whisker * iqr
    upper_fence = q3 + whisker * iqr

    # The fences are Series indexed by column name, so the comparisons
    # align column-by-column; a row is flagged if any column is out of range.
    is_outlier = ((numeric < lower_fence) | (numeric > upper_fence)).any(axis=1)

    # Return an explicit copy so callers can add or modify columns on the
    # result without pandas treating it as a slice of the input frame.
    return df.loc[~is_outlier].copy()

# Keep only the numeric columns and filter outliers separately for each
# wine type, so each table is judged against its own quartiles.
white = remove_outliers(white.select_dtypes(include=[np.number]))
red = remove_outliers(red.select_dtypes(include=[np.number]))

# Tag each table with its wine type. assign() returns a new frame, so this
# never writes into a filtered slice of the original data.
white = white.assign(wine_type='white')
red = red.assign(wine_type='red')

# Stack the two tables. ignore_index=True gives the combined frame fresh,
# unique row labels; without it the white and red row labels would collide.
wine = pd.concat([white, red], ignore_index=True)

# Discard any rows that still contain missing values.
wine = wine.dropna()

# Summary statistics of the numeric columns of the combined table.
wine.describe()

# Box plot of sulphates, with one panel per wine type.
sns.catplot(
    data=wine,
    x='sulphates',
    col='wine_type',
    kind='box',
)

/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/categorical.py:640: FutureWarning: SeriesGroupBy.grouper is deprecated and will be removed in a future version of pandas.
  positions = grouped.grouper.result_index.to_numpy(dtype=float)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/categorical.py:640: FutureWarning: SeriesGroupBy.grouper is deprecated and will be removed in a future version of pandas.
  positions = grouped.grouper.result_index.to_numpy(dtype=float)

<seaborn.axisgrid.FacetGrid at 0x1356f3590>

# Side-by-side box plots of the two sulfur dioxide columns (wide-form input:
# each selected column becomes one box).
sulfur_dioxide = wine[['free sulfur dioxide', 'total sulfur dioxide']]
sns.catplot(data=sulfur_dioxide, kind='box')

/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/categorical.py:640: FutureWarning: SeriesGroupBy.grouper is deprecated and will be removed in a future version of pandas.
  positions = grouped.grouper.result_index.to_numpy(dtype=float)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/categorical.py:640: FutureWarning: SeriesGroupBy.grouper is deprecated and will be removed in a future version of pandas.
  positions = grouped.grouper.result_index.to_numpy(dtype=float)

<seaborn.axisgrid.FacetGrid at 0x135867710>

# Box plot of alcohol, with one panel per quality score.
sns.catplot(
    data=wine,
    x='alcohol',
    col='quality',
    kind='box',
)

/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/categorical.py:640: FutureWarning: SeriesGroupBy.grouper is deprecated and will be removed in a future version of pandas.
  positions = grouped.grouper.result_index.to_numpy(dtype=float)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/categorical.py:640: FutureWarning: SeriesGroupBy.grouper is deprecated and will be removed in a future version of pandas.
  positions = grouped.grouper.result_index.to_numpy(dtype=float)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/categorical.py:640: FutureWarning: SeriesGroupBy.grouper is deprecated and will be removed in a future version of pandas.
  positions = grouped.grouper.result_index.to_numpy(dtype=float)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/categorical.py:640: FutureWarning: SeriesGroupBy.grouper is deprecated and will be removed in a future version of pandas.
  positions = grouped.grouper.result_index.to_numpy(dtype=float)

<seaborn.axisgrid.FacetGrid at 0x1357a7990>

# Side-by-side box plots of citric acid and volatile acidity (wide-form
# input: each selected column becomes one box).
acid_columns = wine[['citric acid', 'volatile acidity']]
sns.catplot(data=acid_columns, kind='box')

/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/categorical.py:640: FutureWarning: SeriesGroupBy.grouper is deprecated and will be removed in a future version of pandas.
  positions = grouped.grouper.result_index.to_numpy(dtype=float)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/categorical.py:640: FutureWarning: SeriesGroupBy.grouper is deprecated and will be removed in a future version of pandas.
  positions = grouped.grouper.result_index.to_numpy(dtype=float)

<seaborn.axisgrid.FacetGrid at 0x135b11050>

# Box plot of chlorides across the whole combined table.
sns.catplot(
    data=wine,
    x='chlorides',
    kind='box',
)

/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/categorical.py:640: FutureWarning: SeriesGroupBy.grouper is deprecated and will be removed in a future version of pandas.
  positions = grouped.grouper.result_index.to_numpy(dtype=float)

<seaborn.axisgrid.FacetGrid at 0x135a8e4d0>

# Box plot of pH across the whole combined table.
sns.catplot(
    data=wine,
    x='pH',
    kind='box',
)

/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/categorical.py:640: FutureWarning: SeriesGroupBy.grouper is deprecated and will be removed in a future version of pandas.
  positions = grouped.grouper.result_index.to_numpy(dtype=float)

<seaborn.axisgrid.FacetGrid at 0x135c0f990>

# Box plot of residual sugar, with one panel per wine type.
sns.catplot(
    data=wine,
    x='residual sugar',
    col='wine_type',
    kind='box',
)

/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/categorical.py:640: FutureWarning: SeriesGroupBy.grouper is deprecated and will be removed in a future version of pandas.
  positions = grouped.grouper.result_index.to_numpy(dtype=float)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/categorical.py:640: FutureWarning: SeriesGroupBy.grouper is deprecated and will be removed in a future version of pandas.
  positions = grouped.grouper.result_index.to_numpy(dtype=float)

<seaborn.axisgrid.FacetGrid at 0x135c5afd0>

# Box plot of fixed acidity across the whole combined table.
sns.catplot(
    data=wine,
    x='fixed acidity',
    kind='box',
)

/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/categorical.py:640: FutureWarning: SeriesGroupBy.grouper is deprecated and will be removed in a future version of pandas.
  positions = grouped.grouper.result_index.to_numpy(dtype=float)

<seaborn.axisgrid.FacetGrid at 0x135d27990>

# Box plot of density across the whole combined table.
sns.catplot(
    data=wine,
    x='density',
    kind='box',
)

/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/categorical.py:640: FutureWarning: SeriesGroupBy.grouper is deprecated and will be removed in a future version of pandas.
  positions = grouped.grouper.result_index.to_numpy(dtype=float)

<seaborn.axisgrid.FacetGrid at 0x135d50850>

# Pairwise correlations between the measured columns. wine_type is dropped
# first because it holds the 'white'/'red' string labels, not numbers.
correlations = wine.drop(columns='wine_type').corr()

# Annotated heatmap of the correlation matrix on a 9x7-inch figure.
plt.figure(figsize=(9, 7))
sns.heatmap(correlations, annot=True, cmap='vlag')

<Axes: >

	fixed acidity	volatile acidity	citric acid	residual sugar	chlorides	free sulfur dioxide	total sulfur dioxide	density	pH	sulphates	alcohol	quality
count	5037.000000	5037.000000	5037.000000	5037.000000	5037.000000	5037.000000	5037.000000	5037.000000	5037.000000	5037.000000	5037.000000	5037.000000
mean	7.128956	0.324697	0.305950	5.408785	0.050942	30.120409	115.174509	0.994540	3.218817	0.516831	10.499715	5.815168
std	1.113448	0.150817	0.119913	4.686900	0.018869	16.174677	55.619411	0.002865	0.148287	0.120583	1.147178	0.754510
min	4.800000	0.080000	0.000000	0.600000	0.015000	1.000000	6.000000	0.987110	2.820000	0.220000	8.400000	4.000000
25%	6.400000	0.220000	0.250000	1.800000	0.038000	17.000000	78.000000	0.992150	3.120000	0.430000	9.500000	5.000000
50%	6.900000	0.280000	0.300000	2.800000	0.047000	29.000000	117.000000	0.994800	3.210000	0.500000	10.400000	6.000000
75%	7.600000	0.380000	0.380000	8.100000	0.059000	41.000000	155.000000	0.996800	3.320000	0.590000	11.300000	6.000000
max	12.300000	1.005000	0.730000	22.000000	0.119000	80.000000	255.000000	1.001960	3.680000	0.980000	14.200000	7.000000

Introduction

Preprocessing

Summary Data Analysis

Statistical Summary

Correlations

Discussion