import numpy as np
import pandas as pd
import seaborn as sbn

/var/folders/gc/0752xrm56pnf0r0dsrn5370c0000gr/T/ipykernel_49923/2255310124.py:2: DeprecationWarning: 
Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd

# Placeholder binding; it is overwritten by the loaded table on the next statement.
water_quality = None

# Load the raw readings from the CSV file.
water_quality = pd.read_csv("WaterQualityData.csv")

water_quality

# Remove the columns that are not used further on, then index the rows by
# their reading date.
water_quality = water_quality.drop(
    columns=[
        'Unit_Id',
        'Air Temp-Celsius',
        'Time (24:00)',
        'Field_Tech',
        'DateVerified',
        'WhoVerified',
        'AirTemp (C)',
        'Year',
    ]
).set_index('Read_Date')

water_quality

# Give both temperature columns "(F)" names, then apply the
# Celsius-to-Fahrenheit formula to the water temperature values so the
# column matches its new name.
water_quality = water_quality.rename(
    columns={
        'Water Temp (?C)': 'Water Temp (F)',
        'Air Temp (?F)': 'Air Temp (F)',
    }
)
water_quality['Water Temp (F)'] = water_quality['Water Temp (F)'] * 9/5 + 32

water_quality

# Keep only the rows that have a value in every remaining column.
water_quality = water_quality.dropna()

# Report how many complete readings remain for each site.
readings_per_site = water_quality["Site_Id"].value_counts()
print(readings_per_site)

water_quality

Site_Id
Bay    661
D      195
B      164
A      156
C      100
Name: count, dtype: int64

# Group the readings by their "Site_Id" value so later statistics are
# computed separately for each pool.
by_pool = water_quality.groupby(by="Site_Id")

def is_outlier(x, whisker=1.5):
    """Flag the values of *x* that fall outside its interquartile fences.

    Parameters
    ----------
    x : pandas.Series
        Numeric values to test.
    whisker : float, default 1.5
        Multiple of the interquartile range added beyond each quartile to
        form the fences. The default reproduces the original fixed 1.5 rule.

    Returns
    -------
    pandas.Series
        Boolean series aligned with *x*; True where the value is strictly
        below ``Q1 - whisker * IQR`` or strictly above ``Q3 + whisker * IQR``.
    """
    lower_quartile, upper_quartile = x.quantile([0.25, 0.75])
    iqr = upper_quartile - lower_quartile
    lower_fence = lower_quartile - whisker * iqr
    upper_fence = upper_quartile + whisker * iqr
    return (x < lower_fence) | (x > upper_fence)

def _report_outliers(heading, column, note=None, trailing_blank=True):
    """Print the per-site outlier counts for one column and return the mask.

    The mask is computed by applying ``is_outlier`` within each group of
    ``by_pool``. The heading is printed first, then the optional note, then
    the number of flagged rows per "Site_Id", and finally an empty line
    unless ``trailing_blank`` is False.
    """
    mask = by_pool[column].transform(is_outlier)
    print(heading)
    if note is not None:
        print(note)
    print(water_quality.loc[mask, "Site_Id"].value_counts())
    if trailing_blank:
        print("")
    return mask


outliers_salt = _report_outliers("Salinity Outliers", "Salinity (ppt)")
outliers_DO = _report_outliers("Dissolved Oxygen Outliers", "Dissolved Oxygen (mg/L)")
outliers_pH = _report_outliers("pH Outliers", "pH (standard units)")
outliers_turb = _report_outliers("Secchi Depth Outliers", "Secchi Depth (m)")
outliers_depth = _report_outliers("Water Depth Outliers", "Water Depth (m)")
outliers_watemp = _report_outliers("Water Temp Outliers", "Water Temp (F)")
# The air temperature report carries a fixed note and is not followed by an
# empty line.
outliers_airtemp = _report_outliers(
    "Air Temp Outliers",
    "Air Temp (F)",
    note="None of the pools have outliers for air temperature",
    trailing_blank=False,
)

Salinity Outliers
Site_Id
B      39
A      38
D      18
C       6
Bay     3
Name: count, dtype: int64

Dissolved Oxygen Outliers
Site_Id
Bay    2
C      2
Name: count, dtype: int64

pH Outliers
Site_Id
D      71
A      25
B       2
Bay     2
C       1
Name: count, dtype: int64

Secchi Depth Outliers
Site_Id
Bay    22
A       5
B       3
D       3
Name: count, dtype: int64

Water Depth Outliers
Site_Id
Bay    16
D       7
B       4
A       2
C       2
Name: count, dtype: int64

Water Temp Outliers
Site_Id
Bay    2
A      1
D      1
Name: count, dtype: int64

Air Temp Outliers
None of the pools have outliers for air temperature
Series([], Name: count, dtype: int64)

# Per-pool descriptive statistics for the temperature and depth columns,
# printed with an empty line between consecutive tables.
temperature_depth_columns = (
    ("Water Temp", "Water Temp (F)"),
    ("Air Temp", "Air Temp (F)"),
    ("Water Depth", "Water Depth (m)"),
)
for position, (heading, column) in enumerate(temperature_depth_columns):
    if position:
        print("")
    print(heading)
    print(by_pool[column].describe())

# One violin plot per column, with the sites on the y axis.
violin_options = dict(data=water_quality, y="Site_Id", kind="violin")
sbn.catplot(x="Water Temp (F)", **violin_options)
sbn.catplot(x="Air Temp (F)", **violin_options)
sbn.catplot(x="Water Depth (m)", **violin_options)

Water Temp
         count       mean        std    min   25%   50%   75%    max
Site_Id                                                             
A        156.0  66.926923  15.469368  37.40  53.6  69.8  78.8  140.0
B        164.0  65.093659  14.799033  35.60  53.6  68.0  78.8   94.1
Bay      661.0  63.268015  14.446407  34.34  51.8  63.5  76.1  165.2
C        100.0  64.895000  15.905595  33.80  50.0  66.2  78.8  109.4
D        195.0  64.742462  15.634240  32.00  52.7  66.2  78.8  129.2

Air Temp
         count       mean        std   min    25%   50%    75%   max
Site_Id                                                             
A        156.0  63.247179  15.805530  26.0  50.75  66.0  77.00  88.0
B        164.0  61.449146  15.902048  23.0  48.15  63.0  75.00  89.0
Bay      661.0  62.236436  14.725684  25.0  50.00  62.6  75.20  92.3
C        100.0  61.693000  16.103549  11.5  48.00  62.5  74.25  88.0
D        195.0  61.971077  15.512472  12.0  48.00  63.0  75.00  88.0

Water Depth
         count      mean       std   min  25%   50%  75%  max
Site_Id                                                      
A        156.0  0.816026  0.569040  0.10  0.6  0.80  1.0  7.0
B        164.0  0.329634  0.399357  0.01  0.2  0.30  0.4  4.5
Bay      661.0  0.698570  0.547189  0.03  0.5  0.66  0.8  7.5
C        100.0  0.451700  0.220491  0.10  0.3  0.45  0.6  1.5
D        195.0  1.355026  0.273612  0.13  1.2  1.40  1.5  2.1

/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)

<seaborn.axisgrid.FacetGrid at 0x12f2db810>

# Per-pool descriptive statistics for salinity and dissolved oxygen,
# printed with an empty line between the two tables.
salinity_oxygen_columns = (
    ("Salinity", "Salinity (ppt)"),
    ("Dissolved Oxygen", "Dissolved Oxygen (mg/L)"),
)
for position, (heading, column) in enumerate(salinity_oxygen_columns):
    if position:
        print("")
    print(heading)
    print(by_pool[column].describe())

# One violin plot per column, with the sites on the y axis.
violin_options = dict(data=water_quality, y="Site_Id", kind="violin")
sbn.catplot(x="Salinity (ppt)", **violin_options)
sbn.catplot(x="Dissolved Oxygen (mg/L)", **violin_options)

Salinity
         count      mean       std  min  25%  50%    75%  max
Site_Id                                                      
A        156.0  0.347692  0.760398  0.0  0.0  0.0  0.000  3.0
B        164.0  0.432927  0.872421  0.0  0.0  0.0  0.275  4.0
Bay      661.0  1.489713  1.593030  0.0  0.0  1.0  2.900  9.0
C        100.0  0.501100  0.846263  0.0  0.0  0.0  1.000  3.5
D        195.0  0.091487  0.318060  0.0  0.0  0.0  0.000  2.0

Dissolved Oxygen
         count      mean       std  min    25%  50%    75%   max
Site_Id                                                         
A        156.0  5.383974  2.002458  0.0  3.900  5.0  6.800  10.0
B        164.0  5.401585  2.163687  0.0  3.800  5.3  7.025  10.0
Bay      661.0  7.716430  2.461330  0.1  6.000  7.8  9.500  15.1
C        100.0  5.563000  1.782273  1.9  4.375  5.3  6.425   9.9
D        195.0  6.466410  2.098069  2.0  5.000  6.1  8.100  10.3

/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)

<seaborn.axisgrid.FacetGrid at 0x12f5027d0>

# Per-pool descriptive statistics for Secchi depth and pH, printed with an
# empty line between the two tables.
secchi_ph_columns = (
    ("Secchi Depth", "Secchi Depth (m)"),
    ("pH", "pH (standard units)"),
)
for position, (heading, column) in enumerate(secchi_ph_columns):
    if position:
        print("")
    print(heading)
    print(by_pool[column].describe())

# One violin plot per column, with the sites on the y axis.
violin_options = dict(data=water_quality, y="Site_Id", kind="violin")
sbn.catplot(x="Secchi Depth (m)", **violin_options)
sbn.catplot(x="pH (standard units)", **violin_options)

Secchi Depth
         count      mean       std   min     25%    50%   75%  max
Site_Id                                                           
A        156.0  0.607051  0.452928  0.00  0.4375  0.575  0.70  5.5
B        164.0  0.306890  0.367354  0.00  0.2000  0.250  0.40  4.0
Bay      661.0  0.407322  0.469375  0.03  0.2500  0.350  0.45  7.4
C        100.0  0.422600  0.176416  0.10  0.3000  0.400  0.55  0.9
D        195.0  0.889128  0.341444  0.10  0.7000  0.900  1.10  2.0

pH
         count      mean       std  min  25%  50%    75%  max
Site_Id                                                      
A        156.0  6.935897  0.856772  0.3  6.5  7.0  7.000  9.2
B        164.0  7.312195  0.728372  6.0  7.0  7.0  7.825  9.7
Bay      661.0  7.377080  0.767799  5.7  7.0  7.0  8.000  9.9
C        100.0  7.300000  0.654742  6.0  7.0  7.0  7.900  9.5
D        195.0  6.696410  0.733188  5.0  6.5  6.5  6.500  9.3

/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
/Users/driscoll/mambaforge/envs/219/lib/python3.11/site-packages/seaborn/_base.py:949: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)

<seaborn.axisgrid.FacetGrid at 0x12f65fa50>

# Spearman rank correlation between salinity and pH, computed separately for
# each pool. The two measurement columns are selected before apply() so the
# grouping column is not part of the frames handed to the lambda; calling
# apply() on the whole grouped frame is deprecated in pandas and emits a
# DeprecationWarning.
correlation_by_pool = by_pool[['Salinity (ppt)', 'pH (standard units)']].apply(
    lambda pool: pool['Salinity (ppt)'].corr(pool['pH (standard units)'], method='spearman')
)
print("Correlation between Salinity and pH")
print("")
print(correlation_by_pool)
# One scatter panel per site.
sbn.relplot(data=water_quality, x="Salinity (ppt)", y="pH (standard units)", kind="scatter", col="Site_Id")

/var/folders/gc/0752xrm56pnf0r0dsrn5370c0000gr/T/ipykernel_49923/1049744730.py:1: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.
  correlation_by_pool = by_pool.apply(lambda x: x['Salinity (ppt)'].corr(x['pH (standard units)'], method='spearman'))

Correlation between Salinity and pH

Site_Id
A      0.462851
B      0.356092
Bay    0.308925
C      0.411849
D     -0.062838
dtype: float64

<seaborn.axisgrid.FacetGrid at 0x12f6c7cd0>

# Spearman rank correlation between dissolved oxygen and Secchi depth,
# computed separately for each pool. The two measurement columns are selected
# before apply() so the grouping column is not part of the frames handed to
# the lambda; calling apply() on the whole grouped frame is deprecated in
# pandas and emits a DeprecationWarning.
correlation_by_pool = by_pool[['Dissolved Oxygen (mg/L)', 'Secchi Depth (m)']].apply(
    lambda pool: pool['Dissolved Oxygen (mg/L)'].corr(pool['Secchi Depth (m)'], method='spearman')
)
print("Correlation between Dissolved Oxygen and Secchi Depth")
print("")
print(correlation_by_pool)
# One scatter panel per site.
sbn.relplot(data=water_quality, x="Dissolved Oxygen (mg/L)", y="Secchi Depth (m)", kind="scatter", col="Site_Id")

/var/folders/gc/0752xrm56pnf0r0dsrn5370c0000gr/T/ipykernel_49923/1982946106.py:1: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.
  correlation_by_pool = by_pool.apply(lambda x: x['Dissolved Oxygen (mg/L)'].corr(x['Secchi Depth (m)'], method='spearman'))

Correlation between Dissolved Oxygen and Secchi Depth

Site_Id
A      0.228879
B      0.172859
Bay   -0.402239
C      0.117189
D      0.533812
dtype: float64

<seaborn.axisgrid.FacetGrid at 0x12f90f350>

# Spearman rank correlation between dissolved oxygen and water temperature,
# computed separately for each pool. The two measurement columns are selected
# before apply() so the grouping column is not part of the frames handed to
# the lambda; calling apply() on the whole grouped frame is deprecated in
# pandas and emits a DeprecationWarning.
correlation_by_pool = by_pool[['Dissolved Oxygen (mg/L)', 'Water Temp (F)']].apply(
    lambda pool: pool['Dissolved Oxygen (mg/L)'].corr(pool['Water Temp (F)'], method='spearman')
)
print("Correlation between Dissolved Oxygen and Water Temp")
print("")
print(correlation_by_pool)
# One scatter panel per site.
sbn.relplot(data=water_quality, x="Dissolved Oxygen (mg/L)", y="Water Temp (F)", kind="scatter", col="Site_Id")

/var/folders/gc/0752xrm56pnf0r0dsrn5370c0000gr/T/ipykernel_49923/2992145461.py:1: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.
  correlation_by_pool = by_pool.apply(lambda x: x['Dissolved Oxygen (mg/L)'].corr(x['Water Temp (F)'], method='spearman'))

Correlation between Dissolved Oxygen and Water Temp

Site_Id
A     -0.591336
B     -0.567870
Bay   -0.487778
C     -0.580896
D     -0.792397
dtype: float64

<seaborn.axisgrid.FacetGrid at 0x12fb23990>

# Spearman rank correlation between pH and water depth, computed separately
# for each pool. The two measurement columns are selected before apply() so
# the grouping column is not part of the frames handed to the lambda; calling
# apply() on the whole grouped frame is deprecated in pandas and emits a
# DeprecationWarning.
correlation_by_pool = by_pool[['pH (standard units)', 'Water Depth (m)']].apply(
    lambda pool: pool['pH (standard units)'].corr(pool['Water Depth (m)'], method='spearman')
)
print("Correlation between pH and Water Depth")
print("")
print(correlation_by_pool)
# One scatter panel per site.
sbn.relplot(data=water_quality, x="pH (standard units)", y="Water Depth (m)", kind="scatter", col="Site_Id")

/var/folders/gc/0752xrm56pnf0r0dsrn5370c0000gr/T/ipykernel_49923/1086740621.py:1: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.
  correlation_by_pool = by_pool.apply(lambda x: x['pH (standard units)'].corr(x['Water Depth (m)'], method='spearman'))

Correlation between pH and Water Depth

Site_Id
A      0.000110
B     -0.012994
Bay    0.093002
C     -0.183627
D     -0.275356
dtype: float64

<seaborn.axisgrid.FacetGrid at 0x12fc5f350>

	Site_Id	Unit_Id	Read_Date	Salinity (ppt)	Dissolved Oxygen (mg/L)	pH (standard units)	Secchi Depth (m)	Water Depth (m)	Water Temp (?C)	Air Temp-Celsius	Air Temp (?F)	Time (24:00)	Field_Tech	DateVerified	WhoVerified	AirTemp (C)	Year
0	Bay	NaN	1/3/1994	1.3	11.7	7.3	0.40	0.40	5.9	8.0	46.40	11:00	NaN	NaN	NaN	8.000000	1994
1	Bay	NaN	1/31/1994	1.5	12.0	7.4	0.20	0.35	3.0	2.6	36.68	11:30	NaN	NaN	NaN	2.600000	1994
2	Bay	NaN	2/7/1994	1.0	10.5	7.2	0.25	0.60	5.9	7.6	45.68	9:45	NaN	NaN	NaN	7.600000	1994
3	Bay	NaN	2/23/1994	1.0	10.1	7.4	0.35	0.50	10.0	2.7	36.86	NaN	NaN	NaN	NaN	2.700000	1994
4	Bay	NaN	2/28/1994	1.0	12.6	7.2	0.20	0.40	1.6	0.0	32.00	10:30	NaN	NaN	NaN	0.000000	1994
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
2366	Bay	NaN	10/11/2018	1.9	5.0	7.0	4.00	1.20	25.0	NaN	78.00	09:30	Sue Poe	11/13/2019	Christine Folks	25.555556	2018
2367	Bay	NaN	10/24/2018	0.0	9.0	7.0	0.30	0.60	18.0	NaN	58.00	09:30	Sue Poe	11/13/2019	Christine Folks	14.444444	2018
2368	Bay	NaN	10/28/2018	0.9	2.9	7.0	0.40	0.90	13.0	NaN	49.00	09:20	Sue Poe	11/13/2019	Christine Folks	9.444444	2018
2369	Bay	NaN	11/7/2018	1.7	NaN	7.0	0.45	0.90	20.0	NaN	65.00	09:45	Sue Poe	11/13/2019	Christine Folks	18.333333	2018
2370	Bay	NaN	12/11/2018	0.1	NaN	7.0	0.10	0.10	10.0	NaN	42.00	09:40	Sue Poe	11/13/2019	Christine Folks	5.555556	2018

Introduction

Preprocessing

Summary Data Analysis

Discussion