import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=DeprecationWarning)

import numpy as np
import pandas as pd
import seaborn as sns

# Read the complete game-by-game Elo table from the CSV that sits next to
# this notebook, then preview its first rows.
allNFL = pd.read_csv(filepath_or_buffer="nfl_elo.csv")
allNFL.head()

# NumPy 2.0 removed the `NaN` alias, so `from numpy import NaN` raises
# ImportError there. Importing the canonical `nan` under the old name keeps
# `NaN` available on both NumPy 1.x and 2.x.
from numpy import nan as NaN


# Keep only games dated 2017-09-07 or later. The comparison is done on the
# raw `date` text, which assumes the column holds ISO `YYYY-MM-DD` strings
# (they sort chronologically when compared lexicographically).
nfl = allNFL.loc[allNFL["date"] >= "2017-09-07"]

# Discard the columns that are not used in the analysis below.
nfl = nfl.drop(columns=["neutral", "importance", "total_rating"])

# A missing `playoff` value marks a non-playoff game; encode it as "n".
# The fill is restricted to that one column: a frame-wide fillna("n") would
# also write the string "n" into any numeric column containing a NaN and
# silently turn it into an object column.
nfl = nfl.fillna({"playoff": "n"})

# Names of the numeric columns examined by the outlier check and the
# summary statistics.
cols = [
    "elo1_pre",
    "elo2_pre",
    "elo_prob1",
    "elo_prob2",
    "elo1_post",
    "elo2_post",
    "qbelo1_pre",
    "qbelo2_pre",
    "qb1_value_pre",
    "qb2_value_pre",
    "qb1_adj",
    "qb2_adj",
    "qbelo_prob1",
    "qbelo_prob2",
    "qb1_game_value",
    "qb2_game_value",
    "qb1_value_post",
    "qb2_value_post",
    "qbelo1_post",
    "qbelo2_post",
    "score1",
    "score2",
    "quality",
]
#outlier detection
def is_outlier(x):
    """Flag the values of ``x`` that fall outside the 1.5 * IQR fences.

    Args:
        x: A numeric pandas Series (it is applied column by column through
            ``DataFrame.apply`` in this notebook).

    Returns:
        A boolean mask aligned with ``x`` that is True where a value is
        below ``Q1 - 1.5 * IQR`` or above ``Q3 + 1.5 * IQR``.
    """
    quartiles = x.quantile([.25, .75])
    lower_q = quartiles.iloc[0]
    upper_q = quartiles.iloc[1]
    spread = upper_q - lower_q  # interquartile range

    below_fence = x < lower_q - 1.5 * spread
    above_fence = x > upper_q + 1.5 * spread
    return below_fence | above_fence

# Boolean frame with the same shape as nfl[cols]: True marks an outlier.
outl = nfl[cols].apply(is_outlier)

# Report each column in turn. The header line is always printed; the
# offending rows follow only when the column has at least one outlier.
for col in cols:
    print(f"outliers in {col}")
    out = nfl.loc[outl[col], col]
    if len(out) > 0:
        print(out)

nfl.head()

outliers in elo1_pre
15936    1227.490928
15956    1219.337610
17069    1217.456406
Name: elo1_pre, dtype: float64
outliers in elo2_pre
15926    1231.838645
15971    1210.773886
15982    1201.561463
17041    1233.393097
17053    1222.225724
Name: elo2_pre, dtype: float64
outliers in elo_prob1
outliers in elo_prob2
outliers in elo1_post
15936    1219.337610
15956    1210.773886
Name: elo1_post, dtype: float64
outliers in elo2_post
15906    1231.838645
15926    1227.490928
15971    1201.561463
15982    1200.365246
17041    1222.225724
17053    1217.456406
Name: elo2_post, dtype: float64
outliers in qbelo1_pre
15936    1241.083079
15956    1229.675102
17069    1227.834387
Name: qbelo1_pre, dtype: float64
outliers in qbelo2_pre
15926    1245.425052
15971    1220.838048
15982    1210.904201
17041    1244.823574
17053    1232.846190
Name: qbelo2_pre, dtype: float64
outliers in qb1_value_pre
16129   -6.005942
17355   -2.903320
Name: qb1_value_pre, dtype: float64
outliers in qb2_value_pre
16252    0.000000
16589    0.000000
16930    0.000000
16962    0.000000
16966    0.000000
17039   -4.891621
17272    0.000000
17358    0.000000
Name: qb2_value_pre, dtype: float64
outliers in qb1_adj
15777   -111.206263
15790     55.904372
15793    -87.361926
15813    -80.459917
15815     66.709965
            ...    
17357    -77.088545
17360    -80.449120
17363    -63.323206
17366    -73.127285
17375    -53.296133
Name: qb1_adj, Length: 176, dtype: float64
outliers in qb2_adj
15749    -52.805172
15755     52.558532
15758   -115.222694
15780     54.403417
15804   -131.788958
            ...    
17362    -59.889697
17364    -91.423174
17368   -139.668248
17370    -93.565657
17376    -48.540538
Name: qb2_adj, Length: 171, dtype: float64
outliers in qbelo_prob1
outliers in qbelo_prob2
outliers in qb1_game_value
15742   -235.201505
16055    561.903586
16062    536.886815
16205   -227.955756
16293   -186.157718
16319    542.956722
16345    628.580483
16370    556.976361
16379   -203.377584
16468    568.133430
16567    529.377014
16634   -210.655138
16671    552.757798
16889    567.489608
17038    541.279023
17049   -207.503337
17083    615.592584
17091    622.042187
17112    637.077418
17334   -241.574446
Name: qb1_game_value, dtype: float64
outliers in qb2_game_value
16010    563.905841
16028    527.794655
16045    568.784401
16060   -226.758577
16341   -314.069846
16390   -218.361351
16588    600.417397
16844   -293.851943
16859   -219.034447
Name: qb2_game_value, dtype: float64
outliers in qb1_value_post
16129    -2.90332
16713   -13.56946
Name: qb1_value_post, dtype: float64
outliers in qb2_value_post
15749    -1.495673
16015    -6.005942
16341   -21.645923
16966    -4.891621
17272    -6.109637
17358    -9.074079
17368     0.806843
Name: qb2_value_post, dtype: float64
outliers in qbelo1_post
15936    1229.675102
15956    1220.838048
Name: qbelo1_post, dtype: float64
outliers in qbelo2_post
15926    1241.083079
15971    1210.904201
15982    1208.150518
17041    1232.846190
17053    1227.834387
Name: qbelo2_post, dtype: float64
outliers in score1
15790    57
15818    52
15864    51
15895    54
16141    52
16167    54
16345    53
16390    51
16536    51
16765    52
16783    56
16909    54
17048    56
17053    50
17063    51
17288    54
17332    51
Name: score1, dtype: int64
outliers in score2
15866    51
16142    51
16167    51
16280    59
16331    55
16410    49
16597    49
17067    51
Name: score2, dtype: int64
outliers in quality

# Two column groups, each drawn as its own box-plot figure.
# NOTE(review): "qb2_value_post" and "qbelo1_post" appear in both groups,
# while "elo_prob1" and "qbelo2_post" (present in `cols`) appear in neither.
# Confirm whether this grouping is intentional.
col1 = [
    "elo1_pre",
    "elo2_pre",
    "elo2_post",
    "qbelo1_pre",
    "qbelo2_pre",
    "qb1_value_pre",
    "qb2_value_post",
    "qbelo1_post",
]
col2 = [
    "elo_prob2",
    "elo1_post",
    "qb2_value_pre",
    "qb1_adj",
    "qb2_adj",
    "qbelo_prob1",
    "qbelo_prob2",
    "qb1_game_value",
    "qb2_game_value",
    "qb1_value_post",
    "qb2_value_post",
    "qbelo1_post",
    "score1",
    "score2",
    "quality",
]

# One box per column; the two groups are plotted as separate figures.
sns.catplot(data=nfl[col1], kind="box")
sns.catplot(data=nfl[col2], kind="box")

# Print each column name followed by its summary statistics
# (count, mean, std, min, quartiles, max).
for col in cols:
    summary = nfl[col].describe()
    print(col, summary, sep="\n")

elo1_pre
count    1639.000000
mean     1512.499466
std       100.225540
min      1217.456406
25%      1444.568312
50%      1513.175041
75%      1582.497828
max      1777.933310
Name: elo1_pre, dtype: float64
elo2_pre
count    1639.000000
mean     1507.805605
std        97.034370
min      1201.561463
25%      1442.571702
50%      1508.127681
75%      1579.908910
max      1761.858581
Name: elo2_pre, dtype: float64
elo_prob1
count    1639.000000
mean        0.586480
std         0.169136
min         0.123942
25%         0.467316
50%         0.603330
75%         0.710388
max         0.953944
Name: elo_prob1, dtype: float64
elo_prob2
count    1639.000000
mean        0.413520
std         0.169136
min         0.046056
25%         0.289612
50%         0.396670
75%         0.532684
max         0.876058
Name: elo_prob2, dtype: float64
elo1_post
count    1639.000000
mean     1510.467249
std       101.999854
min      1210.773886
25%      1441.360522
50%      1510.990513
75%      1583.654653
max      1777.933310
Name: elo1_post, dtype: float64
elo2_post
count    1639.000000
mean     1509.837821
std        99.429424
min      1200.365246
25%      1442.314091
50%      1509.539094
75%      1582.412029
max      1775.119369
Name: elo2_post, dtype: float64
qbelo1_pre
count    1639.000000
mean     1510.173885
std        95.508050
min      1227.834387
25%      1444.237378
50%      1513.257169
75%      1577.819690
max      1757.263199
Name: qbelo1_pre, dtype: float64
qbelo2_pre
count    1639.000000
mean     1506.507542
std        92.303346
min      1210.904201
25%      1442.976575
50%      1506.610858
75%      1573.316037
max      1742.902172
Name: qbelo2_pre, dtype: float64
qb1_value_pre
count    1639.000000
mean      158.897306
std        59.232412
min        -6.005942
25%       120.028441
50%       158.196359
75%       201.699522
max       313.828383
Name: qb1_value_pre, dtype: float64
qb2_value_pre
count    1639.000000
mean      157.764127
std        58.490109
min        -4.891621
25%       120.409097
50%       155.669948
75%       198.870798
max       310.130678
Name: qb2_value_pre, dtype: float64
qb1_adj
count    1639.000000
mean       -5.703507
std        35.525193
min      -242.487678
25%       -10.871662
50%         2.008961
75%        13.295850
max        71.795946
Name: qb1_adj, dtype: float64
qb2_adj
count    1639.000000
mean       -5.671711
std        35.999316
min      -235.050690
25%       -10.257874
50%         1.979643
75%        13.551762
max        69.108875
Name: qb2_adj, dtype: float64
qbelo_prob1
count    1639.000000
mean        0.572435
std         0.181273
min         0.070229
25%         0.444650
50%         0.585947
75%         0.711116
max         0.964496
Name: qbelo_prob1, dtype: float64
qbelo_prob2
count    1639.000000
mean        0.427565
std         0.181273
min         0.035504
25%         0.288884
50%         0.414053
75%         0.555350
max         0.929771
Name: qbelo_prob2, dtype: float64
qb1_game_value
count    1639.000000
mean      168.777503
std       136.350794
min      -241.574446
25%        80.799033
50%       166.424500
75%       255.162020
max       637.077418
Name: qb1_game_value, dtype: float64
qb2_game_value
count    1639.000000
mean      149.472068
std       137.020214
min      -314.069846
25%        57.393301
50%       151.832530
75%       239.763865
max       600.417397
Name: qb2_game_value, dtype: float64
qb1_value_post
count    1639.000000
mean      159.885326
std        59.151489
min       -13.569460
25%       120.333154
50%       158.521746
75%       202.390239
max       310.130678
Name: qb1_value_post, dtype: float64
qb2_value_post
count    1639.000000
mean      156.934922
std        58.903806
min       -21.645923
25%       119.773154
50%       155.751957
75%       197.520046
max       313.828383
Name: qb2_value_post, dtype: float64
qbelo1_post
count    1639.000000
mean     1508.866310
std        97.279330
min      1220.838048
25%      1441.094966
50%      1510.827405
75%      1579.186263
max      1757.263199
Name: qbelo1_post, dtype: float64
qbelo2_post
count    1639.000000
mean     1507.815117
std        95.037135
min      1208.150518
25%      1443.262089
50%      1508.323190
75%      1576.703624
max      1754.511219
Name: qbelo2_post, dtype: float64
score1
count    1639.000000
mean       23.700427
std        10.134743
min         0.000000
25%        17.000000
50%        24.000000
75%        30.000000
max        57.000000
Name: score1, dtype: float64
score2
count    1639.000000
mean       22.198292
std         9.967792
min         0.000000
25%        16.000000
50%        22.000000
75%        29.000000
max        59.000000
Name: score2, dtype: float64
quality
count    1639.000000
mean       48.269067
std        29.395812
min         0.000000
25%        22.000000
50%        48.000000
75%        74.000000
max       100.000000
Name: quality, dtype: float64

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import seaborn as sns

# Pairwise correlation between the pre-/post-game Elo and QB-adjusted Elo
# ratings of both teams, drawn as an annotated heatmap on a fixed [-1, 1]
# colour scale.
colsC = nfl[
    [
        "elo1_pre",
        "elo1_post",
        "qbelo2_pre",
        "qbelo2_post",
        "qbelo1_pre",
        "qbelo1_post",
        "elo2_pre",
        "elo2_post",
    ]
]
C = colsC.corr()
sns.heatmap(
    C,
    annot=True,
    cmap='coolwarm',
    vmin=-1,
    vmax=1,
)

<Axes: >

	date	season	playoff	team1	team2	elo1_pre	elo2_pre	elo_prob1	elo_prob2	elo1_post	...	qbelo_prob2	qb1_game_value	qb2_game_value	qb1_value_post	qb2_value_post	qbelo1_post	qbelo2_post	score1	score2	quality
15740	2017-09-07	2017	n	NE	KC	1687.395154	1613.148952	0.690309	0.309691	1646.529757	...	0.253030	39.100603	400.020698	229.550400	195.440810	1626.616848	1605.309919	27	42	95.0
15741	2017-09-10	2017	n	CLE	PIT	1335.767660	1598.852911	0.242271	0.757729	1329.605337	...	0.764130	52.193001	169.429875	86.018744	182.850392	1340.915881	1603.053780	18	21	27.0
15742	2017-09-10	2017	n	CIN	BAL	1515.969638	1491.099567	0.626524	0.373476	1476.195532	...	0.412898	-235.201505	43.763990	123.092565	151.492584	1479.743800	1548.464635	0	20	56.0
15743	2017-09-10	2017	n	BUF	NYJ	1484.127683	1451.565526	0.636826	0.363174	1500.142289	...	0.289442	161.895534	90.819847	163.301767	127.907467	1464.768579	1348.394459	21	12	10.0
15744	2017-09-10	2017	n	HOU	JAX	1502.139008	1381.984201	0.743804	0.256196	1451.208768	...	0.327975	-115.102578	142.320362	27.442070	150.016385	1469.052384	1470.615187	7	29	28.0

1. Introduction¶

2. Preprocessing¶

3. Summary Data Analysis¶

4. Discussion¶

	date	season	playoff	team1	team2	elo1_pre	elo2_pre	elo_prob1	elo_prob2	...	qb2_game_value	qb1_value_post	qb2_value_post	qbelo1_post	qbelo2_post	score1	score2	quality	importance	total_rating
0	1920-09-26	1920	NaN	RII	STP	1503.947	1300.000	0.824651	0.175349	...	NaN	NaN	NaN	NaN	NaN	48	0	NaN	NaN	NaN
1	1920-10-03	1920	NaN	AKR	WHE	1503.420	1300.000	0.824212	0.175788	...	NaN	NaN	NaN	NaN	NaN	43	0	NaN	NaN	NaN
2	1920-10-03	1920	NaN	BFF	WBU	1478.004	1300.000	0.802000	0.198000	...	NaN	NaN	NaN	NaN	NaN	32	6	NaN	NaN	NaN
3	1920-10-03	1920	NaN	DAY	COL	1493.002	1504.908	0.575819	0.424181	...	NaN	NaN	NaN	NaN	NaN	14	0	NaN	NaN	NaN
4	1920-10-03	1920	NaN	RII	MUN	1516.108	1478.004	0.644171	0.355829	...	NaN	NaN	NaN	NaN	NaN	45	0	NaN	NaN	NaN