import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
/var/folders/gc/0752xrm56pnf0r0dsrn5370c0000gr/T/ipykernel_71803/555797462.py:1: DeprecationWarning: Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0), (to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries) but was not found to be installed on your system. If this would cause problems for you, please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466 import pandas as pd
# Read the suicide death-rate table (by sex, race, Hispanic origin, and age)
# from the CSV file in the working directory into a DataFrame.
dataset_csv = "Death_rates_for_suicide__by_sex__race__Hispanic_origin__and_age__United_States.csv"
data = pd.read_csv(dataset_csv)
What the code below does:
The code below filters the dataset to include only the rows where the 'STUB_LABEL' column contains the string 'Male: Black or African American' and the 'YEAR' column falls within the range of 1985 to 2018. It then creates a new DataFrame called Black_Male_Data containing these filtered rows. Finally, it displays a subset of columns ('INDICATOR', 'UNIT', 'STUB_LABEL', 'AGE', 'YEAR', 'ESTIMATE') from the filtered dataset.
# Row masks: the label mentions Black or African American males, and the
# year lies in 1985-2018 (both end years are kept).
male_label_match = data['STUB_LABEL'].str.contains('Male: Black or African American')
male_year_match = data['YEAR'].between(1985, 2018)
Black_Male_Data = data[male_label_match & male_year_match]
# Trailing expression so the notebook renders the selected columns.
Black_Male_Data[['INDICATOR', 'UNIT', 'STUB_LABEL', 'AGE', 'YEAR', 'ESTIMATE']]
INDICATOR | UNIT | STUB_LABEL | AGE | YEAR | ESTIMATE | |
---|---|---|---|---|---|---|
218 | Death rates for suicide | Deaths per 100,000 resident population, crude | Male: Black or African American: 15-24 years | 15-24 years | 1985 | 13.3 |
219 | Death rates for suicide | Deaths per 100,000 resident population, crude | Male: Black or African American: 15-24 years | 15-24 years | 1986 | 11.4 |
220 | Death rates for suicide | Deaths per 100,000 resident population, crude | Male: Black or African American: 15-24 years | 15-24 years | 1987 | 12.9 |
221 | Death rates for suicide | Deaths per 100,000 resident population, crude | Male: Black or African American: 15-24 years | 15-24 years | 1988 | 14.5 |
222 | Death rates for suicide | Deaths per 100,000 resident population, crude | Male: Black or African American: 15-24 years | 15-24 years | 1989 | 16.6 |
... | ... | ... | ... | ... | ... | ... |
416 | Death rates for suicide | Deaths per 100,000 resident population, crude | Male: Black or African American: 75-84 years | 75-84 years | 2014 | 11.0 |
417 | Death rates for suicide | Deaths per 100,000 resident population, crude | Male: Black or African American: 75-84 years | 75-84 years | 2015 | 11.2 |
418 | Death rates for suicide | Deaths per 100,000 resident population, crude | Male: Black or African American: 75-84 years | 75-84 years | 2016 | 10.6 |
419 | Death rates for suicide | Deaths per 100,000 resident population, crude | Male: Black or African American: 75-84 years | 75-84 years | 2017 | 8.8 |
420 | Death rates for suicide | Deaths per 100,000 resident population, crude | Male: Black or African American: 75-84 years | 75-84 years | 2018 | 10.7 |
171 rows × 6 columns
The code below does the same thing as the code above, except that it keeps only the rows where the 'STUB_LABEL' column contains the string 'Female: Black or African American'. The result is stored in a new DataFrame called Black_Female_Data.
# Keep the rows labelled as Black or African American females whose year
# falls between 1985 and 2018.
female_label_match = data['STUB_LABEL'].str.contains('Female: Black or African American')
female_year_match = data['YEAR'].between(1985, 2018)
Black_Female_Data = data[female_label_match & female_year_match]
# Trailing expression so the notebook renders the INDICATOR, UNIT, STUB_LABEL,
# AGE, YEAR, and ESTIMATE columns of the filtered rows.
Black_Female_Data[['INDICATOR', 'UNIT', 'STUB_LABEL', 'AGE', 'YEAR', 'ESTIMATE']]
INDICATOR | UNIT | STUB_LABEL | AGE | YEAR | ESTIMATE | |
---|---|---|---|---|---|---|
806 | Death rates for suicide | Deaths per 100,000 resident population, crude | Female: Black or African American: 15-24 years | 15-24 years | 1985 | 2.0 |
807 | Death rates for suicide | Deaths per 100,000 resident population, crude | Female: Black or African American: 15-24 years | 15-24 years | 1986 | 2.3 |
808 | Death rates for suicide | Deaths per 100,000 resident population, crude | Female: Black or African American: 15-24 years | 15-24 years | 1987 | 2.5 |
809 | Death rates for suicide | Deaths per 100,000 resident population, crude | Female: Black or African American: 15-24 years | 15-24 years | 1988 | 2.6 |
810 | Death rates for suicide | Deaths per 100,000 resident population, crude | Female: Black or African American: 15-24 years | 15-24 years | 1989 | 2.9 |
... | ... | ... | ... | ... | ... | ... |
920 | Death rates for suicide | Deaths per 100,000 resident population, crude | Female: Black or African American: 45-64 years | 45-64 years | 2015 | 2.6 |
921 | Death rates for suicide | Deaths per 100,000 resident population, crude | Female: Black or African American: 45-64 years | 45-64 years | 2016 | 2.3 |
922 | Death rates for suicide | Deaths per 100,000 resident population, crude | Female: Black or African American: 45-64 years | 45-64 years | 2017 | 2.6 |
923 | Death rates for suicide | Deaths per 100,000 resident population, crude | Female: Black or African American: 45-64 years | 45-64 years | 2018 | 3.0 |
924 | Death rates for suicide | Deaths per 100,000 resident population, crude | Female: Black or African American: 45-64 years | 45-64 years | 2018 | 5.5 |
103 rows × 6 columns
What the code below does:
It visualizes the suicide estimates over time for Black males and females in a single plot, drawing one line per sex and age category (solid lines for males, dashed lines for females).
# Draw suicide estimates over time for Black males (solid lines) and Black
# females (dashed lines) on one figure, with one line per age category.
plt.figure(figsize=(12, 8))

# Age categories to plot; the same list is used for both sexes.
age_categories = ['15-24 years', '25-44 years', '45-64 years', '65-74 years', '75-84 years']

# One color per line: the first half of the list is used for males and the
# second half for females, so no two lines share a color.
colors = ['blue', 'green', 'red', 'orange', 'purple', 'cyan', 'magenta', 'yellow', 'lime', 'pink']
female_color_offset = len(colors) // 2

# Plot one line per age category for Black males
for i, age_category in enumerate(age_categories):
    male_data = Black_Male_Data[Black_Male_Data['AGE'] == age_category]
    # Skip age groups with no rows; otherwise the legend would list a line
    # that is not actually drawn.
    if male_data.empty:
        continue
    plt.plot(male_data['YEAR'], male_data['ESTIMATE'], color=colors[i], marker='o',
             label=f'Black Males ({age_category})')

# Plot one line per age category for Black females
for i, age_category in enumerate(age_categories):
    female_data = Black_Female_Data[Black_Female_Data['AGE'] == age_category]
    if female_data.empty:
        continue
    plt.plot(female_data['YEAR'], female_data['ESTIMATE'], color=colors[i + female_color_offset],
             marker='o', linestyle='--', label=f'Black Females ({age_category})')

# Set titles and labels. The estimates are rates per 100,000 resident
# population (see the UNIT column), not percentages.
plt.title('Suicide Estimates Over Time for Black Males and Females by Age Category')
plt.xlabel('Year')
plt.ylabel('Suicide Estimate (deaths per 100,000 resident population)')

# Add legend and grid
plt.legend()
plt.grid(True)

# Show the plot
plt.tight_layout()
plt.show()