A Complete Cheat Sheet For Data Visualization in Pandas

A Complete Cheat Sheet For Data Visualization in Pandas

I will start with very basic plots using random data and then move on to more advanced ones with a real dataset.

If you would like to work in a Jupyter notebook, that is also a great idea: go ahead and install the Anaconda distribution. Otherwise, install pandas with one of the following commands.

pip install pandas
conda install pandas

Pandas Visualization

import pandas as pd
a = pd.Series([40, 34, 30, 22, 28, 17, 19, 20, 13, 9, 15, 10, 7, 3])
a.plot()
Image for post
a.plot(figsize=(8, 6), color='green', title = 'Line Plot', fontsize=12)
Image for post
a.plot(kind='area')
a.plot.area()
Image for post
b = pd.Series([45, 22, 12, 9, 20, 34, 28, 19, 26, 38, 41, 24, 14, 32])
c = pd.Series([25, 38, 33, 38, 23, 12, 30, 37, 34, 22, 16, 24, 12, 9])
d = pd.DataFrame({'a':a, 'b': b, 'c': c})
Image for post
d.plot.area(figsize=(8, 6), title='Area Plot')
Image for post
d.plot.area(alpha=0.4, color=['coral', 'purple', 'lightgreen'],figsize=(8, 6), title='Area Plot', fontsize=12)
Image for post

The ‘alpha’ parameter gives the plot a translucent look.

df = pd.read_csv('nhanes_2015_2016.csv')
df.head()
Image for post
df.columns
Index(['SEQN', 'ALQ101', 'ALQ110', 'ALQ130', 'SMQ020', 'RIAGENDR', 'RIDAGEYR', 'RIDRETH1', 'DMDCITZN', 'DMDEDUC2', 'DMDMARTL', 'DMDHHSIZ', 'WTINT2YR', 'SDMVPSU', 'SDMVSTRA', 'INDFMPIR', 'BPXSY1', 'BPXDI1', 'BPXSY2', 'BPXDI2', 'BMXWT', 'BMXHT', 'BMXBMI', 'BMXLEG', 'BMXARML', 'BMXARMC', 'BMXWAIST', 'HIQ210', 'DMDEDUC2x', 'DMDMARTLx'], dtype='object')
df['BMXWT'].hist()
Image for post
df[['BMXWT', 'BMXHT', 'BMXBMI']].plot.hist(stacked=True, bins=20, fontsize=12, figsize=(10, 8))
Image for post
df[['BMXWT', 'BMXHT', 'BMXBMI']].hist(bins=20,figsize=(10, 8))
Image for post
# Replace the numeric DMDEDUC2 codes with text labels and store the result
# in a new column, DMDEDUC2x.
df["DMDEDUC2x"] = df.DMDEDUC2.replace({1: "less than 9", 2: "9-11", 3: "HS/GED", 4: "Some college/AA", 5: "College", 7: "Refused", 9: "Don't know"})
# Draw one histogram of BPXSY1 for each DMDEDUC2x label.
df[['DMDEDUC2x', 'BPXSY1']].hist(by='DMDEDUC2x', figsize=(18, 12))
Image for post
# Replace the numeric DMDMARTL codes with text labels and store the result
# in a new column, DMDMARTLx.
df["DMDMARTLx"] = df.DMDMARTL.replace({1: "Married", 2: "Widowed", 3: "Divorced", 4: "Separated", 5: "Never married", 6: "Living w/partner", 77: "Refused"})
# Bar chart of the mean BPXSY1 per DMDMARTLx label; rot=45 tilts the tick labels.
df.groupby('DMDMARTLx')['BPXSY1'].mean().plot(kind='bar', rot=45, fontsize=10, figsize=(8, 6))
Image for post
df.groupby('DMDEDUC2x')['BPXSY1'].mean().plot(kind='barh', rot=45, fontsize=10, figsize=(8, 6))
Image for post
# Mean of BMXWT, BMXHT and BMXBMI for each RIDRETH1 group, with RIDRETH1
# turned back into a regular column by reset_index().
# The columns are selected with a list (double brackets): selecting several
# columns after groupby() with a bare tuple was deprecated in pandas 1.0 and
# raises an error in pandas 2.0 and later.
df_bmx = df.groupby('RIDRETH1')[['BMXWT', 'BMXHT', 'BMXBMI']].mean().reset_index()
Image for post
df_bmx.plot(x = 'RIDRETH1', 
            y=['BMXWT', 'BMXHT', 'BMXBMI'], 
            kind = 'bar', 
            color = ['lightblue', 'red', 'yellow'], 
            fontsize=10)
Image for post
df_bmx.plot(x = 'RIDRETH1', 
            y=['BMXWT', 'BMXHT', 'BMXBMI'], 
            kind = 'bar', stacked=True,
            color = ['lightblue', 'red', 'yellow'], 
            fontsize=10)
Image for post
df_edu_marit = df.groupby('DMDEDUC2x')['DMDMARTL'].count()
pd.Series(df_edu_marit)
Image for post
# Pie chart of the counts per DMDEDUC2x group.
# The wedge labels are taken from the Series index, so every wedge is named
# after the group it actually counts. A hand-written label list would have to
# follow the sorted groupby order ('9-11', 'College', "Don't know", ...)
# exactly, and would fail if the number of groups differed.
# label='' suppresses the y-axis label; autopct prints each share as a percentage.
ax = pd.Series(df_edu_marit).plot.pie(subplots=True, label='',
     figsize = (8, 6),
     colors = ['lightgreen', 'violet', 'coral', 'skyblue', 'yellow', 'purple'], autopct = '%.2f')
Image for post
# Colours for the individual parts of each box plot, keyed by component name.
color = {'boxes': 'DarkBlue', 'whiskers': 'coral', 
         'medians': 'Black', 'caps': 'Green'}
# Box plots of the three selected columns, drawn with the colours defined above.
df[['BMXBMI', 'BMXLEG', 'BMXARML']].plot.box(figsize=(8, 6),color=color)
Image for post
df.head(300).plot(x='BMXBMI', y= 'BPXSY1', kind = 'scatter')
Image for post
df.head(500).plot.scatter(x= 'BMXWT', y = 'BMXHT', c ='BMXLEG', s=50, figsize=(8, 6))
Image for post
# Bubble plot of the first 200 rows: the marker size is BMXBMI scaled by 7.
# The size Series is sliced to its first 200 entries to match df.head(200).
df.head(200).plot.scatter(x= 'BMXHT', y = 'BMXWT', 
                          s =df['BMXBMI'][:200] * 7, 
                          alpha=0.5, color='purple',
                         figsize=(8, 6))
Image for post
df.plot.hexbin(x='BMXARMC', y='BMXLEG', gridsize= 20)
Image for post

See the pandas documentation for DataFrame.plot.hexbin for more information.

import numpy as np  # np.max is used as the reduce function below

# Hexbin plot in which each bin is coloured by the maximum BMXHT value of the
# points that fall into it (C selects the column, reduce_C_function the
# aggregation applied per bin).
df.plot.hexbin(x='BMXARMC', y='BMXLEG', C = 'BMXHT',
                         reduce_C_function=np.max,
                         gridsize=15,
                        figsize=(8,6))
Image for post
df.plot.hexbin(x='BMXARMC', y='BMXLEG', C = 'BMXHT',
                         reduce_C_function=np.max,
                         gridsize=15,
                        figsize=(8,6),
                        cmap = 'viridis')
Image for post

Some Advanced Visualization

from pandas.plotting import scatter_matrix

# Grid of pairwise scatter plots for the five columns; diagonal='kde' draws a
# kernel density estimate for each column on the diagonal.
scatter_matrix(df[['BMXWT', 'BMXHT', 'BMXBMI', 'BMXLEG', 'BMXARML']], alpha = 0.2, figsize=(10, 8), diagonal = 'kde')
Image for post
df['BMXWT'].plot.kde()
Image for post
df[['BMXWT', 'BMXHT', 'BMXBMI']].plot.kde(figsize = (8, 6))
Image for post
from pandas.plotting import parallel_coordinates

# Parallel-coordinates plot of the first 200 complete rows, with one line per
# row coloured by its RIAGENDR value.
parallel_coordinates(df[['BMXWT', 'BMXHT', 'BMXBMI', 'RIAGENDR']].dropna().head(200), 'RIAGENDR', color=['blue', 'violet'])
Image for post
from pandas.plotting import bootstrap_plot

# Bootstrap plot for BMXBMI: 1000 random samples of 100 values each.
bootstrap_plot(df['BMXBMI'], size=100, samples=1000, color='skyblue')
Image for post

Conclusion

Leave a Reply

Close Menu