Seaborn 绘图分析代码示例——Python中数据可视化库

/ Python / 没有评论 / 452浏览

主题风格

darkgrid、whitegrid、dark、white、ticks

import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
# Switch to the plain "white" theme, then draw one box per column for six
# columns of normal samples whose offsets grow by 0.5 from column to column.
sns.set_style("white")
data = np.arange(6) / 2 + np.random.normal(size=(20, 6))
sns.boxplot(data=data)

despine—轴线设置

import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
def sinplot(flip=1):
    """Plot six sine curves on the current matplotlib axes.

    For each k in 1..6 the curve is ``sin(x + 0.5 * k) * (7 - k) * flip``,
    evaluated at 100 evenly spaced x values on [0, 14]. Pass ``flip=-1`` to
    mirror the curves about the x-axis.
    """
    xs = np.linspace(0, 14, 100)
    for k in range(1, 7):
        phase = k * .5
        plt.plot(xs, np.sin(xs + phase) * (7 - k) * flip)
# Draw the demo curves, then adjust the spines in two steps: the first call
# uses offset=10, the second call additionally hides the bottom spine.
sinplot()
sns.despine(offset=10)
sns.despine(bottom=True)

with—打开一种Style风格

import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
def sinplot(flip=1):
    """Draw a family of six sine waves with decreasing amplitude.

    The i-th wave (i = 1..6) is shifted left by ``0.5 * i`` and scaled by
    ``(7 - i) * flip``; all waves share 100 sample points on [0, 14].
    """
    xs = np.linspace(0, 14, 100)
    # Shifts count up from 1 to 6 while the paired scales count down from 6 to 1.
    for shift, scale in zip(range(1, 7), range(6, 0, -1)):
        plt.plot(xs, np.sin(xs + shift * .5) * scale * flip)
# The top subplot is created inside the context manager, so only it is drawn
# with the temporary "darkgrid" style; the bottom subplot is created after
# the block exits and uses whatever style is currently active.
with sns.axes_style("darkgrid"):
    plt.subplot(2, 1, 1)
    sinplot()

plt.subplot(2, 1, 2)
sinplot(flip=-1)

set_context—设置图属性

paper、talk、poster、notebook

import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
def sinplot(flip=1):
    """Plot six phase-shifted, amplitude-scaled sine curves.

    Uses 100 evenly spaced x values on [0, 14]; curve ``idx`` (1..6) is
    ``sin(x + idx * 0.5) * (7 - idx) * flip``. A negative ``flip`` turns
    the curves upside down.
    """
    grid = np.linspace(0, 14, 100)
    idx = 1
    while idx < 7:
        amplitude = 7 - idx
        plt.plot(grid, np.sin(grid + idx * .5) * amplitude * flip)
        idx += 1
# "paper" context with enlarged fonts (x2.3) and thicker lines, drawn on an
# 8 x 6 inch figure.
context_overrides = {"lines.linewidth": 5.5}
sns.set_context("paper", font_scale=2.3, rc=context_overrides)
plt.figure(figsize=(8, 6))
sinplot()

调色板

分类色板

默认颜色

deep、muted、pastel、bright、dark、colorblind

import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
# color_palette() called without arguments returns the palette currently in
# use; palplot shows it as a row of color swatches.
current = sns.color_palette()
sns.palplot(pal=current)

圆形画板

在一个圆形的颜色空间中均匀地切分

# Show ten colors taken from the circular "hls" color space.
hls_colors = sns.color_palette("hls", 10)
sns.palplot(hls_colors)
import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
# Ten columns of normal samples, each shifted by 0.5 more than the previous
# one, drawn as box plots colored with a 10-color "hls" palette.
column_offsets = np.arange(10) / 2
data = np.random.normal(size=(20, 10)) + column_offsets
sns.boxplot(data=data, palette=sns.color_palette("hls", 10))

hls_palette—亮度饱和度

l — lightness、s — saturation

import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
# l controls lightness and s controls saturation of the ten HLS colors.
sns.palplot(sns.hls_palette(n_colors=10, l=0.5, s=0.9))

Paired—颜色对儿

import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
# Show the first ten colors of the "Paired" palette.
paired_colors = sns.color_palette("Paired", n_colors=10)
sns.palplot(paired_colors)

xkcd—调用知名颜色

xkcd包含了一套针对随机RGB色的命名,产生了954个可以通过xkcd_rgb字典调用的颜色名

import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
# Three lines from the origin with slopes 1, 2 and 3, each colored with a
# named entry looked up in seaborn's xkcd_rgb dictionary.
for slope, color_name in enumerate(("pale red", "medium green", "denim blue"), start=1):
    plt.plot([0, 1], [0, slope], sns.xkcd_rgb[color_name], lw=3)

连续色板

由浅到深

import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
# Sequential "Blues" palette (light to dark, per the section heading).
blues = sns.color_palette("Blues")
sns.palplot(blues)

由深到浅

在颜色后面加“_r”

import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
# The "_r" suffix requests the reversed version of the "Blues" palette.
blues_reversed = sns.color_palette("Blues_r")
sns.palplot(blues_reversed)

线性变化

import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
# Same reversed "Blues" palette, this time sampled at ten colors.
blues_reversed_10 = sns.color_palette("Blues_r", n_colors=10)
sns.palplot(blues_reversed_10)

cubehelix_palette—指定区间

import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
# Ten cubehelix colors; start and rot are passed through to
# cubehelix_palette unchanged.
helix_colors = sns.cubehelix_palette(n_colors=10, start=.5, rot=.75)
sns.palplot(helix_colors)

light_palette—指定浅色

import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
# light_palette builds a sequential palette for "blue"; the second swatch
# row shows the same palette with reverse=True.
for is_reversed in (False, True):
    sns.palplot(sns.light_palette("blue", reverse=is_reversed))

dark_palette—指定深色

import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
# dark_palette builds the dark counterpart of light_palette for "blue".
dark_blues = sns.dark_palette("blue")
sns.palplot(dark_blues)

单变量分析

distplot—画直方图

import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
# Histogram of 100 normally distributed samples (seeded so the figure is
# reproducible). histplot(data) is the replacement for
# distplot(data, kde=False); distplot has been deprecated since seaborn 0.11.
sns.set(color_codes=True)
np.random.seed(sum(map(ord, "distributions")))
data = np.random.normal(size=100)
sns.histplot(data)

bins—将数据切块

import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
# Same histogram, but with the data split into exactly five bins.
# histplot(data, bins=5) is the replacement for
# distplot(data, bins=5, kde=False); distplot has been deprecated since
# seaborn 0.11.
sns.set(color_codes=True)
np.random.seed(sum(map(ord, "distributions")))
data = np.random.normal(size=100)
sns.histplot(data, bins=5)

fit—显示轮廓

import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# Density histogram with a fitted gamma distribution drawn on top.
# distplot(..., fit=stats.gamma) has been deprecated since seaborn 0.11 and
# histplot has no fit= option, so the fit is done explicitly with scipy.
sns.set(color_codes=True)
np.random.seed(sum(map(ord, "distributions")))
data = np.random.normal(size=100)
# stat="density" puts the bars on the same scale as the fitted PDF.
ax = sns.histplot(data, stat="density")
gamma_params = stats.gamma.fit(data)
grid = np.linspace(data.min(), data.max(), 200)
ax.plot(grid, stats.gamma.pdf(grid, *gamma_params), color=".2")

jointplot—画散点图&直方图

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# Draw 200 samples from a correlated 2-D normal distribution and show the
# joint plot of the two columns.
mean = [0, 1]
cov = [(1, .5), (.5, 1)]
data = np.random.multivariate_normal(mean, cov, size=200)
df = pd.DataFrame(data, columns=["x", "y"])
sns.jointplot(data=df, x="x", y="y")

kind

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# With 2000 points, hexagonal binning (kind="hex") is used for the joint
# view; the plot is drawn under a temporary "white" style.
mean = [0, 1]
cov = [(1, .5), (.5, 1)]
samples = np.random.multivariate_normal(mean, cov, 2000)
x, y = samples.T
with sns.axes_style("white"):
    sns.jointplot(x=x, y=y, kind="hex", color="blue")

pairplot—数据集中两两对比

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# Pairwise relationships between the columns of the iris example dataset
# (load_dataset fetches it by name).
iris = sns.load_dataset("iris")
sns.pairplot(data=iris)

多变量分析

stripplot—分类散点图

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# Categorical scatter of total_bill per day; jitter=True spreads the points
# sideways so they overlap less. The seed keeps the jitter reproducible.
sns.set(style="whitegrid", color_codes=True)
np.random.seed(sum(map(ord, "categorical")))
tips = sns.load_dataset("tips")
sns.stripplot(data=tips, x="day", y="total_bill", jitter=True)

swarmplot

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# Swarm plot of total_bill per day: a categorical scatter whose points are
# adjusted so that they do not overlap.
sns.set(style="whitegrid", color_codes=True)
np.random.seed(sum(map(ord, "categorical")))
tips = sns.load_dataset("tips")
sns.swarmplot(data=tips, x="day", y="total_bill")

hue

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# Same swarm plot, with the points colored by the "sex" column via hue.
sns.set(style="whitegrid", color_codes=True)
np.random.seed(sum(map(ord, "categorical")))
tips = sns.load_dataset("tips")
sns.swarmplot(data=tips, x="day", y="total_bill", hue="sex")

分类属性绘图

离群点

IQR 即统计学概念四分位距, 即第 1/4 分位点与第 3/4 分位点之间的距离
定义 N = 1.5 × IQR, 如果一个值小于 1/4 分位点 - N 或者大于 3/4 分位点 + N, 则为离群点

boxplot—盒图展示

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# Box plots of total_bill for each day, split by the "time" column.
sns.set(style="whitegrid", color_codes=True)
tips = sns.load_dataset("tips")
sns.boxplot(data=tips, x="day", y="total_bill", hue="time")

orient—指定横向纵向

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# No x/y is given, so the whole DataFrame is passed as the data;
# orient="h" requests horizontal boxes.
sns.set(style="whitegrid", color_codes=True)
tips = sns.load_dataset("tips")
sns.boxplot(orient="h", data=tips)

violinplot—小提琴图展示

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# Violin plots of total_bill for each day, one violin per "time" value.
sns.set(style="whitegrid", color_codes=True)
tips = sns.load_dataset("tips")
sns.violinplot(data=tips, x="day", y="total_bill", hue="time")

split—将属性分到两侧

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# split=True draws the two hue levels as the two halves of a single violin
# instead of as separate violins.
sns.set(style="whitegrid", color_codes=True)
tips = sns.load_dataset("tips")
sns.violinplot(
    data=tips,
    x="day",
    y="total_bill",
    hue="time",
    split=True,
)

多图混合

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# Overlay two plots on the same axes: violins without inner annotations
# (inner=None), then semi-transparent white swarm points on top of them.
sns.set(style="whitegrid", color_codes=True)
tips = sns.load_dataset("tips")
sns.violinplot(data=tips, x="day", y="total_bill", inner=None)
sns.swarmplot(data=tips, x="day", y="total_bill", color="w", alpha=.5)

barplot—条形图

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# Bar plot of the "survived" column per sex, with one bar per passenger
# class within each sex.
sns.set(style="whitegrid", color_codes=True)
titanic = sns.load_dataset("titanic")
sns.barplot(data=titanic, x="sex", y="survived", hue="class")

pointplot—点图(描述差异性)

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# Point plot of the "survived" column per sex; each passenger class gets
# its own connected line, which makes differences between groups easy to see.
sns.set(style="whitegrid", color_codes=True)
titanic = sns.load_dataset("titanic")
sns.pointplot(data=titanic, x="sex", y="survived", hue="class")

指定属性

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# Point plot of "survived" per class, one line per sex, with explicit
# per-sex colors plus custom markers and line styles.
sns.set(style="whitegrid", color_codes=True)
titanic = sns.load_dataset("titanic")
color = {"male": "g", "female": "m"}
marker = ["^", "o"]
linestyle = ["-", "--"]
sns.pointplot(
    data=titanic,
    x="class",
    y="survived",
    hue="sex",
    palette=color,
    markers=marker,
    linestyles=linestyle,
)

factorplot—多层面板分类图(seaborn 0.9 起更名为 catplot)

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# Figure-level categorical plot of total_bill per day, split by smoker.
# factorplot was renamed to catplot in seaborn 0.9 and later removed.
# kind="point" is spelled out because factorplot defaulted to a point plot
# while catplot defaults to a strip plot.
sns.set(style="whitegrid", color_codes=True)
tips = sns.load_dataset("tips")
sns.catplot(x="day", y="total_bill", hue="smoker", data=tips, kind="point")

kind—指定画什么类型图

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# kind selects the plot type drawn on the grid; here a bar plot.
# catplot is the current name of factorplot (renamed in seaborn 0.9).
sns.set(style="whitegrid", color_codes=True)
tips = sns.load_dataset("tips")
sns.catplot(x="day", y="total_bill", hue="smoker", data=tips, kind="bar")

col—指定维度

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# col="time" creates one panel per value of the "time" column; each panel
# holds a swarm plot of total_bill per day, colored by smoker.
# catplot is the current name of factorplot (renamed in seaborn 0.9).
sns.set(style="whitegrid", color_codes=True)
tips = sns.load_dataset("tips")
sns.catplot(
    x="day",
    y="total_bill",
    hue="smoker",
    col="time",
    data=tips,
    kind="swarm",
)

size—指定大小(seaborn 0.9 起参数名改为 height)

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# One bar-plot panel per day, each 3 inches tall.
# catplot is the current name of factorplot, and its per-panel height
# parameter is called height (it was size before seaborn 0.9).
sns.set(style="whitegrid", color_codes=True)
tips = sns.load_dataset("tips")
sns.catplot(
    x="time",
    y="total_bill",
    hue="smoker",
    col="day",
    data=tips,
    kind="bar",
    height=3,
)

aspect—指定长宽比

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# One bar-plot panel per day, 6 inches tall; aspect=.5 makes each panel
# half as wide as it is tall (width = height * aspect).
# catplot is the current name of factorplot, and height replaces the old
# size parameter (both renamed in seaborn 0.9).
sns.set(style="whitegrid", color_codes=True)
tips = sns.load_dataset("tips")
sns.catplot(
    x="time",
    y="total_bill",
    hue="smoker",
    col="day",
    data=tips,
    kind="bar",
    height=6,
    aspect=.5,
)

参数列表

参数名 | 意义
x, y, hue | 数据集变量(变量名)
data | 数据集(数据集名)
row, col | 更多分类变量进行平铺显示(变量名)
col_wrap | 每行的最多平铺数(整数)
estimator | 在每个分类中进行矢量到标量的映射(矢量)
ci | 置信区间(浮点数或者None)
n_boot | 计算置信区间时使用的引导迭代次数(整数)
units | 采样单元的标识符, 用于执行多级引导和重复测量设计(数据变量或向量数据)
order, hue_order | 对应排序列表(字符串列表)
row_order, col_order | 对应排序列表(字符串列表)
kind | point(默认), bar(柱状图), count(频次图), box(盒图)
kind | violin(提琴), strip(散点), swarm(分散点)
size | 每个面的高度(单位英寸)(标量)
aspect | 纵横比(标量)
orient | 方向(v/h)
color | 颜色(matplotlib颜色)
palette | 调色板(seaborn颜色色板或字典)
legend | hue的信息面板(True/False)
legend_out | 是否扩展图形,并将信息框绘制在中心右边(True/False)
share{x,y} | 共享轴线(True/False)

FacetGrid—数据子集展示

设置区域,实例化FacetGrid

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# Instantiate an (as yet empty) grid with one column of axes per value of
# the "time" column; nothing is plotted until g.map is called.
sns.set(style="white", color_codes=True)
tips = sns.load_dataset("tips")
g = sns.FacetGrid(data=tips, col="time")

map—指定画什么图,参数设置

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# Scatter tip against total_bill on a single facet, colored by meal time.
# height replaces the old size parameter (renamed in seaborn 0.9).
sns.set(style="whitegrid", color_codes=True)
tips = sns.load_dataset("tips")
color = dict(Lunch="seagreen", Dinner="gray")
g = sns.FacetGrid(tips, hue="time", palette=color, height=10)
# Extra keyword arguments to map are forwarded to plt.scatter.
g.map(plt.scatter, "total_bill", "tip", s=50, alpha=.7, linewidth=.5, edgecolor="white")
g.add_legend()

更多设置

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from pandas import Categorical
# Grid of scatter plots: rows by sex, columns by smoker, colors by meal time.
# height replaces the old size parameter (renamed in seaborn 0.9).
sns.set(style="whitegrid", color_codes=True)
tips = sns.load_dataset("tips")
color = dict(Lunch="seagreen", Dinner="gray")
# Explicit row order. An alternative is tips.sex.value_counts().index,
# which orders the rows by how often each value occurs.
ordered_sex = Categorical(["Female", "Male"])
g = sns.FacetGrid(
    tips,
    row="sex",
    row_order=ordered_sex,
    col="smoker",
    hue="time",
    palette=color,
    margin_titles=True,
    height=5,
)
# Extra keyword arguments to map are forwarded to plt.scatter.
g.map(plt.scatter, "total_bill", "tip", s=50, alpha=.7, linewidth=.5, edgecolor="white")
g.add_legend()
g.set_axis_labels("Total_bill($)", "Tips")
g.set(xticks=[10, 30, 50], yticks=[2, 6, 10])
g.fig.subplots_adjust(left=.2, top=.8, wspace=.2, hspace=.2)

PairGrid—数据集中两两对比

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# Build a pairwise grid over the iris dataset and draw a scatter plot in
# every cell of the grid.
iris = sns.load_dataset("iris")
g = sns.PairGrid(data=iris)
g.map(plt.scatter)

heatmap—热度图

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# Heat map of a 3x3 matrix of uniform random numbers; the matrix is printed
# first so the colors can be compared with the values.
data = np.random.rand(3, 3)
print(data)
heatmap = sns.heatmap(data=data)

vmin,vmax—设置取值区间

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# vmin / vmax anchor the colormap to the interval [0.2, 0.5] instead of the
# range of the data.
data = np.random.rand(3, 3)
print(data)
heatmap = sns.heatmap(data, vmax=0.5, vmin=0.2)

center—指定中心值

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# randn yields both negative and positive values; center=0 centers the
# colormap on zero.
data = np.random.randn(3, 3)
print(data)
heatmap = sns.heatmap(data=data, center=0)

annot—显示值

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# Reshape the long-form flights data into a month x year table of passenger
# counts and annotate every cell with its integer value.
flights = sns.load_dataset("flights")
# DataFrame.pivot arguments are keyword-only since pandas 2.0.
flights = flights.pivot(index="month", columns="year", values="passengers")
print(flights)
sns.heatmap(flights, annot=True, fmt="d")

lw(linewidths)—格之间间距

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# Annotated month x year heat map with thin lines separating the cells.
flights = sns.load_dataset("flights")
# DataFrame.pivot arguments are keyword-only since pandas 2.0.
flights = flights.pivot(index="month", columns="year", values="passengers")
# linewidths is heatmap's documented parameter for the cell separators
# (the original passed the matplotlib alias lw).
sns.heatmap(flights, annot=True, fmt="d", linewidths=.5)

cmap—指定调色板

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# Same annotated heat map, drawn with the "YlGnBu" colormap.
flights = sns.load_dataset("flights")
# DataFrame.pivot arguments are keyword-only since pandas 2.0.
flights = flights.pivot(index="month", columns="year", values="passengers")
# linewidths is heatmap's documented parameter for the cell separators
# (the original passed the matplotlib alias lw).
sns.heatmap(flights, annot=True, fmt="d", linewidths=.5, cmap="YlGnBu")

cbar—隐藏颜色条

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# Same annotated heat map with the color bar hidden (cbar=False).
flights = sns.load_dataset("flights")
# DataFrame.pivot arguments are keyword-only since pandas 2.0.
flights = flights.pivot(index="month", columns="year", values="passengers")
# linewidths is heatmap's documented parameter for the cell separators
# (the original passed the matplotlib alias lw).
sns.heatmap(
    flights,
    annot=True,
    fmt="d",
    linewidths=.5,
    cmap="YlGnBu",
    cbar=False,
)

回归分析

regplot

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# Scatter of tip against total_bill with a fitted regression line. The seed
# is set before plotting so the figure is reproducible between runs.
tips = sns.load_dataset("tips")
np.random.seed(sum(map(ord, "regression")))
sns.regplot(data=tips, x="total_bill", y="tip")

lmplot

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# Same regression of tip on total_bill, drawn with the figure-level lmplot
# function instead of the axes-level regplot.
tips = sns.load_dataset("tips")
np.random.seed(sum(map(ord, "regression")))
sns.lmplot(data=tips, x="total_bill", y="tip")

x_jitter—抖动(浮动)

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
# "size" takes only a few distinct values, so x_jitter=.05 adds a little
# horizontal noise to the points; the seed makes that noise reproducible.
tips = sns.load_dataset("tips")
np.random.seed(sum(map(ord, "regression")))
sns.lmplot(data=tips, x="size", y="tip", x_jitter=.05)