我有时间序列类数据。第一列包含 Class ID。第二列包含学生 ID,适用于各种学生。第三列是 join time,第四列是 leave time。
因此,学生有可能在10分钟后离开班级,并在一段时间后再次加入。两次活动的时间都被记录下来。我想对数据进行可视化,以便了解什么时候上课的学生最多。
我的方法:
# For each timestamp column, derive an hour-of-day column and a
# calendar-date column (written in place on `data`).
derived_columns = (
    ('Join Time', 'Join Time Hour', 'Join Time Date'),
    ('Leave Time', 'Leave Time Hour', 'Leave Time Date'),
)
for stamp_col, hour_col, date_col in derived_columns:
    data[hour_col] = data[stamp_col].dt.hour
    data[date_col] = data[stamp_col].dt.date
# Constant column of ones, used as the value to aggregate later on.
data['Dummy Column'] = 1
对于给定的课时,下面这段代码的输出为我提供了基于虚拟变量总和的热图,但可视化中没有考虑 leave time:
data1 = (
pd.pivot_table(data,
values='Dummy Column',
index='Join Time Date',
columns='Join Time Hour',
aggfunc='sum')
)
我想要的是:把 leave time 也纳入可视化,从而了解哪个时段上课的学生最多。
谢谢!
答案 0 :(得分:0)
我认为sankey图可以解决您的问题。下面是我的测试代码。
import pandas as pd
import numpy as np
from itertools import product
import seaborn as sns
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
# Generate hourly test timestamps from 08:00 to 12:00 (both ends included).
# The step is passed as an offset object instead of the "H" string alias:
# pandas 2.2 deprecated the upper-case alias in favour of "h", whereas the
# offset object is accepted by older and newer pandas versions alike.
times = pd.date_range("202005010800", '202005011200', freq=pd.offsets.Hour())
# Generate 1000 test student ids (0..999).
students = list(np.arange(1000))
# Generate the test classes.
classes = ["class A", "class B"]
# Build one node label per join hour and one per leave hour.
join_time = list(times.strftime("join time: %Y%m%d %H"))
leave_time = list(times.strftime("leave time: %Y%m%d %H"))
# Node order: classes first, then join times, then leave times.
nodes = [*classes, *join_time, *leave_time]
# One row per node; each node gets a colour, and its row index is its id.
df_nodes = pd.DataFrame({'node': nodes})
palette = list(sns.palettes.xkcd_rgb.values())
df_nodes['color'] = palette[:len(df_nodes)]
df_nodes['node id'] = df_nodes.index
# Lookup tables (node label -> id, node label -> colour) used for the links.
node_lookup = df_nodes.set_index('node')
nodes_id_dict = node_lookup['node id'].to_dict()
nodes_color_dict = node_lookup['color'].to_dict()
# Enumerate every (join, leave, student, class) combination and keep only
# those whose leave time falls strictly after the join time.
records = [
    (joined, left, student, class_name)
    for joined, left, student, class_name
    in product(times, times, students, classes)
    if left > joined
]
# Load the combinations into a frame and draw 10000 of them at random.
df_records = pd.DataFrame(
    records, columns=['join time', 'leave time', 'student', 'class']
).sample(10000)
# Turn both timestamps into the label strings used as keys of the nodes dicts.
label_formats = {
    'join time': "join time: %Y%m%d %H",
    'leave time': "leave time: %Y%m%d %H",
}
for column, fmt in label_formats.items():
    df_records[column] = df_records[column].dt.strftime(fmt)
# First set of links: class -> join time, weighted by the record count.
class_join_time = (
    df_records.groupby(['class', 'join time'])['student']
    .count()
    .reset_index()
)
class_join_time.columns = ['source', 'target', 'value']
# Second set of links: join time -> leave time, weighted the same way.
join_leave_time = (
    df_records.groupby(['join time', 'leave time'])['student']
    .count()
    .reset_index()
)
join_leave_time.columns = ['source', 'target', 'value']
# Stack the two link tables. ignore_index gives the result a unique
# 0..n-1 index instead of repeating the labels of both inputs.
df_links = pd.concat([class_join_time, join_leave_time], ignore_index=True)
# Translate node labels into node ids and link colours.
# Series.map performs a plain dictionary lookup; Series.replace on these
# string columns relied on object -> int downcasting, which pandas 2.2
# deprecated. Every label is a key of the dicts, so no value is left unmapped.
df_links['source id'] = df_links['source'].map(nodes_id_dict)
df_links['target id'] = df_links['target'].map(nodes_id_dict)
# Each link takes the colour of its target node; any other colour scheme
# could be substituted here.
df_links['link color'] = df_links['target'].map(nodes_color_dict)
# Node section of the trace: labels and colours come from the node table.
node_spec = {
    'pad': 10,
    'line': {
        'color': "rgba(0,0,0,0.5)",
        'width': 0.1,
    },
    'label': df_nodes['node'],
    'color': df_nodes['color'],
}
# Link section of the trace: endpoints, weights and colours from the link table.
link_spec = {
    'source': df_links['source id'],
    'target': df_links['target id'],
    'value': df_links['value'],
    'color': df_links['link color'],
    'line': {
        'color': "rgba(0,0,0,0.5)",
        'width': 0.1,
    },
}
# Assemble the sankey trace.
data_trace = {
    'type': 'sankey',
    'domain': {
        'x': [0, 1],
        'y': [0, 1],
    },
    'orientation': "h",  # horizontal
    'valueformat': ".0f",
    'node': node_spec,
    'link': link_spec,
}
# Configure the layout.
layout = {
    'title': "Sankey Diagram Test",
    'height': 640,
    'width': 900,
    'font': {
        'size': 12,
    },
}
# Plot the figure.
fig = {'data': [data_trace], 'layout': layout}
iplot(fig, validate=False)
答案 1 :(得分:0)
我尝试使用热图进行可视化。数据创建正确。
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Business-hour timestamps for April 2020, used as both join and leave times.
# The frequency is passed as an offset object rather than the 'BH' string
# alias, which pandas 2.2 deprecated in favour of 'bh'; the offset object is
# accepted by older and newer pandas versions alike.
business_hour = pd.offsets.BusinessHour()
join_t = pd.date_range('2020-04-01 09:00:00', '2020-04-30 20:00:00',
                       freq=business_hour)
leave_t = pd.date_range('2020-04-01 09:00:00', '2020-04-30 20:00:00',
                        freq=business_hour)
# One random head count in [1, 40) per time slot. The size is taken from
# join_t rather than hard-coded, so it always matches the date range.
persons = np.random.randint(1, 40, (len(join_t),))
df = pd.DataFrame({'Join Time': pd.to_datetime(join_t),
                   'Leave Time': pd.to_datetime(leave_t),
                   'Persons': persons})
# Each slot's leave time is the next slot's join time; the last row has no
# successor and keeps its own value.
df['Leave Time'] = df['Leave Time'].shift(-1, fill_value=df.iloc[-1]['Leave Time'])
# Split both timestamps into hour-of-day and calendar-date columns.
df['Join Time Hour'] = df['Join Time'].dt.hour
df['Join Time Date'] = df['Join Time'].dt.date
df['Leave Time Hour'] = df['Leave Time'].dt.hour
df['Leave Time Date'] = df['Leave Time'].dt.date
# Preview of the three columns used for the heatmap. The result is not
# assigned, so this line has no effect when run as a plain script.
df.loc[:, ['Join Time Date', 'Join Time Hour', 'Persons']]
# Reshape to a grid with one row per join date, one column per join hour,
# and the head count as the cell value.
data = df.pivot(index='Join Time Date', columns='Join Time Hour', values='Persons')
# Single-axes figure, 8x6 inches at 144 dpi.
fig, ax = plt.subplots(figsize=(8, 6), dpi=144)
# Draw the annotated heatmap on that axes and display it.
ax = sns.heatmap(data, ax=ax, annot=True, cmap="YlGnBu")
plt.show()