在python中自动化绘图例程

时间:2016-12-14 03:00:40

标签: python-3.x plotly

目标:我想创建一个堆积绘图函数,用于绘制给定数据框中的所有列。该数据框可以有 N 列。

在Plotly中绘制堆积图的通用代码如下:

from bokeh.plotting import figure, output_file, show, ColumnDataSource
from bokeh.models import HoverTool
import tweepy
import pandas as pd


# Twitter API credentials (left blank in this listing; fill in before running).
CONSUMER_KEY = ''
CONSUMER_SECRET = ''
ACCESS_TOKEN = ''
ACCESS_TOKEN_SECRET = ''

# Authenticate and build the API client.
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
api = tweepy.API(auth)

# Run the hashtag search, keeping the first 100 items the cursor yields.
results = list(tweepy.Cursor(api.search, q='#twitter').items(100))

# One row per tweet: start from the ids, then add one column per attribute.
data_set = pd.DataFrame([tweet.id for tweet in results], columns=["id"])
for attribute in ("text", "retweet_count", "source"):
    data_set[attribute] = [getattr(tweet, attribute) for tweet in results]


# Write the rendered plot to a standalone HTML file.
output_file("toolbar.html")

# Data source for the glyphs: x = tweet source, y = retweet count,
# desc = tweet text (referenced as "@desc" by the hover tool below).
# NOTE(review): x holds the tweet `source` strings -- confirm the figure's
# x-range is set up for categorical values.
source = ColumnDataSource(data=dict(
    x=data_set['source'],
    y=data_set['retweet_count'],
    desc=data_set['text'],
))

# Hover tool: shows the point index, its coordinates and the tweet text.
hover = HoverTool(
        tooltips=[
            ("index", "$index"),
            ("(x,y)", "($x, $y)"),
            ("desc", "@desc"),
        ]
    )

# Plot parameters: a 400x400 figure whose only tool is the hover tool.
p = figure(plot_width=400, plot_height=400, tools=[hover],
       title="Mouse over the dots")

# Draw one circle per row of the data source.
p.circle('x', 'y', size=20, source=source)

show(p)

问题:如何用循环自动生成 Plotly 的绘图代码?

我的尝试

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
from plotly import tools

# Three example line traces over the same x values, one per subplot row.
trace1 = go.Scatter(x=[0, 1, 2], y=[10, 11, 12])
trace2 = go.Scatter(x=[0, 1, 2], y=[100, 110, 120])
trace3 = go.Scatter(x=[0, 1, 2], y=[1000, 1100, 1200])
stacked_traces = [trace1, trace2, trace3]

# A single-column grid with one row per trace and a shared x-axis.
fig = tools.make_subplots(
    rows=len(stacked_traces),
    cols=1,
    specs=[[{}] for _ in stacked_traces],
    shared_xaxes=True,
    shared_yaxes=False,
    vertical_spacing=0.1,
)

# Rows are 1-based: trace1 goes to row 1, trace2 to row 2, and so on.
for row, trace in enumerate(stacked_traces, start=1):
    fig.append_trace(trace, row, 1)
plot(fig)

运行这段代码时,我收到以下错误:

import pandas as pd
import numpy as np

# Sample data: an 'x' column followed by three y columns.
df = pd.DataFrame()
df['x'] = np.array([0, 1, 2])
df['y1'] = np.array([10, 11, 12])
df['y2'] = np.array([100, 110, 120])
df['y3'] = np.array([1000, 1100, 1200])

# Build one dict entry per trace, keyed "trace0", "trace1", ...
# NOTE(review): the loop bound is df.shape[0] (the row count), while i is
# used as a *column* position in df.iloc[:, i]; i == 0 also selects the
# 'x' column itself.
# NOTE(review): each stored value is a formatted string that only looks
# like a go.Scatter(...) call -- it is not a go.Scatter object.
d = {}
for i in np.arange(df.shape[0]):
    d["trace{0}".format(i)] = "go.Scatter(x=[{0}],y=[{1}])".format(df.iloc[:,0], df.iloc[:, i])

# Three stacked rows in a single column, sharing the x-axis.
fig = tools.make_subplots(rows=3, cols=1, specs=[[{}], [{}], [{}]],
                          shared_xaxes=True, shared_yaxes=False,
                          vertical_spacing=0.1)

# NOTE(review): this calls fig.append(...), whereas the three-trace example
# earlier in this file calls fig.append_trace(...).
for index, key in enumerate(d):
    fig.append(d[key], index+1, 1)
plot(fig)

如何让它发挥作用?

1 个答案:

答案 0 :(得分:0)

您需要按照 Plotly 构造 fig 的方式重新创建其结构。运行代码后,我发现 fig 的结构如下:

In [4]: fig
Out[4]:
{'data': [{'type': 'scatter',
   'x': [0, 1, 2],
   'xaxis': 'x1',
   'y': [10, 11, 12],
   'yaxis': 'y1'},
  {'type': 'scatter',
   'x': [0, 1, 2],
   'xaxis': 'x1',
   'y': [100, 110, 120],
   'yaxis': 'y2'},
  {'type': 'scatter',
   'x': [0, 1, 2],
   'xaxis': 'x1',
   'y': [1000, 1100, 1200],
   'yaxis': 'y3'}],
 'layout': {'xaxis1': {'anchor': 'y3', 'domain': [0.0, 1.0]},
  'yaxis1': {'anchor': 'free',
   'domain': [0.7333333333333334, 1.0],
   'position': 0.0},
  'yaxis2': {'anchor': 'free',
   'domain': [0.3666666666666667, 0.6333333333333333],
   'position': 0.0},
  'yaxis3': {'anchor': 'x1', 'domain': [0.0, 0.26666666666666666]}}}

您漏掉了 fig.append_trace。加上它并做了一些修改后,我创建了一个函数,它接收一个数据框并将所有列绘制为堆积图:

def plot_plotly(dataframe):
    """
    Plot all of the columns in a given dataframe as a stacked plot.

    The first column supplies the x values. Every remaining column is drawn
    as its own scatter trace in its own subplot row (top to bottom, in
    column order); all rows share one x-axis.

    Note: Plotly is extremely slow when it comes to plotting data points
    greater than 100,000. So, if the dataframe has 100,000 rows or more, a
    message is printed and nothing is plotted.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Column 0 holds the x values; columns 1..N hold the y series.

    Returns
    -------
    Whatever ``plot(fig)`` returns, or None when nothing was plotted
    (dataframe too large, or no y column present).

    Example:
    ---------
    df = pd.DataFrame()
    df['x'] = np.array([0, 1, 2])
    df['y1'] = np.array([10, 11, 12])
    df['y2'] = np.array([100, 110, 120])
    df['y3'] = np.array([1000, 1100, 1200])
    df['y4'] = np.array([2000, 3000, 1000])

    # Selecting first four columns
    df1 = df.iloc[:, :4]

    plot_plotly(df1)
    """
    n_rows, n_cols = dataframe.shape

    # Size guard: the early return must live inside the `if`, otherwise the
    # function bails out before plotting anything.
    if n_rows >= 100000:
        print("Data Frame too Large to plot")
        return None

    n_traces = n_cols - 1
    if n_traces < 1:
        # Without at least one y column there is no subplot row to create.
        print("Data Frame needs an x column and at least one y column to plot")
        return None

    # A list (not a dict) keeps the traces in column order on every Python
    # version; each trace gets its own copy of the x values.
    traces = [
        go.Scatter(x=dataframe.iloc[:, 0].tolist(),
                   y=dataframe.iloc[:, col].tolist())
        for col in range(1, n_cols)
    ]

    # Keep the 0.1 gap for short stacks, but shrink it for tall ones so the
    # gaps never take more than half of the figure height; a fixed 0.1 gap
    # leaves no room for the plots themselves from about 11 rows on.
    if n_traces > 1:
        spacing = min(0.1, 0.5 / (n_traces - 1))
    else:
        spacing = 0.1

    fig = tools.make_subplots(rows=n_traces, cols=1,
                              specs=[[{}] for _ in range(n_traces)],
                              shared_xaxes=True, shared_yaxes=False,
                              vertical_spacing=spacing)

    # Subplot rows are 1-based.
    for row, trace in enumerate(traces, start=1):
        fig.append_trace(trace, row, 1)
    return plot(fig)