Examples¶

These examples are taken unedited from the test suite. Look at the body of each test to see how altair_recipes can be used.

import altair as alt
import altair_recipes as ar
from altair_recipes.common import viz_reg_test
from altair_recipes.display_pweave import show_test
from vega_datasets import data

Areaplot

@viz_reg_test
def test_areaplot():
    """Concatenate vertically one area plot per member of ``ar.StackType``."""
    charts = [
        ar.areaplot(
            data.iowa_electricity(),
            x="year",
            y="net_generation",
            color="source",
            stack=stack_type,
        )
        for stack_type in ar.StackType
    ]
    return alt.vconcat(*charts)


show_test(test_areaplot)

import altair_recipes as ar
from altair_recipes.common import viz_reg_test
from altair_recipes.display_pweave import show_test
import numpy as np
import pandas as pd

Autocorrelation

@viz_reg_test
def test_autocorrelation():
    """Autocorrelation chart, up to lag 15, of 100 uniform random samples."""
    samples = np.random.uniform(size=100)
    frame = pd.DataFrame({"x": samples})
    return ar.autocorrelation(frame, column="x", max_lag=15)


show_test(test_autocorrelation)

import altair_recipes as ar
from altair_recipes.common import viz_reg_test
from altair_recipes.display_pweave import show_test
from vega_datasets import data

Barchart

@viz_reg_test
def test_barchart_color():
    """Bar chart of mean barley yield per year with ``color=True``."""
    return ar.barchart(
        data.barley(),
        x="year",
        y="mean(yield)",
        color=True,
    )


show_test(test_barchart_color)

import altair_recipes as ar
from altair_recipes.common import viz_reg_test
from altair_recipes.display_pweave import show_test
from vega_datasets import data

Boxplot from melted data

@viz_reg_test
def test_boxplot_melted():
    """Boxplot of the iris ``petalLength`` column, grouped by ``species``."""
    iris = data.iris()
    return ar.boxplot(iris, columns=["petalLength"], group_by="species")


show_test(test_boxplot_melted)

Boxplot from cast data

@viz_reg_test
def test_boxplot_cast():
    """Boxplot with one box for every iris column except the last one."""
    frame = data.iris()
    all_but_last = list(frame.columns[:-1])
    return ar.boxplot(frame, columns=all_but_last)


show_test(test_boxplot_cast)

Boxplot with color

@viz_reg_test
def test_boxplot_color():
    """Colored boxplot of barley yield by year, faceted into one column per site."""
    barley = data.barley()
    # Split a total width of 800 evenly among the distinct sites.
    site_count = len(barley["site"].unique())
    chart = ar.boxplot(
        barley,
        columns=["yield"],
        group_by="year",
        color=True,
        width=800 // site_count,
    )
    return chart.facet(column="site")


show_test(test_boxplot_color)

import altair_recipes as ar
from altair_recipes.common import viz_reg_test
from altair_recipes.display_pweave import show_test
import numpy as np
import pandas as pd
from vega_datasets import data

Heatmap

@viz_reg_test
def test_heatmap():
    """Heatmap of x^2 + y^2 evaluated on the integer grid [-5, 5] x [-5, 5]."""
    # Both axes share the same integer range.
    axis = range(-5, 6)
    x, y = np.meshgrid(axis, axis)
    z = x ** 2 + y ** 2

    # Flatten the 2D grid into one row per cell, as Altair expects columnar data.
    grid = pd.DataFrame({"x": x.ravel(), "y": y.ravel(), "z": z.ravel()})

    return ar.heatmap(grid, x="x", y="y", color="z")


show_test(test_heatmap)

Count Heatmap

@viz_reg_test
def test_count_heatmap():
    """Count-aggregated heatmap of the movies data, which is passed by URL."""
    return ar.heatmap(
        data.movies.url,
        x="IMDB Rating",
        y="Rotten Tomatoes Rating",
        color="",
        aggregate="count",
    )


show_test(test_count_heatmap)

import altair_recipes as ar
from altair_recipes.common import viz_reg_test, gather
from altair_recipes.display_pweave import show_test
import numpy as np
import pandas as pd
from vega_datasets import data

Histogram

@viz_reg_test
def test_histogram():
    """Histogram of the ``IMDB Rating`` column of the movies data."""
    movies = data.movies()
    return ar.histogram(movies, column="IMDB Rating")


show_test(test_histogram)

Layered Histogram from wide data

@viz_reg_test
def test_layered_histogram_wide():
    """Layered histogram of three normal samples stored as separate columns."""
    # Column name -> (mean, standard deviation); samples are drawn in this order.
    trial_params = {"Trial A": (0, 0.8), "Trial B": (-2, 1), "Trial C": (3, 2)}
    wide = pd.DataFrame(
        {
            name: np.random.normal(loc, scale, 1000)
            for name, (loc, scale) in trial_params.items()
        }
    )
    return ar.layered_histogram(wide, columns=list(trial_params))


show_test(test_layered_histogram_wide)

Layered Histogram from long data

@viz_reg_test
def test_layered_histogram_long():
    """Layered histogram of three normal samples after passing them through ``gather``."""
    # Column name -> (mean, standard deviation); samples are drawn in this order.
    trial_params = {"Trial A": (0, 0.8), "Trial B": (-2, 1), "Trial C": (3, 2)}
    wide = pd.DataFrame(
        {
            name: np.random.normal(loc, scale, 1000)
            for name, (loc, scale) in trial_params.items()
        }
    )

    # Gather every trial column into "key"/"value" columns.
    long = gather(wide, key="key", value="value", columns=list(wide.columns))
    return ar.layered_histogram(long, columns=["value"], group_by="key")


show_test(test_layered_histogram_long)

import altair_recipes as ar
from altair_recipes.common import viz_reg_test
from altair_recipes.display_pweave import show_test
import numpy as np
import pandas as pd

Qqplot

@viz_reg_test
def test_qqplot():
    """Q-Q plot of a normal sample ("Trial A") against a uniform one ("Trial C")."""
    df = pd.DataFrame(
        {
            "Trial A": np.random.normal(0, 0.8, 1000),
            "Trial B": np.random.normal(-2, 1, 1000),
            # uniform() takes (low, high, size); low must not exceed high
            # because NumPy leaves the high < low case undefined.
            "Trial C": np.random.uniform(2, 3, 1000),
        }
    )
    return ar.qqplot(df, x="Trial A", y="Trial C")


show_test(test_qqplot)

import altair_recipes as ar
from altair_recipes.common import viz_reg_test
from altair_recipes.display_pweave import show_test
from hypothesis import given
from hypothesis.extra.pandas import columns, data_frames
from vega_datasets import data

Scatterplot

@viz_reg_test
def test_scatterplot():
    """Scatterplot of iris petal width vs. length, colored by sepal width."""
    iris = data.iris()
    encodings = dict(
        x="petalWidth",
        y="petalLength",
        color="sepalWidth",
        tooltip="species",
    )
    return ar.scatterplot(iris, **encodings)


show_test(test_scatterplot)

Scatterplot alternate data syntax

@viz_reg_test
def test_scatterplot_alternate_data():
    """Scatterplot built from individual iris columns rather than a data frame."""
    iris = data.iris()
    petal_width = iris["petalWidth"]
    petal_length = iris["petalLength"]
    return ar.scatterplot(
        x=petal_width,
        y=petal_length,
        color=iris["sepalWidth"],
        tooltip=iris["species"],
    )


show_test(test_scatterplot_alternate_data)

A randomized test of equivalence between the two data syntaxes:

@given(data=data_frames(columns=columns(["a", "b", "c"], dtype=float)))
def test_scatterplot_series(data):
    """Check that the frame syntax and the series syntax give the same chart spec."""
    from_frame = ar.scatterplot(data=data[["a", "c"]])
    from_series = ar.scatterplot(x=data["a"], y=data["c"])
    assert from_frame.to_dict() == from_series.to_dict()

Multiscatterplot at defaults

@viz_reg_test
def test_multiscatterplot_defaults():
    """Multiscatterplot of the iris data with every option left at its default."""
    iris = data.iris()
    return ar.multiscatterplot(iris)


show_test(test_multiscatterplot_defaults)

Multiscatterplot with explicit parameters

@viz_reg_test
def test_multiscatterplot_args():
    """Multiscatterplot of every iris column except the last, colored by species."""
    # Load the dataset once and reuse it for both the data and the column list.
    iris = data.iris()
    return ar.multiscatterplot(iris, columns=iris.columns[:-1], color="species")


show_test(test_multiscatterplot_args)

Multiscatterplot alternate data syntax

@viz_reg_test
def test_multiscatterplot_args_alternate():
    """Multiscatterplot built from individual iris columns, colored by species."""
    iris = data.iris()
    measurements = [iris[name] for name in ("sepalLength", "sepalWidth", "petalLength")]
    return ar.multiscatterplot(columns=measurements, color=iris["species"])


show_test(test_multiscatterplot_args_alternate)

A randomized test of equivalence between the two data syntaxes:

@given(data=data_frames(columns=columns(["a", "b", "c"], dtype=float)))
def test_multiscatterplot_series(data):
    """Check that the frame syntax and the series syntax give the same chart spec."""
    from_frame = ar.multiscatterplot(data=data)
    from_series = ar.multiscatterplot(columns=[data["a"], data["b"], data["c"]])
    assert from_frame.to_dict() == from_series.to_dict()

import altair_recipes as ar
from altair_recipes.common import viz_reg_test
from altair_recipes.display_pweave import show_test
from vega_datasets import data

Lineplot

@viz_reg_test
def test_lineplot():
    """Line plot of Iowa net electricity generation per year, colored by source."""
    electricity = data.iowa_electricity()
    return ar.lineplot(
        electricity,
        x="year",
        y="net_generation",
        color="source",
    )


show_test(test_lineplot)

import altair_recipes as ar
from altair_recipes.common import viz_reg_test
from altair_recipes.display_pweave import show_test
import numpy as np
import pandas as pd

Stripplot

@viz_reg_test
def test_stripplot():
    """Strip plot of normal noise split into ten groups of ten points each."""
    # Integer-divide 0..99 by 10 to get group labels 0..9, ten of each.
    groups = np.array(range(100)) // 10
    noise = np.random.normal(size=len(groups))
    frame = pd.DataFrame({"x": groups, "y": noise})

    return ar.stripplot(frame)


show_test(test_stripplot)

import altair_recipes as ar
import numpy as np
import pandas as pd
from altair_recipes.common import viz_reg_test
from altair_recipes.display_pweave import show_test

Autoplot

Autoplot is very easy to use, yet it can produce a variety of charts that are reasonably appropriate for the data to be displayed. Here is a longish sequence of examples of what autoplot will do with different combinations of up to three categorical or numerical variables and different data sizes.

# Number of rows generated for the synthetic autoplot data set.
test_size = 5000


def rand_cat(x, n):
    """Turn numerical values into a Series of single characters.

    Normal noise scaled by ``n`` is added to ``x``, the result is shifted by
    77 (the code point of ``"M"``), truncated to an integer and mapped
    through ``chr``.

    Args:
        x: Array-like of numbers (or a scalar) to derive the characters from.
        n: Scale applied to the standard normal noise added to ``x``.

    Returns:
        A ``pandas.Series`` of one-character strings.
    """
    # Size the noise from x itself so that inputs of any length work. A
    # scalar has an empty shape, so fall back to the module-level test_size.
    size = np.shape(x) or test_size
    noisy = x + np.random.normal(size=size) * n
    return pd.Series(noisy + 77).astype(int).apply(chr)


# Seed the global NumPy generator before the synthetic data set is built.
np.random.seed(seed=0)
# Three numerical variables: y is x plus fresh normal noise, and z is y plus
# fresh normal noise.
x = np.random.normal(size=test_size)
y = np.random.normal(size=test_size) + x
z = np.random.normal(size=test_size) + y
# Pair each numerical variable with a single-character column derived from it
# via rand_cat. Every rand_cat call draws from the seeded global generator,
# so the order of these keyword arguments affects the generated values.
data = pd.DataFrame(
    dict(
        x=x,
        x_cat=rand_cat(x, 1),
        y=y,
        y_cat=rand_cat(y, 0.5),
        z=z,
        z_cat=rand_cat(z, 0.5),
    )
)

The tests below were generated by the following code. So meta!

#
# numvars = ["x", "y", "z"]
# catvars = ["x_cat", "y_cat", "z_cat"]
# n = 0
# for nvars in range(1, 4):
#     for ncatvars in range(0, nvars + 1):
#         vars = catvars[:ncatvars] + numvars[ncatvars:nvars]
#         for nrows in [10, 50, 250, 1000, 5000]:
#             n = n + 1
#             print(
#                 """
# #'  <h3> Test autoplot #{n}</h3>
#
# @viz_reg_test
# def test_autoplot_{n}():
#     return ar.autoplot(data.head({nrows}), columns={vars})
#
# show_test(test_autoplot_{n})
# """.format(
#                     nrows=nrows, vars=vars, n=n
#                 )
#             )

Test autoplot #1

@viz_reg_test
def test_autoplot_1():
    """Autoplot of numerical x on the first 10 rows."""
    rows = data.head(10)
    return ar.autoplot(rows, columns=["x"])


show_test(test_autoplot_1)

Test autoplot #2

@viz_reg_test
def test_autoplot_2():
    """Autoplot of numerical x on the first 50 rows."""
    rows = data.head(50)
    return ar.autoplot(rows, columns=["x"])


show_test(test_autoplot_2)

Test autoplot #3

@viz_reg_test
def test_autoplot_3():
    """Autoplot of numerical x on the first 250 rows."""
    rows = data.head(250)
    return ar.autoplot(rows, columns=["x"])


show_test(test_autoplot_3)

Test autoplot #4

@viz_reg_test
def test_autoplot_4():
    """Autoplot of numerical x on the first 1000 rows."""
    rows = data.head(1000)
    return ar.autoplot(rows, columns=["x"])


show_test(test_autoplot_4)

Test autoplot #5

@viz_reg_test
def test_autoplot_5():
    """Autoplot of numerical x on the first 5000 rows."""
    rows = data.head(5000)
    return ar.autoplot(rows, columns=["x"])


show_test(test_autoplot_5)

Test autoplot #6

@viz_reg_test
def test_autoplot_6():
    """Autoplot of categorical x_cat on the first 10 rows."""
    rows = data.head(10)
    return ar.autoplot(rows, columns=["x_cat"])


show_test(test_autoplot_6)

Test autoplot #7

@viz_reg_test
def test_autoplot_7():
    """Autoplot of categorical x_cat on the first 50 rows."""
    rows = data.head(50)
    return ar.autoplot(rows, columns=["x_cat"])


show_test(test_autoplot_7)

Test autoplot #8

@viz_reg_test
def test_autoplot_8():
    """Autoplot of categorical x_cat on the first 250 rows."""
    rows = data.head(250)
    return ar.autoplot(rows, columns=["x_cat"])


show_test(test_autoplot_8)

Test autoplot #9

@viz_reg_test
def test_autoplot_9():
    """Autoplot of categorical x_cat on the first 1000 rows."""
    rows = data.head(1000)
    return ar.autoplot(rows, columns=["x_cat"])


show_test(test_autoplot_9)

Test autoplot #10

@viz_reg_test
def test_autoplot_10():
    """Autoplot of categorical x_cat on the first 5000 rows."""
    rows = data.head(5000)
    return ar.autoplot(rows, columns=["x_cat"])


show_test(test_autoplot_10)

Test autoplot #11

@viz_reg_test
def test_autoplot_11():
    """Autoplot of numerical x and y on the first 10 rows."""
    rows = data.head(10)
    return ar.autoplot(rows, columns=["x", "y"])


show_test(test_autoplot_11)

Test autoplot #12

@viz_reg_test
def test_autoplot_12():
    """Autoplot of numerical x and y on the first 50 rows."""
    rows = data.head(50)
    return ar.autoplot(rows, columns=["x", "y"])


show_test(test_autoplot_12)

Test autoplot #13

@viz_reg_test
def test_autoplot_13():
    """Autoplot of numerical x and y on the first 250 rows."""
    rows = data.head(250)
    return ar.autoplot(rows, columns=["x", "y"])


show_test(test_autoplot_13)

Test autoplot #14

@viz_reg_test
def test_autoplot_14():
    """Autoplot of numerical x and y on the first 1000 rows."""
    rows = data.head(1000)
    return ar.autoplot(rows, columns=["x", "y"])


show_test(test_autoplot_14)

Test autoplot #15

@viz_reg_test
def test_autoplot_15():
    """Autoplot of numerical x and y on the first 5000 rows."""
    rows = data.head(5000)
    return ar.autoplot(rows, columns=["x", "y"])


show_test(test_autoplot_15)

Test autoplot #16

@viz_reg_test
def test_autoplot_16():
    """Autoplot of categorical x_cat and numerical y on the first 10 rows."""
    rows = data.head(10)
    return ar.autoplot(rows, columns=["x_cat", "y"])


show_test(test_autoplot_16)

Test autoplot #17

@viz_reg_test
def test_autoplot_17():
    """Autoplot of categorical x_cat and numerical y on the first 50 rows."""
    rows = data.head(50)
    return ar.autoplot(rows, columns=["x_cat", "y"])


show_test(test_autoplot_17)

Test autoplot #18

@viz_reg_test
def test_autoplot_18():
    """Autoplot of categorical x_cat and numerical y on the first 250 rows."""
    rows = data.head(250)
    return ar.autoplot(rows, columns=["x_cat", "y"])


show_test(test_autoplot_18)

Test autoplot #19

@viz_reg_test
def test_autoplot_19():
    """Autoplot of categorical x_cat and numerical y on the first 1000 rows."""
    rows = data.head(1000)
    return ar.autoplot(rows, columns=["x_cat", "y"])


show_test(test_autoplot_19)

Test autoplot #20

@viz_reg_test
def test_autoplot_20():
    """Autoplot of categorical x_cat and numerical y on the first 5000 rows."""
    rows = data.head(5000)
    return ar.autoplot(rows, columns=["x_cat", "y"])


show_test(test_autoplot_20)

Test autoplot #21

@viz_reg_test
def test_autoplot_21():
    """Autoplot of categorical x_cat and y_cat on the first 10 rows."""
    rows = data.head(10)
    return ar.autoplot(rows, columns=["x_cat", "y_cat"])


show_test(test_autoplot_21)

Test autoplot #22

@viz_reg_test
def test_autoplot_22():
    """Autoplot of categorical x_cat and y_cat on the first 50 rows."""
    rows = data.head(50)
    return ar.autoplot(rows, columns=["x_cat", "y_cat"])


show_test(test_autoplot_22)

Test autoplot #23

@viz_reg_test
def test_autoplot_23():
    """Autoplot of categorical x_cat and y_cat on the first 250 rows."""
    rows = data.head(250)
    return ar.autoplot(rows, columns=["x_cat", "y_cat"])


show_test(test_autoplot_23)

Test autoplot #24

@viz_reg_test
def test_autoplot_24():
    """Autoplot of categorical x_cat and y_cat on the first 1000 rows."""
    rows = data.head(1000)
    return ar.autoplot(rows, columns=["x_cat", "y_cat"])


show_test(test_autoplot_24)

Test autoplot #25

@viz_reg_test
def test_autoplot_25():
    """Autoplot of categorical x_cat and y_cat on the first 5000 rows."""
    rows = data.head(5000)
    return ar.autoplot(rows, columns=["x_cat", "y_cat"])


show_test(test_autoplot_25)

Test autoplot #26

@viz_reg_test
def test_autoplot_26():
    """Autoplot of numerical x, y and z on the first 10 rows."""
    rows = data.head(10)
    return ar.autoplot(rows, columns=["x", "y", "z"])


show_test(test_autoplot_26)

Test autoplot #27

@viz_reg_test
def test_autoplot_27():
    """Autoplot of numerical x, y and z on the first 50 rows."""
    rows = data.head(50)
    return ar.autoplot(rows, columns=["x", "y", "z"])


show_test(test_autoplot_27)

Test autoplot #28

@viz_reg_test
def test_autoplot_28():
    """Autoplot of numerical x, y and z on the first 250 rows."""
    rows = data.head(250)
    return ar.autoplot(rows, columns=["x", "y", "z"])


show_test(test_autoplot_28)

Test autoplot #29

@viz_reg_test
def test_autoplot_29():
    """Autoplot of numerical x, y and z on the first 1000 rows."""
    rows = data.head(1000)
    return ar.autoplot(rows, columns=["x", "y", "z"])


show_test(test_autoplot_29)

Test autoplot #30

@viz_reg_test
def test_autoplot_30():
    """Autoplot of numerical x, y and z on the first 5000 rows."""
    rows = data.head(5000)
    return ar.autoplot(rows, columns=["x", "y", "z"])


show_test(test_autoplot_30)

Test autoplot #31

@viz_reg_test
def test_autoplot_31():
    """Autoplot of categorical x_cat with numerical y and z on the first 10 rows."""
    rows = data.head(10)
    return ar.autoplot(rows, columns=["x_cat", "y", "z"])


show_test(test_autoplot_31)

Test autoplot #32

@viz_reg_test
def test_autoplot_32():
    """Autoplot of categorical x_cat with numerical y and z on the first 50 rows."""
    rows = data.head(50)
    return ar.autoplot(rows, columns=["x_cat", "y", "z"])


show_test(test_autoplot_32)

Test autoplot #33

@viz_reg_test
def test_autoplot_33():
    """Autoplot of categorical x_cat with numerical y and z on the first 250 rows."""
    rows = data.head(250)
    return ar.autoplot(rows, columns=["x_cat", "y", "z"])


show_test(test_autoplot_33)

Test autoplot #34

@viz_reg_test
def test_autoplot_34():
    """Autoplot of categorical x_cat with numerical y and z on the first 1000 rows."""
    rows = data.head(1000)
    return ar.autoplot(rows, columns=["x_cat", "y", "z"])


show_test(test_autoplot_34)

Test autoplot #35

@viz_reg_test
def test_autoplot_35():
    """Autoplot of categorical x_cat with numerical y and z on the first 5000 rows."""
    rows = data.head(5000)
    return ar.autoplot(rows, columns=["x_cat", "y", "z"])


show_test(test_autoplot_35)

Test autoplot #36

@viz_reg_test
def test_autoplot_36():
    """Autoplot of categorical x_cat and y_cat with numerical z on the first 10 rows."""
    rows = data.head(10)
    return ar.autoplot(rows, columns=["x_cat", "y_cat", "z"])


show_test(test_autoplot_36)

Test autoplot #37

@viz_reg_test
def test_autoplot_37():
    """Autoplot of categorical x_cat and y_cat with numerical z on the first 50 rows."""
    rows = data.head(50)
    return ar.autoplot(rows, columns=["x_cat", "y_cat", "z"])


show_test(test_autoplot_37)

Test autoplot #38

@viz_reg_test
def test_autoplot_38():
    """Autoplot of categorical x_cat and y_cat with numerical z on the first 250 rows."""
    rows = data.head(250)
    return ar.autoplot(rows, columns=["x_cat", "y_cat", "z"])


show_test(test_autoplot_38)

Test autoplot #39

@viz_reg_test
def test_autoplot_39():
    """Autoplot of categorical x_cat and y_cat with numerical z on the first 1000 rows."""
    rows = data.head(1000)
    return ar.autoplot(rows, columns=["x_cat", "y_cat", "z"])


show_test(test_autoplot_39)

Test autoplot #40

@viz_reg_test
def test_autoplot_40():
    """Autoplot of categorical x_cat and y_cat with numerical z on the first 5000 rows."""
    rows = data.head(5000)
    return ar.autoplot(rows, columns=["x_cat", "y_cat", "z"])


show_test(test_autoplot_40)

Test autoplot #41

@viz_reg_test
def test_autoplot_41():
    """Autoplot of categorical x_cat, y_cat and z_cat on the first 10 rows."""
    rows = data.head(10)
    return ar.autoplot(rows, columns=["x_cat", "y_cat", "z_cat"])


show_test(test_autoplot_41)

Test autoplot #42

@viz_reg_test
def test_autoplot_42():
    """Autoplot of categorical x_cat, y_cat and z_cat on the first 50 rows."""
    rows = data.head(50)
    return ar.autoplot(rows, columns=["x_cat", "y_cat", "z_cat"])


show_test(test_autoplot_42)

Test autoplot #43

@viz_reg_test
def test_autoplot_43():
    """Autoplot of categorical x_cat, y_cat and z_cat on the first 250 rows."""
    rows = data.head(250)
    return ar.autoplot(rows, columns=["x_cat", "y_cat", "z_cat"])


show_test(test_autoplot_43)

Test autoplot #44

@viz_reg_test
def test_autoplot_44():
    """Autoplot of categorical x_cat, y_cat and z_cat on the first 1000 rows."""
    rows = data.head(1000)
    return ar.autoplot(rows, columns=["x_cat", "y_cat", "z_cat"])


show_test(test_autoplot_44)

Test autoplot #45

@viz_reg_test
def test_autoplot_45():
    """Autoplot of categorical x_cat, y_cat and z_cat on the first 5000 rows."""
    rows = data.head(5000)
    return ar.autoplot(rows, columns=["x_cat", "y_cat", "z_cat"])


show_test(test_autoplot_45)

To finish, here are three corner cases where there is no overlap, using categorical variables only.

# The integers 0 through 9, reused to build every column below.
w = pd.Series(range(10))

# Each column stacks two ten-value runs and converts them to strings; only x
# uses a shifted second run (w - 3).
no_overlap_data = pd.DataFrame(
    {
        "x": pd.concat([w, w - 3]).astype(str),
        "y": pd.concat([w, w]).astype(str),
        "z": pd.concat([w, w]).astype(str),
    }
)


@viz_reg_test
def test_autoplot_CCC():
    """Autoplot of all three string columns of ``no_overlap_data``."""
    chart = ar.autoplot(no_overlap_data)
    return chart


show_test(test_autoplot_CCC)


@viz_reg_test
def test_autoplot_CC():
    """Autoplot of the string columns x and y of ``no_overlap_data``."""
    selected = ["x", "y"]
    return ar.autoplot(no_overlap_data, columns=selected)


show_test(test_autoplot_CC)


@viz_reg_test
def test_autoplot_C():
    """Autoplot of the string column x on the first 10 rows of ``no_overlap_data``."""
    first_rows = no_overlap_data.head(10)
    return ar.autoplot(first_rows, columns=["x"])


show_test(test_autoplot_C)