Skip to content

Plotting Functions

UpSetAltair(data=None, title='', subtitle='', sets=None, abbre=None, sort_by='frequency', sort_by_order='ascending', inter_degree_frequency='ascending', width=1200, height=700, height_ratio=0.6, horizontal_bar_chart_width=300, set_colors_dict=dict(), highlight_color='#777777', glyph_size=200, set_label_bg_size=1000, line_connection_size=2, horizontal_bar_size=20, vertical_bar_label_size=16, vertical_bar_padding=20, set_labelstyle='normal')

This function generates Altair-based interactive UpSet plots.

Parameters:

Name Type Description Default
data DataFrame

Tabular data containing the membership of each element (row) in exclusive intersecting sets (column).

None
sets list

List of set names of interest to show in the UpSet plots. This list reflects the order of sets to be shown in the plots as well.

None
abbre dict

Dictionary mapping set names to abbreviated set names.

None
sort_by str

"frequency" or "degree"

'frequency'
sort_by_order str

"ascending" or "descending"

'ascending'
inter_degree_frequency str

"ascending" or "descending", only makes sense if sort_by="degree"

'ascending'
width int

Horizontal size (total width) of the UpSet plot.

1200
height int

Vertical size (total height) of the UpSet plot.

700
height_ratio float

Ratio of height between upper and under views, ranges from 0 to 1.

0.6
horizontal_bar_chart_width int

Width of horizontal bar chart on the bottom-right.

300
set_colors_dict dict

Dictionary containing the sets as keys with corresponding colors as values

dict()
highlight_color str

Color to encode intersecting sets upon mouse hover.

'#777777'
glyph_size int

Size of UpSet glyph (⬤).

200
set_label_bg_size int

Size of label background in the horizontal bar chart.

1000
line_connection_size int

width of lines in matrix view.

2
horizontal_bar_size int

Height of bars in the horizontal bar chart.

20
vertical_bar_label_size int

Font size of texts in the vertical bar chart on the top.

16
vertical_bar_padding int

Gap between a pair of bars in the vertical bar charts.

20
set_labelstyle str

"normal" (default) or "italic"

'normal'

Run rda.utility.get_upsetplot_df() on the df before trying this function.

Source code in rda_toolbox/plot.py
def UpSetAltair(
    data=None,
    title="",
    subtitle="",
    sets=None,
    abbre=None,
    sort_by="frequency",
    sort_by_order="ascending",
    inter_degree_frequency="ascending",
    width=1200,
    height=700,
    height_ratio=0.6,
    horizontal_bar_chart_width=300,
    set_colors_dict=dict(),
    highlight_color="#777777",
    glyph_size=200,
    set_label_bg_size=1000,
    line_connection_size=2,
    horizontal_bar_size=20,
    vertical_bar_label_size=16,
    vertical_bar_padding=20,
    set_labelstyle="normal",
):
    """This function generates Altair-based interactive UpSet plots.

    The figure consists of a vertical bar chart (intersection sizes) on top of
    a matrix view (set membership per intersection), with set labels and a
    horizontal bar chart (set sizes) to the right of the matrix.

    Parameters:
          data (pandas.DataFrame): Tabular data containing the membership of each element (row) in exclusive intersecting sets (column).
              If `sets` is not given, every column except the first one is treated as a set.
          title (str): Title of the whole figure.
          subtitle (str): Subtitle of the whole figure.
          sets (list): List of set names of interest to show in the UpSet plots. This list reflects the order of sets to be shown in the plots as well.
          abbre (dict): Dictionary mapping set names to abbreviated set names.
              If empty, or if its length differs from `sets`, every set is labelled with its own name.
          sort_by (str): "frequency" or "degree"
          sort_by_order (str): "ascending" or "descending"
          inter_degree_frequency (str): "ascending" or "descending", only makes sense if sort_by="degree"
          width (int): Horizontal size (total width) of the UpSet plot.
          height (int): Vertical size (total height) of the UpSet plot.
          height_ratio (float): Ratio of height between upper and under views, ranges from 0 to 1.
              Values outside that range are replaced by 0.5.
          horizontal_bar_chart_width (int): Width of horizontal bar chart on the bottom-right.
          set_colors_dict (dict): Dictionary containing the sets as keys with corresponding colors as values.
              If empty, a default palette is assigned. The default dict is never mutated.
          highlight_color (str): Color to encode intersecting sets upon mouse hover.
          glyph_size (int): Size of UpSet glyph (⬤).
          set_label_bg_size (int): Size of label background in the horizontal bar chart.
          line_connection_size (int): Width of lines in matrix view.
          horizontal_bar_size (int): Height of bars in the horizontal bar chart.
          vertical_bar_label_size (int): Font size of texts in the vertical bar chart on the top.
              NOTE(review): the text mark is additionally given a fixed `fontSize=20`;
              confirm which of the two takes effect.
          vertical_bar_padding (int): Gap between a pair of bars in the vertical bar charts.
              Currently not applied to any mark.
          set_labelstyle (str): "normal" (default) or "italic"

    Returns:
        The concatenated Altair chart after `upsetaltair_top_level_configuration`
        has been applied, or None (after printing a message) if `data` is None.

    Raises:
        ValueError: If `set_colors_dict` is given but its keys are not exactly the names in `sets`.

    Run rda.utility.get_upsetplot_df() on the df before trying this function.
    """

    if data is None:
        print("No data and/or a list of sets are provided")
        return
    if sets is None:
        # Without an explicit list, every column but the first is a set.
        sets = list(data.columns[1:])

    if (height_ratio < 0) or (1 < height_ratio):
        print("height_ratio set to 0.5")
        height_ratio = 0.5
    if not abbre:
        abbre = {name: name for name in sets}
    if len(sets) != len(abbre):
        # Fall back to an identity mapping. It has to stay a dict because
        # `.items()` / `.values()` are called on it further down.
        abbre = {name: name for name in sets}
        print(
            "Dropping the `abbre` list because the lengths of `sets` and `abbre` are not identical."
        )
    if not set_colors_dict:  # build default colors dict
        colors = [  # observable10
            "#4269d0",
            "#efb118",
            "#ff725c",
            "#6cc5b0",
            "#3ca951",
            "#ff8ab7",
            "#a463f2",
            "#97bbf5",
            "#9c6b4e",
            "#9498a0",
        ]
        if len(sets) > len(colors):
            # Repeat the palette so that there is a color for every set.
            colors = colors * len(sets)
        set_colors_dict = dict(zip(sets, colors[: len(sets)]))
    elif sorted(set_colors_dict) != sorted(sets):
        raise ValueError(
            f"Wrong set names, correct names are:\n{dict.fromkeys(sets, '')}"
        )
    # Keep only the colors of sets which are actually shown.
    set_colors_dict = {
        key: value for key, value in set_colors_dict.items() if key in sets
    }

    # ------------------------------------------------------------------
    # Data preprocessing
    # ------------------------------------------------------------------
    data = data.copy()
    data["count"] = 0
    data = data[sets + ["count"]]
    # One row per distinct membership combination; "count" holds its size.
    data = data.groupby(sets).count().reset_index()

    data["intersection_id"] = data.index
    data["degree"] = data[sets].sum(axis=1)
    data = data.sort_values(
        by=["count"],
        ascending=(inter_degree_frequency == "ascending"),
    )

    # Long format: one row per (intersection, set) pair.
    data = pd.melt(data, id_vars=["intersection_id", "count", "degree"])
    data = data.rename(columns={"variable": "set", "value": "is_intersect"})

    set_to_abbre = pd.DataFrame(abbre.items(), columns=["set", "set_abbre"])

    # Rank the sets by their total size (largest first).
    set_to_order = (
        data[data["is_intersect"] == 1]
        .groupby("set")
        .sum()
        .reset_index()
        .sort_values(by="count", ascending=False)
        .filter(["set"])
    )
    set_to_order["set_order"] = list(range(len(sets)))

    # Vega expression which sums the membership flags of all sets.
    degree_calculation = "+".join(
        f"(isDefined(datum['{s}']) ? datum['{s}'] : 0)" for s in sets
    )

    # ------------------------------------------------------------------
    # Selections
    # ------------------------------------------------------------------
    legend_selection = alt.selection_point(fields=["set"], bind="legend")
    color_selection = alt.selection_point(
        fields=["intersection_id"], on="pointerover", empty=False
    )

    # ------------------------------------------------------------------
    # Styles
    # ------------------------------------------------------------------
    vertical_bar_chart_height = height * height_ratio
    matrix_height = height - vertical_bar_chart_height
    matrix_width = width - horizontal_bar_chart_width

    main_color = "#3A3A3A"
    brush_color = alt.condition(
        color_selection, alt.value(highlight_color), alt.value(main_color)
    )
    # The colored label background is only drawn for very short labels; the
    # decision is based on the first abbreviation only.
    is_show_horizontal_bar_label_bg = len(list(abbre.values())[0]) <= 2
    horizontal_bar_label_bg_color = (
        "white" if is_show_horizontal_bar_label_bg else "black"
    )

    x_sort = alt.Sort(
        field="count" if sort_by == "frequency" else "degree",
        order=sort_by_order,
    )

    tooltip = [
        alt.Tooltip("max(count):Q", title="Cardinality"),
        alt.Tooltip("degree:Q", title="Degree"),
    ]

    # ------------------------------------------------------------------
    # Plots
    # ------------------------------------------------------------------
    # To use native interactivity in Altair, we are using the data transformation functions
    # supported in Altair.
    base = (
        alt.Chart(data)
        .transform_pivot(
            "set",
            op="max",
            groupby=["intersection_id", "count"],
            value="is_intersect",
        )
        .transform_aggregate(
            # count, set1, set2, ...
            count="sum(count)",
            groupby=sets,
        )
        .transform_calculate(
            # count, set1, set2, ...
            degree=degree_calculation
        )
        .transform_filter(
            # count, set1, set2, ..., degree
            alt.datum["degree"]
            != 0
        )
        .transform_window(
            # count, set1, set2, ..., degree
            intersection_id="row_number()",
            frame=[None, None],
        )
        .transform_fold(
            # count, set1, set2, ..., degree, intersection_id
            sets,
            as_=["set", "is_intersect"],
        )
        .transform_lookup(
            # count, set, is_intersect, degree, intersection_id
            lookup="set",
            from_=alt.LookupData(set_to_abbre, "set", ["set_abbre"]),
        )
        .transform_lookup(
            # count, set, is_intersect, degree, intersection_id, set_abbre
            lookup="set",
            from_=alt.LookupData(set_to_order, "set", ["set_order"]),
        )
        .transform_filter(
            # Make sure to remove the filtered sets.
            legend_selection
        )
        .transform_window(
            # count, set, is_intersect, degree, intersection_id, set_abbre
            set_order="distinct(set)",
            frame=[None, 0],
            sort=[{"field": "set_order"}],
        )
        .transform_lookup(
            lookup="set",
            from_=alt.LookupData(set_to_order, "set", ["set_order"]),
        )
    )

    # Intersection sizes (vertical bar chart on the top)
    vertical_bar = (
        base.mark_bar(color=main_color)
        .encode(
            x=alt.X(
                "intersection_id:N",
                axis=alt.Axis(grid=False, labels=False, ticks=False, domain=True),
                sort=x_sort,
                title=None,
            ),
            y=alt.Y(
                "max(count):Q",
                axis=alt.Axis(grid=False, tickCount=3, orient="right"),
                title="Intersection Size",
            ),
            color=brush_color,
            tooltip=tooltip,
        )
        .properties(width=matrix_width, height=vertical_bar_chart_height)
    )
    vertical_bar_text = vertical_bar.mark_text(
        color=main_color, dy=-10, size=vertical_bar_label_size, fontSize=20
    ).encode(text=alt.Text("count:Q", format=".0f"))
    vertical_bar_chart = (vertical_bar + vertical_bar_text).add_params(
        color_selection,
    )

    # Set membership per intersection (matrix view)
    circle_bg = (
        vertical_bar.mark_circle(size=glyph_size, opacity=1)
        .encode(
            x=alt.X(
                "intersection_id:N",
                axis=alt.Axis(grid=False, labels=False, ticks=False, domain=False),
                sort=x_sort,
                title=None,
            ),
            y=alt.Y(
                "set_order:N",
                axis=alt.Axis(grid=False, labels=False, ticks=False, domain=False),
                title=None,
            ),
            color=alt.value("#E6E6E6"),
        )
        .properties(height=matrix_height)
    )
    # Light stripes behind every second row.
    rect_bg = (
        circle_bg.mark_rect()
        .transform_filter(alt.datum["set_order"] % 2 == 1)
        .encode(color=alt.value("#F7F7F7"))
    )
    circle = circle_bg.transform_filter(alt.datum["is_intersect"] == 1).encode(
        color=brush_color
    )
    line_connection = (
        circle_bg.mark_bar(size=line_connection_size, color=main_color)
        .transform_filter(alt.datum["is_intersect"] == 1)
        .encode(
            y=alt.Y("min(set_order):N"),
            y2=alt.Y2("max(set_order):N"),
            color=brush_color,
        )
    )
    matrix_view = alt.layer(
        circle + rect_bg + circle_bg + line_connection + circle
    ).add_params(
        # Duplicate `circle` is to properly show tooltips.
        color_selection,
    )

    # Cardinality by sets (horizontal bar chart)
    horizontal_bar_label_bg = base.mark_circle(size=set_label_bg_size).encode(
        y=alt.Y(
            "set_order:N",
            axis=alt.Axis(grid=False, labels=False, ticks=False, domain=False),
            title=None,
        ),
        color=alt.Color(
            "set:N",
            scale=alt.Scale(
                domain=list(set_colors_dict.keys()),
                range=list(set_colors_dict.values()),
            ),
            title=None,
        ),
        opacity=alt.value(1),
    )
    horizontal_bar_label = horizontal_bar_label_bg.mark_text(
        align="center",
        fontSize=20,
        fontStyle=set_labelstyle,
    ).encode(
        text=alt.Text("set_abbre:N"),
        color=alt.value(horizontal_bar_label_bg_color),
    )
    horizontal_bar_axis = (
        (horizontal_bar_label_bg + horizontal_bar_label)
        if is_show_horizontal_bar_label_bg
        else horizontal_bar_label
    )

    horizontal_bar = (
        horizontal_bar_label_bg.mark_bar(size=horizontal_bar_size)
        .transform_filter(alt.datum["is_intersect"] == 1)
        .encode(
            x=alt.X(
                "sum(count):Q",
                axis=alt.Axis(grid=False, tickCount=3),
                title="Set Size",
            ),
        )
        .properties(width=horizontal_bar_chart_width)
    )
    horizontal_bar_text = horizontal_bar.mark_text(
        align="left", dx=2, fontSize=20
    ).encode(text="sum(count):Q")
    horizontal_bar_chart = alt.layer(horizontal_bar, horizontal_bar_text)

    # Concat Plots
    upsetaltair = alt.vconcat(
        vertical_bar_chart,
        alt.hconcat(
            matrix_view,
            horizontal_bar_axis,
            horizontal_bar_chart,
            spacing=5,
        ).resolve_scale(y="shared"),
        spacing=20,
    ).add_params(
        legend_selection,
    )

    # Apply top-level configuration
    upsetaltair = upsetaltair_top_level_configuration(
        upsetaltair,
        legend_orient="top",
        legend_symbol_size=set_label_bg_size / 2.0,
    ).properties(
        title={
            "text": title,
            "subtitle": subtitle,
            "fontSize": 20,
            "fontWeight": 500,
            "subtitleColor": main_color,
            "subtitleFontSize": 14,
        }
    )
    return upsetaltair

UpSet_per_dataset(df, save_formats=['pdf', 'svg'], id_column='Internal ID')

UpsetPlot wrapper function which applies threshold to processed data (without controls, references etc.). For each dataset present in the given df, create a dummy_df for rda.UpSetAltair() and save the UpSetPlot.

Source code in rda_toolbox/plot.py
def UpSet_per_dataset(
    df: pd.DataFrame,  # processed
    save_formats=["pdf", "svg"],
    id_column="Internal ID",
):
    """
    UpsetPlot wrapper function which applies threshold to processed data (without controls, references etc.).
    For each dataset present in the given df, create a dummy_df for rda.UpSetAltair() and save the UpSetPlot.

    Parameters:
        df (pandas.DataFrame): Processed data. Must contain a "Dataset" column
            (it is grouped by that column after thresholding).
        save_formats (list): File extensions to save each plot as. The list is only iterated, never modified.
        id_column (str): Name of the substance id column. It is passed both to
            `get_thresholded_subset` and, as `counts_column`, to `get_upsetplot_df`.

    Returns:
        None. The charts are written to `../figures/<dataset>/UpSetPlot_<dataset>.<format>`,
        relative to the current working directory.
    """
    subset = get_thresholded_subset(
        df,
        id_column=id_column,
        negative_controls="Bacteria + Medium",
        blanks="Medium",
        threshold=50,
    )

    for dataset, sub_df in subset.groupby("Dataset"):
        dummy_df = get_upsetplot_df(sub_df, counts_column=id_column)
        # Create dataset folder if non-existent
        pathlib.Path(f"../figures/{dataset}").mkdir(parents=True, exist_ok=True)
        # The chart does not depend on the output format, so build it once.
        dataset_upsetplot = UpSetAltair(dummy_df, title=dataset)
        for save_format in save_formats:
            filename = f"../figures/{dataset}/UpSetPlot_{dataset}.{save_format}"
            print("Saving", filename)
            dataset_upsetplot.save(filename)

lineplots_facet(df, hline_y=50, by_id='Internal ID', whisker_width=10, exclude_negative_zfactors=True, threshold=50.0)

Assay: MIC Input: processed_df Output: Altair Chart with faceted lineplots. Negative controls and blanks are dropped inside the function.

Source code in rda_toolbox/plot.py
def lineplots_facet(
    df: pd.DataFrame,
    hline_y: int = 50,
    by_id: str = "Internal ID",
    whisker_width: int = 10,
    exclude_negative_zfactors: bool = True,
    threshold: float = 50.0,
) -> alt.vegalite.v5.api.HConcatChart:
    """
    Assay: MIC
    Input: processed_df
    Output: Altair Chart with faceted lineplots.
    Negative controls and blanks are dropped inside the function.

    Parameters:
        df (pandas.DataFrame): Processed data; it is passed through `prepare_visualization` first.
        hline_y (int): Y position of the dashed horizontal reference line.
        by_id (str): Column used to color the lines; also forwarded to `prepare_visualization`.
        whisker_width (int): Size of the tick marks at both ends of the error bars.
        exclude_negative_zfactors (bool): Forwarded to `prepare_visualization`.
        threshold (float): Forwarded to `prepare_visualization`.

    Returns:
        One faceted chart per organism (rows: "AsT Barcode 384", columns:
        "AsT Plate Subgroup"), concatenated horizontally.
    """
    df = prepare_visualization(
        df,
        by_id=by_id,
        exclude_negative_zfactors=exclude_negative_zfactors,
        threshold=threshold,
    )
    organism_columns = []

    # Rows whose `max_conc_below_threshold` field is truthy are colored by id,
    # all other rows are drawn in light gray.
    color = alt.condition(
        alt.datum.max_conc_below_threshold,
        alt.Color(f"{by_id}:N"),
        alt.value("lightgray"),
    )
    # Group by the plain column name (not a one-element list) so that the
    # group key is the organism name itself and not a 1-tuple.
    for organism, org_data in df.groupby("Organism"):
        base = alt.Chart(org_data).encode(color=color)
        lineplot = base.mark_line(point=True, size=0.8).encode(
            x=alt.X(
                "Concentration:O",
                title="Concentration in µM",
                axis=alt.Axis(labelAngle=-45, format=".2e", formatType="number"),
            ),
            y=alt.Y(
                "Mean Relative Optical Density:Q",
                title="Relative Optical Density",
                scale=alt.Scale(domain=[-20, 160], clamp=True),
            ),
            shape=alt.Shape("External ID:N", legend=None),
            tooltip=[
                "Internal ID",
                "External ID",
                "Organism",
                "Dataset",
                "Concentration",
                "Used Replicates",
                "Raw Optical Density",
                "Mean Relative Optical Density",
                # The dot is escaped so it is read as part of the field name.
                r"Std\. Relative Optical Density",
                "Z-Factor",
            ],
        )

        error_bars = base.mark_rule().encode(
            x="Concentration:O",
            y="uerror:Q",
            y2="lerror:Q",
        )
        uerror_whiskers = base.mark_tick(size=whisker_width).encode(
            x="Concentration:O",
            y="uerror:Q",
        )
        lerror_whiskers = base.mark_tick(size=whisker_width).encode(
            x="Concentration:O",
            y="lerror:Q",
        )

        hline = base.mark_rule(strokeDash=[3, 2]).encode(
            y=alt.datum(hline_y),
            color=alt.value("black"),
        )

        org_column = (
            alt.layer(lineplot, error_bars, uerror_whiskers, lerror_whiskers, hline)
            .facet(
                row="AsT Barcode 384",
                column="AsT Plate Subgroup",
                title=alt.Title(organism, anchor="middle"),
            )
            .resolve_axis(x="independent")
            .resolve_scale(color="independent", shape="independent")
        )

        organism_columns.append(org_column)
    return alt.hconcat(*organism_columns).configure_point(size=60)

measurement_vs_bscore_scatter(df, measurement_header='Relative Optical Density mean', measurement_title='Relative Optical Density', bscore_header='b_scores mean', bscore_title='B-Score', color_header='Organism', show_area=True, measurement_threshold=50, b_score_threshold=-3)

Creates a scatter plot for Primary Screens plotting the raw measurement values against B-Scores. Don't forget to exclude controls from the given DataFrame.

Source code in rda_toolbox/plot.py
def measurement_vs_bscore_scatter(
    df: pd.DataFrame,
    measurement_header: str = "Relative Optical Density mean",
    measurement_title: str = "Relative Optical Density",
    bscore_header: str = "b_scores mean",
    bscore_title: str = "B-Score",
    color_header: str = "Organism",
    show_area: bool = True,
    measurement_threshold: float = 50,
    b_score_threshold: float = -3,
):
    """
    Scatter plot for Primary Screens: measurement values (y, reversed axis)
    against B-Scores (x), colored by `color_header`.

    Both thresholds are drawn as dashed rules. If `show_area` is True, a
    translucent blue rectangle is layered on top, spanning from the thresholds
    to the minimum of the measurement and B-Score columns.

    Controls are not removed here; exclude them from `df` beforehand.
    The given `df` is copied and not modified.
    """
    plot_data = df.copy()
    # Constant columns so the thresholds can be encoded like ordinary fields.
    plot_data["Growth Threshold"] = measurement_threshold
    plot_data["B-Score Threshold"] = b_score_threshold

    canvas = alt.Chart(plot_data, width=600)

    x_channel = alt.X(f"{bscore_header}:Q", title=bscore_title)
    y_channel = alt.Y(
        f"{measurement_header}:Q",
        scale=alt.Scale(reverse=True),
        title=measurement_title,
    )
    points = canvas.mark_circle().encode(
        x=x_channel,
        y=y_channel,
        color=f"{color_header}:N",
    )

    # NOTE(review): a single-element dash array; possibly meant to be [4, 4] — confirm.
    horizontal_rule = canvas.mark_rule(color="blue", strokeDash=[4.4]).encode(
        y="Growth Threshold:Q"
    )
    vertical_rule = canvas.mark_rule(color="red", strokeDash=[4.4]).encode(
        x="B-Score Threshold:Q"
    )

    highlighted_area = canvas.mark_rect(color="blue").encode(
        y=f"min({measurement_header}):Q",
        y2="Growth Threshold:Q",
        x="B-Score Threshold:Q",
        x2=f"min({bscore_header}):Q",
        opacity=alt.value(0.2),
    )

    layers = [points, horizontal_rule, vertical_rule]
    if show_area:
        layers.append(highlighted_area)
    return alt.layer(*layers)

mic_hitstogram(data, mic_col, title='Count Distribution of Hits over Concentration')

It's a Hi(t)stogram... Plots the distribution of hits over the determined MICs. Example: mic_hitstogram(mic_results_long, 'MIC50 in µM')

Source code in rda_toolbox/plot.py
def mic_hitstogram(
    data, mic_col, title="Count Distribution of Hits over Concentration"
):
    """
    It's a Hi(t)stogram...
    Plots the distribution of hits over the determined MICs as grouped bars
    (one bar per organism and MIC value) with the count written above each bar.

    Rows without a value in `mic_col` are dropped first. The columns
    "Internal ID" and "Organism" are expected in `data`.

    Example: mic_hitstogram(mic_results_long, 'MIC50 in µM')
    """
    hits = data.dropna(subset=[mic_col])

    def _grouped_channels():
        # Fresh channel objects per layer; bars and labels share the same position.
        return {
            "x": alt.X(f"{mic_col}:O"),
            "y": alt.Y("count(Internal ID):Q"),
            "xOffset": "Organism:N",
            "color": "Organism:N",
        }

    bar_layer = alt.Chart(hits, title=alt.Title(title)).mark_bar()
    bar_layer = bar_layer.encode(**_grouped_channels())

    label_layer = alt.Chart(hits).mark_text(dx=0, dy=-5)
    label_layer = label_layer.encode(
        text=alt.Text("count(Internal ID):Q"),
        **_grouped_channels(),
    )

    return alt.layer(bar_layer, label_layer)

plateheatmaps(df, substance_id='ID', measurement='Raw Optical Density', barcode='Barcode', negative_control='Negative Control', blank='Medium')

Parameters:

Name Type Description Default
df DataFrame

Dataframe with relevant data

required
substance_id str

column name in df containing the unique substance id

'ID'
measurement str

column name in df with the measurements to colorize via heatmaps

'Raw Optical Density'
negative_control str

controls with organism + medium

'Negative Control'
blank str

controls with only medium (no organism and therefore no growth)

'Medium'

Plots heatmaps of the plates from df in a gridlike manner. Exclude unwanted plates, for example Blanks from the df outside this function, like so df[df["Organism"] != "Blank"] before plotting, otherwise it will appear as an extra plate.

Source code in rda_toolbox/plot.py
def plateheatmaps(
    df,
    substance_id="ID",
    measurement="Raw Optical Density",
    barcode="Barcode",
    negative_control="Negative Control",
    blank="Medium",
) -> alt.vegalite.v5.api.HConcatChart:
    """
    Parameters:
        df (pandas.DataFrame): Dataframe with relevant data
        substance_id (str): column name in df containing the unique substance id
        measurement (str): column name in df with the measurements to colorize via heatmaps
        barcode (str): column name in df by which the heatmaps are split into facet rows
        negative_control (str): controls with organism + medium
        blank (str): controls with only medium (no organism and therefore no growth)

    Returns:
        One faceted heatmap chart per organism (rows: `barcode`, columns:
        "Replicate"), concatenated horizontally.

    Plots heatmaps of the plates from df in a gridlike manner.
    Exclude unwanted plates, for example Blanks from the df outside this function, like so
    `df[df["Organism"] != "Blank"]`
    before plotting, otherwise it will appear as an extra plate.

    The given df is not modified.
    """
    # Cast the column index on a derived frame so that the caller's DataFrame
    # is left untouched.
    df = df.assign(Col_384=df["Col_384"].astype(int))
    plots = []
    for organism, organism_df in df.groupby("Organism"):
        plots.append(
            get_heatmap(
                organism_df,
                substance_id,
                measurement,
                negative_control,
                blank,
            )
            .facet(
                row=alt.Row(f"{barcode}:N"),
                column=alt.Column("Replicate:N"),
                title=alt.Title(
                    organism,
                    orient="top",
                    anchor="middle",
                    dx=-20,
                ),
            )
            # One color scale within an organism ...
            .resolve_scale(color="shared")
            .resolve_axis(x="independent", y="independent")
        )

    # ... but separate color scales between organisms.
    plate_heatmaps = (
        alt.hconcat(*plots).resolve_scale(color="independent").resolve_axis(y="shared")
    )
    return plate_heatmaps

potency_distribution(dataset_grp, threshold, dataset, intervals=[0.05, 0.1, 0.78, 6.25, 50], title='Potency Distribution', ylabel='Number of Compounds', xlabel='MIC Interval', legendlabelorient='bottom')

Input: MIC.results["MIC_Results_AllDatasets_longformat"]

Returns a potency distribution (histogram of MIC intervals) plot.

Example: Obtain a list of potency distribution plots. One plot per dataset and threshold.

plots_per_dataset = []
thresholds = [50.0]
for threshold in thresholds:
    for dataset, dataset_grp in mic_df.groupby("Dataset"):
        plots_per_dataset.append(potency_distribution(dataset_grp, threshold, dataset))

Parameters:

Name Type Description Default
dataset_grp DataFrame

Group DataFrame from grouping via Datasets.

required
threshold float

single threshold value (usually from a list of thresholds).

required
dataset str

The name of the dataset.

required
intervals list[float]

the upper limits for the interval bins. Interval example: (x, y] -> open below x, <= y

[0.05, 0.1, 0.78, 6.25, 50]
title str

Plot title.

'Potency Distribution'
ylabel str

Y-Axis label.

'Number of Compounds'
xlabel str

X-Axis label.

'MIC Interval'
legendlabelorient str

Position of the legend (options: "left", "right", "top", "bottom", "top-left", "top-right", "bottom-left", "bottom-right", "none"). Defaults to "bottom".

'bottom'
Source code in rda_toolbox/plot.py
def potency_distribution(
    dataset_grp: pd.DataFrame,
    threshold: float,
    dataset: str,
    intervals: list[float] = [0.05, 0.1, 0.78, 6.25, 50],
    title: str = "Potency Distribution",
    ylabel: str = "Number of Compounds",
    xlabel: str = "MIC Interval",
    legendlabelorient: str = "bottom",  # right, bottom, top-left, etc.
):
    """
    Input: MIC.results["MIC_Results_AllDatasets_longformat"]

    Returns a potency distribution (histogram of MIC intervals) plot.

    Example: Obtain a list of potency distribution plots. One plot per dataset and threshold.
    ```
    plots_per_dataset = []
    thresholds = [50.0]
    for threshold in thresholds:
        for dataset, dataset_grp in mic_df.groupby("Dataset"):
            plots_per_dataset.append(potency_distribution(dataset_grp, threshold, dataset))
    ```

    Parameters:
        dataset_grp (pd.DataFrame): Group DataFrame from grouping via Datasets.
            Must contain the columns "Organism" and f"MIC{threshold} in µM".
        threshold (float): single threshold value (usually from a list of thresholds).
            It is formatted into the MIC column name as is, e.g. 50.0 gives "MIC50.0 in µM".
        dataset (str): The name of the dataset (shown as subtitle).
        intervals (list[float]): the upper limits for the interval bins. Interval example: (x, y] -> open below x, <= y
            The list is only read, never modified.
        title (str): Plot title.
        ylabel (str): Y-Axis label.
        xlabel (str): X-Axis label.
        legendlabelorient (str): Position of the legend (options: "left", "right", "top", "bottom", "top-left", "top-right", "bottom-left", "bottom-right", "none").
            Defaults to "bottom", in which case the legend is laid out in 3 columns.

    Returns:
        Layered Altair chart: grouped bars per MIC interval and organism, with
        the counts written above the bars.
    """
    mic_column = f"MIC{threshold} in µM"

    # Compounds without a determined MIC are counted per organism and shown in
    # an extra category above the largest interval limit.
    no_mic = (
        dataset_grp[dataset_grp[mic_column].isna()]["Organism"]
        .value_counts()
        # Name the index explicitly: older pandas versions leave it unnamed,
        # which would yield an "index" column instead of "Organism".
        .rename_axis("Organism")
        .reset_index(name=ylabel)
    )
    no_mic[xlabel] = f">{max(intervals)}"

    sub_df = (
        dataset_grp.groupby("Organism")[mic_column]
        .value_counts(bins=intervals, dropna=False)
        .rename_axis(["Organism", xlabel])
        .reset_index(name=ylabel)
    )
    # Interval objects are turned into strings so they can be used as category labels.
    sub_df[xlabel] = sub_df[xlabel].astype(str)
    sub_df = pd.concat([no_mic, sub_df])

    legendcolumns = 3 if legendlabelorient == "bottom" else None

    base = alt.Chart(sub_df, title=alt.Title(title, subtitle=[f"Dataset: {dataset}"]))
    bar = base.mark_bar(stroke="white").encode(
        alt.X(f"{xlabel}:N").axis(labelAngle=0),
        # Headroom above the tallest bar for the count labels.
        y=alt.Y(f"{ylabel}:Q").scale(domain=[0, max(sub_df[ylabel]) + 2]),
        color=alt.Color("Organism:N").legend(
            orient=legendlabelorient,
            labelLimit=200,
            fillColor="white",
            columns=legendcolumns,
        ),
        xOffset="Organism:N",
    )
    text = base.mark_text(dy=-5).encode(
        alt.X(f"{xlabel}:N"),
        y=f"{ylabel}:Q",
        xOffset="Organism:N",
        text=f"{ylabel}:Q",
    )
    return alt.layer(bar, text)