Drawing Utilities
apply_matching_colorscheme(letter, ref_letter, color_format)
Apply a match/mismatch color scheme to sequence letters :param letter: letter from target sequence :param ref_letter: letter from reference sequence (for match/mismatch) :param color_format: 'hex' or 'rgb' for hex string or RGB tuple, respectively :returns: an RGB hex string (for Bokeh) or simple RGB tuple (for vizqes)
Source code in seqlike/draw_utils.py
def apply_matching_colorscheme(letter, ref_letter, color_format: str):
    """Pick the match/mismatch highlight color for a single aligned letter

    :param letter: letter from target sequence
    :param ref_letter: letter from reference sequence (for match/mismatch)
    :param color_format: 'hex' or 'rgb' for hex string or RGB tuple, respectively
    :returns: an RGB hex string (for Bokeh) or simple RGB tuple (for vizqes)
    """
    is_gap = letter == gap_letter
    same_as_reference = letter == ref_letter
    if is_gap:
        # gap aligned against a gap in the reference, versus a plain gap
        color_name = "lightblue" if same_as_reference else "white"
    else:
        # residue identical to the reference, versus a mismatch
        color_name = "limegreen" if same_as_reference else "darkred"
    return convert_weblogo_color(wl.color.Color.from_string(color_name), color_format)
convert_colorscheme_to_color_map(color_scheme, color_format)
Convert weblogo ColorScheme into bokeh color map :param color_scheme: a Callable that returns a weblogo ColorScheme object :param color_format: 'hex' or 'rgb' for hex string or RGB tuple, respectively :returns: a dict of bokeh colors indexed by letter
Source code in seqlike/draw_utils.py
def convert_colorscheme_to_color_map(color_scheme: Callable, color_format: str) -> dict:
    """Build a per-letter color lookup from a weblogo ColorScheme

    :param color_scheme: a Callable that returns a weblogo ColorScheme object
    :param color_format: 'hex' or 'rgb' for hex string or RGB tuple, respectively
    :returns: a dict of colors (in the requested format) indexed by single letter
    """
    # first pass: one entry per rule, keyed by the rule's whole symbol string
    colors_by_symbols = {
        rule.symbols: convert_weblogo_color(rule.color, color_format)
        for rule in color_scheme().rules
    }
    # gap and stop characters default to white
    colors_by_symbols["-*"] = convert_weblogo_color(wl.color.Color.from_string("white"), color_format)
    # second pass: split every symbol string so each key is a single letter;
    # entries that come later win when a letter appears in more than one string
    return {
        symbol: color
        for symbols, color in colors_by_symbols.items()
        for symbol in symbols
    }
convert_weblogo_color(color, color_format)
Convert weblogo Color to Bokeh color object
Note: Weblogo colors are RGB but fractional [0, 1], whereas Bokeh and draw_alignment are [0, 255]
:sa: https://github.com/WebLogo/weblogo/blob/master/weblogo/color.py
:param color: A weblogo Color object. :param color_format: Either "rgb" or "hex". :returns: Either an RGB tuple (for "rgb") or hexadecimal string (for "hex").
Source code in seqlike/draw_utils.py
def convert_weblogo_color(color: "wl.color.Color", color_format: str) -> Union[tuple, str]:
    """Convert weblogo Color to Bokeh color object

    Note: Weblogo colors are RGB but fractional [0, 1],
    whereas Bokeh and draw_alignment are [0, 255]

    :sa: https://github.com/WebLogo/weblogo/blob/master/weblogo/color.py

    :param color: A weblogo Color object; only its fractional ``red``, ``green``
        and ``blue`` attributes are read.
    :param color_format: Either "rgb" or "hex".
    :returns: Either an RGB tuple of ints (for "rgb") or an uppercase
        ``#RRGGBB`` hexadecimal string (for "hex").
    :raises AssertionError: if ``color_format`` is neither "rgb" nor "hex".
    """
    assert color_format in ("rgb", "hex"), f"color_format must be 'rgb' or 'hex', got {color_format!r}"
    # Round to the nearest level instead of truncating: a fractional channel that
    # lands just below an integer (e.g. 49.9999... after float error) must not
    # drop a whole level.
    red, green, blue = (
        int(round(255 * channel)) for channel in (color.red, color.green, color.blue)
    )
    if color_format == "rgb":
        return red, green, blue
    return f"#{red:02X}{green:02X}{blue:02X}"
draw_alignment(aligned, colorscheme=aa_chemistry_simple, boxwidth=2, boxheight=12, label_width=100, show_ids=False, show_names=False, show_descriptions=False, show_grouping=False)
Generate a colored figure from an alignment :param aligned: MultipleSeqAlignment object :param colorscheme: a Callable that returns a weblogo ColorScheme object :param boxwidth: column width of alignment :param boxheight: row height of alignment :param label_width: maximum length of row label; if None, extend to maximum label length :param show_ids: if True, show SeqRecord ID for each row :param show_names: if True, show SeqRecord name for each row :param show_descriptions: if True, show SeqRecord description for each row :param show_grouping: if True, highlight changes from reference in red against green background, instead of using the residue colorscheme :returns: PIL Image object
:note: based on vizqespkg.vizqes_main.draw :sa: vizqespkg.vizqes_main.draw :sa: http://www.bioinformatics.nl/~berndb/aacolour.html
Source code in seqlike/draw_utils.py
def draw_alignment(
    aligned,
    colorscheme: Callable = aa_chemistry_simple,
    boxwidth=2,
    boxheight=12,
    label_width=100,
    show_ids=False,
    show_names=False,
    show_descriptions=False,
    show_grouping=False,
):
    """Generate a colored figure from an alignment

    :param aligned: MultipleSeqAlignment object
    :param colorscheme: a Callable that returns a weblogo ColorScheme object
    :param boxwidth: column width of alignment
    :param boxheight: row height of alignment
    :param label_width: maximum length of row label; if None, extend to maximum label length
    :param show_ids: if True, show SeqRecord ID for each row
    :param show_names: if True, show SeqRecord name for each row
    :param show_descriptions: if True, show SeqRecord description for each row
    :param show_grouping: if True, highlight changes from reference in red against green background,
        instead of using the residue colorscheme
    :returns: PIL Image object
    :raises KeyError: if ``show_grouping`` is False and a sequence letter has no
        entry in the color map built from ``colorscheme``

    :note: based on vizqespkg.vizqes_main.draw
    :sa: vizqespkg.vizqes_main.draw
    :sa: http://www.bioinformatics.nl/~berndb/aacolour.html
    """
    # record attributes that make up the row label, in display order
    label_attrs = [
        attr
        for shown, attr in ((show_names, "name"), (show_ids, "id"), (show_descriptions, "description"))
        if shown
    ]
    if label_attrs:
        font = find_font(boxheight)
        # reserve the rendered width of the longest (by character count) value
        # of each shown field, plus one pixel per field
        offset = -1
        for attr in label_attrs:
            longest = max((getattr(m, attr)[:label_width] for m in aligned), key=len)
            offset += int(font.getlength(longest) + 1)
        # NOTE(review): the fields are joined with a space character below, while
        # only one pixel per field is reserved here, so multi-field labels may
        # spill into the first alignment columns -- confirm whether intended.
    else:
        font, offset = None, 0

    height = len(aligned) * boxheight
    width = aligned.get_alignment_length() * boxwidth + offset
    img = Image.new("RGB", (width, height), "white")
    draw = ImageDraw.Draw(img)

    color_dict = convert_colorscheme_to_color_map(colorscheme, color_format="rgb")
    refseq = aligned[0].seq
    # a degenerate box has no pixels; skip drawing rather than pass an inverted box
    has_area = boxwidth > 0 and boxheight > 0
    for row, member in enumerate(aligned):
        top = row * boxheight
        for col, letter in enumerate(member.seq):
            if show_grouping:
                color = apply_matching_colorscheme(letter, refseq[col], color_format="rgb")
            else:
                color = color_dict[letter]
            if not has_area:
                continue
            left = col * boxwidth + offset
            # one filled rectangle per cell (both corners inclusive) covers the
            # same boxwidth x boxheight pixels as plotting them one at a time
            draw.rectangle([(left, top), (left + boxwidth - 1, top + boxheight - 1)], fill=color)
        if label_attrs:
            text = " ".join(getattr(member, attr)[:label_width] for attr in label_attrs)
            # anchor the label one pixel above the row's top edge; computed from
            # the row itself so it does not depend on any cell having been drawn
            draw.text((0, top - 1), text, font=font, fill=(0, 0, 0))
    return img
find_font(size, fontpath=None)
Find and scale font based on fontpath.
Helper function for draw_alignment.
:param size: desired font size :param fontpath: optional search path for font file (.ttf) :returns: PIL.ImageFont object
:sa: vizqes (https://pypi.python.org/pypi/vizqes)
Source code in seqlike/draw_utils.py
def find_font(size, fontpath=None):
    """Find and scale font based on fontpath.

    Helper function for draw_alignment.

    Reports on stdout when the font file is loaded; on failure the error is
    written to stderr and PIL's default font is returned instead.

    :param size: desired font size
    :param fontpath: optional search path for font file (.ttf); when falsy,
        ``FreeMono.ttf`` next to this module is used
    :returns: PIL.ImageFont object

    :sa: vizqes (https://pypi.python.org/pypi/vizqes)
    """
    if fontpath:
        font_searchpath = fontpath
    else:
        font_searchpath = os.path.join(os.path.dirname(__file__), "FreeMono.ttf")
    try:
        font = ImageFont.truetype(font_searchpath, size=size)
    except IOError as e:
        # terminate the exception text with a newline so it does not run
        # straight into the message that follows
        sys.stderr.write("{}\n".format(e))
        sys.stderr.write("could not find font in {}\nUsing default\n".format(str(font_searchpath)))
        font = ImageFont.load_default()
    else:
        sys.stdout.write("Found font in {}\n".format(str(font_searchpath)))
    return font
view_alignment(aligned, fontsize='9pt', show_N=100, colorscheme=aa_chemistry_simple, boxwidth=9, boxheight=15, label_width=None, show_descriptions=False, show_grouping=False)
Bokeh sequence alignment view for protein and nucleic acid sequences
:sa: https://dmnfarrell.github.io/bioinformatics/bokeh-sequence-aligner
:param aligned: MultipleSeqAlignment object :param fontsize: font size for text labels :param show_N: size of sequence window (in number of sequence letters) :param colorscheme: a Callable that returns a weblogo ColorScheme object :param boxwidth: column width of alignment :param boxheight: row height of alignment :param label_width: maximum length of row label; if None, extend to maximum label length :param show_descriptions: if True, show SeqRecord description for each row :param show_grouping: if True, highlight changes from reference in red against green background, instead of using the residue colorscheme :returns: A Bokeh plot of the Multiple Sequence Alignment.
Source code in seqlike/draw_utils.py
def view_alignment(
    aligned,
    fontsize="9pt",
    show_N=100,
    colorscheme: Callable = aa_chemistry_simple,
    boxwidth=9,
    boxheight=15,
    label_width=None,
    show_descriptions=False,
    show_grouping=False,
):
    """Bokeh sequence alignment view for protein and nucleic acid sequences

    Builds two stacked figures sharing one data source: a short overview of the
    whole alignment (colored rectangles only) and a detail view showing the
    letters over colored rectangles. The combined layout is displayed with
    ``bk.plotting.show`` before it is returned.

    :sa: https://dmnfarrell.github.io/bioinformatics/bokeh-sequence-aligner

    :param aligned: MultipleSeqAlignment object
    :param fontsize: font size for text labels
    :param show_N: size of sequence window (in number of sequence letters)
    :param colorscheme: a Callable that returns a weblogo ColorScheme object
    :param boxwidth: column width of alignment
    :param boxheight: row height of alignment
    :param label_width: maximum length of row label; if None, extend to maximum label length
    :param show_descriptions: if True, show SeqRecord description for each row
    :param show_grouping: if True, highlight changes from reference in red against green background,
        instead of using the residue colorscheme
    :returns: A Bokeh plot of the Multiple Sequence Alignment (the gridplot
        layout holding both figures).
    """
    from bokeh.models import ColumnDataSource, Range1d
    from bokeh.plotting import figure
    from bokeh.models.glyphs import Rect

    def get_colors(seqs, color_scheme):
        """make colors for letters in sequence

        :param seqs: An iterable of sequences; they are flattened letter by letter.
        :param color_scheme: A Callable that returns a weblogo ColorScheme object
            (it is handed to convert_colorscheme_to_color_map).
        :returns: a list with one hex color per letter, in flattened order.
        """
        # get colors
        color_dict = convert_colorscheme_to_color_map(color_scheme, color_format="hex")
        # assign colors to sequences
        text = [i for s in list(seqs) for i in s]
        return [color_dict[a] for a in text]

    def get_colors_for_matching(seqs):
        """match/mismatch color scheme for show_grouping

        :param seqs: Sequences for which colors need to be matched; the first
            one is used as the reference.
        :returns: a list of colors (strings)
        """
        refseq = seqs[0]
        colors = list()
        for seq in list(seqs):
            for xs, ref_s in zip(seq, refseq):
                colors.append(apply_matching_colorscheme(xs, ref_s, color_format="hex"))
        return colors

    # make sequence and id lists from the aligned object
    seqs = [rec.seq for rec in (aligned)]
    # every label starts with the row index
    if show_descriptions:
        labels = [f"{row} - {rec.description} ({rec.id})" for (row, rec) in enumerate(aligned)]
    else:
        labels = [f"{row} - {rec.id}" for (row, rec) in enumerate(aligned)]
    if label_width:
        labels = [label[:label_width] for label in labels]
    else:
        # no limit given: measure the longest label so plot_width can account for it
        label_width = max(len(label) for label in labels)
    # all letters of all sequences, one sequence after another
    text = [i for s in list(seqs) for i in s]
    if show_grouping:
        colors = get_colors_for_matching(seqs)
    else:
        colors = get_colors(seqs, colorscheme)
    # N columns (taken from the first sequence), S rows
    N = len(seqs[0])
    S = len(seqs)
    x = np.arange(1, N + 1)
    # need to reverse y so that sequences are plotted top-to-bottom
    y = np.arange(S - 1, -1, -1)
    # creates a 2D grid of coords from the 1D arrays
    xx, yy = np.meshgrid(x, y)
    # flattens the arrays
    gx = xx.ravel()
    gy = yy.flatten()
    # use recty for rect coords with an offset
    recty = gy + 0.5
    # now we can create the ColumnDataSource with all the arrays
    source = ColumnDataSource(dict(x=gx, y=gy, recty=recty, text=text, colors=colors))
    plot_height = len(seqs) * boxheight + 50
    x_range = Range1d(0, N + 1, bounds="auto")
    # the detail window never exceeds the alignment length
    viewlen = min(show_N, N)
    # view_range is for the close up view
    view_range = (0, viewlen)
    tools = "xpan,xwheel_zoom,reset,save"
    # plot_width combines length of text labels and number of letters in sequence view window
    # note: this part requires additional tuning; 5 pixel average width of y-axis labels is a guess
    plot_width = int(5 * label_width) + boxwidth * viewlen + 40
    # entire sequence view (no text, with zoom)
    p = figure(
        title=None,
        width=plot_width,
        height=50,
        x_range=x_range,
        y_range=(0, S),
        tools=tools,
        min_border=0,
        toolbar_location="below",
    )
    rects = Rect(
        x="x",
        y="recty",
        width=1,
        height=1,
        fill_color="colors",
        line_color=None,
        fill_alpha=0.6,
    )
    p.add_glyph(source, rects)
    p.yaxis.visible = False
    p.grid.visible = False
    # sequence text view with ability to scroll along x axis
    # labels are reversed to pair with the reversed y coordinates above
    p1 = figure(
        title=None,
        width=plot_width,
        height=plot_height,
        x_range=view_range,
        y_range=labels[::-1],
        tools="xpan,reset,save",
        min_border=0,
        toolbar_location="below",
    )  # , lod_factor=1)
    glyph = bk.models.glyphs.Text(
        x="x",
        y="y",
        text="text",
        text_align="center",
        text_color="black",
        text_font=bk.core.properties.value("monospace"),
        text_font_size=fontsize,
    )
    # same rectangles as in the overview, with a lower fill alpha
    rects = Rect(
        x="x",
        y="recty",
        width=1,
        height=1,
        fill_color="colors",
        line_color=None,
        fill_alpha=0.4,
    )
    # the text glyph is added before the rectangles
    p1.add_glyph(source, glyph)
    p1.add_glyph(source, rects)
    p1.grid.visible = False
    p1.xaxis.major_label_text_font_style = "bold"
    p1.yaxis.minor_tick_line_width = 0
    p1.yaxis.major_tick_line_width = 0
    # stack overview above detail view; p now refers to the combined layout
    p = bk.layouts.gridplot([[p], [p1]], toolbar_location="below")
    # side effect: the layout is displayed here, not only returned
    bk.plotting.show(p)
    return p