#! /usr/bin/env python3

# Usage:
#   ./wiggins-plot.py <in_tblfile> <out_pngfile>
#
# Input:
#    <in_tblfile> is the output of wiggins-coverage.py for any number of phage: tab-delimited file with
#    three columns, <dataset> <fractional ORF coverage> <GC%>
#
# Produce a strip plot with each of a few different <dataset> values
# (strings; dataset names) from the tabular data in <in_tblfile> along
# the categorical x-axis, showing fractional ORF coverage on the
# y-axis; data points are colored by the GC% of the genome.
#
# Output:
#    <out_pngfile> is the strip plot, in .png format.
#
import sys                           # A standalone script (as opposed to a Jupyter Notebook cell) usually needs to interact with the system, at least to get commandline args thru sys.argv[]
import matplotlib.pyplot as plt      # We'll start using matplotlib for plotting in week 02...
import pandas as pd                  # and we'll see Pandas starting in week 03...
import seaborn as sns                # and you might see me using Seaborn, one of many data display packages built atop matplotlib (and Pandas); I like its strip plots.

# The script takes exactly two arguments after its own name; anything else gets the usage message and a nonzero exit.
if len(sys.argv) != 3:
    sys.exit('Usage: wiggins-plot.py <in_tblfile> <out_pngfile>')

in_tblfile, out_pngfile = sys.argv[1:]    # first arg: input table path; second arg: output .png path

# Load the tab-delimited table into a data frame.
#
# Wiggins' file has no header line. pd.read_table() infers a header from the first line by default,
# which would silently swallow the first data record as column names; header=None keeps every line as data.
df = pd.read_table(in_tblfile, header=None)

# Name the three columns ourselves. This makes the sns.stripplot call below more self-documenting,
# and the assignment raises if the file does not have exactly three columns.
df.columns = ['dataset', 'coverage', 'GC composition']


fig, ax = plt.subplots()             # object-oriented matplotlib style: hold explicit Figure and Axes handles

# Appearance of the individual points, kept separate from the "what to plot" arguments below.
point_style = dict(
    jitter=0.4,            # wide horizontal jitter, because there are a lot of points to spread out
    linewidth=0.5,         # thin outline around each point...
    edgecolor='black',     #   ... drawn in black
    size=3,                # small markers, to separate the points a bit more
    alpha=0.7,             # partly transparent, so overlapping points remain visible
    palette='Spectral',    # a diverging color palette for the hue values
)

# Strip plot straight from the tidy data frame: dataset name on the categorical x-axis,
# ORF coverage on the y-axis, points colored by GC composition. <order> pins the
# left-to-right order of the categories instead of leaving it to seaborn.
sns.stripplot(data=df,
              x='dataset',
              y='coverage',
              hue='GC composition',
              order=['SEA-PHAGE', 'Lestrade phage'],
              ax=ax,       # draw onto our Axes, so other plots could be overlaid on the same one
              **point_style)

# Title, axis labels, and a y-axis that starts at zero rather than being truncated.
ax.set(title="Wiggins' ORF density data",
       xlabel='phage genome dataset',
       ylabel='Fractional genome coverage by ORFs >= 200aa',
       ylim=[0, 3.2])

# Legend with a title and enlarged markers, so the colors are easier to read.
ax.legend(title='%GC composition', markerscale=3.0)

fig.savefig(out_pngfile)


# Didn't like these options, or want to play with making strip plots in different ways?
# Consult the Seaborn documentation.
# See: https://seaborn.pydata.org/generated/seaborn.stripplot.html

