Loading…
 

This page contains the protocol and the software used at the Computational Genomics Lab to perform the RGASP evaluations on the Sanger Center cluster server. To take advantage of the multiple CPUs available on that server via the queuing system, most of the processes were turned into scripts that run a single step of the pipeline (S) on every sequence being considered (Q), for each submission (M), in each of the three species (P). That approach yields Q x M x P scripts running simultaneously (depending on CPU availability in the queue), and a minimum of S x Q x M x P processes executed in each RGASP analysis round.

Software tools on this web page are provided “AS IS”. These programs are distributed under the Free Software Foundation's GNU General Public License (GNU-GPL), unless stated otherwise.



Preliminaries


The following files, in tabular format, contain information that is used by different bash commands in the sections below:

Code Libraries


Here we provide a number of software libraries/modules that are called by many of the programs listed in the sections below.

File Description Pre-requisites
global.pm (35.35 Kb) A set of general purpose functions Getopt::Long, Term::ANSIColor, Benchmark, POSIX
largeseqs.pm (41.66 Kb) Loading large sets of sequences in fasta or tabular format. Inline, Inline::C, Inline::Files, Inline::Filters

Main Programs

File Description Pre-requisites
validate_cds.pl (32.24 Kb) Checking GFF features for RGASP submissions global, largeseqs

GFF Features Validation


Testing if validate_cds.pl works as expected (you can click on the test files shown on the following command-line to download them):

Validating GFF Submissions


Making Boxplots


The following files contain some vectors with settings for the plots (group names, color definitions, etc.), as well as the R functions used in the following code blocks:

RGASPr1

R Console
############ R RGASP1
#
# Human scatter-/box-plots for the first RGASP round. The do.* plotting
# functions and the hum.set*, gp.human and seqs.human* objects are not defined
# in this listing; they are expected to come from plots_rgasp1.r (sourced below).

WD <- "./rgasp1/summary";  # directory holding the evaluation summary tables
OD <- "./rgasp1/plots";    # NOTE(review): not used directly below; presumably the
                           # output directory read by plots_rgasp1.r -- confirm
source("./plots_rgasp1.r");

# Read one evaluation summary table.
#   fname: file name of the table, relative to `dir`.
#   dir:   directory containing the table (defaults to WD); a trailing "/"
#          is tolerated.
# Returns the data frame produced by read.table: the first line is the header,
# rows are numbered automatically, strings are kept as character, lines
# starting with "#" are skipped, and "NAN"/"NA"/"nan"/"na" are read as NA.
read.eval.table <- function(fname, dir = WD) {
    read.table(file.path(sub("/+$", "", dir), fname),
               header = TRUE, row.names = NULL,
               as.is = TRUE, na.strings = c("NAN", "NA", "nan", "na"), dec = ".",
               comment.char = "#", strip.white = TRUE, blank.lines.skip = TRUE);
}

# HUMAN SCATTER-/BOX- PLOTS

# Nucleotide-, exon/intron- and gene-level evaluation tables (named after the
# .evalnucleotide / .evalexin / .evalgene file suffixes).
Mnuch <- read.eval.table("all_human_ALL.evalnucleotide");
Mexnh <- read.eval.table("all_human_ALL.evalexin");
Mgenh <- read.eval.table("all_human_ALL.evalgene");

# Scatterplots: one call per hum.set* selection, each with its own short tag
# and bracketed label.
do.scatterplots(Mnuch, Mexnh, Mgenh, hum.set,      "ALL",     "[ALL]",              "human", gp.human);
do.scatterplots(Mnuch, Mexnh, Mgenh, hum.set.part, "partial", "[PARTIAL PREDs]",    "human", gp.human);
do.scatterplots(Mnuch, Mexnh, Mgenh, hum.set.exon, "exon",    "[nonCDS EXONS]",     "human", gp.human);
do.scatterplots(Mnuch, Mexnh, Mgenh, hum.set.cds,  "cds",     "[CDS EXONS]",        "human", gp.human);
do.scatterplots(Mnuch, Mexnh, Mgenh, hum.set.full, "full",    "[CDS+nonCDS EXONS]", "human", gp.human);

# Boxplots for the same five selections.
do.boxplots(Mnuch, Mexnh, Mgenh, hum.set,      "ALL",     "[ALL]",              "human", gp.human);
do.boxplots(Mnuch, Mexnh, Mgenh, hum.set.part, "partial", "[PARTIAL PREDs]",    "human", gp.human);
do.boxplots(Mnuch, Mexnh, Mgenh, hum.set.exon, "exon",    "[nonCDS EXONS]",     "human", gp.human);
do.boxplots(Mnuch, Mexnh, Mgenh, hum.set.cds,  "cds",     "[CDS EXONS]",        "human", gp.human);
do.boxplots(Mnuch, Mexnh, Mgenh, hum.set.full, "full",    "[CDS+nonCDS EXONS]", "human", gp.human);

# Boxplots that additionally receive the sequence list and its colors.
do.boxplots.seqs(Mnuch, Mexnh, Mgenh, hum.set,      "ALL",     "[ALL]",              "human", gp.human, seqs.human, seqs.human.color);
do.boxplots.seqs(Mnuch, Mexnh, Mgenh, hum.set.part, "partial", "[PARTIAL PREDs]",    "human", gp.human, seqs.human, seqs.human.color);
do.boxplots.seqs(Mnuch, Mexnh, Mgenh, hum.set.exon, "exon",    "[nonCDS EXONS]",     "human", gp.human, seqs.human, seqs.human.color);
do.boxplots.seqs(Mnuch, Mexnh, Mgenh, hum.set.cds,  "cds",     "[CDS EXONS]",        "human", gp.human, seqs.human, seqs.human.color);
do.boxplots.seqs(Mnuch, Mexnh, Mgenh, hum.set.full, "full",    "[CDS+nonCDS EXONS]", "human", gp.human, seqs.human, seqs.human.color);

# Only called for the complete selection (hum.set).
do.boxplots.subs(Mnuch, Mexnh, Mgenh, hum.set, "human", gp.human, seqs.human);

# Same for WORM and FLY, replacing "hum"/"human" accordingly...

RGASPr2

R Console
############ R RGASP2
#
# Human scatter-/box-plots for the second RGASP round, one pass per annotation
# subset. The do.* plotting functions and the human.set, gp.human and
# seqs.human* objects are not defined in this listing; they are expected to
# come from plots_rgasp2.r (sourced below).

WD <- "./rgasp2/summary/";  # directory holding the evaluation summary tables
OD <- "./rgasp2/plots";     # NOTE(review): not used directly below; presumably the
                            # output directory read by plots_rgasp2.r -- confirm

source("./plots_rgasp2.r");

# Read one evaluation summary table.
#   fname: file name of the table, relative to `dir`.
#   dir:   directory containing the table (defaults to WD); a trailing "/"
#          is tolerated, so WD may be written with or without it.
# Returns the data frame produced by read.table: the first line is the header,
# rows are numbered automatically, strings are kept as character, lines
# starting with "#" are skipped, and "NAN"/"NA"/"nan"/"na" are read as NA.
read.eval.table <- function(fname, dir = WD) {
    read.table(file.path(sub("/+$", "", dir), fname),
               header = TRUE, row.names = NULL,
               as.is = TRUE, na.strings = c("NAN", "NA", "nan", "na"), dec = ".",
               comment.char = "#", strip.white = TRUE, blank.lines.skip = TRUE);
}

# HUMAN SCATTER-/BOX- PLOTS (by ANNOTATION SUBSET)

predsets <- c("ALL", "FIL", "HIG", "MED", "LOW");

# Iterate over the subset codes themselves rather than 1:length(predsets),
# which would yield c(1, 0) if predsets were ever empty.
for (SET in predsets) {

    SETV <- paste("[", SET, "]", sep = "");  # bracketed label, e.g. "[ALL]"

    # Tables are named all_human_ALL.<SET>.<level>
    Mnuc <- read.eval.table(paste("all_human_ALL", SET, "evalnucleotide", sep = "."));
    Mexn <- read.eval.table(paste("all_human_ALL", SET, "evalexin",       sep = "."));
    Mgen <- read.eval.table(paste("all_human_ALL", SET, "evalgene",       sep = "."));

    # Mnuc$SEQUENCE <- paste("chr_",Mnuc$SEQUENCE,sep="")
    # Mexn$SEQUENCE <- paste("chr_",Mexn$SEQUENCE,sep="")

    do.scatterplots(Mnuc,  Mexn, Mgen, human.set, SET, SETV, "human", gp.human);
    do.boxplots(Mnuc,      Mexn, Mgen, human.set, SET, SETV, "human", gp.human);
    do.boxplots.seqs(Mnuc, Mexn, Mgen, human.set, SET, SETV, "human", gp.human, seqs.human, seqs.human.color);
    do.boxplots.subs(Mnuc, Mexn, Mgen, human.set, SET, SETV, "human", gp.human, seqs.human);

}; # for SET

# Same for WORM and FLY, replacing "hum"/"human" accordingly...

R Console
# HUMAN 4-VARS BOX-PLOTS (ANNOTATION SUBSETS MERGED)
#
# Relies on WD, human.set, gp.human and do.boxplots.comp5 already being
# available in the session; none of them is defined in this listing.

# Read one evaluation summary table.
#   fname: file name of the table, relative to `dir`.
#   dir:   directory containing the table (defaults to WD); a trailing "/"
#          is stripped so the path never contains a doubled separator.
# Returns the data frame produced by read.table: the first line is the header,
# rows are numbered automatically, strings are kept as character, lines
# starting with "#" are skipped, and "NAN"/"NA"/"nan"/"na" are read as NA.
read.eval.table <- function(fname, dir = WD) {
    read.table(file.path(sub("/+$", "", dir), fname),
               header = TRUE, row.names = NULL,
               as.is = TRUE, na.strings = c("NAN", "NA", "nan", "na"), dec = ".",
               comment.char = "#", strip.white = TRUE, blank.lines.skip = TRUE);
}

# One nucleotide / exon-intron / gene table per annotation subset; the suffix
# letter of each variable matches the subset code in the file name
# (A = ALL, F = FIL, L = LOW, M = MED, H = HIG).
Mnuc.A <- read.eval.table("all_human_ALL.ALL.evalnucleotide");
Mexn.A <- read.eval.table("all_human_ALL.ALL.evalexin");
Mgen.A <- read.eval.table("all_human_ALL.ALL.evalgene");

Mnuc.F <- read.eval.table("all_human_ALL.FIL.evalnucleotide");
Mexn.F <- read.eval.table("all_human_ALL.FIL.evalexin");
Mgen.F <- read.eval.table("all_human_ALL.FIL.evalgene");

Mnuc.L <- read.eval.table("all_human_ALL.LOW.evalnucleotide");
Mexn.L <- read.eval.table("all_human_ALL.LOW.evalexin");
Mgen.L <- read.eval.table("all_human_ALL.LOW.evalgene");

Mnuc.M <- read.eval.table("all_human_ALL.MED.evalnucleotide");
Mexn.M <- read.eval.table("all_human_ALL.MED.evalexin");
Mgen.M <- read.eval.table("all_human_ALL.MED.evalgene");

Mnuc.H <- read.eval.table("all_human_ALL.HIG.evalnucleotide");
Mexn.H <- read.eval.table("all_human_ALL.HIG.evalexin");
Mgen.H <- read.eval.table("all_human_ALL.HIG.evalgene");

# Subsets are passed in the order ALL, FIL, LOW, MED, HIG.
do.boxplots.comp5(Mnuc.A, Mexn.A, Mgen.A,
                  Mnuc.F, Mexn.F, Mgen.F,
                  Mnuc.L, Mexn.L, Mgen.L,
                  Mnuc.M, Mexn.M, Mgen.M,
                  Mnuc.H, Mexn.H, Mgen.H,
                  human.set, "ALL",   "[ALL]", "human", gp.human);

# Same for WORM and FLY, replacing "hum"/"human" accordingly...

Page last modified on Wednesday 14 of December, 2011 19:19:46 CET