<?xml version="1.0" encoding="UTF-8"?>
<workflowlist xmlns="http://www.cnio.es/scombio/jmfernandez/inb/IWWEM/frontend"><!--	This content was generated by workflowmanager, an
	application of IWWE&M, INB Interactive Web Workflow Enactor & Manager
	The workflow enactor itself is based on Taverna core, and
	uses it.
	
	Author: José María Fernández González (C) 2007-2008
	Institutions:
	*	Spanish National Cancer Research Institute (CNIO, http://www.cnio.es/)
	*	Spanish National Bioinformatics Institute (INB, http://www.inab.org/)
--><domain class="IWWEM" time="2010-07-30T18:18:02+02:00" relURI="workflows"><workflow uuid="workflow:0395ec68-dcec-4ac6-b534-a87f38e12817" title="ESTs assembly workflow"><release uuid="workflow:0395ec68-dcec-4ac6-b534-a87f38e12817" lsid="urn:lsid:www.mygrid.org.uk:operation:ZR18RDQYZP0" author="" title="ESTs assembly workflow" path="0395ec68-dcec-4ac6-b534-a87f38e12817/workflow.xml" workflowType="application/vnd.taverna.scufl+xml" licenseName="CC Attribution-ShareAlike 3.0" licenseURI="http://creativecommons.org/licenses/by-sa/3.0/legalcode" responsibleMail="acanada@cnio.es" responsibleName="Andrés Cañada" date="2008-02-13T03:38:26+01:00"><description><![CDATA[]]></description><graph mime="image/svg+xml">0395ec68-dcec-4ac6-b534-a87f38e12817/workflow.svg</graph><input name="trace_data"><mime type="text/plain"/></input><input name="namespace"><mime type="text/plain"/></input><input name="identifier"><mime type="text/plain"/></input><output name="All_Reads_Quality_Data"><mime type="text/plain"/></output><output name="All_Reads_Sequences"><mime type="text/plain"/></output><output name="Phrap_Singlets_Contigs_Sequences"><mime type="text/plain"/></output><output name="Assembly_Ace_Output"><mime type="text/xml"/></output><output name="High_Quality_Reads_Sequences"><mime type="text/plain"/></output><output name="High_Quality_Reads_Quality_Data"><mime type="text/plain"/></output><step name="articleName_rawdata"/><step name="make_unique_fasta_sequences_string"/><step name="Encode_byte___to_base64"/><step name="parse_moby_sequences"/><step name="articleNameContent"/><step name="identifierDefault"/><step name="parse_moby_quality_data"/><step name="parse_moby_phrap_sequences"/><step name="make_unique_fasta_quality_string"/><step name="String2"><description><![CDATA[a string]]></description></step><step name="String"><description><![CDATA[a string]]></description></step><step name="filterSequencesAndQualityDataByLength" kind="moby"><description><![CDATA[Filters a set of 
sequences based on their length. Only sequences which length is greater than the length cutoff will be returned. It also filters out the quality data associated with the removed sequences.]]></description><secondaryInput name="length_cutoff" isOptional="true" type="string" default="75"/><secondaryInput name="trim_masked_regions" isOptional="true" type="string" default="On"/></step><step name="runCrossMatchToScreenVector" kind="moby"><description><![CDATA[Runs cross_match to screen out vector sequences from a DNA sequence. The vector sequences dataset is stored internally]]></description><secondaryInput name="minscore" isOptional="true" type="string" default="20"/><secondaryInput name="minmatch" isOptional="true" type="string" default="12"/></step><step name="runPhrapWithQualityData" kind="moby"><description><![CDATA[Runs Phrap to Assemble a set of DNA sequences, taking into account base quality data]]></description><secondaryInput name="node_seg" isOptional="true" type="string" default="8"/><secondaryInput name="node_space" isOptional="true" type="string" default="4"/></step><step name="runPhred" kind="moby"><description><![CDATA[Phred writes the base calls from a chromatogram and returns a DNA sequence and FASTA base quality data]]></description><secondaryInput name="trim_cutoff" isOptional="true" type="string" default="0.05"/><secondaryInput name="trim_alt" isOptional="true" type="string" default="On"/></step><step name="String1"><description><![CDATA[a string]]></description></step><step name="ABI_Encoded"><description><![CDATA[Trace data in ABI format]]></description></step><step name="FASTA_Base_Quality_multi"><description><![CDATA[sequences base quality data in FASTA format]]></description></step><step name="FASTA_NA_multi"><description><![CDATA[multiple nucleic acids sequence in FASTA format]]></description></step><step name="make_unique_fasta_sequences_string_2"/><step name="make_unique_fasta_sequences_string_3"/><step 
name="make_unique_fasta_quality_string_2"/><step name="parse_moby_ace_output"/><step name="parse_moby_high_quality_reads_sequences"/><step name="parse_moby_high_quality_reads_quality_data"/></release></workflow><workflow uuid="workflow:0b4d3b5c-d4e7-4697-a831-7be74a35943b" title="Preprocessing and differential expression test of gene expression data"><release uuid="workflow:0b4d3b5c-d4e7-4697-a831-7be74a35943b" lsid="urn:lsid:net.sf.taverna:wfDefinition:2ab38286-c858-49d7-97d1-edb0ce272589" author="Joaquin Tarraga Gimenez, jtarraga@cipf.es" title="Preprocessing and differential expression test of gene expression data" path="0b4d3b5c-d4e7-4697-a831-7be74a35943b/workflow.xml" workflowType="application/vnd.taverna.scufl+xml" licenseName="CC Attribution-ShareAlike 3.0" licenseURI="http://creativecommons.org/licenses/by-sa/3.0/legalcode" responsibleMail="jtarraga@cipf.es" responsibleName="Joaquin Tarraga Gimenez" date="2007-11-29T11:47:29+01:00"><description><![CDATA[You can use this workflow, if you have DNA microarray data and you want to know if there are genes differentially expressed under different experimental conditions.

At the moment there are four different experimental contexts involving gene expression data that you can analyze using this workflow:

It is possible with this workflow to study differential expression between two conditions. For example, when comparing tumour and healthy cells you can find which genes are over-expressed or under-expressed in the tumour.

It is also possible to analyze differential expression among more than two conditions. For instance, if you are studying three different types of tumour cells, you can find the genes whose expression patterns are most differentiated among them.

Another kind of data that you can handle with our tools is that concerned with differential expression related to a continuous variable. For example, if you treat some cells with a drug and measure their gene expression in several time-points after the treatment, you can find genes whose expression increases or decreases with time as a consequence of the treatment.

The last type of inspection that can be done using this workflow is the study of gene expression related to a survival time. You can study for example which genes are more directly related to the death of your cells by analyzing the relationships between the expression of the genes and the survival time of the cells.]]></description><graph mime="image/png">0b4d3b5c-d4e7-4697-a831-7be74a35943b/workflow.png</graph><graph mime="image/svg+xml">0b4d3b5c-d4e7-4697-a831-7be74a35943b/workflow.svg</graph><graph mime="application/pdf">0b4d3b5c-d4e7-4697-a831-7be74a35943b/workflow.pdf</graph><input name="gene_expression_data"><mime type="text/plain"/></input><output name="differential_expression_image"><mime type="text/plain"/></output><step name="runPreprocessing" kind="moby"><description><![CDATA[Preprocessing service for gene expression patterns that allows users to apply log-transform, merge replicated values, filter missing values, impute missing values and standardize profiles]]></description><secondaryInput name="standardize" isOptional="true" type="string" default="Yes"/><secondaryInput name="filter_missing_mode" isOptional="true" type="string" default="None"/><secondaryInput name="log_transform" isOptional="true" type="string" default="Log 2"/><secondaryInput name="impute_missing_mode" isOptional="true" type="string" default="Fill with zeros"/><secondaryInput name="merge_replicates_mode" isOptional="true" type="string" default="None"/></step><step name="runDifferentialExpressionTest" kind="moby"><description><![CDATA[This webservice allows users to analyze gene expression under different experimental conditions: differential expresssion between two conditions (applying t-test, Bayes, Data adaptive, SAM and CLEAR tests), differential expression among more than two conditions (Anova and CLEAR tests), differential expression related to a continuous variable (Regression, Pearson and Spearman correlations) and differential expression related to a survival time (Cox 
test)]]></description><secondaryInput name="test" isOptional="true" type="string" default="T-test"/><secondaryInput name="clear_significance_level" isOptional="true" type="string" default="0.05"/></step><step name="displayDifferentialExpressionTest" kind="moby"><description><![CDATA[This webservice returns a PNG image representing the results from a differential expresssion test (t-test, Bayes, Data adaptive, SAM, CLEAR, Anova, Regression, Pearson correlation, Spearman correlation and Cox tests). In addition, an URL to the image is returned in the output field 'legend']]></description><secondaryInput name="standardize" isOptional="true" type="string" default="yes"/><secondaryInput name="scale" isOptional="true" type="string" default="-3/+3"/><secondaryInput name="rows" isOptional="true" type="string" default="50"/></step><step name="String"><description><![CDATA[a string]]></description></step><step name="MicroArrayData_Text"><description><![CDATA[Micro-array data in text format representing gene expression patterns. Each line must have the gene name and the expression values and the expression values for each condition. 
All the data items must be separated by tabulators]]></description></step><example uuid="f81c2a2a-3a85-454a-a9fe-077b2fcc6201" name="gene_expression_data_example" path="0b4d3b5c-d4e7-4697-a831-7be74a35943b/examples/f81c2a2a-3a85-454a-a9fe-077b2fcc6201.xml" date="2007-11-29T11:47:41+01:00"/></release></workflow><workflow uuid="workflow:1e8f85e8-cb99-4b55-861d-27c1587ab848" title="Automatic Annotation of Protein Function"><release uuid="workflow:1e8f85e8-cb99-4b55-861d-27c1587ab848" lsid="urn:lsid:net.sf.taverna:wfDefinition:15467c2e-c99a-492b-8bf2-67d0c82854c2" author="José Manuel Rodríguez Carrasco INB-GN2-CNIO (jmrodriguez@cnio.es)" title="Automatic Annotation of Protein Function" path="1e8f85e8-cb99-4b55-861d-27c1587ab848/workflow.xml" workflowType="application/vnd.taverna.scufl+xml" licenseName="CC Attribution-ShareAlike 3.0" licenseURI="http://creativecommons.org/licenses/by-sa/3.0/legalcode" responsibleMail="jmrodriguez@cnio.es" responsibleName="José Manuel Rodríguez Carrasco" date="2007-11-29T17:55:58+01:00"><description><![CDATA[This workflow runs some methods that belong to annotate homologous sequences and include new features related to the specific identification of protein subfamilies (orthologous groups). This workflow is composed by three methods:

   1. ISS (Complete): A sequence similarity search is carried out to find proteins related to the query sequence. A clustering algorithm is applied in order to identify closely related sequence groups in the set of similar proteins. More related sequences are more likely to share a common function. In some cases, recursive sequence similarity searches lead to better representation of the related subfamilies, which facilitates the clustering. This method returns the matrix of distances, the first Blast report, and annotations of the proteins.

   2. NCut: The local alignments with the closely related proteins clustered together with the query protein are classified in different categories depending on the extent to which the alignments cover the length of the query and target sequences (alignment categories). So, this method returns the cluster of homologous sequence whose values are the distance among themselves.

   3. OFunCUT: Key functional annotations of the corresponding proteins are analyzed, including functional descriptions, enzymatic activity codes, and Swiss-Prot style keywords. The transference of information is carried out starting from the alignment categories with a better coverage. A confidence level is assigned to each one of the annotations. This level is derived from the alignment categories.

The FunCUT results, as we said, include key functional annotations of the corresponding proteins such as the functional descriptions, enzymatic activity codes, and Swiss-Prot style keywords. Also, the results include the clusters of the query protein and the hits of Blast.

http://www.pdg.cnb.uam.es/fabascal/search_and_clus.html

Other links

http://www.pdg.cnb.uam.es/fcut/funcut.html]]></description><graph mime="image/png">1e8f85e8-cb99-4b55-861d-27c1587ab848/workflow.png</graph><graph mime="image/svg+xml">1e8f85e8-cb99-4b55-861d-27c1587ab848/workflow.svg</graph><graph mime="application/pdf">1e8f85e8-cb99-4b55-861d-27c1587ab848/workflow.pdf</graph><input name="aminoacid sequence"><description><![CDATA[Protein sequence.]]></description><mime type="text/plain"/></input><input name="length of sequence"><description><![CDATA[Length of protein sequence.]]></description><mime type="text/plain"/></input><input name="aminoacid id"><description><![CDATA[Identifier of protein.]]></description><mime type="text/plain"/></input><output name="iss result"><description><![CDATA[Xml document that show us the sequence similitary search.]]></description><mime type="text/plain"/></output><output name="distance matrix"><description><![CDATA[Xml document that show us the Matrix of distances.]]></description><mime type="text/plain"/></output><output name="first blast"><description><![CDATA[First Blast report.]]></description><mime type="text/plain"/></output><output name="distance cluster"><description><![CDATA[Matrix of distances among homologous sequences.]]></description><mime type="text/plain"/></output><output name="family annotation"><description><![CDATA[Xml document that show us annotations of homologous sequences.]]></description><mime type="text/plain"/></output><step name="runNCut"><description><![CDATA[MOBY Web Service that runs NCut application, the second step of FunCUT method.

NCut is a clustering application that groups the homolog sequences -subfamilies- and weights its closeness to the query sequence.

* Inputs: 

   - Moby Object of Matrix distance (NCut_Matrix{'ncutMatrix'}): Representation of distances among homologous sequences as xml document.

* Outputs:

   - Moby Object based on NCut result (NCut_Clusters{'ncutClusters'}): Cluster of homologous sequence whose values are the distance among themselves.]]></description></step><step name="runOFunCUT" kind="moby"><description><![CDATA[MOBY Web Service that runs oFunCUT, the third step of FunCUT method.

oFunCUT analyzes the key functional annotations of the neighbor sequences related to the query sequence and makes the transference of the annotations, from clustering result (NCut) and sequence similarity search (ISSComplete, NCBI Blast).

* Inputs: 

   - Moby Object of protein sequence (AminoAcidSequence{'sequence'}): Lightweight representation an amino acid sequence.

   - Moby Object based on NCBI Blast result (NCBI_BLAST_Text{'firstBlast'}): First Blast report.

   - Moby Object based on NCut result (NCut_Clusters{'ncutClusters'}): Cluster of homologous sequence whose values are the distance among themselves.


* Outputs:

   - Complex Moby Object based on FunCUT result (FunCUT_Annotation_XML{'funcutResult'}): Annotations of homologous sequences as xml document.

* Secondaries:

   - database (String{'Swiss-Prot', 'TrEMBL', 'SWALL'}, default value: SWALL): Database used for Blast searches.]]></description><secondaryInput name="database" isOptional="true" type="string" default="SWALL"/></step><step name="runISSComplete" kind="moby"><description><![CDATA[MOBY Web Service that runs ISS (Complete), the first step of FunCUT method.

ISS, a sequence similarity search (NCBI Blast), is carried out to find proteins related to the query sequence. Returns the matrix of distances, the first Blast report, and annotations of the proteins

* Inputs: 

   - Moby Object of protein sequence (AminoAcidSequence{'sequence'}): Lightweight representation an amino acid sequence.

* Outputs:

   - Complex Moby Object based on ISS result (ISS_Annotation_XML{'issXML'}): Sequence similarity search as xml document.

   - Complex Moby Object based on NCBI Blast result (NCBI_BLAST_Text{'firstBlast'}): First Blast report.

   - Complex Moby Object based on NCut input (NCut_Matrix{'ncutMatrix'}): Matrix of distances as xml document.

* Secondaries:

   - rounds (Integer{''}, default value: 2): Number of recursive sequence similarity searches in Blast.

   - cutlen (Integer{''}, default value: 35): Minimum length of intermediate sequences.

   - evalue (Float{''}, default value: 0.0005): Cut-off e-value.

   - database (String{'Swiss-Prot', 'TrEMBL', 'SWALL'}, default value: SWALL): Database used for Blast searches.

   - maxsearches (Integer{''}, default value: 750): Maximum number of searches per round.

    - filters (String{'No', 'XNU', 'SEG', 'COILS', 'XNU SEG', 'XNU COILS', 'SEG COILS', 'XNU SEG COILS'}, default value: No): Filters that search each sequence for statistically significant tandem repeats.]]></description><secondaryInput name="filters" isOptional="true" type="string" default="No"/><secondaryInput name="rounds" isOptional="true" type="string" default="2"/><secondaryInput name="evalue" isOptional="true" type="string" default="0.0005"/><secondaryInput name="cutlen" isOptional="true" type="string" default="35"/><secondaryInput name="database" isOptional="true" type="string" default="SWALL"/><secondaryInput name="maxsearches" isOptional="true" type="string" default="750"/></step><step name="String"><description><![CDATA[a string]]></description></step><step name="Integer"><description><![CDATA[an int]]></description></step><step name="AminoAcidSequence"><description><![CDATA[Lightweight representation an amino acid sequence]]></description></step><example uuid="808c311a-29d9-4baa-b72c-8004f114106e" name="AminoAcid Sequence example" path="1e8f85e8-cb99-4b55-861d-27c1587ab848/examples/808c311a-29d9-4baa-b72c-8004f114106e.xml" date="2007-11-29T17:57:00+01:00"/></release></workflow><workflow uuid="workflow:1f1f2e70-d227-425a-a9e2-0cbda1c3b5d1" title="Clustering of co-expressed genes in subsets showing similar configurations of TFBSs."><release uuid="workflow:1f1f2e70-d227-425a-a9e2-0cbda1c3b5d1" lsid="urn:lsid:www.mygrid.org.uk:operation:SSAVFCN82D1" author="Arnaud Kerhornou" title="Clustering of co-expressed genes in subsets showing similar configurations of TFBSs." path="1f1f2e70-d227-425a-a9e2-0cbda1c3b5d1/workflow.xml" workflowType="application/vnd.taverna.scufl+xml" licenseName="CC Attribution-ShareAlike 3.0" licenseURI="http://creativecommons.org/licenses/by-sa/3.0/legalcode" responsibleMail="arnaud@ebi.ac.uk" responsibleName="Arnaud Kerhornou" date="2008-02-13T03:38:26+01:00"><description><![CDATA[Input: 
  * a list of Ensembl gene identifiers. It is also possible to give any external gene identifiers Ensembl recognizes (e.g. RefSeq ids)
  * a namespace.

Output: 
  * a list of gene sub-clusters
  * for each cluster, a TF alignment map that defines a consensus transcriptional regulatory pattern.

See, http://genome.imim.es/webservices/workflows.html page for more details.]]></description><graph mime="image/png">1f1f2e70-d227-425a-a9e2-0cbda1c3b5d1/workflow.png</graph><graph mime="image/svg+xml">1f1f2e70-d227-425a-a9e2-0cbda1c3b5d1/workflow.svg</graph><graph mime="application/pdf">1f1f2e70-d227-425a-a9e2-0cbda1c3b5d1/workflow.pdf</graph><input name="namespace"><mime type="text/plain"/></input><input name="list_of_geneIdentifiers_input"><mime type="text/plain"/></input><output name="matscan_gff"><mime type="text/plain"/></output><output name="meta-alignment"><mime type="text/plain"/></output><output name="score_matrix"><mime type="text/plain"/></output><output name="upstream_sequences_fasta"><mime type="text/plain"/></output><output name="gene_clusters"><mime type="text/plain"/></output><output name="MultiMeta_alignment"><mime type="text/xml"/></output><output name="MultiMeta_alignment_gff"><mime type="text/xml"/></output><output name="TFBSs_cluster_image"><mime type="image/*"/></output><output name="meta-alignment_gff"><mime type="text/plain"/></output><output name="gene_tree"><mime type="text/plain"/></output><step name="StringArticleName"/><step name="parse_moby_multiple_meta"/><step name="identifier"/><step name="parse_moby_matscan_gff"/><step name="Beanshell_scripting_host"/><step name="Filter_MatScan"/><step name="parse_moby_score_matrix"/><step name="decode_image_gff2jpeg"/><step name="parse_moby_gene_clusters"/><step name="parse_moby_meta"/><step name="parse_moby_gene_tree"/><step name="parse_moby_meta_gff"/><step name="parse_moby_multi_meta_gff"/><step name="parse_moby_upstream_sequences"/><step name="runSOTAClustering" kind="moby"><description><![CDATA[Runs SOTA algorithm to partition the gene space into subclusters. 
The input is a gene score matrix represented as a MicroArrayData_Text object.]]></description><secondaryInput name="resource_threshold" isOptional="true" type="string" default="35"/><secondaryInput name="distance" isOptional="true" type="string" default="euclidean"/></step><step name="fromGenericSequenceCollectionToFASTA"><description><![CDATA[Converts a collection of generic sequences into FASTA sequences]]></description></step><step name="getUpstreamSeqFromEnsembl" kind="moby"><description><![CDATA[Sequence retrieval tool from Ensembl database. It returns the upstream sequence of a given set of Ensembl gene identifiers. These identifiers could be external ones, such as Refseq Ids or Affymetrix ids.
 In case you select the orthologous mode, it will returns the upstream sequence of all orthologous genes of a given input gene (only one input gene identifier in that case)]]></description><secondaryInput name="upstream length" isOptional="true" type="string" default="500"/><secondaryInput name="intergenic only" isOptional="true" type="string" default="False"/><secondaryInput name="downstream length" isOptional="true" type="string" default=""/><secondaryInput name="organism" isOptional="true" type="string" default="Homo sapiens"/></step><step name="fromMetaAlignmentsToTextScoreMatrix"><description><![CDATA[Parses a collection of meta-alignment outputs to produce a text-formatted sequence similarity score matrix]]></description></step><step name="runMultiMetaAlignmentGFF" kind="moby"><description><![CDATA[Runs Multiple-Meta-Alignment software to perform multiple non-collinear transcription factor map alignments of promoter regions. It returns the multiple-meta-alignment output in GFF format.]]></description><secondaryInput name="mu penalty" isOptional="true" type="string" default="0.1"/><secondaryInput name="lamba penalty" isOptional="true" type="string" default="0.1"/><secondaryInput name="alpha penalty" isOptional="true" type="string" default="0.5"/><secondaryInput name="NoN-colinear penalty" isOptional="true" type="string" default="100"/><secondaryInput name="gap penalty" isOptional="true" type="string" default="-10"/></step><step name="runMultiPairwiseMetaAlignment" kind="moby"><description><![CDATA[runMultiPairwiseMetaAlignment runs Meta-alignment software on a multiple running mode, receiving a collection of maps, making pairs of them and, foreach pair, it produces alignments of sequences of TF binding sites. It returns the predictions in 'Meta-alignment' format. 
You can use runMatScanGFF to produce the input GFF files]]></description><secondaryInput name="lamba penalty" isOptional="true" type="string" default="0.1"/><secondaryInput name="alpha penalty" isOptional="true" type="string" default="0.5"/><secondaryInput name="mu penalty" isOptional="true" type="string" default="0.1"/></step><step name="runMultiPairwiseMetaAlignmentGFF" kind="moby"><description><![CDATA[Runs Meta-alignment software on a multiple running mode, receiving a collection of maps, making pairs of them and, foreach pair, producing, in GFF format, alignments of sequences of TF binding sites]]></description><secondaryInput name="mu penalty" isOptional="true" type="string" default="0.1"/><secondaryInput name="lamba penalty" isOptional="true" type="string" default="0.1"/><secondaryInput name="alpha penalty" isOptional="true" type="string" default="0.5"/></step><step name="runMatScanGFFCollection" kind="moby"><description><![CDATA[Reports putative predicted motifs on a given collection of DNA sequences. The motifs collections currently available are Transcription Factor binding site collections. The predicted set of motifs are reported in GFF format. 
If you want to give MatScan output to Meta-alignment program, you MUST use the 'log-likelihood matrix' mode.]]></description><secondaryInput name="matrix mode" isOptional="true" type="string" default="log-likelihood"/><secondaryInput name="strand" isOptional="true" type="string" default="Both"/><secondaryInput name="threshold" isOptional="true" type="string" default="0.85"/><secondaryInput name="motif database" isOptional="true" type="string" default="Jaspar"/></step><step name="runMultiMetaAlignment" kind="moby"><description><![CDATA[Runs Multiple-Meta-Alignment software to perform multiple non-collinear transcription factor map alignments of promoter regions.]]></description><secondaryInput name="alpha penalty" isOptional="true" type="string" default="0.5"/><secondaryInput name="lamba penalty" isOptional="true" type="string" default="0.1"/><secondaryInput name="NoN-colinear penalty" isOptional="true" type="string" default="100"/><secondaryInput name="gap penalty" isOptional="true" type="string" default="-10"/><secondaryInput name="mu penalty" isOptional="true" type="string" default="0.1"/></step><step name="Parse_Moby_Data_b64_encoded_jpeg"><description><![CDATA[Processor to parse the datatype b64_encoded_jpeg]]></description></step><step name="String"><description><![CDATA[a string]]></description></step><step name="runGFF2JPEG"><description><![CDATA[Runs gff2ps software to vizualize a set of GFF annotations maps. 
It returns an encoded image in JPEG format.]]></description></step><step name="List_Of_GeneIdentifiers"><description><![CDATA[Fractional solvation report]]></description></step><example uuid="719f7ce9-24dd-438c-bf7c-b0ab664dd9cb" name="list_of_geneIdentifiers_input_example" path="1f1f2e70-d227-425a-a9e2-0cbda1c3b5d1/examples/719f7ce9-24dd-438c-bf7c-b0ab664dd9cb.xml" date="2007-11-29T18:03:13+01:00"/></release></workflow><workflow uuid="workflow:2365b91b-b7ae-4aa9-ae70-e8be4f40c0b3" title="Predicting Functionally Important Residues"><release uuid="workflow:2365b91b-b7ae-4aa9-ae70-e8be4f40c0b3" lsid="urn:lsid:net.sf.taverna:wfDefinition:c19346b4-de3b-474e-8559-22a86c5155e3" author="José Manuel Rodríguez Carrasco INB-GN2-CNIO (jmrodriguez@cnio.es)" title="Predicting Functionally Important Residues" path="2365b91b-b7ae-4aa9-ae70-e8be4f40c0b3/workflow.xml" workflowType="application/vnd.taverna.scufl+xml" licenseName="CC Attribution-ShareAlike 3.0" licenseURI="http://creativecommons.org/licenses/by-sa/3.0/legalcode" responsibleMail="jmrodriguez@cnio.es" responsibleName="José Manuel Rodríguez Carrasco" date="2007-11-29T14:51:28+01:00"><description><![CDATA[This workflow runs some automatic method for predicting functionally important residues in protein squence alignments. The methods are 'FASS', 'MB', and 'S-method'. All these methods are called TreeDet.
At the end, the results of TreeDet are displayed in HTML format.

Its input is Protein Multiple Sequence Alignment. The accepted formats are ALN, FASTA, MSF, and PIR. The alignment has to be longer than 50 residues and contain at least 15 sequences (no more than 200).

FASS method:
Principal component analysis of the multiple alignment and computation of the statistical confidence in the organization of the family into sub-families.

MB method:
The idea behind the MB method is to look for positions in the multiple sequence alignment whose Mutational Behaviour resembles that of the global alignment. Such behaviour is expected in positions that have family-dependent conservation, since in these positions the amino acids within a family are similar to each other and different from those in other families.

SM method:
The S-method aims to find the tree-determinants associated to the optimal division of a functional protein family into functionally specific subfamilies. This method relies on a family phylogenetic tree, internally generated by ClustalW, and searches for the cut level with the greatest value of the position relative entropy (optimal level).


http://treedet.bioinfo.cnio.es/]]></description><graph mime="image/png">2365b91b-b7ae-4aa9-ae70-e8be4f40c0b3/workflow.png</graph><graph mime="image/svg+xml">2365b91b-b7ae-4aa9-ae70-e8be4f40c0b3/workflow.svg</graph><graph mime="application/pdf">2365b91b-b7ae-4aa9-ae70-e8be4f40c0b3/workflow.pdf</graph><input name="Protein Multiple Sequence Alignment"><description><![CDATA[It is protein multiple sequence alignment. The accepted formats are ALN (Clustalw), FASTA, MSF, and PIR.]]></description><mime type="text/plain"/></input><output name="FASS result"><description><![CDATA[Xml document that shows us important residues in protein sequence alignments.]]></description><mime type="text/plain"/></output><output name="SM result"><mime type="text/plain"/></output><output name="MB result"><mime type="text/plain"/></output><output name="html TreeDet result"><mime type="text/plain"/><mime type="text/html"/></output><step name="Flatten_list"/><step name="runMB" kind="moby"><description><![CDATA[MOBY Web Service that runs MB method.

MB is an automatic method for predicting functionally important residues (Mutational Behaviour) in protein sequence alignments (longer than 50 residues and at least 15 sequences). The accepted formats are ALN (Clustalw), FASTA, MSF, and PIR.

* Inputs: 

   - Moby Object of "Sequence_alignment_report". This object accepts Multiple Sequence Alignment. But for FASS method, the accepted formats are ALN (Clustalw), FASTA, MSF, and PIR.

* Outputs:

   - Complex Moby Object based on MB result (TreeDet_Annotation_XML{'treedet_annotation'}): Shows us important residues in protein sequence alignments as xml document.

* Parameters:

   - cutoff (Float{''}, default value: 0.05): Correlation cutoff above which positions are taken as predicted functional residues.

   - residues (Integer{''}, default value: 100): Percentage of High Scoring Residues.]]></description><secondaryInput name="cutoff" isOptional="true" type="string" default="0.05"/><secondaryInput name="residues" isOptional="true" type="string" default="100"/></step><step name="runFASS" kind="moby"><description><![CDATA[MOBY Web Service that runs FASS method.

FASS is an automatic method for predicting functionally important residues in protein sequence alignments (longer than 50 residues and at least 15 sequences). The accepted formats are ALN (Clustalw), FASTA, MSF, and PIR.

* Inputs: 

   - Moby Object of "Sequence_alignment_report". This object accepts Multiple Sequence Alignment. But for FASS method, the accepted formats are ALN (Clustalw), FASTA, MSF, and PIR.

* Outputs:

   - Complex Moby Object based on FASS result (TreeDet_Annotation_XML{'treedet_annotation'}): Shows us important residues in protein sequence alignments as xml document.

* Parameters:

   - gaps (Integer{''}, default value: 10): Maximum number of gaps to include a column in the analysis.

   - cutoff (Float{''}, default value: 0.05): Represents the significance level for no parametric test and tries to choose optimal number axis.

   - clusters (Integer{''}, default value: 4): Represents the number of groups referred to the above mentioned sequence space.

   - axes (Integer{''}, default value: 3): Represents the dimension of the sequence space -the residues space adopts the same dimension automatically-.]]></description><secondaryInput name="clusters" isOptional="true" type="string" default="4"/><secondaryInput name="axes" isOptional="true" type="string" default="3"/><secondaryInput name="gaps" isOptional="true" type="string" default="10"/><secondaryInput name="cutoff" isOptional="true" type="string" default="0.05"/></step><step name="runSM" kind="moby"><description><![CDATA[Automatic method for predicting functionally important residues using the concept of Relative Entropy in protein sequence alignments (longer than 50 residues and at least 15 sequences). The accepted formats are ALN (Clustalw), FASTA, MSF, and PIR.]]></description><secondaryInput name="maxsubfamily" isOptional="true" type="string" default="10"/><secondaryInput name="minsubfamily" isOptional="true" type="string" default="2"/><secondaryInput name="conservation" isOptional="true" type="string" default="85"/><secondaryInput name="secfamily" isOptional="true" type="string" default="3"/></step><step name="displayAlignmentFromTreeDetAnnotation"><description><![CDATA[Display protein sequence alignments as HTML format from annotations of TreeDet methods: FASS, MB, S-method, and/or SQUARE.]]></description></step><step name="String"><description><![CDATA[a string]]></description></step><step name="Parse_Moby_Data_TreeDet_Annotation_HTML"><description><![CDATA[Processor to parse the datatype TreeDet_Annotation_HTML]]></description></step><step name="Sequence_alignment_report"/><example uuid="6048785c-2957-426f-b239-950580f34124" name="MSF, Multiple Sequence Alginment" path="2365b91b-b7ae-4aa9-ae70-e8be4f40c0b3/examples/6048785c-2957-426f-b239-950580f34124.xml" date="2007-11-29T14:54:24+01:00"><![CDATA[<p>Example of Multiple Sequence Alignment as MSF format.</p>]]></example></release></workflow><workflow uuid="workflow:3e43b45a-5906-4c11-b593-09b27d3e75d0" 
title="GeneID_Workflow_icapture"><release uuid="workflow:3e43b45a-5906-4c11-b593-09b27d3e75d0" lsid="urn:lsid:net.sf.taverna:wfDefinition:fe6f841e-3148-4f45-b0e7-8d5949faeedd" author="" title="GeneID_Workflow_icapture" path="3e43b45a-5906-4c11-b593-09b27d3e75d0/workflow.xml" workflowType="application/vnd.taverna.scufl+xml" licenseName="CC Attribution-ShareAlike 3.0" licenseURI="http://creativecommons.org/licenses/by-sa/3.0/legalcode" responsibleMail="acanada@cnio.es" responsibleName="Andrés Cañada" date="2008-02-13T03:38:26+01:00"><description><![CDATA[]]></description><graph mime="image/png">3e43b45a-5906-4c11-b593-09b27d3e75d0/workflow.png</graph><graph mime="image/svg+xml">3e43b45a-5906-4c11-b593-09b27d3e75d0/workflow.svg</graph><graph mime="application/pdf">3e43b45a-5906-4c11-b593-09b27d3e75d0/workflow.pdf</graph><input name="identifier"><mime type="text/plain"/></input><input name="namespace"><mime type="text/plain"/></input><input name="fasta_sequence"><mime type="text/plain"/></input><output name="peptide_sequences"><mime type="text/plain"/></output><output name="gff_map"><mime type="image/*"/></output><output name="geneid_gff_predictions"><mime type="text/plain"/></output><output name="pepStats"><mime type="text/plain"/></output><output name="blastp_report"><mime type="text/plain"/></output><output name="interproscan_report_debugging"><mime type="text/plain"/></output><output name="pfam_report"><mime type="text/plain"/></output><output name="motif_alignments"><mime type="text/plain"/></output><step name="Decode_base64_to_byte"/><step name="StringArticleName"/><step name="runEmbossPepstatsFromSequence" kind="moby"><description><![CDATA[Outputs a report of simple protein sequence information.]]></description><secondaryInput name="to" isOptional="true" type="string" default=""/><secondaryInput name="from" isOptional="true" type="string" default=""/></step><step name="translateGeneIDGFFPredictions" kind="moby"><description><![CDATA[Translates the GeneID gene 
predictions, given in GFF format, into a set of aminoacid sequences]]></description><secondaryInput name="translation table" isOptional="true" type="string" default="Standard (1)"/></step><step name="fromGenericSequenceCollectionToFASTA"><description><![CDATA[Converts a collection of generic sequences into FASTA sequences]]></description></step><step name="searchInterPro" kind="moby"><description><![CDATA[Query an Amino Acid sequence against InterPro.]]></description><secondaryInput name="hmmsmart" isOptional="true" type="string" default="no"/><secondaryInput name="tmhmm" isOptional="true" type="string" default="yes"/><secondaryInput name="seg" isOptional="true" type="string" default="yes"/><secondaryInput name="fprintscan" isOptional="true" type="string" default="no"/><secondaryInput name="hmmpir" isOptional="true" type="string" default="no"/><secondaryInput name="superfamily" isOptional="true" type="string" default="no"/><secondaryInput name="coils" isOptional="true" type="string" default="yes"/><secondaryInput name="gene3d" isOptional="true" type="string" default="no"/><secondaryInput name="scanregexp" isOptional="true" type="string" default="no"/><secondaryInput name="blastprodom" isOptional="true" type="string" default="no"/><secondaryInput name="hmmpfam" isOptional="true" type="string" default="no"/><secondaryInput name="signalp" isOptional="true" type="string" default="yes"/><secondaryInput name="hmmpanther" isOptional="true" type="string" default="no"/><secondaryInput name="profilescan" isOptional="true" type="string" default="no"/><secondaryInput name="hmmtigr" isOptional="true" type="string" default="no"/></step><step name="runHMMPfam" kind="moby"><description><![CDATA[Searches a HMM profile database (Pfam) with a query sequence. 
Use this if you are trying to annotate an unknown sequence.]]></description><secondaryInput name="hmm_accessions" isOptional="true" type="string" default="on"/><secondaryInput name="e-value_domain" isOptional="true" type="string" default=""/><secondaryInput name="alignment_number" isOptional="true" type="string" default="10"/><secondaryInput name="score_cutoff" isOptional="true" type="string" default=""/><secondaryInput name="e-value_score" isOptional="true" type="string" default="59021"/><secondaryInput name="score_cutoff_domain" isOptional="true" type="string" default=""/><secondaryInput name="e-value" isOptional="true" type="string" default="10.0"/></step><step name="fromFASTAToDNASequence"><description><![CDATA[Converts a DNA FASTA sequence into a DNA sequence]]></description></step><step name="runGeneIDGFF" kind="moby"><description><![CDATA[Ab initio gene prediction tool that returns the gene predictions in GFF format (GFF version 2).]]></description><secondaryInput name="strand" isOptional="true" type="string" default="Both"/><secondaryInput name="profile" isOptional="true" type="string" default="Homo sapiens (suitable for mammals)"/><secondaryInput name="engine" isOptional="true" type="string" default="Normal"/><secondaryInput name="signals" isOptional="true" type="string" default="None"/><secondaryInput name="exons" isOptional="true" type="string" default="None"/></step><step name="runGFF2JPEG"><description><![CDATA[Runs gff2ps software to vizualize a set of GFF annotations maps. 
It returns an encoded image in JPEG format.]]></description></step><step name="Parse_Moby_Data_Pepstats_Text"><description><![CDATA[Processor to parse the datatype Pepstats_Text]]></description></step><step name="runNCBIBlastp" kind="moby"><description><![CDATA[Compares a protein sequence against a protein database and calculates the statistical significance of matches using the Basic Local Alignment Tool (BLAST).]]></description><secondaryInput name="database" isOptional="true" type="string" default="Swiss-Prot"/><secondaryInput name="gapalign" isOptional="true" type="string" default="true"/><secondaryInput name="opengap" isOptional="true" type="string" default="11"/><secondaryInput name="matrix" isOptional="true" type="string" default="BLOSUM62"/><secondaryInput name="alignments" isOptional="true" type="string" default="15"/><secondaryInput name="extendgap" isOptional="true" type="string" default="2"/><secondaryInput name="filter" isOptional="true" type="string" default="true"/><secondaryInput name="expected_threshold" isOptional="true" type="string" default="0.00001"/><secondaryInput name="dropoff" isOptional="true" type="string" default="0"/><secondaryInput name="scores" isOptional="true" type="string" default="25"/></step><step name="Parse_Moby_Data_GFF"><description><![CDATA[Processor to parse the datatype GFF]]></description></step><step name="String"><description><![CDATA[a string]]></description></step><step name="Parse_Moby_Data_b64_encoded_jpeg"><description><![CDATA[Processor to parse the datatype b64_encoded_jpeg]]></description></step><step name="FastaSequencObject"><description><![CDATA[NA sequence in FASTA format]]></description></step><step name="Parse_Moby_Data_BLAST_Text"><description><![CDATA[Processor to parse the datatype BLAST-Text]]></description></step><step name="Parse_Moby_Data_FASTA"><description><![CDATA[Processor to parse the datatype FASTA]]></description></step><step 
name="Parse_Moby_Data_HMMPfam_Report"><description><![CDATA[Processor to parse the datatype HMMPfam_Report]]></description></step><example uuid="29f80a0b-839e-4b21-b524-e8039e98ec4d" name="geneId_workflow_input" path="3e43b45a-5906-4c11-b593-09b27d3e75d0/examples/29f80a0b-839e-4b21-b524-e8039e98ec4d.xml" date="2007-11-29T18:26:50+01:00"/></release></workflow><workflow uuid="workflow:4e088ed1-f29d-4b8b-8161-34d32ca367f9" title="Protein Structure Solvation Analysis Workflow"><release uuid="workflow:4e088ed1-f29d-4b8b-8161-34d32ca367f9" lsid="urn:lsid:net.sf.taverna:wfDefinition:ce4ff31a-2e38-4b6b-98a4-8759fb7858d9" author="Adam Hospital (adam@mmb.pcb.ub.es)" title="Protein Structure Solvation Analysis Workflow" path="4e088ed1-f29d-4b8b-8161-34d32ca367f9/workflow.xml" workflowType="application/vnd.taverna.scufl+xml" licenseName="CC Attribution-ShareAlike 3.0" licenseURI="http://creativecommons.org/licenses/by-sa/3.0/legalcode" responsibleMail="adam@mmb.pcb.ub.es" responsibleName="Adam Hospital" date="2007-11-29T19:45:34+01:00"><description><![CDATA[From a structure (PDB code), runs a fractional solvation analysis (FSOLV). It returns three different outputs

* A FSOLV report with information about fractional solvation for each protein residue.
* A Jmol 3D representation of this FSOLV report.
* A parsed Sequence with FSOLV report information, where N means NORMAL and O means OUTSIDE.


Steps (Web Services Used):

1.- getStructurePDB Service
	Getting the structure from the Protein Data Bank.
2.- runFSOLVFromPDBText
	Evaluates fractional solvation based on Linear Response Theory (LRT) method.
3.- parsePropertySequenceFromFSOLV
	Parses a FSOLVText report into a PropertySequence (where N means NORMAL and O means OUTSIDE).
4.- showFSOLVonStructure
	FSolv report 3D Jmol representation.]]></description><graph mime="image/png">4e088ed1-f29d-4b8b-8161-34d32ca367f9/workflow.png</graph><graph mime="image/svg+xml">4e088ed1-f29d-4b8b-8161-34d32ca367f9/workflow.svg</graph><graph mime="application/pdf">4e088ed1-f29d-4b8b-8161-34d32ca367f9/workflow.pdf</graph><input name="idPDB"><mime type="text/plain"/></input><output name="FsolvOnStructure"><mime type="text/plain"/></output><output name="propertySequence"><mime type="text/plain"/></output><output name="fsolvOutput"><mime type="text/plain"/></output><step name="parsePropertySequenceFromFSOLVText"><description><![CDATA[Parses a FSOLVText report into a PropertySequence (where N means NORMAL and O means OUTSIDE)]]></description></step><step name="showFSOLVonStructure"><description><![CDATA[FSolv report 3D representation]]></description></step><step name="getStructureFromPDB"><description><![CDATA[Retrieves structure from PDB (PDB format) from a PDB Id Allows to specify a single chain using the format XXXX_X.]]></description></step><step name="runFSOLVFromPDBText"><description><![CDATA[Evaluates fractional solvation based on LRT method]]></description></step><step name="Object"><description><![CDATA[an object]]></description></step></release></workflow><workflow uuid="workflow:5611f3f0-0ad0-479e-8dfb-a750bfb85175" title="Hot Spot Analysis of Protein Sequence Workflow"><release uuid="workflow:5611f3f0-0ad0-479e-8dfb-a750bfb85175" lsid="urn:lsid:net.sf.taverna:wfDefinition:3ccab69d-8893-46c2-8e3b-7ede874f6832" author="Adam Hospital (adam@mmb.pcb.ub.es)" title="Hot Spot Analysis of Protein Sequence Workflow" path="5611f3f0-0ad0-479e-8dfb-a750bfb85175/workflow.xml" workflowType="application/vnd.taverna.scufl+xml" licenseName="CC Attribution-ShareAlike 3.0" licenseURI="http://creativecommons.org/licenses/by-sa/3.0/legalcode" responsibleMail="adam@mmb.pcb.ub.es" responsibleName="Adam Hospital" date="2007-11-29T20:52:55+01:00"><description><![CDATA[A Hot Spot is defined as a 
sequence position with a high propensity to give pathological effects when mutated. The workflow covers all possible calculations; unnecessary outputs can be omitted.

Input: PDB id. A UniProt Id or raw sequences could also be used.

Output: 
	Hot Spot Prediction:
		As a raw text.
		As GFF file.
		Shown in 3D Structure.
		Shown as sequence-based plot.

Intermediate output also available:

	3D Structure.
	Protein Sequence.
	PSI-Blast report.
	Multiple Alignment.
	Secondary Structure and accessibility prediction (PHD), raw or as a property sequence.

Steps (Web Services Used):

	1.- getStructurePDB Service
		Getting the structure from the Protein Data Bank.

	2.- parseAminoAcidSequenceFromPDBText
		Extracts amino acid sequence that corresponds to structure in PDB.

	3.- runPSIBlastpFromAminoAcidSequence
		Find homologues performing 2 iterations of protein PSI Blast against non-redundant database with standard settings.

	4.- runPMUTHotSpotFromBlastText
		Perform Secondary Structure and Accessibility Prediction and Hot Spot Analysis on Blast Report.

		Steps 3 and 4 can be done in one step with runPMUTHotSpotFromAminoAcidSequence.

	5.- parsePropertySequenceFromPMUTText, parseMultipleAlignFromBLASTText, parseGFFFromFeatureAASequence, parseFeatureAASequenceFromPMUTText, plotFeatureAASequence, showPMUTonStructure
		Data Formatting.]]></description><graph mime="image/png">5611f3f0-0ad0-479e-8dfb-a750bfb85175/workflow.png</graph><graph mime="image/svg+xml">5611f3f0-0ad0-479e-8dfb-a750bfb85175/workflow.svg</graph><graph mime="application/pdf">5611f3f0-0ad0-479e-8dfb-a750bfb85175/workflow.pdf</graph><input name="idPDB"><mime type="text/plain"/></input><output name="AA_Seq"><mime type="text/plain"/></output><output name="PmutReport"><mime type="text/plain"/></output><output name="HotSpot3D"><mime type="text/plain"/></output><output name="FeatureAAseq"><mime type="text/plain"/></output><output name="HotSpot2D"><mime type="text/plain"/></output><output name="GFF"><mime type="text/plain"/></output><output name="PHDFromSeq"><mime type="text/plain"/></output><output name="PropSeq"><mime type="text/plain"/></output><output name="Blast"><mime type="text/plain"/></output><output name="HotSpotBlast"><mime type="text/plain"/></output><output name="PHDFromBlast"><mime type="text/plain"/></output><output name="MultAlign"><mime type="text/plain"/></output><step name="parseGFFFromFeatureAASequence"><description><![CDATA[Converts a feature AA sequence into GFF]]></description></step><step name="plotFeatureAASequence"><description><![CDATA[Returns a 2D graphic from a FeatureAASequence (png image)]]></description></step><step name="parseFeatureAASequenceFromPMUTText"><description><![CDATA[Parses a PMUTText report into a FeatureAASequence. 
Value is patogenicity prediction]]></description></step><step name="parsePropertySequenceFromPMUTText"><description><![CDATA[Parses a PMUTText report into a PropertySequence (where N means NEUTRAL and H means HOT-SPOT)]]></description></step><step name="parseAminoAcidSequenceFromPDBText"><description><![CDATA[Extracts amino acid sequence that corresponds to structure in PDB]]></description></step><step name="Object"><description><![CDATA[an object]]></description></step><step name="parseMultipleAlignFromBLASTText"><description><![CDATA[Produces a multiple alignment in FASTA format from Blast Hits.]]></description></step><step name="runPMUTHotSpotFromBLASTText"><description><![CDATA[Predicts sequence positions (Hot Spots) that would produce pathological behaviour when mutated. Trained with human pathological mutations]]></description></step><step name="runPSIBlastpFromAminoAcidSequence"><description><![CDATA[Perform 2 iterations of protein PSI Blast against non-redundant database with standard settings]]></description></step><step name="runPMUTHotSpotFromAminoAcidSequence"><description><![CDATA[Predicts sequence positions (Hot Spots) that would produce pathological behaviour when mutated. 
Trained with human pathological mutations]]></description></step><step name="runPHDFromAminoAcidSequence"><description><![CDATA[Performs secondary structure and accessibility predictions using PHD]]></description></step><step name="getStructureFromPDB"><description><![CDATA[Retrieves structure from PDB (PDB format) from a PDB Id Allows to specify a single chain using the format XXXX_X.]]></description></step><step name="runPHDFromBLASTText"><description><![CDATA[Predicts secondary structure and accessibility using PHD program]]></description></step><step name="showPMUTonStructure"><description><![CDATA[PMUT report 3D representation]]></description></step><snapshot name="1que snapshot" uuid="cca63c1d-3c8d-4275-8099-f9012a22c8a9" date="2008-07-18T00:11:24+02:00" responsibleMail="jmfernandez@cnio.es" responsibleName="José María Fernández"><![CDATA[<p>This snapshot was taken after a run using as input 1que PDB identifier.</p>]]></snapshot></release></workflow><workflow uuid="workflow:83944220-8adb-49c9-878a-439906a60092" title="Preprocessing, differential expression test and FatiGO test"><release uuid="workflow:83944220-8adb-49c9-878a-439906a60092" lsid="urn:lsid:net.sf.taverna:wfDefinition:2ab38286-c858-49d7-97d1-edb0ce272589" author="Joaquin Tarraga Gimenez, jtarraga@cipf.es" title="Preprocessing, differential expression test and FatiGO test" path="83944220-8adb-49c9-878a-439906a60092/workflow.xml" workflowType="application/vnd.taverna.scufl+xml" licenseName="CC Attribution-ShareAlike 3.0" licenseURI="http://creativecommons.org/licenses/by-sa/3.0/legalcode" responsibleMail="jtarraga@cipf.es" responsibleName="Joaquin Tarraga Gimenez" date="2007-11-29T11:50:21+01:00"><description><![CDATA[You can use this workflow, if you have DNA microarray data and you want to know if there are genes differentially expressed under different experimental conditions, and if the resulting differential expression can be considered a significant biological difference according to the 
FatiGO test.

FatiGO test takes two lists of genes (ideally a group of interest and the rest of the genes in the experiment, although any two groups, formed in any way, can be tested against each other) and convert them into two lists of GO terms using the corresponding gene-GO association table. Then a Fisher's exact test for 2x2 contingency tables is used to check for significant overrepresentation of GO terms in one of the sets with respect to the other one. Multiple test correction to account for the multiple hypothesis tested (one for each GO term) is applied as previously described.]]></description><graph mime="image/png">83944220-8adb-49c9-878a-439906a60092/workflow.png</graph><graph mime="image/svg+xml">83944220-8adb-49c9-878a-439906a60092/workflow.svg</graph><graph mime="application/pdf">83944220-8adb-49c9-878a-439906a60092/workflow.pdf</graph><input name="gene_expression_data"><mime type="text/plain"/></input><output name="differential_expression_image"><mime type="text/plain"/></output><output name="fatigo_result_url"><mime type="text/plain"/></output><step name="runPreprocessing" kind="moby"><description><![CDATA[Preprocessing service for gene expression patterns that allows users to apply log-transform, merge replicated values, filter missing values, impute missing values and standardize profiles]]></description><secondaryInput name="standardize" isOptional="true" type="string" default="Yes"/><secondaryInput name="filter_missing_mode" isOptional="true" type="string" default="None"/><secondaryInput name="log_transform" isOptional="true" type="string" default="Log 2"/><secondaryInput name="impute_missing_mode" isOptional="true" type="string" default="Fill with zeros"/><secondaryInput name="merge_replicates_mode" isOptional="true" type="string" default="None"/></step><step name="runDifferentialExpressionTest" kind="moby"><description><![CDATA[This webservice allows users to analyze gene expression under different experimental conditions: differential expresssion 
between two conditions (applying t-test, Bayes, Data adaptive, SAM and CLEAR tests), differential expression among more than two conditions (Anova and CLEAR tests), differential expression related to a continuous variable (Regression, Pearson and Spearman correlations) and differential expression related to a survival time (Cox test)]]></description><secondaryInput name="test" isOptional="true" type="string" default="T-test"/><secondaryInput name="clear_significance_level" isOptional="true" type="string" default="0.05"/></step><step name="displayDifferentialExpressionTest" kind="moby"><description><![CDATA[This webservice returns a PNG image representing the results from a differential expresssion test (t-test, Bayes, Data adaptive, SAM, CLEAR, Anova, Regression, Pearson correlation, Spearman correlation and Cox tests). In addition, an URL to the image is returned in the output field 'legend']]></description><secondaryInput name="standardize" isOptional="true" type="string" default="yes"/><secondaryInput name="scale" isOptional="true" type="string" default="-3/+3"/><secondaryInput name="rows" isOptional="true" type="string" default="50"/></step><step name="String"><description><![CDATA[a string]]></description></step><step name="MicroArrayData_Text"><description><![CDATA[Micro-array data in text format representing gene expression patterns. Each line must have the gene name and the expression values and the expression values for each condition. 
All the data items must be separated by tabulators]]></description></step><step name="getGeneListFromDifferentialExpressionTest" kind="moby"><description><![CDATA[This webservice extracts a list of genes from a differential expession test according to your input parameters: the list size and the extraction mode (from top to bottom, or from bottom to top)]]></description><secondaryInput name="extraction" isOptional="true" type="string" default="from top to bottom"/><secondaryInput name="size" isOptional="true" type="string" default="20"/></step><step name="getGeneListFromDifferentialExpressionTest1" kind="moby"><description><![CDATA[This webservice extracts a list of genes from a differential expession test according to your input parameters: the list size and the extraction mode (from top to bottom, or from bottom to top)]]></description><secondaryInput name="extraction" isOptional="true" type="string" default="from bottom to top"/><secondaryInput name="size" isOptional="true" type="string" default="20"/></step><step name="runFatiGO" kind="moby"><description><![CDATA[runFatiGO allows you to compare two lists of genes in terms of PubMed bio-entities (chimical and diseases), Gene Ontology terms (cellular component, biological process and molecular function), InterPro motifs, KEGG pathways, SwissProt keywords, Transcriptor factors, cis-regulatory elements (cisRED) and tissues]]></description><secondaryInput name="terms" isOptional="true" type="string" default="Gene Ontology"/><secondaryInput name="organism" isOptional="true" type="string" default="Homo sapiens"/></step><step name="displayFatiGO"><description><![CDATA[This service returns an URL to FatiGO graphical results]]></description></step><example uuid="214f0af3-7222-4d88-91cc-6ed2a08ecb69" name="gene_expression_data_example" path="83944220-8adb-49c9-878a-439906a60092/examples/214f0af3-7222-4d88-91cc-6ed2a08ecb69.xml" date="2007-11-29T11:51:03+01:00"/></release></workflow><workflow 
uuid="workflow:8b6f89c0-fad2-43f4-8ad6-4d464e4da4ff" title=""><release uuid="workflow:8b6f89c0-fad2-43f4-8ad6-4d464e4da4ff" lsid="urn:lsid:www.mygrid.org.uk:operation:ZR18RDQYZP0" author="" title="" path="8b6f89c0-fad2-43f4-8ad6-4d464e4da4ff/workflow.xml" workflowType="application/vnd.taverna.scufl+xml" licenseName="CC Attribution-ShareAlike 3.0" licenseURI="http://creativecommons.org/licenses/by-sa/3.0/legalcode" responsibleMail="acanada@cnio.es" responsibleName="Andrés Cañada" date="2008-01-21T11:04:25+01:00"><description><![CDATA[]]></description><graph mime="image/png">8b6f89c0-fad2-43f4-8ad6-4d464e4da4ff/workflow.png</graph><graph mime="image/svg+xml">8b6f89c0-fad2-43f4-8ad6-4d464e4da4ff/workflow.svg</graph><graph mime="application/pdf">8b6f89c0-fad2-43f4-8ad6-4d464e4da4ff/workflow.pdf</graph><input name="trace_data"><mime type="text/plain"/></input><input name="namespace"><mime type="text/plain"/></input><input name="identifier"><mime type="text/plain"/></input><output name="All_Reads_Quality_Data"><mime type="text/plain"/></output><output name="All_Reads_Sequences"><mime type="text/plain"/></output><output name="Phrap_Singlets_Contigs_Sequences"><mime type="text/plain"/></output><output name="Assembly_Ace_Output"><mime type="text/xml"/></output><output name="High_Quality_Reads_Sequences"><mime type="text/plain"/></output><output name="High_Quality_Reads_Quality_Data"><mime type="text/plain"/></output><step name="articleName_rawdata"/><step name="make_unique_fasta_sequences_string"/><step name="Encode_byte___to_base64"/><step name="parse_moby_sequences"/><step name="articleNameContent"/><step name="identifierDefault"/><step name="parse_moby_quality_data"/><step name="parse_moby_phrap_sequences"/><step name="make_unique_fasta_quality_string"/><step name="String2"><description><![CDATA[a string]]></description></step><step name="String"><description><![CDATA[a string]]></description></step><step name="filterSequencesAndQualityDataByLength" 
kind="moby"><description><![CDATA[Filters a set of sequences based on their length. Only sequences which length is greater than the length cutoff will be returned. It also filters out the quality data associated with the removed sequences.]]></description><secondaryInput name="length_cutoff" isOptional="true" type="string" default="75"/><secondaryInput name="trim_masked_regions" isOptional="true" type="string" default="On"/></step><step name="runCrossMatchToScreenVector" kind="moby"><description><![CDATA[Runs cross_match to screen out vector sequences from a DNA sequence. The vector sequences dataset is stored internally]]></description><secondaryInput name="minscore" isOptional="true" type="string" default="20"/><secondaryInput name="minmatch" isOptional="true" type="string" default="12"/></step><step name="runPhrapWithQualityData" kind="moby"><description><![CDATA[Runs Phrap to Assemble a set of DNA sequences, taking into account base quality data]]></description><secondaryInput name="node_seg" isOptional="true" type="string" default="8"/><secondaryInput name="node_space" isOptional="true" type="string" default="4"/></step><step name="runPhred" kind="moby"><description><![CDATA[Phred writes the base calls from a chromatogram and returns a DNA sequence and FASTA base quality data]]></description><secondaryInput name="trim_cutoff" isOptional="true" type="string" default="0.05"/><secondaryInput name="trim_alt" isOptional="true" type="string" default="On"/></step><step name="String1"><description><![CDATA[a string]]></description></step><step name="ABI_Encoded"><description><![CDATA[Trace data in ABI format]]></description></step><step name="FASTA_Base_Quality_multi"><description><![CDATA[sequences base quality data in FASTA format]]></description></step><step name="FASTA_NA_multi"><description><![CDATA[multiple nucleic acids sequence in FASTA format]]></description></step><step name="make_unique_fasta_sequences_string_2"/><step 
name="make_unique_fasta_sequences_string_3"/><step name="make_unique_fasta_quality_string_2"/><step name="parse_moby_ace_output"/><step name="parse_moby_high_quality_reads_sequences"/><step name="parse_moby_high_quality_reads_quality_data"/></release></workflow><workflow uuid="workflow:8c8239be-0dc5-4ad3-8e38-dd36ccc7c9ea" title="Protein Structure Basic Optimization Workflow"><release uuid="workflow:8c8239be-0dc5-4ad3-8e38-dd36ccc7c9ea" lsid="urn:lsid:net.sf.taverna:wfDefinition:a0b98969-7fcb-4948-9567-d4f3f03421ee" author="Adam Hospital" title="Protein Structure Basic Optimization Workflow" path="8c8239be-0dc5-4ad3-8e38-dd36ccc7c9ea/workflow.xml" workflowType="application/vnd.taverna.scufl+xml" licenseName="CC Attribution-ShareAlike 3.0" licenseURI="http://creativecommons.org/licenses/by-sa/3.0/legalcode" responsibleMail="adam@mmb.pcb.ub.es" responsibleName="Adam Hospital" date="2008-07-24T10:31:48+02:00"><description><![CDATA[*  Protein Structure Basic Optimization Workflow:

		From a structure (PDB code), runs a basic preparation (ionization, solvent addition), 
		a structure minimization, a system equilibration and a short molecular dynamics simulation
		in order to relax the structure.

		Steps (Web Services Used):
		
			1.- getStructurePDB Service
				Getting the structure from the Protein Data Bank.
				
			2.- cleanPDB Service
				"Cleans" a PDB structure: Remove Crystal Waters and/or Crystal Hydrogens and/or Non-Parametrized Ligands
				and Renumber Residue Sequence.

			3.- addDisulphideBondsPDB Service
				Adding Disulfide Bonds with a distance criterion in the PDB.

			4.- addHydrogensPDB Service
				Adding Hydrogens in the PDB.

			5.- optimizeStructureFromNAMD_Structure Service (Hydrogen Minimization - md_type=1)
				Minimize Hydrogens in the PDB.

			6.- optimizeStructureFromNAMD_Structure Service (Protein Minimization - md_type=2)
				Minimize a PDB formatted Protein, with Backbone and SideChains Restrained.

			7.- solvateProteinFromNAMD_Structure Service (Water Box Type + Ions)
				Adding Ions and Solvent to a Structure.

			8.- optimizeStructureFromNAMD_Structure Service (Protein-Solvent System Basic Equilibration - md_type=4)
				Running an equilibration in order to relax the structure.

			9.- optimizeStructureFromNAMD_Structure Service (Protein-Solvent System Short Molecular Dynamics Simulation - md_type=7)
				Running a short molecular dynamics simulation in order to relax the structure.]]></description><graph mime="image/png">8c8239be-0dc5-4ad3-8e38-dd36ccc7c9ea/workflow.png</graph><graph mime="image/svg+xml">8c8239be-0dc5-4ad3-8e38-dd36ccc7c9ea/workflow.svg</graph><graph mime="application/pdf">8c8239be-0dc5-4ad3-8e38-dd36ccc7c9ea/workflow.pdf</graph><input name="pdbCode"><mime type="text/plain"/></input><output name="prepared Structure"><mime type="text/plain"/></output><step name="addDisulphideBondsPDB"><description><![CDATA[Adding Disulfide Bonds with a distance criteria in the PDB.]]></description></step><step name="addHydrogensPDB"><description><![CDATA[Adding Hydrogens in the PDB.]]></description></step><step name="getStructureFromPDB"><description><![CDATA[Retrieves structure from PDB (PDB format) from a PDB Id Allows to specify a single chain using the format XXXX_X.]]></description></step><step name="cleanPDB" kind="moby"><description><![CDATA["Cleans" a PDB structure: Remove Crystal Waters and/or Crystal Hydrogens and/or Non-Parametrized Ligands and Renumber Residue Sequence]]></description><secondaryInput name="waters" isOptional="true" type="string" default="true"/><secondaryInput name="ligands" isOptional="true" type="string" default="true"/><secondaryInput name="hydrogens" isOptional="true" type="string" default="true"/></step><step name="Object"><description><![CDATA[an object]]></description></step><step name="optimizeStructureFromNAMD_Structure" kind="moby"><description><![CDATA[Run a Simple Molecular Dynamic Simulation (optimization) with Protein-Solvent system (Using Namd program).]]></description><secondaryInput name="restraint" isOptional="true" type="string" default="0"/><secondaryInput name="minimize" isOptional="true" type="string" default="100"/><secondaryInput name="time" isOptional="true" type="string" default="2.5"/><secondaryInput name="temperature" isOptional="true" type="string" default="310"/><secondaryInput name="md_type" 
isOptional="true" type="string" default="1"/><secondaryInput name="timestep" isOptional="true" type="string" default="2.0"/></step><step name="solvateProteinFromNAMD_Structure" kind="moby"><description><![CDATA[Solvates a Protein.]]></description><secondaryInput name="ions" isOptional="true" type="string" default="true"/><secondaryInput name="ionic_concentration" isOptional="true" type="string" default="0.05"/><secondaryInput name="boxtype" isOptional="true" type="string" default="box"/><secondaryInput name="boxsize" isOptional="true" type="string" default="7"/></step><step name="optimizeStructureFromNAMD_Structure2" kind="moby"><description><![CDATA[Run a Simple Molecular Dynamic Simulation (optimization) with Protein-Solvent system (Using Namd program).]]></description><secondaryInput name="restraint" isOptional="true" type="string" default="0"/><secondaryInput name="minimize" isOptional="true" type="string" default="50"/><secondaryInput name="time" isOptional="true" type="string" default="0.5"/><secondaryInput name="temperature" isOptional="true" type="string" default="310"/><secondaryInput name="md_type" isOptional="true" type="string" default="4"/><secondaryInput name="timestep" isOptional="true" type="string" default="2.0"/></step><step name="optimizeStructureFromNAMD_Structure1" kind="moby"><description><![CDATA[Run a Simple Molecular Dynamic Simulation (optimization) with Protein-Solvent system (Using Namd program).]]></description><secondaryInput name="restraint" isOptional="true" type="string" default="0"/><secondaryInput name="minimize" isOptional="true" type="string" default="100"/><secondaryInput name="time" isOptional="true" type="string" default="2.5"/><secondaryInput name="temperature" isOptional="true" type="string" default="310"/><secondaryInput name="md_type" isOptional="true" type="string" default="2"/><secondaryInput name="timestep" isOptional="true" type="string" default="2.0"/></step><step name="optimizeStructureFromNAMD_Structure3" 
kind="moby"><description><![CDATA[Run a Simple Molecular Dynamic Simulation (optimization) with Protein-Solvent system (Using Namd program).]]></description><secondaryInput name="restraint" isOptional="true" type="string" default="0"/><secondaryInput name="minimize" isOptional="true" type="string" default="50"/><secondaryInput name="time" isOptional="true" type="string" default="0.5"/><secondaryInput name="temperature" isOptional="true" type="string" default="310"/><secondaryInput name="md_type" isOptional="true" type="string" default="7"/><secondaryInput name="timestep" isOptional="true" type="string" default="1.0"/></step></release></workflow><workflow uuid="workflow:8edab9aa-a489-4375-b281-2087427f7cca" title="runSGP2GFF"><release uuid="workflow:8edab9aa-a489-4375-b281-2087427f7cca" lsid="urn:lsid:www.mygrid.org.uk:operation:SSAVFCN82D1" author="" title="runSGP2GFF" path="8edab9aa-a489-4375-b281-2087427f7cca/workflow.xml" workflowType="application/vnd.taverna.scufl+xml" licenseName="CC Attribution-ShareAlike 3.0" licenseURI="http://creativecommons.org/licenses/by-sa/3.0/legalcode" responsibleMail="acanada@cnio.es" responsibleName="Andrés Cañada" date="2008-02-13T03:38:26+01:00"><description><![CDATA[]]></description><graph mime="image/png">8edab9aa-a489-4375-b281-2087427f7cca/workflow.png</graph><graph mime="image/svg+xml">8edab9aa-a489-4375-b281-2087427f7cca/workflow.svg</graph><graph mime="application/pdf">8edab9aa-a489-4375-b281-2087427f7cca/workflow.pdf</graph><input name="namespace"><mime type="text/plain"/></input><input name="fasta_sequence_1"><mime type="text/plain"/></input><input name="identifier_1"><mime type="text/plain"/></input><input name="fasta_sequence_2"><mime type="text/plain"/></input><input name="identifier_2"><mime type="text/plain"/></input><output name="geneid_gff"><mime type="text/plain"/></output><output name="tblastx_report"><mime type="text/plain"/></output><output name="gff_map"><mime type="image/*"/></output><output 
name="peptide_sequences"><mime type="text/xml"/></output><step name="StringArticleName"/><step name="Decode_base64_to_byte"/><step name="fromFASTAToDNASequence_2"><description><![CDATA[Converts a DNA FASTA sequence into a DNA sequence]]></description></step><step name="runSGP2GFF" kind="moby"><description><![CDATA[Ab initio gene prediction service. It runs geneid with synteny evidences to improve the accuracy of the results and returns the output predictions in GFF format. To generate the synteny evidences, use a service that provides tblastx.]]></description><secondaryInput name="profile" isOptional="true" type="string" default="Human Vs Mouse"/></step><step name="runGFF2JPEG"><description><![CDATA[Runs gff2ps software to vizualize a set of GFF annotations maps. It returns an encoded image in JPEG format.]]></description></step><step name="fromGenericSequenceCollectionToFASTA"><description><![CDATA[Converts a collection of generic sequences into FASTA sequences]]></description></step><step name="fromFASTAToDNASequence_1"><description><![CDATA[Converts a DNA FASTA sequence into a DNA sequence]]></description></step><step name="runWUTBlastx_2Seqs" kind="moby"><description><![CDATA[It compares the six-frame translations of a nucleotide query sequence against the six-frame translations of a nucleotide sequence (used as a database) using the Washington University BLAST algorithm.]]></description><secondaryInput name="nogaps" isOptional="true" type="string" default="off"/><secondaryInput name="gap_open" isOptional="true" type="string" default="9"/><secondaryInput name="filter" isOptional="true" type="string" default="none"/><secondaryInput name="statistics" isOptional="true" type="string" default="sump"/><secondaryInput name="word_size" isOptional="true" type="string" default="3"/><secondaryInput name="alignments" isOptional="true" type="string" default="15"/><secondaryInput name="matrix" isOptional="true" type="string" default="blosum62"/><secondaryInput 
name="expected_threshold" isOptional="true" type="string" default="10.0"/><secondaryInput name="scores" isOptional="true" type="string" default="25"/><secondaryInput name="gap_extension" isOptional="true" type="string" default="2"/></step><step name="translateGeneIDGFFPredictions" kind="moby"><description><![CDATA[Translates the GeneID gene predictions, given in GFF format, into a set of aminoacid sequences]]></description><secondaryInput name="translation table" isOptional="true" type="string" default="Standard (1)"/></step><step name="Parse_Moby_Data_BLAST_Text"><description><![CDATA[Processor to parse the datatype BLAST-Text]]></description></step><step name="Parse_Moby_Data_GFF"><description><![CDATA[Processor to parse the datatype GFF]]></description></step><step name="Parse_Moby_Data_b64_encoded_jpeg"><description><![CDATA[Processor to parse the datatype b64_encoded_jpeg]]></description></step><step name="String_1"><description><![CDATA[a string]]></description></step><step name="String_2"><description><![CDATA[a string]]></description></step><step name="FASTA_NA_1"><description><![CDATA[NA sequence in FASTA format]]></description></step><step name="FASTA_NA_2"><description><![CDATA[NA sequence in FASTA format]]></description></step><step name="Parse_Moby_Data_FASTA"><description><![CDATA[Processor to parse the datatype FASTA]]></description></step><example uuid="c2c2f849-17a5-42c9-b758-26bb44e36f69" name="SGP2_workflow_input" path="8edab9aa-a489-4375-b281-2087427f7cca/examples/c2c2f849-17a5-42c9-b758-26bb44e36f69.xml" date="2007-11-29T18:34:00+01:00"/></release></workflow><workflow uuid="workflow:90da503a-42d9-4e29-b830-c9935a03d492" title="wf-iHOP-SOAP"><release uuid="workflow:90da503a-42d9-4e29-b830-c9935a03d492" lsid="urn:lsid:www.mygrid.org.uk:operation:SOWUXPE5TJ0" author="José María Fernández González (jmfernandez@cnio.es)" title="wf-iHOP-SOAP" path="90da503a-42d9-4e29-b830-c9935a03d492/workflow.xml" workflowType="application/vnd.taverna.scufl+xml" 
licenseName="CC Attribution-ShareAlike 3.0" licenseURI="http://creativecommons.org/licenses/by-sa/3.0/legalcode" responsibleMail="jmfernandez@cnio.es" responsibleName="José María Fernández González" date="2007-11-30T10:58:27+01:00"><description><![CDATA[]]></description><graph mime="image/png">90da503a-42d9-4e29-b830-c9935a03d492/workflow.png</graph><graph mime="image/svg+xml">90da503a-42d9-4e29-b830-c9935a03d492/workflow.svg</graph><graph mime="application/pdf">90da503a-42d9-4e29-b830-c9935a03d492/workflow.pdf</graph><input name="gene_input"><mime type="text/plain"/></input><output name="related symbols"><mime type="text/plain"/></output><output name="Symbols"><mime type="text/plain"/></output><output name="Genes"><mime type="text/plain"/></output><output name="related PubMed"><mime type="text/plain"/></output><step name="interactorsXPath"/><step name="xpath"/><step name="iHOPpmidXPath"/><step name="extractSymbols"/><step name="extractPMID"/><step name="Remove_duplicate_strings"/><step name="extractInteractors"/><step name="getRelatedSymbols"><description><![CDATA[It takes a putative gene symbol as input. It returns a XML with the list of iHOP symbols which could best fit with the input.]]></description></step><step name="getSymbolInteractionsFromIHOP"><description><![CDATA[It takes an iHOP gene symbol id, and it returns
the interactions of the symbol with other ones (in terms of experimental
evidences and sentences).]]></description></step></release></workflow><workflow uuid="workflow:91d768dd-bc72-4f3a-b32a-382d482d9b4a" title="Automatic Annotations and Gene Ontology terms of Protein Function"><release uuid="workflow:91d768dd-bc72-4f3a-b32a-382d482d9b4a" lsid="urn:lsid:net.sf.taverna:wfDefinition:15467c2e-c99a-492b-8bf2-67d0c82854c2" author="José Manuel Rodríguez Carrasco INB-GN2-CNIO (jmrodriguez@cnio.es)" title="Automatic Annotations and Gene Ontology terms of Protein Function" path="91d768dd-bc72-4f3a-b32a-382d482d9b4a/workflow.xml" workflowType="application/vnd.taverna.scufl+xml" licenseName="CC Attribution-ShareAlike 3.0" licenseURI="http://creativecommons.org/licenses/by-sa/3.0/legalcode" responsibleMail="jmrodriguez@cnio.es" responsibleName="José Manuel Rodríguez" date="2008-07-08T17:20:34+02:00"><description><![CDATA[This workflow runs some methods that belong to annotate homologous sequences and include new features related to the specific identification of protein subfamilies (orthologous groups). 
On the other hand, there are methods that report the Gene Ontology terms from the homologous sequences obtained.
This workflow is composed of four methods:

1. ISS (Complete): A sequence similarity search is carried out to find proteins related to the query sequence. A clustering algorithm is applied in order to identify closely related sequence groups in the set of similar proteins. More closely related sequences are more likely to share a common function. In some cases, recursive sequence similarity searches lead to a better representation of the related subfamilies, which facilitates the clustering. This method returns the matrix of distances, the first Blast report, and annotations of the proteins.

2. NCut: The local alignments with the closely related proteins clustered together with the query protein are classified in different categories depending on the extent to which the alignments cover the length of the query and target sequences (alignment categories). So, this method returns the cluster of homologous sequences whose values are the distances among themselves.

3. OFunCUT: Key functional annotations of the corresponding proteins are analyzed, including functional descriptions, enzymatic activity codes, and Swiss-Prot style keywords. The transference of information is carried out starting from the alignment categories with a better coverage. A confidence level is assigned to each one of the annotations. This level is derived from the alignment categories.

The FunCUT results, as we said, include key functional annotations of the corresponding proteins, such as the functional descriptions, enzymatic activity codes, Swiss-Prot style keywords, and Gene Ontology evidences.
Also, the results include the clusters of the query protein and the hits of Blast.

4. SIAM: a Statistical Inferred Annotation Model. It infers Gene Ontology terms for the FunCUT pipeline. SIAM also retrieves a graphical annotation; for this reason, one of its outputs has to be decoded.

http://ubio.bioinfo.cnio.es/biotools/FunCUT/]]></description><graph mime="image/png">91d768dd-bc72-4f3a-b32a-382d482d9b4a/workflow.png</graph><graph mime="image/svg+xml">91d768dd-bc72-4f3a-b32a-382d482d9b4a/workflow.svg</graph><graph mime="application/pdf">91d768dd-bc72-4f3a-b32a-382d482d9b4a/workflow.pdf</graph><input name="aminoacid sequence"><mime type="text/plain"/></input><input name="length of sequence"><mime type="text/plain"/></input><input name="aminoacid id"><mime type="text/plain"/></input><output name="iss result"><mime type="text/plain"/></output><output name="distance matrix"><mime type="text/plain"/></output><output name="first blast"><mime type="text/plain"/></output><output name="distance cluster"><mime type="text/plain"/></output><output name="family annotation"><mime type="text/plain"/></output><output name="gene ontology terms"><mime type="text/plain"/></output><output name="gene ontology graph"><mime type="image/svg+xml"/><mime type="application/postscript"/><mime type="application/pdf"/></output><step name="runNCut"><description><![CDATA[Clustering application that groups the homolog sequences -subfamilies- and weights its closeness to the query sequence]]></description></step><step name="runOFunCUT" kind="moby"><description><![CDATA[Analyzes the key functional annotations of the neighbor sequences related to the query sequence and makes the transference of the annotations, from clustering result (NCut) and sequence similitary search (ISSComplete, NCBI Blast)]]></description><secondaryInput name="database" isOptional="true" type="string" default="SWALL"/></step><step name="runISSComplete" kind="moby"><description><![CDATA[A sequence similitary search (NCBI Blast) is carried out to find proteins related to the query sequence. 
Returns the matrix of distances, the first Blast report, and annotations of the proteins]]></description><secondaryInput name="filters" isOptional="true" type="string" default="No"/><secondaryInput name="rounds" isOptional="true" type="string" default="2"/><secondaryInput name="evalue" isOptional="true" type="string" default="0.0005"/><secondaryInput name="cutlen" isOptional="true" type="string" default="35"/><secondaryInput name="database" isOptional="true" type="string" default="SWALL"/><secondaryInput name="maxsearches" isOptional="true" type="string" default="750"/></step><step name="Integer"><description><![CDATA[an int]]></description></step><step name="String"><description><![CDATA[a string]]></description></step><step name="AminoAcidSequence"><description><![CDATA[Lightweight representation an amino acid sequence]]></description></step><step name="runSIAM" kind="moby"><description><![CDATA[SIAM (Statistical Inferred Annotation Model): Inferring Gene Ontology terms for FunCUT pipeline]]></description><secondaryInput name="graphic_format" isOptional="true" type="string" default="svg"/></step><step name="extract_encoded_image"/><step name="xpath_Image_Encoded"/><step name="Merge_string_list_to_string"/><step name="Decode_base64_to_byte"/><step name="Byte___to_String"/><example uuid="fc42d21f-881e-4980-91f9-728bb386dcea" name="Sample AA sequence" path="91d768dd-bc72-4f3a-b32a-382d482d9b4a/examples/fc42d21f-881e-4980-91f9-728bb386dcea.xml" date="2008-07-08T17:22:07+02:00" responsibleMail="jmrodriguez@cnio.es" responsibleName="José Manuel Rodríguez"><![CDATA[<p>&nbsp;Aminoacid sequence for a sample</p>]]></example></release></workflow><workflow uuid="workflow:964beebf-6076-4553-b3ba-2b815ca920c8" title="Clustering of co-expressed genes in subsets showing similar configurations  of TFBSs."><release uuid="workflow:964beebf-6076-4553-b3ba-2b815ca920c8" lsid="urn:lsid:www.mygrid.org.uk:operation:SSAVFCN82D1" author="Arnaud Kerhornou" title="Clustering of co-expressed 
genes in subsets showing similar configurations  of TFBSs." path="964beebf-6076-4553-b3ba-2b815ca920c8/workflow.xml" workflowType="application/vnd.taverna.scufl+xml" licenseName="CC Attribution-ShareAlike 3.0" licenseURI="http://creativecommons.org/licenses/by-sa/3.0/legalcode" responsibleMail="arnaud@ebi.ac.uk" responsibleName="Arnaud Kerhornou" date="2008-02-13T03:38:26+01:00"><description><![CDATA[Input: 
  * a set of upstream sequences in FASTA format
  * a namespace (a datasource, e.g. Ensembl)
  * an identifier (optional)

Output: 
  * a list of gene sub-clusters

See, http://genome.imim.es/webservices/workflows.html page for more details.]]></description><graph mime="image/png">964beebf-6076-4553-b3ba-2b815ca920c8/workflow.png</graph><graph mime="image/svg+xml">964beebf-6076-4553-b3ba-2b815ca920c8/workflow.svg</graph><graph mime="application/pdf">964beebf-6076-4553-b3ba-2b815ca920c8/workflow.pdf</graph><input name="namespace"><mime type="text/plain"/></input><input name="sequences_fasta"><mime type="text/plain"/></input><input name="identifier"><mime type="text/plain"/></input><output name="matscan_gff"><mime type="text/txt"/></output><output name="meta-alignment"><mime type="text/txt"/></output><output name="score_matrix"><mime type="text/xml"/></output><output name="gene_clusters"><mime type="text/plain"/></output><output name="meta-alignment_GFF"><mime type="text/plain"/></output><output name="gene_tree"><mime type="text/plain"/></output><output name="MultiMeta_alignment"><mime type="text/plain"/></output><output name="MultiMeta_alignment_GFF"><mime type="text/plain"/></output><output name="TFBSs_cluster_image"><mime type="image/*"/></output><step name="articleName"/><step name="StringArticleName"/><step name="parse_moby_score_matrix"/><step name="parse_moby_meta"/><step name="decode_image"/><step name="parse_moby_gene_clusters"/><step name="parse_moby_matscan_gff"/><step name="parse_moby_meta_gff"/><step name="parse_moby_gene_tree"/><step name="Filter_MatScan"/><step name="Beanshell_scripting_host"/><step name="runGFF2JPEG"><description><![CDATA[Runs gff2ps software to vizualize a set of GFF annotations maps. 
It returns an encoded image in JPEG format.]]></description></step><step name="fromFASTAToDNASequenceCollection"><description><![CDATA[Converts a FASTA_NA_multi object into a collection of DNASequence moby objects]]></description></step><step name="fromMetaAlignmentsToTextScoreMatrix"><description><![CDATA[Parses a collection of meta-alignment outputs to produce a text-formatted sequence similarity score matrix]]></description></step><step name="String"><description><![CDATA[a string]]></description></step><step name="runMultiPairwiseMetaAlignment" kind="moby"><description><![CDATA[runMultiPairwiseMetaAlignment runs Meta-alignment software on a multiple running mode, receiving a collection of maps, making pairs of them and, foreach pair, it produces alignments of sequences of TF binding sites. It returns the predictions in 'Meta-alignment' format. You can use runMatScanGFF to produce the input GFF files]]></description><secondaryInput name="lamba penalty" isOptional="true" type="string" default="0.1"/><secondaryInput name="alpha penalty" isOptional="true" type="string" default="0.5"/><secondaryInput name="mu penalty" isOptional="true" type="string" default="0.1"/></step><step name="runMultiPairwiseMetaAlignmentGFF" kind="moby"><description><![CDATA[Runs Meta-alignment software on a multiple running mode, receiving a collection of maps, making pairs of them and, foreach pair, producing, in GFF format, alignments of sequences of TF binding sites]]></description><secondaryInput name="mu penalty" isOptional="true" type="string" default="0.1"/><secondaryInput name="lamba penalty" isOptional="true" type="string" default="0.1"/><secondaryInput name="alpha penalty" isOptional="true" type="string" default="0.5"/></step><step name="runMatScanGFFCollection" kind="moby"><description><![CDATA[Reports putative predicted motifs on a given collection of DNA sequences. The motifs collections currently available are Transcription Factor binding site collections. 
The predicted set of motifs are reported in GFF format. If you want to give MatScan output to Meta-alignment program, you MUST use the 'log-likelihood matrix' mode.]]></description><secondaryInput name="matrix mode" isOptional="true" type="string" default="log-likelihood"/><secondaryInput name="strand" isOptional="true" type="string" default="Both"/><secondaryInput name="threshold" isOptional="true" type="string" default="0.85"/><secondaryInput name="motif database" isOptional="true" type="string" default="Jaspar"/></step><step name="runSOTAClustering" kind="moby"><description><![CDATA[Runs SOTA algorithm to partition the gene space into subclusters. The input is a gene score matrix represented as a MicroArrayData_Text object.]]></description><secondaryInput name="resource_threshold" isOptional="true" type="string" default="35"/><secondaryInput name="distance" isOptional="true" type="string" default="euclidean"/></step><step name="runMultiMetaAlignmentGFF" kind="moby"><description><![CDATA[Runs Multiple-Meta-Alignment software to perform multiple non-collinear transcription factor map alignments of promoter regions. It returns the multiple-meta-alignment output in GFF format.]]></description><secondaryInput name="mu penalty" isOptional="true" type="string" default="0.1"/><secondaryInput name="lamba penalty" isOptional="true" type="string" default="0.1"/><secondaryInput name="alpha penalty" isOptional="true" type="string" default="0.5"/><secondaryInput name="NoN-colinear penalty" isOptional="true" type="string" default="100"/><secondaryInput name="gap penalty" isOptional="true" type="string" default="-10"/></step><step name="runMultiMetaAlignment" kind="moby"><description><![CDATA[Runs Multiple-Meta-Alignment software to perform multiple non-collinear transcription factor map alignments of promoter regions. 
It returns the multiple-meta-alignment output in meta-alignment text format.]]></description><secondaryInput name="alpha penalty" isOptional="true" type="string" default="0.5"/><secondaryInput name="lamba penalty" isOptional="true" type="string" default="0.1"/><secondaryInput name="NoN-colinear penalty" isOptional="true" type="string" default="100"/><secondaryInput name="gap penalty" isOptional="true" type="string" default="-10"/><secondaryInput name="mu penalty" isOptional="true" type="string" default="0.1"/></step><step name="Parse_Moby_Data_GFF"><description><![CDATA[Processor to parse the datatype GFF]]></description></step><step name="Parse_Moby_Data_Meta_Alignment_Text"><description><![CDATA[Processor to parse the datatype Meta_Alignment_Text]]></description></step><step name="Parse_Moby_Data_b64_encoded_jpeg"><description><![CDATA[Processor to parse the datatype b64_encoded_jpeg]]></description></step><step name="FASTA_NA_multi"><description><![CDATA[FASTA formatted sequence]]></description></step><example uuid="75c23d84-7f74-4af4-9911-baf0ed6ae189" name="FASTA sequences input" path="964beebf-6076-4553-b3ba-2b815ca920c8/examples/75c23d84-7f74-4af4-9911-baf0ed6ae189.xml" date="2007-11-29T18:15:23+01:00"/></release></workflow><workflow uuid="workflow:9b995f65-2325-4617-9e99-f320332a82d6" title="Full analysis of gene expression data"><release uuid="workflow:9b995f65-2325-4617-9e99-f320332a82d6" lsid="urn:lsid:net.sf.taverna:wfDefinition:2ab38286-c858-49d7-97d1-edb0ce272589" author="Joaquin Tarraga Gimenez, jtarraga@cipf.es" title="Full analysis of gene expression data" path="9b995f65-2325-4617-9e99-f320332a82d6/workflow.xml" workflowType="application/vnd.taverna.scufl+xml" licenseName="CC Attribution-ShareAlike 3.0" licenseURI="http://creativecommons.org/licenses/by-sa/3.0/legalcode" responsibleMail="jtarraga@cipf.es" responsibleName="Joaquin Tarraga Gimenez" date="2007-11-29T11:18:26+01:00"><description><![CDATA[This workflow was developped for pedagogical 
purporses. It shows the different analysis that can be performed in gene expression data: preprocessing, clustering methods, differential expression tests, and functional profiling with FatiGO and FatiScan.]]></description><graph mime="image/png">9b995f65-2325-4617-9e99-f320332a82d6/workflow.png</graph><graph mime="image/svg+xml">9b995f65-2325-4617-9e99-f320332a82d6/workflow.svg</graph><graph mime="application/pdf">9b995f65-2325-4617-9e99-f320332a82d6/workflow.pdf</graph><input name="gene_expression_data"><mime type="text/plain"/></input><output name="differential_expression_image"><mime type="text/plain"/></output><output name="fatiscan_result_url"><mime type="text/plain"/></output><output name="fatigo_result_url"><mime type="text/plain"/></output><output name="clustering_image"><mime type="text/plain"/></output><step name="runPreprocessing" kind="moby"><description><![CDATA[Preprocessing service for gene expression patterns that allows users to apply log-transform, merge replicated values, filter missing values, impute missing values and standardize profiles]]></description><secondaryInput name="standardize" isOptional="true" type="string" default="Yes"/><secondaryInput name="filter_missing_mode" isOptional="true" type="string" default="None"/><secondaryInput name="log_transform" isOptional="true" type="string" default="Log 2"/><secondaryInput name="impute_missing_mode" isOptional="true" type="string" default="Fill with zeros"/><secondaryInput name="merge_replicates_mode" isOptional="true" type="string" default="None"/></step><step name="runDifferentialExpressionTest" kind="moby"><description><![CDATA[This webservice allows users to analyze gene expression under different experimental conditions: differential expresssion between two conditions (applying t-test, Bayes, Data adaptive, SAM and CLEAR tests), differential expression among more than two conditions (Anova and CLEAR tests), differential expression related to a continuous variable (Regression, Pearson and 
Spearman correlations) and differential expression related to a survival time (Cox test)]]></description><secondaryInput name="test" isOptional="true" type="string" default="T-test"/><secondaryInput name="clear_significance_level" isOptional="true" type="string" default="0.05"/></step><step name="displayDifferentialExpressionTest" kind="moby"><description><![CDATA[This webservice returns a PNG image representing the results from a differential expresssion test (t-test, Bayes, Data adaptive, SAM, CLEAR, Anova, Regression, Pearson correlation, Spearman correlation and Cox tests). In addition, an URL to the image is returned in the output field 'legend']]></description><secondaryInput name="standardize" isOptional="true" type="string" default="yes"/><secondaryInput name="scale" isOptional="true" type="string" default="-3/+3"/><secondaryInput name="rows" isOptional="true" type="string" default="50"/></step><step name="String"><description><![CDATA[a string]]></description></step><step name="MicroArrayData_Text"><description><![CDATA[Micro-array data in text format representing gene expression patterns. Each line must have the gene name and the expression values and the expression values for each condition. All the data items must be separated by tabulators]]></description></step><step name="getGeneWeightedListFromDifferentialExpressionTest"><description><![CDATA[This webservice extracts a weighted list of genes from a differential expession test]]></description></step><step name="runFatiScan" kind="moby"><description><![CDATA[This service detects blocks of functionally related genes (GO, KEGG, etc.) 
with significant coordinate (although modest) behaviour across a list of ranked genes using a segment test]]></description><secondaryInput name="partitions" isOptional="true" type="string" default="10"/><secondaryInput name="organism" isOptional="true" type="string" default="Homo sapiens"/><secondaryInput name="terms" isOptional="true" type="string" default="Gene Ontology"/></step><step name="displayFatiScan"><description><![CDATA[This service returns an URL to FatiScan graphical results]]></description></step><step name="runClustering" kind="moby"><description><![CDATA[Clustering service for gene expression patterns that runs the the following algorithms: SOTA, UPGMA, WPGMA, UPGMC, WPGMC, Single linkage, Complete linkage, SOM and K-means]]></description><secondaryInput name="method" isOptional="true" type="string" default="Unweighted pair-group method using arithmetic averages (UPGMA)"/><secondaryInput name="distance" isOptional="true" type="string" default="Correlation Coeff. (linear)"/><secondaryInput name="k_value" isOptional="true" type="string" default="15"/></step><step name="displayClustering"><description><![CDATA[This service returns a GIF image representing a tree of clusters including their profiles. 
In addition, an URL to the image is returned in the output field 'legend']]></description></step><step name="getGeneListFromDifferentialExpressionTest" kind="moby"><description><![CDATA[This webservice extracts a list of genes from a differential expession test according to your input parameters: the list size and the extraction mode (from top to bottom, or from bottom to top)]]></description><secondaryInput name="extraction" isOptional="true" type="string" default="from top to bottom"/><secondaryInput name="size" isOptional="true" type="string" default="20"/></step><step name="getGeneListFromDifferentialExpressionTest1" kind="moby"><description><![CDATA[This webservice extracts a list of genes from a differential expession test according to your input parameters: the list size and the extraction mode (from top to bottom, or from bottom to top)]]></description><secondaryInput name="extraction" isOptional="true" type="string" default="from bottom to top"/><secondaryInput name="size" isOptional="true" type="string" default="20"/></step><step name="runFatiGO" kind="moby"><description><![CDATA[runFatiGO allows you to compare two lists of genes in terms of PubMed bio-entities (chimical and diseases), Gene Ontology terms (cellular component, biological process and molecular function), InterPro motifs, KEGG pathways, SwissProt keywords, Transcriptor factors, cis-regulatory elements (cisRED) and tissues]]></description><secondaryInput name="terms" isOptional="true" type="string" default="Gene Ontology"/><secondaryInput name="organism" isOptional="true" type="string" default="Homo sapiens"/></step><step name="displayFatiGO"><description><![CDATA[This service returns an URL to FatiGO graphical results]]></description></step><example uuid="8c84aa2e-faf2-4f30-aa14-0ee433eb68d6" name="gene_expression_data_example" path="9b995f65-2325-4617-9e99-f320332a82d6/examples/8c84aa2e-faf2-4f30-aa14-0ee433eb68d6.xml" date="2007-11-29T11:21:26+01:00"/></release></workflow><workflow 
uuid="workflow:b45540c4-085f-4fe3-a9f2-37a4c68016f4" title="Gene detection by homology (D. Torrents, BSC.)"><release uuid="workflow:b45540c4-085f-4fe3-a9f2-37a4c68016f4" lsid="urn:lsid:net.sf.taverna:wfDefinition:c136f31f-ff22-4eb6-a4dc-2428b8c5af10" author="Romina Royo Garrido" title="Gene detection by homology (D. Torrents, BSC.)" path="b45540c4-085f-4fe3-a9f2-37a4c68016f4/workflow.xml" workflowType="application/vnd.taverna.scufl+xml" licenseName="CC Attribution-ShareAlike 3.0" licenseURI="http://creativecommons.org/licenses/by-sa/3.0/legalcode" responsibleMail="romina.royo@bsc.es" responsibleName="Romina Royo" date="2008-07-24T12:10:59+02:00"><description><![CDATA[Gene detection in an unknown dna by homology.
A known protein is searched against the unknown dna sequence (tblastn). This will return some hits where the gene might be found. Finally, runGenewise service predicts the structure of this gene.]]></description><graph mime="image/png">b45540c4-085f-4fe3-a9f2-37a4c68016f4/workflow.png</graph><graph mime="image/svg+xml">b45540c4-085f-4fe3-a9f2-37a4c68016f4/workflow.svg</graph><graph mime="application/pdf">b45540c4-085f-4fe3-a9f2-37a4c68016f4/workflow.pdf</graph><input name="database"><mime type="text/plain"/></input><input name="namespace"><mime type="text/plain"/></input><input name="id"><mime type="text/plain"/></input><output name="output"><mime type="text/plain"/></output><step name="fromFastaToCommentedNASequence" kind="moby"><description><![CDATA[Converts a sequence in Fasta format into a CommentedNASequence object.]]></description><secondaryInput name="persistent" isOptional="true" type="string" default="True"/></step><step name="runBlast2gene" kind="moby"><description><![CDATA[Analysis of genomic regions containing completely or partially duplicated genes: it all high scoring pairwise alignments (HSPs) and provides the disposition of all independent copies along the genomic fragment from a BLAST report.]]></description><secondaryInput name="geneonly" isOptional="true" type="string" default="false"/><secondaryInput name="coverage" isOptional="true" type="string" default="0.01"/></step><step name="loadPersistentFile"><description><![CDATA[Loads a persistent object and returns its reference.]]></description></step><step name="runNCBIFormatdb" kind="moby"><description><![CDATA[Formats a database into NCBI BLAST format.]]></description><secondaryInput name="type" isOptional="true" type="string" default="dna"/><secondaryInput name="persistent" isOptional="true" type="string" default="True"/></step><step name="runGenewise"><description><![CDATA[compares a protein sequence to a genomic DNA sequence, allowing for introns and frameshifting 
errors.]]></description></step><step name="getAminoAcidSequence"><description><![CDATA[Retrieves an aminoacid sequence from an aminoacid database.]]></description></step><step name="runNCBIBlastAgainstDB" kind="moby"><description><![CDATA[Compares a protein or dna sequence against a protein or dna database and calculates the statistical significance of matches using the Basic Local Alignment Tool (BLAST).]]></description><secondaryInput name="scores" isOptional="true" type="string" default="25"/><secondaryInput name="dropoff" isOptional="true" type="string" default="0"/><secondaryInput name="expected_threshold" isOptional="true" type="string" default="10.0"/><secondaryInput name="filter" isOptional="true" type="string" default="false"/><secondaryInput name="extendgap" isOptional="true" type="string" default="2"/><secondaryInput name="alignments" isOptional="true" type="string" default="15"/><secondaryInput name="matrix" isOptional="true" type="string" default="BLOSUM62"/><secondaryInput name="program" isOptional="true" type="string" default="tblastn"/><secondaryInput name="gapalign" isOptional="true" type="string" default="true"/><secondaryInput name="opengap" isOptional="true" type="string" default="11"/></step><step name="Object"><description><![CDATA[an object]]></description></step><step name="String"><description><![CDATA[a string]]></description></step><step name="FASTA"><description><![CDATA[FASTA formatted sequence]]></description></step><example uuid="367a5e28-664c-421e-8740-3050520c3500" name="Very simple example" path="b45540c4-085f-4fe3-a9f2-37a4c68016f4/examples/367a5e28-664c-421e-8740-3050520c3500.xml" date="2008-07-24T12:26:11+02:00" responsibleMail="romina.royo@bsc.es" responsibleName="Romina Royo"><![CDATA[<p>This example is just a test to show the execution of the workflow.</p>
<p>It searches SRC_HUMAN protein against a small part of chr10.</p>
<p>As a result, you get a fragment of a gene.</p>
<p>(This workflow is supposed to run with longer dna sequences to detect and predict gene structures).</p>]]></example></release></workflow><workflow uuid="workflow:b6e5bbfc-e41f-4eaf-83a4-bc580acf35ea" title="Preprocessing, differential expression test and FatiScan test"><release uuid="workflow:b6e5bbfc-e41f-4eaf-83a4-bc580acf35ea" lsid="urn:lsid:net.sf.taverna:wfDefinition:2ab38286-c858-49d7-97d1-edb0ce272589" author="Joaquin Tarraga Gimenez, jtarraga@cipf.es" title="Preprocessing, differential expression test and FatiScan test" path="b6e5bbfc-e41f-4eaf-83a4-bc580acf35ea/workflow.xml" workflowType="application/vnd.taverna.scufl+xml" licenseName="CC Attribution-ShareAlike 3.0" licenseURI="http://creativecommons.org/licenses/by-sa/3.0/legalcode" responsibleMail="jtarraga@cipf.es" responsibleName="Joaquin Tarraga Gimenez" date="2007-11-29T11:53:42+01:00"><description><![CDATA[You can use this workflow, if you have DNA microarray data and you want to know if there are genes differentially expressed under different experimental conditions, and if you want to check for asymmetrical distributions of biological labels associated to that genes sorted by differential expression

FatiScan implements a segmentation test which checks for asymmetrical distributions of biological labels associated to genes ranked in a list (Al-Shahrour et al., 2005a,b). Unique in this type of approaches, this test only needs the list of ordered genes and not the original data which generated the sorting. This means that can be applied to the study of the relationship of biological labels to any type of experiment whose outcome is an sorted list of genes. Genes sorted by differential expression between two experimental conditions can be studied, but also genes correlated to a clinical variable (such as the level of a metabolite) or even to survival. Moreover, other lists of genes ranked by any other experimental or theoretical criteria can be studied (e.g. genes arranged by physico-chemical properties, mutability, structural parameters, etc.) in order to understand whether there is some biological feature (among the labels used) which is related to the experimental parameter studied.]]></description><graph mime="image/png">b6e5bbfc-e41f-4eaf-83a4-bc580acf35ea/workflow.png</graph><graph mime="image/svg+xml">b6e5bbfc-e41f-4eaf-83a4-bc580acf35ea/workflow.svg</graph><graph mime="application/pdf">b6e5bbfc-e41f-4eaf-83a4-bc580acf35ea/workflow.pdf</graph><input name="gene_expression_data"><mime type="text/plain"/></input><output name="differential_expression_image"><mime type="text/plain"/></output><output name="fatiscan_result_url"><mime type="text/plain"/></output><step name="runPreprocessing" kind="moby"><description><![CDATA[Preprocessing service for gene expression patterns that allows users to apply log-transform, merge replicated values, filter missing values, impute missing values and standardize profiles]]></description><secondaryInput name="standardize" isOptional="true" type="string" default="Yes"/><secondaryInput name="filter_missing_mode" isOptional="true" type="string" default="None"/><secondaryInput name="log_transform" isOptional="true" type="string" 
default="Log 2"/><secondaryInput name="impute_missing_mode" isOptional="true" type="string" default="Fill with zeros"/><secondaryInput name="merge_replicates_mode" isOptional="true" type="string" default="None"/></step><step name="runDifferentialExpressionTest" kind="moby"><description><![CDATA[This webservice allows users to analyze gene expression under different experimental conditions: differential expresssion between two conditions (applying t-test, Bayes, Data adaptive, SAM and CLEAR tests), differential expression among more than two conditions (Anova and CLEAR tests), differential expression related to a continuous variable (Regression, Pearson and Spearman correlations) and differential expression related to a survival time (Cox test)]]></description><secondaryInput name="test" isOptional="true" type="string" default="T-test"/><secondaryInput name="clear_significance_level" isOptional="true" type="string" default="0.05"/></step><step name="displayDifferentialExpressionTest" kind="moby"><description><![CDATA[This webservice returns a PNG image representing the results from a differential expresssion test (t-test, Bayes, Data adaptive, SAM, CLEAR, Anova, Regression, Pearson correlation, Spearman correlation and Cox tests). In addition, an URL to the image is returned in the output field 'legend']]></description><secondaryInput name="standardize" isOptional="true" type="string" default="yes"/><secondaryInput name="scale" isOptional="true" type="string" default="-3/+3"/><secondaryInput name="rows" isOptional="true" type="string" default="50"/></step><step name="String"><description><![CDATA[a string]]></description></step><step name="MicroArrayData_Text"><description><![CDATA[Micro-array data in text format representing gene expression patterns. Each line must have the gene name and the expression values and the expression values for each condition. 
All the data items must be separated by tabulators]]></description></step><step name="getGeneWeightedListFromDifferentialExpressionTest"><description><![CDATA[This webservice extracts a weighted list of genes from a differential expession test]]></description></step><step name="runFatiScan" kind="moby"><description><![CDATA[This service detects blocks of functionally related genes (GO, KEGG, etc.) with significant coordinate (although modest) behaviour across a list of ranked genes using a segment test]]></description><secondaryInput name="partitions" isOptional="true" type="string" default="10"/><secondaryInput name="organism" isOptional="true" type="string" default="Homo sapiens"/><secondaryInput name="terms" isOptional="true" type="string" default="Gene Ontology"/></step><step name="displayFatiScan"><description><![CDATA[This service returns an URL to FatiScan graphical results]]></description></step><example uuid="30043d78-aebb-4007-b226-9024b706e4f7" name="gene_expression_data_example" path="b6e5bbfc-e41f-4eaf-83a4-bc580acf35ea/examples/30043d78-aebb-4007-b226-9024b706e4f7.xml" date="2007-11-29T11:53:46+01:00"/></release></workflow><workflow uuid="workflow:bd7c0e35-8749-45d9-9340-50f40d72de2f" title="Preprocessing and clustering of gene expression data"><release uuid="workflow:bd7c0e35-8749-45d9-9340-50f40d72de2f" lsid="urn:lsid:net.sf.taverna:wfDefinition:40139d61-dedc-459d-a8d3-2e7a3ac1c7ba" author="Joaquin Tarraga Gimenez, jtarraga@cipf.es" title="Preprocessing and clustering of gene expression data" path="bd7c0e35-8749-45d9-9340-50f40d72de2f/workflow.xml" workflowType="application/vnd.taverna.scufl+xml" licenseName="CC Attribution-ShareAlike 3.0" licenseURI="http://creativecommons.org/licenses/by-sa/3.0/legalcode" responsibleMail="jtarraga@cipf.es" responsibleName="Joaquin Tarraga Gimenez" date="2007-11-29T11:45:00+01:00"><description><![CDATA[You can use this workflow, if you have DNA microarray data and you want to find the groups of co-expressing 
genes.]]></description><graph mime="image/png">bd7c0e35-8749-45d9-9340-50f40d72de2f/workflow.png</graph><graph mime="image/svg+xml">bd7c0e35-8749-45d9-9340-50f40d72de2f/workflow.svg</graph><graph mime="application/pdf">bd7c0e35-8749-45d9-9340-50f40d72de2f/workflow.pdf</graph><input name="gene_expression_data"><mime type="text/plain"/></input><output name="clustering_image"><mime type="text/plain"/></output><step name="displayClustering"><description><![CDATA[This service returns a GIF image representing a tree of clusters including their profiles. In addition, an URL to the image is returned in the output field 'legend']]></description></step><step name="runPreprocessing" kind="moby"><description><![CDATA[Preprocessing service for gene expression patterns that allows users to apply log-transform, merge replicated values, filter missing values, impute missing values and standardize profiles.]]></description><secondaryInput name="standardize" isOptional="true" type="string" default="Yes"/><secondaryInput name="filter_missing_mode" isOptional="true" type="string" default="None"/><secondaryInput name="log_transform" isOptional="true" type="string" default="Log 2"/><secondaryInput name="impute_missing_mode" isOptional="true" type="string" default="Fill with zeros"/><secondaryInput name="merge_replicates_mode" isOptional="true" type="string" default="None"/></step><step name="runClustering" kind="moby"><description><![CDATA[Clustering service for gene expression patterns that runs the the following algorithms: SOTA, UPGMA, WPGMA, UPGMC, WPGMC, Single linkage, Complete linkage, SOM and K-means]]></description><secondaryInput name="method" isOptional="true" type="string" default="Unweighted pair-group method using arithmetic averages (UPGMA)"/><secondaryInput name="distance" isOptional="true" type="string" default="Euclidean (normal)"/><secondaryInput name="k_value" isOptional="true" type="string" default="15"/></step><step name="String"><description><![CDATA[a 
string]]></description></step><step name="MicroArrayData_Text"><description><![CDATA[Micro-array data in text format representing gene expression patterns. Each line must have the gene name and the expression values and the expression values for each condition. All the data items must be separated by tabulators]]></description></step><example uuid="ec1e9492-eb5e-4cee-9cf2-fa68b69180ca" name="gene_expression_data_example" path="bd7c0e35-8749-45d9-9340-50f40d72de2f/examples/ec1e9492-eb5e-4cee-9cf2-fa68b69180ca.xml" date="2007-11-29T11:45:16+01:00"/><example uuid="060322f4-d83b-4e03-9446-f9137bf284f3" name="clustering_input" path="bd7c0e35-8749-45d9-9340-50f40d72de2f/examples/060322f4-d83b-4e03-9446-f9137bf284f3.xml" date="2007-11-29T12:59:28+01:00"/><snapshot name="toto" uuid="snapshot:bd7c0e35-8749-45d9-9340-50f40d72de2f:fb71bf25-c17b-4345-a379-a52c1249ecb7" date="2007-11-30T13:06:37+01:00"/></release></workflow><workflow uuid="workflow:d4d38653-3628-4c36-9d31-b512d3ead503" title="SGP2_Geneid_Comparison"><release uuid="workflow:d4d38653-3628-4c36-9d31-b512d3ead503" lsid="urn:lsid:www.mygrid.org.uk:operation:SSAVFCN82D1" author="" title="SGP2_Geneid_Comparison" path="d4d38653-3628-4c36-9d31-b512d3ead503/workflow.xml" workflowType="application/vnd.taverna.scufl+xml" licenseName="CC Attribution-ShareAlike 3.0" licenseURI="http://creativecommons.org/licenses/by-sa/3.0/legalcode" responsibleMail="acanada@cnio.es" responsibleName="Andrés Cañada" date="2008-02-13T03:38:26+01:00"><description><![CDATA[]]></description><graph mime="image/png">d4d38653-3628-4c36-9d31-b512d3ead503/workflow.png</graph><graph mime="image/svg+xml">d4d38653-3628-4c36-9d31-b512d3ead503/workflow.svg</graph><graph mime="application/pdf">d4d38653-3628-4c36-9d31-b512d3ead503/workflow.pdf</graph><input name="namespace"><mime type="text/plain"/></input><input name="fasta_sequence_1"><mime type="text/plain"/></input><input name="identifier_1"><mime type="text/plain"/></input><input 
name="fasta_sequence_2"><mime type="text/plain"/></input><input name="identifier_2"><mime type="text/plain"/></input><output name="sgp2_gff"><mime type="text/plain"/></output><output name="tblastx_report"><mime type="text/plain"/></output><output name="geneid_gff"><mime type="text/plain"/></output><output name="gene_predictions_maps"><mime type="image/*"/></output><step name="StringArticleName"/><step name="Decode_base64_to_byte"/><step name="String_2"><description><![CDATA[a string]]></description></step><step name="runGFF2JPEG"><description><![CDATA[Runs gff2ps software to vizualize a set of GFF annotations maps. It returns an encoded image in JPEG format.]]></description></step><step name="runWUTBlastx_2Seqs" kind="moby"><description><![CDATA[It compares the six-frame translations of a nucleotide query sequence against the six-frame translations of a nucleotide sequence (used as a database) using the Washington University BLAST algorithm.]]></description><secondaryInput name="nogaps" isOptional="true" type="string" default="off"/><secondaryInput name="gap_open" isOptional="true" type="string" default="9"/><secondaryInput name="filter" isOptional="true" type="string" default="none"/><secondaryInput name="statistics" isOptional="true" type="string" default="sump"/><secondaryInput name="word_size" isOptional="true" type="string" default="3"/><secondaryInput name="alignments" isOptional="true" type="string" default="15"/><secondaryInput name="matrix" isOptional="true" type="string" default="blosum62"/><secondaryInput name="expected_threshold" isOptional="true" type="string" default="10.0"/><secondaryInput name="scores" isOptional="true" type="string" default="25"/><secondaryInput name="gap_extension" isOptional="true" type="string" default="2"/></step><step name="runSGP2GFF" kind="moby"><description><![CDATA[Ab initio gene prediction service. 
It runs geneid with synteny evidences to improve the accuracy of the results and returns the output predictions in GFF format. To generate the synteny evidences, use a service that provides tblastx.]]></description><secondaryInput name="profile" isOptional="true" type="string" default="Human Vs Mouse"/></step><step name="runGeneIDGFF" kind="moby"><description><![CDATA[Ab initio gene prediction tool that returns the gene predictions in GFF format (GFF version 2).]]></description><secondaryInput name="strand" isOptional="true" type="string" default="Both"/><secondaryInput name="profile" isOptional="true" type="string" default="Homo sapiens (suitable for mammals)"/><secondaryInput name="engine" isOptional="true" type="string" default="Normal"/><secondaryInput name="signals" isOptional="true" type="string" default="None"/><secondaryInput name="exons" isOptional="true" type="string" default="None"/></step><step name="fromFASTAToDNASequence_2"><description><![CDATA[Converts a DNA FASTA sequence into a DNA sequence]]></description></step><step name="fromFASTAToDNASequence_1"><description><![CDATA[Converts a DNA FASTA sequence into a DNA sequence]]></description></step><step name="FASTA_NA_1"><description><![CDATA[NA sequence in FASTA format]]></description></step><step name="Parse_Moby_Data_GFF1"><description><![CDATA[Processor to parse the datatype GFF]]></description></step><step name="Parse_Moby_Data_b64_encoded_jpeg"><description><![CDATA[Processor to parse the datatype b64_encoded_jpeg]]></description></step><step name="Parse_Moby_Data_GFF"><description><![CDATA[Processor to parse the datatype GFF]]></description></step><step name="FASTA_NA_2"><description><![CDATA[NA sequence in FASTA format]]></description></step><step name="String_1"><description><![CDATA[a string]]></description></step><step name="Parse_Moby_Data_BLAST_Text"><description><![CDATA[Processor to parse the datatype BLAST-Text]]></description></step><step 
name="Combine_SGP2_GeneID_predictions"/><example uuid="40a625f0-d79c-40f1-b68f-975292e4c3e4" name="BTK_SGP2_workflow_input" path="d4d38653-3628-4c36-9d31-b512d3ead503/examples/40a625f0-d79c-40f1-b68f-975292e4c3e4.xml" date="2007-11-29T18:49:56+01:00"/></release></workflow><workflow uuid="workflow:f1695325-c9c3-4130-a643-b5a5f3c2cf57" title="Characterization of peroxysomal metabolome and its evolutive origin.  (A. Pujol, IRO)"><release uuid="workflow:f1695325-c9c3-4130-a643-b5a5f3c2cf57" lsid="urn:lsid:net.sf.taverna:wfDefinition:9fcc2a27-c54a-458c-a656-6473cbd176d7" author="Romina Royo Garrido" title="Characterization of peroxysomal metabolome and its evolutive origin.  (A. Pujol, IRO)" path="f1695325-c9c3-4130-a643-b5a5f3c2cf57/workflow.xml" workflowType="application/vnd.taverna.scufl+xml" licenseName="CC Attribution-ShareAlike 3.0" licenseURI="http://creativecommons.org/licenses/by-sa/3.0/legalcode" responsibleMail="acanada@cnio.es" responsibleName="Andrés Cañada" date="2008-02-13T03:38:26+01:00"><description><![CDATA[This workflow's input is a sequence identifier in UniProt.
It builds an HMM profile of orthologous sequences (retrieved from ensembl) of your input  ID. 
This profile can be searched against a database (e.g. Bacterial database) to find other proteins of the same family.]]></description><graph mime="image/png">f1695325-c9c3-4130-a643-b5a5f3c2cf57/workflow.png</graph><graph mime="image/svg+xml">f1695325-c9c3-4130-a643-b5a5f3c2cf57/workflow.svg</graph><graph mime="application/pdf">f1695325-c9c3-4130-a643-b5a5f3c2cf57/workflow.pdf</graph><input name="id"><mime type="text/plain"/></input><input name="ns"><mime type="text/plain"/></input><output name="results"><mime type="text/plain"/></output><step name="runHMMSearchAgainstDB" kind="moby"><description><![CDATA[Searches a sequence database with a HMM profile.]]></description><secondaryInput name="e-value" isOptional="true" type="string" default="10.0"/><secondaryInput name="score_cutoff_domain" isOptional="true" type="string" default=""/><secondaryInput name="e-value_score" isOptional="true" type="string" default="59021"/><secondaryInput name="score_cutoff" isOptional="true" type="string" default=""/><secondaryInput name="alignment_number" isOptional="true" type="string" default="10"/><secondaryInput name="e-value_domain" isOptional="true" type="string" default=""/><secondaryInput name="database" isOptional="true" type="string" default="Bacteria_AA"/></step><step name="runClustalwFast" kind="moby"><description><![CDATA[ClustalW multiple sequence alignments (fast method).]]></description><secondaryInput name="gapopen" isOptional="true" type="string" default=""/><secondaryInput name="gapdist" isOptional="true" type="string" default="4"/><secondaryInput name="endgaps" isOptional="true" type="string" default="off"/><secondaryInput name="pairgap" isOptional="true" type="string" default="3"/><secondaryInput name="topdiag" isOptional="true" type="string" default="5"/><secondaryInput name="matrix" isOptional="true" type="string" default=""/><secondaryInput name="window" isOptional="true" type="string" default="5"/><secondaryInput name="transitions_weight" isOptional="true" 
type="string" default="0.5"/><secondaryInput name="gapext" isOptional="true" type="string" default=""/><secondaryInput name="ktup" isOptional="true" type="string" default="1"/></step><step name="runHMMBuild" kind="moby"><description><![CDATA[Constructs a Hidden Markov Model from a multiple sequence alignment.]]></description><secondaryInput name="swentry" isOptional="true" type="string" default="0.5"/><secondaryInput name="fast_algorithm" isOptional="true" type="string" default="true"/><secondaryInput name="alignment_method" isOptional="true" type="string" default="none"/><secondaryInput name="pb_weights" isOptional="true" type="string" default="1000"/><secondaryInput name="idlevel" isOptional="true" type="string" default="0.62"/><secondaryInput name="fast_construction" isOptional="true" type="string" default="0.5"/><secondaryInput name="weighting_algorithm" isOptional="true" type="string" default="wgsc"/><secondaryInput name="architecture_prior" isOptional="true" type="string" default="0.85"/><secondaryInput name="swexit" isOptional="true" type="string" default="0.5"/></step><step name="getHomologiesFromEnsembl" kind="moby"><description><![CDATA[Retrieves homologies from ENSEMBL.]]></description><secondaryInput name="type" isOptional="true" type="string" default="ortholog_one2many"/></step><step name="getSequencesFromENSEMBLHomology" kind="moby"><description><![CDATA[Retrieves all sequence of an ENSEMBL homology.]]></description><secondaryInput name="type" isOptional="true" type="string" default="protein"/></step><step name="getEnsemblIDsFromUniprot"><description><![CDATA[Returns Ensembl cross references from an uniprot entry.]]></description></step><step name="Object"><description><![CDATA[an object]]></description></step></release></workflow><workflow uuid="workflow:f78a0d42-45f3-4a05-828b-55af4a198a24" title="Predicting Functionally Important Residues"><release uuid="workflow:f78a0d42-45f3-4a05-828b-55af4a198a24" 
lsid="urn:lsid:net.sf.taverna:wfDefinition:c19346b4-de3b-474e-8559-22a86c5155e3" author="José Manuel Rodríguez Carrasco INB-GN2-CNIO (jmrodriguez@cnio.es)" title="Predicting Functionally Important Residues" path="f78a0d42-45f3-4a05-828b-55af4a198a24/workflow.xml" workflowType="application/vnd.taverna.scufl+xml" licenseName="CC Attribution-ShareAlike 3.0" licenseURI="http://creativecommons.org/licenses/by-sa/3.0/legalcode" responsibleMail="jmrodriguez@cnio.es" responsibleName="José Manuel Rodríguez Carrasco" date="2007-11-27T16:27:30+01:00"><description><![CDATA[This workflow runs some automatic method for predicting functionally important residues in protein sequence alignments. These methods are 'FASS', 'MB', 'S-method' and 'SQUARE'.
Its input is Protein Multiple Sequence Alignment. The accepted formats are ALN, FASTA, MSF, and PIR.
The alignment has to be longer than 50 residues and contain at least 15 sequences (no more than 200).

FASS method:
Principal component analysis of the multiple alignment and computation of the statistical confidence in the organization of the family into sub-families.

MB method:
The idea behind the MB method is to look for positions in the multiple sequence alignment whose Mutational Behaviour resembles that of the global alignment. Such behaviour is expected in positions that have family-dependent conservation, since in these positions the amino acids within a family are similar to each other and different from those in other families.

SM method:
The S-method aims to find the tree-determinants associated to the optimal division of a functional protein family into functionally specific subfamilies. This method relies on a family phylogenetic tree, internally generated by ClustalW, and searches for the cut level with the greatest value of the position relative entropy (optimal level).

SQUARE method:
Server for Quick Alignment Reliability Evaluation. This server produces a measure of per residue reliability for alignments between the query sequence (always the first sequence in the alignment) and the other sequences in the multiple alignment. The higher the score at each position in the alignment, the more likely the two sequences are correctly aligned at this position.


http://treedet.bioinfo.cnio.es/]]></description><graph mime="image/png">f78a0d42-45f3-4a05-828b-55af4a198a24/workflow.png</graph><graph mime="image/svg+xml">f78a0d42-45f3-4a05-828b-55af4a198a24/workflow.svg</graph><graph mime="application/pdf">f78a0d42-45f3-4a05-828b-55af4a198a24/workflow.pdf</graph><input name="Protein Multiple Sequence Alignment"><description><![CDATA[It is protein multiple sequence alignment. The accepted formats are ALN (Clustalw), FASTA, MSF, and PIR.]]></description><mime type="text/plain"/></input><output name="FASS result"><description><![CDATA[Xml document that shows us important residues in protein sequence alignments.]]></description><mime type="text/plain"/></output><output name="SM result"><mime type="text/plain"/></output><output name="MB result"><mime type="text/plain"/></output><output name="SQUARE result"><mime type="text/plain"/></output><output name="html TreeDet result"><mime type="text/plain"/><mime type="text/html"/></output><step name="Flatten_list"/><step name="displayAlignmentFromTreeDetAnnotation"><description><![CDATA[Display protein sequence alignments as HTML format from annotations of TreeDet methods: FASS, MB, S-method, and/or SQUARE.]]></description></step><step name="runSM" kind="moby"><description><![CDATA[Automatic method for predicting functionally important residues using the concept of Relative Entropy in protein sequence alignments (longer than 50 residues and at least 15 sequences). The accepted formats are ALN (Clustalw), FASTA, MSF, and PIR.]]></description><secondaryInput name="maxsubfamily" isOptional="true" type="string" default="10"/><secondaryInput name="minsubfamily" isOptional="true" type="string" default="2"/><secondaryInput name="conservation" isOptional="true" type="string" default="85"/><secondaryInput name="secfamily" isOptional="true" type="string" default="3"/></step><step name="runFASS" kind="moby"><description><![CDATA[MOBY Web Service that runs FASS method.

FASS is an automatic method for predicting functionally important residues in protein sequence alignments (longer than 50 residues and at least 15 sequences). The accepted formats are ALN (Clustalw), FASTA, MSF, and PIR.

* Inputs: 

   - Moby Object of "Sequence_alignment_report". This object accepts Multiple Sequence Alignment. But for FASS method, the accepted formats are ALN (Clustalw), FASTA, MSF, and PIR.

* Outputs:

   - Complex Moby Object based on FASS result (TreeDet_Annotation_XML{'treedet_annotation'}): Shows us important residues in protein sequence alignments as xml document.

* Parameters:

   - gaps (Integer{''}, default value: 10): Maximum number of gaps to include a column in the analysis.

   - cutoff (Float{''}, default value: 0.05): Represents the significance level for the non-parametric test and tries to choose the optimal number of axes.

   - clusters (Integer{''}, default value: 4): Represents the number of groups referred to the above mentioned sequence space.

   - axes (Integer{''}, default value: 3): Represents the dimension of the sequence space -the residues space adopts the same dimension automatically-.]]></description><secondaryInput name="clusters" isOptional="true" type="string" default="4"/><secondaryInput name="axes" isOptional="true" type="string" default="3"/><secondaryInput name="gaps" isOptional="true" type="string" default="10"/><secondaryInput name="cutoff" isOptional="true" type="string" default="0.05"/></step><step name="String"><description><![CDATA[a string]]></description></step><step name="Parse_Moby_Data_TreeDet_Annotation_HTML"><description><![CDATA[Processor to parse the datatype TreeDet_Annotation_HTML]]></description></step><step name="Sequence_alignment_report"/><step name="runMB" kind="moby"><description><![CDATA[MOBY Web Service that runs MB method.

MB is an automatic method for predicting functionally important residues (Mutational Behaviour) in protein sequence alignments (longer than 50 residues and at least 15 sequences). The accepted formats are ALN (Clustalw), FASTA, MSF, and PIR.

* Inputs: 

   - Moby Object of "Sequence_alignment_report". This object accepts Multiple Sequence Alignment. But for MB method, the accepted formats are ALN (Clustalw), FASTA, MSF, and PIR.

* Outputs:

   - Complex Moby Object based on MB result (TreeDet_Annotation_XML{'treedet_annotation'}): Shows us important residues in protein sequence alignments as xml document.

* Parameters:

   - cutoff (Float{''}, default value: 0.05): Correlation cutoff above which positions are taken as predicted functional residues.

   - residues (Integer{''}, default value: 100): Percentage of High Scoring Residues.]]></description><secondaryInput name="cutoff" isOptional="true" type="string" default="0.05"/><secondaryInput name="residues" isOptional="true" type="string" default="100"/></step><step name="runSQUARE" kind="moby"><description><![CDATA[MOBY Web Service that runs SQUARE server.

SQUARE is a Server for Quick Alignment Reliability Evaluation. This server produces a measure of per residue reliability for alignments between the query sequence (always the first sequence in the alignment) and the other sequences in the multiple alignment. The higher the score at each position in the alignment, the more likely the two sequences are correctly aligned at this position.
The alignment as input has to be longer than 50 residues and contain at least 15 sequences (no more than 200). The accepted formats are ALN (Clustalw), FASTA, MSF, and PIR.

* Inputs: 

   - Moby Object of "Sequence_alignment_report". This object accepts Multiple Sequence Alignment. But for SQUARE server, the accepted formats are ALN (Clustalw), FASTA, MSF, and PIR.

* Outputs:

   - Complex Moby Object based on SQUARE result (TreeDet_Annotation_XML{'treedet_annotation'}): Shows us a measure of per residue reliability for alignments between query sequences and sequences of known structure.

* Parameters:

   - peakresidues (Integer{'1', '2', '3'}, default value: 3): Number of residues that need to be above the peak cut-off score in order to trigger an area.

   - tail (Float{''}, default value: 2): Indication of how far you want the area of reliably aligned residues to extend.

   - peakcutoff (Float{''}, default value: 2): Area can be triggered whenever the profile-derived alignment scores climb above this.]]></description><secondaryInput name="peakcutoff" isOptional="true" type="string" default="2"/><secondaryInput name="tail" isOptional="true" type="string" default="2"/><secondaryInput name="peakresidues" isOptional="true" type="string" default="3"/></step><example uuid="7bc9b6c9-2292-46be-aadd-8fe0ac3db67b" name="MSF, Multiple Sequence Alginment" path="f78a0d42-45f3-4a05-828b-55af4a198a24/examples/7bc9b6c9-2292-46be-aadd-8fe0ac3db67b.xml" date="2007-11-29T15:55:11+01:00"/></release></workflow><workflow uuid="workflow:febda920-89cd-4821-9b55-d8ee6b11250b" title="Information Hyperlinked over Proteins (iHOP)"><release uuid="workflow:febda920-89cd-4821-9b55-d8ee6b11250b" lsid="urn:lsid:net.sf.taverna:wfDefinition:12f2450a-3d69-4026-98a7-e0f90738a731" author="José María Fernández González (jmfernandez@cnio.es)" title="Information Hyperlinked over Proteins (iHOP)" path="febda920-89cd-4821-9b55-d8ee6b11250b/workflow.xml" workflowType="application/vnd.taverna.scufl+xml" licenseName="CC Attribution-ShareAlike 3.0" licenseURI="http://creativecommons.org/licenses/by-sa/3.0/legalcode" responsibleMail="jmfernandez@cnio.es" responsibleName="José María Fernández González" date="2007-11-29T15:11:28+01:00"><description><![CDATA[This workflow runs some methods that extract Information Hyperlinked over Proteins. All these methods are known as iHOP.

iHOP is a network of concurring genes and proteins that extends through the scientific literature, touching on phenotypes, pathologies and gene function.
It provides this network as a natural way of accessing millions of PubMed abstracts. By using genes and proteins as hyperlinks between sentences and abstracts, the information in PubMed can be converted into one navigable resource, bringing all advantages of the internet to scientific literature research. Concept by Robert Hoffman.]]></description><graph mime="image/png">febda920-89cd-4821-9b55-d8ee6b11250b/workflow.png</graph><graph mime="image/svg+xml">febda920-89cd-4821-9b55-d8ee6b11250b/workflow.svg</graph><graph mime="application/pdf">febda920-89cd-4821-9b55-d8ee6b11250b/workflow.pdf</graph><input name="gene input"><description><![CDATA[Basically is a free text that could be a protein name, gene name, keyword, etc.]]></description><mime type="text/plain"/></input><input name="gene namespace"><description><![CDATA[Namespace which is associated to input. If you don't know what namespace to use, try to write 'Global'.]]></description><mime type="text/plain"/></input><output name="related symbols"><mime type="text/xml"/></output><output name="Symbols"><mime type="text/plain"/></output><output name="Interactions"><mime type="text/xml"/></output><output name="related PubMed"><mime type="text/xml"/></output><step name="Flatten_list1"><description><![CDATA[The given list of object is flattened for better visualization.]]></description></step><step name="extractPMID"><description><![CDATA[From given xml document, it extracts the PMID.]]></description></step><step name="iHOPpmidXPath"/><step name="Flatten_list"><description><![CDATA[The given list of object is flattened for better visualization.]]></description></step><step name="xpath"/><step name="PMID_namespace"/><step name="extractSymbols"><description><![CDATA[From given xml document, it extracts the symbols.]]></description></step><step name="iHOP_namespace"/><step name="Flatten_list3"><description><![CDATA[The given list of object is flattened for better visualization.]]></description></step><step 
name="Flatten_list2"><description><![CDATA[The given list of object is flattened for better visualization.]]></description></step><step name="Check_Namespace"><description><![CDATA[The given namespace is checked. If the value is empty, then is returned 'Global' namespace.]]></description></step><step name="Object2"><description><![CDATA[an object]]></description></step><step name="Object1"><description><![CDATA[an object]]></description></step><step name="getPubMed"><description><![CDATA[It returns the PubMed abstract identified by the input, tagged and enriched by the iHOP system]]></description></step><step name="Object"><description><![CDATA[an object]]></description></step><step name="getRelatedSymbols" kind="moby"><description><![CDATA[It looks for iHOP protein or gene symbols related to the input (free text is an ID living under Global_Keyword or Global namespaces), and it returns an iHOPfoundSymbols object with the result]]></description><secondaryInput name="ncbiTaxId" isOptional="true" type="string" default=""/></step><step name="getSymbolInteractions" kind="moby"><description><![CDATA[It returns all the sentences which show evidences of interactions between the iHOP gene or protein symbol related to the input and other iHOP symbols]]></description><secondaryInput name="ncbiTaxId" isOptional="true" type="string" default=""/></step><example uuid="0c39b38d-9311-4c2a-982b-1cc5ff88913d" name="Gene name example" path="febda920-89cd-4821-9b55-d8ee6b11250b/examples/0c39b38d-9311-4c2a-982b-1cc5ff88913d.xml" date="2007-11-29T15:11:45+01:00"><![CDATA[<p>Gene input example with its namespace.</p>]]></example></release></workflow><workflow uuid="workflow:9f450408-d03b-4e79-97d8-333df903842f" title="somar"><release uuid="workflow:9f450408-d03b-4e79-97d8-333df903842f" lsid="urn:lsid:net.sf.taverna:wfDefinition:f16fc932-f31d-4b4e-b0d3-3515aa0e62d1" author="" title="somar" path="9f450408-d03b-4e79-97d8-333df903842f/workflow.xml" 
workflowType="application/vnd.taverna.scufl+xml" licenseName="CC Attribution-ShareAlike 3.0" licenseURI="http://creativecommons.org/licenses/by-sa/3.0/legalcode" responsibleMail="pdrlps@gmail.com" responsibleName="Pedro Lopes" date="2008-10-01T12:32:33+02:00"><description><![CDATA[]]></description><graph mime="image/png">9f450408-d03b-4e79-97d8-333df903842f/workflow.png</graph><graph mime="image/svg+xml">9f450408-d03b-4e79-97d8-333df903842f/workflow.svg</graph><graph mime="application/pdf">9f450408-d03b-4e79-97d8-333df903842f/workflow.pdf</graph><input name="a"><mime type="text/plain"/></input><input name="b"><mime type="text/plain"/></input><output name="c"><mime type="text/plain"/></output><step name="Somar"/><step name="PIn"/><step name="Pout"/></release></workflow><workflow uuid="workflow:aaa1ce69-b4ac-400a-8128-5b9c48744fa3" title="Conservation of Exon Structure"><release uuid="workflow:aaa1ce69-b4ac-400a-8128-5b9c48744fa3" lsid="urn:lsid:net.sf.taverna:wfDefinition:15467c2e-c99a-492b-8bf2-67d0c82854c2" author="José Manuel Rodríguez Carrasco INB-GN2-CNIO (jmrodriguez@cnio.es)" title="Conservation of Exon Structure" path="aaa1ce69-b4ac-400a-8128-5b9c48744fa3/workflow.xml" workflowType="application/vnd.taverna.scufl+xml" licenseName="CC Attribution-ShareAlike 3.0" licenseURI="http://creativecommons.org/licenses/by-sa/3.0/legalcode" responsibleMail="jmrodriguez@cnio.es" responsibleName="Jose Manuel" date="2009-01-08T17:53:45+01:00"><description><![CDATA[CExonic is a method for the determination of conservation of exonic structure.

Transcription data are frequently being used to study alternative splicing in one species. However, splicing isoforms may be conserved between species of a certain evolutionary distance. Therefore, cross-species comparison of splicing isoforms may provide insight into the conservation of alternative splicing. The assessment of functional alternative splicing requires the identification of the gene product that retains the core biological function. The conservation of exonic structure between orthologous splicing isoforms of two species would suggest that they exist in both species and that their biological function may be conserved.

CExonic was developed in order to determine if splicing isoforms can be aligned to the genomic DNA of their orthologous gene in another species with conservation of exonic structure. Users can enter a VEGA identifier of a human gene or transcript and the CExonic server will align this transcript to the genomic DNA of its orthologous gene in mouse and assess whether its exonic structure is conserved in mouse. First, it determines the orthologous gene in the mouse genome, using tblastx. Then, the human transcript is aligned to this mouse genomic DNA using exonerate. Subsequently, the human and predicted mouse transcripts are aligned using muscle and the exon/intron coordinates are superimposed on the alignment, similar to exstral[1]. If the aligned intron positions coincide, the exonic structure is conserved.

http://cexonic.bioinfo.cnio.es
]]></description><graph mime="image/png">aaa1ce69-b4ac-400a-8128-5b9c48744fa3/workflow.png</graph><graph mime="image/svg+xml">aaa1ce69-b4ac-400a-8128-5b9c48744fa3/workflow.svg</graph><graph mime="application/pdf">aaa1ce69-b4ac-400a-8128-5b9c48744fa3/workflow.pdf</graph><input name="chromosome"><mime type="text/plain"/></input><input name="start"><mime type="text/plain"/></input><input name="specie"><mime type="text/plain"/></input><input name="end"><mime type="text/plain"/></input><output name="alignment text"><mime type="text/xml"/></output><output name="first alignment image"><mime type="image/png"/><mime type="image/svg+xml"/><mime type="image/jpeg"/><mime type="application/pdf"/></output><output name="second alignment image"><mime type="image/svg+xml"/><mime type="image/png"/><mime type="image/jpeg"/></output><step name="xpath_Image_Encoded"/><step name="Byte___to_String"/><step name="Decode_base64_to_byte"/><step name="Merge_string_list_to_string"/><step name="extract_encoded_image"/><step name="Integer"><description><![CDATA[an int]]></description></step><step name="Integer1"><description><![CDATA[an int]]></description></step><step name="Integer2"><description><![CDATA[an int]]></description></step><step name="String"><description><![CDATA[a string]]></description></step><step name="Genomic_Location"><description><![CDATA[Genomic Location]]></description></step><step name="runCExonicFromLocation" kind="moby"><description><![CDATA[CExonic compares the conservation of exon structure between humans and other species. The method is applicable to any sequenced eukaryotic genome. 
At the moment, it details the human-mouse comparison.]]></description><secondaryInput name="image_format" isOptional="true" type="string" default="svg"/></step><step name="extract_encoded_image1"/><step name="Merge_string_list_to_string1"/><step name="Decode_base64_to_byte1"/><step name="Byte___to_String1"/><step name="Parse_Moby_Data_CExonic_Alignment_Text"><description><![CDATA[Processor to parse the datatype CExonic_Alignment_Text]]></description></step></release></workflow><workflow uuid="workflow:7e4b7adf-f6c4-4b3d-9623-dd5e795e8906" title="Find Gene Coexpression"><release uuid="workflow:7e4b7adf-f6c4-4b3d-9623-dd5e795e8906" lsid="urn:lsid:net.sf.taverna:wfDefinition:cabb400c-6d7e-4ec4-a452-434b5831964f" author="Joan Segura Mora" title="Find Gene Coexpression" path="7e4b7adf-f6c4-4b3d-9623-dd5e795e8906/workflow.xml" workflowType="application/vnd.taverna.scufl+xml" licenseName="CC Attribution-NonCommercial 3.0" licenseURI="http://creativecommons.org/licenses/by-nc/3.0/legalcode" responsibleMail="jsegura@cnb.csic.es" responsibleName="Joan Segura Mora" date="2009-05-26T10:39:48+02:00"><description><![CDATA[
]]></description><graph mime="image/png">7e4b7adf-f6c4-4b3d-9623-dd5e795e8906/workflow.png</graph><graph mime="image/svg+xml">7e4b7adf-f6c4-4b3d-9623-dd5e795e8906/workflow.svg</graph><graph mime="application/pdf">7e4b7adf-f6c4-4b3d-9623-dd5e795e8906/workflow.pdf</graph><input name="in"><mime type="text/plain"/></input><output name="out"><mime type="text/plain"/></output><step name="parseAnatomicalStructures"><description><![CDATA[Extract anatomical structures from ExpressionStatistics.]]></description></step><step name="parseExpressedGenes"><description><![CDATA[Extract expressed genes from ExpressionStatistics.]]></description></step><step name="parseExpressionStatistics" kind="moby"><description><![CDATA[Filters ExpressionStatistics by level, pattern, gene, structure and developmental stage.]]></description><secondaryInput name="filter_structure" isOptional="true" type="string" default="null"/><secondaryInput name="filter_stage" isOptional="true" type="string" default="0"/><secondaryInput name="filter_level_operator" isOptional="true" type="string" default="5"/><secondaryInput name="filter_development" isOptional="true" type="string" default="0"/><secondaryInput name="filter_gene" isOptional="true" type="string" default="null"/><secondaryInput name="filter_pattern" isOptional="true" type="string" default="Spotted"/><secondaryInput name="operator" isOptional="true" type="string" default="greater"/></step><step name="getExpressionStatistics1"><description><![CDATA[Gives the ExpressionStatistics from a set of structures or from a set of genes.]]></description></step><step name="getExpressionStatistics"><description><![CDATA[Gives the ExpressionStatistics from a set of structures or from a set of genes.]]></description></step><step name="parseExpressionStatistics1" kind="moby"><description><![CDATA[Filters ExpressionStatistics by level, pattern, gene, structure and developmental stage.]]></description><secondaryInput name="filter_structure" isOptional="true" 
type="string" default="null"/><secondaryInput name="filter_stage" isOptional="true" type="string" default="0"/><secondaryInput name="filter_level_operator" isOptional="true" type="string" default="5"/><secondaryInput name="filter_development" isOptional="true" type="string" default="0"/><secondaryInput name="filter_gene" isOptional="true" type="string" default="null"/><secondaryInput name="filter_pattern" isOptional="true" type="string" default="Spotted"/><secondaryInput name="operator" isOptional="true" type="string" default="greater"/></step></release></workflow><workflow uuid="workflow:108a7648-f66e-4139-a9bd-8e1f2ec76f72" title="firstwf"><release uuid="workflow:108a7648-f66e-4139-a9bd-8e1f2ec76f72" lsid="urn:lsid:net.sf.taverna:wfDefinition:1dc2cae1-1836-4594-9015-952e15b88768" author="" title="firstwf" path="108a7648-f66e-4139-a9bd-8e1f2ec76f72/workflow.xml" workflowType="application/vnd.taverna.scufl+xml" licenseName="CC Attribution-ShareAlike 3.0" licenseURI="http://creativecommons.org/licenses/by-sa/3.0/legalcode" responsibleMail="rorynewton23@googlemail.com" responsibleName="rory" date="2009-06-23T17:51:09+02:00"><description><![CDATA[
]]></description><graph mime="image/png">108a7648-f66e-4139-a9bd-8e1f2ec76f72/workflow.png</graph><graph mime="image/svg+xml">108a7648-f66e-4139-a9bd-8e1f2ec76f72/workflow.svg</graph><graph mime="application/pdf">108a7648-f66e-4139-a9bd-8e1f2ec76f72/workflow.pdf</graph><input name="in"><mime type="text/plain"/></input><output name="out"><mime type="text/plain"/></output><step name="Get_Protein_FASTA"/></release></workflow><workflow uuid="workflow:ecd1b4a7-b3c2-4c68-a410-0a9d08ec4e07" title="Workflow_VisualGenomics"><release uuid="workflow:ecd1b4a7-b3c2-4c68-a410-0a9d08ec4e07" lsid="urn:lsid:net.sf.taverna:wfDefinition:651536d5-a8fc-4b1c-9534-6a7d0f40ba63" author="" title="Workflow_VisualGenomics" path="ecd1b4a7-b3c2-4c68-a410-0a9d08ec4e07/workflow.xml" workflowType="application/vnd.taverna.scufl+xml" licenseName="CC Attribution-ShareAlike 3.0" licenseURI="http://creativecommons.org/licenses/by-sa/3.0/legalcode" responsibleMail="natalia@cnb.csic.es" responsibleName="natalia@cnb.csic.es" date="2009-07-06T11:47:11+02:00"><description><![CDATA[
]]></description><graph mime="image/png">ecd1b4a7-b3c2-4c68-a410-0a9d08ec4e07/workflow.png</graph><graph mime="image/svg+xml">ecd1b4a7-b3c2-4c68-a410-0a9d08ec4e07/workflow.svg</graph><graph mime="application/pdf">ecd1b4a7-b3c2-4c68-a410-0a9d08ec4e07/workflow.pdf</graph><output name="output"><mime type="text/plain"/></output><step name="runNCBIBlastn" kind="moby"><description><![CDATA[Compares a nucleotide sequence to a nucleotide sequence database and calculates the statistical significance of matches using the Basic Local Alignment Search Tool (BLAST).]]></description><secondaryInput name="extendgap" isOptional="true" type="string" default="1"/><secondaryInput name="gapalign" isOptional="true" type="string" default="true"/><secondaryInput name="database" isOptional="true" type="string" default="GenBank"/><secondaryInput name="alignments" isOptional="true" type="string" default="15"/><secondaryInput name="dropoff" isOptional="true" type="string" default="0"/><secondaryInput name="scores" isOptional="true" type="string" default="25"/><secondaryInput name="filter" isOptional="true" type="string" default="true"/><secondaryInput name="expected_threshold" isOptional="true" type="string" default="10.0"/><secondaryInput name="opengap" isOptional="true" type="string" default="1"/></step><step name="parseBlastIDs"><description><![CDATA[Parses sequence IDs from the hits of a blast report.]]></description></step><step name="fromEMBLToMGI"><description><![CDATA[Converts a EMBL id in a MGI id.]]></description></step><step name="getExpressionStatistics"><description><![CDATA[Gives the ExpressionStatistics from a set of structures or from a set of genes.]]></description></step><step name="String"><description><![CDATA[a string]]></description></step><step name="Integer"><description><![CDATA[an int]]></description></step><step name="NucleotideSequence"><description><![CDATA[Lightweight representation of any type of nucleotide sequence (DNA, R
NA, etc)]]></description></step><example uuid="5cd04a75-ec0f-4718-8f23-78e38824ecaa" name="human sequence" path="ecd1b4a7-b3c2-4c68-a410-0a9d08ec4e07/examples/5cd04a75-ec0f-4718-8f23-78e38824ecaa.xml" date="2009-07-06T11:51:21+02:00" responsibleMail="natalia@cnb.csic.es" responsibleName="Natalia"><![CDATA[<p>acggcacgaggagccggcgaggagttccccgaaacttgttggaactccgggctcgcgcggaggccaggagctgagcggcggcggctgccggacgatgggagcgtgagcaggacggtgataacctctccccgatcgggttgcgagggcgccgggcagaggccaggacgcgagccgccagcggcgggacccatcgacgacttcccggggcgacaggagcagccccgagagccagggcgagcgcccgttccaggtggccggaccgcccgccgcgtccgcgccgcgctccctgcaggcaacgggagacgcccccgcgcagcgcgagcgcctcagcgcggccgctcgctctccccatcgagggacaaacttttcccaaacccgatccgagcccttggaccaaactcgcctgcgccgagagccgtccgcgtagagcgctccgtctccggcgagatgtccgagcgcaaagaaggcagaggcaaagggaagggcaagaagaaggagcgaggctccggcaagaagccggagtccgcggcgggcagccagagcccagccttgcctccccaattgaaagagatgaaaagccaggaatcggctgcaggttccaaactagtccttcggtgtgaaaccagttctgaatactcctctctcagattcaagtggttcaagaatgggaatgaattgaatcgaaaaaacaaaccacaaaatatcaagatacaaaaaaagccagggaagtcagaacttcgcattaacaaagcatcactggctgattctggagagtatatgtgcaaagtgatcagcaaattaggaaatgacagtgcctctgccaatatcaccatcgtggaatcaaacgagatcatcactggtatgccagcctcaactgaaggagcatatgtgtcttcagagtctcccattagaatatcagtatccacagaaggagcaaatacttcttcatctacatctacatccaccactgggacaagccatcttgtaaaatgtgcggagaaggagaaaactttctgtgtgaatggaggggagtgcttcatggtgaaagacctttcaaacccctcgagatacttgtgcaagtaagaaaagaaatcctgtgtgtcgcttatgtctataactccttgtttcagatgattctatgtctcatgattgattgttgctttttttccaattttgttgcatcatgttgaataatgctgttttatatgtagagtcttttaaaacattcacaccattcgtcatcactcctctgtcatatgcagttttgttttttgctcttttcaatgtgtgtgaggtgttttttgtttttgtttttgtttttttgccatgttatttatagtgttgctttccttgtgctttccttgtggttttcttggttggttattcagaaaagatgtgcagatatcacagaggcctatagccttttggtatctacttctacatccaatgtatgaattaagctgtaagataatgttgctttcttatcccagtgatcacctgccaaatgaataagacaacaaagagaagcagaagggcaagaagattatttactgacatatatctattacacttgggattgtgcttactgttgcataactattttttaaacggagtttagttttatattgctagtaaaaaaaaaaaaaaaaaa</p>]]></example></release></workflow><
workflow uuid="workflow:7634bac9-97b7-4014-a794-2ebb7377ac22" title="Ask - just message parameter"><release uuid="workflow:7634bac9-97b7-4014-a794-2ebb7377ac22" lsid="urn:lsid:net.sf.taverna:wfDefinition:12dba187-99ad-4990-91ff-4708c7c2cb1f" author="Alan R Williams" title="Ask - just message parameter" path="7634bac9-97b7-4014-a794-2ebb7377ac22/workflow.xml" workflowType="application/vnd.taverna.scufl+xml" licenseName="CC Attribution-ShareAlike 3.0" licenseURI="http://creativecommons.org/licenses/by-sa/3.0/legalcode" responsibleMail="natalia@cnb.csic.es" responsibleName="Natalia Jiménez Lozano" date="2009-07-06T12:51:50+02:00"><description><![CDATA[The ask service displays a prompt with no title but with the prompt message 'Some message'.  If the user presses cancel, then a service failure is generated.  If the user presses OK then their input, even if it is empty, is passed to the answer port of the ask service and so to the output of the workflow.
]]></description><graph mime="image/png">7634bac9-97b7-4014-a794-2ebb7377ac22/workflow.png</graph><graph mime="image/svg+xml">7634bac9-97b7-4014-a794-2ebb7377ac22/workflow.svg</graph><graph mime="application/pdf">7634bac9-97b7-4014-a794-2ebb7377ac22/workflow.pdf</graph><output name="output"><mime type="text/plain"/></output><step name="parseExpressedGenes"><description><![CDATA[Extract expressed genes from ExpressionStatistics.]]></description></step><step name="runClustalwFast" kind="moby"><description><![CDATA[ClustalW multiple sequence alignments (fast method).]]></description><secondaryInput name="gapdist" isOptional="true" type="string" default="4"/><secondaryInput name="gapopen" isOptional="true" type="string" default=""/><secondaryInput name="endgaps" isOptional="true" type="string" default="off"/><secondaryInput name="pairgap" isOptional="true" type="string" default="3"/><secondaryInput name="matrix" isOptional="true" type="string" default=""/><secondaryInput name="topdiag" isOptional="true" type="string" default="5"/><secondaryInput name="transitions_weight" isOptional="true" type="string" default="0.5"/><secondaryInput name="gapext" isOptional="true" type="string" default=""/><secondaryInput name="ktup" isOptional="true" type="string" default="1"/><secondaryInput name="window" isOptional="true" type="string" default="5"/></step><step name="fromClustalwToPhylip"><description><![CDATA[It converts a Clustalw data type into a Phylip Interleaved data type.]]></description></step><step name="runPhylipDnamlk" kind="moby"><description><![CDATA[Estimates phylogenies from nucleotide sequences by maximum likelihood under the constraint that the trees estimated must be consistent with a molecular clock.]]></description><secondaryInput name="randseed" isOptional="true" type="string" default="3"/><secondaryInput name="freqa" isOptional="true" type="string" default="0.25"/><secondaryInput name="freqg" isOptional="true" type="string" default="0.25"/><secondaryInput 
name="lambda" isOptional="true" type="string" default="1.0"/><secondaryInput name="global" isOptional="true" type="string" default="no"/><secondaryInput name="freqc" isOptional="true" type="string" default="0.25"/><secondaryInput name="randtimes" isOptional="true" type="string" default="3"/><secondaryInput name="freqt" isOptional="true" type="string" default="0.25"/><secondaryInput name="basefrequency" isOptional="true" type="string" default="yes"/><secondaryInput name="random" isOptional="true" type="string" default="no"/><secondaryInput name="ttratio" isOptional="true" type="string" default="2.0"/></step><step name="fromMGIToEMBL"><description><![CDATA[Converts a MGI id in a EMBL id.]]></description></step><step name="getExpressionStatistics"><description><![CDATA[Gives the ExpressionStatistics from a set of structures or from a set of genes.]]></description></step><step name="getNucleotideSequence"><description><![CDATA[Retrieves an nucleotide sequence from an nucleotide database.]]></description></step><step name="displayClustering"><description><![CDATA[This service returns a GIF image representing a tree of clusters including their profiles. 
In addition, an URL to the image is returned in the output field 'legend']]></description></step><step name="Object"><description><![CDATA[an object]]></description></step><example uuid="ae0eab4b-383b-4ed4-a886-93aab4df0ef1" name="" path="7634bac9-97b7-4014-a794-2ebb7377ac22/examples/ae0eab4b-383b-4ed4-a886-93aab4df0ef1.xml" date="2009-07-06T12:56:23+02:00" responsibleMail="natalia@cnb.csic.es" responsibleName="Natalia"><![CDATA[<p>MA0000821</p>]]></example></release></workflow><workflow uuid="workflow:9c45bf46-0ebb-4801-8207-961b5213e8cb" title="Ask - just message parameter"><release uuid="workflow:9c45bf46-0ebb-4801-8207-961b5213e8cb" lsid="urn:lsid:net.sf.taverna:wfDefinition:12dba187-99ad-4990-91ff-4708c7c2cb1f" author="Alan R Williams" title="Ask - just message parameter" path="9c45bf46-0ebb-4801-8207-961b5213e8cb/workflow.xml" workflowType="application/vnd.taverna.scufl+xml" licenseName="CC Attribution-NonCommercial-NoDerivs 3.0" licenseURI="http://creativecommons.org/licenses/by-nc-nd/3.0/legalcode" responsibleMail="jsegura@cnb.csic.es" responsibleName="joan segura mora" date="2009-07-24T11:45:51+02:00"><description><![CDATA[The ask service displays a prompt with no title but with the prompt message 'Some message'.  If the user presses cancel, then a service failure is generated.  If the user presses OK then their input, even if it is empty, is passed to the answer port of the ask service and so to the output of the workflow.
]]></description><graph mime="image/png">9c45bf46-0ebb-4801-8207-961b5213e8cb/workflow.png</graph><graph mime="image/svg+xml">9c45bf46-0ebb-4801-8207-961b5213e8cb/workflow.svg</graph><graph mime="application/pdf">9c45bf46-0ebb-4801-8207-961b5213e8cb/workflow.pdf</graph><input name="ID"><mime type="text/xml"/></input><input name="NAME SPACE"><mime type="text/plain"/></input><output name="output"><mime type="text/plain"/></output><step name="fromClustalwToPhylip"><description><![CDATA[It converts a Clustalw data type into a Phylip Interleaved data type.]]></description></step><step name="parseExpressionStatistics" kind="moby"><description><![CDATA[Filters ExpressionStatistics by level, pattern, gene, structure and developmental stage.]]></description><secondaryInput name="filter_structure" isOptional="true" type="string" default="null"/><secondaryInput name="filter_stage" isOptional="true" type="string" default="0"/><secondaryInput name="filter_level_operator" isOptional="true" type="string" default="6"/><secondaryInput name="filter_development" isOptional="true" type="string" default="0"/><secondaryInput name="filter_gene" isOptional="true" type="string" default="null"/><secondaryInput name="filter_pattern" isOptional="true" type="string" default="null"/><secondaryInput name="operator" isOptional="true" type="string" default="greater"/></step><step name="parseExpressedGenes"><description><![CDATA[Extract expressed genes from ExpressionStatistics.]]></description></step><step name="displayClustering"><description><![CDATA[This service returns a GIF image representing a tree of clusters including their profiles. 
In addition, an URL to the image is returned in the output field 'legend']]></description></step><step name="fromMGIToRefSeq"><description><![CDATA[Converts a MGI id in a RefSeq id.]]></description></step><step name="Object"><description><![CDATA[an object]]></description></step><step name="getExpressionStatistics"><description><![CDATA[Gives the ExpressionStatistics from a set of structures or from a set of genes.]]></description></step><step name="getNucleotideSequence"><description><![CDATA[Retrieves an nucleotide sequence from an nucleotide database.]]></description></step><step name="runClustalwFast" kind="moby"><description><![CDATA[Makes a ClustalW multiple sequence alignment (fast method).]]></description><secondaryInput name="ktup" isOptional="true" type="string" default="1"/><secondaryInput name="gapext" isOptional="true" type="string" default=""/><secondaryInput name="transitions_weight" isOptional="true" type="string" default="0.5"/><secondaryInput name="window" isOptional="true" type="string" default="5"/><secondaryInput name="topdiag" isOptional="true" type="string" default="5"/><secondaryInput name="matrix" isOptional="true" type="string" default=""/><secondaryInput name="pairgap" isOptional="true" type="string" default="3"/><secondaryInput name="endgaps" isOptional="true" type="string" default="off"/><secondaryInput name="gapdist" isOptional="true" type="string" default="4"/><secondaryInput name="gapopen" isOptional="true" type="string" default=""/></step><step name="runPhylipDnaml" kind="moby"><description><![CDATA[Estimates phylogenies from nucleotide sequences by maximum likelihood.]]></description><secondaryInput name="randseed" isOptional="true" type="string" default="3"/><secondaryInput name="freqa" isOptional="true" type="string" default="0.25"/><secondaryInput name="freqg" isOptional="true" type="string" default="0.25"/><secondaryInput name="global" isOptional="true" type="string" default="no"/><secondaryInput name="lambda" isOptional="true" 
type="string" default="1.0"/><secondaryInput name="freqc" isOptional="true" type="string" default="0.25"/><secondaryInput name="randtimes" isOptional="true" type="string" default="3"/><secondaryInput name="freqt" isOptional="true" type="string" default="0.25"/><secondaryInput name="outgroup_root" isOptional="true" type="string" default="no"/><secondaryInput name="basefrequency" isOptional="true" type="string" default="yes"/><secondaryInput name="random" isOptional="true" type="string" default="no"/><secondaryInput name="ttratio" isOptional="true" type="string" default="2"/><secondaryInput name="outgnum" isOptional="true" type="string" default="1"/></step></release></workflow><workflow uuid="workflow:17db8532-9740-4fc1-b65d-9c42cc78ca4d" title="Conservation of Exon Structure"><release uuid="workflow:17db8532-9740-4fc1-b65d-9c42cc78ca4d" lsid="urn:lsid:net.sf.taverna:wfDefinition:15467c2e-c99a-492b-8bf2-67d0c82854c2" author="José Manuel Rodríguez Carrasco INB-GN2-CNIO (jmrodriguez@cnio.es)" title="Conservation of Exon Structure" path="17db8532-9740-4fc1-b65d-9c42cc78ca4d/workflow.xml" workflowType="application/vnd.taverna.scufl+xml" licenseName="CC Attribution-ShareAlike 3.0" licenseURI="http://creativecommons.org/licenses/by-sa/3.0/legalcode" responsibleMail="jmrodriguez@cnio.es" responsibleName="JM" date="2009-09-22T16:52:51+02:00"><description><![CDATA[CExonic is a method for the determination of conservation of exonic structure.

Transcription data are frequently being used to study alternative splicing in one species. However, splicing isoforms may be conserved between species of a certain evolutionary distance. Therefore, cross-species comparison of splicing isoforms may provide insight into the conservation of alternative splicing. The assessment of functional alternative splicing requires the identification of the gene product that retains the core biological function. The conservation of exonic structure between orthologous splicing isoforms of two species would suggest that they exist in both species and that their biological function may be conserved.

CExonic was developed in order to determine if splicing isoforms can be aligned to the genomic DNA of their orthologous gene in another species with conservation of exonic structure. Users can enter a VEGA identifier of a human gene or transcript and the CExonic server will align this transcript to the genomic DNA of its orthologous gene in mouse and assess whether its exonic structure is conserved in mouse. First, it determines the orthologous gene in the mouse genome, using tblastx. Then, the human transcript is aligned to this mouse genomic DNA using exonerate. Subsequently, the human and predicted mouse transcripts are aligned using muscle and the exon/intron coordinates are superimposed on the alignment, similar to exstral[1]. If the aligned intron positions coincide, the exonic structure is conserved.

http://cexonic.bioinfo.cnio.es
]]></description><graph mime="image/png">17db8532-9740-4fc1-b65d-9c42cc78ca4d/workflow.png</graph><graph mime="image/svg+xml">17db8532-9740-4fc1-b65d-9c42cc78ca4d/workflow.svg</graph><graph mime="application/pdf">17db8532-9740-4fc1-b65d-9c42cc78ca4d/workflow.pdf</graph><input name="chromosome"><mime type="text/plain"/></input><input name="start"><mime type="text/plain"/></input><input name="end"><mime type="text/plain"/></input><input name="id"><mime type="text/plain"/></input><input name="namespace"><mime type="text/plain"/></input><output name="alignment text"><mime type="text/xml"/></output><output name="first alignment image"><mime type="image/png"/><mime type="image/svg+xml"/><mime type="image/jpeg"/><mime type="application/pdf"/></output><output name="second alignment image"><mime type="image/svg+xml"/><mime type="image/png"/><mime type="image/jpeg"/></output><step name="Byte___to_String"/><step name="xpath_Image_Encoded"/><step name="Merge_string_list_to_string"/><step name="extract_encoded_image"/><step name="Decode_base64_to_byte"/><step name="extract_encoded_image1"/><step name="Merge_string_list_to_string1"/><step name="Decode_base64_to_byte1"/><step name="Byte___to_String1"/><step name="specie"/><step name="Merge_string_list_to_string2"/><step name="Integer1"><description><![CDATA[an int]]></description></step><step name="Integer"><description><![CDATA[an int]]></description></step><step name="Genomic_Location"><description><![CDATA[Genomic Location]]></description></step><step name="Integer2"><description><![CDATA[an int]]></description></step><step name="String"><description><![CDATA[a string]]></description></step><step name="runCExonic" kind="moby"><description><![CDATA[CExonic compares the conservation of exon structure between humans and other species. The method is applicable to any sequenced eukaryotic genome. 
At the moment, it details the human-mouse comparison.]]></description><secondaryInput name="image_format" isOptional="true" type="string" default="svg"/></step><step name="Parse_Moby_Data_CExonic_Alignment_Text"><description><![CDATA[Processor to parse the datatype CExonic_Alignment_Text]]></description></step></release></workflow></domain></workflowlist>
