| 1 | #:gmaj |
---|
| 2 | |
---|
| 3 | #---------------------------------------------------------------- |
---|
| 4 | # This file specifies input parameters for a Gmaj dataset. |
---|
| 5 | # See below for explanatory comments. |
---|
| 6 | #---------------------------------------------------------------- |
---|
| 7 | |
---|
| 8 | title = "My favorite genomic region" |
---|
| 9 | datapath = /home/cathy/mydata/favreg/ |
---|
| 10 | alignfile = tba.maf mlagan.maf |
---|
| 11 | refseq = any |
---|
| 12 | reconorg = none |
---|
| 13 | tabext = .gff .gtf .bed .ct .trk |
---|
| 14 | nowarn = maf_version repeat_type_missing |
---|
| 15 | skipotherseq = false |
---|
| 16 | |
---|
| 17 | seq 0: |
---|
| 18 | seqname = human.chr11 hg17.chr11 human |
---|
| 19 | exons = human.exons.bed chr11 |
---|
| 20 | repeats = human.repeats |
---|
| 21 | links = human.links |
---|
| 22 | underlays = human.exons.bed chr11 exons |
---|
| 23 | #underlays.1 = human-mouse.underlays |
---|
| 24 | #underlays.2 = human-rat.underlays |
---|
| 25 | highlights = human.highlights |
---|
| 26 | offset = 4730995 |
---|
| 27 | |
---|
| 28 | seq 1: |
---|
| 29 | seqname = mouse.chr7 |
---|
| 30 | exons = mouse.exons |
---|
| 31 | repeats = mouse.repeats |
---|
| 32 | links = mouse.links |
---|
| 33 | underlays = mouse.underlays |
---|
| 34 | #underlays.0 = mouse-human.underlays |
---|
| 35 | #underlays.2 = mouse-rat.underlays |
---|
| 36 | highlights = mouse.highlights |
---|
| 37 | offset = 0 |
---|
| 38 | |
---|
| 39 | seq 2: |
---|
| 40 | seqname = rat.chr1 |
---|
| 41 | exons = rat.exons |
---|
| 42 | repeats = rat.repeats |
---|
| 43 | links = rat.links |
---|
| 44 | underlays = rat.underlays |
---|
| 45 | #underlays.0 = rat-human.underlays |
---|
| 46 | #underlays.1 = rat-mouse.underlays |
---|
| 47 | highlights = rat.highlights |
---|
| 48 | offset = 0 |
---|
| 49 | |
---|
| 50 | #---------------------------------------------------------------- |
---|
| 51 | # This file specifies input parameters for Gmaj, including the |
---|
| 52 | # names of all data files. You can omit this file and just give |
---|
| 53 | # Gmaj the name of your alignment file directly, but then you |
---|
| 54 | # don't get the opportunity to provide annotations, offsets, |
---|
| 55 | # additional alignment files, or other optional features. |
---|
| 56 | # |
---|
| 57 | # Syntax: |
---|
| 58 | # |
---|
| 59 | # Each key=value(s) pair must reside on its own single, separate |
---|
| 60 | # line. (Note that although the '=' was formerly optional, it |
---|
| 61 | # is now required.) Other than that, the format is fairly loose. |
---|
| 62 | # Even the order of lines is arbitrary, except that "seq N:" |
---|
| 63 | # defines the current sequence until it is superseded by a new |
---|
| 64 | # "seq N:" line. Values containing spaces must be enclosed in |
---|
| 65 | # double quotes. Embedded quotes in such strings can be escaped |
---|
| 66 | # with '\', but there is no way to escape the backslash: quoted |
---|
| 67 | # values should not end with '\' (insert a space before the |
---|
| 68 | # final quote if necessary). Lines with missing values are |
---|
| 69 | # skipped. A '#' at the beginning of a line marks a comment |
---|
| 70 | # that will be ignored, except for the identifier tag "#:gmaj" |
---|
| 71 | # at the top, which is mandatory. |
---|
| 72 | # |
---|
| 73 | # Required Fields: |
---|
| 74 | # |
---|
| 75 | # At least one alignfile is required. You do not have to provide |
---|
| 76 | # a section for every sequence (by default they will still be |
---|
| 77 | # displayed), but for each sequence you do mention, the "seq N:" |
---|
| 78 | # line and the seqname field are also required. Everything else |
---|
| 79 | # is optional. |
---|
| 80 | # |
---|
| 81 | # File Names and Locations: |
---|
| 82 | # |
---|
| 83 | # Filenames can be relative or absolute (fully qualified paths). |
---|
| 84 | # Gmaj will look for relative names in the following locations: |
---|
| 85 | # |
---|
| 86 | # 1. the separately specified "bundle" file (if any) |
---|
| 87 | # 2. the "datapath" specified here (if any) |
---|
| 88 | # 3. the same directory as this parameters file |
---|
| 89 | # |
---|
| 90 | # If you are using Gmaj's "bundle" feature, you must refer to |
---|
| 91 | # the files located in the bundle by their plain filenames, |
---|
| 92 | # without any path. |
---|
| 93 | # |
---|
| 94 | # Title: |
---|
| 95 | # |
---|
| 96 | # This string will be used as the title for the Gmaj windows. |
---|
| 97 | # Typically it describes the alignment data, including the name |
---|
| 98 | # of the locus. It does not control the applet button's label, |
---|
| 99 | # however, because the applet has not read this file yet; |
---|
| 100 | # instead there is a separate applet parameter for that. |
---|
| 101 | # |
---|
| 102 | # Reference Sequence: |
---|
| 103 | # |
---|
| 104 | # The refseq field identifies the reference sequence used in the |
---|
| 105 | # alignments. The default value "any" means that the alignments |
---|
| 106 | # were generated by a sequence-symmetric program such as TBA, so |
---|
| 107 | # the user should be allowed to select the reference sequence |
---|
| 108 | # interactively. Otherwise, the value must match the appropriate |
---|
| 109 | # sequence name from the MAF files (including the contig name, if |
---|
| 110 | # applicable). |
---|
| 111 | # |
---|
| 112 | # Reconstructed Sequence: |
---|
| 113 | # |
---|
| 114 | # If the alignment files include score rows for an ancestral |
---|
| 115 | # reconstruction, the reconorg field identifies which organism |
---|
| 116 | # these scores apply to. The default value "none" means Gmaj |
---|
| 117 | # will ignore the scores; otherwise the value must match the |
---|
| 118 | # species prefix of the appropriate sequence names from the MAF |
---|
| 119 | # files. Contig name extensions (e.g. ".chrX") are omitted, as |
---|
| 120 | # the scores can apply to any contig for that organism. A score |
---|
| 121 | # can be supplied only once for each base in the ancestral |
---|
| 122 | # genome. |
---|
| 123 | # |
---|
| 124 | # Tabular File Extensions: |
---|
| 125 | # |
---|
| 126 | # The tabext field specifies which filename extensions should |
---|
| 127 | # be treated as generic, tab-delimited formats (GFF/GTF/BED) |
---|
| 128 | # instead of the old PipMaker-style formats. The default list |
---|
| 129 | # is ".gff .gtf .bed .ct .trk". Note that it doesn't actually |
---|
| 130 | # matter which of these is used for a particular file, just |
---|
| 131 | # whether it is in the list. |
---|
| 132 | # |
---|
| 133 | # Warning Suppression: |
---|
| 134 | # |
---|
| 135 | # The nowarn field lists keywords for particular warning |
---|
| 136 | # messages that should not be displayed. This is especially |
---|
| 137 | # useful for applets, when the administrator has seen the |
---|
| 138 | # warning, checked the data, and determined that everything |
---|
| 139 | # is OK and the end user does not need to see the warning. |
---|
| 140 | # The keyword for each suppressible message is displayed at |
---|
| 141 | # the bottom of the message. |
---|
| 142 | # |
---|
| 143 | # Ignoring Sequences: |
---|
| 144 | # |
---|
| 145 | # The skipotherseq field specifies whether sequences that appear |
---|
| 146 | # in the MAF files but are not mentioned here should be ignored. |
---|
| 147 | # If so, these rows are simply skipped; no adjustments are made |
---|
| 148 | # to remove all-gap columns or join adjacent blocks, and empty |
---|
| 149 | # blocks are kept to preserve the MAF files' block numbering. |
---|
| 150 | # This feature is useful for saving memory, and for reducing the |
---|
| 151 | # number of pips when some species have many aligning contigs. |
---|
| 152 | # The default value is false, so all sequences are displayed. |
---|
| 153 | # |
---|
| 154 | # Sequence Numbers and Sequence Names: |
---|
| 155 | # |
---|
| 156 | # The seqname field serves to match up the parameter entries with |
---|
| 157 | # the sequence name in each row of the MAF alignments (including |
---|
| 158 | # the contig name, if applicable). The sequence number assigns |
---|
| 159 | # the display order, and is also used to identify the secondary |
---|
| 160 | # sequence for plot-specific underlays (see below). |
---|
| 161 | # |
---|
| 162 | # Multiple values can be given for each seqname keyword; in this |
---|
| 163 | # case the first is the primary name to be used for display, and |
---|
| 164 | # the rest are aliases for it. This is useful when two MAF files |
---|
| 165 | # use different names for the same sequences, or simply for |
---|
| 166 | # changing the display labels. Alias resolution is applied to |
---|
| 167 | # MAF seqnames, the refseq field, and the initzoom parameter, but |
---|
| 168 | # not to the reconorg field or annotation files. All primary and |
---|
| 169 | # alias names must be unique (except in the special case of |
---|
| 170 | # pairwise self-alignments). |
---|
| 171 | # |
---|
| 172 | # Sequence numbers start with 0 and must turn out to be |
---|
| 173 | # consecutive, after Gmaj fills in any gaps you leave with the |
---|
| 174 | # MAF sequences you don't mention here. Thus by default, if |
---|
| 175 | # the alignment files include ten sequences, the valid sequence |
---|
| 176 | # numbers would be 0-9, and Gmaj will assign any that you omit |
---|
| 177 | # (in the order it encounters them, which is affected by file |
---|
| 178 | # bundling). However, if you set skipotherseq = true, then you |
---|
| 179 | # must assign consecutive numbers because Gmaj will not assign |
---|
| 180 | # any. |
---|
| 181 | # |
---|
| 182 | # File Specification Modifiers: |
---|
| 183 | # |
---|
| 184 | # The generic, tabular annotation formats (GFF/GTF/BED) allow |
---|
| 185 | # entries for several sequences to be combined in one file, |
---|
| 186 | # since they can be distinguished by the "seqname" or "chrom" |
---|
| 187 | # column. However, in this case Gmaj will expect the column |
---|
| 188 | # value to match the seqname from the MAF alignments. If it |
---|
| 189 | # does not (e.g. if the MAF files include a species prefix but |
---|
| 190 | # the annotation file omits it), you can add a sequence |
---|
| 191 | # designation after the filename to tell Gmaj what to look for |
---|
| 192 | # in the annotation file. |
---|
| 193 | # |
---|
| 194 | # Gmaj has special support for annotation data that represents |
---|
| 195 | # exons or repeats (namely adding exon numbers and inferring |
---|
| 196 | # UTRs, or finding the PipMaker repeat category). For the exons |
---|
| 197 | # and repeats panels this is automatic, but you can also invoke |
---|
| 198 | # it explicitly for files used as linkbars, underlays, or text |
---|
| 199 | # highlights by adding a type hint of "exons" or "repeats" after |
---|
| 200 | # the filename. This only works if the file is in a generic |
---|
| 201 | # (GFF/GTF/BED) format and contains the appropriate type of data |
---|
| 202 | # (genes/exons or repeats). |
---|
| 203 | # |
---|
| 204 | # Underlays and Highlights: |
---|
| 205 | # |
---|
| 206 | # Gmaj allows you to specify color underlays independently for |
---|
| 207 | # each plot, i.e. for each combination of reference and |
---|
| 208 | # secondary sequences. Thus in the "seq 1:" section, the |
---|
| 209 | # "underlays.0" entry specifies the underlay file to be used |
---|
| 210 | # when sequence 1 is the reference and sequence 0 is the second |
---|
| 211 | # sequence. Note that there is, for example, no "underlays.1" |
---|
| 212 | # entry in the "seq 1:" section, since we do not usually have |
---|
| 213 | # plots aligning sequences with themselves. |
---|
| 214 | # |
---|
| 215 | # However, specifying a quadratic number of files quickly becomes |
---|
| 216 | # burdensome as the number of sequences grows. For the common |
---|
| 217 | # case where the same underlay file is used for most or all of a |
---|
| 218 | # particular reference sequence's plots, the plain "underlays" |
---|
| 219 | # entry (without a number) provides a default for that reference |
---|
| 220 | # sequence. This can still be overridden as needed by numbered |
---|
| 221 | # entries for special plots. |
---|
| 222 | # |
---|
| 223 | # The highlights file specifies colors for a particular row of |
---|
| 224 | # the text display, so there is only one for each sequence. If |
---|
| 225 | # you omit it, Gmaj will build default highlights based on the |
---|
| 226 | # exons file (if you provided one). |
---|
| 227 | # |
---|
| 228 | # Offsets: |
---|
| 229 | # |
---|
| 230 | # The offset parameter is used for display purposes only. It |
---|
| 231 | # specifies an adjustment to be added to all position labels and |
---|
| 232 | # displayed references for a particular sequence. For example, |
---|
| 233 | # this allows positions to be labeled with respect to some larger |
---|
| 234 | # region. However, note that all annotations must still be |
---|
| 235 | # specified relative to the sequences referred to in the MAF |
---|
| 236 | # files. |
---|
| 237 | # |
---|
| 238 | #---------------------------------------------------------------- |
---|
| 239 | # Cathy Riemer, June 2008 |
---|