Was this page helpful?

Supplementary information

     

     

    Table S1 Sequence Composition

    Result of the EMBOSS compseq program.

    #
    # Output from 'compseq'
    #
    # The Expected frequencies are calculated on the (false) assumption that every
    # word has equal frequency.
    #
    # The input sequences are:
    # seq207


    Word size 2
    Total count 399999

    Word       Obs N    Obs Freq    Exp Freq   Obs/Exp Freq
    AA            34034   0.0850852  0.0625000  1.3613634
    AC            21280   0.0532001  0.0625000  0.8512021
    AG            26054   0.0651352  0.0625000  1.0421626
    AT            27792   0.0694802  0.0625000  1.1116828
    CA            27237   0.0680927  0.0625000  1.0894827
    CC            23008   0.0575201  0.0625000  0.9203223
    CG            14782   0.0369551  0.0625000  0.5912815
    CT            26095   0.0652377  0.0625000  1.0438026
    GA            25872   0.0646802  0.0625000  1.0348826
    GC            21038   0.0525951  0.0625000  0.8415221
    GG            22531   0.0563276  0.0625000  0.9012423
    GT            20234   0.0505851  0.0625000  0.8093620
    TA            22014   0.0550351  0.0625000  0.8805622
    TC            25793   0.0644827  0.0625000  1.0317226
    TG            26310   0.0657752  0.0625000  1.0524026
    TT            32996   0.0824902  0.0625000  1.3198433

    Other 2929  0.0073225 0.0000000 10000000000.0000000

     

     

    Table S2 CpG analysis

    CPGPLOT islands of unusual CG composition
    seq207 from 1 to 400000
    
         Observed/Expected ratio > 0.60
         Percent C + Percent G > 50.00
         Length > 200
    
     Length 321 (1282..1602)
    
     Length 202 (2908..3109)
    
     Length 411 (9129..9539)
    
     Length 1377 (15292..16668)
    
     Length 3841 (16691..20531)
    
     Length 638 (20641..21278)
    
     Length 1478 (21288..22765)
    
     Length 849 (22775..23623)
    
     Length 1061 (23814..24874)
    
     Length 325 (24929..25253)
    
     Length 443 (25496..25938)
    
     Length 532 (25954..26485)
    
     Length 954 (26535..27488)
    
     Length 1265 (27565..28829)
    
     Length 300 (28864..29163)
    
     Length 927 (35409..36335)
    
     Length 223 (42944..43166)
    
     Length 311 (43636..43946)
    
     Length 1626 (44249..45874)
    
     Length 316 (45882..46197)
    
     Length 235 (46204..46438)
    
     Length 529 (46465..46993)
    
     Length 297 (49607..49903)
    
     Length 563 (50048..50610)
    
     Length 890 (53737..54626)
    
     Length 474 (54958..55431)
    
     Length 364 (57059..57422)
    
     Length 476 (63091..63566)
    
     Length 280 (66657..66936)
    
     Length 267 (74216..74482)
    
     Length 400 (75381..75780)
    
     Length 814 (75836..76649)
    
     Length 290 (79538..79827)
    
     Length 209 (88376..88584)
    
     Length 296 (94782..95077)
    
     Length 397 (95141..95537)
    
     Length 250 (95900..96149)
    
     Length 287 (96916..97202)
    
     Length 902 (102089..102990)
    
     Length 400 (106065..106464)
    
     Length 816 (106520..107335)
    
     Length 214 (114202..114415)
    
     Length 474 (116794..117267)
    
     Length 219 (117912..118130)
    
     Length 321 (122104..122424)
    
     Length 254 (122960..123213)
    
     Length 439 (124589..125027)
    
     Length 487 (129083..129569)
    
     Length 204 (134668..134871)
    
     Length 201 (146920..147120)
    
     Length 265 (150344..150608)
    
     Length 227 (156240..156466)
    
     Length 244 (163682..163925)
    
     Length 838 (168278..169115)
    
     Length 419 (171494..171912)
    
     Length 341 (173365..173705)
    
     Length 224 (182437..182660)
    
     Length 976 (182704..183679)
    
     Length 706 (184770..185475)
    
     Length 1226 (185778..187003)
    
     Length 1103 (187280..188382)
    
     Length 350 (190410..190759)
    
     Length 383 (191439..191821)
    
     Length 491 (192005..192495)
    
     Length 256 (193709..193964)
    
     Length 288 (198612..198899)
    
     Length 534 (200095..200628)
    
     Length 573 (201200..201772)
    
     Length 587 (201800..202386)
    
     Length 996 (202552..203547)
    
     Length 493 (203776..204268)
    
     Length 1048 (204341..205388)
    
     Length 289 (205582..205870)
    
     Length 296 (206001..206296)
    
     Length 312 (207089..207400)
    
     Length 503 (208829..209331)
    
     Length 615 (212023..212637)
    
     Length 361 (221662..222022)
    
     Length 451 (223027..223477)
    
     Length 387 (223480..223866)
    
     Length 441 (223951..224391)
    
     Length 233 (225201..225433)
    
     Length 331 (228329..228659)
    
     Length 350 (228709..229058)
    
     Length 314 (229238..229551)
    
     Length 216 (237803..238018)
    
     Length 286 (242413..242698)
    
     Length 473 (242702..243174)
    
     Length 284 (243179..243462)
    
     Length 1695 (243511..245205)
    
     Length 968 (253495..254462)
    
     Length 449 (258222..258670)
    
     Length 297 (258755..259051)
    
     Length 284 (259063..259346)
    
     Length 220 (260505..260724)
    
     Length 402 (261531..261932)
    
     Length 712 (262247..262958)
    
     Length 838 (262989..263826)
    
     Length 411 (264019..264429)
    
     Length 246 (264455..264700)
    
     Length 309 (264722..265030)
    
     Length 578 (265032..265609)
    
     Length 204 (265991..266194)
    
     Length 326 (269530..269855)
    
     Length 212 (270624..270835)
    
     Length 290 (271472..271761)
    
     Length 357 (275680..276036)
    
     Length 885 (278524..279408)
    
     Length 884 (289741..290624)
    
     Length 277 (292454..292730)
    
     Length 304 (293927..294230)
    
     Length 884 (300397..301280)
    
     Length 277 (303110..303386)
    
     Length 350 (308585..308934)
    
     Length 851 (309967..310817)
    
     Length 407 (312451..312857)
    
     Length 467 (320533..320999)
    
     Length 266 (322148..322413)
    
     Length 319 (322747..323065)
    
     Length 293 (326887..327179)
    
     Length 301 (328737..329037)
    
     Length 202 (332360..332561)
    
     Length 249 (335658..335906)
    
     Length 276 (343710..343985)
    
     Length 277 (345598..345874)
    
     Length 871 (347700..348570)
    
     Length 307 (354728..355034)
    
     Length 280 (356227..356506)
    
     Length 324 (357789..358112)
    
     Length 548 (358116..358663)
    
     Length 239 (366691..366929)
    
     Length 446 (367114..367559)
    
     Length 565 (368836..369400)
    
     Length 208 (371138..371345)
    
     Length 304 (371804..372107)
    
     Length 221 (374997..375217)
    
     Length 566 (376464..377029)
    
     Length 294 (377693..377986)
    
     Length 379 (380678..381056)
    
     Length 888 (385299..386186)
    
     Length 216 (388080..388295)
    
     Length 210 (389302..389511)
    
     Length 871 (397225..398095)
    
     Length 210 (399734..399943)
    

     

    Table S3 Retroelement and Transposon Analysis

    http://www.repeatmasker.org/cgi-bin/WEBRepeatMasker

    Summary:

    ==================================================
    file name: RM2_207.txt_1287460289   
    sequences:             1
    total length:     400000 bp  (397100 bp excl N/X-runs)
    GC level:         45.53 %
    bases masked:     332617 bp ( 83.15 %)
    ==================================================
                   number of      length   percentage
                   elements*    occupied  of sequence
    --------------------------------------------------
    Retroelements          149       329315 bp   82.33 %
       SINEs:                0            0 bp    0.00 %
       Penelope              0            0 bp    0.00 %
       LINEs:               14         4888 bp    1.22 %
        CRE/SLACS            0            0 bp    0.00 %
         L2/CR1/Rex          0            0 bp    0.00 %
         R1/LOA/Jockey       0            0 bp    0.00 %
         R2/R4/NeSL          0            0 bp    0.00 %
         RTE/Bov-B           6         1100 bp    0.28 %
         L1/CIN4             8         3788 bp    0.95 %
       LTR elements:       135       324427 bp   81.11 %
         BEL/Pao             0            0 bp    0.00 %
         Ty1/Copia          53       113889 bp   28.47 %
         Gypsy/DIRS1        82       210538 bp   52.63 %
           Retroviral        0            0 bp    0.00 %
    
    DNA transposons         14         2433 bp    0.61 %
       hobo-Activator        6         1529 bp    0.38 %
       Tc1-IS630-Pogo        0            0 bp    0.00 %
       En-Spm                0            0 bp    0.00 %
       MuDR-IS905            1          199 bp    0.05 %
       PiggyBac              0            0 bp    0.00 %
       Tourist/Harbinger     1          156 bp    0.04 %
       Other (Mirage,        0            0 bp    0.00 %
        P-element, Transib)
    
    Rolling-circles          0            0 bp    0.00 %
    
    Unclassified:            0            0 bp    0.00 %
    
    Total interspersed repeats:      331748 bp   82.94 %
    
    
    Small RNA:               0            0 bp    0.00 %
    
    Satellites:              0            0 bp    0.00 %
    Simple repeats:         10          482 bp    0.12 %
    Low complexity:          9          387 bp    0.10 %
    ==================================================
    
    * most repeats fragmented by insertions or deletions
      have been counted as one element
                                                          
    
    The query species was assumed to be zea           
    RepeatMasker version open-3.2.9 , default mode
    

     

    Table S4 FGENESH predicted genes

    Program: FGENESH, Softberry, Inc.

    Input: repeat-masked sequence

    FGENESH 2.6 Prediction of potential genes in Monocot genomic DNA
     Time    :   Sat Oct 23 23:10:33 2010
     Seq name: seq207 400001-400000 
     Length of sequence: 400000 
     Number of predicted genes 3: in +chain 3, in -chain 0.
     Number of predicted exons 32: in +chain 32, in -chain 0.
     Positions of predicted genes and exons: Variant   1 from   1, Score:225.844214 
       G Str   Feature   Start        End    Score           ORF           Len
    
       1 +      TSS     159546              -13.38
       1 +    1 CDSf    160825 -    161232   25.98    160825 -    161232    408
       1 +    2 CDSl    161893 -    162012   -3.53    161893 -    162012    120
       1 +      PolA    163555               -1.06
    
       2 +      TSS     211939               -3.58
       2 +    1 CDSf    212181 -    212216    9.79    212181 -    212216     36
       2 +    2 CDSi    212299 -    212618   15.28    212299 -    212616    318
       2 +    3 CDSi    212713 -    212866   15.76    212714 -    212866    153
       2 +    4 CDSi    212958 -    213026    6.65    212958 -    213026     69
       2 +    5 CDSi    213117 -    214064   43.89    213117 -    214064    948
       2 +    6 CDSi    216787 -    216892    0.77    216787 -    216891    105
       2 +    7 CDSi    217405 -    217469   -0.97    217407 -    217469     63
       2 +    8 CDSi    218091 -    218155    1.36    218091 -    218153     63
       2 +    9 CDSi    218290 -    218395    8.74    218291 -    218395    105
       2 +   10 CDSi    218470 -    218670   21.31    218470 -    218670    201
       2 +   11 CDSi    219924 -    220121   13.77    219924 -    220121    198
       2 +   12 CDSl    220892 -    221125    2.22    220892 -    221125    234
       2 +      PolA    222985                0.44
    
       3 +      TSS     229661               -7.08
       3 +    1 CDSf    230091 -    230171    3.07    230091 -    230171     81
       3 +    2 CDSi    230266 -    230505   15.02    230266 -    230505    240
       3 +    3 CDSi    230881 -    231093   22.20    230881 -    231093    213
       3 +    4 CDSi    231258 -    231674   10.82    231258 -    231674    417
       3 +    5 CDSi    231990 -    232142   17.13    231990 -    232142    153
       3 +    6 CDSi    233336 -    233542   11.95    233336 -    233542    207
       3 +    7 CDSi    245759 -    245977   18.63    245759 -    245977    219
       3 +    8 CDSi    246062 -    246179   12.84    246062 -    246178    117
       3 +    9 CDSi    247916 -    248892   20.89    247918 -    248892    975
       3 +   10 CDSi    250273 -    250467    4.69    250273 -    250467    195
       3 +   11 CDSi    250769 -    251028   22.17    250769 -    251026    258
       3 +   12 CDSi    251883 -    251958   -1.02    251884 -    251958     75
       3 +   13 CDSi    252048 -    252161    2.97    252048 -    252161    114
       3 +   14 CDSi    252274 -    252626    7.79    252274 -    252624    351
       3 +   15 CDSi    255440 -    255548    2.75    255441 -    255548    108
       3 +   16 CDSi    255623 -    255763    9.79    255623 -    255763    141
       3 +   17 CDSi    255865 -    255920    4.73    255865 -    255918     54
       3 +   18 CDSl    256611 -    256674   -0.03    256612 -    256674     63
       3 +      PolA    257326               -1.06
    

     

    Table S5 GeneMark.hmm predicted genes

    Program: Eukaryotic GeneMark.hmm

    Input: Repeat masked sequence


    References:    
    1. Borodovsky M. and Lukashin A. (unpublished)
    2. Lomsadze A., Ter-Hovhannisyan V., Chernoff Y. and Borodovsky M.,
    "Gene identification in novel eukaryotic genomes by self-training algorithm",
    Nucleic Acids Research, 2005, Vol. 33, No. 20, 6494-6506

     

    Predicted genes/exons
    
    Gene Exon Strand Exon           Exon Range     Exon      Start/End
      #    #         Type                         Length       Frame
    
      1     1   +  Initial      32665     32751      87          1 3
      1     2   +  Terminal     32867     32962      96          1 3
    
      2     3   -  Terminal     75556     75599      44          3 2
      2     2   -  Internal     75695     75781      87          1 2
      2     1   -  Initial      75920     75971      52          1 1
    
      3     7   -  Terminal     94736     94794      59          3 2
      3     6   -  Internal     94896     94962      67          1 1
      3     5   -  Internal     95155     95227      73          3 3
      3     4   -  Internal     96019     96152     134          2 1
      3     3   -  Internal     96305     96382      78          3 1
      3     2   -  Internal     96691     96792     102          3 1
      3     1   -  Initial      97020     97100      81          3 1
    
      4     1   +  Initial     129228    129306      79          1 1
      4     2   +  Terminal    129413    129471      59          2 3
    
      5     1   +  Initial     160825    161186     362          1 2
      5     2   +  Terminal    161967    162012      46          3 3
    
      6     1   +  Initial     201954    202026      73          1 1
      6     2   +  Terminal    202121    202281     161          2 3
    
      7     1   +  Initial     212181    212216      36          1 3
      7     2   +  Internal    212299    212618     320          1 2
      7     3   +  Internal    212713    212866     154          3 3
      7     4   +  Internal    212958    213026      69          1 3
      7     5   +  Internal    213117    213788     672          1 3
      7     6   +  Internal    218470    218670     201          1 3
      7     7   +  Internal    220065    220121      57          1 3
      7     8   +  Internal    220630    220721      92          1 2
      7     9   +  Internal    220875    220974     100          3 3
      7    10   +  Terminal    221146    221229      84          1 3
    
      8     1   +  Initial     230091    230171      81          1 3
      8     2   +  Internal    230266    230505     240          1 3
      8     3   +  Terminal    230881    231144     264          1 3
    
      9     3   -  Terminal    253572    253864     293          3 2
      9     2   -  Internal    262841    263101     261          1 2
      9     1   -  Initial     265014    265050      37          1 1
    
     10     2   -  Terminal    299856    299876      21          3 1
     10     1   -  Initial     299986    300000      15          3 1
    
     11     1   +  Initial     342726    342730       5          1 2
     11     2   +  Terminal    344651    344660      10          3 3
    
     12     1   +  Initial     368559    368672     114          1 3
     12     2   +  Terminal    377954    378154     201          1 3
    
     13     1   +  Initial     385411    385784     374          1 2
     13     2   +  Terminal    385875    386100     226          3 3
    
    
    Was this page helpful?
    Tag page (Edit tags)
    You must login to post a comment.