Was this page helpful?

add geneinfo.pl

    Table of contents
    No headers
     
    #!/usr/bin/perl -w
    use DBI;
    use strict;
    use Data::Dumper;
     
     
    #Jie Yin
    #09-03-14
     
    ##e.g. perl add_geneinfo.pl Gene_description_GO_pathway.txt Gene_expression.txt Geneexpression_with_annotation.txt
     
     
    # Purpose: append gene annotation (description, GO, pathway) to each row of
    # a gene expression table. The two inputs are joined on the gene ID found in
    # the first column of each file; only genes present in BOTH files are
    # written to the output (inner join).
    #
    # Arguments (positional, from @ARGV):
    #   0  annotation file, tab-delimited; its header line starts with "Gene",
    #      e.g. columns "Gene stable ID", "Gene description", "GO", "Pathway"
    #   1  expression file, comma-separated; its header line contains "Gene",
    #      e.g. columns GeneID, baseMean, log2FoldChange, FC, lfcSE, stat,
    #      pvalue, padj
    #   2  output file, tab-delimited: gene ID, expression columns, annotation
    #      columns
    #
    # Dies with a message if an argument is missing or a file cannot be
    # opened/closed.
    die "Usage: perl $0 <gene_info.txt> <gene_expression.csv> <output.txt>\n"
        unless @ARGV >= 3;

    my $infile1 = $ARGV[0]; # Gene description, GO, pathway file
    my $infile2 = $ARGV[1]; # Gene expression file
    my $outfile = $ARGV[2]; # Output file with gene expression and gene information
    my %geneinfo = ();      # gene ID => annotation columns (tab-joined) from $infile1
    my %geneexp  = ();      # gene ID => expression columns (tab-joined) from $infile2
    my $header1;            # annotation column names from $infile1 (ID column removed)
    my $header2;            # full header line from $infile2, converted to tabs

    # ---- Read the annotation file (tab-delimited) ----
    open(my $info_fh, '<', $infile1)
        or die "cannot open the file '$infile1': $!";
    while (my $line = <$info_fh>) {
        $line =~ s/[\r\n]//g;      # strip Unix and Windows line endings
        next if $line eq '';       # a blank line would otherwise create an empty key

        # Only the FIRST line starting with "Gene" is the header. Testing every
        # line for "Gene" would discard data rows whose description merely
        # mentions the word.
        if (!defined $header1 && $line =~ /^Gene/) {
            # Keep every column after the ID column, exactly as is done for the
            # data rows below, so header and data always have the same width.
            my (undef, $columns) = split /\t/, $line, 2;
            $header1 = defined $columns ? $columns : '';
            print "$header1\n";
            next;
        }

        # Limit of 2: the ID, then the rest of the line untouched.
        my ($id, $info) = split /\t/, $line, 2;
        $geneinfo{$id} = $info;    # undef when the row has no annotation column
    }
    close($info_fh);

    # ---- Read the expression file (comma-separated) ----
    # NOTE: fields are split on every comma; quoted fields that contain commas
    # are not supported.
    open(my $exp_fh, '<', $infile2)
        or die "cannot open the file '$infile2': $!";
    while (my $line = <$exp_fh>) {
        $line =~ s/[\r\n]//g;
        next if $line eq '';

        # As above: only the first line mentioning "Gene" is the header, so a
        # later gene ID containing "Gene" cannot overwrite it.
        if (!defined $header2 && $line =~ /Gene/) {
            # tr/// swaps the delimiter in place and, unlike split+join, keeps
            # trailing empty fields.
            ($header2 = $line) =~ tr/,/\t/;
            next;
        }

        my ($id, $values) = split /,/, $line, 2;
        $values = '' unless defined $values;   # row with an ID only
        $values =~ tr/,/\t/;                   # preserves empty trailing columns
        $geneexp{$id} = $values;
    }
    close($exp_fh);

    # ---- Write the merged table ----
    $header1 = '' unless defined $header1;
    $header2 = '' unless defined $header2;

    open(my $out_fh, '>', $outfile)
        or die "cannot open the file '$outfile' for writing: $!";
    print {$out_fh} "$header2\t$header1\n";

    # Sorted so that the output is reproducible between runs.
    foreach my $key (sort keys %geneexp) {
        next unless defined $geneinfo{$key};   # gene has no annotation: skip it
        print {$out_fh} join("\t", $key, $geneexp{$key}, $geneinfo{$key}), "\n";
    }

    # Buffered write errors (e.g. disk full) only surface at close.
    close($out_fh) or die "cannot close the file '$outfile': $!";
    Was this page helpful?
    Tag page (Edit tags)
    • No tags
    You must login to post a comment.