=head1 NAME

iPE::Annotation::Transcript - A state sequence representing the features of a transcript.

=head1 DESCRIPTION

This is an object created by L<iPE::AnnotationPlugin> upon reading in the Annotation file and translating the features to L<iPE::Annotation::Feature>s via L<iPE::Annotation::FeatureMapping>s.  These presumably contain a complete transcript including a start and stop codon and any other annotated features.

The finalize () method affords any annotation format the benefit of not annotating redundant features, such as introns, which are implied by the flanking coding sequences.


=cut

package iPE::Annotation::Transcript;
use iPE;
use iPE::Globals;
use iPE::Util::Overlap;
use iPE::Annotation::Feature;
use strict;

# Running counter behind automatically assigned transcript IDs.
our $tx_id_ = 0;

# Hand out the next automatic transcript ID (the current counter value
# followed by "iPEtxID") and advance the counter.
sub _next_tx_id {
    my $serial = $tx_id_++;
    return $serial . "iPEtxID";
}

=head1 CONSTANTS

=over 8

=item Blacklist Values

These functions explain a reason why a transcript was thrown away, if it was.  The value of the blacklisted member variable is set to one of these if nonzero.

=over 8

=item OVERLAP 

The transcript has an overlap.

=item OUTOFBOUNDS

The transcript is out of the bounds of the underlying sequence (begin is < 0 or end > length of sequence).

=item NEGLENGTH

A feature was created with a start coordinate greater than its end coordinate.

=item AMBIGUOUS

While finalizing, a gap between two adjacent features could be filled by more than one candidate state, so the choice of gap state was ambiguous.

=item NOGAP

There was a gap between two states for which there was no valid state in the defined HMM to fill it with.

=item NOFEATURES

There were no features in the transcript when it was brought to finalize.

=item BADTRANS

There was a bad transition between two features which butted up against each other.

=back

=cut
# Blacklist reason codes.  Each is an ordinary sub returning a distinct
# non-zero integer; a blacklisted_ value of 0 means "not blacklisted".
sub OVERLAP     { return 1; }
sub OUTOFBOUNDS { return 2; }
sub NEGLENGTH   { return 3; }
sub AMBIGUOUS   { return 4; }
sub NOGAP       { return 5; }
sub NOFEATURES  { return 6; }
sub BADTRANS    { return 7; }
=back

=head1 FUNCTIONS

=over 8

=item new ([transID, [geneID]])

Create a new empty transcript.  Add features to the transcript with addFeature (), or via a FeatureMapping.  You may pass a transcript ID to identify it and, optionally, a gene ID.  If no transcript ID is passed, one will be assigned; if no gene ID is passed, the transcript ID is used as the gene ID.

=cut

# Construct an empty, unfinalized, non-blacklisted transcript.
#
#   $transID - optional transcript ID; an undefined or empty value causes an
#              automatic ID to be taken from _next_tx_id.
#   $geneID  - optional gene ID; an undefined or empty value falls back to
#              the transcript ID.
#
# min_ and max_ start at -1, which addFeature treats as "not yet set".
sub new {
    my ($class, $transID, $geneID) = @_;

    my $id = (defined $transID && length($transID))
           ? $transID
           : _next_tx_id();
    my $gene = (defined $geneID && length($geneID))
             ? $geneID
             : $id;

    my %fields = (
        id_          => $id,
        geneID_      => $gene,
        features_    => [],
        min_         => -1,
        max_         => -1,
        finalized_   => 0,
        blacklisted_ => 0,
    );

    return bless \%fields, $class;
}

=item clone

Create a copy of an object instance.

=cut
# Copy this transcript.  All top-level fields are copied shallowly into a
# new object of the same class; the feature list is rebuilt from the result
# of calling clone on every feature.
sub clone {
    my ($this) = @_;

    my %copy = %$this;
    my $twin = bless \%copy, ref($this);

    $twin->{features_} = [ map { $_->clone } @{ $this->features } ];

    return $twin;
}

# Return the transcript's iPE::Util::Overlap object, creating and caching
# it in overlap_ the first time it is requested.
sub overlap {
    my ($this) = @_;

    if (!defined $this->{overlap_}) {
        $this->{overlap_} = iPE::Util::Overlap->new;
    }

    return $this->{overlap_};
}

=item id ()

Return the transcript ID.  This is the ID passed to new (), or an automatically assigned one if none was given.

=cut
# Accessor: the transcript ID stored in id_.
sub id {
    my ($this) = @_;
    return $this->{id_};
}

=item geneID ()

Return the gene ID of the transcript.  If no gene ID was passed to new (), this is the same as the transcript ID.

=cut
# Accessor: the gene ID stored in geneID_.
sub geneID {
    my ($this) = @_;
    return $this->{geneID_};
}

=item features ()

Return an array reference to the features in the transcript.  These are of type iPE::Annotation::Feature.

=cut
# Accessor: the array reference stored in features_ (the live list, not a
# copy).
sub features {
    my ($this) = @_;
    return $this->{features_};
}

=item min ()

Return the 5' most coordinate of the transcript for all annotated features.

=cut
# Accessor: the smallest start coordinate recorded in min_ (-1 until a
# feature has been added).
sub min {
    my ($this) = @_;
    return $this->{min_};
}

=item max ()

Return the 3' most coordinate of the transcript for all annotated features.

=cut
# Accessor: the largest end coordinate recorded in max_ (-1 until a
# feature has been added).
sub max {
    my ($this) = @_;
    return $this->{max_};
}

=item finalized ()

Return 1 if the transcript has been finalized, 0 otherwise.  A finalized transcript is a transcript which has all positions covered from the min to max coordinate, and whose features are all sorted in order of position.

=cut
# Accessor: the finalized_ flag (set to 1 at the end of finalize).
sub finalized {
    my ($this) = @_;
    return $this->{finalized_};
}

=item blacklisted ()

If the transcript has become useless because it has an odd feature, it becomes blacklisted.  All blacklisted transcripts should be ignored.  The value of the blacklist is the reason for blacklisting.

=cut
# Accessor: the blacklist reason code in blacklisted_ (0 when the
# transcript is not blacklisted).
sub blacklisted {
    my ($this) = @_;
    return $this->{blacklisted_};
}

=item addFeature (state, start, end[, startFrame[, endFrame]])

Add a feature to the transcript.  The state is expected to be an iPE::State object.  The start and end coordinates are a 0-based index and should not overlap those of another feature.  Overlaps are not checked here; finalize () warns about any overlapping features it finds.  Nothing is added if the transcript has been blacklisted.

If a start frame or end frame is not supplied, it is assumed to be 0.

These can be added automatically with the iPE::FeatureMapping object.

Returns the newly created feature.

=cut
# Create a feature for $state spanning $start..$end, append it to this
# transcript, and widen the recorded min/max coordinates to include it.
#
#   $startFrame, $endFrame - optional; each defaults to 0 when undefined.
#
# Returns the new iPE::Annotation::Feature, or nothing at all when the
# transcript is already blacklisted (in which case nothing is added).
sub addFeature {
    my ($this, $state, $start, $end, $startFrame, $endFrame) = @_;

    return if $this->blacklisted;

    my $feature = iPE::Annotation::Feature->new({
        state      => $state,
        start      => $start,
        end        => $end,
        startFrame => (defined $startFrame ? $startFrame : 0),
        endFrame   => (defined $endFrame   ? $endFrame   : 0),
        transcript => $this,
    });

    push @{ $this->{features_} }, $feature;

    # -1 marks a bound that has not been set yet.
    $this->{min_} = $start if $this->min == -1 || $start < $this->min;
    $this->{max_} = $end   if $this->max == -1 || $end   > $this->max;

    return $feature;
}

=item removeFeature(feature)

Remove a feature from a Transcript.  Pass a reference to the feature you wish to remove.

=cut
# Remove $feature (matched by reference identity) from the feature list.
# The first matching slot is refilled with the list's last element, so the
# order of the remaining features is not preserved.  Does nothing when the
# feature is not in the list.
sub removeFeature {
    my ($this, $feature) = @_;

    my $slot = 0;
    while ($slot < scalar(@{ $this->features })) {
        if ($this->features->[$slot] == $feature) {
            my $tail = pop @{ $this->features };
            # When the popped element is the feature itself there is no
            # hole left to fill.
            $this->features->[$slot] = $tail unless $tail == $feature;
            return;
        }
        $slot++;
    }
}
=item finalize (length[, transID[, geneID]])

Finalize the transcript by filling in the gaps.  length is the length of the underlying sequence.  It is used to check that the transcript does not overstep the bounds of the sequence.

=cut
# finalize (length[, transID[, geneID]])
#
# Validate the transcript, sort its features by start coordinate, and fill
# the gap between each pair of neighbouring features with the one state the
# model allows between them.
#
#   $length            - length of the underlying sequence, used for the
#                        bounds check.
#   $transID, $geneID  - accepted so existing callers keep working; this
#                        method does not use them.
#
# Does nothing if the transcript is already blacklisted.  Blacklists the
# transcript and stops early for NOFEATURES, OUTOFBOUNDS, and NOGAP (the
# latter only when gapped transcripts are not allowed).  NEGLENGTH,
# AMBIGUOUS and BADTRANS (the latter unless bad transitions are allowed)
# also set the blacklist code, but processing continues and finalized_ is
# still set.
#
# Early exits return nothing; on completion the sub evaluates to the value
# of its final assignment (1).
#
# Dies if a gap state was found although the preceding state reports no
# outgoing transitions, or if a feature's state strand conflicts with the
# strand picked for the transcript.
sub finalize {
    my ($this, $length, $transID, $geneID) = @_;

    return if $this->blacklisted;

    if(!scalar(@{$this->features})) {
        $this->{blacklisted_} = NOFEATURES();
        Warn(__PACKAGE__.": Discarding transcript ".$this->id.
        "\nbecause it has no features which map to features defined\n");
        return;
    }
    if($this->min < 0 || $this->max > $length) {
        $this->{blacklisted_} = OUTOFBOUNDS();
        Warn(__PACKAGE__.": Discarding transcript ".$this->id."\nbecause ".
            "it steps out of the bounds of the underlying sequence\n");
        return;
    }

    $this->{features_} =
        [ sort { $a->start <=> $b->start } @{$this->features} ];

    my $g = iPE::Globals->new();

    # Gap features are appended to the list inside this loop, so the number
    # of features grows; only the ones present initially are examined.
    my $num_features = scalar(@{$this->features});
    for(my $i = 1; $i < $num_features; $i++) {
        my $prev_feature = $this->features->[$i-1];
        my $cur_feature = $this->features->[$i];
        my $prev_state = $prev_feature->state;

        # detect any overlapping features.
        # NOTE(review): an overlap is only reported; the transcript is not
        # blacklisted with OVERLAP and the gap handling below still runs
        # for this pair.  Confirm whether that is intended.
        if($prev_feature->end > $cur_feature->start) {
            Warn(__PACKAGE__.": ".$cur_feature->state->name.
                " caused an overlap in transcript \n".$this->id.
                ".  Check the feature map file to insure that there are no ".
                "\npotential conflicts with this state.\n");
        }

        # make sure none of the features have a greater start than end coord
        # NOTE(review): only features at index 1 and above are tested here,
        # so the first feature in sorted order is never checked.
        if($cur_feature->start > $cur_feature->end) {
            # Method calls are not interpolated inside double quotes, so
            # the coordinates are concatenated explicitly.
            Warn(__PACKAGE__.": Could not create a feature for transcript "
                .$this->id."\nwith state ".$cur_feature->state->name.
                " because it has greater start coordinate than end oordinate.".
                "\nStart:".$cur_feature->start.
                " End:".$cur_feature->end."\n");
            $this->{blacklisted_} = NEGLENGTH();
            #return;  not fatal.
        }

        # if the features butt up against each other there is no need
        # to fill in the gap.
        if ($cur_feature->start - $prev_feature->end == 1) {
            # if the two states don't represent a valid transition, though,
            # we need to discard the transcript
            my $found = 0;
            for my $pot_cur_state_name (@{$prev_state->transitions->dests}) {
                if ($pot_cur_state_name eq $cur_feature->state->name) {
                    $found = 1;
                    last;
                }
            }
            next if($found);
            Warn(__PACKAGE__.": Transcript ".$this->id." caused states ".
                $prev_feature->state->name." and ".
                $cur_feature->state->name." to meet without a gap.\n".
                "There is no valid transition between these states.  ".
                $prev_feature->start." ".$cur_feature->end."\n");
            unless($g->options->allowBadTransitions) {
                $this->{blacklisted_} = BADTRANS();
                Warn("It will be discarded.");
            }
            next;
            #return;  # this is not fatal -- if the user wants to ignore
                      # blacklist then ok.
        }


        # Look for a state reachable from the previous state that can in
        # turn reach the current state; that state fills the gap.
        my $gap_state = undef;
        for my $pot_gap_state_name (@{$prev_state->transitions->dests}) {
            my $pot_gap_state = $g->state($pot_gap_state_name);
            for my $pot_cur_state_name (@{$pot_gap_state->transitions->dests}) {
                if ($pot_cur_state_name eq $cur_feature->state->name) {
                    if(defined $gap_state) {
                        Warn(__PACKAGE__.": Transcript ".$this->id." contains ".
                            "an ambiguous transition\nbetween two defined ".
                            "states, ".$prev_feature->state->name." and ".
                            $cur_feature->state->name.
                            ".  It will be discarded.\n");
                        $this->{blacklisted_} = AMBIGUOUS();
                        #return; not fatal
                    }
                    $gap_state = $pot_gap_state;
                }
            }
        }
        if(!defined $gap_state) {
            Warn(__PACKAGE__.": Transcript ".$this->id." contains a gap ".
                "between states\n".$prev_feature->state->name." and ".
                $cur_feature->state->name." for which there is no state ".
                "to fill the gap.\n");
            if($g->options->allowGappedTranscripts) {
                next;
            }
            else {
                Warn("It will be discarded.\n");
                $this->{blacklisted_} = NOGAP();
                return;
            }
        }
        die "No transitions for undefined feature after ".
            $prev_state->name."\n"
            if($prev_state->transitions->nDests < 1);

        # Once the transcript is marked AMBIGUOUS no further gap features
        # are inserted, for this pair or any later one.
        if($this->{blacklisted_} != AMBIGUOUS()) {
            my $gap_start = $prev_feature->end+1;
            my $gap_end   = $cur_feature->start-1;
            # Both frames of the gap feature are taken from the end frame
            # of the preceding feature.
            my $start_frame = $prev_feature->endFrame;
            my $end_frame = $prev_feature->endFrame;
            push (@{$this->{features_}},
                iPE::Annotation::Feature->new({state      => $gap_state,
                                               start      => $gap_start,
                                               end        => $gap_end,
                                               startFrame => $start_frame,
                                               endFrame   => $end_frame,
                                               transcript => $this}) );
            msg("Gap filled from $gap_start to $gap_end with ".
                "state ".$gap_state->name."\n");

            my $gap_size = $gap_end-$gap_start+1;
            Warn("SHORT GAP: ".$gap_size." filling between ".
                $prev_feature->state->name." and ".
                $cur_feature->state->name."\n")
                if ($gap_size < 10);
        }
    }

    # all the filled in gaps are now at the end of the transcript, so
    # resort the transcript again.
    $this->{features_} =
        [ sort { $a->start <=> $b->start } @{$this->features} ];

    # Attempt to find a non-N strand in the transcript.  If none exists, oh well
    # NOTE(review): the strand is read from the feature here but from the
    # feature's state in the consistency check below; confirm both agree.
    my $strand;
    for my $feature (@{$this->features}) {
        $strand = $feature->strand;
        #print STDERR $feature->state->name."\n";
        last if($strand ne 'N');
    }
    for my $feature (@{$this->features}) {
        if ($feature->state->strand ne $strand
                && $feature->state->strand ne 'N') {
            die __PACKAGE__.": A transcript has a feature on the plus strand".
                " and\nthe minus strand.  Check your state definitions and".
                " transitions to make sure\nnothing is inconsistent\n";
        }
    }

    for my $feature (@{$this->features}) {
        $feature->setID($this->id);
    }

    # Drop any cached overlap object now that the transcript is complete.
    $this->{overlap_} = undef;

    $this->{finalized_} = 1;
}

=item setLevels (levels)

Set the level of all the features of a transcript according to the iPE::GCContent object passed in.

=cut
# Pass $levels to setLevel on every feature of the transcript, in list
# order.
sub setLevels {
    my ($this, $levels) = @_;

    my $all = $this->features;
    foreach my $feat (@$all) {
        $feat->setLevel($levels);
    }
}

# Render the transcript as text: the ID followed by ":" on the first line,
# then one tab-indented line per feature (state name, start, end) with the
# features ordered by start coordinate.  The stored feature list itself is
# not reordered.
sub _format {
    my ($this) = @_;

    my $str = $this->id.":\n";
    for my $f (sort { $a->start <=> $b->start } @{$this->features}) {
        # Built with join rather than sprintf: the text is data, not a
        # format, so a '%' in a state name must come through unchanged.
        $str .= join("\t", '', $f->state->name, $f->start, $f->end)."\n";
    }

    return $str;
}

=back

=head1 SEE ALSO

L<iPE::Feature>, L<iPE::FeatureMap>, L<iPE::Annotation>, L<iPE>

=head1 AUTHOR

Bob Zimmermann (rpz@cse.wustl.edu).

=cut

1;
