| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | # | 
| 2 |  |  |  |  |  |  | # BioPerl module for Bio::SeqIO::chaos | 
| 3 |  |  |  |  |  |  | # | 
| 4 |  |  |  |  |  |  | # Chris Mungall | 
| 5 |  |  |  |  |  |  | # | 
| 6 |  |  |  |  |  |  | # You may distribute this module under the same terms as perl itself | 
| 7 |  |  |  |  |  |  |  | 
| 8 |  |  |  |  |  |  | # POD documentation - main docs before the code | 
| 9 |  |  |  |  |  |  |  | 
| 10 |  |  |  |  |  |  | =head1 NAME | 
| 11 |  |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  | Bio::SeqIO::chaos - chaos sequence input/output stream | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | #In general you will not want to use this module directly; | 
| 17 |  |  |  |  |  |  | #use the chaosxml format via SeqIO | 
| 18 |  |  |  |  |  |  |  | 
| 19 |  |  |  |  |  |  | $outstream = Bio::SeqIO->new(-file => $filename, | 
| 20 |  |  |  |  |  |  | -format => 'chaosxml'); | 
| 21 |  |  |  |  |  |  |  | 
| 22 |  |  |  |  |  |  | while ( my $seq = $instream->next_seq() ) { | 
| 23 |  |  |  |  |  |  | $outstream->write_seq($seq); | 
| 24 |  |  |  |  |  |  | } | 
| 25 |  |  |  |  |  |  |  | 
| 26 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 27 |  |  |  |  |  |  |  | 
| 28 |  |  |  |  |  |  | This is the guts of L - please refer to the | 
| 29 |  |  |  |  |  |  | documentation for this module | 
| 30 |  |  |  |  |  |  |  | 
| 31 |  |  |  |  |  |  | B | 
| 32 |  |  |  |  |  |  |  | 
| 33 |  |  |  |  |  |  | ChaosXML is an XML mapping of the chado relational database; for more | 
| 34 |  |  |  |  |  |  | information, see http://www.fruitfly.org/chaos-xml | 
| 35 |  |  |  |  |  |  |  | 
| 36 |  |  |  |  |  |  | chaos can be represented in various syntaxes - XML, S-Expressions or | 
| 37 |  |  |  |  |  |  | indented text. You should see the relevant SeqIO file. You will | 
| 38 |  |  |  |  |  |  | probably want to use L, which is a wrapper to | 
| 39 |  |  |  |  |  |  | this module. | 
| 40 |  |  |  |  |  |  |  | 
| 41 |  |  |  |  |  |  | =head2 USING STAG OBJECTS | 
| 42 |  |  |  |  |  |  |  | 
| 43 |  |  |  |  |  |  | B | 
| 44 |  |  |  |  |  |  |  | 
| 45 |  |  |  |  |  |  | This module (in write mode) is an B - it generates XML | 
| 46 |  |  |  |  |  |  | events via the L module. If you only care about the final | 
| 47 |  |  |  |  |  |  | end-product xml, use L | 
| 48 |  |  |  |  |  |  |  | 
| 49 |  |  |  |  |  |  | You can treat the resulting chaos-xml stream as stag XML objects; | 
| 50 |  |  |  |  |  |  |  | 
| 51 |  |  |  |  |  |  | $outstream = Bio::SeqIO->new(-file => $filename, -format => 'chaos'); | 
| 52 |  |  |  |  |  |  |  | 
| 53 |  |  |  |  |  |  | while ( my $seq = $instream->next_seq() ) { | 
| 54 |  |  |  |  |  |  | $outstream->write_seq($seq); | 
| 55 |  |  |  |  |  |  | } | 
| 56 |  |  |  |  |  |  | my $chaos = $outstream->handler->stag; | 
| 57 |  |  |  |  |  |  | # stag provides get/set methods for xml elements | 
| 58 |  |  |  |  |  |  | # (these are chaos objects, not bioperl objects) | 
| 59 |  |  |  |  |  |  | my @features = $chaos->get_feature; | 
| 60 |  |  |  |  |  |  | my @feature_relationships = $chaos->get_feature_relationships; | 
| 61 |  |  |  |  |  |  | # stag objects can be queried with functional-programming | 
| 62 |  |  |  |  |  |  | # style queries | 
| 63 |  |  |  |  |  |  | my @features_in_range = | 
| 64 |  |  |  |  |  |  | $chaos->where('feature', | 
| 65 |  |  |  |  |  |  | sub { | 
| 66 |  |  |  |  |  |  | my $featureloc = shift->get_featureloc; | 
| 67 |  |  |  |  |  |  | $featureloc->strand == 1 && | 
| 68 |  |  |  |  |  |  | $featureloc->nbeg > 10000 && | 
| 69 |  |  |  |  |  |  | $featureloc->nend < 20000; | 
| 70 |  |  |  |  |  |  | }); | 
| 71 |  |  |  |  |  |  | foreach my $feature (@features_in_range) { | 
| 72 |  |  |  |  |  |  | my $featureloc = $feature->get_featureloc; | 
| 73 |  |  |  |  |  |  | printf "%s [%d->%d on %s]\n", | 
| 74 |  |  |  |  |  |  | $feature->sget_name, | 
| 75 |  |  |  |  |  |  | $featureloc->sget_nbeg, | 
| 76 |  |  |  |  |  |  | $featureloc->sget_end, | 
| 77 |  |  |  |  |  |  | $featureloc->sget_srcfeature_id; | 
| 78 |  |  |  |  |  |  | } | 
| 79 |  |  |  |  |  |  |  | 
| 80 |  |  |  |  |  |  | =head1 MODULES REQUIRED | 
| 81 |  |  |  |  |  |  |  | 
| 82 |  |  |  |  |  |  | L | 
| 83 |  |  |  |  |  |  |  | 
| 84 |  |  |  |  |  |  | Downloadable from CPAN; see also http://stag.sourceforge.net | 
| 85 |  |  |  |  |  |  |  | 
| 86 |  |  |  |  |  |  | =head1 FEEDBACK | 
| 87 |  |  |  |  |  |  |  | 
| 88 |  |  |  |  |  |  | =head2 Mailing Lists | 
| 89 |  |  |  |  |  |  |  | 
| 90 |  |  |  |  |  |  | User feedback is an integral part of the evolution of this and other | 
| 91 |  |  |  |  |  |  | Bioperl modules. Send your comments and suggestions preferably to one | 
| 92 |  |  |  |  |  |  | of the Bioperl mailing lists.  Your participation is much appreciated. | 
| 93 |  |  |  |  |  |  |  | 
| 94 |  |  |  |  |  |  | bioperl-l@bioperl.org                  - General discussion | 
| 95 |  |  |  |  |  |  | http://bioperl.org/wiki/Mailing_lists  - About the mailing lists | 
| 96 |  |  |  |  |  |  |  | 
| 97 |  |  |  |  |  |  | =head2 Support | 
| 98 |  |  |  |  |  |  |  | 
| 99 |  |  |  |  |  |  | Please direct usage questions or support issues to the mailing list: | 
| 100 |  |  |  |  |  |  |  | 
| 101 |  |  |  |  |  |  | I | 
| 102 |  |  |  |  |  |  |  | 
| 103 |  |  |  |  |  |  | rather than to the module maintainer directly. Many experienced and | 
| 104 |  |  |  |  |  |  | reponsive experts will be able look at the problem and quickly | 
| 105 |  |  |  |  |  |  | address it. Please include a thorough description of the problem | 
| 106 |  |  |  |  |  |  | with code and data examples if at all possible. | 
| 107 |  |  |  |  |  |  |  | 
| 108 |  |  |  |  |  |  | =head2 Reporting Bugs | 
| 109 |  |  |  |  |  |  |  | 
| 110 |  |  |  |  |  |  | Report bugs to the Bioperl bug tracking system to help us keep track | 
| 111 |  |  |  |  |  |  | the bugs and their resolution. | 
| 112 |  |  |  |  |  |  | Bug reports can be submitted via the web: | 
| 113 |  |  |  |  |  |  |  | 
| 114 |  |  |  |  |  |  | https://github.com/bioperl/bioperl-live/issues | 
| 115 |  |  |  |  |  |  |  | 
| 116 |  |  |  |  |  |  | =head1 AUTHOR - Chris Mungall | 
| 117 |  |  |  |  |  |  |  | 
| 118 |  |  |  |  |  |  | Email cjm@fruitfly.org | 
| 119 |  |  |  |  |  |  |  | 
| 120 |  |  |  |  |  |  | =head1 APPENDIX | 
| 121 |  |  |  |  |  |  |  | 
| 122 |  |  |  |  |  |  | The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ | 
| 123 |  |  |  |  |  |  |  | 
| 124 |  |  |  |  |  |  | =cut | 
| 125 |  |  |  |  |  |  |  | 
| 126 |  |  |  |  |  |  | # Let the code begin... | 
| 127 |  |  |  |  |  |  |  | 
| 128 |  |  |  |  |  |  | package Bio::SeqIO::chaos; | 
| 129 | 2 |  |  | 2 |  | 511 | use strict; | 
|  | 2 |  |  |  |  | 3 |  | 
|  | 2 |  |  |  |  | 47 |  | 
| 130 |  |  |  |  |  |  |  | 
| 131 | 2 |  |  | 2 |  | 346 | use Bio::SeqFeature::Generic; | 
|  | 2 |  |  |  |  | 2 |  | 
|  | 2 |  |  |  |  | 41 |  | 
| 132 | 2 |  |  | 2 |  | 288 | use Bio::Species; | 
|  | 2 |  |  |  |  | 4 |  | 
|  | 2 |  |  |  |  | 42 |  | 
| 133 | 2 |  |  | 2 |  | 8 | use Bio::Seq::SeqFactory; | 
|  | 2 |  |  |  |  | 2 |  | 
|  | 2 |  |  |  |  | 35 |  | 
| 134 | 2 |  |  | 2 |  | 6 | use Bio::Annotation::Collection; | 
|  | 2 |  |  |  |  | 2 |  | 
|  | 2 |  |  |  |  | 29 |  | 
| 135 | 2 |  |  | 2 |  | 275 | use Bio::Annotation::Comment; | 
|  | 2 |  |  |  |  | 5 |  | 
|  | 2 |  |  |  |  | 36 |  | 
| 136 | 2 |  |  | 2 |  | 290 | use Bio::Annotation::Reference; | 
|  | 2 |  |  |  |  | 4 |  | 
|  | 2 |  |  |  |  | 52 |  | 
| 137 | 2 |  |  | 2 |  | 10 | use Bio::Annotation::DBLink; | 
|  | 2 |  |  |  |  | 3 |  | 
|  | 2 |  |  |  |  | 33 |  | 
| 138 | 2 |  |  | 2 |  | 618 | use Bio::SeqFeature::Tools::TypeMapper; | 
|  | 2 |  |  |  |  | 4 |  | 
|  | 2 |  |  |  |  | 43 |  | 
| 139 | 2 |  |  | 2 |  | 543 | use Bio::SeqFeature::Tools::FeatureNamer; | 
|  | 2 |  |  |  |  | 4 |  | 
|  | 2 |  |  |  |  | 44 |  | 
| 140 | 2 |  |  | 2 |  | 516 | use Bio::SeqFeature::Tools::IDHandler; | 
|  | 2 |  |  |  |  | 3 |  | 
|  | 2 |  |  |  |  | 47 |  | 
| 141 | 2 |  |  | 2 |  | 10 | use Data::Stag qw(:all); | 
|  | 2 |  |  |  |  | 1 |  | 
|  | 2 |  |  |  |  | 1250 |  | 
| 142 |  |  |  |  |  |  |  | 
| 143 | 2 |  |  | 2 |  | 9 | use base qw(Bio::SeqIO); | 
|  | 2 |  |  |  |  | 2 |  | 
|  | 2 |  |  |  |  | 1563 |  | 
| 144 |  |  |  |  |  |  |  | 
| 145 |  |  |  |  |  |  | our $TM = 'Bio::SeqFeature::Tools::TypeMapper'; | 
| 146 |  |  |  |  |  |  | our $FNAMER = 'Bio::SeqFeature::Tools::FeatureNamer'; | 
| 147 |  |  |  |  |  |  | our $IDH = 'Bio::SeqFeature::Tools::IDHandler'; | 
| 148 |  |  |  |  |  |  |  | 
| 149 |  |  |  |  |  |  | sub _initialize { | 
| 150 | 1 |  |  | 1 |  | 3 | my($self,@args) = @_; | 
| 151 |  |  |  |  |  |  |  | 
| 152 | 1 |  |  |  |  | 6 | $self->SUPER::_initialize(@args); | 
| 153 | 1 | 50 |  |  |  | 5 | if( ! defined $self->sequence_factory ) { | 
| 154 | 1 |  |  |  |  | 3 | $self->sequence_factory(Bio::Seq::SeqFactory->new | 
| 155 |  |  |  |  |  |  | (-verbose => $self->verbose(), | 
| 156 |  |  |  |  |  |  | -type => 'Bio::Seq::RichSeq')); | 
| 157 |  |  |  |  |  |  | } | 
| 158 | 1 |  |  |  |  | 3 | my $wclass = $self->default_handler_class; | 
| 159 | 1 |  |  |  |  | 176 | $self->handler($wclass); | 
| 160 | 1 | 50 |  |  |  | 3 | if ($self->_fh) { | 
| 161 | 1 |  |  |  |  | 2 | $self->handler->fh($self->_fh); | 
| 162 |  |  |  |  |  |  | } | 
| 163 | 1 |  |  |  |  | 4 | $self->{_end_of_data} = 0; | 
| 164 | 1 |  |  |  |  | 4 | $self->_type_by_id_h({}); | 
| 165 | 1 |  |  |  |  | 5 | my $t = time; | 
| 166 | 1 |  |  |  |  | 105 | my $ppt = localtime $t; | 
| 167 | 1 |  |  |  |  | 2 | $self->handler->S("chaos"); | 
| 168 |  |  |  |  |  |  | $self->handler->ev(chaos_metadata=>[ | 
| 169 |  |  |  |  |  |  | [chaos_version=>1], | 
| 170 |  |  |  |  |  |  | [chaos_flavour=>'bioperl'], | 
| 171 |  |  |  |  |  |  | [feature_unique_key=>'feature_id'], | 
| 172 |  |  |  |  |  |  | [equiv_chado_release=>'chado_1_01'], | 
| 173 |  |  |  |  |  |  | [export_unixtime=>$t], | 
| 174 |  |  |  |  |  |  | [export_localtime=>$ppt], | 
| 175 |  |  |  |  |  |  | [export_host=>$ENV{HOST}], | 
| 176 |  |  |  |  |  |  | [export_user=>$ENV{USER}], | 
| 177 | 1 |  |  |  |  | 92 | [export_perl5lib=>$ENV{PERL5LIB}], | 
| 178 |  |  |  |  |  |  | [export_program=>$0], | 
| 179 |  |  |  |  |  |  | [export_module=>'Bio::SeqIO::chaos'], | 
| 180 |  |  |  |  |  |  | [export_module_cvs_id=>'$Id$'], | 
| 181 |  |  |  |  |  |  | ]); | 
| 182 |  |  |  |  |  |  |  | 
| 183 | 1 |  |  |  |  | 1130 | return; | 
| 184 |  |  |  |  |  |  | } | 
| 185 |  |  |  |  |  |  |  | 
| 186 |  |  |  |  |  |  | sub DESTROY { | 
| 187 | 1 |  |  | 1 |  | 656 | my $self = shift; | 
| 188 | 1 |  |  |  |  | 5 | $self->end_of_data(); | 
| 189 | 1 |  |  |  |  | 65 | $self->SUPER::DESTROY(); | 
| 190 |  |  |  |  |  |  | } | 
| 191 |  |  |  |  |  |  |  | 
| 192 |  |  |  |  |  |  | sub end_of_data { | 
| 193 | 1 |  |  | 1 | 0 | 6 | my $self = shift; | 
| 194 | 1 | 50 |  |  |  | 4 | return if $self->{_end_of_data}; | 
| 195 | 1 |  |  |  |  | 2 | $self->{_end_of_data} = 1; | 
| 196 | 1 |  |  |  |  | 2 | $self->handler->E("chaos"); | 
| 197 |  |  |  |  |  |  | } | 
| 198 |  |  |  |  |  |  |  | 
| 199 |  |  |  |  |  |  | sub default_handler_class { | 
| 200 | 0 |  |  | 0 | 0 | 0 | return Data::Stag->makehandler; | 
| 201 |  |  |  |  |  |  | } | 
| 202 |  |  |  |  |  |  |  | 
| 203 |  |  |  |  |  |  | =head2 context_namespace | 
| 204 |  |  |  |  |  |  |  | 
| 205 |  |  |  |  |  |  | Title   : context_namespace | 
| 206 |  |  |  |  |  |  | Usage   : $obj->context_namespace($newval) | 
| 207 |  |  |  |  |  |  | Function: | 
| 208 |  |  |  |  |  |  | Example : | 
| 209 |  |  |  |  |  |  | Returns : value of context_namespace (a scalar) | 
| 210 |  |  |  |  |  |  | Args    : on set, new value (a scalar or undef, optional) | 
| 211 |  |  |  |  |  |  |  | 
| 212 |  |  |  |  |  |  | IDs will be preceded with the context namespace | 
| 213 |  |  |  |  |  |  |  | 
| 214 |  |  |  |  |  |  | =cut | 
| 215 |  |  |  |  |  |  |  | 
| 216 |  |  |  |  |  |  | sub context_namespace{ | 
| 217 | 58 |  |  | 58 | 1 | 38 | my $self = shift; | 
| 218 |  |  |  |  |  |  |  | 
| 219 | 58 | 50 |  |  |  | 68 | return $self->{'context_namespace'} = shift if @_; | 
| 220 | 58 |  |  |  |  | 83 | return $self->{'context_namespace'}; | 
| 221 |  |  |  |  |  |  | } | 
| 222 |  |  |  |  |  |  |  | 
| 223 |  |  |  |  |  |  |  | 
| 224 |  |  |  |  |  |  | =head2 next_seq | 
| 225 |  |  |  |  |  |  |  | 
| 226 |  |  |  |  |  |  | Title   : next_seq | 
| 227 |  |  |  |  |  |  | Usage   : $seq = $stream->next_seq() | 
| 228 |  |  |  |  |  |  | Function: returns the next sequence in the stream | 
| 229 |  |  |  |  |  |  | Returns : Bio::Seq object | 
| 230 |  |  |  |  |  |  | Args    : | 
| 231 |  |  |  |  |  |  |  | 
| 232 |  |  |  |  |  |  | =cut | 
| 233 |  |  |  |  |  |  |  | 
| 234 |  |  |  |  |  |  | sub next_seq { | 
| 235 | 0 |  |  | 0 | 1 | 0 | my ($self,@args) = @_; | 
| 236 | 0 |  |  |  |  | 0 | my $seq = $self->sequence_factory->create | 
| 237 |  |  |  |  |  |  | ( | 
| 238 |  |  |  |  |  |  | #         '-verbose' =>$self->verbose(), | 
| 239 |  |  |  |  |  |  | #	 %params, | 
| 240 |  |  |  |  |  |  | #	 -seq => $seqc, | 
| 241 |  |  |  |  |  |  | #	 -annotation => $annotation, | 
| 242 |  |  |  |  |  |  | #	 -features => \@features | 
| 243 |  |  |  |  |  |  | ); | 
| 244 | 0 |  |  |  |  | 0 | return $seq; | 
| 245 |  |  |  |  |  |  | } | 
| 246 |  |  |  |  |  |  |  | 
| 247 |  |  |  |  |  |  | sub handler { | 
| 248 | 63 |  |  | 63 | 0 | 53 | my $self = shift; | 
| 249 | 63 | 100 |  |  |  | 81 | $self->{_handler} = shift if @_; | 
| 250 | 63 |  |  |  |  | 141 | return $self->{_handler}; | 
| 251 |  |  |  |  |  |  | } | 
| 252 |  |  |  |  |  |  |  | 
| 253 |  |  |  |  |  |  |  | 
| 254 |  |  |  |  |  |  | =head2 write_seq | 
| 255 |  |  |  |  |  |  |  | 
| 256 |  |  |  |  |  |  | Title   : write_seq | 
| 257 |  |  |  |  |  |  | Usage   : $stream->write_seq($seq) | 
| 258 |  |  |  |  |  |  | Function: writes the $seq object (must be seq) to the stream | 
| 259 |  |  |  |  |  |  | Returns : 1 for success and 0 for error | 
| 260 |  |  |  |  |  |  | Args    : Bio::Seq | 
| 261 |  |  |  |  |  |  |  | 
| 262 |  |  |  |  |  |  |  | 
| 263 |  |  |  |  |  |  | =cut | 
| 264 |  |  |  |  |  |  |  | 
| 265 |  |  |  |  |  |  | sub write_seq { | 
| 266 | 1 |  |  | 1 | 1 | 7 | my ($self,$seq) = @_; | 
| 267 |  |  |  |  |  |  |  | 
| 268 | 1 | 50 |  |  |  | 4 | if( !defined $seq ) { | 
| 269 | 0 |  |  |  |  | 0 | $self->throw("Attempting to write with no seq!"); | 
| 270 |  |  |  |  |  |  | } | 
| 271 |  |  |  |  |  |  |  | 
| 272 | 1 | 50 | 33 |  |  | 7 | if( ! ref $seq || ! $seq->isa('Bio::SeqI') ) { | 
| 273 | 0 |  |  |  |  | 0 | $self->warn(" $seq is not a SeqI compliant module. Attempting to dump, but may fail!"); | 
| 274 |  |  |  |  |  |  | } | 
| 275 |  |  |  |  |  |  |  | 
| 276 |  |  |  |  |  |  | # get a handler - must inherit from Data::Stag::BaseHandler; | 
| 277 | 1 |  |  |  |  | 3 | my $w = $self->handler; | 
| 278 |  |  |  |  |  |  |  | 
| 279 |  |  |  |  |  |  | # start of data | 
| 280 |  |  |  |  |  |  | ###    $w->S("chaos_block"); | 
| 281 |  |  |  |  |  |  |  | 
| 282 | 1 |  |  |  |  | 1 | my $seq_chaos_feature_id; | 
| 283 |  |  |  |  |  |  |  | 
| 284 |  |  |  |  |  |  | # different seq objects have different version accessors - | 
| 285 |  |  |  |  |  |  | # weird but true | 
| 286 | 1 | 50 |  |  |  | 9 | my $version = $seq->can('seq_version') ? $seq->seq_version : $seq->version; | 
| 287 |  |  |  |  |  |  |  | 
| 288 | 1 |  |  |  |  | 11 | my $accversion = $seq->accession_number; | 
| 289 | 1 | 50 |  |  |  | 2 | if ($version) { | 
| 290 | 1 |  |  |  |  | 3 | $accversion .= ".$version"; | 
| 291 |  |  |  |  |  |  | } | 
| 292 |  |  |  |  |  |  |  | 
| 293 | 1 | 50 |  |  |  | 2 | if ($accversion) { | 
| 294 | 1 |  |  |  |  | 2 | $seq_chaos_feature_id = $accversion; | 
| 295 |  |  |  |  |  |  | } | 
| 296 |  |  |  |  |  |  | else { | 
| 297 | 0 |  |  |  |  | 0 | $seq_chaos_feature_id = $self->get_chaos_feature_id($seq); | 
| 298 | 0 |  |  |  |  | 0 | $accversion = $seq_chaos_feature_id; | 
| 299 |  |  |  |  |  |  | } | 
| 300 |  |  |  |  |  |  |  | 
| 301 |  |  |  |  |  |  | # All ids must have a namespace prefix | 
| 302 | 1 | 50 |  |  |  | 3 | if ($seq_chaos_feature_id !~ /:/) { | 
| 303 | 1 |  |  |  |  | 2 | $seq_chaos_feature_id = "GenericSeqDB:$seq_chaos_feature_id"; | 
| 304 |  |  |  |  |  |  | } | 
| 305 |  |  |  |  |  |  |  | 
| 306 |  |  |  |  |  |  | #    if ($seq->accession_number eq 'unknown') { | 
| 307 |  |  |  |  |  |  | #        $seq_chaos_feature_id = $self->get_chaos_feature_id('contig', $seq); | 
| 308 |  |  |  |  |  |  | #    } | 
| 309 |  |  |  |  |  |  |  | 
| 310 | 1 |  |  |  |  | 1 | my $haplotype; | 
| 311 | 1 | 50 |  |  |  | 4 | if ($seq->desc =~ /haplotype(.*)/i) { | 
| 312 |  |  |  |  |  |  | # yikes, no consistent way to specify haplotype in gb | 
| 313 | 0 |  |  |  |  | 0 | $haplotype = $1; | 
| 314 | 0 |  |  |  |  | 0 | $haplotype =~ s/\s+/_/g; | 
| 315 | 0 |  |  |  |  | 0 | $haplotype =~ s/\W+//g; | 
| 316 |  |  |  |  |  |  | } | 
| 317 |  |  |  |  |  |  |  | 
| 318 | 1 |  |  |  |  | 1 | my $OS; | 
| 319 |  |  |  |  |  |  | # Organism lines | 
| 320 | 1 | 50 |  |  |  | 5 | if (my $spec = $seq->species) { | 
| 321 | 1 |  |  |  |  | 6 | my ($species, $genus, @class) = $spec->classification(); | 
| 322 | 1 |  |  |  |  | 3 | $OS = "$genus $species"; | 
| 323 | 1 | 50 |  |  |  | 4 | if (my $ssp = $spec->sub_species) { | 
| 324 | 0 |  |  |  |  | 0 | $OS .= " $ssp"; | 
| 325 |  |  |  |  |  |  | } | 
| 326 | 1 |  |  |  |  | 5 | $self->genus_species($OS); | 
| 327 | 1 | 50 |  |  |  | 3 | if( $spec->common_name ) { | 
| 328 | 1 |  |  |  |  | 3 | my $common = $spec->common_name; | 
| 329 |  |  |  |  |  |  | # genbank parser sets species->common_name to | 
| 330 |  |  |  |  |  |  | # be "Genus Species (common name)" which is wrong; | 
| 331 |  |  |  |  |  |  | # we will correct for this; if common_name is set | 
| 332 |  |  |  |  |  |  | # correctly then carry on | 
| 333 | 1 | 50 |  |  |  | 6 | if ($common =~ /\((.*)\)/) { | 
| 334 | 0 |  |  |  |  | 0 | $common = $1; | 
| 335 |  |  |  |  |  |  | } | 
| 336 | 1 |  |  |  |  | 5 | $OS .= " (".$common.")"; | 
| 337 |  |  |  |  |  |  | } | 
| 338 |  |  |  |  |  |  | } | 
| 339 | 1 | 50 |  |  |  | 3 | if ($OS) { | 
| 340 | 1 |  |  |  |  | 3 | $self->organismstr($OS); | 
| 341 |  |  |  |  |  |  | } | 
| 342 | 1 | 50 |  |  |  | 2 | if ($haplotype) { | 
| 343 |  |  |  |  |  |  | # genus_species is part of uniquename - add haplotype | 
| 344 |  |  |  |  |  |  | # to make it genuinely unique | 
| 345 | 0 |  |  |  |  | 0 | $self->genus_species($self->genus_species .= " $haplotype"); | 
| 346 |  |  |  |  |  |  | } | 
| 347 |  |  |  |  |  |  |  | 
| 348 | 1 |  |  |  |  | 3 | my $uname = $self->make_uniquename($self->genus_species, $accversion); | 
| 349 |  |  |  |  |  |  |  | 
| 350 |  |  |  |  |  |  | # data structure representing the core sequence for this record | 
| 351 | 1 |  |  |  |  | 7 | my $seqnode = | 
| 352 |  |  |  |  |  |  | Data::Stag->new(feature=>[ | 
| 353 |  |  |  |  |  |  | [feature_id=>$seq_chaos_feature_id], | 
| 354 |  |  |  |  |  |  | [dbxrefstr=>'SEQDB:'.$accversion], | 
| 355 |  |  |  |  |  |  | [name=>$seq->display_name], | 
| 356 |  |  |  |  |  |  | [uniquename=>$uname], | 
| 357 |  |  |  |  |  |  | [residues=>$seq->seq], | 
| 358 |  |  |  |  |  |  | ]); | 
| 359 |  |  |  |  |  |  |  | 
| 360 |  |  |  |  |  |  | # soft properties | 
| 361 | 1 |  |  |  |  | 13 | my %prop = (); | 
| 362 |  |  |  |  |  |  |  | 
| 363 | 1 |  |  |  |  | 14 | $seqnode->set_type('databank_entry'); | 
| 364 |  |  |  |  |  |  |  | 
| 365 |  |  |  |  |  |  | map { | 
| 366 | 1 | 50 |  |  |  | 92 | $prop{$_} = $seq->$_() if $seq->can($_); | 
|  | 5 |  |  |  |  | 24 |  | 
| 367 |  |  |  |  |  |  | } qw(desc keywords division molecule is_circular); | 
| 368 | 1 | 50 |  |  |  | 6 | $prop{dates} = join("; ", $seq->get_dates) if $seq->can("get_dates"); | 
| 369 |  |  |  |  |  |  |  | 
| 370 | 1 |  |  |  |  | 5 | local($^W) = 0;   # supressing warnings about uninitialized fields. | 
| 371 |  |  |  |  |  |  |  | 
| 372 |  |  |  |  |  |  | # Reference lines | 
| 373 | 1 |  |  |  |  | 1 | my $count = 1; | 
| 374 | 1 |  |  |  |  | 2 | foreach my $ref ( $seq->annotation->get_Annotations('reference') ) { | 
| 375 |  |  |  |  |  |  | # TODO | 
| 376 |  |  |  |  |  |  | } | 
| 377 |  |  |  |  |  |  | # Comment lines | 
| 378 |  |  |  |  |  |  |  | 
| 379 | 1 | 50 |  |  |  | 3 | $seqnode->add_featureprop([[type=>'haplotype'],[value=>$haplotype]]) | 
| 380 |  |  |  |  |  |  | if $haplotype; | 
| 381 | 1 |  |  |  |  | 3 | foreach my $comment ( $seq->annotation->get_Annotations('comment') ) { | 
| 382 | 1 |  |  |  |  | 10 | $seqnode->add_featureprop([[type=>'comment'],[value=>$comment->text]]); | 
| 383 |  |  |  |  |  |  | } | 
| 384 | 1 | 50 |  |  |  | 86 | if ($OS) { | 
| 385 | 1 |  |  |  |  | 6 | $seqnode->set_organismstr($OS); | 
| 386 |  |  |  |  |  |  | } | 
| 387 |  |  |  |  |  |  |  | 
| 388 | 1 |  |  |  |  | 71 | my @sfs = $seq->get_SeqFeatures; | 
| 389 |  |  |  |  |  |  |  | 
| 390 |  |  |  |  |  |  | # genbank usually includes a 'source' feature - we just | 
| 391 |  |  |  |  |  |  | # migrate the data from this to the actual source feature | 
| 392 | 1 |  |  |  |  | 2 | my @sources = grep {$_->primary_tag eq 'source'} @sfs; | 
|  | 58 |  |  |  |  | 67 |  | 
| 393 | 1 |  |  |  |  | 2 | @sfs = grep {$_->primary_tag ne 'source'} @sfs; | 
|  | 58 |  |  |  |  | 64 |  | 
| 394 | 1 | 50 |  |  |  | 3 | $self->throw(">1 source types") if @sources > 1; | 
| 395 | 1 |  |  |  |  | 2 | my $source = shift @sources; | 
| 396 | 1 | 50 |  |  |  | 3 | if ($source) { | 
| 397 |  |  |  |  |  |  |  | 
| 398 | 1 |  |  |  |  | 7 | my $tempw = Data::Stag->makehandler; | 
| 399 | 1 |  |  |  |  | 65 | $self->write_sf($source, $seq_chaos_feature_id, $tempw); | 
| 400 | 1 |  |  |  |  | 2 | my $snode = $tempw->stag; | 
| 401 |  |  |  |  |  |  | $seqnode->add($_->name, $_->data) | 
| 402 | 1 |  |  |  |  | 11 | foreach ($snode->get_featureprop, | 
| 403 |  |  |  |  |  |  | $snode->get_feature_dbxref); | 
| 404 |  |  |  |  |  |  |  | 
| 405 |  |  |  |  |  |  | } | 
| 406 |  |  |  |  |  |  |  | 
| 407 |  |  |  |  |  |  |  | 
| 408 |  |  |  |  |  |  | # throw the writer an event | 
| 409 | 1 |  |  |  |  | 614 | $w->ev(@$seqnode); | 
| 410 |  |  |  |  |  |  |  | 
| 411 | 1 |  |  |  |  | 4143 | $seqnode = undef;      # free memory | 
| 412 |  |  |  |  |  |  |  | 
| 413 |  |  |  |  |  |  | # make events for all the features within the record | 
| 414 | 1 |  |  |  |  | 8 | foreach my $sf ( @sfs ) { | 
| 415 | 57 |  |  |  |  | 133 | $FNAMER->name_feature($sf); | 
| 416 | 57 |  |  |  |  | 106 | $FNAMER->name_contained_features($sf); | 
| 417 | 57 |  |  |  |  | 88 | $self->write_sf($sf, $seq_chaos_feature_id); | 
| 418 |  |  |  |  |  |  | } | 
| 419 |  |  |  |  |  |  |  | 
| 420 |  |  |  |  |  |  | # data end | 
| 421 |  |  |  |  |  |  | ### $w->E("chaos_block"); | 
| 422 | 1 |  |  |  |  | 12 | return 1; | 
| 423 |  |  |  |  |  |  | } | 
| 424 |  |  |  |  |  |  |  | 
| 425 |  |  |  |  |  |  |  | 
| 426 |  |  |  |  |  |  | sub organismstr{ | 
| 427 | 115 |  |  | 115 | 0 | 88 | my $self = shift; | 
| 428 |  |  |  |  |  |  |  | 
| 429 | 115 | 100 |  |  |  | 137 | return $self->{'organismstr'} = shift if @_; | 
| 430 | 114 |  |  |  |  | 184 | return $self->{'organismstr'}; | 
| 431 |  |  |  |  |  |  | } | 
| 432 |  |  |  |  |  |  |  | 
| 433 |  |  |  |  |  |  |  | 
| 434 |  |  |  |  |  |  | sub genus_species{ | 
| 435 | 115 |  |  | 115 | 0 | 75 | my $self = shift; | 
| 436 |  |  |  |  |  |  |  | 
| 437 | 115 | 100 |  |  |  | 132 | return $self->{'genus_species'} = shift if @_; | 
| 438 | 114 |  |  |  |  | 259 | return $self->{'genus_species'}; | 
| 439 |  |  |  |  |  |  | } | 
| 440 |  |  |  |  |  |  |  | 
| 441 |  |  |  |  |  |  |  | 
| 442 |  |  |  |  |  |  | # maps ID to type | 
| 443 |  |  |  |  |  |  | sub _type_by_id_h { | 
| 444 | 59 |  |  | 59 |  | 63 | my $self = shift; | 
| 445 | 59 | 100 |  |  |  | 74 | $self->{_type_by_id_h} = shift if @_; | 
| 446 | 59 |  |  |  |  | 133 | return $self->{_type_by_id_h}; | 
| 447 |  |  |  |  |  |  | } | 
| 448 |  |  |  |  |  |  |  | 
| 449 |  |  |  |  |  |  |  | 
| 450 |  |  |  |  |  |  |  | 
| 451 |  |  |  |  |  |  | # ---- | 
| 452 |  |  |  |  |  |  | # writes a seq feature | 
| 453 |  |  |  |  |  |  | # ---- | 
| 454 |  |  |  |  |  |  |  | 
| 455 |  |  |  |  |  |  | sub write_sf { | 
| 456 | 58 |  |  | 58 | 0 | 41 | my $self = shift; | 
| 457 | 58 |  |  |  |  | 39 | my $sf = shift; | 
| 458 | 58 |  |  |  |  | 49 | my $seq_chaos_feature_id = shift; | 
| 459 | 58 |  | 66 |  |  | 115 | my $w = shift || $self->handler; | 
| 460 |  |  |  |  |  |  |  | 
| 461 |  |  |  |  |  |  | my %props = | 
| 462 |  |  |  |  |  |  | map { | 
| 463 | 58 |  |  |  |  | 102 | lc($_)=>[$sf->each_tag_value($_)] | 
|  | 244 |  |  |  |  | 332 |  | 
| 464 |  |  |  |  |  |  | } $sf->all_tags; | 
| 465 |  |  |  |  |  |  |  | 
| 466 | 58 |  |  |  |  | 134 | my $loc = $sf->location; | 
| 467 | 58 |  |  |  |  | 101 | my $name = $FNAMER->generate_feature_name($sf); | 
| 468 | 58 |  |  |  |  | 81 | my $type = $sf->primary_tag; | 
| 469 |  |  |  |  |  |  |  | 
| 470 |  |  |  |  |  |  | # The CDS (eg in a genbank feature) implicitly represents | 
| 471 |  |  |  |  |  |  | # the protein | 
| 472 | 58 |  |  |  |  | 112 | $type =~ s/CDS/polypeptide/; | 
| 473 |  |  |  |  |  |  |  | 
| 474 | 58 |  |  |  |  | 78 | my @subsfs = $sf->sub_SeqFeature; | 
| 475 | 58 |  |  |  |  | 56 | my @locnodes = (); | 
| 476 | 58 | 50 |  |  |  | 107 | my $sid = $loc->is_remote ? $loc->seq_id : $seq_chaos_feature_id; | 
| 477 |  |  |  |  |  |  |  | 
| 478 | 58 |  |  |  |  | 45 | my $CREATE_SPLIT_SFS = 0; | 
| 479 |  |  |  |  |  |  |  | 
| 480 | 58 | 50 | 33 |  |  | 312 | if($CREATE_SPLIT_SFS && | 
|  |  | 50 |  |  |  |  |  | 
| 481 |  |  |  |  |  |  | $loc->isa("Bio::Location::SplitLocationI") ) { | 
| 482 |  |  |  |  |  |  | # turn splitlocs into subfeatures | 
| 483 | 0 |  |  |  |  | 0 | my $n = 1; | 
| 484 |  |  |  |  |  |  | push(@subsfs, | 
| 485 |  |  |  |  |  |  | map { | 
| 486 | 0 |  |  |  |  | 0 | my $ssf = | 
|  | 0 |  |  |  |  | 0 |  | 
| 487 |  |  |  |  |  |  | Bio::SeqFeature::Generic->new( | 
| 488 |  |  |  |  |  |  |  | 
| 489 |  |  |  |  |  |  | -start=>$_->start, | 
| 490 |  |  |  |  |  |  | -end=>$_->end, | 
| 491 |  |  |  |  |  |  | -strand=>$_->strand, | 
| 492 |  |  |  |  |  |  | -primary=>$self->subpartof($type), | 
| 493 |  |  |  |  |  |  | ); | 
| 494 | 0 | 0 |  |  |  | 0 | if ($_->is_remote) { | 
| 495 | 0 |  |  |  |  | 0 | $ssf->location->is_remote(1); | 
| 496 | 0 |  |  |  |  | 0 | $ssf->location->seq_id($_->seq_id); | 
| 497 |  |  |  |  |  |  | } | 
| 498 | 0 |  |  |  |  | 0 | $ssf; | 
| 499 |  |  |  |  |  |  | } $loc->each_Location); | 
| 500 |  |  |  |  |  |  | } | 
| 501 |  |  |  |  |  |  | elsif( $loc->isa("Bio::Location::RemoteLocationI") ) { | 
| 502 |  |  |  |  |  |  | # turn splitlocs into subfeatures | 
| 503 | 0 |  |  |  |  | 0 | my $n = 1; | 
| 504 |  |  |  |  |  |  | push(@subsfs, | 
| 505 |  |  |  |  |  |  | map { | 
| 506 | 0 |  |  |  |  | 0 | Bio::SeqFeature::Generic->new( | 
|  | 0 |  |  |  |  | 0 |  | 
| 507 |  |  |  |  |  |  | #                                               -name=>$name.'.'.$n++, | 
| 508 |  |  |  |  |  |  | -start=>$_->start, | 
| 509 |  |  |  |  |  |  | -end=>$_->end, | 
| 510 |  |  |  |  |  |  | -strand=>$_->strand, | 
| 511 |  |  |  |  |  |  | -primary=>$self->subpartof($type), | 
| 512 |  |  |  |  |  |  | ) | 
| 513 |  |  |  |  |  |  | } $loc->each_Location); | 
| 514 |  |  |  |  |  |  | } | 
| 515 |  |  |  |  |  |  | else { | 
| 516 | 58 |  |  |  |  | 91 | my ($beg, $end, $strand) = $self->bp2ib($loc); | 
| 517 | 58 | 50 |  |  |  | 87 | if (!$strand) { | 
| 518 | 2 |  |  | 2 |  | 164 | use Data::Dumper; | 
|  | 2 |  |  |  |  | 3 |  | 
|  | 2 |  |  |  |  | 1713 |  | 
| 519 | 0 |  |  |  |  | 0 | print Dumper $sf, $loc; | 
| 520 | 0 |  |  |  |  | 0 | $self->throw("($beg, $end, $strand) - no strand\n"); | 
| 521 |  |  |  |  |  |  | } | 
| 522 |  |  |  |  |  |  | @locnodes = ( | 
| 523 | 58 |  |  |  |  | 195 | [featureloc=>[ | 
| 524 |  |  |  |  |  |  | [nbeg=>$beg], | 
| 525 |  |  |  |  |  |  | [nend=>$end], | 
| 526 |  |  |  |  |  |  | [strand=>$strand], | 
| 527 |  |  |  |  |  |  | [srcfeature_id=>$sid], | 
| 528 |  |  |  |  |  |  | [locgroup=>0], | 
| 529 |  |  |  |  |  |  | [rank=>0], | 
| 530 |  |  |  |  |  |  | ] | 
| 531 |  |  |  |  |  |  | ] | 
| 532 |  |  |  |  |  |  | ); | 
| 533 |  |  |  |  |  |  | } | 
| 534 | 58 |  |  |  |  | 102 | my $feature_id = $self->get_chaos_feature_id($sf); | 
| 535 |  |  |  |  |  |  |  | 
| 536 | 58 | 50 |  |  |  | 83 | delete $props{id} if $props{id}; | 
| 537 |  |  |  |  |  |  | # do something with genbank stuff | 
| 538 | 58 |  |  |  |  | 44 | my $pid = $props{'protein_id'}; | 
| 539 | 58 |  |  |  |  | 48 | my $tn = $props{'translation'}; | 
| 540 | 58 | 100 |  |  |  | 40 | my @xrefs = @{$props{'db_xref'} || []}; | 
|  | 58 |  |  |  |  | 111 |  | 
| 541 | 58 | 100 |  |  |  | 90 | if ($pid) { | 
| 542 | 14 |  |  |  |  | 22 | push(@xrefs, "protein:$pid->[0]"); | 
| 543 |  |  |  |  |  |  | } | 
| 544 |  |  |  |  |  |  |  | 
| 545 | 58 | 100 |  |  |  | 66 | my $org = $props{organism} ? $props{organism}->[0] : undef; | 
| 546 | 58 | 100 | 66 |  |  | 116 | if (!$org && $self->organismstr) { | 
| 547 | 57 |  |  |  |  | 69 | $org = $self->organismstr; | 
| 548 |  |  |  |  |  |  | } | 
| 549 | 58 | 100 |  |  |  | 94 | my $uname = $name ? $name.'/'.$feature_id : $feature_id; | 
| 550 | 58 | 100 | 66 |  |  | 76 | if ($self->genus_species && $name) { | 
| 551 | 55 |  |  |  |  | 62 | $uname = $self->make_uniquename($self->genus_species, $name); | 
| 552 |  |  |  |  |  |  | } | 
| 553 | 58 | 50 |  |  |  | 93 | if (!$uname) { | 
| 554 | 0 |  |  |  |  | 0 | $self->throw("cannot make uniquename for $feature_id $name"); | 
| 555 |  |  |  |  |  |  | } | 
| 556 | 58 |  |  |  |  | 89 | $self->_type_by_id_h->{$feature_id} = $type; | 
| 557 |  |  |  |  |  |  | my $fnode = | 
| 558 |  |  |  |  |  |  | [feature=>[ | 
| 559 |  |  |  |  |  |  | [feature_id=>$feature_id], | 
| 560 |  |  |  |  |  |  | $name ? ([name=>$name]) : (), | 
| 561 |  |  |  |  |  |  | [uniquename=>$uname], | 
| 562 |  |  |  |  |  |  | [type=>$type], | 
| 563 |  |  |  |  |  |  | $tn ? ([residues=>$tn->[0]], | 
| 564 |  |  |  |  |  |  | [seqlen=>length($tn->[0])], | 
| 565 |  |  |  |  |  |  | #####[md5checksum=>md5checksum($tn->[0])], | 
| 566 |  |  |  |  |  |  | ) :(), | 
| 567 |  |  |  |  |  |  | $org ? ([organismstr=>$org]) : (), | 
| 568 |  |  |  |  |  |  | @locnodes, | 
| 569 |  |  |  |  |  |  | (map { | 
| 570 | 75 |  |  |  |  | 188 | [feature_dbxref=>[ | 
| 571 |  |  |  |  |  |  | [dbxrefstr=>$_] | 
| 572 |  |  |  |  |  |  | ] | 
| 573 |  |  |  |  |  |  | ] | 
| 574 |  |  |  |  |  |  | } @xrefs), | 
| 575 |  |  |  |  |  |  | (map { | 
| 576 | 58 | 100 |  |  |  | 238 | my $k = $_; | 
|  | 244 | 100 |  |  |  | 165 |  | 
|  |  | 50 |  |  |  |  |  | 
| 577 | 244 |  |  |  |  | 132 | my $rank=0; | 
| 578 | 244 |  |  |  |  | 131 | map { [featureprop=>[[type=>$k],[value=>$_],[rank=>$rank++]]] } @{$props{$k}} | 
|  | 258 |  |  |  |  | 706 |  | 
|  | 244 |  |  |  |  | 216 |  | 
| 579 |  |  |  |  |  |  | } keys %props), | 
| 580 |  |  |  |  |  |  | ]]; | 
| 581 | 58 |  |  |  |  | 182 | $w->ev(@$fnode); | 
| 582 |  |  |  |  |  |  |  | 
| 583 | 58 |  |  |  |  | 157024 | my $rank = 0; | 
| 584 | 58 | 50 |  |  |  | 99 | if (@subsfs) { | 
| 585 |  |  |  |  |  |  | # strand is always determined by FIRST feature listed | 
| 586 |  |  |  |  |  |  | # (see genbank entry for trans-spliced mod(mdg4) AE003734) | 
| 587 | 0 |  |  |  |  | 0 | my $strand = $subsfs[0]; | 
| 588 |  |  |  |  |  |  |  | 
| 589 |  |  |  |  |  |  | # almost all the time, all features are on same strand | 
| 590 | 0 |  |  |  |  | 0 | my @sfs_on_main_strand = grep {$_->strand == $strand} @subsfs; | 
|  | 0 |  |  |  |  | 0 |  | 
| 591 | 0 |  |  |  |  | 0 | my @sfs_on_other_strand = grep {$_->strand != $strand} @subsfs; | 
|  | 0 |  |  |  |  | 0 |  | 
| 592 |  |  |  |  |  |  |  | 
| 593 | 0 |  |  |  |  | 0 | sort_by_strand($strand, \@sfs_on_main_strand); | 
| 594 | 0 |  |  |  |  | 0 | sort_by_strand(0-$strand, \@sfs_on_other_strand); | 
| 595 | 0 |  |  |  |  | 0 | @subsfs = (@sfs_on_main_strand, @sfs_on_other_strand); | 
| 596 |  |  |  |  |  |  |  | 
| 597 | 0 |  |  |  |  | 0 | foreach my $ssf (@subsfs) { | 
| 598 | 0 |  |  |  |  | 0 | my $ssfid = $self->write_sf($ssf, $sid); | 
| 599 |  |  |  |  |  |  | #my $rtype = 'part_of'; | 
| 600 | 0 |  |  |  |  | 0 | my $rtype = | 
| 601 |  |  |  |  |  |  | $TM->get_relationship_type_by_parent_child($sf,$ssf); | 
| 602 | 0 | 0 |  |  |  | 0 | if ($ssf->primary_tag eq 'CDS') { | 
| 603 | 0 |  |  |  |  | 0 | $rtype = 'derives_from'; | 
| 604 |  |  |  |  |  |  | } | 
| 605 | 0 |  |  |  |  | 0 | $w->ev(feature_relationship=>[ | 
| 606 |  |  |  |  |  |  | [subject_id=>$ssfid], | 
| 607 |  |  |  |  |  |  | [object_id=>$feature_id], | 
| 608 |  |  |  |  |  |  | [type=>$rtype], | 
| 609 |  |  |  |  |  |  | [rank=>$rank++], | 
| 610 |  |  |  |  |  |  | ] | 
| 611 |  |  |  |  |  |  | ); | 
| 612 |  |  |  |  |  |  | } | 
| 613 |  |  |  |  |  |  | } | 
| 614 |  |  |  |  |  |  | else { | 
| 615 |  |  |  |  |  |  | # parents not stored as bioperl containment hierarchy | 
| 616 | 58 | 50 |  |  |  | 44 | my @parent_ids = @{$props{parent} || []}; | 
|  | 58 |  |  |  |  | 230 |  | 
| 617 | 58 |  |  |  |  | 86 | foreach my $parent_id (@parent_ids) { | 
| 618 |  |  |  |  |  |  | my $ptype = | 
| 619 | 0 |  | 0 |  |  | 0 | $self->_type_by_id_h->{$parent_id} || 'unknown'; | 
| 620 | 0 |  |  |  |  | 0 | my $rtype = | 
| 621 |  |  |  |  |  |  | $TM->get_relationship_type_by_parent_child($ptype,$type); | 
| 622 | 0 |  |  |  |  | 0 | $w->ev(feature_relationship=>[ | 
| 623 |  |  |  |  |  |  | [subject_id=>$feature_id], | 
| 624 |  |  |  |  |  |  | [object_id=>$parent_id], | 
| 625 |  |  |  |  |  |  | [type=>$rtype], | 
| 626 |  |  |  |  |  |  | [rank=>$rank++], | 
| 627 |  |  |  |  |  |  | ] | 
| 628 |  |  |  |  |  |  | ); | 
| 629 |  |  |  |  |  |  | } | 
| 630 |  |  |  |  |  |  | } | 
| 631 | 58 |  |  |  |  | 469 | return $feature_id; | 
| 632 |  |  |  |  |  |  | } | 
| 633 |  |  |  |  |  |  |  | 
| 634 |  |  |  |  |  |  | sub sort_by_strand { | 
| 635 | 0 |  | 0 | 0 | 0 | 0 | my $strand = shift || 1; | 
| 636 | 0 |  |  |  |  | 0 | my $sfs = shift; | 
| 637 | 0 |  |  |  |  | 0 | @$sfs = sort { ($a->start <=> $b->start) * $strand } @$sfs; | 
|  | 0 |  |  |  |  | 0 |  | 
| 638 | 0 |  |  |  |  | 0 | return; | 
| 639 |  |  |  |  |  |  | } | 
| 640 |  |  |  |  |  |  |  | 
| 641 |  |  |  |  |  |  | sub make_uniquename { | 
| 642 | 56 |  |  | 56 | 0 | 36 | my $self = shift; | 
| 643 | 56 |  |  |  |  | 45 | my $org = shift; | 
| 644 | 56 |  |  |  |  | 42 | my $name = shift; | 
| 645 |  |  |  |  |  |  |  | 
| 646 | 56 |  |  |  |  | 48 | my $os = $org; | 
| 647 | 56 |  |  |  |  | 256 | $os =~ s/\s+/_/g; | 
| 648 | 56 |  |  |  |  | 71 | $os =~ s/\(/_/g; | 
| 649 | 56 |  |  |  |  | 38 | $os =~ s/\)/_/g; | 
| 650 | 56 |  |  |  |  | 135 | $os =~ s/_+/_/g; | 
| 651 | 56 |  |  |  |  | 42 | $os =~ s/^_+//g; | 
| 652 | 56 |  |  |  |  | 84 | $os =~ s/_+$//g; | 
| 653 | 56 |  |  |  |  | 81 | return "$os:$name"; | 
| 654 |  |  |  |  |  |  | } | 
| 655 |  |  |  |  |  |  |  | 
| 656 |  |  |  |  |  |  |  | 
| 657 |  |  |  |  |  |  | sub get_chaos_feature_id { | 
| 658 | 58 |  |  | 58 | 0 | 44 | my $self = shift; | 
| 659 | 58 |  |  |  |  | 31 | my $ob = shift; | 
| 660 |  |  |  |  |  |  |  | 
| 661 | 58 |  |  |  |  | 37 | my $id; | 
| 662 | 58 | 50 |  |  |  | 172 | if ($ob->isa("Bio::SeqI")) { | 
| 663 | 0 | 0 |  |  |  | 0 | $id = $ob->accession_number . '.' . ($ob->can('seq_version') ? $ob->seq_version : $ob->version); | 
| 664 |  |  |  |  |  |  | } | 
| 665 |  |  |  |  |  |  | else { | 
| 666 | 58 | 50 |  |  |  | 110 | $ob->isa("Bio::SeqFeatureI") || $self->throw("$ob must be either SeqI or SeqFeatureI"); | 
| 667 |  |  |  |  |  |  |  | 
| 668 | 58 | 50 |  |  |  | 95 | if ($ob->primary_id) { | 
| 669 | 0 |  |  |  |  | 0 | $id = $ob->primary_id; | 
| 670 |  |  |  |  |  |  | } | 
| 671 |  |  |  |  |  |  | else { | 
| 672 | 58 |  |  |  |  | 55 | eval { | 
| 673 | 58 |  |  |  |  | 140 | $id = $IDH->generate_unique_persistent_id($ob); | 
| 674 |  |  |  |  |  |  | }; | 
| 675 | 58 | 50 |  |  |  | 88 | if ($@) { | 
| 676 | 0 |  |  |  |  | 0 | $self->warn($@); | 
| 677 | 0 |  |  |  |  | 0 | $id = "$ob"; # last resort - use memory pointer ref | 
| 678 |  |  |  |  |  |  | # will not be persistent, but will be unique | 
| 679 |  |  |  |  |  |  | } | 
| 680 |  |  |  |  |  |  | } | 
| 681 |  |  |  |  |  |  | } | 
| 682 | 58 | 50 |  |  |  | 82 | if (!$id) { | 
| 683 | 0 | 0 |  |  |  | 0 | if ($ob->isa("Bio::SeqFeatureI")) { | 
| 684 | 0 |  |  |  |  | 0 | $id = $IDH->generate_unique_persistent_id($ob); | 
| 685 |  |  |  |  |  |  | } | 
| 686 |  |  |  |  |  |  | else { | 
| 687 | 0 |  |  |  |  | 0 | $self->throw("Cannot generate a unique persistent ID for a Seq without either primary_id or accession"); | 
| 688 |  |  |  |  |  |  | } | 
| 689 |  |  |  |  |  |  | } | 
| 690 | 58 | 50 |  |  |  | 65 | if ($id) { | 
| 691 | 58 | 50 |  |  |  | 91 | $id = $self->context_namespace ? $self->context_namespace . ":" . $id : $id; | 
| 692 |  |  |  |  |  |  |  | 
| 693 |  |  |  |  |  |  | } | 
| 694 | 58 |  |  |  |  | 62 | return $id; | 
| 695 |  |  |  |  |  |  | } | 
| 696 |  |  |  |  |  |  |  | 
| 697 |  |  |  |  |  |  | # interbase and directional semantics | 
| 698 |  |  |  |  |  |  | sub bp2ib { | 
| 699 | 58 |  |  | 58 | 0 | 33 | my $self = shift; | 
| 700 | 58 |  |  |  |  | 39 | my $loc = shift; | 
| 701 | 58 | 50 |  |  |  | 140 | my ($s, $e, $str) = | 
| 702 |  |  |  |  |  |  | ref($loc) eq "ARRAY" ? (@$loc) : ($loc->start, $loc->end, $loc->strand); | 
| 703 | 58 |  |  |  |  | 97 | $s--; | 
| 704 | 58 | 100 |  |  |  | 84 | if ($str < 0) { | 
| 705 | 26 |  |  |  |  | 36 | ($s, $e) = ($e, $s); | 
| 706 |  |  |  |  |  |  | } | 
| 707 | 58 |  | 50 |  |  | 141 | return ($s, $e, $str || 1); | 
| 708 |  |  |  |  |  |  | } | 
| 709 |  |  |  |  |  |  |  | 
| 710 |  |  |  |  |  |  | sub subpartof { | 
| 711 | 0 |  |  | 0 | 0 |  | my $self = shift; | 
| 712 | 0 |  |  |  |  |  | my $type = 'partof_'.shift; | 
| 713 | 0 |  |  |  |  |  | $type =~ s/partof_CDS/CDS_exon/; | 
| 714 | 0 |  |  |  |  |  | $type =~ s/partof_protein/CDS_exon/; | 
| 715 | 0 |  |  |  |  |  | $type =~ s/partof_polypeptide/CDS_exon/; | 
| 716 | 0 |  |  |  |  |  | $type =~ s/partof_\w*RNA/exon/; | 
| 717 | 0 |  |  |  |  |  | return $type; | 
| 718 |  |  |  |  |  |  | } | 
| 719 |  |  |  |  |  |  |  | 
| 720 |  |  |  |  |  |  | 1; |