| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package BioUtil::Util; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 1 |  |  | 1 |  | 37646 | use File::Path qw/remove_tree/; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 140 |  | 
| 4 | 1 |  |  | 1 |  | 905 | use Time::HiRes qw/time/; | 
|  | 1 |  |  |  |  | 3113 |  | 
|  | 1 |  |  |  |  | 6 |  | 
| 5 |  |  |  |  |  |  |  | 
| 6 |  |  |  |  |  |  | require Exporter; | 
| 7 |  |  |  |  |  |  | @ISA    = (Exporter); | 
| 8 |  |  |  |  |  |  | @EXPORT = qw( | 
| 9 |  |  |  |  |  |  | getopt | 
| 10 |  |  |  |  |  |  |  | 
| 11 |  |  |  |  |  |  | file_list_from_argv | 
| 12 |  |  |  |  |  |  | get_file_list | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | delete_string_elements_by_indexes | 
| 15 |  |  |  |  |  |  | delete_array_elements_by_indexes | 
| 16 |  |  |  |  |  |  |  | 
| 17 |  |  |  |  |  |  | extract_parameters_from_string | 
| 18 |  |  |  |  |  |  | get_parameters_from_file | 
| 19 |  |  |  |  |  |  |  | 
| 20 |  |  |  |  |  |  | get_list_from_file | 
| 21 |  |  |  |  |  |  | get_column_data | 
| 22 |  |  |  |  |  |  |  | 
| 23 |  |  |  |  |  |  | read_json_file | 
| 24 |  |  |  |  |  |  | write_json_file | 
| 25 |  |  |  |  |  |  |  | 
| 26 |  |  |  |  |  |  | run | 
| 27 |  |  |  |  |  |  | run_time | 
| 28 |  |  |  |  |  |  | readable_second | 
| 29 |  |  |  |  |  |  |  | 
| 30 |  |  |  |  |  |  | check_positive_integer | 
| 31 |  |  |  |  |  |  | mean_and_stdev | 
| 32 |  |  |  |  |  |  |  | 
| 33 |  |  |  |  |  |  | filename_prefix | 
| 34 |  |  |  |  |  |  | check_all_files_exist | 
| 35 |  |  |  |  |  |  | check_in_out_dir | 
| 36 |  |  |  |  |  |  | rm_and_mkdir | 
| 37 |  |  |  |  |  |  |  | 
| 38 |  |  |  |  |  |  | get_paired_fq_gz_file_from_dir | 
| 39 |  |  |  |  |  |  |  | 
| 40 |  |  |  |  |  |  | ); | 
| 41 |  |  |  |  |  |  |  | 
| 42 | 1 |  |  | 1 |  | 1734 | use vars qw($VERSION); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 65 |  | 
| 43 |  |  |  |  |  |  |  | 
| 44 | 1 |  |  | 1 |  | 34 | use 5.010_000; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 46 |  | 
| 45 | 1 |  |  | 1 |  | 5 | use strict; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 53 |  | 
| 46 | 1 |  |  | 1 |  | 5 | use warnings FATAL => 'all'; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 57 |  | 
| 47 |  |  |  |  |  |  |  | 
| 48 | 1 |  |  | 1 |  | 2893 | use Encode qw/ encode_utf8 /; | 
|  | 1 |  |  |  |  | 19923 |  | 
|  | 1 |  |  |  |  | 149 |  | 
| 49 | 1 |  |  | 1 |  | 13 | use File::Path qw(make_path remove_tree); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 89 |  | 
| 50 | 1 |  |  | 1 |  | 9 | use File::Find; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 93 |  | 
| 51 | 1 |  |  | 1 |  | 23 | use File::Basename; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 105 |  | 
| 52 | 1 |  |  | 1 |  | 1838 | use JSON; | 
|  | 1 |  |  |  |  | 21669 |  | 
|  | 1 |  |  |  |  | 12 |  | 
| 53 |  |  |  |  |  |  |  | 
| 54 |  |  |  |  |  |  | =head1 NAME | 
| 55 |  |  |  |  |  |  |  | 
| 56 |  |  |  |  |  |  | BioUtil::Util - Utilities for operation on data or file | 
| 57 |  |  |  |  |  |  |  | 
| 58 |  |  |  |  |  |  | Some great modules like BioPerl provide many robust solutions. | 
| 59 |  |  |  |  |  |  | However, it is not easy to install for someone in some platforms. | 
| 60 |  |  |  |  |  |  | And for some simple task scripts, a lite module may be a good choice. | 
| 61 |  |  |  |  |  |  | So I reinvented some wheels and added some useful utilities into this module, | 
| 62 |  |  |  |  |  |  | hoping it would be helpful. | 
| 63 |  |  |  |  |  |  |  | 
| 64 |  |  |  |  |  |  | =head1 VERSION | 
| 65 |  |  |  |  |  |  |  | 
| 66 |  |  |  |  |  |  | Version 2015.0228 | 
| 67 |  |  |  |  |  |  |  | 
| 68 |  |  |  |  |  |  | =cut | 
| 69 |  |  |  |  |  |  |  | 
| 70 |  |  |  |  |  |  | our $VERSION = 2015.0228; | 
| 71 |  |  |  |  |  |  |  | 
| 72 |  |  |  |  |  |  | =head1 EXPORT | 
| 73 |  |  |  |  |  |  | getopt | 
| 74 |  |  |  |  |  |  |  | 
| 75 |  |  |  |  |  |  | file_list_from_argv | 
| 76 |  |  |  |  |  |  | get_file_list | 
| 77 |  |  |  |  |  |  |  | 
| 78 |  |  |  |  |  |  | delete_string_elements_by_indexes | 
| 79 |  |  |  |  |  |  | delete_array_elements_by_indexes | 
| 80 |  |  |  |  |  |  |  | 
| 81 |  |  |  |  |  |  | extract_parameters_from_string | 
| 82 |  |  |  |  |  |  | get_parameters_from_file | 
| 83 |  |  |  |  |  |  |  | 
| 84 |  |  |  |  |  |  | get_list_from_file | 
| 85 |  |  |  |  |  |  | get_column_data | 
| 86 |  |  |  |  |  |  |  | 
| 87 |  |  |  |  |  |  | read_json_file | 
| 88 |  |  |  |  |  |  | write_json_file | 
| 89 |  |  |  |  |  |  |  | 
| 90 |  |  |  |  |  |  | run | 
| 91 |  |  |  |  |  |  | run_time | 
| 92 |  |  |  |  |  |  | readable_second | 
| 93 |  |  |  |  |  |  |  | 
| 94 |  |  |  |  |  |  | check_positive_integer | 
| 95 |  |  |  |  |  |  | mean_and_stdev | 
| 96 |  |  |  |  |  |  |  | 
| 97 |  |  |  |  |  |  | filename_prefix | 
| 98 |  |  |  |  |  |  | check_all_files_exist | 
| 99 |  |  |  |  |  |  | check_in_out_dir | 
| 100 |  |  |  |  |  |  | rm_and_mkdir | 
| 101 |  |  |  |  |  |  |  | 
| 102 |  |  |  |  |  |  | get_paired_fq_gz_file_from_dir | 
| 103 |  |  |  |  |  |  |  | 
| 104 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 105 |  |  |  |  |  |  |  | 
| 106 |  |  |  |  |  |  | use BioUtil::Util; | 
| 107 |  |  |  |  |  |  |  | 
| 108 |  |  |  |  |  |  |  | 
| 109 |  |  |  |  |  |  | =head1 SUBROUTINES/METHODS | 
| 110 |  |  |  |  |  |  |  | 
| 111 |  |  |  |  |  |  | =head2 getopt | 
| 112 |  |  |  |  |  |  |  | 
| 113 |  |  |  |  |  |  | getopt FOR ME | 
| 114 |  |  |  |  |  |  |  | 
| 115 |  |  |  |  |  |  | Example | 
| 116 |  |  |  |  |  |  | -a b -c t tt -d bb -dbtype asdfafd -test | 
| 117 |  |  |  |  |  |  |  | 
| 118 |  |  |  |  |  |  | -a: b | 
| 119 |  |  |  |  |  |  | -c: ARRAY(0xee25e8) | 
| 120 |  |  |  |  |  |  | -d: bb | 
| 121 |  |  |  |  |  |  | -dbtype: asdfafd | 
| 122 |  |  |  |  |  |  | -infmt: fasta | 
| 123 |  |  |  |  |  |  | -test: 1 | 
| 124 |  |  |  |  |  |  |  | 
| 125 |  |  |  |  |  |  | =cut | 
| 126 |  |  |  |  |  |  |  | 
| 127 |  |  |  |  |  |  | sub getopt { | 
| 128 | 0 |  |  | 0 | 1 | 0 | my ( $opts, $list ) = @_; | 
| 129 | 0 | 0 | 0 |  |  | 0 | return "\$opts should be ref of hash, and \$list should be ref of list\n" | 
| 130 |  |  |  |  |  |  | unless ref $opts eq ref {} | 
| 131 |  |  |  |  |  |  | and ref $list eq ref []; | 
| 132 | 0 |  |  |  |  | 0 | my ( $o, $opt ) = (undef) x 2; | 
| 133 | 0 |  |  |  |  | 0 | while (@$list) { | 
| 134 | 0 |  |  |  |  | 0 | $o = shift @$list; | 
| 135 | 0 | 0 |  |  |  | 0 | if ( $o =~ /^\-/ ) { | 
| 136 | 0 |  |  |  |  | 0 | $opt = $o; | 
| 137 | 0 | 0 |  |  |  | 0 | $$opts{$opt} = 'http:shenwei.me' unless exists $$opts{$opt}; | 
| 138 |  |  |  |  |  |  | } | 
| 139 |  |  |  |  |  |  | else { | 
| 140 | 0 | 0 |  |  |  | 0 | if ( $$opts{$opt} ne 'http:shenwei.me' ) { | 
| 141 | 0 | 0 |  |  |  | 0 | $$opts{$opt} = [ $$opts{$opt} ] | 
| 142 |  |  |  |  |  |  | if ref $$opts{$opt} ne ref []; | 
| 143 | 0 |  |  |  |  | 0 | push @{ $$opts{$opt} }, $o; | 
|  | 0 |  |  |  |  | 0 |  | 
| 144 |  |  |  |  |  |  | } | 
| 145 |  |  |  |  |  |  | else { | 
| 146 | 0 |  |  |  |  | 0 | $$opts{$opt} = $o; | 
| 147 |  |  |  |  |  |  | } | 
| 148 |  |  |  |  |  |  | } | 
| 149 |  |  |  |  |  |  | } | 
| 150 | 0 | 0 |  |  |  | 0 | for ( keys %$opts ) { $$opts{$_} = 1 if $$opts{$_} eq 'http:shenwei.me'; } | 
|  | 0 |  |  |  |  | 0 |  | 
| 151 | 0 |  |  |  |  | 0 | return $opts; | 
| 152 |  |  |  |  |  |  | } | 
| 153 |  |  |  |  |  |  |  | 
| 154 |  |  |  |  |  |  | =head2 file_list_from_argv | 
| 155 |  |  |  |  |  |  |  | 
| 156 |  |  |  |  |  |  | Get file list from @ARGV. You should use this after parsing options! | 
| 157 |  |  |  |  |  |  |  | 
| 158 |  |  |  |  |  |  | When no arguments given, 'STDIN' will be added to | 
| 159 |  |  |  |  |  |  | the list, which could be further used by, e.g. FastaReader. | 
| 160 |  |  |  |  |  |  |  | 
| 161 |  |  |  |  |  |  | =cut | 
| 162 |  |  |  |  |  |  |  | 
| 163 |  |  |  |  |  |  | sub file_list_from_argv { | 
| 164 | 0 |  |  | 0 | 1 | 0 | my @files = (); | 
| 165 | 0 |  |  |  |  | 0 | for my $file (@_) { | 
| 166 | 0 |  |  |  |  | 0 | for my $f ( glob $file ) { | 
| 167 | 0 |  |  |  |  | 0 | push @files, $f; | 
| 168 |  |  |  |  |  |  | } | 
| 169 |  |  |  |  |  |  | } | 
| 170 | 0 | 0 |  |  |  | 0 | if ( @files == 0 ) { | 
| 171 | 0 |  |  |  |  | 0 | push @files, 'STDIN'; | 
| 172 |  |  |  |  |  |  | } | 
| 173 | 0 |  |  |  |  | 0 | return @files; | 
| 174 |  |  |  |  |  |  | } | 
| 175 |  |  |  |  |  |  |  | 
| 176 |  |  |  |  |  |  | =head2 get_file_list | 
| 177 |  |  |  |  |  |  |  | 
| 178 |  |  |  |  |  |  | Find files/directories with custom filter, | 
| 179 |  |  |  |  |  |  | max serach depth could be specified. | 
| 180 |  |  |  |  |  |  |  | 
| 181 |  |  |  |  |  |  | Example (searching perl scripts) | 
| 182 |  |  |  |  |  |  |  | 
| 183 |  |  |  |  |  |  | my $dir   = "~"; | 
| 184 |  |  |  |  |  |  | my $depth = 2; | 
| 185 |  |  |  |  |  |  |  | 
| 186 |  |  |  |  |  |  | my $list = get_file_list( | 
| 187 |  |  |  |  |  |  | $dir, | 
| 188 |  |  |  |  |  |  | sub { | 
| 189 |  |  |  |  |  |  | if ( -d or /^\./i ) {  # ignore configuration file and folders | 
| 190 |  |  |  |  |  |  | return 0; | 
| 191 |  |  |  |  |  |  | } | 
| 192 |  |  |  |  |  |  | if (/\.pm/i or /\.pl/i) { | 
| 193 |  |  |  |  |  |  | return 1; | 
| 194 |  |  |  |  |  |  | } | 
| 195 |  |  |  |  |  |  | return 0; | 
| 196 |  |  |  |  |  |  | }, | 
| 197 |  |  |  |  |  |  | $depth | 
| 198 |  |  |  |  |  |  | ); | 
| 199 |  |  |  |  |  |  | print "$_\n" for @$list; | 
| 200 |  |  |  |  |  |  |  | 
| 201 |  |  |  |  |  |  | =cut | 
| 202 |  |  |  |  |  |  |  | 
| 203 |  |  |  |  |  |  | sub get_file_list { | 
| 204 |  |  |  |  |  |  |  | 
| 205 |  |  |  |  |  |  | # filter is a subroutine to filter a file | 
| 206 | 1 |  |  | 1 | 1 | 15 | my ( $dir, $filter, $depth ) = @_; | 
| 207 | 1 |  |  |  |  | 10 | $dir =~ s/\/+/\//g; | 
| 208 | 1 |  |  |  |  | 6 | $dir =~ s/\/$//; | 
| 209 |  |  |  |  |  |  |  | 
| 210 | 1 | 50 |  |  |  | 6 | $depth = 1 << 30 unless defined $depth; | 
| 211 | 1 | 50 | 33 |  |  | 17 | unless ( $depth =~ /^\d+$/ and $depth > 0 ) { | 
| 212 | 0 |  |  |  |  | 0 | warn "depth should be positive integer\n"; | 
| 213 | 0 |  |  |  |  | 0 | return []; | 
| 214 |  |  |  |  |  |  | } | 
| 215 |  |  |  |  |  |  |  | 
| 216 |  |  |  |  |  |  | # print "$dir\n"; | 
| 217 | 1 |  |  |  |  | 4 | my $depth0 = $dir =~ tr/\//\//; | 
| 218 |  |  |  |  |  |  |  | 
| 219 | 1 |  |  |  |  | 4 | my $files  = []; | 
| 220 |  |  |  |  |  |  | my $wanted = sub { | 
| 221 | 57 | 100 |  | 57 |  | 1006 | return if /^\.+$/; | 
| 222 | 56 | 50 |  |  |  | 118 | return if $_ eq $dir; | 
| 223 |  |  |  |  |  |  |  | 
| 224 |  |  |  |  |  |  | # check depth | 
| 225 | 56 | 100 |  |  |  | 1659 | return if $File::Find::name =~ tr/\//\// - $depth0 > $depth; | 
| 226 |  |  |  |  |  |  |  | 
| 227 | 14 | 100 |  |  |  | 42 | if ( &$filter($_) ) { | 
| 228 | 1 |  |  |  |  | 37 | push @$files, $File::Find::name; | 
| 229 |  |  |  |  |  |  | } | 
| 230 | 1 |  |  |  |  | 10 | }; | 
| 231 |  |  |  |  |  |  |  | 
| 232 | 1 |  |  |  |  | 127 | find( $wanted, ($dir) ); | 
| 233 |  |  |  |  |  |  |  | 
| 234 | 1 |  |  |  |  | 12 | return $files; | 
| 235 |  |  |  |  |  |  | } | 
| 236 |  |  |  |  |  |  |  | 
| 237 |  |  |  |  |  |  | =head2 delete_string_elements_by_indexes | 
| 238 |  |  |  |  |  |  |  | 
| 239 |  |  |  |  |  |  | Delete string elements by indexes, it uses delete_array_elements_by_indexes | 
| 240 |  |  |  |  |  |  |  | 
| 241 |  |  |  |  |  |  | =cut | 
| 242 |  |  |  |  |  |  |  | 
| 243 |  |  |  |  |  |  | sub delete_string_elements_by_indexes { | 
| 244 | 0 |  |  | 0 | 1 | 0 | my ( $str, $ids ) = @_; | 
| 245 | 0 |  |  |  |  | 0 | my $t = ''; | 
| 246 | 0 | 0 | 0 |  |  | 0 | unless ( ref $str eq ref \$t and ref $ids eq ref [] ) { | 
| 247 | 0 |  |  |  |  | 0 | die "both arguments should be array reference\n"; | 
| 248 |  |  |  |  |  |  | } | 
| 249 | 0 |  |  |  |  | 0 | my @bytes = split //, $$str; | 
| 250 | 0 |  |  |  |  | 0 | return join "", @{ delete_array_elements_by_indexes( \@bytes, $ids ) }; | 
|  | 0 |  |  |  |  | 0 |  | 
| 251 |  |  |  |  |  |  | } | 
| 252 |  |  |  |  |  |  |  | 
| 253 |  |  |  |  |  |  | =head2 delete_array_elements_by_indexes | 
| 254 |  |  |  |  |  |  |  | 
| 255 |  |  |  |  |  |  | Delete array elements by given indexes. | 
| 256 |  |  |  |  |  |  |  | 
| 257 |  |  |  |  |  |  | Example: | 
| 258 |  |  |  |  |  |  |  | 
| 259 |  |  |  |  |  |  | @list = qw(a b c d e f); | 
| 260 |  |  |  |  |  |  | @idx = (1, 2, 4); | 
| 261 |  |  |  |  |  |  | $list2 = delete_array_elements_by_indexes(\@list, \@idx); | 
| 262 |  |  |  |  |  |  | print "@$list2\n"; # result: a, d, f | 
| 263 |  |  |  |  |  |  |  | 
| 264 |  |  |  |  |  |  | =cut | 
| 265 |  |  |  |  |  |  |  | 
| 266 |  |  |  |  |  |  | sub delete_array_elements_by_indexes { | 
| 267 | 0 |  |  | 0 | 1 | 0 | my ( $array, $ids ) = @_; | 
| 268 | 0 | 0 | 0 |  |  | 0 | unless ( ref $array eq ref [] and ref $ids eq ref [] ) { | 
| 269 | 0 |  |  |  |  | 0 | die "both arguments should be array reference\n"; | 
| 270 |  |  |  |  |  |  | } | 
| 271 | 0 |  |  |  |  | 0 | my %omitted = map { $_ => 1 } @$ids; | 
|  | 0 |  |  |  |  | 0 |  | 
| 272 | 0 |  |  |  |  | 0 | my @newarray = (); | 
| 273 | 0 |  |  |  |  | 0 | for my $i ( 0 .. ( scalar(@$array) - 1 ) ) { | 
| 274 | 0 | 0 |  |  |  | 0 | next if exists $omitted{$i}; | 
| 275 | 0 |  |  |  |  | 0 | push @newarray, $$array[$i]; | 
| 276 |  |  |  |  |  |  | } | 
| 277 | 0 |  |  |  |  | 0 | return \@newarray; | 
| 278 |  |  |  |  |  |  | } | 
| 279 |  |  |  |  |  |  |  | 
| 280 |  |  |  |  |  |  | =head2 extract_parameters_from_string | 
| 281 |  |  |  |  |  |  |  | 
| 282 |  |  |  |  |  |  | Extract parameters from string. | 
| 283 |  |  |  |  |  |  |  | 
| 284 |  |  |  |  |  |  | The regular expression is | 
| 285 |  |  |  |  |  |  |  | 
| 286 |  |  |  |  |  |  | /([\w\d\_\-\.]+)\s*=\s*([^\=;]*)[\s;]*/ | 
| 287 |  |  |  |  |  |  |  | 
| 288 |  |  |  |  |  |  | Example: | 
| 289 |  |  |  |  |  |  |  | 
| 290 |  |  |  |  |  |  | # bad format, but could also be parsed | 
| 291 |  |  |  |  |  |  | # my $s = " s = b; a=test; b_c=12 3; a.b =; b | 
| 292 |  |  |  |  |  |  | # = asdf | 
| 293 |  |  |  |  |  |  | # sd; ads-f = 12313"; | 
| 294 |  |  |  |  |  |  |  | 
| 295 |  |  |  |  |  |  | # recommended | 
| 296 |  |  |  |  |  |  | my $s = "key1=abcde; key2=123; conf.a=file; conf.b=12; "; | 
| 297 |  |  |  |  |  |  |  | 
| 298 |  |  |  |  |  |  | my $pa = extract_parameters_from_string($s); | 
| 299 |  |  |  |  |  |  | print "=$_:$$p{$_}=\n" for sort keys %$pa; | 
| 300 |  |  |  |  |  |  |  | 
| 301 |  |  |  |  |  |  | =cut | 
| 302 |  |  |  |  |  |  |  | 
| 303 |  |  |  |  |  |  | sub extract_parameters_from_string { | 
| 304 | 1 |  |  | 1 | 1 | 1132 | my ($s) = @_; | 
| 305 | 1 |  |  |  |  | 4 | my $parameters = {}; | 
| 306 | 1 |  |  |  |  | 16 | while ( $s =~ /([\w\d\_\-\.]+)\s*=\s*([^\=;]*)[\s;]*/gm ) { | 
| 307 | 4 | 50 |  |  |  | 17 | warn "$1 was defined more than once\n" if defined $$parameters{$1}; | 
| 308 | 4 |  |  |  |  | 21 | $$parameters{$1} = $2; | 
| 309 |  |  |  |  |  |  | } | 
| 310 | 1 |  |  |  |  | 5 | return $parameters; | 
| 311 |  |  |  |  |  |  | } | 
| 312 |  |  |  |  |  |  |  | 
| 313 |  |  |  |  |  |  | =head2 get_parameters_from_file | 
| 314 |  |  |  |  |  |  |  | 
| 315 |  |  |  |  |  |  | Get parameters from a file. | 
| 316 |  |  |  |  |  |  | Comments start with # are allowed in file. | 
| 317 |  |  |  |  |  |  |  | 
| 318 |  |  |  |  |  |  | Example: | 
| 319 |  |  |  |  |  |  |  | 
| 320 |  |  |  |  |  |  | my $pa = get_parameters_from_file("d.txt"); | 
| 321 |  |  |  |  |  |  | print "$_: $$pa{$_}\n" for sort keys %$pa; | 
| 322 |  |  |  |  |  |  |  | 
| 323 |  |  |  |  |  |  | For a file with content: | 
| 324 |  |  |  |  |  |  |  | 
| 325 |  |  |  |  |  |  | # cell phone | 
| 326 |  |  |  |  |  |  | apple = 1 # note | 
| 327 |  |  |  |  |  |  |  | 
| 328 |  |  |  |  |  |  | nokia = 2 # | 
| 329 |  |  |  |  |  |  |  | 
| 330 |  |  |  |  |  |  | output is: | 
| 331 |  |  |  |  |  |  |  | 
| 332 |  |  |  |  |  |  | apple: 1 | 
| 333 |  |  |  |  |  |  | nokia: 2 | 
| 334 |  |  |  |  |  |  |  | 
| 335 |  |  |  |  |  |  | =cut | 
| 336 |  |  |  |  |  |  |  | 
| 337 |  |  |  |  |  |  | sub get_parameters_from_file { | 
| 338 | 1 |  |  | 1 | 1 | 403 | my ($file) = @_; | 
| 339 | 1 |  |  |  |  | 4 | my $parameters = {}; | 
| 340 | 1 | 50 |  |  |  | 72 | open my $fh, $file or die "fail to open file $file\n"; | 
| 341 | 1 |  |  |  |  | 35 | while (<$fh>) { | 
| 342 | 4 |  |  |  |  | 27 | s/^\s+|\s+$//g; | 
| 343 | 4 | 100 | 100 |  |  | 32 | next if $_ eq ''    # blank line | 
| 344 |  |  |  |  |  |  | or /^#/;        # annotation | 
| 345 | 2 |  |  |  |  | 8 | s/#.*//g;           # delete annotation | 
| 346 |  |  |  |  |  |  |  | 
| 347 | 2 | 50 |  |  |  | 14 | next unless /([\w\d\_\-\.]+)\s*=\s*(.+)/; | 
| 348 | 2 |  |  |  |  | 21 | $$parameters{$1} = $2; | 
| 349 |  |  |  |  |  |  | } | 
| 350 | 1 |  |  |  |  | 13 | close $fh; | 
| 351 | 1 |  |  |  |  | 7 | return $parameters; | 
| 352 |  |  |  |  |  |  | } | 
| 353 |  |  |  |  |  |  |  | 
| 354 |  |  |  |  |  |  | =head2 get_list_from_file | 
| 355 |  |  |  |  |  |  |  | 
| 356 |  |  |  |  |  |  | Get list from a file. | 
| 357 |  |  |  |  |  |  | Comments start with # are allowed in file. | 
| 358 |  |  |  |  |  |  |  | 
| 359 |  |  |  |  |  |  | Example: | 
| 360 |  |  |  |  |  |  |  | 
| 361 |  |  |  |  |  |  | my $list = get_list_from_file("d.txt"); | 
| 362 |  |  |  |  |  |  | print "$_\n" for @$list; | 
| 363 |  |  |  |  |  |  |  | 
| 364 |  |  |  |  |  |  | For a file with content: | 
| 365 |  |  |  |  |  |  |  | 
| 366 |  |  |  |  |  |  | # cell phone | 
| 367 |  |  |  |  |  |  | apple # note | 
| 368 |  |  |  |  |  |  |  | 
| 369 |  |  |  |  |  |  | nokia | 
| 370 |  |  |  |  |  |  |  | 
| 371 |  |  |  |  |  |  | output is: | 
| 372 |  |  |  |  |  |  |  | 
| 373 |  |  |  |  |  |  | apple | 
| 374 |  |  |  |  |  |  | nokia | 
| 375 |  |  |  |  |  |  |  | 
| 376 |  |  |  |  |  |  | =cut | 
| 377 |  |  |  |  |  |  |  | 
| 378 |  |  |  |  |  |  | sub get_list_from_file { | 
| 379 | 1 |  |  | 1 | 1 | 322 | my ($file) = @_; | 
| 380 | 1 | 50 |  |  |  | 42 | open my $fh, "<", $file or die "fail to open file $file\n"; | 
| 381 | 1 |  |  |  |  | 3 | my @list = (); | 
| 382 | 1 |  |  |  |  | 13 | while (<$fh>) { | 
| 383 | 4 |  |  |  |  | 20 | s/\r?\n//g; | 
| 384 | 4 |  |  |  |  | 16 | s/^\s+|\s+$//g; | 
| 385 | 4 | 100 | 100 |  |  | 26 | next if $_ eq ''    # blank line | 
| 386 |  |  |  |  |  |  | or /^#/;        # annotation | 
| 387 | 2 |  |  |  |  | 6 | s/#.*//g;           # delete annotation | 
| 388 |  |  |  |  |  |  |  | 
| 389 | 2 |  |  |  |  | 12 | push @list, $_; | 
| 390 |  |  |  |  |  |  | } | 
| 391 | 1 |  |  |  |  | 8 | close $fh; | 
| 392 | 1 |  |  |  |  | 8 | return \@list; | 
| 393 |  |  |  |  |  |  | } | 
| 394 |  |  |  |  |  |  |  | 
| 395 |  |  |  |  |  |  | =head2 get_column_data | 
| 396 |  |  |  |  |  |  |  | 
| 397 |  |  |  |  |  |  | Get one column of a file. | 
| 398 |  |  |  |  |  |  |  | 
| 399 |  |  |  |  |  |  | Example: | 
| 400 |  |  |  |  |  |  |  | 
| 401 |  |  |  |  |  |  | my $list = get_column_data("d.txt", 2); | 
| 402 |  |  |  |  |  |  | print "$_\n" for @$list; | 
| 403 |  |  |  |  |  |  |  | 
| 404 |  |  |  |  |  |  | =cut | 
| 405 |  |  |  |  |  |  |  | 
| 406 |  |  |  |  |  |  | sub get_column_data { | 
| 407 | 1 |  |  | 1 | 1 | 317 | my ( $file, $column, $delimiter ) = @_; | 
| 408 | 1 | 50 | 33 |  |  | 20 | unless ( $column =~ /^(\d+)$/ and $column > 0 ) { | 
| 409 | 0 |  |  |  |  | 0 | warn "column number ($column) should be positive integer\n"; | 
| 410 | 0 |  |  |  |  | 0 | $column = 1; | 
| 411 |  |  |  |  |  |  | } | 
| 412 | 1 | 50 |  |  |  | 5 | $delimiter = "\t" unless defined $delimiter; | 
| 413 |  |  |  |  |  |  |  | 
| 414 | 1 | 50 |  |  |  | 42 | open my $fh, "<", $file or die "failed to open file: $file\n"; | 
| 415 | 1 |  |  |  |  | 3 | my @linedata = (); | 
| 416 | 1 |  |  |  |  | 2 | my @data     = (); | 
| 417 | 1 |  |  |  |  | 3 | my $n        = 0; | 
| 418 | 1 |  |  |  |  | 12 | while (<$fh>) { | 
| 419 | 4 |  |  |  |  | 21 | s/\r?\n//; | 
| 420 | 4 | 50 |  |  |  | 12 | next if /^\s*#/; | 
| 421 | 4 |  |  |  |  | 40 | @linedata = split /$delimiter/, $_; | 
| 422 | 4 |  |  |  |  | 7 | $n = scalar @linedata; | 
| 423 | 4 | 100 |  |  |  | 19 | next unless $n > 0; | 
| 424 |  |  |  |  |  |  |  | 
| 425 | 3 | 50 |  |  |  | 5 | if ( $column > $n ) { | 
| 426 | 0 |  |  |  |  | 0 | die | 
| 427 |  |  |  |  |  |  | "number of columns of this line ($n) is less than given column number ($column)\n$_"; | 
| 428 |  |  |  |  |  |  | } | 
| 429 |  |  |  |  |  |  |  | 
| 430 | 3 |  |  |  |  | 14 | push @data, $linedata[ $column - 1 ]; | 
| 431 |  |  |  |  |  |  | } | 
| 432 | 1 |  |  |  |  | 9 | close $fh; | 
| 433 |  |  |  |  |  |  |  | 
| 434 | 1 |  |  |  |  | 8 | return \@data; | 
| 435 |  |  |  |  |  |  | } | 
| 436 |  |  |  |  |  |  |  | 
| 437 |  |  |  |  |  |  | =head2 read_json_file | 
| 438 |  |  |  |  |  |  |  | 
| 439 |  |  |  |  |  |  | Read json file and decode it into a hash ref. | 
| 440 |  |  |  |  |  |  |  | 
| 441 |  |  |  |  |  |  | Example: | 
| 442 |  |  |  |  |  |  |  | 
| 443 |  |  |  |  |  |  | my $hashref = read_json_file($file); | 
| 444 |  |  |  |  |  |  |  | 
| 445 |  |  |  |  |  |  | =cut | 
| 446 |  |  |  |  |  |  |  | 
| 447 |  |  |  |  |  |  | sub read_json_file { | 
| 448 | 1 |  |  | 1 | 1 | 8 | my ($file) = @_; | 
| 449 | 1 | 50 |  |  |  | 33 | open my $fh, "<:encoding(utf8)", $file | 
| 450 |  |  |  |  |  |  | or die "fail to open json file: $file\n"; | 
| 451 | 1 |  |  |  |  | 51 | my $text; | 
| 452 | 1 |  |  |  |  | 29 | while (<$fh>) { | 
| 453 | 5 |  |  |  |  | 40 | s/\s*#+.*\r?\n?//g;    # remove annotation | 
| 454 | 5 | 50 |  |  |  | 59 | $text .= $1 if / *(.+)/; | 
| 455 |  |  |  |  |  |  | } | 
| 456 | 1 |  |  |  |  | 16 | close $fh; | 
| 457 | 1 |  |  |  |  | 24 | my $hash = decode_json($text); | 
| 458 | 1 |  |  |  |  | 9 | return $hash; | 
| 459 |  |  |  |  |  |  | } | 
| 460 |  |  |  |  |  |  |  | 
| 461 |  |  |  |  |  |  | =head2 write_json_file | 
| 462 |  |  |  |  |  |  |  | 
| 463 |  |  |  |  |  |  | Write a hash ref into a file. | 
| 464 |  |  |  |  |  |  |  | 
| 465 |  |  |  |  |  |  | Example: | 
| 466 |  |  |  |  |  |  |  | 
| 467 |  |  |  |  |  |  | my $hashref = { "a" => 1, "b" => 2 }; | 
| 468 |  |  |  |  |  |  | write_json_file($hashref, $file); | 
| 469 |  |  |  |  |  |  |  | 
| 470 |  |  |  |  |  |  | =cut | 
| 471 |  |  |  |  |  |  |  | 
| 472 |  |  |  |  |  |  | sub write_json_file { | 
| 473 | 1 |  |  | 1 | 1 | 343 | my ( $hash, $file ) = @_; | 
| 474 | 1 |  |  |  |  | 28 | my $json = JSON->new->allow_nonref; | 
| 475 | 1 |  |  |  |  | 37 | my $text = $json->pretty->encode($hash); | 
| 476 | 1 |  |  |  |  | 9 | $text = encode_utf8($text); | 
| 477 | 1 | 50 |  | 1 |  | 13 | open my $fh2, ">:encoding(utf8)", $file | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 9 |  | 
|  | 1 |  |  |  |  | 71 |  | 
| 478 |  |  |  |  |  |  | or die "fail to open json file: $file\n"; | 
| 479 | 1 |  |  |  |  | 1868 | print $fh2 $text; | 
| 480 | 1 |  |  |  |  | 92 | close $fh2; | 
| 481 |  |  |  |  |  |  | } | 
| 482 |  |  |  |  |  |  |  | 
| 483 |  |  |  |  |  |  | =head2 run | 
| 484 |  |  |  |  |  |  |  | 
| 485 |  |  |  |  |  |  | Run a command | 
| 486 |  |  |  |  |  |  |  | 
| 487 |  |  |  |  |  |  | Example: | 
| 488 |  |  |  |  |  |  |  | 
| 489 |  |  |  |  |  |  | my $fail = run($cmd); | 
| 490 |  |  |  |  |  |  | die "failed to run:$cmd\n" if $fail; | 
| 491 |  |  |  |  |  |  |  | 
| 492 |  |  |  |  |  |  | =cut | 
| 493 |  |  |  |  |  |  |  | 
| 494 |  |  |  |  |  |  | sub run { | 
| 495 | 0 |  |  | 0 | 1 |  | my ($cmd) = @_; | 
| 496 | 0 |  |  |  |  |  | system($cmd); | 
| 497 |  |  |  |  |  |  |  | 
| 498 | 0 | 0 |  |  |  |  | if ( $? == -1 ) { | 
|  |  | 0 |  |  |  |  |  | 
| 499 | 0 |  |  |  |  |  | die "[ERROR] fail to run: $cmd. Command (" | 
| 500 |  |  |  |  |  |  | . ( split /\s+/, $cmd )[0] | 
| 501 |  |  |  |  |  |  | . ") not found\n"; | 
| 502 |  |  |  |  |  |  | } | 
| 503 |  |  |  |  |  |  | elsif ( $? & 127 ) { | 
| 504 | 0 | 0 |  |  |  |  | printf STDERR "[ERROR] command died with signal %d, %s coredump\n", | 
| 505 |  |  |  |  |  |  | ( $? & 127 ), ( $? & 128 ) ? 'with' : 'without'; | 
| 506 |  |  |  |  |  |  | } | 
| 507 |  |  |  |  |  |  | else { | 
| 508 |  |  |  |  |  |  | # 0, ok | 
| 509 |  |  |  |  |  |  | } | 
| 510 | 0 |  |  |  |  |  | return $?; | 
| 511 |  |  |  |  |  |  | } | 
| 512 |  |  |  |  |  |  |  | 
| 513 |  |  |  |  |  |  | =head2 run_time | 
| 514 |  |  |  |  |  |  |  | 
| 515 |  |  |  |  |  |  | Run a subroutine with given arguments N times, and return the mean and stdev | 
| 516 |  |  |  |  |  |  | of time. | 
| 517 |  |  |  |  |  |  |  | 
| 518 |  |  |  |  |  |  | Example: | 
| 519 |  |  |  |  |  |  |  | 
| 520 |  |  |  |  |  |  | my $read_by_record = sub { | 
| 521 |  |  |  |  |  |  | my ($file) = @_; | 
| 522 |  |  |  |  |  |  | my $next_seq = FastaReader($file); | 
| 523 |  |  |  |  |  |  | while ( my $fa = &$next_seq() ) { | 
| 524 |  |  |  |  |  |  | my ( $header, $seq ) = @$fa; | 
| 525 |  |  |  |  |  |  | # print ">$header\n$seq\n"; | 
| 526 |  |  |  |  |  |  | } | 
| 527 |  |  |  |  |  |  | }; | 
| 528 |  |  |  |  |  |  |  | 
| 529 |  |  |  |  |  |  | my ($mean, $stdev) = run_time( 8, $read_by_record, $file ); | 
| 530 |  |  |  |  |  |  | printf STDERR "\n## Compute time: %0.03f ± %0.03f s\n\n", $mean, $stdev; | 
| 531 |  |  |  |  |  |  |  | 
| 532 |  |  |  |  |  |  | =cut | 
| 533 |  |  |  |  |  |  |  | 
| 534 |  |  |  |  |  |  | sub run_time { | 
| 535 | 0 |  |  | 0 | 1 |  | my ( $n, $sub, @args ) = @_; | 
| 536 | 0 | 0 | 0 |  |  |  | die "first argument should be positive integer" | 
| 537 |  |  |  |  |  |  | unless $n =~ /^\d+$/ and $n > 0; | 
| 538 |  |  |  |  |  |  |  | 
| 539 | 0 |  |  |  |  |  | my $t0 = 0; | 
| 540 | 0 |  |  |  |  |  | my @ts = (); | 
| 541 | 0 |  |  |  |  |  | for ( 1 .. $n ) { | 
| 542 | 0 |  |  |  |  |  | $t0 = time; | 
| 543 | 0 |  |  |  |  |  | &$sub(@args);    # call $sub | 
| 544 | 0 |  |  |  |  |  | push @ts, time - $t0; | 
| 545 |  |  |  |  |  |  | } | 
| 546 |  |  |  |  |  |  |  | 
| 547 | 0 |  |  |  |  |  | return mean_and_stdev( \@ts ); | 
| 548 |  |  |  |  |  |  | } | 
| 549 |  |  |  |  |  |  |  | 
| 550 |  |  |  |  |  |  | =head2 readable_second | 
| 551 |  |  |  |  |  |  |  | 
| 552 |  |  |  |  |  |  | readable_second | 
| 553 |  |  |  |  |  |  |  | 
| 554 |  |  |  |  |  |  | Example: | 
| 555 |  |  |  |  |  |  |  | 
| 556 |  |  |  |  |  |  | print readable_second(11312314),"\n"; # 130 day 22 hour 18 min 34 sec | 
| 557 |  |  |  |  |  |  |  | 
| 558 |  |  |  |  |  |  | =cut | 
| 559 |  |  |  |  |  |  |  | 
| 560 |  |  |  |  |  |  | sub readable_second ($) { | 
| 561 | 0 |  |  | 0 | 1 |  | my ($seconds) = @_; | 
| 562 | 0 | 0 |  |  |  |  | return "Positive integer need." unless $seconds =~ /^\d+$/; | 
| 563 |  |  |  |  |  |  |  | 
| 564 | 0 |  |  |  |  |  | my $time            = ""; | 
| 565 | 0 |  |  |  |  |  | my $has_bigger_unit = 0; | 
| 566 |  |  |  |  |  |  |  | 
| 567 | 0 |  |  |  |  |  | my $days = $seconds / 86400; | 
| 568 | 0 | 0 |  |  |  |  | if ( $days >= 1 ) { | 
| 569 | 0 |  |  |  |  |  | $time .= ( int $days ) . " day "; | 
| 570 | 0 |  |  |  |  |  | $has_bigger_unit = 1; | 
| 571 |  |  |  |  |  |  | } | 
| 572 | 0 |  |  |  |  |  | $seconds = $seconds % 86400; | 
| 573 |  |  |  |  |  |  |  | 
| 574 | 0 |  |  |  |  |  | my $hours = $seconds / 3600; | 
| 575 | 0 | 0 |  |  |  |  | if ( $hours >= 1 ) { | 
|  |  | 0 |  |  |  |  |  | 
| 576 | 0 |  |  |  |  |  | $time .= ( int $hours ) . " hour "; | 
| 577 | 0 |  |  |  |  |  | $has_bigger_unit = 1; | 
| 578 |  |  |  |  |  |  | } | 
| 579 | 0 |  |  |  |  |  | elsif ($has_bigger_unit) { $time .= 0 . " hour "; } | 
| 580 | 0 |  |  |  |  |  | $seconds = $seconds % 3600; | 
| 581 |  |  |  |  |  |  |  | 
| 582 | 0 |  |  |  |  |  | my $minutes = $seconds / 60; | 
| 583 | 0 | 0 |  |  |  |  | if ( $minutes >= 1 ) { | 
|  |  | 0 |  |  |  |  |  | 
| 584 | 0 |  |  |  |  |  | $time .= ( int $minutes ) . " min "; | 
| 585 | 0 |  |  |  |  |  | $has_bigger_unit = 1; | 
| 586 |  |  |  |  |  |  | } | 
| 587 | 0 |  |  |  |  |  | elsif ($has_bigger_unit) { $time .= 0 . " min "; } | 
| 588 | 0 |  |  |  |  |  | $seconds = $seconds % 60; | 
| 589 |  |  |  |  |  |  |  | 
| 590 | 0 |  |  |  |  |  | $time .= $seconds . " sec"; | 
| 591 | 0 |  |  |  |  |  | return $time; | 
| 592 |  |  |  |  |  |  | } | 
| 593 |  |  |  |  |  |  |  | 
| 594 |  |  |  |  |  |  | =head2 check_positive_integer | 
| 595 |  |  |  |  |  |  |  | 
| 596 |  |  |  |  |  |  | Check Positive Integer | 
| 597 |  |  |  |  |  |  |  | 
| 598 |  |  |  |  |  |  | Example: | 
| 599 |  |  |  |  |  |  |  | 
| 600 |  |  |  |  |  |  | check_positive_integer(1); | 
| 601 |  |  |  |  |  |  |  | 
| 602 |  |  |  |  |  |  | =cut | 
| 603 |  |  |  |  |  |  |  | 
| 604 |  |  |  |  |  |  | sub check_positive_integer { | 
| 605 | 0 |  |  | 0 | 1 |  | my ($n) = @_; | 
| 606 | 0 | 0 | 0 |  |  |  | die "positive integer needed ($n given)" | 
| 607 |  |  |  |  |  |  | unless $n =~ /^\d+$/ and $n != 0; | 
| 608 |  |  |  |  |  |  | } | 
| 609 |  |  |  |  |  |  |  | 
| 610 |  |  |  |  |  |  | =head2 mean_and_stdev | 
| 611 |  |  |  |  |  |  |  | 
| 612 |  |  |  |  |  |  | return mean and stdev of a list | 
| 613 |  |  |  |  |  |  |  | 
| 614 |  |  |  |  |  |  | Example: | 
| 615 |  |  |  |  |  |  | my @list = qq/1 2 3/; | 
| 616 |  |  |  |  |  |  | mean_and_stdev(\@list); | 
| 617 |  |  |  |  |  |  |  | 
| 618 |  |  |  |  |  |  | =cut | 
| 619 |  |  |  |  |  |  | sub mean_and_stdev($) { | 
| 620 | 0 |  |  | 0 | 1 |  | my ($list) = @_; | 
| 621 | 0 | 0 |  |  |  |  | return ( 0, 0 ) if @$list == 0; | 
| 622 | 0 |  |  |  |  |  | my $sum = 0; | 
| 623 | 0 |  |  |  |  |  | $sum += $_ for @$list; | 
| 624 | 0 |  |  |  |  |  | my $sum_square = 0; | 
| 625 | 0 |  |  |  |  |  | $sum_square += $_ * $_ for @$list; | 
| 626 | 0 |  |  |  |  |  | my $mean     = $sum / @$list; | 
| 627 | 0 |  |  |  |  |  | my $variance = $sum_square / @$list - $mean * $mean; | 
| 628 | 0 |  |  |  |  |  | my $std      = sqrt $variance; | 
| 629 | 0 |  |  |  |  |  | return ( $mean, $std ); | 
| 630 |  |  |  |  |  |  | } | 
| 631 |  |  |  |  |  |  |  | 
| 632 |  |  |  |  |  |  | =head2 filename_prefix | 
| 633 |  |  |  |  |  |  |  | 
| 634 |  |  |  |  |  |  | Get filename prefix | 
| 635 |  |  |  |  |  |  |  | 
| 636 |  |  |  |  |  |  | Example: | 
| 637 |  |  |  |  |  |  |  | 
| 638 |  |  |  |  |  |  | filename_prefix("test.fa"); # "test" | 
| 639 |  |  |  |  |  |  | filename_prefix("tmp");     # "tmp" | 
| 640 |  |  |  |  |  |  |  | 
| 641 |  |  |  |  |  |  | =cut | 
| 642 |  |  |  |  |  |  |  | 
| 643 |  |  |  |  |  |  | sub filename_prefix { | 
| 644 | 0 |  |  | 0 | 1 |  | my ($file) = @_; | 
| 645 | 0 | 0 |  |  |  |  | if ( $file =~ /(.+)\..+?$/ ) { | 
| 646 | 0 |  |  |  |  |  | return $1; | 
| 647 |  |  |  |  |  |  | } | 
| 648 |  |  |  |  |  |  | else { | 
| 649 | 0 |  |  |  |  |  | return $file; | 
| 650 |  |  |  |  |  |  | } | 
| 651 |  |  |  |  |  |  | } | 
| 652 |  |  |  |  |  |  |  | 
| 653 |  |  |  |  |  |  | =head2 check_all_files_exist | 
| 654 |  |  |  |  |  |  |  | 
| 655 |  |  |  |  |  |  | Check whether all files existed. | 
| 656 |  |  |  |  |  |  |  | 
| 657 |  |  |  |  |  |  | =cut | 
| 658 |  |  |  |  |  |  |  | 
| 659 |  |  |  |  |  |  | sub check_all_files_exist { | 
| 660 | 0 |  |  | 0 | 1 |  | my $flag = 1; | 
| 661 | 0 |  |  |  |  |  | for (@_) { | 
| 662 | 0 | 0 |  |  |  |  | if ( not -e $_ ) { | 
| 663 | 0 |  |  |  |  |  | return 0; | 
| 664 |  |  |  |  |  |  | } | 
| 665 |  |  |  |  |  |  | } | 
| 666 | 0 |  |  |  |  |  | return 1; | 
| 667 |  |  |  |  |  |  | } | 
| 668 |  |  |  |  |  |  |  | 
| 669 |  |  |  |  |  |  | =head2 check_in_out_dir | 
| 670 |  |  |  |  |  |  |  | 
| 671 |  |  |  |  |  |  | Check in and $fh2 directory. | 
| 672 |  |  |  |  |  |  |  | 
| 673 |  |  |  |  |  |  | Example: | 
| 674 |  |  |  |  |  |  |  | 
| 675 |  |  |  |  |  |  | check_in_out_dir("~/dir", "~/dir.out"); | 
| 676 |  |  |  |  |  |  |  | 
| 677 |  |  |  |  |  |  | =cut | 
| 678 |  |  |  |  |  |  |  | 
| 679 |  |  |  |  |  |  | sub check_in_out_dir { | 
| 680 | 0 |  |  | 0 | 1 |  | my ( $in, $out ) = @_; | 
| 681 | 0 | 0 |  |  |  |  | die "dir $in not found." | 
| 682 |  |  |  |  |  |  | unless -e $in; | 
| 683 |  |  |  |  |  |  |  | 
| 684 | 0 | 0 |  |  |  |  | die "$in is not a directory.\n" | 
| 685 |  |  |  |  |  |  | unless -d $in; | 
| 686 |  |  |  |  |  |  |  | 
| 687 | 0 |  |  |  |  |  | $in =~ s/\/$//; | 
| 688 | 0 |  |  |  |  |  | $out =~ s/\/$//; | 
| 689 | 0 | 0 |  |  |  |  | die "out dir shoud be different from in dir!\n" | 
| 690 |  |  |  |  |  |  | if $in eq $out; | 
| 691 |  |  |  |  |  |  | } | 
| 692 |  |  |  |  |  |  |  | 
| 693 |  |  |  |  |  |  | =head2 rm_and_mkdir | 
| 694 |  |  |  |  |  |  |  | 
| 695 |  |  |  |  |  |  | Make a directory, remove it firstly if it exists. | 
| 696 |  |  |  |  |  |  |  | 
| 697 |  |  |  |  |  |  | Example: | 
| 698 |  |  |  |  |  |  |  | 
| 699 |  |  |  |  |  |  | rm_and_mkdir("out") | 
| 700 |  |  |  |  |  |  |  | 
| 701 |  |  |  |  |  |  | =cut | 
| 702 |  |  |  |  |  |  |  | 
| 703 |  |  |  |  |  |  | sub rm_and_mkdir { | 
| 704 | 0 |  |  | 0 | 1 |  | my ($dir) = @_; | 
| 705 | 0 | 0 |  |  |  |  | if ( -e $dir ) { | 
| 706 | 0 | 0 |  |  |  |  | remove_tree($dir) or die "fail to remove: $dir\n"; | 
| 707 |  |  |  |  |  |  | } | 
| 708 | 0 | 0 |  |  |  |  | mkdir $dir or die "fail to mkdir: $dir\n"; | 
| 709 |  |  |  |  |  |  | } | 
| 710 |  |  |  |  |  |  |  | 
| 711 |  |  |  |  |  |  | =head2 get_paired_fq_gz_file_from_dir | 
| 712 |  |  |  |  |  |  |  | 
| 713 |  |  |  |  |  |  | Example: | 
| 714 |  |  |  |  |  |  |  | 
| 715 |  |  |  |  |  |  | # . | 
| 716 |  |  |  |  |  |  | # ├── test_1.fq.gz | 
| 717 |  |  |  |  |  |  | # └── test_2.fq.gz | 
| 718 |  |  |  |  |  |  | for my $pe ( get_paired_fq_gz_file_from_dir($indir) ) { | 
| 719 |  |  |  |  |  |  | # test_1.fq.gz, test_1.fq.gz, test | 
| 720 |  |  |  |  |  |  | my ( $fqfile1, $fqfile2, $id ) = @$pe; | 
| 721 |  |  |  |  |  |  |  | 
| 722 |  |  |  |  |  |  | } | 
| 723 |  |  |  |  |  |  |  | 
| 724 |  |  |  |  |  |  | =cut | 
| 725 |  |  |  |  |  |  |  | 
| 726 |  |  |  |  |  |  | sub get_paired_fq_gz_file_from_dir { | 
| 727 | 0 |  |  | 0 | 1 |  | my ($dir) = @_; | 
| 728 | 0 |  |  |  |  |  | my @files; | 
| 729 | 0 |  |  |  |  |  | for ( sort glob "$dir/*_1.fq.gz" ) { | 
| 730 | 0 |  |  |  |  |  | /\/?([^\/]+)_1.fq.gz/; | 
| 731 | 0 |  |  |  |  |  | my $id = $1; | 
| 732 | 0 | 0 |  |  |  |  | next unless -e "$dir/${id}_2.fq.gz"; | 
| 733 | 0 |  |  |  |  |  | push @files, [ "$dir/${id}_1.fq.gz", "$dir/${id}_2.fq.gz", "$dir/${id}", $id ]; | 
| 734 |  |  |  |  |  |  | } | 
| 735 | 0 |  |  |  |  |  | return @files; | 
| 736 |  |  |  |  |  |  | } | 
| 737 |  |  |  |  |  |  | 1; |