File Coverage

blib/lib/Optimization/NSGAII.pm

Criterion	Covered	Total	%
statement	316	401	78.8
branch	52	68	76.4
condition	18	39	46.1
subroutine	19	21	90.4
pod	1	12	8.3
total	406	541	75.0

line	stmt	bran	cond	sub	pod	time	code
1							package Optimization::NSGAII;
2
3	23			23		1385014	use 5.006;
	23					69
4	23			23		92	use warnings;
	23					46
	23					460
5	23			23		92	use strict;
	23					23
	23					828
6
7	23			23		115	use feature 'say';
	23					46
	23					2691
8	23			23		138	use Exporter;
	23					46
	23					1173
9							our @ISA = ("Exporter");
10	23			23		13133	use Data::Dumper;
	23					141381
	23					1265
11	23			23		161	use List::Util qw/min max/;
	23					46
	23					2185
12	23			23		138	use Carp;
	23					46
	23					92920
13
14							=pod
15
16							=head1 NAME
17
18							Optimization::NSGAII - non dominant sorting genetic algorithm for multi-objective optimization (also known as NSGA2)
19
20							=head1 VERSION
21
22							Version 0.03
23
24							=cut
25
26							our $VERSION = "0.03";
27
28							=head1 SYNOPSIS
29
30							use Optimization::NSGAII qw/ f_Optim_NSGAII /;
31							use Data::Dumper;
32
33							# D E F I N E O B J E C T I V E S T O O P T I M I Z E
34							###########################################################
35
36							sub f_to_optimize {
37
38							my $x = shift; # load input parameters (genes constituting a single individual)
39
40							my $id = shift # load id of this particular individual of the population
41
42							# ... # do your things using these inputs $x->[0], $x->[1] ...
43							# and produce the outputs to be minimized $f1, $f2 ...
44
45							# examples of things you can do include:
46							# - mathematical formulas in perl to define $f1, $f2, ...
47							# - computations with commercial software and so:
48							# - create a directory using $id in the name
49							# - write input file using $x->[0] ...
50							# - run the computation, for example with perl system() function
51							# - locally or
52							# - on a server for example with 'qsub... '
53							# - wait simulation to end
54							# - postprocess its output and define $f1, $f2 ...
55							# - delete directory and contents
56							# - ...
57
58							my $out = [$f1,$f2,$f3]; # and finally return the set of these outputs for
59							return $out; # this single individual of the population
60
61							}
62
63							# D E F I N E B O U N D S [ A N D I N E Q U A L I T I E S ]
64							###################################################################
65
66							# define min and max bounds for $x->[0], $x->[1], ...
67							my $bounds = [[0,1],[0,1],[0,1]]; # example with 3 input parameters (genes) with min = 0 and max = 1:
68
69							sub f_inequality { # optional inequality constraints set
70
71							my $x =shift;
72
73							my @ineq =
74							(
75							$x->[1] + 1 , # equations >= 0
76							$x->[0] + $x->[1] - 9,
77							...
78							...
79
80							);
81
82							return \@ineq;
83							}
84
85							# R U N O P T I M I Z A T I O N
86							#################################
87
88							# execute NSGA-II algorithm
89
90							my $ref_input_output = f_Optim_NSGAII(
91							{
92
93							'nPop' => 50, # population size
94							'nGen' => 250, # final generation number
95							'bounds' => $bounds, # loads the previously defined bounds
96							'function' => \&f_to_optimize, # loads the subroutine to optimize (minimize)
97							'nProc' => 8, # how many individuals to evaluate in parallel as separate processes
98							'filesDir' => '/tmp', # work directory
99
100
101							# optional parameters:
102
103							'verboseFinal' => 1, # 0\|1: input and output values print at final generation, for each individual of the population
104							# default: print is made ( = 1)
105							'f_ineq' => \&f_inequality, # subroutine describing the constraining inequality set
106							# default: no constraint function
107
108							# parameters for mutation
109
110							'distrib' => [-1,0,1], # distribution of values (for example a Gaussian distribution), used to perturb individuals
111							# default: [-1,-0.5,0,0.5,1]
112							'scaleDistrib' => 0.05, # scaling of the distribution array
113							# default: 0 (no perturbation will be done)
114							'percentMut' => 5, # percentage of individual that are randomly perturbated (in all their genes)
115							# and also percentage of input parameters (genes) that are randomly mutated in each individual
116							# default: 5%
117							'startPop' => [[0.3,0.18,-0.1],# initial population
118							[-0.38,0.5,0.1], # default: random population satisfying the bounds
119							...,
120							]
121
122							},
123
124							# the following is the optional set of parameters for 'Pareto front' 2D live plot
125							# if the part below is not present, no plot will be made
126
127							{
128
129							'dx' => 200, # characters width and height of the plot
130							'dy' => 40,
131							'xlabel' => 'stiffness [N/mm]', # horizontal and vertical axis labels
132							'ylabel' => 'mass [kg]',
133							'xlim' => [0,1], # horizontal and vertical axis limits
134							'ylim' => [0,1],
135							'nfun' => [0,2], # which function to plot from return value by f_to_optimize ($out) ; 0=f1, 1=f2 ...
136							}
137							);
138
139							# U S E S O L U T I O N S
140							############################
141
142							# for example print of the input parameters and
143							# corresponding output functions' values of the final found Pareto front
144
145							my @ref_input = @{$ref_input_output->[0]};
146							my @ref_output = @{$ref_input_output->[1]};
147
148							print Dumper(\@ref_input);
149							print Dumper(\@ref_output);
150
151
152
153
154							=head1 EXPORT
155
156							=over
157
158							=item * f_Optim_NSGAII
159
160							=back
161
162							=cut
163
164							our @EXPORT_OK = qw/ f_Optim_NSGAII /;
165
166							=head1 DESCRIPTION
167
168
169							=head2 Reference
170
171							NSGAII.pm apply (with some variations) the NSGA-II algorithm described in the paper:
172
173							A Fast and Elitist Multiobjective Genetic Algorithm:NSGA-II
174
175							=over
176
177							Kalyanmoy Deb, Associate Member, IEEE, Amrit Pratap, Sameer Agarwal, and T. Meyarivan
178
179							=back
180
181
182							=head2 Objective
183
184							C performs multi-objective optimization using a genetic algorithm approach: it searches the input parameters (genes) which minimize a set of output functions and with some luck a Pareto front is produced.
185							In the Pareto front no solution is better than the others because each solution is a trade-off.
186
187							=head2 Function to optimize
188
189							This module requires to define a perl subroutine (C in the code above) which can take the input parameters and gives the corresponding outputs (in other words, it requires a subroutine to evaluate an individual of this population)
190
191
192							=head2 Features
193
194							The optimization is done:
195
196							=over 3
197
198							=item * considering allowed B
199
200							=item * considering optional B (x1^2 + sqrt(x2) -x3 >= 0 , ...)
201
202							=item * with a B of the subroutine to optimize (and so of individuals) in each generation, by using perl fork() function
203
204							=back
205
206							The inequalities must be given by a subroutine which calculate the error, look below in the example: basically all the LHS of the inequalities in the form "... >=0" are put in an array.
207
208							The number of B of C, and so the value of C, can be for example the max number of parallel C computation that you want and can:
209
210							=over
211
212							=item * run on your pc if you run the computation locally (e.g. 4)
213
214							=item * run on a remote server if you run (inside the C) the computation there (e.g. 32)
215
216							=back
217
218							C should be multiple of C, to optimize resources use, but it is not necessary.
219
220							Problems with this modules are expected on systems not supporting fork() perl function.
221
222							A B<2D plot> can be activated to control in real time the convergence of the algorithm on two chosen output functions (to assist at the formation of the Pareto front, generation after generation).
223
224							Each time a new generation finish, all information of the population are written in the C directory:
225
226							=over
227
228							=item * F: input (parameters values)
229
230							=item * F: output (corresponding functions values)
231
232							=back
233
234							The algorithm can start by default with a random initial population (satisfying the bounds) or the B can be assigned by assigning it to the C option.
235
236							Assigning the population at the start can be useful for example if:
237
238							=over
239
240							=item * there was an unexpected termination of the program during the optimization, so that one can restart by using the content of one of the last saved F
241
242							=item * there is the interest in continuing the optimization with different parameters
243
244							=item * there is already an idea of some input parameters which could give a good output
245
246							=back
247
248							For an example of use see F.
249
250							=head2 Mutation
251
252							The implementation of the B part has been done in a B.
253
254							In particular B are applied in sequence:
255
256							=over
257
258							=item 1) mutation of all the input parameters (the genes), but only on a certain percentage C of the population:
259
260							-> Small perturbation of the each gene by adding a number chosen randomly from the given C (scaled with both C and the difference between the two bounds).
261
262							=item 2) mutation of all the individuals of the population, but only on a certain percentage C of the input parameters (the genes)
263
264							-> Random change (inside the permitted bounds)
265
266							=back
267
268
269							=head2 Verification
270
271							This module has been tested, successfully, on many of the test problems defined in the paper described in the Reference section (see EXAMPLE section)
272
273							The performance (convergence for same population number and max generation number) seems to be comparable to that described in that paper.
274
275
276
277
278							=head1 EXAMPLE
279
280							B (the same test problems contained in the paper described in the Reference section) are available in the test folder (F containing ZDT1, ZDT2, TNK, ...).
281
282							Here you see the B problem with:
283
284							=over
285
286							=item * two input parameters $x->[0] and $x->[1]
287
288							=item * two output functions to optimize f1 and f2
289
290							=item * two constraining equations between the input parameters
291
292							=item * 8 process in parallel (8 subroutine f_CONTRS are evaluated in parallel as indipendent processes)
293
294							=back
295
296							use Optimization::NSGAII qw/ f_Optim_NSGAII /;
297
298							# function to minimize
299							sub f_CONSTR {
300
301							my $x = shift;
302
303							my $n = scalar(@$x);
304
305							my $f1 = $x->[0];
306							my $f2 = (1 + $x->[1])/$x->[0];
307
308							my $out = [$f1,$f2];
309
310							return $out;
311							}
312
313							# inequality constraints set
314							sub f_inequality {
315							my $x =shift;
316
317							# equation >= 0
318							my @ineq =
319							(
320							$x->[1] + 9*$x->[0] - 6 ,
321							-$x->[1] + 9*$x->[0] -1
322							);
323
324							return \@ineq;
325							}
326
327							my $bounds = [[0.1,1],[0,5]];
328
329							my $ref_input_output = f_Optim_NSGAII(
330							{
331							'nPop' => 50,
332							'nGen' => 100,
333							'bounds' => $bounds,
334							'function' => \&f_CONSTR,
335							'f_ineq' => \&f_inequality,
336							'nProc' => 8,
337							'filesDir' => '/tmp',
338							'verboseFinal' => 1,
339							'distrib' => [-1,-0.9,-0.8,-0.7,-0.6,-0.5,-0.4,-0.3,-0.2,-0.1,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9],
340							'scaleDistrib' => 0.05,
341							'percentMut' => 5,
342							},
343							{
344							'dx' => 100,
345							'dy' => 40,
346							'xlabel' => 'stiffness [N/mm]',
347							'ylabel' => 'mass [kg]',
348							'xlim' => [0.1,1],
349							'ylim' => [0,10],
350							'nfun' => [0,1],
351							}
352							);
353
354
355
356
357							=head1 OUTPUT PREVIEW
358
359							This below is a typical output of the final Pareto front (problem TNK).
360
361							The numbers represent the rank of the points: in the initial generations you can see points of rank 1,2,3... where points with rank 1 dominate points of rank 2 and so on.
362
363							Generation after generation all the points go on the Pareto front, so all the points become rank 1 (not dominated, nothing best is present in this population)
364
365							The points will also expand to occupy the entire front.
366
367							GENERATION 250
368							m\|
369							a\|
370							s\|
371							s\|
372							\|
373							[\|
374							k\|
375							g\| 1
376							]\| 11
377							\| 11
378							\| 1 1
379							\| 11
380							\| 1
381							\| 11 1 1 1 1
382							\| 1
383							\| 1
384							\|
385							\| 1
386							\| 1 1
387							\| 1111
388							\| 1
389							\|
390							\|
391							\|
392							\|
393							\| 1
394							\| 11
395							\| 1
396							\| 1
397							\|
398							\|______________________________________________________________________
399							stiffness [N/mm]
400
401
402
403
404							=head1 INSTALLATION
405
406							Following the instruction in perlmodinstall:
407
408							=over
409
410							=item * download the F
411
412							=item * decompress and unpack
413
414							=over
415
416							=item * C
417
418							=item * C
419
420							=back
421
422							=item * C
423
424							=item * C
425
426							=item * C
427
428							=item * C
429
430							=item * C
431
432							to install it locally use this instead of C:
433
434							C if you want to install it in /my/folder
435							then you will have to use in your script: C before using C
436
437							=back
438
439
440
441
442
443							=head1 AUTHOR
444
445							Dario Rubino, C<< >>
446
447
448
449
450							=head1 BUGS
451
452							Solutions (input-output pairs) often contain duplicates, this would require some investigation.
453
454							Please report any bugs to C, or through
455							the web interface at L. I will be notified, and then you'll
456							automatically be notified of progress on your bug as I make changes.
457
458
459
460
461							=head1 SUPPORT
462
463							You can find documentation for this module with the perldoc command.
464
465							perldoc Optimization::NSGAII
466
467
468							You can also look for information at:
469
470							=over 4
471
472							=item * RT: CPAN's request tracker (report bugs here)
473
474							L
475
476							=item * CPAN Ratings
477
478							L
479
480							=item * Search CPAN
481
482							L
483
484							=back
485
486							=head1 LICENSE AND COPYRIGHT
487
488							This software is Copyright (c) 2022 by Dario Rubino.
489
490							This is free software, licensed under:
491
492							The Artistic License 2.0 (GPL Compatible)
493
494							=cut
495
496							# A point in the population is defined by:
497							# - input values for the objective functions (input parameters) -> contained in the reference VP
498							# - correspondent output population (objective function values) -> contained in the reference P
499							#
500							# P is the most important set of values used in this package, because the objective functions' values lead the algorithm
501							# VP elements then are postprocessed accordingly so that P and VP maintain always a one to one index correspondence
502							#
503							# VARIABLES
504							#
505							# VP: input values for each population's point
506							# : [[x11,x12,...x1D],...]
507							# P : population (objective values for each point)
508							# : [[fun11,fun12,..fun1M],..]
509
510							# D : number of dimension of the problem (how many input values for a point)
511							# N : size of population P
512
513							# p,q : index of a single solution of the population
514							# : id_sol_ith
515							# Sp: set of solution dominated by p (worse than p) for each point
516							# : [[id_sol_i,id_sol_j,...],..]
517
518							# np: number of solutions which dominate p (better than p), np = zero for first front solutions
519							# : [np1,np2,..]
520							# F: set of solution in front ith
521							# : [[id_sol_k,id_sol_t,...],...]
522							# rank : rank of each solution = front number , rank = 1 for first front solutions
523							# : [rank1,rank2...rankN];
524
525							########################################################################################################
526
527							my @out_print;
528							my $inf = 1e9;
529
530							sub f_ineq2maxerr {
531							# max error calculation in inequalities
532							# input: lhs inequalities ref (errors)
533							# output: max error
534	4358544			4358544	0	4713732	my $ref_ineq = shift;
535
536	4358544					4450764	my @ineq = @{$ref_ineq};
	4358544					5440192
537
538	4358544					4845023	my $err = 0;
539	4358544					4964812	foreach my $el(@ineq){
540	4358544					6576993	$err = max( $err, -$el);
541							}
542	4358544					5771139	return $err;
543							}
544
545
546							sub f_fast_non_dominated_sort {
547							# reference to population
548	121			121	0	324	my $P = shift;
549	121					161	my $VP = shift;
550	121					259	my $f_ineq = shift;
551
552
553	121					512	my $Sp;
554							my $np;
555	121					0	my $F;
556	121					0	my $rank;
557
558	121					187	my $N = scalar(@{$P});
	121					303
559
560	121					1008	foreach my $p(0..$N-1){
561	12100					20054	$np->[$p] = 0;
562	12100					18876	foreach my $q(0..$N-1){
563	1210000	100				1661107	next if $p == $q; # max error <-- inequalities <-- input pars
564	1197900	100				1434569	if ( f_dominates($P->[$p],$P->[$q], f_ineq2maxerr( &{$f_ineq}($VP->[$p]) ), f_ineq2maxerr( &{$f_ineq}($VP->[$q])) ) ){
	1197900	100				1472821
	1197900					1449047
565	216528					226588	push @{$Sp->[$p]}, $q;
	216528					428706
566							}
567	981372					1190581	elsif ( f_dominates($P->[$q],$P->[$p], f_ineq2maxerr( &{$f_ineq}($VP->[$q]) ), f_ineq2maxerr( &{$f_ineq}($VP->[$p])) ) ){
	981372					1209859
568	211582					391030	$np->[$p] += 1;
569							}
570							}
571	12100	100				23460	if ($np->[$p] == 0){
572	2194					4653	$rank->[$p]=1; # front number, 1 = best
573	2194					2633	push @{$F->[0]}, $p;
	2194					5839
574							}
575							}
576
577							# all other fronts
578	121					247	my $i = 0;
579
580	121					862	while (scalar(@{$F->[$i]})){
	1650					3614
581	1529					1941	my @Q;# members of next front
582
583	1529					1894	foreach my $p(@{$F->[$i]}){
	1529					2358
584	12100					12345	foreach my $q(@{$Sp->[$p]}){
	12100					19559
585	216528					222987	$np->[$q] -=1;
586	216528	100				313694	if ($np->[$q] == 0){
587	9906					11510	$rank->[$q] = $i + 1 + 1;
588	9906					13275	push @Q, $q;
589							}
590							}
591							}
592	1529					1856	$i++;
593	1529					4400	$F->[$i] = [@Q];
594
595							}
596
597	121					400	for my $p(0..$N-1){
598	12100					14374	push @out_print, join(' ',($rank->[$p],@{$P->[$p]}));
	12100					29438
599							}
600
601	121					6135	return ($rank,$F); # rank for each point, points in each front
602
603							}
604
605							########################################################################################################
606
607							sub f_dominates {
608							# input: two elements of P
609							# output: 1 if P1 dominate P2, 0 otherwise
610
611	2179272			2179272	0	2297421	my $P1 = shift;
612	2179272					2234004	my $P2 = shift;
613
614							# constraints errors
615	2179272					2206641	my $err1 = shift;
616	2179272					2290209	my $err2 = shift;
617
618
619							# number of objective (dimensions)
620	2179272					2212620	my $M = scalar(@{$P1});
	2179272					2333742
621
622	2179272					2296951	my $P1_dominate_P2_count = 0;
623
624	2179272					2947255	for my $kM(0..$M-1){
625	4358544	100				6691154	if ($P1->[$kM] <= $P2->[$kM]){
626	2395800					2778003	$P1_dominate_P2_count++;
627							}
628							}
629
630	2179272					2207675	my $P1_dominate_P2_p;
631							# if 1 has lower constraints errors, then 1 dominate 2, else if the error is the same (e.g 0) then the dominance is decided on objective value
632							# else simply 1 doesn't dominate 2
633	2179272	100	66			5778032	if (
			66
634							$err1 < $err2
635							\|\|
636							($err1 == $err2 && $P1_dominate_P2_count == $M)
637							){
638	428110					462449	$P1_dominate_P2_p = 1;
639							}
640	1751162					1938278	else { $P1_dominate_P2_p = 0}
641
642	2179272					4049362	return $P1_dominate_P2_p;
643
644							}
645
646							########################################################################################################
647
648							sub f_crowding_distance_assignment{
649							# input: ref of array of a subset of population P, population P
650							# output: distance value for each point of this set
651
652							# global ids of the set of non dominated points of interest
653	569			569	0	789	my $ids = shift;
654							# objectives of all points
655	569					1245	my $P = shift;
656
657							# build the set of objectives for these global ids (I is a subset of P)
658	569					1562	my $I = [@{$P}[@{$ids}]];
	569					1915
	569					751
659
660							# initialize distance
661	569					738	my $Dist;
662
663							# number of objective (dimensions)
664	569					619	my $M = scalar(@{$P->[0]});
	569					1099
665							# number of points in I
666	569					714	my $n = scalar(@{$ids});
	569					734
667
668							# for each objective
669	569					1656	for my $kM(0..$M-1){
670
671							# local id of the points, sorted by objective
672	1138					4806	my @index_sort = sort{$I->[$a][$kM] <=> $I->[$b][$kM]} 0..($n-1);
	38695					45843
673
674							# min & max for this objective
675	1138					2088	my $fmin = $I->[$index_sort[0]][$kM];
676	1138					1498	my $fmax = $I->[$index_sort[-1]][$kM];
677
678	1138	50				2166	if ($fmax-$fmin<0.00001){$fmax += 0.00001}
	0					0
679
680							# build the distance
681	1138					1632	$Dist->[$index_sort[0]] = $inf;
682	1138					1500	$Dist->[$index_sort[-1]] = $inf;
683							# and for all other intermediate point
684	1138					2280	for my $i(1..($n-2)){
685	11836					20088	$Dist->[$index_sort[$i]] += ($I->[$index_sort[$i+1]][$kM] - $I->[$index_sort[$i-1]][$kM])/($fmax-$fmin);
686							}
687
688							}
689							# return distances for each point, $Dist->[ith] is distance for point $ids->[ith]
690	569					1122	return $Dist;
691							}
692
693							########################################################################################################
694
695							sub f_crowding_distance_operator {
696							# input: rank and distance of two element of the population (selected by GA)
697							# output: 1 if first dominate second, else 0
698
699	12100			12100	0	13722	my $rank = shift;
700	12100					12458	my $Dist = shift;
701
702	12100					12206	my $P1_dominate_P2_p;
703
704	12100	100	100			30846	if (
			100
705							$rank->[0] < $rank->[1]
706							\|\|
707							($rank->[0] == $rank->[1]) && ($Dist->[0] > $Dist->[1])
708							){
709	6048					7152	$P1_dominate_P2_p = 1
710							}
711	6052					6868	else {$P1_dominate_P2_p = 0}
712	12100					14900	return $P1_dominate_P2_p;
713							}
714
715							########################################################################################################
716
717							sub f_NSGAII {
718							# input: current function input values VP(t) & VQ(t) ( VQ is obtained by GA)
719							# current population Pt & Qt (obtained by evaluating objective function using VPt & VQt)
720							# output: VP(t+1), P(t+1), rank & distance for each point of this new population
721
722							# input variables' values
723	121			121	0	598	my $VPt = shift;
724	121					644	my $VQt = shift;
725
726							# population (objective function values)
727	121					287	my $Pt = shift;
728	121					202	my $Qt = shift;
729
730							# constraints function
731	121					302	my $f_ineq = shift;
732
733	121					10406	my $Rt = [(@$Pt,@$Qt)];
734	121					14838	my $VRt = [(@$VPt,@$VQt)];
735	121					305	my $N = scalar(@$Pt);
736
737	121					1247	my ($temp,$F) = f_fast_non_dominated_sort($Rt,$VRt,$f_ineq);
738
739							# input variables for the new population P_t+1
740	121					383	my $VPtp1=[];
741							# new population
742	121					221	my $Ptp1=[];
743	121					251	my $Dist=[];
744	121					199	my $rank=[];
745
746	121					251	my $i=0;
747							# push the best fronts in final population & store crowding distance
748	121					305	while ( scalar(@{$Ptp1}) + scalar(@{$F->[$i]}) <= $N ){
	569					806
	569					1250
749	448					666	push @$Ptp1, @$Rt[@{$F->[$i]}];
	448					1619
750	448					595	push @$VPtp1,@$VRt[@{$F->[$i]}];
	448					1621
751	448					951	my $Dist_ = f_crowding_distance_assignment($F->[$i],$Rt);
752	448					1337	push @$rank, ($i+1) x @$Dist_;
753	448					1177	push @$Dist, @$Dist_;
754	448					656	$i++;
755							}
756
757							# only part of the following front will be pushed in final population, sorting by crowding
758							# here rank is the same for all points, so the crowded-comparison operators reduce to a comparison of crowding distance
759	121					352	my $Dist_ = f_crowding_distance_assignment($F->[$i],$Rt);
760
761	121					180	my $nf = scalar(@{$F->[$i]});
	121					240
762
763	121					348	my @index_sort = sort{$Dist_->[$b] <=> $Dist_->[$a]} 0..($nf-1);
	6817					9159
764
765							# cut to fill only the remaining part of Ptp1
766	121					532	@index_sort = @index_sort[0..(($N-scalar(@$Ptp1))-1)];
767
768	121					298	push @$Ptp1, @$Rt[@{$F->[$i]}[@index_sort]];
	121					516
769	121					555	push @$VPtp1, @$VRt[@{$F->[$i]}[@index_sort]];
	121					580
770	121					884	push @$rank, ($i+1) x @index_sort;
771	121					976	push @$Dist, @$Dist_[@index_sort];
772
773
774	121					2813	return $VPtp1,$Ptp1,$rank,$Dist;
775
776							}
777
778							########################################################################################################
779
780							sub f_SBX {
781	6050			6050	0	6389	my $contiguity = shift; # >0 (usually between 2 and 5), greater value produces child values similar to those of the parents
782	6050					6463	my $VP1_ = shift; # input values of parent 1 (ref)
783	6050					6167	my $VP2_ = shift; # input values of parent 2 (ref)
784
785							# array of equal length
786	6050					8705	my @VP1 = @$VP1_;
787	6050					8254	my @VP2 = @$VP2_;
788	6050					6552	my @out;
789
790	6050					9203	for my $kp(0..$#VP1){
791							# for array of length=1 or rand<0.5 the cross is made
792	18150	100	66			39618	if ( $#VP1 == 0 \|\| rand(1)<0.5){
793
794	9422					10869	my $u = rand(1);
795
796	9422					12232	my $exponent = (1/($contiguity+1));
797	9422	100				18117	my $beta = ($u < 0.5)? (2$u)$exponent : (0.5/(1-$u))*$exponent;
798
799	9422	100				12308	my $sign = (rand(1) < 0.5)? -1 : 1;
800	9422					20236	$out[$kp] = (($VP1[$kp] + $VP2[$kp])/2) + $sign * $beta0.5abs($VP1[$kp] - $VP2[$kp])
801
802							}
803							else {
804	8728					12391	$out[$kp] = $VP1[$kp]
805							}
806							}
807	6050					10173	return \@out;
808							}
809
810							########################################################################################################
811
812							sub f_GA {
813							# input: input values, rank and Dist of the population P(t+1)
814							# output: input value of the population Q(t+1)
815
816	121			121	0	285	my $VPtp1 = shift;
817	121					202	my $rank = shift;
818	121					215	my $Dist = shift;
819	121					179	my $contiguity = shift;
820
821	121					633	my $bounds = shift; # for mutation
822
823							# optional paramters for mutation:
824	121					333	my $distrib = shift;
825	121					170	my $scaleDistrib = shift;
826	121					238	my $percentMut =shift;
827
828	121					251	my $VQtp1 = [];
829
830							# binary tournament
831							# two random element are compared
832							# the best according crowding distance operator is selected as parent 1
833							# the same for selecting parent 2
834							# parent 1 and 2 are crossed with SBX to give a child
835							# the procedure is repeated to fill Q(t+1)
836
837	121					279	my $N = scalar(@$VPtp1);
838
839	121					789	for my $kt(0..$N-1){
840
841							# selection of the two parent
842	6050					6424	my @index_parent;
843	6050					7662	for (1..2){
844	12100					18051	my ($r1,$r2) = (int(rand($N)),int(rand($N)));
845	12100					23805	my $P1_dominate_P2_p = f_crowding_distance_operator( [$rank->[$r1],$rank->[$r2]] , [$Dist->[$r1],$Dist->[$r2]] );
846
847	12100	100				19765	if ($P1_dominate_P2_p == 1){push @index_parent, $r1}
	6048					8683
848	6052					8553	else {push @index_parent, $r2}
849							}
850							# crossover of the two parent
851	6050					9701	my $out = f_SBX( $contiguity , $VPtp1->[$index_parent[0]] , $VPtp1->[$index_parent[1]] );
852	6050					10232	push @$VQtp1, $out;
853							}
854
855							# mutation 1
856							# perturbation using distribution array
857	121					419	for my $k(0..(scalar(@$VQtp1) - 1)){
858							# $percentMut percentage of individuals are changed in all their elements (all input parameter will be perturbated)
859	6050	100				9330	if (rand(1)> 1-$percentMut/100){
860	120					536	for my $d(0..(scalar(@{$VQtp1->[0]}) - 1)){
	120					730
861							# increment the current value by a random value chosen from the distribution (many time it will be near to zero for a gaussian distribution) scaled with the delta_bounds
862	360					1715	$VQtp1->[$k][$d] += ($bounds->[$d][1] - $bounds->[$d][0]) * $scaleDistrib * $distrib->[int(rand(scalar(@$distrib)))];
863							}
864							}
865							}
866
867							# mutation 2
868							# 100% probability of mutation inside VQ(t+1), so the intention is to act on all individual of the population
869	121					499	for my $k(0..int((scalar(@$VQtp1) - 1) * 100/100)){
870	6050					6480	for my $d(0..(scalar(@{$VQtp1->[0]}) - 1)){
	6050					7883
871							# $percentMut percentage of values are changed inside each single individual, value is inside bounds
872	18150	100				28654	if (rand(1)> 1-$percentMut/100){
873	978					1993	$VQtp1->[$k][$d] = rand(1) * ($bounds->[$d][1] - $bounds->[$d][0]) + $bounds->[$d][0];
874							}
875							}
876							}
877
878	121					405	return $VQtp1;
879
880							}
881
882
883
884							sub f_Optim_NSGAII {
885
886	23			23	1	7268	my $par = shift;
887
888	23					69	my $par_plot = shift;
889
890
891
892	23					46	my %par = %{$par};
	23					115
893
894	23					69	my $nPop = $par{'nPop'};
895	23					46	my $nGen = $par{'nGen'};
896
897	23					46	my $bounds = $par{'bounds'};
898	23					46	my $n = scalar @$bounds;
899
900	23					46	my $fun = $par{'function'};
901
902							# optional paramters:
903
904	23		50	4358544		253	my $f_ineq = $par{'f_ineq'} // sub {return [0]};
	4358544					6239434
905
906	23		50			92	my $verboseFinal = $par{'verboseFinal'} // 1;
907
908	23		50			69	my $distrib = $par{'distrib'} // [-1,-0.5,0,0.5,1]; # for mutation, default [-1,-0.5,0,0.5,1]
909	23		50			69	my $scaleDistrib = $par{'scaleDistrib'} // 0; # for mutation, default = 0, no perturbation
910	23		50			92	my $percentMut = $par{'percentMut'} // 5; # for mutation, default = 5%
911
912	23		50			322	my $startPop = $par{'startPop'} // 'nostartPop';
913
914							# control for no wrong use of keys
915	23					161	my @keys_ok = ('nPop','nGen','bounds','function','nProc','filesDir','verboseFinal','f_ineq','distrib','scaleDistrib','percentMut','startPop');
916	23					299	for my $key(keys %par){
917	207	50				2852	unless ( grep( /^$key$/, @keys_ok ) ) {
918	0					0	die 'E R R O R : the use of "'.$key.'" in the function to optimize is not defined! Compare with documentation.';
919							}
920							}
921
922	23					69	my $VPt;
923							my $VQt;
924
925
926	23					69	for my $gen(0..$nGen){
927	143	100				631	if ($gen == 0){
928							# input
929	23					92	for (0..$nPop-1){
930	1150	50				1679	if ($startPop eq 'nostartPop'){
931	1150					1334	$VPt->[$_]=[map { rand(1) * ($bounds->[$_-1][1] - $bounds->[$_-1][0]) + $bounds->[$_-1][0] } 1..$n];
	3450					7360
932							}
933							else {
934	0					0	$VPt = $startPop;
935							}
936	1150					1794	$VQt->[$_]=[map { rand(1) * ($bounds->[$_-1][1] - $bounds->[$_-1][0]) + $bounds->[$_-1][0] } 1..$n];
	3450					6578
937							}
938							}
939
940
941
942
943
944
945
946
947	143					478	my $nProc = $par{'nProc'};
948	143					301	my $filesDir = $par{'filesDir'};
949
950							# if ($nPop%$nProc != 0){warn "\n nPop should be divisible by nProc!\n\n"};
951
952	143					292	my $fork = 0;
953
954	143					611	for (1..$nProc){
955
956	275					265986	my $pid = fork;
957	275	50				7736	die $! unless defined $pid;
958	275					777	$fork++;
959
960							# say "in PID = $$ with child PID = $pid fork# = $fork";
961							# parent process stop here, child processes go on
962	275	100				5442	next if $pid;
963
964	22					1805	my $r = 0;
965
966	22					1308	my $nameFileP = $filesDir.'/Pt_'.$fork.'.txt';
967	22					691	my $nameFileQ = $filesDir.'/Qt_'.$fork.'.txt';
968
969							# remove existing input file for this process number
970	22					72689	system ('rm -f '.$nameFileP);
971	22					67413	system ('rm -f '.$nameFileQ);
972
973
974	22					1235	my $id_from = ($fork-1)*int($nPop/$nProc);
975	22					316	my $id_to = $fork *int($nPop/$nProc)-1;
976	22	100				1002	if ($fork == $nProc){$id_to = $nPop - 1};
	11					124
977
978							# output
979	22	50				5126	open my $fileoP, '>>', $nameFileP or croak "E R R O R : problem in writing the file ".$nameFileP.' -> "filesDir" path not reachable? -- ';
980	22	50				2400	open my $fileoQ, '>>', $nameFileQ or croak "E R R O R : problem in writing the file ".$nameFileQ.' -> "filesDir" path not reachable? -- ';
981	22					487	for ($id_from .. $id_to){
982	550					1127	my $Pt_ = &{$fun}($VPt->[$_],$_);
	550					2770
983	550					14028	my $Qt_ = &{$fun}($VQt->[$_],$_);
	550					2064
984	550					11388	say $fileoP join ',',@{$Pt_};
	550					10897
985	550					793	say $fileoQ join ',',@{$Qt_};
	550					2481
986							}
987	22					1218	close $fileoP;
988	22					771	close $fileoQ;
989	22					23636	exit;
990							}
991
992							# wait for the processes to finish
993	121					1595	my $kid;
994	121					514	do {
995	363					43982526	$kid = waitpid -1, 0;
996							} while ($kid>0);
997
998	121					680	my $Pt;
999							my $Qt;
1000
1001							# collect data together
1002	121					1663	for (1..$nProc){
1003
1004	242					4187	my $nameFileP = $filesDir.'/Pt_'.$_.'.txt';
1005	242					1645	my $nameFileQ = $filesDir.'/Qt_'.$_.'.txt';
1006	242	50				17655	open my $fileiP, '<', $nameFileP or croak "E R R O R : problem in reading the file ".$nameFileP.' -> "filesDir" path not reachable? ';
1007	242	50				10384	open my $fileiQ, '<', $nameFileQ or croak "E R R O R : problem in reading the file ".$nameFileQ.' -> "filesDir" path not reachable? ';
1008
1009	242					28407	while (my $line = <$fileiP>){
1010	6050					8252	chomp $line;
1011	6050					14474	my @vals = split ',', $line;
1012	6050					19106	push @$Pt, \@vals;
1013							}
1014	242					4852	while (my $line = <$fileiQ>){
1015	6050					9060	chomp $line;
1016	6050					12341	my @vals = split ',', $line;
1017	6050					18344	push @$Qt, \@vals;
1018							}
1019	242					2631	close $fileiP;
1020	242					3080	close $fileiQ;
1021							}
1022
1023
1024
1025							# new input
1026	121					2033	my ($VPtp1,$Ptp1,$rank,$Dist)=f_NSGAII($VPt,$VQt,$Pt,$Qt,$f_ineq);
1027
1028							# save inputs and corresponding outputs at this generation
1029	121					348	my $to_print;
1030							my $FILEO;
1031							# inputs (genes for each individual)
1032	121	50				19407	open $FILEO, '>', $filesDir.'/VPt_gen'.sprintf('%05d',$gen).'.txt' or croak 'E R R O R : problem in writing the generation file -> "filesDir" path not reachable? ';
1033	121					1440	$to_print = f_print_columns($VPt,'%25.15f');
1034	121					4784	print $FILEO (join "\n", @$to_print);
1035	121					6342	close $FILEO;
1036							# outputs (f1, f2 ... for each individual)
1037	121	50				8480	open $FILEO, '>', $filesDir.'/Pt_gen'.sprintf('%05d',$gen).'.txt' or croak 'E R R O R : problem in writing the generation file -> "filesDir" path not reachable? ';
1038	121					613	$to_print = f_print_columns($Pt,'%25.15f');
1039	121					2105	print $FILEO (join "\n", @$to_print);
1040	121					3574	close $FILEO;
1041
1042
1043							#print " example input values: " ;
1044							#say join ' ', @{$VPtp1->[0]};
1045
1046	121	50				582	if (defined $par_plot){
1047	0					0	f_plot($par_plot,$gen,$Ptp1,$rank);
1048	0					0	say '';
1049
1050	0					0	my $max = -$inf;
1051	0					0	my $min = $inf;
1052
1053	0					0	for (0..$nPop-1){
1054	0					0	$max = max(@{$Pt->[$_]},@{$Qt->[$_]},$max);
	0					0
	0					0
1055	0					0	$min = min(@{$Pt->[$_]},@{$Qt->[$_]},$min);
	0					0
	0					0
1056							}
1057	0					0	say 'max output = '.$max;
1058	0					0	say 'min output = '.$min;
1059
1060
1061	0					0	my $maxV = -$inf;
1062	0					0	my $minV= $inf;
1063
1064	0					0	my $minD = min(@$Dist);
1065
1066
1067	0					0	for (0..$nPop-1){
1068
1069	0					0	$maxV = max(@{$VPt->[$_]},@{$VQt->[$_]},$maxV);
	0					0
	0					0
1070	0					0	$minV = min(@{$VPt->[$_]},@{$VQt->[$_]},$minV);
	0					0
	0					0
1071
1072							}
1073	0					0	say 'max input = '.$maxV;
1074	0					0	say 'min input = '.$minV;
1075
1076	0					0	say 'min Dist = '.$minD;
1077							}
1078
1079
1080	121					741	my ($VQtp1) = f_GA($VPtp1,$rank,$Dist,2.0,$bounds,$distrib,$scaleDistrib,$percentMut);
1081
1082							# correction of input values produced by GA to respect bounds
1083	121					294	for my $p(0..$nPop-1){
1084	6050					7682	for my $d(0..$n-1){
1085	18150	100				26604	if($VQtp1->[$p][$d] < $bounds->[$d][0]){$VQtp1->[$p][$d] = $bounds->[$d][0]};
	19					190
1086	18150	100				26699	if($VQtp1->[$p][$d] > $bounds->[$d][1]){$VQtp1->[$p][$d] = $bounds->[$d][1]};
	57					95
1087							}
1088							}
1089
1090							# new output
1091	121					235	my $Qtp1;
1092	121					277	for (0..$nPop-1){
1093	6050					87659	$Qtp1->[$_]=&{$fun}($VQtp1->[$_],$_);
	6050					9945
1094							}
1095
1096							# new became old
1097	121					6351	$VPt = [@$VPtp1];
1098	121					5436	$VQt = [@$VQtp1];
1099
1100
1101							# output final
1102	121	100				6209	if($gen == $nGen){
1103	1	50				16	if ($verboseFinal == 1){
1104	0					0	say '------------------------- I N P U T -------------------------:';
1105	0					0	map {say join ' ',@{$_}} @$VPt;
	0					0
	0					0
1106	0					0	say '------------------------- O U T P U T -------------------------';
1107	0					0	map {say join ' ',@{$_}} @$Pt;
	0					0
	0					0
1108							}
1109
1110	1					98	return [$VPt,$Pt];
1111
1112							}
1113
1114							}
1115
1116							}
1117
1118
1119
1120							sub f_print_columns {
1121							# to print in columns format the content of $P, $Ptp1 ...
1122	242			242	0	625	my $P = shift;
1123	242					1383	my $formato = shift; # e.g. '%25.15f'
1124
1125							# how many input parameters (genes)
1126	242					523	my $n = scalar @{$P->[0]};
	242					690
1127							# population number
1128	242					467	my $nPop = scalar @$P;
1129
1130							# print, each line contains the genes of an individual
1131	242					490	my @to_print;
1132	242					1552	for my $kl(0..($nPop-1)){
1133	12100					12877	my $line;
1134	12100					16421	for my $kx(0..($n-1)){
1135	30250					88978	$line.= sprintf($formato,$P->[$kl][$kx]) . ' ';
1136							}
1137	12100					20742	push @to_print, $line;
1138							}
1139
1140	242					976	return \@to_print;
1141							}
1142
1143							########################################################################################################
1144
1145							sub f_plot {
1146	0			0	0		my $par_ref = shift;
1147
1148	0						my $gen = shift;
1149
1150	0						my $P = shift;
1151	0						my $rank = shift;
1152
1153
1154
1155
1156	0						my %par = %{$par_ref};
	0
1157
1158							# nfun: what objective to plot from all (x=f1 y=f4 -> [0,3])
1159	0						my $nfun = $par{'nfun'};
1160
1161
1162	0						my $title = 'GENERATION '.$gen;
1163
1164							# inizialization of all lines of output
1165	0						my @output = map{ ' ' x $par{'dx'} } 1..$par{'dy'};
	0
1166
1167	0						my @nameFront =(0..9,"a".."z","A".."Z");
1168
1169							# print points of the front
1170	0						my $xASCIImin = 0;
1171	0						my $xASCIImax = $par{'dx'}-1;
1172	0						my $yASCIImin = $par{'dy'}-1;
1173	0						my $yASCIImax = 0;
1174	0						my $n_outliers = 0;
1175
1176	0						for my $kp(1..scalar(@{$P})-1){
	0
1177							# conversion x,y -> xASCII,yASCII
1178	0						my $x = $P->[$kp][$nfun->[0]];
1179	0						my $y = $P->[$kp][$nfun->[1]];
1180	0						my $xmin = $par{'xlim'}->[0];
1181	0						my $xmax = $par{'xlim'}->[1];
1182	0						my $ymin = $par{'ylim'}->[0];
1183	0						my $ymax = $par{'ylim'}->[1];
1184
1185	0						my $xASCII;
1186							my $yASCII;
1187
1188
1189	0	0	0				if ($x < $xmax && $x > $xmin && $y < $ymax && $y > $ymin && $rank->[$kp] < $#nameFront){
			0
			0
			0
1190	0						$xASCII = f_interp($xmin,$xmax,$xASCIImin,$xASCIImax,$x);
1191	0						$yASCII = f_interp($ymin,$ymax,$yASCIImin,$yASCIImax,$y);
1192
1193	0						substr($output[$yASCII],$xASCII,1,$nameFront[$rank->[$kp]]);
1194
1195							}
1196	0						else {$n_outliers++};
1197							}
1198
1199							# add axis
1200	0						push @output, '_' x $par{'dx'};
1201	0						@output = map{'\|'.$_} @output;
	0
1202							# add axis labels
1203	0						push @output, sprintf("%$par{'dx'}s",$par{'xlabel'});
1204	0						my @ylabelv = split '',$par{'ylabel'};
1205	0	0					@output = map{ defined $ylabelv[$_] ? $ylabelv[$_].$output[$_] : ' '.$output[$_]} 0..$#output;
	0
1206							# add statistics
1207	0						unshift @output, sprintf("%".int(($par{'dx'}+length($title))/2)."s",$title);
1208	0						push @output, "xlim: ".(join ' ',@{$par{'xlim'}});
	0
1209	0						push @output, "ylim: ".(join ' ',@{$par{'ylim'}});
	0
1210	0						push @output, "#outliers: ".$n_outliers;
1211
1212	0						print join "\n", @output;
1213
1214							}
1215
1216
1217							sub f_interp{
1218	0			0	0		my ($xmin,$xmax,$xASCIImin,$xASCIImax,$x) = @_;
1219	0						my $xASCII = ($xASCIImax-$xASCIImin)/($xmax-$xmin)*($x-$xmin)+$xASCIImin;
1220	0						$xASCII = sprintf("%.0f",$xASCII);
1221	0						return $xASCII
1222							}
1223
1224
1225
1226
1227
1228
1229							1;