File Coverage

blib/lib/Optimization/NSGAII.pm

Criterion	Covered	Total	%
statement	314	398	78.8
branch	50	66	75.7
condition	17	37	45.9
subroutine	19	21	90.4
pod	1	12	8.3
total	401	534	75.0

line	stmt	bran	cond	sub	pod	time	code
1							package Optimization::NSGAII;
2
3	12			12		802608	use 5.006;
	12					48
4	12			12		48	use warnings;
	12					12
	12					228
5	12			12		48	use strict;
	12					24
	12					372
6
7	12			12		60	use feature 'say';
	12					24
	12					1248
8	12			12		60	use Exporter;
	12					24
	12					540
9							our @ISA = ("Exporter");
10	12			12		5520	use Data::Dumper;
	12					60216
	12					576
11	12			12		60	use List::Util qw/min max/;
	12					24
	12					1056
12	12			12		60	use Carp;
	12					12
	12					42444
13
14							=pod
15
16							=head1 NAME
17
18							Optimization::NSGAII - non dominant sorting genetic algorithm for multi-objective optimization
19
20							=head1 VERSION
21
22							Version 0.01
23
24							=cut
25
26							our $VERSION = "0.01";
27
28							=head1 SYNOPSIS
29
30							use Optimization::NSGAII qw/ f_Optim_NSGAII /;
31							use Data::Dumper;
32
33							# D E F I N E O B J E C T I V E S T O O P T I M I Z E
34							###########################################################
35
36							sub f_to_optimize {
37
38							my $x = shift; # load input parameters (genes constituting a single individual)
39
40							# ... # do your things using these inputs $x->[0], $x->[1] ...
41							# and produce the outputs to be minimized $f1, $f2 ...
42
43							# examples of things you can do include:
44							# - mathematical formulas in perl to define $f1, $f2, ...
45							# - computations with commercial software and so:
46							# - write input file using $x->[0] ...
47							# - run the computation, for example with perl system() function
48							# - locally or
49							# - on a server for example with 'qsub... '
50							# - wait simulation to end
51							# - postprocess its output and define $f1, $f2 ...
52							# - ...
53
54							my $out = [$f1,$f2,$f3]; # and finally return the set of these outputs for
55							return $out; # this single individual of the population
56
57							}
58
59							# D E F I N E B O U N D S [ A N D I N E Q U A L I T I E S ]
60							###################################################################
61
62							# define min and max bounds for $x->[0], $x->[1], ...
63							my $bounds = [[0,1],[0,1],[0,1]]; # example with 3 input parameters (genes) with min = 0 and max = 1:
64
65							sub f_inequality { # optional inequality constraints set
66
67							my $x =shift;
68
69							my @ineq =
70							(
71							$x->[1] + 1 , # equations >= 0
72							$x->[0] + $x->[1] - 9,
73							...
74							...
75
76							);
77
78							return \@ineq;
79							}
80
81							# R U N O P T I M I Z A T I O N
82							#################################
83
84							# execute NSGA-II algorithm
85
86							my $ref_input_output = f_Optim_NSGAII(
87							{
88
89							'nPop' => 50, # population size
90							'nGen' => 250, # final generation number
91							'bounds' => $bounds, # loads the previously defined bounds
92							'function' => \&f_to_optimize, # loads the subroutine to optimize (minimize)
93							'nProc' => 8, # how many individuals to evaluate in parallel as separate processes
94							'filesDir' => '/tmp', # work directory
95
96
97							# optional parameters:
98
99							'verboseFinal' => 1, # 0\|1: input and output values print at final generation, for each individual of the population
100							# default: print is made ( = 1)
101							'f_ineq' => \&f_inequality, # subroutine describing the constraining inequality set
102							# default: no constraint function
103
104							# parameters for mutation
105
106							'distrib' => [-1,0,1], # distribution of values (for example a Gaussian distribution), used to perturb individuals
107							# default: [-1,-0.5,0,0.5,1]
108							'scaleDistrib' => 0.05, # scaling of the distribution array
109							# default: 0 (no perturbation will be done)
110							'percentMut' => 5, # percentage of individual that are randomly perturbated (in all their genes)
111							# and also percentage of input parameters (genes) that are randomly mutated in each individual
112							# default: 5%
113							},
114
115							# the following is the optional set of parameters for 'Pareto front' 2D live plot
116							# if the part below is not present, no plot will be made
117
118							{
119
120							'dx' => 200, # characters width and height of the plot
121							'dy' => 40,
122							'xlabel' => 'stiffness [N/mm]', # horizontal and vertical axis labels
123							'ylabel' => 'mass [kg]',
124							'xlim' => [0,1], # horizontal and vertical axis limits
125							'ylim' => [0,1],
126							'nfun' => [0,2], # which function to plot from return value by f_to_optimize ($out) ; 0=f1, 1=f2 ...
127							}
128							);
129
130							# U S E S O L U T I O N S
131							############################
132
133							# for example print of the input parameters and
134							# corresponding output functions' values of the final found Pareto front
135
136							my @ref_input = @{$ref_input_output->[0]};
137							my @ref_output = @{$ref_input_output->[1]};
138
139							print Dumper(\@ref_input);
140							print Dumper(\@ref_output);
141
142
143
144
145							=head1 EXPORT
146
147							=over
148
149							=item * f_Optim_NSGAII
150
151							=back
152
153							=cut
154
155							our @EXPORT_OK = qw/ f_Optim_NSGAII /;
156
157							=head1 DESCRIPTION
158
159
160							=head2 Reference
161
162							NSGAII.pm apply (with some variations) the NSGA-II algorithm described in the paper:
163
164							A Fast and Elitist Multiobjective Genetic Algorithm:NSGA-II
165
166							=over
167
168							Kalyanmoy Deb, Associate Member, IEEE, Amrit Pratap, Sameer Agarwal, and T. Meyarivan
169
170							=back
171
172
173							=head2 Objective
174
175							C performs multi-objective optimization using a genetic algorithm approach: it searches the input parameters (genes) which minimize a set of output functions and with some luck a Pareto front is produced.
176							In the Pareto front no solution is better than the others because each solution is a trade-off.
177
178							=head2 Function to optimize
179
180							This module requires to define a perl subroutine (C in the code above) which can take the input parameters and gives the corresponding outputs (in other words, it requires a subroutine to evaluate an individual of this population)
181
182
183							=head2 Features
184
185							The optimization is done:
186
187							=over 3
188
189							=item * considering allowed B
190
191							=item * considering optional B (x1^2 + sqrt(x2) -x3 >= 0 , ...)
192
193							=item * with a B of the subroutine to optimize (and so of individuals) in each generation, by using perl fork() function
194
195							=back
196
197							The inequalities must be given by a subroutine which calculate the error, look below in the example: basically all the LHS of the inequalities in the form "... >=0" are put in an array.
198
199							The number of B of C, and so the value of C, can be for example the max number of parallel C computation that you want and can:
200
201							=over
202
203							=item * run on your pc if you run the computation locally (e.g. 4)
204
205							=item * run on a remote server if you run (inside the C) the computation there (e.g. 256)
206
207							=back
208
209							C should be multiple of C, to optimize resources use, but it is not necessary.
210
211							Problems with this modules are expected on systems not supporting fork() perl function.
212
213							A B<2D plot> can be activated to control in real time the convergence of the algorithm on two chosen output functions (to assist at the formation of the Pareto front, generation after generation).
214
215							Each time a new generation finish, all information of the population are written in the C directory:
216
217							=over
218
219							=item * VPt_genXXXXX.txt: input (parameters values)
220
221							=item * Pt_genXXXXX.txt: output (corresponding functions values)
222
223							=back
224
225
226
227							=head2 Mutation
228
229							The implementation of the B part has been done in a B.
230
231							In particular B are applied in sequence:
232
233							=over
234
235							=item 1) mutation of all the input parameters (the genes), but only on a certain percentage C of the population:
236
237							-> Small perturbation of the each gene by adding a number chosen randomly from the given C (scaled with both C and the difference between the two bounds).
238
239							=item 2) mutation of all the individuals of the population, but only on a certain percentage C of the input parameters (the genes)
240
241							-> Random change (inside the permitted bounds)
242
243							=back
244
245
246							=head2 Verification
247
248							This module has been tested, successfully, on many of the test problems defined in the paper described in the Reference section (see EXAMPLE section)
249
250							The performance (convergence for same population number and max generation number) seems to be comparable to that described in that paper.
251
252
253
254
255							=head1 EXAMPLE
256
257							B (the same test problems contained in the paper described in the Reference section) are available in the test folder (F containing ZDT1, ZDT2, TNK, ...).
258
259							Here you see the B problem with:
260
261							=over
262
263							=item * two input parameters $x->[0] and $x->[1]
264
265							=item * two output functions to optimize f1 and f2
266
267							=item * two constraining equations between the input parameters
268
269							=item * 8 process in parallel (8 subroutine f_CONTRS are evaluated in parallel as indipendent processes)
270
271							=back
272
273							use Optimization::NSGAII qw/ f_Optim_NSGAII /;
274
275							# function to minimize
276							sub f_CONSTR {
277
278							my $x = shift;
279
280							my $n = scalar(@$x);
281
282							my $f1 = $x->[0];
283							my $f2 = (1 + $x->[1])/$x->[0];
284
285							my $out = [$f1,$f2];
286
287							return $out;
288							}
289
290							# inequality constraints set
291							sub f_inequality {
292							my $x =shift;
293
294							# equation >= 0
295							my @ineq =
296							(
297							$x->[1] + 9*$x->[0] - 6 ,
298							-$x->[1] + 9*$x->[0] -1
299							);
300
301							return \@ineq;
302							}
303
304							my $bounds = [[0.1,1],[0,5]];
305
306							my $ref_input_output = f_Optim_NSGAII(
307							{
308							'nPop' => 50,
309							'nGen' => 100,
310							'bounds' => $bounds,
311							'function' => \&f_CONSTR,
312							'f_ineq' => \&f_inequality,
313							'nProc' => 8,
314							'filesDir' => '/tmp',
315							'verboseFinal' => 1,
316							'distrib' => [-1,-0.9,-0.8,-0.7,-0.6,-0.5,-0.4,-0.3,-0.2,-0.1,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9],
317							'scaleDistrib' => 0.05,
318							'percentMut' => 5,
319							},
320							{
321							'dx' => 100,
322							'dy' => 40,
323							'xlabel' => 'stiffness [N/mm]',
324							'ylabel' => 'mass [kg]',
325							'xlim' => [0.1,1],
326							'ylim' => [0,10],
327							'nfun' => [0,1],
328							}
329							);
330
331
332
333
334							=head1 OUTPUT PREVIEW
335
336							This below is a typical output of the final Pareto front (problem TNK).
337
338							The numbers represent the rank of the points: in the initial generations you can see points of rank 1,2,3... where points with rank 1 dominate points of rank 2 and so on.
339
340							Generation after generation all the points go on the Pareto front, so all the points become rank 1 (not dominated, nothing best is present in this population)
341
342							The points will also expand to occupy the entire front.
343
344							GENERATION 250
345							m\|
346							a\|
347							s\|
348							s\|
349							\|
350							[\|
351							k\|
352							g\| 1
353							]\| 11
354							\| 11
355							\| 1 1
356							\| 11
357							\| 1
358							\| 11 1 1 1 1
359							\| 1
360							\| 1
361							\|
362							\| 1
363							\| 1 1
364							\| 1111
365							\| 1
366							\|
367							\|
368							\|
369							\|
370							\| 1
371							\| 11
372							\| 1
373							\| 1
374							\|
375							\|______________________________________________________________________
376							stiffness [N/mm]
377
378
379
380
381							=head1 INSTALLATION
382
383							execute:
384
385							C
386
387							or (following the instruction in perlmodinstall)
388
389							=over
390
391							=item * download the F
392
393							=item * decompress and unpack
394
395							=over
396
397							=item * C
398
399							=item * C
400
401							=back
402
403							=item * C
404
405							=item * C
406
407							=item * C
408
409							=item * C
410
411							=item * C
412
413							to install it locally use this instead of C:
414
415							C if you want to install it in /my/folder
416							then you will have to use in your script: C before using C
417
418							=back
419
420
421
422
423
424							=head1 AUTHOR
425
426							Dario Rubino, C<< >>
427
428
429
430
431							=head1 BUGS
432
433							Solutions (input-output pairs) often contain duplicates, this would require some investigation.
434
435							Please report any bugs to C, or through
436							the web interface at L. I will be notified, and then you'll
437							automatically be notified of progress on your bug as I make changes.
438
439
440
441
442							=head1 SUPPORT
443
444							You can find documentation for this module with the perldoc command.
445
446							perldoc Optimization::NSGAII
447
448
449							You can also look for information at:
450
451							=over 4
452
453							=item * RT: CPAN's request tracker (report bugs here)
454
455							L
456
457							=item * CPAN Ratings
458
459							L
460
461							=item * Search CPAN
462
463							L
464
465							=back
466
467							=head1 LICENSE AND COPYRIGHT
468
469							This software is Copyright (c) 2022 by Dario Rubino.
470
471							This is free software, licensed under:
472
473							The Artistic License 2.0 (GPL Compatible)
474
475							=cut
476
477							# A point in the population is defined by:
478							# - input values for the objective functions (input parameters) -> contained in the reference VP
479							# - correspondent output population (objective function values) -> contained in the reference P
480							#
481							# P is the most important set of values used in this package, because the objective functions' values lead the algorithm
482							# VP elements then are postprocessed accordingly so that P and VP maintain always a one to one index correspondence
483							#
484							# VARIABLES
485							#
486							# VP: input values for each population's point
487							# : [[x11,x12,...x1D],...]
488							# P : population (objective values for each point)
489							# : [[fun11,fun12,..fun1M],..]
490
491							# D : number of dimension of the problem (how many input values for a point)
492							# N : size of population P
493
494							# p,q : index of a single solution of the population
495							# : id_sol_ith
496							# Sp: set of solution dominated by p (worse than p) for each point
497							# : [[id_sol_i,id_sol_j,...],..]
498
499							# np: number of solutions which dominate p (better than p), np = zero for first front solutions
500							# : [np1,np2,..]
501							# F: set of solution in front ith
502							# : [[id_sol_k,id_sol_t,...],...]
503							# rank : rank of each solution = front number , rank = 1 for first front solutions
504							# : [rank1,rank2...rankN];
505
506							########################################################################################################
507
508							my @out_print;
509							my $inf = 1e9;
510
511							sub f_ineq2maxerr {
512							# max error calculation in inequalities
513							# input: lhs inequalities ref (errors)
514							# output: max error
515	2359786			2359786	0	2544755	my $ref_ineq = shift;
516
517	2359786					2339026	my @ineq = @{$ref_ineq};
	2359786					2983545
518
519	2359786					2533949	my $err = 0;
520	2359786					2649070	foreach my $el(@ineq){
521	2359786					3595654	$err = max( $err, -$el);
522							}
523	2359786					3106441	return $err;
524							}
525
526
527							sub f_fast_non_dominated_sort {
528							# reference to population
529	66			66	0	181	my $P = shift;
530	66					119	my $VP = shift;
531	66					117	my $f_ineq = shift;
532
533
534	66					333	my $Sp;
535							my $np;
536	66					0	my $F;
537	66					0	my $rank;
538
539	66					90	my $N = scalar(@{$P});
	66					189
540
541	66					495	foreach my $p(0..$N-1){
542	6600					9798	$np->[$p] = 0;
543	6600					9693	foreach my $q(0..$N-1){
544	660000	100				896910	next if $p == $q; # max error <-- inequalities <-- input pars
545	653400	100				775025	if ( f_dominates($P->[$p],$P->[$q], f_ineq2maxerr( &{$f_ineq}($VP->[$p]) ), f_ineq2maxerr( &{$f_ineq}($VP->[$q])) ) ){
	653400	100				777092
	653400					827219
546	126907					139595	push @{$Sp->[$p]}, $q;
	126907					257873
547							}
548	526493					644078	elsif ( f_dominates($P->[$q],$P->[$p], f_ineq2maxerr( &{$f_ineq}($VP->[$q]) ), f_ineq2maxerr( &{$f_ineq}($VP->[$p])) ) ){
	526493					613497
549	124287					230153	$np->[$p] += 1;
550							}
551							}
552	6600	100				13128	if ($np->[$p] == 0){
553	1305					2702	$rank->[$p]=1; # front number, 1 = best
554	1305					1731	push @{$F->[0]}, $p;
	1305					3212
555							}
556							}
557
558							# all other fronts
559	66					117	my $i = 0;
560
561	66					585	while (scalar(@{$F->[$i]})){
	884					1867
562	818					1167	my @Q;# members of next front
563
564	818					1044	foreach my $p(@{$F->[$i]}){
	818					1171
565	6600					6523	foreach my $q(@{$Sp->[$p]}){
	6600					10393
566	126907					128193	$np->[$q] -=1;
567	126907	100				178207	if ($np->[$q] == 0){
568	5295					6058	$rank->[$q] = $i + 1 + 1;
569	5295					6826	push @Q, $q;
570							}
571							}
572							}
573	818					816	$i++;
574	818					2159	$F->[$i] = [@Q];
575
576							}
577
578	66					165	for my $p(0..$N-1){
579	6600					8100	push @out_print, join(' ',($rank->[$p],@{$P->[$p]}));
	6600					15377
580							}
581
582	66					3028	return ($rank,$F); # rank for each point, points in each front
583
584							}
585
586							########################################################################################################
587
588							sub f_dominates {
589							# input: two elements of P
590							# output: 1 if P1 dominate P2, 0 otherwise
591
592	1179893			1179893	0	1312236	my $P1 = shift;
593	1179893					1209309	my $P2 = shift;
594
595							# constraints errors
596	1179893					1176958	my $err1 = shift;
597	1179893					1196395	my $err2 = shift;
598
599
600							# number of objective (dimensions)
601	1179893					1166678	my $M = scalar(@{$P1});
	1179893					1242205
602
603	1179893					1224381	my $P1_dominate_P2_count = 0;
604
605	1179893					1576647	for my $kM(0..$M-1){
606	2359786	100				3538009	if ($P1->[$kM] <= $P2->[$kM]){
607	1306800					1492031	$P1_dominate_P2_count++;
608							}
609							}
610
611	1179893					1151186	my $P1_dominate_P2_p;
612							# if 1 has lower constraints errors, then 1 dominate 2, else if the error is the same (e.g 0) then the dominance is decided on objective value
613							# else simply 1 doesn't dominate 2
614	1179893	100	66			3235003	if (
			66
615							$err1 < $err2
616							\|\|
617							($err1 == $err2 && $P1_dominate_P2_count == $M)
618							){
619	251194					283552	$P1_dominate_P2_p = 1;
620							}
621	928699					1014256	else { $P1_dominate_P2_p = 0}
622
623	1179893					2140191	return $P1_dominate_P2_p;
624
625							}
626
627							########################################################################################################
628
629							sub f_crowding_distance_assignment{
630							# input: ref of array of a subset of population P, population P
631							# output: distance value for each point of this set
632
633							# global ids of the set of non dominated points of interest
634	331			331	0	443	my $ids = shift;
635							# objectives of all points
636	331					350	my $P = shift;
637
638							# build the set of objectives for these global ids (I is a subset of P)
639	331					368	my $I = [@{$P}[@{$ids}]];
	331					1017
	331					444
640
641							# initialize distance
642	331					420	my $Dist;
643
644							# number of objective (dimensions)
645	331					585	my $M = scalar(@{$P->[0]});
	331					464
646							# number of points in I
647	331					410	my $n = scalar(@{$ids});
	331					374
648
649							# for each objective
650	331					872	for my $kM(0..$M-1){
651
652							# local id of the points, sorted by objective
653	662					2426	my @index_sort = sort{$I->[$a][$kM] <=> $I->[$b][$kM]} 0..($n-1);
	21587					24890
654
655							# min & max for this objective
656	662					1247	my $fmin = $I->[$index_sort[0]][$kM];
657	662					906	my $fmax = $I->[$index_sort[-1]][$kM];
658
659	662	50				1058	if ($fmax-$fmin<0.00001){$fmax += 0.00001}
	0					0
660
661							# build the distance
662	662					994	$Dist->[$index_sort[0]] = $inf;
663	662					872	$Dist->[$index_sort[-1]] = $inf;
664							# and for all other intermediate point
665	662					931	for my $i(1..($n-2)){
666	6498					10822	$Dist->[$index_sort[$i]] += ($I->[$index_sort[$i+1]][$kM] - $I->[$index_sort[$i-1]][$kM])/($fmax-$fmin);
667							}
668
669							}
670							# return distances for each point, $Dist->[ith] is distance for point $ids->[ith]
671	331					646	return $Dist;
672							}
673
674							########################################################################################################
675
676							sub f_crowding_distance_operator {
677							# input: rank and distance of two element of the population (selected by GA)
678							# output: 1 if first dominate second, else 0
679
680	6600			6600	0	7245	my $rank = shift;
681	6600					6400	my $Dist = shift;
682
683	6600					6511	my $P1_dominate_P2_p;
684
685	6600	100	100			16939	if (
			100
686							$rank->[0] < $rank->[1]
687							\|\|
688							($rank->[0] == $rank->[1]) && ($Dist->[0] > $Dist->[1])
689							){
690	3319					3554	$P1_dominate_P2_p = 1
691							}
692	3281					3738	else {$P1_dominate_P2_p = 0}
693	6600					8087	return $P1_dominate_P2_p;
694							}
695
696							########################################################################################################
697
698							sub f_NSGAII {
699							# input: current function input values VP(t) & VQ(t) ( VQ is obtained by GA)
700							# current population Pt & Qt (obtained by evaluating objective function using VPt & VQt)
701							# output: VP(t+1), P(t+1), rank & distance for each point of this new population
702
703							# input variables' values
704	66			66	0	508	my $VPt = shift;
705	66					276	my $VQt = shift;
706
707							# population (objective function values)
708	66					325	my $Pt = shift;
709	66					153	my $Qt = shift;
710
711							# constraints function
712	66					362	my $f_ineq = shift;
713
714	66					5385	my $Rt = [(@$Pt,@$Qt)];
715	66					7555	my $VRt = [(@$VPt,@$VQt)];
716	66					194	my $N = scalar(@$Pt);
717
718	66					605	my ($temp,$F) = f_fast_non_dominated_sort($Rt,$VRt,$f_ineq);
719
720							# input variables for the new population P_t+1
721	66					227	my $VPtp1=[];
722							# new population
723	66					133	my $Ptp1=[];
724	66					96	my $Dist=[];
725	66					90	my $rank=[];
726
727	66					98	my $i=0;
728							# push the best fronts in final population & store crowding distance
729	66					136	while ( scalar(@{$Ptp1}) + scalar(@{$F->[$i]}) <= $N ){
	331					436
	331					748
730	265					359	push @$Ptp1, @$Rt[@{$F->[$i]}];
	265					819
731	265					349	push @$VPtp1,@$VRt[@{$F->[$i]}];
	265					747
732	265					533	my $Dist_ = f_crowding_distance_assignment($F->[$i],$Rt);
733	265					721	push @$rank, ($i+1) x @$Dist_;
734	265					549	push @$Dist, @$Dist_;
735	265					413	$i++;
736							}
737
738							# only part of the following front will be pushed in final population, sorting by crowding
739							# here rank is the same for all points, so the crowded-comparison operators reduce to a comparison of crowding distance
740	66					219	my $Dist_ = f_crowding_distance_assignment($F->[$i],$Rt);
741
742	66					118	my $nf = scalar(@{$F->[$i]});
	66					359
743
744	66					273	my @index_sort = sort{$Dist_->[$b] <=> $Dist_->[$a]} 0..($nf-1);
	4998					5732
745
746							# cut to fill only the remaining part of Ptp1
747	66					298	@index_sort = @index_sort[0..(($N-scalar(@$Ptp1))-1)];
748
749	66					158	push @$Ptp1, @$Rt[@{$F->[$i]}[@index_sort]];
	66					264
750	66					158	push @$VPtp1, @$VRt[@{$F->[$i]}[@index_sort]];
	66					286
751	66					416	push @$rank, ($i+1) x @index_sort;
752	66					402	push @$Dist, @$Dist_[@index_sort];
753
754
755	66					1358	return $VPtp1,$Ptp1,$rank,$Dist;
756
757							}
758
759							########################################################################################################
760
761							sub f_SBX {
762	3300			3300	0	3634	my $contiguity = shift; # >0 (usually between 2 and 5), greater value produces child values similar to those of the parents
763	3300					3334	my $VP1_ = shift; # input values of parent 1 (ref)
764	3300					3401	my $VP2_ = shift; # input values of parent 2 (ref)
765
766							# array of equal length
767	3300					4820	my @VP1 = @$VP1_;
768	3300					4090	my @VP2 = @$VP2_;
769	3300					3547	my @out;
770
771	3300					4859	for my $kp(0..$#VP1){
772							# for array of length=1 or rand<0.5 the cross is made
773	9900	100	66			21972	if ( $#VP1 == 0 \|\| rand(1)<0.5){
774
775	4712					5262	my $u = rand(1);
776
777	4712					6010	my $exponent = (1/($contiguity+1));
778	4712	100				9230	my $beta = ($u < 0.5)? (2$u)$exponent : (0.5/(1-$u))*$exponent;
779
780	4712	100				6260	my $sign = (rand(1) < 0.5)? -1 : 1;
781	4712					10248	$out[$kp] = (($VP1[$kp] + $VP2[$kp])/2) + $sign * $beta0.5abs($VP1[$kp] - $VP2[$kp])
782
783							}
784							else {
785	5188					7446	$out[$kp] = $VP1[$kp]
786							}
787							}
788	3300					5484	return \@out;
789							}
790
791							########################################################################################################
792
793							sub f_GA {
794							# input: input values, rank and Dist of the population P(t+1)
795							# output: input value of the population Q(t+1)
796
797	66			66	0	115	my $VPtp1 = shift;
798	66					242	my $rank = shift;
799	66					100	my $Dist = shift;
800	66					79	my $contiguity = shift;
801
802	66					106	my $bounds = shift; # for mutation
803
804							# optional paramters for mutation:
805	66					95	my $distrib = shift;
806	66					93	my $scaleDistrib = shift;
807	66					99	my $percentMut =shift;
808
809	66					102	my $VQtp1 = [];
810
811							# binary tournament
812							# two random element are compared
813							# the best according crowding distance operator is selected as parent 1
814							# the same for selecting parent 2
815							# parent 1 and 2 are crossed with SBX to give a child
816							# the procedure is repeated to fill Q(t+1)
817
818	66					116	my $N = scalar(@$VPtp1);
819
820	66					461	for my $kt(0..$N-1){
821
822							# selection of the two parent
823	3300					3449	my @index_parent;
824	3300					5801	for (1..2){
825	6600					9963	my ($r1,$r2) = (int(rand($N)),int(rand($N)));
826	6600					12201	my $P1_dominate_P2_p = f_crowding_distance_operator( [$rank->[$r1],$rank->[$r2]] , [$Dist->[$r1],$Dist->[$r2]] );
827
828	6600	100				10730	if ($P1_dominate_P2_p == 1){push @index_parent, $r1}
	3319					4776
829	3281					4720	else {push @index_parent, $r2}
830							}
831							# crossover of the two parent
832	3300					5199	my $out = f_SBX( $contiguity , $VPtp1->[$index_parent[0]] , $VPtp1->[$index_parent[1]] );
833	3300					5019	push @$VQtp1, $out;
834							}
835
836							# mutation 1
837							# perturbation using distribution array
838	66					613	for my $k(0..(scalar(@$VQtp1) - 1)){
839							# $percentMut percentage of individuals are changed in all their elements (all input parameter will be perturbated)
840	3300	100				5039	if (rand(1)> 1-$percentMut/100){
841	182					186	for my $d(0..(scalar(@{$VQtp1->[0]}) - 1)){
	182					313
842							# increment the current value by a random value chosen from the distribution (many time it will be near to zero for a gaussian distribution) scaled with the delta_bounds
843	546					1098	$VQtp1->[$k][$d] += ($bounds->[$d][1] - $bounds->[$d][0]) * $scaleDistrib * $distrib->[int(rand(scalar(@$distrib)))];
844							}
845							}
846							}
847
848							# mutation 2
849							# 100% probability of mutation inside VQ(t+1), so the intention is to act on all individual of the population
850	66					204	for my $k(0..int((scalar(@$VQtp1) - 1) * 100/100)){
851	3300					3230	for my $d(0..(scalar(@{$VQtp1->[0]}) - 1)){
	3300					3915
852							# $percentMut percentage of values are changed inside each single individual, value is inside bounds
853	9900	100				14903	if (rand(1)> 1-$percentMut/100){
854	457					745	$VQtp1->[$k][$d] = rand(1) * ($bounds->[$d][1] - $bounds->[$d][0]) + $bounds->[$d][0];
855							}
856							}
857							}
858
859	66					151	return $VQtp1;
860
861							}
862
863
864
865							sub f_Optim_NSGAII {
866
867	12			12	1	3228	my $par = shift;
868
869	12					24	my $par_plot = shift;
870
871
872
873	12					24	my %par = %{$par};
	12					60
874
875	12					36	my $nPop = $par{'nPop'};
876	12					24	my $nGen = $par{'nGen'};
877
878	12					24	my $bounds = $par{'bounds'};
879	12					24	my $n = scalar @$bounds;
880
881	12					24	my $fun = $par{'function'};
882
883							# optional paramters:
884
885	12		50	2359786		108	my $f_ineq = $par{'f_ineq'} // sub {return [0]};
	2359786					3312041
886
887	12		50			36	my $verboseFinal = $par{'verboseFinal'} // 1;
888
889	12		50			36	my $distrib = $par{'distrib'} // [-1,-0.5,0,0.5,1]; # for mutation, default [-1,-0.5,0,0.5,1]
890	12		50			36	my $scaleDistrib = $par{'scaleDistrib'} // 0; # for mutation, default = 0, no perturbation
891	12		50			48	my $percentMut = $par{'percentMut'} // 5; # for mutation, default = 5%
892
893							# control for no wrong use of keys
894	12					60	my @keys_ok = ('nPop','nGen','bounds','function','nProc','filesDir','verboseFinal','f_ineq','distrib','scaleDistrib','percentMut');
895	12					48	for my $key(keys %par){
896	108	50				1512	unless ( grep( /^$key$/, @keys_ok ) ) {
897	0					0	die 'E R R O R : the use of "'.$key.'" in the function to optimize is not defined! Compare with documentation.';
898							}
899							}
900
901	12					36	my $VPt;
902							my $VQt;
903
904
905	12					36	for my $gen(0..$nGen){
906	77	100				310	if ($gen == 0){
907							# input
908	12					48	for (0..$nPop-1){
909	600					708	$VPt->[$_]=[map { rand(1) * ($bounds->[$_-1][1] - $bounds->[$_-1][0]) + $bounds->[$_-1][0] } 1..$n];
	1800					3012
910	600					768	$VQt->[$_]=[map { rand(1) * ($bounds->[$_-1][1] - $bounds->[$_-1][0]) + $bounds->[$_-1][0] } 1..$n];
	1800					3120
911							}
912							}
913
914
915
916
917
918
919	77					228	my $nProc = $par{'nProc'};
920	77					151	my $filesDir = $par{'filesDir'};
921
922							# if ($nPop%$nProc != 0){warn "\n nPop should be divisible by nProc!\n\n"};
923
924	77					134	my $fork = 0;
925
926	77					222	for (1..$nProc){
927
928	77					82794	my $pid = fork;
929	77	50				1977	die $! unless defined $pid;
930	77					368	$fork++;
931
932							# say "in PID = $$ with child PID = $pid fork# = $fork";
933							# parent process stop here, child processes go on
934	77	100				2366	next if $pid;
935
936	11					1378	my $r = 0;
937
938	11					799	my $nameFileP = $filesDir.'/Pt_'.$fork.'.txt';
939	11					238	my $nameFileQ = $filesDir.'/Qt_'.$fork.'.txt';
940
941							# remove existing input file for this process number
942	11					31503	system ('rm -f '.$nameFileP);
943	11					32869	system ('rm -f '.$nameFileQ);
944
945
946	11					516	my $id_from = ($fork-1)*int($nPop/$nProc);
947	11					200	my $id_to = $fork *int($nPop/$nProc)-1;
948	11	50				679	if ($fork == $nProc){$id_to = $nPop - 1};
	11					118
949
950							# output
951	11	50				2838	open my $fileoP, '>>', $nameFileP or croak "E R R O R : problem in writing the file ".$nameFileP.' -> "filesDir" path not reachable? -- ';
952	11	50				938	open my $fileoQ, '>>', $nameFileQ or croak "E R R O R : problem in writing the file ".$nameFileQ.' -> "filesDir" path not reachable? -- ';
953	11					165	for ($id_from .. $id_to){
954	550					857	my $Pt_ = &{$fun}($VPt->[$_]);
	550					2364
955	550					11920	my $Qt_ = &{$fun}($VQt->[$_]);
	550					1926
956	550					11044	say $fileoP join ',',@{$Pt_};
	550					6198
957	550					738	say $fileoQ join ',',@{$Qt_};
	550					2199
958							}
959	11					735	close $fileoP;
960	11					384	close $fileoQ;
961	11					10510	exit;
962							}
963
964							# wait for the processes to finish
965	66					222	my $kid;
966	66					94	do {
967	132					22684665	$kid = waitpid -1, 0;
968							} while ($kid>0);
969
970	66					265	my $Pt;
971							my $Qt;
972
973							# collect data together
974	66					834	for (1..$nProc){
975
976	66					1462	my $nameFileP = $filesDir.'/Pt_'.$_.'.txt';
977	66					664	my $nameFileQ = $filesDir.'/Qt_'.$_.'.txt';
978	66	50				7011	open my $fileiP, '<', $nameFileP or croak "E R R O R : problem in reading the file ".$nameFileP.' -> "filesDir" path not reachable? ';
979	66	50				3588	open my $fileiQ, '<', $nameFileQ or croak "E R R O R : problem in reading the file ".$nameFileQ.' -> "filesDir" path not reachable? ';
980
981	66					12620	while (my $line = <$fileiP>){
982	3300					4370	chomp $line;
983	3300					8536	my @vals = split ',', $line;
984	3300					9799	push @$Pt, \@vals;
985							}
986	66					1716	while (my $line = <$fileiQ>){
987	3300					3917	chomp $line;
988	3300					6445	my @vals = split ',', $line;
989	3300					9267	push @$Qt, \@vals;
990							}
991	66					848	close $fileiP;
992	66					783	close $fileiQ;
993							}
994
995
996
997							# new input
998	66					967	my ($VPtp1,$Ptp1,$rank,$Dist)=f_NSGAII($VPt,$VQt,$Pt,$Qt,$f_ineq);
999
1000							# save inputs and corresponding outputs at this generation
1001	66					159	my $to_print;
1002							my $FILEO;
1003							# inputs (genes for each individual)
1004	66	50				8920	open $FILEO, '>', $filesDir.'/VPt_gen'.sprintf('%05d',$gen).'.txt' or croak 'E R R O R : problem in writing the generation file -> "filesDir" path not reachable? ';
1005	66					821	$to_print = f_print_columns($VPt,'%25.15f');
1006	66					2307	print $FILEO (join "\n", @$to_print);
1007	66					3193	close $FILEO;
1008							# outputs (f1, f2 ... for each individual)
1009	66	50				4412	open $FILEO, '>', $filesDir.'/Pt_gen'.sprintf('%05d',$gen).'.txt' or croak 'E R R O R : problem in writing the generation file -> "filesDir" path not reachable? ';
1010	66					297	$to_print = f_print_columns($Pt,'%25.15f');
1011	66					803	print $FILEO (join "\n", @$to_print);
1012	66					1848	close $FILEO;
1013
1014
1015							#print " example input values: " ;
1016							#say join ' ', @{$VPtp1->[0]};
1017
1018	66	50				286	if (defined $par_plot){
1019	0					0	f_plot($par_plot,$gen,$Ptp1,$rank);
1020	0					0	say '';
1021
1022	0					0	my $max = -$inf;
1023	0					0	my $min = $inf;
1024
1025	0					0	for (0..$nPop-1){
1026	0					0	$max = max(@{$Pt->[$_]},@{$Qt->[$_]},$max);
	0					0
	0					0
1027	0					0	$min = min(@{$Pt->[$_]},@{$Qt->[$_]},$min);
	0					0
	0					0
1028							}
1029	0					0	say 'max output = '.$max;
1030	0					0	say 'min output = '.$min;
1031
1032
1033	0					0	my $maxV = -$inf;
1034	0					0	my $minV= $inf;
1035
1036	0					0	my $minD = min(@$Dist);
1037
1038
1039	0					0	for (0..$nPop-1){
1040
1041	0					0	$maxV = max(@{$VPt->[$_]},@{$VQt->[$_]},$maxV);
	0					0
	0					0
1042	0					0	$minV = min(@{$VPt->[$_]},@{$VQt->[$_]},$minV);
	0					0
	0					0
1043
1044							}
1045	0					0	say 'max input = '.$maxV;
1046	0					0	say 'min input = '.$minV;
1047
1048	0					0	say 'min Dist = '.$minD;
1049							}
1050
1051
1052	66					373	my ($VQtp1) = f_GA($VPtp1,$rank,$Dist,2.0,$bounds,$distrib,$scaleDistrib,$percentMut);
1053
1054							# correction of input values produced by GA to respect bounds
1055	66					151	for my $p(0..$nPop-1){
1056	3300					3786	for my $d(0..$n-1){
1057	9900	100				13638	if($VQtp1->[$p][$d] < $bounds->[$d][0]){$VQtp1->[$p][$d] = $bounds->[$d][0]};
	44					44
1058	9900	100				15363	if($VQtp1->[$p][$d] > $bounds->[$d][1]){$VQtp1->[$p][$d] = $bounds->[$d][1]};
	36					83
1059							}
1060							}
1061
1062							# new output
1063	66					127	my $Qtp1;
1064	66					262	for (0..$nPop-1){
1065	3300					45613	$Qtp1->[$_]=&{$fun}($VQtp1->[$_]);
	3300					5231
1066							}
1067
1068							# new became old
1069	66					3652	$VPt = [@$VPtp1];
1070	66					2753	$VQt = [@$VQtp1];
1071
1072
1073							# output final
1074	66	100				3223	if($gen == $nGen){
1075	1	50				19	if ($verboseFinal == 1){
1076	0					0	say '------------------------- I N P U T -------------------------:';
1077	0					0	map {say join ' ',@{$_}} @$VPt;
	0					0
	0					0
1078	0					0	say '------------------------- O U T P U T -------------------------';
1079	0					0	map {say join ' ',@{$_}} @$Pt;
	0					0
	0					0
1080							}
1081
1082	1					101	return [$VPt,$Pt];
1083
1084							}
1085
1086							}
1087
1088							}
1089
1090
1091
1092							sub f_print_columns {
1093							# to print in columns format the content of $P, $Ptp1 ...
1094	132			132	0	322	my $P = shift;
1095	132					695	my $formato = shift; # e.g. '%25.15f'
1096
1097							# how many input parameters (genes)
1098	132					252	my $n = scalar @{$P->[0]};
	132					318
1099							# population number
1100	132					225	my $nPop = scalar @$P;
1101
1102							# print, each line contains the genes of an individual
1103	132					199	my @to_print;
1104	132					644	for my $kl(0..($nPop-1)){
1105	6600					6724	my $line;
1106	6600					8144	for my $kx(0..($n-1)){
1107	16500					47094	$line.= sprintf($formato,$P->[$kl][$kx]) . ' ';
1108							}
1109	6600					10589	push @to_print, $line;
1110							}
1111
1112	132					506	return \@to_print;
1113							}
1114
1115							########################################################################################################
1116
1117							sub f_plot {
1118	0			0	0		my $par_ref = shift;
1119
1120	0						my $gen = shift;
1121
1122	0						my $P = shift;
1123	0						my $rank = shift;
1124
1125
1126
1127
1128	0						my %par = %{$par_ref};
	0
1129
1130							# nfun: what objective to plot from all (x=f1 y=f4 -> [0,3])
1131	0						my $nfun = $par{'nfun'};
1132
1133
1134	0						my $title = 'GENERATION '.$gen;
1135
1136							# inizialization of all lines of output
1137	0						my @output = map{ ' ' x $par{'dx'} } 1..$par{'dy'};
	0
1138
1139	0						my @nameFront =(0..9,"a".."z","A".."Z");
1140
1141							# print points of the front
1142	0						my $xASCIImin = 0;
1143	0						my $xASCIImax = $par{'dx'}-1;
1144	0						my $yASCIImin = $par{'dy'}-1;
1145	0						my $yASCIImax = 0;
1146	0						my $n_outliers = 0;
1147
1148	0						for my $kp(1..scalar(@{$P})-1){
	0
1149							# conversion x,y -> xASCII,yASCII
1150	0						my $x = $P->[$kp][$nfun->[0]];
1151	0						my $y = $P->[$kp][$nfun->[1]];
1152	0						my $xmin = $par{'xlim'}->[0];
1153	0						my $xmax = $par{'xlim'}->[1];
1154	0						my $ymin = $par{'ylim'}->[0];
1155	0						my $ymax = $par{'ylim'}->[1];
1156
1157	0						my $xASCII;
1158							my $yASCII;
1159
1160
1161	0	0	0				if ($x < $xmax && $x > $xmin && $y < $ymax && $y > $ymin && $rank->[$kp] < $#nameFront){
			0
			0
			0
1162	0						$xASCII = f_interp($xmin,$xmax,$xASCIImin,$xASCIImax,$x);
1163	0						$yASCII = f_interp($ymin,$ymax,$yASCIImin,$yASCIImax,$y);
1164
1165	0						substr($output[$yASCII],$xASCII,1,$nameFront[$rank->[$kp]]);
1166
1167							}
1168	0						else {$n_outliers++};
1169							}
1170
1171							# add axis
1172	0						push @output, '_' x $par{'dx'};
1173	0						@output = map{'\|'.$_} @output;
	0
1174							# add axis labels
1175	0						push @output, sprintf("%$par{'dx'}s",$par{'xlabel'});
1176	0						my @ylabelv = split '',$par{'ylabel'};
1177	0	0					@output = map{ defined $ylabelv[$_] ? $ylabelv[$_].$output[$_] : ' '.$output[$_]} 0..$#output;
	0
1178							# add statistics
1179	0						unshift @output, sprintf("%".int(($par{'dx'}+length($title))/2)."s",$title);
1180	0						push @output, "xlim: ".(join ' ',@{$par{'xlim'}});
	0
1181	0						push @output, "ylim: ".(join ' ',@{$par{'ylim'}});
	0
1182	0						push @output, "#outliers: ".$n_outliers;
1183
1184	0						print join "\n", @output;
1185
1186							}
1187
1188
1189							sub f_interp{
1190	0			0	0		my ($xmin,$xmax,$xASCIImin,$xASCIImax,$x) = @_;
1191	0						my $xASCII = ($xASCIImax-$xASCIImin)/($xmax-$xmin)*($x-$xmin)+$xASCIImin;
1192	0						$xASCII = sprintf("%.0f",$xASCII);
1193	0						return $xASCII
1194							}
1195
1196
1197
1198
1199
1200
1201							1;