File Coverage

lib/Convert/Pheno.pm
Criterion Covered Total %
statement 216 245 88.1
branch 43 68 63.2
condition 12 20 60.0
subroutine 39 42 92.8
pod 0 13 0.0
total 310 388 79.9


line stmt bran cond sub pod time code
1             package Convert::Pheno;
2              
3 6     6   644946 use strict;
  6         72  
  6         177  
4 6     6   38 use warnings;
  6         8  
  6         157  
5 6     6   4400 use autodie;
  6         97241  
  6         27  
6 6     6   41768 use feature qw(say);
  6         14  
  6         616  
7 6     6   2211 use File::Spec::Functions qw(catdir catfile);
  6         3836  
  6         480  
8 6     6   748 use Data::Dumper;
  6         7537  
  6         323  
9 6     6   5786 use Path::Tiny;
  6         82615  
  6         331  
10 6     6   56 use File::Basename;
  6         12  
  6         699  
11 6     6   3486 use File::ShareDir::ProjectDistDir;
  6         255041  
  6         50  
12 6     6   3878 use List::Util qw(any uniq);
  6         11  
  6         738  
13 6     6   44 use Carp qw(confess);
  6         10  
  6         278  
14 6     6   3021 use XML::Fast;
  6         95013  
  6         343  
15 6     6   3526 use Moo;
  6         44880  
  6         27  
16 6     6   15551 use Types::Standard qw(Str Int Num Enum ArrayRef Undef);
  6         702363  
  6         68  
17 6     6   20299 use File::ShareDir::ProjectDistDir qw(dist_dir);
  6         15  
  6         85  
18              
19             #use Devel::Size qw(size total_size);
20 6     6   6982 use Convert::Pheno::CSV;
  6         22  
  6         591  
21 6     6   49 use Convert::Pheno::IO;
  6         13  
  6         321  
22 6     6   39 use Convert::Pheno::SQLite;
  6         16  
  6         641  
23 6     6   43 use Convert::Pheno::Mapping;
  6         22  
  6         641  
24 6     6   48 use Convert::Pheno::OMOP;
  6         17  
  6         652  
25 6     6   3188 use Convert::Pheno::PXF;
  6         22  
  6         333  
26 6     6   2663 use Convert::Pheno::BFF;
  6         15  
  6         434  
27 6     6   2493 use Convert::Pheno::CDISC;
  6         21  
  6         340  
28 6     6   54 use Convert::Pheno::REDCap;
  6         17  
  6         231  
29              
30 6     6   34 use Exporter 'import';
  6         10  
  6         273  
31             our @EXPORT =
32             qw($VERSION io_yaml_or_json omop2bff_stream_processing share_dir); # Symbols imported by default
33              
34             #our @EXPORT_OK = qw(foo bar); # Symbols imported by request
35              
36 6     6   43 use constant DEVEL_MODE => 0;
  6         11  
  6         21011  
37              
38             # Global variables:
39             our $VERSION = '0.12_4';
40             our $share_dir = dist_dir('Convert-Pheno');
41              
42             ############################################
43             # Start declaring attributes for the class #
44             ############################################
45              
46             # Complex defaults here
47             has search => (
48              
49             default => 'exact',
50             is => 'ro',
51             coerce => sub { $_[0] // 'exact' },
52             isa => Enum [qw(exact mixed)]
53             );
54              
55             has text_similarity_method => (
56              
57             #default => 'cosine',
58             is => 'ro',
59             coerce => sub { $_[0] // 'cosine' },
60             isa => Enum [qw(cosine dice)]
61             );
62              
63             has min_text_similarity_score => (
64              
65             #default => 0.8,
66             is => 'ro',
67             coerce => sub { $_[0] // 0.8 },
68             isa => sub {
69             die "Only values between 0 and 1 are supported!"
70             unless ( $_[0] >= 0.0 && $_[0] <= 1.0 );
71             }
72             );
73              
74             has username => (
75              
76             #default => ( $ENV{LOGNAME} || $ENV{USER} || getpwuid($<) ) , # getpwuid not implemented in Windows
77             default => $ENV{'LOGNAME'} || $ENV{'USER'} || $ENV{'USERNAME'} || 'dummy-user',
78             is => 'ro',
79             coerce => sub {
80             $_[0] // ( $ENV{'LOGNAME'} || $ENV{'USER'} || $ENV{'USERNAME'} || 'dummy-user' );
81             },
82             isa => Str
83             );
84              
85             has max_lines_sql => (
86             default => 500, # Limit to speed up runtime
87             is => 'ro',
88             coerce => sub { $_[0] // 500 },
89             isa => Int
90             );
91              
92             has omop_tables => (
93              
94             # Table <CONCEPT> is always required
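                # e.g. (illustrative): [qw(measurement drug_exposure)] coerces
                # to [qw(MEASUREMENT DRUG_EXPOSURE CONCEPT PERSON)]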
95             default => sub { [@omop_essential_tables] },
96             coerce => sub {
97             @{ $_[0] }
98             ? $_[0] =
99             [ map { uc($_) } ( uniq( @{ $_[0] }, 'CONCEPT', 'PERSON' ) ) ]
100             : \@omop_essential_tables;
101             },
102             is => 'rw',
103             isa => ArrayRef
104             );
105              
106             has exposures_file => (
107              
108             default =>
109             catfile( $share_dir, 'db', 'concepts_candidates_2_exposure.csv' ),
110             coerce => sub {
111             $_[0]
112             // catfile( $share_dir, 'db', 'concepts_candidates_2_exposure.csv' );
113             },
114             is => 'ro',
115             isa => Str
116             );
117              
118             # Miscellaneous attributes here
119             has [qw /test print_hidden_labels self_validate_schema path_to_ohdsi_db/] =>
120             ( default => undef, is => 'ro' );
121              
122             has [qw /stream ohdsi_db/] => ( default => 0, is => 'ro' );
123              
124             has [qw /in_files/] => ( default => sub { [] }, is => 'ro' );
125              
126             has [
127             qw /out_file out_dir in_textfile in_file sep sql2csv redcap_dictionary mapping_file schema_file debug log verbose/
128             ] => ( is => 'ro' );
129              
130             has [qw /data method/] => ( is => 'rw' );
131              
132             ##########################################
133             # End declaring attributes for the class #
134             ##########################################
135              
136             # NB: In general, we'll only display terms that exist and have content
137              
138             #############
139             #############
140             # PXF2BFF #
141             #############
142             #############
143              
144             sub pxf2bff {
145              
146             # <array_dispatcher> will deal with JSON arrays
147 2     2 0 163 return array_dispatcher(shift);
148             }
149              
150             #############
151             #############
152             # BFF2PXF #
153             #############
154             #############
155              
156             sub bff2pxf {
157              
158             # <array_dispatcher> will deal with JSON arrays
159 2     2 0 90 return array_dispatcher(shift);
160             }
161              
162             ################
163             ################
164             # REDCAP2BFF #
165             ################
166             ################
167              
168             sub redcap2bff {
169              
170 12     12 0 890 my $self = shift;
171              
172             # Read and load data from REDCap export
173 12         88 my $data = read_csv( { in => $self->{in_file}, sep => undef } );
174             my ( $data_redcap_dict, $data_mapping_file ) =
175             read_redcap_dict_and_mapping_file(
176             {
177             redcap_dictionary => $self->{redcap_dictionary},
178             mapping_file => $self->{mapping_file},
179             self_validate_schema => $self->{self_validate_schema},
180             schema_file => $self->{schema_file}
181             }
182 11         178 );
183              
184             # Load data in $self
185 4         25 $self->{data} = $data; # Dynamically adding attributes (setter)
186 4         18 $self->{data_redcap_dict} = $data_redcap_dict; # Dynamically adding attributes (setter)
187 4         13 $self->{data_mapping_file} = $data_mapping_file; # Dynamically adding attributes (setter)
188              
189             # array_dispatcher will deal with JSON arrays
190 4         24 return array_dispatcher($self);
191             }
192              
193             ################
194             ################
195             # REDCAP2PXF #
196             ################
197             ################
198              
199             sub redcap2pxf {
200              
201 1     1 0 46 my $self = shift;
202              
203             # First iteration: redcap2bff
204 1         4 $self->{method} = 'redcap2bff'; # setter - we have to change the value of attr {method}
205 1         4 my $bff = redcap2bff($self); # array
206              
207             # Preparing for second iteration: bff2pxf
208 1         19 $self->{method} = 'bff2pxf'; # setter
209 1         7295 $self->{data} = $bff; # setter
210 1         8 $self->{in_textfile} = 0; # setter
211              
212             # Run second iteration
213 1         4 return array_dispatcher($self);
214             }
215              
216             ##############
217             ##############
218             # OMOP2BFF #
219             ##############
220             ##############
221              
222             sub omop2bff {
223              
224 3     3 0 116 my $self = shift;
225              
226             #############
227             # IMPORTANT #
228             #############
229              
230             # SMALL TO MEDIUM FILES < 1M rows
231             #
232             # In many cases, because people are downsizing their DBs for data sharing,
233             # PostgreSQL dumps or CSVs will be < 1M rows.
234             # Provided we have enough memory (4-16GB), we'll be able to load the data in RAM,
235             # and consolidate individual values (MEASUREMENTS, DRUGS, etc.)
236              
237             # HUMONGOUS FILES > 1M rows
238             # NB: Interesting read on the topic
239             # https://www.perlmonks.org/?node_id=1033692
240             # Since we're relying heavily on hashes, we need to resort to other strategies to load the data
241             #
242             # * Option A *: Parallel processing - No change in our code
243             # Without changing the code, we ask the user to create mini-instances (or split CSVs into chunks) and use
244             # some sort of parallel processing (e.g., GNU parallel, snakemake, HPC, etc.)
245             # CONS: Concurrent jobs may fail due to SQLite being opened by multiple threads
246             #
247             # * Option B *: Keeping data consolidated at the individual-object level (as we do with small to medium files)
248             # --no-stream
249             # To do this, we have two options:
250             # a) Externalize (save to file) THE WHOLE HASH w/ DBM:Deep (but it's very slow)
251             # b) First dump to CSV (us or the users) and then use *nix to sort by person_id (or load SQLite and sort there).
252             # Then, since rows for each individual are adjacent, we can load individual data together. Still,
253             # we'll be reading one table (e.g. MEASUREMENTS) at a time, thus, this is not really helping much to consolidate...
254             #
255             # * Option C *: Parsing files line by line (one row of CSV/SQL per JSON object; see sketch below) <=========== IMPLEMENTED ==========
256             # --stream
257             # BFF / PXF JSONs are just intermediate files. It's nice that they contain data grouped by individual
258             # (for visual inspection and display), but at the end of the day they'll end up in MongoDB.
259             # If all entries contain the primary key 'person_id' then it's up to the Beacon v2 API to deal with them.
260             # It's a similar issue to the one we had with genomicVariations in the B2RI, where a given variant belongs to many individuals.
261             # Here, multiple JSON documents/objects (MEASUREMENTS, DRUGS, etc.) will belong to the same individual.
262             # Now, since we allow for CSV and SQL as input, we need to keep the number of steps to a minimum.
263             #
264             # - Problems that may arise:
265             # 1 - <CONCEPT> table is mandatory, but it can be so huge that it eats up all the RAM.
266             # For instance, <CONCEPT.csv> with 5_808_095 lines = 735 MB
267             # <CONCEPT_light.csv> with 5_808_094 lines but only 4 columns = 501 MB
268             # Anything more than 2M lines kills an 8GB RAM machine.
269             # Solutions:
270             # a) Not loading the table at all and resorting to --ohdsi-db
271             # b) Creating a temporary SQLite instance for <CONCEPT>
272             # 2 - How to read line-by-line from an SQL dump
273             # If the PostgreSQL dump weighs, say, 20GB, do we create CSV tables from it (another ~20GB)?
274             # Solutions:
275             # a) Yep, we read @stream_ram_memory_tables and export the needed tables to CSV and go from there.
276             # b) Nope, we read the PostgreSQL file twice, once to load @stream_ram_memory_tables
277             # and the second time to load the remaining TABLES. <=========== IMPLEMENTED ==========
278             # 3 - In --stream mode, do we still allow for --sql2csv? NOPE !!!! <=========== IMPLEMENTED ==========
279             # We would need to go from functional mode (csv) to filehandles and it would take tons of space.
280             # Thus, --stream and --sql2csv are mutually exclusive.
281             #
282              
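                # A minimal usage sketch of Option C (a hedged illustration:
                # file names are hypothetical; the attributes are the ones
                # declared above in this class):
                #
                #   my $c = Convert::Pheno->new(
                #       {
                #           in_files => ['omop_dump.sql'],
                #           method   => 'omop2bff',
                #           stream   => 1,                 # Option C: line-by-line
                #           out_file => 'individuals.json',
                #       }
                #   );
                #   $c->omop2bff;
                #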
283             # Load variables
284 3         13 my $data;
285             my $filepath;
286 3         0 my @filepaths;
287             $self->{method_ori} =
288 3 100       17 exists $self->{method_ori} ? $self->{method_ori} : 'omop2bff'; # setter
289 3         8 $self->{prev_omop_tables} = [ @{ $self->{omop_tables} } ]; # setter - 1D clone
  3         19  
290              
291             # Check if data comes from variable or from file
292             # Variable
293 3 50       12 if ( exists $self->{data} ) {
294 0         0 $self->{omop_cli} = 0; # setter
295 0         0 $data = $self->{data};
296             }
297              
298             # File(s)
299             else {
300              
301             # Read and load data from OMOP-CDM export
302 3         10 $self->{omop_cli} = 1; # setter
303              
304             # First we need to know if we have a PostgreSQL dump or a bunch of CSVs
305             # File extensions to check
306 3         10 my @exts = map { $_, $_ . '.gz' } qw(.csv .tsv .sql);
  9         28  
307              
308             # Proceed
309             # The idea here is that we'll load ONLY ESSENTIAL TABLES
310             # regardless of whether they are concepts or actual records.
311             # Dictionaries (e.g. <CONCEPT>) will be parsed later from $data
312              
313 3         9 for my $file ( @{ $self->{in_files} } ) {
  3         13  
314 3         335 my ( $table_name, undef, $ext ) = fileparse( $file, @exts );
315 3 50       35 if ( $ext =~ m/\.sql/i ) {
316              
317             #######################
318             # Loading OMOP tables #
319             #######################
320              
321             # --no-stream
322 3 100       14 if ( !$self->{stream} ) {
323              
324             # We read all tables in memory
325 2         26 $data = read_sqldump( { in => $file, self => $self } );
326              
327             # Exporting to CSV if --sql2csv
328 2 50       21 sqldump2csv( $data, $self->{out_dir} ) if $self->{sql2csv};
329             }
330              
331             # --stream
332             else {
333              
334             # We'll ONLY load @stream_ram_memory_tables
335             # in RAM and the other tables as $fh
336 1         5 $self->{omop_tables} = [@stream_ram_memory_tables]; # setter
337 1         8 $data = read_sqldump( { in => $file, self => $self } );
338             }
339              
340             # We keep the filepath for later
341 3         92 $filepath = $file;
342              
343             # Exit loop
344 3         18 last;
345             }
346             else {
347              
348             # We'll load all OMOP tables that the user is providing as -iomop
349             # as long as they have a match in @omop_essential_tables
350             # NB: --omop-tables has no effect
351             warn "<$table_name> is not a valid table in OMOP-CDM\n" and next
352              
353             #unless (any { $_ eq $table_name } @{ $omop_main_table->{$omop_version} };
354 0 0 0 0   0 unless any { $_ eq $table_name } @omop_essential_tables; # global
  0         0  
355              
356             # --no-stream
357 0 0       0 if ( !$self->{stream} ) {
358              
359             # We read all tables in memory
360             $data->{$table_name} =
361 0         0 read_csv( { in => $file, sep => $self->{sep} } );
362             }
363              
364             # --stream
365             else {
366             # We'll ONLY load @stream_ram_memory_tables
367             # in RAM and the other tables as $fh
368 0 0   0   0 if ( any { $_ eq $table_name } @stream_ram_memory_tables ) {
  0         0  
369             $data->{$table_name} =
370 0         0 read_csv( { in => $file, sep => $self->{sep} } );
371             }
372             else {
373 0         0 push @filepaths, $file;
374             }
375             }
376             }
377             }
378             }
379              
380             #print Dumper_concise($data) and die;
381             #print Dumper_concise($self) and die;
382              
383             # Primarily with CSVs, it can happen that the user does not provide <CONCEPT.csv>
384             confess 'We could not find table <CONCEPT> from your input files'
385 3 50       17 unless exists $data->{CONCEPT};
386              
387             # We create a dictionary for $data->{CONCEPT}
388 3         22 $self->{data_ohdsi_dic} = transpose_ohdsi_dictionary( $data->{CONCEPT} ); # Dynamically adding attributes (setter)
389              
390             # We load the allowed concept_id for exposures as a hashref (for --no-stream and --stream)
391 3         20 $self->{exposures} = load_exposures( $self->{exposures_file} ); # Dynamically adding attributes (setter)
392              
393             # We transpose $self->{data}{VISIT_OCCURRENCE} if present
394 3 50       21 if ( exists $data->{VISIT_OCCURRENCE} ) {
395             $self->{visit_occurrence} =
396 3         21 transpose_visit_occurrence( $data->{VISIT_OCCURRENCE} ); # Dynamically adding attributes (setter)
397 3         115 delete $data->{VISIT_OCCURRENCE};
398             }
399              
400             # Now we need to perform a transformation of the data so that all rows belonging to one 'person_id' are grouped together
401             # NB: The transformation applies ONLY to $omop_main_table FIELDS; the rest of the tables are not used
402             # The transformation is performed in --no-stream mode
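                # (Roughly: table-keyed arrays of rows are regrouped so that each
                # resulting element holds all records for a single person_id.)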
403             $self->{data} =
404 3 100       33 $self->{stream} ? $data : transpose_omop_data_structure($data); # Dynamically adding attributes (setter)
405              
406             # Giving some memory back to the system
407 3         884 $data = undef;
408              
409             # --stream
410 3 100       50 if ( $self->{stream} ) {
411 1         9 omop_stream_dispatcher(
412             { self => $self, filepath => $filepath, filepaths => \@filepaths }
413             );
414             }
415              
416             # --no-stream
417             else {
418             # array_dispatcher will deal with JSON arrays
419 2         14 return array_dispatcher($self);
420             }
421             }
422              
423             ##############
424             ##############
425             # OMOP2PXF #
426             ##############
427             ##############
428              
429             sub omop2pxf {
430              
431 1     1 0 142 my $self = shift;
432              
433             # We have two possibilities:
434             #
435             # 1 - Module (Variables)
436             # 2 - CLI (I/O files)
437              
438             # Variable
439 1 50       7 if ( exists $self->{data} ) {
440              
441             # First iteration: omop2bff
442 0         0 $self->{omop_cli} = 0;
443 0         0 $self->{method} = 'omop2bff'; # setter - we have to change the value of attr {method}
444 0         0 my $bff = omop2bff($self); # array
445              
446             # Preparing for second iteration: bff2pxf
447             # NB: This 2nd round may take a while if #individuals > 1000!!!
448 0         0 $self->{method} = 'bff2pxf'; # setter
449 0         0 $self->{data} = $bff; # setter
450 0         0 $self->{in_textfile} = 0; # setter
451              
452             # Run second iteration
453 0         0 return array_dispatcher($self);
454              
455             # CLI
456             }
457             else {
458             # $self->{method} will always be 'omop2bff'
459             # $self->{method_ori} will tell us the original one
460 1         3 $self->{method_ori} = 'omop2pxf'; # setter
461 1         3 $self->{method} = 'omop2bff'; # setter
462 1         2 $self->{omop_cli} = 1; # setter
463              
464             # Run 1st and 2nd iteration
465 1         6 return omop2bff($self);
466             }
467             }
468              
469             ###############
470             ###############
471             # CDISC2BFF #
472             ###############
473             ###############
474              
475             sub cdisc2bff {
476              
477 2     2 0 56 my $self = shift;
478 2         18 my $str = path( $self->{in_file} )->slurp_utf8;
479 2         7356 my $hash = xml2hash $str, attr => '-', text => '~';
480 2         68538 my $data = cdisc2redcap($hash);
481              
482             my ( $data_redcap_dict, $data_mapping_file ) =
483             read_redcap_dict_and_mapping_file(
484             {
485             redcap_dictionary => $self->{redcap_dictionary},
486             mapping_file => $self->{mapping_file},
487             self_validate_schema => $self->{self_validate_schema},
488             schema_file => $self->{schema_file}
489             }
490 2         31 );
491              
492             # Load data in $self
493 2         12 $self->{data} = $data; # Dynamically adding attributes (setter)
494 2         11 $self->{data_redcap_dict} = $data_redcap_dict; # Dynamically adding attributes (setter)
495 2         7 $self->{data_mapping_file} = $data_mapping_file; # Dynamically adding attributes (setter)
496              
497             # array_dispatcher will deal with JSON arrays
498 2         12 return array_dispatcher($self);
499             }
500              
501             ###############
502             ###############
503             # CDISC2PXF #
504             ###############
505             ###############
506              
507             sub cdisc2pxf {
508              
509 1     1 0 56 my $self = shift;
510              
511             # First iteration: cdisc2bff
512 1         3 $self->{method} = 'cdisc2bff'; # setter - we have to change the value of attr {method}
513 1         7 my $bff = cdisc2bff($self); # array
514              
515             # Preparing for second iteration: bff2pxf
516 1         8 $self->{method} = 'bff2pxf'; # setter
517 1         4322 $self->{data} = $bff; # setter
518 1         11 $self->{in_textfile} = 0; # setter
519              
520             # Run second iteration
521 1         23 return array_dispatcher($self);
522             }
523              
524             ######################
525             ######################
526             # MISCELLANEA SUBS #
527             ######################
528             ######################
529              
530             sub array_dispatcher {
531              
532 14     14 0 45 my $self = shift;
533              
534             # Load the input data as a Perl data structure
535             my $in_data =
536             ( $self->{in_textfile} && $self->{method} !~ m/^redcap2|^omop2|^cdisc2/ )
537             ? io_yaml_or_json( { filepath => $self->{in_file}, mode => 'read' } )
538 14 100 100     220 : $self->{data};
539              
540             # Define the methods to call (naming 'func' to avoid confusion with $self->{method})
541 13         195 my %func = (
542             pxf2bff => \&do_pxf2bff,
543             redcap2bff => \&do_redcap2bff,
544             cdisc2bff => \&do_cdisc2bff,
545             omop2bff => \&do_omop2bff,
546             bff2pxf => \&do_bff2pxf
547             );
548              
549             # Open connections to SQLite databases ONCE
550 13 100       110 open_connections_SQLite($self) if $self->{method} ne 'bff2pxf';
551              
552             # Open filehandle if omop2bff
553 13         40 my $fh_out;
554 13 50 66     72 if ( $self->{method} eq 'omop2bff' && $self->{omop_cli} ) {
555 2         16 $fh_out = open_filehandle( $self->{out_file}, 'a' );
556 2         34 say $fh_out "[";
557             }
558              
559             # Proceed depending on whether we have an ARRAY or not
560             # NB: Caution with RAM (we store all in memory except for omop2bff)
561 13         25 my $out_data;
562 13 100       83 if ( ref $in_data eq ref [] ) {
563              
564             # Print if we have ARRAY
565 12 50       50 say "$self->{method}: ARRAY" if $self->{debug};
566              
567             # Initialize needed variables
568 12         22 my $count = 0;
569 12         28 my $total = 0;
570 12         28 my $elements = scalar @{$in_data};
  12         31  
571              
572             # Start looping
573             # In $self->{data} we have all participants' data, but,
574             # WE DELIBERATELY SEPARATE ARRAY ELEMENTS FROM $self->{data}
575              
576 12         39 for ( @{$in_data} ) {
  12         46  
577 2228         2717 $count++;
578              
579             # Print info
580 2228 50       3711 say "[$count] ARRAY ELEMENT from $elements" if $self->{debug};
581              
582             # NB: If we get "null" participants the validator will complain
583             # about not having "id" or any other required property
584 2228         5844 my $method_result = $func{ $self->{method} }->( $self, $_ ); # Method
585              
586             # Only proceeding if we got value from method
587 2228 100       4348 if ($method_result) {
588 1288         1430 $total++;
589 1288 50       2474 say " * [$count] ARRAY ELEMENT is defined" if $self->{debug};
590              
591             # For omop2bff and omop2pxf we serialize by individual
592 1288 100 66     3987 if ( exists $self->{omop_cli} && $self->{omop_cli} ) {
593 1000         1747 my $out = omop_dispatcher( $self, $method_result );
594 1000         54628 print $fh_out $$out;
595             print $fh_out ",\n"
596             unless ( $total == $elements
597 1000 100 66     18447 || $total == $self->{max_lines_sql} );
598             }
599              
600             # For the other methods we have arrayref $out_data and serialize at once
601             else {
602 288         350 push @{$out_data}, $method_result;
  288         726  
603              
604             #say total_size($out_data);
605             }
606             }
607             }
608              
609             say "==============\nIndividuals total: $total\n"
610 12 50 33     80 if ( $self->{verbose} && $self->{method} eq 'omop2bff' );
611             }
612              
613             # NOT ARRAY
614             else {
615 1 50       3 say "$self->{method}: NOT ARRAY" if $self->{debug};
616 1         5 $out_data = $func{ $self->{method} }->( $self, $in_data ); # Method
617             }
618              
619             # Close connections ONCE
620 13 100       114 close_connections_SQLite($self) unless $self->{method} eq 'bff2pxf';
621              
622             # Close filehandle if omop2bff (w/ premature return)
623 13 50 66     86 if ( exists $self->{omop_cli} && $self->{omop_cli} ) {
624 2         12 say $fh_out "\n]";
625 2         18 close $fh_out;
626 2         1620 return 1;
627             }
628              
629             # Return data
630 11         13400 return $out_data;
631             }
632              
633             sub omop_dispatcher {
634              
635 1000     1000 0 1564 my ( $self, $method_result ) = @_;
636              
637             # For omop2bff and omop2pxf we serialize by individual
638 1000         1221 my $out;
639              
640             # omop2bff encodes directly
641 1000 100       1957 if ( $self->{method_ori} ne 'omop2pxf' ) {
642 500         39101 $out = JSON::XS->new->utf8->canonical->pretty->encode($method_result);
643             }
644              
645             # omop2pxf converts to PXF
646             else {
647 500         1206 my $pxf = do_bff2pxf( $self, $method_result );
648 500         11145 $out = JSON::XS->new->utf8->canonical->pretty->encode($pxf);
649             }
650 1000         4398 chomp $out;
651 1000         2013 return \$out;
652             }
653              
654             sub omop_stream_dispatcher {
655              
656 1     1 0 3 my $arg = shift;
657 1         3 my $self = $arg->{self};
658 1         2 my $filepath = $arg->{filepath};
659 1         2 my $filepaths = $arg->{filepaths};
660 1         3 my $omop_tables = $self->{prev_omop_tables};
661              
662             # Open connection to SQLite databases ONCE
663 1 50       9 open_connections_SQLite($self) if $self->{method} ne 'bff2pxf';
664              
665             # First we transform the data from AoH to HoH to speed up the computation
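                #   e.g. [ { person_id => 1, ... }, { person_id => 2, ... } ]
                #        becomes { 1 => { person_id => 1, ... }, 2 => { ... } }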
666 1         2 my $person = { map { $_->{person_id} => $_ } @{ $self->{data}{PERSON} } };
  2694         6416  
  1         29  
667              
668             # Give memory back to the system
669 1         255 delete $self->{data}{PERSON};
670              
671             # CSVs
672 1 50       8 if (@$filepaths) {
673 0         0 for (@$filepaths) {
674 0 0       0 say "Processing file ... <$_>" if $self->{verbose};
675             read_csv_stream(
676             {
677             in => $_,
678             sep => $self->{sep},
679 0         0 self => $self,
680             person => $person
681             }
682             );
683             }
684             }
685              
686             # PostgreSQL dump
687             else {
688              
689             # Now iterate
690 1         2 for my $table ( @{$omop_tables} ) {
  1         65  
691              
692             # We already loaded @stream_ram_memory_tables;
693 3 100   6   206 next if any { $_ eq $table } @stream_ram_memory_tables;
  6         38  
694 1 50       7 say "Processing table ... <$table>" if $self->{verbose};
695 1         6 $self->{omop_tables} = [$table];
696 1         11 read_sqldump_stream(
697             { in => $filepath, self => $self, person => $person } );
698             }
699             }
700              
701             # Close connections ONCE
702 1 50       25 close_connections_SQLite($self) unless $self->{method} eq 'bff2pxf';
703 1         9751 return 1;
704             }
705              
706             sub omop2bff_stream_processing {
707              
708 67707     67707 0 104921 my ( $self, $data ) = @_;
709              
710             # We have this subroutine here because the class was instantiated in Pheno.pm
711 67707         163628 return do_omop2bff( $self, $data ); # Method
712             }
713              
714             sub Dumper_concise {
715             {
716 0     0 0   local $Data::Dumper::Terse = 1;
  0            
717 0           local $Data::Dumper::Indent = 1;
718 0           local $Data::Dumper::Useqq = 1;
719 0           local $Data::Dumper::Deparse = 1;
720 0           local $Data::Dumper::Quotekeys = 1;
721 0           local $Data::Dumper::Sortkeys = 1;
722 0           local $Data::Dumper::Pair = ' : ';
723 0           print Dumper shift;
724             }
725             }
726              
727             1;
728              
729             =head1 NAME
730              
731             Convert::Pheno - A module to interconvert common data models for phenotypic data
732            
733             =head1 SYNOPSIS
734              
735             use Convert::Pheno;
736              
737             # Define data
738             my $my_pxf_json_data = {
739             "phenopacket" => {
740             "id" => "P0007500",
741             "subject" => {
742             "id" => "P0007500",
743             "dateOfBirth" => "unknown-01-01T00:00:00Z",
744             "sex" => "FEMALE"
745             }
746             }
747             };
748              
749             # Create object
750             my $convert = Convert::Pheno->new(
751             {
752             data => $my_pxf_json_data,
method => 'pxf2bff'
754             }
755             );
756              
757             # Apply a method
my $data = $convert->pxf2bff;
759              
760             =head1 DESCRIPTION
761              
762             For a better description, please read the following documentation:
763              
764             =over
765              
766             =item General:
767              
768             L<https://cnag-biomedical-informatics.github.io/convert-pheno>
769              
770             =item Command-Line Interface:
771              
772             L<https://github.com/CNAG-Biomedical-Informatics/convert-pheno#readme>
773              
774             =back
775              
776             =head1 CITATION
777              
The author requests that any published work that utilizes C<Convert-Pheno> includes a citation of the following reference:
779              
780             Rueda, M. et al. "Convert-Pheno: A software toolkit for the interconversion of standard data models for phenotypic data", (2023), I<Journal of Biomedical Informatics>.
781              
782             =head1 AUTHOR
783              
784             Written by Manuel Rueda, PhD. Info about CNAG can be found at L<https://www.cnag.eu>.
785              
786             =head1 METHODS
787              
788             See L<https://cnag-biomedical-informatics.github.io/convert-pheno/use-as-a-module>.
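
As a quick illustration, a REDCap conversion via the module API looks roughly
like this (a hedged sketch: file names are hypothetical, and depending on your
setup additional attributes, e.g. C<schema_file>, may be required):

    use Convert::Pheno;

    my $convert = Convert::Pheno->new(
        {
            in_file           => 'redcap_export.csv',
            redcap_dictionary => 'redcap_dictionary.csv',
            mapping_file      => 'mapping_file.yaml',
            method            => 'redcap2bff'
        }
    );
    my $bff = $convert->redcap2bff;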
789              
790             =head1 COPYRIGHT
791              
This Perl file is copyrighted. See the LICENSE file included in this distribution.
793              
794             =cut