File Coverage

blib/lib/RandomJungle/TestData.pm
Criterion Covered Total %
statement 7 7 100.0
branch n/a
condition n/a
subroutine 3 3 100.0
pod 0 1 0.0
total 10 11 90.9


line stmt bran cond sub pod time code
1             package RandomJungle::TestData;
2              
3             =head1 NAME
4              
5             RandomJungle::TestData - Test data for the RandomJungle* modules
6              
7             =cut
8              
9 5     5   289084 use strict;
  5         15  
  5         360  
10 5     5   43 use warnings;
  5         10  
  5         6746  
11              
12             =head1 VERSION
13              
14             Version 0.01
15              
16             =cut
17              
18             our $VERSION = 0.01;
19              
20             our @ISA = ( "Exporter" );
21             our @EXPORT_OK = qw( get_exp_data );
22              
23             # This package contains the expected values for the 'testdata_20samples_10vars' dataset
24              
25             my %testdata;
26              
27             sub get_exp_data
28             {
29 3     3 0 205 return \%testdata;
30             }
31              
32             #*************************************************
33             # XML
34             #*************************************************
35              
36             $testdata{XML}{filename} = 'testdata_20samples_10vars.jungle.xml';
37              
38             @{ $testdata{XML}{tree_ids} } = ( 0 .. 9 );
39              
40             $testdata{XML}{options} =
41             {
42             file => '/home/freir2/RandomJungle/testdata/testdata_20samples_10vars.raw',
43             delimiter => ' ', treetype => 1, ntree => 10, mtry => 3, depvar => 1,
44             depvarname => 'PHENOTYPE', nrow => 20, ncol => 12, varnamesrow => 0,
45             depvarcol => 0, outprefix => 'testdata_20samples_10vars', skiprow => 0,
46             skipcol => 0, missingcode => 3, impmeasure => 3, backsel => 0, nimpvar => 100,
47             downsampling => 0, verbose => 0, memMode => 2, write => 2, predict => '',
48             varproximities => 0, summary => 0, testlib => 0, plugin => '',
49             colselection => '', impute => 0, gwa => 0, impcont => 0, transpose => 0,
50             sampleproximities => 0, weightsim => 0, extractdata => 0, yaimp => 0,
51             seeed => 1, nthreads => 0, pluginpar => '', maxtreedepth => 100,
52             targetpartitionsize => 1, pedfile => 1,
53             };
54              
55             $testdata{XML}{treedata}{1} =
56             {
57             varID => '((9,11,4,6,10,0,8,11,0,0,0,3,2,0,0,4,0,0,0,0,0))',
58             values => '(((1)),((1)),((1)),((0)),((0)),((1)),((0)),((0)),((1)),((2)),((1)),((0)),((1)),((1)),((2)),((0)),((1)),((2)),((2)),((2)),((1)))',
59             branches => '((1,20),(2,19),(3,18),(4,11),(5,6),(0,0),(7,10),(8,9),(0,0),(0,0),(0,0),(12,15),(13,14),(0,0),(0,0),(16,17),(0,0),(0,0),(0,0),(0,0),(0,0))',
60             nodes_at_vector_i => {
61             0 => {
62             vector_index => 0,
63             is_terminal => 0,
64             variable_index => 9,
65             next_vector_i => [ 1, 1, 20 ],
66             path => [ 0 ],
67             },
68             1 => {
69             vector_index => 1,
70             is_terminal => 0,
71             variable_index => 11,
72             next_vector_i => [ 2, 2, 19 ],
73             index_of_parent_node => 0,
74             path => [ 0, 1 ],
75             },
76             2 => {
77             vector_index => 2,
78             is_terminal => 0,
79             variable_index => 4,
80             next_vector_i => [ 3, 3, 18 ],
81             index_of_parent_node => 1,
82             path => [ 0, 1, 2 ],
83             },
84             3 => {
85             vector_index => 3,
86             is_terminal => 0,
87             variable_index => 6,
88             next_vector_i => [ 4, 11, 11 ],
89             index_of_parent_node => 2,
90             path => [ 0, 1, 2, 3 ],
91             },
92             4 => {
93             vector_index => 4,
94             is_terminal => 0,
95             variable_index => 10,
96             next_vector_i => [ 5, 6, 6 ],
97             index_of_parent_node => 3,
98             path => [ 0, 1, 2, 3, 4 ],
99             },
100             5 => {
101             vector_index => 5,
102             is_terminal => 1,
103             terminal_value => 1,
104             index_of_parent_node => 4,
105             path => [ 0, 1, 2, 3, 4, 5 ],
106             },
107             6 => {
108             vector_index => 6,
109             is_terminal => 0,
110             variable_index => 8,
111             next_vector_i => [ 7, 10, 10 ],
112             index_of_parent_node => 4,
113             path => [ 0, 1, 2, 3, 4, 6 ],
114             },
115             7 => {
116             vector_index => 7,
117             is_terminal => 0,
118             variable_index => 11,
119             next_vector_i => [ 8, 9, 9 ],
120             index_of_parent_node => 6,
121             path => [ 0, 1, 2, 3, 4, 6, 7 ],
122             },
123             8 => {
124             vector_index => 8,
125             is_terminal => 1,
126             terminal_value => 1,
127             index_of_parent_node => 7,
128             path => [ 0, 1, 2, 3, 4, 6, 7, 8 ],
129             },
130             9 => {
131             vector_index => 9,
132             is_terminal => 1,
133             terminal_value => 2,
134             index_of_parent_node => 7,
135             path => [ 0, 1, 2, 3, 4, 6, 7, 9 ],
136             },
137             10 => {
138             vector_index => 10,
139             is_terminal => 1,
140             terminal_value => 1,
141             index_of_parent_node => 6,
142             path => [ 0, 1, 2, 3, 4, 6, 10 ],
143             },
144             11 => {
145             vector_index => 11,
146             is_terminal => 0,
147             variable_index => 3,
148             next_vector_i => [ 12, 15, 15 ],
149             index_of_parent_node => 3,
150             path => [ 0, 1, 2, 3, 11 ],
151             },
152             12 => {
153             vector_index => 12,
154             is_terminal => 0,
155             variable_index => 2,
156             next_vector_i => [ 13, 13, 14 ],
157             index_of_parent_node => 11,
158             path => [ 0, 1, 2, 3, 11, 12 ],
159             },
160             13 => {
161             vector_index => 13,
162             is_terminal => 1,
163             terminal_value => 1,
164             index_of_parent_node => 12,
165             path => [ 0, 1, 2, 3, 11, 12, 13 ],
166             },
167             14 => {
168             vector_index => 14,
169             is_terminal => 1,
170             terminal_value => 2,
171             index_of_parent_node => 12,
172             path => [ 0, 1, 2, 3, 11, 12, 14 ],
173             },
174             15 => {
175             vector_index => 15,
176             is_terminal => 0,
177             variable_index => 4,
178             next_vector_i => [ 16, 17, 17 ],
179             index_of_parent_node => 11,
180             path => [ 0, 1, 2, 3, 11, 15 ],
181             },
182             16 => {
183             vector_index => 16,
184             is_terminal => 1,
185             terminal_value => 1,
186             index_of_parent_node => 15,
187             path => [ 0, 1, 2, 3, 11, 15, 16 ],
188             },
189             17 => {
190             vector_index => 17,
191             is_terminal => 1,
192             terminal_value => 2,
193             index_of_parent_node => 15,
194             path => [ 0, 1, 2, 3, 11, 15, 17 ],
195             },
196             18 => {
197             vector_index => 18,
198             is_terminal => 1,
199             terminal_value => 2,
200             index_of_parent_node => 2,
201             path => [ 0, 1, 2, 18 ],
202             },
203             19 => {
204             vector_index => 19,
205             is_terminal => 1,
206             terminal_value => 2,
207             index_of_parent_node => 1,
208             path => [ 0, 1, 19 ],
209             },
210             20 => {
211             vector_index => 20,
212             is_terminal => 1,
213             terminal_value => 1,
214             index_of_parent_node => 0,
215             path => [ 0, 20 ],
216             },
217             },
218             };
219              
220             $testdata{XML}{treedata}{1}{var_indices_used_in_tree} =
221             do
222             {
223             my @nonterm_noderefs =
224             grep { defined $_->{variable_index} }
225             ( values %{ $testdata{XML}{treedata}{1}{nodes_at_vector_i} } );
226             my @var_indices = map { $_->{variable_index} } @nonterm_noderefs;
227             my %uniq_var_i = map { $_ => 1 } @var_indices;
228             [ sort { $a <=> $b } ( keys %uniq_var_i ) ];
229             };
230              
231             #*************************************************
232             # OOB
233             #*************************************************
234              
235             $testdata{OOB}{filename} = 'testdata_20samples_10vars.oob';
236              
237             $testdata{OOB}{matrix} =
238             [
239             '0 0 0 1 1 0 1 0 0 0',
240             '0 1 1 0 1 1 1 1 0 1',
241             '0 0 0 0 1 0 0 1 0 1',
242             '0 0 0 1 0 0 0 0 0 1',
243             '0 0 1 0 0 0 0 0 0 0',
244             '0 1 0 0 0 0 0 0 0 1',
245             '0 0 0 0 0 0 1 0 0 0',
246             '1 0 0 0 0 0 1 1 0 0',
247             '0 0 0 0 1 1 0 0 0 1',
248             '0 1 1 0 0 0 0 1 0 0',
249             '0 0 1 1 1 1 1 1 0 0',
250             '1 0 0 1 1 1 0 0 0 1',
251             '0 0 1 0 0 1 0 0 0 1',
252             '1 0 1 0 0 1 0 0 1 1',
253             '1 0 1 1 1 0 0 0 0 1',
254             '0 0 0 1 1 0 0 0 1 0',
255             '0 1 1 1 0 0 1 1 0 0',
256             '0 0 0 0 0 0 0 1 1 1',
257             '1 1 0 0 0 0 1 0 0 1',
258             '1 0 0 1 0 0 1 0 1 0',
259             ];
260              
261             $testdata{OOB}{data_by_sample_index}{0} =
262             {
263             sample_used_to_construct_trees => [ 0, 1, 2, 5, 7, 8, 9 ],
264             sample_not_used_to_construct_trees => [ 3, 4, 6 ],
265             state_for_tree => [ 0, 0, 0, 1, 1, 0, 1, 0, 0, 0 ],
266             };
267             # removed key (not needed?): trees_with_unrecognized_OOB_state => [],
268              
269             $testdata{OOB}{data_by_sample_index}{19} =
270             {
271             sample_used_to_construct_trees => [ 1, 2, 4, 5, 7, 9 ],
272             sample_not_used_to_construct_trees => [ 0, 3, 6, 8 ],
273             state_for_tree => [ 1, 0, 0, 1, 0, 0, 1, 0, 1, 0 ],
274             };
275             # removed key (not needed?): trees_with_unrecognized_OOB_state => [],
276              
277             $testdata{OOB}{data_by_tree_index}{2} =
278             {
279             oob_samples => [ qw( s2 s5 s10 s11 s13 s14 s15 s17 ) ],
280             in_bag_samples => [ qw( s1 s3 s4 s6 s7 s8 s9 s12 s16 s18 s19 s20 ) ],
281             state_for_sample_index => [ 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0 ],
282             };
283             # removed key (not needed?): trees_with_unrecognized_OOB_state => [],
284              
285             #*************************************************
286             # RAW
287             #*************************************************
288              
289             $testdata{RAW}{filename} = 'testdata_20samples_10vars.raw';
290              
291             $testdata{RAW}{variable_labels} = [ 'SEX', 'PHENOTYPE', 'Var1', 'Var2', 'Var3', 'Var4',
292             'Var5', 'Var6', 'Var7', 'Var8', 'Var9', 'Var10' ];
293              
294             $testdata{RAW}{header_labels} = [ 'FID', 'IID', 'PAT', 'MAT' ];
295              
296             foreach my $i ( 0 .. 19 )
297             {
298             $testdata{RAW}{sample_labels}[$i] = 's' . ($i+1);
299             $testdata{RAW}{data_by_sample_label}{ 's' . ($i+1) }{i} = $i;
300             }
301              
302             $testdata{RAW}{data_by_sample_label}{s1}{phenotype} = 1;
303             $testdata{RAW}{data_by_sample_label}{s2}{phenotype} = 1;
304             $testdata{RAW}{data_by_sample_label}{s3}{phenotype} = 1;
305             $testdata{RAW}{data_by_sample_label}{s4}{phenotype} = 2;
306             $testdata{RAW}{data_by_sample_label}{s5}{phenotype} = 1;
307             $testdata{RAW}{data_by_sample_label}{s6}{phenotype} = 1;
308             $testdata{RAW}{data_by_sample_label}{s7}{phenotype} = 2;
309             $testdata{RAW}{data_by_sample_label}{s8}{phenotype} = 2;
310             $testdata{RAW}{data_by_sample_label}{s9}{phenotype} = 1;
311             $testdata{RAW}{data_by_sample_label}{s10}{phenotype} = 1;
312             $testdata{RAW}{data_by_sample_label}{s11}{phenotype} = 2;
313             $testdata{RAW}{data_by_sample_label}{s12}{phenotype} = 1;
314             $testdata{RAW}{data_by_sample_label}{s13}{phenotype} = 1;
315             $testdata{RAW}{data_by_sample_label}{s14}{phenotype} = 2;
316             $testdata{RAW}{data_by_sample_label}{s15}{phenotype} = 2;
317             $testdata{RAW}{data_by_sample_label}{s16}{phenotype} = 1;
318             $testdata{RAW}{data_by_sample_label}{s17}{phenotype} = 2;
319             $testdata{RAW}{data_by_sample_label}{s18}{phenotype} = 2;
320             $testdata{RAW}{data_by_sample_label}{s19}{phenotype} = 1;
321             $testdata{RAW}{data_by_sample_label}{s20}{phenotype} = 2;
322              
323             $testdata{RAW}{data_by_sample_index}{0} =
324             {
325             label => 's1',
326             phenotype => 1,
327             orig_data => '1 s1 0 0 1 1 1 0 0 0 1 1 1 0 0 0',
328             spliced_data => [ 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0 ],
329             };
330              
331             $testdata{RAW}{data_by_sample_index}{19} =
332             {
333             label => 's20',
334             phenotype => 2,
335             orig_data => '20 s20 0 0 1 2 0 1 1 0 1 1 0 1 0',
336             spliced_data => [ 1, 2, 0, 1, 1, 0, 1, 1, 0, 1, 0 ],
337             };
338              
339             #*************************************************
340             # CLASSIFICATION
341             #*************************************************
342              
343             # manually-determined classification data
344             # $testdata{classification}{$tree_id}{$sample_label} = $vector_index_of_terminal_node
345             $testdata{classification}{1} =
346             {
347             s1 => 13,
348             s2 => 17,
349             s3 => 5,
350             s4 => 19,
351             s5 => 16,
352             s6 => 10,
353             s7 => 14,
354             s8 => 9,
355             s9 => 20,
356             s10 => 13,
357             s11 => 18,
358             s12 => 10,
359             s13 => 8,
360             s14 => 18,
361             s15 => 19,
362             s16 => 5,
363             s17 => 13,
364             s18 => 17,
365             s19 => 8,
366             s20 => 19,
367             };
368              
369             # computationally determined classification data
370             # $testdata{classification}{data_by_sample_label}{$sample_label}{by_tree}{$tree_id} = href
371              
372             $testdata{classification}{data_by_sample_label}{s1}{by_tree} =
373             {
374             0 => { pred_pheno => 1, term_node_vi => 4 },
375             1 => { pred_pheno => 1, term_node_vi => 13 },
376             2 => { pred_pheno => 1, term_node_vi => 9 },
377             3 => { pred_pheno => 1, term_node_vi => 5 },
378             4 => { pred_pheno => 1, term_node_vi => 12 },
379             5 => { pred_pheno => 1, term_node_vi => 8 },
380             6 => { pred_pheno => 1, term_node_vi => 2 },
381             7 => { pred_pheno => 1, term_node_vi => 12 },
382             8 => { pred_pheno => 1, term_node_vi => 12 },
383             9 => { pred_pheno => 1, term_node_vi => 7 },
384             };
385              
386             #*************************************************
387              
388             =head1 SEE ALSO
389              
390             RandomJungle::Jungle, RandomJungle::Tree, RandomJungle::Tree::Node,
391             RandomJungle::XML, RandomJungle::OOB, RandomJungle::RAW,
392             RandomJungle::DB, RandomJungle::Classification_DB
393              
394             =head1 AUTHOR
395              
396             Robert R. Freimuth
397              
398             =head1 COPYRIGHT
399              
400             Copyright (c) 2011 Mayo Foundation for Medical Education and Research. All rights reserved.
401              
402             This program is free software; you can redistribute it and/or modify
403             it under the same terms as Perl itself.
404              
405             The full text of the license can be found in the
406             LICENSE file included with this module.
407              
408             =cut
409              
410             1;