File Coverage

blib/lib/NNexus/Classification.pm
Criterion Covered Total %
statement 87 96 90.6
branch 15 24 62.5
condition 9 14 64.2
subroutine 9 10 90.0
pod 2 6 33.3
total 122 150 81.3


line stmt bran cond sub pod time code
1             # /=====================================================================\ #
2             # | NNexus Autolinker | #
3             # | Named Entity Classification Module | #
4             # |=====================================================================| #
5             # | Part of the Planetary project: http://trac.mathweb.org/planetary | #
6             # | Research software, produced as part of work done by: | #
7             # | the KWARC group at Jacobs University | #
8             # | Copyright (c) 2012 | #
9             # | Released under the MIT License (MIT) | #
10             # |---------------------------------------------------------------------| #
11             # | Adapted from the original NNexus code by | #
12             # | James Gardner and Aaron Krowne | #
13             # |---------------------------------------------------------------------| #
14             # | Deyan Ginev #_# | #
15             # | http://kwarc.info/people/dginev (o o) | #
16             # \=========================================================ooo==U==ooo=/ #
17             package NNexus::Classification;
18 6     6   21346 use strict;
  6         8  
  6         206  
19 6     6   24 use warnings;
  6         6  
  6         147  
20 6     6   21 use Exporter;
  6         37  
  6         333  
21             our @ISA = qw(Exporter);
22             our @EXPORT_OK = qw(msc_similarity disambiguate);
23 6     6   34 use List::Util qw(max min);
  6         8  
  6         19610  
24              
25             # Let's do things differently here.
26             # We will use Jan Wilken Doerrie's MSC similarity metric:
27             # "we defined the similarity between two categories as the size of the
28             # intersections divided by the size of the union over the ZBL dataset"
29             our $msc_similarities = [ # 63x63 matrix, top-level MSC 2000 categories
30             [1.00000000,0.08157364,0.05611695,0.01857567,0.00526125,0.00493453,0.02162828,0.01738086,0.01488942,0.01995192,0.01850128,0.01315144,0.01112859,0.00875912,0.00258325,0.01958233,0.01059040,0.02694878,0.00814224,0.01508530,0.00391065,0.01222424,0.00659643,0.01744963,0.02033044,0.01715241,0.00549930,0.00368565,0.00844784,0.01205280,0.00590242,0.00640019,0.00470555,0.01450557,0.01117073,0.01519435,0.01915299,0.00949692,0.02112821,0.01075672,0.00831446,0.01284788,0.02376349,0.01846751,0.02192419,0.02827359,0.06235569,0.01871924,0.02326626,0.02549734,0.01149905,0.00904571,0.02978857,0.01858157,0.01913664,0.00986617,0.00980736,0.02622097,0.02015345,0.02370473,0.02309777,0.02525110,0.06049483],
31             [0.08157364,1.00000000,0.04843149,0.00532875,0.00205666,0.00135896,0.01867335,0.01162555,0.00454580,0.00914432,0.00561143,0.00339066,0.00233557,0.00318598,0.00112062,0.00596528,0.00474509,0.01721411,0.00592865,0.00682061,0.00180499,0.00344266,0.00441784,0.00357584,0.00276735,0.00367641,0.00180428,0.00557237,0.00315267,0.00397678,0.00230947,0.00158525,0.00194050,0.00436769,0.00245270,0.00323992,0.03266539,0.00462011,0.00786873,0.00592230,0.00625668,0.00554406,0.00367927,0.00894575,0.00961623,0.00339433,0.00442661,0.02332687,0.00304362,0.00367554,0.00783214,0.00359950,0.01357759,0.00583133,0.01961088,0.03410275,0.00370477,0.00360240,0.00517674,0.00367842,0.00215416,0.00470674,0.02079996],
32             [0.05611695,0.04843149,1.00000000,0.01860533,0.09942421,0.05170597,0.01116272,0.02366804,0.01088601,0.00678883,0.00619870,0.01188854,0.00194541,0.03826328,0.00037826,0.03378654,0.00411333,0.01952391,0.02813212,0.00144777,0.00048211,0.00200346,0.00019596,0.00120727,0.00040013,0.00283891,0.00242860,0.00174449,0.00086392,0.00117239,0.00139450,0.00026806,0.00036410,0.01135132,0.00259196,0.00098411,0.00869706,0.00278418,0.00065182,0.04870472,0.00162055,0.00190805,0.00122017,0.01033098,0.00408488,0.00158937,0.11329349,0.00131901,0.00031955,0.00016804,0.00014428,0.00025776,0.01361661,0.00071465,0.00278636,0.00058838,0.00033706,0.01095038,0.02561634,0.00333059,0.00669844,0.02532072,0.00174317],
33             [0.01857567,0.00532875,0.01860533,1.00000000,0.03854465,0.00695286,0.06147985,0.00381552,0.01111890,0.01036229,0.04460750,0.01316603,0.01265964,0.00515676,0.00079911,0.07192152,0.00451860,0.00357714,0.00334221,0.00286208,0.00143900,0.00147393,0.02179999,0.00186239,0.00088413,0.00484911,0.00313808,0.00180966,0.00263198,0.00221683,0.00284620,0.00101324,0.00022575,0.00341334,0.00269952,0.00114372,0.05704468,0.04862097,0.00177969,0.00785647,0.00544478,0.01902742,0.00199649,0.02516203,0.01832947,0.00795814,0.07579728,0.00124556,0.00085965,0.00014810,0.00034212,0.00039567,0.00564823,0.01156505,0.00036484,0.00003067,0.00026446,0.04216157,0.01448784,0.02353333,0.00356946,0.04458390,0.00179613],
34             [0.00526125,0.00205666,0.09942421,0.03854465,1.00000000,0.13498574,0.00433243,0.01252955,0.02119390,0.00273427,0.01431677,0.03376084,0.00576282,0.05441052,0.00177224,0.05178723,0.00922929,0.00392307,0.01936738,0.00051527,0.00099050,0.00025145,0.00058066,0.00029417,0.00013702,0.00106777,0.00244524,0.00110532,0.00055186,0.00040533,0.00124399,0.00035144,0.00010310,0.01638482,0.00445366,0.00120617,0.01113947,0.01531835,0.00039070,0.06202833,0.00706150,0.00336538,0.00045634,0.00300478,0.00139167,0.00075193,0.01245292,0.00033423,0.00002379,0.00001954,0.00003722,0.00006753,0.00970181,0.00050743,0.00086061,0.00000000,0.00009151,0.00290705,0.00745701,0.00204881,0.00119390,0.02242817,0.00025653],
35             [0.00493453,0.00135896,0.05170597,0.00695286,0.13498574,1.00000000,0.00108207,0.00591517,0.00605827,0.00141397,0.00741604,0.02646703,0.01336303,0.06039531,0.00035010,0.06902687,0.00574293,0.00052350,0.00053657,0.00002714,0.00000000,0.00000000,0.00005061,0.00000000,0.00003237,0.00004214,0.00101535,0.00008925,0.00014061,0.00003739,0.00007630,0.00016497,0.00004694,0.00062654,0.00021500,0.00000000,0.00378012,0.00184760,0.00017622,0.00733195,0.00199509,0.00039905,0.00009479,0.00026701,0.00007847,0.00065581,0.00679404,0.00022507,0.00002622,0.00001058,0.00000000,0.00009166,0.00075771,0.00011908,0.00010628,0.00000000,0.00000000,0.00028208,0.00056563,0.00035722,0.00077455,0.00193900,0.00064115],
36             [0.02162828,0.01867335,0.01116272,0.06147985,0.00433243,0.00108207,1.00000000,0.07510125,0.02964458,0.12328037,0.02564406,0.01861066,0.01072190,0.00610098,0.01736422,0.04876552,0.03999814,0.01013660,0.01648549,0.02365574,0.00114353,0.01283800,0.04351610,0.00310510,0.00250883,0.01927488,0.00578289,0.01215249,0.00733156,0.01013116,0.00939150,0.00237590,0.00047886,0.00438082,0.00267569,0.00027081,0.01065916,0.02057247,0.00373513,0.00293046,0.00387222,0.00807042,0.00871024,0.00918703,0.00147008,0.01090241,0.01622746,0.00087207,0.00010859,0.00011596,0.00047994,0.00016857,0.00977301,0.00545278,0.00160478,0.00010979,0.00000000,0.00213201,0.00103991,0.00105356,0.00090291,0.04688816,0.00337673],
37             [0.01738086,0.01162555,0.02366804,0.00381552,0.01252955,0.00591517,0.07510125,1.00000000,0.10327601,0.05552831,0.02090534,0.04187867,0.00738352,0.01105315,0.01170594,0.02435063,0.00429508,0.03557033,0.00178563,0.02655667,0.00033904,0.00973898,0.00672954,0.00693442,0.00078453,0.00241796,0.00796306,0.00207254,0.00332360,0.00160225,0.00290728,0.00200535,0.00017666,0.00755212,0.00168273,0.00028101,0.01917842,0.00421842,0.00106842,0.00377618,0.00492809,0.00231786,0.00275841,0.00050158,0.00015461,0.00975019,0.00727802,0.00089392,0.00012882,0.00002086,0.00009797,0.00021588,0.00095943,0.00042942,0.00054114,0.00007965,0.00000000,0.00062483,0.00020720,0.00027694,0.00276635,0.00486968,0.00339206],
38             [0.01488942,0.00454580,0.01088601,0.01111890,0.02119390,0.00605827,0.02964458,0.10327601,1.00000000,0.14900523,0.02532176,0.09917895,0.01360819,0.05210757,0.01503290,0.03515368,0.00379712,0.00287966,0.00067947,0.00302452,0.00014375,0.02226528,0.00235095,0.00174760,0.00080893,0.00196637,0.00355291,0.00048213,0.00217216,0.00078930,0.00063203,0.00112325,0.00006953,0.00426375,0.00128080,0.00022091,0.00456440,0.00935890,0.00140085,0.00611161,0.01957090,0.00565262,0.00378400,0.00031160,0.00065168,0.00249695,0.00858768,0.00057049,0.00008359,0.00002941,0.00011305,0.00010245,0.00105292,0.00026795,0.00014010,0.00000000,0.00027875,0.00106974,0.00004685,0.00060664,0.00289897,0.00612092,0.00117948],
39             [0.01995192,0.00914432,0.00678883,0.01036229,0.00273427,0.00141397,0.12328037,0.05552831,0.14900523,1.00000000,0.01372342,0.04089925,0.03216888,0.03963411,0.03271138,0.04915837,0.02431038,0.00392283,0.00104910,0.02161542,0.00081625,0.17399932,0.01041594,0.00381929,0.00396809,0.01526674,0.00204542,0.00011577,0.00177630,0.00079854,0.00312736,0.00160699,0.00015678,0.00310573,0.00173009,0.00057235,0.02585302,0.01645332,0.03430529,0.00272510,0.03361024,0.04910434,0.03047494,0.00050122,0.00031984,0.00272203,0.00552471,0.00299371,0.00008030,0.00011027,0.00051862,0.00008856,0.02816400,0.00223054,0.00878367,0.00005787,0.00002673,0.00080236,0.00028778,0.00047446,0.00134026,0.01790618,0.00051373],
40             [0.01850128,0.00561143,0.00619870,0.04460750,0.01431677,0.00741604,0.02564406,0.02090534,0.02532176,0.01372342,1.00000000,0.04495687,0.01809097,0.00825840,0.00188975,0.03007862,0.00764406,0.01730886,0.00282589,0.01172287,0.00164933,0.00389351,0.01197920,0.01455147,0.00347447,0.00669667,0.01056136,0.00352644,0.00909528,0.01061630,0.00248232,0.00446782,0.00385592,0.01283188,0.03121173,0.00698622,0.02200911,0.01214765,0.01022627,0.00180644,0.00246790,0.00323702,0.00523446,0.01917991,0.01575161,0.06158560,0.00760292,0.00694049,0.00478471,0.00082096,0.00367486,0.00057709,0.01518055,0.01160495,0.00572667,0.00021805,0.00098801,0.01278296,0.00415772,0.00773684,0.04709159,0.01208866,0.00336093],
41             [0.01315144,0.00339066,0.01188854,0.01316603,0.03376084,0.02646703,0.01861066,0.04187867,0.09917895,0.04089925,0.04495687,1.00000000,0.13736403,0.11612482,0.02659091,0.12470886,0.01323026,0.00047464,0.00044968,0.00112657,0.00009768,0.00573489,0.00318360,0.00051594,0.00070273,0.00211532,0.00196784,0.00059630,0.00037158,0.00027242,0.00282797,0.00033981,0.00015541,0.01651029,0.00546494,0.00018146,0.00558436,0.00267307,0.00489924,0.00526033,0.01933054,0.01241364,0.00974384,0.00055166,0.00006572,0.00038290,0.00337312,0.00055779,0.00002137,0.00000000,0.00000000,0.00000000,0.02064372,0.00222026,0.00081263,0.00003506,0.00000000,0.00023440,0.00009475,0.00013492,0.00134141,0.00240426,0.00027885],
42             [0.01112859,0.00233557,0.00194541,0.01265964,0.00576282,0.01336303,0.01072190,0.00738352,0.01360819,0.03216888,0.01809097,0.13736403,1.00000000,0.03104971,0.00777443,0.05533815,0.10059134,0.00042804,0.00058611,0.00250784,0.00021171,0.01568708,0.02072003,0.00317848,0.00615162,0.02370322,0.00575064,0.00032687,0.00036216,0.00062237,0.00832029,0.00094027,0.00022213,0.02323206,0.00493813,0.00064563,0.01001453,0.00277862,0.03036682,0.00078355,0.01349284,0.02104511,0.03578595,0.00150099,0.00051297,0.00038859,0.00126309,0.00544643,0.00013874,0.00025775,0.00085371,0.00031220,0.10424722,0.01947444,0.00894971,0.00018670,0.00000000,0.00040926,0.00020963,0.00277772,0.00225085,0.00098062,0.00005553],
43             [0.00875912,0.00318598,0.03826328,0.00515676,0.05441052,0.06039531,0.00610098,0.01105315,0.05210757,0.03963411,0.00825840,0.11612482,0.03104971,1.00000000,0.05955507,0.03995468,0.01494897,0.00099281,0.00313536,0.00031539,0.00028115,0.00767646,0.00029001,0.00020468,0.00041422,0.00140823,0.00028409,0.00044925,0.00042652,0.00006426,0.00205914,0.00012121,0.00003894,0.01414366,0.00140343,0.00026078,0.00422643,0.00332764,0.00447246,0.04956246,0.12995965,0.03171323,0.01188014,0.00057622,0.00013891,0.00014967,0.01075779,0.00056373,0.00007438,0.00001011,0.00004264,0.00003817,0.00792679,0.00036874,0.00104283,0.00000000,0.00000000,0.00023474,0.00106642,0.00086267,0.00166201,0.00145186,0.00008213],
44             [0.00258325,0.00112062,0.00037826,0.00079911,0.00177224,0.00035010,0.01736422,0.01170594,0.01503290,0.03271138,0.00188975,0.02659091,0.00777443,0.05955507,1.00000000,0.01562602,0.00983284,0.00014027,0.00041045,0.00035932,0.00000000,0.00281889,0.00183886,0.00012794,0.00019978,0.00122685,0.00022561,0.00000000,0.00000000,0.00012876,0.00093168,0.00000000,0.00011200,0.01759005,0.00173629,0.00005821,0.00064098,0.00152619,0.00245656,0.00099108,0.05874367,0.01961069,0.01879199,0.00010133,0.00001166,0.00000000,0.00002908,0.00028928,0.00000000,0.00000000,0.00000000,0.00000000,0.00347836,0.00033174,0.00080709,0.00000000,0.00000000,0.00001225,0.00001882,0.00002594,0.00015389,0.00002476,0.00022975],
45             [0.01958233,0.00596528,0.03378654,0.07192152,0.05178723,0.06902687,0.04876552,0.02435063,0.03515368,0.04915837,0.03007862,0.12470886,0.05533815,0.03995468,0.01562602,1.00000000,0.06595953,0.00136487,0.00350335,0.01802493,0.00096360,0.00909453,0.00515694,0.00092596,0.00073283,0.00797362,0.00525956,0.00057556,0.00034906,0.00149558,0.01390702,0.00054471,0.00018725,0.00700920,0.00276790,0.00021263,0.05220301,0.01430250,0.01125201,0.01183416,0.03213102,0.06774395,0.00526822,0.00410551,0.00065532,0.00117392,0.01695153,0.00125465,0.00081774,0.00016668,0.00056620,0.00011811,0.01195643,0.00888301,0.00146184,0.00014386,0.00001942,0.00067740,0.00087217,0.00157362,0.00068008,0.01158942,0.00059678],
46             [0.01059040,0.00474509,0.00411333,0.00451860,0.00922929,0.00574293,0.03999814,0.00429508,0.00379712,0.02431038,0.00764406,0.01323026,0.10059134,0.01494897,0.00983284,0.06595953,1.00000000,0.00328817,0.02482425,0.00616447,0.00591990,0.03196992,0.02214633,0.00303539,0.00808711,0.02152074,0.00508249,0.00264278,0.00161911,0.01106370,0.21195908,0.00576058,0.00091710,0.03571950,0.01124942,0.00191355,0.01234284,0.00343306,0.04837319,0.04712816,0.01933463,0.04609447,0.04692010,0.00572997,0.00019301,0.00129479,0.00076547,0.00962075,0.00104078,0.00109856,0.00264999,0.00041869,0.05004606,0.00436722,0.00915766,0.00048918,0.00030055,0.00011663,0.00039512,0.00076144,0.00225356,0.00133797,0.00017619],
47             [0.02694878,0.01721411,0.01952391,0.00357714,0.00392307,0.00052350,0.01013660,0.03557033,0.00287966,0.00392283,0.01730886,0.00047464,0.00042804,0.00099281,0.00014027,0.00136487,0.00328817,1.00000000,0.08067590,0.04178413,0.01288847,0.00814343,0.05918013,0.02287265,0.01192139,0.01255827,0.05727140,0.02851493,0.04112494,0.05247327,0.00817775,0.03127318,0.03346962,0.05308678,0.02299337,0.04035755,0.00753177,0.02359920,0.00502414,0.04179025,0.00149856,0.00215435,0.01544949,0.01176310,0.00279332,0.01809550,0.00277590,0.00414399,0.00252518,0.00136921,0.00191274,0.00121252,0.00204549,0.00226252,0.00068219,0.00029128,0.00048363,0.01441252,0.00342977,0.00180629,0.00600656,0.00517165,0.00897340],
48             [0.00814224,0.00592865,0.02813212,0.00334221,0.01936738,0.00053657,0.01648549,0.00178563,0.00067947,0.00104910,0.00282589,0.00044968,0.00058611,0.00313536,0.00041045,0.00350335,0.02482425,0.08067590,1.00000000,0.01136176,0.02128583,0.00313779,0.00177016,0.00395005,0.00439948,0.06575925,0.00852211,0.00751651,0.00731073,0.02724912,0.03242017,0.00630792,0.00369276,0.07529430,0.02643964,0.01881616,0.00519386,0.01723189,0.00673282,0.06325264,0.00181273,0.00496519,0.01507412,0.04863530,0.00300932,0.00262918,0.00308379,0.00202089,0.00218238,0.00155296,0.00134257,0.00131987,0.01042747,0.00960167,0.00138919,0.00065992,0.00177403,0.00228709,0.00542576,0.00281322,0.00247036,0.00861962,0.00110538],
49             [0.01508530,0.00682061,0.00144777,0.00286208,0.00051527,0.00002714,0.02365574,0.02655667,0.00302452,0.02161542,0.01172287,0.00112657,0.00250784,0.00031539,0.00035932,0.01802493,0.00616447,0.04178413,0.01136176,1.00000000,0.06483758,0.08307318,0.03566022,0.01725834,0.01750644,0.02539841,0.01688718,0.02514467,0.05486780,0.03688048,0.00521973,0.01688746,0.02445290,0.04768928,0.03471644,0.00333698,0.00610840,0.00442747,0.01372850,0.00426017,0.00223644,0.02063272,0.01163316,0.00536271,0.00062260,0.01746793,0.00129649,0.00160921,0.00404504,0.00413838,0.00337126,0.00066456,0.00454784,0.00171182,0.00098014,0.00013873,0.00077143,0.00063688,0.00013513,0.00050885,0.00794608,0.00238844,0.00064367],
50             [0.00391065,0.00180499,0.00048211,0.00143900,0.00099050,0.00000000,0.00114353,0.00033904,0.00014375,0.00081625,0.00164933,0.00009768,0.00021171,0.00028115,0.00000000,0.00096360,0.00591990,0.01288847,0.02128583,0.06483758,1.00000000,0.04072639,0.00850963,0.00148968,0.03424890,0.00161710,0.00330852,0.00214345,0.01283577,0.02128869,0.01600221,0.01043247,0.02031329,0.01823453,0.01338593,0.00553621,0.00141886,0.00394744,0.00880579,0.00196628,0.00030094,0.00124320,0.01546540,0.01752062,0.00012141,0.00815143,0.00007725,0.00166190,0.00659144,0.00314795,0.01158046,0.00120625,0.00136223,0.00162939,0.00054020,0.00071395,0.00815323,0.00026597,0.00018976,0.00073762,0.00035807,0.00090650,0.00010881],
51             [0.01222424,0.00344266,0.00200346,0.00147393,0.00025145,0.00000000,0.01283800,0.00973898,0.02226528,0.17399932,0.00389351,0.00573489,0.01568708,0.00767646,0.00281889,0.00909453,0.03196992,0.00814343,0.00313779,0.08307318,0.04072639,1.00000000,0.01126956,0.00503505,0.01361538,0.01960735,0.00316527,0.00331739,0.00744073,0.01068346,0.01134893,0.00646498,0.00267195,0.03957653,0.01929061,0.00228865,0.00421853,0.00776764,0.06510780,0.00242980,0.02196256,0.05102092,0.06929174,0.00146184,0.00017783,0.00105556,0.00039866,0.00099751,0.00020387,0.00014078,0.00093308,0.00008869,0.01896512,0.00131588,0.00768026,0.00008613,0.00007672,0.00024719,0.00005833,0.00012964,0.00082056,0.00037605,0.00015148],
52             [0.00659643,0.00441784,0.00019596,0.02179999,0.00058066,0.00005061,0.04351610,0.00672954,0.00235095,0.01041594,0.01197920,0.00318360,0.02072003,0.00029001,0.00183886,0.00515694,0.02214633,0.05918013,0.00177016,0.03566022,0.00850963,0.01126956,1.00000000,0.02045685,0.00826220,0.00633983,0.03213753,0.02469657,0.04730327,0.08274742,0.02489030,0.08616438,0.01621454,0.00693314,0.00677325,0.00158068,0.00223943,0.00308515,0.00227818,0.00029899,0.00053250,0.00147263,0.00375682,0.00824269,0.00760135,0.01816660,0.00190250,0.00329502,0.00193978,0.00143710,0.00700996,0.00206318,0.02359644,0.00999129,0.00189036,0.00279543,0.00183423,0.00047399,0.00054415,0.00170902,0.00370215,0.00409053,0.00128667],
53             [0.01744963,0.00357584,0.00120727,0.00186239,0.00029417,0.00000000,0.00310510,0.00693442,0.00174760,0.00381929,0.01455147,0.00051594,0.00317848,0.00020468,0.00012794,0.00092596,0.00303539,0.02287265,0.00395005,0.01725834,0.00148968,0.00503505,0.02045685,1.00000000,0.05145871,0.12779436,0.03636016,0.00283634,0.01066241,0.00982807,0.00257735,0.01096068,0.02880374,0.00972467,0.10046547,0.03624167,0.00056914,0.00067807,0.00583958,0.00657362,0.00220362,0.00263033,0.02207083,0.02274367,0.00194055,0.09990713,0.00528151,0.05241284,0.01205260,0.00876405,0.00796532,0.00761985,0.01903268,0.00936378,0.00189993,0.00146593,0.00266354,0.00441917,0.00655674,0.10888766,0.08290100,0.00726229,0.00045993],
54             [0.02033044,0.00276735,0.00040013,0.00088413,0.00013702,0.00003237,0.00250883,0.00078453,0.00080893,0.00396809,0.00347447,0.00070273,0.00615162,0.00041422,0.00019978,0.00073283,0.00808711,0.01192139,0.00439948,0.01750644,0.03424890,0.01361538,0.00826220,0.05145871,1.00000000,0.08066205,0.00847989,0.00086344,0.00613541,0.01587066,0.00450803,0.00917661,0.03289855,0.03249862,0.09045805,0.06328122,0.00048236,0.00143716,0.02809991,0.00126278,0.00192920,0.00161724,0.07735493,0.02717634,0.00155401,0.19320418,0.00435925,0.00996765,0.06535708,0.13841556,0.06412573,0.04164011,0.05750162,0.04546989,0.01269074,0.00447161,0.01606400,0.00400217,0.00554911,0.05259490,0.02729086,0.00299233,0.00016756],
55             [0.01715241,0.00367641,0.00283891,0.00484911,0.00106777,0.00004214,0.01927488,0.00241796,0.00196637,0.01526674,0.00669667,0.00211532,0.02370322,0.00140823,0.00122685,0.00797362,0.02152074,0.01255827,0.06575925,0.02539841,0.00161710,0.01960735,0.00633983,0.12779436,0.08066205,1.00000000,0.03859051,0.00113611,0.00203467,0.00471129,0.00409870,0.00081985,0.00343621,0.00824681,0.01813158,0.01059378,0.00205531,0.00545895,0.05316721,0.04592582,0.00994601,0.03556494,0.07198694,0.02189391,0.00529453,0.02358360,0.01043343,0.11826228,0.00809985,0.02347113,0.01625107,0.00605939,0.04742529,0.04541076,0.01082100,0.00415774,0.00848348,0.00436409,0.01243201,0.07245937,0.04159908,0.01913399,0.00060123],
56             [0.00549930,0.00180428,0.00242860,0.00313808,0.00244524,0.00101535,0.00578289,0.00796306,0.00355291,0.00204542,0.01056136,0.00196784,0.00575064,0.00028409,0.00022561,0.00525956,0.00508249,0.05727140,0.00852211,0.01688718,0.00330852,0.00316527,0.03213753,0.03636016,0.00847989,0.03859051,1.00000000,0.00972854,0.00657131,0.00892829,0.00628014,0.00991010,0.01018037,0.01401221,0.02144435,0.00420855,0.00420610,0.00367435,0.00175623,0.00667293,0.00038671,0.00074318,0.00382104,0.00550945,0.00170323,0.00992406,0.00108417,0.00348879,0.00098750,0.00099727,0.00218474,0.00048967,0.00714997,0.00495844,0.00084966,0.00043909,0.00015966,0.00121366,0.00499164,0.02679881,0.01159190,0.00893648,0.00031932],
57             [0.00368565,0.00557237,0.00174449,0.00180966,0.00110532,0.00008925,0.01215249,0.00207254,0.00048213,0.00011577,0.00352644,0.00059630,0.00032687,0.00044925,0.00000000,0.00057556,0.00264278,0.02851493,0.00751651,0.02514467,0.00214345,0.00331739,0.02469657,0.00283634,0.00086344,0.00113611,0.00972854,1.00000000,0.02446085,0.06612813,0.00528649,0.01944665,0.00185438,0.02321463,0.00769444,0.00070282,0.00087736,0.00123164,0.00031833,0.00472461,0.00011277,0.00018688,0.00042154,0.00321139,0.00040724,0.00479677,0.00035664,0.00062726,0.00030474,0.00013869,0.00043804,0.00009519,0.00096055,0.00042709,0.00007295,0.00000000,0.00022639,0.00023733,0.00023313,0.00033997,0.00068031,0.00083669,0.00214654],
58             [0.00844784,0.00315267,0.00086392,0.00263198,0.00055186,0.00014061,0.00733156,0.00332360,0.00217216,0.00177630,0.00909528,0.00037158,0.00036216,0.00042652,0.00000000,0.00034906,0.00161911,0.04112494,0.00731073,0.05486780,0.01283577,0.00744073,0.04730327,0.01066241,0.00613541,0.00203467,0.00657131,0.02446085,1.00000000,0.11138003,0.00710961,0.01516372,0.00903694,0.04806795,0.02464383,0.01008555,0.00222670,0.00667785,0.00197022,0.00722700,0.00074335,0.00100007,0.00245419,0.00719443,0.00560381,0.08068350,0.00451608,0.00176721,0.00455153,0.00254111,0.00270384,0.00094113,0.00199997,0.00147935,0.00026871,0.00047024,0.00173808,0.00682661,0.00119168,0.00224231,0.00897971,0.00852850,0.00045398],
59             [0.01205280,0.00397678,0.00117239,0.00221683,0.00040533,0.00003739,0.01013116,0.00160225,0.00078930,0.00079854,0.01061630,0.00027242,0.00062237,0.00006426,0.00012876,0.00149558,0.01106370,0.05247327,0.02724912,0.03688048,0.02128869,0.01068346,0.08274742,0.00982807,0.01587066,0.00471129,0.00892829,0.06612813,0.11138003,1.00000000,0.07763328,0.07034465,0.01753972,0.06965379,0.04399287,0.00363571,0.00118424,0.00530821,0.00161086,0.00142676,0.00021036,0.00037829,0.00491597,0.01279794,0.00798530,0.02766847,0.00439205,0.00178918,0.00290064,0.00250747,0.00538406,0.00131325,0.00517861,0.00293871,0.00032591,0.00150324,0.00420256,0.00080150,0.00089756,0.00500701,0.00921155,0.03720525,0.00026633],
60             [0.00590242,0.00230947,0.00139450,0.00284620,0.00124399,0.00007630,0.00939150,0.00290728,0.00063203,0.00312736,0.00248232,0.00282797,0.00832029,0.00205914,0.00093168,0.01390702,0.21195908,0.00817775,0.03242017,0.00521973,0.01600221,0.01134893,0.02489030,0.00257735,0.00450803,0.00409870,0.00628014,0.00528649,0.00710961,0.07763328,1.00000000,0.03420485,0.00317056,0.04405387,0.01900739,0.00074559,0.00213527,0.00279654,0.00937419,0.00591989,0.00142592,0.00335003,0.01447748,0.01179839,0.00050940,0.00142761,0.00031669,0.00068340,0.00005170,0.00005228,0.00133899,0.00013094,0.00446244,0.00121102,0.00054575,0.00032507,0.00085792,0.00003481,0.00005202,0.00053387,0.00066170,0.00349448,0.00011249],
61             [0.00640019,0.00158525,0.00026806,0.00101324,0.00035144,0.00016497,0.00237590,0.00200535,0.00112325,0.00160699,0.00446782,0.00033981,0.00094027,0.00012121,0.00000000,0.00054471,0.00576058,0.03127318,0.00630792,0.01688746,0.01043247,0.00646498,0.08616438,0.01096068,0.00917661,0.00081985,0.00991010,0.01944665,0.01516372,0.07034465,0.03420485,1.00000000,0.04656485,0.03144514,0.01743365,0.00247936,0.00125290,0.00289379,0.00578328,0.00050220,0.00026804,0.00014454,0.00440834,0.00535735,0.00195926,0.01604359,0.00073699,0.00124962,0.00579896,0.00314838,0.00823689,0.00647544,0.00259548,0.00237396,0.00028234,0.00317951,0.00642179,0.00061039,0.00109380,0.01472719,0.00625958,0.00628051,0.00050588],
62             [0.00470555,0.00194050,0.00036410,0.00022575,0.00010310,0.00004694,0.00047886,0.00017666,0.00006953,0.00015678,0.00385592,0.00015541,0.00022213,0.00003894,0.00011200,0.00018725,0.00091710,0.03346962,0.00369276,0.02445290,0.02031329,0.00267195,0.01621454,0.02880374,0.03289855,0.00343621,0.01018037,0.00185438,0.00903694,0.01753972,0.00317056,0.04656485,1.00000000,0.01346135,0.07012036,0.01053257,0.00003529,0.00033151,0.00139163,0.00164366,0.00086229,0.00016281,0.00327332,0.00782047,0.00218277,0.07406401,0.00063875,0.00304542,0.02846726,0.01254924,0.03420190,0.01311560,0.00436480,0.03379355,0.00047982,0.02170573,0.00668109,0.00078414,0.00283927,0.02753618,0.00716731,0.00222120,0.00000000],
63             [0.01450557,0.00436769,0.01135132,0.00341334,0.01638482,0.00062654,0.00438082,0.00755212,0.00426375,0.00310573,0.01283188,0.01651029,0.02323206,0.01414366,0.01759005,0.00700920,0.03571950,0.05308678,0.07529430,0.04768928,0.01823453,0.03957653,0.00693314,0.00972467,0.03249862,0.00824681,0.01401221,0.02321463,0.04806795,0.06965379,0.04405387,0.03144514,0.01346135,1.00000000,0.22332256,0.03047097,0.00362515,0.02587995,0.00860720,0.04333949,0.00895941,0.00767373,0.04035949,0.02896817,0.00429136,0.00798823,0.00091424,0.00197470,0.00328806,0.00244236,0.00150984,0.00056344,0.05075724,0.01291364,0.00323281,0.00021794,0.00041444,0.00805088,0.00339569,0.00165429,0.00734813,0.00457324,0.00019363],
64             [0.01117073,0.00245270,0.00259196,0.00269952,0.00445366,0.00021500,0.00267569,0.00168273,0.00128080,0.00173009,0.03121173,0.00546494,0.00493813,0.00140343,0.00173629,0.00276790,0.01124942,0.02299337,0.02643964,0.03471644,0.01338593,0.01929061,0.00677325,0.10046547,0.09045805,0.01813158,0.02144435,0.00769444,0.02464383,0.04399287,0.01900739,0.01743365,0.07012036,0.22332256,1.00000000,0.06098303,0.00077042,0.00716332,0.00413977,0.06062982,0.01079193,0.00174967,0.04156133,0.02700926,0.00180154,0.04599695,0.00104510,0.00314268,0.00849010,0.00488137,0.00490207,0.00185002,0.04563527,0.01657738,0.00096511,0.00110621,0.00110381,0.01077644,0.00514171,0.00978555,0.01815607,0.00403095,0.00012445],
65             [0.01519435,0.00323992,0.00098411,0.00114372,0.00120617,0.00000000,0.00027081,0.00028101,0.00022091,0.00057235,0.00698622,0.00018146,0.00064563,0.00026078,0.00005821,0.00021263,0.00191355,0.04035755,0.01881616,0.00333698,0.00553621,0.00228865,0.00158068,0.03624167,0.06328122,0.01059378,0.00420855,0.00070282,0.01008555,0.00363571,0.00074559,0.00247936,0.01053257,0.03047097,0.06098303,1.00000000,0.00165044,0.01853358,0.02860934,0.01705040,0.00252622,0.00299878,0.04582252,0.01576011,0.00392172,0.06257874,0.00413081,0.02740352,0.04932696,0.01498342,0.00862626,0.00987188,0.00586699,0.00936342,0.00236967,0.00108098,0.00381416,0.12164423,0.04318634,0.02021245,0.13947737,0.00631943,0.00011909],
66             [0.01915299,0.03266539,0.00869706,0.05704468,0.01113947,0.00378012,0.01065916,0.01917842,0.00456440,0.02585302,0.02200911,0.00558436,0.01001453,0.00422643,0.00064098,0.05220301,0.01234284,0.00753177,0.00519386,0.00610840,0.00141886,0.00421853,0.00223943,0.00056914,0.00048236,0.00205531,0.00420610,0.00087736,0.00222670,0.00118424,0.00213527,0.00125290,0.00003529,0.00362515,0.00077042,0.00165044,1.00000000,0.09649299,0.02691656,0.00795024,0.00473072,0.01771804,0.00439487,0.00116296,0.00113255,0.00426294,0.00557289,0.00516613,0.00072015,0.00011814,0.00168547,0.00017330,0.00208905,0.00386485,0.00598437,0.00169900,0.00109036,0.00130272,0.00055026,0.00199908,0.00033131,0.01332835,0.01469944],
67             [0.00949692,0.00462011,0.00278418,0.04862097,0.01531835,0.00184760,0.02057247,0.00421842,0.00935890,0.01645332,0.01214765,0.00267307,0.00277862,0.00332764,0.00152619,0.01430250,0.00343306,0.02359920,0.01723189,0.00442747,0.00394744,0.00776764,0.00308515,0.00067807,0.00143716,0.00545895,0.00367435,0.00123164,0.00667785,0.00530821,0.00279654,0.00289379,0.00033151,0.02587995,0.00716332,0.01853358,0.09649299,1.00000000,0.02542187,0.01524772,0.01455425,0.02331533,0.00530730,0.01223921,0.00147280,0.00553666,0.01314248,0.00205700,0.00079142,0.00013946,0.00072233,0.00010843,0.00125165,0.01166432,0.00085113,0.00035170,0.00055254,0.02302896,0.00358856,0.00321930,0.00172338,0.00418654,0.00271172],
68             [0.02112821,0.00786873,0.00065182,0.00177969,0.00039070,0.00017622,0.00373513,0.00106842,0.00140085,0.03430529,0.01022627,0.00489924,0.03036682,0.00447246,0.00245656,0.01125201,0.04837319,0.00502414,0.00673282,0.01372850,0.00880579,0.06510780,0.00227818,0.00583958,0.02809991,0.05316721,0.00175623,0.00031833,0.00197022,0.00161086,0.00937419,0.00578328,0.00139163,0.00860720,0.00413977,0.02860934,0.02691656,0.02542187,1.00000000,0.00624394,0.02793209,0.09534102,0.18153533,0.00475902,0.00232323,0.00477584,0.00256019,0.02937592,0.00495045,0.00336073,0.00602078,0.00156047,0.06711762,0.00411568,0.14722449,0.00328211,0.00092243,0.00065248,0.00072151,0.00266505,0.00213053,0.00120634,0.00078964],
69             [0.01075672,0.00592230,0.04870472,0.00785647,0.06202833,0.00733195,0.00293046,0.00377618,0.00611161,0.00272510,0.00180644,0.00526033,0.00078355,0.04956246,0.00099108,0.01183416,0.04712816,0.04179025,0.06325264,0.00426017,0.00196628,0.00242980,0.00029899,0.00657362,0.00126278,0.04592582,0.00667293,0.00472461,0.00722700,0.00142676,0.00591989,0.00050220,0.00164366,0.04333949,0.06062982,0.01705040,0.00795024,0.01524772,0.00624394,1.00000000,0.07036638,0.04675877,0.01179371,0.00527734,0.00064642,0.00217038,0.00347852,0.00118570,0.00030999,0.00014375,0.00023369,0.00013175,0.00105149,0.00066307,0.00179034,0.00008588,0.00005294,0.00526645,0.00864329,0.00143054,0.00235497,0.00149506,0.00034901],
70             [0.00831446,0.00625668,0.00162055,0.00544478,0.00706150,0.00199509,0.00387222,0.00492809,0.01957090,0.03361024,0.00246790,0.01933054,0.01349284,0.12995965,0.05874367,0.03213102,0.01933463,0.00149856,0.00181273,0.00223644,0.00030094,0.02196256,0.00053250,0.00220362,0.00192920,0.00994601,0.00038671,0.00011277,0.00074335,0.00021036,0.00142592,0.00026804,0.00086229,0.00895941,0.01079193,0.00252622,0.00473072,0.01455425,0.02793209,0.07036638,1.00000000,0.23565814,0.05010477,0.00050604,0.00016712,0.00125798,0.00180972,0.00289651,0.00037433,0.00011872,0.00050773,0.00007054,0.00756966,0.00102191,0.00258941,0.00000000,0.00000000,0.00062218,0.00101418,0.00051524,0.00104060,0.00035737,0.00013958],
71             [0.01284788,0.00554406,0.00190805,0.01902742,0.00336538,0.00039905,0.00807042,0.00231786,0.00565262,0.04910434,0.00323702,0.01241364,0.02104511,0.03171323,0.01961069,0.06774395,0.04609447,0.00215435,0.00496519,0.02063272,0.00124320,0.05102092,0.00147263,0.00263033,0.00161724,0.03556494,0.00074318,0.00018688,0.00100007,0.00037829,0.00335003,0.00014454,0.00016281,0.00767373,0.00174967,0.00299878,0.01771804,0.02331533,0.09534102,0.04675877,0.23565814,1.00000000,0.10544492,0.00120475,0.00021955,0.00065430,0.00211425,0.00428711,0.00067512,0.00070849,0.00112639,0.00037462,0.01987232,0.00510775,0.00778948,0.00067253,0.00016885,0.00047698,0.00042981,0.00396239,0.00254832,0.00057251,0.00059960],
72             [0.02376349,0.00367927,0.00122017,0.00199649,0.00045634,0.00009479,0.00871024,0.00275841,0.00378400,0.03047494,0.00523446,0.00974384,0.03578595,0.01188014,0.01879199,0.00526822,0.04692010,0.01544949,0.01507412,0.01163316,0.01546540,0.06929174,0.00375682,0.02207083,0.07735493,0.07198694,0.00382104,0.00042154,0.00245419,0.00491597,0.01447748,0.00440834,0.00327332,0.04035949,0.04156133,0.04582252,0.00439487,0.00530730,0.18153533,0.01179371,0.05010477,0.10544492,1.00000000,0.01800460,0.00088198,0.00487004,0.00120767,0.02345498,0.00500422,0.00548267,0.00906789,0.00289471,0.08020179,0.01362142,0.03209294,0.00152357,0.00142448,0.00434502,0.00288680,0.00374848,0.00593906,0.00111861,0.00021871],
73             [0.01846751,0.00894575,0.01033098,0.02516203,0.00300478,0.00026701,0.00918703,0.00050158,0.00031160,0.00050122,0.01917991,0.00055166,0.00150099,0.00057622,0.00010133,0.00410551,0.00572997,0.01176310,0.04863530,0.00536271,0.01752062,0.00146184,0.00824269,0.02274367,0.02717634,0.02189391,0.00550945,0.00321139,0.00719443,0.01279794,0.01179839,0.00535735,0.00782047,0.02896817,0.02700926,0.01576011,0.00116296,0.01223921,0.00475902,0.00527734,0.00050604,0.00120475,0.01800460,1.00000000,0.14325369,0.03572809,0.01769275,0.00618835,0.00404324,0.00713058,0.00249292,0.00271148,0.02514607,0.07519441,0.00092286,0.00068632,0.00376590,0.08263703,0.05462587,0.04486943,0.06067439,0.02265862,0.00113842],
74             [0.02192419,0.00961623,0.00408488,0.01832947,0.00139167,0.00007847,0.00147008,0.00015461,0.00065168,0.00031984,0.01575161,0.00006572,0.00051297,0.00013891,0.00001166,0.00065532,0.00019301,0.00279332,0.00300932,0.00062260,0.00012141,0.00017783,0.00760135,0.00194055,0.00155401,0.00529453,0.00170323,0.00040724,0.00560381,0.00798530,0.00050940,0.00195926,0.00218277,0.00429136,0.00180154,0.00392172,0.00113255,0.00147280,0.00232323,0.00064642,0.00016712,0.00021955,0.00088198,0.14325369,1.00000000,0.10514595,0.02354699,0.00141278,0.00299712,0.00090253,0.00119456,0.00085844,0.00269840,0.00333976,0.00134696,0.00347246,0.01236056,0.03587032,0.07669852,0.05432662,0.03723326,0.02739749,0.00155304],
75             [0.02827359,0.00339433,0.00158937,0.00795814,0.00075193,0.00065581,0.01090241,0.00975019,0.00249695,0.00272203,0.06158560,0.00038290,0.00038859,0.00014967,0.00000000,0.00117392,0.00129479,0.01809550,0.00262918,0.01746793,0.00815143,0.00105556,0.01816660,0.09990713,0.19320418,0.02358360,0.00992406,0.00479677,0.08068350,0.02766847,0.00142761,0.01604359,0.07406401,0.00798823,0.04599695,0.06257874,0.00426294,0.00553666,0.00477584,0.00217038,0.00125798,0.00065430,0.00487004,0.03572809,0.10514595,1.00000000,0.04705589,0.01459540,0.05663378,0.08734657,0.04325459,0.02502914,0.01335659,0.02266056,0.00137775,0.00532641,0.01872218,0.10620462,0.01714580,0.04982741,0.03309934,0.02584118,0.00083280],
76             [0.06235569,0.00442661,0.11329349,0.07579728,0.01245292,0.00679404,0.01622746,0.00727802,0.00858768,0.00552471,0.00760292,0.00337312,0.00126309,0.01075779,0.00002908,0.01695153,0.00076547,0.00277590,0.00308379,0.00129649,0.00007725,0.00039866,0.00190250,0.00528151,0.00435925,0.01043343,0.00108417,0.00035664,0.00451608,0.00439205,0.00031669,0.00073699,0.00063875,0.00091424,0.00104510,0.00413081,0.00557289,0.01314248,0.00256019,0.00347852,0.00180972,0.00211425,0.00120767,0.01769275,0.02354699,0.04705589,1.00000000,0.00499088,0.00324442,0.00276915,0.00230305,0.00104103,0.00991123,0.00605850,0.00047069,0.00057773,0.00218800,0.07395106,0.02744583,0.04985920,0.03208429,0.09567894,0.00264640],
77             [0.01871924,0.02332687,0.00131901,0.00124556,0.00033423,0.00022507,0.00087207,0.00089392,0.00057049,0.00299371,0.00694049,0.00055779,0.00544643,0.00056373,0.00028928,0.00125465,0.00962075,0.00414399,0.00202089,0.00160921,0.00166190,0.00099751,0.00329502,0.05241284,0.00996765,0.11826228,0.00348879,0.00062726,0.00176721,0.00178918,0.00068340,0.00124962,0.00304542,0.00197470,0.00314268,0.02740352,0.00516613,0.00205700,0.02937592,0.00118570,0.00289651,0.00428711,0.02345498,0.00618835,0.00141278,0.01459540,0.00499088,1.00000000,0.04780304,0.01753946,0.02230113,0.00498698,0.03513683,0.01497822,0.02916777,0.03924092,0.00497768,0.00279354,0.00132169,0.00759703,0.05759177,0.00232030,0.00078112],
78             [0.02326626,0.00304362,0.00031955,0.00085965,0.00002379,0.00002622,0.00010859,0.00012882,0.00008359,0.00008030,0.00478471,0.00002137,0.00013874,0.00007438,0.00000000,0.00081774,0.00104078,0.00252518,0.00218238,0.00404504,0.00659144,0.00020387,0.00193978,0.01205260,0.06535708,0.00809985,0.00098750,0.00030474,0.00455153,0.00290064,0.00005170,0.00579896,0.02846726,0.00328806,0.00849010,0.04932696,0.00072015,0.00079142,0.00495045,0.00030999,0.00037433,0.00067512,0.00500422,0.00404324,0.00299712,0.05663378,0.00324442,0.04780304,1.00000000,0.06433982,0.01949774,0.04861192,0.00223968,0.02276722,0.00192441,0.00080812,0.02324735,0.00682948,0.00084406,0.01766422,0.01920178,0.00094351,0.00012488],
79             [0.02549734,0.00367554,0.00016804,0.00014810,0.00001954,0.00001058,0.00011596,0.00002086,0.00002941,0.00011027,0.00082096,0.00000000,0.00025775,0.00001011,0.00000000,0.00016668,0.00109856,0.00136921,0.00155296,0.00413838,0.00314795,0.00014078,0.00143710,0.00876405,0.13841556,0.02347113,0.00099727,0.00013869,0.00254111,0.00250747,0.00005228,0.00314838,0.01254924,0.00244236,0.00488137,0.01498342,0.00011814,0.00013946,0.00336073,0.00014375,0.00011872,0.00070849,0.00548267,0.00713058,0.00090253,0.08734657,0.00276915,0.01753946,0.06433982,1.00000000,0.02163343,0.16374857,0.00520633,0.06743083,0.01195431,0.02952770,0.08726531,0.00222538,0.00088424,0.02395618,0.00662449,0.00103668,0.00018821],
80             [0.01149905,0.00783214,0.00014428,0.00034212,0.00003722,0.00000000,0.00047994,0.00009797,0.00011305,0.00051862,0.00367486,0.00000000,0.00085371,0.00004264,0.00000000,0.00056620,0.00264999,0.00191274,0.00134257,0.00337126,0.01158046,0.00093308,0.00700996,0.00796532,0.06412573,0.01625107,0.00218474,0.00043804,0.00270384,0.00538406,0.00133899,0.00823689,0.03420190,0.00150984,0.00490207,0.00862626,0.00168547,0.00072233,0.00602078,0.00023369,0.00050773,0.00112639,0.00906789,0.00249292,0.00119456,0.04325459,0.00230305,0.02230113,0.01949774,0.02163343,1.00000000,0.03909415,0.02983356,0.04637260,0.01669265,0.01548775,0.01718709,0.00285431,0.00095862,0.01814512,0.00698152,0.01188710,0.00161540],
81             [0.00904571,0.00359950,0.00025776,0.00039567,0.00006753,0.00009166,0.00016857,0.00021588,0.00010245,0.00008856,0.00057709,0.00000000,0.00031220,0.00003817,0.00000000,0.00011811,0.00041869,0.00121252,0.00131987,0.00066456,0.00120625,0.00008869,0.00206318,0.00761985,0.04164011,0.00605939,0.00048967,0.00009519,0.00094113,0.00131325,0.00013094,0.00647544,0.01311560,0.00056344,0.00185002,0.00987188,0.00017330,0.00010843,0.00156047,0.00013175,0.00007054,0.00037462,0.00289471,0.00271148,0.00085844,0.02502914,0.00104103,0.00498698,0.04861192,0.16374857,0.03909415,1.00000000,0.00615838,0.03567149,0.02437421,0.01141797,0.01910548,0.00159134,0.00073817,0.02505985,0.00600777,0.00288950,0.00068166],
82             [0.02978857,0.01357759,0.01361661,0.00564823,0.00970181,0.00075771,0.00977301,0.00095943,0.00105292,0.02816400,0.01518055,0.02064372,0.10424722,0.00792679,0.00347836,0.01195643,0.05004606,0.00204549,0.01042747,0.00454784,0.00136223,0.01896512,0.02359644,0.01903268,0.05750162,0.04742529,0.00714997,0.00096055,0.00199997,0.00517861,0.00446244,0.00259548,0.00436480,0.05075724,0.04563527,0.00586699,0.00208905,0.00125165,0.06711762,0.00105149,0.00756966,0.01987232,0.08020179,0.02514607,0.00269840,0.01335659,0.00991123,0.03513683,0.00223968,0.00520633,0.02983356,0.00615838,1.00000000,0.11796095,0.15722729,0.00953404,0.00063211,0.00130397,0.00314726,0.00615594,0.00281269,0.02691022,0.00068706],
83             [0.01858157,0.00583133,0.00071465,0.01156505,0.00050743,0.00011908,0.00545278,0.00042942,0.00026795,0.00223054,0.01160495,0.00222026,0.01947444,0.00036874,0.00033174,0.00888301,0.00436722,0.00226252,0.00960167,0.00171182,0.00162939,0.00131588,0.00999129,0.00936378,0.04546989,0.04541076,0.00495844,0.00042709,0.00147935,0.00293871,0.00121102,0.00237396,0.03379355,0.01291364,0.01657738,0.00936342,0.00386485,0.01166432,0.00411568,0.00066307,0.00102191,0.00510775,0.01362142,0.07519441,0.00333976,0.02266056,0.00605850,0.01497822,0.02276722,0.06743083,0.04637260,0.03567149,0.11796095,1.00000000,0.01279065,0.01390689,0.00468608,0.00411594,0.00532310,0.02350313,0.00355548,0.01051787,0.00054470],
84             [0.01913664,0.01961088,0.00278636,0.00036484,0.00086061,0.00010628,0.00160478,0.00054114,0.00014010,0.00878367,0.00572667,0.00081263,0.00894971,0.00104283,0.00080709,0.00146184,0.00915766,0.00068219,0.00138919,0.00098014,0.00054020,0.00768026,0.00189036,0.00189993,0.01269074,0.01082100,0.00084966,0.00007295,0.00026871,0.00032591,0.00054575,0.00028234,0.00047982,0.00323281,0.00096511,0.00236967,0.00598437,0.00085113,0.14722449,0.00179034,0.00258941,0.00778948,0.03209294,0.00092286,0.00134696,0.00137775,0.00047069,0.02916777,0.00192441,0.01195431,0.01669265,0.02437421,0.15722729,0.01279065,1.00000000,0.12141985,0.00128826,0.00002962,0.00019234,0.00044672,0.00046079,0.00436314,0.00308205],
85             [0.00986617,0.03410275,0.00058838,0.00003067,0.00000000,0.00000000,0.00010979,0.00007965,0.00000000,0.00005787,0.00021805,0.00003506,0.00018670,0.00000000,0.00000000,0.00014386,0.00048918,0.00029128,0.00065992,0.00013873,0.00071395,0.00008613,0.00279543,0.00146593,0.00447161,0.00415774,0.00043909,0.00000000,0.00047024,0.00150324,0.00032507,0.00317951,0.02170573,0.00021794,0.00110621,0.00108098,0.00169900,0.00035170,0.00328211,0.00008588,0.00000000,0.00067253,0.00152357,0.00068632,0.00347246,0.00532641,0.00057773,0.03924092,0.00080812,0.02952770,0.01548775,0.01141797,0.00953404,0.01390689,0.12141985,1.00000000,0.01628836,0.00016611,0.00007172,0.00084973,0.00050277,0.00202156,0.00600214],
86             [0.00980736,0.00370477,0.00033706,0.00026446,0.00009151,0.00000000,0.00000000,0.00000000,0.00027875,0.00002673,0.00098801,0.00000000,0.00000000,0.00000000,0.00000000,0.00001942,0.00030055,0.00048363,0.00177403,0.00077143,0.00815323,0.00007672,0.00183423,0.00266354,0.01606400,0.00848348,0.00015966,0.00022639,0.00173808,0.00420256,0.00085792,0.00642179,0.00668109,0.00041444,0.00110381,0.00381416,0.00109036,0.00055254,0.00092243,0.00005294,0.00000000,0.00016885,0.00142448,0.00376590,0.01236056,0.01872218,0.00218800,0.00497768,0.02324735,0.08726531,0.01718709,0.01910548,0.00063211,0.00468608,0.00128826,0.01628836,1.00000000,0.00189372,0.00133057,0.00722112,0.00391637,0.00331270,0.00091412],
87             [0.02622097,0.00360240,0.01095038,0.04216157,0.00290705,0.00028208,0.00213201,0.00062483,0.00106974,0.00080236,0.01278296,0.00023440,0.00040926,0.00023474,0.00001225,0.00067740,0.00011663,0.01441252,0.00228709,0.00063688,0.00026597,0.00024719,0.00047399,0.00441917,0.00400217,0.00436409,0.00121366,0.00023733,0.00682661,0.00080150,0.00003481,0.00061039,0.00078414,0.00805088,0.01077644,0.12164423,0.00130272,0.02302896,0.00065248,0.00526645,0.00062218,0.00047698,0.00434502,0.08263703,0.03587032,0.10620462,0.07395106,0.00279354,0.00682948,0.00222538,0.00285431,0.00159134,0.00130397,0.00411594,0.00002962,0.00016611,0.00189372,1.00000000,0.11510114,0.01939883,0.05791038,0.02146187,0.00081616],
88             [0.02015345,0.00517674,0.02561634,0.01448784,0.00745701,0.00056563,0.00103991,0.00020720,0.00004685,0.00028778,0.00415772,0.00009475,0.00020963,0.00106642,0.00001882,0.00087217,0.00039512,0.00342977,0.00542576,0.00013513,0.00018976,0.00005833,0.00054415,0.00655674,0.00554911,0.01243201,0.00499164,0.00023313,0.00119168,0.00089756,0.00005202,0.00109380,0.00283927,0.00339569,0.00514171,0.04318634,0.00055026,0.00358856,0.00072151,0.00864329,0.00101418,0.00042981,0.00288680,0.05462587,0.07669852,0.01714580,0.02744583,0.00132169,0.00084406,0.00088424,0.00095862,0.00073817,0.00314726,0.00532310,0.00019234,0.00007172,0.00133057,0.11510114,1.00000000,0.03310225,0.04122635,0.01095066,0.00273120],
89             [0.02370473,0.00367842,0.00333059,0.02353333,0.00204881,0.00035722,0.00105356,0.00027694,0.00060664,0.00047446,0.00773684,0.00013492,0.00277772,0.00086267,0.00002594,0.00157362,0.00076144,0.00180629,0.00281322,0.00050885,0.00073762,0.00012964,0.00170902,0.10888766,0.05259490,0.07245937,0.02679881,0.00033997,0.00224231,0.00500701,0.00053387,0.01472719,0.02753618,0.00165429,0.00978555,0.02021245,0.00199908,0.00321930,0.00266505,0.00143054,0.00051524,0.00396239,0.00374848,0.04486943,0.05432662,0.04982741,0.04985920,0.00759703,0.01766422,0.02395618,0.01814512,0.02505985,0.00615594,0.02350313,0.00044672,0.00084973,0.00722112,0.01939883,0.03310225,1.00000000,0.05860653,0.02368163,0.00127072],
90             [0.02309777,0.00215416,0.00669844,0.00356946,0.00119390,0.00077455,0.00090291,0.00276635,0.00289897,0.00134026,0.04709159,0.00134141,0.00225085,0.00166201,0.00015389,0.00068008,0.00225356,0.00600656,0.00247036,0.00794608,0.00035807,0.00082056,0.00370215,0.08290100,0.02729086,0.04159908,0.01159190,0.00068031,0.00897971,0.00921155,0.00066170,0.00625958,0.00716731,0.00734813,0.01815607,0.13947737,0.00033131,0.00172338,0.00213053,0.00235497,0.00104060,0.00254832,0.00593906,0.06067439,0.03723326,0.03309934,0.03208429,0.05759177,0.01920178,0.00662449,0.00698152,0.00600777,0.00281269,0.00355548,0.00046079,0.00050277,0.00391637,0.05791038,0.04122635,0.05860653,1.00000000,0.05373646,0.00045164],
91             [0.02525110,0.00470674,0.02532072,0.04458390,0.02242817,0.00193900,0.04688816,0.00486968,0.00612092,0.01790618,0.01208866,0.00240426,0.00098062,0.00145186,0.00002476,0.01158942,0.00133797,0.00517165,0.00861962,0.00238844,0.00090650,0.00037605,0.00409053,0.00726229,0.00299233,0.01913399,0.00893648,0.00083669,0.00852850,0.03720525,0.00349448,0.00628051,0.00222120,0.00457324,0.00403095,0.00631943,0.01332835,0.00418654,0.00120634,0.00149506,0.00035737,0.00057251,0.00111861,0.02265862,0.02739749,0.02584118,0.09567894,0.00232030,0.00094351,0.00103668,0.01188710,0.00288950,0.02691022,0.01051787,0.00436314,0.00202156,0.00331270,0.02146187,0.01095066,0.02368163,0.05373646,1.00000000,0.00093386],
92             [0.06049483,0.02079996,0.00174317,0.00179613,0.00025653,0.00064115,0.00337673,0.00339206,0.00117948,0.00051373,0.00336093,0.00027885,0.00005553,0.00008213,0.00022975,0.00059678,0.00017619,0.00897340,0.00110538,0.00064367,0.00010881,0.00015148,0.00128667,0.00045993,0.00016756,0.00060123,0.00031932,0.00214654,0.00045398,0.00026633,0.00011249,0.00050588,0.00000000,0.00019363,0.00012445,0.00011909,0.01469944,0.00271172,0.00078964,0.00034901,0.00013958,0.00059960,0.00021871,0.00113842,0.00155304,0.00083280,0.00264640,0.00078112,0.00012488,0.00018821,0.00161540,0.00068166,0.00068706,0.00054470,0.00308205,0.00600214,0.00091412,0.00081616,0.00273120,0.00127072,0.00045164,0.00093386,1.00000000]
93             ];
94              
95             # Precompute Logs, make a finite penalty for 0 entries, where logs would be undefined
# Base-10 logarithm of the single numeric argument, derived from Perl's
# natural-log builtin. As with log() itself, a non-positive argument is fatal.
sub log10 {
    my ($value) = @_;
    return log($value) / log(10);
}
# log(0) is undefined, so zero similarities receive a finite penalty instead:
# one less than the smallest logarithm found among the non-zero matrix entries.
our $underflow_penalty =
    min(map { log($_) } grep { $_ } map { @$_ } @$msc_similarities) - 1;
# Natural logarithm of every similarity, row by row, with the penalty standing
# in for zero entries. This runs once at load time, so clarity beats speed.
our $msc_log_similarities = [
    map {
        my $row = $_;
        [ map { $_ ? log($_) : $underflow_penalty } @$row ]
    } @$msc_similarities
];
101              
# Maps each two-digit top-level MSC class to its position in the similarity
# matrix. The classes are listed in matrix order, so the position of a class
# in this list is its index.
our $msc_to_array_index = do {
    my @top_level_classes = qw(
        00 01 03 05 06 08 11 12 13 14 15
        16 17 18 19 20 22 26 28 30 31
        32 33 34 35 37 39 40 41 42 43
        44 45 46 47 49 51 52 53 54 55
        57 58 60 62 65 68 70 74 76 78
        80 81 82 83 85 86 90 91 92 93
        94 97
    );
    +{ map { ($top_level_classes[$_] => $_) } 0 .. $#top_level_classes };
};

# There is no term-likelihood metric for words yet. As a stand-in, concepts
# shorter than this many characters are discarded, since short words are the
# most likely to be used informally. For the moment, 7 seems a good value.
our $word_length_threshold = 7;
# Looks up the similarity-matrix index of an MSC code. Only the first two
# characters (the top-level class) are used, so '45H05' and '45' give the same
# answer. Returns undef for an undefined code or an unlisted class.
sub msc_to_array_index {
    my ($msc_code) = @_;
    return undef unless defined $msc_code;
    my $top_level_class = substr($msc_code, 0, 2);
    return $msc_to_array_index->{$top_level_class};
}
# Similarity of two MSC categories, read from the precomputed matrix.
# Only the top-level (two-digit) class of each category is considered at the
# moment. Returns 0 when either category cannot be mapped to a matrix index.
sub msc_similarity {
    my ($category1, $category2) = @_;
    my $row    = msc_to_array_index($category1);
    my $column = msc_to_array_index($category2);
    # Ill-defined input means no similarity.
    return 0 unless defined($row) && defined($column);
    # Well-defined: plain matrix lookup.
    return $msc_similarities->[$row]->[$column];
}
128              
# Discover the most similar cluster of concepts.
#
# Arguments:
#   $candidates - array reference of candidate hashes; the keys read here are
#                 'scheme', 'category', 'concept' and 'link'.
#   %options    - include_all: return $candidates untouched, skipping everything below
#                 verbosity:   print progress counts to STDERR
# Returns an array reference with the surviving candidates, in their original
# relative order.
# Side effect: unless include_all is set, @$candidates itself is filtered in
# place (step 0 below), so the caller's array shrinks as well.
sub disambiguate {
    my ($candidates, %options) = @_;
    return $candidates if $options{include_all};

    # 0. Drop anything we cannot classify or trust:
    #    - non-MSC schemes            (TODO: Map everything into MSC!)
    #    - the 'XX...' categories     (TODO: Can we do something with uncategorized concepts?)
    #    - concepts below the length threshold (TEMPORARY: We really need term-likelihood here)
    #    Missing fields are treated as empty strings, which avoids "uninitialized" warnings
    #    without changing which candidates survive.
    @$candidates = grep {
        (($_->{scheme} // '') eq 'msc')
            && (($_->{category} // '') !~ /^XX/)
            && (length($_->{concept} // '') >= $word_length_threshold)
    } @$candidates;
    print STDERR "[NNexus::Classification] Eligible concepts: ",scalar(@$candidates),"\n" if $options{verbosity};

    # 1. Group by top-level MSC category; the groups hold positions into @$candidates.
    my %category_view = ();
    foreach my $index (0 .. $#$candidates) {
        my $candidate = $candidates->[$index];
        my $link      = $candidate->{link};
        # NOTE(review): a category without a matrix index is grouped under the
        # empty-string key, exactly as before; confirm whether such candidates
        # should rather be dropped.
        my $msc_index = msc_to_array_index($candidate->{category}) // '';
        # 1.0. Skip fine-grained distinctions on the MSC class from the same URL (for now at least):
        #      45H07 and 45H05 are both just "45" with the current metric, so one of them suffices.
        next if ($link && (grep { ($candidates->[$_]->{link} || '') eq $link } @{ $category_view{$msc_index} }));
        # 1.1. Key the groups by similarity index, for faster lookups later.
        push @{ $category_view{$msc_index} }, $index;
    }

    # 2. Greedy search through the ordered %category_view:
    # 2.1. Precompute category weights.
    my @category_keys = keys %category_view;
    my %category_weights = map { ($_ => weigh_category($category_view{$_}, $candidates)) } @category_keys;
    # 2.2. Order by weight, heaviest first. Weights are numbers, so they must be
    #      compared with <=>; a string comparison would rank 9 above 10.
    #      Ties are broken on the key so the order does not depend on hash ordering.
    my @ordered_categories = sort {
        $category_weights{$b} <=> $category_weights{$a} or $a cmp $b
    } @category_keys;
    # 2.3. Precompute the number of concepts in each category (for the greedy cutoff).
    my %category_sizes = map { ($_ => scalar @{ $category_view{$_} }) } @category_keys;

    # Maximize the combined category weights and log-similarities.
    my $max_clique = maximize_clique(
        weights => \%category_weights,
        sizes   => \%category_sizes,
        queue   => \@ordered_categories,
    );
    # An empty search may come back without a clique; treat that as "no categories".
    my $clique_keys = ($max_clique && $max_clique->{clique}) || [];

    # Collect the candidates of the winning categories, restoring their original order.
    my @final_candidates_indexes = map { @{ $category_view{$_} } } @$clique_keys;
    my @final_candidates = map { $candidates->[$_] } sort { $a <=> $b } @final_candidates_indexes;
    print STDERR "[NNexus::Classification] Disambiguated concepts: ",scalar(@final_candidates),"\n" if $options{verbosity};
    return \@final_candidates;
}
172              
# Weight of a single category: the sum, over its concepts, of the number of
# characters by which each concept exceeds $word_length_threshold.
#
# Arguments:
#   $concept_indexes - array reference of positions into @$candidates
#   $candidates      - array reference of candidate hashes (the 'concept' key is read)
# Returns the accumulated weight; an empty category weighs 0.
#
# TODO: How certain are we that long phrases are "termy"? Alternatives that were
# considered: a geometric weight (length / threshold), dividing the total by a
# constant, or counting the number of words in a concept.
sub weigh_category {
    my ($concept_indexes, $candidates) = @_;
    my $total = 0;
    $total += length($candidates->[$_]->{concept}) - $word_length_threshold
        for @$concept_indexes;
    return $total;
}
189              
# Recursive greedy search for the best-scoring group ("clique") of categories.
#
# Named arguments:
#   weights - hash reference: category key => weight
#   sizes   - hash reference: category key => number of concepts
#   queue   - array reference of category keys still to be tried, in priority order
#   score, size, clique - state of the clique built so far (default: 0, 0, [])
# Returns a hash reference {score=>..., size=>..., clique=>[...]}; it is never
# undef, and an empty queue simply returns the state it was given.
#
# Scoring, in log space: a clique's score is the sum of its category weights
# plus the log-similarity of every pair of categories in it. Log-similarities
# are not positive, so dissimilar categories pull the score down.
sub maximize_clique {
    my (%options) = @_;
    my ($weights, $sizes, $queue) = @options{qw(weights sizes queue)};
    # Default the running state up front, so that even the base case returns
    # well-formed values.
    my $score  = $options{score}  // 0;
    my $size   = $options{size}   // 0;
    my $clique = $options{clique} // [];
    my $current = { score => $score, size => $size, clique => $clique };

    my @traversal_queue = @{ $queue || [] };
    return $current unless @traversal_queue;    # Base case

    # The clique built so far competes with its own extensions. This has to be
    # decided on the clique being non-empty, not on the score being true: a
    # clique of concepts sitting exactly at the length threshold has score 0
    # and would otherwise be lost.
    my @candidate_cliques = @$clique ? ($current) : ();
    my $greedy_bound = 0;
    while (@traversal_queue) {
        # Next extension:
        my $next_index = shift @traversal_queue;
        last if $sizes->{$next_index} < $greedy_bound;    # Greedy, don't go beyond the bound
        my $next_weight = $weights->{$next_index};
        my $next_size   = $sizes->{$next_index};

        # Sum the log-similarity of the new category to every clique member.
        my $similarity_score = 0;
        my $well_defined     = 1;
        # Fetch the row separately, so an unknown index does not autovivify a
        # new row inside the shared matrix.
        my $similarity_row = $msc_log_similarities->[$next_index];
        foreach my $category_index (@$clique) {
            my $similarity = $similarity_row ? $similarity_row->[$category_index] : undef;
            if (!$similarity) {
                # Ill-defined (missing entry, or an exact 0): skip the $next_index
                $well_defined = 0;
                last;
            }
            $similarity_score += $similarity;
        }
        next if !$well_defined;

        my $extended_score = $score + $next_weight + $similarity_score;
        my $extended_size  = $size + $next_size;
        next if $extended_score < $score;    # No improvement, next

        # Improvement: explore what else can be added to the extended clique.
        my $extended_clique = [@$clique, $next_index];
        push @candidate_cliques, maximize_clique(
            weights => $weights, sizes => $sizes, queue => \@traversal_queue,
            score => $extended_score, size => $extended_size, clique => $extended_clique);
        # Heuristic: be greedy to save time. Once a category with N concepts could be
        # added, don't try categories with fewer than N concepts in this pass (they are
        # still considered as further additions to the extended clique).
        $greedy_bound = $sizes->{$next_index};
    }

    # Return the best-scoring candidate; on ties the earliest one wins.
    my $best;
    foreach my $candidate (@candidate_cliques) {
        $best = $candidate if !defined($best) || $candidate->{score} > $best->{score};
    }
    # Nothing could be added to an empty clique: hand back the unchanged state.
    return $best // $current;
}
248              
249             1;
250             __END__