File Coverage

levmar-2.5/misc_core.c

Criterion	Covered	Total	%
statement	182	246	73.9
branch	242	320	75.6
condition			n/a
subroutine			n/a
pod			n/a
total	424	566	74.9

line	stmt	bran	code
1			/////////////////////////////////////////////////////////////////////////////////
2			//
3			// Levenberg - Marquardt non-linear minimization algorithm
4			// Copyright (C) 2004-05 Manolis Lourakis (lourakis at ics forth gr)
5			// Institute of Computer Science, Foundation for Research & Technology - Hellas
6			// Heraklion, Crete, Greece.
7			//
8			// This program is free software; you can redistribute it and/or modify
9			// it under the terms of the GNU General Public License as published by
10			// the Free Software Foundation; either version 2 of the License, or
11			// (at your option) any later version.
12			//
13			// This program is distributed in the hope that it will be useful,
14			// but WITHOUT ANY WARRANTY; without even the implied warranty of
15			// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16			// GNU General Public License for more details.
17			//
18			/////////////////////////////////////////////////////////////////////////////////
19
20			#ifndef LM_REAL // not included by misc.c
21			#error This file should not be compiled directly!
22			#endif
23
24
25			/* precision-specific definitions */
26			#define LEVMAR_CHKJAC LM_ADD_PREFIX(levmar_chkjac)
27			#define LEVMAR_FDIF_FORW_JAC_APPROX LM_ADD_PREFIX(levmar_fdif_forw_jac_approx)
28			#define LEVMAR_FDIF_CENT_JAC_APPROX LM_ADD_PREFIX(levmar_fdif_cent_jac_approx)
29			#define LEVMAR_TRANS_MAT_MAT_MULT LM_ADD_PREFIX(levmar_trans_mat_mat_mult)
30			#define LEVMAR_COVAR LM_ADD_PREFIX(levmar_covar)
31			#define LEVMAR_STDDEV LM_ADD_PREFIX(levmar_stddev)
32			#define LEVMAR_CORCOEF LM_ADD_PREFIX(levmar_corcoef)
33			#define LEVMAR_R2 LM_ADD_PREFIX(levmar_R2)
34			#define LEVMAR_BOX_CHECK LM_ADD_PREFIX(levmar_box_check)
35			#define LEVMAR_L2NRMXMY LM_ADD_PREFIX(levmar_L2nrmxmy)
36
37			#ifdef HAVE_LAPACK
38			#define LEVMAR_PSEUDOINVERSE LM_ADD_PREFIX(levmar_pseudoinverse)
39			static int LEVMAR_PSEUDOINVERSE(LM_REAL A, LM_REAL B, int m);
40
41			/* BLAS matrix multiplication & LAPACK SVD routines */
42			#ifdef LM_BLAS_PREFIX
43			#define GEMM LM_CAT_(LM_BLAS_PREFIX, LM_ADD_PREFIX(LM_CAT_(gemm, LM_BLAS_SUFFIX)))
44			#else
45			#define GEMM LM_ADD_PREFIX(LM_CAT_(gemm, LM_BLAS_SUFFIX))
46			#endif
47			/* C := alphaop( A )op( B ) + betaC /
48			extern void GEMM(char transa, char transb, int m, int n, int *k,
49			LM_REAL alpha, LM_REAL a, int lda, LM_REAL b, int ldb, LM_REAL beta, LM_REAL c, int ldc);
50
51			#define GESVD LM_MK_LAPACK_NAME(gesvd)
52			#define GESDD LM_MK_LAPACK_NAME(gesdd)
53			extern int GESVD(char jobu, char jobvt, int m, int n, LM_REAL a, int lda, LM_REAL s, LM_REAL u, int *ldu,
54			LM_REAL vt, int ldvt, LM_REAL work, int lwork, int *info);
55
56			/* lapack 3.0 new SVD routine, faster than xgesvd() */
57			extern int GESDD(char jobz, int m, int n, LM_REAL a, int lda, LM_REAL s, LM_REAL u, int ldu, LM_REAL vt, int ldvt,
58			LM_REAL work, int lwork, int iwork, int info);
59
60			/* Cholesky decomposition */
61			#define POTF2 LM_MK_LAPACK_NAME(potf2)
62			extern int POTF2(char uplo, int n, LM_REAL a, int lda, int *info);
63
64			#define LEVMAR_CHOLESKY LM_ADD_PREFIX(levmar_chol)
65
66			#else
67			#define LEVMAR_LUINVERSE LM_ADD_PREFIX(levmar_LUinverse_noLapack)
68
69			static int LEVMAR_LUINVERSE(LM_REAL A, LM_REAL B, int m);
70			#endif /* HAVE_LAPACK */
71
72			/* blocked multiplication of the transpose of the nxm matrix a with itself (i.e. a^T a)
73			* using a block size of bsize. The product is returned in b.
74			* Since a^T a is symmetric, its computation can be sped up by computing only its
75			* upper triangular part and copying it to the lower part.
76			*
77			* More details on blocking can be found at
78			* http://www-2.cs.cmu.edu/afs/cs/academic/class/15213-f02/www/R07/section_a/Recitation07-SectionA.pdf
79			*/
80	747		void LEVMAR_TRANS_MAT_MAT_MULT(LM_REAL a, LM_REAL b, int n, int m)
81			{
82			#ifdef HAVE_LAPACK /* use BLAS matrix multiply */
83
84			LM_REAL alpha=LM_CNST(1.0), beta=LM_CNST(0.0);
85			/* Fool BLAS to compute a^T*a avoiding transposing a: a is equivalent to a^T in column major,
86			* therefore BLAS computes aa^T with a and aa^T in column major, which is equivalent to
87			* computing a^T*a in row major!
88			*/
89			GEMM("N", "T", &m, &m, &n, &alpha, a, &m, a, &m, &beta, b, &m);
90
91			#else /* no LAPACK, use blocking-based multiply */
92
93			register int i, j, k, jj, kk;
94			register LM_REAL sum, bim, akm;
95	747		const int bsize=__BLOCKSZ__;
96
97			#define __MIN__(x, y) (((x)<=(y))? (x) : (y))
98			#define __MAX__(x, y) (((x)>=(y))? (x) : (y))
99
100			/* compute upper triangular part using blocking */
101	1494	100	for(jj=0; jj
		100
102	2359	100	for(i=0; i
		100
103	1612		bim=b+i*m;
104	4207	100	for(j=__MAX__(jj, i); j<__MIN__(jj+bsize, m); ++j)
		100
105	2595		bim[j]=0.0; //b[i*m+j]=0.0;
106			}
107
108	81569	100	for(kk=0; kk
		100
109	253704	100	for(i=0; i
		100
110	172882		bim=b+i*m;
111	449062	100	for(j=__MAX__(jj, i); j<__MIN__(jj+bsize, m); ++j){
		100
112	276180		sum=0.0;
113	9058536	100	for(k=kk; k<__MIN__(kk+bsize, n); ++k){
		100
114	8782356		akm=a+k*m;
115	8782356		sum+=akm[i]akm[j]; //a[km+i]a[km+j];
116			}
117	276180		bim[j]+=sum; //b[i*m+j]+=sum;
118			}
119			}
120			}
121			}
122
123			/* copy upper triangular part to the lower one */
124	2359	100	for(i=0; i
		100
125	2595	100	for(j=0; j
		100
126	983		b[im+j]=b[jm+i];
127
128			#undef __MIN__
129			#undef __MAX__
130
131			#endif /* HAVE_LAPACK */
132	747		}
133
134			/* forward finite difference approximation to the Jacobian of func */
135	15392		void LEVMAR_FDIF_FORW_JAC_APPROX(
136			void (func)(LM_REAL p, LM_REAL hx, int m, int n, void adata),
137			/* function to differentiate */
138			LM_REAL p, / I: current parameter estimate, mx1 */
139			LM_REAL hx, / I: func evaluated at p, i.e. hx=func(p), nx1 */
140			LM_REAL hxx, / W/O: work array for evaluating func(p+delta), nx1 */
141			LM_REAL delta, /* increment for computing the Jacobian */
142			LM_REAL jac, / O: array for storing approximated Jacobian, nxm */
143			int m,
144			int n,
145			void *adata)
146			{
147			register int i, j;
148			LM_REAL tmp;
149			register LM_REAL d;
150
151	76687	100	for(j=0; j
		100
152			/* determine d=max(1E-04\|p[j]\|, delta), see HZ /
153	61295		d=LM_CNST(1E-04)*p[j]; // force evaluation
154	61295	100	d=FABS(d);
		100
155	61295	100	if(d
		100
156	150		d=delta;
157
158	61295		tmp=p[j];
159	61295		p[j]+=d;
160
161	61295		(*func)(p, hxx, m, n, adata);
162
163	61295		p[j]=tmp; /* restore */
164
165	61295		d=LM_CNST(1.0)/d; /* invert so that divisions can be carried out faster as multiplications */
166	629231	100	for(i=0; i
		100
167	567936		jac[im+j]=(hxx[i]-hx[i])d;
168			}
169			}
170	15392		}
171
172			/* central finite difference approximation to the Jacobian of func */
173	0		void LEVMAR_FDIF_CENT_JAC_APPROX(
174			void (func)(LM_REAL p, LM_REAL hx, int m, int n, void adata),
175			/* function to differentiate */
176			LM_REAL p, / I: current parameter estimate, mx1 */
177			LM_REAL hxm, / W/O: work array for evaluating func(p-delta), nx1 */
178			LM_REAL hxp, / W/O: work array for evaluating func(p+delta), nx1 */
179			LM_REAL delta, /* increment for computing the Jacobian */
180			LM_REAL jac, / O: array for storing approximated Jacobian, nxm */
181			int m,
182			int n,
183			void *adata)
184			{
185			register int i, j;
186			LM_REAL tmp;
187			register LM_REAL d;
188
189	0	0	for(j=0; j
		0
190			/* determine d=max(1E-04\|p[j]\|, delta), see HZ /
191	0		d=LM_CNST(1E-04)*p[j]; // force evaluation
192	0	0	d=FABS(d);
		0
193	0	0	if(d
		0
194	0		d=delta;
195
196	0		tmp=p[j];
197	0		p[j]-=d;
198	0		(*func)(p, hxm, m, n, adata);
199
200	0		p[j]=tmp+d;
201	0		(*func)(p, hxp, m, n, adata);
202	0		p[j]=tmp; /* restore */
203
204	0		d=LM_CNST(0.5)/d; /* invert so that divisions can be carried out faster as multiplications */
205	0	0	for(i=0; i
		0
206	0		jac[im+j]=(hxp[i]-hxm[i])d;
207			}
208			}
209	0		}
210
211			/*
212			* Check the Jacobian of a n-valued nonlinear function in m variables
213			* evaluated at a point p, for consistency with the function itself.
214			*
215			* Based on fortran77 subroutine CHKDER by
216			* Burton S. Garbow, Kenneth E. Hillstrom, Jorge J. More
217			* Argonne National Laboratory. MINPACK project. March 1980.
218			*
219			*
220			* func points to a function from R^m --> R^n: Given a p in R^m it yields hx in R^n
221			* jacf points to a function implementing the Jacobian of func, whose correctness
222			* is to be tested. Given a p in R^m, jacf computes into the nxm matrix j the
223			* Jacobian of func at p. Note that row i of j corresponds to the gradient of
224			* the i-th component of func, evaluated at p.
225			* p is an input array of length m containing the point of evaluation.
226			* m is the number of variables
227			* n is the number of functions
228			* adata points to possible additional data and is passed uninterpreted
229			* to func, jacf.
230			* err is an array of length n. On output, err contains measures
231			* of correctness of the respective gradients. if there is
232			* no severe loss of significance, then if err[i] is 1.0 the
233			* i-th gradient is correct, while if err[i] is 0.0 the i-th
234			* gradient is incorrect. For values of err between 0.0 and 1.0,
235			* the categorization is less certain. In general, a value of
236			* err[i] greater than 0.5 indicates that the i-th gradient is
237			* probably correct, while a value of err[i] less than 0.5
238			* indicates that the i-th gradient is probably incorrect.
239			*
240			*
241			* The function does not perform reliably if cancellation or
242			* rounding errors cause a severe loss of significance in the
243			* evaluation of a function. therefore, none of the components
244			* of p should be unusually small (in particular, zero) or any
245			* other value which may cause loss of significance.
246			*/
247
248	4		void LEVMAR_CHKJAC(
249			void (func)(LM_REAL p, LM_REAL hx, int m, int n, void adata),
250			void (jacf)(LM_REAL p, LM_REAL j, int m, int n, void adata),
251			LM_REAL p, int m, int n, void adata, LM_REAL *err)
252			{
253	4		LM_REAL factor=LM_CNST(100.0);
254	4		LM_REAL one=LM_CNST(1.0);
255	4		LM_REAL zero=LM_CNST(0.0);
256			LM_REAL fvec, fjac, pp, fvecp, *buf;
257
258			register int i, j;
259			LM_REAL eps, epsf, temp, epsmch;
260			LM_REAL epslog;
261	4		int fvec_sz=n, fjac_sz=n*m, pp_sz=m, fvecp_sz=n;
262
263	4		epsmch=LM_REAL_EPSILON;
264	4		eps=(LM_REAL)sqrt(epsmch);
265
266	4		buf=(LM_REAL )malloc((fvec_sz + fjac_sz + pp_sz + fvecp_sz)sizeof(LM_REAL));
267	4	50	if(!buf){
		50
268	0		fprintf(stderr, LCAT(LEVMAR_CHKJAC, "(): memory allocation request failed\n"));
269	0		exit(1);
270			}
271	4		fvec=buf;
272	4		fjac=fvec+fvec_sz;
273	4		pp=fjac+fjac_sz;
274	4		fvecp=pp+pp_sz;
275
276			/* compute fvec=func(p) */
277	4		(*func)(p, fvec, m, n, adata);
278
279			/* compute the Jacobian at p */
280	4		(*jacf)(p, fjac, m, n, adata);
281
282			/* compute pp */
283	12	100	for(j=0; j
		100
284	8	50	temp=eps*FABS(p[j]);
		50
285	8	50	if(temp==zero) temp=eps;
		50
286	8		pp[j]=p[j]+temp;
287			}
288
289			/* compute fvecp=func(pp) */
290	4		(*func)(pp, fvecp, m, n, adata);
291
292	4		epsf=factor*epsmch;
293	4		epslog=(LM_REAL)log10(eps);
294
295	44	100	for(i=0; i
		100
296	40		err[i]=zero;
297
298	12	100	for(j=0; j
		100
299	8	50	temp=FABS(p[j]);
		50
300	8	50	if(temp==zero) temp=one;
		50
301
302	88	100	for(i=0; i
		100
303	80		err[i]+=tempfjac[im+j];
304			}
305
306	44	100	for(i=0; i
		100
307	40		temp=one;
308	40	50	if(fvec[i]!=zero && fvecp[i]!=zero && FABS(fvecp[i]-fvec[i])>=epsf*FABS(fvec[i]))
		50
		100
		50
		50
		50
		50
		100
		50
		50
309	40	100	temp=eps*FABS((fvecp[i]-fvec[i])/eps - err[i])/(FABS(fvec[i])+FABS(fvecp[i]));
		50
		50
		100
		50
		50
310	40		err[i]=one;
311	40	100	if(temp>epsmch && temp
		50
		100
		50
312	12		err[i]=((LM_REAL)log10(temp) - epslog)/epslog;
313	40	50	if(temp>=eps) err[i]=zero;
		50
314			}
315
316	4		free(buf);
317
318	4		return;
319			}
320
321			#ifdef HAVE_LAPACK
322			/*
323			* This function computes the pseudoinverse of a square matrix A
324			* into B using SVD. A and B can coincide
325			*
326			* The function returns 0 in case of error (e.g. A is singular),
327			* the rank of A if successful
328			*
329			* A, B are mxm
330			*
331			*/
332			static int LEVMAR_PSEUDOINVERSE(LM_REAL A, LM_REAL B, int m)
333			{
334			LM_REAL *buf=NULL;
335			int buf_sz=0;
336			static LM_REAL eps=LM_CNST(-1.0);
337
338			register int i, j;
339			LM_REAL a, u, s, vt, *work;
340			int a_sz, u_sz, s_sz, vt_sz, tot_sz;
341			LM_REAL thresh, one_over_denom;
342			int info, rank, worksz, *iwork, iworksz;
343
344			/* calculate required memory size */
345			worksz=5*m; // min worksize for GESVD
346			//worksz=m(7m+4); // min worksize for GESDD
347			iworksz=8*m;
348			a_sz=m*m;
349			u_sz=mm; s_sz=m; vt_sz=mm;
350
351			tot_sz=(a_sz + u_sz + s_sz + vt_sz + worksz)sizeof(LM_REAL) + iworkszsizeof(int); /* should be arranged in that order for proper doubles alignment */
352
353			buf_sz=tot_sz;
354			buf=(LM_REAL *)malloc(buf_sz);
355			if(!buf){
356			fprintf(stderr, RCAT("memory allocation in ", LEVMAR_PSEUDOINVERSE) "() failed!\n");
357			return 0; /* error */
358			}
359
360			a=buf;
361			u=a+a_sz;
362			s=u+u_sz;
363			vt=s+s_sz;
364			work=vt+vt_sz;
365			iwork=(int *)(work+worksz);
366
367			/* store A (column major!) into a */
368			for(i=0; i
369			for(j=0; j
370			a[i+jm]=A[im+j];
371
372			/* SVD decomposition of A */
373			GESVD("A", "A", (int )&m, (int )&m, a, (int )&m, s, u, (int )&m, vt, (int )&m, work, (int )&worksz, &info);
374			//GESDD("A", (int )&m, (int )&m, a, (int )&m, s, u, (int )&m, vt, (int )&m, work, (int )&worksz, iwork, &info);
375
376			/* error treatment */
377			if(info!=0){
378			if(info<0){
379			fprintf(stderr, RCAT(RCAT(RCAT("LAPACK error: illegal value for argument %d of ", GESVD), "/" GESDD) " in ", LEVMAR_PSEUDOINVERSE) "()\n", -info);
380			}
381			else{
382			fprintf(stderr, RCAT("LAPACK error: dgesdd (dbdsdc)/dgesvd (dbdsqr) failed to converge in ", LEVMAR_PSEUDOINVERSE) "() [info=%d]\n", info);
383			}
384			free(buf);
385			return 0;
386			}
387
388			if(eps<0.0){
389			LM_REAL aux;
390
391			/* compute machine epsilon */
392			for(eps=LM_CNST(1.0); aux=eps+LM_CNST(1.0), aux-LM_CNST(1.0)>0.0; eps*=LM_CNST(0.5))
393			;
394			eps*=LM_CNST(2.0);
395			}
396
397			/* compute the pseudoinverse in B */
398			for(i=0; i
399			for(rank=0, thresh=eps*s[0]; rankthresh; rank++){
400			one_over_denom=LM_CNST(1.0)/s[rank];
401
402			for(j=0; j
403			for(i=0; i
404			B[im+j]+=vt[rank+im]u[j+rankm]*one_over_denom;
405			}
406
407			free(buf);
408
409			return rank;
410			}
411			#else // no LAPACK
412
413			/*
414			* This function computes the inverse of A in B. A and B can coincide
415			*
416			* The function employs LAPACK-free LU decomposition of A to solve m linear
417			* systems A*B_i=I_i, where B_i and I_i are the i-th columns of B and I.
418			*
419			* A and B are mxm
420			*
421			* The function returns 0 in case of error, 1 if successful
422			*
423			*/
424	159		static int LEVMAR_LUINVERSE(LM_REAL A, LM_REAL B, int m)
425			{
426	159		void *buf=NULL;
427	159		int buf_sz=0;
428
429			register int i, j, k, l;
430	159		int *idx, maxi=-1, idx_sz, a_sz, x_sz, work_sz, tot_sz;
431			LM_REAL a, x, *work, max, sum, tmp;
432
433			/* calculate required memory size */
434	159		idx_sz=m;
435	159		a_sz=m*m;
436	159		x_sz=m;
437	159		work_sz=m;
438	159		tot_sz=(a_sz + x_sz + work_sz)sizeof(LM_REAL) + idx_szsizeof(int); /* should be arranged in that order for proper doubles alignment */
439
440	159		buf_sz=tot_sz;
441	159		buf=(void *)malloc(tot_sz);
442	159	50	if(!buf){
		50
443	0		fprintf(stderr, RCAT("memory allocation in ", LEVMAR_LUINVERSE) "() failed!\n");
444	0		return 0; /* error */
445			}
446
447	159		a=buf;
448	159		x=a+a_sz;
449	159		work=x+x_sz;
450	159		idx=(int *)(work+work_sz);
451
452			/* avoid destroying A by copying it to a */
453	1058	100	for(i=0; i
		100
454
455			/* compute the LU decomposition of a row permutation of matrix a; the permutation itself is saved in idx[] */
456	524	100	for(i=0; i
		100
457	365		max=0.0;
458	1264	100	for(j=0; j
		100
459	899	100	if((tmp=FABS(a[i*m+j]))>max)
		100
		100
		100
460	556		max=tmp;
461	365	50	if(max==0.0){
		50
462	0		fprintf(stderr, RCAT("Singular matrix A in ", LEVMAR_LUINVERSE) "()!\n");
463	0		free(buf);
464
465	0		return 0;
466			}
467	365		work[i]=LM_CNST(1.0)/max;
468			}
469
470	524	100	for(j=0; j
		100
471	632	100	for(i=0; i
		100
472	267		sum=a[i*m+j];
473	342	100	for(k=0; k
		100
474	75		sum-=a[im+k]a[k*m+j];
475	267		a[i*m+j]=sum;
476			}
477	365		max=0.0;
478	997	100	for(i=j; i
		100
479	632		sum=a[i*m+j];
480	974	100	for(k=0; k
		100
481	342		sum-=a[im+k]a[k*m+j];
482	632		a[i*m+j]=sum;
483	632	100	if((tmp=work[i]*FABS(sum))>=max){
		100
		100
		100
484	458		max=tmp;
485	458		maxi=i;
486			}
487			}
488	365	100	if(j!=maxi){
		100
489	317	100	for(k=0; k
		100
490	226		tmp=a[maxi*m+k];
491	226		a[maxim+k]=a[jm+k];
492	226		a[j*m+k]=tmp;
493			}
494	91		work[maxi]=work[j];
495			}
496	365		idx[j]=maxi;
497	365	50	if(a[j*m+j]==0.0)
		100
498	2		a[j*m+j]=LM_REAL_EPSILON;
499	365	100	if(j!=m-1){
		100
500	206		tmp=LM_CNST(1.0)/(a[j*m+j]);
501	473	100	for(i=j+1; i
		100
502	267		a[im+j]=tmp;
503			}
504			}
505
506			/* The decomposition has now replaced a. Solve the m linear systems using
507			* forward and back substitution
508			*/
509	524	100	for(l=0; l
		100
510	1264	100	for(i=0; i
		100
511	365		x[l]=LM_CNST(1.0);
512
513	1264	100	for(i=k=0; i
		100
514	899		j=idx[i];
515	899		sum=x[j];
516	899		x[j]=x[i];
517	899	100	if(k!=0)
		100
518	609	100	for(j=k-1; j
		100
519	342		sum-=a[im+j]x[j];
520			else
521	632	100	if(sum!=0.0)
		100
522	365		k=i+1;
523	899		x[i]=sum;
524			}
525
526	1264	100	for(i=m-1; i>=0; --i){
		100
527	899		sum=x[i];
528	1658	100	for(j=i+1; j
		100
529	759		sum-=a[im+j]x[j];
530	899		x[i]=sum/a[i*m+i];
531			}
532
533	1264	100	for(i=0; i
		100
534	899		B[i*m+l]=x[i];
535			}
536
537	159		free(buf);
538
539	159		return 1;
540			}
541			#endif /* HAVE_LAPACK */
542
543			/*
544			* This function computes in C the covariance matrix corresponding to a least
545			* squares fit. JtJ is the approximate Hessian at the solution (i.e. J^T*J, where
546			* J is the Jacobian at the solution), sumsq is the sum of squared residuals
547			* (i.e. goodnes of fit) at the solution, m is the number of parameters (variables)
548			* and n the number of observations. JtJ can coincide with C.
549			*
550			* if JtJ is of full rank, C is computed as sumsq/(n-m)*(JtJ)^-1
551			* otherwise and if LAPACK is available, C=sumsq/(n-r)*(JtJ)^+
552			* where r is JtJ's rank and ^+ denotes the pseudoinverse
553			* The diagonal of C is made up from the estimates of the variances
554			* of the estimated regression coefficients.
555			* See the documentation of routine E04YCF from the NAG fortran lib
556			*
557			* The function returns the rank of JtJ if successful, 0 on error
558			*
559			* A and C are mxm
560			*
561			*/
562	159		int LEVMAR_COVAR(LM_REAL JtJ, LM_REAL C, LM_REAL sumsq, int m, int n)
563			{
564			register int i;
565			int rnk;
566			LM_REAL fact;
567
568			#ifdef HAVE_LAPACK
569			rnk=LEVMAR_PSEUDOINVERSE(JtJ, C, m);
570			if(!rnk) return 0;
571			#else
572			#ifdef _MSC_VER
573			#pragma message("LAPACK not available, LU will be used for matrix inversion when computing the covariance; this might be unstable at times")
574			#else
575			#warning LAPACK not available, LU will be used for matrix inversion when computing the covariance; this might be unstable at times
576			#endif // _MSC_VER
577
578	159		rnk=LEVMAR_LUINVERSE(JtJ, C, m);
579	159	50	if(!rnk) return 0;
		50
580
581	159		rnk=m; /* assume full rank */
582			#endif /* HAVE_LAPACK */
583
584	159		fact=sumsq/(LM_REAL)(n-rnk);
585	1058	100	for(i=0; i
		100
586	899		C[i]*=fact;
587
588	159		return rnk;
589			}
590
591			/* standard deviation of the best-fit parameter i.
592			* covar is the mxm covariance matrix of the best-fit parameters (see also LEVMAR_COVAR()).
593			*
594			* The standard deviation is computed as \sigma_{i} = \sqrt{C_{ii}}
595			*/
596	0		LM_REAL LEVMAR_STDDEV(LM_REAL *covar, int m, int i)
597			{
598	0		return (LM_REAL)sqrt(covar[i*m+i]);
599			}
600
601			/* Pearson's correlation coefficient of the best-fit parameters i and j.
602			* covar is the mxm covariance matrix of the best-fit parameters (see also LEVMAR_COVAR()).
603			*
604			* The coefficient is computed as \rho_{ij} = C_{ij} / sqrt(C_{ii} C_{jj})
605			*/
606	0		LM_REAL LEVMAR_CORCOEF(LM_REAL *covar, int m, int i, int j)
607			{
608	0		return (LM_REAL)(covar[im+j]/sqrt(covar[im+i]covar[jm+j]));
609			}
610
611			/* coefficient of determination.
612			* see http://en.wikipedia.org/wiki/Coefficient_of_determination
613			*/
614	0		LM_REAL LEVMAR_R2(void (func)(LM_REAL p, LM_REAL hx, int m, int n, void adata),
615			LM_REAL p, LM_REAL x, int m, int n, void *adata)
616			{
617			register int i;
618			register LM_REAL tmp;
619			LM_REAL SSerr, // sum of squared errors, i.e. residual sum of squares \sum_i (x_i-hx_i)^2
620			SStot, // \sum_i (x_i-xavg)^2
621			*hx, xavg;
622
623
624	0	0	if((hx=(LM_REAL )malloc(nsizeof(LM_REAL)))==NULL){
		0
625	0		fprintf(stderr, RCAT("memory allocation request failed in ", LEVMAR_R2) "()\n");
626	0		exit(1);
627			}
628
629			/* hx=f(p) */
630	0		(*func)(p, hx, m, n, adata);
631
632	0	0	for(i=0, tmp=0.0; i
		0
633	0		tmp+=x[i];
634	0		xavg=tmp/(LM_REAL)n;
635
636	0	0	for(i=0, SSerr=SStot=0.0; i
		0
637	0		tmp=x[i]-hx[i];
638	0		SSerr+=tmp*tmp;
639
640	0		tmp=x[i]-xavg;
641	0		SStot+=tmp*tmp;
642			}
643
644	0		free(hx);
645
646	0		return LM_CNST(1.0) - SSerr/SStot;
647			}
648
649			/* check box constraints for consistency */
650	16		int LEVMAR_BOX_CHECK(LM_REAL lb, LM_REAL ub, int m)
651			{
652			register int i;
653
654	16	50	if(!lb \|\| !ub) return 1;
		50
		50
		50
655
656	78	100	for(i=0; i
		100
657	62	50	if(lb[i]>ub[i]) return 0;
		50
658
659	16		return 1;
660			}
661
662			#ifdef HAVE_LAPACK
663
664			/* compute the Cholesky decomposition of C in W, s.t. C=W^t W and W is upper triangular */
665			int LEVMAR_CHOLESKY(LM_REAL C, LM_REAL W, int m)
666			{
667			register int i, j;
668			int info;
669
670			/* copy weights array C to W so that LAPACK won't destroy it;
671			* C is assumed symmetric, hence no transposition is needed
672			*/
673			for(i=0, j=m*m; i
674			W[i]=C[i];
675
676			/* Cholesky decomposition */
677			POTF2("U", (int )&m, W, (int )&m, (int *)&info);
678			/* error treatment */
679			if(info!=0){
680			if(info<0){
681			fprintf(stderr, "LAPACK error: illegal value for argument %d of dpotf2 in %s\n", -info, LCAT(LEVMAR_CHOLESKY, "()"));
682			}
683			else{
684			fprintf(stderr, "LAPACK error: the leading minor of order %d is not positive definite,\n%s()\n", info,
685			RCAT("and the Cholesky factorization could not be completed in ", LEVMAR_CHOLESKY));
686			}
687			return LM_ERROR;
688			}
689
690			/* the decomposition is in the upper part of W (in column-major order!).
691			* copying it to the lower part and zeroing the upper transposes
692			* W in row-major order
693			*/
694			for(i=0; i
695			for(j=0; j
696			W[i+jm]=W[j+im];
697			W[j+i*m]=0.0;
698			}
699
700			return 0;
701			}
702			#endif /* HAVE_LAPACK */
703
704
705			/* Compute e=x-y for two n-vectors x and y and return the squared L2 norm of e.
706			* e can coincide with either x or y; x can be NULL, in which case it is assumed
707			* to be equal to the zero vector.
708			* Uses loop unrolling and blocking to reduce bookkeeping overhead & pipeline
709			* stalls and increase instruction-level parallelism; see http://www.abarnett.demon.co.uk/tutorial.html
710			*/
711
712	232671		LM_REAL LEVMAR_L2NRMXMY(LM_REAL e, LM_REAL x, LM_REAL *y, int n)
713			{
714	232671		const int blocksize=8, bpwr=3; /* 8=2^3 */
715			register int i;
716			int j1, j2, j3, j4, j5, j6, j7;
717			int blockn;
718	232671		register LM_REAL sum0=0.0, sum1=0.0, sum2=0.0, sum3=0.0;
719
720			/* n may not be divisible by blocksize,
721			* go as near as we can first, then tidy up.
722			*/
723	232671		blockn = (n>>bpwr)<
724
725			/* unroll the loop in blocks of `blocksize'; looping downwards gains some more speed */
726	232671	50	if(x){
		50
727	696374	100	for(i=blockn-1; i>0; i-=blocksize){
		100
728	463703		e[i ]=x[i ]-y[i ]; sum0+=e[i ]*e[i ];
729	463703		j1=i-1; e[j1]=x[j1]-y[j1]; sum1+=e[j1]*e[j1];
730	463703		j2=i-2; e[j2]=x[j2]-y[j2]; sum2+=e[j2]*e[j2];
731	463703		j3=i-3; e[j3]=x[j3]-y[j3]; sum3+=e[j3]*e[j3];
732	463703		j4=i-4; e[j4]=x[j4]-y[j4]; sum0+=e[j4]*e[j4];
733	463703		j5=i-5; e[j5]=x[j5]-y[j5]; sum1+=e[j5]*e[j5];
734	463703		j6=i-6; e[j6]=x[j6]-y[j6]; sum2+=e[j6]*e[j6];
735	463703		j7=i-7; e[j7]=x[j7]-y[j7]; sum3+=e[j7]*e[j7];
736			}
737
738			/*
739			* There may be some left to do.
740			* This could be done as a simple for() loop,
741			* but a switch is faster (and more interesting)
742			*/
743
744	232671		i=blockn;
745	232671	100	if(i
		50
746			/* Jump into the case at the place that will allow
747			* us to finish off the appropriate number of items.
748			*/
749
750	231536		switch(n - i){
751	0		case 7 : e[i]=x[i]-y[i]; sum0+=e[i]*e[i]; ++i;
752	0		case 6 : e[i]=x[i]-y[i]; sum1+=e[i]*e[i]; ++i;
753	0		case 5 : e[i]=x[i]-y[i]; sum2+=e[i]*e[i]; ++i;
754	191017		case 4 : e[i]=x[i]-y[i]; sum3+=e[i]*e[i]; ++i;
755	191334		case 3 : e[i]=x[i]-y[i]; sum0+=e[i]*e[i]; ++i;
756	231508		case 2 : e[i]=x[i]-y[i]; sum1+=e[i]*e[i]; ++i;
757	232671		case 1 : e[i]=x[i]-y[i]; sum2+=e[i]*e[i]; //++i;
758			}
759			}
760			}
761			else{ /* x==0 */
762	0	0	for(i=blockn-1; i>0; i-=blocksize){
		0
763	0		e[i ]=-y[i ]; sum0+=e[i ]*e[i ];
764	0		j1=i-1; e[j1]=-y[j1]; sum1+=e[j1]*e[j1];
765	0		j2=i-2; e[j2]=-y[j2]; sum2+=e[j2]*e[j2];
766	0		j3=i-3; e[j3]=-y[j3]; sum3+=e[j3]*e[j3];
767	0		j4=i-4; e[j4]=-y[j4]; sum0+=e[j4]*e[j4];
768	0		j5=i-5; e[j5]=-y[j5]; sum1+=e[j5]*e[j5];
769	0		j6=i-6; e[j6]=-y[j6]; sum2+=e[j6]*e[j6];
770	0		j7=i-7; e[j7]=-y[j7]; sum3+=e[j7]*e[j7];
771			}
772
773			/*
774			* There may be some left to do.
775			* This could be done as a simple for() loop,
776			* but a switch is faster (and more interesting)
777			*/
778
779	0		i=blockn;
780	0	0	if(i
		0
781			/* Jump into the case at the place that will allow
782			* us to finish off the appropriate number of items.
783			*/
784
785	0		switch(n - i){
786	0		case 7 : e[i]=-y[i]; sum0+=e[i]*e[i]; ++i;
787	0		case 6 : e[i]=-y[i]; sum1+=e[i]*e[i]; ++i;
788	0		case 5 : e[i]=-y[i]; sum2+=e[i]*e[i]; ++i;
789	0		case 4 : e[i]=-y[i]; sum3+=e[i]*e[i]; ++i;
790	0		case 3 : e[i]=-y[i]; sum0+=e[i]*e[i]; ++i;
791	0		case 2 : e[i]=-y[i]; sum1+=e[i]*e[i]; ++i;
792	0		case 1 : e[i]=-y[i]; sum2+=e[i]*e[i]; //++i;
793			}
794			}
795			}
796
797	232671		return sum0+sum1+sum2+sum3;
798			}
799
800			/* undefine everything. THIS MUST REMAIN AT THE END OF THE FILE */
801			#undef LEVMAR_PSEUDOINVERSE
802			#undef LEVMAR_LUINVERSE
803			#undef LEVMAR_BOX_CHECK
804			#undef LEVMAR_CHOLESKY
805			#undef LEVMAR_COVAR
806			#undef LEVMAR_STDDEV
807			#undef LEVMAR_CORCOEF
808			#undef LEVMAR_R2
809			#undef LEVMAR_CHKJAC
810			#undef LEVMAR_FDIF_FORW_JAC_APPROX
811			#undef LEVMAR_FDIF_CENT_JAC_APPROX
812			#undef LEVMAR_TRANS_MAT_MAT_MULT
813			#undef LEVMAR_L2NRMXMY