File Coverage

Transform.xs

Criterion	Covered	Total	%
statement	627	2679	23.4
branch	368	1766	20.8
condition			n/a
subroutine			n/a
pod			n/a
total	995	4445	22.3

line	stmt	bran	code
1
2			/*
3			* THIS FILE WAS GENERATED BY PDLA::PP! Do not modify!
4			*/
5
6			#define PDLA_COMMENT(comment)
7			PDLA_COMMENT("This preprocessor symbol is used to add commentary in the PDLA ")
8			PDLA_COMMENT("autogenerated code. Normally, one would use typical C-style ")
9			PDLA_COMMENT("multiline comments (i.e. /* comment */). However, because such ")
10			PDLA_COMMENT("comments do not nest, it's not possible for PDLA::PP users to ")
11			PDLA_COMMENT("comment-out sections of code using multiline comments, as is ")
12			PDLA_COMMENT("often the practice when debugging, for example. So, when you ")
13			PDLA_COMMENT("see something like this: ")
14			PDLA_COMMENT(" ")
15			PDLA_COMMENT("Memory access")
16			PDLA_COMMENT(" ")
17			PDLA_COMMENT("just think of it as a C multiline comment like: ")
18			PDLA_COMMENT(" ")
19			PDLA_COMMENT(" /* Memory access */ ")
20
21			#include "EXTERN.h"
22			#include "perl.h"
23			#include "XSUB.h"
24			#include "pdl.h"
25			#include "pdlcore.h"
26			static Core* PDLA; PDLA_COMMENT("Structure hold core C functions")
27			static int __pdl_debugging = 0;
28			static int __pdl_boundscheck = 0;
29			static SV* CoreSV; PDLA_COMMENT("Gets pointer to perl var holding core structure")
30
31			#if ! 1
32			# define PP_INDTERM(max, at) at
33			#else
34			# define PP_INDTERM(max, at) (__pdl_boundscheck? PDLA->safe_indterm(max,at, __FILE__, __LINE__) : at)
35			#endif
36
37			#include
38
39
40			/*
41			* Singular-value decomposition code is borrowed from
42			* MatrixOps -- cut-and-pasted here because of linker trouble.
43			* It's used by the auxiliary matrix manipulation code, below.
44			*
45			*/
46
47	737701		void pdl_xform_svd(PDLA_Double W, PDLA_Double Z, int nRow, int nCol)
48			{
49			int i, j, k, EstColRank, RotCount, SweepCount, slimit;
50			PDLA_Double eps, e2, tol, vt, p, h2, x0, y0, q, r, c0, s0, c2, d1, d2;
51	737701		eps = 1e-6;
52	737701		slimit = nCol/4;
53	737701	50	if (slimit < 6.0)
54	737701		slimit = 6;
55	737701		SweepCount = 0;
56	737701		e2 = 10.0nRoweps*eps;
57	737701		tol = eps*.1;
58	737701		EstColRank = nCol;
59	2213103	100	for (i=0; i
60	4426206	100	for (j=0; j
61	2950804		W[nCol*(nRow+i)+j] = 0.0;
62			}
63	1475402		W[nCol(nRow+i)+i] = 1.0; / rjrw 7/7/99: moved this line out of j loop */
64			}
65	737701		RotCount = EstColRank*(EstColRank-1)/2;
66	4096192	100	while (RotCount != 0 && SweepCount <= slimit)
		100
67			{
68	3358491		RotCount = EstColRank*(EstColRank-1)/2;
69	3358491		SweepCount++;
70	6716982	100	for (j=0; j
71			{
72	6716982	100	for (k=j+1; k
73			{
74	3358491		p = q = r = 0.0;
75	10075473	100	for (i=0; i
76			{
77	6716982		x0 = W[nColi+j]; y0 = W[nColi+k];
78	6716982		p += x0y0; q += x0x0; r += y0*y0;
79			}
80	3358491		Z[j] = q; Z[k] = r;
81	3358491	100	if (q >= r)
82			{
83	526731	50	if (q<=e2Z[0] \|\| fabs(p)<=tolq) RotCount--;
		100
84			else
85			{
86	165910		p /= q; r = 1 - r/q; vt = sqrt(4pp+r*r);
87	165910		c0 = sqrt(fabs(.5(1+r/vt))); s0 = p/(vtc0);
88	1190371	100	for (i=0; i
89			{
90	663640		d1 = W[nColi+j]; d2 = W[nColi+k];
91	663640		W[nColi+j] = d1c0+d2s0; W[nColi+k] = -d1s0+d2c0;
92			}
93			}
94			}
95			else
96			{
97	2831760		p /= r; q = q/r-1; vt = sqrt(4pp+q*q);
98	2831760		s0 = sqrt(fabs(.5*(1-q/vt)));
99	2831760	100	if (p<0) s0 = -s0;
100	2831760		c0 = p/(vt*s0);
101	14158800	100	for (i=0; i
102			{
103	11327040		d1 = W[nColi+j]; d2 = W[nColi+k];
104	11327040		W[nColi+j] = d1c0+d2s0; W[nColi+k] = -d1s0+d2c0;
105			}
106			}
107			}
108			}
109	3358491	50	while (EstColRank>=3 && Z[(EstColRank-1)]<=Z[0]tol+toltol)
		0
110	0		EstColRank--;
111			}
112	737701		}
113
114			/*
115			* PDLA_xform_aux:
116			* This handles the matrix manipulation part of the Jacobian filtered
117			* mapping code. It's separate from the main code because it's
118			* independent of the data type of the original arrays.
119			*
120			*Given a pre-allocated workspace and
121			* an integer set of coordinates, generate the discrete Jacobian
122			* from the map, pad the singular values, and return the inverse
123			* Jacobian, the largest singular value of the Jacobian itself, and
124			* the determinant of the original Jacobian. Boundary values use the
125			* asymmetric discrete Jacobian; others use the symmetric discrete Jacobian.
126			*
127			* The map and workspace must be of type PDLA_D. If the dimensionality is
128			* d, then the workspace must have at least 3*n^2+n elements. The
129			* inverse of the padded Jacobian is returned in the first n^2 elements.
130			* The determinant of the original Jacobian gets stuffed into the n^2
131			* element of the workspace. The largest padded singular value is returned.
132			*
133			*/
134
135	737701		PDLA_Double PDLA_xform_aux ( pdl map, PDLA_Indx coords, PDLA_Double *tmp, PDLA_Double sv_min) {
136			short ndims;
137			PDLA_Long i, j, k;
138			PDLA_Long offset;
139			PDLA_Double det;
140			PDLA_Double *jptr;
141			PDLA_Double *svptr;
142			PDLA_Double aptr,bptr;
143	737701		PDLA_Double max_sv = 0.0;
144
145	737701		ndims = map->ndims-1;
146
147			/****** Accumulate the Jacobian */
148			/* Accumulate the offset into the map array */
149	2213103	100	for( i=offset=0; i
150	1475402		offset += coords[i] * map->dimincs[i+1];
151
152	737701		jptr = tmp + ndims*ndims;
153
154	2213103	100	for( i=0; i
155	1475402		char bot = (coords[i] <=0);
156	1475402		char top = (coords[i] >= map->dims[i+1]-1);
157	1475402	100	char symmetric = !(bot \|\| top);
		100
158			PDLA_Double ohi,olo;
159	1475402		PDLA_Long diff = map->dimincs[i+1];
160	1475402	100	ohi = ((PDLA_Double *)map->data) + ( offset + ( top ? 0 : diff ));
161	1475402	100	olo = ((PDLA_Double *)map->data) + ( offset - ( bot ? 0 : diff ));
162
163	4426206	100	for( j=0; j
164	2950804		PDLA_Double jel = ohi - olo;
165
166	2950804		ohi += map->dimincs[0];
167	2950804		olo += map->dimincs[0];
168
169	2950804	100	if(symmetric)
170	2935020		jel /= 2;
171	2950804		*(jptr++) = jel;
172			}
173			}
174
175
176			/****** Singular-value decompose the Jacobian
177			* The svd routine produces the squares of the singular values,
178			* and requires normalization for one of the rotation matrices.
179			*/
180
181	737701		jptr = tmp + ndims*ndims;
182	737701		svptr = tmp + 3ndimsndims;
183	737701		pdl_xform_svd(jptr,svptr,ndims,ndims);
184
185	737701		aptr = svptr;
186	2213103	100	for (i=0;i
187	1475402		aptr = sqrt(aptr);
188
189
190			/* fix-up matrices here */
191	737701		bptr = jptr;
192	2213103	100	for(i=0; i
193	1475402		aptr = svptr;
194	4426206	100	for(j=0;j
195	2950804		(bptr++) /= (aptr++);
196			}
197
198			/****** Store the determinant, and pad the singular values as necessary.*/
199	737701		aptr = svptr;
200	737701		det = 1.0;
201	2213103	100	for(i=0;i
202	1475402		det = aptr;
203	1475402	100	if(*aptr < sv_min)
204	720800		*aptr = sv_min;
205	1475402	100	if(*aptr > max_sv )
206	360821		max_sv = *aptr;
207	1475402		aptr++;
208			}
209
210			/****** Generate the inverse matrix */
211			/* Multiply B-transpose times 1/S times A-transpose. */
212			/* since S is diagonal we just divide by the appropriate element. */
213			/* */
214	737701		aptr = tmp + ndims*ndims;
215	737701		bptr = aptr + ndims*ndims;
216	737701		jptr= tmp;
217
218	2213103	100	for(i=0;i
219	4426206	100	for(j=0;j
220	2950804		*jptr = 0;
221
222	8852412	100	for(k=0;k
223	5901608		jptr += aptr[jndims + k] * bptr[kndims + i] / svptr;
224
225	2950804		jptr++;
226			}
227	1475402		svptr++;
228			}
229
230	737701		*jptr = det;
231	737701		return max_sv;
232			}
233
234			typedef struct pdl_map_struct {
235			PDLA_TRANS_START(1);
236			pdl_thread __pdlthread;
237			SV in;SV out;SV map;SV boundary;SV method;SV big;SV blur;SV sv_min;SV flux;SV bv;
238			char __ddone; PDLA_COMMENT("Dims done")
239			} pdl_map_struct;
240
241	12		void pdl_map_redodims(pdl_trans *__tr ) {
242			int __dim;
243	12		pdl_map_struct __privtrans = (pdl_map_struct ) __tr;
244
245			{
246			PDLA_Indx __creating[1];
247	12		__creating[0] = 0;
248			{
249			{PDLA_COMMENT("Start generic loop")
250
251	12		switch(__privtrans->__datatype) { case -42: PDLA_COMMENT("Warning eater") {(void)1;
252	0		} break; case PDLA_B: {
253	0	0	PDLA_Byte * k0_datap = ((PDLA_Byte *)(PDLA_REPRP_TRANS((__privtrans->pdls[0]),(__privtrans->vtable->per_pdl_flags[0]))));
		0
254	0		PDLA_Byte * k0_physdatap = ((PDLA_Byte *)((__privtrans->pdls[0])->data));
255
256			{
257			PDLA_COMMENT("none")
258	0		} } break; case PDLA_S: {
259	0	0	PDLA_Short * k0_datap = ((PDLA_Short *)(PDLA_REPRP_TRANS((__privtrans->pdls[0]),(__privtrans->vtable->per_pdl_flags[0]))));
		0
260	0		PDLA_Short * k0_physdatap = ((PDLA_Short *)((__privtrans->pdls[0])->data));
261
262			{
263			PDLA_COMMENT("none")
264	0		} } break; case PDLA_US: {
265	0	0	PDLA_Ushort * k0_datap = ((PDLA_Ushort *)(PDLA_REPRP_TRANS((__privtrans->pdls[0]),(__privtrans->vtable->per_pdl_flags[0]))));
		0
266	0		PDLA_Ushort * k0_physdatap = ((PDLA_Ushort *)((__privtrans->pdls[0])->data));
267
268			{
269			PDLA_COMMENT("none")
270	0		} } break; case PDLA_L: {
271	5	50	PDLA_Long * k0_datap = ((PDLA_Long *)(PDLA_REPRP_TRANS((__privtrans->pdls[0]),(__privtrans->vtable->per_pdl_flags[0]))));
		0
272	5		PDLA_Long * k0_physdatap = ((PDLA_Long *)((__privtrans->pdls[0])->data));
273
274			{
275			PDLA_COMMENT("none")
276	5		} } break; case PDLA_IND: {
277	0	0	PDLA_Indx * k0_datap = ((PDLA_Indx *)(PDLA_REPRP_TRANS((__privtrans->pdls[0]),(__privtrans->vtable->per_pdl_flags[0]))));
		0
278	0		PDLA_Indx * k0_physdatap = ((PDLA_Indx *)((__privtrans->pdls[0])->data));
279
280			{
281			PDLA_COMMENT("none")
282	0		} } break; case PDLA_LL: {
283	0	0	PDLA_LongLong * k0_datap = ((PDLA_LongLong *)(PDLA_REPRP_TRANS((__privtrans->pdls[0]),(__privtrans->vtable->per_pdl_flags[0]))));
		0
284	0		PDLA_LongLong * k0_physdatap = ((PDLA_LongLong *)((__privtrans->pdls[0])->data));
285
286			{
287			PDLA_COMMENT("none")
288	0		} } break; case PDLA_F: {
289	0	0	PDLA_Float * k0_datap = ((PDLA_Float *)(PDLA_REPRP_TRANS((__privtrans->pdls[0]),(__privtrans->vtable->per_pdl_flags[0]))));
		0
290	0		PDLA_Float * k0_physdatap = ((PDLA_Float *)((__privtrans->pdls[0])->data));
291
292			{
293			PDLA_COMMENT("none")
294	0		} } break; case PDLA_D: {
295	7	50	PDLA_Double * k0_datap = ((PDLA_Double *)(PDLA_REPRP_TRANS((__privtrans->pdls[0]),(__privtrans->vtable->per_pdl_flags[0]))));
		0
296	7		PDLA_Double * k0_physdatap = ((PDLA_Double *)((__privtrans->pdls[0])->data));
297
298			{
299			PDLA_COMMENT("none")
300	7		} break;}
301	0		default:barf("PP INTERNAL ERROR! PLEASE MAKE A BUG REPORT\n");}
302			}
303			}
304			{
305			static char *__parnames[] = {"k0"};
306			static PDLA_Indx __realdims[] = {0};
307			static char __funcname[] = "PDLA::Transform::map";
308			static pdl_errorinfo __einfo = {
309			__funcname, __parnames, 1
310			};
311
312	12		PDLA->initthreadstruct(2,__privtrans->pdls,
313			__realdims,__creating,1,
314			&__einfo,&(__privtrans->__pdlthread),
315	12		__privtrans->vtable->per_pdl_flags,
316			0 );
317			}
318
319			{ PDLA_COMMENT("convenience block")
320	12		void *hdrp = NULL;
321	12		char propagate_hdrcpy = 0;
322	12		SV *hdr_copy = NULL;
323	12	50	if(!hdrp &&
		50
324	0	0	__privtrans->pdls[0]->hdrsv &&
325	0		(__privtrans->pdls[0]->state & PDLA_HDRCPY)
326			) {
327	0		hdrp = __privtrans->pdls[0]->hdrsv;
328	0		propagate_hdrcpy = ((__privtrans->pdls[0]->state & PDLA_HDRCPY) != 0);
329			}
330	12	50	if (hdrp) {
331	0	0	if(hdrp == &PL_sv_undef)
332	0		hdr_copy = &PL_sv_undef;
333			else { PDLA_COMMENT("Call the perl routine _hdr_copy...")
334			int count;
335			PDLA_COMMENT("Call the perl routine PDLA::_hdr_copy(hdrp)")
336	0		dSP;
337	0		ENTER ;
338	0		SAVETMPS ;
339	0	0	PUSHMARK(SP) ;
340	0	0	XPUSHs( hdrp );
341	0		PUTBACK ;
342	0		count = call_pv("PDLA::_hdr_copy",G_SCALAR);
343	0		SPAGAIN ;
344	0	0	if(count != 1)
345	0		croak("PDLA::_hdr_copy didn't return a single value - please report this bug (A).");
346
347	0		hdr_copy = (SV *)POPs;
348
349	0	0	if(hdr_copy && hdr_copy != &PL_sv_undef) {
		0
350	0		(void)SvREFCNT_inc(hdr_copy); PDLA_COMMENT("Keep hdr_copy from vanishing during FREETMPS")
351			}
352
353	0	0	FREETMPS ;
354	0		LEAVE ;
355
356
357			} PDLA_COMMENT("end of callback block")
358
359
360	0	0	if(hdr_copy != &PL_sv_undef)
361	0		SvREFCNT_dec(hdr_copy); PDLA_COMMENT("make hdr_copy mortal again")
362			} PDLA_COMMENT("end of if(hdrp) block")
363			} PDLA_COMMENT("end of conv. block")
364	12		__privtrans->__ddone = 1;
365			}
366	12		}
367
368
369	0		pdl_trans * pdl_map_copy(pdl_trans *__tr ) {
370			int __dim;
371	0		pdl_map_struct __privtrans = (pdl_map_struct ) __tr;
372
373			{
374	0		pdl_map_struct *__copy = malloc(sizeof(pdl_map_struct));
375	0		PDLA_THR_CLRMAGIC(&__copy->__pdlthread); PDLA_TR_CLRMAGIC(__copy);
376	0		__copy->has_badvalue = __privtrans->has_badvalue;
377	0		__copy->badvalue = __privtrans->badvalue;
378	0		__copy->flags = __privtrans->flags;
379	0		__copy->vtable = __privtrans->vtable;
380	0		__copy->__datatype = __privtrans->__datatype;
381	0		__copy->freeproc = NULL;
382	0		__copy->__ddone = __privtrans->__ddone;
383			{int i;
384	0	0	for(i=0; i<__copy->vtable->npdls; i++)
385	0		__copy->pdls[i] = __privtrans->pdls[i];
386			}
387	0		(__copy->in) = newSVsv(__privtrans->in);;(__copy->out) = newSVsv(__privtrans->out);;(__copy->map) = newSVsv(__privtrans->map);;(__copy->boundary) = newSVsv(__privtrans->boundary);;(__copy->method) = newSVsv(__privtrans->method);;(__copy->big) = newSVsv(__privtrans->big);;(__copy->blur) = newSVsv(__privtrans->blur);;(__copy->sv_min) = newSVsv(__privtrans->sv_min);;(__copy->flux) = newSVsv(__privtrans->flux);;(__copy->bv) = newSVsv(__privtrans->bv);;
388	0	0	if(__copy->__ddone) {
389	0		PDLA->thread_copy(&(__privtrans->__pdlthread),&(__copy->__pdlthread));
390			}
391	0		return (pdl_trans*)__copy;
392			}
393			}
394
395
396	12		void pdl_map_readdata(pdl_trans *__tr ) {
397			int __dim;
398	12		pdl_map_struct __privtrans = (pdl_map_struct ) __tr;
399
400			{
401			{PDLA_COMMENT("Start generic loop")
402
403	12		switch(__privtrans->__datatype) { case -42: PDLA_COMMENT("Warning eater") {(void)1;
404	0		} break; case PDLA_B: {
405	0	0	PDLA_Byte * k0_datap = ((PDLA_Byte *)(PDLA_REPRP_TRANS((__privtrans->pdls[0]),(__privtrans->vtable->per_pdl_flags[0]))));
		0
406	0		PDLA_Byte * k0_physdatap = ((PDLA_Byte *)((__privtrans->pdls[0])->data));
407
408
409			PDLA_COMMENT("THREADLOOPBEGIN")
410	0	0	if ( PDLA->startthreadloop(&(__privtrans->__pdlthread),__privtrans->vtable->readdata, __tr) ) return;
411	0		do { register PDLA_Indx __tind1=0,__tind2=0;
412	0		register PDLA_Indx __tnpdls = __privtrans->__pdlthread.npdls;
413	0		register PDLA_Indx __tdims1 = __privtrans->__pdlthread.dims[1];
414	0		register PDLA_Indx __tdims0 = __privtrans->__pdlthread.dims[0];
415	0		register PDLA_Indx *__offsp = PDLA->get_threadoffsp(&__privtrans->__pdlthread);
416	0		register PDLA_Indx __tinc0_0 = __privtrans->__pdlthread.incs[0];
417	0		register PDLA_Indx __tinc1_0 = __privtrans->__pdlthread.incs[__tnpdls+0];
418	0		k0_datap += __offsp[0];
419	0	0	for( __tind2 = 0 ;
420			__tind2 < __tdims1 ;
421	0		__tind2++
422	0		,k0_datap += __tinc1_0 - __tinc0_0 * __tdims0
423			)
424			{
425	0	0	for( __tind1 = 0 ;
426			__tind1 < __tdims0 ;
427	0		__tind1++
428	0		,k0_datap += __tinc0_0
429			)
430			{ PDLA_COMMENT("This is the tightest threadloop. Make sure inside is optimal."){
431
432			/*
433			* Pixel interpolation & averaging code
434			*
435			* Calls a common coordinate-transformation block (see following hdr)
436			* that isn't dependent on the type of the input variable.
437			*
438			* The inputs are SVs to avoid hassling with threadloops; threading
439			* is handled internally. To simplify the threading business, any
440			* thread dimensions should all be collapsed to a single one by the
441			* perl front-end.
442			*
443			*/
444
445			short ndims; /* Number of dimensions we're working in */
446			PDLA_Double tmp; / Workspace for prefrobnication */
447			PDLA_Indx ovec; / output pixel loop vector */
448			PDLA_Indx ivec; / input pixel loop vector */
449			PDLA_Indx ibvec; / input pixel base offset vector */
450			PDLA_Double dvec; / Residual vector for linearization */
451			PDLA_Double tvec; / Temporary floating-point vector */
452			PDLA_Double acc; / Threaded accumulator */
453			PDLA_Double wgt; / Threaded weight accumulator */
454			PDLA_Double wgt2; / Threaded weight accumulator for badval finding */
455			char bounds; / Boundary condition packed string */
456			PDLA_Indx index_stash; / Stash to store the opening index of dim sample scans */
457			char method; /* Method identifier (gets one of 'h','g') */
458			PDLA_Long big; /* Max size of input footprint for each pix */
459			PDLA_Double blur; /* Scaling of filter */
460			PDLA_Double sv_min; /* minimum singular value */
461			char flux; /* Flag to indicate flux conservation */
462			PDLA_Double *map_ptr;
463			PDLA_Long i, j;
464	0	0	PDLA_Byte badval = SvNV(__privtrans->bv);
465			#define HANNING_LOOKUP_SIZE 2500
466			static PDLA_Double hanning_lookup[HANNING_LOOKUP_SIZE + 2];
467			static int needs_hanning_calc = 1;
468			PDLA_Double zeta;
469			PDLA_Double hanning_offset;
470
471			#define GAUSSIAN_LOOKUP_SIZE 4000
472			#define GAUSSIAN_MAXVAL 6.25 /* 2.5 HWHMs (square it) */
473			static PDLA_Double gaussian_lookup[GAUSSIAN_LOOKUP_SIZE + 2];
474			static int needs_gaussian_calc = 1;
475
476	0		pdl *in = PDLA->SvPDLAV(__privtrans->in);
477	0		pdl *out = PDLA->SvPDLAV(__privtrans->out);
478	0		pdl *map = PDLA->SvPDLAV(__privtrans->map);
479
480	0		PDLA->make_physical(in);
481	0		PDLA->make_physical(out);
482	0		PDLA->make_physical(map);
483
484	0		ndims = map->ndims -1;
485
486			/*
487			* Allocate all our dynamic workspaces at once
488			* */
489	0		ovec = (PDLA_Indx *)(PDLA->smalloc( (STRLEN)
490	0		( + sizeof(PDLA_Indx) * 3 * ndims + sizeof(PDLA_Double) * (3ndims) + sizeof(PDLA_Double) in->dims[ndims] + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * 3 * ndimsndims + ndims + sizeof(char) ndims + sizeof(PDLA_Indx) * ndims )
491			)
492			);
493	0		ivec = &(ovec[ndims]);
494	0		ibvec = &(ivec[ndims]);
495	0		dvec = (PDLA_Double *)(&(ibvec[ndims]));
496	0		tvec = &(dvec[ndims]);
497	0		acc = &(tvec[ndims]);
498	0		wgt = &(acc[in->dims[ndims]]); wgt2 = &(wgt[in->dims[ndims]]); tmp = &(wgt2[in->dims[ndims]]);
499	0		bounds = (char )(&(tmp [3ndims*ndims+ndims]));
500	0		index_stash = (PDLA_Indx *) &(bounds[ndims]);
501
502
503			/***
504			* Fill in the boundary condition array
505			*/
506			{
507			char *bstr;
508			STRLEN blen;
509	0	0	bstr = SvPV(__privtrans->boundary,blen);
510
511	0	0	if(blen == 0) {
512			/* If no boundary is specified then every dim gets truncated */
513			int i;
514	0	0	for (i=0;i
515	0		bounds[i] = 1;
516			} else {
517			int i;
518	0	0	for(i=0;i
519	0	0	switch(bstr[i < blen ? i : blen-1 ]) {
520			case '0': case 'f': case 'F': /* forbid */
521	0		bounds[i] = 0;
522	0		break;
523			case '1': case 't': case 'T': /* truncate */
524	0		bounds[i] = 1;
525	0		break;
526			case '2': case 'e': case 'E': /* extend */
527	0		bounds[i] = 2;
528	0		break;
529			case '3': case 'p': case 'P': /* periodic */
530	0		bounds[i] = 3;
531	0		break;
532			case '4': case 'm': case 'M': /* mirror */
533	0		bounds[i] = 4;
534	0		break;
535			default:
536			{
537			char buf[BUFSIZ];
538	0		sprintf(buf,"Error in map: Unknown boundary condition '%c'",bstr[i]);
539	0		barf("%s", buf);
540			}
541	0		break;
542			}
543			}
544			}
545			}
546
547			/***
548			* Parse out the 'method', 'big', 'blur', and 'sv_min' arguments
549			*/
550	0	0	big = labs((PDLA_Long) (SvNV(__privtrans->big)));
551	0	0	if(big <= 0)
552	0		barf("%s","map: 'big' parameter must be >0");
553
554	0	0	blur = fabs((PDLA_Double) (SvNV(__privtrans->blur)));
555	0	0	if(blur < 0)
556	0		barf("%s","map: 'blur' parameter must be >= 0");
557
558	0	0	sv_min = fabs((PDLA_Double) (SvNV(__privtrans->sv_min)));
559	0	0	if(sv_min < 0)
560	0		barf("%s","map: 'sv_min' parameter must be >= 0");
561
562	0	0	flux = (SvNV(__privtrans->flux) != 0);
563
564			{
565			char *mstr;
566			STRLEN mlen;
567	0	0	mstr = SvPV(__privtrans->method,mlen);
568
569	0	0	if(mlen==0)
570	0		method = 'h';
571	0		else switch(*mstr) {
572	0		case 'H': method='H'; break;
573	0		case 'h': method = 'h';
574	0	0	if( needs_hanning_calc ) {
575			int i;
576	0	0	for(i=0;i
577	0		hanning_lookup[i] = 0.5 + 0.5 * cos(3.1415926536 / HANNING_LOOKUP_SIZE * i);
578			}
579	0		hanning_lookup[HANNING_LOOKUP_SIZE] = 0;
580	0		hanning_lookup[HANNING_LOOKUP_SIZE+1] = 0;
581	0		needs_hanning_calc = 0;
582			}
583	0		zeta = HANNING_LOOKUP_SIZE / blur;
584	0		hanning_offset = (blur >= 1) ?
585	0	0	0 :
586	0		0.5 * (1.0 - blur);
587	0		break;
588
589	0		case 'g': case 'j': method = 'g';
590	0		zeta = GAUSSIAN_LOOKUP_SIZE / GAUSSIAN_MAXVAL;
591
592	0	0	if( needs_gaussian_calc ) {
593			int i;
594	0	0	for(i=0;i
595	0		gaussian_lookup[i] = exp( - i * 1.386294 / zeta );
596			}
597	0		gaussian_lookup[GAUSSIAN_LOOKUP_SIZE] = 0;
598	0		gaussian_lookup[GAUSSIAN_LOOKUP_SIZE+1] = 0;
599	0		needs_gaussian_calc = 0;
600			}
601	0		break;
602
603	0		case 'G': case 'J': method = 'G'; break;
604			default:
605			{
606			char err[80];
607	0		sprintf(err,"Bug in map: unknown method '%c'",*mstr);
608	0		barf("%s", err);
609			}
610	0		break;
611			}
612			}
613
614
615
616			/* End of initialization */
617			/*************************************************************/
618			/* Start of Real Work */
619
620			/* Initialize coordinate vector and map offset
621			*/
622	0	0	for(i=0;i
623	0		ovec[i] = 0;
624
625	0		map_ptr = (PDLA_Double *)(map->data);
626
627
628			/* Main pixel loop (iterates over pixels in the output plane) */
629			do {
630			PDLA_Indx psize; PDLA_Indx i_off; PDLA_Indx j; char t_vio; char carry;
631			/* Prefrobnicate the transformation matrix */
632	0		psize = (PDLA_Long)(blur * PDLA_xform_aux(map, ovec, tmp, sv_min) + 0.5)+1; /* assignment */
633
634			#ifdef DEBUG_MAP
635			{
636			int k; PDLA_Indx foo = 0;
637			printf("ovec: [");
638			for(k=0;k
639			foo += ovec[k] * map->dimincs[k+1];
640			printf(" %2d ",(int)(ovec[k]));
641			}
642			printf("]; psize is %ld; big is %d; blur is %8.2f; map is [",psize,big, blur);
643			for(k=0;k
644			printf("%8.2f",(double)(((PDLA_Byte )(map->data))[foo + kmap->dimincs[0]]));
645			}
646			printf("]\n");
647			}
648			#endif
649
650			/* Don't bother accumulating output if psize is too large */
651	0	0	if(psize <= big) {
652			/* Use the prefrobnicated matrix to generate a local linearization.
653			* dvec gets the delta; ibvec gets the base.
654			*/
655			{
656	0		PDLA_Double *mp = map_ptr;
657	0	0	for (i=0;i
658	0		dvec[i] = mp - ( ibvec[i] = (PDLA_Long)(mp + 0.5)); /* assignment */
659	0		mp += map->dimincs[0];
660			}
661			}
662
663			/* Initialize input delta vector */
664	0	0	for(i=0;i
665	0		ivec[i] = -psize;
666
667			/* Initialize accumulators */
668			{
669	0		PDLA_Double *ac = acc;
670	0	0	for(i=0; i < in->dims[ndims]; i++)
671	0		*(ac++) = 0.0;
672
673			}
674			{
675	0		PDLA_Double *wg = wgt;
676	0	0	for(i=0;i < in->dims[ndims]; i++)
677	0		*(wg++) = 0.0;
678			}
679			{
680	0		PDLA_Double *wg = wgt2;
681	0	0	for(i=0;i < in->dims[ndims]; i++)
682	0		*(wg++) = 0.0;
683			}
684
685
686			/*
687			* Calculate the original offset into the data array, to enable
688			* delta calculations in the pixel loop
689			*
690			* i runs over dims; j holds the working integer index in the
691			* current dim.
692			*
693			* This code matches the incrementation code at the bottom of the accumulation loop
694			*/
695
696	0		t_vio = 0; /* truncation-boundary violation count - don't bother if it is nonzero */
697	0		i_off = 0;
698	0	0	for(i=0;i
699	0		j = ivec[i] + ibvec[i];
700	0	0	if(j<0 \|\| j >= in->dims[i]) {
		0
701	0		switch(bounds[i]) {
702			case 0: /* no breakage allowed */
703	0		barf("%s","index out-of-bounds in map");
704	0		break;
705			case 1: /* truncation */
706	0		t_vio++;
707			/* fall through */
708			case 2: /* extension -- crop */
709	0	0	if(j<0)
710	0		j=0;
711	0		else j = in->dims[i] - 1;
712	0		break;
713			case 3: /* periodic -- mod it */
714	0		j %= in->dims[i];
715	0	0	if(j<0)
716	0		j += in->dims[i];
717	0		break;
718			case 4: /* mirror -- reflect off the edges */
719	0		j += in->dims[i];
720	0		j %= (in->dims[i]*2);
721	0	0	if(j<0)
722	0		j += in->dims[i]*2;
723	0		j -= in->dims[i];
724	0	0	if(j<0) {
725	0		j *= -1;
726	0		j -= 1;
727			}
728	0		break;
729			default:
730	0		barf("%s", "Unknown boundary condition in map -- bug alert!");
731	0		break;
732			}
733			}
734	0		i_off += in->dimincs[i] * j;
735			}
736
737			/* Initialize index stashes for later reference as we scan the footprint */
738			/* It's a pain in the ass to deal with boundaries, and doubly so at the */
739			/* end of a dimensional scan. So we stash the index location at the */
740			/* start of each dimensional scan here. When we finish incrementing */
741			/* through a particular dim, we pull its value back out of the stash. */
742	0	0	for(i=0;i
743	0		index_stash[i] = i_off;
744			}
745
746			/* The input accumulation loop is the hotspot for the entire operation. */
747			/* We loop over pixels in the region of interest (+/- psize in each dimension) */
748			/* in the input array, use the linearized transform to bring each pixel center */
749			/* forward to the output plane, and calculate a weighting based on the chosen */
750			/* filter function. 'h' is a fast Hanning window rolloff using a lookup */
751			/* table that is initialized the first time through the code. 'H' is the */
752			/* same process, but explicitly calculated for each interation (~2x slower). */
753			/* 'g' uses a radial Gaussian filter. Rather than calculate the array offset */
754			/* into the input array fresh from the current input array vector each time, */
755			/* we walk through the array using dimincs and the old offset. This saves */
756			/* about half of the time spent on index calculation. */
757
758			do { /* Input accumulation loop */
759			PDLA_Double *cp;
760			PDLA_Double alpha;
761			/* Calculate the weight of the current input point. Don't bother if we're
762			* violating any truncation boundaries (in that case our value is zero, but
763			* for the interpolation we also set the weight to zero).
764			*/
765	0	0	if( !t_vio ) {
766
767	0		PDLA_Double *ap = tvec;
768	0		PDLA_Double *bp = dvec;
769	0		PDLA_Indx *ip = ivec;
770	0	0	for(i=0; i
771	0		(ap++) = (ip++) - *(bp++);
772
773	0		switch(method) {
774			PDLA_Double dd;
775			case 'h':
776			/* This is the Hanning window rolloff. It is a product of a simple */
777			/* cos^2(theta) rolloff in each dimension. Using a lookup table */
778			/* is about 2x faster than using cos(theta) directly in each */
779			/* weighting calculation, so we do. Using 2500 entries and linear */
780			/* interpolation is accurate to about 10^-7, and should preserve */
781			/* the contents of cache pretty well. */
782	0		alpha = 1;
783	0		cp = tmp;
784	0	0	for(i=0; i
785			int lodex;
786			int hidex;
787			PDLA_Double beta;
788	0		dd = 0;
789	0		ap = tvec;
790			/* Get the matrix-multiply element for this dimension */
791	0	0	for(j=0;j
792	0		dd += (ap++) *(cp++);
793
794			/* Do linear interpolation from the table */
795			/* The table captures a hanning window centered 0.5 pixel from center. */
796			/* We scale the filter by the blur parameter -- but if blur is less */
797			/* than unity, we shrink the hanning blur window while keeping the 0.5 */
798			/* value on the pixel edge at 0.5. For blur greater than unity, we */
799			/* scale simply. */
800	0		beta = fabs(dd) - hanning_offset;
801	0	0	if(beta > 0) {
802	0	0	if(beta >= blur) {
803	0		alpha = 0;
804	0		i = ndims;
805			} else {
806	0		beta *= zeta;
807	0		lodex = beta;
808	0	0	beta -= lodex; if(lodex > HANNING_LOOKUP_SIZE)
809	0		lodex = HANNING_LOOKUP_SIZE;
810	0		hidex = lodex+1;
811	0		alpha = hanning_lookup[hidex]beta + hanning_lookup[lodex]*(1-beta);
812			} /* end of interpolation branch */
813			} /* end of beta > 0 branch */
814			} /* end of dimension loop */
815	0		break;
816
817			case 'H':
818			/* This is the Hanning window rolloff with explicit calculation, preserved */
819			/* in case someone actually wants the slower longer method. */
820	0		alpha = 1;
821	0		cp = tmp;
822	0	0	for(i=0; i
823	0		dd = 0;
824	0		ap = tvec;
825	0	0	for(j=0;j
826	0		dd += (ap++) *(cp++);
827	0		dd = (fabs(dd) - hanning_offset) / blur;
828	0	0	if( dd > 1 ) {
829	0		alpha = 0;
830	0		i = ndims;
831			} else
832	0		alpha = (0.5 + 0.5 cos( dd * 3.1415926536 ));
833			}
834	0		break;
835
836			case 'g':
837			/* This is the Gaussian rolloff. It does lookup into a precalculated exponential. */
838			{
839	0		PDLA_Double sum = 0;
840	0		cp = tmp;
841	0	0	for(i=0; i
842	0		dd = 0;
843	0		ap = tvec;
844	0	0	for(j=0;j
845	0		dd += (ap++) *(cp++);
846	0		dd /= blur;
847	0		sum += dd * dd;
848	0	0	if(sum > GAUSSIAN_MAXVAL) {
849	0		i = ndims; /* exit early if we're too far out */
850	0		alpha = 0;
851			}
852			}
853	0	0	if( sum > GAUSSIAN_MAXVAL \|\| !isfinite(sum) \|\| isnan(sum) ) {
		0
		0
854	0		alpha = 0;
855			} else {
856			int lodex,hidex;
857	0		PDLA_Double beta = fabs(zeta * sum);
858
859	0		lodex = beta;
860	0		beta -= lodex; hidex = lodex+1;
861	0		alpha = gaussian_lookup[hidex]beta + gaussian_lookup[lodex](1 - beta);
862
863			}
864			}
865	0		break;
866
867			case 'G':
868			/* This is the Gaussian rolloff with explicit calculation, preserved */
869			/* in case someone actually wants the slower longer method. */
870			{
871	0		PDLA_Double sum = 0;
872	0		cp = tmp;
873	0	0	for(i=0; i
874	0		dd = 0;
875	0		ap = tvec;
876	0	0	for(j=0;j
877	0		dd += (ap++) *(cp++);
878	0		dd /= blur;
879	0		sum += dd * dd;
880	0	0	if(sum > 4) /* 2 pixels -- four half-widths */
881	0		i = ndims; /* exit early if this pixel is too far outside the footprint of the ideal point */
882			}
883
884	0	0	if(sum > GAUSSIAN_MAXVAL)
885	0		alpha = 0;
886			else
887	0		alpha = exp(-sum * 1.386294); /* Gaussian, rt(2)-pix HWHM */
888			}
889	0		break;
890			default:
891			{
892			char buf[80];
893	0		sprintf(buf,"This can't happen: method='%c'",method);
894	0		barf("%s", buf);
895			}
896			}
897
898			{ /* convenience block -- accumulate the current point into the weighted sum. */
899			/* This is more than simple assignment because we have our own explicit poor */
900			/* man's threadloop here, so we accumulate each threaded element separately. */
901	0		PDLA_Byte dat = ((PDLA_Byte )(in->data)) + i_off;
902	0		PDLA_Indx max = out->dims[ndims];
903	0	0	for( i=0; i < max; i++ ) {
904	0	0	if( (badval==0) \|\| (*dat != badval) ) {
		0
905	0		acc[i] += dat alpha;
906	0		dat += in->dimincs[ndims];
907	0		wgt[i] += alpha;
908			}
909	0		wgt2[i] += alpha; }
910			}
911			} /* end of t_vio check (i.e. of input accumulation) */
912
913
914			/* Advance input accumulation loop. */
915			/* We both increment the total vector and also advance the index. */
916	0		carry = 1;
917	0	0	for(i=0; i
		0
918			/* Advance the current element of the offset vector */
919	0		ivec[i]++;
920	0		j = ivec[i] + ibvec[i];
921
922			/* Advance the offset into the data array */
923	0	0	if( j > 0 && j <= in->dims[i]-1 ) {
		0
924			/* Normal case -- just advance the input vector */
925	0		i_off += in->dimincs[i];
926			} else {
927			/* Busted a boundary - either before or after. */
928	0		switch(bounds[i]){
929			case 0: /* no breakage allowed -- treat as truncation for interpolation */
930			case 1: /* truncation -- if we crossed the boundary mark ourselves out-of-bounds */
931	0	0	if( j == 0 )
932	0		t_vio--;
933	0	0	else if( j == in->dims[i] )
934	0		t_vio++;
935	0		break;
936			case 2: /* extension -- do nothing (so the same input point is re-used) */
937	0		break;
938			case 3: /* periodic -- advance and mod into the allowed range */
939	0	0	if((j % in->dims[i]) == 0) {
940	0		i_off -= in->dimincs[i] * (in->dims[i]-1);
941			} else {
942	0		i_off += in->dimincs[i];
943			}
944	0		break;
945			case 4: /* mirror -- advance or retreat depending on phase */
946	0		j += in->dims[i];
947	0		j %= (in->dims[i]*2);
948	0		j -= in->dims[i];
949	0	0	if( j!=0 && j!= -in->dims[i] ) {
		0
950	0	0	if(j<0)
951	0		i_off -= in->dimincs[i];
952			else
953	0		i_off += in->dimincs[i];
954			}
955	0		break;
956			}
957			}
958
959			/* Now check for carry */
960	0	0	if(ivec[i] <= psize) {
961			/* Normal case -- copy the current offset to the faster-running dim stashes */
962			int k;
963	0	0	for(k=0;k
964	0		index_stash[k] = i_off;
965			}
966	0		carry = 0;
967
968			} else { /* End of this scan -- recover the last position, and mark carry */
969	0		i_off = index_stash[i];
970	0	0	if(bounds[i]==1) {
971	0		j = ivec[i] + ibvec[i];
972	0	0	if( j < 0 \|\| j >= in->dims[i] )
		0
973	0		t_vio--;
974	0		ivec[i] = -psize;
975	0		j = ivec[i] + ibvec[i];
976	0	0	if( j < 0 \|\| j >= in->dims[i] )
		0
977	0		t_vio++;
978	0		carry = 1;
979			} else {
980	0		ivec[i] = -psize;
981			}
982			}
983			} /* End of counter-advance loop */
984	0	0	} while(carry==0); /* end of total data accumulation loop (termination condition has carry on last dim) */
985
986			{
987	0		PDLA_Double *ac = acc;
988	0		PDLA_Double *wg = wgt;
989	0		PDLA_Double *wg2 = wgt2;
990	0		PDLA_Byte *dat = out->data;
991
992			/* Calculate output vector offset */
993	0	0	for(i=0;i
994	0		dat += out->dimincs[i] * ovec[i];
995
996	0	0	if(!flux) {
997			/* Flux flag is NOT set -- normal case. Copy the weighted accumulated data. */
998	0	0	for(i=0; i < out->dims[ndims]; i++) {
999	0	0	if(wg && (wg2 / *wg) < 1.5 ) {
		0
1000	0		dat = (ac++) / *(wg++);
1001	0		wg2++;
1002			} else {
1003	0		*dat = badval;
1004	0		ac++; wg++; wg2++;
1005			}
1006	0		dat += out->dimincs[ndims];
1007			}
1008			} else {
1009			/* Flux flag is set - scale by the (unpadded) determinant of the Jacobian */
1010	0		PDLA_Double det = tmp[ndims*ndims];
1011	0	0	for(i=0; i < out->dims[ndims]; i++) {
1012	0	0	if(wg && (wg2 / *wg) < 1.5 ) {
		0
1013	0		dat = (ac++) / (wg++) det;
1014	0		wg2++;
1015			} else {
1016	0		*dat = badval;
1017	0		ac++; wg++; wg2++;
1018			}
1019	0		dat += out->dimincs[ndims];
1020			} /* end of for loop */
1021			} /* end of flux flag set conditional */
1022			} /* end of convenience block */
1023
1024			/* End of code for normal pixels */
1025			} else {
1026			/* The pixel was ludicrously huge -- just set this pixel to nan */
1027	0		PDLA_Byte *dat = out->data;
1028	0	0	for(i=0;i
1029	0		dat += out->dimincs[i] * ovec[i];
1030	0	0	for(i=0;idims[ndims];i++) {
1031	0		dat = badval; / Should handle bad values too -- not yet */
1032	0		dat += out->dimincs[ndims];
1033			}
1034			}
1035
1036			/* Increment the pixel counter */
1037			{
1038	0	0	for(i=0;
1039	0	0	(i
1040	0	0	(map_ptr += map->dimincs[i+1]) && /* Funky pre-test increment */
1041	0		(++(ovec[i]) >= out->dims[i]); /* Actual carry test */
1042	0		i++) {
1043	0		ovec[i] = 0;
1044	0		map_ptr -= out->dims[i] * map->dimincs[i+1];
1045			}
1046			}
1047	0	0	} while(i
1048
1049
1050
1051			}
1052			PDLA_COMMENT("THREADLOOPEND")
1053			}
1054			}
1055	0		k0_datap -= __tinc1_0 * __tdims1 + __offsp[0];
1056	0	0	} while(PDLA->iterthreadloop(&__privtrans->__pdlthread,2)); } break; case PDLA_S: {
1057	0	0	PDLA_Short * k0_datap = ((PDLA_Short *)(PDLA_REPRP_TRANS((__privtrans->pdls[0]),(__privtrans->vtable->per_pdl_flags[0]))));
		0
1058	0		PDLA_Short * k0_physdatap = ((PDLA_Short *)((__privtrans->pdls[0])->data));
1059
1060
1061			PDLA_COMMENT("THREADLOOPBEGIN")
1062	0	0	if ( PDLA->startthreadloop(&(__privtrans->__pdlthread),__privtrans->vtable->readdata, __tr) ) return;
1063	0		do { register PDLA_Indx __tind1=0,__tind2=0;
1064	0		register PDLA_Indx __tnpdls = __privtrans->__pdlthread.npdls;
1065	0		register PDLA_Indx __tdims1 = __privtrans->__pdlthread.dims[1];
1066	0		register PDLA_Indx __tdims0 = __privtrans->__pdlthread.dims[0];
1067	0		register PDLA_Indx *__offsp = PDLA->get_threadoffsp(&__privtrans->__pdlthread);
1068	0		register PDLA_Indx __tinc0_0 = __privtrans->__pdlthread.incs[0];
1069	0		register PDLA_Indx __tinc1_0 = __privtrans->__pdlthread.incs[__tnpdls+0];
1070	0		k0_datap += __offsp[0];
1071	0	0	for( __tind2 = 0 ;
1072			__tind2 < __tdims1 ;
1073	0		__tind2++
1074	0		,k0_datap += __tinc1_0 - __tinc0_0 * __tdims0
1075			)
1076			{
1077	0	0	for( __tind1 = 0 ;
1078			__tind1 < __tdims0 ;
1079	0		__tind1++
1080	0		,k0_datap += __tinc0_0
1081			)
1082			{ PDLA_COMMENT("This is the tightest threadloop. Make sure inside is optimal."){
1083
1084			/*
1085			* Pixel interpolation & averaging code
1086			*
1087			* Calls a common coordinate-transformation block (see following hdr)
1088			* that isn't dependent on the type of the input variable.
1089			*
1090			* The inputs are SVs to avoid hassling with threadloops; threading
1091			* is handled internally. To simplify the threading business, any
1092			* thread dimensions should all be collapsed to a single one by the
1093			* perl front-end.
1094			*
1095			*/
1096
1097			short ndims; /* Number of dimensions we're working in */
1098			PDLA_Double tmp; / Workspace for prefrobnication */
1099			PDLA_Indx ovec; / output pixel loop vector */
1100			PDLA_Indx ivec; / input pixel loop vector */
1101			PDLA_Indx ibvec; / input pixel base offset vector */
1102			PDLA_Double dvec; / Residual vector for linearization */
1103			PDLA_Double tvec; / Temporary floating-point vector */
1104			PDLA_Double acc; / Threaded accumulator */
1105			PDLA_Double wgt; / Threaded weight accumulator */
1106			PDLA_Double wgt2; / Threaded weight accumulator for badval finding */
1107			char bounds; / Boundary condition packed string */
1108			PDLA_Indx index_stash; / Stash to store the opening index of dim sample scans */
1109			char method; /* Method identifier (gets one of 'h','g') */
1110			PDLA_Long big; /* Max size of input footprint for each pix */
1111			PDLA_Double blur; /* Scaling of filter */
1112			PDLA_Double sv_min; /* minimum singular value */
1113			char flux; /* Flag to indicate flux conservation */
1114			PDLA_Double *map_ptr;
1115			PDLA_Long i, j;
1116	0	0	PDLA_Short badval = SvNV(__privtrans->bv);
1117			#define HANNING_LOOKUP_SIZE 2500
1118			static PDLA_Double hanning_lookup[HANNING_LOOKUP_SIZE + 2];
1119			static int needs_hanning_calc = 1;
1120			PDLA_Double zeta;
1121			PDLA_Double hanning_offset;
1122
1123			#define GAUSSIAN_LOOKUP_SIZE 4000
1124			#define GAUSSIAN_MAXVAL 6.25 /* 2.5 HWHMs (square it) */
1125			static PDLA_Double gaussian_lookup[GAUSSIAN_LOOKUP_SIZE + 2];
1126			static int needs_gaussian_calc = 1;
1127
1128	0		pdl *in = PDLA->SvPDLAV(__privtrans->in);
1129	0		pdl *out = PDLA->SvPDLAV(__privtrans->out);
1130	0		pdl *map = PDLA->SvPDLAV(__privtrans->map);
1131
1132	0		PDLA->make_physical(in);
1133	0		PDLA->make_physical(out);
1134	0		PDLA->make_physical(map);
1135
1136	0		ndims = map->ndims -1;
1137
1138			/*
1139			* Allocate all our dynamic workspaces at once
1140			* */
1141	0		ovec = (PDLA_Indx *)(PDLA->smalloc( (STRLEN)
1142	0		( + sizeof(PDLA_Indx) * 3 * ndims + sizeof(PDLA_Double) * (3ndims) + sizeof(PDLA_Double) in->dims[ndims] + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * 3 * ndimsndims + ndims + sizeof(char) ndims + sizeof(PDLA_Indx) * ndims )
1143			)
1144			);
1145	0		ivec = &(ovec[ndims]);
1146	0		ibvec = &(ivec[ndims]);
1147	0		dvec = (PDLA_Double *)(&(ibvec[ndims]));
1148	0		tvec = &(dvec[ndims]);
1149	0		acc = &(tvec[ndims]);
1150	0		wgt = &(acc[in->dims[ndims]]); wgt2 = &(wgt[in->dims[ndims]]); tmp = &(wgt2[in->dims[ndims]]);
1151	0		bounds = (char )(&(tmp [3ndims*ndims+ndims]));
1152	0		index_stash = (PDLA_Indx *) &(bounds[ndims]);
1153
1154
1155			/***
1156			* Fill in the boundary condition array
1157			*/
1158			{
1159			char *bstr;
1160			STRLEN blen;
1161	0	0	bstr = SvPV(__privtrans->boundary,blen);
1162
1163	0	0	if(blen == 0) {
1164			/* If no boundary is specified then every dim gets truncated */
1165			int i;
1166	0	0	for (i=0;i
1167	0		bounds[i] = 1;
1168			} else {
1169			int i;
1170	0	0	for(i=0;i
1171	0	0	switch(bstr[i < blen ? i : blen-1 ]) {
1172			case '0': case 'f': case 'F': /* forbid */
1173	0		bounds[i] = 0;
1174	0		break;
1175			case '1': case 't': case 'T': /* truncate */
1176	0		bounds[i] = 1;
1177	0		break;
1178			case '2': case 'e': case 'E': /* extend */
1179	0		bounds[i] = 2;
1180	0		break;
1181			case '3': case 'p': case 'P': /* periodic */
1182	0		bounds[i] = 3;
1183	0		break;
1184			case '4': case 'm': case 'M': /* mirror */
1185	0		bounds[i] = 4;
1186	0		break;
1187			default:
1188			{
1189			char buf[BUFSIZ];
1190	0		sprintf(buf,"Error in map: Unknown boundary condition '%c'",bstr[i]);
1191	0		barf("%s", buf);
1192			}
1193	0		break;
1194			}
1195			}
1196			}
1197			}
1198
1199			/***
1200			* Parse out the 'method', 'big', 'blur', and 'sv_min' arguments
1201			*/
1202	0	0	big = labs((PDLA_Long) (SvNV(__privtrans->big)));
1203	0	0	if(big <= 0)
1204	0		barf("%s","map: 'big' parameter must be >0");
1205
1206	0	0	blur = fabs((PDLA_Double) (SvNV(__privtrans->blur)));
1207	0	0	if(blur < 0)
1208	0		barf("%s","map: 'blur' parameter must be >= 0");
1209
1210	0	0	sv_min = fabs((PDLA_Double) (SvNV(__privtrans->sv_min)));
1211	0	0	if(sv_min < 0)
1212	0		barf("%s","map: 'sv_min' parameter must be >= 0");
1213
1214	0	0	flux = (SvNV(__privtrans->flux) != 0);
1215
1216			{
1217			char *mstr;
1218			STRLEN mlen;
1219	0	0	mstr = SvPV(__privtrans->method,mlen);
1220
1221	0	0	if(mlen==0)
1222	0		method = 'h';
1223	0		else switch(*mstr) {
1224	0		case 'H': method='H'; break;
1225	0		case 'h': method = 'h';
1226	0	0	if( needs_hanning_calc ) {
1227			int i;
1228	0	0	for(i=0;i
1229	0		hanning_lookup[i] = 0.5 + 0.5 * cos(3.1415926536 / HANNING_LOOKUP_SIZE * i);
1230			}
1231	0		hanning_lookup[HANNING_LOOKUP_SIZE] = 0;
1232	0		hanning_lookup[HANNING_LOOKUP_SIZE+1] = 0;
1233	0		needs_hanning_calc = 0;
1234			}
1235	0		zeta = HANNING_LOOKUP_SIZE / blur;
1236	0		hanning_offset = (blur >= 1) ?
1237	0	0	0 :
1238	0		0.5 * (1.0 - blur);
1239	0		break;
1240
1241	0		case 'g': case 'j': method = 'g';
1242	0		zeta = GAUSSIAN_LOOKUP_SIZE / GAUSSIAN_MAXVAL;
1243
1244	0	0	if( needs_gaussian_calc ) {
1245			int i;
1246	0	0	for(i=0;i
1247	0		gaussian_lookup[i] = exp( - i * 1.386294 / zeta );
1248			}
1249	0		gaussian_lookup[GAUSSIAN_LOOKUP_SIZE] = 0;
1250	0		gaussian_lookup[GAUSSIAN_LOOKUP_SIZE+1] = 0;
1251	0		needs_gaussian_calc = 0;
1252			}
1253	0		break;
1254
1255	0		case 'G': case 'J': method = 'G'; break;
1256			default:
1257			{
1258			char err[80];
1259	0		sprintf(err,"Bug in map: unknown method '%c'",*mstr);
1260	0		barf("%s", err);
1261			}
1262	0		break;
1263			}
1264			}
1265
1266
1267
1268			/* End of initialization */
1269			/*************************************************************/
1270			/* Start of Real Work */
1271
1272			/* Initialize coordinate vector and map offset
1273			*/
1274	0	0	for(i=0;i
1275	0		ovec[i] = 0;
1276
1277	0		map_ptr = (PDLA_Double *)(map->data);
1278
1279
1280			/* Main pixel loop (iterates over pixels in the output plane) */
1281			do {
1282			PDLA_Indx psize; PDLA_Indx i_off; PDLA_Indx j; char t_vio; char carry;
1283			/* Prefrobnicate the transformation matrix */
1284	0		psize = (PDLA_Long)(blur * PDLA_xform_aux(map, ovec, tmp, sv_min) + 0.5)+1; /* assignment */
1285
1286			#ifdef DEBUG_MAP
1287			{
1288			int k; PDLA_Indx foo = 0;
1289			printf("ovec: [");
1290			for(k=0;k
1291			foo += ovec[k] * map->dimincs[k+1];
1292			printf(" %2d ",(int)(ovec[k]));
1293			}
1294			printf("]; psize is %ld; big is %d; blur is %8.2f; map is [",psize,big, blur);
1295			for(k=0;k
1296			printf("%8.2f",(double)(((PDLA_Short )(map->data))[foo + kmap->dimincs[0]]));
1297			}
1298			printf("]\n");
1299			}
1300			#endif
1301
1302			/* Don't bother accumulating output if psize is too large */
1303	0	0	if(psize <= big) {
1304			/* Use the prefrobnicated matrix to generate a local linearization.
1305			* dvec gets the delta; ibvec gets the base.
1306			*/
1307			{
1308	0		PDLA_Double *mp = map_ptr;
1309	0	0	for (i=0;i
1310	0		dvec[i] = mp - ( ibvec[i] = (PDLA_Long)(mp + 0.5)); /* assignment */
1311	0		mp += map->dimincs[0];
1312			}
1313			}
1314
1315			/* Initialize input delta vector */
1316	0	0	for(i=0;i
1317	0		ivec[i] = -psize;
1318
1319			/* Initialize accumulators */
1320			{
1321	0		PDLA_Double *ac = acc;
1322	0	0	for(i=0; i < in->dims[ndims]; i++)
1323	0		*(ac++) = 0.0;
1324
1325			}
1326			{
1327	0		PDLA_Double *wg = wgt;
1328	0	0	for(i=0;i < in->dims[ndims]; i++)
1329	0		*(wg++) = 0.0;
1330			}
1331			{
1332	0		PDLA_Double *wg = wgt2;
1333	0	0	for(i=0;i < in->dims[ndims]; i++)
1334	0		*(wg++) = 0.0;
1335			}
1336
1337
1338			/*
1339			* Calculate the original offset into the data array, to enable
1340			* delta calculations in the pixel loop
1341			*
1342			* i runs over dims; j holds the working integer index in the
1343			* current dim.
1344			*
1345			* This code matches the incrementation code at the bottom of the accumulation loop
1346			*/
1347
1348	0		t_vio = 0; /* truncation-boundary violation count - don't bother if it is nonzero */
1349	0		i_off = 0;
1350	0	0	for(i=0;i
1351	0		j = ivec[i] + ibvec[i];
1352	0	0	if(j<0 \|\| j >= in->dims[i]) {
		0
1353	0		switch(bounds[i]) {
1354			case 0: /* no breakage allowed */
1355	0		barf("%s","index out-of-bounds in map");
1356	0		break;
1357			case 1: /* truncation */
1358	0		t_vio++;
1359			/* fall through */
1360			case 2: /* extension -- crop */
1361	0	0	if(j<0)
1362	0		j=0;
1363	0		else j = in->dims[i] - 1;
1364	0		break;
1365			case 3: /* periodic -- mod it */
1366	0		j %= in->dims[i];
1367	0	0	if(j<0)
1368	0		j += in->dims[i];
1369	0		break;
1370			case 4: /* mirror -- reflect off the edges */
1371	0		j += in->dims[i];
1372	0		j %= (in->dims[i]*2);
1373	0	0	if(j<0)
1374	0		j += in->dims[i]*2;
1375	0		j -= in->dims[i];
1376	0	0	if(j<0) {
1377	0		j *= -1;
1378	0		j -= 1;
1379			}
1380	0		break;
1381			default:
1382	0		barf("%s", "Unknown boundary condition in map -- bug alert!");
1383	0		break;
1384			}
1385			}
1386	0		i_off += in->dimincs[i] * j;
1387			}
1388
1389			/* Initialize index stashes for later reference as we scan the footprint */
1390			/* It's a pain in the ass to deal with boundaries, and doubly so at the */
1391			/* end of a dimensional scan. So we stash the index location at the */
1392			/* start of each dimensional scan here. When we finish incrementing */
1393			/* through a particular dim, we pull its value back out of the stash. */
1394	0	0	for(i=0;i
1395	0		index_stash[i] = i_off;
1396			}
1397
1398			/* The input accumulation loop is the hotspot for the entire operation. */
1399			/* We loop over pixels in the region of interest (+/- psize in each dimension) */
1400			/* in the input array, use the linearized transform to bring each pixel center */
1401			/* forward to the output plane, and calculate a weighting based on the chosen */
1402			/* filter function. 'h' is a fast Hanning window rolloff using a lookup */
1403			/* table that is initialized the first time through the code. 'H' is the */
1404			/* same process, but explicitly calculated for each interation (~2x slower). */
1405			/* 'g' uses a radial Gaussian filter. Rather than calculate the array offset */
1406			/* into the input array fresh from the current input array vector each time, */
1407			/* we walk through the array using dimincs and the old offset. This saves */
1408			/* about half of the time spent on index calculation. */
1409
1410			do { /* Input accumulation loop */
1411			PDLA_Double *cp;
1412			PDLA_Double alpha;
1413			/* Calculate the weight of the current input point. Don't bother if we're
1414			* violating any truncation boundaries (in that case our value is zero, but
1415			* for the interpolation we also set the weight to zero).
1416			*/
1417	0	0	if( !t_vio ) {
1418
1419	0		PDLA_Double *ap = tvec;
1420	0		PDLA_Double *bp = dvec;
1421	0		PDLA_Indx *ip = ivec;
1422	0	0	for(i=0; i
1423	0		(ap++) = (ip++) - *(bp++);
1424
1425	0		switch(method) {
1426			PDLA_Double dd;
1427			case 'h':
1428			/* This is the Hanning window rolloff. It is a product of a simple */
1429			/* cos^2(theta) rolloff in each dimension. Using a lookup table */
1430			/* is about 2x faster than using cos(theta) directly in each */
1431			/* weighting calculation, so we do. Using 2500 entries and linear */
1432			/* interpolation is accurate to about 10^-7, and should preserve */
1433			/* the contents of cache pretty well. */
1434	0		alpha = 1;
1435	0		cp = tmp;
1436	0	0	for(i=0; i
1437			int lodex;
1438			int hidex;
1439			PDLA_Double beta;
1440	0		dd = 0;
1441	0		ap = tvec;
1442			/* Get the matrix-multiply element for this dimension */
1443	0	0	for(j=0;j
1444	0		dd += (ap++) *(cp++);
1445
1446			/* Do linear interpolation from the table */
1447			/* The table captures a hanning window centered 0.5 pixel from center. */
1448			/* We scale the filter by the blur parameter -- but if blur is less */
1449			/* than unity, we shrink the hanning blur window while keeping the 0.5 */
1450			/* value on the pixel edge at 0.5. For blur greater than unity, we */
1451			/* scale simply. */
1452	0		beta = fabs(dd) - hanning_offset;
1453	0	0	if(beta > 0) {
1454	0	0	if(beta >= blur) {
1455	0		alpha = 0;
1456	0		i = ndims;
1457			} else {
1458	0		beta *= zeta;
1459	0		lodex = beta;
1460	0	0	beta -= lodex; if(lodex > HANNING_LOOKUP_SIZE)
1461	0		lodex = HANNING_LOOKUP_SIZE;
1462	0		hidex = lodex+1;
1463	0		alpha = hanning_lookup[hidex]beta + hanning_lookup[lodex]*(1-beta);
1464			} /* end of interpolation branch */
1465			} /* end of beta > 0 branch */
1466			} /* end of dimension loop */
1467	0		break;
1468
1469			case 'H':
1470			/* This is the Hanning window rolloff with explicit calculation, preserved */
1471			/* in case someone actually wants the slower longer method. */
1472	0		alpha = 1;
1473	0		cp = tmp;
1474	0	0	for(i=0; i
1475	0		dd = 0;
1476	0		ap = tvec;
1477	0	0	for(j=0;j
1478	0		dd += (ap++) *(cp++);
1479	0		dd = (fabs(dd) - hanning_offset) / blur;
1480	0	0	if( dd > 1 ) {
1481	0		alpha = 0;
1482	0		i = ndims;
1483			} else
1484	0		alpha = (0.5 + 0.5 cos( dd * 3.1415926536 ));
1485			}
1486	0		break;
1487
1488			case 'g':
1489			/* This is the Gaussian rolloff. It does lookup into a precalculated exponential. */
1490			{
1491	0		PDLA_Double sum = 0;
1492	0		cp = tmp;
1493	0	0	for(i=0; i
1494	0		dd = 0;
1495	0		ap = tvec;
1496	0	0	for(j=0;j
1497	0		dd += (ap++) *(cp++);
1498	0		dd /= blur;
1499	0		sum += dd * dd;
1500	0	0	if(sum > GAUSSIAN_MAXVAL) {
1501	0		i = ndims; /* exit early if we're too far out */
1502	0		alpha = 0;
1503			}
1504			}
1505	0	0	if( sum > GAUSSIAN_MAXVAL \|\| !isfinite(sum) \|\| isnan(sum) ) {
		0
		0
1506	0		alpha = 0;
1507			} else {
1508			int lodex,hidex;
1509	0		PDLA_Double beta = fabs(zeta * sum);
1510
1511	0		lodex = beta;
1512	0		beta -= lodex; hidex = lodex+1;
1513	0		alpha = gaussian_lookup[hidex]beta + gaussian_lookup[lodex](1 - beta);
1514
1515			}
1516			}
1517	0		break;
1518
1519			case 'G':
1520			/* This is the Gaussian rolloff with explicit calculation, preserved */
1521			/* in case someone actually wants the slower longer method. */
1522			{
1523	0		PDLA_Double sum = 0;
1524	0		cp = tmp;
1525	0	0	for(i=0; i
1526	0		dd = 0;
1527	0		ap = tvec;
1528	0	0	for(j=0;j
1529	0		dd += (ap++) *(cp++);
1530	0		dd /= blur;
1531	0		sum += dd * dd;
1532	0	0	if(sum > 4) /* 2 pixels -- four half-widths */
1533	0		i = ndims; /* exit early if this pixel is too far outside the footprint of the ideal point */
1534			}
1535
1536	0	0	if(sum > GAUSSIAN_MAXVAL)
1537	0		alpha = 0;
1538			else
1539	0		alpha = exp(-sum * 1.386294); /* Gaussian, rt(2)-pix HWHM */
1540			}
1541	0		break;
1542			default:
1543			{
1544			char buf[80];
1545	0		sprintf(buf,"This can't happen: method='%c'",method);
1546	0		barf("%s", buf);
1547			}
1548			}
1549
1550			{ /* convenience block -- accumulate the current point into the weighted sum. */
1551			/* This is more than simple assignment because we have our own explicit poor */
1552			/* man's threadloop here, so we accumulate each threaded element separately. */
1553	0		PDLA_Short dat = ((PDLA_Short )(in->data)) + i_off;
1554	0		PDLA_Indx max = out->dims[ndims];
1555	0	0	for( i=0; i < max; i++ ) {
1556	0	0	if( (badval==0) \|\| (*dat != badval) ) {
		0
1557	0		acc[i] += dat alpha;
1558	0		dat += in->dimincs[ndims];
1559	0		wgt[i] += alpha;
1560			}
1561	0		wgt2[i] += alpha; }
1562			}
1563			} /* end of t_vio check (i.e. of input accumulation) */
1564
1565
1566			/* Advance input accumulation loop. */
1567			/* We both increment the total vector and also advance the index. */
1568	0		carry = 1;
1569	0	0	for(i=0; i
		0
1570			/* Advance the current element of the offset vector */
1571	0		ivec[i]++;
1572	0		j = ivec[i] + ibvec[i];
1573
1574			/* Advance the offset into the data array */
1575	0	0	if( j > 0 && j <= in->dims[i]-1 ) {
		0
1576			/* Normal case -- just advance the input vector */
1577	0		i_off += in->dimincs[i];
1578			} else {
1579			/* Busted a boundary - either before or after. */
1580	0		switch(bounds[i]){
1581			case 0: /* no breakage allowed -- treat as truncation for interpolation */
1582			case 1: /* truncation -- if we crossed the boundary mark ourselves out-of-bounds */
1583	0	0	if( j == 0 )
1584	0		t_vio--;
1585	0	0	else if( j == in->dims[i] )
1586	0		t_vio++;
1587	0		break;
1588			case 2: /* extension -- do nothing (so the same input point is re-used) */
1589	0		break;
1590			case 3: /* periodic -- advance and mod into the allowed range */
1591	0	0	if((j % in->dims[i]) == 0) {
1592	0		i_off -= in->dimincs[i] * (in->dims[i]-1);
1593			} else {
1594	0		i_off += in->dimincs[i];
1595			}
1596	0		break;
1597			case 4: /* mirror -- advance or retreat depending on phase */
1598	0		j += in->dims[i];
1599	0		j %= (in->dims[i]*2);
1600	0		j -= in->dims[i];
1601	0	0	if( j!=0 && j!= -in->dims[i] ) {
		0
1602	0	0	if(j<0)
1603	0		i_off -= in->dimincs[i];
1604			else
1605	0		i_off += in->dimincs[i];
1606			}
1607	0		break;
1608			}
1609			}
1610
1611			/* Now check for carry */
1612	0	0	if(ivec[i] <= psize) {
1613			/* Normal case -- copy the current offset to the faster-running dim stashes */
1614			int k;
1615	0	0	for(k=0;k
1616	0		index_stash[k] = i_off;
1617			}
1618	0		carry = 0;
1619
1620			} else { /* End of this scan -- recover the last position, and mark carry */
1621	0		i_off = index_stash[i];
1622	0	0	if(bounds[i]==1) {
1623	0		j = ivec[i] + ibvec[i];
1624	0	0	if( j < 0 \|\| j >= in->dims[i] )
		0
1625	0		t_vio--;
1626	0		ivec[i] = -psize;
1627	0		j = ivec[i] + ibvec[i];
1628	0	0	if( j < 0 \|\| j >= in->dims[i] )
		0
1629	0		t_vio++;
1630	0		carry = 1;
1631			} else {
1632	0		ivec[i] = -psize;
1633			}
1634			}
1635			} /* End of counter-advance loop */
1636	0	0	} while(carry==0); /* end of total data accumulation loop (termination condition has carry on last dim) */
1637
1638			{
1639	0		PDLA_Double *ac = acc;
1640	0		PDLA_Double *wg = wgt;
1641	0		PDLA_Double *wg2 = wgt2;
1642	0		PDLA_Short *dat = out->data;
1643
1644			/* Calculate output vector offset */
1645	0	0	for(i=0;i
1646	0		dat += out->dimincs[i] * ovec[i];
1647
1648	0	0	if(!flux) {
1649			/* Flux flag is NOT set -- normal case. Copy the weighted accumulated data. */
1650	0	0	for(i=0; i < out->dims[ndims]; i++) {
1651	0	0	if(wg && (wg2 / *wg) < 1.5 ) {
		0
1652	0		dat = (ac++) / *(wg++);
1653	0		wg2++;
1654			} else {
1655	0		*dat = badval;
1656	0		ac++; wg++; wg2++;
1657			}
1658	0		dat += out->dimincs[ndims];
1659			}
1660			} else {
1661			/* Flux flag is set - scale by the (unpadded) determinant of the Jacobian */
1662	0		PDLA_Double det = tmp[ndims*ndims];
1663	0	0	for(i=0; i < out->dims[ndims]; i++) {
1664	0	0	if(wg && (wg2 / *wg) < 1.5 ) {
		0
1665	0		dat = (ac++) / (wg++) det;
1666	0		wg2++;
1667			} else {
1668	0		*dat = badval;
1669	0		ac++; wg++; wg2++;
1670			}
1671	0		dat += out->dimincs[ndims];
1672			} /* end of for loop */
1673			} /* end of flux flag set conditional */
1674			} /* end of convenience block */
1675
1676			/* End of code for normal pixels */
1677			} else {
1678			/* The pixel was ludicrously huge -- just set this pixel to nan */
1679	0		PDLA_Short *dat = out->data;
1680	0	0	for(i=0;i
1681	0		dat += out->dimincs[i] * ovec[i];
1682	0	0	for(i=0;idims[ndims];i++) {
1683	0		dat = badval; / Should handle bad values too -- not yet */
1684	0		dat += out->dimincs[ndims];
1685			}
1686			}
1687
1688			/* Increment the pixel counter */
1689			{
1690	0	0	for(i=0;
1691	0	0	(i
1692	0	0	(map_ptr += map->dimincs[i+1]) && /* Funky pre-test increment */
1693	0		(++(ovec[i]) >= out->dims[i]); /* Actual carry test */
1694	0		i++) {
1695	0		ovec[i] = 0;
1696	0		map_ptr -= out->dims[i] * map->dimincs[i+1];
1697			}
1698			}
1699	0	0	} while(i
1700
1701
1702
1703			}
1704			PDLA_COMMENT("THREADLOOPEND")
1705			}
1706			}
1707	0		k0_datap -= __tinc1_0 * __tdims1 + __offsp[0];
1708	0	0	} while(PDLA->iterthreadloop(&__privtrans->__pdlthread,2)); } break; case PDLA_US: {
1709	0	0	PDLA_Ushort * k0_datap = ((PDLA_Ushort *)(PDLA_REPRP_TRANS((__privtrans->pdls[0]),(__privtrans->vtable->per_pdl_flags[0]))));
		0
1710	0		PDLA_Ushort * k0_physdatap = ((PDLA_Ushort *)((__privtrans->pdls[0])->data));
1711
1712
1713			PDLA_COMMENT("THREADLOOPBEGIN")
1714	0	0	if ( PDLA->startthreadloop(&(__privtrans->__pdlthread),__privtrans->vtable->readdata, __tr) ) return;
1715	0		do { register PDLA_Indx __tind1=0,__tind2=0;
1716	0		register PDLA_Indx __tnpdls = __privtrans->__pdlthread.npdls;
1717	0		register PDLA_Indx __tdims1 = __privtrans->__pdlthread.dims[1];
1718	0		register PDLA_Indx __tdims0 = __privtrans->__pdlthread.dims[0];
1719	0		register PDLA_Indx *__offsp = PDLA->get_threadoffsp(&__privtrans->__pdlthread);
1720	0		register PDLA_Indx __tinc0_0 = __privtrans->__pdlthread.incs[0];
1721	0		register PDLA_Indx __tinc1_0 = __privtrans->__pdlthread.incs[__tnpdls+0];
1722	0		k0_datap += __offsp[0];
1723	0	0	for( __tind2 = 0 ;
1724			__tind2 < __tdims1 ;
1725	0		__tind2++
1726	0		,k0_datap += __tinc1_0 - __tinc0_0 * __tdims0
1727			)
1728			{
1729	0	0	for( __tind1 = 0 ;
1730			__tind1 < __tdims0 ;
1731	0		__tind1++
1732	0		,k0_datap += __tinc0_0
1733			)
1734			{ PDLA_COMMENT("This is the tightest threadloop. Make sure inside is optimal."){
1735
1736			/*
1737			* Pixel interpolation & averaging code
1738			*
1739			* Calls a common coordinate-transformation block (see following hdr)
1740			* that isn't dependent on the type of the input variable.
1741			*
1742			* The inputs are SVs to avoid hassling with threadloops; threading
1743			* is handled internally. To simplify the threading business, any
1744			* thread dimensions should all be collapsed to a single one by the
1745			* perl front-end.
1746			*
1747			*/
1748
1749			short ndims; /* Number of dimensions we're working in */
1750			PDLA_Double tmp; / Workspace for prefrobnication */
1751			PDLA_Indx ovec; / output pixel loop vector */
1752			PDLA_Indx ivec; / input pixel loop vector */
1753			PDLA_Indx ibvec; / input pixel base offset vector */
1754			PDLA_Double dvec; / Residual vector for linearization */
1755			PDLA_Double tvec; / Temporary floating-point vector */
1756			PDLA_Double acc; / Threaded accumulator */
1757			PDLA_Double wgt; / Threaded weight accumulator */
1758			PDLA_Double wgt2; / Threaded weight accumulator for badval finding */
1759			char bounds; / Boundary condition packed string */
1760			PDLA_Indx index_stash; / Stash to store the opening index of dim sample scans */
1761			char method; /* Method identifier (gets one of 'h','g') */
1762			PDLA_Long big; /* Max size of input footprint for each pix */
1763			PDLA_Double blur; /* Scaling of filter */
1764			PDLA_Double sv_min; /* minimum singular value */
1765			char flux; /* Flag to indicate flux conservation */
1766			PDLA_Double *map_ptr;
1767			PDLA_Long i, j;
1768	0	0	PDLA_Ushort badval = SvNV(__privtrans->bv);
1769			#define HANNING_LOOKUP_SIZE 2500
1770			static PDLA_Double hanning_lookup[HANNING_LOOKUP_SIZE + 2];
1771			static int needs_hanning_calc = 1;
1772			PDLA_Double zeta;
1773			PDLA_Double hanning_offset;
1774
1775			#define GAUSSIAN_LOOKUP_SIZE 4000
1776			#define GAUSSIAN_MAXVAL 6.25 /* 2.5 HWHMs (square it) */
1777			static PDLA_Double gaussian_lookup[GAUSSIAN_LOOKUP_SIZE + 2];
1778			static int needs_gaussian_calc = 1;
1779
1780	0		pdl *in = PDLA->SvPDLAV(__privtrans->in);
1781	0		pdl *out = PDLA->SvPDLAV(__privtrans->out);
1782	0		pdl *map = PDLA->SvPDLAV(__privtrans->map);
1783
1784	0		PDLA->make_physical(in);
1785	0		PDLA->make_physical(out);
1786	0		PDLA->make_physical(map);
1787
1788	0		ndims = map->ndims -1;
1789
1790			/*
1791			* Allocate all our dynamic workspaces at once
1792			* */
1793	0		ovec = (PDLA_Indx *)(PDLA->smalloc( (STRLEN)
1794	0		( + sizeof(PDLA_Indx) * 3 * ndims + sizeof(PDLA_Double) * (3ndims) + sizeof(PDLA_Double) in->dims[ndims] + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * 3 * ndimsndims + ndims + sizeof(char) ndims + sizeof(PDLA_Indx) * ndims )
1795			)
1796			);
1797	0		ivec = &(ovec[ndims]);
1798	0		ibvec = &(ivec[ndims]);
1799	0		dvec = (PDLA_Double *)(&(ibvec[ndims]));
1800	0		tvec = &(dvec[ndims]);
1801	0		acc = &(tvec[ndims]);
1802	0		wgt = &(acc[in->dims[ndims]]); wgt2 = &(wgt[in->dims[ndims]]); tmp = &(wgt2[in->dims[ndims]]);
1803	0		bounds = (char )(&(tmp [3ndims*ndims+ndims]));
1804	0		index_stash = (PDLA_Indx *) &(bounds[ndims]);
1805
1806
1807			/***
1808			* Fill in the boundary condition array
1809			*/
1810			{
1811			char *bstr;
1812			STRLEN blen;
1813	0	0	bstr = SvPV(__privtrans->boundary,blen);
1814
1815	0	0	if(blen == 0) {
1816			/* If no boundary is specified then every dim gets truncated */
1817			int i;
1818	0	0	for (i=0;i
1819	0		bounds[i] = 1;
1820			} else {
1821			int i;
1822	0	0	for(i=0;i
1823	0	0	switch(bstr[i < blen ? i : blen-1 ]) {
1824			case '0': case 'f': case 'F': /* forbid */
1825	0		bounds[i] = 0;
1826	0		break;
1827			case '1': case 't': case 'T': /* truncate */
1828	0		bounds[i] = 1;
1829	0		break;
1830			case '2': case 'e': case 'E': /* extend */
1831	0		bounds[i] = 2;
1832	0		break;
1833			case '3': case 'p': case 'P': /* periodic */
1834	0		bounds[i] = 3;
1835	0		break;
1836			case '4': case 'm': case 'M': /* mirror */
1837	0		bounds[i] = 4;
1838	0		break;
1839			default:
1840			{
1841			char buf[BUFSIZ];
1842	0		sprintf(buf,"Error in map: Unknown boundary condition '%c'",bstr[i]);
1843	0		barf("%s", buf);
1844			}
1845	0		break;
1846			}
1847			}
1848			}
1849			}
1850
1851			/***
1852			* Parse out the 'method', 'big', 'blur', and 'sv_min' arguments
1853			*/
1854	0	0	big = labs((PDLA_Long) (SvNV(__privtrans->big)));
1855	0	0	if(big <= 0)
1856	0		barf("%s","map: 'big' parameter must be >0");
1857
1858	0	0	blur = fabs((PDLA_Double) (SvNV(__privtrans->blur)));
1859	0	0	if(blur < 0)
1860	0		barf("%s","map: 'blur' parameter must be >= 0");
1861
1862	0	0	sv_min = fabs((PDLA_Double) (SvNV(__privtrans->sv_min)));
1863	0	0	if(sv_min < 0)
1864	0		barf("%s","map: 'sv_min' parameter must be >= 0");
1865
1866	0	0	flux = (SvNV(__privtrans->flux) != 0);
1867
1868			{
1869			char *mstr;
1870			STRLEN mlen;
1871	0	0	mstr = SvPV(__privtrans->method,mlen);
1872
1873	0	0	if(mlen==0)
1874	0		method = 'h';
1875	0		else switch(*mstr) {
1876	0		case 'H': method='H'; break;
1877	0		case 'h': method = 'h';
1878	0	0	if( needs_hanning_calc ) {
1879			int i;
1880	0	0	for(i=0;i
1881	0		hanning_lookup[i] = 0.5 + 0.5 * cos(3.1415926536 / HANNING_LOOKUP_SIZE * i);
1882			}
1883	0		hanning_lookup[HANNING_LOOKUP_SIZE] = 0;
1884	0		hanning_lookup[HANNING_LOOKUP_SIZE+1] = 0;
1885	0		needs_hanning_calc = 0;
1886			}
1887	0		zeta = HANNING_LOOKUP_SIZE / blur;
1888	0		hanning_offset = (blur >= 1) ?
1889	0	0	0 :
1890	0		0.5 * (1.0 - blur);
1891	0		break;
1892
1893	0		case 'g': case 'j': method = 'g';
1894	0		zeta = GAUSSIAN_LOOKUP_SIZE / GAUSSIAN_MAXVAL;
1895
1896	0	0	if( needs_gaussian_calc ) {
1897			int i;
1898	0	0	for(i=0;i
1899	0		gaussian_lookup[i] = exp( - i * 1.386294 / zeta );
1900			}
1901	0		gaussian_lookup[GAUSSIAN_LOOKUP_SIZE] = 0;
1902	0		gaussian_lookup[GAUSSIAN_LOOKUP_SIZE+1] = 0;
1903	0		needs_gaussian_calc = 0;
1904			}
1905	0		break;
1906
1907	0		case 'G': case 'J': method = 'G'; break;
1908			default:
1909			{
1910			char err[80];
1911	0		sprintf(err,"Bug in map: unknown method '%c'",*mstr);
1912	0		barf("%s", err);
1913			}
1914	0		break;
1915			}
1916			}
1917
1918
1919
1920			/* End of initialization */
1921			/*************************************************************/
1922			/* Start of Real Work */
1923
1924			/* Initialize coordinate vector and map offset
1925			*/
1926	0	0	for(i=0;i
1927	0		ovec[i] = 0;
1928
1929	0		map_ptr = (PDLA_Double *)(map->data);
1930
1931
1932			/* Main pixel loop (iterates over pixels in the output plane) */
1933			do {
1934			PDLA_Indx psize; PDLA_Indx i_off; PDLA_Indx j; char t_vio; char carry;
1935			/* Prefrobnicate the transformation matrix */
1936	0		psize = (PDLA_Long)(blur * PDLA_xform_aux(map, ovec, tmp, sv_min) + 0.5)+1; /* assignment */
1937
1938			#ifdef DEBUG_MAP
1939			{
1940			int k; PDLA_Indx foo = 0;
1941			printf("ovec: [");
1942			for(k=0;k
1943			foo += ovec[k] * map->dimincs[k+1];
1944			printf(" %2d ",(int)(ovec[k]));
1945			}
1946			printf("]; psize is %ld; big is %d; blur is %8.2f; map is [",psize,big, blur);
1947			for(k=0;k
1948			printf("%8.2f",(double)(((PDLA_Ushort )(map->data))[foo + kmap->dimincs[0]]));
1949			}
1950			printf("]\n");
1951			}
1952			#endif
1953
1954			/* Don't bother accumulating output if psize is too large */
1955	0	0	if(psize <= big) {
1956			/* Use the prefrobnicated matrix to generate a local linearization.
1957			* dvec gets the delta; ibvec gets the base.
1958			*/
1959			{
1960	0		PDLA_Double *mp = map_ptr;
1961	0	0	for (i=0;i
1962	0		dvec[i] = mp - ( ibvec[i] = (PDLA_Long)(mp + 0.5)); /* assignment */
1963	0		mp += map->dimincs[0];
1964			}
1965			}
1966
1967			/* Initialize input delta vector */
1968	0	0	for(i=0;i
1969	0		ivec[i] = -psize;
1970
1971			/* Initialize accumulators */
1972			{
1973	0		PDLA_Double *ac = acc;
1974	0	0	for(i=0; i < in->dims[ndims]; i++)
1975	0		*(ac++) = 0.0;
1976
1977			}
1978			{
1979	0		PDLA_Double *wg = wgt;
1980	0	0	for(i=0;i < in->dims[ndims]; i++)
1981	0		*(wg++) = 0.0;
1982			}
1983			{
1984	0		PDLA_Double *wg = wgt2;
1985	0	0	for(i=0;i < in->dims[ndims]; i++)
1986	0		*(wg++) = 0.0;
1987			}
1988
1989
1990			/*
1991			* Calculate the original offset into the data array, to enable
1992			* delta calculations in the pixel loop
1993			*
1994			* i runs over dims; j holds the working integer index in the
1995			* current dim.
1996			*
1997			* This code matches the incrementation code at the bottom of the accumulation loop
1998			*/
1999
2000	0		t_vio = 0; /* truncation-boundary violation count - don't bother if it is nonzero */
2001	0		i_off = 0;
2002	0	0	for(i=0;i
2003	0		j = ivec[i] + ibvec[i];
2004	0	0	if(j<0 \|\| j >= in->dims[i]) {
		0
2005	0		switch(bounds[i]) {
2006			case 0: /* no breakage allowed */
2007	0		barf("%s","index out-of-bounds in map");
2008	0		break;
2009			case 1: /* truncation */
2010	0		t_vio++;
2011			/* fall through */
2012			case 2: /* extension -- crop */
2013	0	0	if(j<0)
2014	0		j=0;
2015	0		else j = in->dims[i] - 1;
2016	0		break;
2017			case 3: /* periodic -- mod it */
2018	0		j %= in->dims[i];
2019	0	0	if(j<0)
2020	0		j += in->dims[i];
2021	0		break;
2022			case 4: /* mirror -- reflect off the edges */
2023	0		j += in->dims[i];
2024	0		j %= (in->dims[i]*2);
2025	0	0	if(j<0)
2026	0		j += in->dims[i]*2;
2027	0		j -= in->dims[i];
2028	0	0	if(j<0) {
2029	0		j *= -1;
2030	0		j -= 1;
2031			}
2032	0		break;
2033			default:
2034	0		barf("%s", "Unknown boundary condition in map -- bug alert!");
2035	0		break;
2036			}
2037			}
2038	0		i_off += in->dimincs[i] * j;
2039			}
2040
2041			/* Initialize index stashes for later reference as we scan the footprint */
2042			/* It's a pain in the ass to deal with boundaries, and doubly so at the */
2043			/* end of a dimensional scan. So we stash the index location at the */
2044			/* start of each dimensional scan here. When we finish incrementing */
2045			/* through a particular dim, we pull its value back out of the stash. */
2046	0	0	for(i=0;i
2047	0		index_stash[i] = i_off;
2048			}
2049
2050			/* The input accumulation loop is the hotspot for the entire operation. */
2051			/* We loop over pixels in the region of interest (+/- psize in each dimension) */
2052			/* in the input array, use the linearized transform to bring each pixel center */
2053			/* forward to the output plane, and calculate a weighting based on the chosen */
2054			/* filter function. 'h' is a fast Hanning window rolloff using a lookup */
2055			/* table that is initialized the first time through the code. 'H' is the */
2056			/* same process, but explicitly calculated for each interation (~2x slower). */
2057			/* 'g' uses a radial Gaussian filter. Rather than calculate the array offset */
2058			/* into the input array fresh from the current input array vector each time, */
2059			/* we walk through the array using dimincs and the old offset. This saves */
2060			/* about half of the time spent on index calculation. */
2061
2062			do { /* Input accumulation loop */
2063			PDLA_Double *cp;
2064			PDLA_Double alpha;
2065			/* Calculate the weight of the current input point. Don't bother if we're
2066			* violating any truncation boundaries (in that case our value is zero, but
2067			* for the interpolation we also set the weight to zero).
2068			*/
2069	0	0	if( !t_vio ) {
2070
2071	0		PDLA_Double *ap = tvec;
2072	0		PDLA_Double *bp = dvec;
2073	0		PDLA_Indx *ip = ivec;
2074	0	0	for(i=0; i
2075	0		(ap++) = (ip++) - *(bp++);
2076
2077	0		switch(method) {
2078			PDLA_Double dd;
2079			case 'h':
2080			/* This is the Hanning window rolloff. It is a product of a simple */
2081			/* cos^2(theta) rolloff in each dimension. Using a lookup table */
2082			/* is about 2x faster than using cos(theta) directly in each */
2083			/* weighting calculation, so we do. Using 2500 entries and linear */
2084			/* interpolation is accurate to about 10^-7, and should preserve */
2085			/* the contents of cache pretty well. */
2086	0		alpha = 1;
2087	0		cp = tmp;
2088	0	0	for(i=0; i
2089			int lodex;
2090			int hidex;
2091			PDLA_Double beta;
2092	0		dd = 0;
2093	0		ap = tvec;
2094			/* Get the matrix-multiply element for this dimension */
2095	0	0	for(j=0;j
2096	0		dd += (ap++) *(cp++);
2097
2098			/* Do linear interpolation from the table */
2099			/* The table captures a hanning window centered 0.5 pixel from center. */
2100			/* We scale the filter by the blur parameter -- but if blur is less */
2101			/* than unity, we shrink the hanning blur window while keeping the 0.5 */
2102			/* value on the pixel edge at 0.5. For blur greater than unity, we */
2103			/* scale simply. */
2104	0		beta = fabs(dd) - hanning_offset;
2105	0	0	if(beta > 0) {
2106	0	0	if(beta >= blur) {
2107	0		alpha = 0;
2108	0		i = ndims;
2109			} else {
2110	0		beta *= zeta;
2111	0		lodex = beta;
2112	0	0	beta -= lodex; if(lodex > HANNING_LOOKUP_SIZE)
2113	0		lodex = HANNING_LOOKUP_SIZE;
2114	0		hidex = lodex+1;
2115	0		alpha = hanning_lookup[hidex]beta + hanning_lookup[lodex]*(1-beta);
2116			} /* end of interpolation branch */
2117			} /* end of beta > 0 branch */
2118			} /* end of dimension loop */
2119	0		break;
2120
2121			case 'H':
2122			/* This is the Hanning window rolloff with explicit calculation, preserved */
2123			/* in case someone actually wants the slower longer method. */
2124	0		alpha = 1;
2125	0		cp = tmp;
2126	0	0	for(i=0; i
2127	0		dd = 0;
2128	0		ap = tvec;
2129	0	0	for(j=0;j
2130	0		dd += (ap++) *(cp++);
2131	0		dd = (fabs(dd) - hanning_offset) / blur;
2132	0	0	if( dd > 1 ) {
2133	0		alpha = 0;
2134	0		i = ndims;
2135			} else
2136	0		alpha = (0.5 + 0.5 cos( dd * 3.1415926536 ));
2137			}
2138	0		break;
2139
2140			case 'g':
2141			/* This is the Gaussian rolloff. It does lookup into a precalculated exponential. */
2142			{
2143	0		PDLA_Double sum = 0;
2144	0		cp = tmp;
2145	0	0	for(i=0; i
2146	0		dd = 0;
2147	0		ap = tvec;
2148	0	0	for(j=0;j
2149	0		dd += (ap++) *(cp++);
2150	0		dd /= blur;
2151	0		sum += dd * dd;
2152	0	0	if(sum > GAUSSIAN_MAXVAL) {
2153	0		i = ndims; /* exit early if we're too far out */
2154	0		alpha = 0;
2155			}
2156			}
2157	0	0	if( sum > GAUSSIAN_MAXVAL \|\| !isfinite(sum) \|\| isnan(sum) ) {
		0
		0
2158	0		alpha = 0;
2159			} else {
2160			int lodex,hidex;
2161	0		PDLA_Double beta = fabs(zeta * sum);
2162
2163	0		lodex = beta;
2164	0		beta -= lodex; hidex = lodex+1;
2165	0		alpha = gaussian_lookup[hidex]beta + gaussian_lookup[lodex](1 - beta);
2166
2167			}
2168			}
2169	0		break;
2170
2171			case 'G':
2172			/* This is the Gaussian rolloff with explicit calculation, preserved */
2173			/* in case someone actually wants the slower longer method. */
2174			{
2175	0		PDLA_Double sum = 0;
2176	0		cp = tmp;
2177	0	0	for(i=0; i
2178	0		dd = 0;
2179	0		ap = tvec;
2180	0	0	for(j=0;j
2181	0		dd += (ap++) *(cp++);
2182	0		dd /= blur;
2183	0		sum += dd * dd;
2184	0	0	if(sum > 4) /* 2 pixels -- four half-widths */
2185	0		i = ndims; /* exit early if this pixel is too far outside the footprint of the ideal point */
2186			}
2187
2188	0	0	if(sum > GAUSSIAN_MAXVAL)
2189	0		alpha = 0;
2190			else
2191	0		alpha = exp(-sum * 1.386294); /* Gaussian, rt(2)-pix HWHM */
2192			}
2193	0		break;
2194			default:
2195			{
2196			char buf[80];
2197	0		sprintf(buf,"This can't happen: method='%c'",method);
2198	0		barf("%s", buf);
2199			}
2200			}
2201
2202			{ /* convenience block -- accumulate the current point into the weighted sum. */
2203			/* This is more than simple assignment because we have our own explicit poor */
2204			/* man's threadloop here, so we accumulate each threaded element separately. */
2205	0		PDLA_Ushort dat = ((PDLA_Ushort )(in->data)) + i_off;
2206	0		PDLA_Indx max = out->dims[ndims];
2207	0	0	for( i=0; i < max; i++ ) {
2208	0	0	if( (badval==0) \|\| (*dat != badval) ) {
		0
2209	0		acc[i] += dat alpha;
2210	0		dat += in->dimincs[ndims];
2211	0		wgt[i] += alpha;
2212			}
2213	0		wgt2[i] += alpha; }
2214			}
2215			} /* end of t_vio check (i.e. of input accumulation) */
2216
2217
2218			/* Advance input accumulation loop. */
2219			/* We both increment the total vector and also advance the index. */
2220	0		carry = 1;
2221	0	0	for(i=0; i
		0
2222			/* Advance the current element of the offset vector */
2223	0		ivec[i]++;
2224	0		j = ivec[i] + ibvec[i];
2225
2226			/* Advance the offset into the data array */
2227	0	0	if( j > 0 && j <= in->dims[i]-1 ) {
		0
2228			/* Normal case -- just advance the input vector */
2229	0		i_off += in->dimincs[i];
2230			} else {
2231			/* Busted a boundary - either before or after. */
2232	0		switch(bounds[i]){
2233			case 0: /* no breakage allowed -- treat as truncation for interpolation */
2234			case 1: /* truncation -- if we crossed the boundary mark ourselves out-of-bounds */
2235	0	0	if( j == 0 )
2236	0		t_vio--;
2237	0	0	else if( j == in->dims[i] )
2238	0		t_vio++;
2239	0		break;
2240			case 2: /* extension -- do nothing (so the same input point is re-used) */
2241	0		break;
2242			case 3: /* periodic -- advance and mod into the allowed range */
2243	0	0	if((j % in->dims[i]) == 0) {
2244	0		i_off -= in->dimincs[i] * (in->dims[i]-1);
2245			} else {
2246	0		i_off += in->dimincs[i];
2247			}
2248	0		break;
2249			case 4: /* mirror -- advance or retreat depending on phase */
2250	0		j += in->dims[i];
2251	0		j %= (in->dims[i]*2);
2252	0		j -= in->dims[i];
2253	0	0	if( j!=0 && j!= -in->dims[i] ) {
		0
2254	0	0	if(j<0)
2255	0		i_off -= in->dimincs[i];
2256			else
2257	0		i_off += in->dimincs[i];
2258			}
2259	0		break;
2260			}
2261			}
2262
2263			/* Now check for carry */
2264	0	0	if(ivec[i] <= psize) {
2265			/* Normal case -- copy the current offset to the faster-running dim stashes */
2266			int k;
2267	0	0	for(k=0;k
2268	0		index_stash[k] = i_off;
2269			}
2270	0		carry = 0;
2271
2272			} else { /* End of this scan -- recover the last position, and mark carry */
2273	0		i_off = index_stash[i];
2274	0	0	if(bounds[i]==1) {
2275	0		j = ivec[i] + ibvec[i];
2276	0	0	if( j < 0 \|\| j >= in->dims[i] )
		0
2277	0		t_vio--;
2278	0		ivec[i] = -psize;
2279	0		j = ivec[i] + ibvec[i];
2280	0	0	if( j < 0 \|\| j >= in->dims[i] )
		0
2281	0		t_vio++;
2282	0		carry = 1;
2283			} else {
2284	0		ivec[i] = -psize;
2285			}
2286			}
2287			} /* End of counter-advance loop */
2288	0	0	} while(carry==0); /* end of total data accumulation loop (termination condition has carry on last dim) */
2289
2290			{
2291	0		PDLA_Double *ac = acc;
2292	0		PDLA_Double *wg = wgt;
2293	0		PDLA_Double *wg2 = wgt2;
2294	0		PDLA_Ushort *dat = out->data;
2295
2296			/* Calculate output vector offset */
2297	0	0	for(i=0;i
2298	0		dat += out->dimincs[i] * ovec[i];
2299
2300	0	0	if(!flux) {
2301			/* Flux flag is NOT set -- normal case. Copy the weighted accumulated data. */
2302	0	0	for(i=0; i < out->dims[ndims]; i++) {
2303	0	0	if(wg && (wg2 / *wg) < 1.5 ) {
		0
2304	0		dat = (ac++) / *(wg++);
2305	0		wg2++;
2306			} else {
2307	0		*dat = badval;
2308	0		ac++; wg++; wg2++;
2309			}
2310	0		dat += out->dimincs[ndims];
2311			}
2312			} else {
2313			/* Flux flag is set - scale by the (unpadded) determinant of the Jacobian */
2314	0		PDLA_Double det = tmp[ndims*ndims];
2315	0	0	for(i=0; i < out->dims[ndims]; i++) {
2316	0	0	if(wg && (wg2 / *wg) < 1.5 ) {
		0
2317	0		dat = (ac++) / (wg++) det;
2318	0		wg2++;
2319			} else {
2320	0		*dat = badval;
2321	0		ac++; wg++; wg2++;
2322			}
2323	0		dat += out->dimincs[ndims];
2324			} /* end of for loop */
2325			} /* end of flux flag set conditional */
2326			} /* end of convenience block */
2327
2328			/* End of code for normal pixels */
2329			} else {
2330			/* The pixel was ludicrously huge -- just set this pixel to nan */
2331	0		PDLA_Ushort *dat = out->data;
2332	0	0	for(i=0;i
2333	0		dat += out->dimincs[i] * ovec[i];
2334	0	0	for(i=0;idims[ndims];i++) {
2335	0		dat = badval; / Should handle bad values too -- not yet */
2336	0		dat += out->dimincs[ndims];
2337			}
2338			}
2339
2340			/* Increment the pixel counter */
2341			{
2342	0	0	for(i=0;
2343	0	0	(i
2344	0	0	(map_ptr += map->dimincs[i+1]) && /* Funky pre-test increment */
2345	0		(++(ovec[i]) >= out->dims[i]); /* Actual carry test */
2346	0		i++) {
2347	0		ovec[i] = 0;
2348	0		map_ptr -= out->dims[i] * map->dimincs[i+1];
2349			}
2350			}
2351	0	0	} while(i
2352
2353
2354
2355			}
2356			PDLA_COMMENT("THREADLOOPEND")
2357			}
2358			}
2359	0		k0_datap -= __tinc1_0 * __tdims1 + __offsp[0];
2360	0	0	} while(PDLA->iterthreadloop(&__privtrans->__pdlthread,2)); } break; case PDLA_L: {
2361	5	50	PDLA_Long * k0_datap = ((PDLA_Long *)(PDLA_REPRP_TRANS((__privtrans->pdls[0]),(__privtrans->vtable->per_pdl_flags[0]))));
		0
2362	5		PDLA_Long * k0_physdatap = ((PDLA_Long *)((__privtrans->pdls[0])->data));
2363
2364
2365			PDLA_COMMENT("THREADLOOPBEGIN")
2366	5	50	if ( PDLA->startthreadloop(&(__privtrans->__pdlthread),__privtrans->vtable->readdata, __tr) ) return;
2367	5		do { register PDLA_Indx __tind1=0,__tind2=0;
2368	5		register PDLA_Indx __tnpdls = __privtrans->__pdlthread.npdls;
2369	5		register PDLA_Indx __tdims1 = __privtrans->__pdlthread.dims[1];
2370	5		register PDLA_Indx __tdims0 = __privtrans->__pdlthread.dims[0];
2371	5		register PDLA_Indx *__offsp = PDLA->get_threadoffsp(&__privtrans->__pdlthread);
2372	5		register PDLA_Indx __tinc0_0 = __privtrans->__pdlthread.incs[0];
2373	5		register PDLA_Indx __tinc1_0 = __privtrans->__pdlthread.incs[__tnpdls+0];
2374	5		k0_datap += __offsp[0];
2375	10	100	for( __tind2 = 0 ;
2376			__tind2 < __tdims1 ;
2377	5		__tind2++
2378	5		,k0_datap += __tinc1_0 - __tinc0_0 * __tdims0
2379			)
2380			{
2381	10	100	for( __tind1 = 0 ;
2382			__tind1 < __tdims0 ;
2383	5		__tind1++
2384	5		,k0_datap += __tinc0_0
2385			)
2386			{ PDLA_COMMENT("This is the tightest threadloop. Make sure inside is optimal."){
2387
2388			/*
2389			* Pixel interpolation & averaging code
2390			*
2391			* Calls a common coordinate-transformation block (see following hdr)
2392			* that isn't dependent on the type of the input variable.
2393			*
2394			* The inputs are SVs to avoid hassling with threadloops; threading
2395			* is handled internally. To simplify the threading business, any
2396			* thread dimensions should all be collapsed to a single one by the
2397			* perl front-end.
2398			*
2399			*/
2400
2401			short ndims; /* Number of dimensions we're working in */
2402			PDLA_Double tmp; / Workspace for prefrobnication */
2403			PDLA_Indx ovec; / output pixel loop vector */
2404			PDLA_Indx ivec; / input pixel loop vector */
2405			PDLA_Indx ibvec; / input pixel base offset vector */
2406			PDLA_Double dvec; / Residual vector for linearization */
2407			PDLA_Double tvec; / Temporary floating-point vector */
2408			PDLA_Double acc; / Threaded accumulator */
2409			PDLA_Double wgt; / Threaded weight accumulator */
2410			PDLA_Double wgt2; / Threaded weight accumulator for badval finding */
2411			char bounds; / Boundary condition packed string */
2412			PDLA_Indx index_stash; / Stash to store the opening index of dim sample scans */
2413			char method; /* Method identifier (gets one of 'h','g') */
2414			PDLA_Long big; /* Max size of input footprint for each pix */
2415			PDLA_Double blur; /* Scaling of filter */
2416			PDLA_Double sv_min; /* minimum singular value */
2417			char flux; /* Flag to indicate flux conservation */
2418			PDLA_Double *map_ptr;
2419			PDLA_Long i, j;
2420	5	50	PDLA_Long badval = SvNV(__privtrans->bv);
2421			#define HANNING_LOOKUP_SIZE 2500
2422			static PDLA_Double hanning_lookup[HANNING_LOOKUP_SIZE + 2];
2423			static int needs_hanning_calc = 1;
2424			PDLA_Double zeta;
2425			PDLA_Double hanning_offset;
2426
2427			#define GAUSSIAN_LOOKUP_SIZE 4000
2428			#define GAUSSIAN_MAXVAL 6.25 /* 2.5 HWHMs (square it) */
2429			static PDLA_Double gaussian_lookup[GAUSSIAN_LOOKUP_SIZE + 2];
2430			static int needs_gaussian_calc = 1;
2431
2432	5		pdl *in = PDLA->SvPDLAV(__privtrans->in);
2433	5		pdl *out = PDLA->SvPDLAV(__privtrans->out);
2434	5		pdl *map = PDLA->SvPDLAV(__privtrans->map);
2435
2436	5		PDLA->make_physical(in);
2437	5		PDLA->make_physical(out);
2438	5		PDLA->make_physical(map);
2439
2440	5		ndims = map->ndims -1;
2441
2442			/*
2443			* Allocate all our dynamic workspaces at once
2444			* */
2445	5		ovec = (PDLA_Indx *)(PDLA->smalloc( (STRLEN)
2446	5		( + sizeof(PDLA_Indx) * 3 * ndims + sizeof(PDLA_Double) * (3ndims) + sizeof(PDLA_Double) in->dims[ndims] + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * 3 * ndimsndims + ndims + sizeof(char) ndims + sizeof(PDLA_Indx) * ndims )
2447			)
2448			);
2449	5		ivec = &(ovec[ndims]);
2450	5		ibvec = &(ivec[ndims]);
2451	5		dvec = (PDLA_Double *)(&(ibvec[ndims]));
2452	5		tvec = &(dvec[ndims]);
2453	5		acc = &(tvec[ndims]);
2454	5		wgt = &(acc[in->dims[ndims]]); wgt2 = &(wgt[in->dims[ndims]]); tmp = &(wgt2[in->dims[ndims]]);
2455	5		bounds = (char )(&(tmp [3ndims*ndims+ndims]));
2456	5		index_stash = (PDLA_Indx *) &(bounds[ndims]);
2457
2458
2459			/***
2460			* Fill in the boundary condition array
2461			*/
2462			{
2463			char *bstr;
2464			STRLEN blen;
2465	5	50	bstr = SvPV(__privtrans->boundary,blen);
2466
2467	5	50	if(blen == 0) {
2468			/* If no boundary is specified then every dim gets truncated */
2469			int i;
2470	0	0	for (i=0;i
2471	0		bounds[i] = 1;
2472			} else {
2473			int i;
2474	15	100	for(i=0;i
2475	10	100	switch(bstr[i < blen ? i : blen-1 ]) {
2476			case '0': case 'f': case 'F': /* forbid */
2477	0		bounds[i] = 0;
2478	0		break;
2479			case '1': case 't': case 'T': /* truncate */
2480	10		bounds[i] = 1;
2481	10		break;
2482			case '2': case 'e': case 'E': /* extend */
2483	0		bounds[i] = 2;
2484	0		break;
2485			case '3': case 'p': case 'P': /* periodic */
2486	0		bounds[i] = 3;
2487	0		break;
2488			case '4': case 'm': case 'M': /* mirror */
2489	0		bounds[i] = 4;
2490	0		break;
2491			default:
2492			{
2493			char buf[BUFSIZ];
2494	0		sprintf(buf,"Error in map: Unknown boundary condition '%c'",bstr[i]);
2495	0		barf("%s", buf);
2496			}
2497	0		break;
2498			}
2499			}
2500			}
2501			}
2502
2503			/***
2504			* Parse out the 'method', 'big', 'blur', and 'sv_min' arguments
2505			*/
2506	5	50	big = labs((PDLA_Long) (SvNV(__privtrans->big)));
2507	5	50	if(big <= 0)
2508	0		barf("%s","map: 'big' parameter must be >0");
2509
2510	5	50	blur = fabs((PDLA_Double) (SvNV(__privtrans->blur)));
2511	5	50	if(blur < 0)
2512	0		barf("%s","map: 'blur' parameter must be >= 0");
2513
2514	5	50	sv_min = fabs((PDLA_Double) (SvNV(__privtrans->sv_min)));
2515	5	50	if(sv_min < 0)
2516	0		barf("%s","map: 'sv_min' parameter must be >= 0");
2517
2518	5	50	flux = (SvNV(__privtrans->flux) != 0);
2519
2520			{
2521			char *mstr;
2522			STRLEN mlen;
2523	5	50	mstr = SvPV(__privtrans->method,mlen);
2524
2525	5	50	if(mlen==0)
2526	0		method = 'h';
2527	5		else switch(*mstr) {
2528	1		case 'H': method='H'; break;
2529	1		case 'h': method = 'h';
2530	1	50	if( needs_hanning_calc ) {
2531			int i;
2532	2501	100	for(i=0;i
2533	2500		hanning_lookup[i] = 0.5 + 0.5 * cos(3.1415926536 / HANNING_LOOKUP_SIZE * i);
2534			}
2535	1		hanning_lookup[HANNING_LOOKUP_SIZE] = 0;
2536	1		hanning_lookup[HANNING_LOOKUP_SIZE+1] = 0;
2537	1		needs_hanning_calc = 0;
2538			}
2539	1		zeta = HANNING_LOOKUP_SIZE / blur;
2540	1		hanning_offset = (blur >= 1) ?
2541	1	50	0 :
2542	0		0.5 * (1.0 - blur);
2543	1		break;
2544
2545	2		case 'g': case 'j': method = 'g';
2546	2		zeta = GAUSSIAN_LOOKUP_SIZE / GAUSSIAN_MAXVAL;
2547
2548	2	100	if( needs_gaussian_calc ) {
2549			int i;
2550	4001	100	for(i=0;i
2551	4000		gaussian_lookup[i] = exp( - i * 1.386294 / zeta );
2552			}
2553	1		gaussian_lookup[GAUSSIAN_LOOKUP_SIZE] = 0;
2554	1		gaussian_lookup[GAUSSIAN_LOOKUP_SIZE+1] = 0;
2555	1		needs_gaussian_calc = 0;
2556			}
2557	2		break;
2558
2559	1		case 'G': case 'J': method = 'G'; break;
2560			default:
2561			{
2562			char err[80];
2563	0		sprintf(err,"Bug in map: unknown method '%c'",*mstr);
2564	0		barf("%s", err);
2565			}
2566	0		break;
2567			}
2568			}
2569
2570
2571
2572			/* End of initialization */
2573			/*************************************************************/
2574			/* Start of Real Work */
2575
2576			/* Initialize coordinate vector and map offset
2577			*/
2578	15	100	for(i=0;i
2579	10		ovec[i] = 0;
2580
2581	5		map_ptr = (PDLA_Double *)(map->data);
2582
2583
2584			/* Main pixel loop (iterates over pixels in the output plane) */
2585			do {
2586			PDLA_Indx psize; PDLA_Indx i_off; PDLA_Indx j; char t_vio; char carry;
2587			/* Prefrobnicate the transformation matrix */
2588	737280		psize = (PDLA_Long)(blur * PDLA_xform_aux(map, ovec, tmp, sv_min) + 0.5)+1; /* assignment */
2589
2590			#ifdef DEBUG_MAP
2591			{
2592			int k; PDLA_Indx foo = 0;
2593			printf("ovec: [");
2594			for(k=0;k
2595			foo += ovec[k] * map->dimincs[k+1];
2596			printf(" %2d ",(int)(ovec[k]));
2597			}
2598			printf("]; psize is %ld; big is %d; blur is %8.2f; map is [",psize,big, blur);
2599			for(k=0;k
2600			printf("%8.2f",(double)(((PDLA_Long )(map->data))[foo + kmap->dimincs[0]]));
2601			}
2602			printf("]\n");
2603			}
2604			#endif
2605
2606			/* Don't bother accumulating output if psize is too large */
2607	737280	50	if(psize <= big) {
2608			/* Use the prefrobnicated matrix to generate a local linearization.
2609			* dvec gets the delta; ibvec gets the base.
2610			*/
2611			{
2612	737280		PDLA_Double *mp = map_ptr;
2613	2211840	100	for (i=0;i
2614	1474560		dvec[i] = mp - ( ibvec[i] = (PDLA_Long)(mp + 0.5)); /* assignment */
2615	1474560		mp += map->dimincs[0];
2616			}
2617			}
2618
2619			/* Initialize input delta vector */
2620	2211840	100	for(i=0;i
2621	1474560		ivec[i] = -psize;
2622
2623			/* Initialize accumulators */
2624			{
2625	737280		PDLA_Double *ac = acc;
2626	1474560	100	for(i=0; i < in->dims[ndims]; i++)
2627	737280		*(ac++) = 0.0;
2628
2629			}
2630			{
2631	737280		PDLA_Double *wg = wgt;
2632	1474560	100	for(i=0;i < in->dims[ndims]; i++)
2633	737280		*(wg++) = 0.0;
2634			}
2635			{
2636	737280		PDLA_Double *wg = wgt2;
2637	1474560	100	for(i=0;i < in->dims[ndims]; i++)
2638	737280		*(wg++) = 0.0;
2639			}
2640
2641
2642			/*
2643			* Calculate the original offset into the data array, to enable
2644			* delta calculations in the pixel loop
2645			*
2646			* i runs over dims; j holds the working integer index in the
2647			* current dim.
2648			*
2649			* This code matches the incrementation code at the bottom of the accumulation loop
2650			*/
2651
2652	737280		t_vio = 0; /* truncation-boundary violation count - don't bother if it is nonzero */
2653	737280		i_off = 0;
2654	2211840	100	for(i=0;i
2655	1474560		j = ivec[i] + ibvec[i];
2656	1474560	100	if(j<0 \|\| j >= in->dims[i]) {
		50
2657	1118280		switch(bounds[i]) {
2658			case 0: /* no breakage allowed */
2659	0		barf("%s","index out-of-bounds in map");
2660	0		break;
2661			case 1: /* truncation */
2662	1118280		t_vio++;
2663			/* fall through */
2664			case 2: /* extension -- crop */
2665	1118280	50	if(j<0)
2666	1118280		j=0;
2667	0		else j = in->dims[i] - 1;
2668	1118280		break;
2669			case 3: /* periodic -- mod it */
2670	0		j %= in->dims[i];
2671	0	0	if(j<0)
2672	0		j += in->dims[i];
2673	0		break;
2674			case 4: /* mirror -- reflect off the edges */
2675	0		j += in->dims[i];
2676	0		j %= (in->dims[i]*2);
2677	0	0	if(j<0)
2678	0		j += in->dims[i]*2;
2679	0		j -= in->dims[i];
2680	0	0	if(j<0) {
2681	0		j *= -1;
2682	0		j -= 1;
2683			}
2684	0		break;
2685			default:
2686	0		barf("%s", "Unknown boundary condition in map -- bug alert!");
2687	0		break;
2688			}
2689			}
2690	1474560		i_off += in->dimincs[i] * j;
2691			}
2692
2693			/* Initialize index stashes for later reference as we scan the footprint */
2694			/* It's a pain in the ass to deal with boundaries, and doubly so at the */
2695			/* end of a dimensional scan. So we stash the index location at the */
2696			/* start of each dimensional scan here. When we finish incrementing */
2697			/* through a particular dim, we pull its value back out of the stash. */
2698	2211840	100	for(i=0;i
2699	1474560		index_stash[i] = i_off;
2700			}
2701
2702			/* The input accumulation loop is the hotspot for the entire operation. */
2703			/* We loop over pixels in the region of interest (+/- psize in each dimension) */
2704			/* in the input array, use the linearized transform to bring each pixel center */
2705			/* forward to the output plane, and calculate a weighting based on the chosen */
2706			/* filter function. 'h' is a fast Hanning window rolloff using a lookup */
2707			/* table that is initialized the first time through the code. 'H' is the */
2708			/* same process, but explicitly calculated for each interation (~2x slower). */
2709			/* 'g' uses a radial Gaussian filter. Rather than calculate the array offset */
2710			/* into the input array fresh from the current input array vector each time, */
2711			/* we walk through the array using dimincs and the old offset. This saves */
2712			/* about half of the time spent on index calculation. */
2713
2714			do { /* Input accumulation loop */
2715			PDLA_Double *cp;
2716			PDLA_Double alpha;
2717			/* Calculate the weight of the current input point. Don't bother if we're
2718			* violating any truncation boundaries (in that case our value is zero, but
2719			* for the interpolation we also set the weight to zero).
2720			*/
2721	12401920	100	if( !t_vio ) {
2722
2723	3043295		PDLA_Double *ap = tvec;
2724	3043295		PDLA_Double *bp = dvec;
2725	3043295		PDLA_Indx *ip = ivec;
2726	9129885	100	for(i=0; i
2727	6086590		(ap++) = (ip++) - *(bp++);
2728
2729	3043295		switch(method) {
2730			PDLA_Double dd;
2731			case 'h':
2732			/* This is the Hanning window rolloff. It is a product of a simple */
2733			/* cos^2(theta) rolloff in each dimension. Using a lookup table */
2734			/* is about 2x faster than using cos(theta) directly in each */
2735			/* weighting calculation, so we do. Using 2500 entries and linear */
2736			/* interpolation is accurate to about 10^-7, and should preserve */
2737			/* the contents of cache pretty well. */
2738	608659		alpha = 1;
2739	608659		cp = tmp;
2740	1482006	100	for(i=0; i
2741			int lodex;
2742			int hidex;
2743			PDLA_Double beta;
2744	873347		dd = 0;
2745	873347		ap = tvec;
2746			/* Get the matrix-multiply element for this dimension */
2747	2620041	100	for(j=0;j
2748	1746694		dd += (ap++) *(cp++);
2749
2750			/* Do linear interpolation from the table */
2751			/* The table captures a hanning window centered 0.5 pixel from center. */
2752			/* We scale the filter by the blur parameter -- but if blur is less */
2753			/* than unity, we shrink the hanning blur window while keeping the 0.5 */
2754			/* value on the pixel edge at 0.5. For blur greater than unity, we */
2755			/* scale simply. */
2756	873347		beta = fabs(dd) - hanning_offset;
2757	873347	100	if(beta > 0) {
2758	865595	100	if(beta >= blur) {
2759	511183		alpha = 0;
2760	511183		i = ndims;
2761			} else {
2762	354412		beta *= zeta;
2763	354412		lodex = beta;
2764	354412	50	beta -= lodex; if(lodex > HANNING_LOOKUP_SIZE)
2765	0		lodex = HANNING_LOOKUP_SIZE;
2766	354412		hidex = lodex+1;
2767	354412		alpha = hanning_lookup[hidex]beta + hanning_lookup[lodex]*(1-beta);
2768			} /* end of interpolation branch */
2769			} /* end of beta > 0 branch */
2770			} /* end of dimension loop */
2771	608659		break;
2772
2773			case 'H':
2774			/* This is the Hanning window rolloff with explicit calculation, preserved */
2775			/* in case someone actually wants the slower longer method. */
2776	608659		alpha = 1;
2777	608659		cp = tmp;
2778	1482014	100	for(i=0; i
2779	873355		dd = 0;
2780	873355		ap = tvec;
2781	2620065	100	for(j=0;j
2782	1746710		dd += (ap++) *(cp++);
2783	873355		dd = (fabs(dd) - hanning_offset) / blur;
2784	873355	100	if( dd > 1 ) {
2785	511175		alpha = 0;
2786	511175		i = ndims;
2787			} else
2788	362180		alpha = (0.5 + 0.5 cos( dd * 3.1415926536 ));
2789			}
2790	608659		break;
2791
2792			case 'g':
2793			/* This is the Gaussian rolloff. It does lookup into a precalculated exponential. */
2794			{
2795	1217318		PDLA_Double sum = 0;
2796	1217318		cp = tmp;
2797	3577400	100	for(i=0; i
2798	2360082		dd = 0;
2799	2360082		ap = tvec;
2800	7080246	100	for(j=0;j
2801	4720164		dd += (ap++) *(cp++);
2802	2360082		dd /= blur;
2803	2360082		sum += dd * dd;
2804	2360082	100	if(sum > GAUSSIAN_MAXVAL) {
2805	308982		i = ndims; /* exit early if we're too far out */
2806	308982		alpha = 0;
2807			}
2808			}
2809	1217318	100	if( sum > GAUSSIAN_MAXVAL \|\| !isfinite(sum) \|\| isnan(sum) ) {
		100
		50
2810	316734		alpha = 0;
2811			} else {
2812			int lodex,hidex;
2813	900584		PDLA_Double beta = fabs(zeta * sum);
2814
2815	900584		lodex = beta;
2816	900584		beta -= lodex; hidex = lodex+1;
2817	900584		alpha = gaussian_lookup[hidex]beta + gaussian_lookup[lodex](1 - beta);
2818
2819			}
2820			}
2821	1217318		break;
2822
2823			case 'G':
2824			/* This is the Gaussian rolloff with explicit calculation, preserved */
2825			/* in case someone actually wants the slower longer method. */
2826			{
2827	608659		PDLA_Double sum = 0;
2828	608659		cp = tmp;
2829	1708727	100	for(i=0; i
2830	1100068		dd = 0;
2831	1100068		ap = tvec;
2832	3300204	100	for(j=0;j
2833	2200136		dd += (ap++) *(cp++);
2834	1100068		dd /= blur;
2835	1100068		sum += dd * dd;
2836	1100068	100	if(sum > 4) /* 2 pixels -- four half-widths */
2837	316983		i = ndims; /* exit early if this pixel is too far outside the footprint of the ideal point */
2838			}
2839
2840	608659	100	if(sum > GAUSSIAN_MAXVAL)
2841	123548		alpha = 0;
2842			else
2843	485111		alpha = exp(-sum * 1.386294); /* Gaussian, rt(2)-pix HWHM */
2844			}
2845	608659		break;
2846			default:
2847			{
2848			char buf[80];
2849	0		sprintf(buf,"This can't happen: method='%c'",method);
2850	0		barf("%s", buf);
2851			}
2852			}
2853
2854			{ /* convenience block -- accumulate the current point into the weighted sum. */
2855			/* This is more than simple assignment because we have our own explicit poor */
2856			/* man's threadloop here, so we accumulate each threaded element separately. */
2857	3043295		PDLA_Long dat = ((PDLA_Long )(in->data)) + i_off;
2858	3043295		PDLA_Indx max = out->dims[ndims];
2859	6086590	100	for( i=0; i < max; i++ ) {
2860	3043295	50	if( (badval==0) \|\| (*dat != badval) ) {
		0
2861	3043295		acc[i] += dat alpha;
2862	3043295		dat += in->dimincs[ndims];
2863	3043295		wgt[i] += alpha;
2864			}
2865	3043295		wgt2[i] += alpha; }
2866			}
2867			} /* end of t_vio check (i.e. of input accumulation) */
2868
2869
2870			/* Advance input accumulation loop. */
2871			/* We both increment the total vector and also advance the index. */
2872	12401920		carry = 1;
2873	27736480	100	for(i=0; i
		100
2874			/* Advance the current element of the offset vector */
2875	15334560		ivec[i]++;
2876	15334560		j = ivec[i] + ibvec[i];
2877
2878			/* Advance the offset into the data array */
2879	15334560	100	if( j > 0 && j <= in->dims[i]-1 ) {
		50
2880			/* Normal case -- just advance the input vector */
2881	6289185		i_off += in->dimincs[i];
2882			} else {
2883			/* Busted a boundary - either before or after. */
2884	9045375		switch(bounds[i]){
2885			case 0: /* no breakage allowed -- treat as truncation for interpolation */
2886			case 1: /* truncation -- if we crossed the boundary mark ourselves out-of-bounds */
2887	9045375	100	if( j == 0 )
2888	521970		t_vio--;
2889	8523405	50	else if( j == in->dims[i] )
2890	0		t_vio++;
2891	9045375		break;
2892			case 2: /* extension -- do nothing (so the same input point is re-used) */
2893	0		break;
2894			case 3: /* periodic -- advance and mod into the allowed range */
2895	0	0	if((j % in->dims[i]) == 0) {
2896	0		i_off -= in->dimincs[i] * (in->dims[i]-1);
2897			} else {
2898	0		i_off += in->dimincs[i];
2899			}
2900	0		break;
2901			case 4: /* mirror -- advance or retreat depending on phase */
2902	0		j += in->dims[i];
2903	0		j %= (in->dims[i]*2);
2904	0		j -= in->dims[i];
2905	0	0	if( j!=0 && j!= -in->dims[i] ) {
		0
2906	0	0	if(j<0)
2907	0		i_off -= in->dimincs[i];
2908			else
2909	0		i_off += in->dimincs[i];
2910			}
2911	0		break;
2912			}
2913			}
2914
2915			/* Now check for carry */
2916	15334560	100	if(ivec[i] <= psize) {
2917			/* Normal case -- copy the current offset to the faster-running dim stashes */
2918			int k;
2919	13860000	100	for(k=0;k
2920	2195360		index_stash[k] = i_off;
2921			}
2922	11664640		carry = 0;
2923
2924			} else { /* End of this scan -- recover the last position, and mark carry */
2925	3669920		i_off = index_stash[i];
2926	3669920	50	if(bounds[i]==1) {
2927	3669920		j = ivec[i] + ibvec[i];
2928	3669920	100	if( j < 0 \|\| j >= in->dims[i] )
		50
2929	2138120		t_vio--;
2930	3669920		ivec[i] = -psize;
2931	3669920		j = ivec[i] + ibvec[i];
2932	3669920	100	if( j < 0 \|\| j >= in->dims[i] )
		50
2933	2660090		t_vio++;
2934	3669920		carry = 1;
2935			} else {
2936	0		ivec[i] = -psize;
2937			}
2938			}
2939			} /* End of counter-advance loop */
2940	12401920	100	} while(carry==0); /* end of total data accumulation loop (termination condition has carry on last dim) */
2941
2942			{
2943	737280		PDLA_Double *ac = acc;
2944	737280		PDLA_Double *wg = wgt;
2945	737280		PDLA_Double *wg2 = wgt2;
2946	737280		PDLA_Long *dat = out->data;
2947
2948			/* Calculate output vector offset */
2949	2211840	100	for(i=0;i
2950	1474560		dat += out->dimincs[i] * ovec[i];
2951
2952	737280	50	if(!flux) {
2953			/* Flux flag is NOT set -- normal case. Copy the weighted accumulated data. */
2954	1474560	100	for(i=0; i < out->dims[ndims]; i++) {
2955	737280	100	if(wg && (wg2 / *wg) < 1.5 ) {
		100
2956	138062		dat = (ac++) / *(wg++);
2957	138062		wg2++;
2958			} else {
2959	599218		*dat = badval;
2960	599218		ac++; wg++; wg2++;
2961			}
2962	737280		dat += out->dimincs[ndims];
2963			}
2964			} else {
2965			/* Flux flag is set - scale by the (unpadded) determinant of the Jacobian */
2966	0		PDLA_Double det = tmp[ndims*ndims];
2967	737280	0	for(i=0; i < out->dims[ndims]; i++) {
2968	0	0	if(wg && (wg2 / *wg) < 1.5 ) {
		0
2969	0		dat = (ac++) / (wg++) det;
2970	0		wg2++;
2971			} else {
2972	0		*dat = badval;
2973	0		ac++; wg++; wg2++;
2974			}
2975	0		dat += out->dimincs[ndims];
2976			} /* end of for loop */
2977			} /* end of flux flag set conditional */
2978			} /* end of convenience block */
2979
2980			/* End of code for normal pixels */
2981			} else {
2982			/* The pixel was ludicrously huge -- just set this pixel to nan */
2983	0		PDLA_Long *dat = out->data;
2984	0	0	for(i=0;i
2985	0		dat += out->dimincs[i] * ovec[i];
2986	0	0	for(i=0;idims[ndims];i++) {
2987	0		dat = badval; / Should handle bad values too -- not yet */
2988	0		dat += out->dimincs[ndims];
2989			}
2990			}
2991
2992			/* Increment the pixel counter */
2993			{
2994	739205	100	for(i=0;
2995	739200	50	(i
2996	739200	100	(map_ptr += map->dimincs[i+1]) && /* Funky pre-test increment */
2997	1478400		(++(ovec[i]) >= out->dims[i]); /* Actual carry test */
2998	1925		i++) {
2999	1925		ovec[i] = 0;
3000	1925		map_ptr -= out->dims[i] * map->dimincs[i+1];
3001			}
3002			}
3003	737280	100	} while(i
3004
3005
3006
3007			}
3008			PDLA_COMMENT("THREADLOOPEND")
3009			}
3010			}
3011	5		k0_datap -= __tinc1_0 * __tdims1 + __offsp[0];
3012	5	50	} while(PDLA->iterthreadloop(&__privtrans->__pdlthread,2)); } break; case PDLA_IND: {
3013	0	0	PDLA_Indx * k0_datap = ((PDLA_Indx *)(PDLA_REPRP_TRANS((__privtrans->pdls[0]),(__privtrans->vtable->per_pdl_flags[0]))));
		0
3014	0		PDLA_Indx * k0_physdatap = ((PDLA_Indx *)((__privtrans->pdls[0])->data));
3015
3016
3017			PDLA_COMMENT("THREADLOOPBEGIN")
3018	0	0	if ( PDLA->startthreadloop(&(__privtrans->__pdlthread),__privtrans->vtable->readdata, __tr) ) return;
3019	0		do { register PDLA_Indx __tind1=0,__tind2=0;
3020	0		register PDLA_Indx __tnpdls = __privtrans->__pdlthread.npdls;
3021	0		register PDLA_Indx __tdims1 = __privtrans->__pdlthread.dims[1];
3022	0		register PDLA_Indx __tdims0 = __privtrans->__pdlthread.dims[0];
3023	0		register PDLA_Indx *__offsp = PDLA->get_threadoffsp(&__privtrans->__pdlthread);
3024	0		register PDLA_Indx __tinc0_0 = __privtrans->__pdlthread.incs[0];
3025	0		register PDLA_Indx __tinc1_0 = __privtrans->__pdlthread.incs[__tnpdls+0];
3026	0		k0_datap += __offsp[0];
3027	0	0	for( __tind2 = 0 ;
3028			__tind2 < __tdims1 ;
3029	0		__tind2++
3030	0		,k0_datap += __tinc1_0 - __tinc0_0 * __tdims0
3031			)
3032			{
3033	0	0	for( __tind1 = 0 ;
3034			__tind1 < __tdims0 ;
3035	0		__tind1++
3036	0		,k0_datap += __tinc0_0
3037			)
3038			{ PDLA_COMMENT("This is the tightest threadloop. Make sure inside is optimal."){
3039
3040			/*
3041			* Pixel interpolation & averaging code
3042			*
3043			* Calls a common coordinate-transformation block (see following hdr)
3044			* that isn't dependent on the type of the input variable.
3045			*
3046			* The inputs are SVs to avoid hassling with threadloops; threading
3047			* is handled internally. To simplify the threading business, any
3048			* thread dimensions should all be collapsed to a single one by the
3049			* perl front-end.
3050			*
3051			*/
3052
3053			short ndims; /* Number of dimensions we're working in */
3054			PDLA_Double tmp; / Workspace for prefrobnication */
3055			PDLA_Indx ovec; / output pixel loop vector */
3056			PDLA_Indx ivec; / input pixel loop vector */
3057			PDLA_Indx ibvec; / input pixel base offset vector */
3058			PDLA_Double dvec; / Residual vector for linearization */
3059			PDLA_Double tvec; / Temporary floating-point vector */
3060			PDLA_Double acc; / Threaded accumulator */
3061			PDLA_Double wgt; / Threaded weight accumulator */
3062			PDLA_Double wgt2; / Threaded weight accumulator for badval finding */
3063			char bounds; / Boundary condition packed string */
3064			PDLA_Indx index_stash; / Stash to store the opening index of dim sample scans */
3065			char method; /* Method identifier (gets one of 'h','g') */
3066			PDLA_Long big; /* Max size of input footprint for each pix */
3067			PDLA_Double blur; /* Scaling of filter */
3068			PDLA_Double sv_min; /* minimum singular value */
3069			char flux; /* Flag to indicate flux conservation */
3070			PDLA_Double *map_ptr;
3071			PDLA_Long i, j;
3072	0	0	PDLA_Indx badval = SvNV(__privtrans->bv);
3073			#define HANNING_LOOKUP_SIZE 2500
3074			static PDLA_Double hanning_lookup[HANNING_LOOKUP_SIZE + 2];
3075			static int needs_hanning_calc = 1;
3076			PDLA_Double zeta;
3077			PDLA_Double hanning_offset;
3078
3079			#define GAUSSIAN_LOOKUP_SIZE 4000
3080			#define GAUSSIAN_MAXVAL 6.25 /* 2.5 HWHMs (square it) */
3081			static PDLA_Double gaussian_lookup[GAUSSIAN_LOOKUP_SIZE + 2];
3082			static int needs_gaussian_calc = 1;
3083
3084	0		pdl *in = PDLA->SvPDLAV(__privtrans->in);
3085	0		pdl *out = PDLA->SvPDLAV(__privtrans->out);
3086	0		pdl *map = PDLA->SvPDLAV(__privtrans->map);
3087
3088	0		PDLA->make_physical(in);
3089	0		PDLA->make_physical(out);
3090	0		PDLA->make_physical(map);
3091
3092	0		ndims = map->ndims -1;
3093
3094			/*
3095			* Allocate all our dynamic workspaces at once
3096			* */
3097	0		ovec = (PDLA_Indx *)(PDLA->smalloc( (STRLEN)
3098	0		( + sizeof(PDLA_Indx) * 3 * ndims + sizeof(PDLA_Double) * (3ndims) + sizeof(PDLA_Double) in->dims[ndims] + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * 3 * ndimsndims + ndims + sizeof(char) ndims + sizeof(PDLA_Indx) * ndims )
3099			)
3100			);
3101	0		ivec = &(ovec[ndims]);
3102	0		ibvec = &(ivec[ndims]);
3103	0		dvec = (PDLA_Double *)(&(ibvec[ndims]));
3104	0		tvec = &(dvec[ndims]);
3105	0		acc = &(tvec[ndims]);
3106	0		wgt = &(acc[in->dims[ndims]]); wgt2 = &(wgt[in->dims[ndims]]); tmp = &(wgt2[in->dims[ndims]]);
3107	0		bounds = (char )(&(tmp [3ndims*ndims+ndims]));
3108	0		index_stash = (PDLA_Indx *) &(bounds[ndims]);
3109
3110
3111			/***
3112			* Fill in the boundary condition array
3113			*/
3114			{
3115			char *bstr;
3116			STRLEN blen;
3117	0	0	bstr = SvPV(__privtrans->boundary,blen);
3118
3119	0	0	if(blen == 0) {
3120			/* If no boundary is specified then every dim gets truncated */
3121			int i;
3122	0	0	for (i=0;i
3123	0		bounds[i] = 1;
3124			} else {
3125			int i;
3126	0	0	for(i=0;i
3127	0	0	switch(bstr[i < blen ? i : blen-1 ]) {
3128			case '0': case 'f': case 'F': /* forbid */
3129	0		bounds[i] = 0;
3130	0		break;
3131			case '1': case 't': case 'T': /* truncate */
3132	0		bounds[i] = 1;
3133	0		break;
3134			case '2': case 'e': case 'E': /* extend */
3135	0		bounds[i] = 2;
3136	0		break;
3137			case '3': case 'p': case 'P': /* periodic */
3138	0		bounds[i] = 3;
3139	0		break;
3140			case '4': case 'm': case 'M': /* mirror */
3141	0		bounds[i] = 4;
3142	0		break;
3143			default:
3144			{
3145			char buf[BUFSIZ];
3146	0		sprintf(buf,"Error in map: Unknown boundary condition '%c'",bstr[i]);
3147	0		barf("%s", buf);
3148			}
3149	0		break;
3150			}
3151			}
3152			}
3153			}
3154
3155			/***
3156			* Parse out the 'method', 'big', 'blur', and 'sv_min' arguments
3157			*/
3158	0	0	big = labs((PDLA_Long) (SvNV(__privtrans->big)));
3159	0	0	if(big <= 0)
3160	0		barf("%s","map: 'big' parameter must be >0");
3161
3162	0	0	blur = fabs((PDLA_Double) (SvNV(__privtrans->blur)));
3163	0	0	if(blur < 0)
3164	0		barf("%s","map: 'blur' parameter must be >= 0");
3165
3166	0	0	sv_min = fabs((PDLA_Double) (SvNV(__privtrans->sv_min)));
3167	0	0	if(sv_min < 0)
3168	0		barf("%s","map: 'sv_min' parameter must be >= 0");
3169
3170	0	0	flux = (SvNV(__privtrans->flux) != 0);
3171
3172			{
3173			char *mstr;
3174			STRLEN mlen;
3175	0	0	mstr = SvPV(__privtrans->method,mlen);
3176
3177	0	0	if(mlen==0)
3178	0		method = 'h';
3179	0		else switch(*mstr) {
3180	0		case 'H': method='H'; break;
3181	0		case 'h': method = 'h';
3182	0	0	if( needs_hanning_calc ) {
3183			int i;
3184	0	0	for(i=0;i
3185	0		hanning_lookup[i] = 0.5 + 0.5 * cos(3.1415926536 / HANNING_LOOKUP_SIZE * i);
3186			}
3187	0		hanning_lookup[HANNING_LOOKUP_SIZE] = 0;
3188	0		hanning_lookup[HANNING_LOOKUP_SIZE+1] = 0;
3189	0		needs_hanning_calc = 0;
3190			}
3191	0		zeta = HANNING_LOOKUP_SIZE / blur;
3192	0		hanning_offset = (blur >= 1) ?
3193	0	0	0 :
3194	0		0.5 * (1.0 - blur);
3195	0		break;
3196
3197	0		case 'g': case 'j': method = 'g';
3198	0		zeta = GAUSSIAN_LOOKUP_SIZE / GAUSSIAN_MAXVAL;
3199
3200	0	0	if( needs_gaussian_calc ) {
3201			int i;
3202	0	0	for(i=0;i
3203	0		gaussian_lookup[i] = exp( - i * 1.386294 / zeta );
3204			}
3205	0		gaussian_lookup[GAUSSIAN_LOOKUP_SIZE] = 0;
3206	0		gaussian_lookup[GAUSSIAN_LOOKUP_SIZE+1] = 0;
3207	0		needs_gaussian_calc = 0;
3208			}
3209	0		break;
3210
3211	0		case 'G': case 'J': method = 'G'; break;
3212			default:
3213			{
3214			char err[80];
3215	0		sprintf(err,"Bug in map: unknown method '%c'",*mstr);
3216	0		barf("%s", err);
3217			}
3218	0		break;
3219			}
3220			}
3221
3222
3223
3224			/* End of initialization */
3225			/*************************************************************/
3226			/* Start of Real Work */
3227
3228			/* Initialize coordinate vector and map offset
3229			*/
3230	0	0	for(i=0;i
3231	0		ovec[i] = 0;
3232
3233	0		map_ptr = (PDLA_Double *)(map->data);
3234
3235
3236			/* Main pixel loop (iterates over pixels in the output plane) */
3237			do {
3238			PDLA_Indx psize; PDLA_Indx i_off; PDLA_Indx j; char t_vio; char carry;
3239			/* Prefrobnicate the transformation matrix */
3240	0		psize = (PDLA_Long)(blur * PDLA_xform_aux(map, ovec, tmp, sv_min) + 0.5)+1; /* assignment */
3241
3242			#ifdef DEBUG_MAP
3243			{
3244			int k; PDLA_Indx foo = 0;
3245			printf("ovec: [");
3246			for(k=0;k
3247			foo += ovec[k] * map->dimincs[k+1];
3248			printf(" %2d ",(int)(ovec[k]));
3249			}
3250			printf("]; psize is %ld; big is %d; blur is %8.2f; map is [",psize,big, blur);
3251			for(k=0;k
3252			printf("%8.2f",(double)(((PDLA_Indx )(map->data))[foo + kmap->dimincs[0]]));
3253			}
3254			printf("]\n");
3255			}
3256			#endif
3257
3258			/* Don't bother accumulating output if psize is too large */
3259	0	0	if(psize <= big) {
3260			/* Use the prefrobnicated matrix to generate a local linearization.
3261			* dvec gets the delta; ibvec gets the base.
3262			*/
3263			{
3264	0		PDLA_Double *mp = map_ptr;
3265	0	0	for (i=0;i
3266	0		dvec[i] = mp - ( ibvec[i] = (PDLA_Long)(mp + 0.5)); /* assignment */
3267	0		mp += map->dimincs[0];
3268			}
3269			}
3270
3271			/* Initialize input delta vector */
3272	0	0	for(i=0;i
3273	0		ivec[i] = -psize;
3274
3275			/* Initialize accumulators */
3276			{
3277	0		PDLA_Double *ac = acc;
3278	0	0	for(i=0; i < in->dims[ndims]; i++)
3279	0		*(ac++) = 0.0;
3280
3281			}
3282			{
3283	0		PDLA_Double *wg = wgt;
3284	0	0	for(i=0;i < in->dims[ndims]; i++)
3285	0		*(wg++) = 0.0;
3286			}
3287			{
3288	0		PDLA_Double *wg = wgt2;
3289	0	0	for(i=0;i < in->dims[ndims]; i++)
3290	0		*(wg++) = 0.0;
3291			}
3292
3293
3294			/*
3295			* Calculate the original offset into the data array, to enable
3296			* delta calculations in the pixel loop
3297			*
3298			* i runs over dims; j holds the working integer index in the
3299			* current dim.
3300			*
3301			* This code matches the incrementation code at the bottom of the accumulation loop
3302			*/
3303
3304	0		t_vio = 0; /* truncation-boundary violation count - don't bother if it is nonzero */
3305	0		i_off = 0;
3306	0	0	for(i=0;i
3307	0		j = ivec[i] + ibvec[i];
3308	0	0	if(j<0 \|\| j >= in->dims[i]) {
		0
3309	0		switch(bounds[i]) {
3310			case 0: /* no breakage allowed */
3311	0		barf("%s","index out-of-bounds in map");
3312	0		break;
3313			case 1: /* truncation */
3314	0		t_vio++;
3315			/* fall through */
3316			case 2: /* extension -- crop */
3317	0	0	if(j<0)
3318	0		j=0;
3319	0		else j = in->dims[i] - 1;
3320	0		break;
3321			case 3: /* periodic -- mod it */
3322	0		j %= in->dims[i];
3323	0	0	if(j<0)
3324	0		j += in->dims[i];
3325	0		break;
3326			case 4: /* mirror -- reflect off the edges */
3327	0		j += in->dims[i];
3328	0		j %= (in->dims[i]*2);
3329	0	0	if(j<0)
3330	0		j += in->dims[i]*2;
3331	0		j -= in->dims[i];
3332	0	0	if(j<0) {
3333	0		j *= -1;
3334	0		j -= 1;
3335			}
3336	0		break;
3337			default:
3338	0		barf("%s", "Unknown boundary condition in map -- bug alert!");
3339	0		break;
3340			}
3341			}
3342	0		i_off += in->dimincs[i] * j;
3343			}
3344
3345			/* Initialize index stashes for later reference as we scan the footprint */
3346			/* It's a pain in the ass to deal with boundaries, and doubly so at the */
3347			/* end of a dimensional scan. So we stash the index location at the */
3348			/* start of each dimensional scan here. When we finish incrementing */
3349			/* through a particular dim, we pull its value back out of the stash. */
3350	0	0	for(i=0;i
3351	0		index_stash[i] = i_off;
3352			}
3353
3354			/* The input accumulation loop is the hotspot for the entire operation. */
3355			/* We loop over pixels in the region of interest (+/- psize in each dimension) */
3356			/* in the input array, use the linearized transform to bring each pixel center */
3357			/* forward to the output plane, and calculate a weighting based on the chosen */
3358			/* filter function. 'h' is a fast Hanning window rolloff using a lookup */
3359			/* table that is initialized the first time through the code. 'H' is the */
3360			/* same process, but explicitly calculated for each interation (~2x slower). */
3361			/* 'g' uses a radial Gaussian filter. Rather than calculate the array offset */
3362			/* into the input array fresh from the current input array vector each time, */
3363			/* we walk through the array using dimincs and the old offset. This saves */
3364			/* about half of the time spent on index calculation. */
3365
3366			do { /* Input accumulation loop */
3367			PDLA_Double *cp;
3368			PDLA_Double alpha;
3369			/* Calculate the weight of the current input point. Don't bother if we're
3370			* violating any truncation boundaries (in that case our value is zero, but
3371			* for the interpolation we also set the weight to zero).
3372			*/
3373	0	0	if( !t_vio ) {
3374
3375	0		PDLA_Double *ap = tvec;
3376	0		PDLA_Double *bp = dvec;
3377	0		PDLA_Indx *ip = ivec;
3378	0	0	for(i=0; i
3379	0		(ap++) = (ip++) - *(bp++);
3380
3381	0		switch(method) {
3382			PDLA_Double dd;
3383			case 'h':
3384			/* This is the Hanning window rolloff. It is a product of a simple */
3385			/* cos^2(theta) rolloff in each dimension. Using a lookup table */
3386			/* is about 2x faster than using cos(theta) directly in each */
3387			/* weighting calculation, so we do. Using 2500 entries and linear */
3388			/* interpolation is accurate to about 10^-7, and should preserve */
3389			/* the contents of cache pretty well. */
3390	0		alpha = 1;
3391	0		cp = tmp;
3392	0	0	for(i=0; i
3393			int lodex;
3394			int hidex;
3395			PDLA_Double beta;
3396	0		dd = 0;
3397	0		ap = tvec;
3398			/* Get the matrix-multiply element for this dimension */
3399	0	0	for(j=0;j
3400	0		dd += (ap++) *(cp++);
3401
3402			/* Do linear interpolation from the table */
3403			/* The table captures a hanning window centered 0.5 pixel from center. */
3404			/* We scale the filter by the blur parameter -- but if blur is less */
3405			/* than unity, we shrink the hanning blur window while keeping the 0.5 */
3406			/* value on the pixel edge at 0.5. For blur greater than unity, we */
3407			/* scale simply. */
3408	0		beta = fabs(dd) - hanning_offset;
3409	0	0	if(beta > 0) {
3410	0	0	if(beta >= blur) {
3411	0		alpha = 0;
3412	0		i = ndims;
3413			} else {
3414	0		beta *= zeta;
3415	0		lodex = beta;
3416	0	0	beta -= lodex; if(lodex > HANNING_LOOKUP_SIZE)
3417	0		lodex = HANNING_LOOKUP_SIZE;
3418	0		hidex = lodex+1;
3419	0		alpha = hanning_lookup[hidex]beta + hanning_lookup[lodex]*(1-beta);
3420			} /* end of interpolation branch */
3421			} /* end of beta > 0 branch */
3422			} /* end of dimension loop */
3423	0		break;
3424
3425			case 'H':
3426			/* This is the Hanning window rolloff with explicit calculation, preserved */
3427			/* in case someone actually wants the slower longer method. */
3428	0		alpha = 1;
3429	0		cp = tmp;
3430	0	0	for(i=0; i
3431	0		dd = 0;
3432	0		ap = tvec;
3433	0	0	for(j=0;j
3434	0		dd += (ap++) *(cp++);
3435	0		dd = (fabs(dd) - hanning_offset) / blur;
3436	0	0	if( dd > 1 ) {
3437	0		alpha = 0;
3438	0		i = ndims;
3439			} else
3440	0		alpha = (0.5 + 0.5 cos( dd * 3.1415926536 ));
3441			}
3442	0		break;
3443
3444			case 'g':
3445			/* This is the Gaussian rolloff. It does lookup into a precalculated exponential. */
3446			{
3447	0		PDLA_Double sum = 0;
3448	0		cp = tmp;
3449	0	0	for(i=0; i
3450	0		dd = 0;
3451	0		ap = tvec;
3452	0	0	for(j=0;j
3453	0		dd += (ap++) *(cp++);
3454	0		dd /= blur;
3455	0		sum += dd * dd;
3456	0	0	if(sum > GAUSSIAN_MAXVAL) {
3457	0		i = ndims; /* exit early if we're too far out */
3458	0		alpha = 0;
3459			}
3460			}
3461	0	0	if( sum > GAUSSIAN_MAXVAL \|\| !isfinite(sum) \|\| isnan(sum) ) {
		0
		0
3462	0		alpha = 0;
3463			} else {
3464			int lodex,hidex;
3465	0		PDLA_Double beta = fabs(zeta * sum);
3466
3467	0		lodex = beta;
3468	0		beta -= lodex; hidex = lodex+1;
3469	0		alpha = gaussian_lookup[hidex]beta + gaussian_lookup[lodex](1 - beta);
3470
3471			}
3472			}
3473	0		break;
3474
3475			case 'G':
3476			/* This is the Gaussian rolloff with explicit calculation, preserved */
3477			/* in case someone actually wants the slower longer method. */
3478			{
3479	0		PDLA_Double sum = 0;
3480	0		cp = tmp;
3481	0	0	for(i=0; i
3482	0		dd = 0;
3483	0		ap = tvec;
3484	0	0	for(j=0;j
3485	0		dd += (ap++) *(cp++);
3486	0		dd /= blur;
3487	0		sum += dd * dd;
3488	0	0	if(sum > 4) /* 2 pixels -- four half-widths */
3489	0		i = ndims; /* exit early if this pixel is too far outside the footprint of the ideal point */
3490			}
3491
3492	0	0	if(sum > GAUSSIAN_MAXVAL)
3493	0		alpha = 0;
3494			else
3495	0		alpha = exp(-sum * 1.386294); /* Gaussian, rt(2)-pix HWHM */
3496			}
3497	0		break;
3498			default:
3499			{
3500			char buf[80];
3501	0		sprintf(buf,"This can't happen: method='%c'",method);
3502	0		barf("%s", buf);
3503			}
3504			}
3505
3506			{ /* convenience block -- accumulate the current point into the weighted sum. */
3507			/* This is more than simple assignment because we have our own explicit poor */
3508			/* man's threadloop here, so we accumulate each threaded element separately. */
3509	0		PDLA_Indx dat = ((PDLA_Indx )(in->data)) + i_off;
3510	0		PDLA_Indx max = out->dims[ndims];
3511	0	0	for( i=0; i < max; i++ ) {
3512	0	0	if( (badval==0) \|\| (*dat != badval) ) {
		0
3513	0		acc[i] += dat alpha;
3514	0		dat += in->dimincs[ndims];
3515	0		wgt[i] += alpha;
3516			}
3517	0		wgt2[i] += alpha; }
3518			}
3519			} /* end of t_vio check (i.e. of input accumulation) */
3520
3521
3522			/* Advance input accumulation loop. */
3523			/* We both increment the total vector and also advance the index. */
3524	0		carry = 1;
3525	0	0	for(i=0; i
		0
3526			/* Advance the current element of the offset vector */
3527	0		ivec[i]++;
3528	0		j = ivec[i] + ibvec[i];
3529
3530			/* Advance the offset into the data array */
3531	0	0	if( j > 0 && j <= in->dims[i]-1 ) {
		0
3532			/* Normal case -- just advance the input vector */
3533	0		i_off += in->dimincs[i];
3534			} else {
3535			/* Busted a boundary - either before or after. */
3536	0		switch(bounds[i]){
3537			case 0: /* no breakage allowed -- treat as truncation for interpolation */
3538			case 1: /* truncation -- if we crossed the boundary mark ourselves out-of-bounds */
3539	0	0	if( j == 0 )
3540	0		t_vio--;
3541	0	0	else if( j == in->dims[i] )
3542	0		t_vio++;
3543	0		break;
3544			case 2: /* extension -- do nothing (so the same input point is re-used) */
3545	0		break;
3546			case 3: /* periodic -- advance and mod into the allowed range */
3547	0	0	if((j % in->dims[i]) == 0) {
3548	0		i_off -= in->dimincs[i] * (in->dims[i]-1);
3549			} else {
3550	0		i_off += in->dimincs[i];
3551			}
3552	0		break;
3553			case 4: /* mirror -- advance or retreat depending on phase */
3554	0		j += in->dims[i];
3555	0		j %= (in->dims[i]*2);
3556	0		j -= in->dims[i];
3557	0	0	if( j!=0 && j!= -in->dims[i] ) {
		0
3558	0	0	if(j<0)
3559	0		i_off -= in->dimincs[i];
3560			else
3561	0		i_off += in->dimincs[i];
3562			}
3563	0		break;
3564			}
3565			}
3566
3567			/* Now check for carry */
3568	0	0	if(ivec[i] <= psize) {
3569			/* Normal case -- copy the current offset to the faster-running dim stashes */
3570			int k;
3571	0	0	for(k=0;k
3572	0		index_stash[k] = i_off;
3573			}
3574	0		carry = 0;
3575
3576			} else { /* End of this scan -- recover the last position, and mark carry */
3577	0		i_off = index_stash[i];
3578	0	0	if(bounds[i]==1) {
3579	0		j = ivec[i] + ibvec[i];
3580	0	0	if( j < 0 \|\| j >= in->dims[i] )
		0
3581	0		t_vio--;
3582	0		ivec[i] = -psize;
3583	0		j = ivec[i] + ibvec[i];
3584	0	0	if( j < 0 \|\| j >= in->dims[i] )
		0
3585	0		t_vio++;
3586	0		carry = 1;
3587			} else {
3588	0		ivec[i] = -psize;
3589			}
3590			}
3591			} /* End of counter-advance loop */
3592	0	0	} while(carry==0); /* end of total data accumulation loop (termination condition has carry on last dim) */
3593
3594			{
3595	0		PDLA_Double *ac = acc;
3596	0		PDLA_Double *wg = wgt;
3597	0		PDLA_Double *wg2 = wgt2;
3598	0		PDLA_Indx *dat = out->data;
3599
3600			/* Calculate output vector offset */
3601	0	0	for(i=0;i
3602	0		dat += out->dimincs[i] * ovec[i];
3603
3604	0	0	if(!flux) {
3605			/* Flux flag is NOT set -- normal case. Copy the weighted accumulated data. */
3606	0	0	for(i=0; i < out->dims[ndims]; i++) {
3607	0	0	if(wg && (wg2 / *wg) < 1.5 ) {
		0
3608	0		dat = (ac++) / *(wg++);
3609	0		wg2++;
3610			} else {
3611	0		*dat = badval;
3612	0		ac++; wg++; wg2++;
3613			}
3614	0		dat += out->dimincs[ndims];
3615			}
3616			} else {
3617			/* Flux flag is set - scale by the (unpadded) determinant of the Jacobian */
3618	0		PDLA_Double det = tmp[ndims*ndims];
3619	0	0	for(i=0; i < out->dims[ndims]; i++) {
3620	0	0	if(wg && (wg2 / *wg) < 1.5 ) {
		0
3621	0		dat = (ac++) / (wg++) det;
3622	0		wg2++;
3623			} else {
3624	0		*dat = badval;
3625	0		ac++; wg++; wg2++;
3626			}
3627	0		dat += out->dimincs[ndims];
3628			} /* end of for loop */
3629			} /* end of flux flag set conditional */
3630			} /* end of convenience block */
3631
3632			/* End of code for normal pixels */
3633			} else {
3634			/* The pixel was ludicrously huge -- just set this pixel to nan */
3635	0		PDLA_Indx *dat = out->data;
3636	0	0	for(i=0;i
3637	0		dat += out->dimincs[i] * ovec[i];
3638	0	0	for(i=0;idims[ndims];i++) {
3639	0		dat = badval; / Should handle bad values too -- not yet */
3640	0		dat += out->dimincs[ndims];
3641			}
3642			}
3643
3644			/* Increment the pixel counter */
3645			{
3646	0	0	for(i=0;
3647	0	0	(i
3648	0	0	(map_ptr += map->dimincs[i+1]) && /* Funky pre-test increment */
3649	0		(++(ovec[i]) >= out->dims[i]); /* Actual carry test */
3650	0		i++) {
3651	0		ovec[i] = 0;
3652	0		map_ptr -= out->dims[i] * map->dimincs[i+1];
3653			}
3654			}
3655	0	0	} while(i
3656
3657
3658
3659			}
3660			PDLA_COMMENT("THREADLOOPEND")
3661			}
3662			}
3663	0		k0_datap -= __tinc1_0 * __tdims1 + __offsp[0];
3664	0	0	} while(PDLA->iterthreadloop(&__privtrans->__pdlthread,2)); } break; case PDLA_LL: {
3665	0	0	PDLA_LongLong * k0_datap = ((PDLA_LongLong *)(PDLA_REPRP_TRANS((__privtrans->pdls[0]),(__privtrans->vtable->per_pdl_flags[0]))));
		0
3666	0		PDLA_LongLong * k0_physdatap = ((PDLA_LongLong *)((__privtrans->pdls[0])->data));
3667
3668
3669			PDLA_COMMENT("THREADLOOPBEGIN")
3670	0	0	if ( PDLA->startthreadloop(&(__privtrans->__pdlthread),__privtrans->vtable->readdata, __tr) ) return;
3671	0		do { register PDLA_Indx __tind1=0,__tind2=0;
3672	0		register PDLA_Indx __tnpdls = __privtrans->__pdlthread.npdls;
3673	0		register PDLA_Indx __tdims1 = __privtrans->__pdlthread.dims[1];
3674	0		register PDLA_Indx __tdims0 = __privtrans->__pdlthread.dims[0];
3675	0		register PDLA_Indx *__offsp = PDLA->get_threadoffsp(&__privtrans->__pdlthread);
3676	0		register PDLA_Indx __tinc0_0 = __privtrans->__pdlthread.incs[0];
3677	0		register PDLA_Indx __tinc1_0 = __privtrans->__pdlthread.incs[__tnpdls+0];
3678	0		k0_datap += __offsp[0];
3679	0	0	for( __tind2 = 0 ;
3680			__tind2 < __tdims1 ;
3681	0		__tind2++
3682	0		,k0_datap += __tinc1_0 - __tinc0_0 * __tdims0
3683			)
3684			{
3685	0	0	for( __tind1 = 0 ;
3686			__tind1 < __tdims0 ;
3687	0		__tind1++
3688	0		,k0_datap += __tinc0_0
3689			)
3690			{ PDLA_COMMENT("This is the tightest threadloop. Make sure inside is optimal."){
3691
3692			/*
3693			* Pixel interpolation & averaging code
3694			*
3695			* Calls a common coordinate-transformation block (see following hdr)
3696			* that isn't dependent on the type of the input variable.
3697			*
3698			* The inputs are SVs to avoid hassling with threadloops; threading
3699			* is handled internally. To simplify the threading business, any
3700			* thread dimensions should all be collapsed to a single one by the
3701			* perl front-end.
3702			*
3703			*/
3704
3705			short ndims; /* Number of dimensions we're working in */
3706			PDLA_Double tmp; / Workspace for prefrobnication */
3707			PDLA_Indx ovec; / output pixel loop vector */
3708			PDLA_Indx ivec; / input pixel loop vector */
3709			PDLA_Indx ibvec; / input pixel base offset vector */
3710			PDLA_Double dvec; / Residual vector for linearization */
3711			PDLA_Double tvec; / Temporary floating-point vector */
3712			PDLA_Double acc; / Threaded accumulator */
3713			PDLA_Double wgt; / Threaded weight accumulator */
3714			PDLA_Double wgt2; / Threaded weight accumulator for badval finding */
3715			char bounds; / Boundary condition packed string */
3716			PDLA_Indx index_stash; / Stash to store the opening index of dim sample scans */
3717			char method; /* Method identifier (gets one of 'h','g') */
3718			PDLA_Long big; /* Max size of input footprint for each pix */
3719			PDLA_Double blur; /* Scaling of filter */
3720			PDLA_Double sv_min; /* minimum singular value */
3721			char flux; /* Flag to indicate flux conservation */
3722			PDLA_Double *map_ptr;
3723			PDLA_Long i, j;
3724	0	0	PDLA_LongLong badval = SvNV(__privtrans->bv);
3725			#define HANNING_LOOKUP_SIZE 2500
3726			static PDLA_Double hanning_lookup[HANNING_LOOKUP_SIZE + 2];
3727			static int needs_hanning_calc = 1;
3728			PDLA_Double zeta;
3729			PDLA_Double hanning_offset;
3730
3731			#define GAUSSIAN_LOOKUP_SIZE 4000
3732			#define GAUSSIAN_MAXVAL 6.25 /* 2.5 HWHMs (square it) */
3733			static PDLA_Double gaussian_lookup[GAUSSIAN_LOOKUP_SIZE + 2];
3734			static int needs_gaussian_calc = 1;
3735
3736	0		pdl *in = PDLA->SvPDLAV(__privtrans->in);
3737	0		pdl *out = PDLA->SvPDLAV(__privtrans->out);
3738	0		pdl *map = PDLA->SvPDLAV(__privtrans->map);
3739
3740	0		PDLA->make_physical(in);
3741	0		PDLA->make_physical(out);
3742	0		PDLA->make_physical(map);
3743
3744	0		ndims = map->ndims -1;
3745
3746			/*
3747			* Allocate all our dynamic workspaces at once
3748			* */
3749	0		ovec = (PDLA_Indx *)(PDLA->smalloc( (STRLEN)
3750	0		( + sizeof(PDLA_Indx) * 3 * ndims + sizeof(PDLA_Double) * (3ndims) + sizeof(PDLA_Double) in->dims[ndims] + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * 3 * ndimsndims + ndims + sizeof(char) ndims + sizeof(PDLA_Indx) * ndims )
3751			)
3752			);
3753	0		ivec = &(ovec[ndims]);
3754	0		ibvec = &(ivec[ndims]);
3755	0		dvec = (PDLA_Double *)(&(ibvec[ndims]));
3756	0		tvec = &(dvec[ndims]);
3757	0		acc = &(tvec[ndims]);
3758	0		wgt = &(acc[in->dims[ndims]]); wgt2 = &(wgt[in->dims[ndims]]); tmp = &(wgt2[in->dims[ndims]]);
3759	0		bounds = (char )(&(tmp [3ndims*ndims+ndims]));
3760	0		index_stash = (PDLA_Indx *) &(bounds[ndims]);
3761
3762
3763			/***
3764			* Fill in the boundary condition array
3765			*/
3766			{
3767			char *bstr;
3768			STRLEN blen;
3769	0	0	bstr = SvPV(__privtrans->boundary,blen);
3770
3771	0	0	if(blen == 0) {
3772			/* If no boundary is specified then every dim gets truncated */
3773			int i;
3774	0	0	for (i=0;i
3775	0		bounds[i] = 1;
3776			} else {
3777			int i;
3778	0	0	for(i=0;i
3779	0	0	switch(bstr[i < blen ? i : blen-1 ]) {
3780			case '0': case 'f': case 'F': /* forbid */
3781	0		bounds[i] = 0;
3782	0		break;
3783			case '1': case 't': case 'T': /* truncate */
3784	0		bounds[i] = 1;
3785	0		break;
3786			case '2': case 'e': case 'E': /* extend */
3787	0		bounds[i] = 2;
3788	0		break;
3789			case '3': case 'p': case 'P': /* periodic */
3790	0		bounds[i] = 3;
3791	0		break;
3792			case '4': case 'm': case 'M': /* mirror */
3793	0		bounds[i] = 4;
3794	0		break;
3795			default:
3796			{
3797			char buf[BUFSIZ];
3798	0		sprintf(buf,"Error in map: Unknown boundary condition '%c'",bstr[i]);
3799	0		barf("%s", buf);
3800			}
3801	0		break;
3802			}
3803			}
3804			}
3805			}
3806
3807			/***
3808			* Parse out the 'method', 'big', 'blur', and 'sv_min' arguments
3809			*/
3810	0	0	big = labs((PDLA_Long) (SvNV(__privtrans->big)));
3811	0	0	if(big <= 0)
3812	0		barf("%s","map: 'big' parameter must be >0");
3813
3814	0	0	blur = fabs((PDLA_Double) (SvNV(__privtrans->blur)));
3815	0	0	if(blur < 0)
3816	0		barf("%s","map: 'blur' parameter must be >= 0");
3817
3818	0	0	sv_min = fabs((PDLA_Double) (SvNV(__privtrans->sv_min)));
3819	0	0	if(sv_min < 0)
3820	0		barf("%s","map: 'sv_min' parameter must be >= 0");
3821
3822	0	0	flux = (SvNV(__privtrans->flux) != 0);
3823
3824			{
3825			char *mstr;
3826			STRLEN mlen;
3827	0	0	mstr = SvPV(__privtrans->method,mlen);
3828
3829	0	0	if(mlen==0)
3830	0		method = 'h';
3831	0		else switch(*mstr) {
3832	0		case 'H': method='H'; break;
3833	0		case 'h': method = 'h';
3834	0	0	if( needs_hanning_calc ) {
3835			int i;
3836	0	0	for(i=0;i
3837	0		hanning_lookup[i] = 0.5 + 0.5 * cos(3.1415926536 / HANNING_LOOKUP_SIZE * i);
3838			}
3839	0		hanning_lookup[HANNING_LOOKUP_SIZE] = 0;
3840	0		hanning_lookup[HANNING_LOOKUP_SIZE+1] = 0;
3841	0		needs_hanning_calc = 0;
3842			}
3843	0		zeta = HANNING_LOOKUP_SIZE / blur;
3844	0		hanning_offset = (blur >= 1) ?
3845	0	0	0 :
3846	0		0.5 * (1.0 - blur);
3847	0		break;
3848
3849	0		case 'g': case 'j': method = 'g';
3850	0		zeta = GAUSSIAN_LOOKUP_SIZE / GAUSSIAN_MAXVAL;
3851
3852	0	0	if( needs_gaussian_calc ) {
3853			int i;
3854	0	0	for(i=0;i
3855	0		gaussian_lookup[i] = exp( - i * 1.386294 / zeta );
3856			}
3857	0		gaussian_lookup[GAUSSIAN_LOOKUP_SIZE] = 0;
3858	0		gaussian_lookup[GAUSSIAN_LOOKUP_SIZE+1] = 0;
3859	0		needs_gaussian_calc = 0;
3860			}
3861	0		break;
3862
3863	0		case 'G': case 'J': method = 'G'; break;
3864			default:
3865			{
3866			char err[80];
3867	0		sprintf(err,"Bug in map: unknown method '%c'",*mstr);
3868	0		barf("%s", err);
3869			}
3870	0		break;
3871			}
3872			}
3873
3874
3875
3876			/* End of initialization */
3877			/*************************************************************/
3878			/* Start of Real Work */
3879
3880			/* Initialize coordinate vector and map offset
3881			*/
3882	0	0	for(i=0;i
3883	0		ovec[i] = 0;
3884
3885	0		map_ptr = (PDLA_Double *)(map->data);
3886
3887
3888			/* Main pixel loop (iterates over pixels in the output plane) */
3889			do {
3890			PDLA_Indx psize; PDLA_Indx i_off; PDLA_Indx j; char t_vio; char carry;
3891			/* Prefrobnicate the transformation matrix */
3892	0		psize = (PDLA_Long)(blur * PDLA_xform_aux(map, ovec, tmp, sv_min) + 0.5)+1; /* assignment */
3893
3894			#ifdef DEBUG_MAP
3895			{
3896			int k; PDLA_Indx foo = 0;
3897			printf("ovec: [");
3898			for(k=0;k
3899			foo += ovec[k] * map->dimincs[k+1];
3900			printf(" %2d ",(int)(ovec[k]));
3901			}
3902			printf("]; psize is %ld; big is %d; blur is %8.2f; map is [",psize,big, blur);
3903			for(k=0;k
3904			printf("%8.2f",(double)(((PDLA_LongLong )(map->data))[foo + kmap->dimincs[0]]));
3905			}
3906			printf("]\n");
3907			}
3908			#endif
3909
3910			/* Don't bother accumulating output if psize is too large */
3911	0	0	if(psize <= big) {
3912			/* Use the prefrobnicated matrix to generate a local linearization.
3913			* dvec gets the delta; ibvec gets the base.
3914			*/
3915			{
3916	0		PDLA_Double *mp = map_ptr;
3917	0	0	for (i=0;i
3918	0		dvec[i] = mp - ( ibvec[i] = (PDLA_Long)(mp + 0.5)); /* assignment */
3919	0		mp += map->dimincs[0];
3920			}
3921			}
3922
3923			/* Initialize input delta vector */
3924	0	0	for(i=0;i
3925	0		ivec[i] = -psize;
3926
3927			/* Initialize accumulators */
3928			{
3929	0		PDLA_Double *ac = acc;
3930	0	0	for(i=0; i < in->dims[ndims]; i++)
3931	0		*(ac++) = 0.0;
3932
3933			}
3934			{
3935	0		PDLA_Double *wg = wgt;
3936	0	0	for(i=0;i < in->dims[ndims]; i++)
3937	0		*(wg++) = 0.0;
3938			}
3939			{
3940	0		PDLA_Double *wg = wgt2;
3941	0	0	for(i=0;i < in->dims[ndims]; i++)
3942	0		*(wg++) = 0.0;
3943			}
3944
3945
3946			/*
3947			* Calculate the original offset into the data array, to enable
3948			* delta calculations in the pixel loop
3949			*
3950			* i runs over dims; j holds the working integer index in the
3951			* current dim.
3952			*
3953			* This code matches the incrementation code at the bottom of the accumulation loop
3954			*/
3955
3956	0		t_vio = 0; /* truncation-boundary violation count - don't bother if it is nonzero */
3957	0		i_off = 0;
3958	0	0	for(i=0;i
3959	0		j = ivec[i] + ibvec[i];
3960	0	0	if(j<0 \|\| j >= in->dims[i]) {
		0
3961	0		switch(bounds[i]) {
3962			case 0: /* no breakage allowed */
3963	0		barf("%s","index out-of-bounds in map");
3964	0		break;
3965			case 1: /* truncation */
3966	0		t_vio++;
3967			/* fall through */
3968			case 2: /* extension -- crop */
3969	0	0	if(j<0)
3970	0		j=0;
3971	0		else j = in->dims[i] - 1;
3972	0		break;
3973			case 3: /* periodic -- mod it */
3974	0		j %= in->dims[i];
3975	0	0	if(j<0)
3976	0		j += in->dims[i];
3977	0		break;
3978			case 4: /* mirror -- reflect off the edges */
3979	0		j += in->dims[i];
3980	0		j %= (in->dims[i]*2);
3981	0	0	if(j<0)
3982	0		j += in->dims[i]*2;
3983	0		j -= in->dims[i];
3984	0	0	if(j<0) {
3985	0		j *= -1;
3986	0		j -= 1;
3987			}
3988	0		break;
3989			default:
3990	0		barf("%s", "Unknown boundary condition in map -- bug alert!");
3991	0		break;
3992			}
3993			}
3994	0		i_off += in->dimincs[i] * j;
3995			}
3996
3997			/* Initialize index stashes for later reference as we scan the footprint */
3998			/* It's a pain in the ass to deal with boundaries, and doubly so at the */
3999			/* end of a dimensional scan. So we stash the index location at the */
4000			/* start of each dimensional scan here. When we finish incrementing */
4001			/* through a particular dim, we pull its value back out of the stash. */
4002	0	0	for(i=0;i
4003	0		index_stash[i] = i_off;
4004			}
4005
4006			/* The input accumulation loop is the hotspot for the entire operation. */
4007			/* We loop over pixels in the region of interest (+/- psize in each dimension) */
4008			/* in the input array, use the linearized transform to bring each pixel center */
4009			/* forward to the output plane, and calculate a weighting based on the chosen */
4010			/* filter function. 'h' is a fast Hanning window rolloff using a lookup */
4011			/* table that is initialized the first time through the code. 'H' is the */
4012			/* same process, but explicitly calculated for each interation (~2x slower). */
4013			/* 'g' uses a radial Gaussian filter. Rather than calculate the array offset */
4014			/* into the input array fresh from the current input array vector each time, */
4015			/* we walk through the array using dimincs and the old offset. This saves */
4016			/* about half of the time spent on index calculation. */
4017
4018			do { /* Input accumulation loop */
4019			PDLA_Double *cp;
4020			PDLA_Double alpha;
4021			/* Calculate the weight of the current input point. Don't bother if we're
4022			* violating any truncation boundaries (in that case our value is zero, but
4023			* for the interpolation we also set the weight to zero).
4024			*/
4025	0	0	if( !t_vio ) {
4026
4027	0		PDLA_Double *ap = tvec;
4028	0		PDLA_Double *bp = dvec;
4029	0		PDLA_Indx *ip = ivec;
4030	0	0	for(i=0; i
4031	0		(ap++) = (ip++) - *(bp++);
4032
4033	0		switch(method) {
4034			PDLA_Double dd;
4035			case 'h':
4036			/* This is the Hanning window rolloff. It is a product of a simple */
4037			/* cos^2(theta) rolloff in each dimension. Using a lookup table */
4038			/* is about 2x faster than using cos(theta) directly in each */
4039			/* weighting calculation, so we do. Using 2500 entries and linear */
4040			/* interpolation is accurate to about 10^-7, and should preserve */
4041			/* the contents of cache pretty well. */
4042	0		alpha = 1;
4043	0		cp = tmp;
4044	0	0	for(i=0; i
4045			int lodex;
4046			int hidex;
4047			PDLA_Double beta;
4048	0		dd = 0;
4049	0		ap = tvec;
4050			/* Get the matrix-multiply element for this dimension */
4051	0	0	for(j=0;j
4052	0		dd += (ap++) *(cp++);
4053
4054			/* Do linear interpolation from the table */
4055			/* The table captures a hanning window centered 0.5 pixel from center. */
4056			/* We scale the filter by the blur parameter -- but if blur is less */
4057			/* than unity, we shrink the hanning blur window while keeping the 0.5 */
4058			/* value on the pixel edge at 0.5. For blur greater than unity, we */
4059			/* scale simply. */
4060	0		beta = fabs(dd) - hanning_offset;
4061	0	0	if(beta > 0) {
4062	0	0	if(beta >= blur) {
4063	0		alpha = 0;
4064	0		i = ndims;
4065			} else {
4066	0		beta *= zeta;
4067	0		lodex = beta;
4068	0	0	beta -= lodex; if(lodex > HANNING_LOOKUP_SIZE)
4069	0		lodex = HANNING_LOOKUP_SIZE;
4070	0		hidex = lodex+1;
4071	0		alpha = hanning_lookup[hidex]beta + hanning_lookup[lodex]*(1-beta);
4072			} /* end of interpolation branch */
4073			} /* end of beta > 0 branch */
4074			} /* end of dimension loop */
4075	0		break;
4076
4077			case 'H':
4078			/* This is the Hanning window rolloff with explicit calculation, preserved */
4079			/* in case someone actually wants the slower longer method. */
4080	0		alpha = 1;
4081	0		cp = tmp;
4082	0	0	for(i=0; i
4083	0		dd = 0;
4084	0		ap = tvec;
4085	0	0	for(j=0;j
4086	0		dd += (ap++) *(cp++);
4087	0		dd = (fabs(dd) - hanning_offset) / blur;
4088	0	0	if( dd > 1 ) {
4089	0		alpha = 0;
4090	0		i = ndims;
4091			} else
4092	0		alpha = (0.5 + 0.5 cos( dd * 3.1415926536 ));
4093			}
4094	0		break;
4095
4096			case 'g':
4097			/* This is the Gaussian rolloff. It does lookup into a precalculated exponential. */
4098			{
4099	0		PDLA_Double sum = 0;
4100	0		cp = tmp;
4101	0	0	for(i=0; i
4102	0		dd = 0;
4103	0		ap = tvec;
4104	0	0	for(j=0;j
4105	0		dd += (ap++) *(cp++);
4106	0		dd /= blur;
4107	0		sum += dd * dd;
4108	0	0	if(sum > GAUSSIAN_MAXVAL) {
4109	0		i = ndims; /* exit early if we're too far out */
4110	0		alpha = 0;
4111			}
4112			}
4113	0	0	if( sum > GAUSSIAN_MAXVAL \|\| !isfinite(sum) \|\| isnan(sum) ) {
		0
		0
4114	0		alpha = 0;
4115			} else {
4116			int lodex,hidex;
4117	0		PDLA_Double beta = fabs(zeta * sum);
4118
4119	0		lodex = beta;
4120	0		beta -= lodex; hidex = lodex+1;
4121	0		alpha = gaussian_lookup[hidex]beta + gaussian_lookup[lodex](1 - beta);
4122
4123			}
4124			}
4125	0		break;
4126
4127			case 'G':
4128			/* This is the Gaussian rolloff with explicit calculation, preserved */
4129			/* in case someone actually wants the slower longer method. */
4130			{
4131	0		PDLA_Double sum = 0;
4132	0		cp = tmp;
4133	0	0	for(i=0; i
4134	0		dd = 0;
4135	0		ap = tvec;
4136	0	0	for(j=0;j
4137	0		dd += (ap++) *(cp++);
4138	0		dd /= blur;
4139	0		sum += dd * dd;
4140	0	0	if(sum > 4) /* 2 pixels -- four half-widths */
4141	0		i = ndims; /* exit early if this pixel is too far outside the footprint of the ideal point */
4142			}
4143
4144	0	0	if(sum > GAUSSIAN_MAXVAL)
4145	0		alpha = 0;
4146			else
4147	0		alpha = exp(-sum * 1.386294); /* Gaussian, rt(2)-pix HWHM */
4148			}
4149	0		break;
4150			default:
4151			{
4152			char buf[80];
4153	0		sprintf(buf,"This can't happen: method='%c'",method);
4154	0		barf("%s", buf);
4155			}
4156			}
4157
4158			{ /* convenience block -- accumulate the current point into the weighted sum. */
4159			/* This is more than simple assignment because we have our own explicit poor */
4160			/* man's threadloop here, so we accumulate each threaded element separately. */
4161	0		PDLA_LongLong dat = ((PDLA_LongLong )(in->data)) + i_off;
4162	0		PDLA_Indx max = out->dims[ndims];
4163	0	0	for( i=0; i < max; i++ ) {
4164	0	0	if( (badval==0) \|\| (*dat != badval) ) {
		0
4165	0		acc[i] += dat alpha;
4166	0		dat += in->dimincs[ndims];
4167	0		wgt[i] += alpha;
4168			}
4169	0		wgt2[i] += alpha; }
4170			}
4171			} /* end of t_vio check (i.e. of input accumulation) */
4172
4173
4174			/* Advance input accumulation loop. */
4175			/* We both increment the total vector and also advance the index. */
4176	0		carry = 1;
4177	0	0	for(i=0; i
		0
4178			/* Advance the current element of the offset vector */
4179	0		ivec[i]++;
4180	0		j = ivec[i] + ibvec[i];
4181
4182			/* Advance the offset into the data array */
4183	0	0	if( j > 0 && j <= in->dims[i]-1 ) {
		0
4184			/* Normal case -- just advance the input vector */
4185	0		i_off += in->dimincs[i];
4186			} else {
4187			/* Busted a boundary - either before or after. */
4188	0		switch(bounds[i]){
4189			case 0: /* no breakage allowed -- treat as truncation for interpolation */
4190			case 1: /* truncation -- if we crossed the boundary mark ourselves out-of-bounds */
4191	0	0	if( j == 0 )
4192	0		t_vio--;
4193	0	0	else if( j == in->dims[i] )
4194	0		t_vio++;
4195	0		break;
4196			case 2: /* extension -- do nothing (so the same input point is re-used) */
4197	0		break;
4198			case 3: /* periodic -- advance and mod into the allowed range */
4199	0	0	if((j % in->dims[i]) == 0) {
4200	0		i_off -= in->dimincs[i] * (in->dims[i]-1);
4201			} else {
4202	0		i_off += in->dimincs[i];
4203			}
4204	0		break;
4205			case 4: /* mirror -- advance or retreat depending on phase */
4206	0		j += in->dims[i];
4207	0		j %= (in->dims[i]*2);
4208	0		j -= in->dims[i];
4209	0	0	if( j!=0 && j!= -in->dims[i] ) {
		0
4210	0	0	if(j<0)
4211	0		i_off -= in->dimincs[i];
4212			else
4213	0		i_off += in->dimincs[i];
4214			}
4215	0		break;
4216			}
4217			}
4218
4219			/* Now check for carry */
4220	0	0	if(ivec[i] <= psize) {
4221			/* Normal case -- copy the current offset to the faster-running dim stashes */
4222			int k;
4223	0	0	for(k=0;k
4224	0		index_stash[k] = i_off;
4225			}
4226	0		carry = 0;
4227
4228			} else { /* End of this scan -- recover the last position, and mark carry */
4229	0		i_off = index_stash[i];
4230	0	0	if(bounds[i]==1) {
4231	0		j = ivec[i] + ibvec[i];
4232	0	0	if( j < 0 \|\| j >= in->dims[i] )
		0
4233	0		t_vio--;
4234	0		ivec[i] = -psize;
4235	0		j = ivec[i] + ibvec[i];
4236	0	0	if( j < 0 \|\| j >= in->dims[i] )
		0
4237	0		t_vio++;
4238	0		carry = 1;
4239			} else {
4240	0		ivec[i] = -psize;
4241			}
4242			}
4243			} /* End of counter-advance loop */
4244	0	0	} while(carry==0); /* end of total data accumulation loop (termination condition has carry on last dim) */
4245
4246			{
4247	0		PDLA_Double *ac = acc;
4248	0		PDLA_Double *wg = wgt;
4249	0		PDLA_Double *wg2 = wgt2;
4250	0		PDLA_LongLong *dat = out->data;
4251
4252			/* Calculate output vector offset */
4253	0	0	for(i=0;i
4254	0		dat += out->dimincs[i] * ovec[i];
4255
4256	0	0	if(!flux) {
4257			/* Flux flag is NOT set -- normal case. Copy the weighted accumulated data. */
4258	0	0	for(i=0; i < out->dims[ndims]; i++) {
4259	0	0	if(wg && (wg2 / *wg) < 1.5 ) {
		0
4260	0		dat = (ac++) / *(wg++);
4261	0		wg2++;
4262			} else {
4263	0		*dat = badval;
4264	0		ac++; wg++; wg2++;
4265			}
4266	0		dat += out->dimincs[ndims];
4267			}
4268			} else {
4269			/* Flux flag is set - scale by the (unpadded) determinant of the Jacobian */
4270	0		PDLA_Double det = tmp[ndims*ndims];
4271	0	0	for(i=0; i < out->dims[ndims]; i++) {
4272	0	0	if(wg && (wg2 / *wg) < 1.5 ) {
		0
4273	0		dat = (ac++) / (wg++) det;
4274	0		wg2++;
4275			} else {
4276	0		*dat = badval;
4277	0		ac++; wg++; wg2++;
4278			}
4279	0		dat += out->dimincs[ndims];
4280			} /* end of for loop */
4281			} /* end of flux flag set conditional */
4282			} /* end of convenience block */
4283
4284			/* End of code for normal pixels */
4285			} else {
4286			/* The pixel was ludicrously huge -- just set this pixel to nan */
4287	0		PDLA_LongLong *dat = out->data;
4288	0	0	for(i=0;i
4289	0		dat += out->dimincs[i] * ovec[i];
4290	0	0	for(i=0;idims[ndims];i++) {
4291	0		dat = badval; / Should handle bad values too -- not yet */
4292	0		dat += out->dimincs[ndims];
4293			}
4294			}
4295
4296			/* Increment the pixel counter */
4297			{
4298	0	0	for(i=0;
4299	0	0	(i
4300	0	0	(map_ptr += map->dimincs[i+1]) && /* Funky pre-test increment */
4301	0		(++(ovec[i]) >= out->dims[i]); /* Actual carry test */
4302	0		i++) {
4303	0		ovec[i] = 0;
4304	0		map_ptr -= out->dims[i] * map->dimincs[i+1];
4305			}
4306			}
4307	0	0	} while(i
4308
4309
4310
4311			}
4312			PDLA_COMMENT("THREADLOOPEND")
4313			}
4314			}
4315	0		k0_datap -= __tinc1_0 * __tdims1 + __offsp[0];
4316	0	0	} while(PDLA->iterthreadloop(&__privtrans->__pdlthread,2)); } break; case PDLA_F: {
4317	0	0	PDLA_Float * k0_datap = ((PDLA_Float *)(PDLA_REPRP_TRANS((__privtrans->pdls[0]),(__privtrans->vtable->per_pdl_flags[0]))));
		0
4318	0		PDLA_Float * k0_physdatap = ((PDLA_Float *)((__privtrans->pdls[0])->data));
4319
4320
4321			PDLA_COMMENT("THREADLOOPBEGIN")
4322	0	0	if ( PDLA->startthreadloop(&(__privtrans->__pdlthread),__privtrans->vtable->readdata, __tr) ) return;
4323	0		do { register PDLA_Indx __tind1=0,__tind2=0;
4324	0		register PDLA_Indx __tnpdls = __privtrans->__pdlthread.npdls;
4325	0		register PDLA_Indx __tdims1 = __privtrans->__pdlthread.dims[1];
4326	0		register PDLA_Indx __tdims0 = __privtrans->__pdlthread.dims[0];
4327	0		register PDLA_Indx *__offsp = PDLA->get_threadoffsp(&__privtrans->__pdlthread);
4328	0		register PDLA_Indx __tinc0_0 = __privtrans->__pdlthread.incs[0];
4329	0		register PDLA_Indx __tinc1_0 = __privtrans->__pdlthread.incs[__tnpdls+0];
4330	0		k0_datap += __offsp[0];
4331	0	0	for( __tind2 = 0 ;
4332			__tind2 < __tdims1 ;
4333	0		__tind2++
4334	0		,k0_datap += __tinc1_0 - __tinc0_0 * __tdims0
4335			)
4336			{
4337	0	0	for( __tind1 = 0 ;
4338			__tind1 < __tdims0 ;
4339	0		__tind1++
4340	0		,k0_datap += __tinc0_0
4341			)
4342			{ PDLA_COMMENT("This is the tightest threadloop. Make sure inside is optimal."){
4343
4344			/*
4345			* Pixel interpolation & averaging code
4346			*
4347			* Calls a common coordinate-transformation block (see following hdr)
4348			* that isn't dependent on the type of the input variable.
4349			*
4350			* The inputs are SVs to avoid hassling with threadloops; threading
4351			* is handled internally. To simplify the threading business, any
4352			* thread dimensions should all be collapsed to a single one by the
4353			* perl front-end.
4354			*
4355			*/
4356
4357			short ndims; /* Number of dimensions we're working in */
4358			PDLA_Double tmp; / Workspace for prefrobnication */
4359			PDLA_Indx ovec; / output pixel loop vector */
4360			PDLA_Indx ivec; / input pixel loop vector */
4361			PDLA_Indx ibvec; / input pixel base offset vector */
4362			PDLA_Double dvec; / Residual vector for linearization */
4363			PDLA_Double tvec; / Temporary floating-point vector */
4364			PDLA_Double acc; / Threaded accumulator */
4365			PDLA_Double wgt; / Threaded weight accumulator */
4366			PDLA_Double wgt2; / Threaded weight accumulator for badval finding */
4367			char bounds; / Boundary condition packed string */
4368			PDLA_Indx index_stash; / Stash to store the opening index of dim sample scans */
4369			char method; /* Method identifier (gets one of 'h','g') */
4370			PDLA_Long big; /* Max size of input footprint for each pix */
4371			PDLA_Double blur; /* Scaling of filter */
4372			PDLA_Double sv_min; /* minimum singular value */
4373			char flux; /* Flag to indicate flux conservation */
4374			PDLA_Double *map_ptr;
4375			PDLA_Long i, j;
4376	0	0	PDLA_Float badval = SvNV(__privtrans->bv);
4377			#define HANNING_LOOKUP_SIZE 2500
4378			static PDLA_Double hanning_lookup[HANNING_LOOKUP_SIZE + 2];
4379			static int needs_hanning_calc = 1;
4380			PDLA_Double zeta;
4381			PDLA_Double hanning_offset;
4382
4383			#define GAUSSIAN_LOOKUP_SIZE 4000
4384			#define GAUSSIAN_MAXVAL 6.25 /* 2.5 HWHMs (square it) */
4385			static PDLA_Double gaussian_lookup[GAUSSIAN_LOOKUP_SIZE + 2];
4386			static int needs_gaussian_calc = 1;
4387
4388	0		pdl *in = PDLA->SvPDLAV(__privtrans->in);
4389	0		pdl *out = PDLA->SvPDLAV(__privtrans->out);
4390	0		pdl *map = PDLA->SvPDLAV(__privtrans->map);
4391
4392	0		PDLA->make_physical(in);
4393	0		PDLA->make_physical(out);
4394	0		PDLA->make_physical(map);
4395
4396	0		ndims = map->ndims -1;
4397
4398			/*
4399			* Allocate all our dynamic workspaces at once
4400			* */
4401	0		ovec = (PDLA_Indx *)(PDLA->smalloc( (STRLEN)
4402	0		( + sizeof(PDLA_Indx) * 3 * ndims + sizeof(PDLA_Double) * (3ndims) + sizeof(PDLA_Double) in->dims[ndims] + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * 3 * ndimsndims + ndims + sizeof(char) ndims + sizeof(PDLA_Indx) * ndims )
4403			)
4404			);
4405	0		ivec = &(ovec[ndims]);
4406	0		ibvec = &(ivec[ndims]);
4407	0		dvec = (PDLA_Double *)(&(ibvec[ndims]));
4408	0		tvec = &(dvec[ndims]);
4409	0		acc = &(tvec[ndims]);
4410	0		wgt = &(acc[in->dims[ndims]]); wgt2 = &(wgt[in->dims[ndims]]); tmp = &(wgt2[in->dims[ndims]]);
4411	0		bounds = (char )(&(tmp [3ndims*ndims+ndims]));
4412	0		index_stash = (PDLA_Indx *) &(bounds[ndims]);
4413
4414
4415			/***
4416			* Fill in the boundary condition array
4417			*/
4418			{
4419			char *bstr;
4420			STRLEN blen;
4421	0	0	bstr = SvPV(__privtrans->boundary,blen);
4422
4423	0	0	if(blen == 0) {
4424			/* If no boundary is specified then every dim gets truncated */
4425			int i;
4426	0	0	for (i=0;i
4427	0		bounds[i] = 1;
4428			} else {
4429			int i;
4430	0	0	for(i=0;i
4431	0	0	switch(bstr[i < blen ? i : blen-1 ]) {
4432			case '0': case 'f': case 'F': /* forbid */
4433	0		bounds[i] = 0;
4434	0		break;
4435			case '1': case 't': case 'T': /* truncate */
4436	0		bounds[i] = 1;
4437	0		break;
4438			case '2': case 'e': case 'E': /* extend */
4439	0		bounds[i] = 2;
4440	0		break;
4441			case '3': case 'p': case 'P': /* periodic */
4442	0		bounds[i] = 3;
4443	0		break;
4444			case '4': case 'm': case 'M': /* mirror */
4445	0		bounds[i] = 4;
4446	0		break;
4447			default:
4448			{
4449			char buf[BUFSIZ];
4450	0		sprintf(buf,"Error in map: Unknown boundary condition '%c'",bstr[i]);
4451	0		barf("%s", buf);
4452			}
4453	0		break;
4454			}
4455			}
4456			}
4457			}
4458
4459			/***
4460			* Parse out the 'method', 'big', 'blur', and 'sv_min' arguments
4461			*/
4462	0	0	big = labs((PDLA_Long) (SvNV(__privtrans->big)));
4463	0	0	if(big <= 0)
4464	0		barf("%s","map: 'big' parameter must be >0");
4465
4466	0	0	blur = fabs((PDLA_Double) (SvNV(__privtrans->blur)));
4467	0	0	if(blur < 0)
4468	0		barf("%s","map: 'blur' parameter must be >= 0");
4469
4470	0	0	sv_min = fabs((PDLA_Double) (SvNV(__privtrans->sv_min)));
4471	0	0	if(sv_min < 0)
4472	0		barf("%s","map: 'sv_min' parameter must be >= 0");
4473
4474	0	0	flux = (SvNV(__privtrans->flux) != 0);
4475
4476			{
4477			char *mstr;
4478			STRLEN mlen;
4479	0	0	mstr = SvPV(__privtrans->method,mlen);
4480
4481	0	0	if(mlen==0)
4482	0		method = 'h';
4483	0		else switch(*mstr) {
4484	0		case 'H': method='H'; break;
4485	0		case 'h': method = 'h';
4486	0	0	if( needs_hanning_calc ) {
4487			int i;
4488	0	0	for(i=0;i
4489	0		hanning_lookup[i] = 0.5 + 0.5 * cos(3.1415926536 / HANNING_LOOKUP_SIZE * i);
4490			}
4491	0		hanning_lookup[HANNING_LOOKUP_SIZE] = 0;
4492	0		hanning_lookup[HANNING_LOOKUP_SIZE+1] = 0;
4493	0		needs_hanning_calc = 0;
4494			}
4495	0		zeta = HANNING_LOOKUP_SIZE / blur;
4496	0		hanning_offset = (blur >= 1) ?
4497	0	0	0 :
4498	0		0.5 * (1.0 - blur);
4499	0		break;
4500
4501	0		case 'g': case 'j': method = 'g';
4502	0		zeta = GAUSSIAN_LOOKUP_SIZE / GAUSSIAN_MAXVAL;
4503
4504	0	0	if( needs_gaussian_calc ) {
4505			int i;
4506	0	0	for(i=0;i
4507	0		gaussian_lookup[i] = exp( - i * 1.386294 / zeta );
4508			}
4509	0		gaussian_lookup[GAUSSIAN_LOOKUP_SIZE] = 0;
4510	0		gaussian_lookup[GAUSSIAN_LOOKUP_SIZE+1] = 0;
4511	0		needs_gaussian_calc = 0;
4512			}
4513	0		break;
4514
4515	0		case 'G': case 'J': method = 'G'; break;
4516			default:
4517			{
4518			char err[80];
4519	0		sprintf(err,"Bug in map: unknown method '%c'",*mstr);
4520	0		barf("%s", err);
4521			}
4522	0		break;
4523			}
4524			}
4525
4526
4527
4528			/* End of initialization */
4529			/*************************************************************/
4530			/* Start of Real Work */
4531
4532			/* Initialize coordinate vector and map offset
4533			*/
4534	0	0	for(i=0;i
4535	0		ovec[i] = 0;
4536
4537	0		map_ptr = (PDLA_Double *)(map->data);
4538
4539
4540			/* Main pixel loop (iterates over pixels in the output plane) */
4541			do {
4542			PDLA_Indx psize; PDLA_Indx i_off; PDLA_Indx j; char t_vio; char carry;
4543			/* Prefrobnicate the transformation matrix */
4544	0		psize = (PDLA_Long)(blur * PDLA_xform_aux(map, ovec, tmp, sv_min) + 0.5)+1; /* assignment */
4545
4546			#ifdef DEBUG_MAP
4547			{
4548			int k; PDLA_Indx foo = 0;
4549			printf("ovec: [");
4550			for(k=0;k
4551			foo += ovec[k] * map->dimincs[k+1];
4552			printf(" %2d ",(int)(ovec[k]));
4553			}
4554			printf("]; psize is %ld; big is %d; blur is %8.2f; map is [",psize,big, blur);
4555			for(k=0;k
4556			printf("%8.2f",(double)(((PDLA_Float )(map->data))[foo + kmap->dimincs[0]]));
4557			}
4558			printf("]\n");
4559			}
4560			#endif
4561
4562			/* Don't bother accumulating output if psize is too large */
4563	0	0	if(psize <= big) {
4564			/* Use the prefrobnicated matrix to generate a local linearization.
4565			* dvec gets the delta; ibvec gets the base.
4566			*/
4567			{
4568	0		PDLA_Double *mp = map_ptr;
4569	0	0	for (i=0;i
4570	0		dvec[i] = mp - ( ibvec[i] = (PDLA_Long)(mp + 0.5)); /* assignment */
4571	0		mp += map->dimincs[0];
4572			}
4573			}
4574
4575			/* Initialize input delta vector */
4576	0	0	for(i=0;i
4577	0		ivec[i] = -psize;
4578
4579			/* Initialize accumulators */
4580			{
4581	0		PDLA_Double *ac = acc;
4582	0	0	for(i=0; i < in->dims[ndims]; i++)
4583	0		*(ac++) = 0.0;
4584
4585			}
4586			{
4587	0		PDLA_Double *wg = wgt;
4588	0	0	for(i=0;i < in->dims[ndims]; i++)
4589	0		*(wg++) = 0.0;
4590			}
4591			{
4592	0		PDLA_Double *wg = wgt2;
4593	0	0	for(i=0;i < in->dims[ndims]; i++)
4594	0		*(wg++) = 0.0;
4595			}
4596
4597
4598			/*
4599			* Calculate the original offset into the data array, to enable
4600			* delta calculations in the pixel loop
4601			*
4602			* i runs over dims; j holds the working integer index in the
4603			* current dim.
4604			*
4605			* This code matches the incrementation code at the bottom of the accumulation loop
4606			*/
4607
4608	0		t_vio = 0; /* truncation-boundary violation count - don't bother if it is nonzero */
4609	0		i_off = 0;
4610	0	0	for(i=0;i
4611	0		j = ivec[i] + ibvec[i];
4612	0	0	if(j<0 \|\| j >= in->dims[i]) {
		0
4613	0		switch(bounds[i]) {
4614			case 0: /* no breakage allowed */
4615	0		barf("%s","index out-of-bounds in map");
4616	0		break;
4617			case 1: /* truncation */
4618	0		t_vio++;
4619			/* fall through */
4620			case 2: /* extension -- crop */
4621	0	0	if(j<0)
4622	0		j=0;
4623	0		else j = in->dims[i] - 1;
4624	0		break;
4625			case 3: /* periodic -- mod it */
4626	0		j %= in->dims[i];
4627	0	0	if(j<0)
4628	0		j += in->dims[i];
4629	0		break;
4630			case 4: /* mirror -- reflect off the edges */
4631	0		j += in->dims[i];
4632	0		j %= (in->dims[i]*2);
4633	0	0	if(j<0)
4634	0		j += in->dims[i]*2;
4635	0		j -= in->dims[i];
4636	0	0	if(j<0) {
4637	0		j *= -1;
4638	0		j -= 1;
4639			}
4640	0		break;
4641			default:
4642	0		barf("%s", "Unknown boundary condition in map -- bug alert!");
4643	0		break;
4644			}
4645			}
4646	0		i_off += in->dimincs[i] * j;
4647			}
4648
4649			/* Initialize index stashes for later reference as we scan the footprint */
4650			/* It's a pain in the ass to deal with boundaries, and doubly so at the */
4651			/* end of a dimensional scan. So we stash the index location at the */
4652			/* start of each dimensional scan here. When we finish incrementing */
4653			/* through a particular dim, we pull its value back out of the stash. */
4654	0	0	for(i=0;i
4655	0		index_stash[i] = i_off;
4656			}
4657
4658			/* The input accumulation loop is the hotspot for the entire operation. */
4659			/* We loop over pixels in the region of interest (+/- psize in each dimension) */
4660			/* in the input array, use the linearized transform to bring each pixel center */
4661			/* forward to the output plane, and calculate a weighting based on the chosen */
4662			/* filter function. 'h' is a fast Hanning window rolloff using a lookup */
4663			/* table that is initialized the first time through the code. 'H' is the */
4664			/* same process, but explicitly calculated for each interation (~2x slower). */
4665			/* 'g' uses a radial Gaussian filter. Rather than calculate the array offset */
4666			/* into the input array fresh from the current input array vector each time, */
4667			/* we walk through the array using dimincs and the old offset. This saves */
4668			/* about half of the time spent on index calculation. */
4669
4670			do { /* Input accumulation loop */
4671			PDLA_Double *cp;
4672			PDLA_Double alpha;
4673			/* Calculate the weight of the current input point. Don't bother if we're
4674			* violating any truncation boundaries (in that case our value is zero, but
4675			* for the interpolation we also set the weight to zero).
4676			*/
4677	0	0	if( !t_vio ) {
4678
4679	0		PDLA_Double *ap = tvec;
4680	0		PDLA_Double *bp = dvec;
4681	0		PDLA_Indx *ip = ivec;
4682	0	0	for(i=0; i
4683	0		(ap++) = (ip++) - *(bp++);
4684
4685	0		switch(method) {
4686			PDLA_Double dd;
4687			case 'h':
4688			/* This is the Hanning window rolloff. It is a product of a simple */
4689			/* cos^2(theta) rolloff in each dimension. Using a lookup table */
4690			/* is about 2x faster than using cos(theta) directly in each */
4691			/* weighting calculation, so we do. Using 2500 entries and linear */
4692			/* interpolation is accurate to about 10^-7, and should preserve */
4693			/* the contents of cache pretty well. */
4694	0		alpha = 1;
4695	0		cp = tmp;
4696	0	0	for(i=0; i
4697			int lodex;
4698			int hidex;
4699			PDLA_Double beta;
4700	0		dd = 0;
4701	0		ap = tvec;
4702			/* Get the matrix-multiply element for this dimension */
4703	0	0	for(j=0;j
4704	0		dd += (ap++) *(cp++);
4705
4706			/* Do linear interpolation from the table */
4707			/* The table captures a hanning window centered 0.5 pixel from center. */
4708			/* We scale the filter by the blur parameter -- but if blur is less */
4709			/* than unity, we shrink the hanning blur window while keeping the 0.5 */
4710			/* value on the pixel edge at 0.5. For blur greater than unity, we */
4711			/* scale simply. */
4712	0		beta = fabs(dd) - hanning_offset;
4713	0	0	if(beta > 0) {
4714	0	0	if(beta >= blur) {
4715	0		alpha = 0;
4716	0		i = ndims;
4717			} else {
4718	0		beta *= zeta;
4719	0		lodex = beta;
4720	0	0	beta -= lodex; if(lodex > HANNING_LOOKUP_SIZE)
4721	0		lodex = HANNING_LOOKUP_SIZE;
4722	0		hidex = lodex+1;
4723	0		alpha = hanning_lookup[hidex]beta + hanning_lookup[lodex]*(1-beta);
4724			} /* end of interpolation branch */
4725			} /* end of beta > 0 branch */
4726			} /* end of dimension loop */
4727	0		break;
4728
4729			case 'H':
4730			/* This is the Hanning window rolloff with explicit calculation, preserved */
4731			/* in case someone actually wants the slower longer method. */
4732	0		alpha = 1;
4733	0		cp = tmp;
4734	0	0	for(i=0; i
4735	0		dd = 0;
4736	0		ap = tvec;
4737	0	0	for(j=0;j
4738	0		dd += (ap++) *(cp++);
4739	0		dd = (fabs(dd) - hanning_offset) / blur;
4740	0	0	if( dd > 1 ) {
4741	0		alpha = 0;
4742	0		i = ndims;
4743			} else
4744	0		alpha = (0.5 + 0.5 cos( dd * 3.1415926536 ));
4745			}
4746	0		break;
4747
4748			case 'g':
4749			/* This is the Gaussian rolloff. It does lookup into a precalculated exponential. */
4750			{
4751	0		PDLA_Double sum = 0;
4752	0		cp = tmp;
4753	0	0	for(i=0; i
4754	0		dd = 0;
4755	0		ap = tvec;
4756	0	0	for(j=0;j
4757	0		dd += (ap++) *(cp++);
4758	0		dd /= blur;
4759	0		sum += dd * dd;
4760	0	0	if(sum > GAUSSIAN_MAXVAL) {
4761	0		i = ndims; /* exit early if we're too far out */
4762	0		alpha = 0;
4763			}
4764			}
4765	0	0	if( sum > GAUSSIAN_MAXVAL \|\| !isfinite(sum) \|\| isnan(sum) ) {
		0
		0
4766	0		alpha = 0;
4767			} else {
4768			int lodex,hidex;
4769	0		PDLA_Double beta = fabs(zeta * sum);
4770
4771	0		lodex = beta;
4772	0		beta -= lodex; hidex = lodex+1;
4773	0		alpha = gaussian_lookup[hidex]beta + gaussian_lookup[lodex](1 - beta);
4774
4775			}
4776			}
4777	0		break;
4778
4779			case 'G':
4780			/* This is the Gaussian rolloff with explicit calculation, preserved */
4781			/* in case someone actually wants the slower longer method. */
4782			{
4783	0		PDLA_Double sum = 0;
4784	0		cp = tmp;
4785	0	0	for(i=0; i
4786	0		dd = 0;
4787	0		ap = tvec;
4788	0	0	for(j=0;j
4789	0		dd += (ap++) *(cp++);
4790	0		dd /= blur;
4791	0		sum += dd * dd;
4792	0	0	if(sum > 4) /* 2 pixels -- four half-widths */
4793	0		i = ndims; /* exit early if this pixel is too far outside the footprint of the ideal point */
4794			}
4795
4796	0	0	if(sum > GAUSSIAN_MAXVAL)
4797	0		alpha = 0;
4798			else
4799	0		alpha = exp(-sum * 1.386294); /* Gaussian, rt(2)-pix HWHM */
4800			}
4801	0		break;
4802			default:
4803			{
4804			char buf[80];
4805	0		sprintf(buf,"This can't happen: method='%c'",method);
4806	0		barf("%s", buf);
4807			}
4808			}
4809
4810			{ /* convenience block -- accumulate the current point into the weighted sum. */
4811			/* This is more than simple assignment because we have our own explicit poor */
4812			/* man's threadloop here, so we accumulate each threaded element separately. */
4813	0		PDLA_Float dat = ((PDLA_Float )(in->data)) + i_off;
4814	0		PDLA_Indx max = out->dims[ndims];
4815	0	0	for( i=0; i < max; i++ ) {
4816	0	0	if( (badval==0) \|\| (*dat != badval) ) {
		0
4817	0		acc[i] += dat alpha;
4818	0		dat += in->dimincs[ndims];
4819	0		wgt[i] += alpha;
4820			}
4821	0		wgt2[i] += alpha; }
4822			}
4823			} /* end of t_vio check (i.e. of input accumulation) */
4824
4825
4826			/* Advance input accumulation loop. */
4827			/* We both increment the total vector and also advance the index. */
4828	0		carry = 1;
4829	0	0	for(i=0; i
		0
4830			/* Advance the current element of the offset vector */
4831	0		ivec[i]++;
4832	0		j = ivec[i] + ibvec[i];
4833
4834			/* Advance the offset into the data array */
4835	0	0	if( j > 0 && j <= in->dims[i]-1 ) {
		0
4836			/* Normal case -- just advance the input vector */
4837	0		i_off += in->dimincs[i];
4838			} else {
4839			/* Busted a boundary - either before or after. */
4840	0		switch(bounds[i]){
4841			case 0: /* no breakage allowed -- treat as truncation for interpolation */
4842			case 1: /* truncation -- if we crossed the boundary mark ourselves out-of-bounds */
4843	0	0	if( j == 0 )
4844	0		t_vio--;
4845	0	0	else if( j == in->dims[i] )
4846	0		t_vio++;
4847	0		break;
4848			case 2: /* extension -- do nothing (so the same input point is re-used) */
4849	0		break;
4850			case 3: /* periodic -- advance and mod into the allowed range */
4851	0	0	if((j % in->dims[i]) == 0) {
4852	0		i_off -= in->dimincs[i] * (in->dims[i]-1);
4853			} else {
4854	0		i_off += in->dimincs[i];
4855			}
4856	0		break;
4857			case 4: /* mirror -- advance or retreat depending on phase */
4858	0		j += in->dims[i];
4859	0		j %= (in->dims[i]*2);
4860	0		j -= in->dims[i];
4861	0	0	if( j!=0 && j!= -in->dims[i] ) {
		0
4862	0	0	if(j<0)
4863	0		i_off -= in->dimincs[i];
4864			else
4865	0		i_off += in->dimincs[i];
4866			}
4867	0		break;
4868			}
4869			}
4870
4871			/* Now check for carry */
4872	0	0	if(ivec[i] <= psize) {
4873			/* Normal case -- copy the current offset to the faster-running dim stashes */
4874			int k;
4875	0	0	for(k=0;k
4876	0		index_stash[k] = i_off;
4877			}
4878	0		carry = 0;
4879
4880			} else { /* End of this scan -- recover the last position, and mark carry */
4881	0		i_off = index_stash[i];
4882	0	0	if(bounds[i]==1) {
4883	0		j = ivec[i] + ibvec[i];
4884	0	0	if( j < 0 \|\| j >= in->dims[i] )
		0
4885	0		t_vio--;
4886	0		ivec[i] = -psize;
4887	0		j = ivec[i] + ibvec[i];
4888	0	0	if( j < 0 \|\| j >= in->dims[i] )
		0
4889	0		t_vio++;
4890	0		carry = 1;
4891			} else {
4892	0		ivec[i] = -psize;
4893			}
4894			}
4895			} /* End of counter-advance loop */
4896	0	0	} while(carry==0); /* end of total data accumulation loop (termination condition has carry on last dim) */
4897
4898			{
4899	0		PDLA_Double *ac = acc;
4900	0		PDLA_Double *wg = wgt;
4901	0		PDLA_Double *wg2 = wgt2;
4902	0		PDLA_Float *dat = out->data;
4903
4904			/* Calculate output vector offset */
4905	0	0	for(i=0;i
4906	0		dat += out->dimincs[i] * ovec[i];
4907
4908	0	0	if(!flux) {
4909			/* Flux flag is NOT set -- normal case. Copy the weighted accumulated data. */
4910	0	0	for(i=0; i < out->dims[ndims]; i++) {
4911	0	0	if(wg && (wg2 / *wg) < 1.5 ) {
		0
4912	0		dat = (ac++) / *(wg++);
4913	0		wg2++;
4914			} else {
4915	0		*dat = badval;
4916	0		ac++; wg++; wg2++;
4917			}
4918	0		dat += out->dimincs[ndims];
4919			}
4920			} else {
4921			/* Flux flag is set - scale by the (unpadded) determinant of the Jacobian */
4922	0		PDLA_Double det = tmp[ndims*ndims];
4923	0	0	for(i=0; i < out->dims[ndims]; i++) {
4924	0	0	if(wg && (wg2 / *wg) < 1.5 ) {
		0
4925	0		dat = (ac++) / (wg++) det;
4926	0		wg2++;
4927			} else {
4928	0		*dat = badval;
4929	0		ac++; wg++; wg2++;
4930			}
4931	0		dat += out->dimincs[ndims];
4932			} /* end of for loop */
4933			} /* end of flux flag set conditional */
4934			} /* end of convenience block */
4935
4936			/* End of code for normal pixels */
4937			} else {
4938			/* The pixel was ludicrously huge -- just set this pixel to nan */
4939	0		PDLA_Float *dat = out->data;
4940	0	0	for(i=0;i
4941	0		dat += out->dimincs[i] * ovec[i];
4942	0	0	for(i=0;idims[ndims];i++) {
4943	0		dat = badval; / Should handle bad values too -- not yet */
4944	0		dat += out->dimincs[ndims];
4945			}
4946			}
4947
4948			/* Increment the pixel counter */
4949			{
4950	0	0	for(i=0;
4951	0	0	(i
4952	0	0	(map_ptr += map->dimincs[i+1]) && /* Funky pre-test increment */
4953	0		(++(ovec[i]) >= out->dims[i]); /* Actual carry test */
4954	0		i++) {
4955	0		ovec[i] = 0;
4956	0		map_ptr -= out->dims[i] * map->dimincs[i+1];
4957			}
4958			}
4959	0	0	} while(i
4960
4961
4962
4963			}
4964			PDLA_COMMENT("THREADLOOPEND")
4965			}
4966			}
4967	0		k0_datap -= __tinc1_0 * __tdims1 + __offsp[0];
4968	0	0	} while(PDLA->iterthreadloop(&__privtrans->__pdlthread,2)); } break; case PDLA_D: {
4969	7	50	PDLA_Double * k0_datap = ((PDLA_Double *)(PDLA_REPRP_TRANS((__privtrans->pdls[0]),(__privtrans->vtable->per_pdl_flags[0]))));
		0
4970	7		PDLA_Double * k0_physdatap = ((PDLA_Double *)((__privtrans->pdls[0])->data));
4971
4972
4973			PDLA_COMMENT("THREADLOOPBEGIN")
4974	7	50	if ( PDLA->startthreadloop(&(__privtrans->__pdlthread),__privtrans->vtable->readdata, __tr) ) return;
4975	7		do { register PDLA_Indx __tind1=0,__tind2=0;
4976	7		register PDLA_Indx __tnpdls = __privtrans->__pdlthread.npdls;
4977	7		register PDLA_Indx __tdims1 = __privtrans->__pdlthread.dims[1];
4978	7		register PDLA_Indx __tdims0 = __privtrans->__pdlthread.dims[0];
4979	7		register PDLA_Indx *__offsp = PDLA->get_threadoffsp(&__privtrans->__pdlthread);
4980	7		register PDLA_Indx __tinc0_0 = __privtrans->__pdlthread.incs[0];
4981	7		register PDLA_Indx __tinc1_0 = __privtrans->__pdlthread.incs[__tnpdls+0];
4982	7		k0_datap += __offsp[0];
4983	14	100	for( __tind2 = 0 ;
4984			__tind2 < __tdims1 ;
4985	7		__tind2++
4986	7		,k0_datap += __tinc1_0 - __tinc0_0 * __tdims0
4987			)
4988			{
4989	14	100	for( __tind1 = 0 ;
4990			__tind1 < __tdims0 ;
4991	7		__tind1++
4992	7		,k0_datap += __tinc0_0
4993			)
4994			{ PDLA_COMMENT("This is the tightest threadloop. Make sure inside is optimal."){
4995
4996			/*
4997			* Pixel interpolation & averaging code
4998			*
4999			* Calls a common coordinate-transformation block (see following hdr)
5000			* that isn't dependent on the type of the input variable.
5001			*
5002			* The inputs are SVs to avoid hassling with threadloops; threading
5003			* is handled internally. To simplify the threading business, any
5004			* thread dimensions should all be collapsed to a single one by the
5005			* perl front-end.
5006			*
5007			*/
5008
5009			short ndims; /* Number of dimensions we're working in */
5010			PDLA_Double tmp; / Workspace for prefrobnication */
5011			PDLA_Indx ovec; / output pixel loop vector */
5012			PDLA_Indx ivec; / input pixel loop vector */
5013			PDLA_Indx ibvec; / input pixel base offset vector */
5014			PDLA_Double dvec; / Residual vector for linearization */
5015			PDLA_Double tvec; / Temporary floating-point vector */
5016			PDLA_Double acc; / Threaded accumulator */
5017			PDLA_Double wgt; / Threaded weight accumulator */
5018			PDLA_Double wgt2; / Threaded weight accumulator for badval finding */
5019			char bounds; / Boundary condition packed string */
5020			PDLA_Indx index_stash; / Stash to store the opening index of dim sample scans */
5021			char method; /* Method identifier (gets one of 'h','g') */
5022			PDLA_Long big; /* Max size of input footprint for each pix */
5023			PDLA_Double blur; /* Scaling of filter */
5024			PDLA_Double sv_min; /* minimum singular value */
5025			char flux; /* Flag to indicate flux conservation */
5026			PDLA_Double *map_ptr;
5027			PDLA_Long i, j;
5028	7	100	PDLA_Double badval = SvNV(__privtrans->bv);
5029			#define HANNING_LOOKUP_SIZE 2500
5030			static PDLA_Double hanning_lookup[HANNING_LOOKUP_SIZE + 2];
5031			static int needs_hanning_calc = 1;
5032			PDLA_Double zeta;
5033			PDLA_Double hanning_offset;
5034
5035			#define GAUSSIAN_LOOKUP_SIZE 4000
5036			#define GAUSSIAN_MAXVAL 6.25 /* 2.5 HWHMs (square it) */
5037			static PDLA_Double gaussian_lookup[GAUSSIAN_LOOKUP_SIZE + 2];
5038			static int needs_gaussian_calc = 1;
5039
5040	7		pdl *in = PDLA->SvPDLAV(__privtrans->in);
5041	7		pdl *out = PDLA->SvPDLAV(__privtrans->out);
5042	7		pdl *map = PDLA->SvPDLAV(__privtrans->map);
5043
5044	7		PDLA->make_physical(in);
5045	7		PDLA->make_physical(out);
5046	7		PDLA->make_physical(map);
5047
5048	7		ndims = map->ndims -1;
5049
5050			/*
5051			* Allocate all our dynamic workspaces at once
5052			* */
5053	7		ovec = (PDLA_Indx *)(PDLA->smalloc( (STRLEN)
5054	7		( + sizeof(PDLA_Indx) * 3 * ndims + sizeof(PDLA_Double) * (3ndims) + sizeof(PDLA_Double) in->dims[ndims] + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * 3 * ndimsndims + ndims + sizeof(char) ndims + sizeof(PDLA_Indx) * ndims )
5055			)
5056			);
5057	7		ivec = &(ovec[ndims]);
5058	7		ibvec = &(ivec[ndims]);
5059	7		dvec = (PDLA_Double *)(&(ibvec[ndims]));
5060	7		tvec = &(dvec[ndims]);
5061	7		acc = &(tvec[ndims]);
5062	7		wgt = &(acc[in->dims[ndims]]); wgt2 = &(wgt[in->dims[ndims]]); tmp = &(wgt2[in->dims[ndims]]);
5063	7		bounds = (char )(&(tmp [3ndims*ndims+ndims]));
5064	7		index_stash = (PDLA_Indx *) &(bounds[ndims]);
5065
5066
5067			/***
5068			* Fill in the boundary condition array
5069			*/
5070			{
5071			char *bstr;
5072			STRLEN blen;
5073	7	50	bstr = SvPV(__privtrans->boundary,blen);
5074
5075	7	50	if(blen == 0) {
5076			/* If no boundary is specified then every dim gets truncated */
5077			int i;
5078	0	0	for (i=0;i
5079	0		bounds[i] = 1;
5080			} else {
5081			int i;
5082	21	100	for(i=0;i
5083	14	100	switch(bstr[i < blen ? i : blen-1 ]) {
5084			case '0': case 'f': case 'F': /* forbid */
5085	0		bounds[i] = 0;
5086	0		break;
5087			case '1': case 't': case 'T': /* truncate */
5088	12		bounds[i] = 1;
5089	12		break;
5090			case '2': case 'e': case 'E': /* extend */
5091	0		bounds[i] = 2;
5092	0		break;
5093			case '3': case 'p': case 'P': /* periodic */
5094	1		bounds[i] = 3;
5095	1		break;
5096			case '4': case 'm': case 'M': /* mirror */
5097	1		bounds[i] = 4;
5098	1		break;
5099			default:
5100			{
5101			char buf[BUFSIZ];
5102	0		sprintf(buf,"Error in map: Unknown boundary condition '%c'",bstr[i]);
5103	0		barf("%s", buf);
5104			}
5105	0		break;
5106			}
5107			}
5108			}
5109			}
5110
5111			/***
5112			* Parse out the 'method', 'big', 'blur', and 'sv_min' arguments
5113			*/
5114	7	50	big = labs((PDLA_Long) (SvNV(__privtrans->big)));
5115	7	50	if(big <= 0)
5116	0		barf("%s","map: 'big' parameter must be >0");
5117
5118	7	100	blur = fabs((PDLA_Double) (SvNV(__privtrans->blur)));
5119	7	50	if(blur < 0)
5120	0		barf("%s","map: 'blur' parameter must be >= 0");
5121
5122	7	50	sv_min = fabs((PDLA_Double) (SvNV(__privtrans->sv_min)));
5123	7	50	if(sv_min < 0)
5124	0		barf("%s","map: 'sv_min' parameter must be >= 0");
5125
5126	7	50	flux = (SvNV(__privtrans->flux) != 0);
5127
5128			{
5129			char *mstr;
5130			STRLEN mlen;
5131	7	50	mstr = SvPV(__privtrans->method,mlen);
5132
5133	7	50	if(mlen==0)
5134	0		method = 'h';
5135	7		else switch(*mstr) {
5136	0		case 'H': method='H'; break;
5137	6		case 'h': method = 'h';
5138	6	100	if( needs_hanning_calc ) {
5139			int i;
5140	2501	100	for(i=0;i
5141	2500		hanning_lookup[i] = 0.5 + 0.5 * cos(3.1415926536 / HANNING_LOOKUP_SIZE * i);
5142			}
5143	1		hanning_lookup[HANNING_LOOKUP_SIZE] = 0;
5144	1		hanning_lookup[HANNING_LOOKUP_SIZE+1] = 0;
5145	1		needs_hanning_calc = 0;
5146			}
5147	6		zeta = HANNING_LOOKUP_SIZE / blur;
5148	6		hanning_offset = (blur >= 1) ?
5149	6	50	0 :
5150	0		0.5 * (1.0 - blur);
5151	6		break;
5152
5153	1		case 'g': case 'j': method = 'g';
5154	1		zeta = GAUSSIAN_LOOKUP_SIZE / GAUSSIAN_MAXVAL;
5155
5156	1	50	if( needs_gaussian_calc ) {
5157			int i;
5158	4001	100	for(i=0;i
5159	4000		gaussian_lookup[i] = exp( - i * 1.386294 / zeta );
5160			}
5161	1		gaussian_lookup[GAUSSIAN_LOOKUP_SIZE] = 0;
5162	1		gaussian_lookup[GAUSSIAN_LOOKUP_SIZE+1] = 0;
5163	1		needs_gaussian_calc = 0;
5164			}
5165	1		break;
5166
5167	0		case 'G': case 'J': method = 'G'; break;
5168			default:
5169			{
5170			char err[80];
5171	0		sprintf(err,"Bug in map: unknown method '%c'",*mstr);
5172	0		barf("%s", err);
5173			}
5174	0		break;
5175			}
5176			}
5177
5178
5179
5180			/* End of initialization */
5181			/*************************************************************/
5182			/* Start of Real Work */
5183
5184			/* Initialize coordinate vector and map offset
5185			*/
5186	21	100	for(i=0;i
5187	14		ovec[i] = 0;
5188
5189	7		map_ptr = (PDLA_Double *)(map->data);
5190
5191
5192			/* Main pixel loop (iterates over pixels in the output plane) */
5193			do {
5194			PDLA_Indx psize; PDLA_Indx i_off; PDLA_Indx j; char t_vio; char carry;
5195			/* Prefrobnicate the transformation matrix */
5196	421		psize = (PDLA_Long)(blur * PDLA_xform_aux(map, ovec, tmp, sv_min) + 0.5)+1; /* assignment */
5197
5198			#ifdef DEBUG_MAP
5199			{
5200			int k; PDLA_Indx foo = 0;
5201			printf("ovec: [");
5202			for(k=0;k
5203			foo += ovec[k] * map->dimincs[k+1];
5204			printf(" %2d ",(int)(ovec[k]));
5205			}
5206			printf("]; psize is %ld; big is %d; blur is %8.2f; map is [",psize,big, blur);
5207			for(k=0;k
5208			printf("%8.2f",(double)(((PDLA_Double )(map->data))[foo + kmap->dimincs[0]]));
5209			}
5210			printf("]\n");
5211			}
5212			#endif
5213
5214			/* Don't bother accumulating output if psize is too large */
5215	421	50	if(psize <= big) {
5216			/* Use the prefrobnicated matrix to generate a local linearization.
5217			* dvec gets the delta; ibvec gets the base.
5218			*/
5219			{
5220	421		PDLA_Double *mp = map_ptr;
5221	1263	100	for (i=0;i
5222	842		dvec[i] = mp - ( ibvec[i] = (PDLA_Long)(mp + 0.5)); /* assignment */
5223	842		mp += map->dimincs[0];
5224			}
5225			}
5226
5227			/* Initialize input delta vector */
5228	1263	100	for(i=0;i
5229	842		ivec[i] = -psize;
5230
5231			/* Initialize accumulators */
5232			{
5233	421		PDLA_Double *ac = acc;
5234	842	100	for(i=0; i < in->dims[ndims]; i++)
5235	421		*(ac++) = 0.0;
5236
5237			}
5238			{
5239	421		PDLA_Double *wg = wgt;
5240	842	100	for(i=0;i < in->dims[ndims]; i++)
5241	421		*(wg++) = 0.0;
5242			}
5243			{
5244	421		PDLA_Double *wg = wgt2;
5245	842	100	for(i=0;i < in->dims[ndims]; i++)
5246	421		*(wg++) = 0.0;
5247			}
5248
5249
5250			/*
5251			* Calculate the original offset into the data array, to enable
5252			* delta calculations in the pixel loop
5253			*
5254			* i runs over dims; j holds the working integer index in the
5255			* current dim.
5256			*
5257			* This code matches the incrementation code at the bottom of the accumulation loop
5258			*/
5259
5260	421		t_vio = 0; /* truncation-boundary violation count - don't bother if it is nonzero */
5261	421		i_off = 0;
5262	1263	100	for(i=0;i
5263	842		j = ivec[i] + ibvec[i];
5264	842	100	if(j<0 \|\| j >= in->dims[i]) {
		100
5265	368		switch(bounds[i]) {
5266			case 0: /* no breakage allowed */
5267	0		barf("%s","index out-of-bounds in map");
5268	0		break;
5269			case 1: /* truncation */
5270	268		t_vio++;
5271			/* fall through */
5272			case 2: /* extension -- crop */
5273	268	100	if(j<0)
5274	208		j=0;
5275	60		else j = in->dims[i] - 1;
5276	268		break;
5277			case 3: /* periodic -- mod it */
5278	50		j %= in->dims[i];
5279	50	100	if(j<0)
5280	20		j += in->dims[i];
5281	50		break;
5282			case 4: /* mirror -- reflect off the edges */
5283	50		j += in->dims[i];
5284	50		j %= (in->dims[i]*2);
5285	50	50	if(j<0)
5286	0		j += in->dims[i]*2;
5287	50		j -= in->dims[i];
5288	50	50	if(j<0) {
5289	50		j *= -1;
5290	50		j -= 1;
5291			}
5292	50		break;
5293			default:
5294	0		barf("%s", "Unknown boundary condition in map -- bug alert!");
5295	0		break;
5296			}
5297			}
5298	842		i_off += in->dimincs[i] * j;
5299			}
5300
5301			/* Initialize index stashes for later reference as we scan the footprint */
5302			/* It's a pain in the ass to deal with boundaries, and doubly so at the */
5303			/* end of a dimensional scan. So we stash the index location at the */
5304			/* start of each dimensional scan here. When we finish incrementing */
5305			/* through a particular dim, we pull its value back out of the stash. */
5306	1263	100	for(i=0;i
5307	842		index_stash[i] = i_off;
5308			}
5309
5310			/* The input accumulation loop is the hotspot for the entire operation. */
5311			/* We loop over pixels in the region of interest (+/- psize in each dimension) */
5312			/* in the input array, use the linearized transform to bring each pixel center */
5313			/* forward to the output plane, and calculate a weighting based on the chosen */
5314			/* filter function. 'h' is a fast Hanning window rolloff using a lookup */
5315			/* table that is initialized the first time through the code. 'H' is the */
5316			/* same process, but explicitly calculated for each interation (~2x slower). */
5317			/* 'g' uses a radial Gaussian filter. Rather than calculate the array offset */
5318			/* into the input array fresh from the current input array vector each time, */
5319			/* we walk through the array using dimincs and the old offset. This saves */
5320			/* about half of the time spent on index calculation. */
5321
5322			do { /* Input accumulation loop */
5323			PDLA_Double *cp;
5324			PDLA_Double alpha;
5325			/* Calculate the weight of the current input point. Don't bother if we're
5326			* violating any truncation boundaries (in that case our value is zero, but
5327			* for the interpolation we also set the weight to zero).
5328			*/
5329	11701	100	if( !t_vio ) {
5330
5331	7182		PDLA_Double *ap = tvec;
5332	7182		PDLA_Double *bp = dvec;
5333	7182		PDLA_Indx *ip = ivec;
5334	21546	100	for(i=0; i
5335	14364		(ap++) = (ip++) - *(bp++);
5336
5337	7182		switch(method) {
5338			PDLA_Double dd;
5339			case 'h':
5340			/* This is the Hanning window rolloff. It is a product of a simple */
5341			/* cos^2(theta) rolloff in each dimension. Using a lookup table */
5342			/* is about 2x faster than using cos(theta) directly in each */
5343			/* weighting calculation, so we do. Using 2500 entries and linear */
5344			/* interpolation is accurate to about 10^-7, and should preserve */
5345			/* the contents of cache pretty well. */
5346	6341		alpha = 1;
5347	6341		cp = tmp;
5348	14694	100	for(i=0; i
5349			int lodex;
5350			int hidex;
5351			PDLA_Double beta;
5352	8353		dd = 0;
5353	8353		ap = tvec;
5354			/* Get the matrix-multiply element for this dimension */
5355	25059	100	for(j=0;j
5356	16706		dd += (ap++) *(cp++);
5357
5358			/* Do linear interpolation from the table */
5359			/* The table captures a hanning window centered 0.5 pixel from center. */
5360			/* We scale the filter by the blur parameter -- but if blur is less */
5361			/* than unity, we shrink the hanning blur window while keeping the 0.5 */
5362			/* value on the pixel edge at 0.5. For blur greater than unity, we */
5363			/* scale simply. */
5364	8353		beta = fabs(dd) - hanning_offset;
5365	8353	100	if(beta > 0) {
5366	6875	100	if(beta >= blur) {
5367	5707		alpha = 0;
5368	5707		i = ndims;
5369			} else {
5370	1168		beta *= zeta;
5371	1168		lodex = beta;
5372	1168	50	beta -= lodex; if(lodex > HANNING_LOOKUP_SIZE)
5373	0		lodex = HANNING_LOOKUP_SIZE;
5374	1168		hidex = lodex+1;
5375	1168		alpha = hanning_lookup[hidex]beta + hanning_lookup[lodex]*(1-beta);
5376			} /* end of interpolation branch */
5377			} /* end of beta > 0 branch */
5378			} /* end of dimension loop */
5379	6341		break;
5380
5381			case 'H':
5382			/* This is the Hanning window rolloff with explicit calculation, preserved */
5383			/* in case someone actually wants the slower longer method. */
5384	0		alpha = 1;
5385	0		cp = tmp;
5386	0	0	for(i=0; i
5387	0		dd = 0;
5388	0		ap = tvec;
5389	0	0	for(j=0;j
5390	0		dd += (ap++) *(cp++);
5391	0		dd = (fabs(dd) - hanning_offset) / blur;
5392	0	0	if( dd > 1 ) {
5393	0		alpha = 0;
5394	0		i = ndims;
5395			} else
5396	0		alpha = (0.5 + 0.5 cos( dd * 3.1415926536 ));
5397			}
5398	0		break;
5399
5400			case 'g':
5401			/* This is the Gaussian rolloff. It does lookup into a precalculated exponential. */
5402			{
5403	841		PDLA_Double sum = 0;
5404	841		cp = tmp;
5405	2523	100	for(i=0; i
5406	1682		dd = 0;
5407	1682		ap = tvec;
5408	5046	100	for(j=0;j
5409	3364		dd += (ap++) *(cp++);
5410	1682		dd /= blur;
5411	1682		sum += dd * dd;
5412	1682	100	if(sum > GAUSSIAN_MAXVAL) {
5413	100		i = ndims; /* exit early if we're too far out */
5414	100		alpha = 0;
5415			}
5416			}
5417	841	100	if( sum > GAUSSIAN_MAXVAL \|\| !isfinite(sum) \|\| isnan(sum) ) {
		50
		50
5418	100		alpha = 0;
5419			} else {
5420			int lodex,hidex;
5421	741		PDLA_Double beta = fabs(zeta * sum);
5422
5423	741		lodex = beta;
5424	741		beta -= lodex; hidex = lodex+1;
5425	741		alpha = gaussian_lookup[hidex]beta + gaussian_lookup[lodex](1 - beta);
5426
5427			}
5428			}
5429	841		break;
5430
5431			case 'G':
5432			/* This is the Gaussian rolloff with explicit calculation, preserved */
5433			/* in case someone actually wants the slower longer method. */
5434			{
5435	0		PDLA_Double sum = 0;
5436	0		cp = tmp;
5437	0	0	for(i=0; i
5438	0		dd = 0;
5439	0		ap = tvec;
5440	0	0	for(j=0;j
5441	0		dd += (ap++) *(cp++);
5442	0		dd /= blur;
5443	0		sum += dd * dd;
5444	0	0	if(sum > 4) /* 2 pixels -- four half-widths */
5445	0		i = ndims; /* exit early if this pixel is too far outside the footprint of the ideal point */
5446			}
5447
5448	0	0	if(sum > GAUSSIAN_MAXVAL)
5449	0		alpha = 0;
5450			else
5451	0		alpha = exp(-sum * 1.386294); /* Gaussian, rt(2)-pix HWHM */
5452			}
5453	0		break;
5454			default:
5455			{
5456			char buf[80];
5457	0		sprintf(buf,"This can't happen: method='%c'",method);
5458	0		barf("%s", buf);
5459			}
5460			}
5461
5462			{ /* convenience block -- accumulate the current point into the weighted sum. */
5463			/* This is more than simple assignment because we have our own explicit poor */
5464			/* man's threadloop here, so we accumulate each threaded element separately. */
5465	7182		PDLA_Double dat = ((PDLA_Double )(in->data)) + i_off;
5466	7182		PDLA_Indx max = out->dims[ndims];
5467	14364	100	for( i=0; i < max; i++ ) {
5468	7182	100	if( (badval==0) \|\| (*dat != badval) ) {
		50
5469	7182		acc[i] += dat alpha;
5470	7182		dat += in->dimincs[ndims];
5471	7182		wgt[i] += alpha;
5472			}
5473	7182		wgt2[i] += alpha; }
5474			}
5475			} /* end of t_vio check (i.e. of input accumulation) */
5476
5477
5478			/* Advance input accumulation loop. */
5479			/* We both increment the total vector and also advance the index. */
5480	11701		carry = 1;
5481	25605	100	for(i=0; i
		100
5482			/* Advance the current element of the offset vector */
5483	13904		ivec[i]++;
5484	13904		j = ivec[i] + ibvec[i];
5485
5486			/* Advance the offset into the data array */
5487	13904	100	if( j > 0 && j <= in->dims[i]-1 ) {
		100
5488			/* Normal case -- just advance the input vector */
5489	7651		i_off += in->dimincs[i];
5490			} else {
5491			/* Busted a boundary - either before or after. */
5492	6253		switch(bounds[i]){
5493			case 0: /* no breakage allowed -- treat as truncation for interpolation */
5494			case 1: /* truncation -- if we crossed the boundary mark ourselves out-of-bounds */
5495	4273	100	if( j == 0 )
5496	642		t_vio--;
5497	3631	100	else if( j == in->dims[i] )
5498	950		t_vio++;
5499	4273		break;
5500			case 2: /* extension -- do nothing (so the same input point is re-used) */
5501	0		break;
5502			case 3: /* periodic -- advance and mod into the allowed range */
5503	330	100	if((j % in->dims[i]) == 0) {
5504	100		i_off -= in->dimincs[i] * (in->dims[i]-1);
5505			} else {
5506	230		i_off += in->dimincs[i];
5507			}
5508	330		break;
5509			case 4: /* mirror -- advance or retreat depending on phase */
5510	1650		j += in->dims[i];
5511	1650		j %= (in->dims[i]*2);
5512	1650		j -= in->dims[i];
5513	1650	100	if( j!=0 && j!= -in->dims[i] ) {
		100
5514	1150	100	if(j<0)
5515	1000		i_off -= in->dimincs[i];
5516			else
5517	150		i_off += in->dimincs[i];
5518			}
5519	1650		break;
5520			}
5521			}
5522
5523			/* Now check for carry */
5524	13904	100	if(ivec[i] <= psize) {
5525			/* Normal case -- copy the current offset to the faster-running dim stashes */
5526			int k;
5527	13062	100	for(k=0;k
5528	1782		index_stash[k] = i_off;
5529			}
5530	11280		carry = 0;
5531
5532			} else { /* End of this scan -- recover the last position, and mark carry */
5533	2624		i_off = index_stash[i];
5534	2624	100	if(bounds[i]==1) {
5535	2024		j = ivec[i] + ibvec[i];
5536	2024	50	if( j < 0 \|\| j >= in->dims[i] )
		100
5537	1130		t_vio--;
5538	2024		ivec[i] = -psize;
5539	2024		j = ivec[i] + ibvec[i];
5540	2024	100	if( j < 0 \|\| j >= in->dims[i] )
		100
5541	822		t_vio++;
5542	2024		carry = 1;
5543			} else {
5544	600		ivec[i] = -psize;
5545			}
5546			}
5547			} /* End of counter-advance loop */
5548	11701	100	} while(carry==0); /* end of total data accumulation loop (termination condition has carry on last dim) */
5549
5550			{
5551	421		PDLA_Double *ac = acc;
5552	421		PDLA_Double *wg = wgt;
5553	421		PDLA_Double *wg2 = wgt2;
5554	421		PDLA_Double *dat = out->data;
5555
5556			/* Calculate output vector offset */
5557	1263	100	for(i=0;i
5558	842		dat += out->dimincs[i] * ovec[i];
5559
5560	421	50	if(!flux) {
5561			/* Flux flag is NOT set -- normal case. Copy the weighted accumulated data. */
5562	842	100	for(i=0; i < out->dims[ndims]; i++) {
5563	421	100	if(wg && (wg2 / *wg) < 1.5 ) {
		50
5564	326		dat = (ac++) / *(wg++);
5565	326		wg2++;
5566			} else {
5567	95		*dat = badval;
5568	95		ac++; wg++; wg2++;
5569			}
5570	421		dat += out->dimincs[ndims];
5571			}
5572			} else {
5573			/* Flux flag is set - scale by the (unpadded) determinant of the Jacobian */
5574	0		PDLA_Double det = tmp[ndims*ndims];
5575	421	0	for(i=0; i < out->dims[ndims]; i++) {
5576	0	0	if(wg && (wg2 / *wg) < 1.5 ) {
		0
5577	0		dat = (ac++) / (wg++) det;
5578	0		wg2++;
5579			} else {
5580	0		*dat = badval;
5581	0		ac++; wg++; wg2++;
5582			}
5583	0		dat += out->dimincs[ndims];
5584			} /* end of for loop */
5585			} /* end of flux flag set conditional */
5586			} /* end of convenience block */
5587
5588			/* End of code for normal pixels */
5589			} else {
5590			/* The pixel was ludicrously huge -- just set this pixel to nan */
5591	0		PDLA_Double *dat = out->data;
5592	0	0	for(i=0;i
5593	0		dat += out->dimincs[i] * ovec[i];
5594	0	0	for(i=0;idims[ndims];i++) {
5595	0		dat = badval; / Should handle bad values too -- not yet */
5596	0		dat += out->dimincs[ndims];
5597			}
5598			}
5599
5600			/* Increment the pixel counter */
5601			{
5602	481	100	for(i=0;
5603	474	50	(i
5604	474	100	(map_ptr += map->dimincs[i+1]) && /* Funky pre-test increment */
5605	948		(++(ovec[i]) >= out->dims[i]); /* Actual carry test */
5606	60		i++) {
5607	60		ovec[i] = 0;
5608	60		map_ptr -= out->dims[i] * map->dimincs[i+1];
5609			}
5610			}
5611	421	100	} while(i
5612
5613
5614
5615			}
5616			PDLA_COMMENT("THREADLOOPEND")
5617			}
5618			}
5619	7		k0_datap -= __tinc1_0 * __tdims1 + __offsp[0];
5620	7	50	} while(PDLA->iterthreadloop(&__privtrans->__pdlthread,2)); break;}
5621	0		default:barf("PP INTERNAL ERROR! PLEASE MAKE A BUG REPORT\n");}
5622			}
5623			}
5624			}
5625
5626
5627
5628
5629	12		void pdl_map_free(pdl_trans *__tr ) {
5630			int __dim;
5631	12		pdl_map_struct __privtrans = (pdl_map_struct ) __tr;
5632
5633			{
5634
5635	12		PDLA_TR_CLRMAGIC(__privtrans);
5636	12		SvREFCNT_dec(__privtrans->in);;SvREFCNT_dec(__privtrans->out);;SvREFCNT_dec(__privtrans->map);;SvREFCNT_dec(__privtrans->boundary);;SvREFCNT_dec(__privtrans->method);;SvREFCNT_dec(__privtrans->big);;SvREFCNT_dec(__privtrans->blur);;SvREFCNT_dec(__privtrans->sv_min);;SvREFCNT_dec(__privtrans->flux);;SvREFCNT_dec(__privtrans->bv);;
5637	12	50	if(__privtrans->__ddone) {
5638	12		PDLA->freethreadloop(&(__privtrans->__pdlthread));
5639			;
5640			}
5641
5642			}
5643	12		}
5644
5645
5646
5647
5648			static char pdl_map_vtable_flags[] =
5649			{ PDLA_TPDLA_VAFFINE_OK};
5650			pdl_transvtable pdl_map_vtable = {
5651			0,0, 1, 1, pdl_map_vtable_flags,
5652			pdl_map_redodims, pdl_map_readdata, NULL,
5653			pdl_map_free,NULL,NULL,pdl_map_copy,
5654			sizeof(pdl_map_struct),"pdl_map_vtable"
5655			};
5656
5657
5658
5659			MODULE = PDLA::Transform PACKAGE = PDLA::Transform
5660
5661			PROTOTYPES: ENABLE
5662
5663			int
5664			set_debugging(i)
5665			int i;
5666			CODE:
5667	0		RETVAL = __pdl_debugging;
5668	0		__pdl_debugging = i;
5669			OUTPUT:
5670			RETVAL
5671
5672			int
5673			set_boundscheck(i)
5674			int i;
5675			CODE:
5676			if (! 1)
5677			warn("Bounds checking is disabled for PDLA::Transform");
5678	0		RETVAL = __pdl_boundscheck;
5679	0		__pdl_boundscheck = i;
5680			OUTPUT:
5681			RETVAL
5682
5683
5684			MODULE = PDLA::Transform PACKAGE = PDLA
5685
5686
5687			void
5688			_map_int(k0,in,out,map,boundary,method,big,blur,sv_min,flux,bv)
5689			pdl *k0
5690			SV *in
5691			SV *out
5692			SV *map
5693			SV *boundary
5694			SV *method
5695			SV *big
5696			SV *blur
5697			SV *sv_min
5698			SV *flux
5699			SV *bv
5700			CODE:
5701			{ pdl_map_struct *__privtrans;
5702	12		int badflag_cache = 0;
5703	12		__privtrans = malloc(sizeof(*__privtrans));
5704	12		PDLA_THR_CLRMAGIC(&__privtrans->__pdlthread);
5705	12		PDLA_TR_SETMAGIC(__privtrans);
5706	12		__privtrans->flags = 0;
5707	12		__privtrans->__ddone = 0;
5708	12		__privtrans->vtable = &pdl_map_vtable;
5709	12		__privtrans->freeproc = PDLA->trans_mallocfreeproc;
5710	12		__privtrans->bvalflag = 0;
5711	12		badflag_cache = ((k0->state & PDLA_BADVAL) > 0);
5712	12	100	if (badflag_cache) __privtrans->bvalflag = 1;
5713	12	50	__privtrans->__datatype = 0;if(__privtrans->__datatype < k0->datatype) {
5714	12		__privtrans->__datatype = k0->datatype;
5715			}
5716	12	50	if(__privtrans->__datatype == PDLA_B) {}
5717	12	50	else if(__privtrans->__datatype == PDLA_S) {}
5718	12	50	else if(__privtrans->__datatype == PDLA_U) {}
5719	12	100	else if(__privtrans->__datatype == PDLA_L) {}
5720	7	50	else if(__privtrans->__datatype == PDLA_N) {}
5721	7	50	else if(__privtrans->__datatype == PDLA_Q) {}
5722	7	50	else if(__privtrans->__datatype == PDLA_F) {}
5723	7	50	else if(__privtrans->__datatype == PDLA_D) {}
5724	0		else __privtrans->__datatype = PDLA_D;
5725	12	50	if(__privtrans->__datatype != k0->datatype) {
5726	0		k0 = PDLA->get_convertedpdl(k0,__privtrans->__datatype);
5727	12		}{(__privtrans->in) = newSVsv(in);(__privtrans->out) = newSVsv(out);(__privtrans->map) = newSVsv(map);(__privtrans->boundary) = newSVsv(boundary);(__privtrans->method) = newSVsv(method);(__privtrans->big) = newSVsv(big);(__privtrans->blur) = newSVsv(blur);(__privtrans->sv_min) = newSVsv(sv_min);(__privtrans->flux) = newSVsv(flux);(__privtrans->bv) = newSVsv(bv);}PDLA_COMMENT("No flow")__privtrans->__pdlthread.inds = 0;__privtrans->pdls[0] = k0;
5728	12		PDLA->make_trans_mutual((pdl_trans *)__privtrans);
5729			if (badflag_cache) {
5730			}
5731	12		XSRETURN(0);
5732			}
5733
5734
5735			BOOT:
5736
5737			PDLA_COMMENT("Get pointer to structure of core shared C routines")
5738			PDLA_COMMENT("make sure PDLA::Core is loaded")
5739
5740	1		perl_require_pv ("PDLA/Core.pm"); /* make sure PDLA::Core is loaded */
5741			#ifndef aTHX_
5742			#define aTHX_
5743			#endif
5744	1	50	if (SvTRUE (ERRSV)) Perl_croak(aTHX_ "%s",SvPV_nolen (ERRSV));
		50
		50
		50
		0
		50
		50
		0
		0
		0
		0
		50
		50
		50
		50
		50
		50
		50
		50
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		50
		0
		0
		0
		0
5745	1		CoreSV = perl_get_sv("PDLA::SHARE",FALSE); /* SV* value */
5746	1	50	if (CoreSV==NULL)
5747	0		Perl_croak(aTHX_ "We require the PDLA::Core module, which was not found");
5748	1	50	PDLA = INT2PTR(Core,SvIV( CoreSV )); / Core* value */
5749	1	50	if (PDLA->Version != PDLA_CORE_VERSION)
5750	0		Perl_croak(aTHX_ "[PDLA->Version: %d PDLA_CORE_VERSION: %d XS_VERSION: %s] PDLA::Transform needs to be recompiled against the newly installed PDLA", PDLA->Version, PDLA_CORE_VERSION, XS_VERSION);
5751
5752
5753