File Coverage

Transform.xs
Criterion Covered Total %
statement 627 2679 23.4
branch 368 1766 20.8
condition n/a
subroutine n/a
pod n/a
total 995 4445 22.3


line stmt bran cond sub pod time code
1              
2             /*
3             * THIS FILE WAS GENERATED BY PDLA::PP! Do not modify!
4             */
5              
6             #define PDLA_COMMENT(comment)
7             PDLA_COMMENT("This preprocessor symbol is used to add commentary in the PDLA ")
8             PDLA_COMMENT("autogenerated code. Normally, one would use typical C-style ")
9             PDLA_COMMENT("multiline comments (i.e. /* comment */). However, because such ")
10             PDLA_COMMENT("comments do not nest, it's not possible for PDLA::PP users to ")
11             PDLA_COMMENT("comment-out sections of code using multiline comments, as is ")
12             PDLA_COMMENT("often the practice when debugging, for example. So, when you ")
13             PDLA_COMMENT("see something like this: ")
14             PDLA_COMMENT(" ")
15             PDLA_COMMENT("Memory access")
16             PDLA_COMMENT(" ")
17             PDLA_COMMENT("just think of it as a C multiline comment like: ")
18             PDLA_COMMENT(" ")
19             PDLA_COMMENT(" /* Memory access */ ")
20              
21             #include "EXTERN.h"
22             #include "perl.h"
23             #include "XSUB.h"
24             #include "pdl.h"
25             #include "pdlcore.h"
26             static Core* PDLA; PDLA_COMMENT("Structure hold core C functions")
27             static int __pdl_debugging = 0;
28             static int __pdl_boundscheck = 0;
29             static SV* CoreSV; PDLA_COMMENT("Gets pointer to perl var holding core structure")
30              
31             #if ! 1
32             # define PP_INDTERM(max, at) at
33             #else
34             # define PP_INDTERM(max, at) (__pdl_boundscheck? PDLA->safe_indterm(max,at, __FILE__, __LINE__) : at)
35             #endif
36              
37             #include
38              
39              
40             /*
41             * Singular-value decomposition code is borrowed from
42             * MatrixOps -- cut-and-pasted here because of linker trouble.
43             * It's used by the auxiliary matrix manipulation code, below.
44             *
45             */
46              
47 737701           void pdl_xform_svd(PDLA_Double *W, PDLA_Double *Z, int nRow, int nCol)
48             {
49             int i, j, k, EstColRank, RotCount, SweepCount, slimit;
50             PDLA_Double eps, e2, tol, vt, p, h2, x0, y0, q, r, c0, s0, c2, d1, d2;
51 737701           eps = 1e-6;
52 737701           slimit = nCol/4;
53 737701 50         if (slimit < 6.0)
54 737701           slimit = 6;
55 737701           SweepCount = 0;
56 737701           e2 = 10.0*nRow*eps*eps;
57 737701           tol = eps*.1;
58 737701           EstColRank = nCol;
59 2213103 100         for (i=0; i
60 4426206 100         for (j=0; j
61 2950804           W[nCol*(nRow+i)+j] = 0.0;
62             }
63 1475402           W[nCol*(nRow+i)+i] = 1.0; /* rjrw 7/7/99: moved this line out of j loop */
64             }
65 737701           RotCount = EstColRank*(EstColRank-1)/2;
66 4096192 100         while (RotCount != 0 && SweepCount <= slimit)
    100          
67             {
68 3358491           RotCount = EstColRank*(EstColRank-1)/2;
69 3358491           SweepCount++;
70 6716982 100         for (j=0; j
71             {
72 6716982 100         for (k=j+1; k
73             {
74 3358491           p = q = r = 0.0;
75 10075473 100         for (i=0; i
76             {
77 6716982           x0 = W[nCol*i+j]; y0 = W[nCol*i+k];
78 6716982           p += x0*y0; q += x0*x0; r += y0*y0;
79             }
80 3358491           Z[j] = q; Z[k] = r;
81 3358491 100         if (q >= r)
82             {
83 526731 50         if (q<=e2*Z[0] || fabs(p)<=tol*q) RotCount--;
    100          
84             else
85             {
86 165910           p /= q; r = 1 - r/q; vt = sqrt(4*p*p+r*r);
87 165910           c0 = sqrt(fabs(.5*(1+r/vt))); s0 = p/(vt*c0);
88 1190371 100         for (i=0; i
89             {
90 663640           d1 = W[nCol*i+j]; d2 = W[nCol*i+k];
91 663640           W[nCol*i+j] = d1*c0+d2*s0; W[nCol*i+k] = -d1*s0+d2*c0;
92             }
93             }
94             }
95             else
96             {
97 2831760           p /= r; q = q/r-1; vt = sqrt(4*p*p+q*q);
98 2831760           s0 = sqrt(fabs(.5*(1-q/vt)));
99 2831760 100         if (p<0) s0 = -s0;
100 2831760           c0 = p/(vt*s0);
101 14158800 100         for (i=0; i
102             {
103 11327040           d1 = W[nCol*i+j]; d2 = W[nCol*i+k];
104 11327040           W[nCol*i+j] = d1*c0+d2*s0; W[nCol*i+k] = -d1*s0+d2*c0;
105             }
106             }
107             }
108             }
109 3358491 50         while (EstColRank>=3 && Z[(EstColRank-1)]<=Z[0]*tol+tol*tol)
    0          
110 0           EstColRank--;
111             }
112 737701           }
113              
114             /*
115             * PDLA_xform_aux:
116             * This handles the matrix manipulation part of the Jacobian filtered
117             * mapping code. It's separate from the main code because it's
118             * independent of the data type of the original arrays.
119             *
120             *Given a pre-allocated workspace and
121             * an integer set of coordinates, generate the discrete Jacobian
122             * from the map, pad the singular values, and return the inverse
123             * Jacobian, the largest singular value of the Jacobian itself, and
124             * the determinant of the original Jacobian. Boundary values use the
125             * asymmetric discrete Jacobian; others use the symmetric discrete Jacobian.
126             *
127             * The map and workspace must be of type PDLA_D. If the dimensionality is
128             * d, then the workspace must have at least 3*n^2+n elements. The
129             * inverse of the padded Jacobian is returned in the first n^2 elements.
130             * The determinant of the original Jacobian gets stuffed into the n^2
131             * element of the workspace. The largest padded singular value is returned.
132             *
133             */
134              
135 737701           PDLA_Double PDLA_xform_aux ( pdl *map, PDLA_Indx *coords, PDLA_Double *tmp, PDLA_Double sv_min) {
136             short ndims;
137             PDLA_Long i, j, k;
138             PDLA_Long offset;
139             PDLA_Double det;
140             PDLA_Double *jptr;
141             PDLA_Double *svptr;
142             PDLA_Double *aptr,*bptr;
143 737701           PDLA_Double max_sv = 0.0;
144              
145 737701           ndims = map->ndims-1;
146              
147             /****** Accumulate the Jacobian */
148             /* Accumulate the offset into the map array */
149 2213103 100         for( i=offset=0; i
150 1475402           offset += coords[i] * map->dimincs[i+1];
151              
152 737701           jptr = tmp + ndims*ndims;
153              
154 2213103 100         for( i=0; i
155 1475402           char bot = (coords[i] <=0);
156 1475402           char top = (coords[i] >= map->dims[i+1]-1);
157 1475402 100         char symmetric = !(bot || top);
    100          
158             PDLA_Double *ohi,*olo;
159 1475402           PDLA_Long diff = map->dimincs[i+1];
160 1475402 100         ohi = ((PDLA_Double *)map->data) + ( offset + ( top ? 0 : diff ));
161 1475402 100         olo = ((PDLA_Double *)map->data) + ( offset - ( bot ? 0 : diff ));
162              
163 4426206 100         for( j=0; j
164 2950804           PDLA_Double jel = *ohi - *olo;
165              
166 2950804           ohi += map->dimincs[0];
167 2950804           olo += map->dimincs[0];
168              
169 2950804 100         if(symmetric)
170 2935020           jel /= 2;
171 2950804           *(jptr++) = jel;
172             }
173             }
174              
175              
176             /****** Singular-value decompose the Jacobian
177             * The svd routine produces the squares of the singular values,
178             * and requires normalization for one of the rotation matrices.
179             */
180              
181 737701           jptr = tmp + ndims*ndims;
182 737701           svptr = tmp + 3*ndims*ndims;
183 737701           pdl_xform_svd(jptr,svptr,ndims,ndims);
184              
185 737701           aptr = svptr;
186 2213103 100         for (i=0;i
187 1475402           *aptr = sqrt(*aptr);
188              
189              
190             /* fix-up matrices here */
191 737701           bptr = jptr;
192 2213103 100         for(i=0; i
193 1475402           aptr = svptr;
194 4426206 100         for(j=0;j
195 2950804           *(bptr++) /= *(aptr++);
196             }
197              
198             /****** Store the determinant, and pad the singular values as necessary.*/
199 737701           aptr = svptr;
200 737701           det = 1.0;
201 2213103 100         for(i=0;i
202 1475402           det *= *aptr;
203 1475402 100         if(*aptr < sv_min)
204 720800           *aptr = sv_min;
205 1475402 100         if(*aptr > max_sv )
206 360821           max_sv = *aptr;
207 1475402           aptr++;
208             }
209              
210             /****** Generate the inverse matrix */
211             /* Multiply B-transpose times 1/S times A-transpose. */
212             /* since S is diagonal we just divide by the appropriate element. */
213             /* */
214 737701           aptr = tmp + ndims*ndims;
215 737701           bptr = aptr + ndims*ndims;
216 737701           jptr= tmp;
217              
218 2213103 100         for(i=0;i
219 4426206 100         for(j=0;j
220 2950804           *jptr = 0;
221              
222 8852412 100         for(k=0;k
223 5901608           *jptr += aptr[j*ndims + k] * bptr[k*ndims + i] / *svptr;
224              
225 2950804           jptr++;
226             }
227 1475402           svptr++;
228             }
229              
230 737701           *jptr = det;
231 737701           return max_sv;
232             }
233              
234             typedef struct pdl_map_struct {
235             PDLA_TRANS_START(1);
236             pdl_thread __pdlthread;
237             SV *in;SV *out;SV *map;SV *boundary;SV *method;SV *big;SV *blur;SV *sv_min;SV *flux;SV *bv;
238             char __ddone; PDLA_COMMENT("Dims done")
239             } pdl_map_struct;
240              
241 12           void pdl_map_redodims(pdl_trans *__tr ) {
242             int __dim;
243 12           pdl_map_struct *__privtrans = (pdl_map_struct *) __tr;
244            
245             {
246             PDLA_Indx __creating[1];
247 12           __creating[0] = 0;
248             {
249             {PDLA_COMMENT("Start generic loop")
250              
251 12           switch(__privtrans->__datatype) { case -42: PDLA_COMMENT("Warning eater") {(void)1;
252 0           } break; case PDLA_B: {
253 0 0         PDLA_Byte * k0_datap = ((PDLA_Byte *)(PDLA_REPRP_TRANS((__privtrans->pdls[0]),(__privtrans->vtable->per_pdl_flags[0]))));
    0          
254 0           PDLA_Byte * k0_physdatap = ((PDLA_Byte *)((__privtrans->pdls[0])->data));
255              
256             {
257             PDLA_COMMENT("none")
258 0           } } break; case PDLA_S: {
259 0 0         PDLA_Short * k0_datap = ((PDLA_Short *)(PDLA_REPRP_TRANS((__privtrans->pdls[0]),(__privtrans->vtable->per_pdl_flags[0]))));
    0          
260 0           PDLA_Short * k0_physdatap = ((PDLA_Short *)((__privtrans->pdls[0])->data));
261              
262             {
263             PDLA_COMMENT("none")
264 0           } } break; case PDLA_US: {
265 0 0         PDLA_Ushort * k0_datap = ((PDLA_Ushort *)(PDLA_REPRP_TRANS((__privtrans->pdls[0]),(__privtrans->vtable->per_pdl_flags[0]))));
    0          
266 0           PDLA_Ushort * k0_physdatap = ((PDLA_Ushort *)((__privtrans->pdls[0])->data));
267              
268             {
269             PDLA_COMMENT("none")
270 0           } } break; case PDLA_L: {
271 5 50         PDLA_Long * k0_datap = ((PDLA_Long *)(PDLA_REPRP_TRANS((__privtrans->pdls[0]),(__privtrans->vtable->per_pdl_flags[0]))));
    0          
272 5           PDLA_Long * k0_physdatap = ((PDLA_Long *)((__privtrans->pdls[0])->data));
273              
274             {
275             PDLA_COMMENT("none")
276 5           } } break; case PDLA_IND: {
277 0 0         PDLA_Indx * k0_datap = ((PDLA_Indx *)(PDLA_REPRP_TRANS((__privtrans->pdls[0]),(__privtrans->vtable->per_pdl_flags[0]))));
    0          
278 0           PDLA_Indx * k0_physdatap = ((PDLA_Indx *)((__privtrans->pdls[0])->data));
279              
280             {
281             PDLA_COMMENT("none")
282 0           } } break; case PDLA_LL: {
283 0 0         PDLA_LongLong * k0_datap = ((PDLA_LongLong *)(PDLA_REPRP_TRANS((__privtrans->pdls[0]),(__privtrans->vtable->per_pdl_flags[0]))));
    0          
284 0           PDLA_LongLong * k0_physdatap = ((PDLA_LongLong *)((__privtrans->pdls[0])->data));
285              
286             {
287             PDLA_COMMENT("none")
288 0           } } break; case PDLA_F: {
289 0 0         PDLA_Float * k0_datap = ((PDLA_Float *)(PDLA_REPRP_TRANS((__privtrans->pdls[0]),(__privtrans->vtable->per_pdl_flags[0]))));
    0          
290 0           PDLA_Float * k0_physdatap = ((PDLA_Float *)((__privtrans->pdls[0])->data));
291              
292             {
293             PDLA_COMMENT("none")
294 0           } } break; case PDLA_D: {
295 7 50         PDLA_Double * k0_datap = ((PDLA_Double *)(PDLA_REPRP_TRANS((__privtrans->pdls[0]),(__privtrans->vtable->per_pdl_flags[0]))));
    0          
296 7           PDLA_Double * k0_physdatap = ((PDLA_Double *)((__privtrans->pdls[0])->data));
297              
298             {
299             PDLA_COMMENT("none")
300 7           } break;}
301 0           default:barf("PP INTERNAL ERROR! PLEASE MAKE A BUG REPORT\n");}
302             }
303             }
304             {
305             static char *__parnames[] = {"k0"};
306             static PDLA_Indx __realdims[] = {0};
307             static char __funcname[] = "PDLA::Transform::map";
308             static pdl_errorinfo __einfo = {
309             __funcname, __parnames, 1
310             };
311            
312 12           PDLA->initthreadstruct(2,__privtrans->pdls,
313             __realdims,__creating,1,
314             &__einfo,&(__privtrans->__pdlthread),
315 12           __privtrans->vtable->per_pdl_flags,
316             0 );
317             }
318              
319             { PDLA_COMMENT("convenience block")
320 12           void *hdrp = NULL;
321 12           char propagate_hdrcpy = 0;
322 12           SV *hdr_copy = NULL;
323 12 50         if(!hdrp &&
    50          
324 0 0         __privtrans->pdls[0]->hdrsv &&
325 0           (__privtrans->pdls[0]->state & PDLA_HDRCPY)
326             ) {
327 0           hdrp = __privtrans->pdls[0]->hdrsv;
328 0           propagate_hdrcpy = ((__privtrans->pdls[0]->state & PDLA_HDRCPY) != 0);
329             }
330 12 50         if (hdrp) {
331 0 0         if(hdrp == &PL_sv_undef)
332 0           hdr_copy = &PL_sv_undef;
333             else { PDLA_COMMENT("Call the perl routine _hdr_copy...")
334             int count;
335             PDLA_COMMENT("Call the perl routine PDLA::_hdr_copy(hdrp)")
336 0           dSP;
337 0           ENTER ;
338 0           SAVETMPS ;
339 0 0         PUSHMARK(SP) ;
340 0 0         XPUSHs( hdrp );
341 0           PUTBACK ;
342 0           count = call_pv("PDLA::_hdr_copy",G_SCALAR);
343 0           SPAGAIN ;
344 0 0         if(count != 1)
345 0           croak("PDLA::_hdr_copy didn't return a single value - please report this bug (A).");
346              
347 0           hdr_copy = (SV *)POPs;
348              
349 0 0         if(hdr_copy && hdr_copy != &PL_sv_undef) {
    0          
350 0           (void)SvREFCNT_inc(hdr_copy); PDLA_COMMENT("Keep hdr_copy from vanishing during FREETMPS")
351             }
352              
353 0 0         FREETMPS ;
354 0           LEAVE ;
355              
356              
357             } PDLA_COMMENT("end of callback block")
358              
359              
360 0 0         if(hdr_copy != &PL_sv_undef)
361 0           SvREFCNT_dec(hdr_copy); PDLA_COMMENT("make hdr_copy mortal again")
362             } PDLA_COMMENT("end of if(hdrp) block")
363             } PDLA_COMMENT("end of conv. block")
364 12           __privtrans->__ddone = 1;
365             }
366 12           }
367            
368              
369 0           pdl_trans * pdl_map_copy(pdl_trans *__tr ) {
370             int __dim;
371 0           pdl_map_struct *__privtrans = (pdl_map_struct *) __tr;
372            
373             {
374 0           pdl_map_struct *__copy = malloc(sizeof(pdl_map_struct));
375 0           PDLA_THR_CLRMAGIC(&__copy->__pdlthread); PDLA_TR_CLRMAGIC(__copy);
376 0           __copy->has_badvalue = __privtrans->has_badvalue;
377 0           __copy->badvalue = __privtrans->badvalue;
378 0           __copy->flags = __privtrans->flags;
379 0           __copy->vtable = __privtrans->vtable;
380 0           __copy->__datatype = __privtrans->__datatype;
381 0           __copy->freeproc = NULL;
382 0           __copy->__ddone = __privtrans->__ddone;
383             {int i;
384 0 0         for(i=0; i<__copy->vtable->npdls; i++)
385 0           __copy->pdls[i] = __privtrans->pdls[i];
386             }
387 0           (__copy->in) = newSVsv(__privtrans->in);;(__copy->out) = newSVsv(__privtrans->out);;(__copy->map) = newSVsv(__privtrans->map);;(__copy->boundary) = newSVsv(__privtrans->boundary);;(__copy->method) = newSVsv(__privtrans->method);;(__copy->big) = newSVsv(__privtrans->big);;(__copy->blur) = newSVsv(__privtrans->blur);;(__copy->sv_min) = newSVsv(__privtrans->sv_min);;(__copy->flux) = newSVsv(__privtrans->flux);;(__copy->bv) = newSVsv(__privtrans->bv);;
388 0 0         if(__copy->__ddone) {
389 0           PDLA->thread_copy(&(__privtrans->__pdlthread),&(__copy->__pdlthread));
390             }
391 0           return (pdl_trans*)__copy;
392             }
393             }
394            
395              
396 12           void pdl_map_readdata(pdl_trans *__tr ) {
397             int __dim;
398 12           pdl_map_struct *__privtrans = (pdl_map_struct *) __tr;
399            
400             {
401             {PDLA_COMMENT("Start generic loop")
402              
403 12           switch(__privtrans->__datatype) { case -42: PDLA_COMMENT("Warning eater") {(void)1;
404 0           } break; case PDLA_B: {
405 0 0         PDLA_Byte * k0_datap = ((PDLA_Byte *)(PDLA_REPRP_TRANS((__privtrans->pdls[0]),(__privtrans->vtable->per_pdl_flags[0]))));
    0          
406 0           PDLA_Byte * k0_physdatap = ((PDLA_Byte *)((__privtrans->pdls[0])->data));
407              
408              
409             PDLA_COMMENT("THREADLOOPBEGIN")
410 0 0         if ( PDLA->startthreadloop(&(__privtrans->__pdlthread),__privtrans->vtable->readdata, __tr) ) return;
411 0           do { register PDLA_Indx __tind1=0,__tind2=0;
412 0           register PDLA_Indx __tnpdls = __privtrans->__pdlthread.npdls;
413 0           register PDLA_Indx __tdims1 = __privtrans->__pdlthread.dims[1];
414 0           register PDLA_Indx __tdims0 = __privtrans->__pdlthread.dims[0];
415 0           register PDLA_Indx *__offsp = PDLA->get_threadoffsp(&__privtrans->__pdlthread);
416 0           register PDLA_Indx __tinc0_0 = __privtrans->__pdlthread.incs[0];
417 0           register PDLA_Indx __tinc1_0 = __privtrans->__pdlthread.incs[__tnpdls+0];
418 0           k0_datap += __offsp[0];
419 0 0         for( __tind2 = 0 ;
420             __tind2 < __tdims1 ;
421 0           __tind2++
422 0           ,k0_datap += __tinc1_0 - __tinc0_0 * __tdims0
423             )
424             {
425 0 0         for( __tind1 = 0 ;
426             __tind1 < __tdims0 ;
427 0           __tind1++
428 0           ,k0_datap += __tinc0_0
429             )
430             { PDLA_COMMENT("This is the tightest threadloop. Make sure inside is optimal."){
431              
432             /*
433             * Pixel interpolation & averaging code
434             *
435             * Calls a common coordinate-transformation block (see following hdr)
436             * that isn't dependent on the type of the input variable.
437             *
438             * The inputs are SVs to avoid hassling with threadloops; threading
439             * is handled internally. To simplify the threading business, any
440             * thread dimensions should all be collapsed to a single one by the
441             * perl front-end.
442             *
443             */
444              
445             short ndims; /* Number of dimensions we're working in */
446             PDLA_Double *tmp; /* Workspace for prefrobnication */
447             PDLA_Indx *ovec; /* output pixel loop vector */
448             PDLA_Indx *ivec; /* input pixel loop vector */
449             PDLA_Indx *ibvec; /* input pixel base offset vector */
450             PDLA_Double *dvec; /* Residual vector for linearization */
451             PDLA_Double *tvec; /* Temporary floating-point vector */
452             PDLA_Double *acc; /* Threaded accumulator */
453             PDLA_Double *wgt; /* Threaded weight accumulator */
454             PDLA_Double *wgt2; /* Threaded weight accumulator for badval finding */
455             char *bounds; /* Boundary condition packed string */
456             PDLA_Indx *index_stash; /* Stash to store the opening index of dim sample scans */
457             char method; /* Method identifier (gets one of 'h','g') */
458             PDLA_Long big; /* Max size of input footprint for each pix */
459             PDLA_Double blur; /* Scaling of filter */
460             PDLA_Double sv_min; /* minimum singular value */
461             char flux; /* Flag to indicate flux conservation */
462             PDLA_Double *map_ptr;
463             PDLA_Long i, j;
464 0 0         PDLA_Byte badval = SvNV(__privtrans->bv);
465             #define HANNING_LOOKUP_SIZE 2500
466             static PDLA_Double hanning_lookup[HANNING_LOOKUP_SIZE + 2];
467             static int needs_hanning_calc = 1;
468             PDLA_Double zeta;
469             PDLA_Double hanning_offset;
470              
471             #define GAUSSIAN_LOOKUP_SIZE 4000
472             #define GAUSSIAN_MAXVAL 6.25 /* 2.5 HWHMs (square it) */
473             static PDLA_Double gaussian_lookup[GAUSSIAN_LOOKUP_SIZE + 2];
474             static int needs_gaussian_calc = 1;
475              
476 0           pdl *in = PDLA->SvPDLAV(__privtrans->in);
477 0           pdl *out = PDLA->SvPDLAV(__privtrans->out);
478 0           pdl *map = PDLA->SvPDLAV(__privtrans->map);
479              
480 0           PDLA->make_physical(in);
481 0           PDLA->make_physical(out);
482 0           PDLA->make_physical(map);
483              
484 0           ndims = map->ndims -1;
485              
486             /*
487             * Allocate all our dynamic workspaces at once
488             * */
489 0           ovec = (PDLA_Indx *)(PDLA->smalloc( (STRLEN)
490 0           ( + sizeof(PDLA_Indx) * 3 * ndims + sizeof(PDLA_Double) * (3*ndims) + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * 3 * ndims*ndims + ndims + sizeof(char) * ndims + sizeof(PDLA_Indx) * ndims )
491             )
492             );
493 0           ivec = &(ovec[ndims]);
494 0           ibvec = &(ivec[ndims]);
495 0           dvec = (PDLA_Double *)(&(ibvec[ndims]));
496 0           tvec = &(dvec[ndims]);
497 0           acc = &(tvec[ndims]);
498 0           wgt = &(acc[in->dims[ndims]]); wgt2 = &(wgt[in->dims[ndims]]); tmp = &(wgt2[in->dims[ndims]]);
499 0           bounds = (char *)(&(tmp [3*ndims*ndims+ndims]));
500 0           index_stash = (PDLA_Indx *) &(bounds[ndims]);
501              
502              
503             /***
504             * Fill in the boundary condition array
505             */
506             {
507             char *bstr;
508             STRLEN blen;
509 0 0         bstr = SvPV(__privtrans->boundary,blen);
510              
511 0 0         if(blen == 0) {
512             /* If no boundary is specified then every dim gets truncated */
513             int i;
514 0 0         for (i=0;i
515 0           bounds[i] = 1;
516             } else {
517             int i;
518 0 0         for(i=0;i
519 0 0         switch(bstr[i < blen ? i : blen-1 ]) {
520             case '0': case 'f': case 'F': /* forbid */
521 0           bounds[i] = 0;
522 0           break;
523             case '1': case 't': case 'T': /* truncate */
524 0           bounds[i] = 1;
525 0           break;
526             case '2': case 'e': case 'E': /* extend */
527 0           bounds[i] = 2;
528 0           break;
529             case '3': case 'p': case 'P': /* periodic */
530 0           bounds[i] = 3;
531 0           break;
532             case '4': case 'm': case 'M': /* mirror */
533 0           bounds[i] = 4;
534 0           break;
535             default:
536             {
537             char buf[BUFSIZ];
538 0           sprintf(buf,"Error in map: Unknown boundary condition '%c'",bstr[i]);
539 0           barf("%s", buf);
540             }
541 0           break;
542             }
543             }
544             }
545             }
546              
547             /***
548             * Parse out the 'method', 'big', 'blur', and 'sv_min' arguments
549             */
550 0 0         big = labs((PDLA_Long) (SvNV(__privtrans->big)));
551 0 0         if(big <= 0)
552 0           barf("%s","map: 'big' parameter must be >0");
553              
554 0 0         blur = fabs((PDLA_Double) (SvNV(__privtrans->blur)));
555 0 0         if(blur < 0)
556 0           barf("%s","map: 'blur' parameter must be >= 0");
557              
558 0 0         sv_min = fabs((PDLA_Double) (SvNV(__privtrans->sv_min)));
559 0 0         if(sv_min < 0)
560 0           barf("%s","map: 'sv_min' parameter must be >= 0");
561              
562 0 0         flux = (SvNV(__privtrans->flux) != 0);
563              
564             {
565             char *mstr;
566             STRLEN mlen;
567 0 0         mstr = SvPV(__privtrans->method,mlen);
568              
569 0 0         if(mlen==0)
570 0           method = 'h';
571 0           else switch(*mstr) {
572 0           case 'H': method='H'; break;
573 0           case 'h': method = 'h';
574 0 0         if( needs_hanning_calc ) {
575             int i;
576 0 0         for(i=0;i
577 0           hanning_lookup[i] = 0.5 + 0.5 * cos(3.1415926536 / HANNING_LOOKUP_SIZE * i);
578             }
579 0           hanning_lookup[HANNING_LOOKUP_SIZE] = 0;
580 0           hanning_lookup[HANNING_LOOKUP_SIZE+1] = 0;
581 0           needs_hanning_calc = 0;
582             }
583 0           zeta = HANNING_LOOKUP_SIZE / blur;
584 0           hanning_offset = (blur >= 1) ?
585 0 0         0 :
586 0           0.5 * (1.0 - blur);
587 0           break;
588              
589 0           case 'g': case 'j': method = 'g';
590 0           zeta = GAUSSIAN_LOOKUP_SIZE / GAUSSIAN_MAXVAL;
591              
592 0 0         if( needs_gaussian_calc ) {
593             int i;
594 0 0         for(i=0;i
595 0           gaussian_lookup[i] = exp( - i * 1.386294 / zeta );
596             }
597 0           gaussian_lookup[GAUSSIAN_LOOKUP_SIZE] = 0;
598 0           gaussian_lookup[GAUSSIAN_LOOKUP_SIZE+1] = 0;
599 0           needs_gaussian_calc = 0;
600             }
601 0           break;
602              
603 0           case 'G': case 'J': method = 'G'; break;
604             default:
605             {
606             char err[80];
607 0           sprintf(err,"Bug in map: unknown method '%c'",*mstr);
608 0           barf("%s", err);
609             }
610 0           break;
611             }
612             }
613              
614              
615              
616             /* End of initialization */
617             /*************************************************************/
618             /* Start of Real Work */
619              
620             /* Initialize coordinate vector and map offset
621             */
622 0 0         for(i=0;i
623 0           ovec[i] = 0;
624              
625 0           map_ptr = (PDLA_Double *)(map->data);
626              
627              
628             /* Main pixel loop (iterates over pixels in the output plane) */
629             do {
630             PDLA_Indx psize; PDLA_Indx i_off; PDLA_Indx j; char t_vio; char carry;
631             /* Prefrobnicate the transformation matrix */
632 0           psize = (PDLA_Long)(blur * PDLA_xform_aux(map, ovec, tmp, sv_min) + 0.5)+1; /* assignment */
633              
634             #ifdef DEBUG_MAP
635             {
636             int k; PDLA_Indx foo = 0;
637             printf("ovec: [");
638             for(k=0;k
639             foo += ovec[k] * map->dimincs[k+1];
640             printf(" %2d ",(int)(ovec[k]));
641             }
642             printf("]; psize is %ld; big is %d; blur is %8.2f; map is [",psize,big, blur);
643             for(k=0;k
644             printf("%8.2f",(double)(((PDLA_Byte *)(map->data))[foo + k*map->dimincs[0]]));
645             }
646             printf("]\n");
647             }
648             #endif
649              
650             /* Don't bother accumulating output if psize is too large */
651 0 0         if(psize <= big) {
652             /* Use the prefrobnicated matrix to generate a local linearization.
653             * dvec gets the delta; ibvec gets the base.
654             */
655             {
656 0           PDLA_Double *mp = map_ptr;
657 0 0         for (i=0;i
658 0           dvec[i] = *mp - ( ibvec[i] = (PDLA_Long)(*mp + 0.5)); /* assignment */
659 0           mp += map->dimincs[0];
660             }
661             }
662              
663             /* Initialize input delta vector */
664 0 0         for(i=0;i
665 0           ivec[i] = -psize;
666              
667             /* Initialize accumulators */
668             {
669 0           PDLA_Double *ac = acc;
670 0 0         for(i=0; i < in->dims[ndims]; i++)
671 0           *(ac++) = 0.0;
672              
673             }
674             {
675 0           PDLA_Double *wg = wgt;
676 0 0         for(i=0;i < in->dims[ndims]; i++)
677 0           *(wg++) = 0.0;
678             }
679             {
680 0           PDLA_Double *wg = wgt2;
681 0 0         for(i=0;i < in->dims[ndims]; i++)
682 0           *(wg++) = 0.0;
683             }
684              
685              
686             /*
687             * Calculate the original offset into the data array, to enable
688             * delta calculations in the pixel loop
689             *
690             * i runs over dims; j holds the working integer index in the
691             * current dim.
692             *
693             * This code matches the incrementation code at the bottom of the accumulation loop
694             */
695            
696 0           t_vio = 0; /* truncation-boundary violation count - don't bother if it is nonzero */
697 0           i_off = 0;
698 0 0         for(i=0;i
699 0           j = ivec[i] + ibvec[i];
700 0 0         if(j<0 || j >= in->dims[i]) {
    0          
701 0           switch(bounds[i]) {
702             case 0: /* no breakage allowed */
703 0           barf("%s","index out-of-bounds in map");
704 0           break;
705             case 1: /* truncation */
706 0           t_vio++;
707             /* fall through */
708             case 2: /* extension -- crop */
709 0 0         if(j<0)
710 0           j=0;
711 0           else j = in->dims[i] - 1;
712 0           break;
713             case 3: /* periodic -- mod it */
714 0           j %= in->dims[i];
715 0 0         if(j<0)
716 0           j += in->dims[i];
717 0           break;
718             case 4: /* mirror -- reflect off the edges */
719 0           j += in->dims[i];
720 0           j %= (in->dims[i]*2);
721 0 0         if(j<0)
722 0           j += in->dims[i]*2;
723 0           j -= in->dims[i];
724 0 0         if(j<0) {
725 0           j *= -1;
726 0           j -= 1;
727             }
728 0           break;
729             default:
730 0           barf("%s", "Unknown boundary condition in map -- bug alert!");
731 0           break;
732             }
733             }
734 0           i_off += in->dimincs[i] * j;
735             }
736              
737             /* Initialize index stashes for later reference as we scan the footprint */
738             /* It's a pain in the ass to deal with boundaries, and doubly so at the */
739             /* end of a dimensional scan. So we stash the index location at the */
740             /* start of each dimensional scan here. When we finish incrementing */
741             /* through a particular dim, we pull its value back out of the stash. */
742 0 0         for(i=0;i
743 0           index_stash[i] = i_off;
744             }
745              
746             /* The input accumulation loop is the hotspot for the entire operation. */
747             /* We loop over pixels in the region of interest (+/- psize in each dimension) */
748             /* in the input array, use the linearized transform to bring each pixel center */
749             /* forward to the output plane, and calculate a weighting based on the chosen */
750             /* filter function. 'h' is a fast Hanning window rolloff using a lookup */
751             /* table that is initialized the first time through the code. 'H' is the */
752             /* same process, but explicitly calculated for each interation (~2x slower). */
753             /* 'g' uses a radial Gaussian filter. Rather than calculate the array offset */
754             /* into the input array fresh from the current input array vector each time, */
755             /* we walk through the array using dimincs and the old offset. This saves */
756             /* about half of the time spent on index calculation. */
757              
758             do { /* Input accumulation loop */
759             PDLA_Double *cp;
760             PDLA_Double alpha;
761             /* Calculate the weight of the current input point. Don't bother if we're
762             * violating any truncation boundaries (in that case our value is zero, but
763             * for the interpolation we also set the weight to zero).
764             */
765 0 0         if( !t_vio ) {
766              
767 0           PDLA_Double *ap = tvec;
768 0           PDLA_Double *bp = dvec;
769 0           PDLA_Indx *ip = ivec;
770 0 0         for(i=0; i
771 0           *(ap++) = *(ip++) - *(bp++);
772              
773 0           switch(method) {
774             PDLA_Double dd;
775             case 'h':
776             /* This is the Hanning window rolloff. It is a product of a simple */
777             /* cos^2(theta) rolloff in each dimension. Using a lookup table */
778             /* is about 2x faster than using cos(theta) directly in each */
779             /* weighting calculation, so we do. Using 2500 entries and linear */
780             /* interpolation is accurate to about 10^-7, and should preserve */
781             /* the contents of cache pretty well. */
782 0           alpha = 1;
783 0           cp = tmp;
784 0 0         for(i=0; i
785             int lodex;
786             int hidex;
787             PDLA_Double beta;
788 0           dd = 0;
789 0           ap = tvec;
790             /* Get the matrix-multiply element for this dimension */
791 0 0         for(j=0;j
792 0           dd += *(ap++) * *(cp++);
793              
794             /* Do linear interpolation from the table */
795             /* The table captures a hanning window centered 0.5 pixel from center. */
796             /* We scale the filter by the blur parameter -- but if blur is less */
797             /* than unity, we shrink the hanning blur window while keeping the 0.5 */
798             /* value on the pixel edge at 0.5. For blur greater than unity, we */
799             /* scale simply. */
800 0           beta = fabs(dd) - hanning_offset;
801 0 0         if(beta > 0) {
802 0 0         if(beta >= blur) {
803 0           alpha = 0;
804 0           i = ndims;
805             } else {
806 0           beta *= zeta;
807 0           lodex = beta;
808 0 0         beta -= lodex; if(lodex > HANNING_LOOKUP_SIZE)
809 0           lodex = HANNING_LOOKUP_SIZE;
810 0           hidex = lodex+1;
811 0           alpha *= hanning_lookup[hidex]*beta + hanning_lookup[lodex]*(1-beta);
812             } /* end of interpolation branch */
813             } /* end of beta > 0 branch */
814             } /* end of dimension loop */
815 0           break;
816              
817             case 'H':
818             /* This is the Hanning window rolloff with explicit calculation, preserved */
819             /* in case someone actually wants the slower longer method. */
820 0           alpha = 1;
821 0           cp = tmp;
822 0 0         for(i=0; i
823 0           dd = 0;
824 0           ap = tvec;
825 0 0         for(j=0;j
826 0           dd += *(ap++) * *(cp++);
827 0           dd = (fabs(dd) - hanning_offset) / blur;
828 0 0         if( dd > 1 ) {
829 0           alpha = 0;
830 0           i = ndims;
831             } else
832 0           alpha *= (0.5 + 0.5 * cos( dd * 3.1415926536 ));
833             }
834 0           break;
835              
836             case 'g':
837             /* This is the Gaussian rolloff. It does lookup into a precalculated exponential. */
838             {
839 0           PDLA_Double sum = 0;
840 0           cp = tmp;
841 0 0         for(i=0; i
842 0           dd = 0;
843 0           ap = tvec;
844 0 0         for(j=0;j
845 0           dd += *(ap++) * *(cp++);
846 0           dd /= blur;
847 0           sum += dd * dd;
848 0 0         if(sum > GAUSSIAN_MAXVAL) {
849 0           i = ndims; /* exit early if we're too far out */
850 0           alpha = 0;
851             }
852             }
853 0 0         if( sum > GAUSSIAN_MAXVAL || !isfinite(sum) || isnan(sum) ) {
    0          
    0          
854 0           alpha = 0;
855             } else {
856             int lodex,hidex;
857 0           PDLA_Double beta = fabs(zeta * sum);
858              
859 0           lodex = beta;
860 0           beta -= lodex; hidex = lodex+1;
861 0           alpha = gaussian_lookup[hidex]*beta + gaussian_lookup[lodex]*(1 - beta);
862              
863             }
864             }
865 0           break;
866              
867             case 'G':
868             /* This is the Gaussian rolloff with explicit calculation, preserved */
869             /* in case someone actually wants the slower longer method. */
870             {
871 0           PDLA_Double sum = 0;
872 0           cp = tmp;
873 0 0         for(i=0; i
874 0           dd = 0;
875 0           ap = tvec;
876 0 0         for(j=0;j
877 0           dd += *(ap++) * *(cp++);
878 0           dd /= blur;
879 0           sum += dd * dd;
880 0 0         if(sum > 4) /* 2 pixels -- four half-widths */
881 0           i = ndims; /* exit early if this pixel is too far outside the footprint of the ideal point */
882             }
883              
884 0 0         if(sum > GAUSSIAN_MAXVAL)
885 0           alpha = 0;
886             else
887 0           alpha = exp(-sum * 1.386294); /* Gaussian, rt(2)-pix HWHM */
888             }
889 0           break;
890             default:
891             {
892             char buf[80];
893 0           sprintf(buf,"This can't happen: method='%c'",method);
894 0           barf("%s", buf);
895             }
896             }
897              
898             { /* convenience block -- accumulate the current point into the weighted sum. */
899             /* This is more than simple assignment because we have our own explicit poor */
900             /* man's threadloop here, so we accumulate each threaded element separately. */
901 0           PDLA_Byte *dat = ((PDLA_Byte *)(in->data)) + i_off;
902 0           PDLA_Indx max = out->dims[ndims];
903 0 0         for( i=0; i < max; i++ ) {
904 0 0         if( (badval==0) || (*dat != badval) ) {
    0          
905 0           acc[i] += *dat * alpha;
906 0           dat += in->dimincs[ndims];
907 0           wgt[i] += alpha;
908             }
909 0           wgt2[i] += alpha; }
910             }
911             } /* end of t_vio check (i.e. of input accumulation) */
912              
913              
914             /* Advance input accumulation loop. */
915             /* We both increment the total vector and also advance the index. */
916 0           carry = 1;
917 0 0         for(i=0; i
    0          
918             /* Advance the current element of the offset vector */
919 0           ivec[i]++;
920 0           j = ivec[i] + ibvec[i];
921              
922             /* Advance the offset into the data array */
923 0 0         if( j > 0 && j <= in->dims[i]-1 ) {
    0          
924             /* Normal case -- just advance the input vector */
925 0           i_off += in->dimincs[i];
926             } else {
927             /* Busted a boundary - either before or after. */
928 0           switch(bounds[i]){
929             case 0: /* no breakage allowed -- treat as truncation for interpolation */
930             case 1: /* truncation -- if we crossed the boundary mark ourselves out-of-bounds */
931 0 0         if( j == 0 )
932 0           t_vio--;
933 0 0         else if( j == in->dims[i] )
934 0           t_vio++;
935 0           break;
936             case 2: /* extension -- do nothing (so the same input point is re-used) */
937 0           break;
938             case 3: /* periodic -- advance and mod into the allowed range */
939 0 0         if((j % in->dims[i]) == 0) {
940 0           i_off -= in->dimincs[i] * (in->dims[i]-1);
941             } else {
942 0           i_off += in->dimincs[i];
943             }
944 0           break;
945             case 4: /* mirror -- advance or retreat depending on phase */
946 0           j += in->dims[i];
947 0           j %= (in->dims[i]*2);
948 0           j -= in->dims[i];
949 0 0         if( j!=0 && j!= -in->dims[i] ) {
    0          
950 0 0         if(j<0)
951 0           i_off -= in->dimincs[i];
952             else
953 0           i_off += in->dimincs[i];
954             }
955 0           break;
956             }
957             }
958              
959             /* Now check for carry */
960 0 0         if(ivec[i] <= psize) {
961             /* Normal case -- copy the current offset to the faster-running dim stashes */
962             int k;
963 0 0         for(k=0;k
964 0           index_stash[k] = i_off;
965             }
966 0           carry = 0;
967              
968             } else { /* End of this scan -- recover the last position, and mark carry */
969 0           i_off = index_stash[i];
970 0 0         if(bounds[i]==1) {
971 0           j = ivec[i] + ibvec[i];
972 0 0         if( j < 0 || j >= in->dims[i] )
    0          
973 0           t_vio--;
974 0           ivec[i] = -psize;
975 0           j = ivec[i] + ibvec[i];
976 0 0         if( j < 0 || j >= in->dims[i] )
    0          
977 0           t_vio++;
978 0           carry = 1;
979             } else {
980 0           ivec[i] = -psize;
981             }
982             }
983             } /* End of counter-advance loop */
984 0 0         } while(carry==0); /* end of total data accumulation loop (termination condition has carry on last dim) */
985              
986             {
987 0           PDLA_Double *ac = acc;
988 0           PDLA_Double *wg = wgt;
989 0           PDLA_Double *wg2 = wgt2;
990 0           PDLA_Byte *dat = out->data;
991              
992             /* Calculate output vector offset */
993 0 0         for(i=0;i
994 0           dat += out->dimincs[i] * ovec[i];
995              
996 0 0         if(!flux) {
997             /* Flux flag is NOT set -- normal case. Copy the weighted accumulated data. */
998 0 0         for(i=0; i < out->dims[ndims]; i++) {
999 0 0         if(*wg && (*wg2 / *wg) < 1.5 ) {
    0          
1000 0           *dat = *(ac++) / *(wg++);
1001 0           wg2++;
1002             } else {
1003 0           *dat = badval;
1004 0           ac++; wg++; wg2++;
1005             }
1006 0           dat += out->dimincs[ndims];
1007             }
1008             } else {
1009             /* Flux flag is set - scale by the (unpadded) determinant of the Jacobian */
1010 0           PDLA_Double det = tmp[ndims*ndims];
1011 0 0         for(i=0; i < out->dims[ndims]; i++) {
1012 0 0         if(*wg && (*wg2 / *wg) < 1.5 ) {
    0          
1013 0           *dat = *(ac++) / *(wg++) * det;
1014 0           wg2++;
1015             } else {
1016 0           *dat = badval;
1017 0           ac++; wg++; wg2++;
1018             }
1019 0           dat += out->dimincs[ndims];
1020             } /* end of for loop */
1021             } /* end of flux flag set conditional */
1022             } /* end of convenience block */
1023            
1024             /* End of code for normal pixels */
1025             } else {
1026             /* The pixel was ludicrously huge -- just set this pixel to nan */
1027 0           PDLA_Byte *dat = out->data;
1028 0 0         for(i=0;i
1029 0           dat += out->dimincs[i] * ovec[i];
1030 0 0         for(i=0;idims[ndims];i++) {
1031 0           *dat = badval; /* Should handle bad values too -- not yet */
1032 0           dat += out->dimincs[ndims];
1033             }
1034             }
1035              
1036             /* Increment the pixel counter */
1037             {
1038 0 0         for(i=0;
1039 0 0         (i
1040 0 0         (map_ptr += map->dimincs[i+1]) && /* Funky pre-test increment */
1041 0           (++(ovec[i]) >= out->dims[i]); /* Actual carry test */
1042 0           i++) {
1043 0           ovec[i] = 0;
1044 0           map_ptr -= out->dims[i] * map->dimincs[i+1];
1045             }
1046             }
1047 0 0         } while(i
1048              
1049              
1050              
1051             }
1052             PDLA_COMMENT("THREADLOOPEND")
1053             }
1054             }
1055 0           k0_datap -= __tinc1_0 * __tdims1 + __offsp[0];
1056 0 0         } while(PDLA->iterthreadloop(&__privtrans->__pdlthread,2)); } break; case PDLA_S: {
1057 0 0         PDLA_Short * k0_datap = ((PDLA_Short *)(PDLA_REPRP_TRANS((__privtrans->pdls[0]),(__privtrans->vtable->per_pdl_flags[0]))));
    0          
1058 0           PDLA_Short * k0_physdatap = ((PDLA_Short *)((__privtrans->pdls[0])->data));
1059              
1060              
1061             PDLA_COMMENT("THREADLOOPBEGIN")
1062 0 0         if ( PDLA->startthreadloop(&(__privtrans->__pdlthread),__privtrans->vtable->readdata, __tr) ) return;
1063 0           do { register PDLA_Indx __tind1=0,__tind2=0;
1064 0           register PDLA_Indx __tnpdls = __privtrans->__pdlthread.npdls;
1065 0           register PDLA_Indx __tdims1 = __privtrans->__pdlthread.dims[1];
1066 0           register PDLA_Indx __tdims0 = __privtrans->__pdlthread.dims[0];
1067 0           register PDLA_Indx *__offsp = PDLA->get_threadoffsp(&__privtrans->__pdlthread);
1068 0           register PDLA_Indx __tinc0_0 = __privtrans->__pdlthread.incs[0];
1069 0           register PDLA_Indx __tinc1_0 = __privtrans->__pdlthread.incs[__tnpdls+0];
1070 0           k0_datap += __offsp[0];
1071 0 0         for( __tind2 = 0 ;
1072             __tind2 < __tdims1 ;
1073 0           __tind2++
1074 0           ,k0_datap += __tinc1_0 - __tinc0_0 * __tdims0
1075             )
1076             {
1077 0 0         for( __tind1 = 0 ;
1078             __tind1 < __tdims0 ;
1079 0           __tind1++
1080 0           ,k0_datap += __tinc0_0
1081             )
1082             { PDLA_COMMENT("This is the tightest threadloop. Make sure inside is optimal."){
1083              
1084             /*
1085             * Pixel interpolation & averaging code
1086             *
1087             * Calls a common coordinate-transformation block (see following hdr)
1088             * that isn't dependent on the type of the input variable.
1089             *
1090             * The inputs are SVs to avoid hassling with threadloops; threading
1091             * is handled internally. To simplify the threading business, any
1092             * thread dimensions should all be collapsed to a single one by the
1093             * perl front-end.
1094             *
1095             */
1096              
1097             short ndims; /* Number of dimensions we're working in */
1098             PDLA_Double *tmp; /* Workspace for prefrobnication */
1099             PDLA_Indx *ovec; /* output pixel loop vector */
1100             PDLA_Indx *ivec; /* input pixel loop vector */
1101             PDLA_Indx *ibvec; /* input pixel base offset vector */
1102             PDLA_Double *dvec; /* Residual vector for linearization */
1103             PDLA_Double *tvec; /* Temporary floating-point vector */
1104             PDLA_Double *acc; /* Threaded accumulator */
1105             PDLA_Double *wgt; /* Threaded weight accumulator */
1106             PDLA_Double *wgt2; /* Threaded weight accumulator for badval finding */
1107             char *bounds; /* Boundary condition packed string */
1108             PDLA_Indx *index_stash; /* Stash to store the opening index of dim sample scans */
1109             char method; /* Method identifier (gets one of 'h','g') */
1110             PDLA_Long big; /* Max size of input footprint for each pix */
1111             PDLA_Double blur; /* Scaling of filter */
1112             PDLA_Double sv_min; /* minimum singular value */
1113             char flux; /* Flag to indicate flux conservation */
1114             PDLA_Double *map_ptr;
1115             PDLA_Long i, j;
1116 0 0         PDLA_Short badval = SvNV(__privtrans->bv);
1117             #define HANNING_LOOKUP_SIZE 2500
1118             static PDLA_Double hanning_lookup[HANNING_LOOKUP_SIZE + 2];
1119             static int needs_hanning_calc = 1;
1120             PDLA_Double zeta;
1121             PDLA_Double hanning_offset;
1122              
1123             #define GAUSSIAN_LOOKUP_SIZE 4000
1124             #define GAUSSIAN_MAXVAL 6.25 /* 2.5 HWHMs (square it) */
1125             static PDLA_Double gaussian_lookup[GAUSSIAN_LOOKUP_SIZE + 2];
1126             static int needs_gaussian_calc = 1;
1127              
1128 0           pdl *in = PDLA->SvPDLAV(__privtrans->in);
1129 0           pdl *out = PDLA->SvPDLAV(__privtrans->out);
1130 0           pdl *map = PDLA->SvPDLAV(__privtrans->map);
1131              
1132 0           PDLA->make_physical(in);
1133 0           PDLA->make_physical(out);
1134 0           PDLA->make_physical(map);
1135              
1136 0           ndims = map->ndims -1;
1137              
1138             /*
1139             * Allocate all our dynamic workspaces at once
1140             * */
1141 0           ovec = (PDLA_Indx *)(PDLA->smalloc( (STRLEN)
1142 0           ( + sizeof(PDLA_Indx) * 3 * ndims + sizeof(PDLA_Double) * (3*ndims) + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * 3 * ndims*ndims + ndims + sizeof(char) * ndims + sizeof(PDLA_Indx) * ndims )
1143             )
1144             );
1145 0           ivec = &(ovec[ndims]);
1146 0           ibvec = &(ivec[ndims]);
1147 0           dvec = (PDLA_Double *)(&(ibvec[ndims]));
1148 0           tvec = &(dvec[ndims]);
1149 0           acc = &(tvec[ndims]);
1150 0           wgt = &(acc[in->dims[ndims]]); wgt2 = &(wgt[in->dims[ndims]]); tmp = &(wgt2[in->dims[ndims]]);
1151 0           bounds = (char *)(&(tmp [3*ndims*ndims+ndims]));
1152 0           index_stash = (PDLA_Indx *) &(bounds[ndims]);
1153              
1154              
1155             /***
1156             * Fill in the boundary condition array
1157             */
1158             {
1159             char *bstr;
1160             STRLEN blen;
1161 0 0         bstr = SvPV(__privtrans->boundary,blen);
1162              
1163 0 0         if(blen == 0) {
1164             /* If no boundary is specified then every dim gets truncated */
1165             int i;
1166 0 0         for (i=0;i
1167 0           bounds[i] = 1;
1168             } else {
1169             int i;
1170 0 0         for(i=0;i
1171 0 0         switch(bstr[i < blen ? i : blen-1 ]) {
1172             case '0': case 'f': case 'F': /* forbid */
1173 0           bounds[i] = 0;
1174 0           break;
1175             case '1': case 't': case 'T': /* truncate */
1176 0           bounds[i] = 1;
1177 0           break;
1178             case '2': case 'e': case 'E': /* extend */
1179 0           bounds[i] = 2;
1180 0           break;
1181             case '3': case 'p': case 'P': /* periodic */
1182 0           bounds[i] = 3;
1183 0           break;
1184             case '4': case 'm': case 'M': /* mirror */
1185 0           bounds[i] = 4;
1186 0           break;
1187             default:
1188             {
1189             char buf[BUFSIZ];
1190 0           sprintf(buf,"Error in map: Unknown boundary condition '%c'",bstr[i]);
1191 0           barf("%s", buf);
1192             }
1193 0           break;
1194             }
1195             }
1196             }
1197             }
1198              
1199             /***
1200             * Parse out the 'method', 'big', 'blur', and 'sv_min' arguments
1201             */
1202 0 0         big = labs((PDLA_Long) (SvNV(__privtrans->big)));
1203 0 0         if(big <= 0)
1204 0           barf("%s","map: 'big' parameter must be >0");
1205              
1206 0 0         blur = fabs((PDLA_Double) (SvNV(__privtrans->blur)));
1207 0 0         if(blur < 0)
1208 0           barf("%s","map: 'blur' parameter must be >= 0");
1209              
1210 0 0         sv_min = fabs((PDLA_Double) (SvNV(__privtrans->sv_min)));
1211 0 0         if(sv_min < 0)
1212 0           barf("%s","map: 'sv_min' parameter must be >= 0");
1213              
1214 0 0         flux = (SvNV(__privtrans->flux) != 0);
1215              
1216             {
1217             char *mstr;
1218             STRLEN mlen;
1219 0 0         mstr = SvPV(__privtrans->method,mlen);
1220              
1221 0 0         if(mlen==0)
1222 0           method = 'h';
1223 0           else switch(*mstr) {
1224 0           case 'H': method='H'; break;
1225 0           case 'h': method = 'h';
1226 0 0         if( needs_hanning_calc ) {
1227             int i;
1228 0 0         for(i=0;i
1229 0           hanning_lookup[i] = 0.5 + 0.5 * cos(3.1415926536 / HANNING_LOOKUP_SIZE * i);
1230             }
1231 0           hanning_lookup[HANNING_LOOKUP_SIZE] = 0;
1232 0           hanning_lookup[HANNING_LOOKUP_SIZE+1] = 0;
1233 0           needs_hanning_calc = 0;
1234             }
1235 0           zeta = HANNING_LOOKUP_SIZE / blur;
1236 0           hanning_offset = (blur >= 1) ?
1237 0 0         0 :
1238 0           0.5 * (1.0 - blur);
1239 0           break;
1240              
1241 0           case 'g': case 'j': method = 'g';
1242 0           zeta = GAUSSIAN_LOOKUP_SIZE / GAUSSIAN_MAXVAL;
1243              
1244 0 0         if( needs_gaussian_calc ) {
1245             int i;
1246 0 0         for(i=0;i
1247 0           gaussian_lookup[i] = exp( - i * 1.386294 / zeta );
1248             }
1249 0           gaussian_lookup[GAUSSIAN_LOOKUP_SIZE] = 0;
1250 0           gaussian_lookup[GAUSSIAN_LOOKUP_SIZE+1] = 0;
1251 0           needs_gaussian_calc = 0;
1252             }
1253 0           break;
1254              
1255 0           case 'G': case 'J': method = 'G'; break;
1256             default:
1257             {
1258             char err[80];
1259 0           sprintf(err,"Bug in map: unknown method '%c'",*mstr);
1260 0           barf("%s", err);
1261             }
1262 0           break;
1263             }
1264             }
1265              
1266              
1267              
1268             /* End of initialization */
1269             /*************************************************************/
1270             /* Start of Real Work */
1271              
1272             /* Initialize coordinate vector and map offset
1273             */
1274 0 0         for(i=0;i
1275 0           ovec[i] = 0;
1276              
1277 0           map_ptr = (PDLA_Double *)(map->data);
1278              
1279              
1280             /* Main pixel loop (iterates over pixels in the output plane) */
1281             do {
1282             PDLA_Indx psize; PDLA_Indx i_off; PDLA_Indx j; char t_vio; char carry;
1283             /* Prefrobnicate the transformation matrix */
1284 0           psize = (PDLA_Long)(blur * PDLA_xform_aux(map, ovec, tmp, sv_min) + 0.5)+1; /* assignment */
1285              
1286             #ifdef DEBUG_MAP
1287             {
1288             int k; PDLA_Indx foo = 0;
1289             printf("ovec: [");
1290             for(k=0;k
1291             foo += ovec[k] * map->dimincs[k+1];
1292             printf(" %2d ",(int)(ovec[k]));
1293             }
1294             printf("]; psize is %ld; big is %d; blur is %8.2f; map is [",psize,big, blur);
1295             for(k=0;k
1296             printf("%8.2f",(double)(((PDLA_Short *)(map->data))[foo + k*map->dimincs[0]]));
1297             }
1298             printf("]\n");
1299             }
1300             #endif
1301              
1302             /* Don't bother accumulating output if psize is too large */
1303 0 0         if(psize <= big) {
1304             /* Use the prefrobnicated matrix to generate a local linearization.
1305             * dvec gets the delta; ibvec gets the base.
1306             */
1307             {
1308 0           PDLA_Double *mp = map_ptr;
1309 0 0         for (i=0;i
1310 0           dvec[i] = *mp - ( ibvec[i] = (PDLA_Long)(*mp + 0.5)); /* assignment */
1311 0           mp += map->dimincs[0];
1312             }
1313             }
1314              
1315             /* Initialize input delta vector */
1316 0 0         for(i=0;i
1317 0           ivec[i] = -psize;
1318              
1319             /* Initialize accumulators */
1320             {
1321 0           PDLA_Double *ac = acc;
1322 0 0         for(i=0; i < in->dims[ndims]; i++)
1323 0           *(ac++) = 0.0;
1324              
1325             }
1326             {
1327 0           PDLA_Double *wg = wgt;
1328 0 0         for(i=0;i < in->dims[ndims]; i++)
1329 0           *(wg++) = 0.0;
1330             }
1331             {
1332 0           PDLA_Double *wg = wgt2;
1333 0 0         for(i=0;i < in->dims[ndims]; i++)
1334 0           *(wg++) = 0.0;
1335             }
1336              
1337              
1338             /*
1339             * Calculate the original offset into the data array, to enable
1340             * delta calculations in the pixel loop
1341             *
1342             * i runs over dims; j holds the working integer index in the
1343             * current dim.
1344             *
1345             * This code matches the incrementation code at the bottom of the accumulation loop
1346             */
1347            
1348 0           t_vio = 0; /* truncation-boundary violation count - don't bother if it is nonzero */
1349 0           i_off = 0;
1350 0 0         for(i=0;i
1351 0           j = ivec[i] + ibvec[i];
1352 0 0         if(j<0 || j >= in->dims[i]) {
    0          
1353 0           switch(bounds[i]) {
1354             case 0: /* no breakage allowed */
1355 0           barf("%s","index out-of-bounds in map");
1356 0           break;
1357             case 1: /* truncation */
1358 0           t_vio++;
1359             /* fall through */
1360             case 2: /* extension -- crop */
1361 0 0         if(j<0)
1362 0           j=0;
1363 0           else j = in->dims[i] - 1;
1364 0           break;
1365             case 3: /* periodic -- mod it */
1366 0           j %= in->dims[i];
1367 0 0         if(j<0)
1368 0           j += in->dims[i];
1369 0           break;
1370             case 4: /* mirror -- reflect off the edges */
1371 0           j += in->dims[i];
1372 0           j %= (in->dims[i]*2);
1373 0 0         if(j<0)
1374 0           j += in->dims[i]*2;
1375 0           j -= in->dims[i];
1376 0 0         if(j<0) {
1377 0           j *= -1;
1378 0           j -= 1;
1379             }
1380 0           break;
1381             default:
1382 0           barf("%s", "Unknown boundary condition in map -- bug alert!");
1383 0           break;
1384             }
1385             }
1386 0           i_off += in->dimincs[i] * j;
1387             }
1388              
1389             /* Initialize index stashes for later reference as we scan the footprint */
1390             /* It's a pain in the ass to deal with boundaries, and doubly so at the */
1391             /* end of a dimensional scan. So we stash the index location at the */
1392             /* start of each dimensional scan here. When we finish incrementing */
1393             /* through a particular dim, we pull its value back out of the stash. */
1394 0 0         for(i=0;i
1395 0           index_stash[i] = i_off;
1396             }
1397              
1398             /* The input accumulation loop is the hotspot for the entire operation. */
1399             /* We loop over pixels in the region of interest (+/- psize in each dimension) */
1400             /* in the input array, use the linearized transform to bring each pixel center */
1401             /* forward to the output plane, and calculate a weighting based on the chosen */
1402             /* filter function. 'h' is a fast Hanning window rolloff using a lookup */
1403             /* table that is initialized the first time through the code. 'H' is the */
1404             /* same process, but explicitly calculated for each interation (~2x slower). */
1405             /* 'g' uses a radial Gaussian filter. Rather than calculate the array offset */
1406             /* into the input array fresh from the current input array vector each time, */
1407             /* we walk through the array using dimincs and the old offset. This saves */
1408             /* about half of the time spent on index calculation. */
1409              
1410             do { /* Input accumulation loop */
1411             PDLA_Double *cp;
1412             PDLA_Double alpha;
1413             /* Calculate the weight of the current input point. Don't bother if we're
1414             * violating any truncation boundaries (in that case our value is zero, but
1415             * for the interpolation we also set the weight to zero).
1416             */
1417 0 0         if( !t_vio ) {
1418              
1419 0           PDLA_Double *ap = tvec;
1420 0           PDLA_Double *bp = dvec;
1421 0           PDLA_Indx *ip = ivec;
1422 0 0         for(i=0; i
1423 0           *(ap++) = *(ip++) - *(bp++);
1424              
1425 0           switch(method) {
1426             PDLA_Double dd;
1427             case 'h':
1428             /* This is the Hanning window rolloff. It is a product of a simple */
1429             /* cos^2(theta) rolloff in each dimension. Using a lookup table */
1430             /* is about 2x faster than using cos(theta) directly in each */
1431             /* weighting calculation, so we do. Using 2500 entries and linear */
1432             /* interpolation is accurate to about 10^-7, and should preserve */
1433             /* the contents of cache pretty well. */
1434 0           alpha = 1;
1435 0           cp = tmp;
1436 0 0         for(i=0; i
1437             int lodex;
1438             int hidex;
1439             PDLA_Double beta;
1440 0           dd = 0;
1441 0           ap = tvec;
1442             /* Get the matrix-multiply element for this dimension */
1443 0 0         for(j=0;j
1444 0           dd += *(ap++) * *(cp++);
1445              
1446             /* Do linear interpolation from the table */
1447             /* The table captures a hanning window centered 0.5 pixel from center. */
1448             /* We scale the filter by the blur parameter -- but if blur is less */
1449             /* than unity, we shrink the hanning blur window while keeping the 0.5 */
1450             /* value on the pixel edge at 0.5. For blur greater than unity, we */
1451             /* scale simply. */
1452 0           beta = fabs(dd) - hanning_offset;
1453 0 0         if(beta > 0) {
1454 0 0         if(beta >= blur) {
1455 0           alpha = 0;
1456 0           i = ndims;
1457             } else {
1458 0           beta *= zeta;
1459 0           lodex = beta;
1460 0 0         beta -= lodex; if(lodex > HANNING_LOOKUP_SIZE)
1461 0           lodex = HANNING_LOOKUP_SIZE;
1462 0           hidex = lodex+1;
1463 0           alpha *= hanning_lookup[hidex]*beta + hanning_lookup[lodex]*(1-beta);
1464             } /* end of interpolation branch */
1465             } /* end of beta > 0 branch */
1466             } /* end of dimension loop */
1467 0           break;
1468              
1469             case 'H':
1470             /* This is the Hanning window rolloff with explicit calculation, preserved */
1471             /* in case someone actually wants the slower longer method. */
1472 0           alpha = 1;
1473 0           cp = tmp;
1474 0 0         for(i=0; i
1475 0           dd = 0;
1476 0           ap = tvec;
1477 0 0         for(j=0;j
1478 0           dd += *(ap++) * *(cp++);
1479 0           dd = (fabs(dd) - hanning_offset) / blur;
1480 0 0         if( dd > 1 ) {
1481 0           alpha = 0;
1482 0           i = ndims;
1483             } else
1484 0           alpha *= (0.5 + 0.5 * cos( dd * 3.1415926536 ));
1485             }
1486 0           break;
1487              
1488             case 'g':
1489             /* This is the Gaussian rolloff. It does lookup into a precalculated exponential. */
1490             {
1491 0           PDLA_Double sum = 0;
1492 0           cp = tmp;
1493 0 0         for(i=0; i
1494 0           dd = 0;
1495 0           ap = tvec;
1496 0 0         for(j=0;j
1497 0           dd += *(ap++) * *(cp++);
1498 0           dd /= blur;
1499 0           sum += dd * dd;
1500 0 0         if(sum > GAUSSIAN_MAXVAL) {
1501 0           i = ndims; /* exit early if we're too far out */
1502 0           alpha = 0;
1503             }
1504             }
1505 0 0         if( sum > GAUSSIAN_MAXVAL || !isfinite(sum) || isnan(sum) ) {
    0          
    0          
1506 0           alpha = 0;
1507             } else {
1508             int lodex,hidex;
1509 0           PDLA_Double beta = fabs(zeta * sum);
1510              
1511 0           lodex = beta;
1512 0           beta -= lodex; hidex = lodex+1;
1513 0           alpha = gaussian_lookup[hidex]*beta + gaussian_lookup[lodex]*(1 - beta);
1514              
1515             }
1516             }
1517 0           break;
1518              
1519             case 'G':
1520             /* This is the Gaussian rolloff with explicit calculation, preserved */
1521             /* in case someone actually wants the slower longer method. */
1522             {
1523 0           PDLA_Double sum = 0;
1524 0           cp = tmp;
1525 0 0         for(i=0; i
1526 0           dd = 0;
1527 0           ap = tvec;
1528 0 0         for(j=0;j
1529 0           dd += *(ap++) * *(cp++);
1530 0           dd /= blur;
1531 0           sum += dd * dd;
1532 0 0         if(sum > 4) /* 2 pixels -- four half-widths */
1533 0           i = ndims; /* exit early if this pixel is too far outside the footprint of the ideal point */
1534             }
1535              
1536 0 0         if(sum > GAUSSIAN_MAXVAL)
1537 0           alpha = 0;
1538             else
1539 0           alpha = exp(-sum * 1.386294); /* Gaussian, rt(2)-pix HWHM */
1540             }
1541 0           break;
1542             default:
1543             {
1544             char buf[80];
1545 0           sprintf(buf,"This can't happen: method='%c'",method);
1546 0           barf("%s", buf);
1547             }
1548             }
1549              
1550             { /* convenience block -- accumulate the current point into the weighted sum. */
1551             /* This is more than simple assignment because we have our own explicit poor */
1552             /* man's threadloop here, so we accumulate each threaded element separately. */
1553 0           PDLA_Short *dat = ((PDLA_Short *)(in->data)) + i_off;
1554 0           PDLA_Indx max = out->dims[ndims];
1555 0 0         for( i=0; i < max; i++ ) {
1556 0 0         if( (badval==0) || (*dat != badval) ) {
    0          
1557 0           acc[i] += *dat * alpha;
1558 0           dat += in->dimincs[ndims];
1559 0           wgt[i] += alpha;
1560             }
1561 0           wgt2[i] += alpha; }
1562             }
1563             } /* end of t_vio check (i.e. of input accumulation) */
1564              
1565              
1566             /* Advance input accumulation loop. */
1567             /* We both increment the total vector and also advance the index. */
1568 0           carry = 1;
1569 0 0         for(i=0; i
    0          
1570             /* Advance the current element of the offset vector */
1571 0           ivec[i]++;
1572 0           j = ivec[i] + ibvec[i];
1573              
1574             /* Advance the offset into the data array */
1575 0 0         if( j > 0 && j <= in->dims[i]-1 ) {
    0          
1576             /* Normal case -- just advance the input vector */
1577 0           i_off += in->dimincs[i];
1578             } else {
1579             /* Busted a boundary - either before or after. */
1580 0           switch(bounds[i]){
1581             case 0: /* no breakage allowed -- treat as truncation for interpolation */
1582             case 1: /* truncation -- if we crossed the boundary mark ourselves out-of-bounds */
1583 0 0         if( j == 0 )
1584 0           t_vio--;
1585 0 0         else if( j == in->dims[i] )
1586 0           t_vio++;
1587 0           break;
1588             case 2: /* extension -- do nothing (so the same input point is re-used) */
1589 0           break;
1590             case 3: /* periodic -- advance and mod into the allowed range */
1591 0 0         if((j % in->dims[i]) == 0) {
1592 0           i_off -= in->dimincs[i] * (in->dims[i]-1);
1593             } else {
1594 0           i_off += in->dimincs[i];
1595             }
1596 0           break;
1597             case 4: /* mirror -- advance or retreat depending on phase */
1598 0           j += in->dims[i];
1599 0           j %= (in->dims[i]*2);
1600 0           j -= in->dims[i];
1601 0 0         if( j!=0 && j!= -in->dims[i] ) {
    0          
1602 0 0         if(j<0)
1603 0           i_off -= in->dimincs[i];
1604             else
1605 0           i_off += in->dimincs[i];
1606             }
1607 0           break;
1608             }
1609             }
1610              
1611             /* Now check for carry */
1612 0 0         if(ivec[i] <= psize) {
1613             /* Normal case -- copy the current offset to the faster-running dim stashes */
1614             int k;
1615 0 0         for(k=0;k
1616 0           index_stash[k] = i_off;
1617             }
1618 0           carry = 0;
1619              
1620             } else { /* End of this scan -- recover the last position, and mark carry */
1621 0           i_off = index_stash[i];
1622 0 0         if(bounds[i]==1) {
1623 0           j = ivec[i] + ibvec[i];
1624 0 0         if( j < 0 || j >= in->dims[i] )
    0          
1625 0           t_vio--;
1626 0           ivec[i] = -psize;
1627 0           j = ivec[i] + ibvec[i];
1628 0 0         if( j < 0 || j >= in->dims[i] )
    0          
1629 0           t_vio++;
1630 0           carry = 1;
1631             } else {
1632 0           ivec[i] = -psize;
1633             }
1634             }
1635             } /* End of counter-advance loop */
1636 0 0         } while(carry==0); /* end of total data accumulation loop (termination condition has carry on last dim) */
1637              
1638             {
1639 0           PDLA_Double *ac = acc;
1640 0           PDLA_Double *wg = wgt;
1641 0           PDLA_Double *wg2 = wgt2;
1642 0           PDLA_Short *dat = out->data;
1643              
1644             /* Calculate output vector offset */
1645 0 0         for(i=0;i
1646 0           dat += out->dimincs[i] * ovec[i];
1647              
1648 0 0         if(!flux) {
1649             /* Flux flag is NOT set -- normal case. Copy the weighted accumulated data. */
1650 0 0         for(i=0; i < out->dims[ndims]; i++) {
1651 0 0         if(*wg && (*wg2 / *wg) < 1.5 ) {
    0          
1652 0           *dat = *(ac++) / *(wg++);
1653 0           wg2++;
1654             } else {
1655 0           *dat = badval;
1656 0           ac++; wg++; wg2++;
1657             }
1658 0           dat += out->dimincs[ndims];
1659             }
1660             } else {
1661             /* Flux flag is set - scale by the (unpadded) determinant of the Jacobian */
1662 0           PDLA_Double det = tmp[ndims*ndims];
1663 0 0         for(i=0; i < out->dims[ndims]; i++) {
1664 0 0         if(*wg && (*wg2 / *wg) < 1.5 ) {
    0          
1665 0           *dat = *(ac++) / *(wg++) * det;
1666 0           wg2++;
1667             } else {
1668 0           *dat = badval;
1669 0           ac++; wg++; wg2++;
1670             }
1671 0           dat += out->dimincs[ndims];
1672             } /* end of for loop */
1673             } /* end of flux flag set conditional */
1674             } /* end of convenience block */
1675            
1676             /* End of code for normal pixels */
1677             } else {
1678             /* The pixel was ludicrously huge -- just set this pixel to nan */
1679 0           PDLA_Short *dat = out->data;
1680 0 0         for(i=0;i
1681 0           dat += out->dimincs[i] * ovec[i];
1682 0 0         for(i=0;idims[ndims];i++) {
1683 0           *dat = badval; /* Should handle bad values too -- not yet */
1684 0           dat += out->dimincs[ndims];
1685             }
1686             }
1687              
1688             /* Increment the pixel counter */
1689             {
1690 0 0         for(i=0;
1691 0 0         (i
1692 0 0         (map_ptr += map->dimincs[i+1]) && /* Funky pre-test increment */
1693 0           (++(ovec[i]) >= out->dims[i]); /* Actual carry test */
1694 0           i++) {
1695 0           ovec[i] = 0;
1696 0           map_ptr -= out->dims[i] * map->dimincs[i+1];
1697             }
1698             }
1699 0 0         } while(i
1700              
1701              
1702              
1703             }
1704             PDLA_COMMENT("THREADLOOPEND")
1705             }
1706             }
1707 0           k0_datap -= __tinc1_0 * __tdims1 + __offsp[0];
1708 0 0         } while(PDLA->iterthreadloop(&__privtrans->__pdlthread,2)); } break; case PDLA_US: {
1709 0 0         PDLA_Ushort * k0_datap = ((PDLA_Ushort *)(PDLA_REPRP_TRANS((__privtrans->pdls[0]),(__privtrans->vtable->per_pdl_flags[0]))));
    0          
1710 0           PDLA_Ushort * k0_physdatap = ((PDLA_Ushort *)((__privtrans->pdls[0])->data));
1711              
1712              
1713             PDLA_COMMENT("THREADLOOPBEGIN")
1714 0 0         if ( PDLA->startthreadloop(&(__privtrans->__pdlthread),__privtrans->vtable->readdata, __tr) ) return;
1715 0           do { register PDLA_Indx __tind1=0,__tind2=0;
1716 0           register PDLA_Indx __tnpdls = __privtrans->__pdlthread.npdls;
1717 0           register PDLA_Indx __tdims1 = __privtrans->__pdlthread.dims[1];
1718 0           register PDLA_Indx __tdims0 = __privtrans->__pdlthread.dims[0];
1719 0           register PDLA_Indx *__offsp = PDLA->get_threadoffsp(&__privtrans->__pdlthread);
1720 0           register PDLA_Indx __tinc0_0 = __privtrans->__pdlthread.incs[0];
1721 0           register PDLA_Indx __tinc1_0 = __privtrans->__pdlthread.incs[__tnpdls+0];
1722 0           k0_datap += __offsp[0];
1723 0 0         for( __tind2 = 0 ;
1724             __tind2 < __tdims1 ;
1725 0           __tind2++
1726 0           ,k0_datap += __tinc1_0 - __tinc0_0 * __tdims0
1727             )
1728             {
1729 0 0         for( __tind1 = 0 ;
1730             __tind1 < __tdims0 ;
1731 0           __tind1++
1732 0           ,k0_datap += __tinc0_0
1733             )
1734             { PDLA_COMMENT("This is the tightest threadloop. Make sure inside is optimal."){
1735              
1736             /*
1737             * Pixel interpolation & averaging code
1738             *
1739             * Calls a common coordinate-transformation block (see following hdr)
1740             * that isn't dependent on the type of the input variable.
1741             *
1742             * The inputs are SVs to avoid hassling with threadloops; threading
1743             * is handled internally. To simplify the threading business, any
1744             * thread dimensions should all be collapsed to a single one by the
1745             * perl front-end.
1746             *
1747             */
1748              
1749             short ndims; /* Number of dimensions we're working in */
1750             PDLA_Double *tmp; /* Workspace for prefrobnication */
1751             PDLA_Indx *ovec; /* output pixel loop vector */
1752             PDLA_Indx *ivec; /* input pixel loop vector */
1753             PDLA_Indx *ibvec; /* input pixel base offset vector */
1754             PDLA_Double *dvec; /* Residual vector for linearization */
1755             PDLA_Double *tvec; /* Temporary floating-point vector */
1756             PDLA_Double *acc; /* Threaded accumulator */
1757             PDLA_Double *wgt; /* Threaded weight accumulator */
1758             PDLA_Double *wgt2; /* Threaded weight accumulator for badval finding */
1759             char *bounds; /* Boundary condition packed string */
1760             PDLA_Indx *index_stash; /* Stash to store the opening index of dim sample scans */
1761             char method; /* Method identifier (gets one of 'h','g') */
1762             PDLA_Long big; /* Max size of input footprint for each pix */
1763             PDLA_Double blur; /* Scaling of filter */
1764             PDLA_Double sv_min; /* minimum singular value */
1765             char flux; /* Flag to indicate flux conservation */
1766             PDLA_Double *map_ptr;
1767             PDLA_Long i, j;
1768 0 0         PDLA_Ushort badval = SvNV(__privtrans->bv);
1769             #define HANNING_LOOKUP_SIZE 2500
1770             static PDLA_Double hanning_lookup[HANNING_LOOKUP_SIZE + 2];
1771             static int needs_hanning_calc = 1;
1772             PDLA_Double zeta;
1773             PDLA_Double hanning_offset;
1774              
1775             #define GAUSSIAN_LOOKUP_SIZE 4000
1776             #define GAUSSIAN_MAXVAL 6.25 /* 2.5 HWHMs (square it) */
1777             static PDLA_Double gaussian_lookup[GAUSSIAN_LOOKUP_SIZE + 2];
1778             static int needs_gaussian_calc = 1;
1779              
1780 0           pdl *in = PDLA->SvPDLAV(__privtrans->in);
1781 0           pdl *out = PDLA->SvPDLAV(__privtrans->out);
1782 0           pdl *map = PDLA->SvPDLAV(__privtrans->map);
1783              
1784 0           PDLA->make_physical(in);
1785 0           PDLA->make_physical(out);
1786 0           PDLA->make_physical(map);
1787              
1788 0           ndims = map->ndims -1;
1789              
1790             /*
1791             * Allocate all our dynamic workspaces at once
1792             * */
1793 0           ovec = (PDLA_Indx *)(PDLA->smalloc( (STRLEN)
1794 0           ( + sizeof(PDLA_Indx) * 3 * ndims + sizeof(PDLA_Double) * (3*ndims) + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * 3 * ndims*ndims + ndims + sizeof(char) * ndims + sizeof(PDLA_Indx) * ndims )
1795             )
1796             );
1797 0           ivec = &(ovec[ndims]);
1798 0           ibvec = &(ivec[ndims]);
1799 0           dvec = (PDLA_Double *)(&(ibvec[ndims]));
1800 0           tvec = &(dvec[ndims]);
1801 0           acc = &(tvec[ndims]);
1802 0           wgt = &(acc[in->dims[ndims]]); wgt2 = &(wgt[in->dims[ndims]]); tmp = &(wgt2[in->dims[ndims]]);
1803 0           bounds = (char *)(&(tmp [3*ndims*ndims+ndims]));
1804 0           index_stash = (PDLA_Indx *) &(bounds[ndims]);
1805              
1806              
1807             /***
1808             * Fill in the boundary condition array
1809             */
1810             {
1811             char *bstr;
1812             STRLEN blen;
1813 0 0         bstr = SvPV(__privtrans->boundary,blen);
1814              
1815 0 0         if(blen == 0) {
1816             /* If no boundary is specified then every dim gets truncated */
1817             int i;
1818 0 0         for (i=0;i
1819 0           bounds[i] = 1;
1820             } else {
1821             int i;
1822 0 0         for(i=0;i
1823 0 0         switch(bstr[i < blen ? i : blen-1 ]) {
1824             case '0': case 'f': case 'F': /* forbid */
1825 0           bounds[i] = 0;
1826 0           break;
1827             case '1': case 't': case 'T': /* truncate */
1828 0           bounds[i] = 1;
1829 0           break;
1830             case '2': case 'e': case 'E': /* extend */
1831 0           bounds[i] = 2;
1832 0           break;
1833             case '3': case 'p': case 'P': /* periodic */
1834 0           bounds[i] = 3;
1835 0           break;
1836             case '4': case 'm': case 'M': /* mirror */
1837 0           bounds[i] = 4;
1838 0           break;
1839             default:
1840             {
1841             char buf[BUFSIZ];
1842 0           sprintf(buf,"Error in map: Unknown boundary condition '%c'",bstr[i]);
1843 0           barf("%s", buf);
1844             }
1845 0           break;
1846             }
1847             }
1848             }
1849             }
1850              
1851             /***
1852             * Parse out the 'method', 'big', 'blur', and 'sv_min' arguments
1853             */
1854 0 0         big = labs((PDLA_Long) (SvNV(__privtrans->big)));
1855 0 0         if(big <= 0)
1856 0           barf("%s","map: 'big' parameter must be >0");
1857              
1858 0 0         blur = fabs((PDLA_Double) (SvNV(__privtrans->blur)));
1859 0 0         if(blur < 0)
1860 0           barf("%s","map: 'blur' parameter must be >= 0");
1861              
1862 0 0         sv_min = fabs((PDLA_Double) (SvNV(__privtrans->sv_min)));
1863 0 0         if(sv_min < 0)
1864 0           barf("%s","map: 'sv_min' parameter must be >= 0");
1865              
1866 0 0         flux = (SvNV(__privtrans->flux) != 0);
1867              
1868             {
1869             char *mstr;
1870             STRLEN mlen;
1871 0 0         mstr = SvPV(__privtrans->method,mlen);
1872              
1873 0 0         if(mlen==0)
1874 0           method = 'h';
1875 0           else switch(*mstr) {
1876 0           case 'H': method='H'; break;
1877 0           case 'h': method = 'h';
1878 0 0         if( needs_hanning_calc ) {
1879             int i;
1880 0 0         for(i=0;i
1881 0           hanning_lookup[i] = 0.5 + 0.5 * cos(3.1415926536 / HANNING_LOOKUP_SIZE * i);
1882             }
1883 0           hanning_lookup[HANNING_LOOKUP_SIZE] = 0;
1884 0           hanning_lookup[HANNING_LOOKUP_SIZE+1] = 0;
1885 0           needs_hanning_calc = 0;
1886             }
1887 0           zeta = HANNING_LOOKUP_SIZE / blur;
1888 0           hanning_offset = (blur >= 1) ?
1889 0 0         0 :
1890 0           0.5 * (1.0 - blur);
1891 0           break;
1892              
1893 0           case 'g': case 'j': method = 'g';
1894 0           zeta = GAUSSIAN_LOOKUP_SIZE / GAUSSIAN_MAXVAL;
1895              
1896 0 0         if( needs_gaussian_calc ) {
1897             int i;
1898 0 0         for(i=0;i
1899 0           gaussian_lookup[i] = exp( - i * 1.386294 / zeta );
1900             }
1901 0           gaussian_lookup[GAUSSIAN_LOOKUP_SIZE] = 0;
1902 0           gaussian_lookup[GAUSSIAN_LOOKUP_SIZE+1] = 0;
1903 0           needs_gaussian_calc = 0;
1904             }
1905 0           break;
1906              
1907 0           case 'G': case 'J': method = 'G'; break;
1908             default:
1909             {
1910             char err[80];
1911 0           sprintf(err,"Bug in map: unknown method '%c'",*mstr);
1912 0           barf("%s", err);
1913             }
1914 0           break;
1915             }
1916             }
1917              
1918              
1919              
1920             /* End of initialization */
1921             /*************************************************************/
1922             /* Start of Real Work */
1923              
1924             /* Initialize coordinate vector and map offset
1925             */
1926 0 0         for(i=0;i
1927 0           ovec[i] = 0;
1928              
1929 0           map_ptr = (PDLA_Double *)(map->data);
1930              
1931              
1932             /* Main pixel loop (iterates over pixels in the output plane) */
1933             do {
1934             PDLA_Indx psize; PDLA_Indx i_off; PDLA_Indx j; char t_vio; char carry;
1935             /* Prefrobnicate the transformation matrix */
1936 0           psize = (PDLA_Long)(blur * PDLA_xform_aux(map, ovec, tmp, sv_min) + 0.5)+1; /* assignment */
1937              
1938             #ifdef DEBUG_MAP
1939             {
1940             int k; PDLA_Indx foo = 0;
1941             printf("ovec: [");
1942             for(k=0;k
1943             foo += ovec[k] * map->dimincs[k+1];
1944             printf(" %2d ",(int)(ovec[k]));
1945             }
1946             printf("]; psize is %ld; big is %d; blur is %8.2f; map is [",psize,big, blur);
1947             for(k=0;k
1948             printf("%8.2f",(double)(((PDLA_Ushort *)(map->data))[foo + k*map->dimincs[0]]));
1949             }
1950             printf("]\n");
1951             }
1952             #endif
1953              
1954             /* Don't bother accumulating output if psize is too large */
1955 0 0         if(psize <= big) {
1956             /* Use the prefrobnicated matrix to generate a local linearization.
1957             * dvec gets the delta; ibvec gets the base.
1958             */
1959             {
1960 0           PDLA_Double *mp = map_ptr;
1961 0 0         for (i=0;i
1962 0           dvec[i] = *mp - ( ibvec[i] = (PDLA_Long)(*mp + 0.5)); /* assignment */
1963 0           mp += map->dimincs[0];
1964             }
1965             }
1966              
1967             /* Initialize input delta vector */
1968 0 0         for(i=0;i
1969 0           ivec[i] = -psize;
1970              
1971             /* Initialize accumulators */
1972             {
1973 0           PDLA_Double *ac = acc;
1974 0 0         for(i=0; i < in->dims[ndims]; i++)
1975 0           *(ac++) = 0.0;
1976              
1977             }
1978             {
1979 0           PDLA_Double *wg = wgt;
1980 0 0         for(i=0;i < in->dims[ndims]; i++)
1981 0           *(wg++) = 0.0;
1982             }
1983             {
1984 0           PDLA_Double *wg = wgt2;
1985 0 0         for(i=0;i < in->dims[ndims]; i++)
1986 0           *(wg++) = 0.0;
1987             }
1988              
1989              
1990             /*
1991             * Calculate the original offset into the data array, to enable
1992             * delta calculations in the pixel loop
1993             *
1994             * i runs over dims; j holds the working integer index in the
1995             * current dim.
1996             *
1997             * This code matches the incrementation code at the bottom of the accumulation loop
1998             */
1999            
2000 0           t_vio = 0; /* truncation-boundary violation count - don't bother if it is nonzero */
2001 0           i_off = 0;
2002 0 0         for(i=0;i
2003 0           j = ivec[i] + ibvec[i];
2004 0 0         if(j<0 || j >= in->dims[i]) {
    0          
2005 0           switch(bounds[i]) {
2006             case 0: /* no breakage allowed */
2007 0           barf("%s","index out-of-bounds in map");
2008 0           break;
2009             case 1: /* truncation */
2010 0           t_vio++;
2011             /* fall through */
2012             case 2: /* extension -- crop */
2013 0 0         if(j<0)
2014 0           j=0;
2015 0           else j = in->dims[i] - 1;
2016 0           break;
2017             case 3: /* periodic -- mod it */
2018 0           j %= in->dims[i];
2019 0 0         if(j<0)
2020 0           j += in->dims[i];
2021 0           break;
2022             case 4: /* mirror -- reflect off the edges */
2023 0           j += in->dims[i];
2024 0           j %= (in->dims[i]*2);
2025 0 0         if(j<0)
2026 0           j += in->dims[i]*2;
2027 0           j -= in->dims[i];
2028 0 0         if(j<0) {
2029 0           j *= -1;
2030 0           j -= 1;
2031             }
2032 0           break;
2033             default:
2034 0           barf("%s", "Unknown boundary condition in map -- bug alert!");
2035 0           break;
2036             }
2037             }
2038 0           i_off += in->dimincs[i] * j;
2039             }
2040              
2041             /* Initialize index stashes for later reference as we scan the footprint */
2042             /* It's a pain in the ass to deal with boundaries, and doubly so at the */
2043             /* end of a dimensional scan. So we stash the index location at the */
2044             /* start of each dimensional scan here. When we finish incrementing */
2045             /* through a particular dim, we pull its value back out of the stash. */
2046 0 0         for(i=0;i
2047 0           index_stash[i] = i_off;
2048             }
2049              
2050             /* The input accumulation loop is the hotspot for the entire operation. */
2051             /* We loop over pixels in the region of interest (+/- psize in each dimension) */
2052             /* in the input array, use the linearized transform to bring each pixel center */
2053             /* forward to the output plane, and calculate a weighting based on the chosen */
2054             /* filter function. 'h' is a fast Hanning window rolloff using a lookup */
2055             /* table that is initialized the first time through the code. 'H' is the */
2056             /* same process, but explicitly calculated for each interation (~2x slower). */
2057             /* 'g' uses a radial Gaussian filter. Rather than calculate the array offset */
2058             /* into the input array fresh from the current input array vector each time, */
2059             /* we walk through the array using dimincs and the old offset. This saves */
2060             /* about half of the time spent on index calculation. */
2061              
2062             do { /* Input accumulation loop */
2063             PDLA_Double *cp;
2064             PDLA_Double alpha;
2065             /* Calculate the weight of the current input point. Don't bother if we're
2066             * violating any truncation boundaries (in that case our value is zero, but
2067             * for the interpolation we also set the weight to zero).
2068             */
2069 0 0         if( !t_vio ) {
2070              
2071 0           PDLA_Double *ap = tvec;
2072 0           PDLA_Double *bp = dvec;
2073 0           PDLA_Indx *ip = ivec;
2074 0 0         for(i=0; i
2075 0           *(ap++) = *(ip++) - *(bp++);
2076              
2077 0           switch(method) {
2078             PDLA_Double dd;
2079             case 'h':
2080             /* This is the Hanning window rolloff. It is a product of a simple */
2081             /* cos^2(theta) rolloff in each dimension. Using a lookup table */
2082             /* is about 2x faster than using cos(theta) directly in each */
2083             /* weighting calculation, so we do. Using 2500 entries and linear */
2084             /* interpolation is accurate to about 10^-7, and should preserve */
2085             /* the contents of cache pretty well. */
2086 0           alpha = 1;
2087 0           cp = tmp;
2088 0 0         for(i=0; i
2089             int lodex;
2090             int hidex;
2091             PDLA_Double beta;
2092 0           dd = 0;
2093 0           ap = tvec;
2094             /* Get the matrix-multiply element for this dimension */
2095 0 0         for(j=0;j
2096 0           dd += *(ap++) * *(cp++);
2097              
2098             /* Do linear interpolation from the table */
2099             /* The table captures a hanning window centered 0.5 pixel from center. */
2100             /* We scale the filter by the blur parameter -- but if blur is less */
2101             /* than unity, we shrink the hanning blur window while keeping the 0.5 */
2102             /* value on the pixel edge at 0.5. For blur greater than unity, we */
2103             /* scale simply. */
2104 0           beta = fabs(dd) - hanning_offset;
2105 0 0         if(beta > 0) {
2106 0 0         if(beta >= blur) {
2107 0           alpha = 0;
2108 0           i = ndims;
2109             } else {
2110 0           beta *= zeta;
2111 0           lodex = beta;
2112 0 0         beta -= lodex; if(lodex > HANNING_LOOKUP_SIZE)
2113 0           lodex = HANNING_LOOKUP_SIZE;
2114 0           hidex = lodex+1;
2115 0           alpha *= hanning_lookup[hidex]*beta + hanning_lookup[lodex]*(1-beta);
2116             } /* end of interpolation branch */
2117             } /* end of beta > 0 branch */
2118             } /* end of dimension loop */
2119 0           break;
2120              
2121             case 'H':
2122             /* This is the Hanning window rolloff with explicit calculation, preserved */
2123             /* in case someone actually wants the slower longer method. */
2124 0           alpha = 1;
2125 0           cp = tmp;
2126 0 0         for(i=0; i
2127 0           dd = 0;
2128 0           ap = tvec;
2129 0 0         for(j=0;j
2130 0           dd += *(ap++) * *(cp++);
2131 0           dd = (fabs(dd) - hanning_offset) / blur;
2132 0 0         if( dd > 1 ) {
2133 0           alpha = 0;
2134 0           i = ndims;
2135             } else
2136 0           alpha *= (0.5 + 0.5 * cos( dd * 3.1415926536 ));
2137             }
2138 0           break;
2139              
2140             case 'g':
2141             /* This is the Gaussian rolloff. It does lookup into a precalculated exponential. */
2142             {
2143 0           PDLA_Double sum = 0;
2144 0           cp = tmp;
2145 0 0         for(i=0; i
2146 0           dd = 0;
2147 0           ap = tvec;
2148 0 0         for(j=0;j
2149 0           dd += *(ap++) * *(cp++);
2150 0           dd /= blur;
2151 0           sum += dd * dd;
2152 0 0         if(sum > GAUSSIAN_MAXVAL) {
2153 0           i = ndims; /* exit early if we're too far out */
2154 0           alpha = 0;
2155             }
2156             }
2157 0 0         if( sum > GAUSSIAN_MAXVAL || !isfinite(sum) || isnan(sum) ) {
    0          
    0          
2158 0           alpha = 0;
2159             } else {
2160             int lodex,hidex;
2161 0           PDLA_Double beta = fabs(zeta * sum);
2162              
2163 0           lodex = beta;
2164 0           beta -= lodex; hidex = lodex+1;
2165 0           alpha = gaussian_lookup[hidex]*beta + gaussian_lookup[lodex]*(1 - beta);
2166              
2167             }
2168             }
2169 0           break;
2170              
2171             case 'G':
2172             /* This is the Gaussian rolloff with explicit calculation, preserved */
2173             /* in case someone actually wants the slower longer method. */
2174             {
2175 0           PDLA_Double sum = 0;
2176 0           cp = tmp;
2177 0 0         for(i=0; i
2178 0           dd = 0;
2179 0           ap = tvec;
2180 0 0         for(j=0;j
2181 0           dd += *(ap++) * *(cp++);
2182 0           dd /= blur;
2183 0           sum += dd * dd;
2184 0 0         if(sum > 4) /* 2 pixels -- four half-widths */
2185 0           i = ndims; /* exit early if this pixel is too far outside the footprint of the ideal point */
2186             }
2187              
2188 0 0         if(sum > GAUSSIAN_MAXVAL)
2189 0           alpha = 0;
2190             else
2191 0           alpha = exp(-sum * 1.386294); /* Gaussian, rt(2)-pix HWHM */
2192             }
2193 0           break;
2194             default:
2195             {
2196             char buf[80];
2197 0           sprintf(buf,"This can't happen: method='%c'",method);
2198 0           barf("%s", buf);
2199             }
2200             }
2201              
2202             { /* convenience block -- accumulate the current point into the weighted sum. */
2203             /* This is more than simple assignment because we have our own explicit poor */
2204             /* man's threadloop here, so we accumulate each threaded element separately. */
2205 0           PDLA_Ushort *dat = ((PDLA_Ushort *)(in->data)) + i_off;
2206 0           PDLA_Indx max = out->dims[ndims];
2207 0 0         for( i=0; i < max; i++ ) {
2208 0 0         if( (badval==0) || (*dat != badval) ) {
    0          
2209 0           acc[i] += *dat * alpha;
2210 0           dat += in->dimincs[ndims];
2211 0           wgt[i] += alpha;
2212             }
2213 0           wgt2[i] += alpha; }
2214             }
2215             } /* end of t_vio check (i.e. of input accumulation) */
2216              
2217              
2218             /* Advance input accumulation loop. */
2219             /* We both increment the total vector and also advance the index. */
2220 0           carry = 1;
2221 0 0         for(i=0; i
    0          
2222             /* Advance the current element of the offset vector */
2223 0           ivec[i]++;
2224 0           j = ivec[i] + ibvec[i];
2225              
2226             /* Advance the offset into the data array */
2227 0 0         if( j > 0 && j <= in->dims[i]-1 ) {
    0          
2228             /* Normal case -- just advance the input vector */
2229 0           i_off += in->dimincs[i];
2230             } else {
2231             /* Busted a boundary - either before or after. */
2232 0           switch(bounds[i]){
2233             case 0: /* no breakage allowed -- treat as truncation for interpolation */
2234             case 1: /* truncation -- if we crossed the boundary mark ourselves out-of-bounds */
2235 0 0         if( j == 0 )
2236 0           t_vio--;
2237 0 0         else if( j == in->dims[i] )
2238 0           t_vio++;
2239 0           break;
2240             case 2: /* extension -- do nothing (so the same input point is re-used) */
2241 0           break;
2242             case 3: /* periodic -- advance and mod into the allowed range */
2243 0 0         if((j % in->dims[i]) == 0) {
2244 0           i_off -= in->dimincs[i] * (in->dims[i]-1);
2245             } else {
2246 0           i_off += in->dimincs[i];
2247             }
2248 0           break;
2249             case 4: /* mirror -- advance or retreat depending on phase */
2250 0           j += in->dims[i];
2251 0           j %= (in->dims[i]*2);
2252 0           j -= in->dims[i];
2253 0 0         if( j!=0 && j!= -in->dims[i] ) {
    0          
2254 0 0         if(j<0)
2255 0           i_off -= in->dimincs[i];
2256             else
2257 0           i_off += in->dimincs[i];
2258             }
2259 0           break;
2260             }
2261             }
2262              
2263             /* Now check for carry */
2264 0 0         if(ivec[i] <= psize) {
2265             /* Normal case -- copy the current offset to the faster-running dim stashes */
2266             int k;
2267 0 0         for(k=0;k
2268 0           index_stash[k] = i_off;
2269             }
2270 0           carry = 0;
2271              
2272             } else { /* End of this scan -- recover the last position, and mark carry */
2273 0           i_off = index_stash[i];
2274 0 0         if(bounds[i]==1) {
2275 0           j = ivec[i] + ibvec[i];
2276 0 0         if( j < 0 || j >= in->dims[i] )
    0          
2277 0           t_vio--;
2278 0           ivec[i] = -psize;
2279 0           j = ivec[i] + ibvec[i];
2280 0 0         if( j < 0 || j >= in->dims[i] )
    0          
2281 0           t_vio++;
2282 0           carry = 1;
2283             } else {
2284 0           ivec[i] = -psize;
2285             }
2286             }
2287             } /* End of counter-advance loop */
2288 0 0         } while(carry==0); /* end of total data accumulation loop (termination condition has carry on last dim) */
2289              
2290             {
2291 0           PDLA_Double *ac = acc;
2292 0           PDLA_Double *wg = wgt;
2293 0           PDLA_Double *wg2 = wgt2;
2294 0           PDLA_Ushort *dat = out->data;
2295              
2296             /* Calculate output vector offset */
2297 0 0         for(i=0;i
2298 0           dat += out->dimincs[i] * ovec[i];
2299              
2300 0 0         if(!flux) {
2301             /* Flux flag is NOT set -- normal case. Copy the weighted accumulated data. */
2302 0 0         for(i=0; i < out->dims[ndims]; i++) {
2303 0 0         if(*wg && (*wg2 / *wg) < 1.5 ) {
    0          
2304 0           *dat = *(ac++) / *(wg++);
2305 0           wg2++;
2306             } else {
2307 0           *dat = badval;
2308 0           ac++; wg++; wg2++;
2309             }
2310 0           dat += out->dimincs[ndims];
2311             }
2312             } else {
2313             /* Flux flag is set - scale by the (unpadded) determinant of the Jacobian */
2314 0           PDLA_Double det = tmp[ndims*ndims];
2315 0 0         for(i=0; i < out->dims[ndims]; i++) {
2316 0 0         if(*wg && (*wg2 / *wg) < 1.5 ) {
    0          
2317 0           *dat = *(ac++) / *(wg++) * det;
2318 0           wg2++;
2319             } else {
2320 0           *dat = badval;
2321 0           ac++; wg++; wg2++;
2322             }
2323 0           dat += out->dimincs[ndims];
2324             } /* end of for loop */
2325             } /* end of flux flag set conditional */
2326             } /* end of convenience block */
2327            
2328             /* End of code for normal pixels */
2329             } else {
2330             /* The pixel was ludicrously huge -- just set this pixel to nan */
2331 0           PDLA_Ushort *dat = out->data;
2332 0 0         for(i=0;i
2333 0           dat += out->dimincs[i] * ovec[i];
2334 0 0         for(i=0;idims[ndims];i++) {
2335 0           *dat = badval; /* Should handle bad values too -- not yet */
2336 0           dat += out->dimincs[ndims];
2337             }
2338             }
2339              
2340             /* Increment the pixel counter */
2341             {
2342 0 0         for(i=0;
2343 0 0         (i
2344 0 0         (map_ptr += map->dimincs[i+1]) && /* Funky pre-test increment */
2345 0           (++(ovec[i]) >= out->dims[i]); /* Actual carry test */
2346 0           i++) {
2347 0           ovec[i] = 0;
2348 0           map_ptr -= out->dims[i] * map->dimincs[i+1];
2349             }
2350             }
2351 0 0         } while(i
2352              
2353              
2354              
2355             }
2356             PDLA_COMMENT("THREADLOOPEND")
2357             }
2358             }
2359 0           k0_datap -= __tinc1_0 * __tdims1 + __offsp[0];
2360 0 0         } while(PDLA->iterthreadloop(&__privtrans->__pdlthread,2)); } break; case PDLA_L: {
2361 5 50         PDLA_Long * k0_datap = ((PDLA_Long *)(PDLA_REPRP_TRANS((__privtrans->pdls[0]),(__privtrans->vtable->per_pdl_flags[0]))));
    0          
2362 5           PDLA_Long * k0_physdatap = ((PDLA_Long *)((__privtrans->pdls[0])->data));
2363              
2364              
2365             PDLA_COMMENT("THREADLOOPBEGIN")
2366 5 50         if ( PDLA->startthreadloop(&(__privtrans->__pdlthread),__privtrans->vtable->readdata, __tr) ) return;
2367 5           do { register PDLA_Indx __tind1=0,__tind2=0;
2368 5           register PDLA_Indx __tnpdls = __privtrans->__pdlthread.npdls;
2369 5           register PDLA_Indx __tdims1 = __privtrans->__pdlthread.dims[1];
2370 5           register PDLA_Indx __tdims0 = __privtrans->__pdlthread.dims[0];
2371 5           register PDLA_Indx *__offsp = PDLA->get_threadoffsp(&__privtrans->__pdlthread);
2372 5           register PDLA_Indx __tinc0_0 = __privtrans->__pdlthread.incs[0];
2373 5           register PDLA_Indx __tinc1_0 = __privtrans->__pdlthread.incs[__tnpdls+0];
2374 5           k0_datap += __offsp[0];
2375 10 100         for( __tind2 = 0 ;
2376             __tind2 < __tdims1 ;
2377 5           __tind2++
2378 5           ,k0_datap += __tinc1_0 - __tinc0_0 * __tdims0
2379             )
2380             {
2381 10 100         for( __tind1 = 0 ;
2382             __tind1 < __tdims0 ;
2383 5           __tind1++
2384 5           ,k0_datap += __tinc0_0
2385             )
2386             { PDLA_COMMENT("This is the tightest threadloop. Make sure inside is optimal."){
2387              
2388             /*
2389             * Pixel interpolation & averaging code
2390             *
2391             * Calls a common coordinate-transformation block (see following hdr)
2392             * that isn't dependent on the type of the input variable.
2393             *
2394             * The inputs are SVs to avoid hassling with threadloops; threading
2395             * is handled internally. To simplify the threading business, any
2396             * thread dimensions should all be collapsed to a single one by the
2397             * perl front-end.
2398             *
2399             */
2400              
2401             short ndims; /* Number of dimensions we're working in */
2402             PDLA_Double *tmp; /* Workspace for prefrobnication */
2403             PDLA_Indx *ovec; /* output pixel loop vector */
2404             PDLA_Indx *ivec; /* input pixel loop vector */
2405             PDLA_Indx *ibvec; /* input pixel base offset vector */
2406             PDLA_Double *dvec; /* Residual vector for linearization */
2407             PDLA_Double *tvec; /* Temporary floating-point vector */
2408             PDLA_Double *acc; /* Threaded accumulator */
2409             PDLA_Double *wgt; /* Threaded weight accumulator */
2410             PDLA_Double *wgt2; /* Threaded weight accumulator for badval finding */
2411             char *bounds; /* Boundary condition packed string */
2412             PDLA_Indx *index_stash; /* Stash to store the opening index of dim sample scans */
2413             char method; /* Method identifier (gets one of 'h','g') */
2414             PDLA_Long big; /* Max size of input footprint for each pix */
2415             PDLA_Double blur; /* Scaling of filter */
2416             PDLA_Double sv_min; /* minimum singular value */
2417             char flux; /* Flag to indicate flux conservation */
2418             PDLA_Double *map_ptr;
2419             PDLA_Long i, j;
2420 5 50         PDLA_Long badval = SvNV(__privtrans->bv);
2421             #define HANNING_LOOKUP_SIZE 2500
2422             static PDLA_Double hanning_lookup[HANNING_LOOKUP_SIZE + 2];
2423             static int needs_hanning_calc = 1;
2424             PDLA_Double zeta;
2425             PDLA_Double hanning_offset;
2426              
2427             #define GAUSSIAN_LOOKUP_SIZE 4000
2428             #define GAUSSIAN_MAXVAL 6.25 /* 2.5 HWHMs (square it) */
2429             static PDLA_Double gaussian_lookup[GAUSSIAN_LOOKUP_SIZE + 2];
2430             static int needs_gaussian_calc = 1;
2431              
2432 5           pdl *in = PDLA->SvPDLAV(__privtrans->in);
2433 5           pdl *out = PDLA->SvPDLAV(__privtrans->out);
2434 5           pdl *map = PDLA->SvPDLAV(__privtrans->map);
2435              
2436 5           PDLA->make_physical(in);
2437 5           PDLA->make_physical(out);
2438 5           PDLA->make_physical(map);
2439              
2440 5           ndims = map->ndims -1;
2441              
2442             /*
2443             * Allocate all our dynamic workspaces at once
2444             * */
2445 5           ovec = (PDLA_Indx *)(PDLA->smalloc( (STRLEN)
2446 5           ( + sizeof(PDLA_Indx) * 3 * ndims + sizeof(PDLA_Double) * (3*ndims) + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * 3 * ndims*ndims + ndims + sizeof(char) * ndims + sizeof(PDLA_Indx) * ndims )
2447             )
2448             );
2449 5           ivec = &(ovec[ndims]);
2450 5           ibvec = &(ivec[ndims]);
2451 5           dvec = (PDLA_Double *)(&(ibvec[ndims]));
2452 5           tvec = &(dvec[ndims]);
2453 5           acc = &(tvec[ndims]);
2454 5           wgt = &(acc[in->dims[ndims]]); wgt2 = &(wgt[in->dims[ndims]]); tmp = &(wgt2[in->dims[ndims]]);
2455 5           bounds = (char *)(&(tmp [3*ndims*ndims+ndims]));
2456 5           index_stash = (PDLA_Indx *) &(bounds[ndims]);
2457              
2458              
2459             /***
2460             * Fill in the boundary condition array
2461             */
2462             {
2463             char *bstr;
2464             STRLEN blen;
2465 5 50         bstr = SvPV(__privtrans->boundary,blen);
2466              
2467 5 50         if(blen == 0) {
2468             /* If no boundary is specified then every dim gets truncated */
2469             int i;
2470 0 0         for (i=0;i
2471 0           bounds[i] = 1;
2472             } else {
2473             int i;
2474 15 100         for(i=0;i
2475 10 100         switch(bstr[i < blen ? i : blen-1 ]) {
2476             case '0': case 'f': case 'F': /* forbid */
2477 0           bounds[i] = 0;
2478 0           break;
2479             case '1': case 't': case 'T': /* truncate */
2480 10           bounds[i] = 1;
2481 10           break;
2482             case '2': case 'e': case 'E': /* extend */
2483 0           bounds[i] = 2;
2484 0           break;
2485             case '3': case 'p': case 'P': /* periodic */
2486 0           bounds[i] = 3;
2487 0           break;
2488             case '4': case 'm': case 'M': /* mirror */
2489 0           bounds[i] = 4;
2490 0           break;
2491             default:
2492             {
2493             char buf[BUFSIZ];
2494 0           sprintf(buf,"Error in map: Unknown boundary condition '%c'",bstr[i]);
2495 0           barf("%s", buf);
2496             }
2497 0           break;
2498             }
2499             }
2500             }
2501             }
2502              
2503             /***
2504             * Parse out the 'method', 'big', 'blur', and 'sv_min' arguments
2505             */
2506 5 50         big = labs((PDLA_Long) (SvNV(__privtrans->big)));
2507 5 50         if(big <= 0)
2508 0           barf("%s","map: 'big' parameter must be >0");
2509              
2510 5 50         blur = fabs((PDLA_Double) (SvNV(__privtrans->blur)));
2511 5 50         if(blur < 0)
2512 0           barf("%s","map: 'blur' parameter must be >= 0");
2513              
2514 5 50         sv_min = fabs((PDLA_Double) (SvNV(__privtrans->sv_min)));
2515 5 50         if(sv_min < 0)
2516 0           barf("%s","map: 'sv_min' parameter must be >= 0");
2517              
2518 5 50         flux = (SvNV(__privtrans->flux) != 0);
2519              
2520             {
2521             char *mstr;
2522             STRLEN mlen;
2523 5 50         mstr = SvPV(__privtrans->method,mlen);
2524              
2525 5 50         if(mlen==0)
2526 0           method = 'h';
2527 5           else switch(*mstr) {
2528 1           case 'H': method='H'; break;
2529 1           case 'h': method = 'h';
2530 1 50         if( needs_hanning_calc ) {
2531             int i;
2532 2501 100         for(i=0;i
2533 2500           hanning_lookup[i] = 0.5 + 0.5 * cos(3.1415926536 / HANNING_LOOKUP_SIZE * i);
2534             }
2535 1           hanning_lookup[HANNING_LOOKUP_SIZE] = 0;
2536 1           hanning_lookup[HANNING_LOOKUP_SIZE+1] = 0;
2537 1           needs_hanning_calc = 0;
2538             }
2539 1           zeta = HANNING_LOOKUP_SIZE / blur;
2540 1           hanning_offset = (blur >= 1) ?
2541 1 50         0 :
2542 0           0.5 * (1.0 - blur);
2543 1           break;
2544              
2545 2           case 'g': case 'j': method = 'g';
2546 2           zeta = GAUSSIAN_LOOKUP_SIZE / GAUSSIAN_MAXVAL;
2547              
2548 2 100         if( needs_gaussian_calc ) {
2549             int i;
2550 4001 100         for(i=0;i
2551 4000           gaussian_lookup[i] = exp( - i * 1.386294 / zeta );
2552             }
2553 1           gaussian_lookup[GAUSSIAN_LOOKUP_SIZE] = 0;
2554 1           gaussian_lookup[GAUSSIAN_LOOKUP_SIZE+1] = 0;
2555 1           needs_gaussian_calc = 0;
2556             }
2557 2           break;
2558              
2559 1           case 'G': case 'J': method = 'G'; break;
2560             default:
2561             {
2562             char err[80];
2563 0           sprintf(err,"Bug in map: unknown method '%c'",*mstr);
2564 0           barf("%s", err);
2565             }
2566 0           break;
2567             }
2568             }
2569              
2570              
2571              
2572             /* End of initialization */
2573             /*************************************************************/
2574             /* Start of Real Work */
2575              
2576             /* Initialize coordinate vector and map offset
2577             */
2578 15 100         for(i=0;i
2579 10           ovec[i] = 0;
2580              
2581 5           map_ptr = (PDLA_Double *)(map->data);
2582              
2583              
2584             /* Main pixel loop (iterates over pixels in the output plane) */
2585             do {
2586             PDLA_Indx psize; PDLA_Indx i_off; PDLA_Indx j; char t_vio; char carry;
2587             /* Prefrobnicate the transformation matrix */
2588 737280           psize = (PDLA_Long)(blur * PDLA_xform_aux(map, ovec, tmp, sv_min) + 0.5)+1; /* assignment */
2589              
2590             #ifdef DEBUG_MAP
2591             {
2592             int k; PDLA_Indx foo = 0;
2593             printf("ovec: [");
2594             for(k=0;k
2595             foo += ovec[k] * map->dimincs[k+1];
2596             printf(" %2d ",(int)(ovec[k]));
2597             }
2598             printf("]; psize is %ld; big is %d; blur is %8.2f; map is [",psize,big, blur);
2599             for(k=0;k
2600             printf("%8.2f",(double)(((PDLA_Long *)(map->data))[foo + k*map->dimincs[0]]));
2601             }
2602             printf("]\n");
2603             }
2604             #endif
2605              
2606             /* Don't bother accumulating output if psize is too large */
2607 737280 50         if(psize <= big) {
2608             /* Use the prefrobnicated matrix to generate a local linearization.
2609             * dvec gets the delta; ibvec gets the base.
2610             */
2611             {
2612 737280           PDLA_Double *mp = map_ptr;
2613 2211840 100         for (i=0;i
2614 1474560           dvec[i] = *mp - ( ibvec[i] = (PDLA_Long)(*mp + 0.5)); /* assignment */
2615 1474560           mp += map->dimincs[0];
2616             }
2617             }
2618              
2619             /* Initialize input delta vector */
2620 2211840 100         for(i=0;i
2621 1474560           ivec[i] = -psize;
2622              
2623             /* Initialize accumulators */
2624             {
2625 737280           PDLA_Double *ac = acc;
2626 1474560 100         for(i=0; i < in->dims[ndims]; i++)
2627 737280           *(ac++) = 0.0;
2628              
2629             }
2630             {
2631 737280           PDLA_Double *wg = wgt;
2632 1474560 100         for(i=0;i < in->dims[ndims]; i++)
2633 737280           *(wg++) = 0.0;
2634             }
2635             {
2636 737280           PDLA_Double *wg = wgt2;
2637 1474560 100         for(i=0;i < in->dims[ndims]; i++)
2638 737280           *(wg++) = 0.0;
2639             }
2640              
2641              
2642             /*
2643             * Calculate the original offset into the data array, to enable
2644             * delta calculations in the pixel loop
2645             *
2646             * i runs over dims; j holds the working integer index in the
2647             * current dim.
2648             *
2649             * This code matches the incrementation code at the bottom of the accumulation loop
2650             */
2651            
2652 737280           t_vio = 0; /* truncation-boundary violation count - don't bother if it is nonzero */
2653 737280           i_off = 0;
2654 2211840 100         for(i=0;i
2655 1474560           j = ivec[i] + ibvec[i];
2656 1474560 100         if(j<0 || j >= in->dims[i]) {
    50          
2657 1118280           switch(bounds[i]) {
2658             case 0: /* no breakage allowed */
2659 0           barf("%s","index out-of-bounds in map");
2660 0           break;
2661             case 1: /* truncation */
2662 1118280           t_vio++;
2663             /* fall through */
2664             case 2: /* extension -- crop */
2665 1118280 50         if(j<0)
2666 1118280           j=0;
2667 0           else j = in->dims[i] - 1;
2668 1118280           break;
2669             case 3: /* periodic -- mod it */
2670 0           j %= in->dims[i];
2671 0 0         if(j<0)
2672 0           j += in->dims[i];
2673 0           break;
2674             case 4: /* mirror -- reflect off the edges */
2675 0           j += in->dims[i];
2676 0           j %= (in->dims[i]*2);
2677 0 0         if(j<0)
2678 0           j += in->dims[i]*2;
2679 0           j -= in->dims[i];
2680 0 0         if(j<0) {
2681 0           j *= -1;
2682 0           j -= 1;
2683             }
2684 0           break;
2685             default:
2686 0           barf("%s", "Unknown boundary condition in map -- bug alert!");
2687 0           break;
2688             }
2689             }
2690 1474560           i_off += in->dimincs[i] * j;
2691             }
2692              
2693             /* Initialize index stashes for later reference as we scan the footprint */
2694             /* It's a pain in the ass to deal with boundaries, and doubly so at the */
2695             /* end of a dimensional scan. So we stash the index location at the */
2696             /* start of each dimensional scan here. When we finish incrementing */
2697             /* through a particular dim, we pull its value back out of the stash. */
2698 2211840 100         for(i=0;i
2699 1474560           index_stash[i] = i_off;
2700             }
2701              
2702             /* The input accumulation loop is the hotspot for the entire operation. */
2703             /* We loop over pixels in the region of interest (+/- psize in each dimension) */
2704             /* in the input array, use the linearized transform to bring each pixel center */
2705             /* forward to the output plane, and calculate a weighting based on the chosen */
2706             /* filter function. 'h' is a fast Hanning window rolloff using a lookup */
2707             /* table that is initialized the first time through the code. 'H' is the */
2708             /* same process, but explicitly calculated for each interation (~2x slower). */
2709             /* 'g' uses a radial Gaussian filter. Rather than calculate the array offset */
2710             /* into the input array fresh from the current input array vector each time, */
2711             /* we walk through the array using dimincs and the old offset. This saves */
2712             /* about half of the time spent on index calculation. */
2713              
2714             do { /* Input accumulation loop */
2715             PDLA_Double *cp;
2716             PDLA_Double alpha;
2717             /* Calculate the weight of the current input point. Don't bother if we're
2718             * violating any truncation boundaries (in that case our value is zero, but
2719             * for the interpolation we also set the weight to zero).
2720             */
2721 12401920 100         if( !t_vio ) {
2722              
2723 3043295           PDLA_Double *ap = tvec;
2724 3043295           PDLA_Double *bp = dvec;
2725 3043295           PDLA_Indx *ip = ivec;
2726 9129885 100         for(i=0; i
2727 6086590           *(ap++) = *(ip++) - *(bp++);
2728              
2729 3043295           switch(method) {
2730             PDLA_Double dd;
2731             case 'h':
2732             /* This is the Hanning window rolloff. It is a product of a simple */
2733             /* cos^2(theta) rolloff in each dimension. Using a lookup table */
2734             /* is about 2x faster than using cos(theta) directly in each */
2735             /* weighting calculation, so we do. Using 2500 entries and linear */
2736             /* interpolation is accurate to about 10^-7, and should preserve */
2737             /* the contents of cache pretty well. */
2738 608659           alpha = 1;
2739 608659           cp = tmp;
2740 1482006 100         for(i=0; i
2741             int lodex;
2742             int hidex;
2743             PDLA_Double beta;
2744 873347           dd = 0;
2745 873347           ap = tvec;
2746             /* Get the matrix-multiply element for this dimension */
2747 2620041 100         for(j=0;j
2748 1746694           dd += *(ap++) * *(cp++);
2749              
2750             /* Do linear interpolation from the table */
2751             /* The table captures a hanning window centered 0.5 pixel from center. */
2752             /* We scale the filter by the blur parameter -- but if blur is less */
2753             /* than unity, we shrink the hanning blur window while keeping the 0.5 */
2754             /* value on the pixel edge at 0.5. For blur greater than unity, we */
2755             /* scale simply. */
2756 873347           beta = fabs(dd) - hanning_offset;
2757 873347 100         if(beta > 0) {
2758 865595 100         if(beta >= blur) {
2759 511183           alpha = 0;
2760 511183           i = ndims;
2761             } else {
2762 354412           beta *= zeta;
2763 354412           lodex = beta;
2764 354412 50         beta -= lodex; if(lodex > HANNING_LOOKUP_SIZE)
2765 0           lodex = HANNING_LOOKUP_SIZE;
2766 354412           hidex = lodex+1;
2767 354412           alpha *= hanning_lookup[hidex]*beta + hanning_lookup[lodex]*(1-beta);
2768             } /* end of interpolation branch */
2769             } /* end of beta > 0 branch */
2770             } /* end of dimension loop */
2771 608659           break;
2772              
2773             case 'H':
2774             /* This is the Hanning window rolloff with explicit calculation, preserved */
2775             /* in case someone actually wants the slower longer method. */
2776 608659           alpha = 1;
2777 608659           cp = tmp;
2778 1482014 100         for(i=0; i
2779 873355           dd = 0;
2780 873355           ap = tvec;
2781 2620065 100         for(j=0;j
2782 1746710           dd += *(ap++) * *(cp++);
2783 873355           dd = (fabs(dd) - hanning_offset) / blur;
2784 873355 100         if( dd > 1 ) {
2785 511175           alpha = 0;
2786 511175           i = ndims;
2787             } else
2788 362180           alpha *= (0.5 + 0.5 * cos( dd * 3.1415926536 ));
2789             }
2790 608659           break;
2791              
2792             case 'g':
2793             /* This is the Gaussian rolloff. It does lookup into a precalculated exponential. */
2794             {
2795 1217318           PDLA_Double sum = 0;
2796 1217318           cp = tmp;
2797 3577400 100         for(i=0; i
2798 2360082           dd = 0;
2799 2360082           ap = tvec;
2800 7080246 100         for(j=0;j
2801 4720164           dd += *(ap++) * *(cp++);
2802 2360082           dd /= blur;
2803 2360082           sum += dd * dd;
2804 2360082 100         if(sum > GAUSSIAN_MAXVAL) {
2805 308982           i = ndims; /* exit early if we're too far out */
2806 308982           alpha = 0;
2807             }
2808             }
2809 1217318 100         if( sum > GAUSSIAN_MAXVAL || !isfinite(sum) || isnan(sum) ) {
    100          
    50          
2810 316734           alpha = 0;
2811             } else {
2812             int lodex,hidex;
2813 900584           PDLA_Double beta = fabs(zeta * sum);
2814              
2815 900584           lodex = beta;
2816 900584           beta -= lodex; hidex = lodex+1;
2817 900584           alpha = gaussian_lookup[hidex]*beta + gaussian_lookup[lodex]*(1 - beta);
2818              
2819             }
2820             }
2821 1217318           break;
2822              
2823             case 'G':
2824             /* This is the Gaussian rolloff with explicit calculation, preserved */
2825             /* in case someone actually wants the slower longer method. */
2826             {
2827 608659           PDLA_Double sum = 0;
2828 608659           cp = tmp;
2829 1708727 100         for(i=0; i
2830 1100068           dd = 0;
2831 1100068           ap = tvec;
2832 3300204 100         for(j=0;j
2833 2200136           dd += *(ap++) * *(cp++);
2834 1100068           dd /= blur;
2835 1100068           sum += dd * dd;
2836 1100068 100         if(sum > 4) /* 2 pixels -- four half-widths */
2837 316983           i = ndims; /* exit early if this pixel is too far outside the footprint of the ideal point */
2838             }
2839              
2840 608659 100         if(sum > GAUSSIAN_MAXVAL)
2841 123548           alpha = 0;
2842             else
2843 485111           alpha = exp(-sum * 1.386294); /* Gaussian, rt(2)-pix HWHM */
2844             }
2845 608659           break;
2846             default:
2847             {
2848             char buf[80];
2849 0           sprintf(buf,"This can't happen: method='%c'",method);
2850 0           barf("%s", buf);
2851             }
2852             }
2853              
2854             { /* convenience block -- accumulate the current point into the weighted sum. */
2855             /* This is more than simple assignment because we have our own explicit poor */
2856             /* man's threadloop here, so we accumulate each threaded element separately. */
2857 3043295           PDLA_Long *dat = ((PDLA_Long *)(in->data)) + i_off;
2858 3043295           PDLA_Indx max = out->dims[ndims];
2859 6086590 100         for( i=0; i < max; i++ ) {
2860 3043295 50         if( (badval==0) || (*dat != badval) ) {
    0          
2861 3043295           acc[i] += *dat * alpha;
2862 3043295           dat += in->dimincs[ndims];
2863 3043295           wgt[i] += alpha;
2864             }
2865 3043295           wgt2[i] += alpha; }
2866             }
2867             } /* end of t_vio check (i.e. of input accumulation) */
2868              
2869              
2870             /* Advance input accumulation loop. */
2871             /* We both increment the total vector and also advance the index. */
2872 12401920           carry = 1;
2873 27736480 100         for(i=0; i
    100          
2874             /* Advance the current element of the offset vector */
2875 15334560           ivec[i]++;
2876 15334560           j = ivec[i] + ibvec[i];
2877              
2878             /* Advance the offset into the data array */
2879 15334560 100         if( j > 0 && j <= in->dims[i]-1 ) {
    50          
2880             /* Normal case -- just advance the input vector */
2881 6289185           i_off += in->dimincs[i];
2882             } else {
2883             /* Busted a boundary - either before or after. */
2884 9045375           switch(bounds[i]){
2885             case 0: /* no breakage allowed -- treat as truncation for interpolation */
2886             case 1: /* truncation -- if we crossed the boundary mark ourselves out-of-bounds */
2887 9045375 100         if( j == 0 )
2888 521970           t_vio--;
2889 8523405 50         else if( j == in->dims[i] )
2890 0           t_vio++;
2891 9045375           break;
2892             case 2: /* extension -- do nothing (so the same input point is re-used) */
2893 0           break;
2894             case 3: /* periodic -- advance and mod into the allowed range */
2895 0 0         if((j % in->dims[i]) == 0) {
2896 0           i_off -= in->dimincs[i] * (in->dims[i]-1);
2897             } else {
2898 0           i_off += in->dimincs[i];
2899             }
2900 0           break;
2901             case 4: /* mirror -- advance or retreat depending on phase */
2902 0           j += in->dims[i];
2903 0           j %= (in->dims[i]*2);
2904 0           j -= in->dims[i];
2905 0 0         if( j!=0 && j!= -in->dims[i] ) {
    0          
2906 0 0         if(j<0)
2907 0           i_off -= in->dimincs[i];
2908             else
2909 0           i_off += in->dimincs[i];
2910             }
2911 0           break;
2912             }
2913             }
2914              
2915             /* Now check for carry */
2916 15334560 100         if(ivec[i] <= psize) {
2917             /* Normal case -- copy the current offset to the faster-running dim stashes */
2918             int k;
2919 13860000 100         for(k=0;k
2920 2195360           index_stash[k] = i_off;
2921             }
2922 11664640           carry = 0;
2923              
2924             } else { /* End of this scan -- recover the last position, and mark carry */
2925 3669920           i_off = index_stash[i];
2926 3669920 50         if(bounds[i]==1) {
2927 3669920           j = ivec[i] + ibvec[i];
2928 3669920 100         if( j < 0 || j >= in->dims[i] )
    50          
2929 2138120           t_vio--;
2930 3669920           ivec[i] = -psize;
2931 3669920           j = ivec[i] + ibvec[i];
2932 3669920 100         if( j < 0 || j >= in->dims[i] )
    50          
2933 2660090           t_vio++;
2934 3669920           carry = 1;
2935             } else {
2936 0           ivec[i] = -psize;
2937             }
2938             }
2939             } /* End of counter-advance loop */
2940 12401920 100         } while(carry==0); /* end of total data accumulation loop (termination condition has carry on last dim) */
2941              
2942             {
2943 737280           PDLA_Double *ac = acc;
2944 737280           PDLA_Double *wg = wgt;
2945 737280           PDLA_Double *wg2 = wgt2;
2946 737280           PDLA_Long *dat = out->data;
2947              
2948             /* Calculate output vector offset */
2949 2211840 100         for(i=0;i
2950 1474560           dat += out->dimincs[i] * ovec[i];
2951              
2952 737280 50         if(!flux) {
2953             /* Flux flag is NOT set -- normal case. Copy the weighted accumulated data. */
2954 1474560 100         for(i=0; i < out->dims[ndims]; i++) {
2955 737280 100         if(*wg && (*wg2 / *wg) < 1.5 ) {
    100          
2956 138062           *dat = *(ac++) / *(wg++);
2957 138062           wg2++;
2958             } else {
2959 599218           *dat = badval;
2960 599218           ac++; wg++; wg2++;
2961             }
2962 737280           dat += out->dimincs[ndims];
2963             }
2964             } else {
2965             /* Flux flag is set - scale by the (unpadded) determinant of the Jacobian */
2966 0           PDLA_Double det = tmp[ndims*ndims];
2967 737280 0         for(i=0; i < out->dims[ndims]; i++) {
2968 0 0         if(*wg && (*wg2 / *wg) < 1.5 ) {
    0          
2969 0           *dat = *(ac++) / *(wg++) * det;
2970 0           wg2++;
2971             } else {
2972 0           *dat = badval;
2973 0           ac++; wg++; wg2++;
2974             }
2975 0           dat += out->dimincs[ndims];
2976             } /* end of for loop */
2977             } /* end of flux flag set conditional */
2978             } /* end of convenience block */
2979            
2980             /* End of code for normal pixels */
2981             } else {
2982             /* The pixel was ludicrously huge -- just set this pixel to nan */
2983 0           PDLA_Long *dat = out->data;
2984 0 0         for(i=0;i
2985 0           dat += out->dimincs[i] * ovec[i];
2986 0 0         for(i=0;idims[ndims];i++) {
2987 0           *dat = badval; /* Should handle bad values too -- not yet */
2988 0           dat += out->dimincs[ndims];
2989             }
2990             }
2991              
2992             /* Increment the pixel counter */
2993             {
2994 739205 100         for(i=0;
2995 739200 50         (i
2996 739200 100         (map_ptr += map->dimincs[i+1]) && /* Funky pre-test increment */
2997 1478400           (++(ovec[i]) >= out->dims[i]); /* Actual carry test */
2998 1925           i++) {
2999 1925           ovec[i] = 0;
3000 1925           map_ptr -= out->dims[i] * map->dimincs[i+1];
3001             }
3002             }
3003 737280 100         } while(i
3004              
3005              
3006              
3007             }
3008             PDLA_COMMENT("THREADLOOPEND")
3009             }
3010             }
3011 5           k0_datap -= __tinc1_0 * __tdims1 + __offsp[0];
3012 5 50         } while(PDLA->iterthreadloop(&__privtrans->__pdlthread,2)); } break; case PDLA_IND: {
3013 0 0         PDLA_Indx * k0_datap = ((PDLA_Indx *)(PDLA_REPRP_TRANS((__privtrans->pdls[0]),(__privtrans->vtable->per_pdl_flags[0]))));
    0          
3014 0           PDLA_Indx * k0_physdatap = ((PDLA_Indx *)((__privtrans->pdls[0])->data));
3015              
3016              
3017             PDLA_COMMENT("THREADLOOPBEGIN")
3018 0 0         if ( PDLA->startthreadloop(&(__privtrans->__pdlthread),__privtrans->vtable->readdata, __tr) ) return;
3019 0           do { register PDLA_Indx __tind1=0,__tind2=0;
3020 0           register PDLA_Indx __tnpdls = __privtrans->__pdlthread.npdls;
3021 0           register PDLA_Indx __tdims1 = __privtrans->__pdlthread.dims[1];
3022 0           register PDLA_Indx __tdims0 = __privtrans->__pdlthread.dims[0];
3023 0           register PDLA_Indx *__offsp = PDLA->get_threadoffsp(&__privtrans->__pdlthread);
3024 0           register PDLA_Indx __tinc0_0 = __privtrans->__pdlthread.incs[0];
3025 0           register PDLA_Indx __tinc1_0 = __privtrans->__pdlthread.incs[__tnpdls+0];
3026 0           k0_datap += __offsp[0];
3027 0 0         for( __tind2 = 0 ;
3028             __tind2 < __tdims1 ;
3029 0           __tind2++
3030 0           ,k0_datap += __tinc1_0 - __tinc0_0 * __tdims0
3031             )
3032             {
3033 0 0         for( __tind1 = 0 ;
3034             __tind1 < __tdims0 ;
3035 0           __tind1++
3036 0           ,k0_datap += __tinc0_0
3037             )
3038             { PDLA_COMMENT("This is the tightest threadloop. Make sure inside is optimal."){
3039              
3040             /*
3041             * Pixel interpolation & averaging code
3042             *
3043             * Calls a common coordinate-transformation block (see following hdr)
3044             * that isn't dependent on the type of the input variable.
3045             *
3046             * The inputs are SVs to avoid hassling with threadloops; threading
3047             * is handled internally. To simplify the threading business, any
3048             * thread dimensions should all be collapsed to a single one by the
3049             * perl front-end.
3050             *
3051             */
3052              
3053             short ndims; /* Number of dimensions we're working in */
3054             PDLA_Double *tmp; /* Workspace for prefrobnication */
3055             PDLA_Indx *ovec; /* output pixel loop vector */
3056             PDLA_Indx *ivec; /* input pixel loop vector */
3057             PDLA_Indx *ibvec; /* input pixel base offset vector */
3058             PDLA_Double *dvec; /* Residual vector for linearization */
3059             PDLA_Double *tvec; /* Temporary floating-point vector */
3060             PDLA_Double *acc; /* Threaded accumulator */
3061             PDLA_Double *wgt; /* Threaded weight accumulator */
3062             PDLA_Double *wgt2; /* Threaded weight accumulator for badval finding */
3063             char *bounds; /* Boundary condition packed string */
3064             PDLA_Indx *index_stash; /* Stash to store the opening index of dim sample scans */
3065             char method; /* Method identifier (gets one of 'h','g') */
3066             PDLA_Long big; /* Max size of input footprint for each pix */
3067             PDLA_Double blur; /* Scaling of filter */
3068             PDLA_Double sv_min; /* minimum singular value */
3069             char flux; /* Flag to indicate flux conservation */
3070             PDLA_Double *map_ptr;
3071             PDLA_Long i, j;
3072 0 0         PDLA_Indx badval = SvNV(__privtrans->bv);
3073             #define HANNING_LOOKUP_SIZE 2500
3074             static PDLA_Double hanning_lookup[HANNING_LOOKUP_SIZE + 2];
3075             static int needs_hanning_calc = 1;
3076             PDLA_Double zeta;
3077             PDLA_Double hanning_offset;
3078              
3079             #define GAUSSIAN_LOOKUP_SIZE 4000
3080             #define GAUSSIAN_MAXVAL 6.25 /* 2.5 HWHMs (square it) */
3081             static PDLA_Double gaussian_lookup[GAUSSIAN_LOOKUP_SIZE + 2];
3082             static int needs_gaussian_calc = 1;
3083              
3084 0           pdl *in = PDLA->SvPDLAV(__privtrans->in);
3085 0           pdl *out = PDLA->SvPDLAV(__privtrans->out);
3086 0           pdl *map = PDLA->SvPDLAV(__privtrans->map);
3087              
3088 0           PDLA->make_physical(in);
3089 0           PDLA->make_physical(out);
3090 0           PDLA->make_physical(map);
3091              
3092 0           ndims = map->ndims -1;
3093              
3094             /*
3095             * Allocate all our dynamic workspaces at once
3096             * */
3097 0           ovec = (PDLA_Indx *)(PDLA->smalloc( (STRLEN)
3098 0           ( + sizeof(PDLA_Indx) * 3 * ndims + sizeof(PDLA_Double) * (3*ndims) + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * 3 * ndims*ndims + ndims + sizeof(char) * ndims + sizeof(PDLA_Indx) * ndims )
3099             )
3100             );
3101 0           ivec = &(ovec[ndims]);
3102 0           ibvec = &(ivec[ndims]);
3103 0           dvec = (PDLA_Double *)(&(ibvec[ndims]));
3104 0           tvec = &(dvec[ndims]);
3105 0           acc = &(tvec[ndims]);
3106 0           wgt = &(acc[in->dims[ndims]]); wgt2 = &(wgt[in->dims[ndims]]); tmp = &(wgt2[in->dims[ndims]]);
3107 0           bounds = (char *)(&(tmp [3*ndims*ndims+ndims]));
3108 0           index_stash = (PDLA_Indx *) &(bounds[ndims]);
3109              
3110              
3111             /***
3112             * Fill in the boundary condition array
3113             */
3114             {
3115             char *bstr;
3116             STRLEN blen;
3117 0 0         bstr = SvPV(__privtrans->boundary,blen);
3118              
3119 0 0         if(blen == 0) {
3120             /* If no boundary is specified then every dim gets truncated */
3121             int i;
3122 0 0         for (i=0;i
3123 0           bounds[i] = 1;
3124             } else {
3125             int i;
3126 0 0         for(i=0;i
3127 0 0         switch(bstr[i < blen ? i : blen-1 ]) {
3128             case '0': case 'f': case 'F': /* forbid */
3129 0           bounds[i] = 0;
3130 0           break;
3131             case '1': case 't': case 'T': /* truncate */
3132 0           bounds[i] = 1;
3133 0           break;
3134             case '2': case 'e': case 'E': /* extend */
3135 0           bounds[i] = 2;
3136 0           break;
3137             case '3': case 'p': case 'P': /* periodic */
3138 0           bounds[i] = 3;
3139 0           break;
3140             case '4': case 'm': case 'M': /* mirror */
3141 0           bounds[i] = 4;
3142 0           break;
3143             default:
3144             {
3145             char buf[BUFSIZ];
3146 0           sprintf(buf,"Error in map: Unknown boundary condition '%c'",bstr[i]);
3147 0           barf("%s", buf);
3148             }
3149 0           break;
3150             }
3151             }
3152             }
3153             }
3154              
3155             /***
3156             * Parse out the 'method', 'big', 'blur', and 'sv_min' arguments
3157             */
3158 0 0         big = labs((PDLA_Long) (SvNV(__privtrans->big)));
3159 0 0         if(big <= 0)
3160 0           barf("%s","map: 'big' parameter must be >0");
3161              
3162 0 0         blur = fabs((PDLA_Double) (SvNV(__privtrans->blur)));
3163 0 0         if(blur < 0)
3164 0           barf("%s","map: 'blur' parameter must be >= 0");
3165              
3166 0 0         sv_min = fabs((PDLA_Double) (SvNV(__privtrans->sv_min)));
3167 0 0         if(sv_min < 0)
3168 0           barf("%s","map: 'sv_min' parameter must be >= 0");
3169              
3170 0 0         flux = (SvNV(__privtrans->flux) != 0);
3171              
3172             {
3173             char *mstr;
3174             STRLEN mlen;
3175 0 0         mstr = SvPV(__privtrans->method,mlen);
3176              
3177 0 0         if(mlen==0)
3178 0           method = 'h';
3179 0           else switch(*mstr) {
3180 0           case 'H': method='H'; break;
3181 0           case 'h': method = 'h';
3182 0 0         if( needs_hanning_calc ) {
3183             int i;
3184 0 0         for(i=0;i
3185 0           hanning_lookup[i] = 0.5 + 0.5 * cos(3.1415926536 / HANNING_LOOKUP_SIZE * i);
3186             }
3187 0           hanning_lookup[HANNING_LOOKUP_SIZE] = 0;
3188 0           hanning_lookup[HANNING_LOOKUP_SIZE+1] = 0;
3189 0           needs_hanning_calc = 0;
3190             }
3191 0           zeta = HANNING_LOOKUP_SIZE / blur;
3192 0           hanning_offset = (blur >= 1) ?
3193 0 0         0 :
3194 0           0.5 * (1.0 - blur);
3195 0           break;
3196              
3197 0           case 'g': case 'j': method = 'g';
3198 0           zeta = GAUSSIAN_LOOKUP_SIZE / GAUSSIAN_MAXVAL;
3199              
3200 0 0         if( needs_gaussian_calc ) {
3201             int i;
3202 0 0         for(i=0;i
3203 0           gaussian_lookup[i] = exp( - i * 1.386294 / zeta );
3204             }
3205 0           gaussian_lookup[GAUSSIAN_LOOKUP_SIZE] = 0;
3206 0           gaussian_lookup[GAUSSIAN_LOOKUP_SIZE+1] = 0;
3207 0           needs_gaussian_calc = 0;
3208             }
3209 0           break;
3210              
3211 0           case 'G': case 'J': method = 'G'; break;
3212             default:
3213             {
3214             char err[80];
3215 0           sprintf(err,"Bug in map: unknown method '%c'",*mstr);
3216 0           barf("%s", err);
3217             }
3218 0           break;
3219             }
3220             }
3221              
3222              
3223              
3224             /* End of initialization */
3225             /*************************************************************/
3226             /* Start of Real Work */
3227              
3228             /* Initialize coordinate vector and map offset
3229             */
3230 0 0         for(i=0;i
3231 0           ovec[i] = 0;
3232              
3233 0           map_ptr = (PDLA_Double *)(map->data);
3234              
3235              
3236             /* Main pixel loop (iterates over pixels in the output plane) */
3237             do {
3238             PDLA_Indx psize; PDLA_Indx i_off; PDLA_Indx j; char t_vio; char carry;
3239             /* Prefrobnicate the transformation matrix */
3240 0           psize = (PDLA_Long)(blur * PDLA_xform_aux(map, ovec, tmp, sv_min) + 0.5)+1; /* assignment */
3241              
3242             #ifdef DEBUG_MAP
3243             {
3244             int k; PDLA_Indx foo = 0;
3245             printf("ovec: [");
3246             for(k=0;k
3247             foo += ovec[k] * map->dimincs[k+1];
3248             printf(" %2d ",(int)(ovec[k]));
3249             }
3250             printf("]; psize is %ld; big is %d; blur is %8.2f; map is [",psize,big, blur);
3251             for(k=0;k
3252             printf("%8.2f",(double)(((PDLA_Indx *)(map->data))[foo + k*map->dimincs[0]]));
3253             }
3254             printf("]\n");
3255             }
3256             #endif
3257              
3258             /* Don't bother accumulating output if psize is too large */
3259 0 0         if(psize <= big) {
3260             /* Use the prefrobnicated matrix to generate a local linearization.
3261             * dvec gets the delta; ibvec gets the base.
3262             */
3263             {
3264 0           PDLA_Double *mp = map_ptr;
3265 0 0         for (i=0;i
3266 0           dvec[i] = *mp - ( ibvec[i] = (PDLA_Long)(*mp + 0.5)); /* assignment */
3267 0           mp += map->dimincs[0];
3268             }
3269             }
3270              
3271             /* Initialize input delta vector */
3272 0 0         for(i=0;i
3273 0           ivec[i] = -psize;
3274              
3275             /* Initialize accumulators */
3276             {
3277 0           PDLA_Double *ac = acc;
3278 0 0         for(i=0; i < in->dims[ndims]; i++)
3279 0           *(ac++) = 0.0;
3280              
3281             }
3282             {
3283 0           PDLA_Double *wg = wgt;
3284 0 0         for(i=0;i < in->dims[ndims]; i++)
3285 0           *(wg++) = 0.0;
3286             }
3287             {
3288 0           PDLA_Double *wg = wgt2;
3289 0 0         for(i=0;i < in->dims[ndims]; i++)
3290 0           *(wg++) = 0.0;
3291             }
3292              
3293              
3294             /*
3295             * Calculate the original offset into the data array, to enable
3296             * delta calculations in the pixel loop
3297             *
3298             * i runs over dims; j holds the working integer index in the
3299             * current dim.
3300             *
3301             * This code matches the incrementation code at the bottom of the accumulation loop
3302             */
3303            
3304 0           t_vio = 0; /* truncation-boundary violation count - don't bother if it is nonzero */
3305 0           i_off = 0;
3306 0 0         for(i=0;i
3307 0           j = ivec[i] + ibvec[i];
3308 0 0         if(j<0 || j >= in->dims[i]) {
    0          
3309 0           switch(bounds[i]) {
3310             case 0: /* no breakage allowed */
3311 0           barf("%s","index out-of-bounds in map");
3312 0           break;
3313             case 1: /* truncation */
3314 0           t_vio++;
3315             /* fall through */
3316             case 2: /* extension -- crop */
3317 0 0         if(j<0)
3318 0           j=0;
3319 0           else j = in->dims[i] - 1;
3320 0           break;
3321             case 3: /* periodic -- mod it */
3322 0           j %= in->dims[i];
3323 0 0         if(j<0)
3324 0           j += in->dims[i];
3325 0           break;
3326             case 4: /* mirror -- reflect off the edges */
3327 0           j += in->dims[i];
3328 0           j %= (in->dims[i]*2);
3329 0 0         if(j<0)
3330 0           j += in->dims[i]*2;
3331 0           j -= in->dims[i];
3332 0 0         if(j<0) {
3333 0           j *= -1;
3334 0           j -= 1;
3335             }
3336 0           break;
3337             default:
3338 0           barf("%s", "Unknown boundary condition in map -- bug alert!");
3339 0           break;
3340             }
3341             }
3342 0           i_off += in->dimincs[i] * j;
3343             }
3344              
3345             /* Initialize index stashes for later reference as we scan the footprint */
3346             /* It's a pain in the ass to deal with boundaries, and doubly so at the */
3347             /* end of a dimensional scan. So we stash the index location at the */
3348             /* start of each dimensional scan here. When we finish incrementing */
3349             /* through a particular dim, we pull its value back out of the stash. */
3350 0 0         for(i=0;i
3351 0           index_stash[i] = i_off;
3352             }
3353              
3354             /* The input accumulation loop is the hotspot for the entire operation. */
3355             /* We loop over pixels in the region of interest (+/- psize in each dimension) */
3356             /* in the input array, use the linearized transform to bring each pixel center */
3357             /* forward to the output plane, and calculate a weighting based on the chosen */
3358             /* filter function. 'h' is a fast Hanning window rolloff using a lookup */
3359             /* table that is initialized the first time through the code. 'H' is the */
3360             /* same process, but explicitly calculated for each interation (~2x slower). */
3361             /* 'g' uses a radial Gaussian filter. Rather than calculate the array offset */
3362             /* into the input array fresh from the current input array vector each time, */
3363             /* we walk through the array using dimincs and the old offset. This saves */
3364             /* about half of the time spent on index calculation. */
3365              
3366             do { /* Input accumulation loop */
3367             PDLA_Double *cp;
3368             PDLA_Double alpha;
3369             /* Calculate the weight of the current input point. Don't bother if we're
3370             * violating any truncation boundaries (in that case our value is zero, but
3371             * for the interpolation we also set the weight to zero).
3372             */
3373 0 0         if( !t_vio ) {
3374              
3375 0           PDLA_Double *ap = tvec;
3376 0           PDLA_Double *bp = dvec;
3377 0           PDLA_Indx *ip = ivec;
3378 0 0         for(i=0; i
3379 0           *(ap++) = *(ip++) - *(bp++);
3380              
3381 0           switch(method) {
3382             PDLA_Double dd;
3383             case 'h':
3384             /* This is the Hanning window rolloff. It is a product of a simple */
3385             /* cos^2(theta) rolloff in each dimension. Using a lookup table */
3386             /* is about 2x faster than using cos(theta) directly in each */
3387             /* weighting calculation, so we do. Using 2500 entries and linear */
3388             /* interpolation is accurate to about 10^-7, and should preserve */
3389             /* the contents of cache pretty well. */
3390 0           alpha = 1;
3391 0           cp = tmp;
3392 0 0         for(i=0; i
3393             int lodex;
3394             int hidex;
3395             PDLA_Double beta;
3396 0           dd = 0;
3397 0           ap = tvec;
3398             /* Get the matrix-multiply element for this dimension */
3399 0 0         for(j=0;j
3400 0           dd += *(ap++) * *(cp++);
3401              
3402             /* Do linear interpolation from the table */
3403             /* The table captures a hanning window centered 0.5 pixel from center. */
3404             /* We scale the filter by the blur parameter -- but if blur is less */
3405             /* than unity, we shrink the hanning blur window while keeping the 0.5 */
3406             /* value on the pixel edge at 0.5. For blur greater than unity, we */
3407             /* scale simply. */
3408 0           beta = fabs(dd) - hanning_offset;
3409 0 0         if(beta > 0) {
3410 0 0         if(beta >= blur) {
3411 0           alpha = 0;
3412 0           i = ndims;
3413             } else {
3414 0           beta *= zeta;
3415 0           lodex = beta;
3416 0 0         beta -= lodex; if(lodex > HANNING_LOOKUP_SIZE)
3417 0           lodex = HANNING_LOOKUP_SIZE;
3418 0           hidex = lodex+1;
3419 0           alpha *= hanning_lookup[hidex]*beta + hanning_lookup[lodex]*(1-beta);
3420             } /* end of interpolation branch */
3421             } /* end of beta > 0 branch */
3422             } /* end of dimension loop */
3423 0           break;
3424              
3425             case 'H':
3426             /* This is the Hanning window rolloff with explicit calculation, preserved */
3427             /* in case someone actually wants the slower longer method. */
3428 0           alpha = 1;
3429 0           cp = tmp;
3430 0 0         for(i=0; i
3431 0           dd = 0;
3432 0           ap = tvec;
3433 0 0         for(j=0;j
3434 0           dd += *(ap++) * *(cp++);
3435 0           dd = (fabs(dd) - hanning_offset) / blur;
3436 0 0         if( dd > 1 ) {
3437 0           alpha = 0;
3438 0           i = ndims;
3439             } else
3440 0           alpha *= (0.5 + 0.5 * cos( dd * 3.1415926536 ));
3441             }
3442 0           break;
3443              
3444             case 'g':
3445             /* This is the Gaussian rolloff. It does lookup into a precalculated exponential. */
3446             {
3447 0           PDLA_Double sum = 0;
3448 0           cp = tmp;
3449 0 0         for(i=0; i
3450 0           dd = 0;
3451 0           ap = tvec;
3452 0 0         for(j=0;j
3453 0           dd += *(ap++) * *(cp++);
3454 0           dd /= blur;
3455 0           sum += dd * dd;
3456 0 0         if(sum > GAUSSIAN_MAXVAL) {
3457 0           i = ndims; /* exit early if we're too far out */
3458 0           alpha = 0;
3459             }
3460             }
3461 0 0         if( sum > GAUSSIAN_MAXVAL || !isfinite(sum) || isnan(sum) ) {
    0          
    0          
3462 0           alpha = 0;
3463             } else {
3464             int lodex,hidex;
3465 0           PDLA_Double beta = fabs(zeta * sum);
3466              
3467 0           lodex = beta;
3468 0           beta -= lodex; hidex = lodex+1;
3469 0           alpha = gaussian_lookup[hidex]*beta + gaussian_lookup[lodex]*(1 - beta);
3470              
3471             }
3472             }
3473 0           break;
3474              
3475             case 'G':
3476             /* This is the Gaussian rolloff with explicit calculation, preserved */
3477             /* in case someone actually wants the slower longer method. */
3478             {
3479 0           PDLA_Double sum = 0;
3480 0           cp = tmp;
3481 0 0         for(i=0; i
3482 0           dd = 0;
3483 0           ap = tvec;
3484 0 0         for(j=0;j
3485 0           dd += *(ap++) * *(cp++);
3486 0           dd /= blur;
3487 0           sum += dd * dd;
3488 0 0         if(sum > 4) /* 2 pixels -- four half-widths */
3489 0           i = ndims; /* exit early if this pixel is too far outside the footprint of the ideal point */
3490             }
3491              
3492 0 0         if(sum > GAUSSIAN_MAXVAL)
3493 0           alpha = 0;
3494             else
3495 0           alpha = exp(-sum * 1.386294); /* Gaussian, rt(2)-pix HWHM */
3496             }
3497 0           break;
3498             default:
3499             {
3500             char buf[80];
3501 0           sprintf(buf,"This can't happen: method='%c'",method);
3502 0           barf("%s", buf);
3503             }
3504             }
3505              
3506             { /* convenience block -- accumulate the current point into the weighted sum. */
3507             /* This is more than simple assignment because we have our own explicit poor */
3508             /* man's threadloop here, so we accumulate each threaded element separately. */
3509 0           PDLA_Indx *dat = ((PDLA_Indx *)(in->data)) + i_off;
3510 0           PDLA_Indx max = out->dims[ndims];
3511 0 0         for( i=0; i < max; i++ ) {
3512 0 0         if( (badval==0) || (*dat != badval) ) {
    0          
3513 0           acc[i] += *dat * alpha;
3514 0           dat += in->dimincs[ndims];
3515 0           wgt[i] += alpha;
3516             }
3517 0           wgt2[i] += alpha; }
3518             }
3519             } /* end of t_vio check (i.e. of input accumulation) */
3520              
3521              
3522             /* Advance input accumulation loop. */
3523             /* We both increment the total vector and also advance the index. */
3524 0           carry = 1;
3525 0 0         for(i=0; i
    0          
3526             /* Advance the current element of the offset vector */
3527 0           ivec[i]++;
3528 0           j = ivec[i] + ibvec[i];
3529              
3530             /* Advance the offset into the data array */
3531 0 0         if( j > 0 && j <= in->dims[i]-1 ) {
    0          
3532             /* Normal case -- just advance the input vector */
3533 0           i_off += in->dimincs[i];
3534             } else {
3535             /* Busted a boundary - either before or after. */
3536 0           switch(bounds[i]){
3537             case 0: /* no breakage allowed -- treat as truncation for interpolation */
3538             case 1: /* truncation -- if we crossed the boundary mark ourselves out-of-bounds */
3539 0 0         if( j == 0 )
3540 0           t_vio--;
3541 0 0         else if( j == in->dims[i] )
3542 0           t_vio++;
3543 0           break;
3544             case 2: /* extension -- do nothing (so the same input point is re-used) */
3545 0           break;
3546             case 3: /* periodic -- advance and mod into the allowed range */
3547 0 0         if((j % in->dims[i]) == 0) {
3548 0           i_off -= in->dimincs[i] * (in->dims[i]-1);
3549             } else {
3550 0           i_off += in->dimincs[i];
3551             }
3552 0           break;
3553             case 4: /* mirror -- advance or retreat depending on phase */
3554 0           j += in->dims[i];
3555 0           j %= (in->dims[i]*2);
3556 0           j -= in->dims[i];
3557 0 0         if( j!=0 && j!= -in->dims[i] ) {
    0          
3558 0 0         if(j<0)
3559 0           i_off -= in->dimincs[i];
3560             else
3561 0           i_off += in->dimincs[i];
3562             }
3563 0           break;
3564             }
3565             }
3566              
3567             /* Now check for carry */
3568 0 0         if(ivec[i] <= psize) {
3569             /* Normal case -- copy the current offset to the faster-running dim stashes */
3570             int k;
3571 0 0         for(k=0;k
3572 0           index_stash[k] = i_off;
3573             }
3574 0           carry = 0;
3575              
3576             } else { /* End of this scan -- recover the last position, and mark carry */
3577 0           i_off = index_stash[i];
3578 0 0         if(bounds[i]==1) {
3579 0           j = ivec[i] + ibvec[i];
3580 0 0         if( j < 0 || j >= in->dims[i] )
    0          
3581 0           t_vio--;
3582 0           ivec[i] = -psize;
3583 0           j = ivec[i] + ibvec[i];
3584 0 0         if( j < 0 || j >= in->dims[i] )
    0          
3585 0           t_vio++;
3586 0           carry = 1;
3587             } else {
3588 0           ivec[i] = -psize;
3589             }
3590             }
3591             } /* End of counter-advance loop */
3592 0 0         } while(carry==0); /* end of total data accumulation loop (termination condition has carry on last dim) */
3593              
3594             {
3595 0           PDLA_Double *ac = acc;
3596 0           PDLA_Double *wg = wgt;
3597 0           PDLA_Double *wg2 = wgt2;
3598 0           PDLA_Indx *dat = out->data;
3599              
3600             /* Calculate output vector offset */
3601 0 0         for(i=0;i
3602 0           dat += out->dimincs[i] * ovec[i];
3603              
3604 0 0         if(!flux) {
3605             /* Flux flag is NOT set -- normal case. Copy the weighted accumulated data. */
3606 0 0         for(i=0; i < out->dims[ndims]; i++) {
3607 0 0         if(*wg && (*wg2 / *wg) < 1.5 ) {
    0          
3608 0           *dat = *(ac++) / *(wg++);
3609 0           wg2++;
3610             } else {
3611 0           *dat = badval;
3612 0           ac++; wg++; wg2++;
3613             }
3614 0           dat += out->dimincs[ndims];
3615             }
3616             } else {
3617             /* Flux flag is set - scale by the (unpadded) determinant of the Jacobian */
3618 0           PDLA_Double det = tmp[ndims*ndims];
3619 0 0         for(i=0; i < out->dims[ndims]; i++) {
3620 0 0         if(*wg && (*wg2 / *wg) < 1.5 ) {
    0          
3621 0           *dat = *(ac++) / *(wg++) * det;
3622 0           wg2++;
3623             } else {
3624 0           *dat = badval;
3625 0           ac++; wg++; wg2++;
3626             }
3627 0           dat += out->dimincs[ndims];
3628             } /* end of for loop */
3629             } /* end of flux flag set conditional */
3630             } /* end of convenience block */
3631            
3632             /* End of code for normal pixels */
3633             } else {
3634             /* The pixel was ludicrously huge -- just set this pixel to nan */
3635 0           PDLA_Indx *dat = out->data;
3636 0 0         for(i=0;i
3637 0           dat += out->dimincs[i] * ovec[i];
3638 0 0         for(i=0;idims[ndims];i++) {
3639 0           *dat = badval; /* Should handle bad values too -- not yet */
3640 0           dat += out->dimincs[ndims];
3641             }
3642             }
3643              
3644             /* Increment the pixel counter */
3645             {
3646 0 0         for(i=0;
3647 0 0         (i
3648 0 0         (map_ptr += map->dimincs[i+1]) && /* Funky pre-test increment */
3649 0           (++(ovec[i]) >= out->dims[i]); /* Actual carry test */
3650 0           i++) {
3651 0           ovec[i] = 0;
3652 0           map_ptr -= out->dims[i] * map->dimincs[i+1];
3653             }
3654             }
3655 0 0         } while(i
3656              
3657              
3658              
3659             }
3660             PDLA_COMMENT("THREADLOOPEND")
3661             }
3662             }
3663 0           k0_datap -= __tinc1_0 * __tdims1 + __offsp[0];
3664 0 0         } while(PDLA->iterthreadloop(&__privtrans->__pdlthread,2)); } break; case PDLA_LL: {
3665 0 0         PDLA_LongLong * k0_datap = ((PDLA_LongLong *)(PDLA_REPRP_TRANS((__privtrans->pdls[0]),(__privtrans->vtable->per_pdl_flags[0]))));
    0          
3666 0           PDLA_LongLong * k0_physdatap = ((PDLA_LongLong *)((__privtrans->pdls[0])->data));
3667              
3668              
3669             PDLA_COMMENT("THREADLOOPBEGIN")
3670 0 0         if ( PDLA->startthreadloop(&(__privtrans->__pdlthread),__privtrans->vtable->readdata, __tr) ) return;
3671 0           do { register PDLA_Indx __tind1=0,__tind2=0;
3672 0           register PDLA_Indx __tnpdls = __privtrans->__pdlthread.npdls;
3673 0           register PDLA_Indx __tdims1 = __privtrans->__pdlthread.dims[1];
3674 0           register PDLA_Indx __tdims0 = __privtrans->__pdlthread.dims[0];
3675 0           register PDLA_Indx *__offsp = PDLA->get_threadoffsp(&__privtrans->__pdlthread);
3676 0           register PDLA_Indx __tinc0_0 = __privtrans->__pdlthread.incs[0];
3677 0           register PDLA_Indx __tinc1_0 = __privtrans->__pdlthread.incs[__tnpdls+0];
3678 0           k0_datap += __offsp[0];
3679 0 0         for( __tind2 = 0 ;
3680             __tind2 < __tdims1 ;
3681 0           __tind2++
3682 0           ,k0_datap += __tinc1_0 - __tinc0_0 * __tdims0
3683             )
3684             {
3685 0 0         for( __tind1 = 0 ;
3686             __tind1 < __tdims0 ;
3687 0           __tind1++
3688 0           ,k0_datap += __tinc0_0
3689             )
3690             { PDLA_COMMENT("This is the tightest threadloop. Make sure inside is optimal."){
3691              
3692             /*
3693             * Pixel interpolation & averaging code
3694             *
3695             * Calls a common coordinate-transformation block (see following hdr)
3696             * that isn't dependent on the type of the input variable.
3697             *
3698             * The inputs are SVs to avoid hassling with threadloops; threading
3699             * is handled internally. To simplify the threading business, any
3700             * thread dimensions should all be collapsed to a single one by the
3701             * perl front-end.
3702             *
3703             */
3704              
3705             short ndims; /* Number of dimensions we're working in */
3706             PDLA_Double *tmp; /* Workspace for prefrobnication */
3707             PDLA_Indx *ovec; /* output pixel loop vector */
3708             PDLA_Indx *ivec; /* input pixel loop vector */
3709             PDLA_Indx *ibvec; /* input pixel base offset vector */
3710             PDLA_Double *dvec; /* Residual vector for linearization */
3711             PDLA_Double *tvec; /* Temporary floating-point vector */
3712             PDLA_Double *acc; /* Threaded accumulator */
3713             PDLA_Double *wgt; /* Threaded weight accumulator */
3714             PDLA_Double *wgt2; /* Threaded weight accumulator for badval finding */
3715             char *bounds; /* Boundary condition packed string */
3716             PDLA_Indx *index_stash; /* Stash to store the opening index of dim sample scans */
3717             char method; /* Method identifier (gets one of 'h','g') */
3718             PDLA_Long big; /* Max size of input footprint for each pix */
3719             PDLA_Double blur; /* Scaling of filter */
3720             PDLA_Double sv_min; /* minimum singular value */
3721             char flux; /* Flag to indicate flux conservation */
3722             PDLA_Double *map_ptr;
3723             PDLA_Long i, j;
3724 0 0         PDLA_LongLong badval = SvNV(__privtrans->bv);
3725             #define HANNING_LOOKUP_SIZE 2500
3726             static PDLA_Double hanning_lookup[HANNING_LOOKUP_SIZE + 2];
3727             static int needs_hanning_calc = 1;
3728             PDLA_Double zeta;
3729             PDLA_Double hanning_offset;
3730              
3731             #define GAUSSIAN_LOOKUP_SIZE 4000
3732             #define GAUSSIAN_MAXVAL 6.25 /* 2.5 HWHMs (square it) */
3733             static PDLA_Double gaussian_lookup[GAUSSIAN_LOOKUP_SIZE + 2];
3734             static int needs_gaussian_calc = 1;
3735              
3736 0           pdl *in = PDLA->SvPDLAV(__privtrans->in);
3737 0           pdl *out = PDLA->SvPDLAV(__privtrans->out);
3738 0           pdl *map = PDLA->SvPDLAV(__privtrans->map);
3739              
3740 0           PDLA->make_physical(in);
3741 0           PDLA->make_physical(out);
3742 0           PDLA->make_physical(map);
3743              
3744 0           ndims = map->ndims -1;
3745              
3746             /*
3747             * Allocate all our dynamic workspaces at once
3748             * */
3749 0           ovec = (PDLA_Indx *)(PDLA->smalloc( (STRLEN)
3750 0           ( + sizeof(PDLA_Indx) * 3 * ndims + sizeof(PDLA_Double) * (3*ndims) + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * 3 * ndims*ndims + ndims + sizeof(char) * ndims + sizeof(PDLA_Indx) * ndims )
3751             )
3752             );
3753 0           ivec = &(ovec[ndims]);
3754 0           ibvec = &(ivec[ndims]);
3755 0           dvec = (PDLA_Double *)(&(ibvec[ndims]));
3756 0           tvec = &(dvec[ndims]);
3757 0           acc = &(tvec[ndims]);
3758 0           wgt = &(acc[in->dims[ndims]]); wgt2 = &(wgt[in->dims[ndims]]); tmp = &(wgt2[in->dims[ndims]]);
3759 0           bounds = (char *)(&(tmp [3*ndims*ndims+ndims]));
3760 0           index_stash = (PDLA_Indx *) &(bounds[ndims]);
3761              
3762              
3763             /***
3764             * Fill in the boundary condition array
3765             */
3766             {
3767             char *bstr;
3768             STRLEN blen;
3769 0 0         bstr = SvPV(__privtrans->boundary,blen);
3770              
3771 0 0         if(blen == 0) {
3772             /* If no boundary is specified then every dim gets truncated */
3773             int i;
3774 0 0         for (i=0;i
3775 0           bounds[i] = 1;
3776             } else {
3777             int i;
3778 0 0         for(i=0;i
3779 0 0         switch(bstr[i < blen ? i : blen-1 ]) {
3780             case '0': case 'f': case 'F': /* forbid */
3781 0           bounds[i] = 0;
3782 0           break;
3783             case '1': case 't': case 'T': /* truncate */
3784 0           bounds[i] = 1;
3785 0           break;
3786             case '2': case 'e': case 'E': /* extend */
3787 0           bounds[i] = 2;
3788 0           break;
3789             case '3': case 'p': case 'P': /* periodic */
3790 0           bounds[i] = 3;
3791 0           break;
3792             case '4': case 'm': case 'M': /* mirror */
3793 0           bounds[i] = 4;
3794 0           break;
3795             default:
3796             {
3797             char buf[BUFSIZ];
3798 0           sprintf(buf,"Error in map: Unknown boundary condition '%c'",bstr[i]);
3799 0           barf("%s", buf);
3800             }
3801 0           break;
3802             }
3803             }
3804             }
3805             }
3806              
3807             /***
3808             * Parse out the 'method', 'big', 'blur', and 'sv_min' arguments
3809             */
3810 0 0         big = labs((PDLA_Long) (SvNV(__privtrans->big)));
3811 0 0         if(big <= 0)
3812 0           barf("%s","map: 'big' parameter must be >0");
3813              
3814 0 0         blur = fabs((PDLA_Double) (SvNV(__privtrans->blur)));
3815 0 0         if(blur < 0)
3816 0           barf("%s","map: 'blur' parameter must be >= 0");
3817              
3818 0 0         sv_min = fabs((PDLA_Double) (SvNV(__privtrans->sv_min)));
3819 0 0         if(sv_min < 0)
3820 0           barf("%s","map: 'sv_min' parameter must be >= 0");
3821              
3822 0 0         flux = (SvNV(__privtrans->flux) != 0);
3823              
3824             {
3825             char *mstr;
3826             STRLEN mlen;
3827 0 0         mstr = SvPV(__privtrans->method,mlen);
3828              
3829 0 0         if(mlen==0)
3830 0           method = 'h';
3831 0           else switch(*mstr) {
3832 0           case 'H': method='H'; break;
3833 0           case 'h': method = 'h';
3834 0 0         if( needs_hanning_calc ) {
3835             int i;
3836 0 0         for(i=0;i
3837 0           hanning_lookup[i] = 0.5 + 0.5 * cos(3.1415926536 / HANNING_LOOKUP_SIZE * i);
3838             }
3839 0           hanning_lookup[HANNING_LOOKUP_SIZE] = 0;
3840 0           hanning_lookup[HANNING_LOOKUP_SIZE+1] = 0;
3841 0           needs_hanning_calc = 0;
3842             }
3843 0           zeta = HANNING_LOOKUP_SIZE / blur;
3844 0           hanning_offset = (blur >= 1) ?
3845 0 0         0 :
3846 0           0.5 * (1.0 - blur);
3847 0           break;
3848              
3849 0           case 'g': case 'j': method = 'g';
3850 0           zeta = GAUSSIAN_LOOKUP_SIZE / GAUSSIAN_MAXVAL;
3851              
3852 0 0         if( needs_gaussian_calc ) {
3853             int i;
3854 0 0         for(i=0;i
3855 0           gaussian_lookup[i] = exp( - i * 1.386294 / zeta );
3856             }
3857 0           gaussian_lookup[GAUSSIAN_LOOKUP_SIZE] = 0;
3858 0           gaussian_lookup[GAUSSIAN_LOOKUP_SIZE+1] = 0;
3859 0           needs_gaussian_calc = 0;
3860             }
3861 0           break;
3862              
3863 0           case 'G': case 'J': method = 'G'; break;
3864             default:
3865             {
3866             char err[80];
3867 0           sprintf(err,"Bug in map: unknown method '%c'",*mstr);
3868 0           barf("%s", err);
3869             }
3870 0           break;
3871             }
3872             }
3873              
3874              
3875              
3876             /* End of initialization */
3877             /*************************************************************/
3878             /* Start of Real Work */
3879              
3880             /* Initialize coordinate vector and map offset
3881             */
3882 0 0         for(i=0;i
3883 0           ovec[i] = 0;
3884              
3885 0           map_ptr = (PDLA_Double *)(map->data);
3886              
3887              
3888             /* Main pixel loop (iterates over pixels in the output plane) */
3889             do {
3890             PDLA_Indx psize; PDLA_Indx i_off; PDLA_Indx j; char t_vio; char carry;
3891             /* Prefrobnicate the transformation matrix */
3892 0           psize = (PDLA_Long)(blur * PDLA_xform_aux(map, ovec, tmp, sv_min) + 0.5)+1; /* assignment */
3893              
3894             #ifdef DEBUG_MAP
3895             {
3896             int k; PDLA_Indx foo = 0;
3897             printf("ovec: [");
3898             for(k=0;k
3899             foo += ovec[k] * map->dimincs[k+1];
3900             printf(" %2d ",(int)(ovec[k]));
3901             }
3902             printf("]; psize is %ld; big is %d; blur is %8.2f; map is [",psize,big, blur);
3903             for(k=0;k
3904             printf("%8.2f",(double)(((PDLA_LongLong *)(map->data))[foo + k*map->dimincs[0]]));
3905             }
3906             printf("]\n");
3907             }
3908             #endif
3909              
3910             /* Don't bother accumulating output if psize is too large */
3911 0 0         if(psize <= big) {
3912             /* Use the prefrobnicated matrix to generate a local linearization.
3913             * dvec gets the delta; ibvec gets the base.
3914             */
3915             {
3916 0           PDLA_Double *mp = map_ptr;
3917 0 0         for (i=0;i
3918 0           dvec[i] = *mp - ( ibvec[i] = (PDLA_Long)(*mp + 0.5)); /* assignment */
3919 0           mp += map->dimincs[0];
3920             }
3921             }
3922              
3923             /* Initialize input delta vector */
3924 0 0         for(i=0;i
3925 0           ivec[i] = -psize;
3926              
3927             /* Initialize accumulators */
3928             {
3929 0           PDLA_Double *ac = acc;
3930 0 0         for(i=0; i < in->dims[ndims]; i++)
3931 0           *(ac++) = 0.0;
3932              
3933             }
3934             {
3935 0           PDLA_Double *wg = wgt;
3936 0 0         for(i=0;i < in->dims[ndims]; i++)
3937 0           *(wg++) = 0.0;
3938             }
3939             {
3940 0           PDLA_Double *wg = wgt2;
3941 0 0         for(i=0;i < in->dims[ndims]; i++)
3942 0           *(wg++) = 0.0;
3943             }
3944              
3945              
3946             /*
3947             * Calculate the original offset into the data array, to enable
3948             * delta calculations in the pixel loop
3949             *
3950             * i runs over dims; j holds the working integer index in the
3951             * current dim.
3952             *
3953             * This code matches the incrementation code at the bottom of the accumulation loop
3954             */
3955            
3956 0           t_vio = 0; /* truncation-boundary violation count - don't bother if it is nonzero */
3957 0           i_off = 0;
3958 0 0         for(i=0;i
3959 0           j = ivec[i] + ibvec[i];
3960 0 0         if(j<0 || j >= in->dims[i]) {
    0          
3961 0           switch(bounds[i]) {
3962             case 0: /* no breakage allowed */
3963 0           barf("%s","index out-of-bounds in map");
3964 0           break;
3965             case 1: /* truncation */
3966 0           t_vio++;
3967             /* fall through */
3968             case 2: /* extension -- crop */
3969 0 0         if(j<0)
3970 0           j=0;
3971 0           else j = in->dims[i] - 1;
3972 0           break;
3973             case 3: /* periodic -- mod it */
3974 0           j %= in->dims[i];
3975 0 0         if(j<0)
3976 0           j += in->dims[i];
3977 0           break;
3978             case 4: /* mirror -- reflect off the edges */
3979 0           j += in->dims[i];
3980 0           j %= (in->dims[i]*2);
3981 0 0         if(j<0)
3982 0           j += in->dims[i]*2;
3983 0           j -= in->dims[i];
3984 0 0         if(j<0) {
3985 0           j *= -1;
3986 0           j -= 1;
3987             }
3988 0           break;
3989             default:
3990 0           barf("%s", "Unknown boundary condition in map -- bug alert!");
3991 0           break;
3992             }
3993             }
3994 0           i_off += in->dimincs[i] * j;
3995             }
3996              
3997             /* Initialize index stashes for later reference as we scan the footprint */
3998             /* It's a pain in the ass to deal with boundaries, and doubly so at the */
3999             /* end of a dimensional scan. So we stash the index location at the */
4000             /* start of each dimensional scan here. When we finish incrementing */
4001             /* through a particular dim, we pull its value back out of the stash. */
4002 0 0         for(i=0;i
4003 0           index_stash[i] = i_off;
4004             }
4005              
4006             /* The input accumulation loop is the hotspot for the entire operation. */
4007             /* We loop over pixels in the region of interest (+/- psize in each dimension) */
4008             /* in the input array, use the linearized transform to bring each pixel center */
4009             /* forward to the output plane, and calculate a weighting based on the chosen */
4010             /* filter function. 'h' is a fast Hanning window rolloff using a lookup */
4011             /* table that is initialized the first time through the code. 'H' is the */
4012             /* same process, but explicitly calculated for each interation (~2x slower). */
4013             /* 'g' uses a radial Gaussian filter. Rather than calculate the array offset */
4014             /* into the input array fresh from the current input array vector each time, */
4015             /* we walk through the array using dimincs and the old offset. This saves */
4016             /* about half of the time spent on index calculation. */
4017              
4018             do { /* Input accumulation loop */
4019             PDLA_Double *cp;
4020             PDLA_Double alpha;
4021             /* Calculate the weight of the current input point. Don't bother if we're
4022             * violating any truncation boundaries (in that case our value is zero, but
4023             * for the interpolation we also set the weight to zero).
4024             */
4025 0 0         if( !t_vio ) {
4026              
4027 0           PDLA_Double *ap = tvec;
4028 0           PDLA_Double *bp = dvec;
4029 0           PDLA_Indx *ip = ivec;
4030 0 0         for(i=0; i
4031 0           *(ap++) = *(ip++) - *(bp++);
4032              
4033 0           switch(method) {
4034             PDLA_Double dd;
4035             case 'h':
4036             /* This is the Hanning window rolloff. It is a product of a simple */
4037             /* cos^2(theta) rolloff in each dimension. Using a lookup table */
4038             /* is about 2x faster than using cos(theta) directly in each */
4039             /* weighting calculation, so we do. Using 2500 entries and linear */
4040             /* interpolation is accurate to about 10^-7, and should preserve */
4041             /* the contents of cache pretty well. */
4042 0           alpha = 1;
4043 0           cp = tmp;
4044 0 0         for(i=0; i
4045             int lodex;
4046             int hidex;
4047             PDLA_Double beta;
4048 0           dd = 0;
4049 0           ap = tvec;
4050             /* Get the matrix-multiply element for this dimension */
4051 0 0         for(j=0;j
4052 0           dd += *(ap++) * *(cp++);
4053              
4054             /* Do linear interpolation from the table */
4055             /* The table captures a hanning window centered 0.5 pixel from center. */
4056             /* We scale the filter by the blur parameter -- but if blur is less */
4057             /* than unity, we shrink the hanning blur window while keeping the 0.5 */
4058             /* value on the pixel edge at 0.5. For blur greater than unity, we */
4059             /* scale simply. */
4060 0           beta = fabs(dd) - hanning_offset;
4061 0 0         if(beta > 0) {
4062 0 0         if(beta >= blur) {
4063 0           alpha = 0;
4064 0           i = ndims;
4065             } else {
4066 0           beta *= zeta;
4067 0           lodex = beta;
4068 0 0         beta -= lodex; if(lodex > HANNING_LOOKUP_SIZE)
4069 0           lodex = HANNING_LOOKUP_SIZE;
4070 0           hidex = lodex+1;
4071 0           alpha *= hanning_lookup[hidex]*beta + hanning_lookup[lodex]*(1-beta);
4072             } /* end of interpolation branch */
4073             } /* end of beta > 0 branch */
4074             } /* end of dimension loop */
4075 0           break;
4076              
4077             case 'H':
4078             /* This is the Hanning window rolloff with explicit calculation, preserved */
4079             /* in case someone actually wants the slower longer method. */
4080 0           alpha = 1;
4081 0           cp = tmp;
4082 0 0         for(i=0; i
4083 0           dd = 0;
4084 0           ap = tvec;
4085 0 0         for(j=0;j
4086 0           dd += *(ap++) * *(cp++);
4087 0           dd = (fabs(dd) - hanning_offset) / blur;
4088 0 0         if( dd > 1 ) {
4089 0           alpha = 0;
4090 0           i = ndims;
4091             } else
4092 0           alpha *= (0.5 + 0.5 * cos( dd * 3.1415926536 ));
4093             }
4094 0           break;
4095              
4096             case 'g':
4097             /* This is the Gaussian rolloff. It does lookup into a precalculated exponential. */
4098             {
4099 0           PDLA_Double sum = 0;
4100 0           cp = tmp;
4101 0 0         for(i=0; i
4102 0           dd = 0;
4103 0           ap = tvec;
4104 0 0         for(j=0;j
4105 0           dd += *(ap++) * *(cp++);
4106 0           dd /= blur;
4107 0           sum += dd * dd;
4108 0 0         if(sum > GAUSSIAN_MAXVAL) {
4109 0           i = ndims; /* exit early if we're too far out */
4110 0           alpha = 0;
4111             }
4112             }
4113 0 0         if( sum > GAUSSIAN_MAXVAL || !isfinite(sum) || isnan(sum) ) {
    0          
    0          
4114 0           alpha = 0;
4115             } else {
4116             int lodex,hidex;
4117 0           PDLA_Double beta = fabs(zeta * sum);
4118              
4119 0           lodex = beta;
4120 0           beta -= lodex; hidex = lodex+1;
4121 0           alpha = gaussian_lookup[hidex]*beta + gaussian_lookup[lodex]*(1 - beta);
4122              
4123             }
4124             }
4125 0           break;
4126              
4127             case 'G':
4128             /* This is the Gaussian rolloff with explicit calculation, preserved */
4129             /* in case someone actually wants the slower longer method. */
4130             {
4131 0           PDLA_Double sum = 0;
4132 0           cp = tmp;
4133 0 0         for(i=0; i
4134 0           dd = 0;
4135 0           ap = tvec;
4136 0 0         for(j=0;j
4137 0           dd += *(ap++) * *(cp++);
4138 0           dd /= blur;
4139 0           sum += dd * dd;
4140 0 0         if(sum > 4) /* 2 pixels -- four half-widths */
4141 0           i = ndims; /* exit early if this pixel is too far outside the footprint of the ideal point */
4142             }
4143              
4144 0 0         if(sum > GAUSSIAN_MAXVAL)
4145 0           alpha = 0;
4146             else
4147 0           alpha = exp(-sum * 1.386294); /* Gaussian, rt(2)-pix HWHM */
4148             }
4149 0           break;
4150             default:
4151             {
4152             char buf[80];
4153 0           sprintf(buf,"This can't happen: method='%c'",method);
4154 0           barf("%s", buf);
4155             }
4156             }
4157              
4158             { /* convenience block -- accumulate the current point into the weighted sum. */
4159             /* This is more than simple assignment because we have our own explicit poor */
4160             /* man's threadloop here, so we accumulate each threaded element separately. */
4161 0           PDLA_LongLong *dat = ((PDLA_LongLong *)(in->data)) + i_off;
4162 0           PDLA_Indx max = out->dims[ndims];
4163 0 0         for( i=0; i < max; i++ ) {
4164 0 0         if( (badval==0) || (*dat != badval) ) {
    0          
4165 0           acc[i] += *dat * alpha;
4166 0           dat += in->dimincs[ndims];
4167 0           wgt[i] += alpha;
4168             }
4169 0           wgt2[i] += alpha; }
4170             }
4171             } /* end of t_vio check (i.e. of input accumulation) */
4172              
4173              
4174             /* Advance input accumulation loop. */
4175             /* We both increment the total vector and also advance the index. */
4176 0           carry = 1;
4177 0 0         for(i=0; i
    0          
4178             /* Advance the current element of the offset vector */
4179 0           ivec[i]++;
4180 0           j = ivec[i] + ibvec[i];
4181              
4182             /* Advance the offset into the data array */
4183 0 0         if( j > 0 && j <= in->dims[i]-1 ) {
    0          
4184             /* Normal case -- just advance the input vector */
4185 0           i_off += in->dimincs[i];
4186             } else {
4187             /* Busted a boundary - either before or after. */
4188 0           switch(bounds[i]){
4189             case 0: /* no breakage allowed -- treat as truncation for interpolation */
4190             case 1: /* truncation -- if we crossed the boundary mark ourselves out-of-bounds */
4191 0 0         if( j == 0 )
4192 0           t_vio--;
4193 0 0         else if( j == in->dims[i] )
4194 0           t_vio++;
4195 0           break;
4196             case 2: /* extension -- do nothing (so the same input point is re-used) */
4197 0           break;
4198             case 3: /* periodic -- advance and mod into the allowed range */
4199 0 0         if((j % in->dims[i]) == 0) {
4200 0           i_off -= in->dimincs[i] * (in->dims[i]-1);
4201             } else {
4202 0           i_off += in->dimincs[i];
4203             }
4204 0           break;
4205             case 4: /* mirror -- advance or retreat depending on phase */
4206 0           j += in->dims[i];
4207 0           j %= (in->dims[i]*2);
4208 0           j -= in->dims[i];
4209 0 0         if( j!=0 && j!= -in->dims[i] ) {
    0          
4210 0 0         if(j<0)
4211 0           i_off -= in->dimincs[i];
4212             else
4213 0           i_off += in->dimincs[i];
4214             }
4215 0           break;
4216             }
4217             }
4218              
4219             /* Now check for carry */
4220 0 0         if(ivec[i] <= psize) {
4221             /* Normal case -- copy the current offset to the faster-running dim stashes */
4222             int k;
4223 0 0         for(k=0;k
4224 0           index_stash[k] = i_off;
4225             }
4226 0           carry = 0;
4227              
4228             } else { /* End of this scan -- recover the last position, and mark carry */
4229 0           i_off = index_stash[i];
4230 0 0         if(bounds[i]==1) {
4231 0           j = ivec[i] + ibvec[i];
4232 0 0         if( j < 0 || j >= in->dims[i] )
    0          
4233 0           t_vio--;
4234 0           ivec[i] = -psize;
4235 0           j = ivec[i] + ibvec[i];
4236 0 0         if( j < 0 || j >= in->dims[i] )
    0          
4237 0           t_vio++;
4238 0           carry = 1;
4239             } else {
4240 0           ivec[i] = -psize;
4241             }
4242             }
4243             } /* End of counter-advance loop */
4244 0 0         } while(carry==0); /* end of total data accumulation loop (termination condition has carry on last dim) */
4245              
4246             {
4247 0           PDLA_Double *ac = acc;
4248 0           PDLA_Double *wg = wgt;
4249 0           PDLA_Double *wg2 = wgt2;
4250 0           PDLA_LongLong *dat = out->data;
4251              
4252             /* Calculate output vector offset */
4253 0 0         for(i=0;i
4254 0           dat += out->dimincs[i] * ovec[i];
4255              
4256 0 0         if(!flux) {
4257             /* Flux flag is NOT set -- normal case. Copy the weighted accumulated data. */
4258 0 0         for(i=0; i < out->dims[ndims]; i++) {
4259 0 0         if(*wg && (*wg2 / *wg) < 1.5 ) {
    0          
4260 0           *dat = *(ac++) / *(wg++);
4261 0           wg2++;
4262             } else {
4263 0           *dat = badval;
4264 0           ac++; wg++; wg2++;
4265             }
4266 0           dat += out->dimincs[ndims];
4267             }
4268             } else {
4269             /* Flux flag is set - scale by the (unpadded) determinant of the Jacobian */
4270 0           PDLA_Double det = tmp[ndims*ndims];
4271 0 0         for(i=0; i < out->dims[ndims]; i++) {
4272 0 0         if(*wg && (*wg2 / *wg) < 1.5 ) {
    0          
4273 0           *dat = *(ac++) / *(wg++) * det;
4274 0           wg2++;
4275             } else {
4276 0           *dat = badval;
4277 0           ac++; wg++; wg2++;
4278             }
4279 0           dat += out->dimincs[ndims];
4280             } /* end of for loop */
4281             } /* end of flux flag set conditional */
4282             } /* end of convenience block */
4283            
4284             /* End of code for normal pixels */
4285             } else {
4286             /* The pixel was ludicrously huge -- just set this pixel to nan */
4287 0           PDLA_LongLong *dat = out->data;
4288 0 0         for(i=0;i
4289 0           dat += out->dimincs[i] * ovec[i];
4290 0 0         for(i=0;idims[ndims];i++) {
4291 0           *dat = badval; /* Should handle bad values too -- not yet */
4292 0           dat += out->dimincs[ndims];
4293             }
4294             }
4295              
4296             /* Increment the pixel counter */
4297             {
4298 0 0         for(i=0;
4299 0 0         (i
4300 0 0         (map_ptr += map->dimincs[i+1]) && /* Funky pre-test increment */
4301 0           (++(ovec[i]) >= out->dims[i]); /* Actual carry test */
4302 0           i++) {
4303 0           ovec[i] = 0;
4304 0           map_ptr -= out->dims[i] * map->dimincs[i+1];
4305             }
4306             }
4307 0 0         } while(i
4308              
4309              
4310              
4311             }
4312             PDLA_COMMENT("THREADLOOPEND")
4313             }
4314             }
4315 0           k0_datap -= __tinc1_0 * __tdims1 + __offsp[0];
4316 0 0         } while(PDLA->iterthreadloop(&__privtrans->__pdlthread,2)); } break; case PDLA_F: {
4317 0 0         PDLA_Float * k0_datap = ((PDLA_Float *)(PDLA_REPRP_TRANS((__privtrans->pdls[0]),(__privtrans->vtable->per_pdl_flags[0]))));
    0          
4318 0           PDLA_Float * k0_physdatap = ((PDLA_Float *)((__privtrans->pdls[0])->data));
4319              
4320              
4321             PDLA_COMMENT("THREADLOOPBEGIN")
4322 0 0         if ( PDLA->startthreadloop(&(__privtrans->__pdlthread),__privtrans->vtable->readdata, __tr) ) return;
4323 0           do { register PDLA_Indx __tind1=0,__tind2=0;
4324 0           register PDLA_Indx __tnpdls = __privtrans->__pdlthread.npdls;
4325 0           register PDLA_Indx __tdims1 = __privtrans->__pdlthread.dims[1];
4326 0           register PDLA_Indx __tdims0 = __privtrans->__pdlthread.dims[0];
4327 0           register PDLA_Indx *__offsp = PDLA->get_threadoffsp(&__privtrans->__pdlthread);
4328 0           register PDLA_Indx __tinc0_0 = __privtrans->__pdlthread.incs[0];
4329 0           register PDLA_Indx __tinc1_0 = __privtrans->__pdlthread.incs[__tnpdls+0];
4330 0           k0_datap += __offsp[0];
4331 0 0         for( __tind2 = 0 ;
4332             __tind2 < __tdims1 ;
4333 0           __tind2++
4334 0           ,k0_datap += __tinc1_0 - __tinc0_0 * __tdims0
4335             )
4336             {
4337 0 0         for( __tind1 = 0 ;
4338             __tind1 < __tdims0 ;
4339 0           __tind1++
4340 0           ,k0_datap += __tinc0_0
4341             )
4342             { PDLA_COMMENT("This is the tightest threadloop. Make sure inside is optimal."){
4343              
4344             /*
4345             * Pixel interpolation & averaging code
4346             *
4347             * Calls a common coordinate-transformation block (see following hdr)
4348             * that isn't dependent on the type of the input variable.
4349             *
4350             * The inputs are SVs to avoid hassling with threadloops; threading
4351             * is handled internally. To simplify the threading business, any
4352             * thread dimensions should all be collapsed to a single one by the
4353             * perl front-end.
4354             *
4355             */
4356              
4357             short ndims; /* Number of dimensions we're working in */
4358             PDLA_Double *tmp; /* Workspace for prefrobnication */
4359             PDLA_Indx *ovec; /* output pixel loop vector */
4360             PDLA_Indx *ivec; /* input pixel loop vector */
4361             PDLA_Indx *ibvec; /* input pixel base offset vector */
4362             PDLA_Double *dvec; /* Residual vector for linearization */
4363             PDLA_Double *tvec; /* Temporary floating-point vector */
4364             PDLA_Double *acc; /* Threaded accumulator */
4365             PDLA_Double *wgt; /* Threaded weight accumulator */
4366             PDLA_Double *wgt2; /* Threaded weight accumulator for badval finding */
4367             char *bounds; /* Boundary condition packed string */
4368             PDLA_Indx *index_stash; /* Stash to store the opening index of dim sample scans */
4369             char method; /* Method identifier (gets one of 'h','g') */
4370             PDLA_Long big; /* Max size of input footprint for each pix */
4371             PDLA_Double blur; /* Scaling of filter */
4372             PDLA_Double sv_min; /* minimum singular value */
4373             char flux; /* Flag to indicate flux conservation */
4374             PDLA_Double *map_ptr;
4375             PDLA_Long i, j;
4376 0 0         PDLA_Float badval = SvNV(__privtrans->bv);
4377             #define HANNING_LOOKUP_SIZE 2500
4378             static PDLA_Double hanning_lookup[HANNING_LOOKUP_SIZE + 2];
4379             static int needs_hanning_calc = 1;
4380             PDLA_Double zeta;
4381             PDLA_Double hanning_offset;
4382              
4383             #define GAUSSIAN_LOOKUP_SIZE 4000
4384             #define GAUSSIAN_MAXVAL 6.25 /* 2.5 HWHMs (square it) */
4385             static PDLA_Double gaussian_lookup[GAUSSIAN_LOOKUP_SIZE + 2];
4386             static int needs_gaussian_calc = 1;
4387              
4388 0           pdl *in = PDLA->SvPDLAV(__privtrans->in);
4389 0           pdl *out = PDLA->SvPDLAV(__privtrans->out);
4390 0           pdl *map = PDLA->SvPDLAV(__privtrans->map);
4391              
4392 0           PDLA->make_physical(in);
4393 0           PDLA->make_physical(out);
4394 0           PDLA->make_physical(map);
4395              
4396 0           ndims = map->ndims -1;
4397              
4398             /*
4399             * Allocate all our dynamic workspaces at once
4400             * */
4401 0           ovec = (PDLA_Indx *)(PDLA->smalloc( (STRLEN)
4402 0           ( + sizeof(PDLA_Indx) * 3 * ndims + sizeof(PDLA_Double) * (3*ndims) + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * 3 * ndims*ndims + ndims + sizeof(char) * ndims + sizeof(PDLA_Indx) * ndims )
4403             )
4404             );
4405 0           ivec = &(ovec[ndims]);
4406 0           ibvec = &(ivec[ndims]);
4407 0           dvec = (PDLA_Double *)(&(ibvec[ndims]));
4408 0           tvec = &(dvec[ndims]);
4409 0           acc = &(tvec[ndims]);
4410 0           wgt = &(acc[in->dims[ndims]]); wgt2 = &(wgt[in->dims[ndims]]); tmp = &(wgt2[in->dims[ndims]]);
4411 0           bounds = (char *)(&(tmp [3*ndims*ndims+ndims]));
4412 0           index_stash = (PDLA_Indx *) &(bounds[ndims]);
4413              
4414              
4415             /***
4416             * Fill in the boundary condition array
4417             */
4418             {
4419             char *bstr;
4420             STRLEN blen;
4421 0 0         bstr = SvPV(__privtrans->boundary,blen);
4422              
4423 0 0         if(blen == 0) {
4424             /* If no boundary is specified then every dim gets truncated */
4425             int i;
4426 0 0         for (i=0;i
4427 0           bounds[i] = 1;
4428             } else {
4429             int i;
4430 0 0         for(i=0;i
4431 0 0         switch(bstr[i < blen ? i : blen-1 ]) {
4432             case '0': case 'f': case 'F': /* forbid */
4433 0           bounds[i] = 0;
4434 0           break;
4435             case '1': case 't': case 'T': /* truncate */
4436 0           bounds[i] = 1;
4437 0           break;
4438             case '2': case 'e': case 'E': /* extend */
4439 0           bounds[i] = 2;
4440 0           break;
4441             case '3': case 'p': case 'P': /* periodic */
4442 0           bounds[i] = 3;
4443 0           break;
4444             case '4': case 'm': case 'M': /* mirror */
4445 0           bounds[i] = 4;
4446 0           break;
4447             default:
4448             {
4449             char buf[BUFSIZ];
4450 0           sprintf(buf,"Error in map: Unknown boundary condition '%c'",bstr[i]);
4451 0           barf("%s", buf);
4452             }
4453 0           break;
4454             }
4455             }
4456             }
4457             }
4458              
4459             /***
4460             * Parse out the 'method', 'big', 'blur', and 'sv_min' arguments
4461             */
4462 0 0         big = labs((PDLA_Long) (SvNV(__privtrans->big)));
4463 0 0         if(big <= 0)
4464 0           barf("%s","map: 'big' parameter must be >0");
4465              
4466 0 0         blur = fabs((PDLA_Double) (SvNV(__privtrans->blur)));
4467 0 0         if(blur < 0)
4468 0           barf("%s","map: 'blur' parameter must be >= 0");
4469              
4470 0 0         sv_min = fabs((PDLA_Double) (SvNV(__privtrans->sv_min)));
4471 0 0         if(sv_min < 0)
4472 0           barf("%s","map: 'sv_min' parameter must be >= 0");
4473              
4474 0 0         flux = (SvNV(__privtrans->flux) != 0);
4475              
4476             {
4477             char *mstr;
4478             STRLEN mlen;
4479 0 0         mstr = SvPV(__privtrans->method,mlen);
4480              
4481 0 0         if(mlen==0)
4482 0           method = 'h';
4483 0           else switch(*mstr) {
4484 0           case 'H': method='H'; break;
4485 0           case 'h': method = 'h';
4486 0 0         if( needs_hanning_calc ) {
4487             int i;
4488 0 0         for(i=0;i
4489 0           hanning_lookup[i] = 0.5 + 0.5 * cos(3.1415926536 / HANNING_LOOKUP_SIZE * i);
4490             }
4491 0           hanning_lookup[HANNING_LOOKUP_SIZE] = 0;
4492 0           hanning_lookup[HANNING_LOOKUP_SIZE+1] = 0;
4493 0           needs_hanning_calc = 0;
4494             }
4495 0           zeta = HANNING_LOOKUP_SIZE / blur;
4496 0           hanning_offset = (blur >= 1) ?
4497 0 0         0 :
4498 0           0.5 * (1.0 - blur);
4499 0           break;
4500              
4501 0           case 'g': case 'j': method = 'g';
4502 0           zeta = GAUSSIAN_LOOKUP_SIZE / GAUSSIAN_MAXVAL;
4503              
4504 0 0         if( needs_gaussian_calc ) {
4505             int i;
4506 0 0         for(i=0;i
4507 0           gaussian_lookup[i] = exp( - i * 1.386294 / zeta );
4508             }
4509 0           gaussian_lookup[GAUSSIAN_LOOKUP_SIZE] = 0;
4510 0           gaussian_lookup[GAUSSIAN_LOOKUP_SIZE+1] = 0;
4511 0           needs_gaussian_calc = 0;
4512             }
4513 0           break;
4514              
4515 0           case 'G': case 'J': method = 'G'; break;
4516             default:
4517             {
4518             char err[80];
4519 0           sprintf(err,"Bug in map: unknown method '%c'",*mstr);
4520 0           barf("%s", err);
4521             }
4522 0           break;
4523             }
4524             }
4525              
4526              
4527              
4528             /* End of initialization */
4529             /*************************************************************/
4530             /* Start of Real Work */
4531              
4532             /* Initialize coordinate vector and map offset
4533             */
4534 0 0         for(i=0;i
4535 0           ovec[i] = 0;
4536              
4537 0           map_ptr = (PDLA_Double *)(map->data);
4538              
4539              
4540             /* Main pixel loop (iterates over pixels in the output plane) */
4541             do {
4542             PDLA_Indx psize; PDLA_Indx i_off; PDLA_Indx j; char t_vio; char carry;
4543             /* Prefrobnicate the transformation matrix */
4544 0           psize = (PDLA_Long)(blur * PDLA_xform_aux(map, ovec, tmp, sv_min) + 0.5)+1; /* assignment */
4545              
4546             #ifdef DEBUG_MAP
4547             {
4548             int k; PDLA_Indx foo = 0;
4549             printf("ovec: [");
4550             for(k=0;k
4551             foo += ovec[k] * map->dimincs[k+1];
4552             printf(" %2d ",(int)(ovec[k]));
4553             }
4554             printf("]; psize is %ld; big is %d; blur is %8.2f; map is [",psize,big, blur);
4555             for(k=0;k
4556             printf("%8.2f",(double)(((PDLA_Float *)(map->data))[foo + k*map->dimincs[0]]));
4557             }
4558             printf("]\n");
4559             }
4560             #endif
4561              
4562             /* Don't bother accumulating output if psize is too large */
4563 0 0         if(psize <= big) {
4564             /* Use the prefrobnicated matrix to generate a local linearization.
4565             * dvec gets the delta; ibvec gets the base.
4566             */
4567             {
4568 0           PDLA_Double *mp = map_ptr;
4569 0 0         for (i=0;i
4570 0           dvec[i] = *mp - ( ibvec[i] = (PDLA_Long)(*mp + 0.5)); /* assignment */
4571 0           mp += map->dimincs[0];
4572             }
4573             }
4574              
4575             /* Initialize input delta vector */
4576 0 0         for(i=0;i
4577 0           ivec[i] = -psize;
4578              
4579             /* Initialize accumulators */
4580             {
4581 0           PDLA_Double *ac = acc;
4582 0 0         for(i=0; i < in->dims[ndims]; i++)
4583 0           *(ac++) = 0.0;
4584              
4585             }
4586             {
4587 0           PDLA_Double *wg = wgt;
4588 0 0         for(i=0;i < in->dims[ndims]; i++)
4589 0           *(wg++) = 0.0;
4590             }
4591             {
4592 0           PDLA_Double *wg = wgt2;
4593 0 0         for(i=0;i < in->dims[ndims]; i++)
4594 0           *(wg++) = 0.0;
4595             }
4596              
4597              
4598             /*
4599             * Calculate the original offset into the data array, to enable
4600             * delta calculations in the pixel loop
4601             *
4602             * i runs over dims; j holds the working integer index in the
4603             * current dim.
4604             *
4605             * This code matches the incrementation code at the bottom of the accumulation loop
4606             */
4607            
4608 0           t_vio = 0; /* truncation-boundary violation count - don't bother if it is nonzero */
4609 0           i_off = 0;
4610 0 0         for(i=0;i
4611 0           j = ivec[i] + ibvec[i];
4612 0 0         if(j<0 || j >= in->dims[i]) {
    0          
4613 0           switch(bounds[i]) {
4614             case 0: /* no breakage allowed */
4615 0           barf("%s","index out-of-bounds in map");
4616 0           break;
4617             case 1: /* truncation */
4618 0           t_vio++;
4619             /* fall through */
4620             case 2: /* extension -- crop */
4621 0 0         if(j<0)
4622 0           j=0;
4623 0           else j = in->dims[i] - 1;
4624 0           break;
4625             case 3: /* periodic -- mod it */
4626 0           j %= in->dims[i];
4627 0 0         if(j<0)
4628 0           j += in->dims[i];
4629 0           break;
4630             case 4: /* mirror -- reflect off the edges */
4631 0           j += in->dims[i];
4632 0           j %= (in->dims[i]*2);
4633 0 0         if(j<0)
4634 0           j += in->dims[i]*2;
4635 0           j -= in->dims[i];
4636 0 0         if(j<0) {
4637 0           j *= -1;
4638 0           j -= 1;
4639             }
4640 0           break;
4641             default:
4642 0           barf("%s", "Unknown boundary condition in map -- bug alert!");
4643 0           break;
4644             }
4645             }
4646 0           i_off += in->dimincs[i] * j;
4647             }
4648              
4649             /* Initialize index stashes for later reference as we scan the footprint */
4650             /* It's a pain in the ass to deal with boundaries, and doubly so at the */
4651             /* end of a dimensional scan. So we stash the index location at the */
4652             /* start of each dimensional scan here. When we finish incrementing */
4653             /* through a particular dim, we pull its value back out of the stash. */
4654 0 0         for(i=0;i
4655 0           index_stash[i] = i_off;
4656             }
4657              
4658             /* The input accumulation loop is the hotspot for the entire operation. */
4659             /* We loop over pixels in the region of interest (+/- psize in each dimension) */
4660             /* in the input array, use the linearized transform to bring each pixel center */
4661             /* forward to the output plane, and calculate a weighting based on the chosen */
4662             /* filter function. 'h' is a fast Hanning window rolloff using a lookup */
4663             /* table that is initialized the first time through the code. 'H' is the */
4664             /* same process, but explicitly calculated for each interation (~2x slower). */
4665             /* 'g' uses a radial Gaussian filter. Rather than calculate the array offset */
4666             /* into the input array fresh from the current input array vector each time, */
4667             /* we walk through the array using dimincs and the old offset. This saves */
4668             /* about half of the time spent on index calculation. */
4669              
4670             do { /* Input accumulation loop */
4671             PDLA_Double *cp;
4672             PDLA_Double alpha;
4673             /* Calculate the weight of the current input point. Don't bother if we're
4674             * violating any truncation boundaries (in that case our value is zero, but
4675             * for the interpolation we also set the weight to zero).
4676             */
4677 0 0         if( !t_vio ) {
4678              
4679 0           PDLA_Double *ap = tvec;
4680 0           PDLA_Double *bp = dvec;
4681 0           PDLA_Indx *ip = ivec;
4682 0 0         for(i=0; i
4683 0           *(ap++) = *(ip++) - *(bp++);
4684              
4685 0           switch(method) {
4686             PDLA_Double dd;
4687             case 'h':
4688             /* This is the Hanning window rolloff. It is a product of a simple */
4689             /* cos^2(theta) rolloff in each dimension. Using a lookup table */
4690             /* is about 2x faster than using cos(theta) directly in each */
4691             /* weighting calculation, so we do. Using 2500 entries and linear */
4692             /* interpolation is accurate to about 10^-7, and should preserve */
4693             /* the contents of cache pretty well. */
4694 0           alpha = 1;
4695 0           cp = tmp;
4696 0 0         for(i=0; i
4697             int lodex;
4698             int hidex;
4699             PDLA_Double beta;
4700 0           dd = 0;
4701 0           ap = tvec;
4702             /* Get the matrix-multiply element for this dimension */
4703 0 0         for(j=0;j
4704 0           dd += *(ap++) * *(cp++);
4705              
4706             /* Do linear interpolation from the table */
4707             /* The table captures a hanning window centered 0.5 pixel from center. */
4708             /* We scale the filter by the blur parameter -- but if blur is less */
4709             /* than unity, we shrink the hanning blur window while keeping the 0.5 */
4710             /* value on the pixel edge at 0.5. For blur greater than unity, we */
4711             /* scale simply. */
4712 0           beta = fabs(dd) - hanning_offset;
4713 0 0         if(beta > 0) {
4714 0 0         if(beta >= blur) {
4715 0           alpha = 0;
4716 0           i = ndims;
4717             } else {
4718 0           beta *= zeta;
4719 0           lodex = beta;
4720 0 0         beta -= lodex; if(lodex > HANNING_LOOKUP_SIZE)
4721 0           lodex = HANNING_LOOKUP_SIZE;
4722 0           hidex = lodex+1;
4723 0           alpha *= hanning_lookup[hidex]*beta + hanning_lookup[lodex]*(1-beta);
4724             } /* end of interpolation branch */
4725             } /* end of beta > 0 branch */
4726             } /* end of dimension loop */
4727 0           break;
4728              
4729             case 'H':
4730             /* This is the Hanning window rolloff with explicit calculation, preserved */
4731             /* in case someone actually wants the slower longer method. */
4732 0           alpha = 1;
4733 0           cp = tmp;
4734 0 0         for(i=0; i
4735 0           dd = 0;
4736 0           ap = tvec;
4737 0 0         for(j=0;j
4738 0           dd += *(ap++) * *(cp++);
4739 0           dd = (fabs(dd) - hanning_offset) / blur;
4740 0 0         if( dd > 1 ) {
4741 0           alpha = 0;
4742 0           i = ndims;
4743             } else
4744 0           alpha *= (0.5 + 0.5 * cos( dd * 3.1415926536 ));
4745             }
4746 0           break;
4747              
4748             case 'g':
4749             /* This is the Gaussian rolloff. It does lookup into a precalculated exponential. */
4750             {
4751 0           PDLA_Double sum = 0;
4752 0           cp = tmp;
4753 0 0         for(i=0; i
4754 0           dd = 0;
4755 0           ap = tvec;
4756 0 0         for(j=0;j
4757 0           dd += *(ap++) * *(cp++);
4758 0           dd /= blur;
4759 0           sum += dd * dd;
4760 0 0         if(sum > GAUSSIAN_MAXVAL) {
4761 0           i = ndims; /* exit early if we're too far out */
4762 0           alpha = 0;
4763             }
4764             }
4765 0 0         if( sum > GAUSSIAN_MAXVAL || !isfinite(sum) || isnan(sum) ) {
    0          
    0          
4766 0           alpha = 0;
4767             } else {
4768             int lodex,hidex;
4769 0           PDLA_Double beta = fabs(zeta * sum);
4770              
4771 0           lodex = beta;
4772 0           beta -= lodex; hidex = lodex+1;
4773 0           alpha = gaussian_lookup[hidex]*beta + gaussian_lookup[lodex]*(1 - beta);
4774              
4775             }
4776             }
4777 0           break;
4778              
4779             case 'G':
4780             /* This is the Gaussian rolloff with explicit calculation, preserved */
4781             /* in case someone actually wants the slower longer method. */
4782             {
4783 0           PDLA_Double sum = 0;
4784 0           cp = tmp;
4785 0 0         for(i=0; i
4786 0           dd = 0;
4787 0           ap = tvec;
4788 0 0         for(j=0;j
4789 0           dd += *(ap++) * *(cp++);
4790 0           dd /= blur;
4791 0           sum += dd * dd;
4792 0 0         if(sum > 4) /* 2 pixels -- four half-widths */
4793 0           i = ndims; /* exit early if this pixel is too far outside the footprint of the ideal point */
4794             }
4795              
4796 0 0         if(sum > GAUSSIAN_MAXVAL)
4797 0           alpha = 0;
4798             else
4799 0           alpha = exp(-sum * 1.386294); /* Gaussian, rt(2)-pix HWHM */
4800             }
4801 0           break;
4802             default:
4803             {
4804             char buf[80];
4805 0           sprintf(buf,"This can't happen: method='%c'",method);
4806 0           barf("%s", buf);
4807             }
4808             }
4809              
4810             { /* convenience block -- accumulate the current point into the weighted sum. */
4811             /* This is more than simple assignment because we have our own explicit poor */
4812             /* man's threadloop here, so we accumulate each threaded element separately. */
4813 0           PDLA_Float *dat = ((PDLA_Float *)(in->data)) + i_off;
4814 0           PDLA_Indx max = out->dims[ndims];
4815 0 0         for( i=0; i < max; i++ ) {
4816 0 0         if( (badval==0) || (*dat != badval) ) {
    0          
4817 0           acc[i] += *dat * alpha;
4818 0           dat += in->dimincs[ndims];
4819 0           wgt[i] += alpha;
4820             }
4821 0           wgt2[i] += alpha; }
4822             }
4823             } /* end of t_vio check (i.e. of input accumulation) */
4824              
4825              
4826             /* Advance input accumulation loop. */
4827             /* We both increment the total vector and also advance the index. */
4828 0           carry = 1;
4829 0 0         for(i=0; i
    0          
4830             /* Advance the current element of the offset vector */
4831 0           ivec[i]++;
4832 0           j = ivec[i] + ibvec[i];
4833              
4834             /* Advance the offset into the data array */
4835 0 0         if( j > 0 && j <= in->dims[i]-1 ) {
    0          
4836             /* Normal case -- just advance the input vector */
4837 0           i_off += in->dimincs[i];
4838             } else {
4839             /* Busted a boundary - either before or after. */
4840 0           switch(bounds[i]){
4841             case 0: /* no breakage allowed -- treat as truncation for interpolation */
4842             case 1: /* truncation -- if we crossed the boundary mark ourselves out-of-bounds */
4843 0 0         if( j == 0 )
4844 0           t_vio--;
4845 0 0         else if( j == in->dims[i] )
4846 0           t_vio++;
4847 0           break;
4848             case 2: /* extension -- do nothing (so the same input point is re-used) */
4849 0           break;
4850             case 3: /* periodic -- advance and mod into the allowed range */
4851 0 0         if((j % in->dims[i]) == 0) {
4852 0           i_off -= in->dimincs[i] * (in->dims[i]-1);
4853             } else {
4854 0           i_off += in->dimincs[i];
4855             }
4856 0           break;
4857             case 4: /* mirror -- advance or retreat depending on phase */
4858 0           j += in->dims[i];
4859 0           j %= (in->dims[i]*2);
4860 0           j -= in->dims[i];
4861 0 0         if( j!=0 && j!= -in->dims[i] ) {
    0          
4862 0 0         if(j<0)
4863 0           i_off -= in->dimincs[i];
4864             else
4865 0           i_off += in->dimincs[i];
4866             }
4867 0           break;
4868             }
4869             }
4870              
4871             /* Now check for carry */
4872 0 0         if(ivec[i] <= psize) {
4873             /* Normal case -- copy the current offset to the faster-running dim stashes */
4874             int k;
4875 0 0         for(k=0;k
4876 0           index_stash[k] = i_off;
4877             }
4878 0           carry = 0;
4879              
4880             } else { /* End of this scan -- recover the last position, and mark carry */
4881 0           i_off = index_stash[i];
4882 0 0         if(bounds[i]==1) {
4883 0           j = ivec[i] + ibvec[i];
4884 0 0         if( j < 0 || j >= in->dims[i] )
    0          
4885 0           t_vio--;
4886 0           ivec[i] = -psize;
4887 0           j = ivec[i] + ibvec[i];
4888 0 0         if( j < 0 || j >= in->dims[i] )
    0          
4889 0           t_vio++;
4890 0           carry = 1;
4891             } else {
4892 0           ivec[i] = -psize;
4893             }
4894             }
4895             } /* End of counter-advance loop */
4896 0 0         } while(carry==0); /* end of total data accumulation loop (termination condition has carry on last dim) */
4897              
4898             {
4899 0           PDLA_Double *ac = acc;
4900 0           PDLA_Double *wg = wgt;
4901 0           PDLA_Double *wg2 = wgt2;
4902 0           PDLA_Float *dat = out->data;
4903              
4904             /* Calculate output vector offset */
4905 0 0         for(i=0;i
4906 0           dat += out->dimincs[i] * ovec[i];
4907              
4908 0 0         if(!flux) {
4909             /* Flux flag is NOT set -- normal case. Copy the weighted accumulated data. */
4910 0 0         for(i=0; i < out->dims[ndims]; i++) {
4911 0 0         if(*wg && (*wg2 / *wg) < 1.5 ) {
    0          
4912 0           *dat = *(ac++) / *(wg++);
4913 0           wg2++;
4914             } else {
4915 0           *dat = badval;
4916 0           ac++; wg++; wg2++;
4917             }
4918 0           dat += out->dimincs[ndims];
4919             }
4920             } else {
4921             /* Flux flag is set - scale by the (unpadded) determinant of the Jacobian */
4922 0           PDLA_Double det = tmp[ndims*ndims];
4923 0 0         for(i=0; i < out->dims[ndims]; i++) {
4924 0 0         if(*wg && (*wg2 / *wg) < 1.5 ) {
    0          
4925 0           *dat = *(ac++) / *(wg++) * det;
4926 0           wg2++;
4927             } else {
4928 0           *dat = badval;
4929 0           ac++; wg++; wg2++;
4930             }
4931 0           dat += out->dimincs[ndims];
4932             } /* end of for loop */
4933             } /* end of flux flag set conditional */
4934             } /* end of convenience block */
4935            
4936             /* End of code for normal pixels */
4937             } else {
4938             /* The pixel was ludicrously huge -- just set this pixel to nan */
4939 0           PDLA_Float *dat = out->data;
4940 0 0         for(i=0;i
4941 0           dat += out->dimincs[i] * ovec[i];
4942 0 0         for(i=0;idims[ndims];i++) {
4943 0           *dat = badval; /* Should handle bad values too -- not yet */
4944 0           dat += out->dimincs[ndims];
4945             }
4946             }
4947              
4948             /* Increment the pixel counter */
4949             {
4950 0 0         for(i=0;
4951 0 0         (i
4952 0 0         (map_ptr += map->dimincs[i+1]) && /* Funky pre-test increment */
4953 0           (++(ovec[i]) >= out->dims[i]); /* Actual carry test */
4954 0           i++) {
4955 0           ovec[i] = 0;
4956 0           map_ptr -= out->dims[i] * map->dimincs[i+1];
4957             }
4958             }
4959 0 0         } while(i
4960              
4961              
4962              
4963             }
4964             PDLA_COMMENT("THREADLOOPEND")
4965             }
4966             }
4967 0           k0_datap -= __tinc1_0 * __tdims1 + __offsp[0];
4968 0 0         } while(PDLA->iterthreadloop(&__privtrans->__pdlthread,2)); } break; case PDLA_D: {
4969 7 50         PDLA_Double * k0_datap = ((PDLA_Double *)(PDLA_REPRP_TRANS((__privtrans->pdls[0]),(__privtrans->vtable->per_pdl_flags[0]))));
    0          
4970 7           PDLA_Double * k0_physdatap = ((PDLA_Double *)((__privtrans->pdls[0])->data));
4971              
4972              
4973             PDLA_COMMENT("THREADLOOPBEGIN")
4974 7 50         if ( PDLA->startthreadloop(&(__privtrans->__pdlthread),__privtrans->vtable->readdata, __tr) ) return;
4975 7           do { register PDLA_Indx __tind1=0,__tind2=0;
4976 7           register PDLA_Indx __tnpdls = __privtrans->__pdlthread.npdls;
4977 7           register PDLA_Indx __tdims1 = __privtrans->__pdlthread.dims[1];
4978 7           register PDLA_Indx __tdims0 = __privtrans->__pdlthread.dims[0];
4979 7           register PDLA_Indx *__offsp = PDLA->get_threadoffsp(&__privtrans->__pdlthread);
4980 7           register PDLA_Indx __tinc0_0 = __privtrans->__pdlthread.incs[0];
4981 7           register PDLA_Indx __tinc1_0 = __privtrans->__pdlthread.incs[__tnpdls+0];
4982 7           k0_datap += __offsp[0];
4983 14 100         for( __tind2 = 0 ;
4984             __tind2 < __tdims1 ;
4985 7           __tind2++
4986 7           ,k0_datap += __tinc1_0 - __tinc0_0 * __tdims0
4987             )
4988             {
4989 14 100         for( __tind1 = 0 ;
4990             __tind1 < __tdims0 ;
4991 7           __tind1++
4992 7           ,k0_datap += __tinc0_0
4993             )
4994             { PDLA_COMMENT("This is the tightest threadloop. Make sure inside is optimal."){
4995              
4996             /*
4997             * Pixel interpolation & averaging code
4998             *
4999             * Calls a common coordinate-transformation block (see following hdr)
5000             * that isn't dependent on the type of the input variable.
5001             *
5002             * The inputs are SVs to avoid hassling with threadloops; threading
5003             * is handled internally. To simplify the threading business, any
5004             * thread dimensions should all be collapsed to a single one by the
5005             * perl front-end.
5006             *
5007             */
5008              
5009             short ndims; /* Number of dimensions we're working in */
5010             PDLA_Double *tmp; /* Workspace for prefrobnication */
5011             PDLA_Indx *ovec; /* output pixel loop vector */
5012             PDLA_Indx *ivec; /* input pixel loop vector */
5013             PDLA_Indx *ibvec; /* input pixel base offset vector */
5014             PDLA_Double *dvec; /* Residual vector for linearization */
5015             PDLA_Double *tvec; /* Temporary floating-point vector */
5016             PDLA_Double *acc; /* Threaded accumulator */
5017             PDLA_Double *wgt; /* Threaded weight accumulator */
5018             PDLA_Double *wgt2; /* Threaded weight accumulator for badval finding */
5019             char *bounds; /* Boundary condition packed string */
5020             PDLA_Indx *index_stash; /* Stash to store the opening index of dim sample scans */
5021             char method; /* Method identifier (gets one of 'h','g') */
5022             PDLA_Long big; /* Max size of input footprint for each pix */
5023             PDLA_Double blur; /* Scaling of filter */
5024             PDLA_Double sv_min; /* minimum singular value */
5025             char flux; /* Flag to indicate flux conservation */
5026             PDLA_Double *map_ptr;
5027             PDLA_Long i, j;
5028 7 100         PDLA_Double badval = SvNV(__privtrans->bv);
5029             #define HANNING_LOOKUP_SIZE 2500
5030             static PDLA_Double hanning_lookup[HANNING_LOOKUP_SIZE + 2];
5031             static int needs_hanning_calc = 1;
5032             PDLA_Double zeta;
5033             PDLA_Double hanning_offset;
5034              
5035             #define GAUSSIAN_LOOKUP_SIZE 4000
5036             #define GAUSSIAN_MAXVAL 6.25 /* 2.5 HWHMs (square it) */
5037             static PDLA_Double gaussian_lookup[GAUSSIAN_LOOKUP_SIZE + 2];
5038             static int needs_gaussian_calc = 1;
5039              
5040 7           pdl *in = PDLA->SvPDLAV(__privtrans->in);
5041 7           pdl *out = PDLA->SvPDLAV(__privtrans->out);
5042 7           pdl *map = PDLA->SvPDLAV(__privtrans->map);
5043              
5044 7           PDLA->make_physical(in);
5045 7           PDLA->make_physical(out);
5046 7           PDLA->make_physical(map);
5047              
5048 7           ndims = map->ndims -1;
5049              
5050             /*
5051             * Allocate all our dynamic workspaces at once
5052             * */
5053 7           ovec = (PDLA_Indx *)(PDLA->smalloc( (STRLEN)
5054 7           ( + sizeof(PDLA_Indx) * 3 * ndims + sizeof(PDLA_Double) * (3*ndims) + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * in->dims[ndims] + sizeof(PDLA_Double) * 3 * ndims*ndims + ndims + sizeof(char) * ndims + sizeof(PDLA_Indx) * ndims )
5055             )
5056             );
5057 7           ivec = &(ovec[ndims]);
5058 7           ibvec = &(ivec[ndims]);
5059 7           dvec = (PDLA_Double *)(&(ibvec[ndims]));
5060 7           tvec = &(dvec[ndims]);
5061 7           acc = &(tvec[ndims]);
5062 7           wgt = &(acc[in->dims[ndims]]); wgt2 = &(wgt[in->dims[ndims]]); tmp = &(wgt2[in->dims[ndims]]);
5063 7           bounds = (char *)(&(tmp [3*ndims*ndims+ndims]));
5064 7           index_stash = (PDLA_Indx *) &(bounds[ndims]);
5065              
5066              
5067             /***
5068             * Fill in the boundary condition array
5069             */
5070             {
5071             char *bstr;
5072             STRLEN blen;
5073 7 50         bstr = SvPV(__privtrans->boundary,blen);
5074              
5075 7 50         if(blen == 0) {
5076             /* If no boundary is specified then every dim gets truncated */
5077             int i;
5078 0 0         for (i=0;i
5079 0           bounds[i] = 1;
5080             } else {
5081             int i;
5082 21 100         for(i=0;i
5083 14 100         switch(bstr[i < blen ? i : blen-1 ]) {
5084             case '0': case 'f': case 'F': /* forbid */
5085 0           bounds[i] = 0;
5086 0           break;
5087             case '1': case 't': case 'T': /* truncate */
5088 12           bounds[i] = 1;
5089 12           break;
5090             case '2': case 'e': case 'E': /* extend */
5091 0           bounds[i] = 2;
5092 0           break;
5093             case '3': case 'p': case 'P': /* periodic */
5094 1           bounds[i] = 3;
5095 1           break;
5096             case '4': case 'm': case 'M': /* mirror */
5097 1           bounds[i] = 4;
5098 1           break;
5099             default:
5100             {
5101             char buf[BUFSIZ];
5102 0           sprintf(buf,"Error in map: Unknown boundary condition '%c'",bstr[i]);
5103 0           barf("%s", buf);
5104             }
5105 0           break;
5106             }
5107             }
5108             }
5109             }
5110              
5111             /***
5112             * Parse out the 'method', 'big', 'blur', and 'sv_min' arguments
5113             */
5114 7 50         big = labs((PDLA_Long) (SvNV(__privtrans->big)));
5115 7 50         if(big <= 0)
5116 0           barf("%s","map: 'big' parameter must be >0");
5117              
5118 7 100         blur = fabs((PDLA_Double) (SvNV(__privtrans->blur)));
5119 7 50         if(blur < 0)
5120 0           barf("%s","map: 'blur' parameter must be >= 0");
5121              
5122 7 50         sv_min = fabs((PDLA_Double) (SvNV(__privtrans->sv_min)));
5123 7 50         if(sv_min < 0)
5124 0           barf("%s","map: 'sv_min' parameter must be >= 0");
5125              
5126 7 50         flux = (SvNV(__privtrans->flux) != 0);
5127              
5128             {
5129             char *mstr;
5130             STRLEN mlen;
5131 7 50         mstr = SvPV(__privtrans->method,mlen);
5132              
5133 7 50         if(mlen==0)
5134 0           method = 'h';
5135 7           else switch(*mstr) {
5136 0           case 'H': method='H'; break;
5137 6           case 'h': method = 'h';
5138 6 100         if( needs_hanning_calc ) {
5139             int i;
5140 2501 100         for(i=0;i
5141 2500           hanning_lookup[i] = 0.5 + 0.5 * cos(3.1415926536 / HANNING_LOOKUP_SIZE * i);
5142             }
5143 1           hanning_lookup[HANNING_LOOKUP_SIZE] = 0;
5144 1           hanning_lookup[HANNING_LOOKUP_SIZE+1] = 0;
5145 1           needs_hanning_calc = 0;
5146             }
5147 6           zeta = HANNING_LOOKUP_SIZE / blur;
5148 6           hanning_offset = (blur >= 1) ?
5149 6 50         0 :
5150 0           0.5 * (1.0 - blur);
5151 6           break;
5152              
5153 1           case 'g': case 'j': method = 'g';
5154 1           zeta = GAUSSIAN_LOOKUP_SIZE / GAUSSIAN_MAXVAL;
5155              
5156 1 50         if( needs_gaussian_calc ) {
5157             int i;
5158 4001 100         for(i=0;i
5159 4000           gaussian_lookup[i] = exp( - i * 1.386294 / zeta );
5160             }
5161 1           gaussian_lookup[GAUSSIAN_LOOKUP_SIZE] = 0;
5162 1           gaussian_lookup[GAUSSIAN_LOOKUP_SIZE+1] = 0;
5163 1           needs_gaussian_calc = 0;
5164             }
5165 1           break;
5166              
5167 0           case 'G': case 'J': method = 'G'; break;
5168             default:
5169             {
5170             char err[80];
5171 0           sprintf(err,"Bug in map: unknown method '%c'",*mstr);
5172 0           barf("%s", err);
5173             }
5174 0           break;
5175             }
5176             }
5177              
5178              
5179              
5180             /* End of initialization */
5181             /*************************************************************/
5182             /* Start of Real Work */
5183              
5184             /* Initialize coordinate vector and map offset
5185             */
5186 21 100         for(i=0;i
5187 14           ovec[i] = 0;
5188              
5189 7           map_ptr = (PDLA_Double *)(map->data);
5190              
5191              
5192             /* Main pixel loop (iterates over pixels in the output plane) */
5193             do {
5194             PDLA_Indx psize; PDLA_Indx i_off; PDLA_Indx j; char t_vio; char carry;
5195             /* Prefrobnicate the transformation matrix */
5196 421           psize = (PDLA_Long)(blur * PDLA_xform_aux(map, ovec, tmp, sv_min) + 0.5)+1; /* assignment */
5197              
5198             #ifdef DEBUG_MAP
5199             {
5200             int k; PDLA_Indx foo = 0;
5201             printf("ovec: [");
5202             for(k=0;k
5203             foo += ovec[k] * map->dimincs[k+1];
5204             printf(" %2d ",(int)(ovec[k]));
5205             }
5206             printf("]; psize is %ld; big is %d; blur is %8.2f; map is [",psize,big, blur);
5207             for(k=0;k
5208             printf("%8.2f",(double)(((PDLA_Double *)(map->data))[foo + k*map->dimincs[0]]));
5209             }
5210             printf("]\n");
5211             }
5212             #endif
5213              
5214             /* Don't bother accumulating output if psize is too large */
5215 421 50         if(psize <= big) {
5216             /* Use the prefrobnicated matrix to generate a local linearization.
5217             * dvec gets the delta; ibvec gets the base.
5218             */
5219             {
5220 421           PDLA_Double *mp = map_ptr;
5221 1263 100         for (i=0;i
5222 842           dvec[i] = *mp - ( ibvec[i] = (PDLA_Long)(*mp + 0.5)); /* assignment */
5223 842           mp += map->dimincs[0];
5224             }
5225             }
5226              
5227             /* Initialize input delta vector */
5228 1263 100         for(i=0;i
5229 842           ivec[i] = -psize;
5230              
5231             /* Initialize accumulators */
5232             {
5233 421           PDLA_Double *ac = acc;
5234 842 100         for(i=0; i < in->dims[ndims]; i++)
5235 421           *(ac++) = 0.0;
5236              
5237             }
5238             {
5239 421           PDLA_Double *wg = wgt;
5240 842 100         for(i=0;i < in->dims[ndims]; i++)
5241 421           *(wg++) = 0.0;
5242             }
5243             {
5244 421           PDLA_Double *wg = wgt2;
5245 842 100         for(i=0;i < in->dims[ndims]; i++)
5246 421           *(wg++) = 0.0;
5247             }
5248              
5249              
5250             /*
5251             * Calculate the original offset into the data array, to enable
5252             * delta calculations in the pixel loop
5253             *
5254             * i runs over dims; j holds the working integer index in the
5255             * current dim.
5256             *
5257             * This code matches the incrementation code at the bottom of the accumulation loop
5258             */
5259            
5260 421           t_vio = 0; /* truncation-boundary violation count - don't bother if it is nonzero */
5261 421           i_off = 0;
5262 1263 100         for(i=0;i
5263 842           j = ivec[i] + ibvec[i];
5264 842 100         if(j<0 || j >= in->dims[i]) {
    100          
5265 368           switch(bounds[i]) {
5266             case 0: /* no breakage allowed */
5267 0           barf("%s","index out-of-bounds in map");
5268 0           break;
5269             case 1: /* truncation */
5270 268           t_vio++;
5271             /* fall through */
5272             case 2: /* extension -- crop */
5273 268 100         if(j<0)
5274 208           j=0;
5275 60           else j = in->dims[i] - 1;
5276 268           break;
5277             case 3: /* periodic -- mod it */
5278 50           j %= in->dims[i];
5279 50 100         if(j<0)
5280 20           j += in->dims[i];
5281 50           break;
5282             case 4: /* mirror -- reflect off the edges */
5283 50           j += in->dims[i];
5284 50           j %= (in->dims[i]*2);
5285 50 50         if(j<0)
5286 0           j += in->dims[i]*2;
5287 50           j -= in->dims[i];
5288 50 50         if(j<0) {
5289 50           j *= -1;
5290 50           j -= 1;
5291             }
5292 50           break;
5293             default:
5294 0           barf("%s", "Unknown boundary condition in map -- bug alert!");
5295 0           break;
5296             }
5297             }
5298 842           i_off += in->dimincs[i] * j;
5299             }
5300              
5301             /* Initialize index stashes for later reference as we scan the footprint */
5302             /* It's a pain in the ass to deal with boundaries, and doubly so at the */
5303             /* end of a dimensional scan. So we stash the index location at the */
5304             /* start of each dimensional scan here. When we finish incrementing */
5305             /* through a particular dim, we pull its value back out of the stash. */
5306 1263 100         for(i=0;i
5307 842           index_stash[i] = i_off;
5308             }
5309              
5310             /* The input accumulation loop is the hotspot for the entire operation. */
5311             /* We loop over pixels in the region of interest (+/- psize in each dimension) */
5312             /* in the input array, use the linearized transform to bring each pixel center */
5313             /* forward to the output plane, and calculate a weighting based on the chosen */
5314             /* filter function. 'h' is a fast Hanning window rolloff using a lookup */
5315             /* table that is initialized the first time through the code. 'H' is the */
5316             /* same process, but explicitly calculated for each interation (~2x slower). */
5317             /* 'g' uses a radial Gaussian filter. Rather than calculate the array offset */
5318             /* into the input array fresh from the current input array vector each time, */
5319             /* we walk through the array using dimincs and the old offset. This saves */
5320             /* about half of the time spent on index calculation. */
5321              
5322             do { /* Input accumulation loop */
5323             PDLA_Double *cp;
5324             PDLA_Double alpha;
5325             /* Calculate the weight of the current input point. Don't bother if we're
5326             * violating any truncation boundaries (in that case our value is zero, but
5327             * for the interpolation we also set the weight to zero).
5328             */
5329 11701 100         if( !t_vio ) {
5330              
5331 7182           PDLA_Double *ap = tvec;
5332 7182           PDLA_Double *bp = dvec;
5333 7182           PDLA_Indx *ip = ivec;
5334 21546 100         for(i=0; i
5335 14364           *(ap++) = *(ip++) - *(bp++);
5336              
5337 7182           switch(method) {
5338             PDLA_Double dd;
5339             case 'h':
5340             /* This is the Hanning window rolloff. It is a product of a simple */
5341             /* cos^2(theta) rolloff in each dimension. Using a lookup table */
5342             /* is about 2x faster than using cos(theta) directly in each */
5343             /* weighting calculation, so we do. Using 2500 entries and linear */
5344             /* interpolation is accurate to about 10^-7, and should preserve */
5345             /* the contents of cache pretty well. */
5346 6341           alpha = 1;
5347 6341           cp = tmp;
5348 14694 100         for(i=0; i
5349             int lodex;
5350             int hidex;
5351             PDLA_Double beta;
5352 8353           dd = 0;
5353 8353           ap = tvec;
5354             /* Get the matrix-multiply element for this dimension */
5355 25059 100         for(j=0;j
5356 16706           dd += *(ap++) * *(cp++);
5357              
5358             /* Do linear interpolation from the table */
5359             /* The table captures a hanning window centered 0.5 pixel from center. */
5360             /* We scale the filter by the blur parameter -- but if blur is less */
5361             /* than unity, we shrink the hanning blur window while keeping the 0.5 */
5362             /* value on the pixel edge at 0.5. For blur greater than unity, we */
5363             /* scale simply. */
5364 8353           beta = fabs(dd) - hanning_offset;
5365 8353 100         if(beta > 0) {
5366 6875 100         if(beta >= blur) {
5367 5707           alpha = 0;
5368 5707           i = ndims;
5369             } else {
5370 1168           beta *= zeta;
5371 1168           lodex = beta;
5372 1168 50         beta -= lodex; if(lodex > HANNING_LOOKUP_SIZE)
5373 0           lodex = HANNING_LOOKUP_SIZE;
5374 1168           hidex = lodex+1;
5375 1168           alpha *= hanning_lookup[hidex]*beta + hanning_lookup[lodex]*(1-beta);
5376             } /* end of interpolation branch */
5377             } /* end of beta > 0 branch */
5378             } /* end of dimension loop */
5379 6341           break;
5380              
5381             case 'H':
5382             /* This is the Hanning window rolloff with explicit calculation, preserved */
5383             /* in case someone actually wants the slower longer method. */
5384 0           alpha = 1;
5385 0           cp = tmp;
5386 0 0         for(i=0; i
5387 0           dd = 0;
5388 0           ap = tvec;
5389 0 0         for(j=0;j
5390 0           dd += *(ap++) * *(cp++);
5391 0           dd = (fabs(dd) - hanning_offset) / blur;
5392 0 0         if( dd > 1 ) {
5393 0           alpha = 0;
5394 0           i = ndims;
5395             } else
5396 0           alpha *= (0.5 + 0.5 * cos( dd * 3.1415926536 ));
5397             }
5398 0           break;
5399              
5400             case 'g':
5401             /* This is the Gaussian rolloff. It does lookup into a precalculated exponential. */
5402             {
5403 841           PDLA_Double sum = 0;
5404 841           cp = tmp;
5405 2523 100         for(i=0; i
5406 1682           dd = 0;
5407 1682           ap = tvec;
5408 5046 100         for(j=0;j
5409 3364           dd += *(ap++) * *(cp++);
5410 1682           dd /= blur;
5411 1682           sum += dd * dd;
5412 1682 100         if(sum > GAUSSIAN_MAXVAL) {
5413 100           i = ndims; /* exit early if we're too far out */
5414 100           alpha = 0;
5415             }
5416             }
5417 841 100         if( sum > GAUSSIAN_MAXVAL || !isfinite(sum) || isnan(sum) ) {
    50          
    50          
5418 100           alpha = 0;
5419             } else {
5420             int lodex,hidex;
5421 741           PDLA_Double beta = fabs(zeta * sum);
5422              
5423 741           lodex = beta;
5424 741           beta -= lodex; hidex = lodex+1;
5425 741           alpha = gaussian_lookup[hidex]*beta + gaussian_lookup[lodex]*(1 - beta);
5426              
5427             }
5428             }
5429 841           break;
5430              
5431             case 'G':
5432             /* This is the Gaussian rolloff with explicit calculation, preserved */
5433             /* in case someone actually wants the slower longer method. */
5434             {
5435 0           PDLA_Double sum = 0;
5436 0           cp = tmp;
5437 0 0         for(i=0; i
5438 0           dd = 0;
5439 0           ap = tvec;
5440 0 0         for(j=0;j
5441 0           dd += *(ap++) * *(cp++);
5442 0           dd /= blur;
5443 0           sum += dd * dd;
5444 0 0         if(sum > 4) /* 2 pixels -- four half-widths */
5445 0           i = ndims; /* exit early if this pixel is too far outside the footprint of the ideal point */
5446             }
5447              
5448 0 0         if(sum > GAUSSIAN_MAXVAL)
5449 0           alpha = 0;
5450             else
5451 0           alpha = exp(-sum * 1.386294); /* Gaussian, rt(2)-pix HWHM */
5452             }
5453 0           break;
5454             default:
5455             {
5456             char buf[80];
5457 0           sprintf(buf,"This can't happen: method='%c'",method);
5458 0           barf("%s", buf);
5459             }
5460             }
5461              
5462             { /* convenience block -- accumulate the current point into the weighted sum. */
5463             /* This is more than simple assignment because we have our own explicit poor */
5464             /* man's threadloop here, so we accumulate each threaded element separately. */
5465 7182           PDLA_Double *dat = ((PDLA_Double *)(in->data)) + i_off;
5466 7182           PDLA_Indx max = out->dims[ndims];
5467 14364 100         for( i=0; i < max; i++ ) {
5468 7182 100         if( (badval==0) || (*dat != badval) ) {
    50          
5469 7182           acc[i] += *dat * alpha;
5470 7182           dat += in->dimincs[ndims];
5471 7182           wgt[i] += alpha;
5472             }
5473 7182           wgt2[i] += alpha; }
5474             }
5475             } /* end of t_vio check (i.e. of input accumulation) */
5476              
5477              
5478             /* Advance input accumulation loop. */
5479             /* We both increment the total vector and also advance the index. */
5480 11701           carry = 1;
5481 25605 100         for(i=0; i
    100          
5482             /* Advance the current element of the offset vector */
5483 13904           ivec[i]++;
5484 13904           j = ivec[i] + ibvec[i];
5485              
5486             /* Advance the offset into the data array */
5487 13904 100         if( j > 0 && j <= in->dims[i]-1 ) {
    100          
5488             /* Normal case -- just advance the input vector */
5489 7651           i_off += in->dimincs[i];
5490             } else {
5491             /* Busted a boundary - either before or after. */
5492 6253           switch(bounds[i]){
5493             case 0: /* no breakage allowed -- treat as truncation for interpolation */
5494             case 1: /* truncation -- if we crossed the boundary mark ourselves out-of-bounds */
5495 4273 100         if( j == 0 )
5496 642           t_vio--;
5497 3631 100         else if( j == in->dims[i] )
5498 950           t_vio++;
5499 4273           break;
5500             case 2: /* extension -- do nothing (so the same input point is re-used) */
5501 0           break;
5502             case 3: /* periodic -- advance and mod into the allowed range */
5503 330 100         if((j % in->dims[i]) == 0) {
5504 100           i_off -= in->dimincs[i] * (in->dims[i]-1);
5505             } else {
5506 230           i_off += in->dimincs[i];
5507             }
5508 330           break;
5509             case 4: /* mirror -- advance or retreat depending on phase */
5510 1650           j += in->dims[i];
5511 1650           j %= (in->dims[i]*2);
5512 1650           j -= in->dims[i];
5513 1650 100         if( j!=0 && j!= -in->dims[i] ) {
    100          
5514 1150 100         if(j<0)
5515 1000           i_off -= in->dimincs[i];
5516             else
5517 150           i_off += in->dimincs[i];
5518             }
5519 1650           break;
5520             }
5521             }
5522              
5523             /* Now check for carry */
5524 13904 100         if(ivec[i] <= psize) {
5525             /* Normal case -- copy the current offset to the faster-running dim stashes */
5526             int k;
5527 13062 100         for(k=0;k
5528 1782           index_stash[k] = i_off;
5529             }
5530 11280           carry = 0;
5531              
5532             } else { /* End of this scan -- recover the last position, and mark carry */
5533 2624           i_off = index_stash[i];
5534 2624 100         if(bounds[i]==1) {
5535 2024           j = ivec[i] + ibvec[i];
5536 2024 50         if( j < 0 || j >= in->dims[i] )
    100          
5537 1130           t_vio--;
5538 2024           ivec[i] = -psize;
5539 2024           j = ivec[i] + ibvec[i];
5540 2024 100         if( j < 0 || j >= in->dims[i] )
    100          
5541 822           t_vio++;
5542 2024           carry = 1;
5543             } else {
5544 600           ivec[i] = -psize;
5545             }
5546             }
5547             } /* End of counter-advance loop */
5548 11701 100         } while(carry==0); /* end of total data accumulation loop (termination condition has carry on last dim) */
5549              
5550             {
5551 421           PDLA_Double *ac = acc;
5552 421           PDLA_Double *wg = wgt;
5553 421           PDLA_Double *wg2 = wgt2;
5554 421           PDLA_Double *dat = out->data;
5555              
5556             /* Calculate output vector offset */
5557 1263 100         for(i=0;i
5558 842           dat += out->dimincs[i] * ovec[i];
5559              
5560 421 50         if(!flux) {
5561             /* Flux flag is NOT set -- normal case. Copy the weighted accumulated data. */
5562 842 100         for(i=0; i < out->dims[ndims]; i++) {
5563 421 100         if(*wg && (*wg2 / *wg) < 1.5 ) {
    50          
5564 326           *dat = *(ac++) / *(wg++);
5565 326           wg2++;
5566             } else {
5567 95           *dat = badval;
5568 95           ac++; wg++; wg2++;
5569             }
5570 421           dat += out->dimincs[ndims];
5571             }
5572             } else {
5573             /* Flux flag is set - scale by the (unpadded) determinant of the Jacobian */
5574 0           PDLA_Double det = tmp[ndims*ndims];
5575 421 0         for(i=0; i < out->dims[ndims]; i++) {
5576 0 0         if(*wg && (*wg2 / *wg) < 1.5 ) {
    0          
5577 0           *dat = *(ac++) / *(wg++) * det;
5578 0           wg2++;
5579             } else {
5580 0           *dat = badval;
5581 0           ac++; wg++; wg2++;
5582             }
5583 0           dat += out->dimincs[ndims];
5584             } /* end of for loop */
5585             } /* end of flux flag set conditional */
5586             } /* end of convenience block */
5587            
5588             /* End of code for normal pixels */
5589             } else {
5590             /* The pixel was ludicrously huge -- just set this pixel to nan */
5591 0           PDLA_Double *dat = out->data;
5592 0 0         for(i=0;i
5593 0           dat += out->dimincs[i] * ovec[i];
5594 0 0         for(i=0;idims[ndims];i++) {
5595 0           *dat = badval; /* Should handle bad values too -- not yet */
5596 0           dat += out->dimincs[ndims];
5597             }
5598             }
5599              
5600             /* Increment the pixel counter */
5601             {
5602 481 100         for(i=0;
5603 474 50         (i
5604 474 100         (map_ptr += map->dimincs[i+1]) && /* Funky pre-test increment */
5605 948           (++(ovec[i]) >= out->dims[i]); /* Actual carry test */
5606 60           i++) {
5607 60           ovec[i] = 0;
5608 60           map_ptr -= out->dims[i] * map->dimincs[i+1];
5609             }
5610             }
5611 421 100         } while(i
5612              
5613              
5614              
5615             }
5616             PDLA_COMMENT("THREADLOOPEND")
5617             }
5618             }
5619 7           k0_datap -= __tinc1_0 * __tdims1 + __offsp[0];
5620 7 50         } while(PDLA->iterthreadloop(&__privtrans->__pdlthread,2)); break;}
5621 0           default:barf("PP INTERNAL ERROR! PLEASE MAKE A BUG REPORT\n");}
5622             }
5623             }
5624             }
5625            
5626              
5627              
5628              
5629 12           void pdl_map_free(pdl_trans *__tr ) {
5630             int __dim;
5631 12           pdl_map_struct *__privtrans = (pdl_map_struct *) __tr;
5632            
5633             {
5634            
5635 12           PDLA_TR_CLRMAGIC(__privtrans);
5636 12           SvREFCNT_dec(__privtrans->in);;SvREFCNT_dec(__privtrans->out);;SvREFCNT_dec(__privtrans->map);;SvREFCNT_dec(__privtrans->boundary);;SvREFCNT_dec(__privtrans->method);;SvREFCNT_dec(__privtrans->big);;SvREFCNT_dec(__privtrans->blur);;SvREFCNT_dec(__privtrans->sv_min);;SvREFCNT_dec(__privtrans->flux);;SvREFCNT_dec(__privtrans->bv);;
5637 12 50         if(__privtrans->__ddone) {
5638 12           PDLA->freethreadloop(&(__privtrans->__pdlthread));
5639             ;
5640             }
5641            
5642             }
5643 12           }
5644            
5645              
5646              
5647              
5648             static char pdl_map_vtable_flags[] =
5649             { PDLA_TPDLA_VAFFINE_OK};
5650             pdl_transvtable pdl_map_vtable = {
5651             0,0, 1, 1, pdl_map_vtable_flags,
5652             pdl_map_redodims, pdl_map_readdata, NULL,
5653             pdl_map_free,NULL,NULL,pdl_map_copy,
5654             sizeof(pdl_map_struct),"pdl_map_vtable"
5655             };
5656              
5657              
5658              
5659             MODULE = PDLA::Transform PACKAGE = PDLA::Transform
5660              
5661             PROTOTYPES: ENABLE
5662              
5663             int
5664             set_debugging(i)
5665             int i;
5666             CODE:
5667 0           RETVAL = __pdl_debugging;
5668 0           __pdl_debugging = i;
5669             OUTPUT:
5670             RETVAL
5671              
5672             int
5673             set_boundscheck(i)
5674             int i;
5675             CODE:
5676             if (! 1)
5677             warn("Bounds checking is disabled for PDLA::Transform");
5678 0           RETVAL = __pdl_boundscheck;
5679 0           __pdl_boundscheck = i;
5680             OUTPUT:
5681             RETVAL
5682              
5683              
5684             MODULE = PDLA::Transform PACKAGE = PDLA
5685              
5686              
5687             void
5688             _map_int(k0,in,out,map,boundary,method,big,blur,sv_min,flux,bv)
5689             pdl *k0
5690             SV *in
5691             SV *out
5692             SV *map
5693             SV *boundary
5694             SV *method
5695             SV *big
5696             SV *blur
5697             SV *sv_min
5698             SV *flux
5699             SV *bv
5700             CODE:
5701             { pdl_map_struct *__privtrans;
5702 12           int badflag_cache = 0;
5703 12           __privtrans = malloc(sizeof(*__privtrans));
5704 12           PDLA_THR_CLRMAGIC(&__privtrans->__pdlthread);
5705 12           PDLA_TR_SETMAGIC(__privtrans);
5706 12           __privtrans->flags = 0;
5707 12           __privtrans->__ddone = 0;
5708 12           __privtrans->vtable = &pdl_map_vtable;
5709 12           __privtrans->freeproc = PDLA->trans_mallocfreeproc;
5710 12           __privtrans->bvalflag = 0;
5711 12           badflag_cache = ((k0->state & PDLA_BADVAL) > 0);
5712 12 100         if (badflag_cache) __privtrans->bvalflag = 1;
5713 12 50         __privtrans->__datatype = 0;if(__privtrans->__datatype < k0->datatype) {
5714 12           __privtrans->__datatype = k0->datatype;
5715             }
5716 12 50         if(__privtrans->__datatype == PDLA_B) {}
5717 12 50         else if(__privtrans->__datatype == PDLA_S) {}
5718 12 50         else if(__privtrans->__datatype == PDLA_U) {}
5719 12 100         else if(__privtrans->__datatype == PDLA_L) {}
5720 7 50         else if(__privtrans->__datatype == PDLA_N) {}
5721 7 50         else if(__privtrans->__datatype == PDLA_Q) {}
5722 7 50         else if(__privtrans->__datatype == PDLA_F) {}
5723 7 50         else if(__privtrans->__datatype == PDLA_D) {}
5724 0           else __privtrans->__datatype = PDLA_D;
5725 12 50         if(__privtrans->__datatype != k0->datatype) {
5726 0           k0 = PDLA->get_convertedpdl(k0,__privtrans->__datatype);
5727 12           }{(__privtrans->in) = newSVsv(in);(__privtrans->out) = newSVsv(out);(__privtrans->map) = newSVsv(map);(__privtrans->boundary) = newSVsv(boundary);(__privtrans->method) = newSVsv(method);(__privtrans->big) = newSVsv(big);(__privtrans->blur) = newSVsv(blur);(__privtrans->sv_min) = newSVsv(sv_min);(__privtrans->flux) = newSVsv(flux);(__privtrans->bv) = newSVsv(bv);}PDLA_COMMENT("No flow")__privtrans->__pdlthread.inds = 0;__privtrans->pdls[0] = k0;
5728 12           PDLA->make_trans_mutual((pdl_trans *)__privtrans);
5729             if (badflag_cache) {
5730             }
5731 12           XSRETURN(0);
5732             }
5733              
5734              
5735             BOOT:
5736              
5737             PDLA_COMMENT("Get pointer to structure of core shared C routines")
5738             PDLA_COMMENT("make sure PDLA::Core is loaded")
5739            
5740 1           perl_require_pv ("PDLA/Core.pm"); /* make sure PDLA::Core is loaded */
5741             #ifndef aTHX_
5742             #define aTHX_
5743             #endif
5744 1 50         if (SvTRUE (ERRSV)) Perl_croak(aTHX_ "%s",SvPV_nolen (ERRSV));
    50          
    50          
    50          
    0          
    50          
    50          
    0          
    0          
    0          
    0          
    50          
    50          
    50          
    50          
    50          
    50          
    50          
    50          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    50          
    0          
    0          
    0          
    0          
5745 1           CoreSV = perl_get_sv("PDLA::SHARE",FALSE); /* SV* value */
5746 1 50         if (CoreSV==NULL)
5747 0           Perl_croak(aTHX_ "We require the PDLA::Core module, which was not found");
5748 1 50         PDLA = INT2PTR(Core*,SvIV( CoreSV )); /* Core* value */
5749 1 50         if (PDLA->Version != PDLA_CORE_VERSION)
5750 0           Perl_croak(aTHX_ "[PDLA->Version: %d PDLA_CORE_VERSION: %d XS_VERSION: %s] PDLA::Transform needs to be recompiled against the newly installed PDLA", PDLA->Version, PDLA_CORE_VERSION, XS_VERSION);
5751              
5752              
5753