Blender: node_composite_defocus.c Source File

Blender V2.61 - r43446
00001 /*
00002  * ***** BEGIN GPL LICENSE BLOCK *****
00003  *
00004  * This program is free software; you can redistribute it and/or
00005  * modify it under the terms of the GNU General Public License
00006  * as published by the Free Software Foundation; either version 2
00007  * of the License, or (at your option) any later version. 
00008  *
00009  * This program is distributed in the hope that it will be useful,
00010  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  * GNU General Public License for more details.
00013  *
00014  * You should have received a copy of the GNU General Public License
00015  * along with this program; if not, write to the Free Software Foundation,
00016  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00017  *
00018  * The Original Code is Copyright (C) 2006 Blender Foundation.
00019  * All rights reserved.
00020  *
00021  * The Original Code is: all of this file.
00022  *
00023  * Contributor(s): none yet.
00024  *
00025  * ***** END GPL LICENSE BLOCK *****
00026  */
00027 
00033 #include "node_composite_util.h"
00034 
00035 /* ************ qdn: Defocus node ****************** */
00036 static bNodeSocketTemplate cmp_node_defocus_in[]= {
00037     {   SOCK_RGBA, 1, "Image",          1.0f, 1.0f, 1.0f, 1.0f},
00038     {   SOCK_FLOAT, 1, "Z",         1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 1.0f, PROP_FACTOR},
00039     {   -1, 0, ""   }
00040 };
00041 static bNodeSocketTemplate cmp_node_defocus_out[]= {
00042     {   SOCK_RGBA, 0, "Image"},
00043     {   -1, 0, ""   }
00044 };
00045 
00046 
00047 // line coefs for point sampling & scancon. data.
00048 typedef struct BokehCoeffs {
00049     float x0, y0, dx, dy;
00050     float ls_x, ls_y;
00051     float min_x, min_y, max_x, max_y;
00052 } BokehCoeffs;
00053 
00054 // returns array of BokehCoeffs
00055 // returns length of array in 'len_bkh',
00056 // radius squared of inscribed disk in 'inradsq', needed in getWeight() test,
00057 // BKH[8] is the data returned for the bokeh shape & bkh_b[4] is it's 2d bound
00058 static void makeBokeh(char bktype, char ro, int* len_bkh, float* inradsq, BokehCoeffs BKH[8], float bkh_b[4])
00059 {
00060     float x0, x1, y0, y1, dx, dy, iDxy;
00061     /* ro now is in radians. */
00062     float w = MAX2(1e-6f, ro);  // never reported stangely enough, but a zero offset causes missing center line...
00063     float wi = DEG2RADF(360.f/bktype);
00064     int i, ov, nv;
00065     
00066     // bktype must be at least 3 & <= 8
00067     bktype = (bktype<3) ? 3 : ((bktype>8) ? 8 : bktype);
00068     *len_bkh = bktype;
00069     *inradsq = -1.f;
00070 
00071     for (i=0; i<(*len_bkh); i++) {
00072         x0 = cos(w);
00073         y0 = sin(w);
00074         w += wi;
00075         x1 = cos(w);
00076         y1 = sin(w);
00077         if ((*inradsq)<0.f) {
00078             // radius squared of inscribed disk
00079             float idx=(x0+x1)*0.5f, idy=(y0+y1)*0.5f;
00080             *inradsq = idx*idx + idy*idy;
00081         }
00082         BKH[i].x0 = x0;
00083         BKH[i].y0 = y0;
00084         dx = x1-x0, dy = y1-y0;
00085         iDxy = 1.f / sqrtf(dx*dx + dy*dy);
00086         dx *= iDxy;
00087         dy *= iDxy;
00088         BKH[i].dx = dx;
00089         BKH[i].dy = dy;
00090     }
00091 
00092     // precalc scanconversion data
00093     // bokeh bound, not transformed, for scanconvert
00094     bkh_b[0] = bkh_b[2] = 1e10f;    // xmin/ymin
00095     bkh_b[1] = bkh_b[3] = -1e10f;   // xmax/ymax
00096     ov = (*len_bkh) - 1;
00097     for (nv=0; nv<(*len_bkh); nv++) {
00098         bkh_b[0] = MIN2(bkh_b[0], BKH[nv].x0);  // xmin
00099         bkh_b[1] = MAX2(bkh_b[1], BKH[nv].x0);  // xmax
00100         bkh_b[2] = MIN2(bkh_b[2], BKH[nv].y0);  // ymin
00101         bkh_b[3] = MAX2(bkh_b[3], BKH[nv].y0);  // ymax
00102         BKH[nv].min_x = MIN2(BKH[ov].x0, BKH[nv].x0);
00103         BKH[nv].max_x = MAX2(BKH[ov].x0, BKH[nv].x0);
00104         BKH[nv].min_y = MIN2(BKH[ov].y0, BKH[nv].y0);
00105         BKH[nv].max_y = MAX2(BKH[ov].y0, BKH[nv].y0);
00106         dy = BKH[nv].y0 - BKH[ov].y0;
00107         BKH[nv].ls_x = (BKH[nv].x0 - BKH[ov].x0) / ((dy==0.f) ? 1.f : dy);
00108         BKH[nv].ls_y = (BKH[nv].ls_x==0.f) ? 1.f : (1.f/BKH[nv].ls_x);
00109         ov = nv;
00110     }
00111 }
00112 
00113 // test if u/v inside shape & returns weight value
00114 static float getWeight(BokehCoeffs* BKH, int len_bkh, float u, float v, float rad, float inradsq)
00115 {
00116     BokehCoeffs* bc = BKH;
00117     float cdist, irad = (rad==0.f) ? 1.f : (1.f/rad);
00118     u *= irad;
00119     v *= irad;
00120  
00121     // early out test1: if point outside outer unit disk, it cannot be inside shape
00122     cdist = u*u + v*v;
00123     if (cdist>1.f) return 0.f;
00124     
00125     // early out test2: if point inside or on inner disk, point must be inside shape
00126     if (cdist<=inradsq) return 1.f;
00127     
00128     while (len_bkh--) {
00129         if ((bc->dy*(u - bc->x0) - bc->dx*(v - bc->y0)) > 0.f) return 0.f;
00130         bc++;
00131     }
00132     return 1.f;
00133 }
00134 
00135 // QMC.seq. for sampling, A.Keller, EMS
00136 static float RI_vdC(unsigned int bits, unsigned int r)
00137 {
00138     bits = ( bits << 16) | ( bits >> 16);
00139     bits = ((bits & 0x00ff00ff) << 8) | ((bits & 0xff00ff00) >> 8);
00140     bits = ((bits & 0x0f0f0f0f) << 4) | ((bits & 0xf0f0f0f0) >> 4);
00141     bits = ((bits & 0x33333333) << 2) | ((bits & 0xcccccccc) >> 2);
00142     bits = ((bits & 0x55555555) << 1) | ((bits & 0xaaaaaaaa) >> 1);
00143     bits ^= r;
00144     return (float)((double)bits / 4294967296.0);
00145 }
00146 
00147 // single channel IIR gaussian filtering
00148 // much faster than anything else, constant time independent of width
00149 // should extend to multichannel and make this a node, could be useful
00150 // note: this is an almost exact copy of 'IIR_gauss'
00151 static void IIR_gauss_single(CompBuf* buf, float sigma)
00152 {
00153     double q, q2, sc, cf[4], tsM[9], tsu[3], tsv[3];
00154     float *X, *Y, *W;
00155     int i, x, y, sz;
00156 
00157     // single channel only for now
00158     if (buf->type != CB_VAL) return;
00159 
00160     // <0.5 not valid, though can have a possibly useful sort of sharpening effect
00161     if (sigma < 0.5f) return;
00162     
00163     // see "Recursive Gabor Filtering" by Young/VanVliet
00164     // all factors here in double.prec. Required, because for single.prec it seems to blow up if sigma > ~200
00165     if (sigma >= 3.556f)
00166         q = 0.9804f*(sigma - 3.556f) + 2.5091f;
00167     else // sigma >= 0.5
00168         q = (0.0561f*sigma + 0.5784f)*sigma - 0.2568f;
00169     q2 = q*q;
00170     sc = (1.1668 + q)*(3.203729649  + (2.21566 + q)*q);
00171     // no gabor filtering here, so no complex multiplies, just the regular coefs.
00172     // all negated here, so as not to have to recalc Triggs/Sdika matrix
00173     cf[1] = q*(5.788961737 + (6.76492 + 3.0*q)*q)/ sc;
00174     cf[2] = -q2*(3.38246 + 3.0*q)/sc;
00175     // 0 & 3 unchanged
00176     cf[3] = q2*q/sc;
00177     cf[0] = 1.0 - cf[1] - cf[2] - cf[3];
00178 
00179     // Triggs/Sdika border corrections,
00180     // it seems to work, not entirely sure if it is actually totally correct,
00181     // Besides J.M.Geusebroek's anigauss.c (see http://www.science.uva.nl/~mark),
00182     // found one other implementation by Cristoph Lampert,
00183     // but neither seem to be quite the same, result seems to be ok sofar anyway.
00184     // Extra scale factor here to not have to do it in filter,
00185     // though maybe this had something to with the precision errors
00186     sc = cf[0]/((1.0 + cf[1] - cf[2] + cf[3])*(1.0 - cf[1] - cf[2] - cf[3])*(1.0 + cf[2] + (cf[1] - cf[3])*cf[3]));
00187     tsM[0] = sc*(-cf[3]*cf[1] + 1.0 - cf[3]*cf[3] - cf[2]);
00188     tsM[1] = sc*((cf[3] + cf[1])*(cf[2] + cf[3]*cf[1]));
00189     tsM[2] = sc*(cf[3]*(cf[1] + cf[3]*cf[2]));
00190     tsM[3] = sc*(cf[1] + cf[3]*cf[2]);
00191     tsM[4] = sc*(-(cf[2] - 1.0)*(cf[2] + cf[3]*cf[1]));
00192     tsM[5] = sc*(-(cf[3]*cf[1] + cf[3]*cf[3] + cf[2] - 1.0)*cf[3]);
00193     tsM[6] = sc*(cf[3]*cf[1] + cf[2] + cf[1]*cf[1] - cf[2]*cf[2]);
00194     tsM[7] = sc*(cf[1]*cf[2] + cf[3]*cf[2]*cf[2] - cf[1]*cf[3]*cf[3] - cf[3]*cf[3]*cf[3] - cf[3]*cf[2] + cf[3]);
00195     tsM[8] = sc*(cf[3]*(cf[1] + cf[3]*cf[2]));
00196 
00197 #define YVV(L)\
00198 {\
00199     W[0] = cf[0]*X[0] + cf[1]*X[0] + cf[2]*X[0] + cf[3]*X[0];\
00200     W[1] = cf[0]*X[1] + cf[1]*W[0] + cf[2]*X[0] + cf[3]*X[0];\
00201     W[2] = cf[0]*X[2] + cf[1]*W[1] + cf[2]*W[0] + cf[3]*X[0];\
00202     for (i=3; i<L; i++)\
00203         W[i] = cf[0]*X[i] + cf[1]*W[i-1] + cf[2]*W[i-2] + cf[3]*W[i-3];\
00204     tsu[0] = W[L-1] - X[L-1];\
00205     tsu[1] = W[L-2] - X[L-1];\
00206     tsu[2] = W[L-3] - X[L-1];\
00207     tsv[0] = tsM[0]*tsu[0] + tsM[1]*tsu[1] + tsM[2]*tsu[2] + X[L-1];\
00208     tsv[1] = tsM[3]*tsu[0] + tsM[4]*tsu[1] + tsM[5]*tsu[2] + X[L-1];\
00209     tsv[2] = tsM[6]*tsu[0] + tsM[7]*tsu[1] + tsM[8]*tsu[2] + X[L-1];\
00210     Y[L-1] = cf[0]*W[L-1] + cf[1]*tsv[0] + cf[2]*tsv[1] + cf[3]*tsv[2];\
00211     Y[L-2] = cf[0]*W[L-2] + cf[1]*Y[L-1] + cf[2]*tsv[0] + cf[3]*tsv[1];\
00212     Y[L-3] = cf[0]*W[L-3] + cf[1]*Y[L-2] + cf[2]*Y[L-1] + cf[3]*tsv[0];\
00213     for (i=L-4; i>=0; i--)\
00214         Y[i] = cf[0]*W[i] + cf[1]*Y[i+1] + cf[2]*Y[i+2] + cf[3]*Y[i+3];\
00215 }
00216 
00217     // intermediate buffers
00218     sz = MAX2(buf->x, buf->y);
00219     Y = MEM_callocN(sz*sizeof(float), "IIR_gauss Y buf");
00220     W = MEM_callocN(sz*sizeof(float), "IIR_gauss W buf");
00221     // H
00222     for (y=0; y<buf->y; y++) {
00223         X = &buf->rect[y*buf->x];
00224         YVV(buf->x);
00225         memcpy(X, Y, sizeof(float)*buf->x);
00226     }
00227     // V
00228     X = MEM_callocN(buf->y*sizeof(float), "IIR_gauss X buf");
00229     for (x=0; x<buf->x; x++) {
00230         for (y=0; y<buf->y; y++)
00231             X[y] = buf->rect[x + y*buf->x];
00232         YVV(buf->y);
00233         for (y=0; y<buf->y; y++)
00234             buf->rect[x + y*buf->x] = Y[y];
00235     }
00236     MEM_freeN(X);
00237 
00238     MEM_freeN(W);
00239     MEM_freeN(Y);
00240 #undef YVV
00241 }
00242 
00243 static void defocus_blur(bNode *node, CompBuf *new, CompBuf *img, CompBuf *zbuf, float inpval, int no_zbuf)
00244 {
00245     NodeDefocus *nqd = node->storage;
00246     CompBuf *wts;       // weights buffer
00247     CompBuf *crad;      // CoC radius buffer
00248     BokehCoeffs BKH[8]; // bokeh shape data, here never > 8 pts.
00249     float bkh_b[4] = {0};   // shape 2D bound
00250     float cam_fdist=1, cam_invfdist=1, cam_lens=35;
00251     float dof_sp, maxfgc, bk_hn_theta=0, inradsq=0;
00252     int y, len_bkh=0, ydone=0;
00253     float aspect, aperture;
00254     int minsz;
00255     //float bcrad, nmaxc, scf;
00256     
00257     // get some required params from the current scene camera
00258     // (ton) this is wrong, needs fixed
00259     Scene *scene= (Scene*)node->id;
00260     Object* camob = (scene)? scene->camera: NULL;
00261     if (camob && camob->type==OB_CAMERA) {
00262         Camera* cam = (Camera*)camob->data;
00263         cam_lens = cam->lens;
00264         cam_fdist = object_camera_dof_distance(camob);
00265         if (cam_fdist==0.0f) cam_fdist = 1e10f; /* if the dof is 0.0 then set it be be far away */
00266         cam_invfdist = 1.f/cam_fdist;
00267     }
00268 
00269     // guess work here.. best match with raytraced result
00270     minsz = MIN2(img->x, img->y);
00271     dof_sp = (float)minsz / (16.f / cam_lens);  // <- == aspect * MIN2(img->x, img->y) / tan(0.5f * fov);
00272     
00273     // aperture
00274     aspect = (img->x > img->y) ? (img->y / (float)img->x) : (img->x / (float)img->y);
00275     aperture = 0.5f*(cam_lens / (aspect*32.f)) / nqd->fstop;
00276     
00277     // if not disk, make bokeh coefficients and other needed data
00278     if (nqd->bktype!=0) {
00279         makeBokeh(nqd->bktype, nqd->rotation, &len_bkh, &inradsq, BKH, bkh_b);
00280         bk_hn_theta = 0.5 * nqd->bktype * sin(2.0 * M_PI / nqd->bktype);    // weight factor
00281     }
00282     
00283     // accumulated weights
00284     wts = alloc_compbuf(img->x, img->y, CB_VAL, 1);
00285     // CoC radius buffer
00286     crad = alloc_compbuf(img->x, img->y, CB_VAL, 1);
00287 
00288     // if 'no_zbuf' flag set (which is always set if input is not an image),
00289     // values are instead interpreted directly as blur radius values
00290     if (no_zbuf) {
00291         // to prevent *reaaallly* big radius values and impossible calculation times,
00292         // limit the maximum to half the image width or height, whichever is smaller
00293         float maxr = 0.5f*(float)MIN2(img->x, img->y);
00294         unsigned int p;
00295 
00296         for (p=0; p<(unsigned int)(img->x*img->y); p++) {
00297             crad->rect[p] = zbuf ? (zbuf->rect[p]*nqd->scale) : inpval;
00298             // bug #5921, limit minimum
00299             crad->rect[p] = MAX2(1e-5f, crad->rect[p]);
00300             crad->rect[p] = MIN2(crad->rect[p], maxr);
00301             // if maxblur!=0, limit maximum
00302             if (nqd->maxblur != 0.f) crad->rect[p] = MIN2(crad->rect[p], nqd->maxblur);
00303         }
00304     }
00305     else {
00306         float wt;
00307 
00308         // actual zbuffer.
00309         // separate foreground from background CoC's
00310         // then blur background and blend in again with foreground,
00311         // improves the 'blurred foreground overlapping in-focus midground' sharp boundary problem.
00312         // wts buffer here used for blendmask
00313         maxfgc = 0.f; // maximum foreground CoC radius
00314         for (y=0; y<img->y; y++) {
00315             unsigned int p = y * img->x;
00316             int x;
00317             for (x=0; x<img->x; x++) {
00318                 unsigned int px = p + x;
00319                 float iZ = (zbuf->rect[px]==0.f) ? 0.f : (1.f/zbuf->rect[px]);
00320                 crad->rect[px] = 0.5f*(aperture*(dof_sp*(cam_invfdist - iZ) - 1.f));
00321                 if (crad->rect[px] <= 0.f) {
00322                     wts->rect[px] = 1.f;
00323                     crad->rect[px] = -crad->rect[px];
00324                     if (crad->rect[px] > maxfgc) maxfgc = crad->rect[px];
00325                 }
00326                 else crad->rect[px] = wts->rect[px] = 0;
00327             }
00328         }
00329         
00330         // fast blur...
00331         // bug #6656 part 1, probably when previous node_composite.c was split into separate files, it was not properly updated
00332         // to include recent cvs commits (well, at least not defocus node), so this part was missing...
00333         wt = aperture*128.f;
00334         IIR_gauss_single(crad, wt);
00335         IIR_gauss_single(wts, wt);
00336         
00337         // bug #6656 part 2a, although foreground blur is not based anymore on closest object,
00338         // the rescaling op below was still based on that anyway, and unlike the comment in below code,
00339         // the difference is therefore not always that small at all...
00340         // so for now commented out, not sure if this is going to cause other future problems, lets just wait and see...
00341         /*
00342         // find new maximum to scale it back to original
00343         // (could skip this, not strictly necessary, in general, difference is quite small, but just in case...)
00344         nmaxc = 0;
00345         for (p=0; p<(img->x*img->y); p++)
00346             if (crad->rect[p] > nmaxc) nmaxc = crad->rect[p];
00347         // rescale factor
00348         scf = (nmaxc==0.f) ? 1.f: (maxfgc / nmaxc);
00349         */
00350 
00351         // and blend...
00352         for (y=0; y<img->y; y++) {
00353             unsigned int p = y*img->x;
00354             int x;
00355 
00356             for (x=0; x<img->x; x++) {
00357                 unsigned px = p + x;
00358                 if (zbuf->rect[px]!=0.f) {
00359                     float iZ = (zbuf->rect[px]==0.f) ? 0.f : (1.f/zbuf->rect[px]);
00360                     
00361                     // bug #6656 part 2b, do not rescale
00362                     /*
00363                     bcrad = 0.5f*fabs(aperture*(dof_sp*(cam_invfdist - iZ) - 1.f));
00364                     // scale crad back to original maximum and blend
00365                     crad->rect[px] = bcrad + wts->rect[px]*(scf*crad->rect[px] - bcrad);
00366                     */
00367                     crad->rect[px] = 0.5f*fabsf(aperture*(dof_sp*(cam_invfdist - iZ) - 1.f));
00368                     
00369                     // 'bug' #6615, limit minimum radius to 1 pixel, not really a solution, but somewhat mitigates the problem
00370                     crad->rect[px] = MAX2(crad->rect[px], 0.5f);
00371                     // if maxblur!=0, limit maximum
00372                     if (nqd->maxblur != 0.f) crad->rect[px] = MIN2(crad->rect[px], nqd->maxblur);
00373                 }
00374                 else crad->rect[px] = 0.f;
00375                 // clear weights for next part
00376                 wts->rect[px] = 0.f;
00377             }
00378             // esc set by main calling process
00379             if(node->exec & NODE_BREAK)
00380                 break;
00381         }
00382     }
00383 
00384     //------------------------------------------------------------------
00385     // main loop
00386 #ifndef __APPLE__ /* can crash on Mac, see bug #22856, disabled for now */
00387 #ifdef __INTEL_COMPILER /* icc doesn't like the compound statement -- internal error: 0_1506 */
00388     #pragma omp parallel for private(y) if(!nqd->preview) schedule(guided)
00389 #else
00390     #pragma omp parallel for private(y) if(!nqd->preview && img->y*img->x > 16384) schedule(guided)
00391 #endif
00392 #endif
00393     for (y=0; y<img->y; y++) {
00394         unsigned int p, p4, zp, cp, cp4;
00395         float *ctcol, u, v, ct_crad, cR2=0;
00396         int x, sx, sy;
00397 
00398         // some sort of visual feedback would be nice, or at least this text in the renderwin header
00399         // but for now just print some info in the console every 8 scanlines.
00400         #pragma omp critical
00401         {
00402             if (((ydone & 7)==0) || (ydone==(img->y-1))) {
00403                 if(G.background==0) {
00404                     printf("\rdefocus: Processing Line %d of %d ... ", ydone+1, img->y);
00405                     fflush(stdout);
00406                 }
00407             }
00408 
00409             ydone++;
00410         }
00411 
00412         // esc set by main calling process. don't break because openmp doesn't
00413         // allow it, just continue and do nothing 
00414         if(node->exec & NODE_BREAK)
00415             continue;
00416 
00417         zp = y * img->x;
00418         for (x=0; x<img->x; x++) {
00419             cp = zp + x;
00420             cp4 = cp * img->type;
00421 
00422             // Circle of Confusion radius for current pixel
00423             cR2 = ct_crad = crad->rect[cp];
00424             // skip if zero (border render)
00425             if (ct_crad==0.f) {
00426                 // related to bug #5921, forgot output image when skipping 0 radius values
00427                 new->rect[cp4] = img->rect[cp4];
00428                 if (new->type != CB_VAL) {
00429                     new->rect[cp4+1] = img->rect[cp4+1];
00430                     new->rect[cp4+2] = img->rect[cp4+2];
00431                     new->rect[cp4+3] = img->rect[cp4+3];
00432                 }
00433                 continue;
00434             }
00435             cR2 *= cR2;
00436             
00437             // pixel color
00438             ctcol = &img->rect[cp4];
00439             
00440             if (!nqd->preview) {
00441                 int xs, xe, ys, ye;
00442                 float lwt, wtcol[4] = {0}, aacol[4] = {0};
00443                 float wt;
00444 
00445                 // shape weight
00446                 if (nqd->bktype==0) // disk
00447                     wt = 1.f/((float)M_PI*cR2);
00448                 else
00449                     wt = 1.f/(cR2*bk_hn_theta);
00450 
00451                 // weighted color
00452                 wtcol[0] = wt*ctcol[0];
00453                 if (new->type != CB_VAL) {
00454                     wtcol[1] = wt*ctcol[1];
00455                     wtcol[2] = wt*ctcol[2];
00456                     wtcol[3] = wt*ctcol[3];
00457                 }
00458 
00459                 // macro for background blur overlap test
00460                 // unfortunately, since this is done per pixel,
00461                 // it has a very significant negative impact on processing time...
00462                 // (eg. aa disk blur without test: 112 sec, vs with test: 176 sec...)
00463                 // iff center blur radius > threshold
00464                 // and if overlap pixel in focus, do nothing, else add color/weigbt
00465                 // (threshold constant is dependant on amount of blur)
00466                 #define TESTBG1(c, w) {\
00467                     if (ct_crad > nqd->bthresh) {\
00468                         if (crad->rect[p] > nqd->bthresh) {\
00469                             new->rect[p] += c[0];\
00470                             wts->rect[p] += w;\
00471                         }\
00472                     }\
00473                     else {\
00474                         new->rect[p] += c[0];\
00475                         wts->rect[p] += w;\
00476                     }\
00477                 }
00478                 #define TESTBG4(c, w) {\
00479                     if (ct_crad > nqd->bthresh) {\
00480                         if (crad->rect[p] > nqd->bthresh) {\
00481                             new->rect[p4] += c[0];\
00482                             new->rect[p4+1] += c[1];\
00483                             new->rect[p4+2] += c[2];\
00484                             new->rect[p4+3] += c[3];\
00485                             wts->rect[p] += w;\
00486                         }\
00487                     }\
00488                     else {\
00489                         new->rect[p4] += c[0];\
00490                         new->rect[p4+1] += c[1];\
00491                         new->rect[p4+2] += c[2];\
00492                         new->rect[p4+3] += c[3];\
00493                         wts->rect[p] += w;\
00494                     }\
00495                 }
00496                 if (nqd->bktype == 0) {
00497                     // Disk
00498                     int _x, i, j, di;
00499                     float Dj, T;
00500                     // AA pixel
00501                     #define AAPIX(a, b) {\
00502                         int _ny = b;\
00503                         if ((_ny >= 0) && (_ny < new->y)) {\
00504                             int _nx = a;\
00505                             if ((_nx >=0) && (_nx < new->x)) {\
00506                                 p = _ny*new->x + _nx;\
00507                                 if (new->type==CB_VAL) {\
00508                                     TESTBG1(aacol, lwt);\
00509                                 }\
00510                                 else {\
00511                                     p4 = p * new->type;\
00512                                     TESTBG4(aacol, lwt);\
00513                                 }\
00514                             }\
00515                         }\
00516                     }
00517                     // circle scanline
00518                     #define CSCAN(a, b) {\
00519                         int _ny = y + b;\
00520                         if ((_ny >= 0) && (_ny < new->y)) {\
00521                             xs = x - a + 1;\
00522                             if (xs < 0) xs = 0;\
00523                             xe = x + a;\
00524                             if (xe > new->x) xe = new->x;\
00525                             p = _ny*new->x + xs;\
00526                             if (new->type==CB_VAL) {\
00527                                 for (_x=xs; _x<xe; _x++, p++) TESTBG1(wtcol, wt);\
00528                             }\
00529                             else {\
00530                                 p4 = p * new->type;\
00531                                 for (_x=xs; _x<xe; _x++, p++, p4+=new->type) TESTBG4(wtcol, wt);\
00532                             }\
00533                         }\
00534                     }
00535 
00536                     i = ceil(ct_crad);
00537                     j = 0;
00538                     T = 0;
00539                     while (i > j) {
00540                         Dj = sqrt(cR2 - j*j);
00541                         Dj -= floorf(Dj);
00542                         di = 0;
00543                         if (Dj > T) { i--;  di = 1; }
00544                         T = Dj;
00545                         aacol[0] = wtcol[0]*Dj;
00546                         if (new->type != CB_VAL) {
00547                             aacol[1] = wtcol[1]*Dj;
00548                             aacol[2] = wtcol[2]*Dj;
00549                             aacol[3] = wtcol[3]*Dj;
00550                         }
00551                         lwt = wt*Dj;
00552                         if (i!=j) {
00553                             // outer pixels
00554                             AAPIX(x+j, y+i)
00555                             AAPIX(x+j, y-i)
00556                             if (j) {
00557                                 AAPIX(x-j, y+i) // BL
00558                                 AAPIX(x-j, y-i) // TL
00559                             }
00560                             if (di) { // only when i changed, interior of outer section
00561                                 CSCAN(j, i) // bottom
00562                                 CSCAN(j, -i) // top
00563                             }
00564                         }
00565                         // lower mid section
00566                         AAPIX(x+i, y+j)
00567                         if (i) AAPIX(x-i, y+j)
00568                         CSCAN(i, j)
00569                         // upper mid section
00570                         if (j) {
00571                             AAPIX(x+i, y-j)
00572                             if (i) AAPIX(x-i, y-j)
00573                             CSCAN(i, -j)
00574                         }
00575                         j++;
00576                     }
00577                     #undef CSCAN
00578                     #undef AAPIX
00579                 }
00580                 else {
00581                     // n-agonal
00582                     int ov, nv;
00583                     float mind, maxd, lwt;
00584                     ys = MAX2((int)floor(bkh_b[2]*ct_crad + y), 0);
00585                     ye = MIN2((int)ceil(bkh_b[3]*ct_crad + y), new->y - 1);
00586                     for (sy=ys; sy<=ye; sy++) {
00587                         float fxs = 1e10f, fxe = -1e10f;
00588                         float yf = (sy - y)/ct_crad;
00589                         int found = 0;
00590                         ov = len_bkh - 1;
00591                         mind = maxd = 0;
00592                         for (nv=0; nv<len_bkh; nv++) {
00593                             if ((BKH[nv].max_y >= yf) && (BKH[nv].min_y <= yf)) {
00594                                 float tx = BKH[ov].x0 + BKH[nv].ls_x*(yf - BKH[ov].y0);
00595                                 if (tx < fxs) { fxs = tx;  mind = BKH[nv].ls_x; }
00596                                 if (tx > fxe) { fxe = tx;  maxd = BKH[nv].ls_x; }
00597                                 if (++found == 2) break;
00598                             }
00599                             ov = nv;
00600                         }
00601                         if (found) {
00602                             fxs = fxs*ct_crad + x;
00603                             fxe = fxe*ct_crad + x;
00604                             xs = (int)floor(fxs), xe = (int)ceil(fxe);
00605                             // AA hack for first and last x pixel, near vertical edges only
00606                             if (fabsf(mind) <= 1.f) {
00607                                 if ((xs >= 0) && (xs < new->x)) {
00608                                     lwt = 1.f-(fxs - xs);
00609                                     aacol[0] = wtcol[0]*lwt;
00610                                     p = xs + sy*new->x;
00611                                     if (new->type==CB_VAL) {
00612                                         lwt *= wt;
00613                                         TESTBG1(aacol, lwt);
00614                                     }
00615                                     else {
00616                                         p4 = p * new->type;
00617                                         aacol[1] = wtcol[1]*lwt;
00618                                         aacol[2] = wtcol[2]*lwt;
00619                                         aacol[3] = wtcol[3]*lwt;
00620                                         lwt *= wt;
00621                                         TESTBG4(aacol, lwt);
00622                                     }
00623                                 }
00624                             }
00625                             if (fabsf(maxd) <= 1.f) {
00626                                 if ((xe >= 0) && (xe < new->x)) {
00627                                     lwt = 1.f-(xe - fxe);
00628                                     aacol[0] = wtcol[0]*lwt;
00629                                     p = xe + sy*new->x;
00630                                     if (new->type==CB_VAL) {
00631                                         lwt *= wt;
00632                                         TESTBG1(aacol, lwt);
00633                                     }
00634                                     else {
00635                                         p4 = p * new->type;
00636                                         aacol[1] = wtcol[1]*lwt;
00637                                         aacol[2] = wtcol[2]*lwt;
00638                                         aacol[3] = wtcol[3]*lwt;
00639                                         lwt *= wt;
00640                                         TESTBG4(aacol, lwt);
00641                                     }
00642                                 }
00643                             }
00644                             xs = MAX2(xs+1, 0);
00645                             xe = MIN2(xe, new->x);
00646                             // remaining interior scanline
00647                             p = sy*new->x + xs;
00648                             if (new->type==CB_VAL) {
00649                                 for (sx=xs; sx<xe; sx++, p++) TESTBG1(wtcol, wt);
00650                             }
00651                             else {
00652                                 p4 = p * new->type;
00653                                 for (sx=xs; sx<xe; sx++, p++, p4+=new->type) TESTBG4(wtcol, wt);
00654                             }
00655                         }
00656                     }
00657 
00658                     // now traverse in opposite direction, y scanlines,
00659                     // but this time only draw the near horizontal edges,
00660                     // applying same AA hack as above
00661                     xs = MAX2((int)floor(bkh_b[0]*ct_crad + x), 0);
00662                     xe = MIN2((int)ceil(bkh_b[1]*ct_crad + x), img->x - 1);
00663                     for (sx=xs; sx<=xe; sx++) {
00664                         float xf = (sx - x)/ct_crad;
00665                         float fys = 1e10f, fye = -1e10f;
00666                         int found = 0;
00667                         ov = len_bkh - 1;
00668                         mind = maxd = 0;
00669                         for (nv=0; nv<len_bkh; nv++) {
00670                             if ((BKH[nv].max_x >= xf) && (BKH[nv].min_x <= xf)) {
00671                                 float ty = BKH[ov].y0 + BKH[nv].ls_y*(xf - BKH[ov].x0);
00672                                 if (ty < fys) { fys = ty;  mind = BKH[nv].ls_y; }
00673                                 if (ty > fye) { fye = ty;  maxd = BKH[nv].ls_y; }
00674                                 if (++found == 2) break;
00675                             }
00676                             ov = nv;
00677                         }
00678                         if (found) {
00679                             fys = fys*ct_crad + y;
00680                             fye = fye*ct_crad + y;
00681                             // near horizontal edges only, line slope <= 1
00682                             if (fabsf(mind) <= 1.f) {
00683                                 int iys = (int)floor(fys);
00684                                 if ((iys >= 0) && (iys < new->y)) {
00685                                     lwt = 1.f - (fys - iys);
00686                                     aacol[0] = wtcol[0]*lwt;
00687                                     p = sx + iys*new->x;
00688                                     if (new->type==CB_VAL) {
00689                                         lwt *= wt;
00690                                         TESTBG1(aacol, lwt);
00691                                     }
00692                                     else {
00693                                         p4 = p * new->type;
00694                                         aacol[1] = wtcol[1]*lwt;
00695                                         aacol[2] = wtcol[2]*lwt;
00696                                         aacol[3] = wtcol[3]*lwt;
00697                                         lwt *= wt;
00698                                         TESTBG4(aacol, lwt);
00699                                     }
00700                                 }
00701                             }
00702                             if (fabsf(maxd) <= 1.f) {
00703                                 int iye = ceil(fye);
00704                                 if ((iye >= 0) && (iye < new->y)) {
00705                                     lwt = 1.f - (iye - fye);
00706                                     aacol[0] = wtcol[0]*lwt;
00707                                     p = sx + iye*new->x;
00708                                     if (new->type==CB_VAL) {
00709                                         lwt *= wt;
00710                                         TESTBG1(aacol, lwt);
00711                                     }
00712                                     else {
00713                                         p4 = p * new->type;
00714                                         aacol[1] = wtcol[1]*lwt;
00715                                         aacol[2] = wtcol[2]*lwt;
00716                                         aacol[3] = wtcol[3]*lwt;
00717                                         lwt *= wt;
00718                                         TESTBG4(aacol, lwt);
00719                                     }
00720                                 }
00721                             }
00722                         }
00723                     }
00724 
00725                 }
00726                 #undef TESTBG4
00727                 #undef TESTBG1
00728 
00729             }
00730             else {
00731                 // sampled, simple rejection sampling here, good enough
00732                 unsigned int maxsam, s, ui = BLI_rand()*BLI_rand();
00733                 float wcor, cpr = BLI_frand(), lwt;
00734                 if (no_zbuf)
00735                     maxsam = nqd->samples;  // no zbuffer input, use sample value directly
00736                 else {
00737                     // depth adaptive sampling hack, the more out of focus, the more samples taken, 16 minimum.
00738                     maxsam = (int)(0.5f + nqd->samples*(1.f-(float)exp(-fabs(zbuf->rect[cp] - cam_fdist))));
00739                     if (maxsam < 16) maxsam = 16;
00740                 }
00741                 wcor = 1.f/(float)maxsam;
00742                 for (s=0; s<maxsam; ++s) {
00743                     u = ct_crad*(2.f*RI_vdC(s, ui) - 1.f);
00744                     v = ct_crad*(2.f*(s + cpr)/(float)maxsam - 1.f);
00745                     sx = (int)(x + u + 0.5f), sy = (int)(y + v + 0.5f);
00746                     if ((sx<0) || (sx >= new->x) || (sy<0) || (sy >= new->y)) continue;
00747                     p = sx + sy*new->x;
00748                     p4 = p * new->type;
00749                     if (nqd->bktype==0) // Disk
00750                         lwt = ((u*u + v*v)<=cR2) ? wcor : 0.f;
00751                     else    // AA not needed here
00752                         lwt = wcor * getWeight(BKH, len_bkh, u, v, ct_crad, inradsq);
00753                     // prevent background bleeding onto in-focus pixels, user-option
00754                     if (ct_crad > nqd->bthresh) {  // if center blur > threshold
00755                         if (crad->rect[p] > nqd->bthresh) { // if overlap pixel in focus, do nothing, else add color/weigbt
00756                             new->rect[p4] += ctcol[0] * lwt;
00757                             if (new->type != CB_VAL) {
00758                                 new->rect[p4+1] += ctcol[1] * lwt;
00759                                 new->rect[p4+2] += ctcol[2] * lwt;
00760                                 new->rect[p4+3] += ctcol[3] * lwt;
00761                             }
00762                             wts->rect[p] += lwt;
00763                         }
00764                     }
00765                     else {
00766                         new->rect[p4] += ctcol[0] * lwt;
00767                         if (new->type != CB_VAL) {
00768                             new->rect[p4+1] += ctcol[1] * lwt;
00769                             new->rect[p4+2] += ctcol[2] * lwt;
00770                             new->rect[p4+3] += ctcol[3] * lwt;
00771                         }
00772                         wts->rect[p] += lwt;
00773                     }
00774                 }
00775             }
00776 
00777         }
00778     }
00779     
00780     // finally, normalize
00781     for (y=0; y<new->y; y++) {
00782         unsigned int p = y * new->x;
00783         unsigned int p4 = p * new->type;
00784         int x;
00785 
00786         for (x=0; x<new->x; x++) {
00787             float dv = (wts->rect[p]==0.f) ? 1.f : (1.f/wts->rect[p]);
00788             new->rect[p4] *= dv;
00789             if (new->type!=CB_VAL) {
00790                 new->rect[p4+1] *= dv;
00791                 new->rect[p4+2] *= dv;
00792                 new->rect[p4+3] *= dv;
00793             }
00794             p++;
00795             p4 += new->type;
00796         }
00797     }
00798 
00799     free_compbuf(crad);
00800     free_compbuf(wts);
00801     
00802     printf("Done\n");
00803 }
00804 
00805 
00806 static void node_composit_exec_defocus(void *UNUSED(data), bNode *node, bNodeStack **in, bNodeStack **out)
00807 {
00808     CompBuf *new, *old, *zbuf_use = NULL, *img = in[0]->data, *zbuf = in[1]->data;
00809     NodeDefocus *nqd = node->storage;
00810     int no_zbuf = nqd->no_zbuf;
00811     
00812     if ((img==NULL) || (out[0]->hasoutput==0)) return;
00813     
00814     // if image not valid type or fstop==infinite (128), nothing to do, pass in to out
00815     if (((img->type!=CB_RGBA) && (img->type!=CB_VAL)) || ((no_zbuf==0) && (nqd->fstop==128.f))) {
00816         out[0]->data = pass_on_compbuf(img);
00817         return;
00818     }
00819     
00820     if (zbuf!=NULL) {
00821         // Zbuf input, check to make sure, single channel, same size
00822         // doesn't have to be actual zbuffer, but must be value type
00823         if ((zbuf->x != img->x) || (zbuf->y != img->y)) {
00824             // could do a scale here instead...
00825             printf("Z input must be same size as image !\n");
00826             return;
00827         }
00828         zbuf_use = typecheck_compbuf(zbuf, CB_VAL);
00829     }
00830     else no_zbuf = 1;   // no zbuffer input
00831         
00832     // ok, process
00833     old = img;
00834     if (nqd->gamco) {
00835         // gamma correct, blender func is simplified, fixed value & RGBA only,
00836         // should make user param. also depremul and premul afterwards, gamma
00837         // correction can't work with premul alpha
00838         old = dupalloc_compbuf(img);
00839         premul_compbuf(old, 1);
00840         gamma_correct_compbuf(old, 0);
00841         premul_compbuf(old, 0);
00842     }
00843     
00844     new = alloc_compbuf(old->x, old->y, old->type, 1);
00845     defocus_blur(node, new, old, zbuf_use, in[1]->vec[0]*nqd->scale, no_zbuf);
00846     
00847     if (nqd->gamco) {
00848         premul_compbuf(new, 1);
00849         gamma_correct_compbuf(new, 1);
00850         premul_compbuf(new, 0);
00851         free_compbuf(old);
00852     }
00853     if(node->exec & NODE_BREAK) {
00854         free_compbuf(new);
00855         new= NULL;
00856     }   
00857     out[0]->data = new;
00858     if (zbuf_use && (zbuf_use != zbuf)) free_compbuf(zbuf_use);
00859 }
00860 
00861 static void node_composit_init_defocus(bNodeTree *UNUSED(ntree), bNode* node, bNodeTemplate *UNUSED(ntemp))
00862 {
00863     /* qdn: defocus node */
00864     NodeDefocus *nbd = MEM_callocN(sizeof(NodeDefocus), "node defocus data");
00865     nbd->bktype = 0;
00866     nbd->rotation = 0.0f;
00867     nbd->preview = 1;
00868     nbd->gamco = 0;
00869     nbd->samples = 16;
00870     nbd->fstop = 128.f;
00871     nbd->maxblur = 0;
00872     nbd->bthresh = 1.f;
00873     nbd->scale = 1.f;
00874     nbd->no_zbuf = 1;
00875     node->storage = nbd;
00876 }
00877 
00878 void register_node_type_cmp_defocus(bNodeTreeType *ttype)
00879 {
00880     static bNodeType ntype;
00881 
00882     node_type_base(ttype, &ntype, CMP_NODE_DEFOCUS, "Defocus", NODE_CLASS_OP_FILTER, NODE_OPTIONS);
00883     node_type_socket_templates(&ntype, cmp_node_defocus_in, cmp_node_defocus_out);
00884     node_type_size(&ntype, 150, 120, 200);
00885     node_type_init(&ntype, node_composit_init_defocus);
00886     node_type_storage(&ntype, "NodeDefocus", node_free_standard_storage, node_copy_standard_storage);
00887     node_type_exec(&ntype, node_composit_exec_defocus);
00888 
00889     nodeRegisterType(ttype, &ntype);
00890 }