libcerf.w_of_z source code

1 /**
2 Computation of Faddeeva's complex scaled error function,
3   w(z) = exp(-z^2) * erfc(-i*z),
4 nameless function (7.1.3) of Abramowitz&Stegun (1964),
5 also known as the plasma dispersion function.
6 
7 This implementation uses a combination of different algorithms.
8  
9 Copyright:
10     © 2012 Massachusetts Institute of Technology,    
11     © 2013 Forschungszentrum Jülich GmbH,
12     © 2014 Ilya Yaroshenko
13  
14 Authors:
15     Steven G. Johnson, core author;
16     Joachim Wuttke, C package maintainer;
17     $(LINK2 http://plus.google.com/+IlyaYaroshenko, Ilya Yaroshenko), D package maintainer
18 
19 License: 
20     Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
21 */
22 module libcerf.w_of_z;
23 
24 import std.complex;
25 import core.stdc.math : exp, fabs, sin, cos, copysign;
26 import std.math : isNaN, isInfinity, floor, M_2_SQRTPI;
27 
28 import libcerf.erfcx_;
29 import libcerf.im_w_of_x;
30 
31 version(unittest)
32 {
33     import libcerf.testutils;
34 }
35 
36 ///
37 //import core.stdc.complex : cexp;
38 extern (C) @trusted nothrow @nogc Complex!double cexp(Complex!double z);
39 
40 /**
41  w_of_z, Faddeeva's scaled complex error function                         
42 
43    Computes various error functions (erf, erfc, erfi, erfcx), 
44    including the Dawson integral, in the complex plane, based
45    on algorithms for the computation of the Faddeeva function 
46               w(z) = exp(-z^2) * erfc(-i*z).
47    Given w(z), the error functions are mostly straightforward
48    to compute, except for certain regions where we have to
49    switch to Taylor expansions to avoid cancellation errors
50    [e.g. near the origin for erf(z)].
51 
52 */
53 //Steven G. Johnson, October 2012.
54 Complex!double w_of_z(Complex!double z) @safe @nogc nothrow
55 {
56     if (!z.re) {
57         // Purely imaginary input, purely real output.
58         // However, use z.re to give correct sign of 0 in cimag(w).
59         return Complex!double(erfcx(z.im), z.re);
60     }
61     if (!z.im) {
62         // Purely real input, complex output.
63         return Complex!double(exp(-sqr(z.re)),  im_w_of_x(z.re));
64     }
65     enum double a = 0.518321480430085929872; // pi / sqrt(-log(eps*0.5))
66     enum double c = 0.329973702884629072537; // (2/pi) * a;
67     enum double a2 = 0.268657157075235951582; // a^2
68 
69     immutable x = fabs(z.re);
70     immutable y = z.im;
71     immutable ya = fabs(y);
72 
73     Complex!double ret = 0.; // return value
74 
75     double sum1 = 0, sum2 = 0, sum3 = 0, sum4 = 0, sum5 = 0;
76 
77     if (ya > 7 || (x > 6  // continued fraction is faster
78                    /* As pointed out by M. Zaghloul, the continued
79                       fraction seems to give a large relative error in
80                       Re w(z) for |x| ~ 6 and small |y|, so use
81                       algorithm 816 in this region: */
82                    && (ya > 0.1 || (x > 8 && ya > 1e-10) || x > 28))) {
83 
84         /* Poppe & Wijers suggest using a number of terms
85            nu = 3 + 1442 / (26*rho + 77)
86            where rho = sqrt((x/x0)^2 + (y/y0)^2) where x0=6.3, y0=4.4.
87            (They only use this expansion for rho >= 1, but rho a little less
88            than 1 seems okay too.)
89            Instead, I did my own fit to a slightly different function
90            that avoids the hypotenuse calculation, using NLopt to minimize
91            the sum of the squares of the errors in nu with the constraint
92            that the estimated nu be >= minimum nu to attain machine precision.
93            I also separate the regions where nu == 2 and nu == 1. */
94         enum double ispi = M_2_SQRTPI / 2; // 1 / sqrt(pi)
95         immutable xs = y < 0 ? -z.re : z.re; // compute for -z if y < 0
96         if (x + ya > 4000) { // nu <= 2
97             if (x + ya > 1e7) { // nu == 1, w(z) = i/sqrt(pi) / z
98                 // scale to avoid overflow
99                 if (x > ya) {
100                     immutable double yax = ya / xs; 
101                     immutable double denom = ispi / (xs + yax*ya);
102                     ret = Complex!double(denom*yax, denom);
103                 }
104                 else if (isInfinity(ya)) {
105                     return ((isNaN(x) || y < 0) 
106                             ? Complex!double.init : Complex!double(0,0));
107                 }
108                 else {
109                     immutable double xya = xs / ya;
110                     immutable double denom = ispi / (xya*xs + ya);
111                     ret = Complex!double(denom, denom*xya);
112                 }
113             }
114             else { // nu == 2, w(z) = i/sqrt(pi) * z / (z*z - 0.5)
115                 immutable double dr = xs*xs - ya*ya - 0.5, di = 2*xs*ya;
116                 immutable double denom = ispi / (dr*dr + di*di);
117                 ret = Complex!double(denom * (xs*di-ya*dr), denom * (xs*dr+ya*di));
118             }
119         }
120         else { // compute nu(z) estimate and do general continued fraction
121             immutable double c0=3.9, c1=11.398, c2=0.08254, c3=0.1421, c4=0.2023; // fit
122             double nu = floor(c0 + c1 / (c2*x + c3*ya + c4));
123             double wr = xs, wi = ya;
124             for (nu = 0.5 * (nu - 1); nu > 0.4; nu -= 0.5) {
125                 // w <- z - nu/w:
126                 double denom = nu / (wr*wr + wi*wi);
127                 wr = xs - wr * denom;
128                 wi = ya + wi * denom;
129             }
130             { // w(z) = i/sqrt(pi) / w:
131                 immutable double denom = ispi / (wr*wr + wi*wi);
132                 ret = Complex!double(denom*wi, denom*wr);
133             }
134         }
135         if (y < 0) {
136             // use w(z) = 2.0*exp(-z*z) - w(-z), 
137             // but be careful of overflow in exp(-z*z) 
138             //                                = exp(-(xs*xs-ya*ya) -2*i*xs*ya) 
139             return 2.0 * cexp(Complex!double((ya-xs)*(xs+ya), 2*xs*y)) - ret;
140         }
141         else
142             return ret;
143     }
144 
145     /* Note: The test that seems to be suggested in the paper is x <
146        sqrt(-log(DBL_MIN)), about 26.6, since otherwise exp(-x^2)
147        underflows to zero and sum1,sum2,sum4 are zero.  However, long
148        before this occurs, the sum1,sum2,sum4 contributions are
149        negligible in double precision; I find that this happens for x >
150        about 6, for all y.  On the other hand, I find that the case
151        where we compute all of the sums is faster (at least with the
152        precomputed expa2n2 table) until about x=10.  Furthermore, if we
153        try to compute all of the sums for x > 20, I find that we
154        sometimes run into numerical problems because underflow/overflow
155        problems start to appear in the various coefficients of the sums,
156        below.  Therefore, we use x < 10 here. */
157     else if (x < 10) {
158 
159         double prod2ax = 1, prodm2ax = 1;
160         double expx2;
161 
162         if (isNaN(y)) {
163             return Complex!double(y,y);
164         }
165 
166         if (x < 5e-4) { // compute sum4 and sum5 together as sum5-sum4
167                         // This special case is needed for accuracy.
168             immutable double x2 = x*x;
169             expx2 = 1 - x2 * (1 - 0.5*x2); // exp(-x*x) via Taylor
170             // compute exp(2*a*x) and exp(-2*a*x) via Taylor, to double precision
171             immutable double ax2 = 1.036642960860171859744*x; // 2*a*x
172             immutable double exp2ax =
173                 1 + ax2 * (1 + ax2 * (0.5 + 0.166666666666666666667*ax2));
174             immutable double expm2ax =
175                 1 - ax2 * (1 - ax2 * (0.5 - 0.166666666666666666667*ax2));
176             for (uint n = 1; ; n++) {
177                 immutable double coef = expa2n2[n-1] * expx2 / (a2*(n*n) + y*y);
178                 prod2ax *= exp2ax;
179                 prodm2ax *= expm2ax;
180                 sum1 += coef;
181                 sum2 += coef * prodm2ax;
182                 sum3 += coef * prod2ax;
183           
184                 // really = sum5 - sum4
185                 sum5 += coef * (2*a) * n * sinh_taylor((2*a)*n*x);
186           
187                 // test convergence via sum3
188                 if (coef * prod2ax < double.epsilon * sum3) 
189                     break;
190             }
191         }
192         else { // x > 5e-4, compute sum4 and sum5 separately
193             expx2 = exp(-x*x);
194             immutable double exp2ax = exp((2*a)*x), expm2ax = 1 / exp2ax;
195             for (int n = 1; 1; ++n) {
196                 immutable double coef = expa2n2[n-1] * expx2 / (a2*(n*n) + y*y);
197                 prod2ax *= exp2ax;
198                 prodm2ax *= expm2ax;
199                 sum1 += coef;
200                 sum2 += coef * prodm2ax;
201                 sum4 += (coef * prodm2ax) * (a*n);
202                 sum3 += coef * prod2ax;
203                 sum5 += (coef * prod2ax) * (a*n);
204                 // test convergence via sum5, since this sum has the slowest decay
205                 if ((coef * prod2ax) * (a*n) < double.epsilon * sum5) 
206                     break;
207             }
208         }
209         immutable double expx2erfcxy = // avoid spurious overflow for large negative y
210             y > -6 // for y < -6, erfcx(y) = 2*exp(y*y) to double precision
211             ? expx2*erfcx(y) : 2*exp(y*y-x*x);
212         if (y > 5) { // imaginary terms cancel
213             immutable double sinxy = sin(x*y);
214             ret = (expx2erfcxy - c*y*sum1) * cos(2*x*y)
215                 + (c*x*expx2) * sinxy * sinc(x*y, sinxy);
216         }
217         else {
218             double xs = z.re;
219             immutable double sinxy = sin(xs*y);
220             immutable double sin2xy = sin(2*xs*y), cos2xy = cos(2*xs*y);
221             immutable double coef1 = expx2erfcxy - c*y*sum1;
222             immutable double coef2 = c*xs*expx2;
223             ret = Complex!double(coef1 * cos2xy + coef2 * sinxy * sinc(xs*y, sinxy),
224                     coef2 * sinc(2*xs*y, sin2xy) - coef1 * sin2xy);
225         }
226     }
227     else { // x large: only sum3 & sum5 contribute (see above note)    
228 
229 
230         if (isNaN(x))
231             return Complex!double(x,x);
232         if (isNaN(y))
233             return Complex!double(y,y);
234         ret = exp(-x*x); // |y| < 1e-10, so we only need exp(-x*x) term
235         // (round instead of ceil as in original paper; note that x/a > 1 here)
236         immutable double n0 = floor(x/a + 0.5); // sum in both directions, starting at n0
237         immutable double dx = a*n0 - x;
238         sum3 = exp(-dx*dx) / (a2*(n0*n0) + y*y);
239         sum5 = a*n0 * sum3;
240         immutable double exp1 = exp(4*a*dx);
241         double exp1dn = 1;
242         int dn;
243         for (dn = 1; n0 - dn > 0; ++dn) { // loop over n0-dn and n0+dn terms
244             immutable double np = n0 + dn, nm = n0 - dn;
245             double tp = exp(-sqr(a*dn+dx));
246             double tm = tp * (exp1dn *= exp1); // trick to get tm from tp
247             tp /= (a2*(np*np) + y*y);
248             tm /= (a2*(nm*nm) + y*y);
249             sum3 += tp + tm;
250             sum5 += a * (np * tp + nm * tm);
251             if (a * (np * tp + nm * tm) < double.epsilon * sum5) 
252                 goto finish;
253         }
254         while (1) { // loop over n0+dn terms only (since n0-dn <= 0)
255             immutable double np = n0 + dn++;
256             immutable double tp = exp(-sqr(a*dn+dx)) / (a2*(np*np) + y*y);
257             sum3 += tp;
258             sum5 += a * np * tp;
259             if (a * np * tp < double.epsilon * sum5) 
260                 goto finish;
261         }
262     }
263 finish:
264     return ret + Complex!double((0.5*c)*y*(sum2+sum3), 
265                    (0.5*c)*copysign(sum5-sum4, z.re));
266 }
267 
268 private:
269 
270 /**
271 auxiliary function                                            
272 */
273 double sinc(double x, double sinx) @safe @nogc nothrow
274 { 
275     // return sinc(x) = sin(x)/x, given both x and sin(x) 
276     // [since we only use this in cases where sin(x) has already been computed]
277     return fabs(x) < 1e-4 ? 1 - 0.1666666666666666666667*x*x : sinx / x; 
278 }
279 
280 ///ditto
281 double sinh_taylor(double x) pure @safe @nogc nothrow
282 {
283     // sinh(x) via Taylor series, accurate to machine precision for |x| < 1e-2
284     immutable x2 = x*x;
285     return x * (1 + (x2) * (0.1666666666666666666667
286                              + 0.00833333333333333333333 * (x2)));
287 }
288 
289 ///ditto
290 double sqr(double x) pure @safe @nogc nothrow { return x*x; } 
291 
292 /**
293 precomputed table of expa2n2[n-1] = exp(-a2*n*n)                           
294 for double-precision a2 = 0.26865... in w_of_z, below.                     
295 */
296 immutable double[] expa2n2 = [
297     7.64405281671221563e-01,
298     3.41424527166548425e-01,
299     8.91072646929412548e-02,
300     1.35887299055460086e-02,
301     1.21085455253437481e-03,
302     6.30452613933449404e-05,
303     1.91805156577114683e-06,
304     3.40969447714832381e-08,
305     3.54175089099469393e-10,
306     2.14965079583260682e-12,
307     7.62368911833724354e-15,
308     1.57982797110681093e-17,
309     1.91294189103582677e-20,
310     1.35344656764205340e-23,
311     5.59535712428588720e-27,
312     1.35164257972401769e-30,
313     1.90784582843501167e-34,
314     1.57351920291442930e-38,
315     7.58312432328032845e-43,
316     2.13536275438697082e-47,
317     3.51352063787195769e-52,
318     3.37800830266396920e-57,
319     1.89769439468301000e-62,
320     6.22929926072668851e-68,
321     1.19481172006938722e-73,
322     1.33908181133005953e-79,
323     8.76924303483223939e-86,
324     3.35555576166254986e-92,
325     7.50264110688173024e-99,
326     9.80192200745410268e-106,
327     7.48265412822268959e-113,
328     3.33770122566809425e-120,
329     8.69934598159861140e-128,
330     1.32486951484088852e-135,
331     1.17898144201315253e-143,
332     6.13039120236180012e-152,
333     1.86258785950822098e-160,
334     3.30668408201432783e-169,
335     3.43017280887946235e-178,
336     2.07915397775808219e-187,
337     7.36384545323984966e-197,
338     1.52394760394085741e-206,
339     1.84281935046532100e-216,
340     1.30209553802992923e-226,
341     5.37588903521080531e-237,
342     1.29689584599763145e-247,
343     1.82813078022866562e-258,
344     1.50576355348684241e-269,
345     7.24692320799294194e-281,
346     2.03797051314726829e-292,
347     3.34880215927873807e-304,
348     0.0 // underflow (also prevents reads past array end, below)
349 ]; 
350 
351 unittest 
352 {
353     static immutable Z = [
354         Complex!double(624.2,-0.26123),
355         Complex!double(-0.4,3.),
356         Complex!double(0.6,2.),
357         Complex!double(-1.,1.),
358         Complex!double(-1.,-9.),
359         Complex!double(-1.,9.),
360         Complex!double(-0.0000000234545,1.1234),
361         Complex!double(-3.,5.1),
362         Complex!double(-53,30.1),
363         Complex!double(0.0,0.12345),
364         Complex!double(11,1),
365         Complex!double(-22,-2),
366         Complex!double(9,-28),
367         Complex!double(21,-33),
368         Complex!double(1e5,1e5),
369         Complex!double(1e14,1e14),
370         Complex!double(-3001,-1000),
371         Complex!double(1e160,-1e159),
372         Complex!double(-6.01,0.01),
373         Complex!double(-0.7,-0.7),
374         Complex!double(2.611780000000000e+01, 4.540909610972489e+03),
375         Complex!double(0.8e7,0.3e7),
376         Complex!double(-20,-19.8081),
377         Complex!double(1e-16,-1.1e-16),
378         Complex!double(2.3e-8,1.3e-8),
379         Complex!double(6.3,-1e-13),
380         Complex!double(6.3,1e-20),
381         Complex!double(1e-20,6.3),
382         Complex!double(1e-20,16.3),
383         Complex!double(9,1e-300),
384         Complex!double(6.01,0.11),
385         Complex!double(8.01,1.01e-10),
386         Complex!double(28.01,1e-300),
387         Complex!double(10.01,1e-200),
388         Complex!double(10.01,-1e-200),
389         Complex!double(10.01,0.99e-10),
390         Complex!double(10.01,-0.99e-10),
391         Complex!double(1e-20,7.01),
392         Complex!double(-1,7.01),
393         Complex!double(5.99,7.01),
394         Complex!double(1,0),
395         Complex!double(55,0),
396         Complex!double(-0.1,0),
397         Complex!double(1e-20,0),
398         Complex!double(0,5e-14),
399         Complex!double(0,51),
400         Complex!double(double.infinity,0),
401         Complex!double(-double.infinity,0),
402         Complex!double(0,double.infinity),
403         Complex!double(0,-double.infinity),
404         Complex!double(double.infinity,double.infinity),
405         Complex!double(double.infinity,-double.infinity),
406         Complex!double(double.nan,double.nan),
407         Complex!double(double.nan,0),
408         Complex!double(0,double.nan),
409         Complex!double(double.nan,double.infinity),
410         Complex!double(double.infinity,double.nan)
411     ];
412     /* w(z), computed with WolframAlpha
413     ... note that WolframAlpha is problematic
414     some of the above inputs, so I had to
415     use the continued-fraction expansion
416     in WolframAlpha in some cases, or switch
417     to Maple */
418     static immutable W = [ 
419         Complex!double(-3.78270245518980507452677445620103199303131110e-7,
420           0.000903861276433172057331093754199933411710053155),
421         Complex!double(0.1764906227004816847297495349730234591778719532788,
422           -0.02146550539468457616788719893991501311573031095617),
423         Complex!double(0.2410250715772692146133539023007113781272362309451,
424           0.06087579663428089745895459735240964093522265589350),
425         Complex!double(0.30474420525691259245713884106959496013413834051768,
426           -0.20821893820283162728743734725471561394145872072738),
427         Complex!double(7.317131068972378096865595229600561710140617977e34,
428           8.321873499714402777186848353320412813066170427e34),
429         Complex!double(0.0615698507236323685519612934241429530190806818395,
430           -0.00676005783716575013073036218018565206070072304635),
431         Complex!double(0.3960793007699874918961319170187598400134746631,
432           -5.593152259116644920546186222529802777409274656e-9),
433         Complex!double(0.08217199226739447943295069917990417630675021771804,
434           -0.04701291087643609891018366143118110965272615832184),
435         Complex!double(0.00457246000350281640952328010227885008541748668738,
436           -0.00804900791411691821818731763401840373998654987934),
437         Complex!double(0.8746342859608052666092782112565360755791467973338452,
438           0.),
439         Complex!double(0.00468190164965444174367477874864366058339647648741,
440           0.0510735563901306197993676329845149741675029197050),
441         Complex!double(-0.0023193175200187620902125853834909543869428763219,
442           -0.025460054739731556004902057663500272721780776336),
443         Complex!double(9.11463368405637174660562096516414499772662584e304,
444           3.97101807145263333769664875189354358563218932e305),
445         Complex!double(-4.4927207857715598976165541011143706155432296e281,
446           -2.8019591213423077494444700357168707775769028e281),
447         Complex!double(2.820947917809305132678577516325951485807107151e-6,
448           2.820947917668257736791638444590253942253354058e-6),
449         Complex!double(2.82094791773878143474039725787438662716372268e-15,
450           2.82094791773878143474039725773333923127678361e-15),
451         Complex!double(-0.0000563851289696244350147899376081488003110150498,
452           -0.000169211755126812174631861529808288295454992688),
453         Complex!double(-5.586035480670854326218608431294778077663867e-162,
454           5.586035480670854326218608431294778077663867e-161),
455         Complex!double(0.00016318325137140451888255634399123461580248456,
456           -0.095232456573009287370728788146686162555021209999),
457         Complex!double(0.69504753678406939989115375989939096800793577783885,
458           -1.8916411171103639136680830887017670616339912024317),
459         Complex!double(0.0001242418269653279656612334210746733213167234822,
460           7.145975826320186888508563111992099992116786763e-7),
461         Complex!double(2.318587329648353318615800865959225429377529825e-8,
462           6.182899545728857485721417893323317843200933380e-8),
463         Complex!double(-0.0133426877243506022053521927604277115767311800303,
464           -0.0148087097143220769493341484176979826888871576145),
465         Complex!double(1.00000000000000012412170838050638522857747934,
466           1.12837916709551279389615890312156495593616433e-16),
467         Complex!double(0.9999999853310704677583504063775310832036830015,
468           2.595272024519678881897196435157270184030360773e-8),
469         Complex!double(-1.4731421795638279504242963027196663601154624e-15,
470           0.090727659684127365236479098488823462473074709),
471         Complex!double(5.79246077884410284575834156425396800754409308e-18,
472           0.0907276596841273652364790985059772809093822374),
473         Complex!double(0.0884658993528521953466533278764830881245144368,
474           1.37088352495749125283269718778582613192166760e-22),
475         Complex!double(0.0345480845419190424370085249304184266813447878,
476           2.11161102895179044968099038990446187626075258e-23),
477         Complex!double(6.63967719958073440070225527042829242391918213e-36,
478           0.0630820900592582863713653132559743161572639353),
479         Complex!double(0.00179435233208702644891092397579091030658500743634,
480           0.0951983814805270647939647438459699953990788064762),
481         Complex!double(9.09760377102097999924241322094863528771095448e-13,
482           0.0709979210725138550986782242355007611074966717),
483         Complex!double(7.2049510279742166460047102593255688682910274423e-304,
484           0.0201552956479526953866611812593266285000876784321),
485         Complex!double(3.04543604652250734193622967873276113872279682e-44,
486           0.0566481651760675042930042117726713294607499165),
487         Complex!double(3.04543604652250734193622967873276113872279682e-44,
488           0.0566481651760675042930042117726713294607499165),
489         Complex!double(0.5659928732065273429286988428080855057102069081e-12,
490           0.056648165176067504292998527162143030538756683302),
491         Complex!double(-0.56599287320652734292869884280802459698927645e-12,
492           0.0566481651760675042929985271621430305387566833029),
493         Complex!double(0.0796884251721652215687859778119964009569455462,
494           1.11474461817561675017794941973556302717225126e-22),
495         Complex!double(0.07817195821247357458545539935996687005781943386550,
496           -0.01093913670103576690766705513142246633056714279654),
497         Complex!double(0.04670032980990449912809326141164730850466208439937,
498           0.03944038961933534137558064191650437353429669886545),
499         Complex!double(0.36787944117144232159552377016146086744581113103176,
500           0.60715770584139372911503823580074492116122092866515),
501         Complex!double(0,
502           0.010259688805536830986089913987516716056946786526145),
503         Complex!double(0.99004983374916805357390597718003655777207908125383,
504           -0.11208866436449538036721343053869621153527769495574),
505         Complex!double(0.99999999999999999999999999999999999999990000,
506           1.12837916709551257389615890312154517168802603e-20),
507         Complex!double(0.999999999999943581041645226871305192054749891144158,
508           0),
509         Complex!double(0.0110604154853277201542582159216317923453996211744250,
510           0),
511         Complex!double(0,0),
512         Complex!double(0,0),
513         Complex!double(0,0),
514         Complex!double(double.infinity,0),
515         Complex!double(0,0),
516         Complex!double(double.nan,double.nan),
517         Complex!double(double.nan,double.nan),
518         Complex!double(double.nan,double.nan),
519         Complex!double(double.nan,0),
520         Complex!double(double.nan,double.nan),
521         Complex!double(double.nan,double.nan)
522     ];
523     commonTest!w_of_z(Z, W);
524 }