1 /** 2 Computation of Faddeeva's complex scaled error function, 3 w(z) = exp(-z^2) * erfc(-i*z), 4 nameless function (7.1.3) of Abramowitz&Stegun (1964), 5 also known as the plasma dispersion function. 6 7 This implementation uses a combination of different algorithms. 8 9 Copyright: 10 © 2012 Massachusetts Institute of Technology, 11 © 2013 Forschungszentrum Jülich GmbH, 12 © 2014 Ilya Yaroshenko 13 14 Authors: 15 Steven G. Johnson, core author; 16 Joachim Wuttke, C package maintainer; 17 $(LINK2 http://plus.google.com/+IlyaYaroshenko, Ilya Yaroshenko), D package maintainer 18 19 License: 20 Subject to the terms of the MIT license, as written in the included LICENSE.txt file. 21 */ 22 module libcerf.w_of_z; 23 24 import std.complex; 25 import core.stdc.math : exp, fabs, sin, cos, copysign; 26 import std.math : isNaN, isInfinity, floor, M_2_SQRTPI; 27 28 import libcerf.erfcx_; 29 import libcerf.im_w_of_x; 30 31 version(unittest) 32 { 33 import libcerf.testutils; 34 } 35 36 /// 37 //import core.stdc.complex : cexp; 38 extern (C) @trusted nothrow @nogc Complex!double cexp(Complex!double z); 39 40 /** 41 w_of_z, Faddeeva's scaled complex error function 42 43 Computes various error functions (erf, erfc, erfi, erfcx), 44 including the Dawson integral, in the complex plane, based 45 on algorithms for the computation of the Faddeeva function 46 w(z) = exp(-z^2) * erfc(-i*z). 47 Given w(z), the error functions are mostly straightforward 48 to compute, except for certain regions where we have to 49 switch to Taylor expansions to avoid cancellation errors 50 [e.g. near the origin for erf(z)]. 51 52 */ 53 //Steven G. Johnson, October 2012. 54 Complex!double w_of_z(Complex!double z) @safe @nogc nothrow 55 { 56 if (!z.re) { 57 // Purely imaginary input, purely real output. 58 // However, use z.re to give correct sign of 0 in cimag(w). 59 return Complex!double(erfcx(z.im), z.re); 60 } 61 if (!z.im) { 62 // Purely real input, complex output. 63 return Complex!double(exp(-sqr(z.re)), im_w_of_x(z.re)); 64 } 65 enum double a = 0.518321480430085929872; // pi / sqrt(-log(eps*0.5)) 66 enum double c = 0.329973702884629072537; // (2/pi) * a; 67 enum double a2 = 0.268657157075235951582; // a^2 68 69 immutable x = fabs(z.re); 70 immutable y = z.im; 71 immutable ya = fabs(y); 72 73 Complex!double ret = 0.; // return value 74 75 double sum1 = 0, sum2 = 0, sum3 = 0, sum4 = 0, sum5 = 0; 76 77 if (ya > 7 || (x > 6 // continued fraction is faster 78 /* As pointed out by M. Zaghloul, the continued 79 fraction seems to give a large relative error in 80 Re w(z) for |x| ~ 6 and small |y|, so use 81 algorithm 816 in this region: */ 82 && (ya > 0.1 || (x > 8 && ya > 1e-10) || x > 28))) { 83 84 /* Poppe & Wijers suggest using a number of terms 85 nu = 3 + 1442 / (26*rho + 77) 86 where rho = sqrt((x/x0)^2 + (y/y0)^2) where x0=6.3, y0=4.4. 87 (They only use this expansion for rho >= 1, but rho a little less 88 than 1 seems okay too.) 89 Instead, I did my own fit to a slightly different function 90 that avoids the hypotenuse calculation, using NLopt to minimize 91 the sum of the squares of the errors in nu with the constraint 92 that the estimated nu be >= minimum nu to attain machine precision. 93 I also separate the regions where nu == 2 and nu == 1. */ 94 enum double ispi = M_2_SQRTPI / 2; // 1 / sqrt(pi) 95 immutable xs = y < 0 ? -z.re : z.re; // compute for -z if y < 0 96 if (x + ya > 4000) { // nu <= 2 97 if (x + ya > 1e7) { // nu == 1, w(z) = i/sqrt(pi) / z 98 // scale to avoid overflow 99 if (x > ya) { 100 immutable double yax = ya / xs; 101 immutable double denom = ispi / (xs + yax*ya); 102 ret = Complex!double(denom*yax, denom); 103 } 104 else if (isInfinity(ya)) { 105 return ((isNaN(x) || y < 0) 106 ? Complex!double.init : Complex!double(0,0)); 107 } 108 else { 109 immutable double xya = xs / ya; 110 immutable double denom = ispi / (xya*xs + ya); 111 ret = Complex!double(denom, denom*xya); 112 } 113 } 114 else { // nu == 2, w(z) = i/sqrt(pi) * z / (z*z - 0.5) 115 immutable double dr = xs*xs - ya*ya - 0.5, di = 2*xs*ya; 116 immutable double denom = ispi / (dr*dr + di*di); 117 ret = Complex!double(denom * (xs*di-ya*dr), denom * (xs*dr+ya*di)); 118 } 119 } 120 else { // compute nu(z) estimate and do general continued fraction 121 immutable double c0=3.9, c1=11.398, c2=0.08254, c3=0.1421, c4=0.2023; // fit 122 double nu = floor(c0 + c1 / (c2*x + c3*ya + c4)); 123 double wr = xs, wi = ya; 124 for (nu = 0.5 * (nu - 1); nu > 0.4; nu -= 0.5) { 125 // w <- z - nu/w: 126 double denom = nu / (wr*wr + wi*wi); 127 wr = xs - wr * denom; 128 wi = ya + wi * denom; 129 } 130 { // w(z) = i/sqrt(pi) / w: 131 immutable double denom = ispi / (wr*wr + wi*wi); 132 ret = Complex!double(denom*wi, denom*wr); 133 } 134 } 135 if (y < 0) { 136 // use w(z) = 2.0*exp(-z*z) - w(-z), 137 // but be careful of overflow in exp(-z*z) 138 // = exp(-(xs*xs-ya*ya) -2*i*xs*ya) 139 return 2.0 * cexp(Complex!double((ya-xs)*(xs+ya), 2*xs*y)) - ret; 140 } 141 else 142 return ret; 143 } 144 145 /* Note: The test that seems to be suggested in the paper is x < 146 sqrt(-log(DBL_MIN)), about 26.6, since otherwise exp(-x^2) 147 underflows to zero and sum1,sum2,sum4 are zero. However, long 148 before this occurs, the sum1,sum2,sum4 contributions are 149 negligible in double precision; I find that this happens for x > 150 about 6, for all y. On the other hand, I find that the case 151 where we compute all of the sums is faster (at least with the 152 precomputed expa2n2 table) until about x=10. Furthermore, if we 153 try to compute all of the sums for x > 20, I find that we 154 sometimes run into numerical problems because underflow/overflow 155 problems start to appear in the various coefficients of the sums, 156 below. Therefore, we use x < 10 here. */ 157 else if (x < 10) { 158 159 double prod2ax = 1, prodm2ax = 1; 160 double expx2; 161 162 if (isNaN(y)) { 163 return Complex!double(y,y); 164 } 165 166 if (x < 5e-4) { // compute sum4 and sum5 together as sum5-sum4 167 // This special case is needed for accuracy. 168 immutable double x2 = x*x; 169 expx2 = 1 - x2 * (1 - 0.5*x2); // exp(-x*x) via Taylor 170 // compute exp(2*a*x) and exp(-2*a*x) via Taylor, to double precision 171 immutable double ax2 = 1.036642960860171859744*x; // 2*a*x 172 immutable double exp2ax = 173 1 + ax2 * (1 + ax2 * (0.5 + 0.166666666666666666667*ax2)); 174 immutable double expm2ax = 175 1 - ax2 * (1 - ax2 * (0.5 - 0.166666666666666666667*ax2)); 176 for (uint n = 1; ; n++) { 177 immutable double coef = expa2n2[n-1] * expx2 / (a2*(n*n) + y*y); 178 prod2ax *= exp2ax; 179 prodm2ax *= expm2ax; 180 sum1 += coef; 181 sum2 += coef * prodm2ax; 182 sum3 += coef * prod2ax; 183 184 // really = sum5 - sum4 185 sum5 += coef * (2*a) * n * sinh_taylor((2*a)*n*x); 186 187 // test convergence via sum3 188 if (coef * prod2ax < double.epsilon * sum3) 189 break; 190 } 191 } 192 else { // x > 5e-4, compute sum4 and sum5 separately 193 expx2 = exp(-x*x); 194 immutable double exp2ax = exp((2*a)*x), expm2ax = 1 / exp2ax; 195 for (int n = 1; 1; ++n) { 196 immutable double coef = expa2n2[n-1] * expx2 / (a2*(n*n) + y*y); 197 prod2ax *= exp2ax; 198 prodm2ax *= expm2ax; 199 sum1 += coef; 200 sum2 += coef * prodm2ax; 201 sum4 += (coef * prodm2ax) * (a*n); 202 sum3 += coef * prod2ax; 203 sum5 += (coef * prod2ax) * (a*n); 204 // test convergence via sum5, since this sum has the slowest decay 205 if ((coef * prod2ax) * (a*n) < double.epsilon * sum5) 206 break; 207 } 208 } 209 immutable double expx2erfcxy = // avoid spurious overflow for large negative y 210 y > -6 // for y < -6, erfcx(y) = 2*exp(y*y) to double precision 211 ? expx2*erfcx(y) : 2*exp(y*y-x*x); 212 if (y > 5) { // imaginary terms cancel 213 immutable double sinxy = sin(x*y); 214 ret = (expx2erfcxy - c*y*sum1) * cos(2*x*y) 215 + (c*x*expx2) * sinxy * sinc(x*y, sinxy); 216 } 217 else { 218 double xs = z.re; 219 immutable double sinxy = sin(xs*y); 220 immutable double sin2xy = sin(2*xs*y), cos2xy = cos(2*xs*y); 221 immutable double coef1 = expx2erfcxy - c*y*sum1; 222 immutable double coef2 = c*xs*expx2; 223 ret = Complex!double(coef1 * cos2xy + coef2 * sinxy * sinc(xs*y, sinxy), 224 coef2 * sinc(2*xs*y, sin2xy) - coef1 * sin2xy); 225 } 226 } 227 else { // x large: only sum3 & sum5 contribute (see above note) 228 229 230 if (isNaN(x)) 231 return Complex!double(x,x); 232 if (isNaN(y)) 233 return Complex!double(y,y); 234 ret = exp(-x*x); // |y| < 1e-10, so we only need exp(-x*x) term 235 // (round instead of ceil as in original paper; note that x/a > 1 here) 236 immutable double n0 = floor(x/a + 0.5); // sum in both directions, starting at n0 237 immutable double dx = a*n0 - x; 238 sum3 = exp(-dx*dx) / (a2*(n0*n0) + y*y); 239 sum5 = a*n0 * sum3; 240 immutable double exp1 = exp(4*a*dx); 241 double exp1dn = 1; 242 int dn; 243 for (dn = 1; n0 - dn > 0; ++dn) { // loop over n0-dn and n0+dn terms 244 immutable double np = n0 + dn, nm = n0 - dn; 245 double tp = exp(-sqr(a*dn+dx)); 246 double tm = tp * (exp1dn *= exp1); // trick to get tm from tp 247 tp /= (a2*(np*np) + y*y); 248 tm /= (a2*(nm*nm) + y*y); 249 sum3 += tp + tm; 250 sum5 += a * (np * tp + nm * tm); 251 if (a * (np * tp + nm * tm) < double.epsilon * sum5) 252 goto finish; 253 } 254 while (1) { // loop over n0+dn terms only (since n0-dn <= 0) 255 immutable double np = n0 + dn++; 256 immutable double tp = exp(-sqr(a*dn+dx)) / (a2*(np*np) + y*y); 257 sum3 += tp; 258 sum5 += a * np * tp; 259 if (a * np * tp < double.epsilon * sum5) 260 goto finish; 261 } 262 } 263 finish: 264 return ret + Complex!double((0.5*c)*y*(sum2+sum3), 265 (0.5*c)*copysign(sum5-sum4, z.re)); 266 } 267 268 private: 269 270 /** 271 auxiliary function 272 */ 273 double sinc(double x, double sinx) @safe @nogc nothrow 274 { 275 // return sinc(x) = sin(x)/x, given both x and sin(x) 276 // [since we only use this in cases where sin(x) has already been computed] 277 return fabs(x) < 1e-4 ? 1 - 0.1666666666666666666667*x*x : sinx / x; 278 } 279 280 ///ditto 281 double sinh_taylor(double x) pure @safe @nogc nothrow 282 { 283 // sinh(x) via Taylor series, accurate to machine precision for |x| < 1e-2 284 immutable x2 = x*x; 285 return x * (1 + (x2) * (0.1666666666666666666667 286 + 0.00833333333333333333333 * (x2))); 287 } 288 289 ///ditto 290 double sqr(double x) pure @safe @nogc nothrow { return x*x; } 291 292 /** 293 precomputed table of expa2n2[n-1] = exp(-a2*n*n) 294 for double-precision a2 = 0.26865... in w_of_z, below. 295 */ 296 immutable double[] expa2n2 = [ 297 7.64405281671221563e-01, 298 3.41424527166548425e-01, 299 8.91072646929412548e-02, 300 1.35887299055460086e-02, 301 1.21085455253437481e-03, 302 6.30452613933449404e-05, 303 1.91805156577114683e-06, 304 3.40969447714832381e-08, 305 3.54175089099469393e-10, 306 2.14965079583260682e-12, 307 7.62368911833724354e-15, 308 1.57982797110681093e-17, 309 1.91294189103582677e-20, 310 1.35344656764205340e-23, 311 5.59535712428588720e-27, 312 1.35164257972401769e-30, 313 1.90784582843501167e-34, 314 1.57351920291442930e-38, 315 7.58312432328032845e-43, 316 2.13536275438697082e-47, 317 3.51352063787195769e-52, 318 3.37800830266396920e-57, 319 1.89769439468301000e-62, 320 6.22929926072668851e-68, 321 1.19481172006938722e-73, 322 1.33908181133005953e-79, 323 8.76924303483223939e-86, 324 3.35555576166254986e-92, 325 7.50264110688173024e-99, 326 9.80192200745410268e-106, 327 7.48265412822268959e-113, 328 3.33770122566809425e-120, 329 8.69934598159861140e-128, 330 1.32486951484088852e-135, 331 1.17898144201315253e-143, 332 6.13039120236180012e-152, 333 1.86258785950822098e-160, 334 3.30668408201432783e-169, 335 3.43017280887946235e-178, 336 2.07915397775808219e-187, 337 7.36384545323984966e-197, 338 1.52394760394085741e-206, 339 1.84281935046532100e-216, 340 1.30209553802992923e-226, 341 5.37588903521080531e-237, 342 1.29689584599763145e-247, 343 1.82813078022866562e-258, 344 1.50576355348684241e-269, 345 7.24692320799294194e-281, 346 2.03797051314726829e-292, 347 3.34880215927873807e-304, 348 0.0 // underflow (also prevents reads past array end, below) 349 ]; 350 351 unittest 352 { 353 static immutable Z = [ 354 Complex!double(624.2,-0.26123), 355 Complex!double(-0.4,3.), 356 Complex!double(0.6,2.), 357 Complex!double(-1.,1.), 358 Complex!double(-1.,-9.), 359 Complex!double(-1.,9.), 360 Complex!double(-0.0000000234545,1.1234), 361 Complex!double(-3.,5.1), 362 Complex!double(-53,30.1), 363 Complex!double(0.0,0.12345), 364 Complex!double(11,1), 365 Complex!double(-22,-2), 366 Complex!double(9,-28), 367 Complex!double(21,-33), 368 Complex!double(1e5,1e5), 369 Complex!double(1e14,1e14), 370 Complex!double(-3001,-1000), 371 Complex!double(1e160,-1e159), 372 Complex!double(-6.01,0.01), 373 Complex!double(-0.7,-0.7), 374 Complex!double(2.611780000000000e+01, 4.540909610972489e+03), 375 Complex!double(0.8e7,0.3e7), 376 Complex!double(-20,-19.8081), 377 Complex!double(1e-16,-1.1e-16), 378 Complex!double(2.3e-8,1.3e-8), 379 Complex!double(6.3,-1e-13), 380 Complex!double(6.3,1e-20), 381 Complex!double(1e-20,6.3), 382 Complex!double(1e-20,16.3), 383 Complex!double(9,1e-300), 384 Complex!double(6.01,0.11), 385 Complex!double(8.01,1.01e-10), 386 Complex!double(28.01,1e-300), 387 Complex!double(10.01,1e-200), 388 Complex!double(10.01,-1e-200), 389 Complex!double(10.01,0.99e-10), 390 Complex!double(10.01,-0.99e-10), 391 Complex!double(1e-20,7.01), 392 Complex!double(-1,7.01), 393 Complex!double(5.99,7.01), 394 Complex!double(1,0), 395 Complex!double(55,0), 396 Complex!double(-0.1,0), 397 Complex!double(1e-20,0), 398 Complex!double(0,5e-14), 399 Complex!double(0,51), 400 Complex!double(double.infinity,0), 401 Complex!double(-double.infinity,0), 402 Complex!double(0,double.infinity), 403 Complex!double(0,-double.infinity), 404 Complex!double(double.infinity,double.infinity), 405 Complex!double(double.infinity,-double.infinity), 406 Complex!double(double.nan,double.nan), 407 Complex!double(double.nan,0), 408 Complex!double(0,double.nan), 409 Complex!double(double.nan,double.infinity), 410 Complex!double(double.infinity,double.nan) 411 ]; 412 /* w(z), computed with WolframAlpha 413 ... note that WolframAlpha is problematic 414 some of the above inputs, so I had to 415 use the continued-fraction expansion 416 in WolframAlpha in some cases, or switch 417 to Maple */ 418 static immutable W = [ 419 Complex!double(-3.78270245518980507452677445620103199303131110e-7, 420 0.000903861276433172057331093754199933411710053155), 421 Complex!double(0.1764906227004816847297495349730234591778719532788, 422 -0.02146550539468457616788719893991501311573031095617), 423 Complex!double(0.2410250715772692146133539023007113781272362309451, 424 0.06087579663428089745895459735240964093522265589350), 425 Complex!double(0.30474420525691259245713884106959496013413834051768, 426 -0.20821893820283162728743734725471561394145872072738), 427 Complex!double(7.317131068972378096865595229600561710140617977e34, 428 8.321873499714402777186848353320412813066170427e34), 429 Complex!double(0.0615698507236323685519612934241429530190806818395, 430 -0.00676005783716575013073036218018565206070072304635), 431 Complex!double(0.3960793007699874918961319170187598400134746631, 432 -5.593152259116644920546186222529802777409274656e-9), 433 Complex!double(0.08217199226739447943295069917990417630675021771804, 434 -0.04701291087643609891018366143118110965272615832184), 435 Complex!double(0.00457246000350281640952328010227885008541748668738, 436 -0.00804900791411691821818731763401840373998654987934), 437 Complex!double(0.8746342859608052666092782112565360755791467973338452, 438 0.), 439 Complex!double(0.00468190164965444174367477874864366058339647648741, 440 0.0510735563901306197993676329845149741675029197050), 441 Complex!double(-0.0023193175200187620902125853834909543869428763219, 442 -0.025460054739731556004902057663500272721780776336), 443 Complex!double(9.11463368405637174660562096516414499772662584e304, 444 3.97101807145263333769664875189354358563218932e305), 445 Complex!double(-4.4927207857715598976165541011143706155432296e281, 446 -2.8019591213423077494444700357168707775769028e281), 447 Complex!double(2.820947917809305132678577516325951485807107151e-6, 448 2.820947917668257736791638444590253942253354058e-6), 449 Complex!double(2.82094791773878143474039725787438662716372268e-15, 450 2.82094791773878143474039725773333923127678361e-15), 451 Complex!double(-0.0000563851289696244350147899376081488003110150498, 452 -0.000169211755126812174631861529808288295454992688), 453 Complex!double(-5.586035480670854326218608431294778077663867e-162, 454 5.586035480670854326218608431294778077663867e-161), 455 Complex!double(0.00016318325137140451888255634399123461580248456, 456 -0.095232456573009287370728788146686162555021209999), 457 Complex!double(0.69504753678406939989115375989939096800793577783885, 458 -1.8916411171103639136680830887017670616339912024317), 459 Complex!double(0.0001242418269653279656612334210746733213167234822, 460 7.145975826320186888508563111992099992116786763e-7), 461 Complex!double(2.318587329648353318615800865959225429377529825e-8, 462 6.182899545728857485721417893323317843200933380e-8), 463 Complex!double(-0.0133426877243506022053521927604277115767311800303, 464 -0.0148087097143220769493341484176979826888871576145), 465 Complex!double(1.00000000000000012412170838050638522857747934, 466 1.12837916709551279389615890312156495593616433e-16), 467 Complex!double(0.9999999853310704677583504063775310832036830015, 468 2.595272024519678881897196435157270184030360773e-8), 469 Complex!double(-1.4731421795638279504242963027196663601154624e-15, 470 0.090727659684127365236479098488823462473074709), 471 Complex!double(5.79246077884410284575834156425396800754409308e-18, 472 0.0907276596841273652364790985059772809093822374), 473 Complex!double(0.0884658993528521953466533278764830881245144368, 474 1.37088352495749125283269718778582613192166760e-22), 475 Complex!double(0.0345480845419190424370085249304184266813447878, 476 2.11161102895179044968099038990446187626075258e-23), 477 Complex!double(6.63967719958073440070225527042829242391918213e-36, 478 0.0630820900592582863713653132559743161572639353), 479 Complex!double(0.00179435233208702644891092397579091030658500743634, 480 0.0951983814805270647939647438459699953990788064762), 481 Complex!double(9.09760377102097999924241322094863528771095448e-13, 482 0.0709979210725138550986782242355007611074966717), 483 Complex!double(7.2049510279742166460047102593255688682910274423e-304, 484 0.0201552956479526953866611812593266285000876784321), 485 Complex!double(3.04543604652250734193622967873276113872279682e-44, 486 0.0566481651760675042930042117726713294607499165), 487 Complex!double(3.04543604652250734193622967873276113872279682e-44, 488 0.0566481651760675042930042117726713294607499165), 489 Complex!double(0.5659928732065273429286988428080855057102069081e-12, 490 0.056648165176067504292998527162143030538756683302), 491 Complex!double(-0.56599287320652734292869884280802459698927645e-12, 492 0.0566481651760675042929985271621430305387566833029), 493 Complex!double(0.0796884251721652215687859778119964009569455462, 494 1.11474461817561675017794941973556302717225126e-22), 495 Complex!double(0.07817195821247357458545539935996687005781943386550, 496 -0.01093913670103576690766705513142246633056714279654), 497 Complex!double(0.04670032980990449912809326141164730850466208439937, 498 0.03944038961933534137558064191650437353429669886545), 499 Complex!double(0.36787944117144232159552377016146086744581113103176, 500 0.60715770584139372911503823580074492116122092866515), 501 Complex!double(0, 502 0.010259688805536830986089913987516716056946786526145), 503 Complex!double(0.99004983374916805357390597718003655777207908125383, 504 -0.11208866436449538036721343053869621153527769495574), 505 Complex!double(0.99999999999999999999999999999999999999990000, 506 1.12837916709551257389615890312154517168802603e-20), 507 Complex!double(0.999999999999943581041645226871305192054749891144158, 508 0), 509 Complex!double(0.0110604154853277201542582159216317923453996211744250, 510 0), 511 Complex!double(0,0), 512 Complex!double(0,0), 513 Complex!double(0,0), 514 Complex!double(double.infinity,0), 515 Complex!double(0,0), 516 Complex!double(double.nan,double.nan), 517 Complex!double(double.nan,double.nan), 518 Complex!double(double.nan,double.nan), 519 Complex!double(double.nan,0), 520 Complex!double(double.nan,double.nan), 521 Complex!double(double.nan,double.nan) 522 ]; 523 commonTest!w_of_z(Z, W); 524 }