*Bounty: 50*

I’m trying to lighten the code review load for the maintainers of boost.math, and I was hoping you guys could help me out. I have a pull request which implements tanh-sinh quadrature, which is provably optimal for integrands in Hardy spaces.

Here’s my code:

which is also reproduced below.

I have a few design worries.

- It is a class and not a function. This is a bit confusing; I worry that people will not recognize that the constructor is doing some one-time calculations to make integrations faster.
- It takes a long time to compile. I generated the abscissas and weights to 100 digits, and then they must be lexically cast to their correct type. I could keep fewer levels of abscissas and weights in the .hpp, but then the runtime would longer for complex integrands.
- For integrands in Hardy spaces, the number of correct digits always doubles on each refinement. However, we want to do just the right amount of work to deliver the requested accuracy, which is almost always overshot.

Interface:

```
#ifndef BOOST_MATH_QUADRATURE_TANH_SINH_HPP
#define BOOST_MATH_QUADRATURE_TANH_SINH_HPP
#include <cmath>
#include <limits>
#include <memory>
#include <boost/math/quadrature/detail/tanh_sinh_detail.hpp>
namespace boost{ namespace math{
template<class Real>
class tanh_sinh
{
public:
tanh_sinh(Real tol = sqrt(std::numeric_limits<Real>::epsilon()), size_t max_refinements = 20);
template<class F>
Real integrate(F f, Real a, Real b, Real* error = nullptr);
private:
std::shared_ptr<detail::tanh_sinh_detail<Real>> m_imp;
};
template<class Real>
tanh_sinh<Real>::tanh_sinh(Real tol, size_t max_refinements) : m_imp(std::make_shared<detail::tanh_sinh_detail<Real>>(tol, max_refinements))
{
return;
}
template<class Real>
template<class F>
Real tanh_sinh<Real>::integrate(F f, Real a, Real b, Real* error)
{
using std::isfinite;
using boost::math::constants::half;
using boost::math::detail::tanh_sinh_detail;
if (isfinite(a) && isfinite(b))
{
if (b <= a)
{
throw std::domain_error("Arguments to integrate are in wrong order; integration over [a,b] must have b > a.n");
}
Real avg = (a+b)*half<Real>();
Real diff = (b-a)*half<Real>();
auto u = [=](Real z) { return f(avg + diff*z); };
return diff*m_imp->integrate(u, error);
}
// Infinite limits:
if (a <= std::numeric_limits<Real>::lowest() && b >= std::numeric_limits<Real>::max())
{
auto u = [=](Real t) { auto t_sq = t*t; auto inv = 1/(1 - t_sq); return f(t*inv)*(1+t_sq)*inv*inv; };
return m_imp->integrate(u, error);
}
// Right limit is infinite:
if (isfinite(a) && b >= std::numeric_limits<Real>::max())
{
auto u = [=](Real t) { auto z = 1/(t+1); auto arg = 2*z + a - 1; return f(arg)*z*z; };
return 2*m_imp->integrate(u, error);
}
if (isfinite(b) && a <= std::numeric_limits<Real>::lowest())
{
auto u = [=](Real t) { return f(b-t);};
auto v = [=](Real t) { auto z = 1/(t+1); auto arg = 2*z - 1; return u(arg)*z*z; };
return 2*m_imp->integrate(v, error);
}
throw std::logic_error("The domain of integration is not sensible; please check the bounds.n");
}
}}
#endif
```

Implementation (with some layers of abscissas and weights removed, for brevity):

```
#ifndef BOOST_MATH_QUADRATURE_DETAIL_TANH_SINH_DETAIL_HPP
#define BOOST_MATH_QUADRATURE_DETAIL_TANH_SINH_DETAIL_HPP
#include <cmath>
#include <vector>
#include <typeinfo>
#include <boost/math/constants/constants.hpp>
#include <boost/math/special_functions/next.hpp>
namespace boost{ namespace math{ namespace detail{
// Returns the tanh-sinh quadrature of a function f over the open interval (-1, 1)
template<class Real>
class tanh_sinh_detail
{
public:
tanh_sinh_detail(Real tol = sqrt(std::numeric_limits<Real>::epsilon()), size_t max_refinements = 15);
template<class F>
Real integrate(F f, Real* error = nullptr);
private:
Real m_tol;
Real m_t_max;
size_t m_max_refinements;
const std::vector<std::vector<Real>> m_abscissas{
{
lexical_cast<Real>("0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"), // g(0)
lexical_cast<Real>("0.951367964072746945727055362904639667492765811307212865380079106867050650113429723656597692697291568999499"), // g(1)
lexical_cast<Real>("0.999977477192461592864899636308688849285982470957489530009950811164291603926066803141755297920571692976244"), // g(2)
lexical_cast<Real>("0.999999999999957058389441217592223051901253805502353310119906858210171731776098247943305316472169355401371"), // g(3)
lexical_cast<Real>("0.999999999999999999999999999999999999883235110249013906725663510362671044720752808415603434458608954302982"), // g(4)
lexical_cast<Real>("0.999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999988520"), // g(5)
},
{
lexical_cast<Real>("0.674271492248435826080420090632052144267244477154740136576044169121421222924677035505101849981126759183585"), // g(0.5)
lexical_cast<Real>("0.997514856457224386832717419238820368149231215381809295391585321457677448277585202664213469426402672227688"), // g(1.5)
lexical_cast<Real>("0.999999988875664881984668015033322737014902900245095922058323073481945768209599289672119775131473502267885"), // g(2.5)
lexical_cast<Real>("0.999999999999999999999946215084086063112254432391666747077319911504923816981286361121293026490456057993796"), // g(3.5)
lexical_cast<Real>("0.999999999999999999999999999999999999999999999999999999999999920569786807778838966034206923747918174840316"), // g(4.5)
},
};
const std::vector<std::vector<Real>> m_weights{
{
lexical_cast<Real>("1.570796326794896619231321691639751442098584699687552910487472296153908203143104499314017412671058533991074"), // g'(0)
lexical_cast<Real>("0.230022394514788685000412470422321665303851255802659059205889049267344079034811721955914622224110769925389"), // g'(1)
lexical_cast<Real>("0.000266200513752716908657010159372233158103339260303474890448151422406465941700219597184051859505048039379"), // g'(2)
lexical_cast<Real>("0.000000000001358178427453909083422196787474500211182703205221379182701148467473091863958082265061102415190"), // g'(3)
lexical_cast<Real>("0.000000000000000000000000000000000010017416784066252963809895613167040078319571113599666377944404151233916"), // g'(4)
lexical_cast<Real>("0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002676308"), // g'(5)
},
{
lexical_cast<Real>("0.965976579412301148012086924538029475282925173953839319280640651228612016942374156051084481340637789686773"), // g'(0.5)
lexical_cast<Real>("0.018343166989927842087331266912053799182780844891859123704139702537001454134135727608312038892655885289502"), // g'(1.5)
lexical_cast<Real>("0.000000214312045569430393576972333072321177878392994404158296872167216420137367626015660887389125958297359"), // g'(2.5)
lexical_cast<Real>("0.000000000000000000002800315101977588958258001699217015336310581249269449114860803391121477177123095973212"), // g'(3.5)
lexical_cast<Real>("0.000000000000000000000000000000000000000000000000000000000011232705345486918789827474356787339538750684404"), // g'(4.5)
},
};
};
template<class Real>
tanh_sinh_detail<Real>::tanh_sinh_detail(Real tol, size_t max_refinements)
{
m_tol = tol;
m_max_refinements = max_refinements;
/*
* Our goal is to calculate t_max such that tanh(pi/2 sinh(t_max)) < 1 in the requested precision.
* What follows is a good estimate for t_max, but in fact we can get closer by starting with this estimate
* and then calculating tanh(pi/2 sinh(t_max + eps)) until it = 1 (the second to last is t_max).
* However, this is computationally expensive, so we can't do it.
* An alternative is to cache the value of t_max for various types (float, double, long double, float128, cpp_bin_float_50, cpp_bin_float_100)
* and then simply use them, but unfortunately the values will be platform dependent.
* As such we are then susceptible to the catastrophe where we evaluate the function at x = 1, when we have promised we wouldn't do that.
* Other authors solve this problem by computing the abscissas in double the requested precision, and then returning the result at the request precision.
* This seems to be overkill to me, but presumably it's an option if we find integrals on which this method struggles.
*/
using std::tanh;
using std::sinh;
using std::asinh;
using std::atanh;
using boost::math::constants::half_pi;
using boost::math::constants::two_div_pi;
auto g = [](Real t) { return tanh(half_pi<Real>()*sinh(t)); };
auto g_inv = [](Real x) { return asinh(two_div_pi<Real>()*atanh(x)); };
Real x = float_prior((Real) 1);
m_t_max = g_inv(x);
while(!isnormal(m_t_max))
{
// Although you might suspect that we could be in this loop essentially for ever, in point of fact it is only called once
// even for 100 digit accuracy, and isn't called at all up to float128.
x = float_prior(x);
m_t_max = g_inv(x);
}
// This occurs once on 100 digit arithmetic:
while(!(g(m_t_max) < (Real) 1))
{
x = float_prior(x);
m_t_max = g_inv(x);
}
}
template<class Real>
template<class F>
Real tanh_sinh_detail<Real>::integrate(F f, Real* error)
{
using std::abs;
using std::floor;
using std::tanh;
using std::sinh;
using std::sqrt;
using boost::math::constants::half;
using boost::math::constants::half_pi;
Real h = 1;
Real I0 = half_pi<Real>()*f(0);
Real IL0 = abs(I0);
for(size_t i = 1; i <= m_t_max; ++i)
{
Real x = m_abscissas[0][i];
Real w = m_weights[0][i];
Real yp = f(x);
Real ym = f(-x);
I0 += (yp + ym)*w;
IL0 += (abs(yp) + abs(ym))*w;
}
Real I1 = half<Real>()*I0;
Real IL1 = abs(I1);
h /= 2;
Real sum = 0;
Real absum = 0;
for(size_t i = 0; i < m_weights[1].size() && h + i <= m_t_max; ++i)
{
Real x = m_abscissas[1][i];
Real w = m_weights[1][i];
Real yp = f(x);
Real ym = f(-x);
sum += (yp + ym)*w;
absum += (abs(yp) + abs(ym))*w;
}
I1 += sum*h;
IL1 += sum*h;
size_t k = 2;
Real err = abs(I0 - I1);
while (k < 4 || (k < m_weights.size() && k < m_max_refinements) )
{
I0 = I1;
IL0 = IL1;
I1 = half<Real>()*I0;
IL1 = half<Real>()*IL0;
h = (Real) 1/ (Real) (1 << k);
Real sum = 0;
Real absum = 0;
auto const& abscissa_row = m_abscissas[k];
auto const& weight_row = m_weights[k];
// We have no guarantee that round-off error won't cause the function to be evaluated strictly at the endpoints.
// However, making sure x < 1 - eps is a reasonable compromise between accuracy and usability..
for(size_t j = 0; j < weight_row.size() && abscissa_row[j] < (Real) 1 - std::numeric_limits<Real>::epsilon(); ++j)
{
Real x = abscissa_row[j];
Real w = weight_row[j];
Real yp = f(x);
Real ym = f(-x);
Real term = (yp + ym)*w;
sum += term;
// A question arises as to how accurately we actually need to estimate the L1 integral.
// For simple integrands, computing the L1 norm makes the integration 20% slower,
// but for more complicated integrands, this calculation is not noticeable.
Real abterm = (abs(yp) + abs(ym))*w;
absum += abterm;
}
I1 += sum*h;
IL1 += absum*h;
++k;
err = abs(I0 - I1);
if (err <= m_tol*IL1)
{
if (error)
{
*error = err;
}
return I1;
}
}
if (!isnormal(I1))
{
throw std::domain_error("The tanh_sinh integrate evaluated your function at a singular point. Please narrow the bounds of integration or chech your function for singularities.n");
}
while (k < m_max_refinements && err > m_tol*IL1)
{
I0 = I1;
IL0 = IL1;
I1 = half<Real>()*I0;
IL1 = half<Real>()*IL0;
h *= half<Real>();
Real sum = 0;
Real absum = 0;
for(Real t = h; t < m_t_max - std::numeric_limits<Real>::epsilon(); t += 2*h)
{
Real s = sinh(t);
Real c = sqrt(1+s*s);
Real x = tanh(half_pi<Real>()*s);
Real w = half_pi<Real>()*c*(1-x*x);
Real yp = f(x);
Real ym = f(-x);
Real term = (yp + ym)*w;
sum += term;
Real abterm = (abs(yp) + abs(ym))*w;
absum += abterm;
// There are claims that this test improves performance,
// however my benchmarks show that it's slower!
// However, I leave this comment here because it totally stands to reason that this *should* help:
//if (abterm < std::numeric_limits<Real>::epsilon()) { break; }
}
I1 += sum*h;
IL1 += absum*h;
++k;
err = abs(I0 - I1);
if (!isnormal(I1))
{
throw std::domain_error("The tanh_sinh integrate evaluated your function at a singular point. Please narrow the bounds of integration or chech your function for singularities.n");
}
}
if (error)
{
*error = err;
}
return I1;
}
}}}
#endif
```

For those of you interested in performance, I have used google benchmark to measure the runtime, which is reproduced below:

```
#include <cmath>
#include <iostream>
#include <random>
#include <boost/math/quadrature/tanh_sinh.hpp>
#include <boost/math/constants/constants.hpp>
#include <boost/multiprecision/float128.hpp>
#include <boost/multiprecision/cpp_bin_float.hpp>
#include <boost/multiprecision/cpp_dec_float.hpp>
#include <benchmark/benchmark.h>
using boost::math::tanh_sinh;
using boost::math::constants::half_pi;
using boost::multiprecision::float128;
using boost::multiprecision::cpp_bin_float_50;
using boost::multiprecision::cpp_bin_float_100;
using boost::multiprecision::cpp_dec_float_50;
using boost::multiprecision::cpp_dec_float_100;
template<class Real>
static void BM_tanh_sinh_sqrt_log(benchmark::State& state)
{
using std::sqrt;
using std::log;
auto f = [](Real t) { return sqrt(t)*log(t); };
Real Q;
Real tol = 100*std::numeric_limits<Real>::epsilon();
tanh_sinh<Real> integrator(tol, 20);
while(state.KeepRunning())
{
benchmark::DoNotOptimize(Q = integrator.integrate(f, (Real) 0, (Real) 1));
}
}
template<class Real>
static void BM_tanh_sinh_linear(benchmark::State& state)
{
auto f = [](Real t) { return 5*t + 7; };
Real Q;
Real tol = 100*std::numeric_limits<Real>::epsilon();
tanh_sinh<Real> integrator(tol, 20);
while(state.KeepRunning())
{
benchmark::DoNotOptimize(Q = integrator.integrate(f, (Real) 0, (Real) 1));
}
}
template<class Real>
static void BM_tanh_sinh_quadratic(benchmark::State& state)
{
auto f = [](Real t) { return 5*t*t + 3*t + 7; };
Real Q;
Real tol = 100*std::numeric_limits<Real>::epsilon();
tanh_sinh<Real> integrator(tol, 20);
while(state.KeepRunning())
{
benchmark::DoNotOptimize(Q = integrator.integrate(f, (Real) -1, (Real) 1));
}
}
template<class Real>
static void BM_tanh_sinh_runge(benchmark::State& state)
{
auto f = [](Real t) { return 1/(1+25*t*t); };
Real Q;
Real tol = 100*std::numeric_limits<Real>::epsilon();
tanh_sinh<Real> integrator(tol, 20);
while(state.KeepRunning())
{
benchmark::DoNotOptimize(Q = integrator.integrate(f, (Real) -1, (Real) 1));
}
}
template<class Real>
static void BM_tanh_sinh_runge_move_pole(benchmark::State& state)
{
auto f = [](Real t) { Real tmp = t/5; return 1/(1+tmp*tmp); };
Real Q;
Real tol = 100*std::numeric_limits<Real>::epsilon();
tanh_sinh<Real> integrator(tol, 20);
while(state.KeepRunning())
{
benchmark::DoNotOptimize(Q = integrator.integrate(f, (Real) -1, (Real) 1));
}
}
template<class Real>
static void BM_tanh_sinh_exp_cos(benchmark::State& state)
{
auto f = [](Real t) { return exp(t)*cos(t); };
Real Q;
Real tol = 100*std::numeric_limits<Real>::epsilon();
tanh_sinh<Real> integrator(tol, 20);
while(state.KeepRunning())
{
benchmark::DoNotOptimize(Q = integrator.integrate(f, (Real) 0, half_pi<Real>()));
}
}
template<class Real>
static void BM_tanh_sinh_horrible(benchmark::State& state)
{
auto f = [](Real x) { return x*sin(2*exp(2*sin(2*exp(2*x) ) ) ); };
Real Q;
Real tol = 100*std::numeric_limits<Real>::epsilon();
tanh_sinh<Real> integrator(tol, 20);
while(state.KeepRunning())
{
benchmark::DoNotOptimize(Q = integrator.integrate(f, (Real) -1, (Real) 1));
}
}
BENCHMARK_TEMPLATE(BM_tanh_sinh_sqrt_log, float);
BENCHMARK_TEMPLATE(BM_tanh_sinh_sqrt_log, double);
BENCHMARK_TEMPLATE(BM_tanh_sinh_sqrt_log, long double);
BENCHMARK_TEMPLATE(BM_tanh_sinh_sqrt_log, float128);
BENCHMARK_TEMPLATE(BM_tanh_sinh_sqrt_log, cpp_bin_float_50);
BENCHMARK_TEMPLATE(BM_tanh_sinh_sqrt_log, cpp_bin_float_100);
BENCHMARK_TEMPLATE(BM_tanh_sinh_linear, float);
BENCHMARK_TEMPLATE(BM_tanh_sinh_linear, double);
BENCHMARK_TEMPLATE(BM_tanh_sinh_linear, long double);
BENCHMARK_TEMPLATE(BM_tanh_sinh_linear, float128);
BENCHMARK_TEMPLATE(BM_tanh_sinh_linear, cpp_bin_float_50);
BENCHMARK_TEMPLATE(BM_tanh_sinh_linear, cpp_bin_float_100);
BENCHMARK_TEMPLATE(BM_tanh_sinh_quadratic, float);
BENCHMARK_TEMPLATE(BM_tanh_sinh_quadratic, double);
BENCHMARK_TEMPLATE(BM_tanh_sinh_quadratic, long double);
BENCHMARK_TEMPLATE(BM_tanh_sinh_quadratic, float128);
BENCHMARK_TEMPLATE(BM_tanh_sinh_runge, float);
BENCHMARK_TEMPLATE(BM_tanh_sinh_runge, double);
BENCHMARK_TEMPLATE(BM_tanh_sinh_runge, long double);
BENCHMARK_TEMPLATE(BM_tanh_sinh_runge, float128);
BENCHMARK_TEMPLATE(BM_tanh_sinh_runge_move_pole, float);
BENCHMARK_TEMPLATE(BM_tanh_sinh_runge_move_pole, double);
BENCHMARK_TEMPLATE(BM_tanh_sinh_runge_move_pole, long double);
BENCHMARK_TEMPLATE(BM_tanh_sinh_runge_move_pole, float128);
BENCHMARK_TEMPLATE(BM_tanh_sinh_exp_cos, float);
BENCHMARK_TEMPLATE(BM_tanh_sinh_exp_cos, double);
BENCHMARK_TEMPLATE(BM_tanh_sinh_exp_cos, long double);
BENCHMARK_TEMPLATE(BM_tanh_sinh_exp_cos, float128);
BENCHMARK_TEMPLATE(BM_tanh_sinh_exp_cos, cpp_bin_float_50);
BENCHMARK_TEMPLATE(BM_tanh_sinh_exp_cos, cpp_bin_float_100);
BENCHMARK_TEMPLATE(BM_tanh_sinh_horrible, float);
BENCHMARK_TEMPLATE(BM_tanh_sinh_horrible, double);
BENCHMARK_TEMPLATE(BM_tanh_sinh_horrible, long double);
BENCHMARK_TEMPLATE(BM_tanh_sinh_horrible, float128);
BENCHMARK_TEMPLATE(BM_tanh_sinh_horrible, cpp_bin_float_50);
BENCHMARK_TEMPLATE(BM_tanh_sinh_horrible, cpp_bin_float_100);
BENCHMARK_MAIN();
/*
---------------------------------------------------------------------------------
Benchmark Time CPU Iterations
---------------------------------------------------------------------------------
BM_tanh_sinh_sqrt_log<float> 550 ns 550 ns 1214044
BM_tanh_sinh_sqrt_log<double> 9004 ns 9003 ns 77327
BM_tanh_sinh_sqrt_log<long double> 3635 ns 3635 ns 192432
BM_tanh_sinh_sqrt_log<float128> 342661 ns 342653 ns 2043
BM_tanh_sinh_sqrt_log<cpp_bin_float_50> 5940813 ns 5940664 ns 117
BM_tanh_sinh_sqrt_log<cpp_bin_float_100> 41784341 ns 41783310 ns 17
BM_tanh_sinh_linear<float> 33 ns 33 ns 20972925
BM_tanh_sinh_linear<double> 150 ns 150 ns 4655756
BM_tanh_sinh_linear<long double> 347 ns 347 ns 2019473
BM_tanh_sinh_linear<float128> 41586 ns 41585 ns 16807
BM_tanh_sinh_linear<cpp_bin_float_50> 147107 ns 147104 ns 4753
BM_tanh_sinh_linear<cpp_bin_float_100> 482590 ns 482581 ns 1452
BM_tanh_sinh_quadratic<float> 79 ns 79 ns 8846856
BM_tanh_sinh_quadratic<double> 183 ns 183 ns 3828752
BM_tanh_sinh_quadratic<long double> 424 ns 424 ns 1651417
BM_tanh_sinh_quadratic<float128> 58832 ns 58831 ns 11778
BM_tanh_sinh_runge<float> 340 ns 340 ns 2061682
BM_tanh_sinh_runge<double> 17312 ns 17311 ns 40403
BM_tanh_sinh_runge<long double> 36697 ns 36696 ns 19071
BM_tanh_sinh_runge<float128> 2984174 ns 2984116 ns 236
BM_tanh_sinh_runge_move_pole<float> 158 ns 158 ns 4431412
BM_tanh_sinh_runge_move_pole<double> 777 ns 777 ns 896286
BM_tanh_sinh_runge_move_pole<long double> 1095 ns 1095 ns 636425
BM_tanh_sinh_runge_move_pole<float128> 80297 ns 80295 ns 8678
BM_tanh_sinh_exp_cos<float> 5685 ns 5685 ns 121337
BM_tanh_sinh_exp_cos<double> 55022 ns 55021 ns 12558
BM_tanh_sinh_exp_cos<long double> 71875 ns 71874 ns 9663
BM_tanh_sinh_exp_cos<float128> 379522 ns 379514 ns 1848
BM_tanh_sinh_exp_cos<cpp_bin_float_50> 4538073 ns 4537984 ns 156
BM_tanh_sinh_exp_cos<cpp_bin_float_100> 33965946 ns 33965260 ns 21
BM_tanh_sinh_horrible<float> 427490 ns 427478 ns 1633
BM_tanh_sinh_horrible<double> 572976 ns 572966 ns 1214
BM_tanh_sinh_horrible<long double> 1346058 ns 1346033 ns 516
BM_tanh_sinh_horrible<float128> 22030156 ns 22029403 ns 32
BM_tanh_sinh_horrible<cpp_bin_float_50> 635390418 ns 635373654 ns 1
BM_tanh_sinh_horrible<cpp_bin_float_100> 4867960245 ns 4867844329 ns 1
*/
```

These numbers were created using an Intel Xeon E3-1230, and the benchmarks can be compiled with

```
CXX = g++
INC=-I/path/to/boost
all: perf.x
perf.x: perf.o
$(CXX) -o $@ $< -lbenchmark -pthread -lquadmath
perf.o: perf.cpp
$(CXX) --std=c++14 -fext-numeric-literals -Wfatal-errors -g -O3 $(INC) -c $< -o $@
clean:
rm -f *.x *.o
```

Get this bounty!!!