#StackBounty: #c++ #cuda Shaders made with CUDA

Bounty: 50

While I know that Shaders are often made with glsl or hlsl because that is want those languages were made for I wanted to see if there was another way to accomplish Shaders while also using the GPU. as a result I decided to use CUDA for the Shaders and win32 for the actual window. also most of the Shaders were ported from shader-toy so that is why they look different compared to the rest of the code.

main.cu

/* standard headers */
#include <cstdint>
#include <cstddef>

/* windows headers */
#include <windows.h>

/* cuda headers */
#include <cuda_runtime.h>

/* glm headers */
#define GLM_FORCE_SWIZZLE
#include <glm/glm.hpp>

/* project headers */
#include "shaders.cuh"

/* global variables */
namespace
{
    constexpr std::uint32_t width = 1920 / 2;
    constexpr std::uint32_t height = 1080 / 2;
    constexpr std::int64_t milliseconds_in_a_second = 1000;
    bool running = true;
    std::uint32_t *image = nullptr;
    std::uint32_t *device_image = nullptr;
    HBITMAP bitmap = nullptr;
    HBITMAP old_bitmap = nullptr;
    HDC bitmap_device_context = nullptr;
    std::uint32_t window_width;
    std::uint32_t window_height;
    dim3 const threadsPerBlock{ 32, 32 };
    dim3 numBlocks = { 0 };
}

void initialize_globals(HWND window_handle)
{
    /* get window rect */
    RECT rect;
    GetClientRect(window_handle, &rect);

    /* get the window width and height */
    window_width = rect.right - rect.left;
    window_height = rect.bottom - rect.top;

    /* calculate the number of blocks and also use ceil rounding */
    numBlocks = dim3{ (window_width + threadsPerBlock.x - 1) / threadsPerBlock.x, (window_height + threadsPerBlock.x - 1) / threadsPerBlock.y };

    /* create a DIB */
    HDC const temp_hdc = CreateCompatibleDC(nullptr);
    BITMAPINFO bitmap_info = { 0 };
    bitmap_info.bmiHeader.biSize = sizeof(bitmap_info);
    bitmap_info.bmiHeader.biWidth = window_width;
    bitmap_info.bmiHeader.biHeight = window_height;
    bitmap_info.bmiHeader.biPlanes = 1;
    bitmap_info.bmiHeader.biBitCount = 32;
    bitmap_info.bmiHeader.biCompression = BI_RGB;
    bitmap_info.bmiHeader.biSizeImage = 0;
    bitmap = CreateDIBSection(temp_hdc, &bitmap_info, DIB_RGB_COLORS, reinterpret_cast<void **>(&image), nullptr, 0);
    bitmap_device_context = CreateCompatibleDC(nullptr);
    old_bitmap = static_cast<HBITMAP>(SelectObject(bitmap_device_context, bitmap));

    /* allocate memory for the image on the device */
    cudaMalloc(&device_image, sizeof(std::uint32_t) * window_width * window_height);

    /* cleanup */
    DeleteDC(temp_hdc);
}

void cleanup_globals()
{
    SelectObject(bitmap_device_context, old_bitmap);
    DeleteDC(bitmap_device_context);
    DeleteObject(bitmap);
}

void blt_bitmap(HWND const window_handle, HDC const device_context)
{
    BitBlt(device_context, 0, 0, window_width,
        window_height, bitmap_device_context, 0, 0, SRCCOPY);
}

std::int64_t milliseconds_now()
{
    static LARGE_INTEGER frequency;
    static bool use_qpc = QueryPerformanceFrequency(&frequency);
    if (use_qpc)
    {
        LARGE_INTEGER now;
        QueryPerformanceCounter(&now);
        return (milliseconds_in_a_second * now.QuadPart) / frequency.QuadPart;
    }
    else
    {
        return GetTickCount();
    }
}

LRESULT CALLBACK WinProc(HWND window_handle, UINT message, WPARAM wParam, LPARAM lParam)
{
    switch (message)
    {
        case WM_PAINT:
        {
            /* redraw the screen */
            PAINTSTRUCT ps;
            HDC hdc = BeginPaint(window_handle, &ps);
            blt_bitmap(window_handle, hdc);
            EndPaint(window_handle, &ps);
        } break;
        case WM_CREATE:
        {
            initialize_globals(window_handle);
        } break;
        case WM_QUIT:
        case WM_CLOSE:
        case WM_DESTROY:
        {
            running = false;
            PostQuitMessage(0);
        } break;
        default:
        {
            return DefWindowProcA(window_handle, message, wParam, lParam);
        } break;
    }
    return 0;
}

template<typename T>
__global__
void generate_image_kernel(std::uint32_t *const image, std::uint32_t const image_width, std::uint32_t const image_height, float time, T pixel_function)
{
    std::uint32_t x = threadIdx.x + blockIdx.x * blockDim.x;
    std::uint32_t y = threadIdx.y + blockIdx.y * blockDim.y;

    /* check if y or x is out of bounds */
    if (x >= image_width || y >= image_height) return;

    glm::u8vec3 const pixel_color{ glm::round(glm::clamp(pixel_function(glm::vec2{x, y}, glm::vec2{image_width, image_height}, time), 0.0f, 1.0f) * 255.0f) };
    image[y * image_width + x] = pixel_color.r << 16 | pixel_color.g << 8 | pixel_color.b;
}

template<typename T>
void generate_image(float time, T &&pixel_function)
{
    generate_image_kernel <<< numBlocks, threadsPerBlock >>>(device_image, window_width, window_height, time, std::forward<T>(pixel_function));
}

int __stdcall WinMain(HINSTANCE hInstance, HINSTANCE, LPSTR, int)
{
    /* create a window class */
    WNDCLASSEXA wndclassex = { };
    wndclassex.cbSize = sizeof(wndclassex),
        wndclassex.style = CS_HREDRAW | CS_VREDRAW,
        wndclassex.lpfnWndProc = &WinProc,
        wndclassex.cbClsExtra = 0,
        wndclassex.cbWndExtra = 0,
        wndclassex.hInstance = hInstance,
        wndclassex.hIcon = LoadIconA(hInstance, IDI_APPLICATION),
        wndclassex.hCursor = LoadCursorA(nullptr, IDC_ARROW),
        wndclassex.hbrBackground = (HBRUSH)(COLOR_BACKGROUND),
        wndclassex.lpszMenuName = nullptr,
        wndclassex.lpszClassName = "Class Name",
        wndclassex.hIconSm = LoadIconA(hInstance, IDI_APPLICATION);

    if (RegisterClassExA(&wndclassex) == 0)
    {
        ExitProcess(GetLastError());
    }
    else
    {

        /* create a window */
        HWND window_handle = CreateWindowA(wndclassex.lpszClassName,
            "This is the title of the window",
            WS_OVERLAPPEDWINDOW ^ WS_THICKFRAME ^ WS_MAXIMIZEBOX, /* we don't want resizing */
            CW_USEDEFAULT, CW_USEDEFAULT,
            width, height,
            nullptr, nullptr,
            hInstance, nullptr);
        if (window_handle == nullptr)
        {
            ExitProcess(GetLastError());
        }
        else
        {
            /* start timers */
            std::uint64_t start_time = milliseconds_now();

            /* get device context */
            HDC device_context = GetDC(window_handle);

            /* show the window */
            ShowWindow(window_handle, SW_SHOWDEFAULT);

            /* setup */
            UpdateWindow(window_handle);

            MSG msg;
            for (;;)
            {
                if (PeekMessageA(&msg, nullptr, 0, 0, PM_REMOVE))
                {
                    TranslateMessage(&msg);
                    DispatchMessageA(&msg);
                    if (msg.message == WM_QUIT) running = false;
                }

                /* exit the loop if we are no longer running */
                if (running == false) break;

                {
                    std::uint64_t elapsed_time = milliseconds_now() - start_time;
                    float time = elapsed_time/(float)milliseconds_in_a_second;

                    /* generate the image */
                    generate_image(time, [=] __device__(auto const &coords, auto const &res, auto const &time) {
                        return + shader::one_way_trip(coords, res, time);
                    });

                    /* copy the memory from the device to the host */
                    cudaMemcpy(image, device_image,
                        sizeof(std::uint32_t) * window_width * window_height, cudaMemcpyDeviceToHost);
                    blt_bitmap(window_handle, device_context);
                }
            }

            /* cleanup and exit */
            ReleaseDC(window_handle, device_context);
            cleanup_globals();
            DestroyWindow(window_handle);
            UnregisterClassA(wndclassex.lpszClassName, hInstance);
            ExitProcess(msg.message);
        }
    }
}

shaders.cuh

#pragma once

/* note: most of these are from shader toy */
namespace shader
{
    /* while glm already has glm::pow for vec3 it does not seem to work for nvcc with -std=c++17 enabled */
    namespace detail
    {
        __device__ glm::vec3 pow(glm::vec3 const &base, glm::vec3 const &exponent)
        {
            return { ::pow(base.x, exponent.x), ::pow(base.y, exponent.y), ::pow(base.z, exponent.z) };
        }
    }
    
    __device__ glm::vec3 one_way_trip(glm::vec2 const &coords, glm::vec2 res, float iTime)
    {
        using namespace glm;
        // -------------------------------------------------------------------------------------------

// "ONE WAY TRIP" by PVM

// Shadertoy version of our 4k intro entry for Evoke 2019

// Code: Kali
// Music: Uctumi

// NOTE: Rewind the shader after it starts to correct audio sync

// -------------------------------------------------------------------------------------------

// Original code without optimizations for the 4k intro and including PVM logo

// global variables
        float det = .005f, fin = 0.f, time; // raymarching threshold, aux variable
        const float maxdist = 60.f; // max distance for raymarching
        vec3 ldir = vec3(0.f, 1.f, 4.f); // light direction (without normalization)
        vec3 fcol; // global for coloring
        vec3 suncol = vec3(2.f, 1.f, .6f); // sun color

        // 2D rotation functions
        auto rot2D = [](float a) {
            float s = sin(a);
            float c = cos(a);
            return mat2(c, s, -s, c);
        };

        // A version with degrees used when designing the logo
        auto rot2Ddeg = [](float a) {
            a = radians(a);
            float s = sin(a);
            float c = cos(a);
            return mat2(c, s, -s, c);
        };


        // -------------------------------------------------------------------------------------------

        // PVM LOGO


        // 2D rectangle with a tilt distortion value
        auto rect = [](vec2 p, vec2 b, float inc) {
            p.x += p.y * inc;
            vec2 d = abs(p) - b;
            return length(max(d, vec2(0))) + min(max(d.x, d.y), 0.0f);
        };

        // 2D triangle function by iq (I think)
        auto tri = [&](vec2 p, vec2 q, float ang) {
            p = p * rot2Ddeg(ang);
            p.x = abs(p.x);

            vec2 a = p - q * clamp(dot(p, q) / dot(q, q), 0.0f, 1.0f);
            vec2 b = p - q * vec2(clamp(p.x / q.x, 0.0f, 1.0f), 1.0f);
            float s = -sign(q.y);
            vec2 d = min(vec2(dot(a, a), s * (p.x * q.y - p.y * q.x)),
                vec2(dot(b, b), s * (p.y - q.y)));

            return -sqrt(d.x) * sign(d.y);
        };


        // Here the logo is constructed from distoted rectangles and triangles
        auto logo = [&](vec2 uv) {
            uv *= 1.2f;
            uv.x *= 1.15f;
            uv.y -= .6f;
            uv.x -= 1.3f;
            float d = rect(uv, vec2(.045, .25), -.5f);
            uv.x += .25f;
            uv.y += .01f;
            d = min(d, rect(uv, vec2(.045, .24), .5f));
            uv.x += .265f;
            uv.y -= .04f;
            d = min(d, rect(uv, vec2(.045, .2), -.55f));
            uv.x -= .73f;
            uv.y -= .06f;
            d = min(d, rect(uv, vec2(.045, .16), .4f));
            uv.x -= .105f;
            uv.y += .074f;
            d = min(d, rect(uv, vec2(.045, .085), -.45f));
            uv.x -= .105f;
            uv.y += .045f;
            d = min(d, rect(uv, vec2(.045, .13), .45f));
            uv.x -= .25f;
            uv.y += .1f;
            d = min(d, rect(uv, vec2(.18, .03), .0f));
            uv.x += 1.32f;
            uv.y -= .18f;
            d = min(d, rect(uv + vec2(0.0f, .03f), vec2(.35f, .03f), .0f));
            uv.x -= .5165f;
            uv.y += .4f;
            d = min(d, tri(uv, vec2(.09, .185), 0.f));
            uv.x -= .492f;
            uv.y -= .56f;
            d = min(d, tri(uv, vec2(.063, .14), 180.f));
            uv.x += .225f;
            uv.y -= .17f;
            d = min(d, tri(uv, vec2(.063, .145), 180.f));
            uv.x -= .142f;
            uv.y += .555f;
            d = min(d, tri(uv, vec2(.063, .145), 0.f));
            uv.x += .985f;
            uv.y += .075f;
            vec2 uvd = uv - vec2(uv.y, 0.f);
            d = min(d, tri(uvd - vec2(0.003, 0.022), vec2(.04, .05), 0.f));
            uv.x -= .16f;
            uv.y -= .63f;
            uvd = uv + vec2(uv.y * .4f, 0.f);
            d = min(d, tri(uvd + vec2(.03f, 0.f), vec2(.07, .23), -145.f));
            uvd = uv + vec2(.465f, .33f);
            uvd = uvd * rot2Ddeg(27.f);
            uvd -= vec2(uvd.y * .5f * sign(uvd.x), 0.);
            d = min(d, rect(uvd, vec2(.08f, .03f), .0f));
            uvd = uv + vec2(-1.43f, .534f);
            uvd = uvd * rot2Ddeg(206.f);
            uvd -= vec2(uvd.y * .5f * sign(uvd.x), 0.f);
            d = min(d, rect(uvd, vec2(.08f, .03f), .0f));
            float s = pow(abs(d) + .9f, 10.f);
            uvd = uv + vec2(-.28f, .36f);
            uvd = uvd * rot2Ddeg(50.f);
            d = max(d, -rect(uvd, vec2(.1f, .025f), .0f));
            // logo coloring, RGBA 
            float o = 1.f - smoothstep(0.f, .01f, d);
            float l = 1.f - smoothstep(0.f, .05f, d);
            vec3 col = mix(vec3(2.f, .15f, .1f), vec3(1.f, 2.f, .5f), min(1.f, abs(uv.y + .4f)));
            return vec4(col * o + .1f, l);
        };


        // -------------------------------------------------------------------------------------------

        // FRACTAL 

        // A bunch of sin and cos that defines the curves of the fractal path
        // it returns the displacement at a given point. freq was used to explore diferent scales 
        auto pitpath = [](float ti) {
            float freq = .5f;
            ti *= freq;
            float x = cos(cos(ti * .35682f) + ti * .2823f) * cos(ti * .1322f) * 1.5f;
            float y = sin(ti * .166453f) * 4.f + cos(cos(ti * .125465f) + ti * .17354f) * cos(ti * .05123f) * 2.f;
            vec3  p = vec3(x, y, ti / freq);
            return p;
        };

        // Distance Estimation function

        auto de = [&](vec3 pos) {
            float x = 1.f - smoothstep(5.f, 8.f, abs(pos.x)); // aux variable used for washing the colors away from the path in the fractal
            pos.y += .9f; // offset y position for the fractal object
            pos.xy -= pitpath(pos.z).xy(); // distortion of coordinates based on the path function
            mat2 rot = rot2D(.5f); // rotation matrix used in the fractal iteration loop
            float organic = smoothstep(.5f, 1.f, -sin(pos.z * .005f)); // used for the "organic" part of the fractal
            mat2 rot2 = rot2D(organic * .4f); // rotation matrix used in the fractal iteration loop, it mutates the fractal to kinda "organic" shapes
            float fold = 2.6f + sin(pos.z * .01f) + organic * 1.5f; // fold is a parameter for one of the operations inside the fractal loop
            pos.x += pow(organic, .2f) * fold * .75f; // x offset for the organic part
            pos.y += organic * .3f; // y offset for the organic part
            pos.z = abs(5.f - mod(pos.z, 10.f)); // tiling for repeating the fractal along the z axis
            pos.x = abs(10.f - mod(pos.x + 10.f, 20.f)); // tiling for reapeating along x axis
            pos.x -= fold; // x offset to center the fractal
            vec4 p = vec4(pos, 1.f); // w value will be used for calculating the derivative
            vec3 m = vec3(1000.f); // for orbit trap coloring
            int it = int(8.f + fin * 2.f); // gives more iterations to the fractal at the end
            // Amazing Surface fractal formula by Kali
            // Derived from Mandelbox by tglad
            for (int i = 0; i < it; i++)
            {
                p.xz = clamp(p.xz(), -vec2(fold, 2.f), vec2(fold, 2.f)) * 2.0f - p.xz; // fold transform on xz plane
                p.xyz -= vec3(.5f, .8, 1.); // translation transform
                p = p * (2.f - organic * .2f) / clamp(dot(p.xyz(), p.xyz()), .25f, 1.f) - vec4(2.f, .5f, -1.f, 0.f) * x; // scale + spheric fold + translation transform
                // rotation transforms
                p.xy = p.xy() * rot;
                p.xz = p.xz() * rot2; // rotations on xz and yz give the "organic" feel for the "alien reefs" part
                p.yz = p.yz() * rot2;
                m = min(m, abs(p.xyz())); // saves the minimum value of the position during the iteration, used for "orbit traps" coloring
            }
            // fractal coloring (fcol global variable)
            fcol = vec3(1.f, .3f, .0f) * m * x;
            fcol = max(fcol, length(p) * .0015f * vec3(1.f, .9f, .8f) * (1.f - fin)) * (1.f + organic * .5f);
            return (max(p.x, p.y) / p.w - .025f * (1.f - fin)) * .85f; // returns the distance estimation to the fractal's surface, with some adjustment towards the end
        };


        // -------------------------------------------------------------------------------------------

        // RAYMARCHING

        // Returns the perpendicular vector to the surface at a given point
        auto normal = [&](vec3 p) {
            vec3 d = vec3(0.f, det * 2.f, 0.f);
            return normalize(vec3(de(p - d.yxx()), de(p - d.xyx()), de(p - d.xxy())) - de(p));
        };


        // Ambient occlusion and soft shadows, classic iq's methods

        auto ao = [&](vec3 p, vec3 n) {
            float td = 0.f, ao = 0.f;
            for (int i = 0; i < 6; i++)
            {
                td += .05f;
                float d = de(p - n * td);
                ao += max(0.f, (td - d) / td);
            }
            return clamp(1.f - ao * .1f, 0.f, 1.f);
        };


        auto shadow = [&](vec3 p) {
            float sh = 1.f, td = .1f;
            for (int i = 0; i < 50; i++)
            {
                p += ldir * td;
                float d = de(p);
                td += d;
                sh = min(sh, 10.f * d / td);
                if (sh < .05f) break;
            }
            return clamp(sh, 0.f, 1.f);
        };

        // Lighting

        auto shade = [&](vec3 p, vec3 dir, vec3 n, vec3 col) {
            float sha = shadow(p);
            float aoc = ao(p, n);
            float amb = .25f * aoc; // ambient light with ambient occlusion
            float dif = max(0.f, dot(ldir, -n)) * sha; // diffuse light with shadow
            vec3 ref = reflect(dir, n); // reflection vector
            float spe = pow(max(0.f, dot(ldir, ref)), 10.f) * .7f * sha; // specular lights    
            return col * (amb + dif) + spe * suncol; // lighting applied to the surface color
        };

        // Raymarching

        auto march = [&](vec3 from, vec3 dir, vec3 camdir) {
            // variable declarations
            vec3 p = from, col = vec3(0.1f), backcol = col;
            float totdist = 0.f, d = 0.f, sdet, glow = 0.f, lhit = 1.f;
            // the detail value is smaller towards the end as we are closer to the fractal boundary
            det *= 1.f - fin * .7f;
            // raymarching loop to obtain an occlusion value of the sun at the camera direction
            // used for the lens flare
            for (int i = 0; i < 70; i++)
            {
                p += d * ldir; // advance ray from camera pos to light dir
                d = de(p) * 2.f; // distance estimation, doubled to gain performance as we don't need too much accuracy for this
                lhit = min(lhit, d); // occlusion value based on how close the ray pass from the surfaces and very small if it hits 
                if (d < det)
                { // ray hits the surface, bye
                    break;
                }
            }
            // main raymarching loop
            for (int i = 0; i < 150; i++)
            {
                p = from + totdist * dir; // advance ray
                d = de(p); // distance estimation to fractal surface
                sdet = det * (1.f + totdist * .1f); // makes the detail level lower for far hits 
                if (d<sdet || totdist>maxdist) break; // ray hits the surface or it reached the max distance defined
                totdist += d; // distance accumulator  
                glow++; // step counting used for glow
            }
            float sun = max(0.f, dot(dir, ldir)); // the dot product of the cam direction and the light direction using for drawing the sun
            if (d < .2f)
            { // ray most likely hit a surface
                p -= (sdet - d) * dir; // backstep to correct the ray position
                vec3 c = fcol; // saves the color set by the de function to not get altered by the normal calculation
                vec3 n = normal(p); // calculates the normal at the ray hit point
                col = shade(p, dir, n, c); // sets the color and lighting
            }
            else
            { // ray missed any surface, this is the background
                totdist = maxdist;
                p = from + dir * maxdist; // moves the ray to the max distance defined
                // Kaliset fractal for stars and cosmic dust near the sun. 
                vec3 st = (dir * 3.f + vec3(1.3f, 2.5f, 1.25f)) * .3f;
                for (int i = 0; i < 10; i++) st = abs(st) / dot(st, st) - .8f;
                backcol += length(st) * .015f * (1.f - pow(sun, 3.f)) * (.5f + abs(st.grb()) * .5f);
                sun -= length(st) * .0017f;
                sun = max(0.f, sun);
                backcol += pow(sun, 100.f) * .5f; // adds sun light to the background
            }
            backcol += pow(sun, 20.f) * suncol * .8f; // sun light
            float normdist = totdist / maxdist; // distance of the ray normalized from 0 to 1
            col = mix(col, backcol, pow(normdist, 1.5f)); // mix the surface with the background in the far distance (fog)
            col = max(col, col * vec3(sqrt(glow)) * .13f); // adds a little bit of glow
            // lens flare
            vec2 pflare = dir.xy - ldir.xy;
            float flare = max(0.f, 1.0f - length(pflare)) - pow(abs(1.f - mod(camdir.x - atan(pflare.y, pflare.x) * 5.f / 3.14f, 2.f)), .6f);
            float cflare = pow(max(0.f, dot(camdir, ldir)), 20.f) * lhit;
            col += pow(max(0.f, flare), 3.f) * cflare * suncol;
            col += pow(sun, 30.f) * cflare;
            // "only glow" part (at sec. 10)
            col.rgb() = mix(col.rgb(), glow * suncol * .01f + backcol, 1.f - smoothstep(0.f, .8f, abs(time - 10.5f)));
            return vec4(col, normdist); // returns the resulting color and a normalized depth in alpha
        };  //(depth was going to be used for a postprocessing shader) 


        // -------------------------------------------------------------------------------------------

        // Camera and main function

        // I learnt this function from eiffie,
        // it takes a direction, a reference up vec3
        // and returns the rotation matrix to orient a vector
        auto lookat = [&](vec3 dir, vec3 up) {
            dir = normalize(dir); vec3 rt = normalize(cross(dir, normalize(up)));
            return mat3(rt, cross(rt, dir), dir);
        };


        // the path of the camera at a given point of time
        auto campath = [&](float ti) {
            float start = pow(max(0.f, 1.f - ti * .02f), 3.f); // interpolation curve for the starting camera
            vec3 p = pitpath(ti); // path displacement of the fractal 
            p *= 1.f - start; // the camera gradually ends following the fractal when, that happens when start=0
            p += vec3(start * 30.f, start * 25.f, 0.f); // position offset for starting camera curve   
            return p;
        };

        
            vec2 uv = coords / res.xy - .5f;
            uv.x *= res.x / res.y;
            vec3 dir = normalize(vec3(uv, 1.)); // ray direction  
            time = mod(iTime, 162.f); // time loop
            fin = smoothstep(145.f, 147.5f, time); // aux variable used for the end sequence
            // camera accelerations and slow downs
            float acel1 = smoothstep(11.f, 12.f, time) * 7.31;
            float acel2 = smoothstep(99.f, 100.f, time) * 4.;
            float desacel1 = smoothstep(77.f, 78.f, time) * 5.;
            float desacel2 = fin * 9.5f;
            float tt = time;
            // freeze BW frame
            if (abs(tt - 25.5f) < .5f) tt = 25.f;
            float acel = acel1 + acel2 - desacel1 - desacel2;
            // time variable
            float t = tt * (3.6f + acel) - acel1 * 11.f - acel2 * 99.f + desacel1 * 77.f + desacel2 * 147.5f;
            t += smoothstep(125.f, 145.f, time) * 243.f;
            vec3 from = campath(t); // camera position
            from.y -= desacel2 * .035f; // camera offset on 2nd slow down
            vec3 fw = normalize(campath(t + 3.f) - from); // camera forward direction
            from.x -= fw.x * .1f; // camera x offset based on the forward direction
            dir = dir * lookat(fw * vec3(1.f, -1.f, 1.f), vec3(fw.x * .2f, 1.f, 0.f)); // re-orientation of the ray dir with the camera fwd dir
            ldir = normalize(ldir); // light dir normalization 
            vec4 col = march(from, dir, fw); // get color from raymarching and background
            col.rgb() = mix(vec3(length(col.rgb()) * .6f), col.rgb(), .85f - step(abs(tt - 25.f), .1f)); // BW freeze frame sequence coloring
            col.rgb() *= 1.f - smoothstep(25.f, 26.f, time) + step(25.1f, tt); // BW freeze frame sequence fading
            col.rgb() *= 1.f + step(abs(tt - 25.f), .1f);
            // PVM Logo color mixing
            vec4 pvm = logo(uv * 1.5f + vec2(.9f, .5f)) * smoothstep(1.f, 3.f, time + uv.x * 2.f) * (1.f - smoothstep(7.5f, 8.f, time + uv.x * 2.f));
            col.rgb() = mix(col.rgb(), pvm.rgb(), pvm.a);
            // fade in from black
            col.rgb() *= smoothstep(0.f, 4.f, time);
            // fade out to black
            col.rgb()  *= 1.f - smoothstep(160.f, 162.f, time);
            return col;
    }
    __device__ glm::vec3 sea_scape(glm::vec2 const &coords, glm::vec2 res, float iTime)
    {
        using namespace glm;
        /*
 * "Seascape" by Alexander Alekseev aka TDM - 2014
 * License Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License.
 * Contact: tdmaav@gmail.com
 */

        const int NUM_STEPS = 8;
        const float PI = 3.141592f;
        const float EPSILON = 1e-4f;
#define EPSILON_NRM (0.1f / res.x)
#define AA

        // sea
        const int ITER_GEOMETRY = 3;
        const int ITER_FRAGMENT = 5;
        const float SEA_HEIGHT = 0.6f;
        const float SEA_CHOPPY = 4.0f;
        const float SEA_SPEED = 0.8f;
        const float SEA_FREQ = 0.16f;
        const vec3 SEA_BASE = vec3(0.0f, 0.09f, 0.18f);
        const vec3 SEA_WATER_COLOR = vec3(0.8f, 0.9f, 0.6f) * 0.6f;
#define SEA_TIME (1.0f + iTime * SEA_SPEED)
        const mat2 octave_m = mat2(1.6f, 1.2f, -1.2f, 1.6f);

        // math
        auto fromEuler = [](vec3 ang) {
            vec2 a1 = vec2(sin(ang.x), cos(ang.x));
            vec2 a2 = vec2(sin(ang.y), cos(ang.y));
            vec2 a3 = vec2(sin(ang.z), cos(ang.z));
            mat3 m;
            m[0] = vec3(a1.y * a3.y + a1.x * a2.x * a3.x, a1.y * a2.x * a3.x + a3.y * a1.x, -a2.y * a3.x);
            m[1] = vec3(-a2.y * a1.x, a1.y * a2.y, a2.x);
            m[2] = vec3(a3.y * a1.x * a2.x + a1.y * a3.x, a1.x * a3.x - a1.y * a3.y * a2.x, a2.y * a3.y);
            return m;
        };

        auto hash = [](vec2 p) {
            float h = dot(p, vec2(127.1f, 311.7f));
            return fract(sin(h) * 43758.5453123f);
        };

        auto noise = [&](vec2 p) {
            vec2 i = floor(p);
            vec2 f = fract(p);
            vec2 u = f * f * (3.0f - 2.0f * f);
            return -1.0f + 2.0f * mix(mix(hash(i + vec2(0.0f, 0.0f)),
                hash(i + vec2(1.0f, 0.0f)), u.x),
                mix(hash(i + vec2(0.0f, 1.0f)),
                    hash(i + vec2(1.0f, 1.0f)), u.x), u.y);
        };

        // lighting
        auto diffuse = [](vec3 n, vec3 l, float p) {
            return pow(dot(n, l) * 0.4f + 0.6f, p);
        };

        auto specular = [&] (vec3 n, vec3 l, vec3 e, float s) {
            float nrm = (s + 8.0f) / (PI * 8.0f);
            return pow(max(dot(reflect(e, n), l), 0.0f), s) * nrm;
        };

        // sky
        auto getSkyColor = [](vec3 e) {
            e.y = (max(e.y, 0.0f) * 0.8f + 0.2f) * 0.8f;
            return vec3(pow(1.0f - e.y, 2.0f), 1.0f - e.y, 0.6f + (1.0f - e.y) * 0.4f) * 1.1f;
        };

        // sea
        auto sea_octave = [&](vec2 uv, float choppy) {
            uv += noise(uv);
            vec2 wv = 1.0f - abs(sin(uv));
            vec2 swv = abs(cos(uv));
            wv = mix(wv, swv, wv);
            return pow(1.0f - pow(wv.x * wv.y, 0.65f), choppy);
        };

        auto map = [&](vec3 p) {
            float freq = SEA_FREQ;
            float amp = SEA_HEIGHT;
            float choppy = SEA_CHOPPY;
            vec2 uv = p.xz; uv.x *= 0.75f;

            float d, h = 0.0f;
            for (int i = 0; i < ITER_GEOMETRY; i++)
            {
                d = sea_octave((uv + SEA_TIME) * freq, choppy);
                d += sea_octave((uv - SEA_TIME) * freq, choppy);
                h += d * amp;
                uv = uv * octave_m; freq *= 1.9f; amp *= 0.22f;
                choppy = mix(choppy, 1.0f, 0.2f);
            }
            return p.y - h;
        };

        auto map_detailed = [&](vec3 p) {
            float freq = SEA_FREQ;
            float amp = SEA_HEIGHT;
            float choppy = SEA_CHOPPY;
            vec2 uv = p.xz; uv.x *= 0.75f;

            float d, h = 0.0f;
            for (int i = 0; i < ITER_FRAGMENT; i++)
            {
                d = sea_octave((uv + SEA_TIME) * freq, choppy);
                d += sea_octave((uv - SEA_TIME) * freq, choppy);
                h += d * amp;
                uv = uv * octave_m; freq *= 1.9f; amp *= 0.22f;
                choppy = mix(choppy, 1.0f, 0.2f);
            }
            return p.y - h;
        };

        auto getSeaColor = [&](vec3 p, vec3 n, vec3 l, vec3 eye, vec3 dist) {
            float fresnel = clamp(1.0f - dot(n, -eye), 0.0f, 1.0f);
            fresnel = pow(fresnel, 3.0f) * 0.5f;

            vec3 reflected = getSkyColor(reflect(eye, n));
            vec3 refracted = SEA_BASE + diffuse(n, l, 80.0f) * SEA_WATER_COLOR * 0.12f;

            vec3 color = mix(refracted, reflected, fresnel);

            float atten = max(1.0f - dot(dist, dist) * 0.001f, 0.0f);
            color += SEA_WATER_COLOR * (p.y - SEA_HEIGHT) * 0.18f * atten;

            color += vec3(specular(n, l, eye, 60.0f));

            return color;
        };

        // tracing
        auto getNormal = [&](vec3 p, float eps) {
            vec3 n;
            n.y = map_detailed(p);
            n.x = map_detailed(vec3(p.x + eps, p.y, p.z)) - n.y;
            n.z = map_detailed(vec3(p.x, p.y, p.z + eps)) - n.y;
            n.y = eps;
            return normalize(n);
        };

        auto heightMapTracing = [&](vec3 ori, vec3 dir, vec3& p) {
            float tm = 0.0f;
            float tx = 1000.0f;
            float hx = map(ori + dir * tx);
            if (hx > 0.0f) return tx;
            float hm = map(ori + dir * tm);
            float tmid = 0.0f;
            for (int i = 0; i < NUM_STEPS; i++)
            {
                tmid = mix(tm, tx, hm / (hm - hx));
                p = ori + dir * tmid;
                float hmid = map(p);
                if (hmid < 0.0f)
                {
                    tx = tmid;
                    hx = hmid;
                }
                else
                {
                    tm = tmid;
                    hm = hmid;
                }
            }
            return tmid;
        };

        auto getPixel = [&](vec2 coord, float time) {
            vec2 uv = coord / res.xy;
            uv = uv * 2.0f - 1.0f;
            uv.x *= res.x / res.y;

            // ray
            vec3 ang = vec3(sin(time * 3.0f) * 0.1f, sin(time) * 0.2f + 0.3f, time);
            vec3 ori = vec3(0.0f, 3.5f, time * 5.0f);
            vec3 dir = normalize(vec3(uv.xy(), -2.0f)); dir.z += length(uv) * 0.14f;
            dir = normalize(dir) * fromEuler(ang);

            // tracing
            vec3 p;
            heightMapTracing(ori, dir, p);
            vec3 dist = p - ori;
            vec3 n = getNormal(p, dot(dist, dist) * EPSILON_NRM);
            vec3 light = normalize(vec3(0.0f, 1.0f, 0.8f));

            // color
            return mix(
                getSkyColor(dir),
                getSeaColor(p, n, light, dir, dist),
                pow(smoothstep(0.0f, -0.02f, dir.y), 0.2f));
        };
           
        // main
            float time = iTime * 0.3f;

#ifdef AA
            vec3 color = vec3(0.0f);
            for (int i = -1; i <= 1; i++)
            {
                for (int j = -1; j <= 1; j++)
                {
                    vec2 uv = coords + vec2(i, j) / 3.0f;
                    color += getPixel(uv, time);
                }
            }
            color /= 9.0f;
#else
            vec3 color = getPixel(fragCoord, time);
#endif

            // post
            return vec3(detail::pow(color, vec3(0.65f)));
    }
    __device__ glm::vec3 clouds(glm::vec2 const &coords, glm::vec2 res, float time)
    {
        // Volumetric screamer - Result of an improvised live coding session on Twitch
        // LIVE SHADER CODING, SHADER SHOWDOWN STYLE, EVERY TUESDAYS 20:00 Uk time: 
        // https://www.twitch.tv/evvvvil_
        using namespace glm;
        float iTime = time;
        vec2 z; float tt, b, g = 0., gg = 0., cr; vec3 faceP, cp; vec4 su = vec4(0);
        auto smin = [](float d1, float d2, float k) { float h = max(k - abs(d1 - d2), 0.f); return min(d1, d2) - h * h * .25f / k; };
        auto smax = [](float d1, float d2, float k) { float h = max(k - abs(-d1 - d2), 0.f); return max(-d1, d2) + h * h * .25f / k; };
        auto r2 = [](float r) { return mat2(cos(r), sin(r), -sin(r), cos(r)); };
        auto noi = [](vec3 p) {
            vec3 f = floor(p), s = vec3(7, 157, 113);
            p -= f;
            vec4 h = vec4(0, s.yz, s.y + s.z) + dot(f, s);
            p = p * p * (3.f - 2.f * p);
            h = mix(fract(sin(h) * 43758.5f), fract(sin(h + s.x) * 43758.5f), p.x);
            h.xy = mix(h.xz(), h.yw(), p.y);
            return mix(h.x, h.y, p.z);
        };
        auto ferlin = [&](vec3 p) {
            float f = 0.f, A = .5f, I;
            p.zy() += tt * 2.f;
            for (int i = 0; i < 3; i++) I = float(i), f += A / (I + 1.f) * noi(p + I), p = (2.1f + .1f * I) * p;
            return f;
        };
        auto face = [&](vec3 p) {
            p -= vec3(0, -12.f + b * 20.f, 0) + sin(p.y * 2.f) * .1f;
            p.yz() = r2(1.65f * (1.f - b)) * p.yz();
            faceP = p * vec3(1, .7f, 1);
            float t = length(faceP) - 4.f - sin(p.y) * .66f;
            t = smin(t, length(abs(faceP + vec3(0, -2.5f, -1)) - vec3(2, 0, 0)) - 4.f, 1.f);
            vec3 spikeP = p + vec3(0, -3.9f, 2);
            spikeP.x = abs(spikeP.x) - 2.f;
            spikeP.xy() = r2(-.785f) * spikeP.xy();
            spikeP.yz() = r2(-.1785f) * spikeP.yz();
            t = smin(t, length(spikeP.xz()) - 2.f + abs(p.x) * .2f, 1.5f);
            vec3 eyeP = abs(p - vec3(0, 2, 0));
            eyeP.xy() = r2(.6f) * eyeP.xy();
            float eyes = max(eyeP.y, (length(abs(faceP + vec3(0, -1.5f, 3.f)) - vec3(1.f, 0, 0)) - 3.f));
            t = smax(eyes, t, 1.f);
            t = min(t, max(eyeP.y + 4.f, eyes));
            t = smax(length(faceP + vec3(0, 2, -2.f + 5.f * b)) - 2.5f, t, .5f);
            spikeP.xy() = r2(-.1485f) * spikeP.xy();
            spikeP -= vec3(8.f * b, -3, -1);
            t = smin(t, length(spikeP.xz()) - 1.f + abs(spikeP.y + 3.f) * .25f, 1.5f);
            return t;
        };
        auto terrain = [&](vec3 p) {
            float t = p.y + 5.f + cos(length(p * (.5f)) - b * 15.f - tt * 4.f) * b + noi(p * .07f + 1.f) * 5.f; //WOBBLE: cos(length(p*(.5f))-b*15.-tt*4.)
            t = smax(length(p.xz()) - 2.f - b * 6.f, t, 3.f);
            t = smin(t, length(p.xz()) - 1.f + (p.y + 15.f - b * 17.f) * .5f, 1.5f);
            return t;
        };
        auto cmp = [&](vec3 p)
        {
            float t = face(p);
            t = smin(t, terrain(p), 2.5f);
            vec3 boltP = p;
            boltP = abs(boltP - vec3(0, 0, 2)) - 11.f + sin(p.y * 5.f * p.x * .1f + tt * 25.5f) * .05f + 4.f * sin(p.y * .3f - 3.f) + p.y * .2f;//ORIGINAL SHADER IN BONZOMATIC HAD NOISE TEXTURE CALL FOR BETTER LIGHTNING BOLT EFFECT BUT, THIS SHADER BEING GREEDY ENOUGH, I THOUGHT BEST REPLACE WITH BUNCH OF SINS ON SHADERTOY
            float bolt = length(boltP.xz()) - .1f; //~Above line on bonzo end should be: abs(boltP-vec3(0,0,2))-11.+texture(texNoise,p.xy*.1+tt*.5f).r*2.+4.*sin(p.y*.3-3)+p.y*.2;      
            bolt = max(bolt, p.y + 10.f - b * 25.f);
            float mouthFlash = max(p.z, length(faceP.xy() - vec2(0, -2)) + 2.f + p.z * .2f * b);
            g += 0.1f / (0.1f + bolt * bolt * (1.02f - b) * (40.f - 39.5f * sin(p.y * .2f - b * 8.f)));
            gg += 0.1f / (0.1f + mouthFlash * mouthFlash * (1.05f - b) * (40.f - 39.5f * sin(p.z * .3f + tt * 5.f)));
            return t;
        };
        vec2 uv = (coords.xy / res.xy - 0.5f) / vec2(res.y / res.x, 1);
        tt = mod(iTime, 62.82f);
        b = smoothstep(0.f, 1.f, sin(tt) * .5f + .5f);
        vec3 ro = vec3(sin(tt * .5f) * 10.f, mix(15.f, -3.f, b), -20.f + sin(tt * .5f) * 5.f) * mix(vec3(1), vec3(2, 1, cos(tt * .5f) * 1.5f), cos(-tt * .5f + .5f) * .5f + .5f),
            cw = normalize(vec3(0, b * 10.f, 0) - ro), cu = normalize(cross(cw, vec3(0, 1, 0))),
            cv = normalize(cross(cu, cw)), rd = mat3(cu, cv, cw) * normalize(vec3(uv, .5f)), co, fo;
        co = fo = vec3(.1f, .12f, 0.17f) - length(uv) * .1f - rd.y * .2f;
        cr = cmp(ro - 3.f) + fract(dot(sin(uv * 476.567f + uv.yx() * 785.951f + tt), vec2(984.156f)));
        for (int i = 0; i < 128; i++)
        {
            cp = ro + rd * (cr += 1.f / 2.5f);
            if (su.a > .99f) break; //NOTE TO SELF: cr>t NOT NEEDED AS ONLY VOLUMETRIC GEOM ARE PRESENT
            float de = clamp((-cmp(cp) * 9.f + 8.f * ferlin(cp)) / 8.f, 0.f, 1.f);
            su += vec4(vec3(mix(1.f, 0.f, de) * de), mix(.07f, de, exp(-.00001f * cr * cr * cr))) * (1.f - su.a);//FOG ON CLOUDS! mix(.07,de,exp(-.00001*cr*cr*cr))
        }
        co = mix(co, su.xyz(), su.a);

        return glm::vec3(detail::pow(co + g * .4f * vec3(.5f, .2f, .1f) + gg * .4f * vec3(.1f, .2f, .5f), vec3(.55f)));
    }

    __device__ glm::vec3 mandelbrot(glm::vec2 const &coords, glm::vec2 const &res, float time)
    {
        constexpr std::uint32_t max_iterations = 200;
        constexpr float bailout = 200000.0f;

        /* get uv coords */
        glm::vec2 uv = (coords - 0.5f * res) / res.y * 4.0f * sin(time) - glm::vec2{cos(time) * 1.35f, 0.0f};
        glm::vec2 z = {};

        /* iterate z = z ^ 2 +c until abs(z) > bailout */
        std::uint32_t i;
        for (i = 0; i < max_iterations; ++i)
        {
            /* exit the loop if z has escaped */
            if (glm::dot(z, z) >= bailout) break;
            z = glm::vec2{ z.x * z.x - z.y * z.y, z.x * z.y * 2.0f } + uv;
        }

        float smooth_color = sqrtf(((float)i - log2f(logf(glm::dot(z, z)) / logf(bailout))) / (float)max_iterations);

        return  glm::sin(20.0f * smooth_color * glm::vec3{ 1.5f, 1.8f, 2.1f } + time) * 0.5f + 0.5f;
    }

    __device__ glm::vec3 paper_pattern(glm::vec2 const &coords, glm::vec2 const &res, float iTime)
    {

        /*

        Geometric Paper Pattern
        -----------------------

        A geometric pattern rendered onto some animated hanging paper, which for some
        inexplicable and physics defying reason is also animated. :D

        I put this together out of sheer boredom, and I didn't spend a lot of time on
        it, so I wouldn't look too much into the inner workings, especially not the
        physics aspects... I honestly couldn't tell you why the paper is waving around
        like that. :)

        The pattern is about as basic as it gets. I've used some equally basic post
        processing to give it a slightly hand drawn look. The pencil algorithm I've
        used is just a few lines, and is based on one of Flockaroo's more sophisticated
        examples. The link is below, for anyone interested. At some stage, I'd like
        to put a sketch algorithm out that is more hatch-like.

        On a side note, for anyone who likes small functions, feel free to take a look
        at the "n2D" value noise function. I wrote it ages ago (also when I was bored)
        and have taken it as far as I can take it. However, I've often wondered whether
        some clever soul out there could write a more compact one.




        Related examples:

        // A more sophisticated pencil sketch algorithm.
        When Voxels Wed Pixels - Flockaroo
        // https://www.shadertoy.com/view/MsKfRw

        */
        using namespace glm;

        // For those who find the default pattern just a little too abstract and minimal,
        // here's another slighly less abstract minimal pattern. :D
        //#define LINE_TRUCHET

        // I felt the pattern wasn't artistic enough, so I added some tiny holes. :)
#define HOLES
#define LINE_TRUCHET

    // Standard 2D rotation formula.
        auto rot2 = [](float a) { float c = cos(a), s = sin(a); return mat2(c, -s, s, c); };

        // IQ's vec2 to float hash.
        auto hash21 = [](vec2 p) { return fract(sin(dot(p, vec2(27.619f, 57.583f))) * 43758.5453f); };

        // IQ's box formula -- modified for smoothing.
        auto sBoxS = [](vec2 p, vec2 b, float rf) {
            vec2 d = abs(p) - b + rf;
            return min(max(d.x, d.y), 0.f) + length(max(d, 0.f)) - rf;
        };

        // IQ's box formula.
        auto sBox = [](vec2 p, vec2 b) {
            vec2 d = abs(p) - b;
            return min(max(d.x, d.y), 0.f) + length(max(d, 0.f));
        };

        // This will draw a box (no caps) of width "ew" from point "a "to "b". I hacked
        // it together pretty quickly. It seems to work, but I'm pretty sure it could be
        // improved on. In fact, if anyone would like to do that, I'd be grateful. :)
        auto lBox = [&](vec2 p, vec2 a, vec2 b, float ew) {
            float ang = atan(b.y - a.y, b.x - a.x);
            p = rot2(ang) * (p - mix(a, b, .5f));

            vec2 l = vec2(length(b - a), ew);
            return sBox(p, (l + ew) / 2.f);
        };

        auto distField = [&](vec2 p) {
            // Cell ID and local cell coordinates.
            vec2 ip = floor(p) + .5f;
            p -= ip;

            // Some random numbers.
            float rnd = hash21(ip + .37f);
            float rnd2 = hash21(ip + .23f);
            float rnd3 = hash21(ip + .72f);

            // Cell boundary.
            float bound = sBox(p, vec2(.5f));

            float d = 1e5; // Distance field.

            // Random 90 degree cell rotation.
            p = p * rot2(floor(rnd * 64.f) * 3.14159f / 2.f);

            // Just adding a tiny hole to draw the eye to the... No idea why artists do
            // this kind of thing, but it enables them to double the price, so it's
            // definitely worth the two second effort. :)
            float hole = 1e5;

#ifdef LINE_TRUCHET

            // Three tiled Truchet pattern consisting of arc, straight line
            // and dotted tiles.

            // Four corner circles.
            vec2 q = abs(p);
            float cir = min(length(q - vec2(0, .5f)), length(q - vec2(.5f, 0)));

            if (rnd3 < .75f)
            {
                if (rnd2 < .65)
                {
                    d = abs(min(length(p - .5f), length(p + .5f)) - .5f) - .5f / 3.f;

                }
                else
                {
                    p = abs(p) - .5f / 3.f;
                    d = min(max(p.x, -(p.y - .5f / 8.f)), p.y);
                }

            }
            else
            {
                // Four dots in the empty squares to complete the pattern.
                d = cir - .5f / 3.f;
            }

            // Corner holes.
            hole = cir - .05f;

#else
            // Very common quarter arc and triangle Truchet pattern, which is a
            // favorite amongst the abstract art crowd.
            if (rnd3 < .75f)
            {
                ;

                // Corner holes.
                hole = length(p - .325f) - .05f;

                if (rnd2 < .5f)
                {
                    // Corner quarter circle... Well, it's a full one,
                    // but it gets cut off at the grid boundaries.
                    d = length(p - .5f) - 1.f;
                }
                else
                {
                    // A corner diamond, but we'll only see the triangular part.
                    p = abs(p - .5f);
                    d = abs(p.x + p.y) / sqrt(2.f) - .7071f;
                }
            }
#endif

#ifdef HOLES
            d = max(d, -hole);
#endif

            // Cap to the cell boundaries. Sometimes, you have to do this
            // to stop rendering out of bounds, or if you wish to include
            // boundary lines in the rendering.
            //
            return max(d, bound);
        };

        // Cell grid borders.
        auto gridField = [](vec2 p) {
            vec2 ip = floor(p) + .5f;
            p -= ip;

            p = abs(p);
            float grid = abs(max(p.x, p.y) - .5f) - .005f;

            return grid;
        };

        // Cheap and nasty 2D smooth noise function with inbuilt hash function -- based on IQ's
        // original. Very trimmed down. In fact, I probably went a little overboard. I think it
        // might also degrade with large time values.
        auto n2D = [](vec2 p) {
            vec2 i = floor(p);
            p -= i;
            p *= p * (3.f - p * 2.f);

            vec4 temp_vec4 = fract(sin(vec4(0, 1, 113, 114) + dot(i, vec2(1, 113))) * 43758.5453f);
            return dot(mat2(temp_vec4.xy(), temp_vec4.zw()) * vec2(1.f - p.y, p.y), vec2(1.f - p.x, p.x));
        };

        // FBM -- 4 accumulated noise layers of modulated amplitudes and frequencies.
        auto fbm = [&](vec2 p) { return n2D(p) * .533f + n2D(p * 2.f) * .267f + n2D(p * 4.f) * .133f + n2D(p * 8.f) * .067f; };

        auto pencil = [&](vec3 col, vec2 p) {
            // Rough pencil color overlay... The calculations are rough... Very rough, in fact,
            // since I'm only using a small overlayed portion of it. Flockaroo does a much, much
            // better pencil sketch algorithm here:
            //
            // When Voxels Wed Pixels - Flockaroo
            // https://www.shadertoy.com/view/MsKfRw
            //
            // Anyway, the idea is very simple: Render a layer of noise, stretched out along one
            // of the directions, then mix a similar, but rotated, layer on top. Whilst doing this,
            // compare each layer to it's underlying greyscale value, and take the difference...
            // I probably could have described it better, but hopefully, the code will make it
            // more clear. :)
            //
            // Tweaked to suit the brush stroke size.
            vec2 q = p * 4.f;
            const vec2 sc = vec2(1, 12);
            q += (vec2(n2D(q * 4.f), n2D(q * 4.f + 7.3f)) - .5f) * .03f;
            q = q * rot2(-3.14159f / 2.5f);
            // I always forget this bit. Without it, the grey scale value will be above one,
            // resulting in the extra bright spots not having any hatching over the top.
            col = min(col, 1.f);
            // Underlying grey scale pixel value -- Tweaked for contrast and brightness.
            float gr = (dot(col, vec3(.299f, .587f, .114f)));
            // Stretched fBm noise layer.
            float ns = (n2D(q * sc) * .64f + n2D(q * 2.f * sc) * .34f);
            // Compare it to the underlying grey scale value.
            ns = gr - ns;
            //
            // Repeat the process with a couple of extra rotated layers.
            q = q * rot2(3.14159f / 2.f);
            float ns2 = (n2D(q * sc) * .64f + n2D(q * 2.f * sc) * .34f);
            ns2 = gr - ns2;
            q = q * rot2(-3.14159f / 5.f);
            float ns3 = (n2D(q * sc) * .64f + n2D(q * 2.f * sc) * .34f);
            ns3 = gr - ns3;
            //
            // Mix the two layers in some way to suit your needs. Flockaroo applied common sense,
            // and used a smooth threshold, which works better than the dumb things I was trying. :)
            ns = min(min(ns, ns2), ns3) + .5f; // Rough pencil sketch layer.
            //ns = smoothstep(0., 1., min(min(ns, ns2), ns3) + .5f); // Same, but with contrast.
            //
            // Return the pencil sketch value.
            return vec3(ns);
        };

        // Aspect correct screen coordinates.
        vec2 uv = (coords - res.xy() * .5f) / res.y;

        // Scaling factor.
        float gSc = 8.f;

        // Smoothing factor.
        float sf = 1.f / res.y * gSc;

        // Unperturbed coordinates.
        vec2 pBg = uv * gSc;

        vec2 offs = vec2(fbm(uv / 1.f + iTime / 4.f), fbm(uv / 1.f + iTime / 4.f + .35f));
        const float oFct = .04f;
        uv -= (offs - .5f) * oFct;

        // Scaled perturbed coordinates..
        vec2 p = uv * gSc;

        // The paper distance field.
        vec2 fw = vec2(6, 3);
        float bw = 1. / 3.;
        float paper = sBoxS(p, fw + bw, .05);

        // Mixing the static background coordinates with the wavy offset ones to
        // save calculating two functions for various things.
        vec2 pMix = mix(p, pBg, smoothstep(0.f, sf, paper));

        // Failed experiment with a moving pattern.
        //vec2 rnd22 = vec2(hash21(ip + 1.6), hash21(ip + 2.6));
        //rnd22 = smoothstep(.9, .97, sin(6.2831*rnd22 + iTime/2.));
        //float d = distField(p + rnd22);

        // The geometric pattern field.
        float d = distField(pMix);

        // Canvas pattern square ID.
        vec2 ip = floor(p) + .5f;

        // Background. Nothing exciting, but theres' a subtle vertical gradient
        // to mimic an overhead light, or something to that effect.
        vec3 bg = vec3(.9, .82, .74) * .85f;
        bg = mix(bg, bg * .9f, -uv.y * .5f + .5f);

        // Initialize the scene color to the background.
        vec3 col = bg;

        // Using the pattern distance field for a subtle background wall overlay.
        // Back in the old days (the 90s), you'd reuse whatever you could.
        col = mix(col, bg * .92f, 1.f - smoothstep(0.f, sf, d));
        col = mix(col, bg * .96f, 1.f - smoothstep(0.f, sf, d + .03f));

        // The paper shadow distance field and application.
        vec2 shOff = normalize(vec2(1, -3)) * .1f;
        float dSh = sBoxS(p - shOff, fw + bw, .05f);
        col = mix(col, vec3(0), (1.f - smoothstep(0.f, sf * 4.f, dSh)) * .5f);

        // Paper rendering.
        col = mix(col, vec3(0), (1.f - smoothstep(0.f, sf, paper)) * .1f);
        col = mix(col, vec3(1), (1.f - smoothstep(0.f, sf, paper + .02f)));
        col = mix(col, vec3(0), (1.f - smoothstep(0.f, sf, paper + bw)));

        /*
        // Distance field-based lines on the canvas. I tried a few quick things
        // for this example, and unfortunately, not a lot worked, but I've left
        // the workings in for anyone who wants to play around with this.
        const float lnN = 8.; // Number of concentric pattern lines.
        float pat = abs(fract(d*lnN*1. - .5f) - .5f)*2. - .05;
        pat = smoothstep(0., sf*lnN*2., pat)*.65 + .35;
        */

        // Random animated background color for each square.
        float rndC = hash21(ip + .23f);
        rndC = sin(6.2831f * rndC + iTime / 2.f);
        vec3 sqCol = .55f + .45f * cos(6.2831f * rndC + vec3(0, 1, 2)); // IQ's palette.
        col = mix(col, sqCol, (1.f - smoothstep(0.f, sf, paper + bw + .0f)));

        // Render a colored Truchet pattern in one of two styles.

        // Restrict pattern rendering to the canvas.
        d = max(d, (paper + bw));

        // IQ's really cool, and simple, palette.
        vec3 shCol = .55f + .45f * cos(6.2831f * rndC + vec3(0, 1, 2) + 1.f);

        // Subtle drop shadow, edge and coloring.
        col = mix(col, bg * .03f, (1.f - smoothstep(0.f, sf * 4.f, d)) * .5f);
        col = mix(col, bg * .03f, (1.f - smoothstep(0.f, sf, d)));
        col = mix(col, shCol, (1.f - smoothstep(0.f, sf, d + .02f)));

        // Adding in some blinking offset color, just to mix things up a little.
        rndC = hash21(ip + .87f);
        rndC = smoothstep(.8f, .9f, sin(6.2831f * rndC + iTime * 2.f) * .5f + .5f);
        vec3 colB = mix(col, col.xzy(), rndC / 2.f);
        col = mix(col, colB, 1.f - smoothstep(0.f, sf, paper + bw));

        // Putting some subtle layerd noise onto the wall and paper.
        col *= fbm(pMix * 48.f) * .2f + .9f;

        // Grid lines on the canvas.
        float grid = gridField(p);
        grid = max(grid, paper + bw);
        col = mix(col, vec3(0), (1.f - smoothstep(0.f, sf * 2.f, grid)) * .5f);

        col = mix(col, vec3(0), (1.f - smoothstep(0.f, sf, grid)));

        /*
        // Circles on the pattern... Too busy looking.
        vec3 svC = col/2.;
        float cir = length(p - ip) - .1;
        cir = max(cir, bord + bw);
        col = mix(col, bg*.07, (1. - smoothstep(0., sf, cir)));
        //col = mix(col, svC, (1. - smoothstep(0., sf, cir + .02)));
        */

        // Recalculating UV with no offset to use with postprocessing effects.
        uv = (coords - res.xy * .5f) / res.y;

        float canv = smoothstep(0.f, sf * 2.f, (paper + bw));
        float canvBord = smoothstep(0.f, sf * 2.f, (paper));

        /*
        // Corduroy lines... Interesting texture, but I'm leaving it out.
        vec2 q3 = mix(uv, p/gSc, 1. - (canvBord));
        float lnPat = abs(fract((q3.x - q3.y)*80.) - .5f)*2. - .01;
        float frM = smoothstep(0., sf, max(paper, -(paper + bw)));
        lnPat = smoothstep(0., sf*80./2., lnPat);
        col = mix(col, col*(lnPat*.25 + .75), frM);
        */

        // Boring, and admittedly, inefficient hanger and string calculations, etc.
        // A lot of it is made up on the spot. However, at the end of the day, this
        // is a pretty cheap example, so it'll do.
        vec2 q2 = uv;
        q2.x = mod(q2.x, 1.f) - .5f;
        q2 -= (offs - .5f) * oFct + vec2(0, (3.f + bw * .9f) / gSc);
        // String, and string shadow.
        float strg = lBox(q2, vec2(0), vec2(0, .5f) - (offs - .5f) * oFct, .002f);
        float strgSh = lBox(q2 - shOff * .5f, vec2(0, .04f), vec2(0, .5f) - (offs - .5f) * oFct, .002f);
        // Rendering the strings and shadows.
        col = mix(col, vec3(0), (1.f - smoothstep(0.f, sf / gSc, strgSh)) * .25f);
        col = mix(col, vec3(.5f, .4, .3), (1.f - smoothstep(0.f, sf / gSc / 2.f, strg)));
        // The little black hangers and corresponding shadow.
        float hang = sBoxS(q2, vec2(1, .5f) * bw / gSc, .0);
        float hangBk = sBoxS(q2, vec2(1.f + .05f, .5f) * bw / gSc, .0);
        float hangBkSh = sBoxS(q2 - vec2(.008, -.004), vec2(1. + .06, .5f) * bw / gSc, .0);
        hangBk = max(hangBk, -paper);
        hangBkSh = max(hangBkSh, -paper);
        float hangSh = sBoxS(q2 - shOff * .1f, vec2(1, .5f) * bw / gSc, .0f);
        // Rendering the hangers and shadows.
        col = mix(col, vec3(0), (1.f - smoothstep(0.f, sf / gSc * 2.f, hangBkSh)) * .5f);
        col = mix(col, vec3(0), (1.f - smoothstep(0.f, sf / gSc * 2.f, hangSh)) * .35f);
        col = mix(col, vec3(0), (1.f - smoothstep(0.f, sf / gSc, hangBk)));
        col = mix(col, vec3(0), (1.f - smoothstep(0.f, sf / gSc, hang)));
        col = mix(col, bg * oFct, 1.f - smoothstep(0.f, sf / gSc, hang + .004f));

        // Adding very subtle lighting to the wavy pattern... So subtle that it's
        // barely worth the effort, but it's done now. :)
        float eps = .01f;
        vec2 offs2 = vec2(fbm(uv / 1.f + iTime / 4.f - eps), fbm(uv / 1.f + iTime / 4.f + .35f - eps));
        float z = max(dot(vec3(0, 1, -.5f), vec3(offs2 - offs, eps)), 0.f) / eps;
        col *= mix(1.f, .9f + z * .1f, 1.f - canvBord);

        // Subtle pencel overlay... It's cheap and definitely not production worthy,
        // but it works well enough for the purpose of the example. The idea is based
        // off of one of Flockaroo's examples.
        vec2 q = mix(uv * gSc * 2.f, p, 1.f - (canvBord));
        vec3 colP = pencil(col, q * res.y / 450.f);
        //col *= colP*.8 + .5f;
        col *= mix(vec3(1), colP * .8f + .5f, .8f);
        //col = colP;

        // Cheap paper grain... Also barely worth the effort. :)
        vec2 oP = floor(p / gSc * 1024.f);
        vec3 rn3 = vec3(hash21(p), hash21(p + 2.37f), hash21(oP + 4.83f));
        vec3 pg = .9f + .1f * rn3.xyz + .1f * rn3.xxx;
        col *= mix(vec3(1), pg, 1.f - canv);

        // Rough gamma correction.
        return glm::vec3(sqrt(max(col, 0.f)));

#undef HOLES
#undef LINE_TRUCHET
    }
}

here is an example of the program:


Get this bounty!!!

Leave a Reply

This site uses Akismet to reduce spam. Learn how your comment data is processed.