#StackBounty: #python #performance #image #numpy #signal-processing Bilinear image interpolation

Bounty: 50

I have written a bilinear interpolant, which is working moderately well except that is painfuly slow. How can rewrite the code to make it faster? Using opencv directly isn’t a valid answer.

import numpy as np
import numpy.linalg as la
import matplotlib
import matplotlib.pyplot as plt
from skimage.draw import line_aa
import cv2


def draw_line(img, x0, y0, x1, y1):
    r, c, w = line_aa(int(x0), int(y0), int(x1), int(y1))
    img[r, c] = w

def synth_img(sM, sN, pts_src):
    img = np.zeros((sM, sN))
    draw_line(img.T, pts_src[0][0], pts_src[0][1], pts_src[1][0], pts_src[1][1])
    draw_line(img.T, pts_src[1][0], pts_src[1][1], pts_src[2][0], pts_src[2][1])
    draw_line(img.T, pts_src[2][0], pts_src[2][1], pts_src[3][0], pts_src[3][1])
    draw_line(img.T, pts_src[3][0], pts_src[3][1], pts_src[0][0], pts_src[0][1])

    return img


sM, sN = 1440, 1450
pts_src = np.array([[ 520, 100],[ 1410, 220],[1240, 1310],[ 30, 1070]]).astype('float32')
img = synth_img(sN, sM, pts_src)

# Create a target image
M, N = 1050, 1480
img_dst = np.zeros((N, M))
pts_dst = np.array([[0, 0], [M-1, 0], [M-1, N-1], [0, N-1]], dtype='float32') 

X = cv2.getPerspectiveTransform(pts_src, pts_dst)   # SRC to DST
X_inv = la.inv(X)                                   # DST to SRC

img_exp = cv2.warpPerspective(img, X, (M, N))

for y in np.arange(img_dst.shape[0]):
    for x in np.arange(img_dst.shape[1]):

        # Find the equivalent coordinates from DST space into SRC space
        Txy = X_inv  @ [x, y, 1]
        u, v, w = Txy / Txy[-1]

        # Find the neighboring points of v and u (src space)
        n = min(max(np.floor(v).astype('int'), 0), img.shape[0]-1)
        s = min(max(np.ceil(v).astype('int'),  0), img.shape[0]-1)
        w = min(max(np.floor(u).astype('int'), 0), img.shape[1]-1)
        e = min(max(np.ceil(u).astype('int'),  0), img.shape[1]-1)

        # Find the values in neighboring values of [u, v]
        q00 = img[n, w]
        q01 = img[n, e]
        q10 = img[s, w]
        q11 = img[s, e]

        x0, x1, y0, y1 = w, e, n, s

        A = np.array([
            [1, x0, y0, x0*y0],
            [1, x0, y1, x0*y1],
            [1, x1, y0, x1*y0],
            [1, x1, y1, x1*y1]
        ])

        b = np.array([[
            q00, q01, q10, q11
        ]]).T

        a = la.lstsq(A, b, rcond=None)[0].ravel()
        z = a[0] + a[1]*u + a[2]*v + a[3]*u*v
        img_dst[y, x] = z


plt.close()
fig, ax = plt.subplots(2,2, figsize=(6,7), dpi=300, sharey=True, sharex=True)
ax[0][0].imshow(img, cmap='gray')
ax[0][0].set_title('Original')
ax[0][1].imshow(img_exp-img_dst, cmap='gray')
ax[0][1].set_title('Diff')
ax[1][0].imshow(img_dst, cmap='gray')
ax[1][0].set_title('Result')
ax[1][1].imshow(img_exp, cmap='gray')
ax[1][1].set_title('Expected')
#plt.show()

After some work I have got here, almost no for loops(except for a single list comprehension). I have added functions (as hinted on a comment MCVEs are not expected here, but I will leave there for history). This code has some helper functions and I really think that there is a lot of room for improvement, I just don’t know how to do.

import numpy as np
import numpy.linalg as la
import matplotlib
import matplotlib.pyplot as plt
import cv2
from itertools import product
from skimage.draw import line_aa



def draw_line(img, x0, y0, x1, y1):
    """Draw antaliased line from (x0,y0) to (x1, y1)"""
    r, c, w = line_aa(int(x0), int(y0), int(x1), int(y1))
    img[r, c] = w


def synth_img(sM, sN, pts_src):
    """Creates a simple synthetic image with a skewed rectangle"""
    img = np.zeros((sM, sN))
    draw_line(img.T, pts_src[0][0], pts_src[0][1], pts_src[1][0], pts_src[1][1])
    draw_line(img.T, pts_src[1][0], pts_src[1][1], pts_src[2][0], pts_src[2][1])
    draw_line(img.T, pts_src[2][0], pts_src[2][1], pts_src[3][0], pts_src[3][1])
    draw_line(img.T, pts_src[3][0], pts_src[3][1], pts_src[0][0], pts_src[0][1])

    return img


def extend_col(array):
    """Extend `array` adding a last columns of ones"""
    return np.hstack((array, np.ones((array.shape[0], 1))))


def reduce_row(array):
    """Reduce row space of array, dividing the whole array by the last
    column and returning the last first two columns"""
    return (array / array[-1])[:-1].T


def bilinear_interp(x, y, x0, x1, y0, y1, q00, q01, q10, q11):
    """Do bilinear interpolation given a point, a box and 
    values on box vetices"""
    q = np.array([[q00, q01],
                  [q10, q11]])
    xx = np.array([(x1 - x), (x - x0)])
    yy = np.array([(y1 - y), (y - y0)])
    # ATTENTION:
    # THIS IS DAMN UGLY. How to remove this comprehension and
    # transform this to a matrix/tensor multiplication?
    zz = np.array( [ a @ b @ c for a, b, c in zip(xx.T, q.T, yy.T)])
    return zz


def neigh_points(points):
    """Return the neighbor points of given uv"""
    return np.floor(points[0]), np.ceil(points[0]), np.floor(points[1]), np.ceil(points[1])


def clip(points, lower_u, upper_u, lower_v, upper_v):
    """Clip values in array given upper and lower limits"""
    u = points[:,:2]
    v = points[:,2:]
    u[u < lower_u] = lower_u
    u[u > upper_u] = upper_u
    v[v < lower_v] = lower_v
    v[v > upper_v] = upper_v


def interpolate(M, image):
    """Executes the interpolation on image based on M transform matrix"""
    MT = M.T
    u, v = MT[0], MT[1]
    w, e = MT[2].astype('int'), MT[3].astype('int')
    n, s = MT[4].astype('int'), MT[5].astype('int')
    q00 = image[n, w]
    q01 = image[n, e]
    q10 = image[s, w]
    q11 = image[s, e]
    return bilinear_interp(u, v, w, e, n, s, q00, q01, q10, q11)


def image_warp(M, img, shape):
    """Look ma! No for's

    Probably a lot to improve.
    """
    rxc = product(range(shape[0]), range(shape[1]))
    uv = reduce_row(la.inv(M) @ extend_col(np.array(list(rxc))).T)
    uv_neigh = np.array(neigh_points(uv.T)).T
    lower_u, upper_u, lower_v, upper_v = 0, img.shape[1]-1, 0, img.shape[0]-1
    clip(uv_neigh, lower_u, upper_u, lower_v, upper_v)
    coords = np.hstack((uv, uv_neigh))
    # ATTENTION:
    # This transformation should not need the rotation, something
    # is wrong here in `interpolate`
    return np.flip(np.rot90(interpolate(coords, img).reshape(shape),3), 1).astype('uint8')


def main():
    sM, sN = 1440, 1450
    pts_src = np.array([[ 526., 107],[ 1413, 227],[1245, 1313],[ 30, 1076]], dtype='float32')
    img = synth_img(sN, sM, pts_src)

    # Create a target image (100 dpi A4 sheet)
    N, M = 1480, 1050
    pts_dst = np.array([[0., 0], [M-1, 0], [M-1, N-1], [0, N-1]], dtype='float32')

    # Get transformation matrix from SRC to DST
    X = cv2.getPerspectiveTransform(pts_src, pts_dst)
    img_dst =  image_warp(X, img, (M, N))
    img_exp = cv2.warpPerspective(img, X, (M, N))

    plt.close()
    fig, ax = plt.subplots(2,2, figsize=(6,7), dpi=300)
    ax[0][0].imshow(img, cmap='gray')
    ax[0][0].set_title('Original')
    ax[0][1].imshow(np.abs(img_exp-img_dst), cmap='gray')
    ax[0][1].set_title('Diff')
    ax[1][0].imshow(img_dst, cmap='gray')
    ax[1][0].set_title('Result')
    ax[1][1].imshow(img_exp, cmap='gray')
    ax[1][1].set_title('Expected')
    plt.show()

Actually, my image_warp function mimics cv2.warpPerspective. Almost got there except for two things:

  1. First pixel [0][0] is weirdly set as 0. But looking the return from zz in bilinear_interp function the value is correct.

  2. Some rounding issues in values (when comparing to OpenCV’s result). Tried to use np.ceil, np.floor, np.round and plain astype('uint8'), but none returned perfect results matching OpenCVs. Left as is using the option that gives the smallest error (using opencv’s output as reference).

About the code, I have some special concerns. There are some very ugly parts. I can point some:

  1. In bilinear_inter the calculation of zz is a VERY UGLY and slow list comprehension. Converting types there and back again.
  2. In image_warp there is an excessive number of conversion to list and numpy arrays and transposes.

  3. In image_warp I’m rotating and flipping the results. Don’t know what is happening and where the error may be, since the vector space transfomation should result in the correct orientation.

  4. I could use np.clip instead my own clip. But I treat the first two columns differently than the last two columns. Can np.clip do that somehow?

  5. The extend_col/reduce_row trick works fine and is reasonably fast, but ugly. Any better approaches?

Of course any other improvements are very welcome.


Get this bounty!!!

Leave a Reply

This site uses Akismet to reduce spam. Learn how your comment data is processed.