2025-10-08 Low Rank

  • Reflection on algorithm choices

  • Low-rank structure

  • Primer on interpolation

[1]:
using LinearAlgebra
using Plots
using Polynomials
default(lw=4, ms=5, legendfontsize=12, xtickfontsize=12, ytickfontsize=12)
default(aspect_ratio=:equal)

# Here's our Vandermonde matrix again
function vander(x, k=nothing)
    if isnothing(k)
        k = length(x)
    end
    m = length(x)
    V = ones(m, k)
    for j in 2:k
        V[:, j] = V[:, j-1] .* x
    end
    V
end

# Our modified Gram-Schmidt for QR
function gram_schmidt_modified(A)
    m, n = size(A)
    Q = copy(A)
    R = zeros(n, n)
    for j in 1:n
        # First we normalize our column
        R[j, j]  = norm(Q[:, j])
        Q[:, j] /= R[j, j]
        # Then we make all following columns orthogonal to the current
        R[j, j+1:end]  = Q[:, j]' * Q[:, j+1:end]
        Q[:, j+1:end] -= Q[:, j]  * R[j, j+1:end]'
    end
    Q, R
end

# Householder for QR
function qr_householder(A)
    m, n = size(A)
    R = copy(A)
    V = [] # list of reflectors
    for j in 1:n
        # Build v for the current column
        v = copy(R[j:end, j])
        v[1] += sign(v[1]) * norm(v) # Here's the fix! Pick the sign that avoids cancellation
        v = normalize(v)
        # Build and apply F
        R[j:end, j:end] -= 2 * v * (v' * R[j:end,j:end])
        # Keep track of the reflectors
        push!(V, v)
    end
    V, R
end

# Here's a function to apply the reflectors V to a vector x
function reflectors_mult(V, x)
    y = copy(x)
    for v in reverse(V)
        n = length(v) - 1
        # This is applying F as we did to form R
        y[end-n:end] -= 2 * v * (v' * y[end-n:end])
    end
    y
end

# Create a dense matrix Q from reflectors V
function reflectors_to_dense(V)
    m = length(V[1])
    Q = diagm(ones(m))
    # Apply the reflectors to the identity matrix to build Q
    for j in 1:m
        Q[:, j] = reflectors_mult(V, Q[:,j])
    end
    Q
end

# And Cholesky QR
function qr_chol(A)
    # First find R
    R = cholesky(A' * A).U
    Q = A / R # Triangular solves are easy
    Q, R
end

# Also Cholesky QR with actual orthogonality retained
function qr_chol2(A)
    # First one had loss of orthogonality
    R = cholesky(A' * A).U
    Q = A / R # Triangular solves are easy
    # So we Cholesky decompose twice
    R2 = cholesky(Q' * Q).U
    Q = Q / R2
    # Note, triangular * triangular = triangular
    R = R2 * R
    Q, R
end

# Finally our visualization aids
# Let's use the peanut blob again
function peanut()
    theta = LinRange(0, 2*pi, 50)
    r = 1 .+ .4*sin.(3*theta) + .6*sin.(2*theta)
    r' .* [cos.(theta) sin.(theta)]'
end

# and a perfect circle
function circle()
    theta = LinRange(0, 2*pi, 50)
    [cos.(theta) sin.(theta)]'
end

"Plot a transformation from X to Y"
function Aplot(A)
    X = peanut()
    Y = A * X
    p = scatter(X[1,:], X[2,:], label="in")
    scatter!(p, Y[1,:], Y[2,:], label="out")
    X = circle()
    Y = A * X
    q = scatter(X[1,:], X[2,:], label="in")
    scatter!(q, Y[1,:], Y[2,:], label="out")
    plot(p, q, layout=2)
end
[1]:
Aplot (generic function with 1 method)
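
As a quick sanity check (my addition, not in the original notes), each factorization above should reproduce a test matrix, and the Householder reflectors should assemble into an orthogonal \(Q\):

# Sanity check (my addition): all three QR routines reproduce A
x = LinRange(-1, 1, 10)
A = vander(x)
Q1, R1 = gram_schmidt_modified(A)
Vs, R2 = qr_householder(A)
Q2 = reflectors_to_dense(Vs)
Q3, R3 = qr_chol(A)
@show norm(Q1 * R1 - A)
@show norm(Q2 * R2 - A)
@show norm(Q3 * R3 - A)
@show norm(Q2' * Q2 - I);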

Condition number via SVD

Recall that we can compute the condition number of a matrix from its singular values: the orthogonal factors \(U\) and \(V\) preserve lengths, so all of the stretching is captured by \(\Sigma\).

\[U \overbrace{\begin{bmatrix} \sigma_{\max} && \\ & \ddots & \\ && \sigma_{\min} \end{bmatrix}}^{\Sigma} V^T = A\]
\[ \begin{align} \lVert A \rVert &= \sigma_{\max} & \kappa(A) &= \frac{\sigma_{\max}}{\sigma_{\min}} = \texttt{cond}(A) \end{align}\]
[2]:
# Let's make a random symmetric matrix
A = randn(2, 2)
A = A + A'
[2]:
2×2 Matrix{Float64}:
 -4.66531   -0.518554
 -0.518554   0.287251
[3]:
# The condition number is the ratio of the largest and smallest singular value
@show svdvals(A)
U, S, V = svd(A)
@show cond_A = maximum(S) / minimum(S);

# U and V are in fact orthogonal
@show norm(U * U' - I)
@show norm(V * V' - I)

# Let's see what this matrix does to the peanut
Aplot(A)
svdvals(A) = [4.7190242617009615, 0.3409628088748754]
cond_A = maximum(S) / minimum(S) = 13.840290315747376
norm(U * U' - I) = 2.220446049250313e-16
norm(V * V' - I) = 0.0
[3]:

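Since \(A\) was constructed to be symmetric, its singular values are the absolute values of its eigenvalues — a quick check (my addition):

# For symmetric A, the singular values are |eigenvalues| (my addition)
@show eigvals(A)
@show svdvals(A) ≈ sort(abs.(eigvals(A)), rev=true);
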
Least squares

Consider autonomous vehicles.

  • Need to solve least squares problems in real time

  • Weight/size/cost increase with computational needs

  • What algorithm to choose?

  • What precision to use?

Factors

  • How many objects to track?

  • Speed (of robot and objects)

  • Movement mode - aerial, wheeled, walking?

  • Environment - fog, lighting, etc - longer memory needed?

  • Tolerances (how accurate does the solution need to be?)

  • Consequences of errors, and who is responsible?

[4]:
# Let's look at the raw time to solve with our methods above
A = rand(5000, 500)
A_32 = Float32.(A)
@show cond(A)

println("\nHousholder, from Julia")
@time Q, R = qr(A); # Householder; backward stable
@show norm(Q * R - A)
@show norm(Q' * Q - I)

println("\nCholesky")
@time Q, R = qr_chol(A) # Unstable
@show norm(Q * R - A)
@show norm(Q' * Q - I)

println("\nHousholder, single precision")
@time Q, R = qr(A_32); # Lower precision
@show norm(Q * R - A)
@show norm(Q' * Q - I);

println("\nCholesky, single precision")
@time Q, R = qr_chol(A_32); # Lower precision
@show norm(Q * R - A)
@show norm(Q' * Q - I);
cond(A) = 56.089149554120844

Householder, from Julia
  0.042941 seconds (16.12 k allocations: 22.086 MiB, 14.73% gc time, 26.93% compilation time)
norm(Q * R - A) = 5.419947033929591e-13
norm(Q' * Q - I) = 3.4228183349535124e-14

Cholesky
  0.083962 seconds (159.33 k allocations: 31.155 MiB, 6.38% gc time, 78.06% compilation time)
norm(Q * R - A) = 4.7598648681881803e-14
norm(Q' * Q - I) = 1.0712375374007724e-12

Householder, single precision
  0.087600 seconds (198.00 k allocations: 20.700 MiB, 84.27% compilation time)
norm(Q * R - A) = 0.0002855807790455134
norm(Q' * Q - I) = 1.7505894f-5

Cholesky, single precision
  0.409190 seconds (4.42 M allocations: 231.161 MiB, 7.47% gc time, 98.44% compilation time)
norm(Q * R - A) = 3.9128822133829075e-5
norm(Q' * Q - I) = 0.00057106407f0
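
For context (a back-of-envelope addition, using standard leading-order flop counts): Householder QR costs about \(2mn^2 - \tfrac{2}{3}n^3\) flops, while Cholesky QR spends about \(mn^2\) forming \(A^T A\), \(\tfrac{1}{3}n^3\) on the Cholesky factor, and another \(mn^2\) on the triangular solve for \(Q\).

# Leading-order flop counts (my addition; not from the original notes)
m, n = 5000, 500
flops_householder = 2m*n^2 - 2n^3/3
flops_cholqr = m*n^2 + n^3/3 + m*n^2  # form A'A + Cholesky + solve for Q
@show flops_householder flops_cholqr;

Cholesky QR does somewhat more arithmetic, but it is dominated by one large matrix-matrix product, which is why it can win on wall-clock time despite its instability.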
[5]:
# And let's check with a particularly ill-conditioned matrix
V = vander(LinRange(-1, 1, 22))
V_32 = Float32.(V)
@show cond(V)

println("\nHousholder, from Julia")
@time Q, R = qr(V); # Householder; backward stable
@show norm(Q * R - V)
@show norm(Q' * Q - I)

println("\nCholesky")
@time Q, R = qr_chol(V) # Unstable
@show norm(Q * R - V)
@show norm(Q' * Q - I)

println("\nHousholder, single precision")
@time Q, R = qr(V_32); # Lower precision
@show norm(Q * R - V)
@show norm(Q' * Q - I);
cond(V) = 2.4996629598602743e9

Householder, from Julia
  0.000067 seconds (16 allocations: 15.688 KiB)
norm(Q * R - V) = 3.5159362514746e-15
norm(Q' * Q - I) = 3.254229278676633e-15

Cholesky
  0.000036 seconds (16 allocations: 11.883 KiB)
norm(Q * R - V) = 3.0243458890798737e-16
norm(Q' * Q - I) = 1.0000208696450787

Householder, single precision
  0.000057 seconds (12 allocations: 8.219 KiB)
norm(Q * R - V) = 1.8764177333132612e-6
norm(Q' * Q - I) = 1.8709579f-6
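
The qr_chol2 routine from the first cell never got exercised. A hedged aside (my addition): on a moderately ill-conditioned matrix, where the first Cholesky pass still succeeds, factoring a second time should restore orthogonality to machine precision.

# "Cholesky twice" (my addition): the second pass repairs orthogonality
W = vander(LinRange(-1, 1, 15))
@show cond(W)
Q1, R1 = qr_chol(W)
Q2, R2 = qr_chol2(W)
@show norm(Q1' * Q1 - I)
@show norm(Q2' * Q2 - I)

This trick only helps while \(\kappa(A) \lesssim 1/\sqrt{\epsilon_{\text{machine}}}\); for the 22-point Vandermonde matrix above, the first pass already loses so much orthogonality that the second Cholesky factorization can fail outright.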

Low rank approximation

The SVD can be truncated to yield the best rank-\(k\) approximation of a matrix.
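
This is the Eckart–Young theorem: keeping the \(k\) largest singular triples gives \(\hat A = \hat U \hat \Sigma \hat V^T\), and no rank-\(k\) matrix comes closer in the operator norm,

\[\min_{\operatorname{rank} B \leq k} \lVert A - B \rVert = \lVert A - \hat A \rVert = \sigma_{k+1} .\]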

[6]:
# Let's make a random 2×2 matrix (generically full rank)
n, k = 2, 1

A = randn(n, n)
Aplot(A)
[6]:
[7]:
# Let's make the SVD
U, S, V = svd(A)

# Let's truncate out the last singular value
@show S[1:k+1]
Û = U[:, 1:k]
Ŝ = S[1:k]
V̂ = V[:, 1:k]
Â = Û * diagm(Ŝ) * V̂'
@show norm(Â)
# Here's our reduced A
Aplot(Â)
S[1:k + 1] = [1.283712670445647, 0.41954590250072576]
norm(Â) = 1.283712670445647
[7]:
[8]:
# And the part left out
Aplot(A - Â)
[8]:
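
Consistent with Eckart–Young, the operator norm of the part left out is exactly the first discarded singular value. A spot check (my addition), reusing the variables from the cells above:

# Rank-k truncation error in the operator norm equals σ_{k+1}
@show opnorm(A - Â)
@show S[k+1];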

Example: Galaxies

Suppose we have two galaxies, each with \(n = 100\) stars randomly distributed around their respective centers.

[9]:
galaxy(center, sigma, n) = reshape(center, 1, 3) .+ sigma*randn(n, 3)
g1 = galaxy([0 0 0], 1, 100)
g2 = galaxy([10 0 0], 1, 100)

scatter(g1[:, 1], g1[:, 2], aspect_ratio=:equal, label="galaxy 1")
scatter!(g2[:, 1], g2[:, 2], label="galaxy 2")
[9]:

Force between stars

Consider the gravitational force from a star at position \(x_2\) acting on a star at position \(x_1\).

\[F_{1, 2} = G \frac{m_1 m_2}{\left\lvert \left\lvert x_2 - x_1 \right\rvert \right\rvert^3} \left( x_2 - x_1 \right)\]

where \(m_1\) and \(m_2\) are the masses of each star, respectively.
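
As a direct transcription of this formula (a sketch I'm adding; `force` is a hypothetical helper, and the code below will set \(G = m_1 = m_2 = 1\)):

# Hypothetical helper (my addition): force on star 1 exerted by star 2
force(x1, x2, m1, m2; G=1.0) = G * m1 * m2 / norm(x2 - x1)^3 * (x2 - x1)

force([0., 0, 0], [1., 0, 0], 1, 1)  # points from x1 toward x2: [1.0, 0.0, 0.0]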

[10]:
# Let's make a matrix of all pairwise gravitational forces
# (unit masses and G = 1, so only the geometric factor remains)
function gravity(g1, g2)
    m = size(g1, 1)
    n = size(g2, 1)
    F = zeros(3*m, n)
    for i in 0:m-1
        for j in 1:n
            r = g2[j, :] - g1[1+i, :]
            F[1+3*i:3*(i+1), j] = r / norm(r)^3
        end
    end
    F
end

gravity(g1, g2)
[10]:
300×100 Matrix{Float64}:
  0.0092555     0.00659191    0.0109241    …   0.00924183    0.00902314
  0.00205554    0.0011782     0.00171437       0.000804196   0.00143314
  0.000161345  -0.00102562   -0.00111365       0.000713908   0.000338838
  0.00891627    0.00661779    0.010422         0.00812364    0.0083581
  0.000134426   2.41079e-5   -0.000645886     -0.000886846  -0.00033312
  0.00112317   -0.000375582   0.000205911  …   0.00146679    0.00118899
  0.00888392    0.00603805    0.00973773       0.00858864    0.00851904
  0.000703421   0.000352533   6.72438e-5      -0.000398637   0.000192527
 -0.000847777  -0.00148159   -0.00211502      -0.000303389  -0.000622363
  0.00818796    0.00583       0.00913395       0.00768037    0.007762
  0.000218858   8.26747e-5   -0.000412912  …  -0.000702157  -0.000199458
 -4.68933e-5   -0.000942961  -0.00104933       0.000378266   0.00010238
  0.00767546    0.00563559    0.0088714        0.00750916    0.00742524
  ⋮                                        ⋱
  0.00896992    0.00633828    0.0100622        0.00827117    0.00843467
  3.88403e-6   -5.56722e-5   -0.000774142     -0.00101226   -0.000452355
  0.000147973  -0.000943928  -0.000970932  …   0.000607043   0.000300734
  0.0100196     0.00673199    0.0110054        0.00933815    0.0094457
  0.000106889  -3.88533e-6   -0.000779955     -0.00111612   -0.000442161
 -0.000577707  -0.00148147   -0.00201912       4.214e-5     -0.000332865
  0.00639916    0.00515134    0.00776259       0.00642084    0.00629253
  0.00185955    0.00125154    0.00185531   …   0.00110014    0.00147084
  0.00105506   -4.25808e-5    0.000557162      0.00137125    0.00112444
  0.0100633     0.00691262    0.0114197        0.00946221    0.00953241
  0.000594516   0.000279234  -0.000216889     -0.00068787    6.02512e-7
 -5.58421e-5   -0.00121833   -0.00145847       0.000523819   0.000146799

Spectrum (Singular values)

[11]:
# A new pair of galaxies
g1 = galaxy([0 0 0], 1, 500)
g2 = galaxy([10 0 0], 1, 500)
F = gravity(g1, g2)

# Let's plot the singular values; the galaxies are well separated,
# so the interaction varies smoothly and F is nearly low rank
U, S, V = svd(F)
default(aspect_ratio=:auto)
scatter(S, yscale=:log10, ylims=(1e-16, 1e1), xlims=(0, 200), label="singular values")
[11]:
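How many of the 300 singular values actually matter? A quick count at an (arbitrary) relative cutoff, my addition:

# Numerical rank of F at a relative tolerance of 1e-10 (my addition)
@show sum(S .> 1e-10 * S[1]);
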
[12]:
# Let's make a minimal model
k = 10
Û = U[:, 1:k]
Ŝ = S[1:k]
V̂ = V[:, 1:k]
F́ = Û * diagm(Ŝ) * V̂'

# And how well does it do?
@show norm(F - F́)
norm(F - F́) = 0.004289146304990904
[12]:
0.004289146304990904
[13]:
# Now sweep the rank k and watch the error decay
n = 200
norm_err = zeros(n)
for k in 1:n
    Û = U[:, 1:k]
    Ŝ = S[1:k]
    V̂ = V[:, 1:k]
    F́ = Û * diagm(Ŝ) * V̂'
    norm_err[k] = norm(F - F́)
end

scatter(norm_err, yscale=:log10, ylims=(1e-15, 1e0), xlims=(1, n), label="\$\\vert \\vert F - F_k \\vert \\vert\$", xlabel="\$k\$")
[13]:
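
Because `norm` of a matrix is the Frobenius norm, this curve is exactly the tail of the spectrum, \(\lVert F - F_k \rVert_F = \sqrt{\sum_{i>k} \sigma_i^2}\). A spot check (my addition), reusing `S` and `norm_err`:

# Frobenius truncation error = root-sum-square of the discarded σᵢ
k = 10
@show norm_err[k]
@show sqrt(sum(abs2, S[k+1:end]));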

What is interpolation?

Given data \(\left( x_i, y_i \right)\), find a (smooth?) function \(f \left( x \right)\) such that \(f \left( x_i \right) = y_i\).

Data in

  • Direct field observations/measurements of physical or social system

  • Numerically processed observations, perhaps by applying physical principles

  • Output from an expensive “exact” numerical computation

  • Output from an approximate numerical computation

Function out

  • Polynomials

  • Piecewise polynomials (includes nearest-neighbor)

  • Powers and exponentials

  • Trigonometric functions (sine and cosine)

  • Neural networks

Interpolation fits the data exactly!

Polynomial interpolation

We’ve seen how we can fit a polynomial using Vandermonde matrices, with one column per basis function and one row per observation.

\[\underbrace{\Bigg[ 1 \Bigg| x \Bigg| x^2 \Bigg| x^3 \Bigg]}_{A \in \mathbb R^{m\times n}} \Bigg[ \mathbf p \Bigg] = \Bigg[ \mathbf y \Bigg]\]

It’s possible to find a unique polynomial \(p\) under which of the following conditions?

  1. \(m \leq n\)

  2. \(m = n\)

  3. \(m \geq n\)

Polynomial interpolation with Vandermonde

[14]:
# Let's build some data
x = LinRange(-1.5, 2, 4)
y = sin.(x)

# And find coefficients so that A p = y
A = vander(x)
p = A \ y

# How well did we match?
scatter(x, y, label="data")
s = LinRange(-3, 3, 50)
plot!(s, [sin.(s) vander(s, length(p)) * p], label=["true" "fit"])
[14]:

Recall that Vandermonde matrices can be ill-conditioned.

[15]:
for i in 1:5
    n = 10 * i
    A = vander(LinRange(-1, 1, n))
    println("n = $n, cond(A) = $(cond(A))")
end
n = 10, cond(A) = 4626.449923375609
n = 20, cond(A) = 2.7224082423906505e8
n = 30, cond(A) = 1.8388423738279527e13
n = 40, cond(A) = 9.028174036334257e17
n = 50, cond(A) = 5.645434903563094e18

Is this due to the points \(x\)?

Or is this because of the basis functions \(\lbrace 1, x, x^2, x^3, \dots \rbrace\)?

Or something else?
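
One experiment to start probing these questions (my addition, not in the original notes): keep the monomial basis but move the sample points, e.g. to Chebyshev points \(x_j = \cos \theta_j\) with equispaced \(\theta_j\), and watch the condition number.

# Same monomial basis, two choices of points (my addition)
for n in [10, 20, 30]
    x_equi = LinRange(-1, 1, n)
    x_cheb = cos.(LinRange(0, pi, n))  # Chebyshev-Lobatto points
    println("n = $n: equispaced $(cond(vander(x_equi))), Chebyshev $(cond(vander(x_cheb)))")
end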