In today's lecture: Gradient Descent!
Most of machine learning focuses on finding the minimum value of a function; why that is will be discussed later in the lecture.
But for now, here's the deal: we need to find the input values for which $f$ is minimized.
For simple functions this couldn't be easier:
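For example (a simple stand-in): for $f(x) = (x-3)^2$, setting $\frac{df}{dx} = 2(x-3) = 0$ gives $x = 3$ directly.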
For other functions, it seems a bit harder:
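Take, for instance, the function we'll use throughout this lecture: $$ f(x) = \frac{1}{3}(x-1)^6-(x-2)^5+5x^4. $$ Setting its derivative to zero gives a degree-5 polynomial equation with no convenient closed-form solution.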
Let's graph and see what we get:
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(0, 2, 1000)
f_x = (1/3)*(x-1)**6-(x-2)**5+5*x**4
plt.figure()
plt.plot(x, f_x)
How do we find the minima?
One simple method to find the minimum: an iterative search. Step $x$ by a fixed increment, and whenever the function value increases, halve the step size and reverse direction:
import numpy as np
import matplotlib.pyplot as plt

# Define the function
def f_x(x):
    return (1/3)*(x-1)**6 - (x-2)**5 + 5*x**4

# Iterative search parameters
x_start = 1.7     # Initial x value
delta = 0.25      # Initial step size
tolerance = 1e-5  # Convergence criterion

x_vals = [x_start]  # Store visited points
f_vals = [f_x(x_start)]

# Iterative search for the minimum
x = x_start
while True:
    x_next = x + delta
    f_current = f_x(x)
    f_next = f_x(x_next)
    if f_next > f_current:
        # If the function value increases, reduce the step size and reverse direction
        delta = -delta / 2
    x = x_next
    x_vals.append(x)
    f_vals.append(f_x(x))
    # Check for convergence
    if abs(delta) < tolerance:
        break

# Generate plot data
x_range = np.linspace(0, 2, 400)
y_range = f_x(x_range)

# Plot the function
plt.figure(figsize=(8, 6))
plt.plot(x_range, y_range, 'b-', label="Function $f(x)$")  # Function in blue
plt.plot(x_vals, f_vals, 'ro-', label="Iterative search")  # Search path in red with dots

# Set x and y limits
plt.xlim(-0.1, 2.1)
plt.ylim(0, 85)

# Ensure full box around the graph
for side in ('top', 'right', 'bottom', 'left'):
    plt.gca().spines[side].set_visible(True)

# Show the plot
plt.legend()
plt.show()

print(x_vals[-1])
0.801776123046875
Problems with the iterative method:
It would be nice if we could adjust the increment as we get closer to the minimum...
Notice the slope gets smaller the closer we get to the minimum. This makes sense because we know the minimum occurs when $ \frac{df}{dx} = 0$. Maybe we can use the derivative to scale our iteration interval? This is known as gradient descent.
We know the gradient (and, in one dimension, the derivative) points in the direction of steepest ascent.
Suppose we have a differentiable function $f(x)$ that we would like to minimize but cannot find a closed form solution for the minimum.
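The idea behind gradient descent: repeatedly step opposite the gradient, $$ x^{(k+1)} = x^{(k)}-\alpha\nabla f(x^{(k)}), $$ where the step size $\alpha$ scales each update. Since the gradient shrinks as we approach a minimum, the steps automatically shrink too.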
Let's return to our prior function:
$$ f(x) = \frac{1}{3}(x-1)^6-(x-2)^5+5x^4. $$

The derivative is then given by

$$ \frac{df}{dx} = 2(x-1)^5-5(x-2)^4+20x^3. $$
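Before we use this derivative, a quick numerical sanity check never hurts; here is a minimal sketch comparing it against a central finite difference (the test point $x_0 = 1.3$ is arbitrary):

# Sanity check the derivative above with a central finite difference.
def f(x):
    return (1/3)*(x-1)**6 - (x-2)**5 + 5*x**4

def dfdx(x):
    return 2*(x-1)**5 - 5*(x-2)**4 + 20*x**3

x0, h = 1.3, 1e-6
numeric = (f(x0 + h) - f(x0 - h)) / (2*h)
print(dfdx(x0), numeric)  # should agree to several decimal places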
Apply gradient descent to the previous complicated function to approximate the value of $x$ that minimizes $f(x)$. For convenience: $$ \begin{align*} f(x) &= \frac{1}{3}(x-1)^6-(x-2)^5+5x^4\\ \frac{df}{dx} &= 2(x-1)^5-5(x-2)^4+20x^3\\ x^{(k+1)} &= x^{(k)}-\alpha\nabla f(x). \end{align*} $$ Use a starting point of $x^{(0)}=0$ and $\alpha=10^{-3}$. Run gradient descent for 100 iterations, plot the values of $x^{(k)}$, and the magnitude of the gradient at each iteration.
Bonus: try varying the initial value of $x$ and choice of $\alpha$.
import numpy as np
import matplotlib.pyplot as plt

def f(x):
    return (1/3)*(x-1)**6 - (x-2)**5 + 5*x**4

n_iter = 100  # number of iterations
x_init = 1.7  # initial guess for x
alpha = 1e-3  # step size, 10**-3

x_values = [x_init]
x = x_init
gradients = []
for n in range(n_iter):
    # calculate gradient
    gradient = 2*(x-1)**5 - 5*(x-2)**4 + 20*x**3
    # perform gradient descent step to obtain next value
    x_next = x - alpha*gradient
    # store values of x and gradient
    x_values.append(x_next)
    gradients.append(gradient)
    # update x for next step
    x = x_next

x_range = np.linspace(0, 2, 400)

# plotting
plt.figure(figsize=(12, 8))
plt.subplot(2, 1, 1)
plt.plot(x_range, f(x_range), 'royalblue')
plt.plot(np.array(x_values), f(np.array(x_values)), 'ro-')
plt.xlabel('x')
plt.ylabel('f(x)')
plt.grid(True)
plt.subplot(2, 2, 3)
plt.plot(np.arange(len(x_values)), x_values, 'forestgreen')
plt.xlabel('Iteration number')
plt.ylabel('Value of x')
plt.grid(True)
plt.subplot(2, 2, 4)
plt.semilogy(np.arange(n_iter), np.abs(gradients), 'royalblue')
plt.xlabel('Iteration number')
plt.ylabel('Magnitude of gradient')
plt.grid(True)
plt.tight_layout()
plt.show()
Much more can be shown about gradient descent including variations on gradient descent, convergence guarantees, convergence rates, and more. For the purposes of this course, we primarily need to motivate the use of gradient descent for machine learning problems and other interesting details are best left to ECE 490: Introduction to Optimization.
Gradient descent isn't perfect. It can get stuck in a local minimum. To demonstrate this point, let's return to our composite function from the prior lectures:
$e(x) = x^2$, $g(e) = e+1$, $h(g) = \log(g)$, $k(h) = \sin(h)$. Thus, $f(x) = k(h(g(e(x))))$.
The resulting function is: $$f(x) = \sin(\log(x^2+1))$$
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(-20, 20, 1000)
f_x = np.sin(np.log(x**2 + 1))
plt.figure()
plt.plot(x, f_x)
Let's try to find the minimum of the function!
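By the chain rule, the derivative we'll need for gradient descent is $$ \frac{df}{dx} = \cos(\log(x^2+1))\cdot\frac{2x}{x^2+1}, $$ which is exactly the gradient expression used in the code below.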
import numpy as np
import matplotlib.pyplot as plt

def f(x):
    return np.sin(np.log(x**2 + 1))

n_iter = 100   # number of iterations
x_init = 1.7   # initial guess for x
#x_init = 2.5  # alternative initial guess for x
alpha = 1e-3   # step size, 10**-3

x_values = [x_init]
x = x_init
gradients = []
for n in range(n_iter):
    # calculate gradient
    gradient = 2*x*np.cos(np.log(x**2+1))/(x**2+1)
    # perform gradient descent step to obtain next value
    x_next = x - alpha*gradient
    # store values of x and gradient
    x_values.append(x_next)
    gradients.append(gradient)
    # update x for next step
    x = x_next
#print(x_values)

x_range = np.linspace(-20, 20, 400)

# plotting
plt.figure(figsize=(12, 8))
plt.subplot(2, 1, 1)
plt.plot(x_range, f(x_range), 'royalblue')
plt.plot(np.array(x_values), f(np.array(x_values)), 'ro-')
plt.xlabel('x')
plt.ylabel('f(x)')
plt.grid(True)
plt.show()
The optimization above had one big drawback: I had to hand-calculate the gradients symbolically and enter the correct expressions. We can't do that for every function, so we must develop an efficient and exact method for computing gradients automatically for arbitrarily complicated functions. There are a few options:
- Symbolic differentiation: compose derivatives/gradients in symbolic closed form using simple rules, i.e. the chain rule. Evaluate this expression to compute gradients.
- Backpropagation: construct a computational graph of a function and use simple derivative/gradient rules to evaluate small pieces of the gradient and accumulate results via the chain rule. (This is what we will use! A quick sketch follows below.)
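As a preview, here is a minimal sketch, assuming nothing beyond the autograd basics we have already seen, of backpropagation reproducing the hand-derived gradient of $f(x)=\sin(\log(x^2+1))$ at a single point:

import math
import torch

# Evaluate f(x) = sin(log(x^2 + 1)) at x = 1.7 and let autograd
# backpropagate through the computational graph it builds.
x = torch.tensor(1.7, requires_grad=True)
f = torch.sin(torch.log(x**2 + 1))
f.backward()

# Hand-derived derivative, 2x*cos(log(x^2+1))/(x^2+1), for comparison.
manual = 2*1.7*math.cos(math.log(1.7**2 + 1))/(1.7**2 + 1)
print(x.grad.item(), manual)  # the two values should agree closely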
What if we do not want to find a minimum? Let's say we want to know for what value of $x$ the prior function satisfies $f(x)=0.5$.
Well, to do that, we need to change our optimization by defining a new objective function.
The simplest version of an optimization problem may be stated as follows: $$ \underset{x}{\min}f(x). $$ The above expression gives an unconstrained optimization problem, i.e. no constraints on values $x$ can attain, where we look to find $x$ that minimizes the given objective function $f(x)$.
But again, minimizing $f$ itself will just give us the value of $x$ where $f$ is smallest. I want to know where the function evaluates to $0.5$!
One way to do this is to wrap this function ($f(x)$) up in a new function that is smallest when $f(x)=0.5$.
And the clever trick we can do is write a new function that compares $f(x)$ with our desired output $y$. One highly popular choice is the mean squared error (MSE): $$ \ell_{\textrm{mse}}=(y-f(x))^2. $$ The intuition for squaring the error, $y-f(x)$, is that we want positive and negative errors to be treated the same.
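For example, if our target is $y=0.5$, then predictions of $0.3$ and $0.7$ miss by the same amount and incur the same loss: $(0.5-0.3)^2 = (0.5-0.7)^2 = 0.04$.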
So let's try an example.
Let's try our complex function from above:
$$f(x) = \sin(\log(x^2+1)),$$ but this time let's add another layer to it and define the loss
$$ \ell_{\textrm{mse}}=(0.5-f(x))^2, $$ and the gradient of this loss would be:
$$ \frac{\partial}{\partial x}\ell_{\textrm{mse}}=-2\,(0.5-f(x))\cdot\frac{\partial f(x)}{\partial x}. $$ The code would look something like:
import numpy as np
import matplotlib.pyplot as plt

def f(x):
    return np.sin(np.log(x**2 + 1))

n_iter = 100    # number of iterations
x_init = 1.7    # initial guess for x
#x_init = 2.5   # alternative initial guess for x
alpha = 100e-3  # step size, 10**-1

x_values = [x_init]
x = x_init
gradients = []
for n in range(n_iter):
    # calculate gradient of the loss via the chain rule
    gradient = 2*(0.5-f(x))*-1*2*x*np.cos(np.log(x**2+1))/(x**2+1)
    # perform gradient descent step to obtain next value
    x_next = x - alpha*gradient
    # store values of x and gradient
    x_values.append(x_next)
    gradients.append(gradient)
    # update x for next step
    x = x_next
#print(x_values)

x_range = np.linspace(-20, 20, 400)

# plotting
plt.figure(figsize=(12, 8))
plt.subplot(2, 1, 1)
plt.plot(x_range, f(x_range), 'royalblue')
plt.plot(np.array(x_values), f(np.array(x_values)), 'ro-')
plt.xlabel('x')
plt.ylabel('f(x)')
plt.grid(True)
plt.show()
Consider the problem of finding the parameters which make a particular equation best fit some collection of data. We may accomplish this by first posing a reasonable optimization problem. Suppose our function is a quadratic, $f(x)=ax^2+bx+c$, with parameters $(a, b, c)$, and we refer to our dataset $\mathcal{D}=\{(x_i, y_i)\}_{i=1}^{N}$ as $N$ tuples of inputs and outputs. For a given guess of the parameters, the function will produce $f(x_i)$ while the correct or ground-truth value $y_i$ may be different. Thus, for the current values of $(a, b, c)$, we have some amount of error. We need a reasonable objective function to minimize these errors with respect to this dataset. We can take the mean (average) of the sum of squared errors over the dataset. Let $\ell_{\textrm{mse}}(\mathcal{D})$ denote the MSE over the dataset: $$ \ell_{\textrm{mse}}(\mathcal{D})=\frac{1}{N}\sum_{i=1}^{N}(y_i-f(x_i))^2. $$ As before, the intuition for squaring the errors, $y_i-f(x_i)$, is that we want positive and negative errors to be treated the same. We now have all the elements of a well-posed optimization problem!
Alertness check: What is the function represented by the following computation graph:
For just one data point, we can perform the forward pass to calculate values at each node, i.e. $ax^2$, $bx$, $\ell_{\textrm{mse}}(x_i, y_i)$. Then, we initiate backpropagation from the seed node to automatically compute the gradient of the loss function with respect to each parameter, i.e. $\frac{\partial \ell_{\textrm{mse}}}{\partial a}, \frac{\partial \ell_{\textrm{mse}}}{\partial b}, \frac{\partial \ell_{\textrm{mse}}}{\partial c}$. For completeness, we can easily derive these partial derivatives from the computational graph.
\begin{align*} \frac{\partial \ell_{\textrm{mse}}}{\partial w_7} &= 1 & \frac{\partial w_7}{\partial w_6} &= 2(w_6-w_5)\\ \frac{\partial w_7}{\partial w_5} &= -2(w_6-w_5) & \frac{\partial w_5}{\partial w_4} &= 1\\ \frac{\partial w_5}{\partial w_3} &= 1 & \frac{\partial w_5}{\partial c} &= 1\\ \frac{\partial w_4}{\partial b} &=w_1 & \frac{\partial w_4}{\partial w_1} &=b\\ \frac{\partial w_3}{\partial a} &=w_2 & \frac{\partial w_3}{\partial w_2} &=a\\ \frac{\partial w_2}{\partial w_1} &=2w_1 \end{align*}

And finally, the adjoints:

\begin{align*} \bar{w}_7 &= \frac{\partial \ell_{\textrm{mse}}}{\partial w_7}\\ \bar{w}_5 &= \bar{w}_7\frac{\partial w_7}{\partial w_5} = -2(w_6-w_5) &\bar{c} &= \bar{w}_5\frac{\partial w_5}{\partial c} = -2(w_6-w_5)\\ \bar{w}_4 &= \bar{w}_5\frac{\partial w_5}{\partial w_4} = -2(w_6-w_5) &\bar{w}_3 &= \bar{w}_5\frac{\partial w_5}{\partial w_3} = -2(w_6-w_5)\\ \bar{b} &= \bar{w}_4\frac{\partial w_4}{\partial b} = -2w_1(w_6-w_5) &\bar{a} &= \bar{w}_3\frac{\partial w_3}{\partial a} = -2w_2(w_6-w_5) \end{align*}
Note that we do not give $\bar{w}_6$, $\bar{w}_1$, and $\bar{w}_2$ since they are not needed to derive the partial derivatives with respect to $a$, $b$, and $c$. The above quantities give the result of backpropagation for one data point. However, we have a dataset of $N$ points, so how can we perform backpropagation across the whole dataset? We can just add the gradients due to each input-output pair! Differentiation is linear; thus, the gradient of the entire objective with respect to $a$, $b$, and $c$ is the sum of the gradients for each entry in the dataset.
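To make this concrete, here is a minimal check, using hypothetical parameter values and a single made-up data point, that autograd reproduces the adjoints derived above:

import torch

# One data point, one forward pass, one backward pass: verify the
# hand-derived adjoints for a, b, c (hypothetical values throughout).
x1, y1 = 2.0, 5.0
a = torch.tensor(1.0, requires_grad=True)
b = torch.tensor(-1.0, requires_grad=True)
c = torch.tensor(0.5, requires_grad=True)

f = a*x1**2 + b*x1 + c  # w_5 in the graph's notation
loss = (y1 - f)**2      # squared error for this point
loss.backward()

err = (y1 - f).item()               # (w_6 - w_5)
print(a.grad.item(), -2*x1**2*err)  # matches a_bar = -2*w_2*(w_6-w_5)
print(b.grad.item(), -2*x1*err)     # matches b_bar = -2*w_1*(w_6-w_5)
print(c.grad.item(), -2*err)        # matches c_bar = -2*(w_6-w_5)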
Manually performing gradient descent works, but we don't want to have to type expressions for the partial derivatives of every parameter, especially when the number of parameters grows to hundreds, thousands, or millions!
Recall from the first part of our autodifferentiation lectures that gradient descent for a given optimization problem proceeds as follows $$ x^{(k+1)} = x^{(k)}-\alpha\nabla f(x) $$ for step size $\alpha$ at iteration $k$. Now in the above example, we have a function with one input, but three parameters for which we are computing the gradient. Let $\theta=\{a, b, c\}$ represent these three parameters as a vector. We can thus write our gradient descent update equation instead as $$ \theta^{(k+1)} = \theta^{(k)}-\alpha\nabla_\theta f(x), $$ where the use of this $\theta$ notation implies the following three equations: $$ \begin{align*} a^{(k+1)} &= a^{(k)}-\alpha\frac{\partial f(x)}{\partial a}\\ b^{(k+1)} &= b^{(k)}-\alpha\frac{\partial f(x)}{\partial b}\\ c^{(k+1)} &= c^{(k)}-\alpha\frac{\partial f(x)}{\partial c}. \end{align*} $$ The above notation is often used in machine learning applications where a variable like $\theta$ is used to collect all trainable or learnable parameters as shorthand for gradient descent. Similarly, you may also see notation like $f(x;\theta)$ where the semi-colon distinguishes between inputs like $x$ and parameters of the function $\theta$.
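As a small illustration of this shorthand, here is one vectorized update on $\theta$ (with a hypothetical data point and parameter values, using the per-point partial derivatives listed just below):

import numpy as np

# One gradient descent step on the stacked parameter vector theta = (a, b, c)
# for f(x) = a*x^2 + b*x + c; one update moves all three parameters at once.
x1, y1 = 2.0, 5.0                   # hypothetical single data point
theta = np.array([1.0, -1.0, 0.5])  # hypothetical current (a, b, c)
alpha = 1e-2

err = y1 - (theta[0]*x1**2 + theta[1]*x1 + theta[2])
grad = np.array([-2*x1**2*err, -2*x1*err, -2*err])  # nabla_theta of the squared error
theta = theta - alpha*grad          # theta^(k+1) = theta^(k) - alpha*grad
print(theta)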
Even without our knowledge of backpropagation or PyTorch, we could apply gradient descent to the above example since the partial derivatives are fairly easy to find by hand.
$$ \begin{align*} \frac{\partial\ell_{\textrm{mse}}(f(x_i), y_i)}{\partial a} &= -2x_i^2(y_i-ax_i^2-bx_i-c)\\ \frac{\partial\ell_{\textrm{mse}}(f(x_i), y_i)}{\partial b} &= -2x_i(y_i-ax_i^2-bx_i-c)\\ \frac{\partial\ell_{\textrm{mse}}(f(x_i), y_i)}{\partial c} &= -2(y_i-ax_i^2-bx_i-c) \end{align*} $$

import numpy as np
import matplotlib.pyplot as plt
# values we are trying to regress, pretend we don't know them!
a = 3
b = 2
c = 1
# generate dataset
N = 20 # number of data points
x = np.linspace(-2, 2, N)
y = a*x**2 + b*x + c
# initialize guesses for a, b, and c
a_gd = np.random.randn()
b_gd = np.random.randn()
c_gd = np.random.randn()
print('Initial guesses: a={:.6f}, b={:.6f}, c={:.6f}'.format(a_gd, b_gd, c_gd))
# information for tracking
a_vals = [a_gd]
b_vals = [b_gd]
c_vals = [c_gd]
loss_vals = []
# gradient descent loop
n_iter = 100 # number of iterations
alpha = 1e-2 # step size
for n in range(n_iter):
    # let numpy broadcasting compute all partials across the dataset
    errors = y - (a_gd*x**2 + b_gd*x + c_gd)
    partial_a = np.sum(-2*x**2*errors)/N
    partial_b = np.sum(-2*x*errors)/N
    partial_c = np.sum(-2*errors)/N
    # perform gradient descent update step
    a_gd = a_gd - alpha*partial_a
    b_gd = b_gd - alpha*partial_b
    c_gd = c_gd - alpha*partial_c
    # log information
    loss_vals.append(np.sum(errors**2)/N)  # log MSE
    a_vals.append(a_gd)
    b_vals.append(b_gd)
    c_vals.append(c_gd)
# examine solution
print('Final guesses: a={:.6f}, b={:.6f}, c={:.6f}'.format(a_vals[-1], b_vals[-1], c_vals[-1]))
# visualize loss and progression of solution
plt.figure(figsize=(10, 6))
plt.semilogy(loss_vals, color='blue')
plt.grid(True)
plt.xlabel('Iteration number')
plt.ylabel('MSE value for regression')
iter_num = np.array([0, 5, 25, 50, 100]).astype(int)
plt.figure(figsize=(20, 5))
for j, i in enumerate(iter_num):
    plt.subplot(1, 5, j+1)
    curr_fn = a_vals[i]*x**2 + b_vals[i]*x + c_vals[i]
    plt.plot(x, curr_fn, color='blue')
    plt.scatter(x, y, color='orange')
    plt.grid(True)
    plt.title('Regressed function: Iteration {}'.format(i))
Initial guesses: a=-1.179648, b=0.467805, c=0.313049
Final guesses: a=2.823157, b=1.923079, c=1.420865
We have briefly explored the PyTorch Autograd engine in previous lectures. Now, we want to utilize it to automatically perform backpropagation for us and make gradient descent much more scalable!
Let's start by showing how the previous example can be converted from NumPy to PyTorch code:
import numpy as np
import torch
import matplotlib.pyplot as plt
# values we are trying to regress, pretend we don't know them!
a = 3
b = 2
c = 1
# generate dataset
N = 20 # number of data points
x = torch.linspace(-2, 2, N)
y = a*x**2 + b*x + c
# initialize guesses for a, b, and c
a_gd = torch.randn((), requires_grad=True) # 0-dim (scalar) tensor
b_gd = torch.randn((), requires_grad=True)
c_gd = torch.randn((), requires_grad=True)
print('Initial guesses: a={:.6f}, b={:.6f}, c={:.6f}'.format(a_gd.data, b_gd.data, c_gd.data))
# information for tracking
a_vals = [a_gd.data.item()]
b_vals = [b_gd.data.item()]
c_vals = [c_gd.data.item()]
loss_vals = []
# gradient descent loop
n_iter = 100 # number of iterations
alpha = 1e-2 # step size
for n in range(n_iter):
    # compute loss function (objective function)
    errors = y - (a_gd*x**2 + b_gd*x + c_gd)
    loss = torch.sum(errors**2)/N
    # backpropagate gradients
    loss.backward()
    # perform gradient descent update step
    with torch.no_grad():
        # don't want the gradient update step to accumulate further gradients at a, b, and c
        a_gd -= alpha*a_gd.grad
        b_gd -= alpha*b_gd.grad
        c_gd -= alpha*c_gd.grad
        # manually zero out the gradients before the next backward pass
        a_gd.grad = None
        b_gd.grad = None
        c_gd.grad = None
    # log information
    loss_vals.append(loss.item())  # log MSE
    a_vals.append(a_gd.data.item())
    b_vals.append(b_gd.data.item())
    c_vals.append(c_gd.data.item())
# examine solution
print('Final guesses: a={:.6f}, b={:.6f}, c={:.6f}'.format(a_vals[-1], b_vals[-1], c_vals[-1]))
# visualize loss and progression of solution
plt.figure(figsize=(10, 6))
plt.semilogy(loss_vals, color='blue')
plt.grid(True)
plt.xlabel('Iteration number')
plt.ylabel('MSE value for regression')
iter_num = np.array([0, 5, 25, 50, 100]).astype(int)
plt.figure(figsize=(20, 5))
for j, i in enumerate(iter_num):
    plt.subplot(1, 5, j+1)
    curr_fn = a_vals[i]*x**2 + b_vals[i]*x + c_vals[i]
    plt.plot(x.detach().numpy(), curr_fn.detach().numpy(), color='blue')
    plt.scatter(x.detach().numpy(), y, color='orange')
    plt.grid(True)
    plt.title('Regressed function: Iteration {}'.format(i))
Initial guesses: a=-1.012377, b=-0.415224, c=-1.208924
Final guesses: a=3.086846, b=1.878748, c=0.791994
A few notes on the above implementation:

- `requires_grad=True`: Recall that we set this attribute to `True` when we would like to access gradients for the given tensor. In this case, we wanted gradients for tensors `a_gd`, `b_gd`, and `c_gd` since they are our parameters of interest.
- `.data.item()`: The `.data` attribute accesses only the data in the tensor, but still returns a tensor. If we want just the numerical data outside of the tensor data structure, we need to also call the `.item()` method.
- `torch.no_grad()`: The `torch.no_grad()` context specifies that no computation within its scope will alter gradients within a computational graph. In our above example, we do not want the gradient descent update step to affect the gradients we already backpropagated. This is also why we used `-=` instead of `a_gd = a_gd - ...`, since the latter would remove `requires_grad` from each tensor.
- Setting gradients to `None`: The gradients at each node in the graph remain there until they are cleared. Thus, we need to remove them by setting each to `None` before the next backpropagation pass.
- `.detach().numpy()`: The tensor `x` belongs to a computational graph that requires gradients. We must first detach such tensors from the computational graph if we intend to convert them to NumPy arrays for plotting. Alternatively, we could wrap our plotting in a `torch.no_grad()` statement.

Some of these points, e.g. specifying every tensor that needs gradients, setting gradients to `None`, and applying the gradient descent step to every parameter, may seem tedious. And that's okay! In later lectures, we will show how the `nn.Module` class and `torch.optim` module greatly simplify code like the above to only require a few lines instead of around a dozen.
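As a teaser, here is a minimal sketch, assuming the same quadratic-fit setup as above, of how `torch.optim.SGD` absorbs the manual update and gradient-clearing steps:

import torch

# Same quadratic fit as above, but torch.optim.SGD performs the update
# and gradient clearing that we previously wrote by hand.
x = torch.linspace(-2, 2, 20)
y = 3*x**2 + 2*x + 1

a_gd = torch.randn((), requires_grad=True)
b_gd = torch.randn((), requires_grad=True)
c_gd = torch.randn((), requires_grad=True)
opt = torch.optim.SGD([a_gd, b_gd, c_gd], lr=1e-2)

for n in range(100):
    loss = torch.mean((y - (a_gd*x**2 + b_gd*x + c_gd))**2)
    opt.zero_grad()   # replaces manually setting .grad = None
    loss.backward()   # backpropagation, as before
    opt.step()        # replaces the torch.no_grad() update block

print(a_gd.item(), b_gd.item(), c_gd.item())  # should move toward (3, 2, 1)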
For now, we have successfully used PyTorch and its auto-differentiation engine to perform our first machine learning problem! Let's conclude this lecture with another example.
Recall from an earlier lecture where we introduced the sigmoid function $\sigma(x)$: $$ \sigma(x) = \frac{1}{1+e^{-x}}. $$ We extended the sigmoid in one lecture exercise to have a different center point, i.e. where $\sigma(x)=0.5$, and a sharper/smoother transition. Consider this augmented sigmoid function as $\tilde{\sigma}(x)$: $$ \tilde{\sigma}(x) = \frac{1}{1+\exp{\{-\frac{x-b}{\tau}}\}}, $$ where $b$ and $\tau$ give the center point and shape parameter, respectively. For this exercise, we will use PyTorch, backpropagation, and gradient descent to automatically determine the $b$ and $\tau$ values of a mystery sigmoid function. We can again use mean squared error to minimize the following objective function over dataset $\mathcal{D}=\{(x_i, y_i)\}$, where each $y_i$ is generated from a mystery sigmoid function $\tilde{\sigma}(x_i)$: $$ \underset{b, \tau}{\min}~\frac{1}{N}\sum_{i=1}^{N}(y_i-f(x_i;b,\tau))^2 $$
Let's generate some data points we want to fit to:
import numpy as np
import torch
import matplotlib.pyplot as plt
# "unknown" parameters we are trying to uncover
b = -0.75
tau = 2.25
# create dataset
N = 30 # number of datapoints
x = torch.linspace(-10, 10, N)
y = 1/(1+torch.exp(-(x-b)/tau))
Generate an initial guess of the parameters:
# part (a) initialize parameters
b_gd = torch.randn((), requires_grad=True)
tau_gd = torch.rand((), requires_grad=True) # tau_gd = torch.tensor([0.2], requires_grad=True)
print('Initial Guess: b={:.4f}, tau={:.4f}'.format(b_gd.data.item(), tau_gd.data.item()))
Initial Guess: b=0.0048, tau=0.7914
Then we go through the gradient descent and backpropagation process:
# part (b) gradient descent loop
n_iter = 500
alpha = 1e0

b_vals = [b_gd.data.item()]
tau_vals = [tau_gd.data.item()]
for n in range(n_iter):
    # compute function outputs
    f_x = 1/(1+torch.exp(-(x-b_gd)/tau_gd))
    # calculate loss and initiate backpropagation
    loss = torch.mean((y-f_x)**2)  # same as torch.sum((y-f_x)**2)/N
    loss.backward()
    # update parameters by gradient descent
    with torch.no_grad():
        # gradient step
        b_gd -= alpha*b_gd.grad
        tau_gd -= alpha*tau_gd.grad
        # set gradients to None
        b_gd.grad = None
        tau_gd.grad = None
    b_vals.append(b_gd.data.item())
    tau_vals.append(tau_gd.data.item())
Plot the results:
# print final guesses
print('Final Guess: b={:.4f}, tau={:.4f}'.format(b_gd.data.item(), tau_gd.data.item()))

# part (c) plotting solution
plt.figure(figsize=(20, 10))
with torch.no_grad():
    plt.subplot(2, 1, 1)
    plt.scatter(x.numpy(), y.numpy(), color='orange')
    f_x = 1/(1+torch.exp(-(x-b_gd)/tau_gd))  # apply the fitted parameters to the input data in x
    plt.plot(x.numpy(), f_x.numpy(), color='blue')
    plt.grid(True)
    iter_num = np.array([0, 100, 200, 300, 400]).astype(int)
    for j, i in enumerate(iter_num):
        plt.subplot(2, 5, 5+j+1)
        curr_fn = 1/(1+torch.exp(-(x-b_vals[i])/tau_vals[i]))
        plt.plot(x.numpy(), curr_fn.numpy(), color='blue')
        plt.scatter(x.numpy(), y.numpy(), color='orange')
        plt.grid(True)
        plt.title('Regressed function: Iteration {}'.format(i))
Final Guess: b=-0.7330, tau=2.2412