Derivatives and Applications in Computer Science
Derivatives are crucial in computer science for optimization, machine learning, and algorithm analysis. This section covers derivatives with practical CS applications.
What is a Derivative?
A derivative measures the rate of change of a function. In CS contexts: - Gradient descent: Uses derivatives to find optimal parameters - Backpropagation: Computes derivatives to train neural networks - Algorithm analysis: Derivatives help analyze growth rates
Basic Derivative Rules
Power Rule
If f(x) = x^n, then f’(x) = nx^(n-1)
import numpy as np
import matplotlib.pyplot as plt
# Example: f(x) = x^2, f'(x) = 2x
= np.linspace(-3, 3, 100)
x = x**2
f = 2*x
f_prime
=(10, 6))
plt.figure(figsize1, 2, 1)
plt.subplot('b-', label='f(x) = x²')
plt.plot(x, f, 'Original Function')
plt.title(True)
plt.grid(
plt.legend()
1, 2, 2)
plt.subplot('r-', label="f'(x) = 2x")
plt.plot(x, f_prime, 'Derivative')
plt.title(True)
plt.grid(
plt.legend() plt.show()
Chain Rule
If f(x) = g(h(x)), then f’(x) = g’(h(x)) × h’(x)
This is fundamental in neural networks for backpropagation.
Applications in Machine Learning
1. Gradient Descent
Gradient descent uses derivatives to minimize cost functions:
import numpy as np
def gradient_descent(f, df, x0, learning_rate=0.01, iterations=1000):
"""
Minimize function f using gradient descent
f: function to minimize
df: derivative of f
x0: starting point
"""
= x0
x = [x]
history
for i in range(iterations):
= df(x)
gradient = x - learning_rate * gradient
x
history.append(x)
return x, history
# Example: minimize f(x) = x^2 + 2x + 1
def f(x):
return x**2 + 2*x + 1
def df(x):
return 2*x + 2
# Find minimum
= gradient_descent(f, df, x0=5.0)
minimum, path print(f"Minimum found at x = {minimum:.4f}")
print(f"Function value: {f(minimum):.4f}")
2. Neural Network Backpropagation
import numpy as np
class SimpleNeuron:
def __init__(self, weights, bias):
self.weights = np.array(weights)
self.bias = bias
def sigmoid(self, x):
return 1 / (1 + np.exp(-x))
def sigmoid_derivative(self, x):
= self.sigmoid(x)
s return s * (1 - s)
def forward(self, inputs):
self.inputs = np.array(inputs)
self.z = np.dot(self.weights, self.inputs) + self.bias
self.output = self.sigmoid(self.z)
return self.output
def backward(self, error):
# Compute gradients using chain rule
= error
d_output = d_output * self.sigmoid_derivative(self.z)
d_z = d_z * self.inputs
d_weights = d_z
d_bias = d_z * self.weights
d_inputs
return d_weights, d_bias, d_inputs
# Example usage
= SimpleNeuron([0.5, -0.3], 0.1)
neuron = neuron.forward([1.0, 2.0])
output = neuron.backward(0.1) # Small error
gradients print(f"Output: {output:.4f}")
print(f"Weight gradients: {gradients[0]}")
Applications in Computer Graphics
1. Parametric Curves
Derivatives help create smooth curves in graphics:
import numpy as np
import matplotlib.pyplot as plt
def bezier_curve(t, P0, P1, P2, P3):
"""Cubic Bezier curve"""
return (1-t)**3 * P0 + 3*(1-t)**2*t * P1 + 3*(1-t)*t**2 * P2 + t**3 * P3
def bezier_derivative(t, P0, P1, P2, P3):
"""Derivative of cubic Bezier curve (tangent vector)"""
return 3*(1-t)**2 * (P1-P0) + 6*(1-t)*t * (P2-P1) + 3*t**2 * (P3-P2)
# Control points
= np.array([0,0]), np.array([1,2]), np.array([3,2]), np.array([4,0])
P0, P1, P2, P3
= np.linspace(0, 1, 100)
t = np.array([bezier_curve(ti, P0, P1, P2, P3) for ti in t])
curve = np.array([bezier_derivative(ti, P0, P1, P2, P3) for ti in t])
tangents
=(10, 6))
plt.figure(figsize0], curve[:, 1], 'b-', linewidth=2, label='Bezier Curve')
plt.plot(curve[:, 0], P1[0], P2[0], P3[0]], [P0[1], P1[1], P2[1], P3[1]], 'ro--', alpha=0.5, label='Control Points')
plt.plot([P0[
# Show tangent vectors at a few points
for i in range(0, len(t), 20):
= curve[i]
start = tangents[i] * 0.1 # Scale for visibility
direction 0], start[1], direction[0], direction[1],
plt.arrow(start[=0.05, head_length=0.05, fc='red', ec='red')
head_width
'equal')
plt.axis(True)
plt.grid(
plt.legend()'Bezier Curve with Tangent Vectors')
plt.title( plt.show()
2. Surface Normals
In 3D graphics, derivatives help compute surface normals:
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
def surface_function(x, y):
"""Example surface: z = x^2 + y^2"""
return x**2 + y**2
def surface_normal(x, y):
"""Compute normal vector using partial derivatives"""
# ∂z/∂x = 2x, ∂z/∂y = 2y
= 2*x
dz_dx = 2*y
dz_dy
# Normal vector is (-∂z/∂x, -∂z/∂y, 1)
= np.array([-dz_dx, -dz_dy, 1])
normal # Normalize
return normal / np.linalg.norm(normal)
# Create surface
= np.linspace(-2, 2, 20)
x = np.linspace(-2, 2, 20)
y = np.meshgrid(x, y)
X, Y = surface_function(X, Y)
Z
# Compute normals
= np.array([[surface_normal(xi, yi) for xi, yi in zip(row_x, row_y)]
normals for row_x, row_y in zip(X, Y)])
= plt.figure(figsize=(12, 5))
fig
# Plot surface
= fig.add_subplot(121, projection='3d')
ax1 =0.7, cmap='viridis')
ax1.plot_surface(X, Y, Z, alpha'Surface: z = x² + y²')
ax1.set_title(
# Plot surface with normals
= fig.add_subplot(122, projection='3d')
ax2 =0.3, cmap='viridis')
ax2.plot_surface(X, Y, Z, alpha
# Show normal vectors at selected points
= 3
step for i in range(0, X.shape[0], step):
for j in range(0, X.shape[1], step):
= [X[i,j], Y[i,j], Z[i,j]]
start = normals[i,j] * 0.5
direction 0], start[1], start[2],
ax2.quiver(start[0], direction[1], direction[2],
direction[='red', arrow_length_ratio=0.1)
color
'Surface with Normal Vectors')
ax2.set_title( plt.show()
Applications in Algorithm Analysis
Growth Rate Analysis
Derivatives help analyze algorithm complexity:
import numpy as np
import matplotlib.pyplot as plt
def analyze_growth_rate(f, df, x_range):
"""Analyze how fast a function grows"""
= np.linspace(x_range[0], x_range[1], 1000)
x = f(x)
y = df(x)
dy
=(12, 4))
plt.figure(figsize
1, 3, 1)
plt.subplot('b-', linewidth=2)
plt.plot(x, y, 'Function f(x)')
plt.title(True)
plt.grid(
1, 3, 2)
plt.subplot('r-', linewidth=2)
plt.plot(x, dy, "Growth Rate f'(x)")
plt.title(True)
plt.grid(
1, 3, 3)
plt.subplot(>0], y[x>0], 'b-', linewidth=2, label='f(x)')
plt.loglog(x[x>0], dy[x>0], 'r-', linewidth=2, label="f'(x)")
plt.loglog(x[x'Log-Log Plot')
plt.title(
plt.legend()True)
plt.grid(
plt.tight_layout()
plt.show()
# Compare different complexity functions
= [
functions lambda x: x, lambda x: np.ones_like(x), "O(n)"),
(lambda x: x*np.log(x), lambda x: np.log(x) + 1, "O(n log n)"),
(lambda x: x**2, lambda x: 2*x, "O(n²)"),
(lambda x: x**3, lambda x: 3*x**2, "O(n³)")
(
]
= np.linspace(1, 100, 1000)
x =(12, 8))
plt.figure(figsize
for i, (f, df, label) in enumerate(functions):
2, 2, i+1)
plt.subplot(= f(x)
y = df(x)
dy
'b-', linewidth=2, label=f'f(x) = {label}')
plt.plot(x, y, 'r--', linewidth=2, label="f'(x)")
plt.plot(x, dy, f'Growth Analysis: {label}')
plt.title(
plt.legend()True)
plt.grid(
plt.tight_layout() plt.show()
Optimization Problems
Finding Critical Points
import numpy as np
from scipy.optimize import minimize_scalar
import matplotlib.pyplot as plt
def find_critical_points(f, df, x_range):
"""Find critical points where f'(x) = 0"""
= np.linspace(x_range[0], x_range[1], 1000)
x = f(x)
y = df(x)
dy
# Find where derivative is approximately zero
= []
critical_points for i in range(1, len(dy)-1):
if dy[i-1] * dy[i+1] < 0: # Sign change
# Use more precise method
= minimize_scalar(lambda x: abs(df(x)),
result =(x[i-1], x[i+1]),
bounds='bounded')
method
critical_points.append(result.x)
=(12, 4))
plt.figure(figsize
1, 2, 1)
plt.subplot('b-', linewidth=2, label='f(x)')
plt.plot(x, y, for cp in critical_points:
'ro', markersize=8, label=f'Critical point: x={cp:.2f}')
plt.plot(cp, f(cp), 'Function with Critical Points')
plt.title(
plt.legend()True)
plt.grid(
1, 2, 2)
plt.subplot('r-', linewidth=2, label="f'(x)")
plt.plot(x, dy, =0, color='k', linestyle='--', alpha=0.5)
plt.axhline(yfor cp in critical_points:
'ro', markersize=8)
plt.plot(cp, df(cp), 'Derivative (zeros are critical points)')
plt.title(
plt.legend()True)
plt.grid(
plt.tight_layout()
plt.show()
return critical_points
# Example: f(x) = x^3 - 3x^2 + 2x + 1
def f(x):
return x**3 - 3*x**2 + 2*x + 1
def df(x):
return 3*x**2 - 6*x + 2
= find_critical_points(f, df, (-1, 3))
critical_points print("Critical points found at:", critical_points)
Practical Exercises
Exercise 1: Implement Gradient Descent for Linear Regression
import numpy as np
import matplotlib.pyplot as plt
def linear_regression_gradient_descent():
# Generate sample data
42)
np.random.seed(= np.random.randn(100, 1)
X = 2 * X.flatten() + 1 + 0.1 * np.random.randn(100)
y
# Initialize parameters
= 0.0 # weight
w = 0.0 # bias
b = 0.01
learning_rate = 1000
iterations
# Cost function: MSE
def cost_function(w, b, X, y):
= w * X.flatten() + b
predictions return np.mean((predictions - y)**2)
# Gradients
def compute_gradients(w, b, X, y):
= len(y)
m = w * X.flatten() + b
predictions = (2/m) * np.sum((predictions - y) * X.flatten())
dw = (2/m) * np.sum(predictions - y)
db return dw, db
# Training loop
= []
costs for i in range(iterations):
= cost_function(w, b, X, y)
cost
costs.append(cost)
= compute_gradients(w, b, X, y)
dw, db -= learning_rate * dw
w -= learning_rate * db
b
# Plot results
=(12, 4))
plt.figure(figsize
1, 2, 1)
plt.subplot(=0.5, label='Data')
plt.scatter(X, y, alpha* X.flatten() + b, 'r-', linewidth=2, label=f'Fitted line: y = {w:.2f}x + {b:.2f}')
plt.plot(X, w 'X')
plt.xlabel('y')
plt.ylabel(
plt.legend()'Linear Regression Result')
plt.title(
1, 2, 2)
plt.subplot(
plt.plot(costs)'Iteration')
plt.xlabel('Cost')
plt.ylabel('Cost Function During Training')
plt.title(True)
plt.grid(
plt.tight_layout()
plt.show()
print(f"Final parameters: w = {w:.4f}, b = {b:.4f}")
print(f"True parameters: w = 2.0, b = 1.0")
linear_regression_gradient_descent()
Exercise 2: Numerical Differentiation
def numerical_derivative(f, x, h=1e-7):
"""Compute numerical derivative using central difference"""
return (f(x + h) - f(x - h)) / (2 * h)
def compare_derivatives():
"""Compare analytical and numerical derivatives"""
# Test function: f(x) = x^3 + 2x^2 - x + 1
def f(x):
return x**3 + 2*x**2 - x + 1
def analytical_derivative(x):
return 3*x**2 + 4*x - 1
= np.linspace(-3, 3, 100)
x_values = [analytical_derivative(x) for x in x_values]
analytical = [numerical_derivative(f, x) for x in x_values]
numerical
=(10, 6))
plt.figure(figsize'b-', linewidth=2, label='Analytical')
plt.plot(x_values, analytical, 'r--', linewidth=2, label='Numerical')
plt.plot(x_values, numerical, 'x')
plt.xlabel("f'(x)")
plt.ylabel('Analytical vs Numerical Derivatives')
plt.title(
plt.legend()True)
plt.grid(
plt.show()
# Compute error
= np.abs(np.array(analytical) - np.array(numerical))
error print(f"Maximum error: {np.max(error):.2e}")
print(f"Average error: {np.mean(error):.2e}")
compare_derivatives()
Summary
Derivatives are essential in computer science for:
- Optimization: Finding optimal solutions using gradient-based methods
- Machine Learning: Training neural networks through backpropagation
- Computer Graphics: Creating smooth curves and computing surface properties
- Algorithm Analysis: Understanding growth rates and complexity
- Numerical Methods: Approximating solutions to complex problems
The key insight is that derivatives measure rates of change, which is fundamental to optimization and learning algorithms that form the backbone of modern AI and machine learning systems.