Derivatives and Applications in Computer Science
Derivatives are crucial in computer science for optimization, machine learning, and algorithm analysis. This section covers derivatives with practical CS applications.
What is a Derivative?
A derivative measures the rate of change of a function. In CS contexts:
- Gradient descent: Uses derivatives to find optimal parameters
- Backpropagation: Computes derivatives to train neural networks
- Algorithm analysis: Derivatives help analyze growth rates
Basic Derivative Rules
Power Rule
If f(x) = x^n, then f’(x) = nx^(n-1)
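Before plotting the n = 2 case below, a quick symbolic check is a handy sanity test. This is a minimal sketch assuming SymPy is available (SymPy is not used elsewhere in this section); it confirms the rule for a general exponent:
import sympy as sp

x, n = sp.symbols('x n', positive=True)
# d/dx of x**n; SymPy returns n*x**(n - 1), matching the power rule
print(sp.diff(x**n, x))
# A concrete instance: d/dx of x**5 is 5*x**4
print(sp.diff(x**5, x))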
import numpy as np
import matplotlib.pyplot as plt
# Example: f(x) = x^2, f'(x) = 2x
x = np.linspace(-3, 3, 100)
f = x**2
f_prime = 2*x
plt.figure(figsize=(10, 6))
plt.subplot(1, 2, 1)
plt.plot(x, f, 'b-', label='f(x) = x²')
plt.title('Original Function')
plt.grid(True)
plt.legend()
plt.subplot(1, 2, 2)
plt.plot(x, f_prime, 'r-', label="f'(x) = 2x")
plt.title('Derivative')
plt.grid(True)
plt.legend()
plt.show()

Chain Rule
If f(x) = g(h(x)), then f’(x) = g’(h(x)) × h’(x)
This rule is the foundation of backpropagation in neural networks.
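To see the chain rule numerically, here is a minimal sketch (the composite f(x) = sin(x²) is an illustrative choice, not taken from the examples below) comparing the chain-rule derivative with a central-difference approximation:
import numpy as np

def f(x):
    # f(x) = sin(x^2): a composition g(h(x)) with g = sin and h(x) = x^2
    return np.sin(x**2)

def f_prime(x):
    # Chain rule: g'(h(x)) * h'(x) = cos(x^2) * 2x
    return np.cos(x**2) * 2*x

x0 = 1.3
h = 1e-6
numerical = (f(x0 + h) - f(x0 - h)) / (2 * h)  # central difference approximation
print(f"Chain rule derivative at x0: {f_prime(x0):.6f}")
print(f"Numerical approximation:     {numerical:.6f}")
The two values agree to several decimal places, which makes this kind of check a useful unit test for hand-derived gradients.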
Applications in Machine Learning
1. Gradient Descent
Gradient descent uses derivatives to minimize cost functions:
import numpy as np
def gradient_descent(f, df, x0, learning_rate=0.01, iterations=1000):
    """
    Minimize function f using gradient descent
    f: function to minimize
    df: derivative of f
    x0: starting point
    """
    x = x0
    history = [x]
    for i in range(iterations):
        gradient = df(x)
        x = x - learning_rate * gradient
        history.append(x)
    return x, history

# Example: minimize f(x) = x^2 + 2x + 1 = (x + 1)^2, whose minimum is at x = -1
def f(x):
    return x**2 + 2*x + 1

def df(x):
    return 2*x + 2

# Find minimum
minimum, path = gradient_descent(f, df, x0=5.0)
print(f"Minimum found at x = {minimum:.4f}")
print(f"Function value: {f(minimum):.4f}")

2. Neural Network Backpropagation
import numpy as np
class SimpleNeuron:
    def __init__(self, weights, bias):
        self.weights = np.array(weights)
        self.bias = bias

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        s = self.sigmoid(x)
        return s * (1 - s)

    def forward(self, inputs):
        self.inputs = np.array(inputs)
        self.z = np.dot(self.weights, self.inputs) + self.bias
        self.output = self.sigmoid(self.z)
        return self.output

    def backward(self, error):
        # Compute gradients using the chain rule
        d_output = error
        d_z = d_output * self.sigmoid_derivative(self.z)
        d_weights = d_z * self.inputs
        d_bias = d_z
        d_inputs = d_z * self.weights
        return d_weights, d_bias, d_inputs

# Example usage
neuron = SimpleNeuron([0.5, -0.3], 0.1)
output = neuron.forward([1.0, 2.0])
gradients = neuron.backward(0.1)  # Small error
print(f"Output: {output:.4f}")
print(f"Weight gradients: {gradients[0]}")

Applications in Computer Graphics
1. Parametric Curves
The derivative of a parametric curve is its tangent vector, which is used to orient and shade smooth curves in graphics:
import numpy as np
import matplotlib.pyplot as plt
def bezier_curve(t, P0, P1, P2, P3):
    """Cubic Bezier curve"""
    return (1-t)**3 * P0 + 3*(1-t)**2*t * P1 + 3*(1-t)*t**2 * P2 + t**3 * P3

def bezier_derivative(t, P0, P1, P2, P3):
    """Derivative of cubic Bezier curve (tangent vector)"""
    return 3*(1-t)**2 * (P1-P0) + 6*(1-t)*t * (P2-P1) + 3*t**2 * (P3-P2)

# Control points
P0, P1, P2, P3 = np.array([0, 0]), np.array([1, 2]), np.array([3, 2]), np.array([4, 0])
t = np.linspace(0, 1, 100)
curve = np.array([bezier_curve(ti, P0, P1, P2, P3) for ti in t])
tangents = np.array([bezier_derivative(ti, P0, P1, P2, P3) for ti in t])

plt.figure(figsize=(10, 6))
plt.plot(curve[:, 0], curve[:, 1], 'b-', linewidth=2, label='Bezier Curve')
plt.plot([P0[0], P1[0], P2[0], P3[0]], [P0[1], P1[1], P2[1], P3[1]], 'ro--', alpha=0.5, label='Control Points')

# Show tangent vectors at a few points
for i in range(0, len(t), 20):
    start = curve[i]
    direction = tangents[i] * 0.1  # Scale for visibility
    plt.arrow(start[0], start[1], direction[0], direction[1],
              head_width=0.05, head_length=0.05, fc='red', ec='red')

plt.axis('equal')
plt.grid(True)
plt.legend()
plt.title('Bezier Curve with Tangent Vectors')
plt.show()

2. Surface Normals
In 3D graphics, derivatives help compute surface normals:
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
def surface_function(x, y):
    """Example surface: z = x^2 + y^2"""
    return x**2 + y**2

def surface_normal(x, y):
    """Compute normal vector using partial derivatives"""
    # ∂z/∂x = 2x, ∂z/∂y = 2y
    dz_dx = 2*x
    dz_dy = 2*y
    # Normal vector is (-∂z/∂x, -∂z/∂y, 1)
    normal = np.array([-dz_dx, -dz_dy, 1])
    # Normalize
    return normal / np.linalg.norm(normal)

# Create surface
x = np.linspace(-2, 2, 20)
y = np.linspace(-2, 2, 20)
X, Y = np.meshgrid(x, y)
Z = surface_function(X, Y)

# Compute normals
normals = np.array([[surface_normal(xi, yi) for xi, yi in zip(row_x, row_y)]
                    for row_x, row_y in zip(X, Y)])

fig = plt.figure(figsize=(12, 5))

# Plot surface
ax1 = fig.add_subplot(121, projection='3d')
ax1.plot_surface(X, Y, Z, alpha=0.7, cmap='viridis')
ax1.set_title('Surface: z = x² + y²')

# Plot surface with normals
ax2 = fig.add_subplot(122, projection='3d')
ax2.plot_surface(X, Y, Z, alpha=0.3, cmap='viridis')

# Show normal vectors at selected points
step = 3
for i in range(0, X.shape[0], step):
    for j in range(0, X.shape[1], step):
        start = [X[i, j], Y[i, j], Z[i, j]]
        direction = normals[i, j] * 0.5
        ax2.quiver(start[0], start[1], start[2],
                   direction[0], direction[1], direction[2],
                   color='red', arrow_length_ratio=0.1)

ax2.set_title('Surface with Normal Vectors')
plt.show()

Applications in Algorithm Analysis
Growth Rate Analysis
The derivative of a running-time function shows how quickly its cost grows, which helps when comparing algorithm complexities:
import numpy as np
import matplotlib.pyplot as plt
def analyze_growth_rate(f, df, x_range):
    """Analyze how fast a function grows"""
    x = np.linspace(x_range[0], x_range[1], 1000)
    y = f(x)
    dy = df(x)

    plt.figure(figsize=(12, 4))
    plt.subplot(1, 3, 1)
    plt.plot(x, y, 'b-', linewidth=2)
    plt.title('Function f(x)')
    plt.grid(True)

    plt.subplot(1, 3, 2)
    plt.plot(x, dy, 'r-', linewidth=2)
    plt.title("Growth Rate f'(x)")
    plt.grid(True)

    plt.subplot(1, 3, 3)
    plt.loglog(x[x > 0], y[x > 0], 'b-', linewidth=2, label='f(x)')
    plt.loglog(x[x > 0], dy[x > 0], 'r-', linewidth=2, label="f'(x)")
    plt.title('Log-Log Plot')
    plt.legend()
    plt.grid(True)

    plt.tight_layout()
    plt.show()

# Compare different complexity functions
functions = [
    (lambda x: x, lambda x: np.ones_like(x), "O(n)"),
    (lambda x: x*np.log(x), lambda x: np.log(x) + 1, "O(n log n)"),
    (lambda x: x**2, lambda x: 2*x, "O(n²)"),
    (lambda x: x**3, lambda x: 3*x**2, "O(n³)")
]

x = np.linspace(1, 100, 1000)
plt.figure(figsize=(12, 8))
for i, (f, df, label) in enumerate(functions):
    plt.subplot(2, 2, i+1)
    y = f(x)
    dy = df(x)
    plt.plot(x, y, 'b-', linewidth=2, label=f'f(x) = {label}')
    plt.plot(x, dy, 'r--', linewidth=2, label="f'(x)")
    plt.title(f'Growth Analysis: {label}')
    plt.legend()
    plt.grid(True)
plt.tight_layout()
plt.show()

Optimization Problems
Finding Critical Points
Critical points, where f'(x) = 0, are candidates for a function's minima and maxima:
import numpy as np
from scipy.optimize import minimize_scalar
import matplotlib.pyplot as plt
def find_critical_points(f, df, x_range):
    """Find critical points where f'(x) = 0"""
    x = np.linspace(x_range[0], x_range[1], 1000)
    y = f(x)
    dy = df(x)

    # Find where the derivative changes sign, then refine each root
    critical_points = []
    for i in range(len(dy) - 1):
        if dy[i] * dy[i+1] < 0:  # Sign change brackets a zero of f'
            # Use a more precise bounded search for the zero of |f'(x)|
            result = minimize_scalar(lambda x: abs(df(x)),
                                     bounds=(x[i], x[i+1]),
                                     method='bounded')
            critical_points.append(result.x)

    plt.figure(figsize=(12, 4))
    plt.subplot(1, 2, 1)
    plt.plot(x, y, 'b-', linewidth=2, label='f(x)')
    for cp in critical_points:
        plt.plot(cp, f(cp), 'ro', markersize=8, label=f'Critical point: x={cp:.2f}')
    plt.title('Function with Critical Points')
    plt.legend()
    plt.grid(True)

    plt.subplot(1, 2, 2)
    plt.plot(x, dy, 'r-', linewidth=2, label="f'(x)")
    plt.axhline(y=0, color='k', linestyle='--', alpha=0.5)
    for cp in critical_points:
        plt.plot(cp, df(cp), 'ro', markersize=8)
    plt.title('Derivative (zeros are critical points)')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

    return critical_points

# Example: f(x) = x^3 - 3x^2 + 2x + 1
def f(x):
    return x**3 - 3*x**2 + 2*x + 1

def df(x):
    return 3*x**2 - 6*x + 2

critical_points = find_critical_points(f, df, (-1, 3))
print("Critical points found at:", critical_points)

Practical Exercises
Exercise 1: Implement Gradient Descent for Linear Regression
import numpy as np
import matplotlib.pyplot as plt
def linear_regression_gradient_descent():
    # Generate sample data
    np.random.seed(42)
    X = np.random.randn(100, 1)
    y = 2 * X.flatten() + 1 + 0.1 * np.random.randn(100)

    # Initialize parameters
    w = 0.0  # weight
    b = 0.0  # bias
    learning_rate = 0.01
    iterations = 1000

    # Cost function: MSE
    def cost_function(w, b, X, y):
        predictions = w * X.flatten() + b
        return np.mean((predictions - y)**2)

    # Gradients
    def compute_gradients(w, b, X, y):
        m = len(y)
        predictions = w * X.flatten() + b
        dw = (2/m) * np.sum((predictions - y) * X.flatten())
        db = (2/m) * np.sum(predictions - y)
        return dw, db

    # Training loop
    costs = []
    for i in range(iterations):
        cost = cost_function(w, b, X, y)
        costs.append(cost)
        dw, db = compute_gradients(w, b, X, y)
        w -= learning_rate * dw
        b -= learning_rate * db

    # Plot results
    plt.figure(figsize=(12, 4))
    plt.subplot(1, 2, 1)
    plt.scatter(X, y, alpha=0.5, label='Data')
    plt.plot(X, w * X.flatten() + b, 'r-', linewidth=2, label=f'Fitted line: y = {w:.2f}x + {b:.2f}')
    plt.xlabel('X')
    plt.ylabel('y')
    plt.legend()
    plt.title('Linear Regression Result')

    plt.subplot(1, 2, 2)
    plt.plot(costs)
    plt.xlabel('Iteration')
    plt.ylabel('Cost')
    plt.title('Cost Function During Training')
    plt.grid(True)
    plt.tight_layout()
    plt.show()

    print(f"Final parameters: w = {w:.4f}, b = {b:.4f}")
    print("True parameters: w = 2.0, b = 1.0")

linear_regression_gradient_descent()

Exercise 2: Numerical Differentiation
import numpy as np
import matplotlib.pyplot as plt

def numerical_derivative(f, x, h=1e-7):
    """Compute numerical derivative using central difference"""
    return (f(x + h) - f(x - h)) / (2 * h)

def compare_derivatives():
    """Compare analytical and numerical derivatives"""
    # Test function: f(x) = x^3 + 2x^2 - x + 1
    def f(x):
        return x**3 + 2*x**2 - x + 1

    def analytical_derivative(x):
        return 3*x**2 + 4*x - 1

    x_values = np.linspace(-3, 3, 100)
    analytical = [analytical_derivative(x) for x in x_values]
    numerical = [numerical_derivative(f, x) for x in x_values]

    plt.figure(figsize=(10, 6))
    plt.plot(x_values, analytical, 'b-', linewidth=2, label='Analytical')
    plt.plot(x_values, numerical, 'r--', linewidth=2, label='Numerical')
    plt.xlabel('x')
    plt.ylabel("f'(x)")
    plt.title('Analytical vs Numerical Derivatives')
    plt.legend()
    plt.grid(True)
    plt.show()

    # Compute error
    error = np.abs(np.array(analytical) - np.array(numerical))
    print(f"Maximum error: {np.max(error):.2e}")
    print(f"Average error: {np.mean(error):.2e}")

compare_derivatives()

Summary
Derivatives are essential in computer science for:
- Optimization: Finding optimal solutions using gradient-based methods
- Machine Learning: Training neural networks through backpropagation
- Computer Graphics: Creating smooth curves and computing surface properties
- Algorithm Analysis: Understanding growth rates and complexity
- Numerical Methods: Approximating solutions to complex problems
The key insight is that derivatives measure rates of change, and that idea underlies the optimization and learning algorithms at the core of modern machine learning systems.