In this lecture:
Discussing how to model/measure different ML problems
- Deep learning tips and tricks
- Review of semantic segmentation
- Explanation of object detection and measurement
Deep learning tips and tricks¶
Batch normalization:¶
Training Deep Neural Networks is complicated by the fact that the distribution of each layer’s inputs changes during training, as the parameters of the previous layers change. This slows down the training by requiring lower learning rates and careful parameter initialization, and makes it notoriously hard to train models with saturating nonlinearities. We refer to this phenomenon as internal covariate shift, and address the problem by normalizing layer inputs. (Ioffe & Szegedy, 2015)
Batch normalization is simply another model layer where:
Input: Values of $x$ over a mini-batch: $\mathcal{B} = \{x_1 \dots x_m\}$;
Parameters to be learned: $\gamma, \beta$
Output: $\{y_i = \text{BN}_{\gamma,\beta}(x_i)\}$
Algorithm 1: Batch Normalizing Transform, applied to activation $x$ over a mini-batch.
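Concretely, the transform (from Ioffe & Szegedy, 2015; $\epsilon$ is a small constant added for numerical stability) computes
$$ \begin{align} \mu_{\mathcal{B}} &= \frac{1}{m}\sum_{i=1}^{m}x_i && \textrm{(mini-batch mean)}\\ \sigma_{\mathcal{B}}^2 &= \frac{1}{m}\sum_{i=1}^{m}\left(x_i-\mu_{\mathcal{B}}\right)^2 && \textrm{(mini-batch variance)}\\ \hat{x}_i &= \frac{x_i-\mu_{\mathcal{B}}}{\sqrt{\sigma_{\mathcal{B}}^2+\epsilon}} && \textrm{(normalize)}\\ y_i &= \gamma\hat{x}_i+\beta \equiv \textrm{BN}_{\gamma,\beta}(x_i) && \textrm{(scale and shift)} \end{align} $$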
This will normalize the mini-batch as seen below:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Simulate a batch of pre-activation outputs
np.random.seed(0)
raw_activations = np.random.randn(1000) * 5 + 10 # High variance, non-zero mean
# Apply batch normalization manually (eps guards against division by zero,
# matching the transform above)
mean = np.mean(raw_activations)
var = np.var(raw_activations)
eps = 1e-5
normalized = (raw_activations - mean) / np.sqrt(var + eps)
# Apply affine transform (optional: here gamma=1, beta=0)
gamma = 1.0
beta = 0.0
batchnorm_output = gamma * normalized + beta
# Plot the distributions
plt.figure(figsize=(10, 5))
sns.kdeplot(raw_activations, label='Before BatchNorm', fill=True, linewidth=2)
sns.kdeplot(batchnorm_output, label='After BatchNorm', fill=True, linewidth=2)
plt.title("Effect of Batch Normalization on Activation Distribution", fontsize=14)
plt.xlabel("Activation Value")
plt.ylabel("Density")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
Batch normalization leads to more stable and improved training:

Look at ResNet-18 (a pre-trained ImageNet model) for its use of batch normalization:
### from https://debuggercafe.com/implementing-resnet18-in-pytorch-from-scratch/
### Note: `BasicBlock` (the standard two-convolution residual block) is
### defined earlier in the linked post and omitted here.
import torch
import torch.nn as nn
from torch import Tensor
from typing import Type

class ResNet(nn.Module):
    def __init__(
        self,
        img_channels: int,
        num_layers: int,
        block: Type[BasicBlock],
        num_classes: int = 1000
    ) -> None:
        super(ResNet, self).__init__()
        if num_layers == 18:
            # The following `layers` list defines the number of `BasicBlock`
            # to use to build the network and how many basic blocks to stack
            # together.
            layers = [2, 2, 2, 2]
            self.expansion = 1
        self.in_channels = 64
        # All ResNets (18 to 152) contain a Conv2d => BN => ReLU for the first
        # three layers. Here, kernel size is 7.
        self.conv1 = nn.Conv2d(
            in_channels=img_channels,
            out_channels=self.in_channels,
            kernel_size=7,
            stride=2,
            padding=3,
            bias=False
        )
        self.bn1 = nn.BatchNorm2d(self.in_channels)  # <-- batch normalization
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512*self.expansion, num_classes)

    def _make_layer(
        self,
        block: Type[BasicBlock],
        out_channels: int,
        blocks: int,
        stride: int = 1
    ) -> nn.Sequential:
        downsample = None
        if stride != 1:
            # This should pass from `layer2` to `layer4` or
            # when building ResNets50 and above. Section 3.3 of the paper
            # Deep Residual Learning for Image Recognition
            # (https://arxiv.org/pdf/1512.03385v1.pdf).
            downsample = nn.Sequential(
                nn.Conv2d(
                    self.in_channels,
                    out_channels*self.expansion,
                    kernel_size=1,
                    stride=stride,
                    bias=False
                ),
                nn.BatchNorm2d(out_channels * self.expansion),
            )
        layers = []
        layers.append(
            block(
                self.in_channels, out_channels, stride, self.expansion, downsample
            )
        )
        self.in_channels = out_channels * self.expansion
        for i in range(1, blocks):
            layers.append(block(
                self.in_channels,
                out_channels,
                expansion=self.expansion
            ))
        return nn.Sequential(*layers)

    def forward(self, x: Tensor) -> Tensor:
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        # The spatial dimension of the final layer's feature
        # map should be (7, 7) for all ResNets.
        print('Dimensions of the last convolutional feature map: ', x.shape)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x
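As a quick check on how pervasively ResNets use batch normalization, here is a small sketch using the pretrained torchvision ResNet-18 (the weights enum assumes a recent torchvision version):
import torch.nn as nn
import torchvision.models as models

# Load the pretrained ImageNet ResNet-18 from torchvision
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

# Collect every batch normalization layer: one follows each convolution
bn_layers = [m for m in model.modules() if isinstance(m, nn.BatchNorm2d)]
print(f"Number of BatchNorm2d layers in ResNet-18: {len(bn_layers)}")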
Semantic segmentation review¶

Consider the above figure depicting a semantic segmentation example. Each pixel in this image is assigned its own label, just as we provide an image-level label for image classification. To accomplish such a task, we often use encoder-decoder CNN architectures, also known as autoencoders. The encoder stage proceeds like a normal CNN, where successive convolutional and pooling layers reduce the spatial resolution of feature maps. The encoder and decoder stages meet at a bottleneck, where the decoder stage begins upsampling the feature maps back towards the desired output resolution. Where the encoder stage applies pooling or strided convolution, the decoder stage performs upsampling or transposed convolution to increase the spatial resolution of feature maps. Finally, once the desired resolution is reached, we may use a $1\times 1$ 2D convolution layer to combine the feature maps in the last layer and provide final class scores at each pixel, just as a fully connected layer does for regular image classification.
Such CNNs have no fully-connected layers (and in fact may avoid all pooling layers as well), and thus we refer to them as fully convolutional networks (FCNs). An example autoencoder model is shown below.

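To make the architecture concrete, below is a minimal sketch of a fully convolutional encoder-decoder (the depth and channel widths here are illustrative choices, not from any particular published model):
import torch
import torch.nn as nn

class TinySegNet(nn.Module):
    """A toy encoder-decoder FCN: two pooling stages down, two transposed
    convolutions up, and a 1x1 convolution for per-pixel class scores."""
    def __init__(self, num_classes: int = 3):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1), nn.ReLU(),
            nn.MaxPool2d(2),  # H/2 x W/2
            nn.Conv2d(16, 32, kernel_size=3, padding=1), nn.ReLU(),
            nn.MaxPool2d(2),  # H/4 x W/4 (bottleneck)
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(32, 16, kernel_size=2, stride=2), nn.ReLU(),  # H/2 x W/2
            nn.ConvTranspose2d(16, 16, kernel_size=2, stride=2), nn.ReLU(),  # H x W
            nn.Conv2d(16, num_classes, kernel_size=1),  # per-pixel class scores
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.decoder(self.encoder(x))

x = torch.randn(1, 3, 64, 64)
print(TinySegNet()(x).shape)  # torch.Size([1, 3, 64, 64])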
U-Net¶
One highly popular semantic segmentation architecture is known as U-Net (Ronneberger, et al. 2015). The authors introduce a novel autoencoder-based model that incorporates connections between encoder feature maps and decoder feature maps. The intuition behind this choice is to allow feature learning that combines earlier primitive features from the encoder with more complex representations from the decoder. For each connection, the feature maps of the encoder and decoder (at the same spatial resolution) are simply concatenated along the feature channel dimension. The below figure demonstrates a U-Net model with two encoder/decoder stages for a binary segmentation problem; thus, the output of the last convolution is passed through a sigmoid layer. The number of channels is set by a base width $L$, and the label underneath each convolutional layer is its number of output channels. In the below figure, yellow prisms are convolutional layers, orange prisms perform downsampling by maxpooling, blue prisms perform upsampling, and green plus symbols concatenate feature maps.

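The key mechanic in U-Net is the skip connection itself. Below is a minimal sketch (tensor shapes are illustrative) of how encoder and decoder feature maps at the same resolution are combined:
import torch

# Encoder and (upsampled) decoder feature maps at the same spatial resolution
enc_feat = torch.randn(1, 64, 128, 128)  # earlier, more primitive features
dec_feat = torch.randn(1, 64, 128, 128)  # later, more abstract features

# U-Net skip connection: concatenate along the channel dimension (dim=1)
merged = torch.cat([enc_feat, dec_feat], dim=1)
print(merged.shape)  # torch.Size([1, 128, 128, 128])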
Object Detection¶
Problem Statement¶
The objective of any object detection problem is to place bounding boxes over every object of interest in an image and also classify the object inside of that box. Every instance of a given class must be separately identified. Thus, we may have many different objects that must be localized and separated by class as seen below.

Single-stage Object Detection¶
Object detectors are separated into two main groups: single-stage and two-stage detectors. In this lecture, we will focus on single-stage detectors and in particular the Single Shot MultiBox Detector, also known as SSD. An implementation of SSD and a lightweight SSDLite version may be found in PyTorch here. At a high level, the SSD model creates many convolutional feature maps that have hard-coded anchors or default boxes in the image. The SSD model, and similar single-stage models, use multiple scales or resolutions for these pre-defined boxes. For the feature maps at a given scale, each pixel location at that scale defines a center point relative to the image dimensions, e.g. for a $16\times 16$ feature map, the pixel at location $(2, 1)$ would be centered at $(2.5/16, 1.5/16)$ of the height and width of the image (accounting for offsetting to the center of each square in the image grid). In addition to the center point and a given scale, we may also pre-determine certain aspect ratios, e.g. 1:1, 1:2, 3:1 for width:height. The below figure depicts example default boxes at two scales in SSD.

For the SSD model, we have $m$ default box scales. For scale $s_k, k\in[1, m]$, we determine the scale as $$ s_k= s_{\textrm{min}}+\frac{s_{\textrm{max}}-s_{\textrm{min}}}{m-1}(k-1), $$ where $s_{\textrm{min}}$ and $s_{\textrm{max}}$ are chosen as 0.2 and 0.9 in the SSD paper but may be adjusted in practice. For a feature map at scale $s_k$ of size $H_k\times W_k$, the center point at pixel location $(i, j)$ will correspond to location $$ \left(\frac{i+0.5}{W_k}, \frac{j+0.5}{H_k}\right) $$ in the original image. Thus, we have a center point and scale for each default box. Lastly, we pre-define aspect ratios $a_r\in\{1, 2, 3, 1/2, 1/3\}$ to set multiple available heights and widths for default boxes at the given center location and scale. The resulting height and width at scale $k$ and aspect ratio $a_r$ are given by $$ \begin{align} h_k^a &= \frac{s_k}{\sqrt{a_r}}\\ w_k^a &= s_k\sqrt{a_r}. \end{align} $$ Finally, the SSD authors also define a sixth aspect ratio that is also 1:1 but at the intermediate scale of $s_k'=\sqrt{s_ks_{k+1}}$. In total, the original SSD model proposed for $300\times 300$ images has $8,\!732$ default boxes across 6 feature scales.
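A minimal sketch of these default box computations, following the formulas above (all values are relative to the image size; the feature map size and pixel location are illustrative):
import numpy as np

m = 6                    # number of feature map scales in SSD
s_min, s_max = 0.2, 0.9  # scale range from the SSD paper

# Scale for each feature map, linearly spaced between s_min and s_max
scales = [s_min + (s_max - s_min) / (m - 1) * (k - 1) for k in range(1, m + 1)]
print(np.round(scales, 2))  # [0.2  0.34 0.48 0.62 0.76 0.9 ]

# Center of the default box at pixel (i, j) of an H_k x W_k feature map
H_k = W_k = 16
i, j = 2, 1
center = ((i + 0.5) / W_k, (j + 0.5) / H_k)  # (0.15625, 0.09375)

# Height and width for each aspect ratio at the first scale
s_k = scales[0]
for a_r in [1, 2, 3, 1 / 2, 1 / 3]:
    h = s_k / np.sqrt(a_r)
    w = s_k * np.sqrt(a_r)
    print(f"aspect ratio {a_r:.2f}: w={w:.3f}, h={h:.3f}")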
Positive and Negative Default Boxes¶
The available default boxes in an SSD model may be separated into positive and negative groups. A default box is determined to be a positive box if it has intersection over union (IoU) of at least 0.5 with a ground-truth object box; thus, multiple default boxes may be positive for a given object box. All other default boxes are negative; therefore, there is a large imbalance where many more negative default boxes will exist than positive ones.

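A minimal sketch of this matching rule using torchvision's box_iou (the boxes here are made up, in (x1, y1, x2, y2) form):
import torch
from torchvision.ops import box_iou

default_boxes = torch.tensor([[ 0.,  0., 50., 50.],
                              [10., 10., 60., 60.],
                              [70., 70., 90., 90.]])
gt_boxes = torch.tensor([[5., 5., 55., 55.]])  # one ground-truth object box

ious = box_iou(default_boxes, gt_boxes)  # shape: (num_default, num_gt)
positive = (ious >= 0.5).any(dim=1)      # positive if IoU >= 0.5 with any object
print(positive)  # tensor([ True,  True, False])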
Predictions at Default Boxes¶
To create one scale of default box predictions in SSD, we use a convolutional layer with $3\times 3$ kernels, padding of one, and stride one. Suppose the input to this layer at scale $k$ is $Z_k\in\mathbb{R}^{C_k\times H_k \times W_k}$. The resulting convolutional layer will have $6\times (C+1+4)$ output channels. The number of output channels is broken down as follows: $$ \textrm{\# of Output Channels} = \underbrace{6}_{\textrm{number of aspect ratios}}\times\left(\underbrace{C+1}_{C~\textrm{foreground classes}+1~\textrm{background class}} + \underbrace{4}_{\textrm{bounding box offsets}}\right) $$
The four bounding box offsets are defined as $(\Delta c_x, \Delta c_y, \Delta w, \Delta h)\in\mathbb{R}^4$ to indicate proportional changes in the center point, width, and height of the bounding box going from pre-defined default box to the ground-truth object box. Thus, these offsets perform a regression task to make minor adjustments to better fit default boxes with significant object overlap. The below figure depicts the architecture of the SSD model.

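A minimal sketch of one such prediction head (the values of $C$, $C_k$, and the feature map size below are illustrative):
import torch
import torch.nn as nn

C = 20                       # number of foreground classes (e.g., PASCAL VOC)
C_k, H_k, W_k = 256, 16, 16  # feature map channels and size at scale k
num_aspect_ratios = 6

# A single 3x3 convolution produces all per-default-box predictions
head = nn.Conv2d(C_k, num_aspect_ratios * (C + 1 + 4),
                 kernel_size=3, stride=1, padding=1)

Z_k = torch.randn(1, C_k, H_k, W_k)
out = head(Z_k)
print(out.shape)  # torch.Size([1, 150, 16, 16]), since 6 * (21 + 4) = 150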
Training Object Detectors¶
Using these default boxes, both positive and negative, we may finally define the necessary loss function to train the fully convolutional SSD model (and similar single-stage detectors). Let $x_{ij}^p\in\{0, 1\}$ be an indicator of matching default box $i$ to ground-truth box $j$ from class $p$, $c$ be the class of the bounding box, $l$ be the predicted bounding box, $g$ be the ground-truth bounding box, and $d$ be the matched default box. The loss function $\mathcal{L}$ is given as $$ \mathcal{L}(x, c, l, g) = \frac{1}{N}\left(\mathcal{L}_{\textrm{cls}}(x, c) +\alpha\mathcal{L}_{\textrm{loc}}(x, l, g)\right), $$ where $N$ is the number of matched default boxes for the given image and $$ \begin{align} \mathcal{L}_{\textrm{loc}}(x, l, g) &= \sum_{i\in\textrm{Positive Boxes}}^{N}\sum_{a\in\{cx, cy, w, h\}}x_{ij}^{p}\,\textrm{smooth}_{L1}(l_i^a-\hat{g}_j^a)\\ \hat{g}_j^{cx} &= \frac{g_j^{cx}-d_i^{cx}}{d_i^{w}}\\ \hat{g}_j^{cy} &= \frac{g_j^{cy}-d_i^{cy}}{d_i^{h}}\\ \hat{g}_j^{w} &= \log\left(\frac{g_j^w}{d_i^w}\right)\\ \hat{g}_j^{h} &= \log\left(\frac{g_j^h}{d_i^h}\right). \end{align} $$ Thus, the localization loss seeks to regress the relative offsets of the well-matched default boxes, i.e. slightly shift the center point or increase/decrease the height or width, using the smooth L1 loss function. The classification loss is simply cross-entropy loss where we apply softmax to the class scores and maximize the probability of the ground-truth class. Negative default boxes then seek to maximize the probability of the background or 0 class. Finally, the value of $\alpha> 0$ is a hyperparameter to balance the classification and localization losses. Empirically, the authors choose this value as one.
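A minimal sketch of computing the regression targets $\hat{g}$ for one matched pair of default box and ground-truth box, each given as (center x, center y, width, height) in relative coordinates (the box values are made up):
import numpy as np

d = np.array([0.50, 0.50, 0.20, 0.20])  # matched default box
g = np.array([0.52, 0.48, 0.25, 0.18])  # ground-truth box

g_hat = np.array([
    (g[0] - d[0]) / d[2],  # center-x offset, scaled by default box width
    (g[1] - d[1]) / d[3],  # center-y offset, scaled by default box height
    np.log(g[2] / d[2]),   # log width ratio
    np.log(g[3] / d[3]),   # log height ratio
])
print(np.round(g_hat, 3))  # [ 0.1   -0.1    0.223 -0.105]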
Inference in Object Detectors¶
In order to evaluate an object detector, we must have a procedure to perform inference and produce final predictions of bounding boxes and object classes. Our model will have thousands of default boxes each with class scores and box offsets; thus, we will first filter out boxes that are likely mostly overlapping with the background and not with objects. A class probability threshold is typically set for the highest-probability foreground class, e.g. 0.02, to identify possible object boxes. Even with such a low threshold, most of the default boxes will be filtered out. For a single object, there may be several remaining default boxes that overlap with the object of interest. Thus, we must devise a way to de-duplicate boxes.
Non-maximum Suppression (NMS)¶
The method of non-maximum suppression (NMS) is commonly used to produce the final predictions for object bounding boxes. After class score thresholding, NMS sorts the default boxes in descending order of the highest non-background class probability. Working in order through the default boxes, we remove any lower-probability default boxes with an overlap (IoU) greater than some threshold, e.g. 0.4. After NMS, we will have a collection of bounding boxes with relatively little overlap and well-defined class scores that predict some object class.

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches

def iou(box1, box2):
    """Compute IoU between two boxes."""
    x1, y1, x2, y2 = box1
    x1b, y1b, x2b, y2b = box2
    xi1 = max(x1, x1b)
    yi1 = max(y1, y1b)
    xi2 = min(x2, x2b)
    yi2 = min(y2, y2b)
    inter_area = max(0, xi2 - xi1) * max(0, yi2 - yi1)
    box1_area = (x2 - x1) * (y2 - y1)
    box2_area = (x2b - x1b) * (y2b - y1b)
    union_area = box1_area + box2_area - inter_area
    return inter_area / union_area if union_area != 0 else 0

def non_max_suppression(boxes, scores, iou_thresh=0.5):
    """Perform Non-Maximum Suppression."""
    indices = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)
    keep = []
    while indices:
        current = indices.pop(0)
        keep.append(current)
        # Drop any remaining box that overlaps the kept box too much
        indices = [i for i in indices if iou(boxes[current], boxes[i]) < iou_thresh]
    return keep

def draw_boxes(ax, boxes, scores, color='blue', label='Before NMS'):
    """Draw bounding boxes."""
    for i, box in enumerate(boxes):
        x1, y1, x2, y2 = box
        w, h = x2 - x1, y2 - y1
        rect = patches.Rectangle((x1, y1), w, h, linewidth=2, edgecolor=color, facecolor='none')
        ax.add_patch(rect)
        ax.text(x1, y1 - 3, f"{scores[i]:.2f}", color=color, fontsize=10)
    ax.set_title(label)
    ax.set_xlim(0, 100)
    ax.set_ylim(0, 100)
    ax.invert_yaxis()
    ax.axis('off')

# Sample boxes and confidence scores
boxes = [
    [20, 20, 60, 60],
    [25, 25, 65, 65],
    [70, 20, 110, 60],
    [22, 22, 58, 58],  # Highly overlapping with the first box
]
scores = [0.9, 0.75, 0.8, 0.6]

# Apply NMS
keep_indices = non_max_suppression(boxes, scores, iou_thresh=0.5)
boxes_nms = [boxes[i] for i in keep_indices]
scores_nms = [scores[i] for i in keep_indices]

# Plotting
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))
draw_boxes(ax1, boxes, scores, color='blue', label='Before NMS')
draw_boxes(ax2, boxes_nms, scores_nms, color='green', label='After NMS')
plt.tight_layout()
plt.show()
Average Precision (AP) and Mean Average Precision (mAP)¶

The most popular metric for evaluating object detection models is known as Average Precision (AP). Each predicted bounding box may be categorized as one of two things:
- True Positive (TP): Overlaps with an object and predicts the correct class
- False Positive (FP): Does not sufficiently overlap with an object and/or predicts the incorrect class.
Furthermore, each ground-truth bounding box is referred to as a False Negative (FN) if no predicted bounding box matches with it. For a given class probability threshold (i.e. remove boxes below this threshold), every predicted bounding box will be either a TP or an FP, while any unmatched ground-truth boxes default to FN. By counting the number of TP, FP, and FN boxes, we may compute precision and recall: $$ \begin{align} \textrm{Precision} &= \frac{TP}{TP+FP}\in [0, 1]\\ \textrm{Recall} &= \frac{TP}{TP+FN}\in [0, 1]. \end{align} $$ Intuitively, precision evaluates how often our model is correct when it identifies an object, while recall evaluates what proportion of objects our model is able to find. A model that only predicts one object box but is correct will have high precision but low recall. Conversely, a model that predicts many bounding boxes will likely have high recall and find all the objects but may have low precision since many predicted boxes will be false positives.
As we lower the class probability threshold from 1 down to 0, the precision will typically drop while the recall rises. The resulting series of points forms the precision-recall curve (PRC). The metric of Average Precision is the area under the precision-recall curve (also referred to as AUPRC). This video is helpful to visualize the PRC and how we compute AP.
Example (from Hui et al.)¶
Let’s work through an over-simplified example to demonstrate the calculation of average precision. In this example, the whole dataset contains 5 apples only. We collect all the predictions made for apples in all the images and rank them in descending order according to the predicted confidence level. The second column indicates whether the prediction is correct or not. In this example, a prediction is correct if IoU ≥ 0.5.

| Rank | Correct? | Precision | Recall |
|------|----------|-----------|--------|
| 1    | True     | 1.0 ↑     | 0.2 ↑  |
| 2    | True     | 1.0 →     | 0.4 ↑  |
| 3    | False    | 0.67 ↓    | 0.4 →  |
| 4    | False    | 0.5 ↓     | 0.4 →  |
| 5    | False    | 0.4 ↓     | 0.4 →  |
| 6    | True     | 0.5 ↑     | 0.6 ↑  |
| 7    | True     | 0.57 ↑    | 0.8 ↑  |
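A minimal sketch reproducing the precision/recall columns above and computing AP as the area under the precision-recall curve (using all-point interpolation here; benchmark implementations differ in the exact interpolation scheme):
import numpy as np

# Ranked apple predictions: True = correct detection (IoU >= 0.5)
correct = np.array([True, True, False, False, False, True, True])
n_ground_truth = 5  # the dataset contains 5 apples

tp_cum = np.cumsum(correct)  # cumulative true positives down the ranking
precision = tp_cum / np.arange(1, len(correct) + 1)
recall = tp_cum / n_ground_truth
print(np.round(precision, 2))  # [1.   1.   0.67 0.5  0.4  0.5  0.57]
print(np.round(recall, 2))     # [0.2 0.4 0.4 0.4 0.4 0.6 0.8]

# All-point interpolation: at each recall level, take the maximum precision
# achieved at that recall or beyond, then integrate over recall
interp_precision = np.maximum.accumulate(precision[::-1])[::-1]
ap = np.sum(np.diff(np.concatenate(([0.0], recall))) * interp_precision)
print(f"AP = {ap:.3f}")  # ~0.63 for this example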
Average Precision also requires that we specify an IoU threshold to determine whether a predicted box overlaps a ground-truth box enough to be considered a true positive rather than a false positive. Thus, we often will have a subscript, e.g. $\textrm{AP}_{0.5}$, to indicate the IoU threshold used for computing Average Precision. Finally, the metric of mean Average Precision (mAP) is the average of AP at multiple IoU thresholds, typically from 0.5 to 0.95 in increments of 0.05.
So so so many AP variations....¶
mAP@IoU=0.5 (PASCAL VOC style)
- Calculates average precision at a fixed IoU threshold of 0.5.
- Common in early object detection benchmarks.
- VOC Challenge Evaluation
mAP@[.5:.95] (COCO style)
- Averages AP over multiple IoU thresholds from 0.5 to 0.95 in 0.05 increments.
- More strict and comprehensive.
- COCO Evaluation Metrics
mAP per Class
- Computes AP separately for each class and averages them.
- Useful for evaluating per-class performance.
- Detectron2 Metrics Documentation
mAP@N (e.g., mAP@10)
- Common in information retrieval: computes AP over top-N ranked predictions.
- Wikipedia: Mean Average Precision
mAP with Soft Matching
- Uses a softened notion of matching rather than strict IoU thresholds.
- Sometimes used in medical imaging or semantic segmentation.
- Soft-NMS Paper (CVPR 2017)
mAP Weighted by Object Size
- Evaluates mAP across object sizes: small, medium, large.
- Included in COCO metrics.
- COCO Object Sizes Explained
Normalized mAP
- Adjusts for class imbalance by normalizing per-class contributions.
- Useful in highly imbalanced datasets.
- Survey on Object Detection Metrics (Section 4)
Encoder-Decoder models¶
Transfer learning is a machine learning technique in which knowledge gained through one task or dataset is used to improve model performance on another related task and/or different dataset.
One of the benefits of the encoder-decoder architecture is for transfer learning.

For example, the YOLO model(s) reuse a classification backbone pretrained on ImageNet as the encoder:


There are a myriad of pretrained image models to choose from (many of which are available directly in torchvision).
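As a minimal sketch of this workflow (the class count and learning rate are illustrative), we can freeze a pretrained torchvision encoder and train only a new classification head:
import torch
import torch.nn as nn
import torchvision.models as models

# Load an ImageNet-pretrained backbone from torchvision
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

# Freeze the pretrained parameters so only the new head will be trained
for param in model.parameters():
    param.requires_grad = False

# Replace the classification head for a new task with, e.g., 10 classes
model.fc = nn.Linear(model.fc.in_features, 10)

# Pass only the trainable (head) parameters to the optimizer
optimizer = torch.optim.Adam(model.fc.parameters(), lr=1e-3)
print(sum(p.numel() for p in model.parameters() if p.requires_grad))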
That's it for today¶
- Kaggle infrastructure is done. Updated announcements about the project are incoming.
- Thursday we will begin our discussion of transformers and LLMs!