对抗攻击

对抗攻击#

前端模型#

备注

前端模型表示被攻击的模型。

from detectron2 import model_zoo
from detectron2.config import get_cfg as _get_cfg
from detectron2.modeling import build_model
from detectron2.checkpoint import DetectionCheckpointer


# See the Detectron2 docs for further customisation options.
def get_cfg(config_path, score_thresh_test=0.99):
    """Prepare a Detectron2 config from a model-zoo config path.

    Args:
        config_path: model-zoo-relative path to the YAML config file.
        score_thresh_test: ROI-heads score threshold applied at test time.

    Returns:
        The populated Detectron2 config node, with weights pointing at the
        model zoo checkpoint for ``config_path``.
    """
    config = _get_cfg()
    config.merge_from_file(model_zoo.get_config_file(config_path))
    config.MODEL.ROI_HEADS.SCORE_THRESH_TEST = score_thresh_test
    config.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(config_path)
    return config

def load_model(cfg):
    """Build a Detectron2 model from ``cfg`` and load its pretrained weights.

    Returns the model switched to eval mode.
    """
    net = build_model(cfg)
    DetectionCheckpointer(net).load(cfg.MODEL.WEIGHTS)
    net.eval()
    return net

直接使用 目标检测模型元框架

# Model-zoo meta-architecture to attack (Faster R-CNN, R50-FPN, 3x schedule).
config_path = "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"
# config_path = "COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml"

配置自定义数据集:

# Other configuration parameters
cata_name = "coco_2017_val"  # dataset name registered in Detectron2's catalogs
out_dir = "build"  # output directory for saved images — edit as needed
name = "Bear"  # label used in the output file names
# Example (name, image id) pairs from COCO val2017:
# "Bear" and "00285"
#'Kite' and '07784'
#'Wineglass' and '19742'
#'FireHydrant' and '15746'
image_id = 285
# COCO val2017 file names are the image id zero-padded to 12 digits.
image_path = f"datasets/coco/val2017/{image_id:012d}.jpg"

加载数据和模型:

from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.data import DatasetMapper
from detectron2.utils.logger import setup_logger

# Load the dataset entry for the chosen image and the pretrained model.
setup_logger()  # enable Detectron2 logging
cfg = get_cfg(config_path)  # base config
coco_val_metadata = MetadataCatalog.get(cata_name)
coco_val_dataset_dicts = DatasetCatalog.get(cata_name)
# Locate the dataset record for the chosen image by file name.
img_names = [c['file_name'] for c in coco_val_dataset_dicts]
idx = img_names.index(image_path)
# is_train=True so the mapper keeps annotations (needed for the attack's loss);
# an empty augmentation list keeps the image unchanged.
dsm = DatasetMapper(cfg, is_train=True, augmentations=[])
input_benign = dsm(coco_val_dataset_dicts[idx])
print(input_benign['file_name'])
model = load_model(cfg)
# BUG FIX: the original `model.train = False` shadowed the bound
# nn.Module.train() method with a bool instead of setting the flag.
# Set the `training` attribute (redundant after load_model's .eval(),
# but kept for symmetry with attack_dt2's flag handling).
model.training = False
model.proposal_generator.training = False
[03/10 19:43:19 d2.data.datasets.coco]: Loaded 5000 images in COCO format from datasets/coco/annotations/instances_val2017.json
[03/10 19:43:20 d2.data.dataset_mapper]: [DatasetMapper] Augmentations used in training: []
datasets/coco/val2017/000000000285.jpg
[03/10 19:43:21 d2.checkpoint.detection_checkpoint]: [DetectionCheckpointer] Loading from https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_FPN_3x/137849458/model_final_280758.pkl ...

预览良性(未被攻击的)图片:

from PIL import Image
import numpy as np
from IPython.display import display
import torch
from detectron2.utils.visualizer import Visualizer


# Preview the benign (unattacked) detections and save the visualization.
with torch.no_grad():
    outputs = model([input_benign]) # inference on the clean input
    print(outputs[0]['instances'].pred_classes)
    print(outputs[0]['instances'].pred_boxes)
    with Image.open(image_path) as im:
        v = Visualizer(np.array(im), MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=0.5)
    v._default_font_size=40
    out = v.draw_instance_predictions(outputs[0]["instances"].to("cpu"))
    disp_image = out.get_image()
    with Image.fromarray(disp_image) as im:
        display(im.resize((200, 200)))
        im.save(f'{out_dir}/{name}_Detection_unperturbed.png') # save the image
tensor([21], device='cuda:0')
Boxes(tensor([[6.1768e-02, 6.9083e+01, 5.7901e+02, 6.3026e+02]], device='cuda:0'))
/media/pc/data/tmp/cache/conda/envs/tvmx/lib/python3.10/site-packages/torch/functional.py:504: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at ../aten/src/ATen/native/TensorShape.cpp:3190.)
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
../../../_images/64b10ae22211c29878ea65c7b1579c4cd5476b522f09bd74792d7f48993b28f8.png

PGD 攻击算法#

from typing import Any
from detectron2.utils.events import EventStorage


def norms(Z):
    """Return the 2-norm over all dims except the first, shaped to broadcast.

    Each leading-dim slice of ``Z`` is flattened and its Euclidean norm taken;
    the result has shape ``(Z.shape[0], 1, 1)`` so it can divide a 3-D tensor
    elementwise.
    """
    flat = Z.view(len(Z), -1)
    return flat.norm(dim=1).unsqueeze(-1).unsqueeze(-1)


def pgd_l2(model, X, epsilon, alpha, num_iter):
    """Craft an adversarial example for input ``X`` with PGD under an L2 budget.

    Args:
        model: detection model in training mode, returning a dict of losses.
        X: DT2-format input dict; ``X['image']`` is perturbed in place.
        epsilon: radius of the L2 ball the perturbation is projected onto.
        alpha: gradient-ascent step size per iteration.
        num_iter: number of PGD iterations.

    Returns:
        Tuple of the perturbed input dict and the detached perturbation tensor.
    """
    target_loss_idx = [0]
    losses_name = ["loss_cls", "loss_box_reg", "loss_rpn_cls", "loss_rpn_loc"]
    # BUG FIX: the original re-assigned X['image'] += delta every iteration,
    # so the image accumulated each intermediate delta and the total
    # perturbation drifted far beyond the epsilon ball the projection step is
    # meant to enforce. Keep the clean image and perturb it fresh each step.
    clean = X['image'].clone().detach().to(torch.float32)
    delta = torch.zeros_like(X['image'], dtype=torch.float32, requires_grad=True)
    with EventStorage(0):
        for t in range(num_iter):
            X['image'] = clean + delta
            losses = model([X])
            loss = sum(losses[losses_name[i]] for i in target_loss_idx)
            if t % 5 == 0:
                print(f"iter: {t}, loss: {loss}")
            loss.backward()
            grad = delta.grad.detach()
            # Normalized-gradient ascent step.
            delta.data += alpha * grad / norms(grad)
            # Keep clean + delta inside the valid pixel range [0, 255].
            delta.data = torch.min(torch.max(delta.detach(), -clean), 255 - clean)
            # Project back onto the L2 ball of radius epsilon.
            delta.data *= epsilon / norms(delta.detach()).clamp(min=epsilon)
            delta.grad.zero_()
    X['image'] = clean + delta.detach()
    del losses
    return X, delta.detach()

def pgd_linf(model, X, epsilon=0.1, alpha=0.01,
             num_iter=20, randomize=False):
    """Craft an adversarial example for input ``X`` with PGD under an Linf budget.

    Args:
        model: detection model in training mode, returning a dict of losses.
        X: DT2-format input dict; ``X['image']`` is perturbed in place.
        epsilon: Linf bound on the perturbation.
        alpha: step size per iteration.
        num_iter: number of PGD iterations.
        randomize: start from a random point in the epsilon ball instead of 0.

    Returns:
        Tuple of the perturbed input dict and the detached perturbation tensor.
    """
    target_loss_idx = [0]
    # Tune this loss selection to obtain different failure modes
    # (hallucination / misclassification / misdetection).
    losses_name = ["loss_cls", "loss_box_reg", "loss_rpn_cls", "loss_rpn_loc"]
    if randomize:
        delta = torch.rand_like(X['image'], dtype=torch.float32, requires_grad=True)
        delta.data = delta.data * 2 * epsilon - epsilon  # uniform in [-eps, eps]
    else:
        delta = torch.zeros_like(X['image'], dtype=torch.float32, requires_grad=True)
    # BUG FIX: the original re-assigned X['image'] += delta every iteration,
    # accumulating each intermediate delta so the final image was NOT
    # clean + delta with ||delta||_inf <= epsilon. Keep the clean image and
    # perturb it fresh each step.
    clean = X['image'].clone().detach().to(torch.float32)
    with EventStorage(0):
        for t in range(num_iter):
            X['image'] = clean + delta
            losses = model([X])
            loss = sum(losses[losses_name[i]] for i in target_loss_idx)
            if t % 5 == 0:
                print(f"iter: {t}, loss: {loss}")
            loss.backward()
            # Signed-gradient step, clipped to the epsilon ball.
            delta.data = (delta + alpha*delta.grad.detach().sign()).clamp(-epsilon,epsilon)
            delta.grad.zero_()
    X['image'] = clean + delta.detach()
    del losses
    return X, delta.detach()
def attack_dt2(model: torch.nn.Module, 
               inp: dict, attack_fn: Any|None=None, 
               **kwargs)->tuple:
    """攻击 DT2 模型。
    接受预训练的模型,DT2 格式的输入,并使用作为 kwargs 传递的提供的攻击参数运行指定的攻击。
    
    Args:
        model: 与 DT2 兼容的预训练模型
        inp: DT2 格式的输入字典,用作模型的输入
        attack_fn: 攻击方法 (可选 pgd_l2 和 pgd_linf)

    Returns:
        干扰后的图像以及干扰(perturbation)张量
    """
    model.training = True
    model.proposal_generator.training = True
    model.roi_heads.training = True
    inp, perturbation = attack_fn(model, inp, kwargs['epsilon'], 
                                  kwargs['alpha'], kwargs['num_iter'])
    model.training = False
    model.proposal_generator.training = False
    model.roi_heads.training = False
    return inp, perturbation
# Run the Linf PGD attack against the benign input; the perturbation
# tensor itself is discarded (the perturbed image is kept in the dict).
input_attacked, _ = attack_dt2(model, input_benign,
                               pgd_linf, epsilon=128/255,
                               alpha=0.5, num_iter=100)
iter: 0, loss: 0.009153555147349834
iter: 5, loss: 0.31559261679649353
iter: 10, loss: 0.5566940903663635
iter: 15, loss: 0.9820669293403625
iter: 20, loss: 1.221840739250183
iter: 25, loss: 1.6122164726257324
iter: 30, loss: 1.681879997253418
iter: 35, loss: 1.9374594688415527
iter: 40, loss: 1.935586929321289
iter: 45, loss: 2.205859422683716
iter: 50, loss: 2.11039662361145
iter: 55, loss: 2.46783447265625
iter: 60, loss: 2.44680118560791
iter: 65, loss: 2.822873830795288
iter: 70, loss: 2.652467966079712
iter: 75, loss: 2.9727275371551514
iter: 80, loss: 2.8871679306030273
iter: 85, loss: 2.917658567428589
iter: 90, loss: 2.9429056644439697
iter: 95, loss: 3.1205837726593018

保存攻击后的图片:

# Save the attacked image to disk.
with torch.no_grad():
    # CHW tensor -> HWC array; [:, :, ::-1] flips the channel order
    # (presumably BGR -> RGB, DT2's default input format — confirm cfg.INPUT.FORMAT).
    img = torch.permute(input_attacked['image'], 
                        (1, 2, 0)).numpy()[:, :, ::-1]
# Min-max normalize to [0, 1], then scale to [0, 255] for an 8-bit image.
img = (img - img.min())/(img.max()-img.min())
img = img * 255
with Image.fromarray(img.astype(np.uint8)) as im:
    display(im.resize((200, 200)))
    im.save(f'{out_dir}/{name}_Attacked.png')
../../../_images/e12c8d4c4235be26d52442d3e522b32a26ecd802e2ce77878958a05796a698a6.png

预览干扰后的检测结果:

# Keep only detections scoring above this threshold in the visualization.
instance_mask_thresh=0.99
with torch.no_grad():
    adv_outputs = model([input_attacked])
    perturbed_image = input_attacked['image'].data.permute((1,2,0)).numpy()
    pbi = torch.tensor(perturbed_image, requires_grad=False).numpy()
v = Visualizer(pbi, MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.0)
v._default_font_size=40
instances = adv_outputs[0]['instances']
# Filter low-confidence predictions before drawing.
mask = instances.scores > instance_mask_thresh
instances = instances[mask]
out = v.draw_instance_predictions(instances.to("cpu"))
# Channel flip on the rendered image (presumably BGR -> RGB — confirm).
pred = out.get_image()[:, :, ::-1]
with Image.fromarray(pred) as im:
    display(im.resize((200, 200)))
    im.save(f'{out_dir}/{name}_Detection_perturbed.png')
../../../_images/062b0d23ee7a381c629bd96e1c955c765d0f7bfb2217864c6dfd6fb65fd8f83a.png