对抗攻击#
前端模型#
备注
前端模型表示被攻击的模型。
from detectron2 import model_zoo
from detectron2.config import get_cfg as _get_cfg
from detectron2.modeling import build_model
from detectron2.checkpoint import DetectionCheckpointer
# 可以参考 Detectron2 做更多定制工作
def get_cfg(config_path, score_thresh_test=0.99):
    """Assemble a Detectron2 config from a model-zoo config path.

    Args:
        config_path: relative model-zoo config path, e.g.
            "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml".
        score_thresh_test: minimum detection score kept at test time.

    Returns:
        A populated Detectron2 config node pointing at the matching
        pretrained checkpoint.
    """
    zoo_cfg_file = model_zoo.get_config_file(config_path)
    zoo_weights_url = model_zoo.get_checkpoint_url(config_path)
    cfg = _get_cfg()
    cfg.merge_from_file(zoo_cfg_file)
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = score_thresh_test
    cfg.MODEL.WEIGHTS = zoo_weights_url
    return cfg
def load_model(cfg):
    """Build the model described by *cfg*, load its pretrained weights
    from cfg.MODEL.WEIGHTS, and return it in evaluation mode."""
    net = build_model(cfg)
    DetectionCheckpointer(net).load(cfg.MODEL.WEIGHTS)
    return net.eval()
直接使用 目标检测模型元框架:
# Model meta-architecture (Detectron2 model-zoo config path)
config_path = "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"
# Alternative backbone:
# config_path = "COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml"
配置自定义数据集:
# Other configuration parameters
cata_name = "coco_2017_val"  # registered Detectron2 dataset name
out_dir = "build"  # edit: output directory for the generated images
name = "Bear"
# Alternative (name, COCO image id) pairs used elsewhere in this doc:
# "Bear" and "00285"
#'Kite' and '07784'
#'Wineglass' and '19742'
#'FireHydrant' and '15746'
image_id = 285
# COCO val2017 file names are the zero-padded 12-digit image id.
image_path = f"datasets/coco/val2017/{image_id:012d}.jpg"
加载数据和模型:
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.data import DatasetMapper
from detectron2.utils.logger import setup_logger

setup_logger()  # enable Detectron2 logging
cfg = get_cfg(config_path)  # base configuration

# Look up the benign sample inside the registered COCO validation set.
coco_val_metadata = MetadataCatalog.get(cata_name)
coco_val_dataset_dicts = DatasetCatalog.get(cata_name)
img_names = [c['file_name'] for c in coco_val_dataset_dicts]
idx = img_names.index(image_path)

# is_train=True keeps the ground-truth instances in the mapped dict — they
# are needed to compute training losses during the attack. No augmentations.
dsm = DatasetMapper(cfg, is_train=True, augmentations=[])
input_benign = dsm(coco_val_dataset_dicts[idx])
print(input_benign['file_name'])

model = load_model(cfg)
# BUG FIX: the original wrote `model.train = False`, which shadows the bound
# nn.Module.train() method with a bool instead of setting the mode flag.
# Set the `training` attribute, matching how attack_dt2 toggles it.
model.training = False
model.proposal_generator.training = False
[03/10 19:43:19 d2.data.datasets.coco]: Loaded 5000 images in COCO format from datasets/coco/annotations/instances_val2017.json
[03/10 19:43:20 d2.data.dataset_mapper]: [DatasetMapper] Augmentations used in training: []
datasets/coco/val2017/000000000285.jpg
[03/10 19:43:21 d2.checkpoint.detection_checkpoint]: [DetectionCheckpointer] Loading from https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_FPN_3x/137849458/model_final_280758.pkl ...
预览良性(未被攻击的)图片:
from PIL import Image
import numpy as np
from IPython.display import display
import torch
from detectron2.utils.visualizer import Visualizer

with torch.no_grad():
    outputs = model([input_benign])  # inference on the unperturbed input
    print(outputs[0]['instances'].pred_classes)
    print(outputs[0]['instances'].pred_boxes)
with Image.open(image_path) as im:
    # Visualize detections on top of the original image file.
    v = Visualizer(np.array(im), MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=0.5)
v._default_font_size=40
out = v.draw_instance_predictions(outputs[0]["instances"].to("cpu"))
disp_image = out.get_image()
with Image.fromarray(disp_image) as im:
    display(im.resize((200, 200)))
    im.save(f'{out_dir}/{name}_Detection_unperturbed.png')  # save the image
tensor([21], device='cuda:0')
Boxes(tensor([[6.1768e-02, 6.9083e+01, 5.7901e+02, 6.3026e+02]], device='cuda:0'))
/media/pc/data/tmp/cache/conda/envs/tvmx/lib/python3.10/site-packages/torch/functional.py:504: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at ../aten/src/ATen/native/TensorShape.cpp:3190.)
return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]
PGD 攻击算法#
from typing import Any
from detectron2.utils.events import EventStorage
def norms(Z):
    """L2 norm over every dimension except the first (batch) one.

    Returns a tensor of shape (batch, 1, 1) so it broadcasts against
    3-D image tensors.
    """
    flat = Z.view(Z.shape[0], -1)
    return flat.norm(dim=1).unsqueeze(-1).unsqueeze(-1)
def pgd_l2(model, X, epsilon, alpha, num_iter):
    """Construct an L2-bounded PGD adversarial example for sample X.

    Args:
        model: Detectron2-compatible model in training mode (returns a
            dict of losses when called on a list of inputs).
        X: DT2-format input dict; X['image'] is replaced by the
            perturbed image.
        epsilon: radius of the L2 ball the perturbation is projected onto.
        alpha: gradient-step size per iteration.
        num_iter: number of PGD iterations.

    Returns:
        (X, delta): the input dict holding the perturbed image, and the
        final perturbation tensor (detached).
    """
    target_loss_idx = [0]
    losses_name = ["loss_cls", "loss_box_reg", "loss_rpn_cls", "loss_rpn_loc"]
    # BUG FIX: the original re-assigned X['image'] = X['image'] + delta every
    # iteration, so deltas accumulated into the image and the final result was
    # not clean_image + delta (and the pixel-range clip used the drifted
    # image). Keep the clean image fixed and apply the current delta to it.
    orig_image = X['image'].clone().to(torch.float32)
    delta = torch.zeros_like(orig_image, requires_grad=True)
    with EventStorage(0):
        for t in range(num_iter):
            X['image'] = orig_image + delta
            losses = model([X])
            loss = sum(losses[losses_name[i]] for i in target_loss_idx)
            if t % 5 == 0:
                print(f"iter: {t}, loss: {loss}")
            loss.backward()
            grad = delta.grad.detach()
            # Normalized gradient ascent step.
            delta.data += alpha * grad / norms(grad)
            # Clip so that orig + delta stays inside the valid range [0, 255].
            delta.data = torch.min(torch.max(delta.detach(), -orig_image),
                                   255 - orig_image)
            # Project back onto the L2 ball of radius epsilon.
            delta.data *= epsilon / norms(delta.detach()).clamp(min=epsilon)
            delta.grad.zero_()
            del losses
    X['image'] = orig_image + delta.detach()
    return X, delta.detach()
def pgd_linf(model, X, epsilon=0.1, alpha=0.01,
             num_iter=20, randomize=False):
    """Construct an Linf-bounded PGD adversarial example for sample X.

    Args:
        model: Detectron2-compatible model in training mode (returns a
            dict of losses when called on a list of inputs).
        X: DT2-format input dict; X['image'] is replaced by the
            perturbed image.
        epsilon: Linf bound on each pixel of the perturbation.
        alpha: step size per iteration.
        num_iter: number of PGD iterations.
        randomize: start from a random point inside the epsilon ball
            instead of zero.

    Returns:
        (X, delta): the input dict holding the perturbed image, and the
        final perturbation tensor (detached).
    """
    target_loss_idx = [0]
    # Adjust this loss selection to get different kinds of results
    # (hallucination / misclassification / misdetection).
    losses_name = ["loss_cls", "loss_box_reg", "loss_rpn_cls", "loss_rpn_loc"]
    # BUG FIX: the original re-assigned X['image'] = X['image'] + delta every
    # iteration, so deltas accumulated into the image and the overall
    # perturbation was NOT bounded by epsilon. Keep the clean image fixed and
    # apply the current (clamped) delta to it instead.
    orig_image = X['image'].clone().to(torch.float32)
    if randomize:
        delta = torch.rand_like(orig_image, requires_grad=True)
        delta.data = delta.data * 2 * epsilon - epsilon
    else:
        delta = torch.zeros_like(orig_image, requires_grad=True)
    with EventStorage(0):
        for t in range(num_iter):
            X['image'] = orig_image + delta
            losses = model([X])
            loss = sum(losses[losses_name[i]] for i in target_loss_idx)
            if t % 5 == 0:
                print(f"iter: {t}, loss: {loss}")
            loss.backward()
            # Signed-gradient step, clamped to the Linf ball.
            delta.data = (delta + alpha * delta.grad.detach().sign()).clamp(-epsilon, epsilon)
            delta.grad.zero_()
            del losses
    X['image'] = orig_image + delta.detach()
    return X, delta.detach()
def attack_dt2(model: torch.nn.Module,
inp: dict, attack_fn: Any|None=None,
**kwargs)->tuple:
"""攻击 DT2 模型。
接受预训练的模型,DT2 格式的输入,并使用作为 kwargs 传递的提供的攻击参数运行指定的攻击。
Args:
model: 与 DT2 兼容的预训练模型
inp: DT2 格式的输入字典,用作模型的输入
attack_fn: 攻击方法 (可选 pgd_l2 和 pgd_linf)
Returns:
干扰后的图像以及干扰(perturbation)张量
"""
model.training = True
model.proposal_generator.training = True
model.roi_heads.training = True
inp, perturbation = attack_fn(model, inp, kwargs['epsilon'],
kwargs['alpha'], kwargs['num_iter'])
model.training = False
model.proposal_generator.training = False
model.roi_heads.training = False
return inp, perturbation
# Run the Linf PGD attack against the benign input.
input_attacked, _ = attack_dt2(model, input_benign,
                               pgd_linf, epsilon=128/255,
                               alpha=0.5, num_iter=100)
iter: 0, loss: 0.009153555147349834
iter: 5, loss: 0.31559261679649353
iter: 10, loss: 0.5566940903663635
iter: 15, loss: 0.9820669293403625
iter: 20, loss: 1.221840739250183
iter: 25, loss: 1.6122164726257324
iter: 30, loss: 1.681879997253418
iter: 35, loss: 1.9374594688415527
iter: 40, loss: 1.935586929321289
iter: 45, loss: 2.205859422683716
iter: 50, loss: 2.11039662361145
iter: 55, loss: 2.46783447265625
iter: 60, loss: 2.44680118560791
iter: 65, loss: 2.822873830795288
iter: 70, loss: 2.652467966079712
iter: 75, loss: 2.9727275371551514
iter: 80, loss: 2.8871679306030273
iter: 85, loss: 2.917658567428589
iter: 90, loss: 2.9429056644439697
iter: 95, loss: 3.1205837726593018
保存攻击后的图片:
with torch.no_grad():
    # CHW -> HWC, then reverse the channel axis (presumably BGR -> RGB,
    # as Detectron2 inputs are typically BGR — TODO confirm).
    img = torch.permute(input_attacked['image'],
                        (1, 2, 0)).numpy()[:, :, ::-1]
    # Min-max normalize to [0, 255] for display/saving.
    img = (img - img.min())/(img.max()-img.min())
    img = img * 255
with Image.fromarray(img.astype(np.uint8)) as im:
    display(im.resize((200, 200)))
    im.save(f'{out_dir}/{name}_Attacked.png')
预览干扰后的检测结果:
# Preview detections on the perturbed image, keeping only very confident ones.
instance_mask_thresh = 0.99
with torch.no_grad():
    adv_outputs = model([input_attacked])

# CHW -> HWC for the visualizer.
# FIX: dropped the redundant numpy -> torch.tensor -> numpy round-trip from
# the original; it only produced an extra copy of the same array.
pbi = input_attacked['image'].data.permute((1, 2, 0)).numpy()

v = Visualizer(pbi, MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.0)
v._default_font_size = 40

# Filter out low-confidence detections before drawing.
instances = adv_outputs[0]['instances']
mask = instances.scores > instance_mask_thresh
instances = instances[mask]

out = v.draw_instance_predictions(instances.to("cpu"))
pred = out.get_image()[:, :, ::-1]  # reverse channel order for PIL
with Image.fromarray(pred) as im:
    display(im.resize((200, 200)))
    im.save(f'{out_dir}/{name}_Detection_perturbed.png')