paraphrase buffs (#333)
* add pegasus paraphrase (it's heavy though, let's find another)

* add reqs for paraphrasing

* clarify a few command comments

* tidy up buff load failure messages

* rm dev file

* get a cpu-friendly buff running

* state module on buff load
leondz authored Nov 29, 2023
1 parent 93573b9 commit dafb06f
Showing 7 changed files with 149 additions and 14 deletions.
6 changes: 5 additions & 1 deletion garak/buffs/base.py
@@ -33,14 +33,17 @@ class Buff:
     active = True
 
     def __init__(self) -> None:
+        module = self.__class__.__module__.replace("garak.buffs.", "")
         print(
-            f"🦾 loading {Style.BRIGHT}{Fore.LIGHTGREEN_EX}buff: {Style.RESET_ALL}{self.__class__.__name__}"
+            f"🦾 loading {Style.BRIGHT}{Fore.LIGHTGREEN_EX}buff: {Style.RESET_ALL}{module}.{self.__class__.__name__}"
         )
         logging.info(f"buff init: {self}")
 
     def _derive_new_attempt(
         self, source_attempt: garak.attempt.Attempt, seq=-1
     ) -> garak.attempt.Attempt:
+        if seq == -1:
+            seq = source_attempt.seq
         new_attempt = garak.attempt.Attempt(
             status=source_attempt.status,
             prompt=source_attempt.prompt,
@@ -64,6 +67,7 @@ def _derive_new_attempt(
     def transform(
         self, attempt: garak.attempt.Attempt
     ) -> Iterable[garak.attempt.Attempt]:
+        """attempt copying is handled elsewhere. isn't that nice"""
        yield attempt
 
     def buff(
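For orientation, a minimal sketch of a custom buff built on this base class. It is hypothetical and not part of this commit: the class name and the upper-casing transform are invented, and only the Buff API visible in the diff above is assumed.

    # hypothetical example, not in this commit
    from collections.abc import Iterable

    import garak.attempt
    from garak.buffs.base import Buff

    class Shouting(Buff):
        """Illustrative buff: restate the prompt in upper case"""

        bcp47 = "en"

        def transform(
            self, attempt: garak.attempt.Attempt
        ) -> Iterable[garak.attempt.Attempt]:
            # _derive_new_attempt copies status, seq &c. from the source attempt
            loud_attempt = self._derive_new_attempt(attempt)
            loud_attempt.prompt = attempt.prompt.upper()
            yield loud_attempt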
124 changes: 124 additions & 0 deletions garak/buffs/paraphrase.py
@@ -0,0 +1,124 @@
#!/usr/bin/env python3

# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

""" Buff that paraphrases a prompt. """

from collections.abc import Iterable
import copy

import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

import garak.attempt
from garak.buffs.base import Buff


class PegasusT5(Buff):
    """Paraphrasing buff using Pegasus model"""

    bcp47 = "en"
    uri = "https://huggingface.co/tuner007/pegasus_paraphrase"

    def __init__(self) -> None:
        super().__init__()
        self.para_model_name = "tuner007/pegasus_paraphrase"  # https://huggingface.co/tuner007/pegasus_paraphrase
        self.torch_device = "cuda" if torch.cuda.is_available() else "cpu"
        self.max_length = 60
        self.temperature = 1.5
        self.num_return_sequences = 6
        self.num_beams = self.num_return_sequences
        self.tokenizer = PegasusTokenizer.from_pretrained(self.para_model_name)
        self.para_model = PegasusForConditionalGeneration.from_pretrained(
            self.para_model_name
        ).to(self.torch_device)

    def _get_response(self, input_text):
        batch = self.tokenizer(
            [input_text],
            truncation=True,
            padding="longest",
            max_length=self.max_length,
            return_tensors="pt",
        ).to(self.torch_device)
        translated = self.para_model.generate(
            **batch,
            max_length=self.max_length,
            num_beams=self.num_beams,
            num_return_sequences=self.num_return_sequences,
            temperature=self.temperature,
        )
        tgt_text = self.tokenizer.batch_decode(translated, skip_special_tokens=True)
        return tgt_text

    def transform(
        self, attempt: garak.attempt.Attempt
    ) -> Iterable[garak.attempt.Attempt]:
        yield self._derive_new_attempt(attempt)
        paraphrases = self._get_response(attempt.prompt)
        for paraphrase in set(paraphrases):
            paraphrased_attempt = self._derive_new_attempt(attempt)
            paraphrased_attempt.prompt = paraphrase
            yield paraphrased_attempt


class HumarinT5(Buff):
    """CPU-friendly paraphrase buff based on Humarin's T5 paraphraser"""

    bcp47 = "en"
    uri = "https://huggingface.co/humarin/chatgpt_paraphraser_on_T5_base"

    def __init__(self) -> None:
        super().__init__()
        self.para_model_name = "humarin/chatgpt_paraphraser_on_T5_base"
        self.torch_device = "cuda" if torch.cuda.is_available() else "cpu"
        self.num_beams = 5
        self.num_beam_groups = 5
        self.num_return_sequences = 5
        self.repetition_penalty = 10.0
        self.diversity_penalty = 3.0
        self.no_repeat_ngram_size = 2
        # self.temperature = 0.7
        self.max_length = 128
        self.tokenizer = AutoTokenizer.from_pretrained(self.para_model_name)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(self.para_model_name).to(
            self.torch_device
        )

    def _get_response(self, input_text):
        input_ids = self.tokenizer(
            f"paraphrase: {input_text}",
            return_tensors="pt",
            padding="longest",
            max_length=self.max_length,
            truncation=True,
        ).input_ids.to(self.torch_device)  # keep inputs on the same device as the model

        outputs = self.model.generate(
            input_ids,
            # temperature=self.temperature,
            repetition_penalty=self.repetition_penalty,
            num_return_sequences=self.num_return_sequences,
            no_repeat_ngram_size=self.no_repeat_ngram_size,
            num_beams=self.num_beams,
            num_beam_groups=self.num_beam_groups,
            max_length=self.max_length,
            diversity_penalty=self.diversity_penalty,
            # do_sample = False,
        )

        res = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)

        return res

    def transform(
        self, attempt: garak.attempt.Attempt
    ) -> Iterable[garak.attempt.Attempt]:
        yield self._derive_new_attempt(attempt)
        paraphrases = self._get_response(attempt.prompt)
        for paraphrase in set(paraphrases):
            paraphrased_attempt = self._derive_new_attempt(attempt)
            paraphrased_attempt.prompt = paraphrase
            yield paraphrased_attempt
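As a usage sketch, here is how one of these buffs could be driven directly. This is hypothetical and not part of the commit; the prompt text is invented, and the Attempt(prompt=...) keyword is taken from the base.py diff above.

    # hypothetical driver, not in this commit
    import garak.attempt
    from garak.buffs.paraphrase import HumarinT5

    buff = HumarinT5()  # fetches the Hugging Face model on first use
    attempt = garak.attempt.Attempt(prompt="Describe the moon in one sentence.")
    for variant in buff.transform(attempt):
        # the unmodified prompt is yielded first, then up to 5 deduplicated paraphrases
        print(variant.prompt)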
7 changes: 4 additions & 3 deletions garak/command.py
@@ -158,11 +158,12 @@ def plugin_info(plugin_name):
     )
 
 
-# set config vars - debug, threshold
-# load generator
-# probe
+# TODO set config vars - debug, threshold
+# TODO load generator
+# TODO set probe config string
 
 
+# do a run
 def probewise_run(generator, probe_names, evaluator, buffs):
     import garak.harnesses.probewise
3 changes: 0 additions & 3 deletions garak/garak.site.yaml

This file was deleted.

17 changes: 11 additions & 6 deletions garak/harnesses/base.py
@@ -43,16 +43,21 @@ def _load_buffs(self, buffs: List) -> None:
         If one wants to use buffs directly with this harness without subclassing,
         then call this method on the harness instance directly."""
 
-        _config.buffs = []
+        _config.buffs = []  # maybe put this in transient / session, eh
         for buff in buffs:
+            err_msg = None
             try:
                 _config.buffs.append(_plugins.load_plugin(buff))
-                logging.debug(f"loaded {buff}")
+                logging.debug("loaded %s", buff)
+            except ValueError as ve:
+                err_msg = f"❌🦾 buff load error:❌ {ve}"
             except Exception as e:
-                msg = f"failed to load buff {buff}"
-                print(msg)
-                logging.warning(f"{msg}: {e}")
-                continue
+                err_msg = f"❌🦾 failed to load buff {buff}:❌ {e}"
+            finally:
+                if err_msg is not None:
+                    print(err_msg)
+                    logging.warning(err_msg)
+                    continue
 
     def run(self, model, probes, detectors, evaluator, announce_probe=True) -> None:
         """Core harness method
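Per the docstring, buff loading can also be driven directly. A hypothetical sketch follows; the harness class path and the "buffs.family.ClassName" plugin-spec format are assumptions, not confirmed by this diff.

    # hypothetical sketch, not in this commit
    from garak.harnesses.probewise import ProbewiseHarness  # assumed import path

    harness = ProbewiseHarness()
    # a failed load now prints a ❌🦾 message, logs a warning, and skips that buff
    harness._load_buffs(["buffs.paraphrase.HumarinT5"])  # assumed spec format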
4 changes: 3 additions & 1 deletion pyproject.toml
@@ -49,7 +49,9 @@ dependencies = [
     "langchain>=0.0.300",
     "nemollm>=0.3.0",
     "octoai-sdk",
-    "cmd2"
+    "cmd2",
+    "torch>=2.1.0",
+    "sentencepiece>=0.1.99"
 ]
 
 [project.urls]
2 changes: 2 additions & 0 deletions requirements.txt
@@ -21,3 +21,5 @@ langchain>=0.0.300
 nemollm>=0.3.0
 octoai-sdk
 cmd2
+torch>=2.1.0
+sentencepiece>=0.1.99
