Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Buff #290

Merged
merged 7 commits into from
Oct 30, 2023
Merged

Buff #290

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,8 @@
same "printed page" as the copyright notice for easier
identification within third-party archives.

Copyright [2023] [Leon Derczynski]
Copyright (c) 2023 Leon Derczynski
Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down
11 changes: 9 additions & 2 deletions garak/_plugins.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
#!/usr/bin/env python3

# SPDX-FileCopyrightText: Portions Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""Functions for working with garak plugins (enumeration, loading, etc)"""

import importlib
Expand Down Expand Up @@ -27,7 +31,7 @@ def enumerate_plugins(category: str = "probes", skip_base_classes=True) -> List[
:type category: str
"""

if category not in ("probes", "detectors", "generators", "harnesses"):
if category not in ("probes", "detectors", "generators", "harnesses", "buffs"):
raise ValueError("Not a recognised plugin type:", category)

base_mod = importlib.import_module(f"garak.{category}.base")
Expand Down Expand Up @@ -122,6 +126,9 @@ def load_plugin(path, break_on_fail=True):
except Exception as e:
# print("error in: module", mod.__name__, "class", plugin_class_name)
# logging.warning(f"error in: module {mod} class {plugin_class_name}")
return False
if break_on_fail:
raise Exception(e)
else:
return False

return plugin_instance
4 changes: 4 additions & 0 deletions garak/buffs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from .base import *
81 changes: 81 additions & 0 deletions garak/buffs/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#!/usr/bin/env python3

# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""Base classes for buffs.

Buff plugins augment, constrain, or otherwise perturb the interaction
between probes and a generator. Buffs must inherit this base class.
`Buff` serves as a template showing what expectations there are for
implemented buffs. """

from collections.abc import Iterable
import logging
from typing import List

from colorama import Fore, Style

import garak.attempt


class Buff:
    """Base class for a buff.

    A buff takes a list of attempts and yields derived attempts, so the
    result can be consumed lazily as a generator. Each derived attempt
    records its origin (creating buff, source attempt UUID and source
    seq) in its ``notes`` dict. Subclasses normally only need to
    override ``transform``.
    """

    uri = ""
    bcp47 = None  # set of languages this buff should be constrained to
    active = True

    def __init__(self) -> None:
        """Announce on stdout and in the log that this buff is loading."""
        print(
            f"🦾 loading {Style.BRIGHT}{Fore.LIGHTGREEN_EX}buff: {Style.RESET_ALL}{self.__class__.__name__}"
        )
        logging.info(f"buff init: {self}")

    def _derive_new_attempt(
        self, source_attempt: garak.attempt.Attempt, seq=-1
    ) -> garak.attempt.Attempt:
        """Build a new attempt carrying over the fields of ``source_attempt``.

        :param source_attempt: the attempt to derive from
        :type source_attempt: garak.attempt.Attempt
        :param seq: sequence number to give the new attempt
        :return: a new attempt whose notes record this buff's class name
            plus the source attempt's UUID and seq
        :rtype: garak.attempt.Attempt
        """
        # Copy the notes mapping before annotating it. Passing the source's
        # dict straight through would make both attempts share one object,
        # so the buff_* keys written below would also appear on the source.
        # A None value is passed through unchanged, as before.
        source_notes = source_attempt.notes
        derived_notes = None if source_notes is None else dict(source_notes)

        # NOTE(review): the remaining fields are still passed by reference;
        # they are not modified here, but confirm no buff mutates them in place.
        new_attempt = garak.attempt.Attempt(
            status=source_attempt.status,
            prompt=source_attempt.prompt,
            probe_classname=source_attempt.probe_classname,
            probe_params=source_attempt.probe_params,
            targets=source_attempt.targets,
            outputs=source_attempt.outputs,
            notes=derived_notes,
            detector_results=source_attempt.detector_results,
            goal=source_attempt.goal,
            seq=seq,
        )
        new_attempt.notes["buff_creator"] = self.__class__.__name__
        new_attempt.notes["buff_source_attempt_uuid"] = str(
            source_attempt.uuid
        )  ## UUIDs don't serialise nice
        new_attempt.notes["buff_source_seq"] = source_attempt.seq

        return new_attempt

    def transform(
        self, attempt: garak.attempt.Attempt
    ) -> Iterable[garak.attempt.Attempt]:
        """Yield zero or more attempts produced from ``attempt``.

        The base implementation yields the attempt unchanged; subclasses
        override this to apply their perturbation.
        """
        yield attempt

    def buff(
        self, source_attempts: List[garak.attempt.Attempt]
    ) -> Iterable[garak.attempt.Attempt]:
        """Yield transformed attempts derived from each source attempt.

        :param source_attempts: the attempts to derive from
        :type source_attempts: List[garak.attempt.Attempt]
        :return: generator over every attempt yielded by ``transform``
        """
        for source_attempt in source_attempts:
            # derive an untransformed copy that keeps the source's seq
            new_attempt = self._derive_new_attempt(source_attempt, source_attempt.seq)
            # transform can return multiple results
            yield from self.transform(new_attempt)
22 changes: 22 additions & 0 deletions garak/buffs/lowercase.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/usr/bin/env python3

# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

""" Buff that converts prompts to lower case. """

from collections.abc import Iterable
from typing import List

import garak.attempt
from garak.buffs.base import Buff


class Lowercase(Buff):
    """Buff that rewrites each attempt's prompt in lower case."""

    def transform(
        self, attempt: garak.attempt.Attempt
    ) -> Iterable[garak.attempt.Attempt]:
        """Lower-case the attempt's prompt in place, then yield the attempt."""
        lowered_prompt = attempt.prompt.lower()
        attempt.prompt = lowered_prompt
        yield attempt
35 changes: 31 additions & 4 deletions garak/cli.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
#!/usr/bin/env python3

# SPDX-FileCopyrightText: Portions Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""Flow for invoking garak from the command line"""


Expand Down Expand Up @@ -73,6 +77,13 @@ def print_plugins(prefix, color):
default="",
help="list of detectors to use, or 'all' for all. Default is to use the probe's suggestion.",
)
parser.add_argument(
"--buff",
"-b",
type=str,
default="",
help="buff to use",
)
parser.add_argument(
"--eval_threshold",
type=float,
Expand Down Expand Up @@ -100,6 +111,11 @@ def print_plugins(prefix, color):
action="store_true",
help="list available generation model interfaces",
)
parser.add_argument(
"--list_buffs",
action="store_true",
help="list available buffs/fuzzes",
)
parser.add_argument(
"--version", "-V", action="store_true", help="print version info & exit"
)
Expand Down Expand Up @@ -228,6 +244,9 @@ def print_plugins(prefix, color):
elif _config.args.list_detectors:
print_plugins("detectors", Fore.LIGHTBLUE_EX)

elif _config.args.list_buffs:
print_plugins("buffs", Fore.LIGHTGREEN_EX)

elif _config.args.list_generators:
print_plugins("generators", Fore.LIGHTMAGENTA_EX)

Expand Down Expand Up @@ -306,13 +325,21 @@ def print_plugins(prefix, color):
if detector_names == []:
import garak.harnesses.probewise

h = garak.harnesses.probewise.ProbewiseHarness()
h.run(generator, probe_names, evaluator)
probewise_h = garak.harnesses.probewise.ProbewiseHarness()
probewise_h.run(
generator, probe_names, evaluator, buffs=[_config.args.buff]
)
else:
import garak.harnesses.pxd

h = garak.harnesses.pxd.PxD()
h.run(generator, probe_names, detector_names, evaluator)
pxd_h = garak.harnesses.pxd.PxD()
pxd_h.run(
generator,
probe_names,
detector_names,
evaluator,
buffs=[_config.args.buff],
)

logging.info("run complete, ending")
_config.reportfile.close()
Expand Down
33 changes: 33 additions & 0 deletions garak/harnesses/base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
#!/usr/bin/env python3

# SPDX-FileCopyrightText: Portions Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""Base harness

A harness coordinates running probes on a generator, running detectors on the
Expand All @@ -12,12 +16,14 @@
from collections import defaultdict
import json
import logging
from typing import List

from colorama import Fore, Style
import tqdm

from garak.attempt import *
import garak._config as _config
import garak._plugins as _plugins


class Harness:
Expand All @@ -28,6 +34,27 @@ class Harness:
def __init__(self):
    """Log, at debug level, that this harness instance has been created."""
    logging.debug(f"harness run: {self}")

def _load_buffs(self, buffs: List) -> None:
    """Load buff instances into global config.

    Resets ``_config.buffs`` and fills it with one loaded plugin per
    usable name in ``buffs``. Empty names are skipped silently, so a
    caller that passes ``[""]`` (the CLI default when no buff is requested)
    does not trigger a spurious "failed to load buff" message. Names that
    fail to load are reported on stdout and in the log, then skipped.

    Don't use this in the base class's run method,
    garak.harnesses.base.Harness.run. Inheriting classes call _load_buffs
    in their run() methods and then call garak.harnesses.base.Harness.run
    themselves, so calling _load_buffs() from the base class as well would
    lead to inefficient reinstantiation of buff objects. To use buffs
    directly with this harness without subclassing, call this method on
    the instance directly.

    :param buffs: names of the buff plugins to load; may be empty or None
    :type buffs: List[str]
    """

    _config.buffs = []
    for buff in buffs or []:
        if not buff:
            # no buff actually named (e.g. default "" from the CLI); nothing to load
            continue
        try:
            _config.buffs.append(_plugins.load_plugin(buff))
            logging.debug(f"loaded {buff}")
        except Exception as e:
            # best-effort: report the failure and carry on with the other buffs
            msg = f"failed to load buff {buff}"
            print(msg)
            logging.warning(f"{msg}: {e}")

def run(self, model, probes, detectors, evaluator, announce_probe=True) -> None:
"""Core harness method

Expand All @@ -48,6 +75,12 @@ def run(self, model, probes, detectors, evaluator, announce_probe=True) -> None:
print("No detectors, nothing to do")
return None

if not probes:
logging.warning("No probes, nothing to do")
if _config.args and _config.args.verbose >= 2:
print("No probes, nothing to do")
return None

for probe in probes:
logging.info("generating...")
if not probe:
Expand Down
20 changes: 17 additions & 3 deletions garak/harnesses/probewise.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
#!/usr/bin/env python3

# SPDX-FileCopyrightText: Portions Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""Probewise harness

Selects detectors to run for each probe based on that probe's recommendations
Expand Down Expand Up @@ -29,7 +33,7 @@ def _load_detector(self, detector_name: str) -> Detector:
logging.error(f" detector load failed: {detector_name}, skipping >>")
return False

def run(self, model, probenames, evaluator):
def run(self, model, probenames, evaluator, buffs=[]):
"""Execute a probe-by-probe scan

Probes are executed in name order. For each probe, the detectors
Expand All @@ -48,12 +52,22 @@ def run(self, model, probenames, evaluator):

:param model: an instantiated generator providing an interface to the model to be examined
:type model: garak.generators.base.Generator
:param probenames: a list of probe instances to be run
:type probenames: List[garak.probes.base.Probe]
:param probenames: a list of probe names to be run
:type probenames: List[str]
:param evaluator: an instantiated evaluator for judging detector results
:type evaluator: garak.evaluators.base.Evaluator
:param buffs: a list of buff names to be used this run
:type buffs: List[str]
"""

if not probenames:
logging.warning("No probes, nothing to do")
if _config.args and _config.args.verbose >= 2:
print("No probes, nothing to do")
return None

self._load_buffs(buffs)

probenames = sorted(probenames)
print(
f"🕵️ queue of {Style.BRIGHT}{Fore.LIGHTYELLOW_EX}probes:{Style.RESET_ALL} "
Expand Down
7 changes: 6 additions & 1 deletion garak/harnesses/pxd.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
#!/usr/bin/env python3

# SPDX-FileCopyrightText: Portions Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""pxd harness

The pxd (probes x detectors) harness runs all specified probes and analyses
Expand All @@ -21,7 +25,7 @@ class PxD(Harness):
def __init__(self):
    """Initialise the harness by delegating to the parent class constructor."""
    super().__init__()

def run(self, model, probe_names, detector_names, evaluator):
def run(self, model, probe_names, detector_names, evaluator, buffs=[]):
probe_names = sorted(probe_names)
detector_names = sorted(detector_names)
print(
Expand All @@ -33,6 +37,7 @@ def run(self, model, probe_names, detector_names, evaluator):
+ ", ".join([name.replace("detectors.", "") for name in detector_names])
)
logging.info("probe queue: " + " ".join(probe_names))
self._load_buffs(buffs)
for probename in probe_names:
try:
probe = _plugins.load_plugin(probename)
Expand Down
Loading