Add scripts to publish results on PapersWithCode #561

Merged (14 commits) on Jul 15, 2024
2 changes: 2 additions & 0 deletions docs/source/whats_new.rst
@@ -17,9 +17,11 @@ Develop branch

Enhancements
~~~~~~~~~~~~
- Add scripts to upload results on PapersWithCode (:gh:`561` by `Pierre Guetschel`_)
- Centralize dataset summary tables in CSV files (:gh:`635` by `Pierre Guetschel`_)
- Add new dataset :class:`moabb.datasets.Liu2024` (:gh:`619` by `Taha Habib`_)


Bugs
~~~~
- Fix caching in the workflows (:gh:`632` by `Pierre Guetschel`_)
155 changes: 155 additions & 0 deletions scripts/paperswithcode/create_datasets_and_tasks.py
@@ -0,0 +1,155 @@
import pickle
import re
from argparse import ArgumentParser
from dataclasses import dataclass

from paperswithcode import PapersWithCodeClient
from paperswithcode.models import DatasetCreateRequest


def dataset_name(dataset):
return f"{dataset.code} MOABB"


def dataset_full_name(dataset):
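    # Take the first paragraph of the dataset docstring, strip citation
    # markers such as " [1]_", and collapse whitespace into single spaces.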
s = dataset.__doc__.split("\n\n")[0]
s = re.sub(r" \[\d+\]_", "", s)
s = re.sub(r"\s+", " ", s)
return s


def dataset_url(dataset):
return f"http://moabb.neurotechx.com/docs/generated/moabb.datasets.{dataset.__class__.__name__}.html"


def valid_datasets():
from moabb.datasets.utils import dataset_list
from moabb.utils import aliases_list

deprecated_names = [n[0] for n in aliases_list]
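    # Instantiate every registered dataset, skipping deprecated aliases
    # and the Fake datasets used for testing.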
return [
d()
for d in dataset_list
if (d.__name__ not in deprecated_names) and ("Fake" not in d.__name__)
]


_paradigms = {
"MotorImagery": (
"Motor Imagery",
["all classes", "left hand vs. right hand", "right hand vs. feet"],
"Motor Imagery",
),
"P300": ("ERP", None, "Event-Related Potential (ERP)"),
"SSVEP": ("SSVEP", None, "Steady-State Visually Evoked Potential (SSVEP)"),
"CVEP": ("c-VEP", None, "Code-Modulated Visual Evoked Potential (c-VEP)"),
}
_evaluations = {
"WithinSession": "Within-Session",
"CrossSession": "Cross-Session",
"CrossSubject": "Cross-Subject",
}


@dataclass
class Task:
id: str
name: str
description: str
area: str
parent_task: str

@classmethod
def make(cls, name, description, area, parent_task):
        # Build a slug-style id: lowercase, spaces replaced by hyphens,
        # parentheses stripped, and anything after a period dropped.
        task_id = (
            name.lower().replace(" ", "-").replace("(", "").replace(")", "").split(".")[0]
        )
return cls(task_id, name, description, area, parent_task)


def create_tasks(client: PapersWithCodeClient):
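    # Builds the task hierarchy: one parent task per paradigm, one subtask per
    # (paradigm, evaluation) pair and, where subparadigms are defined, one
    # sub-subtask per class combination. The returned dict is keyed accordingly,
    # e.g. tasks["MotorImagery"], tasks[("MotorImagery", "WithinSession")] and
    # tasks[("MotorImagery", "WithinSession", "left hand vs. right hand")].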
tasks = {}
for paradigm_class, (
paradigm_name,
subparadigms,
paradigm_fullname,
) in _paradigms.items():
description = f"Classification of examples recorded under the {paradigm_fullname} paradigm, as part of Brain-Computer Interfaces (BCI)."
d = dict(
name=paradigm_name,
description=description,
area="Medical",
parent_task="Brain Computer Interface",
)
# task = client.task_add(TaskCreateRequest(**d))
task = Task.make(**d)
tasks[paradigm_class] = task
for evaluation_class, evaluation in _evaluations.items():
eval_url = f'http://moabb.neurotechx.com/docs/generated/moabb.evaluations.{evaluation.replace("-", "")}Evaluation.html'
d = dict(
name=f"{evaluation} {paradigm_name}",
description=f"""MOABB's {evaluation} evaluation for the {paradigm_name} paradigm.

Evaluation details: [{eval_url}]({eval_url})""",
area="medical",
parent_task=task.id,
)
# subtask = client.task_add(TaskCreateRequest(**d))
subtask = Task.make(**d)
tasks[(paradigm_class, evaluation_class)] = subtask
if subparadigms is not None:
for subparadigm in subparadigms:
d = dict(
name=f"{evaluation} {paradigm_name} ({subparadigm})",
description=f"""MOABB's {evaluation} evaluation for the {paradigm_name} paradigm ({subparadigm}).

Evaluation details: [{eval_url}]({eval_url})""",
area="medical",
parent_task=subtask.id,
)
# subsubtask = client.task_add(TaskCreateRequest(**d))
subsubtask = Task.make(**d)
tasks[(paradigm_class, evaluation_class, subparadigm)] = subsubtask
return tasks


def create_datasets(client):
datasets = valid_datasets()
pwc_datasets = {}
for dataset in datasets:
pwc_dataset = client.dataset_add(
DatasetCreateRequest(
name=dataset_name(dataset),
full_name=dataset_full_name(dataset),
url=dataset_url(dataset),
)
)
pwc_datasets[dataset.code] = pwc_dataset
return pwc_datasets


if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("token", type=str, help="PapersWithCode API token")
parser.add_argument(
"-o",
"--output",
type=str,
help="Pickle output file",
default="paperswithcode_datasets_and_tasks.pickle",
)
args = parser.parse_args()

client = PapersWithCodeClient(token=args.token)

# create tasks
tasks = create_tasks(client)

# create datasets
datasets = create_datasets(client)
obj = {"datasets": datasets, "tasks": tasks}

with open(args.output, "wb") as f:
pickle.dump(obj, f)
print(f"Datasets and tasks saved to {args.output}")
182 changes: 182 additions & 0 deletions scripts/paperswithcode/upload_results.py
@@ -0,0 +1,182 @@
import pickle
from argparse import ArgumentParser
from dataclasses import dataclass
from math import isnan

import pandas as pd
from paperswithcode import PapersWithCodeClient
from paperswithcode.models import (
EvaluationTableSyncRequest,
MetricSyncRequest,
ResultSyncRequest,
)


@dataclass
class Task:
id: str
name: str
description: str
area: str
parent_task: str


_metrics = {
"time": "training time (s)",
"carbon_emission": "CO2 Emission (g)",
}


def make_table(results_csv_list: list[str], metric: str):
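    # Each results CSV is expected to contain at least the columns "dataset",
    # "paradigm", "evaluation", "pipeline" and "score"; "time" and
    # "carbon_emission" are averaged as well when present. Scores are averaged
    # over the remaining rows (typically subjects and sessions) and converted
    # to percentages.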
df_list = []
for results_csv in results_csv_list:
df = pd.read_csv(results_csv)
columns = ["score"]
if "time" in df.columns:
columns.append("time")
if "carbon_emission" in df.columns:
columns.append("carbon_emission")
df = (
df.groupby(
["dataset", "paradigm", "evaluation", "pipeline"],
)[columns]
.mean()
.reset_index()
)
df.score = df.score * 100
columns = dict(**_metrics, score=metric)
df.rename(columns=columns, inplace=True)
df.paradigm = df.paradigm.replace(
{"FilterBankMotorImagery": "MotorImagery", "LeftRightImagery": "MotorImagery"}
)
print(df.head())
df_list.append(df)
return pd.concat(df_list)


def upload_subtable(client, df, dataset, task, paper, evaluated_on):
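    # Sync one leaderboard per (dataset, task) pair: every column of df becomes
    # a metric and every row (indexed by pipeline name) becomes a result entry.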
kwargs = dict(
task=task.id,
dataset=dataset.id,
description=task.description,
external_id=f"{dataset.id}-{task.id}",
mirror_url="http://moabb.neurotechx.com/docs/benchmark_summary.html",
)
print(f"Uploading {kwargs=}")
# client.evaluation_create(EvaluationTableCreateRequest(**kwargs))

r = EvaluationTableSyncRequest(
**kwargs,
metrics=[
MetricSyncRequest(name=metric, is_loss=metric in _metrics.values())
for metric in df.columns
],
results=[
ResultSyncRequest(
metrics={k: str(v) for k, v in row.to_dict().items() if not isnan(v)},
paper=paper,
methodology=pipeline,
external_id=f"{dataset.id}-{task.id}-{pipeline}",
evaluated_on=evaluated_on,
# external_source_url="http://moabb.neurotechx.com/docs/benchmark_summary.html",
# TODO: maybe update url with the exact row of the result
)
for pipeline, row in df.iterrows()
],
)
print(r)
leaderboard_id = client.evaluation_synchronize(r)
print(f"{leaderboard_id=}")
return leaderboard_id


def upload_table(client, df, datasets, tasks, paper, evaluated_on, subsubtask):
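    # Split the summary table by (dataset, paradigm, evaluation), look up the
    # matching PapersWithCode dataset and task, and upload one leaderboard per
    # group.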
gp_cols = ["dataset", "paradigm", "evaluation"]
df_gp = df.groupby(gp_cols)
ids = []
for (dataset_name, paradigm_name, evaluation_name), sub_df in df_gp:
dataset = datasets[dataset_name]
task_key = (paradigm_name, evaluation_name)
if subsubtask is not None:
task_key += (subsubtask,)
task = tasks[task_key]
        leaderboard_id = upload_subtable(
            client,
            sub_df.set_index("pipeline").drop(
                columns=gp_cols
            ),  # + list(_metrics.values())
            dataset,
            task,
            paper,
            evaluated_on,
        )
        ids.append(leaderboard_id)
return ids


if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("token", type=str, help="PapersWithCode API token")
parser.add_argument(
"metric",
type=str,
help="Metric used in the results CSV (see PapersWithCode metrics)",
)
parser.add_argument(
"results_csv", type=str, help="CSV file with results to upload", nargs="+"
)

parser.add_argument(
"-s",
"--subsubtask",
type=str,
default=None,
help="If relevant, the type of motor imagery task (see create_datasets_and_tasks.py)",
)
parser.add_argument(
"-d",
"--datasets",
type=str,
help="Pickle file created by create_datasets_and_tasks.py",
default="paperswithcode_datasets_and_tasks.pickle",
)
parser.add_argument(
"-o",
"--output",
type=str,
help="Pickle output file",
default="paperswithcode_results.pickle",
)
parser.add_argument("-p", "--paper", type=str, help="Paper URL", default="")
parser.add_argument(
"-e",
"--evaluated_on",
type=str,
help="Results date YYYY-MM-DD",
default="2024-04-09",
)
args = parser.parse_args()

with open(args.datasets, "rb") as f:
datasets = pickle.load(f)
summary_table = make_table(args.results_csv, metric=args.metric)

client = PapersWithCodeClient(token=args.token)

upload_table(
client,
summary_table,
datasets["datasets"],
datasets["tasks"],
args.paper,
args.evaluated_on,
args.subsubtask,
)

# Commands used to upload the results of the benchmark paper:
# (generate a new API token, this one is expired)
# python scripts/paperswithcode/upload_results.py 5a4bd76b2b66908f0b8f28fb45dd41b918d3440b AUC-ROC ../moabb_paper_plots/DATA/results_rf_Optuna.csv -s="right hand vs. feet" -d paperswithcode_datasets_and_tasks2.pickle -o test_out.pickle -p "https://arxiv.org/abs/2404.15319v1" -e=2024-04-03
# python scripts/paperswithcode/upload_results.py 5a4bd76b2b66908f0b8f28fb45dd41b918d3440b AUC-ROC ../moabb_paper_plots/DATA/results_lhrh_Optuna.csv -s="left hand vs. right hand" -d paperswithcode_datasets_and_tasks2.pickle -o test_out.pickle -p "https://arxiv.org/abs/2404.15319v1" -e=2024-04-03
# python scripts/paperswithcode/upload_results.py 5a4bd76b2b66908f0b8f28fb45dd41b918d3440b Accuracy ../moabb_paper_plots/DATA/results_All_Optuna.csv -s="all classes" -d paperswithcode_datasets_and_tasks2.pickle -o test_out.pickle -p "https://arxiv.org/abs/2404.15319v1" -e=2024-04-03
# python scripts/paperswithcode/upload_results.py 5a4bd76b2b66908f0b8f28fb45dd41b918d3440b Accuracy ../moabb_paper_plots/DATA/results_SSVEP.csv ../moabb_paper_plots/DATA/results_SSVEP_DL.csv -d paperswithcode_datasets_and_tasks2.pickle -p "https://arxiv.org/abs/2404.15319v1" -e=2024-04-03
# python scripts/paperswithcode/upload_results.py 5a4bd76b2b66908f0b8f28fb45dd41b918d3440b AUC-ROC ../moabb_paper_plots/DATA/results_P300.csv ../moabb_paper_plots/DATA/results_P300_DL.csv -d paperswithcode_datasets_and_tasks2.pickle -p "https://arxiv.org/abs/2404.15319v1" -e=2024-04-03