Commit: CodeCarbon (#350)
* Add CodeCarbon for each pipeline

* [pre-commit.ci] auto fixes from pre-commit.com hooks

* Adding codecarbon to the requirements

* Updating poetry files

* Change test setting output column from 8 to 9

* Change test setting output column from 8 to 9

* changing to a dict

* Change unit from kg to g

* [pre-commit.ci] auto fixes from pre-commit.com hooks

* kg to g

* Update moabb/analysis/results.py

Co-authored-by: Sylvain Chevallier <[email protected]>

* Update moabb/analysis/results.py

Co-authored-by: Sylvain Chevallier <[email protected]>

* Update moabb/evaluations/evaluations.py

Co-authored-by: Sylvain Chevallier <[email protected]>

* Update moabb/evaluations/evaluations.py

Co-authored-by: Sylvain Chevallier <[email protected]>

* Update moabb/evaluations/evaluations.py

Co-authored-by: Sylvain Chevallier <[email protected]>

* Update moabb/tests/analysis.py

Co-authored-by: Sylvain Chevallier <[email protected]>

* Update moabb/tests/analysis.py

Co-authored-by: Sylvain Chevallier <[email protected]>

* Update moabb/tests/analysis.py

Co-authored-by: Sylvain Chevallier <[email protected]>

* Update moabb/tests/analysis.py

Co-authored-by: Sylvain Chevallier <[email protected]>

* Update moabb/tests/evaluations.py

Co-authored-by: Sylvain Chevallier <[email protected]>

* Update pyproject.toml

Co-authored-by: Sylvain Chevallier <[email protected]>

* Fix Initialization of Tracker

* [pre-commit.ci] auto fixes from pre-commit.com hooks

* Trying to solve the poetry issue

* Change EmissionTracker to OfflineEmissionTracker

* [pre-commit.ci] auto fixes from pre-commit.com hooks

* Change EmissionTracker to OfflineEmissionTracker

* Change EmissionTracker to OfflineEmissionTracker

* Returning to online approach

* Updating the whats_new.rst file

* fix: make codecarbon an optional dep

* fix: correct bugs for test

* feat: print carbon footprint after benchmark

* fix: correct expansion bug

* fix: error in unit test

* fix: correct codecarbon dependency

* fix: update poetry.lock and requirements.txt

* fix: correct poetry.lock and requirements.txt

* fix: restricting to older libclang

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: bruAristimunha <[email protected]>
Co-authored-by: Sylvain Chevallier <[email protected]>
Co-authored-by: Sylvain Chevallier <[email protected]>
5 people authored Mar 28, 2023
1 parent 56b343c commit c9c4043
Showing 9 changed files with 1,661 additions and 770 deletions.
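At its core, the commit wraps every pipeline fit/evaluation in a codecarbon EmissionsTracker and stores the result as a new "carbon_emission" column. Below is a minimal, self-contained sketch of the measurement pattern the diff introduces (assuming codecarbon is installed via pip; the logistic regression is a stand-in for a MOABB pipeline and is not part of the diff):

import numpy as np
from sklearn.linear_model import LogisticRegression

try:
    from codecarbon import EmissionsTracker  # optional dependency, as in the diff

    _carbonfootprint = True
except ImportError:
    _carbonfootprint = False

rng = np.random.default_rng(42)
X, y = rng.normal(size=(100, 4)), rng.integers(0, 2, size=100)

if _carbonfootprint:
    # save_to_file=False avoids writing emissions.csv; log_level="error" keeps it quiet
    tracker = EmissionsTracker(save_to_file=False, log_level="error")
    tracker.start()

score = LogisticRegression().fit(X, y).score(X, y)  # stand-in for a pipeline evaluation

if _carbonfootprint:
    emissions = tracker.stop()  # kg CO₂-equivalent, or None if measurement failed
    if emissions is None:
        emissions = np.nan
    print(f"score={score:.2f}, carbon emission={1000 * emissions:.4f} g CO₂eq")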
3 changes: 2 additions & 1 deletion docs/source/whats_new.rst
@@ -22,11 +22,12 @@ Enhancements
 - Adding a comprehensive benchmarking function (:gh:`264` by `Divyesh Narayanan`_ and `Sylvain Chevallier`_)
 - Add meta-information for datasets in documentation (:gh:`317` by `Bruno Aristimunha`_)
 - Add GridSearchCV for different evaluation procedure (:gh:`319` by `Igor Carrara`_)
-- Adding new tutorial to benchmark with GridSearchCV (:gh:`323` by `Igor Carrara`_)
+- Add new tutorial to benchmark with GridSearchCV (:gh:`323` by `Igor Carrara`_)
 - Add six deep learning models (Tensorflow), and build a tutorial to show how to use the deep learning models (:gh:`326` by `Igor Carrara`_, `Bruno Aristimunha`_ and `Sylvain Chevallier`_)
 - Add an augmentation model to the pipeline (:gh:`326` by `Igor Carrara`_)
 - Add BrainDecode example (:gh:`340` by `Igor Carrara`_ and `Bruno Aristimunha`_)
 - Add Google Analytics to the documentation (:gh:`335` by `Bruno Aristimunha`_)
+- Add CodeCarbon to track CO₂ emissions (:gh:`350` by `Igor Carrara`_, `Bruno Aristimunha`_ and `Sylvain Chevallier`_)
 
 Bugs
 ~~~~
34 changes: 30 additions & 4 deletions moabb/analysis/results.py
@@ -13,6 +13,14 @@
 from sklearn.base import BaseEstimator
 
 
+try:
+    from codecarbon import EmissionsTracker  # noqa
+
+    _carbonfootprint = True
+except ImportError:
+    _carbonfootprint = False
+
+
 def get_string_rep(obj):
     if issubclass(type(obj), BaseEstimator):
         str_repr = repr(obj.get_params())
@@ -103,7 +111,7 @@ class that will abstract result storage
                     "{:%Y-%m-%d, %H:%M}".format(datetime.now())
                 )
 
-    def add(self, results, pipelines):
+    def add(self, results, pipelines):  # noqa: C901
         """add results"""
 
         def to_list(res):
@@ -117,6 +125,13 @@ def to_list(res):
             else:
                 return res
 
+        col_names = ["score", "time", "samples"]
+        if _carbonfootprint:
+            n_cols = 4
+            col_names.append("carbon_emission")
+        else:
+            n_cols = 3
+
         with h5py.File(self.filepath, "r+") as f:
             for name, data_dict in results.items():
                 digest = get_digest(pipelines[name])
@@ -140,12 +155,14 @@
                     dt = h5py.special_dtype(vlen=str)
                     dset.create_dataset("id", (0, 2), dtype=dt, maxshape=(None, 2))
                     dset.create_dataset(
-                        "data", (0, 3 + n_add_cols), maxshape=(None, 3 + n_add_cols)
+                        "data",
+                        (0, n_cols + n_add_cols),
+                        maxshape=(None, n_cols + n_add_cols),
                     )
                     dset.attrs["channels"] = d1["n_channels"]
                     dset.attrs.create(
                         "columns",
-                        ["score", "time", "samples", *self.additional_columns],
+                        col_names + self.additional_columns,
                         dtype=dt,
                     )
                 dset = ppline_grp[dname]
@@ -163,8 +180,17 @@
                             f"were specified in the evaluation, but results"
                             f" contain only these keys: {d.keys()}."
                         ) from None
+                    cols = [d["score"], d["time"], d["n_samples"]]
+                    if _carbonfootprint:
+                        if isinstance(d["carbon_emission"], tuple):
+                            cols.append(*d["carbon_emission"])
+                        else:
+                            cols.append(d["carbon_emission"])
                     dset["data"][-1, :] = np.asarray(
-                        [d["score"], d["time"], d["n_samples"], *add_cols]
+                        [
+                            *cols,
+                            *add_cols,
+                        ]
                     )
 
     def to_dataframe(self, pipelines=None):
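The evaluations below yield the emission as a 1-element tuple (already converted to grams), which `add` unpacks into the fourth column of each HDF5 row. A small illustration of that flattening logic, with invented values:

# Values are invented; this mirrors the branch added to `add` above.
d = {"score": 0.87, "time": 1.92, "n_samples": 120, "carbon_emission": (0.42,)}

cols = [d["score"], d["time"], d["n_samples"]]
if isinstance(d["carbon_emission"], tuple):
    cols.append(*d["carbon_emission"])  # star-unpack the 1-element tuple
else:
    cols.append(d["carbon_emission"])

print(cols)  # [0.87, 1.92, 120, 0.42] -> one row of the "data" dataset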
37 changes: 25 additions & 12 deletions moabb/benchmark.py
@@ -21,6 +21,14 @@
 )
 
 
+try:
+    from codecarbon import EmissionsTracker  # noqa
+
+    _carbonfootprint = True
+except ImportError:
+    _carbonfootprint = False
+
+
 log = logging.getLogger(__name__)
 
 
@@ -185,18 +193,23 @@ def _display_results(results):
     for d in results["dataset"].unique():
         for p in results["pipeline"].unique():
             for e in results["evaluation"].unique():
-                tab.append(
-                    {
-                        "dataset": d,
-                        "evaluation": e,
-                        "pipeline": p,
-                        "avg score": results[
-                            (results["dataset"] == d)
-                            & (results["pipeline"] == p)
-                            & (results["evaluation"] == e)
-                        ]["score"].mean(),
-                    }
-                )
+                r = {
+                    "dataset": d,
+                    "evaluation": e,
+                    "pipeline": p,
+                    "avg score": results[
+                        (results["dataset"] == d)
+                        & (results["pipeline"] == p)
+                        & (results["evaluation"] == e)
+                    ]["score"].mean(),
+                }
+                if _carbonfootprint:
+                    r["carbon emission"] = results[
+                        (results["dataset"] == d)
+                        & (results["pipeline"] == p)
+                        & (results["evaluation"] == e)
+                    ]["carbon_emission"].sum()
+                tab.append(r)
     tab = pd.DataFrame(tab)
     print(tab)
 
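The summary table printed after a benchmark therefore gains a summed "carbon emission" column next to the mean score. For intuition, the nested loops above compute roughly the following groupby (a sketch on an invented results frame, not code from the diff):

import pandas as pd

results = pd.DataFrame(
    {
        "dataset": ["d1", "d1", "d2", "d2"],
        "evaluation": ["WithinSession"] * 4,
        "pipeline": ["csp+lda"] * 4,
        "score": [0.81, 0.85, 0.77, 0.79],
        "carbon_emission": [0.30, 0.40, 0.20, 0.25],  # grams CO₂eq per result
    }
)

tab = results.groupby(["dataset", "evaluation", "pipeline"]).agg(
    **{"avg score": ("score", "mean"), "carbon emission": ("carbon_emission", "sum")}
)
print(tab.reset_index())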
65 changes: 65 additions & 0 deletions moabb/evaluations/evaluations.py
@@ -23,6 +23,14 @@
 from moabb.evaluations.base import BaseEvaluation
 
 
+try:
+    from codecarbon import EmissionsTracker
+
+    _carbonfootprint = True
+except ImportError:
+    _carbonfootprint = False
+
+
 log = logging.getLogger(__name__)
 
 # Numpy ArrayLike is only available starting from Numpy 1.20 and Python 3.8
@@ -183,6 +191,10 @@ def _evaluate(self, dataset, pipelines, param_grid):
                 ix = metadata.session == session
 
                 for name, clf in run_pipes.items():
+                    if _carbonfootprint:
+                        # Initialize CodeCarbon
+                        tracker = EmissionsTracker(save_to_file=False, log_level="error")
+                        tracker.start()
                     t_start = time()
                     cv = StratifiedKFold(5, shuffle=True, random_state=self.random_state)
                     scorer = get_scorer(self.paradigm.scoring)
Expand Down Expand Up @@ -228,6 +240,10 @@ def _evaluate(self, dataset, pipelines, param_grid):
error_score=self.error_score,
)
score = acc.mean()
if _carbonfootprint:
emissions = tracker.stop()
if emissions is None:
emissions = np.NaN
duration = time() - t_start
nchan = X.info["nchan"] if isinstance(X, BaseEpochs) else X.shape[1]
res = {
@@ -240,6 +256,8 @@
                         "n_channels": nchan,
                         "pipeline": name,
                     }
+                    if _carbonfootprint:
+                        res["carbon_emission"] = (1000 * emissions,)
 
                     yield res
 
@@ -482,6 +500,11 @@ def evaluate(self, dataset, pipelines, param_grid):
         scorer = get_scorer(self.paradigm.scoring)
 
         for name, clf in run_pipes.items():
+            if _carbonfootprint:
+                # Initialise CodeCarbon
+                tracker = EmissionsTracker(save_to_file=False, log_level="error")
+                tracker.start()
+
             # we want to store results per session
             cv = LeaveOneGroupOut()
 
@@ -501,7 +524,14 @@
                 param_grid, name_grid, name, grid_clf, X, y, cv, groups
             )
 
+            if _carbonfootprint:
+                emissions_grid = tracker.stop()
+                if emissions_grid is None:
+                    emissions_grid = 0
+
             for train, test in cv.split(X, y, groups):
+                if _carbonfootprint:
+                    tracker.start()
                 t_start = time()
                 if isinstance(X, BaseEpochs):
                     cvclf = clone(grid_clf)
@@ -521,6 +551,11 @@
                     error_score=self.error_score,
                 )
                 score = result["test_scores"]
+                if _carbonfootprint:
+                    emissions = tracker.stop()
+                    if emissions is None:
+                        emissions = 0
+
                 duration = time() - t_start
                 nchan = X.info["nchan"] if isinstance(X, BaseEpochs) else X.shape[1]
                 res = {
@@ -533,6 +568,9 @@
                     "n_channels": nchan,
                     "pipeline": name,
                 }
+                if _carbonfootprint:
+                    res["carbon_emission"] = (1000 * (emissions + emissions_grid),)
+
                 yield res
 
     def is_valid(self, dataset):
@@ -643,7 +681,15 @@ def evaluate(self, dataset, pipelines, param_grid):
         cv = LeaveOneGroupOut()
 
         # Implement Grid Search
+        emissions_grid = {}
+
+        if _carbonfootprint:
+            # Initialise CodeCarbon
+            tracker = EmissionsTracker(save_to_file=False, log_level="error")
+
         for name, clf in pipelines.items():
+            if _carbonfootprint:
+                tracker.start()
             name_grid = os.path.join(
                 str(self.hdf5_path), "GridSearch_CrossSubject", dataset.code, name
             )
@@ -652,6 +698,11 @@
                 param_grid, name_grid, name, clf, pipelines, X, y, cv, groups
             )
 
+            if _carbonfootprint:
+                emissions_grid[name] = tracker.stop()
+                if emissions_grid[name] is None:
+                    emissions_grid[name] = 0
+
         # Progressbar at subject level
         for train, test in tqdm(
             cv.split(X, y, groups),
@@ -664,15 +715,25 @@
 
             # iterate over pipelines
             for name, clf in run_pipes.items():
+                if _carbonfootprint:
+                    tracker.start()
                 t_start = time()
                 model = deepcopy(clf).fit(X[train], y[train])
+                if _carbonfootprint:
+                    emissions = tracker.stop()
+                    if emissions is None:
+                        emissions = 0
                 duration = time() - t_start
 
                 # we eval on each session
                 for session in np.unique(sessions[test]):
                     ix = sessions[test] == session
                     score = _score(model, X[test[ix]], y[test[ix]], scorer)
 
+                    if _carbonfootprint:
+                        if emissions_grid[name] is None:
+                            emissions_grid[name] = 0
+
                     nchan = X.info["nchan"] if isinstance(X, BaseEpochs) else X.shape[1]
                     res = {
                         "time": duration,
@@ -685,6 +746,10 @@
                         "pipeline": name,
                     }
 
+                    if _carbonfootprint:
+                        res["carbon_emission"] = (
+                            1000 * (emissions + emissions_grid[name]),
+                        )
                     yield res
 
     def is_valid(self, dataset):
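In the cross-subject evaluation, grid-search emissions are measured once per pipeline (the emissions_grid dict) and then added to each training fold's emissions before the kilograms-to-grams conversion. A compact sketch of that bookkeeping, with invented numbers:

# Invented numbers; mirrors the accounting added to CrossSubjectEvaluation above.
emissions_grid = {"csp+lda": 0.002}  # kg CO₂eq, measured once around the grid search

for fold_emissions in [0.0005, None, 0.0007]:  # tracker.stop() may return None
    emissions = fold_emissions if fold_emissions is not None else 0
    # stored as a 1-element tuple in grams, like res["carbon_emission"] above
    carbon_emission = (1000 * (emissions + emissions_grid["csp+lda"]),)
    print(carbon_emission)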
13 changes: 12 additions & 1 deletion moabb/tests/analysis.py
@@ -11,7 +11,12 @@
 from moabb.paradigms.base import BaseParadigm
 
 
-# dummy evaluation
+try:
+    from codecarbon import EmissionsTracker  # noqa
+
+    _carbonfootprint = True
+except ImportError:
+    _carbonfootprint = False
 
 
 class DummyEvaluation(BaseEvaluation):
@@ -83,6 +88,12 @@ def datasets(self):
     "n_channels": 10,
 }
 
+if _carbonfootprint:
+    d1["carbon_emission"] = 5
+    d2["carbon_emission"] = 10
+    d3["carbon_emission"] = 0.2
+    d4["carbon_emission"] = 1
+
 
 def to_pipeline_dict(pnames):
     return {n: "pipeline {}".format(n) for n in pnames}
16 changes: 12 additions & 4 deletions moabb/tests/evaluations.py
@@ -20,6 +20,14 @@
 from moabb.paradigms.motor_imagery import FakeImageryParadigm
 
 
+try:
+    from codecarbon import EmissionsTracker  # noqa
+
+    _carbonfootprint = True
+except ImportError:
+    _carbonfootprint = False
+
+
 pipelines = OrderedDict()
 pipelines["C"] = make_pipeline(Covariances("oas"), CSP(8), LDA())
 dataset = FakeDataset(["left_hand", "right_hand"], n_subjects=2)
@@ -64,8 +72,8 @@ def test_eval_results(self):
 
         # We should get 4 results, 2 sessions 2 subjects
         self.assertEqual(len(results), 4)
-        # We should have 8 columns in the results data frame
-        self.assertEqual(len(results[0].keys()), 8)
+        # We should have 9 columns if codecarbon is installed, 8 otherwise
+        self.assertEqual(len(results[0].keys()), 9 if _carbonfootprint else 8)
 
     def test_eval_grid_search(self):
         gs_param = {
@@ -109,8 +117,8 @@ def test_eval_grid_search(self):
 
         # We should get 4 results, 2 sessions 2 subjects
         self.assertEqual(len(results), 4)
-        # We should have 8 columns in the results data frame
-        self.assertEqual(len(results[0].keys()), 8)
+        # We should have 9 columns if codecarbon is installed, 8 otherwise
+        self.assertEqual(len(results[0].keys()), 9 if _carbonfootprint else 8)
         # We should check for selected parameters with joblib
         self.assertTrue(os.path.isfile(respath))
         res = joblib.load(respath)
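The ninth column is the new "carbon_emission" key carried by every yielded result when codecarbon is importable. An illustrative result dict with the nine keys the tests now expect (values invented; the key set is taken from the evaluation code above):

res = {
    "time": 1.92,  # seconds spent fitting/scoring
    "dataset": "FakeDataset",  # in MOABB this is the dataset object; a string here for illustration
    "subject": 1,
    "session": "session_0",
    "score": 0.87,
    "n_samples": 120,
    "n_channels": 10,
    "pipeline": "C",
    "carbon_emission": (0.42,),  # grams CO₂eq, as a 1-element tuple
}
assert len(res.keys()) == 9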