forked from pytorch/ao
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Create header for packed weight ops (pytorch#1072)
Summary: This diff defines a packed-weights header in torchao/experimental/ops/packed_weights_header.h. The header is 16 bytes and has 4 fields: * format: PackedWeightsFormat (enum) * extra0: int * extra1: int * extra2: int Whenever we have a new format type, we can add a field to the enum. Currently I have a field for the format the universal kernels use, but MPS can have a different format, and KleidiAI also has its own format. I modified the pack ops to write this header at the start of the packed weights. When the linear op runs, it reads the header to understand how the weights were packed. Reviewed By: digantdesai Differential Revision: D63498956
- Loading branch information
1 parent
afc0a02
commit c121875
Showing
4 changed files
with
185 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
39 changes: 39 additions & 0 deletions
39
torchao/experimental/ops/linear_8bit_act_xbit_weight/packed_weights_header.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
// Copyright (c) Meta Platforms, Inc. and affiliates. | ||
// All rights reserved. | ||
// | ||
// This source code is licensed under the license found in the | ||
// LICENSE file in the root directory of this source tree. | ||
|
||
#pragma once | ||
#include <torchao/experimental/ops/macro.h> | ||
#include <torchao/experimental/ops/packed_weights_header.h> | ||
|
||
namespace torchao::ops::linear_8bit_act_xbit_weight { | ||
|
||
torchao::ops::PackedWeightsHeader get_packed_weights_header_universal( | ||
int weight_nbit, | ||
bool has_weight_zeros, | ||
bool has_bias, | ||
int nr, | ||
int kr, | ||
int version = 1) { | ||
TORCHAO_CHECK( | ||
version >= 0 && version < 256, "version must be between 0 and 255"); | ||
TORCHAO_CHECK( | ||
weight_nbit >= 1 && weight_nbit < 256, | ||
"weight_nbit must be between 1 and 255"); | ||
return torchao::ops::PackedWeightsHeader( | ||
torchao::ops::PackedWeightsFormat::linear_8bit_act_xbit_weight_universal, | ||
{((static_cast<unsigned short>(version) << 8) | | ||
static_cast<unsigned short>(weight_nbit)), | ||
((static_cast<unsigned short>(has_weight_zeros) << 8) | | ||
static_cast<unsigned short>(has_bias)), | ||
static_cast<unsigned short>(nr), | ||
static_cast<unsigned short>(kr), | ||
0, | ||
0, | ||
0, | ||
0}); | ||
} | ||
|
||
} // namespace torchao::ops::linear_8bit_act_xbit_weight |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
// Copyright (c) Meta Platforms, Inc. and affiliates. | ||
// All rights reserved. | ||
// | ||
// This source code is licensed under the license found in the | ||
// LICENSE file in the root directory of this source tree. | ||
|
||
#pragma once | ||
#include <array> | ||
|
||
#include <cassert> | ||
namespace torchao::ops { | ||
|
||
enum PackedWeightsFormat : unsigned short { | ||
unknown = 0, | ||
linear_8bit_act_xbit_weight_universal = 1 | ||
}; | ||
|
||
class PackedWeightsHeader { | ||
public: | ||
using params_type = std::array<unsigned short, 7>; | ||
PackedWeightsFormat format; | ||
|
||
// 14 bytes of format specific params | ||
params_type params; | ||
|
||
PackedWeightsHeader( | ||
PackedWeightsFormat format = PackedWeightsFormat::unknown, | ||
params_type params = {0, 0, 0, 0, 0, 0, 0}) | ||
: format{format}, params{params} {} | ||
|
||
inline static constexpr int size() { | ||
static_assert(sizeof(format) + sizeof(params) == 16); | ||
return 16; | ||
} | ||
|
||
inline void write(void* packed_weights) const { | ||
auto header = (unsigned short*)(packed_weights); | ||
header[0] = (unsigned short)format; | ||
for (int i = 0; i < params.size(); i++) { | ||
header[i + 1] = params[i]; | ||
} | ||
} | ||
|
||
static PackedWeightsHeader read(const void* packed_weights) { | ||
auto header = (unsigned short*)(packed_weights); | ||
params_type params; | ||
for (int i = 0; i < params.size(); i++) { | ||
params[i] = header[i + 1]; | ||
} | ||
return PackedWeightsHeader((PackedWeightsFormat)header[0], params); | ||
} | ||
|
||
bool operator==(const PackedWeightsHeader& other) const { | ||
if (format != other.format) { | ||
return false; | ||
} | ||
for (int i = 0; i < params.size(); i++) { | ||
if (params[i] != other.params[i]) { | ||
return false; | ||
} | ||
} | ||
return true; | ||
} | ||
}; | ||
|
||
} // namespace torchao::ops |