generated from openacid/gotmpl
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtraft.proto
273 lines (224 loc) · 6.94 KB
/
traft.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
syntax = "proto3";
// Go output location and package name, in the `path;name` form: generated code
// goes to the current directory (".") and is declared as Go package `traft`.
option go_package = ".;traft";
// gogoproto extensions; this import provides the (gogoproto.*) file options
// used below.
import "github.com/gogo/protobuf/gogoproto/gogo.proto";
// option (gogoproto.goproto_unrecognized_all) = false;
// Generate an Equal method for every message in this file.
option (gogoproto.equal_all) = true;
// Do not prefix generated enum constants with the enum type name.
// NOTE(review): no enum is declared in the visible part of this file, so this
// option currently appears to have no effect — confirm.
option (gogoproto.goproto_enum_prefix_all) = false;
// option (gogoproto.goproto_stringer_all) = true;
// option (gogoproto.goproto_getters_all) = false;
//
// Non-exchangeable logs: r1 and r3. They both modify the same key `x`.
// r1 and r2, as well as r2 and r3, are exchangeable.
//
//   r3: set x = 2 <-.
//   r2: set y = 1   |
//   r1: set x = 1 <-'
// Issues with the original raft
//
// 1. Only one leader can be established in a `term`.
//    Such a design introduces more conflicts if multiple replicas try to
//    become a leader within a short time.
//
// Solution
//
// TODO: describe the solution.
// Cmd describes the action performed by a log record.
message Cmd {
  // Operation to perform.
  // NOTE(review): the set of valid operation names is not defined in this
  // file — confirm against the implementation.
  string Op = 10;

  // Key the operation applies to.
  string Key = 20;

  // Operand of the command. As a oneof, at most one variant is set.
  oneof Value {
    // String operand.
    string VStr = 31;

    // Signed 64-bit integer operand.
    int64 VI64 = 32;

    // Cluster config change: adding/removing members.
    Cluster VClusterConfig = 33;
  }
}
// TailBitmap is a bitmap whose leading bits are all set to `1`.
// Because of that it can be stored compressed: an Offset marking the end of
// the all-ones prefix, followed by a trailing bitmap.
// It is used to describe LogRecord dependency etc.
//
// Layout:
//
//                      reclaimed
//                      |
//                      |     Offset
//                      |     |
//                      v     v
//                ..... X ... 01010...00111  00...
//   bitIndex:    0123...     ^              ^
//                            |              |
//                            Words[0]       Words[1]
//
message TailBitmap {
  // Bit position where the all-ones prefix ends and the trailing bitmap
  // (Words) begins.
  int64 Offset = 1;

  // The trailing bitmap, stored as 64-bit words; Words[0] starts at Offset.
  // NOTE(review): bit order inside a word is not specified here — confirm.
  repeated uint64 Words = 2;

  // The "reclaimed" position shown in the layout above; it lies at or before
  // Offset.
  // NOTE(review): the name is a misspelling of "Reclaimed". It is kept as is
  // because renaming changes generated code and JSON names.
  int64 Reclamed = 3;
}
// LogRecord is a single entry of the replicated log.
message LogRecord {
  // The leader that initially proposed this log.
  // Author may differ from Committer if the Author fails while trying to
  // commit the log record.
  //
  // TODO: this field seems useless, because we already have `Accepted`.
  //
  // This differs from the original raft:
  // raft has no explicit concept of `accepted`, which is essential in paxos.
  // Instead, `committed` in raft is defined as: the leader has forwarded a log
  // of its own term to a quorum.
  LeaderId Author = 1;

  // Log sequence number.
  int64 Seq = 10;

  // What this log does.
  Cmd Cmd = 30;

  // The logs that must be executed before this one.
  // Normally it is the least lsn on a leader that is not purged yet.
  TailBitmap Depends = 32;

  // The previous logs that this log record overrides.
  TailBitmap Overrides = 40;
}
/*
* // SSValue is the value structure in snapshot.
* message SSValue {
*
* oneof Value {
* string VStr = 31;
* int64 VI64 = 32;
* }
*
* // The last log seq number modifying this value.
* int64 Lsn = 40;
* }
*
* message Snapshot {
* map<string, SnapShotValue> KVs = 1;
* int64 WAL
* }
*
*
* message WALRecord {
*
*
* }
*/
// LeaderId identifies a leader or leader candidate by a (Term, Id) pair.
// NOTE(review): how two LeaderIds are ordered (presumably by Term first, then
// Id) is not defined in this file — confirm against the implementation.
message LeaderId {
  // Term of the leader.
  int64 Term = 1;

  // Id of the leader; presumably the replica id of the node — confirm.
  int64 Id = 2;
}
// Node is the state kept by one replica.
//
// Leader election rule: only a replica that has the latest log is allowed to
// become a new leader. I.e., its log was forwarded by the latest leader (max
// Committer) and has the highest log seq number.
// A leader is then chosen from these candidates by their current leader id:
// the max LeaderId wins.
message Node {
  // Replica id of this replica.
  int64 Id = 3;

  // Cluster configuration.
  Cluster Config = 1;

  // The log seq number from which logs are kept here.
  int64 LogOffset = 4;

  // The log records kept on this node.
  repeated LogRecord Logs = 2;

  // Local view of every replica, including this node itself.
  // NOTE(review): the map key is presumably the replica id — confirm.
  map<int64, ReplicaStatus> Status = 6;
}
// LogStatus carries a Committer and an Accepted bitmap. Both fields use the
// same names, types and field numbers as the corresponding fields of
// ReplicaStatus.
message LogStatus {
  // NOTE(review): presumably the latest leader that forwarded logs to the
  // replica, as for Committer in the other messages — confirm.
  LeaderId Committer = 4;

  // NOTE(review): presumably the logs that have been accepted — confirm.
  TailBitmap Accepted = 1;
}
// ReplicaStatus is the status of one replica: its vote, its latest committer
// and the state of its logs.
message ReplicaStatus {
  // Disabled legacy fields ("last seen term+id"); their numbers are now used
  // by Applied (3) and VotedFor (10):
  //   int64 Term = 3;
  //   int64 Id = 10;

  // The last leader this replica voted for, or its local term + local id,
  // e.g. when it voted for itself.
  //
  // TODO cleanup comment:
  // which replica it has voted for as a leader.
  //
  // Accepted is the same as VotedFor after receiving one log-replication
  // message from the leader.
  //
  // Before receiving a message, VotedFor is the leader this replica knows of,
  // and Accepted is nil.
  LeaderId VotedFor = 10;

  // The time at which the voted value expires,
  // as unix time in nanoseconds (10^-9 second).
  int64 VoteExpireAt = 11;

  // The leader that tried to commit all of the local logs.
  // Committer equals Author if a log entry is committed by its Author.
  //
  // If an Author fails and the log is finally committed by some other leader,
  // Committer is a higher value than Author.
  //
  // It is similar to the vrnd/vballot concept in paxos:
  // the ballot number a value is accepted at.
  LeaderId Committer = 4;

  // The logs that have been accepted by this replica.
  TailBitmap Accepted = 1;

  // NOTE(review): presumably the logs known to be committed — confirm.
  TailBitmap Committed = 2;

  // NOTE(review): presumably the logs that have been applied — confirm.
  TailBitmap Applied = 3;
}
// ReplicaInfo describes one member of a cluster.
message ReplicaInfo {
  // Id of the replica.
  int64 Id = 1;

  // Address of the replica.
  // NOTE(review): the address format is not specified in this file — confirm.
  string Addr = 2;

  // Index of this member in its cluster.
  int64 Position = 3;
}
// Cluster is the cluster configuration: its members and its quorums.
message Cluster {
  // The members of the cluster.
  // NOTE(review): the map key is presumably the replica id — confirm.
  map<int64, ReplicaInfo> Members = 11;

  // The quorums of the cluster.
  // NOTE(review): the encoding is not specified here; presumably each value is
  // a bitmap over member Positions — confirm.
  repeated uint64 Quorums = 21;
}
// ElectReq is the request of the Elect rpc.
message ElectReq {
  // The candidate that initiates the election.
  LeaderId Candidate = 1;

  // Local log status of the candidate:
  // the latest leader that forwarded log to the candidate.
  LeaderId Committer = 2;

  // The logs the candidate has.
  TailBitmap Accepted = 3;
}
// ElectReply is the response of the Elect rpc.
message ElectReply {
  // NOTE(review): presumably true when the vote is granted — confirm.
  bool OK = 10;

  // Id of the replica this reply comes from.
  int64 Id = 1;

  // The candidate this replica voted for, or the other leader it had voted
  // for previously.
  LeaderId VotedFor = 2;

  // The latest log committer.
  LeaderId Committer = 4;

  // The logs the replying replica has.
  TailBitmap Accepted = 21;

  // NOTE(review): presumably the logs the replying replica has committed —
  // confirm.
  TailBitmap Committed = 22;

  // The logs that the voter has but the leader candidate does not have.
  // They let the leader rebuild all possibly committed logs from a quorum.
  repeated LogRecord Logs = 30;
}
// LogForwardReq is the request of the LogForward rpc.
message LogForwardReq {
  // NOTE(review): presumably the leader forwarding these logs — confirm.
  LeaderId Committer = 1;

  // The log records being forwarded.
  repeated LogRecord Logs = 2;

  // The logs committed by the leader.
  // A follower should directly commit every log it has that is in Committed.
  TailBitmap Committed = 3;
}
// LogForwardReply is the response of the LogForward rpc.
message LogForwardReply {
  // NOTE(review): presumably true when the forwarded logs are accepted —
  // confirm.
  bool OK = 10;

  // A replica responding with a VotedFor equal to LogForwardReq.Committer
  // indicates that the logs are accepted. Otherwise they are declined.
  LeaderId VotedFor = 1;

  // A replica should also respond with the logs it already has ...
  TailBitmap Accepted = 2;

  // ... and the logs it has committed.
  TailBitmap Committed = 3;
}
// ProposeReply is the response of the Propose rpc.
message ProposeReply {
  // NOTE(review): presumably true when the proposal succeeded — confirm.
  bool OK = 2;

  // NOTE(review): presumably an error description when the proposal failed —
  // confirm.
  string Err = 3;

  // Set when the receiver is not the leader: the caller should redirect to
  // `OtherLeader` to write to TRaft.
  LeaderId OtherLeader = 1;
}
// TRaft is the rpc service of a replica.
service TRaft {
  // Elect takes an ElectReq, which names the candidate that initiates an
  // election, and returns an ElectReply.
  rpc Elect(ElectReq) returns (ElectReply);

  // LogForward takes a LogForwardReq, which carries log records and the
  // leader's Committed bitmap, and returns a LogForwardReply.
  rpc LogForward(LogForwardReq) returns (LogForwardReply);

  // Propose takes a Cmd and returns a ProposeReply, which may redirect the
  // caller to another leader.
  rpc Propose(Cmd) returns (ProposeReply);
}