Skip to content

Commit

Permalink
Fix: become non-leader when restarting single node cluster
Browse files Browse the repository at this point in the history
A node should not set `server_state` to `Leader` when just starting up,
even when it's the only voter in a cluster. It still needs several step
to initialize leader related fields to become a leader.

- Fix: #607
  • Loading branch information
drmingdrmer committed Dec 3, 2022
1 parent 229bb50 commit ff9a933
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 1 deletion.
9 changes: 8 additions & 1 deletion openraft/src/core/raft_core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,14 @@ impl<C: RaftTypeConfig, N: RaftNetworkFactory<C>, S: RaftStorage<C>> RaftCore<C,
self.engine.metrics_flags.set_data_changed();
}

self.engine.state.server_state = self.engine.calc_server_state();
// On startup, do not assume a leader. Becoming a leader require initialization on several fields.
// TODO: enable starting up as a leader. After `server_state` is removed from Engine.
let server_state = self.engine.calc_server_state();
self.engine.state.server_state = if server_state == ServerState::Leader {
ServerState::Follower
} else {
server_state
};

// To ensure that restarted nodes don't disrupt a stable cluster.
self.set_next_election_time(false);
Expand Down
1 change: 1 addition & 0 deletions openraft/tests/life_cycle/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ mod fixtures;
mod t20_initialization;
mod t20_shutdown;
mod t30_connect_error;
mod t90_issue_607_single_restart;
62 changes: 62 additions & 0 deletions openraft/tests/life_cycle/t90_issue_607_single_restart.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
use std::sync::Arc;
use std::time::Duration;

use maplit::btreeset;
use openraft::Config;
use openraft::ServerState;

use crate::fixtures::init_default_ut_tracing;
use crate::fixtures::RaftRouter;

/// Brings up a cluster of 1 node and restart it.
///
/// Assert that `RaftCore.engine.state.server_state` and `RaftCore.leader_data` are consistent:
/// `server_state == Leader && leader_data.is_some() || server_state != Leader && leader_data.is_none()`
#[async_entry::test(worker_threads = 8, init = "init_default_ut_tracing()", tracing_span = "debug")]
async fn single_restart() -> anyhow::Result<()> {
let config = Arc::new(
Config {
enable_heartbeat: false,
..Default::default()
}
.validate()?,
);

let mut router = RaftRouter::new(config.clone());

tracing::info!("--- bring up cluster of 1 node");
let mut log_index = router.new_nodes_from_single(btreeset! {0}, btreeset! {}).await?;

tracing::info!("--- write to 1 log");
{
router.client_request_many(0, "foo", 1).await?;
log_index += 1;
}

tracing::info!("--- restart node 0");
{
let (node, sto) = router.remove_node(0).unwrap();
node.shutdown().await?;

router.new_raft_node_with_sto(0, sto);
// leader appends a blank log.
log_index += 1;

router.wait(&0, timeout()).state(ServerState::Leader, "node-0 is leader").await?;
router.wait(&0, timeout()).log(Some(log_index), "node-0 restarted").await?;
}

tracing::info!("--- write to 1 log after restart");
{
router.client_request_many(0, "foo", 1).await?;
log_index += 1;

router.wait(&0, timeout()).log(Some(log_index), "node-0 works").await?;
}

Ok(())
}

fn timeout() -> Option<Duration> {
Some(Duration::from_millis(1000))
}

0 comments on commit ff9a933

Please sign in to comment.