Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Failsafes to prevent a consensus round from taking too long #5277

Open
wants to merge 28 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
60de826
Drop out of consensus if the round takes too long
ximinez Feb 5, 2025
197356b
Declare consensus if peers enter into a stable state
ximinez Feb 5, 2025
8ad2cb3
[WIP] Consensus tests
ximinez Feb 12, 2025
7e2fa5c
[WIP] Fix builds
ximinez Feb 12, 2025
196f6b6
Merge remote-tracking branch 'upstream/develop' into ximinez/consensus
ximinez Feb 12, 2025
5108e55
Update levelization
ximinez Feb 12, 2025
5889dc5
Make ConsensusParms const, remove unnecessary copy assignment
ximinez Feb 13, 2025
69c1b00
Merge remote-tracking branch 'upstream/develop' into ximinez/consensus
ximinez Feb 13, 2025
f023ad2
Refine stable state detection:
ximinez Feb 13, 2025
d8aeb73
Merge branch 'develop' into ximinez/consensus
ximinez Feb 14, 2025
a6a2f5f
A few updates:
ximinez Feb 14, 2025
1b4163a
Merge remote-tracking branch 'upstream/develop' into ximinez/consensus
ximinez Feb 14, 2025
a396385
[WIP] Checkpoint
ximinez Feb 14, 2025
9aa953b
Merge remote-tracking branch 'upstream/develop' into ximinez/consensus
ximinez Feb 15, 2025
4a745d3
fixup! [WIP] Checkpoint
ximinez Feb 15, 2025
3c7fb74
Merge remote-tracking branch 'upstream/develop' into ximinez/consensus
ximinez Feb 20, 2025
688139f
Merge remote-tracking branch 'upstream/develop' into ximinez/consensus
ximinez Feb 20, 2025
56d78b8
Fix some bugs in stalled dispute logic, unit tests to demonstrate
ximinez Feb 25, 2025
71c1312
Merge branch 'develop' into ximinez/consensus
ximinez Feb 25, 2025
729821c
Merge remote-tracking branch 'upstream/develop' into ximinez/consensus
ximinez Feb 25, 2025
979462f
Tweak the `stalled` condition, remove some unit test logs
ximinez Feb 26, 2025
de040c9
Merge remote-tracking branch 'upstream/develop' into ximinez/consensus
ximinez Feb 27, 2025
ddd3436
Merge remote-tracking branch 'upstream/develop' into ximinez/consensus
ximinez Feb 27, 2025
e0b3b10
Add to the consensus log ("clog") in some newly added places
ximinez Feb 27, 2025
4d2f020
Merge remote-tracking branch 'upstream/develop' into ximinez/consensus
ximinez Feb 28, 2025
e196bcd
Fix build/merge errors
ximinez Feb 28, 2025
8a3341f
Merge remote-tracking branch 'upstream/develop' into ximinez/consensus
ximinez Mar 11, 2025
88cf631
Merge remote-tracking branch 'upstream/develop' into ximinez/consensus
ximinez Mar 11, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/consensus.md
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,7 @@ struct ConsensusResult
ConsensusTimer roundTime;

// Indicates state in which consensus ended. Once in the accept phase
// will be either Yes or MovedOn
// will be either Yes or MovedOn or Expired
ConsensusState state = ConsensusState::No;
};

Expand Down
175 changes: 168 additions & 7 deletions src/test/consensus/Consensus_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <xrpld/consensus/ConsensusProposal.h>
#include <xrpl/beast/clock/manual_clock.h>
#include <xrpl/beast/unit_test.h>
#include <xrpl/json/to_string.h>
#include <utility>

namespace ripple {
Expand All @@ -40,6 +41,7 @@ class Consensus_test : public beast::unit_test::suite
testShouldCloseLedger()
{
using namespace std::chrono_literals;
testcase("should close ledger");

// Use default parameters
ConsensusParms const p{};
Expand Down Expand Up @@ -78,53 +80,109 @@ class Consensus_test : public beast::unit_test::suite
testCheckConsensus()
{
using namespace std::chrono_literals;
testcase("check consensus");

// Use default parameterss
ConsensusParms const p{};

///////////////
// Disputes still in doubt
//
// Not enough time has elapsed
BEAST_EXPECT(
ConsensusState::No ==
checkConsensus(10, 2, 2, 0, 3s, 2s, p, true, journal_));
checkConsensus(10, 2, 2, 0, 3s, 2s, false, p, true, journal_));

// If not enough peers have propsed, ensure
// more time for proposals
BEAST_EXPECT(
ConsensusState::No ==
checkConsensus(10, 2, 2, 0, 3s, 4s, p, true, journal_));
checkConsensus(10, 2, 2, 0, 3s, 4s, false, p, true, journal_));

// Enough time has elapsed and we all agree
BEAST_EXPECT(
ConsensusState::Yes ==
checkConsensus(10, 2, 2, 0, 3s, 10s, p, true, journal_));
checkConsensus(10, 2, 2, 0, 3s, 10s, false, p, true, journal_));

// Enough time has elapsed and we don't yet agree
BEAST_EXPECT(
ConsensusState::No ==
checkConsensus(10, 2, 1, 0, 3s, 10s, p, true, journal_));
checkConsensus(10, 2, 1, 0, 3s, 10s, false, p, true, journal_));

// Our peers have moved on
// Enough time has elapsed and we all agree
BEAST_EXPECT(
ConsensusState::MovedOn ==
checkConsensus(10, 2, 1, 8, 3s, 10s, p, true, journal_));
checkConsensus(10, 2, 1, 8, 3s, 10s, false, p, true, journal_));

// If no peers, don't agree until time has passed.
BEAST_EXPECT(
ConsensusState::No ==
checkConsensus(0, 0, 0, 0, 3s, 10s, p, true, journal_));
checkConsensus(0, 0, 0, 0, 3s, 10s, false, p, true, journal_));

// Agree if no peers and enough time has passed.
BEAST_EXPECT(
ConsensusState::Yes ==
checkConsensus(0, 0, 0, 0, 3s, 16s, p, true, journal_));
checkConsensus(0, 0, 0, 0, 3s, 16s, false, p, true, journal_));

// Expire if too much time has passed without agreement
BEAST_EXPECT(
ConsensusState::Expired ==
checkConsensus(10, 8, 1, 0, 1s, 19s, false, p, true, journal_));
///////////////
// Stable state
//
// Not enough time has elapsed
BEAST_EXPECT(
ConsensusState::No ==
checkConsensus(10, 2, 2, 0, 3s, 2s, true, p, true, journal_));

// If not enough peers have propsed, ensure
// more time for proposals
BEAST_EXPECT(
ConsensusState::No ==
checkConsensus(10, 2, 2, 0, 3s, 4s, true, p, true, journal_));

// Enough time has elapsed and we all agree
BEAST_EXPECT(
ConsensusState::Yes ==
checkConsensus(10, 2, 2, 0, 3s, 10s, true, p, true, journal_));

// Enough time has elapsed and we don't yet agree, but there's nothing
// left to dispute
BEAST_EXPECT(
ConsensusState::Yes ==
checkConsensus(10, 2, 1, 0, 3s, 10s, true, p, true, journal_));

// Our peers have moved on
// Enough time has elapsed and we all agree, nothing left to dispute
BEAST_EXPECT(
ConsensusState::Yes ==
checkConsensus(10, 2, 1, 8, 3s, 10s, true, p, true, journal_));

// If no peers, don't agree until time has passed.
BEAST_EXPECT(
ConsensusState::No ==
checkConsensus(0, 0, 0, 0, 3s, 10s, true, p, true, journal_));

// Agree if no peers and enough time has passed.
BEAST_EXPECT(
ConsensusState::Yes ==
checkConsensus(0, 0, 0, 0, 3s, 16s, true, p, true, journal_));

// We are done if there's nothing left to dispute, no matter how much
// time has passed
BEAST_EXPECT(
ConsensusState::Yes ==
checkConsensus(10, 8, 1, 0, 1s, 19s, true, p, true, journal_));
}

void
testStandalone()
{
using namespace std::chrono_literals;
using namespace csf;
testcase("standalone");

Sim s;
PeerGroup peers = s.createGroup(1);
Expand All @@ -149,6 +207,7 @@ class Consensus_test : public beast::unit_test::suite
{
using namespace csf;
using namespace std::chrono;
testcase("peers agree");

ConsensusParms const parms{};
Sim sim;
Expand Down Expand Up @@ -186,6 +245,7 @@ class Consensus_test : public beast::unit_test::suite
{
using namespace csf;
using namespace std::chrono;
testcase("slow peers");

// Several tests of a complete trust graph with a subset of peers
// that have significantly longer network delays to the rest of the
Expand Down Expand Up @@ -351,6 +411,7 @@ class Consensus_test : public beast::unit_test::suite
{
using namespace csf;
using namespace std::chrono;
testcase("close time disagree");

// This is a very specialized test to get ledgers to disagree on
// the close time. It unfortunately assumes knowledge about current
Expand Down Expand Up @@ -417,6 +478,8 @@ class Consensus_test : public beast::unit_test::suite
{
using namespace csf;
using namespace std::chrono;
testcase("wrong LCL");

// Specialized test to exercise a temporary fork in which some peers
// are working on an incorrect prior ledger.

Expand Down Expand Up @@ -589,6 +652,7 @@ class Consensus_test : public beast::unit_test::suite
{
using namespace csf;
using namespace std::chrono;
testcase("consensus close time rounding");

// This is a specialized test engineered to yield ledgers with different
// close times even though the peers believe they had close time
Expand Down Expand Up @@ -692,6 +756,7 @@ class Consensus_test : public beast::unit_test::suite
{
using namespace csf;
using namespace std::chrono;
testcase("fork");

std::uint32_t numPeers = 10;
// Vary overlap between two UNLs
Expand Down Expand Up @@ -748,6 +813,7 @@ class Consensus_test : public beast::unit_test::suite
{
using namespace csf;
using namespace std::chrono;
testcase("hub network");

// Simulate a set of 5 validators that aren't directly connected but
// rely on a single hub node for communication
Expand Down Expand Up @@ -835,6 +901,7 @@ class Consensus_test : public beast::unit_test::suite
{
using namespace csf;
using namespace std::chrono;
testcase("preferred by branch");

// Simulate network splits that are prevented from forking when using
// preferred ledger by trie. This is a contrived example that involves
Expand Down Expand Up @@ -967,6 +1034,7 @@ class Consensus_test : public beast::unit_test::suite
{
using namespace csf;
using namespace std::chrono;
testcase("pause for laggards");

// Test that validators that jump ahead of the network slow
// down.
Expand Down Expand Up @@ -1052,6 +1120,98 @@ class Consensus_test : public beast::unit_test::suite
BEAST_EXPECT(sim.synchronized());
}

void
testDisputes()
{
// WIP: Try to create conditions where messaging is unreliable and all
// peers have different initial proposals
using namespace csf;
using namespace std::chrono;
testcase("disputes");

std::uint32_t const numPeers = 35;

ConsensusParms const parms{};
Sim sim;

std::vector<PeerGroup> peerGroups;
peerGroups.reserve(numPeers);
PeerGroup network;
for (int i = 0; i < numPeers; ++i)
{
peerGroups.emplace_back(sim.createGroup(1));
network = network + peerGroups.back();
}

// Fully connected trust graph
network.trust(network);

for (int i = 0; i < peerGroups.size(); ++i)
{
auto const delay = i * 0.2 * parms.ledgerGRANULARITY;
auto& group = peerGroups[i];
auto& nextGroup = peerGroups[(i + 1) % numPeers];
group.connect(nextGroup, round<milliseconds>(delay));
auto& oppositeGroup = peerGroups[(i + numPeers / 2) % numPeers];
group.connect(oppositeGroup, round<milliseconds>(delay));
}

// Initial round to set prior state
sim.run(1);
for (Peer* peer : network)
{
// Every transaction will be seen by every node but two.
// To accomplish that, every peer will add the ids of every peer
// except itself, and the one following.
auto const myId = peer->id;
auto const nextId = (peer->id + PeerID(1)) % PeerID(numPeers);
for (Peer* to : sim.trustGraph.trustedPeers(peer))
{
if (to->id == myId || to->id == nextId)
continue;
peer->openTxs.insert(Tx{static_cast<std::uint32_t>(to->id)});
}
}
sim.run(1);

// Peers are out of sync
if (BEAST_EXPECT(!sim.synchronized()))
{
BEAST_EXPECT(sim.branches() == 1);
for (auto const& grp : peerGroups)
{
Peer const* peer = grp[0];
BEAST_EXPECT(
peer->fullyValidatedLedger.seq() == Ledger::Seq{1});

auto const& lcl = peer->lastClosedLedger;
BEAST_EXPECT(lcl.id() == peer->prevLedgerID());
log << "Peer " << peer->id << ", lcl seq: " << lcl.seq()
<< ", prevProposers: " << peer->prevProposers
<< ", txs in lcl: " << lcl.txs().size() << ", validations: "
<< peer->validations.sizeOfByLedgerCache() << std::endl;
for (auto const& [id, positions] : peer->peerPositions)
{
log << "\tLedger ID: " << id
<< ", #positions: " << positions.size() << std::endl;
}
/*
log << "\t" << to_string(peer->consensus.getJson(true))
<< std::endl
<< std::endl;
*/
/*
BEAST_EXPECT(lcl.seq() == Ledger::Seq{1}, to_string);
// All peers proposed
BEAST_EXPECT(peer->prevProposers == network.size() - 1);
// All transactions were accepted
for (std::uint32_t i = 0; i < network.size(); ++i)
BEAST_EXPECT(lcl.txs().find(Tx{i}) != lcl.txs().end());
*/
}
}
}

void
run() override
{
Expand All @@ -1068,6 +1228,7 @@ class Consensus_test : public beast::unit_test::suite
testHubNetwork();
testPreferredByBranch();
testPauseForLaggards();
testDisputes();
}
};

Expand Down
5 changes: 3 additions & 2 deletions src/xrpld/app/consensus/RCLValidations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,9 @@ handleNewValidation(
auto& validations = app.getValidations();

// masterKey is seated only if validator is trusted or listed
auto const outcome =
validations.add(calcNodeID(masterKey.value_or(signingKey)), val);
auto const nodeKey = masterKey.value_or(signingKey);
// assert(nodeKey != app.getValidationPublicKey());
auto const outcome = validations.add(calcNodeID(nodeKey), val);

if (outcome == ValStatus::current)
{
Expand Down
9 changes: 9 additions & 0 deletions src/xrpld/app/misc/NetworkOPs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1887,6 +1887,15 @@
bool
NetworkOPsImp::processTrustedProposal(RCLCxPeerPos peerPos)
{
if (auto const localPk = app_.getValidationPublicKey())
{
auto const localID = calcNodeID(*localPk);
auto const& peerID = peerPos.proposal().nodeID();

Check warning on line 1893 in src/xrpld/app/misc/NetworkOPs.cpp

View check run for this annotation

Codecov / codecov/patch

src/xrpld/app/misc/NetworkOPs.cpp#L1893

Added line #L1893 was not covered by tests
assert(localID != peerID);
if (localID == peerID)
return false;

Check warning on line 1896 in src/xrpld/app/misc/NetworkOPs.cpp

View check run for this annotation

Codecov / codecov/patch

src/xrpld/app/misc/NetworkOPs.cpp#L1896

Added line #L1896 was not covered by tests
}

return mConsensus.peerProposal(app_.timeKeeper().closeTime(), peerPos);
}

Expand Down
Loading
Loading