Skip to content

Commit 87a89ba

Browse files
ximinezMark Travis
and
Mark Travis
committed
Optimize when to acquire ledgers from the network.
Particularly avoid acquiring ledgers likely to be produced locally very soon. Derived from XRPLF#4764 Co-authored-by: Mark Travis <mtravis@ripple.com>
1 parent a79293f commit 87a89ba

File tree

5 files changed

+178
-9
lines changed

5 files changed

+178
-9
lines changed

src/ripple/app/ledger/InboundLedger.h

+20
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,26 @@ class InboundLedger final : public TimeoutCounter,
197197
std::unique_ptr<PeerSet> mPeerSet;
198198
};
199199

200+
inline std::string
201+
to_string(InboundLedger::Reason reason)
202+
{
203+
using enum InboundLedger::Reason;
204+
switch (reason)
205+
{
206+
case HISTORY:
207+
return "HISTORY";
208+
case SHARD:
209+
return "SHARD";
210+
case GENERIC:
211+
return "GENERIC";
212+
case CONSENSUS:
213+
return "CONSENSUS";
214+
default:
215+
assert(false);
216+
return "unknown";
217+
}
218+
}
219+
200220
} // namespace ripple
201221

202222
#endif

src/ripple/app/ledger/impl/InboundLedgers.cpp

+94-5
Original file line numberDiff line numberDiff line change
@@ -69,23 +69,103 @@ class InboundLedgersImp : public InboundLedgers
6969
std::uint32_t seq,
7070
InboundLedger::Reason reason) override
7171
{
72+
std::stringstream ss;
73+
ss << "InboundLedger::acquire: "
74+
<< "Request: " << to_string(hash) << ", " << seq
75+
<< " NeedNetworkLedger: "
76+
<< (app_.getOPs().isNeedNetworkLedger() ? "yes" : "no")
77+
<< " Reason: " << to_string(reason) << " Old rule: ";
78+
if (app_.getOPs().isNeedNetworkLedger() &&
79+
(reason != InboundLedger::Reason::GENERIC) &&
80+
(reason != InboundLedger::Reason::CONSENSUS))
81+
ss << "false";
82+
else
83+
ss << "true";
84+
7285
assert(hash.isNonZero());
7386
assert(
7487
reason != InboundLedger::Reason::SHARD ||
7588
(seq != 0 && app_.getShardStore()));
7689

77-
// probably not the right rule
78-
if (app_.getOPs().isNeedNetworkLedger() &&
79-
(reason != InboundLedger::Reason::GENERIC) &&
80-
(reason != InboundLedger::Reason::CONSENSUS))
90+
/* Acquiring ledgers is somewhat expensive. It requires lots of
91+
* computation and network communication. Avoid it when it's not
92+
* appropriate. Every validation from a peer for a ledger that
93+
* we do not have locally results in a call to this function: even
94+
* if we are moments away from validating the same ledger.
95+
*/
96+
// If the node is not in "full" state, it needs to sync to the network,
97+
// and doesn't have the necessary tx's and ledger entries to build the
98+
// ledger.
99+
bool const isFull = app_.getOPs().isFull();
100+
// fallingBehind means the last closed ledger is at least 2 behind the
101+
// validated ledger. If the node is falling behind the network, it
102+
// probably needs information from the network to catch up.
103+
//
104+
// The reason this should not simply be only at least 1 behind the
105+
// validated ledger is that a slight lag is normal case because some
106+
// nodes get there slightly later than others. A difference of 2 means
107+
// that at least a full ledger interval has passed, so the node is
108+
// beginning to fall behind.
109+
bool const fallingBehind = app_.getOPs().isFallingBehind();
110+
// If everything else is ok, don't try to acquire the ledger if the
111+
// requested seq is in the near future relative to the validated ledger.
112+
// If the requested ledger is between 1 and 19 inclusive ledgers ahead
113+
// of the valid ledger this node has not built it yet, but it's
114+
// possible/likely it has the tx's necessary to build it and get caught
115+
// up. Plus it might not become validated. On the other hand, if it's
116+
// more than 20 in the future, this node should request it so that it
117+
// can jump ahead and get caught up.
118+
LedgerIndex const validSeq =
119+
app_.getLedgerMaster().getValidLedgerIndex();
120+
constexpr std::size_t lagLeeway = 20;
121+
bool const nearFuture =
122+
(seq > validSeq) && (seq < validSeq + lagLeeway);
123+
// If everything else is ok, don't try to acquire the ledger if the
124+
// request is related to consensus. (Note that consensus calls usually
125+
// pass a seq of 0, so nearFuture will be false other than on a brand
126+
// new network.)
127+
bool const consensus = reason == InboundLedger::Reason::CONSENSUS;
128+
129+
bool const shouldAcquire = [&]() {
130+
// If the node is not synced, try to get the ledger.
131+
if (!isFull)
132+
return true;
133+
// If the node is falling behind, try to get the ledger.
134+
if (fallingBehind)
135+
return true;
136+
// If the ledger is in the near future, do NOT get the ledger. This
137+
// node is probably about to build it.
138+
if (nearFuture)
139+
return false;
140+
// If the request is because of consensus, do NOT get the ledger.
141+
// This node is probably about to build it.
142+
if (consensus)
143+
return false;
144+
return true;
145+
}();
146+
ss << " Evaluating whether to acquire ledger " << hash
147+
<< ". full: " << (isFull ? "true" : "false")
148+
<< ". falling behind: " << (fallingBehind ? "true" : "false")
149+
<< ". ledger sequence " << seq << ". Valid sequence: " << validSeq
150+
<< ". Lag leeway: " << lagLeeway
151+
<< ". request for near future ledger: "
152+
<< (nearFuture ? "true" : "false")
153+
<< ". Consensus: " << (consensus ? "true" : "false")
154+
<< ". Acquiring ledger? " << (shouldAcquire ? "true" : "false");
155+
156+
if (!shouldAcquire)
157+
{
158+
JLOG(j_.debug()) << "Abort(rule): " << ss.str();
81159
return {};
160+
}
82161

83162
bool isNew = true;
84163
std::shared_ptr<InboundLedger> inbound;
85164
{
86165
ScopedLockType sl(mLock);
87166
if (stopping_)
88167
{
168+
JLOG(j_.debug()) << "Abort(stopping): " << ss.str();
89169
return {};
90170
}
91171

@@ -111,13 +191,19 @@ class InboundLedgersImp : public InboundLedgers
111191
}
112192

113193
if (inbound->isFailed())
194+
{
195+
JLOG(j_.debug()) << "Abort(failed): " << ss.str();
114196
return {};
197+
}
115198

116199
if (!isNew)
117200
inbound->update(seq);
118201

119202
if (!inbound->isComplete())
203+
{
204+
JLOG(j_.debug()) << "Abort(incomplete): " << ss.str();
120205
return {};
206+
}
121207

122208
if (reason == InboundLedger::Reason::HISTORY)
123209
{
@@ -130,14 +216,17 @@ class InboundLedgersImp : public InboundLedgers
130216
if (!shardStore)
131217
{
132218
JLOG(j_.error())
133-
<< "Acquiring shard with no shard store available";
219+
<< "Acquiring shard with no shard store available"
220+
<< ss.str();
134221
return {};
135222
}
136223
if (inbound->getLedger()->stateMap().family().isShardBacked())
137224
shardStore->setStored(inbound->getLedger());
138225
else
139226
shardStore->storeLedger(inbound->getLedger());
140227
}
228+
229+
JLOG(j_.debug()) << "Requesting: " << ss.str();
141230
return inbound->getLedger();
142231
}
143232

src/ripple/app/misc/NetworkOPs.cpp

+60-4
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,8 @@ class NetworkOPsImp final : public NetworkOPs
430430
clearLedgerFetch() override;
431431
Json::Value
432432
getLedgerFetchInfo() override;
433+
bool
434+
isFallingBehind() const override;
433435
std::uint32_t
434436
acceptLedger(
435437
std::optional<std::chrono::milliseconds> consensusDelay) override;
@@ -729,6 +731,7 @@ class NetworkOPsImp final : public NetworkOPs
729731
std::atomic<bool> amendmentBlocked_{false};
730732
std::atomic<bool> amendmentWarned_{false};
731733
std::atomic<bool> unlBlocked_{false};
734+
std::atomic<bool> fallingBehind_{false};
732735

733736
ClosureCounter<void, boost::system::error_code const&> waitHandlerCounter_;
734737
boost::asio::steady_timer heartbeatTimer_;
@@ -1810,22 +1813,69 @@ NetworkOPsImp::beginConsensus(uint256 const& networkClosed)
18101813

18111814
auto closingInfo = m_ledgerMaster.getCurrentLedger()->info();
18121815

1813-
JLOG(m_journal.info()) << "Consensus time for #" << closingInfo.seq
1816+
JLOG(m_journal.info()) << "beginConsensus time for #" << closingInfo.seq
18141817
<< " with LCL " << closingInfo.parentHash;
18151818

1816-
auto prevLedger = m_ledgerMaster.getLedgerByHash(closingInfo.parentHash);
1819+
fallingBehind_ = false;
1820+
if (closingInfo.seq < m_ledgerMaster.getValidLedgerIndex() - 1)
1821+
{
1822+
fallingBehind_ = true;
1823+
JLOG(m_journal.warn())
1824+
<< "beginConsensus Current ledger " << closingInfo.seq
1825+
<< " is at least 2 behind validated "
1826+
<< m_ledgerMaster.getValidLedgerIndex();
1827+
}
1828+
1829+
auto const prevLedger =
1830+
m_ledgerMaster.getLedgerByHash(closingInfo.parentHash);
18171831

18181832
if (!prevLedger)
18191833
{
1834+
fallingBehind_ = true;
18201835
// this shouldn't happen unless we jump ledgers
18211836
if (mMode == OperatingMode::FULL)
18221837
{
1823-
JLOG(m_journal.warn()) << "Don't have LCL, going to tracking";
1838+
JLOG(m_journal.warn())
1839+
<< "beginConsensus Don't have LCL, going to tracking";
18241840
setMode(OperatingMode::TRACKING);
18251841
}
18261842

18271843
return false;
18281844
}
1845+
else if (!m_ledgerMaster.isValidated(*prevLedger))
1846+
{
1847+
// Do not merge this block unless it proves useful.
1848+
auto parentLedger = prevLedger;
1849+
for (; parentLedger && !m_ledgerMaster.isValidated(*parentLedger) &&
1850+
parentLedger->info().seq > closingInfo.seq - 20;
1851+
parentLedger = m_ledgerMaster.getLedgerByHash(
1852+
parentLedger->info().parentHash))
1853+
{
1854+
JLOG(m_journal.debug())
1855+
<< "beginConsensus for " << closingInfo.seq << ". Ledger "
1856+
<< parentLedger->info().seq << " (" << parentLedger->info().hash
1857+
<< ") is not validated.";
1858+
}
1859+
if (parentLedger && m_ledgerMaster.isValidated(*parentLedger))
1860+
{
1861+
JLOG(m_journal.debug())
1862+
<< "beginConsensus for " << closingInfo.seq << ". Ledger "
1863+
<< parentLedger->info().seq << " (" << parentLedger->info().hash
1864+
<< ") IS validated.";
1865+
}
1866+
else
1867+
{
1868+
if (parentLedger)
1869+
JLOG(m_journal.warn())
1870+
<< "beginConsensus for " << closingInfo.seq << ". Previous "
1871+
<< closingInfo.seq - parentLedger->info().seq
1872+
<< " ledgers are not validated";
1873+
else
1874+
JLOG(m_journal.warn())
1875+
<< "beginConsensus for " << closingInfo.seq
1876+
<< ". Ran out of parent ledgers to check.";
1877+
}
1878+
}
18291879

18301880
assert(prevLedger->info().hash == closingInfo.parentHash);
18311881
assert(
@@ -1863,7 +1913,7 @@ NetworkOPsImp::beginConsensus(uint256 const& networkClosed)
18631913
mLastConsensusPhase = currPhase;
18641914
}
18651915

1866-
JLOG(m_journal.debug()) << "Initiating consensus engine";
1916+
JLOG(m_journal.debug()) << "beginConsensus Initiating consensus engine";
18671917
return true;
18681918
}
18691919

@@ -2749,6 +2799,12 @@ NetworkOPsImp::getLedgerFetchInfo()
27492799
return app_.getInboundLedgers().getInfo();
27502800
}
27512801

2802+
bool
2803+
NetworkOPsImp::isFallingBehind() const
2804+
{
2805+
return fallingBehind_;
2806+
}
2807+
27522808
void
27532809
NetworkOPsImp::pubProposedTransaction(
27542810
std::shared_ptr<ReadView const> const& ledger,

src/ripple/app/misc/NetworkOPs.h

+2
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,8 @@ class NetworkOPs : public InfoSub::Source
227227
clearLedgerFetch() = 0;
228228
virtual Json::Value
229229
getLedgerFetchInfo() = 0;
230+
virtual bool
231+
isFallingBehind() const = 0;
230232

231233
/** Accepts the current transaction tree, return the new ledger's sequence
232234

src/ripple/protocol/LedgerHeader.h

+2
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ struct LedgerHeader
5555

5656
// If validated is false, it means "not yet validated."
5757
// Once validated is true, it will never be set false at a later time.
58+
// NOTE: If you are accessing this directly, you are probably doing it
59+
// wrong. Use LedgerMaster::isValidated().
5860
// VFALCO TODO Make this not mutable
5961
bool mutable validated = false;
6062
bool accepted = false;

0 commit comments

Comments
 (0)