Fix deadlock when trigger_recovery() is called while holding the sidechain lock; add mainchain_height to checkpoints for smarter rollback during reorgs; resume mining immediately after reset (the network rebuilds via mining, not sync); increase checkpoint history from 20 to 50 for deeper reorg coverage
This commit is contained in:
+94
-28
@@ -3140,12 +3140,14 @@ void SideChain::update_checkpoints(uint64_t new_height)
|
||||
if (block && block->m_verified) {
|
||||
Checkpoint cp;
|
||||
cp.height = checkpoint_height;
|
||||
cp.mainchain_height = block->m_txinGenHeight;
|
||||
cp.id = block->m_sidechainId;
|
||||
cp.cumulative_difficulty = block->m_cumulativeDifficulty;
|
||||
|
||||
|
||||
m_checkpoints.push_back(cp);
|
||||
|
||||
LOGINFO(1, "Checkpoint created: height " << cp.height
|
||||
|
||||
LOGINFO(1, "Checkpoint created: sidechain height " << cp.height
|
||||
<< ", mainchain height " << cp.mainchain_height
|
||||
<< ", id " << cp.id);
|
||||
|
||||
// Prune old checkpoints beyond history limit
|
||||
@@ -3184,7 +3186,7 @@ Checkpoint SideChain::get_latest_checkpoint() const
|
||||
{
|
||||
ReadLock lock(m_checkpointsLock);
|
||||
if (m_checkpoints.empty()) {
|
||||
return Checkpoint{0, hash(), difficulty_type()};
|
||||
return Checkpoint{0, 0, hash(), difficulty_type()};
|
||||
}
|
||||
return m_checkpoints.back();
|
||||
}
|
||||
@@ -3237,11 +3239,12 @@ void SideChain::on_block_rejected(const PoolBlock* block, const char* reason)
|
||||
LOGERR(0, "================================================");
|
||||
LOGERR(0, " CONSENSUS FAILURE DETECTED");
|
||||
LOGERR(0, " " << m_externalBlockFailures << " consecutive blocks rejected");
|
||||
LOGERR(0, " Failure around height " << block->m_sidechainHeight);
|
||||
LOGERR(0, " Failure around sidechain height " << block->m_sidechainHeight);
|
||||
LOGERR(0, " Affected mainchain height " << block->m_txinGenHeight);
|
||||
LOGERR(0, " Initiating recovery...");
|
||||
LOGERR(0, "================================================");
|
||||
LOGERR(0, "");
|
||||
trigger_recovery(block->m_sidechainHeight);
|
||||
trigger_recovery(block->m_sidechainHeight, block->m_txinGenHeight);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3250,7 +3253,7 @@ void SideChain::on_block_accepted()
|
||||
m_externalBlockFailures = 0;
|
||||
}
|
||||
|
||||
void SideChain::trigger_recovery(uint64_t failure_height)
|
||||
void SideChain::trigger_recovery(uint64_t failure_height, uint64_t affected_mainchain_height)
|
||||
{
|
||||
if (m_recoveryMode.exchange(true)) {
|
||||
// Already in recovery mode
|
||||
@@ -3260,10 +3263,40 @@ void SideChain::trigger_recovery(uint64_t failure_height)
|
||||
// Disable mining immediately
|
||||
m_readyToMine = false;
|
||||
|
||||
// Find the checkpoint before the failure
|
||||
uint64_t recovery_checkpoint = (failure_height / CHECKPOINT_INTERVAL) * CHECKPOINT_INTERVAL;
|
||||
if (recovery_checkpoint >= failure_height && recovery_checkpoint >= CHECKPOINT_INTERVAL) {
|
||||
recovery_checkpoint -= CHECKPOINT_INTERVAL;
|
||||
LOGINFO(0, "");
|
||||
LOGINFO(0, "================================================");
|
||||
LOGINFO(0, " RECOVERY TRIGGERED");
|
||||
LOGINFO(0, " Sidechain failure height: " << failure_height);
|
||||
LOGINFO(0, " Affected mainchain height: " << affected_mainchain_height);
|
||||
LOGINFO(0, "================================================");
|
||||
|
||||
// Find a checkpoint that references mainchain BEFORE the affected height
|
||||
// This ensures we go back far enough to be before any mainchain reorg
|
||||
uint64_t recovery_checkpoint = 0;
|
||||
{
|
||||
ReadLock lock(m_checkpointsLock);
|
||||
|
||||
if (affected_mainchain_height > 0) {
|
||||
// Mainchain-aware recovery: find checkpoint before the affected mainchain height
|
||||
for (auto it = m_checkpoints.rbegin(); it != m_checkpoints.rend(); ++it) {
|
||||
if (it->mainchain_height > 0 && it->mainchain_height < affected_mainchain_height) {
|
||||
recovery_checkpoint = it->height;
|
||||
LOGINFO(0, "Found safe checkpoint: sidechain " << it->height
|
||||
<< " (mainchain " << it->mainchain_height
|
||||
<< " < affected " << affected_mainchain_height << ")");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: if no mainchain-aware checkpoint found, use sidechain height based approach
|
||||
if (recovery_checkpoint == 0) {
|
||||
recovery_checkpoint = (failure_height / CHECKPOINT_INTERVAL) * CHECKPOINT_INTERVAL;
|
||||
if (recovery_checkpoint >= failure_height && recovery_checkpoint >= CHECKPOINT_INTERVAL) {
|
||||
recovery_checkpoint -= CHECKPOINT_INTERVAL;
|
||||
}
|
||||
LOGINFO(0, "Using sidechain-based fallback checkpoint: " << recovery_checkpoint);
|
||||
}
|
||||
}
|
||||
|
||||
LOGINFO(0, "Recovery target checkpoint: " << recovery_checkpoint);
|
||||
@@ -3395,13 +3428,19 @@ void SideChain::reset_to_checkpoint(uint64_t checkpoint_height)
|
||||
m_pendingRecoveryHeight = 0;
|
||||
m_recoveryMode = false;
|
||||
|
||||
LOGINFO(0, "");
|
||||
LOGINFO(0, "================================================");
|
||||
LOGINFO(0, " RESET COMPLETE - RESYNCING FROM " << checkpoint_height);
|
||||
LOGINFO(0, "================================================");
|
||||
LOGINFO(0, "");
|
||||
// Re-enable mining immediately
|
||||
// Key insight: After a mainchain reorg, ALL p2pool nodes reset together.
|
||||
// There's nothing to "sync" from peers - we need to BUILD NEW BLOCKS.
|
||||
// Mining on the new mainchain state creates fresh sidechain blocks that propagate.
|
||||
m_readyToMine = true;
|
||||
|
||||
// Mining will be re-enabled after resync completes
|
||||
LOGINFO(0, "");
|
||||
LOGINFO(0, "================================================");
|
||||
LOGINFO(0, " RESET COMPLETE");
|
||||
LOGINFO(0, " Chain tip: sidechain height " << actual_checkpoint_height);
|
||||
LOGINFO(0, " Mining re-enabled - will build new blocks on current mainchain");
|
||||
LOGINFO(0, "================================================");
|
||||
LOGINFO(0, "");
|
||||
}
|
||||
|
||||
void SideChain::clear_checkpoints()
|
||||
@@ -3423,8 +3462,9 @@ void SideChain::save_checkpoints() const
|
||||
}
|
||||
|
||||
// Build binary buffer matching file format
|
||||
// Version 2: added mainchain_height field
|
||||
std::vector<uint8_t> buf;
|
||||
const uint32_t version = 1;
|
||||
const uint32_t version = 2;
|
||||
const uint32_t count = static_cast<uint32_t>(m_checkpoints.size());
|
||||
|
||||
buf.insert(buf.end(), reinterpret_cast<const uint8_t*>(&version),
|
||||
@@ -3435,6 +3475,8 @@ void SideChain::save_checkpoints() const
|
||||
for (const Checkpoint& cp : m_checkpoints) {
|
||||
buf.insert(buf.end(), reinterpret_cast<const uint8_t*>(&cp.height),
|
||||
reinterpret_cast<const uint8_t*>(&cp.height) + sizeof(cp.height));
|
||||
buf.insert(buf.end(), reinterpret_cast<const uint8_t*>(&cp.mainchain_height),
|
||||
reinterpret_cast<const uint8_t*>(&cp.mainchain_height) + sizeof(cp.mainchain_height));
|
||||
buf.insert(buf.end(), cp.id.h, cp.id.h + HASH_SIZE);
|
||||
buf.insert(buf.end(), reinterpret_cast<const uint8_t*>(&cp.cumulative_difficulty),
|
||||
reinterpret_cast<const uint8_t*>(&cp.cumulative_difficulty) + sizeof(cp.cumulative_difficulty));
|
||||
@@ -3463,7 +3505,8 @@ void SideChain::save_checkpoints() const
|
||||
}
|
||||
|
||||
// Write version marker for future compatibility
|
||||
const uint32_t version = 1;
|
||||
// Version 2: added mainchain_height field
|
||||
const uint32_t version = 2;
|
||||
f.write(reinterpret_cast<const char*>(&version), sizeof(version));
|
||||
|
||||
// Write checkpoint count
|
||||
@@ -3473,6 +3516,7 @@ void SideChain::save_checkpoints() const
|
||||
// Write each checkpoint
|
||||
for (const Checkpoint& cp : m_checkpoints) {
|
||||
f.write(reinterpret_cast<const char*>(&cp.height), sizeof(cp.height));
|
||||
f.write(reinterpret_cast<const char*>(&cp.mainchain_height), sizeof(cp.mainchain_height));
|
||||
f.write(reinterpret_cast<const char*>(cp.id.h), HASH_SIZE);
|
||||
f.write(reinterpret_cast<const char*>(&cp.cumulative_difficulty), sizeof(cp.cumulative_difficulty));
|
||||
}
|
||||
@@ -3501,7 +3545,7 @@ void SideChain::load_checkpoints()
|
||||
// Read version
|
||||
uint32_t version = *reinterpret_cast<const uint32_t*>(buf.data() + offset);
|
||||
offset += sizeof(version);
|
||||
if (version != 1) {
|
||||
if (version != 1 && version != 2) {
|
||||
LOGWARN(1, "Unknown checkpoint version " << version << ", ignoring");
|
||||
return;
|
||||
}
|
||||
@@ -3514,11 +3558,16 @@ void SideChain::load_checkpoints()
|
||||
return;
|
||||
}
|
||||
|
||||
// Calculate expected entry size based on version
|
||||
const size_t entry_size = (version == 2)
|
||||
? sizeof(uint64_t) + sizeof(uint64_t) + HASH_SIZE + sizeof(difficulty_type) // v2: height + mainchain_height + id + cumulative_diff
|
||||
: sizeof(uint64_t) + HASH_SIZE + sizeof(difficulty_type); // v1: height + id + cumulative_diff
|
||||
|
||||
WriteLock lock(m_checkpointsLock);
|
||||
m_checkpoints.clear();
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
if (offset + sizeof(uint64_t) + HASH_SIZE + sizeof(difficulty_type) > buf.size()) {
|
||||
if (offset + entry_size > buf.size()) {
|
||||
LOGWARN(1, "Checkpoint data corrupted at entry " << i << ", discarding");
|
||||
m_checkpoints.clear();
|
||||
return;
|
||||
@@ -3527,6 +3576,14 @@ void SideChain::load_checkpoints()
|
||||
Checkpoint cp;
|
||||
memcpy(&cp.height, buf.data() + offset, sizeof(cp.height));
|
||||
offset += sizeof(cp.height);
|
||||
|
||||
if (version == 2) {
|
||||
memcpy(&cp.mainchain_height, buf.data() + offset, sizeof(cp.mainchain_height));
|
||||
offset += sizeof(cp.mainchain_height);
|
||||
} else {
|
||||
cp.mainchain_height = 0; // v1 checkpoints don't have mainchain height
|
||||
}
|
||||
|
||||
memcpy(cp.id.h, buf.data() + offset, HASH_SIZE);
|
||||
offset += HASH_SIZE;
|
||||
memcpy(&cp.cumulative_difficulty, buf.data() + offset, sizeof(cp.cumulative_difficulty));
|
||||
@@ -3535,11 +3592,12 @@ void SideChain::load_checkpoints()
|
||||
m_checkpoints.push_back(cp);
|
||||
}
|
||||
|
||||
LOGINFO(1, "Loaded " << count << " checkpoints from Redis");
|
||||
LOGINFO(1, "Loaded " << count << " checkpoints (v" << version << ") from Redis");
|
||||
|
||||
if (!m_checkpoints.empty()) {
|
||||
LOGINFO(1, "Latest anchor point: height " << m_checkpoints.back().height <<
|
||||
", id " << m_checkpoints.back().id);
|
||||
LOGINFO(1, "Latest anchor point: sidechain height " << m_checkpoints.back().height
|
||||
<< ", mainchain height " << m_checkpoints.back().mainchain_height
|
||||
<< ", id " << m_checkpoints.back().id);
|
||||
m_checkpointsNeedValidation = true;
|
||||
}
|
||||
#else
|
||||
@@ -3554,7 +3612,7 @@ void SideChain::load_checkpoints()
|
||||
// Read version
|
||||
uint32_t version = 0;
|
||||
f.read(reinterpret_cast<char*>(&version), sizeof(version));
|
||||
if (version != 1) {
|
||||
if (version != 1 && version != 2) {
|
||||
LOGWARN(1, "Unknown checkpoint file version " << version << ", ignoring");
|
||||
return;
|
||||
}
|
||||
@@ -3574,6 +3632,13 @@ void SideChain::load_checkpoints()
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
Checkpoint cp;
|
||||
f.read(reinterpret_cast<char*>(&cp.height), sizeof(cp.height));
|
||||
|
||||
if (version == 2) {
|
||||
f.read(reinterpret_cast<char*>(&cp.mainchain_height), sizeof(cp.mainchain_height));
|
||||
} else {
|
||||
cp.mainchain_height = 0; // v1 checkpoints don't have mainchain height
|
||||
}
|
||||
|
||||
f.read(reinterpret_cast<char*>(cp.id.h), HASH_SIZE);
|
||||
f.read(reinterpret_cast<char*>(&cp.cumulative_difficulty), sizeof(cp.cumulative_difficulty));
|
||||
|
||||
@@ -3587,11 +3652,12 @@ void SideChain::load_checkpoints()
|
||||
}
|
||||
|
||||
f.close();
|
||||
LOGINFO(1, "Loaded " << count << " checkpoints from " << path);
|
||||
LOGINFO(1, "Loaded " << count << " checkpoints (v" << version << ") from " << path);
|
||||
|
||||
if (!m_checkpoints.empty()) {
|
||||
LOGINFO(1, "Latest anchor point: height " << m_checkpoints.back().height <<
|
||||
", id " << m_checkpoints.back().id);
|
||||
LOGINFO(1, "Latest anchor point: sidechain height " << m_checkpoints.back().height
|
||||
<< ", mainchain height " << m_checkpoints.back().mainchain_height
|
||||
<< ", id " << m_checkpoints.back().id);
|
||||
m_checkpointsNeedValidation = true;
|
||||
}
|
||||
#endif
|
||||
|
||||
+6
-5
@@ -46,10 +46,11 @@ struct MinerShare
|
||||
|
||||
struct Checkpoint
|
||||
{
|
||||
uint64_t height; // Block height (always multiple of CHECKPOINT_INTERVAL)
|
||||
uint64_t height; // Sidechain block height (always multiple of CHECKPOINT_INTERVAL)
|
||||
uint64_t mainchain_height; // Mainchain height referenced by this block
|
||||
hash id; // Block hash at this height
|
||||
difficulty_type cumulative_difficulty; // For additional validation
|
||||
|
||||
|
||||
bool operator==(const Checkpoint& other) const {
|
||||
return height == other.height && id == other.id;
|
||||
}
|
||||
@@ -96,8 +97,8 @@ public:
|
||||
[[nodiscard]] static uint64_t network_major_version(uint64_t height);
|
||||
|
||||
// Checkpoint system constants
|
||||
static constexpr uint64_t CHECKPOINT_INTERVAL = 200;
|
||||
static constexpr uint64_t CHECKPOINT_HISTORY = 20;
|
||||
static constexpr uint64_t CHECKPOINT_INTERVAL = 200; // Checkpoint every 200 sidechain blocks (~33 min)
|
||||
static constexpr uint64_t CHECKPOINT_HISTORY = 50; // Keep 50 checkpoints (~27 hours / ~800 mainchain blocks)
|
||||
|
||||
// Checkpoint system - public interface
|
||||
Checkpoint get_latest_checkpoint() const;
|
||||
@@ -108,7 +109,7 @@ public:
|
||||
void clear_checkpoints();
|
||||
|
||||
// Recovery
|
||||
void trigger_recovery(uint64_t failure_height);
|
||||
void trigger_recovery(uint64_t failure_height, uint64_t affected_mainchain_height = 0);
|
||||
void reset_to_checkpoint(uint64_t checkpoint_height);
|
||||
void check_and_run_deferred_recovery();
|
||||
bool is_in_recovery() const { return m_recoveryMode.load(); }
|
||||
|
||||
Reference in New Issue
Block a user