diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs
index a8d055a9c5b..a3b0c5bcb83 100644
--- a/lightning/src/chain/channelmonitor.rs
+++ b/lightning/src/chain/channelmonitor.rs
@@ -6817,6 +6817,7 @@ mod tests {
 		let legacy_cfg = test_legacy_channel_config();
 		let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &[Some(legacy_cfg.clone()), Some(legacy_cfg.clone()), Some(legacy_cfg)]);
 		let nodes = create_network(3, &node_cfgs, &node_chanmgrs);
+		nodes[1].disable_monitor_completeness_assertion();
 
 		let channel = create_announced_chan_between_nodes(&nodes, 0, 1);
 		create_announced_chan_between_nodes(&nodes, 1, 2);
diff --git a/lightning/src/chain/mod.rs b/lightning/src/chain/mod.rs
index bc47f1b1db6..99e184d8fda 100644
--- a/lightning/src/chain/mod.rs
+++ b/lightning/src/chain/mod.rs
@@ -233,11 +233,10 @@ pub enum ChannelMonitorUpdateStatus {
 	/// This includes performing any `fsync()` calls required to ensure the update is guaranteed to
 	/// be available on restart even if the application crashes.
 	///
-	/// If you return this variant, you cannot later return [`InProgress`] from the same instance of
-	/// [`Persist`]/[`Watch`] without first restarting.
+	/// You cannot switch from [`InProgress`] to this variant for the same channel without first
+	/// restarting. However, switching from this variant to [`InProgress`] is always allowed.
 	///
 	/// [`InProgress`]: ChannelMonitorUpdateStatus::InProgress
-	/// [`Persist`]: chainmonitor::Persist
 	Completed,
 	/// Indicates that the update will happen asynchronously in the background or that a transient
 	/// failure occurred which is being retried in the background and will eventually complete.
@@ -263,12 +262,7 @@ pub enum ChannelMonitorUpdateStatus {
 	/// reliable, this feature is considered beta, and a handful of edge-cases remain. Until the
 	/// remaining cases are fixed, in rare cases, *using this feature may lead to funds loss*.
 	///
-	/// If you return this variant, you cannot later return [`Completed`] from the same instance of
-	/// [`Persist`]/[`Watch`] without first restarting.
-	///
 	/// [`InProgress`]: ChannelMonitorUpdateStatus::InProgress
-	/// [`Completed`]: ChannelMonitorUpdateStatus::Completed
-	/// [`Persist`]: chainmonitor::Persist
 	InProgress,
 	/// Indicates that an update has failed and will not complete at any point in the future.
 	///
@@ -328,6 +322,8 @@ pub trait Watch {
 	/// cannot be retried, the node should shut down immediately after returning
 	/// [`ChannelMonitorUpdateStatus::UnrecoverableError`], see its documentation for more info.
 	///
+	/// See [`ChannelMonitorUpdateStatus`] for requirements on when each variant may be returned.
+	///
 	/// [`ChannelManager`]: crate::ln::channelmanager::ChannelManager
 	fn update_channel(
 		&self, channel_id: ChannelId, update: &ChannelMonitorUpdate,
diff --git a/lightning/src/ln/chanmon_update_fail_tests.rs b/lightning/src/ln/chanmon_update_fail_tests.rs
index cd32d219b93..4a6d81a806a 100644
--- a/lightning/src/ln/chanmon_update_fail_tests.rs
+++ b/lightning/src/ln/chanmon_update_fail_tests.rs
@@ -16,7 +16,7 @@ use crate::chain::chaininterface::LowerBoundedFeeEstimator;
 use crate::chain::chainmonitor::ChainMonitor;
 use crate::chain::channelmonitor::{ChannelMonitor, MonitorEvent, ANTI_REORG_DELAY};
 use crate::chain::transaction::OutPoint;
-use crate::chain::{ChannelMonitorUpdateStatus, Listen, Watch};
+use crate::chain::{ChannelMonitorUpdateStatus, Confirm, Listen, Watch};
 use crate::events::{ClosureReason, Event, HTLCHandlingFailureType, PaymentPurpose};
 use crate::ln::channel::AnnouncementSigsState;
 use crate::ln::channelmanager::{PaymentId, RAACommitmentOrder};
@@ -175,6 +175,7 @@ fn do_test_simple_monitor_temporary_update_fail(disconnect: bool) {
 	let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
 	let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
 	let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs);
+	nodes[0].disable_monitor_completeness_assertion();
 
 	let node_a_id = nodes[0].node.get_our_node_id();
 	let node_b_id = nodes[1].node.get_our_node_id();
@@ -316,6 +317,7 @@ fn do_test_monitor_temporary_update_fail(disconnect_count: usize) {
 	let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
 	let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
 	let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs);
+	nodes[0].disable_monitor_completeness_assertion();
 
 	let node_a_id = nodes[0].node.get_our_node_id();
 	let node_b_id = nodes[1].node.get_our_node_id();
@@ -969,6 +971,7 @@ fn do_test_monitor_update_fail_raa(test_ignore_second_cs: bool) {
 	let node_cfgs = create_node_cfgs(3, &chanmon_cfgs);
 	let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &[None, None, None]);
 	let mut nodes = create_network(3, &node_cfgs, &node_chanmgrs);
+	nodes[1].disable_monitor_completeness_assertion();
 
 	let node_a_id = nodes[0].node.get_our_node_id();
 	let node_b_id = nodes[1].node.get_our_node_id();
@@ -1500,6 +1503,7 @@ fn claim_while_disconnected_monitor_update_fail() {
 	let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
 	let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
 	let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs);
+	nodes[1].disable_monitor_completeness_assertion();
 
 	let node_a_id = nodes[0].node.get_our_node_id();
 	let node_b_id = nodes[1].node.get_our_node_id();
@@ -1727,6 +1731,7 @@ fn first_message_on_recv_ordering() {
 	let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
 	let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
 	let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs);
+	nodes[1].disable_monitor_completeness_assertion();
 
 	let node_a_id = nodes[0].node.get_our_node_id();
 	let node_b_id = nodes[1].node.get_our_node_id();
@@ -3849,6 +3854,7 @@ fn do_test_durable_preimages_on_closed_channel(
 	// Now reload node B
 	let manager_b = nodes[1].node.encode();
 	reload_node!(nodes[1], &manager_b, &[&mon_ab, &mon_bc], persister, chain_mon, node_b_reload);
+	nodes[1].disable_monitor_completeness_assertion();
 
 	nodes[0].node.peer_disconnected(node_b_id);
 	nodes[2].node.peer_disconnected(node_b_id);
@@ -5170,3 +5176,72 @@ fn test_mpp_claim_to_holding_cell() {
 	expect_payment_claimable!(nodes[3], paymnt_hash_2, payment_secret_2, 400_000);
 	claim_payment(&nodes[2], &[&nodes[3]], preimage_2);
 }
+
+#[test]
+#[should_panic(expected = "Watch::update_channel returned Completed while prior updates are still InProgress")]
+fn test_monitor_update_fail_after_funding_spend() {
+	// When a counterparty commitment transaction confirms (funding spend), the
+	// ChannelMonitor sets funding_spend_seen. If a commitment_signed from the
+	// counterparty is then processed (a race between chain events and message
+	// processing), update_monitor returns Err because no_further_updates_allowed()
+	// is true. ChainMonitor overrides the result to InProgress, permanently
+	// freezing the channel. A subsequent preimage claim returning Completed then
+	// triggers the per-channel assertion.
+	let chanmon_cfgs = create_chanmon_cfgs(2);
+	let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
+	let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
+	let nodes = create_network(2, &node_cfgs, &node_chanmgrs);
+
+	let node_a_id = nodes[0].node.get_our_node_id();
+
+	let (_, _, chan_id, _) = create_announced_chan_between_nodes(&nodes, 0, 1);
+
+	// Route payment 1 fully so B can claim it later.
+	let (payment_preimage_1, _payment_hash_1, ..) =
+		route_payment(&nodes[0], &[&nodes[1]], 1_000_000);
+
+	// Get A's commitment tx (this is the "counterparty" commitment from B's perspective).
+	let as_commitment_tx = get_local_commitment_txn!(nodes[0], chan_id);
+	assert_eq!(as_commitment_tx.len(), 1);
+
+	// Confirm A's commitment tx on B's chain_monitor ONLY (not on B's ChannelManager).
+	// This sets funding_spend_seen in the monitor, making no_further_updates_allowed() true.
+	let (block_hash, height) = nodes[1].best_block_info();
+	let block = create_dummy_block(block_hash, height + 1, vec![as_commitment_tx[0].clone()]);
+	let txdata: Vec<_> = block.txdata.iter().enumerate().collect();
+	nodes[1].chain_monitor.chain_monitor.transactions_confirmed(
+		&block.header, &txdata, height + 1,
+	);
+
+	// Send payment 2 from A to B.
+	let (route, payment_hash_2, _, payment_secret_2) =
+		get_route_and_payment_hash!(&nodes[0], nodes[1], 1_000_000);
+	nodes[0]
+		.node
+		.send_payment_with_route(
+			route,
+			payment_hash_2,
+			RecipientOnionFields::secret_only(payment_secret_2),
+			PaymentId(payment_hash_2.0),
+		)
+		.unwrap();
+	check_added_monitors(&nodes[0], 1);
+
+	let mut events = nodes[0].node.get_and_clear_pending_msg_events();
+	assert_eq!(events.len(), 1);
+	let payment_event = SendEvent::from_event(events.remove(0));
+
+	nodes[1].node.handle_update_add_htlc(node_a_id, &payment_event.msgs[0]);
+
+	// B processes commitment_signed. The monitor's update_monitor succeeds on the
+	// update steps, but returns Err at the end because no_further_updates_allowed()
+	// is true (funding_spend_seen). ChainMonitor overrides the result to InProgress.
+	nodes[1].node.handle_commitment_signed(node_a_id, &payment_event.commitment_msg[0]);
+	check_added_monitors(&nodes[1], 1);
+	assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
+
+	// B claims payment 1. The PaymentPreimage monitor update returns Completed
+	// (update_monitor succeeds for preimage, and persister returns Completed),
+	// but the prior InProgress from the commitment_signed is still pending.
+	nodes[1].node.claim_funds(payment_preimage_1);
+}
diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs
index ada27af749f..a17188611d4 100644
--- a/lightning/src/ln/channelmanager.rs
+++ b/lightning/src/ln/channelmanager.rs
@@ -2793,12 +2793,12 @@ pub struct ChannelManager<
 	#[cfg(any(test, feature = "_test_utils"))]
 	pub(super) per_peer_state: FairRwLock<HashMap<PublicKey, Mutex<PeerState<SP>>>>,
 
-	/// We only support using one of [`ChannelMonitorUpdateStatus::InProgress`] and
-	/// [`ChannelMonitorUpdateStatus::Completed`] without restarting. Because the API does not
-	/// otherwise directly enforce this, we enforce it in non-test builds here by storing which one
-	/// is in use.
-	#[cfg(not(any(test, feature = "_externalize_tests")))]
-	monitor_update_type: AtomicUsize,
+	/// When set, disables the panic when `Watch::update_channel` returns `Completed` while
+	/// prior updates are still `InProgress`. Some legacy tests switch the persister between
+	/// `InProgress` and `Completed` mid-flight, which violates this contract but is otherwise
+	/// harmless in a test context.
+	#[cfg(test)]
+	pub(crate) skip_monitor_update_assertion: AtomicBool,
 
 	/// The set of events which we need to give to the user to handle. In some cases an event may
 	/// require some further action after the user handles it (currently only blocking a monitor
@@ -3541,8 +3541,8 @@ impl<
 			per_peer_state: FairRwLock::new(new_hash_map()),
 
-			#[cfg(not(any(test, feature = "_externalize_tests")))]
-			monitor_update_type: AtomicUsize::new(0),
+			#[cfg(test)]
+			skip_monitor_update_assertion: AtomicBool::new(false),
 
 			pending_events: Mutex::new(VecDeque::new()),
 			pending_events_processor: AtomicBool::new(false),
@@ -10076,6 +10076,15 @@ This indicates a bug inside LDK. Please report this error at https://github.com/
 				if update_completed {
 					let _ = in_flight_updates.remove(update_idx);
 				}
+				// A Watch implementation must not return Completed while prior updates are
+				// still InProgress, as this would violate the async persistence contract.
+				#[cfg(test)]
+				let skip_check = self.skip_monitor_update_assertion.load(Ordering::Relaxed);
+				#[cfg(not(test))]
+				let skip_check = false;
+				if !skip_check && update_completed && !in_flight_updates.is_empty() {
+					panic!("Watch::update_channel returned Completed while prior updates are still InProgress");
+				}
 				(update_completed, update_completed && in_flight_updates.is_empty())
 			} else {
 				// We blindly assume that the ChannelMonitorUpdate will be regenerated on startup if we
@@ -10141,23 +10150,13 @@ This indicates a bug inside LDK. Please report this error at https://github.com/
 				panic!("{}", err_str);
 			},
 			ChannelMonitorUpdateStatus::InProgress => {
-				#[cfg(not(any(test, feature = "_externalize_tests")))]
-				if self.monitor_update_type.swap(1, Ordering::Relaxed) == 2 {
-					panic!("Cannot use both ChannelMonitorUpdateStatus modes InProgress and Completed without restart");
-				}
 				log_debug!(
 					logger,
 					"ChannelMonitor update in flight, holding messages until the update completes.",
 				);
 				false
 			},
-			ChannelMonitorUpdateStatus::Completed => {
-				#[cfg(not(any(test, feature = "_externalize_tests")))]
-				if self.monitor_update_type.swap(2, Ordering::Relaxed) == 1 {
-					panic!("Cannot use both ChannelMonitorUpdateStatus modes InProgress and Completed without restart");
-				}
-				true
-			},
+			ChannelMonitorUpdateStatus::Completed => true,
 		}
 	}
@@ -19707,8 +19706,8 @@ impl<
 			per_peer_state: FairRwLock::new(per_peer_state),
 
-			#[cfg(not(any(test, feature = "_externalize_tests")))]
-			monitor_update_type: AtomicUsize::new(0),
+			#[cfg(test)]
+			skip_monitor_update_assertion: AtomicBool::new(false),
 
 			pending_events: Mutex::new(pending_events_read),
 			pending_events_processor: AtomicBool::new(false),
diff --git a/lightning/src/ln/functional_test_utils.rs b/lightning/src/ln/functional_test_utils.rs
index 2d971c3a100..00db5504fc1 100644
--- a/lightning/src/ln/functional_test_utils.rs
+++ b/lightning/src/ln/functional_test_utils.rs
@@ -598,6 +598,14 @@ impl<'a, 'b, 'c> Node<'a, 'b, 'c> {
 			self.node.init_features() | self.onion_messenger.provided_init_features(peer_node_id)
 		})
 	}
+
+	/// Disables the panic when `Watch::update_channel` returns `Completed` while prior updates
+	/// are still `InProgress`. Some legacy tests switch the persister between modes mid-flight,
+	/// which violates this contract but is otherwise harmless.
+	#[cfg(test)]
+	pub fn disable_monitor_completeness_assertion(&self) {
+		self.node.skip_monitor_update_assertion.store(true, core::sync::atomic::Ordering::Relaxed);
+	}
 }
 
 impl<'a, 'b, 'c> std::panic::UnwindSafe for Node<'a, 'b, 'c> {}
diff --git a/lightning/src/ln/monitor_tests.rs b/lightning/src/ln/monitor_tests.rs
index 18a976871a6..687c5ebb8cb 100644
--- a/lightning/src/ln/monitor_tests.rs
+++ b/lightning/src/ln/monitor_tests.rs
@@ -3384,6 +3384,7 @@ fn test_claim_event_never_handled() {
 	let chan_0_monitor_serialized = get_monitor!(nodes[1], chan.2).encode();
 	let mons = &[&chan_0_monitor_serialized[..]];
 	reload_node!(nodes[1], &init_node_ser, mons, persister, new_chain_mon, nodes_1_reload);
+	nodes[1].disable_monitor_completeness_assertion();
 
 	expect_payment_claimed!(nodes[1], payment_hash_a, 1_000_000);
 	// The reload logic spuriously generates a redundant payment preimage-containing
diff --git a/lightning/src/ln/reload_tests.rs b/lightning/src/ln/reload_tests.rs
index bb730f8fba8..8d9eac5c001 100644
--- a/lightning/src/ln/reload_tests.rs
+++ b/lightning/src/ln/reload_tests.rs
@@ -823,12 +823,14 @@ fn do_test_partial_claim_before_restart(persist_both_monitors: bool, double_rest
 	// Now restart nodes[3].
 	reload_node!(nodes[3], original_manager.clone(), &[&updated_monitor.0, &original_monitor.0], persist_d_1, chain_d_1, node_d_1);
+	nodes[3].disable_monitor_completeness_assertion();
 
 	if double_restart {
 		// Previously, we had a bug where we'd fail to reload if we re-persist the `ChannelManager`
 		// without updating any `ChannelMonitor`s as we'd fail to double-initiate the claim replay.
 		// We test that here ensuring that we can reload again.
 		reload_node!(nodes[3], node_d_1.encode(), &[&updated_monitor.0, &original_monitor.0], persist_d_2, chain_d_2, node_d_2);
+		nodes[3].disable_monitor_completeness_assertion();
 	}
 
 	// Until the startup background events are processed (in `get_and_clear_pending_events`,
@@ -2216,6 +2218,7 @@ fn test_reload_with_mpp_claims_on_same_channel() {
 		nodes_1_deserialized,
 		Some(true)
 	);
+	nodes[1].disable_monitor_completeness_assertion();
 
 	// When the claims are reconstructed during reload, PaymentForwarded events are regenerated.
 	let events = nodes[1].node.get_and_clear_pending_events();
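
Note on the updated `ChannelMonitorUpdateStatus` contract: a persister may begin returning `InProgress` at any time, but once an update for a channel has been reported `InProgress`, later updates for that same channel must not be reported `Completed` until the earlier ones have been signalled complete; that ordering is what the new per-channel assertion above enforces. The sketch below illustrates one way a persister could track this; the `TrackingPersister` type, its method names, and the local `UpdateStatus`/`ChanId` types are illustrative stand-ins and not LDK's actual `Persist` API.

```rust
use std::collections::HashMap;
use std::sync::Mutex;

/// Mirrors `ChannelMonitorUpdateStatus` for this sketch.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum UpdateStatus {
    Completed,
    InProgress,
}

/// Hypothetical channel identifier; stands in for LDK's `ChannelId`.
type ChanId = [u8; 32];

/// Toy persister illustrating the ordering rule: once an update for a channel is
/// reported `InProgress`, later updates for that channel must also be reported
/// `InProgress` until every earlier one has been signalled complete. Moving the
/// other way (a fully-completed channel starting to return `InProgress`) is fine.
struct TrackingPersister {
    /// Updates per channel reported `InProgress` but not yet completed.
    in_flight: Mutex<HashMap<ChanId, usize>>,
}

impl TrackingPersister {
    fn new() -> Self {
        Self { in_flight: Mutex::new(HashMap::new()) }
    }

    /// Called for each monitor update. `synchronously_persisted` is whatever the
    /// storage backend reports about this particular write.
    fn update_persisted_channel(&self, chan: ChanId, synchronously_persisted: bool) -> UpdateStatus {
        let mut in_flight = self.in_flight.lock().unwrap();
        let pending = in_flight.entry(chan).or_insert(0);
        if synchronously_persisted && *pending == 0 {
            // Nothing earlier is outstanding, so reporting Completed is allowed.
            UpdateStatus::Completed
        } else {
            // Either this write is async or an earlier update for this channel is
            // still outstanding; report InProgress and complete it later.
            *pending += 1;
            UpdateStatus::InProgress
        }
    }

    /// Called when an async write for `chan` finishes. In a real implementation this
    /// is where the completion would be forwarded so the channel can resume progress.
    fn update_done(&self, chan: ChanId) {
        let mut in_flight = self.in_flight.lock().unwrap();
        if let Some(pending) = in_flight.get_mut(&chan) {
            *pending = pending.saturating_sub(1);
        }
    }
}

fn main() {
    let persister = TrackingPersister::new();
    let chan = [0u8; 32];

    // A synchronous write with nothing outstanding may report Completed.
    assert_eq!(persister.update_persisted_channel(chan, true), UpdateStatus::Completed);
    // A later async write flips this channel to InProgress; that direction is allowed.
    assert_eq!(persister.update_persisted_channel(chan, false), UpdateStatus::InProgress);
    // While that update is outstanding, even a synchronous write must not claim Completed.
    assert_eq!(persister.update_persisted_channel(chan, true), UpdateStatus::InProgress);
    // Once both pending updates are signalled done, Completed becomes legal again.
    persister.update_done(chan);
    persister.update_done(chan);
    assert_eq!(persister.update_persisted_channel(chan, true), UpdateStatus::Completed);
}
```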