@@ -2342,7 +2342,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
goto out;
}
- sock_orphan(ssk);
subflow->disposable = 1;
/* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops
@@ -2350,7 +2349,20 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
* reference owned by msk;
*/
if (!inet_csk(ssk)->icsk_ulp_ops) {
+ WARN_ON_ONCE(!sock_flag(ssk, SOCK_DEAD));
kfree_rcu(subflow, rcu);
+ } else if (msk->in_accept_queue && msk->first == ssk) {
+ /* if the first subflow moved to a close state, e.g. due to an
+ * incoming reset, and we reach here before inet_child_forget(),
+ * the TCP stack could later try to close it via
+ * inet_csk_listen_stop() or deliver it to user space via
+ * accept().
+ * We can't delete the subflow - or risk a double free - nor let
+ * the msk survive - or it will be leaked in the non-accept
+ * scenario: fall back and let TCP cope with the subflow cleanup.
+ */
+ WARN_ON_ONCE(sock_flag(ssk, SOCK_DEAD));
+ mptcp_subflow_drop_ctx(ssk);
} else {
/* otherwise tcp will dispose of the ssk and subflow ctx */
if (ssk->sk_state == TCP_LISTEN) {
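
The branch above is easier to picture from user space: the peer resets an MPTCP connection while it is still sitting in the listener's accept queue, so the first subflow hits tcp_done() before either accept() or inet_child_forget() runs. Below is a minimal, hypothetical repro sketch along those lines - not part of the patch - assuming a kernel with CONFIG_MPTCP; the RST is forced via a zero-linger close(), the port is arbitrary, and error checking is elided:

/* hypothetical repro: reset an MPTCP connection while it still
 * sits in the listener's accept queue, then close the listener so
 * inet_csk_listen_stop() walks over the unaccepted msk
 */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP	262
#endif

int main(void)
{
	struct sockaddr_in a = { .sin_family = AF_INET,
				 .sin_port = htons(8099),
				 .sin_addr.s_addr = htonl(INADDR_LOOPBACK) };
	struct linger lg = { .l_onoff = 1, .l_linger = 0 };
	int l = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);
	int c = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);

	bind(l, (struct sockaddr *)&a, sizeof(a));
	listen(l, 1);
	connect(c, (struct sockaddr *)&a, sizeof(a));

	/* a zero-linger close() turns the teardown into a RST: the
	 * queued child's first subflow is closed before accept()
	 */
	setsockopt(c, SOL_SOCKET, SO_LINGER, &lg, sizeof(lg));
	close(c);

	close(l);
	return 0;
}

The exact interleaving is timing dependent, so a real reproducer would run this in a loop; the sketch only illustrates which path is being hardened.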
@@ -2398,9 +2410,10 @@ static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu)
return 0;
}
-static void __mptcp_close_subflow(struct mptcp_sock *msk)
+static void __mptcp_close_subflow(struct sock *sk)
{
struct mptcp_subflow_context *subflow, *tmp;
+ struct mptcp_sock *msk = mptcp_sk(sk);
might_sleep();
@@ -2414,7 +2427,15 @@ static void __mptcp_close_subflow(struct mptcp_sock *msk)
if (!skb_queue_empty_lockless(&ssk->sk_receive_queue))
continue;
- mptcp_close_ssk((struct sock *)msk, ssk, subflow);
+ mptcp_close_ssk(sk, ssk, subflow);
+ }
+
+ /* if the MPC subflow has been closed before the msk is accepted,
+ * the msk will never be accepted: close it now
+ */
+ if (!msk->first && msk->in_accept_queue) {
+ sock_set_flag(sk, SOCK_DEAD);
+ inet_sk_state_store(sk, TCP_CLOSE);
}
}
@@ -2623,6 +2644,9 @@ static void mptcp_worker(struct work_struct *work)
__mptcp_check_send_data_fin(sk);
mptcp_check_data_fin(sk);
+ if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
+ __mptcp_close_subflow(sk);
+
/* There is no point in keeping around an orphaned sk timedout or
* closed, but we need the msk around to reply to incoming DATA_FIN,
* even if it is orphaned and in FIN_WAIT2 state
@@ -2638,9 +2662,6 @@ static void mptcp_worker(struct work_struct *work)
}
}
- if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
- __mptcp_close_subflow(msk);
-
if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
__mptcp_retrans(sk);
@@ -3078,6 +3099,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->local_key = subflow_req->local_key;
msk->token = subflow_req->token;
msk->subflow = NULL;
+ msk->in_accept_queue = 1;
WRITE_ONCE(msk->fully_established, false);
if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
WRITE_ONCE(msk->csum_enabled, true);
@@ -3095,8 +3117,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
security_inet_csk_clone(nsk, req);
bh_unlock_sock(nsk);
- /* keep a single reference */
- __sock_put(nsk);
+ /* note: the newly allocated socket refcount is now 2 */
return nsk;
}
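
The refcount bookkeeping change spread across this hunk, mptcp_accept() and mptcp_force_close() boils down to one rule: the clone starts at refcount 2, one reference owned by the TCP child socket and one by the eventual owner, and each side drops exactly one. A toy user-space model of that rule - purely illustrative, with made-up toy_* names - behaves like the mptcp_force_close() error path:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_sk {
	atomic_int refcnt;
};

/* mirrors mptcp_sk_clone() after the patch: no __sock_put(), the
 * newly allocated socket starts with refcount 2
 */
static struct toy_sk *toy_clone(void)
{
	struct toy_sk *nsk = malloc(sizeof(*nsk));

	if (nsk)
		atomic_init(&nsk->refcnt, 2);
	return nsk;
}

static void toy_put(struct toy_sk *sk, const char *who)
{
	if (atomic_fetch_sub(&sk->refcnt, 1) == 1) {
		printf("%s drops the last reference: free\n", who);
		free(sk);
	} else {
		printf("%s drops one reference\n", who);
	}
}

int main(void)
{
	struct toy_sk *nsk = toy_clone();

	if (!nsk)
		return 1;

	/* the mptcp_force_close() error path now pairs both puts
	 * itself: sk_common_release() (modelled as one put), then
	 * the explicit sock_put()
	 */
	toy_put(nsk, "sk_common_release");
	toy_put(nsk, "sock_put");
	return 0;
}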
@@ -3152,8 +3173,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
goto out;
}
- /* acquire the 2nd reference for the owning socket */
- sock_hold(new_mptcp_sock);
newsk = new_mptcp_sock;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
} else {
@@ -3704,6 +3723,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
struct sock *newsk = newsock->sk;
set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags);
+ msk->in_accept_queue = 0;
lock_sock(newsk);
@@ -295,7 +295,8 @@ struct mptcp_sock {
u8 recvmsg_inq:1,
cork:1,
nodelay:1,
- fastopening:1;
+ fastopening:1,
+ in_accept_queue:1;
int connect_flags;
struct work_struct work;
struct sk_buff *ooo_last_skb;
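
One detail worth noting: the new flag reuses a spare bit in the existing u8 bitfield, so struct mptcp_sock should not grow. A quick user-space sanity check of that claim - with abbreviated, stand-in field names, and noting that bitfield layout is ultimately ABI dependent - could look like:

#include <stdint.h>

struct before {
	uint8_t recvmsg_inq:1, cork:1, nodelay:1, fastopening:1;
};

struct after {
	uint8_t recvmsg_inq:1, cork:1, nodelay:1, fastopening:1,
		in_accept_queue:1;
};

/* four bits already fit in one byte, so a fifth one is free */
_Static_assert(sizeof(struct before) == sizeof(struct after),
	       "in_accept_queue reuses a spare bit");

int main(void) { return 0; }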
@@ -666,6 +667,8 @@ void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow);
bool mptcp_subflow_active(struct mptcp_subflow_context *subflow);
+void mptcp_subflow_drop_ctx(struct sock *ssk);
+
static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
struct mptcp_subflow_context *ctx)
{
@@ -699,9 +699,10 @@ static bool subflow_hmac_valid(const struct request_sock *req,
static void mptcp_force_close(struct sock *sk)
{
- /* the msk is not yet exposed to user-space */
+ /* the msk is not yet exposed to user-space, and its refcount is 2 */
inet_sk_state_store(sk, TCP_CLOSE);
sk_common_release(sk);
+ sock_put(sk);
}
static void subflow_ulp_fallback(struct sock *sk,
@@ -717,7 +718,7 @@ static void subflow_ulp_fallback(struct sock *sk,
mptcp_subflow_ops_undo_override(sk);
}
-static void subflow_drop_ctx(struct sock *ssk)
+void mptcp_subflow_drop_ctx(struct sock *ssk)
{
struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk);
@@ -823,7 +824,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
if (new_msk)
mptcp_copy_inaddrs(new_msk, child);
- subflow_drop_ctx(child);
+ mptcp_subflow_drop_ctx(child);
goto out;
}
@@ -914,7 +915,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
return child;
dispose_child:
- subflow_drop_ctx(child);
+ mptcp_subflow_drop_ctx(child);
tcp_rsk(req)->drop_req = true;
inet_csk_prepare_for_destroy_sock(child);
tcp_done(child);
@@ -1866,7 +1867,6 @@ void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk)
struct sock *sk = (struct sock *)msk;
bool do_cancel_work;
- sock_hold(sk);
lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
next = msk->dl_next;
msk->first = NULL;
@@ -1954,6 +1954,13 @@ static void subflow_ulp_release(struct sock *ssk)
* when the subflow is still unaccepted
*/
release = ctx->disposable || list_empty(&ctx->node);
+
+ /* inet_child_forget() does not call sk_state_change(),
+ * so explicitly trigger the socket close machinery
+ */
+ if (!release && !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW,
+ &mptcp_sk(sk)->flags))
+ mptcp_schedule_work(sk);
sock_put(sk);
}
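
The !test_and_set_bit() guard above makes the scheduling idempotent: however many subflows hit this path, at most one MPTCP_WORK_CLOSE_SUBFLOW pass is queued until the worker clears the bit. A minimal user-space analogue of the idiom, using C11 atomics and hypothetical toy_* names:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define TOY_WORK_CLOSE_SUBFLOW	0

static atomic_ulong toy_flags;

/* mimics test_and_set_bit(): set the bit, return its old value */
static bool toy_test_and_set_bit(int bit, atomic_ulong *addr)
{
	unsigned long mask = 1UL << bit;

	return atomic_fetch_or(addr, mask) & mask;
}

static void toy_schedule_work(void)
{
	printf("work scheduled\n");
}

int main(void)
{
	/* only the first caller schedules the work... */
	if (!toy_test_and_set_bit(TOY_WORK_CLOSE_SUBFLOW, &toy_flags))
		toy_schedule_work();
	/* ...the second sees the bit already set and does nothing */
	if (!toy_test_and_set_bit(TOY_WORK_CLOSE_SUBFLOW, &toy_flags))
		toy_schedule_work();
	return 0;
}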