From 5e0f2ee2051f669e30c64a2f925728d6944ba4e5 Mon Sep 17 00:00:00 2001
From: Chris Lew
Date: Fri, 17 Feb 2023 20:42:03 -0800
Subject: [PATCH] net: qmsgq: Add the qmsgq socket family

Add an initial implementation of the qmsgq socket family that will
replace the Gunyah vsock functionality. This socket family is a
temporary solution for communicating between guest VMs using the
socket APIs. It reuses the vsock address structures to minimize the
userspace changes needed to move from Gunyah vsock to qmsgq. This
version supports dgram sockets; support for seqpacket sockets will be
added in future changes.

Change-Id: I4ecf0a97c15e3dd68687ef501158fbd4bc2a6a2f
Signed-off-by: Chris Lew
---
 net/Kconfig              |   1 +
 net/Makefile             |   1 +
 net/qmsgq/Kconfig        |  27 ++
 net/qmsgq/Makefile       |   3 +
 net/qmsgq/af_qmsgq.c     | 805 +++++++++++++++++++++++++++++++++++++++
 net/qmsgq/af_qmsgq.h     |  87 +++++
 net/qmsgq/qmsgq_gunyah.c | 484 +++++++++++++++++++++++
 7 files changed, 1408 insertions(+)
 create mode 100644 net/qmsgq/Kconfig
 create mode 100644 net/qmsgq/Makefile
 create mode 100644 net/qmsgq/af_qmsgq.c
 create mode 100644 net/qmsgq/af_qmsgq.h
 create mode 100644 net/qmsgq/qmsgq_gunyah.c

diff --git a/net/Kconfig b/net/Kconfig
index 48c33c222199..5400a05b1fe6 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -241,6 +241,7 @@ source "net/hsr/Kconfig"
 source "net/switchdev/Kconfig"
 source "net/l3mdev/Kconfig"
 source "net/qrtr/Kconfig"
+source "net/qmsgq/Kconfig"
 source "net/ncsi/Kconfig"
 
 config PCPU_DEV_REFCNT
diff --git a/net/Makefile b/net/Makefile
index 6a62e5b27378..3a7c1a1db7e4 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -74,6 +74,7 @@ obj-$(CONFIG_HSR) += hsr/
 obj-$(CONFIG_NET_SWITCHDEV) += switchdev/
 obj-$(CONFIG_NET_L3_MASTER_DEV) += l3mdev/
 obj-$(CONFIG_QRTR) += qrtr/
+obj-$(CONFIG_QMSGQ) += qmsgq/
 obj-$(CONFIG_NET_NCSI) += ncsi/
 obj-$(CONFIG_XDP_SOCKETS) += xdp/
 obj-$(CONFIG_MPTCP) += mptcp/
diff --git a/net/qmsgq/Kconfig b/net/qmsgq/Kconfig
new file mode 100644
index 000000000000..c80f7f2c8452
--- /dev/null
+++ b/net/qmsgq/Kconfig
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: GPL-2.0-only
+# QTI Message Queue Socket configuration
+#
+
+config QMSGQ
+	tristate "QTI Message Queue Socket"
+	select VSOCKETS
+	help
+	  Say Y if you intend to use the QTI Message Queue Socket protocol.
+	  The protocol is used to communicate with baremetal VMs and
+	  platforms that use the Gunyah Hypervisor. It mimics the vsock
+	  address space, with a CID and a port ID, while also allowing
+	  guest-to-guest communication.
+
+if QMSGQ
+
+config QMSGQ_GUNYAH
+	tristate "QTI Message Queue Socket Gunyah Transport"
+	depends on GH_MSGQ
+	depends on QMSGQ
+	help
+	  Say Y here to support the Gunyah Message Queue based transport for
+	  the QMSGQ socket family. This transport is intended to facilitate
+	  guest-to-guest communication on the Gunyah Hypervisor. It supports
+	  datagram and seqpacket operations.
+
+endif # QMSGQ
diff --git a/net/qmsgq/Makefile b/net/qmsgq/Makefile
new file mode 100644
index 000000000000..2d03ed03ddc0
--- /dev/null
+++ b/net/qmsgq/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_QMSGQ) += af_qmsgq.o
+obj-$(CONFIG_QMSGQ_GUNYAH) += qmsgq_gunyah.o
diff --git a/net/qmsgq/af_qmsgq.c b/net/qmsgq/af_qmsgq.c
new file mode 100644
index 000000000000..35693edbc869
--- /dev/null
+++ b/net/qmsgq/af_qmsgq.c
@@ -0,0 +1,805 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
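+ *
+ * AF_QMSGQ socket core. Addresses reuse the vsock format (struct
+ * sockaddr_vm: CID + port); actual message transfer is delegated to a
+ * registered qmsgq_endpoint transport such as the Gunyah message queue
+ * transport in qmsgq_gunyah.c.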
+ */
+#include <linux/capability.h>
+#include <linux/cred.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/vm_sockets.h>
+#include <linux/workqueue.h>
+#include <linux/xarray.h>
+#include <net/af_vsock.h>
+#include <net/sock.h>
+
+#include "af_qmsgq.h"
+
+#ifndef AF_QMSGQ
+#define AF_QMSGQ 27
+#endif
+
+#ifndef PF_QMSGQ
+#define PF_QMSGQ AF_QMSGQ
+#endif
+
+struct qmsgq_cb {
+	u32 src_cid;
+	u32 src_port;
+	u32 dst_cid;
+	u32 dst_port;
+};
+
+static const struct qmsgq_endpoint *registered_ep;
+static DEFINE_MUTEX(qmsgq_register_mutex);
+
+/* auto-bind range */
+#define QMSGQ_MIN_EPH_SOCKET 0x4000
+#define QMSGQ_MAX_EPH_SOCKET 0x7fff
+#define QMSGQ_EPH_PORT_RANGE \
+	XA_LIMIT(QMSGQ_MIN_EPH_SOCKET, QMSGQ_MAX_EPH_SOCKET)
+
+/* local port allocation management */
+static DEFINE_XARRAY_ALLOC(qmsgq_ports);
+static u32 qmsgq_ports_next = QMSGQ_MIN_EPH_SOCKET;
+static DEFINE_SPINLOCK(qmsgq_port_lock);
+
+/* The default peer timeout indicates how long we will wait for a peer response
+ * to a control message.
+ */
+#define QMSGQ_DEFAULT_CONNECT_TIMEOUT (2 * HZ)
+
+#define QMSGQ_DEFAULT_BUFFER_SIZE (1024 * 256)
+#define QMSGQ_DEFAULT_BUFFER_MAX_SIZE (1024 * 256)
+#define QMSGQ_DEFAULT_BUFFER_MIN_SIZE 128
+
+static void qmsgq_deassign_ep(struct qmsgq_sock *qsk)
+{
+	if (!qsk->ep)
+		return;
+
+	qsk->ep->destruct(qsk);
+	module_put(qsk->ep->module);
+	qsk->ep = NULL;
+}
+
+static int qmsgq_assign_ep(struct qmsgq_sock *qsk, struct qmsgq_sock *psk)
+{
+	const struct qmsgq_endpoint *new_ep;
+	int ret;
+
+	new_ep = registered_ep;
+	if (qsk->ep) {
+		if (qsk->ep == new_ep)
+			return 0;
+
+		qsk->ep->release(qsk);
+		qmsgq_deassign_ep(qsk);
+	}
+
+	/* We increase the module refcnt to prevent the transport unloading
+	 * while there are open sockets assigned to it.
+	 */
+	if (!new_ep || !try_module_get(new_ep->module))
+		return -ENODEV;
+
+	ret = new_ep->init(qsk, psk);
+	if (ret) {
+		module_put(new_ep->module);
+		return ret;
+	}
+
+	qsk->ep = new_ep;
+
+	return 0;
+}
+
+static bool qmsgq_find_cid(unsigned int cid)
+{
+	if (registered_ep && cid == registered_ep->get_local_cid())
+		return true;
+
+	return false;
+}
+
+static bool sock_type_connectible(u16 type)
+{
+	return (type == SOCK_STREAM) || (type == SOCK_SEQPACKET);
+}
+
+static struct qmsgq_sock *qmsgq_port_lookup(int port)
+{
+	struct qmsgq_sock *qsk;
+	unsigned long flags;
+
+	spin_lock_irqsave(&qmsgq_port_lock, flags);
+	qsk = xa_load(&qmsgq_ports, port);
+	if (qsk)
+		sock_hold(qsk_sk(qsk));
+	spin_unlock_irqrestore(&qmsgq_port_lock, flags);
+
+	return qsk;
+}
+
+static void qmsgq_port_put(struct qmsgq_sock *qsk)
+{
+	sock_put(qsk_sk(qsk));
+}
+
+static void qmsgq_port_remove(struct qmsgq_sock *qsk)
+{
+	int port = qsk->local_addr.svm_port;
+	unsigned long flags;
+
+	__sock_put(qsk_sk(qsk));
+
+	spin_lock_irqsave(&qmsgq_port_lock, flags);
+	xa_erase(&qmsgq_ports, port);
+	spin_unlock_irqrestore(&qmsgq_port_lock, flags);
+}
+
+static int qmsgq_port_assign(struct qmsgq_sock *qsk, int *port)
+{
+	int rc;
+
+	if (!*port || *port < 0) {
+		rc = xa_alloc_cyclic(&qmsgq_ports, port, qsk,
+				     QMSGQ_EPH_PORT_RANGE, &qmsgq_ports_next,
+				     GFP_ATOMIC);
+	} else if (*port < QMSGQ_MIN_EPH_SOCKET && !capable(CAP_NET_ADMIN)) {
+		rc = -EACCES;
+	} else {
+		rc = xa_insert(&qmsgq_ports, *port, qsk, GFP_ATOMIC);
+	}
+
+	if (rc == -EBUSY)
+		return -EADDRINUSE;
+	else if (rc < 0)
+		return rc;
+
+	sock_hold(qsk_sk(qsk));
+	return 0;
+}
+
+static int qmsgq_send_shutdown(struct sock *sk, int mode)
+{
+	struct qmsgq_sock *qsk = sk_qsk(sk);
+
+	if (!qsk->ep)
+		return -ENODEV;
+
+	return qsk->ep->shutdown(qsk, mode);
+}
+
+static void qmsgq_connect_timeout(struct work_struct *work)
+{
+}
+
+static 
void qmsgq_pending_work(struct work_struct *work) +{ +} + +/* Bind socket to address. + * + * Socket should be locked upon call. + */ +static int __qmsgq_bind(struct socket *sock, + const struct sockaddr_vm *addr, int zapped) +{ + struct qmsgq_sock *qsk = sk_qsk(sock->sk); + struct sock *sk = sock->sk; + unsigned long flags; + int port; + int rc; + + /* rebinding ok */ + if (!zapped && addr->svm_port == qsk->local_addr.svm_port) + return 0; + + if (addr->svm_cid != VMADDR_CID_ANY && !qmsgq_find_cid(addr->svm_cid)) + return -EADDRNOTAVAIL; + + spin_lock_irqsave(&qmsgq_port_lock, flags); + port = addr->svm_port; + rc = qmsgq_port_assign(qsk, &port); + spin_unlock_irqrestore(&qmsgq_port_lock, flags); + if (rc) + return rc; + + /* unbind previous, if any */ + if (!zapped) + qmsgq_port_remove(qsk); + + vsock_addr_init(&qsk->local_addr, VMADDR_CID_HOST, port); + sock_reset_flag(sk, SOCK_ZAPPED); + + return 0; +} + +/* Auto bind to an ephemeral port. */ +static int qmsgq_autobind(struct socket *sock) +{ + struct sock *sk = sock->sk; + struct sockaddr_vm addr; + + if (!sock_flag(sk, SOCK_ZAPPED)) + return 0; + vsock_addr_init(&addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + return __qmsgq_bind(sock, &addr, 1); +} + +static int qmsgq_bind(struct socket *sock, struct sockaddr *addr, int len) +{ + struct sockaddr_vm *vm_addr; + struct sock *sk = sock->sk; + int rc; + + if (vsock_addr_cast(addr, len, &vm_addr) != 0) + return -EINVAL; + + lock_sock(sk); + rc = __qmsgq_bind(sock, vm_addr, sock_flag(sk, SOCK_ZAPPED)); + release_sock(sk); + + return rc; +} + +static int qmsgq_dgram_connect(struct socket *sock, struct sockaddr *addr, int addr_len, int flags) +{ + struct sockaddr_vm *remote_addr; + struct qmsgq_sock *qsk; + struct sock *sk; + int rc; + + sk = sock->sk; + qsk = sk_qsk(sk); + + rc = vsock_addr_cast(addr, addr_len, &remote_addr); + if (rc == -EAFNOSUPPORT && remote_addr->svm_family == AF_UNSPEC) { + lock_sock(sk); + vsock_addr_init(&qsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + sock->state = SS_UNCONNECTED; + release_sock(sk); + return 0; + } else if (rc != 0) { + return -EINVAL; + } + + lock_sock(sk); + rc = qmsgq_autobind(sock); + if (rc) + goto out; + + if (!qsk->ep->dgram_allow(remote_addr->svm_cid, remote_addr->svm_port)) { + rc = -EINVAL; + goto out; + } + memcpy(&qsk->remote_addr, remote_addr, sizeof(qsk->remote_addr)); + sock->state = SS_CONNECTED; + +out: + release_sock(sk); + return rc; +} + +static int qmsgq_getname(struct socket *sock, struct sockaddr *addr, int peer) +{ + struct sockaddr_vm *vm_addr = NULL; + struct sock *sk = sock->sk; + struct qmsgq_sock *qsk; + int rc = 0; + + qsk = sk_qsk(sk); + + lock_sock(sk); + if (peer) { + if (sock->state != SS_CONNECTED) { + rc = -ENOTCONN; + goto out; + } + vm_addr = &qsk->remote_addr; + } else { + vm_addr = &qsk->local_addr; + } + if (!vm_addr) { + rc = -EINVAL; + goto out; + } + + BUILD_BUG_ON(sizeof(*vm_addr) > 128); + memcpy(addr, vm_addr, sizeof(*vm_addr)); + rc = sizeof(*vm_addr); + +out: + release_sock(sk); + return rc; +} + +static int qmsgq_shutdown(struct socket *sock, int mode) +{ + struct sock *sk; + int rc; + + /* User level uses SHUT_RD (0) and SHUT_WR (1), but the kernel uses + * RCV_SHUTDOWN (1) and SEND_SHUTDOWN (2), so we must increment mode + * here like the other address families do. Note also that the + * increment makes SHUT_RDWR (2) into RCV_SHUTDOWN | SEND_SHUTDOWN (3), + * which is what we want. 
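	 * For example, SHUT_RD (0) becomes RCV_SHUTDOWN (1) and SHUT_WR (1)
	 * becomes SEND_SHUTDOWN (2) after the increment.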
+ */ + mode++; + + if ((mode & ~SHUTDOWN_MASK) || !mode) + return -EINVAL; + + /* If this is a connection oriented socket and it is not connected then + * bail out immediately. If it is a DGRAM socket then we must first + * kick the socket so that it wakes up from any sleeping calls, for + * example recv(), and then afterwards return the error. + */ + sk = sock->sk; + + lock_sock(sk); + if (sock->state == SS_UNCONNECTED) { + rc = -ENOTCONN; + if (sock_type_connectible(sk->sk_type)) + goto out; + } else { + sock->state = SS_DISCONNECTING; + rc = 0; + } + + /* Receive and send shutdowns are treated alike. */ + mode = mode & (RCV_SHUTDOWN | SEND_SHUTDOWN); + if (mode) { + sk->sk_shutdown |= mode; + sk->sk_state_change(sk); + + if (sock_type_connectible(sk->sk_type)) { + sock_reset_flag(sk, SOCK_DONE); + qmsgq_send_shutdown(sk, mode); + } + } + +out: + release_sock(sk); + return rc; +} + +static __poll_t qmsgq_poll(struct file *file, struct socket *sock, poll_table *wait) +{ + struct sock *sk = sock->sk; + struct qmsgq_sock *qsk; + __poll_t mask; + + qsk = sk_qsk(sk); + + poll_wait(file, sk_sleep(sk), wait); + mask = 0; + + if (sk->sk_err) + /* Signify that there has been an error on this socket. */ + mask |= EPOLLERR; + + /* INET sockets treat local write shutdown and peer write shutdown as a + * case of EPOLLHUP set. + */ + if (sk->sk_shutdown == SHUTDOWN_MASK || + ((sk->sk_shutdown & SEND_SHUTDOWN) && + (qsk->peer_shutdown & SEND_SHUTDOWN))) { + mask |= EPOLLHUP; + } + + if (sk->sk_shutdown & RCV_SHUTDOWN || + qsk->peer_shutdown & SEND_SHUTDOWN) { + mask |= EPOLLRDHUP; + } + + if (sock->type == SOCK_DGRAM) { + /* For datagram sockets we can read if there is something in + * the queue and write as long as the socket isn't shutdown for + * sending. + */ + if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || + (sk->sk_shutdown & RCV_SHUTDOWN)) { + mask |= EPOLLIN | EPOLLRDNORM; + } + + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) + mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; + + } /* TODO Connected POLL */ + + return mask; +} + +static int qmsgq_dgram_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) +{ + const struct qmsgq_endpoint *ep; + struct sockaddr_vm *remote_addr; + struct sock *sk = sock->sk; + struct qmsgq_sock *qsk; + int rc = 0; + + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + + qsk = sk_qsk(sk); + + lock_sock(sk); + ep = qsk->ep; + + rc = qmsgq_autobind(sock); + if (rc) + goto out; + + if (msg->msg_name) { + rc = vsock_addr_cast(msg->msg_name, msg->msg_namelen, &remote_addr); + if (rc) + goto out; + } else if (sock->state == SS_CONNECTED) { + remote_addr = &qsk->remote_addr; + } else { + rc = -EINVAL; + goto out; + } + + if (remote_addr->svm_cid == VMADDR_CID_ANY) + remote_addr->svm_cid = ep->get_local_cid(); + + if (!vsock_addr_bound(remote_addr)) { + rc = -EINVAL; + goto out; + } + + if (!ep->dgram_allow(remote_addr->svm_cid, remote_addr->svm_port)) { + rc = -EINVAL; + goto out; + } + + rc = ep->dgram_enqueue(qsk, remote_addr, msg, len); + if (!rc) + rc = len; + +out: + release_sock(sk); + return rc; +} + +static int qmsgq_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) +{ + DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name); + struct sock *sk = sock->sk; + struct qmsgq_sock *qsk; + struct sk_buff *skb; + struct qmsgq_cb *cb; + int copied; + int rc = 0; + + qsk = sk_qsk(sk); + + if (sock_flag(sk, SOCK_ZAPPED)) { + pr_err("%s: Invalid socket error\n", __func__); + return -EADDRNOTAVAIL; + } + + skb = 
skb_recv_datagram(sk, flags, &rc);
+	if (!skb)
+		return rc;
+
+	lock_sock(sk);
+	cb = (struct qmsgq_cb *)skb->cb;
+
+	copied = skb->len;
+	if (copied > len) {
+		copied = len;
+		msg->msg_flags |= MSG_TRUNC;
+	}
+
+	/* Place the datagram payload in the user's iovec. */
+	rc = skb_copy_datagram_msg(skb, 0, msg, copied);
+	if (rc < 0) {
+		pr_err("%s: skb_copy_datagram_msg failed: %d\n", __func__, rc);
+		goto out;
+	}
+	rc = copied;
+
+	if (vm_addr) {
+		vsock_addr_init(vm_addr, cb->src_cid, cb->src_port);
+		msg->msg_namelen = sizeof(*vm_addr);
+	}
+out:
+	skb_free_datagram(sk, skb);
+	release_sock(sk);
+
+	return rc;
+}
+
+static void __qmsgq_release(struct sock *sk, int level)
+{
+	if (sk) {
+		struct qmsgq_sock *qsk = sk_qsk(sk);
+
+		lock_sock_nested(sk, level);
+		if (qsk->ep)
+			qsk->ep->release(qsk);
+
+		if (!sock_flag(sk, SOCK_DEAD))
+			sk->sk_state_change(sk);
+
+		if (!sock_flag(sk, SOCK_ZAPPED))
+			qmsgq_port_remove(qsk);
+
+		sock_orphan(sk);
+		sk->sk_shutdown = SHUTDOWN_MASK;
+		skb_queue_purge(&sk->sk_receive_queue);
+		release_sock(sk);
+		sock_put(sk);
+	}
+}
+
+static int qmsgq_release(struct socket *sock)
+{
+	__qmsgq_release(sock->sk, 0);
+	sock->sk = NULL;
+	sock->state = SS_FREE;
+
+	return 0;
+}
+
+static const struct proto_ops qmsgq_dgram_ops = {
+	.owner = THIS_MODULE,
+	.family = AF_QMSGQ,
+	.release = qmsgq_release,
+	.bind = qmsgq_bind,
+	.connect = qmsgq_dgram_connect,
+	.socketpair = sock_no_socketpair,
+	.accept = sock_no_accept,
+	.getname = qmsgq_getname,
+	.poll = qmsgq_poll,
+	.ioctl = sock_no_ioctl,
+	.listen = sock_no_listen,
+	.shutdown = qmsgq_shutdown,
+	.sendmsg = qmsgq_dgram_sendmsg,
+	.recvmsg = qmsgq_dgram_recvmsg,
+	.mmap = sock_no_mmap,
+	.sendpage = sock_no_sendpage,
+};
+
+static struct proto qmsgq_proto = {
+	.name = "QMSGQ",
+	.owner = THIS_MODULE,
+	.obj_size = sizeof(struct qmsgq_sock),
+};
+
+static void sk_qsk_destruct(struct sock *sk)
+{
+	struct qmsgq_sock *qsk = sk_qsk(sk);
+
+	qmsgq_deassign_ep(qsk);
+	/* When clearing these addresses, there's no need to set the family and
+	 * possibly register the address family with the kernel.
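	 * vsock_addr_init() zeroes the structure and sets svm_family to
	 * AF_VSOCK before filling in the CID and port.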
+	 */
+	vsock_addr_init(&qsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
+	vsock_addr_init(&qsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
+
+	put_cred(qsk->owner);
+}
+
+static int qmsgq_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+	int err;
+
+	err = sock_queue_rcv_skb(sk, skb);
+	if (err)
+		kfree_skb(skb);
+
+	return err;
+}
+
+static struct sock *__qmsgq_create(struct net *net, struct socket *sock, struct sock *parent,
+				   gfp_t priority, unsigned short type, int kern)
+{
+	struct qmsgq_sock *psk;
+	struct qmsgq_sock *qsk;
+	struct sock *sk;
+
+	sk = sk_alloc(net, AF_QMSGQ, priority, &qmsgq_proto, kern);
+	if (!sk)
+		return NULL;
+
+	sock_init_data(sock, sk);
+
+	if (!sock)
+		sk->sk_type = type;
+
+	qsk = sk_qsk(sk);
+	vsock_addr_init(&qsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
+	vsock_addr_init(&qsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
+
+	sk->sk_destruct = sk_qsk_destruct;
+	sk->sk_backlog_rcv = qmsgq_queue_rcv_skb;
+	sock_reset_flag(sk, SOCK_DONE);
+	sock_set_flag(sk, SOCK_ZAPPED);
+
+	INIT_LIST_HEAD(&qsk->bound_table);
+	INIT_LIST_HEAD(&qsk->connected_table);
+	qsk->listener = NULL;
+	INIT_LIST_HEAD(&qsk->pending_links);
+	INIT_LIST_HEAD(&qsk->accept_queue);
+	qsk->rejected = false;
+	qsk->sent_request = false;
+	qsk->ignore_connecting_rst = false;
+	qsk->peer_shutdown = 0;
+	INIT_DELAYED_WORK(&qsk->connect_work, qmsgq_connect_timeout);
+	INIT_DELAYED_WORK(&qsk->pending_work, qmsgq_pending_work);
+
+	psk = parent ? sk_qsk(parent) : NULL;
+	if (parent) {
+		qsk->trusted = psk->trusted;
+		qsk->owner = get_cred(psk->owner);
+		qsk->connect_timeout = psk->connect_timeout;
+		qsk->buffer_size = psk->buffer_size;
+		qsk->buffer_min_size = psk->buffer_min_size;
+		qsk->buffer_max_size = psk->buffer_max_size;
+		security_sk_clone(parent, sk);
+	} else {
+		qsk->trusted = ns_capable_noaudit(&init_user_ns, CAP_NET_ADMIN);
+		qsk->owner = get_current_cred();
+		qsk->connect_timeout = QMSGQ_DEFAULT_CONNECT_TIMEOUT;
+		qsk->buffer_size = QMSGQ_DEFAULT_BUFFER_SIZE;
+		qsk->buffer_min_size = QMSGQ_DEFAULT_BUFFER_MIN_SIZE;
+		qsk->buffer_max_size = QMSGQ_DEFAULT_BUFFER_MAX_SIZE;
+	}
+
+	return sk;
+}
+
+static int qmsgq_create(struct net *net, struct socket *sock,
+			int protocol, int kern)
+{
+	struct qmsgq_sock *qsk;
+	struct sock *sk;
+	int rc;
+
+	if (!sock)
+		return -EINVAL;
+
+	if (protocol && protocol != PF_QMSGQ)
+		return -EPROTONOSUPPORT;
+
+	switch (sock->type) {
+	case SOCK_DGRAM:
+		sock->ops = &qmsgq_dgram_ops;
+		break;
+	default:
+		return -ESOCKTNOSUPPORT;
+	}
+	sock->state = SS_UNCONNECTED;
+
+	sk = __qmsgq_create(net, sock, NULL, GFP_KERNEL, 0, kern);
+	if (!sk)
+		return -ENOMEM;
+
+	qsk = sk_qsk(sk);
+	if (sock->type == SOCK_DGRAM) {
+		rc = qmsgq_assign_ep(qsk, NULL);
+		if (rc < 0) {
+			sock_put(sk);
+			return rc;
+		}
+	}
+
+	return 0;
+}
+
+int qmsgq_post(const struct qmsgq_endpoint *ep, struct sockaddr_vm *src, struct sockaddr_vm *dst,
+	       void *data, int len)
+{
+	struct qmsgq_sock *qsk;
+	struct qmsgq_cb *cb;
+	struct sk_buff *skb;
+	int rc;
+
+	skb = alloc_skb_with_frags(0, len, 0, &rc, GFP_KERNEL);
+	if (!skb) {
+		pr_err("%s: Unable to get skb with len:%d\n", __func__, len);
+		return -ENOMEM;
+	}
+	cb = (struct qmsgq_cb *)skb->cb;
+	cb->src_cid = src->svm_cid;
+	cb->src_port = src->svm_port;
+	cb->dst_cid = dst->svm_cid;
+	cb->dst_port = dst->svm_port;
+
+	skb->data_len = len;
+	skb->len = len;
+	skb_store_bits(skb, 0, data, len);
+
+	qsk = qmsgq_port_lookup(dst->svm_port);
+	if (!qsk || qsk->ep != ep) {
+		pr_err("%s: invalid dst port:%d\n", __func__, dst->svm_port);
+		if (qsk)
+			qmsgq_port_put(qsk);
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	if (sock_queue_rcv_skb(qsk_sk(qsk), skb)) {
+		pr_err("%s: sock_queue_rcv_skb failed\n", __func__);
+		qmsgq_port_put(qsk);
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+	qmsgq_port_put(qsk);
+	return 0;
+}
+EXPORT_SYMBOL(qmsgq_post);
+
+int qmsgq_endpoint_register(const struct qmsgq_endpoint *ep)
+{
+	int rc = 0;
+
+	if (!ep)
+		return -EINVAL;
+
+	mutex_lock(&qmsgq_register_mutex);
+	if (registered_ep) {
+		rc = -EBUSY;
+		goto error;
+	}
+	registered_ep = ep;
+
+error:
+	mutex_unlock(&qmsgq_register_mutex);
+	return rc;
+}
+EXPORT_SYMBOL(qmsgq_endpoint_register);
+
+void qmsgq_endpoint_unregister(const struct qmsgq_endpoint *ep)
+{
+	mutex_lock(&qmsgq_register_mutex);
+	if (registered_ep == ep)
+		registered_ep = NULL;
+	mutex_unlock(&qmsgq_register_mutex);
+}
+EXPORT_SYMBOL(qmsgq_endpoint_unregister);
+
+static const struct net_proto_family qmsgq_family = {
+	.owner = THIS_MODULE,
+	.family = AF_QMSGQ,
+	.create = qmsgq_create,
+};
+
+static int __init qmsgq_proto_init(void)
+{
+	int rc;
+
+	registered_ep = NULL;
+
+	rc = proto_register(&qmsgq_proto, 1);
+	if (rc)
+		return rc;
+
+	rc = sock_register(&qmsgq_family);
+	if (rc)
+		goto err_proto;
+
+	return 0;
+
+err_proto:
+	proto_unregister(&qmsgq_proto);
+
+	return rc;
+}
+
+static void __exit qmsgq_proto_fini(void)
+{
+	sock_unregister(qmsgq_family.family);
+	proto_unregister(&qmsgq_proto);
+}
+module_init(qmsgq_proto_init);
+module_exit(qmsgq_proto_fini);
+
+MODULE_DESCRIPTION("QTI Gunyah MSGQ Socket driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETPROTO(PF_QMSGQ);
diff --git a/net/qmsgq/af_qmsgq.h b/net/qmsgq/af_qmsgq.h
new file mode 100644
index 000000000000..0468f8bacbcb
--- /dev/null
+++ b/net/qmsgq/af_qmsgq.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+#ifndef __AF_QMSGQ_H_
+#define __AF_QMSGQ_H_
+
+#include <net/af_vsock.h>
+
+struct qmsgq_endpoint;
+
+struct qmsgq_sock {
+	/* sk must be the first member. */
+	struct sock sk;
+	const struct qmsgq_endpoint *ep;
+	struct sockaddr_vm local_addr;
+	struct sockaddr_vm remote_addr;
+	/* Links for the global tables of bound and connected sockets. */
+	struct list_head bound_table;
+	struct list_head connected_table;
+	/* Accessed without the socket lock held. This means it can never be
+	 * modified outside of socket create or destruct.
+	 */
+	bool trusted;
+	bool cached_peer_allow_dgram;	/* Dgram communication allowed to
+					 * cached peer?
+					 */
+	u32 cached_peer;  /* Context ID of last dgram destination check. */
+	const struct cred *owner;
+	/* Rest are SOCK_STREAM only. */
+	long connect_timeout;
+	/* Listening socket that this came from. */
+	struct sock *listener;
+	/* Used for pending list and accept queue during connection handshake.
+	 * The listening socket is the head for both lists. Sockets created
+	 * for connection requests are placed in the pending list until they
+	 * are connected, at which point they are put in the accept queue list
+	 * so they can be accepted in accept(). If accept() cannot accept the
+	 * connection, it is marked as rejected so the cleanup function knows
+	 * to clean up the socket.
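	 * Only dgram sockets are implemented in this version; these fields
	 * back the planned seqpacket (connection oriented) support.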
+	 */
+	struct list_head pending_links;
+	struct list_head accept_queue;
+	bool rejected;
+	struct delayed_work connect_work;
+	struct delayed_work pending_work;
+	struct delayed_work close_work;
+	bool close_work_scheduled;
+	u32 peer_shutdown;
+	bool sent_request;
+	bool ignore_connecting_rst;
+
+	/* Protected by lock_sock(sk) */
+	u64 buffer_size;
+	u64 buffer_min_size;
+	u64 buffer_max_size;
+};
+
+#define qsk_sk(__qsk) (&(__qsk)->sk)
+#define sk_qsk(__sk) ((struct qmsgq_sock *)__sk)
+
+struct qmsgq_endpoint {
+	struct module *module;
+
+	/* Initialize/tear-down socket. */
+	int (*init)(struct qmsgq_sock *qsk, struct qmsgq_sock *psk);
+	void (*destruct)(struct qmsgq_sock *qsk);
+	void (*release)(struct qmsgq_sock *qsk);
+
+	/* DGRAM. */
+	int (*dgram_enqueue)(struct qmsgq_sock *qsk, struct sockaddr_vm *addr,
+			     struct msghdr *msg, size_t len);
+	bool (*dgram_allow)(u32 cid, u32 port);
+
+	/* Shutdown. */
+	int (*shutdown)(struct qmsgq_sock *qsk, int mode);
+
+	/* Addressing. */
+	u32 (*get_local_cid)(void);
+};
+
+int qmsgq_post(const struct qmsgq_endpoint *ep, struct sockaddr_vm *src, struct sockaddr_vm *dst,
+	       void *data, int len);
+int qmsgq_endpoint_register(const struct qmsgq_endpoint *ep);
+void qmsgq_endpoint_unregister(const struct qmsgq_endpoint *ep);
+
+#endif
diff --git a/net/qmsgq/qmsgq_gunyah.c b/net/qmsgq/qmsgq_gunyah.c
new file mode 100644
index 000000000000..5e6f3c8ab1e3
--- /dev/null
+++ b/net/qmsgq/qmsgq_gunyah.c
@@ -0,0 +1,484 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+#include <linux/gunyah/gh_msgq.h>
+#include <linux/gunyah/gh_rm_drv.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/sizes.h>
+#include <linux/skbuff.h>
+
+#include "af_qmsgq.h"
+
+#define QMSGQ_GH_PROTO_VER_1 1
+#define MAX_PKT_SZ SZ_64K
+
+enum qmsgq_gh_pkt_type {
+	QMSGQ_GH_TYPE_DATA = 1,
+};
+
+/**
+ * struct qmsgq_gh_hdr - qmsgq gunyah packet header
+ * @version: protocol version
+ * @type: packet type; one of qmsgq_gh_pkt_type
+ * @flags: reserved for future use
+ * @optlen: length of optional header data
+ * @size: length of packet, excluding this header and optlen
+ * @src_rsvd: source cid, reserved
+ * @src_port_id: source port
+ * @dst_rsvd: destination cid, reserved
+ * @dst_port_id: destination port
+ */
+struct qmsgq_gh_hdr {
+	u8 version;
+	u8 type;
+	u8 flags;
+	u8 optlen;
+	__le32 size;
+	__le32 src_rsvd;
+	__le32 src_port_id;
+	__le32 dst_rsvd;
+	__le32 dst_port_id;
+};
+
+/* struct qmsgq_gh_recv_buf - gunyah transport receive buffer
+ * @lock: lock for the buffer
+ * @len: hdrlen + packet size
+ * @copied: size of buffer copied
+ * @hdr_received: true if the header is already saved, else false
+ * @buf: buffer saved
+ */
+struct qmsgq_gh_recv_buf {
+	struct mutex lock;
+	size_t len;
+	size_t copied;
+	bool hdr_received;
+
+	char buf[MAX_PKT_SZ];
+};
+
+/* struct qmsgq_gh_device - vm devices attached to this transport
+ * @item: list item of all vm devices
+ * @dev: device from platform_device.
+ * @peer_cid: remote cid
+ * @master: primary vm indicator
+ * @msgq_label: msgq label
+ * @msgq_hdl: msgq handle
+ * @rm_nb: notifier block for vm status from rm
+ * @tx_lock: tx lock to queue only one packet at a time
+ * @rx_thread: rx thread to receive incoming packets
+ * @ep: qmsgq endpoint
+ */
+struct qmsgq_gh_device {
+	struct list_head item;
+	struct device *dev;
+	struct qmsgq_endpoint ep;
+
+	unsigned int peer_cid;
+	bool master;
+	enum gh_msgq_label msgq_label;
+	void *msgq_hdl;
+	struct notifier_block rm_nb;
+
+	struct mutex tx_lock;
+	struct task_struct *rx_thread;
+	struct qmsgq_gh_recv_buf rx_buf;
+};
+
+static void reset_buf(struct qmsgq_gh_recv_buf *rx_buf)
+{
+	memset(rx_buf->buf, 0, MAX_PKT_SZ);
+	rx_buf->hdr_received = false;
+	rx_buf->copied = 0;
+	rx_buf->len = 0;
+}
+
+static int qmsgq_gh_post(struct qmsgq_gh_device *qdev, struct qmsgq_gh_recv_buf *rx_buf)
+{
+	unsigned int cid, port, len;
+	struct qmsgq_gh_hdr *hdr;
+	struct sockaddr_vm src;
+	struct sockaddr_vm dst;
+	void *data;
+	int rc;
+
+	if (rx_buf->len < sizeof(*hdr)) {
+		pr_err("%s: len: %zu < hdr size\n", __func__, rx_buf->len);
+		return -EINVAL;
+	}
+	hdr = (struct qmsgq_gh_hdr *)rx_buf->buf;
+
+	if (hdr->type != QMSGQ_GH_TYPE_DATA)
+		return -EINVAL;
+
+	cid = le32_to_cpu(hdr->src_rsvd);
+	port = le32_to_cpu(hdr->src_port_id);
+	vsock_addr_init(&src, cid, port);
+
+	cid = le32_to_cpu(hdr->dst_rsvd);
+	port = le32_to_cpu(hdr->dst_port_id);
+	vsock_addr_init(&dst, cid, port);
+
+	data = rx_buf->buf + sizeof(*hdr);
+	len = rx_buf->len - sizeof(*hdr);
+
+	rc = qmsgq_post(&qdev->ep, &src, &dst, data, len);
+
+	return rc;
+}
+
+static void qmsgq_process_recv(struct qmsgq_gh_device *qdev, void *buf, size_t len)
+{
+	struct qmsgq_gh_recv_buf *rx_buf = &qdev->rx_buf;
+	struct qmsgq_gh_hdr *hdr;
+	size_t n;
+
+	mutex_lock(&rx_buf->lock);
+
+	/* Copy message into the local buffer */
+	n = (rx_buf->copied + len < MAX_PKT_SZ) ? len : MAX_PKT_SZ - rx_buf->copied;
+	memcpy(rx_buf->buf + rx_buf->copied, buf, n);
+	rx_buf->copied += n;
+
+	if (!rx_buf->hdr_received) {
+		hdr = (struct qmsgq_gh_hdr *)rx_buf->buf;
+
+		if (hdr->version != QMSGQ_GH_PROTO_VER_1) {
+			pr_err("%s: Incorrect version:%d\n", __func__, hdr->version);
+			goto err;
+		}
+		if (hdr->type != QMSGQ_GH_TYPE_DATA) {
+			pr_err("%s: Incorrect type:%d\n", __func__, hdr->type);
+			goto err;
+		}
+		if (le32_to_cpu(hdr->size) > MAX_PKT_SZ - sizeof(*hdr)) {
+			pr_err("%s: Packet size too big:%u\n", __func__,
+			       le32_to_cpu(hdr->size));
+			goto err;
+		}
+
+		rx_buf->len = sizeof(*hdr) + le32_to_cpu(hdr->size);
+		rx_buf->hdr_received = true;
+
+		/* Check len, it cannot be smaller than the amount copied */
+		if (rx_buf->len < rx_buf->copied) {
+			pr_err("%s: Size mismatch len:%zu, copied:%zu\n", __func__,
+			       rx_buf->len, rx_buf->copied);
+			goto err;
+		}
+	}
+
+	if (rx_buf->len == rx_buf->copied) {
+		qmsgq_gh_post(qdev, rx_buf);
+		reset_buf(rx_buf);
+	}
+
+	mutex_unlock(&rx_buf->lock);
+	return;
+
+err:
+	reset_buf(rx_buf);
+	mutex_unlock(&rx_buf->lock);
+}
+
+static int qmsgq_gh_msgq_recv(void *data)
+{
+	struct qmsgq_gh_device *qdev = data;
+	size_t size;
+	void *buf;
+	int rc;
+
+	buf = kzalloc(GH_MSGQ_MAX_MSG_SIZE_BYTES, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	while (!kthread_should_stop()) {
+		rc = gh_msgq_recv(qdev->msgq_hdl, buf, GH_MSGQ_MAX_MSG_SIZE_BYTES, &size,
+				  GH_MSGQ_TX_PUSH);
+		if (rc)
+			continue;
+
+		if (!size)
+			continue;
+
+		qmsgq_process_recv(qdev, buf, size);
+	}
+	kfree(buf);
+
+	return 0;
+}
+
+static int qmsgq_gh_send(struct qmsgq_gh_device *qdev, void *buf, size_t len)
+{
+	size_t left, chunk, offset;
+	int rc = 0;
+
+	left = len;
+	chunk = 0;
+	offset = 0;
+
+	mutex_lock(&qdev->tx_lock);
+	while (left > 0) {
+		chunk = (left > GH_MSGQ_MAX_MSG_SIZE_BYTES) ? GH_MSGQ_MAX_MSG_SIZE_BYTES : left;
+		rc = gh_msgq_send(qdev->msgq_hdl, buf + offset, chunk, GH_MSGQ_TX_PUSH);
+		if (rc) {
+			pr_err("%s: gh_msgq_send failed: %d\n", __func__, rc);
+			mutex_unlock(&qdev->tx_lock);
+			goto err;
+		}
+		left -= chunk;
+		offset += chunk;
+	}
+	mutex_unlock(&qdev->tx_lock);
+	return 0;
+
+err:
+	return rc;
+}
+
+static int qmsgq_gh_dgram_enqueue(struct qmsgq_sock *qsk, struct sockaddr_vm *remote,
+				  struct msghdr *msg, size_t len)
+{
+	struct sockaddr_vm *local_addr = &qsk->local_addr;
+	const struct qmsgq_endpoint *ep;
+	struct qmsgq_gh_device *qdev;
+	struct qmsgq_gh_hdr *hdr;
+	char *buf;
+	int rc;
+
+	ep = qsk->ep;
+	if (!ep)
+		return -ENXIO;
+	qdev = container_of(ep, struct qmsgq_gh_device, ep);
+
+	if (!qdev->msgq_hdl) {
+		pr_err("%s: Transport not ready\n", __func__);
+		return -ENODEV;
+	}
+
+	if (len > MAX_PKT_SZ - sizeof(*hdr)) {
+		pr_err("%s: Invalid pkt size: len: %zu\n", __func__, len);
+		return -EMSGSIZE;
+	}
+
+	/* Allocate a buffer for the user's message and our packet header.
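	 * Wire layout: struct qmsgq_gh_hdr, then optlen bytes of optional
	 * header data (always 0 today), then size bytes of payload.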
	 */
+	buf = kmalloc(len + sizeof(*hdr), GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	/* Populate the header */
+	hdr = (struct qmsgq_gh_hdr *)buf;
+	hdr->version = QMSGQ_GH_PROTO_VER_1;
+	hdr->type = QMSGQ_GH_TYPE_DATA;
+	hdr->flags = 0;
+	hdr->optlen = 0;
+	hdr->size = cpu_to_le32(len);
+	hdr->src_rsvd = 0;
+	hdr->src_port_id = cpu_to_le32(local_addr->svm_port);
+	hdr->dst_rsvd = 0;
+	hdr->dst_port_id = cpu_to_le32(remote->svm_port);
+	rc = memcpy_from_msg(buf + sizeof(*hdr), msg, len);
+	if (rc) {
+		pr_err("%s failed: memcpy_from_msg rc: %d\n", __func__, rc);
+		goto send_err;
+	}
+
+	pr_debug("TX DATA: Len:0x%zx src[0x%x] dst[0x%x]\n", len,
+		 local_addr->svm_port, remote->svm_port);
+
+	rc = qmsgq_gh_send(qdev, buf, len + sizeof(*hdr));
+	if (rc < 0) {
+		pr_err("%s: failed to send msg rc: %d\n", __func__, rc);
+		goto send_err;
+	}
+	kfree(buf);
+
+	return 0;
+
+send_err:
+	kfree(buf);
+	return rc;
+}
+
+static int qmsgq_gh_socket_init(struct qmsgq_sock *qsk, struct qmsgq_sock *psk)
+{
+	return 0;
+}
+
+static void qmsgq_gh_destruct(struct qmsgq_sock *qsk)
+{
+}
+
+static void qmsgq_gh_release(struct qmsgq_sock *qsk)
+{
+}
+
+static bool qmsgq_gh_allow_rsvd_cid(u32 cid)
+{
+	/* Allow CID 0 for now, since af_qmsgq passes 0 when the client does
+	 * not specify a CID.
+	 */
+	if (cid == 0)
+		return true;
+
+	return false;
+}
+
+static bool qmsgq_gh_dgram_allow(u32 cid, u32 port)
+{
+	if (qmsgq_gh_allow_rsvd_cid(cid) || cid == VMADDR_CID_ANY || cid == VMADDR_CID_HOST)
+		return true;
+
+	pr_err("%s: dgram not allowed for cid 0x%x\n", __func__, cid);
+
+	return false;
+}
+
+static int qmsgq_gh_shutdown(struct qmsgq_sock *qsk, int mode)
+{
+	return 0;
+}
+
+static u32 qmsgq_gh_get_local_cid(void)
+{
+	return VMADDR_CID_HOST;
+}
+
+static int qmsgq_gh_msgq_start(struct qmsgq_gh_device *qdev)
+{
+	struct device *dev = qdev->dev;
+	int rc;
+
+	if (qdev->msgq_hdl) {
+		dev_err(qdev->dev, "Already have msgq handle!\n");
+		return 0;
+	}
+
+	qdev->msgq_hdl = gh_msgq_register(qdev->msgq_label);
+	if (IS_ERR_OR_NULL(qdev->msgq_hdl)) {
+		rc = qdev->msgq_hdl ? PTR_ERR(qdev->msgq_hdl) : -ENODEV;
+		dev_err(dev, "msgq register failed rc:%d\n", rc);
+		return rc;
+	}
+
+	qdev->rx_thread = kthread_run(qmsgq_gh_msgq_recv, qdev, "qmsgq_gh_rx");
+	if (IS_ERR_OR_NULL(qdev->rx_thread)) {
+		rc = qdev->rx_thread ? PTR_ERR(qdev->rx_thread) : -ENOMEM;
+		dev_err(dev, "Failed to create rx thread rc:%d\n", rc);
+		return rc;
+	}
+
+	return 0;
+}
+
+static int qmsgq_gh_rm_cb(struct notifier_block *nb, unsigned long cmd, void *data)
+{
+	struct qmsgq_gh_device *qdev = container_of(nb, struct qmsgq_gh_device, rm_nb);
+	struct gh_rm_notif_vm_status_payload *vm_status_payload = data;
+	u8 vm_status = vm_status_payload->vm_status;
+	int rc;
+
+	if (cmd != GH_RM_NOTIF_VM_STATUS)
+		return NOTIFY_DONE;
+
+	/* TODO - check for peer */
+	switch (vm_status) {
+	case GH_RM_VM_STATUS_READY:
+		rc = qmsgq_gh_msgq_start(qdev);
+		if (rc)
+			dev_err(qdev->dev, "Failed to start msgq rc:%d\n", rc);
+		break;
+	default:
+		pr_debug("Unknown notification for vmid = %d vm_status = %d\n",
+			 vm_status_payload->vmid, vm_status);
+	}
+
+	return NOTIFY_DONE;
+}
+
+static int qmsgq_gh_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct device *dev = &pdev->dev;
+	struct qmsgq_gh_device *qdev;
+	u32 label;
+	int rc;
+
+	qdev = devm_kzalloc(dev, sizeof(*qdev), GFP_KERNEL);
+	if (!qdev)
+		return -ENOMEM;
+	qdev->dev = dev;
+	dev_set_drvdata(&pdev->dev, qdev);
+
+	mutex_init(&qdev->tx_lock);
+	mutex_init(&qdev->rx_buf.lock);
+	qdev->rx_buf.len = 0;
+	qdev->rx_buf.copied = 0;
+	qdev->rx_buf.hdr_received = false;
+
+	qdev->ep.module = THIS_MODULE;
+	qdev->ep.init = qmsgq_gh_socket_init;
+	qdev->ep.destruct = qmsgq_gh_destruct;
+	qdev->ep.release = qmsgq_gh_release;
+	qdev->ep.dgram_enqueue = qmsgq_gh_dgram_enqueue;
+	qdev->ep.dgram_allow = qmsgq_gh_dgram_allow;
+	qdev->ep.shutdown = qmsgq_gh_shutdown;
+	qdev->ep.get_local_cid = qmsgq_gh_get_local_cid;
+
+	/* TODO: properly set the peer cid */
+	qdev->peer_cid = 0;
+
+	rc = of_property_read_u32(np, "msgq-label", &label);
+	if (rc) {
+		dev_err(dev, "failed to read msgq-label info %d\n", rc);
+		return rc;
+	}
+	qdev->msgq_label = label;
+
+	qdev->master = of_property_read_bool(np, "qcom,master");
+	if (qdev->master) {
+		qdev->rm_nb.notifier_call = qmsgq_gh_rm_cb;
+		gh_rm_register_notifier(&qdev->rm_nb);
+	} else {
+		rc = qmsgq_gh_msgq_start(qdev);
+		if (rc)
+			return rc;
+	}
+
+	return qmsgq_endpoint_register(&qdev->ep);
+}
+
+static int qmsgq_gh_remove(struct platform_device *pdev)
+{
+	struct qmsgq_gh_device *qdev = dev_get_drvdata(&pdev->dev);
+
+	if (qdev->master)
+		gh_rm_unregister_notifier(&qdev->rm_nb);
+
+	if (qdev->rx_thread)
+		kthread_stop(qdev->rx_thread);
+
+	qmsgq_endpoint_unregister(&qdev->ep);
+
+	return 0;
+}
+
+static const struct of_device_id qmsgq_gh_of_match[] = {
+	{ .compatible = "qcom,qmsgq-gh" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, qmsgq_gh_of_match);
+
+static struct platform_driver qmsgq_gh_driver = {
+	.probe = qmsgq_gh_probe,
+	.remove = qmsgq_gh_remove,
+	.driver = {
+		.name = "qmsgq-gh",
+		.of_match_table = qmsgq_gh_of_match,
+	}
+};
+module_platform_driver(qmsgq_gh_driver);
+
+MODULE_ALIAS("gunyah:QMSGQ");
+MODULE_DESCRIPTION("Gunyah QMSGQ Transport driver");
+MODULE_LICENSE("GPL");
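
For illustration, here is a minimal userspace sketch of the intended
datagram flow. This example is not part of the patch: the AF_QMSGQ and
AF_VSOCK fallback values mirror the kernel definitions, and the
destination port (5000) is made up. Note that the address carried in
sockaddr_vm keeps svm_family set to AF_VSOCK, which is what
vsock_addr_cast() checks in af_qmsgq.c, even though the socket itself
is created in the AF_QMSGQ family:

#include <stdio.h>
#include <sys/socket.h>
#include <linux/vm_sockets.h>

#ifndef AF_VSOCK
#define AF_VSOCK 40	/* from linux/socket.h */
#endif
#ifndef AF_QMSGQ
#define AF_QMSGQ 27	/* matches the fallback define in af_qmsgq.c */
#endif

int main(void)
{
	struct sockaddr_vm addr = { 0 };
	const char msg[] = "hello";
	int fd;

	fd = socket(AF_QMSGQ, SOCK_DGRAM, 0);
	if (fd < 0) {
		perror("socket");
		return 1;
	}

	/* qmsgq reuses the vsock address layout; the family inside the
	 * sockaddr stays AF_VSOCK even on an AF_QMSGQ socket.
	 */
	addr.svm_family = AF_VSOCK;
	addr.svm_cid = VMADDR_CID_HOST;
	addr.svm_port = 5000;	/* hypothetical service port */

	if (sendto(fd, msg, sizeof(msg), 0,
		   (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		perror("sendto");
		return 1;
	}

	return 0;
}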