soc: qcom: hgsl: use rb tree to track memory nodes

As scenes become more complex, the number of memory nodes keeps
growing, and tracking them with a linked list becomes increasingly
inefficient. Introduce an rb tree to make lookups of specific
memory nodes faster.

Change-Id: I984c866eaa1c877ce70803b31f1119f8ce4ae621
Signed-off-by: Hui Li <quic_hul@quicinc.com>
Author: Hui Li <quic_hul@quicinc.com>
Date:   2024-09-20 13:40:44 +08:00
parent f719ac3c91
commit 7b2f3224eb
5 changed files with 188 additions and 114 deletions
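
For readers unfamiliar with the kernel rbtree API, the sketch below (not part of the patch; all struct and function names are made up for illustration) shows the insert/lookup pattern that the new hgsl_mem_add_node()/hgsl_mem_find_node_locked() helpers follow: nodes are keyed by gpuaddr, so a lookup walks O(log n) tree levels instead of scanning the whole list.

/* Hedged sketch of the rbtree pattern adopted by this patch; the
 * names here are illustrative, not the driver's own.
 */
#include <linux/errno.h>
#include <linux/rbtree.h>
#include <linux/types.h>

struct demo_node {
	struct rb_node rb;
	u64 gpuaddr;
	u64 size;
};

static int demo_insert(struct rb_root *root, struct demo_node *new)
{
	struct rb_node **cur = &root->rb_node, *parent = NULL;

	while (*cur) {
		struct demo_node *n = rb_entry(*cur, struct demo_node, rb);

		parent = *cur;
		if (new->gpuaddr > n->gpuaddr)
			cur = &(*cur)->rb_right;
		else if (new->gpuaddr < n->gpuaddr)
			cur = &(*cur)->rb_left;
		else
			return -EEXIST;	/* duplicate key, reject */
	}
	rb_link_node(&new->rb, parent, cur);	/* attach the new leaf */
	rb_insert_color(&new->rb, root);	/* rebalance the tree */
	return 0;
}

static struct demo_node *demo_find(struct rb_root *root, u64 gpuaddr)
{
	struct rb_node *cur = root->rb_node;

	while (cur) {
		struct demo_node *n = rb_entry(cur, struct demo_node, rb);

		if (gpuaddr > n->gpuaddr)
			cur = cur->rb_right;
		else if (gpuaddr < n->gpuaddr)
			cur = cur->rb_left;
		else
			return n;	/* exact key match */
	}
	return NULL;
}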

@@ -2282,7 +2282,7 @@ static int hgsl_ioctl_mem_alloc(
struct hgsl_priv *priv = filep->private_data;
struct hgsl_ioctl_mem_alloc_params *params = data;
struct qcom_hgsl *hgsl = priv->dev;
int ret = 0;
int ret = 0, mem_fd = -1;
struct hgsl_mem_node *mem_node = NULL;
struct hgsl_hab_channel_t *hab_channel = NULL;
@@ -2298,6 +2298,13 @@ static int hgsl_ioctl_mem_alloc(
goto out;
}
mem_fd = get_unused_fd_flags(O_CLOEXEC);
if (mem_fd < 0) {
LOGE("no available fd %d", mem_fd);
ret = -EMFILE;
goto out;
}
mem_node = hgsl_mem_node_zalloc(hgsl->default_iocoherency);
if (mem_node == NULL) {
ret = -ENOMEM;
@@ -2316,30 +2323,34 @@ static int hgsl_ioctl_mem_alloc(
if (ret)
goto out;
/* increase reference count before install fd. */
get_dma_buf(mem_node->dma_buf);
params->fd = dma_buf_fd(mem_node->dma_buf, O_CLOEXEC);
if (params->fd < 0) {
LOGE("dma_buf_fd failed, size 0x%x", mem_node->memdesc.size);
ret = -EINVAL;
dma_buf_put(mem_node->dma_buf);
goto out;
}
if (copy_to_user(USRPTR(params->memdesc),
&mem_node->memdesc, sizeof(mem_node->memdesc))) {
ret = -EFAULT;
goto out;
}
/* increase reference count before install fd. */
get_dma_buf(mem_node->dma_buf);
mutex_lock(&priv->lock);
list_add(&mem_node->node, &priv->mem_allocated);
hgsl_trace_gpu_mem_total(priv, mem_node->memdesc.size64);
ret = hgsl_mem_add_node(&priv->mem_allocated, mem_node);
if (unlikely(ret))
dma_buf_put(mem_node->dma_buf);
else {
params->fd = mem_fd;
fd_install(params->fd, mem_node->dma_buf->file);
hgsl_trace_gpu_mem_total(priv, mem_node->memdesc.size64);
}
mutex_unlock(&priv->lock);
out:
if (ret && mem_node) {
hgsl_hyp_mem_unmap_smmu(hab_channel, mem_node);
hgsl_sharedmem_free(mem_node);
if (ret) {
if (mem_node) {
hgsl_hyp_mem_unmap_smmu(hab_channel, mem_node);
hgsl_sharedmem_free(mem_node);
}
if (mem_fd >= 0)
put_unused_fd(mem_fd);
}
hgsl_hyp_channel_pool_put(hab_channel);
return ret;
@@ -2354,7 +2365,6 @@ static int hgsl_ioctl_mem_free(
struct gsl_memdesc_t memdesc;
int ret = 0;
struct hgsl_mem_node *node_found = NULL;
struct hgsl_mem_node *tmp = NULL;
struct hgsl_hab_channel_t *hab_channel = NULL;
ret = hgsl_hyp_channel_pool_get(&priv->hyp_priv, 0, &hab_channel);
@@ -2371,16 +2381,11 @@ static int hgsl_ioctl_mem_free(
}
mutex_lock(&priv->lock);
list_for_each_entry(tmp, &priv->mem_allocated, node) {
if ((tmp->memdesc.gpuaddr == memdesc.gpuaddr)
&& (tmp->memdesc.size == memdesc.size)) {
node_found = tmp;
list_del(&node_found->node);
break;
}
}
node_found = hgsl_mem_find_node_locked(&priv->mem_allocated,
memdesc.gpuaddr, memdesc.size64, true);
if (node_found)
rb_erase(&node_found->mem_rb_node, &priv->mem_allocated);
mutex_unlock(&priv->lock);
if (node_found) {
ret = hgsl_hyp_mem_unmap_smmu(hab_channel, node_found);
if (!ret) {
@@ -2390,14 +2395,14 @@ static int hgsl_ioctl_mem_free(
} else {
LOGE("hgsl_hyp_mem_unmap_smmu failed %d", ret);
mutex_lock(&priv->lock);
list_add(&node_found->node, &priv->mem_allocated);
ret = hgsl_mem_add_node(&priv->mem_allocated, node_found);
mutex_unlock(&priv->lock);
if (unlikely(ret))
LOGE("unlikely to get here! %d", ret);
}
} else {
} else
LOGE("can't find the memory 0x%llx, 0x%x",
memdesc.gpuaddr, memdesc.size);
goto out;
}
out:
hgsl_hyp_channel_pool_put(hab_channel);
@@ -2413,6 +2418,7 @@ static int hgsl_ioctl_set_metainfo(
int ret = 0;
struct hgsl_mem_node *mem_node = NULL;
struct hgsl_mem_node *tmp = NULL;
struct rb_node *rb = NULL;
char metainfo[HGSL_MEM_META_MAX_SIZE] = {0};
if (params->metainfo_len > HGSL_MEM_META_MAX_SIZE) {
@@ -2429,7 +2435,8 @@ static int hgsl_ioctl_set_metainfo(
metainfo[HGSL_MEM_META_MAX_SIZE - 1] = '\0';
mutex_lock(&priv->lock);
list_for_each_entry(tmp, &priv->mem_allocated, node) {
for (rb = rb_first(&priv->mem_allocated); rb; rb = rb_next(rb)) {
tmp = rb_entry(rb, struct hgsl_mem_node, mem_rb_node);
if (tmp->memdesc.priv64 == params->memdesc_priv) {
mem_node = tmp;
break;
@@ -2482,19 +2489,21 @@ static int hgsl_ioctl_mem_map_smmu(
mem_node->memtype = params->memtype;
ret = hgsl_hyp_mem_map_smmu(hab_channel, params->size, params->offset, mem_node);
if (ret)
goto out;
if (ret == 0) {
if (copy_to_user(USRPTR(params->memdesc), &mem_node->memdesc,
sizeof(mem_node->memdesc))) {
ret = -EFAULT;
goto out;
}
mutex_lock(&priv->lock);
list_add(&mem_node->node, &priv->mem_mapped);
hgsl_trace_gpu_mem_total(priv, mem_node->memdesc.size64);
mutex_unlock(&priv->lock);
if (copy_to_user(USRPTR(params->memdesc), &mem_node->memdesc,
sizeof(mem_node->memdesc))) {
ret = -EFAULT;
goto out;
}
mutex_lock(&priv->lock);
ret = hgsl_mem_add_node(&priv->mem_mapped, mem_node);
if (likely(!ret))
hgsl_trace_gpu_mem_total(priv, mem_node->memdesc.size64);
mutex_unlock(&priv->lock);
out:
if (ret) {
hgsl_hyp_mem_unmap_smmu(hab_channel, mem_node);
@@ -2512,7 +2521,6 @@ static int hgsl_ioctl_mem_unmap_smmu(
struct hgsl_ioctl_mem_unmap_smmu_params *params = data;
int ret = 0;
struct hgsl_mem_node *node_found = NULL;
struct hgsl_mem_node *tmp = NULL;
struct hgsl_hab_channel_t *hab_channel = NULL;
ret = hgsl_hyp_channel_pool_get(&priv->hyp_priv, 0, &hab_channel);
@@ -2522,31 +2530,29 @@ static int hgsl_ioctl_mem_unmap_smmu(
}
mutex_lock(&priv->lock);
list_for_each_entry(tmp, &priv->mem_mapped, node) {
if ((tmp->memdesc.gpuaddr == params->gpuaddr)
&& (tmp->memdesc.size == params->size)) {
node_found = tmp;
list_del(&node_found->node);
break;
}
}
node_found = hgsl_mem_find_node_locked(&priv->mem_mapped,
params->gpuaddr, params->size, true);
if (node_found)
rb_erase(&node_found->mem_rb_node, &priv->mem_mapped);
mutex_unlock(&priv->lock);
if (node_found) {
hgsl_put_sgt(node_found, false);
ret = hgsl_hyp_mem_unmap_smmu(hab_channel, node_found);
if (ret) {
mutex_lock(&priv->lock);
list_add(&node_found->node, &priv->mem_mapped);
mutex_unlock(&priv->lock);
} else {
if (!ret) {
hgsl_trace_gpu_mem_total(priv,
-(node_found->memdesc.size64));
hgsl_free(node_found);
} else {
LOGE("hgsl_hyp_mem_unmap_smmu failed %d", ret);
mutex_lock(&priv->lock);
ret = hgsl_mem_add_node(&priv->mem_mapped, node_found);
mutex_unlock(&priv->lock);
if (unlikely(ret))
LOGE("unlikely to get here! %d", ret);
}
} else {
} else
ret = -EINVAL;
}
out:
hgsl_hyp_channel_pool_put(hab_channel);
@@ -2573,15 +2579,16 @@ static int hgsl_ioctl_mem_cache_operation(
}
mutex_lock(&priv->lock);
node_found = hgsl_mem_find_base_locked(&priv->mem_allocated,
gpuaddr, params->sizebytes);
node_found = hgsl_mem_find_node_locked(&priv->mem_allocated,
gpuaddr, params->sizebytes, false);
if (node_found)
internal = true;
else {
node_found = hgsl_mem_find_base_locked(&priv->mem_mapped,
gpuaddr, params->sizebytes);
node_found = hgsl_mem_find_node_locked(&priv->mem_mapped,
gpuaddr, params->sizebytes, false);
if (!node_found) {
LOGE("failed to find node %d", ret);
LOGE("failed to find gpuaddr: 0x%llx size: 0x%llx",
gpuaddr, params->sizebytes);
ret = -EINVAL;
mutex_unlock(&priv->lock);
goto out;
@@ -2607,7 +2614,6 @@ static int hgsl_ioctl_mem_get_fd(
struct hgsl_ioctl_mem_get_fd_params *params = data;
struct gsl_memdesc_t memdesc;
struct hgsl_mem_node *node_found = NULL;
struct hgsl_mem_node *tmp = NULL;
int ret = 0;
if (copy_from_user(&memdesc, USRPTR(params->memdesc),
@@ -2618,28 +2624,25 @@ static int hgsl_ioctl_mem_get_fd(
}
mutex_lock(&priv->lock);
list_for_each_entry(tmp, &priv->mem_allocated, node) {
if ((tmp->memdesc.gpuaddr == memdesc.gpuaddr)
&& (tmp->memdesc.size == memdesc.size)) {
node_found = tmp;
break;
}
}
params->fd = -1;
if (node_found && node_found->dma_buf) {
node_found = hgsl_mem_find_node_locked(&priv->mem_allocated,
memdesc.gpuaddr, memdesc.size64, true);
if (node_found && node_found->dma_buf)
get_dma_buf(node_found->dma_buf);
else
ret = -EINVAL;
mutex_unlock(&priv->lock);
params->fd = -1;
if (!ret) {
params->fd = dma_buf_fd(node_found->dma_buf, O_CLOEXEC);
if (params->fd < 0) {
LOGE("dma buf to fd failed");
ret = -EINVAL;
dma_buf_put(node_found->dma_buf);
}
} else {
} else
LOGE("can't find the memory 0x%llx, 0x%x, node_found:%p",
memdesc.gpuaddr, memdesc.size, node_found);
ret = -EINVAL;
}
mutex_unlock(&priv->lock);
out:
return ret;
@@ -3251,8 +3254,8 @@ static int hgsl_open(struct inode *inodep, struct file *filep)
goto out;
}
INIT_LIST_HEAD(&priv->mem_mapped);
INIT_LIST_HEAD(&priv->mem_allocated);
priv->mem_mapped = RB_ROOT;
priv->mem_allocated = RB_ROOT;
mutex_init(&priv->lock);
priv->pid = pid_nr;
@@ -3279,13 +3282,11 @@ out:
static int hgsl_cleanup(struct hgsl_priv *priv)
{
struct hgsl_mem_node *node_found = NULL;
struct hgsl_mem_node *tmp = NULL;
struct rb_node *next = NULL;
int ret;
bool need_notify = (!list_empty(&priv->mem_mapped) ||
!list_empty(&priv->mem_allocated));
struct hgsl_hab_channel_t *hab_channel = NULL;
if (need_notify) {
if (!hgsl_mem_rb_empty(priv)) {
ret = hgsl_hyp_channel_pool_get(&priv->hyp_priv, 0, &hab_channel);
if (ret)
LOGE("Failed to get channel %d", ret);
@@ -3298,14 +3299,15 @@ static int hgsl_cleanup(struct hgsl_priv *priv)
}
mutex_lock(&priv->lock);
if ((hab_channel == NULL) &&
(!list_empty(&priv->mem_mapped) || !list_empty(&priv->mem_allocated))) {
if (!hab_channel && !hgsl_mem_rb_empty(priv)) {
ret = hgsl_hyp_channel_pool_get(&priv->hyp_priv, 0, &hab_channel);
if (ret)
LOGE("Failed to get channel %d", ret);
}
list_for_each_entry_safe(node_found, tmp, &priv->mem_mapped, node) {
next = rb_first(&priv->mem_mapped);
while (next) {
node_found = rb_entry(next, struct hgsl_mem_node, mem_rb_node);
hgsl_put_sgt(node_found, false);
ret = hgsl_hyp_mem_unmap_smmu(hab_channel, node_found);
if (ret)
@@ -3313,16 +3315,23 @@ static int hgsl_cleanup(struct hgsl_priv *priv)
node_found->export_id, node_found->memdesc.gpuaddr, ret);
else
hgsl_trace_gpu_mem_total(priv, -(node_found->memdesc.size64));
list_del(&node_found->node);
next = rb_next(&node_found->mem_rb_node);
rb_erase(&node_found->mem_rb_node, &priv->mem_mapped);
hgsl_free(node_found);
}
list_for_each_entry_safe(node_found, tmp, &priv->mem_allocated, node) {
next = rb_first(&priv->mem_allocated);
while (next) {
node_found = rb_entry(next, struct hgsl_mem_node, mem_rb_node);
ret = hgsl_hyp_mem_unmap_smmu(hab_channel, node_found);
if (ret)
LOGE("Failed to clean mapped buffer %u, 0x%llx, ret %d",
node_found->export_id, node_found->memdesc.gpuaddr, ret);
list_del(&node_found->node);
hgsl_trace_gpu_mem_total(priv, -(node_found->memdesc.size64));
next = rb_next(&node_found->mem_rb_node);
rb_erase(&node_found->mem_rb_node, &priv->mem_allocated);
hgsl_sharedmem_free(node_found);
}
mutex_unlock(&priv->lock);
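
One detail of the reworked hgsl_ioctl_mem_alloc() hunk above is worth spelling out: the fd is now reserved early with get_unused_fd_flags() and only bound to the dma-buf file via fd_install() after the node has been added to the rb tree, so any failure in between can still be unwound with put_unused_fd(). A minimal sketch of that reserve-then-install pattern follows; demo_track_buffer() is a hypothetical stand-in for the tree insert and everything else is illustrative, not the driver's code.

/* Hedged sketch: reserve an fd, take a dma-buf reference, and only
 * publish the fd once bookkeeping has succeeded.
 */
#include <linux/dma-buf.h>
#include <linux/fcntl.h>
#include <linux/file.h>

int demo_track_buffer(struct dma_buf *buf);	/* hypothetical, e.g. rb-tree insert */

static int demo_export_fd(struct dma_buf *buf)
{
	int fd = get_unused_fd_flags(O_CLOEXEC);	/* reserve an fd number */

	if (fd < 0)
		return fd;

	get_dma_buf(buf);			/* reference owned by the new fd */

	if (demo_track_buffer(buf)) {		/* bookkeeping may fail */
		dma_buf_put(buf);		/* drop the extra reference */
		put_unused_fd(fd);		/* release the reserved number */
		return -EEXIST;
	}

	fd_install(fd, buf->file);		/* fd becomes visible only now */
	return fd;
}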

@@ -192,8 +192,8 @@ struct hgsl_priv {
struct list_head node;
struct hgsl_hyp_priv_t hyp_priv;
struct mutex lock;
struct list_head mem_mapped;
struct list_head mem_allocated;
struct rb_root mem_mapped;
struct rb_root mem_allocated;
int open_count;
atomic64_t total_mem_size;
@@ -230,6 +230,12 @@ static inline bool hgsl_ts_ge(uint64_t a, uint64_t b, bool is64)
return hgsl_ts32_ge((uint32_t)a, (uint32_t)b);
}
static inline bool hgsl_mem_rb_empty(struct hgsl_priv *priv)
{
return (RB_EMPTY_ROOT(&priv->mem_mapped) &&
RB_EMPTY_ROOT(&priv->mem_allocated));
}
/**
* struct hgsl_hsync_timeline - A sync timeline attached under each hgsl context
* @kref: Refcount to keep the struct alive

@@ -14,12 +14,14 @@ static int hgsl_client_mem_show(struct seq_file *s, void *unused)
{
struct hgsl_priv *priv = s->private;
struct hgsl_mem_node *tmp = NULL;
struct rb_node *rb = NULL;
seq_printf(s, "%16s %16s %10s %10s\n",
"gpuaddr", "size", "flags", "type");
mutex_lock(&priv->lock);
list_for_each_entry(tmp, &priv->mem_allocated, node) {
for (rb = rb_first(&priv->mem_allocated); rb; rb = rb_next(rb)) {
tmp = rb_entry(rb, struct hgsl_mem_node, mem_rb_node);
seq_printf(s, "%p %16llx %10x %10d\n",
tmp->memdesc.gpuaddr,
tmp->memdesc.size,
@@ -37,6 +39,7 @@ static int hgsl_client_memtype_show(struct seq_file *s, void *unused)
{
struct hgsl_priv *priv = s->private;
struct hgsl_mem_node *tmp = NULL;
struct rb_node *rb = NULL;
int i;
int memtype;
@@ -71,7 +74,8 @@ static int hgsl_client_memtype_show(struct seq_file *s, void *unused)
gpu_mem_types[i].size = 0;
mutex_lock(&priv->lock);
list_for_each_entry(tmp, &priv->mem_allocated, node) {
for (rb = rb_first(&priv->mem_allocated); rb; rb = rb_next(rb)) {
tmp = rb_entry(rb, struct hgsl_mem_node, mem_rb_node);
memtype = GET_MEMTYPE(tmp->flags);
if (memtype < ARRAY_SIZE(gpu_mem_types))
gpu_mem_types[memtype].size += tmp->memdesc.size;

@@ -612,24 +612,6 @@ void hgsl_sharedmem_free(struct hgsl_mem_node *mem_node)
}
struct hgsl_mem_node *hgsl_mem_find_base_locked(struct list_head *head,
uint64_t gpuaddr, uint64_t size)
{
struct hgsl_mem_node *node_found = NULL;
struct hgsl_mem_node *tmp = NULL;
uint64_t end = gpuaddr + size;
list_for_each_entry(tmp, head, node) {
if ((tmp->memdesc.gpuaddr <= gpuaddr)
&& ((tmp->memdesc.gpuaddr + tmp->memdesc.size) >= end)) {
node_found = tmp;
break;
}
}
return node_found;
}
void *hgsl_mem_node_zalloc(bool iocoherency)
{
struct hgsl_mem_node *mem_node = NULL;
@@ -644,4 +626,64 @@ out:
return mem_node;
}
int hgsl_mem_add_node(struct rb_root *rb_root,
struct hgsl_mem_node *mem_node)
{
struct rb_node **cur;
struct rb_node *parent = NULL;
struct hgsl_mem_node *node = NULL;
int ret = 0;
cur = &rb_root->rb_node;
while (*cur) {
parent = *cur;
node = rb_entry(parent, struct hgsl_mem_node, mem_rb_node);
if (mem_node->memdesc.gpuaddr > node->memdesc.gpuaddr)
cur = &parent->rb_right;
else if (mem_node->memdesc.gpuaddr < node->memdesc.gpuaddr)
cur = &parent->rb_left;
else {
LOGE("Duplicate gpuaddr: 0x%llx",
mem_node->memdesc.gpuaddr);
ret = -EEXIST;
goto out;
}
}
rb_link_node(&mem_node->mem_rb_node, parent, cur);
rb_insert_color(&mem_node->mem_rb_node, rb_root);
out:
return ret;
}
struct hgsl_mem_node *hgsl_mem_find_node_locked(
struct rb_root *rb_root, uint64_t gpuaddr,
uint64_t size, bool accurate)
{
struct rb_node *cur = NULL;
struct hgsl_mem_node *node_found = NULL;
cur = rb_root->rb_node;
while (cur) {
node_found = rb_entry(cur, struct hgsl_mem_node, mem_rb_node);
if (hgsl_mem_range_inspect(
node_found->memdesc.gpuaddr, gpuaddr,
node_found->memdesc.size64, size,
accurate)) {
return node_found;
} else if (node_found->memdesc.gpuaddr < gpuaddr)
cur = cur->rb_right;
else if (node_found->memdesc.gpuaddr > gpuaddr)
cur = cur->rb_left;
else {
LOGE("Invalid addr: 0x%llx size: [0x%llx 0x%llx]",
gpuaddr, size, node_found->memdesc.size64);
goto out;
}
}
out:
return NULL;
}
MODULE_IMPORT_NS(DMA_BUF);

@@ -10,6 +10,7 @@
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/rbtree.h>
#include "hgsl_types.h"
#include "hgsl_utils.h"
@@ -49,7 +50,7 @@ enum gsl_user_mem_type_t {
};
struct hgsl_mem_node {
struct list_head node;
struct rb_node mem_rb_node;
struct gsl_memdesc_t memdesc;
int32_t fd;
uint32_t export_id;
@@ -79,9 +80,21 @@ int hgsl_mem_cache_op(struct device *dev, struct hgsl_mem_node *mem_node,
void hgsl_put_sgt(struct hgsl_mem_node *mem_node, bool internal);
struct hgsl_mem_node *hgsl_mem_find_base_locked(struct list_head *head,
uint64_t gpuaddr, uint64_t size);
void *hgsl_mem_node_zalloc(bool iocoherency);
int hgsl_mem_add_node(struct rb_root *rb_root,
struct hgsl_mem_node *mem_node);
struct hgsl_mem_node *hgsl_mem_find_node_locked(
struct rb_root *rb_root, uint64_t gpuaddr,
uint64_t size, bool accurate);
static inline bool hgsl_mem_range_inspect(uint64_t da1, uint64_t da2,
uint64_t size1, uint64_t size2, bool accurate)
{
if (accurate)
return ((da1 == da2) && (size1 == size2));
else
return ((da1 <= da2) && (da1 + size1) >= (da2 + size2));
}
#endif
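
The accurate flag above selects between the two lookup modes visible in the callers: an exact gpuaddr/size match (mem_free, mem_unmap_smmu, mem_get_fd) and a containing-range match (mem_cache_operation, accurate == false). The hedged sketch below mirrors the exact-match detach done by hgsl_ioctl_mem_free(); only the wrapper name is invented, the helpers and fields come from this patch.

/* Hedged sketch: find an exactly matching node and detach it from the
 * tree while priv->lock is held, mirroring the mem_free path.
 */
static struct hgsl_mem_node *demo_detach_exact(struct hgsl_priv *priv,
		uint64_t gpuaddr, uint64_t size)
{
	struct hgsl_mem_node *node;

	mutex_lock(&priv->lock);
	/* accurate == true: gpuaddr and size must both match the node */
	node = hgsl_mem_find_node_locked(&priv->mem_allocated, gpuaddr,
					 size, true);
	if (node)
		rb_erase(&node->mem_rb_node, &priv->mem_allocated);
	mutex_unlock(&priv->lock);

	return node;	/* caller now owns the detached node, or NULL */
}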