All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
From: Michel Thierry <michel.thierry@intel.com>
To: intel-gfx@lists.freedesktop.org
Cc: Ben Widawsky <ben@bwidawsk.net>, dri-devel@lists.freedesktop.org
Subject: [PATCH v4 1/2] intel: 48b ppgtt support (EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag)
Date: Thu,  3 Sep 2015 15:23:58 +0100	[thread overview]
Message-ID: <1441290239-31462-2-git-send-email-michel.thierry@intel.com> (raw)
In-Reply-To: <1441290239-31462-1-git-send-email-michel.thierry@intel.com>

Gen8+ supports 48-bit virtual addresses, but some objects must always be
allocated inside the 32-bit address range.

Specifically, any resource used with flat/heapless (0x00000000-0xfffff000)
General State Heap (GSH) or Instruction State Heap (ISH) must be in a
32-bit range, because the General State Offset and Instruction State Offset
are limited to 32-bits.

The i915 driver has been modified to provide a flag to set when the 4GB
limit is not necessary in a given bo (EXEC_OBJECT_SUPPORTS_48B_ADDRESS).
48-bit range will only be used when explicitly requested.

Callers to the existing drm_intel_bo_emit_reloc function should set the
use_48b_address_range flag beforehand, in order to use the full ppgtt range.

v2: Make set/clear functions nops on pre-gen8 platforms, and use them
    internally in emit_reloc functions (Ben)
    s/48BADDRESS/48B_ADDRESS/ (Dave)
v3: Keep set/clear functions internal, no-one needs to use them directly.
v4: Don't set 48bit-support flag in emit reloc, check for ppgtt type
    before enabling set/clear function, print full offsets in debug
    statements, using port of lower_32_bits and upper_32_bits from linux
    kernel (Michał)

References: http://lists.freedesktop.org/archives/intel-gfx/2015-July/072612.html
Cc: Ben Widawsky <ben@bwidawsk.net>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Signed-off-by: Michel Thierry <michel.thierry@intel.com>
---
 include/drm/i915_drm.h    |  3 +-
 intel/intel_bufmgr.c      | 11 ++++++
 intel/intel_bufmgr.h      |  1 +
 intel/intel_bufmgr_gem.c  | 88 +++++++++++++++++++++++++++++++++++++----------
 intel/intel_bufmgr_priv.h | 14 ++++++++
 5 files changed, 97 insertions(+), 20 deletions(-)

diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index ded43b1..426b25c 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -680,7 +680,8 @@ struct drm_i915_gem_exec_object2 {
 #define EXEC_OBJECT_NEEDS_FENCE (1<<0)
 #define EXEC_OBJECT_NEEDS_GTT	(1<<1)
 #define EXEC_OBJECT_WRITE	(1<<2)
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1)
+#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
 	__u64 flags;
 
 	__u64 rsvd1;
diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c
index 14ea9f9..0856e60 100644
--- a/intel/intel_bufmgr.c
+++ b/intel/intel_bufmgr.c
@@ -293,6 +293,17 @@ drm_intel_bo_madvise(drm_intel_bo *bo, int madv)
 }
 
 int
+drm_intel_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable)
+{
+	if (bo->bufmgr->bo_use_48b_address_range) {
+		bo->bufmgr->bo_use_48b_address_range(bo, enable);
+		return 0;
+	}
+
+	return -ENODEV;
+}
+
+int
 drm_intel_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
 {
 	return bo->bufmgr->bo_references(bo, target_bo);
diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
index 95eecb8..a14c78f 100644
--- a/intel/intel_bufmgr.h
+++ b/intel/intel_bufmgr.h
@@ -164,6 +164,7 @@ int drm_intel_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
 int drm_intel_bo_flink(drm_intel_bo *bo, uint32_t * name);
 int drm_intel_bo_busy(drm_intel_bo *bo);
 int drm_intel_bo_madvise(drm_intel_bo *bo, int madv);
+int drm_intel_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable);
 
 int drm_intel_bo_disable_reuse(drm_intel_bo *bo);
 int drm_intel_bo_is_reusable(drm_intel_bo *bo);
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index 2723e21..09d82d2 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -83,6 +83,22 @@
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 #define MAX2(A, B) ((A) > (B) ? (A) : (B))
 
+/**
+ * upper_32_bits - return bits 32-63 of a number
+ * @n: the number we're accessing
+ *
+ * A basic shift-right of a 64- or 32-bit quantity.  Use this to suppress
+ * the "right shift count >= width of type" warning when that quantity is
+ * 32-bits.
+ */
+#define upper_32_bits(n) ((__u32)(((n) >> 16) >> 16))
+
+/**
+ * lower_32_bits - return bits 0-31 of a number
+ * @n: the number we're accessing
+ */
+#define lower_32_bits(n) ((__u32)(n))
+
 typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
 
 struct drm_intel_gem_bo_bucket {
@@ -237,6 +253,15 @@ struct _drm_intel_bo_gem {
 	bool is_userptr;
 
 	/**
+	 * Boolean of whether this buffer can be placed in the full 48-bit
+	 * address range on gen8+.
+	 *
+	 * By default, buffers will be kept in a 32-bit range, unless this
+	 * flag is explicitly set.
+	 */
+	bool use_48b_address_range;
+
+	/**
 	 * Size in bytes of this buffer and its relocation descendents.
 	 *
 	 * Used to avoid costly tree walking in
@@ -395,14 +420,16 @@ drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
 			drm_intel_bo_gem *target_gem =
 			    (drm_intel_bo_gem *) target_bo;
 
-			DBG("%2d: %d (%s)@0x%08llx -> "
-			    "%d (%s)@0x%08lx + 0x%08x\n",
+			DBG("%2d: %d (%s)@0x%08x %08x -> "
+			    "%d (%s)@0x%08x %08x + 0x%08x\n",
 			    i,
 			    bo_gem->gem_handle, bo_gem->name,
-			    (unsigned long long)bo_gem->relocs[j].offset,
+			    upper_32_bits(bo_gem->relocs[j].offset),
+			    lower_32_bits(bo_gem->relocs[j].offset),
 			    target_gem->gem_handle,
 			    target_gem->name,
-			    target_bo->offset64,
+			    upper_32_bits(target_bo->offset64),
+			    lower_32_bits(target_bo->offset64),
 			    bo_gem->relocs[j].delta);
 		}
 	}
@@ -468,11 +495,15 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
 	int index;
+	int flags = 0;
+
+	if (need_fence)
+		flags |= EXEC_OBJECT_NEEDS_FENCE;
+	if (bo_gem->use_48b_address_range)
+		flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
 
 	if (bo_gem->validate_index != -1) {
-		if (need_fence)
-			bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
-				EXEC_OBJECT_NEEDS_FENCE;
+		bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= flags;
 		return;
 	}
 
@@ -501,13 +532,9 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
 	bufmgr_gem->exec2_objects[index].alignment = bo->align;
 	bufmgr_gem->exec2_objects[index].offset = 0;
 	bufmgr_gem->exec_bos[index] = bo;
-	bufmgr_gem->exec2_objects[index].flags = 0;
+	bufmgr_gem->exec2_objects[index].flags = flags;
 	bufmgr_gem->exec2_objects[index].rsvd1 = 0;
 	bufmgr_gem->exec2_objects[index].rsvd2 = 0;
-	if (need_fence) {
-		bufmgr_gem->exec2_objects[index].flags |=
-			EXEC_OBJECT_NEEDS_FENCE;
-	}
 	bufmgr_gem->exec_count++;
 }
 
@@ -780,6 +807,7 @@ retry:
 	bo_gem->used_as_reloc_target = false;
 	bo_gem->has_error = false;
 	bo_gem->reusable = true;
+	bo_gem->use_48b_address_range = false;
 
 	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, alignment);
 
@@ -926,6 +954,7 @@ drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
 	bo_gem->used_as_reloc_target = false;
 	bo_gem->has_error = false;
 	bo_gem->reusable = false;
+	bo_gem->use_48b_address_range = false;
 
 	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
 
@@ -1081,6 +1110,7 @@ drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr,
 	bo_gem->bo.handle = open_arg.handle;
 	bo_gem->global_name = handle;
 	bo_gem->reusable = false;
+	bo_gem->use_48b_address_range = false;
 
 	memclear(get_tiling);
 	get_tiling.handle = bo_gem->gem_handle;
@@ -1930,6 +1960,13 @@ do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
 	return 0;
 }
 
+static void
+drm_intel_gem_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable)
+{
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+	bo_gem->use_48b_address_range = enable;
+}
+
 static int
 drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
 			    drm_intel_bo *target_bo, uint32_t target_offset,
@@ -2073,10 +2110,12 @@ drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem)
 
 		/* Update the buffer offset */
 		if (bufmgr_gem->exec_objects[i].offset != bo->offset64) {
-			DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
-			    bo_gem->gem_handle, bo_gem->name, bo->offset64,
-			    (unsigned long long)bufmgr_gem->exec_objects[i].
-			    offset);
+			DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n",
+			    bo_gem->gem_handle, bo_gem->name,
+			    upper_32_bits(bo->offset64),
+			    lower_32_bits(bo->offset64),
+			    upper_32_bits(bufmgr_gem->exec_objects[i].offset),
+			    lower_32_bits(bufmgr_gem->exec_objects[i].offset));
 			bo->offset64 = bufmgr_gem->exec_objects[i].offset;
 			bo->offset = bufmgr_gem->exec_objects[i].offset;
 		}
@@ -2094,9 +2133,12 @@ drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem)
 
 		/* Update the buffer offset */
 		if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) {
-			DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
-			    bo_gem->gem_handle, bo_gem->name, bo->offset64,
-			    (unsigned long long)bufmgr_gem->exec2_objects[i].offset);
+			DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n",
+			    bo_gem->gem_handle, bo_gem->name,
+			    upper_32_bits(bo->offset64),
+			    lower_32_bits(bo->offset64),
+			    upper_32_bits(bufmgr_gem->exec2_objects[i].offset),
+			    lower_32_bits(bufmgr_gem->exec2_objects[i].offset));
 			bo->offset64 = bufmgr_gem->exec2_objects[i].offset;
 			bo->offset = bufmgr_gem->exec2_objects[i].offset;
 		}
@@ -2481,6 +2523,7 @@ drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int s
 	bo_gem->used_as_reloc_target = false;
 	bo_gem->has_error = false;
 	bo_gem->reusable = false;
+	bo_gem->use_48b_address_range = false;
 
 	DRMINITLISTHEAD(&bo_gem->vma_list);
 	DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
@@ -3278,6 +3321,13 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
 		}
 	}
 
+	if (bufmgr_gem->gen >= 8) {
+		gp.param = I915_PARAM_HAS_ALIASING_PPGTT;
+		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
+		if (ret == 0 && *gp.value == 3)
+			bufmgr_gem->bufmgr.bo_use_48b_address_range = drm_intel_gem_bo_use_48b_address_range;
+	}
+
 	/* Let's go with one relocation per every 2 dwords (but round down a bit
 	 * since a power of two will mean an extra page allocation for the reloc
 	 * buffer).
diff --git a/intel/intel_bufmgr_priv.h b/intel/intel_bufmgr_priv.h
index 59ebd18..5c17ffb 100644
--- a/intel/intel_bufmgr_priv.h
+++ b/intel/intel_bufmgr_priv.h
@@ -152,6 +152,20 @@ struct _drm_intel_bufmgr {
 	void (*destroy) (drm_intel_bufmgr *bufmgr);
 
 	/**
+	 * Indicate if the buffer can be placed anywhere in the full ppgtt
+	 * address range (2^48).
+	 *
+	 * Any resource used with flat/heapless (0x00000000-0xfffff000)
+	 * General State Heap (GSH) or Instruction State Heap (ISH) must
+	 * be in a 32-bit range. 48-bit range will only be used when explicitly
+	 * requested.
+	 *
+	 * \param bo Buffer to set the use_48b_address_range flag.
+	 * \param enable The flag value.
+	 */
+	void (*bo_use_48b_address_range) (drm_intel_bo *bo, uint32_t enable);
+
+	/**
 	 * Add relocation entry in reloc_buf, which will be updated with the
 	 * target buffer's real offset on on command submission.
 	 *
-- 
2.5.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2015-09-03 14:23 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-09-03 14:23 [PATCH libdrm v4 0/2] 48-bit virtual address support in i915 Michel Thierry
2015-09-03 14:23 ` Michel Thierry [this message]
2015-09-14 13:54   ` [PATCH v4 1/2] intel: 48b ppgtt support (EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag) Michał Winiarski
2015-10-05 14:03     ` Michel Thierry
2015-10-05 18:06       ` Kristian Høgsberg
2015-10-06 13:12         ` Michel Thierry
2015-10-13 12:16           ` Michel Thierry
2015-10-13 14:13             ` Emil Velikov
2015-10-13 14:55               ` Michel Thierry
2015-10-13 21:51                 ` Kristian Høgsberg
2015-10-14  7:19                   ` [Mesa-dev] " Daniel Vetter
2015-10-14 12:11                     ` Michel Thierry
2015-11-18 22:53                       ` Kristian Høgsberg
2015-12-04 14:24                         ` Michel Thierry
2015-12-10 19:40                           ` Kristian Høgsberg
2015-09-03 14:23 ` [PATCH v4 2/2] intel: add drm_intel_bo_use_48b_address_range to symbol-check test Michel Thierry
2015-10-02 17:35 ` [PATCH libdrm v4 0/2] 48-bit virtual address support in i915 Emil Velikov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1441290239-31462-2-git-send-email-michel.thierry@intel.com \
    --to=michel.thierry@intel.com \
    --cc=ben@bwidawsk.net \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.