Snap for 8695492 from 7940fcf597 to android-mainline-keystone-qcom-release

Change-Id: I8d2524317a06982cb50a2c0bd842e62d0157a166
2022-06-08 10:00:47 +00:00 · 2022-06-08 10:00:47 +00:00 · 019bc1f65e
commit 019bc1f65e
parent 05713a3e9d 7940fcf597
1226 changed files with 42670 additions and 18197 deletions
--- a/.mailmap
+++ b/.mailmap
@ -398,6 +398,7 @@ Vasily Averin <vasily.averin@linux.dev> <vvs@virtuozzo.com>
 Vasily Averin <vasily.averin@linux.dev> <vvs@openvz.org>
 Vasily Averin <vasily.averin@linux.dev> <vvs@parallels.com>
 Vasily Averin <vasily.averin@linux.dev> <vvs@sw.ru>
+Valentin Schneider <vschneid@redhat.com> <valentin.schneider@arm.com>
 Vinod Koul <vkoul@kernel.org> <vinod.koul@intel.com>
 Vinod Koul <vkoul@kernel.org> <vinod.koul@linux.intel.com>
 Vinod Koul <vkoul@kernel.org> <vkoul@infradead.org>
--- a/Documentation/ABI/stable/sysfs-driver-mlxreg-io
+++ b/Documentation/ABI/stable/sysfs-driver-mlxreg-io
@ -467,3 +467,39 @@ Description:	These files provide the maximum powered required for line card
 		feeding and line card configuration Id.

 		The files are read only.
+
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/phy_reset
+Date:		May 2022
+KernelVersion:	5.19
+Contact:	Vadim Pasternak <vadimpmellanox.com>
+Description:	This file allows to reset PHY 88E1548 when attribute is set 0
+		due to some abnormal PHY behavior.
+		Expected behavior:
+		When phy_reset is written 1, all PHY 88E1548 are released
+		from the reset state, when 0 - are hold in reset state.
+
+		The files are read/write.
+
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/mac_reset
+Date:		May 2022
+KernelVersion:	5.19
+Contact:	Vadim Pasternak <vadimpmellanox.com>
+Description:	This file allows to reset ASIC MT52132 when attribute is set 0
+		due to some abnormal ASIC behavior.
+		Expected behavior:
+		When mac_reset is written 1, the ASIC MT52132 is released
+		from the reset state, when 0 - is hold in reset state.
+
+		The files are read/write.
+
+What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/qsfp_pwr_good
+Date:		May 2022
+KernelVersion:	5.19
+Contact:	Vadim Pasternak <vadimpmellanox.com>
+Description:	This file shows QSFP ports power status. The value is set to 0
+		when one of any QSFP ports is plugged. The value is set to 1 when
+		there are no any QSFP ports are plugged.
+		The possible values are:
+		0 - Power good, 1 - Not power good.
+
+		The files are read only.
--- a/Documentation/ABI/testing/securityfs-secrets-coco
+++ b/Documentation/ABI/testing/securityfs-secrets-coco
@ -0,0 +1,51 @@
+What:		security/secrets/coco
+Date:		February 2022
+Contact:	Dov Murik <dovmurik@linux.ibm.com>
+Description:
+		Exposes confidential computing (coco) EFI secrets to
+		userspace via securityfs.
+
+		EFI can declare memory area used by confidential computing
+		platforms (such as AMD SEV and SEV-ES) for secret injection by
+		the Guest Owner during VM's launch.  The secrets are encrypted
+		by the Guest Owner and decrypted inside the trusted enclave,
+		and therefore are not readable by the untrusted host.
+
+		The efi_secret module exposes the secrets to userspace.  Each
+		secret appears as a file under <securityfs>/secrets/coco,
+		where the filename is the GUID of the entry in the secrets
+		table.  This module is loaded automatically by the EFI driver
+		if the EFI secret area is populated.
+
+		Two operations are supported for the files: read and unlink.
+		Reading the file returns the content of secret entry.
+		Unlinking the file overwrites the secret data with zeroes and
+		removes the entry from the filesystem.  A secret cannot be read
+		after it has been unlinked.
+
+		For example, listing the available secrets::
+
+		  # modprobe efi_secret
+		  # ls -l /sys/kernel/security/secrets/coco
+		  -r--r----- 1 root root 0 Jun 28 11:54 736870e5-84f0-4973-92ec-06879ce3da0b
+		  -r--r----- 1 root root 0 Jun 28 11:54 83c83f7f-1356-4975-8b7e-d3a0b54312c6
+		  -r--r----- 1 root root 0 Jun 28 11:54 9553f55d-3da2-43ee-ab5d-ff17f78864d2
+		  -r--r----- 1 root root 0 Jun 28 11:54 e6f5a162-d67f-4750-a67c-5d065f2a9910
+
+		Reading the secret data by reading a file::
+
+		  # cat /sys/kernel/security/secrets/coco/e6f5a162-d67f-4750-a67c-5d065f2a9910
+		  the-content-of-the-secret-data
+
+		Wiping a secret by unlinking a file::
+
+		  # rm /sys/kernel/security/secrets/coco/e6f5a162-d67f-4750-a67c-5d065f2a9910
+		  # ls -l /sys/kernel/security/secrets/coco
+		  -r--r----- 1 root root 0 Jun 28 11:54 736870e5-84f0-4973-92ec-06879ce3da0b
+		  -r--r----- 1 root root 0 Jun 28 11:54 83c83f7f-1356-4975-8b7e-d3a0b54312c6
+		  -r--r----- 1 root root 0 Jun 28 11:54 9553f55d-3da2-43ee-ab5d-ff17f78864d2
+
+		Note: The binary format of the secrets table injected by the
+		Guest Owner is described in
+		drivers/virt/coco/efi_secret/efi_secret.c under "Structure of
+		the EFI secret area".
--- a/Documentation/ABI/testing/sysfs-driver-xen-blkback
+++ b/Documentation/ABI/testing/sysfs-driver-xen-blkback
@ -29,7 +29,7 @@ Description:
 What:           /sys/module/xen_blkback/parameters/buffer_squeeze_duration_ms
 Date:           December 2019
 KernelVersion:  5.6
-Contact:        SeongJae Park <sj@kernel.org>
+Contact:        Maximilian Heyne <mheyne@amazon.de>
 Description:
                When memory pressure is reported to blkback this option
                controls the duration in milliseconds that blkback will not
@ -39,7 +39,7 @@ Description:
 What:           /sys/module/xen_blkback/parameters/feature_persistent
 Date:           September 2020
 KernelVersion:  5.10
-Contact:        SeongJae Park <sj@kernel.org>
+Contact:        Maximilian Heyne <mheyne@amazon.de>
 Description:
                Whether to enable the persistent grants feature or not.  Note
                that this option only takes effect on newly created backends.
--- a/Documentation/ABI/testing/sysfs-driver-xen-blkfront
+++ b/Documentation/ABI/testing/sysfs-driver-xen-blkfront
@ -12,7 +12,7 @@ Description:
 What:           /sys/module/xen_blkfront/parameters/feature_persistent
 Date:           September 2020
 KernelVersion:  5.10
-Contact:        SeongJae Park <sj@kernel.org>
+Contact:        Maximilian Heyne <mheyne@amazon.de>
 Description:
                Whether to enable the persistent grants feature or not.  Note
                that this option only takes effect on newly created frontends.
--- a/Documentation/ABI/testing/sysfs-platform-intel-ifs
+++ b/Documentation/ABI/testing/sysfs-platform-intel-ifs
@ -0,0 +1,39 @@
+What:		/sys/devices/virtual/misc/intel_ifs_<N>/run_test
+Date:		April 21 2022
+KernelVersion:	5.19
+Contact:	"Jithu Joseph" <jithu.joseph@intel.com>
+Description:	Write <cpu#> to trigger IFS test for one online core.
+		Note that the test is per core. The cpu# can be
+		for any thread on the core. Running on one thread
+		completes the test for the core containing that thread.
+		Example: to test the core containing cpu5: echo 5 >
+		/sys/devices/platform/intel_ifs.<N>/run_test
+
+What:		/sys/devices/virtual/misc/intel_ifs_<N>/status
+Date:		April 21 2022
+KernelVersion:	5.19
+Contact:	"Jithu Joseph" <jithu.joseph@intel.com>
+Description:	The status of the last test. It can be one of "pass", "fail"
+		or "untested".
+
+What:		/sys/devices/virtual/misc/intel_ifs_<N>/details
+Date:		April 21 2022
+KernelVersion:	5.19
+Contact:	"Jithu Joseph" <jithu.joseph@intel.com>
+Description:	Additional information regarding the last test. The details file reports
+		the hex value of the SCAN_STATUS MSR. Note that the error_code field
+		may contain driver defined software code not defined in the Intel SDM.
+
+What:		/sys/devices/virtual/misc/intel_ifs_<N>/image_version
+Date:		April 21 2022
+KernelVersion:	5.19
+Contact:	"Jithu Joseph" <jithu.joseph@intel.com>
+Description:	Version (hexadecimal) of loaded IFS binary image. If no scan image
+		is loaded reports "none".
+
+What:		/sys/devices/virtual/misc/intel_ifs_<N>/reload
+Date:		April 21 2022
+KernelVersion:	5.19
+Contact:	"Jithu Joseph" <jithu.joseph@intel.com>
+Description:	Write "1" (or "y" or "Y") to reload the IFS image from
+		/lib/firmware/intel/ifs/ff-mm-ss.scan.
--- a/Documentation/RCU/Design/Data-Structures/Data-Structures.rst
+++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.rst
@ -973,7 +973,7 @@ The ``->dynticks`` field counts the corresponding CPU's transitions to
 and from either dyntick-idle or user mode, so that this counter has an
 even value when the CPU is in dyntick-idle mode or user mode and an odd
 value otherwise. The transitions to/from user mode need to be counted
-for user mode adaptive-ticks support (see timers/NO_HZ.txt).
+for user mode adaptive-ticks support (see Documentation/timers/no_hz.rst).

 The ``->rcu_need_heavy_qs`` field is used to record the fact that the
 RCU core code would really like to see a quiescent state from the
--- a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst
+++ b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst
@ -406,7 +406,7 @@ In earlier implementations, the task requesting the expedited grace
 period also drove it to completion. This straightforward approach had
 the disadvantage of needing to account for POSIX signals sent to user
 tasks, so more recent implemementations use the Linux kernel's
-`workqueues <https://www.kernel.org/doc/Documentation/core-api/workqueue.rst>`__.
+workqueues (see Documentation/core-api/workqueue.rst).

 The requesting task still does counter snapshotting and funnel-lock
 processing, but the task reaching the top of the funnel lock does a
--- a/Documentation/RCU/Design/Requirements/Requirements.rst
+++ b/Documentation/RCU/Design/Requirements/Requirements.rst
@ -370,8 +370,8 @@ pointer fetched by rcu_dereference() may not be used outside of the
 outermost RCU read-side critical section containing that
 rcu_dereference(), unless protection of the corresponding data
 element has been passed from RCU to some other synchronization
-mechanism, most commonly locking or `reference
-counting <https://www.kernel.org/doc/Documentation/RCU/rcuref.txt>`__.
+mechanism, most commonly locking or reference counting
+(see ../../rcuref.rst).

 .. |high-quality implementation of C11 memory_order_consume [PDF]| replace:: high-quality implementation of C11 ``memory_order_consume`` [PDF]
 .. _high-quality implementation of C11 memory_order_consume [PDF]: http://www.rdrop.com/users/paulmck/RCU/consume.2015.07.13a.pdf
@ -2654,6 +2654,38 @@ synchronize_rcu(), and rcu_barrier(), respectively. In
 three APIs are therefore implemented by separate functions that check
 for voluntary context switches.

+Tasks Rude RCU
+~~~~~~~~~~~~~~
+
+Some forms of tracing need to wait for all preemption-disabled regions
+of code running on any online CPU, including those executed when RCU is
+not watching.  This means that synchronize_rcu() is insufficient, and
+Tasks Rude RCU must be used instead.  This flavor of RCU does its work by
+forcing a workqueue to be scheduled on each online CPU, hence the "Rude"
+moniker.  And this operation is considered to be quite rude by real-time
+workloads that don't want their ``nohz_full`` CPUs receiving IPIs and
+by battery-powered systems that don't want their idle CPUs to be awakened.
+
+The tasks-rude-RCU API is also reader-marking-free and thus quite compact,
+consisting of call_rcu_tasks_rude(), synchronize_rcu_tasks_rude(),
+and rcu_barrier_tasks_rude().
+
+Tasks Trace RCU
+~~~~~~~~~~~~~~~
+
+Some forms of tracing need to sleep in readers, but cannot tolerate
+SRCU's read-side overhead, which includes a full memory barrier in both
+srcu_read_lock() and srcu_read_unlock().  This need is handled by a
+Tasks Trace RCU that uses scheduler locking and IPIs to synchronize with
+readers.  Real-time systems that cannot tolerate IPIs may build their
+kernels with ``CONFIG_TASKS_TRACE_RCU_READ_MB=y``, which avoids the IPIs at
+the expense of adding full memory barriers to the read-side primitives.
+
+The tasks-trace-RCU API is also reasonably compact,
+consisting of rcu_read_lock_trace(), rcu_read_unlock_trace(),
+rcu_read_lock_trace_held(), call_rcu_tasks_trace(),
+synchronize_rcu_tasks_trace(), and rcu_barrier_tasks_trace().
+
 Possible Future Changes
 -----------------------

--- a/Documentation/RCU/arrayRCU.rst
+++ b/Documentation/RCU/arrayRCU.rst
@ -33,8 +33,8 @@ Situation 1: Hash Tables

 Hash tables are often implemented as an array, where each array entry
 has a linked-list hash chain.  Each hash chain can be protected by RCU
-as described in the listRCU.txt document.  This approach also applies
-to other array-of-list situations, such as radix trees.
+as described in listRCU.rst.  This approach also applies to other
+array-of-list situations, such as radix trees.

 .. _static_arrays:

--- a/Documentation/RCU/checklist.rst
+++ b/Documentation/RCU/checklist.rst
@ -140,8 +140,7 @@ over a rather long period of time, but improvements are always welcome!
 		prevents destructive compiler optimizations.  However,
 		with a bit of devious creativity, it is possible to
 		mishandle the return value from rcu_dereference().
-		Please see rcu_dereference.txt in this directory for
-		more information.
+		Please see rcu_dereference.rst for more information.

 		The rcu_dereference() primitive is used by the
 		various "_rcu()" list-traversal primitives, such
@ -151,7 +150,7 @@ over a rather long period of time, but improvements are always welcome!
 		primitives.  This is particularly useful in code that
 		is common to readers and updaters.  However, lockdep
 		will complain if you access rcu_dereference() outside
-		of an RCU read-side critical section.  See lockdep.txt
+		of an RCU read-side critical section.  See lockdep.rst
 		to learn what to do about this.

 		Of course, neither rcu_dereference() nor the "_rcu()"
@ -323,7 +322,7 @@ over a rather long period of time, but improvements are always welcome!
 	primitives when the update-side lock is held is that doing so
 	can be quite helpful in reducing code bloat when common code is
 	shared between readers and updaters.  Additional primitives
-	are provided for this case, as discussed in lockdep.txt.
+	are provided for this case, as discussed in lockdep.rst.

 	One exception to this rule is when data is only ever added to
 	the linked data structure, and is never removed during any
@ -480,4 +479,4 @@ over a rather long period of time, but improvements are always welcome!
 	both rcu_barrier() and synchronize_rcu(), if necessary, using
 	something like workqueues to to execute them concurrently.

-	See rcubarrier.txt for more information.
+	See rcubarrier.rst for more information.
--- a/Documentation/RCU/rcu.rst
+++ b/Documentation/RCU/rcu.rst
@ -10,9 +10,8 @@ A "grace period" must elapse between the two parts, and this grace period
 must be long enough that any readers accessing the item being deleted have
 since dropped their references.  For example, an RCU-protected deletion
 from a linked list would first remove the item from the list, wait for
-a grace period to elapse, then free the element.  See the
-:ref:`Documentation/RCU/listRCU.rst <list_rcu_doc>` for more information on
-using RCU with linked lists.
+a grace period to elapse, then free the element.  See listRCU.rst for more
+information on using RCU with linked lists.

 Frequently Asked Questions
 --------------------------
@ -50,7 +49,7 @@ Frequently Asked Questions
 - If I am running on a uniprocessor kernel, which can only do one
  thing at a time, why should I wait for a grace period?

-  See :ref:`Documentation/RCU/UP.rst <up_doc>` for more information.
+  See UP.rst for more information.

 - How can I see where RCU is currently used in the Linux kernel?

@ -64,13 +63,13 @@ Frequently Asked Questions

 - What guidelines should I follow when writing code that uses RCU?

-  See the checklist.txt file in this directory.
+  See checklist.rst.

 - Why the name "RCU"?

  "RCU" stands for "read-copy update".
-  :ref:`Documentation/RCU/listRCU.rst <list_rcu_doc>` has more information on where
-  this name came from, search for "read-copy update" to find it.
+  listRCU.rst has more information on where this name came from, search
+  for "read-copy update" to find it.

 - I hear that RCU is patented?  What is with that?

--- a/Documentation/RCU/rculist_nulls.rst
+++ b/Documentation/RCU/rculist_nulls.rst
@ -8,7 +8,7 @@ This section describes how to use hlist_nulls to
 protect read-mostly linked lists and
 objects using SLAB_TYPESAFE_BY_RCU allocations.

-Please read the basics in Documentation/RCU/listRCU.rst
+Please read the basics in listRCU.rst.

 Using 'nulls'
 =============
--- a/Documentation/RCU/stallwarn.rst
+++ b/Documentation/RCU/stallwarn.rst
@ -162,6 +162,26 @@ CONFIG_RCU_CPU_STALL_TIMEOUT
 	Stall-warning messages may be enabled and disabled completely via
 	/sys/module/rcupdate/parameters/rcu_cpu_stall_suppress.

+CONFIG_RCU_EXP_CPU_STALL_TIMEOUT
+--------------------------------
+
+	Same as the CONFIG_RCU_CPU_STALL_TIMEOUT parameter but only for
+	the expedited grace period. This parameter defines the period
+	of time that RCU will wait from the beginning of an expedited
+	grace period until it issues an RCU CPU stall warning. This time
+	period is normally 20 milliseconds on Android devices.	A zero
+	value causes the CONFIG_RCU_CPU_STALL_TIMEOUT value to be used,
+	after conversion to milliseconds.
+
+	This configuration parameter may be changed at runtime via the
+	/sys/module/rcupdate/parameters/rcu_exp_cpu_stall_timeout, however
+	this parameter is checked only at the beginning of a cycle. If you
+	are in a current stall cycle, setting it to a new value will change
+	the timeout for the -next- stall.
+
+	Stall-warning messages may be enabled and disabled completely via
+	/sys/module/rcupdate/parameters/rcu_cpu_stall_suppress.
+
 RCU_STALL_DELAY_DELTA
 ---------------------

--- a/Documentation/RCU/whatisRCU.rst
+++ b/Documentation/RCU/whatisRCU.rst
@ -224,7 +224,7 @@ synchronize_rcu()
 	be delayed.  This property results in system resilience in face
 	of denial-of-service attacks.  Code using call_rcu() should limit
 	update rate in order to gain this same sort of resilience.  See
-	checklist.txt for some approaches to limiting the update rate.
+	checklist.rst for some approaches to limiting the update rate.

 rcu_assign_pointer()
 ^^^^^^^^^^^^^^^^^^^^
@ -318,7 +318,7 @@ rcu_dereference()
 	must prohibit.	The rcu_dereference_protected() variant takes
 	a lockdep expression to indicate which locks must be acquired
 	by the caller. If the indicated protection is not provided,
-	a lockdep splat is emitted.  See Documentation/RCU/Design/Requirements/Requirements.rst
+	a lockdep splat is emitted.  See Design/Requirements/Requirements.rst
 	and the API's code comments for more details and example usage.

 .. 	[2] If the list_for_each_entry_rcu() instance might be used by
@ -399,8 +399,7 @@ for specialized uses, but are relatively uncommon.

 This section shows a simple use of the core RCU API to protect a
 global pointer to a dynamically allocated structure.  More-typical
-uses of RCU may be found in :ref:`listRCU.rst <list_rcu_doc>`,
-:ref:`arrayRCU.rst <array_rcu_doc>`, and :ref:`NMI-RCU.rst <NMI_rcu_doc>`.
+uses of RCU may be found in listRCU.rst, arrayRCU.rst, and NMI-RCU.rst.
 ::

 	struct foo {
@ -482,10 +481,9 @@ So, to sum up:
 	RCU read-side critical sections that might be referencing that
 	data item.

-See checklist.txt for additional rules to follow when using RCU.
-And again, more-typical uses of RCU may be found in :ref:`listRCU.rst
-<list_rcu_doc>`, :ref:`arrayRCU.rst <array_rcu_doc>`, and :ref:`NMI-RCU.rst
-<NMI_rcu_doc>`.
+See checklist.rst for additional rules to follow when using RCU.
+And again, more-typical uses of RCU may be found in listRCU.rst,
+arrayRCU.rst, and NMI-RCU.rst.

 .. _4_whatisRCU:

@ -579,7 +577,7 @@ to avoid having to write your own callback::

 	kfree_rcu(old_fp, rcu);

-Again, see checklist.txt for additional rules governing the use of RCU.
+Again, see checklist.rst for additional rules governing the use of RCU.

 .. _5_whatisRCU:

@ -663,7 +661,7 @@ been able to write-acquire the lock otherwise.  The smp_mb__after_spinlock()
 promotes synchronize_rcu() to a full memory barrier in compliance with
 the "Memory-Barrier Guarantees" listed in:

-	Documentation/RCU/Design/Requirements/Requirements.rst
+	Design/Requirements/Requirements.rst

 It is possible to nest rcu_read_lock(), since reader-writer locks may
 be recursively acquired.  Note also that rcu_read_lock() is immune
--- a/Documentation/accounting/psi.rst
+++ b/Documentation/accounting/psi.rst
@ -37,11 +37,7 @@ Pressure interface
 Pressure information for each resource is exported through the
 respective file in /proc/pressure/ -- cpu, memory, and io.

-The format for CPU is as such::
-
-	some avg10=0.00 avg60=0.00 avg300=0.00 total=0
-
-and for memory and IO::
+The format is as such::

 	some avg10=0.00 avg60=0.00 avg300=0.00 total=0
 	full avg10=0.00 avg60=0.00 avg300=0.00 total=0
@ -58,6 +54,9 @@ situation from a state where some tasks are stalled but the CPU is
 still doing productive work. As such, time spent in this subset of the
 stall state is tracked separately and exported in the "full" averages.

+CPU full is undefined at the system level, but has been reported
+since 5.13, so it is set to zero for backward compatibility.
+
 The ratios (in %) are tracked as recent trends over ten, sixty, and
 three hundred second windows, which gives insight into short term events
 as well as medium and long term trends. The total absolute stall time
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@ -631,12 +631,17 @@
 			Defaults to zero when built as a module and to
 			10 seconds when built into the kernel.

-	clearcpuid=BITNUM[,BITNUM...] [X86]
+	clearcpuid=X[,X...] [X86]
 			Disable CPUID feature X for the kernel. See
 			arch/x86/include/asm/cpufeatures.h for the valid bit
-			numbers. Note the Linux specific bits are not necessarily
-			stable over kernel options, but the vendor specific
+			numbers X. Note the Linux-specific bits are not necessarily
+			stable over kernel options, but the vendor-specific
 			ones should be.
+			X can also be a string as appearing in the flags: line
+			in /proc/cpuinfo which does not have the above
+			instability issue. However, not all features have names
+			in /proc/cpuinfo.
+			Note that using this option will taint your kernel.
 			Also note that user programs calling CPUID directly
 			or using the feature without checking anything
 			will still see it. This just prevents it from
@ -808,7 +813,7 @@
 			Documentation/admin-guide/kdump/kdump.rst for an example.

 	crashkernel=size[KMG],high
-			[KNL, X86-64] range could be above 4G. Allow kernel
+			[KNL, X86-64, ARM64] range could be above 4G. Allow kernel
 			to allocate physical memory region from top, so could
 			be above 4G if system have more than 4G ram installed.
 			Otherwise memory region will be allocated below 4G, if
@ -821,14 +826,20 @@
 			that require some amount of low memory, e.g. swiotlb
 			requires at least 64M+32K low memory, also enough extra
 			low memory is needed to make sure DMA buffers for 32-bit
-			devices won't run out. Kernel would try to allocate at
+			devices won't run out. Kernel would try to allocate
 			at least 256M below 4G automatically.
-			This one let user to specify own low range under 4G
+			This one lets the user specify own low range under 4G
 			for second kernel instead.
 			0: to disable low allocation.
 			It will be ignored when crashkernel=X,high is not used
 			or memory reserved is below 4G.

+			[KNL, ARM64] range in low memory.
+			This one lets the user specify a low range in the
+			DMA zone for the crash dump kernel.
+			It will be ignored when crashkernel=X,high is not used
+			or memory reserved is located in the DMA zones.
+
 	cryptomgr.notests
 			[KNL] Disable crypto self-tests

@ -2630,14 +2641,14 @@
 			when set.
 			Format: <int>

-	libata.force=	[LIBATA] Force configurations.  The format is comma-
-			separated list of "[ID:]VAL" where ID is
-			PORT[.DEVICE].  PORT and DEVICE are decimal numbers
-			matching port, link or device.  Basically, it matches
-			the ATA ID string printed on console by libata.  If
-			the whole ID part is omitted, the last PORT and DEVICE
-			values are used.  If ID hasn't been specified yet, the
-			configuration applies to all ports, links and devices.
+	libata.force=	[LIBATA] Force configurations.  The format is a comma-
+			separated list of "[ID:]VAL" where ID is PORT[.DEVICE].
+			PORT and DEVICE are decimal numbers matching port, link
+			or device.  Basically, it matches the ATA ID string
+			printed on console by libata.  If the whole ID part is
+			omitted, the last PORT and DEVICE values are used.  If
+			ID hasn't been specified yet, the configuration applies
+			to all ports, links and devices.

 			If only DEVICE is omitted, the parameter applies to
 			the port and all links and devices behind it.  DEVICE
@ -2647,7 +2658,7 @@
 			host link and device attached to it.

 			The VAL specifies the configuration to force.  As long
-			as there's no ambiguity shortcut notation is allowed.
+			as there is no ambiguity, shortcut notation is allowed.
 			For example, both 1.5 and 1.5G would work for 1.5Gbps.
 			The following configurations can be forced.

@ -2660,19 +2671,58 @@
 			  udma[/][16,25,33,44,66,100,133] notation is also
 			  allowed.

+			* nohrst, nosrst, norst: suppress hard, soft and both
+			  resets.
+
+			* rstonce: only attempt one reset during hot-unplug
+			  link recovery.
+
+			* [no]dbdelay: Enable or disable the extra 200ms delay
+			  before debouncing a link PHY and device presence
+			  detection.
+
 			* [no]ncq: Turn on or off NCQ.

-			* [no]ncqtrim: Turn off queued DSM TRIM.
+			* [no]ncqtrim: Enable or disable queued DSM TRIM.

-			* nohrst, nosrst, norst: suppress hard, soft
-			  and both resets.
+			* [no]ncqati: Enable or disable NCQ trim on ATI chipset.

-			* rstonce: only attempt one reset during
-			  hot-unplug link recovery
+			* [no]trim: Enable or disable (unqueued) TRIM.

-			* dump_id: dump IDENTIFY data.
+			* trim_zero: Indicate that TRIM command zeroes data.

-			* atapi_dmadir: Enable ATAPI DMADIR bridge support
+			* max_trim_128m: Set 128M maximum trim size limit.
+
+			* [no]dma: Turn on or off DMA transfers.
+
+			* atapi_dmadir: Enable ATAPI DMADIR bridge support.
+
+			* atapi_mod16_dma: Enable the use of ATAPI DMA for
+			  commands that are not a multiple of 16 bytes.
+
+			* [no]dmalog: Enable or disable the use of the
+			  READ LOG DMA EXT command to access logs.
+
+			* [no]iddevlog: Enable or disable access to the
+			  identify device data log.
+
+			* [no]logdir: Enable or disable access to the general
+			  purpose log directory.
+
+			* max_sec_128: Set transfer size limit to 128 sectors.
+
+			* max_sec_1024: Set or clear transfer size limit to
+			  1024 sectors.
+
+			* max_sec_lba48: Set or clear transfer size limit to
+			  65535 sectors.
+
+			* [no]lpm: Enable or disable link power management.
+
+			* [no]setxfer: Indicate if transfer speed mode setting
+			  should be skipped.
+
+			* dump_id: Dump IDENTIFY data.

 			* disable: Disable this device.

@ -3111,6 +3161,7 @@
 					       mds=off [X86]
 					       tsx_async_abort=off [X86]
 					       kvm.nx_huge_pages=off [X86]
+					       srbds=off [X86,INTEL]
 					       no_entry_flush [PPC]
 					       no_uaccess_flush [PPC]

@ -3447,8 +3498,6 @@

 	nocache		[ARM]

-	noclflush	[BUGS=X86] Don't use the CLFLUSH instruction
-
 	delayacct	[KNL] Enable per-task delay accounting

 	nodsp		[SH] Disable hardware DSP at boot time.
@ -3459,16 +3508,11 @@

 	noexec		[IA-64]

-	noexec		[X86]
-			On X86-32 available only on PAE configured kernels.
-			noexec=on: enable non-executable mappings (default)
-			noexec=off: disable non-executable mappings
-
-	nosmap		[X86,PPC]
+	nosmap		[PPC]
 			Disable SMAP (Supervisor Mode Access Prevention)
 			even if it is supported by processor.

-	nosmep		[X86,PPC64s]
+	nosmep		[PPC64s]
 			Disable SMEP (Supervisor Mode Execution Prevention)
 			even if it is supported by processor.

@ -3668,8 +3712,6 @@

 	nosbagart	[IA-64]

-	nosep		[BUGS=X86-32] Disables x86 SYSENTER/SYSEXIT support.
-
 	nosgx		[X86-64,SGX] Disables Intel SGX kernel support.

 	nosmp		[SMP] Tells an SMP kernel to act as a UP kernel,
@ -4901,6 +4943,18 @@

 	rcupdate.rcu_cpu_stall_timeout= [KNL]
 			Set timeout for RCU CPU stall warning messages.
+			The value is in seconds and the maximum allowed
+			value is 300 seconds.
+
+	rcupdate.rcu_exp_cpu_stall_timeout= [KNL]
+			Set timeout for expedited RCU CPU stall warning
+			messages.  The value is in milliseconds
+			and the maximum allowed value is 21000
+			milliseconds. Please note that this value is
+			adjusted to an arch timer tick resolution.
+			Setting this to zero causes the value from
+			rcupdate.rcu_cpu_stall_timeout to be used (after
+			conversion from seconds to milliseconds).

 	rcupdate.rcu_expedited= [KNL]
 			Use expedited grace-period primitives, for
@ -4963,10 +5017,34 @@
 			number avoids disturbing real-time workloads,
 			but lengthens grace periods.

+	rcupdate.rcu_task_stall_info= [KNL]
+			Set initial timeout in jiffies for RCU task stall
+			informational messages, which give some indication
+			of the problem for those not patient enough to
+			wait for ten minutes.  Informational messages are
+			only printed prior to the stall-warning message
+			for a given grace period. Disable with a value
+			less than or equal to zero.  Defaults to ten
+			seconds.  A change in value does not take effect
+			until the beginning of the next grace period.
+
+	rcupdate.rcu_task_stall_info_mult= [KNL]
+			Multiplier for time interval between successive
+			RCU task stall informational messages for a given
+			RCU tasks grace period.  This value is clamped
+			to one through ten, inclusive.	It defaults to
+			the value three, so that the first informational
+			message is printed 10 seconds into the grace
+			period, the second at 40 seconds, the third at
+			160 seconds, and then the stall warning at 600
+			seconds would prevent a fourth at 640 seconds.
+
 	rcupdate.rcu_task_stall_timeout= [KNL]
-			Set timeout in jiffies for RCU task stall warning
-			messages.  Disable with a value less than or equal
-			to zero.
+			Set timeout in jiffies for RCU task stall
+			warning messages.  Disable with a value less
+			than or equal to zero.	Defaults to ten minutes.
+			A change in value does not take effect until
+			the beginning of the next grace period.

 	rcupdate.rcu_self_test= [KNL]
 			Run the RCU early boot self tests
@ -5316,6 +5394,8 @@

 	serialnumber	[BUGS=X86-32]

+	sev=option[,option...] [X86-64] See Documentation/x86/x86_64/boot-options.rst
+
 	shapers=	[NET]
 			Maximal number of shapers.

@ -5385,6 +5465,17 @@
 	smart2=		[HW]
 			Format: <io1>[,<io2>[,...,<io8>]]

+	smp.csd_lock_timeout= [KNL]
+			Specify the period of time in milliseconds
+			that smp_call_function() and friends will wait
+			for a CPU to release the CSD lock.  This is
+			useful when diagnosing bugs involving CPUs
+			disabling interrupts for extended periods
+			of time.  Defaults to 5,000 milliseconds, and
+			setting a value of zero disables this feature.
+			This feature may be more efficiently disabled
+			using the csdlock_debug- kernel parameter.
+
 	smsc-ircc2.nopnp	[HW] Don't use PNP to discover SMC devices
 	smsc-ircc2.ircc_cfg=	[HW] Device configuration I/O port
 	smsc-ircc2.ircc_sir=	[HW] SIR base I/O port
@ -5616,6 +5707,30 @@
 			off:    Disable mitigation and remove
 				performance impact to RDRAND and RDSEED

+	srcutree.big_cpu_lim [KNL]
+			Specifies the number of CPUs constituting a
+			large system, such that srcu_struct structures
+			should immediately allocate an srcu_node array.
+			This kernel-boot parameter defaults to 128,
+			but takes effect only when the low-order four
+			bits of srcutree.convert_to_big is equal to 3
+			(decide at boot).
+
+	srcutree.convert_to_big [KNL]
+			Specifies under what conditions an SRCU tree
+			srcu_struct structure will be converted to big
+			form, that is, with an rcu_node tree:
+
+				   0:  Never.
+				   1:  At init_srcu_struct() time.
+				   2:  When rcutorture decides to.
+				   3:  Decide at boot time (default).
+				0x1X:  Above plus if high contention.
+
+			Either way, the srcu_node tree will be sized based
+			on the actual runtime number of CPUs (nr_cpu_ids)
+			instead of the compile-time CONFIG_NR_CPUS.
+
 	srcutree.counter_wrap_check [KNL]
 			Specifies how frequently to check for
 			grace-period sequence counter wrap for the
@ -5633,6 +5748,14 @@
 			expediting.  Set to zero to disable automatic
 			expediting.

+	srcutree.small_contention_lim [KNL]
+			Specifies the number of update-side contention
+			events per jiffy will be tolerated before
+			initiating a conversion of an srcu_struct
+			structure to big form.	Note that the value of
+			srcutree.convert_to_big must have the 0x10 bit
+			set for contention-based conversions to occur.
+
 	ssbd=		[ARM64,HW]
 			Speculative Store Bypass Disable control

--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@ -994,6 +994,9 @@ This is a directory, with the following entries:
 * ``boot_id``: a UUID generated the first time this is retrieved, and
  unvarying after that;

+* ``uuid``: a UUID generated every time this is retrieved (this can
+  thus be used to generate UUIDs at will);
+
 * ``entropy_avail``: the pool's entropy count, in bits;

 * ``poolsize``: the entropy pool size, in bits;
@ -1001,10 +1004,7 @@ This is a directory, with the following entries:
 * ``urandom_min_reseed_secs``: obsolete (used to determine the minimum
  number of seconds between urandom pool reseeding). This file is
  writable for compatibility purposes, but writing to it has no effect
-  on any RNG behavior.
-
-* ``uuid``: a UUID generated every time this is retrieved (this can
-  thus be used to generate UUIDs at will);
+  on any RNG behavior;

 * ``write_wakeup_threshold``: when the entropy count drops below this
  (as a number of bits), processes waiting to write to ``/dev/random``
--- a/Documentation/arm64/booting.rst
+++ b/Documentation/arm64/booting.rst
@ -350,6 +350,16 @@ Before jumping into the kernel, the following conditions must be met:

    - SMCR_EL2.FA64 (bit 31) must be initialised to 0b1.

+  For CPUs with the Memory Tagging Extension feature (FEAT_MTE2):
+
+  - If EL3 is present:
+
+    - SCR_EL3.ATA (bit 26) must be initialised to 0b1.
+
+  - If the kernel is entered at EL1 and EL2 is present:
+
+    - HCR_EL2.ATA (bit 56) must be initialised to 0b1.
+
 The requirements described above for CPU mode, caches, MMUs, architected
 timers, coherency and system registers apply to all CPUs.  All CPUs must
 enter the kernel in the same exception level.  Where the values documented
--- a/Documentation/arm64/elf_hwcaps.rst
+++ b/Documentation/arm64/elf_hwcaps.rst
@ -264,6 +264,39 @@ HWCAP2_MTE3
    Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0011, as described
    by Documentation/arm64/memory-tagging-extension.rst.

+HWCAP2_SME
+
+    Functionality implied by ID_AA64PFR1_EL1.SME == 0b0001, as described
+    by Documentation/arm64/sme.rst.
+
+HWCAP2_SME_I16I64
+
+    Functionality implied by ID_AA64SMFR0_EL1.I16I64 == 0b1111.
+
+HWCAP2_SME_F64F64
+
+    Functionality implied by ID_AA64SMFR0_EL1.F64F64 == 0b1.
+
+HWCAP2_SME_I8I32
+
+    Functionality implied by ID_AA64SMFR0_EL1.I8I32 == 0b1111.
+
+HWCAP2_SME_F16F32
+
+    Functionality implied by ID_AA64SMFR0_EL1.F16F32 == 0b1.
+
+HWCAP2_SME_B16F32
+
+    Functionality implied by ID_AA64SMFR0_EL1.B16F32 == 0b1.
+
+HWCAP2_SME_F32F32
+
+    Functionality implied by ID_AA64SMFR0_EL1.F32F32 == 0b1.
+
+HWCAP2_SME_FA64
+
+    Functionality implied by ID_AA64SMFR0_EL1.FA64 == 0b1.
+
 4. Unused AT_HWCAP bits
 -----------------------

--- a/Documentation/arm64/index.rst
+++ b/Documentation/arm64/index.rst
@ -21,6 +21,7 @@ ARM64 Architecture
    perf
    pointer-authentication
    silicon-errata
+    sme
    sve
    tagged-address-abi
    tagged-pointers
--- a/Documentation/arm64/sme.rst
+++ b/Documentation/arm64/sme.rst
@ -0,0 +1,428 @@
+===================================================
+Scalable Matrix Extension support for AArch64 Linux
+===================================================
+
+This document outlines briefly the interface provided to userspace by Linux in
+order to support use of the ARM Scalable Matrix Extension (SME).
+
+This is an outline of the most important features and issues only and not
+intended to be exhaustive.  It should be read in conjunction with the SVE
+documentation in sve.rst which provides details on the Streaming SVE mode
+included in SME.
+
+This document does not aim to describe the SME architecture or programmer's
+model.  To aid understanding, a minimal description of relevant programmer's
+model features for SME is included in Appendix A.
+
+
+1.  General
+-----------
+
+* PSTATE.SM, PSTATE.ZA, the streaming mode vector length, the ZA
+  register state and TPIDR2_EL0 are tracked per thread.
+
+* The presence of SME is reported to userspace via HWCAP2_SME in the aux vector
+  AT_HWCAP2 entry.  Presence of this flag implies the presence of the SME
+  instructions and registers, and the Linux-specific system interfaces
+  described in this document.  SME is reported in /proc/cpuinfo as "sme".
+
+* Support for the execution of SME instructions in userspace can also be
+  detected by reading the CPU ID register ID_AA64PFR1_EL1 using an MRS
+  instruction, and checking that the value of the SME field is nonzero. [3]
+
+  It does not guarantee the presence of the system interfaces described in the
+  following sections: software that needs to verify that those interfaces are
+  present must check for HWCAP2_SME instead.
+
+* There are a number of optional SME features, presence of these is reported
+  through AT_HWCAP2 through:
+
+	HWCAP2_SME_I16I64
+	HWCAP2_SME_F64F64
+	HWCAP2_SME_I8I32
+	HWCAP2_SME_F16F32
+	HWCAP2_SME_B16F32
+	HWCAP2_SME_F32F32
+	HWCAP2_SME_FA64
+
+  This list may be extended over time as the SME architecture evolves.
+
+  These extensions are also reported via the CPU ID register ID_AA64SMFR0_EL1,
+  which userspace can read using an MRS instruction.  See elf_hwcaps.txt and
+  cpu-feature-registers.txt for details.
+
+* Debuggers should restrict themselves to interacting with the target via the
+  NT_ARM_SVE, NT_ARM_SSVE and NT_ARM_ZA regsets.  The recommended way
+  of detecting support for these regsets is to connect to a target process
+  first and then attempt a
+
+	ptrace(PTRACE_GETREGSET, pid, NT_ARM_<regset>, &iov).
+
+* Whenever ZA register values are exchanged in memory between userspace and
+  the kernel, the register value is encoded in memory as a series of horizontal
+  vectors from 0 to VL/8-1 stored in the same endianness invariant format as is
+  used for SVE vectors.
+
+* On thread creation TPIDR2_EL0 is preserved unless CLONE_SETTLS is specified,
+  in which case it is set to 0.
+
+2.  Vector lengths
+------------------
+
+SME defines a second vector length similar to the SVE vector length which is
+controls the size of the streaming mode SVE vectors and the ZA matrix array.
+The ZA matrix is square with each side having as many bytes as a streaming
+mode SVE vector.
+
+
+3.  Sharing of streaming and non-streaming mode SVE state
+---------------------------------------------------------
+
+It is implementation defined which if any parts of the SVE state are shared
+between streaming and non-streaming modes.  When switching between modes
+via software interfaces such as ptrace if no register content is provided as
+part of switching no state will be assumed to be shared and everything will
+be zeroed.
+
+
+4.  System call behaviour
+-------------------------
+
+* On syscall PSTATE.ZA is preserved, if PSTATE.ZA==1 then the contents of the
+  ZA matrix are preserved.
+
+* On syscall PSTATE.SM will be cleared and the SVE registers will be handled
+  as per the standard SVE ABI.
+
+* Neither the SVE registers nor ZA are used to pass arguments to or receive
+  results from any syscall.
+
+* On process creation (eg, clone()) the newly created process will have
+  PSTATE.SM cleared.
+
+* All other SME state of a thread, including the currently configured vector
+  length, the state of the PR_SME_VL_INHERIT flag, and the deferred vector
+  length (if any), is preserved across all syscalls, subject to the specific
+  exceptions for execve() described in section 6.
+
+
+5.  Signal handling
+-------------------
+
+* Signal handlers are invoked with streaming mode and ZA disabled.
+
+* A new signal frame record za_context encodes the ZA register contents on
+  signal delivery. [1]
+
+* The signal frame record for ZA always contains basic metadata, in particular
+  the thread's vector length (in za_context.vl).
+
+* The ZA matrix may or may not be included in the record, depending on
+  the value of PSTATE.ZA.  The registers are present if and only if:
+  za_context.head.size >= ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(za_context.vl))
+  in which case PSTATE.ZA == 1.
+
+* If matrix data is present, the remainder of the record has a vl-dependent
+  size and layout.  Macros ZA_SIG_* are defined [1] to facilitate access to
+  them.
+
+* The matrix is stored as a series of horizontal vectors in the same format as
+  is used for SVE vectors.
+
+* If the ZA context is too big to fit in sigcontext.__reserved[], then extra
+  space is allocated on the stack, an extra_context record is written in
+  __reserved[] referencing this space.  za_context is then written in the
+  extra space.  Refer to [1] for further details about this mechanism.
+
+
+5.  Signal return
+-----------------
+
+When returning from a signal handler:
+
+* If there is no za_context record in the signal frame, or if the record is
+  present but contains no register data as described in the previous section,
+  then ZA is disabled.
+
+* If za_context is present in the signal frame and contains matrix data then
+  PSTATE.ZA is set to 1 and ZA is populated with the specified data.
+
+* The vector length cannot be changed via signal return.  If za_context.vl in
+  the signal frame does not match the current vector length, the signal return
+  attempt is treated as illegal, resulting in a forced SIGSEGV.
+
+
+6.  prctl extensions
+--------------------
+
+Some new prctl() calls are added to allow programs to manage the SME vector
+length:
+
+prctl(PR_SME_SET_VL, unsigned long arg)
+
+    Sets the vector length of the calling thread and related flags, where
+    arg == vl | flags.  Other threads of the calling process are unaffected.
+
+    vl is the desired vector length, where sve_vl_valid(vl) must be true.
+
+    flags:
+
+	PR_SME_VL_INHERIT
+
+	    Inherit the current vector length across execve().  Otherwise, the
+	    vector length is reset to the system default at execve().  (See
+	    Section 9.)
+
+	PR_SME_SET_VL_ONEXEC
+
+	    Defer the requested vector length change until the next execve()
+	    performed by this thread.
+
+	    The effect is equivalent to implicit execution of the following
+	    call immediately after the next execve() (if any) by the thread:
+
+		prctl(PR_SME_SET_VL, arg & ~PR_SME_SET_VL_ONEXEC)
+
+	    This allows launching of a new program with a different vector
+	    length, while avoiding runtime side effects in the caller.
+
+	    Without PR_SME_SET_VL_ONEXEC, the requested change takes effect
+	    immediately.
+
+
+    Return value: a nonnegative on success, or a negative value on error:
+	EINVAL: SME not supported, invalid vector length requested, or
+	    invalid flags.
+
+
+    On success:
+
+    * Either the calling thread's vector length or the deferred vector length
+      to be applied at the next execve() by the thread (dependent on whether
+      PR_SME_SET_VL_ONEXEC is present in arg), is set to the largest value
+      supported by the system that is less than or equal to vl.  If vl ==
+      SVE_VL_MAX, the value set will be the largest value supported by the
+      system.
+
+    * Any previously outstanding deferred vector length change in the calling
+      thread is cancelled.
+
+    * The returned value describes the resulting configuration, encoded as for
+      PR_SME_GET_VL.  The vector length reported in this value is the new
+      current vector length for this thread if PR_SME_SET_VL_ONEXEC was not
+      present in arg; otherwise, the reported vector length is the deferred
+      vector length that will be applied at the next execve() by the calling
+      thread.
+
+    * Changing the vector length causes all of ZA, P0..P15, FFR and all bits of
+      Z0..Z31 except for Z0 bits [127:0] .. Z31 bits [127:0] to become
+      unspecified, including both streaming and non-streaming SVE state.
+      Calling PR_SME_SET_VL with vl equal to the thread's current vector
+      length, or calling PR_SME_SET_VL with the PR_SVE_SET_VL_ONEXEC flag,
+      does not constitute a change to the vector length for this purpose.
+
+    * Changing the vector length causes PSTATE.ZA and PSTATE.SM to be cleared.
+      Calling PR_SME_SET_VL with vl equal to the thread's current vector
+      length, or calling PR_SME_SET_VL with the PR_SVE_SET_VL_ONEXEC flag,
+      does not constitute a change to the vector length for this purpose.
+
+
+prctl(PR_SME_GET_VL)
+
+    Gets the vector length of the calling thread.
+
+    The following flag may be OR-ed into the result:
+
+	PR_SME_VL_INHERIT
+
+	    Vector length will be inherited across execve().
+
+    There is no way to determine whether there is an outstanding deferred
+    vector length change (which would only normally be the case between a
+    fork() or vfork() and the corresponding execve() in typical use).
+
+    To extract the vector length from the result, bitwise and it with
+    PR_SME_VL_LEN_MASK.
+
+    Return value: a nonnegative value on success, or a negative value on error:
+	EINVAL: SME not supported.
+
+
+7.  ptrace extensions
+---------------------
+
+* A new regset NT_ARM_SSVE is defined for access to streaming mode SVE
+  state via PTRACE_GETREGSET and  PTRACE_SETREGSET, this is documented in
+  sve.rst.
+
+* A new regset NT_ARM_ZA is defined for ZA state for access to ZA state via
+  PTRACE_GETREGSET and PTRACE_SETREGSET.
+
+  Refer to [2] for definitions.
+
+The regset data starts with struct user_za_header, containing:
+
+    size
+
+	Size of the complete regset, in bytes.
+	This depends on vl and possibly on other things in the future.
+
+	If a call to PTRACE_GETREGSET requests less data than the value of
+	size, the caller can allocate a larger buffer and retry in order to
+	read the complete regset.
+
+    max_size
+
+	Maximum size in bytes that the regset can grow to for the target
+	thread.  The regset won't grow bigger than this even if the target
+	thread changes its vector length etc.
+
+    vl
+
+	Target thread's current streaming vector length, in bytes.
+
+    max_vl
+
+	Maximum possible streaming vector length for the target thread.
+
+    flags
+
+	Zero or more of the following flags, which have the same
+	meaning and behaviour as the corresponding PR_SET_VL_* flags:
+
+	    SME_PT_VL_INHERIT
+
+	    SME_PT_VL_ONEXEC (SETREGSET only).
+
+* The effects of changing the vector length and/or flags are equivalent to
+  those documented for PR_SME_SET_VL.
+
+  The caller must make a further GETREGSET call if it needs to know what VL is
+  actually set by SETREGSET, unless is it known in advance that the requested
+  VL is supported.
+
+* The size and layout of the payload depends on the header fields.  The
+  SME_PT_ZA_*() macros are provided to facilitate access to the data.
+
+* In either case, for SETREGSET it is permissible to omit the payload, in which
+  case the vector length and flags are changed and PSTATE.ZA is set to 0
+  (along with any consequences of those changes).  If a payload is provided
+  then PSTATE.ZA will be set to 1.
+
+* For SETREGSET, if the requested VL is not supported, the effect will be the
+  same as if the payload were omitted, except that an EIO error is reported.
+  No attempt is made to translate the payload data to the correct layout
+  for the vector length actually set.  It is up to the caller to translate the
+  payload layout for the actual VL and retry.
+
+* The effect of writing a partial, incomplete payload is unspecified.
+
+
+8.  ELF coredump extensions
+---------------------------
+
+* NT_ARM_SSVE notes will be added to each coredump for
+  each thread of the dumped process.  The contents will be equivalent to the
+  data that would have been read if a PTRACE_GETREGSET of the corresponding
+  type were executed for each thread when the coredump was generated.
+
+* A NT_ARM_ZA note will be added to each coredump for each thread of the
+  dumped process.  The contents will be equivalent to the data that would have
+  been read if a PTRACE_GETREGSET of NT_ARM_ZA were executed for each thread
+  when the coredump was generated.
+
+
+9.  System runtime configuration
+--------------------------------
+
+* To mitigate the ABI impact of expansion of the signal frame, a policy
+  mechanism is provided for administrators, distro maintainers and developers
+  to set the default vector length for userspace processes:
+
+/proc/sys/abi/sme_default_vector_length
+
+    Writing the text representation of an integer to this file sets the system
+    default vector length to the specified value, unless the value is greater
+    than the maximum vector length supported by the system in which case the
+    default vector length is set to that maximum.
+
+    The result can be determined by reopening the file and reading its
+    contents.
+
+    At boot, the default vector length is initially set to 32 or the maximum
+    supported vector length, whichever is smaller and supported.  This
+    determines the initial vector length of the init process (PID 1).
+
+    Reading this file returns the current system default vector length.
+
+* At every execve() call, the new vector length of the new process is set to
+  the system default vector length, unless
+
+    * PR_SME_VL_INHERIT (or equivalently SME_PT_VL_INHERIT) is set for the
+      calling thread, or
+
+    * a deferred vector length change is pending, established via the
+      PR_SME_SET_VL_ONEXEC flag (or SME_PT_VL_ONEXEC).
+
+* Modifying the system default vector length does not affect the vector length
+  of any existing process or thread that does not make an execve() call.
+
+
+Appendix A.  SME programmer's model (informative)
+=================================================
+
+This section provides a minimal description of the additions made by SVE to the
+ARMv8-A programmer's model that are relevant to this document.
+
+Note: This section is for information only and not intended to be complete or
+to replace any architectural specification.
+
+A.1.  Registers
+---------------
+
+In A64 state, SME adds the following:
+
+* A new mode, streaming mode, in which a subset of the normal FPSIMD and SVE
+  features are available.  When supported EL0 software may enter and leave
+  streaming mode at any time.
+
+  For best system performance it is strongly encouraged for software to enable
+  streaming mode only when it is actively being used.
+
+* A new vector length controlling the size of ZA and the Z registers when in
+  streaming mode, separately to the vector length used for SVE when not in
+  streaming mode.  There is no requirement that either the currently selected
+  vector length or the set of vector lengths supported for the two modes in
+  a given system have any relationship.  The streaming mode vector length
+  is referred to as SVL.
+
+* A new ZA matrix register.  This is a square matrix of SVLxSVL bits.  Most
+  operations on ZA require that streaming mode be enabled but ZA can be
+  enabled without streaming mode in order to load, save and retain data.
+
+  For best system performance it is strongly encouraged for software to enable
+  ZA only when it is actively being used.
+
+* Two new 1 bit fields in PSTATE which may be controlled via the SMSTART and
+  SMSTOP instructions or by access to the SVCR system register:
+
+  * PSTATE.ZA, if this is 1 then the ZA matrix is accessible and has valid
+    data while if it is 0 then ZA can not be accessed.  When PSTATE.ZA is
+    changed from 0 to 1 all bits in ZA are cleared.
+
+  * PSTATE.SM, if this is 1 then the PE is in streaming mode.  When the value
+    of PSTATE.SM is changed then it is implementation defined if the subset
+    of the floating point register bits valid in both modes may be retained.
+    Any other bits will be cleared.
+
+
+References
+==========
+
+[1] arch/arm64/include/uapi/asm/sigcontext.h
+    AArch64 Linux signal ABI definitions
+
+[2] arch/arm64/include/uapi/asm/ptrace.h
+    AArch64 Linux ptrace ABI definitions
+
+[3] Documentation/arm64/cpu-feature-registers.rst
--- a/Documentation/arm64/sve.rst
+++ b/Documentation/arm64/sve.rst
@ -7,7 +7,9 @@ Author: Dave Martin <Dave.Martin@arm.com>
 Date:   4 August 2017

 This document outlines briefly the interface provided to userspace by Linux in
-order to support use of the ARM Scalable Vector Extension (SVE).
+order to support use of the ARM Scalable Vector Extension (SVE), including
+interactions with Streaming SVE mode added by the Scalable Matrix Extension
+(SME).

 This is an outline of the most important features and issues only and not
 intended to be exhaustive.
@ -23,6 +25,10 @@ model features for SVE is included in Appendix A.
 * SVE registers Z0..Z31, P0..P15 and FFR and the current vector length VL, are
  tracked per-thread.

+* In streaming mode FFR is not accessible unless HWCAP2_SME_FA64 is present
+  in the system, when it is not supported and these interfaces are used to
+  access streaming mode FFR is read and written as zero.
+
 * The presence of SVE is reported to userspace via HWCAP_SVE in the aux vector
  AT_HWCAP entry.  Presence of this flag implies the presence of the SVE
  instructions and registers, and the Linux-specific system interfaces
@ -53,10 +59,19 @@ model features for SVE is included in Appendix A.
  which userspace can read using an MRS instruction.  See elf_hwcaps.txt and
  cpu-feature-registers.txt for details.

+* On hardware that supports the SME extensions, HWCAP2_SME will also be
+  reported in the AT_HWCAP2 aux vector entry.  Among other things SME adds
+  streaming mode which provides a subset of the SVE feature set using a
+  separate SME vector length and the same Z/V registers.  See sme.rst
+  for more details.
+
 * Debuggers should restrict themselves to interacting with the target via the
  NT_ARM_SVE regset.  The recommended way of detecting support for this regset
  is to connect to a target process first and then attempt a
-  ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov).
+  ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov).  Note that when SME is
+  present and streaming SVE mode is in use the FPSIMD subset of registers
+  will be read via NT_ARM_SVE and NT_ARM_SVE writes will exit streaming mode
+  in the target.

 * Whenever SVE scalable register values (Zn, Pn, FFR) are exchanged in memory
  between userspace and the kernel, the register value is encoded in memory in
@ -126,6 +141,11 @@ the SVE instruction set architecture.
  are only present in fpsimd_context.  For convenience, the content of V0..V31
  is duplicated between sve_context and fpsimd_context.

+* The record contains a flag field which includes a flag SVE_SIG_FLAG_SM which
+  if set indicates that the thread is in streaming mode and the vector length
+  and register data (if present) describe the streaming SVE data and vector
+  length.
+
 * The signal frame record for SVE always contains basic metadata, in particular
  the thread's vector length (in sve_context.vl).

@ -170,6 +190,11 @@ When returning from a signal handler:
  the signal frame does not match the current vector length, the signal return
  attempt is treated as illegal, resulting in a forced SIGSEGV.

+* It is permitted to enter or leave streaming mode by setting or clearing
+  the SVE_SIG_FLAG_SM flag but applications should take care to ensure that
+  when doing so sve_context.vl and any register data are appropriate for the
+  vector length in the new mode.
+

 6.  prctl extensions
 --------------------
@ -265,8 +290,14 @@ prctl(PR_SVE_GET_VL)
 7.  ptrace extensions
 ---------------------

-* A new regset NT_ARM_SVE is defined for use with PTRACE_GETREGSET and
-  PTRACE_SETREGSET.
+* New regsets NT_ARM_SVE and NT_ARM_SSVE are defined for use with
+  PTRACE_GETREGSET and PTRACE_SETREGSET. NT_ARM_SSVE describes the
+  streaming mode SVE registers and NT_ARM_SVE describes the
+  non-streaming mode SVE registers.
+
+  In this description a register set is referred to as being "live" when
+  the target is in the appropriate streaming or non-streaming mode and is
+  using data beyond the subset shared with the FPSIMD Vn registers.

  Refer to [2] for definitions.

@ -297,7 +328,7 @@ The regset data starts with struct user_sve_header, containing:

    flags

-	either
+	at most one of

 	    SVE_PT_REGS_FPSIMD

@ -331,6 +362,10 @@ The regset data starts with struct user_sve_header, containing:

 	    SVE_PT_VL_ONEXEC (SETREGSET only).

+	If neither FPSIMD nor SVE flags are provided then no register
+	payload is available, this is only possible when SME is implemented.
+
+
 * The effects of changing the vector length and/or flags are equivalent to
  those documented for PR_SVE_SET_VL.

@ -346,6 +381,13 @@ The regset data starts with struct user_sve_header, containing:
  case only the vector length and flags are changed (along with any
  consequences of those changes).

+* In systems supporting SME when in streaming mode a GETREGSET for
+  NT_REG_SVE will return only the user_sve_header with no register data,
+  similarly a GETREGSET for NT_REG_SSVE will not return any register data
+  when not in streaming mode.
+
+* A GETREGSET for NT_ARM_SSVE will never return SVE_PT_REGS_FPSIMD.
+
 * For SETREGSET, if an SVE_PT_REGS_SVE payload is present and the
  requested VL is not supported, the effect will be the same as if the
  payload were omitted, except that an EIO error is reported.  No
@ -355,17 +397,25 @@ The regset data starts with struct user_sve_header, containing:
  unspecified.  It is up to the caller to translate the payload layout
  for the actual VL and retry.

+* Where SME is implemented it is not possible to GETREGSET the register
+  state for normal SVE when in streaming mode, nor the streaming mode
+  register state when in normal mode, regardless of the implementation defined
+  behaviour of the hardware for sharing data between the two modes.
+
+* Any SETREGSET of NT_ARM_SVE will exit streaming mode if the target was in
+  streaming mode and any SETREGSET of NT_ARM_SSVE will enter streaming mode
+  if the target was not in streaming mode.
+
 * The effect of writing a partial, incomplete payload is unspecified.


 8.  ELF coredump extensions
 ---------------------------

-* A NT_ARM_SVE note will be added to each coredump for each thread of the
-  dumped process.  The contents will be equivalent to the data that would have
-  been read if a PTRACE_GETREGSET of NT_ARM_SVE were executed for each thread
-  when the coredump was generated.
-
+* NT_ARM_SVE and NT_ARM_SSVE notes will be added to each coredump for
+  each thread of the dumped process.  The contents will be equivalent to the
+  data that would have been read if a PTRACE_GETREGSET of the corresponding
+  type were executed for each thread when the coredump was generated.

 9.  System runtime configuration
 --------------------------------
--- a/Documentation/cdrom/cdrom-standard.rst
+++ b/Documentation/cdrom/cdrom-standard.rst
@ -218,7 +218,6 @@ current *struct* is::
 		int (*tray_move)(struct cdrom_device_info *, int);
 		int (*lock_door)(struct cdrom_device_info *, int);
 		int (*select_speed)(struct cdrom_device_info *, int);
-		int (*select_disc)(struct cdrom_device_info *, int);
 		int (*get_last_session) (struct cdrom_device_info *,
 					 struct cdrom_multisession *);
 		int (*get_mcn)(struct cdrom_device_info *, struct cdrom_mcn *);
@ -419,15 +418,6 @@ this `auto-selection` capability, the decision should be made on the
 current disc loaded and the return value should be positive. A negative
 return value indicates an error.

-::
-
-	int select_disc(struct cdrom_device_info *cdi, int number)
-
-If the drive can store multiple discs (a juke-box) this function
-will perform disc selection. It should return the number of the
-selected disc on success, a negative value on error. Currently, only
-the ide-cd driver supports this functionality.
-
 ::

 	int get_last_session(struct cdrom_device_info *cdi,
--- a/Documentation/core-api/timekeeping.rst
+++ b/Documentation/core-api/timekeeping.rst
@ -132,6 +132,7 @@ Some additional variants exist for more specialized cases:
 .. c:function:: u64 ktime_get_mono_fast_ns( void )
 		u64 ktime_get_raw_fast_ns( void )
 		u64 ktime_get_boot_fast_ns( void )
+		u64 ktime_get_tai_fast_ns( void )
 		u64 ktime_get_real_fast_ns( void )

 	These variants are safe to call from any context, including from
--- a/Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml
+++ b/Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml
@ -26,6 +26,7 @@ properties:
      - items:
          - enum:
              - renesas,sata-r8a774b1     # RZ/G2N
+              - renesas,sata-r8a774e1     # RZ/G2H
              - renesas,sata-r8a7795      # R-Car H3
              - renesas,sata-r8a77965     # R-Car M3-N
          - const: renesas,rcar-gen3-sata # generic R-Car Gen3 or RZ/G2
--- a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml
@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: ARM Generic Interrupt Controller, version 3

 maintainers:
-  - Marc Zyngier <marc.zyngier@arm.com>
+  - Marc Zyngier <maz@kernel.org>

 description: |
  AArch64 SMP cores are often associated with a GICv3, providing Private
@ -78,7 +78,11 @@ properties:
      - GIC Hypervisor interface (GICH)
      - GIC Virtual CPU interface (GICV)

-      GICC, GICH and GICV are optional.
+      GICC, GICH and GICV are optional, but must be described if the CPUs
+      support them. Examples of such CPUs are ARM's implementations of the
+      ARMv8.0 architecture such as Cortex-A32, A34, A35, A53, A57, A72 and
+      A73 (this list is not exhaustive).
+
    minItems: 2
    maxItems: 4096   # Should be enough?

--- a/Documentation/devicetree/bindings/memory-controllers/fsl/fsl,ddr.yaml
+++ b/Documentation/devicetree/bindings/memory-controllers/fsl/fsl,ddr.yaml
@ -25,12 +25,6 @@ properties:
          - const: fsl,qoriq-memory-controller
      - enum:
          - fsl,bsc9132-memory-controller
-          - fsl,8540-memory-controller
-          - fsl,8541-memory-controller
-          - fsl,8544-memory-controller
-          - fsl,8548-memory-controller
-          - fsl,8555-memory-controller
-          - fsl,8568-memory-controller
          - fsl,mpc8536-memory-controller
          - fsl,mpc8540-memory-controller
          - fsl,mpc8541-memory-controller
--- a/Documentation/devicetree/bindings/perf/arm,cmn.yaml
+++ b/Documentation/devicetree/bindings/perf/arm,cmn.yaml
@ -14,6 +14,8 @@ properties:
  compatible:
    enum:
      - arm,cmn-600
+      - arm,cmn-650
+      - arm,cmn-700
      - arm,ci-700

  reg:
--- a/Documentation/devicetree/bindings/powerpc/fsl/l2cache.txt
+++ b/Documentation/devicetree/bindings/powerpc/fsl/l2cache.txt
@ -6,12 +6,6 @@ The cache bindings explained below are Devicetree Specification compliant
 Required Properties:

 - compatible	: Should include one of the following:
-		  "fsl,8540-l2-cache-controller"
-		  "fsl,8541-l2-cache-controller"
-		  "fsl,8544-l2-cache-controller"
-		  "fsl,8548-l2-cache-controller"
-		  "fsl,8555-l2-cache-controller"
-		  "fsl,8568-l2-cache-controller"
 		  "fsl,b4420-l2-cache-controller"
 		  "fsl,b4860-l2-cache-controller"
 		  "fsl,bsc9131-l2-cache-controller"
--- a/Documentation/dontdiff
+++ b/Documentation/dontdiff
@ -211,6 +211,7 @@ r200_reg_safe.h
 r300_reg_safe.h
 r420_reg_safe.h
 r600_reg_safe.h
+randstruct.seed
 randomize_layout_hash.h
 randomize_layout_seed.h
 recordmcount
--- a/Documentation/driver-api/gpio/driver.rst
+++ b/Documentation/driver-api/gpio/driver.rst
@ -417,30 +417,66 @@ struct gpio_irq_chip inside struct gpio_chip before adding the gpio_chip.
 If you do this, the additional irq_chip will be set up by gpiolib at the
 same time as setting up the rest of the GPIO functionality. The following
 is a typical example of a chained cascaded interrupt handler using
-the gpio_irq_chip:
+the gpio_irq_chip. Note how the mask/unmask (or disable/enable) functions
+call into the core gpiolib code:

 .. code-block:: c

-  /* Typical state container with dynamic irqchip */
+  /* Typical state container */
  struct my_gpio {
      struct gpio_chip gc;
-      struct irq_chip irq;
+  };
+
+  static void my_gpio_mask_irq(struct irq_data *d)
+  {
+      struct gpio_chip *gc = irq_desc_get_handler_data(d);
+
+      /*
+       * Perform any necessary action to mask the interrupt,
+       * and then call into the core code to synchronise the
+       * state.
+       */
+
+      gpiochip_disable_irq(gc, d->hwirq);
+  }
+
+  static void my_gpio_unmask_irq(struct irq_data *d)
+  {
+      struct gpio_chip *gc = irq_desc_get_handler_data(d);
+
+      gpiochip_enable_irq(gc, d->hwirq);
+
+      /*
+       * Perform any necessary action to unmask the interrupt,
+       * after having called into the core code to synchronise
+       * the state.
+       */
+  }
+
+  /*
+   * Statically populate the irqchip. Note that it is made const
+   * (further indicated by the IRQCHIP_IMMUTABLE flag), and that
+   * the GPIOCHIP_IRQ_RESOURCE_HELPER macro adds some extra
+   * callbacks to the structure.
+   */
+  static const struct irq_chip my_gpio_irq_chip = {
+      .name		= "my_gpio_irq",
+      .irq_ack		= my_gpio_ack_irq,
+      .irq_mask		= my_gpio_mask_irq,
+      .irq_unmask	= my_gpio_unmask_irq,
+      .irq_set_type	= my_gpio_set_irq_type,
+      .flags		= IRQCHIP_IMMUTABLE,
+      /* Provide the gpio resource callbacks */
+      GPIOCHIP_IRQ_RESOURCE_HELPERS,
  };

  int irq; /* from platform etc */
  struct my_gpio *g;
  struct gpio_irq_chip *girq;

-  /* Set up the irqchip dynamically */
-  g->irq.name = "my_gpio_irq";
-  g->irq.irq_ack = my_gpio_ack_irq;
-  g->irq.irq_mask = my_gpio_mask_irq;
-  g->irq.irq_unmask = my_gpio_unmask_irq;
-  g->irq.irq_set_type = my_gpio_set_irq_type;
-
  /* Get a pointer to the gpio_irq_chip */
  girq = &g->gc.irq;
-  girq->chip = &g->irq;
+  gpio_irq_chip_set_chip(girq, &my_gpio_irq_chip);
  girq->parent_handler = ftgpio_gpio_irq_handler;
  girq->num_parents = 1;
  girq->parents = devm_kcalloc(dev, 1, sizeof(*girq->parents),
@ -458,23 +494,58 @@ the interrupt separately and go with it:

 .. code-block:: c

-  /* Typical state container with dynamic irqchip */
+  /* Typical state container */
  struct my_gpio {
      struct gpio_chip gc;
-      struct irq_chip irq;
+  };
+
+  static void my_gpio_mask_irq(struct irq_data *d)
+  {
+      struct gpio_chip *gc = irq_desc_get_handler_data(d);
+
+      /*
+       * Perform any necessary action to mask the interrupt,
+       * and then call into the core code to synchronise the
+       * state.
+       */
+
+      gpiochip_disable_irq(gc, d->hwirq);
+  }
+
+  static void my_gpio_unmask_irq(struct irq_data *d)
+  {
+      struct gpio_chip *gc = irq_desc_get_handler_data(d);
+
+      gpiochip_enable_irq(gc, d->hwirq);
+
+      /*
+       * Perform any necessary action to unmask the interrupt,
+       * after having called into the core code to synchronise
+       * the state.
+       */
+  }
+
+  /*
+   * Statically populate the irqchip. Note that it is made const
+   * (further indicated by the IRQCHIP_IMMUTABLE flag), and that
+   * the GPIOCHIP_IRQ_RESOURCE_HELPER macro adds some extra
+   * callbacks to the structure.
+   */
+  static const struct irq_chip my_gpio_irq_chip = {
+      .name		= "my_gpio_irq",
+      .irq_ack		= my_gpio_ack_irq,
+      .irq_mask		= my_gpio_mask_irq,
+      .irq_unmask	= my_gpio_unmask_irq,
+      .irq_set_type	= my_gpio_set_irq_type,
+      .flags		= IRQCHIP_IMMUTABLE,
+      /* Provide the gpio resource callbacks */
+      GPIOCHIP_IRQ_RESOURCE_HELPERS,
  };

  int irq; /* from platform etc */
  struct my_gpio *g;
  struct gpio_irq_chip *girq;

-  /* Set up the irqchip dynamically */
-  g->irq.name = "my_gpio_irq";
-  g->irq.irq_ack = my_gpio_ack_irq;
-  g->irq.irq_mask = my_gpio_mask_irq;
-  g->irq.irq_unmask = my_gpio_unmask_irq;
-  g->irq.irq_set_type = my_gpio_set_irq_type;
-
  ret = devm_request_threaded_irq(dev, irq, NULL,
 		irq_thread_fn, IRQF_ONESHOT, "my-chip", g);
  if (ret < 0)
@ -482,7 +553,7 @@ the interrupt separately and go with it:

  /* Get a pointer to the gpio_irq_chip */
  girq = &g->gc.irq;
-  girq->chip = &g->irq;
+  gpio_irq_chip_set_chip(girq, &my_gpio_irq_chip);
  /* This will let us handle the parent IRQ in the driver */
  girq->parent_handler = NULL;
  girq->num_parents = 0;
@ -500,24 +571,61 @@ In this case the typical set-up will look like this:
  /* Typical state container with dynamic irqchip */
  struct my_gpio {
      struct gpio_chip gc;
-      struct irq_chip irq;
      struct fwnode_handle *fwnode;
  };

-  int irq; /* from platform etc */
+  static void my_gpio_mask_irq(struct irq_data *d)
+  {
+      struct gpio_chip *gc = irq_desc_get_handler_data(d);
+
+      /*
+       * Perform any necessary action to mask the interrupt,
+       * and then call into the core code to synchronise the
+       * state.
+       */
+
+      gpiochip_disable_irq(gc, d->hwirq);
+      irq_mask_mask_parent(d);
+  }
+
+  static void my_gpio_unmask_irq(struct irq_data *d)
+  {
+      struct gpio_chip *gc = irq_desc_get_handler_data(d);
+
+      gpiochip_enable_irq(gc, d->hwirq);
+
+      /*
+       * Perform any necessary action to unmask the interrupt,
+       * after having called into the core code to synchronise
+       * the state.
+       */
+
+      irq_mask_unmask_parent(d);
+  }
+
+  /*
+   * Statically populate the irqchip. Note that it is made const
+   * (further indicated by the IRQCHIP_IMMUTABLE flag), and that
+   * the GPIOCHIP_IRQ_RESOURCE_HELPER macro adds some extra
+   * callbacks to the structure.
+   */
+  static const struct irq_chip my_gpio_irq_chip = {
+      .name		= "my_gpio_irq",
+      .irq_ack		= my_gpio_ack_irq,
+      .irq_mask		= my_gpio_mask_irq,
+      .irq_unmask	= my_gpio_unmask_irq,
+      .irq_set_type	= my_gpio_set_irq_type,
+      .flags		= IRQCHIP_IMMUTABLE,
+      /* Provide the gpio resource callbacks */
+      GPIOCHIP_IRQ_RESOURCE_HELPERS,
+  };
+
  struct my_gpio *g;
  struct gpio_irq_chip *girq;

-  /* Set up the irqchip dynamically */
-  g->irq.name = "my_gpio_irq";
-  g->irq.irq_ack = my_gpio_ack_irq;
-  g->irq.irq_mask = my_gpio_mask_irq;
-  g->irq.irq_unmask = my_gpio_unmask_irq;
-  g->irq.irq_set_type = my_gpio_set_irq_type;
-
  /* Get a pointer to the gpio_irq_chip */
  girq = &g->gc.irq;
-  girq->chip = &g->irq;
+  gpio_irq_chip_set_chip(girq, &my_gpio_irq_chip);
  girq->default_type = IRQ_TYPE_NONE;
  girq->handler = handle_bad_irq;
  girq->fwnode = g->fwnode;
@ -605,8 +713,9 @@ When implementing an irqchip inside a GPIO driver, these two functions should
 typically be called in the .irq_disable() and .irq_enable() callbacks from the
 irqchip.

-When using the gpiolib irqchip helpers, these callbacks are automatically
-assigned.
+When IRQCHIP_IMMUTABLE is not advertised by the irqchip, these callbacks
+are automatically assigned. This behaviour is deprecated and on its way
+to be removed from the kernel.


 Real-Time compliance for GPIO IRQ chips
--- a/Documentation/driver-api/libata.rst
+++ b/Documentation/driver-api/libata.rst
@ -424,12 +424,6 @@ How commands are issued
 -----------------------

 Internal commands
-    First, qc is allocated and initialized using :c:func:`ata_qc_new_init`.
-    Although :c:func:`ata_qc_new_init` doesn't implement any wait or retry
-    mechanism when qc is not available, internal commands are currently
-    issued only during initialization and error recovery, so no other
-    command is active and allocation is guaranteed to succeed.
-
    Once allocated qc's taskfile is initialized for the command to be
    executed. qc currently has two mechanisms to notify completion. One
    is via ``qc->complete_fn()`` callback and the other is completion
@ -447,11 +441,6 @@ SCSI commands
    translated. No qc is involved in processing a simulated scmd. The
    result is computed right away and the scmd is completed.

-    For a translated scmd, :c:func:`ata_qc_new_init` is invoked to allocate a
-    qc and the scmd is translated into the qc. SCSI midlayer's
-    completion notification function pointer is stored into
-    ``qc->scsidone``.
-
    ``qc->complete_fn()`` callback is used for completion notification. ATA
    commands use :c:func:`ata_scsi_qc_complete` while ATAPI commands use
    :c:func:`atapi_qc_complete`. Both functions end up calling ``qc->scsidone``
--- a/Documentation/features/debug/debug-vm-pgtable/arch-support.txt
+++ b/Documentation/features/debug/debug-vm-pgtable/arch-support.txt
@ -27,5 +27,5 @@
    |       sparc: | TODO |
    |          um: | TODO |
    |         x86: |  ok  |
-    |      xtensa: | TODO |
+    |      xtensa: |  ok  |
    -----------------------
--- a/Documentation/features/time/context-tracking/arch-support.txt
+++ b/Documentation/features/time/context-tracking/arch-support.txt
@ -27,5 +27,5 @@
    |       sparc: |  ok  |
    |          um: | TODO |
    |         x86: |  ok  |
-    |      xtensa: | TODO |
+    |      xtensa: |  ok  |
    -----------------------
--- a/Documentation/features/time/virt-cpuacct/arch-support.txt
+++ b/Documentation/features/time/virt-cpuacct/arch-support.txt
@ -27,5 +27,5 @@
    |       sparc: |  ok  |
    |          um: | TODO |
    |         x86: |  ok  |
-    |      xtensa: | TODO |
+    |      xtensa: |  ok  |
    -----------------------
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@ -1183,85 +1183,7 @@ Provides counts of softirq handlers serviced since boot time, for each CPU.
    HRTIMER:         0          0          0          0
 	RCU:      1678       1769       2178       2250

-
-1.3 IDE devices in /proc/ide
----------------------------
-
-The subdirectory /proc/ide contains information about all IDE devices of which
-the kernel  is  aware.  There is one subdirectory for each IDE controller, the
-file drivers  and a link for each IDE device, pointing to the device directory
-in the controller specific subtree.
-
-The file 'drivers' contains general information about the drivers used for the
-IDE devices::
-
-  > cat /proc/ide/drivers
-  ide-cdrom version 4.53
-  ide-disk version 1.08
-
-More detailed  information  can  be  found  in  the  controller  specific
-subdirectories. These  are  named  ide0,  ide1  and  so  on.  Each  of  these
-directories contains the files shown in table 1-6.
-
-
-.. table:: Table 1-6: IDE controller info in  /proc/ide/ide?
-
- ======= =======================================
- File    Content
- ======= =======================================
- channel IDE channel (0 or 1)
- config  Configuration (only for PCI/IDE bridge)
- mate    Mate name
- model   Type/Chipset of IDE controller
- ======= =======================================
-
-Each device  connected  to  a  controller  has  a separate subdirectory in the
-controllers directory.  The  files  listed in table 1-7 are contained in these
-directories.
-
-
-.. table:: Table 1-7: IDE device information
-
- ================ ==========================================
- File             Content
- ================ ==========================================
- cache            The cache
- capacity         Capacity of the medium (in 512Byte blocks)
- driver           driver and version
- geometry         physical and logical geometry
- identify         device identify block
- media            media type
- model            device identifier
- settings         device setup
- smart_thresholds IDE disk management thresholds
- smart_values     IDE disk management values
- ================ ==========================================
-
-The most  interesting  file is ``settings``. This file contains a nice
-overview of the drive parameters::
-
-  # cat /proc/ide/ide0/hda/settings
-  name                    value           min             max             mode
-  ----                    -----           ---             ---             ----
-  bios_cyl                526             0               65535           rw
-  bios_head               255             0               255             rw
-  bios_sect               63              0               63              rw
-  breada_readahead        4               0               127             rw
-  bswap                   0               0               1               r
-  file_readahead          72              0               2097151         rw
-  io_32bit                0               0               3               rw
-  keepsettings            0               0               1               rw
-  max_kb_per_request      122             1               127             rw
-  multcount               0               0               8               rw
-  nice1                   1               0               1               rw
-  nowerr                  0               0               1               rw
-  pio_mode                write-only      0               255             w
-  slow                    0               0               1               rw
-  unmaskirq               0               0               1               rw
-  using_dma               0               0               1               rw
-
-
-1.4 Networking info in /proc/net
+1.3 Networking info in /proc/net
 --------------------------------

 The subdirectory  /proc/net  follows  the  usual  pattern. Table 1-8 shows the
@ -1340,7 +1262,7 @@ It will contain information that is specific to that bond, such as the
 current slaves of the bond, the link status of the slaves, and how
 many times the slaves link has failed.

-1.5 SCSI info
+1.4 SCSI info
 -------------

 If you  have  a  SCSI  host adapter in your system, you'll find a subdirectory
@ -1403,7 +1325,7 @@ AHA-2940 SCSI adapter::
    Total transfers 0 (0 reads and 0 writes)


-1.6 Parallel port info in /proc/parport
+1.5 Parallel port info in /proc/parport
 ---------------------------------------

 The directory  /proc/parport  contains information about the parallel ports of
@ -1428,7 +1350,7 @@ These directories contain the four files shown in Table 1-10.
           number or none).
 ========= ====================================================================

-1.7 TTY info in /proc/tty
+1.6 TTY info in /proc/tty
 -------------------------

 Information about  the  available  and actually used tty's can be found in the
@ -1463,7 +1385,7 @@ To see  which  tty's  are  currently in use, you can simply look into the file
  unknown              /dev/tty        4    1-63 console


-1.8 Miscellaneous kernel statistics in /proc/stat
+1.7 Miscellaneous kernel statistics in /proc/stat
 -------------------------------------------------

 Various pieces   of  information about  kernel activity  are  available in the
@ -1536,7 +1458,7 @@ softirqs serviced; each subsequent column is the total for that particular
 softirq.


-1.9 Ext4 file system parameters
+1.8 Ext4 file system parameters
 -------------------------------

 Information about mounted ext4 file systems can be found in
@ -1552,7 +1474,7 @@ in Table 1-12, below.
 mb_groups       details of multiblock allocator buddy cache of free blocks
 ==============  ==========================================================

-1.10 /proc/consoles
+1.9 /proc/consoles
 -------------------
 Shows registered system console lines.

--- a/Documentation/filesystems/zonefs.rst
+++ b/Documentation/filesystems/zonefs.rst
@ -306,8 +306,15 @@ Further notes:
 Mount options
 -------------

-zonefs define the "errors=<behavior>" mount option to allow the user to specify
-zonefs behavior in response to I/O errors, inode size inconsistencies or zone
+zonefs defines several mount options:
+* errors=<behavior>
+* explicit-open
+
+"errors=<behavior>" option
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The "errors=<behavior>" option mount option allows the user to specify zonefs
+behavior in response to I/O errors, inode size inconsistencies or zone
 condition changes. The defined behaviors are as follow:

 * remount-ro (default)
@ -326,6 +333,9 @@ discover the amount of data that has been written to the zone. In the case of a
 read-only zone discovered at run-time, as indicated in the previous section.
 The size of the zone file is left unchanged from its last updated value.

+"explicit-open" option
+~~~~~~~~~~~~~~~~~~~~~~
+
 A zoned block device (e.g. an NVMe Zoned Namespace device) may have limits on
 the number of zones that can be active, that is, zones that are in the
 implicit open, explicit open or closed conditions.  This potential limitation
@ -341,6 +351,44 @@ guaranteed that write requests can be processed. Conversely, the
 to the device on the last close() of a zone file if the zone is not full nor
 empty.

+Runtime sysfs attributes
+------------------------
+
+zonefs defines several sysfs attributes for mounted devices.  All attributes
+are user readable and can be found in the directory /sys/fs/zonefs/<dev>/,
+where <dev> is the name of the mounted zoned block device.
+
+The attributes defined are as follows.
+
+* **max_wro_seq_files**:  This attribute reports the maximum number of
+  sequential zone files that can be open for writing.  This number corresponds
+  to the maximum number of explicitly or implicitly open zones that the device
+  supports.  A value of 0 means that the device has no limit and that any zone
+  (any file) can be open for writing and written at any time, regardless of the
+  state of other zones.  When the *explicit-open* mount option is used, zonefs
+  will fail any open() system call requesting to open a sequential zone file for
+  writing when the number of sequential zone files already open for writing has
+  reached the *max_wro_seq_files* limit.
+* **nr_wro_seq_files**:  This attribute reports the current number of sequential
+  zone files open for writing.  When the "explicit-open" mount option is used,
+  this number can never exceed *max_wro_seq_files*.  If the *explicit-open*
+  mount option is not used, the reported number can be greater than
+  *max_wro_seq_files*.  In such case, it is the responsibility of the
+  application to not write simultaneously more than *max_wro_seq_files*
+  sequential zone files.  Failure to do so can result in write errors.
+* **max_active_seq_files**:  This attribute reports the maximum number of
+  sequential zone files that are in an active state, that is, sequential zone
+  files that are partially writen (not empty nor full) or that have a zone that
+  is explicitly open (which happens only if the *explicit-open* mount option is
+  used).  This number is always equal to the maximum number of active zones that
+  the device supports.  A value of 0 means that the mounted device has no limit
+  on the number of sequential zone files that can be active.
+* **nr_active_seq_files**:  This attributes reports the current number of
+  sequential zone files that are active. If *max_active_seq_files* is not 0,
+  then the value of *nr_active_seq_files* can never exceed the value of
+  *nr_active_seq_files*, regardless of the use of the *explicit-open* mount
+  option.
+
 Zonefs User Space Tools
 =======================

--- a/Documentation/kbuild/reproducible-builds.rst
+++ b/Documentation/kbuild/reproducible-builds.rst
@ -99,10 +99,10 @@ unreproducible parts can be treated as sources:
 Structure randomisation
 -----------------------

-If you enable ``CONFIG_GCC_PLUGIN_RANDSTRUCT``, you will need to
-pre-generate the random seed in
-``scripts/gcc-plugins/randomize_layout_seed.h`` so the same value
-is used in rebuilds.
+If you enable ``CONFIG_RANDSTRUCT``, you will need to pre-generate
+the random seed in ``scripts/basic/randstruct.seed`` so the same
+value is used by each build. See ``scripts/gen-randstruct-seed.sh``
+for details.

 Debug info conflicts
 --------------------
--- a/Documentation/process/maintainer-tip.rst
+++ b/Documentation/process/maintainer-tip.rst
@ -437,6 +437,20 @@ in a private repository which allows interested people to easily pull the
 series for testing. The usual way to offer this is a git URL in the cover
 letter of the patch series.

+Testing
+^^^^^^^
+
+Code should be tested before submitting to the tip maintainers.  Anything
+other than minor changes should be built, booted and tested with
+comprehensive (and heavyweight) kernel debugging options enabled.
+
+These debugging options can be found in kernel/configs/x86_debug.config
+and can be added to an existing kernel config by running:
+
+	make x86_debug.config
+
+Some of these options are x86-specific and can be left out when testing
+on other architectures.

 Coding style notes
 ------------------
--- a/Documentation/security/index.rst
+++ b/Documentation/security/index.rst
@ -17,3 +17,4 @@ Security Documentation
   tpm/index
   digsig
   landlock
+   secrets/index
--- a/Documentation/security/secrets/coco.rst
+++ b/Documentation/security/secrets/coco.rst
@ -0,0 +1,103 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============================
+Confidential Computing secrets
+==============================
+
+This document describes how Confidential Computing secret injection is handled
+from the firmware to the operating system, in the EFI driver and the efi_secret
+kernel module.
+
+
+Introduction
+============
+
+Confidential Computing (coco) hardware such as AMD SEV (Secure Encrypted
+Virtualization) allows guest owners to inject secrets into the VMs
+memory without the host/hypervisor being able to read them.  In SEV,
+secret injection is performed early in the VM launch process, before the
+guest starts running.
+
+The efi_secret kernel module allows userspace applications to access these
+secrets via securityfs.
+
+
+Secret data flow
+================
+
+The guest firmware may reserve a designated memory area for secret injection,
+and publish its location (base GPA and length) in the EFI configuration table
+under a ``LINUX_EFI_COCO_SECRET_AREA_GUID`` entry
+(``adf956ad-e98c-484c-ae11-b51c7d336447``).  This memory area should be marked
+by the firmware as ``EFI_RESERVED_TYPE``, and therefore the kernel should not
+be use it for its own purposes.
+
+During the VM's launch, the virtual machine manager may inject a secret to that
+area.  In AMD SEV and SEV-ES this is performed using the
+``KVM_SEV_LAUNCH_SECRET`` command (see [sev]_).  The strucutre of the injected
+Guest Owner secret data should be a GUIDed table of secret values; the binary
+format is described in ``drivers/virt/coco/efi_secret/efi_secret.c`` under
+"Structure of the EFI secret area".
+
+On kernel start, the kernel's EFI driver saves the location of the secret area
+(taken from the EFI configuration table) in the ``efi.coco_secret`` field.
+Later it checks if the secret area is populated: it maps the area and checks
+whether its content begins with ``EFI_SECRET_TABLE_HEADER_GUID``
+(``1e74f542-71dd-4d66-963e-ef4287ff173b``).  If the secret area is populated,
+the EFI driver will autoload the efi_secret kernel module, which exposes the
+secrets to userspace applications via securityfs.  The details of the
+efi_secret filesystem interface are in [secrets-coco-abi]_.
+
+
+Application usage example
+=========================
+
+Consider a guest performing computations on encrypted files.  The Guest Owner
+provides the decryption key (= secret) using the secret injection mechanism.
+The guest application reads the secret from the efi_secret filesystem and
+proceeds to decrypt the files into memory and then performs the needed
+computations on the content.
+
+In this example, the host can't read the files from the disk image
+because they are encrypted.  Host can't read the decryption key because
+it is passed using the secret injection mechanism (= secure channel).
+Host can't read the decrypted content from memory because it's a
+confidential (memory-encrypted) guest.
+
+Here is a simple example for usage of the efi_secret module in a guest
+to which an EFI secret area with 4 secrets was injected during launch::
+
+	# ls -la /sys/kernel/security/secrets/coco
+	total 0
+	drwxr-xr-x 2 root root 0 Jun 28 11:54 .
+	drwxr-xr-x 3 root root 0 Jun 28 11:54 ..
+	-r--r----- 1 root root 0 Jun 28 11:54 736870e5-84f0-4973-92ec-06879ce3da0b
+	-r--r----- 1 root root 0 Jun 28 11:54 83c83f7f-1356-4975-8b7e-d3a0b54312c6
+	-r--r----- 1 root root 0 Jun 28 11:54 9553f55d-3da2-43ee-ab5d-ff17f78864d2
+	-r--r----- 1 root root 0 Jun 28 11:54 e6f5a162-d67f-4750-a67c-5d065f2a9910
+
+	# hd /sys/kernel/security/secrets/coco/e6f5a162-d67f-4750-a67c-5d065f2a9910
+	00000000  74 68 65 73 65 2d 61 72  65 2d 74 68 65 2d 6b 61  |these-are-the-ka|
+	00000010  74 61 2d 73 65 63 72 65  74 73 00 01 02 03 04 05  |ta-secrets......|
+	00000020  06 07                                             |..|
+	00000022
+
+	# rm /sys/kernel/security/secrets/coco/e6f5a162-d67f-4750-a67c-5d065f2a9910
+
+	# ls -la /sys/kernel/security/secrets/coco
+	total 0
+	drwxr-xr-x 2 root root 0 Jun 28 11:55 .
+	drwxr-xr-x 3 root root 0 Jun 28 11:54 ..
+	-r--r----- 1 root root 0 Jun 28 11:54 736870e5-84f0-4973-92ec-06879ce3da0b
+	-r--r----- 1 root root 0 Jun 28 11:54 83c83f7f-1356-4975-8b7e-d3a0b54312c6
+	-r--r----- 1 root root 0 Jun 28 11:54 9553f55d-3da2-43ee-ab5d-ff17f78864d2
+
+
+References
+==========
+
+See [sev-api-spec]_ for more info regarding SEV ``LAUNCH_SECRET`` operation.
+
+.. [sev] Documentation/virt/kvm/amd-memory-encryption.rst
+.. [secrets-coco-abi] Documentation/ABI/testing/securityfs-secrets-coco
+.. [sev-api-spec] https://www.amd.com/system/files/TechDocs/55766_SEV-KM_API_Specification.pdf
--- a/Documentation/security/secrets/index.rst
+++ b/Documentation/security/secrets/index.rst
@ -0,0 +1,9 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+Secrets documentation
+=====================
+
+.. toctree::
+
+   coco
--- a/Documentation/userspace-api/ioctl/cdrom.rst
+++ b/Documentation/userspace-api/ioctl/cdrom.rst
@ -718,6 +718,9 @@ CDROMPLAYBLK


 CDROMGETSPINDOWN
+	Obsolete, was ide-cd only
+
+
 	usage::

 	  char spindown;
@ -736,6 +739,9 @@ CDROMGETSPINDOWN


 CDROMSETSPINDOWN
+	Obsolete, was ide-cd only
+
+
 	usage::

 	  char spindown
--- a/Documentation/userspace-api/seccomp_filter.rst
+++ b/Documentation/userspace-api/seccomp_filter.rst
@ -271,6 +271,16 @@ notifying process it will be replaced. The supervisor can also add an FD, and
 respond atomically by using the ``SECCOMP_ADDFD_FLAG_SEND`` flag and the return
 value will be the injected file descriptor number.

+The notifying process can be preempted, resulting in the notification being
+aborted. This can be problematic when trying to take actions on behalf of the
+notifying process that are long-running and typically retryable (mounting a
+filesytem). Alternatively, at filter installation time, the
+``SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV`` flag can be set. This flag makes it
+such that when a user notification is received by the supervisor, the notifying
+process will ignore non-fatal signals until the response is sent. Signals that
+are sent prior to the notification being received by userspace are handled
+normally.
+
 It is worth noting that ``struct seccomp_data`` contains the values of register
 arguments to the syscall, but does not contain pointers to memory. The task's
 memory is accessible to suitably privileged traces via ``ptrace()`` or
--- a/Documentation/virt/coco/sev-guest.rst
+++ b/Documentation/virt/coco/sev-guest.rst
@ -0,0 +1,155 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================================================
+The Definitive SEV Guest API Documentation
+===================================================================
+
+1. General description
+======================
+
+The SEV API is a set of ioctls that are used by the guest or hypervisor
+to get or set a certain aspect of the SEV virtual machine. The ioctls belong
+to the following classes:
+
+ - Hypervisor ioctls: These query and set global attributes which affect the
+   whole SEV firmware.  These ioctl are used by platform provisioning tools.
+
+ - Guest ioctls: These query and set attributes of the SEV virtual machine.
+
+2. API description
+==================
+
+This section describes ioctls that is used for querying the SEV guest report
+from the SEV firmware. For each ioctl, the following information is provided
+along with a description:
+
+  Technology:
+      which SEV technology provides this ioctl. SEV, SEV-ES, SEV-SNP or all.
+
+  Type:
+      hypervisor or guest. The ioctl can be used inside the guest or the
+      hypervisor.
+
+  Parameters:
+      what parameters are accepted by the ioctl.
+
+  Returns:
+      the return value.  General error numbers (-ENOMEM, -EINVAL)
+      are not detailed, but errors with specific meanings are.
+
+The guest ioctl should be issued on a file descriptor of the /dev/sev-guest device.
+The ioctl accepts struct snp_user_guest_request. The input and output structure is
+specified through the req_data and resp_data field respectively. If the ioctl fails
+to execute due to a firmware error, then fw_err code will be set otherwise the
+fw_err will be set to 0x00000000000000ff.
+
+The firmware checks that the message sequence counter is one greater than
+the guests message sequence counter. If guest driver fails to increment message
+counter (e.g. counter overflow), then -EIO will be returned.
+
+::
+
+        struct snp_guest_request_ioctl {
+                /* Message version number */
+                __u32 msg_version;
+
+                /* Request and response structure address */
+                __u64 req_data;
+                __u64 resp_data;
+
+                /* firmware error code on failure (see psp-sev.h) */
+                __u64 fw_err;
+        };
+
+2.1 SNP_GET_REPORT
+------------------
+
+:Technology: sev-snp
+:Type: guest ioctl
+:Parameters (in): struct snp_report_req
+:Returns (out): struct snp_report_resp on success, -negative on error
+
+The SNP_GET_REPORT ioctl can be used to query the attestation report from the
+SEV-SNP firmware. The ioctl uses the SNP_GUEST_REQUEST (MSG_REPORT_REQ) command
+provided by the SEV-SNP firmware to query the attestation report.
+
+On success, the snp_report_resp.data will contains the report. The report
+contain the format described in the SEV-SNP specification. See the SEV-SNP
+specification for further details.
+
+2.2 SNP_GET_DERIVED_KEY
+-----------------------
+:Technology: sev-snp
+:Type: guest ioctl
+:Parameters (in): struct snp_derived_key_req
+:Returns (out): struct snp_derived_key_resp on success, -negative on error
+
+The SNP_GET_DERIVED_KEY ioctl can be used to get a key derive from a root key.
+The derived key can be used by the guest for any purpose, such as sealing keys
+or communicating with external entities.
+
+The ioctl uses the SNP_GUEST_REQUEST (MSG_KEY_REQ) command provided by the
+SEV-SNP firmware to derive the key. See SEV-SNP specification for further details
+on the various fields passed in the key derivation request.
+
+On success, the snp_derived_key_resp.data contains the derived key value. See
+the SEV-SNP specification for further details.
+
+
+2.3 SNP_GET_EXT_REPORT
+----------------------
+:Technology: sev-snp
+:Type: guest ioctl
+:Parameters (in/out): struct snp_ext_report_req
+:Returns (out): struct snp_report_resp on success, -negative on error
+
+The SNP_GET_EXT_REPORT ioctl is similar to the SNP_GET_REPORT. The difference is
+related to the additional certificate data that is returned with the report.
+The certificate data returned is being provided by the hypervisor through the
+SNP_SET_EXT_CONFIG.
+
+The ioctl uses the SNP_GUEST_REQUEST (MSG_REPORT_REQ) command provided by the SEV-SNP
+firmware to get the attestation report.
+
+On success, the snp_ext_report_resp.data will contain the attestation report
+and snp_ext_report_req.certs_address will contain the certificate blob. If the
+length of the blob is smaller than expected then snp_ext_report_req.certs_len will
+be updated with the expected value.
+
+See GHCB specification for further detail on how to parse the certificate blob.
+
+3. SEV-SNP CPUID Enforcement
+============================
+
+SEV-SNP guests can access a special page that contains a table of CPUID values
+that have been validated by the PSP as part of the SNP_LAUNCH_UPDATE firmware
+command. It provides the following assurances regarding the validity of CPUID
+values:
+
+ - Its address is obtained via bootloader/firmware (via CC blob), and those
+   binaries will be measured as part of the SEV-SNP attestation report.
+ - Its initial state will be encrypted/pvalidated, so attempts to modify
+   it during run-time will result in garbage being written, or #VC exceptions
+   being generated due to changes in validation state if the hypervisor tries
+   to swap the backing page.
+ - Attempts to bypass PSP checks by the hypervisor by using a normal page, or
+   a non-CPUID encrypted page will change the measurement provided by the
+   SEV-SNP attestation report.
+ - The CPUID page contents are *not* measured, but attempts to modify the
+   expected contents of a CPUID page as part of guest initialization will be
+   gated by the PSP CPUID enforcement policy checks performed on the page
+   during SNP_LAUNCH_UPDATE, and noticeable later if the guest owner
+   implements their own checks of the CPUID values.
+
+It is important to note that this last assurance is only useful if the kernel
+has taken care to make use of the SEV-SNP CPUID throughout all stages of boot.
+Otherwise, guest owner attestation provides no assurance that the kernel wasn't
+fed incorrect values at some point during boot.
+
+
+Reference
+---------
+
+SEV-SNP and GHCB specification: developer.amd.com/sev
+
+The driver is based on SEV-SNP firmware spec 0.9 and GHCB spec version 2.0.
--- a/Documentation/virt/index.rst
+++ b/Documentation/virt/index.rst
@ -13,6 +13,7 @@ Linux Virtualization Support
   guest-halt-polling
   ne_overview
   acrn/index
+   coco/sev-guest

 .. only:: html and subproject

--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@ -5713,6 +5713,8 @@ affect the device's behavior. Current defined flags::
  #define KVM_RUN_X86_SMM     (1 << 0)
  /* x86, set if bus lock detected in VM */
  #define KVM_RUN_BUS_LOCK    (1 << 1)
+  /* arm64, set for KVM_EXIT_DEBUG */
+  #define KVM_DEBUG_ARCH_HSR_HIGH_VALID  (1 << 0)

 ::

--- a/Documentation/x86/cpuinfo.rst
+++ b/Documentation/x86/cpuinfo.rst
@ -140,9 +140,8 @@ from #define X86_FEATURE_UMIP (16*32 + 2).

 In addition, there exists a variety of custom command-line parameters that
 disable specific features. The list of parameters includes, but is not limited
-to, nofsgsbase, nosmap, and nosmep. 5-level paging can also be disabled using
-"no5lvl". SMAP and SMEP are disabled with the aforementioned parameters,
-respectively.
+to, nofsgsbase, nosgx, noxsave, etc. 5-level paging can also be disabled using
+"no5lvl".

 e: The feature was known to be non-functional.
 ----------------------------------------------
--- a/Documentation/x86/ifs.rst
+++ b/Documentation/x86/ifs.rst
@ -0,0 +1,2 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. kernel-doc:: drivers/platform/x86/intel/ifs/ifs.h
--- a/Documentation/x86/index.rst
+++ b/Documentation/x86/index.rst
@ -26,6 +26,7 @@ x86-specific Documentation
   intel_txt
   amd-memory-encryption
   amd_hsmp
+   tdx
   pti
   mds
   microcode
@ -35,6 +36,7 @@ x86-specific Documentation
   usb-legacy-support
   i386/index
   x86_64/index
+   ifs
   sva
   sgx
   features
--- a/Documentation/x86/tdx.rst
+++ b/Documentation/x86/tdx.rst
@ -0,0 +1,218 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================
+Intel Trust Domain Extensions (TDX)
+=====================================
+
+Intel's Trust Domain Extensions (TDX) protect confidential guest VMs from
+the host and physical attacks by isolating the guest register state and by
+encrypting the guest memory. In TDX, a special module running in a special
+mode sits between the host and the guest and manages the guest/host
+separation.
+
+Since the host cannot directly access guest registers or memory, much
+normal functionality of a hypervisor must be moved into the guest. This is
+implemented using a Virtualization Exception (#VE) that is handled by the
+guest kernel. A #VE is handled entirely inside the guest kernel, but some
+require the hypervisor to be consulted.
+
+TDX includes new hypercall-like mechanisms for communicating from the
+guest to the hypervisor or the TDX module.
+
+New TDX Exceptions
+==================
+
+TDX guests behave differently from bare-metal and traditional VMX guests.
+In TDX guests, otherwise normal instructions or memory accesses can cause
+#VE or #GP exceptions.
+
+Instructions marked with an '*' conditionally cause exceptions.  The
+details for these instructions are discussed below.
+
+Instruction-based #VE
+---------------------
+
+- Port I/O (INS, OUTS, IN, OUT)
+- HLT
+- MONITOR, MWAIT
+- WBINVD, INVD
+- VMCALL
+- RDMSR*,WRMSR*
+- CPUID*
+
+Instruction-based #GP
+---------------------
+
+- All VMX instructions: INVEPT, INVVPID, VMCLEAR, VMFUNC, VMLAUNCH,
+  VMPTRLD, VMPTRST, VMREAD, VMRESUME, VMWRITE, VMXOFF, VMXON
+- ENCLS, ENCLU
+- GETSEC
+- RSM
+- ENQCMD
+- RDMSR*,WRMSR*
+
+RDMSR/WRMSR Behavior
+--------------------
+
+MSR access behavior falls into three categories:
+
+- #GP generated
+- #VE generated
+- "Just works"
+
+In general, the #GP MSRs should not be used in guests.  Their use likely
+indicates a bug in the guest.  The guest may try to handle the #GP with a
+hypercall but it is unlikely to succeed.
+
+The #VE MSRs are typically able to be handled by the hypervisor.  Guests
+can make a hypercall to the hypervisor to handle the #VE.
+
+The "just works" MSRs do not need any special guest handling.  They might
+be implemented by directly passing through the MSR to the hardware or by
+trapping and handling in the TDX module.  Other than possibly being slow,
+these MSRs appear to function just as they would on bare metal.
+
+CPUID Behavior
+--------------
+
+For some CPUID leaves and sub-leaves, the virtualized bit fields of CPUID
+return values (in guest EAX/EBX/ECX/EDX) are configurable by the
+hypervisor. For such cases, the Intel TDX module architecture defines two
+virtualization types:
+
+- Bit fields for which the hypervisor controls the value seen by the guest
+  TD.
+
+- Bit fields for which the hypervisor configures the value such that the
+  guest TD either sees their native value or a value of 0.  For these bit
+  fields, the hypervisor can mask off the native values, but it can not
+  turn *on* values.
+
+A #VE is generated for CPUID leaves and sub-leaves that the TDX module does
+not know how to handle. The guest kernel may ask the hypervisor for the
+value with a hypercall.
+
+#VE on Memory Accesses
+======================
+
+There are essentially two classes of TDX memory: private and shared.
+Private memory receives full TDX protections.  Its content is protected
+against access from the hypervisor.  Shared memory is expected to be
+shared between guest and hypervisor and does not receive full TDX
+protections.
+
+A TD guest is in control of whether its memory accesses are treated as
+private or shared.  It selects the behavior with a bit in its page table
+entries.  This helps ensure that a guest does not place sensitive
+information in shared memory, exposing it to the untrusted hypervisor.
+
+#VE on Shared Memory
+--------------------
+
+Access to shared mappings can cause a #VE.  The hypervisor ultimately
+controls whether a shared memory access causes a #VE, so the guest must be
+careful to only reference shared pages it can safely handle a #VE.  For
+instance, the guest should be careful not to access shared memory in the
+#VE handler before it reads the #VE info structure (TDG.VP.VEINFO.GET).
+
+Shared mapping content is entirely controlled by the hypervisor. The guest
+should only use shared mappings for communicating with the hypervisor.
+Shared mappings must never be used for sensitive memory content like kernel
+stacks.  A good rule of thumb is that hypervisor-shared memory should be
+treated the same as memory mapped to userspace.  Both the hypervisor and
+userspace are completely untrusted.
+
+MMIO for virtual devices is implemented as shared memory.  The guest must
+be careful not to access device MMIO regions unless it is also prepared to
+handle a #VE.
+
+#VE on Private Pages
+--------------------
+
+An access to private mappings can also cause a #VE.  Since all kernel
+memory is also private memory, the kernel might theoretically need to
+handle a #VE on arbitrary kernel memory accesses.  This is not feasible, so
+TDX guests ensure that all guest memory has been "accepted" before memory
+is used by the kernel.
+
+A modest amount of memory (typically 512M) is pre-accepted by the firmware
+before the kernel runs to ensure that the kernel can start up without
+being subjected to a #VE.
+
+The hypervisor is permitted to unilaterally move accepted pages to a
+"blocked" state. However, if it does this, page access will not generate a
+#VE.  It will, instead, cause a "TD Exit" where the hypervisor is required
+to handle the exception.
+
+Linux #VE handler
+=================
+
+Just like page faults or #GP's, #VE exceptions can be either handled or be
+fatal.  Typically, an unhandled userspace #VE results in a SIGSEGV.
+An unhandled kernel #VE results in an oops.
+
+Handling nested exceptions on x86 is typically nasty business.  A #VE
+could be interrupted by an NMI which triggers another #VE and hilarity
+ensues.  The TDX #VE architecture anticipated this scenario and includes a
+feature to make it slightly less nasty.
+
+During #VE handling, the TDX module ensures that all interrupts (including
+NMIs) are blocked.  The block remains in place until the guest makes a
+TDG.VP.VEINFO.GET TDCALL.  This allows the guest to control when interrupts
+or a new #VE can be delivered.
+
+However, the guest kernel must still be careful to avoid potential
+#VE-triggering actions (discussed above) while this block is in place.
+While the block is in place, any #VE is elevated to a double fault (#DF)
+which is not recoverable.
+
+MMIO handling
+=============
+
+In non-TDX VMs, MMIO is usually implemented by giving a guest access to a
+mapping which will cause a VMEXIT on access, and then the hypervisor
+emulates the access.  That is not possible in TDX guests because VMEXIT
+will expose the register state to the host. TDX guests don't trust the host
+and can't have their state exposed to the host.
+
+In TDX, MMIO regions typically trigger a #VE exception in the guest.  The
+guest #VE handler then emulates the MMIO instruction inside the guest and
+converts it into a controlled TDCALL to the host, rather than exposing
+guest state to the host.
+
+MMIO addresses on x86 are just special physical addresses. They can
+theoretically be accessed with any instruction that accesses memory.
+However, the kernel instruction decoding method is limited. It is only
+designed to decode instructions like those generated by io.h macros.
+
+MMIO access via other means (like structure overlays) may result in an
+oops.
+
+Shared Memory Conversions
+=========================
+
+All TDX guest memory starts out as private at boot.  This memory can not
+be accessed by the hypervisor.  However, some kernel users like device
+drivers might have a need to share data with the hypervisor.  To do this,
+memory must be converted between shared and private.  This can be
+accomplished using some existing memory encryption helpers:
+
+ * set_memory_decrypted() converts a range of pages to shared.
+ * set_memory_encrypted() converts memory back to private.
+
+Device drivers are the primary user of shared memory, but there's no need
+to touch every driver. DMA buffers and ioremap() do the conversions
+automatically.
+
+TDX uses SWIOTLB for most DMA allocations. The SWIOTLB buffer is
+converted to shared on boot.
+
+For coherent DMA allocation, the DMA buffer gets converted on the
+allocation. Check force_dma_unencrypted() for details.
+
+References
+==========
+
+TDX reference material is collected here:
+
+https://www.intel.com/content/www/us/en/developer/articles/technical/intel-trust-domain-extensions.html
--- a/Documentation/x86/x86_64/boot-options.rst
+++ b/Documentation/x86/x86_64/boot-options.rst
@ -157,15 +157,6 @@ Rebooting
     newer BIOS, or newer board) using this option will ignore the built-in
     quirk table, and use the generic default reboot actions.

-Non Executable Mappings
-=======================
-
-  noexec=on|off
-    on
-      Enable(default)
-    off
-      Disable
-
 NUMA
 ====

@ -310,3 +301,17 @@ Miscellaneous
    Do not use GB pages for kernel direct mappings.
  gbpages
    Use GB pages for kernel direct mappings.
+
+
+AMD SEV (Secure Encrypted Virtualization)
+=========================================
+Options relating to AMD SEV, specified via the following format:
+
+::
+
+   sev=option1[,option2]
+
+The available options are:
+
+   debug
+     Enable debug messages.
--- a/Documentation/x86/zero-page.rst
+++ b/Documentation/x86/zero-page.rst
@ -19,6 +19,7 @@ Offset/Size	Proto	Name			Meaning
 058/008		ALL	tboot_addr      	Physical address of tboot shared page
 060/010		ALL	ist_info		Intel SpeedStep (IST) BIOS support information
 						(struct ist_info)
+070/008		ALL	acpi_rsdp_addr		Physical address of ACPI RSDP table
 080/010		ALL	hd0_info		hd0 disk parameter, OBSOLETE!!
 090/010		ALL	hd1_info		hd1 disk parameter, OBSOLETE!!
 0A0/010		ALL	sys_desc_table		System description table (struct sys_desc_table),
@ -27,6 +28,7 @@ Offset/Size	Proto	Name			Meaning
 0C0/004		ALL	ext_ramdisk_image	ramdisk_image high 32bits
 0C4/004		ALL	ext_ramdisk_size	ramdisk_size high 32bits
 0C8/004		ALL	ext_cmd_line_ptr	cmd_line_ptr high 32bits
+13C/004		ALL	cc_blob_address		Physical address of Confidential Computing blob
 140/080		ALL	edid_info		Video mode setup (struct edid_info)
 1C0/020		ALL	efi_info		EFI 32 information (struct efi_info)
 1E0/004		ALL	alt_mem_k		Alternative mem check, in KB
--- a/10
+++ b/10
@ -7385,7 +7385,6 @@ L:	linux-mm@kvack.org
 S:	Supported
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/execve
 F:	arch/alpha/kernel/binfmt_loader.c
-F:	arch/x86/ia32/ia32_aout.c
 F:	fs/*binfmt_*.c
 F:	fs/exec.c
 F:	include/linux/binfmts.h
@ -9871,6 +9870,14 @@ B:	https://bugzilla.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux.git
 F:	drivers/idle/intel_idle.c

+INTEL IN FIELD SCAN (IFS) DEVICE
+M:	Jithu Joseph <jithu.joseph@intel.com>
+R:	Ashok Raj <ashok.raj@intel.com>
+R:	Tony Luck <tony.luck@intel.com>
+S:	Maintained
+F:	drivers/platform/x86/intel/ifs
+F:	include/trace/events/intel_ifs.h
+
 INTEL INTEGRATED SENSOR HUB DRIVER
 M:	Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
 M:	Jiri Kosina <jikos@kernel.org>
@ -17524,6 +17531,7 @@ R:	Steven Rostedt <rostedt@goodmis.org> (SCHED_FIFO/SCHED_RR)
 R:	Ben Segall <bsegall@google.com> (CONFIG_CFS_BANDWIDTH)
 R:	Mel Gorman <mgorman@suse.de> (CONFIG_NUMA_BALANCING)
 R:	Daniel Bristot de Oliveira <bristot@redhat.com> (SCHED_DEADLINE)
+R:	Valentin Schneider <vschneid@redhat.com> (TOPOLOGY)
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git sched/core
--- a/3
+++ b/3
@ -1037,6 +1037,7 @@ include-$(CONFIG_KASAN)		+= scripts/Makefile.kasan
 include-$(CONFIG_KCSAN)		+= scripts/Makefile.kcsan
 include-$(CONFIG_UBSAN)		+= scripts/Makefile.ubsan
 include-$(CONFIG_KCOV)		+= scripts/Makefile.kcov
+include-$(CONFIG_RANDSTRUCT)	+= scripts/Makefile.randstruct
 include-$(CONFIG_GCC_PLUGINS)	+= scripts/Makefile.gcc-plugins

 include $(addprefix $(srctree)/, $(include-y))
@ -1339,7 +1340,7 @@ install: sub_make_done :=
 # ---------------------------------------------------------------------------
 # Tools

-ifdef CONFIG_STACK_VALIDATION
+ifdef CONFIG_OBJTOOL
 prepare: tools/objtool
 endif

--- a/arch/Kconfig
+++ b/arch/Kconfig
@ -24,6 +24,13 @@ config KEXEC_ELF
 config HAVE_IMA_KEXEC
 	bool

+config ARCH_HAS_SUBPAGE_FAULTS
+	bool
+	help
+	  Select if the architecture can check permissions at sub-page
+	  granularity (e.g. arm64 MTE). The probe_user_*() functions
+	  must be implemented.
+
 config HOTPLUG_SMT
 	bool

@ -35,6 +42,7 @@ config KPROBES
 	depends on MODULES
 	depends on HAVE_KPROBES
 	select KALLSYMS
+	select TASKS_RCU if PREEMPTION
 	help
 	  Kprobes allows you to trap at almost any kernel address and
 	  execute a callback function.  register_kprobe() establishes
@ -46,6 +54,7 @@ config JUMP_LABEL
 	bool "Optimize very unlikely/likely branches"
 	depends on HAVE_ARCH_JUMP_LABEL
 	depends on CC_HAS_ASM_GOTO
+	select OBJTOOL if HAVE_JUMP_LABEL_HACK
 	help
 	 This option enables a transparent branch optimization that
 	 makes certain almost-always-true or almost-always-false branch
@ -723,10 +732,7 @@ config ARCH_SUPPORTS_CFI_CLANG
 config CFI_CLANG
 	bool "Use Clang's Control Flow Integrity (CFI)"
 	depends on LTO_CLANG && ARCH_SUPPORTS_CFI_CLANG
-	# Clang >= 12:
-	# - https://bugs.llvm.org/show_bug.cgi?id=46258
-	# - https://bugs.llvm.org/show_bug.cgi?id=47479
-	depends on CLANG_VERSION >= 120000
+	depends on CLANG_VERSION >= 140000
 	select KALLSYMS
 	help
 	  This option enables Clang’s forward-edge Control Flow Integrity
@ -1026,11 +1032,23 @@ config ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
 	depends on MMU
 	select ARCH_HAS_ELF_RANDOMIZE

+config HAVE_OBJTOOL
+	bool
+
+config HAVE_JUMP_LABEL_HACK
+	bool
+
+config HAVE_NOINSTR_HACK
+	bool
+
+config HAVE_NOINSTR_VALIDATION
+	bool
+
 config HAVE_STACK_VALIDATION
 	bool
 	help
-	  Architecture supports the 'objtool check' host tool command, which
-	  performs compile-time stack metadata validation.
+	  Architecture supports objtool compile-time frame pointer rule
+	  validation.

 config HAVE_RELIABLE_STACKTRACE
 	bool
@ -1300,6 +1318,7 @@ config HAVE_STATIC_CALL
 config HAVE_STATIC_CALL_INLINE
 	bool
 	depends on HAVE_STATIC_CALL
+	select OBJTOOL

 config HAVE_PREEMPT_DYNAMIC
 	bool
--- a/arch/alpha/include/asm/timex.h
+++ b/arch/alpha/include/asm/timex.h
@ -28,5 +28,6 @@ static inline cycles_t get_cycles (void)
 	__asm__ __volatile__ ("rpcc %0" : "=r"(ret));
 	return ret;
 }
+#define get_cycles get_cycles

 #endif
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@ -972,6 +972,17 @@ config ARM_ERRATA_764369
 	  relevant cache maintenance functions and sets a specific bit
 	  in the diagnostic control register of the SCU.

+config ARM_ERRATA_764319
+	bool "ARM errata: Read to DBGPRSR and DBGOSLSR may generate Undefined instruction"
+	depends on CPU_V7
+	help
+	  This option enables the workaround for the 764319 Cortex A-9 erratum.
+	  CP14 read accesses to the DBGPRSR and DBGOSLSR registers generate an
+	  unexpected Undefined Instruction exception when the DBGSWENABLE
+	  external pin is set to 0, even when the CP14 accesses are performed
+	  from a privileged mode. This work around catches the exception in a
+	  way the kernel does not stop execution.
+
 config ARM_ERRATA_775420
       bool "ARM errata: A data cache maintenance operation which aborts, might lead to deadlock"
       depends on CPU_V7
--- a/arch/arm/configs/lpc18xx_defconfig
+++ b/arch/arm/configs/lpc18xx_defconfig
@ -30,7 +30,6 @@ CONFIG_ARM_APPENDED_DTB=y
 # CONFIG_BLK_DEV_BSG is not set
 CONFIG_BINFMT_FLAT=y
 CONFIG_BINFMT_ZFLAT=y
-CONFIG_BINFMT_SHARED_FLAT=y
 # CONFIG_COREDUMP is not set
 CONFIG_NET=y
 CONFIG_PACKET=y
--- a/arch/arm/configs/mps2_defconfig
+++ b/arch/arm/configs/mps2_defconfig
@ -23,7 +23,6 @@ CONFIG_PREEMPT_VOLUNTARY=y
 CONFIG_ZBOOT_ROM_TEXT=0x0
 CONFIG_ZBOOT_ROM_BSS=0x0
 CONFIG_BINFMT_FLAT=y
-CONFIG_BINFMT_SHARED_FLAT=y
 # CONFIG_COREDUMP is not set
 # CONFIG_SUSPEND is not set
 CONFIG_NET=y
--- a/arch/arm/configs/stm32_defconfig
+++ b/arch/arm/configs/stm32_defconfig
@ -28,7 +28,6 @@ CONFIG_ZBOOT_ROM_BSS=0x0
 CONFIG_XIP_KERNEL=y
 CONFIG_XIP_PHYS_ADDR=0x08008000
 CONFIG_BINFMT_FLAT=y
-CONFIG_BINFMT_SHARED_FLAT=y
 # CONFIG_COREDUMP is not set
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
--- a/arch/arm/configs/vf610m4_defconfig
+++ b/arch/arm/configs/vf610m4_defconfig
@ -18,7 +18,6 @@ CONFIG_XIP_KERNEL=y
 CONFIG_XIP_PHYS_ADDR=0x0f000080
 CONFIG_BINFMT_FLAT=y
 CONFIG_BINFMT_ZFLAT=y
-CONFIG_BINFMT_SHARED_FLAT=y
 # CONFIG_SUSPEND is not set
 # CONFIG_UEVENT_HELPER is not set
 # CONFIG_STANDALONE is not set
--- a/arch/arm/include/asm/arch_gicv3.h
+++ b/arch/arm/include/asm/arch_gicv3.h
@ -48,6 +48,7 @@ static inline u32 read_ ## a64(void)		\
 	return read_sysreg(a32); 		\
 }						\

+CPUIF_MAP(ICC_EOIR1, ICC_EOIR1_EL1)
 CPUIF_MAP(ICC_PMR, ICC_PMR_EL1)
 CPUIF_MAP(ICC_AP0R0, ICC_AP0R0_EL1)
 CPUIF_MAP(ICC_AP0R1, ICC_AP0R1_EL1)
@ -63,12 +64,6 @@ CPUIF_MAP(ICC_AP1R3, ICC_AP1R3_EL1)

 /* Low-level accessors */

-static inline void gic_write_eoir(u32 irq)
-{
-	write_sysreg(irq, ICC_EOIR1);
-	isb();
-}
-
 static inline void gic_write_dir(u32 val)
 {
 	write_sysreg(val, ICC_DIR);
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@ -666,12 +666,11 @@ THUMB(	orr	\reg , \reg , #PSR_T_BIT	)
 	__adldst_l	str, \src, \sym, \tmp, \cond
 	.endm

-	.macro		__ldst_va, op, reg, tmp, sym, cond
+	.macro		__ldst_va, op, reg, tmp, sym, cond, offset
 #if __LINUX_ARM_ARCH__ >= 7 || \
    !defined(CONFIG_ARM_HAS_GROUP_RELOCS) || \
    (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS))
 	mov_l		\tmp, \sym, \cond
-	\op\cond	\reg, [\tmp]
 #else
 	/*
 	 * Avoid a literal load, by emitting a sequence of ADD/LDR instructions
@ -683,24 +682,29 @@ THUMB(	orr	\reg , \reg , #PSR_T_BIT	)
 	.reloc		.L0_\@, R_ARM_ALU_PC_G0_NC, \sym
 	.reloc		.L1_\@, R_ARM_ALU_PC_G1_NC, \sym
 	.reloc		.L2_\@, R_ARM_LDR_PC_G2, \sym
-.L0_\@: sub\cond	\tmp, pc, #8
-.L1_\@: sub\cond	\tmp, \tmp, #4
-.L2_\@: \op\cond	\reg, [\tmp, #0]
+.L0_\@: sub\cond	\tmp, pc, #8 - \offset
+.L1_\@: sub\cond	\tmp, \tmp, #4 - \offset
+.L2_\@:
 #endif
+	\op\cond	\reg, [\tmp, #\offset]
 	.endm

 	/*
 	 * ldr_va - load a 32-bit word from the virtual address of \sym
 	 */
-	.macro		ldr_va, rd:req, sym:req, cond
-	__ldst_va	ldr, \rd, \rd, \sym, \cond
+	.macro		ldr_va, rd:req, sym:req, cond, tmp, offset=0
+	.ifnb		\tmp
+	__ldst_va	ldr, \rd, \tmp, \sym, \cond, \offset
+	.else
+	__ldst_va	ldr, \rd, \rd, \sym, \cond, \offset
+	.endif
 	.endm

 	/*
 	 * str_va - store a 32-bit word to the virtual address of \sym
 	 */
 	.macro		str_va, rn:req, sym:req, tmp:req, cond
-	__ldst_va	str, \rn, \tmp, \sym, \cond
+	__ldst_va	str, \rn, \tmp, \sym, \cond, 0
 	.endm

 	/*
@ -727,9 +731,11 @@ THUMB(	orr	\reg , \reg , #PSR_T_BIT	)
 	 *		  are permitted to overlap with 'rd' if != sp
 	 */
 	.macro		ldr_this_cpu, rd:req, sym:req, t1:req, t2:req
-#if __LINUX_ARM_ARCH__ >= 7 || \
-    !defined(CONFIG_ARM_HAS_GROUP_RELOCS) || \
-    (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS))
+#ifndef CONFIG_SMP
+	ldr_va		\rd, \sym, tmp=\t1
+#elif __LINUX_ARM_ARCH__ >= 7 || \
+      !defined(CONFIG_ARM_HAS_GROUP_RELOCS) || \
+      (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS))
 	this_cpu_offset	\t1
 	mov_l		\t2, \sym
 	ldr		\rd, [\t1, \t2]
--- a/arch/arm/include/asm/module.h
+++ b/arch/arm/include/asm/module.h
@ -3,20 +3,10 @@
 #define _ASM_ARM_MODULE_H

 #include <asm-generic/module.h>
-
-struct unwind_table;
+#include <asm/unwind.h>

 #ifdef CONFIG_ARM_UNWIND
-enum {
-	ARM_SEC_INIT,
-	ARM_SEC_DEVINIT,
-	ARM_SEC_CORE,
-	ARM_SEC_EXIT,
-	ARM_SEC_DEVEXIT,
-	ARM_SEC_HOT,
-	ARM_SEC_UNLIKELY,
-	ARM_SEC_MAX,
-};
+#define ELF_SECTION_UNWIND 0x70000001
 #endif

 #define PLT_ENT_STRIDE		L1_CACHE_BYTES
@ -36,7 +26,8 @@ struct mod_plt_sec {

 struct mod_arch_specific {
 #ifdef CONFIG_ARM_UNWIND
-	struct unwind_table *unwind[ARM_SEC_MAX];
+	struct list_head unwind_list;
+	struct unwind_table *init_table;
 #endif
 #ifdef CONFIG_ARM_MODULE_PLTS
 	struct mod_plt_sec	core;
--- a/arch/arm/include/asm/timex.h
+++ b/arch/arm/include/asm/timex.h
@ -11,5 +11,6 @@

 typedef unsigned long cycles_t;
 #define get_cycles()	({ cycles_t c; read_current_timer(&c) ? 0 : c; })
+#define random_get_entropy() (((unsigned long)get_cycles()) ?: random_get_entropy_fallback())

 #endif
--- a/arch/arm/include/asm/unwind.h
+++ b/arch/arm/include/asm/unwind.h
@ -24,6 +24,7 @@ struct unwind_idx {

 struct unwind_table {
 	struct list_head list;
+	struct list_head mod_list;
 	const struct unwind_idx *start;
 	const struct unwind_idx *origin;
 	const struct unwind_idx *stop;
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@ -61,9 +61,8 @@
 	.macro	pabt_helper
 	@ PABORT handler takes pt_regs in r2, fault address in r4 and psr in r5
 #ifdef MULTI_PABORT
-	ldr	ip, .LCprocfns
-	mov	lr, pc
-	ldr	pc, [ip, #PROCESSOR_PABT_FUNC]
+	ldr_va	ip, processor, offset=PROCESSOR_PABT_FUNC
+	bl_r	ip
 #else
 	bl	CPU_PABORT_HANDLER
 #endif
@ -82,9 +81,8 @@
 	@ the fault status register in r1.  r9 must be preserved.
 	@
 #ifdef MULTI_DABORT
-	ldr	ip, .LCprocfns
-	mov	lr, pc
-	ldr	pc, [ip, #PROCESSOR_DABT_FUNC]
+	ldr_va	ip, processor, offset=PROCESSOR_DABT_FUNC
+	bl_r	ip
 #else
 	bl	CPU_DABORT_HANDLER
 #endif
@ -302,16 +300,6 @@ __fiq_svc:
 UNWIND(.fnend		)
 ENDPROC(__fiq_svc)

-	.align	5
-.LCcralign:
-	.word	cr_alignment
-#ifdef MULTI_DABORT
-.LCprocfns:
-	.word	processor
-#endif
-.LCfp:
-	.word	fp_enter
-
 /*
 * Abort mode handlers
 */
@ -370,7 +358,7 @@ ENDPROC(__fiq_abt)
 THUMB(	stmia	sp, {r0 - r12}	)

 ATRAP(	mrc	p15, 0, r7, c1, c0, 0)
- ATRAP(	ldr	r8, .LCcralign)
+ ATRAP(	ldr_va	r8, cr_alignment)

 	ldmia	r0, {r3 - r5}
 	add	r0, sp, #S_PC		@ here for interlock avoidance
@ -379,8 +367,6 @@ ENDPROC(__fiq_abt)
 	str	r3, [sp]		@ save the "real" r0 copied
 					@ from the exception stack

- ATRAP(	ldr	r8, [r8, #0])
-
 	@
 	@ We are now ready to fill in the remaining blanks on the stack:
 	@
@ -505,9 +491,7 @@ __und_usr_thumb:
 */
 #if __LINUX_ARM_ARCH__ < 7
 /* If the target CPU may not be Thumb-2-capable, a run-time check is needed: */
-#define NEED_CPU_ARCHITECTURE
-	ldr	r5, .LCcpu_architecture
-	ldr	r5, [r5]
+	ldr_va	r5, cpu_architecture
 	cmp	r5, #CPU_ARCH_ARMv7
 	blo	__und_usr_fault_16		@ 16bit undefined instruction
 /*
@ -654,12 +638,6 @@ call_fpe:
 	ret.w	lr				@ CP#14 (Debug)
 	ret.w	lr				@ CP#15 (Control)

-#ifdef NEED_CPU_ARCHITECTURE
-	.align	2
-.LCcpu_architecture:
-	.word	__cpu_architecture
-#endif
-
 #ifdef CONFIG_NEON
 	.align	6

@ -685,9 +663,8 @@ call_fpe:
 #endif

 do_fpe:
-	ldr	r4, .LCfp
 	add	r10, r10, #TI_FPSTATE		@ r10 = workspace
-	ldr	pc, [r4]			@ Call FP module USR entry point
+	ldr_va	pc, fp_enter, tmp=r4		@ Call FP module USR entry point

 /*
 * The FP module is called with these registers set:
@ -1101,6 +1078,12 @@ __kuser_helper_end:
 */
 	.macro	vector_stub, name, mode, correction=0
 	.align	5
+#ifdef CONFIG_HARDEN_BRANCH_HISTORY
+vector_bhb_bpiall_\name:
+	mcr	p15, 0, r0, c7, c5, 6	@ BPIALL
+	@ isb not needed due to "movs pc, lr" in the vector stub
+	@ which gives a "context synchronisation".
+#endif

 vector_\name:
 	.if \correction
@ -1111,7 +1094,8 @@ vector_\name:
 	stmia	sp, {r0, lr}		@ save r0, lr

 	@ Save spsr_<exception> (parent CPSR)
-2:	mrs	lr, spsr
+.Lvec_\name:
+	mrs	lr, spsr
 	str	lr, [sp, #8]		@ save spsr

 	@
@ -1148,25 +1132,11 @@ vector_bhb_loop8_\name:
 3:	W(b)	. + 4
 	subs	r0, r0, #1
 	bne	3b
-	dsb
-	isb
-	b	2b
-ENDPROC(vector_bhb_loop8_\name)
-
-vector_bhb_bpiall_\name:
-	.if \correction
-	sub	lr, lr, #\correction
-	.endif
-
-	@ Save r0, lr_<exception> (parent PC)
-	stmia	sp, {r0, lr}
-
-	@ bhb workaround
-	mcr	p15, 0, r0, c7, c5, 6	@ BPIALL
+	dsb	nsh
 	@ isb not needed due to "movs pc, lr" in the vector stub
 	@ which gives a "context synchronisation".
-	b	2b
-ENDPROC(vector_bhb_bpiall_\name)
+	b	.Lvec_\name
+ENDPROC(vector_bhb_loop8_\name)
 	.previous
 #endif

@ -1176,10 +1146,15 @@ ENDPROC(vector_bhb_bpiall_\name)
 	.endm

 	.section .stubs, "ax", %progbits
-	@ This must be the first word
+	@ These need to remain at the start of the section so that
+	@ they are in range of the 'SWI' entries in the vector tables
+	@ located 4k down.
+.L__vector_swi:
 	.word	vector_swi
 #ifdef CONFIG_HARDEN_BRANCH_HISTORY
+.L__vector_bhb_loop8_swi:
 	.word	vector_bhb_loop8_swi
+.L__vector_bhb_bpiall_swi:
 	.word	vector_bhb_bpiall_swi
 #endif

@ -1322,10 +1297,11 @@ vector_addrexcptn:
 	.globl	vector_fiq

 	.section .vectors, "ax", %progbits
-.L__vectors_start:
 	W(b)	vector_rst
 	W(b)	vector_und
-	W(ldr)	pc, .L__vectors_start + 0x1000
+ARM(	.reloc	., R_ARM_LDR_PC_G0, .L__vector_swi		)
+THUMB(	.reloc	., R_ARM_THM_PC12, .L__vector_swi		)
+	W(ldr)	pc, .
 	W(b)	vector_pabt
 	W(b)	vector_dabt
 	W(b)	vector_addrexcptn
@ -1334,10 +1310,11 @@ vector_addrexcptn:

 #ifdef CONFIG_HARDEN_BRANCH_HISTORY
 	.section .vectors.bhb.loop8, "ax", %progbits
-.L__vectors_bhb_loop8_start:
 	W(b)	vector_rst
 	W(b)	vector_bhb_loop8_und
-	W(ldr)	pc, .L__vectors_bhb_loop8_start + 0x1004
+ARM(	.reloc	., R_ARM_LDR_PC_G0, .L__vector_bhb_loop8_swi	)
+THUMB(	.reloc	., R_ARM_THM_PC12, .L__vector_bhb_loop8_swi	)
+	W(ldr)	pc, .
 	W(b)	vector_bhb_loop8_pabt
 	W(b)	vector_bhb_loop8_dabt
 	W(b)	vector_addrexcptn
@ -1345,10 +1322,11 @@ vector_addrexcptn:
 	W(b)	vector_bhb_loop8_fiq

 	.section .vectors.bhb.bpiall, "ax", %progbits
-.L__vectors_bhb_bpiall_start:
 	W(b)	vector_rst
 	W(b)	vector_bhb_bpiall_und
-	W(ldr)	pc, .L__vectors_bhb_bpiall_start + 0x1008
+ARM(	.reloc	., R_ARM_LDR_PC_G0, .L__vector_bhb_bpiall_swi	)
+THUMB(	.reloc	., R_ARM_THM_PC12, .L__vector_bhb_bpiall_swi	)
+	W(ldr)	pc, .
 	W(b)	vector_bhb_bpiall_pabt
 	W(b)	vector_bhb_bpiall_dabt
 	W(b)	vector_addrexcptn
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@ -164,7 +164,7 @@ ENTRY(vector_bhb_loop8_swi)
 1:	b	2f
 2:	subs	r8, r8, #1
 	bne	1b
-	dsb
+	dsb	nsh
 	isb
 	b	3f
 ENDPROC(vector_bhb_loop8_swi)
@ -198,7 +198,7 @@ ENTRY(vector_swi)
 #endif
 	reload_current r10, ip
 	zero_fp
-	alignment_trap r10, ip, __cr_alignment
+	alignment_trap r10, ip, cr_alignment
 	asm_trace_hardirqs_on save=0
 	enable_irq_notrace
 	ct_user_exit save=0
@ -328,14 +328,6 @@ __sys_trace_return:
 	bl	syscall_trace_exit
 	b	ret_slow_syscall

-	.align	5
-#ifdef CONFIG_ALIGNMENT_TRAP
-	.type	__cr_alignment, #object
-__cr_alignment:
-	.word	cr_alignment
-#endif
-	.ltorg
-
 	.macro	syscall_table_start, sym
 	.equ	__sys_nr, 0
 	.type	\sym, #object
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@ -48,8 +48,7 @@
 	.macro	alignment_trap, rtmp1, rtmp2, label
 #ifdef CONFIG_ALIGNMENT_TRAP
 	mrc	p15, 0, \rtmp2, c1, c0, 0
-	ldr	\rtmp1, \label
-	ldr	\rtmp1, [\rtmp1]
+	ldr_va	\rtmp1, \label
 	teq	\rtmp1, \rtmp2
 	mcrne	p15, 0, \rtmp1, c1, c0, 0
 #endif
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@ -941,6 +941,23 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr,
 	return ret;
 }

+#ifdef CONFIG_ARM_ERRATA_764319
+static int oslsr_fault;
+
+static int debug_oslsr_trap(struct pt_regs *regs, unsigned int instr)
+{
+	oslsr_fault = 1;
+	instruction_pointer(regs) += 4;
+	return 0;
+}
+
+static struct undef_hook debug_oslsr_hook = {
+	.instr_mask  = 0xffffffff,
+	.instr_val = 0xee115e91,
+	.fn = debug_oslsr_trap,
+};
+#endif
+
 /*
 * One-time initialisation.
 */
@ -974,7 +991,16 @@ static bool core_has_os_save_restore(void)
 	case ARM_DEBUG_ARCH_V7_1:
 		return true;
 	case ARM_DEBUG_ARCH_V7_ECP14:
+#ifdef CONFIG_ARM_ERRATA_764319
+		oslsr_fault = 0;
+		register_undef_hook(&debug_oslsr_hook);
 		ARM_DBG_READ(c1, c1, 4, oslsr);
+		unregister_undef_hook(&debug_oslsr_hook);
+		if (oslsr_fault)
+			return false;
+#else
+		ARM_DBG_READ(c1, c1, 4, oslsr);
+#endif
 		if (oslsr & ARM_OSLSR_OSLM0)
 			return true;
 		fallthrough;
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@ -459,46 +459,40 @@ int module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs,
 #ifdef CONFIG_ARM_UNWIND
 	const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
 	const Elf_Shdr *sechdrs_end = sechdrs + hdr->e_shnum;
-	struct mod_unwind_map maps[ARM_SEC_MAX];
-	int i;
+	struct list_head *unwind_list = &mod->arch.unwind_list;

-	memset(maps, 0, sizeof(maps));
+	INIT_LIST_HEAD(unwind_list);
+	mod->arch.init_table = NULL;

 	for (s = sechdrs; s < sechdrs_end; s++) {
 		const char *secname = secstrs + s->sh_name;
+		const char *txtname;
+		const Elf_Shdr *txt_sec;

-		if (!(s->sh_flags & SHF_ALLOC))
+		if (!(s->sh_flags & SHF_ALLOC) ||
+		    s->sh_type != ELF_SECTION_UNWIND)
 			continue;

-		if (strcmp(".ARM.exidx.init.text", secname) == 0)
-			maps[ARM_SEC_INIT].unw_sec = s;
-		else if (strcmp(".ARM.exidx", secname) == 0)
-			maps[ARM_SEC_CORE].unw_sec = s;
-		else if (strcmp(".ARM.exidx.exit.text", secname) == 0)
-			maps[ARM_SEC_EXIT].unw_sec = s;
-		else if (strcmp(".ARM.exidx.text.unlikely", secname) == 0)
-			maps[ARM_SEC_UNLIKELY].unw_sec = s;
-		else if (strcmp(".ARM.exidx.text.hot", secname) == 0)
-			maps[ARM_SEC_HOT].unw_sec = s;
-		else if (strcmp(".init.text", secname) == 0)
-			maps[ARM_SEC_INIT].txt_sec = s;
-		else if (strcmp(".text", secname) == 0)
-			maps[ARM_SEC_CORE].txt_sec = s;
-		else if (strcmp(".exit.text", secname) == 0)
-			maps[ARM_SEC_EXIT].txt_sec = s;
-		else if (strcmp(".text.unlikely", secname) == 0)
-			maps[ARM_SEC_UNLIKELY].txt_sec = s;
-		else if (strcmp(".text.hot", secname) == 0)
-			maps[ARM_SEC_HOT].txt_sec = s;
-	}
+		if (!strcmp(".ARM.exidx", secname))
+			txtname = ".text";
+		else
+			txtname = secname + strlen(".ARM.exidx");
+		txt_sec = find_mod_section(hdr, sechdrs, txtname);

-	for (i = 0; i < ARM_SEC_MAX; i++)
-		if (maps[i].unw_sec && maps[i].txt_sec)
-			mod->arch.unwind[i] =
-				unwind_table_add(maps[i].unw_sec->sh_addr,
-					         maps[i].unw_sec->sh_size,
-					         maps[i].txt_sec->sh_addr,
-					         maps[i].txt_sec->sh_size);
+		if (txt_sec) {
+			struct unwind_table *table =
+				unwind_table_add(s->sh_addr,
+						s->sh_size,
+						txt_sec->sh_addr,
+						txt_sec->sh_size);
+
+			list_add(&table->mod_list, unwind_list);
+
+			/* save init table for module_arch_freeing_init */
+			if (strcmp(".ARM.exidx.init.text", secname) == 0)
+				mod->arch.init_table = table;
+		}
+	}
 #endif
 #ifdef CONFIG_ARM_PATCH_PHYS_VIRT
 	s = find_mod_section(hdr, sechdrs, ".pv_table");
@ -519,19 +513,27 @@ void
 module_arch_cleanup(struct module *mod)
 {
 #ifdef CONFIG_ARM_UNWIND
-	int i;
+	struct unwind_table *tmp;
+	struct unwind_table *n;

-	for (i = 0; i < ARM_SEC_MAX; i++) {
-		unwind_table_del(mod->arch.unwind[i]);
-		mod->arch.unwind[i] = NULL;
+	list_for_each_entry_safe(tmp, n,
+			&mod->arch.unwind_list, mod_list) {
+		list_del(&tmp->mod_list);
+		unwind_table_del(tmp);
 	}
+	mod->arch.init_table = NULL;
 #endif
 }

 void __weak module_arch_freeing_init(struct module *mod)
 {
 #ifdef CONFIG_ARM_UNWIND
-	unwind_table_del(mod->arch.unwind[ARM_SEC_INIT]);
-	mod->arch.unwind[ARM_SEC_INIT] = NULL;
+	struct unwind_table *init = mod->arch.init_table;
+
+	if (init) {
+		mod->arch.init_table = NULL;
+		list_del(&init->mod_list);
+		unwind_table_del(init);
+	}
 #endif
 }
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@ -708,6 +708,7 @@ static_assert(offsetof(siginfo_t, si_upper)	== 0x18);
 static_assert(offsetof(siginfo_t, si_pkey)	== 0x14);
 static_assert(offsetof(siginfo_t, si_perf_data)	== 0x10);
 static_assert(offsetof(siginfo_t, si_perf_type)	== 0x14);
+static_assert(offsetof(siginfo_t, si_perf_flags) == 0x18);
 static_assert(offsetof(siginfo_t, si_band)	== 0x0c);
 static_assert(offsetof(siginfo_t, si_fd)	== 0x10);
 static_assert(offsetof(siginfo_t, si_call_addr)	== 0x0c);
--- a/arch/arm/mach-sunxi/Kconfig
+++ b/arch/arm/mach-sunxi/Kconfig
@ -4,10 +4,7 @@ menuconfig ARCH_SUNXI
 	depends on ARCH_MULTI_V5 || ARCH_MULTI_V7
 	select ARCH_HAS_RESET_CONTROLLER
 	select CLKSRC_MMIO
-	select GENERIC_IRQ_CHIP
 	select GPIOLIB
-	select IRQ_DOMAIN_HIERARCHY
-	select IRQ_FASTEOI_HIERARCHY_HANDLERS
 	select PINCTRL
 	select PM_OPP
 	select SUN4I_TIMER
@ -22,10 +19,12 @@ if ARCH_MULTI_V7
 config MACH_SUN4I
 	bool "Allwinner A10 (sun4i) SoCs support"
 	default ARCH_SUNXI
+	select SUN4I_INTC

 config MACH_SUN5I
 	bool "Allwinner A10s / A13 (sun5i) SoCs support"
 	default ARCH_SUNXI
+	select SUN4I_INTC
 	select SUN5I_HSTIMER

 config MACH_SUN6I
@ -34,6 +33,8 @@ config MACH_SUN6I
 	select ARM_GIC
 	select MFD_SUN6I_PRCM
 	select SUN5I_HSTIMER
+	select SUN6I_R_INTC
+	select SUNXI_NMI_INTC

 config MACH_SUN7I
 	bool "Allwinner A20 (sun7i) SoCs support"
@ -43,17 +44,21 @@ config MACH_SUN7I
 	select ARCH_SUPPORTS_BIG_ENDIAN
 	select HAVE_ARM_ARCH_TIMER
 	select SUN5I_HSTIMER
+	select SUNXI_NMI_INTC

 config MACH_SUN8I
 	bool "Allwinner sun8i Family SoCs support"
 	default ARCH_SUNXI
 	select ARM_GIC
 	select MFD_SUN6I_PRCM
+	select SUN6I_R_INTC
+	select SUNXI_NMI_INTC

 config MACH_SUN9I
 	bool "Allwinner (sun9i) SoCs support"
 	default ARCH_SUNXI
 	select ARM_GIC
+	select SUNXI_NMI_INTC

 config ARCH_SUNXI_MC_SMP
 	bool
@ -69,6 +74,7 @@ if ARCH_MULTI_V5
 config MACH_SUNIV
 	bool "Allwinner ARMv5 F-series (suniv) SoCs support"
 	default ARCH_SUNXI
+	select SUN4I_INTC
 	help
 	  Support for Allwinner suniv ARMv5 SoCs.
 	  (F1C100A, F1C100s, F1C200s, F1C500, F1C600)
--- a/arch/arm/vdso/Makefile
+++ b/arch/arm/vdso/Makefile
@ -28,7 +28,7 @@ CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
 CFLAGS_REMOVE_vdso.o = -pg

 # Force -O2 to avoid libgcc dependencies
-CFLAGS_REMOVE_vgettimeofday.o = -pg -Os $(GCC_PLUGINS_CFLAGS)
+CFLAGS_REMOVE_vgettimeofday.o = -pg -Os $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS)
 ifeq ($(c-gettimeofday-y),)
 CFLAGS_vgettimeofday.o = -O2
 else
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@ -262,31 +262,31 @@ config ARM64_CONT_PMD_SHIFT
 	default 4

 config ARCH_MMAP_RND_BITS_MIN
-       default 14 if ARM64_64K_PAGES
-       default 16 if ARM64_16K_PAGES
-       default 18
+	default 14 if ARM64_64K_PAGES
+	default 16 if ARM64_16K_PAGES
+	default 18

 # max bits determined by the following formula:
 #  VA_BITS - PAGE_SHIFT - 3
 config ARCH_MMAP_RND_BITS_MAX
-       default 19 if ARM64_VA_BITS=36
-       default 24 if ARM64_VA_BITS=39
-       default 27 if ARM64_VA_BITS=42
-       default 30 if ARM64_VA_BITS=47
-       default 29 if ARM64_VA_BITS=48 && ARM64_64K_PAGES
-       default 31 if ARM64_VA_BITS=48 && ARM64_16K_PAGES
-       default 33 if ARM64_VA_BITS=48
-       default 14 if ARM64_64K_PAGES
-       default 16 if ARM64_16K_PAGES
-       default 18
+	default 19 if ARM64_VA_BITS=36
+	default 24 if ARM64_VA_BITS=39
+	default 27 if ARM64_VA_BITS=42
+	default 30 if ARM64_VA_BITS=47
+	default 29 if ARM64_VA_BITS=48 && ARM64_64K_PAGES
+	default 31 if ARM64_VA_BITS=48 && ARM64_16K_PAGES
+	default 33 if ARM64_VA_BITS=48
+	default 14 if ARM64_64K_PAGES
+	default 16 if ARM64_16K_PAGES
+	default 18

 config ARCH_MMAP_RND_COMPAT_BITS_MIN
-       default 7 if ARM64_64K_PAGES
-       default 9 if ARM64_16K_PAGES
-       default 11
+	default 7 if ARM64_64K_PAGES
+	default 9 if ARM64_16K_PAGES
+	default 11

 config ARCH_MMAP_RND_COMPAT_BITS_MAX
-       default 16
+	default 16

 config NO_IOPORT_MAP
 	def_bool y if !PCI
@ -313,7 +313,7 @@ config GENERIC_HWEIGHT
 	def_bool y

 config GENERIC_CSUM
-        def_bool y
+	def_bool y

 config GENERIC_CALIBRATE_DELAY
 	def_bool y
@ -1046,8 +1046,7 @@ config SOCIONEXT_SYNQUACER_PREITS

 	  If unsure, say Y.

-endmenu
-
+endmenu # "ARM errata workarounds via the alternatives framework"

 choice
 	prompt "Page size"
@ -1575,9 +1574,9 @@ config SETEND_EMULATION
 	  be unexpected results in the applications.

 	  If unsure, say Y
-endif
+endif # ARMV8_DEPRECATED

-endif
+endif # COMPAT

 menu "ARMv8.1 architectural features"

@ -1602,15 +1601,15 @@ config ARM64_PAN
 	bool "Enable support for Privileged Access Never (PAN)"
 	default y
 	help
-	 Privileged Access Never (PAN; part of the ARMv8.1 Extensions)
-	 prevents the kernel or hypervisor from accessing user-space (EL0)
-	 memory directly.
+	  Privileged Access Never (PAN; part of the ARMv8.1 Extensions)
+	  prevents the kernel or hypervisor from accessing user-space (EL0)
+	  memory directly.

-	 Choosing this option will cause any unprotected (not using
-	 copy_to_user et al) memory access to fail with a permission fault.
+	  Choosing this option will cause any unprotected (not using
+	  copy_to_user et al) memory access to fail with a permission fault.

-	 The feature is detected at runtime, and will remain as a 'nop'
-	 instruction if the cpu does not implement the feature.
+	  The feature is detected at runtime, and will remain as a 'nop'
+	  instruction if the cpu does not implement the feature.

 config AS_HAS_LDAPR
 	def_bool $(as-instr,.arch_extension rcpc)
@ -1638,15 +1637,15 @@ config ARM64_USE_LSE_ATOMICS
 	  built with binutils >= 2.25 in order for the new instructions
 	  to be used.

-endmenu
+endmenu # "ARMv8.1 architectural features"

 menu "ARMv8.2 architectural features"

 config AS_HAS_ARMV8_2
-       def_bool $(cc-option,-Wa$(comma)-march=armv8.2-a)
+	def_bool $(cc-option,-Wa$(comma)-march=armv8.2-a)

 config AS_HAS_SHA3
-       def_bool $(as-instr,.arch armv8.2-a+sha3)
+	def_bool $(as-instr,.arch armv8.2-a+sha3)

 config ARM64_PMEM
 	bool "Enable support for persistent memory"
@ -1690,7 +1689,7 @@ config ARM64_CNP
 	  at runtime, and does not affect PEs that do not implement
 	  this feature.

-endmenu
+endmenu # "ARMv8.2 architectural features"

 menu "ARMv8.3 architectural features"

@ -1753,7 +1752,7 @@ config AS_HAS_PAC
 config AS_HAS_CFI_NEGATE_RA_STATE
 	def_bool $(as-instr,.cfi_startproc\n.cfi_negate_ra_state\n.cfi_endproc\n)

-endmenu
+endmenu # "ARMv8.3 architectural features"

 menu "ARMv8.4 architectural features"

@ -1794,7 +1793,7 @@ config ARM64_TLB_RANGE
 	  The feature introduces new assembly instructions, and they were
 	  support when binutils >= 2.30.

-endmenu
+endmenu # "ARMv8.4 architectural features"

 menu "ARMv8.5 architectural features"

@ -1880,6 +1879,7 @@ config ARM64_MTE
 	depends on AS_HAS_LSE_ATOMICS
 	# Required for tag checking in the uaccess routines
 	depends on ARM64_PAN
+	select ARCH_HAS_SUBPAGE_FAULTS
 	select ARCH_USES_HIGH_VMA_FLAGS
 	help
 	  Memory Tagging (part of the ARMv8.5 Extensions) provides
@ -1901,7 +1901,7 @@ config ARM64_MTE

 	  Documentation/arm64/memory-tagging-extension.rst.

-endmenu
+endmenu # "ARMv8.5 architectural features"

 menu "ARMv8.7 architectural features"

@ -1910,12 +1910,12 @@ config ARM64_EPAN
 	default y
 	depends on ARM64_PAN
 	help
-	 Enhanced Privileged Access Never (EPAN) allows Privileged
-	 Access Never to be used with Execute-only mappings.
+	  Enhanced Privileged Access Never (EPAN) allows Privileged
+	  Access Never to be used with Execute-only mappings.

-	 The feature is detected at runtime, and will remain disabled
-	 if the cpu does not implement the feature.
-endmenu
+	  The feature is detected at runtime, and will remain disabled
+	  if the cpu does not implement the feature.
+endmenu # "ARMv8.7 architectural features"

 config ARM64_SVE
 	bool "ARM Scalable Vector Extension support"
@ -1948,6 +1948,17 @@ config ARM64_SVE
 	  booting the kernel.  If unsure and you are not observing these
 	  symptoms, you should assume that it is safe to say Y.

+config ARM64_SME
+	bool "ARM Scalable Matrix Extension support"
+	default y
+	depends on ARM64_SVE
+	help
+	  The Scalable Matrix Extension (SME) is an extension to the AArch64
+	  execution state which utilises a substantial subset of the SVE
+	  instruction set, together with the addition of new architectural
+	  register state capable of holding two dimensional matrix tiles to
+	  enable various matrix operations.
+
 config ARM64_MODULE_PLTS
 	bool "Use PLTs to allow module memory to spill over into vmalloc area"
 	depends on MODULES
@ -1991,7 +2002,7 @@ config ARM64_DEBUG_PRIORITY_MASKING
 	  the validity of ICC_PMR_EL1 when calling concerned functions.

 	  If unsure, say N
-endif
+endif # ARM64_PSEUDO_NMI

 config RELOCATABLE
 	bool "Build a relocatable kernel image" if EXPERT
@ -2050,7 +2061,19 @@ config STACKPROTECTOR_PER_TASK
 	def_bool y
 	depends on STACKPROTECTOR && CC_HAVE_STACKPROTECTOR_SYSREG

-endmenu
+# The GPIO number here must be sorted by descending number. In case of
+# a multiplatform kernel, we just want the highest value required by the
+# selected platforms.
+config ARCH_NR_GPIO
+        int
+        default 2048 if ARCH_APPLE
+        default 0
+        help
+          Maximum number of GPIOs in the system.
+
+          If unsure, leave the default value.
+
+endmenu # "Kernel Features"

 menu "Boot options"

@ -2114,7 +2137,7 @@ config EFI
 	help
 	  This option provides support for runtime services provided
 	  by UEFI firmware (such as non-volatile variables, realtime
-          clock, and platform reset). A UEFI stub is also provided to
+	  clock, and platform reset). A UEFI stub is also provided to
 	  allow the kernel to be booted as an EFI application. This
 	  is only useful on systems that have UEFI firmware.

@ -2129,7 +2152,7 @@ config DMI
 	  However, even with this option, the resultant kernel should
 	  continue to boot on existing non-UEFI platforms.

-endmenu
+endmenu # "Boot options"

 config SYSVIPC_COMPAT
 	def_bool y
@ -2150,7 +2173,7 @@ config ARCH_HIBERNATION_HEADER
 config ARCH_SUSPEND_POSSIBLE
 	def_bool y

-endmenu
+endmenu # "Power management options"

 menu "CPU Power Management"

@ -2158,7 +2181,7 @@ source "drivers/cpuidle/Kconfig"

 source "drivers/cpufreq/Kconfig"

-endmenu
+endmenu # "CPU Power Management"

 source "drivers/acpi/Kconfig"

@ -2166,4 +2189,4 @@ source "arch/arm64/kvm/Kconfig"

 if CRYPTO
 source "arch/arm64/crypto/Kconfig"
-endif
+endif # CRYPTO
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@ -11,12 +11,11 @@ config ARCH_ACTIONS
 config ARCH_SUNXI
 	bool "Allwinner sunxi 64-bit SoC Family"
 	select ARCH_HAS_RESET_CONTROLLER
-	select GENERIC_IRQ_CHIP
-	select IRQ_DOMAIN_HIERARCHY
-	select IRQ_FASTEOI_HIERARCHY_HANDLERS
 	select PINCTRL
 	select RESET_CONTROLLER
 	select SUN4I_TIMER
+	select SUN6I_R_INTC
+	select SUNXI_NMI_INTC
 	help
 	  This enables support for Allwinner sunxi based SoCs like the A64.

@ -253,6 +252,7 @@ config ARCH_INTEL_SOCFPGA

 config ARCH_SYNQUACER
 	bool "Socionext SynQuacer SoC Family"
+	select IRQ_FASTEOI_HIERARCHY_HANDLERS

 config ARCH_TEGRA
 	bool "NVIDIA Tegra SoC Family"
@ -325,4 +325,4 @@ config ARCH_ZYNQMP
 	help
 	  This enables support for Xilinx ZynqMP Family

-endmenu
+endmenu # "Platform selection"
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@ -7,3 +7,4 @@ generic-y += parport.h
 generic-y += user.h

 generated-y += cpucaps.h
+generated-y += sysreg-defs.h
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@ -26,12 +26,6 @@
 * sets the GP register's most significant bits to 0 with an explicit cast.
 */

-static inline void gic_write_eoir(u32 irq)
-{
-	write_sysreg_s(irq, SYS_ICC_EOIR1_EL1);
-	isb();
-}
-
 static __always_inline void gic_write_dir(u32 irq)
 {
 	write_sysreg_s(irq, SYS_ICC_DIR_EL1);
--- a/arch/arm64/include/asm/archrandom.h
+++ b/arch/arm64/include/asm/archrandom.h
@ -142,7 +142,7 @@ static inline bool __init __early_cpu_has_rndr(void)
 {
 	/* Open code as we run prior to the first call to cpufeature. */
 	unsigned long ftr = read_sysreg_s(SYS_ID_AA64ISAR0_EL1);
-	return (ftr >> ID_AA64ISAR0_RNDR_SHIFT) & 0xf;
+	return (ftr >> ID_AA64ISAR0_EL1_RNDR_SHIFT) & 0xf;
 }

 static inline bool __init __must_check
--- a/arch/arm64/include/asm/asm-bug.h
+++ b/arch/arm64/include/asm/asm-bug.h
@ -14,7 +14,7 @@
 	14472:	.string file;					\
 		.popsection;					\
 								\
-		.long 14472b - 14470b;				\
+		.long 14472b - .;				\
 		.short line;
 #else
 #define _BUGVERBOSE_LOCATION(file, line)
@ -25,7 +25,7 @@
 #define __BUG_ENTRY(flags) 				\
 		.pushsection __bug_table,"aw";		\
 		.align 2;				\
-	14470:	.long 14471f - 14470b;			\
+	14470:	.long 14471f - .;			\
 _BUGVERBOSE_LOCATION(__FILE__, __LINE__)		\
 		.short flags; 				\
 		.popsection;				\
--- a/arch/arm64/include/asm/compiler.h
+++ b/arch/arm64/include/asm/compiler.h
@ -23,20 +23,4 @@
 #define __builtin_return_address(val)					\
 	(void *)(ptrauth_clear_pac((unsigned long)__builtin_return_address(val)))

-#ifdef CONFIG_CFI_CLANG
-/*
- * With CONFIG_CFI_CLANG, the compiler replaces function address
- * references with the address of the function's CFI jump table
- * entry. The function_nocfi macro always returns the address of the
- * actual function instead.
- */
-#define function_nocfi(x) ({						\
-	void *addr;							\
-	asm("adrp %0, " __stringify(x) "\n\t"				\
-	    "add  %0, %0, :lo12:" __stringify(x)			\
-	    : "=r" (addr));						\
-	addr;								\
-})
-#endif
-
 #endif /* __ASM_COMPILER_H */
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@ -58,11 +58,15 @@ struct cpuinfo_arm64 {
 	u64		reg_id_aa64pfr0;
 	u64		reg_id_aa64pfr1;
 	u64		reg_id_aa64zfr0;
+	u64		reg_id_aa64smfr0;

 	struct cpuinfo_32bit	aarch32;

 	/* pseudo-ZCR for recording maximum ZCR_EL1 LEN value: */
 	u64		reg_zcr;
+
+	/* pseudo-SMCR for recording maximum SMCR_EL1 LEN value: */
+	u64		reg_smcr;
 };

 DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data);
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@ -622,6 +622,13 @@ static inline bool id_aa64pfr0_sve(u64 pfr0)
 	return val > 0;
 }

+static inline bool id_aa64pfr1_sme(u64 pfr1)
+{
+	u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_SME_SHIFT);
+
+	return val > 0;
+}
+
 static inline bool id_aa64pfr1_mte(u64 pfr1)
 {
 	u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_MTE_SHIFT);
@ -759,6 +766,23 @@ static __always_inline bool system_supports_sve(void)
 		cpus_have_const_cap(ARM64_SVE);
 }

+static __always_inline bool system_supports_sme(void)
+{
+	return IS_ENABLED(CONFIG_ARM64_SME) &&
+		cpus_have_const_cap(ARM64_SME);
+}
+
+static __always_inline bool system_supports_fa64(void)
+{
+	return IS_ENABLED(CONFIG_ARM64_SME) &&
+		cpus_have_const_cap(ARM64_SME_FA64);
+}
+
+static __always_inline bool system_supports_tpidr2(void)
+{
+	return system_supports_sme();
+}
+
 static __always_inline bool system_supports_cnp(void)
 {
 	return IS_ENABLED(CONFIG_ARM64_CNP) &&
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@ -36,7 +36,7 @@
 #define MIDR_VARIANT(midr)	\
 	(((midr) & MIDR_VARIANT_MASK) >> MIDR_VARIANT_SHIFT)
 #define MIDR_IMPLEMENTOR_SHIFT	24
-#define MIDR_IMPLEMENTOR_MASK	(0xff << MIDR_IMPLEMENTOR_SHIFT)
+#define MIDR_IMPLEMENTOR_MASK	(0xffU << MIDR_IMPLEMENTOR_SHIFT)
 #define MIDR_IMPLEMENTOR(midr)	\
 	(((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT)

--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@ -64,7 +64,7 @@ struct task_struct;

 struct step_hook {
 	struct list_head node;
-	int (*fn)(struct pt_regs *regs, unsigned int esr);
+	int (*fn)(struct pt_regs *regs, unsigned long esr);
 };

 void register_user_step_hook(struct step_hook *hook);
@ -75,7 +75,7 @@ void unregister_kernel_step_hook(struct step_hook *hook);

 struct break_hook {
 	struct list_head node;
-	int (*fn)(struct pt_regs *regs, unsigned int esr);
+	int (*fn)(struct pt_regs *regs, unsigned long esr);
 	u16 imm;
 	u16 mask; /* These bits are ignored when comparing with imm */
 };
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@ -143,6 +143,50 @@
 .Lskip_sve_\@:
 .endm

+/* SME register access and priority mapping */
+.macro __init_el2_nvhe_sme
+	mrs	x1, id_aa64pfr1_el1
+	ubfx	x1, x1, #ID_AA64PFR1_SME_SHIFT, #4
+	cbz	x1, .Lskip_sme_\@
+
+	bic	x0, x0, #CPTR_EL2_TSM		// Also disable SME traps
+	msr	cptr_el2, x0			// Disable copro. traps to EL2
+	isb
+
+	mrs	x1, sctlr_el2
+	orr	x1, x1, #SCTLR_ELx_ENTP2	// Disable TPIDR2 traps
+	msr	sctlr_el2, x1
+	isb
+
+	mov	x1, #0				// SMCR controls
+
+	mrs_s	x2, SYS_ID_AA64SMFR0_EL1
+	ubfx	x2, x2, #ID_AA64SMFR0_FA64_SHIFT, #1 // Full FP in SM?
+	cbz	x2, .Lskip_sme_fa64_\@
+
+	orr	x1, x1, SMCR_ELx_FA64_MASK
+.Lskip_sme_fa64_\@:
+
+	orr	x1, x1, #SMCR_ELx_LEN_MASK	// Enable full SME vector
+	msr_s	SYS_SMCR_EL2, x1		// length for EL1.
+
+	mrs_s	x1, SYS_SMIDR_EL1		// Priority mapping supported?
+	ubfx    x1, x1, #SMIDR_EL1_SMPS_SHIFT, #1
+	cbz     x1, .Lskip_sme_\@
+
+	msr_s	SYS_SMPRIMAP_EL2, xzr		// Make all priorities equal
+
+	mrs	x1, id_aa64mmfr1_el1		// HCRX_EL2 present?
+	ubfx	x1, x1, #ID_AA64MMFR1_HCX_SHIFT, #4
+	cbz	x1, .Lskip_sme_\@
+
+	mrs_s	x1, SYS_HCRX_EL2
+	orr	x1, x1, #HCRX_EL2_SMPME_MASK	// Enable priority mapping
+	msr_s	SYS_HCRX_EL2, x1
+
+.Lskip_sme_\@:
+.endm
+
 /* Disable any fine grained traps */
 .macro __init_el2_fgt
 	mrs	x1, id_aa64mmfr0_el1
@ -153,15 +197,26 @@
 	mrs	x1, id_aa64dfr0_el1
 	ubfx	x1, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4
 	cmp	x1, #3
-	b.lt	.Lset_fgt_\@
+	b.lt	.Lset_debug_fgt_\@
 	/* Disable PMSNEVFR_EL1 read and write traps */
 	orr	x0, x0, #(1 << 62)

-.Lset_fgt_\@:
+.Lset_debug_fgt_\@:
 	msr_s	SYS_HDFGRTR_EL2, x0
 	msr_s	SYS_HDFGWTR_EL2, x0
-	msr_s	SYS_HFGRTR_EL2, xzr
-	msr_s	SYS_HFGWTR_EL2, xzr
+
+	mov	x0, xzr
+	mrs	x1, id_aa64pfr1_el1
+	ubfx	x1, x1, #ID_AA64PFR1_SME_SHIFT, #4
+	cbz	x1, .Lset_fgt_\@
+
+	/* Disable nVHE traps of TPIDR2 and SMPRI */
+	orr	x0, x0, #HFGxTR_EL2_nSMPRI_EL1_MASK
+	orr	x0, x0, #HFGxTR_EL2_nTPIDR2_EL0_MASK
+
+.Lset_fgt_\@:
+	msr_s	SYS_HFGRTR_EL2, x0
+	msr_s	SYS_HFGWTR_EL2, x0
 	msr_s	SYS_HFGITR_EL2, xzr

 	mrs	x1, id_aa64pfr0_el1		// AMU traps UNDEF without AMU
@ -196,6 +251,7 @@
 	__init_el2_nvhe_idregs
 	__init_el2_nvhe_cptr
 	__init_el2_nvhe_sve
+	__init_el2_nvhe_sme
 	__init_el2_fgt
 	__init_el2_nvhe_prepare_eret
 .endm
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@ -37,7 +37,8 @@
 #define ESR_ELx_EC_ERET		(0x1a)	/* EL2 only */
 /* Unallocated EC: 0x1B */
 #define ESR_ELx_EC_FPAC		(0x1C)	/* EL1 and above */
-/* Unallocated EC: 0x1D - 0x1E */
+#define ESR_ELx_EC_SME		(0x1D)
+/* Unallocated EC: 0x1E */
 #define ESR_ELx_EC_IMP_DEF	(0x1f)	/* EL3 only */
 #define ESR_ELx_EC_IABT_LOW	(0x20)
 #define ESR_ELx_EC_IABT_CUR	(0x21)
@ -75,6 +76,7 @@
 #define ESR_ELx_IL_SHIFT	(25)
 #define ESR_ELx_IL		(UL(1) << ESR_ELx_IL_SHIFT)
 #define ESR_ELx_ISS_MASK	(ESR_ELx_IL - 1)
+#define ESR_ELx_ISS(esr)	((esr) & ESR_ELx_ISS_MASK)

 /* ISS field definitions shared by different classes */
 #define ESR_ELx_WNR_SHIFT	(6)
@ -136,7 +138,7 @@
 #define ESR_ELx_WFx_ISS_TI	(UL(1) << 0)
 #define ESR_ELx_WFx_ISS_WFI	(UL(0) << 0)
 #define ESR_ELx_WFx_ISS_WFE	(UL(1) << 0)
-#define ESR_ELx_xVC_IMM_MASK	((1UL << 16) - 1)
+#define ESR_ELx_xVC_IMM_MASK	((UL(1) << 16) - 1)

 #define DISR_EL1_IDS		(UL(1) << 24)
 /*
@ -327,17 +329,26 @@
 #define ESR_ELx_CP15_32_ISS_SYS_CNTFRQ	(ESR_ELx_CP15_32_ISS_SYS_VAL(0, 0, 14, 0) |\
 					 ESR_ELx_CP15_32_ISS_DIR_READ)

+/*
+ * ISS values for SME traps
+ */
+
+#define ESR_ELx_SME_ISS_SME_DISABLED	0
+#define ESR_ELx_SME_ISS_ILL		1
+#define ESR_ELx_SME_ISS_SM_DISABLED	2
+#define ESR_ELx_SME_ISS_ZA_DISABLED	3
+
 #ifndef __ASSEMBLY__
 #include <asm/types.h>

-static inline bool esr_is_data_abort(u32 esr)
+static inline bool esr_is_data_abort(unsigned long esr)
 {
-	const u32 ec = ESR_ELx_EC(esr);
+	const unsigned long ec = ESR_ELx_EC(esr);

 	return ec == ESR_ELx_EC_DABT_LOW || ec == ESR_ELx_EC_DABT_CUR;
 }

-const char *esr_get_class_string(u32 esr);
+const char *esr_get_class_string(unsigned long esr);
 #endif /* __ASSEMBLY */

 #endif /* __ASM_ESR_H */
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@ -19,9 +19,9 @@
 #define __exception_irq_entry	__kprobes
 #endif

-static inline u32 disr_to_esr(u64 disr)
+static inline unsigned long disr_to_esr(u64 disr)
 {
-	unsigned int esr = ESR_ELx_EC_SERROR << ESR_ELx_EC_SHIFT;
+	unsigned long esr = ESR_ELx_EC_SERROR << ESR_ELx_EC_SHIFT;

 	if ((disr & DISR_EL1_IDS) == 0)
 		esr |= (disr & DISR_EL1_ESR_MASK);
@ -57,23 +57,24 @@ asmlinkage void call_on_irq_stack(struct pt_regs *regs,
 				  void (*func)(struct pt_regs *));
 asmlinkage void asm_exit_to_user_mode(struct pt_regs *regs);

-void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs);
+void do_mem_abort(unsigned long far, unsigned long esr, struct pt_regs *regs);
 void do_undefinstr(struct pt_regs *regs);
 void do_bti(struct pt_regs *regs);
-void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
+void do_debug_exception(unsigned long addr_if_watchpoint, unsigned long esr,
 			struct pt_regs *regs);
-void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs);
-void do_sve_acc(unsigned int esr, struct pt_regs *regs);
-void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs);
-void do_sysinstr(unsigned int esr, struct pt_regs *regs);
-void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
-void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr);
-void do_cp15instr(unsigned int esr, struct pt_regs *regs);
+void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs);
+void do_sve_acc(unsigned long esr, struct pt_regs *regs);
+void do_sme_acc(unsigned long esr, struct pt_regs *regs);
+void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs);
+void do_sysinstr(unsigned long esr, struct pt_regs *regs);
+void do_sp_pc_abort(unsigned long addr, unsigned long esr, struct pt_regs *regs);
+void bad_el0_sync(struct pt_regs *regs, int reason, unsigned long esr);
+void do_cp15instr(unsigned long esr, struct pt_regs *regs);
 void do_el0_svc(struct pt_regs *regs);
 void do_el0_svc_compat(struct pt_regs *regs);
-void do_ptrauth_fault(struct pt_regs *regs, unsigned int esr);
-void do_serror(struct pt_regs *regs, unsigned int esr);
+void do_ptrauth_fault(struct pt_regs *regs, unsigned long esr);
+void do_serror(struct pt_regs *regs, unsigned long esr);
 void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags);

-void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far);
+void panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigned long far);
 #endif	/* __ASM_EXCEPTION_H */
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@ -32,6 +32,18 @@
 #define VFP_STATE_SIZE		((32 * 8) + 4)
 #endif

+/*
+ * When we defined the maximum SVE vector length we defined the ABI so
+ * that the maximum vector length included all the reserved for future
+ * expansion bits in ZCR rather than those just currently defined by
+ * the architecture. While SME follows a similar pattern the fact that
+ * it includes a square matrix means that any allocations that attempt
+ * to cover the maximum potential vector length (such as happen with
+ * the regset used for ptrace) end up being extremely large. Define
+ * the much lower actual limit for use in such situations.
+ */
+#define SME_VQ_MAX	16
+
 struct task_struct;

 extern void fpsimd_save_state(struct user_fpsimd_state *state);
@ -46,11 +58,23 @@ extern void fpsimd_restore_current_state(void);
 extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);

 extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state,
-				     void *sve_state, unsigned int sve_vl);
+				     void *sve_state, unsigned int sve_vl,
+				     void *za_state, unsigned int sme_vl,
+				     u64 *svcr);

 extern void fpsimd_flush_task_state(struct task_struct *target);
 extern void fpsimd_save_and_flush_cpu_state(void);

+static inline bool thread_sm_enabled(struct thread_struct *thread)
+{
+	return system_supports_sme() && (thread->svcr & SVCR_SM_MASK);
+}
+
+static inline bool thread_za_enabled(struct thread_struct *thread)
+{
+	return system_supports_sme() && (thread->svcr & SVCR_ZA_MASK);
+}
+
 /* Maximum VL that SVE/SME VL-agnostic software can transparently support */
 #define VL_ARCH_MAX 0x100

@ -62,7 +86,14 @@ static inline size_t sve_ffr_offset(int vl)

 static inline void *sve_pffr(struct thread_struct *thread)
 {
-	return (char *)thread->sve_state + sve_ffr_offset(thread_get_sve_vl(thread));
+	unsigned int vl;
+
+	if (system_supports_sme() && thread_sm_enabled(thread))
+		vl = thread_get_sme_vl(thread);
+	else
+		vl = thread_get_sve_vl(thread);
+
+	return (char *)thread->sve_state + sve_ffr_offset(vl);
 }

 extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr);
@ -71,11 +102,17 @@ extern void sve_load_state(void const *state, u32 const *pfpsr,
 extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1);
 extern unsigned int sve_get_vl(void);
 extern void sve_set_vq(unsigned long vq_minus_1);
+extern void sme_set_vq(unsigned long vq_minus_1);
+extern void za_save_state(void *state);
+extern void za_load_state(void const *state);

 struct arm64_cpu_capabilities;
 extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
+extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused);
+extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused);

 extern u64 read_zcr_features(void);
+extern u64 read_smcr_features(void);

 /*
 * Helpers to translate bit indices in sve_vq_map to VQ values (and
@ -119,6 +156,7 @@ struct vl_info {
 extern void sve_alloc(struct task_struct *task);
 extern void fpsimd_release_task(struct task_struct *task);
 extern void fpsimd_sync_to_sve(struct task_struct *task);
+extern void fpsimd_force_sync_to_sve(struct task_struct *task);
 extern void sve_sync_to_fpsimd(struct task_struct *task);
 extern void sve_sync_from_fpsimd_zeropad(struct task_struct *task);

@ -170,6 +208,12 @@ static inline void write_vl(enum vec_type type, u64 val)
 		tmp = read_sysreg_s(SYS_ZCR_EL1) & ~ZCR_ELx_LEN_MASK;
 		write_sysreg_s(tmp | val, SYS_ZCR_EL1);
 		break;
+#endif
+#ifdef CONFIG_ARM64_SME
+	case ARM64_VEC_SME:
+		tmp = read_sysreg_s(SYS_SMCR_EL1) & ~SMCR_ELx_LEN_MASK;
+		write_sysreg_s(tmp | val, SYS_SMCR_EL1);
+		break;
 #endif
 	default:
 		WARN_ON_ONCE(1);
@ -208,6 +252,8 @@ static inline bool sve_vq_available(unsigned int vq)
 	return vq_available(ARM64_VEC_SVE, vq);
 }

+size_t sve_state_size(struct task_struct const *task);
+
 #else /* ! CONFIG_ARM64_SVE */

 static inline void sve_alloc(struct task_struct *task) { }
@ -247,8 +293,93 @@ static inline void vec_update_vq_map(enum vec_type t) { }
 static inline int vec_verify_vq_map(enum vec_type t) { return 0; }
 static inline void sve_setup(void) { }

+static inline size_t sve_state_size(struct task_struct const *task)
+{
+	return 0;
+}
+
 #endif /* ! CONFIG_ARM64_SVE */

+#ifdef CONFIG_ARM64_SME
+
+static inline void sme_user_disable(void)
+{
+	sysreg_clear_set(cpacr_el1, CPACR_EL1_SMEN_EL0EN, 0);
+}
+
+static inline void sme_user_enable(void)
+{
+	sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_SMEN_EL0EN);
+}
+
+static inline void sme_smstart_sm(void)
+{
+	asm volatile(__msr_s(SYS_SVCR_SMSTART_SM_EL0, "xzr"));
+}
+
+static inline void sme_smstop_sm(void)
+{
+	asm volatile(__msr_s(SYS_SVCR_SMSTOP_SM_EL0, "xzr"));
+}
+
+static inline void sme_smstop(void)
+{
+	asm volatile(__msr_s(SYS_SVCR_SMSTOP_SMZA_EL0, "xzr"));
+}
+
+extern void __init sme_setup(void);
+
+static inline int sme_max_vl(void)
+{
+	return vec_max_vl(ARM64_VEC_SME);
+}
+
+static inline int sme_max_virtualisable_vl(void)
+{
+	return vec_max_virtualisable_vl(ARM64_VEC_SME);
+}
+
+extern void sme_alloc(struct task_struct *task);
+extern unsigned int sme_get_vl(void);
+extern int sme_set_current_vl(unsigned long arg);
+extern int sme_get_current_vl(void);
+
+/*
+ * Return how many bytes of memory are required to store the full SME
+ * specific state (currently just ZA) for task, given task's currently
+ * configured vector length.
+ */
+static inline size_t za_state_size(struct task_struct const *task)
+{
+	unsigned int vl = task_get_sme_vl(task);
+
+	return ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl));
+}
+
+#else
+
+static inline void sme_user_disable(void) { BUILD_BUG(); }
+static inline void sme_user_enable(void) { BUILD_BUG(); }
+
+static inline void sme_smstart_sm(void) { }
+static inline void sme_smstop_sm(void) { }
+static inline void sme_smstop(void) { }
+
+static inline void sme_alloc(struct task_struct *task) { }
+static inline void sme_setup(void) { }
+static inline unsigned int sme_get_vl(void) { return 0; }
+static inline int sme_max_vl(void) { return 0; }
+static inline int sme_max_virtualisable_vl(void) { return 0; }
+static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; }
+static inline int sme_get_current_vl(void) { return -EINVAL; }
+
+static inline size_t za_state_size(struct task_struct const *task)
+{
+	return 0;
+}
+
+#endif /* ! CONFIG_ARM64_SME */
+
 /* For use by EFI runtime services calls only */
 extern void __efi_fpsimd_begin(void);
 extern void __efi_fpsimd_end(void);
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@ -93,6 +93,12 @@
 	.endif
 .endm

+.macro _sme_check_wv v
+	.if (\v) < 12 || (\v) > 15
+		.error "Bad vector select register \v."
+	.endif
+.endm
+
 /* SVE instruction encodings for non-SVE-capable assemblers */
 /* (pre binutils 2.28, all kernel capable clang versions support SVE) */

@ -174,6 +180,54 @@
 		| (\np)
 .endm

+/* SME instruction encodings for non-SME-capable assemblers */
+/* (pre binutils 2.38/LLVM 13) */
+
+/* RDSVL X\nx, #\imm */
+.macro _sme_rdsvl nx, imm
+	_check_general_reg \nx
+	_check_num (\imm), -0x20, 0x1f
+	.inst	0x04bf5800			\
+		| (\nx)				\
+		| (((\imm) & 0x3f) << 5)
+.endm
+
+/*
+ * STR (vector from ZA array):
+ *	STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _sme_str_zav nw, nxbase, offset=0
+	_sme_check_wv \nw
+	_check_general_reg \nxbase
+	_check_num (\offset), -0x100, 0xff
+	.inst	0xe1200000			\
+		| (((\nw) & 3) << 13)		\
+		| ((\nxbase) << 5)		\
+		| ((\offset) & 7)
+.endm
+
+/*
+ * LDR (vector to ZA array):
+ *	LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _sme_ldr_zav nw, nxbase, offset=0
+	_sme_check_wv \nw
+	_check_general_reg \nxbase
+	_check_num (\offset), -0x100, 0xff
+	.inst	0xe1000000			\
+		| (((\nw) & 3) << 13)		\
+		| ((\nxbase) << 5)		\
+		| ((\offset) & 7)
+.endm
+
+/*
+ * Zero the entire ZA array
+ *	ZERO ZA
+ */
+.macro zero_za
+	.inst 0xc00800ff
+.endm
+
 .macro __for from:req, to:req
 	.if (\from) == (\to)
 		_for__body %\from
@ -208,6 +262,17 @@
 921:
 .endm

+/* Update SMCR_EL1.LEN with the new VQ */
+.macro sme_load_vq xvqminus1, xtmp, xtmp2
+		mrs_s		\xtmp, SYS_SMCR_EL1
+		bic		\xtmp2, \xtmp, SMCR_ELx_LEN_MASK
+		orr		\xtmp2, \xtmp2, \xvqminus1
+		cmp		\xtmp2, \xtmp
+		b.eq		921f
+		msr_s		SYS_SMCR_EL1, \xtmp2	//self-synchronising
+921:
+.endm
+
 /* Preserve the first 128-bits of Znz and zero the rest. */
 .macro _sve_flush_z nz
 	_sve_check_zreg \nz
@ -254,3 +319,25 @@
 		ldr		w\nxtmp, [\xpfpsr, #4]
 		msr		fpcr, x\nxtmp
 .endm
+
+.macro sme_save_za nxbase, xvl, nw
+	mov	w\nw, #0
+
+423:
+	_sme_str_zav \nw, \nxbase
+	add	x\nxbase, x\nxbase, \xvl
+	add	x\nw, x\nw, #1
+	cmp	\xvl, x\nw
+	bne	423b
+.endm
+
+.macro sme_load_za nxbase, xvl, nw
+	mov	w\nw, #0
+
+423:
+	_sme_ldr_zav \nw, \nxbase
+	add	x\nxbase, x\nxbase, \xvl
+	add	x\nw, x\nw, #1
+	cmp	\xvl, x\nw
+	bne	423b
+.endm
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@ -80,8 +80,15 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)

 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
 struct dyn_ftrace;
+struct ftrace_ops;
+struct ftrace_regs;
+
 int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
 #define ftrace_init_nop ftrace_init_nop
+
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+		       struct ftrace_ops *op, struct ftrace_regs *fregs);
+#define ftrace_graph_func ftrace_graph_func
 #endif

 #define ftrace_return_address(n) return_address(n)
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@ -44,6 +44,8 @@ extern void huge_ptep_clear_flush(struct vm_area_struct *vma,
 #define __HAVE_ARCH_HUGE_PTE_CLEAR
 extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
 			   pte_t *ptep, unsigned long sz);
+#define __HAVE_ARCH_HUGE_PTEP_GET
+extern pte_t huge_ptep_get(pte_t *ptep);
 extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
 				 pte_t *ptep, pte_t pte, unsigned long sz);
 #define set_huge_swap_pte_at set_huge_swap_pte_at
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@ -109,6 +109,14 @@
 #define KERNEL_HWCAP_AFP		__khwcap2_feature(AFP)
 #define KERNEL_HWCAP_RPRES		__khwcap2_feature(RPRES)
 #define KERNEL_HWCAP_MTE3		__khwcap2_feature(MTE3)
+#define KERNEL_HWCAP_SME		__khwcap2_feature(SME)
+#define KERNEL_HWCAP_SME_I16I64		__khwcap2_feature(SME_I16I64)
+#define KERNEL_HWCAP_SME_F64F64		__khwcap2_feature(SME_F64F64)
+#define KERNEL_HWCAP_SME_I8I32		__khwcap2_feature(SME_I8I32)
+#define KERNEL_HWCAP_SME_F16F32		__khwcap2_feature(SME_F16F32)
+#define KERNEL_HWCAP_SME_B16F32		__khwcap2_feature(SME_B16F32)
+#define KERNEL_HWCAP_SME_F32F32		__khwcap2_feature(SME_F32F32)
+#define KERNEL_HWCAP_SME_FA64		__khwcap2_feature(SME_FA64)

 /*
 * This yields a mask that user programs can use to figure out what
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@ -279,6 +279,7 @@
 #define CPTR_EL2_TCPAC	(1U << 31)
 #define CPTR_EL2_TAM	(1 << 30)
 #define CPTR_EL2_TTA	(1 << 20)
+#define CPTR_EL2_TSM	(1 << 12)
 #define CPTR_EL2_TFP	(1 << CPTR_EL2_TFP_SHIFT)
 #define CPTR_EL2_TZ	(1 << 8)
 #define CPTR_NVHE_EL2_RES1	0x000032ff /* known RES1 bits in CPTR_EL2 (nVHE) */
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@ -236,14 +236,14 @@ static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
 	return mode != PSR_MODE_EL0t;
 }

-static __always_inline u32 kvm_vcpu_get_esr(const struct kvm_vcpu *vcpu)
+static __always_inline u64 kvm_vcpu_get_esr(const struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.fault.esr_el2;
 }

 static __always_inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu)
 {
-	u32 esr = kvm_vcpu_get_esr(vcpu);
+	u64 esr = kvm_vcpu_get_esr(vcpu);

 	if (esr & ESR_ELx_CV)
 		return (esr & ESR_ELx_COND_MASK) >> ESR_ELx_COND_SHIFT;
@ -374,7 +374,7 @@ static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)

 static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
 {
-	u32 esr = kvm_vcpu_get_esr(vcpu);
+	u64 esr = kvm_vcpu_get_esr(vcpu);
 	return ESR_ELx_SYS64_ISS_RT(esr);
 }

--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@ -153,7 +153,7 @@ struct kvm_arch {
 };

 struct kvm_vcpu_fault_info {
-	u32 esr_el2;		/* Hyp Syndrom Register */
+	u64 esr_el2;		/* Hyp Syndrom Register */
 	u64 far_el2;		/* Hyp Fault Address Register */
 	u64 hpfar_el2;		/* Hyp IPA Fault Address Register */
 	u64 disr_el1;		/* Deferred [SError] Status Register */
@ -295,8 +295,11 @@ struct vcpu_reset_state {

 struct kvm_vcpu_arch {
 	struct kvm_cpu_context ctxt;
+
+	/* Guest floating point state */
 	void *sve_state;
 	unsigned int sve_max_vl;
+	u64 svcr;

 	/* Stage 2 paging state used by the hardware on next switch */
 	struct kvm_s2_mmu *hw_mmu;
@ -451,6 +454,7 @@ struct kvm_vcpu_arch {
 #define KVM_ARM64_DEBUG_STATE_SAVE_TRBE	(1 << 13) /* Save TRBE context if active  */
 #define KVM_ARM64_FP_FOREIGN_FPSTATE	(1 << 14)
 #define KVM_ARM64_ON_UNSUPPORTED_CPU	(1 << 15) /* Physical CPU not in supported_cpus */
+#define KVM_ARM64_HOST_SME_ENABLED	(1 << 16) /* SME enabled for EL0 */

 #define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
 				 KVM_GUESTDBG_USE_SW_BP | \
--- a/arch/arm64/include/asm/kvm_ras.h
+++ b/arch/arm64/include/asm/kvm_ras.h
@ -14,7 +14,7 @@
 * Was this synchronous external abort a RAS notification?
 * Returns '0' for errors handled by some RAS subsystem, or -ENOENT.
 */
-static inline int kvm_handle_guest_sea(phys_addr_t addr, unsigned int esr)
+static inline int kvm_handle_guest_sea(phys_addr_t addr, u64 esr)
 {
 	/* apei_claim_sea(NULL) expects to mask interrupts itself */
 	lockdep_assert_irqs_enabled();
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@ -47,6 +47,7 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg);
 long get_mte_ctrl(struct task_struct *task);
 int mte_ptrace_copy_tags(struct task_struct *child, long request,
 			 unsigned long addr, unsigned long data);
+size_t mte_probe_user_range(const char __user *uaddr, size_t size);

 #else /* CONFIG_ARM64_MTE */

--- a/Show more
+++ b/Show more