Snap for 8695492 from 7940fcf597 to android-mainline-keystone-qcom-release
Change-Id: I8d2524317a06982cb50a2c0bd842e62d0157a166
This commit is contained in:
commit
019bc1f65e
1226 changed files with 42670 additions and 18197 deletions
1
.mailmap
1
.mailmap
|
|
@ -398,6 +398,7 @@ Vasily Averin <vasily.averin@linux.dev> <vvs@virtuozzo.com>
|
|||
Vasily Averin <vasily.averin@linux.dev> <vvs@openvz.org>
|
||||
Vasily Averin <vasily.averin@linux.dev> <vvs@parallels.com>
|
||||
Vasily Averin <vasily.averin@linux.dev> <vvs@sw.ru>
|
||||
Valentin Schneider <vschneid@redhat.com> <valentin.schneider@arm.com>
|
||||
Vinod Koul <vkoul@kernel.org> <vinod.koul@intel.com>
|
||||
Vinod Koul <vkoul@kernel.org> <vinod.koul@linux.intel.com>
|
||||
Vinod Koul <vkoul@kernel.org> <vkoul@infradead.org>
|
||||
|
|
|
|||
|
|
@ -467,3 +467,39 @@ Description: These files provide the maximum powered required for line card
|
|||
feeding and line card configuration Id.
|
||||
|
||||
The files are read only.
|
||||
|
||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/phy_reset
|
||||
Date: May 2022
|
||||
KernelVersion: 5.19
|
||||
Contact: Vadim Pasternak <vadimp@mellanox.com>
|
||||
Description: This file allows resetting PHY 88E1548 when the attribute is set to 0
|
||||
due to some abnormal PHY behavior.
|
||||
Expected behavior:
|
||||
When phy_reset is written 1, all PHY 88E1548 are released
|
||||
from the reset state; when 0 is written, they are held in the reset state.
|
||||
|
||||
The files are read/write.
|
||||
|
||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/mac_reset
|
||||
Date: May 2022
|
||||
KernelVersion: 5.19
|
||||
Contact: Vadim Pasternak <vadimp@mellanox.com>
|
||||
Description: This file allows resetting ASIC MT52132 when the attribute is set to 0
|
||||
due to some abnormal ASIC behavior.
|
||||
Expected behavior:
|
||||
When mac_reset is written 1, the ASIC MT52132 is released
|
||||
from the reset state; when 0 is written, it is held in the reset state.
|
||||
|
||||
The files are read/write.
|
||||
|
||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/qsfp_pwr_good
|
||||
Date: May 2022
|
||||
KernelVersion: 5.19
|
||||
Contact: Vadim Pasternak <vadimp@mellanox.com>
|
||||
Description: This file shows QSFP ports power status. The value is set to 0
|
||||
when any of the QSFP ports is plugged. The value is set to 1 when
|
||||
no QSFP ports are plugged.
|
||||
The possible values are:
|
||||
0 - Power good, 1 - Not power good.
|
||||
|
||||
The files are read only.
|
||||
|
|
|
|||
51
Documentation/ABI/testing/securityfs-secrets-coco
Normal file
51
Documentation/ABI/testing/securityfs-secrets-coco
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
What: security/secrets/coco
|
||||
Date: February 2022
|
||||
Contact: Dov Murik <dovmurik@linux.ibm.com>
|
||||
Description:
|
||||
Exposes confidential computing (coco) EFI secrets to
|
||||
userspace via securityfs.
|
||||
|
||||
EFI can declare memory area used by confidential computing
|
||||
platforms (such as AMD SEV and SEV-ES) for secret injection by
|
||||
the Guest Owner during VM's launch. The secrets are encrypted
|
||||
by the Guest Owner and decrypted inside the trusted enclave,
|
||||
and therefore are not readable by the untrusted host.
|
||||
|
||||
The efi_secret module exposes the secrets to userspace. Each
|
||||
secret appears as a file under <securityfs>/secrets/coco,
|
||||
where the filename is the GUID of the entry in the secrets
|
||||
table. This module is loaded automatically by the EFI driver
|
||||
if the EFI secret area is populated.
|
||||
|
||||
Two operations are supported for the files: read and unlink.
|
||||
Reading the file returns the content of secret entry.
|
||||
Unlinking the file overwrites the secret data with zeroes and
|
||||
removes the entry from the filesystem. A secret cannot be read
|
||||
after it has been unlinked.
|
||||
|
||||
For example, listing the available secrets::
|
||||
|
||||
# modprobe efi_secret
|
||||
# ls -l /sys/kernel/security/secrets/coco
|
||||
-r--r----- 1 root root 0 Jun 28 11:54 736870e5-84f0-4973-92ec-06879ce3da0b
|
||||
-r--r----- 1 root root 0 Jun 28 11:54 83c83f7f-1356-4975-8b7e-d3a0b54312c6
|
||||
-r--r----- 1 root root 0 Jun 28 11:54 9553f55d-3da2-43ee-ab5d-ff17f78864d2
|
||||
-r--r----- 1 root root 0 Jun 28 11:54 e6f5a162-d67f-4750-a67c-5d065f2a9910
|
||||
|
||||
Reading the secret data by reading a file::
|
||||
|
||||
# cat /sys/kernel/security/secrets/coco/e6f5a162-d67f-4750-a67c-5d065f2a9910
|
||||
the-content-of-the-secret-data
|
||||
|
||||
Wiping a secret by unlinking a file::
|
||||
|
||||
# rm /sys/kernel/security/secrets/coco/e6f5a162-d67f-4750-a67c-5d065f2a9910
|
||||
# ls -l /sys/kernel/security/secrets/coco
|
||||
-r--r----- 1 root root 0 Jun 28 11:54 736870e5-84f0-4973-92ec-06879ce3da0b
|
||||
-r--r----- 1 root root 0 Jun 28 11:54 83c83f7f-1356-4975-8b7e-d3a0b54312c6
|
||||
-r--r----- 1 root root 0 Jun 28 11:54 9553f55d-3da2-43ee-ab5d-ff17f78864d2
|
||||
|
||||
Note: The binary format of the secrets table injected by the
|
||||
Guest Owner is described in
|
||||
drivers/virt/coco/efi_secret/efi_secret.c under "Structure of
|
||||
the EFI secret area".
|
||||
|
|
@ -29,7 +29,7 @@ Description:
|
|||
What: /sys/module/xen_blkback/parameters/buffer_squeeze_duration_ms
|
||||
Date: December 2019
|
||||
KernelVersion: 5.6
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Contact: Maximilian Heyne <mheyne@amazon.de>
|
||||
Description:
|
||||
When memory pressure is reported to blkback this option
|
||||
controls the duration in milliseconds that blkback will not
|
||||
|
|
@ -39,7 +39,7 @@ Description:
|
|||
What: /sys/module/xen_blkback/parameters/feature_persistent
|
||||
Date: September 2020
|
||||
KernelVersion: 5.10
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Contact: Maximilian Heyne <mheyne@amazon.de>
|
||||
Description:
|
||||
Whether to enable the persistent grants feature or not. Note
|
||||
that this option only takes effect on newly created backends.
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ Description:
|
|||
What: /sys/module/xen_blkfront/parameters/feature_persistent
|
||||
Date: September 2020
|
||||
KernelVersion: 5.10
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Contact: Maximilian Heyne <mheyne@amazon.de>
|
||||
Description:
|
||||
Whether to enable the persistent grants feature or not. Note
|
||||
that this option only takes effect on newly created frontends.
|
||||
|
|
|
|||
39
Documentation/ABI/testing/sysfs-platform-intel-ifs
Normal file
39
Documentation/ABI/testing/sysfs-platform-intel-ifs
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
What: /sys/devices/virtual/misc/intel_ifs_<N>/run_test
|
||||
Date: April 21 2022
|
||||
KernelVersion: 5.19
|
||||
Contact: "Jithu Joseph" <jithu.joseph@intel.com>
|
||||
Description: Write <cpu#> to trigger IFS test for one online core.
|
||||
Note that the test is per core. The cpu# can be
|
||||
for any thread on the core. Running on one thread
|
||||
completes the test for the core containing that thread.
|
||||
Example: to test the core containing cpu5: echo 5 >
|
||||
/sys/devices/virtual/misc/intel_ifs_<N>/run_test
|
||||
|
||||
What: /sys/devices/virtual/misc/intel_ifs_<N>/status
|
||||
Date: April 21 2022
|
||||
KernelVersion: 5.19
|
||||
Contact: "Jithu Joseph" <jithu.joseph@intel.com>
|
||||
Description: The status of the last test. It can be one of "pass", "fail"
|
||||
or "untested".
|
||||
|
||||
What: /sys/devices/virtual/misc/intel_ifs_<N>/details
|
||||
Date: April 21 2022
|
||||
KernelVersion: 5.19
|
||||
Contact: "Jithu Joseph" <jithu.joseph@intel.com>
|
||||
Description: Additional information regarding the last test. The details file reports
|
||||
the hex value of the SCAN_STATUS MSR. Note that the error_code field
|
||||
may contain driver defined software code not defined in the Intel SDM.
|
||||
|
||||
What: /sys/devices/virtual/misc/intel_ifs_<N>/image_version
|
||||
Date: April 21 2022
|
||||
KernelVersion: 5.19
|
||||
Contact: "Jithu Joseph" <jithu.joseph@intel.com>
|
||||
Description: Version (hexadecimal) of loaded IFS binary image. If no scan image
|
||||
is loaded reports "none".
|
||||
|
||||
What: /sys/devices/virtual/misc/intel_ifs_<N>/reload
|
||||
Date: April 21 2022
|
||||
KernelVersion: 5.19
|
||||
Contact: "Jithu Joseph" <jithu.joseph@intel.com>
|
||||
Description: Write "1" (or "y" or "Y") to reload the IFS image from
|
||||
/lib/firmware/intel/ifs/ff-mm-ss.scan.
|
||||
|
|
@ -973,7 +973,7 @@ The ``->dynticks`` field counts the corresponding CPU's transitions to
|
|||
and from either dyntick-idle or user mode, so that this counter has an
|
||||
even value when the CPU is in dyntick-idle mode or user mode and an odd
|
||||
value otherwise. The transitions to/from user mode need to be counted
|
||||
for user mode adaptive-ticks support (see timers/NO_HZ.txt).
|
||||
for user mode adaptive-ticks support (see Documentation/timers/no_hz.rst).
|
||||
|
||||
The ``->rcu_need_heavy_qs`` field is used to record the fact that the
|
||||
RCU core code would really like to see a quiescent state from the
|
||||
|
|
|
|||
|
|
@ -406,7 +406,7 @@ In earlier implementations, the task requesting the expedited grace
|
|||
period also drove it to completion. This straightforward approach had
|
||||
the disadvantage of needing to account for POSIX signals sent to user
|
||||
tasks, so more recent implementations use the Linux kernel's
|
||||
`workqueues <https://www.kernel.org/doc/Documentation/core-api/workqueue.rst>`__.
|
||||
workqueues (see Documentation/core-api/workqueue.rst).
|
||||
|
||||
The requesting task still does counter snapshotting and funnel-lock
|
||||
processing, but the task reaching the top of the funnel lock does a
|
||||
|
|
|
|||
|
|
@ -370,8 +370,8 @@ pointer fetched by rcu_dereference() may not be used outside of the
|
|||
outermost RCU read-side critical section containing that
|
||||
rcu_dereference(), unless protection of the corresponding data
|
||||
element has been passed from RCU to some other synchronization
|
||||
mechanism, most commonly locking or `reference
|
||||
counting <https://www.kernel.org/doc/Documentation/RCU/rcuref.txt>`__.
|
||||
mechanism, most commonly locking or reference counting
|
||||
(see ../../rcuref.rst).
|
||||
|
||||
.. |high-quality implementation of C11 memory_order_consume [PDF]| replace:: high-quality implementation of C11 ``memory_order_consume`` [PDF]
|
||||
.. _high-quality implementation of C11 memory_order_consume [PDF]: http://www.rdrop.com/users/paulmck/RCU/consume.2015.07.13a.pdf
|
||||
|
|
@ -2654,6 +2654,38 @@ synchronize_rcu(), and rcu_barrier(), respectively. In
|
|||
three APIs are therefore implemented by separate functions that check
|
||||
for voluntary context switches.
|
||||
|
||||
Tasks Rude RCU
|
||||
~~~~~~~~~~~~~~
|
||||
|
||||
Some forms of tracing need to wait for all preemption-disabled regions
|
||||
of code running on any online CPU, including those executed when RCU is
|
||||
not watching. This means that synchronize_rcu() is insufficient, and
|
||||
Tasks Rude RCU must be used instead. This flavor of RCU does its work by
|
||||
forcing a workqueue to be scheduled on each online CPU, hence the "Rude"
|
||||
moniker. And this operation is considered to be quite rude by real-time
|
||||
workloads that don't want their ``nohz_full`` CPUs receiving IPIs and
|
||||
by battery-powered systems that don't want their idle CPUs to be awakened.
|
||||
|
||||
The tasks-rude-RCU API is also reader-marking-free and thus quite compact,
|
||||
consisting of call_rcu_tasks_rude(), synchronize_rcu_tasks_rude(),
|
||||
and rcu_barrier_tasks_rude().
|
||||
|
||||
Tasks Trace RCU
|
||||
~~~~~~~~~~~~~~~
|
||||
|
||||
Some forms of tracing need to sleep in readers, but cannot tolerate
|
||||
SRCU's read-side overhead, which includes a full memory barrier in both
|
||||
srcu_read_lock() and srcu_read_unlock(). This need is handled by a
|
||||
Tasks Trace RCU that uses scheduler locking and IPIs to synchronize with
|
||||
readers. Real-time systems that cannot tolerate IPIs may build their
|
||||
kernels with ``CONFIG_TASKS_TRACE_RCU_READ_MB=y``, which avoids the IPIs at
|
||||
the expense of adding full memory barriers to the read-side primitives.
|
||||
|
||||
The tasks-trace-RCU API is also reasonably compact,
|
||||
consisting of rcu_read_lock_trace(), rcu_read_unlock_trace(),
|
||||
rcu_read_lock_trace_held(), call_rcu_tasks_trace(),
|
||||
synchronize_rcu_tasks_trace(), and rcu_barrier_tasks_trace().
|
||||
|
||||
Possible Future Changes
|
||||
-----------------------
|
||||
|
||||
|
|
|
|||
|
|
@ -33,8 +33,8 @@ Situation 1: Hash Tables
|
|||
|
||||
Hash tables are often implemented as an array, where each array entry
|
||||
has a linked-list hash chain. Each hash chain can be protected by RCU
|
||||
as described in the listRCU.txt document. This approach also applies
|
||||
to other array-of-list situations, such as radix trees.
|
||||
as described in listRCU.rst. This approach also applies to other
|
||||
array-of-list situations, such as radix trees.
|
||||
|
||||
.. _static_arrays:
|
||||
|
||||
|
|
|
|||
|
|
@ -140,8 +140,7 @@ over a rather long period of time, but improvements are always welcome!
|
|||
prevents destructive compiler optimizations. However,
|
||||
with a bit of devious creativity, it is possible to
|
||||
mishandle the return value from rcu_dereference().
|
||||
Please see rcu_dereference.txt in this directory for
|
||||
more information.
|
||||
Please see rcu_dereference.rst for more information.
|
||||
|
||||
The rcu_dereference() primitive is used by the
|
||||
various "_rcu()" list-traversal primitives, such
|
||||
|
|
@ -151,7 +150,7 @@ over a rather long period of time, but improvements are always welcome!
|
|||
primitives. This is particularly useful in code that
|
||||
is common to readers and updaters. However, lockdep
|
||||
will complain if you access rcu_dereference() outside
|
||||
of an RCU read-side critical section. See lockdep.txt
|
||||
of an RCU read-side critical section. See lockdep.rst
|
||||
to learn what to do about this.
|
||||
|
||||
Of course, neither rcu_dereference() nor the "_rcu()"
|
||||
|
|
@ -323,7 +322,7 @@ over a rather long period of time, but improvements are always welcome!
|
|||
primitives when the update-side lock is held is that doing so
|
||||
can be quite helpful in reducing code bloat when common code is
|
||||
shared between readers and updaters. Additional primitives
|
||||
are provided for this case, as discussed in lockdep.txt.
|
||||
are provided for this case, as discussed in lockdep.rst.
|
||||
|
||||
One exception to this rule is when data is only ever added to
|
||||
the linked data structure, and is never removed during any
|
||||
|
|
@ -480,4 +479,4 @@ over a rather long period of time, but improvements are always welcome!
|
|||
both rcu_barrier() and synchronize_rcu(), if necessary, using
|
||||
something like workqueues to execute them concurrently.
|
||||
|
||||
See rcubarrier.txt for more information.
|
||||
See rcubarrier.rst for more information.
|
||||
|
|
|
|||
|
|
@ -10,9 +10,8 @@ A "grace period" must elapse between the two parts, and this grace period
|
|||
must be long enough that any readers accessing the item being deleted have
|
||||
since dropped their references. For example, an RCU-protected deletion
|
||||
from a linked list would first remove the item from the list, wait for
|
||||
a grace period to elapse, then free the element. See the
|
||||
:ref:`Documentation/RCU/listRCU.rst <list_rcu_doc>` for more information on
|
||||
using RCU with linked lists.
|
||||
a grace period to elapse, then free the element. See listRCU.rst for more
|
||||
information on using RCU with linked lists.
|
||||
|
||||
Frequently Asked Questions
|
||||
--------------------------
|
||||
|
|
@ -50,7 +49,7 @@ Frequently Asked Questions
|
|||
- If I am running on a uniprocessor kernel, which can only do one
|
||||
thing at a time, why should I wait for a grace period?
|
||||
|
||||
See :ref:`Documentation/RCU/UP.rst <up_doc>` for more information.
|
||||
See UP.rst for more information.
|
||||
|
||||
- How can I see where RCU is currently used in the Linux kernel?
|
||||
|
||||
|
|
@ -64,13 +63,13 @@ Frequently Asked Questions
|
|||
|
||||
- What guidelines should I follow when writing code that uses RCU?
|
||||
|
||||
See the checklist.txt file in this directory.
|
||||
See checklist.rst.
|
||||
|
||||
- Why the name "RCU"?
|
||||
|
||||
"RCU" stands for "read-copy update".
|
||||
:ref:`Documentation/RCU/listRCU.rst <list_rcu_doc>` has more information on where
|
||||
this name came from, search for "read-copy update" to find it.
|
||||
listRCU.rst has more information on where this name came from, search
|
||||
for "read-copy update" to find it.
|
||||
|
||||
- I hear that RCU is patented? What is with that?
|
||||
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ This section describes how to use hlist_nulls to
|
|||
protect read-mostly linked lists and
|
||||
objects using SLAB_TYPESAFE_BY_RCU allocations.
|
||||
|
||||
Please read the basics in Documentation/RCU/listRCU.rst
|
||||
Please read the basics in listRCU.rst.
|
||||
|
||||
Using 'nulls'
|
||||
=============
|
||||
|
|
|
|||
|
|
@ -162,6 +162,26 @@ CONFIG_RCU_CPU_STALL_TIMEOUT
|
|||
Stall-warning messages may be enabled and disabled completely via
|
||||
/sys/module/rcupdate/parameters/rcu_cpu_stall_suppress.
|
||||
|
||||
CONFIG_RCU_EXP_CPU_STALL_TIMEOUT
|
||||
--------------------------------
|
||||
|
||||
Same as the CONFIG_RCU_CPU_STALL_TIMEOUT parameter but only for
|
||||
the expedited grace period. This parameter defines the period
|
||||
of time that RCU will wait from the beginning of an expedited
|
||||
grace period until it issues an RCU CPU stall warning. This time
|
||||
period is normally 20 milliseconds on Android devices. A zero
|
||||
value causes the CONFIG_RCU_CPU_STALL_TIMEOUT value to be used,
|
||||
after conversion to milliseconds.
|
||||
|
||||
This configuration parameter may be changed at runtime via the
|
||||
/sys/module/rcupdate/parameters/rcu_exp_cpu_stall_timeout, however
|
||||
this parameter is checked only at the beginning of a cycle. If you
|
||||
are in a current stall cycle, setting it to a new value will change
|
||||
the timeout for the -next- stall.
|
||||
|
||||
Stall-warning messages may be enabled and disabled completely via
|
||||
/sys/module/rcupdate/parameters/rcu_cpu_stall_suppress.
|
||||
|
||||
RCU_STALL_DELAY_DELTA
|
||||
---------------------
|
||||
|
||||
|
|
|
|||
|
|
@ -224,7 +224,7 @@ synchronize_rcu()
|
|||
be delayed. This property results in system resilience in face
|
||||
of denial-of-service attacks. Code using call_rcu() should limit
|
||||
update rate in order to gain this same sort of resilience. See
|
||||
checklist.txt for some approaches to limiting the update rate.
|
||||
checklist.rst for some approaches to limiting the update rate.
|
||||
|
||||
rcu_assign_pointer()
|
||||
^^^^^^^^^^^^^^^^^^^^
|
||||
|
|
@ -318,7 +318,7 @@ rcu_dereference()
|
|||
must prohibit. The rcu_dereference_protected() variant takes
|
||||
a lockdep expression to indicate which locks must be acquired
|
||||
by the caller. If the indicated protection is not provided,
|
||||
a lockdep splat is emitted. See Documentation/RCU/Design/Requirements/Requirements.rst
|
||||
a lockdep splat is emitted. See Design/Requirements/Requirements.rst
|
||||
and the API's code comments for more details and example usage.
|
||||
|
||||
.. [2] If the list_for_each_entry_rcu() instance might be used by
|
||||
|
|
@ -399,8 +399,7 @@ for specialized uses, but are relatively uncommon.
|
|||
|
||||
This section shows a simple use of the core RCU API to protect a
|
||||
global pointer to a dynamically allocated structure. More-typical
|
||||
uses of RCU may be found in :ref:`listRCU.rst <list_rcu_doc>`,
|
||||
:ref:`arrayRCU.rst <array_rcu_doc>`, and :ref:`NMI-RCU.rst <NMI_rcu_doc>`.
|
||||
uses of RCU may be found in listRCU.rst, arrayRCU.rst, and NMI-RCU.rst.
|
||||
::
|
||||
|
||||
struct foo {
|
||||
|
|
@ -482,10 +481,9 @@ So, to sum up:
|
|||
RCU read-side critical sections that might be referencing that
|
||||
data item.
|
||||
|
||||
See checklist.txt for additional rules to follow when using RCU.
|
||||
And again, more-typical uses of RCU may be found in :ref:`listRCU.rst
|
||||
<list_rcu_doc>`, :ref:`arrayRCU.rst <array_rcu_doc>`, and :ref:`NMI-RCU.rst
|
||||
<NMI_rcu_doc>`.
|
||||
See checklist.rst for additional rules to follow when using RCU.
|
||||
And again, more-typical uses of RCU may be found in listRCU.rst,
|
||||
arrayRCU.rst, and NMI-RCU.rst.
|
||||
|
||||
.. _4_whatisRCU:
|
||||
|
||||
|
|
@ -579,7 +577,7 @@ to avoid having to write your own callback::
|
|||
|
||||
kfree_rcu(old_fp, rcu);
|
||||
|
||||
Again, see checklist.txt for additional rules governing the use of RCU.
|
||||
Again, see checklist.rst for additional rules governing the use of RCU.
|
||||
|
||||
.. _5_whatisRCU:
|
||||
|
||||
|
|
@ -663,7 +661,7 @@ been able to write-acquire the lock otherwise. The smp_mb__after_spinlock()
|
|||
promotes synchronize_rcu() to a full memory barrier in compliance with
|
||||
the "Memory-Barrier Guarantees" listed in:
|
||||
|
||||
Documentation/RCU/Design/Requirements/Requirements.rst
|
||||
Design/Requirements/Requirements.rst
|
||||
|
||||
It is possible to nest rcu_read_lock(), since reader-writer locks may
|
||||
be recursively acquired. Note also that rcu_read_lock() is immune
|
||||
|
|
|
|||
|
|
@ -37,11 +37,7 @@ Pressure interface
|
|||
Pressure information for each resource is exported through the
|
||||
respective file in /proc/pressure/ -- cpu, memory, and io.
|
||||
|
||||
The format for CPU is as such::
|
||||
|
||||
some avg10=0.00 avg60=0.00 avg300=0.00 total=0
|
||||
|
||||
and for memory and IO::
|
||||
The format is as such::
|
||||
|
||||
some avg10=0.00 avg60=0.00 avg300=0.00 total=0
|
||||
full avg10=0.00 avg60=0.00 avg300=0.00 total=0
|
||||
|
|
@ -58,6 +54,9 @@ situation from a state where some tasks are stalled but the CPU is
|
|||
still doing productive work. As such, time spent in this subset of the
|
||||
stall state is tracked separately and exported in the "full" averages.
|
||||
|
||||
CPU full is undefined at the system level, but has been reported
|
||||
since 5.13, so it is set to zero for backward compatibility.
|
||||
|
||||
The ratios (in %) are tracked as recent trends over ten, sixty, and
|
||||
three hundred second windows, which gives insight into short term events
|
||||
as well as medium and long term trends. The total absolute stall time
|
||||
|
|
|
|||
|
|
@ -631,12 +631,17 @@
|
|||
Defaults to zero when built as a module and to
|
||||
10 seconds when built into the kernel.
|
||||
|
||||
clearcpuid=BITNUM[,BITNUM...] [X86]
|
||||
clearcpuid=X[,X...] [X86]
|
||||
Disable CPUID feature X for the kernel. See
|
||||
arch/x86/include/asm/cpufeatures.h for the valid bit
|
||||
numbers. Note the Linux specific bits are not necessarily
|
||||
stable over kernel options, but the vendor specific
|
||||
numbers X. Note the Linux-specific bits are not necessarily
|
||||
stable over kernel options, but the vendor-specific
|
||||
ones should be.
|
||||
X can also be a string as appearing in the flags: line
|
||||
in /proc/cpuinfo which does not have the above
|
||||
instability issue. However, not all features have names
|
||||
in /proc/cpuinfo.
|
||||
Note that using this option will taint your kernel.
|
||||
Also note that user programs calling CPUID directly
|
||||
or using the feature without checking anything
|
||||
will still see it. This just prevents it from
|
||||
|
|
@ -808,7 +813,7 @@
|
|||
Documentation/admin-guide/kdump/kdump.rst for an example.
|
||||
|
||||
crashkernel=size[KMG],high
|
||||
[KNL, X86-64] range could be above 4G. Allow kernel
|
||||
[KNL, X86-64, ARM64] range could be above 4G. Allow kernel
|
||||
to allocate physical memory region from top, so could
|
||||
be above 4G if the system has more than 4G of RAM installed.
|
||||
Otherwise memory region will be allocated below 4G, if
|
||||
|
|
@ -821,14 +826,20 @@
|
|||
that require some amount of low memory, e.g. swiotlb
|
||||
requires at least 64M+32K low memory, also enough extra
|
||||
low memory is needed to make sure DMA buffers for 32-bit
|
||||
devices won't run out. Kernel would try to allocate at
|
||||
devices won't run out. Kernel would try to allocate
|
||||
at least 256M below 4G automatically.
|
||||
This one let user to specify own low range under 4G
|
||||
This one lets the user specify own low range under 4G
|
||||
for second kernel instead.
|
||||
0: to disable low allocation.
|
||||
It will be ignored when crashkernel=X,high is not used
|
||||
or memory reserved is below 4G.
|
||||
|
||||
[KNL, ARM64] range in low memory.
|
||||
This one lets the user specify a low range in the
|
||||
DMA zone for the crash dump kernel.
|
||||
It will be ignored when crashkernel=X,high is not used
|
||||
or memory reserved is located in the DMA zones.
|
||||
|
||||
cryptomgr.notests
|
||||
[KNL] Disable crypto self-tests
|
||||
|
||||
|
|
@ -2630,14 +2641,14 @@
|
|||
when set.
|
||||
Format: <int>
|
||||
|
||||
libata.force= [LIBATA] Force configurations. The format is comma-
|
||||
separated list of "[ID:]VAL" where ID is
|
||||
PORT[.DEVICE]. PORT and DEVICE are decimal numbers
|
||||
matching port, link or device. Basically, it matches
|
||||
the ATA ID string printed on console by libata. If
|
||||
the whole ID part is omitted, the last PORT and DEVICE
|
||||
values are used. If ID hasn't been specified yet, the
|
||||
configuration applies to all ports, links and devices.
|
||||
libata.force= [LIBATA] Force configurations. The format is a comma-
|
||||
separated list of "[ID:]VAL" where ID is PORT[.DEVICE].
|
||||
PORT and DEVICE are decimal numbers matching port, link
|
||||
or device. Basically, it matches the ATA ID string
|
||||
printed on console by libata. If the whole ID part is
|
||||
omitted, the last PORT and DEVICE values are used. If
|
||||
ID hasn't been specified yet, the configuration applies
|
||||
to all ports, links and devices.
|
||||
|
||||
If only DEVICE is omitted, the parameter applies to
|
||||
the port and all links and devices behind it. DEVICE
|
||||
|
|
@ -2647,7 +2658,7 @@
|
|||
host link and device attached to it.
|
||||
|
||||
The VAL specifies the configuration to force. As long
|
||||
as there's no ambiguity shortcut notation is allowed.
|
||||
as there is no ambiguity, shortcut notation is allowed.
|
||||
For example, both 1.5 and 1.5G would work for 1.5Gbps.
|
||||
The following configurations can be forced.
|
||||
|
||||
|
|
@ -2660,19 +2671,58 @@
|
|||
udma[/][16,25,33,44,66,100,133] notation is also
|
||||
allowed.
|
||||
|
||||
* nohrst, nosrst, norst: suppress hard, soft and both
|
||||
resets.
|
||||
|
||||
* rstonce: only attempt one reset during hot-unplug
|
||||
link recovery.
|
||||
|
||||
* [no]dbdelay: Enable or disable the extra 200ms delay
|
||||
before debouncing a link PHY and device presence
|
||||
detection.
|
||||
|
||||
* [no]ncq: Turn on or off NCQ.
|
||||
|
||||
* [no]ncqtrim: Turn off queued DSM TRIM.
|
||||
* [no]ncqtrim: Enable or disable queued DSM TRIM.
|
||||
|
||||
* nohrst, nosrst, norst: suppress hard, soft
|
||||
and both resets.
|
||||
* [no]ncqati: Enable or disable NCQ trim on ATI chipset.
|
||||
|
||||
* rstonce: only attempt one reset during
|
||||
hot-unplug link recovery
|
||||
* [no]trim: Enable or disable (unqueued) TRIM.
|
||||
|
||||
* dump_id: dump IDENTIFY data.
|
||||
* trim_zero: Indicate that TRIM command zeroes data.
|
||||
|
||||
* atapi_dmadir: Enable ATAPI DMADIR bridge support
|
||||
* max_trim_128m: Set 128M maximum trim size limit.
|
||||
|
||||
* [no]dma: Turn on or off DMA transfers.
|
||||
|
||||
* atapi_dmadir: Enable ATAPI DMADIR bridge support.
|
||||
|
||||
* atapi_mod16_dma: Enable the use of ATAPI DMA for
|
||||
commands that are not a multiple of 16 bytes.
|
||||
|
||||
* [no]dmalog: Enable or disable the use of the
|
||||
READ LOG DMA EXT command to access logs.
|
||||
|
||||
* [no]iddevlog: Enable or disable access to the
|
||||
identify device data log.
|
||||
|
||||
* [no]logdir: Enable or disable access to the general
|
||||
purpose log directory.
|
||||
|
||||
* max_sec_128: Set transfer size limit to 128 sectors.
|
||||
|
||||
* max_sec_1024: Set or clear transfer size limit to
|
||||
1024 sectors.
|
||||
|
||||
* max_sec_lba48: Set or clear transfer size limit to
|
||||
65535 sectors.
|
||||
|
||||
* [no]lpm: Enable or disable link power management.
|
||||
|
||||
* [no]setxfer: Indicate if transfer speed mode setting
|
||||
should be skipped.
|
||||
|
||||
* dump_id: Dump IDENTIFY data.
|
||||
|
||||
* disable: Disable this device.
|
||||
|
||||
|
|
@ -3111,6 +3161,7 @@
|
|||
mds=off [X86]
|
||||
tsx_async_abort=off [X86]
|
||||
kvm.nx_huge_pages=off [X86]
|
||||
srbds=off [X86,INTEL]
|
||||
no_entry_flush [PPC]
|
||||
no_uaccess_flush [PPC]
|
||||
|
||||
|
|
@ -3447,8 +3498,6 @@
|
|||
|
||||
nocache [ARM]
|
||||
|
||||
noclflush [BUGS=X86] Don't use the CLFLUSH instruction
|
||||
|
||||
delayacct [KNL] Enable per-task delay accounting
|
||||
|
||||
nodsp [SH] Disable hardware DSP at boot time.
|
||||
|
|
@ -3459,16 +3508,11 @@
|
|||
|
||||
noexec [IA-64]
|
||||
|
||||
noexec [X86]
|
||||
On X86-32 available only on PAE configured kernels.
|
||||
noexec=on: enable non-executable mappings (default)
|
||||
noexec=off: disable non-executable mappings
|
||||
|
||||
nosmap [X86,PPC]
|
||||
nosmap [PPC]
|
||||
Disable SMAP (Supervisor Mode Access Prevention)
|
||||
even if it is supported by processor.
|
||||
|
||||
nosmep [X86,PPC64s]
|
||||
nosmep [PPC64s]
|
||||
Disable SMEP (Supervisor Mode Execution Prevention)
|
||||
even if it is supported by processor.
|
||||
|
||||
|
|
@ -3668,8 +3712,6 @@
|
|||
|
||||
nosbagart [IA-64]
|
||||
|
||||
nosep [BUGS=X86-32] Disables x86 SYSENTER/SYSEXIT support.
|
||||
|
||||
nosgx [X86-64,SGX] Disables Intel SGX kernel support.
|
||||
|
||||
nosmp [SMP] Tells an SMP kernel to act as a UP kernel,
|
||||
|
|
@ -4901,6 +4943,18 @@
|
|||
|
||||
rcupdate.rcu_cpu_stall_timeout= [KNL]
|
||||
Set timeout for RCU CPU stall warning messages.
|
||||
The value is in seconds and the maximum allowed
|
||||
value is 300 seconds.
|
||||
|
||||
rcupdate.rcu_exp_cpu_stall_timeout= [KNL]
|
||||
Set timeout for expedited RCU CPU stall warning
|
||||
messages. The value is in milliseconds
|
||||
and the maximum allowed value is 21000
|
||||
milliseconds. Please note that this value is
|
||||
adjusted to an arch timer tick resolution.
|
||||
Setting this to zero causes the value from
|
||||
rcupdate.rcu_cpu_stall_timeout to be used (after
|
||||
conversion from seconds to milliseconds).
|
||||
|
||||
rcupdate.rcu_expedited= [KNL]
|
||||
Use expedited grace-period primitives, for
|
||||
|
|
@ -4963,10 +5017,34 @@
|
|||
number avoids disturbing real-time workloads,
|
||||
but lengthens grace periods.
|
||||
|
||||
rcupdate.rcu_task_stall_info= [KNL]
|
||||
Set initial timeout in jiffies for RCU task stall
|
||||
informational messages, which give some indication
|
||||
of the problem for those not patient enough to
|
||||
wait for ten minutes. Informational messages are
|
||||
only printed prior to the stall-warning message
|
||||
for a given grace period. Disable with a value
|
||||
less than or equal to zero. Defaults to ten
|
||||
seconds. A change in value does not take effect
|
||||
until the beginning of the next grace period.
|
||||
|
||||
rcupdate.rcu_task_stall_info_mult= [KNL]
|
||||
Multiplier for time interval between successive
|
||||
RCU task stall informational messages for a given
|
||||
RCU tasks grace period. This value is clamped
|
||||
to one through ten, inclusive. It defaults to
|
||||
the value three, so that the first informational
|
||||
message is printed 10 seconds into the grace
|
||||
period, the second at 40 seconds, the third at
|
||||
160 seconds, and then the stall warning at 600
|
||||
seconds would prevent a fourth at 640 seconds.
|
||||
|
||||
rcupdate.rcu_task_stall_timeout= [KNL]
|
||||
Set timeout in jiffies for RCU task stall warning
|
||||
messages. Disable with a value less than or equal
|
||||
to zero.
|
||||
Set timeout in jiffies for RCU task stall
|
||||
warning messages. Disable with a value less
|
||||
than or equal to zero. Defaults to ten minutes.
|
||||
A change in value does not take effect until
|
||||
the beginning of the next grace period.
|
||||
|
||||
rcupdate.rcu_self_test= [KNL]
|
||||
Run the RCU early boot self tests
|
||||
|
|
@ -5316,6 +5394,8 @@
|
|||
|
||||
serialnumber [BUGS=X86-32]
|
||||
|
||||
sev=option[,option...] [X86-64] See Documentation/x86/x86_64/boot-options.rst
|
||||
|
||||
shapers= [NET]
|
||||
Maximal number of shapers.
|
||||
|
||||
|
|
@ -5385,6 +5465,17 @@
|
|||
smart2= [HW]
|
||||
Format: <io1>[,<io2>[,...,<io8>]]
|
||||
|
||||
smp.csd_lock_timeout= [KNL]
|
||||
Specify the period of time in milliseconds
|
||||
that smp_call_function() and friends will wait
|
||||
for a CPU to release the CSD lock. This is
|
||||
useful when diagnosing bugs involving CPUs
|
||||
disabling interrupts for extended periods
|
||||
of time. Defaults to 5,000 milliseconds, and
|
||||
setting a value of zero disables this feature.
|
||||
This feature may be more efficiently disabled
|
||||
using the csdlock_debug= kernel parameter.
|
||||
|
||||
smsc-ircc2.nopnp [HW] Don't use PNP to discover SMC devices
|
||||
smsc-ircc2.ircc_cfg= [HW] Device configuration I/O port
|
||||
smsc-ircc2.ircc_sir= [HW] SIR base I/O port
|
||||
|
|
@ -5616,6 +5707,30 @@
|
|||
off: Disable mitigation and remove
|
||||
performance impact to RDRAND and RDSEED
|
||||
|
||||
srcutree.big_cpu_lim [KNL]
|
||||
Specifies the number of CPUs constituting a
|
||||
large system, such that srcu_struct structures
|
||||
should immediately allocate an srcu_node array.
|
||||
This kernel-boot parameter defaults to 128,
|
||||
but takes effect only when the low-order four
|
||||
bits of srcutree.convert_to_big are equal to 3
|
||||
(decide at boot).
|
||||
|
||||
srcutree.convert_to_big [KNL]
|
||||
Specifies under what conditions an SRCU tree
|
||||
srcu_struct structure will be converted to big
|
||||
form, that is, with an rcu_node tree:
|
||||
|
||||
0: Never.
|
||||
1: At init_srcu_struct() time.
|
||||
2: When rcutorture decides to.
|
||||
3: Decide at boot time (default).
|
||||
0x1X: Above plus if high contention.
|
||||
|
||||
Either way, the srcu_node tree will be sized based
|
||||
on the actual runtime number of CPUs (nr_cpu_ids)
|
||||
instead of the compile-time CONFIG_NR_CPUS.
|
||||
|
||||
srcutree.counter_wrap_check [KNL]
|
||||
Specifies how frequently to check for
|
||||
grace-period sequence counter wrap for the
|
||||
|
|
@ -5633,6 +5748,14 @@
|
|||
expediting. Set to zero to disable automatic
|
||||
expediting.
|
||||
|
||||
srcutree.small_contention_lim [KNL]
|
||||
Specifies the number of update-side contention
|
||||
events per jiffy that will be tolerated before
|
||||
initiating a conversion of an srcu_struct
|
||||
structure to big form. Note that the value of
|
||||
srcutree.convert_to_big must have the 0x10 bit
|
||||
set for contention-based conversions to occur.
|
||||
|
||||
ssbd= [ARM64,HW]
|
||||
Speculative Store Bypass Disable control
|
||||
|
||||
|
|
|
|||
|
|
@ -994,6 +994,9 @@ This is a directory, with the following entries:
|
|||
* ``boot_id``: a UUID generated the first time this is retrieved, and
|
||||
unvarying after that;
|
||||
|
||||
* ``uuid``: a UUID generated every time this is retrieved (this can
|
||||
thus be used to generate UUIDs at will);
|
||||
|
||||
* ``entropy_avail``: the pool's entropy count, in bits;
|
||||
|
||||
* ``poolsize``: the entropy pool size, in bits;
|
||||
|
|
@ -1001,10 +1004,7 @@ This is a directory, with the following entries:
|
|||
* ``urandom_min_reseed_secs``: obsolete (used to determine the minimum
|
||||
number of seconds between urandom pool reseeding). This file is
|
||||
writable for compatibility purposes, but writing to it has no effect
|
||||
on any RNG behavior.
|
||||
|
||||
* ``uuid``: a UUID generated every time this is retrieved (this can
|
||||
thus be used to generate UUIDs at will);
|
||||
on any RNG behavior;
|
||||
|
||||
* ``write_wakeup_threshold``: when the entropy count drops below this
|
||||
(as a number of bits), processes waiting to write to ``/dev/random``
|
||||
|
|
|
|||
|
|
@ -350,6 +350,16 @@ Before jumping into the kernel, the following conditions must be met:
|
|||
|
||||
- SMCR_EL2.FA64 (bit 31) must be initialised to 0b1.
|
||||
|
||||
For CPUs with the Memory Tagging Extension feature (FEAT_MTE2):
|
||||
|
||||
- If EL3 is present:
|
||||
|
||||
- SCR_EL3.ATA (bit 26) must be initialised to 0b1.
|
||||
|
||||
- If the kernel is entered at EL1 and EL2 is present:
|
||||
|
||||
- HCR_EL2.ATA (bit 56) must be initialised to 0b1.
|
||||
|
||||
The requirements described above for CPU mode, caches, MMUs, architected
|
||||
timers, coherency and system registers apply to all CPUs. All CPUs must
|
||||
enter the kernel in the same exception level. Where the values documented
|
||||
|
|
|
|||
|
|
@ -264,6 +264,39 @@ HWCAP2_MTE3
|
|||
Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0011, as described
|
||||
by Documentation/arm64/memory-tagging-extension.rst.
|
||||
|
||||
HWCAP2_SME
|
||||
|
||||
Functionality implied by ID_AA64PFR1_EL1.SME == 0b0001, as described
|
||||
by Documentation/arm64/sme.rst.
|
||||
|
||||
HWCAP2_SME_I16I64
|
||||
|
||||
Functionality implied by ID_AA64SMFR0_EL1.I16I64 == 0b1111.
|
||||
|
||||
HWCAP2_SME_F64F64
|
||||
|
||||
Functionality implied by ID_AA64SMFR0_EL1.F64F64 == 0b1.
|
||||
|
||||
HWCAP2_SME_I8I32
|
||||
|
||||
Functionality implied by ID_AA64SMFR0_EL1.I8I32 == 0b1111.
|
||||
|
||||
HWCAP2_SME_F16F32
|
||||
|
||||
Functionality implied by ID_AA64SMFR0_EL1.F16F32 == 0b1.
|
||||
|
||||
HWCAP2_SME_B16F32
|
||||
|
||||
Functionality implied by ID_AA64SMFR0_EL1.B16F32 == 0b1.
|
||||
|
||||
HWCAP2_SME_F32F32
|
||||
|
||||
Functionality implied by ID_AA64SMFR0_EL1.F32F32 == 0b1.
|
||||
|
||||
HWCAP2_SME_FA64
|
||||
|
||||
Functionality implied by ID_AA64SMFR0_EL1.FA64 == 0b1.
|
||||
|
||||
4. Unused AT_HWCAP bits
|
||||
-----------------------
|
||||
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ ARM64 Architecture
|
|||
perf
|
||||
pointer-authentication
|
||||
silicon-errata
|
||||
sme
|
||||
sve
|
||||
tagged-address-abi
|
||||
tagged-pointers
|
||||
|
|
|
|||
428
Documentation/arm64/sme.rst
Normal file
428
Documentation/arm64/sme.rst
Normal file
|
|
@ -0,0 +1,428 @@
|
|||
===================================================
|
||||
Scalable Matrix Extension support for AArch64 Linux
|
||||
===================================================
|
||||
|
||||
This document outlines briefly the interface provided to userspace by Linux in
|
||||
order to support use of the ARM Scalable Matrix Extension (SME).
|
||||
|
||||
This is an outline of the most important features and issues only and not
|
||||
intended to be exhaustive. It should be read in conjunction with the SVE
|
||||
documentation in sve.rst which provides details on the Streaming SVE mode
|
||||
included in SME.
|
||||
|
||||
This document does not aim to describe the SME architecture or programmer's
|
||||
model. To aid understanding, a minimal description of relevant programmer's
|
||||
model features for SME is included in Appendix A.
|
||||
|
||||
|
||||
1. General
|
||||
-----------
|
||||
|
||||
* PSTATE.SM, PSTATE.ZA, the streaming mode vector length, the ZA
|
||||
register state and TPIDR2_EL0 are tracked per thread.
|
||||
|
||||
* The presence of SME is reported to userspace via HWCAP2_SME in the aux vector
|
||||
AT_HWCAP2 entry. Presence of this flag implies the presence of the SME
|
||||
instructions and registers, and the Linux-specific system interfaces
|
||||
described in this document. SME is reported in /proc/cpuinfo as "sme".
|
||||
|
||||
* Support for the execution of SME instructions in userspace can also be
|
||||
detected by reading the CPU ID register ID_AA64PFR1_EL1 using an MRS
|
||||
instruction, and checking that the value of the SME field is nonzero. [3]
|
||||
|
||||
It does not guarantee the presence of the system interfaces described in the
|
||||
following sections: software that needs to verify that those interfaces are
|
||||
present must check for HWCAP2_SME instead.
|
||||
|
||||
* There are a number of optional SME features, presence of these is reported
|
||||
through the following AT_HWCAP2 flags:
|
||||
|
||||
HWCAP2_SME_I16I64
|
||||
HWCAP2_SME_F64F64
|
||||
HWCAP2_SME_I8I32
|
||||
HWCAP2_SME_F16F32
|
||||
HWCAP2_SME_B16F32
|
||||
HWCAP2_SME_F32F32
|
||||
HWCAP2_SME_FA64
|
||||
|
||||
This list may be extended over time as the SME architecture evolves.
|
||||
|
||||
These extensions are also reported via the CPU ID register ID_AA64SMFR0_EL1,
|
||||
which userspace can read using an MRS instruction. See elf_hwcaps.rst and
|
||||
cpu-feature-registers.rst for details.
|
||||
|
||||
* Debuggers should restrict themselves to interacting with the target via the
|
||||
NT_ARM_SVE, NT_ARM_SSVE and NT_ARM_ZA regsets. The recommended way
|
||||
of detecting support for these regsets is to connect to a target process
|
||||
first and then attempt a
|
||||
|
||||
ptrace(PTRACE_GETREGSET, pid, NT_ARM_<regset>, &iov).
|
||||
|
||||
* Whenever ZA register values are exchanged in memory between userspace and
|
||||
the kernel, the register value is encoded in memory as a series of horizontal
|
||||
vectors from 0 to VL/8-1 stored in the same endianness invariant format as is
|
||||
used for SVE vectors.
|
||||
|
||||
* On thread creation TPIDR2_EL0 is preserved unless CLONE_SETTLS is specified,
|
||||
in which case it is set to 0.
|
||||
|
||||
2. Vector lengths
|
||||
------------------
|
||||
|
||||
SME defines a second vector length, similar to the SVE vector length, which
|
||||
controls the size of the streaming mode SVE vectors and the ZA matrix array.
|
||||
The ZA matrix is square with each side having as many bytes as a streaming
|
||||
mode SVE vector.
|
||||
|
||||
|
||||
3. Sharing of streaming and non-streaming mode SVE state
|
||||
---------------------------------------------------------
|
||||
|
||||
It is implementation defined which if any parts of the SVE state are shared
|
||||
between streaming and non-streaming modes. When switching between modes
|
||||
via software interfaces such as ptrace if no register content is provided as
|
||||
part of switching no state will be assumed to be shared and everything will
|
||||
be zeroed.
|
||||
|
||||
|
||||
4. System call behaviour
|
||||
-------------------------
|
||||
|
||||
* On syscall PSTATE.ZA is preserved, if PSTATE.ZA==1 then the contents of the
|
||||
ZA matrix are preserved.
|
||||
|
||||
* On syscall PSTATE.SM will be cleared and the SVE registers will be handled
|
||||
as per the standard SVE ABI.
|
||||
|
||||
* Neither the SVE registers nor ZA are used to pass arguments to or receive
|
||||
results from any syscall.
|
||||
|
||||
* On process creation (eg, clone()) the newly created process will have
|
||||
PSTATE.SM cleared.
|
||||
|
||||
* All other SME state of a thread, including the currently configured vector
|
||||
length, the state of the PR_SME_VL_INHERIT flag, and the deferred vector
|
||||
length (if any), is preserved across all syscalls, subject to the specific
|
||||
exceptions for execve() described in section 6.
|
||||
|
||||
|
||||
5. Signal handling
|
||||
-------------------
|
||||
|
||||
* Signal handlers are invoked with streaming mode and ZA disabled.
|
||||
|
||||
* A new signal frame record za_context encodes the ZA register contents on
|
||||
signal delivery. [1]
|
||||
|
||||
* The signal frame record for ZA always contains basic metadata, in particular
|
||||
the thread's vector length (in za_context.vl).
|
||||
|
||||
* The ZA matrix may or may not be included in the record, depending on
|
||||
the value of PSTATE.ZA. The registers are present if and only if:
|
||||
za_context.head.size >= ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(za_context.vl))
|
||||
in which case PSTATE.ZA == 1.
|
||||
|
||||
* If matrix data is present, the remainder of the record has a vl-dependent
|
||||
size and layout. Macros ZA_SIG_* are defined [1] to facilitate access to
|
||||
them.
|
||||
|
||||
* The matrix is stored as a series of horizontal vectors in the same format as
|
||||
is used for SVE vectors.
|
||||
|
||||
* If the ZA context is too big to fit in sigcontext.__reserved[], then extra
|
||||
space is allocated on the stack, an extra_context record is written in
|
||||
__reserved[] referencing this space. za_context is then written in the
|
||||
extra space. Refer to [1] for further details about this mechanism.
|
||||
|
||||
|
||||
5. Signal return
|
||||
-----------------
|
||||
|
||||
When returning from a signal handler:
|
||||
|
||||
* If there is no za_context record in the signal frame, or if the record is
|
||||
present but contains no register data as described in the previous section,
|
||||
then ZA is disabled.
|
||||
|
||||
* If za_context is present in the signal frame and contains matrix data then
|
||||
PSTATE.ZA is set to 1 and ZA is populated with the specified data.
|
||||
|
||||
* The vector length cannot be changed via signal return. If za_context.vl in
|
||||
the signal frame does not match the current vector length, the signal return
|
||||
attempt is treated as illegal, resulting in a forced SIGSEGV.
|
||||
|
||||
|
||||
6. prctl extensions
|
||||
--------------------
|
||||
|
||||
Some new prctl() calls are added to allow programs to manage the SME vector
|
||||
length:
|
||||
|
||||
prctl(PR_SME_SET_VL, unsigned long arg)
|
||||
|
||||
Sets the vector length of the calling thread and related flags, where
|
||||
arg == vl | flags. Other threads of the calling process are unaffected.
|
||||
|
||||
vl is the desired vector length, where sve_vl_valid(vl) must be true.
|
||||
|
||||
flags:
|
||||
|
||||
PR_SME_VL_INHERIT
|
||||
|
||||
Inherit the current vector length across execve(). Otherwise, the
|
||||
vector length is reset to the system default at execve(). (See
|
||||
Section 9.)
|
||||
|
||||
PR_SME_SET_VL_ONEXEC
|
||||
|
||||
Defer the requested vector length change until the next execve()
|
||||
performed by this thread.
|
||||
|
||||
The effect is equivalent to implicit execution of the following
|
||||
call immediately after the next execve() (if any) by the thread:
|
||||
|
||||
prctl(PR_SME_SET_VL, arg & ~PR_SME_SET_VL_ONEXEC)
|
||||
|
||||
This allows launching of a new program with a different vector
|
||||
length, while avoiding runtime side effects in the caller.
|
||||
|
||||
Without PR_SME_SET_VL_ONEXEC, the requested change takes effect
|
||||
immediately.
|
||||
|
||||
|
||||
Return value: a nonnegative value on success, or a negative value on error:
|
||||
EINVAL: SME not supported, invalid vector length requested, or
|
||||
invalid flags.
|
||||
|
||||
|
||||
On success:
|
||||
|
||||
* Either the calling thread's vector length or the deferred vector length
|
||||
to be applied at the next execve() by the thread (dependent on whether
|
||||
PR_SME_SET_VL_ONEXEC is present in arg), is set to the largest value
|
||||
supported by the system that is less than or equal to vl. If vl ==
|
||||
SVE_VL_MAX, the value set will be the largest value supported by the
|
||||
system.
|
||||
|
||||
* Any previously outstanding deferred vector length change in the calling
|
||||
thread is cancelled.
|
||||
|
||||
* The returned value describes the resulting configuration, encoded as for
|
||||
PR_SME_GET_VL. The vector length reported in this value is the new
|
||||
current vector length for this thread if PR_SME_SET_VL_ONEXEC was not
|
||||
present in arg; otherwise, the reported vector length is the deferred
|
||||
vector length that will be applied at the next execve() by the calling
|
||||
thread.
|
||||
|
||||
* Changing the vector length causes all of ZA, P0..P15, FFR and all bits of
|
||||
Z0..Z31 except for Z0 bits [127:0] .. Z31 bits [127:0] to become
|
||||
unspecified, including both streaming and non-streaming SVE state.
|
||||
Calling PR_SME_SET_VL with vl equal to the thread's current vector
|
||||
length, or calling PR_SME_SET_VL with the PR_SME_SET_VL_ONEXEC flag,
|
||||
does not constitute a change to the vector length for this purpose.
|
||||
|
||||
* Changing the vector length causes PSTATE.ZA and PSTATE.SM to be cleared.
|
||||
Calling PR_SME_SET_VL with vl equal to the thread's current vector
|
||||
length, or calling PR_SME_SET_VL with the PR_SME_SET_VL_ONEXEC flag,
|
||||
does not constitute a change to the vector length for this purpose.
|
||||
|
||||
|
||||
prctl(PR_SME_GET_VL)
|
||||
|
||||
Gets the vector length of the calling thread.
|
||||
|
||||
The following flag may be OR-ed into the result:
|
||||
|
||||
PR_SME_VL_INHERIT
|
||||
|
||||
Vector length will be inherited across execve().
|
||||
|
||||
There is no way to determine whether there is an outstanding deferred
|
||||
vector length change (which would only normally be the case between a
|
||||
fork() or vfork() and the corresponding execve() in typical use).
|
||||
|
||||
To extract the vector length from the result, bitwise and it with
|
||||
PR_SME_VL_LEN_MASK.
|
||||
|
||||
Return value: a nonnegative value on success, or a negative value on error:
|
||||
EINVAL: SME not supported.
|
||||
|
||||
|
||||
7. ptrace extensions
|
||||
---------------------
|
||||
|
||||
* A new regset NT_ARM_SSVE is defined for access to streaming mode SVE
|
||||
state via PTRACE_GETREGSET and PTRACE_SETREGSET, this is documented in
|
||||
sve.rst.
|
||||
|
||||
* A new regset NT_ARM_ZA is defined for access to ZA state via
|
||||
PTRACE_GETREGSET and PTRACE_SETREGSET.
|
||||
|
||||
Refer to [2] for definitions.
|
||||
|
||||
The regset data starts with struct user_za_header, containing:
|
||||
|
||||
size
|
||||
|
||||
Size of the complete regset, in bytes.
|
||||
This depends on vl and possibly on other things in the future.
|
||||
|
||||
If a call to PTRACE_GETREGSET requests less data than the value of
|
||||
size, the caller can allocate a larger buffer and retry in order to
|
||||
read the complete regset.
|
||||
|
||||
max_size
|
||||
|
||||
Maximum size in bytes that the regset can grow to for the target
|
||||
thread. The regset won't grow bigger than this even if the target
|
||||
thread changes its vector length etc.
|
||||
|
||||
vl
|
||||
|
||||
Target thread's current streaming vector length, in bytes.
|
||||
|
||||
max_vl
|
||||
|
||||
Maximum possible streaming vector length for the target thread.
|
||||
|
||||
flags
|
||||
|
||||
Zero or more of the following flags, which have the same
|
||||
meaning and behaviour as the corresponding PR_SET_VL_* flags:
|
||||
|
||||
SME_PT_VL_INHERIT
|
||||
|
||||
SME_PT_VL_ONEXEC (SETREGSET only).
|
||||
|
||||
* The effects of changing the vector length and/or flags are equivalent to
|
||||
those documented for PR_SME_SET_VL.
|
||||
|
||||
The caller must make a further GETREGSET call if it needs to know what VL is
|
||||
actually set by SETREGSET, unless it is known in advance that the requested
|
||||
VL is supported.
|
||||
|
||||
* The size and layout of the payload depends on the header fields. The
|
||||
SME_PT_ZA_*() macros are provided to facilitate access to the data.
|
||||
|
||||
* In either case, for SETREGSET it is permissible to omit the payload, in which
|
||||
case the vector length and flags are changed and PSTATE.ZA is set to 0
|
||||
(along with any consequences of those changes). If a payload is provided
|
||||
then PSTATE.ZA will be set to 1.
|
||||
|
||||
* For SETREGSET, if the requested VL is not supported, the effect will be the
|
||||
same as if the payload were omitted, except that an EIO error is reported.
|
||||
No attempt is made to translate the payload data to the correct layout
|
||||
for the vector length actually set. It is up to the caller to translate the
|
||||
payload layout for the actual VL and retry.
|
||||
|
||||
* The effect of writing a partial, incomplete payload is unspecified.
|
||||
|
||||
|
||||
8. ELF coredump extensions
|
||||
---------------------------
|
||||
|
||||
* NT_ARM_SSVE notes will be added to each coredump for
|
||||
each thread of the dumped process. The contents will be equivalent to the
|
||||
data that would have been read if a PTRACE_GETREGSET of the corresponding
|
||||
type were executed for each thread when the coredump was generated.
|
||||
|
||||
* A NT_ARM_ZA note will be added to each coredump for each thread of the
|
||||
dumped process. The contents will be equivalent to the data that would have
|
||||
been read if a PTRACE_GETREGSET of NT_ARM_ZA were executed for each thread
|
||||
when the coredump was generated.
|
||||
|
||||
|
||||
9. System runtime configuration
|
||||
--------------------------------
|
||||
|
||||
* To mitigate the ABI impact of expansion of the signal frame, a policy
|
||||
mechanism is provided for administrators, distro maintainers and developers
|
||||
to set the default vector length for userspace processes:
|
||||
|
||||
/proc/sys/abi/sme_default_vector_length
|
||||
|
||||
Writing the text representation of an integer to this file sets the system
|
||||
default vector length to the specified value, unless the value is greater
|
||||
than the maximum vector length supported by the system in which case the
|
||||
default vector length is set to that maximum.
|
||||
|
||||
The result can be determined by reopening the file and reading its
|
||||
contents.
|
||||
|
||||
At boot, the default vector length is initially set to 32 or the maximum
|
||||
supported vector length, whichever is smaller and supported. This
|
||||
determines the initial vector length of the init process (PID 1).
|
||||
|
||||
Reading this file returns the current system default vector length.
|
||||
|
||||
* At every execve() call, the new vector length of the new process is set to
|
||||
the system default vector length, unless
|
||||
|
||||
* PR_SME_VL_INHERIT (or equivalently SME_PT_VL_INHERIT) is set for the
|
||||
calling thread, or
|
||||
|
||||
* a deferred vector length change is pending, established via the
|
||||
PR_SME_SET_VL_ONEXEC flag (or SME_PT_VL_ONEXEC).
|
||||
|
||||
* Modifying the system default vector length does not affect the vector length
|
||||
of any existing process or thread that does not make an execve() call.
|
||||
|
||||
|
||||
Appendix A. SME programmer's model (informative)
|
||||
=================================================
|
||||
|
||||
This section provides a minimal description of the additions made by SME to the
|
||||
ARMv8-A programmer's model that are relevant to this document.
|
||||
|
||||
Note: This section is for information only and not intended to be complete or
|
||||
to replace any architectural specification.
|
||||
|
||||
A.1. Registers
|
||||
---------------
|
||||
|
||||
In A64 state, SME adds the following:
|
||||
|
||||
* A new mode, streaming mode, in which a subset of the normal FPSIMD and SVE
|
||||
features are available. When supported EL0 software may enter and leave
|
||||
streaming mode at any time.
|
||||
|
||||
For best system performance it is strongly encouraged for software to enable
|
||||
streaming mode only when it is actively being used.
|
||||
|
||||
* A new vector length controlling the size of ZA and the Z registers when in
|
||||
streaming mode, separately to the vector length used for SVE when not in
|
||||
streaming mode. There is no requirement that either the currently selected
|
||||
vector length or the set of vector lengths supported for the two modes in
|
||||
a given system have any relationship. The streaming mode vector length
|
||||
is referred to as SVL.
|
||||
|
||||
* A new ZA matrix register. This is a square matrix of SVLxSVL bits. Most
|
||||
operations on ZA require that streaming mode be enabled but ZA can be
|
||||
enabled without streaming mode in order to load, save and retain data.
|
||||
|
||||
For best system performance it is strongly encouraged for software to enable
|
||||
ZA only when it is actively being used.
|
||||
|
||||
* Two new 1 bit fields in PSTATE which may be controlled via the SMSTART and
|
||||
SMSTOP instructions or by access to the SVCR system register:
|
||||
|
||||
* PSTATE.ZA, if this is 1 then the ZA matrix is accessible and has valid
|
||||
data while if it is 0 then ZA can not be accessed. When PSTATE.ZA is
|
||||
changed from 0 to 1 all bits in ZA are cleared.
|
||||
|
||||
* PSTATE.SM, if this is 1 then the PE is in streaming mode. When the value
|
||||
of PSTATE.SM is changed then it is implementation defined if the subset
|
||||
of the floating point register bits valid in both modes may be retained.
|
||||
Any other bits will be cleared.
|
||||
|
||||
|
||||
References
|
||||
==========
|
||||
|
||||
[1] arch/arm64/include/uapi/asm/sigcontext.h
|
||||
AArch64 Linux signal ABI definitions
|
||||
|
||||
[2] arch/arm64/include/uapi/asm/ptrace.h
|
||||
AArch64 Linux ptrace ABI definitions
|
||||
|
||||
[3] Documentation/arm64/cpu-feature-registers.rst
|
||||
|
|
@ -7,7 +7,9 @@ Author: Dave Martin <Dave.Martin@arm.com>
|
|||
Date: 4 August 2017
|
||||
|
||||
This document outlines briefly the interface provided to userspace by Linux in
|
||||
order to support use of the ARM Scalable Vector Extension (SVE).
|
||||
order to support use of the ARM Scalable Vector Extension (SVE), including
|
||||
interactions with Streaming SVE mode added by the Scalable Matrix Extension
|
||||
(SME).
|
||||
|
||||
This is an outline of the most important features and issues only and not
|
||||
intended to be exhaustive.
|
||||
|
|
@ -23,6 +25,10 @@ model features for SVE is included in Appendix A.
|
|||
* SVE registers Z0..Z31, P0..P15 and FFR and the current vector length VL, are
|
||||
tracked per-thread.
|
||||
|
||||
* In streaming mode FFR is not accessible unless HWCAP2_SME_FA64 is present
|
||||
in the system, when it is not supported and these interfaces are used to
|
||||
access streaming mode FFR, it is read and written as zero.
|
||||
|
||||
* The presence of SVE is reported to userspace via HWCAP_SVE in the aux vector
|
||||
AT_HWCAP entry. Presence of this flag implies the presence of the SVE
|
||||
instructions and registers, and the Linux-specific system interfaces
|
||||
|
|
@ -53,10 +59,19 @@ model features for SVE is included in Appendix A.
|
|||
which userspace can read using an MRS instruction. See elf_hwcaps.txt and
|
||||
cpu-feature-registers.txt for details.
|
||||
|
||||
* On hardware that supports the SME extensions, HWCAP2_SME will also be
|
||||
reported in the AT_HWCAP2 aux vector entry. Among other things SME adds
|
||||
streaming mode which provides a subset of the SVE feature set using a
|
||||
separate SME vector length and the same Z/V registers. See sme.rst
|
||||
for more details.
|
||||
|
||||
* Debuggers should restrict themselves to interacting with the target via the
|
||||
NT_ARM_SVE regset. The recommended way of detecting support for this regset
|
||||
is to connect to a target process first and then attempt a
|
||||
ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov).
|
||||
ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov). Note that when SME is
|
||||
present and streaming SVE mode is in use the FPSIMD subset of registers
|
||||
will be read via NT_ARM_SVE and NT_ARM_SVE writes will exit streaming mode
|
||||
in the target.
|
||||
|
||||
* Whenever SVE scalable register values (Zn, Pn, FFR) are exchanged in memory
|
||||
between userspace and the kernel, the register value is encoded in memory in
|
||||
|
|
@ -126,6 +141,11 @@ the SVE instruction set architecture.
|
|||
are only present in fpsimd_context. For convenience, the content of V0..V31
|
||||
is duplicated between sve_context and fpsimd_context.
|
||||
|
||||
* The record contains a flag field which includes a flag SVE_SIG_FLAG_SM which
|
||||
if set indicates that the thread is in streaming mode and the vector length
|
||||
and register data (if present) describe the streaming SVE data and vector
|
||||
length.
|
||||
|
||||
* The signal frame record for SVE always contains basic metadata, in particular
|
||||
the thread's vector length (in sve_context.vl).
|
||||
|
||||
|
|
@ -170,6 +190,11 @@ When returning from a signal handler:
|
|||
the signal frame does not match the current vector length, the signal return
|
||||
attempt is treated as illegal, resulting in a forced SIGSEGV.
|
||||
|
||||
* It is permitted to enter or leave streaming mode by setting or clearing
|
||||
the SVE_SIG_FLAG_SM flag but applications should take care to ensure that
|
||||
when doing so sve_context.vl and any register data are appropriate for the
|
||||
vector length in the new mode.
|
||||
|
||||
|
||||
6. prctl extensions
|
||||
--------------------
|
||||
|
|
@ -265,8 +290,14 @@ prctl(PR_SVE_GET_VL)
|
|||
7. ptrace extensions
|
||||
---------------------
|
||||
|
||||
* A new regset NT_ARM_SVE is defined for use with PTRACE_GETREGSET and
|
||||
PTRACE_SETREGSET.
|
||||
* New regsets NT_ARM_SVE and NT_ARM_SSVE are defined for use with
|
||||
PTRACE_GETREGSET and PTRACE_SETREGSET. NT_ARM_SSVE describes the
|
||||
streaming mode SVE registers and NT_ARM_SVE describes the
|
||||
non-streaming mode SVE registers.
|
||||
|
||||
In this description a register set is referred to as being "live" when
|
||||
the target is in the appropriate streaming or non-streaming mode and is
|
||||
using data beyond the subset shared with the FPSIMD Vn registers.
|
||||
|
||||
Refer to [2] for definitions.
|
||||
|
||||
|
|
@ -297,7 +328,7 @@ The regset data starts with struct user_sve_header, containing:
|
|||
|
||||
flags
|
||||
|
||||
either
|
||||
at most one of
|
||||
|
||||
SVE_PT_REGS_FPSIMD
|
||||
|
||||
|
|
@ -331,6 +362,10 @@ The regset data starts with struct user_sve_header, containing:
|
|||
|
||||
SVE_PT_VL_ONEXEC (SETREGSET only).
|
||||
|
||||
If neither FPSIMD nor SVE flags are provided then no register
|
||||
payload is available, this is only possible when SME is implemented.
|
||||
|
||||
|
||||
* The effects of changing the vector length and/or flags are equivalent to
|
||||
those documented for PR_SVE_SET_VL.
|
||||
|
||||
|
|
@ -346,6 +381,13 @@ The regset data starts with struct user_sve_header, containing:
|
|||
case only the vector length and flags are changed (along with any
|
||||
consequences of those changes).
|
||||
|
||||
* In systems supporting SME when in streaming mode a GETREGSET for
|
||||
NT_ARM_SVE will return only the user_sve_header with no register data,
|
||||
similarly a GETREGSET for NT_ARM_SSVE will not return any register data
|
||||
when not in streaming mode.
|
||||
|
||||
* A GETREGSET for NT_ARM_SSVE will never return SVE_PT_REGS_FPSIMD.
|
||||
|
||||
* For SETREGSET, if an SVE_PT_REGS_SVE payload is present and the
|
||||
requested VL is not supported, the effect will be the same as if the
|
||||
payload were omitted, except that an EIO error is reported. No
|
||||
|
|
@ -355,17 +397,25 @@ The regset data starts with struct user_sve_header, containing:
|
|||
unspecified. It is up to the caller to translate the payload layout
|
||||
for the actual VL and retry.
|
||||
|
||||
* Where SME is implemented it is not possible to GETREGSET the register
|
||||
state for normal SVE when in streaming mode, nor the streaming mode
|
||||
register state when in normal mode, regardless of the implementation defined
|
||||
behaviour of the hardware for sharing data between the two modes.
|
||||
|
||||
* Any SETREGSET of NT_ARM_SVE will exit streaming mode if the target was in
|
||||
streaming mode and any SETREGSET of NT_ARM_SSVE will enter streaming mode
|
||||
if the target was not in streaming mode.
|
||||
|
||||
* The effect of writing a partial, incomplete payload is unspecified.
|
||||
|
||||
|
||||
8. ELF coredump extensions
|
||||
---------------------------
|
||||
|
||||
* A NT_ARM_SVE note will be added to each coredump for each thread of the
|
||||
dumped process. The contents will be equivalent to the data that would have
|
||||
been read if a PTRACE_GETREGSET of NT_ARM_SVE were executed for each thread
|
||||
when the coredump was generated.
|
||||
|
||||
* NT_ARM_SVE and NT_ARM_SSVE notes will be added to each coredump for
|
||||
each thread of the dumped process. The contents will be equivalent to the
|
||||
data that would have been read if a PTRACE_GETREGSET of the corresponding
|
||||
type were executed for each thread when the coredump was generated.
|
||||
|
||||
9. System runtime configuration
|
||||
--------------------------------
|
||||
|
|
|
|||
|
|
@ -218,7 +218,6 @@ current *struct* is::
|
|||
int (*tray_move)(struct cdrom_device_info *, int);
|
||||
int (*lock_door)(struct cdrom_device_info *, int);
|
||||
int (*select_speed)(struct cdrom_device_info *, int);
|
||||
int (*select_disc)(struct cdrom_device_info *, int);
|
||||
int (*get_last_session) (struct cdrom_device_info *,
|
||||
struct cdrom_multisession *);
|
||||
int (*get_mcn)(struct cdrom_device_info *, struct cdrom_mcn *);
|
||||
|
|
@ -419,15 +418,6 @@ this `auto-selection` capability, the decision should be made on the
|
|||
current disc loaded and the return value should be positive. A negative
|
||||
return value indicates an error.
|
||||
|
||||
::
|
||||
|
||||
int select_disc(struct cdrom_device_info *cdi, int number)
|
||||
|
||||
If the drive can store multiple discs (a juke-box) this function
|
||||
will perform disc selection. It should return the number of the
|
||||
selected disc on success, a negative value on error. Currently, only
|
||||
the ide-cd driver supports this functionality.
|
||||
|
||||
::
|
||||
|
||||
int get_last_session(struct cdrom_device_info *cdi,
|
||||
|
|
|
|||
|
|
@ -132,6 +132,7 @@ Some additional variants exist for more specialized cases:
|
|||
.. c:function:: u64 ktime_get_mono_fast_ns( void )
|
||||
u64 ktime_get_raw_fast_ns( void )
|
||||
u64 ktime_get_boot_fast_ns( void )
|
||||
u64 ktime_get_tai_fast_ns( void )
|
||||
u64 ktime_get_real_fast_ns( void )
|
||||
|
||||
These variants are safe to call from any context, including from
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ properties:
|
|||
- items:
|
||||
- enum:
|
||||
- renesas,sata-r8a774b1 # RZ/G2N
|
||||
- renesas,sata-r8a774e1 # RZ/G2H
|
||||
- renesas,sata-r8a7795 # R-Car H3
|
||||
- renesas,sata-r8a77965 # R-Car M3-N
|
||||
- const: renesas,rcar-gen3-sata # generic R-Car Gen3 or RZ/G2
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
|
|||
title: ARM Generic Interrupt Controller, version 3
|
||||
|
||||
maintainers:
|
||||
- Marc Zyngier <marc.zyngier@arm.com>
|
||||
- Marc Zyngier <maz@kernel.org>
|
||||
|
||||
description: |
|
||||
AArch64 SMP cores are often associated with a GICv3, providing Private
|
||||
|
|
@ -78,7 +78,11 @@ properties:
|
|||
- GIC Hypervisor interface (GICH)
|
||||
- GIC Virtual CPU interface (GICV)
|
||||
|
||||
GICC, GICH and GICV are optional.
|
||||
GICC, GICH and GICV are optional, but must be described if the CPUs
|
||||
support them. Examples of such CPUs are ARM's implementations of the
|
||||
ARMv8.0 architecture such as Cortex-A32, A34, A35, A53, A57, A72 and
|
||||
A73 (this list is not exhaustive).
|
||||
|
||||
minItems: 2
|
||||
maxItems: 4096 # Should be enough?
|
||||
|
||||
|
|
|
|||
|
|
@ -25,12 +25,6 @@ properties:
|
|||
- const: fsl,qoriq-memory-controller
|
||||
- enum:
|
||||
- fsl,bsc9132-memory-controller
|
||||
- fsl,8540-memory-controller
|
||||
- fsl,8541-memory-controller
|
||||
- fsl,8544-memory-controller
|
||||
- fsl,8548-memory-controller
|
||||
- fsl,8555-memory-controller
|
||||
- fsl,8568-memory-controller
|
||||
- fsl,mpc8536-memory-controller
|
||||
- fsl,mpc8540-memory-controller
|
||||
- fsl,mpc8541-memory-controller
|
||||
|
|
|
|||
|
|
@ -14,6 +14,8 @@ properties:
|
|||
compatible:
|
||||
enum:
|
||||
- arm,cmn-600
|
||||
- arm,cmn-650
|
||||
- arm,cmn-700
|
||||
- arm,ci-700
|
||||
|
||||
reg:
|
||||
|
|
|
|||
|
|
@ -6,12 +6,6 @@ The cache bindings explained below are Devicetree Specification compliant
|
|||
Required Properties:
|
||||
|
||||
- compatible : Should include one of the following:
|
||||
"fsl,8540-l2-cache-controller"
|
||||
"fsl,8541-l2-cache-controller"
|
||||
"fsl,8544-l2-cache-controller"
|
||||
"fsl,8548-l2-cache-controller"
|
||||
"fsl,8555-l2-cache-controller"
|
||||
"fsl,8568-l2-cache-controller"
|
||||
"fsl,b4420-l2-cache-controller"
|
||||
"fsl,b4860-l2-cache-controller"
|
||||
"fsl,bsc9131-l2-cache-controller"
|
||||
|
|
|
|||
|
|
@ -211,6 +211,7 @@ r200_reg_safe.h
|
|||
r300_reg_safe.h
|
||||
r420_reg_safe.h
|
||||
r600_reg_safe.h
|
||||
randstruct.seed
|
||||
randomize_layout_hash.h
|
||||
randomize_layout_seed.h
|
||||
recordmcount
|
||||
|
|
|
|||
|
|
@ -417,30 +417,66 @@ struct gpio_irq_chip inside struct gpio_chip before adding the gpio_chip.
|
|||
If you do this, the additional irq_chip will be set up by gpiolib at the
|
||||
same time as setting up the rest of the GPIO functionality. The following
|
||||
is a typical example of a chained cascaded interrupt handler using
|
||||
the gpio_irq_chip:
|
||||
the gpio_irq_chip. Note how the mask/unmask (or disable/enable) functions
|
||||
call into the core gpiolib code:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
/* Typical state container with dynamic irqchip */
|
||||
/* Typical state container */
|
||||
struct my_gpio {
|
||||
struct gpio_chip gc;
|
||||
struct irq_chip irq;
|
||||
};
|
||||
|
||||
static void my_gpio_mask_irq(struct irq_data *d)
|
||||
{
|
||||
struct gpio_chip *gc = irq_desc_get_handler_data(d);
|
||||
|
||||
/*
|
||||
* Perform any necessary action to mask the interrupt,
|
||||
* and then call into the core code to synchronise the
|
||||
* state.
|
||||
*/
|
||||
|
||||
gpiochip_disable_irq(gc, d->hwirq);
|
||||
}
|
||||
|
||||
static void my_gpio_unmask_irq(struct irq_data *d)
|
||||
{
|
||||
struct gpio_chip *gc = irq_desc_get_handler_data(d);
|
||||
|
||||
gpiochip_enable_irq(gc, d->hwirq);
|
||||
|
||||
/*
|
||||
* Perform any necessary action to unmask the interrupt,
|
||||
* after having called into the core code to synchronise
|
||||
* the state.
|
||||
*/
|
||||
}
|
||||
|
||||
/*
|
||||
* Statically populate the irqchip. Note that it is made const
|
||||
* (further indicated by the IRQCHIP_IMMUTABLE flag), and that
|
||||
* the GPIOCHIP_IRQ_RESOURCE_HELPERS macro adds some extra
|
||||
* callbacks to the structure.
|
||||
*/
|
||||
static const struct irq_chip my_gpio_irq_chip = {
|
||||
.name = "my_gpio_irq",
|
||||
.irq_ack = my_gpio_ack_irq,
|
||||
.irq_mask = my_gpio_mask_irq,
|
||||
.irq_unmask = my_gpio_unmask_irq,
|
||||
.irq_set_type = my_gpio_set_irq_type,
|
||||
.flags = IRQCHIP_IMMUTABLE,
|
||||
/* Provide the gpio resource callbacks */
|
||||
GPIOCHIP_IRQ_RESOURCE_HELPERS,
|
||||
};
|
||||
|
||||
int irq; /* from platform etc */
|
||||
struct my_gpio *g;
|
||||
struct gpio_irq_chip *girq;
|
||||
|
||||
/* Set up the irqchip dynamically */
|
||||
g->irq.name = "my_gpio_irq";
|
||||
g->irq.irq_ack = my_gpio_ack_irq;
|
||||
g->irq.irq_mask = my_gpio_mask_irq;
|
||||
g->irq.irq_unmask = my_gpio_unmask_irq;
|
||||
g->irq.irq_set_type = my_gpio_set_irq_type;
|
||||
|
||||
/* Get a pointer to the gpio_irq_chip */
|
||||
girq = &g->gc.irq;
|
||||
girq->chip = &g->irq;
|
||||
gpio_irq_chip_set_chip(girq, &my_gpio_irq_chip);
|
||||
girq->parent_handler = ftgpio_gpio_irq_handler;
|
||||
girq->num_parents = 1;
|
||||
girq->parents = devm_kcalloc(dev, 1, sizeof(*girq->parents),
|
||||
|
|
@ -458,23 +494,58 @@ the interrupt separately and go with it:
|
|||
|
||||
.. code-block:: c
|
||||
|
||||
/* Typical state container with dynamic irqchip */
|
||||
/* Typical state container */
|
||||
struct my_gpio {
|
||||
struct gpio_chip gc;
|
||||
struct irq_chip irq;
|
||||
};
|
||||
|
||||
static void my_gpio_mask_irq(struct irq_data *d)
|
||||
{
|
||||
struct gpio_chip *gc = irq_desc_get_handler_data(d);
|
||||
|
||||
/*
|
||||
* Perform any necessary action to mask the interrupt,
|
||||
* and then call into the core code to synchronise the
|
||||
* state.
|
||||
*/
|
||||
|
||||
gpiochip_disable_irq(gc, d->hwirq);
|
||||
}
|
||||
|
||||
static void my_gpio_unmask_irq(struct irq_data *d)
|
||||
{
|
||||
struct gpio_chip *gc = irq_desc_get_handler_data(d);
|
||||
|
||||
gpiochip_enable_irq(gc, d->hwirq);
|
||||
|
||||
/*
|
||||
* Perform any necessary action to unmask the interrupt,
|
||||
* after having called into the core code to synchronise
|
||||
* the state.
|
||||
*/
|
||||
}
|
||||
|
||||
/*
|
||||
* Statically populate the irqchip. Note that it is made const
|
||||
* (further indicated by the IRQCHIP_IMMUTABLE flag), and that
|
||||
* the GPIOCHIP_IRQ_RESOURCE_HELPERS macro adds some extra
|
||||
* callbacks to the structure.
|
||||
*/
|
||||
static const struct irq_chip my_gpio_irq_chip = {
|
||||
.name = "my_gpio_irq",
|
||||
.irq_ack = my_gpio_ack_irq,
|
||||
.irq_mask = my_gpio_mask_irq,
|
||||
.irq_unmask = my_gpio_unmask_irq,
|
||||
.irq_set_type = my_gpio_set_irq_type,
|
||||
.flags = IRQCHIP_IMMUTABLE,
|
||||
/* Provide the gpio resource callbacks */
|
||||
GPIOCHIP_IRQ_RESOURCE_HELPERS,
|
||||
};
|
||||
|
||||
int irq; /* from platform etc */
|
||||
struct my_gpio *g;
|
||||
struct gpio_irq_chip *girq;
|
||||
|
||||
/* Set up the irqchip dynamically */
|
||||
g->irq.name = "my_gpio_irq";
|
||||
g->irq.irq_ack = my_gpio_ack_irq;
|
||||
g->irq.irq_mask = my_gpio_mask_irq;
|
||||
g->irq.irq_unmask = my_gpio_unmask_irq;
|
||||
g->irq.irq_set_type = my_gpio_set_irq_type;
|
||||
|
||||
ret = devm_request_threaded_irq(dev, irq, NULL,
|
||||
irq_thread_fn, IRQF_ONESHOT, "my-chip", g);
|
||||
if (ret < 0)
|
||||
|
|
@ -482,7 +553,7 @@ the interrupt separately and go with it:
|
|||
|
||||
/* Get a pointer to the gpio_irq_chip */
|
||||
girq = &g->gc.irq;
|
||||
girq->chip = &g->irq;
|
||||
gpio_irq_chip_set_chip(girq, &my_gpio_irq_chip);
|
||||
/* This will let us handle the parent IRQ in the driver */
|
||||
girq->parent_handler = NULL;
|
||||
girq->num_parents = 0;
|
||||
|
|
@ -500,24 +571,61 @@ In this case the typical set-up will look like this:
|
|||
/* Typical state container with dynamic irqchip */
|
||||
struct my_gpio {
|
||||
struct gpio_chip gc;
|
||||
struct irq_chip irq;
|
||||
struct fwnode_handle *fwnode;
|
||||
};
|
||||
|
||||
int irq; /* from platform etc */
|
||||
static void my_gpio_mask_irq(struct irq_data *d)
|
||||
{
|
||||
struct gpio_chip *gc = irq_desc_get_handler_data(d);
|
||||
|
||||
/*
|
||||
* Perform any necessary action to mask the interrupt,
|
||||
* and then call into the core code to synchronise the
|
||||
* state.
|
||||
*/
|
||||
|
||||
gpiochip_disable_irq(gc, d->hwirq);
|
||||
irq_chip_mask_parent(d);
|
||||
}
|
||||
|
||||
static void my_gpio_unmask_irq(struct irq_data *d)
|
||||
{
|
||||
struct gpio_chip *gc = irq_desc_get_handler_data(d);
|
||||
|
||||
gpiochip_enable_irq(gc, d->hwirq);
|
||||
|
||||
/*
|
||||
* Perform any necessary action to unmask the interrupt,
|
||||
* after having called into the core code to synchronise
|
||||
* the state.
|
||||
*/
|
||||
|
||||
irq_chip_unmask_parent(d);
|
||||
}
|
||||
|
||||
/*
|
||||
* Statically populate the irqchip. Note that it is made const
|
||||
* (further indicated by the IRQCHIP_IMMUTABLE flag), and that
|
||||
* the GPIOCHIP_IRQ_RESOURCE_HELPERS macro adds some extra
|
||||
* callbacks to the structure.
|
||||
*/
|
||||
static const struct irq_chip my_gpio_irq_chip = {
|
||||
.name = "my_gpio_irq",
|
||||
.irq_ack = my_gpio_ack_irq,
|
||||
.irq_mask = my_gpio_mask_irq,
|
||||
.irq_unmask = my_gpio_unmask_irq,
|
||||
.irq_set_type = my_gpio_set_irq_type,
|
||||
.flags = IRQCHIP_IMMUTABLE,
|
||||
/* Provide the gpio resource callbacks */
|
||||
GPIOCHIP_IRQ_RESOURCE_HELPERS,
|
||||
};
|
||||
|
||||
struct my_gpio *g;
|
||||
struct gpio_irq_chip *girq;
|
||||
|
||||
/* Set up the irqchip dynamically */
|
||||
g->irq.name = "my_gpio_irq";
|
||||
g->irq.irq_ack = my_gpio_ack_irq;
|
||||
g->irq.irq_mask = my_gpio_mask_irq;
|
||||
g->irq.irq_unmask = my_gpio_unmask_irq;
|
||||
g->irq.irq_set_type = my_gpio_set_irq_type;
|
||||
|
||||
/* Get a pointer to the gpio_irq_chip */
|
||||
girq = &g->gc.irq;
|
||||
girq->chip = &g->irq;
|
||||
gpio_irq_chip_set_chip(girq, &my_gpio_irq_chip);
|
||||
girq->default_type = IRQ_TYPE_NONE;
|
||||
girq->handler = handle_bad_irq;
|
||||
girq->fwnode = g->fwnode;
|
||||
|
|
@ -605,8 +713,9 @@ When implementing an irqchip inside a GPIO driver, these two functions should
|
|||
typically be called in the .irq_disable() and .irq_enable() callbacks from the
|
||||
irqchip.
|
||||
|
||||
When using the gpiolib irqchip helpers, these callbacks are automatically
|
||||
assigned.
|
||||
When IRQCHIP_IMMUTABLE is not advertised by the irqchip, these callbacks
|
||||
are automatically assigned. This behaviour is deprecated and on its way
|
||||
to be removed from the kernel.
|
||||
|
||||
|
||||
Real-Time compliance for GPIO IRQ chips
|
||||
|
|
|
|||
|
|
@ -424,12 +424,6 @@ How commands are issued
|
|||
-----------------------
|
||||
|
||||
Internal commands
|
||||
First, qc is allocated and initialized using :c:func:`ata_qc_new_init`.
|
||||
Although :c:func:`ata_qc_new_init` doesn't implement any wait or retry
|
||||
mechanism when qc is not available, internal commands are currently
|
||||
issued only during initialization and error recovery, so no other
|
||||
command is active and allocation is guaranteed to succeed.
|
||||
|
||||
Once allocated qc's taskfile is initialized for the command to be
|
||||
executed. qc currently has two mechanisms to notify completion. One
|
||||
is via ``qc->complete_fn()`` callback and the other is completion
|
||||
|
|
@ -447,11 +441,6 @@ SCSI commands
|
|||
translated. No qc is involved in processing a simulated scmd. The
|
||||
result is computed right away and the scmd is completed.
|
||||
|
||||
For a translated scmd, :c:func:`ata_qc_new_init` is invoked to allocate a
|
||||
qc and the scmd is translated into the qc. SCSI midlayer's
|
||||
completion notification function pointer is stored into
|
||||
``qc->scsidone``.
|
||||
|
||||
``qc->complete_fn()`` callback is used for completion notification. ATA
|
||||
commands use :c:func:`ata_scsi_qc_complete` while ATAPI commands use
|
||||
:c:func:`atapi_qc_complete`. Both functions end up calling ``qc->scsidone``
|
||||
|
|
|
|||
|
|
@ -27,5 +27,5 @@
|
|||
| sparc: | TODO |
|
||||
| um: | TODO |
|
||||
| x86: | ok |
|
||||
| xtensa: | TODO |
|
||||
| xtensa: | ok |
|
||||
-----------------------
|
||||
|
|
|
|||
|
|
@ -27,5 +27,5 @@
|
|||
| sparc: | ok |
|
||||
| um: | TODO |
|
||||
| x86: | ok |
|
||||
| xtensa: | TODO |
|
||||
| xtensa: | ok |
|
||||
-----------------------
|
||||
|
|
|
|||
|
|
@ -27,5 +27,5 @@
|
|||
| sparc: | ok |
|
||||
| um: | TODO |
|
||||
| x86: | ok |
|
||||
| xtensa: | TODO |
|
||||
| xtensa: | ok |
|
||||
-----------------------
|
||||
|
|
|
|||
|
|
@ -1183,85 +1183,7 @@ Provides counts of softirq handlers serviced since boot time, for each CPU.
|
|||
HRTIMER: 0 0 0 0
|
||||
RCU: 1678 1769 2178 2250
|
||||
|
||||
|
||||
1.3 IDE devices in /proc/ide
|
||||
----------------------------
|
||||
|
||||
The subdirectory /proc/ide contains information about all IDE devices of which
|
||||
the kernel is aware. There is one subdirectory for each IDE controller, the
|
||||
file drivers and a link for each IDE device, pointing to the device directory
|
||||
in the controller specific subtree.
|
||||
|
||||
The file 'drivers' contains general information about the drivers used for the
|
||||
IDE devices::
|
||||
|
||||
> cat /proc/ide/drivers
|
||||
ide-cdrom version 4.53
|
||||
ide-disk version 1.08
|
||||
|
||||
More detailed information can be found in the controller specific
|
||||
subdirectories. These are named ide0, ide1 and so on. Each of these
|
||||
directories contains the files shown in table 1-6.
|
||||
|
||||
|
||||
.. table:: Table 1-6: IDE controller info in /proc/ide/ide?
|
||||
|
||||
======= =======================================
|
||||
File Content
|
||||
======= =======================================
|
||||
channel IDE channel (0 or 1)
|
||||
config Configuration (only for PCI/IDE bridge)
|
||||
mate Mate name
|
||||
model Type/Chipset of IDE controller
|
||||
======= =======================================
|
||||
|
||||
Each device connected to a controller has a separate subdirectory in the
|
||||
controllers directory. The files listed in table 1-7 are contained in these
|
||||
directories.
|
||||
|
||||
|
||||
.. table:: Table 1-7: IDE device information
|
||||
|
||||
================ ==========================================
|
||||
File Content
|
||||
================ ==========================================
|
||||
cache The cache
|
||||
capacity Capacity of the medium (in 512Byte blocks)
|
||||
driver driver and version
|
||||
geometry physical and logical geometry
|
||||
identify device identify block
|
||||
media media type
|
||||
model device identifier
|
||||
settings device setup
|
||||
smart_thresholds IDE disk management thresholds
|
||||
smart_values IDE disk management values
|
||||
================ ==========================================
|
||||
|
||||
The most interesting file is ``settings``. This file contains a nice
|
||||
overview of the drive parameters::
|
||||
|
||||
# cat /proc/ide/ide0/hda/settings
|
||||
name value min max mode
|
||||
---- ----- --- --- ----
|
||||
bios_cyl 526 0 65535 rw
|
||||
bios_head 255 0 255 rw
|
||||
bios_sect 63 0 63 rw
|
||||
breada_readahead 4 0 127 rw
|
||||
bswap 0 0 1 r
|
||||
file_readahead 72 0 2097151 rw
|
||||
io_32bit 0 0 3 rw
|
||||
keepsettings 0 0 1 rw
|
||||
max_kb_per_request 122 1 127 rw
|
||||
multcount 0 0 8 rw
|
||||
nice1 1 0 1 rw
|
||||
nowerr 0 0 1 rw
|
||||
pio_mode write-only 0 255 w
|
||||
slow 0 0 1 rw
|
||||
unmaskirq 0 0 1 rw
|
||||
using_dma 0 0 1 rw
|
||||
|
||||
|
||||
1.4 Networking info in /proc/net
|
||||
1.3 Networking info in /proc/net
|
||||
--------------------------------
|
||||
|
||||
The subdirectory /proc/net follows the usual pattern. Table 1-8 shows the
|
||||
|
|
@ -1340,7 +1262,7 @@ It will contain information that is specific to that bond, such as the
|
|||
current slaves of the bond, the link status of the slaves, and how
|
||||
many times the slaves link has failed.
|
||||
|
||||
1.5 SCSI info
|
||||
1.4 SCSI info
|
||||
-------------
|
||||
|
||||
If you have a SCSI host adapter in your system, you'll find a subdirectory
|
||||
|
|
@ -1403,7 +1325,7 @@ AHA-2940 SCSI adapter::
|
|||
Total transfers 0 (0 reads and 0 writes)
|
||||
|
||||
|
||||
1.6 Parallel port info in /proc/parport
|
||||
1.5 Parallel port info in /proc/parport
|
||||
---------------------------------------
|
||||
|
||||
The directory /proc/parport contains information about the parallel ports of
|
||||
|
|
@ -1428,7 +1350,7 @@ These directories contain the four files shown in Table 1-10.
|
|||
number or none).
|
||||
========= ====================================================================
|
||||
|
||||
1.7 TTY info in /proc/tty
|
||||
1.6 TTY info in /proc/tty
|
||||
-------------------------
|
||||
|
||||
Information about the available and actually used tty's can be found in the
|
||||
|
|
@ -1463,7 +1385,7 @@ To see which tty's are currently in use, you can simply look into the file
|
|||
unknown /dev/tty 4 1-63 console
|
||||
|
||||
|
||||
1.8 Miscellaneous kernel statistics in /proc/stat
|
||||
1.7 Miscellaneous kernel statistics in /proc/stat
|
||||
-------------------------------------------------
|
||||
|
||||
Various pieces of information about kernel activity are available in the
|
||||
|
|
@ -1536,7 +1458,7 @@ softirqs serviced; each subsequent column is the total for that particular
|
|||
softirq.
|
||||
|
||||
|
||||
1.9 Ext4 file system parameters
|
||||
1.8 Ext4 file system parameters
|
||||
-------------------------------
|
||||
|
||||
Information about mounted ext4 file systems can be found in
|
||||
|
|
@ -1552,7 +1474,7 @@ in Table 1-12, below.
|
|||
mb_groups details of multiblock allocator buddy cache of free blocks
|
||||
============== ==========================================================
|
||||
|
||||
1.10 /proc/consoles
|
||||
1.9 /proc/consoles
|
||||
-------------------
|
||||
Shows registered system console lines.
|
||||
|
||||
|
|
|
|||
|
|
@ -306,8 +306,15 @@ Further notes:
|
|||
Mount options
|
||||
-------------
|
||||
|
||||
zonefs define the "errors=<behavior>" mount option to allow the user to specify
|
||||
zonefs behavior in response to I/O errors, inode size inconsistencies or zone
|
||||
zonefs defines several mount options:
|
||||
* errors=<behavior>
|
||||
* explicit-open
|
||||
|
||||
"errors=<behavior>" option
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The "errors=<behavior>" mount option allows the user to specify zonefs
|
||||
behavior in response to I/O errors, inode size inconsistencies or zone
|
||||
condition changes. The defined behaviors are as follows:
|
||||
|
||||
* remount-ro (default)
|
||||
|
|
@ -326,6 +333,9 @@ discover the amount of data that has been written to the zone. In the case of a
|
|||
read-only zone discovered at run-time, as indicated in the previous section.
|
||||
The size of the zone file is left unchanged from its last updated value.
|
||||
|
||||
"explicit-open" option
|
||||
~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
A zoned block device (e.g. an NVMe Zoned Namespace device) may have limits on
|
||||
the number of zones that can be active, that is, zones that are in the
|
||||
implicit open, explicit open or closed conditions. This potential limitation
|
||||
|
|
@ -341,6 +351,44 @@ guaranteed that write requests can be processed. Conversely, the
|
|||
to the device on the last close() of a zone file if the zone is not full nor
|
||||
empty.
|
||||
|
||||
Runtime sysfs attributes
|
||||
------------------------
|
||||
|
||||
zonefs defines several sysfs attributes for mounted devices. All attributes
|
||||
are user readable and can be found in the directory /sys/fs/zonefs/<dev>/,
|
||||
where <dev> is the name of the mounted zoned block device.
|
||||
|
||||
The attributes defined are as follows.
|
||||
|
||||
* **max_wro_seq_files**: This attribute reports the maximum number of
|
||||
sequential zone files that can be open for writing. This number corresponds
|
||||
to the maximum number of explicitly or implicitly open zones that the device
|
||||
supports. A value of 0 means that the device has no limit and that any zone
|
||||
(any file) can be open for writing and written at any time, regardless of the
|
||||
state of other zones. When the *explicit-open* mount option is used, zonefs
|
||||
will fail any open() system call requesting to open a sequential zone file for
|
||||
writing when the number of sequential zone files already open for writing has
|
||||
reached the *max_wro_seq_files* limit.
|
||||
* **nr_wro_seq_files**: This attribute reports the current number of sequential
|
||||
zone files open for writing. When the "explicit-open" mount option is used,
|
||||
this number can never exceed *max_wro_seq_files*. If the *explicit-open*
|
||||
mount option is not used, the reported number can be greater than
|
||||
*max_wro_seq_files*. In such case, it is the responsibility of the
|
||||
application to not write simultaneously more than *max_wro_seq_files*
|
||||
sequential zone files. Failure to do so can result in write errors.
|
||||
* **max_active_seq_files**: This attribute reports the maximum number of
|
||||
sequential zone files that are in an active state, that is, sequential zone
|
||||
files that are partially written (not empty nor full) or that have a zone that
|
||||
is explicitly open (which happens only if the *explicit-open* mount option is
|
||||
used). This number is always equal to the maximum number of active zones that
|
||||
the device supports. A value of 0 means that the mounted device has no limit
|
||||
on the number of sequential zone files that can be active.
|
||||
* **nr_active_seq_files**: This attribute reports the current number of
|
||||
sequential zone files that are active. If *max_active_seq_files* is not 0,
|
||||
then the value of *nr_active_seq_files* can never exceed the value of
|
||||
*max_active_seq_files*, regardless of the use of the *explicit-open* mount
|
||||
option.
|
||||
|
||||
Zonefs User Space Tools
|
||||
=======================
|
||||
|
||||
|
|
|
|||
|
|
@ -99,10 +99,10 @@ unreproducible parts can be treated as sources:
|
|||
Structure randomisation
|
||||
-----------------------
|
||||
|
||||
If you enable ``CONFIG_GCC_PLUGIN_RANDSTRUCT``, you will need to
|
||||
pre-generate the random seed in
|
||||
``scripts/gcc-plugins/randomize_layout_seed.h`` so the same value
|
||||
is used in rebuilds.
|
||||
If you enable ``CONFIG_RANDSTRUCT``, you will need to pre-generate
|
||||
the random seed in ``scripts/basic/randstruct.seed`` so the same
|
||||
value is used by each build. See ``scripts/gen-randstruct-seed.sh``
|
||||
for details.
|
||||
|
||||
Debug info conflicts
|
||||
--------------------
|
||||
|
|
|
|||
|
|
@ -437,6 +437,20 @@ in a private repository which allows interested people to easily pull the
|
|||
series for testing. The usual way to offer this is a git URL in the cover
|
||||
letter of the patch series.
|
||||
|
||||
Testing
|
||||
^^^^^^^
|
||||
|
||||
Code should be tested before submitting to the tip maintainers. Anything
|
||||
other than minor changes should be built, booted and tested with
|
||||
comprehensive (and heavyweight) kernel debugging options enabled.
|
||||
|
||||
These debugging options can be found in kernel/configs/x86_debug.config
|
||||
and can be added to an existing kernel config by running:
|
||||
|
||||
make x86_debug.config
|
||||
|
||||
Some of these options are x86-specific and can be left out when testing
|
||||
on other architectures.
|
||||
|
||||
Coding style notes
|
||||
------------------
|
||||
|
|
|
|||
|
|
@ -17,3 +17,4 @@ Security Documentation
|
|||
tpm/index
|
||||
digsig
|
||||
landlock
|
||||
secrets/index
|
||||
|
|
|
|||
103
Documentation/security/secrets/coco.rst
Normal file
103
Documentation/security/secrets/coco.rst
Normal file
|
|
@ -0,0 +1,103 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
==============================
|
||||
Confidential Computing secrets
|
||||
==============================
|
||||
|
||||
This document describes how Confidential Computing secret injection is handled
|
||||
from the firmware to the operating system, in the EFI driver and the efi_secret
|
||||
kernel module.
|
||||
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
Confidential Computing (coco) hardware such as AMD SEV (Secure Encrypted
|
||||
Virtualization) allows guest owners to inject secrets into the VM's
|
||||
memory without the host/hypervisor being able to read them. In SEV,
|
||||
secret injection is performed early in the VM launch process, before the
|
||||
guest starts running.
|
||||
|
||||
The efi_secret kernel module allows userspace applications to access these
|
||||
secrets via securityfs.
|
||||
|
||||
|
||||
Secret data flow
|
||||
================
|
||||
|
||||
The guest firmware may reserve a designated memory area for secret injection,
|
||||
and publish its location (base GPA and length) in the EFI configuration table
|
||||
under a ``LINUX_EFI_COCO_SECRET_AREA_GUID`` entry
|
||||
(``adf956ad-e98c-484c-ae11-b51c7d336447``). This memory area should be marked
|
||||
by the firmware as ``EFI_RESERVED_TYPE``, and therefore the kernel should not
|
||||
use it for its own purposes.
|
||||
|
||||
During the VM's launch, the virtual machine manager may inject a secret to that
|
||||
area. In AMD SEV and SEV-ES this is performed using the
|
||||
``KVM_SEV_LAUNCH_SECRET`` command (see [sev]_). The structure of the injected
|
||||
Guest Owner secret data should be a GUIDed table of secret values; the binary
|
||||
format is described in ``drivers/virt/coco/efi_secret/efi_secret.c`` under
|
||||
"Structure of the EFI secret area".
|
||||
|
||||
On kernel start, the kernel's EFI driver saves the location of the secret area
|
||||
(taken from the EFI configuration table) in the ``efi.coco_secret`` field.
|
||||
Later it checks if the secret area is populated: it maps the area and checks
|
||||
whether its content begins with ``EFI_SECRET_TABLE_HEADER_GUID``
|
||||
(``1e74f542-71dd-4d66-963e-ef4287ff173b``). If the secret area is populated,
|
||||
the EFI driver will autoload the efi_secret kernel module, which exposes the
|
||||
secrets to userspace applications via securityfs. The details of the
|
||||
efi_secret filesystem interface are in [secrets-coco-abi]_.
|
||||
|
||||
|
||||
Application usage example
|
||||
=========================
|
||||
|
||||
Consider a guest performing computations on encrypted files. The Guest Owner
|
||||
provides the decryption key (= secret) using the secret injection mechanism.
|
||||
The guest application reads the secret from the efi_secret filesystem and
|
||||
proceeds to decrypt the files into memory and then performs the needed
|
||||
computations on the content.
|
||||
|
||||
In this example, the host can't read the files from the disk image
|
||||
because they are encrypted. Host can't read the decryption key because
|
||||
it is passed using the secret injection mechanism (= secure channel).
|
||||
Host can't read the decrypted content from memory because it's a
|
||||
confidential (memory-encrypted) guest.
|
||||
|
||||
Here is a simple example for usage of the efi_secret module in a guest
|
||||
to which an EFI secret area with 4 secrets was injected during launch::
|
||||
|
||||
# ls -la /sys/kernel/security/secrets/coco
|
||||
total 0
|
||||
drwxr-xr-x 2 root root 0 Jun 28 11:54 .
|
||||
drwxr-xr-x 3 root root 0 Jun 28 11:54 ..
|
||||
-r--r----- 1 root root 0 Jun 28 11:54 736870e5-84f0-4973-92ec-06879ce3da0b
|
||||
-r--r----- 1 root root 0 Jun 28 11:54 83c83f7f-1356-4975-8b7e-d3a0b54312c6
|
||||
-r--r----- 1 root root 0 Jun 28 11:54 9553f55d-3da2-43ee-ab5d-ff17f78864d2
|
||||
-r--r----- 1 root root 0 Jun 28 11:54 e6f5a162-d67f-4750-a67c-5d065f2a9910
|
||||
|
||||
# hd /sys/kernel/security/secrets/coco/e6f5a162-d67f-4750-a67c-5d065f2a9910
|
||||
00000000 74 68 65 73 65 2d 61 72 65 2d 74 68 65 2d 6b 61 |these-are-the-ka|
|
||||
00000010 74 61 2d 73 65 63 72 65 74 73 00 01 02 03 04 05 |ta-secrets......|
|
||||
00000020 06 07 |..|
|
||||
00000022
|
||||
|
||||
# rm /sys/kernel/security/secrets/coco/e6f5a162-d67f-4750-a67c-5d065f2a9910
|
||||
|
||||
# ls -la /sys/kernel/security/secrets/coco
|
||||
total 0
|
||||
drwxr-xr-x 2 root root 0 Jun 28 11:55 .
|
||||
drwxr-xr-x 3 root root 0 Jun 28 11:54 ..
|
||||
-r--r----- 1 root root 0 Jun 28 11:54 736870e5-84f0-4973-92ec-06879ce3da0b
|
||||
-r--r----- 1 root root 0 Jun 28 11:54 83c83f7f-1356-4975-8b7e-d3a0b54312c6
|
||||
-r--r----- 1 root root 0 Jun 28 11:54 9553f55d-3da2-43ee-ab5d-ff17f78864d2
|
||||
|
||||
|
||||
References
|
||||
==========
|
||||
|
||||
See [sev-api-spec]_ for more info regarding SEV ``LAUNCH_SECRET`` operation.
|
||||
|
||||
.. [sev] Documentation/virt/kvm/amd-memory-encryption.rst
|
||||
.. [secrets-coco-abi] Documentation/ABI/testing/securityfs-secrets-coco
|
||||
.. [sev-api-spec] https://www.amd.com/system/files/TechDocs/55766_SEV-KM_API_Specification.pdf
|
||||
9
Documentation/security/secrets/index.rst
Normal file
9
Documentation/security/secrets/index.rst
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=====================
|
||||
Secrets documentation
|
||||
=====================
|
||||
|
||||
.. toctree::
|
||||
|
||||
coco
|
||||
|
|
@ -718,6 +718,9 @@ CDROMPLAYBLK
|
|||
|
||||
|
||||
CDROMGETSPINDOWN
|
||||
Obsolete, was ide-cd only
|
||||
|
||||
|
||||
usage::
|
||||
|
||||
char spindown;
|
||||
|
|
@ -736,6 +739,9 @@ CDROMGETSPINDOWN
|
|||
|
||||
|
||||
CDROMSETSPINDOWN
|
||||
Obsolete, was ide-cd only
|
||||
|
||||
|
||||
usage::
|
||||
|
||||
char spindown
|
||||
|
|
|
|||
|
|
@ -271,6 +271,16 @@ notifying process it will be replaced. The supervisor can also add an FD, and
|
|||
respond atomically by using the ``SECCOMP_ADDFD_FLAG_SEND`` flag and the return
|
||||
value will be the injected file descriptor number.
|
||||
|
||||
The notifying process can be preempted, resulting in the notification being
|
||||
aborted. This can be problematic when trying to take actions on behalf of the
|
||||
notifying process that are long-running and typically retryable (mounting a
|
||||
filesystem). Alternatively, at filter installation time, the
|
||||
``SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV`` flag can be set. This flag makes it
|
||||
such that when a user notification is received by the supervisor, the notifying
|
||||
process will ignore non-fatal signals until the response is sent. Signals that
|
||||
are sent prior to the notification being received by userspace are handled
|
||||
normally.
|
||||
|
||||
It is worth noting that ``struct seccomp_data`` contains the values of register
|
||||
arguments to the syscall, but does not contain pointers to memory. The task's
|
||||
memory is accessible to suitably privileged traces via ``ptrace()`` or
|
||||
|
|
|
|||
155
Documentation/virt/coco/sev-guest.rst
Normal file
155
Documentation/virt/coco/sev-guest.rst
Normal file
|
|
@ -0,0 +1,155 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===================================================================
|
||||
The Definitive SEV Guest API Documentation
|
||||
===================================================================
|
||||
|
||||
1. General description
|
||||
======================
|
||||
|
||||
The SEV API is a set of ioctls that are used by the guest or hypervisor
|
||||
to get or set a certain aspect of the SEV virtual machine. The ioctls belong
|
||||
to the following classes:
|
||||
|
||||
- Hypervisor ioctls: These query and set global attributes which affect the
|
||||
whole SEV firmware. These ioctls are used by platform provisioning tools.
|
||||
|
||||
- Guest ioctls: These query and set attributes of the SEV virtual machine.
|
||||
|
||||
2. API description
|
||||
==================
|
||||
|
||||
This section describes ioctls that are used for querying the SEV guest report
|
||||
from the SEV firmware. For each ioctl, the following information is provided
|
||||
along with a description:
|
||||
|
||||
Technology:
|
||||
which SEV technology provides this ioctl. SEV, SEV-ES, SEV-SNP or all.
|
||||
|
||||
Type:
|
||||
hypervisor or guest. The ioctl can be used inside the guest or the
|
||||
hypervisor.
|
||||
|
||||
Parameters:
|
||||
what parameters are accepted by the ioctl.
|
||||
|
||||
Returns:
|
||||
the return value. General error numbers (-ENOMEM, -EINVAL)
|
||||
are not detailed, but errors with specific meanings are.
|
||||
|
||||
The guest ioctl should be issued on a file descriptor of the /dev/sev-guest device.
|
||||
The ioctl accepts struct snp_guest_request_ioctl. The input and output structures are
|
||||
specified through the req_data and resp_data fields respectively. If the ioctl fails
|
||||
to execute due to a firmware error, then the fw_err code will be set; otherwise the
|
||||
fw_err will be set to 0x00000000000000ff.
|
||||
|
||||
The firmware checks that the message sequence counter is one greater than
|
||||
the guest's message sequence counter. If the guest driver fails to increment the message
|
||||
counter (e.g. counter overflow), then -EIO will be returned.
|
||||
|
||||
::
|
||||
|
||||
struct snp_guest_request_ioctl {
|
||||
/* Message version number */
|
||||
__u32 msg_version;
|
||||
|
||||
/* Request and response structure address */
|
||||
__u64 req_data;
|
||||
__u64 resp_data;
|
||||
|
||||
/* firmware error code on failure (see psp-sev.h) */
|
||||
__u64 fw_err;
|
||||
};
|
||||
|
||||
2.1 SNP_GET_REPORT
|
||||
------------------
|
||||
|
||||
:Technology: sev-snp
|
||||
:Type: guest ioctl
|
||||
:Parameters (in): struct snp_report_req
|
||||
:Returns (out): struct snp_report_resp on success, -negative on error
|
||||
|
||||
The SNP_GET_REPORT ioctl can be used to query the attestation report from the
|
||||
SEV-SNP firmware. The ioctl uses the SNP_GUEST_REQUEST (MSG_REPORT_REQ) command
|
||||
provided by the SEV-SNP firmware to query the attestation report.
|
||||
|
||||
On success, the snp_report_resp.data will contain the report. The report
|
||||
follows the format described in the SEV-SNP specification. See the SEV-SNP
|
||||
specification for further details.
|
||||
|
||||
2.2 SNP_GET_DERIVED_KEY
|
||||
-----------------------
|
||||
:Technology: sev-snp
|
||||
:Type: guest ioctl
|
||||
:Parameters (in): struct snp_derived_key_req
|
||||
:Returns (out): struct snp_derived_key_resp on success, -negative on error
|
||||
|
||||
The SNP_GET_DERIVED_KEY ioctl can be used to get a key derived from a root key.
|
||||
The derived key can be used by the guest for any purpose, such as sealing keys
|
||||
or communicating with external entities.
|
||||
|
||||
The ioctl uses the SNP_GUEST_REQUEST (MSG_KEY_REQ) command provided by the
|
||||
SEV-SNP firmware to derive the key. See the SEV-SNP specification for further details
|
||||
on the various fields passed in the key derivation request.
|
||||
|
||||
On success, the snp_derived_key_resp.data contains the derived key value. See
|
||||
the SEV-SNP specification for further details.
|
||||
|
||||
|
||||
2.3 SNP_GET_EXT_REPORT
|
||||
----------------------
|
||||
:Technology: sev-snp
|
||||
:Type: guest ioctl
|
||||
:Parameters (in/out): struct snp_ext_report_req
|
||||
:Returns (out): struct snp_report_resp on success, -negative on error
|
||||
|
||||
The SNP_GET_EXT_REPORT ioctl is similar to the SNP_GET_REPORT. The difference is
|
||||
related to the additional certificate data that is returned with the report.
|
||||
The certificate data returned is being provided by the hypervisor through the
|
||||
SNP_SET_EXT_CONFIG.
|
||||
|
||||
The ioctl uses the SNP_GUEST_REQUEST (MSG_REPORT_REQ) command provided by the SEV-SNP
|
||||
firmware to get the attestation report.
|
||||
|
||||
On success, the snp_ext_report_resp.data will contain the attestation report
|
||||
and snp_ext_report_req.certs_address will contain the certificate blob. If the
|
||||
length of the blob is smaller than expected then snp_ext_report_req.certs_len will
|
||||
be updated with the expected value.
|
||||
|
||||
See the GHCB specification for further details on how to parse the certificate blob.
|
||||
|
||||
3. SEV-SNP CPUID Enforcement
|
||||
============================
|
||||
|
||||
SEV-SNP guests can access a special page that contains a table of CPUID values
|
||||
that have been validated by the PSP as part of the SNP_LAUNCH_UPDATE firmware
|
||||
command. It provides the following assurances regarding the validity of CPUID
|
||||
values:
|
||||
|
||||
- Its address is obtained via bootloader/firmware (via CC blob), and those
|
||||
binaries will be measured as part of the SEV-SNP attestation report.
|
||||
- Its initial state will be encrypted/pvalidated, so attempts to modify
|
||||
it during run-time will result in garbage being written, or #VC exceptions
|
||||
being generated due to changes in validation state if the hypervisor tries
|
||||
to swap the backing page.
|
||||
- Attempts to bypass PSP checks by the hypervisor by using a normal page, or
|
||||
a non-CPUID encrypted page will change the measurement provided by the
|
||||
SEV-SNP attestation report.
|
||||
- The CPUID page contents are *not* measured, but attempts to modify the
|
||||
expected contents of a CPUID page as part of guest initialization will be
|
||||
gated by the PSP CPUID enforcement policy checks performed on the page
|
||||
during SNP_LAUNCH_UPDATE, and noticeable later if the guest owner
|
||||
implements their own checks of the CPUID values.
|
||||
|
||||
It is important to note that this last assurance is only useful if the kernel
|
||||
has taken care to make use of the SEV-SNP CPUID throughout all stages of boot.
|
||||
Otherwise, guest owner attestation provides no assurance that the kernel wasn't
|
||||
fed incorrect values at some point during boot.
|
||||
|
||||
|
||||
Reference
|
||||
---------
|
||||
|
||||
SEV-SNP and GHCB specification: developer.amd.com/sev
|
||||
|
||||
The driver is based on SEV-SNP firmware spec 0.9 and GHCB spec version 2.0.
|
||||
|
|
@ -13,6 +13,7 @@ Linux Virtualization Support
|
|||
guest-halt-polling
|
||||
ne_overview
|
||||
acrn/index
|
||||
coco/sev-guest
|
||||
|
||||
.. only:: html and subproject
|
||||
|
||||
|
|
|
|||
|
|
@ -5713,6 +5713,8 @@ affect the device's behavior. Current defined flags::
|
|||
#define KVM_RUN_X86_SMM (1 << 0)
|
||||
/* x86, set if bus lock detected in VM */
|
||||
#define KVM_RUN_BUS_LOCK (1 << 1)
|
||||
/* arm64, set for KVM_EXIT_DEBUG */
|
||||
#define KVM_DEBUG_ARCH_HSR_HIGH_VALID (1 << 0)
|
||||
|
||||
::
|
||||
|
||||
|
|
|
|||
|
|
@ -140,9 +140,8 @@ from #define X86_FEATURE_UMIP (16*32 + 2).
|
|||
|
||||
In addition, there exists a variety of custom command-line parameters that
|
||||
disable specific features. The list of parameters includes, but is not limited
|
||||
to, nofsgsbase, nosmap, and nosmep. 5-level paging can also be disabled using
|
||||
"no5lvl". SMAP and SMEP are disabled with the aforementioned parameters,
|
||||
respectively.
|
||||
to, nofsgsbase, nosgx, noxsave, etc. 5-level paging can also be disabled using
|
||||
"no5lvl".
|
||||
|
||||
e: The feature was known to be non-functional.
|
||||
----------------------------------------------
|
||||
|
|
|
|||
2
Documentation/x86/ifs.rst
Normal file
2
Documentation/x86/ifs.rst
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
.. kernel-doc:: drivers/platform/x86/intel/ifs/ifs.h
|
||||
|
|
@ -26,6 +26,7 @@ x86-specific Documentation
|
|||
intel_txt
|
||||
amd-memory-encryption
|
||||
amd_hsmp
|
||||
tdx
|
||||
pti
|
||||
mds
|
||||
microcode
|
||||
|
|
@ -35,6 +36,7 @@ x86-specific Documentation
|
|||
usb-legacy-support
|
||||
i386/index
|
||||
x86_64/index
|
||||
ifs
|
||||
sva
|
||||
sgx
|
||||
features
|
||||
|
|
|
|||
218
Documentation/x86/tdx.rst
Normal file
218
Documentation/x86/tdx.rst
Normal file
|
|
@ -0,0 +1,218 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=====================================
|
||||
Intel Trust Domain Extensions (TDX)
|
||||
=====================================
|
||||
|
||||
Intel's Trust Domain Extensions (TDX) protect confidential guest VMs from
|
||||
the host and physical attacks by isolating the guest register state and by
|
||||
encrypting the guest memory. In TDX, a special module running in a special
|
||||
mode sits between the host and the guest and manages the guest/host
|
||||
separation.
|
||||
|
||||
Since the host cannot directly access guest registers or memory, much
|
||||
normal functionality of a hypervisor must be moved into the guest. This is
|
||||
implemented using a Virtualization Exception (#VE) that is handled by the
|
||||
guest kernel. A #VE is handled entirely inside the guest kernel, but some
|
||||
require the hypervisor to be consulted.
|
||||
|
||||
TDX includes new hypercall-like mechanisms for communicating from the
|
||||
guest to the hypervisor or the TDX module.
|
||||
|
||||
New TDX Exceptions
|
||||
==================
|
||||
|
||||
TDX guests behave differently from bare-metal and traditional VMX guests.
|
||||
In TDX guests, otherwise normal instructions or memory accesses can cause
|
||||
#VE or #GP exceptions.
|
||||
|
||||
Instructions marked with an '*' conditionally cause exceptions. The
|
||||
details for these instructions are discussed below.
|
||||
|
||||
Instruction-based #VE
|
||||
---------------------
|
||||
|
||||
- Port I/O (INS, OUTS, IN, OUT)
|
||||
- HLT
|
||||
- MONITOR, MWAIT
|
||||
- WBINVD, INVD
|
||||
- VMCALL
|
||||
- RDMSR*,WRMSR*
|
||||
- CPUID*
|
||||
|
||||
Instruction-based #GP
|
||||
---------------------
|
||||
|
||||
- All VMX instructions: INVEPT, INVVPID, VMCLEAR, VMFUNC, VMLAUNCH,
|
||||
VMPTRLD, VMPTRST, VMREAD, VMRESUME, VMWRITE, VMXOFF, VMXON
|
||||
- ENCLS, ENCLU
|
||||
- GETSEC
|
||||
- RSM
|
||||
- ENQCMD
|
||||
- RDMSR*,WRMSR*
|
||||
|
||||
RDMSR/WRMSR Behavior
|
||||
--------------------
|
||||
|
||||
MSR access behavior falls into three categories:
|
||||
|
||||
- #GP generated
|
||||
- #VE generated
|
||||
- "Just works"
|
||||
|
||||
In general, the #GP MSRs should not be used in guests. Their use likely
|
||||
indicates a bug in the guest. The guest may try to handle the #GP with a
|
||||
hypercall but it is unlikely to succeed.
|
||||
|
||||
The #VE MSRs are typically able to be handled by the hypervisor. Guests
|
||||
can make a hypercall to the hypervisor to handle the #VE.
|
||||
|
||||
The "just works" MSRs do not need any special guest handling. They might
|
||||
be implemented by directly passing through the MSR to the hardware or by
|
||||
trapping and handling in the TDX module. Other than possibly being slow,
|
||||
these MSRs appear to function just as they would on bare metal.
|
||||
|
||||
CPUID Behavior
|
||||
--------------
|
||||
|
||||
For some CPUID leaves and sub-leaves, the virtualized bit fields of CPUID
|
||||
return values (in guest EAX/EBX/ECX/EDX) are configurable by the
|
||||
hypervisor. For such cases, the Intel TDX module architecture defines two
|
||||
virtualization types:
|
||||
|
||||
- Bit fields for which the hypervisor controls the value seen by the guest
|
||||
TD.
|
||||
|
||||
- Bit fields for which the hypervisor configures the value such that the
|
||||
guest TD either sees their native value or a value of 0. For these bit
|
||||
fields, the hypervisor can mask off the native values, but it can not
|
||||
turn *on* values.
|
||||
|
||||
A #VE is generated for CPUID leaves and sub-leaves that the TDX module does
|
||||
not know how to handle. The guest kernel may ask the hypervisor for the
|
||||
value with a hypercall.
|
||||
|
||||
#VE on Memory Accesses
|
||||
======================
|
||||
|
||||
There are essentially two classes of TDX memory: private and shared.
|
||||
Private memory receives full TDX protections. Its content is protected
|
||||
against access from the hypervisor. Shared memory is expected to be
|
||||
shared between guest and hypervisor and does not receive full TDX
|
||||
protections.
|
||||
|
||||
A TD guest is in control of whether its memory accesses are treated as
|
||||
private or shared. It selects the behavior with a bit in its page table
|
||||
entries. This helps ensure that a guest does not place sensitive
|
||||
information in shared memory, exposing it to the untrusted hypervisor.
|
||||
|
||||
#VE on Shared Memory
|
||||
--------------------
|
||||
|
||||
Access to shared mappings can cause a #VE. The hypervisor ultimately
|
||||
controls whether a shared memory access causes a #VE, so the guest must be
|
||||
careful to only reference shared pages for which it can safely handle a #VE. For
|
||||
instance, the guest should be careful not to access shared memory in the
|
||||
#VE handler before it reads the #VE info structure (TDG.VP.VEINFO.GET).
|
||||
|
||||
Shared mapping content is entirely controlled by the hypervisor. The guest
|
||||
should only use shared mappings for communicating with the hypervisor.
|
||||
Shared mappings must never be used for sensitive memory content like kernel
|
||||
stacks. A good rule of thumb is that hypervisor-shared memory should be
|
||||
treated the same as memory mapped to userspace. Both the hypervisor and
|
||||
userspace are completely untrusted.
|
||||
|
||||
MMIO for virtual devices is implemented as shared memory. The guest must
|
||||
be careful not to access device MMIO regions unless it is also prepared to
|
||||
handle a #VE.
|
||||
|
||||
#VE on Private Pages
|
||||
--------------------
|
||||
|
||||
An access to private mappings can also cause a #VE. Since all kernel
|
||||
memory is also private memory, the kernel might theoretically need to
|
||||
handle a #VE on arbitrary kernel memory accesses. This is not feasible, so
|
||||
TDX guests ensure that all guest memory has been "accepted" before memory
|
||||
is used by the kernel.
|
||||
|
||||
A modest amount of memory (typically 512M) is pre-accepted by the firmware
|
||||
before the kernel runs to ensure that the kernel can start up without
|
||||
being subjected to a #VE.
|
||||
|
||||
The hypervisor is permitted to unilaterally move accepted pages to a
|
||||
"blocked" state. However, if it does this, page access will not generate a
|
||||
#VE. It will, instead, cause a "TD Exit" where the hypervisor is required
|
||||
to handle the exception.
|
||||
|
||||
Linux #VE handler
|
||||
=================
|
||||
|
||||
Just like page faults or #GP's, #VE exceptions can be either handled or be
|
||||
fatal. Typically, an unhandled userspace #VE results in a SIGSEGV.
|
||||
An unhandled kernel #VE results in an oops.
|
||||
|
||||
Handling nested exceptions on x86 is typically nasty business. A #VE
|
||||
could be interrupted by an NMI which triggers another #VE and hilarity
|
||||
ensues. The TDX #VE architecture anticipated this scenario and includes a
|
||||
feature to make it slightly less nasty.
|
||||
|
||||
During #VE handling, the TDX module ensures that all interrupts (including
|
||||
NMIs) are blocked. The block remains in place until the guest makes a
|
||||
TDG.VP.VEINFO.GET TDCALL. This allows the guest to control when interrupts
|
||||
or a new #VE can be delivered.
|
||||
|
||||
However, the guest kernel must still be careful to avoid potential
|
||||
#VE-triggering actions (discussed above) while this block is in place.
|
||||
While the block is in place, any #VE is elevated to a double fault (#DF)
|
||||
which is not recoverable.
|
||||
|
||||
MMIO handling
|
||||
=============
|
||||
|
||||
In non-TDX VMs, MMIO is usually implemented by giving a guest access to a
|
||||
mapping which will cause a VMEXIT on access, and then the hypervisor
|
||||
emulates the access. That is not possible in TDX guests because VMEXIT
|
||||
will expose the register state to the host. TDX guests don't trust the host
|
||||
and can't have their state exposed to the host.
|
||||
|
||||
In TDX, MMIO regions typically trigger a #VE exception in the guest. The
|
||||
guest #VE handler then emulates the MMIO instruction inside the guest and
|
||||
converts it into a controlled TDCALL to the host, rather than exposing
|
||||
guest state to the host.
|
||||
|
||||
MMIO addresses on x86 are just special physical addresses. They can
|
||||
theoretically be accessed with any instruction that accesses memory.
|
||||
However, the kernel instruction decoding method is limited. It is only
|
||||
designed to decode instructions like those generated by io.h macros.
|
||||
|
||||
MMIO access via other means (like structure overlays) may result in an
|
||||
oops.
|
||||
|
||||
Shared Memory Conversions
|
||||
=========================
|
||||
|
||||
All TDX guest memory starts out as private at boot. This memory can not
|
||||
be accessed by the hypervisor. However, some kernel users like device
|
||||
drivers might have a need to share data with the hypervisor. To do this,
|
||||
memory must be converted between shared and private. This can be
|
||||
accomplished using some existing memory encryption helpers:
|
||||
|
||||
* set_memory_decrypted() converts a range of pages to shared.
|
||||
* set_memory_encrypted() converts memory back to private.
|
||||
|
||||
Device drivers are the primary user of shared memory, but there's no need
|
||||
to touch every driver. DMA buffers and ioremap() do the conversions
|
||||
automatically.
|
||||
|
||||
TDX uses SWIOTLB for most DMA allocations. The SWIOTLB buffer is
|
||||
converted to shared on boot.
|
||||
|
||||
For coherent DMA allocation, the DMA buffer gets converted on the
|
||||
allocation. Check force_dma_unencrypted() for details.
|
||||
|
||||
References
|
||||
==========
|
||||
|
||||
TDX reference material is collected here:
|
||||
|
||||
https://www.intel.com/content/www/us/en/developer/articles/technical/intel-trust-domain-extensions.html
|
||||
|
|
@ -157,15 +157,6 @@ Rebooting
|
|||
newer BIOS, or newer board) using this option will ignore the built-in
|
||||
quirk table, and use the generic default reboot actions.
|
||||
|
||||
Non Executable Mappings
|
||||
=======================
|
||||
|
||||
noexec=on|off
|
||||
on
|
||||
Enable(default)
|
||||
off
|
||||
Disable
|
||||
|
||||
NUMA
|
||||
====
|
||||
|
||||
|
|
@ -310,3 +301,17 @@ Miscellaneous
|
|||
Do not use GB pages for kernel direct mappings.
|
||||
gbpages
|
||||
Use GB pages for kernel direct mappings.
|
||||
|
||||
|
||||
AMD SEV (Secure Encrypted Virtualization)
|
||||
=========================================
|
||||
Options relating to AMD SEV, specified via the following format:
|
||||
|
||||
::
|
||||
|
||||
sev=option1[,option2]
|
||||
|
||||
The available options are:
|
||||
|
||||
debug
|
||||
Enable debug messages.
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ Offset/Size Proto Name Meaning
|
|||
058/008 ALL tboot_addr Physical address of tboot shared page
|
||||
060/010 ALL ist_info Intel SpeedStep (IST) BIOS support information
|
||||
(struct ist_info)
|
||||
070/008 ALL acpi_rsdp_addr Physical address of ACPI RSDP table
|
||||
080/010 ALL hd0_info hd0 disk parameter, OBSOLETE!!
|
||||
090/010 ALL hd1_info hd1 disk parameter, OBSOLETE!!
|
||||
0A0/010 ALL sys_desc_table System description table (struct sys_desc_table),
|
||||
|
|
@ -27,6 +28,7 @@ Offset/Size Proto Name Meaning
|
|||
0C0/004 ALL ext_ramdisk_image ramdisk_image high 32bits
|
||||
0C4/004 ALL ext_ramdisk_size ramdisk_size high 32bits
|
||||
0C8/004 ALL ext_cmd_line_ptr cmd_line_ptr high 32bits
|
||||
13C/004 ALL cc_blob_address Physical address of Confidential Computing blob
|
||||
140/080 ALL edid_info Video mode setup (struct edid_info)
|
||||
1C0/020 ALL efi_info EFI 32 information (struct efi_info)
|
||||
1E0/004 ALL alt_mem_k Alternative mem check, in KB
|
||||
|
|
|
|||
10
MAINTAINERS
10
MAINTAINERS
|
|
@ -7385,7 +7385,6 @@ L: linux-mm@kvack.org
|
|||
S: Supported
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/execve
|
||||
F: arch/alpha/kernel/binfmt_loader.c
|
||||
F: arch/x86/ia32/ia32_aout.c
|
||||
F: fs/*binfmt_*.c
|
||||
F: fs/exec.c
|
||||
F: include/linux/binfmts.h
|
||||
|
|
@ -9871,6 +9870,14 @@ B: https://bugzilla.kernel.org
|
|||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux.git
|
||||
F: drivers/idle/intel_idle.c
|
||||
|
||||
INTEL IN FIELD SCAN (IFS) DEVICE
|
||||
M: Jithu Joseph <jithu.joseph@intel.com>
|
||||
R: Ashok Raj <ashok.raj@intel.com>
|
||||
R: Tony Luck <tony.luck@intel.com>
|
||||
S: Maintained
|
||||
F: drivers/platform/x86/intel/ifs
|
||||
F: include/trace/events/intel_ifs.h
|
||||
|
||||
INTEL INTEGRATED SENSOR HUB DRIVER
|
||||
M: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
||||
M: Jiri Kosina <jikos@kernel.org>
|
||||
|
|
@ -17524,6 +17531,7 @@ R: Steven Rostedt <rostedt@goodmis.org> (SCHED_FIFO/SCHED_RR)
|
|||
R: Ben Segall <bsegall@google.com> (CONFIG_CFS_BANDWIDTH)
|
||||
R: Mel Gorman <mgorman@suse.de> (CONFIG_NUMA_BALANCING)
|
||||
R: Daniel Bristot de Oliveira <bristot@redhat.com> (SCHED_DEADLINE)
|
||||
R: Valentin Schneider <vschneid@redhat.com> (TOPOLOGY)
|
||||
L: linux-kernel@vger.kernel.org
|
||||
S: Maintained
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git sched/core
|
||||
|
|
|
|||
3
Makefile
3
Makefile
|
|
@ -1037,6 +1037,7 @@ include-$(CONFIG_KASAN) += scripts/Makefile.kasan
|
|||
include-$(CONFIG_KCSAN) += scripts/Makefile.kcsan
|
||||
include-$(CONFIG_UBSAN) += scripts/Makefile.ubsan
|
||||
include-$(CONFIG_KCOV) += scripts/Makefile.kcov
|
||||
include-$(CONFIG_RANDSTRUCT) += scripts/Makefile.randstruct
|
||||
include-$(CONFIG_GCC_PLUGINS) += scripts/Makefile.gcc-plugins
|
||||
|
||||
include $(addprefix $(srctree)/, $(include-y))
|
||||
|
|
@ -1339,7 +1340,7 @@ install: sub_make_done :=
|
|||
# ---------------------------------------------------------------------------
|
||||
# Tools
|
||||
|
||||
ifdef CONFIG_STACK_VALIDATION
|
||||
ifdef CONFIG_OBJTOOL
|
||||
prepare: tools/objtool
|
||||
endif
|
||||
|
||||
|
|
|
|||
31
arch/Kconfig
31
arch/Kconfig
|
|
@ -24,6 +24,13 @@ config KEXEC_ELF
|
|||
config HAVE_IMA_KEXEC
|
||||
bool
|
||||
|
||||
config ARCH_HAS_SUBPAGE_FAULTS
|
||||
bool
|
||||
help
|
||||
Select if the architecture can check permissions at sub-page
|
||||
granularity (e.g. arm64 MTE). The probe_user_*() functions
|
||||
must be implemented.
|
||||
|
||||
config HOTPLUG_SMT
|
||||
bool
|
||||
|
||||
|
|
@ -35,6 +42,7 @@ config KPROBES
|
|||
depends on MODULES
|
||||
depends on HAVE_KPROBES
|
||||
select KALLSYMS
|
||||
select TASKS_RCU if PREEMPTION
|
||||
help
|
||||
Kprobes allows you to trap at almost any kernel address and
|
||||
execute a callback function. register_kprobe() establishes
|
||||
|
|
@ -46,6 +54,7 @@ config JUMP_LABEL
|
|||
bool "Optimize very unlikely/likely branches"
|
||||
depends on HAVE_ARCH_JUMP_LABEL
|
||||
depends on CC_HAS_ASM_GOTO
|
||||
select OBJTOOL if HAVE_JUMP_LABEL_HACK
|
||||
help
|
||||
This option enables a transparent branch optimization that
|
||||
makes certain almost-always-true or almost-always-false branch
|
||||
|
|
@ -723,10 +732,7 @@ config ARCH_SUPPORTS_CFI_CLANG
|
|||
config CFI_CLANG
|
||||
bool "Use Clang's Control Flow Integrity (CFI)"
|
||||
depends on LTO_CLANG && ARCH_SUPPORTS_CFI_CLANG
|
||||
# Clang >= 12:
|
||||
# - https://bugs.llvm.org/show_bug.cgi?id=46258
|
||||
# - https://bugs.llvm.org/show_bug.cgi?id=47479
|
||||
depends on CLANG_VERSION >= 120000
|
||||
depends on CLANG_VERSION >= 140000
|
||||
select KALLSYMS
|
||||
help
|
||||
This option enables Clang’s forward-edge Control Flow Integrity
|
||||
|
|
@ -1026,11 +1032,23 @@ config ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
|
|||
depends on MMU
|
||||
select ARCH_HAS_ELF_RANDOMIZE
|
||||
|
||||
config HAVE_OBJTOOL
|
||||
bool
|
||||
|
||||
config HAVE_JUMP_LABEL_HACK
|
||||
bool
|
||||
|
||||
config HAVE_NOINSTR_HACK
|
||||
bool
|
||||
|
||||
config HAVE_NOINSTR_VALIDATION
|
||||
bool
|
||||
|
||||
config HAVE_STACK_VALIDATION
|
||||
bool
|
||||
help
|
||||
Architecture supports the 'objtool check' host tool command, which
|
||||
performs compile-time stack metadata validation.
|
||||
Architecture supports objtool compile-time frame pointer rule
|
||||
validation.
|
||||
|
||||
config HAVE_RELIABLE_STACKTRACE
|
||||
bool
|
||||
|
|
@ -1300,6 +1318,7 @@ config HAVE_STATIC_CALL
|
|||
config HAVE_STATIC_CALL_INLINE
|
||||
bool
|
||||
depends on HAVE_STATIC_CALL
|
||||
select OBJTOOL
|
||||
|
||||
config HAVE_PREEMPT_DYNAMIC
|
||||
bool
|
||||
|
|
|
|||
|
|
@ -28,5 +28,6 @@ static inline cycles_t get_cycles (void)
|
|||
__asm__ __volatile__ ("rpcc %0" : "=r"(ret));
|
||||
return ret;
|
||||
}
|
||||
#define get_cycles get_cycles
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -972,6 +972,17 @@ config ARM_ERRATA_764369
|
|||
relevant cache maintenance functions and sets a specific bit
|
||||
in the diagnostic control register of the SCU.
|
||||
|
||||
config ARM_ERRATA_764319
|
||||
bool "ARM errata: Read to DBGPRSR and DBGOSLSR may generate Undefined instruction"
|
||||
depends on CPU_V7
|
||||
help
|
||||
This option enables the workaround for the 764319 Cortex-A9 erratum.
|
||||
CP14 read accesses to the DBGPRSR and DBGOSLSR registers generate an
|
||||
unexpected Undefined Instruction exception when the DBGSWENABLE
|
||||
external pin is set to 0, even when the CP14 accesses are performed
|
||||
from a privileged mode. This workaround catches the exception in a
|
||||
way the kernel does not stop execution.
|
||||
|
||||
config ARM_ERRATA_775420
|
||||
bool "ARM errata: A data cache maintenance operation which aborts, might lead to deadlock"
|
||||
depends on CPU_V7
|
||||
|
|
|
|||
|
|
@ -30,7 +30,6 @@ CONFIG_ARM_APPENDED_DTB=y
|
|||
# CONFIG_BLK_DEV_BSG is not set
|
||||
CONFIG_BINFMT_FLAT=y
|
||||
CONFIG_BINFMT_ZFLAT=y
|
||||
CONFIG_BINFMT_SHARED_FLAT=y
|
||||
# CONFIG_COREDUMP is not set
|
||||
CONFIG_NET=y
|
||||
CONFIG_PACKET=y
|
||||
|
|
|
|||
|
|
@ -23,7 +23,6 @@ CONFIG_PREEMPT_VOLUNTARY=y
|
|||
CONFIG_ZBOOT_ROM_TEXT=0x0
|
||||
CONFIG_ZBOOT_ROM_BSS=0x0
|
||||
CONFIG_BINFMT_FLAT=y
|
||||
CONFIG_BINFMT_SHARED_FLAT=y
|
||||
# CONFIG_COREDUMP is not set
|
||||
# CONFIG_SUSPEND is not set
|
||||
CONFIG_NET=y
|
||||
|
|
|
|||
|
|
@ -28,7 +28,6 @@ CONFIG_ZBOOT_ROM_BSS=0x0
|
|||
CONFIG_XIP_KERNEL=y
|
||||
CONFIG_XIP_PHYS_ADDR=0x08008000
|
||||
CONFIG_BINFMT_FLAT=y
|
||||
CONFIG_BINFMT_SHARED_FLAT=y
|
||||
# CONFIG_COREDUMP is not set
|
||||
CONFIG_DEVTMPFS=y
|
||||
CONFIG_DEVTMPFS_MOUNT=y
|
||||
|
|
|
|||
|
|
@ -18,7 +18,6 @@ CONFIG_XIP_KERNEL=y
|
|||
CONFIG_XIP_PHYS_ADDR=0x0f000080
|
||||
CONFIG_BINFMT_FLAT=y
|
||||
CONFIG_BINFMT_ZFLAT=y
|
||||
CONFIG_BINFMT_SHARED_FLAT=y
|
||||
# CONFIG_SUSPEND is not set
|
||||
# CONFIG_UEVENT_HELPER is not set
|
||||
# CONFIG_STANDALONE is not set
|
||||
|
|
|
|||
|
|
@ -48,6 +48,7 @@ static inline u32 read_ ## a64(void) \
|
|||
return read_sysreg(a32); \
|
||||
} \
|
||||
|
||||
CPUIF_MAP(ICC_EOIR1, ICC_EOIR1_EL1)
|
||||
CPUIF_MAP(ICC_PMR, ICC_PMR_EL1)
|
||||
CPUIF_MAP(ICC_AP0R0, ICC_AP0R0_EL1)
|
||||
CPUIF_MAP(ICC_AP0R1, ICC_AP0R1_EL1)
|
||||
|
|
@ -63,12 +64,6 @@ CPUIF_MAP(ICC_AP1R3, ICC_AP1R3_EL1)
|
|||
|
||||
/* Low-level accessors */
|
||||
|
||||
static inline void gic_write_eoir(u32 irq)
|
||||
{
|
||||
write_sysreg(irq, ICC_EOIR1);
|
||||
isb();
|
||||
}
|
||||
|
||||
static inline void gic_write_dir(u32 val)
|
||||
{
|
||||
write_sysreg(val, ICC_DIR);
|
||||
|
|
|
|||
|
|
@ -666,12 +666,11 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
|
|||
__adldst_l str, \src, \sym, \tmp, \cond
|
||||
.endm
|
||||
|
||||
.macro __ldst_va, op, reg, tmp, sym, cond
|
||||
.macro __ldst_va, op, reg, tmp, sym, cond, offset
|
||||
#if __LINUX_ARM_ARCH__ >= 7 || \
|
||||
!defined(CONFIG_ARM_HAS_GROUP_RELOCS) || \
|
||||
(defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS))
|
||||
mov_l \tmp, \sym, \cond
|
||||
\op\cond \reg, [\tmp]
|
||||
#else
|
||||
/*
|
||||
* Avoid a literal load, by emitting a sequence of ADD/LDR instructions
|
||||
|
|
@ -683,24 +682,29 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
|
|||
.reloc .L0_\@, R_ARM_ALU_PC_G0_NC, \sym
|
||||
.reloc .L1_\@, R_ARM_ALU_PC_G1_NC, \sym
|
||||
.reloc .L2_\@, R_ARM_LDR_PC_G2, \sym
|
||||
.L0_\@: sub\cond \tmp, pc, #8
|
||||
.L1_\@: sub\cond \tmp, \tmp, #4
|
||||
.L2_\@: \op\cond \reg, [\tmp, #0]
|
||||
.L0_\@: sub\cond \tmp, pc, #8 - \offset
|
||||
.L1_\@: sub\cond \tmp, \tmp, #4 - \offset
|
||||
.L2_\@:
|
||||
#endif
|
||||
\op\cond \reg, [\tmp, #\offset]
|
||||
.endm
|
||||
|
||||
/*
|
||||
* ldr_va - load a 32-bit word from the virtual address of \sym
|
||||
*/
|
||||
.macro ldr_va, rd:req, sym:req, cond
|
||||
__ldst_va ldr, \rd, \rd, \sym, \cond
|
||||
.macro ldr_va, rd:req, sym:req, cond, tmp, offset=0
|
||||
.ifnb \tmp
|
||||
__ldst_va ldr, \rd, \tmp, \sym, \cond, \offset
|
||||
.else
|
||||
__ldst_va ldr, \rd, \rd, \sym, \cond, \offset
|
||||
.endif
|
||||
.endm
|
||||
|
||||
/*
|
||||
* str_va - store a 32-bit word to the virtual address of \sym
|
||||
*/
|
||||
.macro str_va, rn:req, sym:req, tmp:req, cond
|
||||
__ldst_va str, \rn, \tmp, \sym, \cond
|
||||
__ldst_va str, \rn, \tmp, \sym, \cond, 0
|
||||
.endm
|
||||
|
||||
/*
|
||||
|
|
@ -727,9 +731,11 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
|
|||
* are permitted to overlap with 'rd' if != sp
|
||||
*/
|
||||
.macro ldr_this_cpu, rd:req, sym:req, t1:req, t2:req
|
||||
#if __LINUX_ARM_ARCH__ >= 7 || \
|
||||
!defined(CONFIG_ARM_HAS_GROUP_RELOCS) || \
|
||||
(defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS))
|
||||
#ifndef CONFIG_SMP
|
||||
ldr_va \rd, \sym, tmp=\t1
|
||||
#elif __LINUX_ARM_ARCH__ >= 7 || \
|
||||
!defined(CONFIG_ARM_HAS_GROUP_RELOCS) || \
|
||||
(defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS))
|
||||
this_cpu_offset \t1
|
||||
mov_l \t2, \sym
|
||||
ldr \rd, [\t1, \t2]
|
||||
|
|
|
|||
|
|
@ -3,20 +3,10 @@
|
|||
#define _ASM_ARM_MODULE_H
|
||||
|
||||
#include <asm-generic/module.h>
|
||||
|
||||
struct unwind_table;
|
||||
#include <asm/unwind.h>
|
||||
|
||||
#ifdef CONFIG_ARM_UNWIND
|
||||
enum {
|
||||
ARM_SEC_INIT,
|
||||
ARM_SEC_DEVINIT,
|
||||
ARM_SEC_CORE,
|
||||
ARM_SEC_EXIT,
|
||||
ARM_SEC_DEVEXIT,
|
||||
ARM_SEC_HOT,
|
||||
ARM_SEC_UNLIKELY,
|
||||
ARM_SEC_MAX,
|
||||
};
|
||||
#define ELF_SECTION_UNWIND 0x70000001
|
||||
#endif
|
||||
|
||||
#define PLT_ENT_STRIDE L1_CACHE_BYTES
|
||||
|
|
@ -36,7 +26,8 @@ struct mod_plt_sec {
|
|||
|
||||
struct mod_arch_specific {
|
||||
#ifdef CONFIG_ARM_UNWIND
|
||||
struct unwind_table *unwind[ARM_SEC_MAX];
|
||||
struct list_head unwind_list;
|
||||
struct unwind_table *init_table;
|
||||
#endif
|
||||
#ifdef CONFIG_ARM_MODULE_PLTS
|
||||
struct mod_plt_sec core;
|
||||
|
|
|
|||
|
|
@ -11,5 +11,6 @@
|
|||
|
||||
typedef unsigned long cycles_t;
|
||||
#define get_cycles() ({ cycles_t c; read_current_timer(&c) ? 0 : c; })
|
||||
#define random_get_entropy() (((unsigned long)get_cycles()) ?: random_get_entropy_fallback())
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ struct unwind_idx {
|
|||
|
||||
struct unwind_table {
|
||||
struct list_head list;
|
||||
struct list_head mod_list;
|
||||
const struct unwind_idx *start;
|
||||
const struct unwind_idx *origin;
|
||||
const struct unwind_idx *stop;
|
||||
|
|
|
|||
|
|
@ -61,9 +61,8 @@
|
|||
.macro pabt_helper
|
||||
@ PABORT handler takes pt_regs in r2, fault address in r4 and psr in r5
|
||||
#ifdef MULTI_PABORT
|
||||
ldr ip, .LCprocfns
|
||||
mov lr, pc
|
||||
ldr pc, [ip, #PROCESSOR_PABT_FUNC]
|
||||
ldr_va ip, processor, offset=PROCESSOR_PABT_FUNC
|
||||
bl_r ip
|
||||
#else
|
||||
bl CPU_PABORT_HANDLER
|
||||
#endif
|
||||
|
|
@ -82,9 +81,8 @@
|
|||
@ the fault status register in r1. r9 must be preserved.
|
||||
@
|
||||
#ifdef MULTI_DABORT
|
||||
ldr ip, .LCprocfns
|
||||
mov lr, pc
|
||||
ldr pc, [ip, #PROCESSOR_DABT_FUNC]
|
||||
ldr_va ip, processor, offset=PROCESSOR_DABT_FUNC
|
||||
bl_r ip
|
||||
#else
|
||||
bl CPU_DABORT_HANDLER
|
||||
#endif
|
||||
|
|
@ -302,16 +300,6 @@ __fiq_svc:
|
|||
UNWIND(.fnend )
|
||||
ENDPROC(__fiq_svc)
|
||||
|
||||
.align 5
|
||||
.LCcralign:
|
||||
.word cr_alignment
|
||||
#ifdef MULTI_DABORT
|
||||
.LCprocfns:
|
||||
.word processor
|
||||
#endif
|
||||
.LCfp:
|
||||
.word fp_enter
|
||||
|
||||
/*
|
||||
* Abort mode handlers
|
||||
*/
|
||||
|
|
@ -370,7 +358,7 @@ ENDPROC(__fiq_abt)
|
|||
THUMB( stmia sp, {r0 - r12} )
|
||||
|
||||
ATRAP( mrc p15, 0, r7, c1, c0, 0)
|
||||
ATRAP( ldr r8, .LCcralign)
|
||||
ATRAP( ldr_va r8, cr_alignment)
|
||||
|
||||
ldmia r0, {r3 - r5}
|
||||
add r0, sp, #S_PC @ here for interlock avoidance
|
||||
|
|
@ -379,8 +367,6 @@ ENDPROC(__fiq_abt)
|
|||
str r3, [sp] @ save the "real" r0 copied
|
||||
@ from the exception stack
|
||||
|
||||
ATRAP( ldr r8, [r8, #0])
|
||||
|
||||
@
|
||||
@ We are now ready to fill in the remaining blanks on the stack:
|
||||
@
|
||||
|
|
@ -505,9 +491,7 @@ __und_usr_thumb:
|
|||
*/
|
||||
#if __LINUX_ARM_ARCH__ < 7
|
||||
/* If the target CPU may not be Thumb-2-capable, a run-time check is needed: */
|
||||
#define NEED_CPU_ARCHITECTURE
|
||||
ldr r5, .LCcpu_architecture
|
||||
ldr r5, [r5]
|
||||
ldr_va r5, cpu_architecture
|
||||
cmp r5, #CPU_ARCH_ARMv7
|
||||
blo __und_usr_fault_16 @ 16bit undefined instruction
|
||||
/*
|
||||
|
|
@ -654,12 +638,6 @@ call_fpe:
|
|||
ret.w lr @ CP#14 (Debug)
|
||||
ret.w lr @ CP#15 (Control)
|
||||
|
||||
#ifdef NEED_CPU_ARCHITECTURE
|
||||
.align 2
|
||||
.LCcpu_architecture:
|
||||
.word __cpu_architecture
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NEON
|
||||
.align 6
|
||||
|
||||
|
|
@ -685,9 +663,8 @@ call_fpe:
|
|||
#endif
|
||||
|
||||
do_fpe:
|
||||
ldr r4, .LCfp
|
||||
add r10, r10, #TI_FPSTATE @ r10 = workspace
|
||||
ldr pc, [r4] @ Call FP module USR entry point
|
||||
ldr_va pc, fp_enter, tmp=r4 @ Call FP module USR entry point
|
||||
|
||||
/*
|
||||
* The FP module is called with these registers set:
|
||||
|
|
@ -1101,6 +1078,12 @@ __kuser_helper_end:
|
|||
*/
|
||||
.macro vector_stub, name, mode, correction=0
|
||||
.align 5
|
||||
#ifdef CONFIG_HARDEN_BRANCH_HISTORY
|
||||
vector_bhb_bpiall_\name:
|
||||
mcr p15, 0, r0, c7, c5, 6 @ BPIALL
|
||||
@ isb not needed due to "movs pc, lr" in the vector stub
|
||||
@ which gives a "context synchronisation".
|
||||
#endif
|
||||
|
||||
vector_\name:
|
||||
.if \correction
|
||||
|
|
@ -1111,7 +1094,8 @@ vector_\name:
|
|||
stmia sp, {r0, lr} @ save r0, lr
|
||||
|
||||
@ Save spsr_<exception> (parent CPSR)
|
||||
2: mrs lr, spsr
|
||||
.Lvec_\name:
|
||||
mrs lr, spsr
|
||||
str lr, [sp, #8] @ save spsr
|
||||
|
||||
@
|
||||
|
|
@ -1148,25 +1132,11 @@ vector_bhb_loop8_\name:
|
|||
3: W(b) . + 4
|
||||
subs r0, r0, #1
|
||||
bne 3b
|
||||
dsb
|
||||
isb
|
||||
b 2b
|
||||
ENDPROC(vector_bhb_loop8_\name)
|
||||
|
||||
vector_bhb_bpiall_\name:
|
||||
.if \correction
|
||||
sub lr, lr, #\correction
|
||||
.endif
|
||||
|
||||
@ Save r0, lr_<exception> (parent PC)
|
||||
stmia sp, {r0, lr}
|
||||
|
||||
@ bhb workaround
|
||||
mcr p15, 0, r0, c7, c5, 6 @ BPIALL
|
||||
dsb nsh
|
||||
@ isb not needed due to "movs pc, lr" in the vector stub
|
||||
@ which gives a "context synchronisation".
|
||||
b 2b
|
||||
ENDPROC(vector_bhb_bpiall_\name)
|
||||
b .Lvec_\name
|
||||
ENDPROC(vector_bhb_loop8_\name)
|
||||
.previous
|
||||
#endif
|
||||
|
||||
|
|
@ -1176,10 +1146,15 @@ ENDPROC(vector_bhb_bpiall_\name)
|
|||
.endm
|
||||
|
||||
.section .stubs, "ax", %progbits
|
||||
@ This must be the first word
|
||||
@ These need to remain at the start of the section so that
|
||||
@ they are in range of the 'SWI' entries in the vector tables
|
||||
@ located 4k down.
|
||||
.L__vector_swi:
|
||||
.word vector_swi
|
||||
#ifdef CONFIG_HARDEN_BRANCH_HISTORY
|
||||
.L__vector_bhb_loop8_swi:
|
||||
.word vector_bhb_loop8_swi
|
||||
.L__vector_bhb_bpiall_swi:
|
||||
.word vector_bhb_bpiall_swi
|
||||
#endif
|
||||
|
||||
|
|
@ -1322,10 +1297,11 @@ vector_addrexcptn:
|
|||
.globl vector_fiq
|
||||
|
||||
.section .vectors, "ax", %progbits
|
||||
.L__vectors_start:
|
||||
W(b) vector_rst
|
||||
W(b) vector_und
|
||||
W(ldr) pc, .L__vectors_start + 0x1000
|
||||
ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_swi )
|
||||
THUMB( .reloc ., R_ARM_THM_PC12, .L__vector_swi )
|
||||
W(ldr) pc, .
|
||||
W(b) vector_pabt
|
||||
W(b) vector_dabt
|
||||
W(b) vector_addrexcptn
|
||||
|
|
@ -1334,10 +1310,11 @@ vector_addrexcptn:
|
|||
|
||||
#ifdef CONFIG_HARDEN_BRANCH_HISTORY
|
||||
.section .vectors.bhb.loop8, "ax", %progbits
|
||||
.L__vectors_bhb_loop8_start:
|
||||
W(b) vector_rst
|
||||
W(b) vector_bhb_loop8_und
|
||||
W(ldr) pc, .L__vectors_bhb_loop8_start + 0x1004
|
||||
ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_bhb_loop8_swi )
|
||||
THUMB( .reloc ., R_ARM_THM_PC12, .L__vector_bhb_loop8_swi )
|
||||
W(ldr) pc, .
|
||||
W(b) vector_bhb_loop8_pabt
|
||||
W(b) vector_bhb_loop8_dabt
|
||||
W(b) vector_addrexcptn
|
||||
|
|
@ -1345,10 +1322,11 @@ vector_addrexcptn:
|
|||
W(b) vector_bhb_loop8_fiq
|
||||
|
||||
.section .vectors.bhb.bpiall, "ax", %progbits
|
||||
.L__vectors_bhb_bpiall_start:
|
||||
W(b) vector_rst
|
||||
W(b) vector_bhb_bpiall_und
|
||||
W(ldr) pc, .L__vectors_bhb_bpiall_start + 0x1008
|
||||
ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_bhb_bpiall_swi )
|
||||
THUMB( .reloc ., R_ARM_THM_PC12, .L__vector_bhb_bpiall_swi )
|
||||
W(ldr) pc, .
|
||||
W(b) vector_bhb_bpiall_pabt
|
||||
W(b) vector_bhb_bpiall_dabt
|
||||
W(b) vector_addrexcptn
|
||||
|
|
|
|||
|
|
@ -164,7 +164,7 @@ ENTRY(vector_bhb_loop8_swi)
|
|||
1: b 2f
|
||||
2: subs r8, r8, #1
|
||||
bne 1b
|
||||
dsb
|
||||
dsb nsh
|
||||
isb
|
||||
b 3f
|
||||
ENDPROC(vector_bhb_loop8_swi)
|
||||
|
|
@ -198,7 +198,7 @@ ENTRY(vector_swi)
|
|||
#endif
|
||||
reload_current r10, ip
|
||||
zero_fp
|
||||
alignment_trap r10, ip, __cr_alignment
|
||||
alignment_trap r10, ip, cr_alignment
|
||||
asm_trace_hardirqs_on save=0
|
||||
enable_irq_notrace
|
||||
ct_user_exit save=0
|
||||
|
|
@ -328,14 +328,6 @@ __sys_trace_return:
|
|||
bl syscall_trace_exit
|
||||
b ret_slow_syscall
|
||||
|
||||
.align 5
|
||||
#ifdef CONFIG_ALIGNMENT_TRAP
|
||||
.type __cr_alignment, #object
|
||||
__cr_alignment:
|
||||
.word cr_alignment
|
||||
#endif
|
||||
.ltorg
|
||||
|
||||
.macro syscall_table_start, sym
|
||||
.equ __sys_nr, 0
|
||||
.type \sym, #object
|
||||
|
|
|
|||
|
|
@ -48,8 +48,7 @@
|
|||
.macro alignment_trap, rtmp1, rtmp2, label
|
||||
#ifdef CONFIG_ALIGNMENT_TRAP
|
||||
mrc p15, 0, \rtmp2, c1, c0, 0
|
||||
ldr \rtmp1, \label
|
||||
ldr \rtmp1, [\rtmp1]
|
||||
ldr_va \rtmp1, \label
|
||||
teq \rtmp1, \rtmp2
|
||||
mcrne p15, 0, \rtmp1, c1, c0, 0
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -941,6 +941,23 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr,
|
|||
return ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ARM_ERRATA_764319
|
||||
/* Flag set by debug_oslsr_trap() when the hooked instruction traps. */
static int oslsr_fault;
|
||||
|
||||
/*
 * Undefined-instruction hook callback.
 *
 * Records the fault by setting oslsr_fault, then advances the saved
 * instruction pointer in @regs by 4 bytes so execution resumes after the
 * trapping instruction. @instr is not used. Always returns 0.
 */
static int debug_oslsr_trap(struct pt_regs *regs, unsigned int instr)
|
||||
{
|
||||
oslsr_fault = 1;
|
||||
instruction_pointer(regs) += 4;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Hook descriptor routing one exact instruction word to debug_oslsr_trap():
 * the mask is all ones, so only 0xee115e91 matches.
 * NOTE(review): 0xee115e91 is presumably the encoding of the
 * ARM_DBG_READ(c1, c1, 4, ...) access performed while this hook is
 * registered - confirm against the generated code.
 */
static struct undef_hook debug_oslsr_hook = {
|
||||
.instr_mask = 0xffffffff,
|
||||
.instr_val = 0xee115e91,
|
||||
.fn = debug_oslsr_trap,
|
||||
};
|
||||
#endif
|
||||
|
||||
/*
|
||||
* One-time initialisation.
|
||||
*/
|
||||
|
|
@ -974,7 +991,16 @@ static bool core_has_os_save_restore(void)
|
|||
case ARM_DEBUG_ARCH_V7_1:
|
||||
return true;
|
||||
case ARM_DEBUG_ARCH_V7_ECP14:
|
||||
#ifdef CONFIG_ARM_ERRATA_764319
|
||||
oslsr_fault = 0;
|
||||
register_undef_hook(&debug_oslsr_hook);
|
||||
ARM_DBG_READ(c1, c1, 4, oslsr);
|
||||
unregister_undef_hook(&debug_oslsr_hook);
|
||||
if (oslsr_fault)
|
||||
return false;
|
||||
#else
|
||||
ARM_DBG_READ(c1, c1, 4, oslsr);
|
||||
#endif
|
||||
if (oslsr & ARM_OSLSR_OSLM0)
|
||||
return true;
|
||||
fallthrough;
|
||||
|
|
|
|||
|
|
@ -459,46 +459,40 @@ int module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs,
|
|||
#ifdef CONFIG_ARM_UNWIND
|
||||
const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
|
||||
const Elf_Shdr *sechdrs_end = sechdrs + hdr->e_shnum;
|
||||
struct mod_unwind_map maps[ARM_SEC_MAX];
|
||||
int i;
|
||||
struct list_head *unwind_list = &mod->arch.unwind_list;
|
||||
|
||||
memset(maps, 0, sizeof(maps));
|
||||
INIT_LIST_HEAD(unwind_list);
|
||||
mod->arch.init_table = NULL;
|
||||
|
||||
for (s = sechdrs; s < sechdrs_end; s++) {
|
||||
const char *secname = secstrs + s->sh_name;
|
||||
const char *txtname;
|
||||
const Elf_Shdr *txt_sec;
|
||||
|
||||
if (!(s->sh_flags & SHF_ALLOC))
|
||||
if (!(s->sh_flags & SHF_ALLOC) ||
|
||||
s->sh_type != ELF_SECTION_UNWIND)
|
||||
continue;
|
||||
|
||||
if (strcmp(".ARM.exidx.init.text", secname) == 0)
|
||||
maps[ARM_SEC_INIT].unw_sec = s;
|
||||
else if (strcmp(".ARM.exidx", secname) == 0)
|
||||
maps[ARM_SEC_CORE].unw_sec = s;
|
||||
else if (strcmp(".ARM.exidx.exit.text", secname) == 0)
|
||||
maps[ARM_SEC_EXIT].unw_sec = s;
|
||||
else if (strcmp(".ARM.exidx.text.unlikely", secname) == 0)
|
||||
maps[ARM_SEC_UNLIKELY].unw_sec = s;
|
||||
else if (strcmp(".ARM.exidx.text.hot", secname) == 0)
|
||||
maps[ARM_SEC_HOT].unw_sec = s;
|
||||
else if (strcmp(".init.text", secname) == 0)
|
||||
maps[ARM_SEC_INIT].txt_sec = s;
|
||||
else if (strcmp(".text", secname) == 0)
|
||||
maps[ARM_SEC_CORE].txt_sec = s;
|
||||
else if (strcmp(".exit.text", secname) == 0)
|
||||
maps[ARM_SEC_EXIT].txt_sec = s;
|
||||
else if (strcmp(".text.unlikely", secname) == 0)
|
||||
maps[ARM_SEC_UNLIKELY].txt_sec = s;
|
||||
else if (strcmp(".text.hot", secname) == 0)
|
||||
maps[ARM_SEC_HOT].txt_sec = s;
|
||||
}
|
||||
if (!strcmp(".ARM.exidx", secname))
|
||||
txtname = ".text";
|
||||
else
|
||||
txtname = secname + strlen(".ARM.exidx");
|
||||
txt_sec = find_mod_section(hdr, sechdrs, txtname);
|
||||
|
||||
for (i = 0; i < ARM_SEC_MAX; i++)
|
||||
if (maps[i].unw_sec && maps[i].txt_sec)
|
||||
mod->arch.unwind[i] =
|
||||
unwind_table_add(maps[i].unw_sec->sh_addr,
|
||||
maps[i].unw_sec->sh_size,
|
||||
maps[i].txt_sec->sh_addr,
|
||||
maps[i].txt_sec->sh_size);
|
||||
if (txt_sec) {
|
||||
struct unwind_table *table =
|
||||
unwind_table_add(s->sh_addr,
|
||||
s->sh_size,
|
||||
txt_sec->sh_addr,
|
||||
txt_sec->sh_size);
|
||||
|
||||
list_add(&table->mod_list, unwind_list);
|
||||
|
||||
/* save init table for module_arch_freeing_init */
|
||||
if (strcmp(".ARM.exidx.init.text", secname) == 0)
|
||||
mod->arch.init_table = table;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
|
||||
s = find_mod_section(hdr, sechdrs, ".pv_table");
|
||||
|
|
@ -519,19 +513,27 @@ void
|
|||
module_arch_cleanup(struct module *mod)
|
||||
{
|
||||
#ifdef CONFIG_ARM_UNWIND
|
||||
int i;
|
||||
struct unwind_table *tmp;
|
||||
struct unwind_table *n;
|
||||
|
||||
for (i = 0; i < ARM_SEC_MAX; i++) {
|
||||
unwind_table_del(mod->arch.unwind[i]);
|
||||
mod->arch.unwind[i] = NULL;
|
||||
list_for_each_entry_safe(tmp, n,
|
||||
&mod->arch.unwind_list, mod_list) {
|
||||
list_del(&tmp->mod_list);
|
||||
unwind_table_del(tmp);
|
||||
}
|
||||
mod->arch.init_table = NULL;
|
||||
#endif
|
||||
}
|
||||
|
||||
void __weak module_arch_freeing_init(struct module *mod)
|
||||
{
|
||||
#ifdef CONFIG_ARM_UNWIND
|
||||
unwind_table_del(mod->arch.unwind[ARM_SEC_INIT]);
|
||||
mod->arch.unwind[ARM_SEC_INIT] = NULL;
|
||||
struct unwind_table *init = mod->arch.init_table;
|
||||
|
||||
if (init) {
|
||||
mod->arch.init_table = NULL;
|
||||
list_del(&init->mod_list);
|
||||
unwind_table_del(init);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
|
|
|||
|
|
@ -708,6 +708,7 @@ static_assert(offsetof(siginfo_t, si_upper) == 0x18);
|
|||
static_assert(offsetof(siginfo_t, si_pkey) == 0x14);
|
||||
static_assert(offsetof(siginfo_t, si_perf_data) == 0x10);
|
||||
static_assert(offsetof(siginfo_t, si_perf_type) == 0x14);
|
||||
static_assert(offsetof(siginfo_t, si_perf_flags) == 0x18);
|
||||
static_assert(offsetof(siginfo_t, si_band) == 0x0c);
|
||||
static_assert(offsetof(siginfo_t, si_fd) == 0x10);
|
||||
static_assert(offsetof(siginfo_t, si_call_addr) == 0x0c);
|
||||
|
|
|
|||
|
|
@ -4,10 +4,7 @@ menuconfig ARCH_SUNXI
|
|||
depends on ARCH_MULTI_V5 || ARCH_MULTI_V7
|
||||
select ARCH_HAS_RESET_CONTROLLER
|
||||
select CLKSRC_MMIO
|
||||
select GENERIC_IRQ_CHIP
|
||||
select GPIOLIB
|
||||
select IRQ_DOMAIN_HIERARCHY
|
||||
select IRQ_FASTEOI_HIERARCHY_HANDLERS
|
||||
select PINCTRL
|
||||
select PM_OPP
|
||||
select SUN4I_TIMER
|
||||
|
|
@ -22,10 +19,12 @@ if ARCH_MULTI_V7
|
|||
config MACH_SUN4I
|
||||
bool "Allwinner A10 (sun4i) SoCs support"
|
||||
default ARCH_SUNXI
|
||||
select SUN4I_INTC
|
||||
|
||||
config MACH_SUN5I
|
||||
bool "Allwinner A10s / A13 (sun5i) SoCs support"
|
||||
default ARCH_SUNXI
|
||||
select SUN4I_INTC
|
||||
select SUN5I_HSTIMER
|
||||
|
||||
config MACH_SUN6I
|
||||
|
|
@ -34,6 +33,8 @@ config MACH_SUN6I
|
|||
select ARM_GIC
|
||||
select MFD_SUN6I_PRCM
|
||||
select SUN5I_HSTIMER
|
||||
select SUN6I_R_INTC
|
||||
select SUNXI_NMI_INTC
|
||||
|
||||
config MACH_SUN7I
|
||||
bool "Allwinner A20 (sun7i) SoCs support"
|
||||
|
|
@ -43,17 +44,21 @@ config MACH_SUN7I
|
|||
select ARCH_SUPPORTS_BIG_ENDIAN
|
||||
select HAVE_ARM_ARCH_TIMER
|
||||
select SUN5I_HSTIMER
|
||||
select SUNXI_NMI_INTC
|
||||
|
||||
config MACH_SUN8I
|
||||
bool "Allwinner sun8i Family SoCs support"
|
||||
default ARCH_SUNXI
|
||||
select ARM_GIC
|
||||
select MFD_SUN6I_PRCM
|
||||
select SUN6I_R_INTC
|
||||
select SUNXI_NMI_INTC
|
||||
|
||||
config MACH_SUN9I
|
||||
bool "Allwinner (sun9i) SoCs support"
|
||||
default ARCH_SUNXI
|
||||
select ARM_GIC
|
||||
select SUNXI_NMI_INTC
|
||||
|
||||
config ARCH_SUNXI_MC_SMP
|
||||
bool
|
||||
|
|
@ -69,6 +74,7 @@ if ARCH_MULTI_V5
|
|||
config MACH_SUNIV
|
||||
bool "Allwinner ARMv5 F-series (suniv) SoCs support"
|
||||
default ARCH_SUNXI
|
||||
select SUN4I_INTC
|
||||
help
|
||||
Support for Allwinner suniv ARMv5 SoCs.
|
||||
(F1C100A, F1C100s, F1C200s, F1C500, F1C600)
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
|
|||
CFLAGS_REMOVE_vdso.o = -pg
|
||||
|
||||
# Force -O2 to avoid libgcc dependencies
|
||||
CFLAGS_REMOVE_vgettimeofday.o = -pg -Os $(GCC_PLUGINS_CFLAGS)
|
||||
CFLAGS_REMOVE_vgettimeofday.o = -pg -Os $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS)
|
||||
ifeq ($(c-gettimeofday-y),)
|
||||
CFLAGS_vgettimeofday.o = -O2
|
||||
else
|
||||
|
|
|
|||
|
|
@ -262,31 +262,31 @@ config ARM64_CONT_PMD_SHIFT
|
|||
default 4
|
||||
|
||||
config ARCH_MMAP_RND_BITS_MIN
|
||||
default 14 if ARM64_64K_PAGES
|
||||
default 16 if ARM64_16K_PAGES
|
||||
default 18
|
||||
default 14 if ARM64_64K_PAGES
|
||||
default 16 if ARM64_16K_PAGES
|
||||
default 18
|
||||
|
||||
# max bits determined by the following formula:
|
||||
# VA_BITS - PAGE_SHIFT - 3
|
||||
config ARCH_MMAP_RND_BITS_MAX
|
||||
default 19 if ARM64_VA_BITS=36
|
||||
default 24 if ARM64_VA_BITS=39
|
||||
default 27 if ARM64_VA_BITS=42
|
||||
default 30 if ARM64_VA_BITS=47
|
||||
default 29 if ARM64_VA_BITS=48 && ARM64_64K_PAGES
|
||||
default 31 if ARM64_VA_BITS=48 && ARM64_16K_PAGES
|
||||
default 33 if ARM64_VA_BITS=48
|
||||
default 14 if ARM64_64K_PAGES
|
||||
default 16 if ARM64_16K_PAGES
|
||||
default 18
|
||||
default 19 if ARM64_VA_BITS=36
|
||||
default 24 if ARM64_VA_BITS=39
|
||||
default 27 if ARM64_VA_BITS=42
|
||||
default 30 if ARM64_VA_BITS=47
|
||||
default 29 if ARM64_VA_BITS=48 && ARM64_64K_PAGES
|
||||
default 31 if ARM64_VA_BITS=48 && ARM64_16K_PAGES
|
||||
default 33 if ARM64_VA_BITS=48
|
||||
default 14 if ARM64_64K_PAGES
|
||||
default 16 if ARM64_16K_PAGES
|
||||
default 18
|
||||
|
||||
config ARCH_MMAP_RND_COMPAT_BITS_MIN
|
||||
default 7 if ARM64_64K_PAGES
|
||||
default 9 if ARM64_16K_PAGES
|
||||
default 11
|
||||
default 7 if ARM64_64K_PAGES
|
||||
default 9 if ARM64_16K_PAGES
|
||||
default 11
|
||||
|
||||
config ARCH_MMAP_RND_COMPAT_BITS_MAX
|
||||
default 16
|
||||
default 16
|
||||
|
||||
config NO_IOPORT_MAP
|
||||
def_bool y if !PCI
|
||||
|
|
@ -313,7 +313,7 @@ config GENERIC_HWEIGHT
|
|||
def_bool y
|
||||
|
||||
config GENERIC_CSUM
|
||||
def_bool y
|
||||
def_bool y
|
||||
|
||||
config GENERIC_CALIBRATE_DELAY
|
||||
def_bool y
|
||||
|
|
@ -1046,8 +1046,7 @@ config SOCIONEXT_SYNQUACER_PREITS
|
|||
|
||||
If unsure, say Y.
|
||||
|
||||
endmenu
|
||||
|
||||
endmenu # "ARM errata workarounds via the alternatives framework"
|
||||
|
||||
choice
|
||||
prompt "Page size"
|
||||
|
|
@ -1575,9 +1574,9 @@ config SETEND_EMULATION
|
|||
be unexpected results in the applications.
|
||||
|
||||
If unsure, say Y
|
||||
endif
|
||||
endif # ARMV8_DEPRECATED
|
||||
|
||||
endif
|
||||
endif # COMPAT
|
||||
|
||||
menu "ARMv8.1 architectural features"
|
||||
|
||||
|
|
@ -1602,15 +1601,15 @@ config ARM64_PAN
|
|||
bool "Enable support for Privileged Access Never (PAN)"
|
||||
default y
|
||||
help
|
||||
Privileged Access Never (PAN; part of the ARMv8.1 Extensions)
|
||||
prevents the kernel or hypervisor from accessing user-space (EL0)
|
||||
memory directly.
|
||||
Privileged Access Never (PAN; part of the ARMv8.1 Extensions)
|
||||
prevents the kernel or hypervisor from accessing user-space (EL0)
|
||||
memory directly.
|
||||
|
||||
Choosing this option will cause any unprotected (not using
|
||||
copy_to_user et al) memory access to fail with a permission fault.
|
||||
Choosing this option will cause any unprotected (not using
|
||||
copy_to_user et al) memory access to fail with a permission fault.
|
||||
|
||||
The feature is detected at runtime, and will remain as a 'nop'
|
||||
instruction if the cpu does not implement the feature.
|
||||
The feature is detected at runtime, and will remain as a 'nop'
|
||||
instruction if the cpu does not implement the feature.
|
||||
|
||||
config AS_HAS_LDAPR
|
||||
def_bool $(as-instr,.arch_extension rcpc)
|
||||
|
|
@ -1638,15 +1637,15 @@ config ARM64_USE_LSE_ATOMICS
|
|||
built with binutils >= 2.25 in order for the new instructions
|
||||
to be used.
|
||||
|
||||
endmenu
|
||||
endmenu # "ARMv8.1 architectural features"
|
||||
|
||||
menu "ARMv8.2 architectural features"
|
||||
|
||||
config AS_HAS_ARMV8_2
|
||||
def_bool $(cc-option,-Wa$(comma)-march=armv8.2-a)
|
||||
def_bool $(cc-option,-Wa$(comma)-march=armv8.2-a)
|
||||
|
||||
config AS_HAS_SHA3
|
||||
def_bool $(as-instr,.arch armv8.2-a+sha3)
|
||||
def_bool $(as-instr,.arch armv8.2-a+sha3)
|
||||
|
||||
config ARM64_PMEM
|
||||
bool "Enable support for persistent memory"
|
||||
|
|
@ -1690,7 +1689,7 @@ config ARM64_CNP
|
|||
at runtime, and does not affect PEs that do not implement
|
||||
this feature.
|
||||
|
||||
endmenu
|
||||
endmenu # "ARMv8.2 architectural features"
|
||||
|
||||
menu "ARMv8.3 architectural features"
|
||||
|
||||
|
|
@ -1753,7 +1752,7 @@ config AS_HAS_PAC
|
|||
config AS_HAS_CFI_NEGATE_RA_STATE
|
||||
def_bool $(as-instr,.cfi_startproc\n.cfi_negate_ra_state\n.cfi_endproc\n)
|
||||
|
||||
endmenu
|
||||
endmenu # "ARMv8.3 architectural features"
|
||||
|
||||
menu "ARMv8.4 architectural features"
|
||||
|
||||
|
|
@ -1794,7 +1793,7 @@ config ARM64_TLB_RANGE
|
|||
The feature introduces new assembly instructions, which are
|
||||
supported by binutils >= 2.30.
|
||||
|
||||
endmenu
|
||||
endmenu # "ARMv8.4 architectural features"
|
||||
|
||||
menu "ARMv8.5 architectural features"
|
||||
|
||||
|
|
@ -1880,6 +1879,7 @@ config ARM64_MTE
|
|||
depends on AS_HAS_LSE_ATOMICS
|
||||
# Required for tag checking in the uaccess routines
|
||||
depends on ARM64_PAN
|
||||
select ARCH_HAS_SUBPAGE_FAULTS
|
||||
select ARCH_USES_HIGH_VMA_FLAGS
|
||||
help
|
||||
Memory Tagging (part of the ARMv8.5 Extensions) provides
|
||||
|
|
@ -1901,7 +1901,7 @@ config ARM64_MTE
|
|||
|
||||
Documentation/arm64/memory-tagging-extension.rst.
|
||||
|
||||
endmenu
|
||||
endmenu # "ARMv8.5 architectural features"
|
||||
|
||||
menu "ARMv8.7 architectural features"
|
||||
|
||||
|
|
@ -1910,12 +1910,12 @@ config ARM64_EPAN
|
|||
default y
|
||||
depends on ARM64_PAN
|
||||
help
|
||||
Enhanced Privileged Access Never (EPAN) allows Privileged
|
||||
Access Never to be used with Execute-only mappings.
|
||||
Enhanced Privileged Access Never (EPAN) allows Privileged
|
||||
Access Never to be used with Execute-only mappings.
|
||||
|
||||
The feature is detected at runtime, and will remain disabled
|
||||
if the cpu does not implement the feature.
|
||||
endmenu
|
||||
The feature is detected at runtime, and will remain disabled
|
||||
if the cpu does not implement the feature.
|
||||
endmenu # "ARMv8.7 architectural features"
|
||||
|
||||
config ARM64_SVE
|
||||
bool "ARM Scalable Vector Extension support"
|
||||
|
|
@ -1948,6 +1948,17 @@ config ARM64_SVE
|
|||
booting the kernel. If unsure and you are not observing these
|
||||
symptoms, you should assume that it is safe to say Y.
|
||||
|
||||
config ARM64_SME
|
||||
bool "ARM Scalable Matrix Extension support"
|
||||
default y
|
||||
depends on ARM64_SVE
|
||||
help
|
||||
The Scalable Matrix Extension (SME) is an extension to the AArch64
|
||||
execution state which utilises a substantial subset of the SVE
|
||||
instruction set, together with the addition of new architectural
|
||||
register state capable of holding two dimensional matrix tiles to
|
||||
enable various matrix operations.
|
||||
|
||||
config ARM64_MODULE_PLTS
|
||||
bool "Use PLTs to allow module memory to spill over into vmalloc area"
|
||||
depends on MODULES
|
||||
|
|
@ -1991,7 +2002,7 @@ config ARM64_DEBUG_PRIORITY_MASKING
|
|||
the validity of ICC_PMR_EL1 when calling concerned functions.
|
||||
|
||||
If unsure, say N
|
||||
endif
|
||||
endif # ARM64_PSEUDO_NMI
|
||||
|
||||
config RELOCATABLE
|
||||
bool "Build a relocatable kernel image" if EXPERT
|
||||
|
|
@ -2050,7 +2061,19 @@ config STACKPROTECTOR_PER_TASK
|
|||
def_bool y
|
||||
depends on STACKPROTECTOR && CC_HAVE_STACKPROTECTOR_SYSREG
|
||||
|
||||
endmenu
|
||||
# The GPIO numbers here must be sorted in descending order. In case of
|
||||
# a multiplatform kernel, we just want the highest value required by the
|
||||
# selected platforms.
|
||||
config ARCH_NR_GPIO
|
||||
int
|
||||
default 2048 if ARCH_APPLE
|
||||
default 0
|
||||
help
|
||||
Maximum number of GPIOs in the system.
|
||||
|
||||
If unsure, leave the default value.
|
||||
|
||||
endmenu # "Kernel Features"
|
||||
|
||||
menu "Boot options"
|
||||
|
||||
|
|
@ -2114,7 +2137,7 @@ config EFI
|
|||
help
|
||||
This option provides support for runtime services provided
|
||||
by UEFI firmware (such as non-volatile variables, realtime
|
||||
clock, and platform reset). A UEFI stub is also provided to
|
||||
clock, and platform reset). A UEFI stub is also provided to
|
||||
allow the kernel to be booted as an EFI application. This
|
||||
is only useful on systems that have UEFI firmware.
|
||||
|
||||
|
|
@ -2129,7 +2152,7 @@ config DMI
|
|||
However, even with this option, the resultant kernel should
|
||||
continue to boot on existing non-UEFI platforms.
|
||||
|
||||
endmenu
|
||||
endmenu # "Boot options"
|
||||
|
||||
config SYSVIPC_COMPAT
|
||||
def_bool y
|
||||
|
|
@ -2150,7 +2173,7 @@ config ARCH_HIBERNATION_HEADER
|
|||
config ARCH_SUSPEND_POSSIBLE
|
||||
def_bool y
|
||||
|
||||
endmenu
|
||||
endmenu # "Power management options"
|
||||
|
||||
menu "CPU Power Management"
|
||||
|
||||
|
|
@ -2158,7 +2181,7 @@ source "drivers/cpuidle/Kconfig"
|
|||
|
||||
source "drivers/cpufreq/Kconfig"
|
||||
|
||||
endmenu
|
||||
endmenu # "CPU Power Management"
|
||||
|
||||
source "drivers/acpi/Kconfig"
|
||||
|
||||
|
|
@ -2166,4 +2189,4 @@ source "arch/arm64/kvm/Kconfig"
|
|||
|
||||
if CRYPTO
|
||||
source "arch/arm64/crypto/Kconfig"
|
||||
endif
|
||||
endif # CRYPTO
|
||||
|
|
|
|||
|
|
@ -11,12 +11,11 @@ config ARCH_ACTIONS
|
|||
config ARCH_SUNXI
|
||||
bool "Allwinner sunxi 64-bit SoC Family"
|
||||
select ARCH_HAS_RESET_CONTROLLER
|
||||
select GENERIC_IRQ_CHIP
|
||||
select IRQ_DOMAIN_HIERARCHY
|
||||
select IRQ_FASTEOI_HIERARCHY_HANDLERS
|
||||
select PINCTRL
|
||||
select RESET_CONTROLLER
|
||||
select SUN4I_TIMER
|
||||
select SUN6I_R_INTC
|
||||
select SUNXI_NMI_INTC
|
||||
help
|
||||
This enables support for Allwinner sunxi based SoCs like the A64.
|
||||
|
||||
|
|
@ -253,6 +252,7 @@ config ARCH_INTEL_SOCFPGA
|
|||
|
||||
config ARCH_SYNQUACER
|
||||
bool "Socionext SynQuacer SoC Family"
|
||||
select IRQ_FASTEOI_HIERARCHY_HANDLERS
|
||||
|
||||
config ARCH_TEGRA
|
||||
bool "NVIDIA Tegra SoC Family"
|
||||
|
|
@ -325,4 +325,4 @@ config ARCH_ZYNQMP
|
|||
help
|
||||
This enables support for Xilinx ZynqMP Family
|
||||
|
||||
endmenu
|
||||
endmenu # "Platform selection"
|
||||
|
|
|
|||
|
|
@ -7,3 +7,4 @@ generic-y += parport.h
|
|||
generic-y += user.h
|
||||
|
||||
generated-y += cpucaps.h
|
||||
generated-y += sysreg-defs.h
|
||||
|
|
|
|||
|
|
@ -26,12 +26,6 @@
|
|||
* sets the GP register's most significant bits to 0 with an explicit cast.
|
||||
*/
|
||||
|
||||
static inline void gic_write_eoir(u32 irq)
|
||||
{
|
||||
write_sysreg_s(irq, SYS_ICC_EOIR1_EL1);
|
||||
isb();
|
||||
}
|
||||
|
||||
static __always_inline void gic_write_dir(u32 irq)
|
||||
{
|
||||
write_sysreg_s(irq, SYS_ICC_DIR_EL1);
|
||||
|
|
|
|||
|
|
@ -142,7 +142,7 @@ static inline bool __init __early_cpu_has_rndr(void)
|
|||
{
|
||||
/* Open code as we run prior to the first call to cpufeature. */
|
||||
unsigned long ftr = read_sysreg_s(SYS_ID_AA64ISAR0_EL1);
|
||||
return (ftr >> ID_AA64ISAR0_RNDR_SHIFT) & 0xf;
|
||||
return (ftr >> ID_AA64ISAR0_EL1_RNDR_SHIFT) & 0xf;
|
||||
}
|
||||
|
||||
static inline bool __init __must_check
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@
|
|||
14472: .string file; \
|
||||
.popsection; \
|
||||
\
|
||||
.long 14472b - 14470b; \
|
||||
.long 14472b - .; \
|
||||
.short line;
|
||||
#else
|
||||
#define _BUGVERBOSE_LOCATION(file, line)
|
||||
|
|
@ -25,7 +25,7 @@
|
|||
#define __BUG_ENTRY(flags) \
|
||||
.pushsection __bug_table,"aw"; \
|
||||
.align 2; \
|
||||
14470: .long 14471f - 14470b; \
|
||||
14470: .long 14471f - .; \
|
||||
_BUGVERBOSE_LOCATION(__FILE__, __LINE__) \
|
||||
.short flags; \
|
||||
.popsection; \
|
||||
|
|
|
|||
|
|
@ -23,20 +23,4 @@
|
|||
#define __builtin_return_address(val) \
|
||||
(void *)(ptrauth_clear_pac((unsigned long)__builtin_return_address(val)))
|
||||
|
||||
#ifdef CONFIG_CFI_CLANG
|
||||
/*
|
||||
* With CONFIG_CFI_CLANG, the compiler replaces function address
|
||||
* references with the address of the function's CFI jump table
|
||||
* entry. The function_nocfi macro always returns the address of the
|
||||
* actual function instead.
|
||||
*/
|
||||
#define function_nocfi(x) ({ \
|
||||
void *addr; \
|
||||
asm("adrp %0, " __stringify(x) "\n\t" \
|
||||
"add %0, %0, :lo12:" __stringify(x) \
|
||||
: "=r" (addr)); \
|
||||
addr; \
|
||||
})
|
||||
#endif
|
||||
|
||||
#endif /* __ASM_COMPILER_H */
|
||||
|
|
|
|||
|
|
@ -58,11 +58,15 @@ struct cpuinfo_arm64 {
|
|||
u64 reg_id_aa64pfr0;
|
||||
u64 reg_id_aa64pfr1;
|
||||
u64 reg_id_aa64zfr0;
|
||||
u64 reg_id_aa64smfr0;
|
||||
|
||||
struct cpuinfo_32bit aarch32;
|
||||
|
||||
/* pseudo-ZCR for recording maximum ZCR_EL1 LEN value: */
|
||||
u64 reg_zcr;
|
||||
|
||||
/* pseudo-SMCR for recording maximum SMCR_EL1 LEN value: */
|
||||
u64 reg_smcr;
|
||||
};
|
||||
|
||||
DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data);
|
||||
|
|
|
|||
|
|
@ -622,6 +622,13 @@ static inline bool id_aa64pfr0_sve(u64 pfr0)
|
|||
return val > 0;
|
||||
}
|
||||
|
||||
/*
 * Return true when the unsigned field extracted from @pfr1 at
 * ID_AA64PFR1_SME_SHIFT is non-zero, false otherwise.
 * NOTE(review): @pfr1 is presumably a value read from ID_AA64PFR1_EL1 -
 * confirm against callers.
 */
static inline bool id_aa64pfr1_sme(u64 pfr1)
|
||||
{
|
||||
u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_SME_SHIFT);
|
||||
|
||||
return val > 0;
|
||||
}
|
||||
|
||||
static inline bool id_aa64pfr1_mte(u64 pfr1)
|
||||
{
|
||||
u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_MTE_SHIFT);
|
||||
|
|
@ -759,6 +766,23 @@ static __always_inline bool system_supports_sve(void)
|
|||
cpus_have_const_cap(ARM64_SVE);
|
||||
}
|
||||
|
||||
/*
 * Return true only when CONFIG_ARM64_SME is enabled in the build and
 * cpus_have_const_cap() reports the ARM64_SME capability.
 */
static __always_inline bool system_supports_sme(void)
|
||||
{
|
||||
return IS_ENABLED(CONFIG_ARM64_SME) &&
|
||||
cpus_have_const_cap(ARM64_SME);
|
||||
}
|
||||
|
||||
/*
 * Return true only when CONFIG_ARM64_SME is enabled in the build and
 * cpus_have_const_cap() reports the ARM64_SME_FA64 capability.
 */
static __always_inline bool system_supports_fa64(void)
|
||||
{
|
||||
return IS_ENABLED(CONFIG_ARM64_SME) &&
|
||||
cpus_have_const_cap(ARM64_SME_FA64);
|
||||
}
|
||||
|
||||
/*
 * TPIDR2 support is reported exactly when SME is: this simply forwards
 * to system_supports_sme().
 */
static __always_inline bool system_supports_tpidr2(void)
|
||||
{
|
||||
return system_supports_sme();
|
||||
}
|
||||
|
||||
static __always_inline bool system_supports_cnp(void)
|
||||
{
|
||||
return IS_ENABLED(CONFIG_ARM64_CNP) &&
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@
|
|||
#define MIDR_VARIANT(midr) \
|
||||
(((midr) & MIDR_VARIANT_MASK) >> MIDR_VARIANT_SHIFT)
|
||||
#define MIDR_IMPLEMENTOR_SHIFT 24
|
||||
#define MIDR_IMPLEMENTOR_MASK (0xff << MIDR_IMPLEMENTOR_SHIFT)
|
||||
#define MIDR_IMPLEMENTOR_MASK (0xffU << MIDR_IMPLEMENTOR_SHIFT)
|
||||
#define MIDR_IMPLEMENTOR(midr) \
|
||||
(((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT)
|
||||
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ struct task_struct;
|
|||
|
||||
struct step_hook {
|
||||
struct list_head node;
|
||||
int (*fn)(struct pt_regs *regs, unsigned int esr);
|
||||
int (*fn)(struct pt_regs *regs, unsigned long esr);
|
||||
};
|
||||
|
||||
void register_user_step_hook(struct step_hook *hook);
|
||||
|
|
@ -75,7 +75,7 @@ void unregister_kernel_step_hook(struct step_hook *hook);
|
|||
|
||||
struct break_hook {
|
||||
struct list_head node;
|
||||
int (*fn)(struct pt_regs *regs, unsigned int esr);
|
||||
int (*fn)(struct pt_regs *regs, unsigned long esr);
|
||||
u16 imm;
|
||||
u16 mask; /* These bits are ignored when comparing with imm */
|
||||
};
|
||||
|
|
|
|||
|
|
@ -143,6 +143,50 @@
|
|||
.Lskip_sve_\@:
|
||||
.endm
|
||||
|
||||
/* SME register access and priority mapping */
|
||||
.macro __init_el2_nvhe_sme
|
||||
mrs x1, id_aa64pfr1_el1
|
||||
ubfx x1, x1, #ID_AA64PFR1_SME_SHIFT, #4
|
||||
cbz x1, .Lskip_sme_\@
|
||||
|
||||
bic x0, x0, #CPTR_EL2_TSM // Also disable SME traps
|
||||
msr cptr_el2, x0 // Disable copro. traps to EL2
|
||||
isb
|
||||
|
||||
mrs x1, sctlr_el2
|
||||
orr x1, x1, #SCTLR_ELx_ENTP2 // Disable TPIDR2 traps
|
||||
msr sctlr_el2, x1
|
||||
isb
|
||||
|
||||
mov x1, #0 // SMCR controls
|
||||
|
||||
mrs_s x2, SYS_ID_AA64SMFR0_EL1
|
||||
ubfx x2, x2, #ID_AA64SMFR0_FA64_SHIFT, #1 // Full FP in SM?
|
||||
cbz x2, .Lskip_sme_fa64_\@
|
||||
|
||||
orr x1, x1, SMCR_ELx_FA64_MASK
|
||||
.Lskip_sme_fa64_\@:
|
||||
|
||||
orr x1, x1, #SMCR_ELx_LEN_MASK // Enable full SME vector
|
||||
msr_s SYS_SMCR_EL2, x1 // length for EL1.
|
||||
|
||||
mrs_s x1, SYS_SMIDR_EL1 // Priority mapping supported?
|
||||
ubfx x1, x1, #SMIDR_EL1_SMPS_SHIFT, #1
|
||||
cbz x1, .Lskip_sme_\@
|
||||
|
||||
msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal
|
||||
|
||||
mrs x1, id_aa64mmfr1_el1 // HCRX_EL2 present?
|
||||
ubfx x1, x1, #ID_AA64MMFR1_HCX_SHIFT, #4
|
||||
cbz x1, .Lskip_sme_\@
|
||||
|
||||
mrs_s x1, SYS_HCRX_EL2
|
||||
orr x1, x1, #HCRX_EL2_SMPME_MASK // Enable priority mapping
|
||||
msr_s SYS_HCRX_EL2, x1
|
||||
|
||||
.Lskip_sme_\@:
|
||||
.endm
|
||||
|
||||
/* Disable any fine grained traps */
|
||||
.macro __init_el2_fgt
|
||||
mrs x1, id_aa64mmfr0_el1
|
||||
|
|
@ -153,15 +197,26 @@
|
|||
mrs x1, id_aa64dfr0_el1
|
||||
ubfx x1, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4
|
||||
cmp x1, #3
|
||||
b.lt .Lset_fgt_\@
|
||||
b.lt .Lset_debug_fgt_\@
|
||||
/* Disable PMSNEVFR_EL1 read and write traps */
|
||||
orr x0, x0, #(1 << 62)
|
||||
|
||||
.Lset_fgt_\@:
|
||||
.Lset_debug_fgt_\@:
|
||||
msr_s SYS_HDFGRTR_EL2, x0
|
||||
msr_s SYS_HDFGWTR_EL2, x0
|
||||
msr_s SYS_HFGRTR_EL2, xzr
|
||||
msr_s SYS_HFGWTR_EL2, xzr
|
||||
|
||||
mov x0, xzr
|
||||
mrs x1, id_aa64pfr1_el1
|
||||
ubfx x1, x1, #ID_AA64PFR1_SME_SHIFT, #4
|
||||
cbz x1, .Lset_fgt_\@
|
||||
|
||||
/* Disable nVHE traps of TPIDR2 and SMPRI */
|
||||
orr x0, x0, #HFGxTR_EL2_nSMPRI_EL1_MASK
|
||||
orr x0, x0, #HFGxTR_EL2_nTPIDR2_EL0_MASK
|
||||
|
||||
.Lset_fgt_\@:
|
||||
msr_s SYS_HFGRTR_EL2, x0
|
||||
msr_s SYS_HFGWTR_EL2, x0
|
||||
msr_s SYS_HFGITR_EL2, xzr
|
||||
|
||||
mrs x1, id_aa64pfr0_el1 // AMU traps UNDEF without AMU
|
||||
|
|
@ -196,6 +251,7 @@
|
|||
__init_el2_nvhe_idregs
|
||||
__init_el2_nvhe_cptr
|
||||
__init_el2_nvhe_sve
|
||||
__init_el2_nvhe_sme
|
||||
__init_el2_fgt
|
||||
__init_el2_nvhe_prepare_eret
|
||||
.endm
|
||||
|
|
|
|||
|
|
@ -37,7 +37,8 @@
|
|||
#define ESR_ELx_EC_ERET (0x1a) /* EL2 only */
|
||||
/* Unallocated EC: 0x1B */
|
||||
#define ESR_ELx_EC_FPAC (0x1C) /* EL1 and above */
|
||||
/* Unallocated EC: 0x1D - 0x1E */
|
||||
#define ESR_ELx_EC_SME (0x1D)
|
||||
/* Unallocated EC: 0x1E */
|
||||
#define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */
|
||||
#define ESR_ELx_EC_IABT_LOW (0x20)
|
||||
#define ESR_ELx_EC_IABT_CUR (0x21)
|
||||
|
|
@ -75,6 +76,7 @@
|
|||
#define ESR_ELx_IL_SHIFT (25)
|
||||
#define ESR_ELx_IL (UL(1) << ESR_ELx_IL_SHIFT)
|
||||
#define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1)
|
||||
#define ESR_ELx_ISS(esr) ((esr) & ESR_ELx_ISS_MASK)
|
||||
|
||||
/* ISS field definitions shared by different classes */
|
||||
#define ESR_ELx_WNR_SHIFT (6)
|
||||
|
|
@ -136,7 +138,7 @@
|
|||
#define ESR_ELx_WFx_ISS_TI (UL(1) << 0)
|
||||
#define ESR_ELx_WFx_ISS_WFI (UL(0) << 0)
|
||||
#define ESR_ELx_WFx_ISS_WFE (UL(1) << 0)
|
||||
#define ESR_ELx_xVC_IMM_MASK ((1UL << 16) - 1)
|
||||
#define ESR_ELx_xVC_IMM_MASK ((UL(1) << 16) - 1)
|
||||
|
||||
#define DISR_EL1_IDS (UL(1) << 24)
|
||||
/*
|
||||
|
|
@ -327,17 +329,26 @@
|
|||
#define ESR_ELx_CP15_32_ISS_SYS_CNTFRQ (ESR_ELx_CP15_32_ISS_SYS_VAL(0, 0, 14, 0) |\
|
||||
ESR_ELx_CP15_32_ISS_DIR_READ)
|
||||
|
||||
/*
|
||||
* ISS values for SME traps
|
||||
*/
|
||||
|
||||
#define ESR_ELx_SME_ISS_SME_DISABLED 0
|
||||
#define ESR_ELx_SME_ISS_ILL 1
|
||||
#define ESR_ELx_SME_ISS_SM_DISABLED 2
|
||||
#define ESR_ELx_SME_ISS_ZA_DISABLED 3
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
#include <asm/types.h>
|
||||
|
||||
static inline bool esr_is_data_abort(u32 esr)
|
||||
static inline bool esr_is_data_abort(unsigned long esr)
|
||||
{
|
||||
const u32 ec = ESR_ELx_EC(esr);
|
||||
const unsigned long ec = ESR_ELx_EC(esr);
|
||||
|
||||
return ec == ESR_ELx_EC_DABT_LOW || ec == ESR_ELx_EC_DABT_CUR;
|
||||
}
|
||||
|
||||
const char *esr_get_class_string(u32 esr);
|
||||
const char *esr_get_class_string(unsigned long esr);
|
||||
#endif /* __ASSEMBLY */
|
||||
|
||||
#endif /* __ASM_ESR_H */
|
||||
|
|
|
|||
|
|
@ -19,9 +19,9 @@
|
|||
#define __exception_irq_entry __kprobes
|
||||
#endif
|
||||
|
||||
static inline u32 disr_to_esr(u64 disr)
|
||||
static inline unsigned long disr_to_esr(u64 disr)
|
||||
{
|
||||
unsigned int esr = ESR_ELx_EC_SERROR << ESR_ELx_EC_SHIFT;
|
||||
unsigned long esr = ESR_ELx_EC_SERROR << ESR_ELx_EC_SHIFT;
|
||||
|
||||
if ((disr & DISR_EL1_IDS) == 0)
|
||||
esr |= (disr & DISR_EL1_ESR_MASK);
|
||||
|
|
@ -57,23 +57,24 @@ asmlinkage void call_on_irq_stack(struct pt_regs *regs,
|
|||
void (*func)(struct pt_regs *));
|
||||
asmlinkage void asm_exit_to_user_mode(struct pt_regs *regs);
|
||||
|
||||
void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs);
|
||||
void do_mem_abort(unsigned long far, unsigned long esr, struct pt_regs *regs);
|
||||
void do_undefinstr(struct pt_regs *regs);
|
||||
void do_bti(struct pt_regs *regs);
|
||||
void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
|
||||
void do_debug_exception(unsigned long addr_if_watchpoint, unsigned long esr,
|
||||
struct pt_regs *regs);
|
||||
void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs);
|
||||
void do_sve_acc(unsigned int esr, struct pt_regs *regs);
|
||||
void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs);
|
||||
void do_sysinstr(unsigned int esr, struct pt_regs *regs);
|
||||
void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
|
||||
void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr);
|
||||
void do_cp15instr(unsigned int esr, struct pt_regs *regs);
|
||||
void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs);
|
||||
void do_sve_acc(unsigned long esr, struct pt_regs *regs);
|
||||
void do_sme_acc(unsigned long esr, struct pt_regs *regs);
|
||||
void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs);
|
||||
void do_sysinstr(unsigned long esr, struct pt_regs *regs);
|
||||
void do_sp_pc_abort(unsigned long addr, unsigned long esr, struct pt_regs *regs);
|
||||
void bad_el0_sync(struct pt_regs *regs, int reason, unsigned long esr);
|
||||
void do_cp15instr(unsigned long esr, struct pt_regs *regs);
|
||||
void do_el0_svc(struct pt_regs *regs);
|
||||
void do_el0_svc_compat(struct pt_regs *regs);
|
||||
void do_ptrauth_fault(struct pt_regs *regs, unsigned int esr);
|
||||
void do_serror(struct pt_regs *regs, unsigned int esr);
|
||||
void do_ptrauth_fault(struct pt_regs *regs, unsigned long esr);
|
||||
void do_serror(struct pt_regs *regs, unsigned long esr);
|
||||
void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags);
|
||||
|
||||
void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far);
|
||||
void panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigned long far);
|
||||
#endif /* __ASM_EXCEPTION_H */
|
||||
|
|
|
|||
|
|
@ -32,6 +32,18 @@
|
|||
#define VFP_STATE_SIZE ((32 * 8) + 4)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* When we defined the maximum SVE vector length we defined the ABI so
|
||||
* that the maximum vector length included all the reserved for future
|
||||
* expansion bits in ZCR rather than those just currently defined by
|
||||
* the architecture. While SME follows a similar pattern the fact that
|
||||
* it includes a square matrix means that any allocations that attempt
|
||||
* to cover the maximum potential vector length (such as happen with
|
||||
* the regset used for ptrace) end up being extremely large. Define
|
||||
* the much lower actual limit for use in such situations.
|
||||
*/
|
||||
#define SME_VQ_MAX 16
|
||||
|
||||
struct task_struct;
|
||||
|
||||
extern void fpsimd_save_state(struct user_fpsimd_state *state);
|
||||
|
|
@ -46,11 +58,23 @@ extern void fpsimd_restore_current_state(void);
|
|||
extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
|
||||
|
||||
extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state,
|
||||
void *sve_state, unsigned int sve_vl);
|
||||
void *sve_state, unsigned int sve_vl,
|
||||
void *za_state, unsigned int sme_vl,
|
||||
u64 *svcr);
|
||||
|
||||
extern void fpsimd_flush_task_state(struct task_struct *target);
|
||||
extern void fpsimd_save_and_flush_cpu_state(void);
|
||||
|
||||
static inline bool thread_sm_enabled(struct thread_struct *thread)
|
||||
{
|
||||
return system_supports_sme() && (thread->svcr & SVCR_SM_MASK);
|
||||
}
|
||||
|
||||
static inline bool thread_za_enabled(struct thread_struct *thread)
|
||||
{
|
||||
return system_supports_sme() && (thread->svcr & SVCR_ZA_MASK);
|
||||
}
|
||||
|
||||
/* Maximum VL that SVE/SME VL-agnostic software can transparently support */
|
||||
#define VL_ARCH_MAX 0x100
|
||||
|
||||
|
|
@ -62,7 +86,14 @@ static inline size_t sve_ffr_offset(int vl)
|
|||
|
||||
static inline void *sve_pffr(struct thread_struct *thread)
|
||||
{
|
||||
return (char *)thread->sve_state + sve_ffr_offset(thread_get_sve_vl(thread));
|
||||
unsigned int vl;
|
||||
|
||||
if (system_supports_sme() && thread_sm_enabled(thread))
|
||||
vl = thread_get_sme_vl(thread);
|
||||
else
|
||||
vl = thread_get_sve_vl(thread);
|
||||
|
||||
return (char *)thread->sve_state + sve_ffr_offset(vl);
|
||||
}
|
||||
|
||||
extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr);
|
||||
|
|
@ -71,11 +102,17 @@ extern void sve_load_state(void const *state, u32 const *pfpsr,
|
|||
extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1);
|
||||
extern unsigned int sve_get_vl(void);
|
||||
extern void sve_set_vq(unsigned long vq_minus_1);
|
||||
extern void sme_set_vq(unsigned long vq_minus_1);
|
||||
extern void za_save_state(void *state);
|
||||
extern void za_load_state(void const *state);
|
||||
|
||||
struct arm64_cpu_capabilities;
|
||||
extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
|
||||
extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused);
|
||||
extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused);
|
||||
|
||||
extern u64 read_zcr_features(void);
|
||||
extern u64 read_smcr_features(void);
|
||||
|
||||
/*
|
||||
* Helpers to translate bit indices in sve_vq_map to VQ values (and
|
||||
|
|
@ -119,6 +156,7 @@ struct vl_info {
|
|||
extern void sve_alloc(struct task_struct *task);
|
||||
extern void fpsimd_release_task(struct task_struct *task);
|
||||
extern void fpsimd_sync_to_sve(struct task_struct *task);
|
||||
extern void fpsimd_force_sync_to_sve(struct task_struct *task);
|
||||
extern void sve_sync_to_fpsimd(struct task_struct *task);
|
||||
extern void sve_sync_from_fpsimd_zeropad(struct task_struct *task);
|
||||
|
||||
|
|
@ -170,6 +208,12 @@ static inline void write_vl(enum vec_type type, u64 val)
|
|||
tmp = read_sysreg_s(SYS_ZCR_EL1) & ~ZCR_ELx_LEN_MASK;
|
||||
write_sysreg_s(tmp | val, SYS_ZCR_EL1);
|
||||
break;
|
||||
#endif
|
||||
#ifdef CONFIG_ARM64_SME
|
||||
case ARM64_VEC_SME:
|
||||
tmp = read_sysreg_s(SYS_SMCR_EL1) & ~SMCR_ELx_LEN_MASK;
|
||||
write_sysreg_s(tmp | val, SYS_SMCR_EL1);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
|
|
@ -208,6 +252,8 @@ static inline bool sve_vq_available(unsigned int vq)
|
|||
return vq_available(ARM64_VEC_SVE, vq);
|
||||
}
|
||||
|
||||
size_t sve_state_size(struct task_struct const *task);
|
||||
|
||||
#else /* ! CONFIG_ARM64_SVE */
|
||||
|
||||
static inline void sve_alloc(struct task_struct *task) { }
|
||||
|
|
@ -247,8 +293,93 @@ static inline void vec_update_vq_map(enum vec_type t) { }
|
|||
static inline int vec_verify_vq_map(enum vec_type t) { return 0; }
|
||||
static inline void sve_setup(void) { }
|
||||
|
||||
static inline size_t sve_state_size(struct task_struct const *task)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* ! CONFIG_ARM64_SVE */
|
||||
|
||||
#ifdef CONFIG_ARM64_SME
|
||||
|
||||
static inline void sme_user_disable(void)
|
||||
{
|
||||
sysreg_clear_set(cpacr_el1, CPACR_EL1_SMEN_EL0EN, 0);
|
||||
}
|
||||
|
||||
static inline void sme_user_enable(void)
|
||||
{
|
||||
sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_SMEN_EL0EN);
|
||||
}
|
||||
|
||||
static inline void sme_smstart_sm(void)
|
||||
{
|
||||
asm volatile(__msr_s(SYS_SVCR_SMSTART_SM_EL0, "xzr"));
|
||||
}
|
||||
|
||||
static inline void sme_smstop_sm(void)
|
||||
{
|
||||
asm volatile(__msr_s(SYS_SVCR_SMSTOP_SM_EL0, "xzr"));
|
||||
}
|
||||
|
||||
static inline void sme_smstop(void)
|
||||
{
|
||||
asm volatile(__msr_s(SYS_SVCR_SMSTOP_SMZA_EL0, "xzr"));
|
||||
}
|
||||
|
||||
extern void __init sme_setup(void);
|
||||
|
||||
static inline int sme_max_vl(void)
|
||||
{
|
||||
return vec_max_vl(ARM64_VEC_SME);
|
||||
}
|
||||
|
||||
static inline int sme_max_virtualisable_vl(void)
|
||||
{
|
||||
return vec_max_virtualisable_vl(ARM64_VEC_SME);
|
||||
}
|
||||
|
||||
extern void sme_alloc(struct task_struct *task);
|
||||
extern unsigned int sme_get_vl(void);
|
||||
extern int sme_set_current_vl(unsigned long arg);
|
||||
extern int sme_get_current_vl(void);
|
||||
|
||||
/*
|
||||
* Return how many bytes of memory are required to store the full SME
|
||||
* specific state (currently just ZA) for task, given task's currently
|
||||
* configured vector length.
|
||||
*/
|
||||
static inline size_t za_state_size(struct task_struct const *task)
|
||||
{
|
||||
unsigned int vl = task_get_sme_vl(task);
|
||||
|
||||
return ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl));
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static inline void sme_user_disable(void) { BUILD_BUG(); }
|
||||
static inline void sme_user_enable(void) { BUILD_BUG(); }
|
||||
|
||||
static inline void sme_smstart_sm(void) { }
|
||||
static inline void sme_smstop_sm(void) { }
|
||||
static inline void sme_smstop(void) { }
|
||||
|
||||
static inline void sme_alloc(struct task_struct *task) { }
|
||||
static inline void sme_setup(void) { }
|
||||
static inline unsigned int sme_get_vl(void) { return 0; }
|
||||
static inline int sme_max_vl(void) { return 0; }
|
||||
static inline int sme_max_virtualisable_vl(void) { return 0; }
|
||||
static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; }
|
||||
static inline int sme_get_current_vl(void) { return -EINVAL; }
|
||||
|
||||
static inline size_t za_state_size(struct task_struct const *task)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* ! CONFIG_ARM64_SME */
|
||||
|
||||
/* For use by EFI runtime services calls only */
|
||||
extern void __efi_fpsimd_begin(void);
|
||||
extern void __efi_fpsimd_end(void);
|
||||
|
|
|
|||
|
|
@ -93,6 +93,12 @@
|
|||
.endif
|
||||
.endm
|
||||
|
||||
.macro _sme_check_wv v
|
||||
.if (\v) < 12 || (\v) > 15
|
||||
.error "Bad vector select register \v."
|
||||
.endif
|
||||
.endm
|
||||
|
||||
/* SVE instruction encodings for non-SVE-capable assemblers */
|
||||
/* (pre binutils 2.28, all kernel capable clang versions support SVE) */
|
||||
|
||||
|
|
@ -174,6 +180,54 @@
|
|||
| (\np)
|
||||
.endm
|
||||
|
||||
/* SME instruction encodings for non-SME-capable assemblers */
|
||||
/* (pre binutils 2.38/LLVM 13) */
|
||||
|
||||
/* RDSVL X\nx, #\imm */
|
||||
.macro _sme_rdsvl nx, imm
|
||||
_check_general_reg \nx
|
||||
_check_num (\imm), -0x20, 0x1f
|
||||
.inst 0x04bf5800 \
|
||||
| (\nx) \
|
||||
| (((\imm) & 0x3f) << 5)
|
||||
.endm
|
||||
|
||||
/*
|
||||
* STR (vector from ZA array):
|
||||
* STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
|
||||
*/
|
||||
.macro _sme_str_zav nw, nxbase, offset=0
|
||||
_sme_check_wv \nw
|
||||
_check_general_reg \nxbase
|
||||
_check_num (\offset), -0x100, 0xff
|
||||
.inst 0xe1200000 \
|
||||
| (((\nw) & 3) << 13) \
|
||||
| ((\nxbase) << 5) \
|
||||
| ((\offset) & 7)
|
||||
.endm
|
||||
|
||||
/*
|
||||
* LDR (vector to ZA array):
|
||||
* LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
|
||||
*/
|
||||
.macro _sme_ldr_zav nw, nxbase, offset=0
|
||||
_sme_check_wv \nw
|
||||
_check_general_reg \nxbase
|
||||
_check_num (\offset), -0x100, 0xff
|
||||
.inst 0xe1000000 \
|
||||
| (((\nw) & 3) << 13) \
|
||||
| ((\nxbase) << 5) \
|
||||
| ((\offset) & 7)
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Zero the entire ZA array
|
||||
* ZERO ZA
|
||||
*/
|
||||
.macro zero_za
|
||||
.inst 0xc00800ff
|
||||
.endm
|
||||
|
||||
.macro __for from:req, to:req
|
||||
.if (\from) == (\to)
|
||||
_for__body %\from
|
||||
|
|
@ -208,6 +262,17 @@
|
|||
921:
|
||||
.endm
|
||||
|
||||
/* Update SMCR_EL1.LEN with the new VQ */
|
||||
.macro sme_load_vq xvqminus1, xtmp, xtmp2
|
||||
mrs_s \xtmp, SYS_SMCR_EL1
|
||||
bic \xtmp2, \xtmp, SMCR_ELx_LEN_MASK
|
||||
orr \xtmp2, \xtmp2, \xvqminus1
|
||||
cmp \xtmp2, \xtmp
|
||||
b.eq 921f
|
||||
msr_s SYS_SMCR_EL1, \xtmp2 //self-synchronising
|
||||
921:
|
||||
.endm
|
||||
|
||||
/* Preserve the first 128-bits of Znz and zero the rest. */
|
||||
.macro _sve_flush_z nz
|
||||
_sve_check_zreg \nz
|
||||
|
|
@ -254,3 +319,25 @@
|
|||
ldr w\nxtmp, [\xpfpsr, #4]
|
||||
msr fpcr, x\nxtmp
|
||||
.endm
|
||||
|
||||
.macro sme_save_za nxbase, xvl, nw
|
||||
mov w\nw, #0
|
||||
|
||||
423:
|
||||
_sme_str_zav \nw, \nxbase
|
||||
add x\nxbase, x\nxbase, \xvl
|
||||
add x\nw, x\nw, #1
|
||||
cmp \xvl, x\nw
|
||||
bne 423b
|
||||
.endm
|
||||
|
||||
.macro sme_load_za nxbase, xvl, nw
|
||||
mov w\nw, #0
|
||||
|
||||
423:
|
||||
_sme_ldr_zav \nw, \nxbase
|
||||
add x\nxbase, x\nxbase, \xvl
|
||||
add x\nw, x\nw, #1
|
||||
cmp \xvl, x\nw
|
||||
bne 423b
|
||||
.endm
|
||||
|
|
|
|||
|
|
@ -80,8 +80,15 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
|
|||
|
||||
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
|
||||
struct dyn_ftrace;
|
||||
struct ftrace_ops;
|
||||
struct ftrace_regs;
|
||||
|
||||
int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
|
||||
#define ftrace_init_nop ftrace_init_nop
|
||||
|
||||
void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
|
||||
struct ftrace_ops *op, struct ftrace_regs *fregs);
|
||||
#define ftrace_graph_func ftrace_graph_func
|
||||
#endif
|
||||
|
||||
#define ftrace_return_address(n) return_address(n)
|
||||
|
|
|
|||
|
|
@ -44,6 +44,8 @@ extern void huge_ptep_clear_flush(struct vm_area_struct *vma,
|
|||
#define __HAVE_ARCH_HUGE_PTE_CLEAR
|
||||
extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep, unsigned long sz);
|
||||
#define __HAVE_ARCH_HUGE_PTEP_GET
|
||||
extern pte_t huge_ptep_get(pte_t *ptep);
|
||||
extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep, pte_t pte, unsigned long sz);
|
||||
#define set_huge_swap_pte_at set_huge_swap_pte_at
|
||||
|
|
|
|||
|
|
@ -109,6 +109,14 @@
|
|||
#define KERNEL_HWCAP_AFP __khwcap2_feature(AFP)
|
||||
#define KERNEL_HWCAP_RPRES __khwcap2_feature(RPRES)
|
||||
#define KERNEL_HWCAP_MTE3 __khwcap2_feature(MTE3)
|
||||
#define KERNEL_HWCAP_SME __khwcap2_feature(SME)
|
||||
#define KERNEL_HWCAP_SME_I16I64 __khwcap2_feature(SME_I16I64)
|
||||
#define KERNEL_HWCAP_SME_F64F64 __khwcap2_feature(SME_F64F64)
|
||||
#define KERNEL_HWCAP_SME_I8I32 __khwcap2_feature(SME_I8I32)
|
||||
#define KERNEL_HWCAP_SME_F16F32 __khwcap2_feature(SME_F16F32)
|
||||
#define KERNEL_HWCAP_SME_B16F32 __khwcap2_feature(SME_B16F32)
|
||||
#define KERNEL_HWCAP_SME_F32F32 __khwcap2_feature(SME_F32F32)
|
||||
#define KERNEL_HWCAP_SME_FA64 __khwcap2_feature(SME_FA64)
|
||||
|
||||
/*
|
||||
* This yields a mask that user programs can use to figure out what
|
||||
|
|
|
|||
|
|
@ -279,6 +279,7 @@
|
|||
#define CPTR_EL2_TCPAC (1U << 31)
|
||||
#define CPTR_EL2_TAM (1 << 30)
|
||||
#define CPTR_EL2_TTA (1 << 20)
|
||||
#define CPTR_EL2_TSM (1 << 12)
|
||||
#define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT)
|
||||
#define CPTR_EL2_TZ (1 << 8)
|
||||
#define CPTR_NVHE_EL2_RES1 0x000032ff /* known RES1 bits in CPTR_EL2 (nVHE) */
|
||||
|
|
|
|||
|
|
@ -236,14 +236,14 @@ static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
|
|||
return mode != PSR_MODE_EL0t;
|
||||
}
|
||||
|
||||
static __always_inline u32 kvm_vcpu_get_esr(const struct kvm_vcpu *vcpu)
|
||||
static __always_inline u64 kvm_vcpu_get_esr(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->arch.fault.esr_el2;
|
||||
}
|
||||
|
||||
static __always_inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u32 esr = kvm_vcpu_get_esr(vcpu);
|
||||
u64 esr = kvm_vcpu_get_esr(vcpu);
|
||||
|
||||
if (esr & ESR_ELx_CV)
|
||||
return (esr & ESR_ELx_COND_MASK) >> ESR_ELx_COND_SHIFT;
|
||||
|
|
@ -374,7 +374,7 @@ static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)
|
|||
|
||||
static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u32 esr = kvm_vcpu_get_esr(vcpu);
|
||||
u64 esr = kvm_vcpu_get_esr(vcpu);
|
||||
return ESR_ELx_SYS64_ISS_RT(esr);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -153,7 +153,7 @@ struct kvm_arch {
|
|||
};
|
||||
|
||||
struct kvm_vcpu_fault_info {
|
||||
u32 esr_el2; /* Hyp Syndrom Register */
|
||||
u64 esr_el2; /* Hyp Syndrom Register */
|
||||
u64 far_el2; /* Hyp Fault Address Register */
|
||||
u64 hpfar_el2; /* Hyp IPA Fault Address Register */
|
||||
u64 disr_el1; /* Deferred [SError] Status Register */
|
||||
|
|
@ -295,8 +295,11 @@ struct vcpu_reset_state {
|
|||
|
||||
struct kvm_vcpu_arch {
|
||||
struct kvm_cpu_context ctxt;
|
||||
|
||||
/* Guest floating point state */
|
||||
void *sve_state;
|
||||
unsigned int sve_max_vl;
|
||||
u64 svcr;
|
||||
|
||||
/* Stage 2 paging state used by the hardware on next switch */
|
||||
struct kvm_s2_mmu *hw_mmu;
|
||||
|
|
@ -451,6 +454,7 @@ struct kvm_vcpu_arch {
|
|||
#define KVM_ARM64_DEBUG_STATE_SAVE_TRBE (1 << 13) /* Save TRBE context if active */
|
||||
#define KVM_ARM64_FP_FOREIGN_FPSTATE (1 << 14)
|
||||
#define KVM_ARM64_ON_UNSUPPORTED_CPU (1 << 15) /* Physical CPU not in supported_cpus */
|
||||
#define KVM_ARM64_HOST_SME_ENABLED (1 << 16) /* SME enabled for EL0 */
|
||||
|
||||
#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
|
||||
KVM_GUESTDBG_USE_SW_BP | \
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@
|
|||
* Was this synchronous external abort a RAS notification?
|
||||
* Returns '0' for errors handled by some RAS subsystem, or -ENOENT.
|
||||
*/
|
||||
static inline int kvm_handle_guest_sea(phys_addr_t addr, unsigned int esr)
|
||||
static inline int kvm_handle_guest_sea(phys_addr_t addr, u64 esr)
|
||||
{
|
||||
/* apei_claim_sea(NULL) expects to mask interrupts itself */
|
||||
lockdep_assert_irqs_enabled();
|
||||
|
|
|
|||
|
|
@ -47,6 +47,7 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg);
|
|||
long get_mte_ctrl(struct task_struct *task);
|
||||
int mte_ptrace_copy_tags(struct task_struct *child, long request,
|
||||
unsigned long addr, unsigned long data);
|
||||
size_t mte_probe_user_range(const char __user *uaddr, size_t size);
|
||||
|
||||
#else /* CONFIG_ARM64_MTE */
|
||||
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue