diff --git a/BUILD.bazel b/BUILD.bazel index b5a6fa2c0cdc..590568112290 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -198,6 +198,34 @@ copy_to_dist_dir( log = "info", ) +kernel_build( + name = "kernel_aarch64_microdroid_16k", + srcs = ["//common:kernel_aarch64_sources"], + outs = [ + "Image", + "System.map", + "modules.builtin", + "modules.builtin.modinfo", + "vmlinux", + "vmlinux.symvers", + ], + build_config = "build.config.microdroid.aarch64", + make_goals = [ + "Image", + ], + page_size = "16k", +) + +copy_to_dist_dir( + name = "kernel_aarch64_microdroid_16k_dist", + data = [ + ":kernel_aarch64_microdroid_16k", + ], + dist_dir = "out/kernel_aarch64_microdroid_16k/dist", + flat = True, + log = "info", +) + # Microdroid is not a real device. The kernel image is built with special # configs to reduce the size. Hence, not using mixed build. kernel_build( diff --git a/Documentation/core-api/maple_tree.rst b/Documentation/core-api/maple_tree.rst index 45defcf15da7..285e2d2b21ae 100644 --- a/Documentation/core-api/maple_tree.rst +++ b/Documentation/core-api/maple_tree.rst @@ -81,6 +81,9 @@ section. Sometimes it is necessary to ensure the next call to store to a maple tree does not allocate memory, please see :ref:`maple-tree-advanced-api` for this use case. +You can use mtree_dup() to duplicate an entire maple tree. It is a more +efficient way than inserting all elements one by one into a new tree. + Finally, you can remove all entries from a maple tree by calling mtree_destroy(). If the maple tree entries are pointers, you may wish to free the entries first. @@ -112,6 +115,7 @@ Takes ma_lock internally: * mtree_insert() * mtree_insert_range() * mtree_erase() + * mtree_dup() * mtree_destroy() * mt_set_in_rcu() * mt_clear_in_rcu() diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index 2a708ffe2af0..a7a7e77c8c92 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -3098,6 +3098,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x07b1db01 } +pointer_reference { + id: 0x0b7d7682 + kind: POINTER + pointee_type_id: 0x07b53c95 +} pointer_reference { id: 0x0b85846c kind: POINTER @@ -8263,6 +8268,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x6255e5da } +pointer_reference { + id: 0x120b1632 + kind: POINTER + pointee_type_id: 0x626cbe56 +} pointer_reference { id: 0x12191e2a kind: POINTER @@ -9008,6 +9018,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x72d62916 } +pointer_reference { + id: 0x1625e208 + kind: POINTER + pointee_type_id: 0x72d76ebd +} pointer_reference { id: 0x162c7a70 kind: POINTER @@ -10828,6 +10843,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x52606d54 } +pointer_reference { + id: 0x1e0dbd15 + kind: POINTER + pointee_type_id: 0x527612cb +} pointer_reference { id: 0x1e20e7eb kind: POINTER @@ -12283,6 +12303,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0xb94739b9 } +pointer_reference { + id: 0x24c218d7 + kind: POINTER + pointee_type_id: 0xb94885c2 +} pointer_reference { id: 0x24c6c7eb kind: POINTER @@ -15938,6 +15963,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x990a74b4 } +pointer_reference { + id: 0x2cd2cd79 + kind: POINTER + pointee_type_id: 0x990bd378 +} pointer_reference { id: 0x2cd31328 kind: POINTER @@ -18158,6 +18188,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x9d41cc1a } +pointer_reference { + id: 0x2dc069c5 + kind: POINTER + pointee_type_id: 0x9d414188 +} pointer_reference { id: 0x2dc1540f kind: POINTER @@ -30688,6 +30723,11 @@ typedef { name: "p4d_t" 
referred_type_id: 0x148546d4 } +typedef { + id: 0xbad82a2c + name: "parse_endpoint_func" + referred_type_id: 0x2dc069c5 +} typedef { id: 0x8ef19fe7 name: "pci_bus_flags_t" @@ -40287,6 +40327,11 @@ member { type_id: 0x797868f8 offset: 32 } +member { + id: 0x3dbb0f88 + type_id: 0x79c25039 + offset: 2048 +} member { id: 0x3dbd80ff type_id: 0x79d85976 @@ -49114,6 +49159,12 @@ member { name: "array" type_id: 0x38d23361 } +member { + id: 0xdfa4a7f6 + name: "array" + type_id: 0x030b9acf + offset: 64 +} member { id: 0xdfcad7c4 name: "array" @@ -52153,6 +52204,11 @@ member { name: "base" type_id: 0x180f82e8 } +member { + id: 0x85d2e2e4 + name: "base" + type_id: 0x080c6fc2 +} member { id: 0x85d6188a name: "base" @@ -56383,6 +56439,12 @@ member { name: "bus" type_id: 0x2309ad3e } +member { + id: 0xdaf846cc + name: "bus" + type_id: 0x286a95aa + offset: 160 +} member { id: 0x1639ef00 name: "bus_cleanup" @@ -56612,6 +56674,12 @@ member { type_id: 0x945e7ef6 offset: 448 } +member { + id: 0x2c928e64 + name: "bus_type" + type_id: 0x3c57148f + offset: 128 +} member { id: 0xb43c45b4 name: "bus_width" @@ -59742,6 +59810,11 @@ member { type_id: 0x1a8b04e5 offset: 384 } +member { + id: 0x15741053 + name: "chain" + type_id: 0x030b9acf +} member { id: 0x15798222 name: "chain" @@ -61356,6 +61429,11 @@ member { type_id: 0xe62ebf07 offset: 128 } +member { + id: 0x86c22df0 + name: "class" + type_id: 0x83714889 +} member { id: 0x86f2bb02 name: "class" @@ -64770,6 +64848,12 @@ member { type_id: 0x2d7be27a offset: 448 } +member { + id: 0x73c32817 + name: "configure" + type_id: 0x2cd2cd79 + offset: 1280 +} member { id: 0x73c333e5 name: "configure" @@ -100848,6 +100932,11 @@ member { type_id: 0x24cb3ae4 offset: 896 } +member { + id: 0xbcc50199 + name: "host_stage2_mod_prot_range" + type_id: 0x24c218d7 +} member { id: 0xedc7b540 name: "host_status" @@ -116539,6 +116628,12 @@ member { name: "link_fd" type_id: 0xe62ebf07 } +member { + id: 0x6075ccdc + name: "link_frequencies" + type_id: 0x2e18f543 + offset: 512 +} member { id: 0x178cf8a4 name: "link_gen" @@ -126751,6 +126846,18 @@ member { name: "mipi_csi1" type_id: 0xe49bfc8b } +member { + id: 0xa7e5d7c1 + name: "mipi_csi1" + type_id: 0xe49bfc8b + offset: 64 +} +member { + id: 0xeda56411 + name: "mipi_csi2" + type_id: 0xe72f0de6 + offset: 128 +} member { id: 0xeda56dd3 name: "mipi_csi2" @@ -136063,6 +136170,12 @@ member { type_id: 0xe62ebf07 offset: 672 } +member { + id: 0x4519d21b + name: "nr_of_link_frequencies" + type_id: 0x4585663f + offset: 576 +} member { id: 0x9c6b34f7 name: "nr_off" @@ -162486,6 +162599,12 @@ member { type_id: 0x2da2fbac offset: 704 } +member { + id: 0xb48f08f4 + name: "remove" + type_id: 0x2cd2cd79 + offset: 1344 +} member { id: 0xb48fbf27 name: "remove" @@ -175428,6 +175547,12 @@ member { type_id: 0x2cdc0ac8 offset: 9088 } +member { + id: 0x84e59dd8 + name: "setup" + type_id: 0x2cd2cd79 + offset: 1216 +} member { id: 0x84e68e26 name: "setup" @@ -210963,6 +211088,16 @@ struct_union { member_id: 0x9683f73d } } +struct_union { + id: 0x286a95aa + kind: STRUCT + definition { + bytesize: 40 + member_id: 0xc0bc4db7 + member_id: 0xa7e5d7c1 + member_id: 0xeda56411 + } +} struct_union { id: 0x2880e524 kind: STRUCT @@ -215152,6 +215287,16 @@ struct_union { member_id: 0x3bfa35f3 } } +struct_union { + id: 0x79c25039 + kind: UNION + definition { + bytesize: 8 + member_id: 0xbcc50199 + member_id: 0x27000c61 + member_id: 0x36752b74 + } +} struct_union { id: 0x79d85976 kind: UNION @@ -224600,6 +224745,17 @@ struct_union { member_id: 0x14d0dfac } } +struct_union { 
+ id: 0x07b53c95 + kind: STRUCT + name: "dma_fence_unwrap" + definition { + bytesize: 24 + member_id: 0x15741053 + member_id: 0xdfa4a7f6 + member_id: 0xad7c841b + } +} struct_union { id: 0x7f49bdff kind: STRUCT @@ -247063,7 +247219,7 @@ struct_union { member_id: 0x636da10f member_id: 0x6f066e7f member_id: 0x3afd0925 - member_id: 0x2d0812b0 + member_id: 0x3dbb0f88 member_id: 0x637607e0 member_id: 0xac894cc9 member_id: 0xe0f63db8 @@ -260298,6 +260454,18 @@ struct_union { member_id: 0xba11b0ec } } +struct_union { + id: 0x527612cb + kind: STRUCT + name: "transport_class" + definition { + bytesize: 176 + member_id: 0x86c22df0 + member_id: 0x84e59dd8 + member_id: 0x73c32817 + member_id: 0xb48f08f4 + } +} struct_union { id: 0x626cbe56 kind: STRUCT @@ -265706,6 +265874,19 @@ struct_union { member_id: 0x465224ed } } +struct_union { + id: 0x72d76ebd + kind: STRUCT + name: "v4l2_fwnode_endpoint" + definition { + bytesize: 80 + member_id: 0x85d2e2e4 + member_id: 0x2c928e64 + member_id: 0xdaf846cc + member_id: 0x6075ccdc + member_id: 0x4519d21b + } +} struct_union { id: 0xccd4dc1a kind: STRUCT @@ -282947,6 +283128,57 @@ enumeration { } } } +enumeration { + id: 0xd7ffc9ea + name: "scsi_device_event" + definition { + underlying_type_id: 0x4585663f + enumerator { + name: "SDEV_EVT_MEDIA_CHANGE" + value: 1 + } + enumerator { + name: "SDEV_EVT_INQUIRY_CHANGE_REPORTED" + value: 2 + } + enumerator { + name: "SDEV_EVT_CAPACITY_CHANGE_REPORTED" + value: 3 + } + enumerator { + name: "SDEV_EVT_SOFT_THRESHOLD_REACHED_REPORTED" + value: 4 + } + enumerator { + name: "SDEV_EVT_MODE_PARAMETER_CHANGE_REPORTED" + value: 5 + } + enumerator { + name: "SDEV_EVT_LUN_CHANGE_REPORTED" + value: 6 + } + enumerator { + name: "SDEV_EVT_ALUA_STATE_CHANGE_REPORTED" + value: 7 + } + enumerator { + name: "SDEV_EVT_POWER_ON_RESET_OCCURRED" + value: 8 + } + enumerator { + name: "SDEV_EVT_FIRST" + value: 1 + } + enumerator { + name: "SDEV_EVT_LAST" + value: 8 + } + enumerator { + name: "SDEV_EVT_MAXBITS" + value: 9 + } + } +} enumeration { id: 0xdf9e95f6 name: "scsi_device_state" @@ -286898,6 +287130,13 @@ enumeration { } } } +function { + id: 0x003279c7 + return_type_id: 0x3c2dd1ca + parameter_id: 0x3cfe7778 + parameter_id: 0x0490bb4a + parameter_id: 0x4585663f +} function { id: 0x004cf563 return_type_id: 0x48b5725f @@ -291511,6 +291750,11 @@ function { parameter_id: 0x14528516 parameter_id: 0x2712b6f9 } +function { + id: 0x15112911 + return_type_id: 0x48b5725f + parameter_id: 0x1625e208 +} function { id: 0x151457b1 return_type_id: 0xd5cc9c9a @@ -292922,6 +293166,11 @@ function { parameter_id: 0x1d44326e parameter_id: 0x34d3469d } +function { + id: 0x171b3ed6 + return_type_id: 0x48b5725f + parameter_id: 0x1e0dbd15 +} function { id: 0x171c8621 return_type_id: 0xd5cc9c9a @@ -294723,6 +294972,13 @@ function { parameter_id: 0x1259e377 parameter_id: 0xe276adef } +function { + id: 0x1aa483a8 + return_type_id: 0x48b5725f + parameter_id: 0x257d12af + parameter_id: 0xd7ffc9ea + parameter_id: 0xf1a6dfed +} function { id: 0x1aa56a0d return_type_id: 0x48b5725f @@ -298379,6 +298635,11 @@ function { parameter_id: 0x3e10b518 parameter_id: 0x0bb0c019 } +function { + id: 0x1f821b4c + return_type_id: 0x48b5725f + parameter_id: 0x3c692b7e +} function { id: 0x1f835b6f return_type_id: 0x48b5725f @@ -298394,6 +298655,12 @@ function { parameter_id: 0x3e10b518 parameter_id: 0xa52a0930 } +function { + id: 0x1f84fe6e + return_type_id: 0x48b5725f + parameter_id: 0x3f949c69 + parameter_id: 0x3e6239e1 +} function { id: 0x1f85d3ef return_type_id: 0x48b5725f @@ 
-298484,6 +298751,11 @@ function { parameter_id: 0x3c2755a3 parameter_id: 0x0cbf60eb } +function { + id: 0x1fa7cc4d + return_type_id: 0x48b5725f + parameter_id: 0x3cfe7778 +} function { id: 0x1fa8b2bc return_type_id: 0x48b5725f @@ -302438,6 +302710,11 @@ function { parameter_id: 0xe276adef parameter_id: 0xc93e017b } +function { + id: 0x62985582 + return_type_id: 0x34cf6c51 + parameter_id: 0x3f949c69 +} function { id: 0x62b8d7ec return_type_id: 0x09427c40 @@ -311268,6 +311545,14 @@ function { return_type_id: 0x6720d32f parameter_id: 0x21069feb } +function { + id: 0x95c2268d + return_type_id: 0xf435685e + parameter_id: 0x0483e6f8 + parameter_id: 0xf435685e + parameter_id: 0x384c5795 + parameter_id: 0xf435685e +} function { id: 0x95c3652e return_type_id: 0x6720d32f @@ -312483,6 +312768,12 @@ function { parameter_id: 0xf1a6dfed parameter_id: 0x0292b875 } +function { + id: 0x97a1ddd3 + return_type_id: 0x6720d32f + parameter_id: 0x257d12af + parameter_id: 0xdf9e95f6 +} function { id: 0x97a3c07a return_type_id: 0x6720d32f @@ -314033,6 +314324,13 @@ function { parameter_id: 0x6720d32f parameter_id: 0x064d6086 } +function { + id: 0x990bd378 + return_type_id: 0x6720d32f + parameter_id: 0x120b1632 + parameter_id: 0x0258f96e + parameter_id: 0x0258f96e +} function { id: 0x99132caa return_type_id: 0x6720d32f @@ -315108,6 +315406,11 @@ function { parameter_id: 0x1d19a9d5 parameter_id: 0x310ec01d } +function { + id: 0x9a038c6a + return_type_id: 0x6720d32f + parameter_id: 0x1e0dbd15 +} function { id: 0x9a03c4d6 return_type_id: 0x6720d32f @@ -315308,6 +315611,14 @@ function { parameter_id: 0x6720d32f parameter_id: 0x3c2755a3 } +function { + id: 0x9a2ab624 + return_type_id: 0x6720d32f + parameter_id: 0x18bd6530 + parameter_id: 0x6720d32f + parameter_id: 0x6720d32f + parameter_id: 0x11cfee5a +} function { id: 0x9a2abc7b return_type_id: 0x6720d32f @@ -316987,6 +317298,12 @@ function { parameter_id: 0x064d6086 parameter_id: 0x064d6086 } +function { + id: 0x9b32d0a3 + return_type_id: 0x6720d32f + parameter_id: 0x18bd6530 + parameter_id: 0x275ab027 +} function { id: 0x9b32f2ad return_type_id: 0x6720d32f @@ -318689,6 +319006,13 @@ function { parameter_id: 0x3e10b518 parameter_id: 0x33756485 } +function { + id: 0x9ba47dcc + return_type_id: 0x6720d32f + parameter_id: 0x18bd6530 + parameter_id: 0x0483e6f8 + parameter_id: 0x6720d32f +} function { id: 0x9ba4eebd return_type_id: 0x6720d32f @@ -320477,6 +320801,15 @@ function { parameter_id: 0x6720d32f parameter_id: 0x92233392 } +function { + id: 0x9c09446b + return_type_id: 0x6720d32f + parameter_id: 0x00c72527 + parameter_id: 0x6720d32f + parameter_id: 0x914dbfdc + parameter_id: 0x295c7202 + parameter_id: 0x6d7f5ff6 +} function { id: 0x9c09d6aa return_type_id: 0x6720d32f @@ -321348,6 +321681,12 @@ function { parameter_id: 0x04b193cc parameter_id: 0x0335a07f } +function { + id: 0x9ca0dc77 + return_type_id: 0x6720d32f + parameter_id: 0x074f1a14 + parameter_id: 0x3cfe7778 +} function { id: 0x9ca1921c return_type_id: 0x6720d32f @@ -321812,6 +322151,12 @@ function { parameter_id: 0x054f691a parameter_id: 0x0aa1f0ee } +function { + id: 0x9cfc5a75 + return_type_id: 0x6720d32f + parameter_id: 0x0490bb4a + parameter_id: 0x1625e208 +} function { id: 0x9cfd713b return_type_id: 0x6720d32f @@ -321834,6 +322179,12 @@ function { parameter_id: 0x02ed0755 parameter_id: 0x0e68dab6 } +function { + id: 0x9d027320 + return_type_id: 0x6720d32f + parameter_id: 0x01c5a749 + parameter_id: 0x3cfe7778 +} function { id: 0x9d038726 return_type_id: 0x6720d32f @@ -322382,6 +322733,13 @@ function { 
parameter_id: 0x0258f96e parameter_id: 0x15f20052 } +function { + id: 0x9d414188 + return_type_id: 0x6720d32f + parameter_id: 0x0258f96e + parameter_id: 0x1625e208 + parameter_id: 0x3c2dd1ca +} function { id: 0x9d419277 return_type_id: 0x6720d32f @@ -323502,6 +323860,14 @@ function { parameter_id: 0x33756485 parameter_id: 0x064d6086 } +function { + id: 0x9ddac293 + return_type_id: 0x6720d32f + parameter_id: 0x0258f96e + parameter_id: 0x3cfe7778 + parameter_id: 0xf435685e + parameter_id: 0xbad82a2c +} function { id: 0x9ddaf106 return_type_id: 0x6720d32f @@ -325937,6 +326303,11 @@ function { parameter_id: 0x0cf3d8fe parameter_id: 0x4585663f } +function { + id: 0x9faad4c6 + return_type_id: 0x6720d32f + parameter_id: 0x08a8dfa4 +} function { id: 0x9fab680a return_type_id: 0x6720d32f @@ -327594,6 +327965,13 @@ function { parameter_id: 0x18bd6530 parameter_id: 0x310ec01d } +function { + id: 0xb94885c2 + return_type_id: 0x6720d32f + parameter_id: 0x92233392 + parameter_id: 0x1908b154 + parameter_id: 0x92233392 +} function { id: 0xb94d0c8b return_type_id: 0x06835e9c @@ -327602,6 +327980,14 @@ function { parameter_id: 0x4585663f parameter_id: 0x6d7f5ff6 } +function { + id: 0xb94f7fed + return_type_id: 0x06835e9c + parameter_id: 0x1023f4f6 + parameter_id: 0x33756485 + parameter_id: 0x4585663f + parameter_id: 0xf1a6dfed +} function { id: 0xb957d705 return_type_id: 0x6720d32f @@ -328584,6 +328970,13 @@ function { parameter_id: 0xc9082b19 parameter_id: 0xc9082b19 } +function { + id: 0xce0d9e89 + return_type_id: 0x030b9acf + parameter_id: 0x4585663f + parameter_id: 0x0a52df14 + parameter_id: 0x0b7d7682 +} function { id: 0xce0dc24b return_type_id: 0x4585663f @@ -328919,6 +329312,13 @@ function { return_type_id: 0x02eb105a parameter_id: 0x3e10b518 } +function { + id: 0xd981a35c + return_type_id: 0x3f0185ef + parameter_id: 0x3f0185ef + parameter_id: 0x6720d32f + parameter_id: 0x6720d32f +} function { id: 0xd9bb2b92 return_type_id: 0x4585663f @@ -329006,6 +329406,11 @@ function { parameter_id: 0x1e9745d3 parameter_id: 0x1e9745d3 } +function { + id: 0xdd980e87 + return_type_id: 0x030b9acf + parameter_id: 0x0b7d7682 +} function { id: 0xddb49ff7 return_type_id: 0x3ae3ff84 @@ -329165,6 +329570,12 @@ function { return_type_id: 0x030b9acf parameter_id: 0x030b9acf } +function { + id: 0xdfa8404e + return_type_id: 0x030b9acf + parameter_id: 0x030b9acf + parameter_id: 0x0b7d7682 +} function { id: 0xdfba2774 return_type_id: 0x4585663f @@ -331936,6 +332347,15 @@ elf_symbol { type_id: 0x9b8e2bf2 full_name: "__clocksource_register_scale" } +elf_symbol { + id: 0xc7d06fb9 + name: "__cma_alloc" + is_defined: true + symbol_type: FUNCTION + crc: 0x5dbf1ea9 + type_id: 0xb94f7fed + full_name: "__cma_alloc" +} elf_symbol { id: 0xac1ff1ce name: "__const_udelay" @@ -332323,6 +332743,15 @@ elf_symbol { type_id: 0x4058e56a full_name: "__devres_alloc_node" } +elf_symbol { + id: 0xfa3b077f + name: "__dma_fence_unwrap_merge" + is_defined: true + symbol_type: FUNCTION + crc: 0xd88defca + type_id: 0xce0d9e89 + full_name: "__dma_fence_unwrap_merge" +} elf_symbol { id: 0x0a6e3e89 name: "__dma_request_channel" @@ -334214,6 +334643,15 @@ elf_symbol { type_id: 0xa017504e full_name: "__scsi_device_lookup_by_target" } +elf_symbol { + id: 0xe18b6ee8 + name: "__scsi_format_command" + is_defined: true + symbol_type: FUNCTION + crc: 0x93022ba6 + type_id: 0x95c2268d + full_name: "__scsi_format_command" +} elf_symbol { id: 0x0166be18 name: "__scsi_iterate_devices" @@ -336464,6 +336902,15 @@ elf_symbol { type_id: 0x9bcd4ff7 full_name: 
"__traceiter_android_vh_encrypt_page" } +elf_symbol { + id: 0x7f1591a1 + name: "__traceiter_android_vh_ep_create_wakeup_source" + is_defined: true + symbol_type: FUNCTION + crc: 0x1e8ed582 + type_id: 0x9ba47dcc + full_name: "__traceiter_android_vh_ep_create_wakeup_source" +} elf_symbol { id: 0x1921d10d name: "__traceiter_android_vh_exit_check" @@ -337229,6 +337676,15 @@ elf_symbol { type_id: 0x9b3343fb full_name: "__traceiter_android_vh_ra_tuning_max_page" } +elf_symbol { + id: 0xb35da0ec + name: "__traceiter_android_vh_read_pages" + is_defined: true + symbol_type: FUNCTION + crc: 0x4cb21384 + type_id: 0x9b32d0a3 + full_name: "__traceiter_android_vh_read_pages" +} elf_symbol { id: 0x7d069e91 name: "__traceiter_android_vh_record_mutex_lock_starttime" @@ -337337,6 +337793,15 @@ elf_symbol { type_id: 0x9b08a261 full_name: "__traceiter_android_vh_rproc_recovery_set" } +elf_symbol { + id: 0xd56fbf76 + name: "__traceiter_android_vh_rt_mutex_steal" + is_defined: true + symbol_type: FUNCTION + crc: 0xf0a6d2df + type_id: 0x9a2ab624 + full_name: "__traceiter_android_vh_rt_mutex_steal" +} elf_symbol { id: 0x3ef508a2 name: "__traceiter_android_vh_rtmutex_wait_finish" @@ -337760,6 +338225,15 @@ elf_symbol { type_id: 0x9ab83ca3 full_name: "__traceiter_android_vh_timer_calc_index" } +elf_symbol { + id: 0x641d703d + name: "__traceiter_android_vh_timerfd_create" + is_defined: true + symbol_type: FUNCTION + crc: 0x8c68d59c + type_id: 0x9ba47dcc + full_name: "__traceiter_android_vh_timerfd_create" +} elf_symbol { id: 0x2bc25325 name: "__traceiter_android_vh_try_to_freeze_todo" @@ -340442,6 +340916,15 @@ elf_symbol { type_id: 0x18ccbd2c full_name: "__tracepoint_android_vh_encrypt_page" } +elf_symbol { + id: 0xdef7c547 + name: "__tracepoint_android_vh_ep_create_wakeup_source" + is_defined: true + symbol_type: OBJECT + crc: 0x7db48833 + type_id: 0x18ccbd2c + full_name: "__tracepoint_android_vh_ep_create_wakeup_source" +} elf_symbol { id: 0x684e5f4f name: "__tracepoint_android_vh_exit_check" @@ -341207,6 +341690,15 @@ elf_symbol { type_id: 0x18ccbd2c full_name: "__tracepoint_android_vh_ra_tuning_max_page" } +elf_symbol { + id: 0x9fc2933e + name: "__tracepoint_android_vh_read_pages" + is_defined: true + symbol_type: OBJECT + crc: 0xb3878023 + type_id: 0x18ccbd2c + full_name: "__tracepoint_android_vh_read_pages" +} elf_symbol { id: 0x761f292f name: "__tracepoint_android_vh_record_mutex_lock_starttime" @@ -341315,6 +341807,15 @@ elf_symbol { type_id: 0x18ccbd2c full_name: "__tracepoint_android_vh_rproc_recovery_set" } +elf_symbol { + id: 0xed43b088 + name: "__tracepoint_android_vh_rt_mutex_steal" + is_defined: true + symbol_type: OBJECT + crc: 0xdc6b8d43 + type_id: 0x18ccbd2c + full_name: "__tracepoint_android_vh_rt_mutex_steal" +} elf_symbol { id: 0xa3915d70 name: "__tracepoint_android_vh_rtmutex_wait_finish" @@ -341738,6 +342239,15 @@ elf_symbol { type_id: 0x18ccbd2c full_name: "__tracepoint_android_vh_timer_calc_index" } +elf_symbol { + id: 0x2df766e3 + name: "__tracepoint_android_vh_timerfd_create" + is_defined: true + symbol_type: OBJECT + crc: 0x181a4352 + type_id: 0x18ccbd2c + full_name: "__tracepoint_android_vh_timerfd_create" +} elf_symbol { id: 0xd9d2bcff name: "__tracepoint_android_vh_try_to_freeze_todo" @@ -342674,6 +343184,24 @@ elf_symbol { type_id: 0x20cd94dc full_name: "__usecs_to_jiffies" } +elf_symbol { + id: 0xf51d746f + name: "__v4l2_async_nf_add_fwnode" + is_defined: true + symbol_type: FUNCTION + crc: 0x03599cac + type_id: 0x003279c7 + full_name: "__v4l2_async_nf_add_fwnode" +} +elf_symbol { + 
id: 0xe13e16ca + name: "__v4l2_async_nf_add_fwnode_remote" + is_defined: true + symbol_type: FUNCTION + crc: 0x82966749 + type_id: 0x003279c7 + full_name: "__v4l2_async_nf_add_fwnode_remote" +} elf_symbol { id: 0x4c0a941a name: "__v4l2_ctrl_handler_setup" @@ -344058,6 +344586,24 @@ elf_symbol { type_id: 0x9048c0ea full_name: "atomic_notifier_chain_unregister" } +elf_symbol { + id: 0x41765c03 + name: "attribute_container_register" + is_defined: true + symbol_type: FUNCTION + crc: 0x167c84c3 + type_id: 0x9faad4c6 + full_name: "attribute_container_register" +} +elf_symbol { + id: 0xcd05507b + name: "attribute_container_unregister" + is_defined: true + symbol_type: FUNCTION + crc: 0x15baabca + type_id: 0x9faad4c6 + full_name: "attribute_container_unregister" +} elf_symbol { id: 0x82786c66 name: "autoremove_wake_function" @@ -353807,6 +354353,24 @@ elf_symbol { type_id: 0x1023f4f6 full_name: "dma_contiguous_default_area" } +elf_symbol { + id: 0x279bd3a7 + name: "dma_direct_alloc" + is_defined: true + symbol_type: FUNCTION + crc: 0xb02b3af0 + type_id: 0x55df36a2 + full_name: "dma_direct_alloc" +} +elf_symbol { + id: 0x0e847130 + name: "dma_direct_free" + is_defined: true + symbol_type: FUNCTION + crc: 0x051debf6 + type_id: 0x13db1955 + full_name: "dma_direct_free" +} elf_symbol { id: 0xd13969dd name: "dma_fence_add_callback" @@ -353978,6 +354542,24 @@ elf_symbol { type_id: 0x9d05158e full_name: "dma_fence_signal_timestamp_locked" } +elf_symbol { + id: 0x2012ba51 + name: "dma_fence_unwrap_first" + is_defined: true + symbol_type: FUNCTION + crc: 0xc3cd6929 + type_id: 0xdfa8404e + full_name: "dma_fence_unwrap_first" +} +elf_symbol { + id: 0xf5f318e9 + name: "dma_fence_unwrap_next" + is_defined: true + symbol_type: FUNCTION + crc: 0xd13e4af8 + type_id: 0xdd980e87 + full_name: "dma_fence_unwrap_next" +} elf_symbol { id: 0xf18ac584 name: "dma_fence_wait_any_timeout" @@ -358829,6 +359411,15 @@ elf_symbol { type_id: 0x10985193 full_name: "dump_stack" } +elf_symbol { + id: 0x652fbf96 + name: "dump_tasks" + is_defined: true + symbol_type: FUNCTION + crc: 0x6fe3e49b + type_id: 0x1f821b4c + full_name: "dump_tasks" +} elf_symbol { id: 0xda364c85 name: "dw_handle_msi_irq" @@ -364763,6 +365354,15 @@ elf_symbol { type_id: 0x16dc304e full_name: "iio_trigger_unregister" } +elf_symbol { + id: 0xfb09b362 + name: "iio_trigger_using_own" + is_defined: true + symbol_type: FUNCTION + crc: 0xe2c1359e + type_id: 0xf886bca4 + full_name: "iio_trigger_using_own" +} elf_symbol { id: 0xdf3e8655 name: "iio_update_buffers" @@ -372329,6 +372929,15 @@ elf_symbol { type_id: 0xfcd23386 full_name: "ns_capable" } +elf_symbol { + id: 0x27a870d1 + name: "ns_capable_noaudit" + is_defined: true + symbol_type: FUNCTION + crc: 0x3c75b0e0 + type_id: 0xfcd23386 + full_name: "ns_capable_noaudit" +} elf_symbol { id: 0xf68f8b33 name: "ns_to_kernel_old_timeval" @@ -374924,6 +375533,15 @@ elf_symbol { type_id: 0x93acae9b full_name: "pci_host_probe" } +elf_symbol { + id: 0xec0d5441 + name: "pci_intx" + is_defined: true + symbol_type: FUNCTION + crc: 0x4e899f5e + type_id: 0x157d734c + full_name: "pci_intx" +} elf_symbol { id: 0x9c6c58ea name: "pci_iomap" @@ -375464,6 +376082,15 @@ elf_symbol { type_id: 0x986a45dd full_name: "pcim_iomap_regions" } +elf_symbol { + id: 0xae61b91f + name: "pcim_iomap_regions_request_all" + is_defined: true + symbol_type: FUNCTION + crc: 0xd111489f + type_id: 0x986a45dd + full_name: "pcim_iomap_regions_request_all" +} elf_symbol { id: 0xc37c9a74 name: "pcim_iomap_table" @@ -375482,6 +376109,15 @@ elf_symbol { type_id: 
0x157d734c full_name: "pcim_iounmap_regions" } +elf_symbol { + id: 0xfa9dbeca + name: "pcim_pin_device" + is_defined: true + symbol_type: FUNCTION + crc: 0xfdffeca6 + type_id: 0x14e1f000 + full_name: "pcim_pin_device" +} elf_symbol { id: 0x123cd197 name: "pcpu_nr_pages" @@ -380756,6 +381392,15 @@ elf_symbol { type_id: 0x1a9c8a01 full_name: "reset_control_put" } +elf_symbol { + id: 0x642147cd + name: "reset_control_rearm" + is_defined: true + symbol_type: FUNCTION + crc: 0x5d2bc42a + type_id: 0x978438bd + full_name: "reset_control_rearm" +} elf_symbol { id: 0x9c7a2d6c name: "reset_control_release" @@ -382205,6 +382850,15 @@ elf_symbol { type_id: 0x954324c8 full_name: "scsi_change_queue_depth" } +elf_symbol { + id: 0xebec291e + name: "scsi_check_sense" + is_defined: true + symbol_type: FUNCTION + crc: 0x05404117 + type_id: 0x62985582 + full_name: "scsi_check_sense" +} elf_symbol { id: 0xe4036f2e name: "scsi_cmd_allowed" @@ -382250,6 +382904,33 @@ elf_symbol { type_id: 0x19c71538 full_name: "scsi_device_put" } +elf_symbol { + id: 0x61df84bc + name: "scsi_device_quiesce" + is_defined: true + symbol_type: FUNCTION + crc: 0x0daef571 + type_id: 0x94dfa784 + full_name: "scsi_device_quiesce" +} +elf_symbol { + id: 0x054c0bba + name: "scsi_device_resume" + is_defined: true + symbol_type: FUNCTION + crc: 0x069ea5a4 + type_id: 0x19c71538 + full_name: "scsi_device_resume" +} +elf_symbol { + id: 0x55968d64 + name: "scsi_device_set_state" + is_defined: true + symbol_type: FUNCTION + crc: 0x3e16b971 + type_id: 0x97a1ddd3 + full_name: "scsi_device_set_state" +} elf_symbol { id: 0xf10245da name: "scsi_dma_map" @@ -382277,6 +382958,24 @@ elf_symbol { type_id: 0x1f7d7689 full_name: "scsi_done" } +elf_symbol { + id: 0xb77321e1 + name: "scsi_eh_finish_cmd" + is_defined: true + symbol_type: FUNCTION + crc: 0x8e5f7b03 + type_id: 0x1f84fe6e + full_name: "scsi_eh_finish_cmd" +} +elf_symbol { + id: 0xe584e576 + name: "scsi_eh_flush_done_q" + is_defined: true + symbol_type: FUNCTION + crc: 0xf811e69d + type_id: 0x1f00dfeb + full_name: "scsi_eh_flush_done_q" +} elf_symbol { id: 0x8ef5c221 name: "scsi_execute_cmd" @@ -382403,6 +383102,15 @@ elf_symbol { type_id: 0x14f27dac full_name: "scsi_report_bus_reset" } +elf_symbol { + id: 0x24093af7 + name: "scsi_rescan_device" + is_defined: true + symbol_type: FUNCTION + crc: 0x83fa9f1b + type_id: 0x94dfa784 + full_name: "scsi_rescan_device" +} elf_symbol { id: 0xc9021692 name: "scsi_scan_host" @@ -382412,6 +383120,33 @@ elf_symbol { type_id: 0x156efee0 full_name: "scsi_scan_host" } +elf_symbol { + id: 0x51e78cea + name: "scsi_schedule_eh" + is_defined: true + symbol_type: FUNCTION + crc: 0xd78a6752 + type_id: 0x156efee0 + full_name: "scsi_schedule_eh" +} +elf_symbol { + id: 0x9489f8a9 + name: "scsi_sense_desc_find" + is_defined: true + symbol_type: FUNCTION + crc: 0x10d9f885 + type_id: 0xd981a35c + full_name: "scsi_sense_desc_find" +} +elf_symbol { + id: 0x494ae459 + name: "scsi_set_sense_field_pointer" + is_defined: true + symbol_type: FUNCTION + crc: 0x3ab7b1cc + type_id: 0x9c09446b + full_name: "scsi_set_sense_field_pointer" +} elf_symbol { id: 0xcf17c9a6 name: "scsi_set_sense_information" @@ -382430,6 +383165,15 @@ elf_symbol { type_id: 0x156efee0 full_name: "scsi_unblock_requests" } +elf_symbol { + id: 0xe6808261 + name: "sdev_evt_send_simple" + is_defined: true + symbol_type: FUNCTION + crc: 0x1727f774 + type_id: 0x1aa483a8 + full_name: "sdev_evt_send_simple" +} elf_symbol { id: 0x771aea1d name: "sdev_prefix_printk" @@ -388068,6 +388812,15 @@ elf_symbol { type_id: 
0x4585663f full_name: "sysctl_sched_features" } +elf_symbol { + id: 0xe6ea21b1 + name: "sysctl_sched_idle_min_granularity" + is_defined: true + symbol_type: OBJECT + crc: 0x69545cfa + type_id: 0x4585663f + full_name: "sysctl_sched_idle_min_granularity" +} elf_symbol { id: 0x87812861 name: "sysctl_sched_latency" @@ -388077,6 +388830,15 @@ elf_symbol { type_id: 0x4585663f full_name: "sysctl_sched_latency" } +elf_symbol { + id: 0x34555a8a + name: "sysctl_sched_min_granularity" + is_defined: true + symbol_type: OBJECT + crc: 0x04390257 + type_id: 0x4585663f + full_name: "sysctl_sched_min_granularity" +} elf_symbol { id: 0x18d0dd21 name: "sysctl_vals" @@ -388329,6 +389091,15 @@ elf_symbol { type_id: 0x599826a1 full_name: "system_32bit_el0_cpumask" } +elf_symbol { + id: 0x991b4bfd + name: "system_entering_hibernation" + is_defined: true + symbol_type: FUNCTION + crc: 0x13f42152 + type_id: 0xfea45b04 + full_name: "system_entering_hibernation" +} elf_symbol { id: 0xb5701f35 name: "system_freezable_power_efficient_wq" @@ -389364,6 +390135,69 @@ elf_symbol { type_id: 0x10985193 full_name: "tracing_off" } +elf_symbol { + id: 0x8f8403dc + name: "transport_add_device" + is_defined: true + symbol_type: FUNCTION + crc: 0x9d7e8343 + type_id: 0x9d16dd74 + full_name: "transport_add_device" +} +elf_symbol { + id: 0x5911125b + name: "transport_class_register" + is_defined: true + symbol_type: FUNCTION + crc: 0x071cb3f2 + type_id: 0x9a038c6a + full_name: "transport_class_register" +} +elf_symbol { + id: 0x113cbc59 + name: "transport_class_unregister" + is_defined: true + symbol_type: FUNCTION + crc: 0xce941924 + type_id: 0x171b3ed6 + full_name: "transport_class_unregister" +} +elf_symbol { + id: 0x7640c32b + name: "transport_configure_device" + is_defined: true + symbol_type: FUNCTION + crc: 0x106dd54f + type_id: 0x100e6fc8 + full_name: "transport_configure_device" +} +elf_symbol { + id: 0xc0be90d8 + name: "transport_destroy_device" + is_defined: true + symbol_type: FUNCTION + crc: 0x1870a351 + type_id: 0x100e6fc8 + full_name: "transport_destroy_device" +} +elf_symbol { + id: 0x09f20ac9 + name: "transport_remove_device" + is_defined: true + symbol_type: FUNCTION + crc: 0xcd97ee1a + type_id: 0x100e6fc8 + full_name: "transport_remove_device" +} +elf_symbol { + id: 0xd75a472d + name: "transport_setup_device" + is_defined: true + symbol_type: FUNCTION + crc: 0x66ba89d2 + type_id: 0x100e6fc8 + full_name: "transport_setup_device" +} elf_symbol { id: 0x3f07269b name: "truncate_inode_pages" @@ -393990,6 +394824,87 @@ elf_symbol { type_id: 0x927d452a full_name: "uuid_parse" } +elf_symbol { + id: 0x4e2f55da + name: "v4l2_async_nf_cleanup" + is_defined: true + symbol_type: FUNCTION + crc: 0xdad12cba + type_id: 0x1fa7cc4d + full_name: "v4l2_async_nf_cleanup" +} +elf_symbol { + id: 0x04aadf7f + name: "v4l2_async_nf_init" + is_defined: true + symbol_type: FUNCTION + crc: 0xc88abf32 + type_id: 0x1fa7cc4d + full_name: "v4l2_async_nf_init" +} +elf_symbol { + id: 0x7920fabe + name: "v4l2_async_nf_parse_fwnode_endpoints" + is_defined: true + symbol_type: FUNCTION + crc: 0xde590e4b + type_id: 0x9ddac293 + full_name: "v4l2_async_nf_parse_fwnode_endpoints" +} +elf_symbol { + id: 0x48e55006 + name: "v4l2_async_nf_register" + is_defined: true + symbol_type: FUNCTION + crc: 0x8be566ca + type_id: 0x9ca0dc77 + full_name: "v4l2_async_nf_register" +} +elf_symbol { + id: 0x65ffd1d0 + name: "v4l2_async_nf_unregister" + is_defined: true + symbol_type: FUNCTION + crc: 0xc74894f9 + type_id: 0x1fa7cc4d + full_name: "v4l2_async_nf_unregister" +} 
+elf_symbol { + id: 0x507a9ef5 + name: "v4l2_async_register_subdev" + is_defined: true + symbol_type: FUNCTION + crc: 0x64ab86bc + type_id: 0x9df18afd + full_name: "v4l2_async_register_subdev" +} +elf_symbol { + id: 0x050dd932 + name: "v4l2_async_register_subdev_sensor" + is_defined: true + symbol_type: FUNCTION + crc: 0x61c8f608 + type_id: 0x9df18afd + full_name: "v4l2_async_register_subdev_sensor" +} +elf_symbol { + id: 0x0664687c + name: "v4l2_async_subdev_nf_register" + is_defined: true + symbol_type: FUNCTION + crc: 0x4d890f4b + type_id: 0x9d027320 + full_name: "v4l2_async_subdev_nf_register" +} +elf_symbol { + id: 0xf440f7f1 + name: "v4l2_async_unregister_subdev" + is_defined: true + symbol_type: FUNCTION + crc: 0x2592ea78 + type_id: 0x10e93841 + full_name: "v4l2_async_unregister_subdev" +} elf_symbol { id: 0xf39bae65 name: "v4l2_compat_ioctl32" @@ -394395,6 +395310,33 @@ elf_symbol { type_id: 0x209ae488 full_name: "v4l2_format_info" } +elf_symbol { + id: 0x7ba36329 + name: "v4l2_fwnode_endpoint_alloc_parse" + is_defined: true + symbol_type: FUNCTION + crc: 0x05930b06 + type_id: 0x9cfc5a75 + full_name: "v4l2_fwnode_endpoint_alloc_parse" +} +elf_symbol { + id: 0x2643c2c9 + name: "v4l2_fwnode_endpoint_free" + is_defined: true + symbol_type: FUNCTION + crc: 0xf01d6f06 + type_id: 0x15112911 + full_name: "v4l2_fwnode_endpoint_free" +} +elf_symbol { + id: 0xcb8b4f14 + name: "v4l2_fwnode_endpoint_parse" + is_defined: true + symbol_type: FUNCTION + crc: 0x9dcd6cfe + type_id: 0x9cfc5a75 + full_name: "v4l2_fwnode_endpoint_parse" +} elf_symbol { id: 0x58330374 name: "v4l2_g_parm_cap" @@ -397960,6 +398902,7 @@ interface { symbol_id: 0x6a30419a symbol_id: 0x021741b4 symbol_id: 0x9339caba + symbol_id: 0xc7d06fb9 symbol_id: 0xac1ff1ce symbol_id: 0xba429af2 symbol_id: 0xe495eb53 @@ -398003,6 +398946,7 @@ interface { symbol_id: 0x279e51a3 symbol_id: 0xe78c29b1 symbol_id: 0x95c24824 + symbol_id: 0xfa3b077f symbol_id: 0x0a6e3e89 symbol_id: 0x347a699c symbol_id: 0x27ce6aa1 @@ -398213,6 +399157,7 @@ interface { symbol_id: 0x99aa632e symbol_id: 0xe68925b8 symbol_id: 0x6e3bb1cf + symbol_id: 0xe18b6ee8 symbol_id: 0x0166be18 symbol_id: 0xc5953732 symbol_id: 0x1d4d84d0 @@ -398463,6 +399408,7 @@ interface { symbol_id: 0xdcaa59a3 symbol_id: 0x7ebac47a symbol_id: 0xf586d5b6 + symbol_id: 0x7f1591a1 symbol_id: 0x1921d10d symbol_id: 0x1f554c2a symbol_id: 0x343adff1 @@ -398548,6 +399494,7 @@ interface { symbol_id: 0xf2c39651 symbol_id: 0x93303c51 symbol_id: 0x3a545b61 + symbol_id: 0xb35da0ec symbol_id: 0x7d069e91 symbol_id: 0x0fa39b81 symbol_id: 0x1a91ec8c @@ -398560,6 +399507,7 @@ interface { symbol_id: 0x8d62858f symbol_id: 0xcef5d79f symbol_id: 0x91384eff + symbol_id: 0xd56fbf76 symbol_id: 0x3ef508a2 symbol_id: 0xfb1b8d64 symbol_id: 0xc56d7179 @@ -398607,6 +399555,7 @@ interface { symbol_id: 0x226cc38b symbol_id: 0xeecc1529 symbol_id: 0xfeff2e7f + symbol_id: 0x641d703d symbol_id: 0x2bc25325 symbol_id: 0x0119fc41 symbol_id: 0xd9f43028 @@ -398905,6 +399854,7 @@ interface { symbol_id: 0x54b2cd01 symbol_id: 0x188eab44 symbol_id: 0xe7584e1c + symbol_id: 0xdef7c547 symbol_id: 0x684e5f4f symbol_id: 0x0d418d38 symbol_id: 0x2121385f @@ -398990,6 +399940,7 @@ interface { symbol_id: 0x0e92ee53 symbol_id: 0xb0c197a3 symbol_id: 0x811d5fab + symbol_id: 0x9fc2933e symbol_id: 0x761f292f symbol_id: 0xef7ad117 symbol_id: 0x158c4cfa @@ -399002,6 +399953,7 @@ interface { symbol_id: 0x04365139 symbol_id: 0xd94bc301 symbol_id: 0x3fc5ffc9 + symbol_id: 0xed43b088 symbol_id: 0xa3915d70 symbol_id: 0xf01f02ea symbol_id: 0xeaebbadf @@ 
-399049,6 +400001,7 @@ interface { symbol_id: 0xa5c71571 symbol_id: 0xfa3284c7 symbol_id: 0x69721329 + symbol_id: 0x2df766e3 symbol_id: 0xd9d2bcff symbol_id: 0x09ba106b symbol_id: 0xf9580976 @@ -399153,6 +400106,8 @@ interface { symbol_id: 0x7c261545 symbol_id: 0xf497de36 symbol_id: 0xf44f6a18 + symbol_id: 0xf51d746f + symbol_id: 0xe13e16ca symbol_id: 0x4c0a941a symbol_id: 0xfc85c168 symbol_id: 0xb6af2644 @@ -399307,6 +400262,8 @@ interface { symbol_id: 0x5f6a1554 symbol_id: 0x3beebbde symbol_id: 0x24064426 + symbol_id: 0x41765c03 + symbol_id: 0xcd05507b symbol_id: 0x82786c66 symbol_id: 0xd772fde3 symbol_id: 0x1abdc14f @@ -400389,6 +401346,8 @@ interface { symbol_id: 0x710f1fc2 symbol_id: 0xaa54a71f symbol_id: 0x5f554bc7 + symbol_id: 0x279bd3a7 + symbol_id: 0x0e847130 symbol_id: 0xd13969dd symbol_id: 0xe2ee283f symbol_id: 0xf5808a3e @@ -400408,6 +401367,8 @@ interface { symbol_id: 0xe2a2feec symbol_id: 0x904cad71 symbol_id: 0x2b7d2f8e + symbol_id: 0x2012ba51 + symbol_id: 0xf5f318e9 symbol_id: 0xf18ac584 symbol_id: 0x7ffe50b7 symbol_id: 0x3b69b427 @@ -400947,6 +401908,7 @@ interface { symbol_id: 0xe09fd784 symbol_id: 0xded28924 symbol_id: 0xe3421d56 + symbol_id: 0x652fbf96 symbol_id: 0xda364c85 symbol_id: 0x68e0756b symbol_id: 0x12cb063e @@ -401606,6 +402568,7 @@ interface { symbol_id: 0x7551a60b symbol_id: 0x08fd4b84 symbol_id: 0xc6d8f246 + symbol_id: 0xfb09b362 symbol_id: 0xdf3e8655 symbol_id: 0x6f2f4bd1 symbol_id: 0xf87ecda4 @@ -402447,6 +403410,7 @@ interface { symbol_id: 0xea37502b symbol_id: 0x0bb7f730 symbol_id: 0xb65e3baf + symbol_id: 0x27a870d1 symbol_id: 0xf68f8b33 symbol_id: 0xfab02ca8 symbol_id: 0xd7668767 @@ -402735,6 +403699,7 @@ interface { symbol_id: 0x9ac8ef20 symbol_id: 0x35c96922 symbol_id: 0xbe6406c3 + symbol_id: 0xec0d5441 symbol_id: 0x9c6c58ea symbol_id: 0x2fefe933 symbol_id: 0x1c994923 @@ -402795,8 +403760,10 @@ interface { symbol_id: 0xffa3ecd1 symbol_id: 0x42595f98 symbol_id: 0xd085753f + symbol_id: 0xae61b91f symbol_id: 0xc37c9a74 symbol_id: 0xd03f3f09 + symbol_id: 0xfa9dbeca symbol_id: 0x123cd197 symbol_id: 0xe57e5e73 symbol_id: 0x8ba9d028 @@ -403383,6 +404350,7 @@ interface { symbol_id: 0x57ee69c1 symbol_id: 0xd76b82b2 symbol_id: 0x30c7b7f4 + symbol_id: 0x642147cd symbol_id: 0x9c7a2d6c symbol_id: 0x48fc2cb6 symbol_id: 0xd41c441b @@ -403544,14 +404512,20 @@ interface { symbol_id: 0xd3148537 symbol_id: 0xb5b25b58 symbol_id: 0x278a6b59 + symbol_id: 0xebec291e symbol_id: 0xe4036f2e symbol_id: 0xd49d7abc symbol_id: 0x76dea2aa symbol_id: 0x14eb95fa symbol_id: 0x474e9bcc + symbol_id: 0x61df84bc + symbol_id: 0x054c0bba + symbol_id: 0x55968d64 symbol_id: 0xf10245da symbol_id: 0x18cbd7f9 symbol_id: 0x30f6b9b1 + symbol_id: 0xb77321e1 + symbol_id: 0xe584e576 symbol_id: 0x8ef5c221 symbol_id: 0x32b196e0 symbol_id: 0x022517f0 @@ -403566,9 +404540,14 @@ interface { symbol_id: 0x42390c70 symbol_id: 0x8deacb1d symbol_id: 0x2e407415 + symbol_id: 0x24093af7 symbol_id: 0xc9021692 + symbol_id: 0x51e78cea + symbol_id: 0x9489f8a9 + symbol_id: 0x494ae459 symbol_id: 0xcf17c9a6 symbol_id: 0x9c54c873 + symbol_id: 0xe6808261 symbol_id: 0x771aea1d symbol_id: 0x8d3c4841 symbol_id: 0xf399cd48 @@ -404196,7 +405175,9 @@ interface { symbol_id: 0x2f857527 symbol_id: 0x3e5f4f82 symbol_id: 0xbf1515af + symbol_id: 0xe6ea21b1 symbol_id: 0x87812861 + symbol_id: 0x34555a8a symbol_id: 0x18d0dd21 symbol_id: 0x92705587 symbol_id: 0xdbe66171 @@ -404225,6 +405206,7 @@ interface { symbol_id: 0xda44819e symbol_id: 0x46cd3193 symbol_id: 0xb6c44fb1 + symbol_id: 0x991b4bfd symbol_id: 0xb5701f35 symbol_id: 
0xeeb4dc4c symbol_id: 0x314b4b2e @@ -404340,6 +405322,13 @@ interface { symbol_id: 0x3df2f359 symbol_id: 0x33172d21 symbol_id: 0x54bbaa46 + symbol_id: 0x8f8403dc + symbol_id: 0x5911125b + symbol_id: 0x113cbc59 + symbol_id: 0x7640c32b + symbol_id: 0xc0be90d8 + symbol_id: 0x09f20ac9 + symbol_id: 0xd75a472d symbol_id: 0x3f07269b symbol_id: 0x3c7c6ce9 symbol_id: 0x7a43283c @@ -404854,6 +405843,15 @@ interface { symbol_id: 0xb0c1eaf9 symbol_id: 0xe7b3f166 symbol_id: 0xb21b47da + symbol_id: 0x4e2f55da + symbol_id: 0x04aadf7f + symbol_id: 0x7920fabe + symbol_id: 0x48e55006 + symbol_id: 0x65ffd1d0 + symbol_id: 0x507a9ef5 + symbol_id: 0x050dd932 + symbol_id: 0x0664687c + symbol_id: 0xf440f7f1 symbol_id: 0xf39bae65 symbol_id: 0xfd78bf45 symbol_id: 0x218d39b6 @@ -404899,6 +405897,9 @@ interface { symbol_id: 0xe66642fe symbol_id: 0x538ad5cc symbol_id: 0x2244c8f0 + symbol_id: 0x7ba36329 + symbol_id: 0x2643c2c9 + symbol_id: 0xcb8b4f14 symbol_id: 0x58330374 symbol_id: 0xdb18c924 symbol_id: 0x5e36dba6 diff --git a/android/abi_gki_aarch64_galaxy b/android/abi_gki_aarch64_galaxy index 01f6927b0592..4dfd51b49f67 100644 --- a/android/abi_gki_aarch64_galaxy +++ b/android/abi_gki_aarch64_galaxy @@ -274,6 +274,8 @@ sched_clock sched_show_task scnprintf + scsi_device_quiesce + scsi_device_resume seq_hex_dump seq_lseek seq_printf diff --git a/android/abi_gki_aarch64_imx b/android/abi_gki_aarch64_imx index c3797c477a4c..1b556c8705f6 100644 --- a/android/abi_gki_aarch64_imx +++ b/android/abi_gki_aarch64_imx @@ -1025,6 +1025,7 @@ iio_trigger_poll_chained iio_trigger_register iio_trigger_unregister + iio_trigger_using_own import_iovec in4_pton inet_csk_get_port diff --git a/android/abi_gki_aarch64_oplus b/android/abi_gki_aarch64_oplus index a374dcf65636..8c9846b3a27d 100644 --- a/android/abi_gki_aarch64_oplus +++ b/android/abi_gki_aarch64_oplus @@ -158,6 +158,7 @@ __traceiter_android_vh_dm_bufio_shrink_scan_bypass __traceiter_android_vh_mutex_unlock_slowpath __traceiter_android_vh_rtmutex_waiter_prio + __traceiter_android_vh_rt_mutex_steal __traceiter_android_vh_rwsem_can_spin_on_owner __traceiter_android_vh_rwsem_opt_spin_finish __traceiter_android_vh_rwsem_opt_spin_start @@ -258,6 +259,7 @@ __tracepoint_android_vh_record_rtmutex_lock_starttime __tracepoint_android_vh_record_rwsem_lock_starttime __tracepoint_android_vh_rtmutex_waiter_prio + __tracepoint_android_vh_rt_mutex_steal __tracepoint_android_vh_rwsem_can_spin_on_owner __tracepoint_android_vh_rwsem_opt_spin_finish __tracepoint_android_vh_rwsem_opt_spin_start diff --git a/android/abi_gki_aarch64_pixel b/android/abi_gki_aarch64_pixel index 8da36c314c22..fac865d11e03 100644 --- a/android/abi_gki_aarch64_pixel +++ b/android/abi_gki_aarch64_pixel @@ -3,8 +3,11 @@ add_cpu add_timer add_timer_on + add_uevent_var add_wait_queue adjust_managed_page_count + aes_encrypt + aes_expandkey alarm_cancel alarm_init alarm_start_relative @@ -19,6 +22,7 @@ __alloc_percpu __alloc_percpu_gfp __alloc_skb + alloc_skb_with_frags alloc_workqueue alt_cb_patch_nops amba_bustype @@ -188,6 +192,7 @@ clockevents_config_and_register clocks_calc_mult_shift __clocksource_register_scale + __cma_alloc cma_alloc cma_for_each_area cma_get_name @@ -242,6 +247,7 @@ cpufreq_get_policy cpufreq_policy_transition_delay_us cpufreq_quick_get + cpufreq_quick_get_max cpufreq_register_driver cpufreq_register_governor cpufreq_register_notifier @@ -260,6 +266,7 @@ cpu_hwcaps cpuidle_driver_state_disabled cpuidle_get_driver + cpuidle_governor_latency_req cpu_latency_qos_add_request 
cpu_latency_qos_remove_request cpu_latency_qos_update_request @@ -275,6 +282,7 @@ cpus_read_lock cpus_read_unlock cpu_subsys + cpu_topology crc32_be crc32_le crc8 @@ -297,6 +305,7 @@ crypto_register_shash crypto_req_done crypto_shash_digest + crypto_shash_final crypto_shash_finup crypto_shash_setkey crypto_shash_update @@ -310,10 +319,12 @@ csum_partial csum_tcpudp_nofold _ctype + datagram_poll deactivate_task debugfs_attr_read debugfs_attr_write debugfs_create_atomic_t + debugfs_create_blob debugfs_create_bool debugfs_create_devm_seqfile debugfs_create_dir @@ -339,6 +350,7 @@ desc_to_gpio destroy_workqueue dev_addr_mod + _dev_alert dev_alloc_name __dev_change_net_namespace dev_close @@ -497,7 +509,11 @@ dev_pm_opp_of_remove_table dev_pm_opp_put dev_pm_opp_set_config + dev_pm_qos_add_notifier + dev_pm_qos_add_request dev_pm_qos_read_value + dev_pm_qos_remove_notifier + dev_pm_qos_remove_request dev_pm_qos_update_request _dev_printk dev_printk_emit @@ -538,20 +554,28 @@ dma_buf_unmap_attachment dma_buf_vmap dma_buf_vunmap + dma_direct_alloc + dma_direct_free dmaengine_unmap_put dma_fence_add_callback dma_fence_array_create + dma_fence_array_ops dma_fence_context_alloc dma_fence_default_wait + dma_fence_enable_sw_signaling dma_fence_get_status dma_fence_init dma_fence_release dma_fence_remove_callback dma_fence_signal dma_fence_signal_locked + dma_fence_unwrap_first + __dma_fence_unwrap_merge + dma_fence_unwrap_next dma_fence_wait_timeout dma_free_attrs dma_free_pages + dma_get_sgtable_attrs dma_get_slave_caps dma_get_slave_channel dma_heap_add @@ -733,6 +757,7 @@ drm_kms_helper_poll_fini drm_kms_helper_poll_init drm_match_cea_mode + __drmm_crtc_alloc_with_planes drmm_kmalloc drmm_mode_config_init drm_mode_config_reset @@ -805,6 +830,7 @@ drm_writeback_signal_completion dump_backtrace dump_stack + dump_tasks dw_handle_msi_irq dw_pcie_find_capability dw_pcie_host_init @@ -856,6 +882,7 @@ find_task_by_vpid find_vma_intersection finish_wait + firmware_request_nowarn flush_dcache_page flush_delayed_work flush_work @@ -1028,6 +1055,7 @@ ida_destroy ida_free idr_alloc + idr_alloc_cyclic idr_destroy idr_find idr_for_each @@ -1047,6 +1075,7 @@ in6_pton in_aton inc_zone_page_state + in_egroup_p inet_csk_get_port init_dummy_netdev init_iova_domain @@ -1056,6 +1085,7 @@ __init_swait_queue_head init_task init_timer_key + init_user_ns init_uts_ns init_wait_entry __init_waitqueue_head @@ -1123,8 +1153,10 @@ io_schedule_timeout iounmap iova_domain_init_rcaches + iov_iter_revert ip_compute_csum ip_send_check + __ipv6_addr_type __irq_alloc_descs __irq_apply_affinity_hint irq_create_mapping_affinity @@ -1156,12 +1188,20 @@ jiffies_to_usecs kasan_flag_enabled kasprintf + kernel_bind + kernel_connect kernel_cpustat + kernel_getsockname kernel_kobj kernel_param_lock kernel_param_unlock + kernel_recvmsg kernel_restart + kernel_sendmsg + kernfs_find_and_get_ns + kernfs_notify kernfs_path_from_node + kernfs_put key_create_or_update key_put keyring_alloc @@ -1184,6 +1224,7 @@ kmalloc_trace kmem_cache_alloc kmem_cache_create + kmem_cache_create_usercopy kmem_cache_destroy kmem_cache_free kmemdup @@ -1198,6 +1239,7 @@ kobject_uevent_env kobj_sysfs_ops krealloc + ksize kstat kstrdup kstrndup @@ -1262,6 +1304,7 @@ __list_del_entry_valid list_sort __local_bh_enable_ip + lock_sock_nested log_abnormal_wakeup_reason log_post_read_mmio log_post_write_mmio @@ -1287,6 +1330,12 @@ memdup_user_nul memmove memparse + mempool_alloc + mempool_alloc_slab + mempool_create + mempool_destroy + mempool_free + mempool_free_slab memremap 
mem_section memset @@ -1338,6 +1387,7 @@ napi_complete_done napi_disable napi_enable + napi_gro_flush napi_gro_receive __napi_schedule napi_schedule_prep @@ -1354,7 +1404,9 @@ netif_receive_skb netif_receive_skb_list netif_rx + netif_tx_lock netif_tx_stop_all_queues + netif_tx_unlock netif_tx_wake_queue netlink_broadcast __netlink_kernel_create @@ -1381,6 +1433,7 @@ nr_cpu_ids nr_irqs ns_capable + ns_capable_noaudit nsec_to_clock_t ns_to_timespec64 __num_online_cpus @@ -1482,6 +1535,7 @@ panic_notifier_list param_array_ops param_get_int + param_get_string param_ops_bool param_ops_byte param_ops_charp @@ -1490,10 +1544,14 @@ param_ops_string param_ops_uint param_ops_ulong + param_set_copystring + param_set_int pci_alloc_irq_vectors_affinity pci_assign_resource pci_clear_master pci_disable_device + pci_disable_msi + pcie_capability_read_word pci_enable_device pci_enable_wake pci_find_bus @@ -1501,6 +1559,9 @@ pci_find_ext_capability pci_free_irq_vectors pci_get_device + pci_iomap + pci_iounmap + pci_irq_vector pci_load_and_free_saved_state pci_load_saved_state pci_msi_mask_irq @@ -1508,7 +1569,9 @@ pci_read_config_dword pci_read_config_word __pci_register_driver + pci_release_region pci_release_regions + pci_request_region pci_rescan_bus pci_restore_msi_state pci_restore_state @@ -1606,6 +1669,7 @@ __pm_runtime_use_autosuspend __pm_stay_awake pm_stay_awake + pm_system_wakeup pm_wakeup_dev_event pm_wakeup_ws_event power_supply_changed @@ -1640,6 +1704,8 @@ proc_remove proc_set_size proc_symlink + proto_register + proto_unregister pskb_expand_head __pskb_pull_tail ___pskb_trim @@ -1660,7 +1726,9 @@ radix_tree_delete_item radix_tree_gang_lookup radix_tree_insert + radix_tree_iter_delete radix_tree_lookup + radix_tree_next_chunk radix_tree_preload ___ratelimit raw_notifier_call_chain @@ -1668,9 +1736,11 @@ raw_notifier_chain_unregister _raw_read_lock _raw_read_lock_bh + _raw_read_lock_irq _raw_read_lock_irqsave _raw_read_unlock _raw_read_unlock_bh + _raw_read_unlock_irq _raw_read_unlock_irqrestore _raw_spin_lock _raw_spin_lock_bh @@ -1684,9 +1754,11 @@ _raw_spin_unlock_irq _raw_spin_unlock_irqrestore _raw_write_lock + _raw_write_lock_bh _raw_write_lock_irq _raw_write_lock_irqsave _raw_write_unlock + _raw_write_unlock_bh _raw_write_unlock_irq _raw_write_unlock_irqrestore rb_erase @@ -1701,6 +1773,7 @@ rdev_get_drvdata rdev_get_id reboot_mode + refcount_dec_not_one refcount_warn_saturate __refrigerator regcache_cache_only @@ -1718,6 +1791,7 @@ register_netdev register_netdevice register_netdevice_notifier + register_netevent_notifier register_oom_notifier register_pernet_device register_pernet_subsys @@ -1760,11 +1834,13 @@ regulator_notifier_call_chain regulator_put regulator_set_active_discharge_regmap + regulator_set_load regulator_set_voltage regulator_set_voltage_sel_regmap regulator_unregister release_firmware __release_region + release_sock remap_pfn_range remap_vmalloc_range remove_cpu @@ -1865,6 +1941,7 @@ seq_read seq_release seq_release_private + seq_vprintf seq_write set_capacity set_capacity_and_notify @@ -1905,20 +1982,25 @@ single_open single_open_size single_release + sk_alloc skb_add_rx_frag skb_checksum skb_checksum_help skb_clone skb_clone_sk + skb_coalesce_rx_frag skb_complete_wifi_ack skb_copy skb_copy_bits + skb_copy_datagram_iter skb_copy_expand skb_dequeue skb_dequeue_tail skb_ensure_writable + skb_free_datagram __skb_get_hash __skb_gso_segment + __skb_pad skb_pull skb_push skb_put @@ -1926,7 +2008,11 @@ skb_queue_purge skb_queue_tail skb_realloc_headroom + 
skb_recv_datagram + skb_set_owner_w + skb_store_bits skb_trim + sk_free skip_spaces smp_call_function smp_call_function_single @@ -2003,8 +2089,22 @@ snd_soc_unregister_component snprintf soc_device_register + sock_alloc_send_pskb __sock_create + sock_create_kern + sock_gettstamp + sock_init_data + sock_no_accept + sock_no_listen + sock_no_mmap + sock_no_sendpage + sock_no_shutdown + sock_no_socketpair + sock_queue_rcv_skb_reason + sock_register sock_release + sock_setsockopt + sock_unregister sock_wfree softnet_data sort @@ -2042,6 +2142,7 @@ strcasecmp strcat strchr + strchrnul strcmp strcpy strcspn @@ -2081,7 +2182,9 @@ synchronize_rcu syscon_regmap_lookup_by_phandle sysctl_sched_features + sysctl_sched_idle_min_granularity sysctl_sched_latency + sysctl_sched_min_granularity sysfs_add_file_to_group sysfs_add_link_to_group sysfs_create_file_ns @@ -2149,6 +2252,7 @@ thermal_zone_get_temp thermal_zone_get_zone_by_name thread_group_cputime_adjusted + tick_nohz_get_idle_calls_cpu time64_to_tm topology_update_thermal_pressure _totalram_pages @@ -2208,6 +2312,7 @@ __traceiter_android_vh_dup_task_struct __traceiter_android_vh_early_resume_begin __traceiter_android_vh_enable_thermal_genl_check + __traceiter_android_vh_ep_create_wakeup_source __traceiter_android_vh_filemap_get_folio __traceiter_android_vh_ipi_stop __traceiter_android_vh_meminfo_proc_show @@ -2221,6 +2326,7 @@ __traceiter_android_vh_setscheduler_uclamp __traceiter_android_vh_si_meminfo_adjust __traceiter_android_vh_sysrq_crash + __traceiter_android_vh_timerfd_create __traceiter_android_vh_typec_store_partner_src_caps __traceiter_android_vh_typec_tcpci_override_toggling __traceiter_android_vh_typec_tcpm_get_timer @@ -2315,6 +2421,7 @@ __tracepoint_android_vh_dup_task_struct __tracepoint_android_vh_early_resume_begin __tracepoint_android_vh_enable_thermal_genl_check + __tracepoint_android_vh_ep_create_wakeup_source __tracepoint_android_vh_filemap_get_folio __tracepoint_android_vh_ipi_stop __tracepoint_android_vh_meminfo_proc_show @@ -2328,6 +2435,7 @@ __tracepoint_android_vh_setscheduler_uclamp __tracepoint_android_vh_si_meminfo_adjust __tracepoint_android_vh_sysrq_crash + __tracepoint_android_vh_timerfd_create __tracepoint_android_vh_typec_store_partner_src_caps __tracepoint_android_vh_typec_tcpci_override_toggling __tracepoint_android_vh_typec_tcpm_get_timer @@ -2437,6 +2545,7 @@ unregister_netdevice_many unregister_netdevice_notifier unregister_netdevice_queue + unregister_netevent_notifier unregister_oom_notifier unregister_pernet_device unregister_pernet_subsys @@ -2585,6 +2694,7 @@ vring_del_virtqueue vring_interrupt vring_new_virtqueue + vscnprintf vsnprintf vunmap vzalloc @@ -2592,6 +2702,7 @@ wait_for_completion wait_for_completion_interruptible wait_for_completion_interruptible_timeout + wait_for_completion_killable wait_for_completion_timeout wait_woken __wake_up @@ -2609,6 +2720,7 @@ watchdog_set_restart_priority watchdog_unregister_device wireless_nlevent_flush + wireless_send_event woken_wake_function work_busy __write_overflow_field @@ -2620,11 +2732,13 @@ xa_find xa_find_after xa_get_mark + __xa_insert xa_load xa_set_mark xas_find xas_pause __xa_store + xa_store __xfrm_state_destroy xfrm_state_lookup_byspi xfrm_stateonly_find diff --git a/android/abi_gki_aarch64_rockchip b/android/abi_gki_aarch64_rockchip index 8fdda5ad35fb..a051b3843047 100644 --- a/android/abi_gki_aarch64_rockchip +++ b/android/abi_gki_aarch64_rockchip @@ -2,6 +2,7 @@ # commonly used symbols add_timer alloc_chrdev_region + alloc_etherdev_mqs 
alloc_iova_fast __alloc_pages __alloc_skb @@ -827,9 +828,25 @@ param_ops_int param_ops_string param_ops_uint + param_ops_ulong + pci_disable_device + pci_disable_link_state pcie_capability_clear_and_set_word + pci_find_capability + pcim_enable_device + pcim_iomap_table + pcim_pin_device + pci_read_config_byte pci_read_config_dword + pci_read_config_word + __pci_register_driver + pci_restore_state + pci_save_state + pci_set_master + pci_set_power_state + pci_unregister_driver pci_write_config_dword + pci_write_config_word __per_cpu_offset perf_trace_buf_alloc perf_trace_run_bpf_submit @@ -1023,7 +1040,11 @@ sched_set_fifo schedule schedule_timeout + schedule_timeout_uninterruptible scnprintf + scsi_command_size_tbl + scsi_device_get + scsi_device_put __sdhci_add_host sdhci_cleanup_host sdhci_enable_clk @@ -1247,6 +1268,15 @@ usb_submit_urb __usecs_to_jiffies usleep_range_state + __v4l2_async_nf_add_fwnode_remote + v4l2_async_nf_cleanup + v4l2_async_nf_init + v4l2_async_nf_parse_fwnode_endpoints + v4l2_async_nf_register + v4l2_async_register_subdev + v4l2_async_register_subdev_sensor + v4l2_async_subdev_nf_register + v4l2_async_unregister_subdev v4l2_ctrl_find v4l2_ctrl_g_ctrl v4l2_ctrl_g_ctrl_int64 @@ -1274,6 +1304,9 @@ v4l2_event_subscribe v4l2_event_unsubscribe v4l2_fh_open + v4l2_fwnode_endpoint_alloc_parse + v4l2_fwnode_endpoint_free + v4l2_fwnode_endpoint_parse v4l2_i2c_subdev_init v4l2_match_dv_timings v4l2_pipeline_link_notify @@ -1325,6 +1358,7 @@ vunmap vzalloc wait_for_completion + wait_for_completion_interruptible wait_for_completion_timeout __wake_up wake_up_process @@ -1346,15 +1380,23 @@ skcipher_walk_aead_decrypt skcipher_walk_aead_encrypt +# required by ahci.ko + pci_alloc_irq_vectors_affinity + pci_free_irq_vectors + pci_intx + pci_irq_vector + pci_match_id + pcim_iomap_regions_request_all + sysfs_add_file_to_group + sysfs_remove_file_from_group + # required by analogix_dp.ko drm_atomic_get_old_connector_for_encoder # required by aspm_ext.ko - pci_find_capability pci_find_ext_capability # required by bcmdhd.ko - alloc_etherdev_mqs cpu_bit_bitmap down_interruptible down_timeout @@ -1873,6 +1915,60 @@ # required by ledtrig-heartbeat.ko avenrun +# required by libahci.ko + __printk_ratelimit + +# required by libahci_platform.ko + reset_control_rearm + +# required by libata.ko + async_schedule_node + async_synchronize_cookie + attribute_container_register + attribute_container_unregister + autoremove_wake_function + blk_abort_request + blk_queue_max_hw_sectors + blk_queue_max_segments + blk_queue_update_dma_alignment + blk_queue_update_dma_pad + glob_match + pci_bus_type + pcim_iomap_regions + prepare_to_wait + __scsi_add_device + scsi_add_host_with_dma + scsi_build_sense + scsi_change_queue_depth + scsi_check_sense + scsi_device_set_state + scsi_done + scsi_eh_finish_cmd + scsi_eh_flush_done_q + scsi_execute_cmd + __scsi_format_command + scsi_host_alloc + scsi_host_put + scsi_remove_device + scsi_remove_host + scsi_rescan_device + scsi_schedule_eh + scsi_sense_desc_find + scsi_set_sense_field_pointer + scsi_set_sense_information + sdev_evt_send_simple + system_entering_hibernation + trace_seq_printf + trace_seq_putc + transport_add_device + transport_class_register + transport_class_unregister + transport_configure_device + transport_destroy_device + transport_remove_device + transport_setup_device + vscnprintf + # required by mac80211.ko alloc_netdev_mqs __alloc_percpu_gfp @@ -2787,9 +2883,11 @@ # required by video_rkcif.ko media_entity_setup_link + __v4l2_async_nf_add_fwnode 
# required by video_rkisp.ko param_ops_ullong + v4l2_async_nf_unregister v4l2_ctrl_poll # required by videobuf2-cma-sg.ko diff --git a/android/abi_gki_aarch64_xiaomi b/android/abi_gki_aarch64_xiaomi index 1ca73267f242..d502877c9b2c 100644 --- a/android/abi_gki_aarch64_xiaomi +++ b/android/abi_gki_aarch64_xiaomi @@ -341,3 +341,7 @@ #required by zram.ko bioset_init bioset_exit + +#required by mi_asap.ko + __traceiter_android_vh_read_pages + __tracepoint_android_vh_read_pages diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7dafeacab872..ce95f67faa64 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -642,7 +642,6 @@ config ARM64_WORKAROUND_REPEAT_TLBI config ARM64_ERRATUM_2441007 bool "Cortex-A55: Completion of affected memory accesses might not be guaranteed by completion of a TLBI" - default y select ARM64_WORKAROUND_REPEAT_TLBI help This option adds a workaround for ARM Cortex-A55 erratum #2441007. @@ -881,7 +880,6 @@ config ARM64_ERRATUM_2224489 config ARM64_ERRATUM_2441009 bool "Cortex-A510: Completion of affected memory accesses might not be guaranteed by completion of a TLBI" - default y select ARM64_WORKAROUND_REPEAT_TLBI help This option adds a workaround for ARM Cortex-A510 erratum #2441009. diff --git a/arch/arm64/configs/gki_defconfig b/arch/arm64/configs/gki_defconfig index abf7b41cdc85..009675466150 100644 --- a/arch/arm64/configs/gki_defconfig +++ b/arch/arm64/configs/gki_defconfig @@ -123,6 +123,9 @@ CONFIG_ANON_VMA_NAME=y CONFIG_USERFAULTFD=y CONFIG_LRU_GEN=y CONFIG_LRU_GEN_ENABLED=y +CONFIG_DAMON=y +CONFIG_DAMON_VADDR=y +CONFIG_DAMON_SYSFS=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -428,6 +431,7 @@ CONFIG_THERMAL_WRITABLE_TRIPS=y CONFIG_THERMAL_GOV_USER_SPACE=y CONFIG_THERMAL_GOV_POWER_ALLOCATOR=y CONFIG_CPU_THERMAL=y +CONFIG_CPU_IDLE_THERMAL=y CONFIG_DEVFREQ_THERMAL=y CONFIG_THERMAL_EMULATION=y CONFIG_WATCHDOG=y @@ -577,6 +581,7 @@ CONFIG_IIO_TRIGGER=y CONFIG_PWM=y CONFIG_GENERIC_PHY=y CONFIG_POWERCAP=y +CONFIG_IDLE_INJECT=y CONFIG_ANDROID_BINDER_IPC=y CONFIG_ANDROID_BINDERFS=y CONFIG_ANDROID_DEBUG_SYMBOLS=y diff --git a/arch/arm64/include/asm/kvm_hypevents.h b/arch/arm64/include/asm/kvm_hypevents.h index 8a2dd41b8569..c507d8978444 100644 --- a/arch/arm64/include/asm/kvm_hypevents.h +++ b/arch/arm64/include/asm/kvm_hypevents.h @@ -53,7 +53,7 @@ HYP_EVENT(host_smc, __entry->id = id; __entry->forwarded = forwarded; ), - HE_PRINTK("id=%llu invalid=%u", + HE_PRINTK("id=%llu forwarded=%u", __entry->id, __entry->forwarded) ); diff --git a/arch/arm64/include/asm/kvm_hypevents_defs.h b/arch/arm64/include/asm/kvm_hypevents_defs.h index e228d894a898..606f3477ecd3 100644 --- a/arch/arm64/include/asm/kvm_hypevents_defs.h +++ b/arch/arm64/include/asm/kvm_hypevents_defs.h @@ -15,10 +15,10 @@ struct hyp_entry_hdr { /* * Hyp events definitions common to the hyp and the host */ -#define HYP_EVENT_FORMAT(__name, __struct) \ - struct trace_hyp_format_##__name { \ - struct hyp_entry_hdr hdr; \ - __struct \ +#define HYP_EVENT_FORMAT(__name, __struct) \ + struct __packed trace_hyp_format_##__name { \ + struct hyp_entry_hdr hdr; \ + __struct \ } #define HE_PROTO(args...) 
args diff --git a/arch/arm64/include/asm/kvm_pkvm_module.h b/arch/arm64/include/asm/kvm_pkvm_module.h index 5752e1d11abd..b8e5f8064358 100644 --- a/arch/arm64/include/asm/kvm_pkvm_module.h +++ b/arch/arm64/include/asm/kvm_pkvm_module.h @@ -72,6 +72,11 @@ enum pkvm_psci_notification { * @register_host_perm_fault_handler), otherwise * pKVM will be unable to handle this fault and the * CPU will be stuck in an infinite loop. + * @host_stage2_mod_prot_range: Similar to @host_stage2_mod_prot, but takes a + * range as an argument (@nr_pages). This + * considerably speeds up the process for a + * contiguous memory region, compared to the + * per-page @host_stage2_mod_prot. * @host_stage2_get_leaf: Query the host's stage2 page-table entry for * the page @phys. * @register_host_smc_handler: @cb is called whenever the host issues an SMC @@ -153,7 +158,8 @@ struct pkvm_module_ops { void* (*hyp_va)(phys_addr_t phys); unsigned long (*kern_hyp_va)(unsigned long x); - ANDROID_KABI_RESERVE(1); + ANDROID_KABI_USE(1, int (*host_stage2_mod_prot_range)(u64 pfn, enum kvm_pgtable_prot prot, u64 nr_pages)); + ANDROID_KABI_RESERVE(2); ANDROID_KABI_RESERVE(3); ANDROID_KABI_RESERVE(4); diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index b0eabed053d2..f80d15d52be6 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -104,6 +104,7 @@ int refill_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages, struct kvm_hyp_memcache *host_mc); int module_change_host_page_prot(u64 pfn, enum kvm_pgtable_prot prot); +int module_change_host_page_prot_range(u64 pfn, enum kvm_pgtable_prot prot, u64 nr_pages); void destroy_hyp_vm_pgt(struct pkvm_hyp_vm *vm); void drain_hyp_pool(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc); diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 86cd64130328..3a5193ca0fb3 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -149,22 +149,16 @@ static void prepare_host_vtcr(void) static int prepopulate_host_stage2(void) { struct memblock_region *reg; - u64 addr = 0; - int i, ret; + int i, ret = 0; for (i = 0; i < hyp_memblock_nr; i++) { reg = &hyp_memory[i]; - ret = host_stage2_idmap_locked(addr, reg->base - addr, PKVM_HOST_MMIO_PROT, false); - if (ret) - return ret; ret = host_stage2_idmap_locked(reg->base, reg->size, PKVM_HOST_MEM_PROT, false); if (ret) return ret; - addr = reg->base + reg->size; } - return host_stage2_idmap_locked(addr, BIT(host_mmu.pgt.ia_bits) - addr, PKVM_HOST_MMIO_PROT, - false); + return ret; } int kvm_host_prepare_stage2(void *pgt_pool_base) @@ -881,7 +875,14 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) int ret = -EPERM; esr = read_sysreg_el2(SYS_ESR); - BUG_ON(!__get_fault_info(esr, &fault)); + if (!__get_fault_info(esr, &fault)) { + addr = (u64)-1; + /* + * We've presumably raced with a page-table change which caused + * AT to fail, try again. + */ + goto return_to_host; + } fault.esr_el2 = esr; addr = (fault.hpfar_el2 & HPFAR_MASK) << 8; @@ -908,6 +909,7 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) else BUG_ON(ret && ret != -EAGAIN); +return_to_host: trace_host_mem_abort(esr, addr); } @@ -2008,77 +2010,80 @@ int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages) return ret; } -static int restrict_host_page_perms(u64 addr, kvm_pte_t pte, u32 level, enum kvm_pgtable_prot prot) -{ - int ret = 0; - - /* XXX: optimize ... 
*/ - if (kvm_pte_valid(pte) && (level == KVM_PGTABLE_MAX_LEVELS - 1)) - ret = kvm_pgtable_stage2_unmap(&host_mmu.pgt, addr, PAGE_SIZE); - if (!ret) - ret = host_stage2_idmap_locked(addr, PAGE_SIZE, prot, false); - - return ret; -} - #define MODULE_PROT_ALLOWLIST (KVM_PGTABLE_PROT_RWX | \ KVM_PGTABLE_PROT_DEVICE |\ KVM_PGTABLE_PROT_NC | \ KVM_PGTABLE_PROT_PXN | \ KVM_PGTABLE_PROT_UXN) -int module_change_host_page_prot(u64 pfn, enum kvm_pgtable_prot prot) + +int module_change_host_page_prot_range(u64 pfn, enum kvm_pgtable_prot prot, u64 nr_pages) { - u64 addr = hyp_pfn_to_phys(pfn); + u64 i, addr = hyp_pfn_to_phys(pfn); + u64 end = addr + nr_pages * PAGE_SIZE; struct hyp_page *page = NULL; - kvm_pte_t pte; - u32 level; + struct kvm_mem_range range; + bool is_mmio; int ret; if ((prot & MODULE_PROT_ALLOWLIST) != prot) return -EINVAL; + is_mmio = !find_mem_range(addr, &range); + if (end > range.end) { + /* Specified range not in a single mmio or memory block. */ + return -EPERM; + } + host_lock_component(); - ret = kvm_pgtable_get_leaf(&host_mmu.pgt, addr, &pte, &level); - if (ret) - goto unlock; /* * There is no hyp_vmemmap covering MMIO regions, which makes tracking * of module-owned MMIO regions hard, so we trust the modules not to * mess things up. */ - if (!addr_is_memory(addr)) + if (is_mmio) goto update; - ret = -EPERM; + /* Range is memory: we can track module ownership. */ page = hyp_phys_to_page(addr); /* - * Modules can only relax permissions of pages they own, and restrict - * permissions of pristine pages. + * Modules can only modify pages they already own, and pristine host + * pages. The entire range must be consistently one or the other. */ - if (prot == KVM_PGTABLE_PROT_RWX) { - if (!(page->flags & MODULE_OWNED_PAGE)) + if (page->flags & MODULE_OWNED_PAGE) { + /* The entire range must be module-owned. */ + ret = -EPERM; + for (i = 1; i < nr_pages; i++) { + if (!(page[i].flags & MODULE_OWNED_PAGE)) + goto unlock; + } + } else { + /* The entire range must be pristine. 
*/ + ret = __host_check_page_state_range( + addr, nr_pages << PAGE_SHIFT, PKVM_PAGE_OWNED); + if (ret) goto unlock; - } else if (host_get_page_state(pte, addr) != PKVM_PAGE_OWNED) { - goto unlock; } update: - if (prot == default_host_prot(!!page)) - ret = host_stage2_set_owner_locked(addr, PAGE_SIZE, PKVM_ID_HOST); - else if (!prot) - ret = host_stage2_set_owner_locked(addr, PAGE_SIZE, PKVM_ID_PROTECTED); - else - ret = restrict_host_page_perms(addr, pte, level, prot); + if (!prot) { + ret = host_stage2_set_owner_locked( + addr, nr_pages << PAGE_SHIFT, PKVM_ID_PROTECTED); + } else { + ret = host_stage2_idmap_locked( + addr, nr_pages << PAGE_SHIFT, prot, false); + } - if (ret || !page) + if (WARN_ON(ret) || !page) goto unlock; - if (prot != KVM_PGTABLE_PROT_RWX) - hyp_phys_to_page(addr)->flags |= MODULE_OWNED_PAGE; - else - hyp_phys_to_page(addr)->flags &= ~MODULE_OWNED_PAGE; + for (i = 0; i < nr_pages; i++) { + if (prot != KVM_PGTABLE_PROT_RWX) + page[i].flags |= MODULE_OWNED_PAGE; + else + page[i].flags &= ~MODULE_OWNED_PAGE; + } unlock: host_unlock_component(); @@ -2086,6 +2091,11 @@ unlock: return ret; } +int module_change_host_page_prot(u64 pfn, enum kvm_pgtable_prot prot) +{ + return module_change_host_page_prot_range(pfn, prot, 1); +} + int hyp_pin_shared_mem(void *from, void *to) { u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE); diff --git a/arch/arm64/kvm/hyp/nvhe/modules.c b/arch/arm64/kvm/hyp/nvhe/modules.c index 49e6c2c2e2ae..862e7b7a75ff 100644 --- a/arch/arm64/kvm/hyp/nvhe/modules.c +++ b/arch/arm64/kvm/hyp/nvhe/modules.c @@ -115,6 +115,7 @@ const struct pkvm_module_ops module_ops = { .hyp_pa = hyp_virt_to_phys, .hyp_va = hyp_phys_to_virt, .kern_hyp_va = __kern_hyp_va, + .host_stage2_mod_prot_range = module_change_host_page_prot_range, }; int __pkvm_init_module(void *module_init) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index b7e8faa894c9..64387388584c 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -645,8 +645,13 @@ enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte) return prot; } -static bool stage2_pte_needs_update(kvm_pte_t old, kvm_pte_t new) +static bool stage2_pte_needs_update(struct kvm_pgtable *pgt, + kvm_pte_t old, kvm_pte_t new) { + /* Following filter logic applies only to guest stage-2 entries. */ + if (pgt->flags & KVM_PGTABLE_S2_IDMAP) + return true; + if (!kvm_pte_valid(old) || !kvm_pte_valid(new)) return true; @@ -715,12 +720,15 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, new = data->annotation; /* - * Skip updating the PTE if we are trying to recreate the exact - * same mapping or only change the access permissions. Instead, - * the vCPU will exit one more time from guest if still needed - * and then go through the path of relaxing permissions. + * Skip updating a guest PTE if we are trying to recreate the exact + * same mapping or change only the access permissions. Instead, + * the vCPU will exit one more time from the guest if still needed + * and then go through the path of relaxing permissions. This applies + * only to guest PTEs; Host PTEs are unconditionally updated. The + * host cannot livelock because the abort handler has done prior + * checks before calling here. 
*/ - if (!stage2_pte_needs_update(old, new)) + if (!stage2_pte_needs_update(pgt, old, new)) return -EAGAIN; if (pte_ops->pte_is_counted_cb(old, level)) @@ -775,6 +783,30 @@ static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level, return 0; } +static void stage2_map_prefault_idmap(struct kvm_pgtable_pte_ops *pte_ops, + u64 addr, u64 end, u32 level, + kvm_pte_t *ptep, kvm_pte_t block_pte) +{ + u64 pa, granule; + int i; + + WARN_ON(pte_ops->pte_is_counted_cb(block_pte, level-1)); + + if (!kvm_pte_valid(block_pte)) + return; + + pa = ALIGN_DOWN(addr, kvm_granule_size(level-1)); + granule = kvm_granule_size(level); + for (i = 0; i < PTRS_PER_PTE; ++i, ++ptep, pa += granule) { + kvm_pte_t pte = kvm_init_valid_leaf_pte(pa, block_pte, level); + /* Skip ptes in the range being modified by the caller. */ + if ((pa < addr) || (pa >= end)) { + /* We can write non-atomically: ptep isn't yet live. */ + *ptep = pte; + } + } +} + static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct stage2_map_data *data) { @@ -805,6 +837,11 @@ static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, if (!childp) return -ENOMEM; + if (pgt->flags & KVM_PGTABLE_S2_IDMAP) { + stage2_map_prefault_idmap(pte_ops, addr, end, level + 1, + childp, pte); + } + /* * If we've run into an existing block mapping then replace it with * a table. Accesses beyond 'end' that fall within the new table diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 9970a4785819..7d522b037d9a 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -619,6 +619,8 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, goto done; } count_vm_vma_lock_event(VMA_LOCK_RETRY); + if (fault & VM_FAULT_MAJOR) + mm_flags |= FAULT_FLAG_TRIED; /* Quick path to respond to signals */ if (fault_signal_pending(fault, regs)) { diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index b1723094d464..ec23164ad768 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -496,6 +496,8 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address, goto done; } count_vm_vma_lock_event(VMA_LOCK_RETRY); + if (fault & VM_FAULT_MAJOR) + flags |= FAULT_FLAG_TRIED; if (fault_signal_pending(fault, regs)) return user_mode(regs) ? 
0 : SIGBUS; diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 34a44febae86..d710bb834a2a 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -310,6 +310,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs) goto done; } count_vm_vma_lock_event(VMA_LOCK_RETRY); + if (fault & VM_FAULT_MAJOR) + flags |= FAULT_FLAG_TRIED; if (fault_signal_pending(fault, regs)) { if (!user_mode(regs)) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 0843adb266d1..60fed3c88332 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -420,6 +420,9 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access) goto out; } count_vm_vma_lock_event(VMA_LOCK_RETRY); + if (fault & VM_FAULT_MAJOR) + flags |= FAULT_FLAG_TRIED; + /* Quick path to respond to signals */ if (fault_signal_pending(fault, regs)) { fault = VM_FAULT_SIGNAL; diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index e65f0968e0d9..9c91cc40f456 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -103,6 +103,16 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, return ES_OK; } +static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size) +{ + return ES_OK; +} + +static bool fault_in_kernel_space(unsigned long address) +{ + return false; +} + #undef __init #undef __pa #define __init diff --git a/arch/x86/configs/gki_defconfig b/arch/x86/configs/gki_defconfig index 34a85587c4f4..7e2df44033bc 100644 --- a/arch/x86/configs/gki_defconfig +++ b/arch/x86/configs/gki_defconfig @@ -118,6 +118,9 @@ CONFIG_ANON_VMA_NAME=y CONFIG_USERFAULTFD=y CONFIG_LRU_GEN=y CONFIG_LRU_GEN_ENABLED=y +CONFIG_DAMON=y +CONFIG_DAMON_VADDR=y +CONFIG_DAMON_SYSFS=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -393,6 +396,7 @@ CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS=100 CONFIG_THERMAL_WRITABLE_TRIPS=y CONFIG_THERMAL_GOV_USER_SPACE=y CONFIG_CPU_THERMAL=y +CONFIG_CPU_IDLE_THERMAL=y CONFIG_DEVFREQ_THERMAL=y CONFIG_THERMAL_EMULATION=y # CONFIG_X86_PKG_TEMP_THERMAL is not set @@ -520,6 +524,7 @@ CONFIG_IIO=y CONFIG_IIO_BUFFER=y CONFIG_IIO_TRIGGER=y CONFIG_POWERCAP=y +CONFIG_IDLE_INJECT=y CONFIG_ANDROID_BINDER_IPC=y CONFIG_ANDROID_BINDERFS=y CONFIG_ANDROID_DEBUG_SYMBOLS=y diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 7dce812ce253..71d8698702ce 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -629,6 +629,23 @@ fail: sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); } +static enum es_result vc_insn_string_check(struct es_em_ctxt *ctxt, + unsigned long address, + bool write) +{ + if (user_mode(ctxt->regs) && fault_in_kernel_space(address)) { + ctxt->fi.vector = X86_TRAP_PF; + ctxt->fi.error_code = X86_PF_USER; + ctxt->fi.cr2 = address; + if (write) + ctxt->fi.error_code |= X86_PF_WRITE; + + return ES_EXCEPTION; + } + + return ES_OK; +} + static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt, void *src, char *buf, unsigned int data_size, @@ -636,7 +653,12 @@ static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt, bool backwards) { int i, b = backwards ? -1 : 1; - enum es_result ret = ES_OK; + unsigned long address = (unsigned long)src; + enum es_result ret; + + ret = vc_insn_string_check(ctxt, address, false); + if (ret != ES_OK) + return ret; for (i = 0; i < count; i++) { void *s = src + (i * data_size * b); @@ -657,7 +679,12 @@ static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt, bool backwards) { int i, s = backwards ? 
-1 : 1; - enum es_result ret = ES_OK; + unsigned long address = (unsigned long)dst; + enum es_result ret; + + ret = vc_insn_string_check(ctxt, address, true); + if (ret != ES_OK) + return ret; for (i = 0; i < count; i++) { void *d = dst + (i * data_size * s); @@ -693,6 +720,9 @@ static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt, static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) { struct insn *insn = &ctxt->insn; + size_t size; + u64 port; + *exitinfo = 0; switch (insn->opcode.bytes[0]) { @@ -701,7 +731,7 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) case 0x6d: *exitinfo |= IOIO_TYPE_INS; *exitinfo |= IOIO_SEG_ES; - *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; + port = ctxt->regs->dx & 0xffff; break; /* OUTS opcodes */ @@ -709,41 +739,43 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) case 0x6f: *exitinfo |= IOIO_TYPE_OUTS; *exitinfo |= IOIO_SEG_DS; - *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; + port = ctxt->regs->dx & 0xffff; break; /* IN immediate opcodes */ case 0xe4: case 0xe5: *exitinfo |= IOIO_TYPE_IN; - *exitinfo |= (u8)insn->immediate.value << 16; + port = (u8)insn->immediate.value & 0xffff; break; /* OUT immediate opcodes */ case 0xe6: case 0xe7: *exitinfo |= IOIO_TYPE_OUT; - *exitinfo |= (u8)insn->immediate.value << 16; + port = (u8)insn->immediate.value & 0xffff; break; /* IN register opcodes */ case 0xec: case 0xed: *exitinfo |= IOIO_TYPE_IN; - *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; + port = ctxt->regs->dx & 0xffff; break; /* OUT register opcodes */ case 0xee: case 0xef: *exitinfo |= IOIO_TYPE_OUT; - *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; + port = ctxt->regs->dx & 0xffff; break; default: return ES_DECODE_FAILED; } + *exitinfo |= port << 16; + switch (insn->opcode.bytes[0]) { case 0x6c: case 0x6e: @@ -753,12 +785,15 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) case 0xee: /* Single byte opcodes */ *exitinfo |= IOIO_DATA_8; + size = 1; break; default: /* Length determined by instruction parsing */ *exitinfo |= (insn->opnd_bytes == 2) ? IOIO_DATA_16 : IOIO_DATA_32; + size = (insn->opnd_bytes == 2) ? 
2 : 4; } + switch (insn->addr_bytes) { case 2: *exitinfo |= IOIO_ADDR_16; @@ -774,7 +809,7 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) if (insn_has_rep_prefix(insn)) *exitinfo |= IOIO_REP; - return ES_OK; + return vc_ioio_check(ctxt, (u16)port, size); } static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index afda719dd725..392097f7c241 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -512,6 +512,33 @@ static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt return ES_OK; } +static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size) +{ + BUG_ON(size > 4); + + if (user_mode(ctxt->regs)) { + struct thread_struct *t = ¤t->thread; + struct io_bitmap *iobm = t->io_bitmap; + size_t idx; + + if (!iobm) + goto fault; + + for (idx = port; idx < port + size; ++idx) { + if (test_bit(idx, iobm->bitmap)) + goto fault; + } + } + + return ES_OK; + +fault: + ctxt->fi.vector = X86_TRAP_GP; + ctxt->fi.error_code = 0; + + return ES_EXCEPTION; +} + /* Include code shared with pre-decompression boot stage */ #include "sev-shared.c" diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 97599581ec6b..bcb5678b5b91 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1369,6 +1369,8 @@ void do_user_addr_fault(struct pt_regs *regs, goto done; } count_vm_vma_lock_event(VMA_LOCK_RETRY); + if (fault & VM_FAULT_MAJOR) + flags |= FAULT_FLAG_TRIED; /* Quick path to respond to signals */ if (fault_signal_pending(fault, regs)) { diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 5e680e039d0e..4686c1d9d0cf 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -2553,3 +2553,49 @@ void bpf_jit_free(struct bpf_prog *prog) bpf_prog_unlock_free(prog); } + +void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke, + struct bpf_prog *new, struct bpf_prog *old) +{ + u8 *old_addr, *new_addr, *old_bypass_addr; + int ret; + + old_bypass_addr = old ? NULL : poke->bypass_addr; + old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL; + new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL; + + /* + * On program loading or teardown, the program's kallsym entry + * might not be in place, so we use __bpf_arch_text_poke to skip + * the kallsyms check. 
+ */ + if (new) { + ret = __bpf_arch_text_poke(poke->tailcall_target, + BPF_MOD_JUMP, + old_addr, new_addr); + BUG_ON(ret < 0); + if (!old) { + ret = __bpf_arch_text_poke(poke->tailcall_bypass, + BPF_MOD_JUMP, + poke->bypass_addr, + NULL); + BUG_ON(ret < 0); + } + } else { + ret = __bpf_arch_text_poke(poke->tailcall_bypass, + BPF_MOD_JUMP, + old_bypass_addr, + poke->bypass_addr); + BUG_ON(ret < 0); + /* let other CPUs finish the execution of program + * so that it will not be possible to expose them + * to invalid nop, stack unwind, nop state + */ + if (!ret) + synchronize_rcu(); + ret = __bpf_arch_text_poke(poke->tailcall_target, + BPF_MOD_JUMP, + old_addr, NULL); + BUG_ON(ret < 0); + } +} diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 766b9d5dffb1..b0188e8ee00b 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2127,24 +2127,23 @@ static void binder_deferred_fd_close(int fd) static void binder_transaction_buffer_release(struct binder_proc *proc, struct binder_thread *thread, struct binder_buffer *buffer, - binder_size_t failed_at, + binder_size_t off_end_offset, bool is_failure) { int debug_id = buffer->debug_id; - binder_size_t off_start_offset, buffer_offset, off_end_offset; + binder_size_t off_start_offset, buffer_offset; binder_debug(BINDER_DEBUG_TRANSACTION, "%d buffer release %d, size %zd-%zd, failed at %llx\n", proc->pid, buffer->debug_id, buffer->data_size, buffer->offsets_size, - (unsigned long long)failed_at); + (unsigned long long)off_end_offset); if (buffer->target_node) binder_dec_node(buffer->target_node, 1, 0); off_start_offset = ALIGN(buffer->data_size, sizeof(void *)); - off_end_offset = is_failure && failed_at ? failed_at : - off_start_offset + buffer->offsets_size; + for (buffer_offset = off_start_offset; buffer_offset < off_end_offset; buffer_offset += sizeof(binder_size_t)) { struct binder_object_header *hdr; @@ -2304,6 +2303,21 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, } } +/* Clean up all the objects in the buffer */ +static inline void binder_release_entire_buffer(struct binder_proc *proc, + struct binder_thread *thread, + struct binder_buffer *buffer, + bool is_failure) +{ + binder_size_t off_end_offset; + + off_end_offset = ALIGN(buffer->data_size, sizeof(void *)); + off_end_offset += buffer->offsets_size; + + binder_transaction_buffer_release(proc, thread, buffer, + off_end_offset, is_failure); +} + static int binder_translate_binder(struct flat_binder_object *fp, struct binder_transaction *t, struct binder_thread *thread) @@ -3013,7 +3027,7 @@ static int binder_proc_transaction(struct binder_transaction *t, t_outdated->buffer = NULL; buffer->transaction = NULL; trace_binder_transaction_update_buffer_release(buffer); - binder_transaction_buffer_release(proc, NULL, buffer, 0, 0); + binder_release_entire_buffer(proc, NULL, buffer, false); binder_alloc_free_buf(&proc->alloc, buffer); kfree(t_outdated); binder_stats_deleted(BINDER_STAT_TRANSACTION); @@ -4004,7 +4018,7 @@ binder_free_buf(struct binder_proc *proc, binder_node_inner_unlock(buf_node); } trace_binder_transaction_buffer_release(buffer); - binder_transaction_buffer_release(proc, thread, buffer, 0, is_failure); + binder_release_entire_buffer(proc, thread, buffer, is_failure); binder_alloc_free_buf(&proc->alloc, buffer); } diff --git a/drivers/android/vendor_hooks.c b/drivers/android/vendor_hooks.c index 8425e8709b41..14bb47fcf8b0 100644 --- a/drivers/android/vendor_hooks.c +++ b/drivers/android/vendor_hooks.c @@ -26,6 +26,7 @@
#include #include #include +#include #include #include #include @@ -94,6 +95,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_task_blocks_on_rtmutex); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rtmutex_waiter_prio); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rtmutex_wait_start); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rtmutex_wait_finish); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rt_mutex_steal); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_mutex_opt_spin_start); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_mutex_opt_spin_finish); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_mutex_can_spin_on_owner); @@ -310,6 +312,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_thermal_unregister); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_get_thermal_zone_device); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_thermal_power_cap); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_enable_thermal_power_throttle); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_read_pages); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_alloc_pages_reclaim_bypass); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_alloc_pages_failure_bypass); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_madvise_pageout_swap_entry); @@ -364,3 +367,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_mmc_blk_mq_rw_recovery); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_sd_update_bus_speed_mode); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_slab_folio_alloced); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_kmalloc_large_alloced); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_netlink_poll); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_ep_create_wakeup_source); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_timerfd_create); diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index fe6644f99887..8e9ba701a643 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -461,10 +461,14 @@ static void devfreq_monitor(struct work_struct *work) if (err) dev_err(&devfreq->dev, "dvfs failed with (%d) error\n", err); + if (devfreq->stop_polling) + goto out; + queue_delayed_work(devfreq_wq, &devfreq->work, msecs_to_jiffies(devfreq->profile->polling_ms)); - mutex_unlock(&devfreq->lock); +out: + mutex_unlock(&devfreq->lock); trace_devfreq_monitor(devfreq); } @@ -482,6 +486,10 @@ void devfreq_monitor_start(struct devfreq *devfreq) if (IS_SUPPORTED_FLAG(devfreq->governor->flags, IRQ_DRIVEN)) return; + mutex_lock(&devfreq->lock); + if (delayed_work_pending(&devfreq->work)) + goto out; + switch (devfreq->profile->timer) { case DEVFREQ_TIMER_DEFERRABLE: INIT_DEFERRABLE_WORK(&devfreq->work, devfreq_monitor); @@ -490,12 +498,16 @@ void devfreq_monitor_start(struct devfreq *devfreq) INIT_DELAYED_WORK(&devfreq->work, devfreq_monitor); break; default: - return; + goto out; } if (devfreq->profile->polling_ms) queue_delayed_work(devfreq_wq, &devfreq->work, msecs_to_jiffies(devfreq->profile->polling_ms)); + +out: + devfreq->stop_polling = false; + mutex_unlock(&devfreq->lock); } EXPORT_SYMBOL(devfreq_monitor_start); @@ -512,6 +524,14 @@ void devfreq_monitor_stop(struct devfreq *devfreq) if (IS_SUPPORTED_FLAG(devfreq->governor->flags, IRQ_DRIVEN)) return; + mutex_lock(&devfreq->lock); + if (devfreq->stop_polling) { + mutex_unlock(&devfreq->lock); + return; + } + + devfreq->stop_polling = true; + mutex_unlock(&devfreq->lock); cancel_delayed_work_sync(&devfreq->work); } EXPORT_SYMBOL(devfreq_monitor_stop); diff --git a/drivers/dma-buf/dma-buf-sysfs-stats.c b/drivers/dma-buf/dma-buf-sysfs-stats.c index 4b680e10c15a..4f3ee92dbe1b 100644 --- a/drivers/dma-buf/dma-buf-sysfs-stats.c +++ b/drivers/dma-buf/dma-buf-sysfs-stats.c @@ -11,6 +11,7 @@ #include #include 
#include +#include #include "dma-buf-sysfs-stats.h" @@ -168,35 +169,76 @@ void dma_buf_uninit_sysfs_statistics(void) kset_unregister(dma_buf_stats_kset); } +struct dma_buf_create_sysfs_entry { + struct dma_buf *dmabuf; + struct work_struct work; +}; + +union dma_buf_create_sysfs_work_entry { + struct dma_buf_create_sysfs_entry create_entry; + struct dma_buf_sysfs_entry sysfs_entry; +}; + +static void sysfs_add_workfn(struct work_struct *work) +{ + struct dma_buf_create_sysfs_entry *create_entry = + container_of(work, struct dma_buf_create_sysfs_entry, work); + struct dma_buf *dmabuf = create_entry->dmabuf; + + /* + * A dmabuf is ref-counted via its file member. If this handler holds the only + * reference to the dmabuf, there is no need for sysfs kobject creation. This is an + * optimization and a race; when the reference count drops to 1 immediately after + * this check it is not harmful as the sysfs entry will still get cleaned up in + * dma_buf_stats_teardown, which won't get called until the final dmabuf reference + * is released, and that can't happen until the end of this function. + */ + if (file_count(dmabuf->file) > 1) { + dmabuf->sysfs_entry->dmabuf = dmabuf; + /* + * kobject_init_and_add expects kobject to be zero-filled, but we have populated it + * (the sysfs_add_work union member) to trigger this work function. + */ + memset(&dmabuf->sysfs_entry->kobj, 0, sizeof(dmabuf->sysfs_entry->kobj)); + dmabuf->sysfs_entry->kobj.kset = dma_buf_per_buffer_stats_kset; + if (kobject_init_and_add(&dmabuf->sysfs_entry->kobj, &dma_buf_ktype, NULL, + "%lu", file_inode(dmabuf->file)->i_ino)) { + kobject_put(&dmabuf->sysfs_entry->kobj); + dmabuf->sysfs_entry = NULL; + } + } else { + /* + * Free the sysfs_entry and reset the pointer so dma_buf_stats_teardown doesn't + * attempt to operate on it. + */ + kfree(dmabuf->sysfs_entry); + dmabuf->sysfs_entry = NULL; + } + dma_buf_put(dmabuf); +} + int dma_buf_stats_setup(struct dma_buf *dmabuf, struct file *file) { - struct dma_buf_sysfs_entry *sysfs_entry; - int ret; + struct dma_buf_create_sysfs_entry *create_entry; + union dma_buf_create_sysfs_work_entry *work_entry; if (!dmabuf->exp_name) { pr_err("exporter name must not be empty if stats needed\n"); return -EINVAL; } - sysfs_entry = kzalloc(sizeof(struct dma_buf_sysfs_entry), GFP_KERNEL); - if (!sysfs_entry) + work_entry = kmalloc(sizeof(union dma_buf_create_sysfs_work_entry), GFP_KERNEL); + if (!work_entry) return -ENOMEM; - sysfs_entry->kobj.kset = dma_buf_per_buffer_stats_kset; - sysfs_entry->dmabuf = dmabuf; + dmabuf->sysfs_entry = &work_entry->sysfs_entry; - dmabuf->sysfs_entry = sysfs_entry; + create_entry = &work_entry->create_entry; + create_entry->dmabuf = dmabuf; - /* create the directory for buffer stats */ - ret = kobject_init_and_add(&sysfs_entry->kobj, &dma_buf_ktype, NULL, - "%lu", file_inode(file)->i_ino); - if (ret) - goto err_sysfs_dmabuf; + INIT_WORK(&create_entry->work, sysfs_add_workfn); + get_dma_buf(dmabuf); /* This reference will be dropped in sysfs_add_workfn. 
*/ + schedule_work(&create_entry->work); return 0; - -err_sysfs_dmabuf: - kobject_put(&sysfs_entry->kobj); - dmabuf->sysfs_entry = NULL; - return ret; } diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index e0d42ee76b43..fbe8a07552ef 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -727,10 +727,6 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) dmabuf->resv = resv; } - ret = dma_buf_stats_setup(dmabuf, file); - if (ret) - goto err_dmabuf; - file->private_data = dmabuf; file->f_path.dentry->d_fsdata = dmabuf; dmabuf->file = file; @@ -739,9 +735,19 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) list_add(&dmabuf->list_node, &db_list.head); mutex_unlock(&db_list.lock); + ret = dma_buf_stats_setup(dmabuf, file); + if (ret) + goto err_sysfs; + return dmabuf; -err_dmabuf: +err_sysfs: + mutex_lock(&db_list.lock); + list_del(&dmabuf->list_node); + mutex_unlock(&db_list.lock); + dmabuf->file = NULL; + file->f_path.dentry->d_fsdata = NULL; + file->private_data = NULL; if (!resv) dma_resv_fini(dmabuf->resv); kfree(dmabuf); diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index fd68500ae2ad..1112a0a694e0 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -656,7 +656,9 @@ static void verity_end_io(struct bio *bio) struct dm_verity_io *io = bio->bi_private; if (bio->bi_status && - (!verity_fec_is_enabled(io->v) || verity_is_system_shutting_down())) { + (!verity_fec_is_enabled(io->v) || + verity_is_system_shutting_down() || + (bio->bi_opf & REQ_RAHEAD))) { verity_finish_io(io, bio->bi_status); return; } diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 5e29da94f72d..355d80323b83 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -345,6 +345,7 @@ static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue) static void nvmet_tcp_socket_error(struct nvmet_tcp_queue *queue, int status) { + queue->rcv_state = NVMET_TCP_RECV_ERR; if (status == -EPIPE || status == -ECONNRESET) kernel_sock_shutdown(queue->sock, SHUT_RDWR); else @@ -871,15 +872,11 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) iov.iov_len = sizeof(*icresp); ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); if (ret < 0) - goto free_crypto; + return ret; /* queue removal will cleanup */ queue->state = NVMET_TCP_Q_LIVE; nvmet_prepare_receive_pdu(queue); return 0; -free_crypto: - if (queue->hdr_digest || queue->data_digest) - nvmet_tcp_free_crypto(queue); - return ret; } static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue, diff --git a/drivers/phy/qualcomm/Kconfig b/drivers/phy/qualcomm/Kconfig index 5c98850f5a36..1d3a8062e651 100644 --- a/drivers/phy/qualcomm/Kconfig +++ b/drivers/phy/qualcomm/Kconfig @@ -50,13 +50,55 @@ config PHY_QCOM_PCIE2 Enable this to support the Qualcomm PCIe PHY, used with the Synopsys based PCIe controller. -config PHY_QCOM_QMP - tristate "Qualcomm QMP PHY Driver" +menuconfig PHY_QCOM_QMP + tristate "Qualcomm QMP PHY Drivers" depends on OF && COMMON_CLK && (ARCH_QCOM || COMPILE_TEST) + +if PHY_QCOM_QMP + +config PHY_QCOM_QMP_COMBO + tristate "Qualcomm QMP Combo PHY Driver" + default PHY_QCOM_QMP select GENERIC_PHY help - Enable this to support the QMP PHY transceiver that is used - with controllers such as PCIe, UFS, and USB on Qualcomm chips. 
+ Enable this to support the QMP Combo PHY transceiver that is used + with USB3 and DisplayPort controllers on Qualcomm chips. + +config PHY_QCOM_QMP_PCIE + tristate "Qualcomm QMP PCIe PHY Driver" + depends on PCI || COMPILE_TEST + select GENERIC_PHY + default PHY_QCOM_QMP + help + Enable this to support the QMP PCIe PHY transceiver that is used + with PCIe controllers on Qualcomm chips. + +config PHY_QCOM_QMP_PCIE_8996 + tristate "Qualcomm QMP PCIe 8996 PHY Driver" + depends on PCI || COMPILE_TEST + select GENERIC_PHY + default PHY_QCOM_QMP + help + Enable this to support the QMP PCIe PHY transceiver that is used + with PCIe controllers on Qualcomm msm8996 chips. + +config PHY_QCOM_QMP_UFS + tristate "Qualcomm QMP UFS PHY Driver" + select GENERIC_PHY + default PHY_QCOM_QMP + help + Enable this to support the QMP UFS PHY transceiver that is used + with UFS controllers on Qualcomm chips. + +config PHY_QCOM_QMP_USB + tristate "Qualcomm QMP USB PHY Driver" + select GENERIC_PHY + default PHY_QCOM_QMP + help + Enable this to support the QMP USB PHY transceiver that is used + with USB3 controllers on Qualcomm chips. + +endif # PHY_QCOM_QMP config PHY_QCOM_QUSB2 tristate "Qualcomm QUSB2 PHY Driver" diff --git a/drivers/phy/qualcomm/Makefile b/drivers/phy/qualcomm/Makefile index 65f6c30a3e93..79dd4e507961 100644 --- a/drivers/phy/qualcomm/Makefile +++ b/drivers/phy/qualcomm/Makefile @@ -5,12 +5,12 @@ obj-$(CONFIG_PHY_QCOM_EDP) += phy-qcom-edp.o obj-$(CONFIG_PHY_QCOM_IPQ4019_USB) += phy-qcom-ipq4019-usb.o obj-$(CONFIG_PHY_QCOM_IPQ806X_SATA) += phy-qcom-ipq806x-sata.o obj-$(CONFIG_PHY_QCOM_PCIE2) += phy-qcom-pcie2.o -obj-$(CONFIG_PHY_QCOM_QMP) += \ - phy-qcom-qmp-combo.o \ - phy-qcom-qmp-pcie.o \ - phy-qcom-qmp-pcie-msm8996.o \ - phy-qcom-qmp-ufs.o \ - phy-qcom-qmp-usb.o + +obj-$(CONFIG_PHY_QCOM_QMP_COMBO) += phy-qcom-qmp-combo.o +obj-$(CONFIG_PHY_QCOM_QMP_PCIE) += phy-qcom-qmp-pcie.o +obj-$(CONFIG_PHY_QCOM_QMP_PCIE_8996) += phy-qcom-qmp-pcie-msm8996.o +obj-$(CONFIG_PHY_QCOM_QMP_UFS) += phy-qcom-qmp-ufs.o +obj-$(CONFIG_PHY_QCOM_QMP_USB) += phy-qcom-qmp-usb.o obj-$(CONFIG_PHY_QCOM_QUSB2) += phy-qcom-qusb2.o obj-$(CONFIG_PHY_QCOM_USB_HS) += phy-qcom-usb-hs.o diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 94db7033989a..0f0cfea31cbf 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -8683,7 +8683,6 @@ static int ufshcd_add_lus(struct ufs_hba *hba) ufs_bsg_probe(hba); ufshpb_init(hba); scsi_scan_host(hba->host); - pm_runtime_put_sync(hba->dev); out: return ret; @@ -8916,15 +8915,12 @@ static void ufshcd_async_scan(void *data, async_cookie_t cookie) /* Probe and add UFS logical units */ ret = ufshcd_add_lus(hba); + out: - /* - * If we failed to initialize the device or the device is not - * present, turn off the power/clocks etc. 
- */ - if (ret) { - pm_runtime_put_sync(hba->dev); - ufshcd_hba_exit(hba); - } + pm_runtime_put_sync(hba->dev); + + if (ret) + dev_err(hba->dev, "%s failed: %d\n", __func__, ret); } static enum scsi_timeout_action ufshcd_eh_timed_out(struct scsi_cmnd *scmd) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 3ee70ffaf003..2b01c3e05ebe 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1233,6 +1233,9 @@ static int dwc3_core_init(struct dwc3 *dwc) if (dwc->parkmode_disable_ss_quirk) reg |= DWC3_GUCTL1_PARKMODE_DISABLE_SS; + if (dwc->parkmode_disable_hs_quirk) + reg |= DWC3_GUCTL1_PARKMODE_DISABLE_HS; + if (DWC3_VER_IS_WITHIN(DWC3, 290A, ANY) && (dwc->maximum_speed == USB_SPEED_HIGH || dwc->maximum_speed == USB_SPEED_FULL)) @@ -1539,6 +1542,8 @@ static void dwc3_get_properties(struct dwc3 *dwc) "snps,resume-hs-terminations"); dwc->parkmode_disable_ss_quirk = device_property_read_bool(dev, "snps,parkmode-disable-ss-quirk"); + dwc->parkmode_disable_hs_quirk = device_property_read_bool(dev, + "snps,parkmode-disable-hs-quirk"); dwc->gfladj_refclk_lpm_sel = device_property_read_bool(dev, "snps,gfladj-refclk-lpm-sel-quirk"); diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 89219a14efb0..d21888658806 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -263,6 +263,7 @@ #define DWC3_GUCTL1_DEV_FORCE_20_CLK_FOR_30_CLK BIT(26) #define DWC3_GUCTL1_DEV_L1_EXIT_BY_HW BIT(24) #define DWC3_GUCTL1_PARKMODE_DISABLE_SS BIT(17) +#define DWC3_GUCTL1_PARKMODE_DISABLE_HS BIT(16) #define DWC3_GUCTL1_RESUME_OPMODE_HS_HOST BIT(10) /* Global Status Register */ @@ -1113,6 +1114,8 @@ struct dwc3_scratchpad_array { * generation after resume from suspend. * @parkmode_disable_ss_quirk: set if we need to disable all SuperSpeed * instances in park mode. + * @parkmode_disable_hs_quirk: set if we need to disable all HighSpeed + * instances in park mode. * @tx_de_emphasis_quirk: set if we enable Tx de-emphasis quirk * @tx_de_emphasis: Tx de-emphasis value * 0 - -6dB de-emphasis @@ -1330,6 +1333,7 @@ struct dwc3 { unsigned dis_tx_ipgap_linecheck_quirk:1; unsigned resume_hs_terminations:1; unsigned parkmode_disable_ss_quirk:1; + unsigned parkmode_disable_hs_quirk:1; unsigned gfladj_refclk_lpm_sel:1; unsigned tx_de_emphasis_quirk:1; diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 6fdac0fae461..121092e35ec6 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2093,7 +2093,17 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep, list_for_each_entry(r, &dep->pending_list, list) { if (r == req) { - dwc3_gadget_giveback(dep, req, -ECONNRESET); + /* + * Explicitly check for EP0/1 as dequeue for those + * EPs needs to be handled differently. Control EP + * only deals with one USB req, and giveback will + * occur during dwc3_ep0_stall_and_restart(). EP0 + * requests are never added to started_list.
+ */ + if (dep->number > 1) + dwc3_gadget_giveback(dep, req, -ECONNRESET); + else + dwc3_ep0_reset_state(dwc); goto out; } } diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index d7e992ee7743..1f56b770465e 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -1619,8 +1619,6 @@ static void gadget_unbind_driver(struct device *dev) dev_dbg(&udc->dev, "unbinding gadget driver [%s]\n", driver->function); - kobject_uevent(&udc->dev.kobj, KOBJ_CHANGE); - udc->allow_connect = false; cancel_work_sync(&udc->vbus_work); mutex_lock(&udc->connect_lock); @@ -1640,6 +1638,8 @@ static void gadget_unbind_driver(struct device *dev) driver->is_bound = false; udc->driver = NULL; mutex_unlock(&udc_lock); + + kobject_uevent(&udc->dev.kobj, KOBJ_CHANGE); } /* ------------------------------------------------------------------------- */ diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 544028862de0..10eb2175e749 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1053,19 +1053,19 @@ static void xhci_get_usb3_port_status(struct xhci_port *port, u32 *status, *status |= USB_PORT_STAT_C_CONFIG_ERROR << 16; /* USB3 specific wPortStatus bits */ - if (portsc & PORT_POWER) { + if (portsc & PORT_POWER) *status |= USB_SS_PORT_STAT_POWER; - /* link state handling */ - if (link_state == XDEV_U0) - bus_state->suspended_ports &= ~(1 << portnum); - } - /* remote wake resume signaling complete */ - if (bus_state->port_remote_wakeup & (1 << portnum) && + /* no longer suspended or resuming */ + if (link_state != XDEV_U3 && link_state != XDEV_RESUME && link_state != XDEV_RECOVERY) { - bus_state->port_remote_wakeup &= ~(1 << portnum); - usb_hcd_end_port_resume(&hcd->self, portnum); + /* remote wake resume signaling complete */ + if (bus_state->port_remote_wakeup & (1 << portnum)) { + bus_state->port_remote_wakeup &= ~(1 << portnum); + usb_hcd_end_port_resume(&hcd->self, portnum); + } + bus_state->suspended_ports &= ~(1 << portnum); } xhci_hub_report_usb3_link_state(xhci, status, portsc); @@ -1111,6 +1111,21 @@ static void xhci_get_usb2_port_status(struct xhci_port *port, u32 *status, return; } } + + /* + * Clear usb2 resume signalling variables if port is no longer suspended + * or resuming. Port either resumed to U0/U1/U2, disconnected, or in an + * error state. Resume related variables should be cleared in all those cases.
+ */ + if (link_state != XDEV_U3 && link_state != XDEV_RESUME) { + if (bus_state->resume_done[portnum] || + test_bit(portnum, &bus_state->resuming_ports)) { + bus_state->resume_done[portnum] = 0; + clear_bit(portnum, &bus_state->resuming_ports); + usb_hcd_end_port_resume(&port->rhub->hcd->self, portnum); + } + bus_state->suspended_ports &= ~(1 << portnum); + } } /* diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index f00e69bf4c64..c3dd132f02fe 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -2855,7 +2855,7 @@ static void tcpm_pd_ctrl_request(struct tcpm_port *port, PD_MSG_CTRL_NOT_SUPP, NONE_AMS); } else { - if (port->send_discover) { + if (port->send_discover && port->negotiated_rev < PD_REV30) { tcpm_queue_message(port, PD_MSG_CTRL_WAIT); break; } @@ -2871,7 +2871,7 @@ static void tcpm_pd_ctrl_request(struct tcpm_port *port, PD_MSG_CTRL_NOT_SUPP, NONE_AMS); } else { - if (port->send_discover) { + if (port->send_discover && port->negotiated_rev < PD_REV30) { tcpm_queue_message(port, PD_MSG_CTRL_WAIT); break; } @@ -2880,7 +2880,7 @@ static void tcpm_pd_ctrl_request(struct tcpm_port *port, } break; case PD_CTRL_VCONN_SWAP: - if (port->send_discover) { + if (port->send_discover && port->negotiated_rev < PD_REV30) { tcpm_queue_message(port, PD_MSG_CTRL_WAIT); break; } diff --git a/drivers/virt/gunyah/gunyah_vcpu.c b/drivers/virt/gunyah/gunyah_vcpu.c index 82a0cbf55caf..bb13a1aed2e4 100644 --- a/drivers/virt/gunyah/gunyah_vcpu.c +++ b/drivers/virt/gunyah/gunyah_vcpu.c @@ -196,6 +196,7 @@ static int gh_vcpu_run(struct gh_vcpu *vcpu) } gh_error = gh_hypercall_vcpu_run(vcpu->rsc->capid, state_data, &vcpu_run_resp); + memset(state_data, 0, sizeof(state_data)); if (gh_error == GH_ERROR_OK) { switch (vcpu_run_resp.state) { case GH_VCPU_STATE_READY: diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h index 26fa170090b8..b1b846504027 100644 --- a/fs/erofs/compress.h +++ b/fs/erofs/compress.h @@ -89,8 +89,7 @@ static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi, int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf, unsigned int padbufsize); -int z_erofs_decompress(struct z_erofs_decompress_req *rq, - struct page **pagepool); +extern const struct z_erofs_decompressor erofs_decompressors[]; /* prototypes for specific algorithms */ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq, diff --git a/fs/erofs/data.c b/fs/erofs/data.c index 83532525282e..bbfbaf25ee59 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -404,6 +404,8 @@ const struct address_space_operations erofs_raw_access_aops = { .readahead = erofs_readahead, .bmap = erofs_bmap, .direct_IO = noop_direct_IO, + .release_folio = iomap_release_folio, + .invalidate_folio = iomap_invalidate_folio, }; #ifdef CONFIG_FS_DAX diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index 7021e2cf6146..38c7f9c96c68 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -122,11 +122,11 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx, } static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx, - void *inpage, unsigned int *inputmargin, int *maptype, - bool may_inplace) + void *inpage, void *out, unsigned int *inputmargin, + int *maptype, bool may_inplace) { struct z_erofs_decompress_req *rq = ctx->rq; - unsigned int omargin, total, i, j; + unsigned int omargin, total, i; struct page **in; void *src, *tmp; @@ -136,20 +136,20 @@ static void 
*z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx, omargin < LZ4_DECOMPRESS_INPLACE_MARGIN(rq->inputsize)) goto docopy; - for (i = 0; i < ctx->inpages; ++i) { - DBG_BUGON(rq->in[i] == NULL); - for (j = 0; j < ctx->outpages - ctx->inpages + i; ++j) - if (rq->out[j] == rq->in[i]) - goto docopy; - } + for (i = 0; i < ctx->inpages; ++i) + if (rq->out[ctx->outpages - ctx->inpages + i] != + rq->in[i]) + goto docopy; + kunmap_local(inpage); + *maptype = 3; + return out + ((ctx->outpages - ctx->inpages) << PAGE_SHIFT); } if (ctx->inpages <= 1) { *maptype = 0; return inpage; } - kunmap_atomic(inpage); - might_sleep(); + kunmap_local(inpage); src = erofs_vm_map_ram(rq->in, ctx->inpages); if (!src) return ERR_PTR(-ENOMEM); @@ -162,7 +162,7 @@ docopy: src = erofs_get_pcpubuf(ctx->inpages); if (!src) { DBG_BUGON(1); - kunmap_atomic(inpage); + kunmap_local(inpage); return ERR_PTR(-EFAULT); } @@ -173,9 +173,9 @@ docopy: min_t(unsigned int, total, PAGE_SIZE - *inputmargin); if (!inpage) - inpage = kmap_atomic(*in); + inpage = kmap_local_page(*in); memcpy(tmp, inpage + *inputmargin, page_copycnt); - kunmap_atomic(inpage); + kunmap_local(inpage); inpage = NULL; tmp += page_copycnt; total -= page_copycnt; @@ -205,16 +205,16 @@ int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf, } static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx, - u8 *out) + u8 *dst) { struct z_erofs_decompress_req *rq = ctx->rq; bool support_0padding = false, may_inplace = false; unsigned int inputmargin; - u8 *headpage, *src; + u8 *out, *headpage, *src; int ret, maptype; DBG_BUGON(*rq->in == NULL); - headpage = kmap_atomic(*rq->in); + headpage = kmap_local_page(*rq->in); /* LZ4 decompression inplace is only safe if zero_padding is enabled */ if (erofs_sb_has_zero_padding(EROFS_SB(rq->sb))) { @@ -223,7 +223,7 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx, min_t(unsigned int, rq->inputsize, rq->sb->s_blocksize - rq->pageofs_in)); if (ret) { - kunmap_atomic(headpage); + kunmap_local(headpage); return ret; } may_inplace = !((rq->pageofs_in + rq->inputsize) & @@ -231,11 +231,12 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx, } inputmargin = rq->pageofs_in; - src = z_erofs_lz4_handle_overlap(ctx, headpage, &inputmargin, + src = z_erofs_lz4_handle_overlap(ctx, headpage, dst, &inputmargin, &maptype, may_inplace); if (IS_ERR(src)) return PTR_ERR(src); + out = dst + rq->pageofs_out; /* legacy format could compress extra data in a pcluster. 
*/ if (rq->partial_decoding || !support_0padding) ret = LZ4_decompress_safe_partial(src + inputmargin, out, @@ -261,12 +262,12 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx, } if (maptype == 0) { - kunmap_atomic(headpage); + kunmap_local(headpage); } else if (maptype == 1) { vm_unmap_ram(src, ctx->inpages); } else if (maptype == 2) { erofs_put_pcpubuf(src); - } else { + } else if (maptype != 3) { DBG_BUGON(1); return -EFAULT; } @@ -289,7 +290,7 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, /* one optimized fast path only for non bigpcluster cases yet */ if (ctx.inpages == 1 && ctx.outpages == 1 && !rq->inplace_io) { DBG_BUGON(!*rq->out); - dst = kmap_atomic(*rq->out); + dst = kmap_local_page(*rq->out); dst_maptype = 0; goto dstmap_out; } @@ -309,9 +310,9 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, } dstmap_out: - ret = z_erofs_lz4_decompress_mem(&ctx, dst + rq->pageofs_out); + ret = z_erofs_lz4_decompress_mem(&ctx, dst); if (!dst_maptype) - kunmap_atomic(dst); + kunmap_local(dst); else if (dst_maptype == 2) vm_unmap_ram(dst, ctx.outpages); return ret; @@ -320,50 +321,63 @@ dstmap_out: static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq, struct page **pagepool) { - const unsigned int inpages = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT; - const unsigned int outpages = + const unsigned int nrpages_in = + PAGE_ALIGN(rq->pageofs_in + rq->inputsize) >> PAGE_SHIFT; + const unsigned int nrpages_out = PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; - const unsigned int righthalf = min_t(unsigned int, rq->outputsize, - PAGE_SIZE - rq->pageofs_out); - const unsigned int lefthalf = rq->outputsize - righthalf; - const unsigned int interlaced_offset = - rq->alg == Z_EROFS_COMPRESSION_SHIFTED ? 0 : rq->pageofs_out; - unsigned char *src, *dst; + const unsigned int bs = rq->sb->s_blocksize; + unsigned int cur = 0, ni = 0, no, pi, po, insz, cnt; + u8 *kin; - if (outpages > 2 && rq->alg == Z_EROFS_COMPRESSION_SHIFTED) { - DBG_BUGON(1); - return -EFSCORRUPTED; - } - - if (rq->out[0] == *rq->in) { - DBG_BUGON(rq->pageofs_out); - return 0; - } - - src = kmap_local_page(rq->in[inpages - 1]) + rq->pageofs_in; - if (rq->out[0]) { - dst = kmap_local_page(rq->out[0]); - memcpy(dst + rq->pageofs_out, src + interlaced_offset, - righthalf); - kunmap_local(dst); - } - - if (outpages > inpages) { - DBG_BUGON(!rq->out[outpages - 1]); - if (rq->out[outpages - 1] != rq->in[inpages - 1]) { - dst = kmap_local_page(rq->out[outpages - 1]); - memcpy(dst, interlaced_offset ? 
src : - (src + righthalf), lefthalf); - kunmap_local(dst); - } else if (!interlaced_offset) { - memmove(src, src + righthalf, lefthalf); + DBG_BUGON(rq->outputsize > rq->inputsize); + if (rq->alg == Z_EROFS_COMPRESSION_INTERLACED) { + cur = bs - (rq->pageofs_out & (bs - 1)); + pi = (rq->pageofs_in + rq->inputsize - cur) & ~PAGE_MASK; + cur = min(cur, rq->outputsize); + if (cur && rq->out[0]) { + kin = kmap_local_page(rq->in[nrpages_in - 1]); + if (rq->out[0] == rq->in[nrpages_in - 1]) { + memmove(kin + rq->pageofs_out, kin + pi, cur); + flush_dcache_page(rq->out[0]); + } else { + memcpy_to_page(rq->out[0], rq->pageofs_out, + kin + pi, cur); + } + kunmap_local(kin); } + rq->outputsize -= cur; } - kunmap_local(src); + + for (; rq->outputsize; rq->pageofs_in = 0, cur += PAGE_SIZE, ni++) { + insz = min_t(unsigned int, PAGE_SIZE - rq->pageofs_in, + rq->outputsize); + rq->outputsize -= insz; + if (!rq->in[ni]) + continue; + kin = kmap_local_page(rq->in[ni]); + pi = 0; + do { + no = (rq->pageofs_out + cur + pi) >> PAGE_SHIFT; + po = (rq->pageofs_out + cur + pi) & ~PAGE_MASK; + DBG_BUGON(no >= nrpages_out); + cnt = min_t(unsigned int, insz - pi, PAGE_SIZE - po); + if (rq->out[no] == rq->in[ni]) { + memmove(kin + po, + kin + rq->pageofs_in + pi, cnt); + flush_dcache_page(rq->out[no]); + } else if (rq->out[no]) { + memcpy_to_page(rq->out[no], po, + kin + rq->pageofs_in + pi, cnt); + } + pi += cnt; + } while (pi < insz); + kunmap_local(kin); + } + DBG_BUGON(ni > nrpages_in); return 0; } -static struct z_erofs_decompressor decompressors[] = { +const struct z_erofs_decompressor erofs_decompressors[] = { [Z_EROFS_COMPRESSION_SHIFTED] = { .decompress = z_erofs_transform_plain, .name = "shifted" @@ -383,9 +397,3 @@ static struct z_erofs_decompressor decompressors[] = { }, #endif }; - -int z_erofs_decompress(struct z_erofs_decompress_req *rq, - struct page **pagepool) -{ - return decompressors[rq->alg].decompress(rq, pagepool); -} diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index 8fc41fd1620c..190bc3ad5622 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -291,14 +291,19 @@ static int erofs_fill_inode(struct inode *inode) } if (erofs_inode_is_data_compressed(vi->datalayout)) { - if (!erofs_is_fscache_mode(inode->i_sb) && - inode->i_sb->s_blocksize_bits == PAGE_SHIFT) + if (!erofs_is_fscache_mode(inode->i_sb)) { + DO_ONCE_LITE_IF(inode->i_sb->s_blocksize != PAGE_SIZE, + erofs_info, inode->i_sb, + "EXPERIMENTAL EROFS subpage compressed block support in use. 
Use at your own risk!"); err = z_erofs_fill_inode(inode); + } else err = -EOPNOTSUPP; goto out_unlock; } inode->i_mapping->a_ops = &erofs_raw_access_aops; + if (!erofs_is_fscache_mode(inode->i_sb)) + mapping_set_large_folios(inode->i_mapping); #ifdef CONFIG_EROFS_FS_ONDEMAND if (erofs_is_fscache_mode(inode->i_sb)) inode->i_mapping->a_ops = &erofs_fscache_access_aops; diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 1c03daf83a68..23151da13a23 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -544,7 +544,7 @@ int __init z_erofs_init_zip_subsystem(void); void z_erofs_exit_zip_subsystem(void); int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, struct erofs_workgroup *egrp); -int erofs_try_to_free_cached_page(struct page *page); +int erofs_init_managed_cache(struct super_block *sb); int z_erofs_load_lz4_config(struct super_block *sb, struct erofs_super_block *dsb, struct z_erofs_lz4_cfgs *lz4, int len); @@ -565,6 +565,7 @@ static inline int z_erofs_load_lz4_config(struct super_block *sb, } return 0; } +static inline int erofs_init_managed_cache(struct super_block *sb) { return 0; } #endif /* !CONFIG_EROFS_FS_ZIP */ #ifdef CONFIG_EROFS_FS_ZIP_LZMA diff --git a/fs/erofs/super.c b/fs/erofs/super.c index b073b38c1c77..19af9bbcb8f1 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -597,68 +597,6 @@ static int erofs_fc_parse_param(struct fs_context *fc, return 0; } -#ifdef CONFIG_EROFS_FS_ZIP -static const struct address_space_operations managed_cache_aops; - -static bool erofs_managed_cache_release_folio(struct folio *folio, gfp_t gfp) -{ - bool ret = true; - struct address_space *const mapping = folio->mapping; - - DBG_BUGON(!folio_test_locked(folio)); - DBG_BUGON(mapping->a_ops != &managed_cache_aops); - - if (folio_test_private(folio)) - ret = erofs_try_to_free_cached_page(&folio->page); - - return ret; -} - -/* - * It will be called only on inode eviction. In case that there are still some - * decompression requests in progress, wait with rescheduling for a bit here. - * We could introduce an extra locking instead but it seems unnecessary. 
- */ -static void erofs_managed_cache_invalidate_folio(struct folio *folio, - size_t offset, size_t length) -{ - const size_t stop = length + offset; - - DBG_BUGON(!folio_test_locked(folio)); - - /* Check for potential overflow in debug mode */ - DBG_BUGON(stop > folio_size(folio) || stop < length); - - if (offset == 0 && stop == folio_size(folio)) - while (!erofs_managed_cache_release_folio(folio, GFP_NOFS)) - cond_resched(); -} - -static const struct address_space_operations managed_cache_aops = { - .release_folio = erofs_managed_cache_release_folio, - .invalidate_folio = erofs_managed_cache_invalidate_folio, -}; - -static int erofs_init_managed_cache(struct super_block *sb) -{ - struct erofs_sb_info *const sbi = EROFS_SB(sb); - struct inode *const inode = new_inode(sb); - - if (!inode) - return -ENOMEM; - - set_nlink(inode, 1); - inode->i_size = OFFSET_MAX; - - inode->i_mapping->a_ops = &managed_cache_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); - sbi->managed_cache = inode; - return 0; -} -#else -static int erofs_init_managed_cache(struct super_block *sb) { return 0; } -#endif - static struct inode *erofs_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation) { diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 451b9a6cba68..0b1b6ca804b3 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -5,7 +5,6 @@ * Copyright (C) 2022 Alibaba Cloud */ #include "compress.h" -#include #include #include #include @@ -58,6 +57,9 @@ struct z_erofs_pcluster { /* L: total number of bvecs */ unsigned int vcnt; + /* I: pcluster size (compressed size) in bytes */ + unsigned int pclustersize; + /* I: page offset of start position of decompression */ unsigned short pageofs_out; @@ -72,14 +74,6 @@ struct z_erofs_pcluster { struct rcu_head rcu; }; - union { - /* I: physical cluster size in pages */ - unsigned short pclusterpages; - - /* I: tailpacking inline compressed size */ - unsigned short tailpacking_size; - }; - /* I: compression algorithm format */ unsigned char algorithmformat; @@ -119,9 +113,7 @@ static inline bool z_erofs_is_inline_pcluster(struct z_erofs_pcluster *pcl) static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl) { - if (z_erofs_is_inline_pcluster(pcl)) - return 1; - return pcl->pclusterpages; + return PAGE_ALIGN(pcl->pclustersize) >> PAGE_SHIFT; } /* @@ -239,14 +231,20 @@ static void z_erofs_bvec_iter_begin(struct z_erofs_bvec_iter *iter, static int z_erofs_bvec_enqueue(struct z_erofs_bvec_iter *iter, struct z_erofs_bvec *bvec, - struct page **candidate_bvpage) + struct page **candidate_bvpage, + struct page **pagepool) { - if (iter->cur == iter->nr) { - if (!*candidate_bvpage) - return -EAGAIN; + if (iter->cur >= iter->nr) { + struct page *nextpage = *candidate_bvpage; + if (!nextpage) { + nextpage = erofs_allocpage(pagepool, GFP_NOFS); + if (!nextpage) + return -ENOMEM; + set_page_private(nextpage, Z_EROFS_SHORTLIVED_PAGE); + } DBG_BUGON(iter->bvset->nextpage); - iter->bvset->nextpage = *candidate_bvpage; + iter->bvset->nextpage = nextpage; z_erofs_bvset_flip(iter); iter->bvset->nextpage = NULL; @@ -301,12 +299,12 @@ static int z_erofs_create_pcluster_pool(void) return 0; } -static struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int nrpages) +static struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int size) { - int i; + unsigned int nrpages = PAGE_ALIGN(size) >> PAGE_SHIFT; + struct z_erofs_pcluster_slab *pcs = pcluster_pool; - for (i = 0; i < ARRAY_SIZE(pcluster_pool); ++i) { - struct z_erofs_pcluster_slab *pcs = 
pcluster_pool + i; + for (; pcs < pcluster_pool + ARRAY_SIZE(pcluster_pool); ++pcs) { struct z_erofs_pcluster *pcl; if (nrpages > pcs->maxpages) @@ -315,7 +313,7 @@ static struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int nrpages) pcl = kmem_cache_zalloc(pcs->slab, GFP_NOFS); if (!pcl) return ERR_PTR(-ENOMEM); - pcl->pclusterpages = nrpages; + pcl->pclustersize = size; return pcl; } return ERR_PTR(-EINVAL); @@ -507,19 +505,17 @@ enum z_erofs_pclustermode { */ Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE, /* - * The current collection has been linked with the owned chain, and - * could also be linked with the remaining collections, which means - * if the processing page is the tail page of the collection, thus - * the current collection can safely use the whole page (since - * the previous collection is under control) for in-place I/O, as - * illustrated below: - * ________________________________________________________________ - * | tail (partial) page | head (partial) page | - * | (of the current cl) | (of the previous collection) | - * | | | - * |__PCLUSTER_FOLLOWED___|___________PCLUSTER_FOLLOWED____________| + * The pcluster was just linked to a decompression chain by us. It can + * also be linked with the remaining pclusters, which means if the + * processing page is the tail page of a pcluster, this pcluster can + * safely use the whole page (since the previous pcluster is within the + * same chain) for in-place I/O, as illustrated below: + * ___________________________________________________ + * | tail (partial) page | head (partial) page | + * | (of the current pcl) | (of the previous pcl) | + * |___PCLUSTER_FOLLOWED___|_____PCLUSTER_FOLLOWED_____| * - * [ (*) the above page can be used as inplace I/O. ] + * [ (*) the page above can be used as inplace I/O. 
] */ Z_EROFS_PCLUSTER_FOLLOWED, }; @@ -529,12 +525,12 @@ struct z_erofs_decompress_frontend { struct erofs_map_blocks map; struct z_erofs_bvec_iter biter; + struct page *pagepool; struct page *candidate_bvpage; struct z_erofs_pcluster *pcl; z_erofs_next_pcluster_t owned_head; enum z_erofs_pclustermode mode; - bool readahead; /* used for applying cache strategy on the fly */ bool backmost; erofs_off_t headoffset; @@ -564,11 +560,11 @@ static bool z_erofs_should_alloc_cache(struct z_erofs_decompress_frontend *fe) return false; } -static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe, - struct page **pagepool) +static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe) { struct address_space *mc = MNGD_MAPPING(EROFS_I_SB(fe->inode)); struct z_erofs_pcluster *pcl = fe->pcl; + unsigned int pclusterpages = z_erofs_pclusterpages(pcl); bool shouldalloc = z_erofs_should_alloc_cache(fe); bool standalone = true; /* @@ -579,13 +575,14 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe, __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN; unsigned int i; + if (i_blocksize(fe->inode) != PAGE_SIZE) + return; if (fe->mode < Z_EROFS_PCLUSTER_FOLLOWED) return; - for (i = 0; i < pcl->pclusterpages; ++i) { - struct page *page; + for (i = 0; i < pclusterpages; ++i) { + struct page *page, *newpage; void *t; /* mark pages just found for debugging */ - struct page *newpage = NULL; /* the compressed page was loaded before */ if (READ_ONCE(pcl->compressed_bvecs[i].page)) @@ -595,6 +592,7 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe, if (page) { t = (void *)((unsigned long)page | 1); + newpage = NULL; } else { /* I/O is needed, no possible to decompress directly */ standalone = false; @@ -602,11 +600,10 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe, continue; /* - * try to use cached I/O if page allocation - * succeeds or fallback to in-place I/O instead - * to avoid any direct reclaim. + * Try cached I/O if allocation succeeds or fallback to + * in-place I/O instead to avoid any direct reclaim. */ - newpage = erofs_allocpage(pagepool, gfp); + newpage = erofs_allocpage(&fe->pagepool, gfp); if (!newpage) continue; set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE); @@ -619,7 +616,7 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe, if (page) put_page(page); else if (newpage) - erofs_pagepool_add(pagepool, newpage); + erofs_pagepool_add(&fe->pagepool, newpage); } /* @@ -636,6 +633,7 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, { struct z_erofs_pcluster *const pcl = container_of(grp, struct z_erofs_pcluster, obj); + unsigned int pclusterpages = z_erofs_pclusterpages(pcl); int i; DBG_BUGON(z_erofs_is_inline_pcluster(pcl)); @@ -643,7 +641,7 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, * refcount of workgroup is now freezed as 1, * therefore no need to worry about available decompression users. 
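The pclusterpages/tailpacking_size union above is replaced by a single byte-granular pclustersize, and the page count is derived on demand, exactly as the new z_erofs_pclusterpages() does. A standalone model of that derivation, with userspace stand-ins for PAGE_ALIGN/PAGE_SHIFT:

#include <stdio.h>

#define SK_PAGE_SHIFT	12
#define SK_PAGE_SIZE	(1u << SK_PAGE_SHIFT)
#define SK_PAGE_ALIGN(x) (((x) + SK_PAGE_SIZE - 1) & ~(SK_PAGE_SIZE - 1))

/* One byte-granular field now covers both cases: a regular pcluster is
 * a whole number of blocks, a ztailpacking one is a sub-block tail. */
static unsigned int sk_pclusterpages(unsigned int pclustersize)
{
	return SK_PAGE_ALIGN(pclustersize) >> SK_PAGE_SHIFT;
}

int main(void)
{
	printf("%u\n", sk_pclusterpages(300));	/* inline tail -> 1 page */
	printf("%u\n", sk_pclusterpages(8192));	/* 8 KiB pcluster -> 2 pages */
	return 0;
}
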
*/ - for (i = 0; i < pcl->pclusterpages; ++i) { + for (i = 0; i < pclusterpages; ++i) { struct page *page = pcl->compressed_bvecs[i].page; if (!page) @@ -664,29 +662,73 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, return 0; } -int erofs_try_to_free_cached_page(struct page *page) +static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp) { - struct z_erofs_pcluster *const pcl = (void *)page_private(page); - int ret, i; + struct z_erofs_pcluster *pcl = folio_get_private(folio); + unsigned int pclusterpages = z_erofs_pclusterpages(pcl); + bool ret; + int i; + + if (!folio_test_private(folio)) + return true; if (!erofs_workgroup_try_to_freeze(&pcl->obj, 1)) - return 0; + return false; - ret = 0; + ret = false; DBG_BUGON(z_erofs_is_inline_pcluster(pcl)); - for (i = 0; i < pcl->pclusterpages; ++i) { - if (pcl->compressed_bvecs[i].page == page) { + for (i = 0; i < pclusterpages; ++i) { + if (pcl->compressed_bvecs[i].page == &folio->page) { WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL); - ret = 1; + ret = true; break; } } erofs_workgroup_unfreeze(&pcl->obj, 1); + if (ret) - detach_page_private(page); + folio_detach_private(folio); return ret; } +/* + * It will be called only on inode eviction. In case that there are still some + * decompression requests in progress, wait with rescheduling for a bit here. + * An extra lock could be introduced instead but it seems unnecessary. + */ +static void z_erofs_cache_invalidate_folio(struct folio *folio, + size_t offset, size_t length) +{ + const size_t stop = length + offset; + + /* Check for potential overflow in debug mode */ + DBG_BUGON(stop > folio_size(folio) || stop < length); + + if (offset == 0 && stop == folio_size(folio)) + while (!z_erofs_cache_release_folio(folio, GFP_NOFS)) + cond_resched(); +} + +static const struct address_space_operations z_erofs_cache_aops = { + .release_folio = z_erofs_cache_release_folio, + .invalidate_folio = z_erofs_cache_invalidate_folio, +}; + +int erofs_init_managed_cache(struct super_block *sb) +{ + struct inode *const inode = new_inode(sb); + + if (!inode) + return -ENOMEM; + + set_nlink(inode, 1); + inode->i_size = OFFSET_MAX; + inode->i_mapping->a_ops = &z_erofs_cache_aops; + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); + EROFS_SB(sb)->managed_cache = inode; + return 0; +} + static bool z_erofs_try_inplace_io(struct z_erofs_decompress_frontend *fe, struct z_erofs_bvec *bvec) { @@ -717,7 +759,8 @@ static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe, !fe->candidate_bvpage) fe->candidate_bvpage = bvec->page; } - ret = z_erofs_bvec_enqueue(&fe->biter, bvec, &fe->candidate_bvpage); + ret = z_erofs_bvec_enqueue(&fe->biter, bvec, &fe->candidate_bvpage, + &fe->pagepool); fe->pcl->vcnt += (ret >= 0); return ret; } @@ -743,20 +786,20 @@ static void z_erofs_try_to_claim_pcluster(struct z_erofs_decompress_frontend *f) static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe) { struct erofs_map_blocks *map = &fe->map; + struct super_block *sb = fe->inode->i_sb; bool ztailpacking = map->m_flags & EROFS_MAP_META; struct z_erofs_pcluster *pcl; struct erofs_workgroup *grp; int err; if (!(map->m_flags & EROFS_MAP_ENCODED) || - (!ztailpacking && !(map->m_pa >> PAGE_SHIFT))) { + (!ztailpacking && !erofs_blknr(sb, map->m_pa))) { DBG_BUGON(1); return -EFSCORRUPTED; } /* no available pcluster, let's allocate one */ - pcl = z_erofs_alloc_pcluster(ztailpacking ? 
1 : - map->m_plen >> PAGE_SHIFT); + pcl = z_erofs_alloc_pcluster(map->m_plen); if (IS_ERR(pcl)) return PTR_ERR(pcl); @@ -779,10 +822,8 @@ static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe) if (ztailpacking) { pcl->obj.index = 0; /* which indicates ztailpacking */ - pcl->pageofs_in = erofs_blkoff(fe->inode->i_sb, map->m_pa); - pcl->tailpacking_size = map->m_plen; } else { - pcl->obj.index = map->m_pa >> PAGE_SHIFT; + pcl->obj.index = erofs_blknr(sb, map->m_pa); grp = erofs_insert_workgroup(fe->inode->i_sb, &pcl->obj); if (IS_ERR(grp)) { @@ -807,9 +848,11 @@ err_out: return err; } -static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe) +static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe) { struct erofs_map_blocks *map = &fe->map; + struct super_block *sb = fe->inode->i_sb; + erofs_blk_t blknr = erofs_blknr(sb, map->m_pa); struct erofs_workgroup *grp = NULL; int ret; @@ -819,8 +862,7 @@ static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe) DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_NIL); if (!(map->m_flags & EROFS_MAP_META)) { - grp = erofs_find_workgroup(fe->inode->i_sb, - map->m_pa >> PAGE_SHIFT); + grp = erofs_find_workgroup(sb, blknr); } else if ((map->m_pa & ~PAGE_MASK) + map->m_plen > PAGE_SIZE) { DBG_BUGON(1); return -EFSCORRUPTED; @@ -839,9 +881,27 @@ static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe) } else if (ret) { return ret; } + z_erofs_bvec_iter_begin(&fe->biter, &fe->pcl->bvset, Z_EROFS_INLINE_BVECS, fe->pcl->vcnt); - /* since file-backed online pages are traversed in reverse order */ + if (!z_erofs_is_inline_pcluster(fe->pcl)) { + /* bind cache first when cached decompression is preferred */ + z_erofs_bind_cache(fe); + } else { + void *mptr; + + mptr = erofs_read_metabuf(&map->buf, sb, blknr, EROFS_NO_KMAP); + if (IS_ERR(mptr)) { + ret = PTR_ERR(mptr); + erofs_err(sb, "failed to get inline data %d", ret); + return ret; + } + get_page(map->buf.page); + WRITE_ONCE(fe->pcl->compressed_bvecs[0].page, map->buf.page); + fe->pcl->pageofs_in = map->m_pa & ~PAGE_MASK; + fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE; + } + /* file-backed inplace I/O pages are traversed in reverse order */ fe->icur = z_erofs_pclusterpages(fe->pcl); return 0; } @@ -864,20 +924,18 @@ void erofs_workgroup_free_rcu(struct erofs_workgroup *grp) call_rcu(&pcl->rcu, z_erofs_rcu_callback); } -static bool z_erofs_collector_end(struct z_erofs_decompress_frontend *fe) +static void z_erofs_pcluster_end(struct z_erofs_decompress_frontend *fe) { struct z_erofs_pcluster *pcl = fe->pcl; if (!pcl) - return false; + return; z_erofs_bvec_iter_end(&fe->biter); mutex_unlock(&pcl->lock); - if (fe->candidate_bvpage) { - DBG_BUGON(z_erofs_is_shortlived_page(fe->candidate_bvpage)); + if (fe->candidate_bvpage) fe->candidate_bvpage = NULL; - } /* * if all pending pages are added, don't hold its reference @@ -887,25 +945,22 @@ static bool z_erofs_collector_end(struct z_erofs_decompress_frontend *fe) erofs_workgroup_put(&pcl->obj); fe->pcl = NULL; - return true; + fe->backmost = false; } -static int z_erofs_read_fragment(struct inode *inode, erofs_off_t pos, - struct page *page, unsigned int pageofs, - unsigned int len) +static int z_erofs_read_fragment(struct super_block *sb, struct page *page, + unsigned int cur, unsigned int end, erofs_off_t pos) { - struct super_block *sb = inode->i_sb; - struct inode *packed_inode = EROFS_I_SB(inode)->packed_inode; + struct inode *packed_inode = EROFS_SB(sb)->packed_inode; struct 
erofs_buf buf = __EROFS_BUF_INITIALIZER; - u8 *src, *dst; - unsigned int i, cnt; + unsigned int cnt; + u8 *src; if (!packed_inode) return -EFSCORRUPTED; - pos += EROFS_I(inode)->z_fragmentoff; - for (i = 0; i < len; i += cnt) { - cnt = min_t(unsigned int, len - i, + for (; cur < end; cur += cnt, pos += cnt) { + cnt = min_t(unsigned int, end - cur, sb->s_blocksize - erofs_blkoff(sb, pos)); src = erofs_bread(&buf, packed_inode, erofs_blknr(sb, pos), EROFS_KMAP); @@ -913,80 +968,66 @@ static int z_erofs_read_fragment(struct inode *inode, erofs_off_t pos, erofs_put_metabuf(&buf); return PTR_ERR(src); } - - dst = kmap_local_page(page); - memcpy(dst + pageofs + i, src + erofs_blkoff(sb, pos), cnt); - kunmap_local(dst); - pos += cnt; + memcpy_to_page(page, cur, src + erofs_blkoff(sb, pos), cnt); } erofs_put_metabuf(&buf); return 0; } static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, - struct page *page, struct page **pagepool) + struct page *page) { struct inode *const inode = fe->inode; struct erofs_map_blocks *const map = &fe->map; const loff_t offset = page_offset(page); + const unsigned int bs = i_blocksize(inode); bool tight = true, exclusive; - unsigned int cur, end, spiltted; + unsigned int cur, end, len, split; int err = 0; - /* register locked file pages as online pages in pack */ z_erofs_onlinepage_init(page); - - spiltted = 0; + split = 0; end = PAGE_SIZE; repeat: - cur = end - 1; - - if (offset + cur < map->m_la || - offset + cur >= map->m_la + map->m_llen) { - erofs_dbg("out-of-range map @ pos %llu", offset + cur); - - if (z_erofs_collector_end(fe)) - fe->backmost = false; - map->m_la = offset + cur; + if (offset + end - 1 < map->m_la || + offset + end - 1 >= map->m_la + map->m_llen) { + erofs_dbg("out-of-range map @ pos %llu", offset + end - 1); + z_erofs_pcluster_end(fe); + map->m_la = offset + end - 1; map->m_llen = 0; err = z_erofs_map_blocks_iter(inode, map, 0); if (err) goto out; - } else { - if (fe->pcl) - goto hitted; - /* didn't get a valid pcluster previously (very rare) */ } - if (!(map->m_flags & EROFS_MAP_MAPPED) || - map->m_flags & EROFS_MAP_FRAGMENT) - goto hitted; + cur = offset > map->m_la ? 
0 : map->m_la - offset; + /* bump split parts first to avoid several separate cases */ + ++split; - err = z_erofs_collector_begin(fe); - if (err) - goto out; + if (!(map->m_flags & EROFS_MAP_MAPPED)) { + zero_user_segment(page, cur, end); + tight = false; + goto next_part; + } - if (z_erofs_is_inline_pcluster(fe->pcl)) { - void *mp; + if (map->m_flags & EROFS_MAP_FRAGMENT) { + erofs_off_t fpos = offset + cur - map->m_la; - mp = erofs_read_metabuf(&fe->map.buf, inode->i_sb, - erofs_blknr(inode->i_sb, map->m_pa), - EROFS_NO_KMAP); - if (IS_ERR(mp)) { - err = PTR_ERR(mp); - erofs_err(inode->i_sb, - "failed to get inline page, err %d", err); + len = min_t(unsigned int, map->m_llen - fpos, end - cur); + err = z_erofs_read_fragment(inode->i_sb, page, cur, cur + len, + EROFS_I(inode)->z_fragmentoff + fpos); + if (err) goto out; - } - get_page(fe->map.buf.page); - WRITE_ONCE(fe->pcl->compressed_bvecs[0].page, - fe->map.buf.page); - fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE; - } else { - /* bind cache first when cached decompression is preferred */ - z_erofs_bind_cache(fe, pagepool); + tight = false; + goto next_part; } -hitted: + + if (!fe->pcl) { + err = z_erofs_pcluster_begin(fe); + if (err) + goto out; + } + /* * Ensure the current partial page belongs to this submit chain rather * than other concurrent submit chains or the noio(bypass) chain since @@ -994,59 +1035,19 @@ hitted: * for inplace I/O or bvpage (should be processed in a strict order.) */ tight &= (fe->mode > Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE); - - cur = end - min_t(erofs_off_t, offset + end - map->m_la, end); - if (!(map->m_flags & EROFS_MAP_MAPPED)) { - zero_user_segment(page, cur, end); - ++spiltted; - tight = false; - goto next_part; - } - if (map->m_flags & EROFS_MAP_FRAGMENT) { - unsigned int pageofs, skip, len; - - if (offset > map->m_la) { - pageofs = 0; - skip = offset - map->m_la; - } else { - pageofs = map->m_la & ~PAGE_MASK; - skip = 0; - } - len = min_t(unsigned int, map->m_llen - skip, end - cur); - err = z_erofs_read_fragment(inode, skip, page, pageofs, len); - if (err) - goto out; - ++spiltted; - tight = false; - goto next_part; - } - - exclusive = (!cur && (!spiltted || tight)); + exclusive = (!cur && ((split <= 1) || (tight && bs == PAGE_SIZE))); if (cur) tight &= (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED); -retry: err = z_erofs_attach_page(fe, &((struct z_erofs_bvec) { .page = page, .offset = offset - map->m_la, .end = end, }), exclusive); - /* should allocate an additional short-lived page for bvset */ - if (err == -EAGAIN && !fe->candidate_bvpage) { - fe->candidate_bvpage = alloc_page(GFP_NOFS | __GFP_NOFAIL); - set_page_private(fe->candidate_bvpage, - Z_EROFS_SHORTLIVED_PAGE); - goto retry; - } - - if (err) { - DBG_BUGON(err == -EAGAIN && fe->candidate_bvpage); + if (err) goto out; - } z_erofs_onlinepage_split(page); - /* bump up the number of spiltted parts of a page */ - ++spiltted; if (fe->pcl->pageofs_out != (map->m_la & ~PAGE_MASK)) fe->pcl->multibases = true; if (fe->pcl->length < offset + end - map->m_la) { @@ -1071,12 +1072,12 @@ out: z_erofs_page_mark_eio(page); z_erofs_onlinepage_endio(page); - erofs_dbg("%s, finish page: %pK spiltted: %u map->m_llen %llu", - __func__, page, spiltted, map->m_llen); + erofs_dbg("%s, finish page: %pK split: %u map->m_llen %llu", + __func__, page, split, map->m_llen); return err; } -static bool z_erofs_get_sync_decompress_policy(struct erofs_sb_info *sbi, +static bool z_erofs_is_sync_decompress(struct erofs_sb_info *sbi, unsigned int readahead_pages) { /* auto: enable 
for read_folio, disable for readahead */ @@ -1256,8 +1257,9 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, struct erofs_sb_info *const sbi = EROFS_SB(be->sb); struct z_erofs_pcluster *pcl = be->pcl; unsigned int pclusterpages = z_erofs_pclusterpages(pcl); - unsigned int i, inputsize; - int err2; + const struct z_erofs_decompressor *decompressor = + &erofs_decompressors[pcl->algorithmformat]; + int i, err2; struct page *page; bool overlapped; @@ -1294,18 +1296,13 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, if (err) goto out; - if (z_erofs_is_inline_pcluster(pcl)) - inputsize = pcl->tailpacking_size; - else - inputsize = pclusterpages * PAGE_SIZE; - - err = z_erofs_decompress(&(struct z_erofs_decompress_req) { + err = decompressor->decompress(&(struct z_erofs_decompress_req) { .sb = be->sb, .in = be->compressed_pages, .out = be->decompressed_pages, .pageofs_in = pcl->pageofs_in, .pageofs_out = pcl->pageofs_out, - .inputsize = inputsize, + .inputsize = pcl->pclustersize, .outputsize = pcl->length, .alg = pcl->algorithmformat, .inplace_io = overlapped, @@ -1321,12 +1318,11 @@ out: put_page(page); } else { for (i = 0; i < pclusterpages; ++i) { - page = pcl->compressed_bvecs[i].page; + /* consider shortlived pages added when decompressing */ + page = be->compressed_pages[i]; if (erofs_page_is_managed(sbi, page)) continue; - - /* recycle all individual short-lived pages */ (void)z_erofs_put_shortlivedpage(be->pagepool, page); WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL); } @@ -1446,86 +1442,85 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, z_erofs_decompressqueue_work(&io->u.work); } -static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl, - unsigned int nr, - struct page **pagepool, - struct address_space *mc) +static void z_erofs_fill_bio_vec(struct bio_vec *bvec, + struct z_erofs_decompress_frontend *f, + struct z_erofs_pcluster *pcl, + unsigned int nr, + struct address_space *mc) { - const pgoff_t index = pcl->obj.index; gfp_t gfp = mapping_gfp_mask(mc); bool tocache = false; - + struct z_erofs_bvec *zbv = pcl->compressed_bvecs + nr; struct address_space *mapping; - struct page *oldpage, *page; - int justfound; + struct page *page, *oldpage; + int justfound, bs = i_blocksize(f->inode); + /* Except for inplace pages, the entire page can be used for I/Os */ + bvec->bv_offset = 0; + bvec->bv_len = PAGE_SIZE; repeat: - page = READ_ONCE(pcl->compressed_bvecs[nr].page); - oldpage = page; - - if (!page) + oldpage = READ_ONCE(zbv->page); + if (!oldpage) goto out_allocpage; - justfound = (unsigned long)page & 1UL; - page = (struct page *)((unsigned long)page & ~1UL); + justfound = (unsigned long)oldpage & 1UL; + page = (struct page *)((unsigned long)oldpage & ~1UL); + bvec->bv_page = page; + DBG_BUGON(z_erofs_is_shortlived_page(page)); /* - * preallocated cached pages, which is used to avoid direct reclaim - * otherwise, it will go inplace I/O path instead. + * Handle preallocated cached pages. We tried to allocate such pages + * without triggering direct reclaim. If allocation failed, inplace + * file-backed pages will be used instead. 
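With the hunk above, z_erofs_decompress_pcluster() resolves its handler from erofs_decompressors[] by pcl->algorithmformat and passes the byte-accurate pclustersize as inputsize. A self-contained model of that ops-table dispatch (all sk_-prefixed names are stand-ins; the plain transform is only one illustrative entry):

#include <string.h>

struct sk_decompress_req {
	const void *in;
	void *out;
	unsigned int inputsize;		/* pcl->pclustersize, in bytes */
	unsigned int outputsize;	/* pcl->length */
};

/* stand-in for the "plain" transform: data stored uncompressed */
static int sk_plain(struct sk_decompress_req *rq)
{
	if (rq->outputsize > rq->inputsize)
		return -1;
	memcpy(rq->out, rq->in, rq->outputsize);
	return 0;
}

static int (*const sk_decompressors[])(struct sk_decompress_req *) = {
	[0] = sk_plain,			/* indexed by algorithmformat */
};

static int sk_decompress(unsigned int fmt, struct sk_decompress_req *rq)
{
	return sk_decompressors[fmt](rq);
}
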
*/ if (page->private == Z_EROFS_PREALLOCATED_PAGE) { - WRITE_ONCE(pcl->compressed_bvecs[nr].page, page); set_page_private(page, 0); + WRITE_ONCE(zbv->page, page); tocache = true; goto out_tocache; } + mapping = READ_ONCE(page->mapping); - /* - * file-backed online pages in plcuster are all locked steady, - * therefore it is impossible for `mapping' to be NULL. + * File-backed pages for inplace I/Os are all locked steady, + * therefore it is impossible for `mapping` to be NULL. */ - if (mapping && mapping != mc) - /* ought to be unmanaged pages */ - goto out; - - /* directly return for shortlived page as well */ - if (z_erofs_is_shortlived_page(page)) - goto out; + if (mapping && mapping != mc) { + if (zbv->offset < 0) + bvec->bv_offset = round_up(-zbv->offset, bs); + bvec->bv_len = round_up(zbv->end, bs) - bvec->bv_offset; + return; + } lock_page(page); - /* only true if page reclaim goes wrong, should never happen */ DBG_BUGON(justfound && PagePrivate(page)); - /* the page is still in manage cache */ + /* the cached page is still in managed cache */ if (page->mapping == mc) { - WRITE_ONCE(pcl->compressed_bvecs[nr].page, page); - + WRITE_ONCE(zbv->page, page); + /* + * The cached page is still available but without a valid + * `->private` pcluster hint. Let's reconnect them. + */ if (!PagePrivate(page)) { - /* - * impossible to be !PagePrivate(page) for - * the current restriction as well if - * the page is already in compressed_bvecs[]. - */ DBG_BUGON(!justfound); - - justfound = 0; - set_page_private(page, (unsigned long)pcl); - SetPagePrivate(page); + /* compressed_bvecs[] already takes a ref */ + attach_page_private(page, pcl); + put_page(page); } - /* no need to submit io if it is already up-to-date */ + /* no need to submit if it is already up-to-date */ if (PageUptodate(page)) { unlock_page(page); - page = NULL; + bvec->bv_page = NULL; } - goto out; + return; } /* - * the managed page has been truncated, it's unsafe to - * reuse this one, let's allocate a new cache-managed page. + * It has been truncated, so it's unsafe to reuse this one. Let's + * allocate a new page for compressed data. 
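For unmanaged in-place pages with sub-page filesystem blocks, the code above rounds the bvec to block boundaries so the device I/O stays block-aligned. A worked userspace example of that arithmetic with invented values:

#include <stdio.h>

#define sk_round_up(x, y)	((((x) + (y) - 1) / (y)) * (y))

int main(void)
{
	int bs = 1024;		/* i_blocksize(inode): 1 KiB blocks */
	int zbv_offset = -1536;	/* data starts 1.5 KiB before this page */
	int zbv_end = 2560;	/* data ends 2.5 KiB into this page */
	int bv_offset = 0;
	int bv_len;

	if (zbv_offset < 0)
		bv_offset = sk_round_up(-zbv_offset, bs);	/* -> 2048 */
	bv_len = sk_round_up(zbv_end, bs) - bv_offset;		/* -> 1024 */
	printf("bv_offset=%d bv_len=%d\n", bv_offset, bv_len);
	return 0;
}
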
*/ DBG_BUGON(page->mapping); DBG_BUGON(!justfound); @@ -1534,25 +1529,23 @@ repeat: unlock_page(page); put_page(page); out_allocpage: - page = erofs_allocpage(pagepool, gfp | __GFP_NOFAIL); - if (oldpage != cmpxchg(&pcl->compressed_bvecs[nr].page, - oldpage, page)) { - erofs_pagepool_add(pagepool, page); + page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL); + if (oldpage != cmpxchg(&zbv->page, oldpage, page)) { + erofs_pagepool_add(&f->pagepool, page); cond_resched(); goto repeat; } + bvec->bv_page = page; out_tocache: - if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) { - /* turn into temporary page if fails (1 ref) */ + if (!tocache || bs != PAGE_SIZE || + add_to_page_cache_lru(page, mc, pcl->obj.index + nr, gfp)) { + /* turn into a temporary shortlived page (1 ref) */ set_page_private(page, Z_EROFS_SHORTLIVED_PAGE); - goto out; + return; } attach_page_private(page, pcl); - /* drop a refcount added by allocpage (then we have 2 refs here) */ + /* drop a refcount added by allocpage (then 2 refs in total here) */ put_page(page); - -out: /* the only exit (for tracing and debugging) */ - return page; } static struct z_erofs_decompressqueue *jobqueue_init(struct super_block *sb, @@ -1607,7 +1600,7 @@ static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl, qtail[JQ_BYPASS] = &pcl->next; } -static void z_erofs_decompressqueue_endio(struct bio *bio) +static void z_erofs_submissionqueue_endio(struct bio *bio) { struct z_erofs_decompressqueue *q = bio->bi_private; blk_status_t err = bio->bi_status; @@ -1619,7 +1612,6 @@ static void z_erofs_decompressqueue_endio(struct bio *bio) DBG_BUGON(PageUptodate(page)); DBG_BUGON(z_erofs_page_is_invalidated(page)); - if (erofs_page_is_managed(EROFS_SB(q->sb), page)) { if (!err) SetPageUptodate(page); @@ -1633,9 +1625,8 @@ static void z_erofs_decompressqueue_endio(struct bio *bio) } static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f, - struct page **pagepool, struct z_erofs_decompressqueue *fgq, - bool *force_fg) + bool *force_fg, bool readahead) { struct super_block *sb = f->inode->i_sb; struct address_space *mc = MNGD_MAPPING(EROFS_SB(sb)); @@ -1643,17 +1634,14 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f, struct z_erofs_decompressqueue *q[NR_JOBQUEUES]; z_erofs_next_pcluster_t owned_head = f->owned_head; /* bio is NULL initially, so no need to initialize last_{index,bdev} */ - pgoff_t last_index; + erofs_off_t last_pa; struct block_device *last_bdev; unsigned int nr_bios = 0; struct bio *bio = NULL; unsigned long pflags; int memstall = 0; - /* - * if managed cache is enabled, bypass jobqueue is needed, - * no need to read from device for all pclusters in this queue. - */ + /* No need to read from device for pclusters in the bypass queue. 
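The out_allocpage path above claims a compressed-page slot without a lock: allocate first, publish with cmpxchg, and on a lost race hand the fresh page back to the pool. A simplified model with C11 atomics (the real code then re-runs the whole lookup, since the winner's page may simply be reused):

#include <stdatomic.h>

static void *sk_claim(_Atomic(void *) *slot, void *seen,
		      void *(*alloc)(void), void (*recycle)(void *))
{
	void *fresh = alloc();

	if (atomic_compare_exchange_strong(slot, &seen, fresh))
		return fresh;		/* slot now points at our page */
	recycle(fresh);			/* raced: give the page back */
	return seen;			/* updated by the failed cmpxchg */
}
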
*/ q[JQ_BYPASS] = jobqueue_init(sb, fgq + JQ_BYPASS, NULL); q[JQ_SUBMIT] = jobqueue_init(sb, fgq + JQ_SUBMIT, force_fg); @@ -1666,7 +1654,8 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f, do { struct erofs_map_dev mdev; struct z_erofs_pcluster *pcl; - pgoff_t cur, end; + erofs_off_t cur, end; + struct bio_vec bvec; unsigned int i = 0; bool bypass = true; @@ -1685,18 +1674,14 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f, }; (void)erofs_map_dev(sb, &mdev); - cur = erofs_blknr(sb, mdev.m_pa); - end = cur + pcl->pclusterpages; - + cur = mdev.m_pa; + end = cur + pcl->pclustersize; do { - struct page *page; - - page = pickup_page_for_submission(pcl, i++, pagepool, - mc); - if (!page) + z_erofs_fill_bio_vec(&bvec, f, pcl, i++, mc); + if (!bvec.bv_page) continue; - if (bio && (cur != last_index + 1 || + if (bio && (cur != last_pa || last_bdev != mdev.m_bdev)) { submit_bio_retry: submit_bio(bio); @@ -1707,7 +1692,8 @@ submit_bio_retry: bio = NULL; } - if (unlikely(PageWorkingset(page)) && !memstall) { + if (unlikely(PageWorkingset(bvec.bv_page)) && + !memstall) { psi_memstall_enter(&pflags); memstall = 1; } @@ -1715,23 +1701,24 @@ submit_bio_retry: if (!bio) { bio = bio_alloc(mdev.m_bdev, BIO_MAX_VECS, REQ_OP_READ, GFP_NOIO); - bio->bi_end_io = z_erofs_decompressqueue_endio; - - last_bdev = mdev.m_bdev; - bio->bi_iter.bi_sector = (sector_t)cur << - (sb->s_blocksize_bits - 9); + bio->bi_end_io = z_erofs_submissionqueue_endio; + bio->bi_iter.bi_sector = cur >> 9; bio->bi_private = q[JQ_SUBMIT]; - if (f->readahead) + if (readahead) bio->bi_opf |= REQ_RAHEAD; ++nr_bios; + last_bdev = mdev.m_bdev; } - if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) + if (cur + bvec.bv_len > end) + bvec.bv_len = end - cur; + if (!bio_add_page(bio, bvec.bv_page, bvec.bv_len, + bvec.bv_offset)) goto submit_bio_retry; - last_index = cur; + last_pa = cur + bvec.bv_len; bypass = false; - } while (++cur < end); + } while ((cur += bvec.bv_len) < end); if (!bypass) qtail[JQ_SUBMIT] = &pcl->next; @@ -1757,16 +1744,16 @@ submit_bio_retry: } static void z_erofs_runqueue(struct z_erofs_decompress_frontend *f, - struct page **pagepool, bool force_fg) + bool force_fg, bool ra) { struct z_erofs_decompressqueue io[NR_JOBQUEUES]; if (f->owned_head == Z_EROFS_PCLUSTER_TAIL) return; - z_erofs_submit_queue(f, pagepool, io, &force_fg); + z_erofs_submit_queue(f, io, &force_fg, ra); /* handle bypass queue (no i/o pclusters) immediately */ - z_erofs_decompress_queue(&io[JQ_BYPASS], pagepool); + z_erofs_decompress_queue(&io[JQ_BYPASS], &f->pagepool); if (!force_fg) return; @@ -1775,7 +1762,7 @@ static void z_erofs_runqueue(struct z_erofs_decompress_frontend *f, wait_for_completion_io(&io[JQ_SUBMIT].u.done); /* handle synchronous decompress queue in the caller context */ - z_erofs_decompress_queue(&io[JQ_SUBMIT], pagepool); + z_erofs_decompress_queue(&io[JQ_SUBMIT], &f->pagepool); } /* @@ -1783,29 +1770,28 @@ static void z_erofs_runqueue(struct z_erofs_decompress_frontend *f, * approximate readmore strategies as a start. 
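The submission loop above now walks [mdev.m_pa, mdev.m_pa + pclustersize) in bytes rather than in block indices: the bio sector becomes a plain shift, and the trailing bvec is clamped so a partial last block never reads past the pcluster. The two helpers below model just that arithmetic:

#include <stdint.h>

typedef uint64_t sk_sector_t;

static sk_sector_t sk_bi_sector(uint64_t cur_bytes)
{
	return cur_bytes >> 9;		/* 512-byte sectors */
}

static unsigned int sk_clamp_len(uint64_t cur, uint64_t end,
				 unsigned int bv_len)
{
	return (cur + bv_len > end) ? (unsigned int)(end - cur) : bv_len;
}
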
*/ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f, - struct readahead_control *rac, - erofs_off_t end, - struct page **pagepool, - bool backmost) + struct readahead_control *rac, bool backmost) { struct inode *inode = f->inode; struct erofs_map_blocks *map = &f->map; - erofs_off_t cur; + erofs_off_t cur, end, headoffset = f->headoffset; int err; if (backmost) { + if (rac) + end = headoffset + readahead_length(rac) - 1; + else + end = headoffset + PAGE_SIZE - 1; map->m_la = end; err = z_erofs_map_blocks_iter(inode, map, EROFS_GET_BLOCKS_READMORE); if (err) return; - /* expend ra for the trailing edge if readahead */ + /* expand ra for the trailing edge if readahead */ if (rac) { - loff_t newstart = readahead_pos(rac); - cur = round_up(map->m_la + map->m_llen, PAGE_SIZE); - readahead_expand(rac, newstart, cur - newstart); + readahead_expand(rac, headoffset, cur - headoffset); return; } end = round_up(end, PAGE_SIZE); @@ -1826,7 +1812,7 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f, if (PageUptodate(page)) { unlock_page(page); } else { - err = z_erofs_do_read_page(f, page, pagepool); + err = z_erofs_do_read_page(f, page); if (err) erofs_err(inode->i_sb, "readmore error at page %lu @ nid %llu", @@ -1847,28 +1833,24 @@ static int z_erofs_read_folio(struct file *file, struct folio *folio) struct inode *const inode = page->mapping->host; struct erofs_sb_info *const sbi = EROFS_I_SB(inode); struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode); - struct page *pagepool = NULL; int err; trace_erofs_readpage(page, false); f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT; - z_erofs_pcluster_readmore(&f, NULL, f.headoffset + PAGE_SIZE - 1, - &pagepool, true); - err = z_erofs_do_read_page(&f, page, &pagepool); - z_erofs_pcluster_readmore(&f, NULL, 0, &pagepool, false); - - (void)z_erofs_collector_end(&f); + z_erofs_pcluster_readmore(&f, NULL, true); + err = z_erofs_do_read_page(&f, page); + z_erofs_pcluster_readmore(&f, NULL, false); + z_erofs_pcluster_end(&f); /* if some compressed cluster ready, need submit them anyway */ - z_erofs_runqueue(&f, &pagepool, - z_erofs_get_sync_decompress_policy(sbi, 0)); + z_erofs_runqueue(&f, z_erofs_is_sync_decompress(sbi, 0), false); if (err) erofs_err(inode->i_sb, "failed to read, err [%d]", err); erofs_put_metabuf(&f.map.buf); - erofs_release_pages(&pagepool); + erofs_release_pages(&f.pagepool); return err; } @@ -1877,14 +1859,12 @@ static void z_erofs_readahead(struct readahead_control *rac) struct inode *const inode = rac->mapping->host; struct erofs_sb_info *const sbi = EROFS_I_SB(inode); struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode); - struct page *pagepool = NULL, *head = NULL, *page; + struct page *head = NULL, *page; unsigned int nr_pages; - f.readahead = true; f.headoffset = readahead_pos(rac); - z_erofs_pcluster_readmore(&f, rac, f.headoffset + - readahead_length(rac) - 1, &pagepool, true); + z_erofs_pcluster_readmore(&f, rac, true); nr_pages = readahead_count(rac); trace_erofs_readpages(inode, readahead_index(rac), nr_pages, false); @@ -1900,20 +1880,19 @@ static void z_erofs_readahead(struct readahead_control *rac) /* traversal in reverse order */ head = (void *)page_private(page); - err = z_erofs_do_read_page(&f, page, &pagepool); + err = z_erofs_do_read_page(&f, page); if (err) erofs_err(inode->i_sb, "readahead error at page %lu @ nid %llu", page->index, EROFS_I(inode)->nid); put_page(page); } - z_erofs_pcluster_readmore(&f, rac, 0, &pagepool, 
false); - (void)z_erofs_collector_end(&f); + z_erofs_pcluster_readmore(&f, rac, false); + z_erofs_pcluster_end(&f); - z_erofs_runqueue(&f, &pagepool, - z_erofs_get_sync_decompress_policy(sbi, nr_pages)); + z_erofs_runqueue(&f, z_erofs_is_sync_decompress(sbi, nr_pages), true); erofs_put_metabuf(&f.map.buf); - erofs_release_pages(&pagepool); + erofs_release_pages(&f.pagepool); } const struct address_space_operations z_erofs_aops = { diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 8973ccad707d..f5d3ba39dd42 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -101,29 +101,26 @@ static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m, } static unsigned int decode_compactedbits(unsigned int lobits, - unsigned int lomask, u8 *in, unsigned int pos, u8 *type) { const unsigned int v = get_unaligned_le32(in + pos / 8) >> (pos & 7); - const unsigned int lo = v & lomask; + const unsigned int lo = v & ((1 << lobits) - 1); *type = (v >> lobits) & 3; return lo; } -static int get_compacted_la_distance(unsigned int lclusterbits, +static int get_compacted_la_distance(unsigned int lobits, unsigned int encodebits, unsigned int vcnt, u8 *in, int i) { - const unsigned int lomask = (1 << lclusterbits) - 1; unsigned int lo, d1 = 0; u8 type; DBG_BUGON(i >= vcnt); do { - lo = decode_compactedbits(lclusterbits, lomask, - in, encodebits * i, &type); + lo = decode_compactedbits(lobits, in, encodebits * i, &type); if (type != Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) return d1; @@ -142,15 +139,14 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m, { struct erofs_inode *const vi = EROFS_I(m->inode); const unsigned int lclusterbits = vi->z_logical_clusterbits; - const unsigned int lomask = (1 << lclusterbits) - 1; - unsigned int vcnt, base, lo, encodebits, nblk, eofs; + unsigned int vcnt, base, lo, lobits, encodebits, nblk, eofs; int i; u8 *in, type; bool big_pcluster; if (1 << amortizedshift == 4 && lclusterbits <= 14) vcnt = 2; - else if (1 << amortizedshift == 2 && lclusterbits == 12) + else if (1 << amortizedshift == 2 && lclusterbits <= 12) vcnt = 16; else return -EOPNOTSUPP; @@ -159,6 +155,7 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m, m->nextpackoff = round_down(pos, vcnt << amortizedshift) + (vcnt << amortizedshift); big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1; + lobits = max(lclusterbits, ilog2(Z_EROFS_VLE_DI_D0_CBLKCNT) + 1U); encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt; eofs = erofs_blkoff(m->inode->i_sb, pos); base = round_down(eofs, vcnt << amortizedshift); @@ -166,15 +163,14 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m, i = (eofs - base) >> amortizedshift; - lo = decode_compactedbits(lclusterbits, lomask, - in, encodebits * i, &type); + lo = decode_compactedbits(lobits, in, encodebits * i, &type); m->type = type; if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) { m->clusterofs = 1 << lclusterbits; /* figure out lookahead_distance: delta[1] if needed */ if (lookahead) - m->delta[1] = get_compacted_la_distance(lclusterbits, + m->delta[1] = get_compacted_la_distance(lobits, encodebits, vcnt, in, i); if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) { if (!big_pcluster) { @@ -193,8 +189,8 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m, * of which lo saves delta[1] rather than delta[0]. * Hence, get delta[0] by the previous lcluster indirectly. 
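In the zmap.c hunks here, decode_compactedbits() derives the low-bit mask from a caller-supplied lobits, which unpack_compacted_index() widens to max(lclusterbits, ilog2(Z_EROFS_VLE_DI_D0_CBLKCNT) + 1) so big-pcluster block counts are not truncated. A standalone model of the bit extraction (little-endian host assumed):

#include <stdint.h>
#include <string.h>

/* Each packed entry holds `lobits` low bits plus a 2-bit lcluster type
 * at an arbitrary bit offset in a little-endian byte stream. */
static unsigned int sk_decode(unsigned int lobits, const uint8_t *in,
			      unsigned int pos, uint8_t *type)
{
	uint32_t v;

	memcpy(&v, in + pos / 8, sizeof(v));
	v >>= pos & 7;
	*type = (v >> lobits) & 3;
	return v & ((1u << lobits) - 1);
}
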
*/ - lo = decode_compactedbits(lclusterbits, lomask, - in, encodebits * (i - 1), &type); + lo = decode_compactedbits(lobits, in, + encodebits * (i - 1), &type); if (type != Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) lo = 0; else if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) @@ -209,8 +205,8 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m, nblk = 1; while (i > 0) { --i; - lo = decode_compactedbits(lclusterbits, lomask, - in, encodebits * i, &type); + lo = decode_compactedbits(lobits, in, + encodebits * i, &type); if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) i -= lo; @@ -221,8 +217,8 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m, nblk = 0; while (i > 0) { --i; - lo = decode_compactedbits(lclusterbits, lomask, - in, encodebits * i, &type); + lo = decode_compactedbits(lobits, in, + encodebits * i, &type); if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) { if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) { --i; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index eccecd3fac90..30217f0fed81 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -39,6 +39,8 @@ #include #include +#include + /* * LOCKING: * There are three level of locking required by epoll : @@ -1373,15 +1375,20 @@ static int ep_create_wakeup_source(struct epitem *epi) { struct name_snapshot n; struct wakeup_source *ws; + char ws_name[64]; + strlcpy(ws_name, "eventpoll", sizeof(ws_name)); + trace_android_vh_ep_create_wakeup_source(ws_name, sizeof(ws_name)); if (!epi->ep->ws) { - epi->ep->ws = wakeup_source_register(NULL, "eventpoll"); + epi->ep->ws = wakeup_source_register(NULL, ws_name); if (!epi->ep->ws) return -ENOMEM; } take_dentry_name_snapshot(&n, epi->ffd.file->f_path.dentry); - ws = wakeup_source_register(NULL, n.name.name); + strlcpy(ws_name, n.name.name, sizeof(ws_name)); + trace_android_vh_ep_create_wakeup_source(ws_name, sizeof(ws_name)); + ws = wakeup_source_register(NULL, ws_name); release_dentry_name_snapshot(&n); if (!ws) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 5010a33acb8a..60708d47aaa8 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2734,7 +2734,9 @@ recover_xnid: f2fs_update_inode_page(inode); /* 3: update and set xattr node page dirty */ - memcpy(F2FS_NODE(xpage), F2FS_NODE(page), VALID_XATTR_BLOCK_SIZE); + if (page) + memcpy(F2FS_NODE(xpage), F2FS_NODE(page), + VALID_XATTR_BLOCK_SIZE); set_page_dirty(xpage); f2fs_put_page(xpage, 1); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index db3b641f2158..adaad16468d8 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -363,10 +363,10 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, *xe = __find_xattr(cur_addr, last_txattr_addr, NULL, index, len, name); if (!*xe) { - f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr", + f2fs_err(F2FS_I_SB(inode), "lookup inode (%lu) has corrupted xattr", inode->i_ino); set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); - err = -EFSCORRUPTED; + err = -ENODATA; f2fs_handle_error(F2FS_I_SB(inode), ERROR_CORRUPTED_XATTR); goto out; @@ -583,13 +583,12 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) if ((void *)(entry) + sizeof(__u32) > last_base_addr || (void *)XATTR_NEXT_ENTRY(entry) > last_base_addr) { - f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr", + f2fs_err(F2FS_I_SB(inode), "list inode (%lu) has corrupted xattr", inode->i_ino); set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); - error = -EFSCORRUPTED; f2fs_handle_error(F2FS_I_SB(inode), ERROR_CORRUPTED_XATTR); - goto cleanup; + break; } if (!handler || (handler->list && 
!handler->list(dentry))) @@ -650,7 +649,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, if (size > MAX_VALUE_LEN(inode)) return -E2BIG; - +retry: error = read_all_xattrs(inode, ipage, &base_addr); if (error) return error; @@ -660,7 +659,14 @@ static int __f2fs_setxattr(struct inode *inode, int index, /* find entry with wanted name. */ here = __find_xattr(base_addr, last_base_addr, NULL, index, len, name); if (!here) { - f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr", + if (!F2FS_I(inode)->i_xattr_nid) { + f2fs_notice(F2FS_I_SB(inode), + "recover xattr in inode (%lu)", inode->i_ino); + f2fs_recover_xattr_data(inode, NULL); + kfree(base_addr); + goto retry; + } + f2fs_err(F2FS_I_SB(inode), "set inode (%lu) has corrupted xattr", inode->i_ino); set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); error = -EFSCORRUPTED; diff --git a/fs/fuse/backing.c b/fs/fuse/backing.c index e16457c75944..6ca74987f7da 100644 --- a/fs/fuse/backing.c +++ b/fs/fuse/backing.c @@ -1117,7 +1117,6 @@ int fuse_lookup_backing(struct fuse_bpf_args *fa, struct inode *dir, struct kstat stat; int err; - /* TODO this will not handle lookups over mount points */ inode_lock_nested(dir_backing_inode, I_MUTEX_PARENT); backing_entry = lookup_one_len(entry->d_name.name, dir_backing_entry, strlen(entry->d_name.name)); @@ -1136,16 +1135,22 @@ int fuse_lookup_backing(struct fuse_bpf_args *fa, struct inode *dir, return 0; } + err = follow_down(&fuse_entry->backing_path); + if (err) + goto err_out; + err = vfs_getattr(&fuse_entry->backing_path, &stat, STATX_BASIC_STATS, 0); - if (err) { - path_put_init(&fuse_entry->backing_path); - return err; - } + if (err) + goto err_out; fuse_stat_to_attr(get_fuse_conn(dir), backing_entry->d_inode, &stat, &feo->attr); return 0; + +err_out: + path_put_init(&fuse_entry->backing_path); + return err; } int fuse_handle_backing(struct fuse_entry_bpf *feb, struct inode **backing_inode, diff --git a/fs/timerfd.c b/fs/timerfd.c index e9c96a0c79f1..de8e736bbf7b 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -28,6 +28,8 @@ #include #include +#include + struct timerfd_ctx { union { struct hrtimer tmr; @@ -407,6 +409,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) { int ufd; struct timerfd_ctx *ctx; + char file_name_buf[32]; /* Check the TFD_* constants for consistency. 
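The timerfd hunk that starts here follows the same vendor-hook pattern as the eventpoll change above: seed a stack buffer with the default name, let an optional hook rewrite it in place, then register whatever ends up in the buffer. A userspace model (sk_-prefixed names are stand-ins; strncpy plus an explicit NUL stands in for strlcpy()):

#include <string.h>

typedef void (*sk_name_hook)(char *name, int len);
static sk_name_hook sk_hook;	/* NULL until a vendor attaches one */

static void sk_build_name(char *buf, size_t len, const char *def)
{
	strncpy(buf, def, len - 1);
	buf[len - 1] = '\0';
	if (sk_hook)
		sk_hook(buf, (int)len);	/* may rewrite the name in place */
}
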
*/ BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC); @@ -443,7 +446,9 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) ctx->moffs = ktime_mono_to_real(0); - ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, + strlcpy(file_name_buf, "[timerfd]", sizeof(file_name_buf)); + trace_android_vh_timerfd_create(file_name_buf, sizeof(file_name_buf)); + ufd = anon_inode_getfd(file_name_buf, &timerfd_fops, ctx, O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS)); if (ufd < 0) kfree(ctx); @@ -451,7 +456,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) return ufd; } -static int do_timerfd_settime(int ufd, int flags, +static int do_timerfd_settime(int ufd, int flags, const struct itimerspec64 *new, struct itimerspec64 *old) { diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 320d3b287ed0..6a6ff277501d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2697,6 +2697,9 @@ enum bpf_text_poke_type { int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *addr1, void *addr2); +void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke, + struct bpf_prog *new, struct bpf_prog *old); + void *bpf_arch_text_copy(void *dst, void *src, size_t len); int bpf_arch_text_invalidate(void *dst, size_t len); diff --git a/include/linux/damon.h b/include/linux/damon.h index 620ada094c3b..35630634d790 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -357,6 +357,7 @@ struct damon_operations { * @after_wmarks_check: Called after each schemes' watermarks check. * @after_sampling: Called after each sampling. * @after_aggregation: Called after each aggregation. + * @before_damos_apply: Called before applying DAMOS action. * @before_terminate: Called before terminating the monitoring. * @private: User private data. * @@ -385,6 +386,10 @@ struct damon_callback { int (*after_wmarks_check)(struct damon_ctx *context); int (*after_sampling)(struct damon_ctx *context); int (*after_aggregation)(struct damon_ctx *context); + int (*before_damos_apply)(struct damon_ctx *context, + struct damon_target *target, + struct damon_region *region, + struct damos *scheme); void (*before_terminate)(struct damon_ctx *context); }; diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index 0ff8ce8cd06a..7ccb05ba08ce 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -249,6 +249,8 @@ struct maple_tree { struct maple_tree name = MTREE_INIT(name, 0) #define mtree_lock(mt) spin_lock((&(mt)->ma_lock)) +#define mtree_lock_nested(mt, subclass) \ + spin_lock_nested((&(mt)->ma_lock), subclass) #define mtree_unlock(mt) spin_unlock((&(mt)->ma_lock)) /* @@ -320,6 +322,9 @@ int mtree_store(struct maple_tree *mt, unsigned long index, void *entry, gfp_t gfp); void *mtree_erase(struct maple_tree *mt, unsigned long index); +int mtree_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp); +int __mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp); + void mtree_destroy(struct maple_tree *mt); void __mt_destroy(struct maple_tree *mt); @@ -399,6 +404,8 @@ struct ma_wr_state { }; #define mas_lock(mas) spin_lock(&((mas)->tree->ma_lock)) +#define mas_lock_nested(mas, subclass) \ + spin_lock_nested(&((mas)->tree->ma_lock), subclass) #define mas_unlock(mas) spin_unlock(&((mas)->tree->ma_lock)) @@ -525,6 +532,22 @@ static inline void mas_reset(struct ma_state *mas) */ #define mas_for_each(__mas, __entry, __max) \ while (((__entry) = mas_find((__mas), (__max))) != NULL) +/** + * __mas_set_range() - Set up Maple Tree operation state to a sub-range of
the + * current location. + * @mas: Maple Tree operation state. + * @start: New start of range in the Maple Tree. + * @last: New end of range in the Maple Tree. + * + * set the internal maple state values to a sub-range. + * Please use mas_set_range() if you do not know where you are in the tree. + */ +static inline void __mas_set_range(struct ma_state *mas, unsigned long start, + unsigned long last) +{ + mas->index = start; + mas->last = last; +} /** * mas_set_range() - Set up Maple Tree operation state for a different index. @@ -539,9 +562,8 @@ static inline void mas_reset(struct ma_state *mas) static inline void mas_set_range(struct ma_state *mas, unsigned long start, unsigned long last) { - mas->index = start; - mas->last = last; - mas->node = MAS_START; + __mas_set_range(mas, start, last); + mas->node = MAS_START; } /** diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 445494c502ba..49dc95d21f01 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -243,7 +243,7 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, break; case NFPROTO_BRIDGE: #ifdef CONFIG_NETFILTER_FAMILY_BRIDGE - hook_head = rcu_dereference(net->nf.hooks_bridge[hook]); + hook_head = rcu_dereference(get_nf_hooks_bridge(net)[hook]); #endif break; default: diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 8c3587d5c308..6641c4543d18 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -188,6 +188,36 @@ struct net { #endif } __randomize_layout; +/* + * To work around a KMI issue, hooks_bridge[] could not be + * added to struct netns_nf. Since the only use of netns_nf + * is embedded in struct net, struct ext_net is added to + * contain struct net plus the new field. Users of the new + * field must use get_nf_hooks_bridge() to access the field. 
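The comment above describes the workaround modeled below: when a field cannot be added to a KMI-frozen structure, the structure is embedded as the first member of a wrapper and container_of() maps back to it. The statically allocated init_net is special-cased via init_nf_hooks_bridgep precisely because it is not embedded in an ext_net. A userspace sketch with sk_-prefixed stand-ins:

#include <stddef.h>

#define sk_container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct sk_net { int placeholder; };	/* the frozen structure */

struct sk_ext_net {
	struct sk_net net;	/* kept first, so &ext->net serves as a
				 * plain struct sk_net * everywhere */
	void *hooks_bridge[5];	/* the field that could not be added */
};

static void **sk_get_hooks(struct sk_net *net)
{
	return sk_container_of(net, struct sk_ext_net, net)->hooks_bridge;
}
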
+ */ +struct ext_net { + struct net net; +#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE + struct nf_hook_entries __rcu *hooks_bridge[NF_INET_NUMHOOKS]; +#endif + ANDROID_VENDOR_DATA(1); +}; + +#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE +extern struct net init_net; +extern struct nf_hook_entries **init_nf_hooks_bridgep; + +static inline struct nf_hook_entries __rcu **get_nf_hooks_bridge(const struct net *net) +{ + struct ext_net *ext_net; + + if (net == &init_net) + return init_nf_hooks_bridgep; + ext_net = container_of(net, struct ext_net, net); + return ext_net->hooks_bridge; +} +#endif + #include /* Init's network namespace */ diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 3b7eb0cb1201..56c72117b5b3 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -22,9 +22,6 @@ struct netns_nf { #ifdef CONFIG_NETFILTER_FAMILY_ARP struct nf_hook_entries __rcu *hooks_arp[NF_ARP_NUMHOOKS]; #endif -#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE - struct nf_hook_entries __rcu *hooks_bridge[NF_INET_NUMHOOKS]; -#endif #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) unsigned int defrag_ipv4_users; #endif diff --git a/include/trace/hooks/dtask.h b/include/trace/hooks/dtask.h index a63a2868e626..b51147089b2d 100644 --- a/include/trace/hooks/dtask.h +++ b/include/trace/hooks/dtask.h @@ -42,6 +42,9 @@ DECLARE_HOOK(android_vh_rtmutex_wait_start, DECLARE_HOOK(android_vh_rtmutex_wait_finish, TP_PROTO(struct rt_mutex_base *lock), TP_ARGS(lock)); +DECLARE_HOOK(android_vh_rt_mutex_steal, + TP_PROTO(int waiter_prio, int top_waiter_prio, bool *ret), + TP_ARGS(waiter_prio, top_waiter_prio, ret)); DECLARE_HOOK(android_vh_rwsem_read_wait_start, TP_PROTO(struct rw_semaphore *sem), diff --git a/include/trace/hooks/fs.h b/include/trace/hooks/fs.h new file mode 100644 index 000000000000..bb8f177db5c1 --- /dev/null +++ b/include/trace/hooks/fs.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM fs + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH trace/hooks + +#if !defined(_TRACE_HOOK_FS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_HOOK_FS_H + +#include + +DECLARE_HOOK(android_vh_ep_create_wakeup_source, + TP_PROTO(char *name, int len), + TP_ARGS(name, len)); + +DECLARE_HOOK(android_vh_timerfd_create, + TP_PROTO(char *name, int len), + TP_ARGS(name, len)); +#endif /* _TRACE_HOOK_FS_H */ + +/* This part must be outside protection */ +#include \ No newline at end of file diff --git a/include/trace/hooks/mm.h b/include/trace/hooks/mm.h index 0bd0c34e17b9..50addc57dc10 100644 --- a/include/trace/hooks/mm.h +++ b/include/trace/hooks/mm.h @@ -76,6 +76,9 @@ struct slabinfo; DECLARE_HOOK(android_vh_cache_show, TP_PROTO(struct seq_file *m, struct slabinfo *sinfo, struct kmem_cache *s), TP_ARGS(m, sinfo, s)); +DECLARE_HOOK(android_vh_read_pages, + TP_PROTO(struct readahead_control *ractl), + TP_ARGS(ractl)); DECLARE_HOOK(android_vh_alloc_pages_reclaim_bypass, TP_PROTO(gfp_t gfp_mask, int order, int alloc_flags, int migratetype, struct page **page), diff --git a/include/trace/hooks/net.h b/include/trace/hooks/net.h index 50988f672216..835943c31f3d 100644 --- a/include/trace/hooks/net.h +++ b/include/trace/hooks/net.h @@ -25,6 +25,13 @@ DECLARE_RESTRICTED_HOOK(android_rvh_sk_alloc, DECLARE_RESTRICTED_HOOK(android_rvh_sk_free, TP_PROTO(struct sock *sock), TP_ARGS(sock), 1); +struct poll_table_struct; +typedef struct poll_table_struct poll_table; +DECLARE_HOOK(android_vh_netlink_poll, + TP_PROTO(struct file *file, struct socket *sock, 
poll_table *wait, + __poll_t *mask), + TP_ARGS(file, sock, wait, mask)); + /* macro versions of hooks are no longer required */ #endif /* _TRACE_HOOK_NET_VH_H */ diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f3af16ce1f64..50a59769828a 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -567,7 +567,8 @@ * @NL80211_CMD_DEL_PMKSA: Delete a PMKSA cache entry, using %NL80211_ATTR_MAC * (for the BSSID) and %NL80211_ATTR_PMKID or using %NL80211_ATTR_SSID, * %NL80211_ATTR_FILS_CACHE_ID, and %NL80211_ATTR_PMKID in case of FILS - * authentication. + * authentication. Additionally in case of SAE offload and OWE offloads + * PMKSA entry can be deleted using %NL80211_ATTR_SSID. * @NL80211_CMD_FLUSH_PMKSA: Flush all PMKSA cache entries. * * @NL80211_CMD_REG_CHANGE: indicates to userspace the regulatory domain diff --git a/init/Kconfig.gki b/init/Kconfig.gki index 081b1cdc9c7e..1a17a3d6e27b 100644 --- a/init/Kconfig.gki +++ b/init/Kconfig.gki @@ -202,6 +202,7 @@ config GKI_HIDDEN_NET_CONFIGS select PAGE_POOL select NET_PTP_CLASSIFY select NET_DEVLINK + select NETFILTER_FAMILY_BRIDGE help Dummy config option used to enable the networking hidden config, required by various SoC platforms. diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index 882bd56b01ed..ea2c2ded4e41 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -51,7 +51,6 @@ static __cold int io_uring_show_cred(struct seq_file *m, unsigned int id, static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) { - struct io_sq_data *sq = NULL; struct io_overflow_cqe *ocqe; struct io_rings *r = ctx->rings; unsigned int sq_mask = ctx->sq_entries - 1, cq_mask = ctx->cq_entries - 1; @@ -62,6 +61,7 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, unsigned int cq_shift = 0; unsigned int sq_shift = 0; unsigned int sq_entries, cq_entries; + int sq_pid = -1, sq_cpu = -1; bool has_lock; unsigned int i; @@ -139,13 +139,19 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, has_lock = mutex_trylock(&ctx->uring_lock); if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) { - sq = ctx->sq_data; - if (!sq->thread) - sq = NULL; + struct io_sq_data *sq = ctx->sq_data; + + if (mutex_trylock(&sq->lock)) { + if (sq->thread) { + sq_pid = task_pid_nr(sq->thread); + sq_cpu = task_cpu(sq->thread); + } + mutex_unlock(&sq->lock); + } } - seq_printf(m, "SqThread:\t%d\n", sq ? task_pid_nr(sq->thread) : -1); - seq_printf(m, "SqThreadCpu:\t%d\n", sq ? 
task_cpu(sq->thread) : -1); + seq_printf(m, "SqThread:\t%d\n", sq_pid); + seq_printf(m, "SqThreadCpu:\t%d\n", sq_cpu); seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files); for (i = 0; has_lock && i < ctx->nr_user_files; i++) { struct file *f = io_file_from_index(&ctx->file_table, i); diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 832b2659e96e..00f23febb9a7 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -997,11 +997,16 @@ static void prog_array_map_poke_untrack(struct bpf_map *map, mutex_unlock(&aux->poke_mutex); } +void __weak bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke, + struct bpf_prog *new, struct bpf_prog *old) +{ + WARN_ON_ONCE(1); +} + static void prog_array_map_poke_run(struct bpf_map *map, u32 key, struct bpf_prog *old, struct bpf_prog *new) { - u8 *old_addr, *new_addr, *old_bypass_addr; struct prog_poke_elem *elem; struct bpf_array_aux *aux; @@ -1010,7 +1015,7 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key, list_for_each_entry(elem, &aux->poke_progs, list) { struct bpf_jit_poke_descriptor *poke; - int i, ret; + int i; for (i = 0; i < elem->aux->size_poke_tab; i++) { poke = &elem->aux->poke_tab[i]; @@ -1029,21 +1034,10 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key, * activated, so tail call updates can arrive from here * while JIT is still finishing its final fixup for * non-activated poke entries. - * 3) On program teardown, the program's kallsym entry gets - * removed out of RCU callback, but we can only untrack - * from sleepable context, therefore bpf_arch_text_poke() - * might not see that this is in BPF text section and - * bails out with -EINVAL. As these are unreachable since - * RCU grace period already passed, we simply skip them. - * 4) Also programs reaching refcount of zero while patching + * 3) Also programs reaching refcount of zero while patching * is in progress is okay since we're protected under * poke_mutex and untrack the programs before the JIT - * buffer is freed. When we're still in the middle of - * patching and suddenly kallsyms entry of the program - * gets evicted, we just skip the rest which is fine due - * to point 3). - * 5) Any other error happening below from bpf_arch_text_poke() - * is a unexpected bug. + * buffer is freed. */ if (!READ_ONCE(poke->tailcall_target_stable)) continue; @@ -1053,39 +1047,7 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key, poke->tail_call.key != key) continue; - old_bypass_addr = old ? NULL : poke->bypass_addr; - old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL; - new_addr = new ? 
(u8 *)new->bpf_func + poke->adj_off : NULL; - - if (new) { - ret = bpf_arch_text_poke(poke->tailcall_target, - BPF_MOD_JUMP, - old_addr, new_addr); - BUG_ON(ret < 0 && ret != -EINVAL); - if (!old) { - ret = bpf_arch_text_poke(poke->tailcall_bypass, - BPF_MOD_JUMP, - poke->bypass_addr, - NULL); - BUG_ON(ret < 0 && ret != -EINVAL); - } - } else { - ret = bpf_arch_text_poke(poke->tailcall_bypass, - BPF_MOD_JUMP, - old_bypass_addr, - poke->bypass_addr); - BUG_ON(ret < 0 && ret != -EINVAL); - /* let other CPUs finish the execution of program - * so that it will not possible to expose them - * to invalid nop, stack unwind, nop state - */ - if (!ret) - synchronize_rcu(); - ret = bpf_arch_text_poke(poke->tailcall_target, - BPF_MOD_JUMP, - old_addr, NULL); - BUG_ON(ret < 0 && ret != -EINVAL); - } + bpf_arch_poke_desc_update(poke, new, old); } } } diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 71bb2e3440e2..09ca202ac480 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -43,6 +43,7 @@ u64 dma_direct_get_required_mask(struct device *dev) return (1ULL << (fls64(max_dma) - 1)) * 2 - 1; } +EXPORT_SYMBOL_GPL(dma_direct_get_required_mask); static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, u64 *phys_limit) @@ -320,6 +321,7 @@ out_free_pages: __dma_direct_free_pages(dev, page, size); return NULL; } +EXPORT_SYMBOL_GPL(dma_direct_alloc); void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) @@ -365,6 +367,7 @@ void dma_direct_free(struct device *dev, size_t size, __dma_direct_free_pages(dev, dma_direct_to_page(dev, dma_addr), size); } +EXPORT_SYMBOL_GPL(dma_direct_free); struct page *dma_direct_alloc_pages(struct device *dev, size_t size, dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) diff --git a/kernel/dma/ops_helpers.c b/kernel/dma/ops_helpers.c index af4a6ef48ce0..e28e1e17eaf5 100644 --- a/kernel/dma/ops_helpers.c +++ b/kernel/dma/ops_helpers.c @@ -27,6 +27,7 @@ int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); return ret; } +EXPORT_SYMBOL_GPL(dma_common_get_sgtable); /* * Create userspace mapping for the DMA-coherent memory. @@ -57,6 +58,7 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, return -ENXIO; #endif /* CONFIG_MMU */ } +EXPORT_SYMBOL_GPL(dma_common_mmap); struct page *dma_common_alloc_pages(struct device *dev, size_t size, dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) diff --git a/kernel/fork.c b/kernel/fork.c index 9ef103c05891..1109a10c5ccd 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -659,7 +659,6 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, int retval; unsigned long charge = 0; LIST_HEAD(uf); - MA_STATE(old_mas, &oldmm->mm_mt, 0, 0); MA_STATE(mas, &mm->mm_mt, 0, 0); uprobe_start_dup_mmap(); @@ -687,16 +686,23 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, goto out; khugepaged_fork(mm, oldmm); - retval = mas_expected_entries(&mas, oldmm->map_count); - if (retval) + /* Use __mt_dup() to efficiently build an identical maple tree. 
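The kernel/bpf/arraymap.c hunk above moves all arch-specific text poking behind bpf_arch_poke_desc_update(), declared __weak with a WARN_ON_ONCE() body so that a JIT-enabled architecture must supply the strong override. A minimal model of that weak/strong linkage (names are stand-ins; the strong definition lives in a separate object file and wins at link time):

/* generic file: default that should never run on a JIT-enabled arch */
void __attribute__((weak)) sk_poke_desc_update(void *poke, void *new_prog,
					       void *old_prog)
{
	/* Only reached if an arch enables tail-call poking but never
	 * provides its own implementation (kernel: WARN_ON_ONCE(1)). */
	(void)poke; (void)new_prog; (void)old_prog;
}
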
*/ + retval = __mt_dup(&oldmm->mm_mt, &mm->mm_mt, GFP_KERNEL); + if (unlikely(retval)) goto out; mt_clear_in_rcu(mas.tree); - mas_for_each(&old_mas, mpnt, ULONG_MAX) { + mas_for_each(&mas, mpnt, ULONG_MAX) { struct file *file; vma_start_write(mpnt); if (mpnt->vm_flags & VM_DONTCOPY) { + __mas_set_range(&mas, mpnt->vm_start, mpnt->vm_end - 1); + mas_store_gfp(&mas, NULL, GFP_KERNEL); + if (unlikely(mas_is_err(&mas))) { + retval = -ENOMEM; + goto loop_out; + } vm_stat_account(mm, mpnt->vm_flags, -vma_pages(mpnt)); continue; } @@ -758,12 +764,13 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, if (is_vm_hugetlb_page(tmp)) hugetlb_dup_vma_private(tmp); - /* Link the vma into the MT */ + /* + * Link the vma into the MT. After using __mt_dup(), memory + * allocation is not necessary here, so it cannot fail. + */ mas.index = tmp->vm_start; mas.last = tmp->vm_end - 1; mas_store(&mas, tmp); - if (mas_is_err(&mas)) - goto fail_nomem_mas_store; mm->map_count++; if (!(tmp->vm_flags & VM_WIPEONFORK)) @@ -772,15 +779,28 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, if (tmp->vm_ops && tmp->vm_ops->open) tmp->vm_ops->open(tmp); - if (retval) + if (retval) { + mpnt = mas_find(&mas, ULONG_MAX); goto loop_out; + } } /* a new mm has just been created */ retval = arch_dup_mmap(oldmm, mm); loop_out: mas_destroy(&mas); - if (!retval) + if (!retval) { mt_set_in_rcu(mas.tree); + } else if (mpnt) { + /* + * The entire maple tree has already been duplicated. If the + * mmap duplication fails, mark the failure point with + * XA_ZERO_ENTRY. In exit_mmap(), if this marker is encountered, + * stop releasing VMAs that have not been duplicated after this + * point. + */ + mas_set_range(&mas, mpnt->vm_start, mpnt->vm_end - 1); + mas_store(&mas, XA_ZERO_ENTRY); + } out: mmap_write_unlock(mm); flush_tlb_mm(oldmm); @@ -790,8 +810,6 @@ fail_uprobe_end: uprobe_end_dup_mmap(); return retval; -fail_nomem_mas_store: - unlink_anon_vmas(tmp); fail_nomem_anon_vma_fork: mpol_put(vma_policy(tmp)); fail_nomem_policy: diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 351716fe9138..8207fdade4a8 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -391,9 +391,15 @@ static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left, static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter, struct rt_mutex_waiter *top_waiter) { + bool ret = false; + if (rt_mutex_waiter_less(waiter, top_waiter)) return true; + trace_android_vh_rt_mutex_steal(waiter->prio, top_waiter->prio, &ret); + if (ret) + return true; + #ifdef RT_MUTEX_BUILD_SPINLOCKS /* * Note that RT tasks are excluded from same priority (lateral) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 8a1564c2268f..fc3f3dad20c3 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7837,6 +7837,7 @@ change: if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) { __setscheduler_params(p, attr); __setscheduler_prio(p, newprio); + trace_android_rvh_setscheduler(p); } __setscheduler_uclamp(p, attr); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 507d1fc4e163..ac870e416e2a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -96,6 +96,7 @@ unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG; * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds) */ unsigned int sysctl_sched_min_granularity = 750000ULL; +EXPORT_SYMBOL_GPL(sysctl_sched_min_granularity); static unsigned int normalized_sysctl_sched_min_granularity = 750000ULL; /* @@ -105,6 +106,7 @@ 
static unsigned int normalized_sysctl_sched_min_granularity = 750000ULL; * (default: 0.75 msec) */ unsigned int sysctl_sched_idle_min_granularity = 750000ULL; +EXPORT_SYMBOL_GPL(sysctl_sched_idle_min_granularity); /* * This value is kept at sysctl_sched_latency/sysctl_sched_min_granularity diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 826f7b8d5e05..1200ff73c1b0 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -4,6 +4,8 @@ * Copyright (c) 2018-2022 Oracle Corporation * Authors: Liam R. Howlett * Matthew Wilcox + * Copyright (c) 2023 ByteDance + * Author: Peng Zhang */ /* @@ -158,6 +160,11 @@ static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes) return kmem_cache_alloc_bulk(maple_node_cache, gfp, size, nodes); } +static inline void mt_free_one(struct maple_node *node) +{ + kmem_cache_free(maple_node_cache, node); +} + static inline void mt_free_bulk(size_t size, void __rcu **nodes) { kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes); @@ -199,6 +206,11 @@ static unsigned int mas_mt_height(struct ma_state *mas) return mt_height(mas->tree); } +static inline unsigned int mt_attr(struct maple_tree *mt) +{ + return mt->ma_flags & ~MT_FLAGS_HEIGHT_MASK; +} + static inline enum maple_type mte_node_type(const struct maple_enode *entry) { return ((unsigned long)entry >> MAPLE_NODE_TYPE_SHIFT) & @@ -5702,7 +5714,7 @@ void mas_destroy(struct ma_state *mas) mt_free_bulk(count, (void __rcu **)&node->slot[1]); total -= count; } - kmem_cache_free(maple_node_cache, node); + mt_free_one(ma_mnode_ptr(node)); total--; } @@ -6527,6 +6539,278 @@ void *mtree_erase(struct maple_tree *mt, unsigned long index) } EXPORT_SYMBOL(mtree_erase); +/* + * mas_dup_free() - Free an incomplete duplication of a tree. + * @mas: The maple state of a incomplete tree. + * + * The parameter @mas->node passed in indicates that the allocation failed on + * this node. This function frees all nodes starting from @mas->node in the + * reverse order of mas_dup_build(). There is no need to hold the source tree + * lock at this time. + */ +static void mas_dup_free(struct ma_state *mas) +{ + struct maple_node *node; + enum maple_type type; + void __rcu **slots; + unsigned char count, i; + + /* Maybe the first node allocation failed. */ + if (mas_is_none(mas)) + return; + + while (!mte_is_root(mas->node)) { + mas_ascend(mas); + if (mas->offset) { + mas->offset--; + do { + mas_descend(mas); + mas->offset = mas_data_end(mas); + } while (!mte_is_leaf(mas->node)); + + mas_ascend(mas); + } + + node = mte_to_node(mas->node); + type = mte_node_type(mas->node); + slots = ma_slots(node, type); + count = mas_data_end(mas) + 1; + for (i = 0; i < count; i++) + ((unsigned long *)slots)[i] &= ~MAPLE_NODE_MASK; + mt_free_bulk(count, slots); + } + + node = mte_to_node(mas->node); + mt_free_one(node); +} + +/* + * mas_copy_node() - Copy a maple node and replace the parent. + * @mas: The maple state of source tree. + * @new_mas: The maple state of new tree. + * @parent: The parent of the new node. + * + * Copy @mas->node to @new_mas->node, set @parent to be the parent of + * @new_mas->node. If memory allocation fails, @mas is set to -ENOMEM. + */ +static inline void mas_copy_node(struct ma_state *mas, struct ma_state *new_mas, + struct maple_pnode *parent) +{ + struct maple_node *node = mte_to_node(mas->node); + struct maple_node *new_node = mte_to_node(new_mas->node); + unsigned long val; + + /* Copy the node completely. */ + memcpy(new_node, node, sizeof(struct maple_node)); + /* Update the parent node pointer. 
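+	 * The low MAPLE_NODE_MASK bits of ->parent carry metadata rather than
+	 * address bits. The memcpy() above already copied them from the source
+	 * node, so keep those bits and substitute only the address of the new
+	 * parent, i.e.:
+	 *
+	 *	new_node->parent = (source parent & MAPLE_NODE_MASK) | @parent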
*/ + val = (unsigned long)node->parent & MAPLE_NODE_MASK; + new_node->parent = ma_parent_ptr(val | (unsigned long)parent); +} + +/* + * mas_dup_alloc() - Allocate child nodes for a maple node. + * @mas: The maple state of source tree. + * @new_mas: The maple state of new tree. + * @gfp: The GFP_FLAGS to use for allocations. + * + * This function allocates child nodes for @new_mas->node during the duplication + * process. If memory allocation fails, @mas is set to -ENOMEM. + */ +static inline void mas_dup_alloc(struct ma_state *mas, struct ma_state *new_mas, + gfp_t gfp) +{ + struct maple_node *node = mte_to_node(mas->node); + struct maple_node *new_node = mte_to_node(new_mas->node); + enum maple_type type; + unsigned char request, count, i; + void __rcu **slots; + void __rcu **new_slots; + unsigned long val; + + /* Allocate memory for child nodes. */ + type = mte_node_type(mas->node); + new_slots = ma_slots(new_node, type); + request = mas_data_end(mas) + 1; + count = mt_alloc_bulk(gfp, request, (void **)new_slots); + if (unlikely(count < request)) { + memset(new_slots, 0, request * sizeof(void *)); + mas_set_err(mas, -ENOMEM); + return; + } + + /* Restore node type information in slots. */ + slots = ma_slots(node, type); + for (i = 0; i < count; i++) { + val = (unsigned long)mt_slot_locked(mas->tree, slots, i); + val &= MAPLE_NODE_MASK; + ((unsigned long *)new_slots)[i] |= val; + } +} + +/* + * mas_dup_build() - Build a new maple tree from a source tree + * @mas: The maple state of source tree, need to be in MAS_START state. + * @new_mas: The maple state of new tree, need to be in MAS_START state. + * @gfp: The GFP_FLAGS to use for allocations. + * + * This function builds a new tree in DFS preorder. If the memory allocation + * fails, the error code -ENOMEM will be set in @mas, and @new_mas points to the + * last node. mas_dup_free() will free the incomplete duplication of a tree. + * + * Note that the attributes of the two trees need to be exactly the same, and the + * new tree needs to be empty, otherwise -EINVAL will be set in @mas. + */ +static inline void mas_dup_build(struct ma_state *mas, struct ma_state *new_mas, + gfp_t gfp) +{ + struct maple_node *node; + struct maple_pnode *parent = NULL; + struct maple_enode *root; + enum maple_type type; + + if (unlikely(mt_attr(mas->tree) != mt_attr(new_mas->tree)) || + unlikely(!mtree_empty(new_mas->tree))) { + mas_set_err(mas, -EINVAL); + return; + } + + root = mas_start(mas); + if (mas_is_ptr(mas) || mas_is_none(mas)) + goto set_new_tree; + + node = mt_alloc_one(gfp); + if (!node) { + new_mas->node = MAS_NONE; + mas_set_err(mas, -ENOMEM); + return; + } + + type = mte_node_type(mas->node); + root = mt_mk_node(node, type); + new_mas->node = root; + new_mas->min = 0; + new_mas->max = ULONG_MAX; + root = mte_mk_root(root); + while (1) { + mas_copy_node(mas, new_mas, parent); + if (!mte_is_leaf(mas->node)) { + /* Only allocate child nodes for non-leaf nodes. */ + mas_dup_alloc(mas, new_mas, gfp); + if (unlikely(mas_is_err(mas))) + return; + } else { + /* + * This is the last leaf node and duplication is + * completed. + */ + if (mas->max == ULONG_MAX) + goto done; + + /* This is not the last leaf node and needs to go up. */ + do { + mas_ascend(mas); + mas_ascend(new_mas); + } while (mas->offset == mas_data_end(mas)); + + /* Move to the next subtree. 
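+			 * Both trees are walked in lock step: after ascending
+			 * past exhausted nodes above, step one slot to the
+			 * right; the next loop iterations descend from there
+			 * to the leftmost leaf, which yields a plain DFS
+			 * preorder traversal.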
*/ + mas->offset++; + new_mas->offset++; + } + + mas_descend(mas); + parent = ma_parent_ptr(mte_to_node(new_mas->node)); + mas_descend(new_mas); + mas->offset = 0; + new_mas->offset = 0; + } +done: + /* Specially handle the parent of the root node. */ + mte_to_node(root)->parent = ma_parent_ptr(mas_tree_parent(new_mas)); +set_new_tree: + /* Make them the same height */ + new_mas->tree->ma_flags = mas->tree->ma_flags; + rcu_assign_pointer(new_mas->tree->ma_root, root); +} + +/** + * __mt_dup(): Duplicate an entire maple tree + * @mt: The source maple tree + * @new: The new maple tree + * @gfp: The GFP_FLAGS to use for allocations + * + * This function duplicates a maple tree in Depth-First Search (DFS) pre-order + * traversal. It uses memcpy() to copy nodes in the source tree and allocate + * new child nodes in non-leaf nodes. The new node is exactly the same as the + * source node except for all the addresses stored in it. It will be faster than + * traversing all elements in the source tree and inserting them one by one into + * the new tree. + * The user needs to ensure that the attributes of the source tree and the new + * tree are the same, and the new tree needs to be an empty tree, otherwise + * -EINVAL will be returned. + * Note that the user needs to manually lock the source tree and the new tree. + * + * Return: 0 on success, -ENOMEM if memory could not be allocated, -EINVAL If + * the attributes of the two trees are different or the new tree is not an empty + * tree. + */ +int __mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp) +{ + int ret = 0; + MA_STATE(mas, mt, 0, 0); + MA_STATE(new_mas, new, 0, 0); + + mas_dup_build(&mas, &new_mas, gfp); + if (unlikely(mas_is_err(&mas))) { + ret = xa_err(mas.node); + if (ret == -ENOMEM) + mas_dup_free(&new_mas); + } + + return ret; +} +EXPORT_SYMBOL(__mt_dup); + +/** + * mtree_dup(): Duplicate an entire maple tree + * @mt: The source maple tree + * @new: The new maple tree + * @gfp: The GFP_FLAGS to use for allocations + * + * This function duplicates a maple tree in Depth-First Search (DFS) pre-order + * traversal. It uses memcpy() to copy nodes in the source tree and allocate + * new child nodes in non-leaf nodes. The new node is exactly the same as the + * source node except for all the addresses stored in it. It will be faster than + * traversing all elements in the source tree and inserting them one by one into + * the new tree. + * The user needs to ensure that the attributes of the source tree and the new + * tree are the same, and the new tree needs to be an empty tree, otherwise + * -EINVAL will be returned. + * + * Return: 0 on success, -ENOMEM if memory could not be allocated, -EINVAL If + * the attributes of the two trees are different or the new tree is not an empty + * tree. + */ +int mtree_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp) +{ + int ret = 0; + MA_STATE(mas, mt, 0, 0); + MA_STATE(new_mas, new, 0, 0); + + mas_lock(&new_mas); + mas_lock_nested(&mas, SINGLE_DEPTH_NESTING); + mas_dup_build(&mas, &new_mas, gfp); + mas_unlock(&mas); + if (unlikely(mas_is_err(&mas))) { + ret = xa_err(mas.node); + if (ret == -ENOMEM) + mas_dup_free(&new_mas); + } + + mas_unlock(&new_mas); + return ret; +} +EXPORT_SYMBOL(mtree_dup); + /** * __mt_destroy() - Walk and free all nodes of a locked maple tree. 
* @mt: The maple tree @@ -6541,7 +6825,7 @@ void __mt_destroy(struct maple_tree *mt) if (xa_is_node(root)) mte_destroy_walk(root, mt); - mt->ma_flags = 0; + mt->ma_flags = mt_attr(mt); } EXPORT_SYMBOL_GPL(__mt_destroy); diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c index ab9d4461abc9..68b2c387fddb 100644 --- a/lib/test_maple_tree.c +++ b/lib/test_maple_tree.c @@ -1671,47 +1671,48 @@ static noinline void __init bench_mt_for_each(struct maple_tree *mt) #endif /* check_forking - simulate the kernel forking sequence with the tree. */ -static noinline void __init check_forking(struct maple_tree *mt) +static noinline void __init check_forking(void) { - - struct maple_tree newmt; - int i, nr_entries = 134; + struct maple_tree mt, newmt; + int i, nr_entries = 134, ret; void *val; - MA_STATE(mas, mt, 0, 0); - MA_STATE(newmas, mt, 0, 0); - struct rw_semaphore newmt_lock; + MA_STATE(mas, &mt, 0, 0); + MA_STATE(newmas, &newmt, 0, 0); + struct rw_semaphore mt_lock, newmt_lock; + init_rwsem(&mt_lock); init_rwsem(&newmt_lock); - for (i = 0; i <= nr_entries; i++) - mtree_store_range(mt, i*10, i*10 + 5, - xa_mk_value(i), GFP_KERNEL); + mt_init_flags(&mt, MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN); + mt_set_external_lock(&mt, &mt_lock); - mt_set_non_kernel(99999); mt_init_flags(&newmt, MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN); mt_set_external_lock(&newmt, &newmt_lock); - newmas.tree = &newmt; - mas_reset(&newmas); - mas_reset(&mas); - down_write(&newmt_lock); - mas.index = 0; - mas.last = 0; - if (mas_expected_entries(&newmas, nr_entries)) { + + down_write(&mt_lock); + for (i = 0; i <= nr_entries; i++) { + mas_set_range(&mas, i*10, i*10 + 5); + mas_store_gfp(&mas, xa_mk_value(i), GFP_KERNEL); + } + + down_write_nested(&newmt_lock, SINGLE_DEPTH_NESTING); + ret = __mt_dup(&mt, &newmt, GFP_KERNEL); + if (ret) { pr_err("OOM!"); BUG_ON(1); } - rcu_read_lock(); - mas_for_each(&mas, val, ULONG_MAX) { - newmas.index = mas.index; - newmas.last = mas.last; + + mas_set(&newmas, 0); + mas_for_each(&newmas, val, ULONG_MAX) mas_store(&newmas, val); - } - rcu_read_unlock(); + mas_destroy(&newmas); + mas_destroy(&mas); mt_validate(&newmt); - mt_set_non_kernel(0); __mt_destroy(&newmt); + __mt_destroy(&mt); up_write(&newmt_lock); + up_write(&mt_lock); } static noinline void __init check_iteration(struct maple_tree *mt) @@ -1815,49 +1816,51 @@ static noinline void __init check_mas_store_gfp(struct maple_tree *mt) } #if defined(BENCH_FORK) -static noinline void __init bench_forking(struct maple_tree *mt) +static noinline void __init bench_forking(void) { - - struct maple_tree newmt; - int i, nr_entries = 134, nr_fork = 80000; + struct maple_tree mt, newmt; + int i, nr_entries = 134, nr_fork = 80000, ret; void *val; - MA_STATE(mas, mt, 0, 0); - MA_STATE(newmas, mt, 0, 0); - struct rw_semaphore newmt_lock; + MA_STATE(mas, &mt, 0, 0); + MA_STATE(newmas, &newmt, 0, 0); + struct rw_semaphore mt_lock, newmt_lock; + init_rwsem(&mt_lock); init_rwsem(&newmt_lock); - mt_set_external_lock(&newmt, &newmt_lock); - for (i = 0; i <= nr_entries; i++) - mtree_store_range(mt, i*10, i*10 + 5, - xa_mk_value(i), GFP_KERNEL); + mt_init_flags(&mt, MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN); + mt_set_external_lock(&mt, &mt_lock); + + down_write(&mt_lock); + for (i = 0; i <= nr_entries; i++) { + mas_set_range(&mas, i*10, i*10 + 5); + mas_store_gfp(&mas, xa_mk_value(i), GFP_KERNEL); + } for (i = 0; i < nr_fork; i++) { - mt_set_non_kernel(99999); - mt_init_flags(&newmt, MT_FLAGS_ALLOC_RANGE); - newmas.tree = &newmt; - 
mas_reset(&newmas); - mas_reset(&mas); - mas.index = 0; - mas.last = 0; - rcu_read_lock(); - down_write(&newmt_lock); - if (mas_expected_entries(&newmas, nr_entries)) { - printk("OOM!"); + mt_init_flags(&newmt, + MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN); + mt_set_external_lock(&newmt, &newmt_lock); + + down_write_nested(&newmt_lock, SINGLE_DEPTH_NESTING); + ret = __mt_dup(&mt, &newmt, GFP_KERNEL); + if (ret) { + pr_err("OOM!"); BUG_ON(1); } - mas_for_each(&mas, val, ULONG_MAX) { - newmas.index = mas.index; - newmas.last = mas.last; + + mas_set(&newmas, 0); + mas_for_each(&newmas, val, ULONG_MAX) mas_store(&newmas, val); - } + mas_destroy(&newmas); - rcu_read_unlock(); mt_validate(&newmt); - mt_set_non_kernel(0); __mt_destroy(&newmt); up_write(&newmt_lock); } + mas_destroy(&mas); + __mt_destroy(&mt); + up_write(&mt_lock); } #endif @@ -2741,10 +2744,6 @@ static int __init maple_tree_seed(void) pr_info("\nTEST STARTING\n\n"); - mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); - check_root_expand(&tree); - mtree_destroy(&tree); - #if defined(BENCH_SLOT_STORE) #define BENCH mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); @@ -2775,9 +2774,7 @@ static int __init maple_tree_seed(void) #endif #if defined(BENCH_FORK) #define BENCH - mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); - bench_forking(&tree); - mtree_destroy(&tree); + bench_forking(); goto skip; #endif #if defined(BENCH_MT_FOR_EACH) @@ -2789,13 +2786,15 @@ static int __init maple_tree_seed(void) #endif mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); - check_iteration(&tree); + check_root_expand(&tree); mtree_destroy(&tree); mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); - check_forking(&tree); + check_iteration(&tree); mtree_destroy(&tree); + check_forking(); + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); check_mas_store_gfp(&tree); mtree_destroy(&tree); diff --git a/mm/cma.c b/mm/cma.c index b64768625d82..6d466c77630f 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -438,6 +438,9 @@ struct page *__cma_alloc(struct cma *cma, unsigned long count, int ret = -ENOMEM; int num_attempts = 0; int max_retries = 5; + const char *name = cma ? cma->name : NULL; + + trace_cma_alloc_start(name, count, align); if (WARN_ON_ONCE((gfp_mask & GFP_KERNEL) == 0 || (gfp_mask & ~(GFP_KERNEL|__GFP_NOWARN|__GFP_NORETRY)) != 0)) @@ -452,8 +455,6 @@ struct page *__cma_alloc(struct cma *cma, unsigned long count, if (!count) goto out; - trace_cma_alloc_start(cma->name, count, align); - mask = cma_bitmap_aligned_mask(cma, align); offset = cma_bitmap_aligned_offset(cma, align); bitmap_maxno = cma_bitmap_maxno(cma); @@ -522,8 +523,6 @@ struct page *__cma_alloc(struct cma *cma, unsigned long count, start = bitmap_no + mask + 1; } - trace_cma_alloc_finish(cma->name, pfn, page, count, align); - /* * CMA can allocate multiple page blocks, which results in different * blocks being marked with different tags. 
Reset the tags to ignore @@ -542,6 +541,7 @@ struct page *__cma_alloc(struct cma *cma, unsigned long count, pr_debug("%s(): returned %p\n", __func__, page); out: + trace_cma_alloc_finish(name, pfn, page, count, align); if (page) { count_vm_event(CMA_ALLOC_SUCCESS); cma_sysfs_account_success_pages(cma, count); diff --git a/mm/damon/Makefile b/mm/damon/Makefile index 3e6b8ad73858..f7add3f4aa79 100644 --- a/mm/damon/Makefile +++ b/mm/damon/Makefile @@ -3,7 +3,7 @@ obj-y := core.o obj-$(CONFIG_DAMON_VADDR) += ops-common.o vaddr.o obj-$(CONFIG_DAMON_PADDR) += ops-common.o paddr.o -obj-$(CONFIG_DAMON_SYSFS) += sysfs.o +obj-$(CONFIG_DAMON_SYSFS) += sysfs-common.o sysfs-schemes.o sysfs.o obj-$(CONFIG_DAMON_DBGFS) += dbgfs.o -obj-$(CONFIG_DAMON_RECLAIM) += reclaim.o -obj-$(CONFIG_DAMON_LRU_SORT) += lru_sort.o +obj-$(CONFIG_DAMON_RECLAIM) += modules-common.o reclaim.o +obj-$(CONFIG_DAMON_LRU_SORT) += modules-common.o lru_sort.o diff --git a/mm/damon/core.c b/mm/damon/core.c index 36d098d06c55..f338691e4591 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -694,6 +694,115 @@ static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t, return c->ops.get_scheme_score(c, t, r, s) >= s->quota.min_score; } +/* + * damos_skip_charged_region() - Check if the given region or starting part of + * it is already charged for the DAMOS quota. + * @t: The target of the region. + * @rp: The pointer to the region. + * @s: The scheme to be applied. + * + * If the quota of a scheme has been exceeded in a quota charge window, the + * scheme's action would be applied to only a part of the target access + * pattern fulfilling regions. To avoid applying the scheme action to only + * already applied regions, DAMON skips applying the scheme action to the + * regions that were charged in the previous charge window. + * + * This function checks if a given region should be skipped or not for that + * reason. If only the starting part of the region has previously been + * charged, this function splits the region into two so that the second one + * covers the area that was not charged in the previous charge window, saves + * the second region in *rp, and returns false so that the caller can apply + * the DAMON action to the second one. + * + * Return: true if the region should be entirely skipped, false otherwise.
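+ *
+ * For illustration (made-up numbers): if the previous charge window stopped
+ * at quota->charge_addr_from == 6 * DAMON_MIN_REGION while *rp spans
+ * [2 * DAMON_MIN_REGION, 10 * DAMON_MIN_REGION), the first
+ * 4 * DAMON_MIN_REGION bytes are split off and skipped, *rp is moved to the
+ * remaining [6 * DAMON_MIN_REGION, 10 * DAMON_MIN_REGION) part, and false
+ * is returned.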
+ */ +static bool damos_skip_charged_region(struct damon_target *t, + struct damon_region **rp, struct damos *s) +{ + struct damon_region *r = *rp; + struct damos_quota *quota = &s->quota; + unsigned long sz_to_skip; + + /* Skip previously charged regions */ + if (quota->charge_target_from) { + if (t != quota->charge_target_from) + return true; + if (r == damon_last_region(t)) { + quota->charge_target_from = NULL; + quota->charge_addr_from = 0; + return true; + } + if (quota->charge_addr_from && + r->ar.end <= quota->charge_addr_from) + return true; + + if (quota->charge_addr_from && r->ar.start < + quota->charge_addr_from) { + sz_to_skip = ALIGN_DOWN(quota->charge_addr_from - + r->ar.start, DAMON_MIN_REGION); + if (!sz_to_skip) { + if (damon_sz_region(r) <= DAMON_MIN_REGION) + return true; + sz_to_skip = DAMON_MIN_REGION; + } + damon_split_region_at(t, r, sz_to_skip); + r = damon_next_region(r); + *rp = r; + } + quota->charge_target_from = NULL; + quota->charge_addr_from = 0; + } + return false; +} + +static void damos_update_stat(struct damos *s, + unsigned long sz_tried, unsigned long sz_applied) +{ + s->stat.nr_tried++; + s->stat.sz_tried += sz_tried; + if (sz_applied) + s->stat.nr_applied++; + s->stat.sz_applied += sz_applied; +} + +static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t, + struct damon_region *r, struct damos *s) +{ + struct damos_quota *quota = &s->quota; + unsigned long sz = damon_sz_region(r); + struct timespec64 begin, end; + unsigned long sz_applied = 0; + int err = 0; + + if (c->ops.apply_scheme) { + if (quota->esz && quota->charged_sz + sz > quota->esz) { + sz = ALIGN_DOWN(quota->esz - quota->charged_sz, + DAMON_MIN_REGION); + if (!sz) + goto update_stat; + damon_split_region_at(t, r, sz); + } + ktime_get_coarse_ts64(&begin); + if (c->callback.before_damos_apply) + err = c->callback.before_damos_apply(c, t, r, s); + if (!err) + sz_applied = c->ops.apply_scheme(c, t, r, s); + ktime_get_coarse_ts64(&end); + quota->total_charged_ns += timespec64_to_ns(&end) - + timespec64_to_ns(&begin); + quota->charged_sz += sz; + if (quota->esz && quota->charged_sz >= quota->esz) { + quota->charge_target_from = t; + quota->charge_addr_from = r->ar.end + 1; + } + } + if (s->action != DAMOS_STAT) + r->age = 0; + +update_stat: + damos_update_stat(s, sz, sz_applied); +} + static void damon_do_apply_schemes(struct damon_ctx *c, struct damon_target *t, struct damon_region *r) @@ -702,9 +811,6 @@ static void damon_do_apply_schemes(struct damon_ctx *c, damon_for_each_scheme(s, c) { struct damos_quota *quota = &s->quota; - unsigned long sz = damon_sz_region(r); - struct timespec64 begin, end; - unsigned long sz_applied = 0; if (!s->wmarks.activated) continue; @@ -713,70 +819,13 @@ static void damon_do_apply_schemes(struct damon_ctx *c, if (quota->esz && quota->charged_sz >= quota->esz) continue; - /* Skip previously charged regions */ - if (quota->charge_target_from) { - if (t != quota->charge_target_from) - continue; - if (r == damon_last_region(t)) { - quota->charge_target_from = NULL; - quota->charge_addr_from = 0; - continue; - } - if (quota->charge_addr_from && - r->ar.end <= quota->charge_addr_from) - continue; - - if (quota->charge_addr_from && r->ar.start < - quota->charge_addr_from) { - sz = ALIGN_DOWN(quota->charge_addr_from - - r->ar.start, DAMON_MIN_REGION); - if (!sz) { - if (damon_sz_region(r) <= - DAMON_MIN_REGION) - continue; - sz = DAMON_MIN_REGION; - } - damon_split_region_at(t, r, sz); - r = damon_next_region(r); - sz = damon_sz_region(r); - } - 
quota->charge_target_from = NULL; - quota->charge_addr_from = 0; - } + if (damos_skip_charged_region(t, &r, s)) + continue; if (!damos_valid_target(c, t, r, s)) continue; - /* Apply the scheme */ - if (c->ops.apply_scheme) { - if (quota->esz && - quota->charged_sz + sz > quota->esz) { - sz = ALIGN_DOWN(quota->esz - quota->charged_sz, - DAMON_MIN_REGION); - if (!sz) - goto update_stat; - damon_split_region_at(t, r, sz); - } - ktime_get_coarse_ts64(&begin); - sz_applied = c->ops.apply_scheme(c, t, r, s); - ktime_get_coarse_ts64(&end); - quota->total_charged_ns += timespec64_to_ns(&end) - - timespec64_to_ns(&begin); - quota->charged_sz += sz; - if (quota->esz && quota->charged_sz >= quota->esz) { - quota->charge_target_from = t; - quota->charge_addr_from = r->ar.end + 1; - } - } - if (s->action != DAMOS_STAT) - r->age = 0; - -update_stat: - s->stat.nr_tried++; - s->stat.sz_tried += sz; - if (sz_applied) - s->stat.nr_applied++; - s->stat.sz_applied += sz_applied; + damos_apply_scheme(c, t, r, s); } } @@ -803,6 +852,53 @@ static void damos_set_effective_quota(struct damos_quota *quota) quota->esz = esz; } +static void damos_adjust_quota(struct damon_ctx *c, struct damos *s) +{ + struct damos_quota *quota = &s->quota; + struct damon_target *t; + struct damon_region *r; + unsigned long cumulated_sz; + unsigned int score, max_score = 0; + + if (!quota->ms && !quota->sz) + return; + + /* New charge window starts */ + if (time_after_eq(jiffies, quota->charged_from + + msecs_to_jiffies(quota->reset_interval))) { + if (quota->esz && quota->charged_sz >= quota->esz) + s->stat.qt_exceeds++; + quota->total_charged_sz += quota->charged_sz; + quota->charged_from = jiffies; + quota->charged_sz = 0; + damos_set_effective_quota(quota); + } + + if (!c->ops.get_scheme_score) + return; + + /* Fill up the score histogram */ + memset(quota->histogram, 0, sizeof(quota->histogram)); + damon_for_each_target(t, c) { + damon_for_each_region(r, t) { + if (!__damos_valid_target(r, s)) + continue; + score = c->ops.get_scheme_score(c, t, r, s); + quota->histogram[score] += damon_sz_region(r); + if (score > max_score) + max_score = score; + } + } + + /* Set the min score limit */ + for (cumulated_sz = 0, score = max_score; ; score--) { + cumulated_sz += quota->histogram[score]; + if (cumulated_sz >= quota->esz || !score) + break; + } + quota->min_score = score; +} + static void kdamond_apply_schemes(struct damon_ctx *c) { struct damon_target *t; @@ -810,52 +906,10 @@ static void kdamond_apply_schemes(struct damon_ctx *c) struct damos *s; damon_for_each_scheme(s, c) { - struct damos_quota *quota = &s->quota; - unsigned long cumulated_sz; - unsigned int score, max_score = 0; - if (!s->wmarks.activated) continue; - if (!quota->ms && !quota->sz) - continue; - - /* New charge window starts */ - if (time_after_eq(jiffies, quota->charged_from + - msecs_to_jiffies( - quota->reset_interval))) { - if (quota->esz && quota->charged_sz >= quota->esz) - s->stat.qt_exceeds++; - quota->total_charged_sz += quota->charged_sz; - quota->charged_from = jiffies; - quota->charged_sz = 0; - damos_set_effective_quota(quota); - } - - if (!c->ops.get_scheme_score) - continue; - - /* Fill up the score histogram */ - memset(quota->histogram, 0, sizeof(quota->histogram)); - damon_for_each_target(t, c) { - damon_for_each_region(r, t) { - if (!__damos_valid_target(r, s)) - continue; - score = c->ops.get_scheme_score( - c, t, r, s); - quota->histogram[score] += damon_sz_region(r); - if (score > max_score) - max_score = score; - } - } - - /* Set the min 
score limit */ - for (cumulated_sz = 0, score = max_score; ; score--) { - cumulated_sz += quota->histogram[score]; - if (cumulated_sz >= quota->esz || !score) - break; - } - quota->min_score = score; + damos_adjust_quota(c, s); } damon_for_each_target(t, c) { @@ -1176,7 +1230,8 @@ static int kdamond_fn(void *data) if (ctx->callback.after_aggregation && ctx->callback.after_aggregation(ctx)) break; - kdamond_apply_schemes(ctx); + if (!list_empty(&ctx->schemes)) + kdamond_apply_schemes(ctx); kdamond_reset_aggregated(ctx); kdamond_split_regions(ctx); if (ctx->ops.reset_aggregated) diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c index efbc2bda8b9c..7b8fce2f67a8 100644 --- a/mm/damon/lru_sort.c +++ b/mm/damon/lru_sort.c @@ -8,10 +8,8 @@ #define pr_fmt(fmt) "damon-lru-sort: " fmt #include -#include +#include #include -#include -#include #include "modules-common.h" @@ -237,38 +235,31 @@ static int damon_lru_sort_turn(bool on) return 0; } -static struct delayed_work damon_lru_sort_timer; -static void damon_lru_sort_timer_fn(struct work_struct *work) -{ - static bool last_enabled; - bool now_enabled; - - now_enabled = enabled; - if (last_enabled != now_enabled) { - if (!damon_lru_sort_turn(now_enabled)) - last_enabled = now_enabled; - else - enabled = last_enabled; - } -} -static DECLARE_DELAYED_WORK(damon_lru_sort_timer, damon_lru_sort_timer_fn); - -static bool damon_lru_sort_initialized; - static int damon_lru_sort_enabled_store(const char *val, const struct kernel_param *kp) { - int rc = param_set_bool(val, kp); + bool is_enabled = enabled; + bool enable; + int err; - if (rc < 0) - return rc; + err = kstrtobool(val, &enable); + if (err) + return err; - if (!damon_lru_sort_initialized) - return rc; + if (is_enabled == enable) + return 0; - schedule_delayed_work(&damon_lru_sort_timer, 0); + /* Called before init function. The function will handle this. */ + if (!ctx) + goto set_param_out; - return 0; + err = damon_lru_sort_turn(enable); + if (err) + return err; + +set_param_out: + enabled = enable; + return err; } static const struct kernel_param_ops enabled_param_ops = { @@ -314,29 +305,19 @@ static int damon_lru_sort_after_wmarks_check(struct damon_ctx *c) static int __init damon_lru_sort_init(void) { - ctx = damon_new_ctx(); - if (!ctx) - return -ENOMEM; + int err = damon_modules_new_paddr_ctx_target(&ctx, &target); - if (damon_select_ops(ctx, DAMON_OPS_PADDR)) { - damon_destroy_ctx(ctx); - return -EINVAL; - } + if (err) + return err; ctx->callback.after_wmarks_check = damon_lru_sort_after_wmarks_check; ctx->callback.after_aggregation = damon_lru_sort_after_aggregation; - target = damon_new_target(); - if (!target) { - damon_destroy_ctx(ctx); - return -ENOMEM; - } - damon_add_target(ctx, target); + /* 'enabled' has set before this function, probably via command line */ + if (enabled) + err = damon_lru_sort_turn(true); - schedule_delayed_work(&damon_lru_sort_timer, 0); - - damon_lru_sort_initialized = true; - return 0; + return err; } module_init(damon_lru_sort_init); diff --git a/mm/damon/modules-common.c b/mm/damon/modules-common.c new file mode 100644 index 000000000000..b2381a8466ec --- /dev/null +++ b/mm/damon/modules-common.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Common Primitives for DAMON Modules + * + * Author: SeongJae Park + */ + +#include + +#include "modules-common.h" + +/* + * Allocate, set, and return a DAMON context for the physical address space. 
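+ * This wraps the context/target setup boilerplate that damon_reclaim_init()
+ * and damon_lru_sort_init() would otherwise duplicate; the pattern from
+ * those callers is:
+ *
+ *	err = damon_modules_new_paddr_ctx_target(&ctx, &target);
+ *	if (err)
+ *		return err;
+ *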
+ * @ctxp: Pointer to save the point to the newly created context + * @targetp: Pointer to save the point to the newly created target + */ +int damon_modules_new_paddr_ctx_target(struct damon_ctx **ctxp, + struct damon_target **targetp) +{ + struct damon_ctx *ctx; + struct damon_target *target; + + ctx = damon_new_ctx(); + if (!ctx) + return -ENOMEM; + + if (damon_select_ops(ctx, DAMON_OPS_PADDR)) { + damon_destroy_ctx(ctx); + return -EINVAL; + } + + target = damon_new_target(); + if (!target) { + damon_destroy_ctx(ctx); + return -ENOMEM; + } + damon_add_target(ctx, target); + + *ctxp = ctx; + *targetp = target; + return 0; +} diff --git a/mm/damon/modules-common.h b/mm/damon/modules-common.h index 5a4921851d32..f49cdb417005 100644 --- a/mm/damon/modules-common.h +++ b/mm/damon/modules-common.h @@ -44,3 +44,6 @@ 0400); \ module_param_named(nr_##qt_exceed_name, stat.qt_exceeds, ulong, \ 0400); + +int damon_modules_new_paddr_ctx_target(struct damon_ctx **ctxp, + struct damon_target **targetp); diff --git a/mm/damon/ops-common.c b/mm/damon/ops-common.c index 13b99975cbc2..073481023bea 100644 --- a/mm/damon/ops-common.c +++ b/mm/damon/ops-common.c @@ -35,21 +35,12 @@ struct page *damon_get_page(unsigned long pfn) void damon_ptep_mkold(pte_t *pte, struct vm_area_struct *vma, unsigned long addr) { - bool referenced = false; struct page *page = damon_get_page(pte_pfn(*pte)); if (!page) return; - if (ptep_test_and_clear_young(vma, addr, pte)) - referenced = true; - -#ifdef CONFIG_MMU_NOTIFIER - if (mmu_notifier_clear_young(vma->vm_mm, addr, addr + PAGE_SIZE)) - referenced = true; -#endif /* CONFIG_MMU_NOTIFIER */ - - if (referenced) + if (ptep_clear_young_notify(vma, addr, pte)) set_page_young(page); set_page_idle(page); @@ -59,21 +50,12 @@ void damon_ptep_mkold(pte_t *pte, struct vm_area_struct *vma, unsigned long addr void damon_pmdp_mkold(pmd_t *pmd, struct vm_area_struct *vma, unsigned long addr) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - bool referenced = false; struct page *page = damon_get_page(pmd_pfn(*pmd)); if (!page) return; - if (pmdp_test_and_clear_young(vma, addr, pmd)) - referenced = true; - -#ifdef CONFIG_MMU_NOTIFIER - if (mmu_notifier_clear_young(vma->vm_mm, addr, addr + HPAGE_PMD_SIZE)) - referenced = true; -#endif /* CONFIG_MMU_NOTIFIER */ - - if (referenced) + if (pmdp_clear_young_notify(vma, addr, pmd)) set_page_young(page); set_page_idle(page); diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c index 162c9b1ca00f..e82631f39481 100644 --- a/mm/damon/reclaim.c +++ b/mm/damon/reclaim.c @@ -8,10 +8,8 @@ #define pr_fmt(fmt) "damon-reclaim: " fmt #include -#include +#include #include -#include -#include #include "modules-common.h" @@ -183,38 +181,31 @@ static int damon_reclaim_turn(bool on) return 0; } -static struct delayed_work damon_reclaim_timer; -static void damon_reclaim_timer_fn(struct work_struct *work) -{ - static bool last_enabled; - bool now_enabled; - - now_enabled = enabled; - if (last_enabled != now_enabled) { - if (!damon_reclaim_turn(now_enabled)) - last_enabled = now_enabled; - else - enabled = last_enabled; - } -} -static DECLARE_DELAYED_WORK(damon_reclaim_timer, damon_reclaim_timer_fn); - -static bool damon_reclaim_initialized; - static int damon_reclaim_enabled_store(const char *val, const struct kernel_param *kp) { - int rc = param_set_bool(val, kp); + bool is_enabled = enabled; + bool enable; + int err; - if (rc < 0) - return rc; + err = kstrtobool(val, &enable); + if (err) + return err; - /* system_wq might not initialized yet */ - if 
(!damon_reclaim_initialized) - return rc; + if (is_enabled == enable) + return 0; - schedule_delayed_work(&damon_reclaim_timer, 0); - return 0; + /* Called before init function. The function will handle this. */ + if (!ctx) + goto set_param_out; + + err = damon_reclaim_turn(enable); + if (err) + return err; + +set_param_out: + enabled = enable; + return err; } static const struct kernel_param_ops enabled_param_ops = { @@ -256,29 +247,19 @@ static int damon_reclaim_after_wmarks_check(struct damon_ctx *c) static int __init damon_reclaim_init(void) { - ctx = damon_new_ctx(); - if (!ctx) - return -ENOMEM; + int err = damon_modules_new_paddr_ctx_target(&ctx, &target); - if (damon_select_ops(ctx, DAMON_OPS_PADDR)) { - damon_destroy_ctx(ctx); - return -EINVAL; - } + if (err) + return err; ctx->callback.after_wmarks_check = damon_reclaim_after_wmarks_check; ctx->callback.after_aggregation = damon_reclaim_after_aggregation; - target = damon_new_target(); - if (!target) { - damon_destroy_ctx(ctx); - return -ENOMEM; - } - damon_add_target(ctx, target); + /* 'enabled' has set before this function, probably via command line */ + if (enabled) + err = damon_reclaim_turn(true); - schedule_delayed_work(&damon_reclaim_timer, 0); - - damon_reclaim_initialized = true; - return 0; + return err; } module_init(damon_reclaim_init); diff --git a/mm/damon/sysfs-common.c b/mm/damon/sysfs-common.c new file mode 100644 index 000000000000..52bebf242f74 --- /dev/null +++ b/mm/damon/sysfs-common.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Common Primitives for DAMON Sysfs Interface + * + * Author: SeongJae Park + */ + +#include + +#include "sysfs-common.h" + +DEFINE_MUTEX(damon_sysfs_lock); + +/* + * unsigned long range directory + */ + +struct damon_sysfs_ul_range *damon_sysfs_ul_range_alloc( + unsigned long min, + unsigned long max) +{ + struct damon_sysfs_ul_range *range = kmalloc(sizeof(*range), + GFP_KERNEL); + + if (!range) + return NULL; + range->kobj = (struct kobject){}; + range->min = min; + range->max = max; + + return range; +} + +static ssize_t min_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_ul_range *range = container_of(kobj, + struct damon_sysfs_ul_range, kobj); + + return sysfs_emit(buf, "%lu\n", range->min); +} + +static ssize_t min_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct damon_sysfs_ul_range *range = container_of(kobj, + struct damon_sysfs_ul_range, kobj); + unsigned long min; + int err; + + err = kstrtoul(buf, 0, &min); + if (err) + return err; + + range->min = min; + return count; +} + +static ssize_t max_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_ul_range *range = container_of(kobj, + struct damon_sysfs_ul_range, kobj); + + return sysfs_emit(buf, "%lu\n", range->max); +} + +static ssize_t max_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct damon_sysfs_ul_range *range = container_of(kobj, + struct damon_sysfs_ul_range, kobj); + unsigned long max; + int err; + + err = kstrtoul(buf, 0, &max); + if (err) + return err; + + range->max = max; + return count; +} + +void damon_sysfs_ul_range_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_ul_range, kobj)); +} + +static struct kobj_attribute damon_sysfs_ul_range_min_attr = + __ATTR_RW_MODE(min, 0600); + +static struct kobj_attribute damon_sysfs_ul_range_max_attr = + __ATTR_RW_MODE(max, 0600); + 
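+/*
+ * An initialized damon_sysfs_ul_range kobject appears in sysfs as a
+ * directory holding 'min' and 'max' files, root-writable per the 0600 modes
+ * above. Purely as an illustration (the actual path depends on the parent
+ * kobject the range is added under):
+ *
+ *	echo 4096 > <parent_dir>/min
+ *	cat <parent_dir>/max
+ */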
+static struct attribute *damon_sysfs_ul_range_attrs[] = { + &damon_sysfs_ul_range_min_attr.attr, + &damon_sysfs_ul_range_max_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_ul_range); + +struct kobj_type damon_sysfs_ul_range_ktype = { + .release = damon_sysfs_ul_range_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_ul_range_groups, +}; + diff --git a/mm/damon/sysfs-common.h b/mm/damon/sysfs-common.h new file mode 100644 index 000000000000..3db199c84ed3 --- /dev/null +++ b/mm/damon/sysfs-common.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common Primitives for DAMON Sysfs Interface + * + * Author: SeongJae Park + */ + +#include +#include + +extern struct mutex damon_sysfs_lock; + +struct damon_sysfs_ul_range { + struct kobject kobj; + unsigned long min; + unsigned long max; +}; + +struct damon_sysfs_ul_range *damon_sysfs_ul_range_alloc( + unsigned long min, + unsigned long max); +void damon_sysfs_ul_range_release(struct kobject *kobj); + +extern struct kobj_type damon_sysfs_ul_range_ktype; + +/* + * schemes directory + */ + +struct damon_sysfs_schemes { + struct kobject kobj; + struct damon_sysfs_scheme **schemes_arr; + int nr; +}; + +struct damon_sysfs_schemes *damon_sysfs_schemes_alloc(void); +void damon_sysfs_schemes_rm_dirs(struct damon_sysfs_schemes *schemes); + +extern struct kobj_type damon_sysfs_schemes_ktype; + +int damon_sysfs_set_schemes(struct damon_ctx *ctx, + struct damon_sysfs_schemes *sysfs_schemes); + +void damon_sysfs_schemes_update_stats( + struct damon_sysfs_schemes *sysfs_schemes, + struct damon_ctx *ctx); + +int damon_sysfs_schemes_update_regions_start( + struct damon_sysfs_schemes *sysfs_schemes, + struct damon_ctx *ctx, bool total_bytes_only); + +bool damos_sysfs_regions_upd_done(void); + +int damon_sysfs_schemes_update_regions_stop(struct damon_ctx *ctx); + +int damon_sysfs_schemes_clear_regions( + struct damon_sysfs_schemes *sysfs_schemes, + struct damon_ctx *ctx); diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c new file mode 100644 index 000000000000..12b2c903b0a0 --- /dev/null +++ b/mm/damon/sysfs-schemes.c @@ -0,0 +1,1458 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DAMON sysfs Interface + * + * Copyright (c) 2022 SeongJae Park + */ + +#include + +#include "sysfs-common.h" + +/* + * scheme region directory + */ + +struct damon_sysfs_scheme_region { + struct kobject kobj; + struct damon_addr_range ar; + unsigned int nr_accesses; + unsigned int age; + struct list_head list; +}; + +static struct damon_sysfs_scheme_region *damon_sysfs_scheme_region_alloc( + struct damon_region *region) +{ + struct damon_sysfs_scheme_region *sysfs_region = kmalloc( + sizeof(*sysfs_region), GFP_KERNEL); + + if (!sysfs_region) + return NULL; + sysfs_region->kobj = (struct kobject){}; + sysfs_region->ar = region->ar; + sysfs_region->nr_accesses = region->nr_accesses; + sysfs_region->age = region->age; + INIT_LIST_HEAD(&sysfs_region->list); + return sysfs_region; +} + +static ssize_t start_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_scheme_region *region = container_of(kobj, + struct damon_sysfs_scheme_region, kobj); + + return sysfs_emit(buf, "%lu\n", region->ar.start); +} + +static ssize_t end_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_scheme_region *region = container_of(kobj, + struct damon_sysfs_scheme_region, kobj); + + return sysfs_emit(buf, "%lu\n", region->ar.end); +} + +static ssize_t nr_accesses_show(struct 
kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_scheme_region *region = container_of(kobj, + struct damon_sysfs_scheme_region, kobj); + + return sysfs_emit(buf, "%u\n", region->nr_accesses); +} + +static ssize_t age_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_scheme_region *region = container_of(kobj, + struct damon_sysfs_scheme_region, kobj); + + return sysfs_emit(buf, "%u\n", region->age); +} + +static void damon_sysfs_scheme_region_release(struct kobject *kobj) +{ + struct damon_sysfs_scheme_region *region = container_of(kobj, + struct damon_sysfs_scheme_region, kobj); + + list_del(®ion->list); + kfree(region); +} + +static struct kobj_attribute damon_sysfs_scheme_region_start_attr = + __ATTR_RO_MODE(start, 0400); + +static struct kobj_attribute damon_sysfs_scheme_region_end_attr = + __ATTR_RO_MODE(end, 0400); + +static struct kobj_attribute damon_sysfs_scheme_region_nr_accesses_attr = + __ATTR_RO_MODE(nr_accesses, 0400); + +static struct kobj_attribute damon_sysfs_scheme_region_age_attr = + __ATTR_RO_MODE(age, 0400); + +static struct attribute *damon_sysfs_scheme_region_attrs[] = { + &damon_sysfs_scheme_region_start_attr.attr, + &damon_sysfs_scheme_region_end_attr.attr, + &damon_sysfs_scheme_region_nr_accesses_attr.attr, + &damon_sysfs_scheme_region_age_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_scheme_region); + +static struct kobj_type damon_sysfs_scheme_region_ktype = { + .release = damon_sysfs_scheme_region_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_scheme_region_groups, +}; + +/* + * scheme regions directory + */ + +/* + * enum damos_sysfs_regions_upd_status - Represent DAMOS tried regions update + * status + * @DAMOS_TRIED_REGIONS_UPD_IDLE: Waiting for next request. + * @DAMOS_TRIED_REGIONS_UPD_STARTED: Update started. + * @DAMOS_TRIED_REGIONS_UPD_FINISHED: Update finished. + * + * Each DAMON-based operation scheme (&struct damos) has its own apply + * interval, and we need to expose the scheme tried regions based on only + * single snapshot. For this, we keep the tried regions update status for each + * scheme. The status becomes 'idle' at the beginning. + * + * Once the tried regions update request is received, the request handling + * start function (damon_sysfs_scheme_update_regions_start()) sets the status + * of all schemes as 'idle' again, and register ->before_damos_apply() and + * ->after_sampling() callbacks. + * + * Then, the first followup ->before_damos_apply() callback + * (damon_sysfs_before_damos_apply()) sets the status 'started'. The first + * ->after_sampling() callback (damon_sysfs_after_sampling()) after the call + * is called only after the scheme is completely applied + * to the given snapshot. Hence the callback knows the situation by showing + * 'started' status, and sets the status as 'finished'. Then, + * damon_sysfs_before_damos_apply() understands the situation by showing the + * 'finished' status and do nothing. + * + * Finally, the tried regions request handling finisher function + * (damon_sysfs_schemes_update_regions_stop()) unregisters the callbacks. 
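+ *
+ * In short, for a single update request the status moves:
+ *
+ *	IDLE (request received, callbacks registered)
+ *	-> STARTED (on the first damon_sysfs_before_damos_apply() call)
+ *	-> FINISHED (on the first damon_sysfs_after_sampling() call after that)
+ *
+ * and any later damon_sysfs_before_damos_apply() that sees FINISHED leaves
+ * the snapshot untouched.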
+ */ +enum damos_sysfs_regions_upd_status { + DAMOS_TRIED_REGIONS_UPD_IDLE, + DAMOS_TRIED_REGIONS_UPD_STARTED, + DAMOS_TRIED_REGIONS_UPD_FINISHED, +}; + +struct damon_sysfs_scheme_regions { + struct kobject kobj; + struct list_head regions_list; + int nr_regions; + unsigned long total_bytes; + enum damos_sysfs_regions_upd_status upd_status; +}; + +static struct damon_sysfs_scheme_regions * +damon_sysfs_scheme_regions_alloc(void) +{ + struct damon_sysfs_scheme_regions *regions = kmalloc(sizeof(*regions), + GFP_KERNEL); + + if (!regions) + return NULL; + + regions->kobj = (struct kobject){}; + INIT_LIST_HEAD(®ions->regions_list); + regions->nr_regions = 0; + regions->total_bytes = 0; + regions->upd_status = DAMOS_TRIED_REGIONS_UPD_IDLE; + return regions; +} + +static ssize_t total_bytes_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_scheme_regions *regions = container_of(kobj, + struct damon_sysfs_scheme_regions, kobj); + + return sysfs_emit(buf, "%lu\n", regions->total_bytes); +} + +static void damon_sysfs_scheme_regions_rm_dirs( + struct damon_sysfs_scheme_regions *regions) +{ + struct damon_sysfs_scheme_region *r, *next; + + list_for_each_entry_safe(r, next, ®ions->regions_list, list) { + /* release function deletes it from the list */ + kobject_put(&r->kobj); + regions->nr_regions--; + } +} + +static void damon_sysfs_scheme_regions_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_scheme_regions, kobj)); +} + +static struct kobj_attribute damon_sysfs_scheme_regions_total_bytes_attr = + __ATTR_RO_MODE(total_bytes, 0400); + +static struct attribute *damon_sysfs_scheme_regions_attrs[] = { + &damon_sysfs_scheme_regions_total_bytes_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_scheme_regions); + +static struct kobj_type damon_sysfs_scheme_regions_ktype = { + .release = damon_sysfs_scheme_regions_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_scheme_regions_groups, +}; + +/* + * schemes/stats directory + */ + +struct damon_sysfs_stats { + struct kobject kobj; + unsigned long nr_tried; + unsigned long sz_tried; + unsigned long nr_applied; + unsigned long sz_applied; + unsigned long qt_exceeds; +}; + +static struct damon_sysfs_stats *damon_sysfs_stats_alloc(void) +{ + return kzalloc(sizeof(struct damon_sysfs_stats), GFP_KERNEL); +} + +static ssize_t nr_tried_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_stats *stats = container_of(kobj, + struct damon_sysfs_stats, kobj); + + return sysfs_emit(buf, "%lu\n", stats->nr_tried); +} + +static ssize_t sz_tried_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_stats *stats = container_of(kobj, + struct damon_sysfs_stats, kobj); + + return sysfs_emit(buf, "%lu\n", stats->sz_tried); +} + +static ssize_t nr_applied_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_stats *stats = container_of(kobj, + struct damon_sysfs_stats, kobj); + + return sysfs_emit(buf, "%lu\n", stats->nr_applied); +} + +static ssize_t sz_applied_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_stats *stats = container_of(kobj, + struct damon_sysfs_stats, kobj); + + return sysfs_emit(buf, "%lu\n", stats->sz_applied); +} + +static ssize_t qt_exceeds_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_stats *stats = container_of(kobj, + struct damon_sysfs_stats, 
kobj); + + return sysfs_emit(buf, "%lu\n", stats->qt_exceeds); +} + +static void damon_sysfs_stats_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_stats, kobj)); +} + +static struct kobj_attribute damon_sysfs_stats_nr_tried_attr = + __ATTR_RO_MODE(nr_tried, 0400); + +static struct kobj_attribute damon_sysfs_stats_sz_tried_attr = + __ATTR_RO_MODE(sz_tried, 0400); + +static struct kobj_attribute damon_sysfs_stats_nr_applied_attr = + __ATTR_RO_MODE(nr_applied, 0400); + +static struct kobj_attribute damon_sysfs_stats_sz_applied_attr = + __ATTR_RO_MODE(sz_applied, 0400); + +static struct kobj_attribute damon_sysfs_stats_qt_exceeds_attr = + __ATTR_RO_MODE(qt_exceeds, 0400); + +static struct attribute *damon_sysfs_stats_attrs[] = { + &damon_sysfs_stats_nr_tried_attr.attr, + &damon_sysfs_stats_sz_tried_attr.attr, + &damon_sysfs_stats_nr_applied_attr.attr, + &damon_sysfs_stats_sz_applied_attr.attr, + &damon_sysfs_stats_qt_exceeds_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_stats); + +static struct kobj_type damon_sysfs_stats_ktype = { + .release = damon_sysfs_stats_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_stats_groups, +}; + +/* + * watermarks directory + */ + +struct damon_sysfs_watermarks { + struct kobject kobj; + enum damos_wmark_metric metric; + unsigned long interval_us; + unsigned long high; + unsigned long mid; + unsigned long low; +}; + +static struct damon_sysfs_watermarks *damon_sysfs_watermarks_alloc( + enum damos_wmark_metric metric, unsigned long interval_us, + unsigned long high, unsigned long mid, unsigned long low) +{ + struct damon_sysfs_watermarks *watermarks = kmalloc( + sizeof(*watermarks), GFP_KERNEL); + + if (!watermarks) + return NULL; + watermarks->kobj = (struct kobject){}; + watermarks->metric = metric; + watermarks->interval_us = interval_us; + watermarks->high = high; + watermarks->mid = mid; + watermarks->low = low; + return watermarks; +} + +/* Should match with enum damos_wmark_metric */ +static const char * const damon_sysfs_wmark_metric_strs[] = { + "none", + "free_mem_rate", +}; + +static ssize_t metric_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_watermarks *watermarks = container_of(kobj, + struct damon_sysfs_watermarks, kobj); + + return sysfs_emit(buf, "%s\n", + damon_sysfs_wmark_metric_strs[watermarks->metric]); +} + +static ssize_t metric_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct damon_sysfs_watermarks *watermarks = container_of(kobj, + struct damon_sysfs_watermarks, kobj); + enum damos_wmark_metric metric; + + for (metric = 0; metric < NR_DAMOS_WMARK_METRICS; metric++) { + if (sysfs_streq(buf, damon_sysfs_wmark_metric_strs[metric])) { + watermarks->metric = metric; + return count; + } + } + return -EINVAL; +} + +static ssize_t interval_us_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_watermarks *watermarks = container_of(kobj, + struct damon_sysfs_watermarks, kobj); + + return sysfs_emit(buf, "%lu\n", watermarks->interval_us); +} + +static ssize_t interval_us_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_watermarks *watermarks = container_of(kobj, + struct damon_sysfs_watermarks, kobj); + int err = kstrtoul(buf, 0, &watermarks->interval_us); + + return err ? 
err : count; +} + +static ssize_t high_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_watermarks *watermarks = container_of(kobj, + struct damon_sysfs_watermarks, kobj); + + return sysfs_emit(buf, "%lu\n", watermarks->high); +} + +static ssize_t high_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_watermarks *watermarks = container_of(kobj, + struct damon_sysfs_watermarks, kobj); + int err = kstrtoul(buf, 0, &watermarks->high); + + return err ? err : count; +} + +static ssize_t mid_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_watermarks *watermarks = container_of(kobj, + struct damon_sysfs_watermarks, kobj); + + return sysfs_emit(buf, "%lu\n", watermarks->mid); +} + +static ssize_t mid_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_watermarks *watermarks = container_of(kobj, + struct damon_sysfs_watermarks, kobj); + int err = kstrtoul(buf, 0, &watermarks->mid); + + return err ? err : count; +} + +static ssize_t low_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_watermarks *watermarks = container_of(kobj, + struct damon_sysfs_watermarks, kobj); + + return sysfs_emit(buf, "%lu\n", watermarks->low); +} + +static ssize_t low_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_watermarks *watermarks = container_of(kobj, + struct damon_sysfs_watermarks, kobj); + int err = kstrtoul(buf, 0, &watermarks->low); + + return err ? err : count; +} + +static void damon_sysfs_watermarks_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_watermarks, kobj)); +} + +static struct kobj_attribute damon_sysfs_watermarks_metric_attr = + __ATTR_RW_MODE(metric, 0600); + +static struct kobj_attribute damon_sysfs_watermarks_interval_us_attr = + __ATTR_RW_MODE(interval_us, 0600); + +static struct kobj_attribute damon_sysfs_watermarks_high_attr = + __ATTR_RW_MODE(high, 0600); + +static struct kobj_attribute damon_sysfs_watermarks_mid_attr = + __ATTR_RW_MODE(mid, 0600); + +static struct kobj_attribute damon_sysfs_watermarks_low_attr = + __ATTR_RW_MODE(low, 0600); + +static struct attribute *damon_sysfs_watermarks_attrs[] = { + &damon_sysfs_watermarks_metric_attr.attr, + &damon_sysfs_watermarks_interval_us_attr.attr, + &damon_sysfs_watermarks_high_attr.attr, + &damon_sysfs_watermarks_mid_attr.attr, + &damon_sysfs_watermarks_low_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_watermarks); + +static struct kobj_type damon_sysfs_watermarks_ktype = { + .release = damon_sysfs_watermarks_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_watermarks_groups, +}; + +/* + * scheme/weights directory + */ + +struct damon_sysfs_weights { + struct kobject kobj; + unsigned int sz; + unsigned int nr_accesses; + unsigned int age; +}; + +static struct damon_sysfs_weights *damon_sysfs_weights_alloc(unsigned int sz, + unsigned int nr_accesses, unsigned int age) +{ + struct damon_sysfs_weights *weights = kmalloc(sizeof(*weights), + GFP_KERNEL); + + if (!weights) + return NULL; + weights->kobj = (struct kobject){}; + weights->sz = sz; + weights->nr_accesses = nr_accesses; + weights->age = age; + return weights; +} + +static ssize_t sz_permil_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_weights *weights = 
container_of(kobj, + struct damon_sysfs_weights, kobj); + + return sysfs_emit(buf, "%u\n", weights->sz); +} + +static ssize_t sz_permil_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_weights *weights = container_of(kobj, + struct damon_sysfs_weights, kobj); + int err = kstrtouint(buf, 0, &weights->sz); + + return err ? err : count; +} + +static ssize_t nr_accesses_permil_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_weights *weights = container_of(kobj, + struct damon_sysfs_weights, kobj); + + return sysfs_emit(buf, "%u\n", weights->nr_accesses); +} + +static ssize_t nr_accesses_permil_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_weights *weights = container_of(kobj, + struct damon_sysfs_weights, kobj); + int err = kstrtouint(buf, 0, &weights->nr_accesses); + + return err ? err : count; +} + +static ssize_t age_permil_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_weights *weights = container_of(kobj, + struct damon_sysfs_weights, kobj); + + return sysfs_emit(buf, "%u\n", weights->age); +} + +static ssize_t age_permil_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_weights *weights = container_of(kobj, + struct damon_sysfs_weights, kobj); + int err = kstrtouint(buf, 0, &weights->age); + + return err ? err : count; +} + +static void damon_sysfs_weights_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_weights, kobj)); +} + +static struct kobj_attribute damon_sysfs_weights_sz_attr = + __ATTR_RW_MODE(sz_permil, 0600); + +static struct kobj_attribute damon_sysfs_weights_nr_accesses_attr = + __ATTR_RW_MODE(nr_accesses_permil, 0600); + +static struct kobj_attribute damon_sysfs_weights_age_attr = + __ATTR_RW_MODE(age_permil, 0600); + +static struct attribute *damon_sysfs_weights_attrs[] = { + &damon_sysfs_weights_sz_attr.attr, + &damon_sysfs_weights_nr_accesses_attr.attr, + &damon_sysfs_weights_age_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_weights); + +static struct kobj_type damon_sysfs_weights_ktype = { + .release = damon_sysfs_weights_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_weights_groups, +}; + +/* + * quotas directory + */ + +struct damon_sysfs_quotas { + struct kobject kobj; + struct damon_sysfs_weights *weights; + unsigned long ms; + unsigned long sz; + unsigned long reset_interval_ms; +}; + +static struct damon_sysfs_quotas *damon_sysfs_quotas_alloc(void) +{ + return kzalloc(sizeof(struct damon_sysfs_quotas), GFP_KERNEL); +} + +static int damon_sysfs_quotas_add_dirs(struct damon_sysfs_quotas *quotas) +{ + struct damon_sysfs_weights *weights; + int err; + + weights = damon_sysfs_weights_alloc(0, 0, 0); + if (!weights) + return -ENOMEM; + + err = kobject_init_and_add(&weights->kobj, &damon_sysfs_weights_ktype, + &quotas->kobj, "weights"); + if (err) + kobject_put(&weights->kobj); + else + quotas->weights = weights; + return err; +} + +static void damon_sysfs_quotas_rm_dirs(struct damon_sysfs_quotas *quotas) +{ + kobject_put(&quotas->weights->kobj); +} + +static ssize_t ms_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_quotas *quotas = container_of(kobj, + struct damon_sysfs_quotas, kobj); + + return sysfs_emit(buf, "%lu\n", quotas->ms); +} + +static ssize_t ms_store(struct kobject *kobj, 
struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct damon_sysfs_quotas *quotas = container_of(kobj, + struct damon_sysfs_quotas, kobj); + int err = kstrtoul(buf, 0, &quotas->ms); + + if (err) + return -EINVAL; + return count; +} + +static ssize_t bytes_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_quotas *quotas = container_of(kobj, + struct damon_sysfs_quotas, kobj); + + return sysfs_emit(buf, "%lu\n", quotas->sz); +} + +static ssize_t bytes_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_quotas *quotas = container_of(kobj, + struct damon_sysfs_quotas, kobj); + int err = kstrtoul(buf, 0, &quotas->sz); + + if (err) + return -EINVAL; + return count; +} + +static ssize_t reset_interval_ms_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_quotas *quotas = container_of(kobj, + struct damon_sysfs_quotas, kobj); + + return sysfs_emit(buf, "%lu\n", quotas->reset_interval_ms); +} + +static ssize_t reset_interval_ms_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_quotas *quotas = container_of(kobj, + struct damon_sysfs_quotas, kobj); + int err = kstrtoul(buf, 0, &quotas->reset_interval_ms); + + if (err) + return -EINVAL; + return count; +} + +static void damon_sysfs_quotas_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_quotas, kobj)); +} + +static struct kobj_attribute damon_sysfs_quotas_ms_attr = + __ATTR_RW_MODE(ms, 0600); + +static struct kobj_attribute damon_sysfs_quotas_sz_attr = + __ATTR_RW_MODE(bytes, 0600); + +static struct kobj_attribute damon_sysfs_quotas_reset_interval_ms_attr = + __ATTR_RW_MODE(reset_interval_ms, 0600); + +static struct attribute *damon_sysfs_quotas_attrs[] = { + &damon_sysfs_quotas_ms_attr.attr, + &damon_sysfs_quotas_sz_attr.attr, + &damon_sysfs_quotas_reset_interval_ms_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_quotas); + +static struct kobj_type damon_sysfs_quotas_ktype = { + .release = damon_sysfs_quotas_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_quotas_groups, +}; + +/* + * access_pattern directory + */ + +struct damon_sysfs_access_pattern { + struct kobject kobj; + struct damon_sysfs_ul_range *sz; + struct damon_sysfs_ul_range *nr_accesses; + struct damon_sysfs_ul_range *age; +}; + +static +struct damon_sysfs_access_pattern *damon_sysfs_access_pattern_alloc(void) +{ + struct damon_sysfs_access_pattern *access_pattern = + kmalloc(sizeof(*access_pattern), GFP_KERNEL); + + if (!access_pattern) + return NULL; + access_pattern->kobj = (struct kobject){}; + return access_pattern; +} + +static int damon_sysfs_access_pattern_add_range_dir( + struct damon_sysfs_access_pattern *access_pattern, + struct damon_sysfs_ul_range **range_dir_ptr, + char *name) +{ + struct damon_sysfs_ul_range *range = damon_sysfs_ul_range_alloc(0, 0); + int err; + + if (!range) + return -ENOMEM; + err = kobject_init_and_add(&range->kobj, &damon_sysfs_ul_range_ktype, + &access_pattern->kobj, name); + if (err) + kobject_put(&range->kobj); + else + *range_dir_ptr = range; + return err; +} + +static int damon_sysfs_access_pattern_add_dirs( + struct damon_sysfs_access_pattern *access_pattern) +{ + int err; + + err = damon_sysfs_access_pattern_add_range_dir(access_pattern, + &access_pattern->sz, "sz"); + if (err) + goto put_sz_out; + + err = damon_sysfs_access_pattern_add_range_dir(access_pattern, + 
&access_pattern->nr_accesses, "nr_accesses"); + if (err) + goto put_nr_accesses_sz_out; + + err = damon_sysfs_access_pattern_add_range_dir(access_pattern, + &access_pattern->age, "age"); + if (err) + goto put_age_nr_accesses_sz_out; + return 0; + +put_age_nr_accesses_sz_out: + kobject_put(&access_pattern->age->kobj); + access_pattern->age = NULL; +put_nr_accesses_sz_out: + kobject_put(&access_pattern->nr_accesses->kobj); + access_pattern->nr_accesses = NULL; +put_sz_out: + kobject_put(&access_pattern->sz->kobj); + access_pattern->sz = NULL; + return err; +} + +static void damon_sysfs_access_pattern_rm_dirs( + struct damon_sysfs_access_pattern *access_pattern) +{ + kobject_put(&access_pattern->sz->kobj); + kobject_put(&access_pattern->nr_accesses->kobj); + kobject_put(&access_pattern->age->kobj); +} + +static void damon_sysfs_access_pattern_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_access_pattern, kobj)); +} + +static struct attribute *damon_sysfs_access_pattern_attrs[] = { + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_access_pattern); + +static struct kobj_type damon_sysfs_access_pattern_ktype = { + .release = damon_sysfs_access_pattern_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_access_pattern_groups, +}; + +/* + * scheme directory + */ + +struct damon_sysfs_scheme { + struct kobject kobj; + enum damos_action action; + struct damon_sysfs_access_pattern *access_pattern; + struct damon_sysfs_quotas *quotas; + struct damon_sysfs_watermarks *watermarks; + struct damon_sysfs_stats *stats; + struct damon_sysfs_scheme_regions *tried_regions; +}; + +/* This should match with enum damos_action */ +static const char * const damon_sysfs_damos_action_strs[] = { + "willneed", + "cold", + "pageout", + "hugepage", + "nohugepage", + "lru_prio", + "lru_deprio", + "stat", +}; + +static struct damon_sysfs_scheme *damon_sysfs_scheme_alloc( + enum damos_action action) +{ + struct damon_sysfs_scheme *scheme = kmalloc(sizeof(*scheme), + GFP_KERNEL); + + if (!scheme) + return NULL; + scheme->kobj = (struct kobject){}; + scheme->action = action; + return scheme; +} + +static int damon_sysfs_scheme_set_access_pattern( + struct damon_sysfs_scheme *scheme) +{ + struct damon_sysfs_access_pattern *access_pattern; + int err; + + access_pattern = damon_sysfs_access_pattern_alloc(); + if (!access_pattern) + return -ENOMEM; + err = kobject_init_and_add(&access_pattern->kobj, + &damon_sysfs_access_pattern_ktype, &scheme->kobj, + "access_pattern"); + if (err) + goto out; + err = damon_sysfs_access_pattern_add_dirs(access_pattern); + if (err) + goto out; + scheme->access_pattern = access_pattern; + return 0; + +out: + kobject_put(&access_pattern->kobj); + return err; +} + +static int damon_sysfs_scheme_set_quotas(struct damon_sysfs_scheme *scheme) +{ + struct damon_sysfs_quotas *quotas = damon_sysfs_quotas_alloc(); + int err; + + if (!quotas) + return -ENOMEM; + err = kobject_init_and_add(&quotas->kobj, &damon_sysfs_quotas_ktype, + &scheme->kobj, "quotas"); + if (err) + goto out; + err = damon_sysfs_quotas_add_dirs(quotas); + if (err) + goto out; + scheme->quotas = quotas; + return 0; + +out: + kobject_put(&quotas->kobj); + return err; +} + +static int damon_sysfs_scheme_set_watermarks(struct damon_sysfs_scheme *scheme) +{ + struct damon_sysfs_watermarks *watermarks = + damon_sysfs_watermarks_alloc(DAMOS_WMARK_NONE, 0, 0, 0, 0); + int err; + + if (!watermarks) + return -ENOMEM; + err = kobject_init_and_add(&watermarks->kobj, + &damon_sysfs_watermarks_ktype, &scheme->kobj, 
+ "watermarks"); + if (err) + kobject_put(&watermarks->kobj); + else + scheme->watermarks = watermarks; + return err; +} + +static int damon_sysfs_scheme_set_stats(struct damon_sysfs_scheme *scheme) +{ + struct damon_sysfs_stats *stats = damon_sysfs_stats_alloc(); + int err; + + if (!stats) + return -ENOMEM; + err = kobject_init_and_add(&stats->kobj, &damon_sysfs_stats_ktype, + &scheme->kobj, "stats"); + if (err) + kobject_put(&stats->kobj); + else + scheme->stats = stats; + return err; +} + +static int damon_sysfs_scheme_set_tried_regions( + struct damon_sysfs_scheme *scheme) +{ + struct damon_sysfs_scheme_regions *tried_regions = + damon_sysfs_scheme_regions_alloc(); + int err; + + if (!tried_regions) + return -ENOMEM; + err = kobject_init_and_add(&tried_regions->kobj, + &damon_sysfs_scheme_regions_ktype, &scheme->kobj, + "tried_regions"); + if (err) + kobject_put(&tried_regions->kobj); + else + scheme->tried_regions = tried_regions; + return err; +} + +static int damon_sysfs_scheme_add_dirs(struct damon_sysfs_scheme *scheme) +{ + int err; + + err = damon_sysfs_scheme_set_access_pattern(scheme); + if (err) + return err; + err = damon_sysfs_scheme_set_quotas(scheme); + if (err) + goto put_access_pattern_out; + err = damon_sysfs_scheme_set_watermarks(scheme); + if (err) + goto put_quotas_access_pattern_out; + err = damon_sysfs_scheme_set_stats(scheme); + if (err) + goto put_watermarks_quotas_access_pattern_out; + err = damon_sysfs_scheme_set_tried_regions(scheme); + if (err) + goto put_tried_regions_out; + return 0; + +put_tried_regions_out: + kobject_put(&scheme->tried_regions->kobj); + scheme->tried_regions = NULL; +put_watermarks_quotas_access_pattern_out: + kobject_put(&scheme->watermarks->kobj); + scheme->watermarks = NULL; +put_quotas_access_pattern_out: + kobject_put(&scheme->quotas->kobj); + scheme->quotas = NULL; +put_access_pattern_out: + kobject_put(&scheme->access_pattern->kobj); + scheme->access_pattern = NULL; + return err; +} + +static void damon_sysfs_scheme_rm_dirs(struct damon_sysfs_scheme *scheme) +{ + damon_sysfs_access_pattern_rm_dirs(scheme->access_pattern); + kobject_put(&scheme->access_pattern->kobj); + damon_sysfs_quotas_rm_dirs(scheme->quotas); + kobject_put(&scheme->quotas->kobj); + kobject_put(&scheme->watermarks->kobj); + kobject_put(&scheme->stats->kobj); + damon_sysfs_scheme_regions_rm_dirs(scheme->tried_regions); + kobject_put(&scheme->tried_regions->kobj); +} + +static ssize_t action_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_scheme *scheme = container_of(kobj, + struct damon_sysfs_scheme, kobj); + + return sysfs_emit(buf, "%s\n", + damon_sysfs_damos_action_strs[scheme->action]); +} + +static ssize_t action_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct damon_sysfs_scheme *scheme = container_of(kobj, + struct damon_sysfs_scheme, kobj); + enum damos_action action; + + for (action = 0; action < NR_DAMOS_ACTIONS; action++) { + if (sysfs_streq(buf, damon_sysfs_damos_action_strs[action])) { + scheme->action = action; + return count; + } + } + return -EINVAL; +} + +static void damon_sysfs_scheme_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_scheme, kobj)); +} + +static struct kobj_attribute damon_sysfs_scheme_action_attr = + __ATTR_RW_MODE(action, 0600); + +static struct attribute *damon_sysfs_scheme_attrs[] = { + &damon_sysfs_scheme_action_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_scheme); + +static struct kobj_type 
damon_sysfs_scheme_ktype = { + .release = damon_sysfs_scheme_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_scheme_groups, +}; + +/* + * schemes directory + */ + +struct damon_sysfs_schemes *damon_sysfs_schemes_alloc(void) +{ + return kzalloc(sizeof(struct damon_sysfs_schemes), GFP_KERNEL); +} + +void damon_sysfs_schemes_rm_dirs(struct damon_sysfs_schemes *schemes) +{ + struct damon_sysfs_scheme **schemes_arr = schemes->schemes_arr; + int i; + + for (i = 0; i < schemes->nr; i++) { + damon_sysfs_scheme_rm_dirs(schemes_arr[i]); + kobject_put(&schemes_arr[i]->kobj); + } + schemes->nr = 0; + kfree(schemes_arr); + schemes->schemes_arr = NULL; +} + +static int damon_sysfs_schemes_add_dirs(struct damon_sysfs_schemes *schemes, + int nr_schemes) +{ + struct damon_sysfs_scheme **schemes_arr, *scheme; + int err, i; + + damon_sysfs_schemes_rm_dirs(schemes); + if (!nr_schemes) + return 0; + + schemes_arr = kmalloc_array(nr_schemes, sizeof(*schemes_arr), + GFP_KERNEL | __GFP_NOWARN); + if (!schemes_arr) + return -ENOMEM; + schemes->schemes_arr = schemes_arr; + + for (i = 0; i < nr_schemes; i++) { + scheme = damon_sysfs_scheme_alloc(DAMOS_STAT); + if (!scheme) { + damon_sysfs_schemes_rm_dirs(schemes); + return -ENOMEM; + } + + err = kobject_init_and_add(&scheme->kobj, + &damon_sysfs_scheme_ktype, &schemes->kobj, + "%d", i); + if (err) + goto out; + err = damon_sysfs_scheme_add_dirs(scheme); + if (err) + goto out; + + schemes_arr[i] = scheme; + schemes->nr++; + } + return 0; + +out: + damon_sysfs_schemes_rm_dirs(schemes); + kobject_put(&scheme->kobj); + return err; +} + +static ssize_t nr_schemes_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_schemes *schemes = container_of(kobj, + struct damon_sysfs_schemes, kobj); + + return sysfs_emit(buf, "%d\n", schemes->nr); +} + +static ssize_t nr_schemes_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_schemes *schemes; + int nr, err = kstrtoint(buf, 0, &nr); + + if (err) + return err; + if (nr < 0) + return -EINVAL; + + schemes = container_of(kobj, struct damon_sysfs_schemes, kobj); + + if (!mutex_trylock(&damon_sysfs_lock)) + return -EBUSY; + err = damon_sysfs_schemes_add_dirs(schemes, nr); + mutex_unlock(&damon_sysfs_lock); + if (err) + return err; + return count; +} + +static void damon_sysfs_schemes_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_schemes, kobj)); +} + +static struct kobj_attribute damon_sysfs_schemes_nr_attr = + __ATTR_RW_MODE(nr_schemes, 0600); + +static struct attribute *damon_sysfs_schemes_attrs[] = { + &damon_sysfs_schemes_nr_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_schemes); + +struct kobj_type damon_sysfs_schemes_ktype = { + .release = damon_sysfs_schemes_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_schemes_groups, +}; + +static struct damos *damon_sysfs_mk_scheme( + struct damon_sysfs_scheme *sysfs_scheme) +{ + struct damon_sysfs_access_pattern *access_pattern = + sysfs_scheme->access_pattern; + struct damon_sysfs_quotas *sysfs_quotas = sysfs_scheme->quotas; + struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights; + struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks; + + struct damos_access_pattern pattern = { + .min_sz_region = access_pattern->sz->min, + .max_sz_region = access_pattern->sz->max, + .min_nr_accesses = access_pattern->nr_accesses->min, + .max_nr_accesses = 
access_pattern->nr_accesses->max, + .min_age_region = access_pattern->age->min, + .max_age_region = access_pattern->age->max, + }; + struct damos_quota quota = { + .ms = sysfs_quotas->ms, + .sz = sysfs_quotas->sz, + .reset_interval = sysfs_quotas->reset_interval_ms, + .weight_sz = sysfs_weights->sz, + .weight_nr_accesses = sysfs_weights->nr_accesses, + .weight_age = sysfs_weights->age, + }; + struct damos_watermarks wmarks = { + .metric = sysfs_wmarks->metric, + .interval = sysfs_wmarks->interval_us, + .high = sysfs_wmarks->high, + .mid = sysfs_wmarks->mid, + .low = sysfs_wmarks->low, + }; + + return damon_new_scheme(&pattern, sysfs_scheme->action, &quota, + &wmarks); +} + +static void damon_sysfs_update_scheme(struct damos *scheme, + struct damon_sysfs_scheme *sysfs_scheme) +{ + struct damon_sysfs_access_pattern *access_pattern = + sysfs_scheme->access_pattern; + struct damon_sysfs_quotas *sysfs_quotas = sysfs_scheme->quotas; + struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights; + struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks; + + scheme->pattern.min_sz_region = access_pattern->sz->min; + scheme->pattern.max_sz_region = access_pattern->sz->max; + scheme->pattern.min_nr_accesses = access_pattern->nr_accesses->min; + scheme->pattern.max_nr_accesses = access_pattern->nr_accesses->max; + scheme->pattern.min_age_region = access_pattern->age->min; + scheme->pattern.max_age_region = access_pattern->age->max; + + scheme->action = sysfs_scheme->action; + + scheme->quota.ms = sysfs_quotas->ms; + scheme->quota.sz = sysfs_quotas->sz; + scheme->quota.reset_interval = sysfs_quotas->reset_interval_ms; + scheme->quota.weight_sz = sysfs_weights->sz; + scheme->quota.weight_nr_accesses = sysfs_weights->nr_accesses; + scheme->quota.weight_age = sysfs_weights->age; + + scheme->wmarks.metric = sysfs_wmarks->metric; + scheme->wmarks.interval = sysfs_wmarks->interval_us; + scheme->wmarks.high = sysfs_wmarks->high; + scheme->wmarks.mid = sysfs_wmarks->mid; + scheme->wmarks.low = sysfs_wmarks->low; +} + +int damon_sysfs_set_schemes(struct damon_ctx *ctx, + struct damon_sysfs_schemes *sysfs_schemes) +{ + struct damos *scheme, *next; + int i = 0; + + damon_for_each_scheme_safe(scheme, next, ctx) { + if (i < sysfs_schemes->nr) + damon_sysfs_update_scheme(scheme, + sysfs_schemes->schemes_arr[i]); + else + damon_destroy_scheme(scheme); + i++; + } + + for (; i < sysfs_schemes->nr; i++) { + struct damos *scheme, *next; + + scheme = damon_sysfs_mk_scheme(sysfs_schemes->schemes_arr[i]); + if (!scheme) { + damon_for_each_scheme_safe(scheme, next, ctx) + damon_destroy_scheme(scheme); + return -ENOMEM; + } + damon_add_scheme(ctx, scheme); + } + return 0; +} + +void damon_sysfs_schemes_update_stats( + struct damon_sysfs_schemes *sysfs_schemes, + struct damon_ctx *ctx) +{ + struct damos *scheme; + int schemes_idx = 0; + + damon_for_each_scheme(scheme, ctx) { + struct damon_sysfs_stats *sysfs_stats; + + /* user could have removed the scheme sysfs dir */ + if (schemes_idx >= sysfs_schemes->nr) + break; + + sysfs_stats = sysfs_schemes->schemes_arr[schemes_idx++]->stats; + sysfs_stats->nr_tried = scheme->stat.nr_tried; + sysfs_stats->sz_tried = scheme->stat.sz_tried; + sysfs_stats->nr_applied = scheme->stat.nr_applied; + sysfs_stats->sz_applied = scheme->stat.sz_applied; + sysfs_stats->qt_exceeds = scheme->stat.qt_exceeds; + } +} + +/* + * damon_sysfs_schemes that needs to update its schemes regions dir. 
Protected + * by damon_sysfs_lock + */ +static struct damon_sysfs_schemes *damon_sysfs_schemes_for_damos_callback; +static int damon_sysfs_schemes_region_idx; +static bool damos_regions_upd_total_bytes_only; + +/* + * DAMON callback that is called before damos apply. While this callback is + * registered, damon_sysfs_lock should be held to ensure the regions + * directories exist. + */ +static int damon_sysfs_before_damos_apply(struct damon_ctx *ctx, + struct damon_target *t, struct damon_region *r, + struct damos *s) +{ + struct damos *scheme; + struct damon_sysfs_scheme_regions *sysfs_regions; + struct damon_sysfs_scheme_region *region; + struct damon_sysfs_schemes *sysfs_schemes = + damon_sysfs_schemes_for_damos_callback; + int schemes_idx = 0; + + damon_for_each_scheme(scheme, ctx) { + if (scheme == s) + break; + schemes_idx++; + } + + /* user could have removed the scheme sysfs dir */ + if (schemes_idx >= sysfs_schemes->nr) + return 0; + + sysfs_regions = sysfs_schemes->schemes_arr[schemes_idx]->tried_regions; + if (sysfs_regions->upd_status == DAMOS_TRIED_REGIONS_UPD_FINISHED) + return 0; + if (sysfs_regions->upd_status == DAMOS_TRIED_REGIONS_UPD_IDLE) + sysfs_regions->upd_status = DAMOS_TRIED_REGIONS_UPD_STARTED; + sysfs_regions->total_bytes += r->ar.end - r->ar.start; + if (damos_regions_upd_total_bytes_only) + return 0; + + region = damon_sysfs_scheme_region_alloc(r); + if (!region) + return 0; + list_add_tail(&region->list, &sysfs_regions->regions_list); + sysfs_regions->nr_regions++; + if (kobject_init_and_add(&region->kobj, + &damon_sysfs_scheme_region_ktype, + &sysfs_regions->kobj, "%d", + damon_sysfs_schemes_region_idx++)) { + kobject_put(&region->kobj); + } + return 0; +} + +/* + * DAMON callback that is called after each access sampling. While this + * callback is registered, damon_sysfs_lock should be held to ensure the + * regions directories exist. 
+ */ +static int damon_sysfs_after_sampling(struct damon_ctx *ctx) +{ + struct damon_sysfs_schemes *sysfs_schemes = + damon_sysfs_schemes_for_damos_callback; + struct damon_sysfs_scheme_regions *sysfs_regions; + int i; + + for (i = 0; i < sysfs_schemes->nr; i++) { + sysfs_regions = sysfs_schemes->schemes_arr[i]->tried_regions; + if (sysfs_regions->upd_status == + DAMOS_TRIED_REGIONS_UPD_STARTED) + sysfs_regions->upd_status = + DAMOS_TRIED_REGIONS_UPD_FINISHED; + } + + return 0; +} + +/* Called from damon_sysfs_cmd_request_callback under damon_sysfs_lock */ +int damon_sysfs_schemes_clear_regions( + struct damon_sysfs_schemes *sysfs_schemes, + struct damon_ctx *ctx) +{ + struct damos *scheme; + int schemes_idx = 0; + + damon_for_each_scheme(scheme, ctx) { + struct damon_sysfs_scheme *sysfs_scheme; + + /* user could have removed the scheme sysfs dir */ + if (schemes_idx >= sysfs_schemes->nr) + break; + + sysfs_scheme = sysfs_schemes->schemes_arr[schemes_idx++]; + damon_sysfs_scheme_regions_rm_dirs( + sysfs_scheme->tried_regions); + sysfs_scheme->tried_regions->total_bytes = 0; + } + return 0; +} + +static void damos_tried_regions_init_upd_status( + struct damon_sysfs_schemes *sysfs_schemes) +{ + int i; + + for (i = 0; i < sysfs_schemes->nr; i++) + sysfs_schemes->schemes_arr[i]->tried_regions->upd_status = + DAMOS_TRIED_REGIONS_UPD_IDLE; +} + +/* Called from damon_sysfs_cmd_request_callback under damon_sysfs_lock */ +int damon_sysfs_schemes_update_regions_start( + struct damon_sysfs_schemes *sysfs_schemes, + struct damon_ctx *ctx, bool total_bytes_only) +{ + damon_sysfs_schemes_clear_regions(sysfs_schemes, ctx); + damon_sysfs_schemes_for_damos_callback = sysfs_schemes; + damos_tried_regions_init_upd_status(sysfs_schemes); + damos_regions_upd_total_bytes_only = total_bytes_only; + ctx->callback.before_damos_apply = damon_sysfs_before_damos_apply; + ctx->callback.after_sampling = damon_sysfs_after_sampling; + return 0; +} + +bool damos_sysfs_regions_upd_done(void) +{ + struct damon_sysfs_schemes *sysfs_schemes = + damon_sysfs_schemes_for_damos_callback; + struct damon_sysfs_scheme_regions *sysfs_regions; + int i; + + for (i = 0; i < sysfs_schemes->nr; i++) { + sysfs_regions = sysfs_schemes->schemes_arr[i]->tried_regions; + if (sysfs_regions->upd_status != + DAMOS_TRIED_REGIONS_UPD_FINISHED) + return false; + } + return true; +} + +/* + * Called from damon_sysfs_cmd_request_callback under damon_sysfs_lock. 
Caller + * should unlock damon_sysfs_lock which was held before + * damon_sysfs_schemes_update_regions_start() + */ +int damon_sysfs_schemes_update_regions_stop(struct damon_ctx *ctx) +{ + damon_sysfs_schemes_for_damos_callback = NULL; + ctx->callback.before_damos_apply = NULL; + ctx->callback.after_sampling = NULL; + damon_sysfs_schemes_region_idx = 0; + return 0; +} diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 07e5f1bdf025..0a6b4625de9f 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -5,1056 +5,11 @@ * Copyright (c) 2022 SeongJae Park */ -#include <linux/damon.h> -#include <linux/kobject.h> #include <linux/pid.h> #include <linux/sched.h> #include <linux/slab.h> -static DEFINE_MUTEX(damon_sysfs_lock); - -/* - * unsigned long range directory - */ - -struct damon_sysfs_ul_range { - struct kobject kobj; - unsigned long min; - unsigned long max; -}; - -static struct damon_sysfs_ul_range *damon_sysfs_ul_range_alloc( - unsigned long min, - unsigned long max) -{ - struct damon_sysfs_ul_range *range = kmalloc(sizeof(*range), - GFP_KERNEL); - - if (!range) - return NULL; - range->kobj = (struct kobject){}; - range->min = min; - range->max = max; - - return range; -} - -static ssize_t min_show(struct kobject *kobj, struct kobj_attribute *attr, - char *buf) -{ - struct damon_sysfs_ul_range *range = container_of(kobj, - struct damon_sysfs_ul_range, kobj); - - return sysfs_emit(buf, "%lu\n", range->min); -} - -static ssize_t min_store(struct kobject *kobj, struct kobj_attribute *attr, - const char *buf, size_t count) -{ - struct damon_sysfs_ul_range *range = container_of(kobj, - struct damon_sysfs_ul_range, kobj); - unsigned long min; - int err; - - err = kstrtoul(buf, 0, &min); - if (err) - return err; - - range->min = min; - return count; -} - -static ssize_t max_show(struct kobject *kobj, struct kobj_attribute *attr, - char *buf) -{ - struct damon_sysfs_ul_range *range = container_of(kobj, - struct damon_sysfs_ul_range, kobj); - - return sysfs_emit(buf, "%lu\n", range->max); -} - -static ssize_t max_store(struct kobject *kobj, struct kobj_attribute *attr, - const char *buf, size_t count) -{ - struct damon_sysfs_ul_range *range = container_of(kobj, - struct damon_sysfs_ul_range, kobj); - unsigned long max; - int err; - - err = kstrtoul(buf, 0, &max); - if (err) - return err; - - range->max = max; - return count; -} - -static void damon_sysfs_ul_range_release(struct kobject *kobj) -{ - kfree(container_of(kobj, struct damon_sysfs_ul_range, kobj)); -} - -static struct kobj_attribute damon_sysfs_ul_range_min_attr = - __ATTR_RW_MODE(min, 0600); - -static struct kobj_attribute damon_sysfs_ul_range_max_attr = - __ATTR_RW_MODE(max, 0600); - -static struct attribute *damon_sysfs_ul_range_attrs[] = { - &damon_sysfs_ul_range_min_attr.attr, - &damon_sysfs_ul_range_max_attr.attr, - NULL, -}; -ATTRIBUTE_GROUPS(damon_sysfs_ul_range); - -static struct kobj_type damon_sysfs_ul_range_ktype = { - .release = damon_sysfs_ul_range_release, - .sysfs_ops = &kobj_sysfs_ops, - .default_groups = damon_sysfs_ul_range_groups, -}; - -/* - * schemes/stats directory - */ - -struct damon_sysfs_stats { - struct kobject kobj; - unsigned long nr_tried; - unsigned long sz_tried; - unsigned long nr_applied; - unsigned long sz_applied; - unsigned long qt_exceeds; -}; - -static struct damon_sysfs_stats *damon_sysfs_stats_alloc(void) -{ - return kzalloc(sizeof(struct damon_sysfs_stats), GFP_KERNEL); -} - -static ssize_t nr_tried_show(struct kobject *kobj, struct kobj_attribute *attr, - char *buf) -{ - struct damon_sysfs_stats *stats = container_of(kobj, - struct damon_sysfs_stats, kobj); - - return 
sysfs_emit(buf, "%lu\n", stats->nr_tried); -} - -static ssize_t sz_tried_show(struct kobject *kobj, struct kobj_attribute *attr, - char *buf) -{ - struct damon_sysfs_stats *stats = container_of(kobj, - struct damon_sysfs_stats, kobj); - - return sysfs_emit(buf, "%lu\n", stats->sz_tried); -} - -static ssize_t nr_applied_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct damon_sysfs_stats *stats = container_of(kobj, - struct damon_sysfs_stats, kobj); - - return sysfs_emit(buf, "%lu\n", stats->nr_applied); -} - -static ssize_t sz_applied_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct damon_sysfs_stats *stats = container_of(kobj, - struct damon_sysfs_stats, kobj); - - return sysfs_emit(buf, "%lu\n", stats->sz_applied); -} - -static ssize_t qt_exceeds_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct damon_sysfs_stats *stats = container_of(kobj, - struct damon_sysfs_stats, kobj); - - return sysfs_emit(buf, "%lu\n", stats->qt_exceeds); -} - -static void damon_sysfs_stats_release(struct kobject *kobj) -{ - kfree(container_of(kobj, struct damon_sysfs_stats, kobj)); -} - -static struct kobj_attribute damon_sysfs_stats_nr_tried_attr = - __ATTR_RO_MODE(nr_tried, 0400); - -static struct kobj_attribute damon_sysfs_stats_sz_tried_attr = - __ATTR_RO_MODE(sz_tried, 0400); - -static struct kobj_attribute damon_sysfs_stats_nr_applied_attr = - __ATTR_RO_MODE(nr_applied, 0400); - -static struct kobj_attribute damon_sysfs_stats_sz_applied_attr = - __ATTR_RO_MODE(sz_applied, 0400); - -static struct kobj_attribute damon_sysfs_stats_qt_exceeds_attr = - __ATTR_RO_MODE(qt_exceeds, 0400); - -static struct attribute *damon_sysfs_stats_attrs[] = { - &damon_sysfs_stats_nr_tried_attr.attr, - &damon_sysfs_stats_sz_tried_attr.attr, - &damon_sysfs_stats_nr_applied_attr.attr, - &damon_sysfs_stats_sz_applied_attr.attr, - &damon_sysfs_stats_qt_exceeds_attr.attr, - NULL, -}; -ATTRIBUTE_GROUPS(damon_sysfs_stats); - -static struct kobj_type damon_sysfs_stats_ktype = { - .release = damon_sysfs_stats_release, - .sysfs_ops = &kobj_sysfs_ops, - .default_groups = damon_sysfs_stats_groups, -}; - -/* - * watermarks directory - */ - -struct damon_sysfs_watermarks { - struct kobject kobj; - enum damos_wmark_metric metric; - unsigned long interval_us; - unsigned long high; - unsigned long mid; - unsigned long low; -}; - -static struct damon_sysfs_watermarks *damon_sysfs_watermarks_alloc( - enum damos_wmark_metric metric, unsigned long interval_us, - unsigned long high, unsigned long mid, unsigned long low) -{ - struct damon_sysfs_watermarks *watermarks = kmalloc( - sizeof(*watermarks), GFP_KERNEL); - - if (!watermarks) - return NULL; - watermarks->kobj = (struct kobject){}; - watermarks->metric = metric; - watermarks->interval_us = interval_us; - watermarks->high = high; - watermarks->mid = mid; - watermarks->low = low; - return watermarks; -} - -/* Should match with enum damos_wmark_metric */ -static const char * const damon_sysfs_wmark_metric_strs[] = { - "none", - "free_mem_rate", -}; - -static ssize_t metric_show(struct kobject *kobj, struct kobj_attribute *attr, - char *buf) -{ - struct damon_sysfs_watermarks *watermarks = container_of(kobj, - struct damon_sysfs_watermarks, kobj); - - return sysfs_emit(buf, "%s\n", - damon_sysfs_wmark_metric_strs[watermarks->metric]); -} - -static ssize_t metric_store(struct kobject *kobj, struct kobj_attribute *attr, - const char *buf, size_t count) -{ - struct damon_sysfs_watermarks *watermarks = 
container_of(kobj, - struct damon_sysfs_watermarks, kobj); - enum damos_wmark_metric metric; - - for (metric = 0; metric < NR_DAMOS_WMARK_METRICS; metric++) { - if (sysfs_streq(buf, damon_sysfs_wmark_metric_strs[metric])) { - watermarks->metric = metric; - return count; - } - } - return -EINVAL; -} - -static ssize_t interval_us_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct damon_sysfs_watermarks *watermarks = container_of(kobj, - struct damon_sysfs_watermarks, kobj); - - return sysfs_emit(buf, "%lu\n", watermarks->interval_us); -} - -static ssize_t interval_us_store(struct kobject *kobj, - struct kobj_attribute *attr, const char *buf, size_t count) -{ - struct damon_sysfs_watermarks *watermarks = container_of(kobj, - struct damon_sysfs_watermarks, kobj); - int err = kstrtoul(buf, 0, &watermarks->interval_us); - - return err ? err : count; -} - -static ssize_t high_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct damon_sysfs_watermarks *watermarks = container_of(kobj, - struct damon_sysfs_watermarks, kobj); - - return sysfs_emit(buf, "%lu\n", watermarks->high); -} - -static ssize_t high_store(struct kobject *kobj, - struct kobj_attribute *attr, const char *buf, size_t count) -{ - struct damon_sysfs_watermarks *watermarks = container_of(kobj, - struct damon_sysfs_watermarks, kobj); - int err = kstrtoul(buf, 0, &watermarks->high); - - return err ? err : count; -} - -static ssize_t mid_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct damon_sysfs_watermarks *watermarks = container_of(kobj, - struct damon_sysfs_watermarks, kobj); - - return sysfs_emit(buf, "%lu\n", watermarks->mid); -} - -static ssize_t mid_store(struct kobject *kobj, - struct kobj_attribute *attr, const char *buf, size_t count) -{ - struct damon_sysfs_watermarks *watermarks = container_of(kobj, - struct damon_sysfs_watermarks, kobj); - int err = kstrtoul(buf, 0, &watermarks->mid); - - return err ? err : count; -} - -static ssize_t low_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct damon_sysfs_watermarks *watermarks = container_of(kobj, - struct damon_sysfs_watermarks, kobj); - - return sysfs_emit(buf, "%lu\n", watermarks->low); -} - -static ssize_t low_store(struct kobject *kobj, - struct kobj_attribute *attr, const char *buf, size_t count) -{ - struct damon_sysfs_watermarks *watermarks = container_of(kobj, - struct damon_sysfs_watermarks, kobj); - int err = kstrtoul(buf, 0, &watermarks->low); - - return err ? 
err : count; -} - -static void damon_sysfs_watermarks_release(struct kobject *kobj) -{ - kfree(container_of(kobj, struct damon_sysfs_watermarks, kobj)); -} - -static struct kobj_attribute damon_sysfs_watermarks_metric_attr = - __ATTR_RW_MODE(metric, 0600); - -static struct kobj_attribute damon_sysfs_watermarks_interval_us_attr = - __ATTR_RW_MODE(interval_us, 0600); - -static struct kobj_attribute damon_sysfs_watermarks_high_attr = - __ATTR_RW_MODE(high, 0600); - -static struct kobj_attribute damon_sysfs_watermarks_mid_attr = - __ATTR_RW_MODE(mid, 0600); - -static struct kobj_attribute damon_sysfs_watermarks_low_attr = - __ATTR_RW_MODE(low, 0600); - -static struct attribute *damon_sysfs_watermarks_attrs[] = { - &damon_sysfs_watermarks_metric_attr.attr, - &damon_sysfs_watermarks_interval_us_attr.attr, - &damon_sysfs_watermarks_high_attr.attr, - &damon_sysfs_watermarks_mid_attr.attr, - &damon_sysfs_watermarks_low_attr.attr, - NULL, -}; -ATTRIBUTE_GROUPS(damon_sysfs_watermarks); - -static struct kobj_type damon_sysfs_watermarks_ktype = { - .release = damon_sysfs_watermarks_release, - .sysfs_ops = &kobj_sysfs_ops, - .default_groups = damon_sysfs_watermarks_groups, -}; - -/* - * scheme/weights directory - */ - -struct damon_sysfs_weights { - struct kobject kobj; - unsigned int sz; - unsigned int nr_accesses; - unsigned int age; -}; - -static struct damon_sysfs_weights *damon_sysfs_weights_alloc(unsigned int sz, - unsigned int nr_accesses, unsigned int age) -{ - struct damon_sysfs_weights *weights = kmalloc(sizeof(*weights), - GFP_KERNEL); - - if (!weights) - return NULL; - weights->kobj = (struct kobject){}; - weights->sz = sz; - weights->nr_accesses = nr_accesses; - weights->age = age; - return weights; -} - -static ssize_t sz_permil_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct damon_sysfs_weights *weights = container_of(kobj, - struct damon_sysfs_weights, kobj); - - return sysfs_emit(buf, "%u\n", weights->sz); -} - -static ssize_t sz_permil_store(struct kobject *kobj, - struct kobj_attribute *attr, const char *buf, size_t count) -{ - struct damon_sysfs_weights *weights = container_of(kobj, - struct damon_sysfs_weights, kobj); - int err = kstrtouint(buf, 0, &weights->sz); - - return err ? err : count; -} - -static ssize_t nr_accesses_permil_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct damon_sysfs_weights *weights = container_of(kobj, - struct damon_sysfs_weights, kobj); - - return sysfs_emit(buf, "%u\n", weights->nr_accesses); -} - -static ssize_t nr_accesses_permil_store(struct kobject *kobj, - struct kobj_attribute *attr, const char *buf, size_t count) -{ - struct damon_sysfs_weights *weights = container_of(kobj, - struct damon_sysfs_weights, kobj); - int err = kstrtouint(buf, 0, &weights->nr_accesses); - - return err ? err : count; -} - -static ssize_t age_permil_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct damon_sysfs_weights *weights = container_of(kobj, - struct damon_sysfs_weights, kobj); - - return sysfs_emit(buf, "%u\n", weights->age); -} - -static ssize_t age_permil_store(struct kobject *kobj, - struct kobj_attribute *attr, const char *buf, size_t count) -{ - struct damon_sysfs_weights *weights = container_of(kobj, - struct damon_sysfs_weights, kobj); - int err = kstrtouint(buf, 0, &weights->age); - - return err ? 
err : count; -} - -static void damon_sysfs_weights_release(struct kobject *kobj) -{ - kfree(container_of(kobj, struct damon_sysfs_weights, kobj)); -} - -static struct kobj_attribute damon_sysfs_weights_sz_attr = - __ATTR_RW_MODE(sz_permil, 0600); - -static struct kobj_attribute damon_sysfs_weights_nr_accesses_attr = - __ATTR_RW_MODE(nr_accesses_permil, 0600); - -static struct kobj_attribute damon_sysfs_weights_age_attr = - __ATTR_RW_MODE(age_permil, 0600); - -static struct attribute *damon_sysfs_weights_attrs[] = { - &damon_sysfs_weights_sz_attr.attr, - &damon_sysfs_weights_nr_accesses_attr.attr, - &damon_sysfs_weights_age_attr.attr, - NULL, -}; -ATTRIBUTE_GROUPS(damon_sysfs_weights); - -static struct kobj_type damon_sysfs_weights_ktype = { - .release = damon_sysfs_weights_release, - .sysfs_ops = &kobj_sysfs_ops, - .default_groups = damon_sysfs_weights_groups, -}; - -/* - * quotas directory - */ - -struct damon_sysfs_quotas { - struct kobject kobj; - struct damon_sysfs_weights *weights; - unsigned long ms; - unsigned long sz; - unsigned long reset_interval_ms; -}; - -static struct damon_sysfs_quotas *damon_sysfs_quotas_alloc(void) -{ - return kzalloc(sizeof(struct damon_sysfs_quotas), GFP_KERNEL); -} - -static int damon_sysfs_quotas_add_dirs(struct damon_sysfs_quotas *quotas) -{ - struct damon_sysfs_weights *weights; - int err; - - weights = damon_sysfs_weights_alloc(0, 0, 0); - if (!weights) - return -ENOMEM; - - err = kobject_init_and_add(&weights->kobj, &damon_sysfs_weights_ktype, - &quotas->kobj, "weights"); - if (err) - kobject_put(&weights->kobj); - else - quotas->weights = weights; - return err; -} - -static void damon_sysfs_quotas_rm_dirs(struct damon_sysfs_quotas *quotas) -{ - kobject_put(&quotas->weights->kobj); -} - -static ssize_t ms_show(struct kobject *kobj, struct kobj_attribute *attr, - char *buf) -{ - struct damon_sysfs_quotas *quotas = container_of(kobj, - struct damon_sysfs_quotas, kobj); - - return sysfs_emit(buf, "%lu\n", quotas->ms); -} - -static ssize_t ms_store(struct kobject *kobj, struct kobj_attribute *attr, - const char *buf, size_t count) -{ - struct damon_sysfs_quotas *quotas = container_of(kobj, - struct damon_sysfs_quotas, kobj); - int err = kstrtoul(buf, 0, &quotas->ms); - - if (err) - return -EINVAL; - return count; -} - -static ssize_t bytes_show(struct kobject *kobj, struct kobj_attribute *attr, - char *buf) -{ - struct damon_sysfs_quotas *quotas = container_of(kobj, - struct damon_sysfs_quotas, kobj); - - return sysfs_emit(buf, "%lu\n", quotas->sz); -} - -static ssize_t bytes_store(struct kobject *kobj, - struct kobj_attribute *attr, const char *buf, size_t count) -{ - struct damon_sysfs_quotas *quotas = container_of(kobj, - struct damon_sysfs_quotas, kobj); - int err = kstrtoul(buf, 0, &quotas->sz); - - if (err) - return -EINVAL; - return count; -} - -static ssize_t reset_interval_ms_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct damon_sysfs_quotas *quotas = container_of(kobj, - struct damon_sysfs_quotas, kobj); - - return sysfs_emit(buf, "%lu\n", quotas->reset_interval_ms); -} - -static ssize_t reset_interval_ms_store(struct kobject *kobj, - struct kobj_attribute *attr, const char *buf, size_t count) -{ - struct damon_sysfs_quotas *quotas = container_of(kobj, - struct damon_sysfs_quotas, kobj); - int err = kstrtoul(buf, 0, &quotas->reset_interval_ms); - - if (err) - return -EINVAL; - return count; -} - -static void damon_sysfs_quotas_release(struct kobject *kobj) -{ - kfree(container_of(kobj, struct damon_sysfs_quotas, kobj)); -} - 
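
Every show()/store() handler in this patch, on both the added sysfs-schemes.c side and the removed sysfs.c side, follows the same recipe: recover the enclosing directory struct from the bare kobject with container_of(), then print one field with sysfs_emit() or parse it with kstrtoul()/kstrtouint() and return the byte count. A minimal userspace sketch of that pattern (the offsetof()-based macro mirrors the kernel's container_of(); struct quotas_demo is an illustrative stand-in, not a kernel symbol):

#include <stddef.h>
#include <stdio.h>

/* Userspace stand-in for the kernel's container_of() macro. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct kobject { const char *name; };	/* reduced to what the demo needs */

struct quotas_demo {			/* stands in for damon_sysfs_quotas */
	struct kobject kobj;		/* embedded, as in the patch */
	unsigned long ms;
};

int main(void)
{
	struct quotas_demo q = { .kobj = { "quotas" }, .ms = 100 };
	struct kobject *kobj = &q.kobj;	/* all a sysfs handler receives */

	/* Recover the enclosing struct, as every *_show()/*_store() does. */
	struct quotas_demo *quotas =
			container_of(kobj, struct quotas_demo, kobj);

	printf("%s: ms=%lu\n", kobj->name, quotas->ms);
	return 0;
}

The same pointer arithmetic is what lets each release() callback above kfree() the whole directory struct starting from nothing but the embedded kobject.
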
-static struct kobj_attribute damon_sysfs_quotas_ms_attr = - __ATTR_RW_MODE(ms, 0600); - -static struct kobj_attribute damon_sysfs_quotas_sz_attr = - __ATTR_RW_MODE(bytes, 0600); - -static struct kobj_attribute damon_sysfs_quotas_reset_interval_ms_attr = - __ATTR_RW_MODE(reset_interval_ms, 0600); - -static struct attribute *damon_sysfs_quotas_attrs[] = { - &damon_sysfs_quotas_ms_attr.attr, - &damon_sysfs_quotas_sz_attr.attr, - &damon_sysfs_quotas_reset_interval_ms_attr.attr, - NULL, -}; -ATTRIBUTE_GROUPS(damon_sysfs_quotas); - -static struct kobj_type damon_sysfs_quotas_ktype = { - .release = damon_sysfs_quotas_release, - .sysfs_ops = &kobj_sysfs_ops, - .default_groups = damon_sysfs_quotas_groups, -}; - -/* - * access_pattern directory - */ - -struct damon_sysfs_access_pattern { - struct kobject kobj; - struct damon_sysfs_ul_range *sz; - struct damon_sysfs_ul_range *nr_accesses; - struct damon_sysfs_ul_range *age; -}; - -static -struct damon_sysfs_access_pattern *damon_sysfs_access_pattern_alloc(void) -{ - struct damon_sysfs_access_pattern *access_pattern = - kmalloc(sizeof(*access_pattern), GFP_KERNEL); - - if (!access_pattern) - return NULL; - access_pattern->kobj = (struct kobject){}; - return access_pattern; -} - -static int damon_sysfs_access_pattern_add_range_dir( - struct damon_sysfs_access_pattern *access_pattern, - struct damon_sysfs_ul_range **range_dir_ptr, - char *name) -{ - struct damon_sysfs_ul_range *range = damon_sysfs_ul_range_alloc(0, 0); - int err; - - if (!range) - return -ENOMEM; - err = kobject_init_and_add(&range->kobj, &damon_sysfs_ul_range_ktype, - &access_pattern->kobj, name); - if (err) - kobject_put(&range->kobj); - else - *range_dir_ptr = range; - return err; -} - -static int damon_sysfs_access_pattern_add_dirs( - struct damon_sysfs_access_pattern *access_pattern) -{ - int err; - - err = damon_sysfs_access_pattern_add_range_dir(access_pattern, - &access_pattern->sz, "sz"); - if (err) - goto put_sz_out; - - err = damon_sysfs_access_pattern_add_range_dir(access_pattern, - &access_pattern->nr_accesses, "nr_accesses"); - if (err) - goto put_nr_accesses_sz_out; - - err = damon_sysfs_access_pattern_add_range_dir(access_pattern, - &access_pattern->age, "age"); - if (err) - goto put_age_nr_accesses_sz_out; - return 0; - -put_age_nr_accesses_sz_out: - kobject_put(&access_pattern->age->kobj); - access_pattern->age = NULL; -put_nr_accesses_sz_out: - kobject_put(&access_pattern->nr_accesses->kobj); - access_pattern->nr_accesses = NULL; -put_sz_out: - kobject_put(&access_pattern->sz->kobj); - access_pattern->sz = NULL; - return err; -} - -static void damon_sysfs_access_pattern_rm_dirs( - struct damon_sysfs_access_pattern *access_pattern) -{ - kobject_put(&access_pattern->sz->kobj); - kobject_put(&access_pattern->nr_accesses->kobj); - kobject_put(&access_pattern->age->kobj); -} - -static void damon_sysfs_access_pattern_release(struct kobject *kobj) -{ - kfree(container_of(kobj, struct damon_sysfs_access_pattern, kobj)); -} - -static struct attribute *damon_sysfs_access_pattern_attrs[] = { - NULL, -}; -ATTRIBUTE_GROUPS(damon_sysfs_access_pattern); - -static struct kobj_type damon_sysfs_access_pattern_ktype = { - .release = damon_sysfs_access_pattern_release, - .sysfs_ops = &kobj_sysfs_ops, - .default_groups = damon_sysfs_access_pattern_groups, -}; - -/* - * scheme directory - */ - -struct damon_sysfs_scheme { - struct kobject kobj; - enum damos_action action; - struct damon_sysfs_access_pattern *access_pattern; - struct damon_sysfs_quotas *quotas; - struct 
damon_sysfs_watermarks *watermarks; - struct damon_sysfs_stats *stats; -}; - -/* This should match with enum damos_action */ -static const char * const damon_sysfs_damos_action_strs[] = { - "willneed", - "cold", - "pageout", - "hugepage", - "nohugepage", - "lru_prio", - "lru_deprio", - "stat", -}; - -static struct damon_sysfs_scheme *damon_sysfs_scheme_alloc( - enum damos_action action) -{ - struct damon_sysfs_scheme *scheme = kmalloc(sizeof(*scheme), - GFP_KERNEL); - - if (!scheme) - return NULL; - scheme->kobj = (struct kobject){}; - scheme->action = action; - return scheme; -} - -static int damon_sysfs_scheme_set_access_pattern( - struct damon_sysfs_scheme *scheme) -{ - struct damon_sysfs_access_pattern *access_pattern; - int err; - - access_pattern = damon_sysfs_access_pattern_alloc(); - if (!access_pattern) - return -ENOMEM; - err = kobject_init_and_add(&access_pattern->kobj, - &damon_sysfs_access_pattern_ktype, &scheme->kobj, - "access_pattern"); - if (err) - goto out; - err = damon_sysfs_access_pattern_add_dirs(access_pattern); - if (err) - goto out; - scheme->access_pattern = access_pattern; - return 0; - -out: - kobject_put(&access_pattern->kobj); - return err; -} - -static int damon_sysfs_scheme_set_quotas(struct damon_sysfs_scheme *scheme) -{ - struct damon_sysfs_quotas *quotas = damon_sysfs_quotas_alloc(); - int err; - - if (!quotas) - return -ENOMEM; - err = kobject_init_and_add(&quotas->kobj, &damon_sysfs_quotas_ktype, - &scheme->kobj, "quotas"); - if (err) - goto out; - err = damon_sysfs_quotas_add_dirs(quotas); - if (err) - goto out; - scheme->quotas = quotas; - return 0; - -out: - kobject_put(&quotas->kobj); - return err; -} - -static int damon_sysfs_scheme_set_watermarks(struct damon_sysfs_scheme *scheme) -{ - struct damon_sysfs_watermarks *watermarks = - damon_sysfs_watermarks_alloc(DAMOS_WMARK_NONE, 0, 0, 0, 0); - int err; - - if (!watermarks) - return -ENOMEM; - err = kobject_init_and_add(&watermarks->kobj, - &damon_sysfs_watermarks_ktype, &scheme->kobj, - "watermarks"); - if (err) - kobject_put(&watermarks->kobj); - else - scheme->watermarks = watermarks; - return err; -} - -static int damon_sysfs_scheme_set_stats(struct damon_sysfs_scheme *scheme) -{ - struct damon_sysfs_stats *stats = damon_sysfs_stats_alloc(); - int err; - - if (!stats) - return -ENOMEM; - err = kobject_init_and_add(&stats->kobj, &damon_sysfs_stats_ktype, - &scheme->kobj, "stats"); - if (err) - kobject_put(&stats->kobj); - else - scheme->stats = stats; - return err; -} - -static int damon_sysfs_scheme_add_dirs(struct damon_sysfs_scheme *scheme) -{ - int err; - - err = damon_sysfs_scheme_set_access_pattern(scheme); - if (err) - return err; - err = damon_sysfs_scheme_set_quotas(scheme); - if (err) - goto put_access_pattern_out; - err = damon_sysfs_scheme_set_watermarks(scheme); - if (err) - goto put_quotas_access_pattern_out; - err = damon_sysfs_scheme_set_stats(scheme); - if (err) - goto put_watermarks_quotas_access_pattern_out; - return 0; - -put_watermarks_quotas_access_pattern_out: - kobject_put(&scheme->watermarks->kobj); - scheme->watermarks = NULL; -put_quotas_access_pattern_out: - kobject_put(&scheme->quotas->kobj); - scheme->quotas = NULL; -put_access_pattern_out: - kobject_put(&scheme->access_pattern->kobj); - scheme->access_pattern = NULL; - return err; -} - -static void damon_sysfs_scheme_rm_dirs(struct damon_sysfs_scheme *scheme) -{ - damon_sysfs_access_pattern_rm_dirs(scheme->access_pattern); - kobject_put(&scheme->access_pattern->kobj); - damon_sysfs_quotas_rm_dirs(scheme->quotas); - 
kobject_put(&scheme->quotas->kobj); - kobject_put(&scheme->watermarks->kobj); - kobject_put(&scheme->stats->kobj); -} - -static ssize_t action_show(struct kobject *kobj, struct kobj_attribute *attr, - char *buf) -{ - struct damon_sysfs_scheme *scheme = container_of(kobj, - struct damon_sysfs_scheme, kobj); - - return sysfs_emit(buf, "%s\n", - damon_sysfs_damos_action_strs[scheme->action]); -} - -static ssize_t action_store(struct kobject *kobj, struct kobj_attribute *attr, - const char *buf, size_t count) -{ - struct damon_sysfs_scheme *scheme = container_of(kobj, - struct damon_sysfs_scheme, kobj); - enum damos_action action; - - for (action = 0; action < NR_DAMOS_ACTIONS; action++) { - if (sysfs_streq(buf, damon_sysfs_damos_action_strs[action])) { - scheme->action = action; - return count; - } - } - return -EINVAL; -} - -static void damon_sysfs_scheme_release(struct kobject *kobj) -{ - kfree(container_of(kobj, struct damon_sysfs_scheme, kobj)); -} - -static struct kobj_attribute damon_sysfs_scheme_action_attr = - __ATTR_RW_MODE(action, 0600); - -static struct attribute *damon_sysfs_scheme_attrs[] = { - &damon_sysfs_scheme_action_attr.attr, - NULL, -}; -ATTRIBUTE_GROUPS(damon_sysfs_scheme); - -static struct kobj_type damon_sysfs_scheme_ktype = { - .release = damon_sysfs_scheme_release, - .sysfs_ops = &kobj_sysfs_ops, - .default_groups = damon_sysfs_scheme_groups, -}; - -/* - * schemes directory - */ - -struct damon_sysfs_schemes { - struct kobject kobj; - struct damon_sysfs_scheme **schemes_arr; - int nr; -}; - -static struct damon_sysfs_schemes *damon_sysfs_schemes_alloc(void) -{ - return kzalloc(sizeof(struct damon_sysfs_schemes), GFP_KERNEL); -} - -static void damon_sysfs_schemes_rm_dirs(struct damon_sysfs_schemes *schemes) -{ - struct damon_sysfs_scheme **schemes_arr = schemes->schemes_arr; - int i; - - for (i = 0; i < schemes->nr; i++) { - damon_sysfs_scheme_rm_dirs(schemes_arr[i]); - kobject_put(&schemes_arr[i]->kobj); - } - schemes->nr = 0; - kfree(schemes_arr); - schemes->schemes_arr = NULL; -} - -static int damon_sysfs_schemes_add_dirs(struct damon_sysfs_schemes *schemes, - int nr_schemes) -{ - struct damon_sysfs_scheme **schemes_arr, *scheme; - int err, i; - - damon_sysfs_schemes_rm_dirs(schemes); - if (!nr_schemes) - return 0; - - schemes_arr = kmalloc_array(nr_schemes, sizeof(*schemes_arr), - GFP_KERNEL | __GFP_NOWARN); - if (!schemes_arr) - return -ENOMEM; - schemes->schemes_arr = schemes_arr; - - for (i = 0; i < nr_schemes; i++) { - scheme = damon_sysfs_scheme_alloc(DAMOS_STAT); - if (!scheme) { - damon_sysfs_schemes_rm_dirs(schemes); - return -ENOMEM; - } - - err = kobject_init_and_add(&scheme->kobj, - &damon_sysfs_scheme_ktype, &schemes->kobj, - "%d", i); - if (err) - goto out; - err = damon_sysfs_scheme_add_dirs(scheme); - if (err) - goto out; - - schemes_arr[i] = scheme; - schemes->nr++; - } - return 0; - -out: - damon_sysfs_schemes_rm_dirs(schemes); - kobject_put(&scheme->kobj); - return err; -} - -static ssize_t nr_schemes_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct damon_sysfs_schemes *schemes = container_of(kobj, - struct damon_sysfs_schemes, kobj); - - return sysfs_emit(buf, "%d\n", schemes->nr); -} - -static ssize_t nr_schemes_store(struct kobject *kobj, - struct kobj_attribute *attr, const char *buf, size_t count) -{ - struct damon_sysfs_schemes *schemes; - int nr, err = kstrtoint(buf, 0, &nr); - - if (err) - return err; - if (nr < 0) - return -EINVAL; - - schemes = container_of(kobj, struct damon_sysfs_schemes, kobj); - - if 
(!mutex_trylock(&damon_sysfs_lock)) - return -EBUSY; - err = damon_sysfs_schemes_add_dirs(schemes, nr); - mutex_unlock(&damon_sysfs_lock); - if (err) - return err; - return count; -} - -static void damon_sysfs_schemes_release(struct kobject *kobj) -{ - kfree(container_of(kobj, struct damon_sysfs_schemes, kobj)); -} - -static struct kobj_attribute damon_sysfs_schemes_nr_attr = - __ATTR_RW_MODE(nr_schemes, 0600); - -static struct attribute *damon_sysfs_schemes_attrs[] = { - &damon_sysfs_schemes_nr_attr.attr, - NULL, -}; -ATTRIBUTE_GROUPS(damon_sysfs_schemes); - -static struct kobj_type damon_sysfs_schemes_ktype = { - .release = damon_sysfs_schemes_release, - .sysfs_ops = &kobj_sysfs_ops, - .default_groups = damon_sysfs_schemes_groups, -}; +#include "sysfs-common.h" /* * init region directory */ @@ -1062,23 +17,12 @@ static struct kobj_type damon_sysfs_schemes_ktype = { struct damon_sysfs_region { struct kobject kobj; - unsigned long start; - unsigned long end; + struct damon_addr_range ar; }; -static struct damon_sysfs_region *damon_sysfs_region_alloc( - unsigned long start, - unsigned long end) +static struct damon_sysfs_region *damon_sysfs_region_alloc(void) { - struct damon_sysfs_region *region = kmalloc(sizeof(*region), - GFP_KERNEL); - - if (!region) - return NULL; - region->kobj = (struct kobject){}; - region->start = start; - region->end = end; - return region; + return kzalloc(sizeof(struct damon_sysfs_region), GFP_KERNEL); } static ssize_t start_show(struct kobject *kobj, struct kobj_attribute *attr, @@ -1087,7 +31,7 @@ static ssize_t start_show(struct kobject *kobj, struct kobj_attribute *attr, struct damon_sysfs_region *region = container_of(kobj, struct damon_sysfs_region, kobj); - return sysfs_emit(buf, "%lu\n", region->start); + return sysfs_emit(buf, "%lu\n", region->ar.start); } static ssize_t start_store(struct kobject *kobj, struct kobj_attribute *attr, @@ -1095,7 +39,7 @@ static ssize_t start_store(struct kobject *kobj, struct kobj_attribute *attr, { struct damon_sysfs_region *region = container_of(kobj, struct damon_sysfs_region, kobj); - int err = kstrtoul(buf, 0, &region->start); + int err = kstrtoul(buf, 0, &region->ar.start); return err ? err : count; } @@ -1106,7 +50,7 @@ static ssize_t end_show(struct kobject *kobj, struct kobj_attribute *attr, struct damon_sysfs_region *region = container_of(kobj, struct damon_sysfs_region, kobj); - return sysfs_emit(buf, "%lu\n", region->end); + return sysfs_emit(buf, "%lu\n", region->ar.end); } static ssize_t end_store(struct kobject *kobj, struct kobj_attribute *attr, @@ -1114,7 +58,7 @@ static ssize_t end_store(struct kobject *kobj, struct kobj_attribute *attr, { struct damon_sysfs_region *region = container_of(kobj, struct damon_sysfs_region, kobj); - int err = kstrtoul(buf, 0, &region->end); + int err = kstrtoul(buf, 0, &region->ar.end); return err ? err : count; } @@ -1187,7 +131,7 @@ static int damon_sysfs_regions_add_dirs(struct damon_sysfs_regions *regions, regions->regions_arr = regions_arr; for (i = 0; i < nr_regions; i++) { - region = damon_sysfs_region_alloc(0, 0); + region = damon_sysfs_region_alloc(); if (!region) { damon_sysfs_regions_rm_dirs(regions); return -ENOMEM; @@ -2055,6 +999,21 @@ enum damon_sysfs_cmd { * files. */ DAMON_SYSFS_CMD_UPDATE_SCHEMES_STATS, + /* + * @DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_BYTES: Update + * tried_regions/total_bytes sysfs files for each scheme. 
+ */ + DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_BYTES, + /* + * @DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_REGIONS: Update schemes tried + * regions + */ + DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_REGIONS, + /* + * @DAMON_SYSFS_CMD_CLEAR_SCHEMES_TRIED_REGIONS: Clear schemes tried + * regions + */ + DAMON_SYSFS_CMD_CLEAR_SCHEMES_TRIED_REGIONS, /* * @NR_DAMON_SYSFS_CMDS: Total number of DAMON sysfs commands. */ @@ -2067,6 +1026,9 @@ static const char * const damon_sysfs_cmd_strs[] = { "off", "commit", "update_schemes_stats", + "update_schemes_tried_bytes", + "update_schemes_tried_regions", + "clear_schemes_tried_regions", }; /* @@ -2147,11 +1109,11 @@ static int damon_sysfs_set_regions(struct damon_target *t, struct damon_sysfs_region *sys_region = sysfs_regions->regions_arr[i]; - if (sys_region->start > sys_region->end) + if (sys_region->ar.start > sys_region->ar.end) goto out; - ranges[i].start = sys_region->start; - ranges[i].end = sys_region->end; + ranges[i].start = sys_region->ar.start; + ranges[i].end = sys_region->ar.end; if (i == 0) continue; if (ranges[i - 1].end > ranges[i].start) @@ -2188,165 +1150,100 @@ destroy_targets_out: return err; } -/* - * Search a target in a context that corresponds to the sysfs target input. - * - * Return: pointer to the target if found, NULL if not found, or negative - * error code if the search failed. - */ -static struct damon_target *damon_sysfs_existing_target( - struct damon_sysfs_target *sys_target, struct damon_ctx *ctx) +static int damon_sysfs_update_target_pid(struct damon_target *target, int pid) { - struct pid *pid; - struct damon_target *t; + struct pid *pid_new; - if (!damon_target_has_pid(ctx)) { - /* Up to only one target for paddr could exist */ - damon_for_each_target(t, ctx) - return t; - return NULL; + pid_new = find_get_pid(pid); + if (!pid_new) + return -EINVAL; + + if (pid_new == target->pid) { + put_pid(pid_new); + return 0; } - /* ops.id should be DAMON_OPS_VADDR or DAMON_OPS_FVADDR */ - pid = find_get_pid(sys_target->pid); - if (!pid) - return ERR_PTR(-EINVAL); - damon_for_each_target(t, ctx) { - if (t->pid == pid) { - put_pid(pid); - return t; - } + put_pid(target->pid); + target->pid = pid_new; + return 0; +} + +static int damon_sysfs_update_target(struct damon_target *target, + struct damon_ctx *ctx, + struct damon_sysfs_target *sys_target) +{ + int err = 0; + + if (damon_target_has_pid(ctx)) { + err = damon_sysfs_update_target_pid(target, sys_target->pid); + if (err) + return err; } - put_pid(pid); - return NULL; + + /* + * Do monitoring target region boundary update only if one or more + * regions are set by the user. This is for keeping current monitoring + * target results and range easier, especially for dynamic monitoring + * target regions update ops like 'vaddr'. 
+ */ + if (sys_target->regions->nr) + err = damon_sysfs_set_regions(target, sys_target->regions); + return err; } static int damon_sysfs_set_targets(struct damon_ctx *ctx, struct damon_sysfs_targets *sysfs_targets) { - int i, err; + struct damon_target *t, *next; + int i = 0, err; /* Multiple physical address space monitoring targets makes no sense */ if (ctx->ops.id == DAMON_OPS_PADDR && sysfs_targets->nr > 1) return -EINVAL; - for (i = 0; i < sysfs_targets->nr; i++) { - struct damon_sysfs_target *st = sysfs_targets->targets_arr[i]; - struct damon_target *t = damon_sysfs_existing_target(st, ctx); + damon_for_each_target_safe(t, next, ctx) { + if (i < sysfs_targets->nr) { + err = damon_sysfs_update_target(t, ctx, + sysfs_targets->targets_arr[i]); + if (err) + return err; + } else { + if (damon_target_has_pid(ctx)) + put_pid(t->pid); + damon_destroy_target(t); + } + i++; + } - if (IS_ERR(t)) - return PTR_ERR(t); - if (!t) - err = damon_sysfs_add_target(st, ctx); - else - err = damon_sysfs_set_regions(t, st->regions); + for (; i < sysfs_targets->nr; i++) { + struct damon_sysfs_target *st = sysfs_targets->targets_arr[i]; + + err = damon_sysfs_add_target(st, ctx); if (err) return err; } return 0; } -static struct damos *damon_sysfs_mk_scheme( - struct damon_sysfs_scheme *sysfs_scheme) -{ - struct damon_sysfs_access_pattern *access_pattern = - sysfs_scheme->access_pattern; - struct damon_sysfs_quotas *sysfs_quotas = sysfs_scheme->quotas; - struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights; - struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks; - - struct damos_access_pattern pattern = { - .min_sz_region = access_pattern->sz->min, - .max_sz_region = access_pattern->sz->max, - .min_nr_accesses = access_pattern->nr_accesses->min, - .max_nr_accesses = access_pattern->nr_accesses->max, - .min_age_region = access_pattern->age->min, - .max_age_region = access_pattern->age->max, - }; - struct damos_quota quota = { - .ms = sysfs_quotas->ms, - .sz = sysfs_quotas->sz, - .reset_interval = sysfs_quotas->reset_interval_ms, - .weight_sz = sysfs_weights->sz, - .weight_nr_accesses = sysfs_weights->nr_accesses, - .weight_age = sysfs_weights->age, - }; - struct damos_watermarks wmarks = { - .metric = sysfs_wmarks->metric, - .interval = sysfs_wmarks->interval_us, - .high = sysfs_wmarks->high, - .mid = sysfs_wmarks->mid, - .low = sysfs_wmarks->low, - }; - - return damon_new_scheme(&pattern, sysfs_scheme->action, "a, - &wmarks); -} - -static void damon_sysfs_update_scheme(struct damos *scheme, - struct damon_sysfs_scheme *sysfs_scheme) -{ - struct damon_sysfs_access_pattern *access_pattern = - sysfs_scheme->access_pattern; - struct damon_sysfs_quotas *sysfs_quotas = sysfs_scheme->quotas; - struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights; - struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks; - - scheme->pattern.min_sz_region = access_pattern->sz->min; - scheme->pattern.max_sz_region = access_pattern->sz->max; - scheme->pattern.min_nr_accesses = access_pattern->nr_accesses->min; - scheme->pattern.max_nr_accesses = access_pattern->nr_accesses->max; - scheme->pattern.min_age_region = access_pattern->age->min; - scheme->pattern.max_age_region = access_pattern->age->max; - - scheme->action = sysfs_scheme->action; - - scheme->quota.ms = sysfs_quotas->ms; - scheme->quota.sz = sysfs_quotas->sz; - scheme->quota.reset_interval = sysfs_quotas->reset_interval_ms; - scheme->quota.weight_sz = sysfs_weights->sz; - scheme->quota.weight_nr_accesses = 
sysfs_weights->nr_accesses; - scheme->quota.weight_age = sysfs_weights->age; - - scheme->wmarks.metric = sysfs_wmarks->metric; - scheme->wmarks.interval = sysfs_wmarks->interval_us; - scheme->wmarks.high = sysfs_wmarks->high; - scheme->wmarks.mid = sysfs_wmarks->mid; - scheme->wmarks.low = sysfs_wmarks->low; -} - -static int damon_sysfs_set_schemes(struct damon_ctx *ctx, - struct damon_sysfs_schemes *sysfs_schemes) -{ - struct damos *scheme, *next; - int i = 0; - - damon_for_each_scheme_safe(scheme, next, ctx) { - if (i < sysfs_schemes->nr) - damon_sysfs_update_scheme(scheme, - sysfs_schemes->schemes_arr[i]); - else - damon_destroy_scheme(scheme); - i++; - } - - for (; i < sysfs_schemes->nr; i++) { - struct damos *scheme, *next; - - scheme = damon_sysfs_mk_scheme(sysfs_schemes->schemes_arr[i]); - if (!scheme) { - damon_for_each_scheme_safe(scheme, next, ctx) - damon_destroy_scheme(scheme); - return -ENOMEM; - } - damon_add_scheme(ctx, scheme); - } - return 0; -} +static bool damon_sysfs_schemes_regions_updating; static void damon_sysfs_before_terminate(struct damon_ctx *ctx) { struct damon_target *t, *next; + struct damon_sysfs_kdamond *kdamond; + enum damon_sysfs_cmd cmd; + + /* damon_sysfs_schemes_update_regions_stop() might not have been called yet */ + kdamond = damon_sysfs_cmd_request.kdamond; + cmd = damon_sysfs_cmd_request.cmd; + if (kdamond && ctx == kdamond->damon_ctx && + (cmd == DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_REGIONS || + cmd == DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_BYTES) && + damon_sysfs_schemes_regions_updating) { + damon_sysfs_schemes_update_regions_stop(ctx); + damon_sysfs_schemes_regions_updating = false; + mutex_unlock(&damon_sysfs_lock); + } if (!damon_target_has_pid(ctx)) return; @@ -2371,30 +1268,47 @@ static void damon_sysfs_before_terminate(struct damon_ctx *ctx) static int damon_sysfs_upd_schemes_stats(struct damon_sysfs_kdamond *kdamond) { struct damon_ctx *ctx = kdamond->damon_ctx; - struct damon_sysfs_schemes *sysfs_schemes; - struct damos *scheme; - int schemes_idx = 0; if (!ctx) return -EINVAL; - sysfs_schemes = kdamond->contexts->contexts_arr[0]->schemes; - damon_for_each_scheme(scheme, ctx) { - struct damon_sysfs_stats *sysfs_stats; - - /* user could have removed the scheme sysfs dir */ - if (schemes_idx >= sysfs_schemes->nr) - break; - - sysfs_stats = sysfs_schemes->schemes_arr[schemes_idx++]->stats; - sysfs_stats->nr_tried = scheme->stat.nr_tried; - sysfs_stats->sz_tried = scheme->stat.sz_tried; - sysfs_stats->nr_applied = scheme->stat.nr_applied; - sysfs_stats->sz_applied = scheme->stat.sz_applied; - sysfs_stats->qt_exceeds = scheme->stat.qt_exceeds; - } + damon_sysfs_schemes_update_stats( + kdamond->contexts->contexts_arr[0]->schemes, ctx); return 0; } +static int damon_sysfs_upd_schemes_regions_start( + struct damon_sysfs_kdamond *kdamond, bool total_bytes_only) +{ + struct damon_ctx *ctx = kdamond->damon_ctx; + + if (!ctx) + return -EINVAL; + return damon_sysfs_schemes_update_regions_start( + kdamond->contexts->contexts_arr[0]->schemes, ctx, + total_bytes_only); +} + +static int damon_sysfs_upd_schemes_regions_stop( + struct damon_sysfs_kdamond *kdamond) +{ + struct damon_ctx *ctx = kdamond->damon_ctx; + + if (!ctx) + return -EINVAL; + return damon_sysfs_schemes_update_regions_stop(ctx); +} + +static int damon_sysfs_clear_schemes_regions( + struct damon_sysfs_kdamond *kdamond) +{ + struct damon_ctx *ctx = kdamond->damon_ctx; + + if (!ctx) + return -EINVAL; + return damon_sysfs_schemes_clear_regions( + kdamond->contexts->contexts_arr[0]->schemes, ctx); +} +
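For context on how the three commands added above are driven: like the existing entries in damon_sysfs_cmd_strs[], a command is issued by writing its string to a kdamond's 'state' sysfs file, and is then serviced from the kdamond callback (damon_sysfs_cmd_request_callback(), next hunk). A minimal userspace sketch, assuming the usual DAMON sysfs layout under /sys/kernel/mm/damon/admin/ (the path and the kdamond index are illustrative, not part of this diff):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* Assumed location of kdamond 0 under the DAMON admin sysfs root. */
	const char *state = "/sys/kernel/mm/damon/admin/kdamonds/0/state";
	const char *cmd = "update_schemes_tried_regions";
	int fd = open(state, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* The write queues the request; the kdamond thread handles it from
	 * its callback and fills the tried_regions/ files. */
	if (write(fd, cmd, strlen(cmd)) < 0)
		perror("write");
	close(fd);
	return 0;
}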
static inline bool damon_sysfs_kdamond_running( struct damon_sysfs_kdamond *kdamond) { @@ -2439,18 +1353,21 @@ static int damon_sysfs_commit_input(struct damon_sysfs_kdamond *kdamond) /* * damon_sysfs_cmd_request_callback() - DAMON callback for handling requests. - * @c: The DAMON context of the callback. + * @c: The DAMON context of the callback. + * @active: Whether @c is not deactivated due to watermarks. * * This function is periodically called back from the kdamond thread for @c. * Then, it checks if there is a waiting DAMON sysfs request and handles it. */ -static int damon_sysfs_cmd_request_callback(struct damon_ctx *c) +static int damon_sysfs_cmd_request_callback(struct damon_ctx *c, bool active) { struct damon_sysfs_kdamond *kdamond; + bool total_bytes_only = false; int err = 0; /* avoid deadlock due to concurrent state_store('off') */ - if (!mutex_trylock(&damon_sysfs_lock)) + if (!damon_sysfs_schemes_regions_updating && + !mutex_trylock(&damon_sysfs_lock)) return 0; kdamond = damon_sysfs_cmd_request.kdamond; if (!kdamond || kdamond->damon_ctx != c) @@ -2462,16 +1379,62 @@ static int damon_sysfs_cmd_request_callback(struct damon_ctx *c) case DAMON_SYSFS_CMD_COMMIT: err = damon_sysfs_commit_input(kdamond); break; + case DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_BYTES: + total_bytes_only = true; + fallthrough; + case DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_REGIONS: + if (!damon_sysfs_schemes_regions_updating) { + err = damon_sysfs_upd_schemes_regions_start(kdamond, + total_bytes_only); + if (!err) { + damon_sysfs_schemes_regions_updating = true; + goto keep_lock_out; + } + } else { + /* + * Continue regions updating if DAMON is still + * active and the update for all schemes is not + * finished. + */ + if (active && !damos_sysfs_regions_upd_done()) + goto keep_lock_out; + err = damon_sysfs_upd_schemes_regions_stop(kdamond); + damon_sysfs_schemes_regions_updating = false; + } + break; + case DAMON_SYSFS_CMD_CLEAR_SCHEMES_TRIED_REGIONS: + err = damon_sysfs_clear_schemes_regions(kdamond); + break; default: break; } /* Mark the request as invalid now. */ damon_sysfs_cmd_request.kdamond = NULL; out: - mutex_unlock(&damon_sysfs_lock); + if (!damon_sysfs_schemes_regions_updating) + mutex_unlock(&damon_sysfs_lock); +keep_lock_out: return err; } +static int damon_sysfs_after_wmarks_check(struct damon_ctx *c) +{ + /* + * after_wmarks_check() is called back while the context is deactivated + * by watermarks. + */ + return damon_sysfs_cmd_request_callback(c, false); +} + +static int damon_sysfs_after_aggregation(struct damon_ctx *c) +{ + /* + * after_aggregation() is called back only while the context is not + * deactivated by watermarks. + */ + return damon_sysfs_cmd_request_callback(c, true); +} + static struct damon_ctx *damon_sysfs_build_ctx( struct damon_sysfs_context *sys_ctx) { @@ -2487,8 +1450,8 @@ static struct damon_ctx *damon_sysfs_build_ctx( return ERR_PTR(err); } - ctx->callback.after_wmarks_check = damon_sysfs_cmd_request_callback; - ctx->callback.after_aggregation = damon_sysfs_cmd_request_callback; + ctx->callback.after_wmarks_check = damon_sysfs_after_wmarks_check; + ctx->callback.after_aggregation = damon_sysfs_after_aggregation; ctx->callback.before_terminate = damon_sysfs_before_terminate; return ctx; } diff --git a/mm/filemap.c b/mm/filemap.c index 63a846a1c1a3..c38dc43bfc8c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2967,7 +2967,7 @@ static int lock_folio_maybe_drop_mmap(struct vm_fault *vmf, struct folio *folio, /* * NOTE!
This will make us return with VM_FAULT_RETRY, but with - * the mmap_lock still held. That's how FAULT_FLAG_RETRY_NOWAIT + * the fault lock still held. That's how FAULT_FLAG_RETRY_NOWAIT * is supposed to work. We have way too many special cases.. */ if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT) @@ -2977,13 +2977,14 @@ static int lock_folio_maybe_drop_mmap(struct vm_fault *vmf, struct folio *folio, if (vmf->flags & FAULT_FLAG_KILLABLE) { if (__folio_lock_killable(folio)) { /* - * We didn't have the right flags to drop the mmap_lock, - * but all fault_handlers only check for fatal signals - * if we return VM_FAULT_RETRY, so we need to drop the - * mmap_lock here and return 0 if we don't have a fpin. + * We didn't have the right flags to drop the + * fault lock, but all fault_handlers only check + * for fatal signals if we return VM_FAULT_RETRY, + * so we need to drop the fault lock here and + * return 0 if we don't have a fpin. */ if (*fpin == NULL) - mmap_read_unlock(vmf->vma->vm_mm); + release_fault_lock(vmf); return 0; } } else diff --git a/mm/memory.c b/mm/memory.c index 56057f97afaf..5e161551a5d1 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -411,6 +411,8 @@ void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt, * be 0. This will underflow and is okay. */ next = mas_find(&mas, ceiling - 1); + if (unlikely(xa_is_zero(next))) + next = NULL; /* * Hide vma from rmap and truncate_pagecache before freeing @@ -432,6 +434,8 @@ void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt, && !is_vm_hugetlb_page(next)) { vma = next; next = mas_find(&mas, ceiling - 1); + if (unlikely(xa_is_zero(next))) + next = NULL; if (mm_wr_locked) vma_start_write(vma); unlink_anon_vmas(vma); @@ -1736,7 +1740,8 @@ void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt, do { unmap_single_vma(tlb, vma, start_addr, end_addr, &details, mm_wr_locked); - } while ((vma = mas_find(&mas, end_t - 1)) != NULL); + vma = mas_find(&mas, end_t - 1); + } while (vma && likely(!xa_is_zero(vma))); mmu_notifier_invalidate_range_end(&range); } @@ -3099,6 +3104,36 @@ static inline void wp_page_reuse(struct vm_fault *vmf) count_vm_event(PGREUSE); } +/* + * We could add a bitflag somewhere, but for now, we know that all + * vm_ops that have a ->map_pages have been audited and don't need + * the mmap_lock to be held. + */ +static inline vm_fault_t vmf_can_call_fault(const struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + + if (vma->vm_ops->map_pages || !(vmf->flags & FAULT_FLAG_VMA_LOCK)) + return 0; + vma_end_read(vma); + return VM_FAULT_RETRY; +} + +static vm_fault_t vmf_anon_prepare(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + + if (likely(vma->anon_vma)) + return 0; + if (vmf->flags & FAULT_FLAG_VMA_LOCK) { + vma_end_read(vma); + return VM_FAULT_RETRY; + } + if (__anon_vma_prepare(vma)) + return VM_FAULT_OOM; + return 0; +} + /* * Handle the case of a page which we actually need to copy to a new page, * either due to COW or unsharing. 
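The two helpers just added centralize the per-VMA-lock retry policy, and the hunks below convert wp_page_copy(), wp_pfn_shared(), wp_page_shared(), do_read_fault(), do_cow_fault() and do_shared_fault() to them. A simplified sketch of the resulting calling convention (condensed from those call sites; not an additional change):

/* Inside mm/memory.c, a converted fault path follows this shape: */
static vm_fault_t example_fault_prepare(struct vm_fault *vmf)
{
	vm_fault_t ret;

	/* Drop the VMA read lock and ask for a retry under the mmap_lock
	 * if ->fault may not be called with only the per-VMA lock held
	 * (i.e. the vm_ops lack an audited ->map_pages). */
	ret = vmf_can_call_fault(vmf);
	if (!ret)
		/* Likewise retry if anon_vma still has to be prepared
		 * while only the VMA read lock is held. */
		ret = vmf_anon_prepare(vmf);
	if (ret)
		return ret;	/* VM_FAULT_RETRY or VM_FAULT_OOM */

	/* Safe to call __do_fault() and friends from here. */
	return 0;
}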
@@ -3126,12 +3161,13 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) pte_t entry; int page_copied = 0; struct mmu_notifier_range range; - int ret; + vm_fault_t ret; delayacct_wpcopy_start(); - if (unlikely(anon_vma_prepare(vma))) - goto oom; + ret = vmf_anon_prepare(vmf); + if (unlikely(ret)) + goto out; if (is_zero_pfn(pte_pfn(vmf->orig_pte))) { new_page = alloc_zeroed_user_highpage_movable(vma, @@ -3139,13 +3175,14 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) if (!new_page) goto oom; } else { + int err; new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address); if (!new_page) goto oom; - ret = __wp_page_copy_user(new_page, old_page, vmf); - if (ret) { + err = __wp_page_copy_user(new_page, old_page, vmf); + if (err) { /* * COW failed, if the fault was solved by other, * it's fine. If not, userspace would re-fault on @@ -3158,7 +3195,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) put_page(old_page); delayacct_wpcopy_end(); - return ret == -EHWPOISON ? VM_FAULT_HWPOISON : 0; + return err == -EHWPOISON ? VM_FAULT_HWPOISON : 0; } kmsan_copy_page_meta(new_page, old_page); } @@ -3271,11 +3308,13 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) oom_free_new: put_page(new_page); oom: + ret = VM_FAULT_OOM; +out: if (old_page) put_page(old_page); delayacct_wpcopy_end(); - return VM_FAULT_OOM; + return ret; } /** @@ -3324,10 +3363,9 @@ static vm_fault_t wp_pfn_shared(struct vm_fault *vmf) vm_fault_t ret; pte_unmap_unlock(vmf->pte, vmf->ptl); - if (vmf->flags & FAULT_FLAG_VMA_LOCK) { - vma_end_read(vmf->vma); - return VM_FAULT_RETRY; - } + ret = vmf_can_call_fault(vmf); + if (ret) + return ret; vmf->flags |= FAULT_FLAG_MKWRITE; ret = vma->vm_ops->pfn_mkwrite(vmf); @@ -3351,10 +3389,10 @@ static vm_fault_t wp_page_shared(struct vm_fault *vmf) vm_fault_t tmp; pte_unmap_unlock(vmf->pte, vmf->ptl); - if (vmf->flags & FAULT_FLAG_VMA_LOCK) { + tmp = vmf_can_call_fault(vmf); + if (tmp) { put_page(vmf->page); - vma_end_read(vmf->vma); - return VM_FAULT_RETRY; + return tmp; } tmp = do_page_mkwrite(vmf); @@ -3510,12 +3548,6 @@ reuse: return wp_page_shared(vmf); } copy: - if ((vmf->flags & FAULT_FLAG_VMA_LOCK) && !vma->anon_vma) { - pte_unmap_unlock(vmf->pte, vmf->ptl); - vma_end_read(vmf->vma); - return VM_FAULT_RETRY; - } - /* * Ok, we need to copy. Oh, well.. 
*/ @@ -4623,10 +4655,9 @@ static vm_fault_t do_read_fault(struct vm_fault *vmf) return ret; } - if (vmf->flags & FAULT_FLAG_VMA_LOCK) { - vma_end_read(vmf->vma); - return VM_FAULT_RETRY; - } + ret = vmf_can_call_fault(vmf); + if (ret) + return ret; ret = __do_fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) @@ -4644,13 +4675,11 @@ static vm_fault_t do_cow_fault(struct vm_fault *vmf) struct vm_area_struct *vma = vmf->vma; vm_fault_t ret; - if (vmf->flags & FAULT_FLAG_VMA_LOCK) { - vma_end_read(vma); - return VM_FAULT_RETRY; - } - - if (unlikely(anon_vma_prepare(vma))) - return VM_FAULT_OOM; + ret = vmf_can_call_fault(vmf); + if (!ret) + ret = vmf_anon_prepare(vmf); + if (ret) + return ret; vmf->cow_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address); if (!vmf->cow_page) @@ -4688,10 +4717,9 @@ static vm_fault_t do_shared_fault(struct vm_fault *vmf) struct vm_area_struct *vma = vmf->vma; vm_fault_t ret, tmp; - if (vmf->flags & FAULT_FLAG_VMA_LOCK) { - vma_end_read(vma); - return VM_FAULT_RETRY; - } + ret = vmf_can_call_fault(vmf); + if (ret) + return ret; ret = __do_fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) @@ -5514,7 +5542,7 @@ retry: * concurrent mremap() with MREMAP_DONTUNMAP could dissociate the VMA * from its anon_vma. */ - if (unlikely(!vma->anon_vma)) + if (vma_is_anonymous(vma) && !vma->anon_vma) goto inval_end_read; /* Check since vm_start/vm_end might change before we lock the VMA */ diff --git a/mm/mmap.c b/mm/mmap.c index bd2140cfcf36..d5c48b243869 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -3303,10 +3303,11 @@ void exit_mmap(struct mm_struct *mm) arch_exit_mmap(mm); vma = mas_find(&mas, ULONG_MAX); - if (!vma) { + if (!vma || unlikely(xa_is_zero(vma))) { /* Can happen if dup_mmap() received an OOM */ mmap_read_unlock(mm); - return; + mmap_write_lock(mm); + goto destroy; } lru_add_drain(); @@ -3339,11 +3340,13 @@ void exit_mmap(struct mm_struct *mm) remove_vma(vma, true); count++; cond_resched(); - } while ((vma = mas_find(&mas, ULONG_MAX)) != NULL); + vma = mas_find(&mas, ULONG_MAX); + } while (vma && likely(!xa_is_zero(vma))); BUG_ON(count != mm->map_count); trace_exit_mmap(mm); +destroy: __mt_destroy(&mm->mm_mt); mmap_write_unlock(mm); vm_unacct_memory(nr_accounted); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 2c5b854f767b..76a1954071e1 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -420,7 +420,7 @@ static int dump_task(struct task_struct *p, void *arg) * State information includes task's pid, uid, tgid, vm size, rss, * pgtables_bytes, swapents, oom_score_adj value, and name. 
*/ -static void dump_tasks(struct oom_control *oc) +void dump_tasks(struct oom_control *oc) { pr_info("Tasks state (memory values in pages):\n"); pr_info("[ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name\n"); @@ -436,6 +436,7 @@ static void dump_tasks(struct oom_control *oc) rcu_read_unlock(); } } +EXPORT_SYMBOL_GPL(dump_tasks); static void dump_oom_summary(struct oom_control *oc, struct task_struct *victim) { diff --git a/mm/readahead.c b/mm/readahead.c index a8620cac2d83..dc5cc73775e7 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -167,6 +167,7 @@ static void read_pages(struct readahead_control *rac) psi_memstall_enter(&rac->_pflags); blk_start_plug(&plug); + trace_android_vh_read_pages(rac); if (aops->readahead) { aops->readahead(rac); /* diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 6bb272894c96..0da15e1f3f72 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -243,7 +243,7 @@ static int nf_hook_bridge_pre(struct sk_buff *skb, struct sk_buff **pskb) goto frame_finish; #endif - e = rcu_dereference(net->nf.hooks_bridge[NF_BR_PRE_ROUTING]); + e = rcu_dereference(get_nf_hooks_bridge(net)[NF_BR_PRE_ROUTING]); if (!e) goto frame_finish; diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 01d690d9fe5f..c7f0aedf2244 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -1016,7 +1016,7 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net, unsigned int i; int ret; - e = rcu_dereference(net->nf.hooks_bridge[hook]); + e = rcu_dereference(get_nf_hooks_bridge(net)[hook]); if (!e) return okfn(net, sk, skb); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 4c1707d0eb9b..1d0110152862 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -1093,9 +1093,13 @@ void __init net_ns_init(void) struct net_generic *ng; #ifdef CONFIG_NET_NS - net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), - SMP_CACHE_BYTES, - SLAB_PANIC|SLAB_ACCOUNT, NULL); + /* Allocate size for struct ext_net instead of struct net + * to fix a KMI issue when CONFIG_NETFILTER_FAMILY_BRIDGE + * is enabled + */ + net_cachep = kmem_cache_create("net_namespace", sizeof(struct ext_net), + SMP_CACHE_BYTES, + SLAB_PANIC | SLAB_ACCOUNT, NULL); /* Create workqueue for cleanup */ netns_wq = create_singlethread_workqueue("netns"); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index cbc4816ed7d8..ac53ef7eec91 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -216,8 +216,10 @@ static void igmp_start_timer(struct ip_mc_list *im, int max_delay) int tv = prandom_u32_max(max_delay); im->tm_running = 1; - if (!mod_timer(&im->timer, jiffies+tv+2)) - refcount_inc(&im->refcnt); + if (refcount_inc_not_zero(&im->refcnt)) { + if (mod_timer(&im->timer, jiffies + tv + 2)) + ip_ma_put(im); + } } static void igmp_gq_start_timer(struct in_device *in_dev) diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 55a7f72d547c..6c7a44f84b93 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -39,6 +39,12 @@ struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; EXPORT_SYMBOL(nf_hooks_needed); #endif +#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE +struct nf_hook_entries __rcu *init_nf_hooks_bridge[NF_INET_NUMHOOKS]; +struct nf_hook_entries __rcu **init_nf_hooks_bridgep = &init_nf_hooks_bridge[0]; +EXPORT_SYMBOL_GPL(init_nf_hooks_bridgep); +#endif + static DEFINE_MUTEX(nf_hook_mutex); /* max hooks per family/hooknum */ @@ -278,9 +284,9 @@ 
nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum, #endif #ifdef CONFIG_NETFILTER_FAMILY_BRIDGE case NFPROTO_BRIDGE: - if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_bridge) <= hooknum)) + if (WARN_ON_ONCE(hooknum >= NF_INET_NUMHOOKS)) return NULL; - return net->nf.hooks_bridge + hooknum; + return get_nf_hooks_bridge(net) + hooknum; #endif #ifdef CONFIG_NETFILTER_INGRESS case NFPROTO_INET: @@ -747,7 +753,7 @@ static int __net_init netfilter_net_init(struct net *net) __netfilter_net_init(net->nf.hooks_arp, ARRAY_SIZE(net->nf.hooks_arp)); #endif #ifdef CONFIG_NETFILTER_FAMILY_BRIDGE - __netfilter_net_init(net->nf.hooks_bridge, ARRAY_SIZE(net->nf.hooks_bridge)); + __netfilter_net_init(get_nf_hooks_bridge(net), NF_INET_NUMHOOKS); #endif #ifdef CONFIG_PROC_FS net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter", diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 63d1516816b1..566f7794bf58 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -281,7 +281,7 @@ static struct nf_hook_entries *nf_hook_entries_head(const struct net *net, u8 pf switch (pf) { #ifdef CONFIG_NETFILTER_FAMILY_BRIDGE case NFPROTO_BRIDGE: - return rcu_dereference(net->nf.hooks_bridge[hooknum]); + return rcu_dereference(get_nf_hooks_bridge(net)[hooknum]); #endif case NFPROTO_IPV4: return rcu_dereference(net->nf.hooks_ipv4[hooknum]); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 6b5f22dc1d94..30802f7f2114 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9474,7 +9474,7 @@ static void nft_set_commit_update(struct list_head *set_update_list) list_for_each_entry_safe(set, next, set_update_list, pending_update) { list_del_init(&set->pending_update); - if (!set->ops->commit) + if (!set->ops->commit || set->dead) continue; set->ops->commit(set); diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c index 8120aadf6a0f..3ca3c3a3ba01 100644 --- a/net/netfilter/nfnetlink_hook.c +++ b/net/netfilter/nfnetlink_hook.c @@ -210,9 +210,9 @@ nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *de break; case NFPROTO_BRIDGE: #ifdef CONFIG_NETFILTER_FAMILY_BRIDGE - if (hook >= ARRAY_SIZE(net->nf.hooks_bridge)) + if (hook >= NF_INET_NUMHOOKS) return ERR_PTR(-EINVAL); - hook_head = rcu_dereference(net->nf.hooks_bridge[hook]); + hook_head = rcu_dereference(get_nf_hooks_bridge(net)[hook]); #endif break; #if defined(CONFIG_NETFILTER_INGRESS) || defined(CONFIG_NETFILTER_EGRESS) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index deea6196d992..4e1cc31729b8 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -2042,6 +2042,9 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, e = f->mt[r].e; + if (!nft_set_elem_active(&e->ext, iter->genmask)) + goto cont; + elem.priv = e; iter->err = iter->fn(ctx, set, iter, &elem); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index cb833302270a..5b328a82ea70 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -71,7 +71,8 @@ #include <net/netlink.h> #define CREATE_TRACE_POINTS #include <trace/events/netlink.h> - +#undef CREATE_TRACE_POINTS +#include #include "af_netlink.h" struct listeners { @@ -1966,6 +1967,15 @@ out: return err ?
: copied; } +static __poll_t netlink_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + __poll_t mask = datagram_poll(file, sock, wait); + + trace_android_vh_netlink_poll(file, sock, wait, &mask); + return mask; +} + static void netlink_data_ready(struct sock *sk) { BUG(); @@ -2766,7 +2776,7 @@ static const struct proto_ops netlink_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = netlink_getname, - .poll = datagram_poll, + .poll = netlink_poll, .ioctl = netlink_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d919eff62ebe..0fdf95420bec 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -12017,16 +12017,18 @@ static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info) return err; } -static int nl80211_setdel_pmksa(struct sk_buff *skb, struct genl_info *info) +static int nl80211_set_pmksa(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - int (*rdev_ops)(struct wiphy *wiphy, struct net_device *dev, - struct cfg80211_pmksa *pmksa) = NULL; struct net_device *dev = info->user_ptr[1]; struct cfg80211_pmksa pmksa; + bool ap_pmksa_caching_support = false; memset(&pmksa, 0, sizeof(struct cfg80211_pmksa)); + ap_pmksa_caching_support = wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_AP_PMKSA_CACHING); + if (!info->attrs[NL80211_ATTR_PMKID]) return -EINVAL; @@ -12035,16 +12037,15 @@ static int nl80211_setdel_pmksa(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_MAC]) { pmksa.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); } else if (info->attrs[NL80211_ATTR_SSID] && - info->attrs[NL80211_ATTR_FILS_CACHE_ID] && - (info->genlhdr->cmd == NL80211_CMD_DEL_PMKSA || - info->attrs[NL80211_ATTR_PMK])) { + info->attrs[NL80211_ATTR_FILS_CACHE_ID] && + info->attrs[NL80211_ATTR_PMK]) { pmksa.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); pmksa.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); - pmksa.cache_id = - nla_data(info->attrs[NL80211_ATTR_FILS_CACHE_ID]); + pmksa.cache_id = nla_data(info->attrs[NL80211_ATTR_FILS_CACHE_ID]); } else { return -EINVAL; } + if (info->attrs[NL80211_ATTR_PMK]) { pmksa.pmk = nla_data(info->attrs[NL80211_ATTR_PMK]); pmksa.pmk_len = nla_len(info->attrs[NL80211_ATTR_PMK]); @@ -12056,32 +12057,71 @@ static int nl80211_setdel_pmksa(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_PMK_REAUTH_THRESHOLD]) pmksa.pmk_reauth_threshold = - nla_get_u8( - info->attrs[NL80211_ATTR_PMK_REAUTH_THRESHOLD]); + nla_get_u8(info->attrs[NL80211_ATTR_PMK_REAUTH_THRESHOLD]); if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT && - !(dev->ieee80211_ptr->iftype == NL80211_IFTYPE_AP && - wiphy_ext_feature_isset(&rdev->wiphy, - NL80211_EXT_FEATURE_AP_PMKSA_CACHING))) + !((dev->ieee80211_ptr->iftype == NL80211_IFTYPE_AP || + dev->ieee80211_ptr->iftype == NL80211_IFTYPE_P2P_GO) && + ap_pmksa_caching_support)) return -EOPNOTSUPP; - switch (info->genlhdr->cmd) { - case NL80211_CMD_SET_PMKSA: - rdev_ops = rdev->ops->set_pmksa; - break; - case NL80211_CMD_DEL_PMKSA: - rdev_ops = rdev->ops->del_pmksa; - break; - default: - WARN_ON(1); - break; + if (!rdev->ops->set_pmksa) + return -EOPNOTSUPP; + + return rdev_set_pmksa(rdev, dev, &pmksa); +} + +static int nl80211_del_pmksa(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = 
info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct cfg80211_pmksa pmksa; + bool sae_offload_support = false; + bool owe_offload_support = false; + bool ap_pmksa_caching_support = false; + + memset(&pmksa, 0, sizeof(struct cfg80211_pmksa)); + + sae_offload_support = wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_SAE_OFFLOAD); + owe_offload_support = wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_OWE_OFFLOAD); + ap_pmksa_caching_support = wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_AP_PMKSA_CACHING); + + if (info->attrs[NL80211_ATTR_PMKID]) + pmksa.pmkid = nla_data(info->attrs[NL80211_ATTR_PMKID]); + + if (info->attrs[NL80211_ATTR_MAC]) { + pmksa.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); + } else if (info->attrs[NL80211_ATTR_SSID]) { + /* SSID based pmksa flush supported only for FILS, + * OWE/SAE OFFLOAD cases + */ + if (info->attrs[NL80211_ATTR_FILS_CACHE_ID] && + info->attrs[NL80211_ATTR_PMK]) { + pmksa.cache_id = nla_data(info->attrs[NL80211_ATTR_FILS_CACHE_ID]); + } else if (!sae_offload_support && !owe_offload_support) { + return -EINVAL; + } + pmksa.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); + pmksa.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); + } else { + return -EINVAL; } - if (!rdev_ops) + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT && + !((dev->ieee80211_ptr->iftype == NL80211_IFTYPE_AP || + dev->ieee80211_ptr->iftype == NL80211_IFTYPE_P2P_GO) && + ap_pmksa_caching_support)) return -EOPNOTSUPP; - return rdev_ops(&rdev->wiphy, dev, &pmksa); + if (!rdev->ops->del_pmksa) + return -EOPNOTSUPP; + + return rdev_del_pmksa(rdev, dev, &pmksa); } static int nl80211_flush_pmksa(struct sk_buff *skb, struct genl_info *info) @@ -16817,7 +16857,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { { .cmd = NL80211_CMD_SET_PMKSA, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = nl80211_setdel_pmksa, + .doit = nl80211_set_pmksa, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_CLEAR_SKB), @@ -16825,7 +16865,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { { .cmd = NL80211_CMD_DEL_PMKSA, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = nl80211_setdel_pmksa, + .doit = nl80211_del_pmksa, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 1e5e66ae5a52..25fdb7fda112 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -74,6 +74,8 @@ my $git_command ='export LANGUAGE=en_US.UTF-8; git'; my $tabsize = 8; my ${CONFIG_} = "CONFIG_"; +my %maybe_linker_symbol; # for externs in c exceptions, when seen in *vmlinux.lds.h + sub help { my ($exitcode) = @_; @@ -620,6 +622,22 @@ our $signature_tags = qr{(?xi: Cc: )}; +our @link_tags = qw(Link Closes); + +#Create search and print patterns for all these strings to be used directly below +our $link_tags_search = ""; +our $link_tags_print = ""; +foreach my $entry (@link_tags) { + if ($link_tags_search ne "") { + $link_tags_search .= '|'; + $link_tags_print .= ' or '; + } + $entry .= ':'; + $link_tags_search .= $entry; + $link_tags_print .= "'$entry'"; +} +$link_tags_search = "(?:${link_tags_search})"; + our $tracing_logging_tags = qr{(?xi: [=-]*> | <[=-]* | @@ -702,6 +720,17 @@ sub find_standard_signature { return ""; } +our $obsolete_archives = qr{(?xi:
\Qfreedesktop.org/archives/dri-devel\E | + \Qlists.infradead.org\E | + \Qlkml.org\E | + \Qmail-archive.com\E | + \Qmailman.alsa-project.org/pipermail\E | + \Qmarc.info\E | + \Qozlabs.org/pipermail\E | + \Qspinics.net\E +)}; + our @typeListMisordered = ( qr{char\s+(?:un)?signed}, qr{int\s+(?:(?:un)?signed\s+)?short\s}, @@ -812,7 +841,9 @@ our %deprecated_apis = ( "get_state_synchronize_sched" => "get_state_synchronize_rcu", "cond_synchronize_sched" => "cond_synchronize_rcu", "kmap" => "kmap_local_page", + "kunmap" => "kunmap_local", "kmap_atomic" => "kmap_local_page", + "kunmap_atomic" => "kunmap_local", ); #Create a search pattern for all these strings to speed up a loop below @@ -3131,21 +3162,33 @@ sub process { if ($sign_off =~ /^co-developed-by:$/i) { if ($email eq $author) { WARN("BAD_SIGN_OFF", - "Co-developed-by: should not be used to attribute nominal patch author '$author'\n" . "$here\n" . $rawline); + "Co-developed-by: should not be used to attribute nominal patch author '$author'\n" . $herecurr); } if (!defined $lines[$linenr]) { WARN("BAD_SIGN_OFF", - "Co-developed-by: must be immediately followed by Signed-off-by:\n" . "$here\n" . $rawline); - } elsif ($rawlines[$linenr] !~ /^\s*signed-off-by:\s*(.*)/i) { + "Co-developed-by: must be immediately followed by Signed-off-by:\n" . $herecurr); + } elsif ($rawlines[$linenr] !~ /^signed-off-by:\s*(.*)/i) { WARN("BAD_SIGN_OFF", - "Co-developed-by: must be immediately followed by Signed-off-by:\n" . "$here\n" . $rawline . "\n" .$rawlines[$linenr]); + "Co-developed-by: must be immediately followed by Signed-off-by:\n" . $herecurr . $rawlines[$linenr] . "\n"); } elsif ($1 ne $email) { WARN("BAD_SIGN_OFF", - "Co-developed-by and Signed-off-by: name/email do not match \n" . "$here\n" . $rawline . "\n" .$rawlines[$linenr]); + "Co-developed-by and Signed-off-by: name/email do not match\n" . $herecurr . $rawlines[$linenr] . "\n"); + } + } + +# check if Reported-by: is followed by a Closes: tag + if ($sign_off =~ /^reported(?:|-and-tested)-by:$/i) { + if (!defined $lines[$linenr]) { + WARN("BAD_REPORTED_BY_LINK", + "Reported-by: should be immediately followed by Closes: with a URL to the report\n" . $herecurr . "\n"); + } elsif ($rawlines[$linenr] !~ /^closes:\s*/i) { + WARN("BAD_REPORTED_BY_LINK", + "Reported-by: should be immediately followed by Closes: with a URL to the report\n" . $herecurr . $rawlines[$linenr] . "\n"); } } } + # Check Fixes: styles is correct if (!$in_header_lines && $line =~ /^\s*fixes:?\s*(?:commit\s*)?[0-9a-f]{5,}\b/i) { @@ -3225,11 +3268,11 @@ sub process { # file delta changes $line =~ /^\s*(?:[\w\.\-\+]*\/)++[\w\.\-\+]+:/ || # filename then : - $line =~ /^\s*(?:Fixes:|Link:|$signature_tags)/i || - # A Fixes: or Link: line or signature tag line + $line =~ /^\s*(?:Fixes:|$link_tags_search|$signature_tags)/i || + # A Fixes:, link or signature tag line $commit_log_possible_stack_dump)) { WARN("COMMIT_LOG_LONG_LINE", - "Possible unwrapped commit description (prefer a maximum 75 chars per line)\n" . $herecurr); + "Prefer a maximum 75 chars per line (possible unwrapped commit description?)\n" . $herecurr); $commit_log_long_line = 1; } @@ -3239,6 +3282,29 @@ sub process { $commit_log_possible_stack_dump = 0; } +# Check for odd tags before a URI/URL + if ($in_commit_log && + $line =~ /^\s*(\w+:)\s*http/ && $1 !~ /^$link_tags_search$/) { + if ($1 =~ /^v(?:ersion)?\d+/i) { + WARN("COMMIT_LOG_VERSIONING", + "Patch version information should be after the --- line\n" . 
$herecurr); + } else { + WARN("COMMIT_LOG_USE_LINK", + "Unknown link reference '$1', use $link_tags_print instead\n" . $herecurr); + } + } + +# Check for misuse of the link tags + if ($in_commit_log && + $line =~ /^\s*(\w+:)\s*(\S+)/) { + my $tag = $1; + my $value = $2; + if ($tag =~ /^$link_tags_search$/ && $value !~ m{^https?://}) { + WARN("COMMIT_LOG_WRONG_LINK", + "'$tag' should be followed by a public http(s) link\n" . $herecurr); + } + } + # Check for lines starting with a # if ($in_commit_log && $line =~ /^#/) { if (WARN("COMMIT_COMMENT_SYMBOL", @@ -3324,6 +3390,12 @@ sub process { $last_git_commit_id_linenr = $linenr if ($line =~ /\bcommit\s*$/i); } +# Check for mailing list archives other than lore.kernel.org + if ($rawline =~ m{http.*\b$obsolete_archives}) { + WARN("PREFER_LORE_ARCHIVE", + "Use lore.kernel.org archive links when possible - see https://lore.kernel.org/lists.html\n" . $herecurr); + } + # Check for added, moved or deleted files if (!$reported_maintainer_file && !$in_commit_log && ($line =~ /^(?:new|deleted) file mode\s*\d+\s*$/ || @@ -3693,7 +3765,7 @@ sub process { "'$spdx_license' is not supported in LICENSES/...\n" . $herecurr); } if ($realfile =~ m@^Documentation/devicetree/bindings/@ && - not $spdx_license =~ /GPL-2\.0.*BSD-2-Clause/) { + $spdx_license !~ /GPL-2\.0(?:-only)? OR BSD-2-Clause/) { my $msg_level = \&WARN; $msg_level = \&CHK if ($file); if (&{$msg_level}("SPDX_LICENSE_TAG", @@ -3703,12 +3775,17 @@ sub process { $fixed[$fixlinenr] =~ s/SPDX-License-Identifier: .*/SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)/; } } + if ($realfile =~ m@^include/dt-bindings/@ && + $spdx_license !~ /GPL-2\.0(?:-only)? OR \S+/) { + WARN("SPDX_LICENSE_TAG", + "DT binding headers should be licensed (GPL-2.0-only OR .*)\n" . $herecurr); + } } } } # check for embedded filenames - if ($rawline =~ /^\+.*\Q$realfile\E/) { + if ($rawline =~ /^\+.*\b\Q$realfile\E\b/) { WARN("EMBEDDED_FILENAME", "It's generally not useful to have the filename in the file\n" . $herecurr); } @@ -4971,7 +5048,7 @@ sub process { if|for|while|switch|return|case| volatile|__volatile__| __attribute__|format|__extension__| - asm|__asm__)$/x) + asm|__asm__|scoped_guard)$/x) { # cpp #define statements have non-optional spaces, ie # if there is a space between the name and the open @@ -5766,6 +5843,8 @@ sub process { $var !~ /^(?:[A-Z]+_){1,5}[A-Z]{1,3}[a-z]/ && #Ignore Page variants $var !~ /^(?:Clear|Set|TestClear|TestSet|)Page[A-Z]/ && +#Ignore ETHTOOL_LINK_MODE_ variants + $var !~ /^ETHTOOL_LINK_MODE_/ && #Ignore SI style variants like nS, mV and dB #(ie: max_uV, regulator_min_uA_show, RANGE_mA_VALUE) $var !~ /^(?:[a-z0-9_]*|[A-Z0-9_]*)?_?[a-z][A-Z](?:_[a-z0-9_]+|_[A-Z0-9_]+)?$/ && @@ -5901,6 +5980,7 @@ sub process { $dstat !~ /$exceptions/ && $dstat !~ /^\.$Ident\s*=/ && # .foo = $dstat !~ /^(?:\#\s*$Ident|\#\s*$Constant)\s*$/ && # stringification #foo + $dstat !~ /^case\b/ && # case ... $dstat !~ /^do\s*$Constant\s*while\s*$Constant;?$/ && # do {...} while (...); // do {...} while (...) $dstat !~ /^while\s*$Constant\s*$Constant\s*$/ && # while (...) {...} $dstat !~ /^for\s*$Constant$/ && # for (...) 
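Among the checkpatch changes above, note the new '$dstat !~ /^case\b/' exception in the complex-macro-value check. It exists for macros whose expansion is a run of case labels, which cannot be wrapped in parentheses; a hypothetical C example of the pattern (all names illustrative):

#include <stdbool.h>

/* A macro whose value is a list of case labels; parenthesizing it
 * would not be valid C, so checkpatch must not suggest it. */
#define SUPPORTED_RATES_KHZ	\
	case 25175:		\
	case 74250:		\
	case 148500:

static bool rate_supported(unsigned int khz)
{
	switch (khz) {
	SUPPORTED_RATES_KHZ
		return true;
	default:
		return false;
	}
}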
@@ -5973,6 +6053,9 @@ sub process { # check for line continuations outside of #defines, preprocessor #, and asm + } elsif ($realfile =~ m@/vmlinux.lds.h$@) { + $line =~ s/(\w+)/$maybe_linker_symbol{$1}++/ge; + #print "REAL: $realfile\nln: $line\nkeys:", sort keys %maybe_linker_symbol; } else { if ($prevline !~ /^..*\\$/ && $line !~ /^\+\s*\#.*\\$/ && # preprocessor @@ -6910,10 +6993,22 @@ sub process { # } # } +# strcpy uses that should likely be strscpy + if ($line =~ /\bstrcpy\s*\(/) { + WARN("STRCPY", + "Prefer strscpy over strcpy - see: https://github.com/KSPP/linux/issues/88\n" . $herecurr); + } + # strlcpy uses that should likely be strscpy if ($line =~ /\bstrlcpy\s*\(/) { WARN("STRLCPY", - "Prefer strscpy over strlcpy - see: https://lore.kernel.org/r/CAHk-=wgfRnXz0W3D37d01q3JFkr_i_uTL=V6A6G1oUZcprmknw\@mail.gmail.com/\n" . $herecurr); + "Prefer strscpy over strlcpy - see: https://github.com/KSPP/linux/issues/89\n" . $herecurr); + } + +# strncpy uses that should likely be strscpy or strscpy_pad + if ($line =~ /\bstrncpy\s*\(/) { + WARN("STRNCPY", + "Prefer strscpy, strscpy_pad, or __nonstring over strncpy - see: https://github.com/KSPP/linux/issues/90\n" . $herecurr); } # typecasts on min/max could be min_t/max_t @@ -7020,6 +7115,21 @@ sub process { "arguments for function declarations should follow identifier\n" . $herecurr); } + } elsif ($realfile =~ /\.c$/ && defined $stat && + $stat =~ /^\+extern struct\s+(\w+)\s+(\w+)\[\];/) + { + my ($st_type, $st_name) = ($1, $2); + + for my $s (keys %maybe_linker_symbol) { + #print "Linker symbol? $st_name : $s\n"; + goto LIKELY_LINKER_SYMBOL + if $st_name =~ /$s/; + } + WARN("AVOID_EXTERNS", + "found a file-scoped extern type:$st_type name:$st_name in .c file\n" + . "is this a linker symbol ?\n" . $herecurr); + LIKELY_LINKER_SYMBOL: + } elsif ($realfile =~ /\.c$/ && defined $stat && $stat =~ /^.\s*extern\s+/) { @@ -7128,7 +7238,7 @@ sub process { } # check for alloc argument mismatch - if ($line =~ /\b((?:devm_)?(?:kcalloc|kmalloc_array))\s*\(\s*sizeof\b/) { + if ($line =~ /\b((?:devm_)?((?:k|kv)?(calloc|malloc_array)(?:_node)?))\s*\(\s*sizeof\b/) { WARN("ALLOC_ARRAY_ARGS", "$1 uses number as first arg, sizeof is generally wrong\n" . $herecurr); } @@ -7331,6 +7441,16 @@ sub process { } } +# check for array definition/declarations that should use flexible arrays instead + if ($sline =~ /^[\+ ]\s*\}(?:\s*__packed)?\s*;\s*$/ && + $prevline =~ /^\+\s*(?:\}(?:\s*__packed\s*)?|$Type)\s*$Ident\s*\[\s*(0|1)\s*\]\s*;\s*$/) { + if (ERROR("FLEXIBLE_ARRAY", + "Use C99 flexible arrays - see https://docs.kernel.org/process/deprecated.html#zero-length-and-one-element-arrays\n" . $hereprev) && + $1 == '0' && $fix) { + $fixed[$fixlinenr - 1] =~ s/\[\s*0\s*\]/[]/; + } + } + # nested likely/unlikely calls if ($line =~ /\b(?:(?:un)?likely)\s*\(\s*!?\s*(IS_ERR(?:_OR_NULL|_VALUE)?|WARN)/) { WARN("LIKELY_MISUSE", @@ -7348,6 +7468,30 @@ sub process { } } +# Complain about RCU Tasks Trace used outside of BPF (and of course, RCU). + our $rcu_trace_funcs = qr{(?x: + rcu_read_lock_trace | + rcu_read_lock_trace_held | + rcu_read_unlock_trace | + call_rcu_tasks_trace | + synchronize_rcu_tasks_trace | + rcu_barrier_tasks_trace | + rcu_request_urgent_qs_task + )}; + our $rcu_trace_paths = qr{(?x: + kernel/bpf/ | + include/linux/bpf | + net/bpf/ | + kernel/rcu/ | + include/linux/rcu + )}; + if ($line =~ /\b($rcu_trace_funcs)\s*\(/) { + if ($realfile !~ m{^$rcu_trace_paths}) { + WARN("RCU_TASKS_TRACE", + "use of RCU tasks trace is incorrect outside BPF or core RCU code\n" . 
$herecurr); + } + } + # check for lockdep_set_novalidate_class if ($line =~ /^.\s*lockdep_set_novalidate_class\s*\(/ || $line =~ /__lockdep_no_validate__\s*\)/ ) { diff --git a/tools/include/linux/rwsem.h b/tools/include/linux/rwsem.h index 83971b3cbfce..f8bffd4a987c 100644 --- a/tools/include/linux/rwsem.h +++ b/tools/include/linux/rwsem.h @@ -37,4 +37,8 @@ static inline int up_write(struct rw_semaphore *sem) { return pthread_rwlock_unlock(&sem->lock); } + +#define down_read_nested(sem, subclass) down_read(sem) +#define down_write_nested(sem, subclass) down_write(sem) + #endif /* _TOOLS_RWSEM_H */ diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h index 622266b197d0..a6cdf25b6b9d 100644 --- a/tools/include/linux/spinlock.h +++ b/tools/include/linux/spinlock.h @@ -11,6 +11,7 @@ #define spin_lock_init(x) pthread_mutex_init(x, NULL) #define spin_lock(x) pthread_mutex_lock(x) +#define spin_lock_nested(x, subclass) pthread_mutex_lock(x) #define spin_unlock(x) pthread_mutex_unlock(x) #define spin_lock_bh(x) pthread_mutex_lock(x) #define spin_unlock_bh(x) pthread_mutex_unlock(x) diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c index d587a558997f..64a1645ff94c 100644 --- a/tools/testing/radix-tree/linux.c +++ b/tools/testing/radix-tree/linux.c @@ -93,13 +93,9 @@ void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru, return p; } -void kmem_cache_free_locked(struct kmem_cache *cachep, void *objp) +void __kmem_cache_free_locked(struct kmem_cache *cachep, void *objp) { assert(objp); - uatomic_dec(&nr_allocated); - uatomic_dec(&cachep->nr_allocated); - if (kmalloc_verbose) - printf("Freeing %p to slab\n", objp); if (cachep->nr_objs > 10 || cachep->align) { memset(objp, POISON_FREE, cachep->size); free(objp); @@ -111,6 +107,15 @@ void kmem_cache_free_locked(struct kmem_cache *cachep, void *objp) } } +void kmem_cache_free_locked(struct kmem_cache *cachep, void *objp) +{ + uatomic_dec(&nr_allocated); + uatomic_dec(&cachep->nr_allocated); + if (kmalloc_verbose) + printf("Freeing %p to slab\n", objp); + __kmem_cache_free_locked(cachep, objp); +} + void kmem_cache_free(struct kmem_cache *cachep, void *objp) { pthread_mutex_lock(&cachep->lock); @@ -141,18 +146,17 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, if (kmalloc_verbose) pr_debug("Bulk alloc %lu\n", size); - if (!(gfp & __GFP_DIRECT_RECLAIM)) { - if (cachep->non_kernel < size) - return 0; - - cachep->non_kernel -= size; - } - pthread_mutex_lock(&cachep->lock); if (cachep->nr_objs >= size) { struct radix_tree_node *node; for (i = 0; i < size; i++) { + if (!(gfp & __GFP_DIRECT_RECLAIM)) { + if (!cachep->non_kernel) + break; + cachep->non_kernel--; + } + node = cachep->objs; cachep->nr_objs--; cachep->objs = node->parent; @@ -163,11 +167,19 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, } else { pthread_mutex_unlock(&cachep->lock); for (i = 0; i < size; i++) { + if (!(gfp & __GFP_DIRECT_RECLAIM)) { + if (!cachep->non_kernel) + break; + cachep->non_kernel--; + } + if (cachep->align) { posix_memalign(&p[i], cachep->align, cachep->size * size); } else { p[i] = malloc(cachep->size * size); + if (!p[i]) + break; } if (cachep->ctor) cachep->ctor(p[i]); @@ -176,6 +188,15 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, } } + if (i < size) { + size = i; + pthread_mutex_lock(&cachep->lock); + for (i = 0; i < size; i++) + __kmem_cache_free_locked(cachep, p[i]); + 
pthread_mutex_unlock(&cachep->lock); + return 0; + } + for (i = 0; i < size; i++) { uatomic_inc(&nr_allocated); uatomic_inc(&cachep->nr_allocated); diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index b598b7fe4419..a2626f02f385 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -35753,6 +35753,363 @@ static noinline void __init check_locky(struct maple_tree *mt) mt_clear_in_rcu(mt); } +/* + * Compares two nodes except for the addresses stored in the nodes. + * Returns zero if they are the same, otherwise returns non-zero. + */ +static int __init compare_node(struct maple_enode *enode_a, + struct maple_enode *enode_b) +{ + struct maple_node *node_a, *node_b; + struct maple_node a, b; + void **slots_a, **slots_b; /* Do not use the rcu tag. */ + enum maple_type type; + int i; + + if (((unsigned long)enode_a & MAPLE_NODE_MASK) != + ((unsigned long)enode_b & MAPLE_NODE_MASK)) { + pr_err("The lower 8 bits of enode are different.\n"); + return -1; + } + + type = mte_node_type(enode_a); + node_a = mte_to_node(enode_a); + node_b = mte_to_node(enode_b); + a = *node_a; + b = *node_b; + + /* Do not compare addresses. */ + if (ma_is_root(node_a) || ma_is_root(node_b)) { + a.parent = (struct maple_pnode *)((unsigned long)a.parent & + MA_ROOT_PARENT); + b.parent = (struct maple_pnode *)((unsigned long)b.parent & + MA_ROOT_PARENT); + } else { + a.parent = (struct maple_pnode *)((unsigned long)a.parent & + MAPLE_NODE_MASK); + b.parent = (struct maple_pnode *)((unsigned long)b.parent & + MAPLE_NODE_MASK); + } + + if (a.parent != b.parent) { + pr_err("The lower 8 bits of parents are different. %p %p\n", + a.parent, b.parent); + return -1; + } + + /* + * If it is a leaf node, the slots do not contain the node address, and + * no special processing of slots is required. + */ + if (ma_is_leaf(type)) + goto cmp; + + slots_a = ma_slots(&a, type); + slots_b = ma_slots(&b, type); + + for (i = 0; i < mt_slots[type]; i++) { + if (!slots_a[i] && !slots_b[i]) + break; + + if (!slots_a[i] || !slots_b[i]) { + pr_err("The number of slots is different.\n"); + return -1; + } + + /* Do not compare addresses in slots. */ + ((unsigned long *)slots_a)[i] &= MAPLE_NODE_MASK; + ((unsigned long *)slots_b)[i] &= MAPLE_NODE_MASK; + } + +cmp: + /* + * Compare all contents of two nodes, including parent (except address), + * slots (except address), pivots, gaps and metadata. + */ + return memcmp(&a, &b, sizeof(struct maple_node)); +} + +/* + * Compare two trees and return 0 if they are the same, non-zero otherwise. 
+ */ +static int __init compare_tree(struct maple_tree *mt_a, struct maple_tree *mt_b) +{ + MA_STATE(mas_a, mt_a, 0, 0); + MA_STATE(mas_b, mt_b, 0, 0); + + if (mt_a->ma_flags != mt_b->ma_flags) { + pr_err("The flags of the two trees are different.\n"); + return -1; + } + + mas_dfs_preorder(&mas_a); + mas_dfs_preorder(&mas_b); + + if (mas_is_ptr(&mas_a) || mas_is_ptr(&mas_b)) { + if (!(mas_is_ptr(&mas_a) && mas_is_ptr(&mas_b))) { + pr_err("One is MAS_ROOT and the other is not.\n"); + return -1; + } + return 0; + } + + while (!mas_is_none(&mas_a) || !mas_is_none(&mas_b)) { + + if (mas_is_none(&mas_a) || mas_is_none(&mas_b)) { + pr_err("One is MAS_NONE and the other is not.\n"); + return -1; + } + + if (mas_a.min != mas_b.min || + mas_a.max != mas_b.max) { + pr_err("mas->min, mas->max do not match.\n"); + return -1; + } + + if (compare_node(mas_a.node, mas_b.node)) { + pr_err("The contents of nodes %p and %p are different.\n", + mas_a.node, mas_b.node); + mt_dump(mt_a, mt_dump_dec); + mt_dump(mt_b, mt_dump_dec); + return -1; + } + + mas_dfs_preorder(&mas_a); + mas_dfs_preorder(&mas_b); + } + + return 0; +} + +static __init void mas_subtree_max_range(struct ma_state *mas) +{ + unsigned long limit = mas->max; + MA_STATE(newmas, mas->tree, 0, 0); + void *entry; + + mas_for_each(mas, entry, limit) { + if (mas->last - mas->index >= + newmas.last - newmas.index) { + newmas = *mas; + } + } + + *mas = newmas; +} + +/* + * build_full_tree() - Build a full tree. + * @mt: The tree to build. + * @flags: Use @flags to build the tree. + * @height: The height of the tree to build. + * + * Build a tree with full leaf nodes and internal nodes. Note that the height + * should not exceed 3, otherwise it will take a long time to build. + * Return: zero if the build is successful, non-zero if it fails. + */ +static __init int build_full_tree(struct maple_tree *mt, unsigned int flags, + int height) +{ + MA_STATE(mas, mt, 0, 0); + unsigned long step; + int ret = 0, cnt = 1; + enum maple_type type; + + mt_init_flags(mt, flags); + mtree_insert_range(mt, 0, ULONG_MAX, xa_mk_value(5), GFP_KERNEL); + + mtree_lock(mt); + + while (1) { + mas_set(&mas, 0); + if (mt_height(mt) < height) { + mas.max = ULONG_MAX; + goto store; + } + + while (1) { + mas_dfs_preorder(&mas); + if (mas_is_none(&mas)) + goto unlock; + + type = mte_node_type(mas.node); + if (mas_data_end(&mas) + 1 < mt_slots[type]) { + mas_set(&mas, mas.min); + goto store; + } + } +store: + mas_subtree_max_range(&mas); + step = mas.last - mas.index; + if (step < 1) { + ret = -1; + goto unlock; + } + + step /= 2; + mas.last = mas.index + step; + mas_store_gfp(&mas, xa_mk_value(5), + GFP_KERNEL); + ++cnt; + } +unlock: + mtree_unlock(mt); + + MT_BUG_ON(mt, mt_height(mt) != height); + /* pr_info("height:%u number of elements:%d\n", mt_height(mt), cnt); */ + return ret; +} + +static noinline void __init check_mtree_dup(struct maple_tree *mt) +{ + DEFINE_MTREE(new); + int i, j, ret, count = 0; + unsigned int rand_seed = 17, rand; + + /* store a value at [0, 0] */ + mt_init_flags(mt, 0); + mtree_store_range(mt, 0, 0, xa_mk_value(0), GFP_KERNEL); + ret = mtree_dup(mt, &new, GFP_KERNEL); + MT_BUG_ON(&new, ret); + mt_validate(&new); + if (compare_tree(mt, &new)) + MT_BUG_ON(&new, 1); + + mtree_destroy(mt); + mtree_destroy(&new); + + /* The two trees have different attributes. 
*/ + mt_init_flags(mt, 0); + mt_init_flags(&new, MT_FLAGS_ALLOC_RANGE); + ret = mtree_dup(mt, &new, GFP_KERNEL); + MT_BUG_ON(&new, ret != -EINVAL); + mtree_destroy(mt); + mtree_destroy(&new); + + /* The new tree is not empty */ + mt_init_flags(mt, 0); + mt_init_flags(&new, 0); + mtree_store(&new, 5, xa_mk_value(5), GFP_KERNEL); + ret = mtree_dup(mt, &new, GFP_KERNEL); + MT_BUG_ON(&new, ret != -EINVAL); + mtree_destroy(mt); + mtree_destroy(&new); + + /* Test for duplicating full trees. */ + for (i = 1; i <= 3; i++) { + ret = build_full_tree(mt, 0, i); + MT_BUG_ON(mt, ret); + mt_init_flags(&new, 0); + + ret = mtree_dup(mt, &new, GFP_KERNEL); + MT_BUG_ON(&new, ret); + mt_validate(&new); + if (compare_tree(mt, &new)) + MT_BUG_ON(&new, 1); + + mtree_destroy(mt); + mtree_destroy(&new); + } + + for (i = 1; i <= 3; i++) { + ret = build_full_tree(mt, MT_FLAGS_ALLOC_RANGE, i); + MT_BUG_ON(mt, ret); + mt_init_flags(&new, MT_FLAGS_ALLOC_RANGE); + + ret = mtree_dup(mt, &new, GFP_KERNEL); + MT_BUG_ON(&new, ret); + mt_validate(&new); + if (compare_tree(mt, &new)) + MT_BUG_ON(&new, 1); + + mtree_destroy(mt); + mtree_destroy(&new); + } + + /* Test for normal duplicating. */ + for (i = 0; i < 1000; i += 3) { + if (i & 1) { + mt_init_flags(mt, 0); + mt_init_flags(&new, 0); + } else { + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + mt_init_flags(&new, MT_FLAGS_ALLOC_RANGE); + } + + for (j = 0; j < i; j++) { + mtree_store_range(mt, j * 10, j * 10 + 5, + xa_mk_value(j), GFP_KERNEL); + } + + ret = mtree_dup(mt, &new, GFP_KERNEL); + MT_BUG_ON(&new, ret); + mt_validate(&new); + if (compare_tree(mt, &new)) + MT_BUG_ON(&new, 1); + + mtree_destroy(mt); + mtree_destroy(&new); + } + + /* Test memory allocation failure. */ + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + for (i = 0; i < 30; i += 3) { + mtree_store_range(mt, i * 10, i * 10 + 5, + xa_mk_value(i), GFP_KERNEL); + } + + /* Failed at the first node. */ + mt_init_flags(&new, MT_FLAGS_ALLOC_RANGE); + mt_set_non_kernel(0); + ret = mtree_dup(mt, &new, GFP_NOWAIT); + mt_set_non_kernel(0); + MT_BUG_ON(&new, ret != -ENOMEM); + mtree_destroy(mt); + mtree_destroy(&new); + + /* Random maple tree fails at a random node. */ + for (i = 0; i < 1000; i += 3) { + if (i & 1) { + mt_init_flags(mt, 0); + mt_init_flags(&new, 0); + } else { + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + mt_init_flags(&new, MT_FLAGS_ALLOC_RANGE); + } + + for (j = 0; j < i; j++) { + mtree_store_range(mt, j * 10, j * 10 + 5, + xa_mk_value(j), GFP_KERNEL); + } + /* + * The rand() library function is not used, so we can generate + * the same random numbers on any platform.
+ */ + rand_seed = rand_seed * 1103515245 + 12345; + rand = rand_seed / 65536 % 128; + mt_set_non_kernel(rand); + + ret = mtree_dup(mt, &new, GFP_NOWAIT); + mt_set_non_kernel(0); + if (ret != 0) { + MT_BUG_ON(&new, ret != -ENOMEM); + count++; + mtree_destroy(mt); + continue; + } + + mt_validate(&new); + if (compare_tree(mt, &new)) + MT_BUG_ON(&new, 1); + + mtree_destroy(mt); + mtree_destroy(&new); + } + + /* pr_info("mtree_dup() fail %d times\n", count); */ + BUG_ON(!count); +} + extern void test_kmem_cache_bulk(void); void farmer_tests(void) @@ -35800,6 +36157,10 @@ void farmer_tests(void) check_null_expand(&tree); mtree_destroy(&tree); + mt_init_flags(&tree, 0); + check_mtree_dup(&tree); + mtree_destroy(&tree); + /* RCU testing */ mt_init_flags(&tree, 0); check_erase_testset(&tree); @@ -35834,7 +36195,9 @@ void farmer_tests(void) void maple_tree_tests(void) { +#if !defined(BENCH) farmer_tests(); +#endif maple_tree_seed(); maple_tree_harvest(); } diff --git a/tools/testing/selftests/filesystems/fuse/bpf_loader.c b/tools/testing/selftests/filesystems/fuse/bpf_loader.c index 5bf26eadd421..94f884c64d29 100644 --- a/tools/testing/selftests/filesystems/fuse/bpf_loader.c +++ b/tools/testing/selftests/filesystems/fuse/bpf_loader.c @@ -394,6 +394,29 @@ int s_rename(struct s oldpathname, struct s newpathname) return res; } +int s_mount(struct s source, struct s target, struct s filesystem, + unsigned long mountflags, struct s data) +{ + int res; + + res = mount(source.s, target.s, filesystem.s, mountflags, data.s); + free(source.s); + free(target.s); + free(filesystem.s); + free(data.s); + + return res; +} + +int s_umount(struct s target) +{ + int res; + + res = umount(target.s); + free(target.s); + return res; +} + int s_fuse_attr(struct s pathname, struct fuse_attr *fuse_attr_out) { @@ -574,7 +597,10 @@ static int mount_fuse_maybe_init(const char *mount_dir, int bpf_fd, int dir_fd, })); } - *fuse_dev_ptr = fuse_dev; + if (fuse_dev_ptr) + *fuse_dev_ptr = fuse_dev; + else + TESTSYSCALL(close(fuse_dev)); fuse_dev = -1; result = TEST_SUCCESS; out: diff --git a/tools/testing/selftests/filesystems/fuse/fuse_test.c b/tools/testing/selftests/filesystems/fuse/fuse_test.c index 528595a8e82f..ad24ed48853e 100644 --- a/tools/testing/selftests/filesystems/fuse/fuse_test.c +++ b/tools/testing/selftests/filesystems/fuse/fuse_test.c @@ -2114,6 +2114,50 @@ out: return result; } +/** + * Test that fuse passthrough correctly traverses a mount point on the lower fs + */ +static int bpf_test_follow_mounts(const char *mount_dir) +{ + const char *bind_src = "bind_src"; + const char *bind_dst = "bind_dst"; + const char *file = "file"; + int fd = -1; + int src_fd = -1; + int result = TEST_FAILURE; + + TESTSYSCALL(s_mkdir(s_path(s(ft_src), s(bind_src)), 0777)); + TESTSYSCALL(s_mkdir(s_path(s(ft_src), s(bind_dst)), 0777)); + TEST(fd = s_creat(s_pathn(3, s(ft_src), s(bind_src), s(file)), 0777), + fd != -1); + TESTSYSCALL(close(fd)); + fd = -1; + TESTSYSCALL(s_mount(s_path(s(ft_src), s(bind_src)), + s_path(s(ft_src), s(bind_dst)), + s(NULL), MS_BIND, s(NULL))); + TEST(src_fd = open(ft_src, O_DIRECTORY | O_RDONLY | O_CLOEXEC), + src_fd != -1); + TESTEQUAL(mount_fuse_no_init(mount_dir, -1, src_fd, NULL), 0); + TEST(fd = s_open(s_pathn(3, s(mount_dir), s(bind_src), s(file)), + O_RDONLY), + fd != -1); + TESTSYSCALL(close(fd)); + fd = -1; + TEST(fd = s_open(s_pathn(3, s(mount_dir), s(bind_dst), s(file)), + O_RDONLY), + fd != -1); + TESTSYSCALL(close(fd)); + fd = -1; + + result = TEST_SUCCESS; +out: + umount(mount_dir); + 
close(src_fd); + s_umount(s_path(s(ft_src), s(bind_dst))); + close(fd); + return result; +} + static void parse_range(const char *ranges, bool *run_test, size_t tests) { size_t i; @@ -2244,6 +2288,7 @@ int main(int argc, char *argv[]) MAKE_TEST(bpf_test_create_and_remove_bpf), MAKE_TEST(bpf_test_mkdir_and_remove_bpf), MAKE_TEST(bpf_test_readahead), + MAKE_TEST(bpf_test_follow_mounts), }; #undef MAKE_TEST diff --git a/tools/testing/selftests/filesystems/fuse/test_fuse.h b/tools/testing/selftests/filesystems/fuse/test_fuse.h index 69dadc9c7e45..e62e2ee07713 100644 --- a/tools/testing/selftests/filesystems/fuse/test_fuse.h +++ b/tools/testing/selftests/filesystems/fuse/test_fuse.h @@ -64,6 +64,9 @@ int s_setxattr(struct s pathname, const char name[], const void *value, size_t size, int flags); int s_removexattr(struct s pathname, const char name[]); int s_rename(struct s oldpathname, struct s newpathname); +int s_mount(struct s source, struct s target, struct s filesystem, + unsigned long mountflags, struct s data); +int s_umount(struct s target); struct s tracing_folder(void); int tracing_on(void);
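One design note on the helpers declared last: like the other s_*() wrappers in this selftest suite, s_mount() and s_umount() consume (free) their struct s arguments, which is what lets bpf_test_follow_mounts() chain s()/s_path() temporaries without leaking. A hypothetical wrapper in the same style (statvfs is only an example target, not part of this patch; struct s is restated from test_fuse.h to keep the sketch self-contained):

#include <stdlib.h>
#include <sys/statvfs.h>

struct s {
	char *s;	/* heap-allocated string, owned by the callee */
};

/* Hypothetical example of the suite's consuming convention: use the
 * string, then free it, so call sites never clean up arguments. */
static int s_statvfs(struct s pathname, struct statvfs *out)
{
	int res = statvfs(pathname.s, out);

	free(pathname.s);
	return res;
}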