-----BEGIN PGP SIGNATURE-----
iQIzBAABCAAdFiEEZH8oZUiU471FcZm+ONu9yGCSaT4FAmOwLA8ACgkQONu9yGCS
aT6RYxAAhsnIlIBCtaca7Uio9TZdluV7Fzn3c9+QogVisrwVMTtP1iHX43ofFC89
BCmiQOS9fForddjNP0vkqjZlshMYYSCDPX0s0mK6R4UoNPVg8oehZ9vJfOiR3MMX
C3fApQQhYf5Bx/rC50i58ChdAw/Dqj0WNBZX/ZWod4B2JKUq7ORk7GjnorfuJxuP
xO2K6KdpajZufkxtTyKtwqK8FG3dkZP9YF6MqFIvTfQ8qkLnQsrL3moFGU9giSH5
swRCFH/QII+kumKS2bir87QHz0CmvtSa3Ob4DyKiJMkNN8tspE7nOMkds4usCov6
+yM84sWp03j2RKFyadctAMKwdH16IGU0kdgqlhb9OmzGNRvX6/l5q4+QzqzPJHHQ
F+v/PEJoKz3K6CK2ai8DPXoTUMgDDCaYDHg139Tv2Dj/ulDg9xzJ+CS6WBMQxMoU
xO1OWhpLMDKT8soPogGY13yOsSbhPY6ef3+//eRczxLf8bg3qzoKo362PjqHVxlq
IY01Ul+MB3M4NdFuFNMKM2/DBHn9qBeoZdQxnQ/vpxhBbpP2hIyEflyfsUQOmUYU
lWBcnxbSLxf87CmJ3f1VSsms6kbgnxYJyNBgkXiU3WHFfcRZqoU/R+SFu2THRMPt
ugor1zCHNxBBIdDEMRDWJvDTt34vRsT51Xbig+hH5BVdiKQzQ3k=
=MYDV
-----END PGP SIGNATURE-----
Merge 6.1.2 into android14-6.1
Changes in 6.1.2
MIPS: DTS: CI20: fix reset line polarity of the ethernet controller
usb: musb: remove extra check in musb_gadget_vbus_draw
arm64: dts: renesas: r8a779g0: Fix HSCIF0 "brg_int" clock
arm64: dts: qcom: ipq6018-cp01-c1: use BLSPI1 pins
arm64: dts: qcom: sm8250-sony-xperia-edo: fix touchscreen bias-disable
arm64: dts: qcom: sdm845-xiaomi-polaris: fix codec pin conf name
arm64: dts: qcom: msm8996: Add MSM8996 Pro support
arm64: dts: qcom: msm8996: fix supported-hw in cpufreq OPP tables
arm64: dts: qcom: msm8996: fix GPU OPP table
ARM: dts: qcom: apq8064: fix coresight compatible
arm64: dts: qcom: sdm630: fix UART1 pin bias
arm64: dts: qcom: sdm845-cheza: fix AP suspend pin bias
arm64: dts: qcom: msm8916: Drop MSS fallback compatible
arm64: dts: fsd: fix drive strength macros as per FSD HW UM
arm64: dts: fsd: fix drive strength values as per FSD HW UM
memory: renesas-rpc-if: Clear HS bit during hardware initialization
objtool, kcsan: Add volatile read/write instrumentation to whitelist
ARM: dts: stm32: Drop stm32mp15xc.dtsi from Avenger96
ARM: dts: stm32: Fix AV96 WLAN regulator gpio property
drivers: soc: ti: knav_qmss_queue: Mark knav_acc_firmwares as static
firmware: ti_sci: Fix polled mode during system suspend
riscv: dts: microchip: fix memory node unit address for icicle
arm64: dts: qcom: pm660: Use unique ADC5_VCOIN address in node name
arm64: dts: qcom: sm8250: correct LPASS pin pull down
arm64: dts: qcom: sc7180-trogdor-homestar: fully configure secondary I2S pins
soc: qcom: llcc: make irq truly optional
arm64: dts: qcom: sm8150: fix UFS PHY registers
arm64: dts: qcom: sm8250: fix UFS PHY registers
arm64: dts: qcom: sm8350: fix UFS PHY registers
arm64: dts: qcom: sm8450: fix UFS PHY registers
arm64: dts: qcom: msm8996: fix sound card reset line polarity
arm64: dts: qcom: sm8250-mtp: fix reset line polarity
arm64: dts: qcom: sc7280: fix codec reset line polarity for CRD 3.0/3.1
arm64: dts: qcom: sc7280: fix codec reset line polarity for CRD 1.0/2.0
arm64: dts: qcom: sm8250: drop bogus DP PHY clock
arm64: dts: qcom: sm6350: drop bogus DP PHY clock
soc: qcom: apr: Add check for idr_alloc and of_property_read_string_index
arm64: dts: qcom: pm6350: Include header for KEY_POWER
arm64: dts: qcom: sm6125: fix SDHCI CQE reg names
arm64: dts: renesas: r8a779f0: Fix HSCIF "brg_int" clock
arm64: dts: renesas: r8a779f0: Fix SCIF "brg_int" clock
arm64: dts: renesas: r9a09g011: Fix unit address format error
arm64: dts: renesas: r9a09g011: Fix I2C SoC specific strings
dt-bindings: pwm: fix microchip corePWM's pwm-cells
soc: sifive: ccache: fix missing iounmap() in error path in sifive_ccache_init()
soc: sifive: ccache: fix missing free_irq() in error path in sifive_ccache_init()
soc: sifive: ccache: fix missing of_node_put() in sifive_ccache_init()
arm64: dts: mt7986: fix trng node name
soc/tegra: cbb: Use correct master_id mask for CBB NOC in Tegra194
soc/tegra: cbb: Update slave maps for Tegra234
soc/tegra: cbb: Add checks for potential out of bound errors
soc/tegra: cbb: Check firewall before enabling error reporting
arm64: dts: qcom: sc7280: Mark all Qualcomm reference boards as LTE
arm: dts: spear600: Fix clcd interrupt
riscv: dts: microchip: fix the icicle's #pwm-cells
soc: ti: knav_qmss_queue: Fix PM disable depth imbalance in knav_queue_probe
soc: ti: smartreflex: Fix PM disable depth imbalance in omap_sr_probe
arm64: mm: kfence: only handle translation faults
perf: arm_dsu: Fix hotplug callback leak in dsu_pmu_init()
drivers: perf: marvell_cn10k: Fix hotplug callback leak in tad_pmu_init()
perf/arm_dmc620: Fix hotplug callback leak in dmc620_pmu_init()
perf/smmuv3: Fix hotplug callback leak in arm_smmu_pmu_init()
arm64: dts: ti: k3-am65-main: Drop dma-coherent in crypto node
arm64: dts: ti: k3-j721e-main: Drop dma-coherent in crypto node
arm64: dts: ti: k3-j7200-mcu-wakeup: Drop dma-coherent in crypto node
arm64: dts: ti: k3-j721s2: Fix the interrupt ranges property for main & wkup gpio intr
riscv: dts: microchip: remove pcie node from the sev kit
ARM: dts: nuvoton: Remove bogus unit addresses from fixed-partition nodes
arm64: dts: mediatek: mt8195: Fix CPUs capacity-dmips-mhz
arm64: dts: mt7896a: Fix unit_address_vs_reg warning for oscillator
arm64: dts: mt6779: Fix devicetree build warnings
arm64: dts: mt2712e: Fix unit_address_vs_reg warning for oscillators
arm64: dts: mt2712e: Fix unit address for pinctrl node
arm64: dts: mt2712-evb: Fix vproc fixed regulators unit names
arm64: dts: mt2712-evb: Fix usb vbus regulators unit names
arm64: dts: mediatek: pumpkin-common: Fix devicetree warnings
arm64: dts: mediatek: mt6797: Fix 26M oscillator unit name
arm64: tegra: Fix Prefetchable aperture ranges of Tegra234 PCIe controllers
arm64: tegra: Fix non-prefetchable aperture of PCIe C3 controller
arm64: dts: mt7986: move wed_pcie node
ARM: dts: dove: Fix assigned-addresses for every PCIe Root Port
ARM: dts: armada-370: Fix assigned-addresses for every PCIe Root Port
ARM: dts: armada-xp: Fix assigned-addresses for every PCIe Root Port
ARM: dts: armada-375: Fix assigned-addresses for every PCIe Root Port
ARM: dts: armada-38x: Fix assigned-addresses for every PCIe Root Port
ARM: dts: armada-39x: Fix assigned-addresses for every PCIe Root Port
ARM: dts: turris-omnia: Add ethernet aliases
ARM: dts: turris-omnia: Add switch port 6 node
arm64: dts: armada-3720-turris-mox: Add missing interrupt for RTC
soc: apple: sart: Stop casting function pointer signatures
soc: apple: rtkit: Stop casting function pointer signatures
drivers/perf: hisi: Fix some event id for hisi-pcie-pmu
seccomp: Move copy_seccomp() to no failure path.
pstore/ram: Fix error return code in ramoops_probe()
ARM: mmp: fix timer_read delay
pstore: Avoid kcore oops by vmap()ing with VM_IOREMAP
arch: arm64: apple: t8103: Use standard "iommu" node name
tpm: tis_i2c: Fix sanity check interrupt enable mask
tpm: Add flag to use default cancellation policy
tpm/tpm_ftpm_tee: Fix error handling in ftpm_mod_init()
tpm/tpm_crb: Fix error message in __crb_relinquish_locality()
ovl: remove privs in ovl_copyfile()
ovl: remove privs in ovl_fallocate()
sched/uclamp: Fix relationship between uclamp and migration margin
sched/uclamp: Make task_fits_capacity() use util_fits_cpu()
sched/uclamp: Fix fits_capacity() check in feec()
sched/uclamp: Make select_idle_capacity() use util_fits_cpu()
sched/uclamp: Make asym_fits_capacity() use util_fits_cpu()
sched/uclamp: Make cpu_overutilized() use util_fits_cpu()
sched/uclamp: Cater for uclamp in find_energy_efficient_cpu()'s early exit condition
cpuidle: dt: Return the correct numbers of parsed idle states
alpha: fix TIF_NOTIFY_SIGNAL handling
alpha: fix syscall entry in !AUDIT_SYSCALL case
sched/psi: Fix possible missing or delayed pending event
x86/sgx: Reduce delay and interference of enclave release
PM: hibernate: Fix mistake in kerneldoc comment
fs: don't audit the capability check in simple_xattr_list()
cpufreq: qcom-hw: Fix memory leak in qcom_cpufreq_hw_read_lut()
x86/split_lock: Add sysctl to control the misery mode
ACPI: irq: Fix some kernel-doc issues
selftests/ftrace: event_triggers: wait longer for test_event_enable
perf: Fix possible memleak in pmu_dev_alloc()
lib/debugobjects: fix stat count and optimize debug_objects_mem_init
platform/x86: huawei-wmi: fix return value calculation
timerqueue: Use rb_entry_safe() in timerqueue_getnext()
proc: fixup uptime selftest
lib/fonts: fix undefined behavior in bit shift for get_default_font
ocfs2: fix memory leak in ocfs2_stack_glue_init()
selftests: cgroup: fix unsigned comparison with less than zero
cpufreq: qcom-hw: Fix the frequency returned by cpufreq_driver->get()
MIPS: vpe-mt: fix possible memory leak while module exiting
MIPS: vpe-cmp: fix possible memory leak while module exiting
selftests/efivarfs: Add checking of the test return value
PNP: fix name memory leak in pnp_alloc_dev()
mailbox: pcc: Reset pcc_chan_count to zero in case of PCC probe failure
ACPI: pfr_telemetry: use ACPI_FREE() to free acpi_object
ACPI: pfr_update: use ACPI_FREE() to free acpi_object
perf/x86/intel/uncore: Fix reference count leak in sad_cfg_iio_topology()
perf/x86/intel/uncore: Fix reference count leak in hswep_has_limit_sbox()
perf/x86/intel/uncore: Fix reference count leak in snr_uncore_mmio_map()
perf/x86/intel/uncore: Fix reference count leak in __uncore_imc_init_box()
platform/chrome: cros_usbpd_notify: Fix error handling in cros_usbpd_notify_init()
thermal: core: fix some possible name leaks in error paths
irqchip/loongson-pch-pic: Fix translate callback for DT path
irqchip: gic-pm: Use pm_runtime_resume_and_get() in gic_probe()
irqchip/wpcm450: Fix memory leak in wpcm450_aic_of_init()
irqchip/loongson-liointc: Fix improper error handling in liointc_init()
EDAC/i10nm: fix refcount leak in pci_get_dev_wrapper()
NFSD: Finish converting the NFSv2 GETACL result encoder
NFSD: Finish converting the NFSv3 GETACL result encoder
nfsd: don't call nfsd_file_put from client states seqfile display
genirq/irqdesc: Don't try to remove non-existing sysfs files
cpufreq: amd_freq_sensitivity: Add missing pci_dev_put()
libfs: add DEFINE_SIMPLE_ATTRIBUTE_SIGNED for signed value
lib/notifier-error-inject: fix error when writing -errno to debugfs file
debugfs: fix error when writing negative value to atomic_t debugfs file
ocfs2: fix memory leak in ocfs2_mount_volume()
rapidio: fix possible name leaks when rio_add_device() fails
rapidio: rio: fix possible name leak in rio_register_mport()
clocksource/drivers/sh_cmt: Access registers according to spec
futex: Resend potentially swallowed owner death notification
cpu/hotplug: Make target_store() a nop when target == state
cpu/hotplug: Do not bail-out in DYING/STARTING sections
clocksource/drivers/timer-ti-dm: Fix warning for omap_timer_match
clocksource/drivers/timer-ti-dm: Fix missing clk_disable_unprepare in dmtimer_systimer_init_clock()
ACPICA: Fix use-after-free in acpi_ut_copy_ipackage_to_ipackage()
uprobes/x86: Allow to probe a NOP instruction with 0x66 prefix
x86/xen: Fix memory leak in xen_smp_intr_init{_pv}()
x86/xen: Fix memory leak in xen_init_lock_cpu()
xen/privcmd: Fix a possible warning in privcmd_ioctl_mmap_resource()
PM: runtime: Do not call __rpm_callback() from rpm_idle()
erofs: check the uniqueness of fsid in shared domain in advance
erofs: Fix pcluster memleak when its block address is zero
erofs: fix missing unmap if z_erofs_get_extent_compressedlen() fails
erofs: validate the extent length for uncompressed pclusters
platform/chrome: cros_ec_typec: zero out stale pointers
platform/x86: mxm-wmi: fix memleak in mxm_wmi_call_mx[ds|mx]()
platform/x86: intel_scu_ipc: fix possible name leak in __intel_scu_ipc_register()
MIPS: BCM63xx: Add check for NULL for clk in clk_enable
MIPS: OCTEON: warn only once if deprecated link status is being used
lockd: set other missing fields when unlocking files
nfsd: return error if nfs4_setacl fails
NFSD: pass range end to vfs_fsync_range() instead of count
fs: sysv: Fix sysv_nblocks() returns wrong value
rapidio: fix possible UAF when kfifo_alloc() fails
eventfd: change int to __u64 in eventfd_signal() ifndef CONFIG_EVENTFD
relay: fix type mismatch when allocating memory in relay_create_buf()
hfs: Fix OOB Write in hfs_asc2mac
rapidio: devices: fix missing put_device in mport_cdev_open
ipc: fix memory leak in init_mqueue_fs()
platform/mellanox: mlxbf-pmc: Fix event typo
selftests/bpf: Add missing bpf_iter_vma_offset__destroy call
wifi: fix multi-link element subelement iteration
wifi: mac80211: mlme: fix null-ptr deref on failed assoc
wifi: mac80211: check link ID in auth/assoc continuation
wifi: mac80211: fix ifdef symbol name
drm/atomic-helper: Don't allocate new plane state in CRTC check
wifi: ath9k: hif_usb: fix memory leak of urbs in ath9k_hif_usb_dealloc_tx_urbs()
wifi: ath9k: hif_usb: Fix use-after-free in ath9k_hif_usb_reg_in_cb()
wifi: rtl8xxxu: Fix reading the vendor of combo chips
wifi: ath11k: fix firmware assert during bandwidth change for peer sta
drm/bridge: adv7533: remove dynamic lane switching from adv7533 bridge
libbpf: Fix use-after-free in btf_dump_name_dups
libbpf: Fix memory leak in parse_usdt_arg()
selftests/bpf: Fix memory leak caused by not destroying skeleton
selftest/bpf: Fix memory leak in kprobe_multi_test
selftests/bpf: Fix error failure of case test_xdp_adjust_tail_grow
selftest/bpf: Fix error usage of ASSERT_OK in xdp_adjust_tail.c
libbpf: Use elf_getshdrnum() instead of e_shnum
libbpf: Deal with section with no data gracefully
libbpf: Fix null-pointer dereference in find_prog_by_sec_insn()
drm: lcdif: Switch to limited range for RGB to YUV conversion
ata: libata: fix NCQ autosense logic
pinctrl: ocelot: add missing destroy_workqueue() in error path in ocelot_pinctrl_probe()
ASoC: Intel: avs: Fix DMA mask assignment
ASoC: Intel: avs: Fix potential RX buffer overflow
ipmi: kcs: Poll OBF briefly to reduce OBE latency
drm/amdgpu: Revert "drm/amdgpu: getting fan speed pwm for vega10 properly"
drm/amdgpu/powerplay/psm: Fix memory leak in power state init
net: ethernet: adi: adin1110: Fix SPI transfers
samples/bpf: Fix map iteration in xdp1_user
samples/bpf: Fix MAC address swapping in xdp2_kern
selftests/bpf: fix missing BPF object files
drm/bridge: it6505: Initialize AUX channel in it6505_i2c_probe
Input: iqs7222 - protect against undefined slider size
media: v4l2-ctrls: Fix off-by-one error in integer menu control check
media: coda: jpeg: Add check for kmalloc
media: amphion: reset instance if it's aborted before codec header parsed
media: adv748x: afe: Select input port when initializing AFE
media: v4l2-ioctl.c: Unify YCbCr/YUV terms in format descriptions
media: cedrus: hevc: Fix offset adjustments
media: mediatek: vcodec: fix h264 cavlc bitstream fail
drm/i915/guc: Limit scheduling properties to avoid overflow
drm/i915: Fix compute pre-emption w/a to apply to compute engines
media: i2c: hi846: Fix memory leak in hi846_parse_dt()
media: i2c: ad5820: Fix error path
venus: pm_helpers: Fix error check in vcodec_domains_get()
soreuseport: Fix socket selection for SO_INCOMING_CPU.
media: i2c: ov5648: Free V4L2 fwnode data on unbind
media: exynos4-is: don't rely on the v4l2_async_subdev internals
libbpf: Btf dedup identical struct test needs check for nested structs/arrays
can: kvaser_usb: kvaser_usb_leaf: Get capabilities from device
can: kvaser_usb: kvaser_usb_leaf: Rename {leaf,usbcan}_cmd_error_event to {leaf,usbcan}_cmd_can_error_event
can: kvaser_usb: kvaser_usb_leaf: Handle CMD_ERROR_EVENT
can: kvaser_usb_leaf: Set Warning state even without bus errors
can: kvaser_usb_leaf: Fix improved state not being reported
can: kvaser_usb_leaf: Fix wrong CAN state after stopping
can: kvaser_usb_leaf: Fix bogus restart events
can: kvaser_usb: Add struct kvaser_usb_busparams
can: kvaser_usb: Compare requested bittiming parameters with actual parameters in do_set_{,data}_bittiming
clk: renesas: r8a779f0: Fix SD0H clock name
clk: renesas: r8a779a0: Fix SD0H clock name
ASoC: dt-bindings: rt5682: Set sound-dai-cells to 1
drm/i915/guc: Add error-capture init warnings when needed
drm/i915/guc: Fix GuC error capture sizing estimation and reporting
dw9768: Enable low-power probe on ACPI
drm/amd/display: wait for vblank during pipe programming
drm/rockchip: lvds: fix PM usage counter unbalance in poweron
drm/i915: Handle all GTs on driver (un)load paths
drm/i915: Refactor ttm ghost obj detection
drm/i915: Encapsulate lmem rpm stuff in intel_runtime_pm
drm/i915/dgfx: Grab wakeref at i915_ttm_unmap_virtual
clk: renesas: r9a06g032: Repair grave increment error
drm: lcdif: change burst size to 256B
drm/panel/panel-sitronix-st7701: Fix RTNI calculation
spi: Update reference to struct spi_controller
drm/panel/panel-sitronix-st7701: Remove panel on DSI attach failure
drm/ttm: fix undefined behavior in bit shift for TTM_TT_FLAG_PRIV_POPULATED
drm/msm/mdp5: stop overriding drvdata
ima: Handle -ESTALE returned by ima_filter_rule_match()
drm/msm/hdmi: use devres helper for runtime PM management
bpf: Clobber stack slot when writing over spilled PTR_TO_BTF_ID
bpf: Fix slot type check in check_stack_write_var_off
drm/msm/dpu1: Account for DSC's bits_per_pixel having 4 fractional bits
drm/msm/dsi: Remove useless math in DSC calculations
drm/msm/dsi: Remove repeated calculation of slice_per_intf
drm/msm/dsi: Use DIV_ROUND_UP instead of conditional increment on modulo
drm/msm/dsi: Reuse earlier computed dsc->slice_chunk_size
drm/msm/dsi: Appropriately set dsc->mux_word_size based on bpc
drm/msm/dsi: Migrate to drm_dsc_compute_rc_parameters()
drm/msm/dsi: Account for DSC's bits_per_pixel having 4 fractional bits
drm/msm/dsi: Disallow 8 BPC DSC configuration for alternative BPC values
drm/msm/dsi: Prevent signed BPG offsets from bleeding into adjacent bits
media: platform: mtk-mdp3: fix error handling in mdp_cmdq_send()
media: platform: mtk-mdp3: fix error handling about components clock_on
media: platform: mtk-mdp3: fix error handling in mdp_probe()
media: rkvdec: Add required padding
media: vivid: fix compose size exceed boundary
media: platform: exynos4-is: fix return value check in fimc_md_probe()
bpf: propagate precision in ALU/ALU64 operations
bpf: propagate precision across all frames, not just the last one
clk: qcom: gcc-ipq806x: use parent_data for the last remaining entry
clk: qcom: dispcc-sm6350: Add CLK_OPS_PARENT_ENABLE to pixel&byte src
clk: qcom: gcc-sm8250: Use retention mode for USB GDSCs
mtd: Fix device name leak when register device failed in add_mtd_device()
mtd: core: fix possible resource leak in init_mtd()
Input: joystick - fix Kconfig warning for JOYSTICK_ADC
wifi: rsi: Fix handling of 802.3 EAPOL frames sent via control port
media: camss: Clean up received buffers on failed start of streaming
media: camss: Do not attach an already attached power domain on MSM8916 platform
clk: renesas: r8a779f0: Fix HSCIF parent clocks
clk: renesas: r8a779f0: Fix SCIF parent clocks
virt/sev-guest: Add a MODULE_ALIAS
net, proc: Provide PROC_FS=n fallback for proc_create_net_single_write()
rxrpc: Fix ack.bufferSize to be 0 when generating an ack
drm: lcdif: Set and enable FIFO Panic threshold
wifi: rtw89: use u32_encode_bits() to fill MAC quota value
drm: rcar-du: Drop leftovers dependencies from Kconfig
regmap-irq: Use the new num_config_regs property in regmap_add_irq_chip_fwnode
drbd: use blk_queue_max_discard_sectors helper
bfq: fix waker_bfqq inconsistency crash
drm/radeon: Add the missed acpi_put_table() to fix memory leak
dt-bindings: pinctrl: update uart/mmc bindings for MT7986 SoC
pinctrl: mediatek: fix the pinconf register offset of some pins
wifi: iwlwifi: mei: make sure ownership confirmed message is sent
wifi: iwlwifi: mei: don't send SAP commands if AMT is disabled
wifi: iwlwifi: mei: fix tx DHCP packet for devices with new Tx API
wifi: iwlwifi: mei: avoid blocking sap messages handling due to rtnl lock
wifi: iwlwifi: mei: fix potential NULL-ptr deref after clone
module: Fix NULL vs IS_ERR checking for module_get_next_page
ASoC: codecs: wsa883x: Use proper shutdown GPIO polarity
ASoC: codecs: wsa883x: use correct header file
selftests/bpf: Fix xdp_synproxy compilation failure in 32-bit arch
selftests/bpf: Fix incorrect ASSERT in the tcp_hdr_options test
drm/mediatek: Modify dpi power on/off sequence.
ASoC: pxa: fix null-pointer dereference in filter()
nvmet: only allocate a single slab for bvecs
regulator: core: fix unbalanced of node refcount in regulator_dev_lookup()
amdgpu/pm: prevent array underflow in vega20_odn_edit_dpm_table()
nvme: return err on nvme_init_non_mdts_limits fail
wifi: rtw89: Fix some error handling path in rtw89_core_sta_assoc()
regulator: qcom-rpmh: Fix PMR735a S3 regulator spec
drm/fourcc: Fix vsub/hsub for Q410 and Q401
ALSA: memalloc: Allocate more contiguous pages for fallback case
integrity: Fix memory leakage in keyring allocation error path
ima: Fix misuse of dereference of pointer in template_desc_init_fields()
block: clear ->slave_dir when dropping the main slave_dir reference
dm: cleanup open_table_device
dm: cleanup close_table_device
dm: make sure create and remove dm device won't race with open and close table
dm: track per-add_disk holder relations in DM
selftests/bpf: fix memory leak of lsm_cgroup
wifi: ath10k: Fix return value in ath10k_pci_init()
drm/msm/a6xx: Fix speed-bin detection vs probe-defer
mtd: lpddr2_nvm: Fix possible null-ptr-deref
Input: elants_i2c - properly handle the reset GPIO when power is off
ASoC: amd: acp: Fix possible UAF in acp_dma_open
net: ethernet: mtk_eth_soc: do not overwrite mtu configuration running reset routine
media: amphion: add lock around vdec_g_fmt
media: amphion: apply vb2_queue_error instead of setting manually
media: vidtv: Fix use-after-free in vidtv_bridge_dvb_init()
media: solo6x10: fix possible memory leak in solo_sysfs_init()
media: platform: exynos4-is: Fix error handling in fimc_md_init()
media: amphion: Fix error handling in vpu_driver_init()
media: videobuf-dma-contig: use dma_mmap_coherent
net: ethernet: mtk_eth_soc: fix RSTCTRL_PPE{0,1} definitions
udp: Clean up some functions.
net: Return errno in sk->sk_prot->get_port().
mtd: spi-nor: hide jedec_id sysfs attribute if not present
mtd: spi-nor: Fix the number of bytes for the dummy cycles
clk: imx93: correct the flexspi1 clock setting
bpf: Pin the start cgroup in cgroup_iter_seq_init()
HID: i2c: let RMI devices decide what constitutes wakeup event
clk: imx93: unmap anatop base in error handling path
clk: imx93: correct enet clock
bpf: Move skb->len == 0 checks into __bpf_redirect
HID: hid-sensor-custom: set fixed size for custom attributes
clk: imx: imxrt1050: fix IMXRT1050_CLK_LCDIF_APB offsets
pinctrl: k210: call of_node_put()
wifi: rtw89: fix physts IE page check
ASoC: Intel: Skylake: Fix Kconfig dependency
ASoC: Intel: avs: Lock substream before snd_pcm_stop()
ALSA: pcm: fix undefined behavior in bit shift for SNDRV_PCM_RATE_KNOT
ALSA: seq: fix undefined behavior in bit shift for SNDRV_SEQ_FILTER_USE_EVENT
regulator: core: use kfree_const() to free space conditionally
clk: rockchip: Fix memory leak in rockchip_clk_register_pll()
drm/amdgpu: fix pci device refcount leak
drm/i915/guc: make default_lists const data
selftests/bpf: Make sure zero-len skbs aren't redirectable
selftests/bpf: Mount debugfs in setns_by_fd
bonding: fix link recovery in mode 2 when updelay is nonzero
clk: microchip: check for null return of devm_kzalloc()
mtd: core: Fix refcount error in del_mtd_device()
mtd: maps: pxa2xx-flash: fix memory leak in probe
drbd: remove call to memset before free device/resource/connection
drbd: destroy workqueue when drbd device was freed
ASoC: qcom: Add checks for devm_kcalloc
ASoC: qcom: cleanup and fix dependency of QCOM_COMMON
ASoC: mediatek: mt8186: Correct I2S shared clocks
media: vimc: Fix wrong function called when vimc_init() fails
media: imon: fix a race condition in send_packet()
media: imx: imx7-media-csi: Clear BIT_MIPI_DOUBLE_CMPNT for <16b formats
media: mt9p031: Drop bogus v4l2_subdev_get_try_crop() call from mt9p031_init_cfg()
clk: imx8mn: rename vpu_pll to m7_alt_pll
clk: imx: replace osc_hdmi with dummy
clk: imx: rename video_pll1 to video_pll
clk: imx8mn: fix imx8mn_sai2_sels clocks list
clk: imx8mn: fix imx8mn_enet_phy_sels clocks list
pinctrl: pinconf-generic: add missing of_node_put()
media: dvb-core: Fix ignored return value in dvb_register_frontend()
media: dvb-usb: az6027: fix null-ptr-deref in az6027_i2c_xfer()
x86/boot: Skip realmode init code when running as Xen PV guest
media: sun6i-mipi-csi2: Require both pads to be connected for streaming
media: sun8i-a83t-mipi-csi2: Require both pads to be connected for streaming
media: sun6i-mipi-csi2: Register async subdev with no sensor attached
media: sun8i-a83t-mipi-csi2: Register async subdev with no sensor attached
media: amphion: try to wakeup vpu core to avoid failure
media: amphion: cancel vpu before release instance
media: amphion: lock and check m2m_ctx in event handler
media: mediatek: vcodec: Fix getting NULL pointer for dst buffer
media: mediatek: vcodec: Fix h264 set lat buffer error
media: mediatek: vcodec: Setting lat buf to lat_list when lat decode error
media: mediatek: vcodec: Core thread depends on core_list
media: s5p-mfc: Add variant data for MFC v7 hardware for Exynos 3250 SoC
drm/tegra: Add missing clk_disable_unprepare() in tegra_dc_probe()
ASoC: dt-bindings: wcd9335: fix reset line polarity in example
ASoC: mediatek: mtk-btcvsd: Add checks for write and read of mtk_btcvsd_snd
drm/msm/mdp5: fix reading hw revision on db410c platform
NFSv4.2: Clear FATTR4_WORD2_SECURITY_LABEL when done decoding
NFSv4.2: Always decode the security label
NFSv4.2: Fix a memory stomp in decode_attr_security_label
NFSv4.2: Fix initialisation of struct nfs4_label
NFSv4: Fix a credential leak in _nfs4_discover_trunking()
NFSv4: Fix a deadlock between nfs4_open_recover_helper() and delegreturn
NFS: Fix an Oops in nfs_d_automount()
ALSA: asihpi: fix missing pci_disable_device()
wifi: plfxlc: fix potential memory leak in __lf_x_usb_enable_rx()
wifi: rtl8xxxu: Fix use after rcu_read_unlock in rtl8xxxu_bss_info_changed
wifi: iwlwifi: mvm: fix double free on tx path.
ASoC: mediatek: mt8173: Enable IRQ when pdata is ready
clk: mediatek: fix dependency of MT7986 ADC clocks
drm/amd/pm/smu11: BACO is supported when it's in BACO state
amdgpu/nv.c: Corrected typo in the video capabilities resolution
drm/radeon: Fix PCI device refcount leak in radeon_atrm_get_bios()
drm/amdgpu: Fix PCI device refcount leak in amdgpu_atrm_get_bios()
drm/amdkfd: Fix memory leakage
drm/i915/bios: fix a memory leak in generate_lfp_data_ptrs
ASoC: pcm512x: Fix PM disable depth imbalance in pcm512x_probe
clk: visconti: Fix memory leak in visconti_register_pll()
netfilter: conntrack: set icmpv6 redirects as RELATED
Input: wistron_btns - disable on UML
bpf, sockmap: Fix repeated calls to sock_put() when msg has more_data
bpf, sockmap: Fix missing BPF_F_INGRESS flag when using apply_bytes
bpf, sockmap: Fix data loss caused by using apply_bytes on ingress redirect
bonding: uninitialized variable in bond_miimon_inspect()
spi: spidev: mask SPI_CS_HIGH in SPI_IOC_RD_MODE
wifi: nl80211: Add checks for nla_nest_start() in nl80211_send_iface()
wifi: mac80211: fix memory leak in ieee80211_if_add()
wifi: mac80211: fix maybe-unused warning
wifi: cfg80211: Fix not unregister reg_pdev when load_builtin_regdb_keys() fails
wifi: mt76: mt7921: fix antenna signal are way off in monitor mode
wifi: mt76: mt7915: fix mt7915_mac_set_timing()
wifi: mt76: mt7915: fix reporting of TX AGGR histogram
wifi: mt76: mt7921: fix reporting of TX AGGR histogram
wifi: mt76: mt7915: rework eeprom tx paths and streams init
wifi: mt76: mt7915: Fix chainmask calculation on mt7915 DBDC
wifi: mt76: mt7921: fix wrong power after multiple SAR set
wifi: mt76: fix coverity overrun-call in mt76_get_txpower()
wifi: mt76: mt7921: Add missing __packed annotation of struct mt7921_clc
wifi: mt76: do not send firmware FW_FEATURE_NON_DL region
mt76: mt7915: Fix PCI device refcount leak in mt7915_pci_init_hif2()
regulator: core: fix module refcount leak in set_supply()
clk: qcom: lpass-sc7280: Fix pm_runtime usage
clk: qcom: lpass-sc7180: Fix pm_runtime usage
clk: qcom: clk-krait: fix wrong div2 functions
Revert "net: hsr: use hlist_head instead of list_head for mac addresses"
hsr: Add a rcu-read lock to hsr_forward_skb().
hsr: Avoid double remove of a node.
hsr: Disable netpoll.
hsr: Synchronize sending frames to have always incremented outgoing seq nr.
hsr: Synchronize sequence number updates.
configfs: fix possible memory leak in configfs_create_dir()
regulator: core: fix resource leak in regulator_register()
hwmon: (jc42) Convert register access and caching to regmap/regcache
hwmon: (jc42) Restore the min/max/critical temperatures on resume
bpf: Add dummy type reference to nf_conn___init to fix type deduplication
bpf, sockmap: fix race in sock_map_free()
ALSA: pcm: Set missing stop_operating flag at undoing trigger start
media: saa7164: fix missing pci_disable_device()
media: ov5640: set correct default link frequency
ALSA: mts64: fix possible null-ptr-deref in snd_mts64_interrupt
pinctrl: thunderbay: fix possible memory leak in thunderbay_build_functions()
xprtrdma: Fix regbuf data not freed in rpcrdma_req_create()
SUNRPC: Fix missing release socket in rpc_sockname()
NFSv4.2: Set the correct size scratch buffer for decoding READ_PLUS
NFS: Allow very small rsize & wsize again
NFSv4.x: Fail client initialisation if state manager thread can't run
riscv, bpf: Emit fixed-length instructions for BPF_PSEUDO_FUNC
bpftool: Fix memory leak in do_build_table_cb
hwmon: (emc2305) fix unable to probe emc2301/2/3
hwmon: (emc2305) fix pwm never being able to set lower
mmc: alcor: fix return value check of mmc_add_host()
mmc: moxart: fix return value check of mmc_add_host()
mmc: mxcmmc: fix return value check of mmc_add_host()
mmc: pxamci: fix return value check of mmc_add_host()
mmc: rtsx_pci: fix return value check of mmc_add_host()
mmc: rtsx_usb_sdmmc: fix return value check of mmc_add_host()
mmc: toshsd: fix return value check of mmc_add_host()
mmc: vub300: fix return value check of mmc_add_host()
mmc: wmt-sdmmc: fix return value check of mmc_add_host()
mmc: litex_mmc: ensure `host->irq == 0` if polling
mmc: atmel-mci: fix return value check of mmc_add_host()
mmc: omap_hsmmc: fix return value check of mmc_add_host()
mmc: meson-gx: fix return value check of mmc_add_host()
mmc: via-sdmmc: fix return value check of mmc_add_host()
mmc: wbsd: fix return value check of mmc_add_host()
mmc: mmci: fix return value check of mmc_add_host()
mmc: renesas_sdhi: always populate SCC pointer
memstick/ms_block: Add check for alloc_ordered_workqueue
mmc: core: Normalize the error handling branch in sd_read_ext_regs()
nvme: pass nr_maps explicitly to nvme_alloc_io_tag_set
regulator: qcom-labibb: Fix missing of_node_put() in qcom_labibb_regulator_probe()
media: c8sectpfe: Add of_node_put() when breaking out of loop
media: coda: Add check for dcoda_iram_alloc
media: coda: Add check for kmalloc
media: staging: stkwebcam: Restore MEDIA_{USB,CAMERA}_SUPPORT dependencies
clk: samsung: Fix memory leak in _samsung_clk_register_pll()
spi: spi-gpio: Don't set MOSI as an input if not 3WIRE mode
wifi: rtl8xxxu: Add __packed to struct rtl8723bu_c2h
wifi: rtl8xxxu: Fix the channel width reporting
wifi: brcmfmac: Fix error return code in brcmf_sdio_download_firmware()
blktrace: Fix output non-blktrace event when blk_classic option enabled
bpf: Do not zero-extend kfunc return values
clk: socfpga: Fix memory leak in socfpga_gate_init()
net: vmw_vsock: vmci: Check memcpy_from_msg()
net: defxx: Fix missing err handling in dfx_init()
net: stmmac: selftests: fix potential memleak in stmmac_test_arpoffload()
net: stmmac: fix possible memory leak in stmmac_dvr_probe()
drivers: net: qlcnic: Fix potential memory leak in qlcnic_sriov_init()
ipvs: use u64_stats_t for the per-cpu counters
of: overlay: fix null pointer dereferencing in find_dup_cset_node_entry() and find_dup_cset_prop()
ethernet: s2io: don't call dev_kfree_skb() under spin_lock_irqsave()
net: farsync: Fix kmemleak when rmmods farsync
net/tunnel: wait until all sk_user_data readers finish before releasing the sock
net: apple: mace: don't call dev_kfree_skb() under spin_lock_irqsave()
net: apple: bmac: don't call dev_kfree_skb() under spin_lock_irqsave()
net: emaclite: don't call dev_kfree_skb() under spin_lock_irqsave()
net: ethernet: dnet: don't call dev_kfree_skb() under spin_lock_irqsave()
hamradio: don't call dev_kfree_skb() under spin_lock_irqsave()
net: amd: lance: don't call dev_kfree_skb() under spin_lock_irqsave()
net: setsockopt: fix IPV6_UNICAST_IF option for connected sockets
af_unix: call proto_unregister() in the error path in af_unix_init()
net: amd-xgbe: Fix logic around active and passive cables
net: amd-xgbe: Check only the minimum speed for active/passive cables
can: tcan4x5x: Remove invalid write in clear_interrupts
can: m_can: Call the RAM init directly from m_can_chip_config
can: tcan4x5x: Fix use of register error status mask
net: ethernet: ti: am65-cpsw: Fix PM runtime leakage in am65_cpsw_nuss_ndo_slave_open()
net: lan9303: Fix read error execution path
ntb_netdev: Use dev_kfree_skb_any() in interrupt context
sctp: sysctl: make extra pointers netns aware
Bluetooth: hci_core: fix error handling in hci_register_dev()
Bluetooth: MGMT: Fix error report for ADD_EXT_ADV_PARAMS
Bluetooth: Fix EALREADY and ELOOP cases in bt_status()
Bluetooth: hci_conn: Fix crash on hci_create_cis_sync
Bluetooth: btintel: Fix missing free skb in btintel_setup_combined()
Bluetooth: btusb: don't call kfree_skb() under spin_lock_irqsave()
Bluetooth: hci_qca: don't call kfree_skb() under spin_lock_irqsave()
Bluetooth: hci_ll: don't call kfree_skb() under spin_lock_irqsave()
Bluetooth: hci_h5: don't call kfree_skb() under spin_lock_irqsave()
Bluetooth: hci_bcsp: don't call kfree_skb() under spin_lock_irqsave()
Bluetooth: hci_core: don't call kfree_skb() under spin_lock_irqsave()
Bluetooth: RFCOMM: don't call kfree_skb() under spin_lock_irqsave()
octeontx2-af: cn10k: mcs: Fix a resource leak in the probe and remove functions
stmmac: fix potential division by 0
i40e: Fix the inability to attach XDP program on downed interface
net: dsa: tag_8021q: avoid leaking ctx on dsa_tag_8021q_register() error path
apparmor: fix a memleak in multi_transaction_new()
apparmor: fix lockdep warning when removing a namespace
apparmor: Fix abi check to include v8 abi
apparmor: Fix regression in stacking due to label flags
crypto: hisilicon/qm - fix incorrect parameters usage
crypto: hisilicon/qm - re-enable communicate interrupt before notifying PF
crypto: sun8i-ss - use dma_addr instead of u32
crypto: nitrox - avoid double free on error path in nitrox_sriov_init()
crypto: tcrypt - fix return value for multiple subtests
scsi: core: Fix a race between scsi_done() and scsi_timeout()
apparmor: Use pointer to struct aa_label for lbs_cred
PCI: dwc: Fix n_fts[] array overrun
RDMA/core: Fix order of nldev_exit call
PCI: pci-epf-test: Register notifier if only core_init_notifier is enabled
f2fs: Fix the race condition of resize flag between resizefs
crypto: rockchip - do not do custom power management
crypto: rockchip - do not store mode globally
crypto: rockchip - add fallback for cipher
crypto: rockchip - add fallback for ahash
crypto: rockchip - better handle cipher key
crypto: rockchip - remove non-aligned handling
crypto: rockchip - rework by using crypto_engine
apparmor: Fix memleak in alloc_ns()
fortify: Do not cast to "unsigned char"
f2fs: fix to invalidate dcc->f2fs_issue_discard in error path
f2fs: fix gc mode when gc_urgent_high_remaining is 1
f2fs: fix normal discard process
f2fs: allow to set compression for inlined file
f2fs: fix the assign logic of iocb
f2fs: fix to destroy sbi->post_read_wq in error path of f2fs_fill_super()
RDMA/irdma: Report the correct link speed
scsi: qla2xxx: Fix set-but-not-used variable warnings
RDMA/siw: Fix immediate work request flush to completion queue
IB/mad: Don't call to function that might sleep while in atomic context
PCI: vmd: Disable MSI remapping after suspend
PCI: imx6: Initialize PHY before deasserting core reset
f2fs: fix to avoid accessing uninitialized spinlock
RDMA/restrack: Release MR restrack when delete
RDMA/core: Make sure "ib_port" is valid when access sysfs node
RDMA/nldev: Return "-EAGAIN" if the cm_id isn't from expected port
RDMA/siw: Set defined status for work completion with undefined status
RDMA/irdma: Fix inline for multiple SGE's
RDMA/irdma: Fix RQ completion opcode
RDMA/irdma: Do not request 2-level PBLEs for CQ alloc
scsi: scsi_debug: Fix a warning in resp_write_scat()
crypto: ccree - Remove debugfs when platform_driver_register failed
crypto: cryptd - Use request context instead of stack for sub-request
crypto: hisilicon/qm - add missing pci_dev_put() in q_num_set()
RDMA/rxe: Fix mr->map double free
RDMA/hns: Fix ext_sge num error when post send
RDMA/hns: Fix incorrect sge nums calculation
PCI: Check for alloc failure in pci_request_irq()
RDMA/hfi: Decrease PCI device reference count in error path
crypto: ccree - Make cc_debugfs_global_fini() available for module init function
RDMA/irdma: Initialize net_type before checking it
RDMA/hns: fix memory leak in hns_roce_alloc_mr()
RDMA/rxe: Fix NULL-ptr-deref in rxe_qp_do_cleanup() when socket create failed
dt-bindings: imx6q-pcie: Fix clock names for imx6sx and imx8mq
dt-bindings: visconti-pcie: Fix interrupts array max constraints
PCI: endpoint: pci-epf-vntb: Fix call pci_epc_mem_free_addr() in error path
scsi: hpsa: Fix possible memory leak in hpsa_init_one()
crypto: tcrypt - Fix multibuffer skcipher speed test mem leak
padata: Always leave BHs disabled when running ->parallel()
padata: Fix list iterator in padata_do_serial()
crypto: x86/aegis128 - fix possible crash with CFI enabled
crypto: x86/aria - fix crash with CFI enabled
crypto: x86/sha1 - fix possible crash with CFI enabled
crypto: x86/sha256 - fix possible crash with CFI enabled
crypto: x86/sha512 - fix possible crash with CFI enabled
crypto: x86/sm3 - fix possible crash with CFI enabled
crypto: x86/sm4 - fix crash with CFI enabled
crypto: arm64/sm3 - add NEON assembly implementation
crypto: arm64/sm3 - fix possible crash with CFI enabled
crypto: hisilicon/qm - fix 'QM_XEQ_DEPTH_CAP' mask value
scsi: mpt3sas: Fix possible resource leaks in mpt3sas_transport_port_add()
scsi: hpsa: Fix error handling in hpsa_add_sas_host()
scsi: hpsa: Fix possible memory leak in hpsa_add_sas_device()
scsi: efct: Fix possible memleak in efct_device_init()
scsi: scsi_debug: Fix a warning in resp_verify()
scsi: scsi_debug: Fix a warning in resp_report_zones()
scsi: fcoe: Fix possible name leak when device_register() fails
scsi: scsi_debug: Fix possible name leak in sdebug_add_host_helper()
scsi: ipr: Fix WARNING in ipr_init()
scsi: fcoe: Fix transport not detached when fcoe_if_init() fails
scsi: snic: Fix possible UAF in snic_tgt_create()
scsi: ufs: core: Fix the polling implementation
RDMA/nldev: Add checks for nla_nest_start() in fill_stat_counter_qps()
f2fs: set zstd compress level correctly
f2fs: fix to enable compress for newly created file if extension matches
f2fs: avoid victim selection from previous victim section
RDMA/nldev: Fix failure to send large messages
crypto: qat - fix error return code in adf_probe
crypto: amlogic - Remove kcalloc without check
crypto: omap-sham - Use pm_runtime_resume_and_get() in omap_sham_probe()
riscv/mm: add arch hook arch_clear_hugepage_flags
RDMA: Disable IB HW for UML
RDMA/hfi1: Fix error return code in parse_platform_config()
RDMA/srp: Fix error return code in srp_parse_options()
PCI: vmd: Fix secondary bus reset for Intel bridges
orangefs: Fix sysfs not cleanup when dev init failed
RDMA/hns: Fix the gid problem caused by free mr
RDMA/hns: Fix AH attr queried by query_qp
RDMA/hns: Fix PBL page MTR find
RDMA/hns: Fix page size cap from firmware
RDMA/hns: Fix error code of CMD
RDMA/hns: Fix XRC caps on HIP08
RISC-V: Fix unannotated hardirqs-on in return to userspace slow-path
RISC-V: Fix MEMREMAP_WB for systems with Svpbmt
riscv: Fix crash during early errata patching
crypto: img-hash - Fix variable dereferenced before check 'hdev->req'
hwrng: amd - Fix PCI device refcount leak
hwrng: geode - Fix PCI device refcount leak
IB/IPoIB: Fix queue count inconsistency for PKEY child interfaces
RISC-V: Align the shadow stack
f2fs: fix iostat parameter for discard
riscv: Fix P4D_SHIFT definition for 3-level page table mode
drivers: dio: fix possible memory leak in dio_init()
serial: tegra: Read DMA status before terminating
serial: 8250_bcm7271: Fix error handling in brcmuart_init()
drivers: staging: r8188eu: Fix sleep-in-atomic-context bug in rtw_join_timeout_handler
class: fix possible memory leak in __class_register()
vfio: platform: Do not pass return buffer to ACPI _RST method
vfio/iova_bitmap: Fix PAGE_SIZE unaligned bitmaps
uio: uio_dmem_genirq: Fix missing unlock in irq configuration
uio: uio_dmem_genirq: Fix deadlock between irq config and handling
usb: fotg210-udc: Fix ages old endianness issues
interconnect: qcom: sc7180: fix dropped const of qcom_icc_bcm
staging: vme_user: Fix possible UAF in tsi148_dma_list_add
usb: typec: Check for ops->exit instead of ops->enter in altmode_exit
usb: typec: tcpci: fix of node refcount leak in tcpci_register_port()
usb: typec: tipd: Cleanup resources if devm_tps6598_psy_register fails
usb: typec: tipd: Fix spurious fwnode_handle_put in error path
usb: typec: tipd: Fix typec_unregister_port error paths
usb: musb: omap2430: Fix probe regression for missing resources
extcon: usbc-tusb320: Update state on probe even if no IRQ pending
USB: gadget: Fix use-after-free during usb config switch
serial: amba-pl011: avoid SBSA UART accessing DMACR register
serial: pl011: Do not clear RX FIFO & RX interrupt in unthrottle.
serial: stm32: move dma_request_chan() before clk_prepare_enable()
serial: pch: Fix PCI device refcount leak in pch_request_dma()
serial: altera_uart: fix locking in polling mode
serial: sunsab: Fix error handling in sunsab_init()
habanalabs: fix return value check in hl_fw_get_sec_attest_data()
test_firmware: fix memory leak in test_firmware_init()
misc: ocxl: fix possible name leak in ocxl_file_register_afu()
ocxl: fix pci device refcount leak when calling get_function_0()
misc: tifm: fix possible memory leak in tifm_7xx1_switch_media()
misc: sgi-gru: fix use-after-free error in gru_set_context_option, gru_fault and gru_handle_user_call_os
firmware: raspberrypi: fix possible memory leak in rpi_firmware_probe()
cxl: fix possible null-ptr-deref in cxl_guest_init_afu|adapter()
cxl: fix possible null-ptr-deref in cxl_pci_init_afu|adapter()
iio: temperature: ltc2983: make bulk write buffer DMA-safe
iio: adis: add '__adis_enable_irq()' implementation
counter: stm32-lptimer-cnt: fix the check on arr and cmp registers update
coresight: trbe: remove cpuhp instance node before remove cpuhp state
coresight: cti: Fix null pointer error on CTI init before ETM
tracing/user_events: Fix call print_fmt leak
usb: roles: fix of node refcount leak in usb_role_switch_is_parent()
usb: core: hcd: Fix return value check in usb_hcd_setup_local_mem()
usb: gadget: f_hid: fix f_hidg lifetime vs cdev
usb: gadget: f_hid: fix refcount leak on error path
drivers: mcb: fix resource leak in mcb_probe()
mcb: mcb-parse: fix error handling in chameleon_parse_gdd()
chardev: fix error handling in cdev_device_add()
vfio/iova_bitmap: refactor iova_bitmap_set() to better handle page boundaries
i2c: pxa-pci: fix missing pci_disable_device() on error in ce4100_i2c_probe
staging: rtl8192u: Fix use after free in ieee80211_rx()
staging: rtl8192e: Fix potential use-after-free in rtllib_rx_Monitor()
vme: Fix error not caught in fake_init()
gpiolib: cdev: fix NULL-pointer dereferences
gpiolib: protect the GPIO device against being dropped while in use by user-space
i2c: mux: reg: check return value after calling platform_get_resource()
i2c: ismt: Fix an out-of-bounds bug in ismt_access()
usb: storage: Add check for kcalloc
usb: typec: wusb3801: fix fwnode refcount leak in wusb3801_probe()
tracing/hist: Fix issue of losing command info in error_log
ksmbd: Fix resource leak in ksmbd_session_rpc_open()
samples: vfio-mdev: Fix missing pci_disable_device() in mdpy_fb_probe()
thermal/drivers/imx8mm_thermal: Validate temperature range
thermal/drivers/k3_j72xx_bandgap: Fix the debug print message
thermal/of: Fix memory leak on thermal_of_zone_register() failure
thermal/drivers/qcom/temp-alarm: Fix inaccurate warning for gen2
thermal/drivers/qcom/lmh: Fix irq handler return value
fbdev: ssd1307fb: Drop optional dependency
fbdev: pm2fb: fix missing pci_disable_device()
fbdev: via: Fix error in via_core_init()
fbdev: vermilion: decrease reference count in error path
fbdev: ep93xx-fb: Add missing clk_disable_unprepare in ep93xxfb_probe()
fbdev: geode: don't build on UML
fbdev: uvesafb: don't build on UML
fbdev: uvesafb: Fixes an error handling path in uvesafb_probe()
led: qcom-lpg: Fix sleeping in atomic
perf tools: Fix "kernel lock contention analysis" test by not printing warnings in quiet mode
perf stat: Use evsel__is_hybrid() more
perf stat: Move common code in print_metric_headers()
HSI: omap_ssi_core: fix unbalanced pm_runtime_disable()
HSI: omap_ssi_core: fix possible memory leak in ssi_probe()
power: supply: fix residue sysfs file in error handle route of __power_supply_register()
watchdog: iTCO_wdt: Set NO_REBOOT if the watchdog is not already running
perf trace: Return error if a system call doesn't exist
perf trace: Use macro RAW_SYSCALL_ARGS_NUM to replace number
perf trace: Handle failure when trace point folder is missed
perf symbol: correction while adjusting symbol
power: supply: z2_battery: Fix possible memleak in z2_batt_probe()
power: supply: cw2015: Fix potential null-ptr-deref in cw_bat_probe()
HSI: omap_ssi_core: Fix error handling in ssi_init()
power: supply: ab8500: Fix error handling in ab8500_charger_init()
power: supply: Fix refcount leak in rk817_charger_probe
power: supply: bq25890: Factor out regulator registration code
power: supply: bq25890: Convert to i2c's .probe_new()
power: supply: bq25890: Ensure pump_express_work is cancelled on remove
perf branch: Fix interpretation of branch records
power: supply: fix null pointer dereferencing in power_supply_get_battery_info
gfs2: Partially revert gfs2_inode_lookup change
leds: is31fl319x: Fix setting current limit for is31fl319{0,1,3}
perf off_cpu: Fix a typo in BTF tracepoint name, it should be 'btf_trace_sched_switch'
ftrace: Allow WITH_ARGS flavour of graph tracer with shadow call stack
perf stat: Do not delay the workload with --delay
RDMA/siw: Fix pointer cast warning
fs/ntfs3: Avoid UBSAN error on true_sectors_per_clst()
fs/ntfs3: Harden against integer overflows
phy: marvell: phy-mvebu-a3700-comphy: Reset COMPHY registers before USB 3.0 power on
phy: qcom-qmp-pcie: drop bogus register update
dmaengine: idxd: Make max batch size attributes in sysfs invisible for Intel IAA
dmaengine: apple-admac: Allocate cache SRAM to channels
remoteproc: core: Auto select rproc-virtio device id
phy: qcom-qmp-pcie: drop power-down delay config
phy: qcom-qmp-pcie: replace power-down delay
phy: qcom-qmp-pcie: fix sc8180x initialisation
phy: qcom-qmp-pcie: fix ipq8074-gen3 initialisation
phy: qcom-qmp-pcie: fix ipq6018 initialisation
phy: qcom-qmp-usb: clean up power-down handling
phy: qcom-qmp-usb: drop sc8280xp power-down delay
phy: qcom-qmp-usb: drop power-down delay config
phy: qcom-qmp-usb: clean up status polling
phy: qcom-qmp-usb: drop start and pwrdn-ctrl abstraction
phy: qcom-qmp-usb: correct registers layout for IPQ8074 USB3 PHY
iommu/s390: Fix duplicate domain attachments
iommu/sun50i: Fix reset release
iommu/sun50i: Consider all fault sources for reset
iommu/sun50i: Fix R/W permission check
iommu/sun50i: Fix flush size
iommu/sun50i: Implement .iotlb_sync_map
iommu/rockchip: fix permission bits in page table entries v2
dmaengine: idxd: Make read buffer sysfs attributes invisible for Intel IAA
phy: qcom-qmp-usb: fix sc8280xp PCS_USB offset
phy: usb: s2 WoL wakeup_count not incremented for USB->Eth devices
phy: usb: Use slow clock for wake enabled suspend
phy: usb: Fix clock imbalance for suspend/resume
include/uapi/linux/swab: Fix potentially missing __always_inline
pwm: tegra: Improve required rate calculation
pwm: tegra: Ensure the clock rate is not less than needed
phy: qcom-qmp-pcie: split register tables into common and extra parts
phy: qcom-qmp-pcie: split pcs_misc init cfg for ipq8074 pcs table
phy: qcom-qmp-pcie: support separate tables for EP mode
phy: qcom-qmp-pcie: Support SM8450 PCIe1 PHY in EP mode
phy: qcom-qmp-pcie: Fix high latency with 4x2 PHY when ASPM is enabled
phy: qcom-qmp-pcie: Fix sm8450_qmp_gen4x2_pcie_pcs_tbl[] register names
fs/ntfs3: Fix slab-out-of-bounds read in ntfs_trim_fs
dmaengine: idxd: Fix crc_val field for completion record
rtc: rzn1: Check return value in rzn1_rtc_probe
rtc: class: Fix potential memleak in devm_rtc_allocate_device()
rtc: pcf2127: Convert to .probe_new()
rtc: cmos: Call cmos_wake_setup() from cmos_do_probe()
rtc: cmos: Call rtc_wake_setup() from cmos_do_probe()
rtc: cmos: Eliminate forward declarations of some functions
rtc: cmos: Rename ACPI-related functions
rtc: cmos: Disable ACPI RTC event on removal
rtc: snvs: Allow a time difference on clock register read
rtc: pcf85063: Fix reading alarm
iommu/mediatek: Check return value after calling platform_get_resource()
iommu: Avoid races around device probe
iommu/amd: Fix pci device refcount leak in ppr_notifier()
iommu/fsl_pamu: Fix resource leak in fsl_pamu_probe()
macintosh: fix possible memory leak in macio_add_one_device()
macintosh/macio-adb: check the return value of ioremap()
powerpc/52xx: Fix a resource leak in an error handling path
cxl: Fix refcount leak in cxl_calc_capp_routing
powerpc/xmon: Fix -Wswitch-unreachable warning in bpt_cmds
powerpc/xive: add missing iounmap() in error path in xive_spapr_populate_irq_data()
powerpc/pseries: fix the object owners enum value in plpks driver
powerpc/pseries: Fix the H_CALL error code in PLPKS driver
powerpc/pseries: Return -EIO instead of -EINTR for H_ABORTED error
powerpc/pseries: fix plpks_read_var() code for different consumers
kprobes: Fix check for probe enabled in kill_kprobe()
powerpc: dts: turris1x.dts: Add channel labels for temperature sensor
powerpc/perf: callchain validate kernel stack pointer bounds
powerpc/83xx/mpc832x_rdb: call platform_device_put() in error case in of_fsl_spi_probe()
powerpc/hv-gpci: Fix hv_gpci event list
selftests/powerpc: Fix resource leaks
iommu/mediatek: Add platform_device_put for recovering the device refcnt
iommu/mediatek: Use component_match_add
iommu/mediatek: Add error path for loop of mm_dts_parse
iommu/mediatek: Validate number of phandles associated with "mediatek,larbs"
iommu/sun50i: Remove IOMMU_DOMAIN_IDENTITY
pwm: sifive: Call pwm_sifive_update_clock() while mutex is held
pwm: mtk-disp: Fix the parameters calculated by the enabled flag of disp_pwm
pwm: mediatek: always use bus clock for PWM on MT7622
RISC-V: KVM: Fix reg_val check in kvm_riscv_vcpu_set_reg_config()
remoteproc: sysmon: fix memory leak in qcom_add_sysmon_subdev()
remoteproc: qcom: q6v5: Fix potential null-ptr-deref in q6v5_wcss_init_mmio()
remoteproc: qcom_q6v5_pas: disable wakeup on probe fail or remove
remoteproc: qcom_q6v5_pas: detach power domains on remove
remoteproc: qcom_q6v5_pas: Fix missing of_node_put() in adsp_alloc_memory_region()
remoteproc: qcom: q6v5: Fix missing clk_disable_unprepare() in q6v5_wcss_qcs404_power_on()
powerpc/pseries/eeh: use correct API for error log size
dt-bindings: mfd: qcom,spmi-pmic: Drop PWM reg dependency
mfd: axp20x: Do not sleep in the power off handler
mfd: bd957x: Fix Kconfig dependency on REGMAP_IRQ
mfd: qcom_rpm: Fix an error handling path in qcom_rpm_probe()
mfd: pm8008: Fix return value check in pm8008_probe()
netfilter: flowtable: really fix NAT IPv6 offload
rtc: st-lpc: Add missing clk_disable_unprepare in st_rtc_probe()
rtc: pic32: Move devm_rtc_allocate_device earlier in pic32_rtc_probe()
rtc: pcf85063: fix pcf85063_clkout_control
iommu/mediatek: Fix forever loop in error handling
nfsd: under NFSv4.1, fix double svc_xprt_put on rpc_create failure
net: macsec: fix net device access prior to holding a lock
bonding: add missed __rcu annotation for curr_active_slave
bonding: do failover when high prio link up
mISDN: hfcsusb: don't call dev_kfree_skb/kfree_skb() under spin_lock_irqsave()
mISDN: hfcpci: don't call dev_kfree_skb/kfree_skb() under spin_lock_irqsave()
mISDN: hfcmulti: don't call dev_kfree_skb/kfree_skb() under spin_lock_irqsave()
block, bfq: fix possible uaf for 'bfqq->bic'
selftests/bpf: Select CONFIG_FUNCTION_ERROR_INJECTION
bpf: prevent leak of lsm program after failed attach
media: v4l2-ctrls-api.c: add back dropped ctrl->is_new = 1
net: enetc: avoid buffer leaks on xdp_do_redirect() failure
nfc: pn533: Clear nfc_target before being used
unix: Fix race in SOCK_SEQPACKET's unix_dgram_sendmsg()
r6040: Fix kmemleak in probe and remove
net: dsa: mv88e6xxx: avoid reg_lock deadlock in mv88e6xxx_setup_port()
igc: Enhance Qbv scheduling by using first flag bit
igc: Use strict cycles for Qbv scheduling
igc: Add checking for basetime less than zero
igc: allow BaseTime 0 enrollment for Qbv
igc: recalculate Qbv end_time by considering cycle time
igc: Set Qbv start_time and end_time to end_time if not being configured in GCL
rtc: mxc_v2: Add missing clk_disable_unprepare()
devlink: hold region lock when flushing snapshots
selftests: devlink: fix the fd redirect in dummy_reporter_test
openvswitch: Fix flow lookup to use unmasked key
soc: mediatek: pm-domains: Fix the power glitch issue
arm64: dts: mt8183: Fix Mali GPU clock
devlink: protect devlink dump by the instance lock
skbuff: Account for tail adjustment during pull operations
mailbox: mpfs: read the system controller's status
mailbox: arm_mhuv2: Fix return value check in mhuv2_probe()
mailbox: zynq-ipi: fix error handling while device_register() fails
net_sched: reject TCF_EM_SIMPLE case for complex ematch module
rxrpc: Fix missing unlock in rxrpc_do_sendmsg()
myri10ge: Fix an error handling path in myri10ge_probe()
net: stream: purge sk_error_queue in sk_stream_kill_queues()
mctp: serial: Fix starting value for frame check sequence
cifs: don't leak -ENOMEM in smb2_open_file()
net: dsa: microchip: remove IRQF_TRIGGER_FALLING in request_threaded_irq
mctp: Remove device type check at unregister
HID: amd_sfh: Add missing check for dma_alloc_coherent
net: fec: check the return value of build_skb()
rcu: Fix __this_cpu_read() lockdep warning in rcu_force_quiescent_state()
arm64: make is_ttbrX_addr() noinstr-safe
ARM: dts: aspeed: rainier,everest: Move reserved memory regions
video: hyperv_fb: Avoid taking busy spinlock on panic path
x86/hyperv: Remove unregister syscore call from Hyper-V cleanup
binfmt_misc: fix shift-out-of-bounds in check_special_flags
arm64: dts: qcom: sm8450: disable SDHCI SDR104/SDR50 on all boards
arm64: dts: qcom: sm6350: Add apps_smmu with streamID to SDHCI 1/2 nodes
fs: jfs: fix shift-out-of-bounds in dbAllocAG
udf: Avoid double brelse() in udf_rename()
jfs: Fix fortify moan in symlink
fs: jfs: fix shift-out-of-bounds in dbDiscardAG
ACPI: processor: idle: Check acpi_fetch_acpi_dev() return value
ACPI: EC: Add quirk for the HP Pavilion Gaming 15-cx0041ur
ACPICA: Fix error code path in acpi_ds_call_control_method()
thermal/core: Ensure that thermal device is registered in thermal_zone_get_temp
ACPI: video: Change GIGABYTE GB-BXBT-2807 quirk to force_none
ACPI: video: Change Sony Vaio VPCEH3U1E quirk to force_native
ACPI: video: Add force_vendor quirk for Sony Vaio PCG-FRV35
ACPI: video: Add force_native quirk for Sony Vaio VPCY11S1E
nilfs2: fix shift-out-of-bounds/overflow in nilfs_sb2_bad_offset()
nilfs2: fix shift-out-of-bounds due to too large exponent of block size
acct: fix potential integer overflow in encode_comp_t()
x86/apic: Handle no CONFIG_X86_X2APIC on systems with x2APIC enabled by BIOS
ACPI: x86: Add skip i2c clients quirk for Lenovo Yoga Tab 3 Pro (YT3-X90F)
btrfs: do not panic if we can't allocate a prealloc extent state
ACPI: x86: Add skip i2c clients quirk for Medion Lifetab S10346
hfs: fix OOB Read in __hfs_brec_find
drm/etnaviv: add missing quirks for GC300
media: imx-jpeg: Disable useless interrupt to avoid kernel panic
brcmfmac: return error when getting invalid max_flowrings from dongle
wifi: ath9k: verify the expected usb_endpoints are present
wifi: ar5523: Fix use-after-free on ar5523_cmd() timed out
ASoC: codecs: rt298: Add quirk for KBL-R RVP platform
ASoC: Intel: avs: Add quirk for KBL-R RVP platform
ipmi: fix memleak when unload ipmi driver
wifi: ath10k: Delay the unmapping of the buffer
openvswitch: Use kmalloc_size_roundup() to match ksize() usage
bnx2: Use kmalloc_size_roundup() to match ksize() usage
drm/amd/display: skip commit minimal transition state
drm/amd/display: prevent memory leak
drm/edid: add a quirk for two LG monitors to get them to work on 10bpc
Revert "drm/amd/display: Limit max DSC target bpp for specific monitors"
drm/rockchip: use pm_runtime_resume_and_get() instead of pm_runtime_get_sync()
blk-mq: avoid double ->queue_rq() because of early timeout
HID: apple: fix key translations where multiple quirks attempt to translate the same key
HID: apple: enable APPLE_ISO_TILDE_QUIRK for the keyboards of Macs with the T2 chip
wifi: ath11k: Fix qmi_msg_handler data structure initialization
qed (gcc13): use u16 for fid to be big enough
drm/meson: Fix return type of meson_encoder_cvbs_mode_valid()
bpf: make sure skb->len != 0 when redirecting to a tunneling device
net: ethernet: ti: Fix return type of netcp_ndo_start_xmit()
hamradio: baycom_epp: Fix return type of baycom_send_packet()
wifi: brcmfmac: Fix potential shift-out-of-bounds in brcmf_fw_alloc_request()
wifi: brcmfmac: Fix potential NULL pointer dereference in 'brcmf_c_preinit_dcmds()'
HID: input: do not query XP-PEN Deco LW battery
HID: uclogic: Add support for XP-PEN Deco LW
igb: Do not free q_vector unless new one was allocated
drm/amdgpu: Fix type of second parameter in trans_msg() callback
drm/amdgpu: Fix type of second parameter in odn_edit_dpm_table() callback
s390/ctcm: Fix return type of ctc{mp,}m_tx()
s390/netiucv: Fix return type of netiucv_tx()
s390/lcs: Fix return type of lcs_start_xmit()
drm/amd/display: Use min transition for SubVP into MPO
drm/amd/display: Disable DRR actions during state commit
drm/msm: Use drm_mode_copy()
drm/rockchip: Use drm_mode_copy()
drm/sti: Use drm_mode_copy()
drm/mediatek: Fix return type of mtk_hdmi_bridge_mode_valid()
drivers/md/md-bitmap: check the return value of md_bitmap_get_counter()
md/raid0, raid10: Don't set discard sectors for request queue
md/raid1: stop mdx_raid1 thread when raid1 array run failed
drm/amd/display: Workaround to increase phantom pipe vactive in pipesplit
drm/amd/display: fix array index out of bound error in bios parser
nvme-auth: don't override ctrl keys before validation
net: add atomic_long_t to net_device_stats fields
ipv6/sit: use DEV_STATS_INC() to avoid data-races
mrp: introduce active flags to prevent UAF when applicant uninit
net: ethernet: mtk_eth_soc: drop packets to WDMA if the ring is full
bpf/verifier: Use kmalloc_size_roundup() to match ksize() usage
ppp: associate skb with a device at tx
drm/amd/display: Fix display corruption w/ VSR enable
bpf: Fix a BTF_ID_LIST bug with CONFIG_DEBUG_INFO_BTF not set
bpf: Prevent decl_tag from being referenced in func_proto arg
ethtool: avoiding integer overflow in ethtool_phys_id()
media: dvb-frontends: fix leak of memory fw
media: dvbdev: adopt refcnt to avoid UAF
media: dvb-usb: fix memory leak in dvb_usb_adapter_init()
media: mediatek: vcodec: Can't set dst buffer to done when lat decode error
blk-mq: fix possible memleak when register 'hctx' failed
ALSA: usb-audio: Add quirk for Tascam Model 12
drm/amdgpu: Fix potential double free and null pointer dereference
drm/amd/display: Use the largest vready_offset in pipe group
drm/amd/display: Fix DTBCLK disable requests and SRC_SEL programming
ASoC: amd: yc: Add Xiaomi Redmi Book Pro 14 2022 into DMI table
libbpf: Avoid enum forward-declarations in public API in C++ mode
regulator: core: fix use_count leakage when handling boot-on
wifi: mt76: do not run mt76u_status_worker if the device is not running
hwmon: (nct6775) add ASUS CROSSHAIR VIII/TUF/ProArt B550M
selftests/bpf: Fix conflicts with built-in functions in bpf_iter_ksym
nfs: fix possible null-ptr-deref when parsing param
mmc: f-sdh30: Add quirks for broken timeout clock capability
mmc: renesas_sdhi: add quirk for broken register layout
mmc: renesas_sdhi: better reset from HS400 mode
mmc: sdhci-tegra: Issue CMD and DAT resets together
media: si470x: Fix use-after-free in si470x_int_in_callback()
clk: st: Fix memory leak in st_of_quadfs_setup()
regulator: core: Use different devices for resource allocation and DT lookup
ice: synchronize the misc IRQ when tearing down Tx tracker
Bluetooth: hci_bcm: Add CYW4373A0 support
Bluetooth: Add quirk to disable extended scanning
Bluetooth: Add quirk to disable MWS Transport Configuration
regulator: core: Fix resolve supply lookup issue
crypto: hisilicon/hpre - fix resource leak in remove process
scsi: lpfc: Fix hard lockup when reading the rx_monitor from debugfs
scsi: ufs: Reduce the START STOP UNIT timeout
crypto: hisilicon/qm - increase the memory of local variables
Revert "PCI: Clear PCI_STATUS when setting up device"
scsi: elx: libefc: Fix second parameter type in state callbacks
hugetlbfs: fix null-ptr-deref in hugetlbfs_parse_param()
scsi: smartpqi: Add new controller PCI IDs
scsi: smartpqi: Correct device removal for multi-actuator devices
drm/fsl-dcu: Fix return type of fsl_dcu_drm_connector_mode_valid()
drm/sti: Fix return type of sti_{dvo,hda,hdmi}_connector_mode_valid()
scsi: target: iscsi: Fix a race condition between login_work and the login thread
orangefs: Fix kmemleak in orangefs_prepare_debugfs_help_string()
orangefs: Fix kmemleak in orangefs_sysfs_init()
orangefs: Fix kmemleak in orangefs_{kernel,client}_debug_init()
hwmon: (jc42) Fix missing unlock on error in jc42_write()
ASoC: sof_es8336: fix possible use-after-free in sof_es8336_remove()
ASoC: Intel: Skylake: Fix driver hang during shutdown
ASoC: mediatek: mt8173-rt5650-rt5514: fix refcount leak in mt8173_rt5650_rt5514_dev_probe()
ASoC: audio-graph-card: fix refcount leak of cpu_ep in __graph_for_each_link()
ASoC: rockchip: pdm: Add missing clk_disable_unprepare() in rockchip_pdm_runtime_resume()
ASoC: mediatek: mt8183: fix refcount leak in mt8183_mt6358_ts3a227_max98357_dev_probe()
ALSA: hda/hdmi: fix i915 silent stream programming flow
ALSA: hda/hdmi: set default audio parameters for KAE silent-stream
ALSA: hda/hdmi: fix stream-id config keep-alive for rt suspend
ASoC: wm8994: Fix potential deadlock
ASoC: rockchip: spdif: Add missing clk_disable_unprepare() in rk_spdif_runtime_resume()
ASoC: rt5670: Remove unbalanced pm_runtime_put()
drm/i915/display: Don't disable DDI/Transcoder when setting phy test pattern
LoadPin: Ignore the "contents" argument of the LSM hooks
lkdtm: cfi: Make PAC test work with GCC 7 and 8
pstore: Switch pmsg_lock to an rt_mutex to avoid priority inversion
drm/amd/pm: avoid large variable on kernel stack
perf debug: Set debug_peo_args and redirect_to_stderr variable to correct values in perf_quiet_option()
perf tools: Make quiet mode consistent between tools
perf probe: Check -v and -q options in the right place
MIPS: ralink: mt7621: avoid to init common ralink reset controller
perf test: Fix "all PMU test" to skip parametrized events
afs: Fix lost servers_outstanding count
cfi: Fix CFI failure with KASAN
pstore: Make sure CONFIG_PSTORE_PMSG selects CONFIG_RT_MUTEXES
ima: Simplify ima_lsm_copy_rule
Input: iqs7222 - drop unused device node references
Input: iqs7222 - report malformed properties
Input: iqs7222 - add support for IQS7222A v1.13+
dt-bindings: input: iqs7222: Reduce 'linux,code' to optional
dt-bindings: input: iqs7222: Correct minimum slider size
dt-bindings: input: iqs7222: Add support for IQS7222A v1.13+
ALSA: usb-audio: Workaround for XRUN at prepare
ALSA: usb-audio: add the quirk for KT0206 device
ALSA: hda/realtek: Add quirk for Lenovo TianYi510Pro-14IOB
ALSA: hda/hdmi: Add HP Device 0x8711 to force connect list
HID: logitech-hidpp: Guard FF init code against non-USB devices
usb: cdnsp: fix lack of ZLP for ep0
usb: xhci-mtk: fix leakage of shared hcd when fail to set wakeup irq
arm64: dts: qcom: sm6350: fix USB-DP PHY registers
arm64: dts: qcom: sm8250: fix USB-DP PHY registers
dt-bindings: clocks: imx8mp: Add ID for usb suspend clock
clk: imx: imx8mp: add shared clk gate for usb suspend clk
usb: dwc3: Fix race between dwc3_set_mode and __dwc3_set_mode
usb: dwc3: core: defer probe on ulpi_read_id timeout
usb: dwc3: qcom: Fix memory leak in dwc3_qcom_interconnect_init
xhci: Prevent infinite loop in transaction errors recovery for streams
HID: wacom: Ensure bootloader PID is usable in hidraw mode
HID: mcp2221: don't connect hidraw
loop: Fix the max_loop commandline argument treatment when it is set to 0
9p: set req refcount to zero to avoid uninitialized usage
security: Restrict CONFIG_ZERO_CALL_USED_REGS to gcc or clang > 15.0.6
reiserfs: Add missing calls to reiserfs_security_free()
iio: fix memory leak in iio_device_register_eventset()
iio: adc: ad_sigma_delta: do not use internal iio_dev lock
iio: adc128s052: add proper .data members in adc128_of_match table
iio: addac: ad74413r: fix integer promotion bug in ad74413_get_input_current_offset()
regulator: core: fix deadlock on regulator enable
spi: fsl_spi: Don't change speed while chipselect is active
floppy: Fix memory leak in do_floppy_init()
gcov: add support for checksum field
test_maple_tree: add test for mas_spanning_rebalance() on insufficient data
maple_tree: fix mas_spanning_rebalance() on insufficient data
fbdev: fbcon: release buffer when fbcon_do_set_font() failed
ovl: fix use inode directly in rcu-walk mode
btrfs: do not BUG_ON() on ENOMEM when dropping extent items for a range
mm/gup: disallow FOLL_FORCE|FOLL_WRITE on hugetlb mappings
scsi: qla2xxx: Fix crash when I/O abort times out
blk-iolatency: Fix memory leak on add_disk() failures
io_uring/net: introduce IORING_SEND_ZC_REPORT_USAGE flag
io_uring: add completion locking for iopoll
io_uring: dont remove file from msg_ring reqs
io_uring: improve io_double_lock_ctx fail handling
io_uring/net: ensure compat import handlers clear free_iov
io_uring/net: fix cleanup after recycle
io_uring: protect cq_timeouts with timeout_lock
io_uring: remove iopoll spinlock
net: stmmac: fix errno when create_singlethread_workqueue() fails
media: dvbdev: fix build warning due to comments
media: dvbdev: fix refcnt bug
drm/amd/display: revert Disable DRR actions during state commit
mfd: qcom_rpm: Use devm_of_platform_populate() to simplify code
pwm: tegra: Fix 32 bit build
Linux 6.1.2
Change-Id: I8f7c080f3b8288ed319fc0e25aaefb7ad5cd6b84
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
3327 lines
80 KiB
C
3327 lines
80 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* linux/kernel/fork.c
|
|
*
|
|
* Copyright (C) 1991, 1992 Linus Torvalds
|
|
*/
|
|
|
|
/*
|
|
* 'fork.c' contains the help-routines for the 'fork' system call
|
|
* (see also entry.S and others).
|
|
* Fork is rather simple, once you get the hang of it, but the memory
|
|
* management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
|
|
*/
|
|
|
|
#include <linux/anon_inodes.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/sched/autogroup.h>
|
|
#include <linux/sched/mm.h>
|
|
#include <linux/sched/coredump.h>
|
|
#include <linux/sched/user.h>
|
|
#include <linux/sched/numa_balancing.h>
|
|
#include <linux/sched/stat.h>
|
|
#include <linux/sched/task.h>
|
|
#include <linux/sched/task_stack.h>
|
|
#include <linux/sched/cputime.h>
|
|
#include <linux/seq_file.h>
|
|
#include <linux/rtmutex.h>
|
|
#include <linux/init.h>
|
|
#include <linux/unistd.h>
|
|
#include <linux/module.h>
|
|
#include <linux/vmalloc.h>
|
|
#include <linux/completion.h>
|
|
#include <linux/personality.h>
|
|
#include <linux/mempolicy.h>
|
|
#include <linux/sem.h>
|
|
#include <linux/file.h>
|
|
#include <linux/fdtable.h>
|
|
#include <linux/iocontext.h>
|
|
#include <linux/key.h>
|
|
#include <linux/kmsan.h>
|
|
#include <linux/binfmts.h>
|
|
#include <linux/mman.h>
|
|
#include <linux/mmu_notifier.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/mm_inline.h>
|
|
#include <linux/nsproxy.h>
|
|
#include <linux/capability.h>
|
|
#include <linux/cpu.h>
|
|
#include <linux/cgroup.h>
|
|
#include <linux/security.h>
|
|
#include <linux/hugetlb.h>
|
|
#include <linux/seccomp.h>
|
|
#include <linux/swap.h>
|
|
#include <linux/syscalls.h>
|
|
#include <linux/jiffies.h>
|
|
#include <linux/futex.h>
|
|
#include <linux/compat.h>
|
|
#include <linux/kthread.h>
|
|
#include <linux/task_io_accounting_ops.h>
|
|
#include <linux/rcupdate.h>
|
|
#include <linux/ptrace.h>
|
|
#include <linux/mount.h>
|
|
#include <linux/audit.h>
|
|
#include <linux/memcontrol.h>
|
|
#include <linux/ftrace.h>
|
|
#include <linux/proc_fs.h>
|
|
#include <linux/profile.h>
|
|
#include <linux/rmap.h>
|
|
#include <linux/ksm.h>
|
|
#include <linux/acct.h>
|
|
#include <linux/userfaultfd_k.h>
|
|
#include <linux/tsacct_kern.h>
|
|
#include <linux/cn_proc.h>
|
|
#include <linux/freezer.h>
|
|
#include <linux/delayacct.h>
|
|
#include <linux/taskstats_kern.h>
|
|
#include <linux/random.h>
|
|
#include <linux/tty.h>
|
|
#include <linux/fs_struct.h>
|
|
#include <linux/magic.h>
|
|
#include <linux/perf_event.h>
|
|
#include <linux/posix-timers.h>
|
|
#include <linux/user-return-notifier.h>
|
|
#include <linux/oom.h>
|
|
#include <linux/khugepaged.h>
|
|
#include <linux/signalfd.h>
|
|
#include <linux/uprobes.h>
|
|
#include <linux/aio.h>
|
|
#include <linux/compiler.h>
|
|
#include <linux/sysctl.h>
|
|
#include <linux/kcov.h>
|
|
#include <linux/livepatch.h>
|
|
#include <linux/thread_info.h>
|
|
#include <linux/stackleak.h>
|
|
#include <linux/kasan.h>
|
|
#include <linux/scs.h>
|
|
#include <linux/io_uring.h>
|
|
#include <linux/bpf.h>
|
|
#include <linux/cpufreq_times.h>
|
|
|
|
#include <asm/pgalloc.h>
|
|
#include <linux/uaccess.h>
|
|
#include <asm/mmu_context.h>
|
|
#include <asm/cacheflush.h>
|
|
#include <asm/tlbflush.h>
|
|
|
|
#include <trace/events/sched.h>
|
|
|
|
#define CREATE_TRACE_POINTS
|
|
#include <trace/events/task.h>
|
|
|
|
#undef CREATE_TRACE_POINTS
|
|
#include <trace/hooks/sched.h>
|
|
/*
|
|
* Minimum number of threads to boot the kernel
|
|
*/
|
|
#define MIN_THREADS 20
|
|
|
|
/*
|
|
* Maximum number of threads
|
|
*/
|
|
#define MAX_THREADS FUTEX_TID_MASK
|
|
|
|
EXPORT_TRACEPOINT_SYMBOL_GPL(task_newtask);
|
|
|
|
/*
|
|
* Counters protected by write_lock_irq(&tasklist_lock)
|
|
*/
|
|
unsigned long total_forks; /* Handle normal Linux uptimes. */
|
|
int nr_threads; /* The idle threads do not count.. */
|
|
|
|
static int max_threads; /* tunable limit on nr_threads */
|
|
|
|
#define NAMED_ARRAY_INDEX(x) [x] = __stringify(x)
|
|
|
|
static const char * const resident_page_types[] = {
|
|
NAMED_ARRAY_INDEX(MM_FILEPAGES),
|
|
NAMED_ARRAY_INDEX(MM_ANONPAGES),
|
|
NAMED_ARRAY_INDEX(MM_SWAPENTS),
|
|
NAMED_ARRAY_INDEX(MM_SHMEMPAGES),
|
|
};
|
|
|
|
DEFINE_PER_CPU(unsigned long, process_counts) = 0;
|
|
|
|
__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
|
|
EXPORT_SYMBOL_GPL(tasklist_lock);
|
|
|
|
#ifdef CONFIG_PROVE_RCU
|
|
int lockdep_tasklist_lock_is_held(void)
|
|
{
|
|
return lockdep_is_held(&tasklist_lock);
|
|
}
|
|
EXPORT_SYMBOL_GPL(lockdep_tasklist_lock_is_held);
|
|
#endif /* #ifdef CONFIG_PROVE_RCU */
|
|
|
|
int nr_processes(void)
|
|
{
|
|
int cpu;
|
|
int total = 0;
|
|
|
|
for_each_possible_cpu(cpu)
|
|
total += per_cpu(process_counts, cpu);
|
|
|
|
return total;
|
|
}
|
|
|
|
/* Weak definition with an empty body; @tsk is not touched. */
void __weak arch_release_task_struct(struct task_struct *tsk)
{
	/* Intentionally empty. */
}
|
|
|
|
#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
|
|
static struct kmem_cache *task_struct_cachep;
|
|
|
|
static inline struct task_struct *alloc_task_struct_node(int node)
|
|
{
|
|
return kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node);
|
|
}
|
|
|
|
static inline void free_task_struct(struct task_struct *tsk)
|
|
{
|
|
kmem_cache_free(task_struct_cachep, tsk);
|
|
}
|
|
#endif
|
|
|
|
#ifndef CONFIG_ARCH_THREAD_STACK_ALLOCATOR
|
|
|
|
/*
|
|
* Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a
|
|
* kmemcache based allocator.
|
|
*/
|
|
# if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)
|
|
|
|
# ifdef CONFIG_VMAP_STACK
|
|
/*
|
|
* vmalloc() is a bit slow, and calling vfree() enough times will force a TLB
|
|
* flush. Try to minimize the number of calls by caching stacks.
|
|
*/
|
|
#define NR_CACHED_STACKS 2
|
|
static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]);
|
|
|
|
struct vm_stack {
|
|
struct rcu_head rcu;
|
|
struct vm_struct *stack_vm_area;
|
|
};
|
|
|
|
static bool try_release_thread_stack_to_cache(struct vm_struct *vm)
|
|
{
|
|
unsigned int i;
|
|
|
|
for (i = 0; i < NR_CACHED_STACKS; i++) {
|
|
if (this_cpu_cmpxchg(cached_stacks[i], NULL, vm) != NULL)
|
|
continue;
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
static void thread_stack_free_rcu(struct rcu_head *rh)
|
|
{
|
|
struct vm_stack *vm_stack = container_of(rh, struct vm_stack, rcu);
|
|
|
|
if (try_release_thread_stack_to_cache(vm_stack->stack_vm_area))
|
|
return;
|
|
|
|
vfree(vm_stack);
|
|
}
|
|
|
|
static void thread_stack_delayed_free(struct task_struct *tsk)
|
|
{
|
|
struct vm_stack *vm_stack = tsk->stack;
|
|
|
|
vm_stack->stack_vm_area = tsk->stack_vm_area;
|
|
call_rcu(&vm_stack->rcu, thread_stack_free_rcu);
|
|
}
|
|
|
|
static int free_vm_stack_cache(unsigned int cpu)
|
|
{
|
|
struct vm_struct **cached_vm_stacks = per_cpu_ptr(cached_stacks, cpu);
|
|
int i;
|
|
|
|
for (i = 0; i < NR_CACHED_STACKS; i++) {
|
|
struct vm_struct *vm_stack = cached_vm_stacks[i];
|
|
|
|
if (!vm_stack)
|
|
continue;
|
|
|
|
vfree(vm_stack->addr);
|
|
cached_vm_stacks[i] = NULL;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int memcg_charge_kernel_stack(struct vm_struct *vm)
|
|
{
|
|
int i;
|
|
int ret;
|
|
|
|
BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
|
|
BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
|
|
|
|
for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
|
|
ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL, 0);
|
|
if (ret)
|
|
goto err;
|
|
}
|
|
return 0;
|
|
err:
|
|
/*
|
|
* If memcg_kmem_charge_page() fails, page's memory cgroup pointer is
|
|
* NULL, and memcg_kmem_uncharge_page() in free_thread_stack() will
|
|
* ignore this page.
|
|
*/
|
|
for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
|
|
memcg_kmem_uncharge_page(vm->pages[i], 0);
|
|
return ret;
|
|
}
|
|
|
|
static int alloc_thread_stack_node(struct task_struct *tsk, int node)
|
|
{
|
|
struct vm_struct *vm;
|
|
void *stack;
|
|
int i;
|
|
|
|
for (i = 0; i < NR_CACHED_STACKS; i++) {
|
|
struct vm_struct *s;
|
|
|
|
s = this_cpu_xchg(cached_stacks[i], NULL);
|
|
|
|
if (!s)
|
|
continue;
|
|
|
|
/* Reset stack metadata. */
|
|
kasan_unpoison_range(s->addr, THREAD_SIZE);
|
|
|
|
stack = kasan_reset_tag(s->addr);
|
|
|
|
/* Clear stale pointers from reused stack. */
|
|
memset(stack, 0, THREAD_SIZE);
|
|
|
|
if (memcg_charge_kernel_stack(s)) {
|
|
vfree(s->addr);
|
|
return -ENOMEM;
|
|
}
|
|
|
|
tsk->stack_vm_area = s;
|
|
tsk->stack = stack;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Allocated stacks are cached and later reused by new threads,
|
|
* so memcg accounting is performed manually on assigning/releasing
|
|
* stacks to tasks. Drop __GFP_ACCOUNT.
|
|
*/
|
|
stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN,
|
|
VMALLOC_START, VMALLOC_END,
|
|
THREADINFO_GFP & ~__GFP_ACCOUNT,
|
|
PAGE_KERNEL,
|
|
0, node, __builtin_return_address(0));
|
|
if (!stack)
|
|
return -ENOMEM;
|
|
|
|
vm = find_vm_area(stack);
|
|
if (memcg_charge_kernel_stack(vm)) {
|
|
vfree(stack);
|
|
return -ENOMEM;
|
|
}
|
|
/*
|
|
* We can't call find_vm_area() in interrupt context, and
|
|
* free_thread_stack() can be called in interrupt context,
|
|
* so cache the vm_struct.
|
|
*/
|
|
tsk->stack_vm_area = vm;
|
|
stack = kasan_reset_tag(stack);
|
|
tsk->stack = stack;
|
|
return 0;
|
|
}
|
|
|
|
static void free_thread_stack(struct task_struct *tsk)
|
|
{
|
|
if (!try_release_thread_stack_to_cache(tsk->stack_vm_area))
|
|
thread_stack_delayed_free(tsk);
|
|
|
|
tsk->stack = NULL;
|
|
tsk->stack_vm_area = NULL;
|
|
}
|
|
|
|
# else /* !CONFIG_VMAP_STACK */
|
|
|
|
static void thread_stack_free_rcu(struct rcu_head *rh)
|
|
{
|
|
__free_pages(virt_to_page(rh), THREAD_SIZE_ORDER);
|
|
}
|
|
|
|
static void thread_stack_delayed_free(struct task_struct *tsk)
|
|
{
|
|
struct rcu_head *rh = tsk->stack;
|
|
|
|
call_rcu(rh, thread_stack_free_rcu);
|
|
}
|
|
|
|
static int alloc_thread_stack_node(struct task_struct *tsk, int node)
|
|
{
|
|
struct page *page = alloc_pages_node(node, THREADINFO_GFP,
|
|
THREAD_SIZE_ORDER);
|
|
|
|
if (likely(page)) {
|
|
tsk->stack = kasan_reset_tag(page_address(page));
|
|
return 0;
|
|
}
|
|
return -ENOMEM;
|
|
}
|
|
|
|
static void free_thread_stack(struct task_struct *tsk)
|
|
{
|
|
thread_stack_delayed_free(tsk);
|
|
tsk->stack = NULL;
|
|
}
|
|
|
|
# endif /* CONFIG_VMAP_STACK */
|
|
# else /* !(THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)) */
|
|
|
|
static struct kmem_cache *thread_stack_cache;
|
|
|
|
static void thread_stack_free_rcu(struct rcu_head *rh)
|
|
{
|
|
kmem_cache_free(thread_stack_cache, rh);
|
|
}
|
|
|
|
static void thread_stack_delayed_free(struct task_struct *tsk)
|
|
{
|
|
struct rcu_head *rh = tsk->stack;
|
|
|
|
call_rcu(rh, thread_stack_free_rcu);
|
|
}
|
|
|
|
static int alloc_thread_stack_node(struct task_struct *tsk, int node)
|
|
{
|
|
unsigned long *stack;
|
|
stack = kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node);
|
|
stack = kasan_reset_tag(stack);
|
|
tsk->stack = stack;
|
|
return stack ? 0 : -ENOMEM;
|
|
}
|
|
|
|
static void free_thread_stack(struct task_struct *tsk)
|
|
{
|
|
thread_stack_delayed_free(tsk);
|
|
tsk->stack = NULL;
|
|
}
|
|
|
|
void thread_stack_cache_init(void)
|
|
{
|
|
thread_stack_cache = kmem_cache_create_usercopy("thread_stack",
|
|
THREAD_SIZE, THREAD_SIZE, 0, 0,
|
|
THREAD_SIZE, NULL);
|
|
BUG_ON(thread_stack_cache == NULL);
|
|
}
|
|
|
|
# endif /* THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) */
|
|
#else /* CONFIG_ARCH_THREAD_STACK_ALLOCATOR */
|
|
|
|
static int alloc_thread_stack_node(struct task_struct *tsk, int node)
|
|
{
|
|
unsigned long *stack;
|
|
|
|
stack = arch_alloc_thread_stack_node(tsk, node);
|
|
tsk->stack = stack;
|
|
return stack ? 0 : -ENOMEM;
|
|
}
|
|
|
|
static void free_thread_stack(struct task_struct *tsk)
|
|
{
|
|
arch_free_thread_stack(tsk);
|
|
tsk->stack = NULL;
|
|
}
|
|
|
|
#endif /* !CONFIG_ARCH_THREAD_STACK_ALLOCATOR */
|
|
|
|
/* SLAB cache for signal_struct structures (tsk->signal) */
|
|
static struct kmem_cache *signal_cachep;
|
|
|
|
/* SLAB cache for sighand_struct structures (tsk->sighand) */
|
|
struct kmem_cache *sighand_cachep;
|
|
|
|
/* SLAB cache for files_struct structures (tsk->files) */
|
|
struct kmem_cache *files_cachep;
|
|
|
|
/* SLAB cache for fs_struct structures (tsk->fs) */
|
|
struct kmem_cache *fs_cachep;
|
|
|
|
/* SLAB cache for vm_area_struct structures */
|
|
static struct kmem_cache *vm_area_cachep;
|
|
|
|
/* SLAB cache for mm_struct structures (tsk->mm) */
|
|
static struct kmem_cache *mm_cachep;
|
|
|
|
struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
|
|
{
|
|
struct vm_area_struct *vma;
|
|
|
|
vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
|
|
if (vma)
|
|
vma_init(vma, mm);
|
|
return vma;
|
|
}
|
|
|
|
struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
|
|
{
|
|
struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
|
|
|
|
if (new) {
|
|
ASSERT_EXCLUSIVE_WRITER(orig->vm_flags);
|
|
ASSERT_EXCLUSIVE_WRITER(orig->vm_file);
|
|
/*
|
|
* orig->shared.rb may be modified concurrently, but the clone
|
|
* will be reinitialized.
|
|
*/
|
|
*new = data_race(*orig);
|
|
INIT_LIST_HEAD(&new->anon_vma_chain);
|
|
dup_anon_vma_name(orig, new);
|
|
}
|
|
return new;
|
|
}
|
|
|
|
void vm_area_free(struct vm_area_struct *vma)
|
|
{
|
|
free_anon_vma_name(vma);
|
|
kmem_cache_free(vm_area_cachep, vma);
|
|
}
|
|
|
|
static void account_kernel_stack(struct task_struct *tsk, int account)
|
|
{
|
|
if (IS_ENABLED(CONFIG_VMAP_STACK)) {
|
|
struct vm_struct *vm = task_stack_vm_area(tsk);
|
|
int i;
|
|
|
|
for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
|
|
mod_lruvec_page_state(vm->pages[i], NR_KERNEL_STACK_KB,
|
|
account * (PAGE_SIZE / 1024));
|
|
} else {
|
|
void *stack = task_stack_page(tsk);
|
|
|
|
/* All stack pages are in the same node. */
|
|
mod_lruvec_kmem_state(stack, NR_KERNEL_STACK_KB,
|
|
account * (THREAD_SIZE / 1024));
|
|
}
|
|
}
|
|
|
|
void exit_task_stack_account(struct task_struct *tsk)
|
|
{
|
|
account_kernel_stack(tsk, -1);
|
|
|
|
if (IS_ENABLED(CONFIG_VMAP_STACK)) {
|
|
struct vm_struct *vm;
|
|
int i;
|
|
|
|
vm = task_stack_vm_area(tsk);
|
|
for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
|
|
memcg_kmem_uncharge_page(vm->pages[i], 0);
|
|
}
|
|
}
|
|
|
|
static void release_task_stack(struct task_struct *tsk)
|
|
{
|
|
if (WARN_ON(READ_ONCE(tsk->__state) != TASK_DEAD))
|
|
return; /* Better to leak the stack than to free prematurely */
|
|
|
|
free_thread_stack(tsk);
|
|
}
|
|
|
|
#ifdef CONFIG_THREAD_INFO_IN_TASK
|
|
void put_task_stack(struct task_struct *tsk)
|
|
{
|
|
if (refcount_dec_and_test(&tsk->stack_refcount))
|
|
release_task_stack(tsk);
|
|
}
|
|
#endif
|
|
|
|
void free_task(struct task_struct *tsk)
|
|
{
|
|
#ifdef CONFIG_SECCOMP
|
|
WARN_ON_ONCE(tsk->seccomp.filter);
|
|
#endif
|
|
cpufreq_task_times_exit(tsk);
|
|
release_user_cpus_ptr(tsk);
|
|
scs_release(tsk);
|
|
|
|
trace_android_vh_free_task(tsk);
|
|
#ifndef CONFIG_THREAD_INFO_IN_TASK
|
|
/*
|
|
* The task is finally done with both the stack and thread_info,
|
|
* so free both.
|
|
*/
|
|
release_task_stack(tsk);
|
|
#else
|
|
/*
|
|
* If the task had a separate stack allocation, it should be gone
|
|
* by now.
|
|
*/
|
|
WARN_ON_ONCE(refcount_read(&tsk->stack_refcount) != 0);
|
|
#endif
|
|
rt_mutex_debug_task_free(tsk);
|
|
ftrace_graph_exit_task(tsk);
|
|
arch_release_task_struct(tsk);
|
|
if (tsk->flags & PF_KTHREAD)
|
|
free_kthread_struct(tsk);
|
|
free_task_struct(tsk);
|
|
}
|
|
EXPORT_SYMBOL(free_task);
|
|
|
|
static void dup_mm_exe_file(struct mm_struct *mm, struct mm_struct *oldmm)
|
|
{
|
|
struct file *exe_file;
|
|
|
|
exe_file = get_mm_exe_file(oldmm);
|
|
RCU_INIT_POINTER(mm->exe_file, exe_file);
|
|
/*
|
|
* We depend on the oldmm having properly denied write access to the
|
|
* exe_file already.
|
|
*/
|
|
if (exe_file && deny_write_access(exe_file))
|
|
pr_warn_once("deny_write_access() failed in %s\n", __func__);
|
|
}
|
|
|
|
#ifdef CONFIG_MMU
|
|
static __latent_entropy int dup_mmap(struct mm_struct *mm,
|
|
struct mm_struct *oldmm)
|
|
{
|
|
struct vm_area_struct *mpnt, *tmp;
|
|
int retval;
|
|
unsigned long charge = 0;
|
|
LIST_HEAD(uf);
|
|
MA_STATE(old_mas, &oldmm->mm_mt, 0, 0);
|
|
MA_STATE(mas, &mm->mm_mt, 0, 0);
|
|
|
|
uprobe_start_dup_mmap();
|
|
if (mmap_write_lock_killable(oldmm)) {
|
|
retval = -EINTR;
|
|
goto fail_uprobe_end;
|
|
}
|
|
flush_cache_dup_mm(oldmm);
|
|
uprobe_dup_mmap(oldmm, mm);
|
|
/*
|
|
* Not linked in yet - no deadlock potential:
|
|
*/
|
|
mmap_write_lock_nested(mm, SINGLE_DEPTH_NESTING);
|
|
|
|
/* No ordering required: file already has been exposed. */
|
|
dup_mm_exe_file(mm, oldmm);
|
|
|
|
mm->total_vm = oldmm->total_vm;
|
|
mm->data_vm = oldmm->data_vm;
|
|
mm->exec_vm = oldmm->exec_vm;
|
|
mm->stack_vm = oldmm->stack_vm;
|
|
|
|
retval = ksm_fork(mm, oldmm);
|
|
if (retval)
|
|
goto out;
|
|
khugepaged_fork(mm, oldmm);
|
|
|
|
retval = mas_expected_entries(&mas, oldmm->map_count);
|
|
if (retval)
|
|
goto out;
|
|
|
|
mas_for_each(&old_mas, mpnt, ULONG_MAX) {
|
|
struct file *file;
|
|
|
|
if (mpnt->vm_flags & VM_DONTCOPY) {
|
|
vm_stat_account(mm, mpnt->vm_flags, -vma_pages(mpnt));
|
|
continue;
|
|
}
|
|
charge = 0;
|
|
/*
|
|
* Don't duplicate many vmas if we've been oom-killed (for
|
|
* example)
|
|
*/
|
|
if (fatal_signal_pending(current)) {
|
|
retval = -EINTR;
|
|
goto loop_out;
|
|
}
|
|
if (mpnt->vm_flags & VM_ACCOUNT) {
|
|
unsigned long len = vma_pages(mpnt);
|
|
|
|
if (security_vm_enough_memory_mm(oldmm, len)) /* sic */
|
|
goto fail_nomem;
|
|
charge = len;
|
|
}
|
|
tmp = vm_area_dup(mpnt);
|
|
if (!tmp)
|
|
goto fail_nomem;
|
|
retval = vma_dup_policy(mpnt, tmp);
|
|
if (retval)
|
|
goto fail_nomem_policy;
|
|
tmp->vm_mm = mm;
|
|
retval = dup_userfaultfd(tmp, &uf);
|
|
if (retval)
|
|
goto fail_nomem_anon_vma_fork;
|
|
if (tmp->vm_flags & VM_WIPEONFORK) {
|
|
/*
|
|
* VM_WIPEONFORK gets a clean slate in the child.
|
|
* Don't prepare anon_vma until fault since we don't
|
|
* copy page for current vma.
|
|
*/
|
|
tmp->anon_vma = NULL;
|
|
} else if (anon_vma_fork(tmp, mpnt))
|
|
goto fail_nomem_anon_vma_fork;
|
|
tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT);
|
|
file = tmp->vm_file;
|
|
if (file) {
|
|
struct address_space *mapping = file->f_mapping;
|
|
|
|
get_file(file);
|
|
i_mmap_lock_write(mapping);
|
|
if (tmp->vm_flags & VM_SHARED)
|
|
mapping_allow_writable(mapping);
|
|
flush_dcache_mmap_lock(mapping);
|
|
/* insert tmp into the share list, just after mpnt */
|
|
vma_interval_tree_insert_after(tmp, mpnt,
|
|
&mapping->i_mmap);
|
|
flush_dcache_mmap_unlock(mapping);
|
|
i_mmap_unlock_write(mapping);
|
|
}
|
|
|
|
/*
|
|
* Copy/update hugetlb private vma information.
|
|
*/
|
|
if (is_vm_hugetlb_page(tmp))
|
|
hugetlb_dup_vma_private(tmp);
|
|
|
|
/* Link the vma into the MT */
|
|
mas.index = tmp->vm_start;
|
|
mas.last = tmp->vm_end - 1;
|
|
mas_store(&mas, tmp);
|
|
if (mas_is_err(&mas))
|
|
goto fail_nomem_mas_store;
|
|
|
|
mm->map_count++;
|
|
if (!(tmp->vm_flags & VM_WIPEONFORK))
|
|
retval = copy_page_range(tmp, mpnt);
|
|
|
|
if (tmp->vm_ops && tmp->vm_ops->open)
|
|
tmp->vm_ops->open(tmp);
|
|
|
|
if (retval)
|
|
goto loop_out;
|
|
}
|
|
/* a new mm has just been created */
|
|
retval = arch_dup_mmap(oldmm, mm);
|
|
loop_out:
|
|
mas_destroy(&mas);
|
|
out:
|
|
mmap_write_unlock(mm);
|
|
flush_tlb_mm(oldmm);
|
|
mmap_write_unlock(oldmm);
|
|
dup_userfaultfd_complete(&uf);
|
|
fail_uprobe_end:
|
|
uprobe_end_dup_mmap();
|
|
return retval;
|
|
|
|
fail_nomem_mas_store:
|
|
unlink_anon_vmas(tmp);
|
|
fail_nomem_anon_vma_fork:
|
|
mpol_put(vma_policy(tmp));
|
|
fail_nomem_policy:
|
|
vm_area_free(tmp);
|
|
fail_nomem:
|
|
retval = -ENOMEM;
|
|
vm_unacct_memory(charge);
|
|
goto loop_out;
|
|
}
|
|
|
|
static inline int mm_alloc_pgd(struct mm_struct *mm)
|
|
{
|
|
mm->pgd = pgd_alloc(mm);
|
|
if (unlikely(!mm->pgd))
|
|
return -ENOMEM;
|
|
return 0;
|
|
}
|
|
|
|
static inline void mm_free_pgd(struct mm_struct *mm)
|
|
{
|
|
pgd_free(mm, mm->pgd);
|
|
}
|
|
#else
|
|
/*
 * !CONFIG_MMU variant: only the exe_file reference is duplicated, under
 * oldmm's mmap write lock. Always returns 0.
 */
static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
{
	mmap_write_lock(oldmm);
	dup_mm_exe_file(mm, oldmm);
	mmap_write_unlock(oldmm);

	return 0;
}
|
|
#define mm_alloc_pgd(mm) (0)
|
|
#define mm_free_pgd(mm)
|
|
#endif /* CONFIG_MMU */
|
|
|
|
static void check_mm(struct mm_struct *mm)
|
|
{
|
|
int i;
|
|
|
|
BUILD_BUG_ON_MSG(ARRAY_SIZE(resident_page_types) != NR_MM_COUNTERS,
|
|
"Please make sure 'struct resident_page_types[]' is updated as well");
|
|
|
|
for (i = 0; i < NR_MM_COUNTERS; i++) {
|
|
long x = atomic_long_read(&mm->rss_stat.count[i]);
|
|
|
|
if (unlikely(x))
|
|
pr_alert("BUG: Bad rss-counter state mm:%p type:%s val:%ld\n",
|
|
mm, resident_page_types[i], x);
|
|
}
|
|
|
|
if (mm_pgtables_bytes(mm))
|
|
pr_alert("BUG: non-zero pgtables_bytes on freeing mm: %ld\n",
|
|
mm_pgtables_bytes(mm));
|
|
|
|
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
|
|
VM_BUG_ON_MM(mm->pmd_huge_pte, mm);
|
|
#endif
|
|
}
|
|
|
|
#define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL))
|
|
#define free_mm(mm) (kmem_cache_free(mm_cachep, (mm)))
|
|
|
|
/*
|
|
* Called when the last reference to the mm
|
|
* is dropped: either by a lazy thread or by
|
|
* mmput. Free the page directory and the mm.
|
|
*/
|
|
void __mmdrop(struct mm_struct *mm)
|
|
{
|
|
BUG_ON(mm == &init_mm);
|
|
WARN_ON_ONCE(mm == current->mm);
|
|
WARN_ON_ONCE(mm == current->active_mm);
|
|
mm_free_pgd(mm);
|
|
destroy_context(mm);
|
|
mmu_notifier_subscriptions_destroy(mm);
|
|
check_mm(mm);
|
|
put_user_ns(mm->user_ns);
|
|
mm_pasid_drop(mm);
|
|
free_mm(mm);
|
|
}
|
|
EXPORT_SYMBOL_GPL(__mmdrop);
|
|
|
|
static void mmdrop_async_fn(struct work_struct *work)
|
|
{
|
|
struct mm_struct *mm;
|
|
|
|
mm = container_of(work, struct mm_struct, async_put_work);
|
|
__mmdrop(mm);
|
|
}
|
|
|
|
static void mmdrop_async(struct mm_struct *mm)
|
|
{
|
|
if (unlikely(atomic_dec_and_test(&mm->mm_count))) {
|
|
INIT_WORK(&mm->async_put_work, mmdrop_async_fn);
|
|
schedule_work(&mm->async_put_work);
|
|
}
|
|
}
|
|
|
|
static inline void free_signal_struct(struct signal_struct *sig)
|
|
{
|
|
taskstats_tgid_free(sig);
|
|
sched_autogroup_exit(sig);
|
|
/*
|
|
* __mmdrop is not safe to call from softirq context on x86 due to
|
|
* pgd_dtor so postpone it to the async context
|
|
*/
|
|
if (sig->oom_mm)
|
|
mmdrop_async(sig->oom_mm);
|
|
kmem_cache_free(signal_cachep, sig);
|
|
}
|
|
|
|
static inline void put_signal_struct(struct signal_struct *sig)
|
|
{
|
|
if (refcount_dec_and_test(&sig->sigcnt))
|
|
free_signal_struct(sig);
|
|
}
|
|
|
|
void __put_task_struct(struct task_struct *tsk)
|
|
{
|
|
WARN_ON(!tsk->exit_state);
|
|
WARN_ON(refcount_read(&tsk->usage));
|
|
WARN_ON(tsk == current);
|
|
|
|
io_uring_free(tsk);
|
|
cgroup_free(tsk);
|
|
task_numa_free(tsk, true);
|
|
security_task_free(tsk);
|
|
bpf_task_storage_free(tsk);
|
|
exit_creds(tsk);
|
|
delayacct_tsk_free(tsk);
|
|
put_signal_struct(tsk->signal);
|
|
sched_core_free(tsk);
|
|
free_task(tsk);
|
|
}
|
|
EXPORT_SYMBOL_GPL(__put_task_struct);
|
|
|
|
/* Weak definition with an empty body. */
void __init __weak arch_task_cache_init(void)
{
}
|
|
|
|
/*
|
|
* set_max_threads
|
|
*/
|
|
static void set_max_threads(unsigned int max_threads_suggested)
|
|
{
|
|
u64 threads;
|
|
unsigned long nr_pages = totalram_pages();
|
|
|
|
/*
|
|
* The number of threads shall be limited such that the thread
|
|
* structures may only consume a small part of the available memory.
|
|
*/
|
|
if (fls64(nr_pages) + fls64(PAGE_SIZE) > 64)
|
|
threads = MAX_THREADS;
|
|
else
|
|
threads = div64_u64((u64) nr_pages * (u64) PAGE_SIZE,
|
|
(u64) THREAD_SIZE * 8UL);
|
|
|
|
if (threads > max_threads_suggested)
|
|
threads = max_threads_suggested;
|
|
|
|
max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS);
|
|
}
|
|
|
|
#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
|
|
/* Initialized by the architecture: */
|
|
int arch_task_struct_size __read_mostly;
|
|
#endif
|
|
|
|
#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
|
|
static void task_struct_whitelist(unsigned long *offset, unsigned long *size)
|
|
{
|
|
/* Fetch thread_struct whitelist for the architecture. */
|
|
arch_thread_struct_whitelist(offset, size);
|
|
|
|
/*
|
|
* Handle zero-sized whitelist or empty thread_struct, otherwise
|
|
* adjust offset to position of thread_struct in task_struct.
|
|
*/
|
|
if (unlikely(*size == 0))
|
|
*offset = 0;
|
|
else
|
|
*offset += offsetof(struct task_struct, thread);
|
|
}
|
|
#endif /* CONFIG_ARCH_TASK_STRUCT_ALLOCATOR */
|
|
|
|
/*
 * Boot-time setup for fork: create the task_struct slab cache (unless
 * the architecture supplies its own allocator), compute max_threads,
 * seed init_task's rlimits and init_user_ns limits from it, and run the
 * remaining one-time initialisers.
 */
void __init fork_init(void)
{
	int idx;
	int half;
#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
#ifndef ARCH_MIN_TASKALIGN
#define ARCH_MIN_TASKALIGN	0
#endif
	int align = max_t(int, L1_CACHE_BYTES, ARCH_MIN_TASKALIGN);
	unsigned long useroffset, usersize;

	/* create a slab on which task_structs can be allocated */
	task_struct_whitelist(&useroffset, &usersize);
	task_struct_cachep = kmem_cache_create_usercopy("task_struct",
			arch_task_struct_size, align,
			SLAB_PANIC|SLAB_ACCOUNT,
			useroffset, usersize, NULL);
#endif

	/* do the arch specific task caches init */
	arch_task_cache_init();

	set_max_threads(MAX_THREADS);
	half = max_threads / 2;

	/* NPROC defaults to half of max_threads; SIGPENDING copies NPROC. */
	init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = half;
	init_task.signal->rlim[RLIMIT_NPROC].rlim_max = half;
	init_task.signal->rlim[RLIMIT_SIGPENDING] =
		init_task.signal->rlim[RLIMIT_NPROC];

	for (idx = 0; idx < UCOUNT_COUNTS; idx++)
		init_user_ns.ucount_max[idx] = half;

	set_userns_rlimit_max(&init_user_ns, UCOUNT_RLIMIT_NPROC,      RLIM_INFINITY);
	set_userns_rlimit_max(&init_user_ns, UCOUNT_RLIMIT_MSGQUEUE,   RLIM_INFINITY);
	set_userns_rlimit_max(&init_user_ns, UCOUNT_RLIMIT_SIGPENDING, RLIM_INFINITY);
	set_userns_rlimit_max(&init_user_ns, UCOUNT_RLIMIT_MEMLOCK,    RLIM_INFINITY);

#ifdef CONFIG_VMAP_STACK
	/* free_vm_stack_cache is registered as the teardown callback. */
	cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache",
			  NULL, free_vm_stack_cache);
#endif

	scs_init();

	lockdep_init_task(&init_task);
	uprobes_init();
}
|
|
|
|
/*
 * Weak default: copy the whole task_struct from @src to @dst by plain
 * structure assignment. Always returns 0.
 */
int __weak arch_dup_task_struct(struct task_struct *dst,
				struct task_struct *src)
{
	*dst = *src;

	return 0;
}
|
|
|
|
void set_task_stack_end_magic(struct task_struct *tsk)
|
|
{
|
|
unsigned long *stackend;
|
|
|
|
stackend = end_of_stack(tsk);
|
|
*stackend = STACK_END_MAGIC; /* for overflow detection */
|
|
}
|
|
|
|
static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
|
|
{
|
|
struct task_struct *tsk;
|
|
int err;
|
|
|
|
if (node == NUMA_NO_NODE)
|
|
node = tsk_fork_get_node(orig);
|
|
tsk = alloc_task_struct_node(node);
|
|
if (!tsk)
|
|
return NULL;
|
|
|
|
err = arch_dup_task_struct(tsk, orig);
|
|
if (err)
|
|
goto free_tsk;
|
|
|
|
err = alloc_thread_stack_node(tsk, node);
|
|
if (err)
|
|
goto free_tsk;
|
|
|
|
#ifdef CONFIG_THREAD_INFO_IN_TASK
|
|
refcount_set(&tsk->stack_refcount, 1);
|
|
#endif
|
|
account_kernel_stack(tsk, 1);
|
|
|
|
err = scs_prepare(tsk, node);
|
|
if (err)
|
|
goto free_stack;
|
|
|
|
#ifdef CONFIG_SECCOMP
|
|
/*
|
|
* We must handle setting up seccomp filters once we're under
|
|
* the sighand lock in case orig has changed between now and
|
|
* then. Until then, filter must be NULL to avoid messing up
|
|
* the usage counts on the error path calling free_task.
|
|
*/
|
|
tsk->seccomp.filter = NULL;
|
|
#endif
|
|
|
|
setup_thread_stack(tsk, orig);
|
|
clear_user_return_notifier(tsk);
|
|
clear_tsk_need_resched(tsk);
|
|
set_task_stack_end_magic(tsk);
|
|
clear_syscall_work_syscall_user_dispatch(tsk);
|
|
|
|
#ifdef CONFIG_STACKPROTECTOR
|
|
tsk->stack_canary = get_random_canary();
|
|
#endif
|
|
if (orig->cpus_ptr == &orig->cpus_mask)
|
|
tsk->cpus_ptr = &tsk->cpus_mask;
|
|
dup_user_cpus_ptr(tsk, orig, node);
|
|
|
|
/*
|
|
* One for the user space visible state that goes away when reaped.
|
|
* One for the scheduler.
|
|
*/
|
|
refcount_set(&tsk->rcu_users, 2);
|
|
/* One for the rcu users */
|
|
refcount_set(&tsk->usage, 1);
|
|
#ifdef CONFIG_BLK_DEV_IO_TRACE
|
|
tsk->btrace_seq = 0;
|
|
#endif
|
|
tsk->splice_pipe = NULL;
|
|
tsk->task_frag.page = NULL;
|
|
tsk->wake_q.next = NULL;
|
|
tsk->worker_private = NULL;
|
|
|
|
kcov_task_init(tsk);
|
|
kmsan_task_create(tsk);
|
|
kmap_local_fork(tsk);
|
|
|
|
#ifdef CONFIG_FAULT_INJECTION
|
|
tsk->fail_nth = 0;
|
|
#endif
|
|
|
|
#ifdef CONFIG_BLK_CGROUP
|
|
tsk->throttle_queue = NULL;
|
|
tsk->use_memdelay = 0;
|
|
#endif
|
|
|
|
#ifdef CONFIG_IOMMU_SVA
|
|
tsk->pasid_activated = 0;
|
|
#endif
|
|
|
|
#ifdef CONFIG_MEMCG
|
|
tsk->active_memcg = NULL;
|
|
#endif
|
|
|
|
#ifdef CONFIG_CPU_SUP_INTEL
|
|
tsk->reported_split_lock = 0;
|
|
#endif
|
|
|
|
android_init_vendor_data(tsk, 1);
|
|
android_init_oem_data(tsk, 1);
|
|
|
|
return tsk;
|
|
|
|
free_stack:
|
|
exit_task_stack_account(tsk);
|
|
free_thread_stack(tsk);
|
|
free_tsk:
|
|
free_task_struct(tsk);
|
|
return NULL;
|
|
}
|
|
|
|
__cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
|
|
|
|
static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT;
|
|
|
|
/*
 * Parse the "coredump_filter=" boot argument @s as a number (base
 * auto-detected), shift and mask it into place and store it in
 * default_dump_filter. Always returns 1.
 */
static int __init coredump_filter_setup(char *s)
{
	unsigned long bits = simple_strtoul(s, NULL, 0);

	default_dump_filter = (bits << MMF_DUMP_FILTER_SHIFT) &
			      MMF_DUMP_FILTER_MASK;
	return 1;
}
|
|
|
|
__setup("coredump_filter=", coredump_filter_setup);
|
|
|
|
#include <linux/init_task.h>
|
|
|
|
/* Reset @mm's AIO state; compiles to nothing without CONFIG_AIO. */
static void mm_init_aio(struct mm_struct *mm)
{
#ifdef CONFIG_AIO
	spin_lock_init(&mm->ioctx_lock);
	/* No ioctx table yet. */
	mm->ioctx_table = NULL;
#endif
}
|
|
|
|
/* Clear mm->owner if it is @p; compiles to nothing without CONFIG_MEMCG. */
static __always_inline void mm_clear_owner(struct mm_struct *mm,
					   struct task_struct *p)
{
#ifdef CONFIG_MEMCG
	if (mm->owner != p)
		return;

	WRITE_ONCE(mm->owner, NULL);
#endif
}
|
|
|
|
/* Record @p as @mm's owner; compiles to nothing without CONFIG_MEMCG. */
static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
{
#ifdef CONFIG_MEMCG
	mm->owner = p;
#endif
}
|
|
|
|
/* Clear @mm's uprobes xol_area; compiles to nothing without CONFIG_UPROBES. */
static void mm_init_uprobes_state(struct mm_struct *mm)
{
#ifdef CONFIG_UPROBES
	mm->uprobes_state.xol_area = NULL;
#endif
}
|
|
|
|
static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
|
|
struct user_namespace *user_ns)
|
|
{
|
|
mt_init_flags(&mm->mm_mt, MM_MT_FLAGS);
|
|
mt_set_external_lock(&mm->mm_mt, &mm->mmap_lock);
|
|
atomic_set(&mm->mm_users, 1);
|
|
atomic_set(&mm->mm_count, 1);
|
|
seqcount_init(&mm->write_protect_seq);
|
|
mmap_init_lock(mm);
|
|
INIT_LIST_HEAD(&mm->mmlist);
|
|
mm_pgtables_bytes_init(mm);
|
|
mm->map_count = 0;
|
|
mm->locked_vm = 0;
|
|
atomic64_set(&mm->pinned_vm, 0);
|
|
memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
|
|
spin_lock_init(&mm->page_table_lock);
|
|
spin_lock_init(&mm->arg_lock);
|
|
mm_init_cpumask(mm);
|
|
mm_init_aio(mm);
|
|
mm_init_owner(mm, p);
|
|
mm_pasid_init(mm);
|
|
RCU_INIT_POINTER(mm->exe_file, NULL);
|
|
mmu_notifier_subscriptions_init(mm);
|
|
init_tlb_flush_pending(mm);
|
|
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
|
|
mm->pmd_huge_pte = NULL;
|
|
#endif
|
|
mm_init_uprobes_state(mm);
|
|
hugetlb_count_init(mm);
|
|
|
|
if (current->mm) {
|
|
mm->flags = current->mm->flags & MMF_INIT_MASK;
|
|
mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK;
|
|
} else {
|
|
mm->flags = default_dump_filter;
|
|
mm->def_flags = 0;
|
|
}
|
|
|
|
if (mm_alloc_pgd(mm))
|
|
goto fail_nopgd;
|
|
|
|
if (init_new_context(p, mm))
|
|
goto fail_nocontext;
|
|
|
|
mm->user_ns = get_user_ns(user_ns);
|
|
lru_gen_init_mm(mm);
|
|
return mm;
|
|
|
|
fail_nocontext:
|
|
mm_free_pgd(mm);
|
|
fail_nopgd:
|
|
free_mm(mm);
|
|
return NULL;
|
|
}
|
|
|
|
/*
|
|
* Allocate and initialize an mm_struct.
|
|
*/
|
|
struct mm_struct *mm_alloc(void)
|
|
{
|
|
struct mm_struct *mm;
|
|
|
|
mm = allocate_mm();
|
|
if (!mm)
|
|
return NULL;
|
|
|
|
memset(mm, 0, sizeof(*mm));
|
|
return mm_init(mm, current, current_user_ns());
|
|
}
|
|
|
|
static inline void __mmput(struct mm_struct *mm)
|
|
{
|
|
VM_BUG_ON(atomic_read(&mm->mm_users));
|
|
|
|
uprobe_clear_state(mm);
|
|
exit_aio(mm);
|
|
ksm_exit(mm);
|
|
khugepaged_exit(mm); /* must run before exit_mmap */
|
|
exit_mmap(mm);
|
|
mm_put_huge_zero_page(mm);
|
|
set_mm_exe_file(mm, NULL);
|
|
if (!list_empty(&mm->mmlist)) {
|
|
spin_lock(&mmlist_lock);
|
|
list_del(&mm->mmlist);
|
|
spin_unlock(&mmlist_lock);
|
|
}
|
|
if (mm->binfmt)
|
|
module_put(mm->binfmt->module);
|
|
lru_gen_del_mm(mm);
|
|
mmdrop(mm);
|
|
}
|
|
|
|
/*
|
|
* Decrement the use count and release all resources for an mm.
|
|
*/
|
|
void mmput(struct mm_struct *mm)
|
|
{
|
|
might_sleep();
|
|
|
|
if (atomic_dec_and_test(&mm->mm_users))
|
|
__mmput(mm);
|
|
}
|
|
EXPORT_SYMBOL_GPL(mmput);
|
|
|
|
#ifdef CONFIG_MMU
|
|
static void mmput_async_fn(struct work_struct *work)
|
|
{
|
|
struct mm_struct *mm = container_of(work, struct mm_struct,
|
|
async_put_work);
|
|
|
|
__mmput(mm);
|
|
}
|
|
|
|
void mmput_async(struct mm_struct *mm)
|
|
{
|
|
if (atomic_dec_and_test(&mm->mm_users)) {
|
|
INIT_WORK(&mm->async_put_work, mmput_async_fn);
|
|
schedule_work(&mm->async_put_work);
|
|
}
|
|
}
|
|
EXPORT_SYMBOL_GPL(mmput_async);
|
|
#endif
|
|
|
|
/**
|
|
* set_mm_exe_file - change a reference to the mm's executable file
|
|
*
|
|
* This changes mm's executable file (shown as symlink /proc/[pid]/exe).
|
|
*
|
|
* Main users are mmput() and sys_execve(). Callers prevent concurrent
|
|
* invocations: in mmput() nobody alive left, in execve task is single
|
|
* threaded.
|
|
*
|
|
* Can only fail if new_exe_file != NULL.
|
|
*/
|
|
int set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
|
|
{
|
|
struct file *old_exe_file;
|
|
|
|
/*
|
|
* It is safe to dereference the exe_file without RCU as
|
|
* this function is only called if nobody else can access
|
|
* this mm -- see comment above for justification.
|
|
*/
|
|
old_exe_file = rcu_dereference_raw(mm->exe_file);
|
|
|
|
if (new_exe_file) {
|
|
/*
|
|
* We expect the caller (i.e., sys_execve) to already denied
|
|
* write access, so this is unlikely to fail.
|
|
*/
|
|
if (unlikely(deny_write_access(new_exe_file)))
|
|
return -EACCES;
|
|
get_file(new_exe_file);
|
|
}
|
|
rcu_assign_pointer(mm->exe_file, new_exe_file);
|
|
if (old_exe_file) {
|
|
allow_write_access(old_exe_file);
|
|
fput(old_exe_file);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* replace_mm_exe_file - replace a reference to the mm's executable file
|
|
*
|
|
* This changes mm's executable file (shown as symlink /proc/[pid]/exe),
|
|
* dealing with concurrent invocation and without grabbing the mmap lock in
|
|
* write mode.
|
|
*
|
|
* Main user is sys_prctl(PR_SET_MM_MAP/EXE_FILE).
|
|
*/
|
|
int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
|
|
{
|
|
struct vm_area_struct *vma;
|
|
struct file *old_exe_file;
|
|
int ret = 0;
|
|
|
|
/* Forbid mm->exe_file change if old file still mapped. */
|
|
old_exe_file = get_mm_exe_file(mm);
|
|
if (old_exe_file) {
|
|
VMA_ITERATOR(vmi, mm, 0);
|
|
mmap_read_lock(mm);
|
|
for_each_vma(vmi, vma) {
|
|
if (!vma->vm_file)
|
|
continue;
|
|
if (path_equal(&vma->vm_file->f_path,
|
|
&old_exe_file->f_path)) {
|
|
ret = -EBUSY;
|
|
break;
|
|
}
|
|
}
|
|
mmap_read_unlock(mm);
|
|
fput(old_exe_file);
|
|
if (ret)
|
|
return ret;
|
|
}
|
|
|
|
/* set the new file, lockless */
|
|
ret = deny_write_access(new_exe_file);
|
|
if (ret)
|
|
return -EACCES;
|
|
get_file(new_exe_file);
|
|
|
|
old_exe_file = xchg(&mm->exe_file, new_exe_file);
|
|
if (old_exe_file) {
|
|
/*
|
|
* Don't race with dup_mmap() getting the file and disallowing
|
|
* write access while someone might open the file writable.
|
|
*/
|
|
mmap_read_lock(mm);
|
|
allow_write_access(old_exe_file);
|
|
fput(old_exe_file);
|
|
mmap_read_unlock(mm);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* get_mm_exe_file - acquire a reference to the mm's executable file
|
|
*
|
|
* Returns %NULL if mm has no associated executable file.
|
|
* User must release file via fput().
|
|
*/
|
|
struct file *get_mm_exe_file(struct mm_struct *mm)
|
|
{
|
|
struct file *exe_file;
|
|
|
|
rcu_read_lock();
|
|
exe_file = rcu_dereference(mm->exe_file);
|
|
if (exe_file && !get_file_rcu(exe_file))
|
|
exe_file = NULL;
|
|
rcu_read_unlock();
|
|
return exe_file;
|
|
}
|
|
|
|
/**
|
|
* get_task_exe_file - acquire a reference to the task's executable file
|
|
*
|
|
* Returns %NULL if task's mm (if any) has no associated executable file or
|
|
* this is a kernel thread with borrowed mm (see the comment above get_task_mm).
|
|
* User must release file via fput().
|
|
*/
|
|
struct file *get_task_exe_file(struct task_struct *task)
|
|
{
|
|
struct file *exe_file = NULL;
|
|
struct mm_struct *mm;
|
|
|
|
task_lock(task);
|
|
mm = task->mm;
|
|
if (mm) {
|
|
if (!(task->flags & PF_KTHREAD))
|
|
exe_file = get_mm_exe_file(mm);
|
|
}
|
|
task_unlock(task);
|
|
return exe_file;
|
|
}
|
|
|
|
/**
|
|
* get_task_mm - acquire a reference to the task's mm
|
|
*
|
|
* Returns %NULL if the task has no mm. Checks PF_KTHREAD (meaning
|
|
* this kernel workthread has transiently adopted a user mm with use_mm,
|
|
* to do its AIO) is not set and if so returns a reference to it, after
|
|
* bumping up the use count. User must release the mm via mmput()
|
|
* after use. Typically used by /proc and ptrace.
|
|
*/
|
|
struct mm_struct *get_task_mm(struct task_struct *task)
|
|
{
|
|
struct mm_struct *mm;
|
|
|
|
task_lock(task);
|
|
mm = task->mm;
|
|
if (mm) {
|
|
if (task->flags & PF_KTHREAD)
|
|
mm = NULL;
|
|
else
|
|
mmget(mm);
|
|
}
|
|
task_unlock(task);
|
|
return mm;
|
|
}
|
|
EXPORT_SYMBOL_GPL(get_task_mm);
|
|
|
|
struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
|
|
{
|
|
struct mm_struct *mm;
|
|
int err;
|
|
|
|
err = down_read_killable(&task->signal->exec_update_lock);
|
|
if (err)
|
|
return ERR_PTR(err);
|
|
|
|
mm = get_task_mm(task);
|
|
if (mm && mm != current->mm &&
|
|
!ptrace_may_access(task, mode)) {
|
|
mmput(mm);
|
|
mm = ERR_PTR(-EACCES);
|
|
}
|
|
up_read(&task->signal->exec_update_lock);
|
|
|
|
return mm;
|
|
}
|
|
|
|
static void complete_vfork_done(struct task_struct *tsk)
|
|
{
|
|
struct completion *vfork;
|
|
|
|
task_lock(tsk);
|
|
vfork = tsk->vfork_done;
|
|
if (likely(vfork)) {
|
|
tsk->vfork_done = NULL;
|
|
complete(vfork);
|
|
}
|
|
task_unlock(tsk);
|
|
}
|
|
|
|
static int wait_for_vfork_done(struct task_struct *child,
|
|
struct completion *vfork)
|
|
{
|
|
unsigned int state = TASK_UNINTERRUPTIBLE|TASK_KILLABLE|TASK_FREEZABLE;
|
|
int killed;
|
|
|
|
cgroup_enter_frozen();
|
|
killed = wait_for_completion_state(vfork, state);
|
|
cgroup_leave_frozen(false);
|
|
|
|
if (killed) {
|
|
task_lock(child);
|
|
child->vfork_done = NULL;
|
|
task_unlock(child);
|
|
}
|
|
|
|
put_task_struct(child);
|
|
return killed;
|
|
}
|
|
|
|
/* Please note the differences between mmput and mm_release.
|
|
* mmput is called whenever we stop holding onto a mm_struct,
|
|
* error success whatever.
|
|
*
|
|
* mm_release is called after a mm_struct has been removed
|
|
* from the current process.
|
|
*
|
|
* This difference is important for error handling, when we
|
|
* only half set up a mm_struct for a new process and need to restore
|
|
* the old one. Because we mmput the new mm_struct before
|
|
* restoring the old one. . .
|
|
* Eric Biederman 10 January 1998
|
|
*/
|
|
static void mm_release(struct task_struct *tsk, struct mm_struct *mm)
|
|
{
|
|
uprobe_free_utask(tsk);
|
|
|
|
/* Get rid of any cached register state */
|
|
deactivate_mm(tsk, mm);
|
|
|
|
/*
|
|
* Signal userspace if we're not exiting with a core dump
|
|
* because we want to leave the value intact for debugging
|
|
* purposes.
|
|
*/
|
|
if (tsk->clear_child_tid) {
|
|
if (atomic_read(&mm->mm_users) > 1) {
|
|
/*
|
|
* We don't check the error code - if userspace has
|
|
* not set up a proper pointer then tough luck.
|
|
*/
|
|
put_user(0, tsk->clear_child_tid);
|
|
do_futex(tsk->clear_child_tid, FUTEX_WAKE,
|
|
1, NULL, NULL, 0, 0);
|
|
}
|
|
tsk->clear_child_tid = NULL;
|
|
}
|
|
|
|
/*
|
|
* All done, finally we can wake up parent and return this mm to him.
|
|
* Also kthread_stop() uses this completion for synchronization.
|
|
*/
|
|
if (tsk->vfork_done)
|
|
complete_vfork_done(tsk);
|
|
}
|
|
|
|
/*
 * mm_release() variant for the exit path: run the futex exit-time release
 * for @tsk first, then the common mm_release() work.
 */
void exit_mm_release(struct task_struct *tsk, struct mm_struct *mm)
{
	futex_exit_release(tsk);
	mm_release(tsk, mm);
}
|
|
|
|
/*
 * mm_release() variant for the exec path: run the futex exec-time release
 * for @tsk first, then the common mm_release() work.
 */
void exec_mm_release(struct task_struct *tsk, struct mm_struct *mm)
{
	futex_exec_release(tsk);
	mm_release(tsk, mm);
}
|
|
|
|
/**
|
|
* dup_mm() - duplicates an existing mm structure
|
|
* @tsk: the task_struct with which the new mm will be associated.
|
|
* @oldmm: the mm to duplicate.
|
|
*
|
|
* Allocates a new mm structure and duplicates the provided @oldmm structure
|
|
* content into it.
|
|
*
|
|
* Return: the duplicated mm or NULL on failure.
|
|
*/
|
|
static struct mm_struct *dup_mm(struct task_struct *tsk,
|
|
struct mm_struct *oldmm)
|
|
{
|
|
struct mm_struct *mm;
|
|
int err;
|
|
|
|
mm = allocate_mm();
|
|
if (!mm)
|
|
goto fail_nomem;
|
|
|
|
memcpy(mm, oldmm, sizeof(*mm));
|
|
|
|
if (!mm_init(mm, tsk, mm->user_ns))
|
|
goto fail_nomem;
|
|
|
|
err = dup_mmap(mm, oldmm);
|
|
if (err)
|
|
goto free_pt;
|
|
|
|
mm->hiwater_rss = get_mm_rss(mm);
|
|
mm->hiwater_vm = mm->total_vm;
|
|
|
|
if (mm->binfmt && !try_module_get(mm->binfmt->module))
|
|
goto free_pt;
|
|
|
|
return mm;
|
|
|
|
free_pt:
|
|
/* don't put binfmt in mmput, we haven't got module yet */
|
|
mm->binfmt = NULL;
|
|
mm_init_owner(mm, NULL);
|
|
mmput(mm);
|
|
|
|
fail_nomem:
|
|
return NULL;
|
|
}
|
|
|
|
static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
|
|
{
|
|
struct mm_struct *mm, *oldmm;
|
|
|
|
tsk->min_flt = tsk->maj_flt = 0;
|
|
tsk->nvcsw = tsk->nivcsw = 0;
|
|
#ifdef CONFIG_DETECT_HUNG_TASK
|
|
tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
|
|
tsk->last_switch_time = 0;
|
|
#endif
|
|
|
|
tsk->mm = NULL;
|
|
tsk->active_mm = NULL;
|
|
|
|
/*
|
|
* Are we cloning a kernel thread?
|
|
*
|
|
* We need to steal a active VM for that..
|
|
*/
|
|
oldmm = current->mm;
|
|
if (!oldmm)
|
|
return 0;
|
|
|
|
if (clone_flags & CLONE_VM) {
|
|
mmget(oldmm);
|
|
mm = oldmm;
|
|
} else {
|
|
mm = dup_mm(tsk, current->mm);
|
|
if (!mm)
|
|
return -ENOMEM;
|
|
}
|
|
|
|
tsk->mm = mm;
|
|
tsk->active_mm = mm;
|
|
return 0;
|
|
}
|
|
|
|
static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
|
|
{
|
|
struct fs_struct *fs = current->fs;
|
|
if (clone_flags & CLONE_FS) {
|
|
/* tsk->fs is already what we want */
|
|
spin_lock(&fs->lock);
|
|
if (fs->in_exec) {
|
|
spin_unlock(&fs->lock);
|
|
return -EAGAIN;
|
|
}
|
|
fs->users++;
|
|
spin_unlock(&fs->lock);
|
|
return 0;
|
|
}
|
|
tsk->fs = copy_fs_struct(fs);
|
|
if (!tsk->fs)
|
|
return -ENOMEM;
|
|
return 0;
|
|
}
|
|
|
|
static int copy_files(unsigned long clone_flags, struct task_struct *tsk)
|
|
{
|
|
struct files_struct *oldf, *newf;
|
|
int error = 0;
|
|
|
|
/*
|
|
* A background process may not have any files ...
|
|
*/
|
|
oldf = current->files;
|
|
if (!oldf)
|
|
goto out;
|
|
|
|
if (clone_flags & CLONE_FILES) {
|
|
atomic_inc(&oldf->count);
|
|
goto out;
|
|
}
|
|
|
|
newf = dup_fd(oldf, NR_OPEN_MAX, &error);
|
|
if (!newf)
|
|
goto out;
|
|
|
|
tsk->files = newf;
|
|
error = 0;
|
|
out:
|
|
return error;
|
|
}
|
|
|
|
static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
|
|
{
|
|
struct sighand_struct *sig;
|
|
|
|
if (clone_flags & CLONE_SIGHAND) {
|
|
refcount_inc(¤t->sighand->count);
|
|
return 0;
|
|
}
|
|
sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
|
|
RCU_INIT_POINTER(tsk->sighand, sig);
|
|
if (!sig)
|
|
return -ENOMEM;
|
|
|
|
refcount_set(&sig->count, 1);
|
|
spin_lock_irq(¤t->sighand->siglock);
|
|
memcpy(sig->action, current->sighand->action, sizeof(sig->action));
|
|
spin_unlock_irq(¤t->sighand->siglock);
|
|
|
|
/* Reset all signal handler not set to SIG_IGN to SIG_DFL. */
|
|
if (clone_flags & CLONE_CLEAR_SIGHAND)
|
|
flush_signal_handlers(tsk, 0);
|
|
|
|
return 0;
|
|
}
|
|
|
|
void __cleanup_sighand(struct sighand_struct *sighand)
|
|
{
|
|
if (refcount_dec_and_test(&sighand->count)) {
|
|
signalfd_cleanup(sighand);
|
|
/*
|
|
* sighand_cachep is SLAB_TYPESAFE_BY_RCU so we can free it
|
|
* without an RCU grace period, see __lock_task_sighand().
|
|
*/
|
|
kmem_cache_free(sighand_cachep, sighand);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Initialize POSIX timer handling for a thread group.
|
|
*/
|
|
static void posix_cpu_timers_init_group(struct signal_struct *sig)
|
|
{
|
|
struct posix_cputimers *pct = &sig->posix_cputimers;
|
|
unsigned long cpu_limit;
|
|
|
|
cpu_limit = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
|
|
posix_cputimers_group_init(pct, cpu_limit);
|
|
}
|
|
|
|
static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
|
|
{
|
|
struct signal_struct *sig;
|
|
|
|
if (clone_flags & CLONE_THREAD)
|
|
return 0;
|
|
|
|
sig = kmem_cache_zalloc(signal_cachep, GFP_KERNEL);
|
|
tsk->signal = sig;
|
|
if (!sig)
|
|
return -ENOMEM;
|
|
|
|
sig->nr_threads = 1;
|
|
sig->quick_threads = 1;
|
|
atomic_set(&sig->live, 1);
|
|
refcount_set(&sig->sigcnt, 1);
|
|
|
|
/* list_add(thread_node, thread_head) without INIT_LIST_HEAD() */
|
|
sig->thread_head = (struct list_head)LIST_HEAD_INIT(tsk->thread_node);
|
|
tsk->thread_node = (struct list_head)LIST_HEAD_INIT(sig->thread_head);
|
|
|
|
init_waitqueue_head(&sig->wait_chldexit);
|
|
sig->curr_target = tsk;
|
|
init_sigpending(&sig->shared_pending);
|
|
INIT_HLIST_HEAD(&sig->multiprocess);
|
|
seqlock_init(&sig->stats_lock);
|
|
prev_cputime_init(&sig->prev_cputime);
|
|
|
|
#ifdef CONFIG_POSIX_TIMERS
|
|
INIT_LIST_HEAD(&sig->posix_timers);
|
|
hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
|
|
sig->real_timer.function = it_real_fn;
|
|
#endif
|
|
|
|
task_lock(current->group_leader);
|
|
memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
|
|
task_unlock(current->group_leader);
|
|
|
|
posix_cpu_timers_init_group(sig);
|
|
|
|
tty_audit_fork(sig);
|
|
sched_autogroup_fork(sig);
|
|
|
|
sig->oom_score_adj = current->signal->oom_score_adj;
|
|
sig->oom_score_adj_min = current->signal->oom_score_adj_min;
|
|
|
|
mutex_init(&sig->cred_guard_mutex);
|
|
init_rwsem(&sig->exec_update_lock);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * copy_seccomp - propagate the current task's seccomp state to a new task
 * @p: the new task
 *
 * Without CONFIG_SECCOMP this is a no-op.  The caller must hold
 * current->sighand->siglock (asserted below).
 */
static void copy_seccomp(struct task_struct *p)
{
#ifdef CONFIG_SECCOMP
	/*
	 * Must be called with sighand->siglock held, which is common to
	 * all threads in the group. Holding cred_guard_mutex is not
	 * needed because this new task is not yet running and cannot
	 * be racing exec.
	 */
	assert_spin_locked(&current->sighand->siglock);

	/* Ref-count the new filter user, and assign it. */
	get_seccomp_filter(current);
	p->seccomp = current->seccomp;

	/*
	 * Explicitly enable no_new_privs here in case it got set
	 * between the task_struct being duplicated and holding the
	 * sighand lock. The seccomp state and nnp must be in sync.
	 */
	if (task_no_new_privs(current))
		task_set_no_new_privs(p);

	/*
	 * If the parent gained a seccomp mode after the thread flags
	 * were copied but before we took the sighand lock, we have to
	 * manually enable the seccomp thread flag here.
	 */
	if (p->seccomp.mode != SECCOMP_MODE_DISABLED)
		set_task_syscall_work(p, SECCOMP);
#endif
}
|
|
|
|
/*
 * set_tid_address() system call: record @tidptr as the calling task's
 * clear_child_tid pointer and return the caller's id as reported by
 * task_pid_vnr().
 */
SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
{
	current->clear_child_tid = tidptr;

	return task_pid_vnr(current);
}
|
|
|
|
static void rt_mutex_init_task(struct task_struct *p)
|
|
{
|
|
raw_spin_lock_init(&p->pi_lock);
|
|
#ifdef CONFIG_RT_MUTEXES
|
|
p->pi_waiters = RB_ROOT_CACHED;
|
|
p->pi_top_task = NULL;
|
|
p->pi_blocked_on = NULL;
|
|
#endif
|
|
}
|
|
|
|
static inline void init_task_pid_links(struct task_struct *task)
|
|
{
|
|
enum pid_type type;
|
|
|
|
for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type)
|
|
INIT_HLIST_NODE(&task->pid_links[type]);
|
|
}
|
|
|
|
static inline void
|
|
init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid)
|
|
{
|
|
if (type == PIDTYPE_PID)
|
|
task->thread_pid = pid;
|
|
else
|
|
task->signal->pids[type] = pid;
|
|
}
|
|
|
|
/*
 * Reset the per-task RCU state of a new task @p.  Each group of fields is
 * only present, and only reset, when the matching config option is enabled.
 */
static inline void rcu_copy_process(struct task_struct *p)
{
#if defined(CONFIG_PREEMPT_RCU)
	p->rcu_read_lock_nesting = 0;
	p->rcu_read_unlock_special.s = 0;
	p->rcu_blocked_node = NULL;
	INIT_LIST_HEAD(&p->rcu_node_entry);
#endif /* CONFIG_PREEMPT_RCU */

#if defined(CONFIG_TASKS_RCU)
	p->rcu_tasks_holdout = false;
	INIT_LIST_HEAD(&p->rcu_tasks_holdout_list);
	p->rcu_tasks_idle_cpu = -1;
#endif /* CONFIG_TASKS_RCU */

#if defined(CONFIG_TASKS_TRACE_RCU)
	p->trc_reader_nesting = 0;
	p->trc_reader_special.s = 0;
	INIT_LIST_HEAD(&p->trc_holdout_list);
	INIT_LIST_HEAD(&p->trc_blkd_node);
#endif /* CONFIG_TASKS_TRACE_RCU */
}
|
|
|
|
struct pid *pidfd_pid(const struct file *file)
|
|
{
|
|
if (file->f_op == &pidfd_fops)
|
|
return file->private_data;
|
|
|
|
return ERR_PTR(-EBADF);
|
|
}
|
|
|
|
static int pidfd_release(struct inode *inode, struct file *file)
|
|
{
|
|
struct pid *pid = file->private_data;
|
|
|
|
file->private_data = NULL;
|
|
put_pid(pid);
|
|
return 0;
|
|
}
|
|
|
|
#ifdef CONFIG_PROC_FS
|
|
/**
|
|
* pidfd_show_fdinfo - print information about a pidfd
|
|
* @m: proc fdinfo file
|
|
* @f: file referencing a pidfd
|
|
*
|
|
* Pid:
|
|
* This function will print the pid that a given pidfd refers to in the
|
|
* pid namespace of the procfs instance.
|
|
* If the pid namespace of the process is not a descendant of the pid
|
|
* namespace of the procfs instance 0 will be shown as its pid. This is
|
|
* similar to calling getppid() on a process whose parent is outside of
|
|
* its pid namespace.
|
|
*
|
|
* NSpid:
|
|
* If pid namespaces are supported then this function will also print
|
|
* the pid of a given pidfd refers to for all descendant pid namespaces
|
|
* starting from the current pid namespace of the instance, i.e. the
|
|
* Pid field and the first entry in the NSpid field will be identical.
|
|
* If the pid namespace of the process is not a descendant of the pid
|
|
* namespace of the procfs instance 0 will be shown as its first NSpid
|
|
* entry and no others will be shown.
|
|
* Note that this differs from the Pid and NSpid fields in
|
|
* /proc/<pid>/status where Pid and NSpid are always shown relative to
|
|
* the pid namespace of the procfs instance. The difference becomes
|
|
* obvious when sending around a pidfd between pid namespaces from a
|
|
* different branch of the tree, i.e. where no ancestral relation is
|
|
* present between the pid namespaces:
|
|
* - create two new pid namespaces ns1 and ns2 in the initial pid
|
|
* namespace (also take care to create new mount namespaces in the
|
|
* new pid namespace and mount procfs)
|
|
* - create a process with a pidfd in ns1
|
|
* - send pidfd from ns1 to ns2
|
|
* - read /proc/self/fdinfo/<pidfd> and observe that both Pid and NSpid
|
|
* have exactly one entry, which is 0
|
|
*/
|
|
static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
|
|
{
|
|
struct pid *pid = f->private_data;
|
|
struct pid_namespace *ns;
|
|
pid_t nr = -1;
|
|
|
|
if (likely(pid_has_task(pid, PIDTYPE_PID))) {
|
|
ns = proc_pid_ns(file_inode(m->file)->i_sb);
|
|
nr = pid_nr_ns(pid, ns);
|
|
}
|
|
|
|
seq_put_decimal_ll(m, "Pid:\t", nr);
|
|
|
|
#ifdef CONFIG_PID_NS
|
|
seq_put_decimal_ll(m, "\nNSpid:\t", nr);
|
|
if (nr > 0) {
|
|
int i;
|
|
|
|
/* If nr is non-zero it means that 'pid' is valid and that
|
|
* ns, i.e. the pid namespace associated with the procfs
|
|
* instance, is in the pid namespace hierarchy of pid.
|
|
* Start at one below the already printed level.
|
|
*/
|
|
for (i = ns->level + 1; i <= pid->level; i++)
|
|
seq_put_decimal_ll(m, "\t", pid->numbers[i].nr);
|
|
}
|
|
#endif
|
|
seq_putc(m, '\n');
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* Poll support for process exit notification.
|
|
*/
|
|
static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
|
|
{
|
|
struct pid *pid = file->private_data;
|
|
__poll_t poll_flags = 0;
|
|
|
|
poll_wait(file, &pid->wait_pidfd, pts);
|
|
|
|
/*
|
|
* Inform pollers only when the whole thread group exits.
|
|
* If the thread group leader exits before all other threads in the
|
|
* group, then poll(2) should block, similar to the wait(2) family.
|
|
*/
|
|
if (thread_group_exited(pid))
|
|
poll_flags = EPOLLIN | EPOLLRDNORM;
|
|
|
|
return poll_flags;
|
|
}
|
|
|
|
const struct file_operations pidfd_fops = {
|
|
.release = pidfd_release,
|
|
.poll = pidfd_poll,
|
|
#ifdef CONFIG_PROC_FS
|
|
.show_fdinfo = pidfd_show_fdinfo,
|
|
#endif
|
|
};
|
|
|
|
static void __delayed_free_task(struct rcu_head *rhp)
|
|
{
|
|
struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
|
|
|
|
free_task(tsk);
|
|
}
|
|
|
|
static __always_inline void delayed_free_task(struct task_struct *tsk)
|
|
{
|
|
if (IS_ENABLED(CONFIG_MEMCG))
|
|
call_rcu(&tsk->rcu, __delayed_free_task);
|
|
else
|
|
free_task(tsk);
|
|
}
|
|
|
|
/*
 * copy_oom_score_adj - re-sync oom_score_adj for a child sharing the mm
 * @clone_flags: flags of the clone request
 * @tsk: the new task
 *
 * Only acts when @tsk has an mm and the clone shares the VM without being a
 * thread or a vfork.  In that case the mm is flagged MMF_MULTIPROCESS and
 * the oom_score_adj values are copied again from current, all under
 * oom_adj_mutex.
 */
static void copy_oom_score_adj(u64 clone_flags, struct task_struct *tsk)
{
	const u64 sharing =
		clone_flags & (CLONE_VM | CLONE_THREAD | CLONE_VFORK);

	/* Skip if kernel thread */
	if (!tsk->mm)
		return;

	/* Skip if spawning a thread or using vfork */
	if (sharing != CLONE_VM)
		return;

	/* We need to synchronize with __set_oom_adj */
	mutex_lock(&oom_adj_mutex);
	set_bit(MMF_MULTIPROCESS, &tsk->mm->flags);
	/* Update the values in case they were changed after copy_signal */
	tsk->signal->oom_score_adj = current->signal->oom_score_adj;
	tsk->signal->oom_score_adj_min = current->signal->oom_score_adj_min;
	mutex_unlock(&oom_adj_mutex);
}
|
|
|
|
#ifdef CONFIG_RV
/* Mark every per-task RV monitor slot of the new task @p as not monitoring. */
static void rv_task_fork(struct task_struct *p)
{
	int slot = 0;

	while (slot < RV_PER_TASK_MONITORS) {
		p->rv[slot].da_mon.monitoring = false;
		slot++;
	}
}
#else
/* Without CONFIG_RV there is no per-task monitor state to reset. */
#define rv_task_fork(p) do {} while (0)
#endif
|
|
|
|
/*
|
|
* This creates a new process as a copy of the old one,
|
|
* but does not actually start it yet.
|
|
*
|
|
* It copies the registers, and all the appropriate
|
|
* parts of the process environment (as per the clone
|
|
* flags). The actual kick-off is left to the caller.
|
|
*/
|
|
static __latent_entropy struct task_struct *copy_process(
|
|
struct pid *pid,
|
|
int trace,
|
|
int node,
|
|
struct kernel_clone_args *args)
|
|
{
|
|
int pidfd = -1, retval;
|
|
struct task_struct *p;
|
|
struct multiprocess_signals delayed;
|
|
struct file *pidfile = NULL;
|
|
const u64 clone_flags = args->flags;
|
|
struct nsproxy *nsp = current->nsproxy;
|
|
|
|
/*
|
|
* Don't allow sharing the root directory with processes in a different
|
|
* namespace
|
|
*/
|
|
if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
/*
|
|
* Thread groups must share signals as well, and detached threads
|
|
* can only be started up within the thread group.
|
|
*/
|
|
if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
/*
|
|
* Shared signal handlers imply shared VM. By way of the above,
|
|
* thread groups also imply shared VM. Blocking this case allows
|
|
* for various simplifications in other code.
|
|
*/
|
|
if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
/*
|
|
* Siblings of global init remain as zombies on exit since they are
|
|
* not reaped by their parent (swapper). To solve this and to avoid
|
|
* multi-rooted process trees, prevent global and container-inits
|
|
* from creating siblings.
|
|
*/
|
|
if ((clone_flags & CLONE_PARENT) &&
|
|
current->signal->flags & SIGNAL_UNKILLABLE)
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
/*
|
|
* If the new process will be in a different pid or user namespace
|
|
* do not allow it to share a thread group with the forking task.
|
|
*/
|
|
if (clone_flags & CLONE_THREAD) {
|
|
if ((clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) ||
|
|
(task_active_pid_ns(current) != nsp->pid_ns_for_children))
|
|
return ERR_PTR(-EINVAL);
|
|
}
|
|
|
|
/*
|
|
* If the new process will be in a different time namespace
|
|
* do not allow it to share VM or a thread group with the forking task.
|
|
*/
|
|
if (clone_flags & (CLONE_THREAD | CLONE_VM)) {
|
|
if (nsp->time_ns != nsp->time_ns_for_children)
|
|
return ERR_PTR(-EINVAL);
|
|
}
|
|
|
|
if (clone_flags & CLONE_PIDFD) {
|
|
/*
|
|
* - CLONE_DETACHED is blocked so that we can potentially
|
|
* reuse it later for CLONE_PIDFD.
|
|
* - CLONE_THREAD is blocked until someone really needs it.
|
|
*/
|
|
if (clone_flags & (CLONE_DETACHED | CLONE_THREAD))
|
|
return ERR_PTR(-EINVAL);
|
|
}
|
|
|
|
/*
|
|
* Force any signals received before this point to be delivered
|
|
* before the fork happens. Collect up signals sent to multiple
|
|
* processes that happen during the fork and delay them so that
|
|
* they appear to happen after the fork.
|
|
*/
|
|
sigemptyset(&delayed.signal);
|
|
INIT_HLIST_NODE(&delayed.node);
|
|
|
|
spin_lock_irq(¤t->sighand->siglock);
|
|
if (!(clone_flags & CLONE_THREAD))
|
|
hlist_add_head(&delayed.node, ¤t->signal->multiprocess);
|
|
recalc_sigpending();
|
|
spin_unlock_irq(¤t->sighand->siglock);
|
|
retval = -ERESTARTNOINTR;
|
|
if (task_sigpending(current))
|
|
goto fork_out;
|
|
|
|
retval = -ENOMEM;
|
|
p = dup_task_struct(current, node);
|
|
if (!p)
|
|
goto fork_out;
|
|
p->flags &= ~PF_KTHREAD;
|
|
if (args->kthread)
|
|
p->flags |= PF_KTHREAD;
|
|
if (args->io_thread) {
|
|
/*
|
|
* Mark us an IO worker, and block any signal that isn't
|
|
* fatal or STOP
|
|
*/
|
|
p->flags |= PF_IO_WORKER;
|
|
siginitsetinv(&p->blocked, sigmask(SIGKILL)|sigmask(SIGSTOP));
|
|
}
|
|
|
|
cpufreq_task_times_init(p);
|
|
|
|
p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? args->child_tid : NULL;
|
|
/*
|
|
* Clear TID on mm_release()?
|
|
*/
|
|
p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? args->child_tid : NULL;
|
|
|
|
ftrace_graph_init_task(p);
|
|
|
|
rt_mutex_init_task(p);
|
|
|
|
lockdep_assert_irqs_enabled();
|
|
#ifdef CONFIG_PROVE_LOCKING
|
|
DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
|
|
#endif
|
|
retval = copy_creds(p, clone_flags);
|
|
if (retval < 0)
|
|
goto bad_fork_free;
|
|
|
|
retval = -EAGAIN;
|
|
if (is_rlimit_overlimit(task_ucounts(p), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) {
|
|
if (p->real_cred->user != INIT_USER &&
|
|
!capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
|
|
goto bad_fork_cleanup_count;
|
|
}
|
|
current->flags &= ~PF_NPROC_EXCEEDED;
|
|
|
|
/*
|
|
* If multiple threads are within copy_process(), then this check
|
|
* triggers too late. This doesn't hurt, the check is only there
|
|
* to stop root fork bombs.
|
|
*/
|
|
retval = -EAGAIN;
|
|
if (data_race(nr_threads >= max_threads))
|
|
goto bad_fork_cleanup_count;
|
|
|
|
delayacct_tsk_init(p); /* Must remain after dup_task_struct() */
|
|
p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER | PF_IDLE | PF_NO_SETAFFINITY);
|
|
p->flags |= PF_FORKNOEXEC;
|
|
INIT_LIST_HEAD(&p->children);
|
|
INIT_LIST_HEAD(&p->sibling);
|
|
rcu_copy_process(p);
|
|
p->vfork_done = NULL;
|
|
spin_lock_init(&p->alloc_lock);
|
|
|
|
init_sigpending(&p->pending);
|
|
|
|
p->utime = p->stime = p->gtime = 0;
|
|
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
|
|
p->utimescaled = p->stimescaled = 0;
|
|
#endif
|
|
prev_cputime_init(&p->prev_cputime);
|
|
|
|
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
|
|
seqcount_init(&p->vtime.seqcount);
|
|
p->vtime.starttime = 0;
|
|
p->vtime.state = VTIME_INACTIVE;
|
|
#endif
|
|
|
|
#ifdef CONFIG_IO_URING
|
|
p->io_uring = NULL;
|
|
#endif
|
|
|
|
#if defined(SPLIT_RSS_COUNTING)
|
|
memset(&p->rss_stat, 0, sizeof(p->rss_stat));
|
|
#endif
|
|
|
|
p->default_timer_slack_ns = current->timer_slack_ns;
|
|
|
|
#ifdef CONFIG_PSI
|
|
p->psi_flags = 0;
|
|
#endif
|
|
|
|
task_io_accounting_init(&p->ioac);
|
|
acct_clear_integrals(p);
|
|
|
|
posix_cputimers_init(&p->posix_cputimers);
|
|
|
|
p->io_context = NULL;
|
|
audit_set_context(p, NULL);
|
|
cgroup_fork(p);
|
|
if (args->kthread) {
|
|
if (!set_kthread_struct(p))
|
|
goto bad_fork_cleanup_delayacct;
|
|
}
|
|
#ifdef CONFIG_NUMA
|
|
p->mempolicy = mpol_dup(p->mempolicy);
|
|
if (IS_ERR(p->mempolicy)) {
|
|
retval = PTR_ERR(p->mempolicy);
|
|
p->mempolicy = NULL;
|
|
goto bad_fork_cleanup_delayacct;
|
|
}
|
|
#endif
|
|
#ifdef CONFIG_CPUSETS
|
|
p->cpuset_mem_spread_rotor = NUMA_NO_NODE;
|
|
p->cpuset_slab_spread_rotor = NUMA_NO_NODE;
|
|
seqcount_spinlock_init(&p->mems_allowed_seq, &p->alloc_lock);
|
|
#endif
|
|
#ifdef CONFIG_TRACE_IRQFLAGS
|
|
memset(&p->irqtrace, 0, sizeof(p->irqtrace));
|
|
p->irqtrace.hardirq_disable_ip = _THIS_IP_;
|
|
p->irqtrace.softirq_enable_ip = _THIS_IP_;
|
|
p->softirqs_enabled = 1;
|
|
p->softirq_context = 0;
|
|
#endif
|
|
|
|
p->pagefault_disabled = 0;
|
|
|
|
#ifdef CONFIG_LOCKDEP
|
|
lockdep_init_task(p);
|
|
#endif
|
|
|
|
#ifdef CONFIG_DEBUG_MUTEXES
|
|
p->blocked_on = NULL; /* not blocked yet */
|
|
#endif
|
|
#ifdef CONFIG_BCACHE
|
|
p->sequential_io = 0;
|
|
p->sequential_io_avg = 0;
|
|
#endif
|
|
#ifdef CONFIG_BPF_SYSCALL
|
|
RCU_INIT_POINTER(p->bpf_storage, NULL);
|
|
p->bpf_ctx = NULL;
|
|
#endif
|
|
|
|
/* Perform scheduler related setup. Assign this task to a CPU. */
|
|
retval = sched_fork(clone_flags, p);
|
|
if (retval)
|
|
goto bad_fork_cleanup_policy;
|
|
|
|
retval = perf_event_init_task(p, clone_flags);
|
|
if (retval)
|
|
goto bad_fork_cleanup_policy;
|
|
retval = audit_alloc(p);
|
|
if (retval)
|
|
goto bad_fork_cleanup_perf;
|
|
/* copy all the process information */
|
|
shm_init_task(p);
|
|
retval = security_task_alloc(p, clone_flags);
|
|
if (retval)
|
|
goto bad_fork_cleanup_audit;
|
|
retval = copy_semundo(clone_flags, p);
|
|
if (retval)
|
|
goto bad_fork_cleanup_security;
|
|
retval = copy_files(clone_flags, p);
|
|
if (retval)
|
|
goto bad_fork_cleanup_semundo;
|
|
retval = copy_fs(clone_flags, p);
|
|
if (retval)
|
|
goto bad_fork_cleanup_files;
|
|
retval = copy_sighand(clone_flags, p);
|
|
if (retval)
|
|
goto bad_fork_cleanup_fs;
|
|
retval = copy_signal(clone_flags, p);
|
|
if (retval)
|
|
goto bad_fork_cleanup_sighand;
|
|
retval = copy_mm(clone_flags, p);
|
|
if (retval)
|
|
goto bad_fork_cleanup_signal;
|
|
retval = copy_namespaces(clone_flags, p);
|
|
if (retval)
|
|
goto bad_fork_cleanup_mm;
|
|
retval = copy_io(clone_flags, p);
|
|
if (retval)
|
|
goto bad_fork_cleanup_namespaces;
|
|
retval = copy_thread(p, args);
|
|
if (retval)
|
|
goto bad_fork_cleanup_io;
|
|
|
|
stackleak_task_init(p);
|
|
|
|
if (pid != &init_struct_pid) {
|
|
pid = alloc_pid(p->nsproxy->pid_ns_for_children, args->set_tid,
|
|
args->set_tid_size);
|
|
if (IS_ERR(pid)) {
|
|
retval = PTR_ERR(pid);
|
|
goto bad_fork_cleanup_thread;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* This has to happen after we've potentially unshared the file
|
|
* descriptor table (so that the pidfd doesn't leak into the child
|
|
* if the fd table isn't shared).
|
|
*/
|
|
if (clone_flags & CLONE_PIDFD) {
|
|
retval = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
|
|
if (retval < 0)
|
|
goto bad_fork_free_pid;
|
|
|
|
pidfd = retval;
|
|
|
|
pidfile = anon_inode_getfile("[pidfd]", &pidfd_fops, pid,
|
|
O_RDWR | O_CLOEXEC);
|
|
if (IS_ERR(pidfile)) {
|
|
put_unused_fd(pidfd);
|
|
retval = PTR_ERR(pidfile);
|
|
goto bad_fork_free_pid;
|
|
}
|
|
get_pid(pid); /* held by pidfile now */
|
|
|
|
retval = put_user(pidfd, args->pidfd);
|
|
if (retval)
|
|
goto bad_fork_put_pidfd;
|
|
}
|
|
|
|
#ifdef CONFIG_BLOCK
|
|
p->plug = NULL;
|
|
#endif
|
|
futex_init_task(p);
|
|
|
|
/*
|
|
* sigaltstack should be cleared when sharing the same VM
|
|
*/
|
|
if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
|
|
sas_ss_reset(p);
|
|
|
|
/*
|
|
* Syscall tracing and stepping should be turned off in the
|
|
* child regardless of CLONE_PTRACE.
|
|
*/
|
|
user_disable_single_step(p);
|
|
clear_task_syscall_work(p, SYSCALL_TRACE);
|
|
#if defined(CONFIG_GENERIC_ENTRY) || defined(TIF_SYSCALL_EMU)
|
|
clear_task_syscall_work(p, SYSCALL_EMU);
|
|
#endif
|
|
clear_tsk_latency_tracing(p);
|
|
|
|
/* ok, now we should be set up.. */
|
|
p->pid = pid_nr(pid);
|
|
if (clone_flags & CLONE_THREAD) {
|
|
p->group_leader = current->group_leader;
|
|
p->tgid = current->tgid;
|
|
} else {
|
|
p->group_leader = p;
|
|
p->tgid = p->pid;
|
|
}
|
|
|
|
p->nr_dirtied = 0;
|
|
p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10);
|
|
p->dirty_paused_when = 0;
|
|
|
|
p->pdeath_signal = 0;
|
|
INIT_LIST_HEAD(&p->thread_group);
|
|
p->task_works = NULL;
|
|
clear_posix_cputimers_work(p);
|
|
|
|
#ifdef CONFIG_KRETPROBES
|
|
p->kretprobe_instances.first = NULL;
|
|
#endif
|
|
#ifdef CONFIG_RETHOOK
|
|
p->rethooks.first = NULL;
|
|
#endif
|
|
|
|
/*
|
|
* Ensure that the cgroup subsystem policies allow the new process to be
|
|
* forked. It should be noted that the new process's css_set can be changed
|
|
* between here and cgroup_post_fork() if an organisation operation is in
|
|
* progress.
|
|
*/
|
|
retval = cgroup_can_fork(p, args);
|
|
if (retval)
|
|
goto bad_fork_put_pidfd;
|
|
|
|
/*
|
|
* Now that the cgroups are pinned, re-clone the parent cgroup and put
|
|
* the new task on the correct runqueue. All this *before* the task
|
|
* becomes visible.
|
|
*
|
|
* This isn't part of ->can_fork() because while the re-cloning is
|
|
* cgroup specific, it unconditionally needs to place the task on a
|
|
* runqueue.
|
|
*/
|
|
sched_cgroup_fork(p, args);
|
|
|
|
/*
|
|
* From this point on we must avoid any synchronous user-space
|
|
* communication until we take the tasklist-lock. In particular, we do
|
|
* not want user-space to be able to predict the process start-time by
|
|
* stalling fork(2) after we recorded the start_time but before it is
|
|
* visible to the system.
|
|
*/
|
|
|
|
p->start_time = ktime_get_ns();
|
|
p->start_boottime = ktime_get_boottime_ns();
|
|
|
|
/*
|
|
* Make it visible to the rest of the system, but dont wake it up yet.
|
|
* Need tasklist lock for parent etc handling!
|
|
*/
|
|
write_lock_irq(&tasklist_lock);
|
|
|
|
/* CLONE_PARENT re-uses the old parent */
|
|
if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
|
|
p->real_parent = current->real_parent;
|
|
p->parent_exec_id = current->parent_exec_id;
|
|
if (clone_flags & CLONE_THREAD)
|
|
p->exit_signal = -1;
|
|
else
|
|
p->exit_signal = current->group_leader->exit_signal;
|
|
} else {
|
|
p->real_parent = current;
|
|
p->parent_exec_id = current->self_exec_id;
|
|
p->exit_signal = args->exit_signal;
|
|
}
|
|
|
|
klp_copy_process(p);
|
|
|
|
sched_core_fork(p);
|
|
|
|
spin_lock(¤t->sighand->siglock);
|
|
|
|
rv_task_fork(p);
|
|
|
|
rseq_fork(p, clone_flags);
|
|
|
|
/* Don't start children in a dying pid namespace */
|
|
if (unlikely(!(ns_of_pid(pid)->pid_allocated & PIDNS_ADDING))) {
|
|
retval = -ENOMEM;
|
|
goto bad_fork_cancel_cgroup;
|
|
}
|
|
|
|
/* Let kill terminate clone/fork in the middle */
|
|
if (fatal_signal_pending(current)) {
|
|
retval = -EINTR;
|
|
goto bad_fork_cancel_cgroup;
|
|
}
|
|
|
|
/* No more failure paths after this point. */
|
|
|
|
/*
|
|
* Copy seccomp details explicitly here, in case they were changed
|
|
* before holding sighand lock.
|
|
*/
|
|
copy_seccomp(p);
|
|
|
|
init_task_pid_links(p);
|
|
if (likely(p->pid)) {
|
|
ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
|
|
|
|
init_task_pid(p, PIDTYPE_PID, pid);
|
|
if (thread_group_leader(p)) {
|
|
init_task_pid(p, PIDTYPE_TGID, pid);
|
|
init_task_pid(p, PIDTYPE_PGID, task_pgrp(current));
|
|
init_task_pid(p, PIDTYPE_SID, task_session(current));
|
|
|
|
if (is_child_reaper(pid)) {
|
|
ns_of_pid(pid)->child_reaper = p;
|
|
p->signal->flags |= SIGNAL_UNKILLABLE;
|
|
}
|
|
p->signal->shared_pending.signal = delayed.signal;
|
|
p->signal->tty = tty_kref_get(current->signal->tty);
|
|
/*
|
|
* Inherit has_child_subreaper flag under the same
|
|
* tasklist_lock with adding child to the process tree
|
|
* for propagate_has_child_subreaper optimization.
|
|
*/
|
|
p->signal->has_child_subreaper = p->real_parent->signal->has_child_subreaper ||
|
|
p->real_parent->signal->is_child_subreaper;
|
|
list_add_tail(&p->sibling, &p->real_parent->children);
|
|
list_add_tail_rcu(&p->tasks, &init_task.tasks);
|
|
attach_pid(p, PIDTYPE_TGID);
|
|
attach_pid(p, PIDTYPE_PGID);
|
|
attach_pid(p, PIDTYPE_SID);
|
|
__this_cpu_inc(process_counts);
|
|
} else {
|
|
current->signal->nr_threads++;
|
|
current->signal->quick_threads++;
|
|
atomic_inc(¤t->signal->live);
|
|
refcount_inc(¤t->signal->sigcnt);
|
|
task_join_group_stop(p);
|
|
list_add_tail_rcu(&p->thread_group,
|
|
&p->group_leader->thread_group);
|
|
list_add_tail_rcu(&p->thread_node,
|
|
&p->signal->thread_head);
|
|
}
|
|
attach_pid(p, PIDTYPE_PID);
|
|
nr_threads++;
|
|
}
|
|
total_forks++;
|
|
hlist_del_init(&delayed.node);
|
|
spin_unlock(¤t->sighand->siglock);
|
|
syscall_tracepoint_update(p);
|
|
write_unlock_irq(&tasklist_lock);
|
|
|
|
if (pidfile)
|
|
fd_install(pidfd, pidfile);
|
|
|
|
proc_fork_connector(p);
|
|
sched_post_fork(p);
|
|
cgroup_post_fork(p, args);
|
|
perf_event_fork(p);
|
|
|
|
trace_task_newtask(p, clone_flags);
|
|
uprobe_copy_process(p, clone_flags);
|
|
|
|
copy_oom_score_adj(clone_flags, p);
|
|
|
|
return p;
|
|
|
|
bad_fork_cancel_cgroup:
|
|
sched_core_free(p);
|
|
spin_unlock(¤t->sighand->siglock);
|
|
write_unlock_irq(&tasklist_lock);
|
|
cgroup_cancel_fork(p, args);
|
|
bad_fork_put_pidfd:
|
|
if (clone_flags & CLONE_PIDFD) {
|
|
fput(pidfile);
|
|
put_unused_fd(pidfd);
|
|
}
|
|
bad_fork_free_pid:
|
|
if (pid != &init_struct_pid)
|
|
free_pid(pid);
|
|
bad_fork_cleanup_thread:
|
|
exit_thread(p);
|
|
bad_fork_cleanup_io:
|
|
if (p->io_context)
|
|
exit_io_context(p);
|
|
bad_fork_cleanup_namespaces:
|
|
exit_task_namespaces(p);
|
|
bad_fork_cleanup_mm:
|
|
if (p->mm) {
|
|
mm_clear_owner(p->mm, p);
|
|
mmput(p->mm);
|
|
}
|
|
bad_fork_cleanup_signal:
|
|
if (!(clone_flags & CLONE_THREAD))
|
|
free_signal_struct(p->signal);
|
|
bad_fork_cleanup_sighand:
|
|
__cleanup_sighand(p->sighand);
|
|
bad_fork_cleanup_fs:
|
|
exit_fs(p); /* blocking */
|
|
bad_fork_cleanup_files:
|
|
exit_files(p); /* blocking */
|
|
bad_fork_cleanup_semundo:
|
|
exit_sem(p);
|
|
bad_fork_cleanup_security:
|
|
security_task_free(p);
|
|
bad_fork_cleanup_audit:
|
|
audit_free(p);
|
|
bad_fork_cleanup_perf:
|
|
perf_event_free_task(p);
|
|
bad_fork_cleanup_policy:
|
|
lockdep_free_task(p);
|
|
#ifdef CONFIG_NUMA
|
|
mpol_put(p->mempolicy);
|
|
#endif
|
|
bad_fork_cleanup_delayacct:
|
|
delayacct_tsk_free(p);
|
|
bad_fork_cleanup_count:
|
|
dec_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1);
|
|
exit_creds(p);
|
|
bad_fork_free:
|
|
WRITE_ONCE(p->__state, TASK_DEAD);
|
|
exit_task_stack_account(p);
|
|
put_task_stack(p);
|
|
delayed_free_task(p);
|
|
fork_out:
|
|
spin_lock_irq(¤t->sighand->siglock);
|
|
hlist_del_init(&delayed.node);
|
|
spin_unlock_irq(¤t->sighand->siglock);
|
|
return ERR_PTR(retval);
|
|
}
|
|
|
|
static inline void init_idle_pids(struct task_struct *idle)
|
|
{
|
|
enum pid_type type;
|
|
|
|
for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) {
|
|
INIT_HLIST_NODE(&idle->pid_links[type]); /* not really needed */
|
|
init_task_pid(idle, type, &init_struct_pid);
|
|
}
|
|
}
|
|
|
|
/*
 * Placeholder thread function handed to copy_process() by fork_idle().
 * It is never actually called.
 */
static int idle_dummy(void *dummy)
{
	return 0;
}
|
|
|
|
/*
 * Create the idle task for @cpu.
 *
 * The task is built by copy_process() using init_struct_pid and the memory
 * node of @cpu.  On success all of its pid links are pointed at
 * init_struct_pid and init_idle() is run on it.
 *
 * Returns the new task, or the ERR_PTR() produced by copy_process().
 */
struct task_struct * __init fork_idle(int cpu)
{
	struct kernel_clone_args args = {
		.flags		= CLONE_VM,
		.fn		= &idle_dummy,
		.fn_arg		= NULL,
		.kthread	= 1,
		.idle		= 1,
	};
	struct task_struct *idle;

	idle = copy_process(&init_struct_pid, 0, cpu_to_node(cpu), &args);
	if (IS_ERR(idle))
		return idle;

	init_idle_pids(idle);
	init_idle(idle, cpu);

	return idle;
}
|
|
|
|
struct mm_struct *copy_init_mm(void)
|
|
{
|
|
return dup_mm(NULL, &init_mm);
|
|
}
|
|
|
|
/*
|
|
* This is like kernel_clone(), but shaved down and tailored to just
|
|
* creating io_uring workers. It returns a created task, or an error pointer.
|
|
* The returned task is inactive, and the caller must fire it up through
|
|
* wake_up_new_task(p). All signals are blocked in the created task.
|
|
*/
|
|
struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node)
|
|
{
|
|
unsigned long flags = CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|
|
|
CLONE_IO;
|
|
struct kernel_clone_args args = {
|
|
.flags = ((lower_32_bits(flags) | CLONE_VM |
|
|
CLONE_UNTRACED) & ~CSIGNAL),
|
|
.exit_signal = (lower_32_bits(flags) & CSIGNAL),
|
|
.fn = fn,
|
|
.fn_arg = arg,
|
|
.io_thread = 1,
|
|
};
|
|
|
|
return copy_process(NULL, 0, node, &args);
|
|
}
|
|
|
|
/*
|
|
* Ok, this is the main fork-routine.
|
|
*
|
|
* It copies the process, and if successful kick-starts
|
|
* it and waits for it to finish using the VM if required.
|
|
*
|
|
* args->exit_signal is expected to be checked for sanity by the caller.
|
|
*/
|
|
pid_t kernel_clone(struct kernel_clone_args *args)
|
|
{
|
|
u64 clone_flags = args->flags;
|
|
struct completion vfork;
|
|
struct pid *pid;
|
|
struct task_struct *p;
|
|
int trace = 0;
|
|
pid_t nr;
|
|
|
|
/*
|
|
* For legacy clone() calls, CLONE_PIDFD uses the parent_tid argument
|
|
* to return the pidfd. Hence, CLONE_PIDFD and CLONE_PARENT_SETTID are
|
|
* mutually exclusive. With clone3() CLONE_PIDFD has grown a separate
|
|
* field in struct clone_args and it still doesn't make sense to have
|
|
* them both point at the same memory location. Performing this check
|
|
* here has the advantage that we don't need to have a separate helper
|
|
* to check for legacy clone().
|
|
*/
|
|
if ((args->flags & CLONE_PIDFD) &&
|
|
(args->flags & CLONE_PARENT_SETTID) &&
|
|
(args->pidfd == args->parent_tid))
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* Determine whether and which event to report to ptracer. When
|
|
* called from kernel_thread or CLONE_UNTRACED is explicitly
|
|
* requested, no event is reported; otherwise, report if the event
|
|
* for the type of forking is enabled.
|
|
*/
|
|
if (!(clone_flags & CLONE_UNTRACED)) {
|
|
if (clone_flags & CLONE_VFORK)
|
|
trace = PTRACE_EVENT_VFORK;
|
|
else if (args->exit_signal != SIGCHLD)
|
|
trace = PTRACE_EVENT_CLONE;
|
|
else
|
|
trace = PTRACE_EVENT_FORK;
|
|
|
|
if (likely(!ptrace_event_enabled(current, trace)))
|
|
trace = 0;
|
|
}
|
|
|
|
p = copy_process(NULL, trace, NUMA_NO_NODE, args);
|
|
add_latent_entropy();
|
|
|
|
if (IS_ERR(p))
|
|
return PTR_ERR(p);
|
|
|
|
cpufreq_task_times_alloc(p);
|
|
|
|
/*
|
|
* Do this prior waking up the new thread - the thread pointer
|
|
* might get invalid after that point, if the thread exits quickly.
|
|
*/
|
|
trace_sched_process_fork(current, p);
|
|
|
|
pid = get_task_pid(p, PIDTYPE_PID);
|
|
nr = pid_vnr(pid);
|
|
|
|
if (clone_flags & CLONE_PARENT_SETTID)
|
|
put_user(nr, args->parent_tid);
|
|
|
|
if (clone_flags & CLONE_VFORK) {
|
|
p->vfork_done = &vfork;
|
|
init_completion(&vfork);
|
|
get_task_struct(p);
|
|
}
|
|
|
|
if (IS_ENABLED(CONFIG_LRU_GEN) && !(clone_flags & CLONE_VM)) {
|
|
/* lock the task to synchronize with memcg migration */
|
|
task_lock(p);
|
|
lru_gen_add_mm(p->mm);
|
|
task_unlock(p);
|
|
}
|
|
|
|
wake_up_new_task(p);
|
|
|
|
/* forking complete and child started to run, tell ptracer */
|
|
if (unlikely(trace))
|
|
ptrace_event_pid(trace, pid);
|
|
|
|
if (clone_flags & CLONE_VFORK) {
|
|
if (!wait_for_vfork_done(p, &vfork))
|
|
ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid);
|
|
}
|
|
|
|
put_pid(pid);
|
|
return nr;
|
|
}
|
|
|
|
/*
|
|
* Create a kernel thread.
|
|
*/
|
|
pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
|
|
{
|
|
struct kernel_clone_args args = {
|
|
.flags = ((lower_32_bits(flags) | CLONE_VM |
|
|
CLONE_UNTRACED) & ~CSIGNAL),
|
|
.exit_signal = (lower_32_bits(flags) & CSIGNAL),
|
|
.fn = fn,
|
|
.fn_arg = arg,
|
|
.kthread = 1,
|
|
};
|
|
|
|
return kernel_clone(&args);
|
|
}
|
|
|
|
/*
|
|
* Create a user mode thread.
|
|
*/
|
|
pid_t user_mode_thread(int (*fn)(void *), void *arg, unsigned long flags)
|
|
{
|
|
struct kernel_clone_args args = {
|
|
.flags = ((lower_32_bits(flags) | CLONE_VM |
|
|
CLONE_UNTRACED) & ~CSIGNAL),
|
|
.exit_signal = (lower_32_bits(flags) & CSIGNAL),
|
|
.fn = fn,
|
|
.fn_arg = arg,
|
|
};
|
|
|
|
return kernel_clone(&args);
|
|
}
|
|
|
|
#ifdef __ARCH_WANT_SYS_FORK
/*
 * fork(): clone with no flags and SIGCHLD as the exit signal.
 * Fails with -EINVAL when the kernel is built without CONFIG_MMU.
 */
SYSCALL_DEFINE0(fork)
{
#ifndef CONFIG_MMU
	/* can not support in nommu mode */
	return -EINVAL;
#else
	struct kernel_clone_args args = {
		.exit_signal = SIGCHLD,
	};

	return kernel_clone(&args);
#endif
}
#endif
|
|
|
|
#ifdef __ARCH_WANT_SYS_VFORK
/*
 * vfork(): clone with CLONE_VFORK | CLONE_VM and SIGCHLD as the exit signal.
 */
SYSCALL_DEFINE0(vfork)
{
	struct kernel_clone_args args = {
		.exit_signal	= SIGCHLD,
		.flags		= CLONE_VFORK | CLONE_VM,
	};

	return kernel_clone(&args);
}
#endif
|
|
|
|
#ifdef __ARCH_WANT_SYS_CLONE
/*
 * Legacy clone().  The argument order depends on the architecture's
 * CONFIG_CLONE_BACKWARDS* selection; the body is shared.
 *
 * The low byte of @clone_flags (CSIGNAL) is the exit signal, the rest of the
 * low 32 bits are the clone flags.  @parent_tidptr is used both as the
 * parent_tid and as the pidfd destination.
 */
#ifdef CONFIG_CLONE_BACKWARDS
SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
		 int __user *, parent_tidptr,
		 unsigned long, tls,
		 int __user *, child_tidptr)
#elif defined(CONFIG_CLONE_BACKWARDS2)
SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags,
		 int __user *, parent_tidptr,
		 int __user *, child_tidptr,
		 unsigned long, tls)
#elif defined(CONFIG_CLONE_BACKWARDS3)
SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp,
		int, stack_size,
		int __user *, parent_tidptr,
		int __user *, child_tidptr,
		unsigned long, tls)
#else
SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
		 int __user *, parent_tidptr,
		 int __user *, child_tidptr,
		 unsigned long, tls)
#endif
{
	const u32 low = lower_32_bits(clone_flags);
	struct kernel_clone_args args = {
		.flags		= (low & ~CSIGNAL),
		.exit_signal	= (low & CSIGNAL),
		.pidfd		= parent_tidptr,
		.parent_tid	= parent_tidptr,
		.child_tid	= child_tidptr,
		.stack		= newsp,
		.tls		= tls,
	};

	return kernel_clone(&args);
}
#endif
|
|
|
|
#ifdef __ARCH_WANT_SYS_CLONE3
|
|
|
|
noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
|
|
struct clone_args __user *uargs,
|
|
size_t usize)
|
|
{
|
|
int err;
|
|
struct clone_args args;
|
|
pid_t *kset_tid = kargs->set_tid;
|
|
|
|
BUILD_BUG_ON(offsetofend(struct clone_args, tls) !=
|
|
CLONE_ARGS_SIZE_VER0);
|
|
BUILD_BUG_ON(offsetofend(struct clone_args, set_tid_size) !=
|
|
CLONE_ARGS_SIZE_VER1);
|
|
BUILD_BUG_ON(offsetofend(struct clone_args, cgroup) !=
|
|
CLONE_ARGS_SIZE_VER2);
|
|
BUILD_BUG_ON(sizeof(struct clone_args) != CLONE_ARGS_SIZE_VER2);
|
|
|
|
if (unlikely(usize > PAGE_SIZE))
|
|
return -E2BIG;
|
|
if (unlikely(usize < CLONE_ARGS_SIZE_VER0))
|
|
return -EINVAL;
|
|
|
|
err = copy_struct_from_user(&args, sizeof(args), uargs, usize);
|
|
if (err)
|
|
return err;
|
|
|
|
if (unlikely(args.set_tid_size > MAX_PID_NS_LEVEL))
|
|
return -EINVAL;
|
|
|
|
if (unlikely(!args.set_tid && args.set_tid_size > 0))
|
|
return -EINVAL;
|
|
|
|
if (unlikely(args.set_tid && args.set_tid_size == 0))
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* Verify that higher 32bits of exit_signal are unset and that
|
|
* it is a valid signal
|
|
*/
|
|
if (unlikely((args.exit_signal & ~((u64)CSIGNAL)) ||
|
|
!valid_signal(args.exit_signal)))
|
|
return -EINVAL;
|
|
|
|
if ((args.flags & CLONE_INTO_CGROUP) &&
|
|
(args.cgroup > INT_MAX || usize < CLONE_ARGS_SIZE_VER2))
|
|
return -EINVAL;
|
|
|
|
*kargs = (struct kernel_clone_args){
|
|
.flags = args.flags,
|
|
.pidfd = u64_to_user_ptr(args.pidfd),
|
|
.child_tid = u64_to_user_ptr(args.child_tid),
|
|
.parent_tid = u64_to_user_ptr(args.parent_tid),
|
|
.exit_signal = args.exit_signal,
|
|
.stack = args.stack,
|
|
.stack_size = args.stack_size,
|
|
.tls = args.tls,
|
|
.set_tid_size = args.set_tid_size,
|
|
.cgroup = args.cgroup,
|
|
};
|
|
|
|
if (args.set_tid &&
|
|
copy_from_user(kset_tid, u64_to_user_ptr(args.set_tid),
|
|
(kargs->set_tid_size * sizeof(pid_t))))
|
|
return -EFAULT;
|
|
|
|
kargs->set_tid = kset_tid;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* clone3_stack_valid - check and prepare stack
|
|
* @kargs: kernel clone args
|
|
*
|
|
* Verify that the stack arguments userspace gave us are sane.
|
|
* In addition, set the stack direction for userspace since it's easy for us to
|
|
* determine.
|
|
*/
|
|
static inline bool clone3_stack_valid(struct kernel_clone_args *kargs)
|
|
{
|
|
if (kargs->stack == 0) {
|
|
if (kargs->stack_size > 0)
|
|
return false;
|
|
} else {
|
|
if (kargs->stack_size == 0)
|
|
return false;
|
|
|
|
if (!access_ok((void __user *)kargs->stack, kargs->stack_size))
|
|
return false;
|
|
|
|
#if !defined(CONFIG_STACK_GROWSUP) && !defined(CONFIG_IA64)
|
|
kargs->stack += kargs->stack_size;
|
|
#endif
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static bool clone3_args_valid(struct kernel_clone_args *kargs)
|
|
{
|
|
/* Verify that no unknown flags are passed along. */
|
|
if (kargs->flags &
|
|
~(CLONE_LEGACY_FLAGS | CLONE_CLEAR_SIGHAND | CLONE_INTO_CGROUP))
|
|
return false;
|
|
|
|
/*
|
|
* - make the CLONE_DETACHED bit reusable for clone3
|
|
* - make the CSIGNAL bits reusable for clone3
|
|
*/
|
|
if (kargs->flags & (CLONE_DETACHED | CSIGNAL))
|
|
return false;
|
|
|
|
if ((kargs->flags & (CLONE_SIGHAND | CLONE_CLEAR_SIGHAND)) ==
|
|
(CLONE_SIGHAND | CLONE_CLEAR_SIGHAND))
|
|
return false;
|
|
|
|
if ((kargs->flags & (CLONE_THREAD | CLONE_PARENT)) &&
|
|
kargs->exit_signal)
|
|
return false;
|
|
|
|
if (!clone3_stack_valid(kargs))
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* clone3 - create a new process with specific properties
|
|
* @uargs: argument structure
|
|
* @size: size of @uargs
|
|
*
|
|
* clone3() is the extensible successor to clone()/clone2().
|
|
* It takes a struct as argument that is versioned by its size.
|
|
*
|
|
* Return: On success, a positive PID for the child process.
|
|
* On error, a negative errno number.
|
|
*/
|
|
SYSCALL_DEFINE2(clone3, struct clone_args __user *, uargs, size_t, size)
|
|
{
|
|
int err;
|
|
|
|
struct kernel_clone_args kargs;
|
|
pid_t set_tid[MAX_PID_NS_LEVEL];
|
|
|
|
kargs.set_tid = set_tid;
|
|
|
|
err = copy_clone_args_from_user(&kargs, uargs, size);
|
|
if (err)
|
|
return err;
|
|
|
|
if (!clone3_args_valid(&kargs))
|
|
return -EINVAL;
|
|
|
|
return kernel_clone(&kargs);
|
|
}
|
|
#endif
|
|
|
|
/*
 * Walk the descendants of @top's thread group under tasklist_lock (read),
 * calling @visitor(child, @data) on each child encountered.
 *
 * The visitor's return value steers the walk:
 *   < 0  stop the whole walk;
 *   > 0  descend into the children of that child's thread group;
 *   == 0 carry on with the next sibling.
 *
 * Once a thread group's children are exhausted the walk climbs back to the
 * real parent and resumes with the sibling following the subtree just
 * finished, until it is back at @top.
 */
void walk_process_tree(struct task_struct *top, proc_visitor visitor, void *data)
{
	struct task_struct *leader, *parent, *child;
	int verdict;

	read_lock(&tasklist_lock);
	top = top->group_leader;
	leader = top;
down:
	for_each_thread(leader, parent) {
		list_for_each_entry(child, &parent->children, sibling) {
			verdict = visitor(child, data);
			if (verdict < 0)
				goto out;
			if (verdict > 0) {
				leader = child;
				goto down;
			}
up:
			/* Re-entry point after a subtree has been walked. */
			;
		}
	}

	if (leader != top) {
		/* Climb one level and continue after the finished child. */
		child = leader;
		parent = child->real_parent;
		leader = parent->group_leader;
		goto up;
	}
out:
	read_unlock(&tasklist_lock);
}
|
|
|
|
/* Default to 0 when ARCH_MIN_MMSTRUCT_ALIGN has not been defined already. */
#ifndef ARCH_MIN_MMSTRUCT_ALIGN
#define ARCH_MIN_MMSTRUCT_ALIGN 0
#endif
|
|
|
|
static void sighand_ctor(void *data)
|
|
{
|
|
struct sighand_struct *sighand = data;
|
|
|
|
spin_lock_init(&sighand->siglock);
|
|
init_waitqueue_head(&sighand->signalfd_wqh);
|
|
}
|
|
|
|
/*
 * Create the slab caches used by fork (sighand, signal, files, fs,
 * mm_struct, vm_area_struct), then run mmap_init() and nsproxy_cache_init().
 */
void __init proc_caches_init(void)
{
	unsigned int mm_struct_size;

	sighand_cachep = kmem_cache_create("sighand_cache",
			sizeof(struct sighand_struct), 0,
			SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_TYPESAFE_BY_RCU |
			SLAB_ACCOUNT,
			sighand_ctor);

	signal_cachep = kmem_cache_create("signal_cache",
			sizeof(struct signal_struct), 0,
			SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT,
			NULL);

	files_cachep = kmem_cache_create("files_cache",
			sizeof(struct files_struct), 0,
			SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT,
			NULL);

	fs_cachep = kmem_cache_create("fs_cache",
			sizeof(struct fs_struct), 0,
			SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT,
			NULL);

	/*
	 * mm_cpumask lives at the tail of mm_struct.  Its size is dynamic:
	 * it depends on the highest CPU number the system can have,
	 * hotplug included (nr_cpu_ids).
	 */
	mm_struct_size = sizeof(struct mm_struct) + cpumask_size();

	mm_cachep = kmem_cache_create_usercopy("mm_struct",
			mm_struct_size, ARCH_MIN_MMSTRUCT_ALIGN,
			SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT,
			offsetof(struct mm_struct, saved_auxv),
			sizeof_field(struct mm_struct, saved_auxv),
			NULL);

	vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT);

	mmap_init();
	nsproxy_cache_init();
}
|
|
|
|
/*
|
|
* Check constraints on flags passed to the unshare system call.
|
|
*/
|
|
static int check_unshare_flags(unsigned long unshare_flags)
|
|
{
|
|
if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
|
|
CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
|
|
CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET|
|
|
CLONE_NEWUSER|CLONE_NEWPID|CLONE_NEWCGROUP|
|
|
CLONE_NEWTIME))
|
|
return -EINVAL;
|
|
/*
|
|
* Not implemented, but pretend it works if there is nothing
|
|
* to unshare. Note that unsharing the address space or the
|
|
* signal handlers also need to unshare the signal queues (aka
|
|
* CLONE_THREAD).
|
|
*/
|
|
if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) {
|
|
if (!thread_group_empty(current))
|
|
return -EINVAL;
|
|
}
|
|
if (unshare_flags & (CLONE_SIGHAND | CLONE_VM)) {
|
|
if (refcount_read(¤t->sighand->count) > 1)
|
|
return -EINVAL;
|
|
}
|
|
if (unshare_flags & CLONE_VM) {
|
|
if (!current_is_single_threaded())
|
|
return -EINVAL;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Unshare the filesystem structure if it is being shared
|
|
*/
|
|
static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
|
|
{
|
|
struct fs_struct *fs = current->fs;
|
|
|
|
if (!(unshare_flags & CLONE_FS) || !fs)
|
|
return 0;
|
|
|
|
/* don't need lock here; in the worst case we'll do useless copy */
|
|
if (fs->users == 1)
|
|
return 0;
|
|
|
|
*new_fsp = copy_fs_struct(fs);
|
|
if (!*new_fsp)
|
|
return -ENOMEM;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Unshare file descriptor table if it is being shared
|
|
*/
|
|
int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
|
|
struct files_struct **new_fdp)
|
|
{
|
|
struct files_struct *fd = current->files;
|
|
int error = 0;
|
|
|
|
if ((unshare_flags & CLONE_FILES) &&
|
|
(fd && atomic_read(&fd->count) > 1)) {
|
|
*new_fdp = dup_fd(fd, max_fds, &error);
|
|
if (!*new_fdp)
|
|
return error;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* unshare allows a process to 'unshare' part of the process
|
|
* context which was originally shared using clone. copy_*
|
|
* functions used by kernel_clone() cannot be used here directly
|
|
* because they modify an inactive task_struct that is being
|
|
* constructed. Here we are modifying the current, active,
|
|
* task_struct.
|
|
*/
|
|
int ksys_unshare(unsigned long unshare_flags)
|
|
{
|
|
struct fs_struct *fs, *new_fs = NULL;
|
|
struct files_struct *new_fd = NULL;
|
|
struct cred *new_cred = NULL;
|
|
struct nsproxy *new_nsproxy = NULL;
|
|
int do_sysvsem = 0;
|
|
int err;
|
|
|
|
/*
|
|
* If unsharing a user namespace must also unshare the thread group
|
|
* and unshare the filesystem root and working directories.
|
|
*/
|
|
if (unshare_flags & CLONE_NEWUSER)
|
|
unshare_flags |= CLONE_THREAD | CLONE_FS;
|
|
/*
|
|
* If unsharing vm, must also unshare signal handlers.
|
|
*/
|
|
if (unshare_flags & CLONE_VM)
|
|
unshare_flags |= CLONE_SIGHAND;
|
|
/*
|
|
* If unsharing a signal handlers, must also unshare the signal queues.
|
|
*/
|
|
if (unshare_flags & CLONE_SIGHAND)
|
|
unshare_flags |= CLONE_THREAD;
|
|
/*
|
|
* If unsharing namespace, must also unshare filesystem information.
|
|
*/
|
|
if (unshare_flags & CLONE_NEWNS)
|
|
unshare_flags |= CLONE_FS;
|
|
|
|
err = check_unshare_flags(unshare_flags);
|
|
if (err)
|
|
goto bad_unshare_out;
|
|
/*
|
|
* CLONE_NEWIPC must also detach from the undolist: after switching
|
|
* to a new ipc namespace, the semaphore arrays from the old
|
|
* namespace are unreachable.
|
|
*/
|
|
if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM))
|
|
do_sysvsem = 1;
|
|
err = unshare_fs(unshare_flags, &new_fs);
|
|
if (err)
|
|
goto bad_unshare_out;
|
|
err = unshare_fd(unshare_flags, NR_OPEN_MAX, &new_fd);
|
|
if (err)
|
|
goto bad_unshare_cleanup_fs;
|
|
err = unshare_userns(unshare_flags, &new_cred);
|
|
if (err)
|
|
goto bad_unshare_cleanup_fd;
|
|
err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
|
|
new_cred, new_fs);
|
|
if (err)
|
|
goto bad_unshare_cleanup_cred;
|
|
|
|
if (new_cred) {
|
|
err = set_cred_ucounts(new_cred);
|
|
if (err)
|
|
goto bad_unshare_cleanup_cred;
|
|
}
|
|
|
|
if (new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy) {
|
|
if (do_sysvsem) {
|
|
/*
|
|
* CLONE_SYSVSEM is equivalent to sys_exit().
|
|
*/
|
|
exit_sem(current);
|
|
}
|
|
if (unshare_flags & CLONE_NEWIPC) {
|
|
/* Orphan segments in old ns (see sem above). */
|
|
exit_shm(current);
|
|
shm_init_task(current);
|
|
}
|
|
|
|
if (new_nsproxy)
|
|
switch_task_namespaces(current, new_nsproxy);
|
|
|
|
task_lock(current);
|
|
|
|
if (new_fs) {
|
|
fs = current->fs;
|
|
spin_lock(&fs->lock);
|
|
current->fs = new_fs;
|
|
if (--fs->users)
|
|
new_fs = NULL;
|
|
else
|
|
new_fs = fs;
|
|
spin_unlock(&fs->lock);
|
|
}
|
|
|
|
if (new_fd)
|
|
swap(current->files, new_fd);
|
|
|
|
task_unlock(current);
|
|
|
|
if (new_cred) {
|
|
/* Install the new user namespace */
|
|
commit_creds(new_cred);
|
|
new_cred = NULL;
|
|
}
|
|
}
|
|
|
|
perf_event_namespaces(current);
|
|
|
|
bad_unshare_cleanup_cred:
|
|
if (new_cred)
|
|
put_cred(new_cred);
|
|
bad_unshare_cleanup_fd:
|
|
if (new_fd)
|
|
put_files_struct(new_fd);
|
|
|
|
bad_unshare_cleanup_fs:
|
|
if (new_fs)
|
|
free_fs_struct(new_fs);
|
|
|
|
bad_unshare_out:
|
|
return err;
|
|
}
|
|
|
|
/* unshare() system call: thin wrapper around ksys_unshare(). */
SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
{
	return ksys_unshare(unshare_flags);
}
|
|
|
|
/*
|
|
* Helper to unshare the files of the current task.
|
|
* We don't want to expose copy_files internals to
|
|
* the exec layer of the kernel.
|
|
*/
|
|
|
|
int unshare_files(void)
|
|
{
|
|
struct task_struct *task = current;
|
|
struct files_struct *old, *copy = NULL;
|
|
int error;
|
|
|
|
error = unshare_fd(CLONE_FILES, NR_OPEN_MAX, ©);
|
|
if (error || !copy)
|
|
return error;
|
|
|
|
old = task->files;
|
|
task_lock(task);
|
|
task->files = copy;
|
|
task_unlock(task);
|
|
put_files_struct(old);
|
|
return 0;
|
|
}
|
|
|
|
/*
 * sysctl handler for max_threads.
 *
 * Works on a local copy of @table whose data points at a temporary and whose
 * bounds are [1, MAX_THREADS], so max_threads is only updated after
 * proc_dointvec_minmax() has accepted a write.  Returns 0 on success or the
 * error from proc_dointvec_minmax().
 */
int sysctl_max_threads(struct ctl_table *table, int write,
		       void *buffer, size_t *lenp, loff_t *ppos)
{
	int lowest = 1;
	int highest = MAX_THREADS;
	int value = max_threads;
	struct ctl_table tmp = *table;
	int ret;

	tmp.data = &value;
	tmp.extra1 = &lowest;
	tmp.extra2 = &highest;

	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (ret)
		return ret;

	/* Reads are complete at this point; only writes update the global. */
	if (write)
		max_threads = value;

	return 0;
}
|