From 2cf002d194977c4ec8848496a9a9804a317099dd Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Tue, 2 Jun 2020 18:15:16 -0300 Subject: [PATCH 001/433] apparmor: check/put label on apparmor_sk_clone_security() Currently apparmor_sk_clone_security() does not check for existing label/peer in the 'new' struct sock; it just overwrites it, if any (with another reference to the label of the source sock.) static void apparmor_sk_clone_security(const struct sock *sk, struct sock *newsk) { struct aa_sk_ctx *ctx = SK_CTX(sk); struct aa_sk_ctx *new = SK_CTX(newsk); new->label = aa_get_label(ctx->label); new->peer = aa_get_label(ctx->peer); } This might leak label references, which might overflow under load. Thus, check for and put labels, to prevent such errors. Note this is similarly done on: static int apparmor_socket_post_create(struct socket *sock, ...) ... if (sock->sk) { struct aa_sk_ctx *ctx = SK_CTX(sock->sk); aa_put_label(ctx->label); ctx->label = aa_get_label(label); } ... Context: ------- The label reference count leak is observed if apparmor_sock_graft() is called previously: this sets the 'ctx->label' field by getting a reference to the current label (later overwritten, without put.) static void apparmor_sock_graft(struct sock *sk, ...) { struct aa_sk_ctx *ctx = SK_CTX(sk); if (!ctx->label) ctx->label = aa_get_current_label(); } And that is the case on crypto/af_alg.c:af_alg_accept(): int af_alg_accept(struct sock *sk, struct socket *newsock, ...) ... struct sock *sk2; ... sk2 = sk_alloc(...); ... security_sock_graft(sk2, newsock); security_sk_clone(sk, sk2); ... Apparently both calls are done on their own right, especially for other LSMs, being introduced in 2010/2014, before apparmor socket mediation in 2017 (see commits [1,2,3,4]). So, it looks OK there! Let's fix the reference leak in apparmor. Test-case: --------- Exercise that code path enough to overflow label reference count. 
$ cat aa-refcnt-af_alg.c #include #include #include #include #include int main() { int sockfd; struct sockaddr_alg sa; /* Setup the crypto API socket */ sockfd = socket(AF_ALG, SOCK_SEQPACKET, 0); if (sockfd < 0) { perror("socket"); return 1; } memset(&sa, 0, sizeof(sa)); sa.salg_family = AF_ALG; strcpy((char *) sa.salg_type, "rng"); strcpy((char *) sa.salg_name, "stdrng"); if (bind(sockfd, (struct sockaddr *) &sa, sizeof(sa)) < 0) { perror("bind"); return 1; } /* Accept a "connection" and close it; repeat. */ while (!close(accept(sockfd, NULL, 0))); return 0; } $ gcc -o aa-refcnt-af_alg aa-refcnt-af_alg.c $ ./aa-refcnt-af_alg [ 9928.475953] refcount_t overflow at apparmor_sk_clone_security+0x37/0x70 in aa-refcnt-af_alg[1322], uid/euid: 1000/1000 ... [ 9928.507443] RIP: 0010:apparmor_sk_clone_security+0x37/0x70 ... [ 9928.514286] security_sk_clone+0x33/0x50 [ 9928.514807] af_alg_accept+0x81/0x1c0 [af_alg] [ 9928.516091] alg_accept+0x15/0x20 [af_alg] [ 9928.516682] SYSC_accept4+0xff/0x210 [ 9928.519609] SyS_accept+0x10/0x20 [ 9928.520190] do_syscall_64+0x73/0x130 [ 9928.520808] entry_SYSCALL_64_after_hwframe+0x3d/0xa2 Note that other messages may be seen, not just overflow, depending on the value being incremented by kref_get(); on another run: [ 7273.182666] refcount_t: saturated; leaking memory. ... [ 7273.185789] refcount_t: underflow; use-after-free. Kprobes: ------- Using kprobe events to monitor sk -> sk_security -> label -> count (kref): Original v5.7 (one reference leak every iteration) ... (af_alg_accept+0x0/0x1c0) label=0xffff8a0f36c25eb0 label_refcnt=0x11fd2 ... (af_alg_release_parent+0x0/0xd0) label=0xffff8a0f36c25eb0 label_refcnt=0x11fd4 ... (af_alg_accept+0x0/0x1c0) label=0xffff8a0f36c25eb0 label_refcnt=0x11fd3 ... (af_alg_release_parent+0x0/0xd0) label=0xffff8a0f36c25eb0 label_refcnt=0x11fd5 ... (af_alg_accept+0x0/0x1c0) label=0xffff8a0f36c25eb0 label_refcnt=0x11fd4 ... 
(af_alg_release_parent+0x0/0xd0) label=0xffff8a0f36c25eb0 label_refcnt=0x11fd6 Patched v5.7 (zero reference leak per iteration) ... (af_alg_accept+0x0/0x1c0) label=0xffff9ff376c25eb0 label_refcnt=0x593 ... (af_alg_release_parent+0x0/0xd0) label=0xffff9ff376c25eb0 label_refcnt=0x594 ... (af_alg_accept+0x0/0x1c0) label=0xffff9ff376c25eb0 label_refcnt=0x593 ... (af_alg_release_parent+0x0/0xd0) label=0xffff9ff376c25eb0 label_refcnt=0x594 ... (af_alg_accept+0x0/0x1c0) label=0xffff9ff376c25eb0 label_refcnt=0x593 ... (af_alg_release_parent+0x0/0xd0) label=0xffff9ff376c25eb0 label_refcnt=0x594 Commits: ------- [1] commit 507cad355fc9 ("crypto: af_alg - Make sure sk_security is initialized on accept()ed sockets") [2] commit 4c63f83c2c2e ("crypto: af_alg - properly label AF_ALG socket") [3] commit 2acce6aa9f65 ("Networking") a.k.a ("crypto: af_alg - Avoid sock_graft call warning) [4] commit 56974a6fcfef ("apparmor: add base infastructure for socket mediation") Reported-by: Brian Moyles Signed-off-by: Mauricio Faria de Oliveira Signed-off-by: John Johansen --- security/apparmor/lsm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index b621ad74f54a..66a8504c8bea 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -804,7 +804,12 @@ static void apparmor_sk_clone_security(const struct sock *sk, struct aa_sk_ctx *ctx = SK_CTX(sk); struct aa_sk_ctx *new = SK_CTX(newsk); + if (new->label) + aa_put_label(new->label); new->label = aa_get_label(ctx->label); + + if (new->peer) + aa_put_label(new->peer); new->peer = aa_get_label(ctx->peer); } From 5268d795d6888b202ad9f2b16a254cd00d0de77b Mon Sep 17 00:00:00 2001 From: John Johansen Date: Fri, 5 Jun 2020 18:12:21 -0700 Subject: [PATCH 002/433] apparmor: fix introspection of of task mode for unconfined tasks Fix two issues with introspecting the task mode. 1. If a task is attached to a unconfined profile that is not the ns->unconfined profile then. 
The mode is always reported as - $ ps -Z LABEL PID TTY TIME CMD unconfined 1287 pts/0 00:00:01 bash test (-) 1892 pts/0 00:00:00 ps instead of the correct value of (unconfined) as shown below $ ps -Z LABEL PID TTY TIME CMD unconfined 2483 pts/0 00:00:01 bash test (unconfined) 3591 pts/0 00:00:00 ps 2. If a task is confined by a stack of profiles that are unconfined the output of label mode is again the incorrect value of (-) like above, instead of (unconfined). This is because the visible profile count increment is skipped by the special casing of unconfined. Signed-off-by: John Johansen --- security/apparmor/label.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/apparmor/label.c b/security/apparmor/label.c index 676eebcbfd68..23f7a193df4f 100644 --- a/security/apparmor/label.c +++ b/security/apparmor/label.c @@ -1558,13 +1558,13 @@ static const char *label_modename(struct aa_ns *ns, struct aa_label *label, label_for_each(i, label, profile) { if (aa_ns_visible(ns, profile->ns, flags & FLAG_VIEW_SUBNS)) { - if (profile->mode == APPARMOR_UNCONFINED) + count++; + if (profile == profile->ns->unconfined) /* special case unconfined so stacks with * unconfined don't report as mixed. ie. * profile_foo//&:ns1:unconfined (mixed) */ continue; - count++; if (mode == -1) mode = profile->mode; else if (mode != profile->mode) From 92de220a7f336367127351da58cff691da5bb17b Mon Sep 17 00:00:00 2001 From: John Johansen Date: Tue, 30 Jun 2020 17:00:11 -0700 Subject: [PATCH 003/433] apparmor: update policy capable checks to use a label Previously the policy capable checks assumed they were using the current task. Make them take the task label so the query can be made against an arbitrary task. 
Signed-off-by: John Johansen --- security/apparmor/apparmorfs.c | 4 +-- security/apparmor/include/label.h | 1 + security/apparmor/include/policy.h | 6 +++-- security/apparmor/lsm.c | 22 ++++++++-------- security/apparmor/policy.c | 41 ++++++++++++++++++++++++------ 5 files changed, 51 insertions(+), 23 deletions(-) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index d65324415980..3275e074e5f8 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -1357,7 +1357,7 @@ static int rawdata_open(struct inode *inode, struct file *file) struct aa_loaddata *loaddata; struct rawdata_f_data *private; - if (!policy_view_capable(NULL)) + if (!aa_current_policy_view_capable(NULL)) return -EACCES; loaddata = __aa_get_loaddata(inode->i_private); @@ -2266,7 +2266,7 @@ static const struct seq_operations aa_sfs_profiles_op = { static int profiles_open(struct inode *inode, struct file *file) { - if (!policy_view_capable(NULL)) + if (!aa_current_policy_view_capable(NULL)) return -EACCES; return seq_open(file, &aa_sfs_profiles_op); diff --git a/security/apparmor/include/label.h b/security/apparmor/include/label.h index 255764ab06e2..f5b5485e20c9 100644 --- a/security/apparmor/include/label.h +++ b/security/apparmor/include/label.h @@ -148,6 +148,7 @@ do { \ #define __label_make_stale(X) ((X)->flags |= FLAG_STALE) #define labels_ns(X) (vec_ns(&((X)->vec[0]), (X)->size)) #define labels_set(X) (&labels_ns(X)->labels) +#define labels_view(X) labels_ns(X) #define labels_profile(X) ((X)->vec[(X)->size - 1]) diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index b5b4b8190e65..cb5ef21991b7 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -301,9 +301,11 @@ static inline int AUDIT_MODE(struct aa_profile *profile) return profile->audit; } -bool policy_view_capable(struct aa_ns *ns); -bool policy_admin_capable(struct aa_ns *ns); +bool 
aa_policy_view_capable(struct aa_label *label, struct aa_ns *ns); +bool aa_policy_admin_capable(struct aa_label *label, struct aa_ns *ns); int aa_may_manage_policy(struct aa_label *label, struct aa_ns *ns, u32 mask); +bool aa_current_policy_view_capable(struct aa_ns *ns); +bool aa_current_policy_admin_capable(struct aa_ns *ns); #endif /* __AA_POLICY_H */ diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 66a8504c8bea..64d6020ffd50 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1392,7 +1392,7 @@ static int param_set_aalockpolicy(const char *val, const struct kernel_param *kp { if (!apparmor_enabled) return -EINVAL; - if (apparmor_initialized && !policy_admin_capable(NULL)) + if (apparmor_initialized && !aa_current_policy_admin_capable(NULL)) return -EPERM; return param_set_bool(val, kp); } @@ -1401,7 +1401,7 @@ static int param_get_aalockpolicy(char *buffer, const struct kernel_param *kp) { if (!apparmor_enabled) return -EINVAL; - if (apparmor_initialized && !policy_view_capable(NULL)) + if (apparmor_initialized && !aa_current_policy_view_capable(NULL)) return -EPERM; return param_get_bool(buffer, kp); } @@ -1410,7 +1410,7 @@ static int param_set_aabool(const char *val, const struct kernel_param *kp) { if (!apparmor_enabled) return -EINVAL; - if (apparmor_initialized && !policy_admin_capable(NULL)) + if (apparmor_initialized && !aa_current_policy_admin_capable(NULL)) return -EPERM; return param_set_bool(val, kp); } @@ -1419,7 +1419,7 @@ static int param_get_aabool(char *buffer, const struct kernel_param *kp) { if (!apparmor_enabled) return -EINVAL; - if (apparmor_initialized && !policy_view_capable(NULL)) + if (apparmor_initialized && !aa_current_policy_view_capable(NULL)) return -EPERM; return param_get_bool(buffer, kp); } @@ -1445,7 +1445,7 @@ static int param_get_aauint(char *buffer, const struct kernel_param *kp) { if (!apparmor_enabled) return -EINVAL; - if (apparmor_initialized && !policy_view_capable(NULL)) + if 
(apparmor_initialized && !aa_current_policy_view_capable(NULL)) return -EPERM; return param_get_uint(buffer, kp); } @@ -1516,7 +1516,7 @@ static int param_get_aacompressionlevel(char *buffer, { if (!apparmor_enabled) return -EINVAL; - if (apparmor_initialized && !policy_view_capable(NULL)) + if (apparmor_initialized && !aa_current_policy_view_capable(NULL)) return -EPERM; return param_get_int(buffer, kp); } @@ -1525,7 +1525,7 @@ static int param_get_audit(char *buffer, const struct kernel_param *kp) { if (!apparmor_enabled) return -EINVAL; - if (apparmor_initialized && !policy_view_capable(NULL)) + if (apparmor_initialized && !aa_current_policy_view_capable(NULL)) return -EPERM; return sprintf(buffer, "%s", audit_mode_names[aa_g_audit]); } @@ -1538,7 +1538,7 @@ static int param_set_audit(const char *val, const struct kernel_param *kp) return -EINVAL; if (!val) return -EINVAL; - if (apparmor_initialized && !policy_admin_capable(NULL)) + if (apparmor_initialized && !aa_current_policy_admin_capable(NULL)) return -EPERM; i = match_string(audit_mode_names, AUDIT_MAX_INDEX, val); @@ -1553,7 +1553,7 @@ static int param_get_mode(char *buffer, const struct kernel_param *kp) { if (!apparmor_enabled) return -EINVAL; - if (apparmor_initialized && !policy_view_capable(NULL)) + if (apparmor_initialized && !aa_current_policy_view_capable(NULL)) return -EPERM; return sprintf(buffer, "%s", aa_profile_mode_names[aa_g_profile_mode]); @@ -1567,7 +1567,7 @@ static int param_set_mode(const char *val, const struct kernel_param *kp) return -EINVAL; if (!val) return -EINVAL; - if (apparmor_initialized && !policy_admin_capable(NULL)) + if (apparmor_initialized && !aa_current_policy_admin_capable(NULL)) return -EPERM; i = match_string(aa_profile_mode_names, APPARMOR_MODE_NAMES_MAX_INDEX, @@ -1703,7 +1703,7 @@ static int __init alloc_buffers(void) static int apparmor_dointvec(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - if 
(!policy_admin_capable(NULL)) + if (!aa_current_policy_admin_capable(NULL)) return -EPERM; if (!apparmor_enabled) return -EINVAL; diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 269f2f53c0b1..e680121e013e 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -632,17 +632,18 @@ static int audit_policy(struct aa_label *label, const char *op, } /** - * policy_view_capable - check if viewing policy in at @ns is allowed - * ns: namespace being viewed by current task (may be NULL) + * aa_policy_view_capable - check if viewing policy in at @ns is allowed + * label: label that is trying to view policy in ns + * ns: namespace being viewed by @label (may be NULL if @label's ns) * Returns: true if viewing policy is allowed * * If @ns is NULL then the namespace being viewed is assumed to be the * tasks current namespace. */ -bool policy_view_capable(struct aa_ns *ns) +bool aa_policy_view_capable(struct aa_label *label, struct aa_ns *ns) { struct user_namespace *user_ns = current_user_ns(); - struct aa_ns *view_ns = aa_get_current_ns(); + struct aa_ns *view_ns = labels_view(label); bool root_in_user_ns = uid_eq(current_euid(), make_kuid(user_ns, 0)) || in_egroup_p(make_kgid(user_ns, 0)); bool response = false; @@ -654,12 +655,11 @@ bool policy_view_capable(struct aa_ns *ns) (unprivileged_userns_apparmor_policy != 0 && user_ns->level == view_ns->level))) response = true; - aa_put_ns(view_ns); return response; } -bool policy_admin_capable(struct aa_ns *ns) +bool aa_policy_admin_capable(struct aa_label *label, struct aa_ns *ns) { struct user_namespace *user_ns = current_user_ns(); bool capable = ns_capable(user_ns, CAP_MAC_ADMIN); @@ -667,7 +667,32 @@ bool policy_admin_capable(struct aa_ns *ns) AA_DEBUG("cap_mac_admin? %d\n", capable); AA_DEBUG("policy locked? 
%d\n", aa_g_lock_policy); - return policy_view_capable(ns) && capable && !aa_g_lock_policy; + return aa_policy_view_capable(label, ns) && capable && + !aa_g_lock_policy; +} + +bool aa_current_policy_view_capable(struct aa_ns *ns) +{ + struct aa_label *label; + bool res; + + label = __begin_current_label_crit_section(); + res = aa_policy_view_capable(label, ns); + __end_current_label_crit_section(label); + + return res; +} + +bool aa_current_policy_admin_capable(struct aa_ns *ns) +{ + struct aa_label *label; + bool res; + + label = __begin_current_label_crit_section(); + res = aa_policy_admin_capable(label, ns); + __end_current_label_crit_section(label); + + return res; } /** @@ -693,7 +718,7 @@ int aa_may_manage_policy(struct aa_label *label, struct aa_ns *ns, u32 mask) return audit_policy(label, op, NULL, NULL, "policy_locked", -EACCES); - if (!policy_admin_capable(ns)) + if (!aa_policy_admin_capable(label, ns)) return audit_policy(label, op, NULL, NULL, "not policy admin", -EACCES); From 31ec99e13346c22a7c8ca18e044684a870063cef Mon Sep 17 00:00:00 2001 From: John Johansen Date: Wed, 1 Jul 2020 02:18:18 -0700 Subject: [PATCH 004/433] apparmor: switch to apparmor to internal capable check for policy management With LSM stacking calling back into capable to check for MAC_ADMIN for apparmor policy results in asking the other stacked LSMs for MAC_ADMIN resulting in the other LSMs answering based on their policy management. For apparmor policy management we just need to call apparmor's capability fn directly. 
Signed-off-by: John Johansen --- security/apparmor/policy.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index e680121e013e..9ce93966401a 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -631,6 +631,22 @@ static int audit_policy(struct aa_label *label, const char *op, return error; } +/* don't call out to other LSMs in the stack for apparmor policy admin + * permissions + */ +static int policy_ns_capable(struct aa_label *label, + struct user_namespace *userns, int cap) +{ + int err; + + /* check for MAC_ADMIN cap in cred */ + err = cap_capable(current_cred(), userns, cap, CAP_OPT_NONE); + if (!err) + err = aa_capable(label, cap, CAP_OPT_NONE); + + return err; +} + /** * aa_policy_view_capable - check if viewing policy in at @ns is allowed * label: label that is trying to view policy in ns @@ -662,7 +678,7 @@ bool aa_policy_view_capable(struct aa_label *label, struct aa_ns *ns) bool aa_policy_admin_capable(struct aa_label *label, struct aa_ns *ns) { struct user_namespace *user_ns = current_user_ns(); - bool capable = ns_capable(user_ns, CAP_MAC_ADMIN); + bool capable = policy_ns_capable(label, user_ns, CAP_MAC_ADMIN) == 0; AA_DEBUG("cap_mac_admin? %d\n", capable); AA_DEBUG("policy locked? %d\n", aa_g_lock_policy); From ef70454508c00a415a41156a19cb771a186c55d0 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 25 Jan 2021 11:53:50 -0800 Subject: [PATCH 005/433] security: apparmor: file.h: delete duplicated word Delete the doubled word "then" in a comment. 
Signed-off-by: Randy Dunlap Reviewed-by: Seth Arnold Signed-off-by: John Johansen --- security/apparmor/include/file.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/apparmor/include/file.h b/security/apparmor/include/file.h index aff26fc71407..a7672dacd001 100644 --- a/security/apparmor/include/file.h +++ b/security/apparmor/include/file.h @@ -167,7 +167,7 @@ int aa_audit_file(struct aa_profile *profile, struct aa_perms *perms, * @perms: permission table indexed by the matched state accept entry of @dfa * @trans: transition table for indexed by named x transitions * - * File permission are determined by matching a path against @dfa and then + * File permission are determined by matching a path against @dfa and * then using the value of the accept entry for the matching state as * an index into @perms. If a named exec transition is required it is * looked up in the transition table. From 4af7c863fc85ad756b7a978fe1096b80a855543c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 7 Aug 2020 09:50:55 -0700 Subject: [PATCH 006/433] security: apparmor: delete repeated words in comments Drop repeated words in comments. 
{a, then, to} Signed-off-by: Randy Dunlap Reviewed-by: Seth Arnold Signed-off-by: John Johansen --- security/apparmor/path.c | 2 +- security/apparmor/policy_unpack.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/security/apparmor/path.c b/security/apparmor/path.c index b02dfdbff7cd..45ec994b558d 100644 --- a/security/apparmor/path.c +++ b/security/apparmor/path.c @@ -83,7 +83,7 @@ static int disconnect(const struct path *path, char *buf, char **name, * * Returns: %0 else error code if path lookup fails * When no error the path name is returned in @name which points to - * to a position in @buf + * a position in @buf */ static int d_namespace_path(const struct path *path, char *buf, char **name, int flags, const char *disconnected) diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index d9ef9a99c26e..12e6677868c3 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -39,7 +39,7 @@ /* * The AppArmor interface treats data as a type byte followed by the - * actual data. The interface has the notion of a a named entry + * actual data. The interface has the notion of a named entry * which has a name (AA_NAME typecode followed by name string) followed by * the entries typecode and data. Named types allow for optional * elements and extensions to be added and tested for without breaking From d108370c644b153382632b3e5511ade575c91c86 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Sun, 4 Oct 2020 07:24:22 -0700 Subject: [PATCH 007/433] apparmor: fix error check clang static analysis reports this representative problem: label.c:1463:16: warning: Assigned value is garbage or undefined label->hname = name; ^ ~~~~ In aa_update_label_name(), this is the problem block of code: if (aa_label_acntsxprint(&name, ...) == -1) return res; On failure, aa_label_acntsxprint() has a more complicated return than just -1. So check for a negative return. 
It was also noted that the aa_label_acntsxprint() main comment refers to a nonexistent parameter, so clean up the comment. Fixes: f1bd904175e8 ("apparmor: add the base fns() for domain labels") Signed-off-by: Tom Rix Reviewed-by: Nick Desaulniers Signed-off-by: John Johansen --- security/apparmor/label.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/apparmor/label.c b/security/apparmor/label.c index 23f7a193df4f..f5eb9ac07e9b 100644 --- a/security/apparmor/label.c +++ b/security/apparmor/label.c @@ -1453,7 +1453,7 @@ bool aa_update_label_name(struct aa_ns *ns, struct aa_label *label, gfp_t gfp) if (label->hname || labels_ns(label) != ns) return res; - if (aa_label_acntsxprint(&name, ns, label, FLAGS_NONE, gfp) == -1) + if (aa_label_acntsxprint(&name, ns, label, FLAGS_NONE, gfp) < 0) return res; ls = labels_set(label); @@ -1703,7 +1703,7 @@ int aa_label_asxprint(char **strp, struct aa_ns *ns, struct aa_label *label, /** * aa_label_acntsxprint - allocate a __counted string buffer and print label - * @strp: buffer to write to. (MAY BE NULL if @size == 0) + * @strp: buffer to write to. * @ns: namespace profile is being viewed from * @label: label to view (NOT NULL) * @flags: flags controlling what label info is printed From 58ae4004b9c4bb040958cf73986b687a5ea4d85d Mon Sep 17 00:00:00 2001 From: Tang Bin Date: Sun, 5 Sep 2021 18:57:32 -0700 Subject: [PATCH 008/433] Input: cpcap-pwrbutton - handle errors from platform_get_irq() The function cpcap_power_button_probe() does not perform sufficient error checking after executing platform_get_irq(), thus fix it. 
Signed-off-by: Tang Bin Link: https://lore.kernel.org/r/20210802121740.8700-1-tangbin@cmss.chinamobile.com Signed-off-by: Dmitry Torokhov --- drivers/input/misc/cpcap-pwrbutton.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/input/misc/cpcap-pwrbutton.c b/drivers/input/misc/cpcap-pwrbutton.c index 0abef63217e2..372cb44d0635 100644 --- a/drivers/input/misc/cpcap-pwrbutton.c +++ b/drivers/input/misc/cpcap-pwrbutton.c @@ -54,9 +54,13 @@ static irqreturn_t powerbutton_irq(int irq, void *_button) static int cpcap_power_button_probe(struct platform_device *pdev) { struct cpcap_power_button *button; - int irq = platform_get_irq(pdev, 0); + int irq; int err; + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + button = devm_kmalloc(&pdev->dev, sizeof(*button), GFP_KERNEL); if (!button) return -ENOMEM; From d5f9c43d41effc3c884e0139ca52015e451039b1 Mon Sep 17 00:00:00 2001 From: "simba.hsu" Date: Tue, 1 Jun 2021 21:38:38 -0700 Subject: [PATCH 009/433] Input: raydium_i2c_ts - read device version in bootloader mode Add support reading device ID when controller is in bootloader mode, which may happen if firmware update has been interrupted. 
Signed-off-by: simba.hsu Link: https://lore.kernel.org/r/20210818063644.8654-1-simba.hsu@rad-ic.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/raydium_i2c_ts.c | 50 +++++++++++++++++++--- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/drivers/input/touchscreen/raydium_i2c_ts.c b/drivers/input/touchscreen/raydium_i2c_ts.c index 4d2d22a86977..92c48d8e35a4 100644 --- a/drivers/input/touchscreen/raydium_i2c_ts.c +++ b/drivers/input/touchscreen/raydium_i2c_ts.c @@ -37,6 +37,7 @@ #define RM_CMD_BOOT_READ 0x44 /* send wait bl data ready*/ #define RM_BOOT_RDY 0xFF /* bl data ready */ +#define RM_BOOT_CMD_READHWID 0x0E /* read hwid */ /* I2C main commands */ #define RM_CMD_QUERY_BANK 0x2B @@ -290,6 +291,44 @@ static int raydium_i2c_sw_reset(struct i2c_client *client) return 0; } +static int raydium_i2c_query_ts_bootloader_info(struct raydium_data *ts) +{ + struct i2c_client *client = ts->client; + static const u8 get_hwid[] = { RM_BOOT_CMD_READHWID, + 0x10, 0xc0, 0x01, 0x00, 0x04, 0x00 }; + u8 rbuf[5] = { 0 }; + u32 hw_ver; + int error; + + error = raydium_i2c_send(client, RM_CMD_BOOT_WRT, + get_hwid, sizeof(get_hwid)); + if (error) { + dev_err(&client->dev, "WRT HWID command failed: %d\n", error); + return error; + } + + error = raydium_i2c_send(client, RM_CMD_BOOT_ACK, rbuf, 1); + if (error) { + dev_err(&client->dev, "Ack HWID command failed: %d\n", error); + return error; + } + + error = raydium_i2c_read(client, RM_CMD_BOOT_CHK, rbuf, sizeof(rbuf)); + if (error) { + dev_err(&client->dev, "Read HWID command failed: %d (%4ph)\n", + error, rbuf + 1); + hw_ver = 0xffffffffUL; + } else { + hw_ver = get_unaligned_be32(rbuf + 1); + } + + ts->info.hw_ver = cpu_to_le32(hw_ver); + ts->info.main_ver = 0xff; + ts->info.sub_ver = 0xff; + + return error; +} + static int raydium_i2c_query_ts_info(struct raydium_data *ts) { struct i2c_client *client = ts->client; @@ -388,13 +427,10 @@ static int raydium_i2c_initialize(struct raydium_data *ts) if (error) 
ts->boot_mode = RAYDIUM_TS_BLDR; - if (ts->boot_mode == RAYDIUM_TS_BLDR) { - ts->info.hw_ver = cpu_to_le32(0xffffffffUL); - ts->info.main_ver = 0xff; - ts->info.sub_ver = 0xff; - } else { + if (ts->boot_mode == RAYDIUM_TS_BLDR) + raydium_i2c_query_ts_bootloader_info(ts); + else raydium_i2c_query_ts_info(ts); - } return error; } @@ -1218,7 +1254,7 @@ static SIMPLE_DEV_PM_OPS(raydium_i2c_pm_ops, raydium_i2c_suspend, raydium_i2c_resume); static const struct i2c_device_id raydium_i2c_id[] = { - { "raydium_i2c" , 0 }, + { "raydium_i2c", 0 }, { "rm32380", 0 }, { /* sentinel */ } }; From daf87bffd02e35387a62c77eb32337c934cf631a Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Fri, 17 Nov 2017 12:31:11 -0800 Subject: [PATCH 010/433] Input: palmas-pwrbutton - handle return value of platform_get_irq() platform_get_irq() can fail here and we must check its return value. Signed-off-by: Arvind Yadav Signed-off-by: Dmitry Torokhov --- drivers/input/misc/palmas-pwrbutton.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/input/misc/palmas-pwrbutton.c b/drivers/input/misc/palmas-pwrbutton.c index 1e1baed63929..f9b05cf09ff5 100644 --- a/drivers/input/misc/palmas-pwrbutton.c +++ b/drivers/input/misc/palmas-pwrbutton.c @@ -210,6 +210,11 @@ static int palmas_pwron_probe(struct platform_device *pdev) INIT_DELAYED_WORK(&pwron->input_work, palmas_power_button_work); pwron->irq = platform_get_irq(pdev, 0); + if (pwron->irq < 0) { + error = pwron->irq; + goto err_free_input; + } + error = request_threaded_irq(pwron->irq, NULL, pwron_irq, IRQF_TRIGGER_HIGH | IRQF_TRIGGER_LOW | From de609b56b8322be0cd242ae364dc49bad0dfa89d Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Thu, 9 Sep 2021 17:59:43 -0700 Subject: [PATCH 011/433] Input: ads7846 - add short-hand for spi->dev in probe() function This will make the code a bit more terse. No functional change intended. 
Signed-off-by: Daniel Mack Reviewed-by: Marco Felsch Link: https://lore.kernel.org/r/20210907200726.2034962-2-daniel@zonque.org Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ads7846.c | 45 +++++++++++++++-------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index f113a27aeb1e..ce2e14816af5 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -1216,20 +1216,21 @@ static int ads7846_probe(struct spi_device *spi) { const struct ads7846_platform_data *pdata; struct ads7846 *ts; + struct device *dev = &spi->dev; struct ads7846_packet *packet; struct input_dev *input_dev; unsigned long irq_flags; int err; if (!spi->irq) { - dev_dbg(&spi->dev, "no IRQ?\n"); + dev_dbg(dev, "no IRQ?\n"); return -EINVAL; } /* don't exceed max specified sample rate */ if (spi->max_speed_hz > (125000 * SAMPLE_BITS)) { - dev_err(&spi->dev, "f(sample) %d KHz?\n", - (spi->max_speed_hz/SAMPLE_BITS)/1000); + dev_err(dev, "f(sample) %d KHz?\n", + (spi->max_speed_hz/SAMPLE_BITS)/1000); return -EINVAL; } @@ -1262,9 +1263,9 @@ static int ads7846_probe(struct spi_device *spi) mutex_init(&ts->lock); init_waitqueue_head(&ts->wait); - pdata = dev_get_platdata(&spi->dev); + pdata = dev_get_platdata(dev); if (!pdata) { - pdata = ads7846_probe_dt(&spi->dev); + pdata = ads7846_probe_dt(dev); if (IS_ERR(pdata)) { err = PTR_ERR(pdata); goto err_free_mem; @@ -1306,12 +1307,12 @@ static int ads7846_probe(struct spi_device *spi) ts->wait_for_sync = pdata->wait_for_sync ? 
: null_wait_for_sync; - snprintf(ts->phys, sizeof(ts->phys), "%s/input0", dev_name(&spi->dev)); + snprintf(ts->phys, sizeof(ts->phys), "%s/input0", dev_name(dev)); snprintf(ts->name, sizeof(ts->name), "ADS%d Touchscreen", ts->model); input_dev->name = ts->name; input_dev->phys = ts->phys; - input_dev->dev.parent = &spi->dev; + input_dev->dev.parent = dev; input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS); input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH); @@ -1345,16 +1346,16 @@ static int ads7846_probe(struct spi_device *spi) ads7846_setup_spi_msg(ts, pdata); - ts->reg = regulator_get(&spi->dev, "vcc"); + ts->reg = regulator_get(dev, "vcc"); if (IS_ERR(ts->reg)) { err = PTR_ERR(ts->reg); - dev_err(&spi->dev, "unable to get regulator: %d\n", err); + dev_err(dev, "unable to get regulator: %d\n", err); goto err_free_gpio; } err = regulator_enable(ts->reg); if (err) { - dev_err(&spi->dev, "unable to enable regulator: %d\n", err); + dev_err(dev, "unable to enable regulator: %d\n", err); goto err_put_regulator; } @@ -1362,18 +1363,18 @@ static int ads7846_probe(struct spi_device *spi) irq_flags |= IRQF_ONESHOT; err = request_threaded_irq(spi->irq, ads7846_hard_irq, ads7846_irq, - irq_flags, spi->dev.driver->name, ts); + irq_flags, dev->driver->name, ts); if (err && !pdata->irq_flags) { - dev_info(&spi->dev, + dev_info(dev, "trying pin change workaround on irq %d\n", spi->irq); irq_flags |= IRQF_TRIGGER_RISING; err = request_threaded_irq(spi->irq, ads7846_hard_irq, ads7846_irq, - irq_flags, spi->dev.driver->name, ts); + irq_flags, dev->driver->name, ts); } if (err) { - dev_dbg(&spi->dev, "irq %d busy?\n", spi->irq); + dev_dbg(dev, "irq %d busy?\n", spi->irq); goto err_disable_regulator; } @@ -1381,18 +1382,18 @@ static int ads7846_probe(struct spi_device *spi) if (err) goto err_free_irq; - dev_info(&spi->dev, "touchscreen, irq %d\n", spi->irq); + dev_info(dev, "touchscreen, irq %d\n", spi->irq); /* * Take a first sample, leaving nPENIRQ active and vREF 
off; avoid * the touchscreen, in case it's not connected. */ if (ts->model == 7845) - ads7845_read12_ser(&spi->dev, PWRDOWN); + ads7845_read12_ser(dev, PWRDOWN); else - (void) ads7846_read12_ser(&spi->dev, READ_12BIT_SER(vaux)); + (void) ads7846_read12_ser(dev, READ_12BIT_SER(vaux)); - err = sysfs_create_group(&spi->dev.kobj, &ads784x_attr_group); + err = sysfs_create_group(&dev->kobj, &ads784x_attr_group); if (err) goto err_remove_hwmon; @@ -1400,19 +1401,19 @@ static int ads7846_probe(struct spi_device *spi) if (err) goto err_remove_attr_group; - device_init_wakeup(&spi->dev, pdata->wakeup); + device_init_wakeup(dev, pdata->wakeup); /* * If device does not carry platform data we must have allocated it * when parsing DT data. */ - if (!dev_get_platdata(&spi->dev)) - devm_kfree(&spi->dev, (void *)pdata); + if (!dev_get_platdata(dev)) + devm_kfree(dev, (void *)pdata); return 0; err_remove_attr_group: - sysfs_remove_group(&spi->dev.kobj, &ads784x_attr_group); + sysfs_remove_group(&dev->kobj, &ads784x_attr_group); err_remove_hwmon: ads784x_hwmon_unregister(spi, ts); err_free_irq: From 937f5d5ec642501d2dd3c91918685de30a932b34 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Thu, 9 Sep 2021 18:01:28 -0700 Subject: [PATCH 012/433] Input: ads7846 - remove custom filter handling functions from pdata The functions in the platform data struct to initialize, cleanup and apply custom filters are not in use by any mainline board. Remove support for them to pave the road for more cleanups to come. The enum was moved as it has no users outside of the driver code itself. 
Signed-off-by: Daniel Mack Reviewed-by: Marco Felsch Link: https://lore.kernel.org/r/20210907200726.2034962-3-daniel@zonque.org Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ads7846.c | 25 ++++++++----------------- include/linux/spi/ads7846.h | 15 --------------- 2 files changed, 8 insertions(+), 32 deletions(-) diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index ce2e14816af5..b9c8496155b7 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -142,13 +142,18 @@ struct ads7846 { int (*filter)(void *data, int data_idx, int *val); void *filter_data; - void (*filter_cleanup)(void *data); int (*get_pendown_state)(void); int gpio_pendown; void (*wait_for_sync)(void); }; +enum ads7846_filter { + ADS7846_FILTER_OK, + ADS7846_FILTER_REPEAT, + ADS7846_FILTER_IGNORE, +}; + /* leave chip selected when we're done, for quicker re-select? */ #if 0 #define CS_CHANGE(xfer) ((xfer).cs_change = 1) @@ -1277,15 +1282,7 @@ static int ads7846_probe(struct spi_device *spi) ts->x_plate_ohms = pdata->x_plate_ohms ? 
: 400; ts->vref_mv = pdata->vref_mv; - if (pdata->filter != NULL) { - if (pdata->filter_init != NULL) { - err = pdata->filter_init(pdata, &ts->filter_data); - if (err < 0) - goto err_free_mem; - } - ts->filter = pdata->filter; - ts->filter_cleanup = pdata->filter_cleanup; - } else if (pdata->debounce_max) { + if (pdata->debounce_max) { ts->debounce_max = pdata->debounce_max; if (ts->debounce_max < 2) ts->debounce_max = 2; @@ -1299,7 +1296,7 @@ static int ads7846_probe(struct spi_device *spi) err = ads7846_setup_pendown(spi, ts, pdata); if (err) - goto err_cleanup_filter; + goto err_free_mem; if (pdata->penirq_recheck_delay_usecs) ts->penirq_recheck_delay_usecs = @@ -1425,9 +1422,6 @@ static int ads7846_probe(struct spi_device *spi) err_free_gpio: if (!ts->get_pendown_state) gpio_free(ts->gpio_pendown); - err_cleanup_filter: - if (ts->filter_cleanup) - ts->filter_cleanup(ts->filter_data); err_free_mem: input_free_device(input_dev); kfree(packet); @@ -1458,9 +1452,6 @@ static int ads7846_remove(struct spi_device *spi) gpio_free(ts->gpio_pendown); } - if (ts->filter_cleanup) - ts->filter_cleanup(ts->filter_data); - kfree(ts->packet); kfree(ts); diff --git a/include/linux/spi/ads7846.h b/include/linux/spi/ads7846.h index 1a5eaef3b7f2..d424c1aadf38 100644 --- a/include/linux/spi/ads7846.h +++ b/include/linux/spi/ads7846.h @@ -1,17 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* linux/spi/ads7846.h */ -/* Touchscreen characteristics vary between boards and models. The - * platform_data for the device's "struct device" holds this information. - * - * It's OK if the min/max values are zero. - */ -enum ads7846_filter { - ADS7846_FILTER_OK, - ADS7846_FILTER_REPEAT, - ADS7846_FILTER_IGNORE, -}; - struct ads7846_platform_data { u16 model; /* 7843, 7845, 7846, 7873. 
*/ u16 vref_delay_usecs; /* 0 for external vref; etc */ @@ -51,10 +40,6 @@ struct ads7846_platform_data { int gpio_pendown_debounce; /* platform specific debounce time for * the gpio_pendown */ int (*get_pendown_state)(void); - int (*filter_init) (const struct ads7846_platform_data *pdata, - void **filter_data); - int (*filter) (void *filter_data, int data_idx, int *val); - void (*filter_cleanup)(void *filter_data); void (*wait_for_sync)(void); bool wakeup; unsigned long irq_flags; From 845ef3a7ce5773638a8c5e9a6e7be6538c107ae1 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Thu, 9 Sep 2021 18:03:01 -0700 Subject: [PATCH 013/433] Input: ads7846 - switch to devm initialization This simplifies the code a lot and fixes some potential resource leaks in the error return paths. Signed-off-by: Daniel Mack Link: https://lore.kernel.org/r/20210907200726.2034962-4-daniel@zonque.org Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ads7846.c | 136 ++++++++++------------------ 1 file changed, 47 insertions(+), 89 deletions(-) diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index b9c8496155b7..eaa8714ad19d 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -101,10 +101,6 @@ struct ads7846 { struct spi_device *spi; struct regulator *reg; -#if IS_ENABLED(CONFIG_HWMON) - struct device *hwmon; -#endif - u16 model; u16 vref_mv; u16 vref_delay_usecs; @@ -554,6 +550,8 @@ __ATTRIBUTE_GROUPS(ads7846_attr); static int ads784x_hwmon_register(struct spi_device *spi, struct ads7846 *ts) { + struct device *hwmon; + /* hwmon sensors need a reference voltage */ switch (ts->model) { case 7846: @@ -574,17 +572,11 @@ static int ads784x_hwmon_register(struct spi_device *spi, struct ads7846 *ts) break; } - ts->hwmon = hwmon_device_register_with_groups(&spi->dev, spi->modalias, - ts, ads7846_attr_groups); + hwmon = devm_hwmon_device_register_with_groups(&spi->dev, + spi->modalias, ts, + ads7846_attr_groups); - 
return PTR_ERR_OR_ZERO(ts->hwmon); -} - -static void ads784x_hwmon_unregister(struct spi_device *spi, - struct ads7846 *ts) -{ - if (ts->hwmon) - hwmon_device_unregister(ts->hwmon); + return PTR_ERR_OR_ZERO(hwmon); } #else @@ -593,11 +585,6 @@ static inline int ads784x_hwmon_register(struct spi_device *spi, { return 0; } - -static inline void ads784x_hwmon_unregister(struct spi_device *spi, - struct ads7846 *ts) -{ -} #endif static ssize_t ads7846_pen_down_show(struct device *dev, @@ -1019,8 +1006,8 @@ static int ads7846_setup_pendown(struct spi_device *spi, ts->get_pendown_state = pdata->get_pendown_state; } else if (gpio_is_valid(pdata->gpio_pendown)) { - err = gpio_request_one(pdata->gpio_pendown, GPIOF_IN, - "ads7846_pendown"); + err = devm_gpio_request_one(&spi->dev, pdata->gpio_pendown, + GPIOF_IN, "ads7846_pendown"); if (err) { dev_err(&spi->dev, "failed to request/setup pendown GPIO%d: %d\n", @@ -1217,6 +1204,11 @@ static const struct ads7846_platform_data *ads7846_probe_dt(struct device *dev) } #endif +static void ads7846_regulator_disable(void *regulator) +{ + regulator_disable(regulator); +} + static int ads7846_probe(struct spi_device *spi) { const struct ads7846_platform_data *pdata; @@ -1251,13 +1243,17 @@ static int ads7846_probe(struct spi_device *spi) if (err < 0) return err; - ts = kzalloc(sizeof(struct ads7846), GFP_KERNEL); - packet = kzalloc(sizeof(struct ads7846_packet), GFP_KERNEL); - input_dev = input_allocate_device(); - if (!ts || !packet || !input_dev) { - err = -ENOMEM; - goto err_free_mem; - } + ts = devm_kzalloc(dev, sizeof(struct ads7846), GFP_KERNEL); + if (!ts) + return -ENOMEM; + + packet = devm_kzalloc(dev, sizeof(struct ads7846_packet), GFP_KERNEL); + if (!packet) + return -ENOMEM; + + input_dev = devm_input_allocate_device(dev); + if (!input_dev) + return -ENOMEM; spi_set_drvdata(spi, ts); @@ -1271,10 +1267,8 @@ static int ads7846_probe(struct spi_device *spi) pdata = dev_get_platdata(dev); if (!pdata) { pdata = 
ads7846_probe_dt(dev); - if (IS_ERR(pdata)) { - err = PTR_ERR(pdata); - goto err_free_mem; - } + if (IS_ERR(pdata)) + return PTR_ERR(pdata); } ts->model = pdata->model ? : 7846; @@ -1296,7 +1290,7 @@ static int ads7846_probe(struct spi_device *spi) err = ads7846_setup_pendown(spi, ts, pdata); if (err) - goto err_free_mem; + return err; if (pdata->penirq_recheck_delay_usecs) ts->penirq_recheck_delay_usecs = @@ -1309,7 +1303,6 @@ static int ads7846_probe(struct spi_device *spi) input_dev->name = ts->name; input_dev->phys = ts->phys; - input_dev->dev.parent = dev; input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS); input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH); @@ -1343,41 +1336,47 @@ static int ads7846_probe(struct spi_device *spi) ads7846_setup_spi_msg(ts, pdata); - ts->reg = regulator_get(dev, "vcc"); + ts->reg = devm_regulator_get(dev, "vcc"); if (IS_ERR(ts->reg)) { err = PTR_ERR(ts->reg); dev_err(dev, "unable to get regulator: %d\n", err); - goto err_free_gpio; + return err; } err = regulator_enable(ts->reg); if (err) { dev_err(dev, "unable to enable regulator: %d\n", err); - goto err_put_regulator; + return err; } + err = devm_add_action_or_reset(dev, ads7846_regulator_disable, ts->reg); + if (err) + return err; + irq_flags = pdata->irq_flags ? 
: IRQF_TRIGGER_FALLING; irq_flags |= IRQF_ONESHOT; - err = request_threaded_irq(spi->irq, ads7846_hard_irq, ads7846_irq, - irq_flags, dev->driver->name, ts); + err = devm_request_threaded_irq(dev, spi->irq, + ads7846_hard_irq, ads7846_irq, + irq_flags, dev->driver->name, ts); if (err && !pdata->irq_flags) { dev_info(dev, "trying pin change workaround on irq %d\n", spi->irq); irq_flags |= IRQF_TRIGGER_RISING; - err = request_threaded_irq(spi->irq, - ads7846_hard_irq, ads7846_irq, - irq_flags, dev->driver->name, ts); + err = devm_request_threaded_irq(dev, spi->irq, + ads7846_hard_irq, ads7846_irq, + irq_flags, dev->driver->name, + ts); } if (err) { dev_dbg(dev, "irq %d busy?\n", spi->irq); - goto err_disable_regulator; + return err; } err = ads784x_hwmon_register(spi, ts); if (err) - goto err_free_irq; + return err; dev_info(dev, "touchscreen, irq %d\n", spi->irq); @@ -1390,13 +1389,13 @@ static int ads7846_probe(struct spi_device *spi) else (void) ads7846_read12_ser(dev, READ_12BIT_SER(vaux)); - err = sysfs_create_group(&dev->kobj, &ads784x_attr_group); + err = devm_device_add_group(dev, &ads784x_attr_group); if (err) - goto err_remove_hwmon; + return err; err = input_register_device(input_dev); if (err) - goto err_remove_attr_group; + return err; device_init_wakeup(dev, pdata->wakeup); @@ -1408,54 +1407,13 @@ static int ads7846_probe(struct spi_device *spi) devm_kfree(dev, (void *)pdata); return 0; - - err_remove_attr_group: - sysfs_remove_group(&dev->kobj, &ads784x_attr_group); - err_remove_hwmon: - ads784x_hwmon_unregister(spi, ts); - err_free_irq: - free_irq(spi->irq, ts); - err_disable_regulator: - regulator_disable(ts->reg); - err_put_regulator: - regulator_put(ts->reg); - err_free_gpio: - if (!ts->get_pendown_state) - gpio_free(ts->gpio_pendown); - err_free_mem: - input_free_device(input_dev); - kfree(packet); - kfree(ts); - return err; } static int ads7846_remove(struct spi_device *spi) { struct ads7846 *ts = spi_get_drvdata(spi); - 
sysfs_remove_group(&spi->dev.kobj, &ads784x_attr_group); - - ads7846_disable(ts); - free_irq(ts->spi->irq, ts); - - input_unregister_device(ts->input); - - ads784x_hwmon_unregister(spi, ts); - - regulator_put(ts->reg); - - if (!ts->get_pendown_state) { - /* - * If we are not using specialized pendown method we must - * have been relying on gpio we set up ourselves. - */ - gpio_free(ts->gpio_pendown); - } - - kfree(ts->packet); - kfree(ts); - - dev_dbg(&spi->dev, "unregistered touchscreen\n"); + ads7846_stop(ts); return 0; } From fcc28e0bfcfd2d98f2d096a185f0263759661c94 Mon Sep 17 00:00:00 2001 From: Yassine Oudjana Date: Thu, 9 Sep 2021 22:12:29 -0700 Subject: [PATCH 014/433] Input: cypress-sf - add Cypress StreetFighter touchkey driver This adds support for Cypress StreetFighter touchkey controllers such as sf3155. This driver supports managing regulators and generating input events. Due to lack of documentation, this driver is entirely based on information gathered from a driver written for an old Android kernel fork[1][2]. 
[1] https://github.com/LineageOS/android_kernel_xiaomi_msm8996/blob/lineage-18.1/drivers/input/touchscreen/cyttsp_button.c [2] https://github.com/LineageOS/android_kernel_xiaomi_msm8996/blob/lineage-18.1/arch/arm/boot/dts/qcom/a4-msm8996-mtp.dtsi#L291-L314 Signed-off-by: Yassine Oudjana Link: https://lore.kernel.org/r/20210907174341.422013-2-y.oudjana@protonmail.com Signed-off-by: Dmitry Torokhov --- MAINTAINERS | 6 + drivers/input/keyboard/Kconfig | 10 ++ drivers/input/keyboard/Makefile | 1 + drivers/input/keyboard/cypress-sf.c | 224 ++++++++++++++++++++++++++++ 4 files changed, 241 insertions(+) create mode 100644 drivers/input/keyboard/cypress-sf.c diff --git a/MAINTAINERS b/MAINTAINERS index b03c68cebe4c..f72af66b2889 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5006,6 +5006,12 @@ L: linux-input@vger.kernel.org S: Maintained F: drivers/input/touchscreen/cy8ctma140.c +CYPRESS STREETFIGHTER TOUCHKEYS DRIVER +M: Yassine Oudjana +L: linux-input@vger.kernel.org +S: Maintained +F: drivers/input/keyboard/cypress-sf.c + CYTTSP TOUCHSCREEN DRIVER M: Linus Walleij L: linux-input@vger.kernel.org diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index 1b0afc8bf841..d3bf5c3f9add 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -791,4 +791,14 @@ config KEYBOARD_MTK_PMIC To compile this driver as a module, choose M here: the module will be called pmic-keys. +config KEYBOARD_CYPRESS_SF + tristate "Cypress StreetFighter touchkey support" + depends on I2C + help + Say Y here if you want to enable support for Cypress StreetFighter + touchkeys. + + To compile this driver as a module, choose M here: the + module will be called cypress-sf. 
+ endif diff --git a/drivers/input/keyboard/Makefile b/drivers/input/keyboard/Makefile index 1d689fdd5c00..e3c8648f834e 100644 --- a/drivers/input/keyboard/Makefile +++ b/drivers/input/keyboard/Makefile @@ -17,6 +17,7 @@ obj-$(CONFIG_KEYBOARD_BCM) += bcm-keypad.o obj-$(CONFIG_KEYBOARD_CAP11XX) += cap11xx.o obj-$(CONFIG_KEYBOARD_CLPS711X) += clps711x-keypad.o obj-$(CONFIG_KEYBOARD_CROS_EC) += cros_ec_keyb.o +obj-$(CONFIG_KEYBOARD_CYPRESS_SF) += cypress-sf.o obj-$(CONFIG_KEYBOARD_DAVINCI) += davinci_keyscan.o obj-$(CONFIG_KEYBOARD_DLINK_DIR685) += dlink-dir685-touchkeys.o obj-$(CONFIG_KEYBOARD_EP93XX) += ep93xx_keypad.o diff --git a/drivers/input/keyboard/cypress-sf.c b/drivers/input/keyboard/cypress-sf.c new file mode 100644 index 000000000000..c28996028e80 --- /dev/null +++ b/drivers/input/keyboard/cypress-sf.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Cypress StreetFighter Touchkey Driver + * + * Copyright (c) 2021 Yassine Oudjana + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CYPRESS_SF_DEV_NAME "cypress-sf" + +#define CYPRESS_SF_REG_BUTTON_STATUS 0x4a + +struct cypress_sf_data { + struct i2c_client *client; + struct input_dev *input_dev; + struct regulator_bulk_data regulators[2]; + u32 *keycodes; + unsigned long keystates; + int num_keys; +}; + +static irqreturn_t cypress_sf_irq_handler(int irq, void *devid) +{ + struct cypress_sf_data *touchkey = devid; + unsigned long keystates, changed; + bool new_state; + int val, key; + + val = i2c_smbus_read_byte_data(touchkey->client, + CYPRESS_SF_REG_BUTTON_STATUS); + if (val < 0) { + dev_err(&touchkey->client->dev, + "Failed to read button status: %d", val); + return IRQ_NONE; + } + keystates = val; + + bitmap_xor(&changed, &keystates, &touchkey->keystates, + touchkey->num_keys); + + for_each_set_bit(key, &changed, touchkey->num_keys) { + new_state = keystates & BIT(key); + dev_dbg(&touchkey->client->dev, + "Key %d changed to %d", 
key, new_state); + input_report_key(touchkey->input_dev, + touchkey->keycodes[key], new_state); + } + + input_sync(touchkey->input_dev); + touchkey->keystates = keystates; + + return IRQ_HANDLED; +} + +static int cypress_sf_probe(struct i2c_client *client) +{ + struct cypress_sf_data *touchkey; + int key, error; + + touchkey = devm_kzalloc(&client->dev, sizeof(*touchkey), GFP_KERNEL); + if (!touchkey) + return -ENOMEM; + + touchkey->client = client; + i2c_set_clientdata(client, touchkey); + + touchkey->regulators[0].supply = "vdd"; + touchkey->regulators[1].supply = "avdd"; + + error = devm_regulator_bulk_get(&client->dev, + ARRAY_SIZE(touchkey->regulators), + touchkey->regulators); + if (error) { + dev_err(&client->dev, "Failed to get regulators: %d\n", error); + return error; + } + + touchkey->num_keys = device_property_read_u32_array(&client->dev, + "linux,keycodes", + NULL, 0); + if (touchkey->num_keys < 0) { + /* Default key count */ + touchkey->num_keys = 2; + } + + touchkey->keycodes = devm_kcalloc(&client->dev, + touchkey->num_keys, + sizeof(*touchkey->keycodes), + GFP_KERNEL); + if (!touchkey->keycodes) + return -ENOMEM; + + error = device_property_read_u32_array(&client->dev, "linux,keycodes", + touchkey->keycodes, + touchkey->num_keys); + + if (error) { + dev_warn(&client->dev, + "Failed to read keycodes: %d, using defaults\n", + error); + + /* Default keycodes */ + touchkey->keycodes[0] = KEY_BACK; + touchkey->keycodes[1] = KEY_MENU; + } + + error = regulator_bulk_enable(ARRAY_SIZE(touchkey->regulators), + touchkey->regulators); + if (error) { + dev_err(&client->dev, + "Failed to enable regulators: %d\n", error); + return error; + } + + touchkey->input_dev = devm_input_allocate_device(&client->dev); + if (!touchkey->input_dev) { + dev_err(&client->dev, "Failed to allocate input device\n"); + return -ENOMEM; + } + + touchkey->input_dev->name = CYPRESS_SF_DEV_NAME; + touchkey->input_dev->id.bustype = BUS_I2C; + + for (key = 0; key < touchkey->num_keys; 
++key) + input_set_capability(touchkey->input_dev, + EV_KEY, touchkey->keycodes[key]); + + error = input_register_device(touchkey->input_dev); + if (error) { + dev_err(&client->dev, + "Failed to register input device: %d\n", error); + return error; + } + + error = devm_request_threaded_irq(&client->dev, client->irq, + NULL, cypress_sf_irq_handler, + IRQF_ONESHOT, + CYPRESS_SF_DEV_NAME, touchkey); + if (error) { + dev_err(&client->dev, + "Failed to register threaded irq: %d", error); + return error; + } + + return 0; +}; + +static int __maybe_unused cypress_sf_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct cypress_sf_data *touchkey = i2c_get_clientdata(client); + int error; + + disable_irq(client->irq); + + error = regulator_bulk_disable(ARRAY_SIZE(touchkey->regulators), + touchkey->regulators); + if (error) { + dev_err(dev, "Failed to disable regulators: %d", error); + enable_irq(client->irq); + return error; + } + + return 0; +} + +static int __maybe_unused cypress_sf_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct cypress_sf_data *touchkey = i2c_get_clientdata(client); + int error; + + error = regulator_bulk_enable(ARRAY_SIZE(touchkey->regulators), + touchkey->regulators); + if (error) { + dev_err(dev, "Failed to enable regulators: %d", error); + return error; + } + + enable_irq(client->irq); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(cypress_sf_pm_ops, + cypress_sf_suspend, cypress_sf_resume); + +static struct i2c_device_id cypress_sf_id_table[] = { + { CYPRESS_SF_DEV_NAME, 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, cypress_sf_id_table); + +#ifdef CONFIG_OF +static const struct of_device_id cypress_sf_of_match[] = { + { .compatible = "cypress,sf3155", }, + { }, +}; +MODULE_DEVICE_TABLE(of, cypress_sf_of_match); +#endif + +static struct i2c_driver cypress_sf_driver = { + .driver = { + .name = CYPRESS_SF_DEV_NAME, + .pm = &cypress_sf_pm_ops, + .of_match_table = 
of_match_ptr(cypress_sf_of_match), + }, + .id_table = cypress_sf_id_table, + .probe_new = cypress_sf_probe, +}; +module_i2c_driver(cypress_sf_driver); + +MODULE_AUTHOR("Yassine Oudjana "); +MODULE_DESCRIPTION("Cypress StreetFighter Touchkey Driver"); +MODULE_LICENSE("GPL v2"); From e2afe95a87a268bcdca2fb489d9c8a485e3aca85 Mon Sep 17 00:00:00 2001 From: Yassine Oudjana Date: Thu, 9 Sep 2021 22:23:39 -0700 Subject: [PATCH 015/433] dt-bindings: input: Add binding for cypress-sf Add a device tree binding for Cypress StreetFighter. Signed-off-by: Yassine Oudjana Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20210907174341.422013-3-y.oudjana@protonmail.com Signed-off-by: Dmitry Torokhov --- .../devicetree/bindings/input/cypress-sf.yaml | 61 +++++++++++++++++++ MAINTAINERS | 1 + 2 files changed, 62 insertions(+) create mode 100644 Documentation/devicetree/bindings/input/cypress-sf.yaml diff --git a/Documentation/devicetree/bindings/input/cypress-sf.yaml b/Documentation/devicetree/bindings/input/cypress-sf.yaml new file mode 100644 index 000000000000..c0b051466272 --- /dev/null +++ b/Documentation/devicetree/bindings/input/cypress-sf.yaml @@ -0,0 +1,61 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/input/cypress-sf.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Cypress StreetFighter touchkey controller + +maintainers: + - Yassine Oudjana + +allOf: + - $ref: input.yaml# + +properties: + compatible: + const: cypress,sf3155 + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + avdd-supply: + description: Regulator for AVDD analog voltage + + vdd-supply: + description: Regulator for VDD digital voltage + + linux,keycodes: + minItems: 1 + maxItems: 8 + +required: + - compatible + - reg + - interrupts + - avdd-supply + - vdd-supply + +additionalProperties: false + +examples: + - | + #include + #include + i2c { + #address-cells = <1>; + #size-cells = <0>; + + touchkey@28 { + 
compatible = "cypress,sf3155"; + reg = <0x28>; + interrupt-parent = <&msmgpio>; + interrupts = <77 IRQ_TYPE_EDGE_FALLING>; + avdd-supply = <&vreg_l6a_1p8>; + vdd-supply = <&vdd_3v2_tp>; + linux,keycodes = ; + }; + }; diff --git a/MAINTAINERS b/MAINTAINERS index f72af66b2889..44e6c428703e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5010,6 +5010,7 @@ CYPRESS STREETFIGHTER TOUCHKEYS DRIVER M: Yassine Oudjana L: linux-input@vger.kernel.org S: Maintained +F: Documentation/devicetree/bindings/input/cypress-sf.yaml F: drivers/input/keyboard/cypress-sf.c CYTTSP TOUCHSCREEN DRIVER From e80704272f5c3f80d315144b5eeaf867082c94ad Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 9 Aug 2021 13:25:09 +0200 Subject: [PATCH 016/433] kcsan: test: Defer kcsan_test_init() after kunit initialization When the test is built into the kernel (not a module), kcsan_test_init() and kunit_init() both use late_initcall(), which means kcsan_test_init() might see a NULL debugfs_rootdir as parent dentry, resulting in kcsan_test_init() and kcsan_debugfs_init() both trying to create a debugfs node named "kcsan" in debugfs root. One of them will show an error and be unsuccessful. Defer kcsan_test_init() until we're sure kunit was initialized. Signed-off-by: Marco Elver Signed-off-by: Paul E. 
McKenney --- kernel/kcsan/kcsan_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/kcsan/kcsan_test.c b/kernel/kcsan/kcsan_test.c index dc55fd5a36fc..df041bdb6088 100644 --- a/kernel/kcsan/kcsan_test.c +++ b/kernel/kcsan/kcsan_test.c @@ -1224,7 +1224,7 @@ static void kcsan_test_exit(void) tracepoint_synchronize_unregister(); } -late_initcall(kcsan_test_init); +late_initcall_sync(kcsan_test_init); module_exit(kcsan_test_exit); MODULE_LICENSE("GPL v2"); From 80804284103ab95e1fe92f167af690ef4c9a6560 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 9 Aug 2021 13:25:10 +0200 Subject: [PATCH 017/433] kcsan: test: Use kunit_skip() to skip tests Use the new kunit_skip() to skip tests if requirements were not met. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/kcsan_test.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/kernel/kcsan/kcsan_test.c b/kernel/kcsan/kcsan_test.c index df041bdb6088..d93f226327af 100644 --- a/kernel/kcsan/kcsan_test.c +++ b/kernel/kcsan/kcsan_test.c @@ -29,6 +29,11 @@ #include #include +#define KCSAN_TEST_REQUIRES(test, cond) do { \ + if (!(cond)) \ + kunit_skip((test), "Test requires: " #cond); \ +} while (0) + #ifdef CONFIG_CC_HAS_TSAN_COMPOUND_READ_BEFORE_WRITE #define __KCSAN_ACCESS_RW(alt) (KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE) #else @@ -642,8 +647,7 @@ static void test_read_plain_atomic_write(struct kunit *test) }; bool match_expect = false; - if (IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) - return; + KCSAN_TEST_REQUIRES(test, !IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)); begin_test_checks(test_kernel_read, test_kernel_write_atomic); do { @@ -665,8 +669,7 @@ static void test_read_plain_atomic_rmw(struct kunit *test) }; bool match_expect = false; - if (IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) - return; + KCSAN_TEST_REQUIRES(test, !IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)); begin_test_checks(test_kernel_read, test_kernel_atomic_rmw); do { From 
ade3a58b2d40555701143930ead3d44d0b52ca9e Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 9 Aug 2021 13:25:11 +0200 Subject: [PATCH 018/433] kcsan: test: Fix flaky test case If CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n, then we may also see data races between the writers only. If we get unlucky and never capture a read-write data race, but only the write-write data races, then the test_no_value_change* test cases may incorrectly fail. The second problem is that the initial value needs to be reset, as otherwise we might actually observe a value change at the start. Fix it by also looking for the write-write data races, and resetting the value to what will be written. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/kcsan_test.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/kernel/kcsan/kcsan_test.c b/kernel/kcsan/kcsan_test.c index d93f226327af..e282c1166373 100644 --- a/kernel/kcsan/kcsan_test.c +++ b/kernel/kcsan/kcsan_test.c @@ -493,17 +493,24 @@ static void test_concurrent_races(struct kunit *test) __no_kcsan static void test_novalue_change(struct kunit *test) { - const struct expect_report expect = { + const struct expect_report expect_rw = { .access = { { test_kernel_write_nochange, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE }, { test_kernel_read, &test_var, sizeof(test_var), 0 }, }, }; + const struct expect_report expect_ww = { + .access = { + { test_kernel_write_nochange, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE }, + { test_kernel_write_nochange, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE }, + }, + }; bool match_expect = false; + test_kernel_write_nochange(); /* Reset value. 
*/ begin_test_checks(test_kernel_write_nochange, test_kernel_read); do { - match_expect = report_matches(&expect); + match_expect = report_matches(&expect_rw) || report_matches(&expect_ww); } while (!end_test_checks(match_expect)); if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY)) KUNIT_EXPECT_FALSE(test, match_expect); @@ -518,17 +525,24 @@ static void test_novalue_change(struct kunit *test) __no_kcsan static void test_novalue_change_exception(struct kunit *test) { - const struct expect_report expect = { + const struct expect_report expect_rw = { .access = { { test_kernel_write_nochange_rcu, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE }, { test_kernel_read, &test_var, sizeof(test_var), 0 }, }, }; + const struct expect_report expect_ww = { + .access = { + { test_kernel_write_nochange_rcu, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE }, + { test_kernel_write_nochange_rcu, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE }, + }, + }; bool match_expect = false; + test_kernel_write_nochange_rcu(); /* Reset value. */ begin_test_checks(test_kernel_write_nochange_rcu, test_kernel_read); do { - match_expect = report_matches(&expect); + match_expect = report_matches(&expect_rw) || report_matches(&expect_ww); } while (!end_test_checks(match_expect)); KUNIT_EXPECT_TRUE(test, match_expect); } From 55a55fec5015b326235873b925a5882ac56ecaa2 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 9 Aug 2021 13:25:12 +0200 Subject: [PATCH 019/433] kcsan: Add ability to pass instruction pointer of access to reporting Add the ability to pass an explicitly set instruction pointer of access from check_access() all the way through to reporting. In preparation of using it in reporting. Signed-off-by: Marco Elver Signed-off-by: Paul E. 
McKenney --- kernel/kcsan/core.c | 55 +++++++++++++++++++++++-------------------- kernel/kcsan/kcsan.h | 8 +++---- kernel/kcsan/report.c | 20 +++++++++------- 3 files changed, 45 insertions(+), 38 deletions(-) diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index 76e67d1e02d4..bffd1d95addb 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -350,6 +350,7 @@ void kcsan_restore_irqtrace(struct task_struct *task) static noinline void kcsan_found_watchpoint(const volatile void *ptr, size_t size, int type, + unsigned long ip, atomic_long_t *watchpoint, long encoded_watchpoint) { @@ -396,7 +397,7 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr, if (consumed) { kcsan_save_irqtrace(current); - kcsan_report_set_info(ptr, size, type, watchpoint - watchpoints); + kcsan_report_set_info(ptr, size, type, ip, watchpoint - watchpoints); kcsan_restore_irqtrace(current); } else { /* @@ -416,7 +417,7 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr, } static noinline void -kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) +kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type, unsigned long ip) { const bool is_write = (type & KCSAN_ACCESS_WRITE) != 0; const bool is_assert = (type & KCSAN_ACCESS_ASSERT) != 0; @@ -568,8 +569,8 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) if (is_assert && value_change == KCSAN_VALUE_CHANGE_TRUE) atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_ASSERT_FAILURES]); - kcsan_report_known_origin(ptr, size, type, value_change, - watchpoint - watchpoints, + kcsan_report_known_origin(ptr, size, type, ip, + value_change, watchpoint - watchpoints, old, new, access_mask); } else if (value_change == KCSAN_VALUE_CHANGE_TRUE) { /* Inferring a race, since the value should not have changed. 
*/ @@ -578,8 +579,10 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) if (is_assert) atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_ASSERT_FAILURES]); - if (IS_ENABLED(CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN) || is_assert) - kcsan_report_unknown_origin(ptr, size, type, old, new, access_mask); + if (IS_ENABLED(CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN) || is_assert) { + kcsan_report_unknown_origin(ptr, size, type, ip, + old, new, access_mask); + } } /* @@ -596,8 +599,8 @@ out: user_access_restore(ua_flags); } -static __always_inline void check_access(const volatile void *ptr, size_t size, - int type) +static __always_inline void +check_access(const volatile void *ptr, size_t size, int type, unsigned long ip) { const bool is_write = (type & KCSAN_ACCESS_WRITE) != 0; atomic_long_t *watchpoint; @@ -625,13 +628,12 @@ static __always_inline void check_access(const volatile void *ptr, size_t size, */ if (unlikely(watchpoint != NULL)) - kcsan_found_watchpoint(ptr, size, type, watchpoint, - encoded_watchpoint); + kcsan_found_watchpoint(ptr, size, type, ip, watchpoint, encoded_watchpoint); else { struct kcsan_ctx *ctx = get_ctx(); /* Call only once in fast-path. */ if (unlikely(should_watch(ptr, size, type, ctx))) - kcsan_setup_watchpoint(ptr, size, type); + kcsan_setup_watchpoint(ptr, size, type, ip); else if (unlikely(ctx->scoped_accesses.prev)) kcsan_check_scoped_accesses(); } @@ -757,7 +759,7 @@ kcsan_begin_scoped_access(const volatile void *ptr, size_t size, int type, { struct kcsan_ctx *ctx = get_ctx(); - __kcsan_check_access(ptr, size, type); + check_access(ptr, size, type, _RET_IP_); ctx->disable_count++; /* Disable KCSAN, in case list debugging is on. 
*/ @@ -802,7 +804,7 @@ EXPORT_SYMBOL(kcsan_end_scoped_access); void __kcsan_check_access(const volatile void *ptr, size_t size, int type) { - check_access(ptr, size, type); + check_access(ptr, size, type, _RET_IP_); } EXPORT_SYMBOL(__kcsan_check_access); @@ -823,7 +825,7 @@ EXPORT_SYMBOL(__kcsan_check_access); void __tsan_read##size(void *ptr); \ void __tsan_read##size(void *ptr) \ { \ - check_access(ptr, size, 0); \ + check_access(ptr, size, 0, _RET_IP_); \ } \ EXPORT_SYMBOL(__tsan_read##size); \ void __tsan_unaligned_read##size(void *ptr) \ @@ -832,7 +834,7 @@ EXPORT_SYMBOL(__kcsan_check_access); void __tsan_write##size(void *ptr); \ void __tsan_write##size(void *ptr) \ { \ - check_access(ptr, size, KCSAN_ACCESS_WRITE); \ + check_access(ptr, size, KCSAN_ACCESS_WRITE, _RET_IP_); \ } \ EXPORT_SYMBOL(__tsan_write##size); \ void __tsan_unaligned_write##size(void *ptr) \ @@ -842,7 +844,8 @@ EXPORT_SYMBOL(__kcsan_check_access); void __tsan_read_write##size(void *ptr) \ { \ check_access(ptr, size, \ - KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE); \ + KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE, \ + _RET_IP_); \ } \ EXPORT_SYMBOL(__tsan_read_write##size); \ void __tsan_unaligned_read_write##size(void *ptr) \ @@ -858,14 +861,14 @@ DEFINE_TSAN_READ_WRITE(16); void __tsan_read_range(void *ptr, size_t size); void __tsan_read_range(void *ptr, size_t size) { - check_access(ptr, size, 0); + check_access(ptr, size, 0, _RET_IP_); } EXPORT_SYMBOL(__tsan_read_range); void __tsan_write_range(void *ptr, size_t size); void __tsan_write_range(void *ptr, size_t size) { - check_access(ptr, size, KCSAN_ACCESS_WRITE); + check_access(ptr, size, KCSAN_ACCESS_WRITE, _RET_IP_); } EXPORT_SYMBOL(__tsan_write_range); @@ -886,7 +889,8 @@ EXPORT_SYMBOL(__tsan_write_range); IS_ALIGNED((unsigned long)ptr, size); \ if (IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS) && is_atomic) \ return; \ - check_access(ptr, size, is_atomic ? KCSAN_ACCESS_ATOMIC : 0); \ + check_access(ptr, size, is_atomic ? 
KCSAN_ACCESS_ATOMIC : 0, \ + _RET_IP_); \ } \ EXPORT_SYMBOL(__tsan_volatile_read##size); \ void __tsan_unaligned_volatile_read##size(void *ptr) \ @@ -901,7 +905,8 @@ EXPORT_SYMBOL(__tsan_write_range); return; \ check_access(ptr, size, \ KCSAN_ACCESS_WRITE | \ - (is_atomic ? KCSAN_ACCESS_ATOMIC : 0)); \ + (is_atomic ? KCSAN_ACCESS_ATOMIC : 0), \ + _RET_IP_); \ } \ EXPORT_SYMBOL(__tsan_volatile_write##size); \ void __tsan_unaligned_volatile_write##size(void *ptr) \ @@ -955,7 +960,7 @@ EXPORT_SYMBOL(__tsan_init); u##bits __tsan_atomic##bits##_load(const u##bits *ptr, int memorder) \ { \ if (!IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) { \ - check_access(ptr, bits / BITS_PER_BYTE, KCSAN_ACCESS_ATOMIC); \ + check_access(ptr, bits / BITS_PER_BYTE, KCSAN_ACCESS_ATOMIC, _RET_IP_); \ } \ return __atomic_load_n(ptr, memorder); \ } \ @@ -965,7 +970,7 @@ EXPORT_SYMBOL(__tsan_init); { \ if (!IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) { \ check_access(ptr, bits / BITS_PER_BYTE, \ - KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC); \ + KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC, _RET_IP_); \ } \ __atomic_store_n(ptr, v, memorder); \ } \ @@ -978,7 +983,7 @@ EXPORT_SYMBOL(__tsan_init); if (!IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) { \ check_access(ptr, bits / BITS_PER_BYTE, \ KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE | \ - KCSAN_ACCESS_ATOMIC); \ + KCSAN_ACCESS_ATOMIC, _RET_IP_); \ } \ return __atomic_##op##suffix(ptr, v, memorder); \ } \ @@ -1010,7 +1015,7 @@ EXPORT_SYMBOL(__tsan_init); if (!IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) { \ check_access(ptr, bits / BITS_PER_BYTE, \ KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE | \ - KCSAN_ACCESS_ATOMIC); \ + KCSAN_ACCESS_ATOMIC, _RET_IP_); \ } \ return __atomic_compare_exchange_n(ptr, exp, val, weak, mo, fail_mo); \ } \ @@ -1025,7 +1030,7 @@ EXPORT_SYMBOL(__tsan_init); if (!IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) { \ check_access(ptr, bits / BITS_PER_BYTE, \ KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE | \ - KCSAN_ACCESS_ATOMIC); \ + 
KCSAN_ACCESS_ATOMIC, _RET_IP_); \ } \ __atomic_compare_exchange_n(ptr, &exp, val, 0, mo, fail_mo); \ return exp; \ diff --git a/kernel/kcsan/kcsan.h b/kernel/kcsan/kcsan.h index f36e25c497ed..ae33c2a7f07e 100644 --- a/kernel/kcsan/kcsan.h +++ b/kernel/kcsan/kcsan.h @@ -121,7 +121,7 @@ enum kcsan_value_change { * to be consumed by the reporting thread. No report is printed yet. */ void kcsan_report_set_info(const volatile void *ptr, size_t size, int access_type, - int watchpoint_idx); + unsigned long ip, int watchpoint_idx); /* * The calling thread observed that the watchpoint it set up was hit and @@ -129,14 +129,14 @@ void kcsan_report_set_info(const volatile void *ptr, size_t size, int access_typ * thread. */ void kcsan_report_known_origin(const volatile void *ptr, size_t size, int access_type, - enum kcsan_value_change value_change, int watchpoint_idx, - u64 old, u64 new, u64 mask); + unsigned long ip, enum kcsan_value_change value_change, + int watchpoint_idx, u64 old, u64 new, u64 mask); /* * No other thread was observed to race with the access, but the data value * before and after the stall differs. Reports a race of "unknown origin". */ void kcsan_report_unknown_origin(const volatile void *ptr, size_t size, int access_type, - u64 old, u64 new, u64 mask); + unsigned long ip, u64 old, u64 new, u64 mask); #endif /* _KERNEL_KCSAN_KCSAN_H */ diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index 21137929d428..50c4119f5cc0 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -31,6 +31,7 @@ struct access_info { int access_type; int task_pid; int cpu_id; + unsigned long ip; }; /* @@ -576,21 +577,22 @@ discard: } static struct access_info prepare_access_info(const volatile void *ptr, size_t size, - int access_type) + int access_type, unsigned long ip) { return (struct access_info) { .ptr = ptr, .size = size, .access_type = access_type, .task_pid = in_task() ? 
task_pid_nr(current) : -1, - .cpu_id = raw_smp_processor_id() + .cpu_id = raw_smp_processor_id(), + .ip = ip, }; } void kcsan_report_set_info(const volatile void *ptr, size_t size, int access_type, - int watchpoint_idx) + unsigned long ip, int watchpoint_idx) { - const struct access_info ai = prepare_access_info(ptr, size, access_type); + const struct access_info ai = prepare_access_info(ptr, size, access_type, ip); unsigned long flags; kcsan_disable_current(); @@ -603,10 +605,10 @@ void kcsan_report_set_info(const volatile void *ptr, size_t size, int access_typ } void kcsan_report_known_origin(const volatile void *ptr, size_t size, int access_type, - enum kcsan_value_change value_change, int watchpoint_idx, - u64 old, u64 new, u64 mask) + unsigned long ip, enum kcsan_value_change value_change, + int watchpoint_idx, u64 old, u64 new, u64 mask) { - const struct access_info ai = prepare_access_info(ptr, size, access_type); + const struct access_info ai = prepare_access_info(ptr, size, access_type, ip); struct other_info *other_info = &other_infos[watchpoint_idx]; unsigned long flags = 0; @@ -637,9 +639,9 @@ out: } void kcsan_report_unknown_origin(const volatile void *ptr, size_t size, int access_type, - u64 old, u64 new, u64 mask) + unsigned long ip, u64 old, u64 new, u64 mask) { - const struct access_info ai = prepare_access_info(ptr, size, access_type); + const struct access_info ai = prepare_access_info(ptr, size, access_type, ip); unsigned long flags; kcsan_disable_current(); From f4c87dbbef2638f6da6e29b5e998e3b1dcdb08ee Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 9 Aug 2021 13:25:13 +0200 Subject: [PATCH 020/433] kcsan: Save instruction pointer for scoped accesses Save the instruction pointer for scoped accesses, so that it becomes possible for the reporting code to construct more accurate stack traces that will show the start of the scope. Signed-off-by: Marco Elver Signed-off-by: Paul E. 
McKenney --- include/linux/kcsan-checks.h | 3 +++ kernel/kcsan/core.c | 12 +++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h index 9fd0ad80fef6..5f5965246877 100644 --- a/include/linux/kcsan-checks.h +++ b/include/linux/kcsan-checks.h @@ -100,9 +100,12 @@ void kcsan_set_access_mask(unsigned long mask); /* Scoped access information. */ struct kcsan_scoped_access { struct list_head list; + /* Access information. */ const volatile void *ptr; size_t size; int type; + /* Location where scoped access was set up. */ + unsigned long ip; }; /* * Automatically call kcsan_end_scoped_access() when kcsan_scoped_access goes diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index bffd1d95addb..8b20af541776 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -202,6 +202,9 @@ static __always_inline struct kcsan_ctx *get_ctx(void) return in_task() ? &current->kcsan_ctx : raw_cpu_ptr(&kcsan_cpu_ctx); } +static __always_inline void +check_access(const volatile void *ptr, size_t size, int type, unsigned long ip); + /* Check scoped accesses; never inline because this is a slow-path! */ static noinline void kcsan_check_scoped_accesses(void) { @@ -210,8 +213,10 @@ static noinline void kcsan_check_scoped_accesses(void) struct kcsan_scoped_access *scoped_access; ctx->scoped_accesses.prev = NULL; /* Avoid recursion. */ - list_for_each_entry(scoped_access, &ctx->scoped_accesses, list) - __kcsan_check_access(scoped_access->ptr, scoped_access->size, scoped_access->type); + list_for_each_entry(scoped_access, &ctx->scoped_accesses, list) { + check_access(scoped_access->ptr, scoped_access->size, + scoped_access->type, scoped_access->ip); + } ctx->scoped_accesses.prev = prev_save; } @@ -767,6 +772,7 @@ kcsan_begin_scoped_access(const volatile void *ptr, size_t size, int type, sa->ptr = ptr; sa->size = size; sa->type = type; + sa->ip = _RET_IP_; if (!ctx->scoped_accesses.prev) /* Lazy initialize list head.
*/ INIT_LIST_HEAD(&ctx->scoped_accesses); @@ -798,7 +804,7 @@ void kcsan_end_scoped_access(struct kcsan_scoped_access *sa) ctx->disable_count--; - __kcsan_check_access(sa->ptr, sa->size, sa->type); + check_access(sa->ptr, sa->size, sa->type, sa->ip); } EXPORT_SYMBOL(kcsan_end_scoped_access); From 6c65eb75686fc2068c926a73c9c3631b5f0e4c9c Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 9 Aug 2021 13:25:14 +0200 Subject: [PATCH 021/433] kcsan: Start stack trace with explicit location if provided If an explicit access address is set, as is done for scoped accesses, always start the stack trace from that location. get_stack_skipnr() is changed into sanitize_stack_entries(), which if given an address, scans the stack trace for a matching function and then replaces that entry with the explicitly provided address. The previous reports for scoped accesses were all over the place, which could be quite confusing. We now always point at the start of the scope. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/kcsan_test.c | 19 ++++++++------ kernel/kcsan/report.c | 55 +++++++++++++++++++++++++++++++++++---- 2 files changed, 61 insertions(+), 13 deletions(-) diff --git a/kernel/kcsan/kcsan_test.c b/kernel/kcsan/kcsan_test.c index e282c1166373..a3b12429e1d3 100644 --- a/kernel/kcsan/kcsan_test.c +++ b/kernel/kcsan/kcsan_test.c @@ -338,7 +338,10 @@ static noinline void test_kernel_assert_bits_nochange(void) ASSERT_EXCLUSIVE_BITS(test_var, ~TEST_CHANGE_BITS); } -/* To check that scoped assertions do trigger anywhere in scope. */ +/* + * Scoped assertions do trigger anywhere in scope. However, the report should + * still only point at the start of the scope. 
+ */ static noinline void test_enter_scope(void) { int x = 0; @@ -845,22 +848,22 @@ static void test_assert_exclusive_writer_scoped(struct kunit *test) { test_kernel_write_nochange, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE }, }, }; - const struct expect_report expect_anywhere = { + const struct expect_report expect_inscope = { .access = { { test_enter_scope, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_SCOPED }, { test_kernel_write_nochange, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE }, }, }; bool match_expect_start = false; - bool match_expect_anywhere = false; + bool match_expect_inscope = false; begin_test_checks(test_kernel_assert_writer_scoped, test_kernel_write_nochange); do { match_expect_start |= report_matches(&expect_start); - match_expect_anywhere |= report_matches(&expect_anywhere); - } while (!end_test_checks(match_expect_start && match_expect_anywhere)); + match_expect_inscope |= report_matches(&expect_inscope); + } while (!end_test_checks(match_expect_inscope)); KUNIT_EXPECT_TRUE(test, match_expect_start); - KUNIT_EXPECT_TRUE(test, match_expect_anywhere); + KUNIT_EXPECT_FALSE(test, match_expect_inscope); } __no_kcsan @@ -889,9 +892,9 @@ static void test_assert_exclusive_access_scoped(struct kunit *test) do { match_expect_start |= report_matches(&expect_start1) || report_matches(&expect_start2); match_expect_inscope |= report_matches(&expect_inscope); - } while (!end_test_checks(match_expect_start && match_expect_inscope)); + } while (!end_test_checks(match_expect_inscope)); KUNIT_EXPECT_TRUE(test, match_expect_start); - KUNIT_EXPECT_TRUE(test, match_expect_inscope); + KUNIT_EXPECT_FALSE(test, match_expect_inscope); } /* diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index 50c4119f5cc0..4849cde9db9b 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -301,6 +302,48 @@ static int get_stack_skipnr(const unsigned long 
stack_entries[], int num_entries return skip; } +/* + * Skips to the first entry that matches the function of @ip, and then replaces + * that entry with @ip, returning the entries to skip. + */ +static int +replace_stack_entry(unsigned long stack_entries[], int num_entries, unsigned long ip) +{ + unsigned long symbolsize, offset; + unsigned long target_func; + int skip; + + if (kallsyms_lookup_size_offset(ip, &symbolsize, &offset)) + target_func = ip - offset; + else + goto fallback; + + for (skip = 0; skip < num_entries; ++skip) { + unsigned long func = stack_entries[skip]; + + if (!kallsyms_lookup_size_offset(func, &symbolsize, &offset)) + goto fallback; + func -= offset; + + if (func == target_func) { + stack_entries[skip] = ip; + return skip; + } + } + +fallback: + /* Should not happen; the resulting stack trace is likely misleading. */ + WARN_ONCE(1, "Cannot find frame for %pS in stack trace", (void *)ip); + return get_stack_skipnr(stack_entries, num_entries); +} + +static int +sanitize_stack_entries(unsigned long stack_entries[], int num_entries, unsigned long ip) +{ + return ip ? replace_stack_entry(stack_entries, num_entries, ip) : + get_stack_skipnr(stack_entries, num_entries); +} + /* Compares symbolized strings of addr1 and addr2. 
*/ static int sym_strcmp(void *addr1, void *addr2) { @@ -328,12 +371,12 @@ static void print_verbose_info(struct task_struct *task) static void print_report(enum kcsan_value_change value_change, const struct access_info *ai, - const struct other_info *other_info, + struct other_info *other_info, u64 old, u64 new, u64 mask) { unsigned long stack_entries[NUM_STACK_ENTRIES] = { 0 }; int num_stack_entries = stack_trace_save(stack_entries, NUM_STACK_ENTRIES, 1); - int skipnr = get_stack_skipnr(stack_entries, num_stack_entries); + int skipnr = sanitize_stack_entries(stack_entries, num_stack_entries, ai->ip); unsigned long this_frame = stack_entries[skipnr]; unsigned long other_frame = 0; int other_skipnr = 0; /* silence uninit warnings */ @@ -345,8 +388,9 @@ static void print_report(enum kcsan_value_change value_change, return; if (other_info) { - other_skipnr = get_stack_skipnr(other_info->stack_entries, - other_info->num_stack_entries); + other_skipnr = sanitize_stack_entries(other_info->stack_entries, + other_info->num_stack_entries, + other_info->ai.ip); other_frame = other_info->stack_entries[other_skipnr]; /* @value_change is only known for the other thread */ @@ -585,7 +629,8 @@ static struct access_info prepare_access_info(const volatile void *ptr, size_t s .access_type = access_type, .task_pid = in_task() ? task_pid_nr(current) : -1, .cpu_id = raw_smp_processor_id(), - .ip = ip, + /* Only replace stack entry with @ip if scoped access. */ + .ip = (access_type & KCSAN_ACCESS_SCOPED) ? ip : 0, }; } From d627c537c2585875bba071bbfa7cda20328f982b Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 9 Aug 2021 13:25:15 +0200 Subject: [PATCH 022/433] kcsan: Support reporting scoped read-write access type Support generating the string representation of scoped read-write accesses for completeness. They will become required in planned changes. Signed-off-by: Marco Elver Signed-off-by: Paul E. 
McKenney --- kernel/kcsan/kcsan_test.c | 8 +++++--- kernel/kcsan/report.c | 4 ++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/kernel/kcsan/kcsan_test.c b/kernel/kcsan/kcsan_test.c index a3b12429e1d3..660729238588 100644 --- a/kernel/kcsan/kcsan_test.c +++ b/kernel/kcsan/kcsan_test.c @@ -210,10 +210,12 @@ static bool report_matches(const struct expect_report *r) "read-write" : "write") : "read"); + const bool is_atomic = (ty & KCSAN_ACCESS_ATOMIC); + const bool is_scoped = (ty & KCSAN_ACCESS_SCOPED); const char *const access_type_aux = - (ty & KCSAN_ACCESS_ATOMIC) ? - " (marked)" : - ((ty & KCSAN_ACCESS_SCOPED) ? " (scoped)" : ""); + (is_atomic && is_scoped) ? " (marked, scoped)" + : (is_atomic ? " (marked)" + : (is_scoped ? " (scoped)" : "")); if (i == 1) { /* Access 2 */ diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index 4849cde9db9b..fc15077991c4 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -247,6 +247,10 @@ static const char *get_access_type(int type) return "write (scoped)"; case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: return "write (marked, scoped)"; + case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE: + return "read-write (scoped)"; + case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: + return "read-write (marked, scoped)"; default: BUG(); } From 78c3d954e2b3c323d6ba0a7294a490fef43efc57 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 9 Aug 2021 13:25:16 +0200 Subject: [PATCH 023/433] kcsan: Move ctx to start of argument list It is clearer if ctx is at the start of the function argument list; it'll be more consistent when adding functions with varying arguments but all requiring ctx. No functional change intended. Signed-off-by: Marco Elver Signed-off-by: Paul E. 
McKenney --- kernel/kcsan/core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index 8b20af541776..4b84c8e7884b 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -222,7 +222,7 @@ static noinline void kcsan_check_scoped_accesses(void) /* Rules for generic atomic accesses. Called from fast-path. */ static __always_inline bool -is_atomic(const volatile void *ptr, size_t size, int type, struct kcsan_ctx *ctx) +is_atomic(struct kcsan_ctx *ctx, const volatile void *ptr, size_t size, int type) { if (type & KCSAN_ACCESS_ATOMIC) return true; @@ -259,7 +259,7 @@ is_atomic(const volatile void *ptr, size_t size, int type, struct kcsan_ctx *ctx } static __always_inline bool -should_watch(const volatile void *ptr, size_t size, int type, struct kcsan_ctx *ctx) +should_watch(struct kcsan_ctx *ctx, const volatile void *ptr, size_t size, int type) { /* * Never set up watchpoints when memory operations are atomic. @@ -268,7 +268,7 @@ should_watch(const volatile void *ptr, size_t size, int type, struct kcsan_ctx * * should not count towards skipped instructions, and (2) to actually * decrement kcsan_atomic_next for consecutive instruction stream. */ - if (is_atomic(ptr, size, type, ctx)) + if (is_atomic(ctx, ptr, size, type)) return false; if (this_cpu_dec_return(kcsan_skip) >= 0) @@ -637,7 +637,7 @@ check_access(const volatile void *ptr, size_t size, int type, unsigned long ip) else { struct kcsan_ctx *ctx = get_ctx(); /* Call only once in fast-path. 
*/ - if (unlikely(should_watch(ptr, size, type, ctx))) + if (unlikely(should_watch(ctx, ptr, size, type))) kcsan_setup_watchpoint(ptr, size, type, ip); else if (unlikely(ctx->scoped_accesses.prev)) kcsan_check_scoped_accesses(); From ac20e39e8d254da3f82b5ed2afc7bb1e804d32c9 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 13 Aug 2021 10:10:55 +0200 Subject: [PATCH 024/433] kcsan: selftest: Cleanup and add missing __init Make test_encode_decode() more readable and add missing __init. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/selftest.c | 70 +++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 41 deletions(-) diff --git a/kernel/kcsan/selftest.c b/kernel/kcsan/selftest.c index 7f29cb0f5e63..b4295a3892b7 100644 --- a/kernel/kcsan/selftest.c +++ b/kernel/kcsan/selftest.c @@ -18,7 +18,7 @@ #define ITERS_PER_TEST 2000 /* Test requirements. */ -static bool test_requires(void) +static bool __init test_requires(void) { /* random should be initialized for the below tests */ return prandom_u32() + prandom_u32() != 0; @@ -28,14 +28,18 @@ static bool test_requires(void) * Test watchpoint encode and decode: check that encoding some access's info, * and then subsequent decode preserves the access's info. 
*/ -static bool test_encode_decode(void) +static bool __init test_encode_decode(void) { int i; for (i = 0; i < ITERS_PER_TEST; ++i) { size_t size = prandom_u32_max(MAX_ENCODABLE_SIZE) + 1; bool is_write = !!prandom_u32_max(2); + unsigned long verif_masked_addr; + long encoded_watchpoint; + bool verif_is_write; unsigned long addr; + size_t verif_size; prandom_bytes(&addr, sizeof(addr)); if (addr < PAGE_SIZE) @@ -44,53 +48,37 @@ static bool test_encode_decode(void) if (WARN_ON(!check_encodable(addr, size))) return false; - /* Encode and decode */ - { - const long encoded_watchpoint = - encode_watchpoint(addr, size, is_write); - unsigned long verif_masked_addr; - size_t verif_size; - bool verif_is_write; + encoded_watchpoint = encode_watchpoint(addr, size, is_write); - /* Check special watchpoints */ - if (WARN_ON(decode_watchpoint( - INVALID_WATCHPOINT, &verif_masked_addr, - &verif_size, &verif_is_write))) - return false; - if (WARN_ON(decode_watchpoint( - CONSUMED_WATCHPOINT, &verif_masked_addr, - &verif_size, &verif_is_write))) - return false; - - /* Check decoding watchpoint returns same data */ - if (WARN_ON(!decode_watchpoint( - encoded_watchpoint, &verif_masked_addr, - &verif_size, &verif_is_write))) - return false; - if (WARN_ON(verif_masked_addr != - (addr & WATCHPOINT_ADDR_MASK))) - goto fail; - if (WARN_ON(verif_size != size)) - goto fail; - if (WARN_ON(is_write != verif_is_write)) - goto fail; - - continue; -fail: - pr_err("%s fail: %s %zu bytes @ %lx -> encoded: %lx -> %s %zu bytes @ %lx\n", - __func__, is_write ? "write" : "read", size, - addr, encoded_watchpoint, - verif_is_write ? 
"write" : "read", verif_size, - verif_masked_addr); + /* Check special watchpoints */ + if (WARN_ON(decode_watchpoint(INVALID_WATCHPOINT, &verif_masked_addr, &verif_size, &verif_is_write))) return false; - } + if (WARN_ON(decode_watchpoint(CONSUMED_WATCHPOINT, &verif_masked_addr, &verif_size, &verif_is_write))) + return false; + + /* Check decoding watchpoint returns same data */ + if (WARN_ON(!decode_watchpoint(encoded_watchpoint, &verif_masked_addr, &verif_size, &verif_is_write))) + return false; + if (WARN_ON(verif_masked_addr != (addr & WATCHPOINT_ADDR_MASK))) + goto fail; + if (WARN_ON(verif_size != size)) + goto fail; + if (WARN_ON(is_write != verif_is_write)) + goto fail; + + continue; +fail: + pr_err("%s fail: %s %zu bytes @ %lx -> encoded: %lx -> %s %zu bytes @ %lx\n", + __func__, is_write ? "write" : "read", size, addr, encoded_watchpoint, + verif_is_write ? "write" : "read", verif_size, verif_masked_addr); + return false; } return true; } /* Test access matching function. */ -static bool test_matching_access(void) +static bool __init test_matching_access(void) { if (WARN_ON(!matching_access(10, 1, 10, 1))) return false; From cef6f5cc140852fcb6c75f85b8e6ba00d7de1bad Mon Sep 17 00:00:00 2001 From: Len Baker Date: Sat, 11 Sep 2021 15:55:27 -0700 Subject: [PATCH 025/433] Input: omap-keypad - prefer struct_size over open coded arithmetic As noted in the "Deprecated Interfaces, Language Features, Attributes, and Conventions" documentation [1], size calculations (especially multiplication) should not be performed in memory allocator (or similar) function arguments due to the risk of them overflowing. This could lead to values wrapping around and a smaller allocation being made than the caller was expecting. Using those allocations could lead to linear overflows of heap memory and other misbehaviors. So, use the struct_size() helper to do the arithmetic instead of the argument "size + count * size" in the kzalloc() function. 
[1] https://www.kernel.org/doc/html/v5.14/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments Signed-off-by: Len Baker Link: https://lore.kernel.org/r/20210911112716.10067-1-len.baker@gmx.com Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/omap-keypad.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/input/keyboard/omap-keypad.c b/drivers/input/keyboard/omap-keypad.c index dbe836c7ff47..eb3a687796e7 100644 --- a/drivers/input/keyboard/omap-keypad.c +++ b/drivers/input/keyboard/omap-keypad.c @@ -190,8 +190,7 @@ static int omap_kp_probe(struct platform_device *pdev) row_shift = get_count_order(pdata->cols); keycodemax = pdata->rows << row_shift; - omap_kp = kzalloc(sizeof(struct omap_kp) + - keycodemax * sizeof(unsigned short), GFP_KERNEL); + omap_kp = kzalloc(struct_size(omap_kp, keymap, keycodemax), GFP_KERNEL); input_dev = input_allocate_device(); if (!omap_kp || !input_dev) { kfree(omap_kp); From f1c80ba0cc8e7ae015a4b4828564e22f0b583ad5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 20 Sep 2021 18:15:07 -0700 Subject: [PATCH 026/433] Input: tmdc - fix spelling mistake "Millenium" -> "Millennium" There is a spelling mistake in the name of a joystick. Fix it. 
Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20210920184748.18519-1-colin.king@canonical.com Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/tmdc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/joystick/tmdc.c b/drivers/input/joystick/tmdc.c index f89e9aa6d328..7416de84b955 100644 --- a/drivers/input/joystick/tmdc.c +++ b/drivers/input/joystick/tmdc.c @@ -83,7 +83,7 @@ static const struct tmdc_model { const signed char *axes; const short *buttons; } tmdc_models[] = { - { 1, "ThrustMaster Millenium 3D Inceptor", 6, 2, { 4, 2 }, { 4, 6 }, tmdc_abs, tmdc_btn_joy }, + { 1, "ThrustMaster Millennium 3D Inceptor", 6, 2, { 4, 2 }, { 4, 6 }, tmdc_abs, tmdc_btn_joy }, { 3, "ThrustMaster Rage 3D Gamepad", 2, 0, { 8, 2 }, { 0, 0 }, tmdc_abs, tmdc_btn_pad }, { 4, "ThrustMaster Attack Throttle", 5, 2, { 4, 6 }, { 4, 2 }, tmdc_abs_at, tmdc_btn_at }, { 8, "ThrustMaster FragMaster", 4, 0, { 8, 2 }, { 0, 0 }, tmdc_abs_fm, tmdc_btn_fm }, From 31ae0102a34ed863c7d32b10e768036324991679 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 20 Sep 2021 21:08:02 -0700 Subject: [PATCH 027/433] Input: goodix - change goodix_i2c_write() len parameter type to int Change the type of the goodix_i2c_write() len parameter from 'unsigned' to 'int' to avoid bare use of 'unsigned'; changing it to 'int' also makes the goodix_i2c_write() prototype consistent with goodix_i2c_read().
Reviewed-by: Bastien Nocera Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210920150643.155872-2-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/goodix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index c682b028f0a2..00536064f45e 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -290,7 +290,7 @@ static int goodix_i2c_read(struct i2c_client *client, * @len: length of the buffer to write */ static int goodix_i2c_write(struct i2c_client *client, u16 reg, const u8 *buf, - unsigned len) + int len) { u8 *addr_buf; struct i2c_msg msg; From a2233cb7b65a017067e2f2703375ecc930a0ab30 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 20 Sep 2021 21:08:18 -0700 Subject: [PATCH 028/433] Input: goodix - add a goodix.h header file Add a goodix.h header file, and move the register definitions, and struct declarations there and add prototypes for various helper functions. This is a preparation patch for adding support for controllers without flash, which need to have their firmware uploaded and need some other special handling too. Since MAINTAINERS needs updating because of this change anyways, also add myself as co-maintainer. 
Reviewed-by: Bastien Nocera Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210920150643.155872-3-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- MAINTAINERS | 3 +- drivers/input/touchscreen/goodix.c | 74 +++--------------------------- drivers/input/touchscreen/goodix.h | 73 +++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 69 deletions(-) create mode 100644 drivers/input/touchscreen/goodix.h diff --git a/MAINTAINERS b/MAINTAINERS index 44e6c428703e..7ab4a058fe4d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7716,9 +7716,10 @@ F: drivers/media/usb/go7007/ GOODIX TOUCHSCREEN M: Bastien Nocera +M: Hans de Goede L: linux-input@vger.kernel.org S: Maintained -F: drivers/input/touchscreen/goodix.c +F: drivers/input/touchscreen/goodix* GOOGLE ETHERNET DRIVERS M: Catherine Sullivan diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index 00536064f45e..76e93bc8389a 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -14,20 +14,15 @@ #include #include #include -#include -#include -#include -#include -#include #include #include #include #include -#include #include #include #include #include +#include "goodix.h" #define GOODIX_GPIO_INT_NAME "irq" #define GOODIX_GPIO_RST_NAME "reset" @@ -38,22 +33,11 @@ #define GOODIX_CONTACT_SIZE 8 #define GOODIX_MAX_CONTACT_SIZE 9 #define GOODIX_MAX_CONTACTS 10 -#define GOODIX_MAX_KEYS 7 #define GOODIX_CONFIG_MIN_LENGTH 186 #define GOODIX_CONFIG_911_LENGTH 186 #define GOODIX_CONFIG_967_LENGTH 228 #define GOODIX_CONFIG_GT9X_LENGTH 240 -#define GOODIX_CONFIG_MAX_LENGTH 240 - -/* Register defines */ -#define GOODIX_REG_COMMAND 0x8040 -#define GOODIX_CMD_SCREEN_OFF 0x05 - -#define GOODIX_READ_COOR_ADDR 0x814E -#define GOODIX_GT1X_REG_CONFIG_DATA 0x8050 -#define GOODIX_GT9X_REG_CONFIG_DATA 0x8047 -#define GOODIX_REG_ID 0x8140 #define GOODIX_BUFFER_STATUS_READY BIT(7) #define GOODIX_HAVE_KEY BIT(4) @@ -68,55 +52,11 @@ #define 
ACPI_GPIO_SUPPORT #endif -struct goodix_ts_data; - -enum goodix_irq_pin_access_method { - IRQ_PIN_ACCESS_NONE, - IRQ_PIN_ACCESS_GPIO, - IRQ_PIN_ACCESS_ACPI_GPIO, - IRQ_PIN_ACCESS_ACPI_METHOD, -}; - -struct goodix_chip_data { - u16 config_addr; - int config_len; - int (*check_config)(struct goodix_ts_data *ts, const u8 *cfg, int len); - void (*calc_config_checksum)(struct goodix_ts_data *ts); -}; - struct goodix_chip_id { const char *id; const struct goodix_chip_data *data; }; -#define GOODIX_ID_MAX_LEN 4 - -struct goodix_ts_data { - struct i2c_client *client; - struct input_dev *input_dev; - const struct goodix_chip_data *chip; - struct touchscreen_properties prop; - unsigned int max_touch_num; - unsigned int int_trigger_type; - struct regulator *avdd28; - struct regulator *vddio; - struct gpio_desc *gpiod_int; - struct gpio_desc *gpiod_rst; - int gpio_count; - int gpio_int_idx; - char id[GOODIX_ID_MAX_LEN + 1]; - u16 version; - const char *cfg_name; - bool reset_controller_at_probe; - bool load_cfg_from_disk; - struct completion firmware_loading_complete; - unsigned long irq_flags; - enum goodix_irq_pin_access_method irq_pin_access_method; - unsigned int contact_size; - u8 config[GOODIX_CONFIG_MAX_LENGTH]; - unsigned short keymap[GOODIX_MAX_KEYS]; -}; - static int goodix_check_cfg_8(struct goodix_ts_data *ts, const u8 *cfg, int len); static int goodix_check_cfg_16(struct goodix_ts_data *ts, @@ -260,8 +200,7 @@ static const struct dmi_system_id inverted_x_screen[] = { * @buf: raw write data buffer. * @len: length of the buffer to write */ -static int goodix_i2c_read(struct i2c_client *client, - u16 reg, u8 *buf, int len) +int goodix_i2c_read(struct i2c_client *client, u16 reg, u8 *buf, int len) { struct i2c_msg msgs[2]; __be16 wbuf = cpu_to_be16(reg); @@ -289,8 +228,7 @@ static int goodix_i2c_read(struct i2c_client *client, * @buf: raw data buffer to write. 
* @len: length of the buffer to write */ -static int goodix_i2c_write(struct i2c_client *client, u16 reg, const u8 *buf, - int len) +int goodix_i2c_write(struct i2c_client *client, u16 reg, const u8 *buf, int len) { u8 *addr_buf; struct i2c_msg msg; @@ -314,7 +252,7 @@ static int goodix_i2c_write(struct i2c_client *client, u16 reg, const u8 *buf, return ret < 0 ? ret : (ret != 1 ? -EIO : 0); } -static int goodix_i2c_write_u8(struct i2c_client *client, u16 reg, u8 value) +int goodix_i2c_write_u8(struct i2c_client *client, u16 reg, u8 value) { return goodix_i2c_write(client, reg, &value, sizeof(value)); } @@ -598,7 +536,7 @@ static int goodix_check_cfg(struct goodix_ts_data *ts, const u8 *cfg, int len) * @cfg: config firmware to write to device * @len: config data length */ -static int goodix_send_cfg(struct goodix_ts_data *ts, const u8 *cfg, int len) +int goodix_send_cfg(struct goodix_ts_data *ts, const u8 *cfg, int len) { int error; @@ -696,7 +634,7 @@ static int goodix_irq_direction_input(struct goodix_ts_data *ts) return -EINVAL; /* Never reached */ } -static int goodix_int_sync(struct goodix_ts_data *ts) +int goodix_int_sync(struct goodix_ts_data *ts) { int error; diff --git a/drivers/input/touchscreen/goodix.h b/drivers/input/touchscreen/goodix.h new file mode 100644 index 000000000000..cdaced4f2980 --- /dev/null +++ b/drivers/input/touchscreen/goodix.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __GOODIX_H__ +#define __GOODIX_H__ + +#include +#include +#include +#include +#include +#include + +/* Register defines */ +#define GOODIX_REG_COMMAND 0x8040 +#define GOODIX_CMD_SCREEN_OFF 0x05 + +#define GOODIX_GT1X_REG_CONFIG_DATA 0x8050 +#define GOODIX_GT9X_REG_CONFIG_DATA 0x8047 +#define GOODIX_REG_ID 0x8140 +#define GOODIX_READ_COOR_ADDR 0x814E + +#define GOODIX_ID_MAX_LEN 4 +#define GOODIX_CONFIG_MAX_LENGTH 240 +#define GOODIX_MAX_KEYS 7 + +enum goodix_irq_pin_access_method { + IRQ_PIN_ACCESS_NONE, + IRQ_PIN_ACCESS_GPIO, + 
IRQ_PIN_ACCESS_ACPI_GPIO, + IRQ_PIN_ACCESS_ACPI_METHOD, +}; + +struct goodix_ts_data; + +struct goodix_chip_data { + u16 config_addr; + int config_len; + int (*check_config)(struct goodix_ts_data *ts, const u8 *cfg, int len); + void (*calc_config_checksum)(struct goodix_ts_data *ts); +}; + +struct goodix_ts_data { + struct i2c_client *client; + struct input_dev *input_dev; + const struct goodix_chip_data *chip; + struct touchscreen_properties prop; + unsigned int max_touch_num; + unsigned int int_trigger_type; + struct regulator *avdd28; + struct regulator *vddio; + struct gpio_desc *gpiod_int; + struct gpio_desc *gpiod_rst; + int gpio_count; + int gpio_int_idx; + char id[GOODIX_ID_MAX_LEN + 1]; + u16 version; + const char *cfg_name; + bool reset_controller_at_probe; + bool load_cfg_from_disk; + struct completion firmware_loading_complete; + unsigned long irq_flags; + enum goodix_irq_pin_access_method irq_pin_access_method; + unsigned int contact_size; + u8 config[GOODIX_CONFIG_MAX_LENGTH]; + unsigned short keymap[GOODIX_MAX_KEYS]; +}; + +int goodix_i2c_read(struct i2c_client *client, u16 reg, u8 *buf, int len); +int goodix_i2c_write(struct i2c_client *client, u16 reg, const u8 *buf, int len); +int goodix_i2c_write_u8(struct i2c_client *client, u16 reg, u8 value); +int goodix_send_cfg(struct goodix_ts_data *ts, const u8 *cfg, int len); +int goodix_int_sync(struct goodix_ts_data *ts); + +#endif From 209bda4741f68f102cf2f272227bfc938e387b51 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 20 Sep 2021 21:08:26 -0700 Subject: [PATCH 029/433] Input: goodix - refactor reset handling Refactor reset handling a bit, change the main reset handler into a new goodix_reset_no_int_sync() helper and add a goodix_reset() wrapper which calls goodix_int_sync() separately. Also push the dev_err() call on reset failure into the goodix_reset_no_int_sync() and goodix_int_sync() functions, so that we don't need to have separate dev_err() calls in all their callers. 
This is a preparation patch for adding support for controllers without flash, which need to have their firmware uploaded and need some other special handling too. Reviewed-by: Bastien Nocera Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210920150643.155872-4-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/goodix.c | 86 ++++++++++++++++++------------ drivers/input/touchscreen/goodix.h | 1 + 2 files changed, 52 insertions(+), 35 deletions(-) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index 76e93bc8389a..e5ec64036f06 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -640,15 +640,60 @@ int goodix_int_sync(struct goodix_ts_data *ts) error = goodix_irq_direction_output(ts, 0); if (error) - return error; + goto error; msleep(50); /* T5: 50ms */ error = goodix_irq_direction_input(ts); if (error) - return error; + goto error; return 0; + +error: + dev_err(&ts->client->dev, "Controller irq sync failed.\n"); + return error; +} + +/** + * goodix_reset_no_int_sync - Reset device, leaving interrupt line in output mode + * + * @ts: goodix_ts_data pointer + */ +int goodix_reset_no_int_sync(struct goodix_ts_data *ts) +{ + int error; + + /* begin select I2C slave addr */ + error = gpiod_direction_output(ts->gpiod_rst, 0); + if (error) + goto error; + + msleep(20); /* T2: > 10ms */ + + /* HIGH: 0x28/0x29, LOW: 0xBA/0xBB */ + error = goodix_irq_direction_output(ts, ts->client->addr == 0x14); + if (error) + goto error; + + usleep_range(100, 2000); /* T3: > 100us */ + + error = gpiod_direction_output(ts->gpiod_rst, 1); + if (error) + goto error; + + usleep_range(6000, 10000); /* T4: > 5ms */ + + /* end select I2C slave addr */ + error = gpiod_direction_input(ts->gpiod_rst); + if (error) + goto error; + + return 0; + +error: + dev_err(&ts->client->dev, "Controller reset failed.\n"); + return error; } /** @@ -660,36 +705,11 @@ static int goodix_reset(struct 
goodix_ts_data *ts) { int error; - /* begin select I2C slave addr */ - error = gpiod_direction_output(ts->gpiod_rst, 0); + error = goodix_reset_no_int_sync(ts); if (error) return error; - msleep(20); /* T2: > 10ms */ - - /* HIGH: 0x28/0x29, LOW: 0xBA/0xBB */ - error = goodix_irq_direction_output(ts, ts->client->addr == 0x14); - if (error) - return error; - - usleep_range(100, 2000); /* T3: > 100us */ - - error = gpiod_direction_output(ts->gpiod_rst, 1); - if (error) - return error; - - usleep_range(6000, 10000); /* T4: > 5ms */ - - /* end select I2C slave addr */ - error = gpiod_direction_input(ts->gpiod_rst); - if (error) - return error; - - error = goodix_int_sync(ts); - if (error) - return error; - - return 0; + return goodix_int_sync(ts); } #ifdef ACPI_GPIO_SUPPORT @@ -1195,10 +1215,8 @@ reset: if (ts->reset_controller_at_probe) { /* reset the controller */ error = goodix_reset(ts); - if (error) { - dev_err(&client->dev, "Controller reset failed.\n"); + if (error) return error; - } } error = goodix_i2c_test(client); @@ -1340,10 +1358,8 @@ static int __maybe_unused goodix_resume(struct device *dev) if (error != 0 || config_ver != ts->config[0]) { error = goodix_reset(ts); - if (error) { - dev_err(dev, "Controller reset failed.\n"); + if (error) return error; - } error = goodix_send_cfg(ts, ts->config, ts->chip->config_len); if (error) diff --git a/drivers/input/touchscreen/goodix.h b/drivers/input/touchscreen/goodix.h index cdaced4f2980..0b88554ba2ae 100644 --- a/drivers/input/touchscreen/goodix.h +++ b/drivers/input/touchscreen/goodix.h @@ -69,5 +69,6 @@ int goodix_i2c_write(struct i2c_client *client, u16 reg, const u8 *buf, int len) int goodix_i2c_write_u8(struct i2c_client *client, u16 reg, u8 value); int goodix_send_cfg(struct goodix_ts_data *ts, const u8 *cfg, int len); int goodix_int_sync(struct goodix_ts_data *ts); +int goodix_reset_no_int_sync(struct goodix_ts_data *ts); #endif From 7642f29c731e383623d368a234a99ff9fb2eb97b Mon Sep 17 00:00:00 2001 From: 
Hans de Goede Date: Mon, 20 Sep 2021 21:08:35 -0700 Subject: [PATCH 030/433] Input: goodix - push error logging up into i2c_read and i2c_write helpers Make the goodix_i2c_read() and goodix_i2c_write*() helpers log errors themselves. This allows removing all the error logging from their callers. This already results in a nice cleanup with the current code and it also helps to make the upcoming support for controllers without flash cleaner. Reviewed-by: Bastien Nocera Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210920150643.155872-5-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/goodix.c | 53 +++++++++++++----------------- 1 file changed, 23 insertions(+), 30 deletions(-) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index e5ec64036f06..f91a92c998a2 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -217,7 +217,13 @@ int goodix_i2c_read(struct i2c_client *client, u16 reg, u8 *buf, int len) msgs[1].buf = buf; ret = i2c_transfer(client->adapter, msgs, 2); - return ret < 0 ? ret : (ret != ARRAY_SIZE(msgs) ? -EIO : 0); + if (ret >= 0) + ret = (ret == ARRAY_SIZE(msgs) ? 0 : -EIO); + + if (ret) + dev_err(&client->dev, "Error reading %d bytes from 0x%04x: %d\n", + len, reg, ret); + return ret; } /** @@ -248,8 +254,15 @@ int goodix_i2c_write(struct i2c_client *client, u16 reg, const u8 *buf, int len) msg.len = len + 2; ret = i2c_transfer(client->adapter, &msg, 1); + if (ret >= 0) + ret = (ret == 1 ? 0 : -EIO); + kfree(addr_buf); - return ret < 0 ? ret : (ret != 1 ? 
-EIO : 0); + + if (ret) + dev_err(&client->dev, "Error writing %d bytes to 0x%04x: %d\n", + len, reg, ret); + return ret; } int goodix_i2c_write_u8(struct i2c_client *client, u16 reg, u8 value) @@ -291,11 +304,8 @@ static int goodix_ts_read_input_report(struct goodix_ts_data *ts, u8 *data) do { error = goodix_i2c_read(ts->client, addr, data, header_contact_keycode_size); - if (error) { - dev_err(&ts->client->dev, "I2C transfer error: %d\n", - error); + if (error) return error; - } if (data[0] & GOODIX_BUFFER_STATUS_READY) { touch_num = data[0] & 0x0f; @@ -418,9 +428,7 @@ static irqreturn_t goodix_ts_irq_handler(int irq, void *dev_id) struct goodix_ts_data *ts = dev_id; goodix_process_events(ts); - - if (goodix_i2c_write_u8(ts->client, GOODIX_READ_COOR_ADDR, 0) < 0) - dev_err(&ts->client->dev, "I2C write end_cmd error\n"); + goodix_i2c_write_u8(ts->client, GOODIX_READ_COOR_ADDR, 0); return IRQ_HANDLED; } @@ -545,11 +553,9 @@ int goodix_send_cfg(struct goodix_ts_data *ts, const u8 *cfg, int len) return error; error = goodix_i2c_write(ts->client, ts->chip->config_addr, cfg, len); - if (error) { - dev_err(&ts->client->dev, "Failed to write config data: %d", - error); + if (error) return error; - } + dev_dbg(&ts->client->dev, "Config sent successfully."); /* Let the firmware reconfigure itself, so sleep for 10ms */ @@ -937,8 +943,6 @@ static void goodix_read_config(struct goodix_ts_data *ts) error = goodix_i2c_read(ts->client, ts->chip->config_addr, ts->config, ts->chip->config_len); if (error) { - dev_warn(&ts->client->dev, "Error reading config: %d\n", - error); ts->int_trigger_type = GOODIX_INT_TRIGGER; ts->max_touch_num = GOODIX_MAX_CONTACTS; return; @@ -969,10 +973,8 @@ static int goodix_read_version(struct goodix_ts_data *ts) char id_str[GOODIX_ID_MAX_LEN + 1]; error = goodix_i2c_read(ts->client, GOODIX_REG_ID, buf, sizeof(buf)); - if (error) { - dev_err(&ts->client->dev, "read version failed: %d\n", error); + if (error) return error; - } memcpy(id_str, buf, 
GOODIX_ID_MAX_LEN); id_str[GOODIX_ID_MAX_LEN] = 0; @@ -998,13 +1000,10 @@ static int goodix_i2c_test(struct i2c_client *client) u8 test; while (retry++ < 2) { - error = goodix_i2c_read(client, GOODIX_REG_ID, - &test, 1); + error = goodix_i2c_read(client, GOODIX_REG_ID, &test, 1); if (!error) return 0; - dev_err(&client->dev, "i2c test failed attempt %d: %d\n", - retry, error); msleep(20); } @@ -1232,10 +1231,8 @@ reset: } error = goodix_read_version(ts); - if (error) { - dev_err(&client->dev, "Read version failed.\n"); + if (error) return error; - } ts->chip = goodix_get_chip_data(ts->id); @@ -1306,7 +1303,6 @@ static int __maybe_unused goodix_suspend(struct device *dev) error = goodix_i2c_write_u8(ts->client, GOODIX_REG_COMMAND, GOODIX_CMD_SCREEN_OFF); if (error) { - dev_err(&ts->client->dev, "Screen off command failed\n"); goodix_irq_direction_input(ts); goodix_request_irq(ts); return -EAGAIN; @@ -1349,10 +1345,7 @@ static int __maybe_unused goodix_resume(struct device *dev) error = goodix_i2c_read(ts->client, ts->chip->config_addr, &config_ver, 1); - if (error) - dev_warn(dev, "Error reading config version: %d, resetting controller\n", - error); - else if (config_ver != ts->config[0]) + if (!error && config_ver != ts->config[0]) dev_info(dev, "Config version mismatch %d != %d, resetting controller\n", config_ver, ts->config[0]); From 20e317222eeabd74e9ff76cf4daf85f961f608dc Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 20 Sep 2021 21:08:44 -0700 Subject: [PATCH 031/433] Input: goodix - allow specifying the config filename The config which needs to be sent to the controller on some device-models is model-specific. Allow specifying a model-specific filename through a device-property, rather than always using a fixed filename. Note the "goodix,config-name" device-property used by this is *not* documented in the Documentation/devicetree/bindings/input/touchscreen/goodix.yaml device-tree bindings for now. Not documenting these is intentional. 
This is done because this code has only been tested on x86/ACPI so far, where devicetree is not used. Instead these properties are set through a software-fwnode attached to the device by the drivers/platform/x86/touchscreen_dmi.c code. This means that the use of this property for now is purely a kernel-internal thing and the name/working of the property may still be changed for now. Reviewed-by: Bastien Nocera Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210920150643.155872-6-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/goodix.c | 14 ++++++++++---- drivers/input/touchscreen/goodix.h | 2 +- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index f91a92c998a2..75c53786516f 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -1165,6 +1165,7 @@ static int goodix_ts_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct goodix_ts_data *ts; + const char *cfg_name; int error; dev_dbg(&client->dev, "I2C Address: 0x%02x\n", client->addr); @@ -1238,10 +1239,15 @@ reset: if (ts->load_cfg_from_disk) { /* update device config */ - ts->cfg_name = devm_kasprintf(&client->dev, GFP_KERNEL, - "goodix_%s_cfg.bin", ts->id); - if (!ts->cfg_name) - return -ENOMEM; + error = device_property_read_string(&client->dev, + "goodix,config-name", + &cfg_name); + if (!error) + snprintf(ts->cfg_name, sizeof(ts->cfg_name), + "goodix/%s", cfg_name); + else + snprintf(ts->cfg_name, sizeof(ts->cfg_name), + "goodix_%s_cfg.bin", ts->id); error = request_firmware_nowait(THIS_MODULE, true, ts->cfg_name, &client->dev, GFP_KERNEL, ts, diff --git a/drivers/input/touchscreen/goodix.h b/drivers/input/touchscreen/goodix.h index 0b88554ba2ae..b1d1bc004091 100644 --- a/drivers/input/touchscreen/goodix.h +++ b/drivers/input/touchscreen/goodix.h @@ -52,8 +52,8 @@ struct goodix_ts_data { int gpio_count; int gpio_int_idx; 
char id[GOODIX_ID_MAX_LEN + 1]; + char cfg_name[64]; u16 version; - const char *cfg_name; bool reset_controller_at_probe; bool load_cfg_from_disk; struct completion firmware_loading_complete; From 09182ed20c04d1b3a3a0d232d7748e745a438acd Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 20 Sep 2021 21:08:53 -0700 Subject: [PATCH 032/433] Input: goodix - add support for controllers without flash Some Goodix touchscreen controllers, such as for example the GT912, don't have flash-storage for their firmware. These models require the OS to load the firmware at runtime, as well as some other special handling. Add support for this to the goodix driver. This patch was developed and tested on a Glavey TM800A550L tablet. Note the "goodix,main-clk" and "firmware-name" device-properties used by the new code are *not* documented in the Documentation/devicetree/bindings/input/touchscreen/goodix.yaml device-tree bindings for now. Not documenting these is intentional. This is done because this code has only been tested on x86/ACPI so far, where devicetree is not used. Instead these properties are set through a software-fwnode attached to the device by the drivers/platform/x86/touchscreen_dmi.c code. This means that the use of these properties for now is purely a kernel-internal thing and the name/working of the properties may still be changed for now. 
Reviewed-by: Bastien Nocera Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210920150643.155872-7-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/Makefile | 3 +- drivers/input/touchscreen/goodix.c | 42 +- drivers/input/touchscreen/goodix.h | 43 ++ drivers/input/touchscreen/goodix_fwupload.c | 427 ++++++++++++++++++++ 4 files changed, 507 insertions(+), 8 deletions(-) create mode 100644 drivers/input/touchscreen/goodix_fwupload.c diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile index 7d34100f7f22..39a8127cf6a5 100644 --- a/drivers/input/touchscreen/Makefile +++ b/drivers/input/touchscreen/Makefile @@ -6,6 +6,7 @@ # Each configuration option enables a list of files. wm97xx-ts-y := wm97xx-core.o +goodix_ts-y := goodix.o goodix_fwupload.o obj-$(CONFIG_TOUCHSCREEN_88PM860X) += 88pm860x-ts.o obj-$(CONFIG_TOUCHSCREEN_AD7877) += ad7877.o @@ -44,7 +45,7 @@ obj-$(CONFIG_TOUCHSCREEN_EGALAX) += egalax_ts.o obj-$(CONFIG_TOUCHSCREEN_EGALAX_SERIAL) += egalax_ts_serial.o obj-$(CONFIG_TOUCHSCREEN_EXC3000) += exc3000.o obj-$(CONFIG_TOUCHSCREEN_FUJITSU) += fujitsu_ts.o -obj-$(CONFIG_TOUCHSCREEN_GOODIX) += goodix.o +obj-$(CONFIG_TOUCHSCREEN_GOODIX) += goodix_ts.o obj-$(CONFIG_TOUCHSCREEN_HIDEEP) += hideep.o obj-$(CONFIG_TOUCHSCREEN_ILI210X) += ili210x.o obj-$(CONFIG_TOUCHSCREEN_ILITEK) += ilitek_ts_i2c.o diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index 75c53786516f..906b5a6b52d1 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -326,6 +326,11 @@ static int goodix_ts_read_input_report(struct goodix_ts_data *ts, u8 *data) return touch_num; } + if (data[0] == 0 && ts->firmware_name) { + if (goodix_handle_fw_request(ts)) + return 0; + } + usleep_range(1000, 2000); /* Poll every 1 - 2 ms */ } while (time_before(jiffies, max_timeout)); @@ -940,12 +945,19 @@ static void goodix_read_config(struct goodix_ts_data *ts) int x_max, 
y_max; int error; - error = goodix_i2c_read(ts->client, ts->chip->config_addr, - ts->config, ts->chip->config_len); - if (error) { - ts->int_trigger_type = GOODIX_INT_TRIGGER; - ts->max_touch_num = GOODIX_MAX_CONTACTS; - return; + /* + * On controllers where we need to upload the firmware + * (controllers without flash) ts->config already has the config + * at this point and the controller itself does not have it yet! + */ + if (!ts->firmware_name) { + error = goodix_i2c_read(ts->client, ts->chip->config_addr, + ts->config, ts->chip->config_len); + if (error) { + ts->int_trigger_type = GOODIX_INT_TRIGGER; + ts->max_touch_num = GOODIX_MAX_CONTACTS; + return; + } } ts->int_trigger_type = ts->config[TRIGGER_LOC] & 0x03; @@ -1139,7 +1151,16 @@ static void goodix_config_cb(const struct firmware *cfg, void *ctx) struct goodix_ts_data *ts = ctx; int error; - if (cfg) { + if (ts->firmware_name) { + if (!cfg) + goto err_release_cfg; + + error = goodix_check_cfg(ts, cfg->data, cfg->size); + if (error) + goto err_release_cfg; + + memcpy(ts->config, cfg->data, cfg->size); + } else if (cfg) { /* send device configuration to the firmware */ error = goodix_send_cfg(ts, cfg->data, cfg->size); if (error) @@ -1231,6 +1252,10 @@ reset: return error; } + error = goodix_firmware_check(ts); + if (error) + return error; + error = goodix_read_version(ts); if (error) return error; @@ -1297,6 +1322,9 @@ static int __maybe_unused goodix_suspend(struct device *dev) /* Free IRQ as IRQ pin is used as output in the suspend sequence */ goodix_free_irq(ts); + /* Save reference (calibration) info if necessary */ + goodix_save_bak_ref(ts); + /* Output LOW on the INT pin for 5 ms */ error = goodix_irq_direction_output(ts, 0); if (error) { diff --git a/drivers/input/touchscreen/goodix.h b/drivers/input/touchscreen/goodix.h index b1d1bc004091..62138f930d1a 100644 --- a/drivers/input/touchscreen/goodix.h +++ b/drivers/input/touchscreen/goodix.h @@ -10,13 +10,48 @@ #include /* Register defines */ 
+#define GOODIX_REG_MISCTL_DSP_CTL 0x4010 +#define GOODIX_REG_MISCTL_SRAM_BANK 0x4048 +#define GOODIX_REG_MISCTL_MEM_CD_EN 0x4049 +#define GOODIX_REG_MISCTL_CACHE_EN 0x404B +#define GOODIX_REG_MISCTL_TMR0_EN 0x40B0 +#define GOODIX_REG_MISCTL_SWRST 0x4180 +#define GOODIX_REG_MISCTL_CPU_SWRST_PULSE 0x4184 +#define GOODIX_REG_MISCTL_BOOTCTL 0x4190 +#define GOODIX_REG_MISCTL_BOOT_OPT 0x4218 +#define GOODIX_REG_MISCTL_BOOT_CTL 0x5094 + +#define GOODIX_REG_FW_SIG 0x8000 +#define GOODIX_FW_SIG_LEN 10 + +#define GOODIX_REG_MAIN_CLK 0x8020 +#define GOODIX_MAIN_CLK_LEN 6 + #define GOODIX_REG_COMMAND 0x8040 #define GOODIX_CMD_SCREEN_OFF 0x05 +#define GOODIX_REG_SW_WDT 0x8041 + +#define GOODIX_REG_REQUEST 0x8043 +#define GOODIX_RQST_RESPONDED 0x00 +#define GOODIX_RQST_CONFIG 0x01 +#define GOODIX_RQST_BAK_REF 0x02 +#define GOODIX_RQST_RESET 0x03 +#define GOODIX_RQST_MAIN_CLOCK 0x04 +/* + * Unknown request which gets send by the controller aprox. + * every 34 seconds once it is up and running. + */ +#define GOODIX_RQST_UNKNOWN 0x06 +#define GOODIX_RQST_IDLE 0xFF + +#define GOODIX_REG_STATUS 0x8044 + #define GOODIX_GT1X_REG_CONFIG_DATA 0x8050 #define GOODIX_GT9X_REG_CONFIG_DATA 0x8047 #define GOODIX_REG_ID 0x8140 #define GOODIX_READ_COOR_ADDR 0x814E +#define GOODIX_REG_BAK_REF 0x99D0 #define GOODIX_ID_MAX_LEN 4 #define GOODIX_CONFIG_MAX_LENGTH 240 @@ -42,6 +77,7 @@ struct goodix_ts_data { struct i2c_client *client; struct input_dev *input_dev; const struct goodix_chip_data *chip; + const char *firmware_name; struct touchscreen_properties prop; unsigned int max_touch_num; unsigned int int_trigger_type; @@ -62,6 +98,9 @@ struct goodix_ts_data { unsigned int contact_size; u8 config[GOODIX_CONFIG_MAX_LENGTH]; unsigned short keymap[GOODIX_MAX_KEYS]; + u8 main_clk[GOODIX_MAIN_CLK_LEN]; + int bak_ref_len; + u8 *bak_ref; }; int goodix_i2c_read(struct i2c_client *client, u16 reg, u8 *buf, int len); @@ -71,4 +110,8 @@ int goodix_send_cfg(struct goodix_ts_data *ts, const u8 *cfg, int len); 
int goodix_int_sync(struct goodix_ts_data *ts); int goodix_reset_no_int_sync(struct goodix_ts_data *ts); +int goodix_firmware_check(struct goodix_ts_data *ts); +bool goodix_handle_fw_request(struct goodix_ts_data *ts); +void goodix_save_bak_ref(struct goodix_ts_data *ts); + #endif diff --git a/drivers/input/touchscreen/goodix_fwupload.c b/drivers/input/touchscreen/goodix_fwupload.c new file mode 100644 index 000000000000..c1e7a2413078 --- /dev/null +++ b/drivers/input/touchscreen/goodix_fwupload.c @@ -0,0 +1,427 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Goodix Touchscreen firmware upload support + * + * Copyright (c) 2021 Hans de Goede + * + * This is a rewrite of gt9xx_update.c from the Allwinner H3 BSP which is: + * Copyright (c) 2010 - 2012 Goodix Technology. + * Author: andrew@goodix.com + */ + +#include +#include +#include +#include "goodix.h" + +#define GOODIX_FW_HEADER_LENGTH sizeof(struct goodix_fw_header) +#define GOODIX_FW_SECTION_LENGTH 0x2000 +#define GOODIX_FW_DSP_LENGTH 0x1000 +#define GOODIX_FW_UPLOAD_ADDRESS 0xc000 + +#define GOODIX_CFG_LOC_HAVE_KEY 7 +#define GOODIX_CFG_LOC_DRVA_NUM 27 +#define GOODIX_CFG_LOC_DRVB_NUM 28 +#define GOODIX_CFG_LOC_SENS_NUM 29 + +struct goodix_fw_header { + u8 hw_info[4]; + u8 pid[8]; + u8 vid[2]; +} __packed; + +static u16 goodix_firmware_checksum(const u8 *data, int size) +{ + u16 checksum = 0; + int i; + + for (i = 0; i < size; i += 2) + checksum += (data[i] << 8) + data[i + 1]; + + return checksum; +} + +static int goodix_firmware_verify(struct device *dev, const struct firmware *fw) +{ + const struct goodix_fw_header *fw_header; + size_t expected_size; + const u8 *data; + u16 checksum; + char buf[9]; + + expected_size = GOODIX_FW_HEADER_LENGTH + 4 * GOODIX_FW_SECTION_LENGTH + + GOODIX_FW_DSP_LENGTH; + if (fw->size != expected_size) { + dev_err(dev, "Firmware has wrong size, expected %zu got %zu\n", + expected_size, fw->size); + return -EINVAL; + } + + data = fw->data + GOODIX_FW_HEADER_LENGTH; + 
checksum = goodix_firmware_checksum(data, 4 * GOODIX_FW_SECTION_LENGTH); + if (checksum) { + dev_err(dev, "Main firmware checksum error\n"); + return -EINVAL; + } + + data += 4 * GOODIX_FW_SECTION_LENGTH; + checksum = goodix_firmware_checksum(data, GOODIX_FW_DSP_LENGTH); + if (checksum) { + dev_err(dev, "DSP firmware checksum error\n"); + return -EINVAL; + } + + fw_header = (const struct goodix_fw_header *)fw->data; + dev_info(dev, "Firmware hardware info %02x%02x%02x%02x\n", + fw_header->hw_info[0], fw_header->hw_info[1], + fw_header->hw_info[2], fw_header->hw_info[3]); + /* pid is a 8 byte buffer containing a string, weird I know */ + memcpy(buf, fw_header->pid, 8); + buf[8] = 0; + dev_info(dev, "Firmware PID: %s VID: %02x%02x\n", buf, + fw_header->vid[0], fw_header->vid[1]); + return 0; +} + +static int goodix_enter_upload_mode(struct i2c_client *client) +{ + int tries, error; + u8 val; + + tries = 200; + do { + error = goodix_i2c_write_u8(client, + GOODIX_REG_MISCTL_SWRST, 0x0c); + if (error) + return error; + + error = goodix_i2c_read(client, + GOODIX_REG_MISCTL_SWRST, &val, 1); + if (error) + return error; + + if (val == 0x0c) + break; + } while (--tries); + + if (!tries) { + dev_err(&client->dev, "Error could not hold ss51 & dsp\n"); + return -EIO; + } + + /* DSP_CK and DSP_ALU_CK PowerOn */ + error = goodix_i2c_write_u8(client, GOODIX_REG_MISCTL_DSP_CTL, 0x00); + if (error) + return error; + + /* Disable watchdog */ + error = goodix_i2c_write_u8(client, GOODIX_REG_MISCTL_TMR0_EN, 0x00); + if (error) + return error; + + /* Clear cache enable */ + error = goodix_i2c_write_u8(client, GOODIX_REG_MISCTL_CACHE_EN, 0x00); + if (error) + return error; + + /* Set boot from SRAM */ + error = goodix_i2c_write_u8(client, GOODIX_REG_MISCTL_BOOTCTL, 0x02); + if (error) + return error; + + /* Software reboot */ + error = goodix_i2c_write_u8(client, + GOODIX_REG_MISCTL_CPU_SWRST_PULSE, 0x01); + if (error) + return error; + + /* Clear control flag */ + error = 
goodix_i2c_write_u8(client, GOODIX_REG_MISCTL_BOOTCTL, 0x00); + if (error) + return error; + + /* Set scramble */ + error = goodix_i2c_write_u8(client, GOODIX_REG_MISCTL_BOOT_OPT, 0x00); + if (error) + return error; + + /* Enable accessing code */ + error = goodix_i2c_write_u8(client, GOODIX_REG_MISCTL_MEM_CD_EN, 0x01); + if (error) + return error; + + return 0; +} + +static int goodix_start_firmware(struct i2c_client *client) +{ + int error; + u8 val; + + /* Init software watchdog */ + error = goodix_i2c_write_u8(client, GOODIX_REG_SW_WDT, 0xaa); + if (error) + return error; + + /* Release SS51 & DSP */ + error = goodix_i2c_write_u8(client, GOODIX_REG_MISCTL_SWRST, 0x00); + if (error) + return error; + + error = goodix_i2c_read(client, GOODIX_REG_SW_WDT, &val, 1); + if (error) + return error; + + /* The value we've written to SW_WDT should have been cleared now */ + if (val == 0xaa) { + dev_err(&client->dev, "Error SW_WDT reg not cleared on fw startup\n"); + return -EIO; + } + + /* Re-init software watchdog */ + error = goodix_i2c_write_u8(client, GOODIX_REG_SW_WDT, 0xaa); + if (error) + return error; + + return 0; +} + +static int goodix_firmware_upload(struct goodix_ts_data *ts) +{ + const struct firmware *fw; + char fw_name[64]; + const u8 *data; + int error; + + snprintf(fw_name, sizeof(fw_name), "goodix/%s", ts->firmware_name); + + error = request_firmware(&fw, fw_name, &ts->client->dev); + if (error) { + dev_err(&ts->client->dev, "Firmware request error %d\n", error); + return error; + } + + error = goodix_firmware_verify(&ts->client->dev, fw); + if (error) + goto release; + + error = goodix_reset_no_int_sync(ts); + if (error) + return error; + + error = goodix_enter_upload_mode(ts->client); + if (error) + goto release; + + /* Select SRAM bank 0 and upload section 1 & 2 */ + error = goodix_i2c_write_u8(ts->client, + GOODIX_REG_MISCTL_SRAM_BANK, 0x00); + if (error) + goto release; + + data = fw->data + GOODIX_FW_HEADER_LENGTH; + error = 
goodix_i2c_write(ts->client, GOODIX_FW_UPLOAD_ADDRESS, + data, 2 * GOODIX_FW_SECTION_LENGTH); + if (error) + goto release; + + /* Select SRAM bank 1 and upload section 3 & 4 */ + error = goodix_i2c_write_u8(ts->client, + GOODIX_REG_MISCTL_SRAM_BANK, 0x01); + if (error) + goto release; + + data += 2 * GOODIX_FW_SECTION_LENGTH; + error = goodix_i2c_write(ts->client, GOODIX_FW_UPLOAD_ADDRESS, + data, 2 * GOODIX_FW_SECTION_LENGTH); + if (error) + goto release; + + /* Select SRAM bank 2 and upload the DSP firmware */ + error = goodix_i2c_write_u8(ts->client, + GOODIX_REG_MISCTL_SRAM_BANK, 0x02); + if (error) + goto release; + + data += 2 * GOODIX_FW_SECTION_LENGTH; + error = goodix_i2c_write(ts->client, GOODIX_FW_UPLOAD_ADDRESS, + data, GOODIX_FW_DSP_LENGTH); + if (error) + goto release; + + error = goodix_start_firmware(ts->client); + if (error) + goto release; + + error = goodix_int_sync(ts); +release: + release_firmware(fw); + return error; +} + +static int goodix_prepare_bak_ref(struct goodix_ts_data *ts) +{ + u8 have_key, driver_num, sensor_num; + + if (ts->bak_ref) + return 0; /* Already done */ + + have_key = (ts->config[GOODIX_CFG_LOC_HAVE_KEY] & 0x01); + + driver_num = (ts->config[GOODIX_CFG_LOC_DRVA_NUM] & 0x1f) + + (ts->config[GOODIX_CFG_LOC_DRVB_NUM] & 0x1f); + if (have_key) + driver_num--; + + sensor_num = (ts->config[GOODIX_CFG_LOC_SENS_NUM] & 0x0f) + + ((ts->config[GOODIX_CFG_LOC_SENS_NUM] >> 4) & 0x0f); + + dev_dbg(&ts->client->dev, "Drv %d Sen %d Key %d\n", + driver_num, sensor_num, have_key); + + ts->bak_ref_len = (driver_num * (sensor_num - 2) + 2) * 2; + + ts->bak_ref = devm_kzalloc(&ts->client->dev, + ts->bak_ref_len, GFP_KERNEL); + if (!ts->bak_ref) + return -ENOMEM; + + /* + * The bak_ref array contains the backup of an array of (self/auto) + * calibration related values which the Android version of the driver + * stores on the filesystem so that it can be restored after reboot. 
+ * The mainline kernel never writes directly to the filesystem like + * this, we always start with all the values which give a correction + * factor in approx. the -20 - +20 range (in 2s complement) set to 0. + * + * Note the touchscreen works fine without restoring the reference + * values after a reboot / power-cycle. + * + * The last 2 bytes are a 16-bit unsigned checksum which is expected + * to make the addition of all 16 bit unsigned values in the array add + * up to 1 (rather than the usual 0), so we must set the last byte to 1. + */ + ts->bak_ref[ts->bak_ref_len - 1] = 1; + + return 0; +} + +static int goodix_send_main_clock(struct goodix_ts_data *ts) +{ + u32 main_clk = 54; /* Default main clock */ + u8 checksum = 0; + int i; + + device_property_read_u32(&ts->client->dev, + "goodix,main-clk", &main_clk); + + for (i = 0; i < (GOODIX_MAIN_CLK_LEN - 1); i++) { + ts->main_clk[i] = main_clk; + checksum += main_clk; + } + + /* The value of all bytes combined must be 0 */ + ts->main_clk[GOODIX_MAIN_CLK_LEN - 1] = 256 - checksum; + + return goodix_i2c_write(ts->client, GOODIX_REG_MAIN_CLK, + ts->main_clk, GOODIX_MAIN_CLK_LEN); +} + +int goodix_firmware_check(struct goodix_ts_data *ts) +{ + device_property_read_string(&ts->client->dev, + "firmware-name", &ts->firmware_name); + if (!ts->firmware_name) + return 0; + + if (ts->irq_pin_access_method == IRQ_PIN_ACCESS_NONE) { + dev_err(&ts->client->dev, "Error no IRQ-pin access method, cannot upload fw.\n"); + return -EINVAL; + } + + dev_info(&ts->client->dev, "Touchscreen controller needs fw-upload\n"); + ts->load_cfg_from_disk = true; + + return goodix_firmware_upload(ts); +} + +bool goodix_handle_fw_request(struct goodix_ts_data *ts) +{ + int error; + u8 val; + + error = goodix_i2c_read(ts->client, GOODIX_REG_REQUEST, &val, 1); + if (error) + return false; + + switch (val) { + case GOODIX_RQST_RESPONDED: + /* + * If we read back our own last ack the IRQ was not for + * a request. 
+ */ + return false; + case GOODIX_RQST_CONFIG: + error = goodix_send_cfg(ts, ts->config, ts->chip->config_len); + if (error) + return false; + + break; + case GOODIX_RQST_BAK_REF: + error = goodix_prepare_bak_ref(ts); + if (error) + return false; + + error = goodix_i2c_write(ts->client, GOODIX_REG_BAK_REF, + ts->bak_ref, ts->bak_ref_len); + if (error) + return false; + + break; + case GOODIX_RQST_RESET: + error = goodix_firmware_upload(ts); + if (error) + return false; + + break; + case GOODIX_RQST_MAIN_CLOCK: + error = goodix_send_main_clock(ts); + if (error) + return false; + + break; + case GOODIX_RQST_UNKNOWN: + case GOODIX_RQST_IDLE: + break; + default: + dev_err_ratelimited(&ts->client->dev, "Unknown Request: 0x%02x\n", val); + } + + /* Ack the request */ + goodix_i2c_write_u8(ts->client, + GOODIX_REG_REQUEST, GOODIX_RQST_RESPONDED); + return true; +} + +void goodix_save_bak_ref(struct goodix_ts_data *ts) +{ + int error; + u8 val; + + if (!ts->firmware_name) + return; + + error = goodix_i2c_read(ts->client, GOODIX_REG_STATUS, &val, 1); + if (error) + return; + + if (!(val & 0x80)) + return; + + error = goodix_i2c_read(ts->client, GOODIX_REG_BAK_REF, + ts->bak_ref, ts->bak_ref_len); + if (error) { + memset(ts->bak_ref, 0, ts->bak_ref_len); + ts->bak_ref[ts->bak_ref_len - 1] = 1; + } +} From 9b6e27d01adcec58e046c624874f8a124e8b07ec Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 14 Sep 2021 12:30:32 -0400 Subject: [PATCH 033/433] nfsd: don't alloc under spinlock in rpc_parse_scope_id Dan Carpenter says: The patch d20c11d86d8f: "nfsd: Protect session creation and client confirm using client_lock" from Jul 30, 2014, leads to the following Smatch static checker warning: net/sunrpc/addr.c:178 rpc_parse_scope_id() warn: sleeping in atomic context Reported-by: Dan Carpenter Fixes: d20c11d86d8f ("nfsd: Protect session creation and client...") Signed-off-by: J. 
Bruce Fields --- net/sunrpc/addr.c | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index 6e4dbd577a39..d435bffc6199 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -162,8 +162,10 @@ static int rpc_parse_scope_id(struct net *net, const char *buf, const size_t buflen, const char *delim, struct sockaddr_in6 *sin6) { - char *p; + char p[IPV6_SCOPE_ID_LEN + 1]; size_t len; + u32 scope_id = 0; + struct net_device *dev; if ((buf + buflen) == delim) return 1; @@ -175,29 +177,23 @@ static int rpc_parse_scope_id(struct net *net, const char *buf, return 0; len = (buf + buflen) - delim - 1; - p = kmemdup_nul(delim + 1, len, GFP_KERNEL); - if (p) { - u32 scope_id = 0; - struct net_device *dev; + if (len > IPV6_SCOPE_ID_LEN) + return 0; - dev = dev_get_by_name(net, p); - if (dev != NULL) { - scope_id = dev->ifindex; - dev_put(dev); - } else { - if (kstrtou32(p, 10, &scope_id) != 0) { - kfree(p); - return 0; - } - } + memcpy(p, delim + 1, len); + p[len] = 0; - kfree(p); - - sin6->sin6_scope_id = scope_id; - return 1; + dev = dev_get_by_name(net, p); + if (dev != NULL) { + scope_id = dev->ifindex; + dev_put(dev); + } else { + if (kstrtou32(p, 10, &scope_id) != 0) + return 0; } - return 0; + sin6->sin6_scope_id = scope_id; + return 1; } static size_t rpc_pton6(struct net *net, const char *buf, const size_t buflen, From dc451bbc6f54a2c7bacef7ec445718709071b61c Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 10 Sep 2021 14:42:54 -0400 Subject: [PATCH 034/433] nfs: reexport documentation We've supported reexport for a while but documentation is limited. This is mainly a simplified version of the text I wrote for the linux-nfs wiki at https://wiki.linux-nfs.org/wiki/index.php/NFS_re-export. Signed-off-by: J. 
Bruce Fields --- Documentation/filesystems/nfs/index.rst | 1 + Documentation/filesystems/nfs/reexport.rst | 113 +++++++++++++++++++++ 2 files changed, 114 insertions(+) create mode 100644 Documentation/filesystems/nfs/reexport.rst diff --git a/Documentation/filesystems/nfs/index.rst b/Documentation/filesystems/nfs/index.rst index 65805624e39b..288d8ddb2bc6 100644 --- a/Documentation/filesystems/nfs/index.rst +++ b/Documentation/filesystems/nfs/index.rst @@ -11,3 +11,4 @@ NFS rpc-server-gss nfs41-server knfsd-stats + reexport diff --git a/Documentation/filesystems/nfs/reexport.rst b/Documentation/filesystems/nfs/reexport.rst new file mode 100644 index 000000000000..ff9ae4a46530 --- /dev/null +++ b/Documentation/filesystems/nfs/reexport.rst @@ -0,0 +1,113 @@ +Reexporting NFS filesystems +=========================== + +Overview +-------- + +It is possible to reexport an NFS filesystem over NFS. However, this +feature comes with a number of limitations. Before trying it, we +recommend some careful research to determine whether it will work for +your purposes. + +A discussion of current known limitations follows. + +"fsid=" required, crossmnt broken +--------------------------------- + +We require the "fsid=" export option on any reexport of an NFS +filesystem. You can use "uuidgen -r" to generate a unique argument. + +The "crossmnt" export does not propagate "fsid=", so it will not allow +traversing into further nfs filesystems; if you wish to export nfs +filesystems mounted under the exported filesystem, you'll need to export +them explicitly, assigning each its own unique "fsid=" option. + +Reboot recovery +--------------- + +The NFS protocol's normal reboot recovery mechanisms don't work for the +case when the reexport server reboots. Clients will lose any locks +they held before the reboot, and further IO will result in errors. +Closing and reopening files should clear the errors.
+ +Filehandle limits +----------------- + +If the original server uses an X byte filehandle for a given object, the +reexport server's filehandle for the reexported object will be X+22 +bytes, rounded up to the nearest multiple of four bytes. + +The result must fit into the RFC-mandated filehandle size limits: + ++-------+-----------+ +| NFSv2 | 32 bytes | ++-------+-----------+ +| NFSv3 | 64 bytes | ++-------+-----------+ +| NFSv4 | 128 bytes | ++-------+-----------+ + +So, for example, you will only be able to reexport a filesystem over +NFSv2 if the original server gives you filehandles that fit in 10 +bytes--which is unlikely. + +In general there's no way to know the maximum filehandle size given out +by an NFS server without asking the server vendor. + +But the following table gives a few examples. The first column is the +typical length of the filehandle from a Linux server exporting the given +filesystem, the second is the length after that nfs export is reexported +by another Linux host: + ++--------+-------------------+----------------+ +| | filehandle length | after reexport | ++========+===================+================+ +| ext4: | 28 bytes | 52 bytes | ++--------+-------------------+----------------+ +| xfs: | 32 bytes | 56 bytes | ++--------+-------------------+----------------+ +| btrfs: | 40 bytes | 64 bytes | ++--------+-------------------+----------------+ + +All will therefore fit in an NFSv3 or NFSv4 filehandle after reexport, +but none are reexportable over NFSv2. + +Linux server filehandles are a bit more complicated than this, though; +for example: + + - The (non-default) "subtreecheck" export option generally + requires another 4 to 8 bytes in the filehandle. + - If you export a subdirectory of a filesystem (instead of + exporting the filesystem root), that also usually adds 4 to 8 + bytes. + - If you export over NFSv2, knfsd usually uses a shorter + filesystem identifier that saves 8 bytes. 
+ - The root directory of an export uses a filehandle that is + shorter. + +As you can see, the 128-byte NFSv4 filehandle is large enough that +you're unlikely to have trouble using NFSv4 to reexport any filesystem +exported from a Linux server. In general, if the original server is +something that also supports NFSv3, you're *probably* OK. Re-exporting +over NFSv3 may be dicier, and reexporting over NFSv2 will probably +never work. + +For more details of Linux filehandle structure, the best reference is +the source code and comments; see in particular: + + - include/linux/exportfs.h:enum fid_type + - include/uapi/linux/nfsd/nfsfh.h:struct nfs_fhbase_new + - fs/nfsd/nfsfh.c:set_version_and_fsid_type + - fs/nfs/export.c:nfs_encode_fh + +Open DENY bits ignored +---------------------- + +NFS since NFSv4 supports ALLOW and DENY bits taken from Windows, which +allow you, for example, to open a file in a mode which forbids other +read opens or write opens. The Linux client doesn't use them, and the +server's support has always been incomplete: they are enforced only +against other NFS users, not against processes accessing the exported +filesystem locally. A reexport server will also not pass them along to +the original server, so they will not be enforced between clients of +different reexport servers. From 8847ecc9274a14114385d1cb4030326baa0766eb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Sep 2021 15:25:21 -0400 Subject: [PATCH 035/433] NFSD: Optimize DRC bucket pruning DRC bucket pruning is done by nfsd_cache_lookup(), which is part of every NFSv2 and NFSv3 dispatch (ie, it's done while the client is waiting). I added a trace_printk() in prune_bucket() to see just how long it takes to prune. Here are two ends of the spectrum: prune_bucket: Scanned 1 and freed 0 in 90 ns, 62 entries remaining prune_bucket: Scanned 2 and freed 1 in 716 ns, 63 entries remaining ... 
prune_bucket: Scanned 75 and freed 74 in 34149 ns, 1 entries remaining Pruning latency is noticeable on fast transports with fast storage. By noticeable, I mean that the latency measured here in the worst case is the same order of magnitude as the round trip time for cached server operations. We could do something like moving expired entries to an expired list and then free them later instead of freeing them right in prune_bucket(). But simply limiting the number of entries that can be pruned by a lookup is simple and retains more entries in the cache, making the DRC somewhat more effective. Comparison with a 70/30 fio 8KB 12 thread direct I/O test: Before: write: IOPS=61.6k, BW=481MiB/s (505MB/s)(14.1GiB/30001msec); 0 zone resets WRITE: 1848726 ops (30%) avg bytes sent per op: 8340 avg bytes received per op: 136 backlog wait: 0.635158 RTT: 0.128525 total execute time: 0.827242 (milliseconds) After: write: IOPS=63.0k, BW=492MiB/s (516MB/s)(14.4GiB/30001msec); 0 zone resets WRITE: 1891144 ops (30%) avg bytes sent per op: 8340 avg bytes received per op: 136 backlog wait: 0.616114 RTT: 0.126842 total execute time: 0.805348 (milliseconds) Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfscache.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 96cdf77925f3..6e0b6f3148dc 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -241,8 +241,8 @@ lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) list_move_tail(&rp->c_lru, &b->lru_head); } -static long -prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn) +static long prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn, + unsigned int max) { struct svc_cacherep *rp, *tmp; long freed = 0; @@ -258,11 +258,17 @@ prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn) time_before(jiffies, rp->c_timestamp + RC_EXPIRE)) break; nfsd_reply_cache_free_locked(b, rp, nn); - freed++; + if (max && freed++ > max) + break; } return freed; } +static long nfsd_prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn) +{ + return prune_bucket(b, nn, 3); +} + /* * Walk the LRU list and prune off entries that are older than RC_EXPIRE. * Also prune the oldest ones when the total exceeds the max number of entries. 
@@ -279,7 +285,7 @@ prune_cache_entries(struct nfsd_net *nn) if (list_empty(&b->lru_head)) continue; spin_lock(&b->cache_lock); - freed += prune_bucket(b, nn); + freed += prune_bucket(b, nn, 0); spin_unlock(&b->cache_lock); } return freed; @@ -453,8 +459,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp) atomic_inc(&nn->num_drc_entries); nfsd_stats_drc_mem_usage_add(nn, sizeof(*rp)); - /* go ahead and prune the cache */ - prune_bucket(b, nn); + nfsd_prune_bucket(b, nn); out_unlock: spin_unlock(&b->cache_lock); From b083704fbf6cea21a187a99f6dae20fbca86b44e Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Wed, 22 Sep 2021 21:39:42 -0700 Subject: [PATCH 036/433] Input: elants_i2c - make use of devm_add_action_or_reset() The helper function devm_add_action_or_reset() will internally call devm_add_action(), and if devm_add_action() fails then it will execute the action mentioned and return the error code. So use devm_add_action_or_reset() instead of devm_add_action() to simplify the error handling, reduce the code. 
Signed-off-by: Cai Huoqing Link: https://lore.kernel.org/r/20210922125212.95-2-caihuoqing@baidu.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/elants_i2c.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/touchscreen/elants_i2c.c b/drivers/input/touchscreen/elants_i2c.c index 17540bdb1eaf..98ba5cab685b 100644 --- a/drivers/input/touchscreen/elants_i2c.c +++ b/drivers/input/touchscreen/elants_i2c.c @@ -1440,11 +1440,11 @@ static int elants_i2c_probe(struct i2c_client *client, if (error) return error; - error = devm_add_action(&client->dev, elants_i2c_power_off, ts); + error = devm_add_action_or_reset(&client->dev, + elants_i2c_power_off, ts); if (error) { dev_err(&client->dev, "failed to install power off action: %d\n", error); - elants_i2c_power_off(ts); return error; } From 4b3ed1ae2817d78e72a4281be2a9751a41f20508 Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Wed, 22 Sep 2021 21:49:14 -0700 Subject: [PATCH 037/433] Input: raydium_i2c_ts - make use of the helper function devm_add_action_or_reset() The helper function devm_add_action_or_reset() will internally call devm_add_action(), and if devm_add_action() fails then it will execute the action mentioned and return the error code. So use devm_add_action_or_reset() instead of devm_add_action() to simplify the error handling, reduce the code. 
Signed-off-by: Cai Huoqing Link: https://lore.kernel.org/r/20210922125212.95-3-caihuoqing@baidu.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/raydium_i2c_ts.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/touchscreen/raydium_i2c_ts.c b/drivers/input/touchscreen/raydium_i2c_ts.c index 92c48d8e35a4..3a4952935366 100644 --- a/drivers/input/touchscreen/raydium_i2c_ts.c +++ b/drivers/input/touchscreen/raydium_i2c_ts.c @@ -1118,11 +1118,11 @@ static int raydium_i2c_probe(struct i2c_client *client, if (error) return error; - error = devm_add_action(&client->dev, raydium_i2c_power_off, ts); + error = devm_add_action_or_reset(&client->dev, + raydium_i2c_power_off, ts); if (error) { dev_err(&client->dev, "failed to install power off action: %d\n", error); - raydium_i2c_power_off(ts); return error; } From d5af8a8f7c4c54758e89fa3b077b62270de3910c Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Wed, 22 Sep 2021 21:50:49 -0700 Subject: [PATCH 038/433] Input: mpr121 - make use of the helper function devm_add_action_or_reset() The helper function devm_add_action_or_reset() will internally call devm_add_action(), and if devm_add_action() fails then it will execute the action mentioned and return the error code. So use devm_add_action_or_reset() instead of devm_add_action() to simplify the error handling, reduce the code. 
Signed-off-by: Cai Huoqing Link: https://lore.kernel.org/r/20210922125954.533-1-caihuoqing@baidu.com Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/mpr121_touchkey.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/keyboard/mpr121_touchkey.c b/drivers/input/keyboard/mpr121_touchkey.c index 40d6e5087cde..230ab3d50b9e 100644 --- a/drivers/input/keyboard/mpr121_touchkey.c +++ b/drivers/input/keyboard/mpr121_touchkey.c @@ -107,9 +107,9 @@ static struct regulator *mpr121_vdd_supply_init(struct device *dev) return ERR_PTR(err); } - err = devm_add_action(dev, mpr121_vdd_supply_disable, vdd_supply); + err = devm_add_action_or_reset(dev, mpr121_vdd_supply_disable, + vdd_supply); if (err) { - regulator_disable(vdd_supply); dev_err(dev, "failed to add disable regulator action: %d\n", err); return ERR_PTR(err); From 458032fcfa91c8714859b1f01b9ac7dccea5d6cd Mon Sep 17 00:00:00 2001 From: Yang Li Date: Fri, 10 Sep 2021 17:33:24 +0800 Subject: [PATCH 039/433] SUNRPC: Return specific error code on kmalloc failure Although the callers of this function only care about whether the return value is null or not, we should still give a rigorous error code. Smatch tool warning: net/sunrpc/auth_gss/svcauth_gss.c:784 gss_write_verf() warn: returning -1 instead of -ENOMEM is sloppy No functional change, just more standardized. Reported-by: Abaci Robot Signed-off-by: Yang Li Signed-off-by: J.
Bruce Fields --- net/sunrpc/auth_gss/svcauth_gss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 3e776e3dff91..7dba6a9c213a 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -781,7 +781,7 @@ gss_write_verf(struct svc_rqst *rqstp, struct gss_ctx *ctx_id, u32 seq) svc_putnl(rqstp->rq_res.head, RPC_AUTH_GSS); xdr_seq = kmalloc(4, GFP_KERNEL); if (!xdr_seq) - return -1; + return -ENOMEM; *xdr_seq = htonl(seq); iov.iov_base = xdr_seq; From 8719a17613e0233d707eb22e1645d217594631ef Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 23 Sep 2021 20:49:20 +0100 Subject: [PATCH 040/433] rtc: ds1302: Add SPI ID table Currently autoloading for SPI devices does not use the DT ID table, it uses SPI modaliases. Supporting OF modaliases is going to be difficult if not impractical, an attempt was made but has been reverted, so ensure that module autoloading works for this driver by adding an id_table listing the SPI IDs for everything.
Fixes: 96c8395e2166 ("spi: Revert modalias changes") Signed-off-by: Mark Brown Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20210923194922.53386-2-broonie@kernel.org --- drivers/rtc/rtc-ds1302.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/rtc/rtc-ds1302.c b/drivers/rtc/rtc-ds1302.c index b3de6d2e680a..2f83adef966e 100644 --- a/drivers/rtc/rtc-ds1302.c +++ b/drivers/rtc/rtc-ds1302.c @@ -199,11 +199,18 @@ static const struct of_device_id ds1302_dt_ids[] = { MODULE_DEVICE_TABLE(of, ds1302_dt_ids); #endif +static const struct spi_device_id ds1302_spi_ids[] = { + { .name = "ds1302", }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(spi, ds1302_spi_ids); + static struct spi_driver ds1302_driver = { .driver.name = "rtc-ds1302", .driver.of_match_table = of_match_ptr(ds1302_dt_ids), .probe = ds1302_probe, .remove = ds1302_remove, + .id_table = ds1302_spi_ids, }; module_spi_driver(ds1302_driver); From da87639d6312afb8855717c791768bf2d4ca8ac8 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 23 Sep 2021 20:49:21 +0100 Subject: [PATCH 041/433] rtc: ds1390: Add SPI ID table Currently autoloading for SPI devices does not use the DT ID table, it uses SPI modaliases. Supporting OF modaliases is going to be difficult if not impractical, an attempt was made but has been reverted, so ensure that module autoloading works for this driver by adding an id_table listing the SPI IDs for everything.
Fixes: 96c8395e2166 ("spi: Revert modalias changes") Signed-off-by: Mark Brown Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20210923194922.53386-3-broonie@kernel.org --- drivers/rtc/rtc-ds1390.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/rtc/rtc-ds1390.c b/drivers/rtc/rtc-ds1390.c index 66fc8617d07e..93ce72b9ae59 100644 --- a/drivers/rtc/rtc-ds1390.c +++ b/drivers/rtc/rtc-ds1390.c @@ -219,12 +219,19 @@ static const struct of_device_id ds1390_of_match[] = { }; MODULE_DEVICE_TABLE(of, ds1390_of_match); +static const struct spi_device_id ds1390_spi_ids[] = { + { .name = "ds1390" }, + {} +}; +MODULE_DEVICE_TABLE(spi, ds1390_spi_ids); + static struct spi_driver ds1390_driver = { .driver = { .name = "rtc-ds1390", .of_match_table = of_match_ptr(ds1390_of_match), }, .probe = ds1390_probe, + .id_table = ds1390_spi_ids, }; module_spi_driver(ds1390_driver); From 5f84478e14aa8b43a4ea85d2e091931741947749 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 23 Sep 2021 20:49:22 +0100 Subject: [PATCH 042/433] rtc: pcf2123: Add SPI ID table Currently autoloading for SPI devices does not use the DT ID table, it uses SPI modaliases. Supporting OF modaliases is going to be difficult if not impractical, an attempt was made but has been reverted, so ensure that module autoloading works for this driver by adding an id_table listing the SPI IDs for everything.
Fixes: 96c8395e2166 ("spi: Revert modalias changes") Signed-off-by: Mark Brown Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20210923194922.53386-4-broonie@kernel.org --- drivers/rtc/rtc-pcf2123.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/rtc/rtc-pcf2123.c b/drivers/rtc/rtc-pcf2123.c index 0f58cac81d8c..7473e6c8a183 100644 --- a/drivers/rtc/rtc-pcf2123.c +++ b/drivers/rtc/rtc-pcf2123.c @@ -451,12 +451,21 @@ static const struct of_device_id pcf2123_dt_ids[] = { MODULE_DEVICE_TABLE(of, pcf2123_dt_ids); #endif +static const struct spi_device_id pcf2123_spi_ids[] = { + { .name = "pcf2123", }, + { .name = "rv2123", }, + { .name = "rtc-pcf2123", }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(spi, pcf2123_spi_ids); + static struct spi_driver pcf2123_driver = { .driver = { .name = "rtc-pcf2123", .of_match_table = of_match_ptr(pcf2123_dt_ids), }, .probe = pcf2123_probe, + .id_table = pcf2123_spi_ids, }; module_spi_driver(pcf2123_driver); From 5e295f9402039aaa38d0949f598745b98850fd13 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 16 Sep 2021 18:45:12 +0200 Subject: [PATCH 043/433] rtc: omap: drop unneeded MODULE_ALIAS The MODULE_DEVICE_TABLE already creates proper alias for platform driver. Having another MODULE_ALIAS causes the alias to be duplicated. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20210916164513.134725-1-krzysztof.kozlowski@canonical.com --- drivers/rtc/rtc-omap.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c index d46e0f0cc502..4d4f3b1a7309 100644 --- a/drivers/rtc/rtc-omap.c +++ b/drivers/rtc/rtc-omap.c @@ -1029,6 +1029,5 @@ static struct platform_driver omap_rtc_driver = { module_platform_driver(omap_rtc_driver); -MODULE_ALIAS("platform:omap_rtc"); MODULE_AUTHOR("George G. 
Davis (and others)"); MODULE_LICENSE("GPL"); From 6eee1c48be7cb0b9b14408521a9151c1021901d3 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 16 Sep 2021 18:46:04 +0200 Subject: [PATCH 044/433] rtc: s5m: drop unneeded MODULE_ALIAS The MODULE_DEVICE_TABLE already creates proper alias for platform driver. Having another MODULE_ALIAS causes the alias to be duplicated. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20210916164604.134924-1-krzysztof.kozlowski@canonical.com --- drivers/rtc/rtc-s5m.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/rtc/rtc-s5m.c b/drivers/rtc/rtc-s5m.c index fb9c6b709e13..4243fe6d3842 100644 --- a/drivers/rtc/rtc-s5m.c +++ b/drivers/rtc/rtc-s5m.c @@ -861,4 +861,3 @@ module_platform_driver(s5m_rtc_driver); MODULE_AUTHOR("Sangbeom Kim "); MODULE_DESCRIPTION("Samsung S5M/S2MPS14 RTC driver"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:s5m-rtc"); From 38b17bc9c40e8f3138b02c624d9063b6781c8150 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 18 Sep 2021 23:35:51 +0200 Subject: [PATCH 045/433] rtc: rx6110: simplify getting the adapter of a client We have a dedicated pointer for that, so use it. Much easier to read and less computation involved. 
Signed-off-by: Wolfram Sang Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20210918213553.14514-1-wsa+renesas@sang-engineering.com --- drivers/rtc/rtc-rx6110.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-rx6110.c b/drivers/rtc/rtc-rx6110.c index f4d425002f7f..758fd6e11a15 100644 --- a/drivers/rtc/rtc-rx6110.c +++ b/drivers/rtc/rtc-rx6110.c @@ -422,7 +422,7 @@ static struct regmap_config regmap_i2c_config = { static int rx6110_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) { - struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); + struct i2c_adapter *adapter = client->adapter; struct rx6110_data *rx6110; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA From dd49cbedde8a0f1e0d09698f9cad791d37a8e03e Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Mon, 23 Aug 2021 19:16:11 +0200 Subject: [PATCH 046/433] dt-bindings: rtc: Add Mstar MSC313e RTC devicetree bindings documentation This adds the documentation for the devicetree bindings of the Mstar MSC313e RTC driver, found from MSC313e SoCs and newer. 
Signed-off-by: Romain Perier Reviewed-by: Rob Herring Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20210823171613.18941-2-romain.perier@gmail.com --- .../bindings/rtc/mstar,msc313-rtc.yaml | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 Documentation/devicetree/bindings/rtc/mstar,msc313-rtc.yaml diff --git a/Documentation/devicetree/bindings/rtc/mstar,msc313-rtc.yaml b/Documentation/devicetree/bindings/rtc/mstar,msc313-rtc.yaml new file mode 100644 index 000000000000..114199cf4d28 --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/mstar,msc313-rtc.yaml @@ -0,0 +1,49 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/rtc/mstar,msc313-rtc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Mstar MSC313e RTC Device Tree Bindings + +allOf: + - $ref: "rtc.yaml#" + +maintainers: + - Daniel Palmer + - Romain Perier + +properties: + compatible: + enum: + - mstar,msc313-rtc + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + start-year: true + + clocks: + maxItems: 1 + +required: + - compatible + - reg + - interrupts + - clocks + +additionalProperties: false + +examples: + - | + #include + rtc@2400 { + compatible = "mstar,msc313-rtc"; + reg = <0x2400 0x40>; + clocks = <&xtal_div2>; + interrupts-extended = <&intc_irq GIC_SPI 44 IRQ_TYPE_LEVEL_HIGH>; + }; +... From be7d9c9161b9c76edeff15e79edc2f256568fe05 Mon Sep 17 00:00:00 2001 From: Daniel Palmer Date: Mon, 23 Aug 2021 19:16:12 +0200 Subject: [PATCH 047/433] rtc: Add support for the MSTAR MSC313 RTC This adds support for the RTC block on the Mstar MSC313e SoCs and newer. 
Signed-off-by: Daniel Palmer Co-developed-by: Romain Perier Signed-off-by: Romain Perier Reviewed-by: Nobuhiro Iwamatsu Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20210823171613.18941-3-romain.perier@gmail.com --- MAINTAINERS | 1 + drivers/rtc/Kconfig | 10 ++ drivers/rtc/Makefile | 1 + drivers/rtc/rtc-msc313.c | 258 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 270 insertions(+) create mode 100644 drivers/rtc/rtc-msc313.c diff --git a/MAINTAINERS b/MAINTAINERS index eeb4c70b3d5b..caedf3c29286 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2247,6 +2247,7 @@ F: arch/arm/boot/dts/mstar-* F: arch/arm/mach-mstar/ F: drivers/clk/mstar/ F: drivers/gpio/gpio-msc313.c +F: drivers/rtc/rtc-msc313.c F: drivers/watchdog/msc313e_wdt.c F: include/dt-bindings/clock/mstar-* F: include/dt-bindings/gpio/msc313-gpio.h diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index e1bc5214494e..59dc1410a160 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -1935,4 +1935,14 @@ config RTC_DRV_WILCO_EC This can also be built as a module. If so, the module will be named "rtc_wilco_ec". +config RTC_DRV_MSC313 + tristate "MStar MSC313 RTC" + depends on ARCH_MSTARV7 || COMPILE_TEST + help + If you say yes here you get support for the Mstar MSC313e On-Chip + Real Time Clock. + + This driver can also be built as a module, if so, the module + will be called "rtc-msc313". 
+ endif # RTC_CLASS diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index 5ceeafe4d5b2..e76308053b0f 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -103,6 +103,7 @@ obj-$(CONFIG_RTC_DRV_MCP795) += rtc-mcp795.o obj-$(CONFIG_RTC_DRV_MESON) += rtc-meson.o obj-$(CONFIG_RTC_DRV_MOXART) += rtc-moxart.o obj-$(CONFIG_RTC_DRV_MPC5121) += rtc-mpc5121.o +obj-$(CONFIG_RTC_DRV_MSC313) += rtc-msc313.o obj-$(CONFIG_RTC_DRV_MSM6242) += rtc-msm6242.o obj-$(CONFIG_RTC_DRV_MT2712) += rtc-mt2712.o obj-$(CONFIG_RTC_DRV_MT6397) += rtc-mt6397.o diff --git a/drivers/rtc/rtc-msc313.c b/drivers/rtc/rtc-msc313.c new file mode 100644 index 000000000000..f493ca5f181b --- /dev/null +++ b/drivers/rtc/rtc-msc313.c @@ -0,0 +1,258 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Real time clocks driver for MStar/SigmaStar ARMv7 SoCs. + * Based on "Real Time Clock driver for msb252x." that was contained + * in various MStar kernels. + * + * (C) 2019 Daniel Palmer + * (C) 2021 Romain Perier + */ + +#include +#include +#include +#include +#include +#include + +/* Registers */ +#define REG_RTC_CTRL 0x00 +#define REG_RTC_FREQ_CW_L 0x04 +#define REG_RTC_FREQ_CW_H 0x08 +#define REG_RTC_LOAD_VAL_L 0x0C +#define REG_RTC_LOAD_VAL_H 0x10 +#define REG_RTC_MATCH_VAL_L 0x14 +#define REG_RTC_MATCH_VAL_H 0x18 +#define REG_RTC_STATUS_INT 0x1C +#define REG_RTC_CNT_VAL_L 0x20 +#define REG_RTC_CNT_VAL_H 0x24 + +/* Control bits for REG_RTC_CTRL */ +#define SOFT_RSTZ_BIT BIT(0) +#define CNT_EN_BIT BIT(1) +#define WRAP_EN_BIT BIT(2) +#define LOAD_EN_BIT BIT(3) +#define READ_EN_BIT BIT(4) +#define INT_MASK_BIT BIT(5) +#define INT_FORCE_BIT BIT(6) +#define INT_CLEAR_BIT BIT(7) + +/* Control bits for REG_RTC_STATUS_INT */ +#define RAW_INT_BIT BIT(0) +#define ALM_INT_BIT BIT(1) + +struct msc313_rtc { + struct rtc_device *rtc_dev; + void __iomem *rtc_base; +}; + +static int msc313_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) +{ + struct msc313_rtc *priv = dev_get_drvdata(dev); + 
unsigned long seconds; + + seconds = readw(priv->rtc_base + REG_RTC_MATCH_VAL_L) + | (readw(priv->rtc_base + REG_RTC_MATCH_VAL_H) << 16); + + rtc_time64_to_tm(seconds, &alarm->time); + + if (!(readw(priv->rtc_base + REG_RTC_CTRL) & INT_MASK_BIT)) + alarm->enabled = 1; + + return 0; +} + +static int msc313_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) +{ + struct msc313_rtc *priv = dev_get_drvdata(dev); + u16 reg; + + reg = readw(priv->rtc_base + REG_RTC_CTRL); + if (enabled) + reg &= ~INT_MASK_BIT; + else + reg |= INT_MASK_BIT; + writew(reg, priv->rtc_base + REG_RTC_CTRL); + return 0; +} + +static int msc313_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm) +{ + struct msc313_rtc *priv = dev_get_drvdata(dev); + unsigned long seconds; + + seconds = rtc_tm_to_time64(&alarm->time); + writew((seconds & 0xFFFF), priv->rtc_base + REG_RTC_MATCH_VAL_L); + writew((seconds >> 16) & 0xFFFF, priv->rtc_base + REG_RTC_MATCH_VAL_H); + + msc313_rtc_alarm_irq_enable(dev, alarm->enabled); + + return 0; +} + +static bool msc313_rtc_get_enabled(struct msc313_rtc *priv) +{ + return readw(priv->rtc_base + REG_RTC_CTRL) & CNT_EN_BIT; +} + +static void msc313_rtc_set_enabled(struct msc313_rtc *priv) +{ + u16 reg; + + reg = readw(priv->rtc_base + REG_RTC_CTRL); + reg |= CNT_EN_BIT; + writew(reg, priv->rtc_base + REG_RTC_CTRL); +} + +static int msc313_rtc_read_time(struct device *dev, struct rtc_time *tm) +{ + struct msc313_rtc *priv = dev_get_drvdata(dev); + u32 seconds; + u16 reg; + + if (!msc313_rtc_get_enabled(priv)) + return -EINVAL; + + reg = readw(priv->rtc_base + REG_RTC_CTRL); + writew(reg | READ_EN_BIT, priv->rtc_base + REG_RTC_CTRL); + + /* Wait for HW latch done */ + while (readw(priv->rtc_base + REG_RTC_CTRL) & READ_EN_BIT) + udelay(1); + + seconds = readw(priv->rtc_base + REG_RTC_CNT_VAL_L) + | (readw(priv->rtc_base + REG_RTC_CNT_VAL_H) << 16); + + rtc_time64_to_tm(seconds, tm); + + return 0; +} + +static int msc313_rtc_set_time(struct device *dev, 
struct rtc_time *tm) +{ + struct msc313_rtc *priv = dev_get_drvdata(dev); + unsigned long seconds; + u16 reg; + + seconds = rtc_tm_to_time64(tm); + writew(seconds & 0xFFFF, priv->rtc_base + REG_RTC_LOAD_VAL_L); + writew((seconds >> 16) & 0xFFFF, priv->rtc_base + REG_RTC_LOAD_VAL_H); + + /* Enable load for loading value into internal RTC counter */ + reg = readw(priv->rtc_base + REG_RTC_CTRL); + writew(reg | LOAD_EN_BIT, priv->rtc_base + REG_RTC_CTRL); + + /* Wait for HW latch done */ + while (readw(priv->rtc_base + REG_RTC_CTRL) & LOAD_EN_BIT) + udelay(1); + msc313_rtc_set_enabled(priv); + return 0; +} + +static const struct rtc_class_ops msc313_rtc_ops = { + .read_time = msc313_rtc_read_time, + .set_time = msc313_rtc_set_time, + .read_alarm = msc313_rtc_read_alarm, + .set_alarm = msc313_rtc_set_alarm, + .alarm_irq_enable = msc313_rtc_alarm_irq_enable, +}; + +static irqreturn_t msc313_rtc_interrupt(s32 irq, void *dev_id) +{ + struct msc313_rtc *priv = dev_get_drvdata(dev_id); + u16 reg; + + reg = readw(priv->rtc_base + REG_RTC_STATUS_INT); + if (!(reg & ALM_INT_BIT)) + return IRQ_NONE; + + reg = readw(priv->rtc_base + REG_RTC_CTRL); + reg |= INT_CLEAR_BIT; + reg &= ~INT_FORCE_BIT; + writew(reg, priv->rtc_base + REG_RTC_CTRL); + + rtc_update_irq(priv->rtc_dev, 1, RTC_IRQF | RTC_AF); + + return IRQ_HANDLED; +} + +static int msc313_rtc_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct msc313_rtc *priv; + unsigned long rate; + struct clk *clk; + int ret; + int irq; + + priv = devm_kzalloc(&pdev->dev, sizeof(struct msc313_rtc), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->rtc_base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(priv->rtc_base)) + return PTR_ERR(priv->rtc_base); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return -EINVAL; + + priv->rtc_dev = devm_rtc_allocate_device(dev); + if (IS_ERR(priv->rtc_dev)) + return PTR_ERR(priv->rtc_dev); + + priv->rtc_dev->ops = &msc313_rtc_ops; + 
priv->rtc_dev->range_max = U32_MAX; + + ret = devm_request_irq(dev, irq, msc313_rtc_interrupt, IRQF_SHARED, + dev_name(&pdev->dev), &pdev->dev); + if (ret) { + dev_err(dev, "Could not request IRQ\n"); + return ret; + } + + clk = devm_clk_get(dev, NULL); + if (IS_ERR(clk)) { + dev_err(dev, "No input reference clock\n"); + return PTR_ERR(clk); + } + + ret = clk_prepare_enable(clk); + if (ret) { + dev_err(dev, "Failed to enable the reference clock, %d\n", ret); + return ret; + } + + ret = devm_add_action_or_reset(dev, (void (*) (void *))clk_disable_unprepare, clk); + if (ret) + return ret; + + rate = clk_get_rate(clk); + writew(rate & 0xFFFF, priv->rtc_base + REG_RTC_FREQ_CW_L); + writew((rate >> 16) & 0xFFFF, priv->rtc_base + REG_RTC_FREQ_CW_H); + + platform_set_drvdata(pdev, priv); + + return devm_rtc_register_device(priv->rtc_dev); +} + +static const struct of_device_id msc313_rtc_of_match_table[] = { + { .compatible = "mstar,msc313-rtc" }, + { } +}; +MODULE_DEVICE_TABLE(of, msc313_rtc_of_match_table); + +static struct platform_driver msc313_rtc_driver = { + .probe = msc313_rtc_probe, + .driver = { + .name = "msc313-rtc", + .of_match_table = msc313_rtc_of_match_table, + }, +}; + +module_platform_driver(msc313_rtc_driver); + +MODULE_AUTHOR("Daniel Palmer "); +MODULE_AUTHOR("Romain Perier "); +MODULE_DESCRIPTION("MStar RTC Driver"); +MODULE_LICENSE("GPL v2"); From 27ff63eb076c31086e0a72d41b5c635193a58516 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 27 Sep 2021 08:37:23 +0200 Subject: [PATCH 048/433] rtc: msc313: fix missing include The driver needs io.h Reported-by: Stephen Rothwell Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20210927063724.312687-1-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-msc313.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/rtc/rtc-msc313.c b/drivers/rtc/rtc-msc313.c index f493ca5f181b..5f178d29cfd8 100644 --- a/drivers/rtc/rtc-msc313.c +++ b/drivers/rtc/rtc-msc313.c @@ -10,6 +10,7 @@ 
#include #include +#include #include #include #include From 61bc346ce64a3864ac55f5d18bdc1572cda4fb18 Mon Sep 17 00:00:00 2001 From: Eugene Syromiatnikov Date: Wed, 25 Aug 2021 19:06:13 +0200 Subject: [PATCH 049/433] uapi/linux/prctl: provide macro definitions for the PR_SCHED_CORE type argument Commit 7ac592aa35a684ff ("sched: prctl() core-scheduling interface") made use of enum pid_type in prctl's arg4; this type and the associated enumeration definitions are not exposed to userspace. Christian has suggested to provide additional macro definitions that convey the meaning of the type argument more in alignment with its actual usage, and this patch does exactly that. Link: https://lore.kernel.org/r/20210825170613.GA3884@asgard.redhat.com Suggested-by: Christian Brauner Acked-by: Christian Brauner Signed-off-by: Eugene Syromiatnikov Complements: 7ac592aa35a684ff ("sched: prctl() core-scheduling interface") Signed-off-by: Christian Brauner --- Documentation/admin-guide/hw-vuln/core-scheduling.rst | 5 +++-- include/uapi/linux/prctl.h | 3 +++ kernel/sched/core_sched.c | 4 ++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/core-scheduling.rst b/Documentation/admin-guide/hw-vuln/core-scheduling.rst index 0febe458597c..cf1eeefdfc32 100644 --- a/Documentation/admin-guide/hw-vuln/core-scheduling.rst +++ b/Documentation/admin-guide/hw-vuln/core-scheduling.rst @@ -61,8 +61,9 @@ arg3: ``pid`` of the task for which the operation applies. arg4: - ``pid_type`` for which the operation applies. It is of type ``enum pid_type``. - For example, if arg4 is ``PIDTYPE_TGID``, then the operation of this command + ``pid_type`` for which the operation applies. It is one of + ``PR_SCHED_CORE_SCOPE_``-prefixed macro constants. For example, if arg4 + is ``PR_SCHED_CORE_SCOPE_THREAD_GROUP``, then the operation of this command will be performed for all tasks in the task group of ``pid``. 
arg5: diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 43bd7f713c39..b2e4dc1449b9 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -268,5 +268,8 @@ struct prctl_mm_map { # define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */ # define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */ # define PR_SCHED_CORE_MAX 4 +# define PR_SCHED_CORE_SCOPE_THREAD 0 +# define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1 +# define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2 #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/sched/core_sched.c b/kernel/sched/core_sched.c index 9a80e9a474c0..20f640949450 100644 --- a/kernel/sched/core_sched.c +++ b/kernel/sched/core_sched.c @@ -134,6 +134,10 @@ int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type, if (!static_branch_likely(&sched_smt_present)) return -ENODEV; + BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_THREAD != PIDTYPE_PID); + BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_THREAD_GROUP != PIDTYPE_TGID); + BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_PROCESS_GROUP != PIDTYPE_PGID); + if (type > PIDTYPE_PGID || cmd >= PR_SCHED_CORE_MAX || pid < 0 || (cmd != PR_SCHED_CORE_GET && uaddr)) return -EINVAL; From 3109151c47343c80300177ec7704e0757064efdc Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 27 Sep 2021 14:02:40 +0100 Subject: [PATCH 050/433] rtc: mcp795: Add SPI ID table Currently autoloading for SPI devices does not use the DT ID table, it uses SPI modaliases. Supporting OF modaliases is going to be difficult if not impractical, an attempt was made but has been reverted, so ensure that module autoloading works for this driver by adding an id_table listing the SPI IDs for everything. 
Fixes: 96c8395e2166 ("spi: Revert modalias changes") Signed-off-by: Mark Brown Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20210927130240.33693-1-broonie@kernel.org --- drivers/rtc/rtc-mcp795.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/rtc/rtc-mcp795.c b/drivers/rtc/rtc-mcp795.c index bad7792b6ca5..0d515b3df571 100644 --- a/drivers/rtc/rtc-mcp795.c +++ b/drivers/rtc/rtc-mcp795.c @@ -430,12 +430,19 @@ static const struct of_device_id mcp795_of_match[] = { MODULE_DEVICE_TABLE(of, mcp795_of_match); #endif +static const struct spi_device_id mcp795_spi_ids[] = { + { .name = "mcp795" }, + { } +}; +MODULE_DEVICE_TABLE(spi, mcp795_spi_ids); + static struct spi_driver mcp795_driver = { .driver = { .name = "rtc-mcp795", .of_match_table = of_match_ptr(mcp795_of_match), }, .probe = mcp795_probe, + .id_table = mcp795_spi_ids, }; module_spi_driver(mcp795_driver); From f3606687b447c41d28a011c98373b62b1cd52345 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 28 Sep 2021 14:46:54 +0100 Subject: [PATCH 051/433] rtc: msc313: Fix unintentional sign extension issues with left shift of a u16 Shifting the u16 value returned by readw by 16 bits to the left will be promoted to a 32 bit signed int and then sign-extended to an unsigned long. If the top bit of the readw is set then the shifted value will be sign extended and the top 32 bits of the result will be set. 
Fixes: be7d9c9161b9 ("rtc: Add support for the MSTAR MSC313 RTC") Signed-off-by: Colin Ian King Reviewed-by: Romain Perier Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20210928134654.991923-1-colin.king@canonical.com --- drivers/rtc/rtc-msc313.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-msc313.c b/drivers/rtc/rtc-msc313.c index 5f178d29cfd8..f3fde013c4b8 100644 --- a/drivers/rtc/rtc-msc313.c +++ b/drivers/rtc/rtc-msc313.c @@ -53,7 +53,7 @@ static int msc313_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) unsigned long seconds; seconds = readw(priv->rtc_base + REG_RTC_MATCH_VAL_L) - | (readw(priv->rtc_base + REG_RTC_MATCH_VAL_H) << 16); + | ((unsigned long)readw(priv->rtc_base + REG_RTC_MATCH_VAL_H) << 16); rtc_time64_to_tm(seconds, &alarm->time); @@ -122,7 +122,7 @@ static int msc313_rtc_read_time(struct device *dev, struct rtc_time *tm) udelay(1); seconds = readw(priv->rtc_base + REG_RTC_CNT_VAL_L) - | (readw(priv->rtc_base + REG_RTC_CNT_VAL_H) << 16); + | ((unsigned long)readw(priv->rtc_base + REG_RTC_CNT_VAL_H) << 16); rtc_time64_to_tm(seconds, tm); From 7caadcfa8a7c6f8e754d982b99d959a222e7f863 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 25 Sep 2021 23:34:41 +0100 Subject: [PATCH 052/433] rtc: m41t80: return NULL rather than a plain 0 integer Function m41t80_sqw_register_clk returns a pointer to struct clk, so returning a plain 0 integer isn't good practice. Fix this by returning a NULL instead. 
Signed-off-by: Colin Ian King Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20210925223441.182673-1-colin.king@canonical.com --- drivers/rtc/rtc-m41t80.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c index f736f8c22e96..6d383b629d20 100644 --- a/drivers/rtc/rtc-m41t80.c +++ b/drivers/rtc/rtc-m41t80.c @@ -557,7 +557,7 @@ static struct clk *m41t80_sqw_register_clk(struct m41t80_data *m41t80) * registered automatically when being referenced. */ of_node_put(fixed_clock); - return 0; + return NULL; } /* First disable the clock */ From 5c4c2c8e6fac26fa0b80c234d6e9f75d637193af Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 1 Oct 2021 21:20:49 -0700 Subject: [PATCH 053/433] Input: ariel-pwrbutton - add SPI device ID table Currently autoloading for SPI devices does not use the DT ID table, it uses SPI modaliases. Supporting OF modaliases is going to be difficult if not impractical, an attempt was made but has been reverted, so ensure that module autoloading works for this driver by adding a SPI device ID table. 
Fixes: 96c8395e2166 ("spi: Revert modalias changes") Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210927134104.38648-1-broonie@kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/misc/ariel-pwrbutton.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/input/misc/ariel-pwrbutton.c b/drivers/input/misc/ariel-pwrbutton.c index 17bbaac8b80c..cdc80715b5fd 100644 --- a/drivers/input/misc/ariel-pwrbutton.c +++ b/drivers/input/misc/ariel-pwrbutton.c @@ -149,12 +149,19 @@ static const struct of_device_id ariel_pwrbutton_of_match[] = { }; MODULE_DEVICE_TABLE(of, ariel_pwrbutton_of_match); +static const struct spi_device_id ariel_pwrbutton_spi_ids[] = { + { .name = "wyse-ariel-ec-input" }, + { } +}; +MODULE_DEVICE_TABLE(spi, ariel_pwrbutton_spi_ids); + static struct spi_driver ariel_pwrbutton_driver = { .driver = { .name = "dell-wyse-ariel-ec-input", .of_match_table = ariel_pwrbutton_of_match, }, .probe = ariel_pwrbutton_probe, + .id_table = ariel_pwrbutton_spi_ids, }; module_spi_driver(ariel_pwrbutton_driver); From ef5825e3cf0d0af657f5fb4dd86d750ed42fee0a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 2 Sep 2021 11:14:47 +1000 Subject: [PATCH 054/433] NFSD: move filehandle format declarations out of "uapi". A small part of the declarations concerning filehandle format is currently in the "uapi" include directory: include/uapi/linux/nfsd/nfsfh.h There is a lot more to the filehandle format, including "enum fid_type" and "enum nfsd_fsid" which are not exported via "uapi". This small part of the filehandle definition is of minimal use outside of the kernel, and I can find no evidence that any other code is using it. Certainly nfs-utils and wireshark (the most likely candidates) do not use these declarations. 
So move it out of "uapi" by copying the content from include/uapi/linux/nfsd/nfsfh.h into fs/nfsd/nfsfh.h A few unnecessary "#include" directives are not copied, and neither is the #define of fh_auth, which is annotated as being for userspace only. The copyright claims in the uapi file are identical to those in the nfsd file, so there is no need to copy those. The "__u32" style integer types are only needed in "uapi". In kernel-only code we can use the more familiar "u32" style. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsfh.h | 97 ++++++++++++++++++++++++++- fs/nfsd/vfs.c | 1 + include/uapi/linux/nfsd/nfsfh.h | 115 -------------------------------- 3 files changed, 97 insertions(+), 116 deletions(-) delete mode 100644 include/uapi/linux/nfsd/nfsfh.h diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 6106697adc04..ad47f16676a8 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -10,9 +10,104 @@ #include #include -#include #include #include +#include + + +/* + * This is the old "dentry style" Linux NFSv2 file handle. + * + * The xino and xdev fields are currently used to transport the + * ino/dev of the exported inode. + */ +struct nfs_fhbase_old { + u32 fb_dcookie; /* dentry cookie - always 0xfeebbaca */ + u32 fb_ino; /* our inode number */ + u32 fb_dirino; /* dir inode number, 0 for directories */ + u32 fb_dev; /* our device */ + u32 fb_xdev; + u32 fb_xino; + u32 fb_generation; +}; + +/* + * This is the new flexible, extensible style NFSv2/v3/v4 file handle. + * + * The file handle starts with a sequence of four-byte words. + * The first word contains a version number (1) and three descriptor bytes + * that tell how the remaining 3 variable length fields should be handled. + * These three bytes are auth_type, fsid_type and fileid_type. + * + * All four-byte values are in host-byte-order. + * + * The auth_type field is deprecated and must be set to 0. + * + * The fsid_type identifies how the filesystem (or export point) is + * encoded. 
+ * Current values: + * 0 - 4 byte device id (ms-2-bytes major, ls-2-bytes minor), 4byte inode number + * NOTE: we cannot use the kdev_t device id value, because kdev_t.h + * says we mustn't. We must break it up and reassemble. + * 1 - 4 byte user specified identifier + * 2 - 4 byte major, 4 byte minor, 4 byte inode number - DEPRECATED + * 3 - 4 byte device id, encoded for user-space, 4 byte inode number + * 4 - 4 byte inode number and 4 byte uuid + * 5 - 8 byte uuid + * 6 - 16 byte uuid + * 7 - 8 byte inode number and 16 byte uuid + * + * The fileid_type identified how the file within the filesystem is encoded. + * The values for this field are filesystem specific, exccept that + * filesystems must not use the values '0' or '0xff'. 'See enum fid_type' + * in include/linux/exportfs.h for currently registered values. + */ +struct nfs_fhbase_new { + union { + struct { + u8 fb_version_aux; /* == 1, even => nfs_fhbase_old */ + u8 fb_auth_type_aux; + u8 fb_fsid_type_aux; + u8 fb_fileid_type_aux; + u32 fb_auth[1]; + /* u32 fb_fsid[0]; floating */ + /* u32 fb_fileid[0]; floating */ + }; + struct { + u8 fb_version; /* == 1, even => nfs_fhbase_old */ + u8 fb_auth_type; + u8 fb_fsid_type; + u8 fb_fileid_type; + u32 fb_auth_flex[]; /* flexible-array member */ + }; + }; +}; + +struct knfsd_fh { + unsigned int fh_size; /* significant for NFSv3. 
+ * Points to the current size while building + * a new file handle + */ + union { + struct nfs_fhbase_old fh_old; + u32 fh_pad[NFS4_FHSIZE/4]; + struct nfs_fhbase_new fh_new; + } fh_base; +}; + +#define ofh_dcookie fh_base.fh_old.fb_dcookie +#define ofh_ino fh_base.fh_old.fb_ino +#define ofh_dirino fh_base.fh_old.fb_dirino +#define ofh_dev fh_base.fh_old.fb_dev +#define ofh_xdev fh_base.fh_old.fb_xdev +#define ofh_xino fh_base.fh_old.fb_xino +#define ofh_generation fh_base.fh_old.fb_generation + +#define fh_version fh_base.fh_new.fb_version +#define fh_fsid_type fh_base.fh_new.fb_fsid_type +#define fh_auth_type fh_base.fh_new.fb_auth_type +#define fh_fileid_type fh_base.fh_new.fb_fileid_type +#define fh_fsid fh_base.fh_new.fb_auth_flex static inline __u32 ino_t_to_u32(ino_t ino) { diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 738d564ca4ce..e9c406fd05b6 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -244,6 +244,7 @@ out_nfserr: * returned. Otherwise the covered directory is returned. * NOTE: this mountpoint crossing is not supported properly by all * clients and is explicitly disallowed for NFSv3 + * NeilBrown */ __be32 nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, diff --git a/include/uapi/linux/nfsd/nfsfh.h b/include/uapi/linux/nfsd/nfsfh.h deleted file mode 100644 index e29e8accc4f4..000000000000 --- a/include/uapi/linux/nfsd/nfsfh.h +++ /dev/null @@ -1,115 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * This file describes the layout of the file handles as passed - * over the wire. - * - * Copyright (C) 1995, 1996, 1997 Olaf Kirch - */ - -#ifndef _UAPI_LINUX_NFSD_FH_H -#define _UAPI_LINUX_NFSD_FH_H - -#include -#include -#include -#include -#include - -/* - * This is the old "dentry style" Linux NFSv2 file handle. - * - * The xino and xdev fields are currently used to transport the - * ino/dev of the exported inode. 
- */ -struct nfs_fhbase_old { - __u32 fb_dcookie; /* dentry cookie - always 0xfeebbaca */ - __u32 fb_ino; /* our inode number */ - __u32 fb_dirino; /* dir inode number, 0 for directories */ - __u32 fb_dev; /* our device */ - __u32 fb_xdev; - __u32 fb_xino; - __u32 fb_generation; -}; - -/* - * This is the new flexible, extensible style NFSv2/v3/v4 file handle. - * - * The file handle starts with a sequence of four-byte words. - * The first word contains a version number (1) and three descriptor bytes - * that tell how the remaining 3 variable length fields should be handled. - * These three bytes are auth_type, fsid_type and fileid_type. - * - * All four-byte values are in host-byte-order. - * - * The auth_type field is deprecated and must be set to 0. - * - * The fsid_type identifies how the filesystem (or export point) is - * encoded. - * Current values: - * 0 - 4 byte device id (ms-2-bytes major, ls-2-bytes minor), 4byte inode number - * NOTE: we cannot use the kdev_t device id value, because kdev_t.h - * says we mustn't. We must break it up and reassemble. - * 1 - 4 byte user specified identifier - * 2 - 4 byte major, 4 byte minor, 4 byte inode number - DEPRECATED - * 3 - 4 byte device id, encoded for user-space, 4 byte inode number - * 4 - 4 byte inode number and 4 byte uuid - * 5 - 8 byte uuid - * 6 - 16 byte uuid - * 7 - 8 byte inode number and 16 byte uuid - * - * The fileid_type identified how the file within the filesystem is encoded. - * The values for this field are filesystem specific, exccept that - * filesystems must not use the values '0' or '0xff'. 'See enum fid_type' - * in include/linux/exportfs.h for currently registered values. 
- */ -struct nfs_fhbase_new { - union { - struct { - __u8 fb_version_aux; /* == 1, even => nfs_fhbase_old */ - __u8 fb_auth_type_aux; - __u8 fb_fsid_type_aux; - __u8 fb_fileid_type_aux; - __u32 fb_auth[1]; - /* __u32 fb_fsid[0]; floating */ - /* __u32 fb_fileid[0]; floating */ - }; - struct { - __u8 fb_version; /* == 1, even => nfs_fhbase_old */ - __u8 fb_auth_type; - __u8 fb_fsid_type; - __u8 fb_fileid_type; - __u32 fb_auth_flex[]; /* flexible-array member */ - }; - }; -}; - -struct knfsd_fh { - unsigned int fh_size; /* significant for NFSv3. - * Points to the current size while building - * a new file handle - */ - union { - struct nfs_fhbase_old fh_old; - __u32 fh_pad[NFS4_FHSIZE/4]; - struct nfs_fhbase_new fh_new; - } fh_base; -}; - -#define ofh_dcookie fh_base.fh_old.fb_dcookie -#define ofh_ino fh_base.fh_old.fb_ino -#define ofh_dirino fh_base.fh_old.fb_dirino -#define ofh_dev fh_base.fh_old.fb_dev -#define ofh_xdev fh_base.fh_old.fb_xdev -#define ofh_xino fh_base.fh_old.fb_xino -#define ofh_generation fh_base.fh_old.fb_generation - -#define fh_version fh_base.fh_new.fb_version -#define fh_fsid_type fh_base.fh_new.fb_fsid_type -#define fh_auth_type fh_base.fh_new.fb_auth_type -#define fh_fileid_type fh_base.fh_new.fb_fileid_type -#define fh_fsid fh_base.fh_new.fb_auth_flex - -/* Do not use, provided for userspace compatiblity. */ -#define fh_auth fh_base.fh_new.fb_auth - -#endif /* _UAPI_LINUX_NFSD_FH_H */ From c645a883df34ee10b884ec921e850def54b7f461 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 2 Sep 2021 11:15:29 +1000 Subject: [PATCH 055/433] NFSD: drop support for ancient filehandles Filehandles not in the "new" or "version 1" format have not been handed out for new mounts since Linux 2.4 which was released 20 years ago. I think it is safe to say that no such file handles are still in use, and that we can drop support for them. Signed-off-by: NeilBrown Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfsfh.c | 154 +++++++++++++++--------------------------------- fs/nfsd/nfsfh.h | 34 +---------- 2 files changed, 51 insertions(+), 137 deletions(-) diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index c475d2271f9c..149f9bbc48a4 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -154,11 +154,12 @@ static inline __be32 check_pseudo_root(struct svc_rqst *rqstp, static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) { struct knfsd_fh *fh = &fhp->fh_handle; - struct fid *fid = NULL, sfid; + struct fid *fid = NULL; struct svc_export *exp; struct dentry *dentry; int fileid_type; int data_left = fh->fh_size/4; + int len; __be32 error; error = nfserr_stale; @@ -167,48 +168,35 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) if (rqstp->rq_vers == 4 && fh->fh_size == 0) return nfserr_nofilehandle; - if (fh->fh_version == 1) { - int len; + if (fh->fh_version != 1) + return error; - if (--data_left < 0) - return error; - if (fh->fh_auth_type != 0) - return error; - len = key_len(fh->fh_fsid_type) / 4; - if (len == 0) - return error; - if (fh->fh_fsid_type == FSID_MAJOR_MINOR) { - /* deprecated, convert to type 3 */ - len = key_len(FSID_ENCODE_DEV)/4; - fh->fh_fsid_type = FSID_ENCODE_DEV; - /* - * struct knfsd_fh uses host-endian fields, which are - * sometimes used to hold net-endian values. This - * confuses sparse, so we must use __force here to - * keep it from complaining. 
- */ - fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl((__force __be32)fh->fh_fsid[0]), - ntohl((__force __be32)fh->fh_fsid[1]))); - fh->fh_fsid[1] = fh->fh_fsid[2]; - } - data_left -= len; - if (data_left < 0) - return error; - exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_fsid); - fid = (struct fid *)(fh->fh_fsid + len); - } else { - __u32 tfh[2]; - dev_t xdev; - ino_t xino; - - if (fh->fh_size != NFS_FHSIZE) - return error; - /* assume old filehandle format */ - xdev = old_decode_dev(fh->ofh_xdev); - xino = u32_to_ino_t(fh->ofh_xino); - mk_fsid(FSID_DEV, tfh, xdev, xino, 0, NULL); - exp = rqst_exp_find(rqstp, FSID_DEV, tfh); + if (--data_left < 0) + return error; + if (fh->fh_auth_type != 0) + return error; + len = key_len(fh->fh_fsid_type) / 4; + if (len == 0) + return error; + if (fh->fh_fsid_type == FSID_MAJOR_MINOR) { + /* deprecated, convert to type 3 */ + len = key_len(FSID_ENCODE_DEV)/4; + fh->fh_fsid_type = FSID_ENCODE_DEV; + /* + * struct knfsd_fh uses host-endian fields, which are + * sometimes used to hold net-endian values. This + * confuses sparse, so we must use __force here to + * keep it from complaining. 
+ */ + fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl((__force __be32)fh->fh_fsid[0]), + ntohl((__force __be32)fh->fh_fsid[1]))); + fh->fh_fsid[1] = fh->fh_fsid[2]; } + data_left -= len; + if (data_left < 0) + return error; + exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_fsid); + fid = (struct fid *)(fh->fh_fsid + len); error = nfserr_stale; if (IS_ERR(exp)) { @@ -253,18 +241,7 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) if (rqstp->rq_vers > 2) error = nfserr_badhandle; - if (fh->fh_version != 1) { - sfid.i32.ino = fh->ofh_ino; - sfid.i32.gen = fh->ofh_generation; - sfid.i32.parent_ino = fh->ofh_dirino; - fid = &sfid; - data_left = 3; - if (fh->ofh_dirino == 0) - fileid_type = FILEID_INO32_GEN; - else - fileid_type = FILEID_INO32_GEN_PARENT; - } else - fileid_type = fh->fh_fileid_type; + fileid_type = fh->fh_fileid_type; if (fileid_type == FILEID_ROOT) dentry = dget(exp->ex_path.dentry); @@ -452,20 +429,6 @@ static void _fh_update(struct svc_fh *fhp, struct svc_export *exp, } } -/* - * for composing old style file handles - */ -static inline void _fh_update_old(struct dentry *dentry, - struct svc_export *exp, - struct knfsd_fh *fh) -{ - fh->ofh_ino = ino_t_to_u32(d_inode(dentry)->i_ino); - fh->ofh_generation = d_inode(dentry)->i_generation; - if (d_is_dir(dentry) || - (exp->ex_flags & NFSEXP_NOSUBTREECHECK)) - fh->ofh_dirino = 0; -} - static bool is_root_export(struct svc_export *exp) { return exp->ex_path.dentry == exp->ex_path.dentry->d_sb->s_root; @@ -562,9 +525,6 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, /* ref_fh is a reference file handle. * if it is non-null and for the same filesystem, then we should compose * a filehandle which is of the same version, where possible. 
- * Currently, that means that if ref_fh->fh_handle.fh_version == 0xca - * Then create a 32byte filehandle using nfs_fhbase_old - * */ struct inode * inode = d_inode(dentry); @@ -600,35 +560,21 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, fhp->fh_dentry = dget(dentry); /* our internal copy */ fhp->fh_export = exp_get(exp); - if (fhp->fh_handle.fh_version == 0xca) { - /* old style filehandle please */ - memset(&fhp->fh_handle.fh_base, 0, NFS_FHSIZE); - fhp->fh_handle.fh_size = NFS_FHSIZE; - fhp->fh_handle.ofh_dcookie = 0xfeebbaca; - fhp->fh_handle.ofh_dev = old_encode_dev(ex_dev); - fhp->fh_handle.ofh_xdev = fhp->fh_handle.ofh_dev; - fhp->fh_handle.ofh_xino = - ino_t_to_u32(d_inode(exp->ex_path.dentry)->i_ino); - fhp->fh_handle.ofh_dirino = ino_t_to_u32(parent_ino(dentry)); - if (inode) - _fh_update_old(dentry, exp, &fhp->fh_handle); - } else { - fhp->fh_handle.fh_size = - key_len(fhp->fh_handle.fh_fsid_type) + 4; - fhp->fh_handle.fh_auth_type = 0; + fhp->fh_handle.fh_size = + key_len(fhp->fh_handle.fh_fsid_type) + 4; + fhp->fh_handle.fh_auth_type = 0; - mk_fsid(fhp->fh_handle.fh_fsid_type, - fhp->fh_handle.fh_fsid, - ex_dev, - d_inode(exp->ex_path.dentry)->i_ino, - exp->ex_fsid, exp->ex_uuid); + mk_fsid(fhp->fh_handle.fh_fsid_type, + fhp->fh_handle.fh_fsid, + ex_dev, + d_inode(exp->ex_path.dentry)->i_ino, + exp->ex_fsid, exp->ex_uuid); - if (inode) - _fh_update(fhp, exp, dentry); - if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) { - fh_put(fhp); - return nfserr_opnotsupp; - } + if (inode) + _fh_update(fhp, exp, dentry); + if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) { + fh_put(fhp); + return nfserr_opnotsupp; } return 0; @@ -649,16 +595,12 @@ fh_update(struct svc_fh *fhp) dentry = fhp->fh_dentry; if (d_really_is_negative(dentry)) goto out_negative; - if (fhp->fh_handle.fh_version != 1) { - _fh_update_old(dentry, fhp->fh_export, &fhp->fh_handle); - } else { - if (fhp->fh_handle.fh_fileid_type != FILEID_ROOT) - return 
0; + if (fhp->fh_handle.fh_fileid_type != FILEID_ROOT) + return 0; - _fh_update(fhp, fhp->fh_export, dentry); - if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) - return nfserr_opnotsupp; - } + _fh_update(fhp, fhp->fh_export, dentry); + if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) + return nfserr_opnotsupp; return 0; out_bad: printk(KERN_ERR "fh_update: fh not verified!\n"); diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index ad47f16676a8..8b5587f274a7 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -14,26 +14,7 @@ #include #include - /* - * This is the old "dentry style" Linux NFSv2 file handle. - * - * The xino and xdev fields are currently used to transport the - * ino/dev of the exported inode. - */ -struct nfs_fhbase_old { - u32 fb_dcookie; /* dentry cookie - always 0xfeebbaca */ - u32 fb_ino; /* our inode number */ - u32 fb_dirino; /* dir inode number, 0 for directories */ - u32 fb_dev; /* our device */ - u32 fb_xdev; - u32 fb_xino; - u32 fb_generation; -}; - -/* - * This is the new flexible, extensible style NFSv2/v3/v4 file handle. - * * The file handle starts with a sequence of four-byte words. * The first word contains a version number (1) and three descriptor bytes * that tell how the remaining 3 variable length fields should be handled. @@ -57,7 +38,7 @@ struct nfs_fhbase_old { * 6 - 16 byte uuid * 7 - 8 byte inode number and 16 byte uuid * - * The fileid_type identified how the file within the filesystem is encoded. + * The fileid_type identifies how the file within the filesystem is encoded. * The values for this field are filesystem specific, exccept that * filesystems must not use the values '0' or '0xff'. 'See enum fid_type' * in include/linux/exportfs.h for currently registered values. 
@@ -65,7 +46,7 @@ struct nfs_fhbase_old { struct nfs_fhbase_new { union { struct { - u8 fb_version_aux; /* == 1, even => nfs_fhbase_old */ + u8 fb_version_aux; /* == 1 */ u8 fb_auth_type_aux; u8 fb_fsid_type_aux; u8 fb_fileid_type_aux; @@ -74,7 +55,7 @@ struct nfs_fhbase_new { /* u32 fb_fileid[0]; floating */ }; struct { - u8 fb_version; /* == 1, even => nfs_fhbase_old */ + u8 fb_version; /* == 1 */ u8 fb_auth_type; u8 fb_fsid_type; u8 fb_fileid_type; @@ -89,20 +70,11 @@ struct knfsd_fh { * a new file handle */ union { - struct nfs_fhbase_old fh_old; u32 fh_pad[NFS4_FHSIZE/4]; struct nfs_fhbase_new fh_new; } fh_base; }; -#define ofh_dcookie fh_base.fh_old.fb_dcookie -#define ofh_ino fh_base.fh_old.fb_ino -#define ofh_dirino fh_base.fh_old.fb_dirino -#define ofh_dev fh_base.fh_old.fb_dev -#define ofh_xdev fh_base.fh_old.fb_xdev -#define ofh_xino fh_base.fh_old.fb_xino -#define ofh_generation fh_base.fh_old.fb_generation - #define fh_version fh_base.fh_new.fb_version #define fh_fsid_type fh_base.fh_new.fb_fsid_type #define fh_auth_type fh_base.fh_new.fb_auth_type From d8b26071e65e80a348602b939e333242f989221b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 2 Sep 2021 11:16:32 +1000 Subject: [PATCH 056/433] NFSD: simplify struct nfsfh Most of the fields in 'struct knfsd_fh' are 2 levels deep (a union and a struct) and are accessed using macros like: #define fh_FOO fh_base.fh_new.fb_FOO This patch makes the union and struct anonymous, so that "fh_FOO" can be a name directly within 'struct knfsd_fh' and the #defines aren't needed. The file handle as a whole is sometimes accessed as "fh_base" or "fh_base.fh_pad", neither of which are particularly helpful names. As the struct holding the filehandle is now anonymous, we cannot use the name of that, so we union it with 'fh_raw' and use that where the raw filehandle is needed. fh_raw also ensures the structure is large enough for the largest possible filehandle.
fh_raw is a 'char' array, removing any need to cast it for memcpy etc. SVCFH_fmt() is simplified using the "%ph" printk format. This changes the appearance of filehandles in dprintk() debugging, making them a little more precise. Reviewed-by: Christoph Hellwig Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- fs/nfsd/flexfilelayout.c | 2 +- fs/nfsd/lockd.c | 2 +- fs/nfsd/nfs3xdr.c | 4 ++-- fs/nfsd/nfs4callback.c | 2 +- fs/nfsd/nfs4proc.c | 4 ++-- fs/nfsd/nfs4state.c | 4 ++-- fs/nfsd/nfs4xdr.c | 4 ++-- fs/nfsd/nfsctl.c | 6 ++--- fs/nfsd/nfsfh.c | 13 ++++------- fs/nfsd/nfsfh.h | 50 ++++++++++++---------------------------- fs/nfsd/nfsxdr.c | 4 ++-- 11 files changed, 35 insertions(+), 60 deletions(-) diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c index db7ef07ae50c..2e2f1d5e9f62 100644 --- a/fs/nfsd/flexfilelayout.c +++ b/fs/nfsd/flexfilelayout.c @@ -61,7 +61,7 @@ nfsd4_ff_proc_layoutget(struct inode *inode, const struct svc_fh *fhp, goto out_error; fl->fh.size = fhp->fh_handle.fh_size; - memcpy(fl->fh.data, &fhp->fh_handle.fh_base, fl->fh.size); + memcpy(fl->fh.data, &fhp->fh_handle.fh_raw, fl->fh.size); /* Give whole file layout segments */ seg->offset = 0; diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c index 606fa155c28a..46a7f9b813e5 100644 --- a/fs/nfsd/lockd.c +++ b/fs/nfsd/lockd.c @@ -35,7 +35,7 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp, /* must initialize before using! but maxsize doesn't matter */ fh_init(&fh,0); fh.fh_handle.fh_size = f->size; - memcpy((char*)&fh.fh_handle.fh_base, f->data, f->size); + memcpy(&fh.fh_handle.fh_raw, f->data, f->size); fh.fh_export = NULL; access = (mode == O_WRONLY) ? 
NFSD_MAY_WRITE : NFSD_MAY_READ; diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 0a5ebc52e6a9..3d37923afb06 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -92,7 +92,7 @@ svcxdr_decode_nfs_fh3(struct xdr_stream *xdr, struct svc_fh *fhp) return false; fh_init(fhp, NFS3_FHSIZE); fhp->fh_handle.fh_size = size; - memcpy(&fhp->fh_handle.fh_base, p, size); + memcpy(&fhp->fh_handle.fh_raw, p, size); return true; } @@ -131,7 +131,7 @@ svcxdr_encode_nfs_fh3(struct xdr_stream *xdr, const struct svc_fh *fhp) *p++ = cpu_to_be32(size); if (size) p[XDR_QUADLEN(size) - 1] = 0; - memcpy(p, &fhp->fh_handle.fh_base, size); + memcpy(p, &fhp->fh_handle.fh_raw, size); return true; } diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 0f8b10f363e7..11f8715d92d6 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -121,7 +121,7 @@ static void encode_nfs_fh4(struct xdr_stream *xdr, const struct knfsd_fh *fh) BUG_ON(length > NFS4_FHSIZE); p = xdr_reserve_space(xdr, 4 + length); - xdr_encode_opaque(p, &fh->fh_base, length); + xdr_encode_opaque(p, &fh->fh_raw, length); } /* diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 486c5dba4b65..3f7e59ec4e32 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -519,7 +519,7 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, fh_put(&cstate->current_fh); cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen; - memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval, + memcpy(&cstate->current_fh.fh_handle.fh_raw, putfh->pf_fhval, putfh->pf_fhlen); ret = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_BYPASS_GSS); #ifdef CONFIG_NFSD_V4_2_INTER_SSC @@ -1383,7 +1383,7 @@ nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, s_fh = &cstate->save_fh; copy->c_fh.size = s_fh->fh_handle.fh_size; - memcpy(copy->c_fh.data, &s_fh->fh_handle.fh_base, copy->c_fh.size); + memcpy(copy->c_fh.data, &s_fh->fh_handle.fh_raw, copy->c_fh.size); copy->stateid.seqid = 
cpu_to_be32(s_stid->si_generation); memcpy(copy->stateid.other, (void *)&s_stid->si_opaque, sizeof(stateid_opaque_t)); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 42356416f0a0..88c0a5cd7b75 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1010,7 +1010,7 @@ static int delegation_blocked(struct knfsd_fh *fh) } spin_unlock(&blocked_delegations_lock); } - hash = jhash(&fh->fh_base, fh->fh_size, 0); + hash = jhash(&fh->fh_raw, fh->fh_size, 0); if (test_bit(hash&255, bd->set[0]) && test_bit((hash>>8)&255, bd->set[0]) && test_bit((hash>>16)&255, bd->set[0])) @@ -1029,7 +1029,7 @@ static void block_delegations(struct knfsd_fh *fh) u32 hash; struct bloom_pair *bd = &blocked_delegations; - hash = jhash(&fh->fh_base, fh->fh_size, 0); + hash = jhash(&fh->fh_raw, fh->fh_size, 0); spin_lock(&blocked_delegations_lock); __set_bit(hash&255, bd->set[bd->new]); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 7abeccb975b2..a54b2845473b 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3110,7 +3110,7 @@ out_acl: p = xdr_reserve_space(xdr, fhp->fh_handle.fh_size + 4); if (!p) goto out_resource; - p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, + p = xdr_encode_opaque(p, &fhp->fh_handle.fh_raw, fhp->fh_handle.fh_size); } if (bmval0 & FATTR4_WORD0_FILEID) { @@ -3667,7 +3667,7 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh p = xdr_reserve_space(xdr, len + 4); if (!p) return nfserr_resource; - p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, len); + p = xdr_encode_opaque(p, &fhp->fh_handle.fh_raw, len); return 0; } diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index c2c3d9077dc5..5e48bc48942e 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -395,12 +395,12 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) auth_domain_put(dom); if (len) return len; - + mesg = buf; len = SIMPLE_TRANSACTION_LIMIT; - qword_addhex(&mesg, &len, (char*)&fh.fh_base, fh.fh_size); + 
qword_addhex(&mesg, &len, fh.fh_raw, fh.fh_size); mesg[-1] = '\n'; - return mesg - buf; + return mesg - buf; } /* diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 149f9bbc48a4..f3779fa72c89 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -640,16 +640,11 @@ fh_put(struct svc_fh *fhp) char * SVCFH_fmt(struct svc_fh *fhp) { struct knfsd_fh *fh = &fhp->fh_handle; + static char buf[2+1+1+64*3+1]; - static char buf[80]; - sprintf(buf, "%d: %08x %08x %08x %08x %08x %08x", - fh->fh_size, - fh->fh_base.fh_pad[0], - fh->fh_base.fh_pad[1], - fh->fh_base.fh_pad[2], - fh->fh_base.fh_pad[3], - fh->fh_base.fh_pad[4], - fh->fh_base.fh_pad[5]); + if (fh->fh_size < 0 || fh->fh_size> 64) + return "bad-fh"; + sprintf(buf, "%d: %*ph", fh->fh_size, fh->fh_size, fh->fh_raw); return buf; } diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 8b5587f274a7..d11e4b6870d6 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -43,44 +43,24 @@ * filesystems must not use the values '0' or '0xff'. 'See enum fid_type' * in include/linux/exportfs.h for currently registered values. */ -struct nfs_fhbase_new { + +struct knfsd_fh { + unsigned int fh_size; /* + * Points to the current size while + * building a new file handle. + */ union { + char fh_raw[NFS4_FHSIZE]; struct { - u8 fb_version_aux; /* == 1 */ - u8 fb_auth_type_aux; - u8 fb_fsid_type_aux; - u8 fb_fileid_type_aux; - u32 fb_auth[1]; - /* u32 fb_fsid[0]; floating */ - /* u32 fb_fileid[0]; floating */ - }; - struct { - u8 fb_version; /* == 1 */ - u8 fb_auth_type; - u8 fb_fsid_type; - u8 fb_fileid_type; - u32 fb_auth_flex[]; /* flexible-array member */ + u8 fh_version; /* == 1 */ + u8 fh_auth_type; /* deprecated */ + u8 fh_fsid_type; + u8 fh_fileid_type; + u32 fh_fsid[]; /* flexible-array member */ }; }; }; -struct knfsd_fh { - unsigned int fh_size; /* significant for NFSv3. 
- * Points to the current size while building - * a new file handle - */ - union { - u32 fh_pad[NFS4_FHSIZE/4]; - struct nfs_fhbase_new fh_new; - } fh_base; -}; - -#define fh_version fh_base.fh_new.fb_version -#define fh_fsid_type fh_base.fh_new.fb_fsid_type -#define fh_auth_type fh_base.fh_new.fb_auth_type -#define fh_fileid_type fh_base.fh_new.fb_fileid_type -#define fh_fsid fh_base.fh_new.fb_auth_flex - static inline __u32 ino_t_to_u32(ino_t ino) { return (__u32) ino; @@ -255,7 +235,7 @@ static inline void fh_copy_shallow(struct knfsd_fh *dst, struct knfsd_fh *src) { dst->fh_size = src->fh_size; - memcpy(&dst->fh_base, &src->fh_base, src->fh_size); + memcpy(&dst->fh_raw, &src->fh_raw, src->fh_size); } static __inline__ struct svc_fh * @@ -270,7 +250,7 @@ static inline bool fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) { if (fh1->fh_size != fh2->fh_size) return false; - if (memcmp(fh1->fh_base.fh_pad, fh2->fh_base.fh_pad, fh1->fh_size) != 0) + if (memcmp(fh1->fh_raw, fh2->fh_raw, fh1->fh_size) != 0) return false; return true; } @@ -294,7 +274,7 @@ static inline bool fh_fsid_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) */ static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh) { - return ~crc32_le(0xFFFFFFFF, (unsigned char *)&fh->fh_base, fh->fh_size); + return ~crc32_le(0xFFFFFFFF, fh->fh_raw, fh->fh_size); } #else static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh) diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index a06c05fe3b42..082449c7d0db 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -64,7 +64,7 @@ svcxdr_decode_fhandle(struct xdr_stream *xdr, struct svc_fh *fhp) if (!p) return false; fh_init(fhp, NFS_FHSIZE); - memcpy(&fhp->fh_handle.fh_base, p, NFS_FHSIZE); + memcpy(&fhp->fh_handle.fh_raw, p, NFS_FHSIZE); fhp->fh_handle.fh_size = NFS_FHSIZE; return true; @@ -78,7 +78,7 @@ svcxdr_encode_fhandle(struct xdr_stream *xdr, const struct svc_fh *fhp) p = xdr_reserve_space(xdr, NFS_FHSIZE); if (!p) return false; - memcpy(p, 
&fhp->fh_handle.fh_base, NFS_FHSIZE); + memcpy(p, &fhp->fh_handle.fh_raw, NFS_FHSIZE); return true; } From 8e70bf27fd20cc17e87150327a640e546bfbee64 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 25 Sep 2021 23:58:41 +0100 Subject: [PATCH 057/433] NFSD: Initialize pointer ni with NULL and not plain integer 0 Pointer ni is being initialized with plain integer zero. Fix this by initializing with NULL. Signed-off-by: Colin Ian King Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 2 +- fs/nfsd/nfs4state.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 3f7e59ec4e32..3dc40c1d32bc 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1178,7 +1178,7 @@ extern void nfs_sb_deactive(struct super_block *sb); static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr, struct nfsd4_ssc_umount_item **retwork, struct vfsmount **ss_mnt) { - struct nfsd4_ssc_umount_item *ni = 0; + struct nfsd4_ssc_umount_item *ni = NULL; struct nfsd4_ssc_umount_item *work = NULL; struct nfsd4_ssc_umount_item *tmp; DEFINE_WAIT(wait); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 88c0a5cd7b75..e49a38cd585f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5531,7 +5531,7 @@ static void nfsd4_ssc_shutdown_umount(struct nfsd_net *nn) static void nfsd4_ssc_expire_umount(struct nfsd_net *nn) { bool do_wakeup = false; - struct nfsd4_ssc_umount_item *ni = 0; + struct nfsd4_ssc_umount_item *ni = NULL; struct nfsd4_ssc_umount_item *tmp; spin_lock(&nn->nfsd_ssc_lock); From f49b68ddc9d7dddf1530312108a648dd815a2f30 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 30 Sep 2021 17:06:15 -0400 Subject: [PATCH 058/433] SUNRPC: xdr_stream_subsegment() must handle non-zero page_bases xdr_stream_subsegment() was introduced in commit c1346a1216ab ("NFSD: Replace the internals of the READ_BUF() macro"). There are two call sites for xdr_stream_subsegment(). 
One is nfsd4_decode_write(), and the other is nfsd4_decode_setxattr(). Currently neither of these call sites calls this API when xdr_buf::page_base is a non-zero value. However, I'm about to add a case where page_base will sometimes not be zero when nfsd4_decode_write() invokes this API. Replace the logic in xdr_stream_subsegment() that advances to the next data item in the xdr_stream with something more generic in order to handle this new use case. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/xdr.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index ca10ba2626f2..df194cc07035 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -1633,7 +1633,7 @@ EXPORT_SYMBOL_GPL(xdr_buf_subsegment); * Sets up @subbuf to represent a portion of @xdr. The portion * starts at the current offset in @xdr, and extends for a length * of @nbytes. If this is successful, @xdr is advanced to the next - * position following that portion. + * XDR data item following that portion. * * Return values: * %true: @subbuf has been initialized, and @xdr has been advanced. 
@@ -1642,29 +1642,31 @@ EXPORT_SYMBOL_GPL(xdr_buf_subsegment); bool xdr_stream_subsegment(struct xdr_stream *xdr, struct xdr_buf *subbuf, unsigned int nbytes) { - unsigned int remaining, offset, len; + unsigned int start = xdr_stream_pos(xdr); + unsigned int remaining, len; - if (xdr_buf_subsegment(xdr->buf, subbuf, xdr_stream_pos(xdr), nbytes)) + /* Extract @subbuf and bounds-check the fn arguments */ + if (xdr_buf_subsegment(xdr->buf, subbuf, start, nbytes)) return false; - if (subbuf->head[0].iov_len) - if (!__xdr_inline_decode(xdr, subbuf->head[0].iov_len)) - return false; - - remaining = subbuf->page_len; - offset = subbuf->page_base; - while (remaining) { - len = min_t(unsigned int, remaining, PAGE_SIZE) - offset; - + /* Advance @xdr by @nbytes */ + for (remaining = nbytes; remaining;) { if (xdr->p == xdr->end && !xdr_set_next_buffer(xdr)) return false; - if (!__xdr_inline_decode(xdr, len)) - return false; + len = (char *)xdr->end - (char *)xdr->p; + if (remaining <= len) { + xdr->p = (__be32 *)((char *)xdr->p + + (remaining + xdr_pad_size(nbytes))); + break; + } + + xdr->p = (__be32 *)((char *)xdr->p + len); + xdr->end = xdr->p; remaining -= len; - offset = 0; } + xdr_stream_set_pos(xdr, start + nbytes); return true; } EXPORT_SYMBOL_GPL(xdr_stream_subsegment); From dae9a6cab8009e526570e7477ce858dcdfeb256e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 30 Sep 2021 17:06:21 -0400 Subject: [PATCH 059/433] NFSD: Have legacy NFSD WRITE decoders use xdr_stream_subsegment() Refactor. Now that the NFSv2 and NFSv3 XDR decoders have been converted to use xdr_streams, the WRITE decoder functions can use xdr_stream_subsegment() to extract the WRITE payload into its own xdr_buf, just as the NFSv4 WRITE XDR decoder currently does. That makes it possible to pass the first kvec, pages array + length, page_base, and total payload length via a single function parameter. The payload's page_base is not yet assigned or used, but will be in subsequent patches. 
Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs3proc.c | 3 +-- fs/nfsd/nfs3xdr.c | 12 ++---------- fs/nfsd/nfs4proc.c | 3 +-- fs/nfsd/nfsproc.c | 3 +-- fs/nfsd/nfsxdr.c | 9 +-------- fs/nfsd/xdr.h | 2 +- fs/nfsd/xdr3.h | 2 +- include/linux/sunrpc/svc.h | 3 +-- net/sunrpc/svc.c | 11 ++++++----- 9 files changed, 15 insertions(+), 33 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 17715a6c7a40..4418517f6f12 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -201,8 +201,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp) fh_copy(&resp->fh, &argp->fh); resp->committed = argp->stable; - nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages, - &argp->first, cnt); + nvecs = svc_fill_write_vector(rqstp, &argp->payload); if (!nvecs) { resp->status = nfserr_io; goto out; diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 3d37923afb06..267e56f218af 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -621,9 +621,6 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_writeargs *args = rqstp->rq_argp; u32 max_blocksize = svc_max_payload(rqstp); - struct kvec *head = rqstp->rq_arg.head; - struct kvec *tail = rqstp->rq_arg.tail; - size_t remaining; if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) return 0; @@ -641,17 +638,12 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) /* request sanity */ if (args->count != args->len) return 0; - remaining = head->iov_len + rqstp->rq_arg.page_len + tail->iov_len; - remaining -= xdr_stream_pos(xdr); - if (remaining < xdr_align_size(args->len)) - return 0; if (args->count > max_blocksize) { args->count = max_blocksize; args->len = max_blocksize; } - - args->first.iov_base = xdr->p; - args->first.iov_len = head->iov_len - xdr_stream_pos(xdr); + if (!xdr_stream_subsegment(xdr, &args->payload, args->count)) + return 0; return 1; } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 
3dc40c1d32bc..5895bbeba373 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1033,8 +1033,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, write->wr_how_written = write->wr_stable_how; - nvecs = svc_fill_write_vector(rqstp, write->wr_payload.pages, - write->wr_payload.head, write->wr_buflen); + nvecs = svc_fill_write_vector(rqstp, &write->wr_payload); WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec)); status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf, diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 90fcd6178823..eea5b59b6a6c 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -234,8 +234,7 @@ nfsd_proc_write(struct svc_rqst *rqstp) SVCFH_fmt(&argp->fh), argp->len, argp->offset); - nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages, - &argp->first, cnt); + nvecs = svc_fill_write_vector(rqstp, &argp->payload); if (!nvecs) { resp->status = nfserr_io; goto out; diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 082449c7d0db..ddcc18adfeb1 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -325,10 +325,7 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) { struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_writeargs *args = rqstp->rq_argp; - struct kvec *head = rqstp->rq_arg.head; - struct kvec *tail = rqstp->rq_arg.tail; u32 beginoffset, totalcount; - size_t remaining; if (!svcxdr_decode_fhandle(xdr, &args->fh)) return 0; @@ -346,12 +343,8 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) return 0; if (args->len > NFSSVC_MAXBLKSIZE_V2) return 0; - remaining = head->iov_len + rqstp->rq_arg.page_len + tail->iov_len; - remaining -= xdr_stream_pos(xdr); - if (remaining < xdr_align_size(args->len)) + if (!xdr_stream_subsegment(xdr, &args->payload, args->len)) return 0; - args->first.iov_base = xdr->p; - args->first.iov_len = head->iov_len - xdr_stream_pos(xdr); return 1; } diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h index f45b4bc93f52..80fd6d7f3404 100644 --- 
a/fs/nfsd/xdr.h +++ b/fs/nfsd/xdr.h @@ -33,7 +33,7 @@ struct nfsd_writeargs { svc_fh fh; __u32 offset; int len; - struct kvec first; + struct xdr_buf payload; }; struct nfsd_createargs { diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index 933008382bbe..712c117300cb 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -40,7 +40,7 @@ struct nfsd3_writeargs { __u32 count; int stable; __u32 len; - struct kvec first; + struct xdr_buf payload; }; struct nfsd3_createargs { diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 064c96157d1f..6263410c948a 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -532,8 +532,7 @@ int svc_encode_result_payload(struct svc_rqst *rqstp, unsigned int offset, unsigned int length); unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, - struct page **pages, - struct kvec *first, size_t total); + struct xdr_buf *payload); char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *first, void *p, size_t total); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index a3bbe5ce4570..08ca797bb8a4 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1676,16 +1676,17 @@ EXPORT_SYMBOL_GPL(svc_encode_result_payload); /** * svc_fill_write_vector - Construct data argument for VFS write call * @rqstp: svc_rqst to operate on - * @pages: list of pages containing data payload - * @first: buffer containing first section of write payload - * @total: total number of bytes of write payload + * @payload: xdr_buf containing only the write data payload * * Fills in rqstp::rq_vec, and returns the number of elements. 
*/ -unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct page **pages, - struct kvec *first, size_t total) +unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, + struct xdr_buf *payload) { + struct page **pages = payload->pages; + struct kvec *first = payload->head; struct kvec *vec = rqstp->rq_vec; + size_t total = payload->len; unsigned int i; /* Some types of transport can present the write payload From 342a67f0884293639bd17ea44df754ead799e669 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 7 Jul 2021 22:08:32 -0400 Subject: [PATCH 060/433] NFS: Label the dentry with a verifier in nfs_link(), nfs_symlink() After the success of an operation such as link(), or symlink(), we expect to add the dentry back to the dcache as an ordinary positive dentry. However in NFS, unless it is labelled with the appropriate verifier for the parent directory state, then nfs_lookup_revalidate will end up discarding that dentry and forcing a new lookup. The fix is to ensure that we relabel the dentry appropriately on success. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 1a6d2867fba4..baca036f3890 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2352,6 +2352,8 @@ int nfs_symlink(struct user_namespace *mnt_userns, struct inode *dir, return error; } + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + /* * No big deal if we can't add this page to the page cache here. * READLINK will get the missing page from the server if needed. 
@@ -2385,6 +2387,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) d_drop(dentry); error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); if (error == 0) { + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); ihold(inode); d_add(dentry, inode); } From 9019fb391de02cbff422090768b73afe9f6174df Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 7 Jul 2021 21:43:09 -0400 Subject: [PATCH 061/433] NFS: Label the dentry with a verifier in nfs_rmdir() and nfs_unlink() After the success of an operation such as rmdir() or unlink(), we expect to add the dentry back to the dcache as an ordinary negative dentry. However in NFS, unless it is labelled with the appropriate verifier for the parent directory state, then nfs_lookup_revalidate will end up discarding that dentry and forcing a new lookup. The fix is to ensure that we relabel the dentry appropriately on success. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index baca036f3890..1ce1fa0a5926 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2197,6 +2197,18 @@ static void nfs_dentry_handle_enoent(struct dentry *dentry) d_delete(dentry); } +static void nfs_dentry_remove_handle_error(struct inode *dir, + struct dentry *dentry, int error) +{ + switch (error) { + case -ENOENT: + d_delete(dentry); + fallthrough; + case 0: + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + } +} + int nfs_rmdir(struct inode *dir, struct dentry *dentry) { int error; @@ -2219,6 +2231,7 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry) up_write(&NFS_I(d_inode(dentry))->rmdir_sem); } else error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); + nfs_dentry_remove_handle_error(dir, dentry, error); trace_nfs_rmdir_exit(dir, dentry, error); return error; @@ -2288,9 +2301,8 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry) } spin_unlock(&dentry->d_lock); error = 
nfs_safe_remove(dentry); - if (!error || error == -ENOENT) { - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - } else if (need_rehash) + nfs_dentry_remove_handle_error(dir, dentry, error); + if (need_rehash) d_rehash(dentry); out: trace_nfs_unlink_exit(dir, dentry, error); From ca05cbae2a0468e5d78e9b4605936a8bf5da328b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 10 Jul 2021 18:07:14 -0400 Subject: [PATCH 062/433] NFS: Fix up nfs_ctx_key_to_expire() If the cached credential exists but doesn't have any expiration callback then exit early. Fix up atomicity issues when replacing the credential with a new one since the existing code could lead to refcount leaks. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 4 ++-- fs/nfs/write.c | 41 ++++++++++++++++++++++++++++------------- include/linux/nfs_fs.h | 2 +- 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 853213b3a209..4f45281c47cf 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1024,7 +1024,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, ctx->cred = get_cred(filp->f_cred); else ctx->cred = get_current_cred(); - ctx->ll_cred = NULL; + rcu_assign_pointer(ctx->ll_cred, NULL); ctx->state = NULL; ctx->mode = f_mode; ctx->flags = 0; @@ -1063,7 +1063,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) put_cred(ctx->cred); dput(ctx->dentry); nfs_sb_deactive(sb); - put_rpccred(ctx->ll_cred); + put_rpccred(rcu_dereference_protected(ctx->ll_cred, 1)); kfree(ctx->mdsthreshold); kfree_rcu(ctx, rcu_head); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index eae9bf114041..773ea2c8504d 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1246,7 +1246,7 @@ nfs_key_timeout_notify(struct file *filp, struct inode *inode) struct nfs_open_context *ctx = nfs_file_open_context(filp); if (nfs_ctx_key_to_expire(ctx, inode) && - !ctx->ll_cred) + !rcu_access_pointer(ctx->ll_cred)) /* Already expired! 
*/ return -EACCES; return 0; @@ -1258,23 +1258,38 @@ nfs_key_timeout_notify(struct file *filp, struct inode *inode) bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx, struct inode *inode) { struct rpc_auth *auth = NFS_SERVER(inode)->client->cl_auth; - struct rpc_cred *cred = ctx->ll_cred; + struct rpc_cred *cred, *new, *old = NULL; struct auth_cred acred = { .cred = ctx->cred, }; + bool ret = false; - if (cred && !cred->cr_ops->crmatch(&acred, cred, 0)) { - put_rpccred(cred); - ctx->ll_cred = NULL; - cred = NULL; - } - if (!cred) - cred = auth->au_ops->lookup_cred(auth, &acred, 0); - if (!cred || IS_ERR(cred)) + rcu_read_lock(); + cred = rcu_dereference(ctx->ll_cred); + if (cred && !(cred->cr_ops->crkey_timeout && + cred->cr_ops->crkey_timeout(cred))) + goto out; + rcu_read_unlock(); + + new = auth->au_ops->lookup_cred(auth, &acred, 0); + if (new == cred) { + put_rpccred(new); return true; - ctx->ll_cred = cred; - return !!(cred->cr_ops->crkey_timeout && - cred->cr_ops->crkey_timeout(cred)); + } + if (IS_ERR_OR_NULL(new)) { + new = NULL; + ret = true; + } else if (new->cr_ops->crkey_timeout && + new->cr_ops->crkey_timeout(new)) + ret = true; + + rcu_read_lock(); + old = rcu_dereference_protected(xchg(&ctx->ll_cred, + RCU_INITIALIZER(new)), 1); +out: + rcu_read_unlock(); + put_rpccred(old); + return ret; } /* diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b9a8b925db43..9b75448ce0df 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -81,7 +81,7 @@ struct nfs_open_context { fl_owner_t flock_owner; struct dentry *dentry; const struct cred *cred; - struct rpc_cred *ll_cred; /* low-level cred - use to check for expiry */ + struct rpc_cred __rcu *ll_cred; /* low-level cred - use to check for expiry */ struct nfs4_state *state; fmode_t mode; From ea7a1019d8baf8503ecd6e3ec8436dec283569e6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 12 Jul 2021 09:52:59 -0400 Subject: [PATCH 063/433] SUNRPC: Partial revert of commit 
6f9f17287e78 The premise of commit 6f9f17287e78 ("SUNRPC: Mitigate cond_resched() in xprt_transmit()") was that cond_resched() is expensive and unnecessary when there has been just a single send. The point of cond_resched() is to ensure that tasks that should pre-empt this one get a chance to do so when it is safe to do so. The code prior to commit 6f9f17287e78 failed to take into account that it was keeping a rpc_task pinned for longer than it needed to, and so rather than doing a full revert, let's just move the cond_resched. Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index cfd681700d1a..d4aeee83763e 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1603,15 +1603,14 @@ xprt_transmit(struct rpc_task *task) { struct rpc_rqst *next, *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; - int counter, status; + int status; spin_lock(&xprt->queue_lock); - counter = 0; - while (!list_empty(&xprt->xmit_queue)) { - if (++counter == 20) + for (;;) { + next = list_first_entry_or_null(&xprt->xmit_queue, + struct rpc_rqst, rq_xmit); + if (!next) break; - next = list_first_entry(&xprt->xmit_queue, - struct rpc_rqst, rq_xmit); xprt_pin_rqst(next); spin_unlock(&xprt->queue_lock); status = xprt_request_transmit(next, task); @@ -1619,13 +1618,16 @@ xprt_transmit(struct rpc_task *task) status = 0; spin_lock(&xprt->queue_lock); xprt_unpin_rqst(next); - if (status == 0) { - if (!xprt_request_data_received(task) || - test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) - continue; - } else if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) - task->tk_status = status; - break; + if (status < 0) { + if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) + task->tk_status = status; + break; + } + /* Was @task transmitted, and has it received a reply? 
*/ + if (xprt_request_data_received(task) && + !test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) + break; + cond_resched_lock(&xprt->queue_lock); } spin_unlock(&xprt->queue_lock); } From 47dd8796a31e132f9e2b93a4f558a9f924a7388f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 12 Jul 2021 09:57:08 -0400 Subject: [PATCH 064/433] SUNRPC: Add cond_resched() at the appropriate point in __rpc_execute() Allow tasks that need to pre-empt rpciod/xprtiod to do so when it is safe. Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index c045f63d11fa..dc46130d46a0 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -903,8 +903,10 @@ static void __rpc_execute(struct rpc_task *task) /* * Lockless check for whether task is sleeping or not. */ - if (!RPC_IS_QUEUED(task)) + if (!RPC_IS_QUEUED(task)) { + cond_resched(); continue; + } /* * Signalled tasks should exit rather than sleep. From 6dbcbe3f78bec62a4a96ac9cfddaf894a140b821 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 12 Jul 2021 11:57:15 -0400 Subject: [PATCH 065/433] SUNRPC: Remove WQ_HIGHPRI from xprtiod Don't let xprtiod pre-empt softirq. 
Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index dc46130d46a0..b21457cec8a5 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -1232,8 +1232,7 @@ static int rpciod_start(void) if (!wq) goto out_failed; rpciod_workqueue = wq; - /* Note: highpri because network receive is latency sensitive */ - wq = alloc_workqueue("xprtiod", WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_HIGHPRI, 0); + wq = alloc_workqueue("xprtiod", WQ_UNBOUND | WQ_MEM_RECLAIM, 0); if (!wq) goto free_rpciod; xprtiod_workqueue = wq; From b9f8713f42af11ae6d7f63075334ba5298436be6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 12 Jul 2021 12:24:15 -0400 Subject: [PATCH 066/433] SUNRPC: Remove unnecessary memory barriers The only check for RPC_TASK_RUNNING is the one in rpc_make_runnable(), which happens under the same spin lock held when we call rpc_clear_running(). Ditto, the last check for RPC_TASK_QUEUED in rpc_execute() is performed under the same lock as the one held when we call rpc_clear_queued(). 
Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index a237b8dbf608..db964bb63912 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -150,25 +150,13 @@ struct rpc_task_setup { #define RPC_TASK_MSG_PIN_WAIT 5 #define RPC_TASK_SIGNALLED 6 -#define RPC_IS_RUNNING(t) test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) -#define rpc_set_running(t) set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) #define rpc_test_and_set_running(t) \ test_and_set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) -#define rpc_clear_running(t) \ - do { \ - smp_mb__before_atomic(); \ - clear_bit(RPC_TASK_RUNNING, &(t)->tk_runstate); \ - smp_mb__after_atomic(); \ - } while (0) +#define rpc_clear_running(t) clear_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) #define RPC_IS_QUEUED(t) test_bit(RPC_TASK_QUEUED, &(t)->tk_runstate) #define rpc_set_queued(t) set_bit(RPC_TASK_QUEUED, &(t)->tk_runstate) -#define rpc_clear_queued(t) \ - do { \ - smp_mb__before_atomic(); \ - clear_bit(RPC_TASK_QUEUED, &(t)->tk_runstate); \ - smp_mb__after_atomic(); \ - } while (0) +#define rpc_clear_queued(t) clear_bit(RPC_TASK_QUEUED, &(t)->tk_runstate) #define RPC_IS_ACTIVATED(t) test_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate) From 33c3214bf450051db99d352cfeef7e0ffcbb8614 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 12 Jul 2021 12:34:34 -0400 Subject: [PATCH 067/433] SUNRPC: xprt_clear_locked() only needs release memory semantics The clearing of the XPRT_LOCKED bit has to happen after we clear xprt->snd_task, but we don't require any extra memory barriers after that. 
Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index d4aeee83763e..48560188e84d 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -246,11 +246,9 @@ EXPORT_SYMBOL_GPL(xprt_find_transport_ident); static void xprt_clear_locked(struct rpc_xprt *xprt) { xprt->snd_task = NULL; - if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state)) { - smp_mb__before_atomic(); - clear_bit(XPRT_LOCKED, &xprt->state); - smp_mb__after_atomic(); - } else + if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state)) + clear_bit_unlock(XPRT_LOCKED, &xprt->state); + else queue_work(xprtiod_workqueue, &xprt->task_cleanup); } From 43d20e80e2880a1791d87d8b3fc062e91cd2ec4b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 13 Jul 2021 12:28:22 -0400 Subject: [PATCH 068/433] NFS: Fix a few more clear_bit() instances that need release semantics All these bits are being used as bit locks. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 5 +---- fs/nfs/pagelist.c | 10 ++-------- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index f22818a80c2c..ecc4594299d6 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1194,10 +1194,7 @@ static int nfs4_run_state_manager(void *); static void nfs4_clear_state_manager_bit(struct nfs_client *clp) { - smp_mb__before_atomic(); - clear_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state); - smp_mb__after_atomic(); - wake_up_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING); + clear_and_wake_up_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state); rpc_wake_up(&clp->cl_rpcwaitq); } diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index cc232d1f16f2..72333bcaa4c4 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -271,8 +271,7 @@ nfs_page_set_headlock(struct nfs_page *req) void nfs_page_clear_headlock(struct nfs_page *req) { - smp_mb__before_atomic(); - clear_bit(PG_HEADLOCK, &req->wb_flags); + 
clear_bit_unlock(PG_HEADLOCK, &req->wb_flags); smp_mb__after_atomic(); if (!test_bit(PG_CONTENDED1, &req->wb_flags)) return; @@ -525,12 +524,7 @@ nfs_create_subreq(struct nfs_page *req, */ void nfs_unlock_request(struct nfs_page *req) { - if (!NFS_WBACK_BUSY(req)) { - printk(KERN_ERR "NFS: Invalid unlock attempted\n"); - BUG(); - } - smp_mb__before_atomic(); - clear_bit(PG_BUSY, &req->wb_flags); + clear_bit_unlock(PG_BUSY, &req->wb_flags); smp_mb__after_atomic(); if (!test_bit(PG_CONTENDED2, &req->wb_flags)) return; From a1e7f30a86062380ac804b50491fd24bb9dfb99f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 14 Jul 2021 13:00:58 -0400 Subject: [PATCH 069/433] NFSv4: Retrieve ACCESS on open if we're not using NFS4_CREATE_EXCLUSIVE NFS4_CREATE_EXCLUSIVE does not allow the caller to set an access mode, so for most Linux filesystems, the access call ends up returning no permissions. However both NFS4_CREATE_EXCLUSIVE4_1 and NFS4_CREATE_GUARDED allow the client to set the access mode. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 45 ++++++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e1214bb6b7ee..9e89198ea21f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1388,27 +1388,22 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, sizeof(p->o_arg.u.verifier.data)); } } - /* don't put an ACCESS op in OPEN compound if O_EXCL, because ACCESS - * will return permission denied for all bits until close */ - if (!(flags & O_EXCL)) { - /* ask server to check for all possible rights as results - * are cached */ - switch (p->o_arg.claim) { - default: - break; - case NFS4_OPEN_CLAIM_NULL: - case NFS4_OPEN_CLAIM_FH: - p->o_arg.access = NFS4_ACCESS_READ | - NFS4_ACCESS_MODIFY | - NFS4_ACCESS_EXTEND | - NFS4_ACCESS_EXECUTE; + /* ask server to check for all possible rights as results + * are cached */ + switch (p->o_arg.claim) { + default: + break; + case NFS4_OPEN_CLAIM_NULL: + case NFS4_OPEN_CLAIM_FH: + p->o_arg.access = NFS4_ACCESS_READ | NFS4_ACCESS_MODIFY | + NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE | + NFS4_ACCESS_EXECUTE; #ifdef CONFIG_NFS_V4_2 - if (server->caps & NFS_CAP_XATTR) - p->o_arg.access |= NFS4_ACCESS_XAREAD | - NFS4_ACCESS_XAWRITE | - NFS4_ACCESS_XALIST; + if (!(server->caps & NFS_CAP_XATTR)) + break; + p->o_arg.access |= NFS4_ACCESS_XAREAD | NFS4_ACCESS_XAWRITE | + NFS4_ACCESS_XALIST; #endif - } } p->o_arg.clientid = server->nfs_client->cl_clientid; p->o_arg.id.create_time = ktime_to_ns(sp->so_seqid.create_time); @@ -2472,11 +2467,15 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) /* Set the create mode (note dependency on the session type) */ data->o_arg.createmode = NFS4_CREATE_UNCHECKED; if (data->o_arg.open_flags & O_EXCL) { - data->o_arg.createmode = NFS4_CREATE_EXCLUSIVE; - if (nfs4_has_persistent_session(clp)) + data->o_arg.createmode = NFS4_CREATE_EXCLUSIVE4_1; + if 
(clp->cl_mvops->minor_version == 0) { + data->o_arg.createmode = NFS4_CREATE_EXCLUSIVE; + /* don't put an ACCESS op in OPEN compound if O_EXCL, + * because ACCESS will return permission denied for + * all bits until close */ + data->o_res.access_request = data->o_arg.access = 0; + } else if (nfs4_has_persistent_session(clp)) data->o_arg.createmode = NFS4_CREATE_GUARDED; - else if (clp->cl_mvops->minor_version > 0) - data->o_arg.createmode = NFS4_CREATE_EXCLUSIVE4_1; } return; unlock_no_action: From eea413308f2e6deb00f061f18081a53f3ecc8cc6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 26 Sep 2021 14:05:04 -0400 Subject: [PATCH 070/433] NFS: Default change_attr_type to NFS4_CHANGE_TYPE_IS_UNDEFINED Both NFSv3 and NFSv2 generate their change attribute from the ctime value that was supplied by the server. However the problem is that there are plenty of servers out there with ctime resolutions of 1ms or worse. In a modern performance system, this is insufficient when trying to decide which is the most recent set of attributes when, for instance, a READ or GETATTR call races with a WRITE or SETATTR. For this reason, let's revert to labelling the NFSv2/v3 change attributes as NFS4_CHANGE_TYPE_IS_UNDEFINED. This will ensure we protect against such races. 
Fixes: 7b24dacf0840 ("NFS: Another inode revalidation improvement") Signed-off-by: Trond Myklebust Tested-by: Chuck Lever --- fs/nfs/inode.c | 4 +++- fs/nfs/nfs3xdr.c | 2 +- fs/nfs/proc.c | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 4f45281c47cf..0f092ccb0ca1 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1777,8 +1777,10 @@ static int nfs_inode_finish_partial_attr_update(const struct nfs_fattr *fattr, NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_OTHER | NFS_INO_INVALID_NLINK; unsigned long cache_validity = NFS_I(inode)->cache_validity; + enum nfs4_change_attr_type ctype = NFS_SERVER(inode)->change_attr_type; - if (!(cache_validity & NFS_INO_INVALID_CHANGE) && + if (ctype != NFS4_CHANGE_TYPE_IS_UNDEFINED && + !(cache_validity & NFS_INO_INVALID_CHANGE) && (cache_validity & check_valid) != 0 && (fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && nfs_inode_attrs_cmp_monotonic(fattr, inode) == 0) diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index e6eca1d7481b..9274c9c5efea 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -2227,7 +2227,7 @@ static int decode_fsinfo3resok(struct xdr_stream *xdr, /* ignore properties */ result->lease_time = 0; - result->change_attr_type = NFS4_CHANGE_TYPE_IS_TIME_METADATA; + result->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED; return 0; } diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index ea19dbf12301..ecc4e717808c 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -91,7 +91,7 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, info->dtpref = fsinfo.tsize; info->maxfilesize = 0x7FFFFFFF; info->lease_time = 0; - info->change_attr_type = NFS4_CHANGE_TYPE_IS_TIME_METADATA; + info->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED; return 0; } From 488796ec1e39fb9194cc8175f770823d40fbf0ed Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 28 Sep 2021 11:15:53 -0400 Subject: [PATCH 071/433] NFS: Don't set NFS_INO_DATA_INVAL_DEFER and 
NFS_INO_INVALID_DATA NFS_INO_DATA_INVAL_DEFER and NFS_INO_INVALID_DATA should be considered mutually exclusive. Fixes: 1c341b777501 ("NFS: Add deferred cache invalidation for close-to-open consistency violations") Signed-off-by: Trond Myklebust Tested-by: Benjamin Coddington Reviewed-by: Benjamin Coddington --- fs/nfs/inode.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 0f092ccb0ca1..dcb885b7ad73 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -210,10 +210,15 @@ void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) flags &= ~NFS_INO_INVALID_XATTR; if (flags & NFS_INO_INVALID_DATA) nfs_fscache_invalidate(inode); - if (inode->i_mapping->nrpages == 0) - flags &= ~(NFS_INO_INVALID_DATA|NFS_INO_DATA_INVAL_DEFER); flags &= ~(NFS_INO_REVAL_PAGECACHE | NFS_INO_REVAL_FORCED); + nfsi->cache_validity |= flags; + + if (inode->i_mapping->nrpages == 0) + nfsi->cache_validity &= ~(NFS_INO_INVALID_DATA | + NFS_INO_DATA_INVAL_DEFER); + else if (nfsi->cache_validity & NFS_INO_INVALID_DATA) + nfsi->cache_validity &= ~NFS_INO_DATA_INVAL_DEFER; } EXPORT_SYMBOL_GPL(nfs_set_cache_invalid); From a6a361c4ca3cc3e6f3b39d1b6bca1de90f5f4b11 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 28 Sep 2021 11:24:57 -0400 Subject: [PATCH 072/433] NFS: Ignore the directory size when marking for revalidation If we want to revalidate the directory, then just mark the change attribute as invalid. 
Fixes: 13c0b082b6a9 ("NFS: Replace use of NFS_INO_REVAL_PAGECACHE when checking cache validity") Signed-off-by: Trond Myklebust Tested-by: Benjamin Coddington Reviewed-by: Benjamin Coddington --- fs/nfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 1ce1fa0a5926..f2df664db020 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1413,7 +1413,7 @@ out_force: static void nfs_mark_dir_for_revalidate(struct inode *inode) { spin_lock(&inode->i_lock); - nfs_set_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE); spin_unlock(&inode->i_lock); } From 2929bc3329f4c7e4df400acca2b1844492650bfd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 28 Sep 2021 12:37:05 -0400 Subject: [PATCH 073/433] NFS: Fix up nfs_readdir_inode_mapping_valid() The check for duplicate readdir cookies should only care if the change attribute is invalid or the data cache is invalid. Signed-off-by: Trond Myklebust Tested-by: Benjamin Coddington Reviewed-by: Benjamin Coddington --- fs/nfs/dir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index f2df664db020..fa4d33687d2b 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -411,7 +411,8 @@ out_eof: static bool nfs_readdir_inode_mapping_valid(struct nfs_inode *nfsi) { - if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) + if (nfsi->cache_validity & (NFS_INO_INVALID_CHANGE | + NFS_INO_INVALID_DATA)) return false; smp_rmb(); return !test_bit(NFS_INO_INVALIDATING, &nfsi->flags); From ff81dfb5d721fff87bd516c558847f6effb70031 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 28 Sep 2021 14:33:44 -0400 Subject: [PATCH 074/433] NFS: Further optimisations for 'ls -l' If a user is doing 'ls -l', we have a heuristic in GETATTR that tells the readdir code to try to use READDIRPLUS in order to refresh the inode attributes. 
In certain circumstances, we also try to invalidate the remaining directory entries in order to ensure this refresh. If there are multiple readers of the directory, we probably should avoid invalidating the page cache, since the heuristic breaks down in that situation anyway. Signed-off-by: Trond Myklebust Tested-by: Benjamin Coddington Reviewed-by: Benjamin Coddington --- fs/nfs/dir.c | 16 +++++++++++----- include/linux/nfs_fs.h | 5 ++--- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index fa4d33687d2b..33cfff8ea551 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -78,6 +78,7 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir ctx->attr_gencount = nfsi->attr_gencount; ctx->dir_cookie = 0; ctx->dup_cookie = 0; + ctx->page_index = 0; spin_lock(&dir->i_lock); if (list_empty(&nfsi->open_files) && (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER)) @@ -85,6 +86,7 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir NFS_INO_INVALID_DATA | NFS_INO_REVAL_FORCED); list_add(&ctx->list, &nfsi->open_files); + clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags); spin_unlock(&dir->i_lock); return ctx; } @@ -627,8 +629,7 @@ void nfs_force_use_readdirplus(struct inode *dir) if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) && !list_empty(&nfsi->open_files)) { set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags); - invalidate_mapping_pages(dir->i_mapping, - nfsi->page_index + 1, -1); + set_bit(NFS_INO_FORCE_READDIR, &nfsi->flags); } } @@ -938,10 +939,8 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc) sizeof(nfsi->cookieverf)); } res = nfs_readdir_search_array(desc); - if (res == 0) { - nfsi->page_index = desc->page_index; + if (res == 0) return 0; - } nfs_readdir_page_unlock_and_put_cached(desc); return res; } @@ -1080,6 +1079,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) struct nfs_inode *nfsi = NFS_I(inode); struct nfs_open_dir_context
*dir_ctx = file->private_data; struct nfs_readdir_descriptor *desc; + pgoff_t page_index; int res; dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n", @@ -1110,10 +1110,15 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) desc->dir_cookie = dir_ctx->dir_cookie; desc->dup_cookie = dir_ctx->dup_cookie; desc->duped = dir_ctx->duped; + page_index = dir_ctx->page_index; desc->attr_gencount = dir_ctx->attr_gencount; memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf)); spin_unlock(&file->f_lock); + if (test_and_clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags) && + list_is_singular(&nfsi->open_files)) + invalidate_mapping_pages(inode->i_mapping, page_index + 1, -1); + do { res = readdir_search_pagecache(desc); @@ -1150,6 +1155,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) dir_ctx->dup_cookie = desc->dup_cookie; dir_ctx->duped = desc->duped; dir_ctx->attr_gencount = desc->attr_gencount; + dir_ctx->page_index = desc->page_index; memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf)); spin_unlock(&file->f_lock); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 9b75448ce0df..ca547cc5458c 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -103,6 +103,7 @@ struct nfs_open_dir_context { __be32 verf[NFS_DIR_VERIFIER_SIZE]; __u64 dir_cookie; __u64 dup_cookie; + pgoff_t page_index; signed char duped; }; @@ -181,9 +182,6 @@ struct nfs_inode { struct rw_semaphore rmdir_sem; struct mutex commit_mutex; - /* track last access to cached pages */ - unsigned long page_index; - #if IS_ENABLED(CONFIG_NFS_V4) struct nfs4_cached_acl *nfs4_acl; /* NFSv4 state */ @@ -272,6 +270,7 @@ struct nfs4_copy_state { #define NFS_INO_INVALIDATING (3) /* inode is being invalidated */ #define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */ #define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */ +#define NFS_INO_FORCE_READDIR (7) /* force readdirplus */ #define NFS_INO_LAYOUTCOMMIT (9) /* 
layoutcommit required */ #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ #define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */ From cec08f452a687fce9dfdf47946d00a1d12a8bec5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 29 Sep 2021 08:12:53 -0400 Subject: [PATCH 075/433] NFS: Fix dentry verifier races If the directory changed while we were revalidating the dentry, then don't update the dentry verifier. There is no value in setting the verifier to an older value, and we could end up overwriting a more up to date verifier from a parallel revalidation. Fixes: efeda80da38d ("NFSv4: Fix revalidation of dentries with delegations") Signed-off-by: Trond Myklebust Tested-by: Benjamin Coddington Reviewed-by: Benjamin Coddington --- fs/nfs/dir.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 33cfff8ea551..3fafecdb2070 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1276,13 +1276,12 @@ static bool nfs_verifier_is_delegated(struct dentry *dentry) static void nfs_set_verifier_locked(struct dentry *dentry, unsigned long verf) { struct inode *inode = d_inode(dentry); + struct inode *dir = d_inode(dentry->d_parent); - if (!nfs_verifier_is_delegated(dentry) && - !nfs_verify_change_attribute(d_inode(dentry->d_parent), verf)) - goto out; + if (!nfs_verify_change_attribute(dir, verf)) + return; if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) nfs_set_verifier_delegated(&verf); -out: dentry->d_time = verf; } From b97583b26326ad559d1b1ba7dafec98712ffd834 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 2 Oct 2021 19:04:59 -0400 Subject: [PATCH 076/433] NFS: Do not flush the readdir cache in nfs_dentry_iput() The original premise in commit 83672d392f7b ("NFS: Fix directory caching problem - with test case and patch.") was that readdirplus was caching attribute information and replaying it later. This is no longer the case. 
Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 3fafecdb2070..210c5945ac2b 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1727,10 +1727,6 @@ static void nfs_drop_nlink(struct inode *inode) */ static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode) { - if (S_ISDIR(inode->i_mode)) - /* drop any readdir cache as it could easily be old */ - nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); - if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { nfs_complete_unlink(dentry, inode); nfs_drop_nlink(inode); From 36a10a3c4cb6ea7c8bd895c16b3f59e1f0db2f6a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 2 Oct 2021 19:21:49 -0400 Subject: [PATCH 077/433] NFS: Remove unnecessary page cache invalidations Remove cache invalidations that are already covered by change attribute updates. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index dcb885b7ad73..3bd0ae438663 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1451,8 +1451,6 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) && (fattr->valid & NFS_ATTR_FATTR_MTIME) && timespec64_equal(&ts, &fattr->pre_mtime)) { inode->i_mtime = fattr->mtime; - if (S_ISDIR(inode->i_mode)) - nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); } if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) && (fattr->valid & NFS_ATTR_FATTR_SIZE) @@ -2162,11 +2160,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) save_cache_validity & NFS_INO_INVALID_OTHER; if (fattr->valid & NFS_ATTR_FATTR_NLINK) { - if (inode->i_nlink != fattr->nlink) { - if (S_ISDIR(inode->i_mode)) - invalid |= NFS_INO_INVALID_DATA; + if (inode->i_nlink != fattr->nlink) set_nlink(inode, fattr->nlink); - } } else if (fattr_supported & NFS_ATTR_FATTR_NLINK) nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_NLINK; 
From a2915fa06227b056a8f9b0d79b61dca08ad5cfc6 Mon Sep 17 00:00:00 2001 From: Baptiste Lepers Date: Mon, 6 Sep 2021 11:59:24 +1000 Subject: [PATCH 078/433] pnfs/flexfiles: Fix misplaced barrier in nfs4_ff_layout_prepare_ds _nfs4_pnfs_v3/v4_ds_connect do some work smp_wmb ds->ds_clp = clp; And nfs4_ff_layout_prepare_ds currently does smp_rmb if(ds->ds_clp) ... This patch places the smp_rmb after the if. This ensures that following reads only happen once nfs4_ff_layout_prepare_ds has checked that data has been properly initialized. Fixes: d67ae825a59d6 ("pnfs/flexfiles: Add the FlexFile Layout Driver") Signed-off-by: Baptiste Lepers Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 4 ++-- fs/nfs/pnfs_nfs.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index c9b61b818ec1..bfa7202ca7be 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -378,10 +378,10 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, goto noconnect; ds = mirror->mirror_ds->ds; + if (READ_ONCE(ds->ds_clp)) + goto out; /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */ smp_rmb(); - if (ds->ds_clp) - goto out; /* FIXME: For now we assume the server sent only one version of NFS * to use for the DS. 
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index cf19914fec81..02bd6e83961d 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -895,7 +895,7 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, } smp_wmb(); - ds->ds_clp = clp; + WRITE_ONCE(ds->ds_clp, clp); dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); out: return status; @@ -973,7 +973,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, } smp_wmb(); - ds->ds_clp = clp; + WRITE_ONCE(ds->ds_clp, clp); dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); out: return status; From 8dcc5721da7888685dde82be765018c8a379298c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 4 Oct 2021 10:16:08 -0400 Subject: [PATCH 079/433] svcrdma: Split the svcrdma_wc_receive() tracepoint There are currently three separate purposes being served by a single tracepoint here. They need to be split up. svcrdma_wc_recv: - status is always zero, so there's no value in recording it. - vendor_err is meaningless unless status is not zero, so there's no value in recording it. - This tracepoint is needed only when developing modifications, so it should be left disabled most of the time. svcrdma_wc_recv_flush: - As above, needed only rarely, and not an error. svcrdma_wc_recv_err: - received is always zero, so there's no value in recording it. - This tracepoint can be left enabled because completion errors are run-time problems (except for FLUSHED_ERR). - Tracepoint name now ends in _err to reflect its purpose. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- include/trace/events/rpcrdma.h | 75 ++++++++++++++++++++++++- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 9 ++- 2 files changed, 81 insertions(+), 3 deletions(-) diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index de4195499592..342d6d7b5cd9 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -145,6 +145,77 @@ DECLARE_EVENT_CLASS(rpcrdma_receive_completion_class, ), \ TP_ARGS(wc, cid)) +DECLARE_EVENT_CLASS(rpcrdma_receive_success_class, + TP_PROTO( + const struct ib_wc *wc, + const struct rpc_rdma_cid *cid + ), + + TP_ARGS(wc, cid), + + TP_STRUCT__entry( + __field(u32, cq_id) + __field(int, completion_id) + __field(u32, received) + ), + + TP_fast_assign( + __entry->cq_id = cid->ci_queue_id; + __entry->completion_id = cid->ci_completion_id; + __entry->received = wc->byte_len; + ), + + TP_printk("cq.id=%u cid=%d received=%u", + __entry->cq_id, __entry->completion_id, + __entry->received + ) +); + +#define DEFINE_RECEIVE_SUCCESS_EVENT(name) \ + DEFINE_EVENT(rpcrdma_receive_success_class, name, \ + TP_PROTO( \ + const struct ib_wc *wc, \ + const struct rpc_rdma_cid *cid \ + ), \ + TP_ARGS(wc, cid)) + +DECLARE_EVENT_CLASS(rpcrdma_receive_flush_class, + TP_PROTO( + const struct ib_wc *wc, + const struct rpc_rdma_cid *cid + ), + + TP_ARGS(wc, cid), + + TP_STRUCT__entry( + __field(u32, cq_id) + __field(int, completion_id) + __field(unsigned long, status) + __field(unsigned int, vendor_err) + ), + + TP_fast_assign( + __entry->cq_id = cid->ci_queue_id; + __entry->completion_id = cid->ci_completion_id; + __entry->status = wc->status; + __entry->vendor_err = wc->vendor_err; + ), + + TP_printk("cq.id=%u cid=%d status=%s (%lu/0x%x)", + __entry->cq_id, __entry->completion_id, + rdma_show_wc_status(__entry->status), + __entry->status, __entry->vendor_err + ) +); + +#define DEFINE_RECEIVE_FLUSH_EVENT(name) \ + DEFINE_EVENT(rpcrdma_receive_flush_class, name, \ + TP_PROTO( \ + const struct ib_wc *wc, \ + 
const struct rpc_rdma_cid *cid \ + ), \ + TP_ARGS(wc, cid)) + DECLARE_EVENT_CLASS(xprtrdma_reply_class, TP_PROTO( const struct rpcrdma_rep *rep @@ -1892,7 +1963,9 @@ TRACE_EVENT(svcrdma_post_recv, ) ); -DEFINE_RECEIVE_COMPLETION_EVENT(svcrdma_wc_receive); +DEFINE_RECEIVE_SUCCESS_EVENT(svcrdma_wc_recv); +DEFINE_RECEIVE_FLUSH_EVENT(svcrdma_wc_recv_flush); +DEFINE_RECEIVE_FLUSH_EVENT(svcrdma_wc_recv_err); TRACE_EVENT(svcrdma_rq_post_err, TP_PROTO( diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 6be23ce7a93d..cf76a6ad127b 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -330,9 +330,9 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) /* WARNING: Only wc->wr_cqe and wc->status are reliable */ ctxt = container_of(cqe, struct svc_rdma_recv_ctxt, rc_cqe); - trace_svcrdma_wc_receive(wc, &ctxt->rc_cid); if (wc->status != IB_WC_SUCCESS) goto flushed; + trace_svcrdma_wc_recv(wc, &ctxt->rc_cid); /* If receive posting fails, the connection is about to be * lost anyway. 
The server will not be able to send a reply @@ -345,7 +345,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) */ if (rdma->sc_pending_recvs < rdma->sc_max_requests) if (!svc_rdma_refresh_recvs(rdma, rdma->sc_recv_batch, false)) - goto flushed; + goto dropped; /* All wc fields are now known to be valid */ ctxt->rc_byte_len = wc->byte_len; @@ -360,6 +360,11 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) return; flushed: + if (wc->status == IB_WC_WR_FLUSH_ERR) + trace_svcrdma_wc_recv_flush(wc, &ctxt->rc_cid); + else + trace_svcrdma_wc_recv_err(wc, &ctxt->rc_cid); +dropped: svc_rdma_recv_ctxt_put(rdma, ctxt); svc_xprt_deferred_close(&rdma->sc_xprt); } From eef2d8d47c33aba5f430fc5f91a17e360f99a591 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 4 Oct 2021 10:16:14 -0400 Subject: [PATCH 080/433] svcrdma: Split the svcrdma_wc_send() tracepoint There are currently three separate purposes being served by a single tracepoint here. They need to be split up. svcrdma_wc_send: - status is always zero, so there's no value in recording it. - vendor_err is meaningless unless status is not zero, so there's no value in recording it. - This tracepoint is needed only when developing modifications, so it should be left disabled most of the time. svcrdma_wc_send_flush: - As above, needed only rarely, and not an error. svcrdma_wc_send_err: - This tracepoint can be left persistently enabled because completion errors are run-time problems (except for FLUSHED_ERR). - Tracepoint name now ends in _err to reflect its purpose. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- include/trace/events/rpcrdma.h | 72 ++++++++++++++++++++++++++- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 14 ++++-- 2 files changed, 82 insertions(+), 4 deletions(-) diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 342d6d7b5cd9..1d7c12f65f87 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -60,6 +60,74 @@ DECLARE_EVENT_CLASS(rpcrdma_completion_class, ), \ TP_ARGS(wc, cid)) +DECLARE_EVENT_CLASS(rpcrdma_send_completion_class, + TP_PROTO( + const struct ib_wc *wc, + const struct rpc_rdma_cid *cid + ), + + TP_ARGS(wc, cid), + + TP_STRUCT__entry( + __field(u32, cq_id) + __field(int, completion_id) + ), + + TP_fast_assign( + __entry->cq_id = cid->ci_queue_id; + __entry->completion_id = cid->ci_completion_id; + ), + + TP_printk("cq.id=%u cid=%d", + __entry->cq_id, __entry->completion_id + ) +); + +#define DEFINE_SEND_COMPLETION_EVENT(name) \ + DEFINE_EVENT(rpcrdma_send_completion_class, name, \ + TP_PROTO( \ + const struct ib_wc *wc, \ + const struct rpc_rdma_cid *cid \ + ), \ + TP_ARGS(wc, cid)) + +DECLARE_EVENT_CLASS(rpcrdma_send_flush_class, + TP_PROTO( + const struct ib_wc *wc, + const struct rpc_rdma_cid *cid + ), + + TP_ARGS(wc, cid), + + TP_STRUCT__entry( + __field(u32, cq_id) + __field(int, completion_id) + __field(unsigned long, status) + __field(unsigned int, vendor_err) + ), + + TP_fast_assign( + __entry->cq_id = cid->ci_queue_id; + __entry->completion_id = cid->ci_completion_id; + __entry->status = wc->status; + __entry->vendor_err = wc->vendor_err; + ), + + TP_printk("cq.id=%u cid=%d status=%s (%lu/0x%x)", + __entry->cq_id, __entry->completion_id, + rdma_show_wc_status(__entry->status), + __entry->status, __entry->vendor_err + ) +); + +#define DEFINE_SEND_FLUSH_EVENT(name) \ + DEFINE_EVENT(rpcrdma_send_flush_class, name, \ + TP_PROTO( \ + const struct ib_wc *wc, \ + const struct rpc_rdma_cid *cid \ + ), \ + TP_ARGS(wc, cid)) + DECLARE_EVENT_CLASS(rpcrdma_mr_completion_class, 
TP_PROTO( const struct ib_wc *wc, @@ -1939,7 +2007,9 @@ TRACE_EVENT(svcrdma_post_send, ) ); -DEFINE_COMPLETION_EVENT(svcrdma_wc_send); +DEFINE_SEND_COMPLETION_EVENT(svcrdma_wc_send); +DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_send_flush); +DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_send_err); TRACE_EVENT(svcrdma_post_recv, TP_PROTO( diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 599021b2391d..22a871e6fe4d 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -280,13 +280,21 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) struct svc_rdma_send_ctxt *ctxt = container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe); - trace_svcrdma_wc_send(wc, &ctxt->sc_cid); - svc_rdma_wake_send_waiters(rdma, 1); complete(&ctxt->sc_done); if (unlikely(wc->status != IB_WC_SUCCESS)) - svc_xprt_deferred_close(&rdma->sc_xprt); + goto flushed; + + trace_svcrdma_wc_send(wc, &ctxt->sc_cid); + return; + +flushed: + if (wc->status != IB_WC_WR_FLUSH_ERR) + trace_svcrdma_wc_send_err(wc, &ctxt->sc_cid); + else + trace_svcrdma_wc_send_flush(wc, &ctxt->sc_cid); + svc_xprt_deferred_close(&rdma->sc_xprt); } /** From 45f135846815ef787b41767ad3823194de5ccfdf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 4 Oct 2021 10:16:20 -0400 Subject: [PATCH 081/433] svcrdma: Split svcrdma_wc_{read,write} tracepoints There are currently three separate purposes being served by single tracepoints. Split them up, as was done with wc_send. Signed-off-by: Chuck Lever Signed-off-by: J.
Bruce Fields --- include/trace/events/rpcrdma.h | 38 +++++++++++++++++++++++++++++-- net/sunrpc/xprtrdma/svc_rdma_rw.c | 30 ++++++++++++++++++++---- 2 files changed, 62 insertions(+), 6 deletions(-) diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 1d7c12f65f87..b5a1388e51a4 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -2099,8 +2099,42 @@ DEFINE_POST_CHUNK_EVENT(read); DEFINE_POST_CHUNK_EVENT(write); DEFINE_POST_CHUNK_EVENT(reply); -DEFINE_COMPLETION_EVENT(svcrdma_wc_read); -DEFINE_COMPLETION_EVENT(svcrdma_wc_write); +TRACE_EVENT(svcrdma_wc_read, + TP_PROTO( + const struct ib_wc *wc, + const struct rpc_rdma_cid *cid, + unsigned int totalbytes, + const ktime_t posttime + ), + + TP_ARGS(wc, cid, totalbytes, posttime), + + TP_STRUCT__entry( + __field(u32, cq_id) + __field(int, completion_id) + __field(s64, read_latency) + __field(unsigned int, totalbytes) + ), + + TP_fast_assign( + __entry->cq_id = cid->ci_queue_id; + __entry->completion_id = cid->ci_completion_id; + __entry->totalbytes = totalbytes; + __entry->read_latency = ktime_us_delta(ktime_get(), posttime); + ), + + TP_printk("cq.id=%u cid=%d totalbytes=%u latency-us=%lld", + __entry->cq_id, __entry->completion_id, + __entry->totalbytes, __entry->read_latency + ) +); + +DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_read_flush); +DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_read_err); + +DEFINE_SEND_COMPLETION_EVENT(svcrdma_wc_write); +DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_write_flush); +DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_write_err); TRACE_EVENT(svcrdma_qp_error, TP_PROTO( diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index e27433f08ca7..5f0155fdefc7 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -155,6 +155,7 @@ struct svc_rdma_chunk_ctxt { struct ib_cqe cc_cqe; struct svcxprt_rdma *cc_rdma; struct list_head cc_rwctxts; + ktime_t cc_posttime; int cc_sqecount; enum ib_wc_status 
cc_status; struct completion cc_done; @@ -267,7 +268,16 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) struct svc_rdma_write_info *info = container_of(cc, struct svc_rdma_write_info, wi_cc); - trace_svcrdma_wc_write(wc, &cc->cc_cid); + switch (wc->status) { + case IB_WC_SUCCESS: + trace_svcrdma_wc_write(wc, &cc->cc_cid); + break; + case IB_WC_WR_FLUSH_ERR: + trace_svcrdma_wc_write_flush(wc, &cc->cc_cid); + break; + default: + trace_svcrdma_wc_write_err(wc, &cc->cc_cid); + } svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount); @@ -320,11 +330,22 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc) struct ib_cqe *cqe = wc->wr_cqe; struct svc_rdma_chunk_ctxt *cc = container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe); - struct svcxprt_rdma *rdma = cc->cc_rdma; + struct svc_rdma_read_info *info; - trace_svcrdma_wc_read(wc, &cc->cc_cid); + switch (wc->status) { + case IB_WC_SUCCESS: + info = container_of(cc, struct svc_rdma_read_info, ri_cc); + trace_svcrdma_wc_read(wc, &cc->cc_cid, info->ri_totalbytes, + cc->cc_posttime); + break; + case IB_WC_WR_FLUSH_ERR: + trace_svcrdma_wc_read_flush(wc, &cc->cc_cid); + break; + default: + trace_svcrdma_wc_read_err(wc, &cc->cc_cid); + } - svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount); + svc_rdma_wake_send_waiters(cc->cc_rdma, cc->cc_sqecount); cc->cc_status = wc->status; complete(&cc->cc_done); return; @@ -363,6 +384,7 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc) do { if (atomic_sub_return(cc->cc_sqecount, &rdma->sc_sq_avail) > 0) { + cc->cc_posttime = ktime_get(); ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr); if (ret) break; From 22a027e8c03f9a7794d16daa3b4b117ac6d340c3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 4 Oct 2021 10:16:26 -0400 Subject: [PATCH 082/433] SUNRPC: Add trace event when alloc_pages_bulk() makes no progress This is an operational low memory situation that needs to be flagged. 
The new tracepoint records a timestamp and the nfsd thread that failed to allocate pages. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/trace/events/sunrpc.h | 18 ++++++++++++++++++ net/sunrpc/svc_xprt.c | 1 + 2 files changed, 19 insertions(+) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 2d04eb96d418..fb016308c185 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1859,6 +1859,24 @@ TRACE_EVENT(svc_wake_up, TP_printk("pid=%d", __entry->pid) ); +TRACE_EVENT(svc_alloc_arg_err, + TP_PROTO( + unsigned int pages + ), + + TP_ARGS(pages), + + TP_STRUCT__entry( + __field(unsigned int, pages) + ), + + TP_fast_assign( + __entry->pages = pages; + ), + + TP_printk("pages=%u", __entry->pages) +); + TRACE_EVENT(svc_handle_xprt, TP_PROTO(struct svc_xprt *xprt, int len), diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 6316bd2b8f37..1e99ba1b9d72 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -687,6 +687,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp) set_current_state(TASK_RUNNING); return -EINTR; } + trace_svc_alloc_arg_err(pages); schedule_timeout(msecs_to_jiffies(500)); } rqstp->rq_page_end = &rqstp->rq_pages[pages]; From 35940a58f9f1db96e5688e426d713f330ead70b8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 4 Oct 2021 10:16:32 -0400 Subject: [PATCH 083/433] SUNRPC: Capture value of xdr_buf::page_base This value is usually zero, but will be non-zero more often in the future. Knowing its value can be important diagnostic information. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- include/trace/events/sunrpc.h | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index fb016308c185..9ea59959a2fe 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -62,6 +62,7 @@ DECLARE_EVENT_CLASS(rpc_xdr_buf_class, __field(size_t, head_len) __field(const void *, tail_base) __field(size_t, tail_len) + __field(unsigned int, page_base) __field(unsigned int, page_len) __field(unsigned int, msg_len) ), @@ -74,14 +75,17 @@ DECLARE_EVENT_CLASS(rpc_xdr_buf_class, __entry->head_len = xdr->head[0].iov_len; __entry->tail_base = xdr->tail[0].iov_base; __entry->tail_len = xdr->tail[0].iov_len; + __entry->page_base = xdr->page_base; __entry->page_len = xdr->page_len; __entry->msg_len = xdr->len; ), - TP_printk("task:%u@%u head=[%p,%zu] page=%u tail=[%p,%zu] len=%u", + TP_printk("task:%u@%u head=[%p,%zu] page=%u(%u) tail=[%p,%zu] len=%u", __entry->task_id, __entry->client_id, - __entry->head_base, __entry->head_len, __entry->page_len, - __entry->tail_base, __entry->tail_len, __entry->msg_len + __entry->head_base, __entry->head_len, + __entry->page_len, __entry->page_base, + __entry->tail_base, __entry->tail_len, + __entry->msg_len ) ); @@ -1496,6 +1500,7 @@ DECLARE_EVENT_CLASS(svc_xdr_buf_class, __field(size_t, head_len) __field(const void *, tail_base) __field(size_t, tail_len) + __field(unsigned int, page_base) __field(unsigned int, page_len) __field(unsigned int, msg_len) ), @@ -1506,14 +1511,17 @@ DECLARE_EVENT_CLASS(svc_xdr_buf_class, __entry->head_len = xdr->head[0].iov_len; __entry->tail_base = xdr->tail[0].iov_base; __entry->tail_len = xdr->tail[0].iov_len; + __entry->page_base = xdr->page_base; __entry->page_len = xdr->page_len; __entry->msg_len = xdr->len; ), - TP_printk("xid=0x%08x head=[%p,%zu] page=%u tail=[%p,%zu] len=%u", + TP_printk("xid=0x%08x head=[%p,%zu] page=%u(%u) tail=[%p,%zu] len=%u", __entry->xid, - 
__entry->head_base, __entry->head_len, __entry->page_len, - __entry->tail_base, __entry->tail_len, __entry->msg_len + __entry->head_base, __entry->head_len, + __entry->page_len, __entry->page_base, + __entry->tail_base, __entry->tail_len, + __entry->msg_len ) ); From 1f59342be6c075a9520679981c4cfd08bb26c659 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 5 Oct 2021 13:27:26 -0700 Subject: [PATCH 084/433] Input: analog - fix invalid snprintf() call Overlapping input and output arguments to snprintf() are undefined behavior in C99: drivers/input/joystick/analog.c: In function 'analog_name': drivers/input/joystick/analog.c:428:3: error: 'snprintf' argument 4 overlaps destination object 'analog' [-Werror=restrict] 428 | snprintf(analog->name, sizeof(analog->name), "%s %d-hat", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 429 | analog->name, hweight16(analog->mask & ANALOG_HATS_ALL)); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/input/joystick/analog.c:420:40: note: destination object referenced by 'restrict'-qualified argument 1 was declared here 420 | static void analog_name(struct analog *analog) | ~~~~~~~~~~~~~~~^~~~~~ Change this function to use the simpler seq_buf interface instead. 
Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20210927101416.1569609-1-arnd@kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/analog.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c index f798922a4598..d9f866ffde6b 100644 --- a/drivers/input/joystick/analog.c +++ b/drivers/input/joystick/analog.c @@ -19,6 +19,7 @@ #include <linux/input.h> #include <linux/gameport.h> #include <linux/jiffies.h> +#include <linux/seq_buf.h> #include <linux/timex.h> #include <linux/timekeeping.h> @@ -419,23 +420,24 @@ static void analog_calibrate_timer(struct analog_port *port) static void analog_name(struct analog *analog) { - snprintf(analog->name, sizeof(analog->name), "Analog %d-axis %d-button", + struct seq_buf s; + + seq_buf_init(&s, analog->name, sizeof(analog->name)); + seq_buf_printf(&s, "Analog %d-axis %d-button", hweight8(analog->mask & ANALOG_AXES_STD), hweight8(analog->mask & ANALOG_BTNS_STD) + !!(analog->mask & ANALOG_BTNS_CHF) * 2 + hweight16(analog->mask & ANALOG_BTNS_GAMEPAD) + !!(analog->mask & ANALOG_HBTN_CHF) * 4); if (analog->mask & ANALOG_HATS_ALL) - snprintf(analog->name, sizeof(analog->name), "%s %d-hat", - analog->name, hweight16(analog->mask & ANALOG_HATS_ALL)); + seq_buf_printf(&s, " %d-hat", + hweight16(analog->mask & ANALOG_HATS_ALL)); if (analog->mask & ANALOG_HAT_FCS) - strlcat(analog->name, " FCS", sizeof(analog->name)); + seq_buf_printf(&s, " FCS"); if (analog->mask & ANALOG_ANY_CHF) - strlcat(analog->name, (analog->mask & ANALOG_SAITEK) ? " Saitek" : " CHF", - sizeof(analog->name)); + seq_buf_printf(&s, (analog->mask & ANALOG_SAITEK) ? " Saitek" : " CHF"); - strlcat(analog->name, (analog->mask & ANALOG_GAMEPAD) ? " gamepad": " joystick", - sizeof(analog->name)); + seq_buf_printf(&s, (analog->mask & ANALOG_GAMEPAD) ?
" gamepad" : " joystick"); } /* From a41392e0877a271007e9209e63c34cab7527eb43 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Tue, 5 Oct 2021 13:33:25 -0700 Subject: [PATCH 085/433] MAINTAINERS: rectify entry for CHIPONE ICN8318 I2C TOUCHSCREEN DRIVER Commit 04647773d648 ("dt-bindings: input: Convert ChipOne ICN8318 binding to a schema") converts chipone_icn8318.txt to chipone,icn8318.yaml, but missed to adjust its reference in MAINTAINERS. Hence, ./scripts/get_maintainer.pl --self-test=patterns complains about a broken reference. Repair this file reference in CHIPONE ICN8318 I2C TOUCHSCREEN DRIVER. Signed-off-by: Lukas Bulwahn Link: https://lore.kernel.org/r/20211005075451.29691-12-lukas.bulwahn@gmail.com Signed-off-by: Dmitry Torokhov --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7ab4a058fe4d..5122b3a3bf56 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4301,7 +4301,7 @@ CHIPONE ICN8318 I2C TOUCHSCREEN DRIVER M: Hans de Goede L: linux-input@vger.kernel.org S: Maintained -F: Documentation/devicetree/bindings/input/touchscreen/chipone_icn8318.txt +F: Documentation/devicetree/bindings/input/touchscreen/chipone,icn8318.yaml F: drivers/input/touchscreen/chipone_icn8318.c CHIPONE ICN8505 I2C TOUCHSCREEN DRIVER From 8e09650f5ec68858f4b8b67cdef9e2ece9b208f3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 4 Oct 2021 10:09:57 -0400 Subject: [PATCH 086/433] NFS: Remove unnecessary TRACE_DEFINE_ENUM()s Clean up: TRACE_DEFINE_ENUM is unnecessary because the target symbols are all C macros, not enums. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfstrace.h | 68 ----------------------------------------------- 1 file changed, 68 deletions(-) diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 8a224871be74..589f32fdbe63 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -11,16 +11,6 @@ #include #include -TRACE_DEFINE_ENUM(DT_UNKNOWN); -TRACE_DEFINE_ENUM(DT_FIFO); -TRACE_DEFINE_ENUM(DT_CHR); -TRACE_DEFINE_ENUM(DT_DIR); -TRACE_DEFINE_ENUM(DT_BLK); -TRACE_DEFINE_ENUM(DT_REG); -TRACE_DEFINE_ENUM(DT_LNK); -TRACE_DEFINE_ENUM(DT_SOCK); -TRACE_DEFINE_ENUM(DT_WHT); - #define nfs_show_file_type(ftype) \ __print_symbolic(ftype, \ { DT_UNKNOWN, "UNKNOWN" }, \ @@ -33,24 +23,6 @@ TRACE_DEFINE_ENUM(DT_WHT); { DT_SOCK, "SOCK" }, \ { DT_WHT, "WHT" }) -TRACE_DEFINE_ENUM(NFS_INO_INVALID_DATA); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_ATIME); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_ACCESS); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_ACL); -TRACE_DEFINE_ENUM(NFS_INO_REVAL_PAGECACHE); -TRACE_DEFINE_ENUM(NFS_INO_REVAL_FORCED); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_LABEL); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_CHANGE); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_CTIME); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_MTIME); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_SIZE); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_OTHER); -TRACE_DEFINE_ENUM(NFS_INO_DATA_INVAL_DEFER); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_BLOCKS); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_XATTR); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_NLINK); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_MODE); - #define nfs_show_cache_validity(v) \ __print_flags(v, "|", \ { NFS_INO_INVALID_DATA, "INVALID_DATA" }, \ @@ -71,17 +43,6 @@ TRACE_DEFINE_ENUM(NFS_INO_INVALID_MODE); { NFS_INO_INVALID_NLINK, "INVALID_NLINK" }, \ { NFS_INO_INVALID_MODE, "INVALID_MODE" }) -TRACE_DEFINE_ENUM(NFS_INO_ADVISE_RDPLUS); -TRACE_DEFINE_ENUM(NFS_INO_STALE); -TRACE_DEFINE_ENUM(NFS_INO_ACL_LRU_SET); -TRACE_DEFINE_ENUM(NFS_INO_INVALIDATING); -TRACE_DEFINE_ENUM(NFS_INO_FSCACHE); 
-TRACE_DEFINE_ENUM(NFS_INO_FSCACHE_LOCK); -TRACE_DEFINE_ENUM(NFS_INO_LAYOUTCOMMIT); -TRACE_DEFINE_ENUM(NFS_INO_LAYOUTCOMMITTING); -TRACE_DEFINE_ENUM(NFS_INO_LAYOUTSTATS); -TRACE_DEFINE_ENUM(NFS_INO_ODIRECT); - #define nfs_show_nfsi_flags(v) \ __print_flags(v, "|", \ { BIT(NFS_INO_ADVISE_RDPLUS), "ADVISE_RDPLUS" }, \ @@ -270,19 +231,6 @@ TRACE_EVENT(nfs_access_exit, ) ); -TRACE_DEFINE_ENUM(LOOKUP_FOLLOW); -TRACE_DEFINE_ENUM(LOOKUP_DIRECTORY); -TRACE_DEFINE_ENUM(LOOKUP_AUTOMOUNT); -TRACE_DEFINE_ENUM(LOOKUP_PARENT); -TRACE_DEFINE_ENUM(LOOKUP_REVAL); -TRACE_DEFINE_ENUM(LOOKUP_RCU); -TRACE_DEFINE_ENUM(LOOKUP_OPEN); -TRACE_DEFINE_ENUM(LOOKUP_CREATE); -TRACE_DEFINE_ENUM(LOOKUP_EXCL); -TRACE_DEFINE_ENUM(LOOKUP_RENAME_TARGET); -TRACE_DEFINE_ENUM(LOOKUP_EMPTY); -TRACE_DEFINE_ENUM(LOOKUP_DOWN); - #define show_lookup_flags(flags) \ __print_flags(flags, "|", \ { LOOKUP_FOLLOW, "FOLLOW" }, \ @@ -392,22 +340,6 @@ DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_exit); DEFINE_NFS_LOOKUP_EVENT(nfs_lookup_revalidate_enter); DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_revalidate_exit); -TRACE_DEFINE_ENUM(O_WRONLY); -TRACE_DEFINE_ENUM(O_RDWR); -TRACE_DEFINE_ENUM(O_CREAT); -TRACE_DEFINE_ENUM(O_EXCL); -TRACE_DEFINE_ENUM(O_NOCTTY); -TRACE_DEFINE_ENUM(O_TRUNC); -TRACE_DEFINE_ENUM(O_APPEND); -TRACE_DEFINE_ENUM(O_NONBLOCK); -TRACE_DEFINE_ENUM(O_DSYNC); -TRACE_DEFINE_ENUM(O_DIRECT); -TRACE_DEFINE_ENUM(O_LARGEFILE); -TRACE_DEFINE_ENUM(O_DIRECTORY); -TRACE_DEFINE_ENUM(O_NOFOLLOW); -TRACE_DEFINE_ENUM(O_NOATIME); -TRACE_DEFINE_ENUM(O_CLOEXEC); - #define show_open_flags(flags) \ __print_flags(flags, "|", \ { O_WRONLY, "O_WRONLY" }, \ From 0392dd51f9c78d46109a408f27dc820300dcd8bd Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 4 Oct 2021 10:10:10 -0400 Subject: [PATCH 087/433] SUNRPC: Per-rpc_clnt task PIDs The current range of RPC task PIDs is 0..65535. That's not adequate for distinguishing tasks across multiple rpc_clnts running high throughput workloads. 
To help relieve this situation and to reduce the bottleneck of having a single atomic for assigning all RPC task PIDs, assign task PIDs per rpc_clnt. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 + net/sunrpc/sched.c | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index a4661646adc9..267b7aeaf1a6 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -40,6 +40,7 @@ struct rpc_clnt { unsigned int cl_clid; /* client id */ struct list_head cl_clients; /* Global list of clients */ struct list_head cl_tasks; /* List of tasks */ + atomic_t cl_pid; /* task PID counter */ spinlock_t cl_lock; /* spinlock */ struct rpc_xprt __rcu * cl_xprt; /* transport */ const struct rpc_procinfo *cl_procinfo; /* procedure info */ diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index b21457cec8a5..f4f311ea7a66 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -277,9 +277,17 @@ static int rpc_wait_bit_killable(struct wait_bit_key *key, int mode) #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS) static void rpc_task_set_debuginfo(struct rpc_task *task) { - static atomic_t rpc_pid; + struct rpc_clnt *clnt = task->tk_client; - task->tk_pid = atomic_inc_return(&rpc_pid); + /* Might be a task carrying a reverse-direction operation */ + if (!clnt) { + static atomic_t rpc_pid; + + task->tk_pid = atomic_inc_return(&rpc_pid); + return; + } + + task->tk_pid = atomic_inc_return(&clnt->cl_pid); } #else static inline void rpc_task_set_debuginfo(struct rpc_task *task) From 110cb2d2f9326030f13b7ec85d6d482934ea5462 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 4 Oct 2021 10:10:16 -0400 Subject: [PATCH 088/433] NFS: Instrument i_size_write() Generate a trace event whenever the NFS client modifies the size of a file. These new events aid troubleshooting workloads that trigger races around size updates. 
There are four new trace points, all named nfs_size_something so they are easy to grep for or enable as a group with a single glob. Size updated on the server: kworker/u24:10-194 [010] 369.939174: nfs_size_update: fileid=00:28:2 fhandle=0x36fbbe51 version=1752899344277980615 cursize=250471 newsize=172083 Server-side size update reported via NFSv3 WCC attributes: fsx-1387 [006] 380.760686: nfs_size_wcc: fileid=00:28:2 fhandle=0x36fbbe51 version=1752899355909932456 cursize=146792 newsize=171216 File has been truncated locally: fsx-1387 [007] 369.437421: nfs_size_truncate: fileid=00:28:2 fhandle=0x36fbbe51 version=1752899231200117272 cursize=215244 newsize=0 File has been extended locally: fsx-1387 [007] 369.439213: nfs_size_grow: fileid=00:28:2 fhandle=0x36fbbe51 version=1752899343704248410 cursize=258048 newsize=262144 Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 9 +++------ fs/nfs/nfstrace.h | 50 +++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/write.c | 1 + 3 files changed, 54 insertions(+), 6 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 3bd0ae438663..a10572f278e6 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -671,6 +671,7 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset) if (err) goto out; + trace_nfs_size_truncate(inode, offset); i_size_write(inode, offset); /* Optimisation */ if (offset == 0) @@ -1456,6 +1457,7 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) && (fattr->valid & NFS_ATTR_FATTR_SIZE) && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) && !nfs_have_writebacks(inode)) { + trace_nfs_size_wcc(inode, fattr->size); i_size_write(inode, nfs_size_to_loff_t(fattr->size)); } } @@ -2100,16 +2102,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* Do we perhaps have any outstanding writes, or has * the file grown beyond our last write? 
*/ if (!nfs_have_writebacks(inode) || new_isize > cur_isize) { + trace_nfs_size_update(inode, new_isize); i_size_write(inode, new_isize); if (!have_writers) invalid |= NFS_INO_INVALID_DATA; } - dprintk("NFS: isize change on server for file %s/%ld " - "(%Ld to %Ld)\n", - inode->i_sb->s_id, - inode->i_ino, - (long long)cur_isize, - (long long)new_isize); } if (new_isize == 0 && !(fattr->valid & (NFS_ATTR_FATTR_SPACE_USED | diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 589f32fdbe63..44fd016a8e65 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -231,6 +231,56 @@ TRACE_EVENT(nfs_access_exit, ) ); +DECLARE_EVENT_CLASS(nfs_update_size_class, + TP_PROTO( + const struct inode *inode, + loff_t new_size + ), + + TP_ARGS(inode, new_size), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(u64, version) + __field(loff_t, cur_size) + __field(loff_t, new_size) + ), + + TP_fast_assign( + const struct nfs_inode *nfsi = NFS_I(inode); + + __entry->dev = inode->i_sb->s_dev; + __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->fileid = nfsi->fileid; + __entry->version = inode_peek_iversion_raw(inode); + __entry->cur_size = i_size_read(inode); + __entry->new_size = new_size; + ), + + TP_printk( + "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu cursize=%lld newsize=%lld", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, __entry->version, + __entry->cur_size, __entry->new_size + ) +); + +#define DEFINE_NFS_UPDATE_SIZE_EVENT(name) \ + DEFINE_EVENT(nfs_update_size_class, nfs_size_##name, \ + TP_PROTO( \ + const struct inode *inode, \ + loff_t new_size \ + ), \ + TP_ARGS(inode, new_size)) + +DEFINE_NFS_UPDATE_SIZE_EVENT(truncate); +DEFINE_NFS_UPDATE_SIZE_EVENT(wcc); +DEFINE_NFS_UPDATE_SIZE_EVENT(update); +DEFINE_NFS_UPDATE_SIZE_EVENT(grow); + #define show_lookup_flags(flags) \ __print_flags(flags, "|", \ { LOOKUP_FOLLOW, "FOLLOW" }, \ diff --git 
a/fs/nfs/write.c b/fs/nfs/write.c index 773ea2c8504d..b89d5ef3af0e 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -288,6 +288,7 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c end = page_file_offset(page) + ((loff_t)offset+count); if (i_size >= end) goto out; + trace_nfs_size_grow(inode, end); i_size_write(inode, end); NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE; nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); From 64a93dbf25d3a1368bb58ddf0f61d0a92d7479e3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Oct 2021 15:44:16 -0400 Subject: [PATCH 089/433] NFS: Fix deadlocks in nfs_scan_commit_list() Partially revert commit 2ce209c42c01 ("NFS: Wait for requests that are locked on the commit list"), since it can lead to deadlocks between commit requests and nfs_join_page_group(). For now we should assume that any locked requests on the commit list are either about to be removed and committed by another task, or the writes they describe are about to be retransmitted. In either case, we should not need to worry. 
Fixes: 2ce209c42c01 ("NFS: Wait for requests that are locked on the commit list") Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index b89d5ef3af0e..38f181e1343a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1039,25 +1039,11 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, struct nfs_page *req, *tmp; int ret = 0; -restart: list_for_each_entry_safe(req, tmp, src, wb_list) { kref_get(&req->wb_kref); if (!nfs_lock_request(req)) { - int status; - - /* Prevent deadlock with nfs_lock_and_join_requests */ - if (!list_empty(dst)) { - nfs_release_request(req); - continue; - } - /* Ensure we make progress to prevent livelock */ - mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); - status = nfs_wait_on_request(req); nfs_release_request(req); - mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); - if (status < 0) - break; - goto restart; + continue; } nfs_request_remove_commit_list(req, cinfo); clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); @@ -1952,6 +1938,7 @@ static int __nfs_commit_inode(struct inode *inode, int how, int may_wait = how & FLUSH_SYNC; int ret, nscan; + how &= ~FLUSH_SYNC; nfs_init_cinfo_from_inode(&cinfo, inode); nfs_commit_begin(cinfo.mds); for (;;) { From 2667f6b7af99e81958fa97c03bb519fcb09d0055 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Wed, 6 Oct 2021 11:06:03 -0700 Subject: [PATCH 090/433] Input: st1232 - increase "wait ready" timeout I have a ST1633 touch controller which fails to probe due to a timeout waiting for the controller to become ready. Increasing the minimum delay to 100ms ensures that the probe sequence completes successfully. The ST1633 datasheet says nothing about the maximum delay here and the ST1232 I2C protocol document says "wait until" with no notion of a timeout. 
Since this only runs once during probe, being generous with the timeout seems reasonable and most likely the device will become ready eventually. (It may be worth noting that I saw this issue with a PREEMPT_RT patched kernel which probably has tighter wakeups from usleep_range() than other preemption models.) Fixes: f605be6a57b4 ("Input: st1232 - wait until device is ready before reading resolution") Signed-off-by: John Keeping Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20210929152609.2421483-1-john@metanate.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/st1232.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/st1232.c b/drivers/input/touchscreen/st1232.c index 6abae665ca71..9d1dea6996a2 100644 --- a/drivers/input/touchscreen/st1232.c +++ b/drivers/input/touchscreen/st1232.c @@ -92,7 +92,7 @@ static int st1232_ts_wait_ready(struct st1232_ts_data *ts) unsigned int retries; int error; - for (retries = 10; retries; retries--) { + for (retries = 100; retries; retries--) { error = st1232_ts_read_data(ts, REG_STATUS, 1); if (!error) { switch (ts->read_buf[0]) { From b415ed4f49b90655659479fa7c5ddaffe88e41b9 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Mon, 11 Oct 2021 18:31:40 -0700 Subject: [PATCH 091/433] Input: st1232 - prefer asynchronous probing The device may take up to 100ms to become responsive during probe, so prefer asynchronous probing to avoid delaying the rest of the system.
Suggested-by: Dmitry Torokhov Signed-off-by: John Keeping Link: https://lore.kernel.org/r/20211007111217.1935858-1-john@metanate.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/st1232.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/touchscreen/st1232.c b/drivers/input/touchscreen/st1232.c index 9d1dea6996a2..e38ba3e4f183 100644 --- a/drivers/input/touchscreen/st1232.c +++ b/drivers/input/touchscreen/st1232.c @@ -389,6 +389,7 @@ static struct i2c_driver st1232_ts_driver = { .driver = { .name = ST1232_TS_NAME, .of_match_table = st1232_ts_dt_ids, + .probe_type = PROBE_PREFER_ASYNCHRONOUS, .pm = &st1232_ts_pm_ops, }, }; From 0ae93b99beb283438aa571a6add4eab0c077d576 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 7 Oct 2021 16:17:24 -0400 Subject: [PATCH 092/433] SUNRPC: Simplify the SVC dispatch code path Micro-optimization: The last user of the generic SVC dispatch code path has been removed, so svc_process_common() can be simplified. This declutters the hot path so that the by-far most common case (a dispatch function exists) is made the /only/ path. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 5 +--- net/sunrpc/svc.c | 51 ++------------------------------------ 2 files changed, 3 insertions(+), 53 deletions(-) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 6263410c948a..4205a6ef4770 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -443,10 +443,7 @@ struct svc_version { /* Need xprt with congestion control */ bool vs_need_cong_ctrl; - /* Override dispatch function (e.g. when caching replies). - * A return value of 0 means drop the request. - * vs_dispatch == NULL means use default dispatcher. 
- */ + /* Dispatch function */ int (*vs_dispatch)(struct svc_rqst *, __be32 *); }; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 08ca797bb8a4..e0dd6e6a4602 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1186,45 +1186,6 @@ void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) {} #endif -static int -svc_generic_dispatch(struct svc_rqst *rqstp, __be32 *statp) -{ - struct kvec *argv = &rqstp->rq_arg.head[0]; - struct kvec *resv = &rqstp->rq_res.head[0]; - const struct svc_procedure *procp = rqstp->rq_procinfo; - - /* - * Decode arguments - * XXX: why do we ignore the return value? - */ - if (procp->pc_decode && - !procp->pc_decode(rqstp, argv->iov_base)) { - *statp = rpc_garbage_args; - return 1; - } - - *statp = procp->pc_func(rqstp); - - if (*statp == rpc_drop_reply || - test_bit(RQ_DROPME, &rqstp->rq_flags)) - return 0; - - if (rqstp->rq_auth_stat != rpc_auth_ok) - return 1; - - if (*statp != rpc_success) - return 1; - - /* Encode reply */ - if (procp->pc_encode && - !procp->pc_encode(rqstp, resv->iov_base + resv->iov_len)) { - dprintk("svc: failed to encode reply\n"); - /* serv->sv_stats->rpcsystemerr++; */ - *statp = rpc_system_err; - } - return 1; -} - __be32 svc_generic_init_request(struct svc_rqst *rqstp, const struct svc_program *progp, @@ -1392,16 +1353,8 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) svc_reserve_auth(rqstp, procp->pc_xdrressize<<2); /* Call the function that processes the request. 
*/ - if (!process.dispatch) { - if (!svc_generic_dispatch(rqstp, statp)) - goto release_dropit; - if (*statp == rpc_garbage_args) - goto err_garbage; - } else { - dprintk("svc: calling dispatcher\n"); - if (!process.dispatch(rqstp, statp)) - goto release_dropit; /* Release reply info */ - } + if (!process.dispatch(rqstp, statp)) + goto release_dropit; if (rqstp->rq_auth_stat != rpc_auth_ok) goto err_release_bad_auth; From 5b747a594b19708e3933eb3ecf447739e930790e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 7 Oct 2021 16:17:31 -0400 Subject: [PATCH 093/433] SUNRPC: De-duplicate .pc_release() call sites There was some spaghetti in svc_process_common() that had evolved over time such that there was still one case that needed a call to .pc_release() but never made it. That issue was removed in the previous patch. As additional insurance against missing this important callout, ensure that the .pc_release() method is always called, no matter what the reply_stat is. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/svc.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index e0dd6e6a4602..4292278a9552 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1252,7 +1252,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) __be32 *statp; u32 prog, vers; __be32 rpc_stat; - int auth_res; + int auth_res, rc; __be32 *reply_statp; rpc_stat = rpc_success; @@ -1353,20 +1353,18 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) svc_reserve_auth(rqstp, procp->pc_xdrressize<<2); /* Call the function that processes the request. 
*/ - if (!process.dispatch(rqstp, statp)) - goto release_dropit; - + rc = process.dispatch(rqstp, statp); + if (procp->pc_release) + procp->pc_release(rqstp); + if (!rc) + goto dropit; if (rqstp->rq_auth_stat != rpc_auth_ok) - goto err_release_bad_auth; + goto err_bad_auth; /* Check RPC status result */ if (*statp != rpc_success) resv->iov_len = ((void*)statp) - resv->iov_base + 4; - /* Release reply info */ - if (procp->pc_release) - procp->pc_release(rqstp); - if (procp->pc_encode == NULL) goto dropit; @@ -1375,9 +1373,6 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) goto close_xprt; return 1; /* Caller can now send it */ -release_dropit: - if (procp->pc_release) - procp->pc_release(rqstp); dropit: svc_authorise(rqstp); /* doesn't hurt to call this twice */ dprintk("svc: svc_process dropit\n"); @@ -1404,9 +1399,6 @@ err_bad_rpc: svc_putnl(resv, 2); goto sendit; -err_release_bad_auth: - if (procp->pc_release) - procp->pc_release(rqstp); err_bad_auth: dprintk("svc: authentication failed (%d)\n", be32_to_cpu(rqstp->rq_auth_stat)); From af98ff045f1e0bed4caa28741266430af9724eb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 12 Oct 2021 19:37:44 -0700 Subject: [PATCH 094/433] Input: adxl34x - make adxl34x_remove() return void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Up to now adxl34x_remove() returns zero unconditionally. Make it return void instead which makes it easier to see in the callers that there is no error to handle. Also the return value of i2c and spi remove callbacks is ignored anyway. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20211012153945.2651412-6-u.kleine-koenig@pengutronix.de Signed-off-by: Dmitry Torokhov --- drivers/input/misc/adxl34x-i2c.c | 4 +++- drivers/input/misc/adxl34x-spi.c | 4 +++- drivers/input/misc/adxl34x.c | 4 +--- drivers/input/misc/adxl34x.h | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/input/misc/adxl34x-i2c.c b/drivers/input/misc/adxl34x-i2c.c index e64368a63346..a3b5f88d2bd1 100644 --- a/drivers/input/misc/adxl34x-i2c.c +++ b/drivers/input/misc/adxl34x-i2c.c @@ -103,7 +103,9 @@ static int adxl34x_i2c_remove(struct i2c_client *client) { struct adxl34x *ac = i2c_get_clientdata(client); - return adxl34x_remove(ac); + adxl34x_remove(ac); + + return 0; } static int __maybe_unused adxl34x_i2c_suspend(struct device *dev) diff --git a/drivers/input/misc/adxl34x-spi.c b/drivers/input/misc/adxl34x-spi.c index df6afa455e46..6e51c9bc619f 100644 --- a/drivers/input/misc/adxl34x-spi.c +++ b/drivers/input/misc/adxl34x-spi.c @@ -91,7 +91,9 @@ static int adxl34x_spi_remove(struct spi_device *spi) { struct adxl34x *ac = spi_get_drvdata(spi); - return adxl34x_remove(ac); + adxl34x_remove(ac); + + return 0; } static int __maybe_unused adxl34x_spi_suspend(struct device *dev) diff --git a/drivers/input/misc/adxl34x.c b/drivers/input/misc/adxl34x.c index 4cc4e8ff42b3..34beac80e6f0 100644 --- a/drivers/input/misc/adxl34x.c +++ b/drivers/input/misc/adxl34x.c @@ -896,15 +896,13 @@ struct adxl34x *adxl34x_probe(struct device *dev, int irq, } EXPORT_SYMBOL_GPL(adxl34x_probe); -int adxl34x_remove(struct adxl34x *ac) +void adxl34x_remove(struct adxl34x *ac) { sysfs_remove_group(&ac->dev->kobj, &adxl34x_attr_group); free_irq(ac->irq, ac); input_unregister_device(ac->input); dev_dbg(ac->dev, "unregistered accelerometer\n"); kfree(ac); - - return 0; } EXPORT_SYMBOL_GPL(adxl34x_remove); diff --git a/drivers/input/misc/adxl34x.h b/drivers/input/misc/adxl34x.h index 83a0eeccf613..febf85270fff 100644 --- 
a/drivers/input/misc/adxl34x.h +++ b/drivers/input/misc/adxl34x.h @@ -25,6 +25,6 @@ void adxl34x_resume(struct adxl34x *ac); struct adxl34x *adxl34x_probe(struct device *dev, int irq, bool fifo_delay_default, const struct adxl34x_bus_ops *bops); -int adxl34x_remove(struct adxl34x *ac); +void adxl34x_remove(struct adxl34x *ac); #endif From 39e4e75a9f1cc4c448e965c14962a62c803922e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 12 Oct 2021 19:38:36 -0700 Subject: [PATCH 095/433] Input: tsc200x - make tsc200x_remove() return void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Up to now tsc200x_remove() returns zero unconditionally. Make it return void instead which makes it easier to see in the callers that there is no error to handle. Also the return value of i2c and spi remove callbacks is ignored anyway. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20211012153945.2651412-7-u.kleine-koenig@pengutronix.de Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/tsc2004.c | 4 +++- drivers/input/touchscreen/tsc2005.c | 4 +++- drivers/input/touchscreen/tsc200x-core.c | 4 +--- drivers/input/touchscreen/tsc200x-core.h | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/input/touchscreen/tsc2004.c b/drivers/input/touchscreen/tsc2004.c index 0272cedcc726..9fdd870c4c0b 100644 --- a/drivers/input/touchscreen/tsc2004.c +++ b/drivers/input/touchscreen/tsc2004.c @@ -45,7 +45,9 @@ static int tsc2004_probe(struct i2c_client *i2c, static int tsc2004_remove(struct i2c_client *i2c) { - return tsc200x_remove(&i2c->dev); + tsc200x_remove(&i2c->dev); + + return 0; } static const struct i2c_device_id tsc2004_idtable[] = { diff --git a/drivers/input/touchscreen/tsc2005.c b/drivers/input/touchscreen/tsc2005.c index 923496bbb368..a2f55920b9b2 100644 --- a/drivers/input/touchscreen/tsc2005.c +++ b/drivers/input/touchscreen/tsc2005.c @@ -66,7 +66,9 @@ static int 
tsc2005_probe(struct spi_device *spi) static int tsc2005_remove(struct spi_device *spi) { - return tsc200x_remove(&spi->dev); + tsc200x_remove(&spi->dev); + + return 0; } #ifdef CONFIG_OF diff --git a/drivers/input/touchscreen/tsc200x-core.c b/drivers/input/touchscreen/tsc200x-core.c index b8d720d52013..27810f6c69f6 100644 --- a/drivers/input/touchscreen/tsc200x-core.c +++ b/drivers/input/touchscreen/tsc200x-core.c @@ -577,15 +577,13 @@ disable_regulator: } EXPORT_SYMBOL_GPL(tsc200x_probe); -int tsc200x_remove(struct device *dev) +void tsc200x_remove(struct device *dev) { struct tsc200x *ts = dev_get_drvdata(dev); sysfs_remove_group(&dev->kobj, &tsc200x_attr_group); regulator_disable(ts->vio); - - return 0; } EXPORT_SYMBOL_GPL(tsc200x_remove); diff --git a/drivers/input/touchscreen/tsc200x-core.h b/drivers/input/touchscreen/tsc200x-core.h index a43c08ccfd3d..4ded34425b21 100644 --- a/drivers/input/touchscreen/tsc200x-core.h +++ b/drivers/input/touchscreen/tsc200x-core.h @@ -74,6 +74,6 @@ extern const struct dev_pm_ops tsc200x_pm_ops; int tsc200x_probe(struct device *dev, int irq, const struct input_id *tsc_id, struct regmap *regmap, int (*tsc200x_cmd)(struct device *dev, u8 cmd)); -int tsc200x_remove(struct device *dev); +void tsc200x_remove(struct device *dev); #endif From 16c663642c7ec03cd4cee5fec520bb69e97babe4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 12 Oct 2021 11:57:22 -0400 Subject: [PATCH 096/433] SUNRPC: Replace the "__be32 *p" parameter to .pc_decode The passed-in value of the "__be32 *p" parameter is now unused in every server-side XDR decoder, and can be removed. Note also that there is a line in each decoder that sets up a local pointer to a struct xdr_stream. Passing that pointer from the dispatcher instead saves one line per decoder function. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- fs/lockd/svc.c | 3 +-- fs/lockd/xdr.c | 27 +++++++++-------------- fs/lockd/xdr4.c | 27 +++++++++-------------- fs/nfsd/nfs2acl.c | 12 +++++----- fs/nfsd/nfs3acl.c | 8 +++---- fs/nfsd/nfs3xdr.c | 45 +++++++++++++------------------------- fs/nfsd/nfs4xdr.c | 4 ++-- fs/nfsd/nfsd.h | 3 ++- fs/nfsd/nfssvc.c | 7 +++--- fs/nfsd/nfsxdr.c | 30 +++++++++---------------- fs/nfsd/xdr.h | 21 +++++++++--------- fs/nfsd/xdr3.h | 31 +++++++++++++------------- fs/nfsd/xdr4.h | 2 +- include/linux/lockd/xdr.h | 19 ++++++++-------- include/linux/lockd/xdr4.h | 19 ++++++++-------- include/linux/sunrpc/svc.h | 3 ++- 16 files changed, 112 insertions(+), 149 deletions(-) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index b632be3ad57b..9a82471bda07 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -780,11 +780,10 @@ module_exit(exit_nlm); static int nlmsvc_dispatch(struct svc_rqst *rqstp, __be32 *statp) { const struct svc_procedure *procp = rqstp->rq_procinfo; - struct kvec *argv = rqstp->rq_arg.head; struct kvec *resv = rqstp->rq_res.head; svcxdr_init_decode(rqstp); - if (!procp->pc_decode(rqstp, argv->iov_base)) + if (!procp->pc_decode(rqstp, &rqstp->rq_arg_stream)) goto out_decode_err; *statp = procp->pc_func(rqstp); diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 9235e60b1769..895f15222104 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -146,15 +146,14 @@ svcxdr_encode_testrply(struct xdr_stream *xdr, const struct nlm_res *resp) */ int -nlmsvc_decode_void(struct svc_rqst *rqstp, __be32 *p) +nlmsvc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr) { return 1; } int -nlmsvc_decode_testargs(struct svc_rqst *rqstp, __be32 *p) +nlmsvc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; u32 exclusive; @@ -171,9 +170,8 @@ nlmsvc_decode_testargs(struct svc_rqst *rqstp, __be32 *p) } int -nlmsvc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p) 
+nlmsvc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; u32 exclusive; @@ -197,9 +195,8 @@ nlmsvc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p) } int -nlmsvc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p) +nlmsvc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; u32 exclusive; @@ -218,9 +215,8 @@ nlmsvc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p) } int -nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p) +nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; if (!svcxdr_decode_cookie(xdr, &argp->cookie)) @@ -233,9 +229,8 @@ nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p) } int -nlmsvc_decode_res(struct svc_rqst *rqstp, __be32 *p) +nlmsvc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_res *resp = rqstp->rq_argp; if (!svcxdr_decode_cookie(xdr, &resp->cookie)) @@ -247,10 +242,10 @@ nlmsvc_decode_res(struct svc_rqst *rqstp, __be32 *p) } int -nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p) +nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_reboot *argp = rqstp->rq_argp; + __be32 *p; u32 len; if (xdr_stream_decode_u32(xdr, &len) < 0) @@ -273,9 +268,8 @@ nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p) } int -nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p) +nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; struct nlm_lock *lock = &argp->lock; @@ -301,9 +295,8 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p) } int 
-nlmsvc_decode_notify(struct svc_rqst *rqstp, __be32 *p) +nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; struct nlm_lock *lock = &argp->lock; diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index 98e957e4566c..573c7d580a5e 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -145,15 +145,14 @@ svcxdr_encode_testrply(struct xdr_stream *xdr, const struct nlm_res *resp) */ int -nlm4svc_decode_void(struct svc_rqst *rqstp, __be32 *p) +nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr) { return 1; } int -nlm4svc_decode_testargs(struct svc_rqst *rqstp, __be32 *p) +nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; u32 exclusive; @@ -170,9 +169,8 @@ nlm4svc_decode_testargs(struct svc_rqst *rqstp, __be32 *p) } int -nlm4svc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p) +nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; u32 exclusive; @@ -196,9 +194,8 @@ nlm4svc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p) } int -nlm4svc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p) +nlm4svc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; u32 exclusive; @@ -216,9 +213,8 @@ nlm4svc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p) } int -nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p) +nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; if (!svcxdr_decode_cookie(xdr, &argp->cookie)) @@ -231,9 +227,8 @@ nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p) } int -nlm4svc_decode_res(struct 
svc_rqst *rqstp, __be32 *p) +nlm4svc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_res *resp = rqstp->rq_argp; if (!svcxdr_decode_cookie(xdr, &resp->cookie)) @@ -245,10 +240,10 @@ nlm4svc_decode_res(struct svc_rqst *rqstp, __be32 *p) } int -nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p) +nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_reboot *argp = rqstp->rq_argp; + __be32 *p; u32 len; if (xdr_stream_decode_u32(xdr, &len) < 0) @@ -271,9 +266,8 @@ nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p) } int -nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p) +nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; struct nlm_lock *lock = &argp->lock; @@ -299,9 +293,8 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p) } int -nlm4svc_decode_notify(struct svc_rqst *rqstp, __be32 *p) +nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; struct nlm_lock *lock = &argp->lock; diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 4b43929c1f25..0069c0fdb94f 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -188,9 +188,9 @@ out: * XDR decode functions */ -static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p) +static int +nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_getaclargs *argp = rqstp->rq_argp; if (!svcxdr_decode_fhandle(xdr, &argp->fh)) @@ -201,9 +201,9 @@ static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p) return 1; } -static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p) +static int 
+nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_setaclargs *argp = rqstp->rq_argp; if (!svcxdr_decode_fhandle(xdr, &argp->fh)) @@ -222,9 +222,9 @@ static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p) return 1; } -static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p) +static int +nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_accessargs *args = rqstp->rq_argp; if (!svcxdr_decode_fhandle(xdr, &args->fh)) diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 5dfe7644a517..b1e352ed2436 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -127,9 +127,9 @@ out: * XDR decode functions */ -static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p) +static int +nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_getaclargs *args = rqstp->rq_argp; if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) @@ -140,9 +140,9 @@ static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p) return 1; } -static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p) +static int +nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_setaclargs *argp = rqstp->rq_argp; if (!svcxdr_decode_nfs_fh3(xdr, &argp->fh)) diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 267e56f218af..5f744f03cda7 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -557,18 +557,16 @@ void fill_post_wcc(struct svc_fh *fhp) */ int -nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p) +nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_fhandle *args = rqstp->rq_argp; return 
svcxdr_decode_nfs_fh3(xdr, &args->fh); } int -nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p) +nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_sattrargs *args = rqstp->rq_argp; return svcxdr_decode_nfs_fh3(xdr, &args->fh) && @@ -577,18 +575,16 @@ nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p) } int -nfs3svc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p) +nfs3svc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_diropargs *args = rqstp->rq_argp; return svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len); } int -nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p) +nfs3svc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_accessargs *args = rqstp->rq_argp; if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) @@ -600,9 +596,8 @@ nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p) } int -nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p) +nfs3svc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_readargs *args = rqstp->rq_argp; if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) @@ -616,9 +611,8 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p) } int -nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) +nfs3svc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_writeargs *args = rqstp->rq_argp; u32 max_blocksize = svc_max_payload(rqstp); @@ -649,9 +643,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) } int -nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p) +nfs3svc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = 
&rqstp->rq_arg_stream; struct nfsd3_createargs *args = rqstp->rq_argp; if (!svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len)) @@ -674,9 +667,8 @@ nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p) } int -nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p) +nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_createargs *args = rqstp->rq_argp; return svcxdr_decode_diropargs3(xdr, &args->fh, @@ -685,9 +677,8 @@ nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p) } int -nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p) +nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_symlinkargs *args = rqstp->rq_argp; struct kvec *head = rqstp->rq_arg.head; struct kvec *tail = rqstp->rq_arg.tail; @@ -713,9 +704,8 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p) } int -nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p) +nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_mknodargs *args = rqstp->rq_argp; if (!svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len)) @@ -742,9 +732,8 @@ nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p) } int -nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p) +nfs3svc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_renameargs *args = rqstp->rq_argp; return svcxdr_decode_diropargs3(xdr, &args->ffh, @@ -754,9 +743,8 @@ nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p) } int -nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p) +nfs3svc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_linkargs *args = 
rqstp->rq_argp; return svcxdr_decode_nfs_fh3(xdr, &args->ffh) && @@ -765,9 +753,8 @@ nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p) } int -nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p) +nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_readdirargs *args = rqstp->rq_argp; if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) @@ -784,9 +771,8 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p) } int -nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p) +nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_readdirargs *args = rqstp->rq_argp; u32 dircount; @@ -807,9 +793,8 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p) } int -nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p) +nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_commitargs *args = rqstp->rq_argp; if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index a54b2845473b..fc0f154f1172 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -5412,14 +5412,14 @@ void nfsd4_release_compoundargs(struct svc_rqst *rqstp) } int -nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p) +nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd4_compoundargs *args = rqstp->rq_argp; /* svcxdr_tmp_alloc */ args->to_free = NULL; - args->xdr = &rqstp->rq_arg_stream; + args->xdr = xdr; args->ops = args->iops; args->rqstp = rqstp; diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 9664303afdaf..6e8ad5f9757c 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -78,7 +78,8 @@ extern const struct seq_operations nfs_exports_op; */ struct nfsd_voidargs { }; struct nfsd_voidres { }; -int 
nfssvc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p); +int nfssvc_decode_voidarg(struct svc_rqst *rqstp, + struct xdr_stream *xdr); int nfssvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p); /* diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index ccb59e91011b..7cd13e9474ff 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -1004,7 +1004,6 @@ out: int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) { const struct svc_procedure *proc = rqstp->rq_procinfo; - struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; __be32 *p; @@ -1015,7 +1014,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) rqstp->rq_cachetype = proc->pc_cachetype; svcxdr_init_decode(rqstp); - if (!proc->pc_decode(rqstp, argv->iov_base)) + if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream)) goto out_decode_err; switch (nfsd_cache_lookup(rqstp)) { @@ -1065,13 +1064,13 @@ out_encode_err: /** * nfssvc_decode_voidarg - Decode void arguments * @rqstp: Server RPC transaction context - * @p: buffer containing arguments to decode + * @xdr: XDR stream positioned at arguments to decode * * Return values: * %0: Arguments were not valid * %1: Decoding was successful */ -int nfssvc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p) +int nfssvc_decode_voidarg(struct svc_rqst *rqstp, struct xdr_stream *xdr) { return 1; } diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index ddcc18adfeb1..08e899180ee4 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -273,18 +273,16 @@ svcxdr_encode_fattr(struct svc_rqst *rqstp, struct xdr_stream *xdr, */ int -nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p) +nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_fhandle *args = rqstp->rq_argp; return svcxdr_decode_fhandle(xdr, &args->fh); } int -nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p) +nfssvc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream 
*xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_sattrargs *args = rqstp->rq_argp; return svcxdr_decode_fhandle(xdr, &args->fh) && @@ -292,18 +290,16 @@ nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p) } int -nfssvc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p) +nfssvc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_diropargs *args = rqstp->rq_argp; return svcxdr_decode_diropargs(xdr, &args->fh, &args->name, &args->len); } int -nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p) +nfssvc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_readargs *args = rqstp->rq_argp; u32 totalcount; @@ -321,9 +317,8 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p) } int -nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) +nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_writeargs *args = rqstp->rq_argp; u32 beginoffset, totalcount; @@ -350,9 +345,8 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) } int -nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p) +nfssvc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_createargs *args = rqstp->rq_argp; return svcxdr_decode_diropargs(xdr, &args->fh, @@ -361,9 +355,8 @@ nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p) } int -nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p) +nfssvc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_renameargs *args = rqstp->rq_argp; return svcxdr_decode_diropargs(xdr, &args->ffh, @@ -373,9 +366,8 @@ nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p) } int -nfssvc_decode_linkargs(struct svc_rqst 
*rqstp, __be32 *p) +nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_linkargs *args = rqstp->rq_argp; return svcxdr_decode_fhandle(xdr, &args->ffh) && @@ -384,9 +376,8 @@ nfssvc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p) } int -nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p) +nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_symlinkargs *args = rqstp->rq_argp; struct kvec *head = rqstp->rq_arg.head; @@ -405,9 +396,8 @@ nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p) } int -nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p) +nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_readdirargs *args = rqstp->rq_argp; if (!svcxdr_decode_fhandle(xdr, &args->fh)) diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h index 80fd6d7f3404..804f9af94d6d 100644 --- a/fs/nfsd/xdr.h +++ b/fs/nfsd/xdr.h @@ -141,16 +141,17 @@ union nfsd_xdrstore { #define NFS2_SVC_XDRSIZE sizeof(union nfsd_xdrstore) -int nfssvc_decode_fhandleargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_readargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_writeargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_createargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *); +int nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfssvc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfssvc_decode_diropargs(struct svc_rqst *rqstp, struct 
xdr_stream *xdr); +int nfssvc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfssvc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfssvc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); + int nfssvc_encode_statres(struct svc_rqst *, __be32 *); int nfssvc_encode_attrstatres(struct svc_rqst *, __be32 *); int nfssvc_encode_diropres(struct svc_rqst *, __be32 *); diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index 712c117300cb..60a8909205e5 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -265,21 +265,22 @@ union nfsd3_xdrstore { #define NFS3_SVC_XDRSIZE sizeof(union nfsd3_xdrstore) -int nfs3svc_decode_fhandleargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_readargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_writeargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_createargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_mkdirargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_mknodargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_renameargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_linkargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *); +int nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int 
nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfs3svc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfs3svc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfs3svc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfs3svc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfs3svc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfs3svc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfs3svc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); + int nfs3svc_encode_getattrres(struct svc_rqst *, __be32 *); int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *); int nfs3svc_encode_lookupres(struct svc_rqst *, __be32 *); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 3e4052e3bd50..1d1b8771bdcf 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -756,7 +756,7 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp) bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp); -int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *); +int nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *); __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32); void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h 
index a98309c0121c..170ad6f5596a 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -96,18 +96,19 @@ struct nlm_reboot { */ #define NLMSVC_XDRSIZE sizeof(struct nlm_args) -int nlmsvc_decode_testargs(struct svc_rqst *, __be32 *); +int nlmsvc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nlmsvc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nlmsvc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nlmsvc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nlmsvc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr); + int nlmsvc_encode_testres(struct svc_rqst *, __be32 *); -int nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *); -int nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *); -int nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *); int nlmsvc_encode_res(struct svc_rqst *, __be32 *); -int nlmsvc_decode_res(struct svc_rqst *, __be32 *); int nlmsvc_encode_void(struct svc_rqst *, __be32 *); -int nlmsvc_decode_void(struct svc_rqst *, __be32 *); -int nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *); int nlmsvc_encode_shareres(struct svc_rqst *, __be32 *); -int nlmsvc_decode_notify(struct svc_rqst *, __be32 *); -int nlmsvc_decode_reboot(struct svc_rqst *, __be32 *); #endif /* LOCKD_XDR_H */ diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h index 5ae766f26e04..68e14e0f2b1f 100644 --- a/include/linux/lockd/xdr4.h +++ b/include/linux/lockd/xdr4.h @@ -22,21 +22,20 @@ #define nlm4_fbig cpu_to_be32(NLM_FBIG) #define nlm4_failed cpu_to_be32(NLM_FAILED) +int nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int 
nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nlm4svc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nlm4svc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr); - -int nlm4svc_decode_testargs(struct svc_rqst *, __be32 *); int nlm4svc_encode_testres(struct svc_rqst *, __be32 *); -int nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *); -int nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *); -int nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *); int nlm4svc_encode_res(struct svc_rqst *, __be32 *); -int nlm4svc_decode_res(struct svc_rqst *, __be32 *); int nlm4svc_encode_void(struct svc_rqst *, __be32 *); -int nlm4svc_decode_void(struct svc_rqst *, __be32 *); -int nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *); int nlm4svc_encode_shareres(struct svc_rqst *, __be32 *); -int nlm4svc_decode_notify(struct svc_rqst *, __be32 *); -int nlm4svc_decode_reboot(struct svc_rqst *, __be32 *); extern const struct rpc_version nlm_version4; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 4205a6ef4770..da3c5bc43d85 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -454,7 +454,8 @@ struct svc_procedure { /* process the request: */ __be32 (*pc_func)(struct svc_rqst *); /* XDR decode args: */ - int (*pc_decode)(struct svc_rqst *, __be32 *data); + int (*pc_decode)(struct svc_rqst *rqstp, + struct xdr_stream *xdr); /* XDR encode result: */ int (*pc_encode)(struct svc_rqst *, __be32 *data); /* XDR free result: */ From c44b31c263798ec34614dd394c31ef1a2e7e716e Mon Sep 17 00:00:00 
2001 From: Chuck Lever Date: Tue, 12 Oct 2021 11:57:28 -0400 Subject: [PATCH 097/433] SUNRPC: Change return value type of .pc_decode Returning an undecorated integer is an age-old trope, but it's not clear (even to previous experts in this code) that the only valid return values are 1 and 0. These functions do not return a negative errno, rpc_stat value, or a positive length. Document there are only two valid return values by having .pc_decode return only true or false. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/lockd/xdr.c | 96 +++++++++++++++--------------- fs/lockd/xdr4.c | 97 +++++++++++++++--------------- fs/nfsd/nfs2acl.c | 30 +++++----- fs/nfsd/nfs3acl.c | 22 +++---- fs/nfsd/nfs3xdr.c | 118 ++++++++++++++++++------------------- fs/nfsd/nfs4xdr.c | 24 ++++---- fs/nfsd/nfsd.h | 2 +- fs/nfsd/nfssvc.c | 6 +- fs/nfsd/nfsxdr.c | 62 +++++++++---------- fs/nfsd/xdr.h | 20 +++---- fs/nfsd/xdr3.h | 30 +++++----- fs/nfsd/xdr4.h | 2 +- include/linux/lockd/xdr.h | 18 +++--- include/linux/lockd/xdr4.h | 18 +++--- include/linux/sunrpc/svc.h | 2 +- 15 files changed, 274 insertions(+), 273 deletions(-) diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 895f15222104..622c2ca37dbf 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -145,103 +145,103 @@ svcxdr_encode_testrply(struct xdr_stream *xdr, const struct nlm_res *resp) * Decode Call arguments */ -int +bool nlmsvc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - return 1; + return true; } -int +bool nlmsvc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nlm_args *argp = rqstp->rq_argp; u32 exclusive; if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return 0; + return false; if (xdr_stream_decode_bool(xdr, &exclusive) < 0) - return 0; + return false; if (!svcxdr_decode_lock(xdr, &argp->lock)) - return 0; + return false; if (exclusive) argp->lock.fl.fl_type = F_WRLCK; - return 1; + return true; } -int +bool nlmsvc_decode_lockargs(struct svc_rqst *rqstp, struct 
xdr_stream *xdr) { struct nlm_args *argp = rqstp->rq_argp; u32 exclusive; if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return 0; + return false; if (xdr_stream_decode_bool(xdr, &argp->block) < 0) - return 0; + return false; if (xdr_stream_decode_bool(xdr, &exclusive) < 0) - return 0; + return false; if (!svcxdr_decode_lock(xdr, &argp->lock)) - return 0; + return false; if (exclusive) argp->lock.fl.fl_type = F_WRLCK; if (xdr_stream_decode_bool(xdr, &argp->reclaim) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &argp->state) < 0) - return 0; + return false; argp->monitor = 1; /* monitor client by default */ - return 1; + return true; } -int +bool nlmsvc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nlm_args *argp = rqstp->rq_argp; u32 exclusive; if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return 0; + return false; if (xdr_stream_decode_bool(xdr, &argp->block) < 0) - return 0; + return false; if (xdr_stream_decode_bool(xdr, &exclusive) < 0) - return 0; + return false; if (!svcxdr_decode_lock(xdr, &argp->lock)) - return 0; + return false; if (exclusive) argp->lock.fl.fl_type = F_WRLCK; - return 1; + return true; } -int +bool nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nlm_args *argp = rqstp->rq_argp; if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return 0; + return false; if (!svcxdr_decode_lock(xdr, &argp->lock)) - return 0; + return false; argp->lock.fl.fl_type = F_UNLCK; - return 1; + return true; } -int +bool nlmsvc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nlm_res *resp = rqstp->rq_argp; if (!svcxdr_decode_cookie(xdr, &resp->cookie)) - return 0; + return false; if (!svcxdr_decode_stats(xdr, &resp->status)) - return 0; + return false; - return 1; + return true; } -int +bool nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nlm_reboot *argp = rqstp->rq_argp; @@ -249,25 +249,25 @@ nlmsvc_decode_reboot(struct svc_rqst 
*rqstp, struct xdr_stream *xdr) u32 len; if (xdr_stream_decode_u32(xdr, &len) < 0) - return 0; + return false; if (len > SM_MAXSTRLEN) - return 0; + return false; p = xdr_inline_decode(xdr, len); if (!p) - return 0; + return false; argp->len = len; argp->mon = (char *)p; if (xdr_stream_decode_u32(xdr, &argp->state) < 0) - return 0; + return false; p = xdr_inline_decode(xdr, SM_PRIV_SIZE); if (!p) - return 0; + return false; memcpy(&argp->priv.data, p, sizeof(argp->priv.data)); - return 1; + return true; } -int +bool nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nlm_args *argp = rqstp->rq_argp; @@ -278,34 +278,34 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) lock->svid = ~(u32)0; if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return 0; + return false; if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len)) - return 0; + return false; if (!svcxdr_decode_fhandle(xdr, &lock->fh)) - return 0; + return false; if (!svcxdr_decode_owner(xdr, &lock->oh)) - return 0; + return false; /* XXX: Range checks are missing in the original code */ if (xdr_stream_decode_u32(xdr, &argp->fsm_mode) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &argp->fsm_access) < 0) - return 0; + return false; - return 1; + return true; } -int +bool nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nlm_args *argp = rqstp->rq_argp; struct nlm_lock *lock = &argp->lock; if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &argp->state) < 0) - return 0; + return false; - return 1; + return true; } diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index 573c7d580a5e..45551dee26b4 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -144,102 +144,103 @@ svcxdr_encode_testrply(struct xdr_stream *xdr, const struct nlm_res *resp) * Decode Call arguments */ -int +bool nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - 
return 1; + return true; } -int +bool nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nlm_args *argp = rqstp->rq_argp; u32 exclusive; if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return 0; + return false; if (xdr_stream_decode_bool(xdr, &exclusive) < 0) - return 0; + return false; if (!svcxdr_decode_lock(xdr, &argp->lock)) - return 0; + return false; if (exclusive) argp->lock.fl.fl_type = F_WRLCK; - return 1; + return true; } -int +bool nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nlm_args *argp = rqstp->rq_argp; u32 exclusive; if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return 0; + return false; if (xdr_stream_decode_bool(xdr, &argp->block) < 0) - return 0; + return false; if (xdr_stream_decode_bool(xdr, &exclusive) < 0) - return 0; + return false; if (!svcxdr_decode_lock(xdr, &argp->lock)) - return 0; + return false; if (exclusive) argp->lock.fl.fl_type = F_WRLCK; if (xdr_stream_decode_bool(xdr, &argp->reclaim) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &argp->state) < 0) - return 0; + return false; argp->monitor = 1; /* monitor client by default */ - return 1; + return true; } -int +bool nlm4svc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nlm_args *argp = rqstp->rq_argp; u32 exclusive; if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return 0; + return false; if (xdr_stream_decode_bool(xdr, &argp->block) < 0) - return 0; + return false; if (xdr_stream_decode_bool(xdr, &exclusive) < 0) - return 0; + return false; if (!svcxdr_decode_lock(xdr, &argp->lock)) - return 0; + return false; if (exclusive) argp->lock.fl.fl_type = F_WRLCK; - return 1; + + return true; } -int +bool nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nlm_args *argp = rqstp->rq_argp; if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return 0; + return false; if (!svcxdr_decode_lock(xdr, &argp->lock)) - return 0; + return false; 
argp->lock.fl.fl_type = F_UNLCK; - return 1; + return true; } -int +bool nlm4svc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nlm_res *resp = rqstp->rq_argp; if (!svcxdr_decode_cookie(xdr, &resp->cookie)) - return 0; + return false; if (!svcxdr_decode_stats(xdr, &resp->status)) - return 0; + return false; - return 1; + return true; } -int +bool nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nlm_reboot *argp = rqstp->rq_argp; @@ -247,25 +248,25 @@ nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr) u32 len; if (xdr_stream_decode_u32(xdr, &len) < 0) - return 0; + return false; if (len > SM_MAXSTRLEN) - return 0; + return false; p = xdr_inline_decode(xdr, len); if (!p) - return 0; + return false; argp->len = len; argp->mon = (char *)p; if (xdr_stream_decode_u32(xdr, &argp->state) < 0) - return 0; + return false; p = xdr_inline_decode(xdr, SM_PRIV_SIZE); if (!p) - return 0; + return false; memcpy(&argp->priv.data, p, sizeof(argp->priv.data)); - return 1; + return true; } -int +bool nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nlm_args *argp = rqstp->rq_argp; @@ -276,34 +277,34 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) lock->svid = ~(u32)0; if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return 0; + return false; if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len)) - return 0; + return false; if (!svcxdr_decode_fhandle(xdr, &lock->fh)) - return 0; + return false; if (!svcxdr_decode_owner(xdr, &lock->oh)) - return 0; + return false; /* XXX: Range checks are missing in the original code */ if (xdr_stream_decode_u32(xdr, &argp->fsm_mode) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &argp->fsm_access) < 0) - return 0; + return false; - return 1; + return true; } -int +bool nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nlm_args *argp = rqstp->rq_argp; struct nlm_lock *lock = 
&argp->lock; if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &argp->state) < 0) - return 0; + return false; - return 1; + return true; } diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 0069c0fdb94f..cf6ba5e7937e 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -188,51 +188,51 @@ out: * XDR decode functions */ -static int +static bool nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_getaclargs *argp = rqstp->rq_argp; if (!svcxdr_decode_fhandle(xdr, &argp->fh)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &argp->mask) < 0) - return 0; + return false; - return 1; + return true; } -static int +static bool nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_setaclargs *argp = rqstp->rq_argp; if (!svcxdr_decode_fhandle(xdr, &argp->fh)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &argp->mask) < 0) - return 0; + return false; if (argp->mask & ~NFS_ACL_MASK) - return 0; + return false; if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_ACL) ? &argp->acl_access : NULL)) - return 0; + return false; if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_DFACL) ? 
&argp->acl_default : NULL)) - return 0; + return false; - return 1; + return true; } -static int +static bool nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_accessargs *args = rqstp->rq_argp; if (!svcxdr_decode_fhandle(xdr, &args->fh)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->access) < 0) - return 0; + return false; - return 1; + return true; } /* diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index b1e352ed2436..9e9f6afb2e00 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -127,38 +127,38 @@ out: * XDR decode functions */ -static int +static bool nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_getaclargs *args = rqstp->rq_argp; if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->mask) < 0) - return 0; + return false; - return 1; + return true; } -static int +static bool nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_setaclargs *argp = rqstp->rq_argp; if (!svcxdr_decode_nfs_fh3(xdr, &argp->fh)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &argp->mask) < 0) - return 0; + return false; if (argp->mask & ~NFS_ACL_MASK) - return 0; + return false; if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_ACL) ? &argp->acl_access : NULL)) - return 0; + return false; if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_DFACL) ? 
&argp->acl_default : NULL)) - return 0; + return false; - return 1; + return true; } /* diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 5f744f03cda7..1f3de46d24d4 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -556,7 +556,7 @@ void fill_post_wcc(struct svc_fh *fhp) * XDR decode functions */ -int +bool nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_fhandle *args = rqstp->rq_argp; @@ -564,7 +564,7 @@ nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) return svcxdr_decode_nfs_fh3(xdr, &args->fh); } -int +bool nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_sattrargs *args = rqstp->rq_argp; @@ -574,7 +574,7 @@ nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) svcxdr_decode_sattrguard3(xdr, args); } -int +bool nfs3svc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_diropargs *args = rqstp->rq_argp; @@ -582,75 +582,75 @@ nfs3svc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) return svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len); } -int +bool nfs3svc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_accessargs *args = rqstp->rq_argp; if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->access) < 0) - return 0; + return false; - return 1; + return true; } -int +bool nfs3svc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_readargs *args = rqstp->rq_argp; if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) - return 0; + return false; if (xdr_stream_decode_u64(xdr, &args->offset) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->count) < 0) - return 0; + return false; - return 1; + return true; } -int +bool nfs3svc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_writeargs *args = rqstp->rq_argp; u32 
max_blocksize = svc_max_payload(rqstp); if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) - return 0; + return false; if (xdr_stream_decode_u64(xdr, &args->offset) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->count) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->stable) < 0) - return 0; + return false; /* opaque data */ if (xdr_stream_decode_u32(xdr, &args->len) < 0) - return 0; + return false; /* request sanity */ if (args->count != args->len) - return 0; + return false; if (args->count > max_blocksize) { args->count = max_blocksize; args->len = max_blocksize; } if (!xdr_stream_subsegment(xdr, &args->payload, args->count)) - return 0; + return false; - return 1; + return true; } -int +bool nfs3svc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_createargs *args = rqstp->rq_argp; if (!svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->createmode) < 0) - return 0; + return false; switch (args->createmode) { case NFS3_CREATE_UNCHECKED: case NFS3_CREATE_GUARDED: @@ -658,15 +658,15 @@ nfs3svc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) case NFS3_CREATE_EXCLUSIVE: args->verf = xdr_inline_decode(xdr, NFS3_CREATEVERFSIZE); if (!args->verf) - return 0; + return false; break; default: - return 0; + return false; } - return 1; + return true; } -int +bool nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_createargs *args = rqstp->rq_argp; @@ -676,7 +676,7 @@ nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) svcxdr_decode_sattr3(rqstp, xdr, &args->attrs); } -int +bool nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_symlinkargs *args = rqstp->rq_argp; @@ -685,33 +685,33 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) size_t remaining; if (!svcxdr_decode_diropargs3(xdr, 
&args->ffh, &args->fname, &args->flen)) - return 0; + return false; if (!svcxdr_decode_sattr3(rqstp, xdr, &args->attrs)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->tlen) < 0) - return 0; + return false; /* request sanity */ remaining = head->iov_len + rqstp->rq_arg.page_len + tail->iov_len; remaining -= xdr_stream_pos(xdr); if (remaining < xdr_align_size(args->tlen)) - return 0; + return false; args->first.iov_base = xdr->p; args->first.iov_len = head->iov_len - xdr_stream_pos(xdr); - return 1; + return true; } -int +bool nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_mknodargs *args = rqstp->rq_argp; if (!svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->ftype) < 0) - return 0; + return false; switch (args->ftype) { case NF3CHR: case NF3BLK: @@ -725,13 +725,13 @@ nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) /* Valid XDR but illegal file types */ break; default: - return 0; + return false; } - return 1; + return true; } -int +bool nfs3svc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_renameargs *args = rqstp->rq_argp; @@ -742,7 +742,7 @@ nfs3svc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) &args->tname, &args->tlen); } -int +bool nfs3svc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_linkargs *args = rqstp->rq_argp; @@ -752,59 +752,59 @@ nfs3svc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) &args->tname, &args->tlen); } -int +bool nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_readdirargs *args = rqstp->rq_argp; if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) - return 0; + return false; if (xdr_stream_decode_u64(xdr, &args->cookie) < 0) - return 0; + return false; args->verf = xdr_inline_decode(xdr, NFS3_COOKIEVERFSIZE); if (!args->verf) - return 0; + 
return false; if (xdr_stream_decode_u32(xdr, &args->count) < 0) - return 0; + return false; - return 1; + return true; } -int +bool nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_readdirargs *args = rqstp->rq_argp; u32 dircount; if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) - return 0; + return false; if (xdr_stream_decode_u64(xdr, &args->cookie) < 0) - return 0; + return false; args->verf = xdr_inline_decode(xdr, NFS3_COOKIEVERFSIZE); if (!args->verf) - return 0; + return false; /* dircount is ignored */ if (xdr_stream_decode_u32(xdr, &dircount) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->count) < 0) - return 0; + return false; - return 1; + return true; } -int +bool nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_commitargs *args = rqstp->rq_argp; if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) - return 0; + return false; if (xdr_stream_decode_u64(xdr, &args->offset) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->count) < 0) - return 0; + return false; - return 1; + return true; } /* diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index fc0f154f1172..dd1ee9ada7dd 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2322,7 +2322,7 @@ nfsd4_opnum_in_range(struct nfsd4_compoundargs *argp, struct nfsd4_op *op) return true; } -static int +static bool nfsd4_decode_compound(struct nfsd4_compoundargs *argp) { struct nfsd4_op *op; @@ -2335,25 +2335,25 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) int i; if (xdr_stream_decode_u32(argp->xdr, &argp->taglen) < 0) - return 0; + return false; max_reply += XDR_UNIT; argp->tag = NULL; if (unlikely(argp->taglen)) { if (argp->taglen > NFSD4_MAX_TAGLEN) - return 0; + return false; p = xdr_inline_decode(argp->xdr, argp->taglen); if (!p) - return 0; + return false; argp->tag = svcxdr_savemem(argp, p, argp->taglen); if (!argp->tag) - return 0; + return false; max_reply += 
xdr_align_size(argp->taglen); } if (xdr_stream_decode_u32(argp->xdr, &argp->minorversion) < 0) - return 0; + return false; if (xdr_stream_decode_u32(argp->xdr, &argp->opcnt) < 0) - return 0; + return false; /* * NFS4ERR_RESOURCE is a more helpful error than GARBAGE_ARGS @@ -2361,14 +2361,14 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) * nfsd4_proc can handle this is an NFS-level error. */ if (argp->opcnt > NFSD_MAX_OPS_PER_COMPOUND) - return 1; + return true; if (argp->opcnt > ARRAY_SIZE(argp->iops)) { argp->ops = kzalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL); if (!argp->ops) { argp->ops = argp->iops; dprintk("nfsd: couldn't allocate room for COMPOUND\n"); - return 0; + return false; } } @@ -2380,7 +2380,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) op->replay = NULL; if (xdr_stream_decode_u32(argp->xdr, &op->opnum) < 0) - return 0; + return false; if (nfsd4_opnum_in_range(argp, op)) { op->status = nfsd4_dec_ops[op->opnum](argp, &op->u); if (op->status != nfs_ok) @@ -2427,7 +2427,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack) clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags); - return 1; + return true; } static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode, @@ -5411,7 +5411,7 @@ void nfsd4_release_compoundargs(struct svc_rqst *rqstp) } } -int +bool nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd4_compoundargs *args = rqstp->rq_argp; diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 6e8ad5f9757c..bfcddd4c7534 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -78,7 +78,7 @@ extern const struct seq_operations nfs_exports_op; */ struct nfsd_voidargs { }; struct nfsd_voidres { }; -int nfssvc_decode_voidarg(struct svc_rqst *rqstp, +bool nfssvc_decode_voidarg(struct svc_rqst *rqstp, struct xdr_stream *xdr); int nfssvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p); diff --git a/fs/nfsd/nfssvc.c 
b/fs/nfsd/nfssvc.c index 7cd13e9474ff..beb564e8a3db 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -1067,10 +1067,10 @@ out_encode_err: * @xdr: XDR stream positioned at arguments to decode * * Return values: - * %0: Arguments were not valid - * %1: Decoding was successful + * %false: Arguments were not valid + * %true: Decoding was successful */ -int nfssvc_decode_voidarg(struct svc_rqst *rqstp, struct xdr_stream *xdr) +bool nfssvc_decode_voidarg(struct svc_rqst *rqstp, struct xdr_stream *xdr) { return 1; } diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 08e899180ee4..b5817a41b3de 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -272,7 +272,7 @@ svcxdr_encode_fattr(struct svc_rqst *rqstp, struct xdr_stream *xdr, * XDR decode functions */ -int +bool nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_fhandle *args = rqstp->rq_argp; @@ -280,7 +280,7 @@ nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) return svcxdr_decode_fhandle(xdr, &args->fh); } -int +bool nfssvc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_sattrargs *args = rqstp->rq_argp; @@ -289,7 +289,7 @@ nfssvc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) svcxdr_decode_sattr(rqstp, xdr, &args->attrs); } -int +bool nfssvc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_diropargs *args = rqstp->rq_argp; @@ -297,54 +297,54 @@ nfssvc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) return svcxdr_decode_diropargs(xdr, &args->fh, &args->name, &args->len); } -int +bool nfssvc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_readargs *args = rqstp->rq_argp; u32 totalcount; if (!svcxdr_decode_fhandle(xdr, &args->fh)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->offset) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->count) < 0) - return 0; + return false; /* 
totalcount is ignored */ if (xdr_stream_decode_u32(xdr, &totalcount) < 0) - return 0; + return false; - return 1; + return true; } -int +bool nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_writeargs *args = rqstp->rq_argp; u32 beginoffset, totalcount; if (!svcxdr_decode_fhandle(xdr, &args->fh)) - return 0; + return false; /* beginoffset is ignored */ if (xdr_stream_decode_u32(xdr, &beginoffset) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->offset) < 0) - return 0; + return false; /* totalcount is ignored */ if (xdr_stream_decode_u32(xdr, &totalcount) < 0) - return 0; + return false; /* opaque data */ if (xdr_stream_decode_u32(xdr, &args->len) < 0) - return 0; + return false; if (args->len > NFSSVC_MAXBLKSIZE_V2) - return 0; + return false; if (!xdr_stream_subsegment(xdr, &args->payload, args->len)) - return 0; + return false; - return 1; + return true; } -int +bool nfssvc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_createargs *args = rqstp->rq_argp; @@ -354,7 +354,7 @@ nfssvc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) svcxdr_decode_sattr(rqstp, xdr, &args->attrs); } -int +bool nfssvc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_renameargs *args = rqstp->rq_argp; @@ -365,7 +365,7 @@ nfssvc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) &args->tname, &args->tlen); } -int +bool nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_linkargs *args = rqstp->rq_argp; @@ -375,39 +375,39 @@ nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) &args->tname, &args->tlen); } -int +bool nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_symlinkargs *args = rqstp->rq_argp; struct kvec *head = rqstp->rq_arg.head; if (!svcxdr_decode_diropargs(xdr, &args->ffh, &args->fname, &args->flen)) - return 0; + return false; if 
(xdr_stream_decode_u32(xdr, &args->tlen) < 0) - return 0; + return false; if (args->tlen == 0) - return 0; + return false; args->first.iov_len = head->iov_len - xdr_stream_pos(xdr); args->first.iov_base = xdr_inline_decode(xdr, args->tlen); if (!args->first.iov_base) - return 0; + return false; return svcxdr_decode_sattr(rqstp, xdr, &args->attrs); } -int +bool nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_readdirargs *args = rqstp->rq_argp; if (!svcxdr_decode_fhandle(xdr, &args->fh)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->cookie) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->count) < 0) - return 0; + return false; - return 1; + return true; } /* diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h index 804f9af94d6d..31be7d30e64e 100644 --- a/fs/nfsd/xdr.h +++ b/fs/nfsd/xdr.h @@ -141,16 +141,16 @@ union nfsd_xdrstore { #define NFS2_SVC_XDRSIZE sizeof(union nfsd_xdrstore) -int nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfssvc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfssvc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfssvc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfssvc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfssvc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool 
nfssvc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); int nfssvc_encode_statres(struct svc_rqst *, __be32 *); int nfssvc_encode_attrstatres(struct svc_rqst *, __be32 *); diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index 60a8909205e5..ef72bc4868da 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -265,21 +265,21 @@ union nfsd3_xdrstore { #define NFS3_SVC_XDRSIZE sizeof(union nfsd3_xdrstore) -int nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfs3svc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfs3svc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfs3svc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfs3svc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfs3svc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfs3svc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfs3svc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct 
xdr_stream *xdr); -int nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); int nfs3svc_encode_getattrres(struct svc_rqst *, __be32 *); int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 1d1b8771bdcf..8812256cd520 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -756,7 +756,7 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp) bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp); -int nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *); __be32 nfsd4_check_resp_size(struct 
nfsd4_compoundres *, u32); void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index 170ad6f5596a..e1362244f909 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -96,15 +96,15 @@ struct nlm_reboot { */ #define NLMSVC_XDRSIZE sizeof(struct nlm_args) -int nlmsvc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nlmsvc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nlmsvc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nlmsvc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nlmsvc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr); int nlmsvc_encode_testres(struct svc_rqst *, __be32 *); int nlmsvc_encode_res(struct svc_rqst *, __be32 *); diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h index 68e14e0f2b1f..376b8f6a3763 100644 --- a/include/linux/lockd/xdr4.h +++ 
b/include/linux/lockd/xdr4.h @@ -22,15 +22,15 @@ #define nlm4_fbig cpu_to_be32(NLM_FBIG) #define nlm4_failed cpu_to_be32(NLM_FAILED) -int nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nlm4svc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nlm4svc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr); int nlm4svc_encode_testres(struct svc_rqst *, __be32 *); int nlm4svc_encode_res(struct svc_rqst *, __be32 *); diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index da3c5bc43d85..d6109fa7a57b 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -454,7 +454,7 @@ struct svc_procedure { /* process the request: */ __be32 (*pc_func)(struct svc_rqst *); /* XDR decode args: */ - int (*pc_decode)(struct svc_rqst *rqstp, + bool 
(*pc_decode)(struct svc_rqst *rqstp, struct xdr_stream *xdr); /* XDR encode result: */ int (*pc_encode)(struct svc_rqst *, __be32 *data); From 3b0ebb255fdc49a3d340846deebf045ef58ec744 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 13 Oct 2021 10:40:59 -0400 Subject: [PATCH 098/433] NFSD: Save location of NFSv4 COMPOUND status Refactor: Currently nfs4svc_encode_compoundres() relies on the NFS dispatcher to pass in the buffer location of the COMPOUND status. Instead, save that buffer location in struct nfsd4_compoundres. The compound tag follows immediately after. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 2 +- fs/nfsd/nfs4xdr.c | 9 +++++++-- fs/nfsd/xdr4.h | 3 ++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 5895bbeba373..a36261f89bdf 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -2461,11 +2461,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) __be32 status; resp->xdr = &rqstp->rq_res_stream; + resp->statusp = resp->xdr->p; /* reserve space for: NFS status code */ xdr_reserve_space(resp->xdr, XDR_UNIT); - resp->tagp = resp->xdr->p; /* reserve space for: taglen, tag, and opcnt */ xdr_reserve_space(resp->xdr, XDR_UNIT * 2 + args->taglen); resp->taglen = args->taglen; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index dd1ee9ada7dd..c6623080ad98 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -5435,11 +5435,16 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p) WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len + buf->tail[0].iov_len); - *p = resp->cstate.status; + /* + * Send buffer space for the following items is reserved + * at the top of nfsd4_proc_compound(). 
+ */ + p = resp->statusp; + + *p++ = resp->cstate.status; rqstp->rq_next_page = resp->xdr->page_ptr + 1; - p = resp->tagp; *p++ = htonl(resp->taglen); memcpy(p, resp->tag, resp->taglen); p += XDR_QUADLEN(resp->taglen); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 8812256cd520..6aeb6755278f 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -702,10 +702,11 @@ struct nfsd4_compoundres { struct xdr_stream *xdr; struct svc_rqst * rqstp; + __be32 *statusp; u32 taglen; char * tag; u32 opcnt; - __be32 * tagp; /* tag, opcount encode location */ + struct nfsd4_compound_state cstate; }; From fda494411485aff91768842c532f90fb8eb54943 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 13 Oct 2021 10:41:06 -0400 Subject: [PATCH 099/433] SUNRPC: Replace the "__be32 *p" parameter to .pc_encode The passed-in value of the "__be32 *p" parameter is now unused in every server-side XDR encoder, and can be removed. Note also that there is a line in each encoder that sets up a local pointer to a struct xdr_stream. Passing that pointer from the dispatcher instead saves one line per encoder function. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- fs/lockd/svc.c | 3 +-- fs/lockd/xdr.c | 11 ++++----- fs/lockd/xdr4.c | 11 ++++----- fs/nfs/callback_xdr.c | 4 ++-- fs/nfsd/nfs2acl.c | 8 +++---- fs/nfsd/nfs3acl.c | 8 +++---- fs/nfsd/nfs3xdr.c | 46 +++++++++++++------------------------- fs/nfsd/nfs4xdr.c | 7 +++--- fs/nfsd/nfsd.h | 3 ++- fs/nfsd/nfssvc.c | 9 +++----- fs/nfsd/nfsxdr.c | 22 +++++++----------- fs/nfsd/xdr.h | 14 ++++++------ fs/nfsd/xdr3.h | 30 ++++++++++++------------- fs/nfsd/xdr4.h | 2 +- include/linux/lockd/xdr.h | 8 +++---- include/linux/lockd/xdr4.h | 8 +++---- include/linux/sunrpc/svc.h | 3 ++- 17 files changed, 85 insertions(+), 112 deletions(-) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 9a82471bda07..b220e1b91726 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -780,7 +780,6 @@ module_exit(exit_nlm); static int nlmsvc_dispatch(struct svc_rqst *rqstp, __be32 *statp) { const struct svc_procedure *procp = rqstp->rq_procinfo; - struct kvec *resv = rqstp->rq_res.head; svcxdr_init_decode(rqstp); if (!procp->pc_decode(rqstp, &rqstp->rq_arg_stream)) @@ -793,7 +792,7 @@ static int nlmsvc_dispatch(struct svc_rqst *rqstp, __be32 *statp) return 1; svcxdr_init_encode(rqstp); - if (!procp->pc_encode(rqstp, resv->iov_base + resv->iov_len)) + if (!procp->pc_encode(rqstp, &rqstp->rq_res_stream)) goto out_encode_err; return 1; diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 622c2ca37dbf..2595b4d14cd4 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -314,15 +314,14 @@ nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr) */ int -nlmsvc_encode_void(struct svc_rqst *rqstp, __be32 *p) +nlmsvc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr) { return 1; } int -nlmsvc_encode_testres(struct svc_rqst *rqstp, __be32 *p) +nlmsvc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nlm_res *resp = rqstp->rq_resp; return svcxdr_encode_cookie(xdr, &resp->cookie) && @@ -330,9 +329,8 @@ 
nlmsvc_encode_testres(struct svc_rqst *rqstp, __be32 *p) } int -nlmsvc_encode_res(struct svc_rqst *rqstp, __be32 *p) +nlmsvc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nlm_res *resp = rqstp->rq_resp; return svcxdr_encode_cookie(xdr, &resp->cookie) && @@ -340,9 +338,8 @@ nlmsvc_encode_res(struct svc_rqst *rqstp, __be32 *p) } int -nlmsvc_encode_shareres(struct svc_rqst *rqstp, __be32 *p) +nlmsvc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nlm_res *resp = rqstp->rq_resp; if (!svcxdr_encode_cookie(xdr, &resp->cookie)) diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index 45551dee26b4..32231c21c22d 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -313,15 +313,14 @@ nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr) */ int -nlm4svc_encode_void(struct svc_rqst *rqstp, __be32 *p) +nlm4svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr) { return 1; } int -nlm4svc_encode_testres(struct svc_rqst *rqstp, __be32 *p) +nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nlm_res *resp = rqstp->rq_resp; return svcxdr_encode_cookie(xdr, &resp->cookie) && @@ -329,9 +328,8 @@ nlm4svc_encode_testres(struct svc_rqst *rqstp, __be32 *p) } int -nlm4svc_encode_res(struct svc_rqst *rqstp, __be32 *p) +nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nlm_res *resp = rqstp->rq_resp; return svcxdr_encode_cookie(xdr, &resp->cookie) && @@ -339,9 +337,8 @@ nlm4svc_encode_res(struct svc_rqst *rqstp, __be32 *p) } int -nlm4svc_encode_shareres(struct svc_rqst *rqstp, __be32 *p) +nlm4svc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nlm_res *resp = rqstp->rq_resp; if 
(!svcxdr_encode_cookie(xdr, &resp->cookie)) diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 4c48d85f6517..286d330488a7 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -67,9 +67,9 @@ static __be32 nfs4_callback_null(struct svc_rqst *rqstp) * svc_process_common() looks for an XDR encoder to know when * not to drop a Reply. */ -static int nfs4_encode_void(struct svc_rqst *rqstp, __be32 *p) +static int nfs4_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - return xdr_ressize_check(rqstp, p); + return 1; } static __be32 decode_string(struct xdr_stream *xdr, unsigned int *len, diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index cf6ba5e7937e..25592ba1ed50 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -240,9 +240,9 @@ nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) */ /* GETACL */ -static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p) +static int +nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_getaclres *resp = rqstp->rq_resp; struct dentry *dentry = resp->fh.fh_dentry; struct inode *inode; @@ -280,9 +280,9 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p) } /* ACCESS */ -static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p) +static int +nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_accessres *resp = rqstp->rq_resp; if (!svcxdr_encode_stat(xdr, resp->status)) diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 9e9f6afb2e00..e186467b63ec 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -166,9 +166,9 @@ nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) */ /* GETACL */ -static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p) +static int +nfs3svc_encode_getaclres(struct svc_rqst *rqstp, 
struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_getaclres *resp = rqstp->rq_resp; struct dentry *dentry = resp->fh.fh_dentry; struct kvec *head = rqstp->rq_res.head; @@ -218,9 +218,9 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p) } /* SETACL */ -static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, __be32 *p) +static int +nfs3svc_encode_setaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_attrstat *resp = rqstp->rq_resp; return svcxdr_encode_nfsstat3(xdr, resp->status) && diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 1f3de46d24d4..63f0be4e44f7 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -813,9 +813,8 @@ nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) /* GETATTR */ int -nfs3svc_encode_getattrres(struct svc_rqst *rqstp, __be32 *p) +nfs3svc_encode_getattrres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_attrstat *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) @@ -833,9 +832,8 @@ nfs3svc_encode_getattrres(struct svc_rqst *rqstp, __be32 *p) /* SETATTR, REMOVE, RMDIR */ int -nfs3svc_encode_wccstat(struct svc_rqst *rqstp, __be32 *p) +nfs3svc_encode_wccstat(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_attrstat *resp = rqstp->rq_resp; return svcxdr_encode_nfsstat3(xdr, resp->status) && @@ -843,9 +841,9 @@ nfs3svc_encode_wccstat(struct svc_rqst *rqstp, __be32 *p) } /* LOOKUP */ -int nfs3svc_encode_lookupres(struct svc_rqst *rqstp, __be32 *p) +int +nfs3svc_encode_lookupres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_diropres *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) @@ -869,9 +867,8 @@ int nfs3svc_encode_lookupres(struct svc_rqst 
*rqstp, __be32 *p) /* ACCESS */ int -nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p) +nfs3svc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_accessres *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) @@ -893,9 +890,8 @@ nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p) /* READLINK */ int -nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p) +nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_readlinkres *resp = rqstp->rq_resp; struct kvec *head = rqstp->rq_res.head; @@ -921,9 +917,8 @@ nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p) /* READ */ int -nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p) +nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_readres *resp = rqstp->rq_resp; struct kvec *head = rqstp->rq_res.head; @@ -954,9 +949,8 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p) /* WRITE */ int -nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p) +nfs3svc_encode_writeres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_writeres *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) @@ -982,9 +976,8 @@ nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p) /* CREATE, MKDIR, SYMLINK, MKNOD */ int -nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p) +nfs3svc_encode_createres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_diropres *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) @@ -1008,9 +1001,8 @@ nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p) /* RENAME */ int -nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p) 
+nfs3svc_encode_renameres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_renameres *resp = rqstp->rq_resp; return svcxdr_encode_nfsstat3(xdr, resp->status) && @@ -1020,9 +1012,8 @@ nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p) /* LINK */ int -nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p) +nfs3svc_encode_linkres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_linkres *resp = rqstp->rq_resp; return svcxdr_encode_nfsstat3(xdr, resp->status) && @@ -1032,9 +1023,8 @@ nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p) /* READDIR */ int -nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p) +nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_readdirres *resp = rqstp->rq_resp; struct xdr_buf *dirlist = &resp->dirlist; @@ -1286,9 +1276,8 @@ svcxdr_encode_fsstat3resok(struct xdr_stream *xdr, /* FSSTAT */ int -nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, __be32 *p) +nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_fsstatres *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) @@ -1333,9 +1322,8 @@ svcxdr_encode_fsinfo3resok(struct xdr_stream *xdr, /* FSINFO */ int -nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, __be32 *p) +nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_fsinfores *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) @@ -1376,9 +1364,8 @@ svcxdr_encode_pathconf3resok(struct xdr_stream *xdr, /* PATHCONF */ int -nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p) +nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = 
&rqstp->rq_res_stream; struct nfsd3_pathconfres *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) @@ -1400,9 +1387,8 @@ nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p) /* COMMIT */ int -nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p) +nfs3svc_encode_commitres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_commitres *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index c6623080ad98..fc77db35f2e7 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -5427,10 +5427,11 @@ nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) } int -nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p) +nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd4_compoundres *resp = rqstp->rq_resp; - struct xdr_buf *buf = resp->xdr->buf; + struct xdr_buf *buf = xdr->buf; + __be32 *p; WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len + buf->tail[0].iov_len); @@ -5443,7 +5444,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p) *p++ = resp->cstate.status; - rqstp->rq_next_page = resp->xdr->page_ptr + 1; + rqstp->rq_next_page = xdr->page_ptr + 1; *p++ = htonl(resp->taglen); memcpy(p, resp->tag, resp->taglen); diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index bfcddd4c7534..345f8247d5da 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -80,7 +80,8 @@ struct nfsd_voidargs { }; struct nfsd_voidres { }; bool nfssvc_decode_voidarg(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfssvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p); +int nfssvc_encode_voidres(struct svc_rqst *rqstp, + struct xdr_stream *xdr); /* * Function prototypes. 
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index beb564e8a3db..ed6a28ecf278 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -1004,8 +1004,6 @@ out: int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) { const struct svc_procedure *proc = rqstp->rq_procinfo; - struct kvec *resv = &rqstp->rq_res.head[0]; - __be32 *p; /* * Give the xdr decoder a chance to change this if it wants @@ -1030,14 +1028,13 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) * Need to grab the location to store the status, as * NFSv4 does some encoding while processing */ - p = resv->iov_base + resv->iov_len; svcxdr_init_encode(rqstp); *statp = proc->pc_func(rqstp); if (*statp == rpc_drop_reply || test_bit(RQ_DROPME, &rqstp->rq_flags)) goto out_update_drop; - if (!proc->pc_encode(rqstp, p)) + if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream)) goto out_encode_err; nfsd_cache_update(rqstp, rqstp->rq_cachetype, statp + 1); @@ -1078,13 +1075,13 @@ bool nfssvc_decode_voidarg(struct svc_rqst *rqstp, struct xdr_stream *xdr) /** * nfssvc_encode_voidres - Encode void results * @rqstp: Server RPC transaction context - * @p: buffer in which to encode results + * @xdr: XDR stream into which to encode results * * Return values: * %0: Local error while encoding * %1: Encoding was successful */ -int nfssvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p) +int nfssvc_encode_voidres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { return 1; } diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index b5817a41b3de..6aa8138ae2f7 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -415,18 +415,16 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) */ int -nfssvc_encode_statres(struct svc_rqst *rqstp, __be32 *p) +nfssvc_encode_statres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_stat *resp = rqstp->rq_resp; return svcxdr_encode_stat(xdr, resp->status); } int -nfssvc_encode_attrstatres(struct 
svc_rqst *rqstp, __be32 *p) +nfssvc_encode_attrstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_attrstat *resp = rqstp->rq_resp; if (!svcxdr_encode_stat(xdr, resp->status)) @@ -442,9 +440,8 @@ nfssvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p) } int -nfssvc_encode_diropres(struct svc_rqst *rqstp, __be32 *p) +nfssvc_encode_diropres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_diropres *resp = rqstp->rq_resp; if (!svcxdr_encode_stat(xdr, resp->status)) @@ -462,9 +459,8 @@ nfssvc_encode_diropres(struct svc_rqst *rqstp, __be32 *p) } int -nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p) +nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_readlinkres *resp = rqstp->rq_resp; struct kvec *head = rqstp->rq_res.head; @@ -484,9 +480,8 @@ nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p) } int -nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p) +nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_readres *resp = rqstp->rq_resp; struct kvec *head = rqstp->rq_res.head; @@ -509,9 +504,8 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p) } int -nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p) +nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_readdirres *resp = rqstp->rq_resp; struct xdr_buf *dirlist = &resp->dirlist; @@ -532,11 +526,11 @@ nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p) } int -nfssvc_encode_statfsres(struct svc_rqst *rqstp, __be32 *p) +nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_statfsres *resp = rqstp->rq_resp; 
struct kstatfs *stat = &resp->stats; + __be32 *p; if (!svcxdr_encode_stat(xdr, resp->status)) return 0; diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h index 31be7d30e64e..1133fb3bf328 100644 --- a/fs/nfsd/xdr.h +++ b/fs/nfsd/xdr.h @@ -152,13 +152,13 @@ bool nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); bool nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); bool nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfssvc_encode_statres(struct svc_rqst *, __be32 *); -int nfssvc_encode_attrstatres(struct svc_rqst *, __be32 *); -int nfssvc_encode_diropres(struct svc_rqst *, __be32 *); -int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *); -int nfssvc_encode_readres(struct svc_rqst *, __be32 *); -int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *); -int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *); +int nfssvc_encode_statres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfssvc_encode_attrstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfssvc_encode_diropres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr); void nfssvc_encode_nfscookie(struct nfsd_readdirres *resp, u32 offset); int nfssvc_encode_entry(void *data, const char *name, int namlen, diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index ef72bc4868da..bb017fc7cba1 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -281,21 +281,21 @@ bool nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); bool nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); bool nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int 
nfs3svc_encode_getattrres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *); -int nfs3svc_encode_lookupres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_readres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_writeres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_createres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_renameres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_linkres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_readdirres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_fsstatres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_fsinfores(struct svc_rqst *, __be32 *); -int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *); +int nfs3svc_encode_getattrres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfs3svc_encode_wccstat(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfs3svc_encode_lookupres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfs3svc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfs3svc_encode_writeres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfs3svc_encode_createres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfs3svc_encode_renameres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfs3svc_encode_linkres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, struct xdr_stream *xdr); 
+int nfs3svc_encode_commitres(struct svc_rqst *rqstp, struct xdr_stream *xdr); void nfs3svc_release_fhandle(struct svc_rqst *); void nfs3svc_release_fhandle2(struct svc_rqst *); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 6aeb6755278f..3bd553925c35 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -758,7 +758,7 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp) bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp); bool nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *); +int nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr); __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32); void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op); diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index e1362244f909..d8bd26a5525e 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -106,9 +106,9 @@ bool nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr); bool nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); bool nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nlmsvc_encode_testres(struct svc_rqst *, __be32 *); -int nlmsvc_encode_res(struct svc_rqst *, __be32 *); -int nlmsvc_encode_void(struct svc_rqst *, __be32 *); -int nlmsvc_encode_shareres(struct svc_rqst *, __be32 *); +int nlmsvc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nlmsvc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nlmsvc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nlmsvc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr); #endif /* LOCKD_XDR_H */ diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h index 376b8f6a3763..50677be3557d 100644 --- 
a/include/linux/lockd/xdr4.h +++ b/include/linux/lockd/xdr4.h @@ -32,10 +32,10 @@ bool nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr); bool nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); bool nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nlm4svc_encode_testres(struct svc_rqst *, __be32 *); -int nlm4svc_encode_res(struct svc_rqst *, __be32 *); -int nlm4svc_encode_void(struct svc_rqst *, __be32 *); -int nlm4svc_encode_shareres(struct svc_rqst *, __be32 *); +int nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nlm4svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nlm4svc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr); extern const struct rpc_version nlm_version4; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index d6109fa7a57b..85694cd0db66 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -457,7 +457,8 @@ struct svc_procedure { bool (*pc_decode)(struct svc_rqst *rqstp, struct xdr_stream *xdr); /* XDR encode result: */ - int (*pc_encode)(struct svc_rqst *, __be32 *data); + int (*pc_encode)(struct svc_rqst *rqstp, + struct xdr_stream *xdr); /* XDR free result: */ void (*pc_release)(struct svc_rqst *); unsigned int pc_argsize; /* argument struct size */ From 130e2054d4a652a2bd79fb1557ddcd19c053cb37 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 13 Oct 2021 10:41:13 -0400 Subject: [PATCH 100/433] SUNRPC: Change return value type of .pc_encode Returning an undecorated integer is an age-old trope, but it's not clear (even to previous experts in this code) that the only valid return values are 1 and 0. These functions do not return a negative errno, rpc_stat value, or a positive length. Document there are only two valid return values by having .pc_encode return only true or false. 
Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/lockd/xdr.c | 18 ++-- fs/lockd/xdr4.c | 18 ++-- fs/nfs/callback_xdr.c | 4 +- fs/nfsd/nfs2acl.c | 4 +- fs/nfsd/nfs3acl.c | 18 ++-- fs/nfsd/nfs3xdr.c | 166 ++++++++++++++++++------------------- fs/nfsd/nfs4xdr.c | 4 +- fs/nfsd/nfsd.h | 2 +- fs/nfsd/nfssvc.c | 8 +- fs/nfsd/nfsxdr.c | 60 +++++++------- fs/nfsd/xdr.h | 14 ++-- fs/nfsd/xdr3.h | 30 +++---- fs/nfsd/xdr4.h | 2 +- include/linux/lockd/xdr.h | 8 +- include/linux/lockd/xdr4.h | 8 +- include/linux/sunrpc/svc.h | 2 +- 16 files changed, 183 insertions(+), 183 deletions(-) diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 2595b4d14cd4..2fb5748dae0c 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -313,13 +313,13 @@ nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr) * Encode Reply results */ -int +bool nlmsvc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - return 1; + return true; } -int +bool nlmsvc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nlm_res *resp = rqstp->rq_resp; @@ -328,7 +328,7 @@ nlmsvc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr) svcxdr_encode_testrply(xdr, resp); } -int +bool nlmsvc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nlm_res *resp = rqstp->rq_resp; @@ -337,18 +337,18 @@ nlmsvc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr) svcxdr_encode_stats(xdr, resp->status); } -int +bool nlmsvc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nlm_res *resp = rqstp->rq_resp; if (!svcxdr_encode_cookie(xdr, &resp->cookie)) - return 0; + return false; if (!svcxdr_encode_stats(xdr, resp->status)) - return 0; + return false; /* sequence */ if (xdr_stream_encode_u32(xdr, 0) < 0) - return 0; + return false; - return 1; + return true; } diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index 32231c21c22d..856267c0864b 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -312,13 +312,13 @@ 
nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr) * Encode Reply results */ -int +bool nlm4svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - return 1; + return true; } -int +bool nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nlm_res *resp = rqstp->rq_resp; @@ -327,7 +327,7 @@ nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr) svcxdr_encode_testrply(xdr, resp); } -int +bool nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nlm_res *resp = rqstp->rq_resp; @@ -336,18 +336,18 @@ nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr) svcxdr_encode_stats(xdr, resp->status); } -int +bool nlm4svc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nlm_res *resp = rqstp->rq_resp; if (!svcxdr_encode_cookie(xdr, &resp->cookie)) - return 0; + return false; if (!svcxdr_encode_stats(xdr, resp->status)) - return 0; + return false; /* sequence */ if (xdr_stream_encode_u32(xdr, 0) < 0) - return 0; + return false; - return 1; + return true; } diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 286d330488a7..a67c41ec545f 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -67,9 +67,9 @@ static __be32 nfs4_callback_null(struct svc_rqst *rqstp) * svc_process_common() looks for an XDR encoder to know when * not to drop a Reply. 
*/ -static int nfs4_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr) +static bool nfs4_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - return 1; + return true; } static __be32 decode_string(struct xdr_stream *xdr, unsigned int *len, diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 25592ba1ed50..367551bddfc6 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -240,7 +240,7 @@ nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) */ /* GETACL */ -static int +static bool nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_getaclres *resp = rqstp->rq_resp; @@ -280,7 +280,7 @@ nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr) } /* ACCESS */ -static int +static bool nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_accessres *resp = rqstp->rq_resp; diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index e186467b63ec..35b2ebda14da 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -166,7 +166,7 @@ nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) */ /* GETACL */ -static int +static bool nfs3svc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_getaclres *resp = rqstp->rq_resp; @@ -178,14 +178,14 @@ nfs3svc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr) int w; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: inode = d_inode(dentry); if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; if (xdr_stream_encode_u32(xdr, resp->mask) < 0) - return 0; + return false; base = (char *)xdr->p - (char *)head->iov_base; @@ -194,7 +194,7 @@ nfs3svc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr) (resp->mask & NFS_DFACL) ? 
resp->acl_default : NULL); while (w > 0) { if (!*(rqstp->rq_next_page++)) - return 0; + return false; w -= PAGE_SIZE; } @@ -207,18 +207,18 @@ nfs3svc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr) resp->mask & NFS_DFACL, NFS_ACL_DEFAULT); if (n <= 0) - return 0; + return false; break; default: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; } - return 1; + return true; } /* SETACL */ -static int +static bool nfs3svc_encode_setaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_attrstat *resp = rqstp->rq_resp; diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 63f0be4e44f7..c3ac1b6aa3aa 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -812,26 +812,26 @@ nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) */ /* GETATTR */ -int +bool nfs3svc_encode_getattrres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_attrstat *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: lease_get_mtime(d_inode(resp->fh.fh_dentry), &resp->stat.mtime); if (!svcxdr_encode_fattr3(rqstp, xdr, &resp->fh, &resp->stat)) - return 0; + return false; break; } - return 1; + return true; } /* SETATTR, REMOVE, RMDIR */ -int +bool nfs3svc_encode_wccstat(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_attrstat *resp = rqstp->rq_resp; @@ -841,166 +841,166 @@ nfs3svc_encode_wccstat(struct svc_rqst *rqstp, struct xdr_stream *xdr) } /* LOOKUP */ -int +bool nfs3svc_encode_lookupres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_diropres *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_nfs_fh3(xdr, &resp->fh)) - return 0; + return false; if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; if (!svcxdr_encode_post_op_attr(rqstp, xdr, 
&resp->dirfh)) - return 0; + return false; break; default: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->dirfh)) - return 0; + return false; } - return 1; + return true; } /* ACCESS */ -int +bool nfs3svc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_accessres *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; if (xdr_stream_encode_u32(xdr, resp->access) < 0) - return 0; + return false; break; default: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; } - return 1; + return true; } /* READLINK */ -int +bool nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_readlinkres *resp = rqstp->rq_resp; struct kvec *head = rqstp->rq_res.head; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; if (xdr_stream_encode_u32(xdr, resp->len) < 0) - return 0; + return false; xdr_write_pages(xdr, resp->pages, 0, resp->len); if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0) - return 0; + return false; break; default: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; } - return 1; + return true; } /* READ */ -int +bool nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_readres *resp = rqstp->rq_resp; struct kvec *head = rqstp->rq_res.head; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; if (xdr_stream_encode_u32(xdr, resp->count) < 0) - return 0; + return false; if (xdr_stream_encode_bool(xdr, resp->eof) < 0) - return 0; + return false; 
if (xdr_stream_encode_u32(xdr, resp->count) < 0) - return 0; + return false; xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base, resp->count); if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0) - return 0; + return false; break; default: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; } - return 1; + return true; } /* WRITE */ -int +bool nfs3svc_encode_writeres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_writeres *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh)) - return 0; + return false; if (xdr_stream_encode_u32(xdr, resp->count) < 0) - return 0; + return false; if (xdr_stream_encode_u32(xdr, resp->committed) < 0) - return 0; + return false; if (!svcxdr_encode_writeverf3(xdr, resp->verf)) - return 0; + return false; break; default: if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh)) - return 0; + return false; } - return 1; + return true; } /* CREATE, MKDIR, SYMLINK, MKNOD */ -int +bool nfs3svc_encode_createres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_diropres *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_post_op_fh3(xdr, &resp->fh)) - return 0; + return false; if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->dirfh)) - return 0; + return false; break; default: if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->dirfh)) - return 0; + return false; } - return 1; + return true; } /* RENAME */ -int +bool nfs3svc_encode_renameres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_renameres *resp = rqstp->rq_resp; @@ -1011,7 +1011,7 @@ nfs3svc_encode_renameres(struct svc_rqst *rqstp, struct xdr_stream *xdr) } /* LINK */ -int +bool 
nfs3svc_encode_linkres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_linkres *resp = rqstp->rq_resp; @@ -1022,33 +1022,33 @@ nfs3svc_encode_linkres(struct svc_rqst *rqstp, struct xdr_stream *xdr) } /* READDIR */ -int +bool nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_readdirres *resp = rqstp->rq_resp; struct xdr_buf *dirlist = &resp->dirlist; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; if (!svcxdr_encode_cookieverf3(xdr, resp->verf)) - return 0; + return false; xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len); /* no more entries */ if (xdr_stream_encode_item_absent(xdr) < 0) - return 0; + return false; if (xdr_stream_encode_bool(xdr, resp->common.err == nfserr_eof) < 0) - return 0; + return false; break; default: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; } - return 1; + return true; } static __be32 @@ -1275,26 +1275,26 @@ svcxdr_encode_fsstat3resok(struct xdr_stream *xdr, } /* FSSTAT */ -int +bool nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_fsstatres *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) - return 0; + return false; if (!svcxdr_encode_fsstat3resok(xdr, resp)) - return 0; + return false; break; default: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) - return 0; + return false; } - return 1; + return true; } static bool @@ -1321,26 +1321,26 @@ svcxdr_encode_fsinfo3resok(struct xdr_stream *xdr, } /* FSINFO */ -int +bool nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_fsinfores *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 
0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) - return 0; + return false; if (!svcxdr_encode_fsinfo3resok(xdr, resp)) - return 0; + return false; break; default: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) - return 0; + return false; } - return 1; + return true; } static bool @@ -1363,49 +1363,49 @@ svcxdr_encode_pathconf3resok(struct xdr_stream *xdr, } /* PATHCONF */ -int +bool nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_pathconfres *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) - return 0; + return false; if (!svcxdr_encode_pathconf3resok(xdr, resp)) - return 0; + return false; break; default: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) - return 0; + return false; } - return 1; + return true; } /* COMMIT */ -int +bool nfs3svc_encode_commitres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_commitres *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh)) - return 0; + return false; if (!svcxdr_encode_writeverf3(xdr, resp->verf)) - return 0; + return false; break; default: if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh)) - return 0; + return false; } - return 1; + return true; } /* diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index fc77db35f2e7..9b609aac47e1 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -5426,7 +5426,7 @@ nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) return nfsd4_decode_compound(args); } -int +bool nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd4_compoundres *resp = rqstp->rq_resp; @@ -5452,5 +5452,5 @@ 
nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr) *p++ = htonl(resp->opcnt); nfsd4_sequence_done(resp); - return 1; + return true; } diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 345f8247d5da..498e5a489826 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -80,7 +80,7 @@ struct nfsd_voidargs { }; struct nfsd_voidres { }; bool nfssvc_decode_voidarg(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfssvc_encode_voidres(struct svc_rqst *rqstp, +bool nfssvc_encode_voidres(struct svc_rqst *rqstp, struct xdr_stream *xdr); /* diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index ed6a28ecf278..362e819ff06a 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -1078,12 +1078,12 @@ bool nfssvc_decode_voidarg(struct svc_rqst *rqstp, struct xdr_stream *xdr) * @xdr: XDR stream into which to encode results * * Return values: - * %0: Local error while encoding - * %1: Encoding was successful + * %false: Local error while encoding + * %true: Encoding was successful */ -int nfssvc_encode_voidres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +bool nfssvc_encode_voidres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - return 1; + return true; } int nfsd_pool_stats_open(struct inode *inode, struct file *file) diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 6aa8138ae2f7..aba8520b4b8b 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -414,7 +414,7 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) * XDR encode functions */ -int +bool nfssvc_encode_statres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_stat *resp = rqstp->rq_resp; @@ -422,110 +422,110 @@ nfssvc_encode_statres(struct svc_rqst *rqstp, struct xdr_stream *xdr) return svcxdr_encode_stat(xdr, resp->status); } -int +bool nfssvc_encode_attrstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_attrstat *resp = rqstp->rq_resp; if (!svcxdr_encode_stat(xdr, resp->status)) - return 0; + return false; switch (resp->status) 
{ case nfs_ok: if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) - return 0; + return false; break; } - return 1; + return true; } -int +bool nfssvc_encode_diropres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_diropres *resp = rqstp->rq_resp; if (!svcxdr_encode_stat(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_fhandle(xdr, &resp->fh)) - return 0; + return false; if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) - return 0; + return false; break; } - return 1; + return true; } -int +bool nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_readlinkres *resp = rqstp->rq_resp; struct kvec *head = rqstp->rq_res.head; if (!svcxdr_encode_stat(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (xdr_stream_encode_u32(xdr, resp->len) < 0) - return 0; + return false; xdr_write_pages(xdr, &resp->page, 0, resp->len); if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0) - return 0; + return false; break; } - return 1; + return true; } -int +bool nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_readres *resp = rqstp->rq_resp; struct kvec *head = rqstp->rq_res.head; if (!svcxdr_encode_stat(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) - return 0; + return false; if (xdr_stream_encode_u32(xdr, resp->count) < 0) - return 0; + return false; xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base, resp->count); if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0) - return 0; + return false; break; } - return 1; + return true; } -int +bool nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_readdirres *resp = rqstp->rq_resp; struct xdr_buf *dirlist = &resp->dirlist; if (!svcxdr_encode_stat(xdr, resp->status)) - 
return 0; + return false; switch (resp->status) { case nfs_ok: xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len); /* no more entries */ if (xdr_stream_encode_item_absent(xdr) < 0) - return 0; + return false; if (xdr_stream_encode_bool(xdr, resp->common.err == nfserr_eof) < 0) - return 0; + return false; break; } - return 1; + return true; } -int +bool nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_statfsres *resp = rqstp->rq_resp; @@ -533,12 +533,12 @@ nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr) __be32 *p; if (!svcxdr_encode_stat(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: p = xdr_reserve_space(xdr, XDR_UNIT * 5); if (!p) - return 0; + return false; *p++ = cpu_to_be32(NFSSVC_MAXBLKSIZE_V2); *p++ = cpu_to_be32(stat->f_bsize); *p++ = cpu_to_be32(stat->f_blocks); @@ -547,7 +547,7 @@ nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr) break; } - return 1; + return true; } /** diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h index 1133fb3bf328..528fb299430e 100644 --- a/fs/nfsd/xdr.h +++ b/fs/nfsd/xdr.h @@ -152,13 +152,13 @@ bool nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); bool nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); bool nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfssvc_encode_statres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfssvc_encode_attrstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfssvc_encode_diropres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_encode_statres(struct svc_rqst *rqstp, 
struct xdr_stream *xdr); +bool nfssvc_encode_attrstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_encode_diropres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr); void nfssvc_encode_nfscookie(struct nfsd_readdirres *resp, u32 offset); int nfssvc_encode_entry(void *data, const char *name, int namlen, diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index bb017fc7cba1..03fe4e21306c 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -281,21 +281,21 @@ bool nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); bool nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); bool nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfs3svc_encode_getattrres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfs3svc_encode_wccstat(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfs3svc_encode_lookupres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfs3svc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfs3svc_encode_writeres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfs3svc_encode_createres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfs3svc_encode_renameres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfs3svc_encode_linkres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int 
nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfs3svc_encode_commitres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_getattrres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_wccstat(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_lookupres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_writeres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_createres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_renameres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_linkres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_commitres(struct svc_rqst *rqstp, struct xdr_stream *xdr); void nfs3svc_release_fhandle(struct svc_rqst *); void nfs3svc_release_fhandle2(struct svc_rqst *); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 3bd553925c35..846ab6df9d48 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -758,7 +758,7 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp) bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp); bool nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool 
nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr); __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32); void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op); diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index d8bd26a5525e..398f70093cd3 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -106,9 +106,9 @@ bool nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr); bool nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); bool nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nlmsvc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nlmsvc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nlmsvc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nlmsvc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr); #endif /* LOCKD_XDR_H */ diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h index 50677be3557d..9a6b55da8fd6 100644 --- a/include/linux/lockd/xdr4.h +++ b/include/linux/lockd/xdr4.h @@ -32,10 +32,10 @@ bool nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr); bool nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); bool nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int nlm4svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); -int 
nlm4svc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr); extern const struct rpc_version nlm_version4; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 85694cd0db66..0ae28ae6caf2 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -457,7 +457,7 @@ struct svc_procedure { bool (*pc_decode)(struct svc_rqst *rqstp, struct xdr_stream *xdr); /* XDR encode result: */ - int (*pc_encode)(struct svc_rqst *rqstp, + bool (*pc_encode)(struct svc_rqst *rqstp, struct xdr_stream *xdr); /* XDR free result: */ void (*pc_release)(struct svc_rqst *); From e9bdcdbf6936dd1fbf419e00222dc9038b7812c2 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 11 Oct 2021 15:32:44 +0200 Subject: [PATCH 101/433] pid: add pidfd_get_task() helper The number of system calls making use of pidfds is constantly increasing. Some of those new system calls duplicate the code to turn a pidfd into task_struct it refers to. Give them a simple helper for this. 
Link: https://lore.kernel.org/r/20211004125050.1153693-2-christian.brauner@ubuntu.com Link: https://lore.kernel.org/r/20211011133245.1703103-2-brauner@kernel.org Cc: Vlastimil Babka Cc: Suren Baghdasaryan Cc: Matthew Bobrowski Cc: Alexander Duyck Cc: David Hildenbrand Cc: Jan Kara Cc: Minchan Kim Reviewed-by: Matthew Bobrowski Acked-by: David Hildenbrand Signed-off-by: Christian Brauner --- include/linux/pid.h | 1 + kernel/pid.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/include/linux/pid.h b/include/linux/pid.h index af308e15f174..343abf22092e 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -78,6 +78,7 @@ struct file; extern struct pid *pidfd_pid(const struct file *file); struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags); +struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags); int pidfd_create(struct pid *pid, unsigned int flags); static inline struct pid *get_pid(struct pid *pid) diff --git a/kernel/pid.c b/kernel/pid.c index efe87db44683..2fc0a16ec77b 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -539,6 +539,42 @@ struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags) return pid; } +/** + * pidfd_get_task() - Get the task associated with a pidfd + * + * @pidfd: pidfd for which to get the task + * @flags: flags associated with this pidfd + * + * Return the task associated with @pidfd. The function takes a reference on + * the returned task. The caller is responsible for releasing that reference. + * + * Currently, the process identified by @pidfd is always a thread-group leader. + * This restriction currently exists for all aspects of pidfds including pidfd + * creation (CLONE_PIDFD cannot be used with CLONE_THREAD) and pidfd polling + * (only supports thread group leaders). + * + * Return: On success, the task_struct associated with the pidfd. + * On error, a negative errno number will be returned. 
+ */ +struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags) +{ + unsigned int f_flags; + struct pid *pid; + struct task_struct *task; + + pid = pidfd_get_pid(pidfd, &f_flags); + if (IS_ERR(pid)) + return ERR_CAST(pid); + + task = get_pid_task(pid, PIDTYPE_TGID); + put_pid(pid); + if (!task) + return ERR_PTR(-ESRCH); + + *flags = f_flags; + return task; +} + /** * pidfd_create() - Create a new pid file descriptor. * From ee9955d61a0a770152f9c3af470bd1689f034c74 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 11 Oct 2021 15:32:45 +0200 Subject: [PATCH 102/433] mm: use pidfd_get_task() Instead of duplicating the same code in two places use the newly added pidfd_get_task() helper. This fixes an (unimportant for now) bug where PIDTYPE_PID is used whereas PIDTYPE_TGID should have been used. Link: https://lore.kernel.org/r/20211004125050.1153693-3-christian.brauner@ubuntu.com Link: https://lore.kernel.org/r/20211011133245.1703103-3-brauner@kernel.org Cc: Vlastimil Babka Cc: Suren Baghdasaryan Cc: Matthew Bobrowski Cc: Alexander Duyck Cc: David Hildenbrand Cc: Jan Kara Cc: Minchan Kim Reviewed-by: Matthew Bobrowski Acked-by: David Hildenbrand Signed-off-by: Christian Brauner --- mm/madvise.c | 15 +++------------ mm/oom_kill.c | 15 +++------------ 2 files changed, 6 insertions(+), 24 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index 0734db8d53a7..8c927202bbe6 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -1235,7 +1235,6 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec, struct iovec iovstack[UIO_FASTIOV], iovec; struct iovec *iov = iovstack; struct iov_iter iter; - struct pid *pid; struct task_struct *task; struct mm_struct *mm; size_t total_len; @@ -1250,18 +1249,12 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec, if (ret < 0) goto out; - pid = pidfd_get_pid(pidfd, &f_flags); - if (IS_ERR(pid)) { - ret = PTR_ERR(pid); + task = pidfd_get_task(pidfd, &f_flags); + if 
(IS_ERR(task)) { + ret = PTR_ERR(task); goto free_iov; } - task = get_pid_task(pid, PIDTYPE_PID); - if (!task) { - ret = -ESRCH; - goto put_pid; - } - if (!process_madvise_behavior_valid(behavior)) { ret = -EINVAL; goto release_task; @@ -1301,8 +1294,6 @@ release_mm: mmput(mm); release_task: put_task_struct(task); -put_pid: - put_pid(pid); free_iov: kfree(iov); out: diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 831340e7ad8b..70d399d5817e 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -1151,21 +1151,14 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags) struct task_struct *p; unsigned int f_flags; bool reap = true; - struct pid *pid; long ret = 0; if (flags) return -EINVAL; - pid = pidfd_get_pid(pidfd, &f_flags); - if (IS_ERR(pid)) - return PTR_ERR(pid); - - task = get_pid_task(pid, PIDTYPE_TGID); - if (!task) { - ret = -ESRCH; - goto put_pid; - } + task = pidfd_get_task(pidfd, &f_flags); + if (IS_ERR(task)) + return PTR_ERR(task); /* * Make sure to choose a thread which still has a reference to mm @@ -1204,8 +1197,6 @@ drop_mm: mmdrop(mm); put_task: put_task_struct(task); -put_pid: - put_pid(pid); return ret; #else return -ENOSYS; From 2336d696862186fd4a6ddd1ea0cb243b3e32847c Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 15 Oct 2021 14:42:11 -0400 Subject: [PATCH 103/433] nfsd: update create verifier comment I don't know if that Solaris behavior matters any more or if it's still possible to look up that bug ID any more. The XFS behavior's definitely still relevant, though; any but the most recent XFS filesystems will lose the top bits. Reported-by: Frank S. Filz Signed-off-by: J. 
Bruce Fields --- fs/nfsd/vfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index e9c406fd05b6..5c76d6813517 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1411,7 +1411,8 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, if (nfsd_create_is_exclusive(createmode)) { /* solaris7 gets confused (bugid 4218508) if these have - * the high bit set, so just clear the high bits. If this is + * the high bit set, as do xfs filesystems without the + * "bigtime" feature. So just clear the high bits. If this is * ever changed to use different attrs for storing the * verifier, then do_open_lookup() will also need to be fixed * accordingly. From 789c1093f02c436b320d78a739f9610c8271cb73 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 11 Oct 2021 21:21:14 +0800 Subject: [PATCH 104/433] rtc: class: don't call cdev_device_del() when cdev_device_add() failed I got a null-ptr-deref report when doing fault injection test: general protection fault, probably for non-canonical address 0xdffffc0000000022: 0000 [#1] SMP KASAN PTI KASAN: null-ptr-deref in range [0x0000000000000110-0x0000000000000117] RIP: 0010:device_del+0x132/0xdc0 Call Trace: cdev_device_del+0x1a/0x80 devm_rtc_unregister_device+0x37/0x80 release_nodes+0xc3/0x3b0 If cdev_device_add() fails, 'dev->p' is not set, it causes null-ptr-deref when calling cdev_device_del(). Registering character device is optional, we don't return error code here, so introduce a new flag 'RTC_NO_CDEV' to indicate if it has character device, cdev_device_del() is called when this bit is not set. 
Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211011132114.3663509-1-yangyingliang@huawei.com --- drivers/rtc/class.c | 9 ++++++--- include/linux/rtc.h | 1 + 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index f77bc089eb6b..654e921244bf 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -334,7 +334,8 @@ static void devm_rtc_unregister_device(void *data) * letting any rtc_class_open() users access it again */ rtc_proc_del_device(rtc); - cdev_device_del(&rtc->char_dev, &rtc->dev); + if (!test_bit(RTC_NO_CDEV, &rtc->flags)) + cdev_device_del(&rtc->char_dev, &rtc->dev); rtc->ops = NULL; mutex_unlock(&rtc->ops_lock); } @@ -397,12 +398,14 @@ int __devm_rtc_register_device(struct module *owner, struct rtc_device *rtc) rtc_dev_prepare(rtc); err = cdev_device_add(&rtc->char_dev, &rtc->dev); - if (err) + if (err) { + set_bit(RTC_NO_CDEV, &rtc->flags); dev_warn(rtc->dev.parent, "failed to add char device %d:%d\n", MAJOR(rtc->dev.devt), rtc->id); - else + } else { dev_dbg(rtc->dev.parent, "char device (%d:%d)\n", MAJOR(rtc->dev.devt), rtc->id); + } rtc_proc_add_device(rtc); diff --git a/include/linux/rtc.h b/include/linux/rtc.h index bd611e26291d..354e0843ab17 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -80,6 +80,7 @@ struct rtc_timer { /* flags */ #define RTC_DEV_BUSY 0 +#define RTC_NO_CDEV 1 struct rtc_device { struct device dev; From 24d23181e43d72ca692a479e70dfe5b0b5dd33f1 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 12 Oct 2021 12:16:29 +0800 Subject: [PATCH 105/433] rtc: class: check return value when calling dev_set_name() I got a null-ptr-deref report when doing fault injection test: BUG: kernel NULL pointer dereference, address: 0000000000000000 RIP: 0010:strcmp+0xc/0x20 Call Trace: __devm_rtc_register_device.cold.7+0x16a/0x2df rv3029_probe+0x4b1/0x770 [rtc_rv3029c2] rv3029_i2c_probe+0x141/0x180 
[rtc_rv3029c2] i2c_device_probe+0xa07/0xbb0 really_probe+0x285/0xc30 If dev_set_name() fails, dev_name() returns NULL, which causes a null-ptr-deref, so we need to check the return value of dev_set_name(). Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211012041629.2504158-1-yangyingliang@huawei.com --- drivers/rtc/class.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index 654e921244bf..dbccd71589b9 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -364,7 +364,9 @@ struct rtc_device *devm_rtc_allocate_device(struct device *dev) rtc->id = id; rtc->dev.parent = dev; - dev_set_name(&rtc->dev, "rtc%d", id); + err = dev_set_name(&rtc->dev, "rtc%d", id); + if (err) + return ERR_PTR(err); err = devm_add_action_or_reset(dev, devm_rtc_release_device, rtc); if (err) From c3336b8ac6091df60a5c1049a8c685d0b947cc61 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 12 Oct 2021 13:10:28 +0300 Subject: [PATCH 106/433] rtc: rv3032: fix error handling in rv3032_clkout_set_rate() Do not call rv3032_exit_eerd() if the enter function fails but don't forget to call the exit when the enter succeeds. 
Fixes: 2eeaa532acca ("rtc: rv3032: Add a driver for Microcrystal RV-3032") Signed-off-by: Dan Carpenter Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211012101028.GT2083@kadam --- drivers/rtc/rtc-rv3032.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-rv3032.c b/drivers/rtc/rtc-rv3032.c index d63102d5cb1e..1b62ed2f1459 100644 --- a/drivers/rtc/rtc-rv3032.c +++ b/drivers/rtc/rtc-rv3032.c @@ -617,11 +617,11 @@ static int rv3032_clkout_set_rate(struct clk_hw *hw, unsigned long rate, ret = rv3032_enter_eerd(rv3032, &eerd); if (ret) - goto exit_eerd; + return ret; ret = regmap_write(rv3032->regmap, RV3032_CLKOUT1, hfd & 0xff); if (ret) - return ret; + goto exit_eerd; ret = regmap_write(rv3032->regmap, RV3032_CLKOUT2, RV3032_CLKOUT2_OS | FIELD_PREP(RV3032_CLKOUT2_HFD_MSK, hfd >> 8)); From 4c8a7b80d5f3c924fbe08b24634fb67a97f96465 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 13 Oct 2021 09:49:54 +0200 Subject: [PATCH 107/433] rtc: pcf85063: add support for fixed clock TQ-Systems' TQMa8Mx module (SoM) uses a pcf85063 as RTC. The default output is 32768Hz. This is to provide the i.MX8M CKIL clock. Once the RTC driver is probed, the clock is disabled and all i.MX8M functionality depending on the 32 KHz clock will halt. In our case the whole system halts and a power cycle is required. Referencing the pcf85063 directly results in a deadlock. The kernel will see, that i.MX8M system clock needs the RTC clock and do probe deferral. But the i.MX8M I2C module never becomes usable without the i.MX8M CKIL clock and thus the RTC's clock will not be probed. So from the kernel's perspective this is a chicken-and-egg problem. Technically everything is fine by not touching anything, since the RTC clock correctly enables the clock on reset (i.e. on battery backup power loss). 
A workaround for this issue is describing the square wave pin as fixed-clock, which is registered early and basically how this pin is used on the i.MX8M. This addresses the exact same issue as in commit f765e349c3e1 ("rtc: m41t80: add support for fixed clock"). Signed-off-by: Alexander Stein Reported-by: kernel test robot Signed-off-by: Alexandre Belloni [Fixed return value 0 -> NULL] Link: https://lore.kernel.org/r/20211013074954.997445-1-alexander.stein@ew.tq-group.com --- .../devicetree/bindings/rtc/nxp,pcf85063.txt | 9 +++++++++ drivers/rtc/rtc-pcf85063.c | 12 ++++++++++++ 2 files changed, 21 insertions(+) diff --git a/Documentation/devicetree/bindings/rtc/nxp,pcf85063.txt b/Documentation/devicetree/bindings/rtc/nxp,pcf85063.txt index 627bb533eff7..6439682c9319 100644 --- a/Documentation/devicetree/bindings/rtc/nxp,pcf85063.txt +++ b/Documentation/devicetree/bindings/rtc/nxp,pcf85063.txt @@ -13,10 +13,19 @@ Optional property: expressed in femto Farad (fF). Valid values are 7000 and 12500. Default value (if no value is specified) is 7000fF. +Optional child node: +- clock: Provide this if the square wave pin is used as boot-enabled fixed clock. + Example: pcf85063: rtc@51 { compatible = "nxp,pcf85063"; reg = <0x51>; quartz-load-femtofarads = <12500>; + + clock { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <32768>; + }; }; diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c index 14da4ab30104..3e59590f9b69 100644 --- a/drivers/rtc/rtc-pcf85063.c +++ b/drivers/rtc/rtc-pcf85063.c @@ -479,6 +479,18 @@ static struct clk *pcf85063_clkout_register_clk(struct pcf85063 *pcf85063) struct clk *clk; struct clk_init_data init; struct device_node *node = pcf85063->rtc->dev.parent->of_node; + struct device_node *fixed_clock; + + fixed_clock = of_get_child_by_name(node, "clock"); + if (fixed_clock) { + /* + * skip registering square wave clock when a fixed + * clock has been registered. 
The fixed clock is + * registered automatically when being referenced. + */ + of_node_put(fixed_clock); + return NULL; + } init.name = "pcf85063-clkout"; init.ops = &pcf85063_clkout_ops; From 9f08c9ed580a287de6546044e28f15bb183d00ff Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Fri, 15 Oct 2021 12:12:08 +0100 Subject: [PATCH 108/433] rtc: pcf85063: Always clear EXT_TEST from set_time Power-on reset after the insertion of a battery does not always complete successfully, leading to corrupted register content. The EXT_TEST bit will stop the clock from running, but currently the driver will never recover. Safely handle the erroneous state by clearing EXT_TEST as part of the usual set_time method. Signed-off-by: Phil Elwell Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211015111208.1757110-1-phil@raspberrypi.com --- drivers/rtc/rtc-pcf85063.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c index 3e59590f9b69..4a70d6bae859 100644 --- a/drivers/rtc/rtc-pcf85063.c +++ b/drivers/rtc/rtc-pcf85063.c @@ -34,6 +34,7 @@ #define PCF85063_REG_CTRL1 0x00 /* status */ #define PCF85063_REG_CTRL1_CAP_SEL BIT(0) #define PCF85063_REG_CTRL1_STOP BIT(5) +#define PCF85063_REG_CTRL1_EXT_TEST BIT(7) #define PCF85063_REG_CTRL2 0x01 #define PCF85063_CTRL2_AF BIT(6) @@ -117,6 +118,7 @@ static int pcf85063_rtc_set_time(struct device *dev, struct rtc_time *tm) * reset state until all time/date registers are written */ rc = regmap_update_bits(pcf85063->regmap, PCF85063_REG_CTRL1, + PCF85063_REG_CTRL1_EXT_TEST | PCF85063_REG_CTRL1_STOP, PCF85063_REG_CTRL1_STOP); if (rc) From 03b47b3ad0a9dc4a33b9e4528f4eac74ee781e63 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 11 Oct 2021 18:37:32 -0700 Subject: [PATCH 109/433] Input: ep93xx_keypad - annotate suspend/resume as __maybe_unused Instead of guarding suspend/resume methods with #ifdef CONFIG_PM let's mark them as __maybe_unused as this allows better compile 
coverage. Acked-by: Alexander Sverdlin Link: https://lore.kernel.org/r/20211012013735.3523140-1-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/ep93xx_keypad.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/input/keyboard/ep93xx_keypad.c b/drivers/input/keyboard/ep93xx_keypad.c index e0e931e796fa..a0c6cdf8e0d3 100644 --- a/drivers/input/keyboard/ep93xx_keypad.c +++ b/drivers/input/keyboard/ep93xx_keypad.c @@ -175,8 +175,7 @@ static void ep93xx_keypad_close(struct input_dev *pdev) } -#ifdef CONFIG_PM_SLEEP -static int ep93xx_keypad_suspend(struct device *dev) +static int __maybe_unused ep93xx_keypad_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct ep93xx_keypad *keypad = platform_get_drvdata(pdev); @@ -197,7 +196,7 @@ static int ep93xx_keypad_suspend(struct device *dev) return 0; } -static int ep93xx_keypad_resume(struct device *dev) +static int __maybe_unused ep93xx_keypad_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct ep93xx_keypad *keypad = platform_get_drvdata(pdev); @@ -220,7 +219,6 @@ static int ep93xx_keypad_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(ep93xx_keypad_pm_ops, ep93xx_keypad_suspend, ep93xx_keypad_resume); From 4ce73b052bdd64f6edba86d3a4e8608cca78c105 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 11 Oct 2021 18:37:33 -0700 Subject: [PATCH 110/433] Input: ep93xx_keypad - use BIT() and GENMASK() macros Also drop parentheses around macros that do not use expressions, as they are not needed. 
Acked-by: Alexander Sverdlin Link: https://lore.kernel.org/r/20211012013735.3523140-2-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/ep93xx_keypad.c | 37 +++++++++++++------------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/drivers/input/keyboard/ep93xx_keypad.c b/drivers/input/keyboard/ep93xx_keypad.c index a0c6cdf8e0d3..6be5474ba2f2 100644 --- a/drivers/input/keyboard/ep93xx_keypad.c +++ b/drivers/input/keyboard/ep93xx_keypad.c @@ -17,6 +17,7 @@ * flag. */ +#include #include #include #include @@ -35,28 +36,28 @@ #define KEY_REG 0x08 /* Key Value Capture register */ /* Key Scan Initialization Register bit defines */ -#define KEY_INIT_DBNC_MASK (0x00ff0000) -#define KEY_INIT_DBNC_SHIFT (16) -#define KEY_INIT_DIS3KY (1<<15) -#define KEY_INIT_DIAG (1<<14) -#define KEY_INIT_BACK (1<<13) -#define KEY_INIT_T2 (1<<12) -#define KEY_INIT_PRSCL_MASK (0x000003ff) -#define KEY_INIT_PRSCL_SHIFT (0) +#define KEY_INIT_DBNC_MASK GENMASK(23, 16) +#define KEY_INIT_DBNC_SHIFT 16 +#define KEY_INIT_DIS3KY BIT(15) +#define KEY_INIT_DIAG BIT(14) +#define KEY_INIT_BACK BIT(13) +#define KEY_INIT_T2 BIT(12) +#define KEY_INIT_PRSCL_MASK GENMASK(9, 0) +#define KEY_INIT_PRSCL_SHIFT 0 /* Key Scan Diagnostic Register bit defines */ -#define KEY_DIAG_MASK (0x0000003f) -#define KEY_DIAG_SHIFT (0) +#define KEY_DIAG_MASK GENMASK(5, 0) +#define KEY_DIAG_SHIFT 0 /* Key Value Capture Register bit defines */ -#define KEY_REG_K (1<<15) -#define KEY_REG_INT (1<<14) -#define KEY_REG_2KEYS (1<<13) -#define KEY_REG_1KEY (1<<12) -#define KEY_REG_KEY2_MASK (0x00000fc0) -#define KEY_REG_KEY2_SHIFT (6) -#define KEY_REG_KEY1_MASK (0x0000003f) -#define KEY_REG_KEY1_SHIFT (0) +#define KEY_REG_K BIT(15) +#define KEY_REG_INT BIT(14) +#define KEY_REG_2KEYS BIT(13) +#define KEY_REG_1KEY BIT(12) +#define KEY_REG_KEY2_MASK GENMASK(11, 6) +#define KEY_REG_KEY2_SHIFT 6 +#define KEY_REG_KEY1_MASK GENMASK(5, 0) +#define KEY_REG_KEY1_SHIFT 0 #define EP93XX_MATRIX_SIZE 
(EP93XX_MATRIX_ROWS * EP93XX_MATRIX_COLS) From ab317169673dbdddba8b6132c53f6e5ce64726a3 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 11 Oct 2021 18:37:34 -0700 Subject: [PATCH 111/433] Input: ep93xx_keypad - use dev_pm_set_wake_irq() Instead of manually toggling interrupt as wakeup source in suspend/resume methods, let's declare keypad interrupt as wakeup interrupt and leave the rest to the PM core. Acked-by: Alexander Sverdlin Link: https://lore.kernel.org/r/20211012013735.3523140-3-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/ep93xx_keypad.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/input/keyboard/ep93xx_keypad.c b/drivers/input/keyboard/ep93xx_keypad.c index 6be5474ba2f2..a66cfeaf5b21 100644 --- a/drivers/input/keyboard/ep93xx_keypad.c +++ b/drivers/input/keyboard/ep93xx_keypad.c @@ -27,6 +27,7 @@ #include #include #include +#include /* * Keypad Interface Register offsets @@ -191,9 +192,6 @@ static int __maybe_unused ep93xx_keypad_suspend(struct device *dev) mutex_unlock(&input_dev->mutex); - if (device_may_wakeup(&pdev->dev)) - enable_irq_wake(keypad->irq); - return 0; } @@ -203,9 +201,6 @@ static int __maybe_unused ep93xx_keypad_resume(struct device *dev) struct ep93xx_keypad *keypad = platform_get_drvdata(pdev); struct input_dev *input_dev = keypad->input_dev; - if (device_may_wakeup(&pdev->dev)) - disable_irq_wake(keypad->irq); - mutex_lock(&input_dev->mutex); if (input_device_enabled(input_dev)) { @@ -316,7 +311,11 @@ static int ep93xx_keypad_probe(struct platform_device *pdev) goto failed_free_irq; platform_set_drvdata(pdev, keypad); + device_init_wakeup(&pdev->dev, 1); + err = dev_pm_set_wake_irq(&pdev->dev, keypad->irq); + if (err) + dev_warn(&pdev->dev, "failed to set up wakeup irq: %d\n", err); return 0; @@ -342,6 +341,8 @@ static int ep93xx_keypad_remove(struct platform_device *pdev) struct ep93xx_keypad *keypad = platform_get_drvdata(pdev); struct 
resource *res; + dev_pm_clear_wake_irq(&pdev->dev); + free_irq(keypad->irq, keypad); if (keypad->enabled) From c4be5e5a113d78490c426b1124fd458a9d807933 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 12 Oct 2021 19:36:25 -0700 Subject: [PATCH 112/433] Input: ep93xx_keypad - switch to using managed resources By using managed resources (devm) we are able to streamline error handling in probe and remove most of the custom remove method. Acked-by: Alexander Sverdlin Link: https://lore.kernel.org/r/YWZGKWgdarGtvtYA@google.com Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/ep93xx_keypad.c | 118 ++++++++----------------- 1 file changed, 37 insertions(+), 81 deletions(-) diff --git a/drivers/input/keyboard/ep93xx_keypad.c b/drivers/input/keyboard/ep93xx_keypad.c index a66cfeaf5b21..272a4f1c6e81 100644 --- a/drivers/input/keyboard/ep93xx_keypad.c +++ b/drivers/input/keyboard/ep93xx_keypad.c @@ -219,6 +219,13 @@ static int __maybe_unused ep93xx_keypad_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(ep93xx_keypad_pm_ops, ep93xx_keypad_suspend, ep93xx_keypad_resume); +static void ep93xx_keypad_release_gpio_action(void *_pdev) +{ + struct platform_device *pdev = _pdev; + + ep93xx_keypad_release_gpio(pdev); +} + static int ep93xx_keypad_probe(struct platform_device *pdev) { struct ep93xx_keypad *keypad; @@ -227,61 +234,46 @@ static int ep93xx_keypad_probe(struct platform_device *pdev) struct resource *res; int err; - keypad = kzalloc(sizeof(struct ep93xx_keypad), GFP_KERNEL); + keypad = devm_kzalloc(&pdev->dev, sizeof(*keypad), GFP_KERNEL); if (!keypad) return -ENOMEM; keypad->pdata = dev_get_platdata(&pdev->dev); - if (!keypad->pdata) { - err = -EINVAL; - goto failed_free; - } + if (!keypad->pdata) + return -EINVAL; keymap_data = keypad->pdata->keymap_data; - if (!keymap_data) { - err = -EINVAL; - goto failed_free; - } + if (!keymap_data) + return -EINVAL; keypad->irq = platform_get_irq(pdev, 0); - if (keypad->irq < 0) { - err = keypad->irq; - goto 
failed_free; - } + if (keypad->irq < 0) + return keypad->irq; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - err = -ENXIO; - goto failed_free; - } + if (!res) + return -ENXIO; - res = request_mem_region(res->start, resource_size(res), pdev->name); - if (!res) { - err = -EBUSY; - goto failed_free; - } - - keypad->mmio_base = ioremap(res->start, resource_size(res)); - if (keypad->mmio_base == NULL) { - err = -ENXIO; - goto failed_free_mem; - } + keypad->mmio_base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(keypad->mmio_base)) + return PTR_ERR(keypad->mmio_base); err = ep93xx_keypad_acquire_gpio(pdev); if (err) - goto failed_free_io; + return err; - keypad->clk = clk_get(&pdev->dev, NULL); - if (IS_ERR(keypad->clk)) { - err = PTR_ERR(keypad->clk); - goto failed_free_gpio; - } + err = devm_add_action_or_reset(&pdev->dev, + ep93xx_keypad_release_gpio_action, pdev); + if (err) + return err; - input_dev = input_allocate_device(); - if (!input_dev) { - err = -ENOMEM; - goto failed_put_clk; - } + keypad->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(keypad->clk)) + return PTR_ERR(keypad->clk); + + input_dev = devm_input_allocate_device(&pdev->dev); + if (!input_dev) + return -ENOMEM; keypad->input_dev = input_dev; @@ -289,26 +281,26 @@ static int ep93xx_keypad_probe(struct platform_device *pdev) input_dev->id.bustype = BUS_HOST; input_dev->open = ep93xx_keypad_open; input_dev->close = ep93xx_keypad_close; - input_dev->dev.parent = &pdev->dev; err = matrix_keypad_build_keymap(keymap_data, NULL, EP93XX_MATRIX_ROWS, EP93XX_MATRIX_COLS, keypad->keycodes, input_dev); if (err) - goto failed_free_dev; + return err; if (keypad->pdata->flags & EP93XX_KEYPAD_AUTOREPEAT) __set_bit(EV_REP, input_dev->evbit); input_set_drvdata(input_dev, keypad); - err = request_irq(keypad->irq, ep93xx_keypad_irq_handler, - 0, pdev->name, keypad); + err = devm_request_irq(&pdev->dev, keypad->irq, + ep93xx_keypad_irq_handler, + 0, pdev->name, keypad); if (err) - 
goto failed_free_dev; + return err; err = input_register_device(input_dev); if (err) - goto failed_free_irq; + return err; platform_set_drvdata(pdev, keypad); @@ -318,48 +310,12 @@ static int ep93xx_keypad_probe(struct platform_device *pdev) dev_warn(&pdev->dev, "failed to set up wakeup irq: %d\n", err); return 0; - -failed_free_irq: - free_irq(keypad->irq, keypad); -failed_free_dev: - input_free_device(input_dev); -failed_put_clk: - clk_put(keypad->clk); -failed_free_gpio: - ep93xx_keypad_release_gpio(pdev); -failed_free_io: - iounmap(keypad->mmio_base); -failed_free_mem: - release_mem_region(res->start, resource_size(res)); -failed_free: - kfree(keypad); - return err; } static int ep93xx_keypad_remove(struct platform_device *pdev) { - struct ep93xx_keypad *keypad = platform_get_drvdata(pdev); - struct resource *res; - dev_pm_clear_wake_irq(&pdev->dev); - free_irq(keypad->irq, keypad); - - if (keypad->enabled) - clk_disable(keypad->clk); - clk_put(keypad->clk); - - input_unregister_device(keypad->input_dev); - - ep93xx_keypad_release_gpio(pdev); - - iounmap(keypad->mmio_base); - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - release_mem_region(res->start, resource_size(res)); - - kfree(keypad); - return 0; } From 804f354ab6ce6668244db6d4c2da8e481ad89fb5 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 12 Oct 2021 19:46:37 -0700 Subject: [PATCH 113/433] Input: adxl34x - fix sparse warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes the following warning from sparse: CC [M] drivers/input/misc/adxl34x.o CHECK drivers/input/misc/adxl34x.c drivers/input/misc/adxl34x.c:245:29: warning: cast to restricted __le16 drivers/input/misc/adxl34x.c:248:29: warning: cast to restricted __le16 drivers/input/misc/adxl34x.c:251:29: warning: cast to restricted __le16 Acked-by: Michael Hennerich Acked-by: Uwe Kleine-König Link: https://lore.kernel.org/r/YWZIjb91d6aAwgss@google.com Signed-off-by: Dmitry 
Torokhov --- drivers/input/misc/adxl34x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/misc/adxl34x.c b/drivers/input/misc/adxl34x.c index 34beac80e6f0..a4af314392a9 100644 --- a/drivers/input/misc/adxl34x.c +++ b/drivers/input/misc/adxl34x.c @@ -237,7 +237,7 @@ static const struct adxl34x_platform_data adxl34x_default_init = { static void adxl34x_get_triple(struct adxl34x *ac, struct axis_triple *axis) { - short buf[3]; + __le16 buf[3]; ac->bops->read_block(ac->dev, DATAX0, DATAZ1 - DATAX0 + 1, buf); From f041a7af12636639d9c87e541c4c261d40f5afa7 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 13 Oct 2021 13:23:04 +0200 Subject: [PATCH 114/433] Input: tm2-touchkey - report scan codes Report the index of pressed touch key as MSC_SCAN code to userspace so it is possible to identify which of the keys was pressed (not just the function that is currently assigned to the key). This is done similarly also in mcs_touchkey and mpr121_touchkey. Signed-off-by: Stephan Gerhold Link: https://lore.kernel.org/r/20211013112305.41574-1-stephan@gerhold.net Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/tm2-touchkey.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/keyboard/tm2-touchkey.c b/drivers/input/keyboard/tm2-touchkey.c index 6218b1c682ef..ce40ea2d940f 100644 --- a/drivers/input/keyboard/tm2-touchkey.c +++ b/drivers/input/keyboard/tm2-touchkey.c @@ -156,6 +156,8 @@ static irqreturn_t tm2_touchkey_irq_handler(int irq, void *devid) goto out; } + input_event(touchkey->input_dev, EV_MSC, MSC_SCAN, index); + if (data & TM2_TOUCHKEY_BIT_PRESS_EV) { for (i = 0; i < touchkey->num_keycodes; i++) input_report_key(touchkey->input_dev, @@ -250,6 +252,7 @@ static int tm2_touchkey_probe(struct i2c_client *client, touchkey->input_dev->name = TM2_TOUCHKEY_DEV_NAME; touchkey->input_dev->id.bustype = BUS_I2C; + input_set_capability(touchkey->input_dev, EV_MSC, MSC_SCAN); for (i = 0; i < touchkey->num_keycodes; i++) 
input_set_capability(touchkey->input_dev, EV_KEY, touchkey->keycodes[i]); From 872e57abd171515bc180f2d44c2c99da29542320 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 13 Oct 2021 13:23:05 +0200 Subject: [PATCH 115/433] Input: tm2-touchkey - allow changing keycodes from userspace At the moment the touch keys have key codes assigned from the device tree. In some cases, users might want to change the key code from userspace. There is existing functionality for this in the input core using the EVIOCSKEYCODE ioctl, which is integrated for example into udev. Make it possible to use this functionality for tm2-touchkey by simply making the input core aware of the array that holds the keycodes. Similar code also exists in mcs_touchkey and mpr121_touchkey. Signed-off-by: Stephan Gerhold Link: https://lore.kernel.org/r/20211013112305.41574-2-stephan@gerhold.net Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/tm2-touchkey.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/input/keyboard/tm2-touchkey.c b/drivers/input/keyboard/tm2-touchkey.c index ce40ea2d940f..632cd6c1c8d4 100644 --- a/drivers/input/keyboard/tm2-touchkey.c +++ b/drivers/input/keyboard/tm2-touchkey.c @@ -252,6 +252,10 @@ static int tm2_touchkey_probe(struct i2c_client *client, touchkey->input_dev->name = TM2_TOUCHKEY_DEV_NAME; touchkey->input_dev->id.bustype = BUS_I2C; + touchkey->input_dev->keycode = touchkey->keycodes; + touchkey->input_dev->keycodemax = touchkey->num_keycodes; + touchkey->input_dev->keycodesize = sizeof(touchkey->keycodes[0]); + input_set_capability(touchkey->input_dev, EV_MSC, MSC_SCAN); for (i = 0; i < touchkey->num_keycodes; i++) input_set_capability(touchkey->input_dev, EV_KEY, From 9271cda2bb41b16063e2c24aae890e814caf82a1 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 9 Sep 2021 21:50:37 -0700 Subject: [PATCH 116/433] Input: ads7846 - set input device bus type and product ID Set input device's bus type as BUS_SPI and use model as product ID. 
Link: https://lore.kernel.org/r/20210910045039.4020199-1-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ads7846.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index eaa8714ad19d..a018481e9d8b 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -1304,6 +1304,9 @@ static int ads7846_probe(struct spi_device *spi) input_dev->name = ts->name; input_dev->phys = ts->phys; + input_dev->id.bustype = BUS_SPI; + input_dev->id.product = pdata->model; + input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS); input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH); input_set_abs_params(input_dev, ABS_X, From ccd661392abb728fc685e543f6be86fc435e163f Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 9 Sep 2021 21:50:38 -0700 Subject: [PATCH 117/433] Input: ads7846 - use input_set_capability() Instead of manipulating capability bits directly use input_set_capability(). Also stop setting EV_ABS explicitly as input_set_abs_params() does it for us. Link: https://lore.kernel.org/r/20210910045039.4020199-2-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ads7846.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index a018481e9d8b..0f973351bc67 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -1307,8 +1307,7 @@ static int ads7846_probe(struct spi_device *spi) input_dev->id.bustype = BUS_SPI; input_dev->id.product = pdata->model; - input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS); - input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH); + input_set_capability(input_dev, EV_KEY, BTN_TOUCH); input_set_abs_params(input_dev, ABS_X, pdata->x_min ? : 0, pdata->x_max ? 
: MAX_12BIT, From 36fc54375f985cb9fc52b655ed18052147596f8f Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 9 Sep 2021 21:50:39 -0700 Subject: [PATCH 118/433] Input: ads7846 - do not attempt IRQ workaround when deferring probe When request_irq() returns -EPORBE_DEFER we should abort probe and try again later instead of trying to engage IRQ trigger workaround. Link: https://lore.kernel.org/r/20210910045039.4020199-3-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ads7846.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index 0f973351bc67..a25a77dd9a32 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -1361,7 +1361,7 @@ static int ads7846_probe(struct spi_device *spi) err = devm_request_threaded_irq(dev, spi->irq, ads7846_hard_irq, ads7846_irq, irq_flags, dev->driver->name, ts); - if (err && !pdata->irq_flags) { + if (err && err != -EPROBE_DEFER && !pdata->irq_flags) { dev_info(dev, "trying pin change workaround on irq %d\n", spi->irq); irq_flags |= IRQF_TRIGGER_RISING; From a88638c4e69cfcbfebc7523d777d1273f2fce806 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 15 Oct 2021 22:08:50 -0700 Subject: [PATCH 119/433] Input: max8925_onkey - don't mark comment as kernel-doc Change the comment to a normal (non-kernel-doc) comment to avoid these kernel-doc warnings: max8925_onkey.c:2: warning: This comment starts with '/**', but isn't a kernel-doc comment. 
Refer Documentation/doc-guide/kernel-doc.rst * MAX8925 ONKEY driver max8925_onkey.c:2: warning: missing initial short description on line: * MAX8925 ONKEY driver Fixes: 3734574cac100 ("Input: enable onkey driver of max8925") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Link: https://lore.kernel.org/r/20211002045943.9406-1-rdunlap@infradead.org Signed-off-by: Dmitry Torokhov --- drivers/input/misc/max8925_onkey.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/misc/max8925_onkey.c b/drivers/input/misc/max8925_onkey.c index ffab4a490c75..4770cb55631a 100644 --- a/drivers/input/misc/max8925_onkey.c +++ b/drivers/input/misc/max8925_onkey.c @@ -1,4 +1,4 @@ -/** +/* * MAX8925 ONKEY driver * * Copyright (C) 2009 Marvell International Ltd. From ec45b858c867b2489f4ae93958379f5cf2597163 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 15 Oct 2021 22:25:58 -0700 Subject: [PATCH 120/433] Input: cpcap-pwrbutton - do not set input parent explicitly We are using devm_input_allocate_device() that already sets parent of the input device, there is no need to do that again. 
Reviewed-by: Sebastian Reichel Link: https://lore.kernel.org/r/YWpiZqrfC9+GQsM4@google.com Signed-off-by: Dmitry Torokhov --- drivers/input/misc/cpcap-pwrbutton.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/input/misc/cpcap-pwrbutton.c b/drivers/input/misc/cpcap-pwrbutton.c index 372cb44d0635..879790bbf9fe 100644 --- a/drivers/input/misc/cpcap-pwrbutton.c +++ b/drivers/input/misc/cpcap-pwrbutton.c @@ -77,7 +77,6 @@ static int cpcap_power_button_probe(struct platform_device *pdev) button->idev->name = "cpcap-pwrbutton"; button->idev->phys = "cpcap-pwrbutton/input0"; - button->idev->dev.parent = button->dev; input_set_capability(button->idev, EV_KEY, KEY_POWER); err = devm_request_threaded_irq(&pdev->dev, irq, NULL, From dcd6a66a23e97e999e8ccededb6256863d288379 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 16 Oct 2021 21:16:55 -0700 Subject: [PATCH 121/433] Input: max77693-haptic - drop unneeded MODULE_ALIAS The MODULE_DEVICE_TABLE already creates proper alias for platform driver. Having another MODULE_ALIAS causes the alias to be duplicated. 
Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20210916170514.137977-1-krzysztof.kozlowski@canonical.com Signed-off-by: Dmitry Torokhov --- drivers/input/misc/max77693-haptic.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/input/misc/max77693-haptic.c b/drivers/input/misc/max77693-haptic.c index 0d09ffeafeea..4369d3c04d38 100644 --- a/drivers/input/misc/max77693-haptic.c +++ b/drivers/input/misc/max77693-haptic.c @@ -424,5 +424,4 @@ module_platform_driver(max77693_haptic_driver); MODULE_AUTHOR("Jaewon Kim "); MODULE_AUTHOR("Krzysztof Kozlowski "); MODULE_DESCRIPTION("MAXIM 77693/77843 Haptic driver"); -MODULE_ALIAS("platform:max77693-haptic"); MODULE_LICENSE("GPL"); From d46b3f5bc0fc265b7ef013e76d7d43f85e4b1e7c Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Sat, 16 Oct 2021 21:27:37 -0700 Subject: [PATCH 122/433] reboot: export symbol 'reboot_mode' Some drivers like Qualcomm pm8941-pwrkey need to access 'reboot_mode' for triggering reboot between cold and warm mode. Export the symbol, so that drivers built as module can still access the symbol. Signed-off-by: Shawn Guo Link: https://lore.kernel.org/r/20210714095850.27185-2-shawn.guo@linaro.org Signed-off-by: Dmitry Torokhov --- kernel/reboot.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/reboot.c b/kernel/reboot.c index a6ad5eb2fa73..31bf2611ee12 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -32,6 +32,7 @@ EXPORT_SYMBOL(cad_pid); #define DEFAULT_REBOOT_MODE #endif enum reboot_mode reboot_mode DEFAULT_REBOOT_MODE; +EXPORT_SYMBOL_GPL(reboot_mode); enum reboot_mode panic_reboot_mode = REBOOT_UNDEFINED; /* From 9e5afc84ff94815ad10853420dcecdf33e9226f8 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Sat, 16 Oct 2021 21:27:59 -0700 Subject: [PATCH 123/433] Input: pm8941-pwrkey - respect reboot_mode for warm reset On some devices, e.g. Sony Xperia M4 Aqua, warm reset is used to reboot device into bootloader and recovery mode. 
Instead of always doing hard reset, add a check on reboot_mode for possible warm reset. Signed-off-by: Shawn Guo Tested-by: Luca Weiss Link: https://lore.kernel.org/r/20210714095850.27185-3-shawn.guo@linaro.org Signed-off-by: Dmitry Torokhov --- drivers/input/misc/pm8941-pwrkey.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/input/misc/pm8941-pwrkey.c b/drivers/input/misc/pm8941-pwrkey.c index 33609603245d..89af52498c96 100644 --- a/drivers/input/misc/pm8941-pwrkey.c +++ b/drivers/input/misc/pm8941-pwrkey.c @@ -29,6 +29,7 @@ #define PON_PS_HOLD_RST_CTL2 0x5b #define PON_PS_HOLD_ENABLE BIT(7) #define PON_PS_HOLD_TYPE_MASK 0x0f +#define PON_PS_HOLD_TYPE_WARM_RESET 1 #define PON_PS_HOLD_TYPE_SHUTDOWN 4 #define PON_PS_HOLD_TYPE_HARD_RESET 7 @@ -99,7 +100,10 @@ static int pm8941_reboot_notify(struct notifier_block *nb, break; case SYS_RESTART: default: - reset_type = PON_PS_HOLD_TYPE_HARD_RESET; + if (reboot_mode == REBOOT_WARM) + reset_type = PON_PS_HOLD_TYPE_WARM_RESET; + else + reset_type = PON_PS_HOLD_TYPE_HARD_RESET; break; } From 235300ed8c6ccf8d3044f4ad25b1e984d5dd20d0 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 16 Oct 2021 21:34:20 -0700 Subject: [PATCH 124/433] Input: ili210x - use resolution from ili251x firmware The ili251x firmware protocol permits readout of panel resolution, implement this, but make it possible to override this value using DT bindings. This way, older DTs which contain touchscreen-size-x and touchscreen-size-y properties will behave just like before and new DTs may avoid specifying these for ILI251x. Note that the command format is different on other controllers, so this functionality is isolated to ILI251x. 
Signed-off-by: Marek Vasut Link: https://lore.kernel.org/r/20210831202506.181927-1-marex@denx.de Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ili210x.c | 56 +++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c index 30576a5f2f04..b3f6b4d51df2 100644 --- a/drivers/input/touchscreen/ili210x.c +++ b/drivers/input/touchscreen/ili210x.c @@ -35,6 +35,7 @@ struct ili2xxx_chip { unsigned int max_touches; unsigned int resolution; bool has_calibrate_reg; + bool has_firmware_proto; bool has_pressure_reg; }; @@ -268,6 +269,7 @@ static const struct ili2xxx_chip ili251x_chip = { .continue_polling = ili251x_check_continue_polling, .max_touches = 10, .has_calibrate_reg = true, + .has_firmware_proto = true, .has_pressure_reg = true, }; @@ -323,6 +325,54 @@ static irqreturn_t ili210x_irq(int irq, void *irq_data) return IRQ_HANDLED; } +static int ili251x_firmware_update_resolution(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct ili210x *priv = i2c_get_clientdata(client); + u16 resx, resy; + u8 rs[10]; + int error; + + /* The firmware update blob might have changed the resolution. */ + error = priv->chip->read_reg(client, REG_PANEL_INFO, &rs, sizeof(rs)); + if (error) + return error; + + resx = le16_to_cpup((__le16 *)rs); + resy = le16_to_cpup((__le16 *)(rs + 2)); + + /* The value reported by the firmware is invalid. 
*/ + if (!resx || resx == 0xffff || !resy || resy == 0xffff) + return -EINVAL; + + input_abs_set_max(priv->input, ABS_X, resx - 1); + input_abs_set_max(priv->input, ABS_Y, resy - 1); + input_abs_set_max(priv->input, ABS_MT_POSITION_X, resx - 1); + input_abs_set_max(priv->input, ABS_MT_POSITION_Y, resy - 1); + + return 0; +} + +static int ili251x_firmware_update_cached_state(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct ili210x *priv = i2c_get_clientdata(client); + int error; + + if (!priv->chip->has_firmware_proto) + return 0; + + /* Wait for firmware to boot and stabilize itself. */ + msleep(200); + + /* Firmware does report valid information. */ + error = ili251x_firmware_update_resolution(dev); + if (error) + return error; + + return 0; +} + static ssize_t ili210x_calibrate(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -449,6 +499,12 @@ static int ili210x_i2c_probe(struct i2c_client *client, input_set_abs_params(input, ABS_MT_POSITION_Y, 0, max_xy, 0, 0); if (priv->chip->has_pressure_reg) input_set_abs_params(input, ABS_MT_PRESSURE, 0, 0xa, 0, 0); + error = ili251x_firmware_update_cached_state(dev); + if (error) { + dev_err(dev, "Unable to cache firmware information, err: %d\n", + error); + return error; + } touchscreen_parse_properties(input, true, &priv->prop); error = input_mt_init_slots(input, priv->chip->max_touches, From 70a7681db0c9266bd0a3fd6c90a5cfa20ac44995 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 16 Oct 2021 21:39:04 -0700 Subject: [PATCH 125/433] Input: ili210x - export ili251x version details via sysfs The ili251x firmware protocol permits readout of firmware version, protocol version, mcu version and current mode (application, boot loader, forced update). These information are useful when updating the firmware on the il251x, e.g. to avoid updating the same firmware into the device multiple times. 
The locking is now necessary to avoid races between interrupt handler and the sysfs readouts. Note that the protocol differs considerably between the ili2xxx devices, this patch therefore implements this functionality only for ili251x that I can test. Signed-off-by: Marek Vasut Link: https://lore.kernel.org/r/20210831202506.181927-2-marex@denx.de Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ili210x.c | 165 +++++++++++++++++++++++++++- 1 file changed, 162 insertions(+), 3 deletions(-) diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c index b3f6b4d51df2..7a20e3db186a 100644 --- a/drivers/input/touchscreen/ili210x.c +++ b/drivers/input/touchscreen/ili210x.c @@ -22,6 +22,12 @@ /* Touchscreen commands */ #define REG_TOUCHDATA 0x10 #define REG_PANEL_INFO 0x20 +#define REG_FIRMWARE_VERSION 0x40 +#define REG_PROTOCOL_VERSION 0x42 +#define REG_KERNEL_VERSION 0x61 +#define REG_GET_MODE 0xc0 +#define REG_GET_MODE_AP 0x5a +#define REG_GET_MODE_BL 0x55 #define REG_CALIBRATE 0xcc struct ili2xxx_chip { @@ -45,6 +51,10 @@ struct ili210x { struct gpio_desc *reset_gpio; struct touchscreen_properties prop; const struct ili2xxx_chip *chip; + u8 version_firmware[8]; + u8 version_kernel[5]; + u8 version_proto[2]; + u8 ic_mode[2]; bool stop; }; @@ -353,6 +363,69 @@ static int ili251x_firmware_update_resolution(struct device *dev) return 0; } +static ssize_t ili251x_firmware_update_firmware_version(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct ili210x *priv = i2c_get_clientdata(client); + int error; + u8 fw[8]; + + /* Get firmware version */ + error = priv->chip->read_reg(client, REG_FIRMWARE_VERSION, + &fw, sizeof(fw)); + if (!error) + memcpy(priv->version_firmware, fw, sizeof(fw)); + + return error; +} + +static ssize_t ili251x_firmware_update_kernel_version(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct ili210x *priv = i2c_get_clientdata(client); + int error; + u8 
kv[5]; + + /* Get kernel version */ + error = priv->chip->read_reg(client, REG_KERNEL_VERSION, + &kv, sizeof(kv)); + if (!error) + memcpy(priv->version_kernel, kv, sizeof(kv)); + + return error; +} + +static ssize_t ili251x_firmware_update_protocol_version(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct ili210x *priv = i2c_get_clientdata(client); + int error; + u8 pv[2]; + + /* Get protocol version */ + error = priv->chip->read_reg(client, REG_PROTOCOL_VERSION, + &pv, sizeof(pv)); + if (!error) + memcpy(priv->version_proto, pv, sizeof(pv)); + + return error; +} + +static ssize_t ili251x_firmware_update_ic_mode(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct ili210x *priv = i2c_get_clientdata(client); + int error; + u8 md[2]; + + /* Get chip boot mode */ + error = priv->chip->read_reg(client, REG_GET_MODE, &md, sizeof(md)); + if (!error) + memcpy(priv->ic_mode, md, sizeof(md)); + + return error; +} + static int ili251x_firmware_update_cached_state(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); @@ -370,9 +443,83 @@ static int ili251x_firmware_update_cached_state(struct device *dev) if (error) return error; + error = ili251x_firmware_update_firmware_version(dev); + if (error) + return error; + + error = ili251x_firmware_update_kernel_version(dev); + if (error) + return error; + + error = ili251x_firmware_update_protocol_version(dev); + if (error) + return error; + + error = ili251x_firmware_update_ic_mode(dev); + if (error) + return error; + return 0; } +static ssize_t ili251x_firmware_version_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct ili210x *priv = i2c_get_clientdata(client); + u8 *fw = priv->version_firmware; + + return sysfs_emit(buf, "%02x%02x.%02x%02x.%02x%02x.%02x%02x\n", + fw[0], fw[1], fw[2], fw[3], + fw[4], fw[5], fw[6], fw[7]); +} +static DEVICE_ATTR(firmware_version, 0444, 
ili251x_firmware_version_show, NULL); + +static ssize_t ili251x_kernel_version_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct ili210x *priv = i2c_get_clientdata(client); + u8 *kv = priv->version_kernel; + + return sysfs_emit(buf, "%02x.%02x.%02x.%02x.%02x\n", + kv[0], kv[1], kv[2], kv[3], kv[4]); +} +static DEVICE_ATTR(kernel_version, 0444, ili251x_kernel_version_show, NULL); + +static ssize_t ili251x_protocol_version_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct ili210x *priv = i2c_get_clientdata(client); + u8 *pv = priv->version_proto; + + return sysfs_emit(buf, "%02x.%02x\n", pv[0], pv[1]); +} +static DEVICE_ATTR(protocol_version, 0444, ili251x_protocol_version_show, NULL); + +static ssize_t ili251x_mode_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct ili210x *priv = i2c_get_clientdata(client); + u8 *md = priv->ic_mode; + char *mode = "AP"; + + if (md[0] == REG_GET_MODE_AP) /* Application Mode */ + mode = "AP"; + else if (md[0] == REG_GET_MODE_BL) /* BootLoader Mode */ + mode = "BL"; + else /* Unknown Mode */ + mode = "??"; + + return sysfs_emit(buf, "%02x.%02x:%s\n", md[0], md[1], mode); +} +static DEVICE_ATTR(mode, 0444, ili251x_mode_show, NULL); + static ssize_t ili210x_calibrate(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -401,22 +548,34 @@ static DEVICE_ATTR(calibrate, S_IWUSR, NULL, ili210x_calibrate); static struct attribute *ili210x_attributes[] = { &dev_attr_calibrate.attr, + &dev_attr_firmware_version.attr, + &dev_attr_kernel_version.attr, + &dev_attr_protocol_version.attr, + &dev_attr_mode.attr, NULL, }; -static umode_t ili210x_calibrate_visible(struct kobject *kobj, +static umode_t ili210x_attributes_visible(struct kobject *kobj, struct attribute *attr, int 
index) { struct device *dev = kobj_to_dev(kobj); struct i2c_client *client = to_i2c_client(dev); struct ili210x *priv = i2c_get_clientdata(client); - return priv->chip->has_calibrate_reg ? attr->mode : 0; + /* Calibrate is present on all ILI2xxx which have calibrate register */ + if (attr == &dev_attr_calibrate.attr) + return priv->chip->has_calibrate_reg ? attr->mode : 0; + + /* Firmware/Kernel/Protocol/BootMode is implememted only for ILI251x */ + if (!priv->chip->has_firmware_proto) + return 0; + + return attr->mode; } static const struct attribute_group ili210x_attr_group = { .attrs = ili210x_attributes, - .is_visible = ili210x_calibrate_visible, + .is_visible = ili210x_attributes_visible, }; static void ili210x_power_down(void *data) From c6ac8f0b4ca927316eb40e1e9ba83df5d29f3793 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 16 Oct 2021 21:39:53 -0700 Subject: [PATCH 126/433] Input: ili210x - add ili251x firmware update support The ili251x firmware can be updated, this is used when switching between different modes of operation of the touch surface, e.g. glove operation. This patch implements the firmware update mechanism triggered by a write into an sysfs attribute. The firmware itself is distributed as an intel hex file with non-standard types. The first two lines are of type 0xad, which indicates the start of DataFlash payload, that is always at address 0xf000 on the ili251x, so it can be dropped, and 0xac which indicates the position of firmware info in the Application payload, that is always at address 0x2020 on the ili251x and we do not care. The rest of the firmware is data of type 0x00, and we care about that. 
To convert the firmware hex file into something usable by the kernel, remove the first two lines and then use ihex2fw: $ tail -n +3 input.hex > temp.hex $ ./tools/firmware/ihex2fw temp.hex firmware/ilitek/ili251x.bin To trigger the firmware update, place firmware file ilitek/ili251x.bin into /lib/firmware/, write into firmware_update sysfs attribute and wait about 30-40 seconds. The firmware update is slow. Afterward, verify the firmware_version and mode sysfs attributes to check whether the firmware got updated and the controller switched back to application (AP) mode by reading out 'mode' attribute in sysfs. Note that the content of firmware_version, e.g. 0600.0005.abcd.aa04 can be matched to the content of the firmware hex file. The first four bytes, 0x06 0x00 0x00 0x05 can be found at ^:102030 00 05000006, the next four bytes 0xab 0xcd 0xaa 0x04 at ^:10F000 00 nnnnnnnn ABCDAA04. Note that the protocol differs considerably between the ili2xxx devices, this patch therefore implements this functionality only for ili251x that I can test. Signed-off-by: Marek Vasut Link: https://lore.kernel.org/r/20210831202506.181927-3-marex@denx.de Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/Kconfig | 1 + drivers/input/touchscreen/ili210x.c | 308 ++++++++++++++++++++++++++++ 2 files changed, 309 insertions(+) diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index d4e74738c5a8..2f6adfb7b938 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -425,6 +425,7 @@ config TOUCHSCREEN_HYCON_HY46XX config TOUCHSCREEN_ILI210X tristate "Ilitek ILI210X based touchscreen" depends on I2C + select CRC_CCITT help Say Y here if you have a ILI210X based touchscreen controller. 
This driver supports models ILI2102, diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c index 7a20e3db186a..867c13d3cb17 100644 --- a/drivers/input/touchscreen/ili210x.c +++ b/drivers/input/touchscreen/ili210x.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: GPL-2.0-only +#include #include #include #include +#include #include #include #include @@ -25,11 +27,20 @@ #define REG_FIRMWARE_VERSION 0x40 #define REG_PROTOCOL_VERSION 0x42 #define REG_KERNEL_VERSION 0x61 +#define REG_IC_BUSY 0x80 +#define REG_IC_BUSY_NOT_BUSY 0x50 #define REG_GET_MODE 0xc0 #define REG_GET_MODE_AP 0x5a #define REG_GET_MODE_BL 0x55 +#define REG_SET_MODE_AP 0xc1 +#define REG_SET_MODE_BL 0xc2 +#define REG_WRITE_DATA 0xc3 +#define REG_WRITE_ENABLE 0xc4 +#define REG_READ_DATA_CRC 0xc7 #define REG_CALIBRATE 0xcc +#define ILI251X_FW_FILENAME "ilitek/ili251x.bin" + struct ili2xxx_chip { int (*read_reg)(struct i2c_client *client, u8 reg, void *buf, size_t len); @@ -546,8 +557,305 @@ static ssize_t ili210x_calibrate(struct device *dev, } static DEVICE_ATTR(calibrate, S_IWUSR, NULL, ili210x_calibrate); +static int ili251x_firmware_to_buffer(const struct firmware *fw, + u8 **buf, u16 *ac_end, u16 *df_end) +{ + const struct ihex_binrec *rec; + u32 fw_addr, fw_last_addr = 0; + u16 fw_len; + u8 *fw_buf; + int error; + + /* + * The firmware ihex blob can never be bigger than 64 kiB, so make this + * simple -- allocate a 64 kiB buffer, iterate over the ihex blob records + * once, copy them all into this buffer at the right locations, and then + * do all operations on this linear buffer. 
+ */ + fw_buf = kzalloc(SZ_64K, GFP_KERNEL); + if (!fw_buf) + return -ENOMEM; + + rec = (const struct ihex_binrec *)fw->data; + while (rec) { + fw_addr = be32_to_cpu(rec->addr); + fw_len = be16_to_cpu(rec->len); + + /* The last 32 Byte firmware block can be 0xffe0 */ + if (fw_addr + fw_len > SZ_64K || fw_addr > SZ_64K - 32) { + error = -EFBIG; + goto err_big; + } + + /* Find the last address before DF start address, that is AC end */ + if (fw_addr == 0xf000) + *ac_end = fw_last_addr; + fw_last_addr = fw_addr + fw_len; + + memcpy(fw_buf + fw_addr, rec->data, fw_len); + rec = ihex_next_binrec(rec); + } + + /* DF end address is the last address in the firmware blob */ + *df_end = fw_addr + fw_len; + *buf = fw_buf; + return 0; + +err_big: + kfree(fw_buf); + return error; +} + +/* Switch mode between Application and BootLoader */ +static int ili251x_switch_ic_mode(struct i2c_client *client, u8 cmd_mode) +{ + struct ili210x *priv = i2c_get_clientdata(client); + u8 cmd_wren[3] = { REG_WRITE_ENABLE, 0x5a, 0xa5 }; + u8 md[2]; + int error; + + error = priv->chip->read_reg(client, REG_GET_MODE, md, sizeof(md)); + if (error) + return error; + /* Mode already set */ + if ((cmd_mode == REG_SET_MODE_AP && md[0] == REG_GET_MODE_AP) || + (cmd_mode == REG_SET_MODE_BL && md[0] == REG_GET_MODE_BL)) + return 0; + + /* Unlock writes */ + error = i2c_master_send(client, cmd_wren, sizeof(cmd_wren)); + if (error != sizeof(cmd_wren)) + return -EINVAL; + + mdelay(20); + + /* Select mode (BootLoader or Application) */ + error = i2c_master_send(client, &cmd_mode, 1); + if (error != 1) + return -EINVAL; + + mdelay(200); /* Reboot into bootloader takes a lot of time ... */ + + /* Read back mode */ + error = priv->chip->read_reg(client, REG_GET_MODE, md, sizeof(md)); + if (error) + return error; + /* Check if mode is correct now. 
*/ + if ((cmd_mode == REG_SET_MODE_AP && md[0] == REG_GET_MODE_AP) || + (cmd_mode == REG_SET_MODE_BL && md[0] == REG_GET_MODE_BL)) + return 0; + + return -EINVAL; +} + +static int ili251x_firmware_busy(struct i2c_client *client) +{ + struct ili210x *priv = i2c_get_clientdata(client); + int error, i = 0; + u8 data; + + do { + /* The read_reg already contains suitable delay */ + error = priv->chip->read_reg(client, REG_IC_BUSY, &data, 1); + if (error) + return error; + if (i++ == 100000) + return -ETIMEDOUT; + } while (data != REG_IC_BUSY_NOT_BUSY); + + return 0; +} + +static int ili251x_firmware_write_to_ic(struct device *dev, u8 *fwbuf, + u16 start, u16 end, u8 dataflash) +{ + struct i2c_client *client = to_i2c_client(dev); + struct ili210x *priv = i2c_get_clientdata(client); + u8 cmd_crc = REG_READ_DATA_CRC; + u8 crcrb[4] = { 0 }; + u8 fw_data[33]; + u16 fw_addr; + int error; + + /* + * The DF (dataflash) needs 2 bytes offset for unknown reasons, + * the AC (application) has 2 bytes CRC16-CCITT at the end. + */ + u16 crc = crc_ccitt(0, fwbuf + start + (dataflash ? 
2 : 0), + end - start - 2); + + /* Unlock write to either AC (application) or DF (dataflash) area */ + u8 cmd_wr[10] = { + REG_WRITE_ENABLE, 0x5a, 0xa5, dataflash, + (end >> 16) & 0xff, (end >> 8) & 0xff, end & 0xff, + (crc >> 16) & 0xff, (crc >> 8) & 0xff, crc & 0xff + }; + + error = i2c_master_send(client, cmd_wr, sizeof(cmd_wr)); + if (error != sizeof(cmd_wr)) + return -EINVAL; + + error = ili251x_firmware_busy(client); + if (error) + return error; + + for (fw_addr = start; fw_addr < end; fw_addr += 32) { + fw_data[0] = REG_WRITE_DATA; + memcpy(&(fw_data[1]), fwbuf + fw_addr, 32); + error = i2c_master_send(client, fw_data, 33); + if (error != sizeof(fw_data)) + return error; + error = ili251x_firmware_busy(client); + if (error) + return error; + } + + error = i2c_master_send(client, &cmd_crc, 1); + if (error != 1) + return -EINVAL; + + error = ili251x_firmware_busy(client); + if (error) + return error; + + error = priv->chip->read_reg(client, REG_READ_DATA_CRC, + &crcrb, sizeof(crcrb)); + if (error) + return error; + + /* Check CRC readback */ + if ((crcrb[0] != (crc & 0xff)) || crcrb[1] != ((crc >> 8) & 0xff)) + return -EINVAL; + + return 0; +} + +static int ili251x_firmware_reset(struct i2c_client *client) +{ + u8 cmd_reset[2] = { 0xf2, 0x01 }; + int error; + + error = i2c_master_send(client, cmd_reset, sizeof(cmd_reset)); + if (error != sizeof(cmd_reset)) + return -EINVAL; + + return ili251x_firmware_busy(client); +} + +static void ili251x_hardware_reset(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct ili210x *priv = i2c_get_clientdata(client); + + /* Reset the controller */ + gpiod_set_value_cansleep(priv->reset_gpio, 1); + usleep_range(10000, 15000); + gpiod_set_value_cansleep(priv->reset_gpio, 0); + msleep(300); +} + +static ssize_t ili210x_firmware_update_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + const char *fwname = 
ILI251X_FW_FILENAME; + const struct firmware *fw; + u16 ac_end, df_end; + u8 *fwbuf; + int error; + int i; + + error = request_ihex_firmware(&fw, fwname, dev); + if (error) { + dev_err(dev, "Failed to request firmware %s, error=%d\n", + fwname, error); + return error; + } + + error = ili251x_firmware_to_buffer(fw, &fwbuf, &ac_end, &df_end); + release_firmware(fw); + if (error) + return error; + + /* + * Disable touchscreen IRQ, so that we would not get spurious touch + * interrupt during firmware update, and so that the IRQ handler won't + * trigger and interfere with the firmware update. There is no bit in + * the touch controller to disable the IRQs during update, so we have + * to do it this way here. + */ + disable_irq(client->irq); + + dev_dbg(dev, "Firmware update started, firmware=%s\n", fwname); + + ili251x_hardware_reset(dev); + + error = ili251x_firmware_reset(client); + if (error) + goto exit; + + /* This may not succeed on first try, so re-try a few times. */ + for (i = 0; i < 5; i++) { + error = ili251x_switch_ic_mode(client, REG_SET_MODE_BL); + if (!error) + break; + } + + if (error) + goto exit; + + dev_dbg(dev, "IC is now in BootLoader mode\n"); + + msleep(200); /* The bootloader seems to need some time too. */ + + error = ili251x_firmware_write_to_ic(dev, fwbuf, 0xf000, df_end, 1); + if (error) { + dev_err(dev, "DF firmware update failed, error=%d\n", error); + goto exit; + } + + dev_dbg(dev, "DataFlash firmware written\n"); + + error = ili251x_firmware_write_to_ic(dev, fwbuf, 0x2000, ac_end, 0); + if (error) { + dev_err(dev, "AC firmware update failed, error=%d\n", error); + goto exit; + } + + dev_dbg(dev, "Application firmware written\n"); + + /* This may not succeed on first try, so re-try a few times. 
*/ + for (i = 0; i < 5; i++) { + error = ili251x_switch_ic_mode(client, REG_SET_MODE_AP); + if (!error) + break; + } + + if (error) + goto exit; + + dev_dbg(dev, "IC is now in Application mode\n"); + + error = ili251x_firmware_update_cached_state(dev); + if (error) + goto exit; + + error = count; + +exit: + ili251x_hardware_reset(dev); + dev_dbg(dev, "Firmware update ended, error=%i\n", error); + enable_irq(client->irq); + kfree(fwbuf); + return error; +} + +static DEVICE_ATTR(firmware_update, 0200, NULL, ili210x_firmware_update_store); + static struct attribute *ili210x_attributes[] = { &dev_attr_calibrate.attr, + &dev_attr_firmware_update.attr, &dev_attr_firmware_version.attr, &dev_attr_kernel_version.attr, &dev_attr_protocol_version.attr, From 917425f71f36ce6f61841497040e10d0166106d8 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 18 Oct 2021 17:19:27 +0200 Subject: [PATCH 127/433] rtc: add alarm related features Add more alarm related features to be declared by drivers. Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211018151933.76865-2-alexandre.belloni@bootlin.com --- include/uapi/linux/rtc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/rtc.h b/include/uapi/linux/rtc.h index f950bff75e97..f4037c541925 100644 --- a/include/uapi/linux/rtc.h +++ b/include/uapi/linux/rtc.h @@ -114,7 +114,9 @@ struct rtc_pll_info { #define RTC_FEATURE_ALARM 0 #define RTC_FEATURE_ALARM_RES_MINUTE 1 #define RTC_FEATURE_NEED_WEEK_DAY 2 -#define RTC_FEATURE_CNT 3 +#define RTC_FEATURE_ALARM_RES_2S 3 +#define RTC_FEATURE_UPDATE_INTERRUPT 4 +#define RTC_FEATURE_CNT 5 #define RTC_MAX_FREQ 8192 From 6a8af1b6568ad9ee08a419fb12c793f7992cf8a4 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 18 Oct 2021 17:19:28 +0200 Subject: [PATCH 128/433] rtc: add parameter ioctl Add an ioctl allowing to get and set extra parameters for an RTC. For now, only handle getting available features. 
Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211018151933.76865-3-alexandre.belloni@bootlin.com --- drivers/rtc/dev.c | 40 ++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/rtc.h | 18 ++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/drivers/rtc/dev.c b/drivers/rtc/dev.c index 5b8ebe86124a..143c097eff0f 100644 --- a/drivers/rtc/dev.c +++ b/drivers/rtc/dev.c @@ -208,6 +208,7 @@ static long rtc_dev_ioctl(struct file *file, const struct rtc_class_ops *ops = rtc->ops; struct rtc_time tm; struct rtc_wkalrm alarm; + struct rtc_param param; void __user *uarg = (void __user *)arg; err = mutex_lock_interruptible(&rtc->ops_lock); @@ -221,6 +222,7 @@ static long rtc_dev_ioctl(struct file *file, switch (cmd) { case RTC_EPOCH_SET: case RTC_SET_TIME: + case RTC_PARAM_SET: if (!capable(CAP_SYS_TIME)) err = -EACCES; break; @@ -382,6 +384,44 @@ static long rtc_dev_ioctl(struct file *file, err = -EFAULT; return err; + case RTC_PARAM_GET: + if (copy_from_user(&param, uarg, sizeof(param))) { + mutex_unlock(&rtc->ops_lock); + return -EFAULT; + } + + switch(param.param) { + long offset; + case RTC_PARAM_FEATURES: + if (param.index != 0) + err = -EINVAL; + param.uvalue = rtc->features[0]; + break; + + default: + err = -EINVAL; + } + + if (!err) + if (copy_to_user(uarg, &param, sizeof(param))) + err = -EFAULT; + + break; + + case RTC_PARAM_SET: + if (copy_from_user(&param, uarg, sizeof(param))) { + mutex_unlock(&rtc->ops_lock); + return -EFAULT; + } + + switch(param.param) { + case RTC_PARAM_FEATURES: + default: + err = -EINVAL; + } + + break; + default: /* Finally try the driver's ioctl interface */ if (ops->ioctl) { diff --git a/include/uapi/linux/rtc.h b/include/uapi/linux/rtc.h index f4037c541925..3241f9ecc639 100644 --- a/include/uapi/linux/rtc.h +++ b/include/uapi/linux/rtc.h @@ -14,6 +14,7 @@ #include #include +#include /* * The struct used to pass data via the following ioctl.
Similar to the @@ -66,6 +67,17 @@ struct rtc_pll_info { long pll_clock; /* base PLL frequency */ }; +struct rtc_param { + __u64 param; + union { + __u64 uvalue; + __s64 svalue; + __u64 ptr; + }; + __u32 index; + __u32 __pad; +}; + /* * ioctl calls that are permitted to the /dev/rtc interface, if * any of the RTC drivers are enabled. @@ -95,6 +107,9 @@ struct rtc_pll_info { #define RTC_PLL_GET _IOR('p', 0x11, struct rtc_pll_info) /* Get PLL correction */ #define RTC_PLL_SET _IOW('p', 0x12, struct rtc_pll_info) /* Set PLL correction */ +#define RTC_PARAM_GET _IOW('p', 0x13, struct rtc_param) /* Get parameter */ +#define RTC_PARAM_SET _IOW('p', 0x14, struct rtc_param) /* Set parameter */ + #define RTC_VL_DATA_INVALID _BITUL(0) /* Voltage too low, RTC data is invalid */ #define RTC_VL_BACKUP_LOW _BITUL(1) /* Backup voltage is low */ #define RTC_VL_BACKUP_EMPTY _BITUL(2) /* Backup empty or not present */ @@ -118,6 +133,9 @@ struct rtc_pll_info { #define RTC_FEATURE_UPDATE_INTERRUPT 4 #define RTC_FEATURE_CNT 5 +/* parameter list */ +#define RTC_PARAM_FEATURES 0 + #define RTC_MAX_FREQ 8192 From 2268551935dbf1abcbb4d4fb7b1ad74dbe0d1be0 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 18 Oct 2021 17:19:29 +0200 Subject: [PATCH 129/433] rtc: expose correction feature Add a new feature for RTCs able to correct the oscillator imprecision. This is also called offset or trimming. Such drivers have a .set_offset callback, use that to set the feature bit from the core. 
Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211018151933.76865-4-alexandre.belloni@bootlin.com --- drivers/rtc/class.c | 3 +++ include/uapi/linux/rtc.h | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index dbccd71589b9..2e0cbc190a8a 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -389,6 +389,9 @@ int __devm_rtc_register_device(struct module *owner, struct rtc_device *rtc) if (!rtc->ops->set_alarm) clear_bit(RTC_FEATURE_ALARM, rtc->features); + if (rtc->ops->set_offset) + set_bit(RTC_FEATURE_CORRECTION, rtc->features); + rtc->owner = owner; rtc_device_get_offset(rtc); diff --git a/include/uapi/linux/rtc.h b/include/uapi/linux/rtc.h index 3241f9ecc639..c83bb9a4fa4f 100644 --- a/include/uapi/linux/rtc.h +++ b/include/uapi/linux/rtc.h @@ -131,7 +131,8 @@ struct rtc_param { #define RTC_FEATURE_NEED_WEEK_DAY 2 #define RTC_FEATURE_ALARM_RES_2S 3 #define RTC_FEATURE_UPDATE_INTERRUPT 4 -#define RTC_FEATURE_CNT 5 +#define RTC_FEATURE_CORRECTION 5 +#define RTC_FEATURE_CNT 6 /* parameter list */ #define RTC_PARAM_FEATURES 0 From a6d8c6e1a5c6fb982964861dc84c0c7cb0151c7c Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 18 Oct 2021 17:19:30 +0200 Subject: [PATCH 130/433] rtc: add correction parameter Add a new parameter allowing to get and set the correction using ioctls instead of just sysfs.
Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211018151933.76865-5-alexandre.belloni@bootlin.com --- drivers/rtc/dev.c | 19 +++++++++++++++++++ include/uapi/linux/rtc.h | 1 + 2 files changed, 20 insertions(+) diff --git a/drivers/rtc/dev.c b/drivers/rtc/dev.c index 143c097eff0f..abee1fc4705e 100644 --- a/drivers/rtc/dev.c +++ b/drivers/rtc/dev.c @@ -398,6 +398,16 @@ static long rtc_dev_ioctl(struct file *file, param.uvalue = rtc->features[0]; break; + case RTC_PARAM_CORRECTION: + mutex_unlock(&rtc->ops_lock); + if (param.index != 0) + return -EINVAL; + err = rtc_read_offset(rtc, &offset); + mutex_lock(&rtc->ops_lock); + if (err == 0) + param.svalue = offset; + break; + default: err = -EINVAL; } @@ -416,6 +426,15 @@ static long rtc_dev_ioctl(struct file *file, switch(param.param) { case RTC_PARAM_FEATURES: + err = -EINVAL; + break; + + case RTC_PARAM_CORRECTION: + mutex_unlock(&rtc->ops_lock); + if (param.index != 0) + return -EINVAL; + return rtc_set_offset(rtc, param.svalue); + default: err = -EINVAL; } diff --git a/include/uapi/linux/rtc.h b/include/uapi/linux/rtc.h index c83bb9a4fa4f..5debe82439c2 100644 --- a/include/uapi/linux/rtc.h +++ b/include/uapi/linux/rtc.h @@ -136,6 +136,7 @@ struct rtc_param { /* parameter list */ #define RTC_PARAM_FEATURES 0 +#define RTC_PARAM_CORRECTION 1 #define RTC_MAX_FREQ 8192 From 0d20e9fb1262b1f9ac895b287db892bc75b05b84 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 18 Oct 2021 17:19:31 +0200 Subject: [PATCH 131/433] rtc: add BSM parameter BSM or Backup Switch Mode is a common feature on RTCs, allowing to select how the RTC will decide when to switch from its primary power supply to the backup power supply. It is necessary to be able to set it from userspace as there are use cases where it has to be done dynamically.
Supported values are: RTC_BSM_DISABLED: disabled RTC_BSM_DIRECT: switching will happen as soon as Vbackup > Vdd RTC_BSM_LEVEL: switching will happen around a threshold, usually with a hysteresis RTC_BSM_STANDBY: switching will not happen until Vdd > Vbackup, this is useful to ensure the RTC doesn't draw any power until the device is first powered on. Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211018151933.76865-6-alexandre.belloni@bootlin.com --- drivers/rtc/dev.c | 10 ++++++++-- include/linux/rtc.h | 2 ++ include/uapi/linux/rtc.h | 9 ++++++++- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/rtc/dev.c b/drivers/rtc/dev.c index abee1fc4705e..e104972a28fd 100644 --- a/drivers/rtc/dev.c +++ b/drivers/rtc/dev.c @@ -409,7 +409,10 @@ static long rtc_dev_ioctl(struct file *file, break; default: - err = -EINVAL; + if (rtc->ops->param_get) + err = rtc->ops->param_get(rtc->dev.parent, &param); + else + err = -EINVAL; } if (!err) @@ -436,7 +439,10 @@ static long rtc_dev_ioctl(struct file *file, return rtc_set_offset(rtc, param.svalue); default: - err = -EINVAL; + if (rtc->ops->param_set) + err = rtc->ops->param_set(rtc->dev.parent, &param); + else + err = -EINVAL; } break; diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 354e0843ab17..47fd1c2d3a57 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -66,6 +66,8 @@ struct rtc_class_ops { int (*alarm_irq_enable)(struct device *, unsigned int enabled); int (*read_offset)(struct device *, long *offset); int (*set_offset)(struct device *, long offset); + int (*param_get)(struct device *, struct rtc_param *param); + int (*param_set)(struct device *, struct rtc_param *param); }; struct rtc_device; diff --git a/include/uapi/linux/rtc.h b/include/uapi/linux/rtc.h index 5debe82439c2..03e5b776e597 100644 --- a/include/uapi/linux/rtc.h +++ b/include/uapi/linux/rtc.h @@ -132,11 +132,18 @@ struct rtc_param { #define RTC_FEATURE_ALARM_RES_2S 3 #define RTC_FEATURE_UPDATE_INTERRUPT
4 #define RTC_FEATURE_CORRECTION 5 -#define RTC_FEATURE_CNT 6 +#define RTC_FEATURE_BACKUP_SWITCH_MODE 6 +#define RTC_FEATURE_CNT 7 /* parameter list */ #define RTC_PARAM_FEATURES 0 #define RTC_PARAM_CORRECTION 1 +#define RTC_PARAM_BACKUP_SWITCH_MODE 2 + +#define RTC_BSM_DISABLED 0 +#define RTC_BSM_DIRECT 1 +#define RTC_BSM_LEVEL 2 +#define RTC_BSM_STANDBY 3 #define RTC_MAX_FREQ 8192 From 018d959ba7ffcadcc21e007f81c4b2b7a2b47447 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 18 Oct 2021 17:19:32 +0200 Subject: [PATCH 132/433] rtc: rv3028: add BSM support Backup Switch Mode controls how the RTC decides when to switch to the backup power supply. As it is disabled by default, provide a way to enable and configure it. Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211018151933.76865-7-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-rv3028.c | 74 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/drivers/rtc/rtc-rv3028.c b/drivers/rtc/rtc-rv3028.c index 12c807306893..cdc623b3e365 100644 --- a/drivers/rtc/rtc-rv3028.c +++ b/drivers/rtc/rtc-rv3028.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -80,6 +81,10 @@ #define RV3028_BACKUP_TCE BIT(5) #define RV3028_BACKUP_TCR_MASK GENMASK(1,0) +#define RV3028_BACKUP_BSM GENMASK(3,2) + +#define RV3028_BACKUP_BSM_DSM 0x1 +#define RV3028_BACKUP_BSM_LSM 0x3 #define OFFSET_STEP_PPT 953674 @@ -512,6 +517,71 @@ exit_eerd: } +static int rv3028_param_get(struct device *dev, struct rtc_param *param) +{ + struct rv3028_data *rv3028 = dev_get_drvdata(dev); + int ret; + + switch(param->param) { + u32 value; + + case RTC_PARAM_BACKUP_SWITCH_MODE: + ret = regmap_read(rv3028->regmap, RV3028_BACKUP, &value); + if (ret < 0) + return ret; + + value = FIELD_GET(RV3028_BACKUP_BSM, value); + + switch(value) { + case RV3028_BACKUP_BSM_DSM: + param->uvalue = RTC_BSM_DIRECT; + break; + case RV3028_BACKUP_BSM_LSM: + param->uvalue = RTC_BSM_LEVEL; + break; + 
default: + param->uvalue = RTC_BSM_DISABLED; + } + break; + + default: + return -EINVAL; + } + + return 0; +} + +static int rv3028_param_set(struct device *dev, struct rtc_param *param) +{ + struct rv3028_data *rv3028 = dev_get_drvdata(dev); + + switch(param->param) { + u8 mode; + case RTC_PARAM_BACKUP_SWITCH_MODE: + switch (param->uvalue) { + case RTC_BSM_DISABLED: + mode = 0; + break; + case RTC_BSM_DIRECT: + mode = RV3028_BACKUP_BSM_DSM; + break; + case RTC_BSM_LEVEL: + mode = RV3028_BACKUP_BSM_LSM; + break; + default: + return -EINVAL; + } + + return rv3028_update_cfg(rv3028, RV3028_BACKUP, RV3028_BACKUP_BSM, + FIELD_PREP(RV3028_BACKUP_BSM, mode)); + + default: + return -EINVAL; + } + + return 0; +} + static int rv3028_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) { struct rv3028_data *rv3028 = dev_get_drvdata(dev); @@ -776,6 +846,8 @@ static const struct rtc_class_ops rv3028_rtc_ops = { .read_offset = rv3028_read_offset, .set_offset = rv3028_set_offset, .ioctl = rv3028_ioctl, + .param_get = rv3028_param_get, + .param_set = rv3028_param_set, }; static const struct regmap_config regmap_config = { @@ -878,6 +950,8 @@ static int rv3028_probe(struct i2c_client *client) if (ret) return ret; + set_bit(RTC_FEATURE_BACKUP_SWITCH_MODE, rv3028->rtc->features); + rv3028->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; rv3028->rtc->range_max = RTC_TIMESTAMP_END_2099; rv3028->rtc->ops = &rv3028_rtc_ops; From 6084eac38e765c5ee1338f4e9b1ad3321f4c53eb Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 18 Oct 2021 17:19:33 +0200 Subject: [PATCH 133/433] rtc: rv3032: allow setting BSM Backup Switch Mode is currently set properly when the trickle charger is enabled. However, in the case of a non-rechargeable battery, it is necessary to be able to enable it, only allow that when the trickle charger is disabled. 
Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211018151933.76865-8-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-rv3032.c | 76 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/drivers/rtc/rtc-rv3032.c b/drivers/rtc/rtc-rv3032.c index 1b62ed2f1459..a3c73179ecb1 100644 --- a/drivers/rtc/rtc-rv3032.c +++ b/drivers/rtc/rtc-rv3032.c @@ -106,6 +106,7 @@ struct rv3032_data { struct regmap *regmap; struct rtc_device *rtc; + bool trickle_charger_set; #ifdef CONFIG_COMMON_CLK struct clk_hw clkout_hw; #endif @@ -402,6 +403,75 @@ static int rv3032_set_offset(struct device *dev, long offset) FIELD_PREP(RV3032_OFFSET_MSK, offset)); } +static int rv3032_param_get(struct device *dev, struct rtc_param *param) +{ + struct rv3032_data *rv3032 = dev_get_drvdata(dev); + int ret; + + switch(param->param) { + u32 value; + + case RTC_PARAM_BACKUP_SWITCH_MODE: + ret = regmap_read(rv3032->regmap, RV3032_PMU, &value); + if (ret < 0) + return ret; + + value = FIELD_GET(RV3032_PMU_BSM, value); + + switch(value) { + case RV3032_PMU_BSM_DSM: + param->uvalue = RTC_BSM_DIRECT; + break; + case RV3032_PMU_BSM_LSM: + param->uvalue = RTC_BSM_LEVEL; + break; + default: + param->uvalue = RTC_BSM_DISABLED; + } + + break; + + default: + return -EINVAL; + } + + return 0; +} + +static int rv3032_param_set(struct device *dev, struct rtc_param *param) +{ + struct rv3032_data *rv3032 = dev_get_drvdata(dev); + + switch(param->param) { + u8 mode; + case RTC_PARAM_BACKUP_SWITCH_MODE: + if (rv3032->trickle_charger_set) + return -EINVAL; + + switch (param->uvalue) { + case RTC_BSM_DISABLED: + mode = 0; + break; + case RTC_BSM_DIRECT: + mode = RV3032_PMU_BSM_DSM; + break; + case RTC_BSM_LEVEL: + mode = RV3032_PMU_BSM_LSM; + break; + default: + return -EINVAL; + } + + return rv3032_update_cfg(rv3032, RV3032_PMU, RV3032_PMU_BSM, + FIELD_PREP(RV3032_PMU_BSM, mode)); + + default: + return -EINVAL; + } + + return 0; +} + static int rv3032_ioctl(struct device 
*dev, unsigned int cmd, unsigned long arg) { struct rv3032_data *rv3032 = dev_get_drvdata(dev); @@ -541,6 +611,8 @@ static int rv3032_trickle_charger_setup(struct device *dev, struct rv3032_data * return 0; } + rv3032->trickle_charger_set = true; + return rv3032_update_cfg(rv3032, RV3032_PMU, RV3032_PMU_TCR | RV3032_PMU_TCM | RV3032_PMU_BSM, val | FIELD_PREP(RV3032_PMU_TCR, i)); @@ -813,6 +885,8 @@ static const struct rtc_class_ops rv3032_rtc_ops = { .read_alarm = rv3032_get_alarm, .set_alarm = rv3032_set_alarm, .alarm_irq_enable = rv3032_alarm_irq_enable, + .param_get = rv3032_param_get, + .param_set = rv3032_param_set, }; static const struct regmap_config regmap_config = { @@ -883,6 +957,8 @@ static int rv3032_probe(struct i2c_client *client) rv3032_trickle_charger_setup(&client->dev, rv3032); + set_bit(RTC_FEATURE_BACKUP_SWITCH_MODE, rv3032->rtc->features); + rv3032->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; rv3032->rtc->range_max = RTC_TIMESTAMP_END_2099; rv3032->rtc->ops = &rv3032_rtc_ops; From 7d7234a4fff395ab17b021ec18f852e8ec8703b3 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Fri, 15 Oct 2021 21:24:00 +0200 Subject: [PATCH 134/433] rtc: pcf8523: avoid reading BLF in pcf8523_rtc_read_time BLF, battery low, doesn't mean the time is imprecise or invalid; it simply means the backup battery has to be replaced. This information can be read using the VL_READ ioctl.
Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211015192400.818254-1-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-pcf8523.c | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/drivers/rtc/rtc-pcf8523.c b/drivers/rtc/rtc-pcf8523.c index 8b6fb20774bf..09110b3ae25f 100644 --- a/drivers/rtc/rtc-pcf8523.c +++ b/drivers/rtc/rtc-pcf8523.c @@ -24,6 +24,7 @@ #define PCF8523_CONTROL3_PM_DSM BIT(5) /* direct switching mode */ #define PCF8523_CONTROL3_PM_MASK 0xe0 #define PCF8523_CONTROL3_BLF BIT(2) /* battery low bit, read-only */ +#define PCF8523_CONTROL3_BSF BIT(3) #define PCF8523_REG_SECONDS 0x03 #define PCF8523_SECONDS_OS BIT(7) @@ -94,18 +95,6 @@ static int pcf8523_write(struct i2c_client *client, u8 reg, u8 value) return 0; } -static int pcf8523_voltage_low(struct i2c_client *client) -{ - u8 value; - int err; - - err = pcf8523_read(client, PCF8523_REG_CONTROL3, &value); - if (err < 0) - return err; - - return !!(value & PCF8523_CONTROL3_BLF); -} - static int pcf8523_load_capacitance(struct i2c_client *client) { u32 load; @@ -220,14 +209,6 @@ static int pcf8523_rtc_read_time(struct device *dev, struct rtc_time *tm) struct i2c_msg msgs[2]; int err; - err = pcf8523_voltage_low(client); - if (err < 0) { - return err; - } else if (err > 0) { - dev_err(dev, "low voltage detected, time is unreliable\n"); - return -EINVAL; - } - msgs[0].addr = client->addr; msgs[0].flags = 0; msgs[0].len = 1; @@ -412,10 +393,11 @@ static int pcf8523_rtc_ioctl(struct device *dev, unsigned int cmd, switch (cmd) { case RTC_VL_READ: - ret = pcf8523_voltage_low(client); + ret = pcf8523_read(client, PCF8523_REG_CONTROL3, &value); if (ret < 0) return ret; - if (ret) + + if (value & PCF8523_CONTROL3_BLF) flags |= RTC_VL_BACKUP_LOW; ret = pcf8523_read(client, PCF8523_REG_SECONDS, &value); From adb17a053e460f20740d713c4843d6966e66b1b1 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 18 Oct 2021 17:23:36 +0200 Subject: [PATCH 
135/433] rtc: expose RTC_FEATURE_UPDATE_INTERRUPT Set RTC_FEATURE_UPDATE_INTERRUPT by default and clear it when it is not supported. Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211018152337.78732-1-alexandre.belloni@bootlin.com --- drivers/rtc/class.c | 4 ++++ drivers/rtc/interface.c | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index 2e0cbc190a8a..4b460c61f1d8 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -232,6 +232,7 @@ static struct rtc_device *rtc_allocate_device(void) rtc->pie_enabled = 0; set_bit(RTC_FEATURE_ALARM, rtc->features); + set_bit(RTC_FEATURE_UPDATE_INTERRUPT, rtc->features); return rtc; } @@ -389,6 +390,9 @@ int __devm_rtc_register_device(struct module *owner, struct rtc_device *rtc) if (!rtc->ops->set_alarm) clear_bit(RTC_FEATURE_ALARM, rtc->features); + if (rtc->uie_unsupported) + clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, rtc->features); + if (rtc->ops->set_offset) set_bit(RTC_FEATURE_CORRECTION, rtc->features); diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 9a2bd4947007..d005623e6eb3 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -561,7 +561,8 @@ int rtc_update_irq_enable(struct rtc_device *rtc, unsigned int enabled) if (rtc->uie_rtctimer.enabled == enabled) goto out; - if (rtc->uie_unsupported || !test_bit(RTC_FEATURE_ALARM, rtc->features)) { + if (!test_bit(RTC_FEATURE_UPDATE_INTERRUPT, rtc->features) || + !test_bit(RTC_FEATURE_ALARM, rtc->features)) { mutex_unlock(&rtc->ops_lock); #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL return rtc_dev_update_irq_enable_emul(rtc, enabled); From 91f3849d956d58073ef55e01f2e8871dc30847a5 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 18 Oct 2021 17:36:46 +0200 Subject: [PATCH 136/433] rtc: pcf8523: switch to regmap Use regmap to access the RTC registers, this is a huge reduction in code lines and generated code. 
Values on ARMv7: text data bss dec hex 5180 132 0 5312 14c0 before 3900 132 0 4032 fc0 after Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211018153651.82069-1-alexandre.belloni@bootlin.com --- drivers/rtc/Kconfig | 1 + drivers/rtc/rtc-pcf8523.c | 307 +++++++++++--------------------------- 2 files changed, 87 insertions(+), 221 deletions(-) diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 59dc1410a160..8dc92b4e042f 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -441,6 +441,7 @@ config RTC_DRV_X1205 config RTC_DRV_PCF8523 tristate "NXP PCF8523" + select REGMAP_I2C help If you say yes here you get support for the NXP PCF8523 RTC chips. diff --git a/drivers/rtc/rtc-pcf8523.c b/drivers/rtc/rtc-pcf8523.c index 09110b3ae25f..c5d31c525997 100644 --- a/drivers/rtc/rtc-pcf8523.c +++ b/drivers/rtc/rtc-pcf8523.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -49,115 +50,45 @@ struct pcf8523 { struct rtc_device *rtc; - struct i2c_client *client; + struct regmap *regmap; }; -static int pcf8523_read(struct i2c_client *client, u8 reg, u8 *valuep) +static int pcf8523_load_capacitance(struct pcf8523 *pcf8523, struct device_node *node) { - struct i2c_msg msgs[2]; - u8 value = 0; - int err; - - msgs[0].addr = client->addr; - msgs[0].flags = 0; - msgs[0].len = sizeof(reg); - msgs[0].buf = &reg; - - msgs[1].addr = client->addr; - msgs[1].flags = I2C_M_RD; - msgs[1].len = sizeof(value); - msgs[1].buf = &value; - - err = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs)); - if (err < 0) - return err; - - *valuep = value; - - return 0; -} - -static int pcf8523_write(struct i2c_client *client, u8 reg, u8 value) -{ - u8 buffer[2] = { reg, value }; - struct i2c_msg msg; - int err; - - msg.addr = client->addr; - msg.flags = 0; - msg.len = sizeof(buffer); - msg.buf = buffer; - - err = i2c_transfer(client->adapter, &msg, 1); - if (err < 0) - return err; - - return 0; -} - -static int
pcf8523_load_capacitance(struct i2c_client *client) -{ - u32 load; - u8 value; - int err; - - err = pcf8523_read(client, PCF8523_REG_CONTROL1, &value); - if (err < 0) - return err; + u32 load, value = 0; load = 12500; - of_property_read_u32(client->dev.of_node, "quartz-load-femtofarads", - &load); + of_property_read_u32(node, "quartz-load-femtofarads", &load); switch (load) { default: - dev_warn(&client->dev, "Unknown quartz-load-femtofarads value: %d. Assuming 12500", + dev_warn(&pcf8523->rtc->dev, "Unknown quartz-load-femtofarads value: %d. Assuming 12500", load); fallthrough; case 12500: value |= PCF8523_CONTROL1_CAP_SEL; break; case 7000: - value &= ~PCF8523_CONTROL1_CAP_SEL; break; } - err = pcf8523_write(client, PCF8523_REG_CONTROL1, value); - - return err; -} - -static int pcf8523_set_pm(struct i2c_client *client, u8 pm) -{ - u8 value; - int err; - - err = pcf8523_read(client, PCF8523_REG_CONTROL3, &value); - if (err < 0) - return err; - - value = (value & ~PCF8523_CONTROL3_PM_MASK) | pm; - - err = pcf8523_write(client, PCF8523_REG_CONTROL3, value); - if (err < 0) - return err; - - return 0; + return regmap_update_bits(pcf8523->regmap, PCF8523_REG_CONTROL1, + PCF8523_CONTROL1_CAP_SEL, value); } static irqreturn_t pcf8523_irq(int irq, void *dev_id) { - struct pcf8523 *pcf8523 = i2c_get_clientdata(dev_id); - u8 value; + struct pcf8523 *pcf8523 = dev_id; + u32 value; int err; - err = pcf8523_read(pcf8523->client, PCF8523_REG_CONTROL2, &value); + err = regmap_read(pcf8523->regmap, PCF8523_REG_CONTROL2, &value); if (err < 0) return IRQ_HANDLED; if (value & PCF8523_CONTROL2_AF) { value &= ~PCF8523_CONTROL2_AF; - pcf8523_write(pcf8523->client, PCF8523_REG_CONTROL2, value); + regmap_write(pcf8523->regmap, PCF8523_REG_CONTROL2, value); rtc_update_irq(pcf8523->rtc, 1, RTC_IRQF | RTC_AF); return IRQ_HANDLED; @@ -166,60 +97,14 @@ static irqreturn_t pcf8523_irq(int irq, void *dev_id) return IRQ_NONE; } -static int pcf8523_stop_rtc(struct i2c_client *client) -{ - u8 
value; - int err; - - err = pcf8523_read(client, PCF8523_REG_CONTROL1, &value); - if (err < 0) - return err; - - value |= PCF8523_CONTROL1_STOP; - - err = pcf8523_write(client, PCF8523_REG_CONTROL1, value); - if (err < 0) - return err; - - return 0; -} - -static int pcf8523_start_rtc(struct i2c_client *client) -{ - u8 value; - int err; - - err = pcf8523_read(client, PCF8523_REG_CONTROL1, &value); - if (err < 0) - return err; - - value &= ~PCF8523_CONTROL1_STOP; - - err = pcf8523_write(client, PCF8523_REG_CONTROL1, value); - if (err < 0) - return err; - - return 0; -} - static int pcf8523_rtc_read_time(struct device *dev, struct rtc_time *tm) { - struct i2c_client *client = to_i2c_client(dev); - u8 start = PCF8523_REG_SECONDS, regs[7]; - struct i2c_msg msgs[2]; + struct pcf8523 *pcf8523 = dev_get_drvdata(dev); + u8 regs[7]; int err; - msgs[0].addr = client->addr; - msgs[0].flags = 0; - msgs[0].len = 1; - msgs[0].buf = &start; - - msgs[1].addr = client->addr; - msgs[1].flags = I2C_M_RD; - msgs[1].len = sizeof(regs); - msgs[1].buf = regs; - - err = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs)); + err = regmap_bulk_read(pcf8523->regmap, PCF8523_REG_SECONDS, regs, + sizeof(regs)); if (err < 0) return err; @@ -239,63 +124,50 @@ static int pcf8523_rtc_read_time(struct device *dev, struct rtc_time *tm) static int pcf8523_rtc_set_time(struct device *dev, struct rtc_time *tm) { - struct i2c_client *client = to_i2c_client(dev); - struct i2c_msg msg; - u8 regs[8]; + struct pcf8523 *pcf8523 = dev_get_drvdata(dev); + u8 regs[7]; int err; - err = pcf8523_stop_rtc(client); + err = regmap_update_bits(pcf8523->regmap, PCF8523_REG_CONTROL1, + PCF8523_CONTROL1_STOP, PCF8523_CONTROL1_STOP); if (err < 0) return err; - regs[0] = PCF8523_REG_SECONDS; /* This will purposely overwrite PCF8523_SECONDS_OS */ - regs[1] = bin2bcd(tm->tm_sec); - regs[2] = bin2bcd(tm->tm_min); - regs[3] = bin2bcd(tm->tm_hour); - regs[4] = bin2bcd(tm->tm_mday); - regs[5] = tm->tm_wday; - regs[6] = 
bin2bcd(tm->tm_mon + 1); - regs[7] = bin2bcd(tm->tm_year - 100); + regs[0] = bin2bcd(tm->tm_sec); + regs[1] = bin2bcd(tm->tm_min); + regs[2] = bin2bcd(tm->tm_hour); + regs[3] = bin2bcd(tm->tm_mday); + regs[4] = tm->tm_wday; + regs[5] = bin2bcd(tm->tm_mon + 1); + regs[6] = bin2bcd(tm->tm_year - 100); - msg.addr = client->addr; - msg.flags = 0; - msg.len = sizeof(regs); - msg.buf = regs; - - err = i2c_transfer(client->adapter, &msg, 1); + err = regmap_bulk_write(pcf8523->regmap, PCF8523_REG_SECONDS, regs, + sizeof(regs)); if (err < 0) { /* * If the time cannot be set, restart the RTC anyway. Note * that errors are ignored if the RTC cannot be started so * that we have a chance to propagate the original error. */ - pcf8523_start_rtc(client); + regmap_update_bits(pcf8523->regmap, PCF8523_REG_CONTROL1, + PCF8523_CONTROL1_STOP, 0); return err; } - return pcf8523_start_rtc(client); + return regmap_update_bits(pcf8523->regmap, PCF8523_REG_CONTROL1, + PCF8523_CONTROL1_STOP, 0); } static int pcf8523_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *tm) { - struct i2c_client *client = to_i2c_client(dev); - u8 start = PCF8523_REG_MINUTE_ALARM, regs[4]; - struct i2c_msg msgs[2]; - u8 value; + struct pcf8523 *pcf8523 = dev_get_drvdata(dev); + u8 regs[4]; + u32 value; int err; - msgs[0].addr = client->addr; - msgs[0].flags = 0; - msgs[0].len = 1; - msgs[0].buf = &start; - - msgs[1].addr = client->addr; - msgs[1].flags = I2C_M_RD; - msgs[1].len = sizeof(regs); - msgs[1].buf = regs; - - err = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs)); + err = regmap_bulk_read(pcf8523->regmap, PCF8523_REG_MINUTE_ALARM, regs, + sizeof(regs)); if (err < 0) return err; @@ -305,12 +177,12 @@ static int pcf8523_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *tm) tm->time.tm_mday = bcd2bin(regs[2] & 0x3F); tm->time.tm_wday = bcd2bin(regs[3] & 0x7); - err = pcf8523_read(client, PCF8523_REG_CONTROL1, &value); + err = regmap_read(pcf8523->regmap, PCF8523_REG_CONTROL1, &value); if (err < 
0) return err; tm->enabled = !!(value & PCF8523_CONTROL1_AIE); - err = pcf8523_read(client, PCF8523_REG_CONTROL2, &value); + err = regmap_read(pcf8523->regmap, PCF8523_REG_CONTROL2, &value); if (err < 0) return err; tm->pending = !!(value & PCF8523_CONTROL2_AF); @@ -320,30 +192,16 @@ static int pcf8523_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *tm) static int pcf8523_irq_enable(struct device *dev, unsigned int enabled) { - struct i2c_client *client = to_i2c_client(dev); - u8 value; - int err; + struct pcf8523 *pcf8523 = dev_get_drvdata(dev); - err = pcf8523_read(client, PCF8523_REG_CONTROL1, &value); - if (err < 0) - return err; - - value &= PCF8523_CONTROL1_AIE; - - if (enabled) - value |= PCF8523_CONTROL1_AIE; - - err = pcf8523_write(client, PCF8523_REG_CONTROL1, value); - if (err < 0) - return err; - - return 0; + return regmap_update_bits(pcf8523->regmap, PCF8523_REG_CONTROL1, + PCF8523_CONTROL1_AIE, enabled ? + PCF8523_CONTROL1_AIE : 0); } static int pcf8523_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *tm) { - struct i2c_client *client = to_i2c_client(dev); - struct i2c_msg msg; + struct pcf8523 *pcf8523 = dev_get_drvdata(dev); u8 regs[5]; int err; @@ -351,7 +209,7 @@ static int pcf8523_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *tm) if (err) return err; - err = pcf8523_write(client, PCF8523_REG_CONTROL2, 0); + err = regmap_write(pcf8523->regmap, PCF8523_REG_CONTROL2, 0); if (err < 0) return err; @@ -363,16 +221,13 @@ static int pcf8523_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *tm) rtc_time64_to_tm(alarm_time, &tm->time); } - regs[0] = PCF8523_REG_MINUTE_ALARM; - regs[1] = bin2bcd(tm->time.tm_min); - regs[2] = bin2bcd(tm->time.tm_hour); - regs[3] = bin2bcd(tm->time.tm_mday); - regs[4] = ALARM_DIS; - msg.addr = client->addr; - msg.flags = 0; - msg.len = sizeof(regs); - msg.buf = regs; - err = i2c_transfer(client->adapter, &msg, 1); + regs[0] = bin2bcd(tm->time.tm_min); + regs[1] = bin2bcd(tm->time.tm_hour); + regs[2] = 
bin2bcd(tm->time.tm_mday); + regs[3] = ALARM_DIS; + + err = regmap_bulk_write(pcf8523->regmap, PCF8523_REG_MINUTE_ALARM, regs, + sizeof(regs)); if (err < 0) return err; @@ -386,21 +241,21 @@ static int pcf8523_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *tm) static int pcf8523_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) { - struct i2c_client *client = to_i2c_client(dev); + struct pcf8523 *pcf8523 = dev_get_drvdata(dev); unsigned int flags = 0; - u8 value; + u32 value; int ret; switch (cmd) { case RTC_VL_READ: - ret = pcf8523_read(client, PCF8523_REG_CONTROL3, &value); + ret = regmap_read(pcf8523->regmap, PCF8523_REG_CONTROL3, &value); if (ret < 0) return ret; if (value & PCF8523_CONTROL3_BLF) flags |= RTC_VL_BACKUP_LOW; - ret = pcf8523_read(client, PCF8523_REG_SECONDS, &value); + ret = regmap_read(pcf8523->regmap, PCF8523_REG_SECONDS, &value); if (ret < 0) return ret; @@ -419,12 +274,12 @@ static int pcf8523_rtc_ioctl(struct device *dev, unsigned int cmd, static int pcf8523_rtc_read_offset(struct device *dev, long *offset) { - struct i2c_client *client = to_i2c_client(dev); + struct pcf8523 *pcf8523 = dev_get_drvdata(dev); int err; - u8 value; + u32 value; s8 val; - err = pcf8523_read(client, PCF8523_REG_OFFSET, &value); + err = regmap_read(pcf8523->regmap, PCF8523_REG_OFFSET, &value); if (err < 0) return err; @@ -437,9 +292,9 @@ static int pcf8523_rtc_read_offset(struct device *dev, long *offset) static int pcf8523_rtc_set_offset(struct device *dev, long offset) { - struct i2c_client *client = to_i2c_client(dev); + struct pcf8523 *pcf8523 = dev_get_drvdata(dev); long reg_m0, reg_m1; - u8 value; + u32 value; reg_m0 = clamp(DIV_ROUND_CLOSEST(offset, 4340), -64L, 63L); reg_m1 = clamp(DIV_ROUND_CLOSEST(offset, 4069), -64L, 63L); @@ -449,7 +304,7 @@ static int pcf8523_rtc_set_offset(struct device *dev, long offset) else value = (reg_m1 & 0x7f) | PCF8523_OFFSET_MODE; - return pcf8523_write(client, PCF8523_REG_OFFSET, value); + return 
regmap_write(pcf8523->regmap, PCF8523_REG_OFFSET, value); } static const struct rtc_class_ops pcf8523_rtc_ops = { @@ -463,6 +318,12 @@ static const struct rtc_class_ops pcf8523_rtc_ops = { .set_offset = pcf8523_rtc_set_offset, }; +static const struct regmap_config regmap_config = { + .reg_bits = 8, + .val_bits = 8, + .max_register = 0x13, +}; + static int pcf8523_probe(struct i2c_client *client, const struct i2c_device_id *id) { @@ -478,37 +339,41 @@ static int pcf8523_probe(struct i2c_client *client, if (!pcf8523) return -ENOMEM; + pcf8523->regmap = devm_regmap_init_i2c(client, &regmap_config); + if (IS_ERR(pcf8523->regmap)) + return PTR_ERR(pcf8523->regmap); + i2c_set_clientdata(client, pcf8523); - pcf8523->client = client; - - err = pcf8523_load_capacitance(client); - if (err < 0) - dev_warn(&client->dev, "failed to set xtal load capacitance: %d", - err); - - err = pcf8523_set_pm(client, 0); - if (err < 0) - return err; rtc = devm_rtc_allocate_device(&client->dev); if (IS_ERR(rtc)) return PTR_ERR(rtc); - pcf8523->rtc = rtc; + + err = pcf8523_load_capacitance(pcf8523, client->dev.of_node); + if (err < 0) + dev_warn(&client->dev, "failed to set xtal load capacitance: %d", + err); + + err = regmap_update_bits(pcf8523->regmap, PCF8523_REG_CONTROL3, + PCF8523_CONTROL3_PM_MASK, 0); + if (err < 0) + return err; + rtc->ops = &pcf8523_rtc_ops; rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; rtc->range_max = RTC_TIMESTAMP_END_2099; rtc->uie_unsupported = 1; if (client->irq > 0) { - err = pcf8523_write(client, PCF8523_TMR_CLKOUT_CTRL, 0x38); + err = regmap_write(pcf8523->regmap, PCF8523_TMR_CLKOUT_CTRL, 0x38); if (err < 0) return err; err = devm_request_threaded_irq(&client->dev, client->irq, NULL, pcf8523_irq, IRQF_SHARED | IRQF_ONESHOT | IRQF_TRIGGER_LOW, - dev_name(&rtc->dev), client); + dev_name(&rtc->dev), pcf8523); if (err) return err; From 5537752c53497fca0469488d1788ceba1b75d5a7 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 18 Oct 2021 17:36:47 +0200 Subject: 
[PATCH 137/433] rtc: pcf8523: always compile pcf8523_rtc_ioctl Compiling out pcf8523_rtc_ioctl saves about 5% of the generated machine code. However, it certainly never happens as the RTC character device interface is the most useful one and is probably always compiled in. Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211018153651.82069-2-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-pcf8523.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/rtc/rtc-pcf8523.c b/drivers/rtc/rtc-pcf8523.c index c5d31c525997..103557f48409 100644 --- a/drivers/rtc/rtc-pcf8523.c +++ b/drivers/rtc/rtc-pcf8523.c @@ -237,7 +237,6 @@ static int pcf8523_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *tm) return 0; } -#ifdef CONFIG_RTC_INTF_DEV static int pcf8523_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) { @@ -268,9 +267,6 @@ static int pcf8523_rtc_ioctl(struct device *dev, unsigned int cmd, return -ENOIOCTLCMD; } } -#else -#define pcf8523_rtc_ioctl NULL -#endif static int pcf8523_rtc_read_offset(struct device *dev, long *offset) { From 7c176119aefd64a099d06ba33b4730f643b3a890 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 18 Oct 2021 17:36:48 +0200 Subject: [PATCH 138/433] rtc: pcf8523: remove unecessary ifdefery If CONFIG_OF is not defined, of_property_read_bool will return false which is our default value Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211018153651.82069-3-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-pcf8523.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/rtc/rtc-pcf8523.c b/drivers/rtc/rtc-pcf8523.c index 103557f48409..79a571db803b 100644 --- a/drivers/rtc/rtc-pcf8523.c +++ b/drivers/rtc/rtc-pcf8523.c @@ -376,9 +376,7 @@ static int pcf8523_probe(struct i2c_client *client, dev_pm_set_wake_irq(&client->dev, client->irq); } -#ifdef CONFIG_OF wakeup_source = of_property_read_bool(client->dev.of_node, "wakeup-source"); -#endif if (client->irq > 0 || 
wakeup_source) device_init_wakeup(&client->dev, true); From ebf48cbe32e9e2b74e9d5d8f7daf3422a3ec27ea Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 18 Oct 2021 17:36:49 +0200 Subject: [PATCH 139/433] rtc: pcf8523: allow usage on ACPI platforms Always provide an OF table to ensure ACPI platforms can also use this driver. Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211018153651.82069-4-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-pcf8523.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/rtc/rtc-pcf8523.c b/drivers/rtc/rtc-pcf8523.c index 79a571db803b..181c3425d6c7 100644 --- a/drivers/rtc/rtc-pcf8523.c +++ b/drivers/rtc/rtc-pcf8523.c @@ -389,19 +389,17 @@ static const struct i2c_device_id pcf8523_id[] = { }; MODULE_DEVICE_TABLE(i2c, pcf8523_id); -#ifdef CONFIG_OF static const struct of_device_id pcf8523_of_match[] = { { .compatible = "nxp,pcf8523" }, { .compatible = "microcrystal,rv8523" }, { } }; MODULE_DEVICE_TABLE(of, pcf8523_of_match); -#endif static struct i2c_driver pcf8523_driver = { .driver = { .name = "rtc-pcf8523", - .of_match_table = of_match_ptr(pcf8523_of_match), + .of_match_table = pcf8523_of_match, }, .probe = pcf8523_probe, .id_table = pcf8523_id, From f8d4e4fa51ec817edfee49c173521a1102f7f7a6 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 18 Oct 2021 17:36:50 +0200 Subject: [PATCH 140/433] rtc: pcf8523: add BSM support Backup Switch Mode allows to select the strategy to use to switch from the main power supply to the backup power supply. As before, the driver will switch from standby mode to level mode but now only when it has never been set. 
Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211018153651.82069-5-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-pcf8523.c | 103 +++++++++++++++++++++++++++++++++++--- 1 file changed, 97 insertions(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-pcf8523.c b/drivers/rtc/rtc-pcf8523.c index 181c3425d6c7..c93acade7205 100644 --- a/drivers/rtc/rtc-pcf8523.c +++ b/drivers/rtc/rtc-pcf8523.c @@ -4,6 +4,7 @@ */ #include +#include #include #include #include @@ -20,10 +21,8 @@ #define PCF8523_CONTROL2_AF BIT(3) #define PCF8523_REG_CONTROL3 0x02 -#define PCF8523_CONTROL3_PM_BLD BIT(7) /* battery low detection disabled */ -#define PCF8523_CONTROL3_PM_VDD BIT(6) /* switch-over disabled */ -#define PCF8523_CONTROL3_PM_DSM BIT(5) /* direct switching mode */ -#define PCF8523_CONTROL3_PM_MASK 0xe0 +#define PCF8523_CONTROL3_PM GENMASK(7,5) +#define PCF8523_PM_STANDBY 0x7 #define PCF8523_CONTROL3_BLF BIT(2) /* battery low bit, read-only */ #define PCF8523_CONTROL3_BSF BIT(3) @@ -237,6 +236,83 @@ static int pcf8523_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *tm) return 0; } +static int pcf8523_param_get(struct device *dev, struct rtc_param *param) +{ + struct pcf8523 *pcf8523 = dev_get_drvdata(dev); + int ret; + + switch(param->param) { + u32 value; + + case RTC_PARAM_BACKUP_SWITCH_MODE: + ret = regmap_read(pcf8523->regmap, PCF8523_REG_CONTROL3, &value); + if (ret < 0) + return ret; + + value = FIELD_GET(PCF8523_CONTROL3_PM, value); + + switch(value) { + case 0x0: + case 0x4: + param->uvalue = RTC_BSM_LEVEL; + break; + case 0x1: + case 0x5: + param->uvalue = RTC_BSM_DIRECT; + break; + case PCF8523_PM_STANDBY: + param->uvalue = RTC_BSM_STANDBY; + break; + default: + param->uvalue = RTC_BSM_DISABLED; + } + + break; + + default: + return -EINVAL; + } + + return 0; +} + +static int pcf8523_param_set(struct device *dev, struct rtc_param *param) +{ + struct pcf8523 *pcf8523 = dev_get_drvdata(dev); + + switch(param->param) { + u8 mode; + case 
RTC_PARAM_BACKUP_SWITCH_MODE: + switch (param->uvalue) { + case RTC_BSM_DISABLED: + mode = 0x2; + break; + case RTC_BSM_DIRECT: + mode = 0x1; + break; + case RTC_BSM_LEVEL: + mode = 0x0; + break; + case RTC_BSM_STANDBY: + mode = PCF8523_PM_STANDBY; + break; + default: + return -EINVAL; + } + + return regmap_update_bits(pcf8523->regmap, PCF8523_REG_CONTROL3, + PCF8523_CONTROL3_PM, + FIELD_PREP(PCF8523_CONTROL3_PM, mode)); + + break; + + default: + return -EINVAL; + } + + return 0; +} + static int pcf8523_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) { @@ -312,6 +388,8 @@ static const struct rtc_class_ops pcf8523_rtc_ops = { .ioctl = pcf8523_rtc_ioctl, .read_offset = pcf8523_rtc_read_offset, .set_offset = pcf8523_rtc_set_offset, + .param_get = pcf8523_param_get, + .param_set = pcf8523_param_set, }; static const struct regmap_config regmap_config = { @@ -326,6 +404,7 @@ static int pcf8523_probe(struct i2c_client *client, struct pcf8523 *pcf8523; struct rtc_device *rtc; bool wakeup_source = false; + u32 value; int err; if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) @@ -351,11 +430,23 @@ static int pcf8523_probe(struct i2c_client *client, dev_warn(&client->dev, "failed to set xtal load capacitance: %d", err); - err = regmap_update_bits(pcf8523->regmap, PCF8523_REG_CONTROL3, - PCF8523_CONTROL3_PM_MASK, 0); + err = regmap_read(pcf8523->regmap, PCF8523_REG_SECONDS, &value); if (err < 0) return err; + if (value & PCF8523_SECONDS_OS) { + err = regmap_read(pcf8523->regmap, PCF8523_REG_CONTROL3, &value); + if (err < 0) + return err; + + if (FIELD_GET(PCF8523_CONTROL3_PM, value) == PCF8523_PM_STANDBY) { + err = regmap_write(pcf8523->regmap, PCF8523_REG_CONTROL3, + value & ~PCF8523_CONTROL3_PM); + if (err < 0) + return err; + } + } + rtc->ops = &pcf8523_rtc_ops; rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; rtc->range_max = RTC_TIMESTAMP_END_2099; From 291cd656da04163f4bba67953c1f2f823e0d1231 Mon Sep 17 00:00:00 2001 From: Changcheng Deng Date: 
Tue, 19 Oct 2021 04:14:22 +0000 Subject: [PATCH 141/433] NFSD:fix boolreturn.cocci warning ./fs/nfsd/nfssvc.c: 1072: 8-9: :WARNING return of 0/1 in function 'nfssvc_decode_voidarg' with return type bool Return statements in functions returning bool should use true/false instead of 1/0. Reported-by: Zeal Robot Signed-off-by: Changcheng Deng Signed-off-by: J. Bruce Fields --- fs/nfsd/nfssvc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 362e819ff06a..80431921e5d7 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -1069,7 +1069,7 @@ out_encode_err: */ bool nfssvc_decode_voidarg(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - return 1; + return true; } /** From 6ab80d88f82e84e331e79ca4b7e2ca2fe63c8c2f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:43:47 -0500 Subject: [PATCH 142/433] exit/doublefault: Remove apparently bogus comment about rewind_stack_do_exit I do not see panic calling rewind_stack_do_exit anywhere, nor can I find anywhere in the history where doublefault_shim has called rewind_stack_do_exit. So I don't think this comment was ever actually correct. Cc: Andy Lutomirski Fixes: 7d8d8cfdee9a ("x86/doublefault/32: Rewrite the x86_32 #DF handler and unify with 64-bit") Link: https://lkml.kernel.org/r/20211020174406.17889-1-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- arch/x86/kernel/doublefault_32.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c index d1d49e3d536b..3b58d8703094 100644 --- a/arch/x86/kernel/doublefault_32.c +++ b/arch/x86/kernel/doublefault_32.c @@ -77,9 +77,6 @@ asmlinkage noinstr void __noreturn doublefault_shim(void) * some way to reconstruct CR3. We could make a credible guess based * on cpu_tlbstate, but that would be racy and would not account for * PTI. - * - * Instead, don't bother. We can return through - * rewind_stack_do_exit() instead. 
*/ panic("cannot return from double fault\n"); } From 9fd5a04d8efcbf511286dd36c46fd70a645b167d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:43:48 -0500 Subject: [PATCH 143/433] exit: Remove calls of do_exit after noreturn versions of die On nds32, openrisc, s390, sh, and xtensa the function die never returns. Mark die __noreturn so that no one expects die to return. Remove the do_exit calls after die as they will never be reached. Cc: Jonas Bonn Cc: Stefan Kristiansson Cc: Stafford Horne Cc: openrisc@lists.librecores.org Cc: Nick Hu Cc: Greentime Hu Cc: Vincent Chen Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: linux-s390@vger.kernel.org Cc: Yoshinori Sato Cc: Rich Felker Cc: linux-sh@vger.kernel.org Cc: linux-xtensa@linux-xtensa.org Cc: Chris Zankel Cc: Max Filippov Fixes: 2.3.16 Fixes: 2.3.99-pre8 Fixes: 3f65ce4d141e ("[PATCH] xtensa: Architecture support for Tensilica Xtensa Part 5") Fixes: 664eec400bf8 ("nds32: MMU fault handling and page table management") Fixes: 61e85e367535 ("OpenRISC: Memory management") Link: https://lkml.kernel.org/r/20211020174406.17889-2-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- arch/nds32/kernel/traps.c | 2 +- arch/nds32/mm/fault.c | 6 +----- arch/openrisc/kernel/traps.c | 2 +- arch/openrisc/mm/fault.c | 4 +--- arch/s390/include/asm/kdebug.h | 2 +- arch/s390/kernel/dumpstack.c | 2 +- arch/s390/mm/fault.c | 2 -- arch/sh/kernel/traps.c | 2 +- arch/sh/mm/fault.c | 2 -- arch/xtensa/kernel/traps.c | 2 +- arch/xtensa/mm/fault.c | 3 +-- 11 files changed, 9 insertions(+), 20 deletions(-) diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c index f06421c645af..ca75d475eda4 100644 --- a/arch/nds32/kernel/traps.c +++ b/arch/nds32/kernel/traps.c @@ -118,7 +118,7 @@ DEFINE_SPINLOCK(die_lock); /* * This function is protected against re-entrancy. 
*/ -void die(const char *str, struct pt_regs *regs, int err) +void __noreturn die(const char *str, struct pt_regs *regs, int err) { struct task_struct *tsk = current; static int die_counter; diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c index f02524eb6d56..1d139b117168 100644 --- a/arch/nds32/mm/fault.c +++ b/arch/nds32/mm/fault.c @@ -13,7 +13,7 @@ #include -extern void die(const char *str, struct pt_regs *regs, long err); +extern void __noreturn die(const char *str, struct pt_regs *regs, long err); /* * This is useful to dump out the page tables associated with @@ -299,10 +299,6 @@ no_context: show_pte(mm, addr); die("Oops", regs, error_code); - bust_spinlocks(0); - do_exit(SIGKILL); - - return; /* * We ran out of memory, or some other thing happened to us that made diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index aa1e709405ac..0898cb159fac 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -197,7 +197,7 @@ void nommu_dump_state(struct pt_regs *regs, } /* This is normally the 'Oops' routine */ -void die(const char *str, struct pt_regs *regs, long err) +void __noreturn die(const char *str, struct pt_regs *regs, long err) { console_verbose(); diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c index c730d1a51686..f0fa6394a58e 100644 --- a/arch/openrisc/mm/fault.c +++ b/arch/openrisc/mm/fault.c @@ -32,7 +32,7 @@ unsigned long pte_errors; /* updated by do_page_fault() */ */ volatile pgd_t *current_pgd[NR_CPUS]; -extern void die(char *, struct pt_regs *, long); +extern void __noreturn die(char *, struct pt_regs *, long); /* * This routine handles page faults. It determines the address, @@ -248,8 +248,6 @@ no_context: die("Oops", regs, write_acc); - do_exit(SIGKILL); - /* * We ran out of memory, or some other thing happened to us that made * us unable to handle the page fault gracefully. 
diff --git a/arch/s390/include/asm/kdebug.h b/arch/s390/include/asm/kdebug.h index d5327f064799..4377238e4752 100644 --- a/arch/s390/include/asm/kdebug.h +++ b/arch/s390/include/asm/kdebug.h @@ -23,6 +23,6 @@ enum die_val { DIE_NMI_IPI, }; -extern void die(struct pt_regs *, const char *); +extern void __noreturn die(struct pt_regs *, const char *); #endif diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index db1bc00229ca..f45e66b8bed6 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -192,7 +192,7 @@ void show_regs(struct pt_regs *regs) static DEFINE_SPINLOCK(die_lock); -void die(struct pt_regs *regs, const char *str) +void __noreturn die(struct pt_regs *regs, const char *str) { static int die_counter; diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 212632d57db9..d30f5986fa85 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -260,7 +260,6 @@ static noinline void do_no_context(struct pt_regs *regs) " in virtual user address space\n"); dump_fault_info(regs); die(regs, "Oops"); - do_exit(SIGKILL); } static noinline void do_low_address(struct pt_regs *regs) @@ -270,7 +269,6 @@ static noinline void do_low_address(struct pt_regs *regs) if (regs->psw.mask & PSW_MASK_PSTATE) { /* Low-address protection hit in user mode 'cannot happen'. 
*/ die (regs, "Low-address protection"); - do_exit(SIGKILL); } do_no_context(regs); diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index e76b22157099..cbe3201d4f21 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -20,7 +20,7 @@ static DEFINE_SPINLOCK(die_lock); -void die(const char *str, struct pt_regs *regs, long err) +void __noreturn die(const char *str, struct pt_regs *regs, long err) { static int die_counter; diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index 88a1f453d73e..1e1aa75df3ca 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -238,8 +238,6 @@ no_context(struct pt_regs *regs, unsigned long error_code, show_fault_oops(regs, address); die("Oops", regs, error_code); - bust_spinlocks(0); - do_exit(SIGKILL); } static void diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index 874b6efc6fb3..fb056a191339 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -527,7 +527,7 @@ void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) DEFINE_SPINLOCK(die_lock); -void die(const char * str, struct pt_regs * regs, long err) +void __noreturn die(const char * str, struct pt_regs * regs, long err) { static int die_counter; const char *pr = ""; diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index 95a74890c7e9..fd6a70635962 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -238,7 +238,7 @@ bad_page_fault: void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) { - extern void die(const char*, struct pt_regs*, long); + extern void __noreturn die(const char*, struct pt_regs*, long); const struct exception_table_entry *entry; /* Are we prepared to handle this kernel fault? 
*/ @@ -257,5 +257,4 @@ bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) "address %08lx\n pc = %08lx, ra = %08lx\n", address, regs->pc, regs->areg[0]); die("Oops", regs, sig); - do_exit(sig); } From a52f60fa2905b4abb26235d0a11cff13ced92709 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:43:49 -0500 Subject: [PATCH 144/433] reboot: Remove the unreachable panic after do_exit in reboot(2) Link: https://lkml.kernel.org/r/20211020174406.17889-3-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- kernel/reboot.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/reboot.c b/kernel/reboot.c index f7440c0c7e43..d6e0f9fb7f04 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -359,7 +359,6 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, case LINUX_REBOOT_CMD_HALT: kernel_halt(); do_exit(0); - panic("cannot halt"); case LINUX_REBOOT_CMD_POWER_OFF: kernel_power_off(); From 97cae848270731e4224681368f2061c94a9fc588 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:43:50 -0500 Subject: [PATCH 145/433] signal/sparc32: Remove unreachable do_exit in do_sparc_fault The call to do_exit in do_sparc_fault immediately follows a call to unhandled_fault. The function unhandled_fault never returns. This means the call to do_exit can never be reached. Cc: David Miller Cc: sparclinux@vger.kernel.org Fixes: 2.3.41 Link: https://lkml.kernel.org/r/20211020174406.17889-4-ebiederm@xmission.com Signed-off-by: Eric W. 
Biederman --- arch/sparc/mm/fault_32.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index fa858626b85b..90dc4ae315c8 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -248,7 +248,6 @@ no_context: } unhandled_fault(address, tsk, regs); - do_exit(SIGKILL); /* * We ran out of memory, or some other thing happened to us that made From 133a48abf6ecc535d7eddc6da1c3e4c972445882 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Oct 2021 15:37:42 -0400 Subject: [PATCH 146/433] NFS: Fix up commit deadlocks If O_DIRECT bumps the commit_info rpcs_out field, then that could lead to fsync() hangs. The fix is to ensure that O_DIRECT calls nfs_commit_end(). Fixes: 723c921e7dfc ("sched/wait, fs/nfs: Convert wait_on_atomic_t() usage to the new wait_var_event() API") Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 2 +- fs/nfs/pnfs_nfs.c | 2 -- fs/nfs/write.c | 9 ++++++--- include/linux/nfs_fs.h | 1 + 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 2e894fec036b..3c0335c15a73 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -620,7 +620,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) nfs_unlock_and_release_request(req); } - if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) + if (nfs_commit_end(cinfo.mds)) nfs_direct_write_complete(dreq); } diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 02bd6e83961d..316f68f96e57 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -468,7 +468,6 @@ pnfs_bucket_alloc_ds_commits(struct list_head *list, goto out_error; data->ds_commit_index = i; list_add_tail(&data->list, list); - atomic_inc(&cinfo->mds->rpcs_out); nreq++; } mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); @@ -520,7 +519,6 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, data->ds_commit_index = -1; list_splice_init(mds_pages, &data->pages); list_add_tail(&data->list, 
&list); - atomic_inc(&cinfo->mds->rpcs_out); nreq++; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 38f181e1343a..465220f47142 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1673,10 +1673,13 @@ static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo) atomic_inc(&cinfo->rpcs_out); } -static void nfs_commit_end(struct nfs_mds_commit_info *cinfo) +bool nfs_commit_end(struct nfs_mds_commit_info *cinfo) { - if (atomic_dec_and_test(&cinfo->rpcs_out)) + if (atomic_dec_and_test(&cinfo->rpcs_out)) { wake_up_var(&cinfo->rpcs_out); + return true; + } + return false; } void nfs_commitdata_release(struct nfs_commit_data *data) @@ -1776,6 +1779,7 @@ void nfs_init_commit(struct nfs_commit_data *data, data->res.fattr = &data->fattr; data->res.verf = &data->verf; nfs_fattr_init(&data->fattr); + nfs_commit_begin(cinfo->mds); } EXPORT_SYMBOL_GPL(nfs_init_commit); @@ -1822,7 +1826,6 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, /* Set up the argument struct */ nfs_init_commit(data, head, NULL, cinfo); - atomic_inc(&cinfo->mds->rpcs_out); if (NFS_SERVER(inode)->nfs_client->cl_minorversion) task_flags = RPC_TASK_MOVEABLE; return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode), diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index ca547cc5458c..e58b78da7a98 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -568,6 +568,7 @@ extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); extern int nfs_commit_inode(struct inode *, int); extern struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail); extern void nfs_commit_free(struct nfs_commit_data *data); +bool nfs_commit_end(struct nfs_mds_commit_info *cinfo); static inline int nfs_have_writebacks(struct inode *inode) From f0caea8882a7412a2ad4d8274f0280cdf849c9e2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Oct 2021 14:05:02 -0400 Subject: [PATCH 147/433] NFS: Fix an Oops in pnfs_mark_request_commit() Olga reports 
seeing the following Oops when doing O_DIRECT writes to a pNFS flexfiles server: Oops: 0000 [#1] SMP PTI CPU: 1 PID: 234186 Comm: kworker/u8:1 Not tainted 5.15.0-rc4+ #4 Hardware name: Red Hat KVM/RHEL-AV, BIOS 1.13.0-2.module+el8.3.0+7353+9de0a3cc 04/01/2014 Workqueue: nfsiod rpc_async_release [sunrpc] RIP: 0010:nfs_mark_request_commit+0x12/0x30 [nfs] Code: ff ff be 03 00 00 00 e8 ac 34 83 eb e9 29 ff ff ff e8 22 bc d7 eb 66 90 0f 1f 44 00 00 48 85 f6 74 16 48 8b 42 10 48 8b 40 18 <48> 8b 40 18 48 85 c0 74 05 e9 70 fc 15 ec 48 89 d6 e9 68 ed ff ff RSP: 0018:ffffa82f0159fe00 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff8f3393141880 RCX: 0000000000000000 RDX: ffffa82f0159fe08 RSI: ffff8f3381252500 RDI: ffff8f3393141880 RBP: ffff8f33ac317c00 R08: 0000000000000000 R09: ffff8f3487724cb0 R10: 0000000000000008 R11: 0000000000000001 R12: 0000000000000001 R13: ffff8f3485bccee0 R14: ffff8f33ac317c10 R15: ffff8f33ac317cd8 FS: 0000000000000000(0000) GS:ffff8f34fbc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000018 CR3: 0000000122120006 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: nfs_direct_write_completion+0x13b/0x250 [nfs] rpc_free_task+0x39/0x60 [sunrpc] rpc_async_release+0x29/0x40 [sunrpc] process_one_work+0x1ce/0x370 worker_thread+0x30/0x380 ? process_one_work+0x370/0x370 kthread+0x11a/0x140 ? 
set_kthread_struct+0x40/0x40 ret_from_fork+0x22/0x30 Reported-by: Olga Kornievskaia Fixes: 9c455a8c1e14 ("NFS/pNFS: Clean up pNFS commit operations") Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index d810ae674f4e..a0f6ff094b3a 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -517,7 +517,7 @@ pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, { struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; - if (!lseg || !fl_cinfo->ops->mark_request_commit) + if (!lseg || !fl_cinfo->ops || !fl_cinfo->ops->mark_request_commit) return false; fl_cinfo->ops->mark_request_commit(req, lseg, cinfo, ds_commit_idx); return true; From 6e176d47160cec8bcaa28d9aa06926d72d54237c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 10 Oct 2021 10:58:12 +0200 Subject: [PATCH 148/433] NFSv4: Fixes for nfs4_inode_return_delegation() We mustn't call nfs_wb_all() on anything other than a regular file. Furthermore, we can exit early when we don't hold a delegation. 
Reported-by: David Wysochanski Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 11118398f495..7c9eb679dbdb 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -755,11 +755,13 @@ int nfs4_inode_return_delegation(struct inode *inode) struct nfs_delegation *delegation; delegation = nfs_start_delegation_return(nfsi); - /* Synchronous recall of any application leases */ - break_lease(inode, O_WRONLY | O_RDWR); - nfs_wb_all(inode); - if (delegation != NULL) + if (delegation != NULL) { + /* Synchronous recall of any application leases */ + break_lease(inode, O_WRONLY | O_RDWR); + if (S_ISREG(inode->i_mode)) + nfs_wb_all(inode); return nfs_end_delegation_return(inode, delegation, 1); + } return 0; } From 0ebeebcf59601bcfa0284f4bb7abdec051eb856d Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Sun, 10 Oct 2021 18:23:13 -0400 Subject: [PATCH 149/433] NFS: Fix WARN_ON due to unionization of nfs_inode.nrequests Fixes the following WARN_ON WARNING: CPU: 2 PID: 18678 at fs/nfs/inode.c:123 nfs_clear_inode+0x3b/0x50 [nfs] ... 
Call Trace: nfs4_evict_inode+0x57/0x70 [nfsv4] evict+0xd1/0x180 dispose_list+0x48/0x60 evict_inodes+0x156/0x190 generic_shutdown_super+0x37/0x110 nfs_kill_super+0x1d/0x40 [nfs] deactivate_locked_super+0x36/0xa0 Signed-off-by: Dave Wysochanski Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index e58b78da7a98..457b866a2d9e 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -573,7 +573,9 @@ bool nfs_commit_end(struct nfs_mds_commit_info *cinfo); static inline int nfs_have_writebacks(struct inode *inode) { - return atomic_long_read(&NFS_I(inode)->nrequests) != 0; + if (S_ISREG(inode->i_mode)) + return atomic_long_read(&NFS_I(inode)->nrequests) != 0; + return 0; } /* From e591b298d7ecb851e200f65946e3d53fe78a3c4f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 28 Sep 2021 17:41:41 -0400 Subject: [PATCH 150/433] NFS: Save some space in the inode Save some space in the nfs_inode by setting up an anonymous union with the fields that are peculiar to a specific type of filesystem object. 
Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 26 ++++++++++++++++++-------- include/linux/nfs_fs.h | 42 ++++++++++++++++++++++++------------------ 2 files changed, 42 insertions(+), 26 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a10572f278e6..b81b2d2f47ad 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -431,6 +431,22 @@ nfs_ilookup(struct super_block *sb, struct nfs_fattr *fattr, struct nfs_fh *fh) return inode; } +static void nfs_inode_init_regular(struct nfs_inode *nfsi) +{ + atomic_long_set(&nfsi->nrequests, 0); + INIT_LIST_HEAD(&nfsi->commit_info.list); + atomic_long_set(&nfsi->commit_info.ncommit, 0); + atomic_set(&nfsi->commit_info.rpcs_out, 0); + mutex_init(&nfsi->commit_mutex); +} + +static void nfs_inode_init_dir(struct nfs_inode *nfsi) +{ + nfsi->cache_change_attribute = 0; + memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); + init_rwsem(&nfsi->rmdir_sem); +} + /* * This is our front-end to iget that looks up inodes by file handle * instead of inode number. 
@@ -485,10 +501,12 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st if (S_ISREG(inode->i_mode)) { inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops; inode->i_data.a_ops = &nfs_file_aops; + nfs_inode_init_regular(nfsi); } else if (S_ISDIR(inode->i_mode)) { inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; inode->i_fop = &nfs_dir_operations; inode->i_data.a_ops = &nfs_dir_aops; + nfs_inode_init_dir(nfsi); /* Deal with crossing mountpoints */ if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT || fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { @@ -514,7 +532,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st inode->i_uid = make_kuid(&init_user_ns, -2); inode->i_gid = make_kgid(&init_user_ns, -2); inode->i_blocks = 0; - memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); nfsi->write_io = 0; nfsi->read_io = 0; @@ -2259,14 +2276,7 @@ static void init_once(void *foo) INIT_LIST_HEAD(&nfsi->open_files); INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); - INIT_LIST_HEAD(&nfsi->commit_info.list); - atomic_long_set(&nfsi->nrequests, 0); - atomic_long_set(&nfsi->commit_info.ncommit, 0); - atomic_set(&nfsi->commit_info.rpcs_out, 0); - init_rwsem(&nfsi->rmdir_sem); - mutex_init(&nfsi->commit_mutex); nfs4_init_once(nfsi); - nfsi->cache_change_attribute = 0; } static int __init nfs_init_inodecache(void) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 457b866a2d9e..739ca1ef934f 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -155,33 +155,39 @@ struct nfs_inode { unsigned long attrtimeo_timestamp; unsigned long attr_gencount; - /* "Generation counter" for the attribute cache. This is - * bumped whenever we update the metadata on the - * server. 
- */ - unsigned long cache_change_attribute; struct rb_root access_cache; struct list_head access_cache_entry_lru; struct list_head access_cache_inode_lru; - /* - * This is the cookie verifier used for NFSv3 readdir - * operations - */ - __be32 cookieverf[NFS_DIR_VERIFIER_SIZE]; - - atomic_long_t nrequests; - struct nfs_mds_commit_info commit_info; + union { + /* Directory */ + struct { + /* "Generation counter" for the attribute cache. + * This is bumped whenever we update the metadata + * on the server. + */ + unsigned long cache_change_attribute; + /* + * This is the cookie verifier used for NFSv3 readdir + * operations + */ + __be32 cookieverf[NFS_DIR_VERIFIER_SIZE]; + /* Readers: in-flight sillydelete RPC calls */ + /* Writers: rmdir */ + struct rw_semaphore rmdir_sem; + }; + /* Regular file */ + struct { + atomic_long_t nrequests; + struct nfs_mds_commit_info commit_info; + struct mutex commit_mutex; + }; + }; /* Open contexts for shared mmap writes */ struct list_head open_files; - /* Readers: in-flight sillydelete RPC calls */ - /* Writers: rmdir */ - struct rw_semaphore rmdir_sem; - struct mutex commit_mutex; - #if IS_ENABLED(CONFIG_NFS_V4) struct nfs4_cached_acl *nfs4_acl; /* NFSv4 state */ From d5f458a979650e5ed37212f6134e4ee2b28cb6ed Mon Sep 17 00:00:00 2001 From: Alexey Gladkov Date: Thu, 14 Oct 2021 18:02:30 +0200 Subject: [PATCH 151/433] Fix user namespace leak Fixes: 61ca2c4afd9d ("NFS: Only reference user namespace from nfs4idmap struct instead of cred") Signed-off-by: Alexey Gladkov Signed-off-by: Trond Myklebust --- fs/nfs/nfs4idmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index 8d8aba305ecc..f331866dd418 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -487,7 +487,7 @@ nfs_idmap_new(struct nfs_client *clp) err_destroy_pipe: rpc_destroy_pipe_data(idmap->idmap_pipe); err: - get_user_ns(idmap->user_ns); + put_user_ns(idmap->user_ns); kfree(idmap); return error; } From 
21037b8c2258ec40de3b31be9ced43ceb3b784f7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 5 Oct 2021 10:17:59 -0400 Subject: [PATCH 152/433] xprtrdma: Provide a buffer to pad Write chunks of unaligned length This is a buffer to be left persistently registered while a connection is up. Connection tear-down will automatically DMA-unmap, invalidate, and dereg the MR. A persistently registered buffer is lower in cost to provide, and it can never be coalesced into the RDMA segment that carries the data payload. An RPC that provisions a Write chunk with a non-aligned length now uses this MR rather than the tail buffer of the RPC's rq_rcv_buf. Reviewed-By: Tom Talpey Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/trace/events/rpcrdma.h | 13 +++++++++--- net/sunrpc/xprtrdma/frwr_ops.c | 35 +++++++++++++++++++++++++++++++++ net/sunrpc/xprtrdma/rpc_rdma.c | 23 +++++++++++++--------- net/sunrpc/xprtrdma/verbs.c | 1 + net/sunrpc/xprtrdma/xprt_rdma.h | 5 +++++ 5 files changed, 65 insertions(+), 12 deletions(-) diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index de4195499592..afb2e394797c 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -375,10 +375,16 @@ DECLARE_EVENT_CLASS(xprtrdma_mr_class, TP_fast_assign( const struct rpcrdma_req *req = mr->mr_req; - const struct rpc_task *task = req->rl_slot.rq_task; - __entry->task_id = task->tk_pid; - __entry->client_id = task->tk_client->cl_clid; + if (req) { + const struct rpc_task *task = req->rl_slot.rq_task; + + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + } else { + __entry->task_id = 0; + __entry->client_id = -1; + } __entry->mr_id = mr->mr_ibmr->res.id; __entry->nents = mr->mr_nents; __entry->handle = mr->mr_handle; @@ -639,6 +645,7 @@ TRACE_EVENT(xprtrdma_nomrs_err, DEFINE_RDCH_EVENT(read); DEFINE_WRCH_EVENT(write); DEFINE_WRCH_EVENT(reply); +DEFINE_WRCH_EVENT(wp); TRACE_DEFINE_ENUM(rpcrdma_noch); 
TRACE_DEFINE_ENUM(rpcrdma_noch_pullup); diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index f700b34a5bfd..3eccf365fcb8 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -666,3 +666,38 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) */ rpcrdma_force_disconnect(ep); } + +/** + * frwr_wp_create - Create an MR for padding Write chunks + * @r_xprt: transport resources to use + * + * Return 0 on success, negative errno on failure. + */ +int frwr_wp_create(struct rpcrdma_xprt *r_xprt) +{ + struct rpcrdma_ep *ep = r_xprt->rx_ep; + struct rpcrdma_mr_seg seg; + struct rpcrdma_mr *mr; + + mr = rpcrdma_mr_get(r_xprt); + if (!mr) + return -EAGAIN; + mr->mr_req = NULL; + ep->re_write_pad_mr = mr; + + seg.mr_len = XDR_UNIT; + seg.mr_page = virt_to_page(ep->re_write_pad); + seg.mr_offset = offset_in_page(ep->re_write_pad); + if (IS_ERR(frwr_map(r_xprt, &seg, 1, true, xdr_zero, mr))) + return -EIO; + trace_xprtrdma_mr_fastreg(mr); + + mr->mr_cqe.done = frwr_wc_fastreg; + mr->mr_regwr.wr.next = NULL; + mr->mr_regwr.wr.wr_cqe = &mr->mr_cqe; + mr->mr_regwr.wr.num_sge = 0; + mr->mr_regwr.wr.opcode = IB_WR_REG_MR; + mr->mr_regwr.wr.send_flags = 0; + + return ib_post_send(ep->re_id->qp, &mr->mr_regwr.wr, NULL); +} diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index c335c1361564..8035a983c8ce 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -255,15 +255,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf, page_base = 0; } - if (type == rpcrdma_readch) - goto out; - - /* When encoding a Write chunk, some servers need to see an - * extra segment for non-XDR-aligned Write chunks. The upper - * layer provides space in the tail iovec that may be used - * for this purpose. 
- */ - if (type == rpcrdma_writech && r_xprt->rx_ep->re_implicit_roundup) + if (type == rpcrdma_readch || type == rpcrdma_writech) goto out; if (xdrbuf->tail[0].iov_len) @@ -405,6 +397,7 @@ static int rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, enum rpcrdma_chunktype wtype) { struct xdr_stream *xdr = &req->rl_stream; + struct rpcrdma_ep *ep = r_xprt->rx_ep; struct rpcrdma_mr_seg *seg; struct rpcrdma_mr *mr; int nsegs, nchunks; @@ -443,6 +436,18 @@ static int rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, nsegs -= mr->mr_nents; } while (nsegs); + if (xdr_pad_size(rqst->rq_rcv_buf.page_len)) { + if (encode_rdma_segment(xdr, ep->re_write_pad_mr) < 0) + return -EMSGSIZE; + + trace_xprtrdma_chunk_wp(rqst->rq_task, ep->re_write_pad_mr, + nsegs); + r_xprt->rx_stats.write_chunk_count++; + r_xprt->rx_stats.total_rdma_request += ep->re_write_pad_mr->mr_length; + nchunks++; + nsegs -= mr->mr_nents; + } + /* Update count of segments in this Write chunk */ *segcount = cpu_to_be32(nchunks); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index aaec3c9be8db..c3784b7b6855 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -551,6 +551,7 @@ int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt) goto out; } rpcrdma_mrs_create(r_xprt); + frwr_wp_create(r_xprt); out: trace_xprtrdma_connect(r_xprt, rc); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index d91f54eae00b..b6d8b3e6356c 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -68,12 +68,14 @@ /* * RDMA Endpoint -- connection endpoint details */ +struct rpcrdma_mr; struct rpcrdma_ep { struct kref re_kref; struct rdma_cm_id *re_id; struct ib_pd *re_pd; unsigned int re_max_rdma_segs; unsigned int re_max_fr_depth; + struct rpcrdma_mr *re_write_pad_mr; bool re_implicit_roundup; enum ib_mr_type re_mrtype; struct completion re_done; @@ -97,6 +99,8 @@ struct rpcrdma_ep { unsigned int re_inline_recv; /* negotiated */
atomic_t re_completion_ids; + + char re_write_pad[XDR_UNIT]; }; /* Pre-allocate extra Work Requests for handling reverse-direction @@ -535,6 +539,7 @@ int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req); void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs); void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req); void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req); +int frwr_wp_create(struct rpcrdma_xprt *r_xprt); /* * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c From 7a3d524c4cf520aa4501b66eac4a7d2339b018e6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 5 Oct 2021 10:18:06 -0400 Subject: [PATCH 153/433] xprtrdma: Remove rpcrdma_ep::re_implicit_roundup Clean up: this field is no longer used. xprt_rdma_pad_optimize is also no longer used, but is left in place because it is part of the kernel/userspace API. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/xprtrdma/verbs.c | 2 -- net/sunrpc/xprtrdma/xprt_rdma.h | 1 - 2 files changed, 3 deletions(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index c3784b7b6855..3d3673ba9e1e 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -205,14 +205,12 @@ static void rpcrdma_update_cm_private(struct rpcrdma_ep *ep, unsigned int rsize, wsize; /* Default settings for RPC-over-RDMA Version One */ - ep->re_implicit_roundup = xprt_rdma_pad_optimize; rsize = RPCRDMA_V1_DEF_INLINE_SIZE; wsize = RPCRDMA_V1_DEF_INLINE_SIZE; if (pmsg && pmsg->cp_magic == rpcrdma_cmp_magic && pmsg->cp_version == RPCRDMA_CMP_VERSION) { - ep->re_implicit_roundup = true; rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size); wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size); } diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index b6d8b3e6356c..c79f92eeda76 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -76,7 +76,6 @@ struct 
rpcrdma_ep { unsigned int re_max_rdma_segs; unsigned int re_max_fr_depth; struct rpcrdma_mr *re_write_pad_mr; - bool re_implicit_roundup; enum ib_mr_type re_mrtype; struct completion re_done; unsigned int re_send_count; From b4776a341ec05e809d21e98db5ed49dbdc81d5d8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 16 Oct 2021 18:02:24 -0400 Subject: [PATCH 154/433] SUNRPC: Tracepoints should display tk_pid and cl_clid as a fixed-size field For certain special cases, RPC-related tracepoints record a -1 as the task ID or the client ID. It's ugly for a trace event to display 4 billion in these cases. To help keep SUNRPC tracepoints consistent, create a macro that defines the print format specifiers for tk_pid and cl_clid. At some point in the future we might try tk_pid with a wider range of values than 0..64K so this makes it easier to make that change. RPC tracepoints now look like this: <...>-1276 [009] 149.720358: rpc_clnt_new: client=00000005 peer=[192.168.2.55]:20049 program=nfs server=klimt.ib <...>-1342 [004] 149.921234: rpc_xdr_recvfrom: task:0000001a@00000005 head=[0xff1242d9ab6dc01c,144] page=0 tail=[(nil),0] len=144 <...>-1342 [004] 149.921235: xprt_release_cong: task:0000001a@00000005 snd_task:ffffffff cong=256 cwnd=16384 <...>-1342 [004] 149.921235: xprt_put_cong: task:0000001a@00000005 snd_task:ffffffff cong=0 cwnd=16384 Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4trace.h | 9 ++-- fs/nfs/nfstrace.h | 6 ++- include/trace/events/rpcgss.h | 18 +++++--- include/trace/events/rpcrdma.h | 42 +++++++++-------- include/trace/events/sunrpc.h | 74 ++++++++++++++++++------------ include/trace/events/sunrpc_base.h | 18 ++++++++ 6 files changed, 108 insertions(+), 59 deletions(-) create mode 100644 include/trace/events/sunrpc_base.h diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 7a2567aa2b86..d4f061046c09 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -9,6 +9,7 @@ #define _TRACE_NFS4_H #include +#include 
TRACE_DEFINE_ENUM(EPERM); TRACE_DEFINE_ENUM(ENOENT); @@ -694,8 +695,8 @@ TRACE_EVENT(nfs4_xdr_bad_operation, __entry->expected = expected; ), - TP_printk( - "task:%u@%d xid=0x%08x operation=%u, expected=%u", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " xid=0x%08x operation=%u, expected=%u", __entry->task_id, __entry->client_id, __entry->xid, __entry->op, __entry->expected ) @@ -729,8 +730,8 @@ DECLARE_EVENT_CLASS(nfs4_xdr_event, __entry->error = error; ), - TP_printk( - "task:%u@%d xid=0x%08x error=%ld (%s) operation=%u", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " xid=0x%08x error=%ld (%s) operation=%u", __entry->task_id, __entry->client_id, __entry->xid, -__entry->error, show_nfsv4_errors(__entry->error), __entry->op diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 44fd016a8e65..82b51120450f 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -11,6 +11,8 @@ #include #include +#include + #define nfs_show_file_type(ftype) \ __print_symbolic(ftype, \ { DT_UNKNOWN, "UNKNOWN" }, \ @@ -1409,8 +1411,8 @@ DECLARE_EVENT_CLASS(nfs_xdr_event, __assign_str(procedure, task->tk_msg.rpc_proc->p_name); ), - TP_printk( - "task:%u@%d xid=0x%08x %sv%d %s error=%ld (%s)", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " xid=0x%08x %sv%d %s error=%ld (%s)", __entry->task_id, __entry->client_id, __entry->xid, __get_str(program), __entry->version, __get_str(procedure), -__entry->error, diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h index b2a2672e6632..3ba63319af3c 100644 --- a/include/trace/events/rpcgss.h +++ b/include/trace/events/rpcgss.h @@ -13,6 +13,8 @@ #include +#include + /** ** GSS-API related trace events **/ @@ -99,7 +101,7 @@ DECLARE_EVENT_CLASS(rpcgss_gssapi_event, __entry->maj_stat = maj_stat; ), - TP_printk("task:%u@%u maj_stat=%s", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " maj_stat=%s", __entry->task_id, __entry->client_id, __entry->maj_stat == 0 ? 
"GSS_S_COMPLETE" : show_gss_status(__entry->maj_stat)) @@ -332,7 +334,8 @@ TRACE_EVENT(rpcgss_unwrap_failed, __entry->client_id = task->tk_client->cl_clid; ), - TP_printk("task:%u@%u", __entry->task_id, __entry->client_id) + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER, + __entry->task_id, __entry->client_id) ); TRACE_EVENT(rpcgss_bad_seqno, @@ -358,7 +361,8 @@ TRACE_EVENT(rpcgss_bad_seqno, __entry->received = received; ), - TP_printk("task:%u@%u expected seqno %u, received seqno %u", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " expected seqno %u, received seqno %u", __entry->task_id, __entry->client_id, __entry->expected, __entry->received) ); @@ -386,7 +390,7 @@ TRACE_EVENT(rpcgss_seqno, __entry->seqno = rqst->rq_seqno; ), - TP_printk("task:%u@%u xid=0x%08x seqno=%u", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x seqno=%u", __entry->task_id, __entry->client_id, __entry->xid, __entry->seqno) ); @@ -418,7 +422,8 @@ TRACE_EVENT(rpcgss_need_reencode, __entry->ret = ret; ), - TP_printk("task:%u@%u xid=0x%08x rq_seqno=%u seq_xmit=%u reencode %sneeded", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " xid=0x%08x rq_seqno=%u seq_xmit=%u reencode %sneeded", __entry->task_id, __entry->client_id, __entry->xid, __entry->seqno, __entry->seq_xmit, __entry->ret ? 
"" : "un") @@ -452,7 +457,8 @@ TRACE_EVENT(rpcgss_update_slack, __entry->verfsize = auth->au_verfsize; ), - TP_printk("task:%u@%u xid=0x%08x auth=%p rslack=%u ralign=%u verfsize=%u\n", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " xid=0x%08x auth=%p rslack=%u ralign=%u verfsize=%u\n", __entry->task_id, __entry->client_id, __entry->xid, __entry->auth, __entry->rslack, __entry->ralign, __entry->verfsize) diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index afb2e394797c..7f46ef621c95 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -14,7 +14,9 @@ #include #include #include + #include +#include /** ** Event classes @@ -279,7 +281,8 @@ DECLARE_EVENT_CLASS(xprtrdma_rdch_event, __entry->nsegs = nsegs; ), - TP_printk("task:%u@%u pos=%u %u@0x%016llx:0x%08x (%s)", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " pos=%u %u@0x%016llx:0x%08x (%s)", __entry->task_id, __entry->client_id, __entry->pos, __entry->length, (unsigned long long)__entry->offset, __entry->handle, @@ -326,7 +329,8 @@ DECLARE_EVENT_CLASS(xprtrdma_wrch_event, __entry->nsegs = nsegs; ), - TP_printk("task:%u@%u %u@0x%016llx:0x%08x (%s)", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " %u@0x%016llx:0x%08x (%s)", __entry->task_id, __entry->client_id, __entry->length, (unsigned long long)__entry->offset, __entry->handle, @@ -393,7 +397,8 @@ DECLARE_EVENT_CLASS(xprtrdma_mr_class, __entry->dir = mr->mr_dir; ), - TP_printk("task:%u@%u mr.id=%u nents=%d %u@0x%016llx:0x%08x (%s)", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " mr.id=%u nents=%d %u@0x%016llx:0x%08x (%s)", __entry->task_id, __entry->client_id, __entry->mr_id, __entry->nents, __entry->length, (unsigned long long)__entry->offset, __entry->handle, @@ -636,9 +641,9 @@ TRACE_EVENT(xprtrdma_nomrs_err, __assign_str(port, rpcrdma_portstr(r_xprt)); ), - TP_printk("peer=[%s]:%s task:%u@%u", - __get_str(addr), __get_str(port), - __entry->task_id, __entry->client_id + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " 
peer=[%s]:%s", + __entry->task_id, __entry->client_id, + __get_str(addr), __get_str(port) ) ); @@ -700,7 +705,8 @@ TRACE_EVENT(xprtrdma_marshal, __entry->wtype = wtype; ), - TP_printk("task:%u@%u xid=0x%08x: hdr=%u xdr=%u/%u/%u %s/%s", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " xid=0x%08x hdr=%u xdr=%u/%u/%u %s/%s", __entry->task_id, __entry->client_id, __entry->xid, __entry->hdrlen, __entry->headlen, __entry->pagelen, __entry->taillen, @@ -730,7 +736,7 @@ TRACE_EVENT(xprtrdma_marshal_failed, __entry->ret = ret; ), - TP_printk("task:%u@%u xid=0x%08x: ret=%d", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x ret=%d", __entry->task_id, __entry->client_id, __entry->xid, __entry->ret ) @@ -757,7 +763,7 @@ TRACE_EVENT(xprtrdma_prepsend_failed, __entry->ret = ret; ), - TP_printk("task:%u@%u xid=0x%08x: ret=%d", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x ret=%d", __entry->task_id, __entry->client_id, __entry->xid, __entry->ret ) @@ -792,7 +798,7 @@ TRACE_EVENT(xprtrdma_post_send, __entry->signaled = req->rl_wr.send_flags & IB_SEND_SIGNALED; ), - TP_printk("task:%u@%u cq.id=%u cid=%d (%d SGE%s) %s", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " cq.id=%u cid=%d (%d SGE%s) %s", __entry->task_id, __entry->client_id, __entry->cq_id, __entry->completion_id, __entry->num_sge, (__entry->num_sge == 1 ? 
"" : "s"), @@ -827,7 +833,7 @@ TRACE_EVENT(xprtrdma_post_send_err, __entry->rc = rc; ), - TP_printk("task:%u@%u cq.id=%u rc=%d", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " cq.id=%u rc=%d", __entry->task_id, __entry->client_id, __entry->cq_id, __entry->rc ) @@ -939,7 +945,7 @@ TRACE_EVENT(xprtrdma_post_linv_err, __entry->status = status; ), - TP_printk("task:%u@%u status=%d", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " status=%d", __entry->task_id, __entry->client_id, __entry->status ) ); @@ -1127,7 +1133,7 @@ TRACE_EVENT(xprtrdma_reply, __entry->credits = credits; ), - TP_printk("task:%u@%u xid=0x%08x credits=%u", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x credits=%u", __entry->task_id, __entry->client_id, __entry->xid, __entry->credits ) @@ -1163,7 +1169,7 @@ TRACE_EVENT(xprtrdma_err_vers, __entry->max = be32_to_cpup(max); ), - TP_printk("task:%u@%u xid=0x%08x versions=[%u, %u]", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x versions=[%u, %u]", __entry->task_id, __entry->client_id, __entry->xid, __entry->min, __entry->max ) @@ -1188,7 +1194,7 @@ TRACE_EVENT(xprtrdma_err_chunk, __entry->xid = be32_to_cpu(rqst->rq_xid); ), - TP_printk("task:%u@%u xid=0x%08x", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x", __entry->task_id, __entry->client_id, __entry->xid ) ); @@ -1214,7 +1220,7 @@ TRACE_EVENT(xprtrdma_err_unrecognized, __entry->procedure = be32_to_cpup(procedure); ), - TP_printk("task:%u@%u xid=0x%08x procedure=%u", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x procedure=%u", __entry->task_id, __entry->client_id, __entry->xid, __entry->procedure ) @@ -1246,7 +1252,7 @@ TRACE_EVENT(xprtrdma_fixup, __entry->taillen = rqst->rq_rcv_buf.tail[0].iov_len; ), - TP_printk("task:%u@%u fixup=%lu xdr=%zu/%u/%zu", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " fixup=%lu xdr=%zu/%u/%zu", __entry->task_id, __entry->client_id, __entry->fixup, __entry->headlen, __entry->pagelen, __entry->taillen ) @@ -1296,7 +1302,7 @@ TRACE_EVENT(xprtrdma_mrs_zap, 
__entry->client_id = task->tk_client->cl_clid; ), - TP_printk("task:%u@%u", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER, __entry->task_id, __entry->client_id ) ); diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 2d04eb96d418..92def7d6663e 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -14,6 +14,8 @@ #include #include +#include + TRACE_DEFINE_ENUM(SOCK_STREAM); TRACE_DEFINE_ENUM(SOCK_DGRAM); TRACE_DEFINE_ENUM(SOCK_RAW); @@ -78,7 +80,8 @@ DECLARE_EVENT_CLASS(rpc_xdr_buf_class, __entry->msg_len = xdr->len; ), - TP_printk("task:%u@%u head=[%p,%zu] page=%u tail=[%p,%zu] len=%u", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " head=[%p,%zu] page=%u tail=[%p,%zu] len=%u", __entry->task_id, __entry->client_id, __entry->head_base, __entry->head_len, __entry->page_len, __entry->tail_base, __entry->tail_len, __entry->msg_len @@ -114,7 +117,7 @@ DECLARE_EVENT_CLASS(rpc_clnt_class, __entry->client_id = clnt->cl_clid; ), - TP_printk("clid=%u", __entry->client_id) + TP_printk("client=" SUNRPC_TRACE_CLID_SPECIFIER, __entry->client_id) ); #define DEFINE_RPC_CLNT_EVENT(name) \ @@ -158,7 +161,8 @@ TRACE_EVENT(rpc_clnt_new, __assign_str(server, server); ), - TP_printk("client=%u peer=[%s]:%s program=%s server=%s", + TP_printk("client=" SUNRPC_TRACE_CLID_SPECIFIER + " peer=[%s]:%s program=%s server=%s", __entry->client_id, __get_str(addr), __get_str(port), __get_str(program), __get_str(server)) ); @@ -206,7 +210,8 @@ TRACE_EVENT(rpc_clnt_clone_err, __entry->error = error; ), - TP_printk("client=%u error=%d", __entry->client_id, __entry->error) + TP_printk("client=" SUNRPC_TRACE_CLID_SPECIFIER " error=%d", + __entry->client_id, __entry->error) ); @@ -248,7 +253,7 @@ DECLARE_EVENT_CLASS(rpc_task_status, __entry->status = task->tk_status; ), - TP_printk("task:%u@%u status=%d", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " status=%d", __entry->task_id, __entry->client_id, __entry->status) ); @@ -288,7 +293,7 @@ TRACE_EVENT(rpc_request, 
__assign_str(procname, rpc_proc_name(task)); ), - TP_printk("task:%u@%u %sv%d %s (%ssync)", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " %sv%d %s (%ssync)", __entry->task_id, __entry->client_id, __get_str(progname), __entry->version, __get_str(procname), __entry->async ? "a": "" @@ -348,7 +353,8 @@ DECLARE_EVENT_CLASS(rpc_task_running, __entry->flags = task->tk_flags; ), - TP_printk("task:%u@%d flags=%s runstate=%s status=%d action=%ps", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " flags=%s runstate=%s status=%d action=%ps", __entry->task_id, __entry->client_id, rpc_show_task_flags(__entry->flags), rpc_show_runstate(__entry->runstate), @@ -400,7 +406,8 @@ DECLARE_EVENT_CLASS(rpc_task_queued, __assign_str(q_name, rpc_qname(q)); ), - TP_printk("task:%u@%d flags=%s runstate=%s status=%d timeout=%lu queue=%s", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " flags=%s runstate=%s status=%d timeout=%lu queue=%s", __entry->task_id, __entry->client_id, rpc_show_task_flags(__entry->flags), rpc_show_runstate(__entry->runstate), @@ -436,7 +443,7 @@ DECLARE_EVENT_CLASS(rpc_failure, __entry->client_id = task->tk_client->cl_clid; ), - TP_printk("task:%u@%u", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER, __entry->task_id, __entry->client_id) ); @@ -478,7 +485,8 @@ DECLARE_EVENT_CLASS(rpc_reply_event, __assign_str(servername, task->tk_xprt->servername); ), - TP_printk("task:%u@%d server=%s xid=0x%08x %sv%d %s", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " server=%s xid=0x%08x %sv%d %s", __entry->task_id, __entry->client_id, __get_str(servername), __entry->xid, __get_str(progname), __entry->version, __get_str(procname)) @@ -538,7 +546,8 @@ TRACE_EVENT(rpc_buf_alloc, __entry->status = status; ), - TP_printk("task:%u@%u callsize=%zu recvsize=%zu status=%d", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " callsize=%zu recvsize=%zu status=%d", __entry->task_id, __entry->client_id, __entry->callsize, __entry->recvsize, __entry->status ) @@ -567,7 +576,8 @@ TRACE_EVENT(rpc_call_rpcerror, 
__entry->rpc_status = rpc_status; ), - TP_printk("task:%u@%u tk_status=%d rpc_status=%d", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " tk_status=%d rpc_status=%d", __entry->task_id, __entry->client_id, __entry->tk_status, __entry->rpc_status) ); @@ -607,7 +617,8 @@ TRACE_EVENT(rpc_stats_latency, __entry->execute = ktime_to_us(execute); ), - TP_printk("task:%u@%d xid=0x%08x %sv%d %s backlog=%lu rtt=%lu execute=%lu", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " xid=0x%08x %sv%d %s backlog=%lu rtt=%lu execute=%lu", __entry->task_id, __entry->client_id, __entry->xid, __get_str(progname), __entry->version, __get_str(procname), __entry->backlog, __entry->rtt, __entry->execute) @@ -651,8 +662,8 @@ TRACE_EVENT(rpc_xdr_overflow, __entry->version = task->tk_client->cl_vers; __assign_str(procedure, task->tk_msg.rpc_proc->p_name); } else { - __entry->task_id = 0; - __entry->client_id = 0; + __entry->task_id = -1; + __entry->client_id = -1; __assign_str(progname, "unknown"); __entry->version = 0; __assign_str(procedure, "unknown"); @@ -668,8 +679,8 @@ TRACE_EVENT(rpc_xdr_overflow, __entry->len = xdr->buf->len; ), - TP_printk( - "task:%u@%u %sv%d %s requested=%zu p=%p end=%p xdr=[%p,%zu]/%u/[%p,%zu]/%u\n", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " %sv%d %s requested=%zu p=%p end=%p xdr=[%p,%zu]/%u/[%p,%zu]/%u\n", __entry->task_id, __entry->client_id, __get_str(progname), __entry->version, __get_str(procedure), __entry->requested, __entry->p, __entry->end, @@ -727,8 +738,8 @@ TRACE_EVENT(rpc_xdr_alignment, __entry->len = xdr->buf->len; ), - TP_printk( - "task:%u@%u %sv%d %s offset=%zu copied=%u xdr=[%p,%zu]/%u/[%p,%zu]/%u\n", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " %sv%d %s offset=%zu copied=%u xdr=[%p,%zu]/%u/[%p,%zu]/%u\n", __entry->task_id, __entry->client_id, __get_str(progname), __entry->version, __get_str(procedure), __entry->offset, __entry->copied, @@ -917,7 +928,8 @@ TRACE_EVENT(rpc_socket_nospace, __entry->remaining = rqst->rq_slen - transport->xmit.offset; ), - 
TP_printk("task:%u@%u total=%u remaining=%u", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " total=%u remaining=%u", __entry->task_id, __entry->client_id, __entry->total, __entry->remaining ) @@ -1042,8 +1054,8 @@ TRACE_EVENT(xprt_transmit, __entry->status = status; ), - TP_printk( - "task:%u@%u xid=0x%08x seqno=%u status=%d", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " xid=0x%08x seqno=%u status=%d", __entry->task_id, __entry->client_id, __entry->xid, __entry->seqno, __entry->status) ); @@ -1082,8 +1094,8 @@ TRACE_EVENT(xprt_retransmit, __assign_str(procname, rpc_proc_name(task)); ), - TP_printk( - "task:%u@%u xid=0x%08x %sv%d %s ntrans=%d timeout=%lu", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " xid=0x%08x %sv%d %s ntrans=%d timeout=%lu", __entry->task_id, __entry->client_id, __entry->xid, __get_str(progname), __entry->version, __get_str(procname), __entry->ntrans, __entry->timeout @@ -1137,7 +1149,8 @@ DECLARE_EVENT_CLASS(xprt_writelock_event, xprt->snd_task->tk_pid : -1; ), - TP_printk("task:%u@%u snd_task:%u", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " snd_task:" SUNRPC_TRACE_PID_SPECIFIER, __entry->task_id, __entry->client_id, __entry->snd_task_id) ); @@ -1185,7 +1198,9 @@ DECLARE_EVENT_CLASS(xprt_cong_event, __entry->wait = test_bit(XPRT_CWND_WAIT, &xprt->state); ), - TP_printk("task:%u@%u snd_task:%u cong=%lu cwnd=%lu%s", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " snd_task:" SUNRPC_TRACE_PID_SPECIFIER + " cong=%lu cwnd=%lu%s", __entry->task_id, __entry->client_id, __entry->snd_task_id, __entry->cong, __entry->cwnd, __entry->wait ? 
" (wait)" : "") @@ -1223,7 +1238,7 @@ TRACE_EVENT(xprt_reserve, __entry->xid = be32_to_cpu(rqst->rq_xid); ), - TP_printk("task:%u@%u xid=0x%08x", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x", __entry->task_id, __entry->client_id, __entry->xid ) ); @@ -1312,7 +1327,8 @@ TRACE_EVENT(rpcb_getport, __assign_str(servername, task->tk_xprt->servername); ), - TP_printk("task:%u@%u server=%s program=%u version=%u protocol=%d bind_version=%u", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " server=%s program=%u version=%u protocol=%d bind_version=%u", __entry->task_id, __entry->client_id, __get_str(servername), __entry->program, __entry->version, __entry->protocol, __entry->bind_version @@ -1342,7 +1358,7 @@ TRACE_EVENT(rpcb_setport, __entry->port = port; ), - TP_printk("task:%u@%u status=%d port=%u", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " status=%d port=%u", __entry->task_id, __entry->client_id, __entry->status, __entry->port ) diff --git a/include/trace/events/sunrpc_base.h b/include/trace/events/sunrpc_base.h new file mode 100644 index 000000000000..588557d07ea8 --- /dev/null +++ b/include/trace/events/sunrpc_base.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2021 Oracle and/or its affiliates. + * + * Common types and format specifiers for sunrpc. + */ + +#if !defined(_TRACE_SUNRPC_BASE_H) +#define _TRACE_SUNRPC_BASE_H + +#include + +#define SUNRPC_TRACE_PID_SPECIFIER "%08x" +#define SUNRPC_TRACE_CLID_SPECIFIER "%08x" +#define SUNRPC_TRACE_TASK_SPECIFIER \ + "task:" SUNRPC_TRACE_PID_SPECIFIER "@" SUNRPC_TRACE_CLID_SPECIFIER + +#endif /* _TRACE_SUNRPC_BASE_H */ From 76497b1adb89175eee85afc437f08a68247314b3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 16 Oct 2021 18:02:38 -0400 Subject: [PATCH 155/433] SUNRPC: Use BIT() macro in rpc_show_xprt_state() Clean up: BIT() is preferred over open-coding the shift. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/trace/events/sunrpc.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 92def7d6663e..f8b12eb07693 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -937,18 +937,18 @@ TRACE_EVENT(rpc_socket_nospace, #define rpc_show_xprt_state(x) \ __print_flags(x, "|", \ - { (1UL << XPRT_LOCKED), "LOCKED"}, \ - { (1UL << XPRT_CONNECTED), "CONNECTED"}, \ - { (1UL << XPRT_CONNECTING), "CONNECTING"}, \ - { (1UL << XPRT_CLOSE_WAIT), "CLOSE_WAIT"}, \ - { (1UL << XPRT_BOUND), "BOUND"}, \ - { (1UL << XPRT_BINDING), "BINDING"}, \ - { (1UL << XPRT_CLOSING), "CLOSING"}, \ - { (1UL << XPRT_OFFLINE), "OFFLINE"}, \ - { (1UL << XPRT_REMOVE), "REMOVE"}, \ - { (1UL << XPRT_CONGESTED), "CONGESTED"}, \ - { (1UL << XPRT_CWND_WAIT), "CWND_WAIT"}, \ - { (1UL << XPRT_WRITE_SPACE), "WRITE_SPACE"}) + { BIT(XPRT_LOCKED), "LOCKED" }, \ + { BIT(XPRT_CONNECTED), "CONNECTED" }, \ + { BIT(XPRT_CONNECTING), "CONNECTING" }, \ + { BIT(XPRT_CLOSE_WAIT), "CLOSE_WAIT" }, \ + { BIT(XPRT_BOUND), "BOUND" }, \ + { BIT(XPRT_BINDING), "BINDING" }, \ + { BIT(XPRT_CLOSING), "CLOSING" }, \ + { BIT(XPRT_OFFLINE), "OFFLINE" }, \ + { BIT(XPRT_REMOVE), "REMOVE" }, \ + { BIT(XPRT_CONGESTED), "CONGESTED" }, \ + { BIT(XPRT_CWND_WAIT), "CWND_WAIT" }, \ + { BIT(XPRT_WRITE_SPACE), "WRITE_SPACE" }) DECLARE_EVENT_CLASS(rpc_xprt_lifetime_class, TP_PROTO( From d9f877433ef8ba2a79d2abd921ddaf2d301e24bb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 16 Oct 2021 18:02:51 -0400 Subject: [PATCH 156/433] NFS: Replace dprintk callsites in nfs_readpage(s) These new events report slightly different information for readpage and readpages/readahead. 
For readpage: fsx-1387 [006] 380.761896: nfs_aop_readpage: fileid=00:28:2 fhandle=0x36fbbe51 version=1752899355910932437 offset=131072 fsx-1387 [006] 380.761900: nfs_aop_readpage_done: fileid=00:28:2 fhandle=0x36fbbe51 version=1752899355910932437 offset=131072 ret=0 The index of a synchronous single-page read is reported. For readpages: fsx-1387 [006] 380.760847: nfs_aop_readahead: fileid=00:28:2 fhandle=0x36fbbe51 version=1752899355909932456 nr_pages=3 fsx-1387 [006] 380.760853: nfs_aop_readahead_done: fileid=00:28:2 fhandle=0x36fbbe51 version=1752899355909932456 nr_pages=3 ret=0 The count of pages requested is reported. nfs_readpages does not wait for the READ requests to complete. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfstrace.h | 146 ++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/read.c | 11 ++-- 2 files changed, 151 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 82b51120450f..83e9615c8b8c 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -862,6 +862,152 @@ TRACE_EVENT(nfs_sillyrename_unlink, ) ); +TRACE_EVENT(nfs_aop_readpage, + TP_PROTO( + const struct inode *inode, + struct page *page + ), + + TP_ARGS(inode, page), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(u64, version) + __field(loff_t, offset) + ), + + TP_fast_assign( + const struct nfs_inode *nfsi = NFS_I(inode); + + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->version = inode_peek_iversion_raw(inode); + __entry->offset = page_index(page) << PAGE_SHIFT; + ), + + TP_printk( + "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu offset=%lld", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, __entry->version, + __entry->offset + ) +); + +TRACE_EVENT(nfs_aop_readpage_done, + TP_PROTO( + const struct inode *inode, + struct page 
*page, + int ret + ), + + TP_ARGS(inode, page, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(int, ret) + __field(u64, fileid) + __field(u64, version) + __field(loff_t, offset) + ), + + TP_fast_assign( + const struct nfs_inode *nfsi = NFS_I(inode); + + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->version = inode_peek_iversion_raw(inode); + __entry->offset = page_index(page) << PAGE_SHIFT; + __entry->ret = ret; + ), + + TP_printk( + "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu offset=%lld ret=%d", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, __entry->version, + __entry->offset, __entry->ret + ) +); + +TRACE_EVENT(nfs_aop_readahead, + TP_PROTO( + const struct inode *inode, + unsigned int nr_pages + ), + + TP_ARGS(inode, nr_pages), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(u64, version) + __field(unsigned int, nr_pages) + ), + + TP_fast_assign( + const struct nfs_inode *nfsi = NFS_I(inode); + + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->version = inode_peek_iversion_raw(inode); + __entry->nr_pages = nr_pages; + ), + + TP_printk( + "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu nr_pages=%u", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, __entry->version, + __entry->nr_pages + ) +); + +TRACE_EVENT(nfs_aop_readahead_done, + TP_PROTO( + const struct inode *inode, + unsigned int nr_pages, + int ret + ), + + TP_ARGS(inode, nr_pages, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(int, ret) + __field(u64, fileid) + __field(u64, version) + __field(unsigned int, nr_pages) + ), + + TP_fast_assign( + const struct nfs_inode *nfsi = NFS_I(inode); + + 
__entry->dev = inode->i_sb->s_dev; + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->version = inode_peek_iversion_raw(inode); + __entry->nr_pages = nr_pages; + __entry->ret = ret; + ), + + TP_printk( + "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu nr_pages=%u ret=%d", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, __entry->version, + __entry->nr_pages, __entry->ret + ) +); + TRACE_EVENT(nfs_initiate_read, TP_PROTO( const struct nfs_pgio_header *hdr diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 08d6cc57cbc3..c8273d4b12ad 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -337,8 +337,7 @@ int nfs_readpage(struct file *file, struct page *page) struct inode *inode = page_file_mapping(page)->host; int ret; - dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", - page, PAGE_SIZE, page_index(page)); + trace_nfs_aop_readpage(inode, page); nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); /* @@ -390,9 +389,11 @@ out_wait: } out: put_nfs_open_context(desc.ctx); + trace_nfs_aop_readpage_done(inode, page, ret); return ret; out_unlock: unlock_page(page); + trace_nfs_aop_readpage_done(inode, page, ret); return ret; } @@ -403,10 +404,7 @@ int nfs_readpages(struct file *file, struct address_space *mapping, struct inode *inode = mapping->host; int ret; - dprintk("NFS: nfs_readpages (%s/%Lu %d)\n", - inode->i_sb->s_id, - (unsigned long long)NFS_FILEID(inode), - nr_pages); + trace_nfs_aop_readahead(inode, nr_pages); nfs_inc_stats(inode, NFSIOS_VFSREADPAGES); ret = -ESTALE; @@ -439,6 +437,7 @@ int nfs_readpages(struct file *file, struct address_space *mapping, read_complete: put_nfs_open_context(desc.ctx); out: + trace_nfs_aop_readahead_done(inode, nr_pages, ret); return ret; } From b40887e10dcacc5e8ae3c1a99dcba20877c4831b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 16 Oct 2021 18:02:57 -0400 Subject: [PATCH 157/433] SUNRPC: Trace calls to .rpc_call_done Introduce a single tracepoint 
that can replace simple dprintk call sites in upper layer "rpc_call_done" callbacks. Example: kworker/u24:2-1254 [001] 771.026677: rpc_stats_latency: task:00000001@00000002 xid=0x16a6f3c0 rpcbindv2 GETPORT backlog=446 rtt=101 execute=555 kworker/u24:2-1254 [001] 771.026677: rpc_task_call_done: task:00000001@00000002 flags=ASYNC|DYNAMIC|SOFT|SOFTCONN|SENT runstate=RUNNING|ACTIVE status=0 action=rpcb_getport_done kworker/u24:2-1254 [001] 771.026678: rpcb_setport: task:00000001@00000002 status=0 port=20048 Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/lockd/clntproc.c | 3 --- fs/lockd/svc4proc.c | 2 -- fs/lockd/svcproc.c | 2 -- fs/nfs/filelayout/filelayout.c | 2 -- fs/nfs/flexfilelayout/flexfilelayout.c | 2 -- fs/nfs/pagelist.c | 3 --- fs/nfs/write.c | 3 --- include/trace/events/sunrpc.h | 1 + net/sunrpc/sched.c | 1 + 9 files changed, 2 insertions(+), 17 deletions(-) diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index b11f2afa84f1..99fffc9cb958 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -794,9 +794,6 @@ static void nlmclnt_cancel_callback(struct rpc_task *task, void *data) goto retry_cancel; } - dprintk("lockd: cancel status %u (task %u)\n", - status, task->tk_pid); - switch (status) { case NLM_LCK_GRANTED: case NLM_LCK_DENIED_GRACE_PERIOD: diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index e10ae2c41279..176b468a61c7 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -269,8 +269,6 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp) */ static void nlm4svc_callback_exit(struct rpc_task *task, void *data) { - dprintk("lockd: %5u callback returned %d\n", task->tk_pid, - -task->tk_status); } static void nlm4svc_callback_release(void *data) diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 99696d3f6dd6..4dc1b40a489a 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -301,8 +301,6 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp) */ static void nlmsvc_callback_exit(struct rpc_task *task, void 
*data) { - dprintk("lockd: %5u callback returned %d\n", task->tk_pid, - -task->tk_status); } void nlmsvc_release_call(struct nlm_rqst *call) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index d2103852475f..9c96e3e5ed35 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -293,8 +293,6 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; - dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); - if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && task->tk_status == 0) { nfs41_sequence_done(task, &hdr->res.seq_res); diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index d383de00d486..a553d59afa8b 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1414,8 +1414,6 @@ static void ff_layout_read_call_done(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; - dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); - if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && task->tk_status == 0) { nfs4_sequence_done(task, &hdr->res.seq_res); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 72333bcaa4c4..ad7f83dc9a2d 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -864,9 +864,6 @@ static void nfs_pgio_result(struct rpc_task *task, void *calldata) struct nfs_pgio_header *hdr = calldata; struct inode *inode = hdr->inode; - dprintk("NFS: %s: %5u, (status %d)\n", __func__, - task->tk_pid, task->tk_status); - if (hdr->rw_ops->rw_done(task, hdr, inode) != 0) return; if (task->tk_status < 0) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 465220f47142..82c5b89395f6 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1840,9 +1840,6 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) { struct nfs_commit_data *data = calldata; - dprintk("NFS: %5u nfs_commit_done (status %d)\n", - task->tk_pid, 
task->tk_status); - /* Call the NFS version-specific code */ NFS_PROTO(data->inode)->commit_done(task, data); trace_nfs_commit_done(task, data); diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index f8b12eb07693..dc922e664820 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -378,6 +378,7 @@ DEFINE_RPC_RUNNING_EVENT(complete); DEFINE_RPC_RUNNING_EVENT(timeout); DEFINE_RPC_RUNNING_EVENT(signalled); DEFINE_RPC_RUNNING_EVENT(end); +DEFINE_RPC_RUNNING_EVENT(call_done); DECLARE_EVENT_CLASS(rpc_task_queued, diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index f4f311ea7a66..e2c835482791 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -837,6 +837,7 @@ void rpc_exit_task(struct rpc_task *task) else if (task->tk_client) rpc_count_iostats(task, task->tk_client->cl_metrics); if (task->tk_ops->rpc_call_done != NULL) { + trace_rpc_task_call_done(task, task->tk_ops->rpc_call_done); task->tk_ops->rpc_call_done(task, task->tk_calldata); if (task->tk_action != NULL) { /* Always release the RPC slot and buffer memory */ From 86882c75464920684d39b747d7f52a75200cc24f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 16 Oct 2021 18:03:04 -0400 Subject: [PATCH 158/433] NFS: Remove --> and <-- dprintk call sites dprintk call sites that display no other information than the function name can be replaced with use of the trace "function" or "function_graph" plug-ins. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 54 +++++------------------------------------------ 1 file changed, 5 insertions(+), 49 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9e89198ea21f..979665a91e80 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3560,7 +3560,6 @@ static void nfs4_close_done(struct rpc_task *task, void *data) .stateid = &calldata->arg.stateid, }; - dprintk("%s: begin!\n", __func__); if (!nfs4_sequence_done(task, &calldata->res.seq_res)) return; trace_nfs4_close(state, &calldata->arg, &calldata->res, task->tk_status); @@ -3615,7 +3614,7 @@ out_release: task->tk_status = 0; nfs_release_seqid(calldata->arg.seqid); nfs_refresh_inode(calldata->inode, &calldata->fattr); - dprintk("%s: done, ret = %d!\n", __func__, task->tk_status); + dprintk("%s: ret = %d\n", __func__, task->tk_status); return; out_restart: task->tk_status = 0; @@ -3633,7 +3632,6 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) bool is_rdonly, is_wronly, is_rdwr; int call_close = 0; - dprintk("%s: begin!\n", __func__); if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) goto out_wait; @@ -3707,7 +3705,6 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) &calldata->res.seq_res, task) != 0) nfs_release_seqid(calldata->arg.seqid); - dprintk("%s: done!\n", __func__); return; out_no_action: task->tk_action = NULL; @@ -5346,8 +5343,6 @@ static bool nfs4_read_plus_not_supported(struct rpc_task *task, static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { - dprintk("--> %s\n", __func__); - if (!nfs4_sequence_done(task, &hdr->res.seq_res)) return -EAGAIN; if (nfs4_read_stateid_changed(task, &hdr->args)) @@ -7002,7 +6997,6 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) struct nfs4_lockdata *data = calldata; struct nfs4_state *state = data->lsp->ls_state; - dprintk("%s: begin!\n", __func__); if 
(nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0) goto out_wait; /* Do we need to do an open_to_lock_owner? */ @@ -7036,7 +7030,7 @@ out_release_lock_seqid: nfs_release_seqid(data->arg.lock_seqid); out_wait: nfs4_sequence_done(task, &data->res.seq_res); - dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); + dprintk("%s: ret = %d\n", __func__, data->rpc_status); } static void nfs4_lock_done(struct rpc_task *task, void *calldata) @@ -7044,8 +7038,6 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) struct nfs4_lockdata *data = calldata; struct nfs4_lock_state *lsp = data->lsp; - dprintk("%s: begin!\n", __func__); - if (!nfs4_sequence_done(task, &data->res.seq_res)) return; @@ -7079,7 +7071,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) goto out_restart; } out_done: - dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status); + dprintk("%s: ret = %d!\n", __func__, data->rpc_status); return; out_restart: if (!data->cancelled) @@ -7091,7 +7083,6 @@ static void nfs4_lock_release(void *calldata) { struct nfs4_lockdata *data = calldata; - dprintk("%s: begin!\n", __func__); nfs_free_seqid(data->arg.open_seqid); if (data->cancelled && data->rpc_status == 0) { struct rpc_task *task; @@ -7105,7 +7096,6 @@ static void nfs4_lock_release(void *calldata) nfs4_put_lock_state(data->lsp); put_nfs_open_context(data->ctx); kfree(data); - dprintk("%s: done!\n", __func__); } static const struct rpc_call_ops nfs4_lock_ops = { @@ -7152,7 +7142,6 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f if (client->cl_minorversion) task_setup_data.flags |= RPC_TASK_MOVEABLE; - dprintk("%s: begin!\n", __func__); data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file), fl->fl_u.nfs4_fl.owner, recovery_type == NFS_LOCK_NEW ? 
GFP_KERNEL : GFP_NOFS); @@ -7183,7 +7172,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f data->cancelled = true; trace_nfs4_set_lock(fl, state, &data->res.stateid, cmd, ret); rpc_put_task(task); - dprintk("%s: done, ret = %d!\n", __func__, ret); + dprintk("%s: ret = %d\n", __func__, ret); return ret; } @@ -8854,14 +8843,12 @@ static void nfs4_get_lease_time_prepare(struct rpc_task *task, struct nfs4_get_lease_time_data *data = (struct nfs4_get_lease_time_data *)calldata; - dprintk("--> %s\n", __func__); /* just setup sequence, do not trigger session recovery since we're invoked within one */ nfs4_setup_sequence(data->clp, &data->args->la_seq_args, &data->res->lr_seq_res, task); - dprintk("<-- %s\n", __func__); } /* @@ -8873,13 +8860,11 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata) struct nfs4_get_lease_time_data *data = (struct nfs4_get_lease_time_data *)calldata; - dprintk("--> %s\n", __func__); if (!nfs4_sequence_done(task, &data->res->lr_seq_res)) return; switch (task->tk_status) { case -NFS4ERR_DELAY: case -NFS4ERR_GRACE: - dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status); rpc_delay(task, NFS4_POLL_RETRY_MIN); task->tk_status = 0; fallthrough; @@ -8887,7 +8872,6 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata) rpc_restart_call_prepare(task); return; } - dprintk("<-- %s\n", __func__); } static const struct rpc_call_ops nfs4_get_lease_time_ops = { @@ -9119,7 +9103,6 @@ int nfs4_proc_create_session(struct nfs_client *clp, const struct cred *cred) dprintk("%s client>seqid %d sessionid %u:%u:%u:%u\n", __func__, clp->cl_seqid, ptr[0], ptr[1], ptr[2], ptr[3]); out: - dprintk("<-- %s\n", __func__); return status; } @@ -9137,8 +9120,6 @@ int nfs4_proc_destroy_session(struct nfs4_session *session, }; int status = 0; - dprintk("--> nfs4_proc_destroy_session\n"); - /* session is still being setup */ if (!test_and_clear_bit(NFS4_SESSION_ESTABLISHED, 
&session->session_state)) return 0; @@ -9150,8 +9131,6 @@ int nfs4_proc_destroy_session(struct nfs4_session *session, if (status) dprintk("NFS: Got error %d from the server on DESTROY_SESSION. " "Session has been destroyed regardless...\n", status); - - dprintk("<-- nfs4_proc_destroy_session\n"); return status; } @@ -9199,7 +9178,7 @@ static void nfs41_sequence_call_done(struct rpc_task *task, void *data) if (task->tk_status < 0) { dprintk("%s ERROR %d\n", __func__, task->tk_status); if (refcount_read(&clp->cl_count) == 1) - goto out; + return; if (nfs41_sequence_handle_errors(task, clp) == -EAGAIN) { rpc_restart_call_prepare(task); @@ -9207,8 +9186,6 @@ static void nfs41_sequence_call_done(struct rpc_task *task, void *data) } } dprintk("%s rpc_cred %p\n", __func__, task->tk_msg.rpc_cred); -out: - dprintk("<-- %s\n", __func__); } static void nfs41_sequence_prepare(struct rpc_task *task, void *data) @@ -9355,7 +9332,6 @@ static void nfs4_reclaim_complete_done(struct rpc_task *task, void *data) struct nfs_client *clp = calldata->clp; struct nfs4_sequence_res *res = &calldata->res.seq_res; - dprintk("--> %s\n", __func__); if (!nfs41_sequence_done(task, res)) return; @@ -9364,7 +9340,6 @@ static void nfs4_reclaim_complete_done(struct rpc_task *task, void *data) rpc_restart_call_prepare(task); return; } - dprintk("<-- %s\n", __func__); } static void nfs4_free_reclaim_complete_data(void *data) @@ -9399,7 +9374,6 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp, }; int status = -ENOMEM; - dprintk("--> %s\n", __func__); calldata = kzalloc(sizeof(*calldata), GFP_NOFS); if (calldata == NULL) goto out; @@ -9422,19 +9396,15 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata) struct nfs4_layoutget *lgp = calldata; struct nfs_server *server = NFS_SERVER(lgp->args.inode); - dprintk("--> %s\n", __func__); nfs4_setup_sequence(server->nfs_client, &lgp->args.seq_args, &lgp->res.seq_res, task); - dprintk("<-- %s\n", __func__); } static void 
nfs4_layoutget_done(struct rpc_task *task, void *calldata) { struct nfs4_layoutget *lgp = calldata; - dprintk("--> %s\n", __func__); nfs41_sequence_process(task, &lgp->res.seq_res); - dprintk("<-- %s\n", __func__); } static int @@ -9523,7 +9493,6 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, status = err; } out: - dprintk("<-- %s\n", __func__); return status; } @@ -9537,10 +9506,8 @@ static void nfs4_layoutget_release(void *calldata) { struct nfs4_layoutget *lgp = calldata; - dprintk("--> %s\n", __func__); nfs4_sequence_free_slot(&lgp->res.seq_res); pnfs_layoutget_free(lgp); - dprintk("<-- %s\n", __func__); } static const struct rpc_call_ops nfs4_layoutget_call_ops = { @@ -9576,8 +9543,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout) }; int status = 0; - dprintk("--> %s\n", __func__); - nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0, 0); task = rpc_run_task(&task_setup_data); @@ -9613,7 +9578,6 @@ nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata) { struct nfs4_layoutreturn *lrp = calldata; - dprintk("--> %s\n", __func__); nfs4_setup_sequence(lrp->clp, &lrp->args.seq_args, &lrp->res.seq_res, @@ -9627,8 +9591,6 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) struct nfs4_layoutreturn *lrp = calldata; struct nfs_server *server; - dprintk("--> %s\n", __func__); - if (!nfs41_sequence_process(task, &lrp->res.seq_res)) return; @@ -9659,7 +9621,6 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) break; goto out_restart; } - dprintk("<-- %s\n", __func__); return; out_restart: task->tk_status = 0; @@ -9672,7 +9633,6 @@ static void nfs4_layoutreturn_release(void *calldata) struct nfs4_layoutreturn *lrp = calldata; struct pnfs_layout_hdr *lo = lrp->args.layout; - dprintk("--> %s\n", __func__); pnfs_layoutreturn_free_lsegs(lo, &lrp->args.stateid, &lrp->args.range, lrp->res.lrs_present ? 
&lrp->res.stateid : NULL); nfs4_sequence_free_slot(&lrp->res.seq_res); @@ -9682,7 +9642,6 @@ static void nfs4_layoutreturn_release(void *calldata) nfs_iput_and_deactive(lrp->inode); put_cred(lrp->cred); kfree(calldata); - dprintk("<-- %s\n", __func__); } static const struct rpc_call_ops nfs4_layoutreturn_call_ops = { @@ -9713,7 +9672,6 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync) NFS_SP4_MACH_CRED_PNFS_CLEANUP, &task_setup_data.rpc_client, &msg); - dprintk("--> %s\n", __func__); lrp->inode = nfs_igrab_and_active(lrp->args.inode); if (!sync) { if (!lrp->inode) { @@ -9760,7 +9718,6 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server, }; int status; - dprintk("--> %s\n", __func__); status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); if (res.notification & ~args.notify_types) dprintk("%s: unsupported notification\n", __func__); @@ -9932,7 +9889,6 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, msg.rpc_cred = cred; } - dprintk("--> %s\n", __func__); nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0); status = nfs4_call_sync_custom(&task_setup); dprintk("<-- %s status=%d\n", __func__, status); From 01dde76e471229e3437a2686c572f4980b2c483e Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Thu, 14 Oct 2021 13:55:04 -0400 Subject: [PATCH 159/433] NFS: Create an nfs4_server_set_init_caps() function And call it before doing an FSINFO probe to reset to the baseline capabilities before probing. 
Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 1 + fs/nfs/nfs4client.c | 33 +++++++++++++++++++-------------- fs/nfs/nfs4proc.c | 2 ++ 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 66fc936834f2..ee14d71da4a1 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -209,6 +209,7 @@ extern struct nfs_client * nfs4_find_client_sessionid(struct net *, const struct sockaddr *, struct nfs4_sessionid *, u32); extern struct nfs_server *nfs_create_server(struct fs_context *); +extern void nfs4_server_set_init_caps(struct nfs_server *); extern struct nfs_server *nfs4_create_server(struct fs_context *); extern struct nfs_server *nfs4_create_referral_server(struct fs_context *); extern int nfs4_update_server(struct nfs_server *server, const char *hostname, diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index af57332503be..3fb0ca92377c 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1059,6 +1059,24 @@ static void nfs4_session_limit_xasize(struct nfs_server *server) #endif } +void nfs4_server_set_init_caps(struct nfs_server *server) +{ + /* Set the basic capabilities */ + server->caps |= server->nfs_client->cl_mvops->init_caps; + if (server->flags & NFS_MOUNT_NORDIRPLUS) + server->caps &= ~NFS_CAP_READDIRPLUS; + if (server->nfs_client->cl_proto == XPRT_TRANSPORT_RDMA) + server->caps &= ~NFS_CAP_READ_PLUS; + + /* + * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower + * authentication. 
+ */ + if (nfs4_disable_idmapping && + server->client->cl_auth->au_flavor == RPC_AUTH_UNIX) + server->caps |= NFS_CAP_UIDGID_NOMAP; +} + static int nfs4_server_common_setup(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_probe) { @@ -1078,20 +1096,7 @@ static int nfs4_server_common_setup(struct nfs_server *server, if (error < 0) goto out; - /* Set the basic capabilities */ - server->caps |= server->nfs_client->cl_mvops->init_caps; - if (server->flags & NFS_MOUNT_NORDIRPLUS) - server->caps &= ~NFS_CAP_READDIRPLUS; - if (server->nfs_client->cl_proto == XPRT_TRANSPORT_RDMA) - server->caps &= ~NFS_CAP_READ_PLUS; - /* - * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower - * authentication. - */ - if (nfs4_disable_idmapping && - server->client->cl_auth->au_flavor == RPC_AUTH_UNIX) - server->caps |= NFS_CAP_UIDGID_NOMAP; - + nfs4_server_set_init_caps(server); /* Probe the root fh to retrieve its FSID and filehandle */ error = nfs4_get_rootfh(server, mntfh, auth_probe); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 979665a91e80..1c485edf1d07 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3937,6 +3937,8 @@ int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) .interruptible = true, }; int err; + + nfs4_server_set_init_caps(server); do { err = nfs4_handle_exception(server, _nfs4_server_capabilities(server, fhandle), From e5731131fb6fefaa69064ca511b7c4971d6cf54f Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Thu, 14 Oct 2021 13:55:05 -0400 Subject: [PATCH 160/433] NFS: Move nfs_probe_destination() into the generic client And rename it to nfs_probe_server(). I also change it to take the nfs_fh as an argument so callers can choose what filehandle to probe. 
Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 24 ++++++++++++++++++++++++ fs/nfs/internal.h | 1 + fs/nfs/nfs4client.c | 26 +------------------------- 3 files changed, 26 insertions(+), 25 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 23e165d5ec9c..e867d9090386 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -864,6 +864,30 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs } EXPORT_SYMBOL_GPL(nfs_probe_fsinfo); +/* + * Grab the destination's particulars, including lease expiry time. + * + * Returns zero if probe succeeded and retrieved FSID matches the FSID + * we have cached. + */ +int nfs_probe_server(struct nfs_server *server, struct nfs_fh *mntfh) +{ + struct nfs_fattr *fattr; + int error; + + fattr = nfs_alloc_fattr(); + if (fattr == NULL) + return -ENOMEM; + + /* Sanity: the probe won't work if the destination server + * does not recognize the migrated FH. */ + error = nfs_probe_fsinfo(server, mntfh, fattr); + + nfs_free_fattr(fattr); + return error; +} +EXPORT_SYMBOL_GPL(nfs_probe_server); + /* * Copy useful information when duplicating a server record */ diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index ee14d71da4a1..c1253dd60c32 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -194,6 +194,7 @@ extern struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *); int nfs_create_rpc_client(struct nfs_client *, const struct nfs_client_initdata *, rpc_authflavor_t); struct nfs_client *nfs_get_client(const struct nfs_client_initdata *); int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *, struct nfs_fattr *); +int nfs_probe_server(struct nfs_server *, struct nfs_fh *); void nfs_server_insert_lists(struct nfs_server *); void nfs_server_remove_lists(struct nfs_server *); void nfs_init_timeout_values(struct rpc_timeout *to, int proto, int timeo, int retrans); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 
3fb0ca92377c..85978ecb727e 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1293,30 +1293,6 @@ error: return ERR_PTR(error); } -/* - * Grab the destination's particulars, including lease expiry time. - * - * Returns zero if probe succeeded and retrieved FSID matches the FSID - * we have cached. - */ -static int nfs_probe_destination(struct nfs_server *server) -{ - struct inode *inode = d_inode(server->super->s_root); - struct nfs_fattr *fattr; - int error; - - fattr = nfs_alloc_fattr(); - if (fattr == NULL) - return -ENOMEM; - - /* Sanity: the probe won't work if the destination server - * does not recognize the migrated FH. */ - error = nfs_probe_fsinfo(server, NFS_FH(inode), fattr); - - nfs_free_fattr(fattr); - return error; -} - /** * nfs4_update_server - Move an nfs_server to a different nfs_client * @@ -1377,5 +1353,5 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname, server->nfs_client->cl_hostname = kstrdup(hostname, GFP_KERNEL); nfs_server_insert_lists(server); - return nfs_probe_destination(server); + return nfs_probe_server(server, NFS_FH(d_inode(server->super->s_root))); } From 4d4cf8d2d6ccb43c68bc5925dc83500b81b50f9e Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Thu, 14 Oct 2021 13:55:06 -0400 Subject: [PATCH 161/433] NFS: Replace calls to nfs_probe_fsinfo() with nfs_probe_server() Clean up. There are a few places where we want to probe the server, but don't actually care about the fsinfo result. Change these to use nfs_probe_server(), which handles the fattr allocation for us. 
Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 10 +--------- fs/nfs/nfs4client.c | 8 +------- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index e867d9090386..1a882c78a794 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1082,7 +1082,6 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, rpc_authflavor_t flavor) { struct nfs_server *server; - struct nfs_fattr *fattr_fsinfo; int error; server = nfs_alloc_server(); @@ -1091,11 +1090,6 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, server->cred = get_cred(source->cred); - error = -ENOMEM; - fattr_fsinfo = nfs_alloc_fattr(); - if (fattr_fsinfo == NULL) - goto out_free_server; - /* Copy data from the source */ server->nfs_client = source->nfs_client; server->destroy = source->destroy; @@ -1111,7 +1105,7 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, goto out_free_server; /* probe the filesystem info for this server filesystem */ - error = nfs_probe_fsinfo(server, fh, fattr_fsinfo); + error = nfs_probe_server(server, fh); if (error < 0) goto out_free_server; @@ -1125,11 +1119,9 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, nfs_server_insert_lists(server); server->mount_time = jiffies; - nfs_free_fattr(fattr_fsinfo); return server; out_free_server: - nfs_free_fattr(fattr_fsinfo); nfs_free_server(server); return ERR_PTR(error); } diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 85978ecb727e..d8b5a250ca05 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1080,17 +1080,12 @@ void nfs4_server_set_init_caps(struct nfs_server *server) static int nfs4_server_common_setup(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_probe) { - struct nfs_fattr *fattr; int error; /* data servers support only a subset of NFSv4.1 */ if (is_ds_only_client(server->nfs_client)) return -EPROTONOSUPPORT; - fattr = nfs_alloc_fattr(); - if (fattr == NULL) 
- return -ENOMEM; - /* We must ensure the session is initialised first */ error = nfs4_init_session(server->nfs_client); if (error < 0) @@ -1108,7 +1103,7 @@ static int nfs4_server_common_setup(struct nfs_server *server, (unsigned long long) server->fsid.minor); nfs_display_fhandle(mntfh, "Pseudo-fs root FH"); - error = nfs_probe_fsinfo(server, mntfh, fattr); + error = nfs_probe_server(server, mntfh); if (error < 0) goto out; @@ -1122,7 +1117,6 @@ static int nfs4_server_common_setup(struct nfs_server *server, server->mount_time = jiffies; server->destroy = nfs4_destroy_server; out: - nfs_free_fattr(fattr); return error; } From 1301ba603ca519f9191eae68ee59a2d1165b3458 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Thu, 14 Oct 2021 13:55:07 -0400 Subject: [PATCH 162/433] NFS: Call nfs_probe_server() during a fscontext-reconfigure event This lets us update the server's attributes when the user does a "mount -o remount" on the filesystem. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index e65c83494c05..3aced401735c 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1004,6 +1004,7 @@ int nfs_reconfigure(struct fs_context *fc) struct nfs_fs_context *ctx = nfs_fc2context(fc); struct super_block *sb = fc->root->d_sb; struct nfs_server *nfss = sb->s_fs_info; + int ret; sync_filesystem(sb); @@ -1028,7 +1029,11 @@ int nfs_reconfigure(struct fs_context *fc) } /* compare new mount options with old ones */ - return nfs_compare_remount_data(nfss, ctx); + ret = nfs_compare_remount_data(nfss, ctx); + if (ret) + return ret; + + return nfs_probe_server(nfss, NFS_FH(d_inode(fc->root))); } EXPORT_SYMBOL_GPL(nfs_reconfigure); From 5fe1210d259542f966bab130830ece08e97f68f5 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Thu, 14 Oct 2021 13:55:08 -0400 Subject: [PATCH 163/433] NFS: Unexport nfs_probe_fsinfo() All the callers are now in 
client.c so we can remove the EXPORT_SYMBOL_GPL() and make it static. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 3 +-- fs/nfs/internal.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 1a882c78a794..960b9d87648e 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -828,7 +828,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, /* * Probe filesystem information, including the FSID on v2/v3 */ -int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fattr *fattr) +static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fattr *fattr) { struct nfs_fsinfo fsinfo; struct nfs_client *clp = server->nfs_client; @@ -862,7 +862,6 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs return 0; } -EXPORT_SYMBOL_GPL(nfs_probe_fsinfo); /* * Grab the destination's particulars, including lease expiry time. 
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index c1253dd60c32..123078c76495 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -193,7 +193,6 @@ extern void nfs_clients_exit(struct net *net); extern struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *); int nfs_create_rpc_client(struct nfs_client *, const struct nfs_client_initdata *, rpc_authflavor_t); struct nfs_client *nfs_get_client(const struct nfs_client_initdata *); -int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *, struct nfs_fattr *); int nfs_probe_server(struct nfs_server *, struct nfs_fh *); void nfs_server_insert_lists(struct nfs_server *); void nfs_server_remove_lists(struct nfs_server *); From 023859ce6f88f7cfc223752fb56ec453a147b852 Mon Sep 17 00:00:00 2001 From: Thiago Rafael Becker Date: Wed, 20 Oct 2021 18:04:28 -0300 Subject: [PATCH 164/433] sunrpc: remove unnecessary test in rpc_task_set_client() In rpc_task_set_client(), testing for a NULL clnt is not necessary, as clnt should always be a valid pointer to a rpc_client. 
Signed-off-by: Thiago Rafael Becker Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index f056ff931444..a312ea2bc440 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1076,24 +1076,21 @@ void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt) static void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) { - - if (clnt != NULL) { - rpc_task_set_transport(task, clnt); - task->tk_client = clnt; - refcount_inc(&clnt->cl_count); - if (clnt->cl_softrtry) - task->tk_flags |= RPC_TASK_SOFT; - if (clnt->cl_softerr) - task->tk_flags |= RPC_TASK_TIMEOUT; - if (clnt->cl_noretranstimeo) - task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT; - if (atomic_read(&clnt->cl_swapper)) - task->tk_flags |= RPC_TASK_SWAPPER; - /* Add to the client's list of all tasks */ - spin_lock(&clnt->cl_lock); - list_add_tail(&task->tk_task, &clnt->cl_tasks); - spin_unlock(&clnt->cl_lock); - } + rpc_task_set_transport(task, clnt); + task->tk_client = clnt; + refcount_inc(&clnt->cl_count); + if (clnt->cl_softrtry) + task->tk_flags |= RPC_TASK_SOFT; + if (clnt->cl_softerr) + task->tk_flags |= RPC_TASK_TIMEOUT; + if (clnt->cl_noretranstimeo) + task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT; + if (atomic_read(&clnt->cl_swapper)) + task->tk_flags |= RPC_TASK_SWAPPER; + /* Add to the client's list of all tasks */ + spin_lock(&clnt->cl_lock); + list_add_tail(&task->tk_task, &clnt->cl_tasks); + spin_unlock(&clnt->cl_lock); } static void From 4cd27df88af29929cda6e8eb4e0f5bb4e25812bf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 21 Oct 2021 17:11:37 -0400 Subject: [PATCH 165/433] NFS: Remove redundant call to __set_page_dirty_nobuffers Remove a redundant call in nfs_updatepage(). nfs_writepage_setup() will have already called nfs_mark_request_dirty() on success. 
Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 82c5b89395f6..9b7619ce17a7 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1384,8 +1384,6 @@ int nfs_updatepage(struct file *file, struct page *page, status = nfs_writepage_setup(ctx, page, offset, count); if (status < 0) nfs_set_pageerror(mapping); - else - __set_page_dirty_nobuffers(page); out: dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n", status, (long long)i_size_read(inode)); From 5648b5e1169ff1d6d6a46c35c0b5fbebd2a5cbb2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 20 Oct 2021 18:08:10 +0200 Subject: [PATCH 166/433] netfilter: nfnetlink_queue: fix OOB when mac header was cleared On 64bit platforms the MAC header is set to 0xffff on allocation and also when a helper like skb_unset_mac_header() is called. dev_parse_header may call skb_mac_header() which assumes valid mac offset: BUG: KASAN: use-after-free in eth_header_parse+0x75/0x90 Read of size 6 at addr ffff8881075a5c05 by task nf-queue/1364 Call Trace: memcpy+0x20/0x60 eth_header_parse+0x75/0x90 __nfqnl_enqueue_packet+0x1a61/0x3380 __nf_queue+0x597/0x1300 nf_queue+0xf/0x40 nf_hook_slow+0xed/0x190 nf_hook+0x184/0x440 ip_output+0x1c0/0x2a0 nf_reinject+0x26f/0x700 nfqnl_recv_verdict+0xa16/0x18b0 nfnetlink_rcv_msg+0x506/0xe70 The existing code only works if the skb has a mac header. 
Fixes: 2c38de4c1f8da7 ("netfilter: fix looped (broad|multi)cast's MAC handling") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 4c3fbaaeb103..4acc4b8e9fe5 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -560,7 +560,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, goto nla_put_failure; if (indev && entskb->dev && - entskb->mac_header != entskb->network_header) { + skb_mac_header_was_set(entskb)) { struct nfqnl_msg_packet_hw phw; int len; From 2199f562730dd1382946e0a2532afc38cd444129 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 21 Oct 2021 15:02:55 +0200 Subject: [PATCH 167/433] ipvs: autoload ipvs on genl access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kernel provides the functionality to automatically load modules providing genl families. Use this to remove the need for users to manually load the module. Signed-off-by: Thomas Weißschuh Acked-by: Julian Anastasov Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_ctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 29ec3ef63edc..0ff94c66641f 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -48,6 +48,8 @@ #include +MODULE_ALIAS_GENL_FAMILY(IPVS_GENL_NAME); + /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. 
*/ static DEFINE_MUTEX(__ip_vs_mutex); From 814691c7f7d1f958ac30c3dca5070a95c1f658dd Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 28 Sep 2021 03:03:32 -0500 Subject: [PATCH 168/433] rtc: sun6i: Allow probing without an early clock provider Some SoCs have an RTC supported by this RTC driver, but do not have an early clock provider declared here. Currently, this prevents the RTC driver from probing, because it expects a global struct to already be allocated. Fix probing the driver by copying the missing pieces from the clock provider setup function, replacing them with the devm variants. Signed-off-by: Samuel Holland Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20210928080335.36706-7-samuel@sholland.org --- drivers/rtc/rtc-sun6i.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-sun6i.c b/drivers/rtc/rtc-sun6i.c index adec1b14a8de..711832c758ae 100644 --- a/drivers/rtc/rtc-sun6i.c +++ b/drivers/rtc/rtc-sun6i.c @@ -673,8 +673,17 @@ static int sun6i_rtc_probe(struct platform_device *pdev) struct sun6i_rtc_dev *chip = sun6i_rtc; int ret; - if (!chip) - return -ENODEV; + if (!chip) { + chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL); + if (!chip) + return -ENOMEM; + + spin_lock_init(&chip->lock); + + chip->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(chip->base)) + return PTR_ERR(chip->base); + } platform_set_drvdata(pdev, chip); From 005870f46cf6f98417ec48d129721e945dfb3a43 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 21 Oct 2021 22:22:56 +0300 Subject: [PATCH 169/433] rtc: tps80031: Remove driver Driver was upstreamed in 2013 and never got a user, remove it. 
Signed-off-by: Dmitry Osipenko Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211021192258.21968-2-digetx@gmail.com --- drivers/rtc/Kconfig | 8 - drivers/rtc/Makefile | 1 - drivers/rtc/rtc-tps80031.c | 324 ------------------------------------- 3 files changed, 333 deletions(-) delete mode 100644 drivers/rtc/rtc-tps80031.c diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 8dc92b4e042f..f4dfc65693a4 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -583,14 +583,6 @@ config RTC_DRV_TPS65910 This driver can also be built as a module. If so, the module will be called rtc-tps65910. -config RTC_DRV_TPS80031 - tristate "TI TPS80031/TPS80032 RTC driver" - depends on MFD_TPS80031 - help - TI Power Management IC TPS80031 supports RTC functionality - along with alarm. This driver supports the RTC driver for - the TPS80031 RTC module. - config RTC_DRV_RC5T583 tristate "RICOH 5T583 RTC driver" depends on MFD_RC5T583 diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index e76308053b0f..678a8ef4abae 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -170,7 +170,6 @@ obj-$(CONFIG_RTC_DRV_TEGRA) += rtc-tegra.o obj-$(CONFIG_RTC_DRV_TEST) += rtc-test.o obj-$(CONFIG_RTC_DRV_TPS6586X) += rtc-tps6586x.o obj-$(CONFIG_RTC_DRV_TPS65910) += rtc-tps65910.o -obj-$(CONFIG_RTC_DRV_TPS80031) += rtc-tps80031.o obj-$(CONFIG_RTC_DRV_TWL4030) += rtc-twl.o obj-$(CONFIG_RTC_DRV_V3020) += rtc-v3020.o obj-$(CONFIG_RTC_DRV_VR41XX) += rtc-vr41xx.o diff --git a/drivers/rtc/rtc-tps80031.c b/drivers/rtc/rtc-tps80031.c deleted file mode 100644 index c77b8eab94a0..000000000000 --- a/drivers/rtc/rtc-tps80031.c +++ /dev/null @@ -1,324 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * rtc-tps80031.c -- TI TPS80031/TPS80032 RTC driver - * - * RTC driver for TI TPS80031/TPS80032 Fully Integrated - * Power Management with Power Path and Battery Charger - * - * Copyright (c) 2012, NVIDIA Corporation. 
- * - * Author: Laxman Dewangan - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define ENABLE_ALARM_INT 0x08 -#define ALARM_INT_STATUS 0x40 - -/** - * Setting bit to 1 in STOP_RTC will run the RTC and - * setting this bit to 0 will freeze RTC. - */ -#define STOP_RTC 0x1 - -/* Power on reset Values of RTC registers */ -#define TPS80031_RTC_POR_YEAR 0 -#define TPS80031_RTC_POR_MONTH 1 -#define TPS80031_RTC_POR_DAY 1 - -/* Numbers of registers for time and alarms */ -#define TPS80031_RTC_TIME_NUM_REGS 7 -#define TPS80031_RTC_ALARM_NUM_REGS 6 - -/** - * PMU RTC have only 2 nibbles to store year information, so using an - * offset of 100 to set the base year as 2000 for our driver. - */ -#define RTC_YEAR_OFFSET 100 - -struct tps80031_rtc { - struct rtc_device *rtc; - int irq; -}; - -static int tps80031_rtc_read_time(struct device *dev, struct rtc_time *tm) -{ - u8 buff[TPS80031_RTC_TIME_NUM_REGS]; - int ret; - - ret = tps80031_reads(dev->parent, TPS80031_SLAVE_ID1, - TPS80031_SECONDS_REG, TPS80031_RTC_TIME_NUM_REGS, buff); - if (ret < 0) { - dev_err(dev, "reading RTC_SECONDS_REG failed, err = %d\n", ret); - return ret; - } - - tm->tm_sec = bcd2bin(buff[0]); - tm->tm_min = bcd2bin(buff[1]); - tm->tm_hour = bcd2bin(buff[2]); - tm->tm_mday = bcd2bin(buff[3]); - tm->tm_mon = bcd2bin(buff[4]) - 1; - tm->tm_year = bcd2bin(buff[5]) + RTC_YEAR_OFFSET; - tm->tm_wday = bcd2bin(buff[6]); - return 0; -} - -static int tps80031_rtc_set_time(struct device *dev, struct rtc_time *tm) -{ - u8 buff[7]; - int ret; - - buff[0] = bin2bcd(tm->tm_sec); - buff[1] = bin2bcd(tm->tm_min); - buff[2] = bin2bcd(tm->tm_hour); - buff[3] = bin2bcd(tm->tm_mday); - buff[4] = bin2bcd(tm->tm_mon + 1); - buff[5] = bin2bcd(tm->tm_year % RTC_YEAR_OFFSET); - buff[6] = bin2bcd(tm->tm_wday); - - /* Stop RTC while updating the RTC time registers */ - ret = tps80031_clr_bits(dev->parent, TPS80031_SLAVE_ID1, - TPS80031_RTC_CTRL_REG, STOP_RTC); 
- if (ret < 0) { - dev_err(dev->parent, "Stop RTC failed, err = %d\n", ret); - return ret; - } - - ret = tps80031_writes(dev->parent, TPS80031_SLAVE_ID1, - TPS80031_SECONDS_REG, - TPS80031_RTC_TIME_NUM_REGS, buff); - if (ret < 0) { - dev_err(dev, "writing RTC_SECONDS_REG failed, err %d\n", ret); - return ret; - } - - ret = tps80031_set_bits(dev->parent, TPS80031_SLAVE_ID1, - TPS80031_RTC_CTRL_REG, STOP_RTC); - if (ret < 0) - dev_err(dev->parent, "Start RTC failed, err = %d\n", ret); - return ret; -} - -static int tps80031_rtc_alarm_irq_enable(struct device *dev, - unsigned int enable) -{ - int ret; - - if (enable) - ret = tps80031_set_bits(dev->parent, TPS80031_SLAVE_ID1, - TPS80031_RTC_INTERRUPTS_REG, ENABLE_ALARM_INT); - else - ret = tps80031_clr_bits(dev->parent, TPS80031_SLAVE_ID1, - TPS80031_RTC_INTERRUPTS_REG, ENABLE_ALARM_INT); - if (ret < 0) { - dev_err(dev, "Update on RTC_INT failed, err = %d\n", ret); - return ret; - } - return 0; -} - -static int tps80031_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) -{ - u8 buff[TPS80031_RTC_ALARM_NUM_REGS]; - int ret; - - buff[0] = bin2bcd(alrm->time.tm_sec); - buff[1] = bin2bcd(alrm->time.tm_min); - buff[2] = bin2bcd(alrm->time.tm_hour); - buff[3] = bin2bcd(alrm->time.tm_mday); - buff[4] = bin2bcd(alrm->time.tm_mon + 1); - buff[5] = bin2bcd(alrm->time.tm_year % RTC_YEAR_OFFSET); - ret = tps80031_writes(dev->parent, TPS80031_SLAVE_ID1, - TPS80031_ALARM_SECONDS_REG, - TPS80031_RTC_ALARM_NUM_REGS, buff); - if (ret < 0) { - dev_err(dev, "Writing RTC_ALARM failed, err %d\n", ret); - return ret; - } - return tps80031_rtc_alarm_irq_enable(dev, alrm->enabled); -} - -static int tps80031_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) -{ - u8 buff[6]; - int ret; - - ret = tps80031_reads(dev->parent, TPS80031_SLAVE_ID1, - TPS80031_ALARM_SECONDS_REG, - TPS80031_RTC_ALARM_NUM_REGS, buff); - if (ret < 0) { - dev_err(dev->parent, - "reading RTC_ALARM failed, err = %d\n", ret); - return ret; - } - - 
alrm->time.tm_sec = bcd2bin(buff[0]); - alrm->time.tm_min = bcd2bin(buff[1]); - alrm->time.tm_hour = bcd2bin(buff[2]); - alrm->time.tm_mday = bcd2bin(buff[3]); - alrm->time.tm_mon = bcd2bin(buff[4]) - 1; - alrm->time.tm_year = bcd2bin(buff[5]) + RTC_YEAR_OFFSET; - return 0; -} - -static int clear_alarm_int_status(struct device *dev, struct tps80031_rtc *rtc) -{ - int ret; - u8 buf; - - /** - * As per datasheet, A dummy read of this RTC_STATUS_REG register - * is necessary before each I2C read in order to update the status - * register value. - */ - ret = tps80031_read(dev->parent, TPS80031_SLAVE_ID1, - TPS80031_RTC_STATUS_REG, &buf); - if (ret < 0) { - dev_err(dev, "reading RTC_STATUS failed. err = %d\n", ret); - return ret; - } - - /* clear Alarm status bits.*/ - ret = tps80031_set_bits(dev->parent, TPS80031_SLAVE_ID1, - TPS80031_RTC_STATUS_REG, ALARM_INT_STATUS); - if (ret < 0) { - dev_err(dev, "clear Alarm INT failed, err = %d\n", ret); - return ret; - } - return 0; -} - -static irqreturn_t tps80031_rtc_irq(int irq, void *data) -{ - struct device *dev = data; - struct tps80031_rtc *rtc = dev_get_drvdata(dev); - int ret; - - ret = clear_alarm_int_status(dev, rtc); - if (ret < 0) - return ret; - - rtc_update_irq(rtc->rtc, 1, RTC_IRQF | RTC_AF); - return IRQ_HANDLED; -} - -static const struct rtc_class_ops tps80031_rtc_ops = { - .read_time = tps80031_rtc_read_time, - .set_time = tps80031_rtc_set_time, - .set_alarm = tps80031_rtc_set_alarm, - .read_alarm = tps80031_rtc_read_alarm, - .alarm_irq_enable = tps80031_rtc_alarm_irq_enable, -}; - -static int tps80031_rtc_probe(struct platform_device *pdev) -{ - struct tps80031_rtc *rtc; - struct rtc_time tm; - int ret; - - rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL); - if (!rtc) - return -ENOMEM; - - rtc->irq = platform_get_irq(pdev, 0); - platform_set_drvdata(pdev, rtc); - - /* Start RTC */ - ret = tps80031_set_bits(pdev->dev.parent, TPS80031_SLAVE_ID1, - TPS80031_RTC_CTRL_REG, STOP_RTC); - if (ret < 0) { - 
dev_err(&pdev->dev, "failed to start RTC. err = %d\n", ret); - return ret; - } - - /* If RTC have POR values, set time 01:01:2000 */ - tps80031_rtc_read_time(&pdev->dev, &tm); - if ((tm.tm_year == RTC_YEAR_OFFSET + TPS80031_RTC_POR_YEAR) && - (tm.tm_mon == (TPS80031_RTC_POR_MONTH - 1)) && - (tm.tm_mday == TPS80031_RTC_POR_DAY)) { - tm.tm_year = 2000; - tm.tm_mday = 1; - tm.tm_mon = 1; - ret = tps80031_rtc_set_time(&pdev->dev, &tm); - if (ret < 0) { - dev_err(&pdev->dev, - "RTC set time failed, err = %d\n", ret); - return ret; - } - } - - /* Clear alarm intretupt status if it is there */ - ret = clear_alarm_int_status(&pdev->dev, rtc); - if (ret < 0) { - dev_err(&pdev->dev, "Clear alarm int failed, err = %d\n", ret); - return ret; - } - - rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, - &tps80031_rtc_ops, THIS_MODULE); - if (IS_ERR(rtc->rtc)) { - ret = PTR_ERR(rtc->rtc); - dev_err(&pdev->dev, "RTC registration failed, err %d\n", ret); - return ret; - } - - ret = devm_request_threaded_irq(&pdev->dev, rtc->irq, NULL, - tps80031_rtc_irq, - IRQF_ONESHOT, - dev_name(&pdev->dev), rtc); - if (ret < 0) { - dev_err(&pdev->dev, "request IRQ:%d failed, err = %d\n", - rtc->irq, ret); - return ret; - } - device_set_wakeup_capable(&pdev->dev, 1); - return 0; -} - -#ifdef CONFIG_PM_SLEEP -static int tps80031_rtc_suspend(struct device *dev) -{ - struct tps80031_rtc *rtc = dev_get_drvdata(dev); - - if (device_may_wakeup(dev)) - enable_irq_wake(rtc->irq); - return 0; -} - -static int tps80031_rtc_resume(struct device *dev) -{ - struct tps80031_rtc *rtc = dev_get_drvdata(dev); - - if (device_may_wakeup(dev)) - disable_irq_wake(rtc->irq); - return 0; -}; -#endif - -static SIMPLE_DEV_PM_OPS(tps80031_pm_ops, tps80031_rtc_suspend, - tps80031_rtc_resume); - -static struct platform_driver tps80031_rtc_driver = { - .driver = { - .name = "tps80031-rtc", - .pm = &tps80031_pm_ops, - }, - .probe = tps80031_rtc_probe, -}; - -module_platform_driver(tps80031_rtc_driver); - 
-MODULE_ALIAS("platform:tps80031-rtc"); -MODULE_DESCRIPTION("TI TPS80031/TPS80032 RTC driver"); -MODULE_AUTHOR("Laxman Dewangan "); -MODULE_LICENSE("GPL v2"); From dba28c37f23a09fc32dbc37463ddb2feb3886f98 Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Thu, 21 Oct 2021 23:22:54 +0300 Subject: [PATCH 170/433] rtc: s3c: Remove usage of devm_rtc_device_register() devm_rtc_device_register() is deprecated. Use devm_rtc_allocate_device() and devm_rtc_register_device() API instead. This change doesn't change the behavior, but allows for further improvements. Signed-off-by: Sam Protsenko Reviewed-by: Krzysztof Kozlowski Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211021202256.28517-2-semen.protsenko@linaro.org --- drivers/rtc/rtc-s3c.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index e57d3ca70a78..10e591794276 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -447,15 +447,18 @@ static int s3c_rtc_probe(struct platform_device *pdev) device_init_wakeup(&pdev->dev, 1); - /* register RTC and exit */ - info->rtc = devm_rtc_device_register(&pdev->dev, "s3c", &s3c_rtcops, - THIS_MODULE); + info->rtc = devm_rtc_allocate_device(&pdev->dev); if (IS_ERR(info->rtc)) { - dev_err(&pdev->dev, "cannot attach rtc\n"); ret = PTR_ERR(info->rtc); goto err_nortc; } + info->rtc->ops = &s3c_rtcops; + + ret = devm_rtc_register_device(info->rtc); + if (ret) + goto err_nortc; + ret = devm_request_irq(&pdev->dev, info->irq_alarm, s3c_rtc_alarmirq, 0, "s3c2410-rtc alarm", info); if (ret) { From e4a1444e10cbda2892a4ea7325ef5efa47c75cfb Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Thu, 21 Oct 2021 23:22:55 +0300 Subject: [PATCH 171/433] rtc: s3c: Extract read/write IO into separate functions Create dedicated functions for I/O operations and BCD conversion. It can be useful to separate those from representation conversion and other stuff found in RTC callbacks. 
This patch does not introduce any functional changes, it's merely refactoring change. Signed-off-by: Sam Protsenko Reviewed-by: Krzysztof Kozlowski Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211021202256.28517-3-semen.protsenko@linaro.org --- drivers/rtc/rtc-s3c.c | 96 +++++++++++++++++++++++++++---------------- 1 file changed, 60 insertions(+), 36 deletions(-) diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index 10e591794276..d1baf655c008 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -127,10 +127,9 @@ static int s3c_rtc_setaie(struct device *dev, unsigned int enabled) return ret; } -/* Time read/write */ -static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm) +/* Read time from RTC and convert it from BCD */ +static int s3c_rtc_read_time(struct s3c_rtc *info, struct rtc_time *tm) { - struct s3c_rtc *info = dev_get_drvdata(dev); unsigned int have_retried = 0; int ret; @@ -139,54 +138,40 @@ static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm) return ret; retry_get_time: - rtc_tm->tm_min = readb(info->base + S3C2410_RTCMIN); - rtc_tm->tm_hour = readb(info->base + S3C2410_RTCHOUR); - rtc_tm->tm_mday = readb(info->base + S3C2410_RTCDATE); - rtc_tm->tm_mon = readb(info->base + S3C2410_RTCMON); - rtc_tm->tm_year = readb(info->base + S3C2410_RTCYEAR); - rtc_tm->tm_sec = readb(info->base + S3C2410_RTCSEC); + tm->tm_min = readb(info->base + S3C2410_RTCMIN); + tm->tm_hour = readb(info->base + S3C2410_RTCHOUR); + tm->tm_mday = readb(info->base + S3C2410_RTCDATE); + tm->tm_mon = readb(info->base + S3C2410_RTCMON); + tm->tm_year = readb(info->base + S3C2410_RTCYEAR); + tm->tm_sec = readb(info->base + S3C2410_RTCSEC); - /* the only way to work out whether the system was mid-update + /* + * The only way to work out whether the system was mid-update * when we read it is to check the second counter, and if it * is zero, then we re-try the entire read */ - - if (rtc_tm->tm_sec == 0 && 
!have_retried) { + if (tm->tm_sec == 0 && !have_retried) { have_retried = 1; goto retry_get_time; } - rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec); - rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min); - rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour); - rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday); - rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon); - rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year); - s3c_rtc_disable_clk(info); - rtc_tm->tm_year += 100; - rtc_tm->tm_mon -= 1; + tm->tm_sec = bcd2bin(tm->tm_sec); + tm->tm_min = bcd2bin(tm->tm_min); + tm->tm_hour = bcd2bin(tm->tm_hour); + tm->tm_mday = bcd2bin(tm->tm_mday); + tm->tm_mon = bcd2bin(tm->tm_mon); + tm->tm_year = bcd2bin(tm->tm_year); - dev_dbg(dev, "read time %ptR\n", rtc_tm); return 0; } -static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm) +/* Convert time to BCD and write it to RTC */ +static int s3c_rtc_write_time(struct s3c_rtc *info, const struct rtc_time *tm) { - struct s3c_rtc *info = dev_get_drvdata(dev); - int year = tm->tm_year - 100; int ret; - dev_dbg(dev, "set time %ptR\n", tm); - - /* we get around y2k by simply not supporting it */ - - if (year < 0 || year >= 100) { - dev_err(dev, "rtc only supports 100 years\n"); - return -EINVAL; - } - ret = s3c_rtc_enable_clk(info); if (ret) return ret; @@ -195,14 +180,53 @@ static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm) writeb(bin2bcd(tm->tm_min), info->base + S3C2410_RTCMIN); writeb(bin2bcd(tm->tm_hour), info->base + S3C2410_RTCHOUR); writeb(bin2bcd(tm->tm_mday), info->base + S3C2410_RTCDATE); - writeb(bin2bcd(tm->tm_mon + 1), info->base + S3C2410_RTCMON); - writeb(bin2bcd(year), info->base + S3C2410_RTCYEAR); + writeb(bin2bcd(tm->tm_mon), info->base + S3C2410_RTCMON); + writeb(bin2bcd(tm->tm_year), info->base + S3C2410_RTCYEAR); s3c_rtc_disable_clk(info); return 0; } +static int s3c_rtc_gettime(struct device *dev, struct rtc_time *tm) +{ + struct s3c_rtc *info = dev_get_drvdata(dev); + int ret; + + ret = s3c_rtc_read_time(info, tm); + if (ret) + 
return ret; + + /* Convert internal representation to actual date/time */ + tm->tm_year += 100; + tm->tm_mon -= 1; + + dev_dbg(dev, "read time %ptR\n", tm); + return 0; +} + +static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm) +{ + struct s3c_rtc *info = dev_get_drvdata(dev); + struct rtc_time rtc_tm = *tm; + + dev_dbg(dev, "set time %ptR\n", tm); + + /* + * Convert actual date/time to internal representation. + * We get around Y2K by simply not supporting it. + */ + rtc_tm.tm_year -= 100; + rtc_tm.tm_mon += 1; + + if (rtc_tm.tm_year < 0 || rtc_tm.tm_year >= 100) { + dev_err(dev, "rtc only supports 100 years\n"); + return -EINVAL; + } + + return s3c_rtc_write_time(info, &rtc_tm); +} + static int s3c_rtc_getalarm(struct device *dev, struct rtc_wkalrm *alrm) { struct s3c_rtc *info = dev_get_drvdata(dev); From a5feda3b361e11b291786d5c4ff86d4b9a55498f Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Thu, 21 Oct 2021 23:22:56 +0300 Subject: [PATCH 172/433] rtc: s3c: Add time range This RTC driver starts counting from 2000 to avoid Y2K problem. Also it only supports 100 years range for all RTCs. Provide that info to RTC framework. Also remove check for 100 years range in s3c_rtc_settime(), as RTC core won't pass any invalid values to the driver, now that correct range is set. Here is the rationale on 100 years range limitation. Info on different Samsung RTCs (credit goes to Krzysztof Kozlowski): - All S3C chips have only 8-bit wide year register (can store 100 years range in BCD format) - S5Pv210 and Exynos chips have 12-bit year register (can store 1000 years range in BCD format) But in reality we usually can't make use of those 12 bits either: - RTCs might think that both 2000 and 2100 years are leap years. So when the YEAR register is 0, RTC goes from 28 Feb to 29 Feb, and when the YEAR register is 100, RTC also goes from 28 Feb to 29 Feb. 
This is of course incorrect: RTC breaks leap year criteria, which breaks the time contiguity, which leads to inability to use the RTC after year of 2099. It was found for example on Exynos850 SoC. - Despite having 12 bits for holding the year value, RTC might overflow the year value internally much earlier. For example, on Exynos850 the RTC overflows when YEAR=159, making the next YEAR=0. This way RTC actually has range of 160 years, not 1000 as one may think. All that said, there is no sense in trying to increase the time range for more than 100 years on RTCs that seem capable of that. It also doesn't have too much practical value -- current hardware will be probably obsolete by 2100. Tested manually on Exynos850 RTC: $ date -s "1999-12-31 23:59:50" $ hwclock -w -f /dev/rtc0 $ date -s "2100-01-01 00:00:00" $ hwclock -w -f /dev/rtc0 $ date -s "2000-01-01 00:00:00" $ hwclock -w -f /dev/rtc0 $ hwclock -r -f /dev/rtc0 $ date -s "2099-12-31 23:59:50" $ hwclock -w -f /dev/rtc0 $ hwclock -r -f /dev/rtc0 Signed-off-by: Sam Protsenko Reviewed-by: Krzysztof Kozlowski Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211021202256.28517-4-semen.protsenko@linaro.org --- drivers/rtc/rtc-s3c.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index d1baf655c008..db529733c9c4 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -219,11 +219,6 @@ static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm) rtc_tm.tm_year -= 100; rtc_tm.tm_mon += 1; - if (rtc_tm.tm_year < 0 || rtc_tm.tm_year >= 100) { - dev_err(dev, "rtc only supports 100 years\n"); - return -EINVAL; - } - return s3c_rtc_write_time(info, &rtc_tm); } @@ -478,6 +473,8 @@ static int s3c_rtc_probe(struct platform_device *pdev) } info->rtc->ops = &s3c_rtcops; + info->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; + info->rtc->range_max = RTC_TIMESTAMP_END_2099; ret = devm_rtc_register_device(info->rtc); if (ret) From 
95bf9d646c3c3f95cb0be7e703b371db8da5be68 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:43:51 -0500 Subject: [PATCH 173/433] signal/mips: Update (_save|_restore)_fp_context to fail with -EFAULT When an instruction to save or restore a register from the stack fails in _save_fp_context or _restore_fp_context return with -EFAULT. This change was made to r2300_fpu.S[1] but it looks like it got lost with the introduction of EX2[2]. This is also what the other implementation of _save_fp_context and _restore_fp_context in r4k_fpu.S does, and what is needed for the callers to be able to handle the error. Furthermore calling do_exit(SIGSEGV) from bad_stack is wrong because it does not terminate the entire process it just terminates a single thread. As the changed code was the only caller of arch/mips/kernel/syscall.c:bad_stack remove the problematic and now unused helper function. Cc: Thomas Bogendoerfer Cc: Maciej Rozycki Cc: linux-mips@vger.kernel.org [1] 35938a00ba86 ("MIPS: Fix ISA I FP sigcontext access violation handling") [2] f92722dc4545 ("MIPS: Correct MIPS I FP sigcontext layout") Cc: stable@vger.kernel.org Fixes: f92722dc4545 ("MIPS: Correct MIPS I FP sigcontext layout") Acked-by: Maciej W. Rozycki Acked-by: Thomas Bogendoerfer Link: https://lkml.kernel.org/r/20211020174406.17889-5-ebiederm@xmission.com Signed-off-by: Eric W. 
Biederman --- arch/mips/kernel/r2300_fpu.S | 4 ++-- arch/mips/kernel/syscall.c | 9 --------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/arch/mips/kernel/r2300_fpu.S b/arch/mips/kernel/r2300_fpu.S index 12e58053544f..cbf6db98cfb3 100644 --- a/arch/mips/kernel/r2300_fpu.S +++ b/arch/mips/kernel/r2300_fpu.S @@ -29,8 +29,8 @@ #define EX2(a,b) \ 9: a,##b; \ .section __ex_table,"a"; \ - PTR 9b,bad_stack; \ - PTR 9b+4,bad_stack; \ + PTR 9b,fault; \ + PTR 9b+4,fault; \ .previous .set mips1 diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 2afa3eef486a..5512cd586e6e 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -240,12 +240,3 @@ SYSCALL_DEFINE3(cachectl, char *, addr, int, nbytes, int, op) { return -ENOSYS; } - -/* - * If we ever come here the user sp is bad. Zap the process right away. - * Due to the bad stack signaling wouldn't work. - */ -asmlinkage void bad_stack(void) -{ - do_exit(SIGSEGV); -} From ce0ee4e6ac99606f3945f4d47775544edc3f7985 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:43:52 -0500 Subject: [PATCH 174/433] signal/sh: Use force_sig(SIGKILL) instead of do_group_exit(SIGKILL) Today the sh code allocates memory the first time a process uses the fpu. If that memory allocation fails, kill the affected task with force_sig(SIGKILL) rather than do_group_exit(SIGKILL). Calling do_group_exit from an exception handler can potentially lead to dead locks as do_group_exit is not designed to be called from interrupt context. Instead use force_sig(SIGKILL) to kill the userspace process. Sending signals in general and force_sig in particular has been tested from interrupt context so there should be no problems. Cc: Yoshinori Sato Cc: Rich Felker Cc: linux-sh@vger.kernel.org Fixes: 0ea820cf9bf5 ("sh: Move over to dynamically allocated FPU context.") Link: https://lkml.kernel.org/r/20211020174406.17889-6-ebiederm@xmission.com Signed-off-by: Eric W. 
Biederman --- arch/sh/kernel/cpu/fpu.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/sh/kernel/cpu/fpu.c b/arch/sh/kernel/cpu/fpu.c index ae354a2931e7..fd6db0ab1928 100644 --- a/arch/sh/kernel/cpu/fpu.c +++ b/arch/sh/kernel/cpu/fpu.c @@ -62,18 +62,20 @@ void fpu_state_restore(struct pt_regs *regs) } if (!tsk_used_math(tsk)) { - local_irq_enable(); + int ret; /* * does a slab alloc which can sleep */ - if (init_fpu(tsk)) { + local_irq_enable(); + ret = init_fpu(tsk); + local_irq_disable(); + if (ret) { /* * ran out of memory! */ - do_group_exit(SIGKILL); + force_sig(SIGKILL); return; } - local_irq_disable(); } grab_fpu(regs); From 83a1f27ad773b1d8f0460d3a676114c7651918cc Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:43:53 -0500 Subject: [PATCH 175/433] signal/powerpc: On swapcontext failure force SIGSEGV If the register state may be partial and corrupted instead of calling do_exit, call force_sigsegv(SIGSEGV). Which properly kills the process with SIGSEGV and does not let any more userspace code execute, instead of just killing one thread of the process and potentially confusing everything. Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: linuxppc-dev@lists.ozlabs.org History-tree: git://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git Fixes: 756f1ae8a44e ("PPC32: Rework signal code and add a swapcontext system call.") Fixes: 04879b04bf50 ("[PATCH] ppc64: VMX (Altivec) support & signal32 rework, from Ben Herrenschmidt") Link: https://lkml.kernel.org/r/20211020174406.17889-7-ebiederm@xmission.com Signed-off-by: Eric W. 
Biederman --- arch/powerpc/kernel/signal_32.c | 6 ++++-- arch/powerpc/kernel/signal_64.c | 9 ++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 0608581967f0..666f3da41232 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -1062,8 +1062,10 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, * or if another thread unmaps the region containing the context. * We kill the task with a SIGSEGV in this situation. */ - if (do_setcontext(new_ctx, regs, 0)) - do_exit(SIGSEGV); + if (do_setcontext(new_ctx, regs, 0)) { + force_sigsegv(SIGSEGV); + return -EFAULT; + } set_thread_flag(TIF_RESTOREALL); return 0; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 1831bba0582e..d8de622c9e4a 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -703,15 +703,18 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, * We kill the task with a SIGSEGV in this situation. */ - if (__get_user_sigset(&set, &new_ctx->uc_sigmask)) - do_exit(SIGSEGV); + if (__get_user_sigset(&set, &new_ctx->uc_sigmask)) { + force_sigsegv(SIGSEGV); + return -EFAULT; + } set_current_blocked(&set); if (!user_read_access_begin(new_ctx, ctx_size)) return -EFAULT; if (__unsafe_restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) { user_read_access_end(); - do_exit(SIGSEGV); + force_sigsegv(SIGSEGV); + return -EFAULT; } user_read_access_end(); From 984bd71fb32032ef395a895916853964166b322b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:43:54 -0500 Subject: [PATCH 176/433] signal/sparc: In setup_tsb_params convert open coded BUG into BUG The function setup_tsb_params has exactly one caller tsb_grow. The function tsb_grow passes in a tsb_bytes value that is between 8192 and 1048576 inclusive, and is guaranteed to be a power of 2. 
The function setup_tsb_params verifies this property with a switch statement and then prints an error and causes the task to exit if this is not true. In practice that print statement can never be reached because tsb_grow never passes in a bad tsb_size. So if tsb_size ever gets a bad value that is a kernel bug. So replace the do_exit which is effectively an open coded version of BUG() with an actual call to BUG(), making it clearer that this is a case that can never, and should never happen. Cc: David Miller Cc: sparclinux@vger.kernel.org Link: https://lkml.kernel.org/r/20211020174406.17889-8-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- arch/sparc/mm/tsb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index 0dce4b7ff73e..912205787161 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -266,7 +266,7 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsign default: printk(KERN_ERR "TSB[%s:%d]: Impossible TSB size %lu, killing process.\n", current->comm, current->pid, tsb_bytes); - do_exit(SIGSEGV); + BUG(); } tte |= pte_sz_bits(page_sz); From 1a4d21a23c4ca7467726be7db9ae8077a62b2c62 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:43:55 -0500 Subject: [PATCH 177/433] signal/vm86_32: Replace open coded BUG_ON with an actual BUG_ON The function save_v86_state is only called when userspace was operating in vm86 mode before entering the kernel. Not having vm86 state in the task_struct should never happen. So transform the hand rolled BUG_ON into an actual BUG_ON to make it clear what is happening. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: x86@kernel.org Cc: H Peter Anvin Link: https://lkml.kernel.org/r/20211020174406.17889-9-ebiederm@xmission.com Signed-off-by: Eric W.
Biederman --- arch/x86/kernel/vm86_32.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index e5a7a10a0164..63486da77272 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -106,10 +106,8 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval) */ local_irq_enable(); - if (!vm86 || !vm86->user_vm86) { - pr_alert("no user_vm86: BAD\n"); - do_exit(SIGSEGV); - } + BUG_ON(!vm86 || !vm86->user_vm86); + set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | vm86->veflags_mask); user = vm86->user_vm86; From 1fbd60df8a852d9c55de8cd3621899cf4c72a5b7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:43:56 -0500 Subject: [PATCH 178/433] signal/vm86_32: Properly send SIGSEGV when the vm86 state cannot be saved. Update save_v86_state to always complete all of its work except possibly some of the copies to userspace even if save_v86_state takes a fault. This ensures that the kernel is always in a sane state, even if userspace has done something silly. When save_v86_state takes a fault update it to force userspace to take a SIGSEGV and terminate the userspace application. As Andy pointed out in review of the first version of this change there are races between sigaction and the application terminating. Now that the code has been modified to always perform all save_v86_state's work (except possibly copying to userspace) those races do not matter from a kernel perspective. Forcing the userspace application to terminate (by resetting its handler to SIG_DFL) is there to keep everything as close to the current behavior as possible while removing the unique (and difficult to maintain) use of do_exit. If this new SIGSEGV happens during handle_signal the next time around the exit_to_user_mode_loop, SIGSEGV will be delivered to userspace.
All of the callers of handle_vm86_trap and handle_vm86_fault run the exit_to_user_mode_loop before they return to userspace any signal sent to the current task during their execution will be delivered to the current task before that tasks exits to usermode. Cc: Andy Lutomirski Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: x86@kernel.org Cc: H Peter Anvin v1: https://lkml.kernel.org/r/20211020174406.17889-10-ebiederm@xmission.com Link: https://lkml.kernel.org/r/877de1xcr6.fsf_-_@disp2133 Signed-off-by: Eric W. Biederman --- arch/x86/kernel/vm86_32.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 63486da77272..933cafab7832 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -140,6 +140,7 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval) user_access_end(); +exit_vm86: preempt_disable(); tsk->thread.sp0 = vm86->saved_sp0; tsk->thread.sysenter_cs = __KERNEL_CS; @@ -159,7 +160,8 @@ Efault_end: user_access_end(); Efault: pr_alert("could not access userspace vm86 info\n"); - do_exit(SIGSEGV); + force_sigsegv(SIGSEGV); + goto exit_vm86; } static int do_vm86_irq_handling(int subfunction, int irqnumber); From 1aaa557b2db95c9506ed0981bc34505c32d6b62b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 2 Oct 2021 17:02:23 -0700 Subject: [PATCH 179/433] m68k: set a default value for MEMORY_RESERVE 'make randconfig' can produce a .config file with "CONFIG_MEMORY_RESERVE=" (no value) since it has no default. When a subsequent 'make all' is done, kconfig restarts the config and prompts for a value for MEMORY_RESERVE. This breaks scripting/automation where there is no interactive user input. Add a default value for MEMORY_RESERVE. (Any integer value will work here for kconfig.) Fixes a kconfig warning: .config:214:warning: symbol value '' invalid for MEMORY_RESERVE * Restart config... 
Memory reservation (MiB) (MEMORY_RESERVE) [] (NEW) Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") # from beginning of git history Signed-off-by: Randy Dunlap Reviewed-by: Geert Uytterhoeven Cc: Greg Ungerer Cc: linux-m68k@lists.linux-m68k.org Signed-off-by: Greg Ungerer --- arch/m68k/Kconfig.machine | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/m68k/Kconfig.machine b/arch/m68k/Kconfig.machine index 36fa0c3ef129..eeab4f3e6c19 100644 --- a/arch/m68k/Kconfig.machine +++ b/arch/m68k/Kconfig.machine @@ -203,6 +203,7 @@ config INIT_LCD config MEMORY_RESERVE int "Memory reservation (MiB)" depends on (UCSIMM || UCDIMM) + default 0 help Reserve certain memory regions on 68x328 based boards. From 6dbe88e93c351a6cf5b7c70850d7a1a7f67d83ab Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 4 Oct 2021 09:02:31 +0200 Subject: [PATCH 180/433] m68knommu: Remove MCPU32 config symbol As of commit a3595962d82495f5 ("m68knommu: remove obsolete 68360 support"), nothing selects MCPU32 anymore. Signed-off-by: Geert Uytterhoeven Signed-off-by: Greg Ungerer --- arch/m68k/Kconfig.cpu | 11 ----------- arch/m68k/include/asm/bitops.h | 2 +- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index 277d61a09463..0d00ef5117dc 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -53,17 +53,6 @@ config M68000 System-On-Chip devices (eg 68328, 68302, etc). It does not contain a paging MMU. -config MCPU32 - bool - select CPU_HAS_NO_BITFIELDS - select CPU_HAS_NO_CAS - select CPU_HAS_NO_UNALIGNED - select CPU_NO_EFFICIENT_FFS - help - The Freescale (was then Motorola) CPU32 is a CPU core that is - based on the 68020 processor. For the most part it is used in - System-On-Chip parts, and does not contain a paging MMU. 
- config M68020 bool "68020 support" depends on MMU diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h index 7b414099e5fc..7b93e1fd8ffa 100644 --- a/arch/m68k/include/asm/bitops.h +++ b/arch/m68k/include/asm/bitops.h @@ -451,7 +451,7 @@ static inline unsigned long ffz(unsigned long word) * generic functions for those. */ #if (defined(__mcfisaaplus__) || defined(__mcfisac__)) && \ - !defined(CONFIG_M68000) && !defined(CONFIG_MCPU32) + !defined(CONFIG_M68000) static inline unsigned long __ffs(unsigned long x) { __asm__ __volatile__ ("bitrev %0; ff1 %0" From 01d29f87fcfef38d51ce2b473981a5c1e861ac0a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 26 Oct 2021 21:56:40 -0400 Subject: [PATCH 181/433] NFSv4: Fix a regression in nfs_set_open_stateid_locked() If we already hold open state on the client, yet the server gives us a completely different stateid to the one we already hold, then we currently treat it as if it were an out-of-sequence update, and wait for 5 seconds for other updates to come in. This commit fixes the behaviour so that we immediately start processing of the new stateid, and then leave it to the call to nfs4_test_and_free_stateid() to decide what to do with the old stateid. 
Fixes: b4868b44c562 ("NFSv4: Wait for stateid updates after CLOSE/OPEN_DOWNGRADE") Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1c485edf1d07..1c94f54cab58 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1604,15 +1604,16 @@ static bool nfs_stateid_is_sequential(struct nfs4_state *state, { if (test_bit(NFS_OPEN_STATE, &state->flags)) { /* The common case - we're updating to a new sequence number */ - if (nfs4_stateid_match_other(stateid, &state->open_stateid) && - nfs4_stateid_is_next(&state->open_stateid, stateid)) { - return true; + if (nfs4_stateid_match_other(stateid, &state->open_stateid)) { + if (nfs4_stateid_is_next(&state->open_stateid, stateid)) + return true; + return false; } - } else { - /* This is the first OPEN in this generation */ - if (stateid->seqid == cpu_to_be32(1)) - return true; + /* The server returned a new stateid */ } + /* This is the first OPEN in this generation */ + if (stateid->seqid == cpu_to_be32(1)) + return true; return false; } From 9bc508cf0791c8e5a37696de1a046d746fcbd9d8 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:43:57 -0500 Subject: [PATCH 182/433] signal/s390: Use force_sigsegv in default_trap_handler Reading the history it is unclear why default_trap_handler calls do_exit. It is not even mentioned in the commit where the change happened. My best guess is that because it is unknown why the exception happened it was desired to guarantee the process never returned to userspace. Using do_exit(SIGSEGV) has the problem that it will only terminate one thread of a process, leaving the process in an undefined state. Use force_sigsegv(SIGSEGV) instead which effectively has the same behavior except that it uses the ordinary signal mechanism and terminates all threads of a process and is generally well defined.
Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: linux-s390@vger.kernel.org Fixes: ca2ab03237ec ("[PATCH] s390: core changes") History Tree: https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git Reviewed-by: Christian Borntraeger Link: https://lkml.kernel.org/r/20211020174406.17889-11-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- arch/s390/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index bcefc2173de4..51729ea2cf8e 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -84,7 +84,7 @@ static void default_trap_handler(struct pt_regs *regs) { if (user_mode(regs)) { report_user_fault(regs, SIGSEGV, 0); - do_exit(SIGSEGV); + force_sigsegv(SIGSEGV); } else die(regs, "Unknown program exception"); } From 111e70490d2a673730b89c010b61cea2d982d121 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:43:58 -0500 Subject: [PATCH 183/433] exit/kthread: Have kernel threads return instead of calling do_exit In 2009 Oleg reworked[1] the kernel threads so that it is not necessary to call do_exit if you are not using kthread_stop(). Remove the explicit calls of do_exit and complete_and_exit (with a NULL completion) that were previously necessary. [1] 63706172f332 ("kthreads: rework kthread_stop()") Link: https://lkml.kernel.org/r/20211020174406.17889-12-ebiederm@xmission.com Signed-off-by: Eric W. 
Biederman --- drivers/firmware/stratix10-svc.c | 4 ++-- drivers/soc/ti/wkup_m3_ipc.c | 2 +- fs/ocfs2/journal.c | 5 +---- kernel/kthread.c | 2 +- net/batman-adv/tp_meter.c | 2 +- 5 files changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/firmware/stratix10-svc.c b/drivers/firmware/stratix10-svc.c index 2a7687911c09..29c0a616b317 100644 --- a/drivers/firmware/stratix10-svc.c +++ b/drivers/firmware/stratix10-svc.c @@ -520,7 +520,7 @@ static int svc_normal_to_secure_thread(void *data) * physical address of memory block reserved by secure monitor software at * secure world. * - * svc_normal_to_secure_shm_thread() calls do_exit() directly since it is a + * svc_normal_to_secure_shm_thread() terminates directly since it is a * standlone thread for which no one will call kthread_stop() or return when * 'kthread_should_stop()' is true. */ @@ -544,7 +544,7 @@ static int svc_normal_to_secure_shm_thread(void *data) } complete(&sh_mem->sync_complete); - do_exit(0); + return 0; } /** diff --git a/drivers/soc/ti/wkup_m3_ipc.c b/drivers/soc/ti/wkup_m3_ipc.c index 09abd17065ba..0733443a2631 100644 --- a/drivers/soc/ti/wkup_m3_ipc.c +++ b/drivers/soc/ti/wkup_m3_ipc.c @@ -426,7 +426,7 @@ static void wkup_m3_rproc_boot_thread(struct wkup_m3_ipc *m3_ipc) else m3_ipc_state = m3_ipc; - do_exit(0); + return 0; } static int wkup_m3_ipc_probe(struct platform_device *pdev) diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 4f15750aac5d..329986f12db3 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -1497,10 +1497,7 @@ bail: if (quota_enabled) kfree(rm_quota); - /* no one is callint kthread_stop() for us so the kthread() api - * requires that we call do_exit(). And it isn't exported, but - * complete_and_exit() seems to be a minimal wrapper around it. 
*/ - complete_and_exit(NULL, status); + return status; } void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) diff --git a/kernel/kthread.c b/kernel/kthread.c index 5b37a8567168..33e17beaa682 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -433,7 +433,7 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data), * If thread is going to be bound on a particular cpu, give its node * in @node, to get NUMA affinity for kthread stack, or else give NUMA_NO_NODE. * When woken, the thread will run @threadfn() with @data as its - * argument. @threadfn() can either call do_exit() directly if it is a + * argument. @threadfn() can either return directly if it is a * standalone thread for which no one will call kthread_stop(), or * return when 'kthread_should_stop()' is true (which means * kthread_stop() has been called). The return value should be zero diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index 56b9fe97b3b4..1252540cde17 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -890,7 +890,7 @@ out: batadv_tp_vars_put(tp_vars); - do_exit(0); + return 0; } /** From 26d5badbccddcc063dc5174a2baffd13a23322aa Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:43:59 -0500 Subject: [PATCH 184/433] signal: Implement force_fatal_sig Add a simple helper force_fatal_sig that causes a signal to be delivered to a process as if the signal handler was set to SIG_DFL. Reimplement force_sigsegv based upon this new helper. This fixes force_sigsegv so that when it forces the default signal handler to be used the code now forces the signal to be unblocked as well. Reusing the tested logic in force_sig_info_to_task that was built for force_sig_seccomp this makes the implementation trivial. 
This is interesting both because it makes force_sigsegv simpler and because there are a couple of buggy places in the kernel that call do_exit(SIGILL) or do_exit(SIGSYS) because there is no straight forward way today for those places to simply force the exit of a process with the chosen signal. Creating force_fatal_sig allows those places to be implemented with normal signal exits. Link: https://lkml.kernel.org/r/20211020174406.17889-13-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- include/linux/sched/signal.h | 1 + kernel/signal.c | 26 +++++++++++++++++--------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index e5f4ce622ee6..e2dc9f119ada 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -338,6 +338,7 @@ extern int kill_pid(struct pid *pid, int sig, int priv); extern __must_check bool do_notify_parent(struct task_struct *, int); extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); extern void force_sig(int); +extern void force_fatal_sig(int); extern int send_sig(int, struct task_struct *, int); extern int zap_other_threads(struct task_struct *p); extern struct sigqueue *sigqueue_alloc(void); diff --git a/kernel/signal.c b/kernel/signal.c index 952741f6d0f9..6a5e1802b9a2 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1662,6 +1662,19 @@ void force_sig(int sig) } EXPORT_SYMBOL(force_sig); +void force_fatal_sig(int sig) +{ + struct kernel_siginfo info; + + clear_siginfo(&info); + info.si_signo = sig; + info.si_errno = 0; + info.si_code = SI_KERNEL; + info.si_pid = 0; + info.si_uid = 0; + force_sig_info_to_task(&info, current, true); +} + /* * When things go south during signal handling, we * will force a SIGSEGV. 
And if the signal that caused @@ -1670,15 +1683,10 @@ EXPORT_SYMBOL(force_sig); */ void force_sigsegv(int sig) { - struct task_struct *p = current; - - if (sig == SIGSEGV) { - unsigned long flags; - spin_lock_irqsave(&p->sighand->siglock, flags); - p->sighand->action[sig - 1].sa.sa_handler = SIG_DFL; - spin_unlock_irqrestore(&p->sighand->siglock, flags); - } - force_sig(SIGSEGV); + if (sig == SIGSEGV) + force_fatal_sig(SIGSEGV); + else + force_sig(SIGSEGV); } int force_sig_fault_to_task(int sig, int code, void __user *addr From 941edc5bf174b67f94db19817cbeab0a93e0c32a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:44:00 -0500 Subject: [PATCH 185/433] exit/syscall_user_dispatch: Send ordinary signals on failure Use force_fatal_sig instead of calling do_exit directly. This ensures the ordinary signal handling path gets invoked, core dumps as appropriate get created, and for multi-threaded processes all of the threads are terminated not just a single thread. When asked Gabriel Krisman Bertazi said [1]: > ebiederm@xmission.com (Eric W. Biederman) asked: > > > Why does do_syscal_user_dispatch call do_exit(SIGSEGV) and > > do_exit(SIGSYS) instead of force_sig(SIGSEGV) and force_sig(SIGSYS)? > > > > Looking at the code these cases are not expected to happen, so I would > > be surprised if userspace depends on any particular behaviour on the > > failure path so I think we can change this. > > Hi Eric, > > There is not really a good reason, and the use case that originated the > feature doesn't rely on it. > > Unless I'm missing yet another problem and others correct me, I think > it makes sense to change it as you described. > > > Is using do_exit in this way something you copied from seccomp? > > I'm not sure, its been a while, but I think it might be just that. The > first prototype of SUD was implemented as a seccomp mode. 
If at some point it becomes interesting we could relax "force_fatal_sig(SIGSEGV)" to instead say "force_sig_fault(SIGSEGV, SEGV_MAPERR, sd->selector)". I avoid doing that in this patch to avoid making it possible to catch currently uncatchable signals. Cc: Gabriel Krisman Bertazi Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Andy Lutomirski [1] https://lkml.kernel.org/r/87mtr6gdvi.fsf@collabora.com Link: https://lkml.kernel.org/r/20211020174406.17889-14-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- kernel/entry/syscall_user_dispatch.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/entry/syscall_user_dispatch.c b/kernel/entry/syscall_user_dispatch.c index c240302f56e2..4508201847d2 100644 --- a/kernel/entry/syscall_user_dispatch.c +++ b/kernel/entry/syscall_user_dispatch.c @@ -47,14 +47,18 @@ bool syscall_user_dispatch(struct pt_regs *regs) * access_ok() is performed once, at prctl time, when * the selector is loaded by userspace. */ - if (unlikely(__get_user(state, sd->selector))) - do_exit(SIGSEGV); + if (unlikely(__get_user(state, sd->selector))) { + force_fatal_sig(SIGSEGV); + return true; + } if (likely(state == SYSCALL_DISPATCH_FILTER_ALLOW)) return false; - if (state != SYSCALL_DISPATCH_FILTER_BLOCK) - do_exit(SIGSYS); + if (state != SYSCALL_DISPATCH_FILTER_BLOCK) { + force_fatal_sig(SIGSYS); + return true; + } } sd->on_dispatch = true; From c317d306d55079525c9610267fdaf3a8a6d2f08b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:44:01 -0500 Subject: [PATCH 186/433] signal/sparc32: Exit with a fatal signal when try_to_clear_window_buffer fails The function try_to_clear_window_buffer is only called from rtrap_32.c. After it is called the signal pending state is retested, and signals are handled if TIF_SIGPENDING is set. 
This allows try_to_clear_window_buffer to call force_fatal_signal and then rely on the signal being delivered to kill the process, without any danger of returning to userspace, or otherwise using possible corrupt state on failure. The functional difference between force_fatal_sig and do_exit is that do_exit will only terminate a single thread, and will never trigger a core-dump. A multi-threaded program for which a single thread terminates unexpectedly is hard to reason about. Calling force_fatal_sig does not give userspace a chance to catch the signal, but otherwise is an ordinary fatal signal exit, and it will trigger a coredump of the offending process if core dumps are enabled. Cc: David Miller Cc: sparclinux@vger.kernel.org Link: https://lkml.kernel.org/r/20211020174406.17889-15-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- arch/sparc/kernel/windows.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/sparc/kernel/windows.c b/arch/sparc/kernel/windows.c index 69a6ba6e9293..bbbd40cc6b28 100644 --- a/arch/sparc/kernel/windows.c +++ b/arch/sparc/kernel/windows.c @@ -121,8 +121,10 @@ void try_to_clear_window_buffer(struct pt_regs *regs, int who) if ((sp & 7) || copy_to_user((char __user *) sp, &tp->reg_window[window], - sizeof(struct reg_window32))) - do_exit(SIGILL); + sizeof(struct reg_window32))) { + force_fatal_sig(SIGILL); + return; + } } tp->w_saved = 0; } From 086ec444f86660e103de8945d0dcae9b67132ac9 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:44:02 -0500 Subject: [PATCH 187/433] signal/sparc32: In setup_rt_frame and setup_fram use force_fatal_sig Modify the 32bit version of setup_rt_frame and setup_frame to act similar to the 64bit version of setup_rt_frame and fail with a signal instead of calling do_exit. 
Replacing do_exit(SIGILL) with force_fatal_sig(SIGILL) ensures that the process will be terminated cleanly when the stack frame is invalid, instead of just killing off a single thread and leaving the process in a weird state. Cc: David Miller Cc: sparclinux@vger.kernel.org Link: https://lkml.kernel.org/r/20211020174406.17889-16-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- arch/sparc/kernel/signal_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c index 02f3ad55dfe3..cd677bc564a7 100644 --- a/arch/sparc/kernel/signal_32.c +++ b/arch/sparc/kernel/signal_32.c @@ -244,7 +244,7 @@ static int setup_frame(struct ksignal *ksig, struct pt_regs *regs, get_sigframe(ksig, regs, sigframe_size); if (invalid_frame_pointer(sf, sigframe_size)) { - do_exit(SIGILL); + force_fatal_sig(SIGILL); return -EINVAL; } @@ -336,7 +336,7 @@ static int setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs, sf = (struct rt_signal_frame __user *) get_sigframe(ksig, regs, sigframe_size); if (invalid_frame_pointer(sf, sigframe_size)) { - do_exit(SIGILL); + force_fatal_sig(SIGILL); return -EINVAL; } From 695dd0d634df8903e5ead8aa08d326f63b23368a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:44:03 -0500 Subject: [PATCH 188/433] signal/x86: In emulate_vsyscall force a signal instead of calling do_exit Directly calling do_exit with a signal number has the problem that all of the side effects of the signal don't happen, such as killing all of the threads of a process instead of just the calling thread. So replace do_exit(SIGSYS) with force_fatal_sig(SIGSYS) which causes the signal handling to take its normal path and work as expected. Cc: Andy Lutomirski Link: https://lkml.kernel.org/r/20211020174406.17889-17-ebiederm@xmission.com Signed-off-by: Eric W.
Biederman --- arch/x86/entry/vsyscall/vsyscall_64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 1b40b9297083..0b6b277ee050 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -226,7 +226,8 @@ bool emulate_vsyscall(unsigned long error_code, if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) { warn_bad_vsyscall(KERN_DEBUG, regs, "seccomp tried to change syscall nr or ip"); - do_exit(SIGSYS); + force_fatal_sig(SIGSYS); + return true; } regs->orig_ax = -1; if (tmp) From 501c88722797a1923145658cce85fb3661121832 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:44:04 -0500 Subject: [PATCH 189/433] exit/rtl8723bs: Replace the macro thread_exit with a simple return 0 Every place thread_exit is called is at the end of a function started with kthread_run. The code in kthread_run has arranged things so a kernel thread can just return and do_exit will be called. So just have the threads return instead of calling complete_and_exit. Link: https://lkml.kernel.org/r/20211020174406.17889-18-ebiederm@xmission.com Signed-off-by: Eric W. 
Biederman --- drivers/staging/rtl8723bs/core/rtw_cmd.c | 2 +- drivers/staging/rtl8723bs/core/rtw_xmit.c | 2 +- drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c | 2 +- drivers/staging/rtl8723bs/include/osdep_service_linux.h | 2 -- 4 files changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/staging/rtl8723bs/core/rtw_cmd.c b/drivers/staging/rtl8723bs/core/rtw_cmd.c index d494c06dab96..8e69f9c10f5c 100644 --- a/drivers/staging/rtl8723bs/core/rtw_cmd.c +++ b/drivers/staging/rtl8723bs/core/rtw_cmd.c @@ -524,7 +524,7 @@ post_process: complete(&pcmdpriv->terminate_cmdthread_comp); atomic_set(&(pcmdpriv->cmdthd_running), false); - thread_exit(); + return 0; } /* diff --git a/drivers/staging/rtl8723bs/core/rtw_xmit.c b/drivers/staging/rtl8723bs/core/rtw_xmit.c index 79e4d7df1ef5..0c357bc2478c 100644 --- a/drivers/staging/rtl8723bs/core/rtw_xmit.c +++ b/drivers/staging/rtl8723bs/core/rtw_xmit.c @@ -2491,7 +2491,7 @@ int rtw_xmit_thread(void *context) complete(&padapter->xmitpriv.terminate_xmitthread_comp); - thread_exit(); + return 0; } void rtw_sctx_init(struct submit_ctx *sctx, int timeout_ms) diff --git a/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c b/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c index 156d6aba18ca..2b9a41b12d1f 100644 --- a/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c +++ b/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c @@ -435,7 +435,7 @@ int rtl8723bs_xmit_thread(void *context) complete(&pxmitpriv->SdioXmitTerminate); - thread_exit(); + return 0; } s32 rtl8723bs_mgnt_xmit( diff --git a/drivers/staging/rtl8723bs/include/osdep_service_linux.h b/drivers/staging/rtl8723bs/include/osdep_service_linux.h index 3492ec1efd1e..188ed7e26550 100644 --- a/drivers/staging/rtl8723bs/include/osdep_service_linux.h +++ b/drivers/staging/rtl8723bs/include/osdep_service_linux.h @@ -45,8 +45,6 @@ spinlock_t lock; }; - #define thread_exit() complete_and_exit(NULL, 0) - static inline struct list_head *get_next(struct list_head *list) { return list->next; From 
99d7ef1e4792de3d8658f967539bdc6df2b03fa4 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:44:05 -0500 Subject: [PATCH 190/433] exit/rtl8712: Replace the macro thread_exit with a simple return 0 The macro thread_exit is called is at the end of a function started with kthread_run. The code in kthread_run has arranged things so a kernel thread can just return and do_exit will be called. So just have the cmd_thread return instead of calling complete_and_exit. Link: https://lkml.kernel.org/r/20211020174406.17889-19-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- drivers/staging/rtl8712/osdep_service.h | 1 - drivers/staging/rtl8712/rtl8712_cmd.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/staging/rtl8712/osdep_service.h b/drivers/staging/rtl8712/osdep_service.h index d33ddffb7ad9..0d9bb42cbc58 100644 --- a/drivers/staging/rtl8712/osdep_service.h +++ b/drivers/staging/rtl8712/osdep_service.h @@ -37,7 +37,6 @@ struct __queue { #define _pkt struct sk_buff #define _buffer unsigned char -#define thread_exit() complete_and_exit(NULL, 0) #define _init_queue(pqueue) \ do { \ diff --git a/drivers/staging/rtl8712/rtl8712_cmd.c b/drivers/staging/rtl8712/rtl8712_cmd.c index e9294e1ed06e..2326aae6709e 100644 --- a/drivers/staging/rtl8712/rtl8712_cmd.c +++ b/drivers/staging/rtl8712/rtl8712_cmd.c @@ -393,7 +393,7 @@ _next: r8712_free_cmd_obj(pcmd); } while (1); complete(&pcmdpriv->terminate_cmdthread_comp); - thread_exit(); + return 0; } void r8712_event_handle(struct _adapter *padapter, __le32 *peventbuf) From 0fdc0c4279c822eda8f5ce3b7689d34f4cac2e82 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:44:06 -0500 Subject: [PATCH 191/433] exit/r8188eu: Replace the macro thread_exit with a simple return 0 The macro thread_exit is called is at the end of functions started with kthread_run. 
The code in kthread_run has arranged things so a kernel thread can just return and do_exit will be called. So just have rtw_cmd_thread and mp_xmit_packet_thread return instead of calling complete_and_exit. Link: https://lkml.kernel.org/r/20211020174406.17889-20-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- drivers/staging/r8188eu/core/rtw_cmd.c | 2 +- drivers/staging/r8188eu/core/rtw_mp.c | 2 +- drivers/staging/r8188eu/include/osdep_service.h | 2 -- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_cmd.c b/drivers/staging/r8188eu/core/rtw_cmd.c index ce73ac7cf973..d37c9463eecc 100644 --- a/drivers/staging/r8188eu/core/rtw_cmd.c +++ b/drivers/staging/r8188eu/core/rtw_cmd.c @@ -347,7 +347,7 @@ post_process: up(&pcmdpriv->terminate_cmdthread_sema); - thread_exit(); + return 0; } u8 rtw_setstandby_cmd(struct adapter *padapter, uint action) diff --git a/drivers/staging/r8188eu/core/rtw_mp.c b/drivers/staging/r8188eu/core/rtw_mp.c index dabdd0406f30..3945c4efe45a 100644 --- a/drivers/staging/r8188eu/core/rtw_mp.c +++ b/drivers/staging/r8188eu/core/rtw_mp.c @@ -580,7 +580,7 @@ exit: pmptx->pallocated_buf = NULL; pmptx->stop = 1; - thread_exit(); + return 0; } void fill_txdesc_for_mp(struct adapter *padapter, struct tx_desc *ptxdesc) diff --git a/drivers/staging/r8188eu/include/osdep_service.h b/drivers/staging/r8188eu/include/osdep_service.h index 029aa4e92c9b..afbffb551f9b 100644 --- a/drivers/staging/r8188eu/include/osdep_service.h +++ b/drivers/staging/r8188eu/include/osdep_service.h @@ -49,8 +49,6 @@ struct __queue { spinlock_t lock; }; -#define thread_exit() complete_and_exit(NULL, 0) - static inline struct list_head *get_list_head(struct __queue *queue) { return (&(queue->queue)); From e21294a7aaae32c5d7154b187113a04db5852e37 Mon Sep 17 00:00:00 2001 From: "Eric W. 
Biederman" Date: Mon, 25 Oct 2021 10:50:57 -0500 Subject: [PATCH 192/433] signal: Replace force_sigsegv(SIGSEGV) with force_fatal_sig(SIGSEGV) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that force_fatal_sig exists it is unnecessary and a bit confusing to use force_sigsegv in cases where the simpler force_fatal_sig is wanted. So change every instance we can to make the code clearer. Acked-by: Geert Uytterhoeven Reviewed-by: Philippe Mathieu-Daudé Link: https://lkml.kernel.org/r/877de7jrev.fsf@disp2133 Signed-off-by: "Eric W. Biederman" --- arch/arc/kernel/process.c | 2 +- arch/m68k/kernel/traps.c | 2 +- arch/powerpc/kernel/signal_32.c | 2 +- arch/powerpc/kernel/signal_64.c | 4 ++-- arch/s390/kernel/traps.c | 2 +- arch/um/kernel/trap.c | 2 +- arch/x86/kernel/vm86_32.c | 2 +- fs/exec.c | 2 +- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index 3793876f42d9..8e90052f6f05 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -294,7 +294,7 @@ int elf_check_arch(const struct elf32_hdr *x) eflags = x->e_flags; if ((eflags & EF_ARC_OSABI_MSK) != EF_ARC_OSABI_CURRENT) { pr_err("ABI mismatch - you need newer toolchain\n"); - force_sigsegv(SIGSEGV); + force_fatal_sig(SIGSEGV); return 0; } diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c index 5b19fcdcd69e..74045d164ddb 100644 --- a/arch/m68k/kernel/traps.c +++ b/arch/m68k/kernel/traps.c @@ -1150,7 +1150,7 @@ asmlinkage void set_esp0(unsigned long ssp) */ asmlinkage void fpsp040_die(void) { - force_sigsegv(SIGSEGV); + force_fatal_sig(SIGSEGV); } #ifdef CONFIG_M68KFPU_EMU diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 666f3da41232..933ab95805a6 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -1063,7 +1063,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, * We kill the task with a 
SIGSEGV in this situation. */ if (do_setcontext(new_ctx, regs, 0)) { - force_sigsegv(SIGSEGV); + force_fatal_sig(SIGSEGV); return -EFAULT; } diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index d8de622c9e4a..8ead9b3f47c6 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -704,7 +704,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, */ if (__get_user_sigset(&set, &new_ctx->uc_sigmask)) { - force_sigsegv(SIGSEGV); + force_fatal_sig(SIGSEGV); return -EFAULT; } set_current_blocked(&set); @@ -713,7 +713,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, return -EFAULT; if (__unsafe_restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) { user_read_access_end(); - force_sigsegv(SIGSEGV); + force_fatal_sig(SIGSEGV); return -EFAULT; } user_read_access_end(); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 51729ea2cf8e..01a7c68dcfb6 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -84,7 +84,7 @@ static void default_trap_handler(struct pt_regs *regs) { if (user_mode(regs)) { report_user_fault(regs, SIGSEGV, 0); - force_sigsegv(SIGSEGV); + force_fatal_sig(SIGSEGV); } else die(regs, "Unknown program exception"); } diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 3198c4767387..c32efb09db21 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -158,7 +158,7 @@ static void bad_segv(struct faultinfo fi, unsigned long ip) void fatal_sigsegv(void) { - force_sigsegv(SIGSEGV); + force_fatal_sig(SIGSEGV); do_signal(&current->thread.regs); /* * This is to tell gcc that we're not returning - do_signal diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 933cafab7832..f14f69d7aa3c 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -160,7 +160,7 @@ Efault_end: user_access_end(); Efault: pr_alert("could not access userspace vm86 info\n"); - force_sigsegv(SIGSEGV); +
force_fatal_sig(SIGSEGV); goto exit_vm86; } diff --git a/fs/exec.c b/fs/exec.c index a098c133d8d7..ac7b51b51f38 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1852,7 +1852,7 @@ out: * SIGSEGV. */ if (bprm->point_of_no_return && !fatal_signal_pending(current)) - force_sigsegv(SIGSEGV); + force_fatal_sig(SIGSEGV); out_unmark: current->fs->in_exec = 0; From ea9afca88bbea26f23697b3305789f77f0341d23 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 29 Oct 2021 11:02:20 -0400 Subject: [PATCH 193/433] SUNRPC: Replace use of socket sk_callback_lock with sock_lock Since we do things like setting flags, etc it really is more appropriate to use sock_lock(). Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 38 +++++++++++--------------------------- 1 file changed, 11 insertions(+), 27 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 04f1b78bcbca..1c42153025dd 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1153,14 +1153,13 @@ static void xs_error_report(struct sock *sk) struct sock_xprt *transport; struct rpc_xprt *xprt; - read_lock_bh(&sk->sk_callback_lock); if (!(xprt = xprt_from_sock(sk))) - goto out; + return; transport = container_of(xprt, struct sock_xprt, xprt); transport->xprt_err = -sk->sk_err; if (transport->xprt_err == 0) - goto out; + return; dprintk("RPC: xs_error_report client %p, error=%d...\n", xprt, -transport->xprt_err); trace_rpc_socket_error(xprt, sk->sk_socket, transport->xprt_err); @@ -1168,8 +1167,6 @@ static void xs_error_report(struct sock *sk) /* barrier ensures xprt_err is set before XPRT_SOCK_WAKE_ERROR */ smp_mb__before_atomic(); xs_run_error_worker(transport, XPRT_SOCK_WAKE_ERROR); - out: - read_unlock_bh(&sk->sk_callback_lock); } static void xs_reset_transport(struct sock_xprt *transport) @@ -1188,7 +1185,7 @@ static void xs_reset_transport(struct sock_xprt *transport) kernel_sock_shutdown(sock, SHUT_RDWR); mutex_lock(&transport->recv_mutex); - write_lock_bh(&sk->sk_callback_lock); + 
lock_sock(sk); transport->inet = NULL; transport->sock = NULL; transport->file = NULL; @@ -1197,10 +1194,10 @@ static void xs_reset_transport(struct sock_xprt *transport) xs_restore_old_callbacks(transport, sk); xprt_clear_connected(xprt); - write_unlock_bh(&sk->sk_callback_lock); xs_sock_reset_connection_flags(xprt); /* Reset stream record info */ xs_stream_reset_connect(transport); + release_sock(sk); mutex_unlock(&transport->recv_mutex); trace_rpc_socket_close(xprt, sock); @@ -1364,7 +1361,6 @@ static void xs_data_ready(struct sock *sk) { struct rpc_xprt *xprt; - read_lock_bh(&sk->sk_callback_lock); dprintk("RPC: xs_data_ready...\n"); xprt = xprt_from_sock(sk); if (xprt != NULL) { @@ -1379,7 +1375,6 @@ static void xs_data_ready(struct sock *sk) if (!test_and_set_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) queue_work(xprtiod_workqueue, &transport->recv_worker); } - read_unlock_bh(&sk->sk_callback_lock); } /* @@ -1408,9 +1403,8 @@ static void xs_tcp_state_change(struct sock *sk) struct rpc_xprt *xprt; struct sock_xprt *transport; - read_lock_bh(&sk->sk_callback_lock); if (!(xprt = xprt_from_sock(sk))) - goto out; + return; dprintk("RPC: xs_tcp_state_change client %p...\n", xprt); dprintk("RPC: state %x conn %d dead %d zapped %d sk_shutdown %d\n", sk->sk_state, xprt_connected(xprt), @@ -1471,8 +1465,6 @@ static void xs_tcp_state_change(struct sock *sk) /* Trigger the socket release */ xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT); } - out: - read_unlock_bh(&sk->sk_callback_lock); } static void xs_write_space(struct sock *sk) @@ -1511,13 +1503,9 @@ out: */ static void xs_udp_write_space(struct sock *sk) { - read_lock_bh(&sk->sk_callback_lock); - /* from net/core/sock.c:sock_def_write_space */ if (sock_writeable(sk)) xs_write_space(sk); - - read_unlock_bh(&sk->sk_callback_lock); } /** @@ -1532,13 +1520,9 @@ static void xs_udp_write_space(struct sock *sk) */ static void xs_tcp_write_space(struct sock *sk) { - read_lock_bh(&sk->sk_callback_lock); - 
/* from net/core/stream.c:sk_stream_write_space */ if (sk_stream_is_writeable(sk)) xs_write_space(sk); - - read_unlock_bh(&sk->sk_callback_lock); } static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt) @@ -1833,7 +1817,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, if (!transport->inet) { struct sock *sk = sock->sk; - write_lock_bh(&sk->sk_callback_lock); + lock_sock(sk); xs_save_old_callbacks(transport, sk); @@ -1849,7 +1833,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, transport->sock = sock; transport->inet = sk; - write_unlock_bh(&sk->sk_callback_lock); + release_sock(sk); } xs_stream_start_connect(transport); @@ -2031,7 +2015,7 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) if (!transport->inet) { struct sock *sk = sock->sk; - write_lock_bh(&sk->sk_callback_lock); + lock_sock(sk); xs_save_old_callbacks(transport, sk); @@ -2048,7 +2032,7 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) xs_set_memalloc(xprt); - write_unlock_bh(&sk->sk_callback_lock); + release_sock(sk); } xs_udp_do_set_buffer_size(xprt); @@ -2194,7 +2178,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) xs_tcp_set_socket_timeouts(xprt, sock); tcp_sock_set_nodelay(sk); - write_lock_bh(&sk->sk_callback_lock); + lock_sock(sk); xs_save_old_callbacks(transport, sk); @@ -2214,7 +2198,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) transport->sock = sock; transport->inet = sk; - write_unlock_bh(&sk->sk_callback_lock); + release_sock(sk); } if (!xprt_bound(xprt)) From 280254b605ffb6ec88f33b43a360aa6b5247bef7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 29 Oct 2021 12:05:48 -0400 Subject: [PATCH 194/433] SUNRPC: Clean up xs_tcp_setup_sock() Move the error handling into a single switch statement for clarity. 
Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 68 ++++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 40 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 1c42153025dd..aa293e4a77fa 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2158,7 +2158,6 @@ static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt, static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - int ret = -ENOTCONN; if (!transport->inet) { struct sock *sk = sock->sk; @@ -2202,7 +2201,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) } if (!xprt_bound(xprt)) - goto out; + return -ENOTCONN; xs_set_memalloc(xprt); @@ -2210,22 +2209,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) /* Tell the socket layer to start connecting... */ set_bit(XPRT_SOCK_CONNECTING, &transport->sock_state); - ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); - switch (ret) { - case 0: - xs_set_srcport(transport, sock); - fallthrough; - case -EINPROGRESS: - /* SYN_SENT! */ - if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) - xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; - break; - case -EADDRNOTAVAIL: - /* Source port number is unavailable. Try a new one! 
*/ - transport->srcport = 0; - } -out: - return ret; + return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); } /** @@ -2240,14 +2224,14 @@ static void xs_tcp_setup_socket(struct work_struct *work) container_of(work, struct sock_xprt, connect_worker.work); struct socket *sock = transport->sock; struct rpc_xprt *xprt = &transport->xprt; - int status = -EIO; + int status; if (!sock) { sock = xs_create_sock(xprt, transport, xs_addr(xprt)->sa_family, SOCK_STREAM, IPPROTO_TCP, true); if (IS_ERR(sock)) { - status = PTR_ERR(sock); + xprt_wake_pending_tasks(xprt, PTR_ERR(sock)); goto out; } } @@ -2264,21 +2248,21 @@ static void xs_tcp_setup_socket(struct work_struct *work) xprt, -status, xprt_connected(xprt), sock->sk->sk_state); switch (status) { - default: - printk("%s: connect returned unhandled error %d\n", - __func__, status); - fallthrough; - case -EADDRNOTAVAIL: - /* We're probably in TIME_WAIT. Get rid of existing socket, - * and retry - */ - xs_tcp_force_close(xprt); - break; case 0: + xs_set_srcport(transport, sock); + fallthrough; case -EINPROGRESS: + /* SYN_SENT! */ + if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) + xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; + fallthrough; case -EALREADY: - xprt_unlock_connect(xprt, transport); - return; + goto out_unlock; + case -EADDRNOTAVAIL: + /* Source port number is unavailable. Try a new one! */ + transport->srcport = 0; + status = -EAGAIN; + break; case -EINVAL: /* Happens, for instance, if the user specified a link * local IPv6 address without a scope-id. @@ -2290,18 +2274,22 @@ static void xs_tcp_setup_socket(struct work_struct *work) case -EHOSTUNREACH: case -EADDRINUSE: case -ENOBUFS: - /* xs_tcp_force_close() wakes tasks with a fixed error code. - * We need to wake them first to ensure the correct error code. 
- */ - xprt_wake_pending_tasks(xprt, status); - xs_tcp_force_close(xprt); - goto out; + break; + default: + printk("%s: connect returned unhandled error %d\n", + __func__, status); + status = -EAGAIN; } - status = -EAGAIN; + + /* xs_tcp_force_close() wakes tasks with a fixed error code. + * We need to wake them first to ensure the correct error code. + */ + xprt_wake_pending_tasks(xprt, status); + xs_tcp_force_close(xprt); out: xprt_clear_connecting(xprt); +out_unlock: xprt_unlock_connect(xprt, transport); - xprt_wake_pending_tasks(xprt, status); } /** From 6d91929a6fa6b340241bf09ded8740cd439d4df3 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 1 Nov 2021 15:17:53 -0400 Subject: [PATCH 195/433] nfsd: document server-to-server-copy parameters Signed-off-by: J. Bruce Fields --- Documentation/admin-guide/kernel-parameters.txt | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 91ba391f9b32..14bc3f0b0149 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3243,6 +3243,19 @@ driver. A non-zero value sets the minimum interval in seconds between layoutstats transmissions. + nfsd.inter_copy_offload_enable = + [NFSv4.2] When set to 1, the server will support + server-to-server copies for which this server is + the destination of the copy. + + nfsd.nfsd4_ssc_umount_timeout = + [NFSv4.2] When used as the destination of a + server-to-server copy, knfsd temporarily mounts + the source server. It caches the mount in case + it will be needed again, and discards it if not + used for the number of milliseconds specified by + this parameter. + nfsd.nfs4_disable_idmapping= [NFSv4] When set to the default of '1', the NFSv4 server will return only numeric uids and gids to @@ -3250,6 +3263,7 @@ and gids from such clients. This is intended to ease migration from NFSv2/v3. 
+ nmi_backtrace.backtrace_idle [KNL] Dump stacks even of idle CPUs in response to an NMI stack-backtrace request. From dc155617fa5bf5bddbeb99dc781dd011ed23b90f Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sat, 3 Apr 2021 11:07:37 -0700 Subject: [PATCH 196/433] apparmor: Fix internal policy capable check for policy management The check was incorrectly treating a returned error as a boolean. Fixes: 31ec99e13346 ("apparmor: switch to apparmor to internal capable check for policy management") Signed-off-by: John Johansen --- security/apparmor/policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 9ce93966401a..4da4f3df9d4a 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -678,7 +678,7 @@ bool aa_policy_view_capable(struct aa_label *label, struct aa_ns *ns) bool aa_policy_admin_capable(struct aa_label *label, struct aa_ns *ns) { struct user_namespace *user_ns = current_user_ns(); - bool capable = policy_ns_capable(label, user_ns, CAP_MAC_ADMIN); + bool capable = policy_ns_capable(label, user_ns, CAP_MAC_ADMIN) == 0; AA_DEBUG("cap_mac_admin? %d\n", capable); AA_DEBUG("policy locked? %d\n", aa_g_lock_policy); From 80479eb862102f9513e93fcf726c78cc0be2e3b2 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 26 Oct 2021 12:56:55 -0400 Subject: [PATCH 197/433] nfsd4: remove obsolete comment Mandatory locking has been removed. And the rest of this comment is redundant with the code. Reported-by: Jeff Layton Signed-off-by: J.
Bruce Fields --- fs/nfsd/vfs.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 5c76d6813517..c99857689e2c 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -730,9 +730,6 @@ __nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, path.dentry = fhp->fh_dentry; inode = d_inode(path.dentry); - /* Disallow write access to files with the append-only bit set - * or any access when mandatory locking enabled - */ err = nfserr_perm; if (IS_APPEND(inode) && (may_flags & NFSD_MAY_WRITE)) goto out; From 9d2d48bbbdabf7b2f029369c4f926d133c1d47ad Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 22 Oct 2021 16:16:56 -0400 Subject: [PATCH 198/433] NFS: Move generic FS show macros to global header Refactor: Surface useful show_ macros for use by other trace subsystems. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4trace.h | 67 ++++++--------------- fs/nfs/nfstrace.h | 80 +++++-------------------- include/trace/events/fs.h | 122 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 156 insertions(+), 113 deletions(-) create mode 100644 include/trace/events/fs.h diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index d4f061046c09..424d9cd4c196 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -11,6 +11,8 @@ #include #include +#include + TRACE_DEFINE_ENUM(EPERM); TRACE_DEFINE_ENUM(ENOENT); TRACE_DEFINE_ENUM(EIO); @@ -314,19 +316,6 @@ TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_PNFS); { NFS4ERR_RESET_TO_MDS, "RESET_TO_MDS" }, \ { NFS4ERR_RESET_TO_PNFS, "RESET_TO_PNFS" }) -#define show_open_flags(flags) \ - __print_flags(flags, "|", \ - { O_CREAT, "O_CREAT" }, \ - { O_EXCL, "O_EXCL" }, \ - { O_TRUNC, "O_TRUNC" }, \ - { O_DIRECT, "O_DIRECT" }) - -#define show_fmode_flags(mode) \ - __print_flags(mode, "|", \ - { ((__force unsigned long)FMODE_READ), "READ" }, \ - { ((__force unsigned long)FMODE_WRITE), "WRITE" }, \ - { ((__force unsigned long)FMODE_EXEC), "EXEC" }) - #define 
show_nfs_fattr_flags(valid) \ __print_flags((unsigned long)valid, "|", \ { NFS_ATTR_FATTR_TYPE, "TYPE" }, \ @@ -794,8 +783,8 @@ DECLARE_EVENT_CLASS(nfs4_open_event, TP_STRUCT__entry( __field(unsigned long, error) - __field(unsigned int, flags) - __field(unsigned int, fmode) + __field(unsigned long, flags) + __field(unsigned long, fmode) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) @@ -813,7 +802,7 @@ DECLARE_EVENT_CLASS(nfs4_open_event, __entry->error = -error; __entry->flags = flags; - __entry->fmode = (__force unsigned int)ctx->mode; + __entry->fmode = (__force unsigned long)ctx->mode; __entry->dev = ctx->dentry->d_sb->s_dev; if (!IS_ERR_OR_NULL(state)) { inode = state->inode; @@ -843,15 +832,15 @@ DECLARE_EVENT_CLASS(nfs4_open_event, ), TP_printk( - "error=%ld (%s) flags=%d (%s) fmode=%s " + "error=%ld (%s) flags=%lu (%s) fmode=%s " "fileid=%02x:%02x:%llu fhandle=0x%08x " "name=%02x:%02x:%llu/%s stateid=%d:0x%08x " "openstateid=%d:0x%08x", -__entry->error, show_nfsv4_errors(__entry->error), __entry->flags, - show_open_flags(__entry->flags), - show_fmode_flags(__entry->fmode), + show_fs_fcntl_open_flags(__entry->flags), + show_fs_fmode_flags(__entry->fmode), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -905,7 +894,7 @@ TRACE_EVENT(nfs4_cached_open, TP_printk( "fmode=%s fileid=%02x:%02x:%llu " "fhandle=0x%08x stateid=%d:0x%08x", - __entry->fmode ? show_fmode_flags(__entry->fmode) : + __entry->fmode ? show_fs_fmode_flags(__entry->fmode) : "closed", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, @@ -953,7 +942,7 @@ TRACE_EVENT(nfs4_close, "fhandle=0x%08x openstateid=%d:0x%08x", -__entry->error, show_nfsv4_errors(__entry->error), - __entry->fmode ? show_fmode_flags(__entry->fmode) : + __entry->fmode ? 
show_fs_fmode_flags(__entry->fmode) : "closed", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, @@ -962,24 +951,6 @@ TRACE_EVENT(nfs4_close, ) ); -TRACE_DEFINE_ENUM(F_GETLK); -TRACE_DEFINE_ENUM(F_SETLK); -TRACE_DEFINE_ENUM(F_SETLKW); -TRACE_DEFINE_ENUM(F_RDLCK); -TRACE_DEFINE_ENUM(F_WRLCK); -TRACE_DEFINE_ENUM(F_UNLCK); - -#define show_lock_cmd(type) \ - __print_symbolic((int)type, \ - { F_GETLK, "GETLK" }, \ - { F_SETLK, "SETLK" }, \ - { F_SETLKW, "SETLKW" }) -#define show_lock_type(type) \ - __print_symbolic((int)type, \ - { F_RDLCK, "RDLCK" }, \ - { F_WRLCK, "WRLCK" }, \ - { F_UNLCK, "UNLCK" }) - DECLARE_EVENT_CLASS(nfs4_lock_event, TP_PROTO( const struct file_lock *request, @@ -992,8 +963,8 @@ DECLARE_EVENT_CLASS(nfs4_lock_event, TP_STRUCT__entry( __field(unsigned long, error) - __field(int, cmd) - __field(char, type) + __field(unsigned long, cmd) + __field(unsigned long, type) __field(loff_t, start) __field(loff_t, end) __field(dev_t, dev) @@ -1026,8 +997,8 @@ DECLARE_EVENT_CLASS(nfs4_lock_event, "stateid=%d:0x%08x", -__entry->error, show_nfsv4_errors(__entry->error), - show_lock_cmd(__entry->cmd), - show_lock_type(__entry->type), + show_fs_fcntl_cmd(__entry->cmd), + show_fs_fcntl_lock_type(__entry->type), (long long)__entry->start, (long long)__entry->end, MAJOR(__entry->dev), MINOR(__entry->dev), @@ -1062,8 +1033,8 @@ TRACE_EVENT(nfs4_set_lock, TP_STRUCT__entry( __field(unsigned long, error) - __field(int, cmd) - __field(char, type) + __field(unsigned long, cmd) + __field(unsigned long, type) __field(loff_t, start) __field(loff_t, end) __field(dev_t, dev) @@ -1102,8 +1073,8 @@ TRACE_EVENT(nfs4_set_lock, "stateid=%d:0x%08x lockstateid=%d:0x%08x", -__entry->error, show_nfsv4_errors(__entry->error), - show_lock_cmd(__entry->cmd), - show_lock_type(__entry->type), + show_fs_fcntl_cmd(__entry->cmd), + show_fs_fcntl_lock_type(__entry->type), (long long)__entry->start, (long long)__entry->end, MAJOR(__entry->dev), MINOR(__entry->dev), 
@@ -1220,7 +1191,7 @@ DECLARE_EVENT_CLASS(nfs4_set_delegation_event, TP_printk( "fmode=%s fileid=%02x:%02x:%llu fhandle=0x%08x", - show_fmode_flags(__entry->fmode), + show_fs_fmode_flags(__entry->fmode), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 83e9615c8b8c..331bcc0c0a75 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -11,20 +11,9 @@ #include #include +#include #include -#define nfs_show_file_type(ftype) \ - __print_symbolic(ftype, \ - { DT_UNKNOWN, "UNKNOWN" }, \ - { DT_FIFO, "FIFO" }, \ - { DT_CHR, "CHR" }, \ - { DT_DIR, "DIR" }, \ - { DT_BLK, "BLK" }, \ - { DT_REG, "REG" }, \ - { DT_LNK, "LNK" }, \ - { DT_SOCK, "SOCK" }, \ - { DT_WHT, "WHT" }) - #define nfs_show_cache_validity(v) \ __print_flags(v, "|", \ { NFS_INO_INVALID_DATA, "INVALID_DATA" }, \ @@ -131,7 +120,7 @@ DECLARE_EVENT_CLASS(nfs_inode_event_done, (unsigned long long)__entry->fileid, __entry->fhandle, __entry->type, - nfs_show_file_type(__entry->type), + show_fs_dirent_type(__entry->type), (unsigned long long)__entry->version, (long long)__entry->size, __entry->cache_validity, @@ -222,7 +211,7 @@ TRACE_EVENT(nfs_access_exit, (unsigned long long)__entry->fileid, __entry->fhandle, __entry->type, - nfs_show_file_type(__entry->type), + show_fs_dirent_type(__entry->type), (unsigned long long)__entry->version, (long long)__entry->size, __entry->cache_validity, @@ -283,21 +272,6 @@ DEFINE_NFS_UPDATE_SIZE_EVENT(wcc); DEFINE_NFS_UPDATE_SIZE_EVENT(update); DEFINE_NFS_UPDATE_SIZE_EVENT(grow); -#define show_lookup_flags(flags) \ - __print_flags(flags, "|", \ - { LOOKUP_FOLLOW, "FOLLOW" }, \ - { LOOKUP_DIRECTORY, "DIRECTORY" }, \ - { LOOKUP_AUTOMOUNT, "AUTOMOUNT" }, \ - { LOOKUP_PARENT, "PARENT" }, \ - { LOOKUP_REVAL, "REVAL" }, \ - { LOOKUP_RCU, "RCU" }, \ - { LOOKUP_OPEN, "OPEN" }, \ - { LOOKUP_CREATE, "CREATE" }, \ - { LOOKUP_EXCL, "EXCL" }, \ - { LOOKUP_RENAME_TARGET, "RENAME_TARGET" }, \ 
- { LOOKUP_EMPTY, "EMPTY" }, \ - { LOOKUP_DOWN, "DOWN" }) - DECLARE_EVENT_CLASS(nfs_lookup_event, TP_PROTO( const struct inode *dir, @@ -324,7 +298,7 @@ DECLARE_EVENT_CLASS(nfs_lookup_event, TP_printk( "flags=0x%lx (%s) name=%02x:%02x:%llu/%s", __entry->flags, - show_lookup_flags(__entry->flags), + show_fs_lookup_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) @@ -370,7 +344,7 @@ DECLARE_EVENT_CLASS(nfs_lookup_event_done, "error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s", -__entry->error, nfs_show_status(__entry->error), __entry->flags, - show_lookup_flags(__entry->flags), + show_fs_lookup_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) @@ -392,30 +366,6 @@ DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_exit); DEFINE_NFS_LOOKUP_EVENT(nfs_lookup_revalidate_enter); DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_revalidate_exit); -#define show_open_flags(flags) \ - __print_flags(flags, "|", \ - { O_WRONLY, "O_WRONLY" }, \ - { O_RDWR, "O_RDWR" }, \ - { O_CREAT, "O_CREAT" }, \ - { O_EXCL, "O_EXCL" }, \ - { O_NOCTTY, "O_NOCTTY" }, \ - { O_TRUNC, "O_TRUNC" }, \ - { O_APPEND, "O_APPEND" }, \ - { O_NONBLOCK, "O_NONBLOCK" }, \ - { O_DSYNC, "O_DSYNC" }, \ - { O_DIRECT, "O_DIRECT" }, \ - { O_LARGEFILE, "O_LARGEFILE" }, \ - { O_DIRECTORY, "O_DIRECTORY" }, \ - { O_NOFOLLOW, "O_NOFOLLOW" }, \ - { O_NOATIME, "O_NOATIME" }, \ - { O_CLOEXEC, "O_CLOEXEC" }) - -#define show_fmode_flags(mode) \ - __print_flags(mode, "|", \ - { ((__force unsigned long)FMODE_READ), "READ" }, \ - { ((__force unsigned long)FMODE_WRITE), "WRITE" }, \ - { ((__force unsigned long)FMODE_EXEC), "EXEC" }) - TRACE_EVENT(nfs_atomic_open_enter, TP_PROTO( const struct inode *dir, @@ -427,7 +377,7 @@ TRACE_EVENT(nfs_atomic_open_enter, TP_STRUCT__entry( __field(unsigned long, flags) - __field(unsigned int, fmode) + __field(unsigned long, fmode) __field(dev_t, dev) __field(u64, dir) 
__string(name, ctx->dentry->d_name.name) @@ -437,15 +387,15 @@ TRACE_EVENT(nfs_atomic_open_enter, __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->flags = flags; - __entry->fmode = (__force unsigned int)ctx->mode; + __entry->fmode = (__force unsigned long)ctx->mode; __assign_str(name, ctx->dentry->d_name.name); ), TP_printk( "flags=0x%lx (%s) fmode=%s name=%02x:%02x:%llu/%s", __entry->flags, - show_open_flags(__entry->flags), - show_fmode_flags(__entry->fmode), + show_fs_fcntl_open_flags(__entry->flags), + show_fs_fmode_flags(__entry->fmode), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) @@ -465,7 +415,7 @@ TRACE_EVENT(nfs_atomic_open_exit, TP_STRUCT__entry( __field(unsigned long, error) __field(unsigned long, flags) - __field(unsigned int, fmode) + __field(unsigned long, fmode) __field(dev_t, dev) __field(u64, dir) __string(name, ctx->dentry->d_name.name) @@ -476,7 +426,7 @@ TRACE_EVENT(nfs_atomic_open_exit, __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->flags = flags; - __entry->fmode = (__force unsigned int)ctx->mode; + __entry->fmode = (__force unsigned long)ctx->mode; __assign_str(name, ctx->dentry->d_name.name); ), @@ -485,8 +435,8 @@ TRACE_EVENT(nfs_atomic_open_exit, "name=%02x:%02x:%llu/%s", -__entry->error, nfs_show_status(__entry->error), __entry->flags, - show_open_flags(__entry->flags), - show_fmode_flags(__entry->fmode), + show_fs_fcntl_open_flags(__entry->flags), + show_fs_fmode_flags(__entry->fmode), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) @@ -519,7 +469,7 @@ TRACE_EVENT(nfs_create_enter, TP_printk( "flags=0x%lx (%s) name=%02x:%02x:%llu/%s", __entry->flags, - show_open_flags(__entry->flags), + show_fs_fcntl_open_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) @@ -556,7 +506,7 @@ TRACE_EVENT(nfs_create_exit, "error=%ld (%s) flags=0x%lx 
(%s) name=%02x:%02x:%llu/%s", -__entry->error, nfs_show_status(__entry->error), __entry->flags, - show_open_flags(__entry->flags), + show_fs_fcntl_open_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) diff --git a/include/trace/events/fs.h b/include/trace/events/fs.h new file mode 100644 index 000000000000..738b97f22f36 --- /dev/null +++ b/include/trace/events/fs.h @@ -0,0 +1,122 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Display helpers for generic filesystem items + * + * Author: Chuck Lever + * + * Copyright (c) 2020, Oracle and/or its affiliates. + */ + +#include + +#define show_fs_dirent_type(x) \ + __print_symbolic(x, \ + { DT_UNKNOWN, "UNKNOWN" }, \ + { DT_FIFO, "FIFO" }, \ + { DT_CHR, "CHR" }, \ + { DT_DIR, "DIR" }, \ + { DT_BLK, "BLK" }, \ + { DT_REG, "REG" }, \ + { DT_LNK, "LNK" }, \ + { DT_SOCK, "SOCK" }, \ + { DT_WHT, "WHT" }) + +#define show_fs_fcntl_open_flags(x) \ + __print_flags(x, "|", \ + { O_WRONLY, "O_WRONLY" }, \ + { O_RDWR, "O_RDWR" }, \ + { O_CREAT, "O_CREAT" }, \ + { O_EXCL, "O_EXCL" }, \ + { O_NOCTTY, "O_NOCTTY" }, \ + { O_TRUNC, "O_TRUNC" }, \ + { O_APPEND, "O_APPEND" }, \ + { O_NONBLOCK, "O_NONBLOCK" }, \ + { O_DSYNC, "O_DSYNC" }, \ + { O_DIRECT, "O_DIRECT" }, \ + { O_LARGEFILE, "O_LARGEFILE" }, \ + { O_DIRECTORY, "O_DIRECTORY" }, \ + { O_NOFOLLOW, "O_NOFOLLOW" }, \ + { O_NOATIME, "O_NOATIME" }, \ + { O_CLOEXEC, "O_CLOEXEC" }) + +#define __fmode_flag(x) { (__force unsigned long)FMODE_##x, #x } +#define show_fs_fmode_flags(x) \ + __print_flags(x, "|", \ + __fmode_flag(READ), \ + __fmode_flag(WRITE), \ + __fmode_flag(EXEC)) + +#ifdef CONFIG_64BIT +#define show_fs_fcntl_cmd(x) \ + __print_symbolic(x, \ + { F_DUPFD, "DUPFD" }, \ + { F_GETFD, "GETFD" }, \ + { F_SETFD, "SETFD" }, \ + { F_GETFL, "GETFL" }, \ + { F_SETFL, "SETFL" }, \ + { F_GETLK, "GETLK" }, \ + { F_SETLK, "SETLK" }, \ + { F_SETLKW, "SETLKW" }, \ + { F_SETOWN, "SETOWN" }, \ + { F_GETOWN, "GETOWN" }, \ + { 
F_SETSIG, "SETSIG" }, \ + { F_GETSIG, "GETSIG" }, \ + { F_SETOWN_EX, "SETOWN_EX" }, \ + { F_GETOWN_EX, "GETOWN_EX" }, \ + { F_GETOWNER_UIDS, "GETOWNER_UIDS" }, \ + { F_OFD_GETLK, "OFD_GETLK" }, \ + { F_OFD_SETLK, "OFD_SETLK" }, \ + { F_OFD_SETLKW, "OFD_SETLKW" }) +#else /* CONFIG_64BIT */ +#define show_fs_fcntl_cmd(x) \ + __print_symbolic(x, \ + { F_DUPFD, "DUPFD" }, \ + { F_GETFD, "GETFD" }, \ + { F_SETFD, "SETFD" }, \ + { F_GETFL, "GETFL" }, \ + { F_SETFL, "SETFL" }, \ + { F_GETLK, "GETLK" }, \ + { F_SETLK, "SETLK" }, \ + { F_SETLKW, "SETLKW" }, \ + { F_SETOWN, "SETOWN" }, \ + { F_GETOWN, "GETOWN" }, \ + { F_SETSIG, "SETSIG" }, \ + { F_GETSIG, "GETSIG" }, \ + { F_GETLK64, "GETLK64" }, \ + { F_SETLK64, "SETLK64" }, \ + { F_SETLKW64, "SETLKW64" }, \ + { F_SETOWN_EX, "SETOWN_EX" }, \ + { F_GETOWN_EX, "GETOWN_EX" }, \ + { F_GETOWNER_UIDS, "GETOWNER_UIDS" }, \ + { F_OFD_GETLK, "OFD_GETLK" }, \ + { F_OFD_SETLK, "OFD_SETLK" }, \ + { F_OFD_SETLKW, "OFD_SETLKW" }) +#endif /* CONFIG_64BIT */ + +#define show_fs_fcntl_lock_type(x) \ + __print_symbolic(x, \ + { F_RDLCK, "RDLCK" }, \ + { F_WRLCK, "WRLCK" }, \ + { F_UNLCK, "UNLCK" }) + +#define show_fs_lookup_flags(flags) \ + __print_flags(flags, "|", \ + { LOOKUP_FOLLOW, "FOLLOW" }, \ + { LOOKUP_DIRECTORY, "DIRECTORY" }, \ + { LOOKUP_AUTOMOUNT, "AUTOMOUNT" }, \ + { LOOKUP_EMPTY, "EMPTY" }, \ + { LOOKUP_DOWN, "DOWN" }, \ + { LOOKUP_MOUNTPOINT, "MOUNTPOINT" }, \ + { LOOKUP_REVAL, "REVAL" }, \ + { LOOKUP_RCU, "RCU" }, \ + { LOOKUP_OPEN, "OPEN" }, \ + { LOOKUP_CREATE, "CREATE" }, \ + { LOOKUP_EXCL, "EXCL" }, \ + { LOOKUP_RENAME_TARGET, "RENAME_TARGET" }, \ + { LOOKUP_PARENT, "PARENT" }, \ + { LOOKUP_NO_SYMLINKS, "NO_SYMLINKS" }, \ + { LOOKUP_NO_MAGICLINKS, "NO_MAGICLINKS" }, \ + { LOOKUP_NO_XDEV, "NO_XDEV" }, \ + { LOOKUP_BENEATH, "BENEATH" }, \ + { LOOKUP_IN_ROOT, "IN_ROOT" }, \ + { LOOKUP_CACHED, "CACHED" }) From 8791545eda52e8f3bc48e3cd902e38bf4ba4c9de Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 22 Oct 2021 16:17:03 
-0400 Subject: [PATCH 199/433] NFS: Move NFS protocol display macros to global header Refactor: surface useful show_ macros so they can be shared between the client and server trace code. Additional clean up: - Housekeeping: ensure the correct #include files are pulled in and add proper TRACE_DEFINE_ENUM where they are missing - Use a consistent naming scheme for the helpers - Store values to be displayed symbolically as unsigned long, as that is the type that the __print_yada() functions take Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4trace.h | 403 ++++--------------------------------- fs/nfs/nfstrace.h | 117 ++--------- fs/nfs/pnfs.h | 4 - fs/nfsd/trace.h | 1 + include/linux/nfs4.h | 4 + include/trace/events/nfs.h | 375 ++++++++++++++++++++++++++++++++++ 6 files changed, 433 insertions(+), 471 deletions(-) create mode 100644 include/trace/events/nfs.h diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 424d9cd4c196..18f149f72160 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -12,309 +12,7 @@ #include #include - -TRACE_DEFINE_ENUM(EPERM); -TRACE_DEFINE_ENUM(ENOENT); -TRACE_DEFINE_ENUM(EIO); -TRACE_DEFINE_ENUM(ENXIO); -TRACE_DEFINE_ENUM(EACCES); -TRACE_DEFINE_ENUM(EEXIST); -TRACE_DEFINE_ENUM(EXDEV); -TRACE_DEFINE_ENUM(ENOTDIR); -TRACE_DEFINE_ENUM(EISDIR); -TRACE_DEFINE_ENUM(EFBIG); -TRACE_DEFINE_ENUM(ENOSPC); -TRACE_DEFINE_ENUM(EROFS); -TRACE_DEFINE_ENUM(EMLINK); -TRACE_DEFINE_ENUM(ENAMETOOLONG); -TRACE_DEFINE_ENUM(ENOTEMPTY); -TRACE_DEFINE_ENUM(EDQUOT); -TRACE_DEFINE_ENUM(ESTALE); -TRACE_DEFINE_ENUM(EBADHANDLE); -TRACE_DEFINE_ENUM(EBADCOOKIE); -TRACE_DEFINE_ENUM(ENOTSUPP); -TRACE_DEFINE_ENUM(ETOOSMALL); -TRACE_DEFINE_ENUM(EREMOTEIO); -TRACE_DEFINE_ENUM(EBADTYPE); -TRACE_DEFINE_ENUM(EAGAIN); -TRACE_DEFINE_ENUM(ELOOP); -TRACE_DEFINE_ENUM(EOPNOTSUPP); -TRACE_DEFINE_ENUM(EDEADLK); -TRACE_DEFINE_ENUM(ENOMEM); -TRACE_DEFINE_ENUM(EKEYEXPIRED); -TRACE_DEFINE_ENUM(ETIMEDOUT); -TRACE_DEFINE_ENUM(ERESTARTSYS); 
-TRACE_DEFINE_ENUM(ECONNREFUSED); -TRACE_DEFINE_ENUM(ECONNRESET); -TRACE_DEFINE_ENUM(ENETUNREACH); -TRACE_DEFINE_ENUM(EHOSTUNREACH); -TRACE_DEFINE_ENUM(EHOSTDOWN); -TRACE_DEFINE_ENUM(EPIPE); -TRACE_DEFINE_ENUM(EPFNOSUPPORT); -TRACE_DEFINE_ENUM(EPROTONOSUPPORT); - -TRACE_DEFINE_ENUM(NFS4_OK); -TRACE_DEFINE_ENUM(NFS4ERR_ACCESS); -TRACE_DEFINE_ENUM(NFS4ERR_ATTRNOTSUPP); -TRACE_DEFINE_ENUM(NFS4ERR_ADMIN_REVOKED); -TRACE_DEFINE_ENUM(NFS4ERR_BACK_CHAN_BUSY); -TRACE_DEFINE_ENUM(NFS4ERR_BADCHAR); -TRACE_DEFINE_ENUM(NFS4ERR_BADHANDLE); -TRACE_DEFINE_ENUM(NFS4ERR_BADIOMODE); -TRACE_DEFINE_ENUM(NFS4ERR_BADLAYOUT); -TRACE_DEFINE_ENUM(NFS4ERR_BADLABEL); -TRACE_DEFINE_ENUM(NFS4ERR_BADNAME); -TRACE_DEFINE_ENUM(NFS4ERR_BADOWNER); -TRACE_DEFINE_ENUM(NFS4ERR_BADSESSION); -TRACE_DEFINE_ENUM(NFS4ERR_BADSLOT); -TRACE_DEFINE_ENUM(NFS4ERR_BADTYPE); -TRACE_DEFINE_ENUM(NFS4ERR_BADXDR); -TRACE_DEFINE_ENUM(NFS4ERR_BAD_COOKIE); -TRACE_DEFINE_ENUM(NFS4ERR_BAD_HIGH_SLOT); -TRACE_DEFINE_ENUM(NFS4ERR_BAD_RANGE); -TRACE_DEFINE_ENUM(NFS4ERR_BAD_SEQID); -TRACE_DEFINE_ENUM(NFS4ERR_BAD_SESSION_DIGEST); -TRACE_DEFINE_ENUM(NFS4ERR_BAD_STATEID); -TRACE_DEFINE_ENUM(NFS4ERR_CB_PATH_DOWN); -TRACE_DEFINE_ENUM(NFS4ERR_CLID_INUSE); -TRACE_DEFINE_ENUM(NFS4ERR_CLIENTID_BUSY); -TRACE_DEFINE_ENUM(NFS4ERR_COMPLETE_ALREADY); -TRACE_DEFINE_ENUM(NFS4ERR_CONN_NOT_BOUND_TO_SESSION); -TRACE_DEFINE_ENUM(NFS4ERR_DEADLOCK); -TRACE_DEFINE_ENUM(NFS4ERR_DEADSESSION); -TRACE_DEFINE_ENUM(NFS4ERR_DELAY); -TRACE_DEFINE_ENUM(NFS4ERR_DELEG_ALREADY_WANTED); -TRACE_DEFINE_ENUM(NFS4ERR_DELEG_REVOKED); -TRACE_DEFINE_ENUM(NFS4ERR_DENIED); -TRACE_DEFINE_ENUM(NFS4ERR_DIRDELEG_UNAVAIL); -TRACE_DEFINE_ENUM(NFS4ERR_DQUOT); -TRACE_DEFINE_ENUM(NFS4ERR_ENCR_ALG_UNSUPP); -TRACE_DEFINE_ENUM(NFS4ERR_EXIST); -TRACE_DEFINE_ENUM(NFS4ERR_EXPIRED); -TRACE_DEFINE_ENUM(NFS4ERR_FBIG); -TRACE_DEFINE_ENUM(NFS4ERR_FHEXPIRED); -TRACE_DEFINE_ENUM(NFS4ERR_FILE_OPEN); -TRACE_DEFINE_ENUM(NFS4ERR_GRACE); -TRACE_DEFINE_ENUM(NFS4ERR_HASH_ALG_UNSUPP); 
-TRACE_DEFINE_ENUM(NFS4ERR_INVAL); -TRACE_DEFINE_ENUM(NFS4ERR_IO); -TRACE_DEFINE_ENUM(NFS4ERR_ISDIR); -TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTTRYLATER); -TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTUNAVAILABLE); -TRACE_DEFINE_ENUM(NFS4ERR_LEASE_MOVED); -TRACE_DEFINE_ENUM(NFS4ERR_LOCKED); -TRACE_DEFINE_ENUM(NFS4ERR_LOCKS_HELD); -TRACE_DEFINE_ENUM(NFS4ERR_LOCK_RANGE); -TRACE_DEFINE_ENUM(NFS4ERR_MINOR_VERS_MISMATCH); -TRACE_DEFINE_ENUM(NFS4ERR_MLINK); -TRACE_DEFINE_ENUM(NFS4ERR_MOVED); -TRACE_DEFINE_ENUM(NFS4ERR_NAMETOOLONG); -TRACE_DEFINE_ENUM(NFS4ERR_NOENT); -TRACE_DEFINE_ENUM(NFS4ERR_NOFILEHANDLE); -TRACE_DEFINE_ENUM(NFS4ERR_NOMATCHING_LAYOUT); -TRACE_DEFINE_ENUM(NFS4ERR_NOSPC); -TRACE_DEFINE_ENUM(NFS4ERR_NOTDIR); -TRACE_DEFINE_ENUM(NFS4ERR_NOTEMPTY); -TRACE_DEFINE_ENUM(NFS4ERR_NOTSUPP); -TRACE_DEFINE_ENUM(NFS4ERR_NOT_ONLY_OP); -TRACE_DEFINE_ENUM(NFS4ERR_NOT_SAME); -TRACE_DEFINE_ENUM(NFS4ERR_NO_GRACE); -TRACE_DEFINE_ENUM(NFS4ERR_NXIO); -TRACE_DEFINE_ENUM(NFS4ERR_OLD_STATEID); -TRACE_DEFINE_ENUM(NFS4ERR_OPENMODE); -TRACE_DEFINE_ENUM(NFS4ERR_OP_ILLEGAL); -TRACE_DEFINE_ENUM(NFS4ERR_OP_NOT_IN_SESSION); -TRACE_DEFINE_ENUM(NFS4ERR_PERM); -TRACE_DEFINE_ENUM(NFS4ERR_PNFS_IO_HOLE); -TRACE_DEFINE_ENUM(NFS4ERR_PNFS_NO_LAYOUT); -TRACE_DEFINE_ENUM(NFS4ERR_RECALLCONFLICT); -TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_BAD); -TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_CONFLICT); -TRACE_DEFINE_ENUM(NFS4ERR_REJECT_DELEG); -TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG); -TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG_TO_CACHE); -TRACE_DEFINE_ENUM(NFS4ERR_REQ_TOO_BIG); -TRACE_DEFINE_ENUM(NFS4ERR_RESOURCE); -TRACE_DEFINE_ENUM(NFS4ERR_RESTOREFH); -TRACE_DEFINE_ENUM(NFS4ERR_RETRY_UNCACHED_REP); -TRACE_DEFINE_ENUM(NFS4ERR_RETURNCONFLICT); -TRACE_DEFINE_ENUM(NFS4ERR_ROFS); -TRACE_DEFINE_ENUM(NFS4ERR_SAME); -TRACE_DEFINE_ENUM(NFS4ERR_SHARE_DENIED); -TRACE_DEFINE_ENUM(NFS4ERR_SEQUENCE_POS); -TRACE_DEFINE_ENUM(NFS4ERR_SEQ_FALSE_RETRY); -TRACE_DEFINE_ENUM(NFS4ERR_SEQ_MISORDERED); -TRACE_DEFINE_ENUM(NFS4ERR_SERVERFAULT); 
-TRACE_DEFINE_ENUM(NFS4ERR_STALE); -TRACE_DEFINE_ENUM(NFS4ERR_STALE_CLIENTID); -TRACE_DEFINE_ENUM(NFS4ERR_STALE_STATEID); -TRACE_DEFINE_ENUM(NFS4ERR_SYMLINK); -TRACE_DEFINE_ENUM(NFS4ERR_TOOSMALL); -TRACE_DEFINE_ENUM(NFS4ERR_TOO_MANY_OPS); -TRACE_DEFINE_ENUM(NFS4ERR_UNKNOWN_LAYOUTTYPE); -TRACE_DEFINE_ENUM(NFS4ERR_UNSAFE_COMPOUND); -TRACE_DEFINE_ENUM(NFS4ERR_WRONGSEC); -TRACE_DEFINE_ENUM(NFS4ERR_WRONG_CRED); -TRACE_DEFINE_ENUM(NFS4ERR_WRONG_TYPE); -TRACE_DEFINE_ENUM(NFS4ERR_XDEV); - -TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_MDS); -TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_PNFS); - -#define show_nfsv4_errors(error) \ - __print_symbolic(error, \ - { NFS4_OK, "OK" }, \ - /* Mapped by nfs4_stat_to_errno() */ \ - { EPERM, "EPERM" }, \ - { ENOENT, "ENOENT" }, \ - { EIO, "EIO" }, \ - { ENXIO, "ENXIO" }, \ - { EACCES, "EACCES" }, \ - { EEXIST, "EEXIST" }, \ - { EXDEV, "EXDEV" }, \ - { ENOTDIR, "ENOTDIR" }, \ - { EISDIR, "EISDIR" }, \ - { EFBIG, "EFBIG" }, \ - { ENOSPC, "ENOSPC" }, \ - { EROFS, "EROFS" }, \ - { EMLINK, "EMLINK" }, \ - { ENAMETOOLONG, "ENAMETOOLONG" }, \ - { ENOTEMPTY, "ENOTEMPTY" }, \ - { EDQUOT, "EDQUOT" }, \ - { ESTALE, "ESTALE" }, \ - { EBADHANDLE, "EBADHANDLE" }, \ - { EBADCOOKIE, "EBADCOOKIE" }, \ - { ENOTSUPP, "ENOTSUPP" }, \ - { ETOOSMALL, "ETOOSMALL" }, \ - { EREMOTEIO, "EREMOTEIO" }, \ - { EBADTYPE, "EBADTYPE" }, \ - { EAGAIN, "EAGAIN" }, \ - { ELOOP, "ELOOP" }, \ - { EOPNOTSUPP, "EOPNOTSUPP" }, \ - { EDEADLK, "EDEADLK" }, \ - /* RPC errors */ \ - { ENOMEM, "ENOMEM" }, \ - { EKEYEXPIRED, "EKEYEXPIRED" }, \ - { ETIMEDOUT, "ETIMEDOUT" }, \ - { ERESTARTSYS, "ERESTARTSYS" }, \ - { ECONNREFUSED, "ECONNREFUSED" }, \ - { ECONNRESET, "ECONNRESET" }, \ - { ENETUNREACH, "ENETUNREACH" }, \ - { EHOSTUNREACH, "EHOSTUNREACH" }, \ - { EHOSTDOWN, "EHOSTDOWN" }, \ - { EPIPE, "EPIPE" }, \ - { EPFNOSUPPORT, "EPFNOSUPPORT" }, \ - { EPROTONOSUPPORT, "EPROTONOSUPPORT" }, \ - /* NFSv4 native errors */ \ - { NFS4ERR_ACCESS, "ACCESS" }, \ - { NFS4ERR_ATTRNOTSUPP, "ATTRNOTSUPP" }, \ - { 
NFS4ERR_ADMIN_REVOKED, "ADMIN_REVOKED" }, \ - { NFS4ERR_BACK_CHAN_BUSY, "BACK_CHAN_BUSY" }, \ - { NFS4ERR_BADCHAR, "BADCHAR" }, \ - { NFS4ERR_BADHANDLE, "BADHANDLE" }, \ - { NFS4ERR_BADIOMODE, "BADIOMODE" }, \ - { NFS4ERR_BADLAYOUT, "BADLAYOUT" }, \ - { NFS4ERR_BADLABEL, "BADLABEL" }, \ - { NFS4ERR_BADNAME, "BADNAME" }, \ - { NFS4ERR_BADOWNER, "BADOWNER" }, \ - { NFS4ERR_BADSESSION, "BADSESSION" }, \ - { NFS4ERR_BADSLOT, "BADSLOT" }, \ - { NFS4ERR_BADTYPE, "BADTYPE" }, \ - { NFS4ERR_BADXDR, "BADXDR" }, \ - { NFS4ERR_BAD_COOKIE, "BAD_COOKIE" }, \ - { NFS4ERR_BAD_HIGH_SLOT, "BAD_HIGH_SLOT" }, \ - { NFS4ERR_BAD_RANGE, "BAD_RANGE" }, \ - { NFS4ERR_BAD_SEQID, "BAD_SEQID" }, \ - { NFS4ERR_BAD_SESSION_DIGEST, "BAD_SESSION_DIGEST" }, \ - { NFS4ERR_BAD_STATEID, "BAD_STATEID" }, \ - { NFS4ERR_CB_PATH_DOWN, "CB_PATH_DOWN" }, \ - { NFS4ERR_CLID_INUSE, "CLID_INUSE" }, \ - { NFS4ERR_CLIENTID_BUSY, "CLIENTID_BUSY" }, \ - { NFS4ERR_COMPLETE_ALREADY, "COMPLETE_ALREADY" }, \ - { NFS4ERR_CONN_NOT_BOUND_TO_SESSION, \ - "CONN_NOT_BOUND_TO_SESSION" }, \ - { NFS4ERR_DEADLOCK, "DEADLOCK" }, \ - { NFS4ERR_DEADSESSION, "DEAD_SESSION" }, \ - { NFS4ERR_DELAY, "DELAY" }, \ - { NFS4ERR_DELEG_ALREADY_WANTED, \ - "DELEG_ALREADY_WANTED" }, \ - { NFS4ERR_DELEG_REVOKED, "DELEG_REVOKED" }, \ - { NFS4ERR_DENIED, "DENIED" }, \ - { NFS4ERR_DIRDELEG_UNAVAIL, "DIRDELEG_UNAVAIL" }, \ - { NFS4ERR_DQUOT, "DQUOT" }, \ - { NFS4ERR_ENCR_ALG_UNSUPP, "ENCR_ALG_UNSUPP" }, \ - { NFS4ERR_EXIST, "EXIST" }, \ - { NFS4ERR_EXPIRED, "EXPIRED" }, \ - { NFS4ERR_FBIG, "FBIG" }, \ - { NFS4ERR_FHEXPIRED, "FHEXPIRED" }, \ - { NFS4ERR_FILE_OPEN, "FILE_OPEN" }, \ - { NFS4ERR_GRACE, "GRACE" }, \ - { NFS4ERR_HASH_ALG_UNSUPP, "HASH_ALG_UNSUPP" }, \ - { NFS4ERR_INVAL, "INVAL" }, \ - { NFS4ERR_IO, "IO" }, \ - { NFS4ERR_ISDIR, "ISDIR" }, \ - { NFS4ERR_LAYOUTTRYLATER, "LAYOUTTRYLATER" }, \ - { NFS4ERR_LAYOUTUNAVAILABLE, "LAYOUTUNAVAILABLE" }, \ - { NFS4ERR_LEASE_MOVED, "LEASE_MOVED" }, \ - { NFS4ERR_LOCKED, "LOCKED" }, \ - { 
NFS4ERR_LOCKS_HELD, "LOCKS_HELD" }, \ - { NFS4ERR_LOCK_RANGE, "LOCK_RANGE" }, \ - { NFS4ERR_MINOR_VERS_MISMATCH, "MINOR_VERS_MISMATCH" }, \ - { NFS4ERR_MLINK, "MLINK" }, \ - { NFS4ERR_MOVED, "MOVED" }, \ - { NFS4ERR_NAMETOOLONG, "NAMETOOLONG" }, \ - { NFS4ERR_NOENT, "NOENT" }, \ - { NFS4ERR_NOFILEHANDLE, "NOFILEHANDLE" }, \ - { NFS4ERR_NOMATCHING_LAYOUT, "NOMATCHING_LAYOUT" }, \ - { NFS4ERR_NOSPC, "NOSPC" }, \ - { NFS4ERR_NOTDIR, "NOTDIR" }, \ - { NFS4ERR_NOTEMPTY, "NOTEMPTY" }, \ - { NFS4ERR_NOTSUPP, "NOTSUPP" }, \ - { NFS4ERR_NOT_ONLY_OP, "NOT_ONLY_OP" }, \ - { NFS4ERR_NOT_SAME, "NOT_SAME" }, \ - { NFS4ERR_NO_GRACE, "NO_GRACE" }, \ - { NFS4ERR_NXIO, "NXIO" }, \ - { NFS4ERR_OLD_STATEID, "OLD_STATEID" }, \ - { NFS4ERR_OPENMODE, "OPENMODE" }, \ - { NFS4ERR_OP_ILLEGAL, "OP_ILLEGAL" }, \ - { NFS4ERR_OP_NOT_IN_SESSION, "OP_NOT_IN_SESSION" }, \ - { NFS4ERR_PERM, "PERM" }, \ - { NFS4ERR_PNFS_IO_HOLE, "PNFS_IO_HOLE" }, \ - { NFS4ERR_PNFS_NO_LAYOUT, "PNFS_NO_LAYOUT" }, \ - { NFS4ERR_RECALLCONFLICT, "RECALLCONFLICT" }, \ - { NFS4ERR_RECLAIM_BAD, "RECLAIM_BAD" }, \ - { NFS4ERR_RECLAIM_CONFLICT, "RECLAIM_CONFLICT" }, \ - { NFS4ERR_REJECT_DELEG, "REJECT_DELEG" }, \ - { NFS4ERR_REP_TOO_BIG, "REP_TOO_BIG" }, \ - { NFS4ERR_REP_TOO_BIG_TO_CACHE, \ - "REP_TOO_BIG_TO_CACHE" }, \ - { NFS4ERR_REQ_TOO_BIG, "REQ_TOO_BIG" }, \ - { NFS4ERR_RESOURCE, "RESOURCE" }, \ - { NFS4ERR_RESTOREFH, "RESTOREFH" }, \ - { NFS4ERR_RETRY_UNCACHED_REP, "RETRY_UNCACHED_REP" }, \ - { NFS4ERR_RETURNCONFLICT, "RETURNCONFLICT" }, \ - { NFS4ERR_ROFS, "ROFS" }, \ - { NFS4ERR_SAME, "SAME" }, \ - { NFS4ERR_SHARE_DENIED, "SHARE_DENIED" }, \ - { NFS4ERR_SEQUENCE_POS, "SEQUENCE_POS" }, \ - { NFS4ERR_SEQ_FALSE_RETRY, "SEQ_FALSE_RETRY" }, \ - { NFS4ERR_SEQ_MISORDERED, "SEQ_MISORDERED" }, \ - { NFS4ERR_SERVERFAULT, "SERVERFAULT" }, \ - { NFS4ERR_STALE, "STALE" }, \ - { NFS4ERR_STALE_CLIENTID, "STALE_CLIENTID" }, \ - { NFS4ERR_STALE_STATEID, "STALE_STATEID" }, \ - { NFS4ERR_SYMLINK, "SYMLINK" }, \ - { NFS4ERR_TOOSMALL, 
"TOOSMALL" }, \ - { NFS4ERR_TOO_MANY_OPS, "TOO_MANY_OPS" }, \ - { NFS4ERR_UNKNOWN_LAYOUTTYPE, "UNKNOWN_LAYOUTTYPE" }, \ - { NFS4ERR_UNSAFE_COMPOUND, "UNSAFE_COMPOUND" }, \ - { NFS4ERR_WRONGSEC, "WRONGSEC" }, \ - { NFS4ERR_WRONG_CRED, "WRONG_CRED" }, \ - { NFS4ERR_WRONG_TYPE, "WRONG_TYPE" }, \ - { NFS4ERR_XDEV, "XDEV" }, \ - /* ***** Internal to Linux NFS client ***** */ \ - { NFS4ERR_RESET_TO_MDS, "RESET_TO_MDS" }, \ - { NFS4ERR_RESET_TO_PNFS, "RESET_TO_PNFS" }) +#include #define show_nfs_fattr_flags(valid) \ __print_flags((unsigned long)valid, "|", \ @@ -355,7 +53,7 @@ DECLARE_EVENT_CLASS(nfs4_clientid_event, TP_printk( "error=%ld (%s) dstaddr=%s", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), __get_str(dstaddr) ) ); @@ -379,29 +77,6 @@ DEFINE_NFS4_CLIENTID_EVENT(nfs4_bind_conn_to_session); DEFINE_NFS4_CLIENTID_EVENT(nfs4_sequence); DEFINE_NFS4_CLIENTID_EVENT(nfs4_reclaim_complete); -#define show_nfs4_sequence_status_flags(status) \ - __print_flags((unsigned long)status, "|", \ - { SEQ4_STATUS_CB_PATH_DOWN, "CB_PATH_DOWN" }, \ - { SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING, \ - "CB_GSS_CONTEXTS_EXPIRING" }, \ - { SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED, \ - "CB_GSS_CONTEXTS_EXPIRED" }, \ - { SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED, \ - "EXPIRED_ALL_STATE_REVOKED" }, \ - { SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED, \ - "EXPIRED_SOME_STATE_REVOKED" }, \ - { SEQ4_STATUS_ADMIN_STATE_REVOKED, \ - "ADMIN_STATE_REVOKED" }, \ - { SEQ4_STATUS_RECALLABLE_STATE_REVOKED, \ - "RECALLABLE_STATE_REVOKED" }, \ - { SEQ4_STATUS_LEASE_MOVED, "LEASE_MOVED" }, \ - { SEQ4_STATUS_RESTART_RECLAIM_NEEDED, \ - "RESTART_RECLAIM_NEEDED" }, \ - { SEQ4_STATUS_CB_PATH_DOWN_SESSION, \ - "CB_PATH_DOWN_SESSION" }, \ - { SEQ4_STATUS_BACKCHANNEL_FAULT, \ - "BACKCHANNEL_FAULT" }) - TRACE_EVENT(nfs4_sequence_done, TP_PROTO( const struct nfs4_session *session, @@ -415,7 +90,7 @@ TRACE_EVENT(nfs4_sequence_done, __field(unsigned int, seq_nr) __field(unsigned int, 
highest_slotid) __field(unsigned int, target_highest_slotid) - __field(unsigned int, status_flags) + __field(unsigned long, status_flags) __field(unsigned long, error) ), @@ -434,16 +109,16 @@ TRACE_EVENT(nfs4_sequence_done, TP_printk( "error=%ld (%s) session=0x%08x slot_nr=%u seq_nr=%u " "highest_slotid=%u target_highest_slotid=%u " - "status_flags=%u (%s)", + "status_flags=0x%lx (%s)", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), __entry->session, __entry->slot_nr, __entry->seq_nr, __entry->highest_slotid, __entry->target_highest_slotid, __entry->status_flags, - show_nfs4_sequence_status_flags(__entry->status_flags) + show_nfs4_seq4_status(__entry->status_flags) ) ); @@ -480,7 +155,7 @@ TRACE_EVENT(nfs4_cb_sequence, "error=%ld (%s) session=0x%08x slot_nr=%u seq_nr=%u " "highest_slotid=%u", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), __entry->session, __entry->slot_nr, __entry->seq_nr, @@ -517,7 +192,7 @@ TRACE_EVENT(nfs4_cb_seqid_err, "error=%ld (%s) session=0x%08x slot_nr=%u seq_nr=%u " "highest_slotid=%u", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), __entry->session, __entry->slot_nr, __entry->seq_nr, @@ -651,7 +326,7 @@ TRACE_EVENT(nfs4_state_mgr_failed, "hostname=%s clp state=%s error=%ld (%s) section=%s", __get_str(hostname), show_nfs4_clp_state(__entry->state), -__entry->error, - show_nfsv4_errors(__entry->error), __get_str(section) + show_nfs4_status(__entry->error), __get_str(section) ) ) @@ -722,7 +397,7 @@ DECLARE_EVENT_CLASS(nfs4_xdr_event, TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x error=%ld (%s) operation=%u", __entry->task_id, __entry->client_id, __entry->xid, - -__entry->error, show_nfsv4_errors(__entry->error), + -__entry->error, show_nfs4_status(__entry->error), __entry->op ) ); @@ -837,7 +512,7 @@ DECLARE_EVENT_CLASS(nfs4_open_event, "name=%02x:%02x:%llu/%s stateid=%d:0x%08x " "openstateid=%d:0x%08x", 
-__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), __entry->flags, show_fs_fcntl_open_flags(__entry->flags), show_fs_fmode_flags(__entry->fmode), @@ -941,7 +616,7 @@ TRACE_EVENT(nfs4_close, "error=%ld (%s) fmode=%s fileid=%02x:%02x:%llu " "fhandle=0x%08x openstateid=%d:0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), __entry->fmode ? show_fs_fmode_flags(__entry->fmode) : "closed", MAJOR(__entry->dev), MINOR(__entry->dev), @@ -996,7 +671,7 @@ DECLARE_EVENT_CLASS(nfs4_lock_event, "fileid=%02x:%02x:%llu fhandle=0x%08x " "stateid=%d:0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), show_fs_fcntl_cmd(__entry->cmd), show_fs_fcntl_lock_type(__entry->type), (long long)__entry->start, @@ -1072,7 +747,7 @@ TRACE_EVENT(nfs4_set_lock, "fileid=%02x:%02x:%llu fhandle=0x%08x " "stateid=%d:0x%08x lockstateid=%d:0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), show_fs_fcntl_cmd(__entry->cmd), show_fs_fcntl_lock_type(__entry->type), (long long)__entry->start, @@ -1238,7 +913,7 @@ TRACE_EVENT(nfs4_delegreturn_exit, "error=%ld (%s) dev=%02x:%02x fhandle=0x%08x " "stateid=%d:0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), __entry->fhandle, __entry->stateid_seq, __entry->stateid_hash @@ -1281,7 +956,7 @@ DECLARE_EVENT_CLASS(nfs4_test_stateid_event, "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "stateid=%d:0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -1328,7 +1003,7 @@ DECLARE_EVENT_CLASS(nfs4_lookup_event, TP_printk( "error=%ld (%s) name=%02x:%02x:%llu/%s", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), 
MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) @@ -1375,7 +1050,7 @@ TRACE_EVENT(nfs4_lookupp, TP_printk( "error=%ld (%s) inode=%02x:%02x:%llu", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->ino ) @@ -1414,7 +1089,7 @@ TRACE_EVENT(nfs4_rename, "error=%ld (%s) oldname=%02x:%02x:%llu/%s " "newname=%02x:%02x:%llu/%s", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->olddir, __get_str(oldname), @@ -1449,7 +1124,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_event, TP_printk( "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle @@ -1507,7 +1182,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_event, "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "stateid=%d:0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -1560,7 +1235,7 @@ DECLARE_EVENT_CLASS(nfs4_getattr_event, "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "valid=%s", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -1616,7 +1291,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event, "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "dstaddr=%s", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -1677,7 +1352,7 @@ 
DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event, "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "stateid=%d:0x%08x dstaddr=%s", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -1726,7 +1401,7 @@ DECLARE_EVENT_CLASS(nfs4_idmap_event, TP_printk( "error=%ld (%s) id=%u name=%s", - -__entry->error, show_nfsv4_errors(__entry->error), + -__entry->error, show_nfs4_status(__entry->error), __entry->id, __get_str(name) ) @@ -1804,7 +1479,7 @@ DECLARE_EVENT_CLASS(nfs4_read_event, "offset=%lld count=%u res=%u stateid=%d:0x%08x " "layoutstateid=%d:0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -1878,7 +1553,7 @@ DECLARE_EVENT_CLASS(nfs4_write_event, "offset=%lld count=%u res=%u stateid=%d:0x%08x " "layoutstateid=%d:0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -1942,7 +1617,7 @@ DECLARE_EVENT_CLASS(nfs4_commit_event, "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%lld count=%u layoutstateid=%d:0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -1962,16 +1637,6 @@ DEFINE_NFS4_COMMIT_EVENT(nfs4_commit); #ifdef CONFIG_NFS_V4_1 DEFINE_NFS4_COMMIT_EVENT(nfs4_pnfs_commit_ds); -TRACE_DEFINE_ENUM(IOMODE_READ); -TRACE_DEFINE_ENUM(IOMODE_RW); -TRACE_DEFINE_ENUM(IOMODE_ANY); - -#define show_pnfs_iomode(iomode) \ - __print_symbolic(iomode, \ - { IOMODE_READ, "READ" }, \ - { IOMODE_RW, "RW" }, \ - { IOMODE_ANY, "ANY" }) - TRACE_EVENT(nfs4_layoutget, TP_PROTO( const struct 
nfs_open_context *ctx, @@ -2027,11 +1692,11 @@ TRACE_EVENT(nfs4_layoutget, "iomode=%s offset=%llu count=%llu stateid=%d:0x%08x " "layoutstateid=%d:0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, - show_pnfs_iomode(__entry->iomode), + show_pnfs_layout_iomode(__entry->iomode), (unsigned long long)__entry->offset, (unsigned long long)__entry->count, __entry->stateid_seq, __entry->stateid_hash, @@ -2125,7 +1790,7 @@ TRACE_EVENT(pnfs_update_layout, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, - show_pnfs_iomode(__entry->iomode), + show_pnfs_layout_iomode(__entry->iomode), (unsigned long long)__entry->pos, (unsigned long long)__entry->count, __entry->layoutstateid_seq, __entry->layoutstateid_hash, @@ -2179,7 +1844,7 @@ DECLARE_EVENT_CLASS(pnfs_layout_event, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, - show_pnfs_iomode(__entry->iomode), + show_pnfs_layout_iomode(__entry->iomode), (unsigned long long)__entry->pos, (unsigned long long)__entry->count, __entry->layoutstateid_seq, __entry->layoutstateid_hash, @@ -2324,7 +1989,7 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event, "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%llu count=%u stateid=%d:0x%08x dstaddr=%s", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -2380,7 +2045,7 @@ TRACE_EVENT(ff_layout_commit_error, "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%llu count=%u dstaddr=%s", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, diff --git a/fs/nfs/nfstrace.h 
b/fs/nfs/nfstrace.h index 331bcc0c0a75..76f04aa3367c 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -12,6 +12,7 @@ #include #include +#include #include #define nfs_show_cache_validity(v) \ @@ -115,7 +116,7 @@ DECLARE_EVENT_CLASS(nfs_inode_event_done, "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "type=%u (%s) version=%llu size=%lld " "cache_validity=0x%lx (%s) nfs_flags=0x%lx (%s)", - -__entry->error, nfs_show_status(__entry->error), + -__entry->error, show_nfs_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -206,7 +207,7 @@ TRACE_EVENT(nfs_access_exit, "type=%u (%s) version=%llu size=%lld " "cache_validity=0x%lx (%s) nfs_flags=0x%lx (%s) " "mask=0x%x permitted=0x%x", - -__entry->error, nfs_show_status(__entry->error), + -__entry->error, show_nfs_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -342,7 +343,7 @@ DECLARE_EVENT_CLASS(nfs_lookup_event_done, TP_printk( "error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s", - -__entry->error, nfs_show_status(__entry->error), + -__entry->error, show_nfs_status(__entry->error), __entry->flags, show_fs_lookup_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), @@ -433,7 +434,7 @@ TRACE_EVENT(nfs_atomic_open_exit, TP_printk( "error=%ld (%s) flags=0x%lx (%s) fmode=%s " "name=%02x:%02x:%llu/%s", - -__entry->error, nfs_show_status(__entry->error), + -__entry->error, show_nfs_status(__entry->error), __entry->flags, show_fs_fcntl_open_flags(__entry->flags), show_fs_fmode_flags(__entry->fmode), @@ -504,7 +505,7 @@ TRACE_EVENT(nfs_create_exit, TP_printk( "error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s", - -__entry->error, nfs_show_status(__entry->error), + -__entry->error, show_nfs_status(__entry->error), __entry->flags, show_fs_fcntl_open_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), @@ -574,7 +575,7 @@ 
DECLARE_EVENT_CLASS(nfs_directory_event_done, TP_printk( "error=%ld (%s) name=%02x:%02x:%llu/%s", - -__entry->error, nfs_show_status(__entry->error), + -__entry->error, show_nfs_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) @@ -664,7 +665,7 @@ TRACE_EVENT(nfs_link_exit, TP_printk( "error=%ld (%s) fileid=%02x:%02x:%llu name=%02x:%02x:%llu/%s", - -__entry->error, nfs_show_status(__entry->error), + -__entry->error, show_nfs_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), __entry->fileid, MAJOR(__entry->dev), MINOR(__entry->dev), @@ -751,7 +752,7 @@ DECLARE_EVENT_CLASS(nfs_rename_event_done, TP_printk( "error=%ld (%s) old_name=%02x:%02x:%llu/%s " "new_name=%02x:%02x:%llu/%s", - -__entry->error, nfs_show_status(__entry->error), + -__entry->error, show_nfs_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->old_dir, __get_str(old_name), @@ -805,7 +806,7 @@ TRACE_EVENT(nfs_sillyrename_unlink, TP_printk( "error=%ld (%s) name=%02x:%02x:%llu/%s", - -__entry->error, nfs_show_status(__entry->error), + -__entry->error, show_nfs_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) @@ -1134,16 +1135,6 @@ TRACE_EVENT(nfs_pgio_error, ) ); -TRACE_DEFINE_ENUM(NFS_UNSTABLE); -TRACE_DEFINE_ENUM(NFS_DATA_SYNC); -TRACE_DEFINE_ENUM(NFS_FILE_SYNC); - -#define nfs_show_stable(stable) \ - __print_symbolic(stable, \ - { NFS_UNSTABLE, "UNSTABLE" }, \ - { NFS_DATA_SYNC, "DATA_SYNC" }, \ - { NFS_FILE_SYNC, "FILE_SYNC" }) - TRACE_EVENT(nfs_initiate_write, TP_PROTO( const struct nfs_pgio_header *hdr @@ -1157,7 +1148,7 @@ TRACE_EVENT(nfs_initiate_write, __field(u64, fileid) __field(loff_t, offset) __field(u32, count) - __field(enum nfs3_stable_how, stable) + __field(unsigned long, stable) ), TP_fast_assign( @@ -1181,7 +1172,7 @@ TRACE_EVENT(nfs_initiate_write, (unsigned long long)__entry->fileid, 
__entry->fhandle, (long long)__entry->offset, __entry->count, - nfs_show_stable(__entry->stable) + show_nfs_stable_how(__entry->stable) ) ); @@ -1201,7 +1192,7 @@ TRACE_EVENT(nfs_writeback_done, __field(u32, arg_count) __field(u32, res_count) __field(int, status) - __field(enum nfs3_stable_how, stable) + __field(unsigned long, stable) __array(char, verifier, NFS4_VERIFIER_SIZE) ), @@ -1234,8 +1225,8 @@ TRACE_EVENT(nfs_writeback_done, __entry->fhandle, (long long)__entry->offset, __entry->arg_count, __entry->res_count, __entry->status, - nfs_show_stable(__entry->stable), - __print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE) + show_nfs_stable_how(__entry->stable), + show_nfs4_verifier(__entry->verifier) ) ); @@ -1336,7 +1327,7 @@ TRACE_EVENT(nfs_commit_done, __field(u64, fileid) __field(loff_t, offset) __field(int, status) - __field(enum nfs3_stable_how, stable) + __field(unsigned long, stable) __array(char, verifier, NFS4_VERIFIER_SIZE) ), @@ -1365,8 +1356,8 @@ TRACE_EVENT(nfs_commit_done, (unsigned long long)__entry->fileid, __entry->fhandle, (long long)__entry->offset, __entry->status, - nfs_show_stable(__entry->stable), - __print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE) + show_nfs_stable_how(__entry->stable), + show_nfs4_verifier(__entry->verifier) ) ); @@ -1403,76 +1394,6 @@ TRACE_EVENT(nfs_fh_to_dentry, ) ); -TRACE_DEFINE_ENUM(NFS_OK); -TRACE_DEFINE_ENUM(NFSERR_PERM); -TRACE_DEFINE_ENUM(NFSERR_NOENT); -TRACE_DEFINE_ENUM(NFSERR_IO); -TRACE_DEFINE_ENUM(NFSERR_NXIO); -TRACE_DEFINE_ENUM(ECHILD); -TRACE_DEFINE_ENUM(NFSERR_EAGAIN); -TRACE_DEFINE_ENUM(NFSERR_ACCES); -TRACE_DEFINE_ENUM(NFSERR_EXIST); -TRACE_DEFINE_ENUM(NFSERR_XDEV); -TRACE_DEFINE_ENUM(NFSERR_NODEV); -TRACE_DEFINE_ENUM(NFSERR_NOTDIR); -TRACE_DEFINE_ENUM(NFSERR_ISDIR); -TRACE_DEFINE_ENUM(NFSERR_INVAL); -TRACE_DEFINE_ENUM(NFSERR_FBIG); -TRACE_DEFINE_ENUM(NFSERR_NOSPC); -TRACE_DEFINE_ENUM(NFSERR_ROFS); -TRACE_DEFINE_ENUM(NFSERR_MLINK); -TRACE_DEFINE_ENUM(NFSERR_OPNOTSUPP); 
-TRACE_DEFINE_ENUM(NFSERR_NAMETOOLONG); -TRACE_DEFINE_ENUM(NFSERR_NOTEMPTY); -TRACE_DEFINE_ENUM(NFSERR_DQUOT); -TRACE_DEFINE_ENUM(NFSERR_STALE); -TRACE_DEFINE_ENUM(NFSERR_REMOTE); -TRACE_DEFINE_ENUM(NFSERR_WFLUSH); -TRACE_DEFINE_ENUM(NFSERR_BADHANDLE); -TRACE_DEFINE_ENUM(NFSERR_NOT_SYNC); -TRACE_DEFINE_ENUM(NFSERR_BAD_COOKIE); -TRACE_DEFINE_ENUM(NFSERR_NOTSUPP); -TRACE_DEFINE_ENUM(NFSERR_TOOSMALL); -TRACE_DEFINE_ENUM(NFSERR_SERVERFAULT); -TRACE_DEFINE_ENUM(NFSERR_BADTYPE); -TRACE_DEFINE_ENUM(NFSERR_JUKEBOX); - -#define nfs_show_status(x) \ - __print_symbolic(x, \ - { NFS_OK, "OK" }, \ - { NFSERR_PERM, "PERM" }, \ - { NFSERR_NOENT, "NOENT" }, \ - { NFSERR_IO, "IO" }, \ - { NFSERR_NXIO, "NXIO" }, \ - { ECHILD, "CHILD" }, \ - { NFSERR_EAGAIN, "AGAIN" }, \ - { NFSERR_ACCES, "ACCES" }, \ - { NFSERR_EXIST, "EXIST" }, \ - { NFSERR_XDEV, "XDEV" }, \ - { NFSERR_NODEV, "NODEV" }, \ - { NFSERR_NOTDIR, "NOTDIR" }, \ - { NFSERR_ISDIR, "ISDIR" }, \ - { NFSERR_INVAL, "INVAL" }, \ - { NFSERR_FBIG, "FBIG" }, \ - { NFSERR_NOSPC, "NOSPC" }, \ - { NFSERR_ROFS, "ROFS" }, \ - { NFSERR_MLINK, "MLINK" }, \ - { NFSERR_OPNOTSUPP, "OPNOTSUPP" }, \ - { NFSERR_NAMETOOLONG, "NAMETOOLONG" }, \ - { NFSERR_NOTEMPTY, "NOTEMPTY" }, \ - { NFSERR_DQUOT, "DQUOT" }, \ - { NFSERR_STALE, "STALE" }, \ - { NFSERR_REMOTE, "REMOTE" }, \ - { NFSERR_WFLUSH, "WFLUSH" }, \ - { NFSERR_BADHANDLE, "BADHANDLE" }, \ - { NFSERR_NOT_SYNC, "NOTSYNC" }, \ - { NFSERR_BAD_COOKIE, "BADCOOKIE" }, \ - { NFSERR_NOTSUPP, "NOTSUPP" }, \ - { NFSERR_TOOSMALL, "TOOSMALL" }, \ - { NFSERR_SERVERFAULT, "REMOTEIO" }, \ - { NFSERR_BADTYPE, "BADTYPE" }, \ - { NFSERR_JUKEBOX, "JUKEBOX" }) - DECLARE_EVENT_CLASS(nfs_xdr_event, TP_PROTO( const struct xdr_stream *xdr, @@ -1512,7 +1433,7 @@ DECLARE_EVENT_CLASS(nfs_xdr_event, __entry->task_id, __entry->client_id, __entry->xid, __get_str(program), __entry->version, __get_str(procedure), -__entry->error, - nfs_show_status(__entry->error) + show_nfs_status(__entry->error) ) ); #define 
DEFINE_NFS_XDR_EVENT(name) \ diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index a0f6ff094b3a..f4d7548d67b2 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -82,10 +82,6 @@ enum pnfs_try_status { PNFS_TRY_AGAIN = 2, }; -/* error codes for internal use */ -#define NFS4ERR_RESET_TO_MDS 12001 -#define NFS4ERR_RESET_TO_PNFS 12002 - #ifdef CONFIG_NFS_V4_1 #define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4" diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 538520957a81..f1e0d3c51bc2 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -9,6 +9,7 @@ #define _NFSD_TRACE_H #include + #include "export.h" #include "nfsfh.h" diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 15004c469807..5662d8be04eb 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -292,6 +292,10 @@ enum nfsstat4 { NFS4ERR_XATTR2BIG = 10096, }; +/* error codes for internal client use */ +#define NFS4ERR_RESET_TO_MDS 12001 +#define NFS4ERR_RESET_TO_PNFS 12002 + static inline bool seqid_mutating_err(u32 err) { /* See RFC 7530, section 9.1.7 */ diff --git a/include/trace/events/nfs.h b/include/trace/events/nfs.h new file mode 100644 index 000000000000..09ffdbb04134 --- /dev/null +++ b/include/trace/events/nfs.h @@ -0,0 +1,375 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Display helpers for NFS protocol elements + * + * Author: Chuck Lever + * + * Copyright (c) 2020, Oracle and/or its affiliates. 
+ */ + +#include +#include +#include + +TRACE_DEFINE_ENUM(NFS_OK); +TRACE_DEFINE_ENUM(NFSERR_PERM); +TRACE_DEFINE_ENUM(NFSERR_NOENT); +TRACE_DEFINE_ENUM(NFSERR_IO); +TRACE_DEFINE_ENUM(NFSERR_NXIO); +TRACE_DEFINE_ENUM(NFSERR_EAGAIN); +TRACE_DEFINE_ENUM(NFSERR_ACCES); +TRACE_DEFINE_ENUM(NFSERR_EXIST); +TRACE_DEFINE_ENUM(NFSERR_XDEV); +TRACE_DEFINE_ENUM(NFSERR_NODEV); +TRACE_DEFINE_ENUM(NFSERR_NOTDIR); +TRACE_DEFINE_ENUM(NFSERR_ISDIR); +TRACE_DEFINE_ENUM(NFSERR_INVAL); +TRACE_DEFINE_ENUM(NFSERR_FBIG); +TRACE_DEFINE_ENUM(NFSERR_NOSPC); +TRACE_DEFINE_ENUM(NFSERR_ROFS); +TRACE_DEFINE_ENUM(NFSERR_MLINK); +TRACE_DEFINE_ENUM(NFSERR_OPNOTSUPP); +TRACE_DEFINE_ENUM(NFSERR_NAMETOOLONG); +TRACE_DEFINE_ENUM(NFSERR_NOTEMPTY); +TRACE_DEFINE_ENUM(NFSERR_DQUOT); +TRACE_DEFINE_ENUM(NFSERR_STALE); +TRACE_DEFINE_ENUM(NFSERR_REMOTE); +TRACE_DEFINE_ENUM(NFSERR_WFLUSH); +TRACE_DEFINE_ENUM(NFSERR_BADHANDLE); +TRACE_DEFINE_ENUM(NFSERR_NOT_SYNC); +TRACE_DEFINE_ENUM(NFSERR_BAD_COOKIE); +TRACE_DEFINE_ENUM(NFSERR_NOTSUPP); +TRACE_DEFINE_ENUM(NFSERR_TOOSMALL); +TRACE_DEFINE_ENUM(NFSERR_SERVERFAULT); +TRACE_DEFINE_ENUM(NFSERR_BADTYPE); +TRACE_DEFINE_ENUM(NFSERR_JUKEBOX); + +#define show_nfs_status(x) \ + __print_symbolic(x, \ + { NFS_OK, "OK" }, \ + { NFSERR_PERM, "PERM" }, \ + { NFSERR_NOENT, "NOENT" }, \ + { NFSERR_IO, "IO" }, \ + { NFSERR_NXIO, "NXIO" }, \ + { ECHILD, "CHILD" }, \ + { NFSERR_EAGAIN, "AGAIN" }, \ + { NFSERR_ACCES, "ACCES" }, \ + { NFSERR_EXIST, "EXIST" }, \ + { NFSERR_XDEV, "XDEV" }, \ + { NFSERR_NODEV, "NODEV" }, \ + { NFSERR_NOTDIR, "NOTDIR" }, \ + { NFSERR_ISDIR, "ISDIR" }, \ + { NFSERR_INVAL, "INVAL" }, \ + { NFSERR_FBIG, "FBIG" }, \ + { NFSERR_NOSPC, "NOSPC" }, \ + { NFSERR_ROFS, "ROFS" }, \ + { NFSERR_MLINK, "MLINK" }, \ + { NFSERR_OPNOTSUPP, "OPNOTSUPP" }, \ + { NFSERR_NAMETOOLONG, "NAMETOOLONG" }, \ + { NFSERR_NOTEMPTY, "NOTEMPTY" }, \ + { NFSERR_DQUOT, "DQUOT" }, \ + { NFSERR_STALE, "STALE" }, \ + { NFSERR_REMOTE, "REMOTE" }, \ + { NFSERR_WFLUSH, "WFLUSH" }, \ + { 
NFSERR_BADHANDLE, "BADHANDLE" }, \ + { NFSERR_NOT_SYNC, "NOTSYNC" }, \ + { NFSERR_BAD_COOKIE, "BADCOOKIE" }, \ + { NFSERR_NOTSUPP, "NOTSUPP" }, \ + { NFSERR_TOOSMALL, "TOOSMALL" }, \ + { NFSERR_SERVERFAULT, "REMOTEIO" }, \ + { NFSERR_BADTYPE, "BADTYPE" }, \ + { NFSERR_JUKEBOX, "JUKEBOX" }) + +TRACE_DEFINE_ENUM(NFS_UNSTABLE); +TRACE_DEFINE_ENUM(NFS_DATA_SYNC); +TRACE_DEFINE_ENUM(NFS_FILE_SYNC); + +#define show_nfs_stable_how(x) \ + __print_symbolic(x, \ + { NFS_UNSTABLE, "UNSTABLE" }, \ + { NFS_DATA_SYNC, "DATA_SYNC" }, \ + { NFS_FILE_SYNC, "FILE_SYNC" }) + +TRACE_DEFINE_ENUM(NFS4_OK); +TRACE_DEFINE_ENUM(NFS4ERR_ACCESS); +TRACE_DEFINE_ENUM(NFS4ERR_ATTRNOTSUPP); +TRACE_DEFINE_ENUM(NFS4ERR_ADMIN_REVOKED); +TRACE_DEFINE_ENUM(NFS4ERR_BACK_CHAN_BUSY); +TRACE_DEFINE_ENUM(NFS4ERR_BADCHAR); +TRACE_DEFINE_ENUM(NFS4ERR_BADHANDLE); +TRACE_DEFINE_ENUM(NFS4ERR_BADIOMODE); +TRACE_DEFINE_ENUM(NFS4ERR_BADLAYOUT); +TRACE_DEFINE_ENUM(NFS4ERR_BADLABEL); +TRACE_DEFINE_ENUM(NFS4ERR_BADNAME); +TRACE_DEFINE_ENUM(NFS4ERR_BADOWNER); +TRACE_DEFINE_ENUM(NFS4ERR_BADSESSION); +TRACE_DEFINE_ENUM(NFS4ERR_BADSLOT); +TRACE_DEFINE_ENUM(NFS4ERR_BADTYPE); +TRACE_DEFINE_ENUM(NFS4ERR_BADXDR); +TRACE_DEFINE_ENUM(NFS4ERR_BAD_COOKIE); +TRACE_DEFINE_ENUM(NFS4ERR_BAD_HIGH_SLOT); +TRACE_DEFINE_ENUM(NFS4ERR_BAD_RANGE); +TRACE_DEFINE_ENUM(NFS4ERR_BAD_SEQID); +TRACE_DEFINE_ENUM(NFS4ERR_BAD_SESSION_DIGEST); +TRACE_DEFINE_ENUM(NFS4ERR_BAD_STATEID); +TRACE_DEFINE_ENUM(NFS4ERR_CB_PATH_DOWN); +TRACE_DEFINE_ENUM(NFS4ERR_CLID_INUSE); +TRACE_DEFINE_ENUM(NFS4ERR_CLIENTID_BUSY); +TRACE_DEFINE_ENUM(NFS4ERR_COMPLETE_ALREADY); +TRACE_DEFINE_ENUM(NFS4ERR_CONN_NOT_BOUND_TO_SESSION); +TRACE_DEFINE_ENUM(NFS4ERR_DEADLOCK); +TRACE_DEFINE_ENUM(NFS4ERR_DEADSESSION); +TRACE_DEFINE_ENUM(NFS4ERR_DELAY); +TRACE_DEFINE_ENUM(NFS4ERR_DELEG_ALREADY_WANTED); +TRACE_DEFINE_ENUM(NFS4ERR_DELEG_REVOKED); +TRACE_DEFINE_ENUM(NFS4ERR_DENIED); +TRACE_DEFINE_ENUM(NFS4ERR_DIRDELEG_UNAVAIL); +TRACE_DEFINE_ENUM(NFS4ERR_DQUOT); 
+TRACE_DEFINE_ENUM(NFS4ERR_ENCR_ALG_UNSUPP); +TRACE_DEFINE_ENUM(NFS4ERR_EXIST); +TRACE_DEFINE_ENUM(NFS4ERR_EXPIRED); +TRACE_DEFINE_ENUM(NFS4ERR_FBIG); +TRACE_DEFINE_ENUM(NFS4ERR_FHEXPIRED); +TRACE_DEFINE_ENUM(NFS4ERR_FILE_OPEN); +TRACE_DEFINE_ENUM(NFS4ERR_GRACE); +TRACE_DEFINE_ENUM(NFS4ERR_HASH_ALG_UNSUPP); +TRACE_DEFINE_ENUM(NFS4ERR_INVAL); +TRACE_DEFINE_ENUM(NFS4ERR_IO); +TRACE_DEFINE_ENUM(NFS4ERR_ISDIR); +TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTTRYLATER); +TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTUNAVAILABLE); +TRACE_DEFINE_ENUM(NFS4ERR_LEASE_MOVED); +TRACE_DEFINE_ENUM(NFS4ERR_LOCKED); +TRACE_DEFINE_ENUM(NFS4ERR_LOCKS_HELD); +TRACE_DEFINE_ENUM(NFS4ERR_LOCK_RANGE); +TRACE_DEFINE_ENUM(NFS4ERR_MINOR_VERS_MISMATCH); +TRACE_DEFINE_ENUM(NFS4ERR_MLINK); +TRACE_DEFINE_ENUM(NFS4ERR_MOVED); +TRACE_DEFINE_ENUM(NFS4ERR_NAMETOOLONG); +TRACE_DEFINE_ENUM(NFS4ERR_NOENT); +TRACE_DEFINE_ENUM(NFS4ERR_NOFILEHANDLE); +TRACE_DEFINE_ENUM(NFS4ERR_NOMATCHING_LAYOUT); +TRACE_DEFINE_ENUM(NFS4ERR_NOSPC); +TRACE_DEFINE_ENUM(NFS4ERR_NOTDIR); +TRACE_DEFINE_ENUM(NFS4ERR_NOTEMPTY); +TRACE_DEFINE_ENUM(NFS4ERR_NOTSUPP); +TRACE_DEFINE_ENUM(NFS4ERR_NOT_ONLY_OP); +TRACE_DEFINE_ENUM(NFS4ERR_NOT_SAME); +TRACE_DEFINE_ENUM(NFS4ERR_NO_GRACE); +TRACE_DEFINE_ENUM(NFS4ERR_NXIO); +TRACE_DEFINE_ENUM(NFS4ERR_OLD_STATEID); +TRACE_DEFINE_ENUM(NFS4ERR_OPENMODE); +TRACE_DEFINE_ENUM(NFS4ERR_OP_ILLEGAL); +TRACE_DEFINE_ENUM(NFS4ERR_OP_NOT_IN_SESSION); +TRACE_DEFINE_ENUM(NFS4ERR_PERM); +TRACE_DEFINE_ENUM(NFS4ERR_PNFS_IO_HOLE); +TRACE_DEFINE_ENUM(NFS4ERR_PNFS_NO_LAYOUT); +TRACE_DEFINE_ENUM(NFS4ERR_RECALLCONFLICT); +TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_BAD); +TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_CONFLICT); +TRACE_DEFINE_ENUM(NFS4ERR_REJECT_DELEG); +TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG); +TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG_TO_CACHE); +TRACE_DEFINE_ENUM(NFS4ERR_REQ_TOO_BIG); +TRACE_DEFINE_ENUM(NFS4ERR_RESOURCE); +TRACE_DEFINE_ENUM(NFS4ERR_RESTOREFH); +TRACE_DEFINE_ENUM(NFS4ERR_RETRY_UNCACHED_REP); +TRACE_DEFINE_ENUM(NFS4ERR_RETURNCONFLICT); 
+TRACE_DEFINE_ENUM(NFS4ERR_ROFS); +TRACE_DEFINE_ENUM(NFS4ERR_SAME); +TRACE_DEFINE_ENUM(NFS4ERR_SHARE_DENIED); +TRACE_DEFINE_ENUM(NFS4ERR_SEQUENCE_POS); +TRACE_DEFINE_ENUM(NFS4ERR_SEQ_FALSE_RETRY); +TRACE_DEFINE_ENUM(NFS4ERR_SEQ_MISORDERED); +TRACE_DEFINE_ENUM(NFS4ERR_SERVERFAULT); +TRACE_DEFINE_ENUM(NFS4ERR_STALE); +TRACE_DEFINE_ENUM(NFS4ERR_STALE_CLIENTID); +TRACE_DEFINE_ENUM(NFS4ERR_STALE_STATEID); +TRACE_DEFINE_ENUM(NFS4ERR_SYMLINK); +TRACE_DEFINE_ENUM(NFS4ERR_TOOSMALL); +TRACE_DEFINE_ENUM(NFS4ERR_TOO_MANY_OPS); +TRACE_DEFINE_ENUM(NFS4ERR_UNKNOWN_LAYOUTTYPE); +TRACE_DEFINE_ENUM(NFS4ERR_UNSAFE_COMPOUND); +TRACE_DEFINE_ENUM(NFS4ERR_WRONGSEC); +TRACE_DEFINE_ENUM(NFS4ERR_WRONG_CRED); +TRACE_DEFINE_ENUM(NFS4ERR_WRONG_TYPE); +TRACE_DEFINE_ENUM(NFS4ERR_XDEV); + +TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_MDS); +TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_PNFS); + +#define show_nfs4_status(x) \ + __print_symbolic(x, \ + { NFS4_OK, "OK" }, \ + { EPERM, "EPERM" }, \ + { ENOENT, "ENOENT" }, \ + { EIO, "EIO" }, \ + { ENXIO, "ENXIO" }, \ + { EACCES, "EACCES" }, \ + { EEXIST, "EEXIST" }, \ + { EXDEV, "EXDEV" }, \ + { ENOTDIR, "ENOTDIR" }, \ + { EISDIR, "EISDIR" }, \ + { EFBIG, "EFBIG" }, \ + { ENOSPC, "ENOSPC" }, \ + { EROFS, "EROFS" }, \ + { EMLINK, "EMLINK" }, \ + { ENAMETOOLONG, "ENAMETOOLONG" }, \ + { ENOTEMPTY, "ENOTEMPTY" }, \ + { EDQUOT, "EDQUOT" }, \ + { ESTALE, "ESTALE" }, \ + { EBADHANDLE, "EBADHANDLE" }, \ + { EBADCOOKIE, "EBADCOOKIE" }, \ + { ENOTSUPP, "ENOTSUPP" }, \ + { ETOOSMALL, "ETOOSMALL" }, \ + { EREMOTEIO, "EREMOTEIO" }, \ + { EBADTYPE, "EBADTYPE" }, \ + { EAGAIN, "EAGAIN" }, \ + { ELOOP, "ELOOP" }, \ + { EOPNOTSUPP, "EOPNOTSUPP" }, \ + { EDEADLK, "EDEADLK" }, \ + { ENOMEM, "ENOMEM" }, \ + { EKEYEXPIRED, "EKEYEXPIRED" }, \ + { ETIMEDOUT, "ETIMEDOUT" }, \ + { ERESTARTSYS, "ERESTARTSYS" }, \ + { ECONNREFUSED, "ECONNREFUSED" }, \ + { ECONNRESET, "ECONNRESET" }, \ + { ENETUNREACH, "ENETUNREACH" }, \ + { EHOSTUNREACH, "EHOSTUNREACH" }, \ + { EHOSTDOWN, "EHOSTDOWN" }, \ + { 
EPIPE, "EPIPE" }, \ + { EPFNOSUPPORT, "EPFNOSUPPORT" }, \ + { EPROTONOSUPPORT, "EPROTONOSUPPORT" }, \ + { NFS4ERR_ACCESS, "ACCESS" }, \ + { NFS4ERR_ATTRNOTSUPP, "ATTRNOTSUPP" }, \ + { NFS4ERR_ADMIN_REVOKED, "ADMIN_REVOKED" }, \ + { NFS4ERR_BACK_CHAN_BUSY, "BACK_CHAN_BUSY" }, \ + { NFS4ERR_BADCHAR, "BADCHAR" }, \ + { NFS4ERR_BADHANDLE, "BADHANDLE" }, \ + { NFS4ERR_BADIOMODE, "BADIOMODE" }, \ + { NFS4ERR_BADLAYOUT, "BADLAYOUT" }, \ + { NFS4ERR_BADLABEL, "BADLABEL" }, \ + { NFS4ERR_BADNAME, "BADNAME" }, \ + { NFS4ERR_BADOWNER, "BADOWNER" }, \ + { NFS4ERR_BADSESSION, "BADSESSION" }, \ + { NFS4ERR_BADSLOT, "BADSLOT" }, \ + { NFS4ERR_BADTYPE, "BADTYPE" }, \ + { NFS4ERR_BADXDR, "BADXDR" }, \ + { NFS4ERR_BAD_COOKIE, "BAD_COOKIE" }, \ + { NFS4ERR_BAD_HIGH_SLOT, "BAD_HIGH_SLOT" }, \ + { NFS4ERR_BAD_RANGE, "BAD_RANGE" }, \ + { NFS4ERR_BAD_SEQID, "BAD_SEQID" }, \ + { NFS4ERR_BAD_SESSION_DIGEST, "BAD_SESSION_DIGEST" }, \ + { NFS4ERR_BAD_STATEID, "BAD_STATEID" }, \ + { NFS4ERR_CB_PATH_DOWN, "CB_PATH_DOWN" }, \ + { NFS4ERR_CLID_INUSE, "CLID_INUSE" }, \ + { NFS4ERR_CLIENTID_BUSY, "CLIENTID_BUSY" }, \ + { NFS4ERR_COMPLETE_ALREADY, "COMPLETE_ALREADY" }, \ + { NFS4ERR_CONN_NOT_BOUND_TO_SESSION, "CONN_NOT_BOUND_TO_SESSION" }, \ + { NFS4ERR_DEADLOCK, "DEADLOCK" }, \ + { NFS4ERR_DEADSESSION, "DEAD_SESSION" }, \ + { NFS4ERR_DELAY, "DELAY" }, \ + { NFS4ERR_DELEG_ALREADY_WANTED, "DELEG_ALREADY_WANTED" }, \ + { NFS4ERR_DELEG_REVOKED, "DELEG_REVOKED" }, \ + { NFS4ERR_DENIED, "DENIED" }, \ + { NFS4ERR_DIRDELEG_UNAVAIL, "DIRDELEG_UNAVAIL" }, \ + { NFS4ERR_DQUOT, "DQUOT" }, \ + { NFS4ERR_ENCR_ALG_UNSUPP, "ENCR_ALG_UNSUPP" }, \ + { NFS4ERR_EXIST, "EXIST" }, \ + { NFS4ERR_EXPIRED, "EXPIRED" }, \ + { NFS4ERR_FBIG, "FBIG" }, \ + { NFS4ERR_FHEXPIRED, "FHEXPIRED" }, \ + { NFS4ERR_FILE_OPEN, "FILE_OPEN" }, \ + { NFS4ERR_GRACE, "GRACE" }, \ + { NFS4ERR_HASH_ALG_UNSUPP, "HASH_ALG_UNSUPP" }, \ + { NFS4ERR_INVAL, "INVAL" }, \ + { NFS4ERR_IO, "IO" }, \ + { NFS4ERR_ISDIR, "ISDIR" }, \ + { 
NFS4ERR_LAYOUTTRYLATER, "LAYOUTTRYLATER" }, \ + { NFS4ERR_LAYOUTUNAVAILABLE, "LAYOUTUNAVAILABLE" }, \ + { NFS4ERR_LEASE_MOVED, "LEASE_MOVED" }, \ + { NFS4ERR_LOCKED, "LOCKED" }, \ + { NFS4ERR_LOCKS_HELD, "LOCKS_HELD" }, \ + { NFS4ERR_LOCK_RANGE, "LOCK_RANGE" }, \ + { NFS4ERR_MINOR_VERS_MISMATCH, "MINOR_VERS_MISMATCH" }, \ + { NFS4ERR_MLINK, "MLINK" }, \ + { NFS4ERR_MOVED, "MOVED" }, \ + { NFS4ERR_NAMETOOLONG, "NAMETOOLONG" }, \ + { NFS4ERR_NOENT, "NOENT" }, \ + { NFS4ERR_NOFILEHANDLE, "NOFILEHANDLE" }, \ + { NFS4ERR_NOMATCHING_LAYOUT, "NOMATCHING_LAYOUT" }, \ + { NFS4ERR_NOSPC, "NOSPC" }, \ + { NFS4ERR_NOTDIR, "NOTDIR" }, \ + { NFS4ERR_NOTEMPTY, "NOTEMPTY" }, \ + { NFS4ERR_NOTSUPP, "NOTSUPP" }, \ + { NFS4ERR_NOT_ONLY_OP, "NOT_ONLY_OP" }, \ + { NFS4ERR_NOT_SAME, "NOT_SAME" }, \ + { NFS4ERR_NO_GRACE, "NO_GRACE" }, \ + { NFS4ERR_NXIO, "NXIO" }, \ + { NFS4ERR_OLD_STATEID, "OLD_STATEID" }, \ + { NFS4ERR_OPENMODE, "OPENMODE" }, \ + { NFS4ERR_OP_ILLEGAL, "OP_ILLEGAL" }, \ + { NFS4ERR_OP_NOT_IN_SESSION, "OP_NOT_IN_SESSION" }, \ + { NFS4ERR_PERM, "PERM" }, \ + { NFS4ERR_PNFS_IO_HOLE, "PNFS_IO_HOLE" }, \ + { NFS4ERR_PNFS_NO_LAYOUT, "PNFS_NO_LAYOUT" }, \ + { NFS4ERR_RECALLCONFLICT, "RECALLCONFLICT" }, \ + { NFS4ERR_RECLAIM_BAD, "RECLAIM_BAD" }, \ + { NFS4ERR_RECLAIM_CONFLICT, "RECLAIM_CONFLICT" }, \ + { NFS4ERR_REJECT_DELEG, "REJECT_DELEG" }, \ + { NFS4ERR_REP_TOO_BIG, "REP_TOO_BIG" }, \ + { NFS4ERR_REP_TOO_BIG_TO_CACHE, "REP_TOO_BIG_TO_CACHE" }, \ + { NFS4ERR_REQ_TOO_BIG, "REQ_TOO_BIG" }, \ + { NFS4ERR_RESOURCE, "RESOURCE" }, \ + { NFS4ERR_RESTOREFH, "RESTOREFH" }, \ + { NFS4ERR_RETRY_UNCACHED_REP, "RETRY_UNCACHED_REP" }, \ + { NFS4ERR_RETURNCONFLICT, "RETURNCONFLICT" }, \ + { NFS4ERR_ROFS, "ROFS" }, \ + { NFS4ERR_SAME, "SAME" }, \ + { NFS4ERR_SHARE_DENIED, "SHARE_DENIED" }, \ + { NFS4ERR_SEQUENCE_POS, "SEQUENCE_POS" }, \ + { NFS4ERR_SEQ_FALSE_RETRY, "SEQ_FALSE_RETRY" }, \ + { NFS4ERR_SEQ_MISORDERED, "SEQ_MISORDERED" }, \ + { NFS4ERR_SERVERFAULT, "SERVERFAULT" }, \ + { 
NFS4ERR_STALE, "STALE" }, \ + { NFS4ERR_STALE_CLIENTID, "STALE_CLIENTID" }, \ + { NFS4ERR_STALE_STATEID, "STALE_STATEID" }, \ + { NFS4ERR_SYMLINK, "SYMLINK" }, \ + { NFS4ERR_TOOSMALL, "TOOSMALL" }, \ + { NFS4ERR_TOO_MANY_OPS, "TOO_MANY_OPS" }, \ + { NFS4ERR_UNKNOWN_LAYOUTTYPE, "UNKNOWN_LAYOUTTYPE" }, \ + { NFS4ERR_UNSAFE_COMPOUND, "UNSAFE_COMPOUND" }, \ + { NFS4ERR_WRONGSEC, "WRONGSEC" }, \ + { NFS4ERR_WRONG_CRED, "WRONG_CRED" }, \ + { NFS4ERR_WRONG_TYPE, "WRONG_TYPE" }, \ + { NFS4ERR_XDEV, "XDEV" }, \ + /* ***** Internal to Linux NFS client ***** */ \ + { NFS4ERR_RESET_TO_MDS, "RESET_TO_MDS" }, \ + { NFS4ERR_RESET_TO_PNFS, "RESET_TO_PNFS" }) + +#define show_nfs4_verifier(x) \ + __print_hex_str(x, NFS4_VERIFIER_SIZE) + +TRACE_DEFINE_ENUM(IOMODE_READ); +TRACE_DEFINE_ENUM(IOMODE_RW); +TRACE_DEFINE_ENUM(IOMODE_ANY); + +#define show_pnfs_layout_iomode(x) \ + __print_symbolic(x, \ + { IOMODE_READ, "READ" }, \ + { IOMODE_RW, "RW" }, \ + { IOMODE_ANY, "ANY" }) + +#define show_nfs4_seq4_status(x) \ + __print_flags(x, "|", \ + { SEQ4_STATUS_CB_PATH_DOWN, "CB_PATH_DOWN" }, \ + { SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING, "CB_GSS_CONTEXTS_EXPIRING" }, \ + { SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED, "CB_GSS_CONTEXTS_EXPIRED" }, \ + { SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED, "EXPIRED_ALL_STATE_REVOKED" }, \ + { SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED, "EXPIRED_SOME_STATE_REVOKED" }, \ + { SEQ4_STATUS_ADMIN_STATE_REVOKED, "ADMIN_STATE_REVOKED" }, \ + { SEQ4_STATUS_RECALLABLE_STATE_REVOKED, "RECALLABLE_STATE_REVOKED" }, \ + { SEQ4_STATUS_LEASE_MOVED, "LEASE_MOVED" }, \ + { SEQ4_STATUS_RESTART_RECLAIM_NEEDED, "RESTART_RECLAIM_NEEDED" }, \ + { SEQ4_STATUS_CB_PATH_DOWN_SESSION, "CB_PATH_DOWN_SESSION" }, \ + { SEQ4_STATUS_BACKCHANNEL_FAULT, "BACKCHANNEL_FAULT" }) From cb5a967f7ce413d08cb86ab2285ed34f5ca54935 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Tue, 2 Nov 2021 14:48:59 -0400 Subject: [PATCH 200/433] xprtrdma: Fix a maybe-uninitialized compiler warning This minor fix-up keeps 
GCC from complaining that "'last' may be used uninitialized", which breaks some build workflows that have been running with all warnings treated as errors. Signed-off-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- net/sunrpc/xprtrdma/frwr_ops.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 3eccf365fcb8..ff699307e820 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -515,8 +515,8 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) * a single ib_post_send() call. */ prev = &first; - while ((mr = rpcrdma_mr_pop(&req->rl_registered))) { - + mr = rpcrdma_mr_pop(&req->rl_registered); + do { trace_xprtrdma_mr_localinv(mr); r_xprt->rx_stats.local_inv_needed++; @@ -533,7 +533,8 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) *prev = last; prev = &last->next; - } + } while ((mr = rpcrdma_mr_pop(&req->rl_registered))); + mr = container_of(last, struct rpcrdma_mr, mr_invwr); /* Strong send queue ordering guarantees that when the @@ -617,8 +618,8 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) * a single ib_post_send() call.
*/ prev = &first; - while ((mr = rpcrdma_mr_pop(&req->rl_registered))) { - + mr = rpcrdma_mr_pop(&req->rl_registered); + do { trace_xprtrdma_mr_localinv(mr); r_xprt->rx_stats.local_inv_needed++; @@ -635,7 +636,7 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) *prev = last; prev = &last->next; - } + } while ((mr = rpcrdma_mr_pop(&req->rl_registered))); /* Strong send queue ordering guarantees that when the * last WR in the chain completes, all WRs in the chain From edfa0b16bf9eb1e2c93e7e846e0e02c51395ca7b Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Tue, 2 Nov 2021 15:51:55 -0400 Subject: [PATCH 201/433] NFS: Add offset to nfs_aop_readahead tracepoint Add the byte offset of the readahead request to the tracepoint output so we know where the read starts. Before this patch: cat-8104 [002] ..... 813.168775: nfs_aop_readahead: fileid=00:31:141 fhandle=0xe55807f6 version=1756509392533525500 nr_pages=256 cat-8104 [002] ..... 813.174973: nfs_aop_readahead_done: fileid=00:31:141 fhandle=0xe55807f6 version=1756509392533525500 nr_pages=256 ret=0 cat-8104 [002] ..... 813.175963: nfs_aop_readahead: fileid=00:31:141 fhandle=0xe55807f6 version=1756509392533525500 nr_pages=256 cat-8104 [002] ..... 813.183742: nfs_aop_readahead_done: fileid=00:31:141 fhandle=0xe55807f6 version=1756509392533525500 nr_pages=1 ret=0 After this patch: cat-6392 [001] ..... 73.107782: nfs_aop_readahead: fileid=00:31:141 fhandle=0xed22403f version=1756511950029502774 offset=5242880 nr_pages=256 cat-6392 [001] ..... 73.112466: nfs_aop_readahead_done: fileid=00:31:141 fhandle=0xed22403f version=1756511950029502774 nr_pages=256 ret=0 cat-6392 [001] ..... 73.115692: nfs_aop_readahead: fileid=00:31:141 fhandle=0xed22403f version=1756511950029502774 offset=6291456 nr_pages=256 cat-6392 [001] ..... 
73.123283: nfs_aop_readahead_done: fileid=00:31:141 fhandle=0xed22403f version=1756511950029502774 nr_pages=256 ret=0 Signed-off-by: Dave Wysochanski Signed-off-by: Trond Myklebust --- fs/nfs/nfstrace.h | 10 +++++++--- fs/nfs/read.c | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 76f04aa3367c..21dac847f1e4 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -889,16 +889,18 @@ TRACE_EVENT(nfs_aop_readpage_done, TRACE_EVENT(nfs_aop_readahead, TP_PROTO( const struct inode *inode, + struct page *page, unsigned int nr_pages ), - TP_ARGS(inode, nr_pages), + TP_ARGS(inode, page, nr_pages), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(u64, version) + __field(loff_t, offset) __field(unsigned int, nr_pages) ), @@ -909,15 +911,16 @@ TRACE_EVENT(nfs_aop_readahead, __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->version = inode_peek_iversion_raw(inode); + __entry->offset = page_index(page) << PAGE_SHIFT; __entry->nr_pages = nr_pages; ), TP_printk( - "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu nr_pages=%u", + "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu offset=%lld nr_pages=%u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->version, - __entry->nr_pages + __entry->offset, __entry->nr_pages ) ); @@ -936,6 +939,7 @@ TRACE_EVENT(nfs_aop_readahead_done, __field(int, ret) __field(u64, fileid) __field(u64, version) + __field(loff_t, offset) __field(unsigned int, nr_pages) ), diff --git a/fs/nfs/read.c b/fs/nfs/read.c index c8273d4b12ad..d11af2a9299c 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -404,7 +404,7 @@ int nfs_readpages(struct file *file, struct address_space *mapping, struct inode *inode = mapping->host; int ret; - trace_nfs_aop_readahead(inode, nr_pages); + trace_nfs_aop_readahead(inode, lru_to_page(pages), nr_pages); nfs_inc_stats(inode, 
NFSIOS_VFSREADPAGES); ret = -ESTALE; From 5fe11512cdc24ccc66ac5da3c815ac9e59449abc Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Tue, 2 Nov 2021 16:33:13 -0700 Subject: [PATCH 202/433] Input: remove unused header Commit 83b41248ed04 ("Input: cy8ctmg110_ts - switch to using gpiod API") removed the last use of <linux/input/cy8ctmg110_pdata.h> but left the header file behind. Nothing uses it now, delete it. Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20211102220203.940290-6-corbet@lwn.net Signed-off-by: Dmitry Torokhov --- include/linux/input/cy8ctmg110_pdata.h | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 include/linux/input/cy8ctmg110_pdata.h diff --git a/include/linux/input/cy8ctmg110_pdata.h b/include/linux/input/cy8ctmg110_pdata.h deleted file mode 100644 index ee1d44545f30..000000000000 --- a/include/linux/input/cy8ctmg110_pdata.h +++ /dev/null @@ -1,10 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_CY8CTMG110_PDATA_H -#define _LINUX_CY8CTMG110_PDATA_H - -struct cy8ctmg110_pdata -{ - int reset_pin; /* Reset pin is wired to this GPIO (optional) */ -}; - -#endif From 322a552e19550872eb9f3a54330a5f615db42916 Mon Sep 17 00:00:00 2001 From: Jesse Taube Date: Tue, 2 Nov 2021 16:55:49 -0700 Subject: [PATCH 203/433] Input: cap11xx - add support for cap1206 According to the datasheet "The CAP1206 is pin- and register-compatible with the CAP1106, with the exception of the GAIN[1:0] bits and ALT_POL bit"(57). So, this patch aims to disable them as they are no longer used.
Signed-off-by: Jesse Taube Signed-off-by: Dmitry Torokhov --- .../devicetree/bindings/input/cap11xx.txt | 1 + drivers/input/keyboard/cap11xx.c | 43 +++++++++++++------ 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/Documentation/devicetree/bindings/input/cap11xx.txt b/Documentation/devicetree/bindings/input/cap11xx.txt index 8c67a0b5058d..74b91c398d22 100644 --- a/Documentation/devicetree/bindings/input/cap11xx.txt +++ b/Documentation/devicetree/bindings/input/cap11xx.txt @@ -9,6 +9,7 @@ Required properties: "microchip,cap1106" "microchip,cap1126" "microchip,cap1188" + "microchip,cap1206" reg: The I2C slave address of the device. diff --git a/drivers/input/keyboard/cap11xx.c b/drivers/input/keyboard/cap11xx.c index 688e2bef682e..7c85343cd32f 100644 --- a/drivers/input/keyboard/cap11xx.c +++ b/drivers/input/keyboard/cap11xx.c @@ -91,18 +91,21 @@ struct cap11xx_hw_model { u8 product_id; unsigned int num_channels; unsigned int num_leds; + bool no_gain; }; enum { CAP1106, CAP1126, CAP1188, + CAP1206, }; static const struct cap11xx_hw_model cap11xx_devices[] = { - [CAP1106] = { .product_id = 0x55, .num_channels = 6, .num_leds = 0 }, - [CAP1126] = { .product_id = 0x53, .num_channels = 6, .num_leds = 2 }, - [CAP1188] = { .product_id = 0x50, .num_channels = 8, .num_leds = 8 }, + [CAP1106] = { .product_id = 0x55, .num_channels = 6, .num_leds = 0, .no_gain = false }, + [CAP1126] = { .product_id = 0x53, .num_channels = 6, .num_leds = 2, .no_gain = false }, + [CAP1188] = { .product_id = 0x50, .num_channels = 8, .num_leds = 8, .no_gain = false }, + [CAP1206] = { .product_id = 0x67, .num_channels = 6, .num_leds = 0, .no_gain = true }, }; static const struct reg_default cap11xx_reg_defaults[] = { @@ -378,17 +381,24 @@ static int cap11xx_i2c_probe(struct i2c_client *i2c_client, node = dev->of_node; if (!of_property_read_u32(node, "microchip,sensor-gain", &gain32)) { - if (is_power_of_2(gain32) && gain32 <= 8) + if (cap->no_gain) + dev_warn(dev, + "This version 
doesn't support sensor gain\n"); + else if (is_power_of_2(gain32) && gain32 <= 8) gain = ilog2(gain32); else dev_err(dev, "Invalid sensor-gain value %d\n", gain32); } - if (of_property_read_bool(node, "microchip,irq-active-high")) { - error = regmap_update_bits(priv->regmap, CAP11XX_REG_CONFIG2, - CAP11XX_REG_CONFIG2_ALT_POL, 0); - if (error) - return error; + if (id->driver_data != CAP1206) { + if (of_property_read_bool(node, "microchip,irq-active-high")) { + error = regmap_update_bits(priv->regmap, + CAP11XX_REG_CONFIG2, + CAP11XX_REG_CONFIG2_ALT_POL, + 0); + if (error) + return error; + } } /* Provide some useful defaults */ @@ -398,11 +408,14 @@ static int cap11xx_i2c_probe(struct i2c_client *i2c_client, of_property_read_u32_array(node, "linux,keycodes", priv->keycodes, cap->num_channels); - error = regmap_update_bits(priv->regmap, CAP11XX_REG_MAIN_CONTROL, - CAP11XX_REG_MAIN_CONTROL_GAIN_MASK, - gain << CAP11XX_REG_MAIN_CONTROL_GAIN_SHIFT); - if (error) - return error; + if (!cap->no_gain) { + error = regmap_update_bits(priv->regmap, + CAP11XX_REG_MAIN_CONTROL, + CAP11XX_REG_MAIN_CONTROL_GAIN_MASK, + gain << CAP11XX_REG_MAIN_CONTROL_GAIN_SHIFT); + if (error) + return error; + } /* Disable autorepeat. The Linux input system has its own handling. 
*/ error = regmap_write(priv->regmap, CAP11XX_REG_REPEAT_RATE, 0); @@ -470,6 +483,7 @@ static const struct of_device_id cap11xx_dt_ids[] = { { .compatible = "microchip,cap1106", }, { .compatible = "microchip,cap1126", }, { .compatible = "microchip,cap1188", }, + { .compatible = "microchip,cap1206", }, {} }; MODULE_DEVICE_TABLE(of, cap11xx_dt_ids); @@ -478,6 +492,7 @@ static const struct i2c_device_id cap11xx_i2c_ids[] = { { "cap1106", CAP1106 }, { "cap1126", CAP1126 }, { "cap1188", CAP1188 }, + { "cap1206", CAP1206 }, {} }; MODULE_DEVICE_TABLE(i2c, cap11xx_i2c_ids); From 26499499cae642a906e7d501ac5342ca148ea8b1 Mon Sep 17 00:00:00 2001 From: Yuiko Oshino Date: Mon, 1 Nov 2021 12:21:19 -0400 Subject: [PATCH 204/433] net: phy: microchip_t1: add lan87xx_config_rgmii_delay for lan87xx phy Add a function to initialize phy rgmii delay according to phydev->interface. Signed-off-by: Yuiko Oshino Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20211101162119.29275-1-yuiko.oshino@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/microchip_t1.c | 44 +++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/microchip_t1.c b/drivers/net/phy/microchip_t1.c index a4de3d2081c5..bc50224d43dd 100644 --- a/drivers/net/phy/microchip_t1.c +++ b/drivers/net/phy/microchip_t1.c @@ -28,6 +28,11 @@ #define LAN87XX_MASK_LINK_UP (0x0004) #define LAN87XX_MASK_LINK_DOWN (0x0002) +/* MISC Control 1 Register */ +#define LAN87XX_CTRL_1 (0x11) +#define LAN87XX_MASK_RGMII_TXC_DLY_EN (0x4000) +#define LAN87XX_MASK_RGMII_RXC_DLY_EN (0x2000) + /* phyaccess nested types */ #define PHYACC_ATTR_MODE_READ 0 #define PHYACC_ATTR_MODE_WRITE 1 @@ -112,6 +117,43 @@ static int access_ereg_modify_changed(struct phy_device *phydev, return rc; } +static int lan87xx_config_rgmii_delay(struct phy_device *phydev) +{ + int rc; + + if (!phy_interface_is_rgmii(phydev)) + return 0; + + rc = access_ereg(phydev, PHYACC_ATTR_MODE_READ, + 
PHYACC_ATTR_BANK_MISC, LAN87XX_CTRL_1, 0); + if (rc < 0) + return rc; + + switch (phydev->interface) { + case PHY_INTERFACE_MODE_RGMII: + rc &= ~LAN87XX_MASK_RGMII_TXC_DLY_EN; + rc &= ~LAN87XX_MASK_RGMII_RXC_DLY_EN; + break; + case PHY_INTERFACE_MODE_RGMII_ID: + rc |= LAN87XX_MASK_RGMII_TXC_DLY_EN; + rc |= LAN87XX_MASK_RGMII_RXC_DLY_EN; + break; + case PHY_INTERFACE_MODE_RGMII_RXID: + rc &= ~LAN87XX_MASK_RGMII_TXC_DLY_EN; + rc |= LAN87XX_MASK_RGMII_RXC_DLY_EN; + break; + case PHY_INTERFACE_MODE_RGMII_TXID: + rc |= LAN87XX_MASK_RGMII_TXC_DLY_EN; + rc &= ~LAN87XX_MASK_RGMII_RXC_DLY_EN; + break; + default: + return 0; + } + + return access_ereg(phydev, PHYACC_ATTR_MODE_WRITE, + PHYACC_ATTR_BANK_MISC, LAN87XX_CTRL_1, rc); +} + static int lan87xx_phy_init(struct phy_device *phydev) { static const struct access_ereg_val init[] = { @@ -185,7 +227,7 @@ static int lan87xx_phy_init(struct phy_device *phydev) return rc; } - return 0; + return lan87xx_config_rgmii_delay(phydev); } static int lan87xx_phy_config_intr(struct phy_device *phydev) From d52bcb47bdf971a59a2467975d2405fcfcb2fa19 Mon Sep 17 00:00:00 2001 From: Maxim Kiselev Date: Mon, 1 Nov 2021 18:23:41 +0300 Subject: [PATCH 205/433] net: davinci_emac: Fix interrupt pacing disable This patch allows to use 0 for `coal->rx_coalesce_usecs` param to disable rx irq coalescing. 
Previously we could enable rx irq coalescing via ethtool (For ex: `ethtool -C eth0 rx-usecs 2000`) but we couldn't disable it because this part rejects 0 value: if (!coal->rx_coalesce_usecs) return -EINVAL; Fixes: 84da2658a619 ("TI DaVinci EMAC : Implement interrupt pacing functionality.") Signed-off-by: Maxim Kiselev Reviewed-by: Grygorii Strashko Link: https://lore.kernel.org/r/20211101152343.4193233-1-bigunclemax@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/davinci_emac.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 2d2dcf70563f..d55f06120ce7 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -420,8 +420,20 @@ static int emac_set_coalesce(struct net_device *ndev, u32 int_ctrl, num_interrupts = 0; u32 prescale = 0, addnl_dvdr = 1, coal_intvl = 0; - if (!coal->rx_coalesce_usecs) - return -EINVAL; + if (!coal->rx_coalesce_usecs) { + priv->coal_intvl = 0; + + switch (priv->version) { + case EMAC_VERSION_2: + emac_ctrl_write(EMAC_DM646X_CMINTCTRL, 0); + break; + default: + emac_ctrl_write(EMAC_CTRL_EWINTTCNT, 0); + break; + } + + return 0; + } coal_intvl = coal->rx_coalesce_usecs; From 6ab9f57a648953e2326b9ad000783c122d133c9d Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Mon, 1 Nov 2021 22:03:12 -0400 Subject: [PATCH 206/433] bnxt_en: avoid newline at end of message in NL_SET_ERR_MSG_MOD Fix following coccicheck warning: ./drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c:446:8-56: WARNING avoid newline at end of message in NL_SET_ERR_MSG_MOD. 
Signed-off-by: Wan Jiabing Link: https://lore.kernel.org/r/20211102020312.16567-1-wanjiabing@vivo.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index ce790e9b45c3..5c464ea73576 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -443,7 +443,7 @@ static int bnxt_dl_reload_down(struct devlink *dl, bool netns_change, case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: { if (BNXT_PF(bp) && bp->pf.active_vfs) { NL_SET_ERR_MSG_MOD(extack, - "reload is unsupported when VFs are allocated\n"); + "reload is unsupported when VFs are allocated"); return -EOPNOTSUPP; } rtnl_lock(); From 236f57fe1b8853fb3505502c0f94ae64d153ae92 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 2 Nov 2021 09:24:33 +0100 Subject: [PATCH 207/433] net: marvell: prestera: Add explicit padding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On m68k: In function ‘prestera_hw_build_tests’, inlined from ‘prestera_hw_switch_init’ at drivers/net/ethernet/marvell/prestera/prestera_hw.c:788:2: ././include/linux/compiler_types.h:335:38: error: call to ‘__compiletime_assert_345’ declared with attribute error: BUILD_BUG_ON failed: sizeof(struct prestera_msg_switch_attr_req) != 16 ... The driver assumes structure members are naturally aligned, but does not add explicit padding, thus breaking architectures where integral values are not always naturally aligned (e.g. on m68k, __alignof(int) is 2, not 4). 
Fixes: bb5dbf2cc64d5cfa ("net: marvell: prestera: add firmware v4.0 support") Signed-off-by: Geert Uytterhoeven Reviewed-by: Arnd Bergmann Link: https://lore.kernel.org/r/20211102082433.3820514-1-geert@linux-m68k.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/prestera/prestera_hw.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/net/ethernet/marvell/prestera/prestera_hw.c b/drivers/net/ethernet/marvell/prestera/prestera_hw.c index 41ba17cb2965..4f5f52dcdd9d 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_hw.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_hw.c @@ -189,6 +189,7 @@ struct prestera_msg_switch_attr_req { struct prestera_msg_cmd cmd; __le32 attr; union prestera_msg_switch_param param; + u8 pad[2]; }; struct prestera_msg_switch_init_resp { @@ -313,6 +314,7 @@ struct prestera_msg_port_info_resp { __le32 hw_id; __le32 dev_id; __le16 fp_id; + u8 pad[2]; }; struct prestera_msg_vlan_req { @@ -345,11 +347,13 @@ struct prestera_msg_bridge_req { __le32 port; __le32 dev; __le16 bridge; + u8 pad[2]; }; struct prestera_msg_bridge_resp { struct prestera_msg_ret ret; __le16 bridge; + u8 pad[2]; }; struct prestera_msg_acl_action { @@ -408,16 +412,19 @@ struct prestera_msg_acl_ruleset_bind_req { __le32 port; __le32 dev; __le16 ruleset_id; + u8 pad[2]; }; struct prestera_msg_acl_ruleset_req { struct prestera_msg_cmd cmd; __le16 id; + u8 pad[2]; }; struct prestera_msg_acl_ruleset_resp { struct prestera_msg_ret ret; __le16 id; + u8 pad[2]; }; struct prestera_msg_span_req { @@ -425,11 +432,13 @@ struct prestera_msg_span_req { __le32 port; __le32 dev; u8 id; + u8 pad[3]; }; struct prestera_msg_span_resp { struct prestera_msg_ret ret; u8 id; + u8 pad[3]; }; struct prestera_msg_stp_req { @@ -443,6 +452,7 @@ struct prestera_msg_stp_req { struct prestera_msg_rxtx_req { struct prestera_msg_cmd cmd; u8 use_sdma; + u8 pad[3]; }; struct prestera_msg_rxtx_resp { @@ -455,12 +465,14 @@ struct prestera_msg_lag_req { __le32 
port; __le32 dev; __le16 lag_id; + u8 pad[2]; }; struct prestera_msg_cpu_code_counter_req { struct prestera_msg_cmd cmd; u8 counter_type; u8 code; + u8 pad[2]; }; struct mvsw_msg_cpu_code_counter_ret { From c4777efa751d293e369aec464ce6875e957be255 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 1 Nov 2021 17:45:55 -0700 Subject: [PATCH 208/433] net: add and use skb_unclone_keeptruesize() helper While commit 097b9146c0e2 ("net: fix up truesize of cloned skb in skb_prepare_for_shift()") fixed immediate issues found when KFENCE was enabled/tested, there are still similar issues, when tcp_trim_head() hits KFENCE while the master skb is cloned. This happens under heavy networking TX workloads, when the TX completion might be delayed after incoming ACK. This patch fixes the WARNING in sk_stream_kill_queues when sk->sk_mem_queued/sk->sk_forward_alloc are not zero. Fixes: d3fb45f370d9 ("mm, kfence: insert KFENCE hooks for SLAB") Signed-off-by: Eric Dumazet Acked-by: Marco Elver Link: https://lore.kernel.org/r/20211102004555.1359210-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 16 ++++++++++++++++ net/core/skbuff.c | 14 +------------- net/ipv4/tcp_output.c | 6 +++--- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0bd6520329f6..a63e13082397 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1675,6 +1675,22 @@ static inline int skb_unclone(struct sk_buff *skb, gfp_t pri) return 0; } +/* This variant of skb_unclone() makes sure skb->truesize is not changed */ +static inline int skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) +{ + might_sleep_if(gfpflags_allow_blocking(pri)); + + if (skb_cloned(skb)) { + unsigned int save = skb->truesize; + int res; + + res = pskb_expand_head(skb, 0, 0, pri); + skb->truesize = save; + return res; + } + return 0; +} + /** * skb_header_cloned - is the header a clone * @skb: buffer to check diff --git 
a/net/core/skbuff.c b/net/core/skbuff.c index 67a9188d8a49..3ec42cdee16a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3449,19 +3449,7 @@ EXPORT_SYMBOL(skb_split); */ static int skb_prepare_for_shift(struct sk_buff *skb) { - int ret = 0; - - if (skb_cloned(skb)) { - /* Save and restore truesize: pskb_expand_head() may reallocate - * memory where ksize(kmalloc(S)) != ksize(kmalloc(S)), but we - * cannot change truesize at this point. - */ - unsigned int save_truesize = skb->truesize; - - ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); - skb->truesize = save_truesize; - } - return ret; + return skb_unclone_keeptruesize(skb, GFP_ATOMIC); } /** diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 6fbbf1558033..76cc1641beb4 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1559,7 +1559,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, return -ENOMEM; } - if (skb_unclone(skb, gfp)) + if (skb_unclone_keeptruesize(skb, gfp)) return -ENOMEM; /* Get a new skb... force flag on. */ @@ -1667,7 +1667,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) { u32 delta_truesize; - if (skb_unclone(skb, GFP_ATOMIC)) + if (skb_unclone_keeptruesize(skb, GFP_ATOMIC)) return -ENOMEM; delta_truesize = __pskb_trim_head(skb, len); @@ -3166,7 +3166,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) cur_mss, GFP_ATOMIC)) return -ENOMEM; /* We'll try again later. 
*/ } else { - if (skb_unclone(skb, GFP_ATOMIC)) + if (skb_unclone_keeptruesize(skb, GFP_ATOMIC)) return -ENOMEM; diff = tcp_skb_pcount(skb); From 18635d524870888b60625abef086b53c2df3ad89 Mon Sep 17 00:00:00 2001 From: Shay Agroskin Date: Tue, 2 Nov 2021 13:04:00 +0200 Subject: [PATCH 209/433] MAINTAINERS: Update ENA maintainers information The ENA driver is no longer maintained by Netanel and Guy Signed-off-by: Shay Agroskin Link: https://lore.kernel.org/r/20211102110358.193920-1-shayagr@amazon.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index f96aa662ee32..ecf8ec3d2339 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -872,9 +872,10 @@ F: Documentation/devicetree/bindings/thermal/amazon,al-thermal.txt F: drivers/thermal/thermal_mmio.c AMAZON ETHERNET DRIVERS -M: Netanel Belgazal +M: Shay Agroskin M: Arthur Kiyanovski -R: Guy Tzalik +R: David Arinzon +R: Noam Dagan R: Saeed Bishara L: netdev@vger.kernel.org S: Supported From db2434343b2c29817fe1fa63919e9c56218a46e8 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 2 Nov 2021 21:03:53 +0800 Subject: [PATCH 210/433] amt: fix error return code in amt_init() Return error code when alloc_workqueue() fails in amt_init(). 
Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Reviewed-by: Taehee Yoo Link: https://lore.kernel.org/r/20211102130353.1666999-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/amt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/amt.c b/drivers/net/amt.c index 60a7053a9cf7..d8c9ed9f8a81 100644 --- a/drivers/net/amt.c +++ b/drivers/net/amt.c @@ -3259,8 +3259,10 @@ static int __init amt_init(void) goto unregister_notifier; amt_wq = alloc_workqueue("amt", WQ_UNBOUND, 1); - if (!amt_wq) + if (!amt_wq) { + err = -ENOMEM; goto rtnl_unregister; + } spin_lock_init(&source_gc_lock); spin_lock_bh(&source_gc_lock); From c75ea024094e7307219a4f9c706dad5ea461509a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 22 Mar 2021 12:00:08 +0100 Subject: [PATCH 211/433] apparmor: avoid -Wempty-body warning Building with 'make W=1' shows a warning for an empty macro: security/apparmor/label.c: In function '__label_update': security/apparmor/label.c:2096:59: error: suggest braces around empty body in an 'else' statement [-Werror=empty-body] 2096 | AA_BUG(labels_ns(label) != labels_ns(new)); Change the macro definition to use no_printk(), which improves format string checking and avoids the warning. Signed-off-by: Arnd Bergmann Signed-off-by: John Johansen --- security/apparmor/include/lib.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h index 7d27db740bc2..67fbb81a11f3 100644 --- a/security/apparmor/include/lib.h +++ b/security/apparmor/include/lib.h @@ -36,7 +36,7 @@ #define AA_BUG_FMT(X, fmt, args...) \ WARN((X), "AppArmor WARN %s: (" #X "): " fmt, __func__, ##args) #else -#define AA_BUG_FMT(X, fmt, args...) +#define AA_BUG_FMT(X, fmt, args...) no_printk(fmt, ##args) #endif #define AA_ERROR(fmt, args...) 
\ From 7e50e9ffdee6fa8b375baddbac85fcb8ffee156a Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Sat, 29 May 2021 16:40:48 +0800 Subject: [PATCH 212/433] apparmor: Remove the repeated declaration Function 'aa_labelset_destroy' and 'aa_labelset_init' are declared twice, so remove the repeated declaration and unnecessary blank line. Cc: John Johansen Cc: James Morris Cc: "Serge E. Hallyn" Signed-off-by: Shaokun Zhang Signed-off-by: John Johansen --- security/apparmor/include/label.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/security/apparmor/include/label.h b/security/apparmor/include/label.h index f5b5485e20c9..7ead1474769e 100644 --- a/security/apparmor/include/label.h +++ b/security/apparmor/include/label.h @@ -77,10 +77,6 @@ struct aa_labelset { #define __labelset_for_each(LS, N) \ for ((N) = rb_first(&(LS)->root); (N); (N) = rb_next(N)) -void aa_labelset_destroy(struct aa_labelset *ls); -void aa_labelset_init(struct aa_labelset *ls); - - enum label_flags { FLAG_HAT = 1, /* profile is a hat */ FLAG_UNCONFINED = 2, /* label unconfined only if all */ From aa4ceed7c3276852031a3e3d6fa767ff1858831f Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Mon, 7 Jun 2021 14:30:22 +0800 Subject: [PATCH 213/433] apparmor: fix doc warning Fix gcc W=1 warning: security/apparmor/apparmorfs.c:2125: warning: Function parameter or member 'p' not described in '__next_profile' Signed-off-by: ChenXiaoSong Signed-off-by: John Johansen --- security/apparmor/apparmorfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 3275e074e5f8..a515d1f6d951 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -2115,7 +2115,7 @@ static struct aa_profile *__first_profile(struct aa_ns *root, /** * __next_profile - step to the next profile in a profile tree - * @profile: current profile in tree (NOT NULL) + * @p: current profile in tree (NOT NULL) * * Perform a depth first 
traversal on the profile tree in a namespace * From d0d845a790d31adb0c90f1f8364de199b23128c8 Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Fri, 30 Jul 2021 01:23:55 -0400 Subject: [PATCH 214/433] apparmor: use per file locks for transactional queries As mentioned in commit 1dea3b41e84c5 ("apparmor: speed up transactional queries"), a single lock is currently used to synchronize transactional queries. We can use the lock allocated for each file by VFS instead. Signed-off-by: Hamza Mahfooz Signed-off-by: John Johansen --- security/apparmor/apparmorfs.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index a515d1f6d951..0920f5188631 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -811,8 +811,6 @@ struct multi_transaction { }; #define MULTI_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct multi_transaction)) -/* TODO: replace with per file lock */ -static DEFINE_SPINLOCK(multi_transaction_lock); static void multi_transaction_kref(struct kref *kref) { @@ -846,10 +844,10 @@ static void multi_transaction_set(struct file *file, AA_BUG(n > MULTI_TRANSACTION_LIMIT); new->size = n; - spin_lock(&multi_transaction_lock); + spin_lock(&file->f_lock); old = (struct multi_transaction *) file->private_data; file->private_data = new; - spin_unlock(&multi_transaction_lock); + spin_unlock(&file->f_lock); put_multi_transaction(old); } @@ -878,9 +876,10 @@ static ssize_t multi_transaction_read(struct file *file, char __user *buf, struct multi_transaction *t; ssize_t ret; - spin_lock(&multi_transaction_lock); + spin_lock(&file->f_lock); t = get_multi_transaction(file->private_data); - spin_unlock(&multi_transaction_lock); + spin_unlock(&file->f_lock); + if (!t) return 0; From 4d47fbbe54bf75b72eac3f5a0caa664300937620 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Thu, 2 Sep 2021 23:27:31 -0700 Subject: [PATCH 215/433] apparmor: fix zero-length
compiler warning in AA_BUG() Uses of AA_BUG() without a message can result in the compiler warning warning: zero-length gnu_printf format string [-Wformat-zero-length] Fix this with a pragma for now. A larger rework of AA_BUG() will follow. Signed-off-by: John Johansen --- security/apparmor/include/lib.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h index 67fbb81a11f3..e2e8df0c6f1c 100644 --- a/security/apparmor/include/lib.h +++ b/security/apparmor/include/lib.h @@ -31,7 +31,12 @@ #define AA_WARN(X) WARN((X), "APPARMOR WARN %s: %s\n", __func__, #X) -#define AA_BUG(X, args...) AA_BUG_FMT((X), "" args) +#define AA_BUG(X, args...) \ + do { \ + _Pragma("GCC diagnostic ignored \"-Wformat-zero-length\""); \ + AA_BUG_FMT((X), "" args); \ + _Pragma("GCC diagnostic warning \"-Wformat-zero-length\""); \ + } while (0) #ifdef CONFIG_SECURITY_APPARMOR_DEBUG_ASSERTS #define AA_BUG_FMT(X, fmt, args...) \ WARN((X), "AppArmor WARN %s: (" #X "): " fmt, __func__, ##args) From a4414341b58397e703c066d28081c58f5057e948 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Wed, 3 Nov 2021 11:45:07 +0800 Subject: [PATCH 216/433] amt: Remove duplicate include Clean up the following includecheck warning: ./drivers/net/amt.c: net/protocol.h is included more than once. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Reviewed-by: Taehee Yoo Signed-off-by: David S. 
Miller --- drivers/net/amt.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/amt.c b/drivers/net/amt.c index d8c9ed9f8a81..896c9e2857f0 100644 --- a/drivers/net/amt.c +++ b/drivers/net/amt.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include From ca3676f94b8f40f52d285f9aef36dfd6725bfc14 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 3 Nov 2021 10:44:55 +0800 Subject: [PATCH 217/433] kselftests/net: add missed icmp.sh test to Makefile When generating the selftests to another folder, the icmp.sh test will miss as it is not in Makefile, e.g. make -C tools/testing/selftests/ install \ TARGETS="net" INSTALL_PATH=/tmp/kselftests Fixes: 7e9838b7915e ("selftests/net: Add icmp.sh for testing ICMP dummy address responses") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- tools/testing/selftests/net/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index aee76d1bb9da..7b079b01aa1b 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -12,7 +12,7 @@ TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_a TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh -TEST_PROGS += altnames.sh icmp_redirect.sh ip6_gre_headroom.sh +TEST_PROGS += altnames.sh icmp.sh icmp_redirect.sh ip6_gre_headroom.sh TEST_PROGS += route_localnet.sh TEST_PROGS += reuseaddr_ports_exhausted.sh TEST_PROGS += txtimestamp.sh From b99ac1841147eefd8d8b52fcf00d7d917949ae7f Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 3 Nov 2021 10:44:56 +0800 Subject: [PATCH 218/433] kselftests/net: add missed setup_loopback.sh/setup_veth.sh to Makefile When generating the selftests to another folder, the include file 
setup_loopback.sh/setup_veth.sh for gro.sh/gre_gro.sh are missing as they are not in Makefile, e.g. make -C tools/testing/selftests/ install \ TARGETS="net" INSTALL_PATH=/tmp/kselftests Fixes: 7d1575014a63 ("selftests/net: GRO coalesce test") Fixes: 9af771d2ec04 ("selftests/net: allow GRO coalesce test on veth") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- tools/testing/selftests/net/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 7b079b01aa1b..8c3d0709b870 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -30,7 +30,7 @@ TEST_PROGS += ioam6.sh TEST_PROGS += gro.sh TEST_PROGS += gre_gso.sh TEST_PROGS += cmsg_so_mark.sh -TEST_PROGS_EXTENDED := in_netns.sh +TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite From 653e7f19b4a0a632cead2390281bde352d3d3273 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 3 Nov 2021 10:44:57 +0800 Subject: [PATCH 219/433] kselftests/net: add missed SRv6 tests When generating the selftests to another folder, the SRv6 tests are missing as they are not in Makefile, e.g. make -C tools/testing/selftests/ install \ TARGETS="net" INSTALL_PATH=/tmp/kselftests Fixes: 03a0b567a03d ("selftests: seg6: add selftest for SRv6 End.DT46 Behavior") Fixes: 2195444e09b4 ("selftests: add selftest for the SRv6 End.DT4 behavior") Fixes: 2bc035538e16 ("selftests: add selftest for the SRv6 End.DT6 (VRF) behavior") Signed-off-by: Hangbin Liu Signed-off-by: David S. 
Miller --- tools/testing/selftests/net/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 8c3d0709b870..256dcd17cd8d 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -30,6 +30,9 @@ TEST_PROGS += ioam6.sh TEST_PROGS += gro.sh TEST_PROGS += gre_gso.sh TEST_PROGS += cmsg_so_mark.sh +TEST_PROGS += srv6_end_dt46_l3vpn_test.sh +TEST_PROGS += srv6_end_dt4_l3vpn_test.sh +TEST_PROGS += srv6_end_dt6_l3vpn_test.sh TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any From 8883deb50eb6529ae1fd4641e402da8ab4f720d2 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 3 Nov 2021 10:44:58 +0800 Subject: [PATCH 220/433] kselftests/net: add missed vrf_strict_mode_test.sh test to Makefile When generating the selftests to another folder, the vrf_strict_mode_test.sh test will be missing because it is not listed in the Makefile, e.g. make -C tools/testing/selftests/ install \ TARGETS="net" INSTALL_PATH=/tmp/kselftests Fixes: 8735e6eaa438 ("selftests: add selftest for the VRF strict mode") Signed-off-by: Hangbin Liu Signed-off-by: David S.
Miller --- tools/testing/selftests/net/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 256dcd17cd8d..218a24f0567e 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -33,6 +33,7 @@ TEST_PROGS += cmsg_so_mark.sh TEST_PROGS += srv6_end_dt46_l3vpn_test.sh TEST_PROGS += srv6_end_dt4_l3vpn_test.sh TEST_PROGS += srv6_end_dt6_l3vpn_test.sh +TEST_PROGS += vrf_strict_mode_test.sh TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any From 17b67370c38de2a878debf39dcbc704a206af4d0 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 3 Nov 2021 10:44:59 +0800 Subject: [PATCH 221/433] kselftests/net: add missed toeplitz.sh/toeplitz_client.sh to Makefile When generating the selftests to another folder, the toeplitz.sh and toeplitz_client.sh are missing as they are not listed in the Makefile, e.g. make -C tools/testing/selftests/ install \ TARGETS="net" INSTALL_PATH=/tmp/kselftests Add them to TEST_PROGS_EXTENDED as they test NIC hardware features and are not intended to be run from kselftests. Fixes: 5ebfb4cc3048 ("selftests/net: toeplitz test") Reviewed-by: Willem de Bruijn Signed-off-by: Hangbin Liu Signed-off-by: David S.
Miller --- tools/testing/selftests/net/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 218a24f0567e..7615f29831eb 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -35,6 +35,7 @@ TEST_PROGS += srv6_end_dt4_l3vpn_test.sh TEST_PROGS += srv6_end_dt6_l3vpn_test.sh TEST_PROGS += vrf_strict_mode_test.sh TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh +TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite From c081d53f97a1a90a38e4296dd3d6fda5e38dca2c Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 2 Nov 2021 08:02:47 -0400 Subject: [PATCH 222/433] security: pass asoc to sctp_assoc_request and sctp_sk_clone This patch is to move secid and peer_secid from endpoint to association, and pass asoc to sctp_assoc_request and sctp_sk_clone instead of ep. As ep is the local endpoint and asoc represents a connection, and in SCTP one sk/ep could have multiple asoc/connection, saving secid/peer_secid for new asoc will overwrite the old asoc's. Note that since asoc can be passed as NULL, security_sctp_assoc_request() is moved to the place right after the new_asoc is created in sctp_sf_do_5_1B_init() and sctp_sf_do_unexpected_init(). v1->v2: - fix the description of selinux_netlbl_skbuff_setsid(), as Jakub noticed. - fix the annotation in selinux_sctp_assoc_request(), as Richard Noticed. Fixes: 72e89f50084c ("security: Add support for SCTP security hooks") Reported-by: Prashanth Prahlad Reviewed-by: Richard Haines Tested-by: Richard Haines Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- Documentation/security/SCTP.rst | 28 ++++++++++++++-------------- include/linux/lsm_hook_defs.h | 4 ++-- include/linux/lsm_hooks.h | 8 ++++---- include/linux/security.h | 10 +++++----- include/net/sctp/structs.h | 20 ++++++++++---------- net/sctp/sm_statefuns.c | 26 +++++++++++++------------- net/sctp/socket.c | 5 ++--- security/security.c | 8 ++++---- security/selinux/hooks.c | 22 +++++++++++----------- security/selinux/include/netlabel.h | 4 ++-- security/selinux/netlabel.c | 18 +++++++++--------- 11 files changed, 76 insertions(+), 77 deletions(-) diff --git a/Documentation/security/SCTP.rst b/Documentation/security/SCTP.rst index 0bcf6c1245ee..415b548d9ce0 100644 --- a/Documentation/security/SCTP.rst +++ b/Documentation/security/SCTP.rst @@ -26,11 +26,11 @@ described in the `SCTP SELinux Support`_ chapter. security_sctp_assoc_request() ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Passes the ``@ep`` and ``@chunk->skb`` of the association INIT packet to the +Passes the ``@asoc`` and ``@chunk->skb`` of the association INIT packet to the security module. Returns 0 on success, error on failure. :: - @ep - pointer to sctp endpoint structure. + @asoc - pointer to sctp association structure. @skb - pointer to skbuff of association packet. @@ -117,9 +117,9 @@ Called whenever a new socket is created by **accept**\(2) calls **sctp_peeloff**\(3). :: - @ep - pointer to current sctp endpoint structure. + @asoc - pointer to current sctp association structure. @sk - pointer to current sock structure. - @sk - pointer to new sock structure. + @newsk - pointer to new sock structure. security_inet_conn_established() @@ -200,22 +200,22 @@ hooks with the SELinux specifics expanded below:: security_sctp_assoc_request() ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Passes the ``@ep`` and ``@chunk->skb`` of the association INIT packet to the +Passes the ``@asoc`` and ``@chunk->skb`` of the association INIT packet to the security module. Returns 0 on success, error on failure. 
:: - @ep - pointer to sctp endpoint structure. + @asoc - pointer to sctp association structure. @skb - pointer to skbuff of association packet. The security module performs the following operations: - IF this is the first association on ``@ep->base.sk``, then set the peer + IF this is the first association on ``@asoc->base.sk``, then set the peer sid to that in ``@skb``. This will ensure there is only one peer sid - assigned to ``@ep->base.sk`` that may support multiple associations. + assigned to ``@asoc->base.sk`` that may support multiple associations. - ELSE validate the ``@ep->base.sk peer_sid`` against the ``@skb peer sid`` + ELSE validate the ``@asoc->base.sk peer_sid`` against the ``@skb peer sid`` to determine whether the association should be allowed or denied. - Set the sctp ``@ep sid`` to socket's sid (from ``ep->base.sk``) with + Set the sctp ``@asoc sid`` to socket's sid (from ``asoc->base.sk``) with MLS portion taken from ``@skb peer sid``. This will be used by SCTP TCP style sockets and peeled off connections as they cause a new socket to be generated. @@ -259,13 +259,13 @@ security_sctp_sk_clone() Called whenever a new socket is created by **accept**\(2) (i.e. a TCP style socket) or when a socket is 'peeled off' e.g userspace calls **sctp_peeloff**\(3). ``security_sctp_sk_clone()`` will set the new -sockets sid and peer sid to that contained in the ``@ep sid`` and -``@ep peer sid`` respectively. +sockets sid and peer sid to that contained in the ``@asoc sid`` and +``@asoc peer sid`` respectively. :: - @ep - pointer to current sctp endpoint structure. + @asoc - pointer to current sctp association structure. @sk - pointer to current sock structure. - @sk - pointer to new sock structure. + @newsk - pointer to new sock structure. 
security_inet_conn_established() diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index a9ac70ae01ab..df8de62f4710 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -329,11 +329,11 @@ LSM_HOOK(int, 0, tun_dev_create, void) LSM_HOOK(int, 0, tun_dev_attach_queue, void *security) LSM_HOOK(int, 0, tun_dev_attach, struct sock *sk, void *security) LSM_HOOK(int, 0, tun_dev_open, void *security) -LSM_HOOK(int, 0, sctp_assoc_request, struct sctp_endpoint *ep, +LSM_HOOK(int, 0, sctp_assoc_request, struct sctp_association *asoc, struct sk_buff *skb) LSM_HOOK(int, 0, sctp_bind_connect, struct sock *sk, int optname, struct sockaddr *address, int addrlen) -LSM_HOOK(void, LSM_RET_VOID, sctp_sk_clone, struct sctp_endpoint *ep, +LSM_HOOK(void, LSM_RET_VOID, sctp_sk_clone, struct sctp_association *asoc, struct sock *sk, struct sock *newsk) #endif /* CONFIG_SECURITY_NETWORK */ diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 0bada4df23fc..d45b6f6e27fd 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1027,9 +1027,9 @@ * Security hooks for SCTP * * @sctp_assoc_request: - * Passes the @ep and @chunk->skb of the association INIT packet to + * Passes the @asoc and @chunk->skb of the association INIT packet to * the security module. - * @ep pointer to sctp endpoint structure. + * @asoc pointer to sctp association structure. * @skb pointer to skbuff of association packet. * Return 0 on success, error on failure. * @sctp_bind_connect: @@ -1047,9 +1047,9 @@ * Called whenever a new socket is created by accept(2) (i.e. a TCP * style socket) or when a socket is 'peeled off' e.g userspace * calls sctp_peeloff(3). - * @ep pointer to current sctp endpoint structure. + * @asoc pointer to current sctp association structure. * @sk pointer to current sock structure. - * @sk pointer to new sock structure. + * @newsk pointer to new sock structure. 
* * Security hooks for Infiniband * diff --git a/include/linux/security.h b/include/linux/security.h index 7e0ba63b5dde..bbf44a466832 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -179,7 +179,7 @@ struct xfrm_policy; struct xfrm_state; struct xfrm_user_sec_ctx; struct seq_file; -struct sctp_endpoint; +struct sctp_association; #ifdef CONFIG_MMU extern unsigned long mmap_min_addr; @@ -1425,10 +1425,10 @@ int security_tun_dev_create(void); int security_tun_dev_attach_queue(void *security); int security_tun_dev_attach(struct sock *sk, void *security); int security_tun_dev_open(void *security); -int security_sctp_assoc_request(struct sctp_endpoint *ep, struct sk_buff *skb); +int security_sctp_assoc_request(struct sctp_association *asoc, struct sk_buff *skb); int security_sctp_bind_connect(struct sock *sk, int optname, struct sockaddr *address, int addrlen); -void security_sctp_sk_clone(struct sctp_endpoint *ep, struct sock *sk, +void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk, struct sock *newsk); #else /* CONFIG_SECURITY_NETWORK */ @@ -1631,7 +1631,7 @@ static inline int security_tun_dev_open(void *security) return 0; } -static inline int security_sctp_assoc_request(struct sctp_endpoint *ep, +static inline int security_sctp_assoc_request(struct sctp_association *asoc, struct sk_buff *skb) { return 0; @@ -1644,7 +1644,7 @@ static inline int security_sctp_bind_connect(struct sock *sk, int optname, return 0; } -static inline void security_sctp_sk_clone(struct sctp_endpoint *ep, +static inline void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk, struct sock *newsk) { diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 651bba654d77..899c29c326ba 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1355,16 +1355,6 @@ struct sctp_endpoint { reconf_enable:1; __u8 strreset_enable; - - /* Security identifiers from incoming (INIT). 
These are set by - * security_sctp_assoc_request(). These will only be used by - * SCTP TCP type sockets and peeled off connections as they - * cause a new socket to be generated. security_sctp_sk_clone() - * will then plug these into the new socket. - */ - - u32 secid; - u32 peer_secid; }; /* Recover the outter endpoint structure. */ @@ -2104,6 +2094,16 @@ struct sctp_association { __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; + /* Security identifiers from incoming (INIT). These are set by + * security_sctp_assoc_request(). These will only be used by + * SCTP TCP type sockets and peeled off connections as they + * cause a new socket to be generated. security_sctp_sk_clone() + * will then plug these into the new socket. + */ + + u32 secid; + u32 peer_secid; + struct rcu_head rcu; }; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index fb3da4d8f4a3..3206374209bc 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -326,11 +326,6 @@ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net, struct sctp_packet *packet; int len; - /* Update socket peer label if first association. */ - if (security_sctp_assoc_request((struct sctp_endpoint *)ep, - chunk->skb)) - return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); - /* 6.10 Bundling * An endpoint MUST NOT bundle INIT, INIT ACK or * SHUTDOWN COMPLETE with any other chunks. @@ -415,6 +410,12 @@ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net, if (!new_asoc) goto nomem; + /* Update socket peer label if first association. */ + if (security_sctp_assoc_request(new_asoc, chunk->skb)) { + sctp_association_free(new_asoc); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + } + if (sctp_assoc_set_bind_addr_from_ep(new_asoc, sctp_scope(sctp_source(chunk)), GFP_ATOMIC) < 0) @@ -780,7 +781,6 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net, } } - /* Delay state machine commands until later. 
* * Re-build the bind address for the association is done in @@ -1517,11 +1517,6 @@ static enum sctp_disposition sctp_sf_do_unexpected_init( struct sctp_packet *packet; int len; - /* Update socket peer label if first association. */ - if (security_sctp_assoc_request((struct sctp_endpoint *)ep, - chunk->skb)) - return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); - /* 6.10 Bundling * An endpoint MUST NOT bundle INIT, INIT ACK or * SHUTDOWN COMPLETE with any other chunks. @@ -1594,6 +1589,12 @@ static enum sctp_disposition sctp_sf_do_unexpected_init( if (!new_asoc) goto nomem; + /* Update socket peer label if first association. */ + if (security_sctp_assoc_request(new_asoc, chunk->skb)) { + sctp_association_free(new_asoc); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + } + if (sctp_assoc_set_bind_addr_from_ep(new_asoc, sctp_scope(sctp_source(chunk)), GFP_ATOMIC) < 0) goto nomem; @@ -2255,8 +2256,7 @@ enum sctp_disposition sctp_sf_do_5_2_4_dupcook( } /* Update socket peer label if first association. */ - if (security_sctp_assoc_request((struct sctp_endpoint *)ep, - chunk->skb)) { + if (security_sctp_assoc_request(new_asoc, chunk->skb)) { sctp_association_free(new_asoc); return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6b937bfd4751..33391254fa82 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -9412,7 +9412,6 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, struct inet_sock *inet = inet_sk(sk); struct inet_sock *newinet; struct sctp_sock *sp = sctp_sk(sk); - struct sctp_endpoint *ep = sp->ep; newsk->sk_type = sk->sk_type; newsk->sk_bound_dev_if = sk->sk_bound_dev_if; @@ -9457,9 +9456,9 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, net_enable_timestamp(); /* Set newsk security attributes from original sk and connection - * security attribute from ep. + * security attribute from asoc. 
*/ - security_sctp_sk_clone(ep, sk, newsk); + security_sctp_sk_clone(asoc, sk, newsk); } static inline void sctp_copy_descendant(struct sock *sk_to, diff --git a/security/security.c b/security/security.c index 95e30fadba78..c88167a414b4 100644 --- a/security/security.c +++ b/security/security.c @@ -2367,9 +2367,9 @@ int security_tun_dev_open(void *security) } EXPORT_SYMBOL(security_tun_dev_open); -int security_sctp_assoc_request(struct sctp_endpoint *ep, struct sk_buff *skb) +int security_sctp_assoc_request(struct sctp_association *asoc, struct sk_buff *skb) { - return call_int_hook(sctp_assoc_request, 0, ep, skb); + return call_int_hook(sctp_assoc_request, 0, asoc, skb); } EXPORT_SYMBOL(security_sctp_assoc_request); @@ -2381,10 +2381,10 @@ int security_sctp_bind_connect(struct sock *sk, int optname, } EXPORT_SYMBOL(security_sctp_bind_connect); -void security_sctp_sk_clone(struct sctp_endpoint *ep, struct sock *sk, +void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk, struct sock *newsk) { - call_void_hook(sctp_sk_clone, ep, sk, newsk); + call_void_hook(sctp_sk_clone, asoc, sk, newsk); } EXPORT_SYMBOL(security_sctp_sk_clone); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index ea7b2876a5ae..62d30c0a30c2 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -5339,10 +5339,10 @@ static void selinux_sock_graft(struct sock *sk, struct socket *parent) * connect(2), sctp_connectx(3) or sctp_sendmsg(3) (with no association * already present). 
*/ -static int selinux_sctp_assoc_request(struct sctp_endpoint *ep, +static int selinux_sctp_assoc_request(struct sctp_association *asoc, struct sk_buff *skb) { - struct sk_security_struct *sksec = ep->base.sk->sk_security; + struct sk_security_struct *sksec = asoc->base.sk->sk_security; struct common_audit_data ad; struct lsm_network_audit net = {0,}; u8 peerlbl_active; @@ -5359,7 +5359,7 @@ static int selinux_sctp_assoc_request(struct sctp_endpoint *ep, /* This will return peer_sid = SECSID_NULL if there are * no peer labels, see security_net_peersid_resolve(). */ - err = selinux_skb_peerlbl_sid(skb, ep->base.sk->sk_family, + err = selinux_skb_peerlbl_sid(skb, asoc->base.sk->sk_family, &peer_sid); if (err) return err; @@ -5383,7 +5383,7 @@ static int selinux_sctp_assoc_request(struct sctp_endpoint *ep, */ ad.type = LSM_AUDIT_DATA_NET; ad.u.net = &net; - ad.u.net->sk = ep->base.sk; + ad.u.net->sk = asoc->base.sk; err = avc_has_perm(&selinux_state, sksec->peer_sid, peer_sid, sksec->sclass, SCTP_SOCKET__ASSOCIATION, &ad); @@ -5392,7 +5392,7 @@ static int selinux_sctp_assoc_request(struct sctp_endpoint *ep, } /* Compute the MLS component for the connection and store - * the information in ep. This will be used by SCTP TCP type + * the information in asoc. This will be used by SCTP TCP type * sockets and peeled off connections as they cause a new * socket to be generated. selinux_sctp_sk_clone() will then * plug this into the new socket. @@ -5401,11 +5401,11 @@ static int selinux_sctp_assoc_request(struct sctp_endpoint *ep, if (err) return err; - ep->secid = conn_sid; - ep->peer_secid = peer_sid; + asoc->secid = conn_sid; + asoc->peer_secid = peer_sid; /* Set any NetLabel labels including CIPSO/CALIPSO options. 
*/ - return selinux_netlbl_sctp_assoc_request(ep, skb); + return selinux_netlbl_sctp_assoc_request(asoc, skb); } /* Check if sctp IPv4/IPv6 addresses are valid for binding or connecting @@ -5490,7 +5490,7 @@ static int selinux_sctp_bind_connect(struct sock *sk, int optname, } /* Called whenever a new socket is created by accept(2) or sctp_peeloff(3). */ -static void selinux_sctp_sk_clone(struct sctp_endpoint *ep, struct sock *sk, +static void selinux_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk, struct sock *newsk) { struct sk_security_struct *sksec = sk->sk_security; @@ -5502,8 +5502,8 @@ static void selinux_sctp_sk_clone(struct sctp_endpoint *ep, struct sock *sk, if (!selinux_policycap_extsockclass()) return selinux_sk_clone_security(sk, newsk); - newsksec->sid = ep->secid; - newsksec->peer_sid = ep->peer_secid; + newsksec->sid = asoc->secid; + newsksec->peer_sid = asoc->peer_secid; newsksec->sclass = sksec->sclass; selinux_netlbl_sctp_sk_clone(sk, newsk); } diff --git a/security/selinux/include/netlabel.h b/security/selinux/include/netlabel.h index 0c58f62dc6ab..4d0456d3d459 100644 --- a/security/selinux/include/netlabel.h +++ b/security/selinux/include/netlabel.h @@ -39,7 +39,7 @@ int selinux_netlbl_skbuff_getsid(struct sk_buff *skb, int selinux_netlbl_skbuff_setsid(struct sk_buff *skb, u16 family, u32 sid); -int selinux_netlbl_sctp_assoc_request(struct sctp_endpoint *ep, +int selinux_netlbl_sctp_assoc_request(struct sctp_association *asoc, struct sk_buff *skb); int selinux_netlbl_inet_conn_request(struct request_sock *req, u16 family); void selinux_netlbl_inet_csk_clone(struct sock *sk, u16 family); @@ -98,7 +98,7 @@ static inline int selinux_netlbl_skbuff_setsid(struct sk_buff *skb, return 0; } -static inline int selinux_netlbl_sctp_assoc_request(struct sctp_endpoint *ep, +static inline int selinux_netlbl_sctp_assoc_request(struct sctp_association *asoc, struct sk_buff *skb) { return 0; diff --git a/security/selinux/netlabel.c 
b/security/selinux/netlabel.c index 29b88e81869b..1321f15799e2 100644 --- a/security/selinux/netlabel.c +++ b/security/selinux/netlabel.c @@ -261,30 +261,30 @@ skbuff_setsid_return: /** * selinux_netlbl_sctp_assoc_request - Label an incoming sctp association. - * @ep: incoming association endpoint. + * @asoc: incoming association. * @skb: the packet. * * Description: - * A new incoming connection is represented by @ep, ...... + * A new incoming connection is represented by @asoc, ...... * Returns zero on success, negative values on failure. * */ -int selinux_netlbl_sctp_assoc_request(struct sctp_endpoint *ep, +int selinux_netlbl_sctp_assoc_request(struct sctp_association *asoc, struct sk_buff *skb) { int rc; struct netlbl_lsm_secattr secattr; - struct sk_security_struct *sksec = ep->base.sk->sk_security; + struct sk_security_struct *sksec = asoc->base.sk->sk_security; struct sockaddr_in addr4; struct sockaddr_in6 addr6; - if (ep->base.sk->sk_family != PF_INET && - ep->base.sk->sk_family != PF_INET6) + if (asoc->base.sk->sk_family != PF_INET && + asoc->base.sk->sk_family != PF_INET6) return 0; netlbl_secattr_init(&secattr); rc = security_netlbl_sid_to_secattr(&selinux_state, - ep->secid, &secattr); + asoc->secid, &secattr); if (rc != 0) goto assoc_request_return; @@ -294,11 +294,11 @@ int selinux_netlbl_sctp_assoc_request(struct sctp_endpoint *ep, if (ip_hdr(skb)->version == 4) { addr4.sin_family = AF_INET; addr4.sin_addr.s_addr = ip_hdr(skb)->saddr; - rc = netlbl_conn_setattr(ep->base.sk, (void *)&addr4, &secattr); + rc = netlbl_conn_setattr(asoc->base.sk, (void *)&addr4, &secattr); } else if (IS_ENABLED(CONFIG_IPV6) && ip_hdr(skb)->version == 6) { addr6.sin6_family = AF_INET6; addr6.sin6_addr = ipv6_hdr(skb)->saddr; - rc = netlbl_conn_setattr(ep->base.sk, (void *)&addr6, &secattr); + rc = netlbl_conn_setattr(asoc->base.sk, (void *)&addr6, &secattr); } else { rc = -EAFNOSUPPORT; } From e215dab1c49070cd75620afd801f777207a5b65c Mon Sep 17 00:00:00 2001 From: Xin Long 
Date: Tue, 2 Nov 2021 08:02:48 -0400 Subject: [PATCH 223/433] security: call security_sctp_assoc_request in sctp_sf_do_5_1D_ce The asoc created when the INIT chunk is received is a temporary one; it will be deleted after the INIT_ACK chunk is sent in reply. So for the real asoc created in sctp_sf_do_5_1D_ce() when the COOKIE_ECHO chunk is received, security_sctp_assoc_request() should also be called. v1->v2: - fix some typos and grammar errors, noticed by Ondrej. Fixes: 72e89f50084c ("security: Add support for SCTP security hooks") Reported-by: Prashanth Prahlad Reviewed-by: Richard Haines Tested-by: Richard Haines Signed-off-by: Xin Long Signed-off-by: David S. Miller --- Documentation/security/SCTP.rst | 15 +++++++++------ net/sctp/sm_statefuns.c | 5 +++++ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/Documentation/security/SCTP.rst b/Documentation/security/SCTP.rst index 415b548d9ce0..d5fd6ccc3dcb 100644 --- a/Documentation/security/SCTP.rst +++ b/Documentation/security/SCTP.rst @@ -151,9 +151,9 @@ establishing an association. INIT ---------------------------------------------> sctp_sf_do_5_1B_init() Respond to an INIT chunk. - SCTP peer endpoint "A" is - asking for an association. Call - security_sctp_assoc_request() + SCTP peer endpoint "A" is asking + for a temporary association. + Call security_sctp_assoc_request() to set the peer label if first association. If not first association, check @@ -163,9 +163,12 @@ establishing an association. | discard the packet. | COOKIE ECHO ------------------------------------------> - | - | - | + sctp_sf_do_5_1D_ce() + Respond to an COOKIE ECHO chunk. + Confirm the cookie and create a + permanent association. + Call security_sctp_assoc_request() to + do the same as for INIT chunk Response.
<------------------------------------------- COOKIE ACK | | sctp_sf_do_5_1E_ca | diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 3206374209bc..b818532c3fc2 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -781,6 +781,11 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net, } } + if (security_sctp_assoc_request(new_asoc, chunk->skb)) { + sctp_association_free(new_asoc); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + } + /* Delay state machine commands until later. * * Re-build the bind address for the association is done in From 7c2ef0240e6abfd3cc59511339517358350a8910 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 2 Nov 2021 08:02:49 -0400 Subject: [PATCH 224/433] security: add sctp_assoc_established hook security_sctp_assoc_established() is added to replace security_inet_conn_established() called in sctp_sf_do_5_1E_ca(), so that asoc can be accessed in security subsystem and save the peer secid to asoc->peer_secid. v1->v2: - fix the return value of security_sctp_assoc_established() in security.h, found by kernel test robot and Ondrej. Fixes: 72e89f50084c ("security: Add support for SCTP security hooks") Reported-by: Prashanth Prahlad Reviewed-by: Richard Haines Tested-by: Richard Haines Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- Documentation/security/SCTP.rst | 22 ++++++++++------------ include/linux/lsm_hook_defs.h | 2 ++ include/linux/lsm_hooks.h | 5 +++++ include/linux/security.h | 7 +++++++ net/sctp/sm_statefuns.c | 2 +- security/security.c | 7 +++++++ 6 files changed, 32 insertions(+), 13 deletions(-) diff --git a/Documentation/security/SCTP.rst b/Documentation/security/SCTP.rst index d5fd6ccc3dcb..406cc68b8808 100644 --- a/Documentation/security/SCTP.rst +++ b/Documentation/security/SCTP.rst @@ -15,10 +15,7 @@ For security module support, three SCTP specific hooks have been implemented:: security_sctp_assoc_request() security_sctp_bind_connect() security_sctp_sk_clone() - -Also the following security hook has been utilised:: - - security_inet_conn_established() + security_sctp_assoc_established() The usage of these hooks are described below with the SELinux implementation described in the `SCTP SELinux Support`_ chapter. @@ -122,11 +119,12 @@ calls **sctp_peeloff**\(3). @newsk - pointer to new sock structure. -security_inet_conn_established() +security_sctp_assoc_established() ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Called when a COOKIE ACK is received:: +Called when a COOKIE ACK is received, and the peer secid will be +saved into ``@asoc->peer_secid`` for client:: - @sk - pointer to sock structure. + @asoc - pointer to sctp association structure. @skb - pointer to skbuff of the COOKIE ACK packet. @@ -134,7 +132,7 @@ Security Hooks used for Association Establishment ------------------------------------------------- The following diagram shows the use of ``security_sctp_bind_connect()``, -``security_sctp_assoc_request()``, ``security_inet_conn_established()`` when +``security_sctp_assoc_request()``, ``security_sctp_assoc_established()`` when establishing an association. :: @@ -172,7 +170,7 @@ establishing an association. 
<------------------------------------------- COOKIE ACK | | sctp_sf_do_5_1E_ca | - Call security_inet_conn_established() | + Call security_sctp_assoc_established() | to set the peer label. | | | | If SCTP_SOCKET_TCP or peeled off @@ -198,7 +196,7 @@ hooks with the SELinux specifics expanded below:: security_sctp_assoc_request() security_sctp_bind_connect() security_sctp_sk_clone() - security_inet_conn_established() + security_sctp_assoc_established() security_sctp_assoc_request() @@ -271,12 +269,12 @@ sockets sid and peer sid to that contained in the ``@asoc sid`` and @newsk - pointer to new sock structure. -security_inet_conn_established() +security_sctp_assoc_established() ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Called when a COOKIE ACK is received where it sets the connection's peer sid to that in ``@skb``:: - @sk - pointer to sock structure. + @asoc - pointer to sctp association structure. @skb - pointer to skbuff of the COOKIE ACK packet. diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index df8de62f4710..442a611fa0fb 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -335,6 +335,8 @@ LSM_HOOK(int, 0, sctp_bind_connect, struct sock *sk, int optname, struct sockaddr *address, int addrlen) LSM_HOOK(void, LSM_RET_VOID, sctp_sk_clone, struct sctp_association *asoc, struct sock *sk, struct sock *newsk) +LSM_HOOK(void, LSM_RET_VOID, sctp_assoc_established, struct sctp_association *asoc, + struct sk_buff *skb) #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_INFINIBAND diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index d45b6f6e27fd..d6823214d5c1 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1050,6 +1050,11 @@ * @asoc pointer to current sctp association structure. * @sk pointer to current sock structure. * @newsk pointer to new sock structure. 
+ * @sctp_assoc_established: + * Passes the @asoc and @chunk->skb of the association COOKIE_ACK packet + * to the security module. + * @asoc pointer to sctp association structure. + * @skb pointer to skbuff of association packet. * * Security hooks for Infiniband * diff --git a/include/linux/security.h b/include/linux/security.h index bbf44a466832..06eac4e61a13 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1430,6 +1430,8 @@ int security_sctp_bind_connect(struct sock *sk, int optname, struct sockaddr *address, int addrlen); void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk, struct sock *newsk); +void security_sctp_assoc_established(struct sctp_association *asoc, + struct sk_buff *skb); #else /* CONFIG_SECURITY_NETWORK */ static inline int security_unix_stream_connect(struct sock *sock, @@ -1649,6 +1651,11 @@ static inline void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *newsk) { } + +static inline void security_sctp_assoc_established(struct sctp_association *asoc, + struct sk_buff *skb) +{ +} #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_INFINIBAND diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index b818532c3fc2..5fabaa54b77d 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -946,7 +946,7 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net, sctp_add_cmd_sf(commands, SCTP_CMD_INIT_COUNTER_RESET, SCTP_NULL()); /* Set peer label for connection. 
*/ - security_inet_conn_established(ep->base.sk, chunk->skb); + security_sctp_assoc_established((struct sctp_association *)asoc, chunk->skb); /* RFC 2960 5.1 Normal Establishment of an Association * diff --git a/security/security.c b/security/security.c index c88167a414b4..779a9edea0a0 100644 --- a/security/security.c +++ b/security/security.c @@ -2388,6 +2388,13 @@ void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk, } EXPORT_SYMBOL(security_sctp_sk_clone); +void security_sctp_assoc_established(struct sctp_association *asoc, + struct sk_buff *skb) +{ + call_void_hook(sctp_assoc_established, asoc, skb); +} +EXPORT_SYMBOL(security_sctp_assoc_established); + #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_INFINIBAND From e7310c94024cdf099c0d29e6903dd6fe9205bb60 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 2 Nov 2021 08:02:50 -0400 Subject: [PATCH 225/433] security: implement sctp_assoc_established hook in selinux Different from selinux_inet_conn_established(), it also gives the secid to asoc->peer_secid in selinux_sctp_assoc_established(), as one UDP-type socket may have more than one asocs. Note that peer_secid in asoc will save the peer secid for this asoc connection, and peer_sid in sksec will just keep the peer secid for the latest connection. So the right use should be do peeloff for UDP-type socket if there will be multiple asocs in one socket, so that the peeloff socket has the right label for its asoc. v1->v2: - call selinux_inet_conn_established() to reduce some code duplication in selinux_sctp_assoc_established(), as Ondrej suggested. - when doing peeloff, it calls sock_create() where it actually gets secid for socket from socket_sockcreate_sid(). So reuse SECSID_WILD to ensure the peeloff socket keeps using that secid after calling selinux_sctp_sk_clone() for client side. 
Fixes: 72e89f50084c ("security: Add support for SCTP security hooks") Reported-by: Prashanth Prahlad Reviewed-by: Richard Haines Tested-by: Richard Haines Signed-off-by: Xin Long Signed-off-by: David S. Miller --- security/selinux/hooks.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 62d30c0a30c2..5e5215fe2e83 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -5502,7 +5502,8 @@ static void selinux_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk if (!selinux_policycap_extsockclass()) return selinux_sk_clone_security(sk, newsk); - newsksec->sid = asoc->secid; + if (asoc->secid != SECSID_WILD) + newsksec->sid = asoc->secid; newsksec->peer_sid = asoc->peer_secid; newsksec->sclass = sksec->sclass; selinux_netlbl_sctp_sk_clone(sk, newsk); @@ -5558,6 +5559,16 @@ static void selinux_inet_conn_established(struct sock *sk, struct sk_buff *skb) selinux_skb_peerlbl_sid(skb, family, &sksec->peer_sid); } +static void selinux_sctp_assoc_established(struct sctp_association *asoc, + struct sk_buff *skb) +{ + struct sk_security_struct *sksec = asoc->base.sk->sk_security; + + selinux_inet_conn_established(asoc->base.sk, skb); + asoc->peer_secid = sksec->peer_sid; + asoc->secid = SECSID_WILD; +} + static int selinux_secmark_relabel_packet(u32 sid) { const struct task_security_struct *__tsec; @@ -7228,6 +7239,7 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(sctp_assoc_request, selinux_sctp_assoc_request), LSM_HOOK_INIT(sctp_sk_clone, selinux_sctp_sk_clone), LSM_HOOK_INIT(sctp_bind_connect, selinux_sctp_bind_connect), + LSM_HOOK_INIT(sctp_assoc_established, selinux_sctp_assoc_established), LSM_HOOK_INIT(inet_conn_request, selinux_inet_conn_request), LSM_HOOK_INIT(inet_csk_clone, selinux_inet_csk_clone), LSM_HOOK_INIT(inet_conn_established, selinux_inet_conn_established), From aedddb4e45b34426cfbfa84454b6f203712733c5 
Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Tue, 2 Nov 2021 16:10:21 +0800 Subject: [PATCH 226/433] NFC: add necessary privilege flags in netlink layer The CAP_NET_ADMIN checks are needed to prevent attackers from faking a device under NCIUARTSETDRIVER and exploiting privileged commands. This patch adds GENL_ADMIN_PERM flags in genl_ops to fulfill the check, except for commands like NFC_CMD_GET_DEVICE, NFC_CMD_GET_TARGET, NFC_CMD_LLC_GET_PARAMS, and NFC_CMD_GET_SE, which are mainly information-read operations. Signed-off-by: Lin Ma Signed-off-by: David S. Miller --- net/nfc/netlink.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 49089c50872e..334f63c9529e 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1664,31 +1664,37 @@ static const struct genl_ops nfc_genl_ops[] = { .cmd = NFC_CMD_DEV_UP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_dev_up, + .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_DEV_DOWN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_dev_down, + .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_START_POLL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_start_poll, + .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_STOP_POLL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_stop_poll, + .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_DEP_LINK_UP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_dep_link_up, + .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_DEP_LINK_DOWN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_dep_link_down, + .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_GET_TARGET, @@ -1706,26 +1712,31 @@ static const struct genl_ops nfc_genl_ops[] = { .cmd = NFC_CMD_LLC_SET_PARAMS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_llc_set_params, +
.flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_LLC_SDREQ, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_llc_sdreq, + .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_FW_DOWNLOAD, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_fw_download, + .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_ENABLE_SE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_enable_se, + .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_DISABLE_SE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_disable_se, + .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_GET_SE, @@ -1737,21 +1748,25 @@ static const struct genl_ops nfc_genl_ops[] = { .cmd = NFC_CMD_SE_IO, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_se_io, + .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_ACTIVATE_TARGET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_activate_target, + .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_VENDOR, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_vendor_cmd, + .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_DEACTIVATE_TARGET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_deactivate_target, + .flags = GENL_ADMIN_PERM, }, }; From acaea0d5a63406c052444ad3a7cb54241adaf805 Mon Sep 17 00:00:00 2001 From: Zhang Mingyu Date: Wed, 3 Nov 2021 06:46:17 +0000 Subject: [PATCH 227/433] net:ipv6:Remove unneeded semicolon Eliminate the following coccinelle check warning: net/ipv6/seg6.c:381:2-3 Reported-by: Zeal Robot Signed-off-by: Zhang Mingyu Signed-off-by: David S. 
Miller --- net/ipv6/seg6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 5daa1c3ed83b..a8b5784afb1a 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -378,7 +378,7 @@ static int __net_init seg6_net_init(struct net *net) kfree(rcu_dereference_raw(sdata->tun_src)); kfree(sdata); return -ENOMEM; - }; + } #endif return 0; From 9b65b17db72313b7a4fe9bc9502928c88be57986 Mon Sep 17 00:00:00 2001 From: Talal Ahmad Date: Tue, 2 Nov 2021 22:58:44 -0400 Subject: [PATCH 228/433] net: avoid double accounting for pure zerocopy skbs Track skbs containing only zerocopy data and avoid charging them to kernel memory to correctly account the memory utilization for msg_zerocopy. All of the data in such skbs is held in user pages which are already accounted to user. Before this change, they are charged again in kernel in __zerocopy_sg_from_iter. The charging in kernel is excessive because data is not being copied into skb frags. This excessive charging can lead to kernel going into memory pressure state which impacts all sockets in the system adversely. Mark pure zerocopy skbs with a SKBFL_PURE_ZEROCOPY flag and remove charge/uncharge for data in such skbs. Initially, an skb is marked pure zerocopy when it is empty and in zerocopy path. skb can then change from a pure zerocopy skb to mixed data skb (zerocopy and copy data) if it is at tail of write queue and there is room available in it and non-zerocopy data is being sent in the next sendmsg call. At this time sk_mem_charge is done for the pure zerocopied data and the pure zerocopy flag is unmarked. We found that this happens very rarely on workloads that pass MSG_ZEROCOPY. A pure zerocopy skb can later be coalesced into normal skb if they are next to each other in queue but this patch prevents coalescing from happening. This avoids complexity of charging when skb downgrades from pure zerocopy to mixed. This is also rare. 
In sk_wmem_free_skb, if it is a pure zerocopy skb, an sk_mem_uncharge for SKB_TRUESIZE(skb_end_offset(skb)) is done for sk_mem_charge in tcp_skb_entail for an skb without data. Testing with the msg_zerocopy.c benchmark between two hosts(100G nics) with zerocopy showed that before this patch the 'sock' variable in memory.stat for cgroup2 that tracks sum of sk_forward_alloc, sk_rmem_alloc and sk_wmem_queued is around 1822720 and with this change it is 0. This is due to no charge to sk_forward_alloc for zerocopy data and shows memory utilization for kernel is lowered. With this commit we don't see the warning we saw in previous commit which resulted in commit 84882cf72cd774cf16fd338bdbf00f69ac9f9194. Signed-off-by: Talal Ahmad Acked-by: Arjun Roy Acked-by: Soheil Hassas Yeganeh Signed-off-by: Willem de Bruijn Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 19 ++++++++++++++++++- include/net/tcp.h | 8 ++++++-- net/core/datagram.c | 3 ++- net/core/skbuff.c | 3 ++- net/ipv4/tcp.c | 22 ++++++++++++++++++++-- net/ipv4/tcp_output.c | 7 +++++-- 6 files changed, 53 insertions(+), 9 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a63e13082397..686a666d073d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -454,9 +454,15 @@ enum { * all frags to avoid possible bad checksum */ SKBFL_SHARED_FRAG = BIT(1), + + /* segment contains only zerocopy data and should not be + * charged to the kernel memory. + */ + SKBFL_PURE_ZEROCOPY = BIT(2), }; #define SKBFL_ZEROCOPY_FRAG (SKBFL_ZEROCOPY_ENABLE | SKBFL_SHARED_FRAG) +#define SKBFL_ALL_ZEROCOPY (SKBFL_ZEROCOPY_FRAG | SKBFL_PURE_ZEROCOPY) /* * The callback notifies userspace to release buffers when skb DMA is done in @@ -1464,6 +1470,17 @@ static inline struct ubuf_info *skb_zcopy(struct sk_buff *skb) return is_zcopy ? 
skb_uarg(skb) : NULL; } +static inline bool skb_zcopy_pure(const struct sk_buff *skb) +{ + return skb_shinfo(skb)->flags & SKBFL_PURE_ZEROCOPY; +} + +static inline bool skb_pure_zcopy_same(const struct sk_buff *skb1, + const struct sk_buff *skb2) +{ + return skb_zcopy_pure(skb1) == skb_zcopy_pure(skb2); +} + static inline void net_zcopy_get(struct ubuf_info *uarg) { refcount_inc(&uarg->refcnt); @@ -1528,7 +1545,7 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy_success) if (!skb_zcopy_is_nouarg(skb)) uarg->callback(skb, uarg, zerocopy_success); - skb_shinfo(skb)->flags &= ~SKBFL_ZEROCOPY_FRAG; + skb_shinfo(skb)->flags &= ~SKBFL_ALL_ZEROCOPY; } } diff --git a/include/net/tcp.h b/include/net/tcp.h index 70972f3ac8fa..4da22b41bde6 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -293,7 +293,10 @@ static inline bool tcp_out_of_memory(struct sock *sk) static inline void tcp_wmem_free_skb(struct sock *sk, struct sk_buff *skb) { sk_wmem_queued_add(sk, -skb->truesize); - sk_mem_uncharge(sk, skb->truesize); + if (!skb_zcopy_pure(skb)) + sk_mem_uncharge(sk, skb->truesize); + else + sk_mem_uncharge(sk, SKB_TRUESIZE(skb_end_offset(skb))); __kfree_skb(skb); } @@ -974,7 +977,8 @@ static inline bool tcp_skb_can_collapse(const struct sk_buff *to, const struct sk_buff *from) { return likely(tcp_skb_can_collapse_to(to) && - mptcp_skb_can_collapse(to, from)); + mptcp_skb_can_collapse(to, from) && + skb_pure_zcopy_same(to, from)); } /* Events passed to congestion control interface */ diff --git a/net/core/datagram.c b/net/core/datagram.c index 15ab9ffb27fe..ee290776c661 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -646,7 +646,8 @@ int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb, skb->truesize += truesize; if (sk && sk->sk_type == SOCK_STREAM) { sk_wmem_queued_add(sk, truesize); - sk_mem_charge(sk, truesize); + if (!skb_zcopy_pure(skb)) + sk_mem_charge(sk, truesize); } else { refcount_add(truesize, 
&skb->sk->sk_wmem_alloc); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3ec42cdee16a..ba2f38246f07 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3433,8 +3433,9 @@ static inline void skb_split_no_header(struct sk_buff *skb, void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) { int pos = skb_headlen(skb); + const int zc_flags = SKBFL_SHARED_FRAG | SKBFL_PURE_ZEROCOPY; - skb_shinfo(skb1)->flags |= skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG; + skb_shinfo(skb1)->flags |= skb_shinfo(skb)->flags & zc_flags; skb_zerocopy_clone(skb1, skb, 0); if (len < pos) /* Split line is inside header. */ skb_split_inside_header(skb, skb1, len, pos); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bc7f419184aa..b461ae573afc 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -863,6 +863,7 @@ struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, if (likely(skb)) { bool mem_scheduled; + skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); if (force_schedule) { mem_scheduled = true; sk_forced_mem_schedule(sk, skb->truesize); @@ -1319,6 +1320,15 @@ new_segment: copy = min_t(int, copy, pfrag->size - pfrag->offset); + /* skb changing from pure zc to mixed, must charge zc */ + if (unlikely(skb_zcopy_pure(skb))) { + if (!sk_wmem_schedule(sk, skb->data_len)) + goto wait_for_space; + + sk_mem_charge(sk, skb->data_len); + skb_shinfo(skb)->flags &= ~SKBFL_PURE_ZEROCOPY; + } + if (!sk_wmem_schedule(sk, copy)) goto wait_for_space; @@ -1339,8 +1349,16 @@ new_segment: } pfrag->offset += copy; } else { - if (!sk_wmem_schedule(sk, copy)) - goto wait_for_space; + /* First append to a fragless skb builds initial + * pure zerocopy skb + */ + if (!skb->len) + skb_shinfo(skb)->flags |= SKBFL_PURE_ZEROCOPY; + + if (!skb_zcopy_pure(skb)) { + if (!sk_wmem_schedule(sk, copy)) + goto wait_for_space; + } err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg); if (err == -EMSGSIZE || err == -EEXIST) { diff --git a/net/ipv4/tcp_output.c 
b/net/ipv4/tcp_output.c index 76cc1641beb4..6f7860e283c6 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1677,7 +1677,8 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) if (delta_truesize) { skb->truesize -= delta_truesize; sk_wmem_queued_add(sk, -delta_truesize); - sk_mem_uncharge(sk, delta_truesize); + if (!skb_zcopy_pure(skb)) + sk_mem_uncharge(sk, delta_truesize); } /* Any change of skb->len requires recalculation of tso factor. */ @@ -2295,7 +2296,9 @@ static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len) if (len <= skb->len) break; - if (unlikely(TCP_SKB_CB(skb)->eor) || tcp_has_tx_tstamp(skb)) + if (unlikely(TCP_SKB_CB(skb)->eor) || + tcp_has_tx_tstamp(skb) || + !skb_pure_zcopy_same(skb, next)) return false; len -= skb->len; From 1aabe578dd86e9f2867c4db4fba9a15f4ba1825d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 2 Nov 2021 15:02:36 -0700 Subject: [PATCH 229/433] ethtool: fix ethtool msg len calculation for pause stats ETHTOOL_A_PAUSE_STAT_MAX is the MAX attribute id, so we need to subtract non-stats and add one to get a count (IOW -2+1 == -1). Otherwise we'll see: ethnl cmd 21: calculated reply length 40, but consumed 52 Fixes: 9a27a33027f2 ("ethtool: add standard pause stats") Signed-off-by: Jakub Kicinski Reviewed-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- include/linux/ethtool_netlink.h | 3 +++ include/uapi/linux/ethtool_netlink.h | 4 +++- net/ethtool/pause.c | 3 +-- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/linux/ethtool_netlink.h b/include/linux/ethtool_netlink.h index 1e7bf78cb382..aba348d58ff6 100644 --- a/include/linux/ethtool_netlink.h +++ b/include/linux/ethtool_netlink.h @@ -10,6 +10,9 @@ #define __ETHTOOL_LINK_MODE_MASK_NWORDS \ DIV_ROUND_UP(__ETHTOOL_LINK_MODE_MASK_NBITS, 32) +#define ETHTOOL_PAUSE_STAT_CNT (__ETHTOOL_A_PAUSE_STAT_CNT - \ + ETHTOOL_A_PAUSE_STAT_TX_FRAMES) + enum ethtool_multicast_groups { ETHNL_MCGRP_MONITOR, }; diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index ca5fbb59fa42..999777d32dcf 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -411,7 +411,9 @@ enum { ETHTOOL_A_PAUSE_STAT_TX_FRAMES, ETHTOOL_A_PAUSE_STAT_RX_FRAMES, - /* add new constants above here */ + /* add new constants above here + * adjust ETHTOOL_PAUSE_STAT_CNT if adding non-stats! + */ __ETHTOOL_A_PAUSE_STAT_CNT, ETHTOOL_A_PAUSE_STAT_MAX = (__ETHTOOL_A_PAUSE_STAT_CNT - 1) }; diff --git a/net/ethtool/pause.c b/net/ethtool/pause.c index 9009f412151e..ee1e5806bc93 100644 --- a/net/ethtool/pause.c +++ b/net/ethtool/pause.c @@ -56,8 +56,7 @@ static int pause_reply_size(const struct ethnl_req_info *req_base, if (req_base->flags & ETHTOOL_FLAG_STATS) n += nla_total_size(0) + /* _PAUSE_STATS */ - nla_total_size_64bit(sizeof(u64)) * - (ETHTOOL_A_PAUSE_STAT_MAX - 2); + nla_total_size_64bit(sizeof(u64)) * ETHTOOL_PAUSE_STAT_CNT; return n; } From 576acc259146af848cec0940f573f7125a116b9f Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Wed, 3 Nov 2021 06:24:40 -0400 Subject: [PATCH 230/433] nfs4: take a reference on the nfs_client when running FREE_STATEID During umount, the session slot tables are freed. 
If there are outstanding FREE_STATEID tasks, a use-after-free and slab corruption can occur when rpc_exit_task calls rpc_call_done -> nfs41_sequence_done -> nfs4_sequence_process/nfs41_sequence_free_slot. Prevent that from happening by taking a reference on the nfs_client in nfs41_free_stateid and putting it in nfs41_free_stateid_release. Signed-off-by: Scott Mayhew Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1c94f54cab58..127388fabda8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -10115,6 +10115,10 @@ static void nfs41_free_stateid_done(struct rpc_task *task, void *calldata) static void nfs41_free_stateid_release(void *calldata) { + struct nfs_free_stateid_data *data = calldata; + struct nfs_client *clp = data->server->nfs_client; + + nfs_put_client(clp); kfree(calldata); } @@ -10151,6 +10155,10 @@ static int nfs41_free_stateid(struct nfs_server *server, }; struct nfs_free_stateid_data *data; struct rpc_task *task; + struct nfs_client *clp = server->nfs_client; + + if (!refcount_inc_not_zero(&clp->cl_count)) + return -EIO; nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_STATEID, &task_setup.rpc_client, &msg); From 250962e4684678629afd2feeaefdc40c5db501f4 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Wed, 3 Nov 2021 16:28:43 +0800 Subject: [PATCH 231/433] net: udp6: replace __UDP_INC_STATS() with __UDP6_INC_STATS() __UDP_INC_STATS() is used in udpv6_queue_rcv_one_skb() when encap_rcv() fails. __UDP6_INC_STATS() should be used here, so replace it with __UDP6_INC_STATS(). Signed-off-by: Menglong Dong Signed-off-by: David S. 
Miller --- net/ipv6/udp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 12c12619ee35..e43b31d25fb6 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -700,9 +700,9 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) ret = encap_rcv(sk, skb); if (ret <= 0) { - __UDP_INC_STATS(sock_net(sk), - UDP_MIB_INDATAGRAMS, - is_udplite); + __UDP6_INC_STATS(sock_net(sk), + UDP_MIB_INDATAGRAMS, + is_udplite); return -ret; } } From 4330fe35b8213e92ff51907b4cb6323be943a9ad Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Tue, 2 Nov 2021 16:01:56 -0600 Subject: [PATCH 232/433] nfs: remove unused header Commit 19fcae3d4f2dd ("scsi: remove the SCSI OSD library") deleted the last file that included but left that file behind. It's unused, get rid of it now. Cc: Christoph Hellwig Cc: Trond Myklebust Cc: Anna Schumaker Cc: linux-nfs@vger.kernel.org Signed-off-by: Jonathan Corbet Signed-off-by: Trond Myklebust --- include/linux/pnfs_osd_xdr.h | 317 ----------------------------------- 1 file changed, 317 deletions(-) delete mode 100644 include/linux/pnfs_osd_xdr.h diff --git a/include/linux/pnfs_osd_xdr.h b/include/linux/pnfs_osd_xdr.h deleted file mode 100644 index 17d7d0d20eca..000000000000 --- a/include/linux/pnfs_osd_xdr.h +++ /dev/null @@ -1,317 +0,0 @@ -/* - * pNFS-osd on-the-wire data structures - * - * Copyright (C) 2007 Panasas Inc. [year of first publication] - * All rights reserved. - * - * Benny Halevy - * Boaz Harrosh - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * See the file COPYING included with this distribution for more details. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the Panasas company nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ -#ifndef __PNFS_OSD_XDR_H__ -#define __PNFS_OSD_XDR_H__ - -#include - -/* - * draft-ietf-nfsv4-minorversion-22 - * draft-ietf-nfsv4-pnfs-obj-12 - */ - -/* Layout Structure */ - -enum pnfs_osd_raid_algorithm4 { - PNFS_OSD_RAID_0 = 1, - PNFS_OSD_RAID_4 = 2, - PNFS_OSD_RAID_5 = 3, - PNFS_OSD_RAID_PQ = 4 /* Reed-Solomon P+Q */ -}; - -/* struct pnfs_osd_data_map4 { - * uint32_t odm_num_comps; - * length4 odm_stripe_unit; - * uint32_t odm_group_width; - * uint32_t odm_group_depth; - * uint32_t odm_mirror_cnt; - * pnfs_osd_raid_algorithm4 odm_raid_algorithm; - * }; - */ -struct pnfs_osd_data_map { - u32 odm_num_comps; - u64 odm_stripe_unit; - u32 odm_group_width; - u32 odm_group_depth; - u32 odm_mirror_cnt; - u32 odm_raid_algorithm; -}; - -/* struct pnfs_osd_objid4 { - * deviceid4 oid_device_id; - * uint64_t oid_partition_id; - * uint64_t oid_object_id; - * }; - */ -struct pnfs_osd_objid { - struct nfs4_deviceid oid_device_id; - u64 oid_partition_id; - u64 oid_object_id; -}; - -/* For printout. 
I use: - * kprint("dev(%llx:%llx)", _DEVID_LO(pointer), _DEVID_HI(pointer)); - * BE style - */ -#define _DEVID_LO(oid_device_id) \ - (unsigned long long)be64_to_cpup((__be64 *)(oid_device_id)->data) - -#define _DEVID_HI(oid_device_id) \ - (unsigned long long)be64_to_cpup(((__be64 *)(oid_device_id)->data) + 1) - -enum pnfs_osd_version { - PNFS_OSD_MISSING = 0, - PNFS_OSD_VERSION_1 = 1, - PNFS_OSD_VERSION_2 = 2 -}; - -struct pnfs_osd_opaque_cred { - u32 cred_len; - void *cred; -}; - -enum pnfs_osd_cap_key_sec { - PNFS_OSD_CAP_KEY_SEC_NONE = 0, - PNFS_OSD_CAP_KEY_SEC_SSV = 1, -}; - -/* struct pnfs_osd_object_cred4 { - * pnfs_osd_objid4 oc_object_id; - * pnfs_osd_version4 oc_osd_version; - * pnfs_osd_cap_key_sec4 oc_cap_key_sec; - * opaque oc_capability_key<>; - * opaque oc_capability<>; - * }; - */ -struct pnfs_osd_object_cred { - struct pnfs_osd_objid oc_object_id; - u32 oc_osd_version; - u32 oc_cap_key_sec; - struct pnfs_osd_opaque_cred oc_cap_key; - struct pnfs_osd_opaque_cred oc_cap; -}; - -/* struct pnfs_osd_layout4 { - * pnfs_osd_data_map4 olo_map; - * uint32_t olo_comps_index; - * pnfs_osd_object_cred4 olo_components<>; - * }; - */ -struct pnfs_osd_layout { - struct pnfs_osd_data_map olo_map; - u32 olo_comps_index; - u32 olo_num_comps; - struct pnfs_osd_object_cred *olo_comps; -}; - -/* Device Address */ -enum pnfs_osd_targetid_type { - OBJ_TARGET_ANON = 1, - OBJ_TARGET_SCSI_NAME = 2, - OBJ_TARGET_SCSI_DEVICE_ID = 3, -}; - -/* union pnfs_osd_targetid4 switch (pnfs_osd_targetid_type4 oti_type) { - * case OBJ_TARGET_SCSI_NAME: - * string oti_scsi_name<>; - * - * case OBJ_TARGET_SCSI_DEVICE_ID: - * opaque oti_scsi_device_id<>; - * - * default: - * void; - * }; - * - * union pnfs_osd_targetaddr4 switch (bool ota_available) { - * case TRUE: - * netaddr4 ota_netaddr; - * case FALSE: - * void; - * }; - * - * struct pnfs_osd_deviceaddr4 { - * pnfs_osd_targetid4 oda_targetid; - * pnfs_osd_targetaddr4 oda_targetaddr; - * uint64_t oda_lun; - * opaque oda_systemid<>; - * 
pnfs_osd_object_cred4 oda_root_obj_cred; - * opaque oda_osdname<>; - * }; - */ -struct pnfs_osd_targetid { - u32 oti_type; - struct nfs4_string oti_scsi_device_id; -}; - -/* struct netaddr4 { - * // see struct rpcb in RFC1833 - * string r_netid<>; // network id - * string r_addr<>; // universal address - * }; - */ -struct pnfs_osd_net_addr { - struct nfs4_string r_netid; - struct nfs4_string r_addr; -}; - -struct pnfs_osd_targetaddr { - u32 ota_available; - struct pnfs_osd_net_addr ota_netaddr; -}; - -struct pnfs_osd_deviceaddr { - struct pnfs_osd_targetid oda_targetid; - struct pnfs_osd_targetaddr oda_targetaddr; - u8 oda_lun[8]; - struct nfs4_string oda_systemid; - struct pnfs_osd_object_cred oda_root_obj_cred; - struct nfs4_string oda_osdname; -}; - -/* LAYOUTCOMMIT: layoutupdate */ - -/* union pnfs_osd_deltaspaceused4 switch (bool dsu_valid) { - * case TRUE: - * int64_t dsu_delta; - * case FALSE: - * void; - * }; - * - * struct pnfs_osd_layoutupdate4 { - * pnfs_osd_deltaspaceused4 olu_delta_space_used; - * bool olu_ioerr_flag; - * }; - */ -struct pnfs_osd_layoutupdate { - u32 dsu_valid; - s64 dsu_delta; - u32 olu_ioerr_flag; -}; - -/* LAYOUTRETURN: I/O Rrror Report */ - -enum pnfs_osd_errno { - PNFS_OSD_ERR_EIO = 1, - PNFS_OSD_ERR_NOT_FOUND = 2, - PNFS_OSD_ERR_NO_SPACE = 3, - PNFS_OSD_ERR_BAD_CRED = 4, - PNFS_OSD_ERR_NO_ACCESS = 5, - PNFS_OSD_ERR_UNREACHABLE = 6, - PNFS_OSD_ERR_RESOURCE = 7 -}; - -/* struct pnfs_osd_ioerr4 { - * pnfs_osd_objid4 oer_component; - * length4 oer_comp_offset; - * length4 oer_comp_length; - * bool oer_iswrite; - * pnfs_osd_errno4 oer_errno; - * }; - */ -struct pnfs_osd_ioerr { - struct pnfs_osd_objid oer_component; - u64 oer_comp_offset; - u64 oer_comp_length; - u32 oer_iswrite; - u32 oer_errno; -}; - -/* OSD XDR Client API */ -/* Layout helpers */ -/* Layout decoding is done in two parts: - * 1. First Call pnfs_osd_xdr_decode_layout_map to read in only the header part - * of the layout. @iter members need not be initialized. 
- * Returned: - * @layout members are set. (@layout->olo_comps set to NULL). - * - * Zero on success, or negative error if passed xdr is broken. - * - * 2. 2nd Call pnfs_osd_xdr_decode_layout_comp() in a loop until it returns - * false, to decode the next component. - * Returned: - * true if there is more to decode or false if we are done or error. - * - * Example: - * struct pnfs_osd_xdr_decode_layout_iter iter; - * struct pnfs_osd_layout layout; - * struct pnfs_osd_object_cred comp; - * int status; - * - * status = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr); - * if (unlikely(status)) - * goto err; - * while(pnfs_osd_xdr_decode_layout_comp(&comp, &iter, xdr, &status)) { - * // All of @comp strings point to inside the xdr_buffer - * // or scrach buffer. Copy them out to user memory eg. - * copy_single_comp(dest_comp++, &comp); - * } - * if (unlikely(status)) - * goto err; - */ - -struct pnfs_osd_xdr_decode_layout_iter { - unsigned total_comps; - unsigned decoded_comps; -}; - -extern int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout, - struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr); - -extern bool pnfs_osd_xdr_decode_layout_comp(struct pnfs_osd_object_cred *comp, - struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr, - int *err); - -/* Device Info helpers */ - -/* Note: All strings inside @deviceaddr point to space inside @p. - * @p should stay valid while @deviceaddr is in use. 
- */ -extern void pnfs_osd_xdr_decode_deviceaddr( - struct pnfs_osd_deviceaddr *deviceaddr, __be32 *p); - -/* layoutupdate (layout_commit) xdr helpers */ -extern int -pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr, - struct pnfs_osd_layoutupdate *lou); - -/* osd_ioerror encoding (layout_return) */ -extern __be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr); -extern void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr); - -#endif /* __PNFS_OSD_XDR_H__ */ From 563bcbae3ba233c275c244bfce2efe12938f5363 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Tue, 2 Nov 2021 10:12:18 +0800 Subject: [PATCH 233/433] net: vlan: fix a UAF in vlan_dev_real_dev() The real_dev of a vlan net_device may be freed after unregister_vlan_dev(). Access the real_dev continually by vlan_dev_real_dev() will trigger the UAF problem for the real_dev like following: ================================================================== BUG: KASAN: use-after-free in vlan_dev_real_dev+0xf9/0x120 Call Trace: kasan_report.cold+0x83/0xdf vlan_dev_real_dev+0xf9/0x120 is_eth_port_of_netdev_filter.part.0+0xb1/0x2c0 is_eth_port_of_netdev_filter+0x28/0x40 ib_enum_roce_netdev+0x1a3/0x300 ib_enum_all_roce_netdevs+0xc7/0x140 netdevice_event_work_handler+0x9d/0x210 ... Freed by task 9288: kasan_save_stack+0x1b/0x40 kasan_set_track+0x1c/0x30 kasan_set_free_info+0x20/0x30 __kasan_slab_free+0xfc/0x130 slab_free_freelist_hook+0xdd/0x240 kfree+0xe4/0x690 kvfree+0x42/0x50 device_release+0x9f/0x240 kobject_put+0x1c8/0x530 put_device+0x1b/0x30 free_netdev+0x370/0x540 ppp_destroy_interface+0x313/0x3d0 ... Move the put_device(real_dev) to vlan_dev_free(). Ensure real_dev not be freed before vlan_dev unregistered. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+e4df4e1389e28972e955@syzkaller.appspotmail.com Signed-off-by: Ziyang Xuan Reviewed-by: Jason Gunthorpe Signed-off-by: David S. 
Miller --- net/8021q/vlan.c | 3 --- net/8021q/vlan_dev.c | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 55275ef9a31a..a3a0a5e994f5 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -123,9 +123,6 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) } vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id); - - /* Get rid of the vlan's reference to real_dev */ - dev_put(real_dev); } int vlan_check_real_dev(struct net_device *real_dev, diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 90330b893134..ab6dee28536d 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -843,6 +843,9 @@ static void vlan_dev_free(struct net_device *dev) free_percpu(vlan->vlan_pcpu_stats); vlan->vlan_pcpu_stats = NULL; + + /* Get rid of the vlan's reference to real_dev */ + dev_put(vlan->real_dev); } void vlan_setup(struct net_device *dev) From 5f15d392dcb4aa250a63d6f2c5adfc26c0aedc78 Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Tue, 2 Nov 2021 19:30:41 +0100 Subject: [PATCH 234/433] net: dsa: qca8k: make sure PAD0 MAC06 exchange is disabled Some devices set MAC06 exchange in the bootloader. This causes problems, as we don't support this strange mode and we just set port6 as the primary CPU port. With MAC06 exchange, the PAD0 reg configures port6 instead of port0. Add an extra check and explicitly disable MAC06 exchange to correctly configure the port PAD config. Signed-off-by: Ansuel Smith Fixes: 3fcf734aa482 ("net: dsa: qca8k: add support for cpu port 6") Reviewed-by: Vladimir Oltean Signed-off-by: David S.
Miller --- drivers/net/dsa/qca8k.c | 8 ++++++++ drivers/net/dsa/qca8k.h | 1 + 2 files changed, 9 insertions(+) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index ea7f12778922..a429c9750add 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -1109,6 +1109,14 @@ qca8k_setup(struct dsa_switch *ds) if (ret) return ret; + /* Make sure MAC06 is disabled */ + ret = qca8k_reg_clear(priv, QCA8K_REG_PORT0_PAD_CTRL, + QCA8K_PORT0_PAD_MAC06_EXCHANGE_EN); + if (ret) { + dev_err(priv->dev, "failed disabling MAC06 exchange"); + return ret; + } + /* Enable CPU Port */ ret = qca8k_reg_set(priv, QCA8K_REG_GLOBAL_FW_CTRL0, QCA8K_GLOBAL_FW_CTRL0_CPU_PORT_EN); diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h index e10571a398c9..128b8cf85e08 100644 --- a/drivers/net/dsa/qca8k.h +++ b/drivers/net/dsa/qca8k.h @@ -34,6 +34,7 @@ #define QCA8K_MASK_CTRL_DEVICE_ID_MASK GENMASK(15, 8) #define QCA8K_MASK_CTRL_DEVICE_ID(x) ((x) >> 8) #define QCA8K_REG_PORT0_PAD_CTRL 0x004 +#define QCA8K_PORT0_PAD_MAC06_EXCHANGE_EN BIT(31) #define QCA8K_PORT0_PAD_SGMII_RXCLK_FALLING_EDGE BIT(19) #define QCA8K_PORT0_PAD_SGMII_TXCLK_FALLING_EDGE BIT(18) #define QCA8K_REG_PORT5_PAD_CTRL 0x008 From 92f62485b3715882cd397b0cbd80a96d179b86d6 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 2 Nov 2021 21:31:22 +0200 Subject: [PATCH 235/433] net: dsa: felix: fix broken VLAN-tagged PTP under VLAN-aware bridge Normally it is expected that the dsa_device_ops :: rcv() method finishes parsing the DSA tag and consumes it, then never looks at it again. But commit c0bcf537667c ("net: dsa: ocelot: add hardware timestamping support for Felix") added support for RX timestamping in a very unconventional way. 
On this switch, a partial timestamp is available in the DSA header, but the driver got away with not parsing that timestamp right away, but instead delayed that parsing for a little longer: dsa_switch_rcv(): nskb = cpu_dp->rcv(skb, dev); <------------- not here -> ocelot_rcv() ... skb = nskb; skb_push(skb, ETH_HLEN); skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, skb->dev); ... if (dsa_skb_defer_rx_timestamp(p, skb)) <--- but here -> felix_rxtstamp() return 0; When in felix_rxtstamp(), this driver accounted for the fact that eth_type_trans() happened in the meanwhile, so it got a hold of the extraction header again by subtracting (ETH_HLEN + OCELOT_TAG_LEN) bytes from the current skb->data. This worked for quite some time but was quite fragile from the very beginning. Not to mention that having DSA tag parsing split in two different files, under different folders (net/dsa/tag_ocelot.c vs drivers/net/dsa/ocelot/felix.c) made it quite non-obvious for patches to come that they might break this. Finally, the blamed commit does the following: at the end of ocelot_rcv(), it checks whether the skb payload contains a VLAN header. If it does, and this port is under a VLAN-aware bridge, that VLAN ID might not be correct in the sense that the packet might have suffered VLAN rewriting due to TCAM rules (VCAP IS1). So we consume the VLAN ID from the skb payload using __skb_vlan_pop(), and take the classified VLAN ID from the DSA tag, and construct a hwaccel VLAN tag with the classified VLAN, and the skb payload is VLAN-untagged. The big problem is that __skb_vlan_pop() does: memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN); __skb_pull(skb, VLAN_HLEN); aka it moves the Ethernet header 4 bytes to the right, and pulls 4 bytes from the skb headroom (effectively also moving skb->data, by definition). So for felix_rxtstamp()'s fragile logic, all bets are off now. 
Instead of having the "extraction" pointer point to the DSA header, it actually points to 4 bytes _inside_ the extraction header. Corollary, the last 4 bytes of the "extraction" header are in fact 4 stale bytes of the destination MAC address from the Ethernet header, from prior to the __skb_vlan_pop() movement. So of course, RX timestamps are completely bogus when the system is configured in this way. The fix is actually very simple: just don't structure the code like that. For better or worse, the DSA PTP timestamping API does not offer a straightforward way for drivers to present their RX timestamps, but other drivers (sja1105) have established a simple mechanism to carry their RX timestamp from dsa_device_ops :: rcv() all the way to dsa_switch_ops :: port_rxtstamp() and even later. That mechanism is to simply save the partial timestamp to the skb->cb, and complete it later. Question: why don't we simply populate the skb's struct skb_shared_hwtstamps from ocelot_rcv(), and bother with this complication of propagating the timestamp to felix_rxtstamp()? Answer: dsa_switch_ops :: port_rxtstamp() answers the question whether PTP packets need sleepable context to retrieve the full RX timestamp. Currently felix_rxtstamp() answers "no, thanks" to that question, and calls ocelot_ptp_gettime64() from softirq atomic context. This is understandable, since Felix VSC9959 is a PCIe memory-mapped switch, so hardware access does not require sleeping. But the felix driver is preparing for the introduction of other switches where hardware access is over a slow bus like SPI or MDIO: https://lore.kernel.org/lkml/20210814025003.2449143-1-colin.foster@in-advantage.com/ So I would like to keep this code structure, so the rework needed when that driver will need PTP support will be minimal (answer "yes, I need deferred context for this skb's RX timestamp", then the partial timestamp will still be found in the skb->cb).
Fixes: ea440cd2d9b2 ("net: dsa: tag_ocelot: use VLAN information from tagging header when available") Reported-by: Po Liu Cc: Yangbo Lu Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/ocelot/felix.c | 9 +++------ include/linux/dsa/ocelot.h | 1 + net/dsa/tag_ocelot.c | 3 +++ 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 83808e7dbdda..327cc4654806 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -1370,12 +1370,12 @@ out: static bool felix_rxtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb, unsigned int type) { - u8 *extraction = skb->data - ETH_HLEN - OCELOT_TAG_LEN; + u32 tstamp_lo = OCELOT_SKB_CB(skb)->tstamp_lo; struct skb_shared_hwtstamps *shhwtstamps; struct ocelot *ocelot = ds->priv; - u32 tstamp_lo, tstamp_hi; struct timespec64 ts; - u64 tstamp, val; + u32 tstamp_hi; + u64 tstamp; /* If the "no XTR IRQ" workaround is in use, tell DSA to defer this skb * for RX timestamping. 
Then free it, and poll for its copy through @@ -1390,9 +1390,6 @@ static bool felix_rxtstamp(struct dsa_switch *ds, int port, ocelot_ptp_gettime64(&ocelot->ptp_info, &ts); tstamp = ktime_set(ts.tv_sec, ts.tv_nsec); - ocelot_xfh_get_rew_val(extraction, &val); - tstamp_lo = (u32)val; - tstamp_hi = tstamp >> 32; if ((tstamp & 0xffffffff) < tstamp_lo) tstamp_hi--; diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h index d42010cf5468..7ee708ad7df2 100644 --- a/include/linux/dsa/ocelot.h +++ b/include/linux/dsa/ocelot.h @@ -12,6 +12,7 @@ struct ocelot_skb_cb { struct sk_buff *clone; unsigned int ptp_class; /* valid only for clones */ + u32 tstamp_lo; u8 ptp_cmd; u8 ts_id; }; diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c index cd60b94fc175..de1c849a0a70 100644 --- a/net/dsa/tag_ocelot.c +++ b/net/dsa/tag_ocelot.c @@ -101,6 +101,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb, struct dsa_port *dp; u8 *extraction; u16 vlan_tpid; + u64 rew_val; /* Revert skb->data by the amount consumed by the DSA master, * so it points to the beginning of the frame. @@ -130,6 +131,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb, ocelot_xfh_get_qos_class(extraction, &qos_class); ocelot_xfh_get_tag_type(extraction, &tag_type); ocelot_xfh_get_vlan_tci(extraction, &vlan_tci); + ocelot_xfh_get_rew_val(extraction, &rew_val); skb->dev = dsa_master_find_slave(netdev, 0, src_port); if (!skb->dev) @@ -143,6 +145,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb, dsa_default_offload_fwd_mark(skb); skb->priority = qos_class; + OCELOT_SKB_CB(skb)->tstamp_lo = rew_val; /* Ocelot switches copy frames unmodified to the CPU. 
However, it is * possible for the user to request a VLAN modification through From 6429e46304ac7820eebbea2bf5d73b90c18e0e06 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Thu, 28 Oct 2021 10:47:21 +0100 Subject: [PATCH 236/433] libfs: Move shmem_exchange to simple_rename_exchange Move shmem_exchange and make it available to other callers. Suggested-by: Miklos Szeredi Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann Acked-by: Miklos Szeredi Cc: Al Viro Cc: Christian Brauner Cc: Greg Kroah-Hartman Link: https://lore.kernel.org/bpf/20211028094724.59043-2-lmb@cloudflare.com --- fs/libfs.c | 24 ++++++++++++++++++++++++ include/linux/fs.h | 2 ++ mm/shmem.c | 24 +----------------------- 3 files changed, 27 insertions(+), 23 deletions(-) diff --git a/fs/libfs.c b/fs/libfs.c index 51b4de3b3447..1cf144dc9ed2 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -448,6 +448,30 @@ int simple_rmdir(struct inode *dir, struct dentry *dentry) } EXPORT_SYMBOL(simple_rmdir); +int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + bool old_is_dir = d_is_dir(old_dentry); + bool new_is_dir = d_is_dir(new_dentry); + + if (old_dir != new_dir && old_is_dir != new_is_dir) { + if (old_is_dir) { + drop_nlink(old_dir); + inc_nlink(new_dir); + } else { + drop_nlink(new_dir); + inc_nlink(old_dir); + } + } + old_dir->i_ctime = old_dir->i_mtime = + new_dir->i_ctime = new_dir->i_mtime = + d_inode(old_dentry)->i_ctime = + d_inode(new_dentry)->i_ctime = current_time(old_dir); + + return 0; +} +EXPORT_SYMBOL_GPL(simple_rename_exchange); + int simple_rename(struct user_namespace *mnt_userns, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) diff --git a/include/linux/fs.h b/include/linux/fs.h index f3cfca5edc9a..bc4e97b82ddd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3383,6 +3383,8 @@ extern int simple_open(struct inode *inode, struct 
file *file); extern int simple_link(struct dentry *, struct inode *, struct dentry *); extern int simple_unlink(struct inode *, struct dentry *); extern int simple_rmdir(struct inode *, struct dentry *); +extern int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry); extern int simple_rename(struct user_namespace *, struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); diff --git a/mm/shmem.c b/mm/shmem.c index 17e344e26e73..56616aabe0a1 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2947,28 +2947,6 @@ static int shmem_rmdir(struct inode *dir, struct dentry *dentry) return shmem_unlink(dir, dentry); } -static int shmem_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) -{ - bool old_is_dir = d_is_dir(old_dentry); - bool new_is_dir = d_is_dir(new_dentry); - - if (old_dir != new_dir && old_is_dir != new_is_dir) { - if (old_is_dir) { - drop_nlink(old_dir); - inc_nlink(new_dir); - } else { - drop_nlink(new_dir); - inc_nlink(old_dir); - } - } - old_dir->i_ctime = old_dir->i_mtime = - new_dir->i_ctime = new_dir->i_mtime = - d_inode(old_dentry)->i_ctime = - d_inode(new_dentry)->i_ctime = current_time(old_dir); - - return 0; -} - static int shmem_whiteout(struct user_namespace *mnt_userns, struct inode *old_dir, struct dentry *old_dentry) { @@ -3014,7 +2992,7 @@ static int shmem_rename2(struct user_namespace *mnt_userns, return -EINVAL; if (flags & RENAME_EXCHANGE) - return shmem_exchange(old_dir, old_dentry, new_dir, new_dentry); + return simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry); if (!simple_empty(new_dentry)) return -ENOTEMPTY; From 3871cb8cf741dcd8ebaec4f960be9479da2f176b Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Thu, 28 Oct 2021 10:47:22 +0100 Subject: [PATCH 237/433] libfs: Support RENAME_EXCHANGE in simple_rename() Allow atomic exchange via RENAME_EXCHANGE when using simple_rename. 
This affects binderfs, ramfs, hugetlbfs and bpffs. Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann Acked-by: Christian Brauner Cc: Al Viro Cc: Miklos Szeredi Cc: Greg Kroah-Hartman Link: https://lore.kernel.org/bpf/20211028094724.59043-3-lmb@cloudflare.com --- fs/libfs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/libfs.c b/fs/libfs.c index 1cf144dc9ed2..ba7438ab9371 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -479,9 +479,12 @@ int simple_rename(struct user_namespace *mnt_userns, struct inode *old_dir, struct inode *inode = d_inode(old_dentry); int they_are_dirs = d_is_dir(old_dentry); - if (flags & ~RENAME_NOREPLACE) + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) return -EINVAL; + if (flags & RENAME_EXCHANGE) + return simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry); + if (!simple_empty(new_dentry)) return -ENOTEMPTY; From 9fc23c22e5745decc93ba5789bdcf2b093f21145 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Thu, 28 Oct 2021 10:47:23 +0100 Subject: [PATCH 238/433] selftests/bpf: Convert test_bpffs to ASSERT macros Remove usage of deprecated CHECK macros.
Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211028094724.59043-4-lmb@cloudflare.com --- .../selftests/bpf/prog_tests/test_bpffs.c | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c index 172c999e523c..533e3f3a459a 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c +++ b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c @@ -29,43 +29,43 @@ static int read_iter(char *file) static int fn(void) { - int err, duration = 0; + int err; err = unshare(CLONE_NEWNS); - if (CHECK(err, "unshare", "failed: %d\n", errno)) + if (!ASSERT_OK(err, "unshare")) goto out; err = mount("", "/", "", MS_REC | MS_PRIVATE, NULL); - if (CHECK(err, "mount /", "failed: %d\n", errno)) + if (!ASSERT_OK(err, "mount /")) goto out; err = umount(TDIR); - if (CHECK(err, "umount " TDIR, "failed: %d\n", errno)) + if (!ASSERT_OK(err, "umount " TDIR)) goto out; err = mount("none", TDIR, "tmpfs", 0, NULL); - if (CHECK(err, "mount", "mount root failed: %d\n", errno)) + if (!ASSERT_OK(err, "mount tmpfs")) goto out; err = mkdir(TDIR "/fs1", 0777); - if (CHECK(err, "mkdir "TDIR"/fs1", "failed: %d\n", errno)) + if (!ASSERT_OK(err, "mkdir " TDIR "/fs1")) goto out; err = mkdir(TDIR "/fs2", 0777); - if (CHECK(err, "mkdir "TDIR"/fs2", "failed: %d\n", errno)) + if (!ASSERT_OK(err, "mkdir " TDIR "/fs2")) goto out; err = mount("bpf", TDIR "/fs1", "bpf", 0, NULL); - if (CHECK(err, "mount bpffs "TDIR"/fs1", "failed: %d\n", errno)) + if (!ASSERT_OK(err, "mount bpffs " TDIR "/fs1")) goto out; err = mount("bpf", TDIR "/fs2", "bpf", 0, NULL); - if (CHECK(err, "mount bpffs " TDIR "/fs2", "failed: %d\n", errno)) + if (!ASSERT_OK(err, "mount bpffs " TDIR "/fs2")) goto out; err = read_iter(TDIR "/fs1/maps.debug"); - if (CHECK(err, "reading " TDIR "/fs1/maps.debug", "failed\n")) + if (!ASSERT_OK(err, "reading " TDIR 
"/fs1/maps.debug")) goto out; err = read_iter(TDIR "/fs2/progs.debug"); - if (CHECK(err, "reading " TDIR "/fs2/progs.debug", "failed\n")) + if (!ASSERT_OK(err, "reading " TDIR "/fs2/progs.debug")) goto out; out: umount(TDIR "/fs1"); From 7e5ad817ec297f91a2fa5c423a39a458a4701bca Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Thu, 28 Oct 2021 10:47:24 +0100 Subject: [PATCH 239/433] selftests/bpf: Test RENAME_EXCHANGE and RENAME_NOREPLACE on bpffs Add tests to exercise the behaviour of RENAME_EXCHANGE and RENAME_NOREPLACE on bpffs. The former checks that after an exchange the inode of two directories has changed. The latter checks that the source still exists after a failed rename. Generally, having support for renameat2(RENAME_EXCHANGE) in bpffs fixes atomic upgrades of our sk_lookup control plane. Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211028094724.59043-5-lmb@cloudflare.com --- .../selftests/bpf/prog_tests/test_bpffs.c | 65 ++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c index 533e3f3a459a..d29ebfeef9c5 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c +++ b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ #define _GNU_SOURCE +#include #include #include #include @@ -29,7 +30,8 @@ static int read_iter(char *file) static int fn(void) { - int err; + struct stat a, b, c; + int err, map; err = unshare(CLONE_NEWNS); if (!ASSERT_OK(err, "unshare")) @@ -67,6 +69,67 @@ static int fn(void) err = read_iter(TDIR "/fs2/progs.debug"); if (!ASSERT_OK(err, "reading " TDIR "/fs2/progs.debug")) goto out; + + err = mkdir(TDIR "/fs1/a", 0777); + if (!ASSERT_OK(err, "creating " TDIR "/fs1/a")) + goto out; + err = mkdir(TDIR "/fs1/a/1", 0777); + if (!ASSERT_OK(err, "creating " TDIR 
"/fs1/a/1")) + goto out; + err = mkdir(TDIR "/fs1/b", 0777); + if (!ASSERT_OK(err, "creating " TDIR "/fs1/b")) + goto out; + + map = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 4, 1, 0); + if (!ASSERT_GT(map, 0, "create_map(ARRAY)")) + goto out; + err = bpf_obj_pin(map, TDIR "/fs1/c"); + if (!ASSERT_OK(err, "pin map")) + goto out; + close(map); + + /* Check that RENAME_EXCHANGE works for directories. */ + err = stat(TDIR "/fs1/a", &a); + if (!ASSERT_OK(err, "stat(" TDIR "/fs1/a)")) + goto out; + err = renameat2(0, TDIR "/fs1/a", 0, TDIR "/fs1/b", RENAME_EXCHANGE); + if (!ASSERT_OK(err, "renameat2(/fs1/a, /fs1/b, RENAME_EXCHANGE)")) + goto out; + err = stat(TDIR "/fs1/b", &b); + if (!ASSERT_OK(err, "stat(" TDIR "/fs1/b)")) + goto out; + if (!ASSERT_EQ(a.st_ino, b.st_ino, "b should have a's inode")) + goto out; + err = access(TDIR "/fs1/b/1", F_OK); + if (!ASSERT_OK(err, "access(" TDIR "/fs1/b/1)")) + goto out; + + /* Check that RENAME_EXCHANGE works for mixed file types. */ + err = stat(TDIR "/fs1/c", &c); + if (!ASSERT_OK(err, "stat(" TDIR "/fs1/map)")) + goto out; + err = renameat2(0, TDIR "/fs1/c", 0, TDIR "/fs1/b", RENAME_EXCHANGE); + if (!ASSERT_OK(err, "renameat2(/fs1/c, /fs1/b, RENAME_EXCHANGE)")) + goto out; + err = stat(TDIR "/fs1/b", &b); + if (!ASSERT_OK(err, "stat(" TDIR "/fs1/b)")) + goto out; + if (!ASSERT_EQ(c.st_ino, b.st_ino, "b should have c's inode")) + goto out; + err = access(TDIR "/fs1/c/1", F_OK); + if (!ASSERT_OK(err, "access(" TDIR "/fs1/c/1)")) + goto out; + + /* Check that RENAME_NOREPLACE works. 
*/ + err = renameat2(0, TDIR "/fs1/b", 0, TDIR "/fs1/a", RENAME_NOREPLACE); + if (!ASSERT_ERR(err, "renameat2(RENAME_NOREPLACE)")) { + err = -EINVAL; + goto out; + } + err = access(TDIR "/fs1/b", F_OK); + if (!ASSERT_OK(err, "access(" TDIR "/fs1/b)")) + goto out; + out: umount(TDIR "/fs1"); umount(TDIR "/fs2"); From 401a33da3a45cc05859b121314f8ab52c2c01977 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 2 Nov 2021 22:41:13 -0700 Subject: [PATCH 240/433] selftests/bpf: Make netcnt selftests serial to avoid spurious failures When running `./test_progs -j` test_netcnt fails with a very high probability, undercounting number of packets received (9999 vs expected 10000). It seems to be conflicting with other cgroup/skb selftests. So make it serial for now to make parallel mode more robust. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211103054113.2130582-1-andrii@kernel.org --- tools/testing/selftests/bpf/prog_tests/netcnt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/netcnt.c b/tools/testing/selftests/bpf/prog_tests/netcnt.c index 6ede48bde91b..954964f0ac3d 100644 --- a/tools/testing/selftests/bpf/prog_tests/netcnt.c +++ b/tools/testing/selftests/bpf/prog_tests/netcnt.c @@ -8,7 +8,7 @@ #define CG_NAME "/netcnt" -void test_netcnt(void) +void serial_test_netcnt(void) { union percpu_net_cnt *percpu_netcnt = NULL; struct bpf_cgroup_storage_key key; From f30d4968e9aee737e174fc97942af46cfb49b484 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 1 Nov 2021 23:45:35 -0700 Subject: [PATCH 241/433] bpf: Do not reject when the stack read size is different from the tracked scalar size Below is a simplified case from a report in bcc [0]: r4 = 20 *(u32 *)(r10 -4) = r4 *(u32 *)(r10 -8) = r4 /* r4 state is tracked */ r4 = *(u64 *)(r10 -8) /* Read more than the tracked 32bit scalar. * verifier rejects as 'corrupted spill memory'. 
*/ After commit 354e8f1970f8 ("bpf: Support <8-byte scalar spill and refill"), the 8-byte aligned 32bit spill is also tracked by the verifier and the register state is stored. However, if 8 bytes are read from the stack instead of the tracked 4 byte scalar, then verifier currently rejects the program as "corrupted spill memory". This patch fixes this case by allowing it to read but marks the register as unknown. Also note that, if the prog is trying to corrupt/leak an earlier spilled pointer by spilling another <8 bytes register on top, this has already been rejected in the check_stack_write_fixed_off(). [0] https://github.com/iovisor/bcc/pull/3683 Fixes: 354e8f1970f8 ("bpf: Support <8-byte scalar spill and refill") Reported-by: Hengqi Chen Reported-by: Yonghong Song Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Tested-by: Hengqi Chen Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20211102064535.316018-1-kafai@fb.com --- kernel/bpf/verifier.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f0dca726ebfd..5f8d9128860a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3088,9 +3088,12 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, reg = ®_state->stack[spi].spilled_ptr; if (is_spilled_reg(®_state->stack[spi])) { - if (size != BPF_REG_SIZE) { - u8 scalar_size = 0; + u8 spill_size = 1; + for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--) + spill_size++; + + if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) { if (reg->type != SCALAR_VALUE) { verbose_linfo(env, env->insn_idx, "; "); verbose(env, "invalid size of register fill\n"); @@ -3101,10 +3104,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, if (dst_regno < 0) return 0; - for (i = BPF_REG_SIZE; i > 0 && stype[i - 1] == STACK_SPILL; i--) - scalar_size++; - - if (!(off % BPF_REG_SIZE) && size == scalar_size) { + if 
(!(off % BPF_REG_SIZE) && size == spill_size) { /* The earlier check_reg_arg() has decided the * subreg_def for this insn. Save it first. */ @@ -3128,12 +3128,6 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, state->regs[dst_regno].live |= REG_LIVE_WRITTEN; return 0; } - for (i = 1; i < BPF_REG_SIZE; i++) { - if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) { - verbose(env, "corrupted spill memory\n"); - return -EACCES; - } - } if (dst_regno >= 0) { /* restore register state from stack */ From c08455dec5acf4668f5d1eb099f7fedb29f2de5f Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 1 Nov 2021 23:45:41 -0700 Subject: [PATCH 242/433] selftests/bpf: Verifier test on refill from a smaller spill This patch adds a verifier test to ensure the verifier can read 8 bytes from the stack after two 32bit write at fp-4 and fp-8. The test is similar to the reported case from bcc [0]. [0] https://github.com/iovisor/bcc/pull/3683 Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20211102064541.316414-1-kafai@fb.com --- .../testing/selftests/bpf/verifier/spill_fill.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c index c9991c3f3bd2..7ab3de108761 100644 --- a/tools/testing/selftests/bpf/verifier/spill_fill.c +++ b/tools/testing/selftests/bpf/verifier/spill_fill.c @@ -265,3 +265,20 @@ .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, +{ + "Spill a u32 scalar at fp-4 and then at fp-8", + .insns = { + /* r4 = 4321 */ + BPF_MOV32_IMM(BPF_REG_4, 4321), + /* *(u32 *)(r10 -4) = r4 */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -4), + /* *(u32 *)(r10 -8) = r4 */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8), + /* r4 = *(u64 *)(r10 -8) */ + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = 
ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, From 1a8c7778bcde5981463a5b9f9b2caa44a327ff93 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Fri, 26 Feb 2021 13:19:23 -0800 Subject: [PATCH 243/433] ice: Fix VF true promiscuous mode When a VF requests promiscuous mode and it's trusted and true promiscuous mode is enabled the PF driver attempts to enable unicast and/or multicast promiscuous mode filters based on the request. This is fine, but there are a couple issues with the current code. [1] The define to configure the unicast promiscuous mode mask also includes bits to configure the multicast promiscuous mode mask, which causes multicast to be set/cleared unintentionally. [2] All 4 cases for enable/disable unicast/multicast mode are not handled in the promiscuous mode message handler, which causes unexpected results regarding the current promiscuous mode settings. To fix [1] make sure any promiscuous mask defines include the correct bits for each of the promiscuous modes. To fix [2] make sure that all 4 cases are handled since there are 2 bits (FLAG_VF_UNICAST_PROMISC and FLAG_VF_MULTICAST_PROMISC) that can be either set or cleared. Also, since either unicast and/or multicast promiscuous configuration can fail, introduce two separate error values to handle each of these cases. 
Fixes: 01b5e89aab49 ("ice: Add VF promiscuous support") Signed-off-by: Brett Creeley Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 5 +- .../net/ethernet/intel/ice/ice_virtchnl_pf.c | 76 +++++++++---------- 2 files changed, 39 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index bf4ecd9a517c..b2db39ee5f85 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -165,13 +165,10 @@ #define ice_for_each_chnl_tc(i) \ for ((i) = ICE_CHNL_START_TC; (i) < ICE_CHNL_MAX_TC; (i)++) -#define ICE_UCAST_PROMISC_BITS (ICE_PROMISC_UCAST_TX | ICE_PROMISC_MCAST_TX | \ - ICE_PROMISC_UCAST_RX | ICE_PROMISC_MCAST_RX) +#define ICE_UCAST_PROMISC_BITS (ICE_PROMISC_UCAST_TX | ICE_PROMISC_UCAST_RX) #define ICE_UCAST_VLAN_PROMISC_BITS (ICE_PROMISC_UCAST_TX | \ - ICE_PROMISC_MCAST_TX | \ ICE_PROMISC_UCAST_RX | \ - ICE_PROMISC_MCAST_RX | \ ICE_PROMISC_VLAN_TX | \ ICE_PROMISC_VLAN_RX) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 2ac21484b876..9b699419c933 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -3013,6 +3013,7 @@ bool ice_is_any_vf_in_promisc(struct ice_pf *pf) static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg) { enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + enum ice_status mcast_status = 0, ucast_status = 0; bool rm_promisc, alluni = false, allmulti = false; struct virtchnl_promisc_info *info = (struct virtchnl_promisc_info *)msg; @@ -3105,52 +3106,51 @@ static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg) goto error_param; } } else { - enum ice_status status; - u8 promisc_m; + u8 mcast_m, ucast_m; - if (alluni) { - if (vf->port_vlan_info || vsi->num_vlan) - promisc_m = ICE_UCAST_VLAN_PROMISC_BITS; - else - promisc_m = 
ICE_UCAST_PROMISC_BITS; - } else if (allmulti) { - if (vf->port_vlan_info || vsi->num_vlan) - promisc_m = ICE_MCAST_VLAN_PROMISC_BITS; - else - promisc_m = ICE_MCAST_PROMISC_BITS; + if (vf->port_vlan_info || vsi->num_vlan > 1) { + mcast_m = ICE_MCAST_VLAN_PROMISC_BITS; + ucast_m = ICE_UCAST_VLAN_PROMISC_BITS; } else { - if (vf->port_vlan_info || vsi->num_vlan) - promisc_m = ICE_UCAST_VLAN_PROMISC_BITS; - else - promisc_m = ICE_UCAST_PROMISC_BITS; + mcast_m = ICE_MCAST_PROMISC_BITS; + ucast_m = ICE_UCAST_PROMISC_BITS; } - /* Configure multicast/unicast with or without VLAN promiscuous - * mode - */ - status = ice_vf_set_vsi_promisc(vf, vsi, promisc_m, rm_promisc); - if (status) { - dev_err(dev, "%sable Tx/Rx filter promiscuous mode on VF-%d failed, error: %s\n", - rm_promisc ? "dis" : "en", vf->vf_id, - ice_stat_str(status)); - v_ret = ice_err_to_virt_err(status); - goto error_param; - } else { - dev_dbg(dev, "%sable Tx/Rx filter promiscuous mode on VF-%d succeeded\n", - rm_promisc ? "dis" : "en", vf->vf_id); + ucast_status = ice_vf_set_vsi_promisc(vf, vsi, ucast_m, + !alluni); + if (ucast_status) { + dev_err(dev, "%sable Tx/Rx filter promiscuous mode on VF-%d failed\n", + alluni ? "en" : "dis", vf->vf_id); + v_ret = ice_err_to_virt_err(ucast_status); + } + + mcast_status = ice_vf_set_vsi_promisc(vf, vsi, mcast_m, + !allmulti); + if (mcast_status) { + dev_err(dev, "%sable Tx/Rx filter promiscuous mode on VF-%d failed\n", + allmulti ? 
"en" : "dis", vf->vf_id); + v_ret = ice_err_to_virt_err(mcast_status); } } - if (allmulti && - !test_and_set_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) - dev_info(dev, "VF %u successfully set multicast promiscuous mode\n", vf->vf_id); - else if (!allmulti && test_and_clear_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) - dev_info(dev, "VF %u successfully unset multicast promiscuous mode\n", vf->vf_id); + if (!mcast_status) { + if (allmulti && + !test_and_set_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) + dev_info(dev, "VF %u successfully set multicast promiscuous mode\n", + vf->vf_id); + else if (!allmulti && test_and_clear_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) + dev_info(dev, "VF %u successfully unset multicast promiscuous mode\n", + vf->vf_id); + } - if (alluni && !test_and_set_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states)) - dev_info(dev, "VF %u successfully set unicast promiscuous mode\n", vf->vf_id); - else if (!alluni && test_and_clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states)) - dev_info(dev, "VF %u successfully unset unicast promiscuous mode\n", vf->vf_id); + if (!ucast_status) { + if (alluni && !test_and_set_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states)) + dev_info(dev, "VF %u successfully set unicast promiscuous mode\n", + vf->vf_id); + else if (!alluni && test_and_clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states)) + dev_info(dev, "VF %u successfully unset unicast promiscuous mode\n", + vf->vf_id); + } error_param: return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE, From 0299faeaf8eb982103e4388af61fd94feb9c2d9f Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Wed, 5 May 2021 14:17:57 -0700 Subject: [PATCH 244/433] ice: Remove toggling of antispoof for VF trusted promiscuous mode Currently when a trusted VF enables promiscuous mode spoofchk will be disabled. This is wrong and should only be modified from the ndo_set_vf_spoofchk callback. Fix this by removing the call to toggle spoofchk for trusted VFs. 
Fixes: 01b5e89aab49 ("ice: Add VF promiscuous support") Signed-off-by: Brett Creeley Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- .../net/ethernet/intel/ice/ice_virtchnl_pf.c | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 9b699419c933..3f8f94732a1f 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -3055,24 +3055,6 @@ static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg) rm_promisc = !allmulti && !alluni; if (vsi->num_vlan || vf->port_vlan_info) { - struct ice_vsi *pf_vsi = ice_get_main_vsi(pf); - struct net_device *pf_netdev; - - if (!pf_vsi) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - pf_netdev = pf_vsi->netdev; - - ret = ice_set_vf_spoofchk(pf_netdev, vf->vf_id, rm_promisc); - if (ret) { - dev_err(dev, "Failed to update spoofchk to %s for VF %d VSI %d when setting promiscuous mode\n", - rm_promisc ? "ON" : "OFF", vf->vf_id, - vsi->vsi_num); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - } - if (rm_promisc) ret = ice_cfg_vlan_pruning(vsi, true); else From ce572a5b88d5ca6737b5e23da9892792fd708ad3 Mon Sep 17 00:00:00 2001 From: Sylwester Dziedziuch Date: Thu, 6 May 2021 08:40:03 -0700 Subject: [PATCH 245/433] ice: Fix replacing VF hardware MAC to existing MAC filter VF was not able to change its hardware MAC address in case the new address was already present in the MAC filter list. Change the handling of VF add mac request to not return if requested MAC address is already present on the list and check if its hardware MAC needs to be updated in this case. 
Fixes: ed4c068d46f6 ("ice: Enable ip link show on the PF to display VF unicast MAC(s)") Signed-off-by: Sylwester Dziedziuch Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 3f8f94732a1f..650ad7f56829 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -3806,6 +3806,7 @@ ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, struct device *dev = ice_pf_to_dev(vf->pf); u8 *mac_addr = vc_ether_addr->addr; enum ice_status status; + int ret = 0; /* device MAC already added */ if (ether_addr_equal(mac_addr, vf->dev_lan_addr.addr)) @@ -3818,20 +3819,23 @@ ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, status = ice_fltr_add_mac(vsi, mac_addr, ICE_FWD_TO_VSI); if (status == ICE_ERR_ALREADY_EXISTS) { - dev_err(dev, "MAC %pM already exists for VF %d\n", mac_addr, + dev_dbg(dev, "MAC %pM already exists for VF %d\n", mac_addr, vf->vf_id); - return -EEXIST; + /* don't return since we might need to update + * the primary MAC in ice_vfhw_mac_add() below + */ + ret = -EEXIST; } else if (status) { dev_err(dev, "Failed to add MAC %pM for VF %d\n, error %s\n", mac_addr, vf->vf_id, ice_stat_str(status)); return -EIO; + } else { + vf->num_mac++; } ice_vfhw_mac_add(vf, vc_ether_addr); - vf->num_mac++; - - return 0; + return ret; } /** From b385cca47363316c6d9a74ae9db407bbc281f815 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Thu, 9 Sep 2021 14:38:08 -0700 Subject: [PATCH 246/433] ice: Fix not stopping Tx queues for VFs When a VF is removed and/or reset its Tx queues need to be stopped from the PF. This is done by calling the ice_dis_vf_qs() function, which calls ice_vsi_stop_lan_tx_rings(). 
Currently ice_dis_vf_qs() is protected by the VF state bit ICE_VF_STATE_QS_ENA. Unfortunately, this is causing the Tx queues to not be disabled in some cases and when the VF tries to re-enable/reconfigure its Tx queues over virtchnl the op is failing. This is because a VF can be reset and/or removed before the ICE_VF_STATE_QS_ENA bit is set, but the Tx queues were already configured via ice_vsi_cfg_single_txq() in the VIRTCHNL_OP_CONFIG_VSI_QUEUES op. However, the ICE_VF_STATE_QS_ENA bit is set on a successful VIRTCHNL_OP_ENABLE_QUEUES, which will always happen after the VIRTCHNL_OP_CONFIG_VSI_QUEUES op. This was causing the following error message when loading the ice driver, creating VFs, and modifying VF trust in an endless loop: [35274.192484] ice 0000:88:00.0: Failed to set LAN Tx queue context, error: ICE_ERR_PARAM [35274.193074] ice 0000:88:00.0: VF 0 failed opcode 6, retval: -5 [35274.193640] iavf 0000:88:01.0: PF returned error -5 (IAVF_ERR_PARAM) to our request 6 Fix this by always calling ice_dis_vf_qs() and silencing the error message in ice_vsi_stop_tx_ring() since the calling code ignores the return anyway. Also, all other places that call ice_vsi_stop_tx_ring() catch the error, so this doesn't affect those flows since there was no change to the values the function returns. Other solutions were considered (i.e. tracking which VF queues had been "started/configured" in VIRTCHNL_OP_CONFIG_VSI_QUEUES), but it seemed more complicated than it was worth. That approach would also bring in the chance for other unexpected conditions due to invalid state bit checks. So, the proposed solution seemed like the best option since there is no harm in failing to stop Tx queues that were never started.
This issue can be seen using the following commands: for i in {0..50}; do rmmod ice modprobe ice sleep 1 echo 1 > /sys/class/net/ens785f0/device/sriov_numvfs echo 1 > /sys/class/net/ens785f1/device/sriov_numvfs ip link set ens785f1 vf 0 trust on ip link set ens785f0 vf 0 trust on sleep 2 echo 0 > /sys/class/net/ens785f0/device/sriov_numvfs echo 0 > /sys/class/net/ens785f1/device/sriov_numvfs sleep 1 echo 1 > /sys/class/net/ens785f0/device/sriov_numvfs echo 1 > /sys/class/net/ens785f1/device/sriov_numvfs ip link set ens785f1 vf 0 trust on ip link set ens785f0 vf 0 trust on done Fixes: 77ca27c41705 ("ice: add support for virtchnl_queue_select.[tx|rx]_queues bitmap") Signed-off-by: Brett Creeley Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_base.c | 2 +- drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index fa6cd63cbf1f..1efc635cc0f5 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -962,7 +962,7 @@ ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, } else if (status == ICE_ERR_DOES_NOT_EXIST) { dev_dbg(ice_pf_to_dev(vsi->back), "LAN Tx queues do not exist, nothing to disable\n"); } else if (status) { - dev_err(ice_pf_to_dev(vsi->back), "Failed to disable LAN Tx queues, error: %s\n", + dev_dbg(ice_pf_to_dev(vsi->back), "Failed to disable LAN Tx queues, error: %s\n", ice_stat_str(status)); return -ENODEV; } diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 650ad7f56829..3f727df3b6fb 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -638,8 +638,7 @@ void ice_free_vfs(struct ice_pf *pf) /* Avoid wait time by stopping all VFs at the same time */ 
ice_for_each_vf(pf, i) - if (test_bit(ICE_VF_STATE_QS_ENA, pf->vf[i].vf_states)) - ice_dis_vf_qs(&pf->vf[i]); + ice_dis_vf_qs(&pf->vf[i]); tmp = pf->num_alloc_vfs; pf->num_qps_per_vf = 0; @@ -1695,8 +1694,7 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) vsi = ice_get_vf_vsi(vf); - if (test_bit(ICE_VF_STATE_QS_ENA, vf->vf_states)) - ice_dis_vf_qs(vf); + ice_dis_vf_qs(vf); /* Call Disable LAN Tx queue AQ whether or not queues are * enabled. This is needed for successful completion of VFR. From e6ba5273d4ede03d075d7a116b8edad1f6115f4d Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Thu, 9 Sep 2021 14:38:09 -0700 Subject: [PATCH 247/433] ice: Fix race conditions between virtchnl handling and VF ndo ops The VF can be configured via the PF's ndo ops at the same time the PF is receiving/handling virtchnl messages. This has many issues, with one of them being the ndo op could be actively resetting a VF (i.e. resetting it to the default state and deleting/re-adding the VF's VSI) while a virtchnl message is being handled. The following error was seen because a VF ndo op was used to change a VF's trust setting while the VIRTCHNL_OP_CONFIG_VSI_QUEUES was ongoing: [35274.192484] ice 0000:88:00.0: Failed to set LAN Tx queue context, error: ICE_ERR_PARAM [35274.193074] ice 0000:88:00.0: VF 0 failed opcode 6, retval: -5 [35274.193640] iavf 0000:88:01.0: PF returned error -5 (IAVF_ERR_PARAM) to our request 6 Fix this by making sure the virtchnl handling and VF ndo ops that trigger VF resets cannot run concurrently. This is done by adding a struct mutex cfg_lock to each VF structure. For VF ndo ops, the mutex will be locked around the critical operations and VFR. Since the ndo ops will trigger a VFR, the virtchnl thread will use mutex_trylock(). This is done because if any other thread (i.e. VF ndo op) has the mutex, then that means the current VF message being handled is no longer valid, so just ignore it. 
This issue can be seen using the following commands: for i in {0..50}; do rmmod ice modprobe ice sleep 1 echo 1 > /sys/class/net/ens785f0/device/sriov_numvfs echo 1 > /sys/class/net/ens785f1/device/sriov_numvfs ip link set ens785f1 vf 0 trust on ip link set ens785f0 vf 0 trust on sleep 2 echo 0 > /sys/class/net/ens785f0/device/sriov_numvfs echo 0 > /sys/class/net/ens785f1/device/sriov_numvfs sleep 1 echo 1 > /sys/class/net/ens785f0/device/sriov_numvfs echo 1 > /sys/class/net/ens785f1/device/sriov_numvfs ip link set ens785f1 vf 0 trust on ip link set ens785f0 vf 0 trust on done Fixes: 7c710869d64e ("ice: Add handlers for VF netdevice operations") Signed-off-by: Brett Creeley Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- .../net/ethernet/intel/ice/ice_virtchnl_pf.c | 25 +++++++++++++++++++ .../net/ethernet/intel/ice/ice_virtchnl_pf.h | 5 ++++ 2 files changed, 30 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 3f727df3b6fb..217ff5e9a6f1 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -650,6 +650,8 @@ void ice_free_vfs(struct ice_pf *pf) set_bit(ICE_VF_STATE_DIS, pf->vf[i].vf_states); ice_free_vf_res(&pf->vf[i]); } + + mutex_destroy(&pf->vf[i].cfg_lock); } if (ice_sriov_free_msix_res(pf)) @@ -1946,6 +1948,8 @@ static void ice_set_dflt_settings_vfs(struct ice_pf *pf) ice_vf_fdir_init(vf); ice_vc_set_dflt_vf_ops(&vf->vc_ops); + + mutex_init(&vf->cfg_lock); } } @@ -4135,6 +4139,8 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, return 0; } + mutex_lock(&vf->cfg_lock); + vf->port_vlan_info = vlanprio; if (vf->port_vlan_info) @@ -4144,6 +4150,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, dev_info(dev, "Clearing port VLAN on VF %d\n", vf_id); ice_vc_reset_vf(vf); + mutex_unlock(&vf->cfg_lock); return 0; } @@ -4683,6 +4690,15 @@ 
error_handler: return; } + /* VF is being configured in another context that triggers a VFR, so no + * need to process this message + */ + if (!mutex_trylock(&vf->cfg_lock)) { + dev_info(dev, "VF %u is being configured in another context that will trigger a VFR, so there is no need to handle this message\n", + vf->vf_id); + return; + } + switch (v_opcode) { case VIRTCHNL_OP_VERSION: err = ops->get_ver_msg(vf, msg); @@ -4771,6 +4787,8 @@ error_handler: dev_info(dev, "PF failed to honor VF %d, opcode %d, error %d\n", vf_id, v_opcode, err); } + + mutex_unlock(&vf->cfg_lock); } /** @@ -4886,6 +4904,8 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) return -EINVAL; } + mutex_lock(&vf->cfg_lock); + /* VF is notified of its new MAC via the PF's response to the * VIRTCHNL_OP_GET_VF_RESOURCES message after the VF has been reset */ @@ -4904,6 +4924,7 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) } ice_vc_reset_vf(vf); + mutex_unlock(&vf->cfg_lock); return 0; } @@ -4938,11 +4959,15 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted) if (trusted == vf->trusted) return 0; + mutex_lock(&vf->cfg_lock); + vf->trusted = trusted; ice_vc_reset_vf(vf); dev_info(ice_pf_to_dev(pf), "VF %u is now %strusted\n", vf_id, trusted ?
"" : "un"); + mutex_unlock(&vf->cfg_lock); + return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h index 5ff93a08f54c..7e28ecbbe7af 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h @@ -100,6 +100,11 @@ struct ice_vc_vf_ops { struct ice_vf { struct ice_pf *pf; + /* Used during virtchnl message handling and NDO ops against the VF + * that will trigger a VFR + */ + struct mutex cfg_lock; + u16 vf_id; /* VF ID in the PF space */ u16 lan_vsi_idx; /* index into PF struct */ u16 ctrl_vsi_idx; From 00b06da29cf9dc633cdba87acd3f57f4df3fd5c7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 29 Oct 2021 09:14:19 -0500 Subject: [PATCH 248/433] signal: Add SA_IMMUTABLE to ensure forced signals do not get changed Andy pointed out that there are races between force_sig_info_to_task and sigaction[1] when force_sig_info_to_task is called. As Kees discovered[2], ptrace is also able to change these signals. In the case of seccomp killing a process with a signal it is a security violation to allow the signal to be caught or manipulated. Solve this problem by introducing a new flag SA_IMMUTABLE that prevents sigaction and ptrace from modifying these forced signals. This flag is carefully made kernel internal so that no new ABI is introduced. Longer term I think this can be solved by guaranteeing short circuit delivery of signals in this case. Unfortunately reliable and guaranteed short circuit delivery of these signals is still a ways off from being implemented, tested, and merged. So I have implemented a much simpler alternative for now.
[1] https://lkml.kernel.org/r/b5d52d25-7bde-4030-a7b1-7c6f8ab90660@www.fastmail.com [2] https://lkml.kernel.org/r/202110281136.5CE65399A7@keescook Cc: stable@vger.kernel.org Fixes: 307d522f5eb8 ("signal/seccomp: Refactor seccomp signal and coredump generation") Tested-by: Andrea Righi Tested-by: Kees Cook Signed-off-by: "Eric W. Biederman" --- include/linux/signal_types.h | 3 +++ include/uapi/asm-generic/signal-defs.h | 1 + kernel/signal.c | 8 +++++++- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h index 34cb28b8f16c..a70b2bdbf4d9 100644 --- a/include/linux/signal_types.h +++ b/include/linux/signal_types.h @@ -70,6 +70,9 @@ struct ksignal { int sig; }; +/* Used to kill the race between sigaction and forced signals */ +#define SA_IMMUTABLE 0x00800000 + #ifndef __ARCH_UAPI_SA_FLAGS #ifdef SA_RESTORER #define __ARCH_UAPI_SA_FLAGS SA_RESTORER diff --git a/include/uapi/asm-generic/signal-defs.h b/include/uapi/asm-generic/signal-defs.h index fe929e7b77ca..7572f2f46ee8 100644 --- a/include/uapi/asm-generic/signal-defs.h +++ b/include/uapi/asm-generic/signal-defs.h @@ -45,6 +45,7 @@ #define SA_UNSUPPORTED 0x00000400 #define SA_EXPOSE_TAGBITS 0x00000800 /* 0x00010000 used on mips */ +/* 0x00800000 used for internal SA_IMMUTABLE */ /* 0x01000000 used on x86 */ /* 0x02000000 used on x86 */ /* diff --git a/kernel/signal.c b/kernel/signal.c index 6a5e1802b9a2..056a107e3cbc 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1336,6 +1336,7 @@ force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t, bool blocked = sigismember(&t->blocked, sig); if (blocked || ignored || sigdfl) { action->sa.sa_handler = SIG_DFL; + action->sa.sa_flags |= SA_IMMUTABLE; if (blocked) { sigdelset(&t->blocked, sig); recalc_sigpending_and_wake(t); @@ -2760,7 +2761,8 @@ relock: if (!signr) break; /* will return 0 */ - if (unlikely(current->ptrace) && signr != SIGKILL) { + if (unlikely(current->ptrace) && 
(signr != SIGKILL) && + !(sighand->action[signr -1].sa.sa_flags & SA_IMMUTABLE)) { signr = ptrace_signal(signr, &ksig->info); if (!signr) continue; @@ -4110,6 +4112,10 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) k = &p->sighand->action[sig-1]; spin_lock_irq(&p->sighand->siglock); + if (k->sa.sa_flags & SA_IMMUTABLE) { + spin_unlock_irq(&p->sighand->siglock); + return -EINVAL; + } if (oact) *oact = *k; From f4a2d282cca57607a0d6718fafa1ab2d62703254 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 29 Sep 2021 17:05:26 -0500 Subject: [PATCH 249/433] apparmor: Use struct_size() helper in kzalloc() Make use of the struct_size() helper instead of an open-coded version, in order to avoid any potential type mistakes or integer overflows that, in the worst-case scenario, could lead to heap overflows. Link: https://github.com/KSPP/linux/issues/160 Signed-off-by: Gustavo A. R. Silva Signed-off-by: John Johansen --- security/apparmor/label.c | 3 +-- security/apparmor/policy.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/security/apparmor/label.c b/security/apparmor/label.c index f5eb9ac07e9b..1c89b056337b 100644 --- a/security/apparmor/label.c +++ b/security/apparmor/label.c @@ -424,8 +424,7 @@ struct aa_label *aa_label_alloc(int size, struct aa_proxy *proxy, gfp_t gfp) AA_BUG(size < 1); /* + 1 for null terminator entry on vec */ - new = kzalloc(sizeof(*new) + sizeof(struct aa_profile *) * (size + 1), - gfp); + new = kzalloc(struct_size(new, vec, size + 1), gfp); AA_DEBUG("%s (%p)\n", __func__, new); if (!new) goto fail; diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 4da4f3df9d4a..76cc1949c66f 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -259,8 +259,7 @@ struct aa_profile *aa_alloc_profile(const char *hname, struct aa_proxy *proxy, struct aa_profile *profile; /* freed by free_profile - usually through aa_put_profile */ - profile =
kzalloc(sizeof(*profile) + sizeof(struct aa_profile *) * 2, - gfp); + profile = kzalloc(struct_size(profile, label.vec, 2), gfp); if (!profile) return NULL; From 7b7211243afa1058b0f10bae7bd14d562f9767ca Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 11 Oct 2021 16:38:54 +0200 Subject: [PATCH 250/433] apparmor: remove unneeded one-line hook wrappers Use the common function directly. Signed-off-by: Florian Westphal Signed-off-by: John Johansen --- security/apparmor/lsm.c | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 64d6020ffd50..13c2f76bd1f7 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1763,32 +1763,16 @@ static unsigned int apparmor_ip_postroute(void *priv, } -static unsigned int apparmor_ipv4_postroute(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return apparmor_ip_postroute(priv, skb, state); -} - -#if IS_ENABLED(CONFIG_IPV6) -static unsigned int apparmor_ipv6_postroute(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return apparmor_ip_postroute(priv, skb, state); -} -#endif - static const struct nf_hook_ops apparmor_nf_ops[] = { { - .hook = apparmor_ipv4_postroute, + .hook = apparmor_ip_postroute, .pf = NFPROTO_IPV4, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_SELINUX_FIRST, }, #if IS_ENABLED(CONFIG_IPV6) { - .hook = apparmor_ipv6_postroute, + .hook = apparmor_ip_postroute, .pf = NFPROTO_IPV6, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP6_PRI_SELINUX_FIRST, From 582122f1d73af28407234321c94711e09aa3fd04 Mon Sep 17 00:00:00 2001 From: Austin Kim Date: Wed, 3 Nov 2021 09:25:31 +0000 Subject: [PATCH 251/433] apparmor: remove duplicated 'Returns:' comments It might look better if duplicated 'Returns:' comment is removed. 
Signed-off-by: Austin Kim Signed-off-by: John Johansen --- security/apparmor/procattr.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/security/apparmor/procattr.c b/security/apparmor/procattr.c index c929bf4a3df1..fde332e0ea7d 100644 --- a/security/apparmor/procattr.c +++ b/security/apparmor/procattr.c @@ -21,8 +21,6 @@ * @profile: the profile to print profile info about (NOT NULL) * @string: Returns - string containing the profile info (NOT NULL) * - * Returns: length of @string on success else error on failure - * * Requires: profile != NULL * * Creates a string containing the namespace_name://profile_name for From a985442fdecb59504e3a2f1cfdd3c53af017ea5b Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Thu, 4 Nov 2021 11:46:13 +0100 Subject: [PATCH 252/433] selftests: net: properly support IPv6 in GSO GRE test Explicitly pass -6 to netcat when the test is using IPv6 to prevent failures. Also make sure to pass "-N" to netcat to close the socket after EOF on the client side, otherwise we would always hit the timeout and the test would fail. Without this fix applied: TEST: GREv6/v4 - copy file w/ TSO [FAIL] TEST: GREv6/v4 - copy file w/ GSO [FAIL] TEST: GREv6/v6 - copy file w/ TSO [FAIL] TEST: GREv6/v6 - copy file w/ GSO [FAIL] With this fix applied: TEST: GREv6/v4 - copy file w/ TSO [ OK ] TEST: GREv6/v4 - copy file w/ GSO [ OK ] TEST: GREv6/v6 - copy file w/ TSO [ OK ] TEST: GREv6/v6 - copy file w/ GSO [ OK ] Fixes: 025efa0a82df ("selftests: add simple GSO GRE test") Signed-off-by: Andrea Righi Signed-off-by: David S. 
Miller --- tools/testing/selftests/net/gre_gso.sh | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/gre_gso.sh b/tools/testing/selftests/net/gre_gso.sh index facbb0c80443..fdeb44d621eb 100755 --- a/tools/testing/selftests/net/gre_gso.sh +++ b/tools/testing/selftests/net/gre_gso.sh @@ -116,17 +116,18 @@ gre_gst_test_checks() { local name=$1 local addr=$2 + local proto=$3 - $NS_EXEC nc -kl $port >/dev/null & + $NS_EXEC nc $proto -kl $port >/dev/null & PID=$! while ! $NS_EXEC ss -ltn | grep -q $port; do ((i++)); sleep 0.01; done - cat $TMPFILE | timeout 1 nc $addr $port + cat $TMPFILE | timeout 1 nc $proto -N $addr $port log_test $? 0 "$name - copy file w/ TSO" ethtool -K veth0 tso off - cat $TMPFILE | timeout 1 nc $addr $port + cat $TMPFILE | timeout 1 nc $proto -N $addr $port log_test $? 0 "$name - copy file w/ GSO" ethtool -K veth0 tso on @@ -155,7 +156,7 @@ gre6_gso_test() sleep 2 gre_gst_test_checks GREv6/v4 172.16.2.2 - gre_gst_test_checks GREv6/v6 2001:db8:1::2 + gre_gst_test_checks GREv6/v6 2001:db8:1::2 -6 cleanup } From 3b65abb8d8a650e50ff5448ac38992ef8a74c584 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Thu, 4 Nov 2021 00:17:51 +0200 Subject: [PATCH 253/433] tcp: Use BIT() for OPTION_* constants Extending these flags using the existing (1 << x) pattern triggers complaints from checkpatch. Instead of ignoring checkpatch modify the existing values to use BIT(x) style in a separate commit. Signed-off-by: Leonard Crestez Reviewed-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/tcp_output.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 6f7860e283c6..2e6e5a70168e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -408,13 +408,13 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp) return tp->snd_una != tp->snd_up; } -#define OPTION_SACK_ADVERTISE (1 << 0) -#define OPTION_TS (1 << 1) -#define OPTION_MD5 (1 << 2) -#define OPTION_WSCALE (1 << 3) -#define OPTION_FAST_OPEN_COOKIE (1 << 8) -#define OPTION_SMC (1 << 9) -#define OPTION_MPTCP (1 << 10) +#define OPTION_SACK_ADVERTISE BIT(0) +#define OPTION_TS BIT(1) +#define OPTION_MD5 BIT(2) +#define OPTION_WSCALE BIT(3) +#define OPTION_FAST_OPEN_COOKIE BIT(8) +#define OPTION_SMC BIT(9) +#define OPTION_MPTCP BIT(10) static void smc_options_write(__be32 *ptr, u16 *options) { From d00c8ee31729248ba40b4ab25cd3b3b580c6f87c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 3 Nov 2021 16:49:11 -0700 Subject: [PATCH 254/433] net: fix possible NULL deref in sock_reserve_memory Sanity check in sock_reserve_memory() was not enough to prevent a malicious user from triggering a NULL deref. In this case, the issue is that sk_prot->memory_allocated is NULL. Use standard sk_has_account() helper to deal with this.
BUG: KASAN: null-ptr-deref in instrument_atomic_read_write include/linux/instrumented.h:101 [inline] BUG: KASAN: null-ptr-deref in atomic_long_add_return include/linux/atomic/atomic-instrumented.h:1218 [inline] BUG: KASAN: null-ptr-deref in sk_memory_allocated_add include/net/sock.h:1371 [inline] BUG: KASAN: null-ptr-deref in sock_reserve_memory net/core/sock.c:994 [inline] BUG: KASAN: null-ptr-deref in sock_setsockopt+0x22ab/0x2b30 net/core/sock.c:1443 Write of size 8 at addr 0000000000000000 by task syz-executor.0/11270 CPU: 1 PID: 11270 Comm: syz-executor.0 Not tainted 5.15.0-syzkaller #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-2 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 __kasan_report mm/kasan/report.c:446 [inline] kasan_report.cold+0x66/0xdf mm/kasan/report.c:459 check_region_inline mm/kasan/generic.c:183 [inline] kasan_check_range+0x13d/0x180 mm/kasan/generic.c:189 instrument_atomic_read_write include/linux/instrumented.h:101 [inline] atomic_long_add_return include/linux/atomic/atomic-instrumented.h:1218 [inline] sk_memory_allocated_add include/net/sock.h:1371 [inline] sock_reserve_memory net/core/sock.c:994 [inline] sock_setsockopt+0x22ab/0x2b30 net/core/sock.c:1443 __sys_setsockopt+0x4f8/0x610 net/socket.c:2172 __do_sys_setsockopt net/socket.c:2187 [inline] __se_sys_setsockopt net/socket.c:2184 [inline] __x64_sys_setsockopt+0xba/0x150 net/socket.c:2184 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f56076d5ae9 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f5604c4b188 EFLAGS: 00000246 ORIG_RAX: 0000000000000036 RAX: ffffffffffffffda RBX: 00007f56077e8f60 RCX: 00007f56076d5ae9 RDX: 
0000000000000049 RSI: 0000000000000001 RDI: 0000000000000003 RBP: 00007f560772ff25 R08: 000000000000fec7 R09: 0000000000000000 R10: 0000000020000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007fffb61a100f R14: 00007f5604c4b300 R15: 0000000000022000 Fixes: 2bb2f5fb21b0 ("net: add new socket option SO_RESERVE_MEM") Signed-off-by: Eric Dumazet Reported-by: syzbot Acked-by: Wei Wang Signed-off-by: David S. Miller --- net/core/sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/sock.c b/net/core/sock.c index 9862eefce21e..8f2b2f2c0e7b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -976,7 +976,7 @@ static int sock_reserve_memory(struct sock *sk, int bytes) bool charged; int pages; - if (!mem_cgroup_sockets_enabled || !sk->sk_memcg) + if (!mem_cgroup_sockets_enabled || !sk->sk_memcg || !sk_has_account(sk)) return -EOPNOTSUPP; if (!bytes) From 3eda41df05d6ad5c825cbc7fef03d563597b1afa Mon Sep 17 00:00:00 2001 From: Eric Whitney Date: Tue, 12 Oct 2021 13:19:01 -0400 Subject: [PATCH 255/433] Revert "ext4: enforce buffer head state assertion in ext4_da_map_blocks" This reverts commit 948ca5f30e1df0c11eb5b0f410b9ceb97fa77ad9. Two crash reports from users running variations on 5.15-rc4 kernels suggest that it is premature to enforce the state assertion in the original commit. Both crashes were triggered by BUG calls in that code, indicating that under some rare circumstance the buffer head state did not match a delayed allocated block at the time the block was written out. No reproducer is available. Resolving this problem will require more time than remains in the current release cycle, so reverting the original patch for the time being is necessary to avoid any instability it may cause. 
Signed-off-by: Eric Whitney Link: https://lore.kernel.org/r/20211012171901.5352-1-enwlinux@gmail.com Fixes: 948ca5f30e1d ("ext4: enforce buffer head state assertion in ext4_da_map_blocks") Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/inode.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0f06305167d5..9097fccdc688 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1711,16 +1711,13 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, } /* - * the buffer head associated with a delayed and not unwritten - * block found in the extent status cache must contain an - * invalid block number and have its BH_New and BH_Delay bits - * set, reflecting the state assigned when the block was - * initially delayed allocated + * Delayed extent could be allocated by fallocate. + * So we need to check it. */ - if (ext4_es_is_delonly(&es)) { - BUG_ON(bh->b_blocknr != invalid_block); - BUG_ON(!buffer_new(bh)); - BUG_ON(!buffer_delay(bh)); + if (ext4_es_is_delayed(&es) && !ext4_es_is_unwritten(&es)) { + map_bh(bh, inode->i_sb, invalid_block); + set_buffer_new(bh); + set_buffer_delay(bh); return 0; } From 39fec6889d15a658c3a3ebb06fd69d3584ddffd3 Mon Sep 17 00:00:00 2001 From: Shaoying Xu Date: Thu, 2 Sep 2021 16:44:12 +0000 Subject: [PATCH 256/433] ext4: fix lazy initialization next schedule time computation in more granular unit Ext4 file system has default lazy inode table initialization setup once it is mounted. However, it has issue on computing the next schedule time that makes the timeout same amount in jiffies but different real time in secs if with various HZ values. Therefore, fix by measuring the current time in a more granular unit nanoseconds and make the next schedule time independent of the HZ value. 
Fixes: bfff68738f1c ("ext4: add support for lazy inode table initialization") Signed-off-by: Shaoying Xu Cc: stable@vger.kernel.org Signed-off-by: Theodore Ts'o Link: https://lore.kernel.org/r/20210902164412.9994-2-shaoyi@amazon.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 88d5d274a868..8a67e5f3f576 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3263,9 +3263,9 @@ static int ext4_run_li_request(struct ext4_li_request *elr) struct super_block *sb = elr->lr_super; ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; ext4_group_t group = elr->lr_next_group; - unsigned long timeout = 0; unsigned int prefetch_ios = 0; int ret = 0; + u64 start_time; if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP) { elr->lr_next_group = ext4_mb_prefetch(sb, group, @@ -3302,14 +3302,13 @@ static int ext4_run_li_request(struct ext4_li_request *elr) ret = 1; if (!ret) { - timeout = jiffies; + start_time = ktime_get_real_ns(); ret = ext4_init_inode_table(sb, group, elr->lr_timeout ? 0 : 1); trace_ext4_lazy_itable_init(sb, group); if (elr->lr_timeout == 0) { - timeout = (jiffies - timeout) * - EXT4_SB(elr->lr_super)->s_li_wait_mult; - elr->lr_timeout = timeout; + elr->lr_timeout = nsecs_to_jiffies((ktime_get_real_ns() - start_time) * + EXT4_SB(elr->lr_super)->s_li_wait_mult); } elr->lr_next_sched = jiffies + elr->lr_timeout; elr->lr_next_group = group + 1; From 83c5688b8977b1cd495a05ca0455e4353c8f6655 Mon Sep 17 00:00:00 2001 From: yangerkun Date: Fri, 3 Sep 2021 14:27:46 +0800 Subject: [PATCH 257/433] ext4: correct the left/middle/right debug message for binsearch The debuginfo for binsearch want to show the left/middle/right extent while the process search for the goal block. However we show this info after we change right or left. 
Link: https://lore.kernel.org/r/20210903062748.4118886-2-yangerkun@huawei.com Signed-off-by: yangerkun Reviewed-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 0e02571f2f82..c59426f1e1d2 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -714,13 +714,14 @@ ext4_ext_binsearch_idx(struct inode *inode, r = EXT_LAST_INDEX(eh); while (l <= r) { m = l + (r - l) / 2; + ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l, + le32_to_cpu(l->ei_block), m, le32_to_cpu(m->ei_block), + r, le32_to_cpu(r->ei_block)); + if (block < le32_to_cpu(m->ei_block)) r = m - 1; else l = m + 1; - ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l, - le32_to_cpu(l->ei_block), m, le32_to_cpu(m->ei_block), - r, le32_to_cpu(r->ei_block)); } path->p_idx = l - 1; @@ -782,13 +783,14 @@ ext4_ext_binsearch(struct inode *inode, while (l <= r) { m = l + (r - l) / 2; + ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l, + le32_to_cpu(l->ee_block), m, le32_to_cpu(m->ee_block), + r, le32_to_cpu(r->ee_block)); + if (block < le32_to_cpu(m->ee_block)) r = m - 1; else l = m + 1; - ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l, - le32_to_cpu(l->ee_block), m, le32_to_cpu(m->ee_block), - r, le32_to_cpu(r->ee_block)); } path->p_ext = l - 1; From 4268496e48dc681cfa53b92357314b5d7221e625 Mon Sep 17 00:00:00 2001 From: yangerkun Date: Fri, 3 Sep 2021 14:27:47 +0800 Subject: [PATCH 258/433] ext4: ensure enough credits in ext4_ext_shift_path_extents Like ext4_ext_rm_leaf, we can ensure that there are enough credits before every call that will consume credits. As part of this fix we fold the functionality of ext4_access_path() into ext4_ext_shift_path_extents(). This change is needed as a preparation for the next bugfix patch. 
Cc: stable@kernel.org Link: https://lore.kernel.org/r/20210903062748.4118886-3-yangerkun@huawei.com Signed-off-by: yangerkun Reviewed-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 49 +++++++++++++++-------------------------------- 1 file changed, 15 insertions(+), 34 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c59426f1e1d2..6b080f61342a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4979,36 +4979,6 @@ int ext4_get_es_cache(struct inode *inode, struct fiemap_extent_info *fieinfo, return ext4_fill_es_cache_info(inode, start_blk, len_blks, fieinfo); } -/* - * ext4_access_path: - * Function to access the path buffer for marking it dirty. - * It also checks if there are sufficient credits left in the journal handle - * to update path. - */ -static int -ext4_access_path(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path) -{ - int credits, err; - - if (!ext4_handle_valid(handle)) - return 0; - - /* - * Check if need to extend journal credits - * 3 for leaf, sb, and inode plus 2 (bmap and group - * descriptor) for each block group; assume two block - * groups - */ - credits = ext4_writepage_trans_blocks(inode); - err = ext4_datasem_ensure_credits(handle, inode, 7, credits, 0); - if (err < 0) - return err; - - err = ext4_ext_get_access(handle, inode, path); - return err; -} - /* * ext4_ext_shift_path_extents: * Shift the extents of a path structure lying between path[depth].p_ext @@ -5023,6 +4993,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, int depth, err = 0; struct ext4_extent *ex_start, *ex_last; bool update = false; + int credits, restart_credits; depth = path->p_depth; while (depth >= 0) { @@ -5032,13 +5003,23 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, return -EFSCORRUPTED; ex_last = EXT_LAST_EXTENT(path[depth].p_hdr); + /* leaf + sb + inode */ + credits = 3; + if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) { + update = 
true; + /* extent tree + sb + inode */ + credits = depth + 2; + } - err = ext4_access_path(handle, inode, path + depth); + restart_credits = ext4_writepage_trans_blocks(inode); + err = ext4_datasem_ensure_credits(handle, inode, credits, + restart_credits, 0); if (err) goto out; - if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) - update = true; + err = ext4_ext_get_access(handle, inode, path + depth); + if (err) + goto out; while (ex_start <= ex_last) { if (SHIFT == SHIFT_LEFT) { @@ -5069,7 +5050,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, } /* Update index too */ - err = ext4_access_path(handle, inode, path + depth); + err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto out; From 1811bc401aa58c7bdb0df3205aa6613b49d32127 Mon Sep 17 00:00:00 2001 From: yangerkun Date: Fri, 3 Sep 2021 14:27:48 +0800 Subject: [PATCH 259/433] ext4: refresh the ext4_ext_path struct after dropping i_data_sem. After we drop i_data sem, we need to reload the ext4_ext_path structure since the extent tree can change once i_data_sem is released. This addresses the BUG: [52117.465187] ------------[ cut here ]------------ [52117.465686] kernel BUG at fs/ext4/extents.c:1756! ... [52117.478306] Call Trace: [52117.478565] ext4_ext_shift_extents+0x3ee/0x710 [52117.479020] ext4_fallocate+0x139c/0x1b40 [52117.479405] ? 
__do_sys_newfstat+0x6b/0x80 [52117.479805] vfs_fallocate+0x151/0x4b0 [52117.480177] ksys_fallocate+0x4a/0xa0 [52117.480533] __x64_sys_fallocate+0x22/0x30 [52117.480930] do_syscall_64+0x35/0x80 [52117.481277] entry_SYSCALL_64_after_hwframe+0x44/0xae [52117.481769] RIP: 0033:0x7fa062f855ca Cc: stable@kernel.org Link: https://lore.kernel.org/r/20210903062748.4118886-4-yangerkun@huawei.com Signed-off-by: yangerkun Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 6b080f61342a..15c68bc80d21 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5014,8 +5014,11 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, restart_credits = ext4_writepage_trans_blocks(inode); err = ext4_datasem_ensure_credits(handle, inode, credits, restart_credits, 0); - if (err) + if (err) { + if (err > 0) + err = -EAGAIN; goto out; + } err = ext4_ext_get_access(handle, inode, path + depth); if (err) @@ -5089,6 +5092,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, int ret = 0, depth; struct ext4_extent *extent; ext4_lblk_t stop, *iterator, ex_start, ex_end; + ext4_lblk_t tmp = EXT_MAX_BLOCKS; /* Let path point to the last extent */ path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, @@ -5142,11 +5146,15 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, * till we reach stop. In case of right shift, iterator points to stop * and it is decreased till we reach start. */ +again: if (SHIFT == SHIFT_LEFT) iterator = &start; else iterator = &stop; + if (tmp != EXT_MAX_BLOCKS) + *iterator = tmp; + /* * Its safe to start updating extents. 
Start and stop are unsigned, so * in case of right shift if extent with 0 block is reached, iterator @@ -5175,6 +5183,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, } } + tmp = *iterator; if (SHIFT == SHIFT_LEFT) { extent = EXT_LAST_EXTENT(path[depth].p_hdr); *iterator = le32_to_cpu(extent->ee_block) + @@ -5193,6 +5202,9 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, } ret = ext4_ext_shift_path_extents(path, shift, inode, handle, SHIFT); + /* iterator can be NULL which means we should break */ + if (ret == -EAGAIN) + goto again; if (ret) break; } From 31d21d219b51dcfb16e18427eddae5394d402820 Mon Sep 17 00:00:00 2001 From: Xiyu Yang Date: Mon, 19 Jul 2021 13:59:14 +0800 Subject: [PATCH 260/433] ext4: convert from atomic_t to refcount_t on ext4_io_end->count refcount_t type and corresponding API can protect refcounters from accidental underflow and overflow and further use-after-free situations. Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/1626674355-55795-1-git-send-email-xiyuyang19@fudan.edu.cn Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 3 ++- fs/ext4/page-io.c | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 3825195539d7..404dd50856e5 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -17,6 +17,7 @@ #ifndef _EXT4_H #define _EXT4_H +#include #include #include #include @@ -241,7 +242,7 @@ typedef struct ext4_io_end { struct bio *bio; /* Linked list of completed * bios covering the extent */ unsigned int flag; /* unwritten or not */ - atomic_t count; /* reference counter */ + refcount_t count; /* reference counter */ struct list_head list_vec; /* list of ext4_io_end_vec */ } ext4_io_end_t; diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index f038d578d8d8..9cb261714991 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -279,14 +279,14 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, 
gfp_t flags) io_end->inode = inode; INIT_LIST_HEAD(&io_end->list); INIT_LIST_HEAD(&io_end->list_vec); - atomic_set(&io_end->count, 1); + refcount_set(&io_end->count, 1); } return io_end; } void ext4_put_io_end_defer(ext4_io_end_t *io_end) { - if (atomic_dec_and_test(&io_end->count)) { + if (refcount_dec_and_test(&io_end->count)) { if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || list_empty(&io_end->list_vec)) { ext4_release_io_end(io_end); @@ -300,7 +300,7 @@ int ext4_put_io_end(ext4_io_end_t *io_end) { int err = 0; - if (atomic_dec_and_test(&io_end->count)) { + if (refcount_dec_and_test(&io_end->count)) { if (io_end->flag & EXT4_IO_END_UNWRITTEN) { err = ext4_convert_unwritten_io_end_vec(io_end->handle, io_end); @@ -314,7 +314,7 @@ int ext4_put_io_end(ext4_io_end_t *io_end) ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end) { - atomic_inc(&io_end->count); + refcount_inc(&io_end->count); return io_end; } From 8dd27fecede55e8a4e67eef2878040ecad0f0d33 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 8 Sep 2021 20:08:48 +0800 Subject: [PATCH 261/433] ext4: check for out-of-order index extents in ext4_valid_extent_entries() After commit 5946d089379a ("ext4: check for overlapping extents in ext4_valid_extent_entries()"), we can check out the overlapping extent entry in leaf extent blocks. But the out-of-order extent entry in index extent blocks could also trigger bad things if the filesystem is inconsistent. So this patch add a check to figure out the out-of-order index extents and return error. 
Signed-off-by: Zhang Yi Reviewed-by: Theodore Ts'o Link: https://lore.kernel.org/r/20210908120850.4012324-2-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 15c68bc80d21..c0ad07adf9a6 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -357,6 +357,9 @@ static int ext4_valid_extent_entries(struct inode *inode, ext4_fsblk_t *pblk, int depth) { unsigned short entries; + ext4_lblk_t lblock = 0; + ext4_lblk_t prev = 0; + if (eh->eh_entries == 0) return 1; @@ -365,31 +368,35 @@ static int ext4_valid_extent_entries(struct inode *inode, if (depth == 0) { /* leaf entries */ struct ext4_extent *ext = EXT_FIRST_EXTENT(eh); - ext4_lblk_t lblock = 0; - ext4_lblk_t prev = 0; - int len = 0; while (entries) { if (!ext4_valid_extent(inode, ext)) return 0; /* Check for overlapping extents */ lblock = le32_to_cpu(ext->ee_block); - len = ext4_ext_get_actual_len(ext); if ((lblock <= prev) && prev) { *pblk = ext4_ext_pblock(ext); return 0; } + prev = lblock + ext4_ext_get_actual_len(ext) - 1; ext++; entries--; - prev = lblock + len - 1; } } else { struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh); while (entries) { if (!ext4_valid_extent_idx(inode, ext_idx)) return 0; + + /* Check for overlapping index extents */ + lblock = le32_to_cpu(ext_idx->ei_block); + if ((lblock <= prev) && prev) { + *pblk = ext4_idx_pblock(ext_idx); + return 0; + } ext_idx++; entries--; + prev = lblock; } } return 1; From 9c6e071913792d80894cd0be98cc3c4b770e26d3 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 8 Sep 2021 20:08:49 +0800 Subject: [PATCH 262/433] ext4: check for inconsistent extents between index and leaf block Now that we can check out overlapping extents in leaf block and out-of-order index extents in index block. But the .ee_block in the first extent of one leaf block should equal to the .ei_block in it's parent index extent entry. 
This patch add a check to verify such inconsistent between the index and leaf block. Signed-off-by: Zhang Yi Link: https://lore.kernel.org/r/20210908120850.4012324-3-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 59 +++++++++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c0ad07adf9a6..f6a902de7f41 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -354,7 +354,8 @@ static int ext4_valid_extent_idx(struct inode *inode, static int ext4_valid_extent_entries(struct inode *inode, struct ext4_extent_header *eh, - ext4_fsblk_t *pblk, int depth) + ext4_lblk_t lblk, ext4_fsblk_t *pblk, + int depth) { unsigned short entries; ext4_lblk_t lblock = 0; @@ -368,6 +369,14 @@ static int ext4_valid_extent_entries(struct inode *inode, if (depth == 0) { /* leaf entries */ struct ext4_extent *ext = EXT_FIRST_EXTENT(eh); + + /* + * The logical block in the first entry should equal to + * the number in the index block. + */ + if (depth != ext_depth(inode) && + lblk != le32_to_cpu(ext->ee_block)) + return 0; while (entries) { if (!ext4_valid_extent(inode, ext)) return 0; @@ -384,6 +393,14 @@ static int ext4_valid_extent_entries(struct inode *inode, } } else { struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh); + + /* + * The logical block in the first entry should equal to + * the number in the parent index block. 
+ */ + if (depth != ext_depth(inode) && + lblk != le32_to_cpu(ext_idx->ei_block)) + return 0; while (entries) { if (!ext4_valid_extent_idx(inode, ext_idx)) return 0; @@ -404,7 +421,7 @@ static int ext4_valid_extent_entries(struct inode *inode, static int __ext4_ext_check(const char *function, unsigned int line, struct inode *inode, struct ext4_extent_header *eh, - int depth, ext4_fsblk_t pblk) + int depth, ext4_fsblk_t pblk, ext4_lblk_t lblk) { const char *error_msg; int max = 0, err = -EFSCORRUPTED; @@ -430,7 +447,7 @@ static int __ext4_ext_check(const char *function, unsigned int line, error_msg = "invalid eh_entries"; goto corrupted; } - if (!ext4_valid_extent_entries(inode, eh, &pblk, depth)) { + if (!ext4_valid_extent_entries(inode, eh, lblk, &pblk, depth)) { error_msg = "invalid extent entries"; goto corrupted; } @@ -460,7 +477,7 @@ corrupted: } #define ext4_ext_check(inode, eh, depth, pblk) \ - __ext4_ext_check(__func__, __LINE__, (inode), (eh), (depth), (pblk)) + __ext4_ext_check(__func__, __LINE__, (inode), (eh), (depth), (pblk), 0) int ext4_ext_check_inode(struct inode *inode) { @@ -493,16 +510,18 @@ static void ext4_cache_extents(struct inode *inode, static struct buffer_head * __read_extent_tree_block(const char *function, unsigned int line, - struct inode *inode, ext4_fsblk_t pblk, int depth, - int flags) + struct inode *inode, struct ext4_extent_idx *idx, + int depth, int flags) { struct buffer_head *bh; int err; gfp_t gfp_flags = __GFP_MOVABLE | GFP_NOFS; + ext4_fsblk_t pblk; if (flags & EXT4_EX_NOFAIL) gfp_flags |= __GFP_NOFAIL; + pblk = ext4_idx_pblock(idx); bh = sb_getblk_gfp(inode->i_sb, pblk, gfp_flags); if (unlikely(!bh)) return ERR_PTR(-ENOMEM); @@ -515,8 +534,8 @@ __read_extent_tree_block(const char *function, unsigned int line, } if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE)) return bh; - err = __ext4_ext_check(function, line, inode, - ext_block_hdr(bh), depth, pblk); + err = __ext4_ext_check(function, line, inode, 
ext_block_hdr(bh), + depth, pblk, le32_to_cpu(idx->ei_block)); if (err) goto errout; set_buffer_verified(bh); @@ -534,8 +553,8 @@ errout: } -#define read_extent_tree_block(inode, pblk, depth, flags) \ - __read_extent_tree_block(__func__, __LINE__, (inode), (pblk), \ +#define read_extent_tree_block(inode, idx, depth, flags) \ + __read_extent_tree_block(__func__, __LINE__, (inode), (idx), \ (depth), (flags)) /* @@ -585,8 +604,7 @@ int ext4_ext_precache(struct inode *inode) i--; continue; } - bh = read_extent_tree_block(inode, - ext4_idx_pblock(path[i].p_idx++), + bh = read_extent_tree_block(inode, path[i].p_idx++, depth - i - 1, EXT4_EX_FORCE_CACHE); if (IS_ERR(bh)) { @@ -893,8 +911,7 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, path[ppos].p_depth = i; path[ppos].p_ext = NULL; - bh = read_extent_tree_block(inode, path[ppos].p_block, --i, - flags); + bh = read_extent_tree_block(inode, path[ppos].p_idx, --i, flags); if (IS_ERR(bh)) { ret = PTR_ERR(bh); goto err; @@ -1503,7 +1520,6 @@ static int ext4_ext_search_right(struct inode *inode, struct ext4_extent_header *eh; struct ext4_extent_idx *ix; struct ext4_extent *ex; - ext4_fsblk_t block; int depth; /* Note, NOT eh_depth; depth from top of tree */ int ee_len; @@ -1570,20 +1586,17 @@ got_index: * follow it and find the closest allocated * block to the right */ ix++; - block = ext4_idx_pblock(ix); while (++depth < path->p_depth) { /* subtract from p_depth to get proper eh_depth */ - bh = read_extent_tree_block(inode, block, - path->p_depth - depth, 0); + bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0); if (IS_ERR(bh)) return PTR_ERR(bh); eh = ext_block_hdr(bh); ix = EXT_FIRST_INDEX(eh); - block = ext4_idx_pblock(ix); put_bh(bh); } - bh = read_extent_tree_block(inode, block, path->p_depth - depth, 0); + bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0); if (IS_ERR(bh)) return PTR_ERR(bh); eh = ext_block_hdr(bh); @@ -2962,9 +2975,9 @@ again: ext_debug(inode, "move to level %d 
(block %llu)\n", i + 1, ext4_idx_pblock(path[i].p_idx)); memset(path + i + 1, 0, sizeof(*path)); - bh = read_extent_tree_block(inode, - ext4_idx_pblock(path[i].p_idx), depth - i - 1, - EXT4_EX_NOCACHE); + bh = read_extent_tree_block(inode, path[i].p_idx, + depth - i - 1, + EXT4_EX_NOCACHE); if (IS_ERR(bh)) { /* should we reset i_size? */ err = PTR_ERR(bh); From 0f2f87d51aebcf71a709b52f661d681594c7dffa Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 8 Sep 2021 20:08:50 +0800 Subject: [PATCH 263/433] ext4: prevent partial update of the extent blocks Most error paths of the current extent updating operations do not roll back partial updates properly when something goes wrong (e.g. in ext4_ext_insert_extent()). So we may get an inconsistent extent tree if the journal has been aborted due to an I/O error, which may lead to a BUG_ON later when we access these extent entries in errors=continue mode. This patch drops the extent buffer's verified flag before updating the contents in ext4_ext_get_access(), and sets it again after updating in __ext4_ext_dirty(). After this patch, if an extent tree update is broken off partway, the extent buffer is forced to be checked again, making sure the extents are consistent.
Signed-off-by: Zhang Yi Reviewed-by: Theodore Ts'o Link: https://lore.kernel.org/r/20210908120850.4012324-4-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index f6a902de7f41..09f56e04f4b2 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -136,15 +136,25 @@ int ext4_datasem_ensure_credits(handle_t *handle, struct inode *inode, static int ext4_ext_get_access(handle_t *handle, struct inode *inode, struct ext4_ext_path *path) { + int err = 0; + if (path->p_bh) { /* path points to block */ BUFFER_TRACE(path->p_bh, "get_write_access"); - return ext4_journal_get_write_access(handle, inode->i_sb, - path->p_bh, EXT4_JTR_NONE); + err = ext4_journal_get_write_access(handle, inode->i_sb, + path->p_bh, EXT4_JTR_NONE); + /* + * The extent buffer's verified bit will be set again in + * __ext4_ext_dirty(). We could leave an inconsistent + * buffer if the extents updating procudure break off du + * to some error happens, force to check it again. 
+ */ + if (!err) + clear_buffer_verified(path->p_bh); } /* path points to leaf/index in inode body */ /* we use in-core data, no need to protect them */ - return 0; + return err; } /* @@ -165,6 +175,9 @@ static int __ext4_ext_dirty(const char *where, unsigned int line, /* path points to block */ err = __ext4_handle_dirty_metadata(where, line, handle, inode, path->p_bh); + /* Extents updating done, re-set verified flag */ + if (!err) + set_buffer_verified(path->p_bh); } else { /* path points to leaf/index in inode body */ err = ext4_mark_inode_dirty(handle, inode); From 664bd38b9cbed11689a9b7ce8b7db2e57b7b9e23 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 1 Sep 2021 10:09:53 +0800 Subject: [PATCH 264/433] ext4: factor out ext4_fill_raw_inode() Factor out ext4_fill_raw_inode() from ext4_do_update_inode(), which is use to fill the in-mem inode contents into the inode table buffer, in preparation for initializing the exclusive inode buffer without reading the block in __ext4_get_inode_loc(). Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20210901020955.1657340-2-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 85 +++++++++++++++++++++++++++---------------------- 1 file changed, 47 insertions(+), 38 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9097fccdc688..791958088e57 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4852,9 +4852,8 @@ bad_inode: return ERR_PTR(ret); } -static int ext4_inode_blocks_set(handle_t *handle, - struct ext4_inode *raw_inode, - struct ext4_inode_info *ei) +static int ext4_inode_blocks_set(struct ext4_inode *raw_inode, + struct ext4_inode_info *ei) { struct inode *inode = &(ei->vfs_inode); u64 i_blocks = READ_ONCE(inode->i_blocks); @@ -4957,37 +4956,16 @@ static void ext4_update_other_inodes_time(struct super_block *sb, rcu_read_unlock(); } -/* - * Post the struct inode info into an on-disk inode location in the - * buffer-cache. 
This gobbles the caller's reference to the - * buffer_head in the inode location struct. - * - * The caller must have write access to iloc->bh. - */ -static int ext4_do_update_inode(handle_t *handle, - struct inode *inode, - struct ext4_iloc *iloc) +static int ext4_fill_raw_inode(struct inode *inode, struct ext4_inode *raw_inode) { - struct ext4_inode *raw_inode = ext4_raw_inode(iloc); struct ext4_inode_info *ei = EXT4_I(inode); - struct buffer_head *bh = iloc->bh; - struct super_block *sb = inode->i_sb; - int err = 0, block; - int need_datasync = 0, set_large_file = 0; uid_t i_uid; gid_t i_gid; projid_t i_projid; + int block; + int err; - spin_lock(&ei->i_raw_lock); - - /* - * For fields not tracked in the in-memory inode, initialise them - * to zero for new inodes. - */ - if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) - memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); - - err = ext4_inode_blocks_set(handle, raw_inode, ei); + err = ext4_inode_blocks_set(raw_inode, ei); raw_inode->i_mode = cpu_to_le16(inode->i_mode); i_uid = i_uid_read(inode); @@ -5029,16 +5007,8 @@ static int ext4_do_update_inode(handle_t *handle, raw_inode->i_file_acl_high = cpu_to_le16(ei->i_file_acl >> 32); raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); - if (READ_ONCE(ei->i_disksize) != ext4_isize(inode->i_sb, raw_inode)) { - ext4_isize_set(raw_inode, ei->i_disksize); - need_datasync = 1; - } - if (ei->i_disksize > 0x7fffffffULL) { - if (!ext4_has_feature_large_file(sb) || - EXT4_SB(sb)->s_es->s_rev_level == - cpu_to_le32(EXT4_GOOD_OLD_REV)) - set_large_file = 1; - } + ext4_isize_set(raw_inode, ei->i_disksize); + raw_inode->i_generation = cpu_to_le32(inode->i_generation); if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { if (old_valid_dev(inode->i_rdev)) { @@ -5078,6 +5048,45 @@ static int ext4_do_update_inode(handle_t *handle, raw_inode->i_projid = cpu_to_le32(i_projid); ext4_inode_csum_set(inode, raw_inode, ei); + return err; +} + +/* + * Post the struct inode info 
into an on-disk inode location in the + * buffer-cache. This gobbles the caller's reference to the + * buffer_head in the inode location struct. + * + * The caller must have write access to iloc->bh. + */ +static int ext4_do_update_inode(handle_t *handle, + struct inode *inode, + struct ext4_iloc *iloc) +{ + struct ext4_inode *raw_inode = ext4_raw_inode(iloc); + struct ext4_inode_info *ei = EXT4_I(inode); + struct buffer_head *bh = iloc->bh; + struct super_block *sb = inode->i_sb; + int err; + int need_datasync = 0, set_large_file = 0; + + spin_lock(&ei->i_raw_lock); + + /* + * For fields not tracked in the in-memory inode, initialise them + * to zero for new inodes. + */ + if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) + memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); + + if (READ_ONCE(ei->i_disksize) != ext4_isize(inode->i_sb, raw_inode)) + need_datasync = 1; + if (ei->i_disksize > 0x7fffffffULL) { + if (!ext4_has_feature_large_file(sb) || + EXT4_SB(sb)->s_es->s_rev_level == cpu_to_le32(EXT4_GOOD_OLD_REV)) + set_large_file = 1; + } + + err = ext4_fill_raw_inode(inode, raw_inode); spin_unlock(&ei->i_raw_lock); if (err) { EXT4_ERROR_INODE(inode, "corrupted inode contents"); From 9a1bf32c8e12b7768325e83e9b9eeb69c46435b3 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 1 Sep 2021 10:09:54 +0800 Subject: [PATCH 265/433] ext4: move ext4_fill_raw_inode() related functions In preparation for calling ext4_fill_raw_inode() in __ext4_get_inode_loc(), move three related functions before __ext4_get_inode_loc(), no logical change. 
Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20210901020955.1657340-3-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 293 ++++++++++++++++++++++++------------------------ 1 file changed, 147 insertions(+), 146 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 791958088e57..a46d5e022175 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4231,6 +4231,153 @@ out_trace: return err; } +static inline u64 ext4_inode_peek_iversion(const struct inode *inode) +{ + if (unlikely(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) + return inode_peek_iversion_raw(inode); + else + return inode_peek_iversion(inode); +} + +static int ext4_inode_blocks_set(struct ext4_inode *raw_inode, + struct ext4_inode_info *ei) +{ + struct inode *inode = &(ei->vfs_inode); + u64 i_blocks = READ_ONCE(inode->i_blocks); + struct super_block *sb = inode->i_sb; + + if (i_blocks <= ~0U) { + /* + * i_blocks can be represented in a 32 bit variable + * as multiple of 512 bytes + */ + raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); + raw_inode->i_blocks_high = 0; + ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE); + return 0; + } + + /* + * This should never happen since sb->s_maxbytes should not have + * allowed this, sb->s_maxbytes was set according to the huge_file + * feature in ext4_fill_super(). 
+ */ + if (!ext4_has_feature_huge_file(sb)) + return -EFSCORRUPTED; + + if (i_blocks <= 0xffffffffffffULL) { + /* + * i_blocks can be represented in a 48 bit variable + * as multiple of 512 bytes + */ + raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); + raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); + ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE); + } else { + ext4_set_inode_flag(inode, EXT4_INODE_HUGE_FILE); + /* i_block is stored in file system block size */ + i_blocks = i_blocks >> (inode->i_blkbits - 9); + raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); + raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); + } + return 0; +} + +static int ext4_fill_raw_inode(struct inode *inode, struct ext4_inode *raw_inode) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + uid_t i_uid; + gid_t i_gid; + projid_t i_projid; + int block; + int err; + + err = ext4_inode_blocks_set(raw_inode, ei); + + raw_inode->i_mode = cpu_to_le16(inode->i_mode); + i_uid = i_uid_read(inode); + i_gid = i_gid_read(inode); + i_projid = from_kprojid(&init_user_ns, ei->i_projid); + if (!(test_opt(inode->i_sb, NO_UID32))) { + raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid)); + raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid)); + /* + * Fix up interoperability with old kernels. Otherwise, + * old inodes get re-used with the upper 16 bits of the + * uid/gid intact. 
+ */ + if (ei->i_dtime && list_empty(&ei->i_orphan)) { + raw_inode->i_uid_high = 0; + raw_inode->i_gid_high = 0; + } else { + raw_inode->i_uid_high = + cpu_to_le16(high_16_bits(i_uid)); + raw_inode->i_gid_high = + cpu_to_le16(high_16_bits(i_gid)); + } + } else { + raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(i_uid)); + raw_inode->i_gid_low = cpu_to_le16(fs_high2lowgid(i_gid)); + raw_inode->i_uid_high = 0; + raw_inode->i_gid_high = 0; + } + raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); + + EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode); + EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode); + EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); + EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode); + + raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); + raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF); + if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) + raw_inode->i_file_acl_high = + cpu_to_le16(ei->i_file_acl >> 32); + raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); + ext4_isize_set(raw_inode, ei->i_disksize); + + raw_inode->i_generation = cpu_to_le32(inode->i_generation); + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { + if (old_valid_dev(inode->i_rdev)) { + raw_inode->i_block[0] = + cpu_to_le32(old_encode_dev(inode->i_rdev)); + raw_inode->i_block[1] = 0; + } else { + raw_inode->i_block[0] = 0; + raw_inode->i_block[1] = + cpu_to_le32(new_encode_dev(inode->i_rdev)); + raw_inode->i_block[2] = 0; + } + } else if (!ext4_has_inline_data(inode)) { + for (block = 0; block < EXT4_N_BLOCKS; block++) + raw_inode->i_block[block] = ei->i_data[block]; + } + + if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) { + u64 ivers = ext4_inode_peek_iversion(inode); + + raw_inode->i_disk_version = cpu_to_le32(ivers); + if (ei->i_extra_isize) { + if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) + raw_inode->i_version_hi = + cpu_to_le32(ivers >> 32); + raw_inode->i_extra_isize = + cpu_to_le16(ei->i_extra_isize); + } + } + + if (i_projid != EXT4_DEF_PROJID 
&& + !ext4_has_feature_project(inode->i_sb)) + err = err ?: -EFSCORRUPTED; + + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && + EXT4_FITS_IN_INODE(raw_inode, ei, i_projid)) + raw_inode->i_projid = cpu_to_le32(i_projid); + + ext4_inode_csum_set(inode, raw_inode, ei); + return err; +} + /* * ext4_get_inode_loc returns with an extra refcount against the inode's * underlying buffer_head on success. If 'in_mem' is true, we have all @@ -4525,13 +4672,6 @@ static inline void ext4_inode_set_iversion_queried(struct inode *inode, u64 val) else inode_set_iversion_queried(inode, val); } -static inline u64 ext4_inode_peek_iversion(const struct inode *inode) -{ - if (unlikely(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) - return inode_peek_iversion_raw(inode); - else - return inode_peek_iversion(inode); -} struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, ext4_iget_flags flags, const char *function, @@ -4852,50 +4992,6 @@ bad_inode: return ERR_PTR(ret); } -static int ext4_inode_blocks_set(struct ext4_inode *raw_inode, - struct ext4_inode_info *ei) -{ - struct inode *inode = &(ei->vfs_inode); - u64 i_blocks = READ_ONCE(inode->i_blocks); - struct super_block *sb = inode->i_sb; - - if (i_blocks <= ~0U) { - /* - * i_blocks can be represented in a 32 bit variable - * as multiple of 512 bytes - */ - raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); - raw_inode->i_blocks_high = 0; - ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE); - return 0; - } - - /* - * This should never happen since sb->s_maxbytes should not have - * allowed this, sb->s_maxbytes was set according to the huge_file - * feature in ext4_fill_super(). 
- */ - if (!ext4_has_feature_huge_file(sb)) - return -EFSCORRUPTED; - - if (i_blocks <= 0xffffffffffffULL) { - /* - * i_blocks can be represented in a 48 bit variable - * as multiple of 512 bytes - */ - raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); - raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); - ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE); - } else { - ext4_set_inode_flag(inode, EXT4_INODE_HUGE_FILE); - /* i_block is stored in file system block size */ - i_blocks = i_blocks >> (inode->i_blkbits - 9); - raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); - raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); - } - return 0; -} - static void __ext4_update_other_inode_time(struct super_block *sb, unsigned long orig_ino, unsigned long ino, @@ -4956,101 +5052,6 @@ static void ext4_update_other_inodes_time(struct super_block *sb, rcu_read_unlock(); } -static int ext4_fill_raw_inode(struct inode *inode, struct ext4_inode *raw_inode) -{ - struct ext4_inode_info *ei = EXT4_I(inode); - uid_t i_uid; - gid_t i_gid; - projid_t i_projid; - int block; - int err; - - err = ext4_inode_blocks_set(raw_inode, ei); - - raw_inode->i_mode = cpu_to_le16(inode->i_mode); - i_uid = i_uid_read(inode); - i_gid = i_gid_read(inode); - i_projid = from_kprojid(&init_user_ns, ei->i_projid); - if (!(test_opt(inode->i_sb, NO_UID32))) { - raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid)); - raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid)); - /* - * Fix up interoperability with old kernels. Otherwise, - * old inodes get re-used with the upper 16 bits of the - * uid/gid intact. 
- */ - if (ei->i_dtime && list_empty(&ei->i_orphan)) { - raw_inode->i_uid_high = 0; - raw_inode->i_gid_high = 0; - } else { - raw_inode->i_uid_high = - cpu_to_le16(high_16_bits(i_uid)); - raw_inode->i_gid_high = - cpu_to_le16(high_16_bits(i_gid)); - } - } else { - raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(i_uid)); - raw_inode->i_gid_low = cpu_to_le16(fs_high2lowgid(i_gid)); - raw_inode->i_uid_high = 0; - raw_inode->i_gid_high = 0; - } - raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); - - EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode); - EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode); - EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); - EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode); - - raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); - raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF); - if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) - raw_inode->i_file_acl_high = - cpu_to_le16(ei->i_file_acl >> 32); - raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); - ext4_isize_set(raw_inode, ei->i_disksize); - - raw_inode->i_generation = cpu_to_le32(inode->i_generation); - if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { - if (old_valid_dev(inode->i_rdev)) { - raw_inode->i_block[0] = - cpu_to_le32(old_encode_dev(inode->i_rdev)); - raw_inode->i_block[1] = 0; - } else { - raw_inode->i_block[0] = 0; - raw_inode->i_block[1] = - cpu_to_le32(new_encode_dev(inode->i_rdev)); - raw_inode->i_block[2] = 0; - } - } else if (!ext4_has_inline_data(inode)) { - for (block = 0; block < EXT4_N_BLOCKS; block++) - raw_inode->i_block[block] = ei->i_data[block]; - } - - if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) { - u64 ivers = ext4_inode_peek_iversion(inode); - - raw_inode->i_disk_version = cpu_to_le32(ivers); - if (ei->i_extra_isize) { - if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) - raw_inode->i_version_hi = - cpu_to_le32(ivers >> 32); - raw_inode->i_extra_isize = - cpu_to_le16(ei->i_extra_isize); - } - } - - if (i_projid != EXT4_DEF_PROJID 
&& - !ext4_has_feature_project(inode->i_sb)) - err = err ?: -EFSCORRUPTED; - - if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && - EXT4_FITS_IN_INODE(raw_inode, ei, i_projid)) - raw_inode->i_projid = cpu_to_le32(i_projid); - - ext4_inode_csum_set(inode, raw_inode, ei); - return err; -} - /* * Post the struct inode info into an on-disk inode location in the * buffer-cache. This gobbles the caller's reference to the From de01f484576d29b02fb2856387f29cfdf5ad4f19 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 1 Sep 2021 10:09:55 +0800 Subject: [PATCH 266/433] ext4: prevent getting empty inode buffer In ext4_get_inode_loc(), we may skip IO and get an zero && uptodate inode buffer when the inode monopolize an inode block for performance reason. For most cases, ext4_mark_iloc_dirty() will fill the inode buffer to make it fine, but we could miss this call if something bad happened. Finally, __ext4_get_inode_loc_noinmem() may probably get an empty inode buffer and trigger ext4 error. For example, if we remove a nonexistent xattr on inode A, ext4_xattr_set_handle() will return ENODATA before invoking ext4_mark_iloc_dirty(), it will left an uptodate but zero buffer. We will get checksum error message in ext4_iget() when getting inode again. EXT4-fs error (device sda): ext4_lookup:1784: inode #131074: comm cat: iget: checksum invalid Even worse, if we allocate another inode B at the same inode block, it will corrupt the inode A on disk when write back inode B. So this patch initialize the inode buffer by filling the in-mem inode contents if we skip read I/O, ensure that the buffer is really uptodate. 
Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20210901020955.1657340-4-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a46d5e022175..bfd3545f1e5d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4380,12 +4380,12 @@ static int ext4_fill_raw_inode(struct inode *inode, struct ext4_inode *raw_inode /* * ext4_get_inode_loc returns with an extra refcount against the inode's - * underlying buffer_head on success. If 'in_mem' is true, we have all - * data in memory that is needed to recreate the on-disk version of this - * inode. + * underlying buffer_head on success. If we pass 'inode' and it does not + * have in-inode xattr, we have all inode data in memory that is needed + * to recreate the on-disk version of this inode. */ static int __ext4_get_inode_loc(struct super_block *sb, unsigned long ino, - struct ext4_iloc *iloc, int in_mem, + struct inode *inode, struct ext4_iloc *iloc, ext4_fsblk_t *ret_block) { struct ext4_group_desc *gdp; @@ -4431,7 +4431,7 @@ static int __ext4_get_inode_loc(struct super_block *sb, unsigned long ino, * is the only valid inode in the block, we need not read the * block. 
*/ - if (in_mem) { + if (inode && !ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { struct buffer_head *bitmap_bh; int i, start; @@ -4459,8 +4459,13 @@ static int __ext4_get_inode_loc(struct super_block *sb, unsigned long ino, } brelse(bitmap_bh); if (i == start + inodes_per_block) { + struct ext4_inode *raw_inode = + (struct ext4_inode *) (bh->b_data + iloc->offset); + /* all other inodes are free, so skip I/O */ memset(bh->b_data, 0, bh->b_size); + if (!ext4_test_inode_state(inode, EXT4_STATE_NEW)) + ext4_fill_raw_inode(inode, raw_inode); set_buffer_uptodate(bh); unlock_buffer(bh); goto has_buffer; @@ -4521,7 +4526,7 @@ static int __ext4_get_inode_loc_noinmem(struct inode *inode, ext4_fsblk_t err_blk; int ret; - ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, iloc, 0, + ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, NULL, iloc, &err_blk); if (ret == -EIO) @@ -4536,9 +4541,8 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) ext4_fsblk_t err_blk; int ret; - /* We have all inode data except xattrs in memory here. */ - ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, iloc, - !ext4_test_inode_state(inode, EXT4_STATE_XATTR), &err_blk); + ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, inode, iloc, + &err_blk); if (ret == -EIO) ext4_error_inode_block(inode, err_blk, EIO, @@ -4551,7 +4555,7 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) int ext4_get_fc_inode_loc(struct super_block *sb, unsigned long ino, struct ext4_iloc *iloc) { - return __ext4_get_inode_loc(sb, ino, iloc, 0, NULL); + return __ext4_get_inode_loc(sb, ino, NULL, iloc, NULL); } static bool ext4_should_enable_dax(struct inode *inode) From d4ffeeb7315d82e10803e067cbf079f246b09f00 Mon Sep 17 00:00:00 2001 From: Jing Yangyang Date: Mon, 23 Aug 2021 22:55:43 -0700 Subject: [PATCH 267/433] ext4: fix boolreturn.cocci warnings in fs/ext4/name.c Return statements in functions returning bool should use true/false instead of 1/0. 
./fs/ext4/namei.c:1441:12-13:WARNING:return of 0/1 in function 'ext4_match' with return type bool Reported-by: Zeal Robot Signed-off-by: Jing Yangyang Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20210824055543.58718-1-deng.changcheng@zte.com.cn Signed-off-by: Theodore Ts'o --- fs/ext4/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index da7698341d7d..52c9bd154122 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1439,7 +1439,7 @@ static bool ext4_match(struct inode *parent, fname->hinfo.minor_hash != EXT4_DIRENT_MINOR_HASH(de)) { - return 0; + return false; } } return !ext4_ci_compare(parent, &cf, de->name, From 3bbef91bdd2180c67407285ba160b023eb4d5306 Mon Sep 17 00:00:00 2001 From: Austin Kim Date: Tue, 24 Aug 2021 04:49:29 +0100 Subject: [PATCH 268/433] ext4: remove an unused variable warning with CONFIG_QUOTA=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'enable_quota' variable is only used inside an #ifdef CONFIG_QUOTA block. With CONFIG_QUOTA=n, the compiler emits a harmless warning: fs/ext4/super.c: In function ‘ext4_remount’: fs/ext4/super.c:5840:6: warning: variable ‘enable_quota’ set but not used [-Wunused-but-set-variable] int enable_quota = 0; ^~~~~ Move 'enable_quota' into the same #ifdef CONFIG_QUOTA block to remove an unused variable warning. 
Signed-off-by: Austin Kim Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20210824034929.GA13415@raspberrypi Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8a67e5f3f576..160e58249482 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5726,10 +5726,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) struct ext4_sb_info *sbi = EXT4_SB(sb); unsigned long old_sb_flags, vfs_flags; struct ext4_mount_options old_opts; - int enable_quota = 0; ext4_group_t g; int err = 0; #ifdef CONFIG_QUOTA + int enable_quota = 0; int i, j; char *to_free[EXT4_MAXQUOTAS]; #endif @@ -5934,7 +5934,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) err = -EROFS; goto restore_opts; } +#ifdef CONFIG_QUOTA enable_quota = 1; +#endif } } From afcc4e32f606dbfb47aa7309172c89174b86e74c Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Fri, 20 Aug 2021 14:08:53 +0200 Subject: [PATCH 269/433] ext4: scope ret locally in ext4_try_to_trim_range() As commit 6920b3913235 ("ext4: add new helper interface ext4_try_to_trim_range()") moves some code into the separate function ext4_try_to_trim_range(), the use of the variable ret within that function is more limited and can be adjusted as well. Scope the use of the variable ret locally and drop dead assignments. No functional change. 
Signed-off-by: Lukas Bulwahn Link: https://lore.kernel.org/r/20210820120853.23134-1-lukas.bulwahn@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 72bfac2d6dce..215b7068f548 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -6299,7 +6299,6 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) { ext4_grpblk_t next, count, free_count; void *bitmap; - int ret = 0; bitmap = e4b->bd_bitmap; start = (e4b->bd_info->bb_first_free > start) ? @@ -6314,10 +6313,10 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) next = mb_find_next_bit(bitmap, max + 1, start); if ((next - start) >= minblocks) { - ret = ext4_trim_extent(sb, start, next - start, e4b); + int ret = ext4_trim_extent(sb, start, next - start, e4b); + if (ret && ret != -EOPNOTSUPP) break; - ret = 0; count += next - start; } free_count += next - start; From 6c31a689b2e9e1dee5cbe16b773648a2d84dfb02 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Fri, 15 Oct 2021 11:25:12 -0700 Subject: [PATCH 270/433] ext4: commit inline data during fast commit During the commit phase in fast commits if an inode with inline data is being committed, also commit the inline data along with inode. Since recovery code just blindly copies entire content found in inode TLV, there is no change needed on the recovery path. Thus, this change is backward compatible. 
Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20211015182513.395917-1-harshads@google.com Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 8ea5a81e6554..744b000d9756 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -819,7 +819,9 @@ static int ext4_fc_write_inode(struct inode *inode, u32 *crc) if (ret) return ret; - if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) + if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) + inode_len = EXT4_INODE_SIZE(inode->i_sb); + else if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) inode_len += ei->i_extra_isize; fc_inode.fc_ino = cpu_to_le32(inode->i_ino); From 1ebf21784b19d5bc269f39a5d1eedb7f29a7d152 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Fri, 15 Oct 2021 11:25:13 -0700 Subject: [PATCH 271/433] ext4: inline data inode fast commit replay fixes Since there are no blocks in an inline data inode, there's no point in fixing iblocks field in fast commit replay path for this inode. Similarly, there's no point in fixing any block bitmaps / global block counters with respect to such an inode. Just bail out from these functions if an inline data inode is encountered. 
Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20211015182513.395917-2-harshads@google.com Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 3 +++ fs/ext4/fast_commit.c | 7 ++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 09f56e04f4b2..0ecf819bf189 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -6071,6 +6071,9 @@ int ext4_ext_clear_bb(struct inode *inode) int j, ret = 0; struct ext4_map_blocks map; + if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) + return 0; + /* Determin the size of the file first */ path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, EXT4_EX_NOCACHE); diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 744b000d9756..0f32b445582a 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1526,7 +1526,8 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl, * crashing. This should be fixed but until then, we calculate * the number of blocks the inode. */ - ext4_ext_replay_set_iblocks(inode); + if (!ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) + ext4_ext_replay_set_iblocks(inode); inode->i_generation = le32_to_cpu(ext4_raw_inode(&iloc)->i_generation); ext4_reset_inode_seed(inode); @@ -1844,6 +1845,10 @@ static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb) } cur = 0; end = EXT_MAX_BLOCKS; + if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) { + iput(inode); + continue; + } while (cur < end) { map.m_lblk = cur; map.m_len = end - cur; From 124e7c61deb27d758df5ec0521c36cf08d417f7a Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Tue, 26 Oct 2021 14:33:02 -0300 Subject: [PATCH 272/433] ext4: fix error code saved on super block during file system abort ext4_abort will eventually call ext4_errno_to_code, which translates the errno to an EXT4_ERR specific error. This means that ext4_abort expects an errno. 
By using EXT4_ERR_ here, it gets misinterpreted (as an errno), and ends up saving EXT4_ERR_EBUSY on the superblock during an abort, which makes no sense. ESHUTDOWN will get properly translated to EXT4_ERR_ESHUTDOWN, so use that instead. Signed-off-by: Gabriel Krisman Bertazi Link: https://lore.kernel.org/r/20211026173302.84000-1-krisman@collabora.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 160e58249482..0e8406f5bf0a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5820,7 +5820,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) } if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) - ext4_abort(sb, EXT4_ERR_ESHUTDOWN, "Abort forced by user"); + ext4_abort(sb, ESHUTDOWN, "Abort forced by user"); sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0); From a38bc45a08e9759f04d61669f45941d6624d173c Mon Sep 17 00:00:00 2001 From: Kleber Sacilotto de Souza Date: Mon, 1 Nov 2021 15:53:17 +0100 Subject: [PATCH 273/433] selftests/net: Fix reuseport_bpf_numa by skipping unavailable nodes On some platforms the numa node numbers are not necessarily consecutive, meaning that not all nodes from 0 to the value returned by numa_max_node() are available on the system. Using node numbers which are not available results in errors from libnuma such as: ---- IPv4 UDP ---- send node 0, receive socket 0 libnuma: Warning: Cannot read node cpumask from sysfs ./reuseport_bpf_numa: failed to pin to node: No such file or directory Fix it by checking if the node number bit is set in numa_nodes_ptr, which is defined in libnuma as "Set with all nodes the kernel has exposed to userspace". 
Signed-off-by: Kleber Sacilotto de Souza Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211101145317.286118-1-kleber.souza@canonical.com --- tools/testing/selftests/net/reuseport_bpf_numa.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/net/reuseport_bpf_numa.c b/tools/testing/selftests/net/reuseport_bpf_numa.c index c9f478b40996..b2eebf669b8c 100644 --- a/tools/testing/selftests/net/reuseport_bpf_numa.c +++ b/tools/testing/selftests/net/reuseport_bpf_numa.c @@ -211,12 +211,16 @@ static void test(int *rcv_fd, int len, int family, int proto) /* Forward iterate */ for (node = 0; node < len; ++node) { + if (!numa_bitmask_isbitset(numa_nodes_ptr, node)) + continue; send_from_node(node, family, proto); receive_on_node(rcv_fd, len, epfd, node, proto); } /* Reverse iterate */ for (node = len - 1; node >= 0; --node) { + if (!numa_bitmask_isbitset(numa_nodes_ptr, node)) + continue; send_from_node(node, family, proto); receive_on_node(rcv_fd, len, epfd, node, proto); } From 20b02fe36530a1e48dde73c80b882ae276346ea3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 3 Nov 2021 22:12:56 +0000 Subject: [PATCH 274/433] arm64: cpufeature: Export this_cpu_has_cap helper Export the this_cpu_has_cap() for use by modules. This is used by TRBE driver. Without this patch, TRBE will fail to build as a module : ERROR: modpost: "this_cpu_has_cap" [drivers/hwtracing/coresight/coresight-trbe.ko] undefined! 
Fixes: 8a1065127d95 ("coresight: trbe: Add infrastructure for Errata handling") Cc: Will Deacon Cc: Catalin Marinas Cc: Mathieu Poirier Cc: Anshuman Khandual Signed-off-by: Arnd Bergmann [ change to EXPORT_SYMBOL_GPL ] Acked-by: Catalin Marinas Signed-off-by: Suzuki K Poulose [ Added Will AB tag] Acked-by: Will Deacon Link: https://lore.kernel.org/r/20211103221256.725080-1-suzuki.poulose@arm.com Signed-off-by: Mathieu Poirier --- arch/arm64/kernel/cpufeature.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 6ec7036ef7e1..1ffda21c9d36 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2843,6 +2843,7 @@ bool this_cpu_has_cap(unsigned int n) return false; } +EXPORT_SYMBOL_GPL(this_cpu_has_cap); /* * This helper function is used in a narrow window when, From aeb58c860dc516794fdf7ff89d96ead2644d5889 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 2 Nov 2021 03:52:36 -0700 Subject: [PATCH 275/433] thermal/drivers/int340x: processor_thermal: Support 64 bit RFIM responses Some of the RFIM mailbox commands return 64 bit values. So enhance the mailbox interface to return 64 bit values and use them for RFIM commands. Signed-off-by: Srinivas Pandruvada Fixes: 5d6fbc96bd36 ("thermal/drivers/int340x: processor_thermal: Export additional attributes") Signed-off-by: Rafael J. 
Wysocki --- .../processor_thermal_device.h | 2 +- .../int340x_thermal/processor_thermal_mbox.c | 22 +++++++++++-------- .../int340x_thermal/processor_thermal_rfim.c | 10 ++++----- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.h b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.h index c1d8de6dc3d1..be27f633e40a 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.h +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.h @@ -80,7 +80,7 @@ void proc_thermal_rfim_remove(struct pci_dev *pdev); int proc_thermal_mbox_add(struct pci_dev *pdev, struct proc_thermal_device *proc_priv); void proc_thermal_mbox_remove(struct pci_dev *pdev); -int processor_thermal_send_mbox_cmd(struct pci_dev *pdev, u16 cmd_id, u32 cmd_data, u32 *cmd_resp); +int processor_thermal_send_mbox_cmd(struct pci_dev *pdev, u16 cmd_id, u32 cmd_data, u64 *cmd_resp); int proc_thermal_add(struct device *dev, struct proc_thermal_device *priv); void proc_thermal_remove(struct proc_thermal_device *proc_priv); int proc_thermal_suspend(struct device *dev); diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_mbox.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_mbox.c index 59e93b04f0a9..a86521973dad 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_mbox.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_mbox.c @@ -23,7 +23,7 @@ static DEFINE_MUTEX(mbox_lock); -static int send_mbox_cmd(struct pci_dev *pdev, u16 cmd_id, u32 cmd_data, u32 *cmd_resp) +static int send_mbox_cmd(struct pci_dev *pdev, u16 cmd_id, u32 cmd_data, u64 *cmd_resp) { struct proc_thermal_device *proc_priv; u32 retries, data; @@ -68,12 +68,16 @@ static int send_mbox_cmd(struct pci_dev *pdev, u16 cmd_id, u32 cmd_data, u32 *cm goto unlock_mbox; } - if (cmd_id == MBOX_CMD_WORKLOAD_TYPE_READ) { - data = readl((void __iomem *) (proc_priv->mmio_base + 
MBOX_OFFSET_DATA)); - *cmd_resp = data & 0xff; - } - ret = 0; + + if (!cmd_resp) + break; + + if (cmd_id == MBOX_CMD_WORKLOAD_TYPE_READ) + *cmd_resp = readl((void __iomem *) (proc_priv->mmio_base + MBOX_OFFSET_DATA)); + else + *cmd_resp = readq((void __iomem *) (proc_priv->mmio_base + MBOX_OFFSET_DATA)); + break; } while (--retries); @@ -82,7 +86,7 @@ unlock_mbox: return ret; } -int processor_thermal_send_mbox_cmd(struct pci_dev *pdev, u16 cmd_id, u32 cmd_data, u32 *cmd_resp) +int processor_thermal_send_mbox_cmd(struct pci_dev *pdev, u16 cmd_id, u32 cmd_data, u64 *cmd_resp) { return send_mbox_cmd(pdev, cmd_id, cmd_data, cmd_resp); } @@ -153,7 +157,7 @@ static ssize_t workload_type_show(struct device *dev, char *buf) { struct pci_dev *pdev = to_pci_dev(dev); - u32 cmd_resp; + u64 cmd_resp; int ret; ret = send_mbox_cmd(pdev, MBOX_CMD_WORKLOAD_TYPE_READ, 0, &cmd_resp); @@ -187,7 +191,7 @@ static bool workload_req_created; int proc_thermal_mbox_add(struct pci_dev *pdev, struct proc_thermal_device *proc_priv) { - u32 cmd_resp; + u64 cmd_resp; int ret; /* Check if there is a mailbox support, if fails return success */ diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c index 2b8a3235d518..b25b54d4bac1 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c @@ -195,7 +195,7 @@ static ssize_t rfi_restriction_store(struct device *dev, const char *buf, size_t count) { u16 cmd_id = 0x0008; - u32 cmd_resp; + u64 cmd_resp; u32 input; int ret; @@ -215,14 +215,14 @@ static ssize_t rfi_restriction_show(struct device *dev, char *buf) { u16 cmd_id = 0x0007; - u32 cmd_resp; + u64 cmd_resp; int ret; ret = processor_thermal_send_mbox_cmd(to_pci_dev(dev), cmd_id, 0, &cmd_resp); if (ret) return ret; - return sprintf(buf, "%u\n", cmd_resp); + return sprintf(buf, "%llu\n", cmd_resp); } static ssize_t 
ddr_data_rate_show(struct device *dev, @@ -230,14 +230,14 @@ static ssize_t ddr_data_rate_show(struct device *dev, char *buf) { u16 cmd_id = 0x0107; - u32 cmd_resp; + u64 cmd_resp; int ret; ret = processor_thermal_send_mbox_cmd(to_pci_dev(dev), cmd_id, 0, &cmd_resp); if (ret) return ret; - return sprintf(buf, "%u\n", cmd_resp); + return sprintf(buf, "%llu\n", cmd_resp); } static DEVICE_ATTR_RW(rfi_restriction); From 17f09d3f619a7ad2d2b021b4e5246f08225b1b0f Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Thu, 28 Oct 2021 15:17:41 -0400 Subject: [PATCH 276/433] SUNRPC: Check if the xprt is connected before handling sysfs reads xprts don't immediately reconnect when changing the "dstaddr" property, instead this gets handled the next time an operation uses the transport. This could lead to NULL pointer dereferences when trying to read sysfs files between the disconnect and reconnect operations. Fix this by returning an error if the xprt is not connected. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- net/sunrpc/sysfs.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c index 9a6f17e18f73..2766dd21935b 100644 --- a/net/sunrpc/sysfs.c +++ b/net/sunrpc/sysfs.c @@ -109,8 +109,10 @@ static ssize_t rpc_sysfs_xprt_srcaddr_show(struct kobject *kobj, struct sock_xprt *sock; ssize_t ret = -1; - if (!xprt) - return 0; + if (!xprt || !xprt_connected(xprt)) { + xprt_put(xprt); + return -ENOTCONN; + } sock = container_of(xprt, struct sock_xprt, xprt); if (kernel_getsockname(sock->sock, (struct sockaddr *)&saddr) < 0) @@ -129,8 +131,10 @@ static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj, struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); ssize_t ret; - if (!xprt) - return 0; + if (!xprt || !xprt_connected(xprt)) { + xprt_put(xprt); + return -ENOTCONN; + } ret = sprintf(buf, "last_used=%lu\ncur_cong=%lu\ncong_win=%lu\n" "max_num_slots=%u\nmin_num_slots=%u\nnum_reqs=%u\n" From 
f628d462b366a160c181e89279f15295fad62f59 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 4 Nov 2021 10:57:08 -0400 Subject: [PATCH 277/433] NFSv4.2 add tracepoint to SEEK Add a tracepoint to the SEEK operation. Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 1 + fs/nfs/nfs4trace.h | 74 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index a24349512ffe..87c0dcb8823b 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -678,6 +678,7 @@ static loff_t _nfs42_proc_llseek(struct file *filep, status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); + trace_nfs4_llseek(inode, &args, &res, status); if (status == -ENOTSUPP) server->caps &= ~NFS_CAP_SEEK; if (status) diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 18f149f72160..823ac436a1da 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -2054,6 +2054,80 @@ TRACE_EVENT(ff_layout_commit_error, ) ); +TRACE_DEFINE_ENUM(NFS4_CONTENT_DATA); +TRACE_DEFINE_ENUM(NFS4_CONTENT_HOLE); + +#define show_llseek_mode(what) \ + __print_symbolic(what, \ + { NFS4_CONTENT_DATA, "DATA" }, \ + { NFS4_CONTENT_HOLE, "HOLE" }) + +#ifdef CONFIG_NFS_V4_2 +TRACE_EVENT(nfs4_llseek, + TP_PROTO( + const struct inode *inode, + const struct nfs42_seek_args *args, + const struct nfs42_seek_res *res, + int error + ), + + TP_ARGS(inode, args, res, error), + + TP_STRUCT__entry( + __field(unsigned long, error) + __field(u32, fhandle) + __field(u32, fileid) + __field(dev_t, dev) + __field(int, stateid_seq) + __field(u32, stateid_hash) + __field(loff_t, offset_s) + __field(u32, what) + __field(loff_t, offset_r) + __field(u32, eof) + ), + + TP_fast_assign( + const struct nfs_inode *nfsi = NFS_I(inode); + const struct nfs_fh *fh = args->sa_fh; + + __entry->fileid = nfsi->fileid; + __entry->dev = inode->i_sb->s_dev; + __entry->fhandle = nfs_fhandle_hash(fh); + 
__entry->offset_s = args->sa_offset; + __entry->stateid_seq = + be32_to_cpu(args->sa_stateid.seqid); + __entry->stateid_hash = + nfs_stateid_hash(&args->sa_stateid); + __entry->what = args->sa_what; + if (error) { + __entry->error = -error; + __entry->offset_r = 0; + __entry->eof = 0; + } else { + __entry->error = 0; + __entry->offset_r = res->sr_offset; + __entry->eof = res->sr_eof; + } + ), + + TP_printk( + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "stateid=%d:0x%08x offset_s=%llu what=%s " + "offset_r=%llu eof=%u", + -__entry->error, + show_nfs4_status(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + __entry->stateid_seq, __entry->stateid_hash, + __entry->offset_s, + show_llseek_mode(__entry->what), + __entry->offset_r, + __entry->eof + ) +); + +#endif /* CONFIG_NFS_V4_2 */ #endif /* CONFIG_NFS_V4_1 */ From 40a8241771a77e479146e0205112f88241571822 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 4 Nov 2021 10:57:09 -0400 Subject: [PATCH 278/433] NFSv4.2 add tracepoints to FALLOCATE and DEALLOCATE Add a tracepoint to the FALLOCATE/DEALLOCATE operations. 
Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 4 ++++ fs/nfs/nfs4trace.h | 56 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 87c0dcb8823b..c36824888601 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -83,6 +83,10 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, status = nfs_post_op_update_inode_force_wcc(inode, res.falloc_fattr); + if (msg->rpc_proc == &nfs4_procedures[NFSPROC4_CLNT_ALLOCATE]) + trace_nfs4_fallocate(inode, &args, status); + else + trace_nfs4_deallocate(inode, &args, status); kfree(res.falloc_fattr); return status; } diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 823ac436a1da..a88464238b88 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -2127,6 +2127,62 @@ TRACE_EVENT(nfs4_llseek, ) ); +DECLARE_EVENT_CLASS(nfs4_sparse_event, + TP_PROTO( + const struct inode *inode, + const struct nfs42_falloc_args *args, + int error + ), + + TP_ARGS(inode, args, error), + + TP_STRUCT__entry( + __field(unsigned long, error) + __field(loff_t, offset) + __field(loff_t, len) + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(int, stateid_seq) + __field(u32, stateid_hash) + ), + + TP_fast_assign( + __entry->error = error < 0 ? 
-error : 0; + __entry->offset = args->falloc_offset; + __entry->len = args->falloc_length; + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = NFS_FILEID(inode); + __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + __entry->stateid_seq = + be32_to_cpu(args->falloc_stateid.seqid); + __entry->stateid_hash = + nfs_stateid_hash(&args->falloc_stateid); + ), + + TP_printk( + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "stateid=%d:0x%08x offset=%llu len=%llu", + -__entry->error, + show_nfs4_status(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + __entry->stateid_seq, __entry->stateid_hash, + (long long)__entry->offset, + (long long)__entry->len + ) +); +#define DEFINE_NFS4_SPARSE_EVENT(name) \ + DEFINE_EVENT(nfs4_sparse_event, name, \ + TP_PROTO( \ + const struct inode *inode, \ + const struct nfs42_falloc_args *args, \ + int error \ + ), \ + TP_ARGS(inode, args, error)) +DEFINE_NFS4_SPARSE_EVENT(nfs4_fallocate); +DEFINE_NFS4_SPARSE_EVENT(nfs4_deallocate); #endif /* CONFIG_NFS_V4_2 */ #endif /* CONFIG_NFS_V4_1 */ From ce7cea1ba72ef2ba91bcb28ab441597405d6d705 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 4 Nov 2021 10:57:10 -0400 Subject: [PATCH 279/433] NFSv4.2 add tracepoint to COPY Add a tracepoint to the COPY operation. 
Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 1 + fs/nfs/nfs4trace.h | 107 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index c36824888601..a072cdaf7bdc 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -367,6 +367,7 @@ static ssize_t _nfs42_proc_copy(struct file *src, status = nfs4_call_sync(dst_server->client, dst_server, &msg, &args->seq_args, &res->seq_res, 0); + trace_nfs4_copy(src_inode, dst_inode, args, res, nss, status); if (status == -ENOTSUPP) dst_server->caps &= ~NFS_CAP_COPY; if (status) diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index a88464238b88..bdccaec504d8 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -2183,6 +2183,113 @@ DECLARE_EVENT_CLASS(nfs4_sparse_event, TP_ARGS(inode, args, error)) DEFINE_NFS4_SPARSE_EVENT(nfs4_fallocate); DEFINE_NFS4_SPARSE_EVENT(nfs4_deallocate); + +TRACE_EVENT(nfs4_copy, + TP_PROTO( + const struct inode *src_inode, + const struct inode *dst_inode, + const struct nfs42_copy_args *args, + const struct nfs42_copy_res *res, + const struct nl4_server *nss, + int error + ), + + TP_ARGS(src_inode, dst_inode, args, res, nss, error), + + TP_STRUCT__entry( + __field(unsigned long, error) + __field(u32, src_fhandle) + __field(u32, src_fileid) + __field(u32, dst_fhandle) + __field(u32, dst_fileid) + __field(dev_t, src_dev) + __field(dev_t, dst_dev) + __field(int, src_stateid_seq) + __field(u32, src_stateid_hash) + __field(int, dst_stateid_seq) + __field(u32, dst_stateid_hash) + __field(loff_t, src_offset) + __field(loff_t, dst_offset) + __field(bool, sync) + __field(loff_t, len) + __field(int, res_stateid_seq) + __field(u32, res_stateid_hash) + __field(loff_t, res_count) + __field(bool, res_sync) + __field(bool, res_cons) + __field(bool, intra) + ), + + TP_fast_assign( + const struct nfs_inode *src_nfsi = NFS_I(src_inode); + const struct nfs_inode *dst_nfsi = 
NFS_I(dst_inode); + + __entry->src_fileid = src_nfsi->fileid; + __entry->src_dev = src_inode->i_sb->s_dev; + __entry->src_fhandle = nfs_fhandle_hash(args->src_fh); + __entry->src_offset = args->src_pos; + __entry->dst_fileid = dst_nfsi->fileid; + __entry->dst_dev = dst_inode->i_sb->s_dev; + __entry->dst_fhandle = nfs_fhandle_hash(args->dst_fh); + __entry->dst_offset = args->dst_pos; + __entry->len = args->count; + __entry->sync = args->sync; + __entry->src_stateid_seq = + be32_to_cpu(args->src_stateid.seqid); + __entry->src_stateid_hash = + nfs_stateid_hash(&args->src_stateid); + __entry->dst_stateid_seq = + be32_to_cpu(args->dst_stateid.seqid); + __entry->dst_stateid_hash = + nfs_stateid_hash(&args->dst_stateid); + __entry->intra = nss ? 0 : 1; + if (error) { + __entry->error = -error; + __entry->res_stateid_seq = 0; + __entry->res_stateid_hash = 0; + __entry->res_count = 0; + __entry->res_sync = 0; + __entry->res_cons = 0; + } else { + __entry->error = 0; + __entry->res_stateid_seq = + be32_to_cpu(res->write_res.stateid.seqid); + __entry->res_stateid_hash = + nfs_stateid_hash(&res->write_res.stateid); + __entry->res_count = res->write_res.count; + __entry->res_sync = res->synchronous; + __entry->res_cons = res->consecutive; + } + ), + + TP_printk( + "error=%ld (%s) intra=%d src_fileid=%02x:%02x:%llu " + "src_fhandle=0x%08x dst_fileid=%02x:%02x:%llu " + "dst_fhandle=0x%08x src_stateid=%d:0x%08x " + "dst_stateid=%d:0x%08x src_offset=%llu dst_offset=%llu " + "len=%llu sync=%d cb_stateid=%d:0x%08x res_sync=%d " + "res_cons=%d res_count=%llu", + -__entry->error, + show_nfs4_status(__entry->error), + __entry->intra, + MAJOR(__entry->src_dev), MINOR(__entry->src_dev), + (unsigned long long)__entry->src_fileid, + __entry->src_fhandle, + MAJOR(__entry->dst_dev), MINOR(__entry->dst_dev), + (unsigned long long)__entry->dst_fileid, + __entry->dst_fhandle, + __entry->src_stateid_seq, __entry->src_stateid_hash, + __entry->dst_stateid_seq, __entry->dst_stateid_hash, + 
__entry->src_offset, + __entry->dst_offset, + __entry->len, + __entry->sync, + __entry->res_stateid_seq, __entry->res_stateid_hash, + __entry->res_sync, + __entry->res_cons, + __entry->res_count + ) +); #endif /* CONFIG_NFS_V4_2 */ #endif /* CONFIG_NFS_V4_1 */ From 2a65ca8b5850914e2ca67a0b8b8140bf7297353a Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 4 Nov 2021 10:57:11 -0400 Subject: [PATCH 280/433] NFSv4.2 add tracepoint to CLONE Add a tracepoint to the CLONE operation. Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 1 + fs/nfs/nfs4trace.h | 73 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index a072cdaf7bdc..d3d9ea71702f 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -1077,6 +1077,7 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f, status = nfs4_call_sync(server->client, server, msg, &args.seq_args, &res.seq_res, 0); + trace_nfs4_clone(src_inode, dst_inode, &args, status); if (status == 0) { nfs42_copy_dest_done(dst_inode, dst_offset, count); status = nfs_post_op_update_inode(dst_inode, res.dst_fattr); diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index bdccaec504d8..af7e59aa9265 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -2290,6 +2290,79 @@ TRACE_EVENT(nfs4_copy, __entry->res_count ) ); + +TRACE_EVENT(nfs4_clone, + TP_PROTO( + const struct inode *src_inode, + const struct inode *dst_inode, + const struct nfs42_clone_args *args, + int error + ), + + TP_ARGS(src_inode, dst_inode, args, error), + + TP_STRUCT__entry( + __field(unsigned long, error) + __field(u32, src_fhandle) + __field(u32, src_fileid) + __field(u32, dst_fhandle) + __field(u32, dst_fileid) + __field(dev_t, src_dev) + __field(dev_t, dst_dev) + __field(loff_t, src_offset) + __field(loff_t, dst_offset) + __field(int, src_stateid_seq) + __field(u32, src_stateid_hash) + __field(int, dst_stateid_seq) 
+ __field(u32, dst_stateid_hash) + __field(loff_t, len) + ), + + TP_fast_assign( + const struct nfs_inode *src_nfsi = NFS_I(src_inode); + const struct nfs_inode *dst_nfsi = NFS_I(dst_inode); + + __entry->src_fileid = src_nfsi->fileid; + __entry->src_dev = src_inode->i_sb->s_dev; + __entry->src_fhandle = nfs_fhandle_hash(args->src_fh); + __entry->src_offset = args->src_offset; + __entry->dst_fileid = dst_nfsi->fileid; + __entry->dst_dev = dst_inode->i_sb->s_dev; + __entry->dst_fhandle = nfs_fhandle_hash(args->dst_fh); + __entry->dst_offset = args->dst_offset; + __entry->len = args->count; + __entry->error = error < 0 ? -error : 0; + __entry->src_stateid_seq = + be32_to_cpu(args->src_stateid.seqid); + __entry->src_stateid_hash = + nfs_stateid_hash(&args->src_stateid); + __entry->dst_stateid_seq = + be32_to_cpu(args->dst_stateid.seqid); + __entry->dst_stateid_hash = + nfs_stateid_hash(&args->dst_stateid); + ), + + TP_printk( + "error=%ld (%s) src_fileid=%02x:%02x:%llu " + "src_fhandle=0x%08x dst_fileid=%02x:%02x:%llu " + "dst_fhandle=0x%08x src_stateid=%d:0x%08x " + "dst_stateid=%d:0x%08x src_offset=%llu " + "dst_offset=%llu len=%llu", + -__entry->error, + show_nfs4_status(__entry->error), + MAJOR(__entry->src_dev), MINOR(__entry->src_dev), + (unsigned long long)__entry->src_fileid, + __entry->src_fhandle, + MAJOR(__entry->dst_dev), MINOR(__entry->dst_dev), + (unsigned long long)__entry->dst_fileid, + __entry->dst_fhandle, + __entry->src_stateid_seq, __entry->src_stateid_hash, + __entry->dst_stateid_seq, __entry->dst_stateid_hash, + __entry->src_offset, + __entry->dst_offset, + __entry->len + ) +); #endif /* CONFIG_NFS_V4_2 */ #endif /* CONFIG_NFS_V4_1 */ From 8db744ce45ee31d26bd2776070f31121b009f27e Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 4 Nov 2021 10:57:12 -0400 Subject: [PATCH 281/433] NFSv4.2 add tracepoint to CB_OFFLOAD Add a tracepoint to the CB_OFFLOAD operation. 
Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 3 +++ fs/nfs/nfs4trace.h | 44 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index ed9d580826f5..09c5b1cb3e07 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -739,6 +739,9 @@ out: kfree(copy); spin_unlock(&cps->clp->cl_lock); + trace_nfs4_cb_offload(&args->coa_fh, &args->coa_stateid, + args->wr_count, args->error, + args->wr_writeverf.committed); return 0; } #endif /* CONFIG_NFS_V4_2 */ diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index af7e59aa9265..f971e38a6e3a 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -200,6 +200,50 @@ TRACE_EVENT(nfs4_cb_seqid_err, ) ); +TRACE_EVENT(nfs4_cb_offload, + TP_PROTO( + const struct nfs_fh *cb_fh, + const nfs4_stateid *cb_stateid, + uint64_t cb_count, + int cb_error, + int cb_how_stable + ), + + TP_ARGS(cb_fh, cb_stateid, cb_count, cb_error, + cb_how_stable), + + TP_STRUCT__entry( + __field(unsigned long, error) + __field(u32, fhandle) + __field(loff_t, cb_count) + __field(int, cb_how) + __field(int, cb_stateid_seq) + __field(u32, cb_stateid_hash) + ), + + TP_fast_assign( + __entry->error = cb_error < 0 ? 
-cb_error : 0; + __entry->fhandle = nfs_fhandle_hash(cb_fh); + __entry->cb_stateid_seq = + be32_to_cpu(cb_stateid->seqid); + __entry->cb_stateid_hash = + nfs_stateid_hash(cb_stateid); + __entry->cb_count = cb_count; + __entry->cb_how = cb_how_stable; + ), + + TP_printk( + "error=%ld (%s) fhandle=0x%08x cb_stateid=%d:0x%08x " + "cb_count=%llu cb_how=%s", + -__entry->error, + show_nfs4_status(__entry->error), + __entry->fhandle, + __entry->cb_stateid_seq, __entry->cb_stateid_hash, + __entry->cb_count, + show_nfs_stable_how(__entry->cb_how) + ) +); + #endif /* CONFIG_NFS_V4_1 */ TRACE_EVENT(nfs4_setup_sequence, From 488b170c7d78bae2ea493a14d71ac6566f4a0afa Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 4 Nov 2021 10:57:13 -0400 Subject: [PATCH 282/433] NFSv4.2 add tracepoint to COPY_NOTIFY Add a tracepoint to COPY_NOTIFY operation. Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 1 + fs/nfs/nfs4trace.h | 58 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index d3d9ea71702f..7c7399b10050 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -603,6 +603,7 @@ static int _nfs42_proc_copy_notify(struct file *src, struct file *dst, status = nfs4_call_sync(src_server->client, src_server, &msg, &args->cna_seq_args, &res->cnr_seq_res, 0); + trace_nfs4_copy_notify(file_inode(src), args, res, status); if (status == -ENOTSUPP) src_server->caps &= ~NFS_CAP_COPY_NOTIFY; diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index f971e38a6e3a..f337a25c67b3 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -243,7 +243,6 @@ TRACE_EVENT(nfs4_cb_offload, show_nfs_stable_how(__entry->cb_how) ) ); - #endif /* CONFIG_NFS_V4_1 */ TRACE_EVENT(nfs4_setup_sequence, @@ -2407,6 +2406,63 @@ TRACE_EVENT(nfs4_clone, __entry->len ) ); + +TRACE_EVENT(nfs4_copy_notify, + TP_PROTO( + const struct inode *inode, + const struct 
nfs42_copy_notify_args *args, + const struct nfs42_copy_notify_res *res, + int error + ), + + TP_ARGS(inode, args, res, error), + + TP_STRUCT__entry( + __field(unsigned long, error) + __field(u32, fhandle) + __field(u32, fileid) + __field(dev_t, dev) + __field(int, stateid_seq) + __field(u32, stateid_hash) + __field(int, res_stateid_seq) + __field(u32, res_stateid_hash) + ), + + TP_fast_assign( + const struct nfs_inode *nfsi = NFS_I(inode); + + __entry->fileid = nfsi->fileid; + __entry->dev = inode->i_sb->s_dev; + __entry->fhandle = nfs_fhandle_hash(args->cna_src_fh); + __entry->stateid_seq = + be32_to_cpu(args->cna_src_stateid.seqid); + __entry->stateid_hash = + nfs_stateid_hash(&args->cna_src_stateid); + if (error) { + __entry->error = -error; + __entry->res_stateid_seq = 0; + __entry->res_stateid_hash = 0; + } else { + __entry->error = 0; + __entry->res_stateid_seq = + be32_to_cpu(res->cnr_stateid.seqid); + __entry->res_stateid_hash = + nfs_stateid_hash(&res->cnr_stateid); + } + ), + + TP_printk( + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "stateid=%d:0x%08x res_stateid=%d:0x%08x", + -__entry->error, + show_nfs4_status(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + __entry->stateid_seq, __entry->stateid_hash, + __entry->res_stateid_seq, __entry->res_stateid_hash + ) +); #endif /* CONFIG_NFS_V4_2 */ #endif /* CONFIG_NFS_V4_1 */ From 127becabad7bef13b2e2ab795953cf8e9cce8aab Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 4 Nov 2021 10:57:14 -0400 Subject: [PATCH 283/433] NFSv4.2 add tracepoint to OFFLOAD_CANCEL Add tracepoint to OFFLOAD_CANCEL operation. 
Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 1 + fs/nfs/nfs4trace.h | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 7c7399b10050..08355b66e7cb 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -509,6 +509,7 @@ static void nfs42_offload_cancel_done(struct rpc_task *task, void *calldata) { struct nfs42_offloadcancel_data *data = calldata; + trace_nfs4_offload_cancel(&data->args, task->tk_status); nfs41_sequence_done(task, &data->res.osr_seq_res); if (task->tk_status && nfs4_async_handle_error(task, data->seq_server, NULL, diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index f337a25c67b3..6ee6ad3674a2 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -2463,6 +2463,39 @@ TRACE_EVENT(nfs4_copy_notify, __entry->res_stateid_seq, __entry->res_stateid_hash ) ); + +TRACE_EVENT(nfs4_offload_cancel, + TP_PROTO( + const struct nfs42_offload_status_args *args, + int error + ), + + TP_ARGS(args, error), + + TP_STRUCT__entry( + __field(unsigned long, error) + __field(u32, fhandle) + __field(int, stateid_seq) + __field(u32, stateid_hash) + ), + + TP_fast_assign( + __entry->fhandle = nfs_fhandle_hash(args->osa_src_fh); + __entry->error = error < 0 ? 
-error : 0; + __entry->stateid_seq = + be32_to_cpu(args->osa_stateid.seqid); + __entry->stateid_hash = + nfs_stateid_hash(&args->osa_stateid); + ), + + TP_printk( + "error=%ld (%s) fhandle=0x%08x stateid=%d:0x%08x", + -__entry->error, + show_nfs4_status(__entry->error), + __entry->fhandle, + __entry->stateid_seq, __entry->stateid_hash + ) +); #endif /* CONFIG_NFS_V4_2 */ #endif /* CONFIG_NFS_V4_1 */ From 96d0c9be432dfd4908e96dde7cab860368a348ab Mon Sep 17 00:00:00 2001 From: Guo Zhengkui Date: Wed, 3 Nov 2021 20:16:06 +0800 Subject: [PATCH 284/433] devlink: fix flexible_array.cocci warning Fix following coccicheck warning: ./net/core/devlink.c:69:6-10: WARNING use flexible-array member instead Signed-off-by: Guo Zhengkui Link: https://lore.kernel.org/r/20211103121607.27490-1-guozhengkui@vivo.com Signed-off-by: Jakub Kicinski --- net/core/devlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/devlink.c b/net/core/devlink.c index 6b5ee862429e..5ba4f9434acd 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -66,7 +66,7 @@ struct devlink { u8 reload_failed:1; refcount_t refcount; struct completion comp; - char priv[0] __aligned(NETDEV_ALIGN); + char priv[] __aligned(NETDEV_ALIGN); }; void *devlink_priv(struct devlink *devlink) From d896ba8300ebd09e5fc9c43051d9eb36b65e996e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 29 Oct 2021 12:26:17 -0400 Subject: [PATCH 285/433] SUNRPC: Fix races when closing the socket Ensure that we bump the xprt->connect_cookie when we set the XPRT_CLOSE_WAIT flag so that another call to xprt_conditional_disconnect() won't race with the reconnection. 
Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 2 ++ net/sunrpc/xprtsock.c | 1 + 2 files changed, 3 insertions(+) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 48560188e84d..691fe5a682b6 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -735,6 +735,8 @@ static void xprt_autoclose(struct work_struct *work) unsigned int pflags = memalloc_nofs_save(); trace_xprt_disconnect_auto(xprt); + xprt->connect_cookie++; + smp_mb__before_atomic(); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); xprt->ops->close(xprt); xprt_release_write(xprt, NULL); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index aa293e4a77fa..7fb302e202bc 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1134,6 +1134,7 @@ static void xs_run_error_worker(struct sock_xprt *transport, unsigned int nr) static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt) { + xprt->connect_cookie++; smp_mb__before_atomic(); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); clear_bit(XPRT_CLOSING, &xprt->state); From 3be232f11a3cc9b0ef0795e39fa11bdb8e422a06 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 26 Oct 2021 18:01:07 -0400 Subject: [PATCH 286/433] SUNRPC: Prevent immediate close+reconnect If we have already set up the socket and are waiting for it to connect, then don't immediately close and retry. 
Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 3 ++- net/sunrpc/xprtsock.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 691fe5a682b6..a02de2bddb28 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -767,7 +767,8 @@ EXPORT_SYMBOL_GPL(xprt_disconnect_done); */ static void xprt_schedule_autoclose_locked(struct rpc_xprt *xprt) { - set_bit(XPRT_CLOSE_WAIT, &xprt->state); + if (test_and_set_bit(XPRT_CLOSE_WAIT, &xprt->state)) + return; if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) queue_work(xprtiod_workqueue, &xprt->task_cleanup); else if (xprt->snd_task && !test_bit(XPRT_SND_IS_COOKIE, &xprt->state)) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 7fb302e202bc..ae48c9c84ee1 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2314,7 +2314,7 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task) WARN_ON_ONCE(!xprt_lock_connect(xprt, task, transport)); - if (transport->sock != NULL) { + if (transport->sock != NULL && !xprt_connecting(xprt)) { dprintk("RPC: xs_connect delayed xprt %p for %lu " "seconds\n", xprt, xprt->reestablish_timeout / HZ); From a4db9055fdb9cf607775c66d39796caf6439ec92 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 3 Nov 2021 22:08:28 +0100 Subject: [PATCH 287/433] net: phy: fix duplex out of sync problem while changing settings As reported by Zhang there's a small issue if in forced mode the duplex mode changes with the link staying up [0]. In this case the MAC isn't notified about the change. The proposed patch relies on the phylib state machine and ignores the fact that there are drivers that use phylib but not the phylib state machine. So let's not change the behavior for such drivers and fix it w/o re-adding state PHY_FORCING for the case that the phylib state machine is used. 
[0] https://lore.kernel.org/netdev/a5c26ffd-4ee4-a5e6-4103-873208ce0dc5@huawei.com/T/ Fixes: 2bd229df5e2e ("net: phy: remove state PHY_FORCING") Reported-by: Zhang Changzhong Tested-by: Zhang Changzhong Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/7b8b9456-a93f-abbc-1dc5-a2c2542f932c@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index a3bfb156c83d..beb2b66da132 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -815,7 +815,12 @@ int phy_ethtool_ksettings_set(struct phy_device *phydev, phydev->mdix_ctrl = cmd->base.eth_tp_mdix_ctrl; /* Restart the PHY */ - _phy_start_aneg(phydev); + if (phy_is_started(phydev)) { + phydev->state = PHY_UP; + phy_trigger_machine(phydev); + } else { + _phy_start_aneg(phydev); + } mutex_unlock(&phydev->lock); return 0; From 1e4b50f06d970d8da3474d2a0354450416710bda Mon Sep 17 00:00:00 2001 From: Eugene Syromiatnikov Date: Wed, 3 Nov 2021 20:09:42 +0100 Subject: [PATCH 288/433] mctp: handle the struct sockaddr_mctp padding fields In order to have the padding fields actually usable in the future, there have to be checks that user space doesn't supply non-zero garbage there. It is also worth setting these padding fields to zero, unless it is known that they have been already zeroed. 
Cc: stable@vger.kernel.org # v5.15 Fixes: 5a20dd46b8b84593 ("mctp: Be explicit about struct sockaddr_mctp padding") Signed-off-by: Eugene Syromiatnikov Acked-by: Jeremy Kerr Signed-off-by: Jakub Kicinski --- net/mctp/af_mctp.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index d344b02a1cde..bc88159f8844 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -33,6 +33,12 @@ static int mctp_release(struct socket *sock) return 0; } +/* Generic sockaddr checks, padding checks only so far */ +static bool mctp_sockaddr_is_ok(const struct sockaddr_mctp *addr) +{ + return !addr->__smctp_pad0 && !addr->__smctp_pad1; +} + static int mctp_bind(struct socket *sock, struct sockaddr *addr, int addrlen) { struct sock *sk = sock->sk; @@ -52,6 +58,9 @@ static int mctp_bind(struct socket *sock, struct sockaddr *addr, int addrlen) /* it's a valid sockaddr for MCTP, cast and do protocol checks */ smctp = (struct sockaddr_mctp *)addr; + if (!mctp_sockaddr_is_ok(smctp)) + return -EINVAL; + lock_sock(sk); /* TODO: allow rebind */ @@ -87,6 +96,8 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) return -EINVAL; if (addr->smctp_family != AF_MCTP) return -EINVAL; + if (!mctp_sockaddr_is_ok(addr)) + return -EINVAL; if (addr->smctp_tag & ~(MCTP_TAG_MASK | MCTP_TAG_OWNER)) return -EINVAL; @@ -198,11 +209,13 @@ static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, addr = msg->msg_name; addr->smctp_family = AF_MCTP; + addr->__smctp_pad0 = 0; addr->smctp_network = cb->net; addr->smctp_addr.s_addr = hdr->src; addr->smctp_type = type; addr->smctp_tag = hdr->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO); + addr->__smctp_pad1 = 0; msg->msg_namelen = sizeof(*addr); if (msk->addr_ext) { From e9ea574ec1c27e555e7f78cbbcd28af91889d529 Mon Sep 17 00:00:00 2001 From: Eugene Syromiatnikov Date: Wed, 3 Nov 2021 20:09:46 +0100 Subject: [PATCH 289/433] mctp: handle the struct 
sockaddr_mctp_ext padding field struct sockaddr_mctp_ext.__smctp_pad0 has to be checked for being set to zero, otherwise it cannot be utilised in the future. Fixes: 99ce45d5e7dbde39 ("mctp: Implement extended addressing") Signed-off-by: Eugene Syromiatnikov Acked-by: Jeremy Kerr Signed-off-by: Jakub Kicinski --- net/mctp/af_mctp.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index bc88159f8844..871cf6266125 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -39,6 +39,13 @@ static bool mctp_sockaddr_is_ok(const struct sockaddr_mctp *addr) return !addr->__smctp_pad0 && !addr->__smctp_pad1; } +static bool mctp_sockaddr_ext_is_ok(const struct sockaddr_mctp_ext *addr) +{ + return !addr->__smctp_pad0[0] && + !addr->__smctp_pad0[1] && + !addr->__smctp_pad0[2]; +} + static int mctp_bind(struct socket *sock, struct sockaddr *addr, int addrlen) { struct sock *sk = sock->sk; @@ -135,7 +142,8 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) DECLARE_SOCKADDR(struct sockaddr_mctp_ext *, extaddr, msg->msg_name); - if (extaddr->smctp_halen > sizeof(cb->haddr)) { + if (!mctp_sockaddr_ext_is_ok(extaddr) || + extaddr->smctp_halen > sizeof(cb->haddr)) { rc = -EINVAL; goto err_free; } @@ -224,6 +232,7 @@ static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, msg->msg_namelen = sizeof(*ae); ae->smctp_ifindex = cb->ifindex; ae->smctp_halen = cb->halen; + memset(ae->__smctp_pad0, 0x0, sizeof(ae->__smctp_pad0)); memset(ae->smctp_haddr, 0x0, sizeof(ae->smctp_haddr)); memcpy(ae->smctp_haddr, cb->haddr, cb->halen); } From 0a55457c7c37642118e0973816286566be19240d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 3 Nov 2021 16:51:12 +0100 Subject: [PATCH 290/433] Revert "xhci: Set HCD flag to defer primary roothub registration" This reverts commit b7a0a792f864583207c593b50fd1b752ed89f4c1. 
It has been reported to be causing problems in Arch and Fedora bug reports. Reported-by: Hans de Goede Link: https://bbs.archlinux.org/viewtopic.php?pid=2000956#p2000956 Link: https://bugzilla.redhat.com/show_bug.cgi?id=2019542 Link: https://bugzilla.redhat.com/show_bug.cgi?id=2019576 Link: https://lore.kernel.org/r/42bcbea6-5eb8-16c7-336a-2cb72e71bc36@redhat.com Cc: Mathias Nyman Cc: Chris Chiu Cc: Alan Stern Cc: Kishon Vijay Abraham I Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 541fe4dcc43a..902f410874e8 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -692,7 +692,6 @@ int xhci_run(struct usb_hcd *hcd) if (ret) xhci_free_command(xhci, command); } - set_bit(HCD_FLAG_DEFER_RH_REGISTER, &hcd->flags); xhci_dbg_trace(xhci, trace_xhci_dbg_init, "Finished xhci_run for USB2 roothub"); From 439b08c57c3fe1df85cfe9d00accdf9b62cb3275 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 3 Nov 2021 16:51:36 +0100 Subject: [PATCH 291/433] Revert "usb: core: hcd: Add support for deferring roothub registration" This reverts commit 58877b0824da15698bd85a0a9dbfa8c354e6ecb7. It has been reported to be causing problems in Arch and Fedora bug reports. 
Reported-by: Hans de Goede Link: https://bbs.archlinux.org/viewtopic.php?pid=2000956#p2000956 Link: https://bugzilla.redhat.com/show_bug.cgi?id=2019542 Link: https://bugzilla.redhat.com/show_bug.cgi?id=2019576 Link: https://lore.kernel.org/r/42bcbea6-5eb8-16c7-336a-2cb72e71bc36@redhat.com Cc: Mathias Nyman Cc: Chris Chiu Cc: Alan Stern Cc: Kishon Vijay Abraham I Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 29 ++++++----------------------- include/linux/usb/hcd.h | 2 -- 2 files changed, 6 insertions(+), 25 deletions(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index a3311e937847..4d326ee12c36 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -2795,7 +2795,6 @@ int usb_add_hcd(struct usb_hcd *hcd, { int retval; struct usb_device *rhdev; - struct usb_hcd *shared_hcd; if (!hcd->skip_phy_initialization && usb_hcd_is_primary_hcd(hcd)) { hcd->phy_roothub = usb_phy_roothub_alloc(hcd->self.sysdev); @@ -2956,26 +2955,13 @@ int usb_add_hcd(struct usb_hcd *hcd, goto err_hcd_driver_start; } - /* starting here, usbcore will pay attention to the shared HCD roothub */ - shared_hcd = hcd->shared_hcd; - if (!usb_hcd_is_primary_hcd(hcd) && shared_hcd && HCD_DEFER_RH_REGISTER(shared_hcd)) { - retval = register_root_hub(shared_hcd); - if (retval != 0) - goto err_register_root_hub; - - if (shared_hcd->uses_new_polling && HCD_POLL_RH(shared_hcd)) - usb_hcd_poll_rh_status(shared_hcd); - } - /* starting here, usbcore will pay attention to this root hub */ - if (!HCD_DEFER_RH_REGISTER(hcd)) { - retval = register_root_hub(hcd); - if (retval != 0) - goto err_register_root_hub; + retval = register_root_hub(hcd); + if (retval != 0) + goto err_register_root_hub; - if (hcd->uses_new_polling && HCD_POLL_RH(hcd)) - usb_hcd_poll_rh_status(hcd); - } + if (hcd->uses_new_polling && HCD_POLL_RH(hcd)) + usb_hcd_poll_rh_status(hcd); return retval; @@ -3013,7 +2999,6 @@ EXPORT_SYMBOL_GPL(usb_add_hcd); void usb_remove_hcd(struct usb_hcd *hcd) { struct 
usb_device *rhdev = hcd->self.root_hub; - bool rh_registered; dev_info(hcd->self.controller, "remove, state %x\n", hcd->state); @@ -3024,7 +3009,6 @@ void usb_remove_hcd(struct usb_hcd *hcd) dev_dbg(hcd->self.controller, "roothub graceful disconnect\n"); spin_lock_irq (&hcd_root_hub_lock); - rh_registered = hcd->rh_registered; hcd->rh_registered = 0; spin_unlock_irq (&hcd_root_hub_lock); @@ -3034,8 +3018,7 @@ void usb_remove_hcd(struct usb_hcd *hcd) cancel_work_sync(&hcd->died_work); mutex_lock(&usb_bus_idr_lock); - if (rh_registered) - usb_disconnect(&rhdev); /* Sets rhdev to NULL */ + usb_disconnect(&rhdev); /* Sets rhdev to NULL */ mutex_unlock(&usb_bus_idr_lock); /* diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 2c1fc9212cf2..548a028f2dab 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -124,7 +124,6 @@ struct usb_hcd { #define HCD_FLAG_RH_RUNNING 5 /* root hub is running? */ #define HCD_FLAG_DEAD 6 /* controller has died? */ #define HCD_FLAG_INTF_AUTHORIZED 7 /* authorize interfaces? */ -#define HCD_FLAG_DEFER_RH_REGISTER 8 /* Defer roothub registration */ /* The flags can be tested using these macros; they are likely to * be slightly faster than test_bit(). 
@@ -135,7 +134,6 @@ struct usb_hcd { #define HCD_WAKEUP_PENDING(hcd) ((hcd)->flags & (1U << HCD_FLAG_WAKEUP_PENDING)) #define HCD_RH_RUNNING(hcd) ((hcd)->flags & (1U << HCD_FLAG_RH_RUNNING)) #define HCD_DEAD(hcd) ((hcd)->flags & (1U << HCD_FLAG_DEAD)) -#define HCD_DEFER_RH_REGISTER(hcd) ((hcd)->flags & (1U << HCD_FLAG_DEFER_RH_REGISTER)) /* * Specifies if interfaces are authorized by default From b93c6a911a3fe926b00add28f3b932007827c4ca Mon Sep 17 00:00:00 2001 From: Huang Guobin Date: Tue, 2 Nov 2021 17:37:33 +0800 Subject: [PATCH 292/433] bonding: Fix a use-after-free problem when bond_sysfs_slave_add() failed When I do fuzz test for bonding device interface, I got the following use-after-free Calltrace: ================================================================== BUG: KASAN: use-after-free in bond_enslave+0x1521/0x24f0 Read of size 8 at addr ffff88825bc11c00 by task ifenslave/7365 CPU: 5 PID: 7365 Comm: ifenslave Tainted: G E 5.15.0-rc1+ #13 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1 04/01/2014 Call Trace: dump_stack_lvl+0x6c/0x8b print_address_description.constprop.0+0x48/0x70 kasan_report.cold+0x82/0xdb __asan_load8+0x69/0x90 bond_enslave+0x1521/0x24f0 bond_do_ioctl+0x3e0/0x450 dev_ifsioc+0x2ba/0x970 dev_ioctl+0x112/0x710 sock_do_ioctl+0x118/0x1b0 sock_ioctl+0x2e0/0x490 __x64_sys_ioctl+0x118/0x150 do_syscall_64+0x35/0xb0 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f19159cf577 Code: b3 66 90 48 8b 05 11 89 2c 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 78 RSP: 002b:00007ffeb3083c78 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007ffeb3084bca RCX: 00007f19159cf577 RDX: 00007ffeb3083ce0 RSI: 0000000000008990 RDI: 0000000000000003 RBP: 00007ffeb3084bc4 R08: 0000000000000040 R09: 0000000000000000 R10: 00007ffeb3084bc0 R11: 0000000000000246 R12: 00007ffeb3083ce0 R13: 0000000000000000 R14: 0000000000000000 R15: 
00007ffeb3083cb0 Allocated by task 7365: kasan_save_stack+0x23/0x50 __kasan_kmalloc+0x83/0xa0 kmem_cache_alloc_trace+0x22e/0x470 bond_enslave+0x2e1/0x24f0 bond_do_ioctl+0x3e0/0x450 dev_ifsioc+0x2ba/0x970 dev_ioctl+0x112/0x710 sock_do_ioctl+0x118/0x1b0 sock_ioctl+0x2e0/0x490 __x64_sys_ioctl+0x118/0x150 do_syscall_64+0x35/0xb0 entry_SYSCALL_64_after_hwframe+0x44/0xae Freed by task 7365: kasan_save_stack+0x23/0x50 kasan_set_track+0x20/0x30 kasan_set_free_info+0x24/0x40 __kasan_slab_free+0xf2/0x130 kfree+0xd1/0x5c0 slave_kobj_release+0x61/0x90 kobject_put+0x102/0x180 bond_sysfs_slave_add+0x7a/0xa0 bond_enslave+0x11b6/0x24f0 bond_do_ioctl+0x3e0/0x450 dev_ifsioc+0x2ba/0x970 dev_ioctl+0x112/0x710 sock_do_ioctl+0x118/0x1b0 sock_ioctl+0x2e0/0x490 __x64_sys_ioctl+0x118/0x150 do_syscall_64+0x35/0xb0 entry_SYSCALL_64_after_hwframe+0x44/0xae Last potentially related work creation: kasan_save_stack+0x23/0x50 kasan_record_aux_stack+0xb7/0xd0 insert_work+0x43/0x190 __queue_work+0x2e3/0x970 delayed_work_timer_fn+0x3e/0x50 call_timer_fn+0x148/0x470 run_timer_softirq+0x8a8/0xc50 __do_softirq+0x107/0x55f Second to last potentially related work creation: kasan_save_stack+0x23/0x50 kasan_record_aux_stack+0xb7/0xd0 insert_work+0x43/0x190 __queue_work+0x2e3/0x970 __queue_delayed_work+0x130/0x180 queue_delayed_work_on+0xa7/0xb0 bond_enslave+0xe25/0x24f0 bond_do_ioctl+0x3e0/0x450 dev_ifsioc+0x2ba/0x970 dev_ioctl+0x112/0x710 sock_do_ioctl+0x118/0x1b0 sock_ioctl+0x2e0/0x490 __x64_sys_ioctl+0x118/0x150 do_syscall_64+0x35/0xb0 entry_SYSCALL_64_after_hwframe+0x44/0xae The buggy address belongs to the object at ffff88825bc11c00 which belongs to the cache kmalloc-1k of size 1024 The buggy address is located 0 bytes inside of 1024-byte region [ffff88825bc11c00, ffff88825bc12000) The buggy address belongs to the page: page:ffffea00096f0400 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x25bc10 head:ffffea00096f0400 order:3 compound_mapcount:0 compound_pincount:0 flags: 
0x57ff00000010200(slab|head|node=1|zone=2|lastcpupid=0x7ff) raw: 057ff00000010200 ffffea0009a71c08 ffff888240001968 ffff88810004dbc0 raw: 0000000000000000 00000000000a000a 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88825bc11b00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88825bc11b80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88825bc11c00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88825bc11c80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88825bc11d00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Putting new_slave in bond_sysfs_slave_add() will cause use-after-free problems when new_slave is accessed in the subsequent error handling process. Since new_slave will be put in the subsequent error handling process, remove the unnecessary put to fix it. In addition, when sysfs_create_file() fails, if some files have been created successfully, we need to call sysfs_remove_file() to remove them. Since sysfs_create_files() & sysfs_remove_files() can be used, use these two functions instead. Fixes: 7afcaec49696 (bonding: use kobject_put instead of _del after kobject_add) Signed-off-by: Huang Guobin Reviewed-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- drivers/net/bonding/bond_sysfs_slave.c | 36 ++++++++------------------ 1 file changed, 11 insertions(+), 25 deletions(-) diff --git a/drivers/net/bonding/bond_sysfs_slave.c b/drivers/net/bonding/bond_sysfs_slave.c index fd07561da034..6a6cdd0bb258 100644 --- a/drivers/net/bonding/bond_sysfs_slave.c +++ b/drivers/net/bonding/bond_sysfs_slave.c @@ -108,15 +108,15 @@ static ssize_t ad_partner_oper_port_state_show(struct slave *slave, char *buf) } static SLAVE_ATTR_RO(ad_partner_oper_port_state); -static const struct slave_attribute *slave_attrs[] = { - &slave_attr_state, - &slave_attr_mii_status, - &slave_attr_link_failure_count, - &slave_attr_perm_hwaddr, - &slave_attr_queue_id, - &slave_attr_ad_aggregator_id, - &slave_attr_ad_actor_oper_port_state, - &slave_attr_ad_partner_oper_port_state, +static const struct attribute *slave_attrs[] = { + &slave_attr_state.attr, + &slave_attr_mii_status.attr, + &slave_attr_link_failure_count.attr, + &slave_attr_perm_hwaddr.attr, + &slave_attr_queue_id.attr, + &slave_attr_ad_aggregator_id.attr, + &slave_attr_ad_actor_oper_port_state.attr, + &slave_attr_ad_partner_oper_port_state.attr, NULL }; @@ -137,24 +137,10 @@ const struct sysfs_ops slave_sysfs_ops = { int bond_sysfs_slave_add(struct slave *slave) { - const struct slave_attribute **a; - int err; - - for (a = slave_attrs; *a; ++a) { - err = sysfs_create_file(&slave->kobj, &((*a)->attr)); - if (err) { - kobject_put(&slave->kobj); - return err; - } - } - - return 0; + return sysfs_create_files(&slave->kobj, slave_attrs); } void bond_sysfs_slave_del(struct slave *slave) { - const struct slave_attribute **a; - - for (a = slave_attrs; *a; ++a) - sysfs_remove_file(&slave->kobj, &((*a)->attr)); + sysfs_remove_files(&slave->kobj, slave_attrs); } From af1877b6cad16bdd8d8d93ca1c7b37e8f21ef4e3 Mon Sep 17 00:00:00 2001 From: Tony Lu Date: Wed, 3 Nov 2021 20:48:37 +0800 Subject: [PATCH 293/433] net/smc: Print function name in smcr_link_down tracepoint This makes the output of 
smcr_link_down tracepoint easier to use and understand without additionally translating the function's pointer address. It prints the function name with offset: -0 [000] ..s. 69.087164: smcr_link_down: lnk=00000000dab41cdc lgr=000000007d5d8e24 state=0 rc=1 dev=mlx5_0 location=smc_wr_tx_tasklet_fn+0x5ef/0x6f0 [smc] Link: https://lore.kernel.org/netdev/11f17a34-fd35-f2ec-3f20-dd0c34e55fde@linux.ibm.com/ Signed-off-by: Tony Lu Reviewed-by: Wen Gu Signed-off-by: David S. Miller --- net/smc/smc_tracepoint.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/smc/smc_tracepoint.h b/net/smc/smc_tracepoint.h index b4c36795a928..ec17f29646f5 100644 --- a/net/smc/smc_tracepoint.h +++ b/net/smc/smc_tracepoint.h @@ -99,7 +99,7 @@ TRACE_EVENT(smcr_link_down, __entry->location = location; ), - TP_printk("lnk=%p lgr=%p state=%d dev=%s location=%p", + TP_printk("lnk=%p lgr=%p state=%d dev=%s location=%pS", __entry->lnk, __entry->lgr, __entry->state, __get_str(name), __entry->location) From 0c500ef5d3395b68f615486c90aaf28868e0032c Mon Sep 17 00:00:00 2001 From: luo penghao Date: Thu, 4 Nov 2021 06:21:38 +0000 Subject: [PATCH 294/433] tg3: Remove redundant assignments The assignment of expected_sg_dig_ctrl will be overwritten next, so this statement should be deleted. The clang_analyzer complains as follows: drivers/net/ethernet/broadcom/tg3.c:5506:2: warning: Value stored to 'expected_sg_dig_ctrl' is never read Reported-by: Zeal Robot Signed-off-by: luo penghao Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/tg3.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index b1328c5524b5..85ca3909859d 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -5503,7 +5503,6 @@ static bool tg3_setup_fiber_hw_autoneg(struct tg3 *tp, u32 mac_status) int workaround, port_a; serdes_cfg = 0; - expected_sg_dig_ctrl = 0; workaround = 0; port_a = 1; current_link_up = false; From d7be1d1cfb4d3215c06e98ac6f6c4e99293d8e3c Mon Sep 17 00:00:00 2001 From: Yang Guang Date: Thu, 4 Nov 2021 14:21:58 +0800 Subject: [PATCH 295/433] octeontx2-af: use swap() to make code cleaner Use the macro 'swap()' defined in 'include/linux/minmax.h' to avoid opencoding it. Reported-by: Zeal Robot Signed-off-by: Yang Guang Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index bb6b42bbefa4..c0005a1feee6 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -2450,9 +2450,7 @@ alloc: bmap = mcam->bmap_reverse; start = mcam->bmap_entries - start; end = mcam->bmap_entries - end; - index = start; - start = end; - end = index; + swap(start, end); } else { bmap = mcam->bmap; } From f6a510102c0553f683550238cadfab6368f34c24 Mon Sep 17 00:00:00 2001 From: Yang Guang Date: Thu, 4 Nov 2021 14:53:50 +0800 Subject: [PATCH 296/433] sfc: use swap() to make code cleaner Use the macro 'swap()' defined in 'include/linux/minmax.h' to avoid opencoding it. Reported-by: Zeal Robot Signed-off-by: Yang Guang Signed-off-by: David S. 
Miller --- drivers/net/ethernet/sfc/falcon/efx.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c index c68837a951f4..314c9c69eb0e 100644 --- a/drivers/net/ethernet/sfc/falcon/efx.c +++ b/drivers/net/ethernet/sfc/falcon/efx.c @@ -817,9 +817,7 @@ ef4_realloc_channels(struct ef4_nic *efx, u32 rxq_entries, u32 txq_entries) efx->rxq_entries = rxq_entries; efx->txq_entries = txq_entries; for (i = 0; i < efx->n_channels; i++) { - channel = efx->channel[i]; - efx->channel[i] = other_channel[i]; - other_channel[i] = channel; + swap(efx->channel[i], other_channel[i]); } /* Restart buffer table allocation */ @@ -863,9 +861,7 @@ rollback: efx->rxq_entries = old_rxq_entries; efx->txq_entries = old_txq_entries; for (i = 0; i < efx->n_channels; i++) { - channel = efx->channel[i]; - efx->channel[i] = other_channel[i]; - other_channel[i] = channel; + swap(efx->channel[i], other_channel[i]); } goto out; } From 9cbc3367968de69017a87a1118b62490ac1bdd0a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 4 Nov 2021 14:34:42 +0100 Subject: [PATCH 297/433] octeontx2-pf: select CONFIG_NET_DEVLINK The octeontx2 pf nic driver fails to link when the devlink support is not reachable: aarch64-linux-ld: drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.o: in function `otx2_dl_mcam_count_get': otx2_devlink.c:(.text+0x10): undefined reference to `devlink_priv' aarch64-linux-ld: drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.o: in function `otx2_dl_mcam_count_validate': otx2_devlink.c:(.text+0x50): undefined reference to `devlink_priv' aarch64-linux-ld: drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.o: in function `otx2_dl_mcam_count_set': otx2_devlink.c:(.text+0xd0): undefined reference to `devlink_priv' aarch64-linux-ld: drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.o: in function `otx2_devlink_info_get': otx2_devlink.c:(.text+0x150): undefined reference 
to `devlink_priv' This is already selected by the admin function driver, but not the actual nic, which might be built-in when the af driver is not. Fixes: 2da489432747 ("octeontx2-pf: devlink params support to set mcam entry count") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/Kconfig b/drivers/net/ethernet/marvell/octeontx2/Kconfig index 3f982ccf2c85..639893d87055 100644 --- a/drivers/net/ethernet/marvell/octeontx2/Kconfig +++ b/drivers/net/ethernet/marvell/octeontx2/Kconfig @@ -31,6 +31,7 @@ config NDC_DIS_DYNAMIC_CACHING config OCTEONTX2_PF tristate "Marvell OcteonTX2 NIC Physical Function driver" select OCTEONTX2_MBOX + select NET_DEVLINK depends on (64BIT && COMPILE_TEST) || ARM64 depends on PCI depends on PTP_1588_CLOCK_OPTIONAL From 827beb7781d3dbba1a8cd8dc364cc3cb3fc13b11 Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Fri, 5 Nov 2021 01:42:17 +0000 Subject: [PATCH 298/433] net: ethernet: litex: Remove unnecessary print function dev_err() The print function dev_err() is redundant because platform_get_irq() already prints an error. Signed-off-by: Xu Wang Reviewed-by: Cai Huoqing Signed-off-by: David S. 
Miller --- drivers/net/ethernet/litex/litex_liteeth.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/litex/litex_liteeth.c b/drivers/net/ethernet/litex/litex_liteeth.c index 3d9385a4989b..ab9fa1525053 100644 --- a/drivers/net/ethernet/litex/litex_liteeth.c +++ b/drivers/net/ethernet/litex/litex_liteeth.c @@ -242,10 +242,8 @@ static int liteeth_probe(struct platform_device *pdev) priv->dev = &pdev->dev; irq = platform_get_irq(pdev, 0); - if (irq < 0) { - dev_err(&pdev->dev, "Failed to get IRQ %d\n", irq); + if (irq < 0) return irq; - } netdev->irq = irq; priv->base = devm_platform_ioremap_resource_byname(pdev, "mac"); From 69dfccbc1186f7091f97b70a9437d6a51313834d Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Fri, 5 Nov 2021 15:35:41 +0800 Subject: [PATCH 299/433] net: udp: correct the document for udp_mem udp_mem is a vector of 3 INTEGERs, which is used to limit the number of pages allowed for queueing by all UDP sockets. However, sk_has_memory_pressure() in __sk_mem_raise_allocated() always returns false for udp, as memory pressure is not supported by udp, which means that __sk_mem_raise_allocated() will fail once pages allocated for udp socket exceeds udp_mem[0]. Therefore, udp_mem[0] is the only one that limits the number of pages. However, the documentation of udp_mem states that udp_mem[2] is the limit. So, just fix it. Signed-off-by: Menglong Dong Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.rst | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index c61cc0219f4c..c04431144f7a 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1004,13 +1004,11 @@ udp_l3mdev_accept - BOOLEAN udp_mem - vector of 3 INTEGERs: min, pressure, max Number of pages allowed for queueing by all UDP sockets. 
- min: Below this number of pages UDP is not bothered about its - memory appetite. When amount of memory allocated by UDP exceeds - this number, UDP starts to moderate memory usage. + min: Number of pages allowed for queueing by all UDP sockets. pressure: This value was introduced to follow format of tcp_mem. - max: Number of pages allowed for queueing by all UDP sockets. + max: This value was introduced to follow format of tcp_mem. Default is calculated at boot time from amount of available memory. From 6789a4c05127d3f9257db6767fd7ede614e0241f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 4 Nov 2021 10:55:27 -0700 Subject: [PATCH 300/433] net: ax88796c: hide ax88796c_dt_ids if !CONFIG_OF Build bot says: >> drivers/net/ethernet/asix/ax88796c_main.c:1116:34: warning: unused variable 'ax88796c_dt_ids' [-Wunused-const-variable] static const struct of_device_id ax88796c_dt_ids[] = { ^ The only reference to this array is wrapped in of_match_ptr(). Reported-by: kernel test robot Fixes: a97c69ba4f30 ("net: ax88796c: ASIX AX88796C SPI Ethernet Adapter Driver") Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- drivers/net/ethernet/asix/ax88796c_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/asix/ax88796c_main.c b/drivers/net/ethernet/asix/ax88796c_main.c index 4b0c5a09fd57..2086de05385c 100644 --- a/drivers/net/ethernet/asix/ax88796c_main.c +++ b/drivers/net/ethernet/asix/ax88796c_main.c @@ -1114,11 +1114,13 @@ static int ax88796c_remove(struct spi_device *spi) return 0; } +#ifdef CONFIG_OF static const struct of_device_id ax88796c_dt_ids[] = { { .compatible = "asix,ax88796c" }, {}, }; MODULE_DEVICE_TABLE(of, ax88796c_dt_ids); +#endif static const struct spi_device_id asix_id[] = { { "ax88796c", 0 }, From 3f81c579912855f19ed1a72af8133485a6119fba Mon Sep 17 00:00:00 2001 From: Yang Li Date: Fri, 5 Nov 2021 15:12:50 +0800 Subject: [PATCH 301/433] amt: Fix NULL but dereferenced coccicheck error Eliminate the following coccicheck warning: ./drivers/net/amt.c:2795:6-9: ERROR: amt is NULL but dereferenced. Reported-by: Abaci Robot Signed-off-by: Yang Li Signed-off-by: David S. Miller --- drivers/net/amt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/amt.c b/drivers/net/amt.c index 896c9e2857f0..cfd6c8cb4e97 100644 --- a/drivers/net/amt.c +++ b/drivers/net/amt.c @@ -2766,7 +2766,7 @@ static int amt_err_lookup(struct sock *sk, struct sk_buff *skb) rcu_read_lock_bh(); amt = rcu_dereference_sk_user_data(sk); if (!amt) - goto drop; + goto out; if (amt->mode != AMT_MODE_GATEWAY) goto drop; @@ -2788,6 +2788,7 @@ static int amt_err_lookup(struct sock *sk, struct sk_buff *skb) default: goto drop; } +out: rcu_read_unlock_bh(); return 0; drop: From 9dcc00715a7c0aea0d3afe1e935f4b4aefbeb294 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 5 Nov 2021 10:29:39 +0100 Subject: [PATCH 302/433] ax88796c: fix ioctl callback The timestamp ioctls are now handled by the ndo_eth_ioctl() callback, not the old ndo_do_ioctl(), but ax88796c introduced the function for the old way. 
Move it over to ndo_eth_ioctl() to actually allow calling it from user space. Fixes: a97c69ba4f30 ("net: ax88796c: ASIX AX88796C SPI Ethernet Adapter Driver") Fixes: a76053707dbf ("dev_ioctl: split out ndo_eth_ioctl") Signed-off-by: Arnd Bergmann Acked-by: Lukasz Stelmach Signed-off-by: David S. Miller --- drivers/net/ethernet/asix/ax88796c_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/asix/ax88796c_main.c b/drivers/net/ethernet/asix/ax88796c_main.c index 2086de05385c..e230d8d0ff73 100644 --- a/drivers/net/ethernet/asix/ax88796c_main.c +++ b/drivers/net/ethernet/asix/ax88796c_main.c @@ -934,7 +934,7 @@ static const struct net_device_ops ax88796c_netdev_ops = { .ndo_stop = ax88796c_close, .ndo_start_xmit = ax88796c_start_xmit, .ndo_get_stats64 = ax88796c_get_stats64, - .ndo_do_ioctl = ax88796c_ioctl, + .ndo_eth_ioctl = ax88796c_ioctl, .ndo_set_mac_address = eth_mac_addr, .ndo_set_features = ax88796c_set_features, }; From a6785bd7d83c9e73c6a6aa33d30a071460074728 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 5 Nov 2021 10:29:40 +0100 Subject: [PATCH 303/433] octeontx2-nicvf: fix ioctl callback The mii ioctls are now handled by the ndo_eth_ioctl() callback, not the old ndo_do_ioctl(), but octeontx2-nicvf introduced the function for the old way. Move it over to ndo_eth_ioctl() to actually allow calling it from user space. Fixes: 43510ef4ddad ("octeontx2-nicvf: Add PTP hardware clock support to NIX VF") Fixes: a76053707dbf ("dev_ioctl: split out ndo_eth_ioctl") Signed-off-by: Arnd Bergmann Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index e6cb8cd0787d..78944ad3492f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -501,7 +501,7 @@ static const struct net_device_ops otx2vf_netdev_ops = { .ndo_set_features = otx2vf_set_features, .ndo_get_stats64 = otx2_get_stats64, .ndo_tx_timeout = otx2_tx_timeout, - .ndo_do_ioctl = otx2_ioctl, + .ndo_eth_ioctl = otx2_ioctl, }; static int otx2_wq_init(struct otx2_nic *vf) From dce981c42151e1f0176b0788c2e1bdc3f1e2bc1f Mon Sep 17 00:00:00 2001 From: Zhang Mingyu Date: Fri, 5 Nov 2021 01:27:17 +0000 Subject: [PATCH 304/433] amt: remove duplicate include in amt.c 'net/protocol.h' included in 'drivers/net/amt.c' is duplicated. Reported-by: Zeal Robot Signed-off-by: Zhang Mingyu Signed-off-by: David S. Miller --- drivers/net/amt.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/amt.c b/drivers/net/amt.c index cfd6c8cb4e97..c384b2694f9e 100644 --- a/drivers/net/amt.c +++ b/drivers/net/amt.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include From a46a5036e7d2c537995ed331b7b8727d0e28390c Mon Sep 17 00:00:00 2001 From: Volodymyr Mytnyk Date: Thu, 4 Nov 2021 15:12:52 +0200 Subject: [PATCH 305/433] net: marvell: prestera: fix patchwork build problems fix the remaining build issues reported by patchwork in firmware v4.0 support commit which has been already merged. Fix patchwork issues: - source inline - checkpatch Fixes: bb5dbf2cc64d ("net: marvell: prestera: add firmware v4.0 support") Signed-off-by: Volodymyr Mytnyk Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/prestera/prestera_ethtool.c | 3 ++- drivers/net/ethernet/marvell/prestera/prestera_hw.c | 3 ++- drivers/net/ethernet/marvell/prestera/prestera_main.c | 6 ++++-- drivers/net/ethernet/marvell/prestera/prestera_pci.c | 3 ++- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/marvell/prestera/prestera_ethtool.c b/drivers/net/ethernet/marvell/prestera/prestera_ethtool.c index 6011454dba71..40d5b89573bb 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_ethtool.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_ethtool.c @@ -499,7 +499,8 @@ static void prestera_port_mdix_get(struct ethtool_link_ksettings *ecmd, { struct prestera_port_phy_state *state = &port->state_phy; - if (prestera_hw_port_phy_mode_get(port, &state->mdix, NULL, NULL, NULL)) { + if (prestera_hw_port_phy_mode_get(port, + &state->mdix, NULL, NULL, NULL)) { netdev_warn(port->dev, "MDIX params get failed"); state->mdix = ETH_TP_MDI_INVALID; } diff --git a/drivers/net/ethernet/marvell/prestera/prestera_hw.c b/drivers/net/ethernet/marvell/prestera/prestera_hw.c index 4f5f52dcdd9d..bc3c9310678a 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_hw.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_hw.c @@ -1356,7 +1356,8 @@ int prestera_hw_port_speed_get(const struct prestera_port *port, u32 *speed) int prestera_hw_port_autoneg_restart(struct prestera_port *port) { struct prestera_msg_port_attr_req req = { - .attr = __cpu_to_le32(PRESTERA_CMD_PORT_ATTR_PHY_AUTONEG_RESTART), + .attr = + __cpu_to_le32(PRESTERA_CMD_PORT_ATTR_PHY_AUTONEG_RESTART), .port = __cpu_to_le32(port->hw_id), .dev = __cpu_to_le32(port->dev_id), }; diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c index 625b40149fac..4369a3ffad45 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -405,7 +405,8 
@@ static int prestera_port_create(struct prestera_switch *sw, u32 id) err = prestera_port_cfg_mac_write(port, &cfg_mac); if (err) { - dev_err(prestera_dev(sw), "Failed to set port(%u) mac mode\n", id); + dev_err(prestera_dev(sw), + "Failed to set port(%u) mac mode\n", id); goto err_port_init; } @@ -418,7 +419,8 @@ static int prestera_port_create(struct prestera_switch *sw, u32 id) false, 0, 0, port->cfg_phy.mdix); if (err) { - dev_err(prestera_dev(sw), "Failed to set port(%u) phy mode\n", id); + dev_err(prestera_dev(sw), + "Failed to set port(%u) phy mode\n", id); goto err_port_init; } } diff --git a/drivers/net/ethernet/marvell/prestera/prestera_pci.c b/drivers/net/ethernet/marvell/prestera/prestera_pci.c index 5d4d410b07c8..461259b3655a 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_pci.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_pci.c @@ -411,7 +411,8 @@ static int prestera_fw_cmd_send(struct prestera_fw *fw, int qid, goto cmd_exit; } - memcpy_fromio(out_msg, prestera_fw_cmdq_buf(fw, qid) + in_size, ret_size); + memcpy_fromio(out_msg, + prestera_fw_cmdq_buf(fw, qid) + in_size, ret_size); cmd_exit: prestera_fw_write(fw, PRESTERA_CMDQ_REQ_CTL_REG(qid), From e41ac2020bca4acdb7485ddca34098f68d3af5ae Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 5 Nov 2021 01:58:13 +0000 Subject: [PATCH 306/433] bpftool: Install libbpf headers for the bootstrap version, too We recently changed bpftool's Makefile to make it install libbpf's headers locally instead of pulling them from the source directory of the library. Although bpftool needs two versions of libbpf, a "regular" one and a "bootstrap" version, we would only install headers for the regular libbpf build. Given that this build always occurs before the bootstrap build when building bpftool, this is enough to ensure that the bootstrap bpftool will have access to the headers exported through the regular libbpf build. 
However, this did not account for the case when we only want the bootstrap version of bpftool, through the "bootstrap" target. For example, perf needs the bootstrap version only, to generate BPF skeletons. In that case, when are the headers installed? For some time, the issue has been masked, because we had a step (the installation of headers internal to libbpf) which would depend on the regular build of libbpf and hence trigger the export of the headers, just for the sake of creating a directory. But this changed with commit 8b6c46241c77 ("bpftool: Remove Makefile dep. on $(LIBBPF) for $(LIBBPF_INTERNAL_HDRS)"), where we cleaned up that stage and removed the dependency on the regular libbpf build. As a result, when we only want the bootstrap bpftool version, the regular libbpf is no longer built. The bootstrap libbpf version is built, but headers are not exported, and the bootstrap bpftool build fails because of the missing headers. To fix this, we also install the library headers for the bootstrap version of libbpf, to use them for the bootstrap bpftool and for generating the skeletons. 
Fixes: f012ade10b34 ("bpftool: Install libbpf headers instead of including the dir") Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Quentin Monnet Signed-off-by: Daniel Borkmann Tested-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/bpf/20211105015813.6171-1-quentin@isovalent.com --- tools/bpf/bpftool/Makefile | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index c0c30e56988f..7cfba11c3014 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -22,24 +22,29 @@ else _OUTPUT := $(CURDIR) endif BOOTSTRAP_OUTPUT := $(_OUTPUT)/bootstrap/ + LIBBPF_OUTPUT := $(_OUTPUT)/libbpf/ LIBBPF_DESTDIR := $(LIBBPF_OUTPUT) LIBBPF_INCLUDE := $(LIBBPF_DESTDIR)/include LIBBPF_HDRS_DIR := $(LIBBPF_INCLUDE)/bpf +LIBBPF := $(LIBBPF_OUTPUT)libbpf.a -LIBBPF = $(LIBBPF_OUTPUT)libbpf.a -LIBBPF_BOOTSTRAP_OUTPUT = $(BOOTSTRAP_OUTPUT)libbpf/ -LIBBPF_BOOTSTRAP = $(LIBBPF_BOOTSTRAP_OUTPUT)libbpf.a +LIBBPF_BOOTSTRAP_OUTPUT := $(BOOTSTRAP_OUTPUT)libbpf/ +LIBBPF_BOOTSTRAP_DESTDIR := $(LIBBPF_BOOTSTRAP_OUTPUT) +LIBBPF_BOOTSTRAP_INCLUDE := $(LIBBPF_BOOTSTRAP_DESTDIR)/include +LIBBPF_BOOTSTRAP_HDRS_DIR := $(LIBBPF_BOOTSTRAP_INCLUDE)/bpf +LIBBPF_BOOTSTRAP := $(LIBBPF_BOOTSTRAP_OUTPUT)libbpf.a # We need to copy hashmap.h and nlattr.h which is not otherwise exported by # libbpf, but still required by bpftool. LIBBPF_INTERNAL_HDRS := $(addprefix $(LIBBPF_HDRS_DIR)/,hashmap.h nlattr.h) +LIBBPF_BOOTSTRAP_INTERNAL_HDRS := $(addprefix $(LIBBPF_BOOTSTRAP_HDRS_DIR)/,hashmap.h) ifeq ($(BPFTOOL_VERSION),) BPFTOOL_VERSION := $(shell make -rR --no-print-directory -sC ../../.. 
kernelversion) endif -$(LIBBPF_OUTPUT) $(BOOTSTRAP_OUTPUT) $(LIBBPF_BOOTSTRAP_OUTPUT) $(LIBBPF_HDRS_DIR): +$(LIBBPF_OUTPUT) $(BOOTSTRAP_OUTPUT) $(LIBBPF_BOOTSTRAP_OUTPUT) $(LIBBPF_HDRS_DIR) $(LIBBPF_BOOTSTRAP_HDRS_DIR): $(QUIET_MKDIR)mkdir -p $@ $(LIBBPF): $(wildcard $(BPF_DIR)/*.[ch] $(BPF_DIR)/Makefile) | $(LIBBPF_OUTPUT) @@ -52,7 +57,12 @@ $(LIBBPF_INTERNAL_HDRS): $(LIBBPF_HDRS_DIR)/%.h: $(BPF_DIR)/%.h | $(LIBBPF_HDRS_ $(LIBBPF_BOOTSTRAP): $(wildcard $(BPF_DIR)/*.[ch] $(BPF_DIR)/Makefile) | $(LIBBPF_BOOTSTRAP_OUTPUT) $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) \ - ARCH= CC=$(HOSTCC) LD=$(HOSTLD) $@ + DESTDIR=$(LIBBPF_BOOTSTRAP_DESTDIR) prefix= \ + ARCH= CC=$(HOSTCC) LD=$(HOSTLD) $@ install_headers + +$(LIBBPF_BOOTSTRAP_INTERNAL_HDRS): $(LIBBPF_BOOTSTRAP_HDRS_DIR)/%.h: $(BPF_DIR)/%.h | $(LIBBPF_BOOTSTRAP_HDRS_DIR) + $(call QUIET_INSTALL, $@) + $(Q)install -m 644 -t $(LIBBPF_BOOTSTRAP_HDRS_DIR) $< $(LIBBPF)-clean: FORCE | $(LIBBPF_OUTPUT) $(call QUIET_CLEAN, libbpf) @@ -172,11 +182,11 @@ else $(Q)cp "$(VMLINUX_H)" $@ endif -$(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF) +$(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF_BOOTSTRAP) $(QUIET_CLANG)$(CLANG) \ -I$(if $(OUTPUT),$(OUTPUT),.) 
\ -I$(srctree)/tools/include/uapi/ \ - -I$(LIBBPF_INCLUDE) \ + -I$(LIBBPF_BOOTSTRAP_INCLUDE) \ -g -O2 -Wall -target bpf -c $< -o $@ && $(LLVM_STRIP) -g $@ $(OUTPUT)%.skel.h: $(OUTPUT)%.bpf.o $(BPFTOOL_BOOTSTRAP) @@ -209,8 +219,10 @@ $(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF_BOOTSTRAP) $(OUTPUT)bpftool: $(OBJS) $(LIBBPF) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS) -$(BOOTSTRAP_OUTPUT)%.o: %.c $(LIBBPF_INTERNAL_HDRS) | $(BOOTSTRAP_OUTPUT) - $(QUIET_CC)$(HOSTCC) $(CFLAGS) -c -MMD -o $@ $< +$(BOOTSTRAP_OUTPUT)%.o: %.c $(LIBBPF_BOOTSTRAP_INTERNAL_HDRS) | $(BOOTSTRAP_OUTPUT) + $(QUIET_CC)$(HOSTCC) \ + $(subst -I$(LIBBPF_INCLUDE),-I$(LIBBPF_BOOTSTRAP_INCLUDE),$(CFLAGS)) \ + -c -MMD -o $@ $< $(OUTPUT)%.o: %.c $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $< @@ -257,6 +269,6 @@ doc-uninstall: FORCE: .SECONDARY: -.PHONY: all FORCE clean install-bin install uninstall +.PHONY: all FORCE bootstrap clean install-bin install uninstall .PHONY: doc doc-clean doc-install doc-uninstall .DEFAULT_GOAL := all From 64165ddf8ea184631c65e3bbc8d59f6d940590ca Mon Sep 17 00:00:00 2001 From: Mehrdad Arshad Rad Date: Thu, 4 Nov 2021 10:13:54 -0700 Subject: [PATCH 307/433] libbpf: Fix lookup_and_delete_elem_flags error reporting Fix bpf_map_lookup_and_delete_elem_flags() to pass the return code through libbpf_err_errno() as we do similarly in bpf_map_lookup_and_delete_elem(). 
Fixes: f12b65432728 ("libbpf: Streamline error reporting for low-level APIs") Signed-off-by: Mehrdad Arshad Rad Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20211104171354.11072-1-arshad.rad@gmail.com --- tools/lib/bpf/bpf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index c09cbb868c9f..725701235fd8 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -515,6 +515,7 @@ int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value) int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key, void *value, __u64 flags) { union bpf_attr attr; + int ret; memset(&attr, 0, sizeof(attr)); attr.map_fd = fd; @@ -522,7 +523,8 @@ int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key, void *value, _ attr.value = ptr_to_u64(value); attr.flags = flags; - return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr)); + return libbpf_err_errno(ret); } int bpf_map_delete_elem(int fd, const void *key) From 8b4ac13abe7d82da0e0d22a9ba2e27301559a93e Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 27 Oct 2021 11:35:50 +0800 Subject: [PATCH 308/433] selftests/bpf/xdp_redirect_multi: Put the logs to tmp folder The xdp_redirect_multi test logs are created in the selftest folder and not cleaned up after the test. Let's create a tmp dir and remove the logs after testing.
Fixes: d23292476297 ("selftests/bpf: Add xdp_redirect_multi test") Suggested-by: Jiri Benc Signed-off-by: Hangbin Liu Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211027033553.962413-2-liuhangbin@gmail.com --- .../selftests/bpf/test_xdp_redirect_multi.sh | 35 ++++++++++--------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh index 351955c2bdfd..c1653f6d7f77 100755 --- a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh +++ b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh @@ -31,6 +31,7 @@ IFACES="" DRV_MODE="xdpgeneric xdpdrv xdpegress" PASS=0 FAIL=0 +LOG_DIR=$(mktemp -d) test_pass() { @@ -100,17 +101,17 @@ do_egress_tests() local mode=$1 # mac test - ip netns exec ns2 tcpdump -e -i veth0 -nn -l -e &> mac_ns1-2_${mode}.log & - ip netns exec ns3 tcpdump -e -i veth0 -nn -l -e &> mac_ns1-3_${mode}.log & + ip netns exec ns2 tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-2_${mode}.log & + ip netns exec ns3 tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-3_${mode}.log & sleep 0.5 ip netns exec ns1 ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null sleep 0.5 pkill -9 tcpdump # mac check - grep -q "${veth_mac[2]} > ff:ff:ff:ff:ff:ff" mac_ns1-2_${mode}.log && \ + grep -q "${veth_mac[2]} > ff:ff:ff:ff:ff:ff" ${LOG_DIR}/mac_ns1-2_${mode}.log && \ test_pass "$mode mac ns1-2" || test_fail "$mode mac ns1-2" - grep -q "${veth_mac[3]} > ff:ff:ff:ff:ff:ff" mac_ns1-3_${mode}.log && \ + grep -q "${veth_mac[3]} > ff:ff:ff:ff:ff:ff" ${LOG_DIR}/mac_ns1-3_${mode}.log && \ test_pass "$mode mac ns1-3" || test_fail "$mode mac ns1-3" } @@ -121,9 +122,9 @@ do_ping_tests() # ping6 test: echo request should be redirect back to itself, not others ip netns exec ns1 ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02 - ip netns exec ns1 tcpdump -i veth0 -nn -l -e &> ns1-1_${mode}.log & - ip netns exec ns2 tcpdump -i veth0 -nn -l 
-e &> ns1-2_${mode}.log & - ip netns exec ns3 tcpdump -i veth0 -nn -l -e &> ns1-3_${mode}.log & + ip netns exec ns1 tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-1_${mode}.log & + ip netns exec ns2 tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-2_${mode}.log & + ip netns exec ns3 tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-3_${mode}.log & sleep 0.5 # ARP test ip netns exec ns1 ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null @@ -135,32 +136,32 @@ do_ping_tests() pkill -9 tcpdump # All netns should receive the redirect arp requests - [ $(grep -c "who-has 192.0.2.254" ns1-1_${mode}.log) -gt 4 ] && \ + [ $(grep -c "who-has 192.0.2.254" ${LOG_DIR}/ns1-1_${mode}.log) -gt 4 ] && \ test_pass "$mode arp(F_BROADCAST) ns1-1" || \ test_fail "$mode arp(F_BROADCAST) ns1-1" - [ $(grep -c "who-has 192.0.2.254" ns1-2_${mode}.log) -le 4 ] && \ + [ $(grep -c "who-has 192.0.2.254" ${LOG_DIR}/ns1-2_${mode}.log) -le 4 ] && \ test_pass "$mode arp(F_BROADCAST) ns1-2" || \ test_fail "$mode arp(F_BROADCAST) ns1-2" - [ $(grep -c "who-has 192.0.2.254" ns1-3_${mode}.log) -le 4 ] && \ + [ $(grep -c "who-has 192.0.2.254" ${LOG_DIR}/ns1-3_${mode}.log) -le 4 ] && \ test_pass "$mode arp(F_BROADCAST) ns1-3" || \ test_fail "$mode arp(F_BROADCAST) ns1-3" # ns1 should not receive the redirect echo request, others should - [ $(grep -c "ICMP echo request" ns1-1_${mode}.log) -eq 4 ] && \ + [ $(grep -c "ICMP echo request" ${LOG_DIR}/ns1-1_${mode}.log) -eq 4 ] && \ test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1" || \ test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1" - [ $(grep -c "ICMP echo request" ns1-2_${mode}.log) -eq 4 ] && \ + [ $(grep -c "ICMP echo request" ${LOG_DIR}/ns1-2_${mode}.log) -eq 4 ] && \ test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2" || \ test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2" - [ $(grep -c "ICMP echo request" ns1-3_${mode}.log) -eq 4 ] && \ + [ $(grep -c "ICMP echo request" ${LOG_DIR}/ns1-3_${mode}.log) -eq 4 ] && \ test_pass "$mode 
IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3" || \ test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3" # ns1 should receive the echo request, ns2 should not - [ $(grep -c "ICMP6, echo request" ns1-1_${mode}.log) -eq 4 ] && \ + [ $(grep -c "ICMP6, echo request" ${LOG_DIR}/ns1-1_${mode}.log) -eq 4 ] && \ test_pass "$mode IPv6 (no flags) ns1-1" || \ test_fail "$mode IPv6 (no flags) ns1-1" - [ $(grep -c "ICMP6, echo request" ns1-2_${mode}.log) -eq 0 ] && \ + [ $(grep -c "ICMP6, echo request" ${LOG_DIR}/ns1-2_${mode}.log) -eq 0 ] && \ test_pass "$mode IPv6 (no flags) ns1-2" || \ test_fail "$mode IPv6 (no flags) ns1-2" } @@ -176,7 +177,7 @@ do_tests() xdpgeneric) drv_p="-S";; esac - ./xdp_redirect_multi $drv_p $IFACES &> xdp_redirect_${mode}.log & + ./xdp_redirect_multi $drv_p $IFACES &> ${LOG_DIR}/xdp_redirect_${mode}.log & xdp_pid=$! sleep 1 @@ -192,13 +193,13 @@ do_tests() trap clean_up 0 2 3 6 9 check_env -rm -f xdp_redirect_*.log ns*.log mac_ns*.log for mode in ${DRV_MODE}; do setup_ns $mode do_tests $mode clean_up done +rm -rf ${LOG_DIR} echo "Summary: PASS $PASS, FAIL $FAIL" [ $FAIL -eq 0 ] && exit 0 || exit 1 From f53ea9dbf78d42a10e2392b5c59362ccc224fd1d Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 27 Oct 2021 11:35:51 +0800 Subject: [PATCH 309/433] selftests/bpf/xdp_redirect_multi: Use arping to accurate the arp number The number of arp requests triggered by pinging a non-existent address is not accurate, which may lead to false negative/positive test results. Change to use arping to get an accurate arp request count. Also do not use grep pattern match for dot.
Fixes: d23292476297 ("selftests/bpf: Add xdp_redirect_multi test") Suggested-by: Jiri Benc Signed-off-by: Hangbin Liu Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211027033553.962413-3-liuhangbin@gmail.com --- tools/testing/selftests/bpf/test_xdp_redirect_multi.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh index c1653f6d7f77..e14dc41b52f2 100755 --- a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh +++ b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh @@ -127,7 +127,7 @@ do_ping_tests() ip netns exec ns3 tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-3_${mode}.log & sleep 0.5 # ARP test - ip netns exec ns1 ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null + ip netns exec ns1 arping -q -c 2 -I veth0 192.0.2.254 # IPv4 test ip netns exec ns1 ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null # IPv6 test @@ -136,13 +136,13 @@ do_ping_tests() pkill -9 tcpdump # All netns should receive the redirect arp requests - [ $(grep -c "who-has 192.0.2.254" ${LOG_DIR}/ns1-1_${mode}.log) -gt 4 ] && \ + [ $(grep -cF "who-has 192.0.2.254" ${LOG_DIR}/ns1-1_${mode}.log) -eq 4 ] && \ test_pass "$mode arp(F_BROADCAST) ns1-1" || \ test_fail "$mode arp(F_BROADCAST) ns1-1" - [ $(grep -c "who-has 192.0.2.254" ${LOG_DIR}/ns1-2_${mode}.log) -le 4 ] && \ + [ $(grep -cF "who-has 192.0.2.254" ${LOG_DIR}/ns1-2_${mode}.log) -eq 2 ] && \ test_pass "$mode arp(F_BROADCAST) ns1-2" || \ test_fail "$mode arp(F_BROADCAST) ns1-2" - [ $(grep -c "who-has 192.0.2.254" ${LOG_DIR}/ns1-3_${mode}.log) -le 4 ] && \ + [ $(grep -cF "who-has 192.0.2.254" ${LOG_DIR}/ns1-3_${mode}.log) -eq 2 ] && \ test_pass "$mode arp(F_BROADCAST) ns1-3" || \ test_fail "$mode arp(F_BROADCAST) ns1-3" From 648c3677062fbd14d754b853daebb295426771e8 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 27 Oct 2021 11:35:52 +0800 Subject: [PATCH 310/433] 
selftests/bpf/xdp_redirect_multi: Give tcpdump a chance to terminate cleanly No need to kill tcpdump with -9. Fixes: d23292476297 ("selftests/bpf: Add xdp_redirect_multi test") Suggested-by: Jiri Benc Signed-off-by: Hangbin Liu Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211027033553.962413-4-liuhangbin@gmail.com --- tools/testing/selftests/bpf/test_xdp_redirect_multi.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh index e14dc41b52f2..d4cdb76cdf9e 100755 --- a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh +++ b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh @@ -106,7 +106,7 @@ do_egress_tests() sleep 0.5 ip netns exec ns1 ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null sleep 0.5 - pkill -9 tcpdump + pkill tcpdump # mac check grep -q "${veth_mac[2]} > ff:ff:ff:ff:ff:ff" ${LOG_DIR}/mac_ns1-2_${mode}.log && \ @@ -133,7 +133,7 @@ do_ping_tests() # IPv6 test ip netns exec ns1 ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null sleep 0.5 - pkill -9 tcpdump + pkill tcpdump # All netns should receive the redirect arp requests [ $(grep -cF "who-has 192.0.2.254" ${LOG_DIR}/ns1-1_${mode}.log) -eq 4 ] && \ From 8955c1a329873385775081e029d9a7c6aa9037e1 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 27 Oct 2021 11:35:53 +0800 Subject: [PATCH 311/433] selftests/bpf/xdp_redirect_multi: Limit the tests in netns As I want to test both DEVMAP and DEVMAP_HASH in XDP multicast redirect, I limited DEVMAP max entries to a small value for performance. When the test runs after a number of interface creating/deleting tests, the interface index will exceed the map max entries and xdp_redirect_multi will error out with "Get interfacesInterface index to large". Fix this issue by limiting the tests to a netns and specifying the ifindex when creating interfaces.
Fixes: d23292476297 ("selftests/bpf: Add xdp_redirect_multi test") Reported-by: Jiri Benc Signed-off-by: Hangbin Liu Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211027033553.962413-5-liuhangbin@gmail.com --- .../selftests/bpf/test_xdp_redirect_multi.sh | 23 ++++++++++++------- .../selftests/bpf/xdp_redirect_multi.c | 4 ++-- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh index d4cdb76cdf9e..05f872740999 100755 --- a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh +++ b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh @@ -2,11 +2,11 @@ # SPDX-License-Identifier: GPL-2.0 # # Test topology: -# - - - - - - - - - - - - - - - - - - - - - - - - - -# | veth1 veth2 veth3 | ... init net +# - - - - - - - - - - - - - - - - - - - +# | veth1 veth2 veth3 | ns0 # - -| - - - - - - | - - - - - - | - - # --------- --------- --------- -# | veth0 | | veth0 | | veth0 | ... +# | veth0 | | veth0 | | veth0 | # --------- --------- --------- # ns1 ns2 ns3 # @@ -51,6 +51,7 @@ clean_up() ip link del veth$i 2> /dev/null ip netns del ns$i 2> /dev/null done + ip netns del ns0 2> /dev/null } # Kselftest framework requirement - SKIP code is 4. 
@@ -78,10 +79,12 @@ setup_ns() mode="xdpdrv" fi + ip netns add ns0 for i in $(seq $NUM); do ip netns add ns$i - ip link add veth$i type veth peer name veth0 netns ns$i - ip link set veth$i up + ip -n ns$i link add veth0 index 2 type veth \ + peer name veth$i netns ns0 index $((1 + $i)) + ip -n ns0 link set veth$i up ip -n ns$i link set veth0 up ip -n ns$i addr add 192.0.2.$i/24 dev veth0 @@ -92,7 +95,7 @@ setup_ns() xdp_dummy.o sec xdp &> /dev/null || \ { test_fail "Unable to load dummy xdp" && exit 1; } IFACES="$IFACES veth$i" - veth_mac[$i]=$(ip link show veth$i | awk '/link\/ether/ {print $2}') + veth_mac[$i]=$(ip -n ns0 link show veth$i | awk '/link\/ether/ {print $2}') done } @@ -177,9 +180,13 @@ do_tests() xdpgeneric) drv_p="-S";; esac - ./xdp_redirect_multi $drv_p $IFACES &> ${LOG_DIR}/xdp_redirect_${mode}.log & + ip netns exec ns0 ./xdp_redirect_multi $drv_p $IFACES &> ${LOG_DIR}/xdp_redirect_${mode}.log & xdp_pid=$! sleep 1 + if ! ps -p $xdp_pid > /dev/null; then + test_fail "$mode xdp_redirect_multi start failed" + return 1 + fi if [ "$mode" = "xdpegress" ]; then do_egress_tests $mode @@ -190,7 +197,7 @@ do_tests() kill $xdp_pid } -trap clean_up 0 2 3 6 9 +trap clean_up EXIT check_env diff --git a/tools/testing/selftests/bpf/xdp_redirect_multi.c b/tools/testing/selftests/bpf/xdp_redirect_multi.c index 3696a8f32c23..f5ffba341c17 100644 --- a/tools/testing/selftests/bpf/xdp_redirect_multi.c +++ b/tools/testing/selftests/bpf/xdp_redirect_multi.c @@ -129,7 +129,7 @@ int main(int argc, char **argv) goto err_out; } - printf("Get interfaces"); + printf("Get interfaces:"); for (i = 0; i < MAX_IFACE_NUM && argv[optind + i]; i++) { ifaces[i] = if_nametoindex(argv[optind + i]); if (!ifaces[i]) @@ -139,7 +139,7 @@ int main(int argc, char **argv) goto err_out; } if (ifaces[i] > MAX_INDEX_NUM) { - printf("Interface index to large\n"); + printf(" interface index too large\n"); goto err_out; } printf(" %d", ifaces[i]); From f47d4ffe3a84ae11fc4bddc37939b9719467042c Mon 
Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Wed, 3 Nov 2021 12:54:53 +0100 Subject: [PATCH 312/433] riscv, bpf: Fix RV32 broken build, and silence RV64 warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 252c765bd764 ("riscv, bpf: Add BPF exception tables") only addressed RV64, and broke the RV32 build [1]. Fix by gating the exception tables code with CONFIG_ARCH_RV64I. Further, silence a "-Wmissing-prototypes" warning [2] in the RV64 BPF JIT. [1] https://lore.kernel.org/llvm/202111020610.9oy9Rr0G-lkp@intel.com/ [2] https://lore.kernel.org/llvm/202110290334.2zdMyRq4-lkp@intel.com/ Fixes: 252c765bd764 ("riscv, bpf: Add BPF exception tables") Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Reviewed-by: Tong Tiangen Link: https://lore.kernel.org/bpf/20211103115453.397209-1-bjorn@kernel.org --- arch/riscv/mm/extable.c | 4 ++-- arch/riscv/net/bpf_jit_comp64.c | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/riscv/mm/extable.c b/arch/riscv/mm/extable.c index 18bf338303b6..ddb7d3b99e89 100644 --- a/arch/riscv/mm/extable.c +++ b/arch/riscv/mm/extable.c @@ -11,7 +11,7 @@ #include #include -#ifdef CONFIG_BPF_JIT +#if defined(CONFIG_BPF_JIT) && defined(CONFIG_ARCH_RV64I) int rv_bpf_fixup_exception(const struct exception_table_entry *ex, struct pt_regs *regs); #endif @@ -23,7 +23,7 @@ int fixup_exception(struct pt_regs *regs) if (!fixup) return 0; -#ifdef CONFIG_BPF_JIT +#if defined(CONFIG_BPF_JIT) && defined(CONFIG_ARCH_RV64I) if (regs->epc >= BPF_JIT_REGION_START && regs->epc < BPF_JIT_REGION_END) return rv_bpf_fixup_exception(fixup, regs); #endif diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index 2ca345c7b0bf..f2a779c7e225 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -459,6 +459,8 @@ static int emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx) #define BPF_FIXUP_OFFSET_MASK 
GENMASK(26, 0) #define BPF_FIXUP_REG_MASK GENMASK(31, 27) +int rv_bpf_fixup_exception(const struct exception_table_entry *ex, + struct pt_regs *regs); int rv_bpf_fixup_exception(const struct exception_table_entry *ex, struct pt_regs *regs) { From 96cfe05051fd8543cdedd6807ec59a0e6c409195 Mon Sep 17 00:00:00 2001 From: Subbaraman Narayanamurthy Date: Thu, 4 Nov 2021 16:57:07 -0700 Subject: [PATCH 313/433] thermal: Fix NULL pointer dereferences in of_thermal_ functions of_parse_thermal_zones() parses the thermal-zones node and registers a thermal_zone device for each subnode. However, if a thermal zone is consuming a thermal sensor and that thermal sensor device hasn't probed yet, an attempt to set trip_point_*_temp for that thermal zone device can cause a NULL pointer dereference. Fix it. console:/sys/class/thermal/thermal_zone87 # echo 120000 > trip_point_0_temp ... Unable to handle kernel NULL pointer dereference at virtual address 0000000000000020 ... Call trace: of_thermal_set_trip_temp+0x40/0xc4 trip_point_temp_store+0xc0/0x1dc dev_attr_store+0x38/0x88 sysfs_kf_write+0x64/0xc0 kernfs_fop_write_iter+0x108/0x1d0 vfs_write+0x2f4/0x368 ksys_write+0x7c/0xec __arm64_sys_write+0x20/0x30 el0_svc_common.llvm.7279915941325364641+0xbc/0x1bc do_el0_svc+0x28/0xa0 el0_svc+0x14/0x24 el0_sync_handler+0x88/0xec el0_sync+0x1c0/0x200 While at it, fix the possible NULL pointer dereference in other functions as well: of_thermal_get_temp(), of_thermal_set_emul_temp(), of_thermal_get_trend(). Suggested-by: David Collins Signed-off-by: Subbaraman Narayanamurthy Acked-by: Daniel Lezcano Signed-off-by: Rafael J. 
Wysocki --- drivers/thermal/thermal_of.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c index 6379f26a335f..9233f7e74454 100644 --- a/drivers/thermal/thermal_of.c +++ b/drivers/thermal/thermal_of.c @@ -89,7 +89,7 @@ static int of_thermal_get_temp(struct thermal_zone_device *tz, { struct __thermal_zone *data = tz->devdata; - if (!data->ops->get_temp) + if (!data->ops || !data->ops->get_temp) return -EINVAL; return data->ops->get_temp(data->sensor_data, temp); @@ -186,6 +186,9 @@ static int of_thermal_set_emul_temp(struct thermal_zone_device *tz, { struct __thermal_zone *data = tz->devdata; + if (!data->ops || !data->ops->set_emul_temp) + return -EINVAL; + return data->ops->set_emul_temp(data->sensor_data, temp); } @@ -194,7 +197,7 @@ static int of_thermal_get_trend(struct thermal_zone_device *tz, int trip, { struct __thermal_zone *data = tz->devdata; - if (!data->ops->get_trend) + if (!data->ops || !data->ops->get_trend) return -EINVAL; return data->ops->get_trend(data->sensor_data, trip, trend); @@ -301,7 +304,7 @@ static int of_thermal_set_trip_temp(struct thermal_zone_device *tz, int trip, if (trip >= data->ntrips || trip < 0) return -EDOM; - if (data->ops->set_trip_temp) { + if (data->ops && data->ops->set_trip_temp) { int ret; ret = data->ops->set_trip_temp(data->sensor_data, trip, temp); From 567af705206564946f724cf752ff36cb7a2935e3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 5 Nov 2021 17:31:54 +0100 Subject: [PATCH 314/433] thermal: Replace pr_warn() with pr_warn_once() in user_space_bind() Use pr_warn_once() instead of pr_warn() to print the user space governor deprecation message in user_space_bind() to reduce the kernel log noise. Fixes: 0275c9fb0eff ("thermal/core: Make the userspace governor deprecated") Reported-by: Linus Torvalds Signed-off-by: Rafael J. 
Wysocki Acked-by: Daniel Lezcano --- drivers/thermal/gov_user_space.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/gov_user_space.c b/drivers/thermal/gov_user_space.c index f4fe050e1cbc..64a18e354a20 100644 --- a/drivers/thermal/gov_user_space.c +++ b/drivers/thermal/gov_user_space.c @@ -17,8 +17,8 @@ static int user_space_bind(struct thermal_zone_device *tz) { - pr_warn("Userspace governor deprecated: use thermal netlink " \ - "notification instead\n"); + pr_warn_once("Userspace governor deprecated: use thermal netlink " \ + "notification instead\n"); return 0; } From 4798f8058d6b6bcfaedf5560fc28a83f404cbb57 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 5 Nov 2021 12:35:26 -0400 Subject: [PATCH 315/433] NFS: Don't trace an uninitialised value If fhandle is NULL or fattr is NULL, then 'error' is uninitialised. Reported-by: Nathan Chancellor Signed-off-by: Trond Myklebust Reviewed-by: Nathan Chancellor --- fs/nfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 210c5945ac2b..36cb1012c7e1 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1814,7 +1814,7 @@ no_entry: } nfs_set_verifier(dentry, dir_verifier); out_label: - trace_nfs_lookup_exit(dir, dentry, flags, error); + trace_nfs_lookup_exit(dir, dentry, flags, PTR_ERR_OR_ZERO(res)); nfs4_label_free(label); out: nfs_free_fattr(fattr); From 6659db4c59842343da46b97017574130f95143a9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 4 Nov 2021 17:18:01 -0400 Subject: [PATCH 316/433] NFSv4: Ensure decode_compound_hdr() sanity checks the tag The server is supposed to return the same tag that the client sends in the outgoing RPC call, but we should still sanity check the length just in case. 
Reported-by: Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index a8cff19c6f00..f206d41d6bee 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3168,20 +3168,23 @@ static int decode_opaque_inline(struct xdr_stream *xdr, unsigned int *len, char static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr) { - __be32 *p; + ssize_t ret; + void *ptr; + u32 tmp; - p = xdr_inline_decode(xdr, 8); - if (unlikely(!p)) + if (xdr_stream_decode_u32(xdr, &tmp) < 0) return -EIO; - hdr->status = be32_to_cpup(p++); - hdr->taglen = be32_to_cpup(p); + hdr->status = tmp; - p = xdr_inline_decode(xdr, hdr->taglen + 4); - if (unlikely(!p)) + ret = xdr_stream_decode_opaque_inline(xdr, &ptr, NFS4_OPAQUE_LIMIT); + if (ret < 0) return -EIO; - hdr->tag = (char *)p; - p += XDR_QUADLEN(hdr->taglen); - hdr->nops = be32_to_cpup(p); + hdr->taglen = ret; + hdr->tag = ptr; + + if (xdr_stream_decode_u32(xdr, &tmp) < 0) + return -EIO; + hdr->nops = tmp; if (unlikely(hdr->nops < 1)) return nfs4_stat_to_errno(hdr->status); return 0; From f114759c322edb4f293941a62748e31858852cd1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 4 Nov 2021 17:33:36 -0400 Subject: [PATCH 317/433] NFSv4: Fix potential Oops in decode_op_map() The return value of xdr_inline_decode() is not being checked, leading to a potential Oops. Just replace the open coded array decode with the generic XDR version. 
Reported-by: Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index f206d41d6bee..63a12181e6c7 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5575,20 +5575,9 @@ static int decode_secinfo_no_name(struct xdr_stream *xdr, struct nfs4_secinfo_re static int decode_op_map(struct xdr_stream *xdr, struct nfs4_op_map *op_map) { - __be32 *p; - uint32_t bitmap_words; - unsigned int i; - - p = xdr_inline_decode(xdr, 4); - if (!p) + if (xdr_stream_decode_uint32_array(xdr, op_map->u.words, + ARRAY_SIZE(op_map->u.words)) < 0) return -EIO; - bitmap_words = be32_to_cpup(p++); - if (bitmap_words > NFS4_OP_MAP_NUM_WORDS) - return -EIO; - p = xdr_inline_decode(xdr, 4 * bitmap_words); - for (i = 0; i < bitmap_words; i++) - op_map->u.words[i] = be32_to_cpup(p++); - return 0; } From e48c81bbc188964ac3932539e53287491f2c1d87 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 5 Nov 2021 13:40:11 -0400 Subject: [PATCH 318/433] NFSv4: Remove unnecessary 'minor version' check It is completely redundant to the server capability check. 
Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b81b2d2f47ad..b4cb75c45f90 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -378,14 +378,10 @@ void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) { - struct nfs4_label *label = NULL; - int minor_version = server->nfs_client->cl_minorversion; - - if (minor_version < 2) - return label; + struct nfs4_label *label; if (!(server->caps & NFS_CAP_SECURITY_LABEL)) - return label; + return NULL; label = kzalloc(sizeof(struct nfs4_label), flags); if (label == NULL) From 156cd28562a4e8ca454d11b234d9f634a45d6390 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 5 Nov 2021 14:23:30 -0400 Subject: [PATCH 319/433] NFS: Don't allocate nfs_fattr on the stack in __nfs42_ssc_open() The preferred behaviour is always to allocate struct nfs_fattr from the slab. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4file.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index c91565227ea2..f9f50fe1f3a4 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -317,7 +317,7 @@ static int read_name_gen = 1; static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, struct nfs_fh *src_fh, nfs4_stateid *stateid) { - struct nfs_fattr fattr; + struct nfs_fattr *fattr = nfs_alloc_fattr(); struct file *filep, *res; struct nfs_server *server; struct inode *r_ino = NULL; @@ -328,9 +328,10 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, server = NFS_SERVER(ss_mnt->mnt_root->d_inode); - nfs_fattr_init(&fattr); + if (!fattr) + return ERR_PTR(-ENOMEM); - status = nfs4_proc_getattr(server, src_fh, &fattr, NULL, NULL); + status = nfs4_proc_getattr(server, src_fh, fattr, NULL, NULL); if (status < 0) { res = ERR_PTR(status); goto out; @@ -343,7 +344,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, goto out; snprintf(read_name, len, SSC_READ_NAME_BODY, read_name_gen++); - r_ino = nfs_fhget(ss_mnt->mnt_root->d_inode->i_sb, src_fh, &fattr, + r_ino = nfs_fhget(ss_mnt->mnt_root->d_inode->i_sb, src_fh, fattr, NULL); if (IS_ERR(r_ino)) { res = ERR_CAST(r_ino); @@ -388,6 +389,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, out_free_name: kfree(read_name); out: + nfs_free_fattr(fattr); return res; out_stateowner: nfs4_put_state_owner(sp); From aa97a3ef15c303697ca58340938a5c385be00cf7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 5 Nov 2021 14:32:28 -0400 Subject: [PATCH 320/433] NFSv4.2: alloc_file_pseudo() takes an open flag, not an f_mode Signed-off-by: Trond Myklebust --- fs/nfs/nfs4file.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index f9f50fe1f3a4..92a1b992a141 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -351,13 +351,12 @@ static struct file 
*__nfs42_ssc_open(struct vfsmount *ss_mnt, goto out_free_name; } - filep = alloc_file_pseudo(r_ino, ss_mnt, read_name, FMODE_READ, + filep = alloc_file_pseudo(r_ino, ss_mnt, read_name, O_RDONLY, r_ino->i_fop); if (IS_ERR(filep)) { res = ERR_CAST(filep); goto out_free_name; } - filep->f_mode |= FMODE_READ; ctx = alloc_nfs_open_context(filep->f_path.dentry, filep->f_mode, filep); From d4a95a7e5a4d3b68b26f70668cf77324a11b5718 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 4 Nov 2021 18:03:26 -0400 Subject: [PATCH 321/433] NFS: Always initialise fattr->label in nfs_fattr_alloc() We're about to add a check in nfs_free_fattr() for whether or not the label is non-zero. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b4cb75c45f90..0b5b1e44b2c4 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1598,8 +1598,10 @@ struct nfs_fattr *nfs_alloc_fattr(void) struct nfs_fattr *fattr; fattr = kmalloc(sizeof(*fattr), GFP_NOFS); - if (fattr != NULL) + if (fattr != NULL) { nfs_fattr_init(fattr); + fattr->label = NULL; + } return fattr; } EXPORT_SYMBOL_GPL(nfs_alloc_fattr); From d755ad8dc752d44545613ea04d660aed674e540d Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 22 Oct 2021 13:11:00 -0400 Subject: [PATCH 322/433] NFS: Create a new nfs_alloc_fattr_with_label() function For creating fattrs with the label field already allocated for us. I also update nfs_free_fattr() to free the label in the end. 
Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/getroot.c | 17 ++++++----------- fs/nfs/inode.c | 17 +++++++++++++++++ fs/nfs/internal.h | 9 --------- include/linux/nfs_fs.h | 13 +++++++++++++ 4 files changed, 36 insertions(+), 20 deletions(-) diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 59355c106ece..7604cb6a0ac2 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -80,18 +80,15 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc) goto out; /* get the actual root for this mount */ - fsinfo.fattr = nfs_alloc_fattr(); + fsinfo.fattr = nfs_alloc_fattr_with_label(server); if (fsinfo.fattr == NULL) goto out_name; - fsinfo.fattr->label = nfs4_label_alloc(server, GFP_KERNEL); - if (IS_ERR(fsinfo.fattr->label)) - goto out_fattr; error = server->nfs_client->rpc_ops->getroot(server, ctx->mntfh, &fsinfo); if (error < 0) { dprintk("nfs_get_root: getattr error = %d\n", -error); nfs_errorf(fc, "NFS: Couldn't getattr on root"); - goto out_label; + goto out_fattr; } inode = nfs_fhget(s, ctx->mntfh, fsinfo.fattr, NULL); @@ -99,12 +96,12 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc) dprintk("nfs_get_root: get root inode failed\n"); error = PTR_ERR(inode); nfs_errorf(fc, "NFS: Couldn't get root inode"); - goto out_label; + goto out_fattr; } error = nfs_superblock_set_dummy_root(s, inode); if (error != 0) - goto out_label; + goto out_fattr; /* root dentries normally start off anonymous and get spliced in later * if the dentry tree reaches them; however if the dentry already @@ -115,7 +112,7 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc) dprintk("nfs_get_root: get root dentry failed\n"); error = PTR_ERR(root); nfs_errorf(fc, "NFS: Couldn't get root dentry"); - goto out_label; + goto out_fattr; } security_d_instantiate(root, inode); @@ -154,8 +151,6 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc) nfs_setsecurity(inode, fsinfo.fattr, fsinfo.fattr->label); error = 0; -out_label: - 
nfs4_label_free(fsinfo.fattr->label); out_fattr: nfs_free_fattr(fsinfo.fattr); out_name: @@ -165,5 +160,5 @@ out: error_splat_root: dput(fc->root); fc->root = NULL; - goto out_label; + goto out_fattr; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 0b5b1e44b2c4..84c7efa2ea87 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1606,6 +1606,23 @@ struct nfs_fattr *nfs_alloc_fattr(void) } EXPORT_SYMBOL_GPL(nfs_alloc_fattr); +struct nfs_fattr *nfs_alloc_fattr_with_label(struct nfs_server *server) +{ + struct nfs_fattr *fattr = nfs_alloc_fattr(); + + if (!fattr) + return NULL; + + fattr->label = nfs4_label_alloc(server, GFP_NOFS); + if (IS_ERR(fattr->label)) { + kfree(fattr); + return NULL; + } + + return fattr; +} +EXPORT_SYMBOL_GPL(nfs_alloc_fattr_with_label); + struct nfs_fh *nfs_alloc_fhandle(void) { struct nfs_fh *fh; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 123078c76495..12f6acb483bb 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -342,14 +342,6 @@ nfs4_label_copy(struct nfs4_label *dst, struct nfs4_label *src) return dst; } -static inline void nfs4_label_free(struct nfs4_label *label) -{ - if (label) { - kfree(label->label); - kfree(label); - } - return; -} static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi) { @@ -358,7 +350,6 @@ static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi) } #else static inline struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) { return NULL; } -static inline void nfs4_label_free(void *label) {} static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi) { } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 739ca1ef934f..88c3aed8ad39 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -426,9 +426,22 @@ extern void nfs_fattr_set_barrier(struct nfs_fattr *fattr); extern unsigned long nfs_inc_attr_generation_counter(void); extern struct nfs_fattr *nfs_alloc_fattr(void); +extern struct nfs_fattr 
*nfs_alloc_fattr_with_label(struct nfs_server *server); + +static inline void nfs4_label_free(struct nfs4_label *label) +{ +#ifdef CONFIG_NFS_V4_SECURITY_LABEL + if (label) { + kfree(label->label); + kfree(label); + } +#endif +} static inline void nfs_free_fattr(const struct nfs_fattr *fattr) { + if (fattr) + nfs4_label_free(fattr->label); kfree(fattr); } From b1db9a401d464d526d5941f0544e7c9ea37fa731 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 22 Oct 2021 13:11:01 -0400 Subject: [PATCH 323/433] NFS: Remove the nfs4_label from the nfs_entry struct And instead allocate the fattr using nfs_alloc_fattr_with_label() Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 21 +++++++-------------- fs/nfs/nfs4xdr.c | 2 +- include/linux/nfs_xdr.h | 1 - 3 files changed, 8 insertions(+), 16 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 36cb1012c7e1..92530c3c1694 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -682,7 +682,8 @@ again: nfs_set_verifier(dentry, dir_verifier); status = nfs_refresh_inode(d_inode(dentry), entry->fattr); if (!status) - nfs_setsecurity(d_inode(dentry), entry->fattr, entry->label); + nfs_setsecurity(d_inode(dentry), entry->fattr, + entry->fattr->label); goto out; } else { d_invalidate(dentry); @@ -696,7 +697,7 @@ again: goto out; } - inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr, entry->label); + inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr, entry->fattr->label); alias = d_splice_alias(inode, dentry); d_lookup_done(dentry); if (alias) { @@ -732,8 +733,8 @@ static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc, xdr_set_scratch_page(&stream, scratch); do { - if (entry->label) - entry->label->len = NFS4_MAXLABELLEN; + if (entry->fattr->label) + entry->fattr->label->len = NFS4_MAXLABELLEN; status = xdr_decode(desc, entry, &stream); if (status != 0) @@ -838,21 +839,15 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc, return -ENOMEM; 
entry->cookie = nfs_readdir_page_last_cookie(page); entry->fh = nfs_alloc_fhandle(); - entry->fattr = nfs_alloc_fattr(); + entry->fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode)); entry->server = NFS_SERVER(inode); if (entry->fh == NULL || entry->fattr == NULL) goto out; - entry->label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT); - if (IS_ERR(entry->label)) { - status = PTR_ERR(entry->label); - goto out; - } - array_size = (dtsize + PAGE_SIZE - 1) >> PAGE_SHIFT; pages = nfs_readdir_alloc_pages(array_size); if (!pages) - goto out_release_label; + goto out; do { unsigned int pglen; @@ -875,8 +870,6 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc, } while (!status && nfs_readdir_page_needs_filling(page)); nfs_readdir_free_pages(pages, array_size); -out_release_label: - nfs4_label_free(entry->label); out: nfs_free_fattr(entry->fattr); nfs_free_fhandle(entry->fh); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 63a12181e6c7..fba89f82e7b7 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -7467,7 +7467,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, return -EAGAIN; if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, - NULL, entry->label, entry->server) < 0) + NULL, entry->fattr->label, entry->server) < 0) return -EAGAIN; if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) entry->ino = entry->fattr->mounted_on_fileid; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index e9698b6278a5..9960f6628066 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -753,7 +753,6 @@ struct nfs_entry { int eof; struct nfs_fh * fh; struct nfs_fattr * fattr; - struct nfs4_label *label; unsigned char d_type; struct nfs_server * server; }; From 68be1742c22983558f0f148a4467eb9127d56b86 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 22 Oct 2021 13:11:02 -0400 Subject: [PATCH 324/433] NFS: Remove the nfs4_label from the nfs4_create_res struct Instead, use 
the label embedded in the attached fattr. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 12 +++++------- fs/nfs/nfs4xdr.c | 2 +- include/linux/nfs_xdr.h | 1 - 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 127388fabda8..2e07550dd0d4 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4850,7 +4850,6 @@ struct nfs4_createdata { struct nfs4_create_res res; struct nfs_fh fh; struct nfs_fattr fattr; - struct nfs4_label *label; }; static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, @@ -4862,8 +4861,8 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, if (data != NULL) { struct nfs_server *server = NFS_SERVER(dir); - data->label = nfs4_label_alloc(server, GFP_KERNEL); - if (IS_ERR(data->label)) + data->fattr.label = nfs4_label_alloc(server, GFP_KERNEL); + if (IS_ERR(data->fattr.label)) goto out_free; data->msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE]; @@ -4874,12 +4873,11 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, data->arg.name = name; data->arg.attrs = sattr; data->arg.ftype = ftype; - data->arg.bitmask = nfs4_bitmask(server, data->label); + data->arg.bitmask = nfs4_bitmask(server, data->fattr.label); data->arg.umask = current_umask(); data->res.server = server; data->res.fh = &data->fh; data->res.fattr = &data->fattr; - data->res.label = data->label; nfs_fattr_init(data->res.fattr); } return data; @@ -4901,14 +4899,14 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_ data->res.fattr->time_start, NFS_INO_INVALID_DATA); spin_unlock(&dir->i_lock); - status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, data->res.label); + status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, data->res.fattr->label); } return status; } static void nfs4_free_createdata(struct nfs4_createdata *data) { - nfs4_label_free(data->label); + 
nfs4_label_free(data->fattr.label); kfree(data); } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index fba89f82e7b7..38c74833f263 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -6353,7 +6353,7 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_getfh(xdr, res->fh); if (status) goto out; - decode_getfattr_label(xdr, res->fattr, res->label, res->server); + decode_getfattr_label(xdr, res->fattr, res->fattr->label, res->server); out: return status; } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9960f6628066..5aba81b74c98 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1040,7 +1040,6 @@ struct nfs4_create_res { const struct nfs_server * server; struct nfs_fh * fh; struct nfs_fattr * fattr; - struct nfs4_label *label; struct nfs4_change_info dir_cinfo; }; From aa7ca3b2de190675543d84adaa1ff74e7867c76f Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 22 Oct 2021 13:11:03 -0400 Subject: [PATCH 325/433] NFS: Remove the nfs4_label from the nfs4_link_res struct Again, use the fattr's label field instead. 
Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 16 +++------------- fs/nfs/nfs4xdr.c | 2 +- include/linux/nfs_xdr.h | 1 - 3 files changed, 4 insertions(+), 15 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 2e07550dd0d4..bde5b5723046 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4790,7 +4790,6 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct }; struct nfs4_link_res res = { .server = server, - .label = NULL, }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LINK], @@ -4799,18 +4798,12 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct }; int status = -ENOMEM; - res.fattr = nfs_alloc_fattr(); + res.fattr = nfs_alloc_fattr_with_label(server); if (res.fattr == NULL) goto out; - res.label = nfs4_label_alloc(server, GFP_KERNEL); - if (IS_ERR(res.label)) { - status = PTR_ERR(res.label); - goto out; - } - nfs4_inode_make_writeable(inode); - nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, res.label), inode, + nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, res.fattr->label), inode, NFS_INO_INVALID_CHANGE); status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); if (!status) { @@ -4819,12 +4812,9 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct nfs4_inc_nlink(inode); status = nfs_post_op_update_inode(inode, res.fattr); if (!status) - nfs_setsecurity(inode, res.fattr, res.label); + nfs_setsecurity(inode, res.fattr, res.fattr->label); } - - nfs4_label_free(res.label); - out: nfs_free_fattr(res.fattr); return status; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 38c74833f263..4c9d66fac3fd 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -6323,7 +6323,7 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_restorefh(xdr); if (status) goto out; - decode_getfattr_label(xdr, res->fattr, 
res->label, res->server); + decode_getfattr_label(xdr, res->fattr, res->fattr->label, res->server); out: return status; } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 5aba81b74c98..d55bf3fd5167 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1079,7 +1079,6 @@ struct nfs4_link_res { struct nfs4_sequence_res seq_res; const struct nfs_server * server; struct nfs_fattr * fattr; - struct nfs4_label *label; struct nfs4_change_info cinfo; struct nfs_fattr * dir_attr; }; From 9558a007dbc383d48e7f5a123d0b5ff656c71068 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 22 Oct 2021 13:11:04 -0400 Subject: [PATCH 326/433] NFS: Remove the label from the nfs4_lookup_res struct And use the fattr's label field instead. I also adjust function calls to remove labels along the way. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 34 ++++++++++++---------------------- fs/nfs/namespace.c | 3 +-- fs/nfs/nfs3proc.c | 3 +-- fs/nfs/nfs4proc.c | 16 +++++++--------- fs/nfs/nfs4xdr.c | 4 ++-- fs/nfs/proc.c | 3 +-- include/linux/nfs_xdr.h | 4 +--- 7 files changed, 25 insertions(+), 42 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 92530c3c1694..8a327971d485 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1494,19 +1494,17 @@ nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry, { struct nfs_fh *fhandle; struct nfs_fattr *fattr; - struct nfs4_label *label; unsigned long dir_verifier; int ret; ret = -ENOMEM; fhandle = nfs_alloc_fhandle(); - fattr = nfs_alloc_fattr(); - label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL); - if (fhandle == NULL || fattr == NULL || IS_ERR(label)) + fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode)); + if (fhandle == NULL || fattr == NULL) goto out; dir_verifier = nfs_save_change_attribute(dir); - ret = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr, label); + ret = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr); if (ret < 0) { switch (ret)
{ case -ESTALE: @@ -1525,7 +1523,7 @@ nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry, if (nfs_refresh_inode(inode, fattr) < 0) goto out; - nfs_setsecurity(inode, fattr, label); + nfs_setsecurity(inode, fattr, fattr->label); nfs_set_verifier(dentry, dir_verifier); /* set a readdirplus hint that we had a cache miss */ @@ -1534,7 +1532,6 @@ nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry, out: nfs_free_fattr(fattr); nfs_free_fhandle(fhandle); - nfs4_label_free(label); /* * If the lookup failed despite the dentry change attribute being @@ -1754,7 +1751,6 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in struct inode *inode = NULL; struct nfs_fh *fhandle = NULL; struct nfs_fattr *fattr = NULL; - struct nfs4_label *label = NULL; unsigned long dir_verifier; int error; @@ -1773,27 +1769,23 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in res = ERR_PTR(-ENOMEM); fhandle = nfs_alloc_fhandle(); - fattr = nfs_alloc_fattr(); + fattr = nfs_alloc_fattr_with_label(NFS_SERVER(dir)); if (fhandle == NULL || fattr == NULL) goto out; - label = nfs4_label_alloc(NFS_SERVER(dir), GFP_NOWAIT); - if (IS_ERR(label)) - goto out; - dir_verifier = nfs_save_change_attribute(dir); trace_nfs_lookup_enter(dir, dentry, flags); - error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr, label); + error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr); if (error == -ENOENT) goto no_entry; if (error < 0) { res = ERR_PTR(error); - goto out_label; + goto out; } - inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label); + inode = nfs_fhget(dentry->d_sb, fhandle, fattr, fattr->label); res = ERR_CAST(inode); if (IS_ERR(res)) - goto out_label; + goto out; /* Notify readdir to use READDIRPLUS */ nfs_force_use_readdirplus(dir); @@ -1802,14 +1794,12 @@ no_entry: res = d_splice_alias(inode, dentry); if (res != NULL) { if (IS_ERR(res)) - goto out_label; + goto out; dentry = res; } 
nfs_set_verifier(dentry, dir_verifier); -out_label: - trace_nfs_lookup_exit(dir, dentry, flags, PTR_ERR_OR_ZERO(res)); - nfs4_label_free(label); out: + trace_nfs_lookup_exit(dir, dentry, flags, PTR_ERR_OR_ZERO(res)); nfs_free_fattr(fattr); nfs_free_fhandle(fhandle); return res; @@ -2058,7 +2048,7 @@ nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle, d_drop(dentry); if (fhandle->size == 0) { - error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr, NULL); + error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr); if (error) goto out_error; } diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index bc0c698f3350..3295af4110f1 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -308,8 +308,7 @@ int nfs_submount(struct fs_context *fc, struct nfs_server *server) /* Look it up again to get its attributes */ err = server->nfs_client->rpc_ops->lookup(d_inode(parent), dentry, - ctx->mntfh, ctx->clone_data.fattr, - NULL); + ctx->mntfh, ctx->clone_data.fattr); dput(parent); if (err != 0) return err; diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index f7524310ddf4..717eb651f0fd 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -193,8 +193,7 @@ __nfs3_proc_lookup(struct inode *dir, const char *name, size_t len, static int nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, - struct nfs_fh *fhandle, struct nfs_fattr *fattr, - struct nfs4_label *label) + struct nfs_fh *fhandle, struct nfs_fattr *fattr) { unsigned short task_flags = 0; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index bde5b5723046..7af73fd34b22 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4294,7 +4294,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct dentry *dentry, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, struct nfs4_label *label) + struct nfs_fattr *fattr) { struct nfs_server *server = NFS_SERVER(dir); int status; @@ -4306,7 +4306,6 
@@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct nfs4_lookup_res res = { .server = server, .fattr = fattr, - .label = label, .fh = fhandle, }; struct rpc_message msg = { @@ -4323,7 +4322,7 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, if (nfs_lookup_is_soft_revalidate(dentry)) task_flags |= RPC_TASK_TIMEOUT; - args.bitmask = nfs4_bitmask(server, label); + args.bitmask = nfs4_bitmask(server, fattr->label); nfs_fattr_init(fattr); @@ -4345,7 +4344,7 @@ static void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr) static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir, struct dentry *dentry, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, struct nfs4_label *label) + struct nfs_fattr *fattr) { struct nfs4_exception exception = { .interruptible = true, @@ -4354,7 +4353,7 @@ static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir, const struct qstr *name = &dentry->d_name; int err; do { - err = _nfs4_proc_lookup(client, dir, dentry, fhandle, fattr, label); + err = _nfs4_proc_lookup(client, dir, dentry, fhandle, fattr); trace_nfs4_lookup(dir, name, err); switch (err) { case -NFS4ERR_BADNAME: @@ -4390,13 +4389,12 @@ out: } static int nfs4_proc_lookup(struct inode *dir, struct dentry *dentry, - struct nfs_fh *fhandle, struct nfs_fattr *fattr, - struct nfs4_label *label) + struct nfs_fh *fhandle, struct nfs_fattr *fattr) { int status; struct rpc_clnt *client = NFS_CLIENT(dir); - status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr, label); + status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr); if (client != NFS_CLIENT(dir)) { rpc_shutdown_client(client); nfs_fixup_secinfo_attributes(fattr); @@ -4411,7 +4409,7 @@ nfs4_proc_lookup_mountpoint(struct inode *dir, struct dentry *dentry, struct rpc_clnt *client = NFS_CLIENT(dir); int status; - status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr, NULL); + status = 
nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr); if (status < 0) return ERR_PTR(status); return (client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 4c9d66fac3fd..960e2b2a7a58 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -6171,7 +6171,7 @@ static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_getfh(xdr, res->fh); if (status) goto out; - status = decode_getfattr_label(xdr, res->fattr, res->label, res->server); + status = decode_getfattr_label(xdr, res->fattr, res->fattr->label, res->server); out: return status; } @@ -6229,7 +6229,7 @@ static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, status = decode_getfh(xdr, res->fh); if (status == 0) status = decode_getfattr_label(xdr, res->fattr, - res->label, res->server); + res->fattr->label, res->server); out: return status; } diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index ecc4e717808c..98a8901ede2e 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -154,8 +154,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, static int nfs_proc_lookup(struct inode *dir, struct dentry *dentry, - struct nfs_fh *fhandle, struct nfs_fattr *fattr, - struct nfs4_label *label) + struct nfs_fh *fhandle, struct nfs_fattr *fattr) { struct nfs_diropargs arg = { .fh = NFS_FH(dir), diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index d55bf3fd5167..95219d5a8668 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1095,7 +1095,6 @@ struct nfs4_lookup_res { const struct nfs_server * server; struct nfs_fattr * fattr; struct nfs_fh * fh; - struct nfs4_label *label; }; struct nfs4_lookupp_arg { @@ -1740,8 +1739,7 @@ struct nfs_rpc_ops { int (*setattr) (struct dentry *, struct nfs_fattr *, struct iattr *); int (*lookup) (struct inode *, struct dentry *, - struct nfs_fh *, struct nfs_fattr *, - struct nfs4_label *); + struct nfs_fh *, struct nfs_fattr *); int 
(*lookupp) (struct inode *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *); int (*access) (struct inode *, struct nfs_access_entry *); From ba4bc8dc4d937df2b407393435a302550be0ad82 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 22 Oct 2021 13:11:05 -0400 Subject: [PATCH 327/433] NFS: Remove the nfs4_label from the nfs4_lookupp_res struct Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/export.c | 30 +++++++++--------------------- fs/nfs/nfs3proc.c | 2 +- fs/nfs/nfs4proc.c | 10 ++++------ fs/nfs/nfs4xdr.c | 2 +- include/linux/nfs_xdr.h | 3 +-- 5 files changed, 16 insertions(+), 31 deletions(-) diff --git a/fs/nfs/export.c b/fs/nfs/export.c index d772c20bbfd1..895b404888dd 100644 --- a/fs/nfs/export.c +++ b/fs/nfs/export.c @@ -131,7 +131,6 @@ nfs_get_parent(struct dentry *dentry) struct super_block *sb = inode->i_sb; struct nfs_server *server = NFS_SB(sb); struct nfs_fattr *fattr = NULL; - struct nfs4_label *label = NULL; struct dentry *parent; struct nfs_rpc_ops const *ops = server->nfs_client->rpc_ops; struct nfs_fh fh; @@ -139,31 +138,20 @@ nfs_get_parent(struct dentry *dentry) if (!ops->lookupp) return ERR_PTR(-EACCES); - fattr = nfs_alloc_fattr(); - if (fattr == NULL) { - parent = ERR_PTR(-ENOMEM); + fattr = nfs_alloc_fattr_with_label(server); + if (fattr == NULL) + return ERR_PTR(-ENOMEM); + + ret = ops->lookupp(inode, &fh, fattr); + if (ret) { + parent = ERR_PTR(ret); goto out; } - label = nfs4_label_alloc(server, GFP_KERNEL); - if (IS_ERR(label)) { - parent = ERR_CAST(label); - goto out_free_fattr; - } - - ret = ops->lookupp(inode, &fh, fattr, label); - if (ret) { - parent = ERR_PTR(ret); - goto out_free_label; - } - - pinode = nfs_fhget(sb, &fh, fattr, label); + pinode = nfs_fhget(sb, &fh, fattr, fattr->label); parent = d_obtain_alias(pinode); -out_free_label: - nfs4_label_free(label); -out_free_fattr: - nfs_free_fattr(fattr); out: + nfs_free_fattr(fattr); return parent; } diff --git a/fs/nfs/nfs3proc.c 
b/fs/nfs/nfs3proc.c index 717eb651f0fd..516f3340b226 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -208,7 +208,7 @@ nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, } static int nfs3_proc_lookupp(struct inode *inode, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, struct nfs4_label *label) + struct nfs_fattr *fattr) { const char dotdot[] = ".."; const size_t len = strlen(dotdot); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7af73fd34b22..caa5a1467f94 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4416,8 +4416,7 @@ nfs4_proc_lookup_mountpoint(struct inode *dir, struct dentry *dentry, } static int _nfs4_proc_lookupp(struct inode *inode, - struct nfs_fh *fhandle, struct nfs_fattr *fattr, - struct nfs4_label *label) + struct nfs_fh *fhandle, struct nfs_fattr *fattr) { struct rpc_clnt *clnt = NFS_CLIENT(inode); struct nfs_server *server = NFS_SERVER(inode); @@ -4429,7 +4428,6 @@ static int _nfs4_proc_lookupp(struct inode *inode, struct nfs4_lookupp_res res = { .server = server, .fattr = fattr, - .label = label, .fh = fhandle, }; struct rpc_message msg = { @@ -4442,7 +4440,7 @@ static int _nfs4_proc_lookupp(struct inode *inode, if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL) task_flags |= RPC_TASK_TIMEOUT; - args.bitmask = nfs4_bitmask(server, label); + args.bitmask = nfs4_bitmask(server, fattr->label); nfs_fattr_init(fattr); @@ -4454,14 +4452,14 @@ static int _nfs4_proc_lookupp(struct inode *inode, } static int nfs4_proc_lookupp(struct inode *inode, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, struct nfs4_label *label) + struct nfs_fattr *fattr) { struct nfs4_exception exception = { .interruptible = true, }; int err; do { - err = _nfs4_proc_lookupp(inode, fhandle, fattr, label); + err = _nfs4_proc_lookupp(inode, fhandle, fattr); trace_nfs4_lookupp(inode, err); err = nfs4_handle_exception(NFS_SERVER(inode), err, &exception); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 960e2b2a7a58..0044747f9314 100644 
--- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -6201,7 +6201,7 @@ static int nfs4_xdr_dec_lookupp(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_getfh(xdr, res->fh); if (status) goto out; - status = decode_getfattr_label(xdr, res->fattr, res->label, res->server); + status = decode_getfattr_label(xdr, res->fattr, res->fattr->label, res->server); out: return status; } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 95219d5a8668..f0a685d9b8bd 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1108,7 +1108,6 @@ struct nfs4_lookupp_res { const struct nfs_server *server; struct nfs_fattr *fattr; struct nfs_fh *fh; - struct nfs4_label *label; }; struct nfs4_lookup_root_arg { @@ -1741,7 +1740,7 @@ struct nfs_rpc_ops { int (*lookup) (struct inode *, struct dentry *, struct nfs_fh *, struct nfs_fattr *); int (*lookupp) (struct inode *, struct nfs_fh *, - struct nfs_fattr *, struct nfs4_label *); + struct nfs_fattr *); int (*access) (struct inode *, struct nfs_access_entry *); int (*readlink)(struct inode *, struct page *, unsigned int, unsigned int); From 76baa2b29c7161bc65a3051d311297b7d7fc827a Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 22 Oct 2021 13:11:06 -0400 Subject: [PATCH 328/433] NFS: Remove the f_label from the nfs4_opendata and nfs_openres Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 1 - fs/nfs/nfs4proc.c | 35 +++++++++++------------------------ fs/nfs/nfs4xdr.c | 2 +- include/linux/nfs_xdr.h | 1 - 4 files changed, 12 insertions(+), 27 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index ba78df4b13d9..b621e29e6187 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -234,7 +234,6 @@ struct nfs4_opendata { struct nfs4_string group_name; struct nfs4_label *a_label; struct nfs_fattr f_attr; - struct nfs4_label *f_label; struct dentry *dir; struct dentry *dentry; struct nfs4_state_owner *owner; diff --git a/fs/nfs/nfs4proc.c 
b/fs/nfs/nfs4proc.c index caa5a1467f94..cb0613e0ef8f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1329,7 +1329,6 @@ nfs4_map_atomic_open_claim(struct nfs_server *server, static void nfs4_init_opendata_res(struct nfs4_opendata *p) { p->o_res.f_attr = &p->f_attr; - p->o_res.f_label = p->f_label; p->o_res.seqid = p->o_arg.seqid; p->c_res.seqid = p->c_arg.seqid; p->o_res.server = p->o_arg.server; @@ -1355,8 +1354,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, if (p == NULL) goto err; - p->f_label = nfs4_label_alloc(server, gfp_mask); - if (IS_ERR(p->f_label)) + p->f_attr.label = nfs4_label_alloc(server, gfp_mask); + if (IS_ERR(p->f_attr.label)) goto err_free_p; p->a_label = nfs4_label_alloc(server, gfp_mask); @@ -1434,7 +1433,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, err_free_label: nfs4_label_free(p->a_label); err_free_f: - nfs4_label_free(p->f_label); + nfs4_label_free(p->f_attr.label); err_free_p: kfree(p); err: @@ -1456,7 +1455,7 @@ static void nfs4_opendata_free(struct kref *kref) nfs4_put_state_owner(p->owner); nfs4_label_free(p->a_label); - nfs4_label_free(p->f_label); + nfs4_label_free(p->f_attr.label); dput(p->dir); dput(p->dentry); @@ -2009,7 +2008,7 @@ nfs4_opendata_get_inode(struct nfs4_opendata *data) if (!(data->f_attr.valid & NFS_ATTR_FATTR)) return ERR_PTR(-EAGAIN); inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, - &data->f_attr, data->f_label); + &data->f_attr, data->f_attr.label); break; default: inode = d_inode(data->dentry); @@ -2709,7 +2708,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data, if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) { nfs4_sequence_free_slot(&o_res->seq_res); nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, - o_res->f_label, NULL); + o_res->f_attr->label, NULL); } return 0; } @@ -3125,7 +3124,6 @@ static int _nfs4_do_open(struct inode *dir, enum open_claim_type4 claim = NFS4_OPEN_CLAIM_NULL; struct iattr *sattr = c->sattr; struct 
nfs4_label *label = c->label; - struct nfs4_label *olabel = NULL; int status; /* Protect against reboot recovery conflicts */ @@ -3148,19 +3146,11 @@ static int _nfs4_do_open(struct inode *dir, if (opendata == NULL) goto err_put_state_owner; - if (label) { - olabel = nfs4_label_alloc(server, GFP_KERNEL); - if (IS_ERR(olabel)) { - status = PTR_ERR(olabel); - goto err_opendata_put; - } - } - if (server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) { if (!opendata->f_attr.mdsthreshold) { opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc(); if (!opendata->f_attr.mdsthreshold) - goto err_free_label; + goto err_opendata_put; } opendata->o_arg.open_bitmap = &nfs4_pnfs_open_bitmap[0]; } @@ -3169,7 +3159,7 @@ static int _nfs4_do_open(struct inode *dir, status = _nfs4_open_and_get_state(opendata, flags, ctx); if (status != 0) - goto err_free_label; + goto err_opendata_put; state = ctx->state; if ((opendata->o_arg.open_flags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL) && @@ -3186,11 +3176,12 @@ static int _nfs4_do_open(struct inode *dir, nfs_fattr_init(opendata->o_res.f_attr); status = nfs4_do_setattr(state->inode, cred, opendata->o_res.f_attr, sattr, - ctx, label, olabel); + ctx, label, opendata->o_res.f_attr->label); if (status == 0) { nfs_setattr_update_inode(state->inode, sattr, opendata->o_res.f_attr); - nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel); + nfs_setsecurity(state->inode, opendata->o_res.f_attr, + opendata->o_res.f_attr->label); } sattr->ia_valid = ia_old; } @@ -3203,13 +3194,9 @@ static int _nfs4_do_open(struct inode *dir, opendata->f_attr.mdsthreshold = NULL; } - nfs4_label_free(olabel); - nfs4_opendata_put(opendata); nfs4_put_state_owner(sp); return 0; -err_free_label: - nfs4_label_free(olabel); err_opendata_put: nfs4_opendata_put(opendata); err_put_state_owner: diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 0044747f9314..09bd1d121318 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -6524,7 +6524,7 @@ static int 
nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr, goto out; if (res->access_request) decode_access(xdr, &res->access_supported, &res->access_result); - decode_getfattr_label(xdr, res->f_attr, res->f_label, res->server); + decode_getfattr_label(xdr, res->f_attr, res->f_attr->label, res->server); if (res->lg_res) decode_layoutget(xdr, rqstp, res->lg_res); out: diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index f0a685d9b8bd..cb28e01ea41e 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -488,7 +488,6 @@ struct nfs_openres { struct nfs4_change_info cinfo; __u32 rflags; struct nfs_fattr * f_attr; - struct nfs4_label *f_label; struct nfs_seqid * seqid; const struct nfs_server *server; fmode_t delegation_type; From 2ef61e0eaa333e4e9c348c41a4b7abfb34b8736d Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 22 Oct 2021 13:11:07 -0400 Subject: [PATCH 329/433] NFS: Remove the nfs4_label from the nfs4_getattr_res Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 2 +- fs/nfs/dir.c | 2 +- fs/nfs/export.c | 18 ++++-------------- fs/nfs/inode.c | 20 +++++--------------- fs/nfs/nfs3proc.c | 3 +-- fs/nfs/nfs4_fs.h | 3 +-- fs/nfs/nfs4file.c | 2 +- fs/nfs/nfs4proc.c | 25 +++++++++++-------------- fs/nfs/nfs4xdr.c | 2 +- fs/nfs/proc.c | 3 +-- include/linux/nfs_xdr.h | 4 +--- 11 files changed, 28 insertions(+), 56 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 960b9d87648e..1e4dc1ab9312 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1048,7 +1048,7 @@ struct nfs_server *nfs_create_server(struct fs_context *fc) if (!(fattr->valid & NFS_ATTR_FATTR)) { error = ctx->nfs_mod->rpc_ops->getattr(server, ctx->mntfh, - fattr, NULL, NULL); + fattr, NULL); if (error < 0) { dprintk("nfs_create_server: getattr error = %d\n", -error); goto error; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 8a327971d485..aa95a898ad0f 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2056,7 +2056,7 @@ 
nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle, if (!(fattr->valid & NFS_ATTR_FATTR)) { struct nfs_server *server = NFS_SB(dentry->d_sb); error = server->nfs_client->rpc_ops->getattr(server, fhandle, - fattr, NULL, NULL); + fattr, NULL); if (error < 0) goto out_error; } diff --git a/fs/nfs/export.c b/fs/nfs/export.c index 895b404888dd..a0462f7e7e35 100644 --- a/fs/nfs/export.c +++ b/fs/nfs/export.c @@ -64,7 +64,6 @@ static struct dentry * nfs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { - struct nfs4_label *label = NULL; struct nfs_fattr *fattr = NULL; struct nfs_fh *server_fh = nfs_exp_embedfh(fid->raw); size_t fh_size = offsetof(struct nfs_fh, data) + server_fh->size; @@ -79,7 +78,7 @@ nfs_fh_to_dentry(struct super_block *sb, struct fid *fid, if (fh_len < len || fh_type != len) return NULL; - fattr = nfs_alloc_fattr(); + fattr = nfs_alloc_fattr_with_label(NFS_SB(sb)); if (fattr == NULL) { dentry = ERR_PTR(-ENOMEM); goto out; @@ -95,28 +94,19 @@ nfs_fh_to_dentry(struct super_block *sb, struct fid *fid, if (inode) goto out_found; - label = nfs4_label_alloc(NFS_SB(sb), GFP_KERNEL); - if (IS_ERR(label)) { - dentry = ERR_CAST(label); - goto out_free_fattr; - } - rpc_ops = NFS_SB(sb)->nfs_client->rpc_ops; - ret = rpc_ops->getattr(NFS_SB(sb), server_fh, fattr, label, NULL); + ret = rpc_ops->getattr(NFS_SB(sb), server_fh, fattr, NULL); if (ret) { dprintk("%s: getattr failed %d\n", __func__, ret); trace_nfs_fh_to_dentry(sb, server_fh, fattr->fileid, ret); dentry = ERR_PTR(ret); - goto out_free_label; + goto out_free_fattr; } - inode = nfs_fhget(sb, server_fh, fattr, label); + inode = nfs_fhget(sb, server_fh, fattr, fattr->label); out_found: dentry = d_obtain_alias(inode); - -out_free_label: - nfs4_label_free(label); out_free_fattr: nfs_free_fattr(fattr); out: diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 84c7efa2ea87..7d9dca781956 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1194,7 +1194,6 @@ int 
__nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) { int status = -ESTALE; - struct nfs4_label *label = NULL; struct nfs_fattr *fattr = NULL; struct nfs_inode *nfsi = NFS_I(inode); @@ -1216,20 +1215,13 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) } status = -ENOMEM; - fattr = nfs_alloc_fattr(); + fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode)); if (fattr == NULL) goto out; nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); - label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL); - if (IS_ERR(label)) { - status = PTR_ERR(label); - goto out; - } - - status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr, - label, inode); + status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr, inode); if (status != 0) { dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) getattr failed, error=%d\n", inode->i_sb->s_id, @@ -1246,7 +1238,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) else nfs_zap_caches(inode); } - goto err_out; + goto out; } status = nfs_refresh_inode(inode, fattr); @@ -1254,20 +1246,18 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) refresh failed, error=%d\n", inode->i_sb->s_id, (unsigned long long)NFS_FILEID(inode), status); - goto err_out; + goto out; } if (nfsi->cache_validity & NFS_INO_INVALID_ACL) nfs_zap_acl_cache(inode); - nfs_setsecurity(inode, fattr, label); + nfs_setsecurity(inode, fattr, fattr->label); dfprintk(PAGECACHE, "NFS: (%s/%Lu) revalidation complete\n", inode->i_sb->s_id, (unsigned long long)NFS_FILEID(inode)); -err_out: - nfs4_label_free(label); out: nfs_free_fattr(fattr); trace_nfs_revalidate_inode_exit(inode, status); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 516f3340b226..7bae21a2ba05 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -100,8 +100,7 @@ nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, */ static int 
nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, struct nfs4_label *label, - struct inode *inode) + struct nfs_fattr *fattr, struct inode *inode) { struct rpc_message msg = { .rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR], diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index b621e29e6187..ed5eaca6801e 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -316,8 +316,7 @@ extern int nfs4_set_rw_stateid(nfs4_stateid *stateid, const struct nfs_lock_context *l_ctx, fmode_t fmode); extern int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, struct nfs4_label *label, - struct inode *inode); + struct nfs_fattr *fattr, struct inode *inode); extern int update_open_stateid(struct nfs4_state *state, const nfs4_stateid *open_stateid, const nfs4_stateid *deleg_stateid, diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 92a1b992a141..e2451f66024c 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -331,7 +331,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, if (!fattr) return ERR_PTR(-ENOMEM); - status = nfs4_proc_getattr(server, src_fh, fattr, NULL, NULL); + status = nfs4_proc_getattr(server, src_fh, fattr, NULL); if (status < 0) { res = ERR_PTR(status); goto out; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index cb0613e0ef8f..f0262397faec 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -93,7 +93,8 @@ struct nfs4_opendata; static int _nfs4_recover_proc_open(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); -static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label, struct inode *inode); +static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fattr *fattr, struct inode *inode); static int 
nfs4_do_setattr(struct inode *inode, const struct cred *cred, struct nfs_fattr *fattr, struct iattr *sattr, struct nfs_open_context *ctx, struct nfs4_label *ilabel, @@ -2707,8 +2708,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data, } if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) { nfs4_sequence_free_slot(&o_res->seq_res); - nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, - o_res->f_attr->label, NULL); + nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, NULL); } return 0; } @@ -4090,7 +4090,6 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *mntfh, { int error; struct nfs_fattr *fattr = info->fattr; - struct nfs4_label *label = fattr->label; error = nfs4_server_capabilities(server, mntfh); if (error < 0) { @@ -4098,7 +4097,7 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *mntfh, return error; } - error = nfs4_proc_getattr(server, mntfh, fattr, label, NULL); + error = nfs4_proc_getattr(server, mntfh, fattr, NULL); if (error < 0) { dprintk("nfs4_get_root: getattr error = %d\n", -error); goto out; @@ -4161,8 +4160,7 @@ out: } static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, struct nfs4_label *label, - struct inode *inode) + struct nfs_fattr *fattr, struct inode *inode) { __u32 bitmask[NFS4_BITMASK_SZ]; struct nfs4_getattr_arg args = { @@ -4171,7 +4169,6 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, }; struct nfs4_getattr_res res = { .fattr = fattr, - .label = label, .server = server, }; struct rpc_message msg = { @@ -4188,7 +4185,7 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, if (inode && (server->flags & NFS_MOUNT_SOFTREVAL)) task_flags |= RPC_TASK_TIMEOUT; - nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode, 0); + nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, fattr->label), inode, 0); nfs_fattr_init(fattr); 
nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0); return nfs4_do_call_sync(server->client, server, &msg, @@ -4196,15 +4193,14 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, } int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, struct nfs4_label *label, - struct inode *inode) + struct nfs_fattr *fattr, struct inode *inode) { struct nfs4_exception exception = { .interruptible = true, }; int err; do { - err = _nfs4_proc_getattr(server, fhandle, fattr, label, inode); + err = _nfs4_proc_getattr(server, fhandle, fattr, inode); trace_nfs4_getattr(server, fhandle, fattr, err); err = nfs4_handle_exception(server, err, &exception); @@ -5972,17 +5968,18 @@ static int _nfs4_get_security_label(struct inode *inode, void *buf, size_t buflen) { struct nfs_server *server = NFS_SERVER(inode); - struct nfs_fattr fattr; struct nfs4_label label = {0, 0, buflen, buf}; u32 bitmask[3] = { 0, 0, FATTR4_WORD2_SECURITY_LABEL }; + struct nfs_fattr fattr = { + .label = &label, + }; struct nfs4_getattr_arg arg = { .fh = NFS_FH(inode), .bitmask = bitmask, }; struct nfs4_getattr_res res = { .fattr = &fattr, - .label = &label, .server = server, }; struct rpc_message msg = { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 09bd1d121318..d1b61b76bc82 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -6386,7 +6386,7 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_putfh(xdr); if (status) goto out; - status = decode_getfattr_label(xdr, res->fattr, res->label, res->server); + status = decode_getfattr_label(xdr, res->fattr, res->fattr->label, res->server); out: return status; } diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 98a8901ede2e..baee21c2c091 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -100,8 +100,7 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, */ static int nfs_proc_getattr(struct nfs_server *server, struct 
nfs_fh *fhandle, - struct nfs_fattr *fattr, struct nfs4_label *label, - struct inode *inode) + struct nfs_fattr *fattr, struct inode *inode) { struct rpc_message msg = { .rpc_proc = &nfs_procedures[NFSPROC_GETATTR], diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index cb28e01ea41e..817f1bf5f187 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1063,7 +1063,6 @@ struct nfs4_getattr_res { struct nfs4_sequence_res seq_res; const struct nfs_server * server; struct nfs_fattr * fattr; - struct nfs4_label *label; }; struct nfs4_link_arg { @@ -1732,8 +1731,7 @@ struct nfs_rpc_ops { int (*submount) (struct fs_context *, struct nfs_server *); int (*try_get_tree) (struct fs_context *); int (*getattr) (struct nfs_server *, struct nfs_fh *, - struct nfs_fattr *, struct nfs4_label *, - struct inode *); + struct nfs_fattr *, struct inode *); int (*setattr) (struct dentry *, struct nfs_fattr *, struct iattr *); int (*lookup) (struct inode *, struct dentry *, From 1b00ad657997c8984a9e627a3bd37ea14f20beb2 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 22 Oct 2021 13:11:08 -0400 Subject: [PATCH 330/433] NFS: Remove the nfs4_label from the nfs_setattrres Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 2 +- fs/nfs/nfs4proc.c | 56 ++++++++++++----------------------------- fs/nfs/nfs4xdr.c | 2 +- include/linux/nfs_xdr.h | 1 - 4 files changed, 18 insertions(+), 43 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 7d9dca781956..df109287f2e0 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -650,7 +650,7 @@ nfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, if (S_ISREG(inode->i_mode)) nfs_sync_inode(inode); - fattr = nfs_alloc_fattr(); + fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode)); if (fattr == NULL) { error = -ENOMEM; goto out; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f0262397faec..a0762ecc0c73 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c 
@@ -97,8 +97,7 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct inode *inode); static int nfs4_do_setattr(struct inode *inode, const struct cred *cred, struct nfs_fattr *fattr, struct iattr *sattr, - struct nfs_open_context *ctx, struct nfs4_label *ilabel, - struct nfs4_label *olabel); + struct nfs_open_context *ctx, struct nfs4_label *ilabel); #ifdef CONFIG_NFS_V4_1 static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, const struct cred *cred, @@ -3176,7 +3175,7 @@ static int _nfs4_do_open(struct inode *dir, nfs_fattr_init(opendata->o_res.f_attr); status = nfs4_do_setattr(state->inode, cred, opendata->o_res.f_attr, sattr, - ctx, label, opendata->o_res.f_attr->label); + ctx, label); if (status == 0) { nfs_setattr_update_inode(state->inode, sattr, opendata->o_res.f_attr); @@ -3341,8 +3340,7 @@ zero_stateid: static int nfs4_do_setattr(struct inode *inode, const struct cred *cred, struct nfs_fattr *fattr, struct iattr *sattr, - struct nfs_open_context *ctx, struct nfs4_label *ilabel, - struct nfs4_label *olabel) + struct nfs_open_context *ctx, struct nfs4_label *ilabel) { struct nfs_server *server = NFS_SERVER(inode); __u32 bitmask[NFS4_BITMASK_SZ]; @@ -3356,7 +3354,6 @@ static int nfs4_do_setattr(struct inode *inode, const struct cred *cred, }; struct nfs_setattrres res = { .fattr = fattr, - .label = olabel, .server = server, }; struct nfs4_exception exception = { @@ -3373,7 +3370,7 @@ static int nfs4_do_setattr(struct inode *inode, const struct cred *cred, adjust_flags |= NFS_INO_INVALID_OTHER; do { - nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, olabel), + nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, fattr->label), inode, adjust_flags); err = _nfs4_do_setattr(inode, &arg, &res, cred, ctx); @@ -4232,7 +4229,6 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, struct inode *inode = d_inode(dentry); const struct cred *cred = NULL; struct 
nfs_open_context *ctx = NULL; - struct nfs4_label *label = NULL; int status; if (pnfs_ld_layoutret_on_setattr(inode) && @@ -4258,20 +4254,15 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, cred = ctx->cred; } - label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL); - if (IS_ERR(label)) - return PTR_ERR(label); - /* Return any delegations if we're going to change ACLs */ if ((sattr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) nfs4_inode_make_writeable(inode); - status = nfs4_do_setattr(inode, cred, fattr, sattr, ctx, NULL, label); + status = nfs4_do_setattr(inode, cred, fattr, sattr, ctx, NULL); if (status == 0) { nfs_setattr_update_inode(inode, sattr, fattr); - nfs_setsecurity(inode, fattr, label); + nfs_setsecurity(inode, fattr, fattr->label); } - nfs4_label_free(label); return status; } @@ -6021,8 +6012,7 @@ static int nfs4_get_security_label(struct inode *inode, void *buf, static int _nfs4_do_set_security_label(struct inode *inode, struct nfs4_label *ilabel, - struct nfs_fattr *fattr, - struct nfs4_label *olabel) + struct nfs_fattr *fattr) { struct iattr sattr = {0}; @@ -6037,7 +6027,6 @@ static int _nfs4_do_set_security_label(struct inode *inode, }; struct nfs_setattrres res = { .fattr = fattr, - .label = olabel, .server = server, }; struct rpc_message msg = { @@ -6058,15 +6047,13 @@ static int _nfs4_do_set_security_label(struct inode *inode, static int nfs4_do_set_security_label(struct inode *inode, struct nfs4_label *ilabel, - struct nfs_fattr *fattr, - struct nfs4_label *olabel) + struct nfs_fattr *fattr) { struct nfs4_exception exception = { }; int err; do { - err = _nfs4_do_set_security_label(inode, ilabel, - fattr, olabel); + err = _nfs4_do_set_security_label(inode, ilabel, fattr); trace_nfs4_set_security_label(inode, err); err = nfs4_handle_exception(NFS_SERVER(inode), err, &exception); @@ -6077,32 +6064,21 @@ static int nfs4_do_set_security_label(struct inode *inode, static int nfs4_set_security_label(struct inode *inode, 
const void *buf, size_t buflen) { - struct nfs4_label ilabel, *olabel = NULL; - struct nfs_fattr fattr; + struct nfs4_label ilabel = {0, 0, buflen, (char *)buf }; + struct nfs_fattr *fattr; int status; if (!nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL)) return -EOPNOTSUPP; - nfs_fattr_init(&fattr); + fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode)); + if (fattr == NULL) + return -ENOMEM; - ilabel.pi = 0; - ilabel.lfs = 0; - ilabel.label = (char *)buf; - ilabel.len = buflen; - - olabel = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL); - if (IS_ERR(olabel)) { - status = -PTR_ERR(olabel); - goto out; - } - - status = nfs4_do_set_security_label(inode, &ilabel, &fattr, olabel); + status = nfs4_do_set_security_label(inode, &ilabel, fattr); if (status == 0) - nfs_setsecurity(inode, &fattr, olabel); + nfs_setsecurity(inode, fattr, fattr->label); - nfs4_label_free(olabel); -out: return status; } #endif /* CONFIG_NFS_V4_SECURITY_LABEL */ diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index d1b61b76bc82..e3df7ada5988 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -6608,7 +6608,7 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, status = decode_setattr(xdr); if (status) goto out; - decode_getfattr_label(xdr, res->fattr, res->label, res->server); + decode_getfattr_label(xdr, res->fattr, res->fattr->label, res->server); out: return status; } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 817f1bf5f187..967a0098f0a9 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -832,7 +832,6 @@ struct nfs_getaclres { struct nfs_setattrres { struct nfs4_sequence_res seq_res; struct nfs_fattr * fattr; - struct nfs4_label *label; const struct nfs_server * server; }; From d91bfc46426d3d772fc0d9d165e3435fd0f0a79e Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 22 Oct 2021 13:11:09 -0400 Subject: [PATCH 331/433] NFS: Remove the nfs4_label argument from nfs_instantiate() Pull the label from the fattr instead. 
Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 5 ++--- fs/nfs/nfs4proc.c | 2 +- fs/nfs/proc.c | 8 ++++---- include/linux/nfs_fs.h | 2 +- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index aa95a898ad0f..48ea69af9446 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2075,12 +2075,11 @@ EXPORT_SYMBOL_GPL(nfs_add_or_obtain); * Code common to create, mkdir, and mknod. */ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, - struct nfs4_label *label) + struct nfs_fattr *fattr) { struct dentry *d; - d = nfs_add_or_obtain(dentry, fhandle, fattr, label); + d = nfs_add_or_obtain(dentry, fhandle, fattr, fattr->label); if (IS_ERR(d)) return PTR_ERR(d); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a0762ecc0c73..4d09c81502cf 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4859,7 +4859,7 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_ data->res.fattr->time_start, NFS_INO_INVALID_DATA); spin_unlock(&dir->i_lock); - status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, data->res.fattr->label); + status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); } return status; } diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index baee21c2c091..73dcaa99fa9b 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -255,7 +255,7 @@ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_mark_for_revalidate(dir); if (status == 0) - status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, NULL); + status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); nfs_free_createdata(data); out: dprintk("NFS reply create: %d\n", status); @@ -302,7 +302,7 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); } if (status == 0) - status = 
nfs_instantiate(dentry, data->res.fh, data->res.fattr, NULL); + status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); nfs_free_createdata(data); out: dprintk("NFS reply mknod: %d\n", status); @@ -434,7 +434,7 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, * should fill in the data with a LOOKUP call on the wire. */ if (status == 0) - status = nfs_instantiate(dentry, fh, fattr, NULL); + status = nfs_instantiate(dentry, fh, fattr); out_free: nfs_free_fattr(fattr); @@ -463,7 +463,7 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_mark_for_revalidate(dir); if (status == 0) - status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, NULL); + status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); nfs_free_createdata(data); out: dprintk("NFS reply mkdir: %d\n", status); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 88c3aed8ad39..a8a9b71aeea6 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -532,7 +532,7 @@ extern struct dentry *nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr, struct nfs4_label *label); extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, - struct nfs_fattr *fattr, struct nfs4_label *label); + struct nfs_fattr *fattr); extern int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags); extern void nfs_access_zap_cache(struct inode *inode); extern int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, From cc6f32989c3202349b90edde0c4702b098410fe8 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 22 Oct 2021 13:11:10 -0400 Subject: [PATCH 332/433] NFS: Remove the nfs4_label argument from nfs_add_or_obtain() Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 7 +++---- fs/nfs/nfs3proc.c | 2 +- include/linux/nfs_fs.h | 3 +-- 3 
files changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 48ea69af9446..1c74f9d2f3a1 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2036,8 +2036,7 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) struct dentry * nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, - struct nfs4_label *label) + struct nfs_fattr *fattr) { struct dentry *parent = dget_parent(dentry); struct inode *dir = d_inode(parent); @@ -2060,7 +2059,7 @@ nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle, if (error < 0) goto out_error; } - inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label); + inode = nfs_fhget(dentry->d_sb, fhandle, fattr, fattr->label); d = d_splice_alias(inode, dentry); out: dput(parent); @@ -2079,7 +2078,7 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, { struct dentry *d; - d = nfs_add_or_obtain(dentry, fhandle, fattr, fattr->label); + d = nfs_add_or_obtain(dentry, fhandle, fattr); if (IS_ERR(d)) return PTR_ERR(d); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 7bae21a2ba05..7100514d306b 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -321,7 +321,7 @@ nfs3_do_create(struct inode *dir, struct dentry *dentry, struct nfs3_createdata if (status != 0) return ERR_PTR(status); - return nfs_add_or_obtain(dentry, data->res.fh, data->res.fattr, NULL); + return nfs_add_or_obtain(dentry, data->res.fh, data->res.fattr); } static void nfs3_free_createdata(struct nfs3_createdata *data) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index a8a9b71aeea6..6eda001b306b 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -529,8 +529,7 @@ extern void nfs_set_verifier(struct dentry * dentry, unsigned long verf); extern void nfs_clear_verifier_delegated(struct inode *inode); #endif /* IS_ENABLED(CONFIG_NFS_V4) */ extern struct dentry *nfs_add_or_obtain(struct dentry *dentry, - struct nfs_fh *fh, struct 
nfs_fattr *fattr, - struct nfs4_label *label); + struct nfs_fh *fh, struct nfs_fattr *fattr); extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr); extern int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags); From cf7ab00aabbf9c8f1ec72edff15849ddc23aa6a7 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 22 Oct 2021 13:11:11 -0400 Subject: [PATCH 333/433] NFS: Remove the nfs4_label argument from nfs_fhget() Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 6 +++--- fs/nfs/export.c | 4 ++-- fs/nfs/getroot.c | 2 +- fs/nfs/inode.c | 4 ++-- fs/nfs/nfs4file.c | 3 +-- fs/nfs/nfs4proc.c | 2 +- include/linux/nfs_fs.h | 2 +- 7 files changed, 11 insertions(+), 12 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 1c74f9d2f3a1..bd89f39e8ba9 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -697,7 +697,7 @@ again: goto out; } - inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr, entry->fattr->label); + inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr); alias = d_splice_alias(inode, dentry); d_lookup_done(dentry); if (alias) { @@ -1782,7 +1782,7 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in res = ERR_PTR(error); goto out; } - inode = nfs_fhget(dentry->d_sb, fhandle, fattr, fattr->label); + inode = nfs_fhget(dentry->d_sb, fhandle, fattr); res = ERR_CAST(inode); if (IS_ERR(res)) goto out; @@ -2059,7 +2059,7 @@ nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle, if (error < 0) goto out_error; } - inode = nfs_fhget(dentry->d_sb, fhandle, fattr, fattr->label); + inode = nfs_fhget(dentry->d_sb, fhandle, fattr); d = d_splice_alias(inode, dentry); out: dput(parent); diff --git a/fs/nfs/export.c b/fs/nfs/export.c index a0462f7e7e35..171c424cb6d5 100644 --- a/fs/nfs/export.c +++ b/fs/nfs/export.c @@ -103,7 +103,7 @@ nfs_fh_to_dentry(struct super_block *sb, struct fid *fid, goto out_free_fattr; } - inode = nfs_fhget(sb, 
server_fh, fattr, fattr->label); + inode = nfs_fhget(sb, server_fh, fattr); out_found: dentry = d_obtain_alias(inode); @@ -138,7 +138,7 @@ nfs_get_parent(struct dentry *dentry) goto out; } - pinode = nfs_fhget(sb, &fh, fattr, fattr->label); + pinode = nfs_fhget(sb, &fh, fattr); parent = d_obtain_alias(pinode); out: nfs_free_fattr(fattr); diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 7604cb6a0ac2..0aedee201166 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -91,7 +91,7 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc) goto out_fattr; } - inode = nfs_fhget(s, ctx->mntfh, fsinfo.fattr, NULL); + inode = nfs_fhget(s, ctx->mntfh, fsinfo.fattr); if (IS_ERR(inode)) { dprintk("nfs_get_root: get root inode failed\n"); error = PTR_ERR(inode); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index df109287f2e0..be28f0251dee 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -448,7 +448,7 @@ static void nfs_inode_init_dir(struct nfs_inode *nfsi) * instead of inode number. 
*/ struct inode * -nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, struct nfs4_label *label) +nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) { struct nfs_find_desc desc = { .fh = fh, @@ -581,7 +581,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st fattr->size != 0) nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS); - nfs_setsecurity(inode, fattr, label); + nfs_setsecurity(inode, fattr, fattr->label); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = now; diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index e2451f66024c..e79ae4cbc395 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -344,8 +344,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, goto out; snprintf(read_name, len, SSC_READ_NAME_BODY, read_name_gen++); - r_ino = nfs_fhget(ss_mnt->mnt_root->d_inode->i_sb, src_fh, fattr, - NULL); + r_ino = nfs_fhget(ss_mnt->mnt_root->d_inode->i_sb, src_fh, fattr); if (IS_ERR(r_ino)) { res = ERR_CAST(r_ino); goto out_free_name; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4d09c81502cf..b25adb2250ef 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2008,7 +2008,7 @@ nfs4_opendata_get_inode(struct nfs4_opendata *data) if (!(data->f_attr.valid & NFS_ATTR_FATTR)) return ERR_PTR(-EAGAIN); inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, - &data->f_attr, data->f_attr.label); + &data->f_attr); break; default: inode = d_inode(data->dentry); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 6eda001b306b..c36c6a559fc9 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -388,7 +388,7 @@ extern void nfs_zap_caches(struct inode *); extern void nfs_set_inode_stale(struct inode *inode); extern void nfs_invalidate_atime(struct inode *); extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *, - struct nfs_fattr *, struct nfs4_label *); + struct nfs_fattr *); struct inode 
*nfs_ilookup(struct super_block *sb, struct nfs_fattr *, struct nfs_fh *); extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr); From dd225cb3b02b827271a2284f89102fc81efcbf6f Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 22 Oct 2021 13:11:12 -0400 Subject: [PATCH 334/433] NFS: Remove the nfs4_label argument from nfs_setsecurity Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 5 ++--- fs/nfs/getroot.c | 2 +- fs/nfs/inode.c | 20 +++++++++----------- fs/nfs/nfs4proc.c | 9 ++++----- include/linux/nfs_fs.h | 3 +-- 5 files changed, 17 insertions(+), 22 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index bd89f39e8ba9..731d31015b6a 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -682,8 +682,7 @@ again: nfs_set_verifier(dentry, dir_verifier); status = nfs_refresh_inode(d_inode(dentry), entry->fattr); if (!status) - nfs_setsecurity(d_inode(dentry), entry->fattr, - entry->fattr->label); + nfs_setsecurity(d_inode(dentry), entry->fattr); goto out; } else { d_invalidate(dentry); @@ -1523,7 +1522,7 @@ nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry, if (nfs_refresh_inode(inode, fattr) < 0) goto out; - nfs_setsecurity(inode, fattr, fattr->label); + nfs_setsecurity(inode, fattr); nfs_set_verifier(dentry, dir_verifier); /* set a readdirplus hint that we had a cache miss */ diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 0aedee201166..11ff2b2e060f 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -148,7 +148,7 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc) !(kflags_out & SECURITY_LSM_NATIVE_LABELS)) server->caps &= ~NFS_CAP_SECURITY_LABEL; - nfs_setsecurity(inode, fsinfo.fattr, fsinfo.fattr->label); + nfs_setsecurity(inode, fsinfo.fattr); error = 0; out_fattr: diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index be28f0251dee..dd53704c3f40 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ 
-355,23 +355,22 @@ static void nfs_clear_label_invalid(struct inode *inode) spin_unlock(&inode->i_lock); } -void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, - struct nfs4_label *label) +void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr) { int error; - if (label == NULL) + if (fattr->label == NULL) return; if ((fattr->valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL) && inode->i_security) { - error = security_inode_notifysecctx(inode, label->label, - label->len); + error = security_inode_notifysecctx(inode, fattr->label->label, + fattr->label->len); if (error) printk(KERN_ERR "%s() %s %d " "security_inode_notifysecctx() %d\n", __func__, - (char *)label->label, - label->len, error); + (char *)fattr->label->label, + fattr->label->len, error); nfs_clear_label_invalid(inode); } } @@ -398,8 +397,7 @@ struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) } EXPORT_SYMBOL_GPL(nfs4_label_alloc); #else -void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, - struct nfs4_label *label) +void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr) { } #endif @@ -581,7 +579,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) fattr->size != 0) nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS); - nfs_setsecurity(inode, fattr, fattr->label); + nfs_setsecurity(inode, fattr); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = now; @@ -1252,7 +1250,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) if (nfsi->cache_validity & NFS_INO_INVALID_ACL) nfs_zap_acl_cache(inode); - nfs_setsecurity(inode, fattr, fattr->label); + nfs_setsecurity(inode, fattr); dfprintk(PAGECACHE, "NFS: (%s/%Lu) revalidation complete\n", inode->i_sb->s_id, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b25adb2250ef..535436dbdc9a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3179,8 +3179,7 @@ static int _nfs4_do_open(struct inode *dir, if (status == 
0) { nfs_setattr_update_inode(state->inode, sattr, opendata->o_res.f_attr); - nfs_setsecurity(state->inode, opendata->o_res.f_attr, - opendata->o_res.f_attr->label); + nfs_setsecurity(state->inode, opendata->o_res.f_attr); } sattr->ia_valid = ia_old; } @@ -4261,7 +4260,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, status = nfs4_do_setattr(inode, cred, fattr, sattr, ctx, NULL); if (status == 0) { nfs_setattr_update_inode(inode, sattr, fattr); - nfs_setsecurity(inode, fattr, fattr->label); + nfs_setsecurity(inode, fattr); } return status; } @@ -4782,7 +4781,7 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct nfs4_inc_nlink(inode); status = nfs_post_op_update_inode(inode, res.fattr); if (!status) - nfs_setsecurity(inode, res.fattr, res.fattr->label); + nfs_setsecurity(inode, res.fattr); } out: @@ -6077,7 +6076,7 @@ nfs4_set_security_label(struct inode *inode, const void *buf, size_t buflen) status = nfs4_do_set_security_label(inode, &ilabel, fattr); if (status == 0) - nfs_setsecurity(inode, fattr, fattr->label); + nfs_setsecurity(inode, fattr); return status; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index c36c6a559fc9..05f249f20f55 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -409,8 +409,7 @@ extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *map extern int nfs_revalidate_mapping_rcu(struct inode *inode); extern int nfs_setattr(struct user_namespace *, struct dentry *, struct iattr *); extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, struct nfs_fattr *); -extern void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, - struct nfs4_label *label); +extern void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr); extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx); extern void put_nfs_open_context(struct nfs_open_context *ctx); extern struct nfs_open_context 
*nfs_find_open_context(struct inode *inode, const struct cred *cred, fmode_t mode); From 1e2f67da89310c3b879c4e18d0d1d57e9d901745 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 22 Oct 2021 13:11:13 -0400 Subject: [PATCH 335/433] NFS: Remove the nfs4_label argument from decode_getattr_*() functions We can check if the fattr has an allocated label when needed Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 43 +++++++++++++++++-------------------------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index e3df7ada5988..69862bf6db00 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4585,8 +4585,7 @@ static int decode_attr_mdsthreshold(struct xdr_stream *xdr, static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fattr *fattr, struct nfs_fh *fh, - struct nfs4_fs_locations *fs_loc, struct nfs4_label *label, - const struct nfs_server *server) + struct nfs4_fs_locations *fs_loc, const struct nfs_server *server) { int status; umode_t fmode = 0; @@ -4701,8 +4700,8 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, if (status < 0) goto xdr_error; - if (label) { - status = decode_attr_security_label(xdr, bitmap, label); + if (fattr->label) { + status = decode_attr_security_label(xdr, bitmap, fattr->label); if (status < 0) goto xdr_error; fattr->valid |= status; @@ -4715,7 +4714,7 @@ xdr_error: static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr, struct nfs_fh *fh, struct nfs4_fs_locations *fs_loc, - struct nfs4_label *label, const struct nfs_server *server) + const struct nfs_server *server) { unsigned int savep; uint32_t attrlen,
bitmap, fattr, fh, fs_loc, server); if (status < 0) goto xdr_error; @@ -4745,16 +4743,10 @@ xdr_error: return status; } -static int decode_getfattr_label(struct xdr_stream *xdr, struct nfs_fattr *fattr, - struct nfs4_label *label, const struct nfs_server *server) -{ - return decode_getfattr_generic(xdr, fattr, NULL, NULL, label, server); -} - static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, const struct nfs_server *server) { - return decode_getfattr_generic(xdr, fattr, NULL, NULL, NULL, server); + return decode_getfattr_generic(xdr, fattr, NULL, NULL, server); } /* @@ -6171,7 +6163,7 @@ static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_getfh(xdr, res->fh); if (status) goto out; - status = decode_getfattr_label(xdr, res->fattr, res->fattr->label, res->server); + status = decode_getfattr(xdr, res->fattr, res->server); out: return status; } @@ -6201,7 +6193,7 @@ static int nfs4_xdr_dec_lookupp(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_getfh(xdr, res->fh); if (status) goto out; - status = decode_getfattr_label(xdr, res->fattr, res->fattr->label, res->server); + status = decode_getfattr(xdr, res->fattr, res->server); out: return status; } @@ -6228,8 +6220,7 @@ static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, goto out; status = decode_getfh(xdr, res->fh); if (status == 0) - status = decode_getfattr_label(xdr, res->fattr, - res->fattr->label, res->server); + status = decode_getfattr(xdr, res->fattr, res->server); out: return status; } @@ -6323,7 +6314,7 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_restorefh(xdr); if (status) goto out; - decode_getfattr_label(xdr, res->fattr, res->fattr->label, res->server); + decode_getfattr(xdr, res->fattr, res->server); out: return status; } @@ -6353,7 +6344,7 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_getfh(xdr, res->fh); if 
(status) goto out; - decode_getfattr_label(xdr, res->fattr, res->fattr->label, res->server); + decode_getfattr(xdr, res->fattr, res->server); out: return status; } @@ -6386,7 +6377,7 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_putfh(xdr); if (status) goto out; - status = decode_getfattr_label(xdr, res->fattr, res->fattr->label, res->server); + status = decode_getfattr(xdr, res->fattr, res->server); out: return status; } @@ -6524,7 +6515,7 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr, goto out; if (res->access_request) decode_access(xdr, &res->access_supported, &res->access_result); - decode_getfattr_label(xdr, res->f_attr, res->f_attr->label, res->server); + decode_getfattr(xdr, res->f_attr, res->server); if (res->lg_res) decode_layoutget(xdr, rqstp, res->lg_res); out: @@ -6608,7 +6599,7 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, status = decode_setattr(xdr); if (status) goto out; - decode_getfattr_label(xdr, res->fattr, res->fattr->label, res->server); + decode_getfattr(xdr, res->fattr, res->server); out: return status; } @@ -7023,7 +7014,7 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, status = decode_getfattr_generic(xdr, &res->fs_locations->fattr, NULL, res->fs_locations, - NULL, res->fs_locations->server); + res->fs_locations->server); if (status) goto out; if (res->renew) @@ -7036,7 +7027,7 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, status = decode_getfattr_generic(xdr, &res->fs_locations->fattr, NULL, res->fs_locations, - NULL, res->fs_locations->server); + res->fs_locations->server); } out: return status; @@ -7467,7 +7458,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, return -EAGAIN; if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, - NULL, entry->fattr->label, entry->server) < 0) + NULL, entry->server) < 0) return -EAGAIN; if (entry->fattr->valid & 
NFS_ATTR_FATTR_MOUNTED_ON_FILEID) entry->ino = entry->fattr->mounted_on_fileid; From 16e28abb7290c4ca3b3a0f333ba067f34bb18c86 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 3 Nov 2021 08:00:19 +0100 Subject: [PATCH 336/433] Input: i8042 - Add quirk for Fujitsu Lifebook T725 Fujitsu Lifebook T725 laptop requires, like a few other similar models, the nomux and notimeout options to probe the touchpad properly. This patch adds the corresponding quirk entries. BugLink: https://bugzilla.suse.com/show_bug.cgi?id=1191980 Tested-by: Neal Gompa Cc: Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20211103070019.13374-1-tiwai@suse.de Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-x86ia64io.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index a5a003553646..aedd05541044 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -272,6 +272,13 @@ static const struct dmi_system_id __initconst i8042_dmi_nomux_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "LifeBook S6230"), }, }, + { + /* Fujitsu Lifebook T725 laptop */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK T725"), + }, + }, { /* Fujitsu Lifebook U745 */ .matches = { @@ -840,6 +847,13 @@ static const struct dmi_system_id __initconst i8042_dmi_notimeout_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK AH544"), }, }, + { + /* Fujitsu Lifebook T725 laptop */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK T725"), + }, + }, { /* Fujitsu U574 laptop */ /* https://bugzilla.kernel.org/show_bug.cgi?id=69731 */ From 6e4860410b828f8576c0c003d412fcf8a7d433f9 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Fri, 29 Oct 2021 15:42:51 +0200 Subject: [PATCH 337/433] Input: synaptics-rmi4 - Fix device hierarchy The created rmi device is orphan, which breaks the real device 
hierarchy, and can cause some trouble, especially during suspend and resume sequences. E.g. in case of I2C, the rmi dev should be a child of the I2C client device. Fix this by assigning the transport device as the parent of the rmi device. Signed-off-by: Loic Poulain Link: https://lore.kernel.org/r/1635514971-18415-1-git-send-email-loic.poulain@linaro.org Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/rmi_bus.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/rmi4/rmi_bus.c b/drivers/input/rmi4/rmi_bus.c index 24f31a5c0e04..50a0134b6901 100644 --- a/drivers/input/rmi4/rmi_bus.c +++ b/drivers/input/rmi4/rmi_bus.c @@ -90,6 +90,7 @@ int rmi_register_transport_device(struct rmi_transport_dev *xport) rmi_dev->dev.bus = &rmi_bus_type; rmi_dev->dev.type = &rmi_device_type; + rmi_dev->dev.parent = xport->dev; xport->rmi_dev = rmi_dev; From 70bf363d7adb3a428773bc905011d0ff923ba747 Mon Sep 17 00:00:00 2001 From: Nghia Le Date: Thu, 4 Nov 2021 21:37:40 +0700 Subject: [PATCH 338/433] ipv6: remove useless assignment to newinet in tcp_v6_syn_recv_sock() The newinet value is initialized with inet_sk() in a block of code that handles sockets for the ETH_P_IP protocol. Along this code path, newinet is never read. Thus, the assignment to newinet is needless and can be removed.
Signed-off-by: Nghia Le Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20211104143740.32446-1-nghialm78@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/tcp_ipv6.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2cc9b0e53ad1..551fce49841d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1263,7 +1263,6 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); - newinet = inet_sk(newsk); newnp = tcp_inet6_sk(newsk); newtp = tcp_sk(newsk); From e1959faf085b004e6c3afaaaa743381f00e7c015 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 5 Nov 2021 18:00:36 +0200 Subject: [PATCH 339/433] xhci: Fix USB 3.1 enumeration issues by increasing roothub power-on-good delay Some USB 3.1 enumeration issues were reported after the hub driver removed the minimum 100ms limit for the power-on-good delay. Since commit 90d28fb53d4a ("usb: core: reduce power-on-good delay time of root hub") the hub driver sets the power-on-delay based on the bPwrOn2PwrGood value in the hub descriptor. xhci driver has a 20ms bPwrOn2PwrGood value for both roothubs based on xhci spec section 5.4.8, but it's clearly not enough for the USB 3.1 devices, causing enumeration issues. Tests indicate full 100ms delay is needed. Reported-by: Walt Jr. 
Brake Signed-off-by: Mathias Nyman Fixes: 90d28fb53d4a ("usb: core: reduce power-on-good delay time of root hub") Cc: stable Link: https://lore.kernel.org/r/20211105160036.549516-1-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index a3f875eea751..af946c42b6f0 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -257,7 +257,6 @@ static void xhci_common_hub_descriptor(struct xhci_hcd *xhci, { u16 temp; - desc->bPwrOn2PwrGood = 10; /* xhci section 5.4.9 says 20ms max */ desc->bHubContrCurrent = 0; desc->bNbrPorts = ports; @@ -292,6 +291,7 @@ static void xhci_usb2_hub_descriptor(struct usb_hcd *hcd, struct xhci_hcd *xhci, desc->bDescriptorType = USB_DT_HUB; temp = 1 + (ports / 8); desc->bDescLength = USB_DT_HUB_NONVAR_SIZE + 2 * temp; + desc->bPwrOn2PwrGood = 10; /* xhci section 5.4.8 says 20ms */ /* The Device Removable bits are reported on a byte granularity. * If the port doesn't exist within that byte, the bit is set to 0. @@ -344,6 +344,7 @@ static void xhci_usb3_hub_descriptor(struct usb_hcd *hcd, struct xhci_hcd *xhci, xhci_common_hub_descriptor(xhci, desc, ports); desc->bDescriptorType = USB_DT_SS_HUB; desc->bDescLength = USB_DT_SS_HUB_SIZE; + desc->bPwrOn2PwrGood = 50; /* usb 3.1 may fail if less than 100ms */ /* header decode latency should be zero for roothubs, * see section 4.23.5.2. From c0f49d98006f2db3333b917caac65bce2af9865c Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Thu, 28 Oct 2021 22:38:25 +0800 Subject: [PATCH 340/433] can: j1939: j1939_tp_cmd_recv(): ignore abort message in the BAM transport This patch prevents BAM transport from being closed by receiving abort message, as specified in SAE-J1939-82 2015 (A.3.3 Row 4). 
Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Link: https://lore.kernel.org/all/1635431907-15617-2-git-send-email-zhangchangzhong@huawei.com Cc: stable@vger.kernel.org Signed-off-by: Zhang Changzhong Acked-by: Oleksij Rempel Signed-off-by: Marc Kleine-Budde --- net/can/j1939/transport.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index 6c0a0ebdd024..05eb3d059e17 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -2085,6 +2085,12 @@ static void j1939_tp_cmd_recv(struct j1939_priv *priv, struct sk_buff *skb) break; case J1939_ETP_CMD_ABORT: /* && J1939_TP_CMD_ABORT */ + if (j1939_cb_is_broadcast(skcb)) { + netdev_err_once(priv->ndev, "%s: abort to broadcast (%02x), ignoring!\n", + __func__, skcb->addr.sa); + return; + } + if (j1939_tp_im_transmitter(skcb)) j1939_xtp_rx_abort(priv, skb, true); From a79305e156db3d24fcd8eb649cdb3c3b2350e5c2 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Thu, 28 Oct 2021 22:38:26 +0800 Subject: [PATCH 341/433] can: j1939: j1939_can_recv(): ignore messages with invalid source address According to SAE-J1939-82 2015 (A.3.6 Row 2), a receiver should never send TP.CM_CTS to the global address, so we can add a check in j1939_can_recv() to drop messages with invalid source address. 
Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Link: https://lore.kernel.org/all/1635431907-15617-3-git-send-email-zhangchangzhong@huawei.com Cc: stable@vger.kernel.org Signed-off-by: Zhang Changzhong Acked-by: Oleksij Rempel Signed-off-by: Marc Kleine-Budde --- net/can/j1939/main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c index 9bc55ecb37f9..8452b0fbb78c 100644 --- a/net/can/j1939/main.c +++ b/net/can/j1939/main.c @@ -75,6 +75,13 @@ static void j1939_can_recv(struct sk_buff *iskb, void *data) skcb->addr.pgn = (cf->can_id >> 8) & J1939_PGN_MAX; /* set default message type */ skcb->addr.type = J1939_TP; + + if (!j1939_address_is_valid(skcb->addr.sa)) { + netdev_err_once(priv->ndev, "%s: sa is broadcast address, ignoring!\n", + __func__); + goto done; + } + if (j1939_pgn_is_pdu1(skcb->addr.pgn)) { /* Type 1: with destination address */ skcb->addr.da = skcb->addr.pgn; From 164051a6ab5445bd97f719f50b16db8b32174269 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Thu, 28 Oct 2021 22:38:27 +0800 Subject: [PATCH 342/433] can: j1939: j1939_tp_cmd_recv(): check the dst address of TP.CM_BAM The TP.CM_BAM message must be sent to the global address [1], so add a check to drop TP.CM_BAM sent to a non-global address. Without this patch, the receiver will treat the following packets as normal RTS/CTS transport: 18EC0102#20090002FF002301 18EB0102#0100000000000000 18EB0102#020000FFFFFFFFFF [1] SAE-J1939-82 2015 A.3.3 Row 1. 
Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Link: https://lore.kernel.org/all/1635431907-15617-4-git-send-email-zhangchangzhong@huawei.com Cc: stable@vger.kernel.org Signed-off-by: Zhang Changzhong Acked-by: Oleksij Rempel Signed-off-by: Marc Kleine-Budde --- net/can/j1939/transport.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index 05eb3d059e17..a271688780a2 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -2023,6 +2023,11 @@ static void j1939_tp_cmd_recv(struct j1939_priv *priv, struct sk_buff *skb) extd = J1939_ETP; fallthrough; case J1939_TP_CMD_BAM: + if (cmd == J1939_TP_CMD_BAM && !j1939_cb_is_broadcast(skcb)) { + netdev_err_once(priv->ndev, "%s: BAM to unicast (%02x), ignoring!\n", + __func__, skcb->addr.sa); + return; + } fallthrough; case J1939_TP_CMD_RTS: if (skcb->addr.type != extd) From d9447f768bc8c60623e4bb3ce65b8f4654d33a50 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Wed, 27 Oct 2021 03:07:40 +0900 Subject: [PATCH 343/433] can: etas_es58x: es58x_rx_err_msg(): fix memory leak in error path In es58x_rx_err_msg(), if can->do_set_mode() fails, the function directly returns without calling netif_rx(skb). This means that the skb previously allocated by alloc_can_err_skb() is not freed. In other terms, this is a memory leak. This patch simply removes the return statement in the error branch and let the function continue. Issue was found with GCC -fanalyzer, please follow the link below for details. 
Fixes: 8537257874e9 ("can: etas_es58x: add core support for ETAS ES58X CAN USB interfaces") Link: https://lore.kernel.org/all/20211026180740.1953265-1-mailhol.vincent@wanadoo.fr Signed-off-by: Vincent Mailhol Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/etas_es58x/es58x_core.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.c b/drivers/net/can/usb/etas_es58x/es58x_core.c index 96a13c770e4a..24627ab14626 100644 --- a/drivers/net/can/usb/etas_es58x/es58x_core.c +++ b/drivers/net/can/usb/etas_es58x/es58x_core.c @@ -664,7 +664,7 @@ int es58x_rx_err_msg(struct net_device *netdev, enum es58x_err error, struct can_device_stats *can_stats = &can->can_stats; struct can_frame *cf = NULL; struct sk_buff *skb; - int ret; + int ret = 0; if (!netif_running(netdev)) { if (net_ratelimit()) @@ -823,8 +823,6 @@ int es58x_rx_err_msg(struct net_device *netdev, enum es58x_err error, can->state = CAN_STATE_BUS_OFF; can_bus_off(netdev); ret = can->do_set_mode(netdev, CAN_MODE_STOP); - if (ret) - return ret; } break; @@ -881,7 +879,7 @@ int es58x_rx_err_msg(struct net_device *netdev, enum es58x_err error, ES58X_EVENT_BUSOFF, timestamp); } - return 0; + return ret; } /** From 3f1c7aa28498e52a5e6aa2f1b89bf35c63352cfd Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Thu, 21 Oct 2021 10:15:04 +0200 Subject: [PATCH 344/433] can: peak_usb: always ask for BERR reporting for PCAN-USB devices Since for the PCAN-USB, the management of the transition to the ERROR_WARNING or ERROR_PASSIVE state is done according to the error counters, these must be requested unconditionally. 
Link: https://lore.kernel.org/all/20211021081505.18223-2-s.grosjean@peak-system.com Fixes: c11dcee75830 ("can: peak_usb: pcan_usb_decode_error(): upgrade handling of bus state changes") Cc: stable@vger.kernel.org Signed-off-by: Stephane Grosjean Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/peak_usb/pcan_usb.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c index 837b3fecd71e..af8d3dadbbb8 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb.c @@ -841,14 +841,14 @@ static int pcan_usb_start(struct peak_usb_device *dev) pdev->bec.rxerr = 0; pdev->bec.txerr = 0; - /* be notified on error counter changes (if requested by user) */ - if (dev->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING) { - err = pcan_usb_set_err_frame(dev, PCAN_USB_BERR_MASK); - if (err) - netdev_warn(dev->netdev, - "Asking for BERR reporting error %u\n", - err); - } + /* always ask the device for BERR reporting, to be able to switch from + * WARNING to PASSIVE state + */ + err = pcan_usb_set_err_frame(dev, PCAN_USB_BERR_MASK); + if (err) + netdev_warn(dev->netdev, + "Asking for BERR reporting error %u\n", + err); /* if revision greater than 3, can put silent mode on/off */ if (dev->device_rev > 3) { @@ -986,7 +986,6 @@ const struct peak_usb_adapter pcan_usb = { .device_id = PCAN_USB_PRODUCT_ID, .ctrl_count = 1, .ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_LISTENONLY | - CAN_CTRLMODE_BERR_REPORTING | CAN_CTRLMODE_CC_LEN8_DLC, .clock = { .freq = PCAN_USB_CRYSTAL_HZ / 2, From 6b78ba3e51f9a2fa5b48eef959acc8b6f02cbf1f Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Thu, 21 Oct 2021 10:15:05 +0200 Subject: [PATCH 345/433] can: peak_usb: exchange the order of information messages Proposes the possible update of the PCAN-USB firmware after indicating its name and current version. 
Link: https://lore.kernel.org/all/20211021081505.18223-3-s.grosjean@peak-system.com Signed-off-by: Stephane Grosjean Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/peak_usb/pcan_usb.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c index af8d3dadbbb8..876218752766 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb.c @@ -883,6 +883,11 @@ static int pcan_usb_init(struct peak_usb_device *dev) return err; } + dev_info(dev->netdev->dev.parent, + "PEAK-System %s adapter hwrev %u serial %08X (%u channel)\n", + pcan_usb.name, dev->device_rev, serial_number, + pcan_usb.ctrl_count); + /* Since rev 4.1, PCAN-USB is able to make single-shot as well as * looped back frames. */ @@ -896,11 +901,6 @@ static int pcan_usb_init(struct peak_usb_device *dev) "Firmware update available. Please contact support@peak-system.com\n"); } - dev_info(dev->netdev->dev.parent, - "PEAK-System %s adapter hwrev %u serial %08X (%u channel)\n", - pcan_usb.name, dev->device_rev, serial_number, - pcan_usb.ctrl_count); - return 0; } From 3990ed4c426652fcd469f8c9dc08156294b36c28 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 5 Nov 2021 18:40:14 -0700 Subject: [PATCH 346/433] bpf: Stop caching subprog index in the bpf_pseudo_func insn This patch fixes an out-of-bounds access issue when jit-ing the bpf_pseudo_func insn (i.e. ld_imm64 with src_reg == BPF_PSEUDO_FUNC). jit_subprogs() currently reuses the subprog index cached in insn[1].imm. This subprog index is an index into a few arrays related to subprogs. For example, in jit_subprogs(), it is an index into the newly allocated 'struct bpf_prog **func' array. The subprog index was cached in insn[1].imm after add_subprog().
However, this could become outdated (and too big in this case) if some subprogs are completely removed during dead code elimination (in adjust_subprog_starts_after_remove). The cached index in insn[1].imm is not updated accordingly, causing an out-of-bounds access in the later jit_subprogs(). Unlike the bpf_pseudo_'func' insn, the current bpf_pseudo_'call' insn handles the DCE properly by calling find_subprog(insn->imm) to figure out the index instead of caching the subprog index. The existing bpf_adj_branches() will adjust the insn->imm whenever an insn is added or removed. Instead of having two ways of handling the subprog index, this patch makes bpf_pseudo_func work more like bpf_pseudo_call. The first change is to stop caching the subprog index result in insn[1].imm after add_subprog(). The verification process will use find_subprog(insn->imm) to figure out the subprog index. The second change is in bpf_adj_branches(), which now also adjusts the insn->imm for the bpf_pseudo_func insn whenever an insn is added or removed. The third change is in jit_subprogs(). Like the bpf_pseudo_call handling, bpf_pseudo_func temporarily stores the find_subprog() result in insn->off. It is fine because the prog's insn has been finalized at this point. insn->off will be reset back to 0 later to avoid confusing the userspace prog dump tool.
Fixes: 69c087ba6225 ("bpf: Add bpf_for_each_map_elem() helper") Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211106014014.651018-1-kafai@fb.com --- include/linux/bpf.h | 6 ++++++ kernel/bpf/core.c | 7 +++++++ kernel/bpf/verifier.c | 37 ++++++++++++++----------------------- 3 files changed, 27 insertions(+), 23 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2be6dfd68df9..f715e8863f4d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -484,6 +484,12 @@ bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size) aux->ctx_field_size = size; } +static inline bool bpf_pseudo_func(const struct bpf_insn *insn) +{ + return insn->code == (BPF_LD | BPF_IMM | BPF_DW) && + insn->src_reg == BPF_PSEUDO_FUNC; +} + struct bpf_prog_ops { int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 327e3996eadb..2405e39d800f 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -390,6 +390,13 @@ static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, s32 end_old, i = end_new; insn = prog->insnsi + end_old; } + if (bpf_pseudo_func(insn)) { + ret = bpf_adj_delta_to_imm(insn, pos, end_old, + end_new, i, probe_pass); + if (ret) + return ret; + continue; + } code = insn->code; if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) || diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5f8d9128860a..890b3ec375a3 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -240,12 +240,6 @@ static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn) insn->src_reg == BPF_PSEUDO_KFUNC_CALL; } -static bool bpf_pseudo_func(const struct bpf_insn *insn) -{ - return insn->code == (BPF_LD | BPF_IMM | BPF_DW) && - insn->src_reg == BPF_PSEUDO_FUNC; -} - struct bpf_call_arg_meta { struct bpf_map *map_ptr; bool raw_mode; @@ -1960,16 +1954,10 @@ static int 
add_subprog_and_kfunc(struct bpf_verifier_env *env) return -EPERM; } - if (bpf_pseudo_func(insn)) { + if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn)) ret = add_subprog(env, i + insn->imm + 1); - if (ret >= 0) - /* remember subprog */ - insn[1].imm = ret; - } else if (bpf_pseudo_call(insn)) { - ret = add_subprog(env, i + insn->imm + 1); - } else { + else ret = add_kfunc_call(env, insn->imm, insn->off); - } if (ret < 0) return ret; @@ -9387,7 +9375,8 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) if (insn->src_reg == BPF_PSEUDO_FUNC) { struct bpf_prog_aux *aux = env->prog->aux; - u32 subprogno = insn[1].imm; + u32 subprogno = find_subprog(env, + env->insn_idx + insn->imm + 1); if (!aux->func_info) { verbose(env, "missing btf func_info\n"); @@ -12557,14 +12546,9 @@ static int jit_subprogs(struct bpf_verifier_env *env) return 0; for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { - if (bpf_pseudo_func(insn)) { - env->insn_aux_data[i].call_imm = insn->imm; - /* subprog is encoded in insn[1].imm */ + if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn)) continue; - } - if (!bpf_pseudo_call(insn)) - continue; /* Upon error here we cannot fall back to interpreter but * need a hard reject of the program. Thus -EFAULT is * propagated in any case. @@ -12585,6 +12569,12 @@ static int jit_subprogs(struct bpf_verifier_env *env) env->insn_aux_data[i].call_imm = insn->imm; /* point imm to __bpf_call_base+1 from JITs point of view */ insn->imm = 1; + if (bpf_pseudo_func(insn)) + /* jit (e.g. x86_64) may emit fewer instructions + * if it learns a u32 imm is the same as a u64 imm. + * Force a non zero here. 
+ */ + insn[1].imm = 1; } err = bpf_prog_alloc_jited_linfo(prog); @@ -12669,7 +12659,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) insn = func[i]->insnsi; for (j = 0; j < func[i]->len; j++, insn++) { if (bpf_pseudo_func(insn)) { - subprog = insn[1].imm; + subprog = insn->off; insn[0].imm = (u32)(long)func[subprog]->bpf_func; insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32; continue; @@ -12720,7 +12710,8 @@ static int jit_subprogs(struct bpf_verifier_env *env) for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { if (bpf_pseudo_func(insn)) { insn[0].imm = env->insn_aux_data[i].call_imm; - insn[1].imm = find_subprog(env, i + insn[0].imm + 1); + insn[1].imm = insn->off; + insn->off = 0; continue; } if (!bpf_pseudo_call(insn)) From d99341b373215cf32bfb7f341fb3e720e0e791ef Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 5 Nov 2021 18:40:20 -0700 Subject: [PATCH 347/433] bpf: selftest: Trigger a DCE on the whole subprog This patch adds a test to trigger the DCE to remove the whole subprog to ensure the verifier does not depend on a stable subprog index. The DCE is done by testing a global const. 
Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211106014020.651638-1-kafai@fb.com --- .../selftests/bpf/progs/for_each_array_map_elem.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c b/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c index df918b2469da..52f6995ff29c 100644 --- a/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c +++ b/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c @@ -23,6 +23,16 @@ struct callback_ctx { int output; }; +const volatile int bypass_unused = 1; + +static __u64 +unused_subprog(struct bpf_map *map, __u32 *key, __u64 *val, + struct callback_ctx *data) +{ + data->output = 0; + return 1; +} + static __u64 check_array_elem(struct bpf_map *map, __u32 *key, __u64 *val, struct callback_ctx *data) @@ -54,6 +64,8 @@ int test_pkt_access(struct __sk_buff *skb) data.output = 0; bpf_for_each_map_elem(&arraymap, check_array_elem, &data, 0); + if (!bypass_unused) + bpf_for_each_map_elem(&arraymap, unused_subprog, &data, 0); arraymap_output = data.output; bpf_for_each_map_elem(&percpu_map, check_percpu_elem, (void *)0, 0); From 691204bd66b34ba982e19988e6eba9f6321dfe6c Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 15 Oct 2021 19:46:59 +0200 Subject: [PATCH 348/433] can: mcp251xfd: mcp251xfd_irq(): add missing can_rx_offload_threaded_irq_finish() in case of bus off The function can_rx_offload_threaded_irq_finish() is needed to trigger the NAPI thread to deliver read CAN frames to the networking stack. This patch adds the missing call to can_rx_offload_threaded_irq_finish() in case of a bus off, before leaving the interrupt handler to avoid packet starvation. 
Link: https://lore.kernel.org/all/20211106201526.44292-1-mkl@pengutronix.de Fixes: 30bfec4fec59 ("can: rx-offload: can_rx_offload_threaded_irq_finish(): add new function to be called from threaded interrupt") Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 673861ab665a..212fcd1554e4 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -2290,8 +2290,10 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id) * check will fail, too. So leave IRQ handler * directly. */ - if (priv->can.state == CAN_STATE_BUS_OFF) + if (priv->can.state == CAN_STATE_BUS_OFF) { + can_rx_offload_threaded_irq_finish(&priv->offload); return IRQ_HANDLED; + } } handled = IRQ_HANDLED; From 69c55f6e7669d46bb40e41f6e2b218428178368a Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 19 Oct 2021 17:00:04 +0200 Subject: [PATCH 349/433] can: mcp251xfd: mcp251xfd_chip_start(): fix error handling for mcp251xfd_chip_rx_int_enable() This patch fixes the error handling for mcp251xfd_chip_rx_int_enable(). Instead of just returning the error, properly shut down the chip.
Link: https://lore.kernel.org/all/20211106201526.44292-2-mkl@pengutronix.de Fixes: 55e5b97f003e ("can: mcp25xxfd: add driver for Microchip MCP25xxFD SPI CAN") Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 212fcd1554e4..e16dc482f327 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -1092,7 +1092,7 @@ static int mcp251xfd_chip_start(struct mcp251xfd_priv *priv) err = mcp251xfd_chip_rx_int_enable(priv); if (err) - return err; + goto out_chip_stop; err = mcp251xfd_chip_ecc_init(priv); if (err) From f96f8cc4a63dd645e07ea9712be4e0a76ea4ec1f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 7 Nov 2021 09:15:44 -0500 Subject: [PATCH 350/433] NFSv4: Sanity check the parameters in nfs41_update_target_slotid() Ensure that the values supplied by the server do not exceed the size of the largest allowed slot table. 
Reported-by: Signed-off-by: Trond Myklebust --- fs/nfs/nfs4session.c | 12 ++++++++---- fs/nfs/nfs4session.h | 1 + 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index 4145a0138907..5db460476bf2 100644 --- a/fs/nfs/nfs4session.c +++ b/fs/nfs/nfs4session.c @@ -511,12 +511,16 @@ void nfs41_update_target_slotid(struct nfs4_slot_table *tbl, struct nfs4_slot *slot, struct nfs4_sequence_res *res) { + u32 target_highest_slotid = min(res->sr_target_highest_slotid, + NFS4_MAX_SLOTID); + u32 highest_slotid = min(res->sr_highest_slotid, NFS4_MAX_SLOTID); + spin_lock(&tbl->slot_tbl_lock); - if (!nfs41_is_outlier_target_slotid(tbl, res->sr_target_highest_slotid)) - nfs41_set_target_slotid_locked(tbl, res->sr_target_highest_slotid); + if (!nfs41_is_outlier_target_slotid(tbl, target_highest_slotid)) + nfs41_set_target_slotid_locked(tbl, target_highest_slotid); if (tbl->generation == slot->generation) - nfs41_set_server_slotid_locked(tbl, res->sr_highest_slotid); - nfs41_set_max_slotid_locked(tbl, res->sr_target_highest_slotid); + nfs41_set_server_slotid_locked(tbl, highest_slotid); + nfs41_set_max_slotid_locked(tbl, target_highest_slotid); spin_unlock(&tbl->slot_tbl_lock); } diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index 3de425f59b3a..351616c61df5 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -12,6 +12,7 @@ #define NFS4_DEF_SLOT_TABLE_SIZE (64U) #define NFS4_DEF_CB_SLOT_TABLE_SIZE (16U) #define NFS4_MAX_SLOT_TABLE (1024U) +#define NFS4_MAX_SLOTID (NFS4_MAX_SLOT_TABLE - 1U) #define NFS4_NO_SLOT ((u32)-1) #if IS_ENABLED(CONFIG_NFS_V4) From 08fcdfa6e3ae394ce44ad27485f9722e7eb4e142 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 7 Nov 2021 15:14:00 +0100 Subject: [PATCH 351/433] nfc: port100: lower verbosity of cancelled URB messages It is not an error to receive an URB with -ENOENT because it can come from regular user operations, e.g. 
pressing CTRL+C when running nfctool from neard. Make it a debugging message, not an error. Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- drivers/nfc/port100.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c index 16ceb763594f..d7db1a0e6be1 100644 --- a/drivers/nfc/port100.c +++ b/drivers/nfc/port100.c @@ -624,7 +624,7 @@ static void port100_recv_response(struct urb *urb) break; /* success */ case -ECONNRESET: case -ENOENT: - nfc_err(&dev->interface->dev, + nfc_dbg(&dev->interface->dev, "The urb has been canceled (status %d)\n", urb->status); goto sched_wq; case -ESHUTDOWN: @@ -678,7 +678,7 @@ static void port100_recv_ack(struct urb *urb) break; /* success */ case -ECONNRESET: case -ENOENT: - nfc_err(&dev->interface->dev, + nfc_dbg(&dev->interface->dev, "The urb has been stopped (status %d)\n", urb->status); goto sched_wq; case -ESHUTDOWN: @@ -942,7 +942,7 @@ static void port100_send_complete(struct urb *urb) break; /* success */ case -ECONNRESET: case -ENOENT: - nfc_err(&dev->interface->dev, + nfc_dbg(&dev->interface->dev, "The urb has been stopped (status %d)\n", urb->status); break; case -ESHUTDOWN: From 85879f131d78151847baf29f9557c5be1aa8e066 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 5 Nov 2021 20:14:41 -0700 Subject: [PATCH 352/433] net: hisilicon: fix hsn3_ethtool kernel-doc warnings Fix kernel-doc warnings and spacing in hns3_ethtool.c: hns3_ethtool.c:246: warning: No description found for return value of 'hns3_lp_run_test' hns3_ethtool.c:408: warning: expecting prototype for hns3_nic_self_test(). Prototype was for hns3_self_test() instead Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Peng Li Cc: Guangbin Huang Cc: Yisen Zhuang Cc: Salil Mehta Cc: "David S. Miller" Cc: Jakub Kicinski Signed-off-by: David S. 
Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index 5ebd96f6833d..057024278cf4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -238,9 +238,11 @@ static void hns3_lb_clear_tx_ring(struct hns3_nic_priv *priv, u32 start_ringid, } /** - * hns3_lp_run_test - run loopback test + * hns3_lp_run_test - run loopback test * @ndev: net device * @mode: loopback type + * + * Return: %0 for success or a NIC loopback test error code on failure */ static int hns3_lp_run_test(struct net_device *ndev, enum hnae3_loop mode) { @@ -398,7 +400,7 @@ static void hns3_do_selftest(struct net_device *ndev, int (*st_param)[2], } /** - * hns3_nic_self_test - self test + * hns3_self_test - self test * @ndev: net device * @eth_test: test cmd * @data: test result From 8ac9dfd58b138f7e82098a4e0a0d46858b12215b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 5 Nov 2021 14:42:14 -0700 Subject: [PATCH 353/433] llc: fix out-of-bound array index in llc_sk_dev_hash() Both ifindex and LLC_SK_DEV_HASH_ENTRIES are signed. This means that (ifindex % LLC_SK_DEV_HASH_ENTRIES) is negative if @ifindex is negative. We could simply make LLC_SK_DEV_HASH_ENTRIES unsigned. In this patch I chose to use hash_32() to get more entropy from @ifindex, like llc_sk_laddr_hashfn(). 
UBSAN: array-index-out-of-bounds in ./include/net/llc.h:75:26 index -43 is out of range for type 'hlist_head [64]' CPU: 1 PID: 20999 Comm: syz-executor.3 Not tainted 5.15.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 ubsan_epilogue+0xb/0x5a lib/ubsan.c:151 __ubsan_handle_out_of_bounds.cold+0x62/0x6c lib/ubsan.c:291 llc_sk_dev_hash include/net/llc.h:75 [inline] llc_sap_add_socket+0x49c/0x520 net/llc/llc_conn.c:697 llc_ui_bind+0x680/0xd70 net/llc/af_llc.c:404 __sys_bind+0x1e9/0x250 net/socket.c:1693 __do_sys_bind net/socket.c:1704 [inline] __se_sys_bind net/socket.c:1702 [inline] __x64_sys_bind+0x6f/0xb0 net/socket.c:1702 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7fa503407ae9 Fixes: 6d2e3ea28446 ("llc: use a device based hash table to speed up multicast delivery") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. 
Miller --- include/net/llc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/net/llc.h b/include/net/llc.h index fd1f9a3fd8dd..e250dca03963 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -72,7 +72,9 @@ struct llc_sap { static inline struct hlist_head *llc_sk_dev_hash(struct llc_sap *sap, int ifindex) { - return &sap->sk_dev_hash[ifindex % LLC_SK_DEV_HASH_ENTRIES]; + u32 bucket = hash_32(ifindex, LLC_SK_DEV_HASH_BITS); + + return &sap->sk_dev_hash[bucket]; } static inline From e7ea51cd879c8214a824717d28a169b5f2262c02 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Fri, 5 Nov 2021 20:30:27 +0300 Subject: [PATCH 354/433] sctp: remove unreachable code from sctp_sf_violation_chunk() sctp_sf_violation_chunk() is not called with asoc argument equal to NULL, but if that happens it would lead to NULL pointer dereference in sctp_vtag_verify(). The patch removes code that handles NULL asoc in sctp_sf_violation_chunk(). Found by Linux Verification Center (linuxtesting.org) with SVACE. Signed-off-by: Alexey Khoroshilov Proposed-by: Xin Long Signed-off-by: David S. 
Miller --- net/sctp/sm_statefuns.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 5fabaa54b77d..39ba82ee87ce 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -4898,9 +4898,6 @@ static enum sctp_disposition sctp_sf_violation_chunk( { static const char err_str[] = "The following chunk violates protocol:"; - if (!asoc) - return sctp_sf_violation(net, ep, asoc, type, arg, commands); - return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str, sizeof(err_str)); } From e1464db5c57ef393dde8126f09d2b04d166acf16 Mon Sep 17 00:00:00 2001 From: Volodymyr Mytnyk Date: Fri, 5 Nov 2021 18:49:24 +0200 Subject: [PATCH 355/433] net: marvell: prestera: fix hw structure laid out The prestera FW v4.0 support commit has been merged accidentally w/o review comments addressed and waiting for the final patch set to be uploaded. So, fix the remaining comments related to structure laid out and build issues. Reported-by: kernel test robot Fixes: bb5dbf2cc64d ("net: marvell: prestera: add firmware v4.0 support") Signed-off-by: Volodymyr Mytnyk Signed-off-by: David S. 
Miller --- .../ethernet/marvell/prestera/prestera_hw.c | 131 +++++++++--------- 1 file changed, 68 insertions(+), 63 deletions(-) diff --git a/drivers/net/ethernet/marvell/prestera/prestera_hw.c b/drivers/net/ethernet/marvell/prestera/prestera_hw.c index bc3c9310678a..9b8b1ed474fc 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_hw.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_hw.c @@ -180,109 +180,113 @@ struct prestera_msg_common_resp { struct prestera_msg_ret ret; }; -union prestera_msg_switch_param { - u8 mac[ETH_ALEN]; - __le32 ageing_timeout_ms; -} __packed; - struct prestera_msg_switch_attr_req { struct prestera_msg_cmd cmd; __le32 attr; - union prestera_msg_switch_param param; - u8 pad[2]; + union { + __le32 ageing_timeout_ms; + struct { + u8 mac[ETH_ALEN]; + u8 __pad[2]; + }; + } param; }; struct prestera_msg_switch_init_resp { struct prestera_msg_ret ret; __le32 port_count; __le32 mtu_max; - u8 switch_id; - u8 lag_max; - u8 lag_member_max; __le32 size_tbl_router_nexthop; -} __packed __aligned(4); + u8 switch_id; + u8 lag_max; + u8 lag_member_max; +}; struct prestera_msg_event_port_param { union { struct { - u8 oper; __le32 mode; __le32 speed; + u8 oper; u8 duplex; u8 fc; u8 fec; - } __packed mac; + } mac; struct { - u8 mdix; __le64 lmode_bmap; + u8 mdix; u8 fc; - } __packed phy; - } __packed; -} __packed __aligned(4); + u8 __pad[2]; + } __packed phy; /* make sure always 12 bytes size */ + }; +}; struct prestera_msg_port_cap_param { __le64 link_mode; - u8 type; - u8 fec; - u8 fc; - u8 transceiver; + u8 type; + u8 fec; + u8 fc; + u8 transceiver; }; struct prestera_msg_port_flood_param { u8 type; u8 enable; + u8 __pad[2]; }; union prestera_msg_port_param { + __le32 mtu; + __le32 speed; + __le32 link_mode; u8 admin_state; u8 oper_state; - __le32 mtu; u8 mac[ETH_ALEN]; u8 accept_frm_type; - __le32 speed; u8 learning; u8 flood; - __le32 link_mode; u8 type; u8 duplex; u8 fec; u8 fc; - union { struct { - u8 admin:1; + u8 admin; u8 fc; u8 
ap_enable; + u8 __reserved[5]; union { struct { __le32 mode; - u8 inband:1; __le32 speed; - u8 duplex; - u8 fec; - u8 fec_supp; - } __packed reg_mode; + u8 inband; + u8 duplex; + u8 fec; + u8 fec_supp; + } reg_mode; struct { __le32 mode; __le32 speed; - u8 fec; - u8 fec_supp; - } __packed ap_modes[PRESTERA_AP_PORT_MAX]; - } __packed; - } __packed mac; + u8 fec; + u8 fec_supp; + u8 __pad[2]; + } ap_modes[PRESTERA_AP_PORT_MAX]; + }; + } mac; struct { - u8 admin:1; - u8 adv_enable; __le64 modes; __le32 mode; + u8 admin; + u8 adv_enable; u8 mdix; - } __packed phy; - } __packed link; + u8 __pad; + } phy; + } link; struct prestera_msg_port_cap_param cap; struct prestera_msg_port_flood_param flood_ext; struct prestera_msg_event_port_param link_evt; -} __packed; +}; struct prestera_msg_port_attr_req { struct prestera_msg_cmd cmd; @@ -290,14 +294,12 @@ struct prestera_msg_port_attr_req { __le32 port; __le32 dev; union prestera_msg_port_param param; -} __packed __aligned(4); - +}; struct prestera_msg_port_attr_resp { struct prestera_msg_ret ret; union prestera_msg_port_param param; -} __packed __aligned(4); - +}; struct prestera_msg_port_stats_resp { struct prestera_msg_ret ret; @@ -322,13 +324,13 @@ struct prestera_msg_vlan_req { __le32 port; __le32 dev; __le16 vid; - u8 is_member; - u8 is_tagged; + u8 is_member; + u8 is_tagged; }; struct prestera_msg_fdb_req { struct prestera_msg_cmd cmd; - u8 dest_type; + __le32 flush_mode; union { struct { __le32 port; @@ -336,11 +338,12 @@ struct prestera_msg_fdb_req { }; __le16 lag_id; } dest; - u8 mac[ETH_ALEN]; __le16 vid; - u8 dynamic; - __le32 flush_mode; -} __packed __aligned(4); + u8 dest_type; + u8 dynamic; + u8 mac[ETH_ALEN]; + u8 __pad[2]; +}; struct prestera_msg_bridge_req { struct prestera_msg_cmd cmd; @@ -363,11 +366,12 @@ struct prestera_msg_acl_action { struct prestera_msg_acl_match { __le32 type; + __le32 __reserved; union { struct { u8 key; u8 mask; - } __packed u8; + } u8; struct { __le16 key; __le16 mask; @@ -383,7 
+387,7 @@ struct prestera_msg_acl_match { struct { u8 key[ETH_ALEN]; u8 mask[ETH_ALEN]; - } __packed mac; + } mac; } keymask; }; @@ -446,7 +450,8 @@ struct prestera_msg_stp_req { __le32 port; __le32 dev; __le16 vid; - u8 state; + u8 state; + u8 __pad; }; struct prestera_msg_rxtx_req { @@ -497,21 +502,21 @@ union prestera_msg_event_fdb_param { struct prestera_msg_event_fdb { struct prestera_msg_event id; - u8 dest_type; + __le32 vid; union { __le32 port_id; __le16 lag_id; } dest; - __le32 vid; union prestera_msg_event_fdb_param param; -} __packed __aligned(4); + u8 dest_type; +}; -static inline void prestera_hw_build_tests(void) +static void prestera_hw_build_tests(void) { /* check requests */ BUILD_BUG_ON(sizeof(struct prestera_msg_common_req) != 4); BUILD_BUG_ON(sizeof(struct prestera_msg_switch_attr_req) != 16); - BUILD_BUG_ON(sizeof(struct prestera_msg_port_attr_req) != 120); + BUILD_BUG_ON(sizeof(struct prestera_msg_port_attr_req) != 144); BUILD_BUG_ON(sizeof(struct prestera_msg_port_info_req) != 8); BUILD_BUG_ON(sizeof(struct prestera_msg_vlan_req) != 16); BUILD_BUG_ON(sizeof(struct prestera_msg_fdb_req) != 28); @@ -528,7 +533,7 @@ static inline void prestera_hw_build_tests(void) /* check responses */ BUILD_BUG_ON(sizeof(struct prestera_msg_common_resp) != 8); BUILD_BUG_ON(sizeof(struct prestera_msg_switch_init_resp) != 24); - BUILD_BUG_ON(sizeof(struct prestera_msg_port_attr_resp) != 112); + BUILD_BUG_ON(sizeof(struct prestera_msg_port_attr_resp) != 136); BUILD_BUG_ON(sizeof(struct prestera_msg_port_stats_resp) != 248); BUILD_BUG_ON(sizeof(struct prestera_msg_port_info_resp) != 20); BUILD_BUG_ON(sizeof(struct prestera_msg_bridge_resp) != 12); @@ -561,9 +566,9 @@ static int __prestera_cmd_ret(struct prestera_switch *sw, if (err) return err; - if (__le32_to_cpu(ret->cmd.type) != PRESTERA_CMD_TYPE_ACK) + if (ret->cmd.type != __cpu_to_le32(PRESTERA_CMD_TYPE_ACK)) return -EBADE; - if (__le32_to_cpu(ret->status) != PRESTERA_CMD_ACK_OK) + if (ret->status != 
__cpu_to_le32(PRESTERA_CMD_ACK_OK)) return -EINVAL; return 0; From 62b12ab5dff038ea43b69207b1f42ddc2f0a0b09 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Fri, 5 Nov 2021 17:45:11 +0100 Subject: [PATCH 356/433] selftests: net: tls: remove unused variable and code When building selftests/net with clang, the compiler warns about an unused variable, see below: tls.c:657:15: warning: variable 'len_compared' set but not used [-Wunused-but-set-variable] unsigned int len_compared = 0; ^ Rework to remove the unused variable and the for-loop where the variable 'len_compared' was assigned. Fixes: 7f657d5bf507 ("selftests: tls: add selftests for TLS sockets") Signed-off-by: Anders Roxell Reviewed-by: Nick Desaulniers Signed-off-by: David S. Miller --- tools/testing/selftests/net/tls.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index d3047e251fe9..e61fc4c32ba2 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -654,7 +654,6 @@ TEST_F(tls, recvmsg_single_max) TEST_F(tls, recvmsg_multiple) { unsigned int msg_iovlen = 1024; - unsigned int len_compared = 0; struct iovec vec[1024]; char *iov_base[1024]; unsigned int iov_len = 16; @@ -675,8 +674,6 @@ TEST_F(tls, recvmsg_multiple) hdr.msg_iovlen = msg_iovlen; hdr.msg_iov = vec; EXPECT_NE(recvmsg(self->cfd, &hdr, 0), -1); - for (i = 0; i < msg_iovlen; i++) - len_compared += iov_len; for (i = 0; i < msg_iovlen; i++) free(iov_base[i]); From 9fec40f850658e00a14a7dd9e06f7fbc7e59cc4a Mon Sep 17 00:00:00 2001 From: Chengfeng Ye Date: Fri, 5 Nov 2021 06:36:36 -0700 Subject: [PATCH 357/433] nfc: pn533: Fix double free when pn533_fill_fragment_skbs() fails skb is already freed by dev_kfree_skb in pn533_fill_fragment_skbs, but in the error handler branch taken when pn533_fill_fragment_skbs() fails, skb is freed again, which results in a double free. Fix this by not freeing skb in the error path of pn533_fill_fragment_skbs.
Fixes: 963a82e07d4e ("NFC: pn533: Split large Tx frames in chunks") Fixes: 93ad42020c2d ("NFC: pn533: Target mode Tx fragmentation support") Signed-off-by: Chengfeng Ye Reviewed-by: Dan Carpenter Reviewed-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- drivers/nfc/pn533/pn533.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nfc/pn533/pn533.c b/drivers/nfc/pn533/pn533.c index 787bcbd290f7..a491db46e3bd 100644 --- a/drivers/nfc/pn533/pn533.c +++ b/drivers/nfc/pn533/pn533.c @@ -2216,7 +2216,7 @@ static int pn533_fill_fragment_skbs(struct pn533 *dev, struct sk_buff *skb) frag = pn533_alloc_skb(dev, frag_size); if (!frag) { skb_queue_purge(&dev->fragment_skb); - break; + return -ENOMEM; } if (!dev->tgt_mode) { @@ -2285,7 +2285,7 @@ static int pn533_transceive(struct nfc_dev *nfc_dev, /* jumbo frame ? */ if (skb->len > PN533_CMD_DATAEXCH_DATA_MAXLEN) { rc = pn533_fill_fragment_skbs(dev, skb); - if (rc <= 0) + if (rc < 0) goto error; skb = skb_dequeue(&dev->fragment_skb); @@ -2353,7 +2353,7 @@ static int pn533_tm_send(struct nfc_dev *nfc_dev, struct sk_buff *skb) /* let's split in multiple chunks if size's too big */ if (skb->len > PN533_CMD_DATAEXCH_DATA_MAXLEN) { rc = pn533_fill_fragment_skbs(dev, skb); - if (rc <= 0) + if (rc < 0) goto error; /* get the first skb */ From c45231a7668d6b632534f692b10592ea375b55b0 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 7 Nov 2021 21:13:07 +0100 Subject: [PATCH 358/433] litex_liteeth: Fix a double free in the remove function 'netdev' is a managed resource allocated in the probe using 'devm_alloc_etherdev()'. It must not be freed explicitly in the remove function. Fixes: ee7da21ac4c3 ("net: Add driver for LiteX's LiteETH network interface") Signed-off-by: Christophe JAILLET Signed-off-by: David S. 
Miller --- drivers/net/ethernet/litex/litex_liteeth.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/litex/litex_liteeth.c b/drivers/net/ethernet/litex/litex_liteeth.c index ab9fa1525053..fdd99f0de424 100644 --- a/drivers/net/ethernet/litex/litex_liteeth.c +++ b/drivers/net/ethernet/litex/litex_liteeth.c @@ -287,7 +287,6 @@ static int liteeth_remove(struct platform_device *pdev) struct net_device *netdev = platform_get_drvdata(pdev); unregister_netdev(netdev); - free_netdev(netdev); return 0; } From f91140e4553408cacd326624cd50fc367725e04a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 5 Nov 2021 08:51:12 +0100 Subject: [PATCH 359/433] soc: ti: fix wkup_m3_rproc_boot_thread return type The wkup_m3_rproc_boot_thread() function uses a nonstandard prototype, which broke after Eric's recent cleanup: drivers/soc/ti/wkup_m3_ipc.c: In function 'wkup_m3_rproc_boot_thread': drivers/soc/ti/wkup_m3_ipc.c:429:16: error: 'return' with a value, in function returning void [-Werror=return-type] 429 | return 0; | ^ drivers/soc/ti/wkup_m3_ipc.c:416:13: note: declared here 416 | static void wkup_m3_rproc_boot_thread(struct wkup_m3_ipc *m3_ipc) | ^~~~~~~~~~~~~~~~~~~~~~~~~ Change it to the normal prototype as it should have been from the start. Fixes: 111e70490d2a ("exit/kthread: Have kernel threads return instead of calling do_exit") Fixes: cdd5de500b2c ("soc: ti: Add wkup_m3_ipc driver") Signed-off-by: Arnd Bergmann Link: https://lkml.kernel.org/r/20211105075119.2327190-1-arnd@kernel.org Acked-by: Santosh Shilimkar Acked-by: Tony Lindgren Signed-off-by: Eric W. 
Biederman --- drivers/soc/ti/wkup_m3_ipc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/soc/ti/wkup_m3_ipc.c b/drivers/soc/ti/wkup_m3_ipc.c index 0733443a2631..72386bd393fe 100644 --- a/drivers/soc/ti/wkup_m3_ipc.c +++ b/drivers/soc/ti/wkup_m3_ipc.c @@ -413,8 +413,9 @@ void wkup_m3_ipc_put(struct wkup_m3_ipc *m3_ipc) } EXPORT_SYMBOL_GPL(wkup_m3_ipc_put); -static void wkup_m3_rproc_boot_thread(struct wkup_m3_ipc *m3_ipc) +static int wkup_m3_rproc_boot_thread(void *arg) { + struct wkup_m3_ipc *m3_ipc = arg; struct device *dev = m3_ipc->dev; int ret; @@ -500,7 +501,7 @@ static int wkup_m3_ipc_probe(struct platform_device *pdev) * can boot the wkup_m3 as soon as it's ready without holding * up kernel boot */ - task = kthread_run((void *)wkup_m3_rproc_boot_thread, m3_ipc, + task = kthread_run(wkup_m3_rproc_boot_thread, m3_ipc, "wkup_m3_rproc_loader"); if (IS_ERR(task)) { From 40a34121ac1dc52ed9cd34a8f4e48e32517a52fd Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 3 Nov 2021 13:47:32 -0700 Subject: [PATCH 360/433] bpf, sockmap: Use stricter sk state checks in sk_lookup_assign In order to fix an issue with sockets in TCP sockmap redirect cases we plan to allow CLOSE state sockets to exist in the sockmap. However, the check in bpf_sk_lookup_assign() currently only invalidates sockets in the TCP_ESTABLISHED case, relying on the checks on sockmap insert to ensure we never have SOCK_CLOSE state sockets in the map. To prepare for this change we flip the logic in bpf_sk_lookup_assign() to explicitly test for the accepted cases. Namely, a tcp socket in TCP_LISTEN or a udp socket in TCP_CLOSE state. This also makes the code more resilient to future changes.
Suggested-by: Jakub Sitnicki Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/20211103204736.248403-2-john.fastabend@gmail.com --- include/linux/skmsg.h | 12 ++++++++++++ net/core/filter.c | 6 ++++-- net/core/sock_map.c | 6 ------ 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index b4256847c707..584d94be9c8b 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -507,6 +507,18 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock) return !!psock->saved_data_ready; } +static inline bool sk_is_tcp(const struct sock *sk) +{ + return sk->sk_type == SOCK_STREAM && + sk->sk_protocol == IPPROTO_TCP; +} + +static inline bool sk_is_udp(const struct sock *sk) +{ + return sk->sk_type == SOCK_DGRAM && + sk->sk_protocol == IPPROTO_UDP; +} + #if IS_ENABLED(CONFIG_NET_SOCK_MSG) #define BPF_F_STRPARSER (1UL << 1) diff --git a/net/core/filter.c b/net/core/filter.c index 8e8d3b49c297..a68418268e92 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -10423,8 +10423,10 @@ BPF_CALL_3(bpf_sk_lookup_assign, struct bpf_sk_lookup_kern *, ctx, return -EINVAL; if (unlikely(sk && sk_is_refcounted(sk))) return -ESOCKTNOSUPPORT; /* reject non-RCU freed sockets */ - if (unlikely(sk && sk->sk_state == TCP_ESTABLISHED)) - return -ESOCKTNOSUPPORT; /* reject connected sockets */ + if (unlikely(sk && sk_is_tcp(sk) && sk->sk_state != TCP_LISTEN)) + return -ESOCKTNOSUPPORT; /* only accept TCP socket in LISTEN */ + if (unlikely(sk && sk_is_udp(sk) && sk->sk_state != TCP_CLOSE)) + return -ESOCKTNOSUPPORT; /* only accept UDP socket in CLOSE */ /* Check if socket is suitable for packet L3/L4 protocol */ if (sk && sk->sk_protocol != ctx->protocol) diff --git a/net/core/sock_map.c b/net/core/sock_map.c index e252b8ec2b85..f39ef79ced67 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -511,12 +511,6 @@ static bool 
sock_map_op_okay(const struct bpf_sock_ops_kern *ops) ops->op == BPF_SOCK_OPS_TCP_LISTEN_CB; } -static bool sk_is_tcp(const struct sock *sk) -{ - return sk->sk_type == SOCK_STREAM && - sk->sk_protocol == IPPROTO_TCP; -} - static bool sock_map_redirect_allowed(const struct sock *sk) { if (sk_is_tcp(sk)) From b8b8315e39ffaca82e79d86dde26e9144addf66b Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 3 Nov 2021 13:47:33 -0700 Subject: [PATCH 361/433] bpf, sockmap: Remove unhash handler for BPF sockmap usage We do not need to handle unhash from the BPF side; we can simply wait for the close to happen. The original concern was a socket could transition from ESTABLISHED state to a new state while the BPF hook was still attached. But, we convinced ourselves this is no longer possible and we also improved BPF sockmap to handle listen sockets so this is no longer a problem. More importantly though there are cases where unhash is called when data is in the receive queue. The BPF unhash logic will flush this data which is wrong. To be correct it should keep the data in the receive queue and allow a receiving application to continue reading the data. This may happen when tcp_abort() is received for example. Instead of complicating the logic in unhash simply moving all this to tcp_close() hook solves this.
Fixes: 51199405f9672 ("bpf: skb_verdict, support SK_PASS on RX BPF path") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Tested-by: Jussi Maki Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/20211103204736.248403-3-john.fastabend@gmail.com --- net/ipv4/tcp_bpf.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 5f4d6f45d87f..246f725b78c9 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -475,7 +475,6 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS], struct proto *base) { prot[TCP_BPF_BASE] = *base; - prot[TCP_BPF_BASE].unhash = sock_map_unhash; prot[TCP_BPF_BASE].close = sock_map_close; prot[TCP_BPF_BASE].recvmsg = tcp_bpf_recvmsg; prot[TCP_BPF_BASE].sock_is_readable = sk_msg_is_readable; From c5d2177a72a1659554922728fc407f59950aa929 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 3 Nov 2021 13:47:34 -0700 Subject: [PATCH 362/433] bpf, sockmap: Fix race in ingress receive verdict with redirect to self A socket in a sockmap may have different combinations of programs attached depending on configuration. There can be no programs in which case the socket acts as a sink only. There can be a TX program in this case a BPF program is attached to sending side, but no RX program is attached. There can be an RX program only where sends have no BPF program attached, but receives are hooked with BPF. And finally, both TX and RX programs may be attached. Giving us the permutations: None, Tx, Rx, and TxRx To date most of our use cases have been TX case being used as a fast datapath to directly copy between local application and a userspace proxy. Or Rx cases and TxRX applications that are operating an in kernel based proxy. 
The traffic in the first case where we hook applications into a userspace application looks like this: AppA redirect AppB Tx <-----------> Rx | | + + TCP <--> lo <--> TCP In this case all traffic from AppA (after 3whs) is copied into the AppB ingress queue and no traffic is ever on the TCP receive_queue. In the second case the application never receives, except in some rare error cases, traffic on the actual user space socket. Instead the send happens in the kernel. AppProxy socket pool sk0 ------------->{sk1,sk2, skn} ^ | | | | v ingress lb egress TCP TCP Here because traffic is never read off the socket with userspace recv() APIs there is only ever one reader on the sk receive_queue. Namely the BPF programs. However, we've started to introduce a third configuration where the BPF program on receive should process the data, but then the normal case is to push the data into the receive queue of AppB. AppB recv() (userspace) ----------------------- tcp_bpf_recvmsg() (kernel) | | | | | | ingress_msgQ | | | RX_BPF | | | v v sk->receive_queue This is different from the App{A,B} redirect because traffic is first received on the sk->receive_queue. Now for the issue. The tcp_bpf_recvmsg() handler first checks the ingress_msg queue for any data handled by the BPF rx program and returned with PASS code so that it was enqueued on the ingress msg queue. Then if no data exists on that queue it checks the socket receive queue. Unfortunately, this is the same receive_queue the BPF program is reading data off of. So we get a race. It's possible for the recvmsg() hook to pull data off the receive_queue before the BPF hook has a chance to read it. It typically happens when an application is banging on recv() and getting EAGAINs. Until they manage to race with the RX BPF program. To fix this we note that before this patch at attach time when the socket is loaded into the map we check if it needs a TX program or just the base set of proto bpf hooks. 
Then it uses the above general RX hook regardless of if we have a BPF program attached at rx or not. This patch now extends this check to handle all cases enumerated above, TX, RX, TXRX, and none. And to fix above race when an RX program is attached we use a new hook that is nearly identical to the old one except now we do not let the recv() call skip the RX BPF program. Now only the BPF program pulls data from sk->receive_queue and recv() only pulls data from the ingress msgQ post BPF program handling. With this resolved our AppB from above has been up and running for many hours without detecting any errors. We do this by correlating counters in RX BPF events and the AppB to ensure data is never skipping the BPF program. Selftests, was not able to detect this because we only run them for a short period of time on well ordered send/recvs so we don't get any of the noise we see in real application environments. Fixes: 51199405f9672 ("bpf: skb_verdict, support SK_PASS on RX BPF path") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Tested-by: Jussi Maki Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/20211103204736.248403-4-john.fastabend@gmail.com --- net/ipv4/tcp_bpf.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 246f725b78c9..f70aa0932bd6 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -172,6 +172,41 @@ static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock, return ret; } +static int tcp_bpf_recvmsg_parser(struct sock *sk, + struct msghdr *msg, + size_t len, + int nonblock, + int flags, + int *addr_len) +{ + struct sk_psock *psock; + int copied; + + if (unlikely(flags & MSG_ERRQUEUE)) + return inet_recv_error(sk, msg, len, addr_len); + + psock = sk_psock_get(sk); + if (unlikely(!psock)) + return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); + + lock_sock(sk); +msg_bytes_ready: + copied = sk_msg_recvmsg(sk, 
psock, msg, len, flags); + if (!copied) { + long timeo; + int data; + + timeo = sock_rcvtimeo(sk, nonblock); + data = tcp_msg_wait_data(sk, psock, timeo); + if (data && !sk_psock_queue_empty(psock)) + goto msg_bytes_ready; + copied = -EAGAIN; + } + release_sock(sk); + sk_psock_put(sk, psock); + return copied; +} + static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len) { @@ -464,6 +499,8 @@ enum { enum { TCP_BPF_BASE, TCP_BPF_TX, + TCP_BPF_RX, + TCP_BPF_TXRX, TCP_BPF_NUM_CFGS, }; @@ -482,6 +519,12 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS], prot[TCP_BPF_TX] = prot[TCP_BPF_BASE]; prot[TCP_BPF_TX].sendmsg = tcp_bpf_sendmsg; prot[TCP_BPF_TX].sendpage = tcp_bpf_sendpage; + + prot[TCP_BPF_RX] = prot[TCP_BPF_BASE]; + prot[TCP_BPF_RX].recvmsg = tcp_bpf_recvmsg_parser; + + prot[TCP_BPF_TXRX] = prot[TCP_BPF_TX]; + prot[TCP_BPF_TXRX].recvmsg = tcp_bpf_recvmsg_parser; } static void tcp_bpf_check_v6_needs_rebuild(struct proto *ops) @@ -519,6 +562,10 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4; int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE; + if (psock->progs.stream_verdict || psock->progs.skb_verdict) { + config = (config == TCP_BPF_TX) ? TCP_BPF_TXRX : TCP_BPF_RX; + } + if (restore) { if (inet_csk_has_ulp(sk)) { /* TLS does not have an unhash proto in SW cases, From e0dc3b93bd7bcff8c3813d1df43e0908499c7cf0 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 3 Nov 2021 13:47:35 -0700 Subject: [PATCH 363/433] bpf: sockmap, strparser, and tls are reusing qdisc_skb_cb and colliding Strparser is reusing the qdisc_skb_cb struct to stash the skb message handling progress, e.g. offset and length of the skb. First this is poorly named and inherits a struct from qdisc that doesn't reflect the actual usage of cb[] at this layer. 
But, more importantly strparser is using the following to access its metadata. (struct _strp_msg *)((void *)skb->cb + offsetof(struct qdisc_skb_cb, data)) Where _strp_msg is defined as: struct _strp_msg { struct strp_msg strp; /* 0 8 */ int accum_len; /* 8 4 */ /* size: 12, cachelines: 1, members: 2 */ /* last cacheline: 12 bytes */ }; So we use 12 bytes of ->data[] in struct. However in BPF code running parser and verdict the user has read capabilities into the data[] array as well. It's not too problematic, but we should not be exposing internal state to BPF program. If it's really needed then we can use the probe_read() APIs which allow reading kernel memory. And I don't believe cb[] layer poses any API breakage by moving this around because programs can't depend on cb[] across layers. In order to fix another issue with a ctx rewrite we need to stash a temp variable somewhere. To make this work cleanly this patch builds a cb struct for sk_skb types called sk_skb_cb struct. Then we can use this consistently in the strparser, sockmap space. Additionally we can start allowing ->cb[] write access after this. Fixes: 604326b41a6fb ("bpf, sockmap: convert to generic sk_msg interface") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Tested-by: Jussi Maki Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/20211103204736.248403-5-john.fastabend@gmail.com --- include/net/strparser.h | 16 +++++++++++++++- net/core/filter.c | 22 ++++++++++++++++++++++ net/strparser/strparser.c | 10 +--------- 3 files changed, 38 insertions(+), 10 deletions(-) diff --git a/include/net/strparser.h b/include/net/strparser.h index 1d20b98493a1..bec1439bd3be 100644 --- a/include/net/strparser.h +++ b/include/net/strparser.h @@ -54,10 +54,24 @@ struct strp_msg { int offset; }; +struct _strp_msg { + /* Internal cb structure. struct strp_msg must be first for passing + * to upper layer. 
+ */ + struct strp_msg strp; + int accum_len; +}; + +struct sk_skb_cb { +#define SK_SKB_CB_PRIV_LEN 20 + unsigned char data[SK_SKB_CB_PRIV_LEN]; + struct _strp_msg strp; +}; + static inline struct strp_msg *strp_msg(struct sk_buff *skb) { return (struct strp_msg *)((void *)skb->cb + - offsetof(struct qdisc_skb_cb, data)); + offsetof(struct sk_skb_cb, strp)); } /* Structure for an attached lower socket */ diff --git a/net/core/filter.c b/net/core/filter.c index a68418268e92..c3936d0724b8 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -9782,11 +9782,33 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type, struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; + int off; switch (si->off) { case offsetof(struct __sk_buff, data_end): insn = bpf_convert_data_end_access(si, insn); break; + case offsetof(struct __sk_buff, cb[0]) ... + offsetofend(struct __sk_buff, cb[4]) - 1: + BUILD_BUG_ON(sizeof_field(struct sk_skb_cb, data) < 20); + BUILD_BUG_ON((offsetof(struct sk_buff, cb) + + offsetof(struct sk_skb_cb, data)) % + sizeof(__u64)); + + prog->cb_access = 1; + off = si->off; + off -= offsetof(struct __sk_buff, cb[0]); + off += offsetof(struct sk_buff, cb); + off += offsetof(struct sk_skb_cb, data); + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg, + si->src_reg, off); + else + *insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg, + si->src_reg, off); + break; + + default: return bpf_convert_ctx_access(type, si, insn_buf, prog, target_size); diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index 9c0343568d2a..1a72c67afed5 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -27,18 +27,10 @@ static struct workqueue_struct *strp_wq; -struct _strp_msg { - /* Internal cb structure. struct strp_msg must be first for passing - * to upper layer. 
- */ - struct strp_msg strp; - int accum_len; -}; - static inline struct _strp_msg *_strp_msg(struct sk_buff *skb) { return (struct _strp_msg *)((void *)skb->cb + - offsetof(struct qdisc_skb_cb, data)); + offsetof(struct sk_skb_cb, strp)); } /* Lower lock held */ From b2c4618162ec615a15883a804cce7e27afecfa58 Mon Sep 17 00:00:00 2001 From: Jussi Maki Date: Wed, 3 Nov 2021 13:47:36 -0700 Subject: [PATCH 364/433] bpf, sockmap: sk_skb data_end access incorrect when src_reg = dst_reg The current conversion of skb->data_end reads like this: ; data_end = (void*)(long)skb->data_end; 559: (79) r1 = *(u64 *)(r2 +200) ; r1 = skb->data 560: (61) r11 = *(u32 *)(r2 +112) ; r11 = skb->len 561: (0f) r1 += r11 562: (61) r11 = *(u32 *)(r2 +116) 563: (1f) r1 -= r11 But similar to the case in 84f44df664e9 ("bpf: sock_ops sk access may stomp registers when dst_reg = src_reg"), the code will read an incorrect skb->len when src == dst. In this case we end up generating this xlated code: ; data_end = (void*)(long)skb->data_end; 559: (79) r1 = *(u64 *)(r1 +200) ; r1 = skb->data 560: (61) r11 = *(u32 *)(r1 +112) ; r11 = (skb->data)->len 561: (0f) r1 += r11 562: (61) r11 = *(u32 *)(r1 +116) 563: (1f) r1 -= r11 ... where line 560 is reading 4B of (skb->data + 112) instead of the intended skb->len. Here the skb pointer in r1 gets set to skb->data and the later deref for skb->len ends up following skb->data instead of skb. This fixes the issue similarly to the patch mentioned above by creating an additional temporary variable and using it to store the register when dst_reg = src_reg. We name the variable bpf_temp_reg and place it in the cb context for sk_skb. Then we restore from the temp to ensure nothing is lost. 
Fixes: 16137b09a66f2 ("bpf: Compute data_end dynamically with JIT code") Signed-off-by: Jussi Maki Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/20211103204736.248403-6-john.fastabend@gmail.com --- include/net/strparser.h | 4 ++++ net/core/filter.c | 36 ++++++++++++++++++++++++++++++------ 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/include/net/strparser.h b/include/net/strparser.h index bec1439bd3be..732b7097d78e 100644 --- a/include/net/strparser.h +++ b/include/net/strparser.h @@ -66,6 +66,10 @@ struct sk_skb_cb { #define SK_SKB_CB_PRIV_LEN 20 unsigned char data[SK_SKB_CB_PRIV_LEN]; struct _strp_msg strp; + /* temp_reg is a temporary register used for bpf_convert_data_end_access + * when dst_reg == src_reg. + */ + u64 temp_reg; }; static inline struct strp_msg *strp_msg(struct sk_buff *skb) diff --git a/net/core/filter.c b/net/core/filter.c index c3936d0724b8..e471c9b09670 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -9756,22 +9756,46 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, static struct bpf_insn *bpf_convert_data_end_access(const struct bpf_insn *si, struct bpf_insn *insn) { - /* si->dst_reg = skb->data */ + int reg; + int temp_reg_off = offsetof(struct sk_buff, cb) + + offsetof(struct sk_skb_cb, temp_reg); + + if (si->src_reg == si->dst_reg) { + /* We need an extra register, choose and save a register. 
*/ + reg = BPF_REG_9; + if (si->src_reg == reg || si->dst_reg == reg) + reg--; + if (si->src_reg == reg || si->dst_reg == reg) + reg--; + *insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, temp_reg_off); + } else { + reg = si->dst_reg; + } + + /* reg = skb->data */ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data), - si->dst_reg, si->src_reg, + reg, si->src_reg, offsetof(struct sk_buff, data)); /* AX = skb->len */ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len), BPF_REG_AX, si->src_reg, offsetof(struct sk_buff, len)); - /* si->dst_reg = skb->data + skb->len */ - *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX); + /* reg = skb->data + skb->len */ + *insn++ = BPF_ALU64_REG(BPF_ADD, reg, BPF_REG_AX); /* AX = skb->data_len */ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data_len), BPF_REG_AX, si->src_reg, offsetof(struct sk_buff, data_len)); - /* si->dst_reg = skb->data + skb->len - skb->data_len */ - *insn++ = BPF_ALU64_REG(BPF_SUB, si->dst_reg, BPF_REG_AX); + + /* reg = skb->data + skb->len - skb->data_len */ + *insn++ = BPF_ALU64_REG(BPF_SUB, reg, BPF_REG_AX); + + if (si->src_reg == si->dst_reg) { + /* Restore the saved register */ + *insn++ = BPF_MOV64_REG(BPF_REG_AX, si->src_reg); + *insn++ = BPF_MOV64_REG(si->dst_reg, reg); + *insn++ = BPF_LDX_MEM(BPF_DW, reg, BPF_REG_AX, temp_reg_off); + } return insn; } From 54f0bad6686cdc50a3f4c5f7c4252c5018511459 Mon Sep 17 00:00:00 2001 From: Jean Sacren Date: Sun, 7 Nov 2021 23:59:41 -0700 Subject: [PATCH 365/433] net: sungem_phy: fix code indentation Remove extra space in front of the return statement. Fixes: eb5b5b2ff96e ("sungem_phy: support bcm5461 phy, autoneg.") Signed-off-by: Jean Sacren Signed-off-by: David S. 
Miller --- drivers/net/sungem_phy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/sungem_phy.c b/drivers/net/sungem_phy.c index 291fa449993f..4daac5fda073 100644 --- a/drivers/net/sungem_phy.c +++ b/drivers/net/sungem_phy.c @@ -409,7 +409,7 @@ static int genmii_read_link(struct mii_phy *phy) * though magic-aneg shouldn't prevent this case from occurring */ - return 0; + return 0; } static int generic_suspend(struct mii_phy* phy) From 3e0588c291d6ce225f2b891753ca41d45ba42469 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Mon, 8 Nov 2021 18:37:21 +0800 Subject: [PATCH 366/433] hamradio: defer ax25 kfree after unregister_netdev There is a possible race condition (use-after-free) like below (USE) | (FREE) ax25_sendmsg | ax25_queue_xmit | dev_queue_xmit | __dev_queue_xmit | __dev_xmit_skb | sch_direct_xmit | ... xmit_one | netdev_start_xmit | tty_ldisc_kill __netdev_start_xmit | mkiss_close ax_xmit | kfree ax_encaps | | Even though there are two synchronization primitives before the kfree: 1. wait_for_completion(&ax->dead). This can prevent the race with routines from mkiss_ioctl. However, it cannot stop the routine coming from upper layer, i.e., the ax25_sendmsg. 2. netif_stop_queue(ax->dev). It seems that this line of code aims to halt the transmit queue but it fails to stop the routine that already being xmit. This patch reorder the kfree after the unregister_netdev to avoid the possible UAF as the unregister_netdev() is well synchronized and won't return if there is a running routine. Signed-off-by: Lin Ma Signed-off-by: David S. 
Miller --- drivers/net/hamradio/mkiss.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index 867252a0247b..e2b332b54f06 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -792,13 +792,14 @@ static void mkiss_close(struct tty_struct *tty) */ netif_stop_queue(ax->dev); - /* Free all AX25 frame buffers. */ - kfree(ax->rbuff); - kfree(ax->xbuff); - ax->tty = NULL; unregister_netdev(ax->dev); + + /* Free all AX25 frame buffers after unreg. */ + kfree(ax->rbuff); + kfree(ax->xbuff); + free_netdev(ax->dev); } From 0b9111922b1f399aba6ed1e1b8f2079c3da1aed8 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Mon, 8 Nov 2021 18:37:59 +0800 Subject: [PATCH 367/433] hamradio: defer 6pack kfree after unregister_netdev There is a possible race condition (use-after-free) like below (USE) | (FREE) dev_queue_xmit | __dev_queue_xmit | __dev_xmit_skb | sch_direct_xmit | ... xmit_one | netdev_start_xmit | tty_ldisc_kill __netdev_start_xmit | 6pack_close sp_xmit | kfree sp_encaps | | According to the patch "defer ax25 kfree after unregister_netdev", this patch reorders the kfree after the unregister_netdev to avoid the possible UAF as the unregister_netdev() is well synchronized and won't return if there is a running routine. Signed-off-by: Lin Ma Signed-off-by: David S. Miller --- drivers/net/hamradio/6pack.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index f4e8793e995d..fb0a3825edd0 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -672,11 +672,13 @@ static void sixpack_close(struct tty_struct *tty) del_timer_sync(&sp->tx_t); del_timer_sync(&sp->resync_t); - /* Free all 6pack frame buffers. */ + unregister_netdev(sp->dev); + + /* Free all 6pack frame buffers after unreg. 
*/ kfree(sp->rbuff); kfree(sp->xbuff); - unregister_netdev(sp->dev); + free_netdev(sp->dev); } /* Perform I/O control on an active 6pack channel. */ From 51bd9563b6783de8315f38f7baed949e77c42311 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 25 Oct 2021 17:27:47 +0100 Subject: [PATCH 368/433] btrfs: fix deadlock due to page faults during direct IO reads and writes If we do a direct IO read or write when the buffer given by the user is memory mapped to the file range we are going to do IO, we end up ending in a deadlock. This is triggered by the new test case generic/647 from fstests. For a direct IO read we get a trace like this: [967.872718] INFO: task mmap-rw-fault:12176 blocked for more than 120 seconds. [967.874161] Not tainted 5.14.0-rc7-btrfs-next-95 #1 [967.874909] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [967.875983] task:mmap-rw-fault state:D stack: 0 pid:12176 ppid: 11884 flags:0x00000000 [967.875992] Call Trace: [967.875999] __schedule+0x3ca/0xe10 [967.876015] schedule+0x43/0xe0 [967.876020] wait_extent_bit.constprop.0+0x1eb/0x260 [btrfs] [967.876109] ? do_wait_intr_irq+0xb0/0xb0 [967.876118] lock_extent_bits+0x37/0x90 [btrfs] [967.876150] btrfs_lock_and_flush_ordered_range+0xa9/0x120 [btrfs] [967.876184] ? extent_readahead+0xa7/0x530 [btrfs] [967.876214] extent_readahead+0x32d/0x530 [btrfs] [967.876253] ? lru_cache_add+0x104/0x220 [967.876255] ? kvm_sched_clock_read+0x14/0x40 [967.876258] ? sched_clock_cpu+0xd/0x110 [967.876263] ? lock_release+0x155/0x4a0 [967.876271] read_pages+0x86/0x270 [967.876274] ? lru_cache_add+0x125/0x220 [967.876281] page_cache_ra_unbounded+0x1a3/0x220 [967.876291] filemap_fault+0x626/0xa20 [967.876303] __do_fault+0x36/0xf0 [967.876308] __handle_mm_fault+0x83f/0x15f0 [967.876322] handle_mm_fault+0x9e/0x260 [967.876327] __get_user_pages+0x204/0x620 [967.876332] ? 
get_user_pages_unlocked+0x69/0x340 [967.876340] get_user_pages_unlocked+0xd3/0x340 [967.876349] internal_get_user_pages_fast+0xbca/0xdc0 [967.876366] iov_iter_get_pages+0x8d/0x3a0 [967.876374] bio_iov_iter_get_pages+0x82/0x4a0 [967.876379] ? lock_release+0x155/0x4a0 [967.876387] iomap_dio_bio_actor+0x232/0x410 [967.876396] iomap_apply+0x12a/0x4a0 [967.876398] ? iomap_dio_rw+0x30/0x30 [967.876414] __iomap_dio_rw+0x29f/0x5e0 [967.876415] ? iomap_dio_rw+0x30/0x30 [967.876420] ? lock_acquired+0xf3/0x420 [967.876429] iomap_dio_rw+0xa/0x30 [967.876431] btrfs_file_read_iter+0x10b/0x140 [btrfs] [967.876460] new_sync_read+0x118/0x1a0 [967.876472] vfs_read+0x128/0x1b0 [967.876477] __x64_sys_pread64+0x90/0xc0 [967.876483] do_syscall_64+0x3b/0xc0 [967.876487] entry_SYSCALL_64_after_hwframe+0x44/0xae [967.876490] RIP: 0033:0x7fb6f2c038d6 [967.876493] RSP: 002b:00007fffddf586b8 EFLAGS: 00000246 ORIG_RAX: 0000000000000011 [967.876496] RAX: ffffffffffffffda RBX: 0000000000001000 RCX: 00007fb6f2c038d6 [967.876498] RDX: 0000000000001000 RSI: 00007fb6f2c17000 RDI: 0000000000000003 [967.876499] RBP: 0000000000001000 R08: 0000000000000003 R09: 0000000000000000 [967.876501] R10: 0000000000001000 R11: 0000000000000246 R12: 0000000000000003 [967.876502] R13: 0000000000000000 R14: 00007fb6f2c17000 R15: 0000000000000000 This happens because at btrfs_dio_iomap_begin() we lock the extent range and return with it locked - we only unlock in the endio callback, at end_bio_extent_readpage() -> endio_readpage_release_extent(). Then after iomap called the btrfs_dio_iomap_begin() callback, it triggers the page faults that resulting in reading the pages, through the readahead callback btrfs_readahead(), and through there we end to attempt to lock again the same extent range (or a subrange of what we locked before), resulting in the deadlock. 
For a direct IO write, the scenario is a bit different, and it results in trace like this: [1132.442520] run fstests generic/647 at 2021-08-31 18:53:35 [1330.349355] INFO: task mmap-rw-fault:184017 blocked for more than 120 seconds. [1330.350540] Not tainted 5.14.0-rc7-btrfs-next-95 #1 [1330.351158] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [1330.351900] task:mmap-rw-fault state:D stack: 0 pid:184017 ppid:183725 flags:0x00000000 [1330.351906] Call Trace: [1330.351913] __schedule+0x3ca/0xe10 [1330.351930] schedule+0x43/0xe0 [1330.351935] btrfs_start_ordered_extent+0x108/0x1c0 [btrfs] [1330.352020] ? do_wait_intr_irq+0xb0/0xb0 [1330.352028] btrfs_lock_and_flush_ordered_range+0x8c/0x120 [btrfs] [1330.352064] ? extent_readahead+0xa7/0x530 [btrfs] [1330.352094] extent_readahead+0x32d/0x530 [btrfs] [1330.352133] ? lru_cache_add+0x104/0x220 [1330.352135] ? kvm_sched_clock_read+0x14/0x40 [1330.352138] ? sched_clock_cpu+0xd/0x110 [1330.352143] ? lock_release+0x155/0x4a0 [1330.352151] read_pages+0x86/0x270 [1330.352155] ? lru_cache_add+0x125/0x220 [1330.352162] page_cache_ra_unbounded+0x1a3/0x220 [1330.352172] filemap_fault+0x626/0xa20 [1330.352176] ? filemap_map_pages+0x18b/0x660 [1330.352184] __do_fault+0x36/0xf0 [1330.352189] __handle_mm_fault+0x1253/0x15f0 [1330.352203] handle_mm_fault+0x9e/0x260 [1330.352208] __get_user_pages+0x204/0x620 [1330.352212] ? get_user_pages_unlocked+0x69/0x340 [1330.352220] get_user_pages_unlocked+0xd3/0x340 [1330.352229] internal_get_user_pages_fast+0xbca/0xdc0 [1330.352246] iov_iter_get_pages+0x8d/0x3a0 [1330.352254] bio_iov_iter_get_pages+0x82/0x4a0 [1330.352259] ? lock_release+0x155/0x4a0 [1330.352266] iomap_dio_bio_actor+0x232/0x410 [1330.352275] iomap_apply+0x12a/0x4a0 [1330.352278] ? iomap_dio_rw+0x30/0x30 [1330.352292] __iomap_dio_rw+0x29f/0x5e0 [1330.352294] ? iomap_dio_rw+0x30/0x30 [1330.352306] btrfs_file_write_iter+0x238/0x480 [btrfs] [1330.352339] new_sync_write+0x11f/0x1b0 [1330.352344] ? 
NF_HOOK_LIST.constprop.0.cold+0x31/0x3e [1330.352354] vfs_write+0x292/0x3c0 [1330.352359] __x64_sys_pwrite64+0x90/0xc0 [1330.352365] do_syscall_64+0x3b/0xc0 [1330.352369] entry_SYSCALL_64_after_hwframe+0x44/0xae [1330.352372] RIP: 0033:0x7f4b0a580986 [1330.352379] RSP: 002b:00007ffd34d75418 EFLAGS: 00000246 ORIG_RAX: 0000000000000012 [1330.352382] RAX: ffffffffffffffda RBX: 0000000000001000 RCX: 00007f4b0a580986 [1330.352383] RDX: 0000000000001000 RSI: 00007f4b0a3a4000 RDI: 0000000000000003 [1330.352385] RBP: 00007f4b0a3a4000 R08: 0000000000000003 R09: 0000000000000000 [1330.352386] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000003 [1330.352387] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 Unlike for reads, at btrfs_dio_iomap_begin() we return with the extent range unlocked, but later when the page faults are triggered and we try to read the extents, we end up btrfs_lock_and_flush_ordered_range() where we find the ordered extent for our write, created by the iomap callback btrfs_dio_iomap_begin(), and we wait for it to complete, which makes us deadlock since we can't complete the ordered extent without reading the pages (the iomap code only submits the bio after the pages are faulted in). Fix this by setting the nofault attribute of the given iov_iter and retry the direct IO read/write if we get an -EFAULT error returned from iomap. For reads, also disable page faults completely, this is because when we read from a hole or a prealloc extent, we can still trigger page faults due to the call to iov_iter_zero() done by iomap - at the moment, it is oblivious to the value of the ->nofault attribute of an iov_iter. We also need to keep track of the number of bytes written or read, and pass it to iomap_dio_rw(), as well as use the new flag IOMAP_DIO_PARTIAL. 
This depends on the iov_iter and iomap changes introduced in commit c03098d4b9ad ("Merge tag 'gfs2-v5.15-rc5-mmap-fault' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2"). Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/file.c | 139 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 123 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 581662d16b72..11204dbbe053 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1912,16 +1912,17 @@ static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info, static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) { + const bool is_sync_write = (iocb->ki_flags & IOCB_DSYNC); struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); loff_t pos; ssize_t written = 0; ssize_t written_buffered; + size_t prev_left = 0; loff_t endbyte; ssize_t err; unsigned int ilock_flags = 0; - struct iomap_dio *dio = NULL; if (iocb->ki_flags & IOCB_NOWAIT) ilock_flags |= BTRFS_ILOCK_TRY; @@ -1964,23 +1965,80 @@ relock: goto buffered; } - dio = __iomap_dio_rw(iocb, from, &btrfs_dio_iomap_ops, &btrfs_dio_ops, - 0, 0); + /* + * We remove IOCB_DSYNC so that we don't deadlock when iomap_dio_rw() + * calls generic_write_sync() (through iomap_dio_complete()), because + * that results in calling fsync (btrfs_sync_file()) which will try to + * lock the inode in exclusive/write mode. + */ + if (is_sync_write) + iocb->ki_flags &= ~IOCB_DSYNC; + + /* + * The iov_iter can be mapped to the same file range we are writing to. + * If that's the case, then we will deadlock in the iomap code, because + * it first calls our callback btrfs_dio_iomap_begin(), which will create + * an ordered extent, and after that it will fault in the pages that the + * iov_iter refers to. 
During the fault in we end up in the readahead + * pages code (starting at btrfs_readahead()), which will lock the range, + * find that ordered extent and then wait for it to complete (at + * btrfs_lock_and_flush_ordered_range()), resulting in a deadlock since + * obviously the ordered extent can never complete as we didn't submit + * yet the respective bio(s). This always happens when the buffer is + * memory mapped to the same file range, since the iomap DIO code always + * invalidates pages in the target file range (after starting and waiting + * for any writeback). + * + * So here we disable page faults in the iov_iter and then retry if we + * got -EFAULT, faulting in the pages before the retry. + */ +again: + from->nofault = true; + err = iomap_dio_rw(iocb, from, &btrfs_dio_iomap_ops, &btrfs_dio_ops, + IOMAP_DIO_PARTIAL, written); + from->nofault = false; + + /* No increment (+=) because iomap returns a cumulative value. */ + if (err > 0) + written = err; + + if (iov_iter_count(from) > 0 && (err == -EFAULT || err > 0)) { + const size_t left = iov_iter_count(from); + /* + * We have more data left to write. Try to fault in as many as + * possible of the remainder pages and retry. We do this without + * releasing and locking again the inode, to prevent races with + * truncate. + * + * Also, in case the iov refers to pages in the file range of the + * file we want to write to (due to a mmap), we could enter an + * infinite loop if we retry after faulting the pages in, since + * iomap will invalidate any pages in the range early on, before + * it tries to fault in the pages of the iov. So we keep track of + * how much was left of iov in the previous EFAULT and fallback + * to buffered IO in case we haven't made any progress. 
+ */ + if (left == prev_left) { + err = -ENOTBLK; + } else { + fault_in_iov_iter_readable(from, left); + prev_left = left; + goto again; + } + } btrfs_inode_unlock(inode, ilock_flags); - if (IS_ERR_OR_NULL(dio)) { - err = PTR_ERR_OR_ZERO(dio); - if (err < 0 && err != -ENOTBLK) - goto out; - } else { - written = iomap_dio_complete(dio); - } + /* + * Add back IOCB_DSYNC. Our caller, btrfs_file_write_iter(), will do + * the fsync (call generic_write_sync()). + */ + if (is_sync_write) + iocb->ki_flags |= IOCB_DSYNC; - if (written < 0 || !iov_iter_count(from)) { - err = written; + /* If 'err' is -ENOTBLK then it means we must fallback to buffered IO. */ + if ((err < 0 && err != -ENOTBLK) || !iov_iter_count(from)) goto out; - } buffered: pos = iocb->ki_pos; @@ -2005,7 +2063,7 @@ buffered: invalidate_mapping_pages(file->f_mapping, pos >> PAGE_SHIFT, endbyte >> PAGE_SHIFT); out: - return written ? written : err; + return err < 0 ? err : written; } static ssize_t btrfs_file_write_iter(struct kiocb *iocb, @@ -3659,6 +3717,8 @@ static int check_direct_read(struct btrfs_fs_info *fs_info, static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to) { struct inode *inode = file_inode(iocb->ki_filp); + size_t prev_left = 0; + ssize_t read = 0; ssize_t ret; if (fsverity_active(inode)) @@ -3668,10 +3728,57 @@ static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to) return 0; btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED); +again: + /* + * This is similar to what we do for direct IO writes, see the comment + * at btrfs_direct_write(), but we also disable page faults in addition + * to disabling them only at the iov_iter level. This is because when + * reading from a hole or prealloc extent, iomap calls iov_iter_zero(), + * which can still trigger page fault ins despite having set ->nofault + * to true of our 'to' iov_iter. 
+ * + * The difference to direct IO writes is that we deadlock when trying + * to lock the extent range in the inode's tree during he page reads + * triggered by the fault in (while for writes it is due to waiting for + * our own ordered extent). This is because for direct IO reads, + * btrfs_dio_iomap_begin() returns with the extent range locked, which + * is only unlocked in the endio callback (end_bio_extent_readpage()). + */ + pagefault_disable(); + to->nofault = true; ret = iomap_dio_rw(iocb, to, &btrfs_dio_iomap_ops, &btrfs_dio_ops, - 0, 0); + IOMAP_DIO_PARTIAL, read); + to->nofault = false; + pagefault_enable(); + + /* No increment (+=) because iomap returns a cumulative value. */ + if (ret > 0) + read = ret; + + if (iov_iter_count(to) > 0 && (ret == -EFAULT || ret > 0)) { + const size_t left = iov_iter_count(to); + + if (left == prev_left) { + /* + * We didn't make any progress since the last attempt, + * fallback to a buffered read for the remainder of the + * range. This is just to avoid any possibility of looping + * for too long. + */ + ret = read; + } else { + /* + * We made some progress since the last retry or this is + * the first time we are retrying. Fault in as many pages + * as possible and retry. + */ + fault_in_iov_iter_writeable(to, left); + prev_left = left; + goto again; + } + } btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); - return ret; + return ret < 0 ? ret : read; } static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) From 1c360cc1cc883fbdf0a258b4df376571fbeac5ee Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 9 Nov 2021 14:47:36 +0300 Subject: [PATCH 369/433] gve: Fix off by one in gve_tx_timeout() The priv->ntfy_blocks[] has "priv->num_ntfy_blks" elements so this > needs to be >= to prevent an off by one bug. The priv->ntfy_blocks[] array is allocated in gve_alloc_notify_blocks(). 
Fixes: 87a7f321bb6a ("gve: Recover from queue stall due to missed IRQ") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 6b02ef432eda..59b66f679e46 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1137,7 +1137,7 @@ static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) goto reset; ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue); - if (ntfy_idx > priv->num_ntfy_blks) + if (ntfy_idx >= priv->num_ntfy_blks) goto reset; block = &priv->ntfy_blocks[ntfy_idx]; From 9758aba8542bb43029d077303d05df1d00a8dbb5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 8 Nov 2021 12:12:24 +0100 Subject: [PATCH 370/433] amt: add IPV6 Kconfig dependency This driver cannot be built-in if IPV6 is a loadable module: x86_64-linux-ld: drivers/net/amt.o: in function `amt_build_mld_gq': amt.c:(.text+0x2e7d): undefined reference to `ipv6_dev_get_saddr' Add the idiomatic Kconfig dependency that all such modules have. Fixes: b9022b53adad ("amt: add control plane of amt interface") Signed-off-by: Arnd Bergmann Acked-by: Taehee Yoo Signed-off-by: David S. 
Miller --- drivers/net/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 034dbd487c33..10506a4b66ef 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -294,6 +294,7 @@ config GTP config AMT tristate "Automatic Multicast Tunneling (AMT)" depends on INET && IP_MULTICAST + depends on IPV6 || !IPV6 select NET_UDP_TUNNEL help This allows one to create AMT(Automatic Multicast Tunneling) From 03a86cda4123084c7969387e7e0b69f23c2f8acf Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Mon, 1 Nov 2021 10:33:59 +0900 Subject: [PATCH 371/433] rtc: rv8803: fix writing back ctrl in flag register ctrl is set from read_regs(..FLAG, 2, ctrl), so ctrl[0] is FLAG and ctrl[1] is the CTRL register. Use ctrl[0] to write back to the FLAG register as appropriate. Signed-off-by: Dominique Martinet Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211101013400.325855-1-dominique.martinet@atmark-techno.com --- drivers/rtc/rtc-rv8803.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-rv8803.c b/drivers/rtc/rtc-rv8803.c index 72adef5a5ebe..0d5ed38bf60c 100644 --- a/drivers/rtc/rtc-rv8803.c +++ b/drivers/rtc/rtc-rv8803.c @@ -340,8 +340,8 @@ static int rv8803_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) } } - ctrl[1] &= ~RV8803_FLAG_AF; - err = rv8803_write_reg(rv8803->client, RV8803_FLAG, ctrl[1]); + ctrl[0] &= ~RV8803_FLAG_AF; + err = rv8803_write_reg(rv8803->client, RV8803_FLAG, ctrl[0]); mutex_unlock(&rv8803->flags_lock); if (err) return err; From 72e4ee638d8e1d7d80079c37ec8641fec0d63016 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sun, 7 Nov 2021 23:53:48 +0100 Subject: [PATCH 372/433] rtc: pcf85063: silence cppcheck warning cppcheck warnings: (new ones prefixed by >>) >> drivers/rtc/rtc-pcf85063.c:292:40: warning: Clarify calculation precedence for '&' and '?'. [clarifyCalculation] status = status & PCF85063_REG_SC_OS ? 
RTC_VL_DATA_INVALID : 0; Reported-by: kernel test robot Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211107225349.110707-1-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-pcf85063.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c index 4a70d6bae859..15e50bb10cf0 100644 --- a/drivers/rtc/rtc-pcf85063.c +++ b/drivers/rtc/rtc-pcf85063.c @@ -299,7 +299,7 @@ static int pcf85063_ioctl(struct device *dev, unsigned int cmd, if (ret < 0) return ret; - status = status & PCF85063_REG_SC_OS ? RTC_VL_DATA_INVALID : 0; + status = (status & PCF85063_REG_SC_OS) ? RTC_VL_DATA_INVALID : 0; return put_user(status, (unsigned int __user *)arg); From d87f741dddabd670a82bccf94ac73cec80209bc0 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sun, 7 Nov 2021 23:54:46 +0100 Subject: [PATCH 373/433] rtc: handle alarms with a minute resolution Handle alarms with a minute resolution in the core. Until now drivers have been open coding the seconds part removal and have been doing that wrongly. Most of them are rounding up which means the allow the system to miss deadlines. So, round down and let __rtc_set_alarm return immediately if the time has already passed. 
Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211107225458.111068-1-alexandre.belloni@bootlin.com --- drivers/rtc/interface.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index d005623e6eb3..d8e835798153 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -423,6 +423,7 @@ static int __rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) if (err) return err; now = rtc_tm_to_time64(&tm); + if (scheduled <= now) return -ETIME; /* @@ -447,6 +448,7 @@ static int __rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) { + ktime_t alarm_time; int err; if (!rtc->ops) @@ -468,7 +470,15 @@ int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) if (rtc->aie_timer.enabled) rtc_timer_remove(rtc, &rtc->aie_timer); - rtc->aie_timer.node.expires = rtc_tm_to_ktime(alarm->time); + alarm_time = rtc_tm_to_ktime(alarm->time); + /* + * Round down so we never miss a deadline, checking for past deadline is + * done in __rtc_set_alarm + */ + if (test_bit(RTC_FEATURE_ALARM_RES_MINUTE, rtc->features)) + alarm_time = ktime_sub_ns(alarm_time, (u64)alarm->time.tm_sec * NSEC_PER_SEC); + + rtc->aie_timer.node.expires = alarm_time; rtc->aie_timer.period = 0; if (alarm->enabled) err = rtc_timer_enqueue(rtc, &rtc->aie_timer); From 654815eff130a42e861241b848dfea2c50d6c7b1 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sun, 7 Nov 2021 23:54:47 +0100 Subject: [PATCH 374/433] rtc: s35390a: let the core handle the alarm resolution Tell the RTC core UIE are not supported because the resolution of the alarm is a minute. 
Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211107225458.111068-2-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-s35390a.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/rtc/rtc-s35390a.c b/drivers/rtc/rtc-s35390a.c index b5bdeda7d767..26278c770731 100644 --- a/drivers/rtc/rtc-s35390a.c +++ b/drivers/rtc/rtc-s35390a.c @@ -285,9 +285,6 @@ static int s35390a_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm) alm->time.tm_min, alm->time.tm_hour, alm->time.tm_mday, alm->time.tm_mon, alm->time.tm_year, alm->time.tm_wday); - if (alm->time.tm_sec != 0) - dev_warn(&client->dev, "Alarms are only supported on a per minute basis!\n"); - /* disable interrupt (which deasserts the irq line) */ err = s35390a_set_reg(s35390a, S35390A_CMD_STATUS2, &sts, sizeof(sts)); if (err < 0) @@ -491,8 +488,8 @@ static int s35390a_probe(struct i2c_client *client, s35390a->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; s35390a->rtc->range_max = RTC_TIMESTAMP_END_2099; - /* supports per-minute alarms only, therefore set uie_unsupported */ - s35390a->rtc->uie_unsupported = 1; + set_bit(RTC_FEATURE_ALARM_RES_MINUTE, s35390a->rtc->features); + clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, s35390a->rtc->features ); if (status1 & S35390A_FLAG_INT2) rtc_update_irq(s35390a->rtc, 1, RTC_AF); From ac86964ff9791f8da27b321772db85369ec59506 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sun, 7 Nov 2021 23:54:48 +0100 Subject: [PATCH 375/433] rtc: rv3032: let the core handle the alarm resolution Let the RTC core know the resolution of the alarm is a minute. 
Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211107225458.111068-3-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-rv3032.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/rtc/rtc-rv3032.c b/drivers/rtc/rtc-rv3032.c index a3c73179ecb1..c3bee305eacc 100644 --- a/drivers/rtc/rtc-rv3032.c +++ b/drivers/rtc/rtc-rv3032.c @@ -311,14 +311,6 @@ static int rv3032_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) u8 ctrl = 0; int ret; - /* The alarm has no seconds, round up to nearest minute */ - if (alrm->time.tm_sec) { - time64_t alarm_time = rtc_tm_to_time64(&alrm->time); - - alarm_time += 60 - alrm->time.tm_sec; - rtc_time64_to_tm(alarm_time, &alrm->time); - } - ret = regmap_update_bits(rv3032->regmap, RV3032_CTRL2, RV3032_CTRL2_AIE | RV3032_CTRL2_UIE, 0); if (ret) @@ -958,6 +950,7 @@ static int rv3032_probe(struct i2c_client *client) rv3032_trickle_charger_setup(&client->dev, rv3032); set_bit(RTC_FEATURE_BACKUP_SWITCH_MODE, rv3032->rtc->features); + set_bit(RTC_FEATURE_ALARM_RES_MINUTE, rv3032->rtc->features); rv3032->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; rv3032->rtc->range_max = RTC_TIMESTAMP_END_2099; From 24370014011f8df7a098a83a453204b1f05fd1d2 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sun, 7 Nov 2021 23:54:49 +0100 Subject: [PATCH 376/433] rtc: ab-eoz9: use RTC_FEATURE_UPDATE_INTERRUPT Switch from uie_unsupported to RTC_FEATURE_UPDATE_INTERRUPT Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211107225458.111068-4-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-ab-eoz9.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-ab-eoz9.c b/drivers/rtc/rtc-ab-eoz9.c index a9b355510cd4..50ead6fce880 100644 --- a/drivers/rtc/rtc-ab-eoz9.c +++ b/drivers/rtc/rtc-ab-eoz9.c @@ -534,7 +534,7 @@ static int abeoz9_probe(struct i2c_client *client, data->rtc->ops = &rtc_ops; data->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; data->rtc->range_max = 
RTC_TIMESTAMP_END_2099; - data->rtc->uie_unsupported = 1; + clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, data->rtc->features); clear_bit(RTC_FEATURE_ALARM, data->rtc->features); if (client->irq > 0) { From 27f06af753149f62d681297177bafc95c1551bfe Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sun, 7 Nov 2021 23:54:50 +0100 Subject: [PATCH 377/433] rtc: ab-eoz9: support UIE when available The RTC actually supports UIE when an interrupt is available. Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211107225458.111068-5-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-ab-eoz9.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-ab-eoz9.c b/drivers/rtc/rtc-ab-eoz9.c index 50ead6fce880..e188ab517f1e 100644 --- a/drivers/rtc/rtc-ab-eoz9.c +++ b/drivers/rtc/rtc-ab-eoz9.c @@ -534,7 +534,6 @@ static int abeoz9_probe(struct i2c_client *client, data->rtc->ops = &rtc_ops; data->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; data->rtc->range_max = RTC_TIMESTAMP_END_2099; - clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, data->rtc->features); clear_bit(RTC_FEATURE_ALARM, data->rtc->features); if (client->irq > 0) { @@ -546,6 +545,8 @@ static int abeoz9_probe(struct i2c_client *client, dev_err(dev, "failed to request alarm irq\n"); return ret; } + } else { + clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, data->rtc->features); } if (client->irq > 0 || device_property_read_bool(dev, "wakeup-source")) { From a5f828036c2e9f46d84c7c9b27bf248c7f4bb0fe Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sun, 7 Nov 2021 23:54:51 +0100 Subject: [PATCH 378/433] rtc: ab8500: let the core handle the alarm resolution Tell the RTC core UIE are not supported because the resolution of the alarm is a minute. Note that this is in fact also fixing how the resolution is reported as the previous test was simply ensuring the alarm was more than a minute in the future while the register has a minute resolution. 
This would be ok if the alarm was a countdown but ab8500_rtc_read_alarm suggests otherwise and the AB8500 datasheet states that the RTC documentation is not public. Finally, the comment is wrong and what makes the UIE emulation work is uie_unsupported being set. Signed-off-by: Alexandre Belloni Acked-by: Linus Walleij Link: https://lore.kernel.org/r/20211107225458.111068-6-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-ab8500.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/drivers/rtc/rtc-ab8500.c b/drivers/rtc/rtc-ab8500.c index b40048871295..ea33e149d545 100644 --- a/drivers/rtc/rtc-ab8500.c +++ b/drivers/rtc/rtc-ab8500.c @@ -184,25 +184,9 @@ static int ab8500_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm) { int retval, i; unsigned char buf[ARRAY_SIZE(ab8500_rtc_alarm_regs)]; - unsigned long mins, secs = 0, cursec = 0; - struct rtc_time curtm; + unsigned long mins; - /* Get the number of seconds since 1970 */ - secs = rtc_tm_to_time64(&alarm->time); - - /* - * Check whether alarm is set less than 1min. 
- * Since our RTC doesn't support alarm resolution less than 1min, - * return -EINVAL, so UIE EMUL can take it up, incase of UIE_ON - */ - ab8500_rtc_read_time(dev, &curtm); /* Read current time */ - cursec = rtc_tm_to_time64(&curtm); - if ((secs - cursec) < 59) { - dev_dbg(dev, "Alarm less than 1 minute not supported\r\n"); - return -EINVAL; - } - - mins = secs / 60; + mins = (unsigned long)rtc_tm_to_time64(&alarm->time) / 60; buf[2] = mins & 0xFF; buf[1] = (mins >> 8) & 0xFF; @@ -394,7 +378,8 @@ static int ab8500_rtc_probe(struct platform_device *pdev) dev_pm_set_wake_irq(&pdev->dev, irq); platform_set_drvdata(pdev, rtc); - rtc->uie_unsupported = 1; + set_bit(RTC_FEATURE_ALARM_RES_MINUTE, rtc->features); + clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, rtc->features); rtc->range_max = (1ULL << 24) * 60 - 1; // 24-bit minutes + 59 secs rtc->start_secs = RTC_TIMESTAMP_BEGIN_2000; From 5e7f635aa64764f34266d22ee0fa27c8f4834309 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sun, 7 Nov 2021 23:54:52 +0100 Subject: [PATCH 379/433] rtc: rx8025: switch to devm_rtc_allocate_device Switch to devm_rtc_allocate_device/devm_rtc_register_device, this allows for further improvement of the driver. 
Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211107225458.111068-7-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-rx8025.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c index d38aaf08108c..617b044c66f0 100644 --- a/drivers/rtc/rtc-rx8025.c +++ b/drivers/rtc/rtc-rx8025.c @@ -559,12 +559,11 @@ static int rx8025_probe(struct i2c_client *client, if (err) return err; - rx8025->rtc = devm_rtc_device_register(&client->dev, client->name, - &rx8025_rtc_ops, THIS_MODULE); - if (IS_ERR(rx8025->rtc)) { - dev_err(&client->dev, "unable to register the class device\n"); + rx8025->rtc = devm_rtc_allocate_device(&client->dev); + if (IS_ERR(rx8025->rtc)) return PTR_ERR(rx8025->rtc); - } + + rx8025->rtc->ops = &rx8025_rtc_ops; if (client->irq > 0) { dev_info(&client->dev, "IRQ %d supplied\n", client->irq); @@ -583,6 +582,10 @@ static int rx8025_probe(struct i2c_client *client, /* the rx8025 alarm only supports a minute accuracy */ rx8025->rtc->uie_unsupported = 1; + err = devm_rtc_register_device(rx8025->rtc); + if (err) + return err; + err = rx8025_sysfs_register(&client->dev); return err; } From 8670558f9e296ce9874284b3c3fcd3ed9fa717d3 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sun, 7 Nov 2021 23:54:53 +0100 Subject: [PATCH 380/433] rtc: rx8025: let the core handle the alarm resolution Tell the RTC core UIE are not supported because the resolution of the alarm is a minute. 
Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211107225458.111068-8-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-rx8025.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c index 617b044c66f0..6941e0518290 100644 --- a/drivers/rtc/rtc-rx8025.c +++ b/drivers/rtc/rtc-rx8025.c @@ -358,17 +358,6 @@ static int rx8025_set_alarm(struct device *dev, struct rtc_wkalrm *t) if (client->irq <= 0) return -EINVAL; - /* - * Hardware alarm precision is 1 minute! - * round up to nearest minute - */ - if (t->time.tm_sec) { - time64_t alarm_time = rtc_tm_to_time64(&t->time); - - alarm_time += 60 - t->time.tm_sec; - rtc_time64_to_tm(alarm_time, &t->time); - } - ald[0] = bin2bcd(t->time.tm_min); if (rx8025->ctrl1 & RX8025_BIT_CTRL1_1224) ald[1] = bin2bcd(t->time.tm_hour); @@ -579,8 +568,8 @@ static int rx8025_probe(struct i2c_client *client, rx8025->rtc->max_user_freq = 1; - /* the rx8025 alarm only supports a minute accuracy */ - rx8025->rtc->uie_unsupported = 1; + set_bit(RTC_FEATURE_ALARM_RES_MINUTE, rx8025->rtc->features); + clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, rx8025->rtc->features); err = devm_rtc_register_device(rx8025->rtc); if (err) From 1709d7eea1c6d7269eb85248dc8c3cacb8324d83 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sun, 7 Nov 2021 23:54:54 +0100 Subject: [PATCH 381/433] rtc: rx8025: set range Set the RTC range, it is a classic BCD RTC, with 00 being a leap year. Let the core handle range checking. 
Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211107225458.111068-9-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-rx8025.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c index 6941e0518290..6002305efa2d 100644 --- a/drivers/rtc/rtc-rx8025.c +++ b/drivers/rtc/rtc-rx8025.c @@ -248,9 +248,6 @@ static int rx8025_set_time(struct device *dev, struct rtc_time *dt) u8 date[7]; int ret; - if ((dt->tm_year < 100) || (dt->tm_year > 199)) - return -EINVAL; - /* * Here the read-only bits are written as "0". I'm not sure if that * is sound. @@ -553,6 +550,8 @@ static int rx8025_probe(struct i2c_client *client, return PTR_ERR(rx8025->rtc); rx8025->rtc->ops = &rx8025_rtc_ops; + rx8025->rtc->range_min = RTC_TIMESTAMP_BEGIN_1900; + rx8025->rtc->range_max = RTC_TIMESTAMP_END_2099; if (client->irq > 0) { dev_info(&client->dev, "IRQ %d supplied\n", client->irq); From 5be3933fea2e9de50655f3cd3cda23184e8bf6bb Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sun, 7 Nov 2021 23:54:55 +0100 Subject: [PATCH 382/433] rtc: rx8025: clear RTC_FEATURE_ALARM when alarm are not supported Clear RTC_FEATURE_ALARM to signal alarms are not supported to the core instead of checking client->irq. 
Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211107225458.111068-10-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-rx8025.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c index 6002305efa2d..fcfdefe94a7c 100644 --- a/drivers/rtc/rtc-rx8025.c +++ b/drivers/rtc/rtc-rx8025.c @@ -315,9 +315,6 @@ static int rx8025_read_alarm(struct device *dev, struct rtc_wkalrm *t) u8 ald[2]; int ctrl2, err; - if (client->irq <= 0) - return -EINVAL; - err = rx8025_read_regs(client, RX8025_REG_ALDMIN, 2, ald); if (err) return err; @@ -352,9 +349,6 @@ static int rx8025_set_alarm(struct device *dev, struct rtc_wkalrm *t) u8 ald[2]; int err; - if (client->irq <= 0) - return -EINVAL; - ald[0] = bin2bcd(t->time.tm_min); if (rx8025->ctrl1 & RX8025_BIT_CTRL1_1224) ald[1] = bin2bcd(t->time.tm_hour); @@ -559,10 +553,8 @@ static int rx8025_probe(struct i2c_client *client, rx8025_handle_irq, IRQF_ONESHOT, "rx8025", client); - if (err) { - dev_err(&client->dev, "unable to request IRQ, alarms disabled\n"); - client->irq = 0; - } + if (err) + clear_bit(RTC_FEATURE_ALARM, rx8025->rtc->features); } rx8025->rtc->max_user_freq = 1; From 3d35840dfb75ac1a87dfbbddc9446b17446c2473 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sun, 7 Nov 2021 23:54:56 +0100 Subject: [PATCH 383/433] rtc: rx8025: use rtc_add_group Remove open coded sysfs registration by using rtc_add_group. 
Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211107225458.111068-11-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-rx8025.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c index fcfdefe94a7c..c5b3814f8c8e 100644 --- a/drivers/rtc/rtc-rx8025.c +++ b/drivers/rtc/rtc-rx8025.c @@ -502,15 +502,14 @@ static DEVICE_ATTR(clock_adjust_ppb, S_IRUGO | S_IWUSR, rx8025_sysfs_show_clock_adjust, rx8025_sysfs_store_clock_adjust); -static int rx8025_sysfs_register(struct device *dev) -{ - return device_create_file(dev, &dev_attr_clock_adjust_ppb); -} +static struct attribute *rx8025_attrs[] = { + &dev_attr_clock_adjust_ppb.attr, + NULL +}; -static void rx8025_sysfs_unregister(struct device *dev) -{ - device_remove_file(dev, &dev_attr_clock_adjust_ppb); -} +static const struct attribute_group rx8025_attr_group = { + .attrs = rx8025_attrs, +}; static int rx8025_probe(struct i2c_client *client, const struct i2c_device_id *id) @@ -562,18 +561,11 @@ static int rx8025_probe(struct i2c_client *client, set_bit(RTC_FEATURE_ALARM_RES_MINUTE, rx8025->rtc->features); clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, rx8025->rtc->features); - err = devm_rtc_register_device(rx8025->rtc); + err = rtc_add_group(rx8025->rtc, &rx8025_attr_group); if (err) return err; - err = rx8025_sysfs_register(&client->dev); - return err; -} - -static int rx8025_remove(struct i2c_client *client) -{ - rx8025_sysfs_unregister(&client->dev); - return 0; + return devm_rtc_register_device(rx8025->rtc); } static struct i2c_driver rx8025_driver = { @@ -581,7 +573,6 @@ static struct i2c_driver rx8025_driver = { .name = "rtc-rx8025", }, .probe = rx8025_probe, - .remove = rx8025_remove, .id_table = rx8025_id, }; From b476266f063e680039be1541cfde5f5cee400da3 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sun, 7 Nov 2021 23:54:57 +0100 Subject: [PATCH 384/433] rtc: rx8025: use .set_offset/.read_offset The 
driver has its own sysfs file to adjust the clock. Fortunately, it is already in ppb, however, the sign it expects is the opposite of what the RTC core does (which actually aligns with the RTC). Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211107225458.111068-12-alexandre.belloni@bootlin.com --- drivers/rtc/rtc-rx8025.c | 73 ++++++++++++++++++++-------------------- 1 file changed, 36 insertions(+), 37 deletions(-) diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c index c5b3814f8c8e..5bfdd34a72ff 100644 --- a/drivers/rtc/rtc-rx8025.c +++ b/drivers/rtc/rtc-rx8025.c @@ -403,17 +403,7 @@ static int rx8025_alarm_irq_enable(struct device *dev, unsigned int enabled) return 0; } -static const struct rtc_class_ops rx8025_rtc_ops = { - .read_time = rx8025_get_time, - .set_time = rx8025_set_time, - .read_alarm = rx8025_read_alarm, - .set_alarm = rx8025_set_alarm, - .alarm_irq_enable = rx8025_alarm_irq_enable, -}; - /* - * Clock precision adjustment support - * * According to the RX8025 SA/NB application manual the frequency and * temperature characteristics can be approximated using the following * equation: @@ -424,11 +414,8 @@ static const struct rtc_class_ops rx8025_rtc_ops = { * a : Coefficient = (-35 +-5) * 10**-9 * ut: Ultimate temperature in degree = +25 +-5 degree * t : Any temperature in degree - * - * Note that the clock adjustment in ppb must be entered (which is - * the negative value of the deviation). */ -static int rx8025_get_clock_adjust(struct device *dev, int *adj) +static int rx8025_read_offset(struct device *dev, long *offset) { struct i2c_client *client = to_i2c_client(dev); int digoff; @@ -437,63 +424,75 @@ static int rx8025_get_clock_adjust(struct device *dev, int *adj) if (digoff < 0) return digoff; - *adj = digoff >= 64 ? digoff - 128 : digoff; - if (*adj > 0) - (*adj)--; - *adj *= -RX8025_ADJ_RESOLUTION; + *offset = digoff >= 64 ?
digoff - 128 : digoff; + if (*offset > 0) + (*offset)--; + *offset *= RX8025_ADJ_RESOLUTION; return 0; } -static int rx8025_set_clock_adjust(struct device *dev, int adj) +static int rx8025_set_offset(struct device *dev, long offset) { struct i2c_client *client = to_i2c_client(dev); u8 digoff; int err; - adj /= -RX8025_ADJ_RESOLUTION; - if (adj > RX8025_ADJ_DATA_MAX) - adj = RX8025_ADJ_DATA_MAX; - else if (adj < RX8025_ADJ_DATA_MIN) - adj = RX8025_ADJ_DATA_MIN; - else if (adj > 0) - adj++; - else if (adj < 0) - adj += 128; - digoff = adj; + offset /= RX8025_ADJ_RESOLUTION; + if (offset > RX8025_ADJ_DATA_MAX) + offset = RX8025_ADJ_DATA_MAX; + else if (offset < RX8025_ADJ_DATA_MIN) + offset = RX8025_ADJ_DATA_MIN; + else if (offset > 0) + offset++; + else if (offset < 0) + offset += 128; + digoff = offset; err = rx8025_write_reg(client, RX8025_REG_DIGOFF, digoff); if (err) return err; - dev_dbg(dev, "%s: write 0x%02x\n", __func__, digoff); - return 0; } +static const struct rtc_class_ops rx8025_rtc_ops = { + .read_time = rx8025_get_time, + .set_time = rx8025_set_time, + .read_alarm = rx8025_read_alarm, + .set_alarm = rx8025_set_alarm, + .alarm_irq_enable = rx8025_alarm_irq_enable, + .read_offset = rx8025_read_offset, + .set_offset = rx8025_set_offset, +}; + static ssize_t rx8025_sysfs_show_clock_adjust(struct device *dev, struct device_attribute *attr, char *buf) { - int err, adj; + long adj; + int err; - err = rx8025_get_clock_adjust(dev, &adj); + dev_warn_once(dev, "clock_adjust_ppb is deprecated, use offset\n"); + err = rx8025_read_offset(dev, &adj); if (err) return err; - return sprintf(buf, "%d\n", adj); + return sprintf(buf, "%ld\n", -adj); } static ssize_t rx8025_sysfs_store_clock_adjust(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - int adj, err; + long adj; + int err; - if (sscanf(buf, "%i", &adj) != 1) + dev_warn_once(dev, "clock_adjust_ppb is deprecated, use offset\n"); + if (kstrtol(buf, 10, &adj) != 0) return -EINVAL; 
- err = rx8025_set_clock_adjust(dev, adj); + err = rx8025_set_offset(dev, -adj); return err ? err : count; } From dc2fc9f03c5c410d8f01c2206b3d529f80b13733 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Thu, 4 Nov 2021 18:17:47 +0100 Subject: [PATCH 385/433] net: dsa: mv88e6xxx: Don't support >1G speeds on 6191X on ports other than 10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model 88E6191X only supports >1G speeds on port 10. Port 0 and 9 are only 1G. Fixes: de776d0d316f ("net: dsa: mv88e6xxx: add support for mv88e6393x family") Signed-off-by: Marek Behún Cc: Russell King (Oracle) Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20211104171747.10509-1-kabel@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 14c678a9e41b..f00cbf5753b9 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -640,7 +640,10 @@ static void mv88e6393x_phylink_validate(struct mv88e6xxx_chip *chip, int port, unsigned long *mask, struct phylink_link_state *state) { - if (port == 0 || port == 9 || port == 10) { + bool is_6191x = + chip->info->prod_num == MV88E6XXX_PORT_SWITCH_ID_PROD_6191X; + + if (((port == 0 || port == 9) && !is_6191x) || port == 10) { phylink_set(mask, 10000baseT_Full); phylink_set(mask, 10000baseKR_Full); phylink_set(mask, 10000baseCR_Full); From 43aa4937994f39a5ffc1a581b5ca382a1a2a8b1e Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 8 Nov 2021 14:53:40 +0000 Subject: [PATCH 386/433] amt: use cancel_delayed_work() instead of flush_delayed_work() in amt_fini() When the amt module is being removed, it calls flush_delayed_work() to exit source_gc_wq. 
But it wouldn't be exited properly because the amt_source_gc_work(), which is the callback function of source_gc_wq internally calls mod_delayed_work() again. So, amt_source_gc_work() would be called after the amt module is removed. Therefore kernel panic would occur. In order to avoid it, cancel_delayed_work() should be used instead of flush_delayed_work(). Test commands: modprobe amt modprobe -rv amt Splat looks like: BUG: unable to handle page fault for address: fffffbfff80f50db #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 1237ee067 P4D 1237ee067 PUD 1237b2067 PMD 100c11067 PTE 0 Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC KASAN PTI CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.15.0+ #27 5a0ebebc29fe5c40c68bea90197606c3a832b09f RIP: 0010:run_timer_softirq+0x221/0xfc0 Code: 00 00 4c 89 e1 4c 8b 30 48 c1 e9 03 80 3c 29 00 0f 85 ed 0b 00 00 4d 89 34 24 4d 85 f6 74 19 49 8d 7e 08 48 89 f9 48 c1 e9 03 <80> 3c 29 00 0f 85 fa 0b 00 00 4d 89 66 08 83 04 24 01 49 89 d4 48 RSP: 0018:ffff888119009e50 EFLAGS: 00010806 RAX: ffff8881191f8a80 RBX: 00000000007ffe2a RCX: 1ffffffff80f50db RDX: ffff888119009ed0 RSI: 0000000000000008 RDI: ffffffffc07a86d8 RBP: dffffc0000000000 R08: ffff8881191f8280 R09: ffffed102323f061 R10: ffff8881191f8307 R11: ffffed102323f060 R12: ffff888119009ec8 R13: 00000000000000c0 R14: ffffffffc07a86d0 R15: ffff8881191f82e8 FS: 0000000000000000(0000) GS:ffff888119000000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: fffffbfff80f50db CR3: 00000001062dc002 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? add_timer+0x650/0x650 ? kvm_clock_read+0x14/0x30 ? ktime_get+0xb9/0x180 ? rcu_read_lock_held_common+0xe/0xa0 ? rcu_read_lock_sched_held+0x56/0xc0 ? rcu_read_lock_bh_held+0xa0/0xa0 ? 
hrtimer_interrupt+0x271/0x790 __do_softirq+0x1d0/0x88f irq_exit_rcu+0xe7/0x120 sysvec_apic_timer_interrupt+0x8a/0xb0 [ ... ] Fixes: bc54e49c140b ("amt: add multicast(IGMP) report message handler") Signed-off-by: Taehee Yoo Link: https://lore.kernel.org/r/20211108145340.17208-1-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/amt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/amt.c b/drivers/net/amt.c index c384b2694f9e..47a04c330885 100644 --- a/drivers/net/amt.c +++ b/drivers/net/amt.c @@ -3286,7 +3286,7 @@ static void __exit amt_fini(void) { rtnl_link_unregister(&amt_link_ops); unregister_netdevice_notifier(&amt_notifier_block); - flush_delayed_work(&source_gc_wq); + cancel_delayed_work(&source_gc_wq); __amt_source_gc_work(); destroy_workqueue(amt_wq); } From 6dc25401cba4d428328eade8ceae717633fdd702 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 8 Nov 2021 10:08:15 -0800 Subject: [PATCH 387/433] net/sched: sch_taprio: fix undefined behavior in ktime_mono_to_any 1) if q->tk_offset == TK_OFFS_MAX, then get_tcp_tstamp() calls ktime_mono_to_any() with out-of-bound value. 
2) if q->tk_offset is changed in taprio_parse_clockid(), taprio_get_time() might also call ktime_mono_to_any() with out-of-bound value as syzbot found: UBSAN: array-index-out-of-bounds in kernel/time/timekeeping.c:908:27 index 3 is out of range for type 'ktime_t *[3]' CPU: 1 PID: 25668 Comm: kworker/u4:0 Not tainted 5.15.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: bat_events batadv_iv_send_outstanding_bat_ogm_packet Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 ubsan_epilogue+0xb/0x5a lib/ubsan.c:151 __ubsan_handle_out_of_bounds.cold+0x62/0x6c lib/ubsan.c:291 ktime_mono_to_any+0x1d4/0x1e0 kernel/time/timekeeping.c:908 get_tcp_tstamp net/sched/sch_taprio.c:322 [inline] get_packet_txtime net/sched/sch_taprio.c:353 [inline] taprio_enqueue_one+0x5b0/0x1460 net/sched/sch_taprio.c:420 taprio_enqueue+0x3b1/0x730 net/sched/sch_taprio.c:485 dev_qdisc_enqueue+0x40/0x300 net/core/dev.c:3785 __dev_xmit_skb net/core/dev.c:3869 [inline] __dev_queue_xmit+0x1f6e/0x3630 net/core/dev.c:4194 batadv_send_skb_packet+0x4a9/0x5f0 net/batman-adv/send.c:108 batadv_iv_ogm_send_to_if net/batman-adv/bat_iv_ogm.c:393 [inline] batadv_iv_ogm_emit net/batman-adv/bat_iv_ogm.c:421 [inline] batadv_iv_send_outstanding_bat_ogm_packet+0x6d7/0x8e0 net/batman-adv/bat_iv_ogm.c:1701 process_one_work+0x9b2/0x1690 kernel/workqueue.c:2298 worker_thread+0x658/0x11f0 kernel/workqueue.c:2445 kthread+0x405/0x4f0 kernel/kthread.c:327 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 Fixes: 7ede7b03484b ("taprio: make clock reference conversions easier") Fixes: 54002066100b ("taprio: Adjust timestamps for TCP packets") Signed-off-by: Eric Dumazet Cc: Vedang Patel Reported-by: syzbot Reviewed-by: Vinicius Costa Gomes Link: https://lore.kernel.org/r/20211108180815.1822479-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/sch_taprio.c | 27 +++++++++++++++++---------- 1
file changed, 17 insertions(+), 10 deletions(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 9ab068fa2672..377f896bdedc 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -95,18 +95,22 @@ static ktime_t sched_base_time(const struct sched_gate_list *sched) return ns_to_ktime(sched->base_time); } -static ktime_t taprio_get_time(struct taprio_sched *q) +static ktime_t taprio_mono_to_any(const struct taprio_sched *q, ktime_t mono) { - ktime_t mono = ktime_get(); + /* This pairs with WRITE_ONCE() in taprio_parse_clockid() */ + enum tk_offsets tk_offset = READ_ONCE(q->tk_offset); - switch (q->tk_offset) { + switch (tk_offset) { case TK_OFFS_MAX: return mono; default: - return ktime_mono_to_any(mono, q->tk_offset); + return ktime_mono_to_any(mono, tk_offset); } +} - return KTIME_MAX; +static ktime_t taprio_get_time(const struct taprio_sched *q) +{ + return taprio_mono_to_any(q, ktime_get()); } static void taprio_free_sched_cb(struct rcu_head *head) @@ -319,7 +323,7 @@ static ktime_t get_tcp_tstamp(struct taprio_sched *q, struct sk_buff *skb) return 0; } - return ktime_mono_to_any(skb->skb_mstamp_ns, q->tk_offset); + return taprio_mono_to_any(q, skb->skb_mstamp_ns); } /* There are a few scenarios where we will have to modify the txtime from @@ -1352,6 +1356,7 @@ static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb, } } else if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) { int clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]); + enum tk_offsets tk_offset; /* We only support static clockids and we don't allow * for it to be modified after the first init. 
@@ -1366,22 +1371,24 @@ static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb, switch (clockid) { case CLOCK_REALTIME: - q->tk_offset = TK_OFFS_REAL; + tk_offset = TK_OFFS_REAL; break; case CLOCK_MONOTONIC: - q->tk_offset = TK_OFFS_MAX; + tk_offset = TK_OFFS_MAX; break; case CLOCK_BOOTTIME: - q->tk_offset = TK_OFFS_BOOT; + tk_offset = TK_OFFS_BOOT; break; case CLOCK_TAI: - q->tk_offset = TK_OFFS_TAI; + tk_offset = TK_OFFS_TAI; break; default: NL_SET_ERR_MSG(extack, "Invalid 'clockid'"); err = -EINVAL; goto out; } + /* This pairs with READ_ONCE() in taprio_mono_to_any */ + WRITE_ONCE(q->tk_offset, tk_offset); q->clockid = clockid; } else { From 8f1bc38bbb516826ede8c96cb73a884221f1a314 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 8 Nov 2021 20:18:17 +0000 Subject: [PATCH 388/433] net: mana: Fix spelling mistake "calledd" -> "called" There is a spelling mistake in a dev_info message. Fix it. Signed-off-by: Colin Ian King Reviewed-by: Dexuan Cui Link: https://lore.kernel.org/r/20211108201817.43121-1-colin.i.king@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microsoft/mana/gdma_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index c96ac81212f7..636dfef24a6c 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -1424,7 +1424,7 @@ static void mana_gd_shutdown(struct pci_dev *pdev) { struct gdma_context *gc = pci_get_drvdata(pdev); - dev_info(&pdev->dev, "Shutdown was calledd\n"); + dev_info(&pdev->dev, "Shutdown was called\n"); mana_remove(&gc->mana, true); From be896bd3b72b44126c55768f14c22a8729b0992e Mon Sep 17 00:00:00 2001 From: Phoenix Huang Date: Sun, 7 Nov 2021 22:00:03 -0800 Subject: [PATCH 389/433] Input: elantench - fix misreporting trackpoint coordinates Some firmwares occasionally report bogus data from trackpoint, with X or Y 
displacement being too large (outside of [-127, 127] range). Let's drop such packets so that we do not generate jumps. Signed-off-by: Phoenix Huang Tested-by: Yufei Du Link: https://lore.kernel.org/r/20210729010940.5752-1-phoenix@emc.com.tw Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elantech.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 2d0bc029619f..956d9cd34796 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -517,6 +517,19 @@ static void elantech_report_trackpoint(struct psmouse *psmouse, case 0x16008020U: case 0x26800010U: case 0x36808000U: + + /* + * This firmware misreport coordinates for trackpoint + * occasionally. Discard packets outside of [-127, 127] range + * to prevent cursor jumps. + */ + if (packet[4] == 0x80 || packet[5] == 0x80 || + packet[1] >> 7 == packet[4] >> 7 || + packet[2] >> 7 == packet[5] >> 7) { + elantech_debug("discarding packet [%6ph]\n", packet); + break; + + } x = packet[4] - (int)((packet[1]^0x80) << 1); y = (int)((packet[2]^0x80) << 1) - packet[5]; From de889108391f0d6b8d5cdebb538d3629cf0050c6 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 7 Nov 2021 19:37:53 -0800 Subject: [PATCH 390/433] Input: ili210x - special case ili251x sample read out The ili251x touch controller needs 5ms delay between sending I2C device address and register address, and, writing or reading register data. According to downstream ili251x example code, this 5ms delay is not required when reading touch samples out of the controller. Implement such a special case. 
Signed-off-by: Marek Vasut Link: https://lore.kernel.org/r/20211108005259.480545-1-marex@denx.de Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ili210x.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c index 867c13d3cb17..e1551cdc6e1a 100644 --- a/drivers/input/touchscreen/ili210x.c +++ b/drivers/input/touchscreen/ili210x.c @@ -224,15 +224,17 @@ static const struct ili2xxx_chip ili212x_chip = { .has_calibrate_reg = true, }; -static int ili251x_read_reg(struct i2c_client *client, - u8 reg, void *buf, size_t len) +static int ili251x_read_reg_common(struct i2c_client *client, + u8 reg, void *buf, size_t len, + unsigned int delay) { int error; int ret; ret = i2c_master_send(client, &reg, 1); if (ret == 1) { - usleep_range(5000, 5500); + if (delay) + usleep_range(delay, delay + 500); ret = i2c_master_recv(client, buf, len); if (ret == len) @@ -244,12 +246,18 @@ static int ili251x_read_reg(struct i2c_client *client, return ret; } +static int ili251x_read_reg(struct i2c_client *client, + u8 reg, void *buf, size_t len) +{ + return ili251x_read_reg_common(client, reg, buf, len, 5000); +} + static int ili251x_read_touch_data(struct i2c_client *client, u8 *data) { int error; - error = ili251x_read_reg(client, REG_TOUCHDATA, - data, ILI251X_DATA_SIZE1); + error = ili251x_read_reg_common(client, REG_TOUCHDATA, + data, ILI251X_DATA_SIZE1, 0); if (!error && data[0] == 2) { error = i2c_master_recv(client, data + ILI251X_DATA_SIZE1, ILI251X_DATA_SIZE2); From 8639e042ad6aca7fc2a5b2fe8652396e2f522627 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Tue, 9 Nov 2021 22:17:03 -0800 Subject: [PATCH 391/433] Input: ili210x - improve polled sample spacing Currently the ili210x driver implements a threaded interrupt handler which starts upon edge on the interrupt line, and then polls the touch controller for samples.
Every time a sample is obtained from the controller, the thread function checks whether further polling is required, and if so, waits fixed amount of time before polling for next sample. The delay between consecutive samples can thus vary greatly, because the I2C transfer required to retrieve the sample from the controller takes different amount of time on different platforms. Furthermore, different models of the touch controllers supported by this driver require different delays during retrieval of samples too. Instead of waiting fixed amount of time before polling for next sample, determine how much time passed since the beginning of sampling cycle and then wait only the remaining amount of time within the sampling cycle. This makes the driver deliver samples with equal spacing between them. Signed-off-by: Marek Vasut Link: https://lore.kernel.org/r/20211108005216.480525-1-marex@denx.de Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ili210x.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c index e1551cdc6e1a..de81ba77ffc7 100644 --- a/drivers/input/touchscreen/ili210x.c +++ b/drivers/input/touchscreen/ili210x.c @@ -334,10 +334,13 @@ static irqreturn_t ili210x_irq(int irq, void *irq_data) const struct ili2xxx_chip *chip = priv->chip; u8 touchdata[ILI210X_DATA_SIZE] = { 0 }; bool keep_polling; + ktime_t time_next; + s64 time_delta; bool touch; int error; do { + time_next = ktime_add_ms(ktime_get(), ILI2XXX_POLL_PERIOD); error = chip->get_touch_data(client, touchdata); if (error) { dev_err(&client->dev, @@ -347,8 +350,11 @@ static irqreturn_t ili210x_irq(int irq, void *irq_data) touch = ili210x_report_events(priv, touchdata); keep_polling = chip->continue_polling(touchdata, touch); - if (keep_polling) - msleep(ILI2XXX_POLL_PERIOD); + if (keep_polling) { + time_delta = ktime_us_delta(time_next, ktime_get()); + if (time_delta > 0) + 
usleep_range(time_delta, time_delta + 1000); + } } while (!priv->stop && keep_polling); return IRQ_HANDLED; From 27931d38ce057f36e68bc68cd4bf4ba24bbb9c57 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Tue, 9 Nov 2021 22:17:49 -0800 Subject: [PATCH 392/433] Input: ili210x - reduce sample period to 15ms Modern devices may redraw display at 60 Hz, make sure we have one input sample per one frame. Reduce sample period to 15ms, so we would get up to 66.6 samples per second, although realistically with all the jitter and extra scheduling wiggle room, we would end up just above 60 samples per second. This should be a good compromise between sampling too often and sampling too seldom. Signed-off-by: Marek Vasut Link: https://lore.kernel.org/r/20211108114145.84118-1-marex@denx.de Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ili210x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c index de81ba77ffc7..2bd407d86bae 100644 --- a/drivers/input/touchscreen/ili210x.c +++ b/drivers/input/touchscreen/ili210x.c @@ -14,7 +14,7 @@ #include #include -#define ILI2XXX_POLL_PERIOD 20 +#define ILI2XXX_POLL_PERIOD 15 #define ILI210X_DATA_SIZE 64 #define ILI211X_DATA_SIZE 43 From 91e2e76695fe52339be4bc4722475641ab0fc75c Mon Sep 17 00:00:00 2001 From: Alistair Francis Date: Tue, 12 Oct 2021 20:10:59 -0700 Subject: [PATCH 393/433] Input: wacom_i2c - use macros for the bit masks To make the code easier to read use macros for the bit masks. 
Signed-off-by: Alistair Francis Link: https://lore.kernel.org/r/20211009113707.17568-2-alistair@alistair23.me Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/wacom_i2c.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/input/touchscreen/wacom_i2c.c b/drivers/input/touchscreen/wacom_i2c.c index 22826c387da5..fe4ea6204a4e 100644 --- a/drivers/input/touchscreen/wacom_i2c.c +++ b/drivers/input/touchscreen/wacom_i2c.c @@ -6,6 +6,7 @@ * */ +#include #include #include #include @@ -14,6 +15,15 @@ #include #include +/* Bitmasks (for data[3]) */ +#define WACOM_TIP_SWITCH BIT(0) +#define WACOM_BARREL_SWITCH BIT(1) +#define WACOM_ERASER BIT(2) +#define WACOM_INVERT BIT(3) +#define WACOM_BARREL_SWITCH_2 BIT(4) +#define WACOM_IN_PROXIMITY BIT(5) + +/* Registers */ #define WACOM_CMD_QUERY0 0x04 #define WACOM_CMD_QUERY1 0x00 #define WACOM_CMD_QUERY2 0x33 @@ -99,19 +109,19 @@ static irqreturn_t wacom_i2c_irq(int irq, void *dev_id) if (error < 0) goto out; - tsw = data[3] & 0x01; - ers = data[3] & 0x04; - f1 = data[3] & 0x02; - f2 = data[3] & 0x10; + tsw = data[3] & WACOM_TIP_SWITCH; + ers = data[3] & WACOM_ERASER; + f1 = data[3] & WACOM_BARREL_SWITCH; + f2 = data[3] & WACOM_BARREL_SWITCH_2; x = le16_to_cpup((__le16 *)&data[4]); y = le16_to_cpup((__le16 *)&data[6]); pressure = le16_to_cpup((__le16 *)&data[8]); if (!wac_i2c->prox) - wac_i2c->tool = (data[3] & 0x0c) ? + wac_i2c->tool = (data[3] & (WACOM_ERASER | WACOM_INVERT)) ? 
BTN_TOOL_RUBBER : BTN_TOOL_PEN; - wac_i2c->prox = data[3] & 0x20; + wac_i2c->prox = data[3] & WACOM_IN_PROXIMITY; input_report_key(input, BTN_TOUCH, tsw || ers); input_report_key(input, wac_i2c->tool, wac_i2c->prox); From 744d0090a5f6dfa4c81b53402ccdf08313100429 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 9 Nov 2021 22:58:01 -0800 Subject: [PATCH 394/433] Input: iforce - fix control-message timeout USB control-message timeouts are specified in milliseconds and should specifically not vary with CONFIG_HZ. Fixes: 487358627825 ("Input: iforce - use DMA-safe buffer when getting IDs from USB") Signed-off-by: Johan Hovold Cc: stable@vger.kernel.org # 5.3 Link: https://lore.kernel.org/r/20211025115501.5190-1-johan@kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/iforce/iforce-usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/joystick/iforce/iforce-usb.c b/drivers/input/joystick/iforce/iforce-usb.c index 6c554c11a7ac..ea58805c480f 100644 --- a/drivers/input/joystick/iforce/iforce-usb.c +++ b/drivers/input/joystick/iforce/iforce-usb.c @@ -92,7 +92,7 @@ static int iforce_usb_get_id(struct iforce *iforce, u8 id, id, USB_TYPE_VENDOR | USB_DIR_IN | USB_RECIP_INTERFACE, - 0, 0, buf, IFORCE_MAX_LENGTH, HZ); + 0, 0, buf, IFORCE_MAX_LENGTH, 1000); if (status < 0) { dev_err(&iforce_usb->intf->dev, "usb_submit_urb failed: %d\n", status); From 3b4c6566c158e0449d490165c1a64d9e410b3007 Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Wed, 10 Nov 2021 21:42:49 +0800 Subject: [PATCH 395/433] net: hns3: fix failed to add reuse multicast mac addr to hardware when mc mac table is full Currently, when driver is failed to add a new multicast mac address to hardware due to the multicast mac table is full, it will directly return. In this case, if the multicast mac list has some reuse addresses after the new address, those reuse addresses will never be added to hardware. 
To fix this problem, if function hclge_add_mc_addr_common() returns -ENOSPC, hclge_sync_vport_mac_list() should judge whether continue or stop to add next address. As function hclge_sync_vport_mac_list() needs parameter mac_type to know whether is uc or mc, refine this function to add parameter mac_type and remove parameter sync. So does function hclge_unsync_vport_mac_list(). Fixes: ee4bcd3b7ae4 ("net: hns3: refactor the MAC address configure") Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- .../hisilicon/hns3/hns3pf/hclge_main.c | 50 ++++++++++--------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 2e41aa2d1df8..eb96bea9e3ce 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -8949,8 +8949,11 @@ int hclge_add_mc_addr_common(struct hclge_vport *vport, err_no_space: /* if already overflow, not to print each time */ - if (!(vport->overflow_promisc_flags & HNAE3_OVERFLOW_MPE)) + if (!(vport->overflow_promisc_flags & HNAE3_OVERFLOW_MPE)) { + vport->overflow_promisc_flags |= HNAE3_OVERFLOW_MPE; dev_err(&hdev->pdev->dev, "mc mac vlan table is full\n"); + } + return -ENOSPC; } @@ -9006,12 +9009,17 @@ int hclge_rm_mc_addr_common(struct hclge_vport *vport, static void hclge_sync_vport_mac_list(struct hclge_vport *vport, struct list_head *list, - int (*sync)(struct hclge_vport *, - const unsigned char *)) + enum HCLGE_MAC_ADDR_TYPE mac_type) { + int (*sync)(struct hclge_vport *vport, const unsigned char *addr); struct hclge_mac_node *mac_node, *tmp; int ret; + if (mac_type == HCLGE_MAC_ADDR_UC) + sync = hclge_add_uc_addr_common; + else + sync = hclge_add_mc_addr_common; + list_for_each_entry_safe(mac_node, tmp, list, node) { ret = sync(vport, mac_node->mac_addr); if (!ret) { @@ -9023,8 +9031,13 @@ static void hclge_sync_vport_mac_list(struct 
hclge_vport *vport, /* If one unicast mac address is existing in hardware, * we need to try whether other unicast mac addresses * are new addresses that can be added. + * Multicast mac address can be reusable, even though + * there is no space to add new multicast mac address, + * we should check whether other mac addresses are + * existing in hardware for reuse. */ - if (ret != -EEXIST) + if ((mac_type == HCLGE_MAC_ADDR_UC && ret != -EEXIST) || + (mac_type == HCLGE_MAC_ADDR_MC && ret != -ENOSPC)) break; } } @@ -9032,12 +9045,17 @@ static void hclge_sync_vport_mac_list(struct hclge_vport *vport, static void hclge_unsync_vport_mac_list(struct hclge_vport *vport, struct list_head *list, - int (*unsync)(struct hclge_vport *, - const unsigned char *)) + enum HCLGE_MAC_ADDR_TYPE mac_type) { + int (*unsync)(struct hclge_vport *vport, const unsigned char *addr); struct hclge_mac_node *mac_node, *tmp; int ret; + if (mac_type == HCLGE_MAC_ADDR_UC) + unsync = hclge_rm_uc_addr_common; + else + unsync = hclge_rm_mc_addr_common; + list_for_each_entry_safe(mac_node, tmp, list, node) { ret = unsync(vport, mac_node->mac_addr); if (!ret || ret == -ENOENT) { @@ -9168,17 +9186,8 @@ stop_traverse: spin_unlock_bh(&vport->mac_list_lock); /* delete first, in order to get max mac table space for adding */ - if (mac_type == HCLGE_MAC_ADDR_UC) { - hclge_unsync_vport_mac_list(vport, &tmp_del_list, - hclge_rm_uc_addr_common); - hclge_sync_vport_mac_list(vport, &tmp_add_list, - hclge_add_uc_addr_common); - } else { - hclge_unsync_vport_mac_list(vport, &tmp_del_list, - hclge_rm_mc_addr_common); - hclge_sync_vport_mac_list(vport, &tmp_add_list, - hclge_add_mc_addr_common); - } + hclge_unsync_vport_mac_list(vport, &tmp_del_list, mac_type); + hclge_sync_vport_mac_list(vport, &tmp_add_list, mac_type); /* if some mac addresses were added/deleted fail, move back to the * mac_list, and retry at next time. 
@@ -9337,12 +9346,7 @@ static void hclge_uninit_vport_mac_list(struct hclge_vport *vport, spin_unlock_bh(&vport->mac_list_lock); - if (mac_type == HCLGE_MAC_ADDR_UC) - hclge_unsync_vport_mac_list(vport, &tmp_del_list, - hclge_rm_uc_addr_common); - else - hclge_unsync_vport_mac_list(vport, &tmp_del_list, - hclge_rm_mc_addr_common); + hclge_unsync_vport_mac_list(vport, &tmp_del_list, mac_type); if (!list_empty(&tmp_del_list)) dev_warn(&hdev->pdev->dev, From beb27ca451a57a1c0e52b5268703f3c3173c1f8c Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Wed, 10 Nov 2021 21:42:50 +0800 Subject: [PATCH 396/433] net: hns3: fix ROCE base interrupt vector initialization bug Currently, NIC init ROCE interrupt vector with MSIX interrupt. But ROCE use pci_irq_vector() to get interrupt vector, which adds the relative interrupt vector again and gets wrong interrupt vector. So fixes it by assign relative interrupt vector to ROCE instead of MSIX interrupt vector and delete the unused struct member base_msi_vector declaration of hclgevf_dev. Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support") Signed-off-by: Jie Wang Signed-off-by: Guangbin Huang Signed-off-by: David S. 
Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 6 +----- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 2 -- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 5 +---- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h | 2 -- 4 files changed, 2 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index eb96bea9e3ce..0fc2b81f4712 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2581,7 +2581,7 @@ static int hclge_init_roce_base_info(struct hclge_vport *vport) if (hdev->num_msi < hdev->num_nic_msi + hdev->num_roce_msi) return -EINVAL; - roce->rinfo.base_vector = hdev->roce_base_vector; + roce->rinfo.base_vector = hdev->num_nic_msi; roce->rinfo.netdev = nic->kinfo.netdev; roce->rinfo.roce_io_base = hdev->hw.io_base; @@ -2617,10 +2617,6 @@ static int hclge_init_msi(struct hclge_dev *hdev) hdev->num_msi = vectors; hdev->num_msi_left = vectors; - hdev->base_msi_vector = pdev->irq; - hdev->roce_base_vector = hdev->base_msi_vector + - hdev->num_nic_msi; - hdev->vector_status = devm_kcalloc(&pdev->dev, hdev->num_msi, sizeof(u16), GFP_KERNEL); if (!hdev->vector_status) { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 9e1eede599ec..21013776de55 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -904,12 +904,10 @@ struct hclge_dev { u16 num_msi; u16 num_msi_left; u16 num_msi_used; - u32 base_msi_vector; u16 *vector_status; int *vector_irq; u16 num_nic_msi; /* Num of nic vectors for this PF */ u16 num_roce_msi; /* Num of roce vectors for this PF */ - int roce_base_vector; unsigned long service_timer_period; unsigned long service_timer_previous; diff --git 
a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 645b2c0011e6..98332dad804d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -2557,7 +2557,7 @@ static int hclgevf_init_roce_base_info(struct hclgevf_dev *hdev) hdev->num_msi_left == 0) return -EINVAL; - roce->rinfo.base_vector = hdev->roce_base_vector; + roce->rinfo.base_vector = hdev->roce_base_msix_offset; roce->rinfo.netdev = nic->kinfo.netdev; roce->rinfo.roce_io_base = hdev->hw.io_base; @@ -2823,9 +2823,6 @@ static int hclgevf_init_msi(struct hclgevf_dev *hdev) hdev->num_msi = vectors; hdev->num_msi_left = vectors; - hdev->base_msi_vector = pdev->irq; - hdev->roce_base_vector = pdev->irq + hdev->roce_base_msix_offset; - hdev->vector_status = devm_kcalloc(&pdev->dev, hdev->num_msi, sizeof(u16), GFP_KERNEL); if (!hdev->vector_status) { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index 28288d7e3303..4bd922b47501 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -308,8 +308,6 @@ struct hclgevf_dev { u16 num_nic_msix; /* Num of nic vectors for this VF */ u16 num_roce_msix; /* Num of roce vectors for this VF */ u16 roce_base_msix_offset; - int roce_base_vector; - u32 base_msi_vector; u16 *vector_status; int *vector_irq; From 0b653a81a26d66ffe526a54c2177e24fb1400301 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Wed, 10 Nov 2021 21:42:51 +0800 Subject: [PATCH 397/433] net: hns3: fix pfc packet number incorrect after querying pfc parameters Currently, driver will send command to firmware to query pfc packet number when user uses dcb tool to get pfc parameters. However, the periodic service task will also periodically query and record MAC statistics, including pfc packet number. 
Because the hardware statistics registers are cleared on read, the pfc packet numbers in the MAC statistics become incorrect after the dcb tool is used to get pfc parameters. To fix this problem, when the user uses the dcb tool to get pfc parameters, the driver first updates the MAC statistics and then gets the pfc packet numbers from the MAC statistics. Fixes: 64fd2300fcc1 ("net: hns3: add support for querying pfc puase packets statistic") Signed-off-by: Jie Wang Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- .../hisilicon/hns3/hns3pf/hclge_dcb.c | 20 +++--- .../hisilicon/hns3/hns3pf/hclge_main.c | 4 +- .../hisilicon/hns3/hns3pf/hclge_main.h | 4 ++ .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 70 +++++++++---------- .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.h | 4 +- 5 files changed, 50 insertions(+), 52 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c index 91cb578f56b8..90013c131e94 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c @@ -286,28 +286,24 @@ err_out: static int hclge_ieee_getpfc(struct hnae3_handle *h, struct ieee_pfc *pfc) { - u64 requests[HNAE3_MAX_TC], indications[HNAE3_MAX_TC]; struct hclge_vport *vport = hclge_get_vport(h); struct hclge_dev *hdev = vport->back; int ret; - u8 i; memset(pfc, 0, sizeof(*pfc)); pfc->pfc_cap = hdev->pfc_max; pfc->pfc_en = hdev->tm_info.pfc_en; - ret = hclge_pfc_tx_stats_get(hdev, requests); - if (ret) + ret = hclge_mac_update_stats(hdev); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to update MAC stats, ret = %d.\n", ret); return ret; - - ret = hclge_pfc_rx_stats_get(hdev, indications); - if (ret) - return ret; - - for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { - pfc->requests[i] = requests[i]; - pfc->indications[i] = indications[i]; } + + hclge_pfc_tx_stats_get(hdev, pfc->requests); + hclge_pfc_rx_stats_get(hdev, pfc->indications); + return 0; } diff
--git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 0fc2b81f4712..21aec4e470cf 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -26,8 +26,6 @@ #include "hclge_devlink.h" #define HCLGE_NAME "hclge" -#define HCLGE_STATS_READ(p, offset) (*(u64 *)((u8 *)(p) + (offset))) -#define HCLGE_MAC_STATS_FIELD_OFF(f) (offsetof(struct hclge_mac_stats, f)) #define HCLGE_BUF_SIZE_UNIT 256U #define HCLGE_BUF_MUL_BY 2 @@ -587,7 +585,7 @@ static int hclge_mac_query_reg_num(struct hclge_dev *hdev, u32 *reg_num) return 0; } -static int hclge_mac_update_stats(struct hclge_dev *hdev) +int hclge_mac_update_stats(struct hclge_dev *hdev) { /* The firmware supports the new statistics acquisition method */ if (hdev->ae_dev->dev_specs.mac_stats_num) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 21013776de55..3c95c957d1e3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -852,6 +852,9 @@ struct hclge_vf_vlan_cfg { (y) = (_k_ ^ ~_v_) & (_k_); \ } while (0) +#define HCLGE_MAC_STATS_FIELD_OFF(f) (offsetof(struct hclge_mac_stats, f)) +#define HCLGE_STATS_READ(p, offset) (*(u64 *)((u8 *)(p) + (offset))) + #define HCLGE_MAC_TNL_LOG_SIZE 8 #define HCLGE_VPORT_NUM 256 struct hclge_dev { @@ -1166,4 +1169,5 @@ void hclge_inform_vf_promisc_info(struct hclge_vport *vport); int hclge_dbg_dump_rst_info(struct hclge_dev *hdev, char *buf, int len); int hclge_push_vf_link_status(struct hclge_vport *vport); int hclge_enable_vport_vlan_filter(struct hclge_vport *vport, bool request_en); +int hclge_mac_update_stats(struct hclge_dev *hdev); #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 
95074e91a846..a50e2edbf4a0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -113,50 +113,50 @@ static int hclge_shaper_para_calc(u32 ir, u8 shaper_level, return 0; } -static int hclge_pfc_stats_get(struct hclge_dev *hdev, - enum hclge_opcode_type opcode, u64 *stats) +static const u16 hclge_pfc_tx_stats_offset[] = { + HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri0_pkt_num), + HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri1_pkt_num), + HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri2_pkt_num), + HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri3_pkt_num), + HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri4_pkt_num), + HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri5_pkt_num), + HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri6_pkt_num), + HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri7_pkt_num) +}; + +static const u16 hclge_pfc_rx_stats_offset[] = { + HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri0_pkt_num), + HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri1_pkt_num), + HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri2_pkt_num), + HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri3_pkt_num), + HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri4_pkt_num), + HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri5_pkt_num), + HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri6_pkt_num), + HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri7_pkt_num) +}; + +static void hclge_pfc_stats_get(struct hclge_dev *hdev, bool tx, u64 *stats) { - struct hclge_desc desc[HCLGE_TM_PFC_PKT_GET_CMD_NUM]; - int ret, i, j; + const u16 *offset; + int i; - if (!(opcode == HCLGE_OPC_QUERY_PFC_RX_PKT_CNT || - opcode == HCLGE_OPC_QUERY_PFC_TX_PKT_CNT)) - return -EINVAL; + if (tx) + offset = hclge_pfc_tx_stats_offset; + else + offset = hclge_pfc_rx_stats_offset; - for (i = 0; i < HCLGE_TM_PFC_PKT_GET_CMD_NUM - 1; i++) { - hclge_cmd_setup_basic_desc(&desc[i], opcode, true); - desc[i].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); - } - - hclge_cmd_setup_basic_desc(&desc[i], opcode, true); - - ret = hclge_cmd_send(&hdev->hw, desc, 
HCLGE_TM_PFC_PKT_GET_CMD_NUM); - if (ret) - return ret; - - for (i = 0; i < HCLGE_TM_PFC_PKT_GET_CMD_NUM; i++) { - struct hclge_pfc_stats_cmd *pfc_stats = - (struct hclge_pfc_stats_cmd *)desc[i].data; - - for (j = 0; j < HCLGE_TM_PFC_NUM_GET_PER_CMD; j++) { - u32 index = i * HCLGE_TM_PFC_PKT_GET_CMD_NUM + j; - - if (index < HCLGE_MAX_TC_NUM) - stats[index] = - le64_to_cpu(pfc_stats->pkt_num[j]); - } - } - return 0; + for (i = 0; i < HCLGE_MAX_TC_NUM; i++) + stats[i] = HCLGE_STATS_READ(&hdev->mac_stats, offset[i]); } -int hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats) +void hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats) { - return hclge_pfc_stats_get(hdev, HCLGE_OPC_QUERY_PFC_RX_PKT_CNT, stats); + hclge_pfc_stats_get(hdev, false, stats); } -int hclge_pfc_tx_stats_get(struct hclge_dev *hdev, u64 *stats) +void hclge_pfc_tx_stats_get(struct hclge_dev *hdev, u64 *stats) { - return hclge_pfc_stats_get(hdev, HCLGE_OPC_QUERY_PFC_TX_PKT_CNT, stats); + hclge_pfc_stats_get(hdev, true, stats); } int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h index 2ee9b795f71d..1db7f40b4525 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h @@ -228,8 +228,8 @@ int hclge_tm_dwrr_cfg(struct hclge_dev *hdev); int hclge_tm_init_hw(struct hclge_dev *hdev, bool init); int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx); int hclge_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr); -int hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats); -int hclge_pfc_tx_stats_get(struct hclge_dev *hdev, u64 *stats); +void hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats); +void hclge_pfc_tx_stats_get(struct hclge_dev *hdev, u64 *stats); int hclge_tm_qs_shaper_cfg(struct hclge_vport *vport, int max_tx_rate); int 
hclge_tm_get_qset_num(struct hclge_dev *hdev, u16 *qset_num); int hclge_tm_get_pri_num(struct hclge_dev *hdev, u8 *pri_num); From 3b6db4a0492beed36545a2bc6075117faecebfe2 Mon Sep 17 00:00:00 2001 From: Yufeng Mo Date: Wed, 10 Nov 2021 21:42:52 +0800 Subject: [PATCH 398/433] net: hns3: sync rx ring head in echo common pull When the driver processes rx packets, the head pointer is updated only after the number of received packets reaches 16. However, hardware relies on the head pointer to calculate the number of FBDs. As a result, the hardware calculates the FBD incorrectly. Therefore, the driver proactively updates the head pointer in each common poll to ensure that the number of FBDs calculated by the hardware is correct. Fixes: 68752b24f51a ("net: hns3: schedule the polling again when allocation fails") Signed-off-by: Yufeng Mo Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- .../net/ethernet/hisilicon/hns3/hns3_enet.c | 7 ++++ .../hisilicon/hns3/hns3pf/hclge_cmd.c | 1 + .../hisilicon/hns3/hns3pf/hclge_cmd.h | 1 + .../hisilicon/hns3/hns3vf/hclgevf_cmd.c | 32 +++++++++++++++++++ .../hisilicon/hns3/hns3vf/hclgevf_cmd.h | 9 ++++++ 5 files changed, 50 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index a2b993d62822..9ccebbaa0d69 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -4210,6 +4210,13 @@ int hns3_clean_rx_ring(struct hns3_enet_ring *ring, int budget, } out: + /* sync head pointer before exiting, since hardware will calculate + * FBD number with head pointer + */ + if (unused_count > 0) + failure = failure || + hns3_nic_alloc_rx_buffers(ring, unused_count); + return failure ? 
budget : recv_pkts; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c index c327df9dbac4..c5d5466810bb 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c @@ -483,6 +483,7 @@ static int hclge_firmware_compat_config(struct hclge_dev *hdev, bool en) if (hnae3_dev_phy_imp_supported(hdev)) hnae3_set_bit(compat, HCLGE_PHY_IMP_EN_B, 1); hnae3_set_bit(compat, HCLGE_MAC_STATS_EXT_EN_B, 1); + hnae3_set_bit(compat, HCLGE_SYNC_RX_RING_HEAD_EN_B, 1); req->compat = cpu_to_le32(compat); } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index c38b57fc6c6a..d24e59028798 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -1151,6 +1151,7 @@ struct hclge_query_ppu_pf_other_int_dfx_cmd { #define HCLGE_NCSI_ERROR_REPORT_EN_B 1 #define HCLGE_PHY_IMP_EN_B 2 #define HCLGE_MAC_STATS_EXT_EN_B 3 +#define HCLGE_SYNC_RX_RING_HEAD_EN_B 4 struct hclge_firmware_compat_cmd { __le32 compat; u8 rsv[20]; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c index f89bfb352adf..e605c2c5bcce 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c @@ -434,8 +434,28 @@ err_csq: return ret; } +static int hclgevf_firmware_compat_config(struct hclgevf_dev *hdev, bool en) +{ + struct hclgevf_firmware_compat_cmd *req; + struct hclgevf_desc desc; + u32 compat = 0; + + hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_IMP_COMPAT_CFG, false); + + if (en) { + req = (struct hclgevf_firmware_compat_cmd *)desc.data; + + hnae3_set_bit(compat, HCLGEVF_SYNC_RX_RING_HEAD_EN_B, 1); + + req->compat = cpu_to_le32(compat); + } + + return hclgevf_cmd_send(&hdev->hw, 
&desc, 1); +} + int hclgevf_cmd_init(struct hclgevf_dev *hdev) { + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); int ret; spin_lock_bh(&hdev->hw.cmq.csq.lock); @@ -484,6 +504,17 @@ int hclgevf_cmd_init(struct hclgevf_dev *hdev) hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE0_MASK, HNAE3_FW_VERSION_BYTE0_SHIFT)); + if (ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3) { + /* ask the firmware to enable some features, driver can work + * without it. + */ + ret = hclgevf_firmware_compat_config(hdev, true); + if (ret) + dev_warn(&hdev->pdev->dev, + "Firmware compatible features not enabled(%d).\n", + ret); + } + return 0; err_cmd_init: @@ -508,6 +539,7 @@ static void hclgevf_cmd_uninit_regs(struct hclgevf_hw *hw) void hclgevf_cmd_uninit(struct hclgevf_dev *hdev) { + hclgevf_firmware_compat_config(hdev, false); set_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state); /* wait to ensure that the firmware completes the possible left * over commands. diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h index 39d0b589c720..edc9e154061a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h @@ -15,6 +15,12 @@ struct hclgevf_hw; struct hclgevf_dev; +#define HCLGEVF_SYNC_RX_RING_HEAD_EN_B 4 +struct hclgevf_firmware_compat_cmd { + __le32 compat; + u8 rsv[20]; +}; + struct hclgevf_desc { __le16 opcode; __le16 flag; @@ -107,6 +113,9 @@ enum hclgevf_opcode_type { HCLGEVF_OPC_RSS_TC_MODE = 0x0D08, /* Mailbox cmd */ HCLGEVF_OPC_MBX_VF_TO_PF = 0x2001, + + /* IMP stats command */ + HCLGEVF_OPC_IMP_COMPAT_CFG = 0x701A, }; #define HCLGEVF_TQP_REG_OFFSET 0x80000 From e140c7983e3054be0652bf914f4454f16c5520b0 Mon Sep 17 00:00:00 2001 From: Yufeng Mo Date: Wed, 10 Nov 2021 21:42:53 +0800 Subject: [PATCH 399/433] net: hns3: fix kernel crash when unload VF while it is being reset When fully configure VLANs for a VF, then unload 
the VF while triggering a reset to PF, will cause a kernel crash because the irq is already uninit. [ 293.177579] ------------[ cut here ]------------ [ 293.183502] kernel BUG at drivers/pci/msi.c:352! [ 293.189547] Internal error: Oops - BUG: 0 [#1] SMP ...... [ 293.390124] Workqueue: hclgevf hclgevf_service_task [hclgevf] [ 293.402627] pstate: 80c00009 (Nzcv daif +PAN +UAO) [ 293.414324] pc : free_msi_irqs+0x19c/0x1b8 [ 293.425429] lr : free_msi_irqs+0x18c/0x1b8 [ 293.436545] sp : ffff00002716fbb0 [ 293.446950] x29: ffff00002716fbb0 x28: 0000000000000000 [ 293.459519] x27: 0000000000000000 x26: ffff45b91ea16b00 [ 293.472183] x25: 0000000000000000 x24: ffffa587b08f4700 [ 293.484717] x23: ffffc591ac30e000 x22: ffffa587b08f8428 [ 293.497190] x21: ffffc591ac30e300 x20: 0000000000000000 [ 293.509594] x19: ffffa58a062a8300 x18: 0000000000000000 [ 293.521949] x17: 0000000000000000 x16: ffff45b91dcc3f48 [ 293.534013] x15: 0000000000000000 x14: 0000000000000000 [ 293.545883] x13: 0000000000000040 x12: 0000000000000228 [ 293.557508] x11: 0000000000000020 x10: 0000000000000040 [ 293.568889] x9 : ffff45b91ea1e190 x8 : ffffc591802d0000 [ 293.580123] x7 : ffffc591802d0148 x6 : 0000000000000120 [ 293.591190] x5 : ffffc591802d0000 x4 : 0000000000000000 [ 293.602015] x3 : 0000000000000000 x2 : 0000000000000000 [ 293.612624] x1 : 00000000000004a4 x0 : ffffa58a1e0c6b80 [ 293.623028] Call trace: [ 293.630340] free_msi_irqs+0x19c/0x1b8 [ 293.638849] pci_disable_msix+0x118/0x140 [ 293.647452] pci_free_irq_vectors+0x20/0x38 [ 293.656081] hclgevf_uninit_msi+0x44/0x58 [hclgevf] [ 293.665309] hclgevf_reset_rebuild+0x1ac/0x2e0 [hclgevf] [ 293.674866] hclgevf_reset+0x358/0x400 [hclgevf] [ 293.683545] hclgevf_reset_service_task+0xd0/0x1b0 [hclgevf] [ 293.693325] hclgevf_service_task+0x4c/0x2e8 [hclgevf] [ 293.702307] process_one_work+0x1b0/0x448 [ 293.710034] worker_thread+0x54/0x468 [ 293.717331] kthread+0x134/0x138 [ 293.724114] ret_from_fork+0x10/0x18 [ 293.731324] Code: f940b000 b4ffff00 
a903e7b8 f90017b6 (d4210000) This patch fixes the problem by waiting for the VF reset done while unloading the VF. Fixes: e2cb1dec9779 ("net: hns3: Add HNS3 VF HCL(Hardware Compatibility Layer) Support") Signed-off-by: Yufeng Mo Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 5 +++++ drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h | 2 ++ 2 files changed, 7 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 98332dad804d..25c419d40066 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -3010,7 +3010,10 @@ static void hclgevf_uninit_client_instance(struct hnae3_client *client, /* un-init roce, if it exists */ if (hdev->roce_client) { + while (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) + msleep(HCLGEVF_WAIT_RESET_DONE); clear_bit(HCLGEVF_STATE_ROCE_REGISTERED, &hdev->state); + hdev->roce_client->ops->uninit_instance(&hdev->roce, 0); hdev->roce_client = NULL; hdev->roce.client = NULL; @@ -3019,6 +3022,8 @@ static void hclgevf_uninit_client_instance(struct hnae3_client *client, /* un-init nic/unic, if this was not called by roce client */ if (client->ops->uninit_instance && hdev->nic_client && client->type != HNAE3_CLIENT_ROCE) { + while (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) + msleep(HCLGEVF_WAIT_RESET_DONE); clear_bit(HCLGEVF_STATE_NIC_REGISTERED, &hdev->state); client->ops->uninit_instance(&hdev->nic, 0); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index 4bd922b47501..f6f736c0091c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -109,6 +109,8 @@ #define HCLGEVF_VF_RST_ING 0x07008 #define 
HCLGEVF_VF_RST_ING_BIT BIT(16) +#define HCLGEVF_WAIT_RESET_DONE 100 + #define HCLGEVF_RSS_IND_TBL_SIZE 512 #define HCLGEVF_RSS_SET_BITMAP_MSK 0xffff #define HCLGEVF_RSS_KEY_SIZE 40 From 1122eac19476c5ccf200009d4e4dc9b11458019c Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Wed, 10 Nov 2021 21:42:54 +0800 Subject: [PATCH 400/433] net: hns3: fix some mac statistics is always 0 in device version V2 When the driver queries the number of mac statistics registers from firmware, old firmware running on device version V2 returns only the number of valid registers and does not include the three reserved registers among them. As a result, the driver does not record the last three values when querying mac statistics. To fix this problem, the driver never queries the register number on device version V2 and instead sets it to a fixed value which includes the three reserved registers. Fixes: c8af2887c941 ("net: hns3: add support pause/pfc durations for mac statistics") Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 10 ++++++++++ .../net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 21aec4e470cf..de9cadf9b9f3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -566,6 +566,16 @@ static int hclge_mac_query_reg_num(struct hclge_dev *hdev, u32 *reg_num) struct hclge_desc desc; int ret; + /* Driver needs total register number of both valid registers and + * reserved registers, but the old firmware only returns number + * of valid registers in device V2. To be compatible with these + * devices, driver uses a fixed value.
+ */ + if (hdev->ae_dev->dev_version == HNAE3_DEVICE_VERSION_V2) { + *reg_num = HCLGE_MAC_STATS_MAX_NUM_V1; + return 0; + } + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_MAC_REG_NUM, true); ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 3c95c957d1e3..ebba603483a0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -404,7 +404,7 @@ struct hclge_tm_info { }; /* max number of mac statistics on each version */ -#define HCLGE_MAC_STATS_MAX_NUM_V1 84 +#define HCLGE_MAC_STATS_MAX_NUM_V1 87 #define HCLGE_MAC_STATS_MAX_NUM_V2 105 struct hclge_comm_stats_str { From 91fcc79bff406e30c35dcde9fd8568f2b8712e03 Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Wed, 10 Nov 2021 21:42:55 +0800 Subject: [PATCH 401/433] net: hns3: remove check VF uc mac exist when set by PF If users set unicast mac addresses for VFs via the PF, they need to guarantee that all VFs' addresses are different. This patch removes the check for whether a VF's mac address already exists, so that users can refresh the mac addresses of all VFs directly without first having to change the existing mac address to another value. Fixes: 8e6de441b8e6 ("net: hns3: add support for configuring VF MAC from the host") Signed-off-by: Guangbin Huang Signed-off-by: David S.
Miller --- .../hisilicon/hns3/hns3pf/hclge_main.c | 36 ------------------- 1 file changed, 36 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index de9cadf9b9f3..c2a58101144e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -9418,36 +9418,6 @@ static int hclge_get_mac_ethertype_cmd_status(struct hclge_dev *hdev, return return_status; } -static bool hclge_check_vf_mac_exist(struct hclge_vport *vport, int vf_idx, - u8 *mac_addr) -{ - struct hclge_mac_vlan_tbl_entry_cmd req; - struct hclge_dev *hdev = vport->back; - struct hclge_desc desc; - u16 egress_port = 0; - int i; - - if (is_zero_ether_addr(mac_addr)) - return false; - - memset(&req, 0, sizeof(req)); - hnae3_set_field(egress_port, HCLGE_MAC_EPORT_VFID_M, - HCLGE_MAC_EPORT_VFID_S, vport->vport_id); - req.egress_port = cpu_to_le16(egress_port); - hclge_prepare_mac_addr(&req, mac_addr, false); - - if (hclge_lookup_mac_vlan_tbl(vport, &req, &desc, false) != -ENOENT) - return true; - - vf_idx += HCLGE_VF_VPORT_START_NUM; - for (i = HCLGE_VF_VPORT_START_NUM; i < hdev->num_alloc_vport; i++) - if (i != vf_idx && - ether_addr_equal(mac_addr, hdev->vport[i].vf_info.mac)) - return true; - - return false; -} - static int hclge_set_vf_mac(struct hnae3_handle *handle, int vf, u8 *mac_addr) { @@ -9465,12 +9435,6 @@ static int hclge_set_vf_mac(struct hnae3_handle *handle, int vf, return 0; } - if (hclge_check_vf_mac_exist(vport, vf, mac_addr)) { - dev_err(&hdev->pdev->dev, "Specified MAC(=%pM) exists!\n", - mac_addr); - return -EEXIST; - } - ether_addr_copy(vport->vf_info.mac, mac_addr); if (test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) { From 688db0c7a4a69ddc8b8143a1cac01eb20082a3aa Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Wed, 10 Nov 2021 21:42:56 +0800 Subject: [PATCH 402/433] net: hns3: allow configure ETS bandwidth of all TCs 
Currently, driver only allow configuring ETS bandwidth of TCs according to the max TC number queried from firmware. However, the hardware actually supports 8 TCs and users may need to configure ETS bandwidth of all TCs, so remove the restriction. Fixes: 330baff5423b ("net: hns3: add ETS TC weight setting in SSU module") Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 9 +-------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c index 90013c131e94..375ebf105a9a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c @@ -129,7 +129,7 @@ static int hclge_ets_sch_mode_validate(struct hclge_dev *hdev, u32 total_ets_bw = 0; u8 i; - for (i = 0; i < hdev->tc_max; i++) { + for (i = 0; i < HNAE3_MAX_TC; i++) { switch (ets->tc_tsa[i]) { case IEEE_8021QAZ_TSA_STRICT: if (hdev->tm_info.tc_info[i].tc_sch_mode != diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index a50e2edbf4a0..429652a8cde1 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -1123,7 +1123,6 @@ static int hclge_tm_pri_tc_base_dwrr_cfg(struct hclge_dev *hdev) static int hclge_tm_ets_tc_dwrr_cfg(struct hclge_dev *hdev) { -#define DEFAULT_TC_WEIGHT 1 #define DEFAULT_TC_OFFSET 14 struct hclge_ets_tc_weight_cmd *ets_weight; @@ -1136,13 +1135,7 @@ static int hclge_tm_ets_tc_dwrr_cfg(struct hclge_dev *hdev) for (i = 0; i < HNAE3_MAX_TC; i++) { struct hclge_pg_info *pg_info; - ets_weight->tc_weight[i] = DEFAULT_TC_WEIGHT; - - if (!(hdev->hw_tc_map & BIT(i))) - continue; - - pg_info = - &hdev->tm_info.pg_info[hdev->tm_info.tc_info[i].pgid]; + 
pg_info = &hdev->tm_info.pg_info[hdev->tm_info.tc_info[i].pgid]; ets_weight->tc_weight[i] = pg_info->tc_dwrr[i]; } From e7e4785fa30f9b5d1b60ed2d8e221891325dfc5f Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Fri, 5 Nov 2021 16:55:29 +0100 Subject: [PATCH 403/433] selftests: net: test_vxlan_under_vrf: fix HV connectivity test It looks like test_vxlan_under_vrf.sh is always failing to verify the connectivity test during the ping between the two simulated VMs. This is due to the fact that veth-hv in each VM should have a distinct MAC address. Fix by setting a unique MAC address on each simulated VM interface. Without this fix: $ sudo ./tools/testing/selftests/net/test_vxlan_under_vrf.sh Checking HV connectivity [ OK ] Check VM connectivity through VXLAN (underlay in the default VRF) [FAIL] With this fix applied: $ sudo ./tools/testing/selftests/net/test_vxlan_under_vrf.sh Checking HV connectivity [ OK ] Check VM connectivity through VXLAN (underlay in the default VRF) [ OK ] Check VM connectivity through VXLAN (underlay in a VRF) [FAIL] NOTE: the connectivity test with the underlay VRF is still failing; it seems that ARP requests are blocked at the simulated hypervisor level, probably due to some missing ARP forwarding rules. This requires more investigation (in the meantime we may consider to set that test as expected failure - XFAIL). Signed-off-by: Andrea Righi Signed-off-by: David S. 
Miller --- tools/testing/selftests/net/test_vxlan_under_vrf.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/net/test_vxlan_under_vrf.sh b/tools/testing/selftests/net/test_vxlan_under_vrf.sh index 534c8b7699ab..ea5a7a808f12 100755 --- a/tools/testing/selftests/net/test_vxlan_under_vrf.sh +++ b/tools/testing/selftests/net/test_vxlan_under_vrf.sh @@ -101,6 +101,8 @@ setup-vm() { ip -netns hv-$id link set veth-tap master br0 ip -netns hv-$id link set veth-tap up + ip link set veth-hv address 02:1d:8d:dd:0c:6$id + ip link set veth-hv netns vm-$id ip -netns vm-$id addr add 10.0.0.$id/24 dev veth-hv ip -netns vm-$id link set veth-hv up From f64ab8e4f368f48afb08ae91928e103d17b235e9 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 8 Nov 2021 22:28:54 +0200 Subject: [PATCH 404/433] net: stmmac: allow a tc-taprio base-time of zero Commit fe28c53ed71d ("net: stmmac: fix taprio configuration when base_time is in the past") allowed some base time values in the past, but apparently not all, the base-time value of 0 (Jan 1st 1970) is still explicitly denied by the driver. Remove the bogus check. Fixes: b60189e0392f ("net: stmmac: Integrate EST with TAPRIO scheduler API") Signed-off-by: Vladimir Oltean Reviewed-by: Kurt Kanzenbach Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 8160087ee92f..1c4ea0b1b845 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -786,8 +786,6 @@ static int tc_setup_taprio(struct stmmac_priv *priv, goto disable; if (qopt->num_entries >= dep) return -EINVAL; - if (!qopt->base_time) - return -ERANGE; if (!qopt->cycle_time) return -ERANGE; From 7a166854b4e24c57d56b3eba9fe1594985ee0a2c Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 8 Nov 2021 22:28:55 +0100 Subject: [PATCH 405/433] net: ethernet: ti: cpsw_ale: Fix access to un-initialized memory It is spurious to allocate a bitmap without initializing it. So, better safe than sorry, initialize it to 0 at least to have some known values. While at it, switch to the devm_bitmap_ API which is less verbose. Fixes: 4b41d3436796 ("net: ethernet: ti: cpsw: allow untagged traffic on host port") Signed-off-by: Christophe JAILLET Signed-off-by: David S. 
Miller --- drivers/net/ethernet/ti/cpsw_ale.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c index 0c75e0576ee1..1ef0aaef5c61 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.c +++ b/drivers/net/ethernet/ti/cpsw_ale.c @@ -1299,10 +1299,8 @@ struct cpsw_ale *cpsw_ale_create(struct cpsw_ale_params *params) if (!ale) return ERR_PTR(-ENOMEM); - ale->p0_untag_vid_mask = - devm_kmalloc_array(params->dev, BITS_TO_LONGS(VLAN_N_VID), - sizeof(unsigned long), - GFP_KERNEL); + ale->p0_untag_vid_mask = devm_bitmap_zalloc(params->dev, VLAN_N_VID, + GFP_KERNEL); if (!ale->p0_untag_vid_mask) return ERR_PTR(-ENOMEM); From bb7bbb6e36474933540c24ae1f1ad651b843981f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Mon, 8 Nov 2021 22:49:18 +0100 Subject: [PATCH 406/433] net: marvell: mvpp2: Fix wrong SerDes reconfiguration order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit bfe301ebbc94 ("net: mvpp2: convert to use mac_prepare()/mac_finish()") introduced a bug wherein it leaves the MAC RESET register asserted after mac_finish(), due to wrong order of function calls. Before it was: .mac_config() mvpp22_mode_reconfigure() assert reset mvpp2_xlg_config() deassert reset Now it is: .mac_prepare() .mac_config() mvpp2_xlg_config() deassert reset .mac_finish() mvpp2_xlg_config() assert reset Obviously this is wrong. This bug is triggered when phylink tries to change the PHY interface mode from a GMAC mode (sgmii, 1000base-x, 2500base-x) to XLG mode (10gbase-r, xaui). The XLG mode does not work since reset is left asserted. Only after ifconfig down && ifconfig up is called will the XLG mode work. Move the call to mvpp22_mode_reconfigure() to .mac_prepare() implementation. 
Since some of the subsequent functions need to know whether the interface is being changed, we unfortunately also need to pass around the new interface mode before setting port->phy_interface. Fixes: bfe301ebbc94 ("net: mvpp2: convert to use mac_prepare()/mac_finish()") Signed-off-by: Marek Behún Signed-off-by: Russell King (Oracle) Signed-off-by: David S. Miller --- .../net/ethernet/marvell/mvpp2/mvpp2_main.c | 38 ++++++++++--------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 587def69a6f7..2b18d89d9756 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -1605,7 +1605,7 @@ static void mvpp22_gop_fca_set_periodic_timer(struct mvpp2_port *port) mvpp22_gop_fca_enable_periodic(port, true); } -static int mvpp22_gop_init(struct mvpp2_port *port) +static int mvpp22_gop_init(struct mvpp2_port *port, phy_interface_t interface) { struct mvpp2 *priv = port->priv; u32 val; @@ -1613,7 +1613,7 @@ static int mvpp22_gop_init(struct mvpp2_port *port) if (!priv->sysctrl_base) return 0; - switch (port->phy_interface) { + switch (interface) { case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII_RXID: @@ -1743,15 +1743,15 @@ static void mvpp22_gop_setup_irq(struct mvpp2_port *port) * lanes by the physical layer. This is why configurations like * "PPv2 (2500BaseX) - COMPHY (2500SGMII)" are valid. 
*/ -static int mvpp22_comphy_init(struct mvpp2_port *port) +static int mvpp22_comphy_init(struct mvpp2_port *port, + phy_interface_t interface) { int ret; if (!port->comphy) return 0; - ret = phy_set_mode_ext(port->comphy, PHY_MODE_ETHERNET, - port->phy_interface); + ret = phy_set_mode_ext(port->comphy, PHY_MODE_ETHERNET, interface); if (ret) return ret; @@ -2172,7 +2172,8 @@ static void mvpp22_pcs_reset_assert(struct mvpp2_port *port) writel(val & ~MVPP22_XPCS_CFG0_RESET_DIS, xpcs + MVPP22_XPCS_CFG0); } -static void mvpp22_pcs_reset_deassert(struct mvpp2_port *port) +static void mvpp22_pcs_reset_deassert(struct mvpp2_port *port, + phy_interface_t interface) { struct mvpp2 *priv = port->priv; void __iomem *mpcs, *xpcs; @@ -2184,7 +2185,7 @@ static void mvpp22_pcs_reset_deassert(struct mvpp2_port *port) mpcs = priv->iface_base + MVPP22_MPCS_BASE(port->gop_id); xpcs = priv->iface_base + MVPP22_XPCS_BASE(port->gop_id); - switch (port->phy_interface) { + switch (interface) { case PHY_INTERFACE_MODE_10GBASER: val = readl(mpcs + MVPP22_MPCS_CLK_RESET); val |= MAC_CLK_RESET_MAC | MAC_CLK_RESET_SD_RX | @@ -4529,7 +4530,8 @@ static int mvpp2_poll(struct napi_struct *napi, int budget) return rx_done; } -static void mvpp22_mode_reconfigure(struct mvpp2_port *port) +static void mvpp22_mode_reconfigure(struct mvpp2_port *port, + phy_interface_t interface) { u32 ctrl3; @@ -4540,18 +4542,18 @@ static void mvpp22_mode_reconfigure(struct mvpp2_port *port) mvpp22_pcs_reset_assert(port); /* comphy reconfiguration */ - mvpp22_comphy_init(port); + mvpp22_comphy_init(port, interface); /* gop reconfiguration */ - mvpp22_gop_init(port); + mvpp22_gop_init(port, interface); - mvpp22_pcs_reset_deassert(port); + mvpp22_pcs_reset_deassert(port, interface); if (mvpp2_port_supports_xlg(port)) { ctrl3 = readl(port->base + MVPP22_XLG_CTRL3_REG); ctrl3 &= ~MVPP22_XLG_CTRL3_MACMODESELECT_MASK; - if (mvpp2_is_xlg(port->phy_interface)) + if (mvpp2_is_xlg(interface)) ctrl3 |= 
MVPP22_XLG_CTRL3_MACMODESELECT_10G; else ctrl3 |= MVPP22_XLG_CTRL3_MACMODESELECT_GMAC; @@ -4559,7 +4561,7 @@ static void mvpp22_mode_reconfigure(struct mvpp2_port *port) writel(ctrl3, port->base + MVPP22_XLG_CTRL3_REG); } - if (mvpp2_port_supports_xlg(port) && mvpp2_is_xlg(port->phy_interface)) + if (mvpp2_port_supports_xlg(port) && mvpp2_is_xlg(interface)) mvpp2_xlg_max_rx_size_set(port); else mvpp2_gmac_max_rx_size_set(port); @@ -4579,7 +4581,7 @@ static void mvpp2_start_dev(struct mvpp2_port *port) mvpp2_interrupts_enable(port); if (port->priv->hw_version >= MVPP22) - mvpp22_mode_reconfigure(port); + mvpp22_mode_reconfigure(port, port->phy_interface); if (port->phylink) { phylink_start(port->phylink); @@ -6444,6 +6446,9 @@ static int mvpp2__mac_prepare(struct phylink_config *config, unsigned int mode, mvpp22_gop_mask_irq(port); phy_power_off(port->comphy); + + /* Reconfigure the serdes lanes */ + mvpp22_mode_reconfigure(port, interface); } } @@ -6498,9 +6503,6 @@ static int mvpp2_mac_finish(struct phylink_config *config, unsigned int mode, port->phy_interface != interface) { port->phy_interface = interface; - /* Reconfigure the serdes lanes */ - mvpp22_mode_reconfigure(port); - /* Unmask interrupts */ mvpp22_gop_unmask_irq(port); } @@ -6961,7 +6963,7 @@ static int mvpp2_port_probe(struct platform_device *pdev, * driver does this, we can remove this code. */ if (port->comphy) { - err = mvpp22_comphy_init(port); + err = mvpp22_comphy_init(port, port->phy_interface); if (err == 0) phy_power_off(port->comphy); } From c7cd82b90599fa10915f41e3dd9098a77d0aa7b6 Mon Sep 17 00:00:00 2001 From: Eiichi Tsukata Date: Tue, 9 Nov 2021 00:15:02 +0000 Subject: [PATCH 407/433] vsock: prevent unnecessary refcnt inc for nonblocking connect Currently vsock_connect() increments sock refcount for nonblocking socket each time it's called, which can lead to memory leak if it's called multiple times because connect timeout function decrements sock refcount only once.
Fixes it by making vsock_connect() return -EALREADY immediately when sock state is already SS_CONNECTING. Fixes: d021c344051a ("VSOCK: Introduce VM Sockets") Reviewed-by: Stefano Garzarella Signed-off-by: Eiichi Tsukata Signed-off-by: David S. Miller --- net/vmw_vsock/af_vsock.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 7d851eb3a683..ed0df839c38c 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1322,6 +1322,8 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr, * non-blocking call. */ err = -EALREADY; + if (flags & O_NONBLOCK) + goto out; break; default: if ((sk->sk_state == TCP_LISTEN) || From af0a51113cb7445435488e5f9514598f2b52d7a7 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 9 Nov 2021 17:17:34 +0100 Subject: [PATCH 408/433] selftests: forwarding: Fix packet matching in mirroring selftests In commit 6de6e46d27ef ("cls_flower: Fix inability to match GRE/IPIP packets"), cls_flower was fixed to match an outer packet of a tunneled packet as would be expected, rather than dissecting to the inner packet and matching on that. This fix uncovered several issues in packet matching in mirroring selftests: - in mirror_gre_bridge_1d_vlan.sh and mirror_gre_vlan_bridge_1q.sh, the vlan_ethtype match is copied around as "ip", even as some of the tests are running over ip6gretap. This is fixed by using an "ipv6" for vlan_ethtype in the ip6gretap tests. - in mirror_gre_changes.sh, a filter to count GRE packets is set up to match TTL of 50. This used to trigger in the offloaded datapath, where the envelope TTL was matched, but not in the software datapath, which considered TTL of the inner packet. Now that both match consistently, all the packets were double-counted. This is fixed by marking the filter as skip_hw, leaving only the SW datapath component active. 
Fixes: 6de6e46d27ef ("cls_flower: Fix inability to match GRE/IPIP packets") Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- .../net/forwarding/mirror_gre_bridge_1d_vlan.sh | 2 +- .../selftests/net/forwarding/mirror_gre_changes.sh | 2 +- .../net/forwarding/mirror_gre_vlan_bridge_1q.sh | 13 +++++++------ .../testing/selftests/net/forwarding/mirror_lib.sh | 3 ++- .../testing/selftests/net/forwarding/mirror_vlan.sh | 4 ++-- 5 files changed, 13 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh index f8cda822c1ce..1b27f2b0f196 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh @@ -80,7 +80,7 @@ test_gretap() test_ip6gretap() { - test_vlan_match gt6 'skip_hw vlan_id 555 vlan_ethtype ip' \ + test_vlan_match gt6 'skip_hw vlan_id 555 vlan_ethtype ipv6' \ "mirror to ip6gretap" } diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh index 472bd023e2a5..aff88f78e339 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh @@ -74,7 +74,7 @@ test_span_gre_ttl() mirror_install $swp1 ingress $tundev "matchall $tcflags" tc filter add dev $h3 ingress pref 77 prot $prot \ - flower ip_ttl 50 action pass + flower skip_hw ip_ttl 50 action pass mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 0 diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh index 880e3ab9d088..c8a9b5bd841f 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh @@ -141,7 +141,7 @@ test_gretap() test_ip6gretap() { - 
test_vlan_match gt6 'skip_hw vlan_id 555 vlan_ethtype ip' \ + test_vlan_match gt6 'skip_hw vlan_id 555 vlan_ethtype ipv6' \ "mirror to ip6gretap" } @@ -218,6 +218,7 @@ test_ip6gretap_forbidden_egress() test_span_gre_untagged_egress() { local tundev=$1; shift + local ul_proto=$1; shift local what=$1; shift RET=0 @@ -225,7 +226,7 @@ test_span_gre_untagged_egress() mirror_install $swp1 ingress $tundev "matchall $tcflags" quick_test_span_gre_dir $tundev ingress - quick_test_span_vlan_dir $h3 555 ingress + quick_test_span_vlan_dir $h3 555 ingress "$ul_proto" h3_addr_add_del del $h3.555 bridge vlan add dev $swp3 vid 555 pvid untagged @@ -233,7 +234,7 @@ test_span_gre_untagged_egress() sleep 5 quick_test_span_gre_dir $tundev ingress - fail_test_span_vlan_dir $h3 555 ingress + fail_test_span_vlan_dir $h3 555 ingress "$ul_proto" h3_addr_add_del del $h3 bridge vlan add dev $swp3 vid 555 @@ -241,7 +242,7 @@ test_span_gre_untagged_egress() sleep 5 quick_test_span_gre_dir $tundev ingress - quick_test_span_vlan_dir $h3 555 ingress + quick_test_span_vlan_dir $h3 555 ingress "$ul_proto" mirror_uninstall $swp1 ingress @@ -250,12 +251,12 @@ test_span_gre_untagged_egress() test_gretap_untagged_egress() { - test_span_gre_untagged_egress gt4 "mirror to gretap" + test_span_gre_untagged_egress gt4 ip "mirror to gretap" } test_ip6gretap_untagged_egress() { - test_span_gre_untagged_egress gt6 "mirror to ip6gretap" + test_span_gre_untagged_egress gt6 ipv6 "mirror to ip6gretap" } test_span_gre_fdb_roaming() diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh index 6406cd76a19d..3e8ebeff3019 100644 --- a/tools/testing/selftests/net/forwarding/mirror_lib.sh +++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh @@ -115,13 +115,14 @@ do_test_span_vlan_dir_ips() local dev=$1; shift local vid=$1; shift local direction=$1; shift + local ul_proto=$1; shift local ip1=$1; shift local ip2=$1; shift # Install the capture as 
skip_hw to avoid double-counting of packets. # The traffic is meant for local box anyway, so will be trapped to # kernel. - vlan_capture_install $dev "skip_hw vlan_id $vid vlan_ethtype ip" + vlan_capture_install $dev "skip_hw vlan_id $vid vlan_ethtype $ul_proto" mirror_test v$h1 $ip1 $ip2 $dev 100 $expect mirror_test v$h2 $ip2 $ip1 $dev 100 $expect vlan_capture_uninstall $dev diff --git a/tools/testing/selftests/net/forwarding/mirror_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_vlan.sh index 9ab2ce77b332..0b44e148235e 100755 --- a/tools/testing/selftests/net/forwarding/mirror_vlan.sh +++ b/tools/testing/selftests/net/forwarding/mirror_vlan.sh @@ -85,9 +85,9 @@ test_tagged_vlan_dir() RET=0 mirror_install $swp1 $direction $swp3.555 "matchall $tcflags" - do_test_span_vlan_dir_ips 10 "$h3.555" 111 "$direction" \ + do_test_span_vlan_dir_ips 10 "$h3.555" 111 "$direction" ip \ 192.0.2.17 192.0.2.18 - do_test_span_vlan_dir_ips 0 "$h3.555" 555 "$direction" \ + do_test_span_vlan_dir_ips 0 "$h3.555" 555 "$direction" ip \ 192.0.2.17 192.0.2.18 mirror_uninstall $swp1 $direction From 68eabc348148ae051631e8dab13c3b1a85c82896 Mon Sep 17 00:00:00 2001 From: Aleksander Jan Bajkowski Date: Tue, 9 Nov 2021 23:23:54 +0100 Subject: [PATCH 409/433] net: ethernet: lantiq_etop: Fix compilation error This fixes the error detected when compiling the driver. Fixes: 14d4e308e0aa ("net: lantiq: configure the burst length in ethernet drivers") Reported-by: kernel test robot Signed-off-by: Aleksander Jan Bajkowski Signed-off-by: David S. 
Miller --- drivers/net/ethernet/lantiq_etop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c index 2258e3f19161..6433c909c6b2 100644 --- a/drivers/net/ethernet/lantiq_etop.c +++ b/drivers/net/ethernet/lantiq_etop.c @@ -262,7 +262,7 @@ ltq_etop_hw_init(struct net_device *dev) /* enable crc generation */ ltq_etop_w32(PPE32_CGEN, LQ_PPE32_ENET_MAC_CFG); - ltq_dma_init_port(DMA_PORT_ETOP, priv->tx_burst_len, rx_burst_len); + ltq_dma_init_port(DMA_PORT_ETOP, priv->tx_burst_len, priv->rx_burst_len); for (i = 0; i < MAX_DMA_CHAN; i++) { int irq = LTQ_DMA_CH0_INT + i; From 721111b1b29c67fd18ac2f69b3a48c06ba996762 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 10 Nov 2021 11:11:09 +0300 Subject: [PATCH 410/433] gve: fix unmatched u64_stats_update_end() The u64_stats_update_end() call is supposed to be inside the curly braces so it pairs with the u64_stats_update_begin(). Fixes: 37149e9374bf ("gve: Implement packet continuation for RX.") Signed-off-by: Dan Carpenter Signed-off-by: David S. 
Miller --- drivers/net/ethernet/google/gve/gve_rx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index c8500babbd1d..3d04b5aff331 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -500,7 +500,8 @@ static struct sk_buff *gve_rx_skb(struct gve_priv *priv, struct gve_rx_ring *rx, rx->rx_copied_pkt++; rx->rx_frag_copy_cnt++; rx->rx_copybreak_pkt++; - } u64_stats_update_end(&rx->statss); + u64_stats_update_end(&rx->statss); + } } else { if (rx->data.raw_addressing) { int recycle = gve_rx_can_recycle_buffer(page_info); From c7ebe23cee350fb187ee00ff445b01e11de0bfe9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 10 Nov 2021 11:07:06 +0300 Subject: [PATCH 411/433] net/mlx5: Lag, fix a potential Oops with mlx5_lag_create_definer() There is a minus character missing from ERR_PTR(ENOMEM) so if this allocation fails it will lead to an Oops in the caller. Fixes: dc48516ec7d3 ("net/mlx5: Lag, add support to create definers for LAG") Signed-off-by: Dan Carpenter Reviewed-by: Leon Romanovsky Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c index adc836b3d857..ad63dd45c8fb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c @@ -289,7 +289,7 @@ mlx5_lag_create_definer(struct mlx5_lag *ldev, enum netdev_lag_hash hash, lag_definer = kzalloc(sizeof(*lag_definer), GFP_KERNEL); if (!lag_definer) - return ERR_PTR(ENOMEM); + return ERR_PTR(-ENOMEM); match_definer_mask = kvzalloc(MLX5_FLD_SZ_BYTES(match_definer, match_mask), From e5d5aadcf3cd59949316df49c27cb21788d7efe4 Mon Sep 17 00:00:00 2001 From: Dust Li Date: Wed, 10 Nov 2021 15:02:34 +0800 Subject: [PATCH 412/433] net/smc: fix sk_refcnt underflow on linkdown and fallback We got the following WARNING when running ab/nginx test with RDMA link flapping (up-down-up). The reason is that when smc_sock fallback and linkdown happen simultaneously, we may get the following situation: __smc_lgr_terminate() --> smc_conn_kill() --> smc_close_active_abort() smc_sock->sk_state = SMC_CLOSED sock_put(smc_sock) smc_sock was set to SMC_CLOSED and sock_put() was called when terminating the link group. But later the application calls close() on the socket, and then we get: __smc_release(): if (smc_sock->fallback) smc_sock->sk_state = SMC_CLOSED sock_put(smc_sock) Again we set the smc_sock to CLOSED though it's already in CLOSED state, and double put the refcnt, so the following warning happens: refcount_t: underflow; use-after-free.
WARNING: CPU: 5 PID: 860 at lib/refcount.c:28 refcount_warn_saturate+0x8d/0xf0 Modules linked in: CPU: 5 PID: 860 Comm: nginx Not tainted 5.10.46+ #403 Hardware name: Alibaba Cloud Alibaba Cloud ECS, BIOS 8c24b4c 04/01/2014 RIP: 0010:refcount_warn_saturate+0x8d/0xf0 Code: 05 5c 1e b5 01 01 e8 52 25 bc ff 0f 0b c3 80 3d 4f 1e b5 01 00 75 ad 48 RSP: 0018:ffffc90000527e50 EFLAGS: 00010286 RAX: 0000000000000026 RBX: ffff8881300df2c0 RCX: 0000000000000027 RDX: 0000000000000000 RSI: ffff88813bd58040 RDI: ffff88813bd58048 RBP: 0000000000000000 R08: 0000000000000003 R09: 0000000000000001 R10: ffff8881300df2c0 R11: ffffc90000527c78 R12: ffff8881300df340 R13: ffff8881300df930 R14: ffff88810b3dad80 R15: ffff8881300df4f8 FS: 00007f739de8fb80(0000) GS:ffff88813bd40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000a01b008 CR3: 0000000111b64003 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: smc_release+0x353/0x3f0 __sock_release+0x3d/0xb0 sock_close+0x11/0x20 __fput+0x93/0x230 task_work_run+0x65/0xa0 exit_to_user_mode_prepare+0xf9/0x100 syscall_exit_to_user_mode+0x27/0x190 entry_SYSCALL_64_after_hwframe+0x44/0xa9 This patch adds check in __smc_release() to make sure we won't do an extra sock_put() and set the socket to CLOSED when its already in CLOSED state. Fixes: 51f1de79ad8e (net/smc: replace sock_put worker by socket refcounting) Signed-off-by: Dust Li Reviewed-by: Tony Lu Signed-off-by: Dust Li Acked-by: Karsten Graul Signed-off-by: David S. 
Miller --- net/smc/af_smc.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 0cf7ed2f5d41..59284da9116d 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -149,14 +149,18 @@ static int __smc_release(struct smc_sock *smc) sock_set_flag(sk, SOCK_DEAD); sk->sk_shutdown |= SHUTDOWN_MASK; } else { - if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT) - sock_put(sk); /* passive closing */ - if (sk->sk_state == SMC_LISTEN) { - /* wake up clcsock accept */ - rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); + if (sk->sk_state != SMC_CLOSED) { + if (sk->sk_state != SMC_LISTEN && + sk->sk_state != SMC_INIT) + sock_put(sk); /* passive closing */ + if (sk->sk_state == SMC_LISTEN) { + /* wake up clcsock accept */ + rc = kernel_sock_shutdown(smc->clcsock, + SHUT_RDWR); + } + sk->sk_state = SMC_CLOSED; + sk->sk_state_change(sk); } - sk->sk_state = SMC_CLOSED; - sk->sk_state_change(sk); smc_restore_fallback_changes(smc); } From 51d157946666382e779f94c39891e8e9a020da78 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 8 Nov 2021 10:58:10 -0500 Subject: [PATCH 413/433] ring-buffer: Protect ring_buffer_reset() from reentrancy The resetting of the entire ring buffer used to simply go through and reset each individual CPU buffer that had its own protection and synchronization. But this was very slow, due to performing a synchronization for each CPU. The code was reshuffled to do one disabling of all CPU buffers, followed by a single RCU synchronization, and then the resetting of each of the CPU buffers. But unfortunately, the mutex that prevented multiple occurrences of resetting the buffer was not moved to the upper function, and there is nothing to protect from it. Take the ring buffer mutex around the global reset.
Cc: stable@vger.kernel.org Fixes: b23d7a5f4a07a ("ring-buffer: speed up buffer resets by avoiding synchronize_rcu for each CPU") Reported-by: "Tzvetomir Stoyanov (VMware)" Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index f6520d0a4c8c..2699e9e562b1 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -5228,6 +5228,9 @@ void ring_buffer_reset(struct trace_buffer *buffer) struct ring_buffer_per_cpu *cpu_buffer; int cpu; + /* prevent another thread from changing buffer sizes */ + mutex_lock(&buffer->mutex); + for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; @@ -5246,6 +5249,8 @@ void ring_buffer_reset(struct trace_buffer *buffer) atomic_dec(&cpu_buffer->record_disabled); atomic_dec(&cpu_buffer->resize_disabled); } + + mutex_unlock(&buffer->mutex); } EXPORT_SYMBOL_GPL(ring_buffer_reset); From 2e6e9058d13a22a6fdd36a8c444ac71d9656003a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 9 Nov 2021 12:42:17 +0100 Subject: [PATCH 414/433] ftrace/direct: Fix lockup in modify_ftrace_direct_multi We can't call unregister_ftrace_function under ftrace_lock. 
Link: https://lkml.kernel.org/r/20211109114217.1645296-1-jolsa@kernel.org Fixes: ed29271894aa ("ftrace/direct: Do not disable when switching direct callers") Signed-off-by: Jiri Olsa Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b4ed1a301232..fc49e8809a56 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5602,10 +5602,11 @@ int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) } } + mutex_unlock(&ftrace_lock); + /* Removing the tmp_ops will add the updated direct callers to the functions */ unregister_ftrace_function(&tmp_ops); - mutex_unlock(&ftrace_lock); out_direct: mutex_unlock(&direct_mutex); return err; From 0315a075f1343966ea2d9a085666a88a69ea6a3d Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 10 Nov 2021 20:56:05 +0100 Subject: [PATCH 415/433] net: fix premature exit from NAPI state polling in napi_disable() Commit 719c57197010 ("net: make napi_disable() symmetric with enable") accidentally introduced a bug sometimes leading to a kernel BUG when bringing an iface up/down under heavy traffic load. Prior to this commit, napi_disable() was polling n->state until none of (NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC) is set and then always flipped them. Now there's a possibility to get away with the NAPIF_STATE_SCHED unset as 'continue' drops us to the cmpxchg() call with an uninitialized variable, rather than straight to another round of the state check.
Error path looks like: napi_disable(): unsigned long val, new; /* new is uninitialized */ do { val = READ_ONCE(n->state); /* NAPIF_STATE_NPSVC and/or NAPIF_STATE_SCHED is set */ if (val & (NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC)) { /* true */ usleep_range(20, 200); continue; /* go straight to the condition check */ } new = val | <...> } while (cmpxchg(&n->state, val, new) != val); /* state == val, cmpxchg() writes garbage */ napi_enable(): do { val = READ_ONCE(n->state); BUG_ON(!test_bit(NAPI_STATE_SCHED, &val)); /* 50/50 boom */ <...> while the typical BUG splat is like: [ 172.652461] ------------[ cut here ]------------ [ 172.652462] kernel BUG at net/core/dev.c:6937! [ 172.656914] invalid opcode: 0000 [#1] PREEMPT SMP PTI [ 172.661966] CPU: 36 PID: 2829 Comm: xdp_redirect_cp Tainted: G I 5.15.0 #42 [ 172.670222] Hardware name: Intel Corporation S2600WFT/S2600WFT, BIOS SE5C620.86B.02.01.0014.082620210524 08/26/2021 [ 172.680646] RIP: 0010:napi_enable+0x5a/0xd0 [ 172.684832] Code: 07 49 81 cc 00 01 00 00 4c 89 e2 48 89 d8 80 e6 fb f0 48 0f b1 55 10 48 39 c3 74 10 48 8b 5d 10 f6 c7 04 75 3d f6 c3 01 75 b4 <0f> 0b 5b 5d 41 5c c3 65 ff 05 b8 e5 61 53 48 c7 c6 c0 f3 34 ad 48 [ 172.703578] RSP: 0018:ffffa3c9497477a8 EFLAGS: 00010246 [ 172.708803] RAX: ffffa3c96615a014 RBX: 0000000000000000 RCX: ffff8a4b575301a0 < snip > [ 172.782403] Call Trace: [ 172.784857] [ 172.786963] ice_up_complete+0x6f/0x210 [ice] [ 172.791349] ice_xdp+0x136/0x320 [ice] [ 172.795108] ? ice_change_mtu+0x180/0x180 [ice] [ 172.799648] dev_xdp_install+0x61/0xe0 [ 172.803401] dev_xdp_attach+0x1e0/0x550 [ 172.807240] dev_change_xdp_fd+0x1e6/0x220 [ 172.811338] do_setlink+0xee8/0x1010 [ 172.814917] rtnl_setlink+0xe5/0x170 [ 172.818499] ? bpf_lsm_binder_set_context_mgr+0x10/0x10 [ 172.823732] ? security_capable+0x36/0x50 < snip > Fix this by replacing 'do { } while (cmpxchg())' with an "infinite" for-loop with an explicit break. 
From v1 [0]: - just use a for-loop to simplify both the fix and the existing code (Eric). [0] https://lore.kernel.org/netdev/20211110191126.1214-1-alexandr.lobakin@intel.com Fixes: 719c57197010 ("net: make napi_disable() symmetric with enable") Suggested-by: Eric Dumazet # for-loop Signed-off-by: Alexander Lobakin Reviewed-by: Jesse Brandeburg Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20211110195605.1304-1-alexandr.lobakin@intel.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index edeb811c454e..15ac064b5562 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6928,7 +6928,7 @@ void napi_disable(struct napi_struct *n) might_sleep(); set_bit(NAPI_STATE_DISABLE, &n->state); - do { + for ( ; ; ) { val = READ_ONCE(n->state); if (val & (NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC)) { usleep_range(20, 200); @@ -6937,7 +6937,10 @@ void napi_disable(struct napi_struct *n) new = val | NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC; new &= ~(NAPIF_STATE_THREADED | NAPIF_STATE_PREFER_BUSY_POLL); - } while (cmpxchg(&n->state, val, new) != val); + + if (cmpxchg(&n->state, val, new) == val) + break; + } hrtimer_cancel(&n->timer); From 68dbbe7d5b4fde736d104cbbc9a2fce875562012 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 4 Nov 2021 17:31:58 +0900 Subject: [PATCH 416/433] libata: fix read log timeout value Some ATA drives are very slow to respond to READ_LOG_EXT and READ_LOG_DMA_EXT commands issued from ata_dev_configure() when the device is revalidated right after resuming a system or inserting the ATA adapter driver (e.g. ahci). The default 5s timeout (ATA_EH_CMD_DFL_TIMEOUT) used for these commands is too short, causing errors during the device configuration. Ex: ... 
ata9: SATA max UDMA/133 abar m524288@0x9d200000 port 0x9d200400 irq 209 ata9: SATA link up 6.0 Gbps (SStatus 133 SControl 300) ata9.00: ATA-9: XXX XXXXXXXXXXXXXXX, XXXXXXXX, max UDMA/133 ata9.00: qc timeout (cmd 0x2f) ata9.00: Read log page 0x00 failed, Emask 0x4 ata9.00: Read log page 0x00 failed, Emask 0x40 ata9.00: NCQ Send/Recv Log not supported ata9.00: Read log page 0x08 failed, Emask 0x40 ata9.00: 27344764928 sectors, multi 16: LBA48 NCQ (depth 32), AA ata9.00: Read log page 0x00 failed, Emask 0x40 ata9.00: ATA Identify Device Log not supported ata9.00: failed to set xfermode (err_mask=0x40) ata9: SATA link up 6.0 Gbps (SStatus 133 SControl 300) ata9.00: configured for UDMA/133 ... The timeout error causes a soft reset of the drive link, followed in most cases by a successful revalidation as that gives enough time to the drive to become fully ready to quickly process the read log commands. However, in some cases, this also fails resulting in the device being dropped. Fix this by adding the ata_eh_revalidate_timeouts entries for the READ_LOG_EXT and READ_LOG_DMA_EXT commands. This defines a timeout increased to 15s, retriable one time.
Reported-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal --- drivers/ata/libata-eh.c | 8 ++++++++ include/linux/libata.h | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index bf9c4b6c5c3d..1d4a6f1e88cd 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -93,6 +93,12 @@ static const unsigned long ata_eh_identify_timeouts[] = { ULONG_MAX, }; +static const unsigned long ata_eh_revalidate_timeouts[] = { + 15000, /* Some drives are slow to read log pages when waking-up */ + 15000, /* combined time till here is enough even for media access */ + ULONG_MAX, +}; + static const unsigned long ata_eh_flush_timeouts[] = { 15000, /* be generous with flush */ 15000, /* ditto */ @@ -129,6 +135,8 @@ static const struct ata_eh_cmd_timeout_ent ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = { { .commands = CMDS(ATA_CMD_ID_ATA, ATA_CMD_ID_ATAPI), .timeouts = ata_eh_identify_timeouts, }, + { .commands = CMDS(ATA_CMD_READ_LOG_EXT, ATA_CMD_READ_LOG_DMA_EXT), + .timeouts = ata_eh_revalidate_timeouts, }, { .commands = CMDS(ATA_CMD_READ_NATIVE_MAX, ATA_CMD_READ_NATIVE_MAX_EXT), .timeouts = ata_eh_other_timeouts, }, { .commands = CMDS(ATA_CMD_SET_MAX, ATA_CMD_SET_MAX_EXT), diff --git a/include/linux/libata.h b/include/linux/libata.h index 2884383f1718..5331557316e8 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -394,7 +394,7 @@ enum { /* This should match the actual table size of * ata_eh_cmd_timeout_table in libata-eh.c. */ - ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 6, + ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 7, /* Horkage types. 
May be set by libata or controller on drives (some horkage may be drive/controller pair dependent */ From 51839e25d43dc6c91d653995a41a3dfc52a21e33 Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Fri, 5 Nov 2021 01:50:21 +0000 Subject: [PATCH 417/433] ata: sata_highbank: Remove unnecessary print function dev_err() The print function dev_err() is redundant because platform_get_irq() already prints an error. Signed-off-by: Xu Wang Signed-off-by: Damien Le Moal --- drivers/ata/sata_highbank.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/ata/sata_highbank.c b/drivers/ata/sata_highbank.c index 8440203e835e..b29d3f1d64b0 100644 --- a/drivers/ata/sata_highbank.c +++ b/drivers/ata/sata_highbank.c @@ -469,10 +469,8 @@ static int ahci_highbank_probe(struct platform_device *pdev) } irq = platform_get_irq(pdev, 0); - if (irq < 0) { - dev_err(dev, "no irq\n"); + if (irq < 0) return irq; - } if (!irq) return -EINVAL; From 4ca110bf8d9b31a60f8f8ff6706ea147d38ad97c Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Thu, 11 Nov 2021 15:55:16 +0530 Subject: [PATCH 418/433] cxgb4: fix eeprom len when diagnostics not implemented Ensure diagnostics monitoring support is implemented for the SFF 8472 compliant port module and set the correct length for ethtool port module eeprom read. Fixes: f56ec6766dcf ("cxgb4: Add support for ethtool i2c dump") Signed-off-by: Manoj Malviya Signed-off-by: Rahul Lakkireddy Signed-off-by: David S. 
Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c | 7 +++++-- drivers/net/ethernet/chelsio/cxgb4/t4_hw.h | 2 ++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c index 5903bdb78916..129352bbe114 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c @@ -2015,12 +2015,15 @@ static int cxgb4_get_module_info(struct net_device *dev, if (ret) return ret; - if (!sff8472_comp || (sff_diag_type & 4)) { + if (!sff8472_comp || (sff_diag_type & SFP_DIAG_ADDRMODE)) { modinfo->type = ETH_MODULE_SFF_8079; modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; } else { modinfo->type = ETH_MODULE_SFF_8472; - modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + if (sff_diag_type & SFP_DIAG_IMPLEMENTED) + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + else + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN / 2; } break; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h index 002fc62ea726..63bc956d2037 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h @@ -293,6 +293,8 @@ enum { #define I2C_PAGE_SIZE 0x100 #define SFP_DIAG_TYPE_ADDR 0x5c #define SFP_DIAG_TYPE_LEN 0x1 +#define SFP_DIAG_ADDRMODE BIT(2) +#define SFP_DIAG_IMPLEMENTED BIT(6) #define SFF_8472_COMP_ADDR 0x5e #define SFF_8472_COMP_LEN 0x1 #define SFF_REV_ADDR 0x1 From 29cd386750412297fd064c01c87bc40a26f24047 Mon Sep 17 00:00:00 2001 From: M Chetan Kumar Date: Wed, 10 Nov 2021 21:50:36 +0530 Subject: [PATCH 419/433] net: wwan: iosm: fix compilation warning curr_phase is unused. Removed the dead code. Fixes: 8d9be0634181 ("net: wwan: iosm: transport layer support for fw flashing/cd") Reported-by: kernel test robot Signed-off-by: M Chetan Kumar Reviewed-by: Loic Poulain Signed-off-by: David S. 
Miller --- drivers/net/wwan/iosm/iosm_ipc_imem_ops.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c b/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c index b885a6570235..825e8e5ffb2a 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c +++ b/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c @@ -394,12 +394,10 @@ void ipc_imem_sys_devlink_close(struct iosm_devlink *ipc_devlink) int boot_check_timeout = BOOT_CHECK_DEFAULT_TIMEOUT; enum ipc_mem_exec_stage exec_stage; struct ipc_mem_channel *channel; - enum ipc_phase curr_phase; int status = 0; u32 tail = 0; channel = ipc_imem->ipc_devlink->devlink_sio.channel; - curr_phase = ipc_imem->phase; /* Increase the total wait time to boot_check_timeout */ do { exec_stage = ipc_mmio_get_exec_stage(ipc_imem->mmio); From d336509cb9d03970911878bb77f0497f64fda061 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 11 Nov 2021 06:57:17 -0500 Subject: [PATCH 420/433] selftests/net: udpgso_bench_rx: fix port argument The below commit added optional support for passing a bind address. It configures the sockaddr bind arguments before parsing options and reconfigures on options -b and -4. This broke support for passing port (-p) on its own. Configure sockaddr after parsing all arguments. Fixes: 3327a9c46352 ("selftests: add functionals test for UDP GRO") Reported-by: Eric Dumazet Signed-off-by: Willem de Bruijn Signed-off-by: David S. 
Miller --- tools/testing/selftests/net/udpgso_bench_rx.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c index 76a24052f4b4..6a193425c367 100644 --- a/tools/testing/selftests/net/udpgso_bench_rx.c +++ b/tools/testing/selftests/net/udpgso_bench_rx.c @@ -293,19 +293,17 @@ static void usage(const char *filepath) static void parse_opts(int argc, char **argv) { + const char *bind_addr = NULL; int c; - /* bind to any by default */ - setup_sockaddr(PF_INET6, "::", &cfg_bind_addr); while ((c = getopt(argc, argv, "4b:C:Gl:n:p:rR:S:tv")) != -1) { switch (c) { case '4': cfg_family = PF_INET; cfg_alen = sizeof(struct sockaddr_in); - setup_sockaddr(PF_INET, "0.0.0.0", &cfg_bind_addr); break; case 'b': - setup_sockaddr(cfg_family, optarg, &cfg_bind_addr); + bind_addr = optarg; break; case 'C': cfg_connect_timeout_ms = strtoul(optarg, NULL, 0); @@ -341,6 +339,11 @@ static void parse_opts(int argc, char **argv) } } + if (!bind_addr) + bind_addr = cfg_family == PF_INET6 ? "::" : "0.0.0.0"; + + setup_sockaddr(cfg_family, bind_addr, &cfg_bind_addr); + if (optind != argc) usage(argv[0]); From 0093de693fe7baf31641d1863793e6db93a43b6e Mon Sep 17 00:00:00 2001 From: Yixuan Cao Date: Wed, 10 Nov 2021 20:32:30 -0800 Subject: [PATCH 421/433] mm/page_owner.c: modify the type of argument "order" in some functions The type of "order" in struct page_owner is unsigned short. However, it is unsigned int in the following 3 functions: __reset_page_owner __set_page_owner_handle __set_page_owner The type of "order" in argument list is unsigned int, which is inconsistent.
[akpm@linux-foundation.org: update include/linux/page_owner.h] Link: https://lkml.kernel.org/r/20211020125945.47792-1-caoyixuan2019@email.szu.edu.cn Signed-off-by: Yixuan Cao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_owner.h | 12 ++++++------ mm/page_owner.c | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h index 43c638c51c1f..119a0c9d2a8b 100644 --- a/include/linux/page_owner.h +++ b/include/linux/page_owner.h @@ -8,9 +8,9 @@ extern struct static_key_false page_owner_inited; extern struct page_ext_operations page_owner_ops; -extern void __reset_page_owner(struct page *page, unsigned int order); +extern void __reset_page_owner(struct page *page, unsigned short order); extern void __set_page_owner(struct page *page, - unsigned int order, gfp_t gfp_mask); + unsigned short order, gfp_t gfp_mask); extern void __split_page_owner(struct page *page, unsigned int nr); extern void __folio_copy_owner(struct folio *newfolio, struct folio *old); extern void __set_page_owner_migrate_reason(struct page *page, int reason); @@ -18,14 +18,14 @@ extern void __dump_page_owner(const struct page *page); extern void pagetypeinfo_showmixedcount_print(struct seq_file *m, pg_data_t *pgdat, struct zone *zone); -static inline void reset_page_owner(struct page *page, unsigned int order) +static inline void reset_page_owner(struct page *page, unsigned short order) { if (static_branch_unlikely(&page_owner_inited)) __reset_page_owner(page, order); } static inline void set_page_owner(struct page *page, - unsigned int order, gfp_t gfp_mask) + unsigned short order, gfp_t gfp_mask) { if (static_branch_unlikely(&page_owner_inited)) __set_page_owner(page, order, gfp_mask); @@ -52,7 +52,7 @@ static inline void dump_page_owner(const struct page *page) __dump_page_owner(page); } #else -static inline void reset_page_owner(struct page *page, unsigned int order) +static inline void 
reset_page_owner(struct page *page, unsigned short order) { } static inline void set_page_owner(struct page *page, @@ -60,7 +60,7 @@ static inline void set_page_owner(struct page *page, { } static inline void split_page_owner(struct page *page, - unsigned int order) + unsigned short order) { } static inline void folio_copy_owner(struct folio *newfolio, struct folio *folio) diff --git a/mm/page_owner.c b/mm/page_owner.c index 79936db59859..4f924957ce7a 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -125,7 +125,7 @@ static noinline depot_stack_handle_t save_stack(gfp_t flags) return handle; } -void __reset_page_owner(struct page *page, unsigned int order) +void __reset_page_owner(struct page *page, unsigned short order) { int i; struct page_ext *page_ext; @@ -149,7 +149,7 @@ void __reset_page_owner(struct page *page, unsigned int order) static inline void __set_page_owner_handle(struct page_ext *page_ext, depot_stack_handle_t handle, - unsigned int order, gfp_t gfp_mask) + unsigned short order, gfp_t gfp_mask) { struct page_owner *page_owner; int i; @@ -169,7 +169,7 @@ static inline void __set_page_owner_handle(struct page_ext *page_ext, } } -noinline void __set_page_owner(struct page *page, unsigned int order, +noinline void __set_page_owner(struct page *page, unsigned short order, gfp_t gfp_mask) { struct page_ext *page_ext = lookup_page_ext(page); From 252220dab9d4e4aa61f87b729468e8ac68fcc8bb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 10 Nov 2021 20:32:33 -0800 Subject: [PATCH 422/433] mm: allow only SLUB on PREEMPT_RT Memory allocators may disable interrupts or preemption as part of the allocation and freeing process. For PREEMPT_RT it is important that these sections remain deterministic and short and therefore don't depend on the size of the memory to allocate/ free or the inner state of the algorithm. Until v3.12-RT the SLAB allocator was an option but involved several changes to meet all the requirements. 
The SLUB design fits better with the PREEMPT_RT model and so the SLAB patches were dropped in the 3.12-RT patchset. Comparing the two allocators, SLUB outperformed SLAB in both throughput (time needed to allocate and free memory) and the maximal latency of the system measured with cyclictest during hackbench. SLOB was never evaluated since it was unlikely that it performs better than SLAB. During a quick test, the kernel crashed with SLOB enabled during boot. Disable SLAB and SLOB on PREEMPT_RT. [bigeasy@linutronix.de: commit description] Link: https://lkml.kernel.org/r/20211015210336.gen3tib33ig5q2md@linutronix.de Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Acked-by: Vlastimil Babka Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/init/Kconfig b/init/Kconfig index 21b1f4870c80..45bcaa8e7481 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1896,6 +1896,7 @@ choice config SLAB bool "SLAB" + depends on !PREEMPT_RT select HAVE_HARDENED_USERCOPY_ALLOCATOR help The regular slab allocator that is established and known to work @@ -1916,6 +1917,7 @@ config SLUB config SLOB depends on EXPERT bool "SLOB (Simple Allocator)" + depends on !PREEMPT_RT help SLOB replaces the stock allocator with a drastically simpler allocator. SLOB is generally more space efficient but From 0ef024621417fa3fcdeb2c3320f90ee34e18a5d9 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Wed, 10 Nov 2021 20:32:37 -0800 Subject: [PATCH 423/433] mm: migrate: simplify the file-backed pages validation when migrating its mapping There is no need to validate the file-backed page's refcount before trying to freeze the page's expected refcount, instead we can rely on the folio_ref_freeze() to validate if the page has the expected refcount before migrating its mapping.
Moreover we are always under the page lock when migrating the page mapping, which means nowhere else can remove it from the page cache, so we can remove the xas_load() validation under the i_pages lock. Link: https://lkml.kernel.org/r/cover.1629447552.git.baolin.wang@linux.alibaba.com Link: https://lkml.kernel.org/r/df4c129fd8e86a95dbc55f4663d77441cc0d3bd1.1629447552.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Suggested-by: Matthew Wilcox Cc: Yang Shi Cc: Alistair Popple Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/migrate.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index a11e948593df..43dd88c7fcdc 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -404,12 +404,6 @@ int folio_migrate_mapping(struct address_space *mapping, newzone = folio_zone(newfolio); xas_lock_irq(&xas); - if (folio_ref_count(folio) != expected_count || - xas_load(&xas) != folio) { - xas_unlock_irq(&xas); - return -EAGAIN; - } - if (!folio_ref_freeze(folio, expected_count)) { xas_unlock_irq(&xas); return -EAGAIN; From ab09243aa95a72bac5c71e852773de34116f8d0f Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Wed, 10 Nov 2021 20:32:40 -0800 Subject: [PATCH 424/433] mm/migrate.c: remove MIGRATE_PFN_LOCKED MIGRATE_PFN_LOCKED is used to indicate to migrate_vma_prepare() that a source page was already locked during migrate_vma_collect(). If it wasn't then a second attempt is made to lock the page. However if the first attempt failed it's unlikely a second attempt will succeed, and the retry adds complexity. So clean this up by removing the retry and MIGRATE_PFN_LOCKED flag. Destination pages are also meant to have the MIGRATE_PFN_LOCKED flag set, but nothing actually checks that.
Link: https://lkml.kernel.org/r/20211025041608.289017-1-apopple@nvidia.com Signed-off-by: Alistair Popple Reviewed-by: Ralph Campbell Acked-by: Felix Kuehling Cc: Alex Deucher Cc: Jerome Glisse Cc: John Hubbard Cc: Zi Yan Cc: Christoph Hellwig Cc: Ben Skeggs Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/vm/hmm.rst | 2 +- arch/powerpc/kvm/book3s_hv_uvmem.c | 4 +- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 2 - drivers/gpu/drm/nouveau/nouveau_dmem.c | 4 +- include/linux/migrate.h | 1 - lib/test_hmm.c | 5 +- mm/migrate.c | 149 +++++------------------ 7 files changed, 37 insertions(+), 130 deletions(-) diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst index a14c2938e7af..f2a59ed82ed3 100644 --- a/Documentation/vm/hmm.rst +++ b/Documentation/vm/hmm.rst @@ -360,7 +360,7 @@ between device driver specific code and shared common code: system memory page, locks the page with ``lock_page()``, and fills in the ``dst`` array entry with:: - dst[i] = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED; + dst[i] = migrate_pfn(page_to_pfn(dpage)); Now that the driver knows that this page is being migrated, it can invalidate device private MMU mappings and copy device private memory diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index a7061ee3b157..28c436df9935 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -560,7 +560,7 @@ static int __kvmppc_svm_page_out(struct vm_area_struct *vma, gpa, 0, page_shift); if (ret == U_SUCCESS) - *mig.dst = migrate_pfn(pfn) | MIGRATE_PFN_LOCKED; + *mig.dst = migrate_pfn(pfn); else { unlock_page(dpage); __free_page(dpage); @@ -774,7 +774,7 @@ static int kvmppc_svm_page_in(struct vm_area_struct *vma, } } - *mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED; + *mig.dst = migrate_pfn(page_to_pfn(dpage)); migrate_vma_pages(&mig); out_finalize: migrate_vma_finalize(&mig); diff --git 
a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 6d8634e40b3b..d43bfd8b35ae 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -317,7 +317,6 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]); svm_migrate_get_vram_page(prange, migrate->dst[i]); migrate->dst[i] = migrate_pfn(migrate->dst[i]); - migrate->dst[i] |= MIGRATE_PFN_LOCKED; src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE, DMA_TO_DEVICE); r = dma_mapping_error(dev, src[i]); @@ -610,7 +609,6 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, dst[i] >> PAGE_SHIFT, page_to_pfn(dpage)); migrate->dst[i] = migrate_pfn(page_to_pfn(dpage)); - migrate->dst[i] |= MIGRATE_PFN_LOCKED; j++; } diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 92987daa5e17..3828aafd3ac4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -166,7 +166,7 @@ static vm_fault_t nouveau_dmem_fault_copy_one(struct nouveau_drm *drm, goto error_dma_unmap; mutex_unlock(&svmm->mutex); - args->dst[0] = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED; + args->dst[0] = migrate_pfn(page_to_pfn(dpage)); return 0; error_dma_unmap: @@ -602,7 +602,7 @@ static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm, ((paddr >> PAGE_SHIFT) << NVIF_VMM_PFNMAP_V0_ADDR_SHIFT); if (src & MIGRATE_PFN_WRITE) *pfn |= NVIF_VMM_PFNMAP_V0_W; - return migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED; + return migrate_pfn(page_to_pfn(dpage)); out_dma_unmap: dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); diff --git a/include/linux/migrate.h b/include/linux/migrate.h index eeb818c4fc78..4850cc5bf813 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -110,7 +110,6 @@ static inline int migrate_misplaced_page(struct page 
*page, */ #define MIGRATE_PFN_VALID (1UL << 0) #define MIGRATE_PFN_MIGRATE (1UL << 1) -#define MIGRATE_PFN_LOCKED (1UL << 2) #define MIGRATE_PFN_WRITE (1UL << 3) #define MIGRATE_PFN_SHIFT 6 diff --git a/lib/test_hmm.c b/lib/test_hmm.c index c259842f6d44..e2ce8f9b7605 100644 --- a/lib/test_hmm.c +++ b/lib/test_hmm.c @@ -613,8 +613,7 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args, */ rpage->zone_device_data = dmirror; - *dst = migrate_pfn(page_to_pfn(dpage)) | - MIGRATE_PFN_LOCKED; + *dst = migrate_pfn(page_to_pfn(dpage)); if ((*src & MIGRATE_PFN_WRITE) || (!spage && args->vma->vm_flags & VM_WRITE)) *dst |= MIGRATE_PFN_WRITE; @@ -1137,7 +1136,7 @@ static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args, lock_page(dpage); xa_erase(&dmirror->pt, addr >> PAGE_SHIFT); copy_highpage(dpage, spage); - *dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED; + *dst = migrate_pfn(page_to_pfn(dpage)); if (*src & MIGRATE_PFN_WRITE) *dst |= MIGRATE_PFN_WRITE; } diff --git a/mm/migrate.c b/mm/migrate.c index 43dd88c7fcdc..cf25b00f03c8 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2362,7 +2362,6 @@ again: * can't be dropped from it). */ get_page(page); - migrate->cpages++; /* * Optimize for the common case where page is only mapped once @@ -2372,7 +2371,7 @@ again: if (trylock_page(page)) { pte_t swp_pte; - mpfn |= MIGRATE_PFN_LOCKED; + migrate->cpages++; ptep_get_and_clear(mm, addr, ptep); /* Setup special migration page table entry */ @@ -2406,6 +2405,9 @@ again: if (pte_present(pte)) unmapped++; + } else { + put_page(page); + mpfn = 0; } next: @@ -2510,15 +2512,17 @@ static bool migrate_vma_check_page(struct page *page) } /* - * migrate_vma_prepare() - lock pages and isolate them from the lru + * migrate_vma_unmap() - replace page mapping with special migration pte entry * @migrate: migrate struct containing all migration information * - * This locks pages that have been collected by migrate_vma_collect(). 
Once each - * page is locked it is isolated from the lru (for non-device pages). Finally, - * the ref taken by migrate_vma_collect() is dropped, as locked pages cannot be - * migrated by concurrent kernel threads. + * Isolate pages from the LRU and replace mappings (CPU page table pte) with a + * special migration pte entry and check if it has been pinned. Pinned pages are + * restored because we cannot migrate them. + * + * This is the last step before we call the device driver callback to allocate + * destination memory and copy contents of original page over to new page. */ -static void migrate_vma_prepare(struct migrate_vma *migrate) +static void migrate_vma_unmap(struct migrate_vma *migrate) { const unsigned long npages = migrate->npages; const unsigned long start = migrate->start; @@ -2527,32 +2531,12 @@ static void migrate_vma_prepare(struct migrate_vma *migrate) lru_add_drain(); - for (i = 0; (i < npages) && migrate->cpages; i++) { + for (i = 0; i < npages; i++) { struct page *page = migrate_pfn_to_page(migrate->src[i]); - bool remap = true; if (!page) continue; - if (!(migrate->src[i] & MIGRATE_PFN_LOCKED)) { - /* - * Because we are migrating several pages there can be - * a deadlock between 2 concurrent migration where each - * are waiting on each other page lock. - * - * Make migrate_vma() a best effort thing and backoff - * for any page we can not lock right away. 
- */ - if (!trylock_page(page)) { - migrate->src[i] = 0; - migrate->cpages--; - put_page(page); - continue; - } - remap = false; - migrate->src[i] |= MIGRATE_PFN_LOCKED; - } - /* ZONE_DEVICE pages are not on LRU */ if (!is_zone_device_page(page)) { if (!PageLRU(page) && allow_drain) { @@ -2562,16 +2546,9 @@ static void migrate_vma_prepare(struct migrate_vma *migrate) } if (isolate_lru_page(page)) { - if (remap) { - migrate->src[i] &= ~MIGRATE_PFN_MIGRATE; - migrate->cpages--; - restore++; - } else { - migrate->src[i] = 0; - unlock_page(page); - migrate->cpages--; - put_page(page); - } + migrate->src[i] &= ~MIGRATE_PFN_MIGRATE; + migrate->cpages--; + restore++; continue; } @@ -2579,80 +2556,20 @@ static void migrate_vma_prepare(struct migrate_vma *migrate) put_page(page); } - if (!migrate_vma_check_page(page)) { - if (remap) { - migrate->src[i] &= ~MIGRATE_PFN_MIGRATE; - migrate->cpages--; - restore++; - - if (!is_zone_device_page(page)) { - get_page(page); - putback_lru_page(page); - } - } else { - migrate->src[i] = 0; - unlock_page(page); - migrate->cpages--; - - if (!is_zone_device_page(page)) - putback_lru_page(page); - else - put_page(page); - } - } - } - - for (i = 0, addr = start; i < npages && restore; i++, addr += PAGE_SIZE) { - struct page *page = migrate_pfn_to_page(migrate->src[i]); - - if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE)) - continue; - - remove_migration_pte(page, migrate->vma, addr, page); - - migrate->src[i] = 0; - unlock_page(page); - put_page(page); - restore--; - } -} - -/* - * migrate_vma_unmap() - replace page mapping with special migration pte entry - * @migrate: migrate struct containing all migration information - * - * Replace page mapping (CPU page table pte) with a special migration pte entry - * and check again if it has been pinned. Pinned pages are restored because we - * cannot migrate them. 
- * - * This is the last step before we call the device driver callback to allocate - * destination memory and copy contents of original page over to new page. - */ -static void migrate_vma_unmap(struct migrate_vma *migrate) -{ - const unsigned long npages = migrate->npages; - const unsigned long start = migrate->start; - unsigned long addr, i, restore = 0; - - for (i = 0; i < npages; i++) { - struct page *page = migrate_pfn_to_page(migrate->src[i]); - - if (!page || !(migrate->src[i] & MIGRATE_PFN_MIGRATE)) - continue; - - if (page_mapped(page)) { + if (page_mapped(page)) try_to_migrate(page, 0); - if (page_mapped(page)) - goto restore; - } - if (migrate_vma_check_page(page)) + if (page_mapped(page) || !migrate_vma_check_page(page)) { + if (!is_zone_device_page(page)) { + get_page(page); + putback_lru_page(page); + } + + migrate->src[i] &= ~MIGRATE_PFN_MIGRATE; + migrate->cpages--; + restore++; continue; - -restore: - migrate->src[i] &= ~MIGRATE_PFN_MIGRATE; - migrate->cpages--; - restore++; + } } for (addr = start, i = 0; i < npages && restore; addr += PAGE_SIZE, i++) { @@ -2665,12 +2582,8 @@ restore: migrate->src[i] = 0; unlock_page(page); + put_page(page); restore--; - - if (is_zone_device_page(page)) - put_page(page); - else - putback_lru_page(page); } } @@ -2693,8 +2606,8 @@ restore: * it for all those entries (ie with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE * flag set). Once these are allocated and copied, the caller must update each * corresponding entry in the dst array with the pfn value of the destination - * page and with the MIGRATE_PFN_VALID and MIGRATE_PFN_LOCKED flags set - * (destination pages must have their struct pages locked, via lock_page()). + * page and with MIGRATE_PFN_VALID. Destination pages must be locked via + * lock_page(). 
* * Note that the caller does not have to migrate all the pages that are marked * with MIGRATE_PFN_MIGRATE flag in src array unless this is a migration from @@ -2763,8 +2676,6 @@ int migrate_vma_setup(struct migrate_vma *args) migrate_vma_collect(args); - if (args->cpages) - migrate_vma_prepare(args); if (args->cpages) migrate_vma_unmap(args); From 913ffbdd99856184b4e8004f984d7432dcb990cd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 Nov 2021 20:32:43 -0800 Subject: [PATCH 425/433] mm: unexport folio_memcg_{,un}lock Patch series "unexport memcg locking helpers". Neither the old page-based nor the new folio-based memcg locking helpers are used in modular code at all, so drop the exports. This patch (of 2): folio_memcg_{,un}lock are only used in built-in core mm code. Link: https://lkml.kernel.org/r/20210820095815.445392-1-hch@lst.de Link: https://lkml.kernel.org/r/20210820095815.445392-2-hch@lst.de Signed-off-by: Christoph Hellwig Cc: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 508bcea7df56..070bcc647e66 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2058,7 +2058,6 @@ again: memcg->move_lock_task = current; memcg->move_lock_flags = flags; } -EXPORT_SYMBOL(folio_memcg_lock); void lock_page_memcg(struct page *page) { @@ -2092,7 +2091,6 @@ void folio_memcg_unlock(struct folio *folio) { __folio_memcg_unlock(folio_memcg(folio)); } -EXPORT_SYMBOL(folio_memcg_unlock); void unlock_page_memcg(struct page *page) { From ab2f9d2d3626a8f31bfe2f47746e04819d8ffe9c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 Nov 2021 20:32:46 -0800 Subject: [PATCH 426/433] mm: unexport {,un}lock_page_memcg These are only used in built-in core mm code. 
Link: https://lkml.kernel.org/r/20210820095815.445392-3-hch@lst.de Signed-off-by: Christoph Hellwig Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 070bcc647e66..781605e92015 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2063,7 +2063,6 @@ void lock_page_memcg(struct page *page) { folio_memcg_lock(page_folio(page)); } -EXPORT_SYMBOL(lock_page_memcg); static void __folio_memcg_unlock(struct mem_cgroup *memcg) { @@ -2096,7 +2095,6 @@ void unlock_page_memcg(struct page *page) { folio_memcg_unlock(page_folio(page)); } -EXPORT_SYMBOL(unlock_page_memcg); struct obj_stock { #ifdef CONFIG_MEMCG_KMEM From b873e986816a0b8408c177b2c52a6915cca8713c Mon Sep 17 00:00:00 2001 From: Kuan-Ying Lee Date: Wed, 10 Nov 2021 20:32:49 -0800 Subject: [PATCH 427/433] kasan: add kasan mode messages when kasan init There are multiple kasan modes. It makes sense that we add some messages to know which kasan mode is active when booting up [1]. 
Link: https://bugzilla.kernel.org/show_bug.cgi?id=212195 [1] Link: https://lkml.kernel.org/r/20211020094850.4113-1-Kuan-Ying.Lee@mediatek.com Signed-off-by: Kuan-Ying Lee Reviewed-by: Marco Elver Reviewed-by: David Hildenbrand Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Catalin Marinas Cc: Will Deacon Cc: Matthias Brugger Cc: Chinwen Chang Cc: Yee Lee Cc: Nicholas Tang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/mm/kasan_init.c | 2 +- mm/kasan/hw_tags.c | 14 +++++++++++++- mm/kasan/sw_tags.c | 2 +- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index ec276f75fa05..c12cd700598f 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -310,7 +310,7 @@ void __init kasan_init(void) kasan_init_depth(); #if defined(CONFIG_KASAN_GENERIC) /* CONFIG_KASAN_SW_TAGS also requires kasan_init_sw_tags(). */ - pr_info("KernelAddressSanitizer initialized\n"); + pr_info("KernelAddressSanitizer initialized (generic)\n"); #endif } diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c index dc892119e88f..7355cb534e4f 100644 --- a/mm/kasan/hw_tags.c +++ b/mm/kasan/hw_tags.c @@ -106,6 +106,16 @@ static int __init early_kasan_flag_stacktrace(char *arg) } early_param("kasan.stacktrace", early_kasan_flag_stacktrace); +static inline const char *kasan_mode_info(void) +{ + if (kasan_mode == KASAN_MODE_ASYNC) + return "async"; + else if (kasan_mode == KASAN_MODE_ASYMM) + return "asymm"; + else + return "sync"; +} + /* kasan_init_hw_tags_cpu() is called for each CPU. */ void kasan_init_hw_tags_cpu(void) { @@ -177,7 +187,9 @@ void __init kasan_init_hw_tags(void) break; } - pr_info("KernelAddressSanitizer initialized\n"); + pr_info("KernelAddressSanitizer initialized (hw-tags, mode=%s, stacktrace=%s)\n", + kasan_mode_info(), + kasan_stack_collection_enabled() ? 
"on" : "off"); } void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags) diff --git a/mm/kasan/sw_tags.c b/mm/kasan/sw_tags.c index bd3f540feb47..77f13f391b57 100644 --- a/mm/kasan/sw_tags.c +++ b/mm/kasan/sw_tags.c @@ -42,7 +42,7 @@ void __init kasan_init_sw_tags(void) for_each_possible_cpu(cpu) per_cpu(prng_state, cpu) = (u32)get_cycles(); - pr_info("KernelAddressSanitizer initialized\n"); + pr_info("KernelAddressSanitizer initialized (sw-tags)\n"); } /* From 68da4e0eaaab421f228eac57cbe7505b136464af Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 10 Nov 2021 12:01:14 -0600 Subject: [PATCH 428/433] Revert "PCI: Remove struct pci_dev->driver" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit b5f9c644eb1baafcd349ad134e2110773f8d0a38. Revert b5f9c644eb1b ("PCI: Remove struct pci_dev->driver"), which is needed to revert 2a4d9408c9e8 ("PCI: Use to_pci_driver() instead of pci_dev->driver"). 2a4d9408c9e8 caused a NULL pointer dereference reported by Robert Święcki. Details in the revert of that commit. Fixes: 2a4d9408c9e8 ("PCI: Use to_pci_driver() instead of pci_dev->driver") Link: https://lore.kernel.org/linux-i2c/CAP145pgdrdiMAT7=-iB1DMgA7t_bMqTcJL4N0=6u8kNY3EU0dw@mail.gmail.com/ Reported-by: Robert Święcki Tested-by: Robert Święcki Signed-off-by: Bjorn Helgaas --- drivers/pci/pci-driver.c | 4 ++++ include/linux/pci.h | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 1d98c974381c..4c1f46dbfa87 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -319,10 +319,12 @@ static long local_pci_probe(void *_ddi) * its remove routine.
*/ pm_runtime_get_sync(dev); + pci_dev->driver = pci_drv; rc = pci_drv->probe(pci_dev, ddi->id); if (!rc) return rc; if (rc < 0) { + pci_dev->driver = NULL; pm_runtime_put_sync(dev); return rc; } @@ -388,6 +390,7 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev, * @pci_dev: PCI device being probed * * returns 0 on success, else error. + * side-effect: pci_dev->driver is set to drv when drv claims pci_dev. */ static int __pci_device_probe(struct pci_driver *drv, struct pci_dev *pci_dev) { @@ -462,6 +465,7 @@ static void pci_device_remove(struct device *dev) pm_runtime_put_noidle(dev); } pcibios_free_irq(pci_dev); + pci_dev->driver = NULL; pci_iov_remove(pci_dev); /* Undo the runtime PM settings in local_pci_probe() */ diff --git a/include/linux/pci.h b/include/linux/pci.h index b4dbcc86b3f1..e58888e21ab7 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -342,6 +342,7 @@ struct pci_dev { u16 pcie_flags_reg; /* Cached PCIe Capabilities Register */ unsigned long *dma_alias_mask;/* Mask of enabled devfn aliases */ + struct pci_driver *driver; /* Driver bound to this device */ u64 dma_mask; /* Mask of the bits of bus address this device implements. Normally this is 0xffffffff. You only need to change From e0217c5ba10d7bf640f038b2feae58e630f2f958 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 10 Nov 2021 12:03:34 -0600 Subject: [PATCH 429/433] Revert "PCI: Use to_pci_driver() instead of pci_dev->driver" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 2a4d9408c9e8b6f6fc150c66f3fef755c9e20d4a. Robert reported a NULL pointer dereference caused by the PCI core (local_pci_probe()) calling the i2c_designware_pci driver's .runtime_resume() method before the .probe() method. i2c_dw_pci_resume() depends on initialization done by i2c_dw_pci_probe(). 
Prior to 2a4d9408c9e8 ("PCI: Use to_pci_driver() instead of pci_dev->driver"), pci_pm_runtime_resume() avoided calling the .runtime_resume() method because pci_dev->driver had not been set yet. 2a4d9408c9e8 and b5f9c644eb1b ("PCI: Remove struct pci_dev->driver"), removed pci_dev->driver, replacing it by device->driver, which *has* been set by this time, so pci_pm_runtime_resume() called the .runtime_resume() method when it previously had not. Fixes: 2a4d9408c9e8 ("PCI: Use to_pci_driver() instead of pci_dev->driver") Link: https://lore.kernel.org/linux-i2c/CAP145pgdrdiMAT7=-iB1DMgA7t_bMqTcJL4N0=6u8kNY3EU0dw@mail.gmail.com/ Reported-by: Robert Święcki Tested-by: Robert Święcki Signed-off-by: Bjorn Helgaas --- drivers/pci/iov.c | 24 +++++++++--------------- drivers/pci/pci-driver.c | 33 ++++++++++++++++----------------- drivers/pci/pci.c | 17 ++++++++--------- drivers/pci/pcie/err.c | 8 ++++---- 4 files changed, 37 insertions(+), 45 deletions(-) diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 1d7a7c5b5307..0267977c9f17 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -164,15 +164,13 @@ static ssize_t sriov_vf_total_msix_show(struct device *dev, char *buf) { struct pci_dev *pdev = to_pci_dev(dev); - struct pci_driver *pdrv; u32 vf_total_msix = 0; device_lock(dev); - pdrv = to_pci_driver(dev->driver); - if (!pdrv || !pdrv->sriov_get_vf_total_msix) + if (!pdev->driver || !pdev->driver->sriov_get_vf_total_msix) goto unlock; - vf_total_msix = pdrv->sriov_get_vf_total_msix(pdev); + vf_total_msix = pdev->driver->sriov_get_vf_total_msix(pdev); unlock: device_unlock(dev); return sysfs_emit(buf, "%u\n", vf_total_msix); @@ -185,7 +183,6 @@ static ssize_t sriov_vf_msix_count_store(struct device *dev, { struct pci_dev *vf_dev = to_pci_dev(dev); struct pci_dev *pdev = pci_physfn(vf_dev); - struct pci_driver *pdrv; int val, ret = 0; if (kstrtoint(buf, 0, &val) < 0) @@ -195,14 +192,13 @@ static ssize_t sriov_vf_msix_count_store(struct device *dev, return
-EINVAL; device_lock(&pdev->dev); - pdrv = to_pci_driver(dev->driver); - if (!pdrv || !pdrv->sriov_set_msix_vec_count) { + if (!pdev->driver || !pdev->driver->sriov_set_msix_vec_count) { ret = -EOPNOTSUPP; goto err_pdev; } device_lock(&vf_dev->dev); - if (to_pci_driver(vf_dev->dev.driver)) { + if (vf_dev->driver) { /* * A driver is already attached to this VF and has configured * itself based on the current MSI-X vector count. Changing @@ -212,7 +208,7 @@ static ssize_t sriov_vf_msix_count_store(struct device *dev, goto err_dev; } - ret = pdrv->sriov_set_msix_vec_count(vf_dev, val); + ret = pdev->driver->sriov_set_msix_vec_count(vf_dev, val); err_dev: device_unlock(&vf_dev->dev); @@ -379,7 +375,6 @@ static ssize_t sriov_numvfs_store(struct device *dev, const char *buf, size_t count) { struct pci_dev *pdev = to_pci_dev(dev); - struct pci_driver *pdrv; int ret = 0; u16 num_vfs; @@ -395,15 +390,14 @@ static ssize_t sriov_numvfs_store(struct device *dev, goto exit; /* is PF driver loaded */ - pdrv = to_pci_driver(dev->driver); - if (!pdrv) { + if (!pdev->driver) { pci_info(pdev, "no driver bound to device; cannot configure SR-IOV\n"); ret = -ENOENT; goto exit; } /* is PF driver loaded w/callback */ - if (!pdrv->sriov_configure) { + if (!pdev->driver->sriov_configure) { pci_info(pdev, "driver does not support SR-IOV configuration via sysfs\n"); ret = -ENOENT; goto exit; @@ -411,7 +405,7 @@ static ssize_t sriov_numvfs_store(struct device *dev, if (num_vfs == 0) { /* disable VFs */ - ret = pdrv->sriov_configure(pdev, 0); + ret = pdev->driver->sriov_configure(pdev, 0); goto exit; } @@ -423,7 +417,7 @@ static ssize_t sriov_numvfs_store(struct device *dev, goto exit; } - ret = pdrv->sriov_configure(pdev, num_vfs); + ret = pdev->driver->sriov_configure(pdev, num_vfs); if (ret < 0) goto exit; diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 4c1f46dbfa87..588588cfda48 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -457,7 +457,7 @@ 
static int pci_device_probe(struct device *dev) static void pci_device_remove(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct pci_driver *drv = to_pci_driver(dev->driver); + struct pci_driver *drv = pci_dev->driver; if (drv->remove) { pm_runtime_get_sync(dev); @@ -493,7 +493,7 @@ static void pci_device_remove(struct device *dev) static void pci_device_shutdown(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct pci_driver *drv = to_pci_driver(dev->driver); + struct pci_driver *drv = pci_dev->driver; pm_runtime_resume(dev); @@ -589,7 +589,7 @@ static int pci_pm_reenable_device(struct pci_dev *pci_dev) static int pci_legacy_suspend(struct device *dev, pm_message_t state) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct pci_driver *drv = to_pci_driver(dev->driver); + struct pci_driver *drv = pci_dev->driver; if (drv && drv->suspend) { pci_power_t prev = pci_dev->current_state; @@ -630,7 +630,7 @@ static int pci_legacy_suspend_late(struct device *dev, pm_message_t state) static int pci_legacy_resume(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct pci_driver *drv = to_pci_driver(dev->driver); + struct pci_driver *drv = pci_dev->driver; pci_fixup_device(pci_fixup_resume, pci_dev); @@ -649,7 +649,7 @@ static void pci_pm_default_suspend(struct pci_dev *pci_dev) static bool pci_has_legacy_pm_support(struct pci_dev *pci_dev) { - struct pci_driver *drv = to_pci_driver(pci_dev->dev.driver); + struct pci_driver *drv = pci_dev->driver; bool ret = drv && (drv->suspend || drv->resume); /* @@ -1242,11 +1242,11 @@ static int pci_pm_runtime_suspend(struct device *dev) int error; /* - * If the device has no driver, we leave it in D0, but it may go to - * D3cold when the bridge above it runtime suspends. Save its - * config space in case that happens. + * If pci_dev->driver is not set (unbound), we leave the device in D0, + * but it may go to D3cold when the bridge above it runtime suspends. 
+ * Save its config space in case that happens. */ - if (!to_pci_driver(dev->driver)) { + if (!pci_dev->driver) { pci_save_state(pci_dev); return 0; } @@ -1303,7 +1303,7 @@ static int pci_pm_runtime_resume(struct device *dev) */ pci_restore_standard_config(pci_dev); - if (!to_pci_driver(dev->driver)) + if (!pci_dev->driver) return 0; pci_fixup_device(pci_fixup_resume_early, pci_dev); @@ -1322,13 +1322,14 @@ static int pci_pm_runtime_resume(struct device *dev) static int pci_pm_runtime_idle(struct device *dev) { + struct pci_dev *pci_dev = to_pci_dev(dev); const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; /* - * If the device has no driver, it should always remain in D0 - * regardless of the runtime PM status + * If pci_dev->driver is not set (unbound), the device should + * always remain in D0 regardless of the runtime PM status */ - if (!to_pci_driver(dev->driver)) + if (!pci_dev->driver) return 0; if (!pm) @@ -1435,10 +1436,8 @@ static struct pci_driver pci_compat_driver = { */ struct pci_driver *pci_dev_driver(const struct pci_dev *dev) { - struct pci_driver *drv = to_pci_driver(dev->dev.driver); - - if (drv) - return drv; + if (dev->driver) + return dev->driver; else { int i; for (i = 0; i <= PCI_ROM_RESOURCE; i++) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index b88db815ee01..40012d13c3c4 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -5123,14 +5123,13 @@ EXPORT_SYMBOL_GPL(pci_dev_unlock); static void pci_dev_save_and_disable(struct pci_dev *dev) { - struct pci_driver *drv = to_pci_driver(dev->dev.driver); const struct pci_error_handlers *err_handler = - drv ? drv->err_handler : NULL; + dev->driver ? dev->driver->err_handler : NULL; /* - * drv->err_handler->reset_prepare() is protected against races - * with ->remove() by the device lock, which must be held by the - * caller. + * dev->driver->err_handler->reset_prepare() is protected against + * races with ->remove() by the device lock, which must be held by + * the caller. 
*/ if (err_handler && err_handler->reset_prepare) err_handler->reset_prepare(dev); @@ -5155,15 +5154,15 @@ static void pci_dev_save_and_disable(struct pci_dev *dev) static void pci_dev_restore(struct pci_dev *dev) { - struct pci_driver *drv = to_pci_driver(dev->dev.driver); const struct pci_error_handlers *err_handler = - drv ? drv->err_handler : NULL; + dev->driver ? dev->driver->err_handler : NULL; pci_restore_state(dev); /* - * drv->err_handler->reset_done() is protected against races with - * ->remove() by the device lock, which must be held by the caller. + * dev->driver->err_handler->reset_done() is protected against + * races with ->remove() by the device lock, which must be held by + * the caller. */ if (err_handler && err_handler->reset_done) err_handler->reset_done(dev); diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index 356b9317297e..0c5a143025af 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -54,7 +54,7 @@ static int report_error_detected(struct pci_dev *dev, const struct pci_error_handlers *err_handler; device_lock(&dev->dev); - pdrv = to_pci_driver(dev->dev.driver); + pdrv = dev->driver; if (!pci_dev_set_io_state(dev, state) || !pdrv || !pdrv->err_handler || @@ -98,7 +98,7 @@ static int report_mmio_enabled(struct pci_dev *dev, void *data) const struct pci_error_handlers *err_handler; device_lock(&dev->dev); - pdrv = to_pci_driver(dev->dev.driver); + pdrv = dev->driver; if (!pdrv || !pdrv->err_handler || !pdrv->err_handler->mmio_enabled) @@ -119,7 +119,7 @@ static int report_slot_reset(struct pci_dev *dev, void *data) const struct pci_error_handlers *err_handler; device_lock(&dev->dev); - pdrv = to_pci_driver(dev->dev.driver); + pdrv = dev->driver; if (!pdrv || !pdrv->err_handler || !pdrv->err_handler->slot_reset) @@ -139,7 +139,7 @@ static int report_resume(struct pci_dev *dev, void *data) const struct pci_error_handlers *err_handler; device_lock(&dev->dev); - pdrv = to_pci_driver(dev->dev.driver); + pdrv = 
dev->driver; if (!pci_dev_set_io_state(dev, pci_channel_io_normal) || !pdrv || !pdrv->err_handler || From 636f6e2af4fb916b9f1b432964294b6979c34002 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 9 Nov 2021 08:45:25 +0900 Subject: [PATCH 430/433] libata: add horkage for missing Identify Device log ACS-3 introduced the ATA Identify Device Data log as mandatory. A warning message currently signals to the user if a device does not report supporting this log page in the log directory page, regardless of the ATA version of the device. Furthermore, this warning will appear for all attempts at accessing this missing log page during device revalidation. Since it is useless to constantly access the log directory and warn about this lack of support once we have discovered that the device does not support this log page, introduce the horkage flag ATA_HORKAGE_NO_ID_DEV_LOG to mark a device as lacking support for the Identify Device Data log page. Set this flag when ata_log_supported() returns false in ata_identify_page_supported(). The warning is printed only if the device ATA level is 10 or above (ACS-3 or above), and only once on device scan. With this flag set, the log directory page is not accessed again to test for Identify Device Data log page support. Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. 
Petersen --- drivers/ata/libata-core.c | 13 ++++++++++++- include/linux/libata.h | 1 + 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 3018ca84a3d8..8a0ccb190d76 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2052,8 +2052,19 @@ static bool ata_identify_page_supported(struct ata_device *dev, u8 page) struct ata_port *ap = dev->link->ap; unsigned int err, i; + if (dev->horkage & ATA_HORKAGE_NO_ID_DEV_LOG) + return false; + if (!ata_log_supported(dev, ATA_LOG_IDENTIFY_DEVICE)) { - ata_dev_warn(dev, "ATA Identify Device Log not supported\n"); + /* + * IDENTIFY DEVICE data log is defined as mandatory starting + * with ACS-3 (ATA version 10). Warn about the missing log + * for drives which implement this ATA level or above. + */ + if (ata_id_major_version(dev->id) >= 10) + ata_dev_warn(dev, + "ATA Identify Device Log not supported\n"); + dev->horkage |= ATA_HORKAGE_NO_ID_DEV_LOG; return false; } diff --git a/include/linux/libata.h b/include/linux/libata.h index 5331557316e8..2a8404b26083 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -427,6 +427,7 @@ enum { ATA_HORKAGE_MAX_SEC_1024 = (1 << 25), /* Limit max sects to 1024 */ ATA_HORKAGE_MAX_TRIM_128M = (1 << 26), /* Limit max trim size to 128M */ ATA_HORKAGE_NO_NCQ_ON_ATI = (1 << 27), /* Disable NCQ on ATI chipset */ + ATA_HORKAGE_NO_ID_DEV_LOG = (1 << 28), /* Identify device log missing */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ From 1b87bda1f29a91720a410ac0819866a3cf0df32d Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 Nov 2021 12:03:27 +0900 Subject: [PATCH 431/433] libata: libahci: declare ahci_shost_attr_group as static ahci_shost_attr_group is referenced only in drivers/ata/libahci.c. Declare it as static. 
Fixes: c3f69c7f629f ("scsi: ata: Switch to attribute groups") Cc: Bart Van Assche Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig --- drivers/ata/libahci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index 28430c093a7f..8a6835bfd18a 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -131,7 +131,7 @@ const struct attribute_group *ahci_shost_groups[] = { }; EXPORT_SYMBOL_GPL(ahci_shost_groups); -struct attribute *ahci_sdev_attrs[] = { +static struct attribute *ahci_sdev_attrs[] = { &dev_attr_sw_activity.attr, &dev_attr_unload_heads.attr, &dev_attr_ncq_prio_supported.attr, From d9c8e52ff9e84ff1a406330f9ea4de7c5eb40282 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 12 Nov 2021 10:56:25 -0800 Subject: [PATCH 432/433] thermal: int340x: fix build on 32-bit targets Commit aeb58c860dc5 ("thermal/drivers/int340x: processor_thermal: Suppot 64 bit RFIM responses") started using 'readq()' to read 64-bit status responses from the int340x hardware. That's all fine and good, but on 32-bit targets a 64-bit 'readq()' is ambiguous, since it's no longer an atomic access. Some hardware might require 64-bit accesses, and other hardware might want low word first or high word first. It's quite likely that the driver isn't relevant in a 32-bit environment any more, and there's a patch floating around to just make it depend on X86_64, but let's make it buildable on x86-32 anyway. The driver previously just read the low 32 bits, so the hardware certainly is ok with 32-bit reads, and in a little-endian environment the low word first model is the natural one. So just add the include for the 'io-64-nonatomic-lo-hi.h' version. Fixes: aeb58c860dc5 ("thermal/drivers/int340x: processor_thermal: Suppot 64 bit RFIM responses") Reported-by: Jakub Kicinski Cc: Srinivas Pandruvada Cc: Rafael J. 
Wysocki Signed-off-by: Linus Torvalds --- drivers/thermal/intel/int340x_thermal/processor_thermal_mbox.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_mbox.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_mbox.c index a86521973dad..01008ae00e7f 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_mbox.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_mbox.c @@ -7,6 +7,7 @@ #include #include #include +#include <linux/io-64-nonatomic-lo-hi.h> #include "processor_thermal_device.h" #define MBOX_CMD_WORKLOAD_TYPE_READ 0x0E From 7246f4dcaccc8de76a96a41359d89c3c791579bc Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Fri, 12 Nov 2021 10:16:02 -0500 Subject: [PATCH 433/433] tools/lib/lockdep: drop liblockdep TL;DR: While a tool like liblockdep is useful, it probably doesn't belong within the kernel tree. liblockdep attempts to reuse kernel code both directly (by directly building the kernel's lockdep code) as well as indirectly (by using sanitized headers). This makes liblockdep an integral part of the kernel. It also makes liblockdep quite unique: while other userspace code might use sanitized headers, it generally doesn't attempt to use kernel code directly which means that changes on the kernel side of things don't affect (and break) it directly. All our workflows and tooling around liblockdep don't support this uniqueness. Changes that go into the kernel code aren't validated to not break in-tree userspace code. liblockdep ended up being very fragile, breaking over and over, to the point that living in the same tree as the lockdep code lost most of its value. liblockdep should continue living in an external tree, syncing with the kernel often, in a controllable way.
Signed-off-by: Sasha Levin Cc: Ingo Molnar Cc: Peter Zijlstra Signed-off-by: Linus Torvalds --- MAINTAINERS | 5 - tools/Makefile | 16 +- tools/lib/lockdep/.gitignore | 2 - tools/lib/lockdep/Build | 1 - tools/lib/lockdep/Makefile | 162 ------- tools/lib/lockdep/common.c | 29 -- tools/lib/lockdep/include/liblockdep/common.h | 54 --- tools/lib/lockdep/include/liblockdep/mutex.h | 73 --- tools/lib/lockdep/include/liblockdep/rwlock.h | 87 ---- tools/lib/lockdep/lockdep | 3 - tools/lib/lockdep/lockdep.c | 33 -- tools/lib/lockdep/lockdep_internals.h | 1 - tools/lib/lockdep/lockdep_states.h | 1 - tools/lib/lockdep/preload.c | 443 ------------------ tools/lib/lockdep/rbtree.c | 1 - tools/lib/lockdep/run_tests.sh | 47 -- tools/lib/lockdep/tests/AA.c | 14 - tools/lib/lockdep/tests/AA.sh | 2 - tools/lib/lockdep/tests/ABA.c | 14 - tools/lib/lockdep/tests/ABA.sh | 2 - tools/lib/lockdep/tests/ABBA.c | 26 - tools/lib/lockdep/tests/ABBA.sh | 2 - tools/lib/lockdep/tests/ABBA_2threads.c | 47 -- tools/lib/lockdep/tests/ABBA_2threads.sh | 2 - tools/lib/lockdep/tests/ABBCCA.c | 20 - tools/lib/lockdep/tests/ABBCCA.sh | 2 - tools/lib/lockdep/tests/ABBCCDDA.c | 23 - tools/lib/lockdep/tests/ABBCCDDA.sh | 2 - tools/lib/lockdep/tests/ABCABC.c | 20 - tools/lib/lockdep/tests/ABCABC.sh | 2 - tools/lib/lockdep/tests/ABCDBCDA.c | 23 - tools/lib/lockdep/tests/ABCDBCDA.sh | 2 - tools/lib/lockdep/tests/ABCDBDDA.c | 23 - tools/lib/lockdep/tests/ABCDBDDA.sh | 2 - tools/lib/lockdep/tests/WW.c | 14 - tools/lib/lockdep/tests/WW.sh | 2 - tools/lib/lockdep/tests/common.h | 13 - tools/lib/lockdep/tests/unlock_balance.c | 15 - tools/lib/lockdep/tests/unlock_balance.sh | 2 - 39 files changed, 3 insertions(+), 1229 deletions(-) delete mode 100644 tools/lib/lockdep/.gitignore delete mode 100644 tools/lib/lockdep/Build delete mode 100644 tools/lib/lockdep/Makefile delete mode 100644 tools/lib/lockdep/common.c delete mode 100644 tools/lib/lockdep/include/liblockdep/common.h delete mode 100644 
tools/lib/lockdep/include/liblockdep/mutex.h delete mode 100644 tools/lib/lockdep/include/liblockdep/rwlock.h delete mode 100755 tools/lib/lockdep/lockdep delete mode 100644 tools/lib/lockdep/lockdep.c delete mode 100644 tools/lib/lockdep/lockdep_internals.h delete mode 100644 tools/lib/lockdep/lockdep_states.h delete mode 100644 tools/lib/lockdep/preload.c delete mode 100644 tools/lib/lockdep/rbtree.c delete mode 100755 tools/lib/lockdep/run_tests.sh delete mode 100644 tools/lib/lockdep/tests/AA.c delete mode 100644 tools/lib/lockdep/tests/AA.sh delete mode 100644 tools/lib/lockdep/tests/ABA.c delete mode 100644 tools/lib/lockdep/tests/ABA.sh delete mode 100644 tools/lib/lockdep/tests/ABBA.c delete mode 100644 tools/lib/lockdep/tests/ABBA.sh delete mode 100644 tools/lib/lockdep/tests/ABBA_2threads.c delete mode 100644 tools/lib/lockdep/tests/ABBA_2threads.sh delete mode 100644 tools/lib/lockdep/tests/ABBCCA.c delete mode 100644 tools/lib/lockdep/tests/ABBCCA.sh delete mode 100644 tools/lib/lockdep/tests/ABBCCDDA.c delete mode 100644 tools/lib/lockdep/tests/ABBCCDDA.sh delete mode 100644 tools/lib/lockdep/tests/ABCABC.c delete mode 100644 tools/lib/lockdep/tests/ABCABC.sh delete mode 100644 tools/lib/lockdep/tests/ABCDBCDA.c delete mode 100644 tools/lib/lockdep/tests/ABCDBCDA.sh delete mode 100644 tools/lib/lockdep/tests/ABCDBDDA.c delete mode 100644 tools/lib/lockdep/tests/ABCDBDDA.sh delete mode 100644 tools/lib/lockdep/tests/WW.c delete mode 100644 tools/lib/lockdep/tests/WW.sh delete mode 100644 tools/lib/lockdep/tests/common.h delete mode 100644 tools/lib/lockdep/tests/unlock_balance.c delete mode 100644 tools/lib/lockdep/tests/unlock_balance.sh diff --git a/MAINTAINERS b/MAINTAINERS index ceba033023c3..a2fe76f56b32 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10799,11 +10799,6 @@ F: drivers/ata/ F: include/linux/ata.h F: include/linux/libata.h -LIBLOCKDEP -M: Sasha Levin -S: Maintained -F: tools/lib/lockdep/ - LIBNVDIMM BLK: MMIO-APERTURE DRIVER M: Dan 
Williams M: Vishal Verma diff --git a/tools/Makefile b/tools/Makefile index 5da1fde03a9a..db2f7b8ebed5 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -24,7 +24,6 @@ help: @echo ' intel-speed-select - Intel Speed Select tool' @echo ' kvm_stat - top-like utility for displaying kvm statistics' @echo ' leds - LEDs tools' - @echo ' liblockdep - user-space wrapper for kernel locking-validator' @echo ' objtool - an ELF object analysis tool' @echo ' pci - PCI tools' @echo ' perf - Linux performance measurement and analysis tool' @@ -72,9 +71,6 @@ cgroup counter firewire hv guest bootconfig spi usb virtio vm bpf iio gpio objto bpf/%: FORCE $(call descend,$@) -liblockdep: FORCE - $(call descend,lib/lockdep) - libapi: FORCE $(call descend,lib/api) @@ -101,7 +97,7 @@ freefall: FORCE kvm_stat: FORCE $(call descend,kvm/$@) -all: acpi cgroup counter cpupower gpio hv firewire liblockdep \ +all: acpi cgroup counter cpupower gpio hv firewire \ perf selftests bootconfig spi turbostat usb \ virtio vm bpf x86_energy_perf_policy \ tmon freefall iio objtool kvm_stat wmi \ @@ -116,9 +112,6 @@ cpupower_install: cgroup_install counter_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install vm_install bpf_install objtool_install wmi_install pci_install debugging_install tracing_install: $(call descend,$(@:_install=),install) -liblockdep_install: - $(call descend,lib/lockdep,install) - selftests_install: $(call descend,testing/$(@:_install=),install) @@ -135,7 +128,7 @@ kvm_stat_install: $(call descend,kvm/$(@:_install=),install) install: acpi_install cgroup_install counter_install cpupower_install gpio_install \ - hv_install firewire_install iio_install liblockdep_install \ + hv_install firewire_install iio_install \ perf_install selftests_install turbostat_install usb_install \ virtio_install vm_install bpf_install x86_energy_perf_policy_install \ tmon_install freefall_install objtool_install kvm_stat_install \ 
@@ -151,9 +144,6 @@ cpupower_clean: cgroup_clean counter_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean vm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean tracing_clean: $(call descend,$(@:_clean=),clean) -liblockdep_clean: - $(call descend,lib/lockdep,clean) - libapi_clean: $(call descend,lib/api,clean) @@ -185,7 +175,7 @@ build_clean: clean: acpi_clean cgroup_clean counter_clean cpupower_clean hv_clean firewire_clean \ perf_clean selftests_clean turbostat_clean bootconfig_clean spi_clean usb_clean virtio_clean \ vm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \ - freefall_clean build_clean libbpf_clean libsubcmd_clean liblockdep_clean \ + freefall_clean build_clean libbpf_clean libsubcmd_clean \ gpio_clean objtool_clean leds_clean wmi_clean pci_clean firmware_clean debugging_clean \ intel-speed-select_clean tracing_clean diff --git a/tools/lib/lockdep/.gitignore b/tools/lib/lockdep/.gitignore deleted file mode 100644 index 6c308ac4388c..000000000000 --- a/tools/lib/lockdep/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -liblockdep.so.* diff --git a/tools/lib/lockdep/Build b/tools/lib/lockdep/Build deleted file mode 100644 index 6f667355b068..000000000000 --- a/tools/lib/lockdep/Build +++ /dev/null @@ -1 +0,0 @@ -liblockdep-y += common.o lockdep.o preload.o rbtree.o diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile deleted file mode 100644 index 9dafb8cb752f..000000000000 --- a/tools/lib/lockdep/Makefile +++ /dev/null @@ -1,162 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# file format version -FILE_VERSION = 1 - -LIBLOCKDEP_VERSION=$(shell make --no-print-directory -sC ../../.. kernelversion) - -# Makefiles suck: This macro sets a default value of $(2) for the -# variable named by $(1), unless the variable has been set by -# environment or command line. 
This is necessary for CC and AR -# because make sets default values, so the simpler ?= approach -# won't work as expected. -define allow-override - $(if $(or $(findstring environment,$(origin $(1))),\ - $(findstring command line,$(origin $(1)))),,\ - $(eval $(1) = $(2))) -endef - -# Allow setting CC and AR and LD, or setting CROSS_COMPILE as a prefix. -$(call allow-override,CC,$(CROSS_COMPILE)gcc) -$(call allow-override,AR,$(CROSS_COMPILE)ar) -$(call allow-override,LD,$(CROSS_COMPILE)ld) - -INSTALL = install - -# Use DESTDIR for installing into a different root directory. -# This is useful for building a package. The program will be -# installed in this directory as if it was the root directory. -# Then the build tool can move it later. -DESTDIR ?= -DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' - -prefix ?= /usr/local -libdir_relative = lib -libdir = $(prefix)/$(libdir_relative) -bindir_relative = bin -bindir = $(prefix)/$(bindir_relative) - -export DESTDIR DESTDIR_SQ INSTALL - -MAKEFLAGS += --no-print-directory - -include ../../scripts/Makefile.include - -# copy a bit from Linux kbuild - -ifeq ("$(origin V)", "command line") - VERBOSE = $(V) -endif -ifndef VERBOSE - VERBOSE = 0 -endif - -ifeq ($(srctree),) -srctree := $(patsubst %/,%,$(dir $(CURDIR))) -srctree := $(patsubst %/,%,$(dir $(srctree))) -srctree := $(patsubst %/,%,$(dir $(srctree))) -#$(info Determined 'srctree' to be $(srctree)) -endif - -# Shell quotes -libdir_SQ = $(subst ','\'',$(libdir)) -bindir_SQ = $(subst ','\'',$(bindir)) - -LIB_IN := $(OUTPUT)liblockdep-in.o - -BIN_FILE = lockdep -LIB_FILE = $(OUTPUT)liblockdep.a $(OUTPUT)liblockdep.so.$(LIBLOCKDEP_VERSION) - -CONFIG_INCLUDES = -CONFIG_LIBS = -CONFIG_FLAGS = - -OBJ = $@ -N = - -export Q VERBOSE - -INCLUDES = -I. 
-I./uinclude -I./include -I../../include $(CONFIG_INCLUDES) - -# Set compile option CFLAGS if not set elsewhere -CFLAGS ?= -g -DCONFIG_LOCKDEP -DCONFIG_STACKTRACE -DCONFIG_PROVE_LOCKING -DBITS_PER_LONG=__WORDSIZE -DLIBLOCKDEP_VERSION='"$(LIBLOCKDEP_VERSION)"' -rdynamic -O0 -g -CFLAGS += -fPIC -CFLAGS += -Wall - -override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ) - -ifeq ($(VERBOSE),1) - Q = - print_shared_lib_compile = - print_install = -else - Q = @ - print_shared_lib_compile = echo ' LD '$(OBJ); - print_static_lib_build = echo ' LD '$(OBJ); - print_install = echo ' INSTALL '$1' to $(DESTDIR_SQ)$2'; -endif - -all: - -export srctree OUTPUT CC LD CFLAGS V -include $(srctree)/tools/build/Makefile.include - -do_compile_shared_library = \ - ($(print_shared_lib_compile) \ - $(CC) $(LDFLAGS) --shared $^ -o $@ -lpthread -ldl -Wl,-soname='$(@F)';$(shell ln -sf $(@F) $(@D)/liblockdep.so)) - -do_build_static_lib = \ - ($(print_static_lib_build) \ - $(RM) $@; $(AR) rcs $@ $^) - -CMD_TARGETS = $(LIB_FILE) - -TARGETS = $(CMD_TARGETS) - - -all: fixdep all_cmd - -all_cmd: $(CMD_TARGETS) - -$(LIB_IN): force - $(Q)$(MAKE) $(build)=liblockdep - -$(OUTPUT)liblockdep.so.$(LIBLOCKDEP_VERSION): $(LIB_IN) - $(Q)$(do_compile_shared_library) - -$(OUTPUT)liblockdep.a: $(LIB_IN) - $(Q)$(do_build_static_lib) - -tags: force - $(RM) tags - find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \ - --regex-c++='/_PE\(([^,)]*).*/TEP_ERRNO__\1/' - -TAGS: force - $(RM) TAGS - find . -name '*.[ch]' | xargs etags \ - --regex='/_PE(\([^,)]*\).*/TEP_ERRNO__\1/' - -define do_install - $(print_install) \ - if [ ! 
-d '$(DESTDIR_SQ)$2' ]; then \ - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ - fi; \ - $(INSTALL) $1 '$(DESTDIR_SQ)$2' -endef - -install_lib: all_cmd - $(Q)$(call do_install,$(LIB_FILE),$(libdir_SQ)) - $(Q)$(call do_install,$(BIN_FILE),$(bindir_SQ)) - -install: install_lib - -clean: - $(RM) $(OUTPUT)*.o *~ $(TARGETS) $(OUTPUT)*.a $(OUTPUT)*liblockdep*.so* $(VERSION_FILES) $(OUTPUT).*.d $(OUTPUT).*.cmd - $(RM) tags TAGS - -PHONY += force -force: - -# Declare the contents of the .PHONY variable as phony. We keep that -# information in a variable so we can use it in if_changed and friends. -.PHONY: $(PHONY) diff --git a/tools/lib/lockdep/common.c b/tools/lib/lockdep/common.c deleted file mode 100644 index 5c3b58cce8a9..000000000000 --- a/tools/lib/lockdep/common.c +++ /dev/null @@ -1,29 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include - -static __thread struct task_struct current_obj; - -/* lockdep wants these */ -bool debug_locks = true; -bool debug_locks_silent; - -__attribute__((destructor)) static void liblockdep_exit(void) -{ - debug_check_no_locks_held(); -} - -struct task_struct *__curr(void) -{ - if (current_obj.pid == 0) { - /* Makes lockdep output pretty */ - prctl(PR_GET_NAME, current_obj.comm); - current_obj.pid = syscall(__NR_gettid); - } - - return &current_obj; -} diff --git a/tools/lib/lockdep/include/liblockdep/common.h b/tools/lib/lockdep/include/liblockdep/common.h deleted file mode 100644 index a6d7ee5f18ba..000000000000 --- a/tools/lib/lockdep/include/liblockdep/common.h +++ /dev/null @@ -1,54 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LIBLOCKDEP_COMMON_H -#define _LIBLOCKDEP_COMMON_H - -#include - -#define NR_LOCKDEP_CACHING_CLASSES 2 -#define MAX_LOCKDEP_SUBCLASSES 8UL - -#ifndef CALLER_ADDR0 -#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) -#endif - -#ifndef _RET_IP_ -#define _RET_IP_ CALLER_ADDR0 -#endif - -#ifndef _THIS_IP_ -#define _THIS_IP_ ({ __label__ __here;
__here: (unsigned long)&&__here; }) -#endif - -struct lockdep_subclass_key { - char __one_byte; -}; - -struct lock_class_key { - struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES]; -}; - -struct lockdep_map { - struct lock_class_key *key; - struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES]; - const char *name; -#ifdef CONFIG_LOCK_STAT - int cpu; - unsigned long ip; -#endif -}; - -void lockdep_init_map(struct lockdep_map *lock, const char *name, - struct lock_class_key *key, int subclass); -void lock_acquire(struct lockdep_map *lock, unsigned int subclass, - int trylock, int read, int check, - struct lockdep_map *nest_lock, unsigned long ip); -void lock_release(struct lockdep_map *lock, unsigned long ip); -void lockdep_reset_lock(struct lockdep_map *lock); -void lockdep_register_key(struct lock_class_key *key); -void lockdep_unregister_key(struct lock_class_key *key); -extern void debug_check_no_locks_freed(const void *from, unsigned long len); - -#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ - { .name = (_name), .key = (void *)(_key), } - -#endif diff --git a/tools/lib/lockdep/include/liblockdep/mutex.h b/tools/lib/lockdep/include/liblockdep/mutex.h deleted file mode 100644 index bd106b82759b..000000000000 --- a/tools/lib/lockdep/include/liblockdep/mutex.h +++ /dev/null @@ -1,73 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LIBLOCKDEP_MUTEX_H -#define _LIBLOCKDEP_MUTEX_H - -#include -#include "common.h" - -struct liblockdep_pthread_mutex { - pthread_mutex_t mutex; - struct lock_class_key key; - struct lockdep_map dep_map; -}; - -typedef struct liblockdep_pthread_mutex liblockdep_pthread_mutex_t; - -#define LIBLOCKDEP_PTHREAD_MUTEX_INITIALIZER(mtx) \ - (const struct liblockdep_pthread_mutex) { \ - .mutex = PTHREAD_MUTEX_INITIALIZER, \ - .dep_map = STATIC_LOCKDEP_MAP_INIT(#mtx, &((&(mtx))->dep_map)), \ -} - -static inline int __mutex_init(liblockdep_pthread_mutex_t *lock, - const char *name, - struct lock_class_key *key, - const 
pthread_mutexattr_t *__mutexattr) -{ - lockdep_init_map(&lock->dep_map, name, key, 0); - return pthread_mutex_init(&lock->mutex, __mutexattr); -} - -#define liblockdep_pthread_mutex_init(mutex, mutexattr) \ -({ \ - lockdep_register_key(&(mutex)->key); \ - __mutex_init((mutex), #mutex, &(mutex)->key, (mutexattr)); \ -}) - -static inline int liblockdep_pthread_mutex_lock(liblockdep_pthread_mutex_t *lock) -{ - lock_acquire(&lock->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_); - return pthread_mutex_lock(&lock->mutex); -} - -static inline int liblockdep_pthread_mutex_unlock(liblockdep_pthread_mutex_t *lock) -{ - lock_release(&lock->dep_map, (unsigned long)_RET_IP_); - return pthread_mutex_unlock(&lock->mutex); -} - -static inline int liblockdep_pthread_mutex_trylock(liblockdep_pthread_mutex_t *lock) -{ - lock_acquire(&lock->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_); - return pthread_mutex_trylock(&lock->mutex) == 0 ? 1 : 0; -} - -static inline int liblockdep_pthread_mutex_destroy(liblockdep_pthread_mutex_t *lock) -{ - lockdep_reset_lock(&lock->dep_map); - lockdep_unregister_key(&lock->key); - return pthread_mutex_destroy(&lock->mutex); -} - -#ifdef __USE_LIBLOCKDEP - -#define pthread_mutex_t liblockdep_pthread_mutex_t -#define pthread_mutex_init liblockdep_pthread_mutex_init -#define pthread_mutex_lock liblockdep_pthread_mutex_lock -#define pthread_mutex_unlock liblockdep_pthread_mutex_unlock -#define pthread_mutex_trylock liblockdep_pthread_mutex_trylock -#define pthread_mutex_destroy liblockdep_pthread_mutex_destroy - -#endif - -#endif diff --git a/tools/lib/lockdep/include/liblockdep/rwlock.h b/tools/lib/lockdep/include/liblockdep/rwlock.h deleted file mode 100644 index 6d5d2932bf4d..000000000000 --- a/tools/lib/lockdep/include/liblockdep/rwlock.h +++ /dev/null @@ -1,87 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LIBLOCKDEP_RWLOCK_H -#define _LIBLOCKDEP_RWLOCK_H - -#include -#include "common.h" - -struct liblockdep_pthread_rwlock { - 
pthread_rwlock_t rwlock; - struct lockdep_map dep_map; -}; - -typedef struct liblockdep_pthread_rwlock liblockdep_pthread_rwlock_t; - -#define LIBLOCKDEP_PTHREAD_RWLOCK_INITIALIZER(rwl) \ - (struct liblockdep_pthread_rwlock) { \ - .rwlock = PTHREAD_RWLOCK_INITIALIZER, \ - .dep_map = STATIC_LOCKDEP_MAP_INIT(#rwl, &((&(rwl))->dep_map)), \ -} - -static inline int __rwlock_init(liblockdep_pthread_rwlock_t *lock, - const char *name, - struct lock_class_key *key, - const pthread_rwlockattr_t *attr) -{ - lockdep_init_map(&lock->dep_map, name, key, 0); - - return pthread_rwlock_init(&lock->rwlock, attr); -} - -#define liblockdep_pthread_rwlock_init(lock, attr) \ -({ \ - static struct lock_class_key __key; \ - \ - __rwlock_init((lock), #lock, &__key, (attr)); \ -}) - -static inline int liblockdep_pthread_rwlock_rdlock(liblockdep_pthread_rwlock_t *lock) -{ - lock_acquire(&lock->dep_map, 0, 0, 2, 1, NULL, (unsigned long)_RET_IP_); - return pthread_rwlock_rdlock(&lock->rwlock); - -} - -static inline int liblockdep_pthread_rwlock_unlock(liblockdep_pthread_rwlock_t *lock) -{ - lock_release(&lock->dep_map, (unsigned long)_RET_IP_); - return pthread_rwlock_unlock(&lock->rwlock); -} - -static inline int liblockdep_pthread_rwlock_wrlock(liblockdep_pthread_rwlock_t *lock) -{ - lock_acquire(&lock->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_); - return pthread_rwlock_wrlock(&lock->rwlock); -} - -static inline int liblockdep_pthread_rwlock_tryrdlock(liblockdep_pthread_rwlock_t *lock) -{ - lock_acquire(&lock->dep_map, 0, 1, 2, 1, NULL, (unsigned long)_RET_IP_); - return pthread_rwlock_tryrdlock(&lock->rwlock) == 0 ? 1 : 0; -} - -static inline int liblockdep_pthread_rwlock_trywrlock(liblockdep_pthread_rwlock_t *lock) -{ - lock_acquire(&lock->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_); - return pthread_rwlock_trywrlock(&lock->rwlock) == 0 ? 
1 : 0; -} - -static inline int liblockdep_rwlock_destroy(liblockdep_pthread_rwlock_t *lock) -{ - return pthread_rwlock_destroy(&lock->rwlock); -} - -#ifdef __USE_LIBLOCKDEP - -#define pthread_rwlock_t liblockdep_pthread_rwlock_t -#define pthread_rwlock_init liblockdep_pthread_rwlock_init -#define pthread_rwlock_rdlock liblockdep_pthread_rwlock_rdlock -#define pthread_rwlock_unlock liblockdep_pthread_rwlock_unlock -#define pthread_rwlock_wrlock liblockdep_pthread_rwlock_wrlock -#define pthread_rwlock_tryrdlock liblockdep_pthread_rwlock_tryrdlock -#define pthread_rwlock_trywrlock liblockdep_pthread_rwlock_trywrlock -#define pthread_rwlock_destroy liblockdep_rwlock_destroy - -#endif - -#endif diff --git a/tools/lib/lockdep/lockdep b/tools/lib/lockdep/lockdep deleted file mode 100755 index 49af9fe19f5b..000000000000 --- a/tools/lib/lockdep/lockdep +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash - -LD_PRELOAD="./liblockdep.so $LD_PRELOAD" "$@" diff --git a/tools/lib/lockdep/lockdep.c b/tools/lib/lockdep/lockdep.c deleted file mode 100644 index 348a9d0fb766..000000000000 --- a/tools/lib/lockdep/lockdep.c +++ /dev/null @@ -1,33 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include - -/* Trivial API wrappers, we don't (yet) have RCU in user-space: */ -#define hlist_for_each_entry_rcu hlist_for_each_entry -#define hlist_add_head_rcu hlist_add_head -#define hlist_del_rcu hlist_del -#define list_for_each_entry_rcu list_for_each_entry -#define list_add_tail_rcu list_add_tail - -u32 prandom_u32(void) -{ - /* Used only by lock_pin_lock() which is dead code */ - abort(); -} - -void print_irqtrace_events(struct task_struct *curr) -{ - abort(); -} - -static struct new_utsname *init_utsname(void) -{ - static struct new_utsname n = (struct new_utsname) { - .release = "liblockdep", - .version = LIBLOCKDEP_VERSION, - }; - - return &n; -} - -#include "../../../kernel/locking/lockdep.c" diff --git a/tools/lib/lockdep/lockdep_internals.h b/tools/lib/lockdep/lockdep_internals.h 
deleted file mode 100644 index 29d0c954cc24..000000000000 --- a/tools/lib/lockdep/lockdep_internals.h +++ /dev/null @@ -1 +0,0 @@ -#include "../../../kernel/locking/lockdep_internals.h" diff --git a/tools/lib/lockdep/lockdep_states.h b/tools/lib/lockdep/lockdep_states.h deleted file mode 100644 index 248d235efda9..000000000000 --- a/tools/lib/lockdep/lockdep_states.h +++ /dev/null @@ -1 +0,0 @@ -#include "../../../kernel/locking/lockdep_states.h" diff --git a/tools/lib/lockdep/preload.c b/tools/lib/lockdep/preload.c deleted file mode 100644 index 8f1adbe887b2..000000000000 --- a/tools/lib/lockdep/preload.c +++ /dev/null @@ -1,443 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include "include/liblockdep/mutex.h" -#include "../../include/linux/rbtree.h" - -/** - * struct lock_lookup - liblockdep's view of a single unique lock - * @orig: pointer to the original pthread lock, used for lookups - * @dep_map: lockdep's dep_map structure - * @key: lockdep's key structure - * @node: rb-tree node used to store the lock in a global tree - * @name: a unique name for the lock - */ -struct lock_lookup { - void *orig; /* Original pthread lock, used for lookups */ - struct lockdep_map dep_map; /* Since all locks are dynamic, we need - * a dep_map and a key for each lock */ - /* - * Wait, there's no support for key classes? Yup :( - * Most big projects wrap the pthread api with their own calls to - * be compatible with different locking methods. This means that - * "classes" will be brokes since the function that creates all - * locks will point to a generic locking function instead of the - * actual code that wants to do the locking. 
- */ - struct lock_class_key key; - struct rb_node node; -#define LIBLOCKDEP_MAX_LOCK_NAME 22 - char name[LIBLOCKDEP_MAX_LOCK_NAME]; -}; - -/* This is where we store our locks */ -static struct rb_root locks = RB_ROOT; -static pthread_rwlock_t locks_rwlock = PTHREAD_RWLOCK_INITIALIZER; - -/* pthread mutex API */ - -#ifdef __GLIBC__ -extern int __pthread_mutex_init(pthread_mutex_t *mutex, const pthread_mutexattr_t *attr); -extern int __pthread_mutex_lock(pthread_mutex_t *mutex); -extern int __pthread_mutex_trylock(pthread_mutex_t *mutex); -extern int __pthread_mutex_unlock(pthread_mutex_t *mutex); -extern int __pthread_mutex_destroy(pthread_mutex_t *mutex); -#else -#define __pthread_mutex_init NULL -#define __pthread_mutex_lock NULL -#define __pthread_mutex_trylock NULL -#define __pthread_mutex_unlock NULL -#define __pthread_mutex_destroy NULL -#endif -static int (*ll_pthread_mutex_init)(pthread_mutex_t *mutex, - const pthread_mutexattr_t *attr) = __pthread_mutex_init; -static int (*ll_pthread_mutex_lock)(pthread_mutex_t *mutex) = __pthread_mutex_lock; -static int (*ll_pthread_mutex_trylock)(pthread_mutex_t *mutex) = __pthread_mutex_trylock; -static int (*ll_pthread_mutex_unlock)(pthread_mutex_t *mutex) = __pthread_mutex_unlock; -static int (*ll_pthread_mutex_destroy)(pthread_mutex_t *mutex) = __pthread_mutex_destroy; - -/* pthread rwlock API */ - -#ifdef __GLIBC__ -extern int __pthread_rwlock_init(pthread_rwlock_t *rwlock, const pthread_rwlockattr_t *attr); -extern int __pthread_rwlock_destroy(pthread_rwlock_t *rwlock); -extern int __pthread_rwlock_wrlock(pthread_rwlock_t *rwlock); -extern int __pthread_rwlock_trywrlock(pthread_rwlock_t *rwlock); -extern int __pthread_rwlock_rdlock(pthread_rwlock_t *rwlock); -extern int __pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock); -extern int __pthread_rwlock_unlock(pthread_rwlock_t *rwlock); -#else -#define __pthread_rwlock_init NULL -#define __pthread_rwlock_destroy NULL -#define __pthread_rwlock_wrlock NULL -#define 
__pthread_rwlock_trywrlock NULL -#define __pthread_rwlock_rdlock NULL -#define __pthread_rwlock_tryrdlock NULL -#define __pthread_rwlock_unlock NULL -#endif - -static int (*ll_pthread_rwlock_init)(pthread_rwlock_t *rwlock, - const pthread_rwlockattr_t *attr) = __pthread_rwlock_init; -static int (*ll_pthread_rwlock_destroy)(pthread_rwlock_t *rwlock) = __pthread_rwlock_destroy; -static int (*ll_pthread_rwlock_rdlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_rdlock; -static int (*ll_pthread_rwlock_tryrdlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_tryrdlock; -static int (*ll_pthread_rwlock_trywrlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_trywrlock; -static int (*ll_pthread_rwlock_wrlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_wrlock; -static int (*ll_pthread_rwlock_unlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_unlock; - -enum { none, prepare, done, } __init_state; -static void init_preload(void); -static void try_init_preload(void) -{ - if (__init_state != done) - init_preload(); -} - -static struct rb_node **__get_lock_node(void *lock, struct rb_node **parent) -{ - struct rb_node **node = &locks.rb_node; - struct lock_lookup *l; - - *parent = NULL; - - while (*node) { - l = rb_entry(*node, struct lock_lookup, node); - - *parent = *node; - if (lock < l->orig) - node = &l->node.rb_left; - else if (lock > l->orig) - node = &l->node.rb_right; - else - return node; - } - - return node; -} - -#ifndef LIBLOCKDEP_STATIC_ENTRIES -#define LIBLOCKDEP_STATIC_ENTRIES 1024 -#endif - -static struct lock_lookup __locks[LIBLOCKDEP_STATIC_ENTRIES]; -static int __locks_nr; - -static inline bool is_static_lock(struct lock_lookup *lock) -{ - return lock >= __locks && lock < __locks + ARRAY_SIZE(__locks); -} - -static struct lock_lookup *alloc_lock(void) -{ - if (__init_state != done) { - /* - * Some programs attempt to initialize and use locks in their - * allocation path. 
This means that a call to malloc() would - * result in locks being initialized and locked. - * - * Why is it an issue for us? dlsym() below will try allocating - * to give us the original function. Since this allocation will - * result in a locking operations, we have to let pthread deal - * with it, but we can't! we don't have the pointer to the - * original API since we're inside dlsym() trying to get it - */ - - int idx = __locks_nr++; - if (idx >= ARRAY_SIZE(__locks)) { - dprintf(STDERR_FILENO, - "LOCKDEP error: insufficient LIBLOCKDEP_STATIC_ENTRIES\n"); - exit(EX_UNAVAILABLE); - } - return __locks + idx; - } - - return malloc(sizeof(struct lock_lookup)); -} - -static inline void free_lock(struct lock_lookup *lock) -{ - if (likely(!is_static_lock(lock))) - free(lock); -} - -/** - * __get_lock - find or create a lock instance - * @lock: pointer to a pthread lock function - * - * Try to find an existing lock in the rbtree using the provided pointer. If - * one wasn't found - create it. - */ -static struct lock_lookup *__get_lock(void *lock) -{ - struct rb_node **node, *parent; - struct lock_lookup *l; - - ll_pthread_rwlock_rdlock(&locks_rwlock); - node = __get_lock_node(lock, &parent); - ll_pthread_rwlock_unlock(&locks_rwlock); - if (*node) { - return rb_entry(*node, struct lock_lookup, node); - } - - /* We didn't find the lock, let's create it */ - l = alloc_lock(); - if (l == NULL) - return NULL; - - l->orig = lock; - /* - * Currently the name of the lock is the ptr value of the pthread lock, - * while not optimal, it makes debugging a bit easier. 
- * - * TODO: Get the real name of the lock using libdwarf - */ - sprintf(l->name, "%p", lock); - lockdep_init_map(&l->dep_map, l->name, &l->key, 0); - - ll_pthread_rwlock_wrlock(&locks_rwlock); - /* This might have changed since the last time we fetched it */ - node = __get_lock_node(lock, &parent); - rb_link_node(&l->node, parent, node); - rb_insert_color(&l->node, &locks); - ll_pthread_rwlock_unlock(&locks_rwlock); - - return l; -} - -static void __del_lock(struct lock_lookup *lock) -{ - ll_pthread_rwlock_wrlock(&locks_rwlock); - rb_erase(&lock->node, &locks); - ll_pthread_rwlock_unlock(&locks_rwlock); - free_lock(lock); -} - -int pthread_mutex_init(pthread_mutex_t *mutex, - const pthread_mutexattr_t *attr) -{ - int r; - - /* - * We keep trying to init our preload module because there might be - * code in init sections that tries to touch locks before we are - * initialized, in that case we'll need to manually call preload - * to get us going. - * - * Funny enough, kernel's lockdep had the same issue, and used - * (almost) the same solution. See look_up_lock_class() in - * kernel/locking/lockdep.c for details. - */ - try_init_preload(); - - r = ll_pthread_mutex_init(mutex, attr); - if (r == 0) - /* - * We do a dummy initialization here so that lockdep could - * warn us if something fishy is going on - such as - * initializing a held lock. - */ - __get_lock(mutex); - - return r; -} - -int pthread_mutex_lock(pthread_mutex_t *mutex) -{ - int r; - - try_init_preload(); - - lock_acquire(&__get_lock(mutex)->dep_map, 0, 0, 0, 1, NULL, - (unsigned long)_RET_IP_); - /* - * Here's the thing with pthread mutexes: unlike the kernel variant, - * they can fail. - * - * This means that the behaviour here is a bit different from what's - * going on in the kernel: there we just tell lockdep that we took the - * lock before actually taking it, but here we must deal with the case - * that locking failed. 
- * - * To do that we'll "release" the lock if locking failed - this way - * we'll get lockdep doing the correct checks when we try to take - * the lock, and if that fails - we'll be back to the correct - * state by releasing it. - */ - r = ll_pthread_mutex_lock(mutex); - if (r) - lock_release(&__get_lock(mutex)->dep_map, (unsigned long)_RET_IP_); - - return r; -} - -int pthread_mutex_trylock(pthread_mutex_t *mutex) -{ - int r; - - try_init_preload(); - - lock_acquire(&__get_lock(mutex)->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_); - r = ll_pthread_mutex_trylock(mutex); - if (r) - lock_release(&__get_lock(mutex)->dep_map, (unsigned long)_RET_IP_); - - return r; -} - -int pthread_mutex_unlock(pthread_mutex_t *mutex) -{ - int r; - - try_init_preload(); - - lock_release(&__get_lock(mutex)->dep_map, (unsigned long)_RET_IP_); - /* - * Just like taking a lock, only in reverse! - * - * If we fail releasing the lock, tell lockdep we're holding it again. - */ - r = ll_pthread_mutex_unlock(mutex); - if (r) - lock_acquire(&__get_lock(mutex)->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_); - - return r; -} - -int pthread_mutex_destroy(pthread_mutex_t *mutex) -{ - try_init_preload(); - - /* - * Let's see if we're releasing a lock that's held. - * - * TODO: Hook into free() and add that check there as well. 
- */ - debug_check_no_locks_freed(mutex, sizeof(*mutex)); - __del_lock(__get_lock(mutex)); - return ll_pthread_mutex_destroy(mutex); -} - -/* This is the rwlock part, very similar to what happened with mutex above */ -int pthread_rwlock_init(pthread_rwlock_t *rwlock, - const pthread_rwlockattr_t *attr) -{ - int r; - - try_init_preload(); - - r = ll_pthread_rwlock_init(rwlock, attr); - if (r == 0) - __get_lock(rwlock); - - return r; -} - -int pthread_rwlock_destroy(pthread_rwlock_t *rwlock) -{ - try_init_preload(); - - debug_check_no_locks_freed(rwlock, sizeof(*rwlock)); - __del_lock(__get_lock(rwlock)); - return ll_pthread_rwlock_destroy(rwlock); -} - -int pthread_rwlock_rdlock(pthread_rwlock_t *rwlock) -{ - int r; - - init_preload(); - - lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 2, 1, NULL, (unsigned long)_RET_IP_); - r = ll_pthread_rwlock_rdlock(rwlock); - if (r) - lock_release(&__get_lock(rwlock)->dep_map, (unsigned long)_RET_IP_); - - return r; -} - -int pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock) -{ - int r; - - init_preload(); - - lock_acquire(&__get_lock(rwlock)->dep_map, 0, 1, 2, 1, NULL, (unsigned long)_RET_IP_); - r = ll_pthread_rwlock_tryrdlock(rwlock); - if (r) - lock_release(&__get_lock(rwlock)->dep_map, (unsigned long)_RET_IP_); - - return r; -} - -int pthread_rwlock_trywrlock(pthread_rwlock_t *rwlock) -{ - int r; - - init_preload(); - - lock_acquire(&__get_lock(rwlock)->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_); - r = ll_pthread_rwlock_trywrlock(rwlock); - if (r) - lock_release(&__get_lock(rwlock)->dep_map, (unsigned long)_RET_IP_); - - return r; -} - -int pthread_rwlock_wrlock(pthread_rwlock_t *rwlock) -{ - int r; - - init_preload(); - - lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_); - r = ll_pthread_rwlock_wrlock(rwlock); - if (r) - lock_release(&__get_lock(rwlock)->dep_map, (unsigned long)_RET_IP_); - - return r; -} - -int pthread_rwlock_unlock(pthread_rwlock_t *rwlock) -{ - int r; - 
- init_preload(); - - lock_release(&__get_lock(rwlock)->dep_map, (unsigned long)_RET_IP_); - r = ll_pthread_rwlock_unlock(rwlock); - if (r) - lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_); - - return r; -} - -__attribute__((constructor)) static void init_preload(void) -{ - if (__init_state == done) - return; - -#ifndef __GLIBC__ - __init_state = prepare; - - ll_pthread_mutex_init = dlsym(RTLD_NEXT, "pthread_mutex_init"); - ll_pthread_mutex_lock = dlsym(RTLD_NEXT, "pthread_mutex_lock"); - ll_pthread_mutex_trylock = dlsym(RTLD_NEXT, "pthread_mutex_trylock"); - ll_pthread_mutex_unlock = dlsym(RTLD_NEXT, "pthread_mutex_unlock"); - ll_pthread_mutex_destroy = dlsym(RTLD_NEXT, "pthread_mutex_destroy"); - - ll_pthread_rwlock_init = dlsym(RTLD_NEXT, "pthread_rwlock_init"); - ll_pthread_rwlock_destroy = dlsym(RTLD_NEXT, "pthread_rwlock_destroy"); - ll_pthread_rwlock_rdlock = dlsym(RTLD_NEXT, "pthread_rwlock_rdlock"); - ll_pthread_rwlock_tryrdlock = dlsym(RTLD_NEXT, "pthread_rwlock_tryrdlock"); - ll_pthread_rwlock_wrlock = dlsym(RTLD_NEXT, "pthread_rwlock_wrlock"); - ll_pthread_rwlock_trywrlock = dlsym(RTLD_NEXT, "pthread_rwlock_trywrlock"); - ll_pthread_rwlock_unlock = dlsym(RTLD_NEXT, "pthread_rwlock_unlock"); -#endif - - __init_state = done; -} diff --git a/tools/lib/lockdep/rbtree.c b/tools/lib/lockdep/rbtree.c deleted file mode 100644 index 297c304571f8..000000000000 --- a/tools/lib/lockdep/rbtree.c +++ /dev/null @@ -1 +0,0 @@ -#include "../../lib/rbtree.c" diff --git a/tools/lib/lockdep/run_tests.sh b/tools/lib/lockdep/run_tests.sh deleted file mode 100755 index 11f425662b43..000000000000 --- a/tools/lib/lockdep/run_tests.sh +++ /dev/null @@ -1,47 +0,0 @@ -#! /bin/bash -# SPDX-License-Identifier: GPL-2.0 - -if ! make >/dev/null; then - echo "Building liblockdep failed." - echo "FAILED!" - exit 1 -fi - -find tests -name '*.c' | sort | while read -r i; do - testname=$(basename "$i" .c) - echo -ne "$testname... 
" - if gcc -o "tests/$testname" -pthread "$i" liblockdep.a -Iinclude -D__USE_LIBLOCKDEP && - timeout 1 "tests/$testname" 2>&1 | /bin/bash "tests/${testname}.sh"; then - echo "PASSED!" - else - echo "FAILED!" - fi - rm -f "tests/$testname" -done - -find tests -name '*.c' | sort | while read -r i; do - testname=$(basename "$i" .c) - echo -ne "(PRELOAD) $testname... " - if gcc -o "tests/$testname" -pthread -Iinclude "$i" && - timeout 1 ./lockdep "tests/$testname" 2>&1 | - /bin/bash "tests/${testname}.sh"; then - echo "PASSED!" - else - echo "FAILED!" - fi - rm -f "tests/$testname" -done - -find tests -name '*.c' | sort | while read -r i; do - testname=$(basename "$i" .c) - echo -ne "(PRELOAD + Valgrind) $testname... " - if gcc -o "tests/$testname" -pthread -Iinclude "$i" && - { timeout 10 valgrind --read-var-info=yes ./lockdep "./tests/$testname" >& "tests/${testname}.vg.out"; true; } && - /bin/bash "tests/${testname}.sh" < "tests/${testname}.vg.out" && - ! grep -Eq '(^==[0-9]*== (Invalid |Uninitialised ))|Mismatched free|Source and destination overlap| UME ' "tests/${testname}.vg.out"; then - echo "PASSED!" - else - echo "FAILED!" 
- fi - rm -f "tests/$testname" -done diff --git a/tools/lib/lockdep/tests/AA.c b/tools/lib/lockdep/tests/AA.c deleted file mode 100644 index 63c7ce97bda3..000000000000 --- a/tools/lib/lockdep/tests/AA.c +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include - -int main(void) -{ - pthread_mutex_t a; - - pthread_mutex_init(&a, NULL); - - pthread_mutex_lock(&a); - pthread_mutex_lock(&a); - - return 0; -} diff --git a/tools/lib/lockdep/tests/AA.sh b/tools/lib/lockdep/tests/AA.sh deleted file mode 100644 index f39b32865074..000000000000 --- a/tools/lib/lockdep/tests/AA.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -grep -q 'WARNING: possible recursive locking detected' diff --git a/tools/lib/lockdep/tests/ABA.c b/tools/lib/lockdep/tests/ABA.c deleted file mode 100644 index efa39b23f05d..000000000000 --- a/tools/lib/lockdep/tests/ABA.c +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include - -void main(void) -{ - pthread_mutex_t a, b; - - pthread_mutex_init(&a, NULL); - pthread_mutex_init(&b, NULL); - - pthread_mutex_lock(&a); - pthread_mutex_lock(&b); - pthread_mutex_lock(&a); -} diff --git a/tools/lib/lockdep/tests/ABA.sh b/tools/lib/lockdep/tests/ABA.sh deleted file mode 100644 index f39b32865074..000000000000 --- a/tools/lib/lockdep/tests/ABA.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -grep -q 'WARNING: possible recursive locking detected' diff --git a/tools/lib/lockdep/tests/ABBA.c b/tools/lib/lockdep/tests/ABBA.c deleted file mode 100644 index 543789bc3e37..000000000000 --- a/tools/lib/lockdep/tests/ABBA.c +++ /dev/null @@ -1,26 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include "common.h" - -void main(void) -{ - pthread_mutex_t a, b; - - pthread_mutex_init(&a, NULL); - pthread_mutex_init(&b, NULL); - - LOCK_UNLOCK_2(a, b); - LOCK_UNLOCK_2(b, a); - - pthread_mutex_destroy(&b); - pthread_mutex_destroy(&a); - - pthread_mutex_init(&a, NULL); - pthread_mutex_init(&b, NULL); - - LOCK_UNLOCK_2(a, b); - 
LOCK_UNLOCK_2(b, a); - - pthread_mutex_destroy(&b); - pthread_mutex_destroy(&a); -} diff --git a/tools/lib/lockdep/tests/ABBA.sh b/tools/lib/lockdep/tests/ABBA.sh deleted file mode 100644 index fc31c607a5a8..000000000000 --- a/tools/lib/lockdep/tests/ABBA.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -grep -q 'WARNING: possible circular locking dependency detected' diff --git a/tools/lib/lockdep/tests/ABBA_2threads.c b/tools/lib/lockdep/tests/ABBA_2threads.c deleted file mode 100644 index 39325ef8a2ac..000000000000 --- a/tools/lib/lockdep/tests/ABBA_2threads.c +++ /dev/null @@ -1,47 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include - -pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER; -pthread_mutex_t b = PTHREAD_MUTEX_INITIALIZER; -pthread_barrier_t bar; - -void *ba_lock(void *arg) -{ - int ret, i; - - pthread_mutex_lock(&b); - - if (pthread_barrier_wait(&bar) == PTHREAD_BARRIER_SERIAL_THREAD) - pthread_barrier_destroy(&bar); - - pthread_mutex_lock(&a); - - pthread_mutex_unlock(&a); - pthread_mutex_unlock(&b); -} - -int main(void) -{ - pthread_t t; - - pthread_barrier_init(&bar, NULL, 2); - - if (pthread_create(&t, NULL, ba_lock, NULL)) { - fprintf(stderr, "pthread_create() failed\n"); - return 1; - } - pthread_mutex_lock(&a); - - if (pthread_barrier_wait(&bar) == PTHREAD_BARRIER_SERIAL_THREAD) - pthread_barrier_destroy(&bar); - - pthread_mutex_lock(&b); - - pthread_mutex_unlock(&b); - pthread_mutex_unlock(&a); - - pthread_join(t, NULL); - - return 0; -} diff --git a/tools/lib/lockdep/tests/ABBA_2threads.sh b/tools/lib/lockdep/tests/ABBA_2threads.sh deleted file mode 100644 index fc31c607a5a8..000000000000 --- a/tools/lib/lockdep/tests/ABBA_2threads.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -grep -q 'WARNING: possible circular locking dependency detected' diff --git a/tools/lib/lockdep/tests/ABBCCA.c b/tools/lib/lockdep/tests/ABBCCA.c deleted file mode 100644 index 48446129d496..000000000000 --- a/tools/lib/lockdep/tests/ABBCCA.c +++ /dev/null @@ 
-1,20 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include "common.h" - -void main(void) -{ - pthread_mutex_t a, b, c; - - pthread_mutex_init(&a, NULL); - pthread_mutex_init(&b, NULL); - pthread_mutex_init(&c, NULL); - - LOCK_UNLOCK_2(a, b); - LOCK_UNLOCK_2(b, c); - LOCK_UNLOCK_2(c, a); - - pthread_mutex_destroy(&c); - pthread_mutex_destroy(&b); - pthread_mutex_destroy(&a); -} diff --git a/tools/lib/lockdep/tests/ABBCCA.sh b/tools/lib/lockdep/tests/ABBCCA.sh deleted file mode 100644 index fc31c607a5a8..000000000000 --- a/tools/lib/lockdep/tests/ABBCCA.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -grep -q 'WARNING: possible circular locking dependency detected' diff --git a/tools/lib/lockdep/tests/ABBCCDDA.c b/tools/lib/lockdep/tests/ABBCCDDA.c deleted file mode 100644 index 3570bf7b3804..000000000000 --- a/tools/lib/lockdep/tests/ABBCCDDA.c +++ /dev/null @@ -1,23 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include "common.h" - -void main(void) -{ - pthread_mutex_t a, b, c, d; - - pthread_mutex_init(&a, NULL); - pthread_mutex_init(&b, NULL); - pthread_mutex_init(&c, NULL); - pthread_mutex_init(&d, NULL); - - LOCK_UNLOCK_2(a, b); - LOCK_UNLOCK_2(b, c); - LOCK_UNLOCK_2(c, d); - LOCK_UNLOCK_2(d, a); - - pthread_mutex_destroy(&d); - pthread_mutex_destroy(&c); - pthread_mutex_destroy(&b); - pthread_mutex_destroy(&a); -} diff --git a/tools/lib/lockdep/tests/ABBCCDDA.sh b/tools/lib/lockdep/tests/ABBCCDDA.sh deleted file mode 100644 index fc31c607a5a8..000000000000 --- a/tools/lib/lockdep/tests/ABBCCDDA.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -grep -q 'WARNING: possible circular locking dependency detected' diff --git a/tools/lib/lockdep/tests/ABCABC.c b/tools/lib/lockdep/tests/ABCABC.c deleted file mode 100644 index a1c4659894cd..000000000000 --- a/tools/lib/lockdep/tests/ABCABC.c +++ /dev/null @@ -1,20 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include "common.h" - -void main(void) -{ - pthread_mutex_t a, b, c; - - 
pthread_mutex_init(&a, NULL); - pthread_mutex_init(&b, NULL); - pthread_mutex_init(&c, NULL); - - LOCK_UNLOCK_2(a, b); - LOCK_UNLOCK_2(c, a); - LOCK_UNLOCK_2(b, c); - - pthread_mutex_destroy(&c); - pthread_mutex_destroy(&b); - pthread_mutex_destroy(&a); -} diff --git a/tools/lib/lockdep/tests/ABCABC.sh b/tools/lib/lockdep/tests/ABCABC.sh deleted file mode 100644 index fc31c607a5a8..000000000000 --- a/tools/lib/lockdep/tests/ABCABC.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -grep -q 'WARNING: possible circular locking dependency detected' diff --git a/tools/lib/lockdep/tests/ABCDBCDA.c b/tools/lib/lockdep/tests/ABCDBCDA.c deleted file mode 100644 index 335af1c90ab5..000000000000 --- a/tools/lib/lockdep/tests/ABCDBCDA.c +++ /dev/null @@ -1,23 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include "common.h" - -void main(void) -{ - pthread_mutex_t a, b, c, d; - - pthread_mutex_init(&a, NULL); - pthread_mutex_init(&b, NULL); - pthread_mutex_init(&c, NULL); - pthread_mutex_init(&d, NULL); - - LOCK_UNLOCK_2(a, b); - LOCK_UNLOCK_2(c, d); - LOCK_UNLOCK_2(b, c); - LOCK_UNLOCK_2(d, a); - - pthread_mutex_destroy(&d); - pthread_mutex_destroy(&c); - pthread_mutex_destroy(&b); - pthread_mutex_destroy(&a); -} diff --git a/tools/lib/lockdep/tests/ABCDBCDA.sh b/tools/lib/lockdep/tests/ABCDBCDA.sh deleted file mode 100644 index fc31c607a5a8..000000000000 --- a/tools/lib/lockdep/tests/ABCDBCDA.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -grep -q 'WARNING: possible circular locking dependency detected' diff --git a/tools/lib/lockdep/tests/ABCDBDDA.c b/tools/lib/lockdep/tests/ABCDBDDA.c deleted file mode 100644 index 3c5972863049..000000000000 --- a/tools/lib/lockdep/tests/ABCDBDDA.c +++ /dev/null @@ -1,23 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include "common.h" - -void main(void) -{ - pthread_mutex_t a, b, c, d; - - pthread_mutex_init(&a, NULL); - pthread_mutex_init(&b, NULL); - pthread_mutex_init(&c, NULL); - pthread_mutex_init(&d, NULL); - - 
LOCK_UNLOCK_2(a, b); - LOCK_UNLOCK_2(c, d); - LOCK_UNLOCK_2(b, d); - LOCK_UNLOCK_2(d, a); - - pthread_mutex_destroy(&d); - pthread_mutex_destroy(&c); - pthread_mutex_destroy(&b); - pthread_mutex_destroy(&a); -} diff --git a/tools/lib/lockdep/tests/ABCDBDDA.sh b/tools/lib/lockdep/tests/ABCDBDDA.sh deleted file mode 100644 index fc31c607a5a8..000000000000 --- a/tools/lib/lockdep/tests/ABCDBDDA.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -grep -q 'WARNING: possible circular locking dependency detected' diff --git a/tools/lib/lockdep/tests/WW.c b/tools/lib/lockdep/tests/WW.c deleted file mode 100644 index eee88df7fc41..000000000000 --- a/tools/lib/lockdep/tests/WW.c +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include - -void main(void) -{ - pthread_rwlock_t a, b; - - pthread_rwlock_init(&a, NULL); - pthread_rwlock_init(&b, NULL); - - pthread_rwlock_wrlock(&a); - pthread_rwlock_rdlock(&b); - pthread_rwlock_wrlock(&a); -} diff --git a/tools/lib/lockdep/tests/WW.sh b/tools/lib/lockdep/tests/WW.sh deleted file mode 100644 index f39b32865074..000000000000 --- a/tools/lib/lockdep/tests/WW.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -grep -q 'WARNING: possible recursive locking detected' diff --git a/tools/lib/lockdep/tests/common.h b/tools/lib/lockdep/tests/common.h deleted file mode 100644 index 3026c29ccb5c..000000000000 --- a/tools/lib/lockdep/tests/common.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LIBLOCKDEP_TEST_COMMON_H -#define _LIBLOCKDEP_TEST_COMMON_H - -#define LOCK_UNLOCK_2(a, b) \ - do { \ - pthread_mutex_lock(&(a)); \ - pthread_mutex_lock(&(b)); \ - pthread_mutex_unlock(&(b)); \ - pthread_mutex_unlock(&(a)); \ - } while(0) - -#endif diff --git a/tools/lib/lockdep/tests/unlock_balance.c b/tools/lib/lockdep/tests/unlock_balance.c deleted file mode 100644 index dba25064b50a..000000000000 --- a/tools/lib/lockdep/tests/unlock_balance.c +++ /dev/null @@ -1,15 +0,0 @@ -// SPDX-License-Identifier: 
GPL-2.0 -#include - -void main(void) -{ - pthread_mutex_t a; - - pthread_mutex_init(&a, NULL); - - pthread_mutex_lock(&a); - pthread_mutex_unlock(&a); - pthread_mutex_unlock(&a); - - pthread_mutex_destroy(&a); -} diff --git a/tools/lib/lockdep/tests/unlock_balance.sh b/tools/lib/lockdep/tests/unlock_balance.sh deleted file mode 100644 index c6e3952303fe..000000000000 --- a/tools/lib/lockdep/tests/unlock_balance.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -grep -q 'WARNING: bad unlock balance detected'