Merge pull request #25439 from NeQuissimus/linux_4_11_cgroups

linux: Add cgroups patches for 4.9, 4.10, 4.11
author: Michael Raskin <7c6f434c@mail.ru> 2017-05-02 15:03:42 +0200
committer: GitHub <noreply@github.com> 2017-05-02 15:03:42 +0200
commit: a841935d4b7f6b0b8cf1d7b7fe3f342e61dafd67 (patch)
tree: 5258ac5985cec4ef4425c0ad9bc9045dc206bf0b /pkgs
parent: cac205c25223f1839c453a2329334aed9f3920e1 (diff)
parent: b5169fd2771d60a359af7263d84453adc0b5dbfb (diff)
download: nixlib-a841935d4b7f6b0b8cf1d7b7fe3f342e61dafd67.tar
nixlib-a841935d4b7f6b0b8cf1d7b7fe3f342e61dafd67.tar.gz
nixlib-a841935d4b7f6b0b8cf1d7b7fe3f342e61dafd67.tar.bz2
nixlib-a841935d4b7f6b0b8cf1d7b7fe3f342e61dafd67.tar.lz
nixlib-a841935d4b7f6b0b8cf1d7b7fe3f342e61dafd67.tar.xz
nixlib-a841935d4b7f6b0b8cf1d7b7fe3f342e61dafd67.tar.zst
nixlib-a841935d4b7f6b0b8cf1d7b7fe3f342e61dafd67.zip
6 files changed, 2355 insertions, 824 deletions
diff --git a/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.10.patch b/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.10.patch
new file mode 100644
index 000000000000..21040239a2b7
--- /dev/null
+++ b/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.10.patch
@@ -0,0 +1,784 @@
+commit d0273888226b264d34795970c073d6e935d5114f
+Author: Tejun Heo <tj@kernel.org>
+Date:   Fri Mar 11 07:31:23 2016 -0500
+
+    sched: Misc preps for cgroup unified hierarchy interface
+    
+    Make the following changes in preparation for the cpu controller
+    interface implementation for the unified hierarchy.  This patch
+    doesn't cause any functional differences.
+    
+    * s/cpu_stats_show()/cpu_cfs_stats_show()/
+    
+    * s/cpu_files/cpu_legacy_files/
+    
+    * Separate out cpuacct_stats_read() from cpuacct_stats_show().  While
+      at it, make the @val array u64 for consistency.
+    
+    Signed-off-by: Tejun Heo <tj@kernel.org>
+    Cc: Ingo Molnar <mingo@redhat.com>
+    Cc: Peter Zijlstra <peterz@infradead.org>
+    Cc: Li Zefan <lizefan@huawei.com>
+    Cc: Johannes Weiner <hannes@cmpxchg.org>
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index c56fb57f2991..112037890e9b 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -8724,7 +8724,7 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota)
+ 	return ret;
+ }
+ 
+-static int cpu_stats_show(struct seq_file *sf, void *v)
++static int cpu_cfs_stats_show(struct seq_file *sf, void *v)
+ {
+ 	struct task_group *tg = css_tg(seq_css(sf));
+ 	struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
+@@ -8764,7 +8764,7 @@ static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css,
+ }
+ #endif /* CONFIG_RT_GROUP_SCHED */
+ 
+-static struct cftype cpu_files[] = {
++static struct cftype cpu_legacy_files[] = {
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+ 	{
+ 		.name = "shares",
+@@ -8785,7 +8785,7 @@ static struct cftype cpu_files[] = {
+ 	},
+ 	{
+ 		.name = "stat",
+-		.seq_show = cpu_stats_show,
++		.seq_show = cpu_cfs_stats_show,
+ 	},
+ #endif
+ #ifdef CONFIG_RT_GROUP_SCHED
+@@ -8810,7 +8810,7 @@ struct cgroup_subsys cpu_cgrp_subsys = {
+ 	.fork		= cpu_cgroup_fork,
+ 	.can_attach	= cpu_cgroup_can_attach,
+ 	.attach		= cpu_cgroup_attach,
+-	.legacy_cftypes	= cpu_files,
++	.legacy_cftypes	= cpu_legacy_files,
+ 	.early_init	= true,
+ };
+ 
+diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
+index 9add206b5608..4dd7b8588b69 100644
+--- a/kernel/sched/cpuacct.c
++++ b/kernel/sched/cpuacct.c
+@@ -276,26 +276,33 @@ static int cpuacct_all_seq_show(struct seq_file *m, void *V)
+ 	return 0;
+ }
+ 
+-static int cpuacct_stats_show(struct seq_file *sf, void *v)
++static void cpuacct_stats_read(struct cpuacct *ca,
++			       u64 (*val)[CPUACCT_STAT_NSTATS])
+ {
+-	struct cpuacct *ca = css_ca(seq_css(sf));
+-	s64 val[CPUACCT_STAT_NSTATS];
+ 	int cpu;
+-	int stat;
+ 
+-	memset(val, 0, sizeof(val));
++	memset(val, 0, sizeof(*val));
++
+ 	for_each_possible_cpu(cpu) {
+ 		u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
+ 
+-		val[CPUACCT_STAT_USER]   += cpustat[CPUTIME_USER];
+-		val[CPUACCT_STAT_USER]   += cpustat[CPUTIME_NICE];
+-		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SYSTEM];
+-		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_IRQ];
+-		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SOFTIRQ];
++		(*val)[CPUACCT_STAT_USER]   += cpustat[CPUTIME_USER];
++		(*val)[CPUACCT_STAT_USER]   += cpustat[CPUTIME_NICE];
++		(*val)[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SYSTEM];
++		(*val)[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_IRQ];
++		(*val)[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SOFTIRQ];
+ 	}
++}
++
++static int cpuacct_stats_show(struct seq_file *sf, void *v)
++{
++	u64 val[CPUACCT_STAT_NSTATS];
++	int stat;
++
++	cpuacct_stats_read(css_ca(seq_css(sf)), &val);
+ 
+ 	for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) {
+-		seq_printf(sf, "%s %lld\n",
++		seq_printf(sf, "%s %llu\n",
+ 			   cpuacct_stat_desc[stat],
+ 			   (long long)cputime64_to_clock_t(val[stat]));
+ 	}
+
+commit 41103511fa43b4aa04fea259c5c60fef752ddefb
+Author: Tejun Heo <tj@kernel.org>
+Date:   Fri Mar 11 07:31:23 2016 -0500
+
+    sched: Implement interface for cgroup unified hierarchy
+    
+    While the cpu controller doesn't have any functional problems, there
+    are a couple interface issues which can be addressed in the v2
+    interface.
+    
+    * cpuacct being a separate controller.  This separation is artificial
+      and rather pointless as demonstrated by most use cases co-mounting
+      the two controllers.  It also forces certain information to be
+      accounted twice.
+    
+    * Use of different time units.  Writable control knobs use
+      microseconds, some stat fields use nanoseconds while other cpuacct
+      stat fields use centiseconds.
+    
+    * Control knobs which can't be used in the root cgroup still show up
+      in the root.
+    
+    * Control knob names and semantics aren't consistent with other
+      controllers.
+    
+    This patchset implements cpu controller's interface on the unified
+    hierarchy which adheres to the controller file conventions described
+    in Documentation/cgroups/unified-hierarchy.txt.  Overall, the
+    following changes are made.
+    
+    * cpuacct is implictly enabled and disabled by cpu and its information
+      is reported through "cpu.stat" which now uses microseconds for all
+      time durations.  All time duration fields now have "_usec" appended
+      to them for clarity.  While this doesn't solve the double accounting
+      immediately, once majority of users switch to v2, cpu can directly
+      account and report the relevant stats and cpuacct can be disabled on
+      the unified hierarchy.
+    
+      Note that cpuacct.usage_percpu is currently not included in
+      "cpu.stat".  If this information is actually called for, it can be
+      added later.
+    
+    * "cpu.shares" is replaced with "cpu.weight" and operates on the
+      standard scale defined by CGROUP_WEIGHT_MIN/DFL/MAX (1, 100, 10000).
+      The weight is scaled to scheduler weight so that 100 maps to 1024
+      and the ratio relationship is preserved - if weight is W and its
+      scaled value is S, W / 100 == S / 1024.  While the mapped range is a
+      bit smaller than the orignal scheduler weight range, the dead zones
+      on both sides are relatively small and covers wider range than the
+      nice value mappings.  This file doesn't make sense in the root
+      cgroup and isn't create on root.
+    
+    * "cpu.cfs_quota_us" and "cpu.cfs_period_us" are replaced by "cpu.max"
+      which contains both quota and period.
+    
+    * "cpu.rt_runtime_us" and "cpu.rt_period_us" are replaced by
+      "cpu.rt.max" which contains both runtime and period.
+    
+    v2: cpu_stats_show() was incorrectly using CONFIG_FAIR_GROUP_SCHED for
+        CFS bandwidth stats and also using raw division for u64.  Use
+        CONFIG_CFS_BANDWITH and do_div() instead.
+    
+        The semantics of "cpu.rt.max" is not fully decided yet.  Dropped
+        for now.
+    
+    Signed-off-by: Tejun Heo <tj@kernel.org>
+    Cc: Ingo Molnar <mingo@redhat.com>
+    Cc: Peter Zijlstra <peterz@infradead.org>
+    Cc: Li Zefan <lizefan@huawei.com>
+    Cc: Johannes Weiner <hannes@cmpxchg.org>
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 112037890e9b..a80d586a4317 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -8803,6 +8803,139 @@ static struct cftype cpu_legacy_files[] = {
+ 	{ }	/* terminate */
+ };
+ 
++static int cpu_stats_show(struct seq_file *sf, void *v)
++{
++	cpuacct_cpu_stats_show(sf);
++
++#ifdef CONFIG_CFS_BANDWIDTH
++	{
++		struct task_group *tg = css_tg(seq_css(sf));
++		struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
++		u64 throttled_usec;
++
++		throttled_usec = cfs_b->throttled_time;
++		do_div(throttled_usec, NSEC_PER_USEC);
++
++		seq_printf(sf, "nr_periods %d\n"
++			   "nr_throttled %d\n"
++			   "throttled_usec %llu\n",
++			   cfs_b->nr_periods, cfs_b->nr_throttled,
++			   throttled_usec);
++	}
++#endif
++	return 0;
++}
++
++#ifdef CONFIG_FAIR_GROUP_SCHED
++static u64 cpu_weight_read_u64(struct cgroup_subsys_state *css,
++			       struct cftype *cft)
++{
++	struct task_group *tg = css_tg(css);
++	u64 weight = scale_load_down(tg->shares);
++
++	return DIV_ROUND_CLOSEST_ULL(weight * CGROUP_WEIGHT_DFL, 1024);
++}
++
++static int cpu_weight_write_u64(struct cgroup_subsys_state *css,
++				struct cftype *cftype, u64 weight)
++{
++	/*
++	 * cgroup weight knobs should use the common MIN, DFL and MAX
++	 * values which are 1, 100 and 10000 respectively.  While it loses
++	 * a bit of range on both ends, it maps pretty well onto the shares
++	 * value used by scheduler and the round-trip conversions preserve
++	 * the original value over the entire range.
++	 */
++	if (weight < CGROUP_WEIGHT_MIN || weight > CGROUP_WEIGHT_MAX)
++		return -ERANGE;
++
++	weight = DIV_ROUND_CLOSEST_ULL(weight * 1024, CGROUP_WEIGHT_DFL);
++
++	return sched_group_set_shares(css_tg(css), scale_load(weight));
++}
++#endif
++
++static void __maybe_unused cpu_period_quota_print(struct seq_file *sf,
++						  long period, long quota)
++{
++	if (quota < 0)
++		seq_puts(sf, "max");
++	else
++		seq_printf(sf, "%ld", quota);
++
++	seq_printf(sf, " %ld\n", period);
++}
++
++/* caller should put the current value in *@periodp before calling */
++static int __maybe_unused cpu_period_quota_parse(char *buf,
++						 u64 *periodp, u64 *quotap)
++{
++	char tok[21];	/* U64_MAX */
++
++	if (!sscanf(buf, "%s %llu", tok, periodp))
++		return -EINVAL;
++
++	*periodp *= NSEC_PER_USEC;
++
++	if (sscanf(tok, "%llu", quotap))
++		*quotap *= NSEC_PER_USEC;
++	else if (!strcmp(tok, "max"))
++		*quotap = RUNTIME_INF;
++	else
++		return -EINVAL;
++
++	return 0;
++}
++
++#ifdef CONFIG_CFS_BANDWIDTH
++static int cpu_max_show(struct seq_file *sf, void *v)
++{
++	struct task_group *tg = css_tg(seq_css(sf));
++
++	cpu_period_quota_print(sf, tg_get_cfs_period(tg), tg_get_cfs_quota(tg));
++	return 0;
++}
++
++static ssize_t cpu_max_write(struct kernfs_open_file *of,
++			     char *buf, size_t nbytes, loff_t off)
++{
++	struct task_group *tg = css_tg(of_css(of));
++	u64 period = tg_get_cfs_period(tg);
++	u64 quota;
++	int ret;
++
++	ret = cpu_period_quota_parse(buf, &period, &quota);
++	if (!ret)
++		ret = tg_set_cfs_bandwidth(tg, period, quota);
++	return ret ?: nbytes;
++}
++#endif
++
++static struct cftype cpu_files[] = {
++	{
++		.name = "stat",
++		.flags = CFTYPE_NOT_ON_ROOT,
++		.seq_show = cpu_stats_show,
++	},
++#ifdef CONFIG_FAIR_GROUP_SCHED
++	{
++		.name = "weight",
++		.flags = CFTYPE_NOT_ON_ROOT,
++		.read_u64 = cpu_weight_read_u64,
++		.write_u64 = cpu_weight_write_u64,
++	},
++#endif
++#ifdef CONFIG_CFS_BANDWIDTH
++	{
++		.name = "max",
++		.flags = CFTYPE_NOT_ON_ROOT,
++		.seq_show = cpu_max_show,
++		.write = cpu_max_write,
++	},
++#endif
++	{ }	/* terminate */
++};
++
+ struct cgroup_subsys cpu_cgrp_subsys = {
+ 	.css_alloc	= cpu_cgroup_css_alloc,
+ 	.css_released	= cpu_cgroup_css_released,
+@@ -8811,7 +8944,15 @@ struct cgroup_subsys cpu_cgrp_subsys = {
+ 	.can_attach	= cpu_cgroup_can_attach,
+ 	.attach		= cpu_cgroup_attach,
+ 	.legacy_cftypes	= cpu_legacy_files,
++	.dfl_cftypes	= cpu_files,
+ 	.early_init	= true,
++#ifdef CONFIG_CGROUP_CPUACCT
++	/*
++	 * cpuacct is enabled together with cpu on the unified hierarchy
++	 * and its stats are reported through "cpu.stat".
++	 */
++	.depends_on	= 1 << cpuacct_cgrp_id,
++#endif
+ };
+ 
+ #endif	/* CONFIG_CGROUP_SCHED */
+diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
+index 4dd7b8588b69..97c6dd7d8f59 100644
+--- a/kernel/sched/cpuacct.c
++++ b/kernel/sched/cpuacct.c
+@@ -347,6 +347,31 @@ static struct cftype files[] = {
+ 	{ }	/* terminate */
+ };
+ 
++/* used to print cpuacct stats in cpu.stat on the unified hierarchy */
++void cpuacct_cpu_stats_show(struct seq_file *sf)
++{
++	struct cgroup_subsys_state *css;
++	u64 usage, val[CPUACCT_STAT_NSTATS];
++
++	css = cgroup_get_e_css(seq_css(sf)->cgroup, &cpuacct_cgrp_subsys);
++
++	usage = cpuusage_read(css, seq_cft(sf));
++	cpuacct_stats_read(css_ca(css), &val);
++
++	val[CPUACCT_STAT_USER] *= TICK_NSEC;
++	val[CPUACCT_STAT_SYSTEM] *= TICK_NSEC;
++	do_div(usage, NSEC_PER_USEC);
++	do_div(val[CPUACCT_STAT_USER], NSEC_PER_USEC);
++	do_div(val[CPUACCT_STAT_SYSTEM], NSEC_PER_USEC);
++
++	seq_printf(sf, "usage_usec %llu\n"
++		   "user_usec %llu\n"
++		   "system_usec %llu\n",
++		   usage, val[CPUACCT_STAT_USER], val[CPUACCT_STAT_SYSTEM]);
++
++	css_put(css);
++}
++
+ /*
+  * charge this task's execution time to its accounting group.
+  *
+diff --git a/kernel/sched/cpuacct.h b/kernel/sched/cpuacct.h
+index ba72807c73d4..ddf7af466d35 100644
+--- a/kernel/sched/cpuacct.h
++++ b/kernel/sched/cpuacct.h
+@@ -2,6 +2,7 @@
+ 
+ extern void cpuacct_charge(struct task_struct *tsk, u64 cputime);
+ extern void cpuacct_account_field(struct task_struct *tsk, int index, u64 val);
++extern void cpuacct_cpu_stats_show(struct seq_file *sf);
+ 
+ #else
+ 
+@@ -14,4 +15,8 @@ cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
+ {
+ }
+ 
++static inline void cpuacct_cpu_stats_show(struct seq_file *sf)
++{
++}
++
+ #endif
+
+commit 2dae6b0ec091c93131e02eb56987ec6c26818f42
+Author: Tejun Heo <tj@kernel.org>
+Date:   Fri Aug 5 12:41:01 2016 -0400
+
+    cgroup: add documentation regarding CPU controller cgroup v2 support
+    
+    Signed-off-by: Tejun Heo <tj@kernel.org>
+
+diff --git a/Documentation/cgroup-v2-cpu.txt b/Documentation/cgroup-v2-cpu.txt
+new file mode 100644
+index 000000000000..1ed7032d4472
+--- /dev/null
++++ b/Documentation/cgroup-v2-cpu.txt
+@@ -0,0 +1,368 @@
++
++
++CPU Controller on Control Group v2
++
++August, 2016		Tejun Heo <tj@kernel.org>
++
++
++While most controllers have support for cgroup v2 now, the CPU
++controller support is not upstream yet due to objections from the
++scheduler maintainers on the basic designs of cgroup v2.  This
++document explains the current situation as well as an interim
++solution, and details the disagreements and arguments.  The latest
++version of this document can be found at the following URL.
++
++ https://git.kernel.org/cgit/linux/kernel/git/tj/cgroup.git/tree/Documentation/cgroup-v2-cpu.txt?h=cgroup-v2-cpu
++
++This document was posted to the linux-kernel and cgroup mailing lists.
++Unfortunately, no consensus was reached as of Oct, 2016.  The thread
++can be found at the following URL.
++
++ http://lkml.kernel.org/r/20160805170752.GK2542@mtj.duckdns.org
++
++
++CONTENTS
++
++1. Current Situation and Interim Solution
++2. Disagreements and Arguments
++  2-1. Contentious Restrictions
++    2-1-1. Process Granularity
++    2-1-2. No Internal Process Constraint
++  2-2. Impact on CPU Controller
++    2-2-1. Impact of Process Granularity
++    2-2-2. Impact of No Internal Process Constraint
++  2-3. Arguments for cgroup v2
++3. Way Forward
++4. References
++
++
++1. Current Situation and Interim Solution
++
++All objections from the scheduler maintainers apply to cgroup v2 core
++design, and there are no known objections to the specifics of the CPU
++controller cgroup v2 interface.  The only blocked part is changes to
++expose the CPU controller interface on cgroup v2, which comprises the
++following two patches:
++
++ [1] sched: Misc preps for cgroup unified hierarchy interface
++ [2] sched: Implement interface for cgroup unified hierarchy
++
++The necessary changes are superficial and implement the interface
++files on cgroup v2.  The combined diffstat is as follows.
++
++ kernel/sched/core.c    |  149 +++++++++++++++++++++++++++++++++++++++++++++++--
++ kernel/sched/cpuacct.c |   57 ++++++++++++------
++ kernel/sched/cpuacct.h |    5 +
++ 3 files changed, 189 insertions(+), 22 deletions(-)
++
++The patches are easy to apply and forward-port.  The following git
++branch will always carry the two patches on top of the latest release
++of the upstream kernel.
++
++ git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git/cgroup-v2-cpu
++
++There also are versioned branches going back to v4.4.
++
++ git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git/cgroup-v2-cpu-$KERNEL_VER
++
++While it's difficult to tell whether the CPU controller support will
++be merged, there are crucial resource control features in cgroup v2
++that are only possible due to the design choices that are being
++objected to, and every effort will be made to ease enabling the CPU
++controller cgroup v2 support out-of-tree for parties which choose to.
++
++
++2. Disagreements and Arguments
++
++There have been several lengthy discussion threads [3][4] on LKML
++around the structural constraints of cgroup v2.  The two that affect
++the CPU controller are process granularity and no internal process
++constraint.  Both arise primarily from the need for common resource
++domain definition across different resources.
++
++The common resource domain is a powerful concept in cgroup v2 that
++allows controllers to make basic assumptions about the structural
++organization of processes and controllers inside the cgroup hierarchy,
++and thus solve problems spanning multiple types of resources.  The
++prime example for this is page cache writeback: dirty page cache is
++regulated through throttling buffered writers based on memory
++availability, and initiating batched write outs to the disk based on
++IO capacity.  Tracking and controlling writeback inside a cgroup thus
++requires the direct cooperation of the memory and the IO controller.
++
++This easily extends to other areas, such as CPU cycles consumed while
++performing memory reclaim or IO encryption.
++
++
++2-1. Contentious Restrictions
++
++For controllers of different resources to work together, they must
++agree on a common organization.  This uniform model across controllers
++imposes two contentious restrictions on the CPU controller: process
++granularity and the no-internal-process constraint.
++
++
++  2-1-1. Process Granularity
++
++  For memory, because an address space is shared between all threads
++  of a process, the terminal consumer is a process, not a thread.
++  Separating the threads of a single process into different memory
++  control domains doesn't make semantical sense.  cgroup v2 ensures
++  that all controller can agree on the same organization by requiring
++  that threads of the same process belong to the same cgroup.
++
++  There are other reasons to enforce process granularity.  One
++  important one is isolating system-level management operations from
++  in-process application operations.  The cgroup interface, being a
++  virtual filesystem, is very unfit for multiple independent
++  operations taking place at the same time as most operations have to
++  be multi-step and there is no way to synchronize multiple accessors.
++  See also [5] Documentation/cgroup-v2.txt, "R-2. Thread Granularity"
++
++
++  2-1-2. No Internal Process Constraint
++
++  cgroup v2 does not allow processes to belong to any cgroup which has
++  child cgroups when resource controllers are enabled on it (the
++  notable exception being the root cgroup itself).  This is because,
++  for some resources, a resource domain (cgroup) is not directly
++  comparable to the terminal consumer (process/task) of said resource,
++  and so putting the two into a sibling relationship isn't meaningful.
++
++  - Differing Control Parameters and Capabilities
++
++    A cgroup controller has different resource control parameters and
++    capabilities from a terminal consumer, be that a task or process.
++    There are a couple cases where a cgroup control knob can be mapped
++    to a per-task or per-process API but they are exceptions and the
++    mappings aren't obvious even in those cases.
++
++    For example, task priorities (also known as nice values) set
++    through setpriority(2) are mapped to the CPU controller
++    "cpu.shares" values.  However, how exactly the two ranges map and
++    even the fact that they map to each other at all are not obvious.
++
++    The situation gets further muddled when considering other resource
++    types and control knobs.  IO priorities set through ioprio_set(2)
++    cannot be mapped to IO controller weights and most cgroup resource
++    control knobs including the bandwidth control knobs of the CPU
++    controller don't have counterparts in the terminal consumers.
++
++  - Anonymous Resource Consumption
++
++    For CPU, every time slice consumed from inside a cgroup, which
++    comprises most but not all of consumed CPU time for the cgroup,
++    can be clearly attributed to a specific task or process.  Because
++    these two types of entities are directly comparable as consumers
++    of CPU time, it's theoretically possible to mix tasks and cgroups
++    on the same tree levels and let them directly compete for the time
++    quota available to their common ancestor.
++
++    However, the same can't be said for resource types like memory or
++    IO: the memory consumed by the page cache, for example, can be
++    tracked on a per-cgroup level, but due to mismatches in lifetimes
++    of involved objects (page cache can persist long after processes
++    are gone), shared usages and the implementation overhead of
++    tracking persistent state, it can no longer be attributed to
++    individual processes after instantiation.  Consequently, any IO
++    incurred by page cache writeback can be attributed to a cgroup,
++    but not to the individual consumers inside the cgroup.
++
++  For memory and IO, this makes a resource domain (cgroup) an object
++  of a fundamentally different type than a terminal consumer
++  (process).  A process can't be a first class object in the resource
++  distribution graph as its total resource consumption can't be
++  described without the containing resource domain.
++
++  Disallowing processes in internal cgroups avoids competition between
++  cgroups and processes which cannot be meaningfully defined for these
++  resources.  All resource control takes place among cgroups and a
++  terminal consumer interacts with the containing cgroup the same way
++  it would with the system without cgroup.
++
++  Root cgroup is exempt from this constraint, which is in line with
++  how root cgroup is handled in general - it's excluded from cgroup
++  resource accounting and control.
++
++
++Enforcing process granularity and no internal process constraint
++allows all controllers to be on the same footing in terms of resource
++distribution hierarchy.
++
++
++2-2. Impact on CPU Controller
++
++As indicated earlier, the CPU controller's resource distribution graph
++is the simplest.  Every schedulable resource consumption can be
++attributed to a specific task.  In addition, for weight based control,
++the per-task priority set through setpriority(2) can be translated to
++and from a per-cgroup weight.  As such, the CPU controller can treat a
++task and a cgroup symmetrically, allowing support for any tree layout
++of cgroups and tasks.  Both process granularity and the no internal
++process constraint restrict how the CPU controller can be used.
++
++
++  2-2-1. Impact of Process Granularity
++
++  Process granularity prevents tasks belonging to the same process to
++  be assigned to different cgroups.  It was pointed out [6] that this
++  excludes the valid use case of hierarchical CPU distribution within
++  processes.
++
++  To address this issue, the rgroup (resource group) [7][8][9]
++  interface, an extension of the existing setpriority(2) API, was
++  proposed, which is in line with other programmable priority
++  mechanisms and eliminates the risk of in-application configuration
++  and system configuration stepping on each other's toes.
++  Unfortunately, the proposal quickly turned into discussions around
++  cgroup v2 design decisions [4] and no consensus could be reached.
++
++
++  2-2-2. Impact of No Internal Process Constraint
++
++  The no internal process constraint disallows tasks from competing
++  directly against cgroups.  Here is an excerpt from Peter Zijlstra
++  pointing out the issue [10] - R, L and A are cgroups; t1, t2, t3 and
++  t4 are tasks:
++
++
++          R
++        / | \
++       t1 t2 A
++           /   \
++          t3   t4
++
++
++    Is fundamentally different from:
++
++
++               R
++             /   \
++           L       A
++         /   \   /   \
++        t1  t2  t3   t4
++
++
++    Because if in the first hierarchy you add a task (t5) to R, all of
++    its A will run at 1/4th of total bandwidth where before it had
++    1/3rd, whereas with the second example, if you add our t5 to L, A
++    doesn't get any less bandwidth.
++
++
++  It is true that the trees are semantically different from each other
++  and the symmetric handling of tasks and cgroups is aesthetically
++  pleasing.  However, it isn't clear what the practical usefulness of
++  a layout with direct competition between tasks and cgroups would be,
++  considering that number and behavior of tasks are controlled by each
++  application, and cgroups primarily deal with system level resource
++  distribution; changes in the number of active threads would directly
++  impact resource distribution.  Real world use cases of such layouts
++  could not be established during the discussions.
++
++
++2-3. Arguments for cgroup v2
++
++There are strong demands for comprehensive hierarchical resource
++control across all major resources, and establishing a common resource
++hierarchy is an essential step.  As with most engineering decisions,
++common resource hierarchy definition comes with its trade-offs.  With
++cgroup v2, the trade-offs are in the form of structural constraints
++which, among others, restrict the CPU controller's space of possible
++configurations.
++
++However, even with the restrictions, cgroup v2, in combination with
++rgroup, covers most of identified real world use cases while enabling
++new important use cases of resource control across multiple resource
++types that were fundamentally broken previously.
++
++Furthermore, for resource control, treating resource domains as
++objects of a different type from terminal consumers has important
++advantages - it can account for resource consumptions which are not
++tied to any specific terminal consumer, be that a task or process, and
++allows decoupling resource distribution controls from in-application
++APIs.  Even the CPU controller may benefit from it as the kernel can
++consume significant amount of CPU cycles in interrupt context or tasks
++shared across multiple resource domains (e.g. softirq).
++
++Finally, it's important to note that enabling cgroup v2 support for
++the CPU controller doesn't block use cases which require the features
++which are not available on cgroup v2.  Unlikely, but should anybody
++actually rely on the CPU controller's symmetric handling of tasks and
++cgroups, backward compatibility is and will be maintained by being
++able to disconnect the controller from the cgroup v2 hierarchy and use
++it standalone.  This also holds for cpuset which is often used in
++highly customized configurations which might be a poor fit for common
++resource domains.
++
++The required changes are minimal, the benefits for the target use
++cases are critical and obvious, and use cases which have to use v1 can
++continue to do so.
++
++
++3. Way Forward
++
++cgroup v2 primarily aims to solve the problem of comprehensive
++hierarchical resource control across all major computing resources,
++which is one of the core problems of modern server infrastructure
++engineering.  The trade-offs that cgroup v2 took are results of
++pursuing that goal and gaining a better understanding of the nature of
++resource control in the process.
++
++I believe that real world usages will prove cgroup v2's model right,
++considering the crucial pieces of comprehensive resource control that
++cannot be implemented without common resource domains.  This is not to
++say that cgroup v2 is fixed in stone and can't be updated; if there is
++an approach which better serves both comprehensive resource control
++and the CPU controller's flexibility, we will surely move towards
++that.  It goes without saying that discussions around such approach
++should consider practical aspects of resource control as a whole
++rather than absolutely focusing on a particular controller.
++
++Until such consensus can be reached, the CPU controller cgroup v2
++support will be maintained out of the mainline kernel in an easily
++accessible form.  If there is anything cgroup developers can do to
++ease the pain, please feel free to contact us on the cgroup mailing
++list at cgroups@vger.kernel.org.
++
++
++4. References
++
++[1]  http://lkml.kernel.org/r/20160105164834.GE5995@mtj.duckdns.org
++     [PATCH 1/2] sched: Misc preps for cgroup unified hierarchy interface
++     Tejun Heo <tj@kernel.org>
++
++[2]  http://lkml.kernel.org/r/20160105164852.GF5995@mtj.duckdns.org
++     [PATCH 2/2] sched: Implement interface for cgroup unified hierarchy
++     Tejun Heo <tj@kernel.org>
++
++[3]  http://lkml.kernel.org/r/1438641689-14655-4-git-send-email-tj@kernel.org
++     [PATCH 3/3] sched: Implement interface for cgroup unified hierarchy
++     Tejun Heo <tj@kernel.org>
++
++[4]  http://lkml.kernel.org/r/20160407064549.GH3430@twins.programming.kicks-ass.net
++     Re: [PATCHSET RFC cgroup/for-4.6] cgroup, sched: implement resource group and PRIO_RGRP
++     Peter Zijlstra <peterz@infradead.org>
++
++[5]  https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/cgroup-v2.txt
++     Control Group v2
++     Tejun Heo <tj@kernel.org>
++
++[6]  http://lkml.kernel.org/r/CAPM31RJNy3jgG=DYe6GO=wyL4BPPxwUm1f2S6YXacQmo7viFZA@mail.gmail.com
++     Re: [PATCH 3/3] sched: Implement interface for cgroup unified hierarchy
++     Paul Turner <pjt@google.com>
++
++[7]  http://lkml.kernel.org/r/20160105154503.GC5995@mtj.duckdns.org
++     [RFD] cgroup: thread granularity support for cpu controller
++     Tejun Heo <tj@kernel.org>
++
++[8]  http://lkml.kernel.org/r/1457710888-31182-1-git-send-email-tj@kernel.org
++     [PATCHSET RFC cgroup/for-4.6] cgroup, sched: implement resource group and PRIO_RGRP
++     Tejun Heo <tj@kernel.org>
++
++[9]  http://lkml.kernel.org/r/20160311160522.GA24046@htj.duckdns.org
++     Example program for PRIO_RGRP
++     Tejun Heo <tj@kernel.org>
++
++[10] http://lkml.kernel.org/r/20160407082810.GN3430@twins.programming.kicks-ass.net
++     Re: [PATCHSET RFC cgroup/for-4.6] cgroup, sched: implement resource
++     Peter Zijlstra <peterz@infradead.org>
diff --git a/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.11.patch b/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.11.patch
new file mode 100644
index 000000000000..38cc0532ba97
--- /dev/null
+++ b/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.11.patch
@@ -0,0 +1,784 @@
+commit 827b86ad1dd21feed4c0b99faf6059f245f7dadb
+Author: Tejun Heo <tj@kernel.org>
+Date:   Fri Mar 11 07:31:23 2016 -0500
+
+    sched: Misc preps for cgroup unified hierarchy interface
+    
+    Make the following changes in preparation for the cpu controller
+    interface implementation for the unified hierarchy.  This patch
+    doesn't cause any functional differences.
+    
+    * s/cpu_stats_show()/cpu_cfs_stats_show()/
+    
+    * s/cpu_files/cpu_legacy_files/
+    
+    * Separate out cpuacct_stats_read() from cpuacct_stats_show().  While
+      at it, make the @val array u64 for consistency.
+    
+    Signed-off-by: Tejun Heo <tj@kernel.org>
+    Cc: Ingo Molnar <mingo@redhat.com>
+    Cc: Peter Zijlstra <peterz@infradead.org>
+    Cc: Li Zefan <lizefan@huawei.com>
+    Cc: Johannes Weiner <hannes@cmpxchg.org>
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 3b31fc05a0f1..a1b95e83fa87 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -7174,7 +7174,7 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota)
+ 	return ret;
+ }
+ 
+-static int cpu_stats_show(struct seq_file *sf, void *v)
++static int cpu_cfs_stats_show(struct seq_file *sf, void *v)
+ {
+ 	struct task_group *tg = css_tg(seq_css(sf));
+ 	struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
+@@ -7214,7 +7214,7 @@ static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css,
+ }
+ #endif /* CONFIG_RT_GROUP_SCHED */
+ 
+-static struct cftype cpu_files[] = {
++static struct cftype cpu_legacy_files[] = {
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+ 	{
+ 		.name = "shares",
+@@ -7235,7 +7235,7 @@ static struct cftype cpu_files[] = {
+ 	},
+ 	{
+ 		.name = "stat",
+-		.seq_show = cpu_stats_show,
++		.seq_show = cpu_cfs_stats_show,
+ 	},
+ #endif
+ #ifdef CONFIG_RT_GROUP_SCHED
+@@ -7261,7 +7261,7 @@ struct cgroup_subsys cpu_cgrp_subsys = {
+ 	.fork		= cpu_cgroup_fork,
+ 	.can_attach	= cpu_cgroup_can_attach,
+ 	.attach		= cpu_cgroup_attach,
+-	.legacy_cftypes	= cpu_files,
++	.legacy_cftypes	= cpu_legacy_files,
+ 	.early_init	= true,
+ };
+ 
+diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
+index f95ab29a45d0..6151c23f722f 100644
+--- a/kernel/sched/cpuacct.c
++++ b/kernel/sched/cpuacct.c
+@@ -276,26 +276,33 @@ static int cpuacct_all_seq_show(struct seq_file *m, void *V)
+ 	return 0;
+ }
+ 
+-static int cpuacct_stats_show(struct seq_file *sf, void *v)
++static void cpuacct_stats_read(struct cpuacct *ca,
++			       u64 (*val)[CPUACCT_STAT_NSTATS])
+ {
+-	struct cpuacct *ca = css_ca(seq_css(sf));
+-	s64 val[CPUACCT_STAT_NSTATS];
+ 	int cpu;
+-	int stat;
+ 
+-	memset(val, 0, sizeof(val));
++	memset(val, 0, sizeof(*val));
++
+ 	for_each_possible_cpu(cpu) {
+ 		u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
+ 
+-		val[CPUACCT_STAT_USER]   += cpustat[CPUTIME_USER];
+-		val[CPUACCT_STAT_USER]   += cpustat[CPUTIME_NICE];
+-		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SYSTEM];
+-		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_IRQ];
+-		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SOFTIRQ];
++		(*val)[CPUACCT_STAT_USER]   += cpustat[CPUTIME_USER];
++		(*val)[CPUACCT_STAT_USER]   += cpustat[CPUTIME_NICE];
++		(*val)[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SYSTEM];
++		(*val)[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_IRQ];
++		(*val)[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SOFTIRQ];
+ 	}
++}
++
++static int cpuacct_stats_show(struct seq_file *sf, void *v)
++{
++	u64 val[CPUACCT_STAT_NSTATS];
++	int stat;
++
++	cpuacct_stats_read(css_ca(seq_css(sf)), &val);
+ 
+ 	for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) {
+-		seq_printf(sf, "%s %lld\n",
++		seq_printf(sf, "%s %llu\n",
+ 			   cpuacct_stat_desc[stat],
+ 			   (long long)nsec_to_clock_t(val[stat]));
+ 	}
+
+commit fdb64d002b3a223ce4bb11aa4448a42050470052
+Author: Tejun Heo <tj@kernel.org>
+Date:   Fri Mar 11 07:31:23 2016 -0500
+
+    sched: Implement interface for cgroup unified hierarchy
+    
+    While the cpu controller doesn't have any functional problems, there
+    are a couple interface issues which can be addressed in the v2
+    interface.
+    
+    * cpuacct being a separate controller.  This separation is artificial
+      and rather pointless as demonstrated by most use cases co-mounting
+      the two controllers.  It also forces certain information to be
+      accounted twice.
+    
+    * Use of different time units.  Writable control knobs use
+      microseconds, some stat fields use nanoseconds while other cpuacct
+      stat fields use centiseconds.
+    
+    * Control knobs which can't be used in the root cgroup still show up
+      in the root.
+    
+    * Control knob names and semantics aren't consistent with other
+      controllers.
+    
+    This patchset implements cpu controller's interface on the unified
+    hierarchy which adheres to the controller file conventions described
+    in Documentation/cgroups/unified-hierarchy.txt.  Overall, the
+    following changes are made.
+    
+    * cpuacct is implictly enabled and disabled by cpu and its information
+      is reported through "cpu.stat" which now uses microseconds for all
+      time durations.  All time duration fields now have "_usec" appended
+      to them for clarity.  While this doesn't solve the double accounting
+      immediately, once majority of users switch to v2, cpu can directly
+      account and report the relevant stats and cpuacct can be disabled on
+      the unified hierarchy.
+    
+      Note that cpuacct.usage_percpu is currently not included in
+      "cpu.stat".  If this information is actually called for, it can be
+      added later.
+    
+    * "cpu.shares" is replaced with "cpu.weight" and operates on the
+      standard scale defined by CGROUP_WEIGHT_MIN/DFL/MAX (1, 100, 10000).
+      The weight is scaled to scheduler weight so that 100 maps to 1024
+      and the ratio relationship is preserved - if weight is W and its
+      scaled value is S, W / 100 == S / 1024.  While the mapped range is a
+      bit smaller than the orignal scheduler weight range, the dead zones
+      on both sides are relatively small and covers wider range than the
+      nice value mappings.  This file doesn't make sense in the root
+      cgroup and isn't create on root.
+    
+    * "cpu.cfs_quota_us" and "cpu.cfs_period_us" are replaced by "cpu.max"
+      which contains both quota and period.
+    
+    * "cpu.rt_runtime_us" and "cpu.rt_period_us" are replaced by
+      "cpu.rt.max" which contains both runtime and period.
+    
+    v2: cpu_stats_show() was incorrectly using CONFIG_FAIR_GROUP_SCHED for
+        CFS bandwidth stats and also using raw division for u64.  Use
+        CONFIG_CFS_BANDWITH and do_div() instead.
+    
+        The semantics of "cpu.rt.max" is not fully decided yet.  Dropped
+        for now.
+    
+    Signed-off-by: Tejun Heo <tj@kernel.org>
+    Cc: Ingo Molnar <mingo@redhat.com>
+    Cc: Peter Zijlstra <peterz@infradead.org>
+    Cc: Li Zefan <lizefan@huawei.com>
+    Cc: Johannes Weiner <hannes@cmpxchg.org>
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index a1b95e83fa87..f01d56e58a1b 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -7253,6 +7253,139 @@ static struct cftype cpu_legacy_files[] = {
+ 	{ }	/* Terminate */
+ };
+ 
++static int cpu_stats_show(struct seq_file *sf, void *v)
++{
++	cpuacct_cpu_stats_show(sf);
++
++#ifdef CONFIG_CFS_BANDWIDTH
++	{
++		struct task_group *tg = css_tg(seq_css(sf));
++		struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
++		u64 throttled_usec;
++
++		throttled_usec = cfs_b->throttled_time;
++		do_div(throttled_usec, NSEC_PER_USEC);
++
++		seq_printf(sf, "nr_periods %d\n"
++			   "nr_throttled %d\n"
++			   "throttled_usec %llu\n",
++			   cfs_b->nr_periods, cfs_b->nr_throttled,
++			   throttled_usec);
++	}
++#endif
++	return 0;
++}
++
++#ifdef CONFIG_FAIR_GROUP_SCHED
++static u64 cpu_weight_read_u64(struct cgroup_subsys_state *css,
++			       struct cftype *cft)
++{
++	struct task_group *tg = css_tg(css);
++	u64 weight = scale_load_down(tg->shares);
++
++	return DIV_ROUND_CLOSEST_ULL(weight * CGROUP_WEIGHT_DFL, 1024);
++}
++
++static int cpu_weight_write_u64(struct cgroup_subsys_state *css,
++				struct cftype *cftype, u64 weight)
++{
++	/*
++	 * cgroup weight knobs should use the common MIN, DFL and MAX
++	 * values which are 1, 100 and 10000 respectively.  While it loses
++	 * a bit of range on both ends, it maps pretty well onto the shares
++	 * value used by scheduler and the round-trip conversions preserve
++	 * the original value over the entire range.
++	 */
++	if (weight < CGROUP_WEIGHT_MIN || weight > CGROUP_WEIGHT_MAX)
++		return -ERANGE;
++
++	weight = DIV_ROUND_CLOSEST_ULL(weight * 1024, CGROUP_WEIGHT_DFL);
++
++	return sched_group_set_shares(css_tg(css), scale_load(weight));
++}
++#endif
++
++static void __maybe_unused cpu_period_quota_print(struct seq_file *sf,
++						  long period, long quota)
++{
++	if (quota < 0)
++		seq_puts(sf, "max");
++	else
++		seq_printf(sf, "%ld", quota);
++
++	seq_printf(sf, " %ld\n", period);
++}
++
++/* caller should put the current value in *@periodp before calling */
++static int __maybe_unused cpu_period_quota_parse(char *buf,
++						 u64 *periodp, u64 *quotap)
++{
++	char tok[21];	/* U64_MAX */
++
++	if (!sscanf(buf, "%s %llu", tok, periodp))
++		return -EINVAL;
++
++	*periodp *= NSEC_PER_USEC;
++
++	if (sscanf(tok, "%llu", quotap))
++		*quotap *= NSEC_PER_USEC;
++	else if (!strcmp(tok, "max"))
++		*quotap = RUNTIME_INF;
++	else
++		return -EINVAL;
++
++	return 0;
++}
++
++#ifdef CONFIG_CFS_BANDWIDTH
++static int cpu_max_show(struct seq_file *sf, void *v)
++{
++	struct task_group *tg = css_tg(seq_css(sf));
++
++	cpu_period_quota_print(sf, tg_get_cfs_period(tg), tg_get_cfs_quota(tg));
++	return 0;
++}
++
++static ssize_t cpu_max_write(struct kernfs_open_file *of,
++			     char *buf, size_t nbytes, loff_t off)
++{
++	struct task_group *tg = css_tg(of_css(of));
++	u64 period = tg_get_cfs_period(tg);
++	u64 quota;
++	int ret;
++
++	ret = cpu_period_quota_parse(buf, &period, &quota);
++	if (!ret)
++		ret = tg_set_cfs_bandwidth(tg, period, quota);
++	return ret ?: nbytes;
++}
++#endif
++
++static struct cftype cpu_files[] = {
++	{
++		.name = "stat",
++		.flags = CFTYPE_NOT_ON_ROOT,
++		.seq_show = cpu_stats_show,
++	},
++#ifdef CONFIG_FAIR_GROUP_SCHED
++	{
++		.name = "weight",
++		.flags = CFTYPE_NOT_ON_ROOT,
++		.read_u64 = cpu_weight_read_u64,
++		.write_u64 = cpu_weight_write_u64,
++	},
++#endif
++#ifdef CONFIG_CFS_BANDWIDTH
++	{
++		.name = "max",
++		.flags = CFTYPE_NOT_ON_ROOT,
++		.seq_show = cpu_max_show,
++		.write = cpu_max_write,
++	},
++#endif
++	{ }	/* terminate */
++};
++
+ struct cgroup_subsys cpu_cgrp_subsys = {
+ 	.css_alloc	= cpu_cgroup_css_alloc,
+ 	.css_online	= cpu_cgroup_css_online,
+@@ -7262,7 +7395,15 @@ struct cgroup_subsys cpu_cgrp_subsys = {
+ 	.can_attach	= cpu_cgroup_can_attach,
+ 	.attach		= cpu_cgroup_attach,
+ 	.legacy_cftypes	= cpu_legacy_files,
++	.dfl_cftypes	= cpu_files,
+ 	.early_init	= true,
++#ifdef CONFIG_CGROUP_CPUACCT
++	/*
++	 * cpuacct is enabled together with cpu on the unified hierarchy
++	 * and its stats are reported through "cpu.stat".
++	 */
++	.depends_on	= 1 << cpuacct_cgrp_id,
++#endif
+ };
+ 
+ #endif	/* CONFIG_CGROUP_SCHED */
+diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
+index 6151c23f722f..fc1cf13c3af1 100644
+--- a/kernel/sched/cpuacct.c
++++ b/kernel/sched/cpuacct.c
+@@ -347,6 +347,31 @@ static struct cftype files[] = {
+ 	{ }	/* terminate */
+ };
+ 
++/* used to print cpuacct stats in cpu.stat on the unified hierarchy */
++void cpuacct_cpu_stats_show(struct seq_file *sf)
++{
++	struct cgroup_subsys_state *css;
++	u64 usage, val[CPUACCT_STAT_NSTATS];
++
++	css = cgroup_get_e_css(seq_css(sf)->cgroup, &cpuacct_cgrp_subsys);
++
++	usage = cpuusage_read(css, seq_cft(sf));
++	cpuacct_stats_read(css_ca(css), &val);
++
++	val[CPUACCT_STAT_USER] *= TICK_NSEC;
++	val[CPUACCT_STAT_SYSTEM] *= TICK_NSEC;
++	do_div(usage, NSEC_PER_USEC);
++	do_div(val[CPUACCT_STAT_USER], NSEC_PER_USEC);
++	do_div(val[CPUACCT_STAT_SYSTEM], NSEC_PER_USEC);
++
++	seq_printf(sf, "usage_usec %llu\n"
++		   "user_usec %llu\n"
++		   "system_usec %llu\n",
++		   usage, val[CPUACCT_STAT_USER], val[CPUACCT_STAT_SYSTEM]);
++
++	css_put(css);
++}
++
+ /*
+  * charge this task's execution time to its accounting group.
+  *
+diff --git a/kernel/sched/cpuacct.h b/kernel/sched/cpuacct.h
+index ba72807c73d4..ddf7af466d35 100644
+--- a/kernel/sched/cpuacct.h
++++ b/kernel/sched/cpuacct.h
+@@ -2,6 +2,7 @@
+ 
+ extern void cpuacct_charge(struct task_struct *tsk, u64 cputime);
+ extern void cpuacct_account_field(struct task_struct *tsk, int index, u64 val);
++extern void cpuacct_cpu_stats_show(struct seq_file *sf);
+ 
+ #else
+ 
+@@ -14,4 +15,8 @@ cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
+ {
+ }
+ 
++static inline void cpuacct_cpu_stats_show(struct seq_file *sf)
++{
++}
++
+ #endif
+
+commit 8dde150866b8c433216105c50b7e889d5242d583
+Author: Tejun Heo <tj@kernel.org>
+Date:   Fri Aug 5 12:41:01 2016 -0400
+
+    cgroup: add documentation regarding CPU controller cgroup v2 support
+    
+    Signed-off-by: Tejun Heo <tj@kernel.org>
+
+diff --git a/Documentation/cgroup-v2-cpu.txt b/Documentation/cgroup-v2-cpu.txt
+new file mode 100644
+index 000000000000..1ed7032d4472
+--- /dev/null
++++ b/Documentation/cgroup-v2-cpu.txt
+@@ -0,0 +1,368 @@
++
++
++CPU Controller on Control Group v2
++
++August, 2016		Tejun Heo <tj@kernel.org>
++
++
++While most controllers have support for cgroup v2 now, the CPU
++controller support is not upstream yet due to objections from the
++scheduler maintainers on the basic designs of cgroup v2.  This
++document explains the current situation as well as an interim
++solution, and details the disagreements and arguments.  The latest
++version of this document can be found at the following URL.
++
++ https://git.kernel.org/cgit/linux/kernel/git/tj/cgroup.git/tree/Documentation/cgroup-v2-cpu.txt?h=cgroup-v2-cpu
++
++This document was posted to the linux-kernel and cgroup mailing lists.
++Unfortunately, no consensus was reached as of Oct, 2016.  The thread
++can be found at the following URL.
++
++ http://lkml.kernel.org/r/20160805170752.GK2542@mtj.duckdns.org
++
++
++CONTENTS
++
++1. Current Situation and Interim Solution
++2. Disagreements and Arguments
++  2-1. Contentious Restrictions
++    2-1-1. Process Granularity
++    2-1-2. No Internal Process Constraint
++  2-2. Impact on CPU Controller
++    2-2-1. Impact of Process Granularity
++    2-2-2. Impact of No Internal Process Constraint
++  2-3. Arguments for cgroup v2
++3. Way Forward
++4. References
++
++
++1. Current Situation and Interim Solution
++
++All objections from the scheduler maintainers apply to cgroup v2 core
++design, and there are no known objections to the specifics of the CPU
++controller cgroup v2 interface.  The only blocked part is changes to
++expose the CPU controller interface on cgroup v2, which comprises the
++following two patches:
++
++ [1] sched: Misc preps for cgroup unified hierarchy interface
++ [2] sched: Implement interface for cgroup unified hierarchy
++
++The necessary changes are superficial and implement the interface
++files on cgroup v2.  The combined diffstat is as follows.
++
++ kernel/sched/core.c    |  149 +++++++++++++++++++++++++++++++++++++++++++++++--
++ kernel/sched/cpuacct.c |   57 ++++++++++++------
++ kernel/sched/cpuacct.h |    5 +
++ 3 files changed, 189 insertions(+), 22 deletions(-)
++
++The patches are easy to apply and forward-port.  The following git
++branch will always carry the two patches on top of the latest release
++of the upstream kernel.
++
++ git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git/cgroup-v2-cpu
++
++There also are versioned branches going back to v4.4.
++
++ git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git/cgroup-v2-cpu-$KERNEL_VER
++
++While it's difficult to tell whether the CPU controller support will
++be merged, there are crucial resource control features in cgroup v2
++that are only possible due to the design choices that are being
++objected to, and every effort will be made to ease enabling the CPU
++controller cgroup v2 support out-of-tree for parties which choose to.
++
++
++2. Disagreements and Arguments
++
++There have been several lengthy discussion threads [3][4] on LKML
++around the structural constraints of cgroup v2.  The two that affect
++the CPU controller are process granularity and no internal process
++constraint.  Both arise primarily from the need for common resource
++domain definition across different resources.
++
++The common resource domain is a powerful concept in cgroup v2 that
++allows controllers to make basic assumptions about the structural
++organization of processes and controllers inside the cgroup hierarchy,
++and thus solve problems spanning multiple types of resources.  The
++prime example for this is page cache writeback: dirty page cache is
++regulated through throttling buffered writers based on memory
++availability, and initiating batched write outs to the disk based on
++IO capacity.  Tracking and controlling writeback inside a cgroup thus
++requires the direct cooperation of the memory and the IO controller.
++
++This easily extends to other areas, such as CPU cycles consumed while
++performing memory reclaim or IO encryption.
++
++
++2-1. Contentious Restrictions
++
++For controllers of different resources to work together, they must
++agree on a common organization.  This uniform model across controllers
++imposes two contentious restrictions on the CPU controller: process
++granularity and the no-internal-process constraint.
++
++
++  2-1-1. Process Granularity
++
++  For memory, because an address space is shared between all threads
++  of a process, the terminal consumer is a process, not a thread.
++  Separating the threads of a single process into different memory
++  control domains doesn't make semantical sense.  cgroup v2 ensures
++  that all controller can agree on the same organization by requiring
++  that threads of the same process belong to the same cgroup.
++
++  There are other reasons to enforce process granularity.  One
++  important one is isolating system-level management operations from
++  in-process application operations.  The cgroup interface, being a
++  virtual filesystem, is very unfit for multiple independent
++  operations taking place at the same time as most operations have to
++  be multi-step and there is no way to synchronize multiple accessors.
++  See also [5] Documentation/cgroup-v2.txt, "R-2. Thread Granularity"
++
++
++  2-1-2. No Internal Process Constraint
++
++  cgroup v2 does not allow processes to belong to any cgroup which has
++  child cgroups when resource controllers are enabled on it (the
++  notable exception being the root cgroup itself).  This is because,
++  for some resources, a resource domain (cgroup) is not directly
++  comparable to the terminal consumer (process/task) of said resource,
++  and so putting the two into a sibling relationship isn't meaningful.
++
++  - Differing Control Parameters and Capabilities
++
++    A cgroup controller has different resource control parameters and
++    capabilities from a terminal consumer, be that a task or process.
++    There are a couple cases where a cgroup control knob can be mapped
++    to a per-task or per-process API but they are exceptions and the
++    mappings aren't obvious even in those cases.
++
++    For example, task priorities (also known as nice values) set
++    through setpriority(2) are mapped to the CPU controller
++    "cpu.shares" values.  However, how exactly the two ranges map and
++    even the fact that they map to each other at all are not obvious.
++
++    The situation gets further muddled when considering other resource
++    types and control knobs.  IO priorities set through ioprio_set(2)
++    cannot be mapped to IO controller weights and most cgroup resource
++    control knobs including the bandwidth control knobs of the CPU
++    controller don't have counterparts in the terminal consumers.
++
++  - Anonymous Resource Consumption
++
++    For CPU, every time slice consumed from inside a cgroup, which
++    comprises most but not all of consumed CPU time for the cgroup,
++    can be clearly attributed to a specific task or process.  Because
++    these two types of entities are directly comparable as consumers
++    of CPU time, it's theoretically possible to mix tasks and cgroups
++    on the same tree levels and let them directly compete for the time
++    quota available to their common ancestor.
++
++    However, the same can't be said for resource types like memory or
++    IO: the memory consumed by the page cache, for example, can be
++    tracked on a per-cgroup level, but due to mismatches in lifetimes
++    of involved objects (page cache can persist long after processes
++    are gone), shared usages and the implementation overhead of
++    tracking persistent state, it can no longer be attributed to
++    individual processes after instantiation.  Consequently, any IO
++    incurred by page cache writeback can be attributed to a cgroup,
++    but not to the individual consumers inside the cgroup.
++
++  For memory and IO, this makes a resource domain (cgroup) an object
++  of a fundamentally different type than a terminal consumer
++  (process).  A process can't be a first class object in the resource
++  distribution graph as its total resource consumption can't be
++  described without the containing resource domain.
++
++  Disallowing processes in internal cgroups avoids competition between
++  cgroups and processes which cannot be meaningfully defined for these
++  resources.  All resource control takes place among cgroups and a
++  terminal consumer interacts with the containing cgroup the same way
++  it would with the system without cgroup.
++
++  Root cgroup is exempt from this constraint, which is in line with
++  how root cgroup is handled in general - it's excluded from cgroup
++  resource accounting and control.
++
++
++Enforcing process granularity and no internal process constraint
++allows all controllers to be on the same footing in terms of resource
++distribution hierarchy.
++
++
++2-2. Impact on CPU Controller
++
++As indicated earlier, the CPU controller's resource distribution graph
++is the simplest.  Every schedulable resource consumption can be
++attributed to a specific task.  In addition, for weight based control,
++the per-task priority set through setpriority(2) can be translated to
++and from a per-cgroup weight.  As such, the CPU controller can treat a
++task and a cgroup symmetrically, allowing support for any tree layout
++of cgroups and tasks.  Both process granularity and the no internal
++process constraint restrict how the CPU controller can be used.
++
++
++  2-2-1. Impact of Process Granularity
++
++  Process granularity prevents tasks belonging to the same process to
++  be assigned to different cgroups.  It was pointed out [6] that this
++  excludes the valid use case of hierarchical CPU distribution within
++  processes.
++
++  To address this issue, the rgroup (resource group) [7][8][9]
++  interface, an extension of the existing setpriority(2) API, was
++  proposed, which is in line with other programmable priority
++  mechanisms and eliminates the risk of in-application configuration
++  and system configuration stepping on each other's toes.
++  Unfortunately, the proposal quickly turned into discussions around
++  cgroup v2 design decisions [4] and no consensus could be reached.
++
++
++  2-2-2. Impact of No Internal Process Constraint
++
++  The no internal process constraint disallows tasks from competing
++  directly against cgroups.  Here is an excerpt from Peter Zijlstra
++  pointing out the issue [10] - R, L and A are cgroups; t1, t2, t3 and
++  t4 are tasks:
++
++
++          R
++        / | \
++       t1 t2 A
++           /   \
++          t3   t4
++
++
++    Is fundamentally different from:
++
++
++               R
++             /   \
++           L       A
++         /   \   /   \
++        t1  t2  t3   t4
++
++
++    Because if in the first hierarchy you add a task (t5) to R, all of
++    its A will run at 1/4th of total bandwidth where before it had
++    1/3rd, whereas with the second example, if you add our t5 to L, A
++    doesn't get any less bandwidth.
++
++
++  It is true that the trees are semantically different from each other
++  and the symmetric handling of tasks and cgroups is aesthetically
++  pleasing.  However, it isn't clear what the practical usefulness of
++  a layout with direct competition between tasks and cgroups would be,
++  considering that number and behavior of tasks are controlled by each
++  application, and cgroups primarily deal with system level resource
++  distribution; changes in the number of active threads would directly
++  impact resource distribution.  Real world use cases of such layouts
++  could not be established during the discussions.
++
++
++2-3. Arguments for cgroup v2
++
++There are strong demands for comprehensive hierarchical resource
++control across all major resources, and establishing a common resource
++hierarchy is an essential step.  As with most engineering decisions,
++common resource hierarchy definition comes with its trade-offs.  With
++cgroup v2, the trade-offs are in the form of structural constraints
++which, among others, restrict the CPU controller's space of possible
++configurations.
++
++However, even with the restrictions, cgroup v2, in combination with
++rgroup, covers most of identified real world use cases while enabling
++new important use cases of resource control across multiple resource
++types that were fundamentally broken previously.
++
++Furthermore, for resource control, treating resource domains as
++objects of a different type from terminal consumers has important
++advantages - it can account for resource consumptions which are not
++tied to any specific terminal consumer, be that a task or process, and
++allows decoupling resource distribution controls from in-application
++APIs.  Even the CPU controller may benefit from it as the kernel can
++consume significant amount of CPU cycles in interrupt context or tasks
++shared across multiple resource domains (e.g. softirq).
++
++Finally, it's important to note that enabling cgroup v2 support for
++the CPU controller doesn't block use cases which require the features
++which are not available on cgroup v2.  Unlikely, but should anybody
++actually rely on the CPU controller's symmetric handling of tasks and
++cgroups, backward compatibility is and will be maintained by being
++able to disconnect the controller from the cgroup v2 hierarchy and use
++it standalone.  This also holds for cpuset which is often used in
++highly customized configurations which might be a poor fit for common
++resource domains.
++
++The required changes are minimal, the benefits for the target use
++cases are critical and obvious, and use cases which have to use v1 can
++continue to do so.
++
++
++3. Way Forward
++
++cgroup v2 primarily aims to solve the problem of comprehensive
++hierarchical resource control across all major computing resources,
++which is one of the core problems of modern server infrastructure
++engineering.  The trade-offs that cgroup v2 took are results of
++pursuing that goal and gaining a better understanding of the nature of
++resource control in the process.
++
++I believe that real world usages will prove cgroup v2's model right,
++considering the crucial pieces of comprehensive resource control that
++cannot be implemented without common resource domains.  This is not to
++say that cgroup v2 is fixed in stone and can't be updated; if there is
++an approach which better serves both comprehensive resource control
++and the CPU controller's flexibility, we will surely move towards
++that.  It goes without saying that discussions around such approach
++should consider practical aspects of resource control as a whole
++rather than absolutely focusing on a particular controller.
++
++Until such consensus can be reached, the CPU controller cgroup v2
++support will be maintained out of the mainline kernel in an easily
++accessible form.  If there is anything cgroup developers can do to
++ease the pain, please feel free to contact us on the cgroup mailing
++list at cgroups@vger.kernel.org.
++
++
++4. References
++
++[1]  http://lkml.kernel.org/r/20160105164834.GE5995@mtj.duckdns.org
++     [PATCH 1/2] sched: Misc preps for cgroup unified hierarchy interface
++     Tejun Heo <tj@kernel.org>
++
++[2]  http://lkml.kernel.org/r/20160105164852.GF5995@mtj.duckdns.org
++     [PATCH 2/2] sched: Implement interface for cgroup unified hierarchy
++     Tejun Heo <tj@kernel.org>
++
++[3]  http://lkml.kernel.org/r/1438641689-14655-4-git-send-email-tj@kernel.org
++     [PATCH 3/3] sched: Implement interface for cgroup unified hierarchy
++     Tejun Heo <tj@kernel.org>
++
++[4]  http://lkml.kernel.org/r/20160407064549.GH3430@twins.programming.kicks-ass.net
++     Re: [PATCHSET RFC cgroup/for-4.6] cgroup, sched: implement resource group and PRIO_RGRP
++     Peter Zijlstra <peterz@infradead.org>
++
++[5]  https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/cgroup-v2.txt
++     Control Group v2
++     Tejun Heo <tj@kernel.org>
++
++[6]  http://lkml.kernel.org/r/CAPM31RJNy3jgG=DYe6GO=wyL4BPPxwUm1f2S6YXacQmo7viFZA@mail.gmail.com
++     Re: [PATCH 3/3] sched: Implement interface for cgroup unified hierarchy
++     Paul Turner <pjt@google.com>
++
++[7]  http://lkml.kernel.org/r/20160105154503.GC5995@mtj.duckdns.org
++     [RFD] cgroup: thread granularity support for cpu controller
++     Tejun Heo <tj@kernel.org>
++
++[8]  http://lkml.kernel.org/r/1457710888-31182-1-git-send-email-tj@kernel.org
++     [PATCHSET RFC cgroup/for-4.6] cgroup, sched: implement resource group and PRIO_RGRP
++     Tejun Heo <tj@kernel.org>
++
++[9]  http://lkml.kernel.org/r/20160311160522.GA24046@htj.duckdns.org
++     Example program for PRIO_RGRP
++     Tejun Heo <tj@kernel.org>
++
++[10] http://lkml.kernel.org/r/20160407082810.GN3430@twins.programming.kicks-ass.net
++     Re: [PATCHSET RFC cgroup/for-4.6] cgroup, sched: implement resource
++     Peter Zijlstra <peterz@infradead.org>
diff --git a/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.6.patch b/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.6.patch
deleted file mode 100644
index f64908317cac..000000000000
--- a/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.6.patch
+++ /dev/null
@@ -1,407 +0,0 @@
-commit 6426c5b02d4aab620219b08a5d97ad8851b56b0d
-Author: Tejun Heo <tj@kernel.org>
-Date:   Fri Mar 11 07:31:23 2016 -0500
-
-    sched: Misc preps for cgroup unified hierarchy interface
-    
-    Make the following changes in preparation for the cpu controller
-    interface implementation for the unified hierarchy.  This patch
-    doesn't cause any functional differences.
-    
-    * s/cpu_stats_show()/cpu_cfs_stats_show()/
-    
-    * s/cpu_files/cpu_legacy_files/
-    
-    * Separate out cpuacct_stats_read() from cpuacct_stats_show().  While
-      at it, remove pointless cpuacct_stat_desc[] array.
-    
-    Signed-off-by: Tejun Heo <tj@kernel.org>
-    Cc: Ingo Molnar <mingo@redhat.com>
-    Cc: Peter Zijlstra <peterz@infradead.org>
-    Cc: Li Zefan <lizefan@huawei.com>
-    Cc: Johannes Weiner <hannes@cmpxchg.org>
-
-diff --git a/kernel/sched/core.c b/kernel/sched/core.c
-index d1f7149..0d34f35 100644
---- a/kernel/sched/core.c
-+++ b/kernel/sched/core.c
-@@ -8371,7 +8371,7 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota)
- 	return ret;
- }
- 
--static int cpu_stats_show(struct seq_file *sf, void *v)
-+static int cpu_cfs_stats_show(struct seq_file *sf, void *v)
- {
- 	struct task_group *tg = css_tg(seq_css(sf));
- 	struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
-@@ -8411,7 +8411,7 @@ static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css,
- }
- #endif /* CONFIG_RT_GROUP_SCHED */
- 
--static struct cftype cpu_files[] = {
-+static struct cftype cpu_legacy_files[] = {
- #ifdef CONFIG_FAIR_GROUP_SCHED
- 	{
- 		.name = "shares",
-@@ -8432,7 +8432,7 @@ static struct cftype cpu_files[] = {
- 	},
- 	{
- 		.name = "stat",
--		.seq_show = cpu_stats_show,
-+		.seq_show = cpu_cfs_stats_show,
- 	},
- #endif
- #ifdef CONFIG_RT_GROUP_SCHED
-@@ -8457,7 +8457,7 @@ struct cgroup_subsys cpu_cgrp_subsys = {
- 	.fork		= cpu_cgroup_fork,
- 	.can_attach	= cpu_cgroup_can_attach,
- 	.attach		= cpu_cgroup_attach,
--	.legacy_cftypes	= cpu_files,
-+	.legacy_cftypes	= cpu_legacy_files,
- 	.early_init	= true,
- };
- 
-diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
-index 4a81120..b99030a 100644
---- a/kernel/sched/cpuacct.c
-+++ b/kernel/sched/cpuacct.c
-@@ -180,36 +180,33 @@ static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
- 	return 0;
- }
- 
--static const char * const cpuacct_stat_desc[] = {
--	[CPUACCT_STAT_USER] = "user",
--	[CPUACCT_STAT_SYSTEM] = "system",
--};
--
--static int cpuacct_stats_show(struct seq_file *sf, void *v)
-+static void cpuacct_stats_read(struct cpuacct *ca, u64 *userp, u64 *sysp)
- {
--	struct cpuacct *ca = css_ca(seq_css(sf));
- 	int cpu;
--	s64 val = 0;
- 
-+	*userp = 0;
- 	for_each_online_cpu(cpu) {
- 		struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
--		val += kcpustat->cpustat[CPUTIME_USER];
--		val += kcpustat->cpustat[CPUTIME_NICE];
-+		*userp += kcpustat->cpustat[CPUTIME_USER];
-+		*userp += kcpustat->cpustat[CPUTIME_NICE];
- 	}
--	val = cputime64_to_clock_t(val);
--	seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_USER], val);
- 
--	val = 0;
-+	*sysp = 0;
- 	for_each_online_cpu(cpu) {
- 		struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
--		val += kcpustat->cpustat[CPUTIME_SYSTEM];
--		val += kcpustat->cpustat[CPUTIME_IRQ];
--		val += kcpustat->cpustat[CPUTIME_SOFTIRQ];
-+		*sysp += kcpustat->cpustat[CPUTIME_SYSTEM];
-+		*sysp += kcpustat->cpustat[CPUTIME_IRQ];
-+		*sysp += kcpustat->cpustat[CPUTIME_SOFTIRQ];
- 	}
-+}
- 
--	val = cputime64_to_clock_t(val);
--	seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val);
-+static int cpuacct_stats_show(struct seq_file *sf, void *v)
-+{
-+	cputime64_t user, sys;
- 
-+	cpuacct_stats_read(css_ca(seq_css(sf)), &user, &sys);
-+	seq_printf(sf, "user %lld\n", cputime64_to_clock_t(user));
-+	seq_printf(sf, "system %lld\n", cputime64_to_clock_t(sys));
- 	return 0;
- }
- 
-
-commit d2a799f795a5d5a69c9dc365c34f926e0649f840
-Author: Tejun Heo <tj@kernel.org>
-Date:   Fri Mar 11 07:31:23 2016 -0500
-
-    sched: Implement interface for cgroup unified hierarchy
-    
-    While the cpu controller doesn't have any functional problems, there
-    are a couple interface issues which can be addressed in the v2
-    interface.
-    
-    * cpuacct being a separate controller.  This separation is artificial
-      and rather pointless as demonstrated by most use cases co-mounting
-      the two controllers.  It also forces certain information to be
-      accounted twice.
-    
-    * Use of different time units.  Writable control knobs use
-      microseconds, some stat fields use nanoseconds while other cpuacct
-      stat fields use centiseconds.
-    
-    * Control knobs which can't be used in the root cgroup still show up
-      in the root.
-    
-    * Control knob names and semantics aren't consistent with other
-      controllers.
-    
-    This patchset implements cpu controller's interface on the unified
-    hierarchy which adheres to the controller file conventions described
-    in Documentation/cgroups/unified-hierarchy.txt.  Overall, the
-    following changes are made.
-    
-    * cpuacct is implictly enabled and disabled by cpu and its information
-      is reported through "cpu.stat" which now uses microseconds for all
-      time durations.  All time duration fields now have "_usec" appended
-      to them for clarity.  While this doesn't solve the double accounting
-      immediately, once majority of users switch to v2, cpu can directly
-      account and report the relevant stats and cpuacct can be disabled on
-      the unified hierarchy.
-    
-      Note that cpuacct.usage_percpu is currently not included in
-      "cpu.stat".  If this information is actually called for, it can be
-      added later.
-    
-    * "cpu.shares" is replaced with "cpu.weight" and operates on the
-      standard scale defined by CGROUP_WEIGHT_MIN/DFL/MAX (1, 100, 10000).
-      The weight is scaled to scheduler weight so that 100 maps to 1024
-      and the ratio relationship is preserved - if weight is W and its
-      scaled value is S, W / 100 == S / 1024.  While the mapped range is a
-      bit smaller than the orignal scheduler weight range, the dead zones
-      on both sides are relatively small and covers wider range than the
-      nice value mappings.  This file doesn't make sense in the root
-      cgroup and isn't create on root.
-    
-    * "cpu.cfs_quota_us" and "cpu.cfs_period_us" are replaced by "cpu.max"
-      which contains both quota and period.
-    
-    * "cpu.rt_runtime_us" and "cpu.rt_period_us" are replaced by
-      "cpu.rt.max" which contains both runtime and period.
-    
-    v2: cpu_stats_show() was incorrectly using CONFIG_FAIR_GROUP_SCHED for
-        CFS bandwidth stats and also using raw division for u64.  Use
-        CONFIG_CFS_BANDWITH and do_div() instead.
-    
-        The semantics of "cpu.rt.max" is not fully decided yet.  Dropped
-        for now.
-    
-    Signed-off-by: Tejun Heo <tj@kernel.org>
-    Cc: Ingo Molnar <mingo@redhat.com>
-    Cc: Peter Zijlstra <peterz@infradead.org>
-    Cc: Li Zefan <lizefan@huawei.com>
-    Cc: Johannes Weiner <hannes@cmpxchg.org>
-
-diff --git a/kernel/sched/core.c b/kernel/sched/core.c
-index 0d34f35..5990efc 100644
---- a/kernel/sched/core.c
-+++ b/kernel/sched/core.c
-@@ -8450,6 +8450,139 @@ static struct cftype cpu_legacy_files[] = {
- 	{ }	/* terminate */
- };
- 
-+static int cpu_stats_show(struct seq_file *sf, void *v)
-+{
-+	cpuacct_cpu_stats_show(sf);
-+
-+#ifdef CONFIG_CFS_BANDWIDTH
-+	{
-+		struct task_group *tg = css_tg(seq_css(sf));
-+		struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
-+		u64 throttled_usec;
-+
-+		throttled_usec = cfs_b->throttled_time;
-+		do_div(throttled_usec, NSEC_PER_USEC);
-+
-+		seq_printf(sf, "nr_periods %d\n"
-+			   "nr_throttled %d\n"
-+			   "throttled_usec %llu\n",
-+			   cfs_b->nr_periods, cfs_b->nr_throttled,
-+			   throttled_usec);
-+	}
-+#endif
-+	return 0;
-+}
-+
-+#ifdef CONFIG_FAIR_GROUP_SCHED
-+static u64 cpu_weight_read_u64(struct cgroup_subsys_state *css,
-+			       struct cftype *cft)
-+{
-+	struct task_group *tg = css_tg(css);
-+	u64 weight = scale_load_down(tg->shares);
-+
-+	return DIV_ROUND_CLOSEST_ULL(weight * CGROUP_WEIGHT_DFL, 1024);
-+}
-+
-+static int cpu_weight_write_u64(struct cgroup_subsys_state *css,
-+				struct cftype *cftype, u64 weight)
-+{
-+	/*
-+	 * cgroup weight knobs should use the common MIN, DFL and MAX
-+	 * values which are 1, 100 and 10000 respectively.  While it loses
-+	 * a bit of range on both ends, it maps pretty well onto the shares
-+	 * value used by scheduler and the round-trip conversions preserve
-+	 * the original value over the entire range.
-+	 */
-+	if (weight < CGROUP_WEIGHT_MIN || weight > CGROUP_WEIGHT_MAX)
-+		return -ERANGE;
-+
-+	weight = DIV_ROUND_CLOSEST_ULL(weight * 1024, CGROUP_WEIGHT_DFL);
-+
-+	return sched_group_set_shares(css_tg(css), scale_load(weight));
-+}
-+#endif
-+
-+static void __maybe_unused cpu_period_quota_print(struct seq_file *sf,
-+						  long period, long quota)
-+{
-+	if (quota < 0)
-+		seq_puts(sf, "max");
-+	else
-+		seq_printf(sf, "%ld", quota);
-+
-+	seq_printf(sf, " %ld\n", period);
-+}
-+
-+/* caller should put the current value in *@periodp before calling */
-+static int __maybe_unused cpu_period_quota_parse(char *buf,
-+						 u64 *periodp, u64 *quotap)
-+{
-+	char tok[21];	/* U64_MAX */
-+
-+	if (!sscanf(buf, "%s %llu", tok, periodp))
-+		return -EINVAL;
-+
-+	*periodp *= NSEC_PER_USEC;
-+
-+	if (sscanf(tok, "%llu", quotap))
-+		*quotap *= NSEC_PER_USEC;
-+	else if (!strcmp(tok, "max"))
-+		*quotap = RUNTIME_INF;
-+	else
-+		return -EINVAL;
-+
-+	return 0;
-+}
-+
-+#ifdef CONFIG_CFS_BANDWIDTH
-+static int cpu_max_show(struct seq_file *sf, void *v)
-+{
-+	struct task_group *tg = css_tg(seq_css(sf));
-+
-+	cpu_period_quota_print(sf, tg_get_cfs_period(tg), tg_get_cfs_quota(tg));
-+	return 0;
-+}
-+
-+static ssize_t cpu_max_write(struct kernfs_open_file *of,
-+			     char *buf, size_t nbytes, loff_t off)
-+{
-+	struct task_group *tg = css_tg(of_css(of));
-+	u64 period = tg_get_cfs_period(tg);
-+	u64 quota;
-+	int ret;
-+
-+	ret = cpu_period_quota_parse(buf, &period, &quota);
-+	if (!ret)
-+		ret = tg_set_cfs_bandwidth(tg, period, quota);
-+	return ret ?: nbytes;
-+}
-+#endif
-+
-+static struct cftype cpu_files[] = {
-+	{
-+		.name = "stat",
-+		.flags = CFTYPE_NOT_ON_ROOT,
-+		.seq_show = cpu_stats_show,
-+	},
-+#ifdef CONFIG_FAIR_GROUP_SCHED
-+	{
-+		.name = "weight",
-+		.flags = CFTYPE_NOT_ON_ROOT,
-+		.read_u64 = cpu_weight_read_u64,
-+		.write_u64 = cpu_weight_write_u64,
-+	},
-+#endif
-+#ifdef CONFIG_CFS_BANDWIDTH
-+	{
-+		.name = "max",
-+		.flags = CFTYPE_NOT_ON_ROOT,
-+		.seq_show = cpu_max_show,
-+		.write = cpu_max_write,
-+	},
-+#endif
-+	{ }	/* terminate */
-+};
-+
- struct cgroup_subsys cpu_cgrp_subsys = {
- 	.css_alloc	= cpu_cgroup_css_alloc,
- 	.css_released	= cpu_cgroup_css_released,
-@@ -8458,7 +8591,15 @@ struct cgroup_subsys cpu_cgrp_subsys = {
- 	.can_attach	= cpu_cgroup_can_attach,
- 	.attach		= cpu_cgroup_attach,
- 	.legacy_cftypes	= cpu_legacy_files,
-+	.dfl_cftypes	= cpu_files,
- 	.early_init	= true,
-+#ifdef CONFIG_CGROUP_CPUACCT
-+	/*
-+	 * cpuacct is enabled together with cpu on the unified hierarchy
-+	 * and its stats are reported through "cpu.stat".
-+	 */
-+	.depends_on	= 1 << cpuacct_cgrp_id,
-+#endif
- };
- 
- #endif	/* CONFIG_CGROUP_SCHED */
-diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
-index b99030a..a1a5a4b 100644
---- a/kernel/sched/cpuacct.c
-+++ b/kernel/sched/cpuacct.c
-@@ -227,6 +227,30 @@ static struct cftype files[] = {
- 	{ }	/* terminate */
- };
- 
-+/* used to print cpuacct stats in cpu.stat on the unified hierarchy */
-+void cpuacct_cpu_stats_show(struct seq_file *sf)
-+{
-+	struct cgroup_subsys_state *css;
-+	u64 usage, user, sys;
-+
-+	css = cgroup_get_e_css(seq_css(sf)->cgroup, &cpuacct_cgrp_subsys);
-+
-+	usage = cpuusage_read(css, seq_cft(sf));
-+	cpuacct_stats_read(css_ca(css), &user, &sys);
-+
-+	user *= TICK_NSEC;
-+	sys *= TICK_NSEC;
-+	do_div(usage, NSEC_PER_USEC);
-+	do_div(user, NSEC_PER_USEC);
-+	do_div(sys, NSEC_PER_USEC);
-+
-+	seq_printf(sf, "usage_usec %llu\n"
-+		   "user_usec %llu\n"
-+		   "system_usec %llu\n", usage, user, sys);
-+
-+	css_put(css);
-+}
-+
- /*
-  * charge this task's execution time to its accounting group.
-  *
-diff --git a/kernel/sched/cpuacct.h b/kernel/sched/cpuacct.h
-index ba72807..ddf7af4 100644
---- a/kernel/sched/cpuacct.h
-+++ b/kernel/sched/cpuacct.h
-@@ -2,6 +2,7 @@
- 
- extern void cpuacct_charge(struct task_struct *tsk, u64 cputime);
- extern void cpuacct_account_field(struct task_struct *tsk, int index, u64 val);
-+extern void cpuacct_cpu_stats_show(struct seq_file *sf);
- 
- #else
- 
-@@ -14,4 +15,8 @@ cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
- {
- }
- 
-+static inline void cpuacct_cpu_stats_show(struct seq_file *sf)
-+{
-+}
-+
- #endif
diff --git a/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.7.patch b/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.7.patch
deleted file mode 100644
index 74dae740e37c..000000000000
--- a/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.7.patch
+++ /dev/null
@@ -1,407 +0,0 @@
-commit 0d966df508ef4d6c0b1baae9e369f4fb0d3e10af
-Author: Tejun Heo <tj@kernel.org>
-Date:   Fri Mar 11 07:31:23 2016 -0500
-
-    sched: Misc preps for cgroup unified hierarchy interface
-    
-    Make the following changes in preparation for the cpu controller
-    interface implementation for the unified hierarchy.  This patch
-    doesn't cause any functional differences.
-    
-    * s/cpu_stats_show()/cpu_cfs_stats_show()/
-    
-    * s/cpu_files/cpu_legacy_files/
-    
-    * Separate out cpuacct_stats_read() from cpuacct_stats_show().  While
-      at it, remove pointless cpuacct_stat_desc[] array.
-    
-    Signed-off-by: Tejun Heo <tj@kernel.org>
-    Cc: Ingo Molnar <mingo@redhat.com>
-    Cc: Peter Zijlstra <peterz@infradead.org>
-    Cc: Li Zefan <lizefan@huawei.com>
-    Cc: Johannes Weiner <hannes@cmpxchg.org>
-
-diff --git a/kernel/sched/core.c b/kernel/sched/core.c
-index 97ee9ac..c148dfe 100644
---- a/kernel/sched/core.c
-+++ b/kernel/sched/core.c
-@@ -8482,7 +8482,7 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota)
- 	return ret;
- }
- 
--static int cpu_stats_show(struct seq_file *sf, void *v)
-+static int cpu_cfs_stats_show(struct seq_file *sf, void *v)
- {
- 	struct task_group *tg = css_tg(seq_css(sf));
- 	struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
-@@ -8522,7 +8522,7 @@ static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css,
- }
- #endif /* CONFIG_RT_GROUP_SCHED */
- 
--static struct cftype cpu_files[] = {
-+static struct cftype cpu_legacy_files[] = {
- #ifdef CONFIG_FAIR_GROUP_SCHED
- 	{
- 		.name = "shares",
-@@ -8543,7 +8543,7 @@ static struct cftype cpu_files[] = {
- 	},
- 	{
- 		.name = "stat",
--		.seq_show = cpu_stats_show,
-+		.seq_show = cpu_cfs_stats_show,
- 	},
- #endif
- #ifdef CONFIG_RT_GROUP_SCHED
-@@ -8568,7 +8568,7 @@ struct cgroup_subsys cpu_cgrp_subsys = {
- 	.fork		= cpu_cgroup_fork,
- 	.can_attach	= cpu_cgroup_can_attach,
- 	.attach		= cpu_cgroup_attach,
--	.legacy_cftypes	= cpu_files,
-+	.legacy_cftypes	= cpu_legacy_files,
- 	.early_init	= true,
- };
- 
-diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
-index 41f85c4..3eb9eda 100644
---- a/kernel/sched/cpuacct.c
-+++ b/kernel/sched/cpuacct.c
-@@ -242,36 +242,33 @@ static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
- 	return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_NRUSAGE);
- }
- 
--static const char * const cpuacct_stat_desc[] = {
--	[CPUACCT_STAT_USER] = "user",
--	[CPUACCT_STAT_SYSTEM] = "system",
--};
--
--static int cpuacct_stats_show(struct seq_file *sf, void *v)
-+static void cpuacct_stats_read(struct cpuacct *ca, u64 *userp, u64 *sysp)
- {
--	struct cpuacct *ca = css_ca(seq_css(sf));
- 	int cpu;
--	s64 val = 0;
- 
-+	*userp = 0;
- 	for_each_possible_cpu(cpu) {
- 		struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
--		val += kcpustat->cpustat[CPUTIME_USER];
--		val += kcpustat->cpustat[CPUTIME_NICE];
-+		*userp += kcpustat->cpustat[CPUTIME_USER];
-+		*userp += kcpustat->cpustat[CPUTIME_NICE];
- 	}
--	val = cputime64_to_clock_t(val);
--	seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_USER], val);
- 
--	val = 0;
-+	*sysp = 0;
- 	for_each_possible_cpu(cpu) {
- 		struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
--		val += kcpustat->cpustat[CPUTIME_SYSTEM];
--		val += kcpustat->cpustat[CPUTIME_IRQ];
--		val += kcpustat->cpustat[CPUTIME_SOFTIRQ];
-+		*sysp += kcpustat->cpustat[CPUTIME_SYSTEM];
-+		*sysp += kcpustat->cpustat[CPUTIME_IRQ];
-+		*sysp += kcpustat->cpustat[CPUTIME_SOFTIRQ];
- 	}
-+}
- 
--	val = cputime64_to_clock_t(val);
--	seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val);
-+static int cpuacct_stats_show(struct seq_file *sf, void *v)
-+{
-+	cputime64_t user, sys;
- 
-+	cpuacct_stats_read(css_ca(seq_css(sf)), &user, &sys);
-+	seq_printf(sf, "user %lld\n", cputime64_to_clock_t(user));
-+	seq_printf(sf, "system %lld\n", cputime64_to_clock_t(sys));
- 	return 0;
- }
- 
-
-commit ed6d93036ec930cb774da10b7c87f67905ce71f1
-Author: Tejun Heo <tj@kernel.org>
-Date:   Fri Mar 11 07:31:23 2016 -0500
-
-    sched: Implement interface for cgroup unified hierarchy
-    
-    While the cpu controller doesn't have any functional problems, there
-    are a couple interface issues which can be addressed in the v2
-    interface.
-    
-    * cpuacct being a separate controller.  This separation is artificial
-      and rather pointless as demonstrated by most use cases co-mounting
-      the two controllers.  It also forces certain information to be
-      accounted twice.
-    
-    * Use of different time units.  Writable control knobs use
-      microseconds, some stat fields use nanoseconds while other cpuacct
-      stat fields use centiseconds.
-    
-    * Control knobs which can't be used in the root cgroup still show up
-      in the root.
-    
-    * Control knob names and semantics aren't consistent with other
-      controllers.
-    
-    This patchset implements cpu controller's interface on the unified
-    hierarchy which adheres to the controller file conventions described
-    in Documentation/cgroups/unified-hierarchy.txt.  Overall, the
-    following changes are made.
-    
-    * cpuacct is implictly enabled and disabled by cpu and its information
-      is reported through "cpu.stat" which now uses microseconds for all
-      time durations.  All time duration fields now have "_usec" appended
-      to them for clarity.  While this doesn't solve the double accounting
-      immediately, once majority of users switch to v2, cpu can directly
-      account and report the relevant stats and cpuacct can be disabled on
-      the unified hierarchy.
-    
-      Note that cpuacct.usage_percpu is currently not included in
-      "cpu.stat".  If this information is actually called for, it can be
-      added later.
-    
-    * "cpu.shares" is replaced with "cpu.weight" and operates on the
-      standard scale defined by CGROUP_WEIGHT_MIN/DFL/MAX (1, 100, 10000).
-      The weight is scaled to scheduler weight so that 100 maps to 1024
-      and the ratio relationship is preserved - if weight is W and its
-      scaled value is S, W / 100 == S / 1024.  While the mapped range is a
-      bit smaller than the orignal scheduler weight range, the dead zones
-      on both sides are relatively small and covers wider range than the
-      nice value mappings.  This file doesn't make sense in the root
-      cgroup and isn't create on root.
-    
-    * "cpu.cfs_quota_us" and "cpu.cfs_period_us" are replaced by "cpu.max"
-      which contains both quota and period.
-    
-    * "cpu.rt_runtime_us" and "cpu.rt_period_us" are replaced by
-      "cpu.rt.max" which contains both runtime and period.
-    
-    v2: cpu_stats_show() was incorrectly using CONFIG_FAIR_GROUP_SCHED for
-        CFS bandwidth stats and also using raw division for u64.  Use
-        CONFIG_CFS_BANDWITH and do_div() instead.
-    
-        The semantics of "cpu.rt.max" is not fully decided yet.  Dropped
-        for now.
-    
-    Signed-off-by: Tejun Heo <tj@kernel.org>
-    Cc: Ingo Molnar <mingo@redhat.com>
-    Cc: Peter Zijlstra <peterz@infradead.org>
-    Cc: Li Zefan <lizefan@huawei.com>
-    Cc: Johannes Weiner <hannes@cmpxchg.org>
-
-diff --git a/kernel/sched/core.c b/kernel/sched/core.c
-index c148dfe..7bba2c5 100644
---- a/kernel/sched/core.c
-+++ b/kernel/sched/core.c
-@@ -8561,6 +8561,139 @@ static struct cftype cpu_legacy_files[] = {
- 	{ }	/* terminate */
- };
- 
-+static int cpu_stats_show(struct seq_file *sf, void *v)
-+{
-+	cpuacct_cpu_stats_show(sf);
-+
-+#ifdef CONFIG_CFS_BANDWIDTH
-+	{
-+		struct task_group *tg = css_tg(seq_css(sf));
-+		struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
-+		u64 throttled_usec;
-+
-+		throttled_usec = cfs_b->throttled_time;
-+		do_div(throttled_usec, NSEC_PER_USEC);
-+
-+		seq_printf(sf, "nr_periods %d\n"
-+			   "nr_throttled %d\n"
-+			   "throttled_usec %llu\n",
-+			   cfs_b->nr_periods, cfs_b->nr_throttled,
-+			   throttled_usec);
-+	}
-+#endif
-+	return 0;
-+}
-+
-+#ifdef CONFIG_FAIR_GROUP_SCHED
-+static u64 cpu_weight_read_u64(struct cgroup_subsys_state *css,
-+			       struct cftype *cft)
-+{
-+	struct task_group *tg = css_tg(css);
-+	u64 weight = scale_load_down(tg->shares);
-+
-+	return DIV_ROUND_CLOSEST_ULL(weight * CGROUP_WEIGHT_DFL, 1024);
-+}
-+
-+static int cpu_weight_write_u64(struct cgroup_subsys_state *css,
-+				struct cftype *cftype, u64 weight)
-+{
-+	/*
-+	 * cgroup weight knobs should use the common MIN, DFL and MAX
-+	 * values which are 1, 100 and 10000 respectively.  While it loses
-+	 * a bit of range on both ends, it maps pretty well onto the shares
-+	 * value used by scheduler and the round-trip conversions preserve
-+	 * the original value over the entire range.
-+	 */
-+	if (weight < CGROUP_WEIGHT_MIN || weight > CGROUP_WEIGHT_MAX)
-+		return -ERANGE;
-+
-+	weight = DIV_ROUND_CLOSEST_ULL(weight * 1024, CGROUP_WEIGHT_DFL);
-+
-+	return sched_group_set_shares(css_tg(css), scale_load(weight));
-+}
-+#endif
-+
-+static void __maybe_unused cpu_period_quota_print(struct seq_file *sf,
-+						  long period, long quota)
-+{
-+	if (quota < 0)
-+		seq_puts(sf, "max");
-+	else
-+		seq_printf(sf, "%ld", quota);
-+
-+	seq_printf(sf, " %ld\n", period);
-+}
-+
-+/* caller should put the current value in *@periodp before calling */
-+static int __maybe_unused cpu_period_quota_parse(char *buf,
-+						 u64 *periodp, u64 *quotap)
-+{
-+	char tok[21];	/* U64_MAX */
-+
-+	if (!sscanf(buf, "%s %llu", tok, periodp))
-+		return -EINVAL;
-+
-+	*periodp *= NSEC_PER_USEC;
-+
-+	if (sscanf(tok, "%llu", quotap))
-+		*quotap *= NSEC_PER_USEC;
-+	else if (!strcmp(tok, "max"))
-+		*quotap = RUNTIME_INF;
-+	else
-+		return -EINVAL;
-+
-+	return 0;
-+}
-+
-+#ifdef CONFIG_CFS_BANDWIDTH
-+static int cpu_max_show(struct seq_file *sf, void *v)
-+{
-+	struct task_group *tg = css_tg(seq_css(sf));
-+
-+	cpu_period_quota_print(sf, tg_get_cfs_period(tg), tg_get_cfs_quota(tg));
-+	return 0;
-+}
-+
-+static ssize_t cpu_max_write(struct kernfs_open_file *of,
-+			     char *buf, size_t nbytes, loff_t off)
-+{
-+	struct task_group *tg = css_tg(of_css(of));
-+	u64 period = tg_get_cfs_period(tg);
-+	u64 quota;
-+	int ret;
-+
-+	ret = cpu_period_quota_parse(buf, &period, &quota);
-+	if (!ret)
-+		ret = tg_set_cfs_bandwidth(tg, period, quota);
-+	return ret ?: nbytes;
-+}
-+#endif
-+
-+static struct cftype cpu_files[] = {
-+	{
-+		.name = "stat",
-+		.flags = CFTYPE_NOT_ON_ROOT,
-+		.seq_show = cpu_stats_show,
-+	},
-+#ifdef CONFIG_FAIR_GROUP_SCHED
-+	{
-+		.name = "weight",
-+		.flags = CFTYPE_NOT_ON_ROOT,
-+		.read_u64 = cpu_weight_read_u64,
-+		.write_u64 = cpu_weight_write_u64,
-+	},
-+#endif
-+#ifdef CONFIG_CFS_BANDWIDTH
-+	{
-+		.name = "max",
-+		.flags = CFTYPE_NOT_ON_ROOT,
-+		.seq_show = cpu_max_show,
-+		.write = cpu_max_write,
-+	},
-+#endif
-+	{ }	/* terminate */
-+};
-+
- struct cgroup_subsys cpu_cgrp_subsys = {
- 	.css_alloc	= cpu_cgroup_css_alloc,
- 	.css_released	= cpu_cgroup_css_released,
-@@ -8569,7 +8702,15 @@ struct cgroup_subsys cpu_cgrp_subsys = {
- 	.can_attach	= cpu_cgroup_can_attach,
- 	.attach		= cpu_cgroup_attach,
- 	.legacy_cftypes	= cpu_legacy_files,
-+	.dfl_cftypes	= cpu_files,
- 	.early_init	= true,
-+#ifdef CONFIG_CGROUP_CPUACCT
-+	/*
-+	 * cpuacct is enabled together with cpu on the unified hierarchy
-+	 * and its stats are reported through "cpu.stat".
-+	 */
-+	.depends_on	= 1 << cpuacct_cgrp_id,
-+#endif
- };
- 
- #endif	/* CONFIG_CGROUP_SCHED */
-diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
-index 3eb9eda..7a02d26 100644
---- a/kernel/sched/cpuacct.c
-+++ b/kernel/sched/cpuacct.c
-@@ -305,6 +305,30 @@ static struct cftype files[] = {
- 	{ }	/* terminate */
- };
- 
-+/* used to print cpuacct stats in cpu.stat on the unified hierarchy */
-+void cpuacct_cpu_stats_show(struct seq_file *sf)
-+{
-+	struct cgroup_subsys_state *css;
-+	u64 usage, user, sys;
-+
-+	css = cgroup_get_e_css(seq_css(sf)->cgroup, &cpuacct_cgrp_subsys);
-+
-+	usage = cpuusage_read(css, seq_cft(sf));
-+	cpuacct_stats_read(css_ca(css), &user, &sys);
-+
-+	user *= TICK_NSEC;
-+	sys *= TICK_NSEC;
-+	do_div(usage, NSEC_PER_USEC);
-+	do_div(user, NSEC_PER_USEC);
-+	do_div(sys, NSEC_PER_USEC);
-+
-+	seq_printf(sf, "usage_usec %llu\n"
-+		   "user_usec %llu\n"
-+		   "system_usec %llu\n", usage, user, sys);
-+
-+	css_put(css);
-+}
-+
- /*
-  * charge this task's execution time to its accounting group.
-  *
-diff --git a/kernel/sched/cpuacct.h b/kernel/sched/cpuacct.h
-index ba72807..ddf7af4 100644
---- a/kernel/sched/cpuacct.h
-+++ b/kernel/sched/cpuacct.h
-@@ -2,6 +2,7 @@
- 
- extern void cpuacct_charge(struct task_struct *tsk, u64 cputime);
- extern void cpuacct_account_field(struct task_struct *tsk, int index, u64 val);
-+extern void cpuacct_cpu_stats_show(struct seq_file *sf);
- 
- #else
- 
-@@ -14,4 +15,8 @@ cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
- {
- }
- 
-+static inline void cpuacct_cpu_stats_show(struct seq_file *sf)
-+{
-+}
-+
- #endif
diff --git a/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.9.patch b/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.9.patch
new file mode 100644
index 000000000000..6f0904cbce99
--- /dev/null
+++ b/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.9.patch
@@ -0,0 +1,784 @@
+commit 280858b0bb3384b9ec06b455e196b453888bd6b8
+Author: Tejun Heo <tj@kernel.org>
+Date:   Fri Mar 11 07:31:23 2016 -0500
+
+    sched: Misc preps for cgroup unified hierarchy interface
+    
+    Make the following changes in preparation for the cpu controller
+    interface implementation for the unified hierarchy.  This patch
+    doesn't cause any functional differences.
+    
+    * s/cpu_stats_show()/cpu_cfs_stats_show()/
+    
+    * s/cpu_files/cpu_legacy_files/
+    
+    * Separate out cpuacct_stats_read() from cpuacct_stats_show().  While
+      at it, make the @val array u64 for consistency.
+    
+    Signed-off-by: Tejun Heo <tj@kernel.org>
+    Cc: Ingo Molnar <mingo@redhat.com>
+    Cc: Peter Zijlstra <peterz@infradead.org>
+    Cc: Li Zefan <lizefan@huawei.com>
+    Cc: Johannes Weiner <hannes@cmpxchg.org>
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 154fd689fe02..57472485b79c 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -8705,7 +8705,7 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota)
+ 	return ret;
+ }
+ 
+-static int cpu_stats_show(struct seq_file *sf, void *v)
++static int cpu_cfs_stats_show(struct seq_file *sf, void *v)
+ {
+ 	struct task_group *tg = css_tg(seq_css(sf));
+ 	struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
+@@ -8745,7 +8745,7 @@ static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css,
+ }
+ #endif /* CONFIG_RT_GROUP_SCHED */
+ 
+-static struct cftype cpu_files[] = {
++static struct cftype cpu_legacy_files[] = {
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+ 	{
+ 		.name = "shares",
+@@ -8766,7 +8766,7 @@ static struct cftype cpu_files[] = {
+ 	},
+ 	{
+ 		.name = "stat",
+-		.seq_show = cpu_stats_show,
++		.seq_show = cpu_cfs_stats_show,
+ 	},
+ #endif
+ #ifdef CONFIG_RT_GROUP_SCHED
+@@ -8791,7 +8791,7 @@ struct cgroup_subsys cpu_cgrp_subsys = {
+ 	.fork		= cpu_cgroup_fork,
+ 	.can_attach	= cpu_cgroup_can_attach,
+ 	.attach		= cpu_cgroup_attach,
+-	.legacy_cftypes	= cpu_files,
++	.legacy_cftypes	= cpu_legacy_files,
+ 	.early_init	= true,
+ };
+ 
+diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
+index bc0b309c3f19..d1e5dd0b3a64 100644
+--- a/kernel/sched/cpuacct.c
++++ b/kernel/sched/cpuacct.c
+@@ -276,26 +276,33 @@ static int cpuacct_all_seq_show(struct seq_file *m, void *V)
+ 	return 0;
+ }
+ 
+-static int cpuacct_stats_show(struct seq_file *sf, void *v)
++static void cpuacct_stats_read(struct cpuacct *ca,
++			       u64 (*val)[CPUACCT_STAT_NSTATS])
+ {
+-	struct cpuacct *ca = css_ca(seq_css(sf));
+-	s64 val[CPUACCT_STAT_NSTATS];
+ 	int cpu;
+-	int stat;
+ 
+-	memset(val, 0, sizeof(val));
++	memset(val, 0, sizeof(*val));
++
+ 	for_each_possible_cpu(cpu) {
+ 		u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
+ 
+-		val[CPUACCT_STAT_USER]   += cpustat[CPUTIME_USER];
+-		val[CPUACCT_STAT_USER]   += cpustat[CPUTIME_NICE];
+-		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SYSTEM];
+-		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_IRQ];
+-		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SOFTIRQ];
++		(*val)[CPUACCT_STAT_USER]   += cpustat[CPUTIME_USER];
++		(*val)[CPUACCT_STAT_USER]   += cpustat[CPUTIME_NICE];
++		(*val)[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SYSTEM];
++		(*val)[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_IRQ];
++		(*val)[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SOFTIRQ];
+ 	}
++}
++
++static int cpuacct_stats_show(struct seq_file *sf, void *v)
++{
++	u64 val[CPUACCT_STAT_NSTATS];
++	int stat;
++
++	cpuacct_stats_read(css_ca(seq_css(sf)), &val);
+ 
+ 	for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) {
+-		seq_printf(sf, "%s %lld\n",
++		seq_printf(sf, "%s %llu\n",
+ 			   cpuacct_stat_desc[stat],
+ 			   cputime64_to_clock_t(val[stat]));
+ 	}
+
+commit 015cbdcb90034fd566d00de9d3d405613da3cd26
+Author: Tejun Heo <tj@kernel.org>
+Date:   Fri Mar 11 07:31:23 2016 -0500
+
+    sched: Implement interface for cgroup unified hierarchy
+    
+    While the cpu controller doesn't have any functional problems, there
+    are a couple interface issues which can be addressed in the v2
+    interface.
+    
+    * cpuacct being a separate controller.  This separation is artificial
+      and rather pointless as demonstrated by most use cases co-mounting
+      the two controllers.  It also forces certain information to be
+      accounted twice.
+    
+    * Use of different time units.  Writable control knobs use
+      microseconds, some stat fields use nanoseconds while other cpuacct
+      stat fields use centiseconds.
+    
+    * Control knobs which can't be used in the root cgroup still show up
+      in the root.
+    
+    * Control knob names and semantics aren't consistent with other
+      controllers.
+    
+    This patchset implements cpu controller's interface on the unified
+    hierarchy which adheres to the controller file conventions described
+    in Documentation/cgroups/unified-hierarchy.txt.  Overall, the
+    following changes are made.
+    
+    * cpuacct is implictly enabled and disabled by cpu and its information
+      is reported through "cpu.stat" which now uses microseconds for all
+      time durations.  All time duration fields now have "_usec" appended
+      to them for clarity.  While this doesn't solve the double accounting
+      immediately, once majority of users switch to v2, cpu can directly
+      account and report the relevant stats and cpuacct can be disabled on
+      the unified hierarchy.
+    
+      Note that cpuacct.usage_percpu is currently not included in
+      "cpu.stat".  If this information is actually called for, it can be
+      added later.
+    
+    * "cpu.shares" is replaced with "cpu.weight" and operates on the
+      standard scale defined by CGROUP_WEIGHT_MIN/DFL/MAX (1, 100, 10000).
+      The weight is scaled to scheduler weight so that 100 maps to 1024
+      and the ratio relationship is preserved - if weight is W and its
+      scaled value is S, W / 100 == S / 1024.  While the mapped range is a
+      bit smaller than the orignal scheduler weight range, the dead zones
+      on both sides are relatively small and covers wider range than the
+      nice value mappings.  This file doesn't make sense in the root
+      cgroup and isn't create on root.
+    
+    * "cpu.cfs_quota_us" and "cpu.cfs_period_us" are replaced by "cpu.max"
+      which contains both quota and period.
+    
+    * "cpu.rt_runtime_us" and "cpu.rt_period_us" are replaced by
+      "cpu.rt.max" which contains both runtime and period.
+    
+    v2: cpu_stats_show() was incorrectly using CONFIG_FAIR_GROUP_SCHED for
+        CFS bandwidth stats and also using raw division for u64.  Use
+        CONFIG_CFS_BANDWITH and do_div() instead.
+    
+        The semantics of "cpu.rt.max" is not fully decided yet.  Dropped
+        for now.
+    
+    Signed-off-by: Tejun Heo <tj@kernel.org>
+    Cc: Ingo Molnar <mingo@redhat.com>
+    Cc: Peter Zijlstra <peterz@infradead.org>
+    Cc: Li Zefan <lizefan@huawei.com>
+    Cc: Johannes Weiner <hannes@cmpxchg.org>
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 57472485b79c..c0ae869f51c4 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -8784,6 +8784,139 @@ static struct cftype cpu_legacy_files[] = {
+ 	{ }	/* terminate */
+ };
+ 
++static int cpu_stats_show(struct seq_file *sf, void *v)
++{
++	cpuacct_cpu_stats_show(sf);
++
++#ifdef CONFIG_CFS_BANDWIDTH
++	{
++		struct task_group *tg = css_tg(seq_css(sf));
++		struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
++		u64 throttled_usec;
++
++		throttled_usec = cfs_b->throttled_time;
++		do_div(throttled_usec, NSEC_PER_USEC);
++
++		seq_printf(sf, "nr_periods %d\n"
++			   "nr_throttled %d\n"
++			   "throttled_usec %llu\n",
++			   cfs_b->nr_periods, cfs_b->nr_throttled,
++			   throttled_usec);
++	}
++#endif
++	return 0;
++}
++
++#ifdef CONFIG_FAIR_GROUP_SCHED
++static u64 cpu_weight_read_u64(struct cgroup_subsys_state *css,
++			       struct cftype *cft)
++{
++	struct task_group *tg = css_tg(css);
++	u64 weight = scale_load_down(tg->shares);
++
++	return DIV_ROUND_CLOSEST_ULL(weight * CGROUP_WEIGHT_DFL, 1024);
++}
++
++static int cpu_weight_write_u64(struct cgroup_subsys_state *css,
++				struct cftype *cftype, u64 weight)
++{
++	/*
++	 * cgroup weight knobs should use the common MIN, DFL and MAX
++	 * values which are 1, 100 and 10000 respectively.  While it loses
++	 * a bit of range on both ends, it maps pretty well onto the shares
++	 * value used by scheduler and the round-trip conversions preserve
++	 * the original value over the entire range.
++	 */
++	if (weight < CGROUP_WEIGHT_MIN || weight > CGROUP_WEIGHT_MAX)
++		return -ERANGE;
++
++	weight = DIV_ROUND_CLOSEST_ULL(weight * 1024, CGROUP_WEIGHT_DFL);
++
++	return sched_group_set_shares(css_tg(css), scale_load(weight));
++}
++#endif
++
++static void __maybe_unused cpu_period_quota_print(struct seq_file *sf,
++						  long period, long quota)
++{
++	if (quota < 0)
++		seq_puts(sf, "max");
++	else
++		seq_printf(sf, "%ld", quota);
++
++	seq_printf(sf, " %ld\n", period);
++}
++
++/* caller should put the current value in *@periodp before calling */
++static int __maybe_unused cpu_period_quota_parse(char *buf,
++						 u64 *periodp, u64 *quotap)
++{
++	char tok[21];	/* U64_MAX */
++
++	if (!sscanf(buf, "%s %llu", tok, periodp))
++		return -EINVAL;
++
++	*periodp *= NSEC_PER_USEC;
++
++	if (sscanf(tok, "%llu", quotap))
++		*quotap *= NSEC_PER_USEC;
++	else if (!strcmp(tok, "max"))
++		*quotap = RUNTIME_INF;
++	else
++		return -EINVAL;
++
++	return 0;
++}
++
++#ifdef CONFIG_CFS_BANDWIDTH
++static int cpu_max_show(struct seq_file *sf, void *v)
++{
++	struct task_group *tg = css_tg(seq_css(sf));
++
++	cpu_period_quota_print(sf, tg_get_cfs_period(tg), tg_get_cfs_quota(tg));
++	return 0;
++}
++
++static ssize_t cpu_max_write(struct kernfs_open_file *of,
++			     char *buf, size_t nbytes, loff_t off)
++{
++	struct task_group *tg = css_tg(of_css(of));
++	u64 period = tg_get_cfs_period(tg);
++	u64 quota;
++	int ret;
++
++	ret = cpu_period_quota_parse(buf, &period, &quota);
++	if (!ret)
++		ret = tg_set_cfs_bandwidth(tg, period, quota);
++	return ret ?: nbytes;
++}
++#endif
++
++static struct cftype cpu_files[] = {
++	{
++		.name = "stat",
++		.flags = CFTYPE_NOT_ON_ROOT,
++		.seq_show = cpu_stats_show,
++	},
++#ifdef CONFIG_FAIR_GROUP_SCHED
++	{
++		.name = "weight",
++		.flags = CFTYPE_NOT_ON_ROOT,
++		.read_u64 = cpu_weight_read_u64,
++		.write_u64 = cpu_weight_write_u64,
++	},
++#endif
++#ifdef CONFIG_CFS_BANDWIDTH
++	{
++		.name = "max",
++		.flags = CFTYPE_NOT_ON_ROOT,
++		.seq_show = cpu_max_show,
++		.write = cpu_max_write,
++	},
++#endif
++	{ }	/* terminate */
++};
++
+ struct cgroup_subsys cpu_cgrp_subsys = {
+ 	.css_alloc	= cpu_cgroup_css_alloc,
+ 	.css_released	= cpu_cgroup_css_released,
+@@ -8792,7 +8925,15 @@ struct cgroup_subsys cpu_cgrp_subsys = {
+ 	.can_attach	= cpu_cgroup_can_attach,
+ 	.attach		= cpu_cgroup_attach,
+ 	.legacy_cftypes	= cpu_legacy_files,
++	.dfl_cftypes	= cpu_files,
+ 	.early_init	= true,
++#ifdef CONFIG_CGROUP_CPUACCT
++	/*
++	 * cpuacct is enabled together with cpu on the unified hierarchy
++	 * and its stats are reported through "cpu.stat".
++	 */
++	.depends_on	= 1 << cpuacct_cgrp_id,
++#endif
+ };
+ 
+ #endif	/* CONFIG_CGROUP_SCHED */
+diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
+index d1e5dd0b3a64..57f390514c39 100644
+--- a/kernel/sched/cpuacct.c
++++ b/kernel/sched/cpuacct.c
+@@ -347,6 +347,31 @@ static struct cftype files[] = {
+ 	{ }	/* terminate */
+ };
+ 
++/* used to print cpuacct stats in cpu.stat on the unified hierarchy */
++void cpuacct_cpu_stats_show(struct seq_file *sf)
++{
++	struct cgroup_subsys_state *css;
++	u64 usage, val[CPUACCT_STAT_NSTATS];
++
++	css = cgroup_get_e_css(seq_css(sf)->cgroup, &cpuacct_cgrp_subsys);
++
++	usage = cpuusage_read(css, seq_cft(sf));
++	cpuacct_stats_read(css_ca(css), &val);
++
++	val[CPUACCT_STAT_USER] *= TICK_NSEC;
++	val[CPUACCT_STAT_SYSTEM] *= TICK_NSEC;
++	do_div(usage, NSEC_PER_USEC);
++	do_div(val[CPUACCT_STAT_USER], NSEC_PER_USEC);
++	do_div(val[CPUACCT_STAT_SYSTEM], NSEC_PER_USEC);
++
++	seq_printf(sf, "usage_usec %llu\n"
++		   "user_usec %llu\n"
++		   "system_usec %llu\n",
++		   usage, val[CPUACCT_STAT_USER], val[CPUACCT_STAT_SYSTEM]);
++
++	css_put(css);
++}
++
+ /*
+  * charge this task's execution time to its accounting group.
+  *
+diff --git a/kernel/sched/cpuacct.h b/kernel/sched/cpuacct.h
+index ba72807c73d4..ddf7af466d35 100644
+--- a/kernel/sched/cpuacct.h
++++ b/kernel/sched/cpuacct.h
+@@ -2,6 +2,7 @@
+ 
+ extern void cpuacct_charge(struct task_struct *tsk, u64 cputime);
+ extern void cpuacct_account_field(struct task_struct *tsk, int index, u64 val);
++extern void cpuacct_cpu_stats_show(struct seq_file *sf);
+ 
+ #else
+ 
+@@ -14,4 +15,8 @@ cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
+ {
+ }
+ 
++static inline void cpuacct_cpu_stats_show(struct seq_file *sf)
++{
++}
++
+ #endif
+
+commit 5019fe3d7ec456b58d451ef06fe1f81d7d9f28a9
+Author: Tejun Heo <tj@kernel.org>
+Date:   Fri Aug 5 12:41:01 2016 -0400
+
+    cgroup: add documentation regarding CPU controller cgroup v2 support
+    
+    Signed-off-by: Tejun Heo <tj@kernel.org>
+
+diff --git a/Documentation/cgroup-v2-cpu.txt b/Documentation/cgroup-v2-cpu.txt
+new file mode 100644
+index 000000000000..1ed7032d4472
+--- /dev/null
++++ b/Documentation/cgroup-v2-cpu.txt
+@@ -0,0 +1,368 @@
++
++
++CPU Controller on Control Group v2
++
++August, 2016		Tejun Heo <tj@kernel.org>
++
++
++While most controllers have support for cgroup v2 now, the CPU
++controller support is not upstream yet due to objections from the
++scheduler maintainers on the basic designs of cgroup v2.  This
++document explains the current situation as well as an interim
++solution, and details the disagreements and arguments.  The latest
++version of this document can be found at the following URL.
++
++ https://git.kernel.org/cgit/linux/kernel/git/tj/cgroup.git/tree/Documentation/cgroup-v2-cpu.txt?h=cgroup-v2-cpu
++
++This document was posted to the linux-kernel and cgroup mailing lists.
++Unfortunately, no consensus was reached as of Oct, 2016.  The thread
++can be found at the following URL.
++
++ http://lkml.kernel.org/r/20160805170752.GK2542@mtj.duckdns.org
++
++
++CONTENTS
++
++1. Current Situation and Interim Solution
++2. Disagreements and Arguments
++  2-1. Contentious Restrictions
++    2-1-1. Process Granularity
++    2-1-2. No Internal Process Constraint
++  2-2. Impact on CPU Controller
++    2-2-1. Impact of Process Granularity
++    2-2-2. Impact of No Internal Process Constraint
++  2-3. Arguments for cgroup v2
++3. Way Forward
++4. References
++
++
++1. Current Situation and Interim Solution
++
++All objections from the scheduler maintainers apply to cgroup v2 core
++design, and there are no known objections to the specifics of the CPU
++controller cgroup v2 interface.  The only blocked part is changes to
++expose the CPU controller interface on cgroup v2, which comprises the
++following two patches:
++
++ [1] sched: Misc preps for cgroup unified hierarchy interface
++ [2] sched: Implement interface for cgroup unified hierarchy
++
++The necessary changes are superficial and implement the interface
++files on cgroup v2.  The combined diffstat is as follows.
++
++ kernel/sched/core.c    |  149 +++++++++++++++++++++++++++++++++++++++++++++++--
++ kernel/sched/cpuacct.c |   57 ++++++++++++------
++ kernel/sched/cpuacct.h |    5 +
++ 3 files changed, 189 insertions(+), 22 deletions(-)
++
++The patches are easy to apply and forward-port.  The following git
++branch will always carry the two patches on top of the latest release
++of the upstream kernel.
++
++ git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git/cgroup-v2-cpu
++
++There also are versioned branches going back to v4.4.
++
++ git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git/cgroup-v2-cpu-$KERNEL_VER
++
++While it's difficult to tell whether the CPU controller support will
++be merged, there are crucial resource control features in cgroup v2
++that are only possible due to the design choices that are being
++objected to, and every effort will be made to ease enabling the CPU
++controller cgroup v2 support out-of-tree for parties which choose to.
++
++
++2. Disagreements and Arguments
++
++There have been several lengthy discussion threads [3][4] on LKML
++around the structural constraints of cgroup v2.  The two that affect
++the CPU controller are process granularity and no internal process
++constraint.  Both arise primarily from the need for common resource
++domain definition across different resources.
++
++The common resource domain is a powerful concept in cgroup v2 that
++allows controllers to make basic assumptions about the structural
++organization of processes and controllers inside the cgroup hierarchy,
++and thus solve problems spanning multiple types of resources.  The
++prime example for this is page cache writeback: dirty page cache is
++regulated through throttling buffered writers based on memory
++availability, and initiating batched write outs to the disk based on
++IO capacity.  Tracking and controlling writeback inside a cgroup thus
++requires the direct cooperation of the memory and the IO controller.
++
++This easily extends to other areas, such as CPU cycles consumed while
++performing memory reclaim or IO encryption.
++
++
++2-1. Contentious Restrictions
++
++For controllers of different resources to work together, they must
++agree on a common organization.  This uniform model across controllers
++imposes two contentious restrictions on the CPU controller: process
++granularity and the no-internal-process constraint.
++
++
++  2-1-1. Process Granularity
++
++  For memory, because an address space is shared between all threads
++  of a process, the terminal consumer is a process, not a thread.
++  Separating the threads of a single process into different memory
++  control domains doesn't make semantical sense.  cgroup v2 ensures
++  that all controller can agree on the same organization by requiring
++  that threads of the same process belong to the same cgroup.
++
++  There are other reasons to enforce process granularity.  One
++  important one is isolating system-level management operations from
++  in-process application operations.  The cgroup interface, being a
++  virtual filesystem, is very unfit for multiple independent
++  operations taking place at the same time as most operations have to
++  be multi-step and there is no way to synchronize multiple accessors.
++  See also [5] Documentation/cgroup-v2.txt, "R-2. Thread Granularity"
++
++
++  2-1-2. No Internal Process Constraint
++
++  cgroup v2 does not allow processes to belong to any cgroup which has
++  child cgroups when resource controllers are enabled on it (the
++  notable exception being the root cgroup itself).  This is because,
++  for some resources, a resource domain (cgroup) is not directly
++  comparable to the terminal consumer (process/task) of said resource,
++  and so putting the two into a sibling relationship isn't meaningful.
++
++  - Differing Control Parameters and Capabilities
++
++    A cgroup controller has different resource control parameters and
++    capabilities from a terminal consumer, be that a task or process.
++    There are a couple cases where a cgroup control knob can be mapped
++    to a per-task or per-process API but they are exceptions and the
++    mappings aren't obvious even in those cases.
++
++    For example, task priorities (also known as nice values) set
++    through setpriority(2) are mapped to the CPU controller
++    "cpu.shares" values.  However, how exactly the two ranges map and
++    even the fact that they map to each other at all are not obvious.
++
++    The situation gets further muddled when considering other resource
++    types and control knobs.  IO priorities set through ioprio_set(2)
++    cannot be mapped to IO controller weights and most cgroup resource
++    control knobs including the bandwidth control knobs of the CPU
++    controller don't have counterparts in the terminal consumers.
++
++  - Anonymous Resource Consumption
++
++    For CPU, every time slice consumed from inside a cgroup, which
++    comprises most but not all of consumed CPU time for the cgroup,
++    can be clearly attributed to a specific task or process.  Because
++    these two types of entities are directly comparable as consumers
++    of CPU time, it's theoretically possible to mix tasks and cgroups
++    on the same tree levels and let them directly compete for the time
++    quota available to their common ancestor.
++
++    However, the same can't be said for resource types like memory or
++    IO: the memory consumed by the page cache, for example, can be
++    tracked on a per-cgroup level, but due to mismatches in lifetimes
++    of involved objects (page cache can persist long after processes
++    are gone), shared usages and the implementation overhead of
++    tracking persistent state, it can no longer be attributed to
++    individual processes after instantiation.  Consequently, any IO
++    incurred by page cache writeback can be attributed to a cgroup,
++    but not to the individual consumers inside the cgroup.
++
++  For memory and IO, this makes a resource domain (cgroup) an object
++  of a fundamentally different type than a terminal consumer
++  (process).  A process can't be a first class object in the resource
++  distribution graph as its total resource consumption can't be
++  described without the containing resource domain.
++
++  Disallowing processes in internal cgroups avoids competition between
++  cgroups and processes which cannot be meaningfully defined for these
++  resources.  All resource control takes place among cgroups and a
++  terminal consumer interacts with the containing cgroup the same way
++  it would with the system without cgroup.
++
++  Root cgroup is exempt from this constraint, which is in line with
++  how root cgroup is handled in general - it's excluded from cgroup
++  resource accounting and control.
++
++
++Enforcing process granularity and no internal process constraint
++allows all controllers to be on the same footing in terms of resource
++distribution hierarchy.
++
++
++2-2. Impact on CPU Controller
++
++As indicated earlier, the CPU controller's resource distribution graph
++is the simplest.  Every schedulable resource consumption can be
++attributed to a specific task.  In addition, for weight based control,
++the per-task priority set through setpriority(2) can be translated to
++and from a per-cgroup weight.  As such, the CPU controller can treat a
++task and a cgroup symmetrically, allowing support for any tree layout
++of cgroups and tasks.  Both process granularity and the no internal
++process constraint restrict how the CPU controller can be used.
++
++
++  2-2-1. Impact of Process Granularity
++
++  Process granularity prevents tasks belonging to the same process to
++  be assigned to different cgroups.  It was pointed out [6] that this
++  excludes the valid use case of hierarchical CPU distribution within
++  processes.
++
++  To address this issue, the rgroup (resource group) [7][8][9]
++  interface, an extension of the existing setpriority(2) API, was
++  proposed, which is in line with other programmable priority
++  mechanisms and eliminates the risk of in-application configuration
++  and system configuration stepping on each other's toes.
++  Unfortunately, the proposal quickly turned into discussions around
++  cgroup v2 design decisions [4] and no consensus could be reached.
++
++
++  2-2-2. Impact of No Internal Process Constraint
++
++  The no internal process constraint disallows tasks from competing
++  directly against cgroups.  Here is an excerpt from Peter Zijlstra
++  pointing out the issue [10] - R, L and A are cgroups; t1, t2, t3 and
++  t4 are tasks:
++
++
++          R
++        / | \
++       t1 t2 A
++           /   \
++          t3   t4
++
++
++    Is fundamentally different from:
++
++
++               R
++             /   \
++           L       A
++         /   \   /   \
++        t1  t2  t3   t4
++
++
++    Because if in the first hierarchy you add a task (t5) to R, all of
++    its A will run at 1/4th of total bandwidth where before it had
++    1/3rd, whereas with the second example, if you add our t5 to L, A
++    doesn't get any less bandwidth.
++
++
++  It is true that the trees are semantically different from each other
++  and the symmetric handling of tasks and cgroups is aesthetically
++  pleasing.  However, it isn't clear what the practical usefulness of
++  a layout with direct competition between tasks and cgroups would be,
++  considering that number and behavior of tasks are controlled by each
++  application, and cgroups primarily deal with system level resource
++  distribution; changes in the number of active threads would directly
++  impact resource distribution.  Real world use cases of such layouts
++  could not be established during the discussions.
++
++
++2-3. Arguments for cgroup v2
++
++There are strong demands for comprehensive hierarchical resource
++control across all major resources, and establishing a common resource
++hierarchy is an essential step.  As with most engineering decisions,
++common resource hierarchy definition comes with its trade-offs.  With
++cgroup v2, the trade-offs are in the form of structural constraints
++which, among others, restrict the CPU controller's space of possible
++configurations.
++
++However, even with the restrictions, cgroup v2, in combination with
++rgroup, covers most of identified real world use cases while enabling
++new important use cases of resource control across multiple resource
++types that were fundamentally broken previously.
++
++Furthermore, for resource control, treating resource domains as
++objects of a different type from terminal consumers has important
++advantages - it can account for resource consumptions which are not
++tied to any specific terminal consumer, be that a task or process, and
++allows decoupling resource distribution controls from in-application
++APIs.  Even the CPU controller may benefit from it as the kernel can
++consume significant amount of CPU cycles in interrupt context or tasks
++shared across multiple resource domains (e.g. softirq).
++
++Finally, it's important to note that enabling cgroup v2 support for
++the CPU controller doesn't block use cases which require the features
++which are not available on cgroup v2.  Unlikely, but should anybody
++actually rely on the CPU controller's symmetric handling of tasks and
++cgroups, backward compatibility is and will be maintained by being
++able to disconnect the controller from the cgroup v2 hierarchy and use
++it standalone.  This also holds for cpuset which is often used in
++highly customized configurations which might be a poor fit for common
++resource domains.
++
++The required changes are minimal, the benefits for the target use
++cases are critical and obvious, and use cases which have to use v1 can
++continue to do so.
++
++
++3. Way Forward
++
++cgroup v2 primarily aims to solve the problem of comprehensive
++hierarchical resource control across all major computing resources,
++which is one of the core problems of modern server infrastructure
++engineering.  The trade-offs that cgroup v2 took are results of
++pursuing that goal and gaining a better understanding of the nature of
++resource control in the process.
++
++I believe that real world usages will prove cgroup v2's model right,
++considering the crucial pieces of comprehensive resource control that
++cannot be implemented without common resource domains.  This is not to
++say that cgroup v2 is fixed in stone and can't be updated; if there is
++an approach which better serves both comprehensive resource control
++and the CPU controller's flexibility, we will surely move towards
++that.  It goes without saying that discussions around such approach
++should consider practical aspects of resource control as a whole
++rather than absolutely focusing on a particular controller.
++
++Until such consensus can be reached, the CPU controller cgroup v2
++support will be maintained out of the mainline kernel in an easily
++accessible form.  If there is anything cgroup developers can do to
++ease the pain, please feel free to contact us on the cgroup mailing
++list at cgroups@vger.kernel.org.
++
++
++4. References
++
++[1]  http://lkml.kernel.org/r/20160105164834.GE5995@mtj.duckdns.org
++     [PATCH 1/2] sched: Misc preps for cgroup unified hierarchy interface
++     Tejun Heo <tj@kernel.org>
++
++[2]  http://lkml.kernel.org/r/20160105164852.GF5995@mtj.duckdns.org
++     [PATCH 2/2] sched: Implement interface for cgroup unified hierarchy
++     Tejun Heo <tj@kernel.org>
++
++[3]  http://lkml.kernel.org/r/1438641689-14655-4-git-send-email-tj@kernel.org
++     [PATCH 3/3] sched: Implement interface for cgroup unified hierarchy
++     Tejun Heo <tj@kernel.org>
++
++[4]  http://lkml.kernel.org/r/20160407064549.GH3430@twins.programming.kicks-ass.net
++     Re: [PATCHSET RFC cgroup/for-4.6] cgroup, sched: implement resource group and PRIO_RGRP
++     Peter Zijlstra <peterz@infradead.org>
++
++[5]  https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/cgroup-v2.txt
++     Control Group v2
++     Tejun Heo <tj@kernel.org>
++
++[6]  http://lkml.kernel.org/r/CAPM31RJNy3jgG=DYe6GO=wyL4BPPxwUm1f2S6YXacQmo7viFZA@mail.gmail.com
++     Re: [PATCH 3/3] sched: Implement interface for cgroup unified hierarchy
++     Paul Turner <pjt@google.com>
++
++[7]  http://lkml.kernel.org/r/20160105154503.GC5995@mtj.duckdns.org
++     [RFD] cgroup: thread granularity support for cpu controller
++     Tejun Heo <tj@kernel.org>
++
++[8]  http://lkml.kernel.org/r/1457710888-31182-1-git-send-email-tj@kernel.org
++     [PATCHSET RFC cgroup/for-4.6] cgroup, sched: implement resource group and PRIO_RGRP
++     Tejun Heo <tj@kernel.org>
++
++[9]  http://lkml.kernel.org/r/20160311160522.GA24046@htj.duckdns.org
++     Example program for PRIO_RGRP
++     Tejun Heo <tj@kernel.org>
++
++[10] http://lkml.kernel.org/r/20160407082810.GN3430@twins.programming.kicks-ass.net
++     Re: [PATCHSET RFC cgroup/for-4.6] cgroup, sched: implement resource
++     Peter Zijlstra <peterz@infradead.org>
diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix
index 67014b02d9c2..d7ba2652fe17 100644
--- a/pkgs/top-level/all-packages.nix
+++ b/pkgs/top-level/all-packages.nix
@@ -11716,10 +11716,7 @@ with pkgs;
     kernelPatches =
       [ kernelPatches.bridge_stp_helper
         kernelPatches.p9_fixes
-        # See pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/README.md
-        # when adding a new linux version
-        # !!! 4.7 patch doesn't apply, 4.9 patch not up yet, will keep checking
-        # kernelPatches.cpu-cgroup-v2."4.7"
+        kernelPatches.cpu-cgroup-v2."4.9"
         kernelPatches.modinst_arg_list_too_long
       ]
       ++ lib.optionals ((platform.kernelArch or null) == "mips")
@@ -11733,10 +11730,7 @@ with pkgs;
     kernelPatches =
       [ kernelPatches.bridge_stp_helper
         kernelPatches.p9_fixes
-        # See pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/README.md
-        # when adding a new linux version
-        # !!! 4.7 patch doesn't apply, 4.9 patch not up yet, will keep checking
-        # kernelPatches.cpu-cgroup-v2."4.7"
+        kernelPatches.cpu-cgroup-v2."4.10"
         kernelPatches.modinst_arg_list_too_long
       ]
       ++ lib.optionals ((platform.kernelArch or null) == "mips")
@@ -11752,8 +11746,7 @@ with pkgs;
         kernelPatches.p9_fixes
         # See pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/README.md
         # when adding a new linux version
-        # !!! 4.7 patch doesn't apply, 4.9 patch not up yet, will keep checking
-        # kernelPatches.cpu-cgroup-v2."4.7"
+        kernelPatches.cpu-cgroup-v2."4.11"
         kernelPatches.modinst_arg_list_too_long
       ]
       ++ lib.optionals ((platform.kernelArch or null) == "mips")
author	Michael Raskin <7c6f434c@mail.ru>	2017-05-02 15:03:42 +0200
committer	GitHub <noreply@github.com>	2017-05-02 15:03:42 +0200
commit	a841935d4b7f6b0b8cf1d7b7fe3f342e61dafd67 (patch)
tree	5258ac5985cec4ef4425c0ad9bc9045dc206bf0b /pkgs
parent	cac205c25223f1839c453a2329334aed9f3920e1 (diff)
parent	b5169fd2771d60a359af7263d84453adc0b5dbfb (diff)
download	nixlib-a841935d4b7f6b0b8cf1d7b7fe3f342e61dafd67.tar nixlib-a841935d4b7f6b0b8cf1d7b7fe3f342e61dafd67.tar.gz nixlib-a841935d4b7f6b0b8cf1d7b7fe3f342e61dafd67.tar.bz2 nixlib-a841935d4b7f6b0b8cf1d7b7fe3f342e61dafd67.tar.lz nixlib-a841935d4b7f6b0b8cf1d7b7fe3f342e61dafd67.tar.xz nixlib-a841935d4b7f6b0b8cf1d7b7fe3f342e61dafd67.tar.zst nixlib-a841935d4b7f6b0b8cf1d7b7fe3f342e61dafd67.zip