mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson
synced 2025-08-28 09:22:08 +00:00
PM: EM: Optimize em_cpu_energy() and remove division
The Energy Model (EM) can be modified at runtime, which brings new possibilities. em_cpu_energy() is called by the Energy Aware Scheduler (EAS) in its hot path. The energy calculation uses the power value for a given performance state (ps) and the CPU busy time as a percentage for that given frequency. It is possible to avoid the division by 'scale_cpu' at runtime, because the EM is updated whenever a new max capacity CPU is set in the system. Use that feature and do the needed division during the calculation of the coefficient 'ps->cost'. That enhanced 'ps->cost' value can then simply be multiplied by the utilization: pd_nrg = ps->cost * \Sum cpu_util to get the needed energy for the whole Performance Domain (PD). With this optimization and the earlier removal of map_util_freq(), em_cpu_energy() should run faster on the Big CPU by 1.43x and on the Little CPU by 1.69x (RockPi 4B board). Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com> Tested-by: Dietmar Eggemann <dietmar.eggemann@arm.com> Signed-off-by: Lukasz Luba <lukasz.luba@arm.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
This commit is contained in:
parent
e3f1164fc9
commit
1b600da510
@ -115,27 +115,6 @@ struct em_perf_domain {
|
|||||||
#define EM_MAX_NUM_CPUS 16
|
#define EM_MAX_NUM_CPUS 16
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
|
||||||
* To avoid an overflow on 32bit machines while calculating the energy
|
|
||||||
* use a different order in the operation. First divide by the 'cpu_scale'
|
|
||||||
* which would reduce big value stored in the 'cost' field, then multiply by
|
|
||||||
* the 'sum_util'. This would allow to handle existing platforms, which have
|
|
||||||
* e.g. power ~1.3 Watt at max freq, so the 'cost' value > 1mln micro-Watts.
|
|
||||||
* In such scenario, where there are 4 CPUs in the Perf. Domain the 'sum_util'
|
|
||||||
* could be 4096, then multiplication: 'cost' * 'sum_util' would overflow.
|
|
||||||
* This reordering of operations has some limitations, we lose small
|
|
||||||
* precision in the estimation (comparing to 64bit platform w/o reordering).
|
|
||||||
*
|
|
||||||
* We are safe on 64bit machine.
|
|
||||||
*/
|
|
||||||
#ifdef CONFIG_64BIT
|
|
||||||
#define em_estimate_energy(cost, sum_util, scale_cpu) \
|
|
||||||
(((cost) * (sum_util)) / (scale_cpu))
|
|
||||||
#else
|
|
||||||
#define em_estimate_energy(cost, sum_util, scale_cpu) \
|
|
||||||
(((cost) / (scale_cpu)) * (sum_util))
|
|
||||||
#endif
|
|
||||||
|
|
||||||
struct em_data_callback {
|
struct em_data_callback {
|
||||||
/**
|
/**
|
||||||
* active_power() - Provide power at the next performance state of
|
* active_power() - Provide power at the next performance state of
|
||||||
@ -249,8 +228,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
|
|||||||
{
|
{
|
||||||
struct em_perf_table *em_table;
|
struct em_perf_table *em_table;
|
||||||
struct em_perf_state *ps;
|
struct em_perf_state *ps;
|
||||||
unsigned long scale_cpu;
|
int i;
|
||||||
int cpu, i;
|
|
||||||
|
|
||||||
#ifdef CONFIG_SCHED_DEBUG
|
#ifdef CONFIG_SCHED_DEBUG
|
||||||
WARN_ONCE(!rcu_read_lock_held(), "EM: rcu read lock needed\n");
|
WARN_ONCE(!rcu_read_lock_held(), "EM: rcu read lock needed\n");
|
||||||
@ -267,9 +245,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
|
|||||||
* max utilization to the allowed CPU capacity before calculating
|
* max utilization to the allowed CPU capacity before calculating
|
||||||
* effective performance.
|
* effective performance.
|
||||||
*/
|
*/
|
||||||
cpu = cpumask_first(to_cpumask(pd->cpus));
|
max_util = map_util_perf(max_util);
|
||||||
scale_cpu = arch_scale_cpu_capacity(cpu);
|
|
||||||
|
|
||||||
max_util = min(max_util, allowed_cpu_cap);
|
max_util = min(max_util, allowed_cpu_cap);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -282,11 +258,11 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
|
|||||||
ps = &em_table->state[i];
|
ps = &em_table->state[i];
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The capacity of a CPU in the domain at the performance state (ps)
|
* The performance (capacity) of a CPU in the domain at the performance
|
||||||
* can be computed as:
|
* state (ps) can be computed as:
|
||||||
*
|
*
|
||||||
* ps->freq * scale_cpu
|
* ps->freq * scale_cpu
|
||||||
* ps->cap = -------------------- (1)
|
* ps->performance = -------------------- (1)
|
||||||
* cpu_max_freq
|
* cpu_max_freq
|
||||||
*
|
*
|
||||||
* So, ignoring the costs of idle states (which are not available in
|
* So, ignoring the costs of idle states (which are not available in
|
||||||
@ -295,9 +271,10 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
|
|||||||
*
|
*
|
||||||
* ps->power * cpu_util
|
* ps->power * cpu_util
|
||||||
* cpu_nrg = -------------------- (2)
|
* cpu_nrg = -------------------- (2)
|
||||||
* ps->cap
|
* ps->performance
|
||||||
*
|
*
|
||||||
* since 'cpu_util / ps->cap' represents its percentage of busy time.
|
* since 'cpu_util / ps->performance' represents its percentage of busy
|
||||||
|
* time.
|
||||||
*
|
*
|
||||||
* NOTE: Although the result of this computation actually is in
|
* NOTE: Although the result of this computation actually is in
|
||||||
* units of power, it can be manipulated as an energy value
|
* units of power, it can be manipulated as an energy value
|
||||||
@ -307,9 +284,9 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
|
|||||||
* By injecting (1) in (2), 'cpu_nrg' can be re-expressed as a product
|
* By injecting (1) in (2), 'cpu_nrg' can be re-expressed as a product
|
||||||
* of two terms:
|
* of two terms:
|
||||||
*
|
*
|
||||||
* ps->power * cpu_max_freq cpu_util
|
* ps->power * cpu_max_freq
|
||||||
* cpu_nrg = ------------------------ * --------- (3)
|
* cpu_nrg = ------------------------ * cpu_util (3)
|
||||||
* ps->freq scale_cpu
|
* ps->freq * scale_cpu
|
||||||
*
|
*
|
||||||
* The first term is static, and is stored in the em_perf_state struct
|
* The first term is static, and is stored in the em_perf_state struct
|
||||||
* as 'ps->cost'.
|
* as 'ps->cost'.
|
||||||
@ -319,11 +296,9 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
|
|||||||
* total energy of the domain (which is the simple sum of the energy of
|
* total energy of the domain (which is the simple sum of the energy of
|
||||||
* all of its CPUs) can be factorized as:
|
* all of its CPUs) can be factorized as:
|
||||||
*
|
*
|
||||||
* ps->cost * \Sum cpu_util
|
* pd_nrg = ps->cost * \Sum cpu_util (4)
|
||||||
* pd_nrg = ------------------------ (4)
|
|
||||||
* scale_cpu
|
|
||||||
*/
|
*/
|
||||||
return em_estimate_energy(ps->cost, sum_util, scale_cpu);
|
return ps->cost * sum_util;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -192,11 +192,9 @@ static int em_compute_costs(struct device *dev, struct em_perf_state *table,
|
|||||||
unsigned long flags)
|
unsigned long flags)
|
||||||
{
|
{
|
||||||
unsigned long prev_cost = ULONG_MAX;
|
unsigned long prev_cost = ULONG_MAX;
|
||||||
u64 fmax;
|
|
||||||
int i, ret;
|
int i, ret;
|
||||||
|
|
||||||
/* Compute the cost of each performance state. */
|
/* Compute the cost of each performance state. */
|
||||||
fmax = (u64) table[nr_states - 1].frequency;
|
|
||||||
for (i = nr_states - 1; i >= 0; i--) {
|
for (i = nr_states - 1; i >= 0; i--) {
|
||||||
unsigned long power_res, cost;
|
unsigned long power_res, cost;
|
||||||
|
|
||||||
@ -208,8 +206,9 @@ static int em_compute_costs(struct device *dev, struct em_perf_state *table,
|
|||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
power_res = table[i].power;
|
/* increase resolution of 'cost' precision */
|
||||||
cost = div64_u64(fmax * power_res, table[i].frequency);
|
power_res = table[i].power * 10;
|
||||||
|
cost = power_res / table[i].performance;
|
||||||
}
|
}
|
||||||
|
|
||||||
table[i].cost = cost;
|
table[i].cost = cost;
|
||||||
|
Loading…
Reference in New Issue
Block a user