* [PATCH v3 06/11] x86/intel_pstate: the main body of the intel_pstate driver
@ 2015-06-11  8:27 Wei Wang
  2015-06-11 14:00 ` Julien Grall
  2015-06-18 15:03 ` Jan Beulich
  0 siblings, 2 replies; 7+ messages in thread
From: Wei Wang @ 2015-06-11  8:27 UTC (permalink / raw)
  To: xen-devel, jbeulich; +Cc: andrew.cooper3, Wei Wang

The intel_pstate driver is ported following its kernel code logic
(commit: 93f0822d). In order to port the Linux source file with
minimal modifications, some of the variable types are kept intact
(e.g. "int current_pstate", which would otherwise be changed to
"unsigned int").

In the kernel, a user can adjust the limits via sysfs
(limits.min_sysfs_pct/max_sysfs_pct). In Xen,
policy->limits.min_perf_pct/max_perf_pct play that role instead,
and a user adjusts them via xenpm.
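
For reference, a percentage limit is turned into a pstate bound essentially
as sketched below (pct_to_pstate() is not part of the patch; it only
condenses what intel_pstate_get_min_max() does with the fixed-point helpers
defined in intel_pstate.c):

    /* Sketch only: map a 0..100 limit onto the available pstate range. */
    static int pct_to_pstate(int pct, int min_pstate, int max_pstate)
    {
        int32_t perf = div_fp(int_tofp(pct), int_tofp(100));
        int p = fp_toint(mul_fp(int_tofp(max_pstate), perf));

        return clamp_t(int, p, min_pstate, max_pstate);
    }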

Signed-off-by: Wei Wang <wei.w.wang@intel.com>
---
 xen/arch/x86/acpi/cpufreq/Makefile       |   1 +
 xen/arch/x86/acpi/cpufreq/intel_pstate.c | 818 +++++++++++++++++++++++++++++++
 xen/arch/x86/cpu/common.c                |   3 +
 xen/include/acpi/cpufreq/cpufreq.h       |  14 +
 xen/include/asm-x86/cpufeature.h         |   1 +
 xen/include/asm-x86/msr-index.h          |   3 +
 6 files changed, 840 insertions(+)
 create mode 100644 xen/arch/x86/acpi/cpufreq/intel_pstate.c

diff --git a/xen/arch/x86/acpi/cpufreq/Makefile b/xen/arch/x86/acpi/cpufreq/Makefile
index f75da9b..99fa9f4 100644
--- a/xen/arch/x86/acpi/cpufreq/Makefile
+++ b/xen/arch/x86/acpi/cpufreq/Makefile
@@ -1,2 +1,3 @@
 obj-y += cpufreq.o
+obj-y += intel_pstate.o
 obj-y += powernow.o
diff --git a/xen/arch/x86/acpi/cpufreq/intel_pstate.c b/xen/arch/x86/acpi/cpufreq/intel_pstate.c
new file mode 100644
index 0000000..48bbc30
--- /dev/null
+++ b/xen/arch/x86/acpi/cpufreq/intel_pstate.c
@@ -0,0 +1,818 @@
+#include <xen/kernel.h>
+#include <xen/types.h>
+#include <xen/init.h>
+#include <xen/bitmap.h>
+#include <xen/cpumask.h>
+#include <xen/timer.h>
+#include <asm/msr.h>
+#include <asm/msr-index.h>
+#include <asm/processor.h>
+#include <asm/div64.h>
+#include <acpi/cpufreq/cpufreq.h>
+
+#define BYT_RATIOS        0x66a
+#define BYT_VIDS          0x66b
+#define BYT_TURBO_RATIOS  0x66c
+#define BYT_TURBO_VIDS    0x66d
+
+#define FRAC_BITS 8
+#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
+#define fp_toint(X) ((X) >> FRAC_BITS)
+
+static inline int32_t mul_fp(int32_t x, int32_t y)
+{
+    return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
+}
+
+static inline int32_t div_fp(int32_t x, int32_t y)
+{
+    return div_s64((int64_t)x << FRAC_BITS, y);
+}
+
+static inline int ceiling_fp(int32_t x)
+{
+    int mask, ret;
+
+    ret = fp_toint(x);
+    mask = (1 << FRAC_BITS) - 1;
+    if (x & mask)
+    ret += 1;
+    return ret;
+}
+
+struct sample {
+    int32_t core_pct_busy;
+    u64 aperf;
+    u64 mperf;
+    int freq;
+    s_time_t time;
+};
+
+struct pstate_data {
+    int    current_pstate;
+    int    min_pstate;
+    int    max_pstate;
+    int    scaling;
+    int    turbo_pstate;
+};
+
+struct vid_data {
+    int min;
+    int max;
+    int turbo;
+    int32_t ratio;
+};
+
+struct _pid {
+    int setpoint;
+    int32_t integral;
+    int32_t p_gain;
+    int32_t i_gain;
+    int32_t d_gain;
+    int deadband;
+    int32_t last_err;
+};
+
+struct cpudata {
+    int cpu;
+
+    struct timer timer;
+
+    struct pstate_data pstate;
+    struct vid_data vid;
+    struct _pid pid;
+
+    s_time_t last_sample_time;
+    u64    prev_aperf;
+    u64    prev_mperf;
+    struct sample sample;
+};
+
+static struct cpudata **all_cpu_data;
+
+struct pstate_adjust_policy {
+    int sample_rate_ms;
+    int deadband;
+    int setpoint;
+    int p_gain_pct;
+    int d_gain_pct;
+    int i_gain_pct;
+};
+
+struct pstate_funcs {
+    int (*get_max)(void);
+    int (*get_min)(void);
+    int (*get_turbo)(void);
+    int (*get_scaling)(void);
+    void (*set)(struct perf_limits *, struct cpudata *, int pstate);
+    void (*get_vid)(struct cpudata *);
+};
+
+struct cpu_defaults {
+    struct pstate_adjust_policy pid_policy;
+    struct pstate_funcs funcs;
+};
+
+static struct pstate_adjust_policy pid_params;
+static struct pstate_funcs pstate_funcs;
+
+static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
+                                 int deadband, int integral)
+{
+    pid->setpoint = setpoint;
+    pid->deadband  = deadband;
+    pid->integral  = int_tofp(integral);
+    pid->last_err  = int_tofp(setpoint) - int_tofp(busy);
+}
+
+static inline void pid_p_gain_set(struct _pid *pid, int percent)
+{
+    pid->p_gain = div_fp(int_tofp(percent), int_tofp(100));
+}
+
+static inline void pid_i_gain_set(struct _pid *pid, int percent)
+{
+    pid->i_gain = div_fp(int_tofp(percent), int_tofp(100));
+}
+
+static inline void pid_d_gain_set(struct _pid *pid, int percent)
+{
+    pid->d_gain = div_fp(int_tofp(percent), int_tofp(100));
+}
+
+static signed int pid_calc(struct _pid *pid, int32_t busy)
+{
+    signed int result;
+    int32_t pterm, dterm, fp_error;
+    int32_t integral_limit;
+
+    fp_error = int_tofp(pid->setpoint) - busy;
+
+    if (ABS(fp_error) <= int_tofp(pid->deadband))
+        return 0;
+
+    pterm = mul_fp(pid->p_gain, fp_error);
+
+    pid->integral += fp_error;
+
+    /*
+     * We limit the integral here so that it will never
+     * get higher than 30.  This prevents it from becoming
+     * too large an input over long periods of time and allows
+     * it to get factored out sooner.
+     * The value of 30 was chosen through experimentation.
+     */
+    integral_limit = int_tofp(30);
+    if (pid->integral > integral_limit)
+    pid->integral = integral_limit;
+    if (pid->integral < -integral_limit)
+    pid->integral = -integral_limit;
+
+    dterm = mul_fp(pid->d_gain, fp_error - pid->last_err);
+    pid->last_err = fp_error;
+
+    result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm;
+    result = result + (1 << (FRAC_BITS-1));
+    return (signed int)fp_toint(result);
+}
+
+static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu)
+{
+    pid_p_gain_set(&cpu->pid, pid_params.p_gain_pct);
+    pid_d_gain_set(&cpu->pid, pid_params.d_gain_pct);
+    pid_i_gain_set(&cpu->pid, pid_params.i_gain_pct);
+
+    pid_reset(&cpu->pid, pid_params.setpoint, 100, pid_params.deadband, 0);
+}
+
+static inline void intel_pstate_reset_all_pid(void)
+{
+    unsigned int cpu;
+
+    for_each_online_cpu(cpu) {
+        if (all_cpu_data[cpu])
+            intel_pstate_busy_pid_reset(all_cpu_data[cpu]);
+    }
+}
+
+static inline void update_turbo_state(struct cpufreq_policy *policy)
+{
+    u64 misc_en;
+    struct cpudata *cpu;
+
+    cpu = all_cpu_data[policy->cpu];
+    rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
+    policy->limits.turbo_disabled =
+        (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
+            cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
+}
+
+#define BYT_TURBO_CONTROL_BIT 32
+#define BYT_MIN_PSTATE(value) (((value) >> 8) & 0x7f)
+#define BYT_MAX_PSTATE(value) (((value) >> 16) & 0x7f)
+#define BYT_TURBO_PSTATE(value) ((value) & 0x7f)
+static int byt_get_min_pstate(void)
+{
+    u64 value;
+
+    rdmsrl(BYT_RATIOS, value);
+    return BYT_MIN_PSTATE(value);
+}
+
+static int byt_get_max_pstate(void)
+{
+    u64 value;
+
+    rdmsrl(BYT_RATIOS, value);
+    return BYT_MAX_PSTATE(value);
+}
+
+static int byt_get_turbo_pstate(void)
+{
+    u64 value;
+
+    rdmsrl(BYT_TURBO_RATIOS, value);
+    return BYT_TURBO_PSTATE(value);
+}
+
+static void byt_set_pstate(struct perf_limits *limits,
+				struct cpudata *cpudata, int pstate)
+{
+    u64 val;
+    int32_t vid_fp;
+    u32 vid;
+
+    val = pstate << 8;
+    if (limits->no_turbo && !limits->turbo_disabled)
+        val |= (u64)1 << BYT_TURBO_CONTROL_BIT;
+
+    vid_fp = cpudata->vid.min + mul_fp(
+        int_tofp(pstate - cpudata->pstate.min_pstate),
+        cpudata->vid.ratio);
+
+    vid_fp = clamp_t(int32_t, vid_fp, cpudata->vid.min, cpudata->vid.max);
+    vid = ceiling_fp(vid_fp);
+
+    if (pstate > cpudata->pstate.max_pstate)
+        vid = cpudata->vid.turbo;
+
+    val |= vid;
+
+    wrmsrl(MSR_IA32_PERF_CTL, val);
+}
+
+#define BYT_BCLK_FREQS 5
+#define TO_FREQ_TABLE_IDX_MASK 0x7
+static const int byt_freq_table[BYT_BCLK_FREQS] = { 833, 1000, 1333, 1167, 800};
+
+static int byt_get_scaling(void)
+{
+    u64 value;
+    int i;
+
+    rdmsrl(MSR_FSB_FREQ, value);
+    i = value & TO_FREQ_TABLE_IDX_MASK;
+
+    BUG_ON(i >= BYT_BCLK_FREQS);
+
+    return byt_freq_table[i] * 100;
+}
+
+static void byt_get_vid(struct cpudata *cpudata)
+{
+    u64 value;
+
+    rdmsrl(BYT_VIDS, value);
+    cpudata->vid.min = int_tofp(BYT_MIN_PSTATE(value));
+    cpudata->vid.max = int_tofp(BYT_MAX_PSTATE(value));
+    cpudata->vid.ratio = div_fp(
+        cpudata->vid.max - cpudata->vid.min,
+        int_tofp(cpudata->pstate.max_pstate -
+            cpudata->pstate.min_pstate));
+
+    rdmsrl(BYT_TURBO_VIDS, value);
+    cpudata->vid.turbo = BYT_TURBO_PSTATE(value);
+}
+
+#define SCALING_FACTOR 100000
+#define CORE_TURBO_CONTROL_BIT 32
+#define CORE_MIN_PSTATE(value) (((value) >> 40) & 0xff)
+#define CORE_MAX_PSTATE(value) (((value) >> 8) & 0xff)
+#define CORE_TURBO_PSTATE(value) ((value) & 0xff)
+static int core_get_min_pstate(void)
+{
+    u64 value;
+
+    rdmsrl(MSR_INTEL_PLATFORM_INFO, value);
+    return CORE_MIN_PSTATE(value);
+}
+
+static int core_get_max_pstate(void)
+{
+    u64 value;
+
+    rdmsrl(MSR_INTEL_PLATFORM_INFO, value);
+    return CORE_MAX_PSTATE(value);
+}
+
+static int core_get_turbo_pstate(void)
+{
+    u64 value;
+    int nont, ret;
+
+    rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value);
+    nont = core_get_max_pstate();
+    ret = CORE_TURBO_PSTATE(value);
+    if (ret <= nont)
+        ret = nont;
+    return ret;
+}
+
+static inline int core_get_scaling(void)
+{
+    return SCALING_FACTOR;
+}
+
+static void core_set_pstate(struct perf_limits *limits,
+				struct cpudata *cpudata, int pstate)
+{
+    u64 val;
+
+    val = pstate << 8;
+    if (limits->no_turbo && !limits->turbo_disabled)
+        val |= (u64)1 << CORE_TURBO_CONTROL_BIT;
+
+    wrmsrl(MSR_IA32_PERF_CTL, val);
+}
+
+static const struct cpu_defaults core_params = {
+    .pid_policy = {
+        .sample_rate_ms = 10,
+        .deadband = 0,
+        .setpoint = 97,
+        .p_gain_pct = 20,
+        .d_gain_pct = 0,
+        .i_gain_pct = 0,
+    },
+    .funcs = {
+        .get_max = core_get_max_pstate,
+        .get_min = core_get_min_pstate,
+        .get_turbo = core_get_turbo_pstate,
+        .get_scaling = core_get_scaling,
+        .set = core_set_pstate,
+    },
+};
+
+static const struct cpu_defaults byt_params = {
+    .pid_policy = {
+        .sample_rate_ms = 10,
+        .deadband = 0,
+        .setpoint = 97,
+        .p_gain_pct = 14,
+        .d_gain_pct = 0,
+        .i_gain_pct = 4,
+    },
+    .funcs = {
+        .get_max = byt_get_max_pstate,
+        .get_min = byt_get_min_pstate,
+        .get_turbo = byt_get_turbo_pstate,
+        .set = byt_set_pstate,
+        .get_scaling = byt_get_scaling,
+        .get_vid = byt_get_vid,
+    },
+};
+
+static void intel_pstate_get_min_max(struct perf_limits *limits,
+					struct cpudata *cpu, int *min, int *max)
+{
+    int max_perf = cpu->pstate.turbo_pstate;
+    int max_perf_adj;
+    int min_perf;
+
+    if (limits->no_turbo || limits->turbo_disabled)
+        max_perf = cpu->pstate.max_pstate;
+
+    /* performance can be limited by user through xenpm */
+    max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits->max_perf));
+    *max = clamp_t(int, max_perf_adj,
+            cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
+
+    min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits->min_perf));
+    *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
+}
+
+static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
+{
+    int max_perf, min_perf;
+    struct cpufreq_policy *policy;
+    struct perf_limits *limits;
+
+    policy = per_cpu(cpufreq_cpu_policy, cpu->cpu);
+    limits = &policy->limits;
+
+    update_turbo_state(policy);
+
+    if (limits->turbo_disabled)
+        policy->turbo = CPUFREQ_TURBO_UNSUPPORTED;
+    else if (limits->no_turbo)
+        policy->turbo = CPUFREQ_TURBO_DISABLED;
+    else
+        policy->turbo = CPUFREQ_TURBO_ENABLED;
+
+    intel_pstate_get_min_max(limits, cpu, &min_perf, &max_perf);
+
+    pstate = clamp_t(int, pstate, min_perf, max_perf);
+
+    if (pstate == cpu->pstate.current_pstate)
+        return;
+
+    cpu->pstate.current_pstate = pstate;
+    policy->cur = pstate * SCALING_FACTOR;
+
+    pstate_funcs.set(limits, cpu, pstate);
+}
+
+static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
+{
+    cpu->pstate.min_pstate = pstate_funcs.get_min();
+    cpu->pstate.max_pstate = pstate_funcs.get_max();
+    cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
+    cpu->pstate.scaling = pstate_funcs.get_scaling();
+
+    if (pstate_funcs.get_vid)
+        pstate_funcs.get_vid(cpu);
+    intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate);
+}
+
+static inline void intel_pstate_calc_busy(struct cpudata *cpu)
+{
+    struct sample *sample = &cpu->sample;
+    int64_t core_pct;
+
+    core_pct = int_tofp(sample->aperf) * int_tofp(100);
+    core_pct = div64_u64(core_pct, int_tofp(sample->mperf));
+
+    sample->freq = fp_toint(
+        mul_fp(int_tofp(
+            cpu->pstate.max_pstate * cpu->pstate.scaling / 100),
+            core_pct));
+
+    sample->core_pct_busy = (int32_t)core_pct;
+}
+
+static inline void intel_pstate_sample(struct cpudata *cpu)
+{
+    u64 aperf, mperf;
+    unsigned long flags;
+
+    local_irq_save(flags);
+    rdmsrl(MSR_IA32_APERF, aperf);
+    rdmsrl(MSR_IA32_MPERF, mperf);
+    local_irq_restore(flags);
+
+    cpu->last_sample_time = cpu->sample.time;
+    cpu->sample.time = get_s_time();
+    cpu->sample.aperf = aperf;
+    cpu->sample.mperf = mperf;
+    cpu->sample.aperf -= cpu->prev_aperf;
+    cpu->sample.mperf -= cpu->prev_mperf;
+
+    intel_pstate_calc_busy(cpu);
+
+    cpu->prev_aperf = aperf;
+    cpu->prev_mperf = mperf;
+}
+
+static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
+{
+    set_timer(&cpu->timer, NOW() + MILLISECS(pid_params.sample_rate_ms));
+}
+
+static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
+{
+    int32_t core_busy, max_pstate, current_pstate, sample_ratio;
+    u32 duration_us;
+    u32 sample_time_us;
+
+    /*
+     * core_busy is the ratio of actual performance to max
+     * max_pstate is the max non turbo pstate available
+     * current_pstate was the pstate that was requested during
+     * the last sample period.
+     *
+     * We normalize core_busy, which was our actual percent
+     * performance to what we requested during the last sample
+     * period. The result will be a percentage of busy at a
+     * specified pstate.
+     */
+    core_busy = cpu->sample.core_pct_busy;
+    max_pstate = int_tofp(cpu->pstate.max_pstate);
+    current_pstate = int_tofp(cpu->pstate.current_pstate);
+    core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
+
+    /*
+     * Since we have a deferred timer, it will not fire unless
+     * we are in C0.  So, determine if the actual elapsed time
+     * is significantly greater (3x) than our sample interval. If it
+     * is, then we were idle for a long enough period of time
+     * to adjust our busyness.
+     */
+    sample_time_us = pid_params.sample_rate_ms  * 1000ULL;
+    duration_us = (u32)((s_time_t)(cpu->sample.time - cpu->last_sample_time)
+                                    / 1000);
+    if (duration_us > sample_time_us * 3) {
+        sample_ratio = div_fp(int_tofp(sample_time_us),
+                                       int_tofp(duration_us));
+        core_busy = mul_fp(core_busy, sample_ratio);
+    }
+
+    return core_busy;
+}
+
+static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
+{
+    int32_t busy_scaled;
+    struct _pid *pid;
+    signed int ctl;
+
+    pid = &cpu->pid;
+    busy_scaled = intel_pstate_get_scaled_busy(cpu);
+
+    ctl = pid_calc(pid, busy_scaled);
+
+    /* Negative values of ctl increase the pstate and vice versa */
+    intel_pstate_set_pstate(cpu, cpu->pstate.current_pstate - ctl);
+}
+
+static void intel_pstate_timer_func(void *data)
+{
+    struct cpudata *cpu = (struct cpudata *) data;
+
+    intel_pstate_sample(cpu);
+
+    intel_pstate_adjust_busy_pstate(cpu);
+
+    intel_pstate_set_sample_time(cpu);
+}
+
+#define ICPU(model, policy) \
+    { X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\
+            &policy##_params }
+
+static const struct x86_cpu_id intel_pstate_cpu_ids[] __initconst = {
+    ICPU(0x2a, core),
+    ICPU(0x2d, core),
+    ICPU(0x37, byt),
+    ICPU(0x3a, core),
+    ICPU(0x3c, core),
+    ICPU(0x3d, core),
+    ICPU(0x3e, core),
+    ICPU(0x3f, core),
+    ICPU(0x45, core),
+    ICPU(0x46, core),
+    ICPU(0x47, core),
+    ICPU(0x4c, byt),
+    ICPU(0x4e, core),
+    ICPU(0x4f, core),
+    ICPU(0x56, core),
+    {}
+};
+
+static int intel_pstate_init_cpu(unsigned int cpunum)
+{
+    struct cpudata *cpu;
+    s_time_t expires;
+
+    if (!all_cpu_data[cpunum])
+        all_cpu_data[cpunum] = xzalloc(struct cpudata);
+    if (!all_cpu_data[cpunum])
+        return -ENOMEM;
+
+    cpu = all_cpu_data[cpunum];
+
+    cpu->cpu = cpunum;
+    intel_pstate_get_cpu_pstates(cpu);
+
+    init_timer(&cpu->timer, intel_pstate_timer_func, cpu, cpunum);
+    expires = NOW() + MILLISECS(10);
+
+    intel_pstate_busy_pid_reset(cpu);
+    intel_pstate_sample(cpu);
+
+    set_timer(&cpu->timer, expires);
+
+    return 0;
+}
+
+static int intel_pstate_set_policy(struct cpufreq_policy *policy)
+{
+    struct perf_limits *limits = &policy->limits;
+
+    if (!policy->cpuinfo.max_freq)
+        return -ENODEV;
+
+    switch (policy->policy) {
+    case CPUFREQ_POLICY_PERFORMANCE:
+        limits->no_turbo = 0;
+        limits->max_perf_pct = 100;
+        limits->max_perf = int_tofp(1);
+        limits->min_perf_pct = 100;
+        limits->min_perf = int_tofp(1);
+        break;
+    case CPUFREQ_POLICY_POWERSAVE:
+        limits->min_perf = div_fp(int_tofp(limits->min_policy_pct), int_tofp(100));
+        limits->max_perf = limits->min_perf;
+        limits->min_perf_pct = limits->min_policy_pct;
+        limits->max_perf_pct = limits->min_perf_pct;
+        break;
+    case CPUFREQ_POLICY_USERSPACE:
+        limits->max_perf = div_fp(int_tofp(limits->max_perf_pct), int_tofp(100));
+        limits->min_perf = limits->max_perf;
+        limits->min_perf_pct = limits->max_perf_pct;
+        break;
+    case CPUFREQ_POLICY_ONDEMAND:
+    default:
+        limits->min_perf = div_fp(int_tofp(limits->min_perf_pct), int_tofp(100));
+        limits->max_perf = div_fp(int_tofp(limits->max_perf_pct), int_tofp(100));
+        break;
+    }
+
+    return 0;
+}
+
+static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
+{
+    cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
+                                     policy->cpuinfo.max_freq);
+
+    switch (policy->policy) {
+    case CPUFREQ_POLICY_POWERSAVE:
+    case CPUFREQ_POLICY_PERFORMANCE:
+    case CPUFREQ_POLICY_USERSPACE:
+    case CPUFREQ_POLICY_ONDEMAND:
+        return 0;
+    default:
+        return -EINVAL;
+    }
+}
+
+static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
+{
+    int cpu_num = policy->cpu;
+    struct cpudata *cpu = all_cpu_data[cpu_num];
+
+    kill_timer(&all_cpu_data[cpu_num]->timer);
+
+    intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate);
+
+    return 0;
+}
+
+static int intel_pstate_turbo_update(int cpuid, struct cpufreq_policy *policy)
+{
+    struct cpudata *cpu = all_cpu_data[policy->cpu];
+    struct perf_limits *limits = &policy->limits;
+
+    update_turbo_state(policy);
+    if (limits->turbo_disabled) {
+        printk("Turbo disabled by BIOS or not supported on processor\n");
+        return -EINVAL;
+    }
+    limits->no_turbo = policy->turbo == CPUFREQ_TURBO_ENABLED ? 0 : 1;
+
+    if (limits->no_turbo)
+        policy->cpuinfo.max_freq =
+                        cpu->pstate.max_pstate * cpu->pstate.scaling;
+    else
+        policy->cpuinfo.max_freq =
+                        cpu->pstate.turbo_pstate * cpu->pstate.scaling;
+
+    policy->max = clamp_t(unsigned int, policy->max,
+                        policy->cpuinfo.min_freq, policy->cpuinfo.max_freq);
+
+    return 0;
+}
+
+static int get_turbo_pct(struct cpudata *cpu)
+{
+    int total, no_turbo, turbo_pct;
+    uint32_t turbo_fp;
+
+    total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
+    no_turbo = cpu->pstate.max_pstate - cpu->pstate.min_pstate + 1;
+    turbo_fp = div_fp(int_tofp(no_turbo), int_tofp(total));
+    turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100)));
+    return turbo_pct;
+}
+
+static int intel_pstate_cpu_setup(struct cpufreq_policy *policy)
+{
+    struct cpudata *cpu;
+    struct perf_limits *limits = &policy->limits;
+    int rc;
+
+    rc = intel_pstate_init_cpu(policy->cpu);
+    if (rc)
+        return rc;
+
+    cpu = all_cpu_data[policy->cpu];
+    policy->policy = CPUFREQ_POLICY_ONDEMAND;
+    policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
+    policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
+
+    /* cpuinfo and default policy values */
+    policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
+    policy->cpuinfo.max_freq =
+        cpu->pstate.turbo_pstate * cpu->pstate.scaling;
+    policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+    cpumask_set_cpu(policy->cpu, policy->cpus);
+
+    limits->no_turbo = 0;
+    limits->turbo_disabled = 0;
+    limits->turbo_pct = get_turbo_pct(cpu);
+    limits->min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
+    limits->min_policy_pct = clamp_t(uint32_t, limits->min_policy_pct, 0, 100);
+    limits->max_policy_pct = (policy->max * 100) / policy->cpuinfo.max_freq;
+    limits->max_policy_pct = clamp_t(uint32_t, limits->max_policy_pct, 0, 100);
+    limits->max_perf_pct   = limits->max_policy_pct;
+    limits->min_perf_pct   = limits->min_policy_pct;
+
+    return 0;
+}
+
+static struct cpufreq_driver intel_pstate_driver = {
+    .verify       = intel_pstate_verify_policy,
+    .setpolicy    = intel_pstate_set_policy,
+    .init         = intel_pstate_cpu_setup,
+    .exit         = intel_pstate_cpu_exit,
+    .update       = intel_pstate_turbo_update,
+    .name         = "intel_pstate",
+};
+
+static int intel_pstate_msrs_not_valid(void)
+{
+    if (!pstate_funcs.get_max() ||
+        !pstate_funcs.get_min() ||
+        !pstate_funcs.get_turbo())
+        return -ENODEV;
+
+    return 0;
+}
+
+static void __init copy_pid_params(struct pstate_adjust_policy *policy)
+{
+    pid_params.sample_rate_ms = policy->sample_rate_ms;
+    pid_params.p_gain_pct = policy->p_gain_pct;
+    pid_params.i_gain_pct = policy->i_gain_pct;
+    pid_params.d_gain_pct = policy->d_gain_pct;
+    pid_params.deadband = policy->deadband;
+    pid_params.setpoint = policy->setpoint;
+}
+
+static void __init copy_cpu_funcs(struct pstate_funcs *funcs)
+{
+    pstate_funcs.get_max   = funcs->get_max;
+    pstate_funcs.get_min   = funcs->get_min;
+    pstate_funcs.get_turbo = funcs->get_turbo;
+    pstate_funcs.get_scaling = funcs->get_scaling;
+    pstate_funcs.set       = funcs->set;
+    pstate_funcs.get_vid   = funcs->get_vid;
+}
+
+int __init intel_pstate_init(void)
+{
+    int cpu, rc = 0;
+    const struct x86_cpu_id *id;
+    struct cpu_defaults *cpu_info;
+
+    id = x86_match_cpu(intel_pstate_cpu_ids);
+    if (!id)
+        return -ENODEV;
+
+    cpu_info = (struct cpu_defaults *)id->driver_data;
+
+    copy_pid_params(&cpu_info->pid_policy);
+    copy_cpu_funcs(&cpu_info->funcs);
+
+    if (intel_pstate_msrs_not_valid())
+        return -ENODEV;
+
+    all_cpu_data = xzalloc_array(struct cpudata *, NR_CPUS);
+    if (!all_cpu_data)
+        return -ENOMEM;
+
+    rc = cpufreq_register_driver(&intel_pstate_driver);
+    if (rc)
+        goto out;
+
+    return rc;
+out:
+    for_each_online_cpu(cpu) {
+        if (all_cpu_data[cpu]) {
+            kill_timer(&all_cpu_data[cpu]->timer);
+            xfree(all_cpu_data[cpu]);
+        }
+    }
+    xfree(all_cpu_data);
+    return -ENODEV;
+}
diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
index e105aeb..dba29c0 100644
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -238,6 +238,9 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
 	if ( cpu_has(c, X86_FEATURE_CLFLSH) )
 		c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8;
 
+	if (cpuid_ecx(6) & 0x1)
+		set_bit(X86_FEATURE_APERFMPERF, c->x86_capability);
+
 	/* AMD-defined flags: level 0x80000001 */
 	c->extended_cpuid_level = cpuid_eax(0x80000000);
 	if ( (c->extended_cpuid_level & 0xffff0000) == 0x80000000 ) {
diff --git a/xen/include/acpi/cpufreq/cpufreq.h b/xen/include/acpi/cpufreq/cpufreq.h
index d10e4c7..71bb45c 100644
--- a/xen/include/acpi/cpufreq/cpufreq.h
+++ b/xen/include/acpi/cpufreq/cpufreq.h
@@ -34,6 +34,12 @@ struct acpi_cpufreq_data {
 
 extern struct acpi_cpufreq_data *cpufreq_drv_data[NR_CPUS];
 
+/*
+ * Maximum transition latency is in nanoseconds - if it's unknown,
+ * CPUFREQ_ETERNAL shall be used.
+ */
+#define CPUFREQ_ETERNAL        (-1)
+
 struct cpufreq_cpuinfo {
     unsigned int        max_freq;
     unsigned int        second_max_freq;    /* P1 if Turbo Mode is on */
@@ -77,6 +83,8 @@ struct cpufreq_policy {
 };
 DECLARE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_policy);
 
+extern int intel_pstate_init(void);
+
 extern int __cpufreq_set_policy(struct cpufreq_policy *data,
                                 struct cpufreq_policy *policy);
 
@@ -101,6 +109,12 @@ struct cpufreq_freqs {
  *                          CPUFREQ GOVERNORS                        *
  *********************************************************************/
 
+/* The four internal governors used in intel_pstate */
+#define CPUFREQ_POLICY_POWERSAVE        (1)
+#define CPUFREQ_POLICY_PERFORMANCE      (2)
+#define CPUFREQ_POLICY_USERSPACE        (3)
+#define CPUFREQ_POLICY_ONDEMAND         (4)
+
 #define CPUFREQ_GOV_START  1
 #define CPUFREQ_GOV_STOP   2
 #define CPUFREQ_GOV_LIMITS 3
diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h
index 7963a3a..efc9711 100644
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -69,6 +69,7 @@
 #define X86_FEATURE_XTOPOLOGY    (3*32+13) /* cpu topology enum extensions */
 #define X86_FEATURE_CPUID_FAULTING (3*32+14) /* cpuid faulting */
 #define X86_FEATURE_CLFLUSH_MONITOR (3*32+15) /* clflush reqd with monitor */
+#define X86_FEATURE_APERFMPERF (3*32+28) /* APERFMPERF */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* Streaming SIMD Extensions-3 */
diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
index 83f2f70..57945d9 100644
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -52,6 +52,8 @@
 #define MSR_IA32_MCG_STATUS		0x0000017a
 #define MSR_IA32_MCG_CTL		0x0000017b
 
+#define MSR_NHM_TURBO_RATIO_LIMIT	0x000001ad
+
 #define MSR_IA32_PEBS_ENABLE		0x000003f1
 #define MSR_IA32_DS_AREA		0x00000600
 #define MSR_IA32_PERF_CAPABILITIES	0x00000345
@@ -319,6 +321,7 @@
 #define MSR_IA32_MISC_ENABLE_MONITOR_ENABLE (1<<18)
 #define MSR_IA32_MISC_ENABLE_LIMIT_CPUID  (1<<22)
 #define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1<<23)
+#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL<<38)
 
 #define MSR_IA32_TSC_DEADLINE		0x000006E0
 #define MSR_IA32_ENERGY_PERF_BIAS	0x000001b0
-- 
1.9.1


* Re: [PATCH v3 06/11] x86/intel_pstate: the main body of the intel_pstate driver
  2015-06-11  8:27 [PATCH v3 06/11] x86/intel_pstate: the main body of the intel_pstate driver Wei Wang
@ 2015-06-11 14:00 ` Julien Grall
  2015-06-12  1:41   ` Wang, Wei W
  2015-06-18 15:03 ` Jan Beulich
  1 sibling, 1 reply; 7+ messages in thread
From: Julien Grall @ 2015-06-11 14:00 UTC (permalink / raw)
  To: Wei Wang, xen-devel, jbeulich; +Cc: andrew.cooper3

Hi Wei,

On 11/06/2015 04:27, Wei Wang wrote:
> diff --git a/xen/include/acpi/cpufreq/cpufreq.h b/xen/include/acpi/cpufreq/cpufreq.h
> index d10e4c7..71bb45c 100644
> --- a/xen/include/acpi/cpufreq/cpufreq.h
> +++ b/xen/include/acpi/cpufreq/cpufreq.h
> @@ -34,6 +34,12 @@ struct acpi_cpufreq_data {
>
>   extern struct acpi_cpufreq_data *cpufreq_drv_data[NR_CPUS];
>
> +/*
> + * Maximum transition latency is in nanoseconds - if it's unknown,
> + * CPUFREQ_ETERNAL shall be used.
> + */
> +#define CPUFREQ_ETERNAL        (-1)
> +
>   struct cpufreq_cpuinfo {
>       unsigned int        max_freq;
>       unsigned int        second_max_freq;    /* P1 if Turbo Mode is on */
> @@ -77,6 +83,8 @@ struct cpufreq_policy {
>   };
>   DECLARE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_policy);
>
> +extern int intel_pstate_init(void);
> +

As said on a previous version [1], intel_pstate_init is x86 specific. 
Although xen/include/acpi contains common headers.

Please move everything x86 specific in asm-x86.

>   extern int __cpufreq_set_policy(struct cpufreq_policy *data,
>                                   struct cpufreq_policy *policy);
>
> @@ -101,6 +109,12 @@ struct cpufreq_freqs {
>    *                          CPUFREQ GOVERNORS                        *
>    *********************************************************************/
>
> +/* The four internal governors used in intel_pstate */
> +#define CPUFREQ_POLICY_POWERSAVE        (1)
> +#define CPUFREQ_POLICY_PERFORMANCE      (2)
> +#define CPUFREQ_POLICY_USERSPACE        (3)
> +#define CPUFREQ_POLICY_ONDEMAND         (4)
> +

 From the comment, this looks like x86 specific. Maybe even intel_pstate?

>   #define CPUFREQ_GOV_START  1
>   #define CPUFREQ_GOV_STOP   2
>   #define CPUFREQ_GOV_LIMITS 3
> diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h
> index 7963a3a..efc9711 100644
> --- a/xen/include/asm-x86/cpufeature.h
> +++ b/xen/include/asm-x86/cpufeature.h
> @@ -69,6 +69,7 @@
>   #define X86_FEATURE_XTOPOLOGY    (3*32+13) /* cpu topology enum extensions */
>   #define X86_FEATURE_CPUID_FAULTING (3*32+14) /* cpuid faulting */
>   #define X86_FEATURE_CLFLUSH_MONITOR (3*32+15) /* clflush reqd with monitor */
> +#define X86_FEATURE_APERFMPERF (3*32+28) /* APERFMPERF */
>
>   /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
>   #define X86_FEATURE_XMM3	(4*32+ 0) /* Streaming SIMD Extensions-3 */

Regards,

[1] http://lists.xen.org/archives/html/xen-devel/2015-04/msg02904.html

-- 
Julien Grall


* Re: [PATCH v3 06/11] x86/intel_pstate: the main body of the intel_pstate driver
  2015-06-11 14:00 ` Julien Grall
@ 2015-06-12  1:41   ` Wang, Wei W
  2015-06-12 11:29     ` Julien Grall
  0 siblings, 1 reply; 7+ messages in thread
From: Wang, Wei W @ 2015-06-12  1:41 UTC (permalink / raw)
  To: Julien Grall, xen-devel@lists.xen.org, jbeulich@suse.com
  Cc: andrew.cooper3@citrix.com

Hi Julien,

On 11/06/2015 22:02, Julien Grall wrote:
> On 11/06/2015 04:27, Wei Wang wrote:
> > diff --git a/xen/include/acpi/cpufreq/cpufreq.h
> b/xen/include/acpi/cpufreq/cpufreq.h
> > index d10e4c7..71bb45c 100644
> > --- a/xen/include/acpi/cpufreq/cpufreq.h
> > +++ b/xen/include/acpi/cpufreq/cpufreq.h
> > @@ -34,6 +34,12 @@ struct acpi_cpufreq_data {
> >
> >   extern struct acpi_cpufreq_data *cpufreq_drv_data[NR_CPUS];
> >
> > +/*
> > + * Maximum transition latency is in nanoseconds - if it's unknown,
> > + * CPUFREQ_ETERNAL shall be used.
> > + */
> > +#define CPUFREQ_ETERNAL        (-1)
> > +
> >   struct cpufreq_cpuinfo {
> >       unsigned int        max_freq;
> >       unsigned int        second_max_freq;    /* P1 if Turbo Mode is on */
> > @@ -77,6 +83,8 @@ struct cpufreq_policy {
> >   };
> >   DECLARE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_policy);
> >
> > +extern int intel_pstate_init(void);
> > +
> 
> As said on a previous version [1], intel_pstate_init is x86 specific.
> Although xen/include/acpi contains common headers.
 
Please see our latest discussion here (the bottom of the link):  http://lists.xen.org/archives/html/xen-devel/2015-06/msg00047.html


> Please move everything x86 specific in asm-x86.
> 
> >   extern int __cpufreq_set_policy(struct cpufreq_policy *data,
> >                                   struct cpufreq_policy *policy);
> >
> > @@ -101,6 +109,12 @@ struct cpufreq_freqs {
> >    *                          CPUFREQ GOVERNORS                        *
> >
> **********************************************************
> ***********/
> >
> > +/* The four internal governors used in intel_pstate */
> > +#define CPUFREQ_POLICY_POWERSAVE        (1)
> > +#define CPUFREQ_POLICY_PERFORMANCE      (2)
> > +#define CPUFREQ_POLICY_USERSPACE        (3)
> > +#define CPUFREQ_POLICY_ONDEMAND         (4)
> > +
> 
>  From the comment, this looks like x86 specific. Maybe even intel_pstate?


Yes. It's currently only used by the intel_pstate driver. 
 
> >   #define CPUFREQ_GOV_START  1
> >   #define CPUFREQ_GOV_STOP   2
> >   #define CPUFREQ_GOV_LIMITS 3
> > diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-
> x86/cpufeature.h
> > index 7963a3a..efc9711 100644
> > --- a/xen/include/asm-x86/cpufeature.h
> > +++ b/xen/include/asm-x86/cpufeature.h
> > @@ -69,6 +69,7 @@
> >   #define X86_FEATURE_XTOPOLOGY    (3*32+13) /* cpu topology enum
> extensions */
> >   #define X86_FEATURE_CPUID_FAULTING (3*32+14) /* cpuid faulting */
> >   #define X86_FEATURE_CLFLUSH_MONITOR (3*32+15) /* clflush reqd with
> monitor */
> > +#define X86_FEATURE_APERFMPERF (3*32+28) /* APERFMPERF */
> >
> >   /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
> >   #define X86_FEATURE_XMM3	(4*32+ 0) /* Streaming SIMD
> Extensions-3 */
> 

Best,
Wei


* Re: [PATCH v3 06/11] x86/intel_pstate: the main body of the intel_pstate driver
  2015-06-12  1:41   ` Wang, Wei W
@ 2015-06-12 11:29     ` Julien Grall
  2015-06-15  0:30       ` Wang, Wei W
  0 siblings, 1 reply; 7+ messages in thread
From: Julien Grall @ 2015-06-12 11:29 UTC (permalink / raw)
  To: Wang, Wei W, xen-devel@lists.xen.org, jbeulich@suse.com
  Cc: andrew.cooper3@citrix.com



On 11/06/2015 21:41, Wang, Wei W wrote:
> On 11/06/2015 22:02, Julien Grall wrote:
>> On 11/06/2015 04:27, Wei Wang wrote:
>>> diff --git a/xen/include/acpi/cpufreq/cpufreq.h
>> b/xen/include/acpi/cpufreq/cpufreq.h
>>> index d10e4c7..71bb45c 100644
>>> --- a/xen/include/acpi/cpufreq/cpufreq.h
>>> +++ b/xen/include/acpi/cpufreq/cpufreq.h
>>> @@ -34,6 +34,12 @@ struct acpi_cpufreq_data {
>>>
>>>    extern struct acpi_cpufreq_data *cpufreq_drv_data[NR_CPUS];
>>>
>>> +/*
>>> + * Maximum transition latency is in nanoseconds - if it's unknown,
>>> + * CPUFREQ_ETERNAL shall be used.
>>> + */
>>> +#define CPUFREQ_ETERNAL        (-1)
>>> +
>>>    struct cpufreq_cpuinfo {
>>>        unsigned int        max_freq;
>>>        unsigned int        second_max_freq;    /* P1 if Turbo Mode is on */
>>> @@ -77,6 +83,8 @@ struct cpufreq_policy {
>>>    };
>>>    DECLARE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_policy);
>>>
>>> +extern int intel_pstate_init(void);
>>> +
>>
>> As said on a previous version [1], intel_pstate_init is x86 specific.
>> Although xen/include/acpi contains common headers.
>
> Please see our latest discussion here (the bottom of the link):  http://lists.xen.org/archives/html/xen-devel/2015-06/msg00047.html

Well we are planning to move cpufreq.h out of acpi in order to use for 
device tree based platform. Most of these declaration is common.

Although any x86 specific function would have to go out in a separate 
header.

Please avoid to add new one when it's possible. I don't see why a new 
asm-x86/cpufreq.h can't be added...

>>>    extern int __cpufreq_set_policy(struct cpufreq_policy *data,
>>>                                    struct cpufreq_policy *policy);
>>>
>>> @@ -101,6 +109,12 @@ struct cpufreq_freqs {
>>>     *                          CPUFREQ GOVERNORS                        *
>>>
>> **********************************************************
>> ***********/
>>>
>>> +/* The four internal governors used in intel_pstate */
>>> +#define CPUFREQ_POLICY_POWERSAVE        (1)
>>> +#define CPUFREQ_POLICY_PERFORMANCE      (2)
>>> +#define CPUFREQ_POLICY_USERSPACE        (3)
>>> +#define CPUFREQ_POLICY_ONDEMAND         (4)
>>> +
>>
>>   From the comment, this looks like x86 specific. Maybe even intel_pstate?
>
>
> Yes. It's currently only used by the intel_pstate driver.

After looking to this series, this statement looks wrong to me... You 
are using all these defines in the common cpufreq code (parameters, 
pmstat,...).

The cpufreq framework should be agnostic to any cpufreq driver 
implementation.

So it looks like to me that we want CPUFREQ_* to be exposed for anyone. 
And specifying the behavior when policy = 0 would be great too rather 
than relying on a future developer to not define 0.
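
Something along these lines would make that explicit (illustrative only;
the UNKNOWN name is made up):

    /* 0 is reserved so an uninitialised policy field is never a valid one. */
    #define CPUFREQ_POLICY_UNKNOWN          (0)
    #define CPUFREQ_POLICY_POWERSAVE        (1)
    #define CPUFREQ_POLICY_PERFORMANCE      (2)
    #define CPUFREQ_POLICY_USERSPACE        (3)
    #define CPUFREQ_POLICY_ONDEMAND         (4)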

Regards,

-- 
Julien Grall


* Re: [PATCH v3 06/11] x86/intel_pstate: the main body of the intel_pstate driver
  2015-06-12 11:29     ` Julien Grall
@ 2015-06-15  0:30       ` Wang, Wei W
  2015-06-15  9:11         ` Jan Beulich
  0 siblings, 1 reply; 7+ messages in thread
From: Wang, Wei W @ 2015-06-15  0:30 UTC (permalink / raw)
  To: Julien Grall, xen-devel@lists.xen.org, jbeulich@suse.com
  Cc: andrew.cooper3@citrix.com

On 12/06/2015 19:30, Julien Grall wrote:
> On 11/06/2015 21:41, Wang, Wei W wrote:
> > On 11/06/2015 22:02, Julien Grall wrote:
> >> On 11/06/2015 04:27, Wei Wang wrote:
> >>> diff --git a/xen/include/acpi/cpufreq/cpufreq.h
> >> b/xen/include/acpi/cpufreq/cpufreq.h
> >>> index d10e4c7..71bb45c 100644
> >>> --- a/xen/include/acpi/cpufreq/cpufreq.h
> >>> +++ b/xen/include/acpi/cpufreq/cpufreq.h
> >>> @@ -34,6 +34,12 @@ struct acpi_cpufreq_data {
> >>>
> >>>    extern struct acpi_cpufreq_data *cpufreq_drv_data[NR_CPUS];
> >>>
> >>> +/*
> >>> + * Maximum transition latency is in nanoseconds - if it's unknown,
> >>> + * CPUFREQ_ETERNAL shall be used.
> >>> + */
> >>> +#define CPUFREQ_ETERNAL        (-1)
> >>> +
> >>>    struct cpufreq_cpuinfo {
> >>>        unsigned int        max_freq;
> >>>        unsigned int        second_max_freq;    /* P1 if Turbo Mode is on */
> >>> @@ -77,6 +83,8 @@ struct cpufreq_policy {
> >>>    };
> >>>    DECLARE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_policy);
> >>>
> >>> +extern int intel_pstate_init(void);
> >>> +
> >>
> >> As said on a previous version [1], intel_pstate_init is x86 specific.
> >> Although xen/include/acpi contains common headers.
> >
> > Please see our latest discussion here (the bottom of the link):
> > http://lists.xen.org/archives/html/xen-devel/2015-06/msg00047.html
> 
> Well we are planning to move cpufreq.h out of acpi in order to use for device
> tree based platform. Most of these declaration is common.
> 
> Although any x86 specific function would have to go out in a separate header.
> 
> Please avoid to add new one when it's possible. I don't see why a new asm-
> x86/cpufreq.h can't be added...

I don't have an objection to creating a new header like that. 
Jan, what's your opinion?


> >>>    extern int __cpufreq_set_policy(struct cpufreq_policy *data,
> >>>                                    struct cpufreq_policy *policy);
> >>>
> >>> @@ -101,6 +109,12 @@ struct cpufreq_freqs {
> >>>     *                          CPUFREQ GOVERNORS                        *
> >>>
> >>
> **********************************************************
> >> ***********/
> >>>
> >>> +/* The four internal governors used in intel_pstate */
> >>> +#define CPUFREQ_POLICY_POWERSAVE        (1)
> >>> +#define CPUFREQ_POLICY_PERFORMANCE      (2)
> >>> +#define CPUFREQ_POLICY_USERSPACE        (3)
> >>> +#define CPUFREQ_POLICY_ONDEMAND         (4)
> >>> +
> >>
> >>   From the comment, this looks like x86 specific. Maybe even intel_pstate?
> >
> >
> > Yes. It's currently only used by the intel_pstate driver.
> 
> After looking to this series, this statement looks wrong to me... You are using
> all these defines in the common cpufreq code (parameters, pmstat,...).
> 
> The cpufreq framework should be agnostic to any cpufreq driver
> implementation.
> 
> So it looks like to me that we want CPUFREQ_* to be exposed for anyone.
> And specifying the behavior when policy = 0 would be great too rather than
> relying on a future developer to not define 0.

How about renaming them to "INTEL_PSTATE_POLICY_XXXX" ?

Best,
Wei


* Re: [PATCH v3 06/11] x86/intel_pstate: the main body of the intel_pstate driver
  2015-06-15  0:30       ` Wang, Wei W
@ 2015-06-15  9:11         ` Jan Beulich
  0 siblings, 0 replies; 7+ messages in thread
From: Jan Beulich @ 2015-06-15  9:11 UTC (permalink / raw)
  To: Wei W Wang
  Cc: Julien Grall, andrew.cooper3@citrix.com, xen-devel@lists.xen.org

>>> On 15.06.15 at 02:30, <wei.w.wang@intel.com> wrote:
> On 12/06/2015 19:30, Julien Grall wrote:
>> On 11/06/2015 21:41, Wang, Wei W wrote:
>> > On 11/06/2015 22:02, Julien Grall wrote:
>> >> On 11/06/2015 04:27, Wei Wang wrote:
>> >>> diff --git a/xen/include/acpi/cpufreq/cpufreq.h
>> >> b/xen/include/acpi/cpufreq/cpufreq.h
>> >>> index d10e4c7..71bb45c 100644
>> >>> --- a/xen/include/acpi/cpufreq/cpufreq.h
>> >>> +++ b/xen/include/acpi/cpufreq/cpufreq.h
>> >>> @@ -34,6 +34,12 @@ struct acpi_cpufreq_data {
>> >>>
>> >>>    extern struct acpi_cpufreq_data *cpufreq_drv_data[NR_CPUS];
>> >>>
>> >>> +/*
>> >>> + * Maximum transition latency is in nanoseconds - if it's unknown,
>> >>> + * CPUFREQ_ETERNAL shall be used.
>> >>> + */
>> >>> +#define CPUFREQ_ETERNAL        (-1)
>> >>> +
>> >>>    struct cpufreq_cpuinfo {
>> >>>        unsigned int        max_freq;
>> >>>        unsigned int        second_max_freq;    /* P1 if Turbo Mode is on */
>> >>> @@ -77,6 +83,8 @@ struct cpufreq_policy {
>> >>>    };
>> >>>    DECLARE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_policy);
>> >>>
>> >>> +extern int intel_pstate_init(void);
>> >>> +
>> >>
>> >> As said on a previous version [1], intel_pstate_init is x86 specific.
>> >> Although xen/include/acpi contains common headers.
>> >
>> > Please see our latest discussion here (the bottom of the link):
>> > http://lists.xen.org/archives/html/xen-devel/2015-06/msg00047.html 
>> 
>> Well we are planning to move cpufreq.h out of acpi in order to use for 
> device
>> tree based platform. Most of these declaration is common.
>> 
>> Although any x86 specific function would have to go out in a separate 
> header.
>> 
>> Please avoid to add new one when it's possible. I don't see why a new asm-
>> x86/cpufreq.h can't be added...
> 
> I don't have an objection to creating a new header like that. 
> Jan, what's your opinion?

If there's no suitable header to put what you need, creating one like
Julien suggested seems okay to me.

Jan


* Re: [PATCH v3 06/11] x86/intel_pstate: the main body of the intel_pstate driver
  2015-06-11  8:27 [PATCH v3 06/11] x86/intel_pstate: the main body of the intel_pstate driver Wei Wang
  2015-06-11 14:00 ` Julien Grall
@ 2015-06-18 15:03 ` Jan Beulich
  1 sibling, 0 replies; 7+ messages in thread
From: Jan Beulich @ 2015-06-18 15:03 UTC (permalink / raw)
  To: Wei Wang; +Cc: andrew.cooper3, xen-devel

>>> On 11.06.15 at 10:27, <wei.w.wang@intel.com> wrote:
> The intel_pstate driver is ported following its kernel code logic
> (commit: 93f0822d). In order to port the Linux source file with
> minimal modifications, some of the variable types are kept intact
> (e.g. "int current_pstate", which would otherwise be changed to
> "unsigned int").

Minimal modifications implies no whitespace changes either (and
even less so indentation corruption, like I just spotted in
ceiling_fp() and pid_calc(); I didn't look further). I.e. if you aim
at that, a diff between the original and the Xen clone should be
reasonably small (and that's what I'd likely look at in the end, as
I then don't really want to review all code which already made it
into upstream Linux).

> --- a/xen/arch/x86/cpu/common.c
> +++ b/xen/arch/x86/cpu/common.c
> @@ -238,6 +238,9 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
>  	if ( cpu_has(c, X86_FEATURE_CLFLSH) )
>  		c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8;
>  
> +	if (cpuid_ecx(6) & 0x1)
> +		set_bit(X86_FEATURE_APERFMPERF, c->x86_capability);

With that the kind of misplaced identical code pieces in cpufreq.c
and powernow.c should go away. The three changes together
would probably best be broken out to a separate patch, to keep
the focus on this one on the introduction of intel_pstate.c.

Jan

