Merge branch 'pm-tools'

* pm-tools:
  cpupower: Add support for new AMD family 0x17
  cpupower: Fix bug where return value was not used
  tools/power turbostat: update version number
  tools/power turbostat: decode MSR_IA32_MISC_ENABLE only on Intel
  tools/power turbostat: stop migrating, unless '-m'
  tools/power turbostat: if  --debug, print sampling overhead
  tools/power turbostat: hide SKL counters, when not requested
  intel_pstate: use updated msr-index.h HWP.EPP values
  tools/power x86_energy_perf_policy: support HWP.EPP
  x86: msr-index.h: fix shifts to ULL results in HWP macros.
  x86: msr-index.h: define HWP.EPP values
  x86: msr-index.h: define EPB mid-points
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 673f9ac..d8638e2 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -249,9 +249,13 @@
 #define HWP_MIN_PERF(x) 		(x & 0xff)
 #define HWP_MAX_PERF(x) 		((x & 0xff) << 8)
 #define HWP_DESIRED_PERF(x)		((x & 0xff) << 16)
-#define HWP_ENERGY_PERF_PREFERENCE(x)	((x & 0xff) << 24)
-#define HWP_ACTIVITY_WINDOW(x)		((x & 0xff3) << 32)
-#define HWP_PACKAGE_CONTROL(x)		((x & 0x1) << 42)
+#define HWP_ENERGY_PERF_PREFERENCE(x)	(((unsigned long long) x & 0xff) << 24)
+#define HWP_EPP_PERFORMANCE		0x00
+#define HWP_EPP_BALANCE_PERFORMANCE	0x80
+#define HWP_EPP_BALANCE_POWERSAVE	0xC0
+#define HWP_EPP_POWERSAVE		0xFF
+#define HWP_ACTIVITY_WINDOW(x)		((unsigned long long)(x & 0xff3) << 32)
+#define HWP_PACKAGE_CONTROL(x)		((unsigned long long)(x & 0x1) << 42)
 
 /* IA32_HWP_STATUS */
 #define HWP_GUARANTEED_CHANGE(x)	(x & 0x1)
@@ -474,9 +478,11 @@
 #define MSR_MISC_PWR_MGMT		0x000001aa
 
 #define MSR_IA32_ENERGY_PERF_BIAS	0x000001b0
-#define ENERGY_PERF_BIAS_PERFORMANCE	0
-#define ENERGY_PERF_BIAS_NORMAL		6
-#define ENERGY_PERF_BIAS_POWERSAVE	15
+#define ENERGY_PERF_BIAS_PERFORMANCE		0
+#define ENERGY_PERF_BIAS_BALANCE_PERFORMANCE	4
+#define ENERGY_PERF_BIAS_NORMAL			6
+#define ENERGY_PERF_BIAS_BALANCE_POWERSAVE	8
+#define ENERGY_PERF_BIAS_POWERSAVE		15
 
 #define MSR_IA32_PACKAGE_THERM_STATUS		0x000001b1
 
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index eb11585..029a93b 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -653,6 +653,12 @@ static const char * const energy_perf_strings[] = {
 	"power",
 	NULL
 };
+static const unsigned int epp_values[] = {
+	HWP_EPP_PERFORMANCE,
+	HWP_EPP_BALANCE_PERFORMANCE,
+	HWP_EPP_BALANCE_POWERSAVE,
+	HWP_EPP_POWERSAVE
+};
 
 static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data)
 {
@@ -664,17 +670,14 @@ static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data)
 		return epp;
 
 	if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
-		/*
-		 * Range:
-		 *	0x00-0x3F	:	Performance
-		 *	0x40-0x7F	:	Balance performance
-		 *	0x80-0xBF	:	Balance power
-		 *	0xC0-0xFF	:	Power
-		 * The EPP is a 8 bit value, but our ranges restrict the
-		 * value which can be set. Here only using top two bits
-		 * effectively.
-		 */
-		index = (epp >> 6) + 1;
+		if (epp == HWP_EPP_PERFORMANCE)
+			return 1;
+		if (epp <= HWP_EPP_BALANCE_PERFORMANCE)
+			return 2;
+		if (epp <= HWP_EPP_BALANCE_POWERSAVE)
+			return 3;
+		else
+			return 4;
 	} else if (static_cpu_has(X86_FEATURE_EPB)) {
 		/*
 		 * Range:
@@ -712,15 +715,8 @@ static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data,
 
 		value &= ~GENMASK_ULL(31, 24);
 
-		/*
-		 * If epp is not default, convert from index into
-		 * energy_perf_strings to epp value, by shifting 6
-		 * bits left to use only top two bits in epp.
-		 * The resultant epp need to shifted by 24 bits to
-		 * epp position in MSR_HWP_REQUEST.
-		 */
 		if (epp == -EINVAL)
-			epp = (pref_index - 1) << 6;
+			epp = epp_values[pref_index - 1];
 
 		value |= (u64)epp << 24;
 		ret = wrmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, value);
diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c
index 6437ef3..5fd5c5b 100644
--- a/tools/power/cpupower/utils/helpers/amd.c
+++ b/tools/power/cpupower/utils/helpers/amd.c
@@ -26,6 +26,15 @@ union msr_pstate {
 		unsigned res3:21;
 		unsigned en:1;
 	} bits;
+	struct {
+		unsigned fid:8;
+		unsigned did:6;
+		unsigned vid:8;
+		unsigned iddval:8;
+		unsigned idddiv:2;
+		unsigned res1:30;
+		unsigned en:1;
+	} fam17h_bits;
 	unsigned long long val;
 };
 
@@ -35,6 +44,8 @@ static int get_did(int family, union msr_pstate pstate)
 
 	if (family == 0x12)
 		t = pstate.val & 0xf;
+	else if (family == 0x17)
+		t = pstate.fam17h_bits.did;
 	else
 		t = pstate.bits.did;
 
@@ -44,16 +55,20 @@ static int get_did(int family, union msr_pstate pstate)
 static int get_cof(int family, union msr_pstate pstate)
 {
 	int t;
-	int fid, did;
+	int fid, did, cof;
 
 	did = get_did(family, pstate);
-
-	t = 0x10;
-	fid = pstate.bits.fid;
-	if (family == 0x11)
-		t = 0x8;
-
-	return (100 * (fid + t)) >> did;
+	if (family == 0x17) {
+		fid = pstate.fam17h_bits.fid;
+		cof = 200 * fid / did;
+	} else {
+		t = 0x10;
+		fid = pstate.bits.fid;
+		if (family == 0x11)
+			t = 0x8;
+		cof = (100 * (fid + t)) >> did;
+	}
+	return cof;
 }
 
 /* Needs:
diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h
index afb66f8..799a18b 100644
--- a/tools/power/cpupower/utils/helpers/helpers.h
+++ b/tools/power/cpupower/utils/helpers/helpers.h
@@ -70,6 +70,8 @@ enum cpupower_cpu_vendor {X86_VENDOR_UNKNOWN = 0, X86_VENDOR_INTEL,
 #define CPUPOWER_CAP_IS_SNB		0x00000020
 #define CPUPOWER_CAP_INTEL_IDA		0x00000040
 
+#define CPUPOWER_AMD_CPBDIS		0x02000000
+
 #define MAX_HW_PSTATES 10
 
 struct cpupower_cpu_info {
diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c
index 1609243..601d719 100644
--- a/tools/power/cpupower/utils/helpers/misc.c
+++ b/tools/power/cpupower/utils/helpers/misc.c
@@ -2,11 +2,14 @@
 
 #include "helpers/helpers.h"
 
+#define MSR_AMD_HWCR	0xc0010015
+
 int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active,
 			int *states)
 {
 	struct cpupower_cpu_info cpu_info;
 	int ret;
+	unsigned long long val;
 
 	*support = *active = *states = 0;
 
@@ -16,10 +19,22 @@ int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active,
 
 	if (cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_CBP) {
 		*support = 1;
-		amd_pci_get_num_boost_states(active, states);
-		if (ret <= 0)
-			return ret;
-		*support = 1;
+
+		/* AMD Family 0x17 does not utilize PCI D18F4 like prior
+		 * families and has no fixed discrete boost states but
+		 * has Hardware determined variable increments instead.
+		 */
+
+		if (cpu_info.family == 0x17) {
+			if (!read_msr(cpu, MSR_AMD_HWCR, &val)) {
+				if (!(val & CPUPOWER_AMD_CPBDIS))
+					*active = 1;
+			}
+		} else {
+			ret = amd_pci_get_num_boost_states(active, states);
+			if (ret)
+				return ret;
+		}
 	} else if (cpupower_cpu_info.caps & CPUPOWER_CAP_INTEL_IDA)
 		*support = *active = 1;
 	return 0;
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index b1129473..0dafba2 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -57,7 +57,6 @@ unsigned int list_header_only;
 unsigned int dump_only;
 unsigned int do_snb_cstates;
 unsigned int do_knl_cstates;
-unsigned int do_skl_residency;
 unsigned int do_slm_cstates;
 unsigned int use_c1_residency_msr;
 unsigned int has_aperf;
@@ -93,6 +92,7 @@ unsigned int do_ring_perf_limit_reasons;
 unsigned int crystal_hz;
 unsigned long long tsc_hz;
 int base_cpu;
+int do_migrate;
 double discover_bclk(unsigned int family, unsigned int model);
 unsigned int has_hwp;	/* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
 			/* IA32_HWP_REQUEST, IA32_HWP_STATUS */
@@ -151,6 +151,8 @@ size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size;
 #define MAX_ADDED_COUNTERS 16
 
 struct thread_data {
+	struct timeval tv_begin;
+	struct timeval tv_end;
 	unsigned long long tsc;
 	unsigned long long aperf;
 	unsigned long long mperf;
@@ -301,6 +303,9 @@ int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg
 
 int cpu_migrate(int cpu)
 {
+	if (!do_migrate)
+		return 0;
+
 	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
 	CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
 	if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
@@ -384,8 +389,14 @@ struct msr_counter bic[] = {
 	{ 0x0, "CPU" },
 	{ 0x0, "Mod%c6" },
 	{ 0x0, "sysfs" },
+	{ 0x0, "Totl%C0" },
+	{ 0x0, "Any%C0" },
+	{ 0x0, "GFX%C0" },
+	{ 0x0, "CPUGFX%" },
 };
 
+
+
 #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
 #define	BIC_Package	(1ULL << 0)
 #define	BIC_Avg_MHz	(1ULL << 1)
@@ -426,6 +437,10 @@ struct msr_counter bic[] = {
 #define	BIC_CPU		(1ULL << 36)
 #define	BIC_Mod_c6	(1ULL << 37)
 #define	BIC_sysfs	(1ULL << 38)
+#define	BIC_Totl_c0	(1ULL << 39)
+#define	BIC_Any_c0	(1ULL << 40)
+#define	BIC_GFX_c0	(1ULL << 41)
+#define	BIC_CPUGFX	(1ULL << 42)
 
 unsigned long long bic_enabled = 0xFFFFFFFFFFFFFFFFULL;
 unsigned long long bic_present = BIC_sysfs;
@@ -521,6 +536,8 @@ void print_header(char *delim)
 	struct msr_counter *mp;
 	int printed = 0;
 
+	if (debug)
+		outp += sprintf(outp, "usec %s", delim);
 	if (DO_BIC(BIC_Package))
 		outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
 	if (DO_BIC(BIC_Core))
@@ -599,12 +616,14 @@ void print_header(char *delim)
 	if (DO_BIC(BIC_GFXMHz))
 		outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : ""));
 
-	if (do_skl_residency) {
+	if (DO_BIC(BIC_Totl_c0))
 		outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
+	if (DO_BIC(BIC_Any_c0))
 		outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : ""));
+	if (DO_BIC(BIC_GFX_c0))
 		outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : ""));
+	if (DO_BIC(BIC_CPUGFX))
 		outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : ""));
-	}
 
 	if (DO_BIC(BIC_Pkgpc2))
 		outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : ""));
@@ -771,6 +790,14 @@ int format_counters(struct thread_data *t, struct core_data *c,
 		(cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
 		return 0;
 
+	if (debug) {
+		/* on each row, print how many usec each timestamp took to gather */
+		struct timeval tv;
+
+		timersub(&t->tv_end, &t->tv_begin, &tv);
+		outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec);
+	}
+
 	interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
 
 	tsc = t->tsc * tsc_tweak;
@@ -912,12 +939,14 @@ int format_counters(struct thread_data *t, struct core_data *c,
 		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz);
 
 	/* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
-	if (do_skl_residency) {
+	if (DO_BIC(BIC_Totl_c0))
 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc);
+	if (DO_BIC(BIC_Any_c0))
 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0/tsc);
+	if (DO_BIC(BIC_GFX_c0))
 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0/tsc);
+	if (DO_BIC(BIC_CPUGFX))
 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0/tsc);
-	}
 
 	if (DO_BIC(BIC_Pkgpc2))
 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2/tsc);
@@ -1038,12 +1067,16 @@ delta_package(struct pkg_data *new, struct pkg_data *old)
 	int i;
 	struct msr_counter *mp;
 
-	if (do_skl_residency) {
+
+	if (DO_BIC(BIC_Totl_c0))
 		old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
+	if (DO_BIC(BIC_Any_c0))
 		old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
+	if (DO_BIC(BIC_GFX_c0))
 		old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
+	if (DO_BIC(BIC_CPUGFX))
 		old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
-	}
+
 	old->pc2 = new->pc2 - old->pc2;
 	if (DO_BIC(BIC_Pkgpc3))
 		old->pc3 = new->pc3 - old->pc3;
@@ -1292,12 +1325,14 @@ int sum_counters(struct thread_data *t, struct core_data *c,
 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
 		return 0;
 
-	if (do_skl_residency) {
+	if (DO_BIC(BIC_Totl_c0))
 		average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
+	if (DO_BIC(BIC_Any_c0))
 		average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
+	if (DO_BIC(BIC_GFX_c0))
 		average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
+	if (DO_BIC(BIC_CPUGFX))
 		average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;
-	}
 
 	average.packages.pc2 += p->pc2;
 	if (DO_BIC(BIC_Pkgpc3))
@@ -1357,12 +1392,14 @@ void compute_average(struct thread_data *t, struct core_data *c,
 	average.cores.c7 /= topo.num_cores;
 	average.cores.mc6_us /= topo.num_cores;
 
-	if (do_skl_residency) {
+	if (DO_BIC(BIC_Totl_c0))
 		average.packages.pkg_wtd_core_c0 /= topo.num_packages;
+	if (DO_BIC(BIC_Any_c0))
 		average.packages.pkg_any_core_c0 /= topo.num_packages;
+	if (DO_BIC(BIC_GFX_c0))
 		average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
+	if (DO_BIC(BIC_CPUGFX))
 		average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
-	}
 
 	average.packages.pc2 /= topo.num_packages;
 	if (DO_BIC(BIC_Pkgpc3))
@@ -1482,6 +1519,9 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 	struct msr_counter *mp;
 	int i;
 
+
+	gettimeofday(&t->tv_begin, (struct timezone *)NULL);
+
 	if (cpu_migrate(cpu)) {
 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
 		return -1;
@@ -1565,7 +1605,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 
 	/* collect core counters only for 1st thread in core */
 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
-		return 0;
+		goto done;
 
 	if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) {
 		if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
@@ -1601,15 +1641,21 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 
 	/* collect package counters only for 1st core in package */
 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
-		return 0;
+		goto done;
 
-	if (do_skl_residency) {
+	if (DO_BIC(BIC_Totl_c0)) {
 		if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
 			return -10;
+	}
+	if (DO_BIC(BIC_Any_c0)) {
 		if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
 			return -11;
+	}
+	if (DO_BIC(BIC_GFX_c0)) {
 		if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
 			return -12;
+	}
+	if (DO_BIC(BIC_CPUGFX)) {
 		if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
 			return -13;
 	}
@@ -1688,6 +1734,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 		if (get_mp(cpu, mp, &p->counter[i]))
 			return -10;
 	}
+done:
+	gettimeofday(&t->tv_end, (struct timezone *)NULL);
 
 	return 0;
 }
@@ -3895,6 +3943,9 @@ void decode_misc_enable_msr(void)
 {
 	unsigned long long msr;
 
+	if (!genuine_intel)
+		return;
+
 	if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
 		fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n",
 			base_cpu, msr,
@@ -4198,7 +4249,12 @@ void process_cpuid()
 		BIC_PRESENT(BIC_Pkgpc10);
 	}
 	do_irtl_hsw = has_hsw_msrs(family, model);
-	do_skl_residency = has_skl_msrs(family, model);
+	if (has_skl_msrs(family, model)) {
+		BIC_PRESENT(BIC_Totl_c0);
+		BIC_PRESENT(BIC_Any_c0);
+		BIC_PRESENT(BIC_GFX_c0);
+		BIC_PRESENT(BIC_CPUGFX);
+	}
 	do_slm_cstates = is_slm(family, model);
 	do_knl_cstates  = is_knl(family, model);
 
@@ -4578,7 +4634,7 @@ int get_and_dump_counters(void)
 }
 
 void print_version() {
-	fprintf(outf, "turbostat version 17.04.12"
+	fprintf(outf, "turbostat version 17.06.23"
 		" - Len Brown <lenb@kernel.org>\n");
 }
 
@@ -4951,6 +5007,7 @@ void cmdline(int argc, char **argv)
 		{"hide",	required_argument,	0, 'H'},	// meh, -h taken by --help
 		{"Joules",	no_argument,		0, 'J'},
 		{"list",	no_argument,		0, 'l'},
+		{"migrate",	no_argument,		0, 'm'},
 		{"out",		required_argument,	0, 'o'},
 		{"quiet",	no_argument,		0, 'q'},
 		{"show",	required_argument,	0, 's'},
@@ -4962,7 +5019,7 @@ void cmdline(int argc, char **argv)
 
 	progname = argv[0];
 
-	while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:qST:v",
+	while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:Jmo:qST:v",
 				long_options, &option_index)) != -1) {
 		switch (opt) {
 		case 'a':
@@ -5005,6 +5062,9 @@ void cmdline(int argc, char **argv)
 			list_header_only++;
 			quiet++;
 			break;
+		case 'm':
+			do_migrate = 1;
+			break;
 		case 'o':
 			outf = fopen_or_die(optarg, "w");
 			break;
diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile
index 971c9ff..a711eec 100644
--- a/tools/power/x86/x86_energy_perf_policy/Makefile
+++ b/tools/power/x86/x86_energy_perf_policy/Makefile
@@ -1,10 +1,27 @@
-DESTDIR ?=
+CC		= $(CROSS_COMPILE)gcc
+BUILD_OUTPUT    := $(CURDIR)
+PREFIX		:= /usr
+DESTDIR		:=
+
+ifeq ("$(origin O)", "command line")
+	BUILD_OUTPUT := $(O)
+endif
 
 x86_energy_perf_policy : x86_energy_perf_policy.c
+CFLAGS +=	-Wall
+CFLAGS +=	-DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"'
 
+%: %.c
+	@mkdir -p $(BUILD_OUTPUT)
+	$(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@
+
+.PHONY : clean
 clean :
-	rm -f x86_energy_perf_policy
+	@rm -f $(BUILD_OUTPUT)/x86_energy_perf_policy
 
-install :
-	install x86_energy_perf_policy ${DESTDIR}/usr/bin/
-	install x86_energy_perf_policy.8 ${DESTDIR}/usr/share/man/man8/
+install : x86_energy_perf_policy
+	install -d  $(DESTDIR)$(PREFIX)/bin
+	install $(BUILD_OUTPUT)/x86_energy_perf_policy $(DESTDIR)$(PREFIX)/bin/x86_energy_perf_policy
+	install -d  $(DESTDIR)$(PREFIX)/share/man/man8
+	install x86_energy_perf_policy.8 $(DESTDIR)$(PREFIX)/share/man/man8
+
diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8
index 8eaaad6..17db1c3 100644
--- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8
+++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8
@@ -1,104 +1,213 @@
-.\"  This page Copyright (C) 2010 Len Brown <len.brown@intel.com>
+.\"  This page Copyright (C) 2010 - 2015 Len Brown <len.brown@intel.com>
 .\"  Distributed under the GPL, Copyleft 1994.
 .TH X86_ENERGY_PERF_POLICY 8
 .SH NAME
-x86_energy_perf_policy \- read or write MSR_IA32_ENERGY_PERF_BIAS
+x86_energy_perf_policy \- Manage Energy vs. Performance Policy via x86 Model Specific Registers
 .SH SYNOPSIS
-.ft B
 .B x86_energy_perf_policy
-.RB [ "\-c cpu" ]
-.RB [ "\-v" ]
-.RB "\-r"
+.RB "[ options ] [ scope ] [field \ value]"
 .br
-.B x86_energy_perf_policy
-.RB [ "\-c cpu" ]
-.RB [ "\-v" ]
-.RB 'performance'
+.RB "scope: \-\-cpu\ cpu-list | \-\-pkg\ pkg-list"
 .br
-.B x86_energy_perf_policy
-.RB [ "\-c cpu" ]
-.RB [ "\-v" ]
-.RB 'normal'
+.RB "cpu-list, pkg-list: # | #,# | #-# | all"
 .br
-.B x86_energy_perf_policy
-.RB [ "\-c cpu" ]
-.RB [ "\-v" ]
-.RB 'powersave'
+.RB "field: \-\-all | \-\-epb | \-\-hwp-epp | \-\-hwp-min | \-\-hwp-max | \-\-hwp-desired"
 .br
-.B x86_energy_perf_policy
-.RB [ "\-c cpu" ]
-.RB [ "\-v" ]
-.RB n
+.RB "other: (\-\-force | \-\-hwp-enable | \-\-turbo-enable) value"
 .br
+.RB "value: # | default | performance | balance-performance | balance-power | power"
 .SH DESCRIPTION
 \fBx86_energy_perf_policy\fP
-allows software to convey
-its policy for the relative importance of performance
-versus energy savings to the processor.
+displays and updates energy-performance policy settings specific to
+Intel Architecture Processors.  Settings are accessed via Model Specific Register (MSR)
+updates, no matter if the Linux cpufreq sub-system is enabled or not.
 
-The processor uses this information in model-specific ways
-when it must select trade-offs between performance and
-energy efficiency.
+Policy in MSR_IA32_ENERGY_PERF_BIAS (EPB)
+may affect a wide range of hardware decisions,
+such as how aggressively the hardware enters and exits CPU idle states (C-states)
+and Processor Performance States (P-states).
+This policy hint does not replace explicit OS C-state and P-state selection.
+Rather, it tells the hardware how aggressively to implement those selections.
+Further, it allows the OS to influence energy/performance trade-offs where there
+is no software interface, such as in the opportunistic "turbo-mode" P-state range.
+Note that MSR_IA32_ENERGY_PERF_BIAS is defined per CPU,
+but some implementations
+share a single MSR among all CPUs in each processor package.
+On those systems, a write to EPB on one processor will
+be visible, and will have an effect, on all CPUs
+in the same processor package.
 
-This policy hint does not supersede Processor Performance states
-(P-states) or CPU Idle power states (C-states), but allows
-software to have influence where it would otherwise be unable
-to express a preference.
+Hardware P-States (HWP) are effectively an expansion of hardware
+P-state control from the opportunistic turbo-mode P-state range
+to include the entire range of available P-states.
+On Broadwell Xeon, the initial HWP implementation, EPB influenced HWP.
+That influence was removed in subsequent generations,
+where it was moved to the
+Energy_Performance_Preference (EPP) field in
+a pair of dedicated MSRs -- MSR_IA32_HWP_REQUEST and MSR_IA32_HWP_REQUEST_PKG.
 
-For example, this setting may tell the hardware how
-aggressively or conservatively to control frequency
-in the "turbo range" above the explicitly OS-controlled
-P-state frequency range.  It may also tell the hardware
-how aggressively is should enter the OS requested C-states.
+EPP is the most commonly managed knob in HWP mode,
+but MSR_IA32_HWP_REQUEST also allows the user to specify
+minimum-frequency for Quality-of-Service,
+and maximum-frequency for power-capping.
+MSR_IA32_HWP_REQUEST is defined per-CPU.
 
-Support for this feature is indicated by CPUID.06H.ECX.bit3
-per the Intel Architectures Software Developer's Manual.
+MSR_IA32_HWP_REQUEST_PKG has the same capability as MSR_IA32_HWP_REQUEST,
+but it can simultaneously set the default policy for all CPUs within a package.
+A bit in per-CPU MSR_IA32_HWP_REQUEST indicates whether it is
+over-ruled-by or exempt-from MSR_IA32_HWP_REQUEST_PKG.
 
-.SS Options
-\fB-c\fP limits operation to a single CPU.
-The default is to operate on all CPUs.
-Note that MSR_IA32_ENERGY_PERF_BIAS is defined per
-logical processor, but that the initial implementations
-of the MSR were shared among all processors in each package.
+MSR_HWP_CAPABILITIES shows the default values for the fields
+in MSR_IA32_HWP_REQUEST.  It is displayed when no values
+are being written.
+
+.SS SCOPE OPTIONS
 .PP
-\fB-v\fP increases verbosity.  By default
-x86_energy_perf_policy is silent.
+\fB-c, --cpu\fP Operate on the MSR_IA32_HWP_REQUEST for each CPU in a CPU-list.
+The CPU-list may be comma-separated CPU numbers, with dash for range
+or the string "all".  E.g. '--cpu 1,4,6-8' or '--cpu all'.
+When --cpu is used, \fB--hwp-use-pkg\fP is available, which specifies whether the per-cpu
+MSR_IA32_HWP_REQUEST should be over-ruled by MSR_IA32_HWP_REQUEST_PKG (1),
+or exempt from MSR_IA32_HWP_REQUEST_PKG (0).
+
+\fB-p, --pkg\fP Operate on the MSR_IA32_HWP_REQUEST_PKG for each package in the package-list.
+The list is a string of individual package numbers separated
+by commas, and/or ranges of package numbers separated by a dash,
+or the string "all".
+For example '--pkg 1,3' or '--pkg all'.
+
+.SS VALUE OPTIONS
 .PP
-\fB-r\fP is for "read-only" mode - the unchanged state
-is read and displayed.
-.PP
-.I performance
-Set a policy where performance is paramount.
-The processor will be unwilling to sacrifice any performance
-for the sake of energy saving. This is the hardware default.
-.PP
-.I normal
+.I normal | default
 Set a policy with a normal balance between performance and energy efficiency.
 The processor will tolerate minor performance compromise
 for potentially significant energy savings.
-This reasonable default for most desktops and servers.
+This is a reasonable default for most desktops and servers.
+"default" is a synonym for "normal".
 .PP
-.I powersave
+.I performance
+Set a policy for maximum performance,
+accepting no performance sacrifice for the benefit of energy efficiency.
+.PP
+.I balance-performance
+Set a policy with a high priority on performance,
+but allowing some performance loss to benefit energy efficiency.
+.PP
+.I balance-power
+Set a policy where the performance and power are balanced.
+This is the default.
+.PP
+.I power
 Set a policy where the processor can accept
-a measurable performance hit to maximize energy efficiency.
-.PP
-.I n
-Set MSR_IA32_ENERGY_PERF_BIAS to the specified number.
-The range of valid numbers is 0-15, where 0 is maximum
-performance and 15 is maximum energy efficiency.
+a measurable performance impact to maximize energy efficiency.
 
+.PP
+The following table shows the mapping from the value strings above to actual MSR values.
+This mapping is defined in the Linux-kernel header, msr-index.h.
+
+.nf
+VALUE STRING      	EPB	EPP
+performance       	0	0
+balance-performance	4	128
+normal, default		6	128
+balance-power	 	8	192
+power       		15	255
+.fi
+.PP
+For MSR_IA32_HWP_REQUEST performance fields
+(--hwp-min, --hwp-max, --hwp-desired), the value option
+is in units of 100 MHz, e.g. 12 signifies 1200 MHz.
+
+.SS FIELD OPTIONS
+\fB-a, --all value-string\fP Sets all EPB and EPP and HWP limit fields to the value associated with
+the value-string.  In addition, enables turbo-mode and HWP-mode, if they were previously disabled.
+Thus "--all normal" will set a system without cpufreq into a well known configuration.
+.PP
+\fB-B, --epb\fP set EPB per-core or per-package.
+See value strings in the table above.
+.PP
+\fB-d, --debug\fP debug increases verbosity.  By default
+x86_energy_perf_policy is silent for updates,
+and verbose for read-only mode.
+.PP
+\fB-P, --hwp-epp\fP set HWP.EPP per-core or per-package.
+See value strings in the table above.
+.PP
+\fB-m, --hwp-min\fP request HWP to not go below the specified core/bus ratio.
+The "default" is the value found in IA32_HWP_CAPABILITIES.min.
+.PP
+\fB-M, --hwp-max\fP request HWP to not exceed the specified core/bus ratio.
+The "default" is the value found in IA32_HWP_CAPABILITIES.max.
+.PP
+\fB-D, --hwp-desired\fP request HWP 'desired' frequency.
+The "normal" setting is 0, which
+corresponds to 'full autonomous' HWP control.
+Non-zero performance values request a specific performance
+level on this processor, specified in multiples of 100 MHz.
+.PP
+\fB-w, --hwp-window\fP specify integer number of microseconds
+in the sliding window that HWP uses to maintain average frequency.
+This parameter is meaningful only when the "desired" field above is non-zero.
+Default is 0, allowing the HW to choose.
+.SH OTHER OPTIONS
+.PP
+\fB-f, --force\fP writes the specified values without bounds checking.
+.PP
+\fB-U, --hwp-use-pkg\fP (0 | 1), when used in conjunction with --cpu,
+indicates whether the per-CPU MSR_IA32_HWP_REQUEST should be overruled (1)
+or exempt (0) from per-Package MSR_IA32_HWP_REQUEST_PKG settings.
+The default is exempt.
+.PP
+\fB-H, --hwp-enable\fP enable HardWare-P-state (HWP) mode.  Once enabled, system RESET is required to disable HWP mode.
+.PP
+\fB-t, --turbo-enable\fP enable (1) or disable (0) turbo mode.
+.PP
+\fB-v, --version\fP print version and exit.
+.PP
+If no request to change policy is made,
+the default behavior is to read
+and display the current system state,
+including the default capabilities.
+.SH WARNING
+.PP
+This utility writes directly to Model Specific Registers.
+There is no locking or coordination should this utility
+be used to modify HWP limit fields at the same time that
+intel_pstate's sysfs attributes access the same MSRs.
+.PP
+Note that --hwp-desired and --hwp-window are considered experimental.
+Future versions of Linux reserve the right to access these
+fields internally -- potentially conflicting with user-space access.
+.SH EXAMPLE
+.nf
+# sudo x86_energy_perf_policy
+cpu0: EPB 6
+cpu0: HWP_REQ: min 6 max 35 des 0 epp 128 window 0x0 (0*10^0us) use_pkg 0
+cpu0: HWP_CAP: low 1 eff 8 guar 27 high 35
+cpu1: EPB 6
+cpu1: HWP_REQ: min 6 max 35 des 0 epp 128 window 0x0 (0*10^0us) use_pkg 0
+cpu1: HWP_CAP: low 1 eff 8 guar 27 high 35
+cpu2: EPB 6
+cpu2: HWP_REQ: min 6 max 35 des 0 epp 128 window 0x0 (0*10^0us) use_pkg 0
+cpu2: HWP_CAP: low 1 eff 8 guar 27 high 35
+cpu3: EPB 6
+cpu3: HWP_REQ: min 6 max 35 des 0 epp 128 window 0x0 (0*10^0us) use_pkg 0
+cpu3: HWP_CAP: low 1 eff 8 guar 27 high 35
+.fi
 .SH NOTES
-.B "x86_energy_perf_policy "
+.B "x86_energy_perf_policy"
 runs only as root.
 .SH FILES
 .ta
 .nf
 /dev/cpu/*/msr
 .fi
-
 .SH "SEE ALSO"
+.nf
 msr(4)
+Intel(R) 64 and IA-32 Architectures Software Developer's Manual
+.fi
 .PP
 .SH AUTHORS
 .nf
-Written by Len Brown <len.brown@intel.com>
+Len Brown
diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
index 40b3e54..65bbe62 100644
--- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
+++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
@@ -3,322 +3,1424 @@
  * policy preference bias on recent X86 processors.
  */
 /*
- * Copyright (c) 2010, Intel Corporation.
+ * Copyright (c) 2010 - 2017 Intel Corporation.
  * Len Brown <len.brown@intel.com>
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ * This program is released under GPL v2
  */
 
+#define _GNU_SOURCE
+#include MSRHEADER
 #include <stdio.h>
 #include <unistd.h>
 #include <sys/types.h>
+#include <sched.h>
 #include <sys/stat.h>
 #include <sys/resource.h>
+#include <getopt.h>
+#include <err.h>
 #include <fcntl.h>
 #include <signal.h>
 #include <sys/time.h>
+#include <limits.h>
 #include <stdlib.h>
 #include <string.h>
+#include <cpuid.h>
+#include <errno.h>
 
-unsigned int verbose;		/* set with -v */
-unsigned int read_only;		/* set with -r */
+#define	OPTARG_NORMAL			(INT_MAX - 1)
+#define	OPTARG_POWER			(INT_MAX - 2)
+#define	OPTARG_BALANCE_POWER		(INT_MAX - 3)
+#define	OPTARG_BALANCE_PERFORMANCE	(INT_MAX - 4)
+#define	OPTARG_PERFORMANCE		(INT_MAX - 5)
+
+struct msr_hwp_cap {
+	unsigned char highest;
+	unsigned char guaranteed;
+	unsigned char efficient;
+	unsigned char lowest;
+};
+
+struct msr_hwp_request {
+	unsigned char hwp_min;
+	unsigned char hwp_max;
+	unsigned char hwp_desired;
+	unsigned char hwp_epp;
+	unsigned int hwp_window;
+	unsigned char hwp_use_pkg;
+} req_update;
+
+unsigned int debug;
+unsigned int verbose;
+unsigned int force;
 char *progname;
-unsigned long long new_bias;
-int cpu = -1;
+int base_cpu;
+unsigned char update_epb;
+unsigned long long new_epb;
+unsigned char turbo_is_enabled;
+unsigned char update_turbo;
+unsigned char turbo_update_value;
+unsigned char update_hwp_epp;
+unsigned char update_hwp_min;
+unsigned char update_hwp_max;
+unsigned char update_hwp_desired;
+unsigned char update_hwp_window;
+unsigned char update_hwp_use_pkg;
+unsigned char update_hwp_enable;
+#define hwp_update_enabled() (update_hwp_enable | update_hwp_epp | update_hwp_max | update_hwp_min | update_hwp_desired | update_hwp_window | update_hwp_use_pkg)
+int max_cpu_num;
+int max_pkg_num;
+#define MAX_PACKAGES 64
+unsigned int first_cpu_in_pkg[MAX_PACKAGES];
+unsigned long long pkg_present_set;
+unsigned long long pkg_selected_set;
+cpu_set_t *cpu_present_set;
+cpu_set_t *cpu_selected_set;
+int genuine_intel;
+
+size_t cpu_setsize;
+
+char *proc_stat = "/proc/stat";
+
+unsigned int has_epb;	/* MSR_IA32_ENERGY_PERF_BIAS */
+unsigned int has_hwp;	/* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
+			/* IA32_HWP_REQUEST, IA32_HWP_STATUS */
+unsigned int has_hwp_notify;		/* IA32_HWP_INTERRUPT */
+unsigned int has_hwp_activity_window;	/* IA32_HWP_REQUEST[bits 41:32] */
+unsigned int has_hwp_epp;	/* IA32_HWP_REQUEST[bits 31:24] */
+unsigned int has_hwp_request_pkg;	/* IA32_HWP_REQUEST_PKG */
+
+unsigned int bdx_highest_ratio;
 
 /*
- * Usage:
- *
- * -c cpu: limit action to a single CPU (default is all CPUs)
- * -v: verbose output (can invoke more than once)
- * -r: read-only, don't change any settings
- *
- *  performance
- *	Performance is paramount.
- *	Unwilling to sacrifice any performance
- *	for the sake of energy saving. (hardware default)
- *
- *  normal
- *	Can tolerate minor performance compromise
- *	for potentially significant energy savings.
- *	(reasonable default for most desktops and servers)
- *
- *  powersave
- *	Can tolerate significant performance hit
- *	to maximize energy savings.
- *
- * n
- *	a numerical value to write to the underlying MSR.
+ * maintain compatibility with original implementation, but don't document it:
  */
 void usage(void)
 {
-	printf("%s: [-c cpu] [-v] "
-		"(-r | 'performance' | 'normal' | 'powersave' | n)\n",
-		progname);
+	fprintf(stderr, "%s [options] [scope][field value]\n", progname);
+	fprintf(stderr, "scope: --cpu cpu-list [--hwp-use-pkg #] | --pkg pkg-list\n");
+	fprintf(stderr, "field: --all | --epb | --hwp-epp | --hwp-min | --hwp-max | --hwp-desired\n");
+	fprintf(stderr, "other: --hwp-enable | --turbo-enable (0 | 1) | --help | --force\n");
+	fprintf(stderr,
+		"value: ( # | \"normal\" | \"performance\" | \"balance-performance\" | \"balance-power\"| \"power\")\n");
+	fprintf(stderr, "--hwp-window usec\n");
+
+	fprintf(stderr, "Specify only Energy Performance BIAS (legacy usage):\n");
+	fprintf(stderr, "%s: [-c cpu] [-v] (-r | policy-value )\n", progname);
+
 	exit(1);
 }
 
-#define MSR_IA32_ENERGY_PERF_BIAS	0x000001b0
+/*
+ * If bdx_highest_ratio is set,
+ * then we must translate between MSR format and simple ratio
+ * used on the cmdline.
+ */
+int ratio_2_msr_perf(int ratio)
+{
+	int msr_perf;
 
-#define	BIAS_PERFORMANCE		0
-#define BIAS_BALANCE			6
-#define	BIAS_POWERSAVE			15
+	if (!bdx_highest_ratio)
+		return ratio;
+
+	msr_perf = ratio * 255 / bdx_highest_ratio;
+
+	if (debug)
+		fprintf(stderr, "%d = ratio_to_msr_perf(%d)\n", msr_perf, ratio);
+
+	return msr_perf;
+}
+int msr_perf_2_ratio(int msr_perf)
+{
+	int ratio;
+	double d;
+
+	if (!bdx_highest_ratio)
+		return msr_perf;
+
+	d = (double)msr_perf * (double) bdx_highest_ratio / 255.0;
+	d = d + 0.5;	/* round */
+	ratio = (int)d;
+
+	if (debug)
+		fprintf(stderr, "%d = msr_perf_ratio(%d) {%f}\n", ratio, msr_perf, d);
+
+	return ratio;
+}
+int parse_cmdline_epb(int i)
+{
+	if (!has_epb)
+		errx(1, "EPB not enabled on this platform");
+
+	update_epb = 1;
+
+	switch (i) {
+	case OPTARG_POWER:
+		return ENERGY_PERF_BIAS_POWERSAVE;
+	case OPTARG_BALANCE_POWER:
+		return ENERGY_PERF_BIAS_BALANCE_POWERSAVE;
+	case OPTARG_NORMAL:
+		return ENERGY_PERF_BIAS_NORMAL;
+	case OPTARG_BALANCE_PERFORMANCE:
+		return ENERGY_PERF_BIAS_BALANCE_PERFORMANCE;
+	case OPTARG_PERFORMANCE:
+		return ENERGY_PERF_BIAS_PERFORMANCE;
+	}
+	if (i < 0 || i > ENERGY_PERF_BIAS_POWERSAVE)
+		errx(1, "--epb must be from 0 to 15");
+	return i;
+}
+
+#define HWP_CAP_LOWEST 0
+#define HWP_CAP_HIGHEST 255
+
+/*
+ * "performance" changes hwp_min to cap.highest
+ * All others leave it at cap.lowest
+ */
+int parse_cmdline_hwp_min(int i)
+{
+	update_hwp_min = 1;
+
+	switch (i) {
+	case OPTARG_POWER:
+	case OPTARG_BALANCE_POWER:
+	case OPTARG_NORMAL:
+	case OPTARG_BALANCE_PERFORMANCE:
+		return HWP_CAP_LOWEST;
+	case OPTARG_PERFORMANCE:
+		return HWP_CAP_HIGHEST;
+	}
+	return i;
+}
+/*
+ * "power" changes hwp_max to cap.lowest
+ * All others leave it at cap.highest
+ */
+int parse_cmdline_hwp_max(int i)
+{
+	update_hwp_max = 1;
+
+	switch (i) {
+	case OPTARG_POWER:
+		return HWP_CAP_LOWEST;
+	case OPTARG_NORMAL:
+	case OPTARG_BALANCE_POWER:
+	case OPTARG_BALANCE_PERFORMANCE:
+	case OPTARG_PERFORMANCE:
+		return HWP_CAP_HIGHEST;
+	}
+	return i;
+}
+/*
+ * for --hwp-des, all strings leave it in autonomous mode
+ * If you want to change it, you need to explicitly pick a value
+ */
+int parse_cmdline_hwp_desired(int i)
+{
+	update_hwp_desired = 1;
+
+	switch (i) {
+	case OPTARG_POWER:
+	case OPTARG_BALANCE_POWER:
+	case OPTARG_BALANCE_PERFORMANCE:
+	case OPTARG_NORMAL:
+	case OPTARG_PERFORMANCE:
+		return 0;	/* autonomous */
+	}
+	return i;
+}
+
+int parse_cmdline_hwp_window(int i)
+{
+	unsigned int exponent;
+
+	update_hwp_window = 1;
+
+	switch (i) {
+	case OPTARG_POWER:
+	case OPTARG_BALANCE_POWER:
+	case OPTARG_NORMAL:
+	case OPTARG_BALANCE_PERFORMANCE:
+	case OPTARG_PERFORMANCE:
+		return 0;
+	}
+	if (i < 0 || i > 1270000000) {
+		fprintf(stderr, "--hwp-window: 0 for auto; 1 - 1270000000 usec for window duration\n");
+		usage();
+	}
+	for (exponent = 0; ; ++exponent) {
+		if (debug)
+			printf("%d 10^%d\n", i, exponent);
+
+		if (i <= 127)
+			break;
+
+		i = i / 10;
+	}
+	if (debug)
+		fprintf(stderr, "%d*10^%d: 0x%x\n", i, exponent, (exponent << 7) | i);
+
+	return (exponent << 7) | i;
+}
+int parse_cmdline_hwp_epp(int i)
+{
+	update_hwp_epp = 1;
+
+	switch (i) {
+	case OPTARG_POWER:
+		return HWP_EPP_POWERSAVE;
+	case OPTARG_BALANCE_POWER:
+		return HWP_EPP_BALANCE_POWERSAVE;
+	case OPTARG_NORMAL:
+	case OPTARG_BALANCE_PERFORMANCE:
+		return HWP_EPP_BALANCE_PERFORMANCE;
+	case OPTARG_PERFORMANCE:
+		return HWP_EPP_PERFORMANCE;
+	}
+	if (i < 0 || i > 0xff) {
+		fprintf(stderr, "--hwp-epp must be from 0 to 0xff\n");
+		usage();
+	}
+	return i;
+}
+int parse_cmdline_turbo(int i)
+{
+	update_turbo = 1;
+
+	switch (i) {
+	case OPTARG_POWER:
+		return 0;
+	case OPTARG_NORMAL:
+	case OPTARG_BALANCE_POWER:
+	case OPTARG_BALANCE_PERFORMANCE:
+	case OPTARG_PERFORMANCE:
+		return 1;
+	}
+	if (i < 0 || i > 1) {
+		fprintf(stderr, "--turbo-enable: 1 to enable, 0 to disable\n");
+		usage();
+	}
+	return i;
+}
+/* Map a policy keyword to its OPTARG_* code, or parse a number in the range 0-255; exits on bad input. */
+int parse_optarg_string(char *s)
+{
+	long i;	/* keep strtol()'s full width so out-of-range input is rejected, not truncated */
+	char *endptr;
+
+	if (!strncmp(s, "default", 7))
+		return OPTARG_NORMAL;
+
+	if (!strncmp(s, "normal", 6))
+		return OPTARG_NORMAL;
+
+	if (!strncmp(s, "power", 9))
+		return OPTARG_POWER;
+
+	if (!strncmp(s, "balance-power", 17))
+		return OPTARG_BALANCE_POWER;
+
+	if (!strncmp(s, "balance-performance", 19))
+		return OPTARG_BALANCE_PERFORMANCE;
+
+	if (!strncmp(s, "performance", 11))
+		return OPTARG_PERFORMANCE;
+
+	i = strtol(s, &endptr, 0);
+	if (s == endptr) {
+		fprintf(stderr, "no digits in \"%s\"\n", s);
+		usage();
+	}
+	if (i == LONG_MIN || i == LONG_MAX)
+		errx(-1, "%s", s);
+
+	if (i > 0xFF)
+		errx(-1, "%ld (0x%lx) must be < 256", i, (unsigned long)i);
+
+	if (i < 0)
+		errx(-1, "%ld (0x%lx) must be >= 0", i, (unsigned long)i);
+	return (int)i;	/* 0..255 here, so the narrowing is lossless */
+}
+
+void parse_cmdline_all(char *s)
+{
+	force++;
+	update_hwp_enable = 1;
+	req_update.hwp_min = parse_cmdline_hwp_min(parse_optarg_string(s));
+	req_update.hwp_max = parse_cmdline_hwp_max(parse_optarg_string(s));
+	req_update.hwp_epp = parse_cmdline_hwp_epp(parse_optarg_string(s));
+	if (has_epb)
+		new_epb = parse_cmdline_epb(parse_optarg_string(s));
+	turbo_update_value = parse_cmdline_turbo(parse_optarg_string(s));
+	req_update.hwp_desired = parse_cmdline_hwp_desired(parse_optarg_string(s));
+	req_update.hwp_window = parse_cmdline_hwp_window(parse_optarg_string(s));
+}
+/* Exit unless at least one CPU is selected and every selected CPU is also in cpu_present_set. */
+void validate_cpu_selected_set(void)
+{
+	int cpu;
+
+	if (CPU_COUNT_S(cpu_setsize, cpu_selected_set) == 0)
+		errx(0, "no CPUs requested");
+
+	for (cpu = 0; cpu <= max_cpu_num; ++cpu) {
+		if (CPU_ISSET_S(cpu, cpu_setsize, cpu_selected_set))
+			if (!CPU_ISSET_S(cpu, cpu_setsize, cpu_present_set))
+				errx(1, "Requested cpu%d is not present", cpu);
+	}
+}
+
+void parse_cmdline_cpu(char *s)
+{
+	char *startp, *endp;
+	int cpu = 0;
+
+	if (pkg_selected_set) {
+		usage();
+		errx(1, "--cpu | --pkg");
+	}
+	cpu_selected_set = CPU_ALLOC((max_cpu_num + 1));
+	if (cpu_selected_set == NULL)
+		err(1, "cpu_selected_set");
+	CPU_ZERO_S(cpu_setsize, cpu_selected_set);
+
+	for (startp = s; startp && *startp;) {
+
+		if (*startp == ',') {
+			startp++;
+			continue;
+		}
+
+		if (*startp == '-') {
+			int end_cpu;
+
+			startp++;
+			end_cpu = strtol(startp, &endp, 10);
+			if (startp == endp)
+				continue;
+
+			while (cpu <= end_cpu) {
+				if (cpu > max_cpu_num)
+					errx(1, "Requested cpu%d exceeds max cpu%d", cpu, max_cpu_num);
+				CPU_SET_S(cpu, cpu_setsize, cpu_selected_set);
+				cpu++;
+			}
+			startp = endp;
+			continue;
+		}
+
+		if (strncmp(startp, "all", 3) == 0) {
+			for (cpu = 0; cpu <= max_cpu_num; cpu += 1) {
+				if (CPU_ISSET_S(cpu, cpu_setsize, cpu_present_set))
+					CPU_SET_S(cpu, cpu_setsize, cpu_selected_set);
+			}
+			startp += 3;
+			if (*startp == 0)
+				break;
+		}
+		/* "--cpu even" is not documented */
+		if (strncmp(startp, "even", 4) == 0) {
+			for (cpu = 0; cpu <= max_cpu_num; cpu += 2) {
+				if (CPU_ISSET_S(cpu, cpu_setsize, cpu_present_set))
+					CPU_SET_S(cpu, cpu_setsize, cpu_selected_set);
+			}
+			startp += 4;
+			if (*startp == 0)
+				break;
+		}
+
+		/* "--cpu odd" is not documented */
+		if (strncmp(startp, "odd", 3) == 0) {
+			for (cpu = 1; cpu <= max_cpu_num; cpu += 2) {
+				if (CPU_ISSET_S(cpu, cpu_setsize, cpu_present_set))
+					CPU_SET_S(cpu, cpu_setsize, cpu_selected_set);
+			}
+			startp += 3;
+			if (*startp == 0)
+				break;
+		}
+
+		cpu = strtol(startp, &endp, 10);
+		if (startp == endp)
+			errx(1, "--cpu cpu-set: confused by '%s'", startp);
+		if (cpu > max_cpu_num)
+			errx(1, "Requested cpu%d exceeds max cpu%d", cpu, max_cpu_num);
+		CPU_SET_S(cpu, cpu_setsize, cpu_selected_set);
+		startp = endp;
+	}
+
+	validate_cpu_selected_set();
+
+}
+/* Parse a --pkg list ("all", N, or N-M, comma separated) into the pkg_selected_set bitmask. */
+void parse_cmdline_pkg(char *s)
+{
+	char *startp, *endp;
+	int pkg = 0;
+
+	if (cpu_selected_set) {
+		usage();
+		errx(1, "--pkg | --cpu");
+	}
+	pkg_selected_set = 0;
+
+	for (startp = s; startp && *startp;) {
+
+		if (*startp == ',') {
+			startp++;
+			continue;
+		}
+
+		if (*startp == '-') {
+			int end_pkg;
+
+			startp++;
+			end_pkg = strtol(startp, &endp, 10);
+			if (startp == endp)
+				continue;
+
+			while (pkg <= end_pkg) {
+				if (pkg > max_pkg_num)
+					errx(1, "Requested pkg%d exceeds max pkg%d", pkg, max_pkg_num);
+				pkg_selected_set |= 1ULL << pkg;	/* 64-bit mask needs a 64-bit shift */
+				pkg++;
+			}
+			startp = endp;
+			continue;
+		}
+
+		if (strncmp(startp, "all", 3) == 0) {
+			pkg_selected_set = pkg_present_set;
+			return;
+		}
+
+		pkg = strtol(startp, &endp, 10);
+		if (startp == endp || pkg > max_pkg_num)	/* no digits consumed would loop forever */
+			errx(1, "--pkg pkg-set: '%s' is not a package number <= %d", startp, max_pkg_num);
+		pkg_selected_set |= 1ULL << pkg;
+		startp = endp;
+	}
+}
+/* Call func(pkg) for each package number up to max_pkg_num whose bit is set in pkg_set. */
+void for_packages(unsigned long long pkg_set, int (func)(int))
+{
+	int pkg_num;
+
+	for (pkg_num = 0; pkg_num <= max_pkg_num; ++pkg_num) {
+		if (pkg_set & (1ULL << pkg_num))	/* ULL: mask is 64 bits wide */
+			func(pkg_num);
+	}
+}
+
+void print_version(void)
+{
+	printf("x86_energy_perf_policy 17.05.11 (C) Len Brown <len.brown@intel.com>\n");
+}
 
 void cmdline(int argc, char **argv)
 {
 	int opt;
+	int option_index = 0;
+
+	static struct option long_options[] = {
+		{"all",		required_argument,	0, 'a'},
+		{"cpu",		required_argument,	0, 'c'},
+		{"pkg",		required_argument,	0, 'p'},
+		{"debug",	no_argument,		0, 'd'},
+		{"hwp-desired",	required_argument,	0, 'D'},
+		{"epb",	required_argument,	0, 'B'},
+		{"force",	no_argument,	0, 'f'},
+		{"hwp-enable",	no_argument,	0, 'e'},
+		{"help",	no_argument,	0, 'h'},
+		{"hwp-epp",	required_argument,	0, 'P'},
+		{"hwp-min",	required_argument,	0, 'm'},
+		{"hwp-max",	required_argument,	0, 'M'},
+		{"read",	no_argument,		0, 'r'},
+		{"turbo-enable",	required_argument,	0, 't'},
+		{"hwp-use-pkg",	required_argument,	0, 'u'},
+		{"version",	no_argument,		0, 'v'},
+		{"hwp-window",	required_argument,	0, 'w'},
+		{0,		0,			0, 0 }
+	};
 
 	progname = argv[0];
 
-	while ((opt = getopt(argc, argv, "+rvc:")) != -1) {
+	while ((opt = getopt_long_only(argc, argv, "+a:c:dD:E:e:f:m:M:rt:u:vw:",
+				long_options, &option_index)) != -1) {
 		switch (opt) {
+		case 'a':
+			parse_cmdline_all(optarg);
+			break;
+		case 'B':
+			new_epb = parse_cmdline_epb(parse_optarg_string(optarg));
+			break;
 		case 'c':
-			cpu = atoi(optarg);
+			parse_cmdline_cpu(optarg);
+			break;
+		case 'e':
+			update_hwp_enable = 1;
+			break;
+		case 'h':
+			usage();
+			break;
+		case 'd':
+			debug++;
+			verbose++;
+			break;
+		case 'f':
+			force++;
+			break;
+		case 'D':
+			req_update.hwp_desired = parse_cmdline_hwp_desired(parse_optarg_string(optarg));
+			break;
+		case 'm':
+			req_update.hwp_min = parse_cmdline_hwp_min(parse_optarg_string(optarg));
+			break;
+		case 'M':
+			req_update.hwp_max = parse_cmdline_hwp_max(parse_optarg_string(optarg));
+			break;
+		case 'p':
+			parse_cmdline_pkg(optarg);
+			break;
+		case 'P':
+			req_update.hwp_epp = parse_cmdline_hwp_epp(parse_optarg_string(optarg));
 			break;
 		case 'r':
-			read_only = 1;
+			/* v1 used -r to specify read-only mode, now the default */
+			break;
+		case 't':
+			turbo_update_value = parse_cmdline_turbo(parse_optarg_string(optarg));
+			break;
+		case 'u':
+			update_hwp_use_pkg++;
+			if (atoi(optarg) == 0)
+				req_update.hwp_use_pkg = 0;
+			else
+				req_update.hwp_use_pkg = 1;
 			break;
 		case 'v':
-			verbose++;
+			print_version();
+			exit(0);
+			break;
+		case 'w':
+			req_update.hwp_window = parse_cmdline_hwp_window(parse_optarg_string(optarg));
 			break;
 		default:
 			usage();
 		}
 	}
-	/* if -r, then should be no additional optind */
-	if (read_only && (argc > optind))
-		usage();
-
 	/*
-	 * if no -r , then must be one additional optind
+	 * v1 allowed "performance"|"normal"|"power" with no policy specifier
+	 * to update BIAS.  Continue to support that, even though no longer documented.
 	 */
-	if (!read_only) {
+	if (argc == optind + 1)
+		new_epb = parse_cmdline_epb(parse_optarg_string(argv[optind]));
 
-		if (argc != optind + 1) {
-			printf("must supply -r or policy param\n");
-			usage();
-			}
-
-		if (!strcmp("performance", argv[optind])) {
-			new_bias = BIAS_PERFORMANCE;
-		} else if (!strcmp("normal", argv[optind])) {
-			new_bias = BIAS_BALANCE;
-		} else if (!strcmp("powersave", argv[optind])) {
-			new_bias = BIAS_POWERSAVE;
-		} else {
-			char *endptr;
-
-			new_bias = strtoull(argv[optind], &endptr, 0);
-			if (endptr == argv[optind] ||
-				new_bias > BIAS_POWERSAVE) {
-					fprintf(stderr, "invalid value: %s\n",
-						argv[optind]);
-				usage();
-			}
-		}
+	if (argc > optind + 1) {
+		fprintf(stderr, "stray parameter '%s'\n", argv[optind + 1]);
+		usage();
 	}
 }
 
+
+int get_msr(int cpu, int offset, unsigned long long *msr)
+{
+	int retval;
+	char pathname[32];
+	int fd;
+
+	sprintf(pathname, "/dev/cpu/%d/msr", cpu);
+	fd = open(pathname, O_RDONLY);
+	if (fd < 0)
+		err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
+
+	retval = pread(fd, msr, sizeof(*msr), offset);
+	if (retval != sizeof(*msr))
+		err(-1, "%s offset 0x%llx read failed", pathname, (unsigned long long)offset);
+
+	if (debug > 1)
+		fprintf(stderr, "get_msr(cpu%d, 0x%X, 0x%llX)\n", cpu, offset, *msr);
+
+	close(fd);
+	return 0;
+}
+
+int put_msr(int cpu, int offset, unsigned long long new_msr)
+{
+	char pathname[32];
+	int retval;
+	int fd;
+
+	sprintf(pathname, "/dev/cpu/%d/msr", cpu);
+	fd = open(pathname, O_RDWR);
+	if (fd < 0)
+		err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
+
+	retval = pwrite(fd, &new_msr, sizeof(new_msr), offset);
+	if (retval != sizeof(new_msr))
+		err(-2, "pwrite(cpu%d, offset 0x%x, 0x%llx) = %d", cpu, offset, new_msr, retval);
+
+	close(fd);
+
+	if (debug > 1)
+		fprintf(stderr, "put_msr(cpu%d, 0x%X, 0x%llX)\n", cpu, offset, new_msr);
+
+	return 0;
+}
+
+void print_hwp_cap(int cpu, struct msr_hwp_cap *cap, char *str)
+{
+	if (cpu != -1)
+		printf("cpu%d: ", cpu);
+
+	printf("HWP_CAP: low %d eff %d guar %d high %d\n",
+		cap->lowest, cap->efficient, cap->guaranteed, cap->highest);
+}
+void read_hwp_cap(int cpu, struct msr_hwp_cap *cap, unsigned int msr_offset)
+{
+	unsigned long long msr;
+
+	get_msr(cpu, msr_offset, &msr);
+
+	cap->highest = msr_perf_2_ratio(HWP_HIGHEST_PERF(msr));
+	cap->guaranteed = msr_perf_2_ratio(HWP_GUARANTEED_PERF(msr));
+	cap->efficient = msr_perf_2_ratio(HWP_MOSTEFFICIENT_PERF(msr));
+	cap->lowest = msr_perf_2_ratio(HWP_LOWEST_PERF(msr));
+}
+
+void print_hwp_request(int cpu, struct msr_hwp_request *h, char *str)
+{
+	if (cpu != -1)
+		printf("cpu%d: ", cpu);
+
+	if (str)
+		printf("%s", str);
+
+	printf("HWP_REQ: min %d max %d des %d epp %d window 0x%x (%d*10^%dus) use_pkg %d\n",
+		h->hwp_min, h->hwp_max, h->hwp_desired, h->hwp_epp,
+		h->hwp_window, h->hwp_window & 0x7F, (h->hwp_window >> 7) & 0x7, h->hwp_use_pkg);
+}
+void print_hwp_request_pkg(int pkg, struct msr_hwp_request *h, char *str)
+{
+	printf("pkg%d: ", pkg);
+
+	if (str)
+		printf("%s", str);
+
+	printf("HWP_REQ_PKG: min %d max %d des %d epp %d window 0x%x (%d*10^%dus)\n",
+		h->hwp_min, h->hwp_max, h->hwp_desired, h->hwp_epp,
+		h->hwp_window, h->hwp_window & 0x7F, (h->hwp_window >> 7) & 0x7);
+}
+void read_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset)
+{
+	unsigned long long msr;
+
+	get_msr(cpu, msr_offset, &msr);
+
+	hwp_req->hwp_min = msr_perf_2_ratio((((msr) >> 0) & 0xff));
+	hwp_req->hwp_max = msr_perf_2_ratio((((msr) >> 8) & 0xff));
+	hwp_req->hwp_desired = msr_perf_2_ratio((((msr) >> 16) & 0xff));
+	hwp_req->hwp_epp = (((msr) >> 24) & 0xff);
+	hwp_req->hwp_window = (((msr) >> 32) & 0x3ff);
+	hwp_req->hwp_use_pkg = (((msr) >> 42) & 0x1);
+}
+
+void write_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset)
+{
+	unsigned long long msr = 0;
+
+	if (debug > 1)
+		printf("cpu%d: requesting min %d max %d des %d epp %d window 0x%0x use_pkg %d\n",
+			cpu, hwp_req->hwp_min, hwp_req->hwp_max,
+			hwp_req->hwp_desired, hwp_req->hwp_epp,
+			hwp_req->hwp_window, hwp_req->hwp_use_pkg);
+
+	msr |= HWP_MIN_PERF(ratio_2_msr_perf(hwp_req->hwp_min));
+	msr |= HWP_MAX_PERF(ratio_2_msr_perf(hwp_req->hwp_max));
+	msr |= HWP_DESIRED_PERF(ratio_2_msr_perf(hwp_req->hwp_desired));
+	msr |= HWP_ENERGY_PERF_PREFERENCE(hwp_req->hwp_epp);
+	msr |= HWP_ACTIVITY_WINDOW(hwp_req->hwp_window);
+	msr |= HWP_PACKAGE_CONTROL(hwp_req->hwp_use_pkg);
+
+	put_msr(cpu, msr_offset, msr);
+}
+
+int print_cpu_msrs(int cpu)
+{
+	unsigned long long msr;
+	struct msr_hwp_request req;
+	struct msr_hwp_cap cap;
+
+	if (has_epb) {
+		get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
+
+		printf("cpu%d: EPB %u\n", cpu, (unsigned int) msr);
+	}
+
+	if (!has_hwp)
+		return 0;
+
+	read_hwp_request(cpu, &req, MSR_HWP_REQUEST);
+	print_hwp_request(cpu, &req, "");
+
+	read_hwp_cap(cpu, &cap, MSR_HWP_CAPABILITIES);
+	print_hwp_cap(cpu, &cap, "");
+
+	return 0;
+}
+
+int print_pkg_msrs(int pkg)
+{
+	struct msr_hwp_request req;
+	unsigned long long msr;
+
+	if (!has_hwp)
+		return 0;
+
+	read_hwp_request(first_cpu_in_pkg[pkg], &req, MSR_HWP_REQUEST_PKG);
+	print_hwp_request_pkg(pkg, &req, "");
+
+	if (has_hwp_notify) {
+		get_msr(first_cpu_in_pkg[pkg], MSR_HWP_INTERRUPT, &msr);
+		fprintf(stderr,
+		"pkg%d: MSR_HWP_INTERRUPT: 0x%08llx (Excursion_Min-%sabled, Guaranteed_Perf_Change-%sabled)\n",
+		pkg, msr,
+		((msr) & 0x2) ? "EN" : "Dis",
+		((msr) & 0x1) ? "EN" : "Dis");
+	}
+	get_msr(first_cpu_in_pkg[pkg], MSR_HWP_STATUS, &msr);
+	fprintf(stderr,
+		"pkg%d: MSR_HWP_STATUS: 0x%08llx (%sExcursion_Min, %sGuaranteed_Perf_Change)\n",
+		pkg, msr,
+		((msr) & 0x4) ? "" : "No-",
+		((msr) & 0x1) ? "" : "No-");
+
+	return 0;
+}
+
 /*
- * validate_cpuid()
- * returns on success, quietly exits on failure (make verbose with -v)
+ * Assumption: All HWP systems have 100 MHz bus clock
  */
-void validate_cpuid(void)
+int ratio_2_sysfs_khz(int ratio)
+{
+	int bclk_khz = 100 * 1000;	/* 100,000 KHz = 100 MHz */
+
+	return ratio * bclk_khz;
+}
+/*
+ * If HWP is enabled and cpufreq sysfs attributes are present,
+ * then update sysfs, so that it will not become
+ * stale when we write to MSRs.
+ * (intel_pstate's max_perf_pct and min_perf_pct will follow cpufreq,
+ *  so we don't have to touch that.)
+ */
+void update_cpufreq_scaling_freq(int is_max, int cpu, unsigned int ratio)
+{
+	char pathname[64];
+	FILE *fp;
+	int retval;
+	int khz;
+
+	sprintf(pathname, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_%s_freq",
+		cpu, is_max ? "max" : "min");
+
+	fp = fopen(pathname, "w");
+	if (!fp) {
+		if (debug)
+			perror(pathname);
+		return;
+	}
+
+	khz = ratio_2_sysfs_khz(ratio);
+	retval = fprintf(fp, "%d", khz);
+	if (retval < 0)
+		if (debug)
+			perror("fprintf");
+	if (debug)
+		printf("echo %d > %s\n", khz, pathname);
+
+	fclose(fp);
+}
+
+/*
+ * We update all sysfs before updating any MSRs because of
+ * bugs in cpufreq/intel_pstate where the sysfs writes
+ * for a CPU may change the min/max values on other CPUS.
+ */
+
+int update_sysfs(int cpu)
+{
+	if (!has_hwp)
+		return 0;
+
+	if (!hwp_update_enabled())
+		return 0;
+
+	if (access("/sys/devices/system/cpu/cpu0/cpufreq", F_OK))
+		return 0;
+
+	if (update_hwp_min)
+		update_cpufreq_scaling_freq(0, cpu, req_update.hwp_min);
+
+	if (update_hwp_max)
+		update_cpufreq_scaling_freq(1, cpu, req_update.hwp_max);
+
+	return 0;
+}
+
+int verify_hwp_req_self_consistency(int cpu, struct msr_hwp_request *req)
+{
+	/* fail if min > max requested */
+	if (req->hwp_min > req->hwp_max) {
+		errx(1, "cpu%d: requested hwp-min %d > hwp_max %d",
+			cpu, req->hwp_min, req->hwp_max);
+	}
+
+	/* fail if desired > max requested */
+	if (req->hwp_desired && (req->hwp_desired > req->hwp_max)) {
+		errx(1, "cpu%d: requested hwp-desired %d > hwp_max %d",
+			cpu, req->hwp_desired, req->hwp_max);
+	}
+	/* fail if desired < min requested */
+	if (req->hwp_desired && (req->hwp_desired < req->hwp_min)) {
+		errx(1, "cpu%d: requested hwp-desired %d < requested hwp_min %d",
+			cpu, req->hwp_desired, req->hwp_min);
+	}
+
+	return 0;
+}
+
+int check_hwp_request_v_hwp_capabilities(int cpu, struct msr_hwp_request *req, struct msr_hwp_cap *cap)
+{
+	if (update_hwp_max) {
+		if (req->hwp_max > cap->highest)
+			errx(1, "cpu%d: requested max %d > capabilities highest %d, use --force?",
+				cpu, req->hwp_max, cap->highest);
+		if (req->hwp_max < cap->lowest)
+			errx(1, "cpu%d: requested max %d < capabilities lowest %d, use --force?",
+				cpu, req->hwp_max, cap->lowest);
+	}
+
+	if (update_hwp_min) {
+		if (req->hwp_min > cap->highest)
+			errx(1, "cpu%d: requested min %d > capabilities highest %d, use --force?",
+				cpu, req->hwp_min, cap->highest);
+		if (req->hwp_min < cap->lowest)
+			errx(1, "cpu%d: requested min %d < capabilities lowest %d, use --force?",
+				cpu, req->hwp_min, cap->lowest);
+	}
+
+	if (update_hwp_min && update_hwp_max && (req->hwp_min > req->hwp_max))
+		errx(1, "cpu%d: requested min %d > requested max %d",
+			cpu, req->hwp_min, req->hwp_max);
+
+	if (update_hwp_desired && req->hwp_desired) {
+		if (req->hwp_desired > req->hwp_max)
+			errx(1, "cpu%d: requested desired %d > requested max %d, use --force?",
+				cpu, req->hwp_desired, req->hwp_max);
+		if (req->hwp_desired < req->hwp_min)
+			errx(1, "cpu%d: requested desired %d < requested min %d, use --force?",
+				cpu, req->hwp_desired, req->hwp_min);
+		if (req->hwp_desired < cap->lowest)
+			errx(1, "cpu%d: requested desired %d < capabilities lowest %d, use --force?",
+				cpu, req->hwp_desired, cap->lowest);
+		if (req->hwp_desired > cap->highest)
+			errx(1, "cpu%d: requested desired %d > capabilities highest %d, use --force?",
+				cpu, req->hwp_desired, cap->highest);
+	}
+
+	return 0;
+}
+
+int update_hwp_request(int cpu)
+{
+	struct msr_hwp_request req;
+	struct msr_hwp_cap cap;
+
+	int msr_offset = MSR_HWP_REQUEST;
+
+	read_hwp_request(cpu, &req, msr_offset);
+	if (debug)
+		print_hwp_request(cpu, &req, "old: ");
+
+	if (update_hwp_min)
+		req.hwp_min = req_update.hwp_min;
+
+	if (update_hwp_max)
+		req.hwp_max = req_update.hwp_max;
+
+	if (update_hwp_desired)
+		req.hwp_desired = req_update.hwp_desired;
+
+	if (update_hwp_window)
+		req.hwp_window = req_update.hwp_window;
+
+	if (update_hwp_epp)
+		req.hwp_epp = req_update.hwp_epp;
+
+	req.hwp_use_pkg = req_update.hwp_use_pkg;
+
+	read_hwp_cap(cpu, &cap, MSR_HWP_CAPABILITIES);
+	if (debug)
+		print_hwp_cap(cpu, &cap, "");
+
+	if (!force)
+		check_hwp_request_v_hwp_capabilities(cpu, &req, &cap);
+
+	verify_hwp_req_self_consistency(cpu, &req);
+
+	write_hwp_request(cpu, &req, msr_offset);
+
+	if (debug) {
+		read_hwp_request(cpu, &req, msr_offset);
+		print_hwp_request(cpu, &req, "new: ");
+	}
+	return 0;
+}
+int update_hwp_request_pkg(int pkg)
+{
+	struct msr_hwp_request req;
+	struct msr_hwp_cap cap;
+	int cpu = first_cpu_in_pkg[pkg];
+
+	int msr_offset = MSR_HWP_REQUEST_PKG;
+
+	read_hwp_request(cpu, &req, msr_offset);
+	if (debug)
+		print_hwp_request_pkg(pkg, &req, "old: ");
+
+	if (update_hwp_min)
+		req.hwp_min = req_update.hwp_min;
+
+	if (update_hwp_max)
+		req.hwp_max = req_update.hwp_max;
+
+	if (update_hwp_desired)
+		req.hwp_desired = req_update.hwp_desired;
+
+	if (update_hwp_window)
+		req.hwp_window = req_update.hwp_window;
+
+	if (update_hwp_epp)
+		req.hwp_epp = req_update.hwp_epp;
+
+	read_hwp_cap(cpu, &cap, MSR_HWP_CAPABILITIES);
+	if (debug)
+		print_hwp_cap(cpu, &cap, "");
+
+	if (!force)
+		check_hwp_request_v_hwp_capabilities(cpu, &req, &cap);
+
+	verify_hwp_req_self_consistency(cpu, &req);
+
+	write_hwp_request(cpu, &req, msr_offset);
+
+	if (debug) {
+		read_hwp_request(cpu, &req, msr_offset);
+		print_hwp_request_pkg(pkg, &req, "new: ");
+	}
+	return 0;
+}
+
+int enable_hwp_on_cpu(int cpu)
+{
+	unsigned long long msr;
+
+	get_msr(cpu, MSR_PM_ENABLE, &msr);
+	put_msr(cpu, MSR_PM_ENABLE, 1);
+
+	if (verbose)
+		printf("cpu%d: MSR_PM_ENABLE old: %d new: %d\n", cpu, (unsigned int) msr, 1);
+
+	return 0;
+}
+
+int update_cpu_msrs(int cpu)
+{
+	unsigned long long msr;
+
+
+	if (update_epb) {
+		get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
+		put_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, new_epb);
+
+		if (verbose)
+			printf("cpu%d: ENERGY_PERF_BIAS old: %d new: %d\n",
+				cpu, (unsigned int) msr, (unsigned int) new_epb);
+	}
+
+	if (update_turbo) {
+		int turbo_is_present_and_disabled;
+
+		get_msr(cpu, MSR_IA32_MISC_ENABLE, &msr);
+
+		turbo_is_present_and_disabled = ((msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE) != 0);
+
+		if (turbo_update_value == 1)	{
+			if (turbo_is_present_and_disabled) {
+				msr &= ~MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
+				put_msr(cpu, MSR_IA32_MISC_ENABLE, msr);
+				if (verbose)
+					printf("cpu%d: turbo ENABLE\n", cpu);
+			}
+		} else {
+			/*
+			 * if "turbo_is_enabled" were known to describe this cpu
+			 * then we could use it here to skip redundant disable requests.
+			 * but cpu may be in a different package, so we always write.
+			 */
+			msr |= MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
+			put_msr(cpu, MSR_IA32_MISC_ENABLE, msr);
+			if (verbose)
+				printf("cpu%d: turbo DISABLE\n", cpu);
+		}
+	}
+
+	if (!has_hwp)
+		return 0;
+
+	if (!hwp_update_enabled())
+		return 0;
+
+	update_hwp_request(cpu);
+	return 0;
+}
+
+/*
+ * Open path with the given fopen() mode; exit via err(1) if it cannot be opened.
+ */
+FILE *fopen_or_die(const char *path, const char *mode)
+{
+	FILE *filep = fopen(path, mode);	/* honor the caller's mode rather than a fixed "r" */
+
+	if (!filep)
+		err(1, "%s: open failed", path);
+	return filep;
+}
+/* Return the physical_package_id that sysfs topology reports for cpu; exit if it cannot be read. */
+unsigned int get_pkg_num(int cpu)
+{
+	FILE *fp;
+	char pathname[128];
+	unsigned int pkg;
+	int retval;
+
+	sprintf(pathname, "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);
+	fp = fopen_or_die(pathname, "r");
+	retval = fscanf(fp, "%u\n", &pkg);
+	fclose(fp);	/* called once per CPU: do not leak the stream */
+	if (retval != 1)
+		errx(1, "%s: failed to parse", pathname);
+	return pkg;
+}
+
+int set_max_cpu_pkg_num(int cpu)
+{
+	unsigned int pkg;
+
+	if (max_cpu_num < cpu)
+		max_cpu_num = cpu;
+
+	pkg = get_pkg_num(cpu);
+
+	if (pkg >= MAX_PACKAGES)
+		errx(1, "cpu%d: %d >= MAX_PACKAGES (%d)", cpu, pkg, MAX_PACKAGES);
+
+	if (pkg > max_pkg_num)
+		max_pkg_num = pkg;
+
+	if ((pkg_present_set & (1ULL << pkg)) == 0) {
+		pkg_present_set |= (1ULL << pkg);
+		first_cpu_in_pkg[pkg] = cpu;
+	}
+
+	return 0;
+}
+int mark_cpu_present(int cpu)
+{
+	CPU_SET_S(cpu, cpu_setsize, cpu_present_set);
+	return 0;
+}
+
+/*
+ * run func(cpu) on every cpu in /proc/stat
+ * return 0, or the first non-zero value returned by func
+ */
+int for_all_proc_cpus(int (func)(int))
+{
+	FILE *fp;
+	int cpu_num;
+	int retval;
+
+	fp = fopen_or_die(proc_stat, "r");
+
+	retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
+	if (retval != 0)
+		err(1, "%s: failed to parse format", proc_stat);
+
+	while (1) {
+		retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
+		if (retval != 1)
+			break;
+
+		retval = func(cpu_num);
+		if (retval) {
+			fclose(fp);
+			return retval;
+		}
+	}
+	fclose(fp);
+	return 0;
+}
+
+void for_all_cpus_in_set(size_t set_size, cpu_set_t *cpu_set, int (func)(int))
+{
+	int cpu_num;
+
+	for (cpu_num = 0; cpu_num <= max_cpu_num; ++cpu_num)	/* max_cpu_num is inclusive */
+		if (CPU_ISSET_S(cpu_num, set_size, cpu_set))
+			func(cpu_num);	/* the return value of func() is ignored */
+}
+
+void init_data_structures(void)
+{
+	for_all_proc_cpus(set_max_cpu_pkg_num);	/* first pass: find max_cpu_num, which sizes the set below */
+
+	cpu_setsize = CPU_ALLOC_SIZE((max_cpu_num + 1));
+
+	cpu_present_set = CPU_ALLOC((max_cpu_num + 1));
+	if (cpu_present_set == NULL)
+		err(3, "CPU_ALLOC");
+	CPU_ZERO_S(cpu_setsize, cpu_present_set);
+	for_all_proc_cpus(mark_cpu_present);	/* second pass: populate cpu_present_set */
+}
+
+/* clear has_hwp if it is not enabled (or being enabled) */
+
+void verify_hwp_is_enabled(void)
+{
+	unsigned long long msr;
+
+	if (!has_hwp)	/* set in early_cpuid() */
+		return;
+
+	/* MSR_PM_ENABLE bit 0 is set if HWP is enabled and MSRs visible */
+	get_msr(base_cpu, MSR_PM_ENABLE, &msr);
+	if ((msr & 1) == 0) {
+		fprintf(stderr, "HWP can be enabled using '--hwp-enable'\n");
+		has_hwp = 0;	/* treat HWP as absent from here on */
+		return;
+	}
+}
+
+int req_update_bounds_check(void)
+{
+	if (!hwp_update_enabled())
+		return 0;
+
+	/* fail if min > max requested */
+	if ((update_hwp_max && update_hwp_min) &&
+	    (req_update.hwp_min > req_update.hwp_max)) {
+		printf("hwp-min %d > hwp_max %d\n", req_update.hwp_min, req_update.hwp_max);
+		return -EINVAL;
+	}
+
+	/* fail if desired > max requested (skipped when desired is 0) */
+	if (req_update.hwp_desired && update_hwp_max &&
+	    (req_update.hwp_desired > req_update.hwp_max)) {
+		printf("hwp-desired cannot be greater than hwp_max\n");
+		return -EINVAL;
+	}
+	/* fail if desired < min requested (skipped when desired is 0) */
+	if (req_update.hwp_desired && update_hwp_min &&
+	    (req_update.hwp_desired < req_update.hwp_min)) {
+		printf("hwp-desired cannot be less than hwp_min\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+void set_base_cpu(void)
+{
+	base_cpu = sched_getcpu();	/* the CPU this thread is currently running on */
+	if (base_cpu < 0)	/* sched_getcpu() failed */
+		err(-ENODEV, "No valid cpus found");
+}
+
+
+void probe_dev_msr(void)
+{
+	struct stat sb;
+	char pathname[32];
+
+	sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
+	if (stat(pathname, &sb))	/* msr device node missing: try to load the driver */
+		if (system("/sbin/modprobe msr > /dev/null 2>&1"))
+			err(-5, "no %s, Try \"# modprobe msr\" ", pathname);	/* name the node actually probed */
+}
+/*
+ * early_cpuid()
+ * initialize turbo_is_enabled, has_hwp, has_epb
+ * before cmdline is parsed
+ */
+void early_cpuid(void)
+{
+	unsigned int eax, ebx, ecx, edx, max_level;
+	unsigned int fms, family, model;
+
+	__get_cpuid(0, &max_level, &ebx, &ecx, &edx);
+
+	if (max_level < 6)	/* CPUID leaf 6 is read below */
+		errx(1, "Processor not supported");	/* errx() appends the newline itself */
+
+	__get_cpuid(1, &fms, &ebx, &ecx, &edx);
+	family = (fms >> 8) & 0xf;
+	model = (fms >> 4) & 0xf;
+	if (family == 6 || family == 0xf)
+		model += ((fms >> 16) & 0xf) << 4;	/* fold in the extended model bits */
+
+	if (model == 0x4F) {
+		unsigned long long msr;
+
+		get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
+
+		bdx_highest_ratio = msr & 0xFF;	/* low byte of MSR_TURBO_RATIO_LIMIT */
+	}
+
+	__get_cpuid(0x6, &eax, &ebx, &ecx, &edx);
+	turbo_is_enabled = (eax >> 1) & 1;	/* CPUID.06H:EAX[1] */
+	has_hwp = (eax >> 7) & 1;		/* CPUID.06H:EAX[7] */
+	has_epb = (ecx >> 3) & 1;		/* CPUID.06H:ECX[3] */
+}
+
+/*
+ * parse_cpuid()
+ * set genuine_intel, has_hwp_notify, has_hwp_activity_window, has_hwp_epp,
+ * has_hwp_request_pkg; has_hwp and has_epb were already set by early_cpuid()
+ */
+void parse_cpuid(void)
 {
 	unsigned int eax, ebx, ecx, edx, max_level;
 	unsigned int fms, family, model, stepping;
 
 	eax = ebx = ecx = edx = 0;
 
-	asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx),
-		"=d" (edx) : "a" (0));
+	__get_cpuid(0, &max_level, &ebx, &ecx, &edx);
 
-	if (ebx != 0x756e6547 || edx != 0x49656e69 || ecx != 0x6c65746e) {
-		if (verbose)
-			fprintf(stderr, "%.4s%.4s%.4s != GenuineIntel",
-				(char *)&ebx, (char *)&edx, (char *)&ecx);
-		exit(1);
-	}
+	if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
+		genuine_intel = 1;
 
-	asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx");
+	if (debug)
+		fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ",
+			(char *)&ebx, (char *)&edx, (char *)&ecx);
+
+	__get_cpuid(1, &fms, &ebx, &ecx, &edx);
 	family = (fms >> 8) & 0xf;
 	model = (fms >> 4) & 0xf;
 	stepping = fms & 0xf;
 	if (family == 6 || family == 0xf)
 		model += ((fms >> 16) & 0xf) << 4;
 
-	if (verbose > 1)
-		printf("CPUID %d levels family:model:stepping "
-			"0x%x:%x:%x (%d:%d:%d)\n", max_level,
-			family, model, stepping, family, model, stepping);
-
-	if (!(edx & (1 << 5))) {
-		if (verbose)
-			printf("CPUID: no MSR\n");
-		exit(1);
+	if (debug) {
+		fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
+			max_level, family, model, stepping, family, model, stepping);
+		fprintf(stderr, "CPUID(1): %s %s %s %s %s %s %s %s\n",
+			ecx & (1 << 0) ? "SSE3" : "-",
+			ecx & (1 << 3) ? "MONITOR" : "-",
+			ecx & (1 << 7) ? "EIST" : "-",
+			ecx & (1 << 8) ? "TM2" : "-",
+			edx & (1 << 4) ? "TSC" : "-",
+			edx & (1 << 5) ? "MSR" : "-",
+			edx & (1 << 22) ? "ACPI-TM" : "-",
+			edx & (1 << 29) ? "TM" : "-");
 	}
 
-	/*
-	 * Support for MSR_IA32_ENERGY_PERF_BIAS
-	 * is indicated by CPUID.06H.ECX.bit3
-	 */
-	asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (6));
-	if (verbose)
-		printf("CPUID.06H.ECX: 0x%x\n", ecx);
-	if (!(ecx & (1 << 3))) {
-		if (verbose)
-			printf("CPUID: No MSR_IA32_ENERGY_PERF_BIAS\n");
-		exit(1);
-	}
+	if (!(edx & (1 << 5)))
+		errx(1, "CPUID: no MSR");
+
+
+	__get_cpuid(0x6, &eax, &ebx, &ecx, &edx);
+	/* turbo_is_enabled already set */
+	/* has_hwp already set */
+	has_hwp_notify = eax & (1 << 8);
+	has_hwp_activity_window = eax & (1 << 9);
+	has_hwp_epp = eax & (1 << 10);
+	has_hwp_request_pkg = eax & (1 << 11);
+
+	if (!has_hwp_request_pkg && update_hwp_use_pkg)
+		errx(1, "--hwp-use-pkg is not available on this hardware");
+
+	/* has_epb already set */
+
+	if (debug)
+		fprintf(stderr,
+			"CPUID(6): %sTURBO, %sHWP, %sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
+			turbo_is_enabled ? "" : "No-",
+			has_hwp ? "" : "No-",
+			has_hwp_notify ? "" : "No-",
+			has_hwp_activity_window ? "" : "No-",
+			has_hwp_epp ? "" : "No-",
+			has_hwp_request_pkg ? "" : "No-",
+			has_epb ? "" : "No-");
+
 	return;	/* success */
 }
 
-unsigned long long get_msr(int cpu, int offset)
-{
-	unsigned long long msr;
-	char msr_path[32];
-	int retval;
-	int fd;
-
-	sprintf(msr_path, "/dev/cpu/%d/msr", cpu);
-	fd = open(msr_path, O_RDONLY);
-	if (fd < 0) {
-		printf("Try \"# modprobe msr\"\n");
-		perror(msr_path);
-		exit(1);
-	}
-
-	retval = pread(fd, &msr, sizeof msr, offset);
-
-	if (retval != sizeof msr) {
-		printf("pread cpu%d 0x%x = %d\n", cpu, offset, retval);
-		exit(-2);
-	}
-	close(fd);
-	return msr;
-}
-
-unsigned long long  put_msr(int cpu, unsigned long long new_msr, int offset)
-{
-	unsigned long long old_msr;
-	char msr_path[32];
-	int retval;
-	int fd;
-
-	sprintf(msr_path, "/dev/cpu/%d/msr", cpu);
-	fd = open(msr_path, O_RDWR);
-	if (fd < 0) {
-		perror(msr_path);
-		exit(1);
-	}
-
-	retval = pread(fd, &old_msr, sizeof old_msr, offset);
-	if (retval != sizeof old_msr) {
-		perror("pwrite");
-		printf("pread cpu%d 0x%x = %d\n", cpu, offset, retval);
-		exit(-2);
-	}
-
-	retval = pwrite(fd, &new_msr, sizeof new_msr, offset);
-	if (retval != sizeof new_msr) {
-		perror("pwrite");
-		printf("pwrite cpu%d 0x%x = %d\n", cpu, offset, retval);
-		exit(-2);
-	}
-
-	close(fd);
-
-	return old_msr;
-}
-
-void print_msr(int cpu)
-{
-	printf("cpu%d: 0x%016llx\n",
-		cpu, get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS));
-}
-
-void update_msr(int cpu)
-{
-	unsigned long long previous_msr;
-
-	previous_msr = put_msr(cpu, new_bias, MSR_IA32_ENERGY_PERF_BIAS);
-
-	if (verbose)
-		printf("cpu%d  msr0x%x 0x%016llx -> 0x%016llx\n",
-			cpu, MSR_IA32_ENERGY_PERF_BIAS, previous_msr, new_bias);
-
-	return;
-}
-
-char *proc_stat = "/proc/stat";
-/*
- * run func() on every cpu in /dev/cpu
- */
-void for_every_cpu(void (func)(int))
-{
-	FILE *fp;
-	int retval;
-
-	fp = fopen(proc_stat, "r");
-	if (fp == NULL) {
-		perror(proc_stat);
-		exit(1);
-	}
-
-	retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
-	if (retval != 0) {
-		perror("/proc/stat format");
-		exit(1);
-	}
-
-	while (1) {
-		int cpu;
-
-		retval = fscanf(fp,
-			"cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n",
-			&cpu);
-		if (retval != 1)
-			break;
-
-		func(cpu);
-	}
-	fclose(fp);
-}
-
 int main(int argc, char **argv)
 {
+	set_base_cpu();
+	probe_dev_msr();
+	init_data_structures();
+
+	early_cpuid();	/* initial cpuid parse before cmdline */
+
 	cmdline(argc, argv);
 
-	if (verbose > 1)
-		printf("x86_energy_perf_policy Nov 24, 2010"
-				" - Len Brown <lenb@kernel.org>\n");
-	if (verbose > 1 && !read_only)
-		printf("new_bias %lld\n", new_bias);
+	if (debug)
+		print_version();
 
-	validate_cpuid();
+	parse_cpuid();
 
-	if (cpu != -1) {
-		if (read_only)
-			print_msr(cpu);
-		else
-			update_msr(cpu);
-	} else {
-		if (read_only)
-			for_every_cpu(print_msr);
-		else
-			for_every_cpu(update_msr);
+	/* If CPU-set and PKG-set are not initialized, default to all CPUs */
+	if ((cpu_selected_set == 0) && (pkg_selected_set == 0))
+		cpu_selected_set = cpu_present_set;
+
+	/*
+	 * If HWP is being enabled, do it now, so that subsequent operations
+	 * that access HWP registers can work.
+	 */
+	if (update_hwp_enable)
+		for_all_cpus_in_set(cpu_setsize, cpu_selected_set, enable_hwp_on_cpu);
+
+	/* If HWP present, but disabled, warn and ignore from here forward */
+	verify_hwp_is_enabled();
+
+	if (req_update_bounds_check())
+		return -EINVAL;
+
+	/* display information only, no updates to settings */
+	if (!update_epb && !update_turbo && !hwp_update_enabled()) {
+		if (cpu_selected_set)
+			for_all_cpus_in_set(cpu_setsize, cpu_selected_set, print_cpu_msrs);
+
+		if (has_hwp_request_pkg) {
+			if (pkg_selected_set == 0)
+				pkg_selected_set = pkg_present_set;
+
+			for_packages(pkg_selected_set, print_pkg_msrs);
+		}
+
+		return 0;
 	}
 
+	/* update CPU set */
+	if (cpu_selected_set) {
+		for_all_cpus_in_set(cpu_setsize, cpu_selected_set, update_sysfs);
+		for_all_cpus_in_set(cpu_setsize, cpu_selected_set, update_cpu_msrs);
+	} else if (pkg_selected_set)
+		for_packages(pkg_selected_set, update_hwp_request_pkg);
+
 	return 0;
 }