Merge tag 'timers-core-2020-08-04' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull timer updates from Thomas Gleixner:
 "Time, timers and related driver updates:

   - Prevent unnecessary timer softirq invocations by extending the
     tracking of the next expiring timer in the timer wheel beyond the
     existing NOHZ functionality.

     The tracking overhead at enqueue time is within the noise, but on
     sensitive workloads the avoidance of the soft interrupt invocation
     is a measurable improvement.

   - The obligatory new clocksource driver for Ingenic X100 OST

   - The usual fixes, improvements, cleanups and extensions for newer
     chip variants all over the driver space"

* tag 'timers-core-2020-08-04' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (28 commits)
  timers: Recalculate next timer interrupt only when necessary
  clocksource/drivers/ingenic: Add support for the Ingenic X1000 OST.
  dt-bindings: timer: Add Ingenic X1000 OST bindings.
  clocksource/drivers: Replace HTTP links with HTTPS ones
  clocksource/drivers/nomadik-mtu: Handle 32kHz clock
  clocksource/drivers/sh_cmt: Use "kHz" for kilohertz
  clocksource/drivers/imx: Add support for i.MX TPM driver with ARM64
  clocksource/drivers/ingenic: Add high resolution timer support for SMP/SMT.
  timers: Lower base clock forwarding threshold
  timers: Remove must_forward_clk
  timers: Spare timer softirq until next expiry
  timers: Expand clk forward logic beyond nohz
  timers: Reuse next expiry cache after nohz exit
  timers: Always keep track of next expiry
  timers: Optimize _next_timer_interrupt() level iteration
  timers: Add comments about calc_index() ceiling work
  timers: Move trigger_dyntick_cpu() to enqueue_timer()
  timers: Use only bucket expiry for base->next_expiry value
  timers: Preserve higher bits of expiration on index calculation
  clocksource/drivers/timer-atmel-tcb: Add sama5d2 support
  ...
diff --git a/Documentation/devicetree/bindings/mfd/atmel-tcb.txt b/Documentation/devicetree/bindings/mfd/atmel-tcb.txt
deleted file mode 100644
index c4a83e3..0000000
--- a/Documentation/devicetree/bindings/mfd/atmel-tcb.txt
+++ /dev/null
@@ -1,56 +0,0 @@
-* Device tree bindings for Atmel Timer Counter Blocks
-- compatible: Should be "atmel,<chip>-tcb", "simple-mfd", "syscon".
-  <chip> can be "at91rm9200" or "at91sam9x5"
-- reg: Should contain registers location and length
-- #address-cells: has to be 1
-- #size-cells: has to be 0
-- interrupts: Should contain all interrupts for the TC block
-  Note that you can specify several interrupt cells if the TC
-  block has one interrupt per channel.
-- clock-names: tuple listing input clock names.
-	Required elements: "t0_clk", "slow_clk"
-	Optional elements: "t1_clk", "t2_clk"
-- clocks: phandles to input clocks.
-
-The TCB can expose multiple subdevices:
- * a timer
-   - compatible: Should be "atmel,tcb-timer"
-   - reg: Should contain the TCB channels to be used. If the
-     counter width is 16 bits (at91rm9200-tcb), two consecutive
-     channels are needed. Else, only one channel will be used.
-
-Examples:
-
-One interrupt per TC block:
-	tcb0: timer@fff7c000 {
-		compatible = "atmel,at91rm9200-tcb", "simple-mfd", "syscon";
-		#address-cells = <1>;
-		#size-cells = <0>;
-		reg = <0xfff7c000 0x100>;
-		interrupts = <18 4>;
-		clocks = <&tcb0_clk>, <&clk32k>;
-		clock-names = "t0_clk", "slow_clk";
-
-		timer@0 {
-			compatible = "atmel,tcb-timer";
-			reg = <0>, <1>;
-		};
-
-		timer@2 {
-			compatible = "atmel,tcb-timer";
-			reg = <2>;
-		};
-	};
-
-One interrupt per TC channel in a TC block:
-	tcb1: timer@fffdc000 {
-		compatible = "atmel,at91rm9200-tcb", "simple-mfd", "syscon";
-		#address-cells = <1>;
-		#size-cells = <0>;
-		reg = <0xfffdc000 0x100>;
-		interrupts = <26 4>, <27 4>, <28 4>;
-		clocks = <&tcb1_clk>, <&clk32k>;
-		clock-names = "t0_clk", "slow_clk";
-	};
-
-
diff --git a/Documentation/devicetree/bindings/soc/microchip/atmel,at91rm9200-tcb.yaml b/Documentation/devicetree/bindings/soc/microchip/atmel,at91rm9200-tcb.yaml
new file mode 100644
index 0000000..d226fd7
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/microchip/atmel,at91rm9200-tcb.yaml
@@ -0,0 +1,155 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/soc/microchip/atmel,at91rm9200-tcb.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Atmel Timer Counter Block
+
+maintainers:
+  - Alexandre Belloni <alexandre.belloni@bootlin.com>
+
+description: |
+  The Atmel (now Microchip) SoCs have timers named Timer Counter Block. Each
+  timer has three channels with two counters each.
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - atmel,at91rm9200-tcb
+          - atmel,at91sam9x5-tcb
+          - atmel,sama5d2-tcb
+      - const: simple-mfd
+      - const: syscon
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    description:
+      List of interrupts. One interrupt per TCB channel if available or one
+      interrupt for the TC block
+    minItems: 1
+    maxItems: 3
+
+  clock-names:
+    description:
+      List of clock names. Always includes t0_clk and slow clk. Also includes
+      t1_clk and t2_clk if a clock per channel is available.
+    minItems: 2
+    maxItems: 4
+
+  clocks:
+    minItems: 2
+    maxItems: 4
+
+  '#address-cells':
+    const: 1
+
+  '#size-cells':
+    const: 0
+
+patternProperties:
+  "^timer@[0-2]$":
+    description: The timer block channels that are used as timers.
+    type: object
+    properties:
+      compatible:
+        const: atmel,tcb-timer
+      reg:
+        description:
+          List of channels to use for this particular timer.
+        minItems: 1
+        maxItems: 3
+
+    required:
+      - compatible
+      - reg
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: atmel,sama5d2-tcb
+    then:
+      properties:
+        clocks:
+          minItems: 3
+          maxItems: 3
+        clock-names:
+          items:
+            - const: t0_clk
+            - const: gclk
+            - const: slow_clk
+    else:
+      properties:
+        clocks:
+          minItems: 2
+          maxItems: 4
+        clock-names:
+          oneOf:
+            - items:
+              - const: t0_clk
+              - const: slow_clk
+            - items:
+              - const: t0_clk
+              - const: t1_clk
+              - const: t2_clk
+              - const: slow_clk
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - '#address-cells'
+  - '#size-cells'
+
+additionalProperties: false
+
+examples:
+  - |
+    /* One interrupt per TC block: */
+        tcb0: timer@fff7c000 {
+                compatible = "atmel,at91rm9200-tcb", "simple-mfd", "syscon";
+                #address-cells = <1>;
+                #size-cells = <0>;
+                reg = <0xfff7c000 0x100>;
+                interrupts = <18 4>;
+                clocks = <&tcb0_clk>, <&clk32k>;
+                clock-names = "t0_clk", "slow_clk";
+
+                timer@0 {
+                        compatible = "atmel,tcb-timer";
+                        reg = <0>, <1>;
+                };
+
+                timer@2 {
+                        compatible = "atmel,tcb-timer";
+                        reg = <2>;
+                };
+        };
+
+    /* One interrupt per TC channel in a TC block: */
+        tcb1: timer@fffdc000 {
+                compatible = "atmel,at91rm9200-tcb", "simple-mfd", "syscon";
+                #address-cells = <1>;
+                #size-cells = <0>;
+                reg = <0xfffdc000 0x100>;
+                interrupts = <26 4>, <27 4>, <28 4>;
+                clocks = <&tcb1_clk>, <&clk32k>;
+                clock-names = "t0_clk", "slow_clk";
+
+                timer@0 {
+                        compatible = "atmel,tcb-timer";
+                        reg = <0>;
+                };
+
+                timer@1 {
+                        compatible = "atmel,tcb-timer";
+                        reg = <1>;
+                };
+        };
diff --git a/Documentation/devicetree/bindings/timer/ingenic,sysost.yaml b/Documentation/devicetree/bindings/timer/ingenic,sysost.yaml
new file mode 100644
index 0000000..df3eb76
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/ingenic,sysost.yaml
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/timer/ingenic,sysost.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Bindings for SYSOST in Ingenic XBurst family SoCs
+
+maintainers:
+  - 周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>
+
+description:
+  The SYSOST in an Ingenic SoC provides one 64bit timer for clocksource
+  and one or more 32bit timers for clockevent.
+
+properties:
+  "#clock-cells":
+    const: 1
+
+  compatible:
+    enum:
+      - ingenic,x1000-ost
+      - ingenic,x2000-ost
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    const: ost
+
+  interrupts:
+    maxItems: 1
+
+required:
+  - "#clock-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - interrupts
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/x1000-cgu.h>
+
+    ost: timer@12000000 {
+        compatible = "ingenic,x1000-ost";
+        reg = <0x12000000 0x3c>;
+
+        #clock-cells = <1>;
+
+        clocks = <&cgu X1000_CLK_OST>;
+        clock-names = "ost";
+
+        interrupt-parent = <&cpuintc>;
+        interrupts = <3>;
+    };
+...
diff --git a/Documentation/devicetree/bindings/timer/ti,keystone-timer.txt b/Documentation/devicetree/bindings/timer/ti,keystone-timer.txt
index 5fbe361..d3905a5 100644
--- a/Documentation/devicetree/bindings/timer/ti,keystone-timer.txt
+++ b/Documentation/devicetree/bindings/timer/ti,keystone-timer.txt
@@ -10,7 +10,7 @@
 when the counter reaches preset counter values.
 
 Documentation:
-http://www.ti.com/lit/ug/sprugv5a/sprugv5a.pdf
+https://www.ti.com/lit/ug/sprugv5a/sprugv5a.pdf
 
 Required properties:
 
diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi
index 31d8766..d7f2570 100644
--- a/arch/arm/boot/dts/sama5d2.dtsi
+++ b/arch/arm/boot/dts/sama5d2.dtsi
@@ -375,23 +375,23 @@ macb0: ethernet@f8008000 {
 			};
 
 			tcb0: timer@f800c000 {
-				compatible = "atmel,at91sam9x5-tcb", "simple-mfd", "syscon";
+				compatible = "atmel,sama5d2-tcb", "simple-mfd", "syscon";
 				#address-cells = <1>;
 				#size-cells = <0>;
 				reg = <0xf800c000 0x100>;
 				interrupts = <35 IRQ_TYPE_LEVEL_HIGH 0>;
-				clocks = <&pmc PMC_TYPE_PERIPHERAL 35>, <&clk32k>;
-				clock-names = "t0_clk", "slow_clk";
+				clocks = <&pmc PMC_TYPE_PERIPHERAL 35>, <&pmc PMC_TYPE_GCK 35>, <&clk32k>;
+				clock-names = "t0_clk", "gclk", "slow_clk";
 			};
 
 			tcb1: timer@f8010000 {
-				compatible = "atmel,at91sam9x5-tcb", "simple-mfd", "syscon";
+				compatible = "atmel,sama5d2-tcb", "simple-mfd", "syscon";
 				#address-cells = <1>;
 				#size-cells = <0>;
 				reg = <0xf8010000 0x100>;
 				interrupts = <36 IRQ_TYPE_LEVEL_HIGH 0>;
-				clocks = <&pmc PMC_TYPE_PERIPHERAL 36>, <&clk32k>;
-				clock-names = "t0_clk", "slow_clk";
+				clocks = <&pmc PMC_TYPE_PERIPHERAL 36>, <&pmc PMC_TYPE_GCK 36>, <&clk32k>;
+				clock-names = "t0_clk", "gclk", "slow_clk";
 			};
 
 			hsmc: hsmc@f8014000 {
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 9141838..2ed8b43 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -616,8 +616,9 @@
 
 config CLKSRC_IMX_TPM
 	bool "Clocksource using i.MX TPM" if COMPILE_TEST
-	depends on ARM && CLKDEV_LOOKUP
+	depends on (ARM || ARM64) && CLKDEV_LOOKUP
 	select CLKSRC_MMIO
+	select TIMER_OF
 	help
 	  Enable this option to use IMX Timer/PWM Module (TPM) timer as
 	  clocksource.
@@ -696,8 +697,18 @@
 	help
 	  Support for the timer/counter unit of the Ingenic JZ SoCs.
 
+config INGENIC_SYSOST
+	bool "Clocksource/timer using the SYSOST in Ingenic X SoCs"
+	depends on MIPS || COMPILE_TEST
+	depends on COMMON_CLK
+	select MFD_SYSCON
+	select TIMER_OF
+	select IRQ_DOMAIN
+	help
+	  Support for the SYSOST of the Ingenic X Series SoCs.
+
 config INGENIC_OST
-	bool "Clocksource for Ingenic OS Timer"
+	bool "Clocksource using the OST in Ingenic JZ SoCs"
 	depends on MIPS || COMPILE_TEST
 	depends on COMMON_CLK
 	select MFD_SYSCON
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index bdda1a2..3994e22 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -82,6 +82,7 @@
 obj-$(CONFIG_H8300_TMR16)		+= h8300_timer16.o
 obj-$(CONFIG_H8300_TPU)			+= h8300_tpu.o
 obj-$(CONFIG_INGENIC_OST)		+= ingenic-ost.o
+obj-$(CONFIG_INGENIC_SYSOST)	+= ingenic-sysost.o
 obj-$(CONFIG_INGENIC_TIMER)		+= ingenic-timer.o
 obj-$(CONFIG_CLKSRC_ST_LPC)		+= clksrc_st_lpc.o
 obj-$(CONFIG_X86_NUMACHIP)		+= numachip.o
diff --git a/drivers/clocksource/ingenic-sysost.c b/drivers/clocksource/ingenic-sysost.c
new file mode 100644
index 0000000..e77d584
--- /dev/null
+++ b/drivers/clocksource/ingenic-sysost.c
@@ -0,0 +1,539 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Ingenic XBurst SoCs SYSOST clocks driver
+ * Copyright (c) 2020 周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/clockchips.h>
+#include <linux/clocksource.h>
+#include <linux/interrupt.h>
+#include <linux/mfd/syscon.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/sched_clock.h>
+#include <linux/slab.h>
+#include <linux/syscore_ops.h>
+
+#include <dt-bindings/clock/ingenic,sysost.h>
+
+/* OST register offsets */
+#define OST_REG_OSTCCR			0x00
+#define OST_REG_OSTCR			0x08
+#define OST_REG_OSTFR			0x0c
+#define OST_REG_OSTMR			0x10
+#define OST_REG_OST1DFR			0x14
+#define OST_REG_OST1CNT			0x18
+#define OST_REG_OST2CNTL		0x20
+#define OST_REG_OSTCNT2HBUF		0x24
+#define OST_REG_OSTESR			0x34
+#define OST_REG_OSTECR			0x38
+
+/* bits within the OSTCCR register */
+#define OSTCCR_PRESCALE1_MASK	0x3
+#define OSTCCR_PRESCALE2_MASK	0xc
+#define OSTCCR_PRESCALE1_LSB	0
+#define OSTCCR_PRESCALE2_LSB	2
+
+/* bits within the OSTCR register */
+#define OSTCR_OST1CLR			BIT(0)
+#define OSTCR_OST2CLR			BIT(1)
+
+/* bits within the OSTFR register */
+#define OSTFR_FFLAG				BIT(0)
+
+/* bits within the OSTMR register */
+#define OSTMR_FMASK				BIT(0)
+
+/* bits within the OSTESR register */
+#define OSTESR_OST1ENS			BIT(0)
+#define OSTESR_OST2ENS			BIT(1)
+
+/* bits within the OSTECR register */
+#define OSTECR_OST1ENC			BIT(0)
+#define OSTECR_OST2ENC			BIT(1)
+
+struct ingenic_soc_info {
+	unsigned int num_channels;
+};
+
+struct ingenic_ost_clk_info {
+	struct clk_init_data init_data;
+	u8 ostccr_reg;
+};
+
+struct ingenic_ost_clk {
+	struct clk_hw hw;
+	unsigned int idx;
+	struct ingenic_ost *ost;
+	const struct ingenic_ost_clk_info *info;
+};
+
+struct ingenic_ost {
+	void __iomem *base;
+	const struct ingenic_soc_info *soc_info;
+	struct clk *clk, *percpu_timer_clk, *global_timer_clk;
+	struct clock_event_device cevt;
+	struct clocksource cs;
+	char name[20];
+
+	struct clk_hw_onecell_data *clocks;
+};
+
+static struct ingenic_ost *ingenic_ost;
+
+static inline struct ingenic_ost_clk *to_ost_clk(struct clk_hw *hw)
+{
+	return container_of(hw, struct ingenic_ost_clk, hw);
+}
+
+static unsigned long ingenic_ost_percpu_timer_recalc_rate(struct clk_hw *hw,
+		unsigned long parent_rate)
+{
+	struct ingenic_ost_clk *ost_clk = to_ost_clk(hw);
+	const struct ingenic_ost_clk_info *info = ost_clk->info;
+	unsigned int prescale;
+
+	prescale = readl(ost_clk->ost->base + info->ostccr_reg);
+
+	prescale = (prescale & OSTCCR_PRESCALE1_MASK) >> OSTCCR_PRESCALE1_LSB;
+
+	return parent_rate >> (prescale * 2);
+}
+
+static unsigned long ingenic_ost_global_timer_recalc_rate(struct clk_hw *hw,
+		unsigned long parent_rate)
+{
+	struct ingenic_ost_clk *ost_clk = to_ost_clk(hw);
+	const struct ingenic_ost_clk_info *info = ost_clk->info;
+	unsigned int prescale;
+
+	prescale = readl(ost_clk->ost->base + info->ostccr_reg);
+
+	prescale = (prescale & OSTCCR_PRESCALE2_MASK) >> OSTCCR_PRESCALE2_LSB;
+
+	return parent_rate >> (prescale * 2);
+}
+
+static u8 ingenic_ost_get_prescale(unsigned long rate, unsigned long req_rate)
+{
+	u8 prescale;
+
+	for (prescale = 0; prescale < 2; prescale++)
+		if ((rate >> (prescale * 2)) <= req_rate)
+			return prescale;
+
+	return 2; /* /16 divider */
+}
+
+static long ingenic_ost_round_rate(struct clk_hw *hw, unsigned long req_rate,
+		unsigned long *parent_rate)
+{
+	unsigned long rate = *parent_rate;
+	u8 prescale;
+
+	if (req_rate > rate)
+		return rate;
+
+	prescale = ingenic_ost_get_prescale(rate, req_rate);
+
+	return rate >> (prescale * 2);
+}
+
+static int ingenic_ost_percpu_timer_set_rate(struct clk_hw *hw, unsigned long req_rate,
+		unsigned long parent_rate)
+{
+	struct ingenic_ost_clk *ost_clk = to_ost_clk(hw);
+	const struct ingenic_ost_clk_info *info = ost_clk->info;
+	u8 prescale = ingenic_ost_get_prescale(parent_rate, req_rate);
+	int val;
+
+	val = readl(ost_clk->ost->base + info->ostccr_reg);
+	val = (val & ~OSTCCR_PRESCALE1_MASK) | (prescale << OSTCCR_PRESCALE1_LSB);
+	writel(val, ost_clk->ost->base + info->ostccr_reg);
+
+	return 0;
+}
+
+static int ingenic_ost_global_timer_set_rate(struct clk_hw *hw, unsigned long req_rate,
+		unsigned long parent_rate)
+{
+	struct ingenic_ost_clk *ost_clk = to_ost_clk(hw);
+	const struct ingenic_ost_clk_info *info = ost_clk->info;
+	u8 prescale = ingenic_ost_get_prescale(parent_rate, req_rate);
+	int val;
+
+	val = readl(ost_clk->ost->base + info->ostccr_reg);
+	val = (val & ~OSTCCR_PRESCALE2_MASK) | (prescale << OSTCCR_PRESCALE2_LSB);
+	writel(val, ost_clk->ost->base + info->ostccr_reg);
+
+	return 0;
+}
+
+static const struct clk_ops ingenic_ost_percpu_timer_ops = {
+	.recalc_rate	= ingenic_ost_percpu_timer_recalc_rate,
+	.round_rate		= ingenic_ost_round_rate,
+	.set_rate		= ingenic_ost_percpu_timer_set_rate,
+};
+
+static const struct clk_ops ingenic_ost_global_timer_ops = {
+	.recalc_rate	= ingenic_ost_global_timer_recalc_rate,
+	.round_rate		= ingenic_ost_round_rate,
+	.set_rate		= ingenic_ost_global_timer_set_rate,
+};
+
+static const char * const ingenic_ost_clk_parents[] = { "ext" };
+
+static const struct ingenic_ost_clk_info ingenic_ost_clk_info[] = {
+	[OST_CLK_PERCPU_TIMER] = {
+		.init_data = {
+			.name = "percpu timer",
+			.parent_names = ingenic_ost_clk_parents,
+			.num_parents = ARRAY_SIZE(ingenic_ost_clk_parents),
+			.ops = &ingenic_ost_percpu_timer_ops,
+			.flags = CLK_SET_RATE_UNGATE,
+		},
+		.ostccr_reg = OST_REG_OSTCCR,
+	},
+
+	[OST_CLK_GLOBAL_TIMER] = {
+		.init_data = {
+			.name = "global timer",
+			.parent_names = ingenic_ost_clk_parents,
+			.num_parents = ARRAY_SIZE(ingenic_ost_clk_parents),
+			.ops = &ingenic_ost_global_timer_ops,
+			.flags = CLK_SET_RATE_UNGATE,
+		},
+		.ostccr_reg = OST_REG_OSTCCR,
+	},
+};
+
+static u64 notrace ingenic_ost_global_timer_read_cntl(void)
+{
+	struct ingenic_ost *ost = ingenic_ost;
+	unsigned int count;
+
+	count = readl(ost->base + OST_REG_OST2CNTL);
+
+	return count;
+}
+
+static u64 notrace ingenic_ost_clocksource_read(struct clocksource *cs)
+{
+	return ingenic_ost_global_timer_read_cntl();
+}
+
+static inline struct ingenic_ost *to_ingenic_ost(struct clock_event_device *evt)
+{
+	return container_of(evt, struct ingenic_ost, cevt);
+}
+
+static int ingenic_ost_cevt_set_state_shutdown(struct clock_event_device *evt)
+{
+	struct ingenic_ost *ost = to_ingenic_ost(evt);
+
+	writel(OSTECR_OST1ENC, ost->base + OST_REG_OSTECR);
+
+	return 0;
+}
+
+static int ingenic_ost_cevt_set_next(unsigned long next,
+				     struct clock_event_device *evt)
+{
+	struct ingenic_ost *ost = to_ingenic_ost(evt);
+
+	writel((u32)~OSTFR_FFLAG, ost->base + OST_REG_OSTFR);
+	writel(next, ost->base + OST_REG_OST1DFR);
+	writel(OSTCR_OST1CLR, ost->base + OST_REG_OSTCR);
+	writel(OSTESR_OST1ENS, ost->base + OST_REG_OSTESR);
+	writel((u32)~OSTMR_FMASK, ost->base + OST_REG_OSTMR);
+
+	return 0;
+}
+
+static irqreturn_t ingenic_ost_cevt_cb(int irq, void *dev_id)
+{
+	struct clock_event_device *evt = dev_id;
+	struct ingenic_ost *ost = to_ingenic_ost(evt);
+
+	writel(OSTECR_OST1ENC, ost->base + OST_REG_OSTECR);
+
+	if (evt->event_handler)
+		evt->event_handler(evt);
+
+	return IRQ_HANDLED;
+}
+
+static int __init ingenic_ost_register_clock(struct ingenic_ost *ost,
+			unsigned int idx, const struct ingenic_ost_clk_info *info,
+			struct clk_hw_onecell_data *clocks)
+{
+	struct ingenic_ost_clk *ost_clk;
+	int val, err;
+
+	ost_clk = kzalloc(sizeof(*ost_clk), GFP_KERNEL);
+	if (!ost_clk)
+		return -ENOMEM;
+
+	ost_clk->hw.init = &info->init_data;
+	ost_clk->idx = idx;
+	ost_clk->info = info;
+	ost_clk->ost = ost;
+
+	/* Reset clock divider */
+	val = readl(ost->base + info->ostccr_reg);
+	val &= ~(OSTCCR_PRESCALE1_MASK | OSTCCR_PRESCALE2_MASK);
+	writel(val, ost->base + info->ostccr_reg);
+
+	err = clk_hw_register(NULL, &ost_clk->hw);
+	if (err) {
+		kfree(ost_clk);
+		return err;
+	}
+
+	clocks->hws[idx] = &ost_clk->hw;
+
+	return 0;
+}
+
+static struct clk * __init ingenic_ost_get_clock(struct device_node *np, int id)
+{
+	struct of_phandle_args args;
+
+	args.np = np;
+	args.args_count = 1;
+	args.args[0] = id;
+
+	return of_clk_get_from_provider(&args);
+}
+
+static int __init ingenic_ost_percpu_timer_init(struct device_node *np,
+					 struct ingenic_ost *ost)
+{
+	unsigned int timer_virq, channel = OST_CLK_PERCPU_TIMER;
+	unsigned long rate;
+	int err;
+
+	ost->percpu_timer_clk = ingenic_ost_get_clock(np, channel);
+	if (IS_ERR(ost->percpu_timer_clk))
+		return PTR_ERR(ost->percpu_timer_clk);
+
+	err = clk_prepare_enable(ost->percpu_timer_clk);
+	if (err)
+		goto err_clk_put;
+
+	rate = clk_get_rate(ost->percpu_timer_clk);
+	if (!rate) {
+		err = -EINVAL;
+		goto err_clk_disable;
+	}
+
+	timer_virq = of_irq_get(np, 0);
+	if (!timer_virq) {
+		err = -EINVAL;
+		goto err_clk_disable;
+	}
+
+	snprintf(ost->name, sizeof(ost->name), "OST percpu timer");
+
+	err = request_irq(timer_virq, ingenic_ost_cevt_cb, IRQF_TIMER,
+			  ost->name, &ost->cevt);
+	if (err)
+		goto err_irq_dispose_mapping;
+
+	ost->cevt.cpumask = cpumask_of(smp_processor_id());
+	ost->cevt.features = CLOCK_EVT_FEAT_ONESHOT;
+	ost->cevt.name = ost->name;
+	ost->cevt.rating = 400;
+	ost->cevt.set_state_shutdown = ingenic_ost_cevt_set_state_shutdown;
+	ost->cevt.set_next_event = ingenic_ost_cevt_set_next;
+
+	clockevents_config_and_register(&ost->cevt, rate, 4, 0xffffffff);
+
+	return 0;
+
+err_irq_dispose_mapping:
+	irq_dispose_mapping(timer_virq);
+err_clk_disable:
+	clk_disable_unprepare(ost->percpu_timer_clk);
+err_clk_put:
+	clk_put(ost->percpu_timer_clk);
+	return err;
+}
+
+static int __init ingenic_ost_global_timer_init(struct device_node *np,
+					       struct ingenic_ost *ost)
+{
+	unsigned int channel = OST_CLK_GLOBAL_TIMER;
+	struct clocksource *cs = &ost->cs;
+	unsigned long rate;
+	int err;
+
+	ost->global_timer_clk = ingenic_ost_get_clock(np, channel);
+	if (IS_ERR(ost->global_timer_clk))
+		return PTR_ERR(ost->global_timer_clk);
+
+	err = clk_prepare_enable(ost->global_timer_clk);
+	if (err)
+		goto err_clk_put;
+
+	rate = clk_get_rate(ost->global_timer_clk);
+	if (!rate) {
+		err = -EINVAL;
+		goto err_clk_disable;
+	}
+
+	/* Clear counter CNT registers */
+	writel(OSTCR_OST2CLR, ost->base + OST_REG_OSTCR);
+
+	/* Enable OST channel */
+	writel(OSTESR_OST2ENS, ost->base + OST_REG_OSTESR);
+
+	cs->name = "ingenic-ost";
+	cs->rating = 400;
+	cs->flags = CLOCK_SOURCE_IS_CONTINUOUS;
+	cs->mask = CLOCKSOURCE_MASK(32);
+	cs->read = ingenic_ost_clocksource_read;
+
+	err = clocksource_register_hz(cs, rate);
+	if (err)
+		goto err_clk_disable;
+
+	return 0;
+
+err_clk_disable:
+	clk_disable_unprepare(ost->global_timer_clk);
+err_clk_put:
+	clk_put(ost->global_timer_clk);
+	return err;
+}
+
+static const struct ingenic_soc_info x1000_soc_info = {
+	.num_channels = 2,
+};
+
+static const struct of_device_id __maybe_unused ingenic_ost_of_match[] __initconst = {
+	{ .compatible = "ingenic,x1000-ost", .data = &x1000_soc_info, },
+	{ /* sentinel */ }
+};
+
+static int __init ingenic_ost_probe(struct device_node *np)
+{
+	const struct of_device_id *id = of_match_node(ingenic_ost_of_match, np);
+	struct ingenic_ost *ost;
+	unsigned int i;
+	int ret;
+
+	ost = kzalloc(sizeof(*ost), GFP_KERNEL);
+	if (!ost)
+		return -ENOMEM;
+
+	ost->base = of_io_request_and_map(np, 0, of_node_full_name(np));
+	if (IS_ERR(ost->base)) {
+		pr_err("%s: Failed to map OST registers\n", __func__);
+		ret = PTR_ERR(ost->base);
+		goto err_free_ost;
+	}
+
+	ost->clk = of_clk_get_by_name(np, "ost");
+	if (IS_ERR(ost->clk)) {
+		ret = PTR_ERR(ost->clk);
+		pr_crit("%s: Cannot get OST clock\n", __func__);
+		goto err_free_ost;
+	}
+
+	ret = clk_prepare_enable(ost->clk);
+	if (ret) {
+		pr_crit("%s: Unable to enable OST clock\n", __func__);
+		goto err_put_clk;
+	}
+
+	ost->soc_info = id->data;
+
+	ost->clocks = kzalloc(struct_size(ost->clocks, hws, ost->soc_info->num_channels),
+			      GFP_KERNEL);
+	if (!ost->clocks) {
+		ret = -ENOMEM;
+		goto err_clk_disable;
+	}
+
+	ost->clocks->num = ost->soc_info->num_channels;
+
+	for (i = 0; i < ost->clocks->num; i++) {
+		ret = ingenic_ost_register_clock(ost, i, &ingenic_ost_clk_info[i], ost->clocks);
+		if (ret) {
+			pr_crit("%s: Cannot register clock %d\n", __func__, i);
+			goto err_unregister_ost_clocks;
+		}
+	}
+
+	ret = of_clk_add_hw_provider(np, of_clk_hw_onecell_get, ost->clocks);
+	if (ret) {
+		pr_crit("%s: Cannot add OF clock provider\n", __func__);
+		goto err_unregister_ost_clocks;
+	}
+
+	ingenic_ost = ost;
+
+	return 0;
+
+err_unregister_ost_clocks:
+	for (i = 0; i < ost->clocks->num; i++)
+		if (ost->clocks->hws[i])
+			clk_hw_unregister(ost->clocks->hws[i]);
+	kfree(ost->clocks);
+err_clk_disable:
+	clk_disable_unprepare(ost->clk);
+err_put_clk:
+	clk_put(ost->clk);
+err_free_ost:
+	kfree(ost);
+	return ret;
+}
+
+static int __init ingenic_ost_init(struct device_node *np)
+{
+	struct ingenic_ost *ost;
+	unsigned long rate;
+	int ret;
+
+	ret = ingenic_ost_probe(np);
+	if (ret) {
+		pr_crit("%s: Failed to initialize OST clocks: %d\n", __func__, ret);
+		return ret;
+	}
+
+	of_node_clear_flag(np, OF_POPULATED);
+
+	ost = ingenic_ost;
+	if (IS_ERR(ost))
+		return PTR_ERR(ost);
+
+	ret = ingenic_ost_global_timer_init(np, ost);
+	if (ret) {
+		pr_crit("%s: Unable to init global timer: %x\n", __func__, ret);
+		goto err_free_ingenic_ost;
+	}
+
+	ret = ingenic_ost_percpu_timer_init(np, ost);
+	if (ret)
+		goto err_ost_global_timer_cleanup;
+
+	/* Register the sched_clock at the end as there's no way to undo it */
+	rate = clk_get_rate(ost->global_timer_clk);
+	sched_clock_register(ingenic_ost_global_timer_read_cntl, 32, rate);
+
+	return 0;
+
+err_ost_global_timer_cleanup:
+	clocksource_unregister(&ost->cs);
+	clk_disable_unprepare(ost->global_timer_clk);
+	clk_put(ost->global_timer_clk);
+err_free_ingenic_ost:
+	kfree(ost);
+	return ret;
+}
+
+TIMER_OF_DECLARE(x1000_ost,  "ingenic,x1000-ost",  ingenic_ost_init);
diff --git a/drivers/clocksource/ingenic-timer.c b/drivers/clocksource/ingenic-timer.c
index 4963336..58fd9189 100644
--- a/drivers/clocksource/ingenic-timer.c
+++ b/drivers/clocksource/ingenic-timer.c
@@ -1,7 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * JZ47xx SoCs TCU IRQ driver
+ * Ingenic SoCs TCU IRQ driver
  * Copyright (C) 2019 Paul Cercueil <paul@crapouillou.net>
+ * Copyright (C) 2020 周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>
  */
 
 #include <linux/bitops.h>
@@ -15,24 +16,35 @@
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
+#include <linux/overflow.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/sched_clock.h>
 
 #include <dt-bindings/clock/ingenic,tcu.h>
 
+static DEFINE_PER_CPU(call_single_data_t, ingenic_cevt_csd);
+
 struct ingenic_soc_info {
 	unsigned int num_channels;
 };
 
+struct ingenic_tcu_timer {
+	unsigned int cpu;
+	unsigned int channel;
+	struct clock_event_device cevt;
+	struct clk *clk;
+	char name[8];
+};
+
 struct ingenic_tcu {
 	struct regmap *map;
-	struct clk *timer_clk, *cs_clk;
-	unsigned int timer_channel, cs_channel;
-	struct clock_event_device cevt;
+	struct device_node *np;
+	struct clk *cs_clk;
+	unsigned int cs_channel;
 	struct clocksource cs;
-	char name[4];
 	unsigned long pwm_channels_mask;
+	struct ingenic_tcu_timer timers[];
 };
 
 static struct ingenic_tcu *ingenic_tcu;
@@ -52,16 +64,24 @@ static u64 notrace ingenic_tcu_timer_cs_read(struct clocksource *cs)
 	return ingenic_tcu_timer_read();
 }
 
-static inline struct ingenic_tcu *to_ingenic_tcu(struct clock_event_device *evt)
+static inline struct ingenic_tcu *
+to_ingenic_tcu(struct ingenic_tcu_timer *timer)
 {
-	return container_of(evt, struct ingenic_tcu, cevt);
+	return container_of(timer, struct ingenic_tcu, timers[timer->cpu]);
+}
+
+static inline struct ingenic_tcu_timer *
+to_ingenic_tcu_timer(struct clock_event_device *evt)
+{
+	return container_of(evt, struct ingenic_tcu_timer, cevt);
 }
 
 static int ingenic_tcu_cevt_set_state_shutdown(struct clock_event_device *evt)
 {
-	struct ingenic_tcu *tcu = to_ingenic_tcu(evt);
+	struct ingenic_tcu_timer *timer = to_ingenic_tcu_timer(evt);
+	struct ingenic_tcu *tcu = to_ingenic_tcu(timer);
 
-	regmap_write(tcu->map, TCU_REG_TECR, BIT(tcu->timer_channel));
+	regmap_write(tcu->map, TCU_REG_TECR, BIT(timer->channel));
 
 	return 0;
 }
@@ -69,27 +89,40 @@ static int ingenic_tcu_cevt_set_state_shutdown(struct clock_event_device *evt)
 static int ingenic_tcu_cevt_set_next(unsigned long next,
 				     struct clock_event_device *evt)
 {
-	struct ingenic_tcu *tcu = to_ingenic_tcu(evt);
+	struct ingenic_tcu_timer *timer = to_ingenic_tcu_timer(evt);
+	struct ingenic_tcu *tcu = to_ingenic_tcu(timer);
 
 	if (next > 0xffff)
 		return -EINVAL;
 
-	regmap_write(tcu->map, TCU_REG_TDFRc(tcu->timer_channel), next);
-	regmap_write(tcu->map, TCU_REG_TCNTc(tcu->timer_channel), 0);
-	regmap_write(tcu->map, TCU_REG_TESR, BIT(tcu->timer_channel));
+	regmap_write(tcu->map, TCU_REG_TDFRc(timer->channel), next);
+	regmap_write(tcu->map, TCU_REG_TCNTc(timer->channel), 0);
+	regmap_write(tcu->map, TCU_REG_TESR, BIT(timer->channel));
 
 	return 0;
 }
 
+static void ingenic_per_cpu_event_handler(void *info)
+{
+	struct clock_event_device *cevt = (struct clock_event_device *) info;
+
+	cevt->event_handler(cevt);
+}
+
 static irqreturn_t ingenic_tcu_cevt_cb(int irq, void *dev_id)
 {
-	struct clock_event_device *evt = dev_id;
-	struct ingenic_tcu *tcu = to_ingenic_tcu(evt);
+	struct ingenic_tcu_timer *timer = dev_id;
+	struct ingenic_tcu *tcu = to_ingenic_tcu(timer);
+	call_single_data_t *csd;
 
-	regmap_write(tcu->map, TCU_REG_TECR, BIT(tcu->timer_channel));
+	regmap_write(tcu->map, TCU_REG_TECR, BIT(timer->channel));
 
-	if (evt->event_handler)
-		evt->event_handler(evt);
+	if (timer->cevt.event_handler) {
+		csd = &per_cpu(ingenic_cevt_csd, timer->cpu);
+		csd->info = (void *) &timer->cevt;
+		csd->func = ingenic_per_cpu_event_handler;
+		smp_call_function_single_async(timer->cpu, csd);
+	}
 
 	return IRQ_HANDLED;
 }
@@ -105,64 +138,66 @@ static struct clk * __init ingenic_tcu_get_clock(struct device_node *np, int id)
 	return of_clk_get_from_provider(&args);
 }
 
-static int __init ingenic_tcu_timer_init(struct device_node *np,
-					 struct ingenic_tcu *tcu)
+static int ingenic_tcu_setup_cevt(unsigned int cpu)
 {
-	unsigned int timer_virq, channel = tcu->timer_channel;
+	struct ingenic_tcu *tcu = ingenic_tcu;
+	struct ingenic_tcu_timer *timer = &tcu->timers[cpu];
+	unsigned int timer_virq;
 	struct irq_domain *domain;
 	unsigned long rate;
 	int err;
 
-	tcu->timer_clk = ingenic_tcu_get_clock(np, channel);
-	if (IS_ERR(tcu->timer_clk))
-		return PTR_ERR(tcu->timer_clk);
+	timer->clk = ingenic_tcu_get_clock(tcu->np, timer->channel);
+	if (IS_ERR(timer->clk))
+		return PTR_ERR(timer->clk);
 
-	err = clk_prepare_enable(tcu->timer_clk);
+	err = clk_prepare_enable(timer->clk);
 	if (err)
 		goto err_clk_put;
 
-	rate = clk_get_rate(tcu->timer_clk);
+	rate = clk_get_rate(timer->clk);
 	if (!rate) {
 		err = -EINVAL;
 		goto err_clk_disable;
 	}
 
-	domain = irq_find_host(np);
+	domain = irq_find_host(tcu->np);
 	if (!domain) {
 		err = -ENODEV;
 		goto err_clk_disable;
 	}
 
-	timer_virq = irq_create_mapping(domain, channel);
+	timer_virq = irq_create_mapping(domain, timer->channel);
 	if (!timer_virq) {
 		err = -EINVAL;
 		goto err_clk_disable;
 	}
 
-	snprintf(tcu->name, sizeof(tcu->name), "TCU");
+	snprintf(timer->name, sizeof(timer->name), "TCU%u", timer->channel);
 
 	err = request_irq(timer_virq, ingenic_tcu_cevt_cb, IRQF_TIMER,
-			  tcu->name, &tcu->cevt);
+			  timer->name, timer);
 	if (err)
 		goto err_irq_dispose_mapping;
 
-	tcu->cevt.cpumask = cpumask_of(smp_processor_id());
-	tcu->cevt.features = CLOCK_EVT_FEAT_ONESHOT;
-	tcu->cevt.name = tcu->name;
-	tcu->cevt.rating = 200;
-	tcu->cevt.set_state_shutdown = ingenic_tcu_cevt_set_state_shutdown;
-	tcu->cevt.set_next_event = ingenic_tcu_cevt_set_next;
+	timer->cpu = smp_processor_id();
+	timer->cevt.cpumask = cpumask_of(smp_processor_id());
+	timer->cevt.features = CLOCK_EVT_FEAT_ONESHOT;
+	timer->cevt.name = timer->name;
+	timer->cevt.rating = 200;
+	timer->cevt.set_state_shutdown = ingenic_tcu_cevt_set_state_shutdown;
+	timer->cevt.set_next_event = ingenic_tcu_cevt_set_next;
 
-	clockevents_config_and_register(&tcu->cevt, rate, 10, 0xffff);
+	clockevents_config_and_register(&timer->cevt, rate, 10, 0xffff);
 
 	return 0;
 
 err_irq_dispose_mapping:
 	irq_dispose_mapping(timer_virq);
 err_clk_disable:
-	clk_disable_unprepare(tcu->timer_clk);
+	clk_disable_unprepare(timer->clk);
 err_clk_put:
-	clk_put(tcu->timer_clk);
+	clk_put(timer->clk);
 	return err;
 }
 
@@ -238,10 +273,12 @@ static int __init ingenic_tcu_init(struct device_node *np)
 {
 	const struct of_device_id *id = of_match_node(ingenic_tcu_of_match, np);
 	const struct ingenic_soc_info *soc_info = id->data;
+	struct ingenic_tcu_timer *timer;
 	struct ingenic_tcu *tcu;
 	struct regmap *map;
+	unsigned int cpu;
+	int ret, last_bit = -1;
 	long rate;
-	int ret;
 
 	of_node_clear_flag(np, OF_POPULATED);
 
@@ -249,17 +286,23 @@ static int __init ingenic_tcu_init(struct device_node *np)
 	if (IS_ERR(map))
 		return PTR_ERR(map);
 
-	tcu = kzalloc(sizeof(*tcu), GFP_KERNEL);
+	tcu = kzalloc(struct_size(tcu, timers, num_possible_cpus()),
+		      GFP_KERNEL);
 	if (!tcu)
 		return -ENOMEM;
 
-	/* Enable all TCU channels for PWM use by default except channels 0/1 */
-	tcu->pwm_channels_mask = GENMASK(soc_info->num_channels - 1, 2);
+	/*
+	 * Enable all TCU channels for PWM use by default except channels 0/1,
+	 * and channel 2 if target CPU is JZ4780/X2000 and SMP is selected.
+	 */
+	tcu->pwm_channels_mask = GENMASK(soc_info->num_channels - 1,
+					 num_possible_cpus() + 1);
 	of_property_read_u32(np, "ingenic,pwm-channels-mask",
 			     (u32 *)&tcu->pwm_channels_mask);
 
-	/* Verify that we have at least two free channels */
-	if (hweight8(tcu->pwm_channels_mask) > soc_info->num_channels - 2) {
+	/* Verify that we have at least num_possible_cpus() + 1 free channels */
+	if (hweight8(tcu->pwm_channels_mask) >
+			soc_info->num_channels - num_possible_cpus() + 1) {
 		pr_crit("%s: Invalid PWM channel mask: 0x%02lx\n", __func__,
 			tcu->pwm_channels_mask);
 		ret = -EINVAL;
@@ -267,13 +310,22 @@ static int __init ingenic_tcu_init(struct device_node *np)
 	}
 
 	tcu->map = map;
+	tcu->np = np;
 	ingenic_tcu = tcu;
 
-	tcu->timer_channel = find_first_zero_bit(&tcu->pwm_channels_mask,
-						 soc_info->num_channels);
+	for (cpu = 0; cpu < num_possible_cpus(); cpu++) {
+		timer = &tcu->timers[cpu];
+
+		timer->cpu = cpu;
+		timer->channel = find_next_zero_bit(&tcu->pwm_channels_mask,
+						  soc_info->num_channels,
+						  last_bit + 1);
+		last_bit = timer->channel;
+	}
+
 	tcu->cs_channel = find_next_zero_bit(&tcu->pwm_channels_mask,
 					     soc_info->num_channels,
-					     tcu->timer_channel + 1);
+					     last_bit + 1);
 
 	ret = ingenic_tcu_clocksource_init(np, tcu);
 	if (ret) {
@@ -281,9 +333,13 @@ static int __init ingenic_tcu_init(struct device_node *np)
 		goto err_free_ingenic_tcu;
 	}
 
-	ret = ingenic_tcu_timer_init(np, tcu);
-	if (ret)
+	/* Setup clock events on each CPU core */
+	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "Ingenic XBurst: online",
+				ingenic_tcu_setup_cevt, NULL);
+	if (ret < 0) {
+		pr_crit("%s: Unable to start CPU timers: %d\n", __func__, ret);
 		goto err_tcu_clocksource_cleanup;
+	}
 
 	/* Register the sched_clock at the end as there's no way to undo it */
 	rate = clk_get_rate(tcu->cs_clk);
@@ -315,28 +371,38 @@ static int __init ingenic_tcu_probe(struct platform_device *pdev)
 static int __maybe_unused ingenic_tcu_suspend(struct device *dev)
 {
 	struct ingenic_tcu *tcu = dev_get_drvdata(dev);
+	unsigned int cpu;
 
 	clk_disable(tcu->cs_clk);
-	clk_disable(tcu->timer_clk);
+
+	for (cpu = 0; cpu < num_online_cpus(); cpu++)
+		clk_disable(tcu->timers[cpu].clk);
+
 	return 0;
 }
 
 static int __maybe_unused ingenic_tcu_resume(struct device *dev)
 {
 	struct ingenic_tcu *tcu = dev_get_drvdata(dev);
+	unsigned int cpu;
 	int ret;
 
-	ret = clk_enable(tcu->timer_clk);
-	if (ret)
-		return ret;
-
-	ret = clk_enable(tcu->cs_clk);
-	if (ret) {
-		clk_disable(tcu->timer_clk);
-		return ret;
+	for (cpu = 0; cpu < num_online_cpus(); cpu++) {
+		ret = clk_enable(tcu->timers[cpu].clk);
+		if (ret)
+			goto err_timer_clk_disable;
 	}
 
+	ret = clk_enable(tcu->cs_clk);
+	if (ret)
+		goto err_timer_clk_disable;
+
 	return 0;
+
+err_timer_clk_disable:
+	for (; cpu > 0; cpu--)
+		clk_disable(tcu->timers[cpu - 1].clk);
+	return ret;
 }
 
 static const struct dev_pm_ops __maybe_unused ingenic_tcu_pm_ops = {
diff --git a/drivers/clocksource/nomadik-mtu.c b/drivers/clocksource/nomadik-mtu.c
index f49a631..1cf3304 100644
--- a/drivers/clocksource/nomadik-mtu.c
+++ b/drivers/clocksource/nomadik-mtu.c
@@ -186,6 +186,7 @@ static int __init nmdk_timer_init(void __iomem *base, int irq,
 {
 	unsigned long rate;
 	int ret;
+	int min_ticks;
 
 	mtu_base = base;
 
@@ -194,7 +195,8 @@ static int __init nmdk_timer_init(void __iomem *base, int irq,
 
 	/*
 	 * Tick rate is 2.4MHz for Nomadik and 2.4Mhz, 100MHz or 133 MHz
-	 * for ux500.
+	 * for ux500, and in one specific Ux500 case 32768 Hz.
+	 *
 	 * Use a divide-by-16 counter if the tick rate is more than 32MHz.
 	 * At 32 MHz, the timer (with 32 bit counter) can be programmed
 	 * to wake-up at a max 127s a head in time. Dividing a 2.4 MHz timer
@@ -230,7 +232,12 @@ static int __init nmdk_timer_init(void __iomem *base, int irq,
 		pr_err("%s: request_irq() failed\n", "Nomadik Timer Tick");
 	nmdk_clkevt.cpumask = cpumask_of(0);
 	nmdk_clkevt.irq = irq;
-	clockevents_config_and_register(&nmdk_clkevt, rate, 2, 0xffffffffU);
+	if (rate < 100000)
+		min_ticks = 5;
+	else
+		min_ticks = 2;
+	clockevents_config_and_register(&nmdk_clkevt, rate, min_ticks,
+					0xffffffffU);
 
 	mtu_delay_timer.read_current_timer = &nmdk_timer_read_current_timer;
 	mtu_delay_timer.freq = rate;
diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c
index 12ac75f..7607774 100644
--- a/drivers/clocksource/sh_cmt.c
+++ b/drivers/clocksource/sh_cmt.c
@@ -349,7 +349,7 @@ static int sh_cmt_enable(struct sh_cmt_channel *ch)
 
 	/*
 	 * According to the sh73a0 user's manual, as CMCNT can be operated
-	 * only by the RCLK (Pseudo 32 KHz), there's one restriction on
+	 * only by the RCLK (Pseudo 32 kHz), there's one restriction on
 	 * modifying CMCNT register; two RCLK cycles are necessary before
 	 * this register is either read or any modification of the value
 	 * it holds is reflected in the LSI's actual operation.
diff --git a/drivers/clocksource/timer-atmel-tcb.c b/drivers/clocksource/timer-atmel-tcb.c
index 7427b07..787dbeb 100644
--- a/drivers/clocksource/timer-atmel-tcb.c
+++ b/drivers/clocksource/timer-atmel-tcb.c
@@ -27,9 +27,10 @@
  *   - Some chips support 32 bit counter. A single channel is used for
  *     this 32 bit free-running counter. the second channel is not used.
  *
- *   - The third channel may be used to provide a 16-bit clockevent
- *     source, used in either periodic or oneshot mode.  This runs
- *     at 32 KiHZ, and can handle delays of up to two seconds.
+ *   - The third channel may be used to provide a clockevent source, used in
+ *   either periodic or oneshot mode. For 16-bit counter its runs at 32 KiHZ,
+ *   and can handle delays of up to two seconds. For 32-bit counters, it runs at
+ *   the same rate as the clocksource
  *
  * REVISIT behavior during system suspend states... we should disable
  * all clocks and save the power.  Easily done for clockevent devices,
@@ -47,6 +48,8 @@ static struct
 } tcb_cache[3];
 static u32 bmr_cache;
 
+static const u8 atmel_tcb_divisors[] = { 2, 8, 32, 128 };
+
 static u64 tc_get_cycles(struct clocksource *cs)
 {
 	unsigned long	flags;
@@ -143,6 +146,7 @@ static unsigned long notrace tc_delay_timer_read32(void)
 struct tc_clkevt_device {
 	struct clock_event_device	clkevt;
 	struct clk			*clk;
+	u32				rate;
 	void __iomem			*regs;
 };
 
@@ -151,13 +155,6 @@ static struct tc_clkevt_device *to_tc_clkevt(struct clock_event_device *clkevt)
 	return container_of(clkevt, struct tc_clkevt_device, clkevt);
 }
 
-/* For now, we always use the 32K clock ... this optimizes for NO_HZ,
- * because using one of the divided clocks would usually mean the
- * tick rate can never be less than several dozen Hz (vs 0.5 Hz).
- *
- * A divided clock could be good for high resolution timers, since
- * 30.5 usec resolution can seem "low".
- */
 static u32 timer_clock;
 
 static int tc_shutdown(struct clock_event_device *d)
@@ -183,7 +180,7 @@ static int tc_set_oneshot(struct clock_event_device *d)
 
 	clk_enable(tcd->clk);
 
-	/* slow clock, count up to RC, then irq and stop */
+	/* count up to RC, then irq and stop */
 	writel(timer_clock | ATMEL_TC_CPCSTOP | ATMEL_TC_WAVE |
 		     ATMEL_TC_WAVESEL_UP_AUTO, regs + ATMEL_TC_REG(2, CMR));
 	writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER));
@@ -205,10 +202,10 @@ static int tc_set_periodic(struct clock_event_device *d)
 	 */
 	clk_enable(tcd->clk);
 
-	/* slow clock, count up to RC, then irq and restart */
+	/* count up to RC, then irq and restart */
 	writel(timer_clock | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
 		     regs + ATMEL_TC_REG(2, CMR));
-	writel((32768 + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC));
+	writel((tcd->rate + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC));
 
 	/* Enable clock and interrupts on RC compare */
 	writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER));
@@ -256,47 +253,55 @@ static irqreturn_t ch2_irq(int irq, void *handle)
 	return IRQ_NONE;
 }
 
-static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
+static int __init setup_clkevents(struct atmel_tc *tc, int divisor_idx)
 {
 	int ret;
 	struct clk *t2_clk = tc->clk[2];
 	int irq = tc->irq[2];
-
-	ret = clk_prepare_enable(tc->slow_clk);
-	if (ret)
-		return ret;
+	int bits = tc->tcb_config->counter_width;
 
 	/* try to enable t2 clk to avoid future errors in mode change */
 	ret = clk_prepare_enable(t2_clk);
-	if (ret) {
-		clk_disable_unprepare(tc->slow_clk);
+	if (ret)
 		return ret;
-	}
-
-	clk_disable(t2_clk);
 
 	clkevt.regs = tc->regs;
 	clkevt.clk = t2_clk;
 
-	timer_clock = clk32k_divisor_idx;
+	if (bits == 32) {
+		timer_clock = divisor_idx;
+		clkevt.rate = clk_get_rate(t2_clk) / atmel_tcb_divisors[divisor_idx];
+	} else {
+		ret = clk_prepare_enable(tc->slow_clk);
+		if (ret) {
+			clk_disable_unprepare(t2_clk);
+			return ret;
+		}
+
+		clkevt.rate = clk_get_rate(tc->slow_clk);
+		timer_clock = ATMEL_TC_TIMER_CLOCK5;
+	}
+
+	clk_disable(t2_clk);
 
 	clkevt.clkevt.cpumask = cpumask_of(0);
 
 	ret = request_irq(irq, ch2_irq, IRQF_TIMER, "tc_clkevt", &clkevt);
 	if (ret) {
 		clk_unprepare(t2_clk);
-		clk_disable_unprepare(tc->slow_clk);
+		if (bits != 32)
+			clk_disable_unprepare(tc->slow_clk);
 		return ret;
 	}
 
-	clockevents_config_and_register(&clkevt.clkevt, 32768, 1, 0xffff);
+	clockevents_config_and_register(&clkevt.clkevt, clkevt.rate, 1, BIT(bits) - 1);
 
 	return ret;
 }
 
 #else /* !CONFIG_GENERIC_CLOCKEVENTS */
 
-static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
+static int __init setup_clkevents(struct atmel_tc *tc, int divisor_idx)
 {
 	/* NOTHING */
 	return 0;
@@ -346,11 +351,23 @@ static void __init tcb_setup_single_chan(struct atmel_tc *tc, int mck_divisor_id
 	writel(ATMEL_TC_SYNC, tcaddr + ATMEL_TC_BCR);
 }
 
-static const u8 atmel_tcb_divisors[5] = { 2, 8, 32, 128, 0, };
+static struct atmel_tcb_config tcb_rm9200_config = {
+	.counter_width = 16,
+};
+
+static struct atmel_tcb_config tcb_sam9x5_config = {
+	.counter_width = 32,
+};
+
+static struct atmel_tcb_config tcb_sama5d2_config = {
+	.counter_width = 32,
+	.has_gclk = 1,
+};
 
 static const struct of_device_id atmel_tcb_of_match[] = {
-	{ .compatible = "atmel,at91rm9200-tcb", .data = (void *)16, },
-	{ .compatible = "atmel,at91sam9x5-tcb", .data = (void *)32, },
+	{ .compatible = "atmel,at91rm9200-tcb", .data = &tcb_rm9200_config, },
+	{ .compatible = "atmel,at91sam9x5-tcb", .data = &tcb_sam9x5_config, },
+	{ .compatible = "atmel,sama5d2-tcb", .data = &tcb_sama5d2_config, },
 	{ /* sentinel */ }
 };
 
@@ -362,7 +379,6 @@ static int __init tcb_clksrc_init(struct device_node *node)
 	u64 (*tc_sched_clock)(void);
 	u32 rate, divided_rate = 0;
 	int best_divisor_idx = -1;
-	int clk32k_divisor_idx = -1;
 	int bits;
 	int i;
 	int ret;
@@ -399,7 +415,11 @@ static int __init tcb_clksrc_init(struct device_node *node)
 	}
 
 	match = of_match_node(atmel_tcb_of_match, node->parent);
-	bits = (uintptr_t)match->data;
+	if (!match)
+		return -ENODEV;
+
+	tc.tcb_config = match->data;
+	bits = tc.tcb_config->counter_width;
 
 	for (i = 0; i < ARRAY_SIZE(tc.irq); i++)
 		writel(ATMEL_TC_ALL_IRQ, tc.regs + ATMEL_TC_REG(i, IDR));
@@ -412,22 +432,17 @@ static int __init tcb_clksrc_init(struct device_node *node)
 
 	/* How fast will we be counting?  Pick something over 5 MHz.  */
 	rate = (u32) clk_get_rate(t0_clk);
-	for (i = 0; i < ARRAY_SIZE(atmel_tcb_divisors); i++) {
+	i = 0;
+	if (tc.tcb_config->has_gclk)
+		i = 1;
+	for (; i < ARRAY_SIZE(atmel_tcb_divisors); i++) {
 		unsigned divisor = atmel_tcb_divisors[i];
 		unsigned tmp;
 
-		/* remember 32 KiHz clock for later */
-		if (!divisor) {
-			clk32k_divisor_idx = i;
-			continue;
-		}
-
 		tmp = rate / divisor;
 		pr_debug("TC: %u / %-3u [%d] --> %u\n", rate, divisor, i, tmp);
-		if (best_divisor_idx > 0) {
-			if (tmp < 5 * 1000 * 1000)
-				continue;
-		}
+		if ((best_divisor_idx >= 0) && (tmp < 5 * 1000 * 1000))
+			break;
 		divided_rate = tmp;
 		best_divisor_idx = i;
 	}
@@ -467,7 +482,7 @@ static int __init tcb_clksrc_init(struct device_node *node)
 		goto err_disable_t1;
 
 	/* channel 2:  periodic and oneshot timer support */
-	ret = setup_clkevents(&tc, clk32k_divisor_idx);
+	ret = setup_clkevents(&tc, best_divisor_idx);
 	if (ret)
 		goto err_unregister_clksrc;
 
diff --git a/drivers/clocksource/timer-ti-32k.c b/drivers/clocksource/timer-ti-32k.c
index ae12bbf..59b0be4 100644
--- a/drivers/clocksource/timer-ti-32k.c
+++ b/drivers/clocksource/timer-ti-32k.c
@@ -21,7 +21,7 @@
  * Roughly modelled after the OMAP1 MPU timer code.
  * Added OMAP4 support - Santosh Shilimkar <santosh.shilimkar@ti.com>
  *
- * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com
+ * Copyright (C) 2015 Texas Instruments Incorporated - https://www.ti.com
  */
 
 #include <linux/clk.h>
diff --git a/drivers/clocksource/timer-ti-dm.c b/drivers/clocksource/timer-ti-dm.c
index 60aff08..33eeabf 100644
--- a/drivers/clocksource/timer-ti-dm.c
+++ b/drivers/clocksource/timer-ti-dm.c
@@ -4,7 +4,7 @@
  *
  * OMAP Dual-Mode Timers
  *
- * Copyright (C) 2010 Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright (C) 2010 Texas Instruments Incorporated - https://www.ti.com/
  * Tarun Kanti DebBarma <tarun.kanti@ti.com>
  * Thara Gopinath <thara@ti.com>
  *
diff --git a/include/dt-bindings/clock/ingenic,sysost.h b/include/dt-bindings/clock/ingenic,sysost.h
new file mode 100644
index 0000000..9ac88e9
--- /dev/null
+++ b/include/dt-bindings/clock/ingenic,sysost.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This header provides clock numbers for the ingenic,tcu DT binding.
+ */
+
+#ifndef __DT_BINDINGS_CLOCK_INGENIC_OST_H__
+#define __DT_BINDINGS_CLOCK_INGENIC_OST_H__
+
+#define OST_CLK_PERCPU_TIMER	0
+#define OST_CLK_GLOBAL_TIMER	1
+
+#endif /* __DT_BINDINGS_CLOCK_INGENIC_OST_H__ */
diff --git a/include/soc/at91/atmel_tcb.h b/include/soc/at91/atmel_tcb.h
index c3c7200..1d7071d 100644
--- a/include/soc/at91/atmel_tcb.h
+++ b/include/soc/at91/atmel_tcb.h
@@ -36,9 +36,14 @@ struct clk;
 /**
  * struct atmel_tcb_config - SoC data for a Timer/Counter Block
  * @counter_width: size in bits of a timer counter register
+ * @has_gclk: boolean indicating if a timer counter has a generic clock
+ * @has_qdec: boolean indicating if a timer counter has a quadrature
+ * decoder.
  */
 struct atmel_tcb_config {
 	size_t	counter_width;
+	bool    has_gclk;
+	bool    has_qdec;
 };
 
 /**
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 026ac01..ae5029f 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -157,7 +157,8 @@ EXPORT_SYMBOL(jiffies_64);
 
 /*
  * The time start value for each level to select the bucket at enqueue
- * time.
+ * time. We start from the last possible delta of the previous level
+ * so that we can later add an extra LVL_GRAN(n) to n (see calc_index()).
  */
 #define LVL_START(n)	((LVL_SIZE - 1) << (((n) - 1) * LVL_CLK_SHIFT))
 
@@ -204,8 +205,8 @@ struct timer_base {
 	unsigned long		clk;
 	unsigned long		next_expiry;
 	unsigned int		cpu;
+	bool			next_expiry_recalc;
 	bool			is_idle;
-	bool			must_forward_clk;
 	DECLARE_BITMAP(pending_map, WHEEL_SIZE);
 	struct hlist_head	vectors[WHEEL_SIZE];
 } ____cacheline_aligned;
@@ -488,35 +489,48 @@ static inline void timer_set_idx(struct timer_list *timer, unsigned int idx)
  * Helper function to calculate the array index for a given expiry
  * time.
  */
-static inline unsigned calc_index(unsigned expires, unsigned lvl)
+static inline unsigned calc_index(unsigned long expires, unsigned lvl,
+				  unsigned long *bucket_expiry)
 {
+
+	/*
+	 * The timer wheel has to guarantee that a timer does not fire
+	 * early. Early expiry can happen due to:
+	 * - Timer is armed at the edge of a tick
+	 * - Truncation of the expiry time in the outer wheel levels
+	 *
+	 * Round up with level granularity to prevent this.
+	 */
 	expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl);
+	*bucket_expiry = expires << LVL_SHIFT(lvl);
 	return LVL_OFFS(lvl) + (expires & LVL_MASK);
 }
 
-static int calc_wheel_index(unsigned long expires, unsigned long clk)
+static int calc_wheel_index(unsigned long expires, unsigned long clk,
+			    unsigned long *bucket_expiry)
 {
 	unsigned long delta = expires - clk;
 	unsigned int idx;
 
 	if (delta < LVL_START(1)) {
-		idx = calc_index(expires, 0);
+		idx = calc_index(expires, 0, bucket_expiry);
 	} else if (delta < LVL_START(2)) {
-		idx = calc_index(expires, 1);
+		idx = calc_index(expires, 1, bucket_expiry);
 	} else if (delta < LVL_START(3)) {
-		idx = calc_index(expires, 2);
+		idx = calc_index(expires, 2, bucket_expiry);
 	} else if (delta < LVL_START(4)) {
-		idx = calc_index(expires, 3);
+		idx = calc_index(expires, 3, bucket_expiry);
 	} else if (delta < LVL_START(5)) {
-		idx = calc_index(expires, 4);
+		idx = calc_index(expires, 4, bucket_expiry);
 	} else if (delta < LVL_START(6)) {
-		idx = calc_index(expires, 5);
+		idx = calc_index(expires, 5, bucket_expiry);
 	} else if (delta < LVL_START(7)) {
-		idx = calc_index(expires, 6);
+		idx = calc_index(expires, 6, bucket_expiry);
 	} else if (LVL_DEPTH > 8 && delta < LVL_START(8)) {
-		idx = calc_index(expires, 7);
+		idx = calc_index(expires, 7, bucket_expiry);
 	} else if ((long) delta < 0) {
 		idx = clk & LVL_MASK;
+		*bucket_expiry = clk;
 	} else {
 		/*
 		 * Force expire obscene large timeouts to expire at the
@@ -525,34 +539,11 @@ static int calc_wheel_index(unsigned long expires, unsigned long clk)
 		if (delta >= WHEEL_TIMEOUT_CUTOFF)
 			expires = clk + WHEEL_TIMEOUT_MAX;
 
-		idx = calc_index(expires, LVL_DEPTH - 1);
+		idx = calc_index(expires, LVL_DEPTH - 1, bucket_expiry);
 	}
 	return idx;
 }
 
-/*
- * Enqueue the timer into the hash bucket, mark it pending in
- * the bitmap and store the index in the timer flags.
- */
-static void enqueue_timer(struct timer_base *base, struct timer_list *timer,
-			  unsigned int idx)
-{
-	hlist_add_head(&timer->entry, base->vectors + idx);
-	__set_bit(idx, base->pending_map);
-	timer_set_idx(timer, idx);
-
-	trace_timer_start(timer, timer->expires, timer->flags);
-}
-
-static void
-__internal_add_timer(struct timer_base *base, struct timer_list *timer)
-{
-	unsigned int idx;
-
-	idx = calc_wheel_index(timer->expires, base->clk);
-	enqueue_timer(base, timer, idx);
-}
-
 static void
 trigger_dyntick_cpu(struct timer_base *base, struct timer_list *timer)
 {
@@ -574,34 +565,48 @@ trigger_dyntick_cpu(struct timer_base *base, struct timer_list *timer)
 	 * timer is not deferrable. If the other CPU is on the way to idle
 	 * then it can't set base->is_idle as we hold the base lock:
 	 */
-	if (!base->is_idle)
-		return;
-
-	/* Check whether this is the new first expiring timer: */
-	if (time_after_eq(timer->expires, base->next_expiry))
-		return;
-
-	/*
-	 * Set the next expiry time and kick the CPU so it can reevaluate the
-	 * wheel:
-	 */
-	if (time_before(timer->expires, base->clk)) {
-		/*
-		 * Prevent from forward_timer_base() moving the base->clk
-		 * backward
-		 */
-		base->next_expiry = base->clk;
-	} else {
-		base->next_expiry = timer->expires;
-	}
-	wake_up_nohz_cpu(base->cpu);
+	if (base->is_idle)
+		wake_up_nohz_cpu(base->cpu);
 }
 
-static void
-internal_add_timer(struct timer_base *base, struct timer_list *timer)
+/*
+ * Enqueue the timer into the hash bucket, mark it pending in
+ * the bitmap, store the index in the timer flags then wake up
+ * the target CPU if needed.
+ */
+static void enqueue_timer(struct timer_base *base, struct timer_list *timer,
+			  unsigned int idx, unsigned long bucket_expiry)
 {
-	__internal_add_timer(base, timer);
-	trigger_dyntick_cpu(base, timer);
+
+	hlist_add_head(&timer->entry, base->vectors + idx);
+	__set_bit(idx, base->pending_map);
+	timer_set_idx(timer, idx);
+
+	trace_timer_start(timer, timer->expires, timer->flags);
+
+	/*
+	 * Check whether this is the new first expiring timer. The
+	 * effective expiry time of the timer is required here
+	 * (bucket_expiry) instead of timer->expires.
+	 */
+	if (time_before(bucket_expiry, base->next_expiry)) {
+		/*
+		 * Set the next expiry time and kick the CPU so it
+		 * can reevaluate the wheel:
+		 */
+		base->next_expiry = bucket_expiry;
+		base->next_expiry_recalc = false;
+		trigger_dyntick_cpu(base, timer);
+	}
+}
+
+static void internal_add_timer(struct timer_base *base, struct timer_list *timer)
+{
+	unsigned long bucket_expiry;
+	unsigned int idx;
+
+	idx = calc_wheel_index(timer->expires, base->clk, &bucket_expiry);
+	enqueue_timer(base, timer, idx, bucket_expiry);
 }
 
 #ifdef CONFIG_DEBUG_OBJECTS_TIMERS
@@ -834,8 +839,10 @@ static int detach_if_pending(struct timer_list *timer, struct timer_base *base,
 	if (!timer_pending(timer))
 		return 0;
 
-	if (hlist_is_singular_node(&timer->entry, base->vectors + idx))
+	if (hlist_is_singular_node(&timer->entry, base->vectors + idx)) {
 		__clear_bit(idx, base->pending_map);
+		base->next_expiry_recalc = true;
+	}
 
 	detach_timer(timer, clear_pending);
 	return 1;
@@ -885,20 +892,14 @@ get_target_base(struct timer_base *base, unsigned tflags)
 
 static inline void forward_timer_base(struct timer_base *base)
 {
-#ifdef CONFIG_NO_HZ_COMMON
-	unsigned long jnow;
+	unsigned long jnow = READ_ONCE(jiffies);
 
 	/*
-	 * We only forward the base when we are idle or have just come out of
-	 * idle (must_forward_clk logic), and have a delta between base clock
-	 * and jiffies. In the common case, run_timers will take care of it.
+	 * No need to forward if we are close enough below jiffies.
+	 * Also while executing timers, base->clk is 1 offset ahead
+	 * of jiffies to avoid endless requeuing to current jffies.
 	 */
-	if (likely(!base->must_forward_clk))
-		return;
-
-	jnow = READ_ONCE(jiffies);
-	base->must_forward_clk = base->is_idle;
-	if ((long)(jnow - base->clk) < 2)
+	if ((long)(jnow - base->clk) < 1)
 		return;
 
 	/*
@@ -912,7 +913,6 @@ static inline void forward_timer_base(struct timer_base *base)
 			return;
 		base->clk = base->next_expiry;
 	}
-#endif
 }
 
 
@@ -960,9 +960,9 @@ static struct timer_base *lock_timer_base(struct timer_list *timer,
 static inline int
 __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int options)
 {
+	unsigned long clk = 0, flags, bucket_expiry;
 	struct timer_base *base, *new_base;
 	unsigned int idx = UINT_MAX;
-	unsigned long clk = 0, flags;
 	int ret = 0;
 
 	BUG_ON(!timer->function);
@@ -1001,7 +1001,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int option
 		}
 
 		clk = base->clk;
-		idx = calc_wheel_index(expires, clk);
+		idx = calc_wheel_index(expires, clk, &bucket_expiry);
 
 		/*
 		 * Retrieve and compare the array index of the pending
@@ -1054,16 +1054,13 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int option
 	/*
 	 * If 'idx' was calculated above and the base time did not advance
 	 * between calculating 'idx' and possibly switching the base, only
-	 * enqueue_timer() and trigger_dyntick_cpu() is required. Otherwise
-	 * we need to (re)calculate the wheel index via
-	 * internal_add_timer().
+	 * enqueue_timer() is required. Otherwise we need to (re)calculate
+	 * the wheel index via internal_add_timer().
 	 */
-	if (idx != UINT_MAX && clk == base->clk) {
-		enqueue_timer(base, timer, idx);
-		trigger_dyntick_cpu(base, timer);
-	} else {
+	if (idx != UINT_MAX && clk == base->clk)
+		enqueue_timer(base, timer, idx, bucket_expiry);
+	else
 		internal_add_timer(base, timer);
-	}
 
 out_unlock:
 	raw_spin_unlock_irqrestore(&base->lock, flags);
@@ -1466,10 +1463,10 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head)
 	}
 }
 
-static int __collect_expired_timers(struct timer_base *base,
-				    struct hlist_head *heads)
+static int collect_expired_timers(struct timer_base *base,
+				  struct hlist_head *heads)
 {
-	unsigned long clk = base->clk;
+	unsigned long clk = base->clk = base->next_expiry;
 	struct hlist_head *vec;
 	int i, levels = 0;
 	unsigned int idx;
@@ -1491,7 +1488,6 @@ static int __collect_expired_timers(struct timer_base *base,
 	return levels;
 }
 
-#ifdef CONFIG_NO_HZ_COMMON
 /*
  * Find the next pending bucket of a level. Search from level start (@offset)
  * + @clk upwards and if nothing there, search from start of the level
@@ -1524,6 +1520,7 @@ static unsigned long __next_timer_interrupt(struct timer_base *base)
 	clk = base->clk;
 	for (lvl = 0; lvl < LVL_DEPTH; lvl++, offset += LVL_SIZE) {
 		int pos = next_pending_bucket(base, offset, clk & LVL_MASK);
+		unsigned long lvl_clk = clk & LVL_CLK_MASK;
 
 		if (pos >= 0) {
 			unsigned long tmp = clk + (unsigned long) pos;
@@ -1531,6 +1528,13 @@ static unsigned long __next_timer_interrupt(struct timer_base *base)
 			tmp <<= LVL_SHIFT(lvl);
 			if (time_before(tmp, next))
 				next = tmp;
+
+			/*
+			 * If the next expiration happens before we reach
+			 * the next level, no need to check further.
+			 */
+			if (pos <= ((LVL_CLK_DIV - lvl_clk) & LVL_CLK_MASK))
+				break;
 		}
 		/*
 		 * Clock for the next level. If the current level clock lower
@@ -1568,13 +1572,17 @@ static unsigned long __next_timer_interrupt(struct timer_base *base)
 		 * So the simple check whether the lower bits of the current
 		 * level are 0 or not is sufficient for all cases.
 		 */
-		adj = clk & LVL_CLK_MASK ? 1 : 0;
+		adj = lvl_clk ? 1 : 0;
 		clk >>= LVL_CLK_SHIFT;
 		clk += adj;
 	}
+
+	base->next_expiry_recalc = false;
+
 	return next;
 }
 
+#ifdef CONFIG_NO_HZ_COMMON
 /*
  * Check, if the next hrtimer event is before the next timer wheel
  * event:
@@ -1631,9 +1639,11 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
 		return expires;
 
 	raw_spin_lock(&base->lock);
-	nextevt = __next_timer_interrupt(base);
+	if (base->next_expiry_recalc)
+		base->next_expiry = __next_timer_interrupt(base);
+	nextevt = base->next_expiry;
 	is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA);
-	base->next_expiry = nextevt;
+
 	/*
 	 * We have a fresh next event. Check whether we can forward the
 	 * base. We can only do that when @basej is past base->clk
@@ -1659,10 +1669,8 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
 		 * logic is only maintained for the BASE_STD base, deferrable
 		 * timers may still see large granularity skew (by design).
 		 */
-		if ((expires - basem) > TICK_NSEC) {
-			base->must_forward_clk = true;
+		if ((expires - basem) > TICK_NSEC)
 			base->is_idle = true;
-		}
 	}
 	raw_spin_unlock(&base->lock);
 
@@ -1686,42 +1694,6 @@ void timer_clear_idle(void)
 	 */
 	base->is_idle = false;
 }
-
-static int collect_expired_timers(struct timer_base *base,
-				  struct hlist_head *heads)
-{
-	unsigned long now = READ_ONCE(jiffies);
-
-	/*
-	 * NOHZ optimization. After a long idle sleep we need to forward the
-	 * base to current jiffies. Avoid a loop by searching the bitfield for
-	 * the next expiring timer.
-	 */
-	if ((long)(now - base->clk) > 2) {
-		unsigned long next = __next_timer_interrupt(base);
-
-		/*
-		 * If the next timer is ahead of time forward to current
-		 * jiffies, otherwise forward to the next expiry time:
-		 */
-		if (time_after(next, now)) {
-			/*
-			 * The call site will increment base->clk and then
-			 * terminate the expiry loop immediately.
-			 */
-			base->clk = now;
-			return 0;
-		}
-		base->clk = next;
-	}
-	return __collect_expired_timers(base, heads);
-}
-#else
-static inline int collect_expired_timers(struct timer_base *base,
-					 struct hlist_head *heads)
-{
-	return __collect_expired_timers(base, heads);
-}
 #endif
 
 /*
@@ -1761,32 +1733,23 @@ static inline void __run_timers(struct timer_base *base)
 	struct hlist_head heads[LVL_DEPTH];
 	int levels;
 
-	if (!time_after_eq(jiffies, base->clk))
+	if (time_before(jiffies, base->next_expiry))
 		return;
 
 	timer_base_lock_expiry(base);
 	raw_spin_lock_irq(&base->lock);
 
-	/*
-	 * timer_base::must_forward_clk must be cleared before running
-	 * timers so that any timer functions that call mod_timer() will
-	 * not try to forward the base. Idle tracking / clock forwarding
-	 * logic is only used with BASE_STD timers.
-	 *
-	 * The must_forward_clk flag is cleared unconditionally also for
-	 * the deferrable base. The deferrable base is not affected by idle
-	 * tracking and never forwarded, so clearing the flag is a NOOP.
-	 *
-	 * The fact that the deferrable base is never forwarded can cause
-	 * large variations in granularity for deferrable timers, but they
-	 * can be deferred for long periods due to idle anyway.
-	 */
-	base->must_forward_clk = false;
-
-	while (time_after_eq(jiffies, base->clk)) {
-
+	while (time_after_eq(jiffies, base->clk) &&
+	       time_after_eq(jiffies, base->next_expiry)) {
 		levels = collect_expired_timers(base, heads);
+		/*
+		 * The only possible reason for not finding any expired
+		 * timer at this clk is that all matching timers have been
+		 * dequeued.
+		 */
+		WARN_ON_ONCE(!levels && !base->next_expiry_recalc);
 		base->clk++;
+		base->next_expiry = __next_timer_interrupt(base);
 
 		while (levels--)
 			expire_timers(base, heads + levels);
@@ -1816,12 +1779,12 @@ void run_local_timers(void)
 
 	hrtimer_run_queues();
 	/* Raise the softirq only if required. */
-	if (time_before(jiffies, base->clk)) {
+	if (time_before(jiffies, base->next_expiry)) {
 		if (!IS_ENABLED(CONFIG_NO_HZ_COMMON))
 			return;
 		/* CPU is awake, so check the deferrable base. */
 		base++;
-		if (time_before(jiffies, base->clk))
+		if (time_before(jiffies, base->next_expiry))
 			return;
 	}
 	raise_softirq(TIMER_SOFTIRQ);
@@ -1986,7 +1949,6 @@ int timers_prepare_cpu(unsigned int cpu)
 		base->clk = jiffies;
 		base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
 		base->is_idle = false;
-		base->must_forward_clk = true;
 	}
 	return 0;
 }
@@ -2039,6 +2001,7 @@ static void __init init_timer_cpu(int cpu)
 		base->cpu = cpu;
 		raw_spin_lock_init(&base->lock);
 		base->clk = jiffies;
+		base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
 		timer_base_init_expiry_lock(base);
 	}
 }