Merge tag 'armsoc-for-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
Pull ARM SoC fixes from Olof Johansson:
"Another week, another small batch of fixes.
Most of these make zynq, socfpga and sunxi platforms work a bit
better:
- due to new requirements for regulators, DWMMC on socfpga broke past
v3.17
- SMP spinup fix for socfpga
- a few DT fixes for zynq
- another option (FIXED_REGULATOR) for sunxi is needed that used to
be selected by other options but no longer is.
- a couple of small DT fixes for at91
- ...and a couple for i.MX"
* tag 'armsoc-for-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc:
ARM: dts: imx28-evk: Let i2c0 run at 100kHz
ARM: i.MX6: Fix "emi" clock name typo
ARM: multi_v7_defconfig: enable CONFIG_MMC_DW_ROCKCHIP
ARM: sunxi_defconfig: enable CONFIG_REGULATOR_FIXED_VOLTAGE
ARM: dts: socfpga: Add a 3.3V fixed regulator node
ARM: dts: socfpga: Fix SD card detect
ARM: dts: socfpga: rename gpio nodes
ARM: at91/dt: sam9263: fix PLLB frequencies
power: reset: at91-reset: fix power down register
MAINTAINERS: add atmel ssc driver maintainer entry
arm: socfpga: fix fetching cpu1start_addr for SMP
ARM: zynq: DT: trivial: Fix mc node
ARM: zynq: DT: Add cadence watchdog node
ARM: zynq: DT: Add missing reference for memory-controller
ARM: zynq: DT: Add missing reference for ADC
ARM: zynq: DT: Add missing address for L2 pl310
ARM: zynq: DT: Remove 222 MHz OPP
ARM: zynq: DT: Fix GEM register area size
diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt
index 344e85c..d7273a5 100644
--- a/Documentation/arm64/memory.txt
+++ b/Documentation/arm64/memory.txt
@@ -17,7 +17,7 @@
the same bits set to 1. TTBRx selection is given by bit 63 of the
virtual address. The swapper_pg_dir contains only kernel (global)
mappings while the user pgd contains only user (non-global) mappings.
-The swapper_pgd_dir address is written to TTBR1 and never written to
+The swapper_pg_dir address is written to TTBR1 and never written to
TTBR0.
diff --git a/Documentation/devicetree/bindings/mailbox/mailbox.txt b/Documentation/devicetree/bindings/mailbox/mailbox.txt
new file mode 100644
index 0000000..1a2cd3d
--- /dev/null
+++ b/Documentation/devicetree/bindings/mailbox/mailbox.txt
@@ -0,0 +1,38 @@
+* Generic Mailbox Controller and client driver bindings
+
+Generic binding to provide a way for Mailbox controller drivers to
+assign appropriate mailbox channel to client drivers.
+
+* Mailbox Controller
+
+Required property:
+- #mbox-cells: Must be at least 1. Number of cells in a mailbox
+ specifier.
+
+Example:
+ mailbox: mailbox {
+ ...
+ #mbox-cells = <1>;
+ };
+
+
+* Mailbox Client
+
+Required property:
+- mboxes: List of phandle and mailbox channel specifiers.
+
+Optional property:
+- mbox-names: List of identifier strings for each mailbox channel
+ required by the client. The use of this property
+ is discouraged in favor of using index in list of
+ 'mboxes' while requesting a mailbox. Instead the
+ platforms may define channel indices, in DT headers,
+ to something legible.
+
+Example:
+ pwr_cntrl: power {
+ ...
+ mbox-names = "pwr-ctrl", "rpc";
+ mboxes = <&mailbox 0
+ &mailbox 1>;
+ };
diff --git a/Documentation/devicetree/bindings/pwm/pwm-fsl-ftm.txt b/Documentation/devicetree/bindings/pwm/pwm-fsl-ftm.txt
index 0bda229..3899d6a 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-fsl-ftm.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-fsl-ftm.txt
@@ -1,5 +1,20 @@
Freescale FlexTimer Module (FTM) PWM controller
+The same FTM PWM device can have a different endianness on different SoCs. The
+device tree provides a property to describe this so that an operating system
+device driver can handle all variants of the device. Refer to the table below
+for the endianness of the FTM PWM block as integrated into the existing SoCs:
+
+ SoC | FTM-PWM endianness
+ --------+-------------------
+ Vybrid | LE
+ LS1 | BE
+ LS2 | LE
+
+Please see ../regmap/regmap.txt for more detail about how to specify endian
+modes in device tree.
+
+
Required properties:
- compatible: Should be "fsl,vf610-ftm-pwm".
- reg: Physical base address and length of the controller's registers
@@ -16,7 +31,8 @@
- pinctrl-names: Must contain a "default" entry.
- pinctrl-NNN: One property must exist for each entry in pinctrl-names.
See pinctrl/pinctrl-bindings.txt for details of the property values.
-
+- big-endian: Boolean property, required if the FTM PWM registers use a big-
+ endian rather than little-endian layout.
Example:
@@ -32,4 +48,5 @@
<&clks VF610_CLK_FTM0_EXT_FIX_EN>;
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_pwm0_1>;
+ big-endian;
};
diff --git a/Documentation/devicetree/bindings/pwm/pwm-rockchip.txt b/Documentation/devicetree/bindings/pwm/pwm-rockchip.txt
index d47d15a..b8be3d0 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-rockchip.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-rockchip.txt
@@ -7,8 +7,8 @@
"rockchip,vop-pwm": found integrated in VOP on RK3288 SoC
- reg: physical base address and length of the controller's registers
- clocks: phandle and clock specifier of the PWM reference clock
- - #pwm-cells: should be 2. See pwm.txt in this directory for a
- description of the cell format.
+ - #pwm-cells: must be 2 (rk2928) or 3 (rk3288). See pwm.txt in this directory
+ for a description of the cell format.
Example:
diff --git a/Documentation/devicetree/bindings/thermal/imx-thermal.txt b/Documentation/devicetree/bindings/thermal/imx-thermal.txt
index 1f0f672..3c67bd5 100644
--- a/Documentation/devicetree/bindings/thermal/imx-thermal.txt
+++ b/Documentation/devicetree/bindings/thermal/imx-thermal.txt
@@ -1,7 +1,10 @@
* Temperature Monitor (TEMPMON) on Freescale i.MX SoCs
Required properties:
-- compatible : "fsl,imx6q-thermal"
+- compatible : "fsl,imx6q-tempmon" for i.MX6Q, "fsl,imx6sx-tempmon" for i.MX6SX.
+ i.MX6SX has two more IRQs than i.MX6Q: IRQ_LOW and IRQ_PANIC.
+ When the temperature is below the low threshold, IRQ_LOW is triggered; when the temperature
+ is higher than the panic threshold, the system is automatically rebooted by the SRC module.
- fsl,tempmon : phandle pointer to system controller that contains TEMPMON
control registers, e.g. ANATOP on imx6q.
- fsl,tempmon-data : phandle pointer to fuse controller that contains TEMPMON
diff --git a/Documentation/devicetree/bindings/watchdog/cadence-wdt.txt b/Documentation/devicetree/bindings/watchdog/cadence-wdt.txt
new file mode 100644
index 0000000..c3a36ee
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/cadence-wdt.txt
@@ -0,0 +1,24 @@
+Zynq Watchdog Device Tree Bindings
+-------------------------------------------
+
+Required properties:
+- compatible : Should be "cdns,wdt-r1p2".
+- clocks : This is pclk (APB clock).
+- interrupts : This is wd_irq - watchdog timeout interrupt.
+- interrupt-parent : Must be core interrupt controller.
+
+Optional properties:
+- reset-on-timeout : If this property exists, then a reset is done
+ when watchdog times out.
+- timeout-sec : Watchdog timeout value (in seconds).
+
+Example:
+ watchdog@f8005000 {
+ compatible = "cdns,wdt-r1p2";
+ clocks = <&clkc 45>;
+ interrupt-parent = <&intc>;
+ interrupts = <0 9 1>;
+ reg = <0xf8005000 0x1000>;
+ reset-on-timeout;
+ timeout-sec = <10>;
+ };
diff --git a/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt b/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt
index e52ba2d..8dab6fd 100644
--- a/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt
+++ b/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt
@@ -7,7 +7,8 @@
Optional property:
- big-endian: If present the watchdog device's registers are implemented
- in big endian mode, otherwise in little mode.
+ in big endian mode, otherwise in native mode (same as the CPU). For more
+ detail please see: Documentation/devicetree/bindings/regmap/regmap.txt.
Examples:
diff --git a/Documentation/devicetree/bindings/watchdog/meson6-wdt.txt b/Documentation/devicetree/bindings/watchdog/meson6-wdt.txt
new file mode 100644
index 0000000..9200fc2
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/meson6-wdt.txt
@@ -0,0 +1,13 @@
+Meson SoCs Watchdog timer
+
+Required properties:
+
+- compatible : should be "amlogic,meson6-wdt"
+- reg : Specifies base physical address and size of the registers.
+
+Example:
+
+wdt: watchdog@c1109900 {
+ compatible = "amlogic,meson6-wdt";
+ reg = <0xc1109900 0x8>;
+};
diff --git a/Documentation/devicetree/bindings/watchdog/qcom-wdt.txt b/Documentation/devicetree/bindings/watchdog/qcom-wdt.txt
new file mode 100644
index 0000000..4726924
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/qcom-wdt.txt
@@ -0,0 +1,24 @@
+Qualcomm Krait Processor Sub-system (KPSS) Watchdog
+---------------------------------------------------
+
+Required properties :
+- compatible : shall contain only one of the following:
+
+ "qcom,kpss-wdt-msm8960"
+ "qcom,kpss-wdt-apq8064"
+ "qcom,kpss-wdt-ipq8064"
+
+- reg : shall contain base register location and length
+- clocks : shall contain the input clock
+
+Optional properties :
+- timeout-sec : shall contain the default watchdog timeout in seconds,
+ if unset, the default timeout is 30 seconds
+
+Example:
+ watchdog@208a038 {
+ compatible = "qcom,kpss-wdt-ipq8064";
+ reg = <0x0208a038 0x40>;
+ clocks = <&sleep_clk>;
+ timeout-sec = <10>;
+ };
diff --git a/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt b/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt
index cfff375..8f3d96a 100644
--- a/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt
+++ b/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt
@@ -9,6 +9,7 @@
(a) "samsung,s3c2410-wdt" for Exynos4 and previous SoCs
(b) "samsung,exynos5250-wdt" for Exynos5250
(c) "samsung,exynos5420-wdt" for Exynos5420
+ (d) "samsung,exynos7-wdt" for Exynos7
- reg : base physical address of the controller and length of memory mapped
region.
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 94d93b1..b30753c 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -67,6 +67,7 @@
struct file *, unsigned open_flag,
umode_t create_mode, int *opened);
int (*tmpfile) (struct inode *, struct dentry *, umode_t);
+ int (*dentry_open)(struct dentry *, struct file *, const struct cred *);
locking rules:
all may block
@@ -96,6 +97,7 @@
update_time: no
atomic_open: yes
tmpfile: no
+dentry_open: no
Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
victim.
diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt
new file mode 100644
index 0000000..530850a
--- /dev/null
+++ b/Documentation/filesystems/overlayfs.txt
@@ -0,0 +1,198 @@
+Written by: Neil Brown <neilb@suse.de>
+
+Overlay Filesystem
+==================
+
+This document describes a prototype for a new approach to providing
+overlay-filesystem functionality in Linux (sometimes referred to as
+union-filesystems). An overlay-filesystem tries to present a
+filesystem which is the result of overlaying one filesystem on top
+of the other.
+
+The result will inevitably fail to look exactly like a normal
+filesystem for various technical reasons. The expectation is that
+many use cases will be able to ignore these differences.
+
+This approach is 'hybrid' because the objects that appear in the
+filesystem do not all appear to belong to that filesystem. In many
+cases an object accessed in the union will be indistinguishable
+from accessing the corresponding object from the original filesystem.
+This is most obvious from the 'st_dev' field returned by stat(2).
+
+While directories will report an st_dev from the overlay-filesystem,
+all non-directory objects will report an st_dev from the lower or
+upper filesystem that is providing the object. Similarly st_ino will
+only be unique when combined with st_dev, and both of these can change
+over the lifetime of a non-directory object. Many applications and
+tools ignore these values and will not be affected.
+
+Upper and Lower
+---------------
+
+An overlay filesystem combines two filesystems - an 'upper' filesystem
+and a 'lower' filesystem. When a name exists in both filesystems, the
+object in the 'upper' filesystem is visible while the object in the
+'lower' filesystem is either hidden or, in the case of directories,
+merged with the 'upper' object.
+
+It would be more correct to refer to an upper and lower 'directory
+tree' rather than 'filesystem' as it is quite possible for both
+directory trees to be in the same filesystem and there is no
+requirement that the root of a filesystem be given for either upper or
+lower.
+
+The lower filesystem can be any filesystem supported by Linux and does
+not need to be writable. The lower filesystem can even be another
+overlayfs. The upper filesystem will normally be writable and if it
+is it must support the creation of trusted.* extended attributes, and
+must provide valid d_type in readdir responses, so NFS is not suitable.
+
+A read-only overlay of two read-only filesystems may use any
+filesystem type.
+
+Directories
+-----------
+
+Overlaying mainly involves directories. If a given name appears in both
+upper and lower filesystems and refers to a non-directory in either,
+then the lower object is hidden - the name refers only to the upper
+object.
+
+Where both upper and lower objects are directories, a merged directory
+is formed.
+
+At mount time, the two directories given as mount options "lowerdir" and
+"upperdir" are combined into a merged directory:
+
+ mount -t overlayfs overlayfs -olowerdir=/lower,upperdir=/upper,\
+workdir=/work /merged
+
+The "workdir" needs to be an empty directory on the same filesystem
+as upperdir.
+
+Then whenever a lookup is requested in such a merged directory, the
+lookup is performed in each actual directory and the combined result
+is cached in the dentry belonging to the overlay filesystem. If both
+actual lookups find directories, both are stored and a merged
+directory is created, otherwise only one is stored: the upper if it
+exists, else the lower.
+
+Only the lists of names from directories are merged. Other content
+such as metadata and extended attributes are reported for the upper
+directory only. These attributes of the lower directory are hidden.
+
+whiteouts and opaque directories
+--------------------------------
+
+In order to support rm and rmdir without changing the lower
+filesystem, an overlay filesystem needs to record in the upper filesystem
+that files have been removed. This is done using whiteouts and opaque
+directories (non-directories are always opaque).
+
+A whiteout is created as a character device with 0/0 device number.
+When a whiteout is found in the upper level of a merged directory, any
+matching name in the lower level is ignored, and the whiteout itself
+is also hidden.
+
+A directory is made opaque by setting the xattr "trusted.overlay.opaque"
+to "y". Where the upper filesystem contains an opaque directory, any
+directory in the lower filesystem with the same name is ignored.
+
+readdir
+-------
+
+When a 'readdir' request is made on a merged directory, the upper and
+lower directories are each read and the name lists merged in the
+obvious way (upper is read first, then lower - entries that already
+exist are not re-added). This merged name list is cached in the
+'struct file' and so remains as long as the file is kept open. If the
+directory is opened and read by two processes at the same time, they
+will each have separate caches. A seekdir to the start of the
+directory (offset 0) followed by a readdir will cause the cache to be
+discarded and rebuilt.
+
+This means that changes to the merged directory do not appear while a
+directory is being read. This is unlikely to be noticed by many
+programs.
+
+seek offsets are assigned sequentially when the directories are read.
+Thus if
+ - read part of a directory
+ - remember an offset, and close the directory
+ - re-open the directory some time later
+ - seek to the remembered offset
+
+there may be little correlation between the old and new locations in
+the list of filenames, particularly if anything has changed in the
+directory.
+
+Readdir on directories that are not merged is simply handled by the
+underlying directory (upper or lower).
+
+
+Non-directories
+---------------
+
+Objects that are not directories (files, symlinks, device-special
+files etc.) are presented either from the upper or lower filesystem as
+appropriate. When a file in the lower filesystem is accessed in a way
+that requires write-access, such as opening for write access, changing
+some metadata etc., the file is first copied from the lower filesystem
+to the upper filesystem (copy_up). Note that creating a hard-link
+also requires copy_up, though of course creation of a symlink does
+not.
+
+The copy_up may turn out to be unnecessary, for example if the file is
+opened for read-write but the data is not modified.
+
+The copy_up process first makes sure that the containing directory
+exists in the upper filesystem - creating it and any parents as
+necessary. It then creates the object with the same metadata (owner,
+mode, mtime, symlink-target etc.) and then if the object is a file, the
+data is copied from the lower to the upper filesystem. Finally any
+extended attributes are copied up.
+
+Once the copy_up is complete, the overlay filesystem simply
+provides direct access to the newly created file in the upper
+filesystem - future operations on the file are barely noticed by the
+overlay filesystem (though an operation on the name of the file such as
+rename or unlink will of course be noticed and handled).
+
+
+Non-standard behavior
+---------------------
+
+The copy_up operation essentially creates a new, identical file and
+moves it over to the old name. The new file may be on a different
+filesystem, so both st_dev and st_ino of the file may change.
+
+Any open files referring to this inode will access the old data and
+metadata. Similarly any file locks obtained before copy_up will not
+apply to the copied up file.
+
+On a file opened with O_RDONLY fchmod(2), fchown(2), futimesat(2) and
+fsetxattr(2) will fail with EROFS.
+
+If a file with multiple hard links is copied up, then this will
+"break" the link. Changes will not be propagated to other names
+referring to the same inode.
+
+Symlinks in /proc/PID/ and /proc/PID/fd which point to a non-directory
+object in overlayfs will not contain valid absolute paths, only
+relative paths leading up to the filesystem's root. This will be
+fixed in the future.
+
+Some operations are not atomic, for example a crash during copy_up or
+rename will leave the filesystem in an inconsistent state. This will
+be addressed in the future.
+
+Changes to underlying filesystems
+---------------------------------
+
+Offline changes, when the overlay is not mounted, are allowed to either
+the upper or the lower trees.
+
+Changes to the underlying filesystems while part of a mounted overlay
+filesystem are not allowed. If the underlying filesystem is changed,
+the behavior of the overlay is undefined, though it will not result in
+a crash or deadlock.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index fceff7c..20bf204 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -364,6 +364,7 @@
int (*atomic_open)(struct inode *, struct dentry *, struct file *,
unsigned open_flag, umode_t create_mode, int *opened);
int (*tmpfile) (struct inode *, struct dentry *, umode_t);
+ int (*dentry_open)(struct dentry *, struct file *, const struct cred *);
};
Again, all methods are called without any locks being held, unless
@@ -696,6 +697,12 @@
but instead uses bmap to find out where the blocks in the file
are and uses those addresses directly.
+ dentry_open: *WARNING: probably going away soon, do not use!* This is an
+ alternative to f_op->open(), the difference is that this method may open
+ a file not necessarily originating from the same filesystem as the one
+ i_op->dentry_open() was called on. It may be useful for stacking filesystems
+ which want to allow native I/O directly on underlying files.
+
invalidatepage: If a page has PagePrivate set, then invalidatepage
will be called when part or all of the page is to be removed
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 7dbe5ec..74339c5 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1015,10 +1015,14 @@
Format: {"off" | "on" | "skip[mbr]"}
efi= [EFI]
- Format: { "old_map" }
+ Format: { "old_map", "nochunk", "noruntime" }
old_map [X86-64]: switch to the old ioremap-based EFI
runtime services mapping. 32-bit still uses this one by
default.
+ nochunk: disable reading files in "chunks" in the EFI
+ boot stub, as chunking can cause problems with some
+ firmware implementations.
+ noruntime : disable EFI runtime services support
efi_no_storage_paranoia [EFI; X86]
Using this parameter you can use more than 50% of
@@ -2232,7 +2236,7 @@
nodsp [SH] Disable hardware DSP at boot time.
- noefi [X86] Disable EFI runtime services support.
+ noefi Disable EFI runtime services support.
noexec [IA-64]
@@ -3465,6 +3469,12 @@
e.g. base its process migration decisions on it.
Default is on.
+ topology_updates= [KNL, PPC, NUMA]
+ Format: {off}
+ Specify if the kernel should ignore (off)
+ topology updates sent by the hypervisor to this
+ LPAR.
+
tp720= [HW,PS2]
tpm_suspend_pcr=[HW,TPM]
diff --git a/Documentation/mailbox.txt b/Documentation/mailbox.txt
new file mode 100644
index 0000000..60f43ff
--- /dev/null
+++ b/Documentation/mailbox.txt
@@ -0,0 +1,122 @@
+ The Common Mailbox Framework
+ Jassi Brar <jaswinder.singh@linaro.org>
+
+ This document aims to help developers write client and controller
+drivers for the API. But before we start, let us note that the
+client (especially) and controller drivers are likely going to be
+very platform specific because the remote firmware is likely to be
+proprietary and implement a non-standard protocol. So even if two
+platforms employ, say, PL320 controller, the client drivers can't
+be shared across them. Even the PL320 driver might need to accommodate
+some platform specific quirks. So the API is meant mainly to avoid
+similar copies of code written for each platform. Having said that,
+nothing prevents the remote f/w from also being Linux based and using the
+same api there. However none of that helps us locally because we only
+ever deal at client's protocol level.
+ Some of the choices made during implementation are the result of this
+peculiarity of this "common" framework.
+
+
+
+ Part 1 - Controller Driver (See include/linux/mailbox_controller.h)
+
+ Allocate mbox_controller and the array of mbox_chan.
+Populate mbox_chan_ops; all operations except peek_data() are mandatory.
+The controller driver might know a message has been consumed
+by the remote by getting an IRQ or polling some hardware flag
+or it can never know (the client knows by way of the protocol).
+The method in order of preference is IRQ -> Poll -> None, which
+the controller driver should set via 'txdone_irq' or 'txdone_poll'
+or neither.
+
+
+ Part 2 - Client Driver (See include/linux/mailbox_client.h)
+
+ The client might want to operate in blocking mode (synchronously
+send a message through before returning) or non-blocking/async mode (submit
+a message and a callback function to the API and return immediately).
+
+
+struct demo_client {
+ struct mbox_client cl;
+ struct mbox_chan *mbox;
+ struct completion c;
+ bool async;
+ /* ... */
+};
+
+/*
+ * This is the handler for data received from remote. The behaviour is purely
+ * dependent upon the protocol. This is just an example.
+ */
+static void message_from_remote(struct mbox_client *cl, void *mssg)
+{
+ struct demo_client *dc = container_of(cl,
+ struct demo_client, cl);
+ if (dc->async) {
+ if (is_an_ack(mssg)) {
+ /* An ACK to our last sample sent */
+ return; /* Or do something else here */
+ } else { /* A new message from remote */
+ queue_req(mssg);
+ }
+ } else {
+ /* Remote f/w sends only ACK packets on this channel */
+ return;
+ }
+}
+
+static void sample_sent(struct mbox_client *cl, void *mssg, int r)
+{
+ struct demo_client *dc = container_of(cl,
+ struct demo_client, cl);
+ complete(&dc->c);
+}
+
+static void client_demo(struct platform_device *pdev)
+{
+ struct demo_client *dc_sync, *dc_async;
+ /* The controller already knows async_pkt and sync_pkt */
+ struct async_pkt ap;
+ struct sync_pkt sp;
+
+ dc_sync = kzalloc(sizeof(*dc_sync), GFP_KERNEL);
+ dc_async = kzalloc(sizeof(*dc_async), GFP_KERNEL);
+
+ /* Populate non-blocking mode client */
+ dc_async->cl.dev = &pdev->dev;
+ dc_async->cl.rx_callback = message_from_remote;
+ dc_async->cl.tx_done = sample_sent;
+ dc_async->cl.tx_block = false;
+ dc_async->cl.tx_tout = 0; /* doesn't matter here */
+ dc_async->cl.knows_txdone = false; /* depending upon protocol */
+ dc_async->async = true;
+ init_completion(&dc_async->c);
+
+ /* Populate blocking mode client */
+ dc_sync->cl.dev = &pdev->dev;
+ dc_sync->cl.rx_callback = message_from_remote;
+ dc_sync->cl.tx_done = NULL; /* operate in blocking mode */
+ dc_sync->cl.tx_block = true;
+ dc_sync->cl.tx_tout = 500; /* by half a second */
+ dc_sync->cl.knows_txdone = false; /* depending upon protocol */
+ dc_sync->async = false;
+
+ /* ASync mailbox is listed second in 'mboxes' property */
+ dc_async->mbox = mbox_request_channel(&dc_async->cl, 1);
+ /* Populate data packet */
+ /* ap.xxx = 123; etc */
+ /* Send async message to remote */
+ mbox_send_message(dc_async->mbox, &ap);
+
+ /* Sync mailbox is listed first in 'mboxes' property */
+ dc_sync->mbox = mbox_request_channel(&dc_sync->cl, 0);
+ /* Populate data packet */
+ /* sp.abc = 123; etc */
+ /* Send message to remote in blocking mode */
+ mbox_send_message(dc_sync->mbox, &sp);
+ /* At this point 'sp' has been sent */
+
+ /* Now wait for async chan to be done */
+ wait_for_completion(&dc_async->c);
+}
diff --git a/Documentation/power/pm_qos_interface.txt b/Documentation/power/pm_qos_interface.txt
index a5da5c7..129f7c0 100644
--- a/Documentation/power/pm_qos_interface.txt
+++ b/Documentation/power/pm_qos_interface.txt
@@ -5,7 +5,8 @@
one of the parameters.
Two different PM QoS frameworks are available:
-1. PM QoS classes for cpu_dma_latency, network_latency, network_throughput.
+1. PM QoS classes for cpu_dma_latency, network_latency, network_throughput,
+memory_bandwidth.
2. the per-device PM QoS framework provides the API to manage the per-device latency
constraints and PM QoS flags.
@@ -13,6 +14,7 @@
* latency: usec
* timeout: usec
* throughput: kbs (kilo bit / sec)
+ * memory bandwidth: mbs (mega bit / sec)
1. PM QoS framework
diff --git a/Documentation/scsi/osd.txt b/Documentation/scsi/osd.txt
index da162f7..5a9879b 100644
--- a/Documentation/scsi/osd.txt
+++ b/Documentation/scsi/osd.txt
@@ -184,8 +184,7 @@
More up-to-date information can be found on:
http://open-osd.org
-Boaz Harrosh <bharrosh@panasas.com>
-Benny Halevy <bhalevy@panasas.com>
+Boaz Harrosh <ooo@electrozaur.com>
References
==========
diff --git a/Documentation/target/tcmu-design.txt b/Documentation/target/tcmu-design.txt
new file mode 100644
index 0000000..5518465
--- /dev/null
+++ b/Documentation/target/tcmu-design.txt
@@ -0,0 +1,378 @@
+Contents:
+
+1) TCM Userspace Design
+ a) Background
+ b) Benefits
+ c) Design constraints
+ d) Implementation overview
+ i. Mailbox
+ ii. Command ring
+ iii. Data Area
+ e) Device discovery
+ f) Device events
+ g) Other contingencies
+2) Writing a user pass-through handler
+ a) Discovering and configuring TCMU uio devices
+ b) Waiting for events on the device(s)
+ c) Managing the command ring
+3) Command filtering and pass_level
+4) A final note
+
+
+TCM Userspace Design
+--------------------
+
+TCM is another name for LIO, an in-kernel iSCSI target (server).
+Existing TCM targets run in the kernel. TCMU (TCM in Userspace)
+allows userspace programs to be written which act as iSCSI targets.
+This document describes the design.
+
+The existing kernel provides modules for different SCSI transport
+protocols. TCM also modularizes the data storage. There are existing
+modules for file, block device, RAM or using another SCSI device as
+storage. These are called "backstores" or "storage engines". These
+built-in modules are implemented entirely as kernel code.
+
+Background:
+
+In addition to modularizing the transport protocol used for carrying
+SCSI commands ("fabrics"), the Linux kernel target, LIO, also modularizes
+the actual data storage as well. These are referred to as "backstores"
+or "storage engines". The target comes with backstores that allow a
+file, a block device, RAM, or another SCSI device to be used for the
+local storage needed for the exported SCSI LUN. Like the rest of LIO,
+these are implemented entirely as kernel code.
+
+These backstores cover the most common use cases, but not all. One new
+use case that other non-kernel target solutions, such as tgt, are able
+to support is using Gluster's GLFS or Ceph's RBD as a backstore. The
+target then serves as a translator, allowing initiators to store data
+in these non-traditional networked storage systems, while still only
+using standard protocols themselves.
+
+If the target is a userspace process, supporting these is easy. tgt,
+for example, needs only a small adapter module for each, because the
+modules just use the available userspace libraries for RBD and GLFS.
+
+Adding support for these backstores in LIO is considerably more
+difficult, because LIO is entirely kernel code. Instead of undertaking
+the significant work to port the GLFS or RBD APIs and protocols to the
+kernel, another approach is to create a userspace pass-through
+backstore for LIO, "TCMU".
+
+
+Benefits:
+
+In addition to allowing relatively easy support for RBD and GLFS, TCMU
+will also allow easier development of new backstores. TCMU combines
+with the LIO loopback fabric to become something similar to FUSE
+(Filesystem in Userspace), but at the SCSI layer instead of the
+filesystem layer. A SUSE, if you will.
+
+The disadvantage is there are more distinct components to configure, and
+potentially to malfunction. This is unavoidable, but hopefully not
+fatal if we're careful to keep things as simple as possible.
+
+Design constraints:
+
+- Good performance: high throughput, low latency
+- Cleanly handle if userspace:
+ 1) never attaches
+ 2) hangs
+ 3) dies
+ 4) misbehaves
+- Allow future flexibility in user & kernel implementations
+- Be reasonably memory-efficient
+- Simple to configure & run
+- Simple to write a userspace backend
+
+
+Implementation overview:
+
+The core of the TCMU interface is a memory region that is shared
+between kernel and userspace. Within this region is: a control area
+(mailbox); a lockless producer/consumer circular buffer for commands
+to be passed up, and status returned; and an in/out data buffer area.
+
+TCMU uses the pre-existing UIO subsystem. UIO allows device driver
+development in userspace, and this is conceptually very close to the
+TCMU use case, except instead of a physical device, TCMU implements a
+memory-mapped layout designed for SCSI commands. Using UIO also
+benefits TCMU by handling device introspection (e.g. a way for
+userspace to determine how large the shared region is) and signaling
+mechanisms in both directions.
+
+There are no embedded pointers in the memory region. Everything is
+expressed as an offset from the region's starting address. This allows
+the ring to still work if the user process dies and is restarted with
+the region mapped at a different virtual address.
+
+See target_core_user.h for the struct definitions.
+
+The Mailbox:
+
+The mailbox is always at the start of the shared memory region, and
+contains a version, details about the starting offset and size of the
+command ring, and head and tail pointers to be used by the kernel and
+userspace (respectively) to put commands on the ring, and indicate
+when the commands are completed.
+
+version - 1 (userspace should abort if otherwise)
+flags - none yet defined.
+cmdr_off - The offset of the start of the command ring from the start
+of the memory region, to account for the mailbox size.
+cmdr_size - The size of the command ring. This does *not* need to be a
+power of two.
+cmd_head - Modified by the kernel to indicate when a command has been
+placed on the ring.
+cmd_tail - Modified by userspace to indicate when it has completed
+processing of a command.
+
+The Command Ring:
+
+Commands are placed on the ring by the kernel incrementing
+mailbox.cmd_head by the size of the command, modulo cmdr_size, and
+then signaling userspace via uio_event_notify(). Once the command is
+completed, userspace updates mailbox.cmd_tail in the same way and
+signals the kernel via a 4-byte write(). When cmd_head equals
+cmd_tail, the ring is empty -- no commands are currently waiting to be
+processed by userspace.
+
+TCMU commands start with a common header containing "len_op", a 32-bit
+value that stores the length, as well as the opcode in the lowest
+unused bits. Currently only two opcodes are defined, TCMU_OP_PAD and
+TCMU_OP_CMD. When userspace encounters a command with PAD opcode, it
+should skip ahead by the bytes in "length". (The kernel inserts PAD
+entries to ensure each CMD entry fits contiguously into the circular
+buffer.)
+
+When userspace handles a CMD, it finds the SCSI CDB (Command Data
+Block) via tcmu_cmd_entry.req.cdb_off. This is an offset from the
+start of the overall shared memory region, not the entry. The data
+in/out buffers are accessible via the req.iov[] array. Note that
+each iov.iov_base is also an offset from the start of the region.
+
+TCMU currently does not support BIDI operations.
+
+When completing a command, userspace sets rsp.scsi_status, and
+rsp.sense_buffer if necessary. Userspace then increments
+mailbox.cmd_tail by entry.hdr.length (mod cmdr_size) and signals the
+kernel via the UIO method, a 4-byte write to the file descriptor.
+
+The Data Area:
+
+This is shared-memory space after the command ring. The organization
+of this area is not defined in the TCMU interface, and userspace
+should access only the parts referenced by pending iovs.
+
+
+Device Discovery:
+
+Other devices may be using UIO besides TCMU. Unrelated user processes
+may also be handling different sets of TCMU devices. TCMU userspace
+processes must find their devices by scanning sysfs
+class/uio/uio*/name. For TCMU devices, these names will be of the
+format:
+
+tcm-user/<hba_num>/<device_name>/<subtype>/<path>
+
+where "tcm-user" is common for all TCMU-backed UIO devices. <hba_num>
+and <device_name> allow userspace to find the device's path in the
+kernel target's configfs tree. Assuming the usual mount point, it is
+found at:
+
+/sys/kernel/config/target/core/user_<hba_num>/<device_name>
+
+This location contains attributes such as "hw_block_size", that
+userspace needs to know for correct operation.
+
+<subtype> will be a userspace-process-unique string to identify the
+TCMU device as expecting to be backed by a certain handler, and <path>
+will be an additional handler-specific string for the user process to
+configure the device, if needed. The name cannot contain ':', due to
+LIO limitations.
+
+For all devices so discovered, the user handler opens /dev/uioX and
+calls mmap():
+
+mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0)
+
+where size must be equal to the value read from
+/sys/class/uio/uioX/maps/map0/size.
+
+
+Device Events:
+
+If a new device is added or removed, a notification will be broadcast
+over netlink, using a generic netlink family name of "TCM-USER" and a
+multicast group named "config". This will include the UIO name as
+described in the previous section, as well as the UIO minor
+number. This should allow userspace to identify both the UIO device and
+the LIO device, so that after determining the device is supported
+(based on subtype) it can take the appropriate action.
+
+
+Other contingencies:
+
+Userspace handler process never attaches:
+
+- TCMU will post commands, and then abort them after a timeout period
+ (30 seconds.)
+
+Userspace handler process is killed:
+
+- It is still possible to restart and re-connect to TCMU
+ devices. Command ring is preserved. However, after the timeout period,
+ the kernel will abort pending tasks.
+
+Userspace handler process hangs:
+
+- The kernel will abort pending tasks after a timeout period.
+
+Userspace handler process is malicious:
+
+- The process can trivially break the handling of devices it controls,
+ but should not be able to access kernel memory outside its shared
+ memory areas.
+
+
+Writing a user pass-through handler (with example code)
+-------------------------------------------------------
+
+A user process handling a TCMU device must support the following:
+
+a) Discovering and configuring TCMU uio devices
+b) Waiting for events on the device(s)
+c) Managing the command ring: Parsing operations and commands,
+ performing work as needed, setting response fields (scsi_status and
+ possibly sense_buffer), updating cmd_tail, and notifying the kernel
+ that work has been finished
+
+First, consider instead writing a plugin for tcmu-runner. tcmu-runner
+implements all of this, and provides a higher-level API for plugin
+authors.
+
+TCMU is designed so that multiple unrelated processes can manage TCMU
+devices separately. All handlers should make sure to only open their
+devices, based upon a known subtype string.
+
+a) Discovering and configuring TCMU UIO devices:
+
+(error checking omitted for brevity)
+
+int fd, dev_fd;
+char buf[256];
+unsigned long long map_len;
+void *map;
+
+fd = open("/sys/class/uio/uio0/name", O_RDONLY);
+ret = read(fd, buf, sizeof(buf));
+close(fd);
+buf[ret-1] = '\0'; /* null-terminate and chop off the \n */
+
+/* we only want uio devices whose name is a format we expect */
+if (strncmp(buf, "tcm-user", 8))
+ exit(-1);
+
+/* Further checking for subtype also needed here */
+
+fd = open("/sys/class/uio/uio0/maps/map0/size", O_RDONLY);
+ret = read(fd, buf, sizeof(buf));
+close(fd);
+buf[ret-1] = '\0'; /* null-terminate and chop off the \n */
+
+map_len = strtoull(buf, NULL, 0);
+
+dev_fd = open("/dev/uio0", O_RDWR);
+map = mmap(NULL, map_len, PROT_READ|PROT_WRITE, MAP_SHARED, dev_fd, 0);
+
+
+b) Waiting for events on the device(s)
+
+while (1) {
+ char buf[4];
+
+ int ret = read(dev_fd, buf, 4); /* will block */
+
+ handle_device_events(dev_fd, map);
+}
+
+
+c) Managing the command ring
+
+#include <linux/target_core_user.h>
+
+int handle_device_events(int fd, void *map)
+{
+ struct tcmu_mailbox *mb = map;
+ struct tcmu_cmd_entry *ent = (void *) mb + mb->cmdr_off + mb->cmd_tail;
+ int did_some_work = 0;
+
+ /* Process events from cmd ring until we catch up with cmd_head */
+ while (ent != (void *)mb + mb->cmdr_off + mb->cmd_head) {
+
+ if (tcmu_hdr_get_op(&ent->hdr) == TCMU_OP_CMD) {
+ uint8_t *cdb = (void *)mb + ent->req.cdb_off;
+ bool success = true;
+
+ /* Handle command here. */
+ printf("SCSI opcode: 0x%x\n", cdb[0]);
+
+ /* Set response fields */
+ if (success)
+ ent->rsp.scsi_status = SCSI_NO_SENSE;
+ else {
+ /* Also fill in rsp->sense_buffer here */
+ ent->rsp.scsi_status = SCSI_CHECK_CONDITION;
+ }
+ }
+ else {
+ /* Do nothing for PAD entries */
+ }
+
+ /* update cmd_tail */
+ mb->cmd_tail = (mb->cmd_tail + tcmu_hdr_get_len(&ent->hdr)) % mb->cmdr_size;
+ ent = (void *) mb + mb->cmdr_off + mb->cmd_tail;
+ did_some_work = 1;
+ }
+
+ /* Notify the kernel that work has been finished */
+ if (did_some_work) {
+ uint32_t buf = 0;
+
+ write(fd, &buf, 4);
+ }
+
+ return 0;
+}
+
+
+Command filtering and pass_level
+--------------------------------
+
+TCMU supports a "pass_level" option with valid values of 0 or 1. When
+the value is 0 (the default), nearly all SCSI commands received for
+the device are passed through to the handler. This allows maximum
+flexibility but increases the amount of code required by the handler,
+to support all mandatory SCSI commands. If pass_level is set to 1,
+then only IO-related commands are presented, and the rest are handled
+by LIO's in-kernel command emulation. The commands presented at level
+1 include all versions of:
+
+READ
+WRITE
+WRITE_VERIFY
+XDWRITEREAD
+WRITE_SAME
+COMPARE_AND_WRITE
+SYNCHRONIZE_CACHE
+UNMAP
+
+
+A final note
+------------
+
+Please be careful to return codes as defined by the SCSI
+specifications. These are different than some values defined in the
+scsi/scsi.h include file. For example, CHECK CONDITION's status code
+is 2, not 1.
diff --git a/MAINTAINERS b/MAINTAINERS
index 71fdbd4..dab92a7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5841,6 +5841,14 @@
F: drivers/net/macvlan.c
F: include/linux/if_macvlan.h
+MAILBOX API
+M: Jassi Brar <jassisinghbrar@gmail.com>
+L: linux-kernel@vger.kernel.org
+S: Maintained
+F: drivers/mailbox/
+F: include/linux/mailbox_client.h
+F: include/linux/mailbox_controller.h
+
MAN-PAGES: MANUAL PAGES FOR LINUX -- Sections 2, 3, 4, 5, and 7
M: Michael Kerrisk <mtk.manpages@gmail.com>
W: http://www.kernel.org/doc/man-pages
@@ -6829,7 +6837,7 @@
F: drivers/net/wireless/orinoco/
OSD LIBRARY and FILESYSTEM
-M: Boaz Harrosh <bharrosh@panasas.com>
+M: Boaz Harrosh <ooo@electrozaur.com>
M: Benny Halevy <bhalevy@primarydata.com>
L: osd-dev@open-osd.org
W: http://open-osd.org
@@ -6839,6 +6847,13 @@
F: include/scsi/osd_*
F: fs/exofs/
+OVERLAYFS FILESYSTEM
+M: Miklos Szeredi <miklos@szeredi.hu>
+L: linux-fsdevel@vger.kernel.org
+S: Supported
+F: fs/overlayfs/*
+F: Documentation/filesystems/overlayfs.txt
+
P54 WIRELESS DRIVER
M: Christian Lamparter <chunkeey@googlemail.com>
L: linux-wireless@vger.kernel.org
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 9596b0ab..fe44b24 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -9,6 +9,7 @@
config ARC
def_bool y
select BUILDTIME_EXTABLE_SORT
+ select COMMON_CLK
select CLONE_BACKWARDS
# ARC Busybox based initramfs absolutely relies on DEVTMPFS for /dev
select DEVTMPFS if !INITRAMFS_SOURCE=""
@@ -73,9 +74,6 @@
config HAVE_LATENCYTOP_SUPPORT
def_bool y
-config NO_DMA
- def_bool n
-
source "init/Kconfig"
source "kernel/Kconfig.freezer"
@@ -354,7 +352,7 @@
kernel mode. This saves memory access for each such access
-config ARC_MISALIGN_ACCESS
+config ARC_EMUL_UNALIGNED
bool "Emulate unaligned memory access (userspace only)"
select SYSCTL_ARCH_UNALIGN_NO_WARN
select SYSCTL_ARCH_UNALIGN_ALLOW
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index 8c0b1aa..10bc3d4 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -25,7 +25,6 @@
LINUXINCLUDE += -include ${src}/arch/arc/include/asm/current.h
endif
-upto_gcc42 := $(call cc-ifversion, -le, 0402, y)
upto_gcc44 := $(call cc-ifversion, -le, 0404, y)
atleast_gcc44 := $(call cc-ifversion, -ge, 0404, y)
atleast_gcc48 := $(call cc-ifversion, -ge, 0408, y)
@@ -60,25 +59,11 @@
# --build-id w/o "-marclinux". Default arc-elf32-ld is OK
ldflags-$(upto_gcc44) += -marclinux
-ARC_LIBGCC := -mA7
-cflags-$(CONFIG_ARC_HAS_HW_MPY) += -multcost=16
-
ifndef CONFIG_ARC_HAS_HW_MPY
cflags-y += -mno-mpy
-
-# newlib for ARC700 assumes MPY to be always present, which is generally true
-# However, if someone really doesn't want MPY, we need to use the 600 ver
-# which coupled with -mno-mpy will use mpy emulation
-# With gcc 4.4.7, -mno-mpy is enough to make any other related adjustments,
-# e.g. increased cost of MPY. With gcc 4.2.1 this had to be explicitly hinted
-
- ifeq ($(upto_gcc42),y)
- ARC_LIBGCC := -marc600
- cflags-y += -multcost=30
- endif
endif
-LIBGCC := $(shell $(CC) $(ARC_LIBGCC) $(cflags-y) --print-libgcc-file-name)
+LIBGCC := $(shell $(CC) $(cflags-y) --print-libgcc-file-name)
# Modules with short calls might break for calls into builtin-kernel
KBUILD_CFLAGS_MODULE += -mlong-calls
diff --git a/arch/arc/boot/dts/angel4.dts b/arch/arc/boot/dts/angel4.dts
index 6b57475..757e0c6 100644
--- a/arch/arc/boot/dts/angel4.dts
+++ b/arch/arc/boot/dts/angel4.dts
@@ -24,11 +24,6 @@
serial0 = &arcuart0;
};
- memory {
- device_type = "memory";
- reg = <0x00000000 0x10000000>; /* 256M */
- };
-
fpga {
compatible = "simple-bus";
#address-cells = <1>;
diff --git a/arch/arc/boot/dts/nsimosci.dts b/arch/arc/boot/dts/nsimosci.dts
index 4f31b2e..cfaedd9 100644
--- a/arch/arc/boot/dts/nsimosci.dts
+++ b/arch/arc/boot/dts/nsimosci.dts
@@ -20,18 +20,13 @@
/* this is for console on PGU */
/* bootargs = "console=tty0 consoleblank=0"; */
/* this is for console on serial */
- bootargs = "earlycon=uart8250,mmio32,0xc0000000,115200n8 console=ttyS0,115200n8 consoleblank=0 debug";
+ bootargs = "earlycon=uart8250,mmio32,0xc0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug";
};
aliases {
serial0 = &uart0;
};
- memory {
- device_type = "memory";
- reg = <0x80000000 0x10000000>; /* 256M */
- };
-
fpga {
compatible = "simple-bus";
#address-cells = <1>;
diff --git a/arch/arc/configs/fpga_defconfig b/arch/arc/configs/fpga_defconfig
index e283aa5..ef4d3bc 100644
--- a/arch/arc/configs/fpga_defconfig
+++ b/arch/arc/configs/fpga_defconfig
@@ -23,7 +23,6 @@
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARC_PLAT_FPGA_LEGACY=y
-CONFIG_ARC_BOARD_ML509=y
# CONFIG_ARC_HAS_RTSC is not set
CONFIG_ARC_BUILTIN_DTB_NAME="angel4"
CONFIG_PREEMPT=y
diff --git a/arch/arc/configs/fpga_noramfs_defconfig b/arch/arc/configs/fpga_noramfs_defconfig
index 5276a52..49c9301 100644
--- a/arch/arc/configs/fpga_noramfs_defconfig
+++ b/arch/arc/configs/fpga_noramfs_defconfig
@@ -20,7 +20,6 @@
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARC_PLAT_FPGA_LEGACY=y
-CONFIG_ARC_BOARD_ML509=y
# CONFIG_ARC_HAS_RTSC is not set
CONFIG_ARC_BUILTIN_DTB_NAME="angel4"
CONFIG_PREEMPT=y
diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig
index c01ba35..278dacf 100644
--- a/arch/arc/configs/nsimosci_defconfig
+++ b/arch/arc/configs/nsimosci_defconfig
@@ -21,7 +21,6 @@
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARC_PLAT_FPGA_LEGACY=y
-CONFIG_ARC_BOARD_ML509=y
# CONFIG_ARC_IDE is not set
# CONFIG_ARCTANGENT_EMAC is not set
# CONFIG_ARC_HAS_RTSC is not set
diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index 372466b..be33db8 100644
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h
@@ -9,19 +9,16 @@
#ifndef _ASM_ARC_ARCREGS_H
#define _ASM_ARC_ARCREGS_H
-#ifdef __KERNEL__
-
/* Build Configuration Registers */
#define ARC_REG_DCCMBASE_BCR 0x61 /* DCCM Base Addr */
#define ARC_REG_CRC_BCR 0x62
-#define ARC_REG_DVFB_BCR 0x64
-#define ARC_REG_EXTARITH_BCR 0x65
#define ARC_REG_VECBASE_BCR 0x68
#define ARC_REG_PERIBASE_BCR 0x69
-#define ARC_REG_FP_BCR 0x6B /* Single-Precision FPU */
-#define ARC_REG_DPFP_BCR 0x6C /* Dbl Precision FPU */
+#define ARC_REG_FP_BCR 0x6B /* ARCompact: Single-Precision FPU */
+#define ARC_REG_DPFP_BCR 0x6C /* ARCompact: Dbl Precision FPU */
#define ARC_REG_DCCM_BCR 0x74 /* DCCM Present + SZ */
#define ARC_REG_TIMERS_BCR 0x75
+#define ARC_REG_AP_BCR 0x76
#define ARC_REG_ICCM_BCR 0x78
#define ARC_REG_XY_MEM_BCR 0x79
#define ARC_REG_MAC_BCR 0x7a
@@ -31,6 +28,9 @@
#define ARC_REG_MIXMAX_BCR 0x7e
#define ARC_REG_BARREL_BCR 0x7f
#define ARC_REG_D_UNCACH_BCR 0x6A
+#define ARC_REG_BPU_BCR 0xc0
+#define ARC_REG_ISA_CFG_BCR 0xc1
+#define ARC_REG_SMART_BCR 0xFF
/* status32 Bits Positions */
#define STATUS_AE_BIT 5 /* Exception active */
@@ -191,14 +191,6 @@
#define PAGES_TO_KB(n_pages) ((n_pages) << (PAGE_SHIFT - 10))
#define PAGES_TO_MB(n_pages) (PAGES_TO_KB(n_pages) >> 10)
-#ifdef CONFIG_ARC_FPU_SAVE_RESTORE
-/* These DPFP regs need to be saved/restored across ctx-sw */
-struct arc_fpu {
- struct {
- unsigned int l, h;
- } aux_dpfp[2];
-};
-#endif
/*
***************************************************************
@@ -212,27 +204,19 @@
#endif
};
-#define EXTN_SWAP_VALID 0x1
-#define EXTN_NORM_VALID 0x2
-#define EXTN_MINMAX_VALID 0x2
-#define EXTN_BARREL_VALID 0x2
-
-struct bcr_extn {
+struct bcr_isa {
#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int pad:20, crc:1, ext_arith:2, mul:2, barrel:2, minmax:2,
- norm:2, swap:1;
+ unsigned int pad1:23, atomic1:1, ver:8;
#else
- unsigned int swap:1, norm:2, minmax:2, barrel:2, mul:2, ext_arith:2,
- crc:1, pad:20;
+ unsigned int ver:8, atomic1:1, pad1:23;
#endif
};
-/* DSP Options Ref Manual */
-struct bcr_extn_mac_mul {
+struct bcr_mpy {
#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int pad:16, type:8, ver:8;
+ unsigned int pad:8, x1616:8, dsp:4, cycles:2, type:2, ver:8;
#else
- unsigned int ver:8, type:8, pad:16;
+ unsigned int ver:8, type:2, cycles:2, dsp:4, x1616:8, pad:8;
#endif
};
@@ -251,6 +235,7 @@
unsigned int pad:8, sz:8, pad2:8, start:8;
#endif
};
+
struct bcr_iccm {
#ifdef CONFIG_CPU_BIG_ENDIAN
unsigned int base:16, pad:5, sz:3, ver:8;
@@ -277,8 +262,8 @@
#endif
};
-/* Both SP and DP FPU BCRs have same format */
-struct bcr_fp {
+/* ARCompact: Both SP and DP FPU BCRs have same format */
+struct bcr_fp_arcompact {
#ifdef CONFIG_CPU_BIG_ENDIAN
unsigned int fast:1, ver:8;
#else
@@ -286,6 +271,30 @@
#endif
};
+struct bcr_timer {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int pad2:15, rtsc:1, pad1:6, t1:1, t0:1, ver:8;
+#else
+ unsigned int ver:8, t0:1, t1:1, pad1:6, rtsc:1, pad2:15;
+#endif
+};
+
+struct bcr_bpu_arcompact {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int pad2:19, fam:1, pad:2, ent:2, ver:8;
+#else
+ unsigned int ver:8, ent:2, pad:2, fam:1, pad2:19;
+#endif
+};
+
+struct bcr_generic {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int pad:24, ver:8;
+#else
+ unsigned int ver:8, pad:24;
+#endif
+};
+
/*
*******************************************************************
* Generic structures to hold build configuration used at runtime
@@ -299,6 +308,10 @@
unsigned int sz_k:8, line_len:8, assoc:4, ver:4, alias:1, vipt:1, pad:6;
};
+struct cpuinfo_arc_bpu {
+ unsigned int ver, full, num_cache, num_pred;
+};
+
struct cpuinfo_arc_ccm {
unsigned int base_addr, sz;
};
@@ -306,21 +319,25 @@
struct cpuinfo_arc {
struct cpuinfo_arc_cache icache, dcache;
struct cpuinfo_arc_mmu mmu;
+ struct cpuinfo_arc_bpu bpu;
struct bcr_identity core;
- unsigned int timers;
+ struct bcr_isa isa;
+ struct bcr_timer timers;
unsigned int vec_base;
unsigned int uncached_base;
struct cpuinfo_arc_ccm iccm, dccm;
- struct bcr_extn extn;
+ struct {
+ unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, pad1:3,
+ fpu_sp:1, fpu_dp:1, pad2:6,
+ debug:1, ap:1, smart:1, rtt:1, pad3:4,
+ pad4:8;
+ } extn;
+ struct bcr_mpy extn_mpy;
struct bcr_extn_xymem extn_xymem;
- struct bcr_extn_mac_mul extn_mac_mul;
- struct bcr_fp fp, dpfp;
};
extern struct cpuinfo_arc cpuinfo_arc700[];
#endif /* __ASEMBLY__ */
-#endif /* __KERNEL__ */
-
#endif /* _ASM_ARC_ARCREGS_H */
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 173f303..067551b 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -9,8 +9,6 @@
#ifndef _ASM_ARC_ATOMIC_H
#define _ASM_ARC_ATOMIC_H
-#ifdef __KERNEL__
-
#ifndef __ASSEMBLY__
#include <linux/types.h>
@@ -170,5 +168,3 @@
#endif
#endif
-
-#endif
diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h
index ebc0cf3..1a5bf07 100644
--- a/arch/arc/include/asm/bitops.h
+++ b/arch/arc/include/asm/bitops.h
@@ -13,8 +13,6 @@
#error only <linux/bitops.h> can be included directly
#endif
-#ifdef __KERNEL__
-
#ifndef __ASSEMBLY__
#include <linux/types.h>
@@ -508,6 +506,4 @@
#endif /* !__ASSEMBLY__ */
-#endif /* __KERNEL__ */
-
#endif
diff --git a/arch/arc/include/asm/bug.h b/arch/arc/include/asm/bug.h
index 5b18e94..ea022d4 100644
--- a/arch/arc/include/asm/bug.h
+++ b/arch/arc/include/asm/bug.h
@@ -21,10 +21,9 @@
unsigned long address);
void die(const char *str, struct pt_regs *regs, unsigned long address);
-#define BUG() do { \
- dump_stack(); \
- pr_warn("Kernel BUG in %s: %s: %d!\n", \
- __FILE__, __func__, __LINE__); \
+#define BUG() do { \
+ pr_warn("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \
+ dump_stack(); \
} while (0)
#define HAVE_ARCH_BUG
diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index b3c7509..7861255 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -20,7 +20,7 @@
#define CACHE_LINE_MASK (~(L1_CACHE_BYTES - 1))
/*
- * ARC700 doesn't cache any access in top 256M.
+ * ARC700 doesn't cache any access in top 1G (0xc000_0000 to 0xFFFF_FFFF)
* Ideal for wiring memory mapped peripherals as we don't need to do
* explicit uncached accesses (LD.di/ST.di) hence more portable drivers
*/
diff --git a/arch/arc/include/asm/current.h b/arch/arc/include/asm/current.h
index 87b9185..c2453ee6 100644
--- a/arch/arc/include/asm/current.h
+++ b/arch/arc/include/asm/current.h
@@ -12,8 +12,6 @@
#ifndef _ASM_ARC_CURRENT_H
#define _ASM_ARC_CURRENT_H
-#ifdef __KERNEL__
-
#ifndef __ASSEMBLY__
#ifdef CONFIG_ARC_CURR_IN_REG
@@ -27,6 +25,4 @@
#endif /* ! __ASSEMBLY__ */
-#endif /* __KERNEL__ */
-
#endif /* _ASM_ARC_CURRENT_H */
diff --git a/arch/arc/include/asm/irqflags.h b/arch/arc/include/asm/irqflags.h
index 587df82..742816f 100644
--- a/arch/arc/include/asm/irqflags.h
+++ b/arch/arc/include/asm/irqflags.h
@@ -15,8 +15,6 @@
* -Conditionally disable interrupts (if they are not enabled, don't disable)
*/
-#ifdef __KERNEL__
-
#include <asm/arcregs.h>
/* status32 Reg bits related to Interrupt Handling */
@@ -169,6 +167,4 @@
#endif /* __ASSEMBLY__ */
-#endif /* KERNEL */
-
#endif
diff --git a/arch/arc/include/asm/kgdb.h b/arch/arc/include/asm/kgdb.h
index b65fca7..fea9316 100644
--- a/arch/arc/include/asm/kgdb.h
+++ b/arch/arc/include/asm/kgdb.h
@@ -19,7 +19,7 @@
* register API yet */
#undef DBG_MAX_REG_NUM
-#define GDB_MAX_REGS 39
+#define GDB_MAX_REGS 87
#define BREAK_INSTR_SIZE 2
#define CACHE_FLUSH_IS_SAFE 1
@@ -33,23 +33,27 @@
extern void kgdb_trap(struct pt_regs *regs);
-enum arc700_linux_regnums {
+/* This is the numbering of registers according to the GDB. See GDB's
+ * arc-tdep.h for details.
+ *
+ * Registers are ordered for GDB 7.5. It is incompatible with GDB 6.8. */
+enum arc_linux_regnums {
_R0 = 0,
_R1, _R2, _R3, _R4, _R5, _R6, _R7, _R8, _R9, _R10, _R11, _R12, _R13,
_R14, _R15, _R16, _R17, _R18, _R19, _R20, _R21, _R22, _R23, _R24,
_R25, _R26,
- _BTA = 27,
- _LP_START = 28,
- _LP_END = 29,
- _LP_COUNT = 30,
- _STATUS32 = 31,
- _BLINK = 32,
- _FP = 33,
- __SP = 34,
- _EFA = 35,
- _RET = 36,
- _ORIG_R8 = 37,
- _STOP_PC = 38
+ _FP = 27,
+ __SP = 28,
+ _R30 = 30,
+ _BLINK = 31,
+ _LP_COUNT = 60,
+ _STOP_PC = 64,
+ _RET = 64,
+ _LP_START = 65,
+ _LP_END = 66,
+ _STATUS32 = 67,
+ _ECR = 76,
+ _BTA = 82,
};
#else
diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index 82588f3..210fe97 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -14,12 +14,19 @@
#ifndef __ASM_ARC_PROCESSOR_H
#define __ASM_ARC_PROCESSOR_H
-#ifdef __KERNEL__
-
#ifndef __ASSEMBLY__
#include <asm/ptrace.h>
+#ifdef CONFIG_ARC_FPU_SAVE_RESTORE
+/* These DPFP regs need to be saved/restored across ctx-sw */
+struct arc_fpu {
+ struct {
+ unsigned int l, h;
+ } aux_dpfp[2];
+};
+#endif
+
/* Arch specific stuff which needs to be saved per task.
* However these items are not so important so as to earn a place in
* struct thread_info
@@ -128,6 +135,4 @@
*/
#define TASK_UNMAPPED_BASE (TASK_SIZE / 3)
-#endif /* __KERNEL__ */
-
#endif /* __ASM_ARC_PROCESSOR_H */
diff --git a/arch/arc/include/asm/setup.h b/arch/arc/include/asm/setup.h
index e10f8ce..6e3ef5b 100644
--- a/arch/arc/include/asm/setup.h
+++ b/arch/arc/include/asm/setup.h
@@ -29,7 +29,6 @@
};
extern int root_mountflags, end_mem;
-extern int running_on_hw;
void setup_processor(void);
void __init setup_arch_memory(void);
diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h
index 5d06eee..3845b9e 100644
--- a/arch/arc/include/asm/smp.h
+++ b/arch/arc/include/asm/smp.h
@@ -59,7 +59,15 @@
/* TBD: stop exporting it for direct population by platform */
extern struct plat_smp_ops plat_smp_ops;
-#endif /* CONFIG_SMP */
+#else /* CONFIG_SMP */
+
+static inline void smp_init_cpus(void) {}
+static inline const char *arc_platform_smp_cpuinfo(void)
+{
+ return "";
+}
+
+#endif /* !CONFIG_SMP */
/*
* ARC700 doesn't support atomic Read-Modify-Write ops.
diff --git a/arch/arc/include/asm/string.h b/arch/arc/include/asm/string.h
index 87676c8..95822b5 100644
--- a/arch/arc/include/asm/string.h
+++ b/arch/arc/include/asm/string.h
@@ -17,8 +17,6 @@
#include <linux/types.h>
-#ifdef __KERNEL__
-
#define __HAVE_ARCH_MEMSET
#define __HAVE_ARCH_MEMCPY
#define __HAVE_ARCH_MEMCMP
@@ -36,5 +34,4 @@
extern int strcmp(const char *cs, const char *ct);
extern __kernel_size_t strlen(const char *);
-#endif /* __KERNEL__ */
#endif /* _ASM_ARC_STRING_H */
diff --git a/arch/arc/include/asm/syscalls.h b/arch/arc/include/asm/syscalls.h
index dd785be..e56f9fc 100644
--- a/arch/arc/include/asm/syscalls.h
+++ b/arch/arc/include/asm/syscalls.h
@@ -9,8 +9,6 @@
#ifndef _ASM_ARC_SYSCALLS_H
#define _ASM_ARC_SYSCALLS_H 1
-#ifdef __KERNEL__
-
#include <linux/compiler.h>
#include <linux/linkage.h>
#include <linux/types.h>
@@ -22,6 +20,4 @@
#include <asm-generic/syscalls.h>
-#endif /* __KERNEL__ */
-
#endif
diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h
index 45be216..02bc5ec 100644
--- a/arch/arc/include/asm/thread_info.h
+++ b/arch/arc/include/asm/thread_info.h
@@ -16,8 +16,6 @@
#ifndef _ASM_THREAD_INFO_H
#define _ASM_THREAD_INFO_H
-#ifdef __KERNEL__
-
#include <asm/page.h>
#ifdef CONFIG_16KSTACKS
@@ -114,6 +112,4 @@
* syscall, so all that reamins to be tested is _TIF_WORK_MASK
*/
-#endif /* __KERNEL__ */
-
#endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/arc/include/asm/unaligned.h b/arch/arc/include/asm/unaligned.h
index 3e5f071..6da6b4e 100644
--- a/arch/arc/include/asm/unaligned.h
+++ b/arch/arc/include/asm/unaligned.h
@@ -14,7 +14,7 @@
#include <asm-generic/unaligned.h>
#include <asm/ptrace.h>
-#ifdef CONFIG_ARC_MISALIGN_ACCESS
+#ifdef CONFIG_ARC_EMUL_UNALIGNED
int misaligned_fixup(unsigned long address, struct pt_regs *regs,
struct callee_regs *cregs);
#else
diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile
index 8004b4f..113f203 100644
--- a/arch/arc/kernel/Makefile
+++ b/arch/arc/kernel/Makefile
@@ -16,7 +16,7 @@
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_ARC_DW2_UNWIND) += unwind.o
obj-$(CONFIG_KPROBES) += kprobes.o
-obj-$(CONFIG_ARC_MISALIGN_ACCESS) += unaligned.o
+obj-$(CONFIG_ARC_EMUL_UNALIGNED) += unaligned.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_ARC_METAWARE_HLINK) += arc_hostlink.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o
diff --git a/arch/arc/kernel/disasm.c b/arch/arc/kernel/disasm.c
index b8a549c..3b7cd48 100644
--- a/arch/arc/kernel/disasm.c
+++ b/arch/arc/kernel/disasm.c
@@ -15,7 +15,7 @@
#include <linux/uaccess.h>
#include <asm/disasm.h>
-#if defined(CONFIG_KGDB) || defined(CONFIG_ARC_MISALIGN_ACCESS) || \
+#if defined(CONFIG_KGDB) || defined(CONFIG_ARC_EMUL_UNALIGNED) || \
defined(CONFIG_KPROBES)
/* disasm_instr: Analyses instruction at addr, stores
@@ -535,4 +535,4 @@
return instr.is_branch;
}
-#endif /* CONFIG_KGDB || CONFIG_ARC_MISALIGN_ACCESS || CONFIG_KPROBES */
+#endif /* CONFIG_KGDB || CONFIG_ARC_EMUL_UNALIGNED || CONFIG_KPROBES */
diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S
index 4d2481b..b0e8666 100644
--- a/arch/arc/kernel/head.S
+++ b/arch/arc/kernel/head.S
@@ -91,16 +91,6 @@
st r0, [@uboot_tag]
st r2, [@uboot_arg]
- ; Identify if running on ISS vs Silicon
- ; IDENTITY Reg [ 3 2 1 0 ]
- ; (chip-id) ^^^^^ ==> 0xffff for ISS
- lr r0, [identity]
- lsr r3, r0, 16
- cmp r3, 0xffff
- mov.z r4, 0
- mov.nz r4, 1
- st r4, [@running_on_hw]
-
; setup "current" tsk and optionally cache it in dedicated r25
mov r9, @init_task
SET_CURR_TASK_ON_CPU r9, r0 ; r9 = tsk, r0 = scratch
diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c
index a2ff5c5..ecf6a78 100644
--- a/arch/arc/kernel/kgdb.c
+++ b/arch/arc/kernel/kgdb.c
@@ -158,11 +158,6 @@
return -1;
}
-unsigned long kgdb_arch_pc(int exception, struct pt_regs *regs)
-{
- return instruction_pointer(regs);
-}
-
int kgdb_arch_init(void)
{
single_step_data.armed = 0;
diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c
index b9a5685..ae1c485 100644
--- a/arch/arc/kernel/perf_event.c
+++ b/arch/arc/kernel/perf_event.c
@@ -244,25 +244,23 @@
pr_err("This core does not have performance counters!\n");
return -ENODEV;
}
+ BUG_ON(pct_bcr.c > ARC_PMU_MAX_HWEVENTS);
- arc_pmu = devm_kzalloc(&pdev->dev, sizeof(struct arc_pmu),
- GFP_KERNEL);
+ READ_BCR(ARC_REG_CC_BUILD, cc_bcr);
+ if (!cc_bcr.v) {
+ pr_err("Performance counters exist, but no countable conditions?\n");
+ return -ENODEV;
+ }
+
+ arc_pmu = devm_kzalloc(&pdev->dev, sizeof(struct arc_pmu), GFP_KERNEL);
if (!arc_pmu)
return -ENOMEM;
arc_pmu->n_counters = pct_bcr.c;
- BUG_ON(arc_pmu->n_counters > ARC_PMU_MAX_HWEVENTS);
-
arc_pmu->counter_size = 32 + (pct_bcr.s << 4);
- pr_info("ARC PMU found with %d counters of size %d bits\n",
- arc_pmu->n_counters, arc_pmu->counter_size);
- READ_BCR(ARC_REG_CC_BUILD, cc_bcr);
-
- if (!cc_bcr.v)
- pr_err("Strange! Performance counters exist, but no countable conditions?\n");
-
- pr_info("ARC PMU has %d countable conditions\n", cc_bcr.c);
+ pr_info("ARC perf\t: %d counters (%d bits), %d countable conditions\n",
+ arc_pmu->n_counters, arc_pmu->counter_size, cc_bcr.c);
cc_name.str[8] = 0;
for (i = 0; i < PERF_COUNT_HW_MAX; i++)
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index 119dddb..252bf60 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -13,7 +13,9 @@
#include <linux/console.h>
#include <linux/module.h>
#include <linux/cpu.h>
+#include <linux/clk-provider.h>
#include <linux/of_fdt.h>
+#include <linux/of_platform.h>
#include <linux/cache.h>
#include <asm/sections.h>
#include <asm/arcregs.h>
@@ -24,11 +26,10 @@
#include <asm/unwind.h>
#include <asm/clk.h>
#include <asm/mach_desc.h>
+#include <asm/smp.h>
#define FIX_PTR(x) __asm__ __volatile__(";" : "+r"(x))
-int running_on_hw = 1; /* vs. on ISS */
-
/* Part of U-boot ABI: see head.S */
int __initdata uboot_tag;
char __initdata *uboot_arg;
@@ -42,26 +43,26 @@
static void read_arc_build_cfg_regs(void)
{
struct bcr_perip uncached_space;
+ struct bcr_generic bcr;
struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
FIX_PTR(cpu);
READ_BCR(AUX_IDENTITY, cpu->core);
+ READ_BCR(ARC_REG_ISA_CFG_BCR, cpu->isa);
- cpu->timers = read_aux_reg(ARC_REG_TIMERS_BCR);
+ READ_BCR(ARC_REG_TIMERS_BCR, cpu->timers);
cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE);
READ_BCR(ARC_REG_D_UNCACH_BCR, uncached_space);
cpu->uncached_base = uncached_space.start << 24;
- cpu->extn.mul = read_aux_reg(ARC_REG_MUL_BCR);
- cpu->extn.swap = read_aux_reg(ARC_REG_SWAP_BCR);
- cpu->extn.norm = read_aux_reg(ARC_REG_NORM_BCR);
- cpu->extn.minmax = read_aux_reg(ARC_REG_MIXMAX_BCR);
- cpu->extn.barrel = read_aux_reg(ARC_REG_BARREL_BCR);
- READ_BCR(ARC_REG_MAC_BCR, cpu->extn_mac_mul);
+ READ_BCR(ARC_REG_MUL_BCR, cpu->extn_mpy);
- cpu->extn.ext_arith = read_aux_reg(ARC_REG_EXTARITH_BCR);
- cpu->extn.crc = read_aux_reg(ARC_REG_CRC_BCR);
+ cpu->extn.norm = read_aux_reg(ARC_REG_NORM_BCR) > 1 ? 1 : 0; /* 2,3 */
+ cpu->extn.barrel = read_aux_reg(ARC_REG_BARREL_BCR) > 1 ? 1 : 0; /* 2,3 */
+ cpu->extn.swap = read_aux_reg(ARC_REG_SWAP_BCR) ? 1 : 0; /* 1,3 */
+ cpu->extn.crc = read_aux_reg(ARC_REG_CRC_BCR) ? 1 : 0;
+ cpu->extn.minmax = read_aux_reg(ARC_REG_MIXMAX_BCR) > 1 ? 1 : 0; /* 2 */
/* Note that we read the CCM BCRs independent of kernel config
* This is to catch the cases where user doesn't know that
@@ -95,43 +96,76 @@
read_decode_mmu_bcr();
read_decode_cache_bcr();
- READ_BCR(ARC_REG_FP_BCR, cpu->fp);
- READ_BCR(ARC_REG_DPFP_BCR, cpu->dpfp);
+ {
+ struct bcr_fp_arcompact sp, dp;
+ struct bcr_bpu_arcompact bpu;
+
+ READ_BCR(ARC_REG_FP_BCR, sp);
+ READ_BCR(ARC_REG_DPFP_BCR, dp);
+ cpu->extn.fpu_sp = sp.ver ? 1 : 0;
+ cpu->extn.fpu_dp = dp.ver ? 1 : 0;
+
+ READ_BCR(ARC_REG_BPU_BCR, bpu);
+ cpu->bpu.ver = bpu.ver;
+ cpu->bpu.full = bpu.fam ? 1 : 0;
+ if (bpu.ent) {
+ cpu->bpu.num_cache = 256 << (bpu.ent - 1);
+ cpu->bpu.num_pred = 256 << (bpu.ent - 1);
+ }
+ }
+
+ READ_BCR(ARC_REG_AP_BCR, bcr);
+ cpu->extn.ap = bcr.ver ? 1 : 0;
+
+ READ_BCR(ARC_REG_SMART_BCR, bcr);
+ cpu->extn.smart = bcr.ver ? 1 : 0;
+
+ cpu->extn.debug = cpu->extn.ap | cpu->extn.smart;
}
static const struct cpuinfo_data arc_cpu_tbl[] = {
- { {0x10, "ARCTangent A5"}, 0x1F},
{ {0x20, "ARC 600" }, 0x2F},
{ {0x30, "ARC 700" }, 0x33},
{ {0x34, "ARC 700 R4.10"}, 0x34},
+ { {0x35, "ARC 700 R4.11"}, 0x35},
{ {0x00, NULL } }
};
+#define IS_AVAIL1(v, str) ((v) ? str : "")
+#define IS_USED(cfg) (IS_ENABLED(cfg) ? "" : "(not used) ")
+#define IS_AVAIL2(v, str, cfg) IS_AVAIL1(v, str), IS_AVAIL1(v, IS_USED(cfg))
+
static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
{
- int n = 0;
struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id];
struct bcr_identity *core = &cpu->core;
const struct cpuinfo_data *tbl;
- int be = 0;
-#ifdef CONFIG_CPU_BIG_ENDIAN
- be = 1;
-#endif
+ char *isa_nm;
+ int i, be, atomic;
+ int n = 0;
+
FIX_PTR(cpu);
+ {
+ isa_nm = "ARCompact";
+ be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
+
+ atomic = cpu->isa.atomic1;
+ if (!cpu->isa.ver) /* ISA BCR absent, use Kconfig info */
+ atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC);
+ }
+
n += scnprintf(buf + n, len - n,
- "\nARC IDENTITY\t: Family [%#02x]"
- " Cpu-id [%#02x] Chip-id [%#4x]\n",
- core->family, core->cpu_id,
- core->chip_id);
+ "\nIDENTITY\t: ARCVER [%#02x] ARCNUM [%#02x] CHIPID [%#4x]\n",
+ core->family, core->cpu_id, core->chip_id);
for (tbl = &arc_cpu_tbl[0]; tbl->info.id != 0; tbl++) {
if ((core->family >= tbl->info.id) &&
(core->family <= tbl->up_range)) {
n += scnprintf(buf + n, len - n,
- "processor\t: %s %s\n",
- tbl->info.str,
- be ? "[Big Endian]" : "");
+ "processor [%d]\t: %s (%s ISA) %s\n",
+ cpu_id, tbl->info.str, isa_nm,
+ IS_AVAIL1(be, "[Big-Endian]"));
break;
}
}
@@ -143,102 +177,82 @@
(unsigned int)(arc_get_core_freq() / 1000000),
(unsigned int)(arc_get_core_freq() / 10000) % 100);
- n += scnprintf(buf + n, len - n, "Timers\t\t: %s %s\n",
- (cpu->timers & 0x200) ? "TIMER1" : "",
- (cpu->timers & 0x100) ? "TIMER0" : "");
+ n += scnprintf(buf + n, len - n, "Timers\t\t: %s%s%s%s\nISA Extn\t: ",
+ IS_AVAIL1(cpu->timers.t0, "Timer0 "),
+ IS_AVAIL1(cpu->timers.t1, "Timer1 "),
+ IS_AVAIL2(cpu->timers.rtsc, "64-bit RTSC ", CONFIG_ARC_HAS_RTSC));
- n += scnprintf(buf + n, len - n, "Vect Tbl Base\t: %#x\n",
- cpu->vec_base);
+ n += i = scnprintf(buf + n, len - n, "%s%s",
+ IS_AVAIL2(atomic, "atomic ", CONFIG_ARC_HAS_LLSC));
- n += scnprintf(buf + n, len - n, "UNCACHED Base\t: %#x\n",
- cpu->uncached_base);
+ if (i)
+ n += scnprintf(buf + n, len - n, "\n\t\t: ");
+
+ n += scnprintf(buf + n, len - n, "%s%s%s%s%s%s%s%s\n",
+ IS_AVAIL1(cpu->extn_mpy.ver, "mpy "),
+ IS_AVAIL1(cpu->extn.norm, "norm "),
+ IS_AVAIL1(cpu->extn.barrel, "barrel-shift "),
+ IS_AVAIL1(cpu->extn.swap, "swap "),
+ IS_AVAIL1(cpu->extn.minmax, "minmax "),
+ IS_AVAIL1(cpu->extn.crc, "crc "),
+ IS_AVAIL2(1, "swape", CONFIG_ARC_HAS_SWAPE));
+
+ if (cpu->bpu.ver)
+ n += scnprintf(buf + n, len - n,
+ "BPU\t\t: %s%s match, cache:%d, Predict Table:%d\n",
+ IS_AVAIL1(cpu->bpu.full, "full"),
+ IS_AVAIL1(!cpu->bpu.full, "partial"),
+ cpu->bpu.num_cache, cpu->bpu.num_pred);
return buf;
}
-static const struct id_to_str mul_type_nm[] = {
- { 0x0, "N/A"},
- { 0x1, "32x32 (spl Result Reg)" },
- { 0x2, "32x32 (ANY Result Reg)" }
-};
-
-static const struct id_to_str mac_mul_nm[] = {
- {0x0, "N/A"},
- {0x1, "N/A"},
- {0x2, "Dual 16 x 16"},
- {0x3, "N/A"},
- {0x4, "32x16"},
- {0x5, "N/A"},
- {0x6, "Dual 16x16 and 32x16"}
-};
-
static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
{
int n = 0;
struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id];
FIX_PTR(cpu);
-#define IS_AVAIL1(var, str) ((var) ? str : "")
-#define IS_AVAIL2(var, str) ((var == 0x2) ? str : "")
-#define IS_USED(cfg) (IS_ENABLED(cfg) ? "(in-use)" : "(not used)")
n += scnprintf(buf + n, len - n,
- "Extn [700-Base]\t: %s %s %s %s %s %s\n",
- IS_AVAIL2(cpu->extn.norm, "norm,"),
- IS_AVAIL2(cpu->extn.barrel, "barrel-shift,"),
- IS_AVAIL1(cpu->extn.swap, "swap,"),
- IS_AVAIL2(cpu->extn.minmax, "minmax,"),
- IS_AVAIL1(cpu->extn.crc, "crc,"),
- IS_AVAIL2(cpu->extn.ext_arith, "ext-arith"));
+ "Vector Table\t: %#x\nUncached Base\t: %#x\n",
+ cpu->vec_base, cpu->uncached_base);
- n += scnprintf(buf + n, len - n, "Extn [700-MPY]\t: %s",
- mul_type_nm[cpu->extn.mul].str);
+ if (cpu->extn.fpu_sp || cpu->extn.fpu_dp)
+ n += scnprintf(buf + n, len - n, "FPU\t\t: %s%s\n",
+ IS_AVAIL1(cpu->extn.fpu_sp, "SP "),
+ IS_AVAIL1(cpu->extn.fpu_dp, "DP "));
- n += scnprintf(buf + n, len - n, " MAC MPY: %s\n",
- mac_mul_nm[cpu->extn_mac_mul.type].str);
+ if (cpu->extn.debug)
+ n += scnprintf(buf + n, len - n, "DEBUG\t\t: %s%s%s\n",
+ IS_AVAIL1(cpu->extn.ap, "ActionPoint "),
+ IS_AVAIL1(cpu->extn.smart, "smaRT "),
+ IS_AVAIL1(cpu->extn.rtt, "RTT "));
- if (cpu->core.family == 0x34) {
- n += scnprintf(buf + n, len - n,
- "Extn [700-4.10]\t: LLOCK/SCOND %s, SWAPE %s, RTSC %s\n",
- IS_USED(CONFIG_ARC_HAS_LLSC),
- IS_USED(CONFIG_ARC_HAS_SWAPE),
- IS_USED(CONFIG_ARC_HAS_RTSC));
- }
-
- n += scnprintf(buf + n, len - n, "Extn [CCM]\t: %s",
- !(cpu->dccm.sz || cpu->iccm.sz) ? "N/A" : "");
-
- if (cpu->dccm.sz)
- n += scnprintf(buf + n, len - n, "DCCM: @ %x, %d KB ",
- cpu->dccm.base_addr, TO_KB(cpu->dccm.sz));
-
- if (cpu->iccm.sz)
- n += scnprintf(buf + n, len - n, "ICCM: @ %x, %d KB",
+ if (cpu->dccm.sz || cpu->iccm.sz)
+ n += scnprintf(buf + n, len - n, "Extn [CCM]\t: DCCM @ %x, %d KB / ICCM: @ %x, %d KB\n",
+ cpu->dccm.base_addr, TO_KB(cpu->dccm.sz),
cpu->iccm.base_addr, TO_KB(cpu->iccm.sz));
- n += scnprintf(buf + n, len - n, "\nExtn [FPU]\t: %s",
- !(cpu->fp.ver || cpu->dpfp.ver) ? "N/A" : "");
-
- if (cpu->fp.ver)
- n += scnprintf(buf + n, len - n, "SP [v%d] %s",
- cpu->fp.ver, cpu->fp.fast ? "(fast)" : "");
-
- if (cpu->dpfp.ver)
- n += scnprintf(buf + n, len - n, "DP [v%d] %s",
- cpu->dpfp.ver, cpu->dpfp.fast ? "(fast)" : "");
-
- n += scnprintf(buf + n, len - n, "\n");
-
n += scnprintf(buf + n, len - n,
"OS ABI [v3]\t: no-legacy-syscalls\n");
return buf;
}
-static void arc_chk_ccms(void)
+static void arc_chk_core_config(void)
{
-#if defined(CONFIG_ARC_HAS_DCCM) || defined(CONFIG_ARC_HAS_ICCM)
struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
+ int fpu_enabled;
+
+ if (!cpu->timers.t0)
+ panic("Timer0 is not present!\n");
+
+ if (!cpu->timers.t1)
+ panic("Timer1 is not present!\n");
+
+ if (IS_ENABLED(CONFIG_ARC_HAS_RTSC) && !cpu->timers.rtsc)
+ panic("RTSC is not present\n");
#ifdef CONFIG_ARC_HAS_DCCM
/*
@@ -256,33 +270,20 @@
if (CONFIG_ARC_ICCM_SZ != cpu->iccm.sz)
panic("Linux built with incorrect ICCM Size\n");
#endif
-#endif
-}
-/*
- * Ensure that FP hardware and kernel config match
- * -If hardware contains DPFP, kernel needs to save/restore FPU state
- * across context switches
- * -If hardware lacks DPFP, but kernel configured to save FPU state then
- * kernel trying to access non-existant DPFP regs will crash
- *
- * We only check for Dbl precision Floating Point, because only DPFP
- * hardware has dedicated regs which need to be saved/restored on ctx-sw
- * (Single Precision uses core regs), thus kernel is kind of oblivious to it
- */
-static void arc_chk_fpu(void)
-{
- struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
+ /*
+ * FP hardware/software config sanity
+ * -If hardware contains DPFP, kernel needs to save/restore FPU state
+ * -If not, it will crash trying to save/restore the non-existent regs
+ *
+ * (only DPFP checked since SP has no arch visible regs)
+ */
+ fpu_enabled = IS_ENABLED(CONFIG_ARC_FPU_SAVE_RESTORE);
- if (cpu->dpfp.ver) {
-#ifndef CONFIG_ARC_FPU_SAVE_RESTORE
- pr_warn("DPFP support broken in this kernel...\n");
-#endif
- } else {
-#ifdef CONFIG_ARC_FPU_SAVE_RESTORE
- panic("H/w lacks DPFP support, apps won't work\n");
-#endif
- }
+ if (cpu->extn.fpu_dp && !fpu_enabled)
+ pr_warn("CONFIG_ARC_FPU_SAVE_RESTORE needed for working apps\n");
+ else if (!cpu->extn.fpu_dp && fpu_enabled)
+ panic("FPU non-existent, disable CONFIG_ARC_FPU_SAVE_RESTORE\n");
}
/*
@@ -303,15 +304,11 @@
arc_mmu_init();
arc_cache_init();
- arc_chk_ccms();
printk(arc_extn_mumbojumbo(cpu_id, str, sizeof(str)));
-
-#ifdef CONFIG_SMP
printk(arc_platform_smp_cpuinfo());
-#endif
- arc_chk_fpu();
+ arc_chk_core_config();
}
static inline int is_kernel(unsigned long addr)
@@ -360,11 +357,7 @@
machine_desc->init_early();
setup_processor();
-
-#ifdef CONFIG_SMP
smp_init_cpus();
-#endif
-
setup_arch_memory();
/* copy flat DT out of .init and then unflatten it */
@@ -385,7 +378,13 @@
static int __init customize_machine(void)
{
- /* Add platform devices */
+ of_clk_init(NULL);
+ /*
+ * Traverses flattened DeviceTree - registering platform devices
+ * (if any) complete with their resources
+ */
+ of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
+
if (machine_desc->init_machine)
machine_desc->init_machine();
@@ -419,19 +418,14 @@
seq_printf(m, arc_cpu_mumbojumbo(cpu_id, str, PAGE_SIZE));
- seq_printf(m, "Bogo MIPS : \t%lu.%02lu\n",
+ seq_printf(m, "Bogo MIPS\t: %lu.%02lu\n",
loops_per_jiffy / (500000 / HZ),
(loops_per_jiffy / (5000 / HZ)) % 100);
seq_printf(m, arc_mmu_mumbojumbo(cpu_id, str, PAGE_SIZE));
-
seq_printf(m, arc_cache_mumbojumbo(cpu_id, str, PAGE_SIZE));
-
seq_printf(m, arc_extn_mumbojumbo(cpu_id, str, PAGE_SIZE));
-
-#ifdef CONFIG_SMP
seq_printf(m, arc_platform_smp_cpuinfo());
-#endif
free_page((unsigned long)str);
done:
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index dcd317c..d01df0c 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -101,7 +101,7 @@
const char *arc_platform_smp_cpuinfo(void)
{
- return plat_smp_ops.info;
+ return plat_smp_ops.info ? : "";
}
/*
diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
index 9e11427..8c3a3e0 100644
--- a/arch/arc/mm/cache_arc700.c
+++ b/arch/arc/mm/cache_arc700.c
@@ -530,16 +530,9 @@
*/
void flush_icache_range(unsigned long kstart, unsigned long kend)
{
- unsigned int tot_sz, off, sz;
- unsigned long phy, pfn;
+ unsigned int tot_sz;
- /* printk("Kernel Cache Cohenercy: %lx to %lx\n",kstart, kend); */
-
- /* This is not the right API for user virtual address */
- if (kstart < TASK_SIZE) {
- BUG_ON("Flush icache range for user virtual addr space");
- return;
- }
+ WARN(kstart < TASK_SIZE, "%s() can't handle user vaddr", __func__);
/* Shortcut for bigger flush ranges.
* Here we don't care if this was kernel virtual or phy addr
@@ -572,6 +565,9 @@
* straddles across 2 virtual pages and hence need for loop
*/
while (tot_sz > 0) {
+ unsigned int off, sz;
+ unsigned long phy, pfn;
+
off = kstart % PAGE_SIZE;
pfn = vmalloc_to_pfn((void *)kstart);
phy = (pfn << PAGE_SHIFT) + off;
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index e1acf0c..7f47d2a 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -609,14 +609,12 @@
int n = 0;
struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[cpu_id].mmu;
- n += scnprintf(buf + n, len - n, "ARC700 MMU [v%x]\t: %dk PAGE, ",
- p_mmu->ver, TO_KB(p_mmu->pg_sz));
-
n += scnprintf(buf + n, len - n,
- "J-TLB %d (%dx%d), uDTLB %d, uITLB %d, %s\n",
+ "MMU [v%x]\t: %dk PAGE, JTLB %d (%dx%d), uDTLB %d, uITLB %d %s\n",
+ p_mmu->ver, TO_KB(p_mmu->pg_sz),
p_mmu->num_tlb, p_mmu->sets, p_mmu->ways,
p_mmu->u_dtlb, p_mmu->u_itlb,
- IS_ENABLED(CONFIG_ARC_MMU_SASID) ? "SASID" : "");
+ IS_ENABLED(CONFIG_ARC_MMU_SASID) ? ",SASID" : "");
return buf;
}
diff --git a/arch/arc/plat-arcfpga/Kconfig b/arch/arc/plat-arcfpga/Kconfig
index b9f34cf..217593a 100644
--- a/arch/arc/plat-arcfpga/Kconfig
+++ b/arch/arc/plat-arcfpga/Kconfig
@@ -8,7 +8,7 @@
menuconfig ARC_PLAT_FPGA_LEGACY
bool "\"Legacy\" ARC FPGA dev Boards"
- select ISS_SMP_EXTN if SMP
+ select ARC_HAS_COH_CACHES if SMP
help
Support for ARC development boards, provided by Synopsys.
These are based on FPGA or ISS. e.g.
@@ -18,17 +18,6 @@
if ARC_PLAT_FPGA_LEGACY
-config ARC_BOARD_ANGEL4
- bool "ARC Angel4"
- default y
- help
- ARC Angel4 FPGA Ref Platform (Xilinx Virtex Based)
-
-config ARC_BOARD_ML509
- bool "ML509"
- help
- ARC ML509 FPGA Ref Platform (Xilinx Virtex-5 Based)
-
config ISS_SMP_EXTN
bool "ARC SMP Extensions (ISS Models only)"
default n
diff --git a/arch/arc/plat-arcfpga/include/plat/irq.h b/arch/arc/plat-arcfpga/include/plat/irq.h
deleted file mode 100644
index 2c9dea6..0000000
--- a/arch/arc/plat-arcfpga/include/plat/irq.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * vineetg: Feb 2009
- * -For AA4 board, IRQ assignments to peripherals
- */
-
-#ifndef __PLAT_IRQ_H
-#define __PLAT_IRQ_H
-
-#define UART0_IRQ 5
-#define UART1_IRQ 10
-#define UART2_IRQ 11
-
-#define IDE_IRQ 13
-#define PCI_IRQ 14
-#define PS2_IRQ 15
-
-#ifdef CONFIG_SMP
-#define IDU_INTERRUPT_0 16
-#endif
-
-#endif
diff --git a/arch/arc/plat-arcfpga/include/plat/memmap.h b/arch/arc/plat-arcfpga/include/plat/memmap.h
deleted file mode 100644
index 5c78e61..0000000
--- a/arch/arc/plat-arcfpga/include/plat/memmap.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * vineetg: Feb 2009
- * -For AA4 board, System Memory Map for Peripherals etc
- */
-
-#ifndef __PLAT_MEMMAP_H
-#define __PLAT_MEMMAP_H
-
-#define UART0_BASE 0xC0FC1000
-#define UART1_BASE 0xC0FC1100
-
-#define IDE_CONTROLLER_BASE 0xC0FC9000
-
-#define AHB_PCI_HOST_BRG_BASE 0xC0FD0000
-
-#define PGU_BASEADDR 0xC0FC8000
-#define VLCK_ADDR 0xC0FCF028
-
-#define BVCI_LAT_UNIT_BASE 0xC0FED000
-
-#define PS2_BASE_ADDR 0xC0FCC000
-
-#endif
diff --git a/arch/arc/plat-arcfpga/platform.c b/arch/arc/plat-arcfpga/platform.c
index 1038949..afc8825 100644
--- a/arch/arc/plat-arcfpga/platform.c
+++ b/arch/arc/plat-arcfpga/platform.c
@@ -8,37 +8,9 @@
* published by the Free Software Foundation.
*/
-#include <linux/types.h>
#include <linux/init.h>
-#include <linux/device.h>
-#include <linux/platform_device.h>
-#include <linux/io.h>
-#include <linux/console.h>
-#include <linux/of_platform.h>
-#include <asm/setup.h>
-#include <asm/clk.h>
#include <asm/mach_desc.h>
-#include <plat/memmap.h>
#include <plat/smp.h>
-#include <plat/irq.h>
-
-static void __init plat_fpga_early_init(void)
-{
- pr_info("[plat-arcfpga]: registering early dev resources\n");
-
-#ifdef CONFIG_ISS_SMP_EXTN
- iss_model_init_early_smp();
-#endif
-}
-
-static void __init plat_fpga_populate_dev(void)
-{
- /*
- * Traverses flattened DeviceTree - registering platform devices
- * (if any) complete with their resources
- */
- of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
-}
/*----------------------- Machine Descriptions ------------------------------
*
@@ -48,41 +20,26 @@
* callback set, by matching the DT compatible name.
*/
-static const char *aa4_compat[] __initconst = {
+static const char *legacy_fpga_compat[] __initconst = {
"snps,arc-angel4",
- NULL,
-};
-
-MACHINE_START(ANGEL4, "angel4")
- .dt_compat = aa4_compat,
- .init_early = plat_fpga_early_init,
- .init_machine = plat_fpga_populate_dev,
-#ifdef CONFIG_ISS_SMP_EXTN
- .init_smp = iss_model_init_smp,
-#endif
-MACHINE_END
-
-static const char *ml509_compat[] __initconst = {
"snps,arc-ml509",
NULL,
};
-MACHINE_START(ML509, "ml509")
- .dt_compat = ml509_compat,
- .init_early = plat_fpga_early_init,
- .init_machine = plat_fpga_populate_dev,
-#ifdef CONFIG_SMP
+MACHINE_START(LEGACY_FPGA, "legacy_fpga")
+ .dt_compat = legacy_fpga_compat,
+#ifdef CONFIG_ISS_SMP_EXTN
+ .init_early = iss_model_init_early_smp,
.init_smp = iss_model_init_smp,
#endif
MACHINE_END
-static const char *nsimosci_compat[] __initconst = {
+static const char *simulation_compat[] __initconst = {
+ "snps,nsim",
"snps,nsimosci",
NULL,
};
-MACHINE_START(NSIMOSCI, "nsimosci")
- .dt_compat = nsimosci_compat,
- .init_early = NULL,
- .init_machine = plat_fpga_populate_dev,
+MACHINE_START(SIMULATION, "simulation")
+ .dt_compat = simulation_compat,
MACHINE_END
diff --git a/arch/arc/plat-arcfpga/smp.c b/arch/arc/plat-arcfpga/smp.c
index 92bad91..64797ba 100644
--- a/arch/arc/plat-arcfpga/smp.c
+++ b/arch/arc/plat-arcfpga/smp.c
@@ -13,9 +13,10 @@
#include <linux/smp.h>
#include <linux/irq.h>
-#include <plat/irq.h>
#include <plat/smp.h>
+#define IDU_INTERRUPT_0 16
+
static char smp_cpuinfo_buf[128];
/*
diff --git a/arch/arc/plat-tb10x/Kconfig b/arch/arc/plat-tb10x/Kconfig
index 6994c18..d14b3d3 100644
--- a/arch/arc/plat-tb10x/Kconfig
+++ b/arch/arc/plat-tb10x/Kconfig
@@ -18,7 +18,6 @@
menuconfig ARC_PLAT_TB10X
bool "Abilis TB10x"
- select COMMON_CLK
select PINCTRL
select PINCTRL_TB10X
select PINMUX
diff --git a/arch/arc/plat-tb10x/tb10x.c b/arch/arc/plat-tb10x/tb10x.c
index 06cb309..da0ac09 100644
--- a/arch/arc/plat-tb10x/tb10x.c
+++ b/arch/arc/plat-tb10x/tb10x.c
@@ -19,21 +19,9 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-
#include <linux/init.h>
-#include <linux/of_platform.h>
-#include <linux/clk-provider.h>
-#include <linux/pinctrl/consumer.h>
-
#include <asm/mach_desc.h>
-
-static void __init tb10x_platform_init(void)
-{
- of_clk_init(NULL);
- of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
-}
-
static const char *tb10x_compat[] __initdata = {
"abilis,arc-tb10x",
NULL,
@@ -41,5 +29,4 @@
MACHINE_START(TB10x, "tb10x")
.dt_compat = tb10x_compat,
- .init_machine = tb10x_platform_init,
MACHINE_END
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 709ecc9..f1dc7fc 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -261,6 +261,7 @@
CONFIG_XILINX_WATCHDOG=y
CONFIG_ORION_WATCHDOG=y
CONFIG_SUNXI_WATCHDOG=y
+CONFIG_MESON_WATCHDOG=y
CONFIG_MFD_AS3722=y
CONFIG_MFD_BCM590XX=y
CONFIG_MFD_CROS_EC=y
diff --git a/arch/arm/mach-highbank/highbank.c b/arch/arm/mach-highbank/highbank.c
index 8c35ae4..07a0957 100644
--- a/arch/arm/mach-highbank/highbank.c
+++ b/arch/arm/mach-highbank/highbank.c
@@ -20,7 +20,7 @@
#include <linux/input.h>
#include <linux/io.h>
#include <linux/irqchip.h>
-#include <linux/mailbox.h>
+#include <linux/pl320-ipc.h>
#include <linux/of.h>
#include <linux/of_irq.h>
#include <linux/of_platform.h>
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index ac9afde..9532f8d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1,5 +1,6 @@
config ARM64
def_bool y
+ select ARCH_BINFMT_ELF_RANDOMIZE_PIE
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select ARCH_HAS_SG_CHAIN
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
@@ -232,7 +233,7 @@
config ARM64_VA_BITS_48
bool "48-bit"
- depends on BROKEN
+ depends on !ARM_SMMU
endchoice
diff --git a/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi b/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi
index ac2cb24..c46cbb2 100644
--- a/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi
+++ b/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi
@@ -22,7 +22,7 @@
bank-width = <4>;
};
- vram@2,00000000 {
+ v2m_video_ram: vram@2,00000000 {
compatible = "arm,vexpress-vram";
reg = <2 0x00000000 0x00800000>;
};
@@ -179,9 +179,42 @@
clcd@1f0000 {
compatible = "arm,pl111", "arm,primecell";
reg = <0x1f0000 0x1000>;
+ interrupt-names = "combined";
interrupts = <14>;
clocks = <&v2m_oscclk1>, <&v2m_clk24mhz>;
clock-names = "clcdclk", "apb_pclk";
+ arm,pl11x,framebuffer = <0x18000000 0x00180000>;
+ memory-region = <&v2m_video_ram>;
+ max-memory-bandwidth = <130000000>; /* 16bpp @ 63.5MHz */
+
+ port {
+ v2m_clcd_pads: endpoint {
+ remote-endpoint = <&v2m_clcd_panel>;
+ arm,pl11x,tft-r0g0b0-pads = <0 8 16>;
+ };
+ };
+
+ panel {
+ compatible = "panel-dpi";
+
+ port {
+ v2m_clcd_panel: endpoint {
+ remote-endpoint = <&v2m_clcd_pads>;
+ };
+ };
+
+ panel-timing {
+ clock-frequency = <63500127>;
+ hactive = <1024>;
+ hback-porch = <152>;
+ hfront-porch = <48>;
+ hsync-len = <104>;
+ vactive = <768>;
+ vback-porch = <23>;
+ vfront-porch = <3>;
+ vsync-len = <4>;
+ };
+ };
};
virtio_block@0130000 {
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 9cd37de..4ce602c 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -78,6 +78,7 @@
# CONFIG_WLAN is not set
CONFIG_INPUT_EVDEV=y
# CONFIG_SERIO_SERPORT is not set
+CONFIG_SERIO_AMBAKMI=y
CONFIG_LEGACY_PTY_COUNT=16
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
@@ -90,6 +91,7 @@
CONFIG_REGULATOR=y
CONFIG_REGULATOR_FIXED_VOLTAGE=y
CONFIG_FB=y
+CONFIG_FB_ARMCLCD=y
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index 253e33b..56de5aa 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -37,8 +37,8 @@
typedef s32 compat_time_t;
typedef s32 compat_clock_t;
typedef s32 compat_pid_t;
-typedef u32 __compat_uid_t;
-typedef u32 __compat_gid_t;
+typedef u16 __compat_uid_t;
+typedef u16 __compat_gid_t;
typedef u16 __compat_uid16_t;
typedef u16 __compat_gid16_t;
typedef u32 __compat_uid32_t;
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index 01d3aab..1f65be3 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -126,7 +126,7 @@
* that it will "exec", and that there is sufficient room for the brk.
*/
extern unsigned long randomize_et_dyn(unsigned long base);
-#define ELF_ET_DYN_BASE (randomize_et_dyn(2 * TASK_SIZE_64 / 3))
+#define ELF_ET_DYN_BASE (2 * TASK_SIZE_64 / 3)
/*
* When the program starts, a1 contains a pointer to a function to be
@@ -169,7 +169,7 @@
#define COMPAT_ELF_PLATFORM ("v8l")
#endif
-#define COMPAT_ELF_ET_DYN_BASE (randomize_et_dyn(2 * TASK_SIZE_32 / 3))
+#define COMPAT_ELF_ET_DYN_BASE (2 * TASK_SIZE_32 / 3)
/* AArch32 registers. */
#define COMPAT_ELF_NGREG 18
diff --git a/arch/arm64/include/asm/irq_work.h b/arch/arm64/include/asm/irq_work.h
index 8e24ef3..b4f6b19 100644
--- a/arch/arm64/include/asm/irq_work.h
+++ b/arch/arm64/include/asm/irq_work.h
@@ -1,6 +1,8 @@
#ifndef __ASM_IRQ_WORK_H
#define __ASM_IRQ_WORK_H
+#ifdef CONFIG_SMP
+
#include <asm/smp.h>
static inline bool arch_irq_work_has_interrupt(void)
@@ -8,4 +10,13 @@
return !!__smp_cross_call;
}
+#else
+
+static inline bool arch_irq_work_has_interrupt(void)
+{
+ return false;
+}
+
+#endif
+
#endif /* __ASM_IRQ_WORK_H */
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 03aaa99..95c49eb 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -89,7 +89,8 @@
*/
if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) {
pr_err("System table signature incorrect\n");
- return -EINVAL;
+ retval = -EINVAL;
+ goto out;
}
if ((efi.systab->hdr.revision >> 16) < 2)
pr_warn("Warning: EFI system table version %d.%02d, expected 2.00 or greater\n",
@@ -103,6 +104,7 @@
for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i)
vendor[i] = c16[i];
vendor[i] = '\0';
+ early_memunmap(c16, sizeof(vendor));
}
pr_info("EFI v%u.%.02u by %s\n",
@@ -113,29 +115,11 @@
if (retval == 0)
set_bit(EFI_CONFIG_TABLES, &efi.flags);
- early_memunmap(c16, sizeof(vendor));
+out:
early_memunmap(efi.systab, sizeof(efi_system_table_t));
-
return retval;
}
-static __initdata char memory_type_name[][32] = {
- {"Reserved"},
- {"Loader Code"},
- {"Loader Data"},
- {"Boot Code"},
- {"Boot Data"},
- {"Runtime Code"},
- {"Runtime Data"},
- {"Conventional Memory"},
- {"Unusable Memory"},
- {"ACPI Reclaim Memory"},
- {"ACPI Memory NVS"},
- {"Memory Mapped I/O"},
- {"MMIO Port Space"},
- {"PAL Code"},
-};
-
/*
* Return true for RAM regions we want to permanently reserve.
*/
@@ -166,10 +150,13 @@
paddr = md->phys_addr;
npages = md->num_pages;
- if (uefi_debug)
- pr_info(" 0x%012llx-0x%012llx [%s]",
+ if (uefi_debug) {
+ char buf[64];
+
+ pr_info(" 0x%012llx-0x%012llx %s",
paddr, paddr + (npages << EFI_PAGE_SHIFT) - 1,
- memory_type_name[md->type]);
+ efi_md_typeattr_format(buf, sizeof(buf), md));
+ }
memrange_efi_to_native(&paddr, &npages);
size = npages << PAGE_SHIFT;
@@ -393,11 +380,16 @@
return -1;
}
- pr_info("Remapping and enabling EFI services.\n");
-
- /* replace early memmap mapping with permanent mapping */
mapsize = memmap.map_end - memmap.map;
early_memunmap(memmap.map, mapsize);
+
+ if (efi_runtime_disabled()) {
+ pr_info("EFI runtime services will be disabled.\n");
+ return -1;
+ }
+
+ pr_info("Remapping and enabling EFI services.\n");
+ /* replace early memmap mapping with permanent mapping */
memmap.map = (__force void *)ioremap_cache((phys_addr_t)memmap.phys_map,
mapsize);
memmap.map_end = memmap.map + mapsize;
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index c3065db..fde9923 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -378,8 +378,3 @@
{
return randomize_base(mm->brk);
}
-
-unsigned long randomize_et_dyn(unsigned long base)
-{
- return randomize_base(base);
-}
diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c
index fa324bd..4a07630 100644
--- a/arch/arm64/mm/ioremap.c
+++ b/arch/arm64/mm/ioremap.c
@@ -105,10 +105,10 @@
static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
#if CONFIG_ARM64_PGTABLE_LEVELS > 2
-static pte_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
+static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
#endif
#if CONFIG_ARM64_PGTABLE_LEVELS > 3
-static pte_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
+static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
#endif
static inline pud_t * __init early_ioremap_pud(unsigned long addr)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 6894ef3..0bf90d2 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -297,11 +297,15 @@
* create_mapping requires puds, pmds and ptes to be allocated from
* memory addressable from the initial direct kernel mapping.
*
- * The initial direct kernel mapping, located at swapper_pg_dir,
- * gives us PUD_SIZE memory starting from PHYS_OFFSET (which must be
- * aligned to 2MB as per Documentation/arm64/booting.txt).
+ * The initial direct kernel mapping, located at swapper_pg_dir, gives
+ * us PUD_SIZE (4K pages) or PMD_SIZE (64K pages) memory starting from
+ * PHYS_OFFSET (which must be aligned to 2MB as per
+ * Documentation/arm64/booting.txt).
*/
- limit = PHYS_OFFSET + PUD_SIZE;
+ if (IS_ENABLED(CONFIG_ARM64_64K_PAGES))
+ limit = PHYS_OFFSET + PMD_SIZE;
+ else
+ limit = PHYS_OFFSET + PUD_SIZE;
memblock_set_current_limit(limit);
/* map all the memory banks */
diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c
index 62c6101..6682b36 100644
--- a/arch/arm64/mm/pgd.c
+++ b/arch/arm64/mm/pgd.c
@@ -30,12 +30,14 @@
#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
+static struct kmem_cache *pgd_cache;
+
pgd_t *pgd_alloc(struct mm_struct *mm)
{
if (PGD_SIZE == PAGE_SIZE)
return (pgd_t *)get_zeroed_page(GFP_KERNEL);
else
- return kzalloc(PGD_SIZE, GFP_KERNEL);
+ return kmem_cache_zalloc(pgd_cache, GFP_KERNEL);
}
void pgd_free(struct mm_struct *mm, pgd_t *pgd)
@@ -43,5 +45,17 @@
if (PGD_SIZE == PAGE_SIZE)
free_page((unsigned long)pgd);
else
- kfree(pgd);
+ kmem_cache_free(pgd_cache, pgd);
}
+
+static int __init pgd_cache_init(void)
+{
+ /*
+ * Naturally aligned pgds required by the architecture.
+ */
+ if (PGD_SIZE != PAGE_SIZE)
+ pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_SIZE,
+ SLAB_PANIC, NULL);
+ return 0;
+}
+core_initcall(pgd_cache_init);
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
index 2134f7e..de0a81a 100644
--- a/arch/arm64/net/bpf_jit.h
+++ b/arch/arm64/net/bpf_jit.h
@@ -144,8 +144,12 @@
/* Data-processing (2 source) */
/* Rd = Rn OP Rm */
-#define A64_UDIV(sf, Rd, Rn, Rm) aarch64_insn_gen_data2(Rd, Rn, Rm, \
- A64_VARIANT(sf), AARCH64_INSN_DATA2_UDIV)
+#define A64_DATA2(sf, Rd, Rn, Rm, type) aarch64_insn_gen_data2(Rd, Rn, Rm, \
+ A64_VARIANT(sf), AARCH64_INSN_DATA2_##type)
+#define A64_UDIV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, UDIV)
+#define A64_LSLV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, LSLV)
+#define A64_LSRV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, LSRV)
+#define A64_ASRV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, ASRV)
/* Data-processing (3 source) */
/* Rd = Ra + Rn * Rm */
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 7ae3354..41f1e3e 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -19,12 +19,13 @@
#define pr_fmt(fmt) "bpf_jit: " fmt
#include <linux/filter.h>
-#include <linux/moduleloader.h>
#include <linux/printk.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
+
#include <asm/byteorder.h>
#include <asm/cacheflush.h>
+#include <asm/debug-monitors.h>
#include "bpf_jit.h"
@@ -119,6 +120,14 @@
return to - from;
}
+static void jit_fill_hole(void *area, unsigned int size)
+{
+ u32 *ptr;
+ /* We are guaranteed to have aligned memory. */
+ for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
+ *ptr++ = cpu_to_le32(AARCH64_BREAK_FAULT);
+}
+
static inline int epilogue_offset(const struct jit_ctx *ctx)
{
int to = ctx->offset[ctx->prog->len - 1];
@@ -196,6 +205,12 @@
emit(A64_RET(A64_LR), ctx);
}
+/* JITs an eBPF instruction.
+ * Returns:
+ * 0 - successfully JITed an 8-byte eBPF instruction.
+ * >0 - successfully JITed a 16-byte eBPF instruction.
+ * <0 - failed to JIT.
+ */
static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
const u8 code = insn->code;
@@ -252,6 +267,18 @@
emit(A64_MUL(is64, tmp, tmp, src), ctx);
emit(A64_SUB(is64, dst, dst, tmp), ctx);
break;
+ case BPF_ALU | BPF_LSH | BPF_X:
+ case BPF_ALU64 | BPF_LSH | BPF_X:
+ emit(A64_LSLV(is64, dst, dst, src), ctx);
+ break;
+ case BPF_ALU | BPF_RSH | BPF_X:
+ case BPF_ALU64 | BPF_RSH | BPF_X:
+ emit(A64_LSRV(is64, dst, dst, src), ctx);
+ break;
+ case BPF_ALU | BPF_ARSH | BPF_X:
+ case BPF_ALU64 | BPF_ARSH | BPF_X:
+ emit(A64_ASRV(is64, dst, dst, src), ctx);
+ break;
/* dst = -dst */
case BPF_ALU | BPF_NEG:
case BPF_ALU64 | BPF_NEG:
@@ -443,6 +470,27 @@
emit(A64_B(jmp_offset), ctx);
break;
+ /* dst = imm64 */
+ case BPF_LD | BPF_IMM | BPF_DW:
+ {
+ const struct bpf_insn insn1 = insn[1];
+ u64 imm64;
+
+ if (insn1.code != 0 || insn1.src_reg != 0 ||
+ insn1.dst_reg != 0 || insn1.off != 0) {
+ /* Note: verifier in BPF core must catch invalid
+ * instructions.
+ */
+ pr_err_once("Invalid BPF_LD_IMM64 instruction\n");
+ return -EINVAL;
+ }
+
+ /* imm is s32: zero-extend it so it cannot clobber the high word */
+ imm64 = (u64)insn1.imm << 32 | (u32)imm;
+ emit_a64_mov_i64(dst, imm64, ctx);
+ return 1;
+ }
+
/* LDX: dst = *(size *)(src + off) */
case BPF_LDX | BPF_MEM | BPF_W:
case BPF_LDX | BPF_MEM | BPF_H:
@@ -594,6 +642,10 @@
ctx->offset[i] = ctx->idx;
ret = build_insn(insn, ctx);
+ if (ret > 0) {
+ i++;
+ continue;
+ }
if (ret)
return ret;
}
@@ -613,8 +665,10 @@
void bpf_int_jit_compile(struct bpf_prog *prog)
{
+ struct bpf_binary_header *header;
struct jit_ctx ctx;
int image_size;
+ u8 *image_ptr;
if (!bpf_jit_enable)
return;
@@ -636,23 +690,25 @@
goto out;
build_prologue(&ctx);
-
build_epilogue(&ctx);
/* Now we know the actual image size. */
image_size = sizeof(u32) * ctx.idx;
- ctx.image = module_alloc(image_size);
- if (unlikely(ctx.image == NULL))
+ header = bpf_jit_binary_alloc(image_size, &image_ptr,
+ sizeof(u32), jit_fill_hole);
+ if (header == NULL)
goto out;
/* 2. Now, the actual pass. */
+ ctx.image = (u32 *)image_ptr;
ctx.idx = 0;
+
build_prologue(&ctx);
ctx.body_offset = ctx.idx;
if (build_body(&ctx)) {
- module_free(NULL, ctx.image);
+ bpf_jit_binary_free(header);
goto out;
}
@@ -663,17 +719,25 @@
bpf_jit_dump(prog->len, image_size, 2, ctx.image);
bpf_flush_icache(ctx.image, ctx.image + ctx.idx);
- prog->bpf_func = (void *)ctx.image;
- prog->jited = 1;
+ set_memory_ro((unsigned long)header, header->pages);
+ prog->bpf_func = (void *)ctx.image;
+ prog->jited = true;
out:
kfree(ctx.offset);
}
void bpf_jit_free(struct bpf_prog *prog)
{
- if (prog->jited)
- module_free(NULL, prog->bpf_func);
+ unsigned long addr = (unsigned long)prog->bpf_func & PAGE_MASK;
+ struct bpf_binary_header *header = (void *)addr;
- kfree(prog);
+ if (!prog->jited)
+ goto free_filter;
+
+ set_memory_rw(addr, header->pages);
+ bpf_jit_binary_free(header);
+
+free_filter:
+ bpf_prog_unlock_free(prog);
}
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 741b99c..c52d754 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -568,6 +568,7 @@
{
const char *unit;
unsigned long size;
+ char buf[64];
md = p;
size = md->num_pages << EFI_PAGE_SHIFT;
@@ -586,9 +587,10 @@
unit = "KB";
}
- printk("mem%02d: type=%2u, attr=0x%016lx, "
+ printk("mem%02d: %s "
"range=[0x%016lx-0x%016lx) (%4lu%s)\n",
- i, md->type, md->attribute, md->phys_addr,
+ i, efi_md_typeattr_format(buf, sizeof(buf), md),
+ md->phys_addr,
md->phys_addr + efi_md_size(md), size, unit);
}
}
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index ad6badb..f43aa53 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2066,6 +2066,7 @@
support is unavailable.
config MIPS_CPS_PM
+ depends on MIPS_CPS
select MIPS_CPC
bool
diff --git a/arch/mips/ath79/mach-db120.c b/arch/mips/ath79/mach-db120.c
index 4d661a1..9423f5a 100644
--- a/arch/mips/ath79/mach-db120.c
+++ b/arch/mips/ath79/mach-db120.c
@@ -113,7 +113,7 @@
ath79_register_pci();
}
#else
-static inline void db120_pci_init(void) {}
+static inline void db120_pci_init(u8 *eeprom) {}
#endif /* CONFIG_PCI */
static void __init db120_setup(void)
diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
index 38f4c32..5ebdb32 100644
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c
@@ -806,15 +806,6 @@
#endif
}
- if (octeon_is_simulation()) {
- /*
- * The simulator uses a mtdram device pre filled with
- * the filesystem. Also specify the calibration delay
- * to avoid calculating it every time.
- */
- strcat(arcs_cmdline, " rw root=1f00 slram=root,0x40000000,+1073741824");
- }
-
mips_hpt_frequency = octeon_get_clock_rate();
octeon_init_cvmcount();
diff --git a/arch/mips/include/asm/cop2.h b/arch/mips/include/asm/cop2.h
index 51f80bd..63b3468 100644
--- a/arch/mips/include/asm/cop2.h
+++ b/arch/mips/include/asm/cop2.h
@@ -37,15 +37,15 @@
#define cop2_present 1
#define cop2_lazy_restore 1
-#define cop2_save(r) do { (r); } while (0)
-#define cop2_restore(r) do { (r); } while (0)
+#define cop2_save(r) do { (void)(r); } while (0)
+#define cop2_restore(r) do { (void)(r); } while (0)
#else
#define cop2_present 0
#define cop2_lazy_restore 0
-#define cop2_save(r) do { (r); } while (0)
-#define cop2_restore(r) do { (r); } while (0)
+#define cop2_save(r) do { (void)(r); } while (0)
+#define cop2_restore(r) do { (void)(r); } while (0)
#endif
enum cu2_ops {
diff --git a/arch/mips/include/asm/ftrace.h b/arch/mips/include/asm/ftrace.h
index 992aaba..b463f2a 100644
--- a/arch/mips/include/asm/ftrace.h
+++ b/arch/mips/include/asm/ftrace.h
@@ -24,7 +24,7 @@
asm volatile ( \
"1: " load " %[tmp_dst], 0(%[tmp_src])\n" \
" li %[tmp_err], 0\n" \
- "2:\n" \
+ "2: .insn\n" \
\
".section .fixup, \"ax\"\n" \
"3: li %[tmp_err], 1\n" \
@@ -46,7 +46,7 @@
asm volatile ( \
"1: " store " %[tmp_src], 0(%[tmp_dst])\n"\
" li %[tmp_err], 0\n" \
- "2:\n" \
+ "2: .insn\n" \
\
".section .fixup, \"ax\"\n" \
"3: li %[tmp_err], 1\n" \
diff --git a/arch/mips/include/asm/idle.h b/arch/mips/include/asm/idle.h
index d9f932d..1c967ab 100644
--- a/arch/mips/include/asm/idle.h
+++ b/arch/mips/include/asm/idle.h
@@ -8,19 +8,12 @@
extern void r4k_wait(void);
extern asmlinkage void __r4k_wait(void);
extern void r4k_wait_irqoff(void);
-extern void __pastwait(void);
static inline int using_rollback_handler(void)
{
return cpu_wait == r4k_wait;
}
-static inline int address_is_in_r4k_wait_irqoff(unsigned long addr)
-{
- return addr >= (unsigned long)r4k_wait_irqoff &&
- addr < (unsigned long)__pastwait;
-}
-
extern int mips_cpuidle_wait_enter(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int index);
diff --git a/arch/mips/include/uapi/asm/ptrace.h b/arch/mips/include/uapi/asm/ptrace.h
index bbcfb8b..91a3d19 100644
--- a/arch/mips/include/uapi/asm/ptrace.h
+++ b/arch/mips/include/uapi/asm/ptrace.h
@@ -9,6 +9,8 @@
#ifndef _UAPI_ASM_PTRACE_H
#define _UAPI_ASM_PTRACE_H
+#include <linux/types.h>
+
/* 0 - 31 are integer registers, 32 - 63 are fp registers. */
#define FPR_BASE 32
#define PC 64
diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c
index 09ce459..0b9082b 100644
--- a/arch/mips/kernel/idle.c
+++ b/arch/mips/kernel/idle.c
@@ -68,9 +68,6 @@
" wait \n"
" .set pop \n");
local_irq_enable();
- __asm__(
- " .globl __pastwait \n"
- "__pastwait: \n");
}
/*
diff --git a/arch/mips/lasat/Kconfig b/arch/mips/lasat/Kconfig
index 1d2ee8a..8776d0a 100644
--- a/arch/mips/lasat/Kconfig
+++ b/arch/mips/lasat/Kconfig
@@ -4,7 +4,7 @@
config PICVUE_PROC
tristate "PICVUE LCD display driver /proc interface"
- depends on PICVUE
+ depends on PICVUE && PROC_FS
config DS1603
bool "DS1603 RTC driver"
diff --git a/arch/mips/loongson/lemote-2f/clock.c b/arch/mips/loongson/lemote-2f/clock.c
index a217061..462e34d 100644
--- a/arch/mips/loongson/lemote-2f/clock.c
+++ b/arch/mips/loongson/lemote-2f/clock.c
@@ -91,6 +91,7 @@
int clk_set_rate(struct clk *clk, unsigned long rate)
{
+ unsigned int rate_khz = rate / 1000;
struct cpufreq_frequency_table *pos;
int ret = 0;
int regval;
@@ -107,9 +108,9 @@
propagate_rate(clk);
cpufreq_for_each_valid_entry(pos, loongson2_clockmod_table)
- if (rate == pos->frequency)
+ if (rate_khz == pos->frequency)
break;
- if (rate != pos->frequency)
+ if (rate_khz != pos->frequency)
return -ENOTSUPP;
clk->rate = rate;
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index 7a47277..51a0fde 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -1023,7 +1023,7 @@
goto emul;
case cop1x_op:
- if (cpu_has_mips_4_5 || cpu_has_mips64)
+ if (cpu_has_mips_4_5 || cpu_has_mips64 || cpu_has_mips32r2)
/* its one of ours */
goto emul;
@@ -1068,7 +1068,7 @@
break;
case cop1x_op:
- if (!cpu_has_mips_4_5 && !cpu_has_mips64)
+ if (!cpu_has_mips_4_5 && !cpu_has_mips64 && !cpu_has_mips32r2)
return SIGILL;
sig = fpux_emu(xcp, ctx, ir, fault_addr);
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index a08dd53a..b5f228e 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -1062,6 +1062,7 @@
struct mips_huge_tlb_info {
int huge_pte;
int restore_scratch;
+ bool need_reload_pte;
};
static struct mips_huge_tlb_info
@@ -1076,6 +1077,7 @@
rv.huge_pte = scratch;
rv.restore_scratch = 0;
+ rv.need_reload_pte = false;
if (check_for_high_segbits) {
UASM_i_MFC0(p, tmp, C0_BADVADDR);
@@ -1264,6 +1266,7 @@
} else {
htlb_info.huge_pte = K0;
htlb_info.restore_scratch = 0;
+ htlb_info.need_reload_pte = true;
vmalloc_mode = refill_noscratch;
/*
* create the plain linear handler
@@ -1300,7 +1303,8 @@
}
#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
uasm_l_tlb_huge_update(&l, p);
- UASM_i_LW(&p, K0, 0, K1);
+ if (htlb_info.need_reload_pte)
+ UASM_i_LW(&p, htlb_info.huge_pte, 0, K1);
build_huge_update_entries(&p, htlb_info.huge_pte, K1);
build_huge_tlb_write_entry(&p, &l, &r, K0, tlb_random,
htlb_info.restore_scratch);
diff --git a/arch/mips/mti-malta/Makefile b/arch/mips/mti-malta/Makefile
index b9510ea..6510ace 100644
--- a/arch/mips/mti-malta/Makefile
+++ b/arch/mips/mti-malta/Makefile
@@ -5,8 +5,9 @@
# Copyright (C) 2008 Wind River Systems, Inc.
# written by Ralf Baechle <ralf@linux-mips.org>
#
-obj-y := malta-amon.o malta-display.o malta-init.o \
+obj-y := malta-display.o malta-init.o \
malta-int.o malta-memory.o malta-platform.o \
malta-reset.o malta-setup.o malta-time.o
+obj-$(CONFIG_MIPS_CMP) += malta-amon.o
obj-$(CONFIG_MIPS_MALTA_PM) += malta-pm.o
diff --git a/arch/mips/mti-sead3/Makefile b/arch/mips/mti-sead3/Makefile
index febf433..2ae49e9 100644
--- a/arch/mips/mti-sead3/Makefile
+++ b/arch/mips/mti-sead3/Makefile
@@ -14,7 +14,6 @@
sead3-setup.o sead3-time.o
obj-y += sead3-i2c-dev.o sead3-i2c.o \
- sead3-pic32-i2c-drv.o sead3-pic32-bus.o \
leds-sead3.o sead3-leds.o
obj-$(CONFIG_EARLY_PRINTK) += sead3-console.o
diff --git a/arch/mips/mti-sead3/sead3-i2c.c b/arch/mips/mti-sead3/sead3-i2c.c
index f70d5fc5..795ae83 100644
--- a/arch/mips/mti-sead3/sead3-i2c.c
+++ b/arch/mips/mti-sead3/sead3-i2c.c
@@ -5,10 +5,8 @@
*
* Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved.
*/
-#include <linux/module.h>
#include <linux/init.h>
#include <linux/platform_device.h>
-#include <irq.h>
struct resource sead3_i2c_resources[] = {
{
@@ -30,8 +28,4 @@
return platform_device_register(&sead3_i2c_device);
}
-module_init(sead3_i2c_init);
-
-MODULE_AUTHOR("Chris Dearman <chris@mips.com>");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("I2C probe driver for SEAD3");
+device_initcall(sead3_i2c_init);
diff --git a/arch/mips/mti-sead3/sead3-pic32-bus.c b/arch/mips/mti-sead3/sead3-pic32-bus.c
deleted file mode 100644
index 3b12aa5..0000000
--- a/arch/mips/mti-sead3/sead3-pic32-bus.c
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved.
- */
-#include <linux/delay.h>
-#include <linux/kernel.h>
-#include <linux/spinlock.h>
-#include <linux/io.h>
-#include <linux/errno.h>
-
-#define PIC32_NULL 0x00
-#define PIC32_RD 0x01
-#define PIC32_SYSRD 0x02
-#define PIC32_WR 0x10
-#define PIC32_SYSWR 0x20
-#define PIC32_IRQ_CLR 0x40
-#define PIC32_STATUS 0x80
-
-#define DELAY() udelay(100) /* FIXME: needed? */
-
-/* spinlock to ensure atomic access to PIC32 */
-static DEFINE_SPINLOCK(pic32_bus_lock);
-
-/* FIXME: io_remap these */
-static void __iomem *bus_xfer = (void __iomem *)0xbf000600;
-static void __iomem *bus_status = (void __iomem *)0xbf000060;
-
-static inline unsigned int ioready(void)
-{
- return readl(bus_status) & 1;
-}
-
-static inline void wait_ioready(void)
-{
- do { } while (!ioready());
-}
-
-static inline void wait_ioclear(void)
-{
- do { } while (ioready());
-}
-
-static inline void check_ioclear(void)
-{
- if (ioready()) {
- pr_debug("ioclear: initially busy\n");
- do {
- (void) readl(bus_xfer);
- DELAY();
- } while (ioready());
- pr_debug("ioclear: cleared busy\n");
- }
-}
-
-u32 pic32_bus_readl(u32 reg)
-{
- unsigned long flags;
- u32 status, val;
-
- spin_lock_irqsave(&pic32_bus_lock, flags);
-
- check_ioclear();
-
- writel((PIC32_RD << 24) | (reg & 0x00ffffff), bus_xfer);
- DELAY();
- wait_ioready();
- status = readl(bus_xfer);
- DELAY();
- val = readl(bus_xfer);
- wait_ioclear();
-
- pr_debug("pic32_bus_readl: *%x -> %x (status=%x)\n", reg, val, status);
-
- spin_unlock_irqrestore(&pic32_bus_lock, flags);
-
- return val;
-}
-
-void pic32_bus_writel(u32 val, u32 reg)
-{
- unsigned long flags;
- u32 status;
-
- spin_lock_irqsave(&pic32_bus_lock, flags);
-
- check_ioclear();
-
- writel((PIC32_WR << 24) | (reg & 0x00ffffff), bus_xfer);
- DELAY();
- writel(val, bus_xfer);
- DELAY();
- wait_ioready();
- status = readl(bus_xfer);
- wait_ioclear();
-
- pr_debug("pic32_bus_writel: *%x <- %x (status=%x)\n", reg, val, status);
-
- spin_unlock_irqrestore(&pic32_bus_lock, flags);
-}
diff --git a/arch/mips/mti-sead3/sead3-pic32-i2c-drv.c b/arch/mips/mti-sead3/sead3-pic32-i2c-drv.c
deleted file mode 100644
index 80fe194..0000000
--- a/arch/mips/mti-sead3/sead3-pic32-i2c-drv.c
+++ /dev/null
@@ -1,423 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved.
- */
-#include <linux/delay.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/platform_device.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/i2c.h>
-#include <linux/slab.h>
-
-#define PIC32_I2CxCON 0x0000
-#define PIC32_I2CxCONCLR 0x0004
-#define PIC32_I2CxCONSET 0x0008
-#define PIC32_I2CxCONINV 0x000C
-#define I2CCON_ON (1<<15)
-#define I2CCON_FRZ (1<<14)
-#define I2CCON_SIDL (1<<13)
-#define I2CCON_SCLREL (1<<12)
-#define I2CCON_STRICT (1<<11)
-#define I2CCON_A10M (1<<10)
-#define I2CCON_DISSLW (1<<9)
-#define I2CCON_SMEN (1<<8)
-#define I2CCON_GCEN (1<<7)
-#define I2CCON_STREN (1<<6)
-#define I2CCON_ACKDT (1<<5)
-#define I2CCON_ACKEN (1<<4)
-#define I2CCON_RCEN (1<<3)
-#define I2CCON_PEN (1<<2)
-#define I2CCON_RSEN (1<<1)
-#define I2CCON_SEN (1<<0)
-
-#define PIC32_I2CxSTAT 0x0010
-#define PIC32_I2CxSTATCLR 0x0014
-#define PIC32_I2CxSTATSET 0x0018
-#define PIC32_I2CxSTATINV 0x001C
-#define I2CSTAT_ACKSTAT (1<<15)
-#define I2CSTAT_TRSTAT (1<<14)
-#define I2CSTAT_BCL (1<<10)
-#define I2CSTAT_GCSTAT (1<<9)
-#define I2CSTAT_ADD10 (1<<8)
-#define I2CSTAT_IWCOL (1<<7)
-#define I2CSTAT_I2COV (1<<6)
-#define I2CSTAT_DA (1<<5)
-#define I2CSTAT_P (1<<4)
-#define I2CSTAT_S (1<<3)
-#define I2CSTAT_RW (1<<2)
-#define I2CSTAT_RBF (1<<1)
-#define I2CSTAT_TBF (1<<0)
-
-#define PIC32_I2CxADD 0x0020
-#define PIC32_I2CxADDCLR 0x0024
-#define PIC32_I2CxADDSET 0x0028
-#define PIC32_I2CxADDINV 0x002C
-#define PIC32_I2CxMSK 0x0030
-#define PIC32_I2CxMSKCLR 0x0034
-#define PIC32_I2CxMSKSET 0x0038
-#define PIC32_I2CxMSKINV 0x003C
-#define PIC32_I2CxBRG 0x0040
-#define PIC32_I2CxBRGCLR 0x0044
-#define PIC32_I2CxBRGSET 0x0048
-#define PIC32_I2CxBRGINV 0x004C
-#define PIC32_I2CxTRN 0x0050
-#define PIC32_I2CxTRNCLR 0x0054
-#define PIC32_I2CxTRNSET 0x0058
-#define PIC32_I2CxTRNINV 0x005C
-#define PIC32_I2CxRCV 0x0060
-
-struct i2c_platform_data {
- u32 base;
- struct i2c_adapter adap;
- u32 xfer_timeout;
- u32 ack_timeout;
- u32 ctl_timeout;
-};
-
-extern u32 pic32_bus_readl(u32 reg);
-extern void pic32_bus_writel(u32 val, u32 reg);
-
-static inline void
-StartI2C(struct i2c_platform_data *adap)
-{
- pr_debug("StartI2C\n");
- pic32_bus_writel(I2CCON_SEN, adap->base + PIC32_I2CxCONSET);
-}
-
-static inline void
-StopI2C(struct i2c_platform_data *adap)
-{
- pr_debug("StopI2C\n");
- pic32_bus_writel(I2CCON_PEN, adap->base + PIC32_I2CxCONSET);
-}
-
-static inline void
-AckI2C(struct i2c_platform_data *adap)
-{
- pr_debug("AckI2C\n");
- pic32_bus_writel(I2CCON_ACKDT, adap->base + PIC32_I2CxCONCLR);
- pic32_bus_writel(I2CCON_ACKEN, adap->base + PIC32_I2CxCONSET);
-}
-
-static inline void
-NotAckI2C(struct i2c_platform_data *adap)
-{
- pr_debug("NakI2C\n");
- pic32_bus_writel(I2CCON_ACKDT, adap->base + PIC32_I2CxCONSET);
- pic32_bus_writel(I2CCON_ACKEN, adap->base + PIC32_I2CxCONSET);
-}
-
-static inline int
-IdleI2C(struct i2c_platform_data *adap)
-{
- int i;
-
- pr_debug("IdleI2C\n");
- for (i = 0; i < adap->ctl_timeout; i++) {
- if (((pic32_bus_readl(adap->base + PIC32_I2CxCON) &
- (I2CCON_ACKEN | I2CCON_RCEN | I2CCON_PEN | I2CCON_RSEN |
- I2CCON_SEN)) == 0) &&
- ((pic32_bus_readl(adap->base + PIC32_I2CxSTAT) &
- (I2CSTAT_TRSTAT)) == 0))
- return 0;
- udelay(1);
- }
- return -ETIMEDOUT;
-}
-
-static inline u32
-MasterWriteI2C(struct i2c_platform_data *adap, u32 byte)
-{
- pr_debug("MasterWriteI2C\n");
-
- pic32_bus_writel(byte, adap->base + PIC32_I2CxTRN);
-
- return pic32_bus_readl(adap->base + PIC32_I2CxSTAT) & I2CSTAT_IWCOL;
-}
-
-static inline u32
-MasterReadI2C(struct i2c_platform_data *adap)
-{
- pr_debug("MasterReadI2C\n");
-
- pic32_bus_writel(I2CCON_RCEN, adap->base + PIC32_I2CxCONSET);
-
- while (pic32_bus_readl(adap->base + PIC32_I2CxCON) & I2CCON_RCEN)
- ;
-
- pic32_bus_writel(I2CSTAT_I2COV, adap->base + PIC32_I2CxSTATCLR);
-
- return pic32_bus_readl(adap->base + PIC32_I2CxRCV);
-}
-
-static int
-do_address(struct i2c_platform_data *adap, unsigned int addr, int rd)
-{
- pr_debug("doaddress\n");
-
- IdleI2C(adap);
- StartI2C(adap);
- IdleI2C(adap);
-
- addr <<= 1;
- if (rd)
- addr |= 1;
-
- if (MasterWriteI2C(adap, addr))
- return -EIO;
- IdleI2C(adap);
- if (pic32_bus_readl(adap->base + PIC32_I2CxSTAT) & I2CSTAT_ACKSTAT)
- return -EIO;
- return 0;
-}
-
-static int
-i2c_read(struct i2c_platform_data *adap, unsigned char *buf,
- unsigned int len)
-{
- int i;
- u32 data;
-
- pr_debug("i2c_read\n");
-
- i = 0;
- while (i < len) {
- data = MasterReadI2C(adap);
- buf[i++] = data;
- if (i < len)
- AckI2C(adap);
- else
- NotAckI2C(adap);
- }
-
- StopI2C(adap);
- IdleI2C(adap);
- return 0;
-}
-
-static int
-i2c_write(struct i2c_platform_data *adap, unsigned char *buf,
- unsigned int len)
-{
- int i;
- u32 data;
-
- pr_debug("i2c_write\n");
-
- i = 0;
- while (i < len) {
- data = buf[i];
- if (MasterWriteI2C(adap, data))
- return -EIO;
- IdleI2C(adap);
- if (pic32_bus_readl(adap->base + PIC32_I2CxSTAT) &
- I2CSTAT_ACKSTAT)
- return -EIO;
- i++;
- }
-
- StopI2C(adap);
- IdleI2C(adap);
- return 0;
-}
-
-static int
-platform_xfer(struct i2c_adapter *i2c_adap, struct i2c_msg *msgs, int num)
-{
- struct i2c_platform_data *adap = i2c_adap->algo_data;
- struct i2c_msg *p;
- int i, err = 0;
-
- pr_debug("platform_xfer\n");
- for (i = 0; i < num; i++) {
-#define __BUFSIZE 80
- int ii;
- static char buf[__BUFSIZE];
- char *b = buf;
-
- p = &msgs[i];
- b += sprintf(buf, " [%d bytes]", p->len);
- if ((p->flags & I2C_M_RD) == 0) {
- for (ii = 0; ii < p->len; ii++) {
- if (b < &buf[__BUFSIZE-4]) {
- b += sprintf(b, " %02x", p->buf[ii]);
- } else {
- strcat(b, "...");
- break;
- }
- }
- }
- pr_debug("xfer%d: DevAddr: %04x Op:%s Data:%s\n", i, p->addr,
- (p->flags & I2C_M_RD) ? "Rd" : "Wr", buf);
- }
-
-
- for (i = 0; !err && i < num; i++) {
- p = &msgs[i];
- err = do_address(adap, p->addr, p->flags & I2C_M_RD);
- if (err || !p->len)
- continue;
- if (p->flags & I2C_M_RD)
- err = i2c_read(adap, p->buf, p->len);
- else
- err = i2c_write(adap, p->buf, p->len);
- }
-
- /* Return the number of messages processed, or the error code. */
- if (err == 0)
- err = num;
-
- return err;
-}
-
-static u32
-platform_func(struct i2c_adapter *adap)
-{
- pr_debug("platform_algo\n");
- return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
-}
-
-static const struct i2c_algorithm platform_algo = {
- .master_xfer = platform_xfer,
- .functionality = platform_func,
-};
-
-static void i2c_platform_setup(struct i2c_platform_data *priv)
-{
- pr_debug("i2c_platform_setup\n");
-
- pic32_bus_writel(500, priv->base + PIC32_I2CxBRG);
- pic32_bus_writel(I2CCON_ON, priv->base + PIC32_I2CxCONCLR);
- pic32_bus_writel(I2CCON_ON, priv->base + PIC32_I2CxCONSET);
- pic32_bus_writel((I2CSTAT_BCL | I2CSTAT_IWCOL),
- (priv->base + PIC32_I2CxSTATCLR));
-}
-
-static void i2c_platform_disable(struct i2c_platform_data *priv)
-{
- pr_debug("i2c_platform_disable\n");
-}
-
-static int i2c_platform_probe(struct platform_device *pdev)
-{
- struct i2c_platform_data *priv;
- struct resource *r;
- int ret;
-
- pr_debug("i2c_platform_probe\n");
- r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!r)
- return -ENODEV;
-
- priv = devm_kzalloc(&pdev->dev, sizeof(struct i2c_platform_data),
- GFP_KERNEL);
- if (!priv)
- return -ENOMEM;
-
- /* FIXME: need to allocate resource in PIC32 space */
-#if 0
- priv->base = bus_request_region(r->start, resource_size(r),
- pdev->name);
-#else
- priv->base = r->start;
-#endif
- if (!priv->base)
- return -EBUSY;
-
- priv->xfer_timeout = 200;
- priv->ack_timeout = 200;
- priv->ctl_timeout = 200;
-
- priv->adap.nr = pdev->id;
- priv->adap.algo = &platform_algo;
- priv->adap.algo_data = priv;
- priv->adap.dev.parent = &pdev->dev;
- strlcpy(priv->adap.name, "PIC32 I2C", sizeof(priv->adap.name));
-
- i2c_platform_setup(priv);
-
- ret = i2c_add_numbered_adapter(&priv->adap);
- if (ret) {
- i2c_platform_disable(priv);
- return ret;
- }
-
- platform_set_drvdata(pdev, priv);
- return 0;
-}
-
-static int i2c_platform_remove(struct platform_device *pdev)
-{
- struct i2c_platform_data *priv = platform_get_drvdata(pdev);
-
- pr_debug("i2c_platform_remove\n");
- platform_set_drvdata(pdev, NULL);
- i2c_del_adapter(&priv->adap);
- i2c_platform_disable(priv);
- return 0;
-}
-
-#ifdef CONFIG_PM
-static int
-i2c_platform_suspend(struct platform_device *pdev, pm_message_t state)
-{
- struct i2c_platform_data *priv = platform_get_drvdata(pdev);
-
- dev_dbg(&pdev->dev, "i2c_platform_disable\n");
- i2c_platform_disable(priv);
-
- return 0;
-}
-
-static int
-i2c_platform_resume(struct platform_device *pdev)
-{
- struct i2c_platform_data *priv = platform_get_drvdata(pdev);
-
- dev_dbg(&pdev->dev, "i2c_platform_setup\n");
- i2c_platform_setup(priv);
-
- return 0;
-}
-#else
-#define i2c_platform_suspend NULL
-#define i2c_platform_resume NULL
-#endif
-
-static struct platform_driver i2c_platform_driver = {
- .driver = {
- .name = "i2c_pic32",
- .owner = THIS_MODULE,
- },
- .probe = i2c_platform_probe,
- .remove = i2c_platform_remove,
- .suspend = i2c_platform_suspend,
- .resume = i2c_platform_resume,
-};
-
-static int __init
-i2c_platform_init(void)
-{
- pr_debug("i2c_platform_init\n");
- return platform_driver_register(&i2c_platform_driver);
-}
-
-static void __exit
-i2c_platform_exit(void)
-{
- pr_debug("i2c_platform_exit\n");
- platform_driver_unregister(&i2c_platform_driver);
-}
-
-MODULE_AUTHOR("Chris Dearman, MIPS Technologies INC.");
-MODULE_DESCRIPTION("PIC32 I2C driver");
-MODULE_LICENSE("GPL");
-
-module_init(i2c_platform_init);
-module_exit(i2c_platform_exit);
diff --git a/arch/mips/pci/pci-lantiq.c b/arch/mips/pci/pci-lantiq.c
index 37fe8e7..d3ed15b 100644
--- a/arch/mips/pci/pci-lantiq.c
+++ b/arch/mips/pci/pci-lantiq.c
@@ -215,17 +215,12 @@
pci_clear_flags(PCI_PROBE_ONLY);
- res_cfg = platform_get_resource(pdev, IORESOURCE_MEM, 0);
res_bridge = platform_get_resource(pdev, IORESOURCE_MEM, 1);
- if (!res_cfg || !res_bridge) {
- dev_err(&pdev->dev, "missing memory resources\n");
- return -EINVAL;
- }
-
ltq_pci_membase = devm_ioremap_resource(&pdev->dev, res_bridge);
if (IS_ERR(ltq_pci_membase))
return PTR_ERR(ltq_pci_membase);
+ res_cfg = platform_get_resource(pdev, IORESOURCE_MEM, 0);
ltq_pci_mapped_cfg = devm_ioremap_resource(&pdev->dev, res_cfg);
if (IS_ERR(ltq_pci_mapped_cfg))
return PTR_ERR(ltq_pci_mapped_cfg);
diff --git a/arch/mips/pmcs-msp71xx/msp_irq.c b/arch/mips/pmcs-msp71xx/msp_irq.c
index f914c75..8d53d7a 100644
--- a/arch/mips/pmcs-msp71xx/msp_irq.c
+++ b/arch/mips/pmcs-msp71xx/msp_irq.c
@@ -16,6 +16,7 @@
#include <linux/time.h>
#include <asm/irq_cpu.h>
+#include <asm/setup.h>
#include <msp_int.h>
diff --git a/arch/mips/pmcs-msp71xx/msp_irq_cic.c b/arch/mips/pmcs-msp71xx/msp_irq_cic.c
index b8df2f7..1207ec4 100644
--- a/arch/mips/pmcs-msp71xx/msp_irq_cic.c
+++ b/arch/mips/pmcs-msp71xx/msp_irq_cic.c
@@ -131,11 +131,11 @@
int cpu;
unsigned long flags;
unsigned int mtflags;
- unsigned long imask = (1 << (irq - MSP_CIC_INTBASE));
+ unsigned long imask = (1 << (d->irq - MSP_CIC_INTBASE));
volatile u32 *cic_mask = (volatile u32 *)CIC_VPE0_MSK_REG;
/* timer balancing should be disabled in kernel code */
- BUG_ON(irq == MSP_INT_VPE0_TIMER || irq == MSP_INT_VPE1_TIMER);
+ BUG_ON(d->irq == MSP_INT_VPE0_TIMER || d->irq == MSP_INT_VPE1_TIMER);
LOCK_CORE(flags, mtflags);
/* enable if any of each VPE's TCs require this IRQ */
diff --git a/arch/mips/sibyte/Makefile b/arch/mips/sibyte/Makefile
index c8ed2c8..455c40d 100644
--- a/arch/mips/sibyte/Makefile
+++ b/arch/mips/sibyte/Makefile
@@ -25,3 +25,4 @@
obj-$(CONFIG_SIBYTE_SENTOSA) += swarm/
obj-$(CONFIG_SIBYTE_SWARM) += swarm/
obj-$(CONFIG_SIBYTE_BIGSUR) += swarm/
+obj-$(CONFIG_SIBYTE_LITTLESUR) += swarm/
diff --git a/arch/powerpc/configs/pseries_le_defconfig b/arch/powerpc/configs/pseries_le_defconfig
index 63392f4..d200888 100644
--- a/arch/powerpc/configs/pseries_le_defconfig
+++ b/arch/powerpc/configs/pseries_le_defconfig
@@ -48,7 +48,6 @@
CONFIG_IRQ_ALL_CPUS=y
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
-CONFIG_CMA=y
CONFIG_PPC_64K_PAGES=y
CONFIG_PPC_SUBPAGE_PROT=y
CONFIG_SCHED_SMT=y
@@ -138,6 +137,7 @@
CONFIG_NETPOLL_TRAP=y
CONFIG_TUN=m
CONFIG_VIRTIO_NET=m
+CONFIG_VHOST_NET=m
CONFIG_VORTEX=y
CONFIG_ACENIC=m
CONFIG_ACENIC_OMIT_TIGON_I=y
@@ -303,4 +303,9 @@
# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRYPTO_DEV_NX=y
CONFIG_CRYPTO_DEV_NX_ENCRYPT=m
+CONFIG_VIRTUALIZATION=y
+CONFIG_KVM_BOOK3S_64=m
+CONFIG_KVM_BOOK3S_64_HV=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y
CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 3b260ef..ca07f9c 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -71,9 +71,10 @@
#define EEH_PE_ISOLATED (1 << 0) /* Isolated PE */
#define EEH_PE_RECOVERING (1 << 1) /* Recovering PE */
-#define EEH_PE_RESET (1 << 2) /* PE reset in progress */
+#define EEH_PE_CFG_BLOCKED (1 << 2) /* Block config access */
#define EEH_PE_KEEP (1 << 8) /* Keep PE on hotplug */
+#define EEH_PE_CFG_RESTRICTED (1 << 9) /* Block config on error */
struct eeh_pe {
int type; /* PE type: PHB/Bus/Device */
diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h
index 0bb2372..8bf1b63 100644
--- a/arch/powerpc/include/asm/perf_event.h
+++ b/arch/powerpc/include/asm/perf_event.h
@@ -34,7 +34,7 @@
do { \
(regs)->result = 0; \
(regs)->nip = __ip; \
- (regs)->gpr[1] = *(unsigned long *)__get_SP(); \
+ (regs)->gpr[1] = current_stack_pointer(); \
asm volatile("mfmsr %0" : "=r" ((regs)->msr)); \
} while (0)
#endif
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index fe3f948..c998279 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1265,8 +1265,7 @@
#define proc_trap() asm volatile("trap")
-#define __get_SP() ({unsigned long sp; \
- asm volatile("mr %0,1": "=r" (sp)); sp;})
+extern unsigned long current_stack_pointer(void);
extern unsigned long scom970_read(unsigned int address);
extern void scom970_write(unsigned int address, unsigned long value);
diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index 6fa2708..6240698 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -19,7 +19,7 @@
/* ftrace syscalls requires exporting the sys_call_table */
#ifdef CONFIG_FTRACE_SYSCALLS
-extern const unsigned long *sys_call_table;
+extern const unsigned long sys_call_table[];
#endif /* CONFIG_FTRACE_SYSCALLS */
static inline long syscall_get_nr(struct task_struct *task,
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index adac9dc..484b2d4 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -53,9 +53,16 @@
#else
struct page *page;
int node = dev_to_node(dev);
+#ifdef CONFIG_FSL_SOC
u64 pfn = get_pfn_limit(dev);
int zone;
+ /*
+ * This code should be OK on other platforms, but we have drivers that
+ * don't set coherent_dma_mask. As a workaround we just ifdef it. This
+ * whole routine needs some serious cleanup.
+ */
+
zone = dma_pfn_limit_to_zone(pfn);
if (zone < 0) {
dev_err(dev, "%s: No suitable zone for pfn %#llx\n",
@@ -73,6 +80,7 @@
break;
#endif
};
+#endif /* CONFIG_FSL_SOC */
/* ignore region specifiers */
flag &= ~(__GFP_HIGHMEM);
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index d543e41..2248a19 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -257,6 +257,13 @@
struct eeh_dev *edev, *tmp;
size_t *plen = flag;
+ /* If the PE's config space is blocked, 0xFF's will be
+ * returned. It's pointless to collect the log in this
+ * case.
+ */
+ if (pe->state & EEH_PE_CFG_BLOCKED)
+ return NULL;
+
eeh_pe_for_each_dev(pe, edev, tmp)
*plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen,
EEH_PCI_REGS_LOG_LEN - *plen);
@@ -673,18 +680,18 @@
switch (state) {
case pcie_deassert_reset:
eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
- eeh_pe_state_clear(pe, EEH_PE_RESET);
+ eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
break;
case pcie_hot_reset:
- eeh_pe_state_mark(pe, EEH_PE_RESET);
+ eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
eeh_ops->reset(pe, EEH_RESET_HOT);
break;
case pcie_warm_reset:
- eeh_pe_state_mark(pe, EEH_PE_RESET);
+ eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
break;
default:
- eeh_pe_state_clear(pe, EEH_PE_RESET);
+ eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
return -EINVAL;
};
@@ -1523,7 +1530,7 @@
switch (option) {
case EEH_RESET_DEACTIVATE:
ret = eeh_ops->reset(pe, option);
- eeh_pe_state_clear(pe, EEH_PE_RESET);
+ eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
if (ret)
break;
@@ -1538,7 +1545,7 @@
*/
eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
- eeh_pe_state_mark(pe, EEH_PE_RESET);
+ eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
ret = eeh_ops->reset(pe, option);
break;
default:
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 3fd514f..6535936 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -528,13 +528,13 @@
eeh_pe_dev_traverse(pe, eeh_report_error, &result);
/* Issue reset */
- eeh_pe_state_mark(pe, EEH_PE_RESET);
+ eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
ret = eeh_reset_pe(pe);
if (ret) {
- eeh_pe_state_clear(pe, EEH_PE_RECOVERING | EEH_PE_RESET);
+ eeh_pe_state_clear(pe, EEH_PE_RECOVERING | EEH_PE_CFG_BLOCKED);
return ret;
}
- eeh_pe_state_clear(pe, EEH_PE_RESET);
+ eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
/* Unfreeze the PE */
ret = eeh_clear_pe_frozen_state(pe, true);
@@ -601,10 +601,10 @@
* config accesses. So we prefer to block them. However, controlled
* PCI config accesses initiated from EEH itself are allowed.
*/
- eeh_pe_state_mark(pe, EEH_PE_RESET);
+ eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
rc = eeh_reset_pe(pe);
if (rc) {
- eeh_pe_state_clear(pe, EEH_PE_RESET);
+ eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
return rc;
}
@@ -613,7 +613,7 @@
/* Restore PE */
eeh_ops->configure_bridge(pe);
eeh_pe_restore_bars(pe);
- eeh_pe_state_clear(pe, EEH_PE_RESET);
+ eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
/* Clear frozen state */
rc = eeh_clear_pe_frozen_state(pe, false);
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 53dd091..5a63e2b0 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -525,7 +525,7 @@
pe->state |= state;
/* Offline PCI devices if applicable */
- if (state != EEH_PE_ISOLATED)
+ if (!(state & EEH_PE_ISOLATED))
return NULL;
eeh_pe_for_each_dev(pe, edev, tmp) {
@@ -534,6 +534,10 @@
pdev->error_state = pci_channel_io_frozen;
}
+ /* Block PCI config access if required */
+ if (pe->state & EEH_PE_CFG_RESTRICTED)
+ pe->state |= EEH_PE_CFG_BLOCKED;
+
return NULL;
}
@@ -611,6 +615,10 @@
pdev->error_state = pci_channel_io_normal;
}
+ /* Unblock PCI config access if required */
+ if (pe->state & EEH_PE_CFG_RESTRICTED)
+ pe->state &= ~EEH_PE_CFG_BLOCKED;
+
return NULL;
}
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 050f79a..72e783e 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1270,11 +1270,6 @@
addi r3,r1,STACK_FRAME_OVERHEAD
bl hmi_exception_realmode
/* Windup the stack. */
- /* Clear MSR_RI before setting SRR0 and SRR1. */
- li r0,MSR_RI
- mfmsr r9 /* get MSR value */
- andc r9,r9,r0
- mtmsrd r9,1 /* Clear MSR_RI */
/* Move original HSRR0 and HSRR1 into the respective regs */
ld r9,_MSR(r1)
mtspr SPRN_HSRR1,r9
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 8eb857f..c143835 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -466,7 +466,7 @@
#ifdef CONFIG_DEBUG_STACKOVERFLOW
long sp;
- sp = __get_SP() & (THREAD_SIZE-1);
+ sp = current_stack_pointer() & (THREAD_SIZE-1);
/* check for stack overflow: is there less than 2KB free? */
if (unlikely(sp < (sizeof(struct thread_info) + 2048))) {
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index 7ce26d4..0d43219 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -114,3 +114,7 @@
mtlr r0
mr r3,r4
blr
+
+_GLOBAL(current_stack_pointer)
+ PPC_LL r3,0(r1)
+ blr
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index c4dfff6..202963e 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -41,3 +41,5 @@
#ifdef CONFIG_EPAPR_PARAVIRT
EXPORT_SYMBOL(epapr_hypercall_start);
#endif
+
+EXPORT_SYMBOL(current_stack_pointer);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index aa1df89..923cd2d 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1545,7 +1545,7 @@
tsk = current;
if (sp == 0) {
if (tsk == current)
- asm("mr %0,1" : "=r" (sp));
+ sp = current_stack_pointer();
else
sp = tsk->thread.ksp;
}
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index c168337..7c55b86 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -66,6 +66,11 @@
return PCIBIOS_DEVICE_NOT_FOUND;
if (!config_access_valid(pdn, where))
return PCIBIOS_BAD_REGISTER_NUMBER;
+#ifdef CONFIG_EEH
+ if (pdn->edev && pdn->edev->pe &&
+ (pdn->edev->pe->state & EEH_PE_CFG_BLOCKED))
+ return PCIBIOS_SET_FAILED;
+#endif
addr = rtas_config_addr(pdn->busno, pdn->devfn, where);
buid = pdn->phb->buid;
@@ -90,9 +95,6 @@
struct device_node *busdn, *dn;
struct pci_dn *pdn;
bool found = false;
-#ifdef CONFIG_EEH
- struct eeh_dev *edev;
-#endif
int ret;
/* Search only direct children of the bus */
@@ -109,11 +111,6 @@
if (!found)
return PCIBIOS_DEVICE_NOT_FOUND;
-#ifdef CONFIG_EEH
- edev = of_node_to_eeh_dev(dn);
- if (edev && edev->pe && edev->pe->state & EEH_PE_RESET)
- return PCIBIOS_DEVICE_NOT_FOUND;
-#endif
ret = rtas_read_config(pdn, where, size, val);
if (*val == EEH_IO_ERROR_VALUE(size) &&
@@ -132,6 +129,11 @@
return PCIBIOS_DEVICE_NOT_FOUND;
if (!config_access_valid(pdn, where))
return PCIBIOS_BAD_REGISTER_NUMBER;
+#ifdef CONFIG_EEH
+ if (pdn->edev && pdn->edev->pe &&
+ (pdn->edev->pe->state & EEH_PE_CFG_BLOCKED))
+ return PCIBIOS_SET_FAILED;
+#endif
addr = rtas_config_addr(pdn->busno, pdn->devfn, where);
buid = pdn->phb->buid;
@@ -155,10 +157,6 @@
struct device_node *busdn, *dn;
struct pci_dn *pdn;
bool found = false;
-#ifdef CONFIG_EEH
- struct eeh_dev *edev;
-#endif
- int ret;
/* Search only direct children of the bus */
busdn = pci_bus_to_OF_node(bus);
@@ -173,14 +171,8 @@
if (!found)
return PCIBIOS_DEVICE_NOT_FOUND;
-#ifdef CONFIG_EEH
- edev = of_node_to_eeh_dev(dn);
- if (edev && edev->pe && (edev->pe->state & EEH_PE_RESET))
- return PCIBIOS_DEVICE_NOT_FOUND;
-#endif
- ret = rtas_write_config(pdn, where, size, val);
- return ret;
+ return rtas_write_config(pdn, where, size, val);
}
static struct pci_ops rtas_pci_ops = {
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index cd07d79..4f3cfe1 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -522,36 +522,36 @@
smp_release_cpus();
#endif
- printk("Starting Linux PPC64 %s\n", init_utsname()->version);
+ pr_info("Starting Linux PPC64 %s\n", init_utsname()->version);
- printk("-----------------------------------------------------\n");
- printk("ppc64_pft_size = 0x%llx\n", ppc64_pft_size);
- printk("phys_mem_size = 0x%llx\n", memblock_phys_mem_size());
+ pr_info("-----------------------------------------------------\n");
+ pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size);
+ pr_info("phys_mem_size = 0x%llx\n", memblock_phys_mem_size());
if (ppc64_caches.dline_size != 0x80)
- printk("dcache_line_size = 0x%x\n", ppc64_caches.dline_size);
+ pr_info("dcache_line_size = 0x%x\n", ppc64_caches.dline_size);
if (ppc64_caches.iline_size != 0x80)
- printk("icache_line_size = 0x%x\n", ppc64_caches.iline_size);
+ pr_info("icache_line_size = 0x%x\n", ppc64_caches.iline_size);
- printk("cpu_features = 0x%016lx\n", cur_cpu_spec->cpu_features);
- printk(" possible = 0x%016lx\n", CPU_FTRS_POSSIBLE);
- printk(" always = 0x%016lx\n", CPU_FTRS_ALWAYS);
- printk("cpu_user_features = 0x%08x 0x%08x\n", cur_cpu_spec->cpu_user_features,
+ pr_info("cpu_features = 0x%016lx\n", cur_cpu_spec->cpu_features);
+ pr_info(" possible = 0x%016lx\n", CPU_FTRS_POSSIBLE);
+ pr_info(" always = 0x%016lx\n", CPU_FTRS_ALWAYS);
+ pr_info("cpu_user_features = 0x%08x 0x%08x\n", cur_cpu_spec->cpu_user_features,
cur_cpu_spec->cpu_user_features2);
- printk("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features);
- printk("firmware_features = 0x%016lx\n", powerpc_firmware_features);
+ pr_info("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features);
+ pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features);
#ifdef CONFIG_PPC_STD_MMU_64
if (htab_address)
- printk("htab_address = 0x%p\n", htab_address);
+ pr_info("htab_address = 0x%p\n", htab_address);
- printk("htab_hash_mask = 0x%lx\n", htab_hash_mask);
+ pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask);
#endif
if (PHYSICAL_START > 0)
- printk("physical_start = 0x%llx\n",
+ pr_info("physical_start = 0x%llx\n",
(unsigned long long)PHYSICAL_START);
- printk("-----------------------------------------------------\n");
+ pr_info("-----------------------------------------------------\n");
DBG(" <- setup_system()\n");
}
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index 3d30ef1..ea43a347 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -50,7 +50,7 @@
{
unsigned long sp;
- asm("mr %0,1" : "=r" (sp));
+ sp = current_stack_pointer();
save_context_stack(trace, sp, current, 1);
}
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 649666d..e5236c2 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -8,6 +8,8 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) "numa: " fmt
+
#include <linux/threads.h>
#include <linux/bootmem.h>
#include <linux/init.h>
@@ -1153,6 +1155,22 @@
}
early_param("numa", early_numa);
+static bool topology_updates_enabled = true;
+
+static int __init early_topology_updates(char *p)
+{
+ if (!p)
+ return 0;
+
+ if (!strcmp(p, "off")) {
+ pr_info("Disabling topology updates\n");
+ topology_updates_enabled = false;
+ }
+
+ return 0;
+}
+early_param("topology_updates", early_topology_updates);
+
#ifdef CONFIG_MEMORY_HOTPLUG
/*
* Find the node associated with a hot added memory section for
@@ -1442,8 +1460,11 @@
long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
u64 flags = 1;
int hwcpu = get_hard_smp_processor_id(cpu);
+ int i;
rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu);
+ for (i = 0; i < 6; i++)
+ retbuf[i] = cpu_to_be64(retbuf[i]);
vphn_unpack_associativity(retbuf, associativity);
return rc;
@@ -1539,6 +1560,9 @@
struct device *dev;
int weight, new_nid, i = 0;
+ if (!prrn_enabled && !vphn_enabled)
+ return 0;
+
weight = cpumask_weight(&cpu_associativity_changes_mask);
if (!weight)
return 0;
@@ -1592,6 +1616,15 @@
cpu = cpu_last_thread_sibling(cpu);
}
+ pr_debug("Topology update for the following CPUs:\n");
+ if (cpumask_weight(&updated_cpus)) {
+ for (ud = &updates[0]; ud; ud = ud->next) {
+ pr_debug("cpu %d moving from node %d "
+ "to %d\n", ud->cpu,
+ ud->old_nid, ud->new_nid);
+ }
+ }
+
/*
* In cases where we have nothing to update (because the updates list
* is too short or because the new topology is same as the old one),
@@ -1800,8 +1833,12 @@
static int topology_update_init(void)
{
- start_topology_update();
- proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops);
+ /* Do not poll for changes if disabled at boot */
+ if (topology_updates_enabled)
+ start_topology_update();
+
+ if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops))
+ return -ENOMEM;
return 0;
}
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index 426814a..eba9cb1 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -373,7 +373,7 @@
* moving forward, we have to return operational
* state during PE reset.
*/
- if (pe->state & EEH_PE_RESET) {
+ if (pe->state & EEH_PE_CFG_BLOCKED) {
result = (EEH_STATE_MMIO_ACTIVE |
EEH_STATE_DMA_ACTIVE |
EEH_STATE_MMIO_ENABLED |
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 3e89cbf..1d19e79 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -169,6 +169,26 @@
}
/*
+ * If the PE contains any one of following adapters, the
+ * PCI config space can't be accessed when dumping EEH log.
+ * Otherwise, we will run into fenced PHB caused by shortage
+ * of outbound credits in the adapter. The PCI config access
+ * should be blocked until PE reset. MMIO access is dropped
+ * by hardware certainly. In order to drop PCI config requests,
+ * one more flag (EEH_PE_CFG_RESTRICTED) is introduced, which
+ * will be checked in the backend for PE state retrieval. If
+ * the PE becomes frozen for the first time and the flag has
+ * been set for the PE, we will set EEH_PE_CFG_BLOCKED for
+ * that PE to block its config space.
+ *
+ * Broadcom Austin 4-ports NICs (14e4:1657)
+ * Broadcom Shiner 2-ports 10G NICs (14e4:168e)
+ */
+ if ((dev->vendor == PCI_VENDOR_ID_BROADCOM && dev->device == 0x1657) ||
+ (dev->vendor == PCI_VENDOR_ID_BROADCOM && dev->device == 0x168e))
+ edev->pe->state |= EEH_PE_CFG_RESTRICTED;
+
+ /*
* Cache the PE primary bus, which can't be fetched when
* full hotplug is in progress. In that case, all child
* PCI devices of the PE are expected to be removed prior
@@ -383,6 +403,39 @@
return ret;
}
+static inline bool powernv_eeh_cfg_blocked(struct device_node *dn)
+{
+ struct eeh_dev *edev = of_node_to_eeh_dev(dn);
+
+ if (!edev || !edev->pe)
+ return false;
+
+ if (edev->pe->state & EEH_PE_CFG_BLOCKED)
+ return true;
+
+ return false;
+}
+
+static int powernv_eeh_read_config(struct device_node *dn,
+ int where, int size, u32 *val)
+{
+ if (powernv_eeh_cfg_blocked(dn)) {
+ *val = 0xFFFFFFFF;
+ return PCIBIOS_SET_FAILED;
+ }
+
+ return pnv_pci_cfg_read(dn, where, size, val);
+}
+
+static int powernv_eeh_write_config(struct device_node *dn,
+ int where, int size, u32 val)
+{
+ if (powernv_eeh_cfg_blocked(dn))
+ return PCIBIOS_SET_FAILED;
+
+ return pnv_pci_cfg_write(dn, where, size, val);
+}
+
/**
* powernv_eeh_next_error - Retrieve next EEH error to handle
* @pe: Affected PE
@@ -440,8 +493,8 @@
.get_log = powernv_eeh_get_log,
.configure_bridge = powernv_eeh_configure_bridge,
.err_inject = powernv_eeh_err_inject,
- .read_config = pnv_pci_cfg_read,
- .write_config = pnv_pci_cfg_write,
+ .read_config = powernv_eeh_read_config,
+ .write_config = powernv_eeh_write_config,
.next_error = powernv_eeh_next_error,
.restore_config = powernv_eeh_restore_config
};
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index b642b05..d019b08 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -194,6 +194,27 @@
* fwnmi area at 0x7000 to provide the glue space to OPAL
*/
glue = 0x7000;
+
+ /*
+ * Check if we are running on newer firmware that exports
+ * OPAL_HANDLE_HMI token. If yes, then don't ask OPAL to patch
+ * the HMI interrupt and we catch it directly in Linux.
+ *
+ * For older firmware (i.e. currently released POWER8 System Firmware
+ * as of today <= SV810_087), we fallback to old behavior and let OPAL
+ * patch the HMI vector and handle it inside OPAL firmware.
+ *
+ * For newer firmware (in development/yet to be released) we will
+ * start catching/handling HMI directly in Linux.
+ */
+ if (!opal_check_token(OPAL_HANDLE_HMI)) {
+ pr_info("opal: Old firmware detected, OPAL handles HMIs.\n");
+ opal_register_exception_handler(
+ OPAL_HYPERVISOR_MAINTENANCE_HANDLER,
+ 0, glue);
+ glue += 128;
+ }
+
opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue);
#endif
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index b3ca77d..b2187d0 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -505,7 +505,7 @@
edev = of_node_to_eeh_dev(dn);
if (edev) {
if (edev->pe &&
- (edev->pe->state & EEH_PE_RESET))
+ (edev->pe->state & EEH_PE_CFG_BLOCKED))
return false;
if (edev->mode & EEH_DEV_REMOVED)
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index fdf01b6..6ad83bd 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -25,11 +25,11 @@
#include <asm/rtas.h>
struct cc_workarea {
- u32 drc_index;
- u32 zero;
- u32 name_offset;
- u32 prop_length;
- u32 prop_offset;
+ __be32 drc_index;
+ __be32 zero;
+ __be32 name_offset;
+ __be32 prop_length;
+ __be32 prop_offset;
};
void dlpar_free_cc_property(struct property *prop)
@@ -49,11 +49,11 @@
if (!prop)
return NULL;
- name = (char *)ccwa + ccwa->name_offset;
+ name = (char *)ccwa + be32_to_cpu(ccwa->name_offset);
prop->name = kstrdup(name, GFP_KERNEL);
- prop->length = ccwa->prop_length;
- value = (char *)ccwa + ccwa->prop_offset;
+ prop->length = be32_to_cpu(ccwa->prop_length);
+ value = (char *)ccwa + be32_to_cpu(ccwa->prop_offset);
prop->value = kmemdup(value, prop->length, GFP_KERNEL);
if (!prop->value) {
dlpar_free_cc_property(prop);
@@ -79,7 +79,7 @@
if (!dn)
return NULL;
- name = (char *)ccwa + ccwa->name_offset;
+ name = (char *)ccwa + be32_to_cpu(ccwa->name_offset);
dn->full_name = kasprintf(GFP_KERNEL, "%s/%s", path, name);
if (!dn->full_name) {
kfree(dn);
@@ -126,7 +126,7 @@
#define CALL_AGAIN -2
#define ERR_CFG_USE -9003
-struct device_node *dlpar_configure_connector(u32 drc_index,
+struct device_node *dlpar_configure_connector(__be32 drc_index,
struct device_node *parent)
{
struct device_node *dn;
@@ -414,7 +414,7 @@
if (!parent)
return -ENODEV;
- dn = dlpar_configure_connector(drc_index, parent);
+ dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
if (!dn)
return -EINVAL;
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index b174fa7..5c375f9 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -247,7 +247,7 @@
unsigned int cpu;
cpumask_var_t candidate_mask, tmp;
int err = -ENOSPC, len, nthreads, i;
- const u32 *intserv;
+ const __be32 *intserv;
intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len);
if (!intserv)
@@ -293,7 +293,7 @@
for_each_cpu(cpu, tmp) {
BUG_ON(cpu_present(cpu));
set_cpu_present(cpu, true);
- set_hard_smp_processor_id(cpu, *intserv++);
+ set_hard_smp_processor_id(cpu, be32_to_cpu(*intserv++));
}
err = 0;
out_unlock:
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index de1ec54..e32e009 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -30,7 +30,6 @@
#include <linux/mm.h>
#include <linux/memblock.h>
#include <linux/spinlock.h>
-#include <linux/sched.h> /* for show_stack */
#include <linux/string.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
@@ -168,7 +167,7 @@
printk("\tindex = 0x%llx\n", (u64)tbl->it_index);
printk("\ttcenum = 0x%llx\n", (u64)tcenum);
printk("\ttce val = 0x%llx\n", tce );
- show_stack(current, (unsigned long *)__get_SP());
+ dump_stack();
}
tcenum++;
@@ -257,7 +256,7 @@
printk("\tindex = 0x%llx\n", (u64)tbl->it_index);
printk("\tnpages = 0x%llx\n", (u64)npages);
printk("\ttce[0] val = 0x%llx\n", tcep[0]);
- show_stack(current, (unsigned long *)__get_SP());
+ dump_stack();
}
return ret;
}
@@ -273,7 +272,7 @@
printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
printk("\tindex = 0x%llx\n", (u64)tbl->it_index);
printk("\ttcenum = 0x%llx\n", (u64)tcenum);
- show_stack(current, (unsigned long *)__get_SP());
+ dump_stack();
}
tcenum++;
@@ -292,7 +291,7 @@
printk("\trc = %lld\n", rc);
printk("\tindex = 0x%llx\n", (u64)tbl->it_index);
printk("\tnpages = 0x%llx\n", (u64)npages);
- show_stack(current, (unsigned long *)__get_SP());
+ dump_stack();
}
}
@@ -307,7 +306,7 @@
printk("tce_get_pSeriesLP: plpar_tce_get failed. rc=%lld\n", rc);
printk("\tindex = 0x%llx\n", (u64)tbl->it_index);
printk("\ttcenum = 0x%llx\n", (u64)tcenum);
- show_stack(current, (unsigned long *)__get_SP());
+ dump_stack();
}
return tce_ret;
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 361add6..1796c54 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -56,7 +56,8 @@
/* Dynamic logical Partitioning/Mobility */
extern void dlpar_free_cc_nodes(struct device_node *);
extern void dlpar_free_cc_property(struct property *);
-extern struct device_node *dlpar_configure_connector(u32, struct device_node *);
+extern struct device_node *dlpar_configure_connector(__be32,
+ struct device_node *);
extern int dlpar_attach_node(struct device_node *);
extern int dlpar_detach_node(struct device_node *);
diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c
index 0c75214..73b64c7 100644
--- a/arch/powerpc/sysdev/msi_bitmap.c
+++ b/arch/powerpc/sysdev/msi_bitmap.c
@@ -145,59 +145,64 @@
#ifdef CONFIG_MSI_BITMAP_SELFTEST
-#define check(x) \
- if (!(x)) printk("msi_bitmap: test failed at line %d\n", __LINE__);
-
static void __init test_basics(void)
{
struct msi_bitmap bmp;
- int i, size = 512;
+ int rc, i, size = 512;
/* Can't allocate a bitmap of 0 irqs */
- check(msi_bitmap_alloc(&bmp, 0, NULL) != 0);
+ WARN_ON(msi_bitmap_alloc(&bmp, 0, NULL) == 0);
/* of_node may be NULL */
- check(0 == msi_bitmap_alloc(&bmp, size, NULL));
+ WARN_ON(msi_bitmap_alloc(&bmp, size, NULL));
/* Should all be free by default */
- check(0 == bitmap_find_free_region(bmp.bitmap, size,
- get_count_order(size)));
+ WARN_ON(bitmap_find_free_region(bmp.bitmap, size, get_count_order(size)));
bitmap_release_region(bmp.bitmap, 0, get_count_order(size));
/* With no node, there's no msi-available-ranges, so expect > 0 */
- check(msi_bitmap_reserve_dt_hwirqs(&bmp) > 0);
+ WARN_ON(msi_bitmap_reserve_dt_hwirqs(&bmp) <= 0);
/* Should all still be free */
- check(0 == bitmap_find_free_region(bmp.bitmap, size,
- get_count_order(size)));
+ WARN_ON(bitmap_find_free_region(bmp.bitmap, size, get_count_order(size)));
bitmap_release_region(bmp.bitmap, 0, get_count_order(size));
/* Check we can fill it up and then no more */
for (i = 0; i < size; i++)
- check(msi_bitmap_alloc_hwirqs(&bmp, 1) >= 0);
+ WARN_ON(msi_bitmap_alloc_hwirqs(&bmp, 1) < 0);
- check(msi_bitmap_alloc_hwirqs(&bmp, 1) < 0);
+ WARN_ON(msi_bitmap_alloc_hwirqs(&bmp, 1) >= 0);
/* Should all be allocated */
- check(bitmap_find_free_region(bmp.bitmap, size, 0) < 0);
+ WARN_ON(bitmap_find_free_region(bmp.bitmap, size, 0) >= 0);
/* And if we free one we can then allocate another */
msi_bitmap_free_hwirqs(&bmp, size / 2, 1);
- check(msi_bitmap_alloc_hwirqs(&bmp, 1) == size / 2);
+ WARN_ON(msi_bitmap_alloc_hwirqs(&bmp, 1) != size / 2);
+
+ /* Free most of them for the alignment tests */
+ msi_bitmap_free_hwirqs(&bmp, 3, size - 3);
/* Check we get a naturally aligned offset */
- check(msi_bitmap_alloc_hwirqs(&bmp, 2) % 2 == 0);
- check(msi_bitmap_alloc_hwirqs(&bmp, 4) % 4 == 0);
- check(msi_bitmap_alloc_hwirqs(&bmp, 8) % 8 == 0);
- check(msi_bitmap_alloc_hwirqs(&bmp, 9) % 16 == 0);
- check(msi_bitmap_alloc_hwirqs(&bmp, 3) % 4 == 0);
- check(msi_bitmap_alloc_hwirqs(&bmp, 7) % 8 == 0);
- check(msi_bitmap_alloc_hwirqs(&bmp, 121) % 128 == 0);
+ rc = msi_bitmap_alloc_hwirqs(&bmp, 2);
+ WARN_ON(rc < 0 && rc % 2 != 0);
+ rc = msi_bitmap_alloc_hwirqs(&bmp, 4);
+ WARN_ON(rc < 0 && rc % 4 != 0);
+ rc = msi_bitmap_alloc_hwirqs(&bmp, 8);
+ WARN_ON(rc < 0 && rc % 8 != 0);
+ rc = msi_bitmap_alloc_hwirqs(&bmp, 9);
+ WARN_ON(rc < 0 && rc % 16 != 0);
+ rc = msi_bitmap_alloc_hwirqs(&bmp, 3);
+ WARN_ON(rc < 0 && rc % 4 != 0);
+ rc = msi_bitmap_alloc_hwirqs(&bmp, 7);
+ WARN_ON(rc < 0 && rc % 8 != 0);
+ rc = msi_bitmap_alloc_hwirqs(&bmp, 121);
+ WARN_ON(rc < 0 && rc % 128 != 0);
msi_bitmap_free(&bmp);
- /* Clients may check bitmap == NULL for "not-allocated" */
- check(bmp.bitmap == NULL);
+ /* Clients may check bitmap == NULL for "not-allocated" */
+ WARN_ON(bmp.bitmap != NULL);
kfree(bmp.bitmap);
}
@@ -219,14 +224,13 @@
of_node_init(&of_node);
of_node.full_name = node_name;
- check(0 == msi_bitmap_alloc(&bmp, size, &of_node));
+ WARN_ON(msi_bitmap_alloc(&bmp, size, &of_node));
/* No msi-available-ranges, so expect > 0 */
- check(msi_bitmap_reserve_dt_hwirqs(&bmp) > 0);
+ WARN_ON(msi_bitmap_reserve_dt_hwirqs(&bmp) <= 0);
/* Should all still be free */
- check(0 == bitmap_find_free_region(bmp.bitmap, size,
- get_count_order(size)));
+ WARN_ON(bitmap_find_free_region(bmp.bitmap, size, get_count_order(size)));
bitmap_release_region(bmp.bitmap, 0, get_count_order(size));
/* Now create a fake msi-available-ranges property */
@@ -240,11 +244,11 @@
of_node.properties = &prop;
/* msi-available-ranges, so expect == 0 */
- check(msi_bitmap_reserve_dt_hwirqs(&bmp) == 0);
+ WARN_ON(msi_bitmap_reserve_dt_hwirqs(&bmp));
/* Check we got the expected result */
- check(0 == bitmap_parselist(expected_str, expected, size));
- check(bitmap_equal(expected, bmp.bitmap, size));
+ WARN_ON(bitmap_parselist(expected_str, expected, size));
+ WARN_ON(!bitmap_equal(expected, bmp.bitmap, size));
msi_bitmap_free(&bmp);
kfree(bmp.bitmap);
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index 940ac49..4197c89 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -286,7 +286,8 @@
#define __NR_seccomp 348
#define __NR_getrandom 349
#define __NR_memfd_create 350
-#define NR_syscalls 351
+#define __NR_bpf 351
+#define NR_syscalls 352
/*
* There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c
index faf6caa..c4f7a3d 100644
--- a/arch/s390/kernel/compat_wrapper.c
+++ b/arch/s390/kernel/compat_wrapper.c
@@ -217,3 +217,4 @@
COMPAT_SYSCALL_WRAP3(seccomp, unsigned int, op, unsigned int, flags, const char __user *, uargs)
COMPAT_SYSCALL_WRAP3(getrandom, char __user *, buf, size_t, count, unsigned int, flags)
COMPAT_SYSCALL_WRAP2(memfd_create, const char __user *, uname, unsigned int, flags)
+COMPAT_SYSCALL_WRAP3(bpf, int, cmd, union bpf_attr *, attr, unsigned int, size);
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 6fe886a..9f7087f 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -359,3 +359,4 @@
SYSCALL(sys_seccomp,sys_seccomp,compat_sys_seccomp)
SYSCALL(sys_getrandom,sys_getrandom,compat_sys_getrandom)
SYSCALL(sys_memfd_create,sys_memfd_create,compat_sys_memfd_create) /* 350 */
+SYSCALL(sys_bpf,sys_bpf,compat_sys_bpf)
diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c
index 956f4f7..f6b3cd0 100644
--- a/arch/s390/kernel/uprobes.c
+++ b/arch/s390/kernel/uprobes.c
@@ -5,13 +5,13 @@
* Author(s): Jan Willeke,
*/
-#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/uprobes.h>
#include <linux/compat.h>
#include <linux/kdebug.h>
#include <asm/switch_to.h>
#include <asm/facility.h>
+#include <asm/kprobes.h>
#include <asm/dis.h>
#include "entry.h"
diff --git a/arch/s390/lib/probes.c b/arch/s390/lib/probes.c
index c5d64a0..ae90e1a 100644
--- a/arch/s390/lib/probes.c
+++ b/arch/s390/lib/probes.c
@@ -4,7 +4,7 @@
* Copyright IBM Corp. 2014
*/
-#include <linux/kprobes.h>
+#include <asm/kprobes.h>
#include <asm/dis.h>
int probe_is_prohibited_opcode(u16 *insn)
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 296b61a..1b79ca6 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -656,7 +656,7 @@
}
pgste_set_unlock(ptep, pgste);
out_pte:
- pte_unmap_unlock(*ptep, ptl);
+ pte_unmap_unlock(ptep, ptl);
}
EXPORT_SYMBOL_GPL(__gmap_zap);
@@ -943,7 +943,7 @@
}
if (!(pte_val(*ptep) & _PAGE_INVALID) &&
(pte_val(*ptep) & _PAGE_PROTECT)) {
- pte_unmap_unlock(*ptep, ptl);
+ pte_unmap_unlock(ptep, ptl);
if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE)) {
up_read(&mm->mmap_sem);
return -EFAULT;
@@ -974,7 +974,7 @@
pgste_val(new) |= PGSTE_UC_BIT;
pgste_set_unlock(ptep, new);
- pte_unmap_unlock(*ptep, ptl);
+ pte_unmap_unlock(ptep, ptl);
up_read(&mm->mmap_sem);
return 0;
}
diff --git a/arch/sparc/include/asm/oplib_64.h b/arch/sparc/include/asm/oplib_64.h
index f346824..2e3a4ad 100644
--- a/arch/sparc/include/asm/oplib_64.h
+++ b/arch/sparc/include/asm/oplib_64.h
@@ -62,7 +62,8 @@
/* You must call prom_init() before using any of the library services,
* preferably as early as possible. Pass it the romvec pointer.
*/
-void prom_init(void *cif_handler, void *cif_stack);
+void prom_init(void *cif_handler);
+void prom_init_report(void);
/* Boot argument acquisition, returns the boot command line string. */
char *prom_getbootargs(void);
diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h
index f5fffd8..29d64b1 100644
--- a/arch/sparc/include/asm/setup.h
+++ b/arch/sparc/include/asm/setup.h
@@ -48,6 +48,8 @@
#endif
#ifdef CONFIG_SPARC64
+void __init start_early_boot(void);
+
/* unaligned_64.c */
int handle_ldf_stq(u32 insn, struct pt_regs *regs);
void handle_ld_nf(u32 insn, struct pt_regs *regs);
diff --git a/arch/sparc/kernel/entry.h b/arch/sparc/kernel/entry.h
index ebaba61..88d322b 100644
--- a/arch/sparc/kernel/entry.h
+++ b/arch/sparc/kernel/entry.h
@@ -65,13 +65,10 @@
extern struct pause_patch_entry __pause_3insn_patch,
__pause_3insn_patch_end;
-void __init per_cpu_patch(void);
void sun4v_patch_1insn_range(struct sun4v_1insn_patch_entry *,
struct sun4v_1insn_patch_entry *);
void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *,
struct sun4v_2insn_patch_entry *);
-void __init sun4v_patch(void);
-void __init boot_cpu_id_too_large(int cpu);
extern unsigned int dcache_parity_tl1_occurred;
extern unsigned int icache_parity_tl1_occurred;
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index 4fdeb80..3d61fca 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -672,14 +672,12 @@
sethi %hi(init_thread_union), %g6
or %g6, %lo(init_thread_union), %g6
ldx [%g6 + TI_TASK], %g4
- mov %sp, %l6
wr %g0, ASI_P, %asi
mov 1, %g1
sllx %g1, THREAD_SHIFT, %g1
sub %g1, (STACKFRAME_SZ + STACK_BIAS), %g1
add %g6, %g1, %sp
- mov 0, %fp
/* Set per-cpu pointer initially to zero, this makes
* the boot-cpu use the in-kernel-image per-cpu areas
@@ -706,44 +704,14 @@
nop
#endif
- mov %l6, %o1 ! OpenPROM stack
call prom_init
mov %l7, %o0 ! OpenPROM cif handler
- /* Initialize current_thread_info()->cpu as early as possible.
- * In order to do that accurately we have to patch up the get_cpuid()
- * assembler sequences. And that, in turn, requires that we know
- * if we are on a Starfire box or not. While we're here, patch up
- * the sun4v sequences as well.
+ /* To create a one-register-window buffer between the kernel's
+ * initial stack and the last stack frame we use from the firmware,
+ * do the rest of the boot from a C helper function.
*/
- call check_if_starfire
- nop
- call per_cpu_patch
- nop
- call sun4v_patch
- nop
-
-#ifdef CONFIG_SMP
- call hard_smp_processor_id
- nop
- cmp %o0, NR_CPUS
- blu,pt %xcc, 1f
- nop
- call boot_cpu_id_too_large
- nop
- /* Not reached... */
-
-1:
-#else
- mov 0, %o0
-#endif
- sth %o0, [%g6 + TI_CPU]
-
- call prom_init_report
- nop
-
- /* Off we go.... */
- call start_kernel
+ call start_early_boot
nop
/* Not reached... */
diff --git a/arch/sparc/kernel/hvtramp.S b/arch/sparc/kernel/hvtramp.S
index b7ddcdd..cdbfec2 100644
--- a/arch/sparc/kernel/hvtramp.S
+++ b/arch/sparc/kernel/hvtramp.S
@@ -109,7 +109,6 @@
sllx %g5, THREAD_SHIFT, %g5
sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5
add %g6, %g5, %sp
- mov 0, %fp
call init_irqwork_curcpu
nop
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index e629b83..c38d19f 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -30,6 +30,7 @@
#include <linux/cpu.h>
#include <linux/initrd.h>
#include <linux/module.h>
+#include <linux/start_kernel.h>
#include <asm/io.h>
#include <asm/processor.h>
@@ -162,7 +163,7 @@
static struct pt_regs fake_swapper_regs = { { 0, }, 0, 0, 0, 0 };
-void __init per_cpu_patch(void)
+static void __init per_cpu_patch(void)
{
struct cpuid_patch_entry *p;
unsigned long ver;
@@ -254,7 +255,7 @@
}
}
-void __init sun4v_patch(void)
+static void __init sun4v_patch(void)
{
extern void sun4v_hvapi_init(void);
@@ -323,14 +324,25 @@
}
}
-#ifdef CONFIG_SMP
-void __init boot_cpu_id_too_large(int cpu)
+void __init start_early_boot(void)
{
- prom_printf("Serious problem, boot cpu id (%d) >= NR_CPUS (%d)\n",
- cpu, NR_CPUS);
- prom_halt();
+ int cpu;
+
+ check_if_starfire();
+ per_cpu_patch();
+ sun4v_patch();
+
+ cpu = hard_smp_processor_id();
+ if (cpu >= NR_CPUS) {
+ prom_printf("Serious problem, boot cpu id (%d) >= NR_CPUS (%d)\n",
+ cpu, NR_CPUS);
+ prom_halt();
+ }
+ current_thread_info()->cpu = cpu;
+
+ prom_init_report();
+ start_kernel();
}
-#endif
/* On Ultra, we support all of the v8 capabilities. */
unsigned long sparc64_elf_hwcap = (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR |
diff --git a/arch/sparc/kernel/trampoline_64.S b/arch/sparc/kernel/trampoline_64.S
index 737f8cb..88ede1d 100644
--- a/arch/sparc/kernel/trampoline_64.S
+++ b/arch/sparc/kernel/trampoline_64.S
@@ -109,10 +109,13 @@
brnz,pn %g1, 1b
nop
- sethi %hi(p1275buf), %g2
- or %g2, %lo(p1275buf), %g2
- ldx [%g2 + 0x10], %l2
- add %l2, -(192 + 128), %sp
+ /* Get onto temporary stack which will be in the locked
+ * kernel image.
+ */
+ sethi %hi(tramp_stack), %g1
+ or %g1, %lo(tramp_stack), %g1
+ add %g1, TRAMP_STACK_SIZE, %g1
+ sub %g1, STACKFRAME_SZ + STACK_BIAS + 256, %sp
flushw
/* Setup the loop variables:
@@ -394,7 +397,6 @@
sllx %g5, THREAD_SHIFT, %g5
sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5
add %g6, %g5, %sp
- mov 0, %fp
rdpr %pstate, %o1
or %o1, PSTATE_IE, %o1
diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c
index 1aed043..ae6ce38 100644
--- a/arch/sparc/mm/gup.c
+++ b/arch/sparc/mm/gup.c
@@ -160,6 +160,36 @@
return 1;
}
+int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
+ struct page **pages)
+{
+ struct mm_struct *mm = current->mm;
+ unsigned long addr, len, end;
+ unsigned long next, flags;
+ pgd_t *pgdp;
+ int nr = 0;
+
+ start &= PAGE_MASK;
+ addr = start;
+ len = (unsigned long) nr_pages << PAGE_SHIFT;
+ end = start + len;
+
+ local_irq_save(flags);
+ pgdp = pgd_offset(mm, addr);
+ do {
+ pgd_t pgd = *pgdp;
+
+ next = pgd_addr_end(addr, end);
+ if (pgd_none(pgd))
+ break;
+ if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+ break;
+ } while (pgdp++, addr = next, addr != end);
+ local_irq_restore(flags);
+
+ return nr;
+}
+
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
{
diff --git a/arch/sparc/prom/cif.S b/arch/sparc/prom/cif.S
index 9c86b4b..8050f38 100644
--- a/arch/sparc/prom/cif.S
+++ b/arch/sparc/prom/cif.S
@@ -11,11 +11,10 @@
.text
.globl prom_cif_direct
prom_cif_direct:
+ save %sp, -192, %sp
sethi %hi(p1275buf), %o1
or %o1, %lo(p1275buf), %o1
- ldx [%o1 + 0x0010], %o2 ! prom_cif_stack
- save %o2, -192, %sp
- ldx [%i1 + 0x0008], %l2 ! prom_cif_handler
+ ldx [%o1 + 0x0008], %l2 ! prom_cif_handler
mov %g4, %l0
mov %g5, %l1
mov %g6, %l3
diff --git a/arch/sparc/prom/init_64.c b/arch/sparc/prom/init_64.c
index d95db75..110b0d7 100644
--- a/arch/sparc/prom/init_64.c
+++ b/arch/sparc/prom/init_64.c
@@ -26,13 +26,13 @@
* It gets passed the pointer to the PROM vector.
*/
-extern void prom_cif_init(void *, void *);
+extern void prom_cif_init(void *);
-void __init prom_init(void *cif_handler, void *cif_stack)
+void __init prom_init(void *cif_handler)
{
phandle node;
- prom_cif_init(cif_handler, cif_stack);
+ prom_cif_init(cif_handler);
prom_chosen_node = prom_finddevice(prom_chosen_path);
if (!prom_chosen_node || (s32)prom_chosen_node == -1)
diff --git a/arch/sparc/prom/p1275.c b/arch/sparc/prom/p1275.c
index b2340f0..545d8bb 100644
--- a/arch/sparc/prom/p1275.c
+++ b/arch/sparc/prom/p1275.c
@@ -20,7 +20,6 @@
struct {
long prom_callback; /* 0x00 */
void (*prom_cif_handler)(long *); /* 0x08 */
- unsigned long prom_cif_stack; /* 0x10 */
} p1275buf;
extern void prom_world(int);
@@ -52,5 +51,4 @@
void prom_cif_init(void *cif_handler, void *cif_stack)
{
p1275buf.prom_cif_handler = (void (*)(long *))cif_handler;
- p1275buf.prom_cif_stack = (unsigned long)cif_stack;
}
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index de8eebd..1acf605 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -330,8 +330,10 @@
size = pci->romsize + sizeof(*rom);
status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk(sys_table, "Failed to alloc mem for rom\n");
return status;
+ }
memset(rom, 0, sizeof(*rom));
@@ -344,14 +346,18 @@
status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
PCI_VENDOR_ID, 1, &(rom->vendor));
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk(sys_table, "Failed to read rom->vendor\n");
goto free_struct;
+ }
status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
PCI_DEVICE_ID, 1, &(rom->devid));
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk(sys_table, "Failed to read rom->devid\n");
goto free_struct;
+ }
status = efi_early->call(pci->get_location, pci, &(rom->segment),
&(rom->bus), &(rom->device), &(rom->function));
@@ -432,8 +438,10 @@
size = pci->romsize + sizeof(*rom);
status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk(sys_table, "Failed to alloc mem for rom\n");
return status;
+ }
rom->data.type = SETUP_PCI;
rom->data.len = size - sizeof(struct setup_data);
@@ -444,14 +452,18 @@
status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
PCI_VENDOR_ID, 1, &(rom->vendor));
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk(sys_table, "Failed to read rom->vendor\n");
goto free_struct;
+ }
status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
PCI_DEVICE_ID, 1, &(rom->devid));
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk(sys_table, "Failed to read rom->devid\n");
goto free_struct;
+ }
status = efi_early->call(pci->get_location, pci, &(rom->segment),
&(rom->bus), &(rom->device), &(rom->function));
@@ -538,8 +550,10 @@
EFI_LOADER_DATA,
size, (void **)&pci_handle);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk(sys_table, "Failed to alloc mem for pci_handle\n");
return;
+ }
status = efi_call_early(locate_handle,
EFI_LOCATE_BY_PROTOCOL, &pci_proto,
@@ -1105,6 +1119,10 @@
memset(sdt, 0, sizeof(*sdt));
+ status = efi_parse_options(cmdline_ptr);
+ if (status != EFI_SUCCESS)
+ goto fail2;
+
status = handle_cmdline_files(sys_table, image,
(char *)(unsigned long)hdr->cmd_line_ptr,
"initrd=", hdr->initrd_addr_max,
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 0ec241e..9b11757 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -81,24 +81,23 @@
*/
#define __efi_call_virt(f, args...) efi_call_virt(f, args)
-extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
- u32 type, u64 attribute);
+extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
+ u32 type, u64 attribute);
#endif /* CONFIG_X86_32 */
-extern int add_efi_memmap;
extern struct efi_scratch efi_scratch;
-extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
-extern int efi_memblock_x86_reserve_range(void);
-extern void efi_call_phys_prelog(void);
-extern void efi_call_phys_epilog(void);
-extern void efi_unmap_memmap(void);
-extern void efi_memory_uc(u64 addr, unsigned long size);
+extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable);
+extern int __init efi_memblock_x86_reserve_range(void);
+extern void __init efi_call_phys_prolog(void);
+extern void __init efi_call_phys_epilog(void);
+extern void __init efi_unmap_memmap(void);
+extern void __init efi_memory_uc(u64 addr, unsigned long size);
extern void __init efi_map_region(efi_memory_desc_t *md);
extern void __init efi_map_region_fixed(efi_memory_desc_t *md);
extern void efi_sync_low_kernel_mappings(void);
-extern int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages);
-extern void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages);
+extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages);
+extern void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages);
extern void __init old_map_region(efi_memory_desc_t *md);
extern void __init runtime_code_page_mkexec(void);
extern void __init efi_runtime_mkexec(void);
@@ -162,16 +161,6 @@
extern bool efi_reboot_required(void);
#else
-/*
- * IF EFI is not configured, have the EFI calls return -ENOSYS.
- */
-#define efi_call0(_f) (-ENOSYS)
-#define efi_call1(_f, _a1) (-ENOSYS)
-#define efi_call2(_f, _a1, _a2) (-ENOSYS)
-#define efi_call3(_f, _a1, _a2, _a3) (-ENOSYS)
-#define efi_call4(_f, _a1, _a2, _a3, _a4) (-ENOSYS)
-#define efi_call5(_f, _a1, _a2, _a3, _a4, _a5) (-ENOSYS)
-#define efi_call6(_f, _a1, _a2, _a3, _a4, _a5, _a6) (-ENOSYS)
static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {}
static inline bool efi_reboot_required(void)
{
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7d603a7..6ed0c30 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -989,6 +989,20 @@
kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
}
+static inline u64 get_canonical(u64 la)
+{
+ return ((int64_t)la << 16) >> 16;
+}
+
+static inline bool is_noncanonical_address(u64 la)
+{
+#ifdef CONFIG_X86_64
+ return get_canonical(la) != la;
+#else
+ return false;
+#endif
+}
+
#define TSS_IOPB_BASE_OFFSET 0x66
#define TSS_BASE_SIZE 0x68
#define TSS_IOPB_SIZE (65536 / 8)
@@ -1050,7 +1064,7 @@
unsigned long address);
void kvm_define_shared_msr(unsigned index, u32 msr);
-void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
+int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 0e79420..990a2fe 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -67,6 +67,7 @@
#define EXIT_REASON_EPT_MISCONFIG 49
#define EXIT_REASON_INVEPT 50
#define EXIT_REASON_PREEMPTION_TIMER 52
+#define EXIT_REASON_INVVPID 53
#define EXIT_REASON_WBINVD 54
#define EXIT_REASON_XSETBV 55
#define EXIT_REASON_APIC_WRITE 56
@@ -114,6 +115,7 @@
{ EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \
{ EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \
{ EXIT_REASON_INVD, "INVD" }, \
+ { EXIT_REASON_INVVPID, "INVVPID" }, \
{ EXIT_REASON_INVPCID, "INVPCID" }
#endif /* _UAPIVMX_H */
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a46207a..749f9fa 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -504,11 +504,6 @@
masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
}
-static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
-{
- register_address_increment(ctxt, &ctxt->_eip, rel);
-}
-
static u32 desc_limit_scaled(struct desc_struct *desc)
{
u32 limit = get_desc_limit(desc);
@@ -569,6 +564,38 @@
return emulate_exception(ctxt, NM_VECTOR, 0, false);
}
+static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
+ int cs_l)
+{
+ switch (ctxt->op_bytes) {
+ case 2:
+ ctxt->_eip = (u16)dst;
+ break;
+ case 4:
+ ctxt->_eip = (u32)dst;
+ break;
+ case 8:
+ if ((cs_l && is_noncanonical_address(dst)) ||
+ (!cs_l && (dst & ~(u32)-1)))
+ return emulate_gp(ctxt, 0);
+ ctxt->_eip = dst;
+ break;
+ default:
+ WARN(1, "unsupported eip assignment size\n");
+ }
+ return X86EMUL_CONTINUE;
+}
+
+static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
+{
+ return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64);
+}
+
+static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
+{
+ return assign_eip_near(ctxt, ctxt->_eip + rel);
+}
+
static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
{
u16 selector;
@@ -751,8 +778,10 @@
static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
unsigned size)
{
- if (unlikely(ctxt->fetch.end - ctxt->fetch.ptr < size))
- return __do_insn_fetch_bytes(ctxt, size);
+ unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;
+
+ if (unlikely(done_size < size))
+ return __do_insn_fetch_bytes(ctxt, size - done_size);
else
return X86EMUL_CONTINUE;
}
@@ -1416,7 +1445,9 @@
/* Does not support long mode */
static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
- u16 selector, int seg, u8 cpl, bool in_task_switch)
+ u16 selector, int seg, u8 cpl,
+ bool in_task_switch,
+ struct desc_struct *desc)
{
struct desc_struct seg_desc, old_desc;
u8 dpl, rpl;
@@ -1557,6 +1588,8 @@
}
load:
ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
+ if (desc)
+ *desc = seg_desc;
return X86EMUL_CONTINUE;
exception:
return emulate_exception(ctxt, err_vec, err_code, true);
@@ -1566,7 +1599,7 @@
u16 selector, int seg)
{
u8 cpl = ctxt->ops->cpl(ctxt);
- return __load_segment_descriptor(ctxt, selector, seg, cpl, false);
+ return __load_segment_descriptor(ctxt, selector, seg, cpl, false, NULL);
}
static void write_register_operand(struct operand *op)
@@ -1960,17 +1993,31 @@
static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
{
int rc;
- unsigned short sel;
+ unsigned short sel, old_sel;
+ struct desc_struct old_desc, new_desc;
+ const struct x86_emulate_ops *ops = ctxt->ops;
+ u8 cpl = ctxt->ops->cpl(ctxt);
+
+ /* Assignment of RIP may only fail in 64-bit mode */
+ if (ctxt->mode == X86EMUL_MODE_PROT64)
+ ops->get_segment(ctxt, &old_sel, &old_desc, NULL,
+ VCPU_SREG_CS);
memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
- rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS);
+ rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
+ &new_desc);
if (rc != X86EMUL_CONTINUE)
return rc;
- ctxt->_eip = 0;
- memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
- return X86EMUL_CONTINUE;
+ rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+ if (rc != X86EMUL_CONTINUE) {
+ WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64);
+ /* assigning eip failed; restore the old cs */
+ ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS);
+ return rc;
+ }
+ return rc;
}
static int em_grp45(struct x86_emulate_ctxt *ctxt)
@@ -1981,13 +2028,15 @@
case 2: /* call near abs */ {
long int old_eip;
old_eip = ctxt->_eip;
- ctxt->_eip = ctxt->src.val;
+ rc = assign_eip_near(ctxt, ctxt->src.val);
+ if (rc != X86EMUL_CONTINUE)
+ break;
ctxt->src.val = old_eip;
rc = em_push(ctxt);
break;
}
case 4: /* jmp abs */
- ctxt->_eip = ctxt->src.val;
+ rc = assign_eip_near(ctxt, ctxt->src.val);
break;
case 5: /* jmp far */
rc = em_jmp_far(ctxt);
@@ -2022,30 +2071,47 @@
static int em_ret(struct x86_emulate_ctxt *ctxt)
{
- ctxt->dst.type = OP_REG;
- ctxt->dst.addr.reg = &ctxt->_eip;
- ctxt->dst.bytes = ctxt->op_bytes;
- return em_pop(ctxt);
+ int rc;
+ unsigned long eip;
+
+ rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ return assign_eip_near(ctxt, eip);
}
static int em_ret_far(struct x86_emulate_ctxt *ctxt)
{
int rc;
- unsigned long cs;
+ unsigned long eip, cs;
+ u16 old_cs;
int cpl = ctxt->ops->cpl(ctxt);
+ struct desc_struct old_desc, new_desc;
+ const struct x86_emulate_ops *ops = ctxt->ops;
- rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes);
+ if (ctxt->mode == X86EMUL_MODE_PROT64)
+ ops->get_segment(ctxt, &old_cs, &old_desc, NULL,
+ VCPU_SREG_CS);
+
+ rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
if (rc != X86EMUL_CONTINUE)
return rc;
- if (ctxt->op_bytes == 4)
- ctxt->_eip = (u32)ctxt->_eip;
rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
if (rc != X86EMUL_CONTINUE)
return rc;
/* Outer-privilege level return is not implemented */
if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
return X86EMUL_UNHANDLEABLE;
- rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
+ rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, 0, false,
+ &new_desc);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+ rc = assign_eip_far(ctxt, eip, new_desc.l);
+ if (rc != X86EMUL_CONTINUE) {
+ WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64);
+ ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+ }
return rc;
}
@@ -2306,7 +2372,7 @@
{
const struct x86_emulate_ops *ops = ctxt->ops;
struct desc_struct cs, ss;
- u64 msr_data;
+ u64 msr_data, rcx, rdx;
int usermode;
u16 cs_sel = 0, ss_sel = 0;
@@ -2322,6 +2388,9 @@
else
usermode = X86EMUL_MODE_PROT32;
+ rcx = reg_read(ctxt, VCPU_REGS_RCX);
+ rdx = reg_read(ctxt, VCPU_REGS_RDX);
+
cs.dpl = 3;
ss.dpl = 3;
ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
@@ -2339,6 +2408,9 @@
ss_sel = cs_sel + 8;
cs.d = 0;
cs.l = 1;
+ if (is_noncanonical_address(rcx) ||
+ is_noncanonical_address(rdx))
+ return emulate_gp(ctxt, 0);
break;
}
cs_sel |= SELECTOR_RPL_MASK;
@@ -2347,8 +2419,8 @@
ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
- ctxt->_eip = reg_read(ctxt, VCPU_REGS_RDX);
- *reg_write(ctxt, VCPU_REGS_RSP) = reg_read(ctxt, VCPU_REGS_RCX);
+ ctxt->_eip = rdx;
+ *reg_write(ctxt, VCPU_REGS_RSP) = rcx;
return X86EMUL_CONTINUE;
}
@@ -2466,19 +2538,24 @@
* Now load segment descriptors. If fault happens at this stage
* it is handled in a context of new task
*/
- ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl, true);
+ ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
+ true, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
- ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true);
+ ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
+ true, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
- ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true);
+ ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
+ true, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
- ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true);
+ ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
+ true, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
- ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true);
+ ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
+ true, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
@@ -2603,25 +2680,32 @@
* Now load segment descriptors. If fault happenes at this stage
* it is handled in a context of new task
*/
- ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR, cpl, true);
+ ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
+ cpl, true, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
- ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true);
+ ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
+ true, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
- ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true);
+ ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
+ true, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
- ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true);
+ ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
+ true, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
- ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true);
+ ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
+ true, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
- ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl, true);
+ ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
+ true, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
- ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl, true);
+ ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
+ true, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
@@ -2888,10 +2972,13 @@
static int em_call(struct x86_emulate_ctxt *ctxt)
{
+ int rc;
long rel = ctxt->src.val;
ctxt->src.val = (unsigned long)ctxt->_eip;
- jmp_rel(ctxt, rel);
+ rc = jmp_rel(ctxt, rel);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
return em_push(ctxt);
}
@@ -2900,34 +2987,50 @@
u16 sel, old_cs;
ulong old_eip;
int rc;
+ struct desc_struct old_desc, new_desc;
+ const struct x86_emulate_ops *ops = ctxt->ops;
+ int cpl = ctxt->ops->cpl(ctxt);
- old_cs = get_segment_selector(ctxt, VCPU_SREG_CS);
old_eip = ctxt->_eip;
+ ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
- if (load_segment_descriptor(ctxt, sel, VCPU_SREG_CS))
+ rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
+ &new_desc);
+ if (rc != X86EMUL_CONTINUE)
return X86EMUL_CONTINUE;
- ctxt->_eip = 0;
- memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
+ rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+ if (rc != X86EMUL_CONTINUE)
+ goto fail;
ctxt->src.val = old_cs;
rc = em_push(ctxt);
if (rc != X86EMUL_CONTINUE)
- return rc;
+ goto fail;
ctxt->src.val = old_eip;
- return em_push(ctxt);
+ rc = em_push(ctxt);
+ /* If we failed, we tainted the memory, but the very least we should
+ restore cs */
+ if (rc != X86EMUL_CONTINUE)
+ goto fail;
+ return rc;
+fail:
+ ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+ return rc;
+
}
static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
{
int rc;
+ unsigned long eip;
- ctxt->dst.type = OP_REG;
- ctxt->dst.addr.reg = &ctxt->_eip;
- ctxt->dst.bytes = ctxt->op_bytes;
- rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
+ rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+ rc = assign_eip_near(ctxt, eip);
if (rc != X86EMUL_CONTINUE)
return rc;
rsp_increment(ctxt, ctxt->src.val);
@@ -3254,20 +3357,24 @@
static int em_loop(struct x86_emulate_ctxt *ctxt)
{
+ int rc = X86EMUL_CONTINUE;
+
register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
(ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
- jmp_rel(ctxt, ctxt->src.val);
+ rc = jmp_rel(ctxt, ctxt->src.val);
- return X86EMUL_CONTINUE;
+ return rc;
}
static int em_jcxz(struct x86_emulate_ctxt *ctxt)
{
- if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
- jmp_rel(ctxt, ctxt->src.val);
+ int rc = X86EMUL_CONTINUE;
- return X86EMUL_CONTINUE;
+ if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
+ rc = jmp_rel(ctxt, ctxt->src.val);
+
+ return rc;
}
static int em_in(struct x86_emulate_ctxt *ctxt)
@@ -3355,6 +3462,12 @@
return X86EMUL_CONTINUE;
}
+static int em_clflush(struct x86_emulate_ctxt *ctxt)
+{
+ /* emulating clflush regardless of cpuid */
+ return X86EMUL_CONTINUE;
+}
+
static bool valid_cr(int nr)
{
switch (nr) {
@@ -3693,6 +3806,16 @@
X7(D(Undefined)),
};
+static const struct gprefix pfx_0f_ae_7 = {
+ I(SrcMem | ByteOp, em_clflush), N, N, N,
+};
+
+static const struct group_dual group15 = { {
+ N, N, N, N, N, N, N, GP(0, &pfx_0f_ae_7),
+}, {
+ N, N, N, N, N, N, N, N,
+} };
+
static const struct gprefix pfx_0f_6f_0f_7f = {
I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
};
@@ -3901,10 +4024,11 @@
N, I(ImplicitOps | EmulateOnUD, em_syscall),
II(ImplicitOps | Priv, em_clts, clts), N,
DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
- N, D(ImplicitOps | ModRM), N, N,
+ N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
/* 0x10 - 0x1F */
N, N, N, N, N, N, N, N,
- D(ImplicitOps | ModRM), N, N, N, N, N, N, D(ImplicitOps | ModRM),
+ D(ImplicitOps | ModRM | SrcMem | NoAccess),
+ N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess),
/* 0x20 - 0x2F */
DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read),
DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
@@ -3956,7 +4080,7 @@
F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
- D(ModRM), F(DstReg | SrcMem | ModRM, em_imul),
+ GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
/* 0xB0 - 0xB7 */
I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg),
I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
@@ -4473,10 +4597,10 @@
/* Decode and fetch the destination operand: register or memory. */
rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
-done:
if (ctxt->rip_relative)
ctxt->memopp->addr.mem.ea += ctxt->_eip;
+done:
return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
}
@@ -4726,7 +4850,7 @@
break;
case 0x70 ... 0x7f: /* jcc (short) */
if (test_cc(ctxt->b, ctxt->eflags))
- jmp_rel(ctxt, ctxt->src.val);
+ rc = jmp_rel(ctxt, ctxt->src.val);
break;
case 0x8d: /* lea r16/r32, m */
ctxt->dst.val = ctxt->src.addr.mem.ea;
@@ -4756,7 +4880,7 @@
break;
case 0xe9: /* jmp rel */
case 0xeb: /* jmp rel short */
- jmp_rel(ctxt, ctxt->src.val);
+ rc = jmp_rel(ctxt, ctxt->src.val);
ctxt->dst.type = OP_NONE; /* Disable writeback. */
break;
case 0xf4: /* hlt */
@@ -4881,13 +5005,11 @@
break;
case 0x80 ... 0x8f: /* jnz rel, etc*/
if (test_cc(ctxt->b, ctxt->eflags))
- jmp_rel(ctxt, ctxt->src.val);
+ rc = jmp_rel(ctxt, ctxt->src.val);
break;
case 0x90 ... 0x9f: /* setcc r/m8 */
ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
break;
- case 0xae: /* clflush */
- break;
case 0xb6 ... 0xb7: /* movzx */
ctxt->dst.bytes = ctxt->op_bytes;
ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 518d864..298781d 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -262,8 +262,10 @@
return;
timer = &pit->pit_state.timer;
+ mutex_lock(&pit->pit_state.lock);
if (hrtimer_cancel(timer))
hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
+ mutex_unlock(&pit->pit_state.lock);
}
static void destroy_pit_timer(struct kvm_pit *pit)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 806d58e..fd49c86 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -298,7 +298,7 @@
}
#endif
walker->max_level = walker->level;
- ASSERT(!is_long_mode(vcpu) && is_pae(vcpu));
+ ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
accessed_dirty = PT_GUEST_ACCESSED_MASK;
pt_access = pte_access = ACC_ALL;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 65510f6..7527cef 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3251,7 +3251,7 @@
msr.host_initiated = false;
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
- if (svm_set_msr(&svm->vcpu, &msr)) {
+ if (kvm_set_msr(&svm->vcpu, &msr)) {
trace_kvm_msr_write_ex(ecx, data);
kvm_inject_gp(&svm->vcpu, 0);
} else {
@@ -3551,9 +3551,9 @@
if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
|| !svm_exit_handlers[exit_code]) {
- kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
- kvm_run->hw.hardware_exit_reason = exit_code;
- return 0;
+ WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_code);
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
}
return svm_exit_handlers[exit_code](svm);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0acac81..a8b76c4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2659,12 +2659,15 @@
default:
msr = find_msr_entry(vmx, msr_index);
if (msr) {
+ u64 old_msr_data = msr->data;
msr->data = data;
if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
preempt_disable();
- kvm_set_shared_msr(msr->index, msr->data,
- msr->mask);
+ ret = kvm_set_shared_msr(msr->index, msr->data,
+ msr->mask);
preempt_enable();
+ if (ret)
+ msr->data = old_msr_data;
}
break;
}
@@ -5291,7 +5294,7 @@
msr.data = data;
msr.index = ecx;
msr.host_initiated = false;
- if (vmx_set_msr(vcpu, &msr) != 0) {
+ if (kvm_set_msr(vcpu, &msr) != 0) {
trace_kvm_msr_write_ex(ecx, data);
kvm_inject_gp(vcpu, 0);
return 1;
@@ -6743,6 +6746,12 @@
return 1;
}
+static int handle_invvpid(struct kvm_vcpu *vcpu)
+{
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
+}
+
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -6788,6 +6797,7 @@
[EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait,
[EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor,
[EXIT_REASON_INVEPT] = handle_invept,
+ [EXIT_REASON_INVVPID] = handle_invvpid,
};
static const int kvm_vmx_max_exit_handlers =
@@ -7023,7 +7033,7 @@
case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
- case EXIT_REASON_INVEPT:
+ case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
/*
* VMX instructions trap unconditionally. This allows L1 to
* emulate them for its L2 guest, i.e., allows 3-level nesting!
@@ -7164,10 +7174,10 @@
&& kvm_vmx_exit_handlers[exit_reason])
return kvm_vmx_exit_handlers[exit_reason](vcpu);
else {
- vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
- vcpu->run->hw.hardware_exit_reason = exit_reason;
+ WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason);
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
}
- return 0;
}
static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 34c8f94..0033df32 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -229,20 +229,25 @@
shared_msr_update(i, shared_msrs_global.msrs[i]);
}
-void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
+int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
{
unsigned int cpu = smp_processor_id();
struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
+ int err;
if (((value ^ smsr->values[slot].curr) & mask) == 0)
- return;
+ return 0;
smsr->values[slot].curr = value;
- wrmsrl(shared_msrs_global.msrs[slot], value);
+ err = wrmsrl_safe(shared_msrs_global.msrs[slot], value);
+ if (err)
+ return 1;
+
if (!smsr->registered) {
smsr->urn.on_user_return = kvm_on_user_return;
user_return_notifier_register(&smsr->urn);
smsr->registered = true;
}
+ return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
@@ -987,7 +992,6 @@
}
EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
-
/*
* Writes msr value into into the appropriate "register".
* Returns 0 on success, non-0 otherwise.
@@ -995,8 +999,34 @@
*/
int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
+ switch (msr->index) {
+ case MSR_FS_BASE:
+ case MSR_GS_BASE:
+ case MSR_KERNEL_GS_BASE:
+ case MSR_CSTAR:
+ case MSR_LSTAR:
+ if (is_noncanonical_address(msr->data))
+ return 1;
+ break;
+ case MSR_IA32_SYSENTER_EIP:
+ case MSR_IA32_SYSENTER_ESP:
+ /*
+ * IA32_SYSENTER_ESP and IA32_SYSENTER_EIP cause #GP if
+ * non-canonical address is written on Intel but not on
+ * AMD (which ignores the top 32-bits, because it does
+ * not implement 64-bit SYSENTER).
+ *
+ * 64-bit code should hence be able to write a non-canonical
+ * value on AMD. Making the address canonical ensures that
+ * vmentry does not fail on Intel after writing a non-canonical
+ * value, and that something deterministic happens if the guest
+ * invokes 64-bit SYSENTER.
+ */
+ msr->data = get_canonical(msr->data);
+ }
return kvm_x86_ops->set_msr(vcpu, msr);
}
+EXPORT_SYMBOL_GPL(kvm_set_msr);
/*
* Adapt set_msr() to msr_io()'s calling convention
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
index f15103d..d143d21 100644
--- a/arch/x86/platform/efi/efi-bgrt.c
+++ b/arch/x86/platform/efi/efi-bgrt.c
@@ -40,20 +40,40 @@
if (ACPI_FAILURE(status))
return;
- if (bgrt_tab->header.length < sizeof(*bgrt_tab))
+ if (bgrt_tab->header.length < sizeof(*bgrt_tab)) {
+ pr_err("Ignoring BGRT: invalid length %u (expected %zu)\n",
+ bgrt_tab->header.length, sizeof(*bgrt_tab));
return;
- if (bgrt_tab->version != 1 || bgrt_tab->status != 1)
+ }
+ if (bgrt_tab->version != 1) {
+ pr_err("Ignoring BGRT: invalid version %u (expected 1)\n",
+ bgrt_tab->version);
return;
- if (bgrt_tab->image_type != 0 || !bgrt_tab->image_address)
+ }
+ if (bgrt_tab->status != 1) {
+ pr_err("Ignoring BGRT: invalid status %u (expected 1)\n",
+ bgrt_tab->status);
return;
+ }
+ if (bgrt_tab->image_type != 0) {
+ pr_err("Ignoring BGRT: invalid image type %u (expected 0)\n",
+ bgrt_tab->image_type);
+ return;
+ }
+ if (!bgrt_tab->image_address) {
+ pr_err("Ignoring BGRT: null image address\n");
+ return;
+ }
image = efi_lookup_mapped_addr(bgrt_tab->image_address);
if (!image) {
image = early_memremap(bgrt_tab->image_address,
sizeof(bmp_header));
ioremapped = true;
- if (!image)
+ if (!image) {
+ pr_err("Ignoring BGRT: failed to map image header memory\n");
return;
+ }
}
memcpy_fromio(&bmp_header, image, sizeof(bmp_header));
@@ -61,14 +81,18 @@
early_iounmap(image, sizeof(bmp_header));
bgrt_image_size = bmp_header.size;
- bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL);
- if (!bgrt_image)
+ bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL | __GFP_NOWARN);
+ if (!bgrt_image) {
+ pr_err("Ignoring BGRT: failed to allocate memory for image (wanted %zu bytes)\n",
+ bgrt_image_size);
return;
+ }
if (ioremapped) {
image = early_memremap(bgrt_tab->image_address,
bmp_header.size);
if (!image) {
+ pr_err("Ignoring BGRT: failed to map image memory\n");
kfree(bgrt_image);
bgrt_image = NULL;
return;
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 850da94..dbc8627 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -70,17 +70,7 @@
u64 efi_setup; /* efi setup_data physical address */
-static bool disable_runtime __initdata = false;
-static int __init setup_noefi(char *arg)
-{
- disable_runtime = true;
- return 0;
-}
-early_param("noefi", setup_noefi);
-
-int add_efi_memmap;
-EXPORT_SYMBOL(add_efi_memmap);
-
+static int add_efi_memmap __initdata;
static int __init setup_add_efi_memmap(char *arg)
{
add_efi_memmap = 1;
@@ -96,7 +86,7 @@
{
efi_status_t status;
- efi_call_phys_prelog();
+ efi_call_phys_prolog();
status = efi_call_phys(efi_phys.set_virtual_address_map,
memory_map_size, descriptor_size,
descriptor_version, virtual_map);
@@ -210,9 +200,12 @@
for (p = memmap.map, i = 0;
p < memmap.map_end;
p += memmap.desc_size, i++) {
+ char buf[64];
+
md = p;
- pr_info("mem%02u: type=%u, attr=0x%llx, range=[0x%016llx-0x%016llx) (%lluMB)\n",
- i, md->type, md->attribute, md->phys_addr,
+ pr_info("mem%02u: %s range=[0x%016llx-0x%016llx) (%lluMB)\n",
+ i, efi_md_typeattr_format(buf, sizeof(buf), md),
+ md->phys_addr,
md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
(md->num_pages >> (20 - EFI_PAGE_SHIFT)));
}
@@ -344,9 +337,9 @@
}
/*
- * We will only need *early* access to the following two
- * EFI runtime services before set_virtual_address_map
- * is invoked.
+ * We will only need *early* access to the SetVirtualAddressMap
+ * EFI runtime service. All other runtime services will be called
+ * via the virtual mapping.
*/
efi_phys.set_virtual_address_map =
(efi_set_virtual_address_map_t *)
@@ -368,9 +361,9 @@
}
/*
- * We will only need *early* access to the following two
- * EFI runtime services before set_virtual_address_map
- * is invoked.
+ * We will only need *early* access to the SetVirtualAddressMap
+ * EFI runtime service. All other runtime services will be called
+ * via the virtual mapping.
*/
efi_phys.set_virtual_address_map =
(efi_set_virtual_address_map_t *)
@@ -492,7 +485,7 @@
if (!efi_runtime_supported())
pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");
else {
- if (disable_runtime || efi_runtime_init())
+ if (efi_runtime_disabled() || efi_runtime_init())
return;
}
if (efi_memmap_init())
@@ -537,7 +530,7 @@
}
}
-void efi_memory_uc(u64 addr, unsigned long size)
+void __init efi_memory_uc(u64 addr, unsigned long size)
{
unsigned long page_shift = 1UL << EFI_PAGE_SHIFT;
u64 npages;
@@ -732,6 +725,7 @@
*/
if (!efi_is_native()) {
efi_unmap_memmap();
+ clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
return;
}
@@ -805,6 +799,7 @@
new_memmap = efi_map_regions(&count, &pg_shift);
if (!new_memmap) {
pr_err("Error reallocating memory, EFI runtime non-functional!\n");
+ clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
return;
}
@@ -812,8 +807,10 @@
BUG_ON(!efi.systab);
- if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift))
+ if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift)) {
+ clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
return;
+ }
efi_sync_low_kernel_mappings();
efi_dump_pagetable();
@@ -938,14 +935,11 @@
return 0;
}
-static int __init parse_efi_cmdline(char *str)
+static int __init arch_parse_efi_cmdline(char *str)
{
- if (*str == '=')
- str++;
-
- if (!strncmp(str, "old_map", 7))
+ if (parse_option_str(str, "old_map"))
set_bit(EFI_OLD_MEMMAP, &efi.flags);
return 0;
}
-early_param("efi", parse_efi_cmdline);
+early_param("efi", arch_parse_efi_cmdline);
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 9ee3491..40e7cda 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -33,7 +33,7 @@
/*
* To make EFI call EFI runtime service in physical addressing mode we need
- * prelog/epilog before/after the invocation to disable interrupt, to
+ * prolog/epilog before/after the invocation to disable interrupt, to
* claim EFI runtime service handler exclusively and to duplicate a memory in
* low memory space say 0 - 3G.
*/
@@ -41,11 +41,13 @@
void efi_sync_low_kernel_mappings(void) {}
void __init efi_dump_pagetable(void) {}
-int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
+int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
{
return 0;
}
-void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) {}
+void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages)
+{
+}
void __init efi_map_region(efi_memory_desc_t *md)
{
@@ -55,7 +57,7 @@
void __init efi_map_region_fixed(efi_memory_desc_t *md) {}
void __init parse_efi_setup(u64 phys_addr, u32 data_len) {}
-void efi_call_phys_prelog(void)
+void __init efi_call_phys_prolog(void)
{
struct desc_ptr gdt_descr;
@@ -69,7 +71,7 @@
load_gdt(&gdt_descr);
}
-void efi_call_phys_epilog(void)
+void __init efi_call_phys_epilog(void)
{
struct desc_ptr gdt_descr;
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 290d397..35aecb6 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -79,7 +79,7 @@
}
}
-void __init efi_call_phys_prelog(void)
+void __init efi_call_phys_prolog(void)
{
unsigned long vaddress;
int pgd;
@@ -139,7 +139,7 @@
sizeof(pgd_t) * num_pgds);
}
-int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
+int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
{
unsigned long text;
struct page *page;
@@ -192,7 +192,7 @@
return 0;
}
-void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages)
+void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages)
{
pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
diff --git a/arch/x86/platform/efi/efi_stub_32.S b/arch/x86/platform/efi/efi_stub_32.S
index fbe66e6..040192b 100644
--- a/arch/x86/platform/efi/efi_stub_32.S
+++ b/arch/x86/platform/efi/efi_stub_32.S
@@ -27,13 +27,13 @@
* set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found
* the values of these registers are the same. And, the corresponding
* GDT entries are identical. So I will do nothing about segment reg
- * and GDT, but change GDT base register in prelog and epilog.
+ * and GDT, but change GDT base register in prolog and epilog.
*/
/*
* 1. Now I am running with EIP = <physical address> + PAGE_OFFSET.
* But to make it smoothly switch from virtual mode to flat mode.
- * The mapping of lower virtual memory has been created in prelog and
+ * The mapping of lower virtual memory has been created in prolog and
* epilog.
*/
movl $1f, %edx
diff --git a/arch/x86/platform/intel-mid/intel_mid_weak_decls.h b/arch/x86/platform/intel-mid/intel_mid_weak_decls.h
index 46aa25c..3c1c386 100644
--- a/arch/x86/platform/intel-mid/intel_mid_weak_decls.h
+++ b/arch/x86/platform/intel-mid/intel_mid_weak_decls.h
@@ -10,10 +10,9 @@
*/
-/* __attribute__((weak)) makes these declarations overridable */
/* For every CPU addition a new get_<cpuname>_ops interface needs
* to be added.
*/
-extern void *get_penwell_ops(void) __attribute__((weak));
-extern void *get_cloverview_ops(void) __attribute__((weak));
-extern void *get_tangier_ops(void) __attribute__((weak));
+extern void *get_penwell_ops(void);
+extern void *get_cloverview_ops(void);
+extern void *get_tangier_ops(void);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 1a3f04454..fac5e4f 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1636,9 +1636,6 @@
xen_raw_console_write("mapping kernel into physical memory\n");
xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages);
- /* Allocate and initialize top and mid mfn levels for p2m structure */
- xen_build_mfn_list_list();
-
/* keep using Xen gdt for now; no urgent need to change it */
#ifdef CONFIG_X86_32
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index f62af76..a8a1a3d 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1217,10 +1217,13 @@
static void __init xen_pagetable_init(void)
{
paging_init();
- xen_setup_shared_info();
#ifdef CONFIG_X86_64
xen_pagetable_p2m_copy();
#endif
+ /* Allocate and initialize top and mid mfn levels for p2m structure */
+ xen_build_mfn_list_list();
+
+ xen_setup_shared_info();
xen_post_allocator_init();
}
static void xen_write_cr2(unsigned long cr2)
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 9f5983b..b456b04 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -163,6 +163,7 @@
#include <linux/hash.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
+#include <linux/bootmem.h>
#include <asm/cache.h>
#include <asm/setup.h>
@@ -181,21 +182,20 @@
unsigned long xen_max_p2m_pfn __read_mostly;
+static unsigned long *p2m_mid_missing_mfn;
+static unsigned long *p2m_top_mfn;
+static unsigned long **p2m_top_mfn_p;
+
/* Placeholders for holes in the address space */
static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE);
-static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_missing_mfn, P2M_MID_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
-static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
-static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_identity, P2M_MID_PER_PAGE);
-static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_identity_mfn, P2M_MID_PER_PAGE);
RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
-RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
/* For each I/O range remapped we may lose up to two leaf pages for the boundary
* violations and three mid pages to cover up to 3GB. With
@@ -272,11 +272,11 @@
* Build the parallel p2m_top_mfn and p2m_mid_mfn structures
*
* This is called both at boot time, and after resuming from suspend:
- * - At boot time we're called very early, and must use extend_brk()
+ * - At boot time we're called rather early, and must use alloc_bootmem*()
* to allocate memory.
*
* - After resume we're called from within stop_machine, but the mfn
- * tree should alreay be completely allocated.
+ * tree should already be completely allocated.
*/
void __ref xen_build_mfn_list_list(void)
{
@@ -287,20 +287,17 @@
/* Pre-initialize p2m_top_mfn to be completely missing */
if (p2m_top_mfn == NULL) {
- p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
+ p2m_mid_missing_mfn = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);
- p2m_mid_identity_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
- p2m_mid_mfn_init(p2m_mid_identity_mfn, p2m_identity);
- p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
+ p2m_top_mfn_p = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
p2m_top_mfn_p_init(p2m_top_mfn_p);
- p2m_top_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
+ p2m_top_mfn = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
p2m_top_mfn_init(p2m_top_mfn);
} else {
/* Reinitialise, mfn's all change after migration */
p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);
- p2m_mid_mfn_init(p2m_mid_identity_mfn, p2m_identity);
}
for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) {
@@ -328,10 +325,9 @@
/*
* XXX boot-time only! We should never find
* missing parts of the mfn tree after
- * runtime. extend_brk() will BUG if we call
- * it too late.
+ * runtime.
*/
- mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
+ mid_mfn_p = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
p2m_mid_mfn_init(mid_mfn_p, p2m_missing);
p2m_top_mfn_p[topidx] = mid_mfn_p;
@@ -415,7 +411,6 @@
m2p_override_init();
}
#ifdef CONFIG_X86_64
-#include <linux/bootmem.h>
unsigned long __init xen_revector_p2m_tree(void)
{
unsigned long va_start;
@@ -477,7 +472,6 @@
copy_page(new, mid_p);
p2m_top[topidx][mididx] = &mfn_list[pfn_free];
- p2m_top_mfn_p[topidx][mididx] = virt_to_mfn(&mfn_list[pfn_free]);
pfn_free += P2M_PER_PAGE;
@@ -538,12 +532,13 @@
unsigned topidx, mididx;
unsigned long ***top_p, **mid;
unsigned long *top_mfn_p, *mid_mfn;
+ unsigned long *p2m_orig;
topidx = p2m_top_index(pfn);
mididx = p2m_mid_index(pfn);
top_p = &p2m_top[topidx];
- mid = *top_p;
+ mid = ACCESS_ONCE(*top_p);
if (mid == p2m_mid_missing) {
/* Mid level is missing, allocate a new one */
@@ -558,7 +553,7 @@
}
top_mfn_p = &p2m_top_mfn[topidx];
- mid_mfn = p2m_top_mfn_p[topidx];
+ mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]);
BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p);
@@ -566,6 +561,7 @@
/* Separately check the mid mfn level */
unsigned long missing_mfn;
unsigned long mid_mfn_mfn;
+ unsigned long old_mfn;
mid_mfn = alloc_p2m_page();
if (!mid_mfn)
@@ -575,17 +571,19 @@
missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
mid_mfn_mfn = virt_to_mfn(mid_mfn);
- if (cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn) != missing_mfn)
+ old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn);
+ if (old_mfn != missing_mfn) {
free_p2m_page(mid_mfn);
- else
+ mid_mfn = mfn_to_virt(old_mfn);
+ } else {
p2m_top_mfn_p[topidx] = mid_mfn;
+ }
}
- if (p2m_top[topidx][mididx] == p2m_identity ||
- p2m_top[topidx][mididx] == p2m_missing) {
+ p2m_orig = ACCESS_ONCE(p2m_top[topidx][mididx]);
+ if (p2m_orig == p2m_identity || p2m_orig == p2m_missing) {
/* p2m leaf page is missing */
unsigned long *p2m;
- unsigned long *p2m_orig = p2m_top[topidx][mididx];
p2m = alloc_p2m_page();
if (!p2m)
@@ -606,7 +604,6 @@
{
unsigned topidx, mididx, idx;
unsigned long *p2m;
- unsigned long *mid_mfn_p;
topidx = p2m_top_index(pfn);
mididx = p2m_mid_index(pfn);
@@ -633,43 +630,21 @@
p2m_top[topidx][mididx] = p2m;
- /* For save/restore we need to MFN of the P2M saved */
-
- mid_mfn_p = p2m_top_mfn_p[topidx];
- WARN(mid_mfn_p[mididx] != virt_to_mfn(p2m_missing),
- "P2M_TOP_P[%d][%d] != MFN of p2m_missing!\n",
- topidx, mididx);
- mid_mfn_p[mididx] = virt_to_mfn(p2m);
-
return true;
}
static bool __init early_alloc_p2m_middle(unsigned long pfn)
{
unsigned topidx = p2m_top_index(pfn);
- unsigned long *mid_mfn_p;
unsigned long **mid;
mid = p2m_top[topidx];
- mid_mfn_p = p2m_top_mfn_p[topidx];
if (mid == p2m_mid_missing) {
mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_mid_init(mid, p2m_missing);
p2m_top[topidx] = mid;
-
- BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
- }
- /* And the save/restore P2M tables.. */
- if (mid_mfn_p == p2m_mid_missing_mfn) {
- mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
- p2m_mid_mfn_init(mid_mfn_p, p2m_missing);
-
- p2m_top_mfn_p[topidx] = mid_mfn_p;
- p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
- /* Note: we don't set mid_mfn_p[midix] here,
- * look in early_alloc_p2m() */
}
return true;
}
@@ -680,14 +655,13 @@
* replace the P2M leaf with a p2m_missing or p2m_identity.
* Stick the old page in the new P2M tree location.
*/
-bool __init early_can_reuse_p2m_middle(unsigned long set_pfn, unsigned long set_mfn)
+static bool __init early_can_reuse_p2m_middle(unsigned long set_pfn)
{
unsigned topidx;
unsigned mididx;
unsigned ident_pfns;
unsigned inv_pfns;
unsigned long *p2m;
- unsigned long *mid_mfn_p;
unsigned idx;
unsigned long pfn;
@@ -733,11 +707,6 @@
found:
/* Found one, replace old with p2m_identity or p2m_missing */
p2m_top[topidx][mididx] = (ident_pfns ? p2m_identity : p2m_missing);
- /* And the other for save/restore.. */
- mid_mfn_p = p2m_top_mfn_p[topidx];
- /* NOTE: Even if it is a p2m_identity it should still be point to
- * a page filled with INVALID_P2M_ENTRY entries. */
- mid_mfn_p[mididx] = virt_to_mfn(p2m_missing);
/* Reset where we want to stick the old page in. */
topidx = p2m_top_index(set_pfn);
@@ -752,8 +721,6 @@
p2m_init(p2m);
p2m_top[topidx][mididx] = p2m;
- mid_mfn_p = p2m_top_mfn_p[topidx];
- mid_mfn_p[mididx] = virt_to_mfn(p2m);
return true;
}
@@ -763,7 +730,7 @@
if (!early_alloc_p2m_middle(pfn))
return false;
- if (early_can_reuse_p2m_middle(pfn, mfn))
+ if (early_can_reuse_p2m_middle(pfn))
return __set_phys_to_machine(pfn, mfn);
if (!early_alloc_p2m(pfn, false /* boundary crossover OK!*/))
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index af72161..29834b3 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -595,6 +595,7 @@
rc = 0;
}
BUG_ON(rc);
+ BUG_ON(memmap.nr_entries == 0);
/*
* Xen won't allow a 1:1 mapping to be created to UNUSABLE
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index a1d430b..f473d26 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -158,7 +158,7 @@
cycle_t ret;
preempt_disable_notrace();
- src = this_cpu_ptr(&xen_vcpu->time);
+ src = &__this_cpu_read(xen_vcpu)->time;
ret = pvclock_clocksource_read(src);
preempt_enable_notrace();
return ret;
diff --git a/crypto/cts.c b/crypto/cts.c
index 042223f..133f0874 100644
--- a/crypto/cts.c
+++ b/crypto/cts.c
@@ -202,7 +202,8 @@
/* 5. Append the tail (BB - Ln) bytes of Xn (tmp) to Cn to create En */
memcpy(s + bsize + lastn, tmp + lastn, bsize - lastn);
/* 6. Decrypt En to create Pn-1 */
- memset(iv, 0, sizeof(iv));
+ memzero_explicit(iv, sizeof(iv));
+
sg_set_buf(&sgsrc[0], s + bsize, bsize);
sg_set_buf(&sgdst[0], d, bsize);
err = crypto_blkcipher_decrypt_iv(&lcldesc, sgdst, sgsrc, bsize);
diff --git a/crypto/sha1_generic.c b/crypto/sha1_generic.c
index 4279480..7bb0474 100644
--- a/crypto/sha1_generic.c
+++ b/crypto/sha1_generic.c
@@ -64,7 +64,7 @@
src = data + done;
} while (done + SHA1_BLOCK_SIZE <= len);
- memset(temp, 0, sizeof(temp));
+ memzero_explicit(temp, sizeof(temp));
partial = 0;
}
memcpy(sctx->buffer + partial, src, len - done);
diff --git a/crypto/sha256_generic.c b/crypto/sha256_generic.c
index 0bb5583..65e7b76 100644
--- a/crypto/sha256_generic.c
+++ b/crypto/sha256_generic.c
@@ -211,10 +211,9 @@
/* clear any sensitive info... */
a = b = c = d = e = f = g = h = t1 = t2 = 0;
- memset(W, 0, 64 * sizeof(u32));
+ memzero_explicit(W, 64 * sizeof(u32));
}
-
static int sha224_init(struct shash_desc *desc)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
@@ -317,7 +316,7 @@
sha256_final(desc, D);
memcpy(hash, D, SHA224_DIGEST_SIZE);
- memset(D, 0, SHA256_DIGEST_SIZE);
+ memzero_explicit(D, SHA256_DIGEST_SIZE);
return 0;
}
diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c
index 6dde57d..95db671 100644
--- a/crypto/sha512_generic.c
+++ b/crypto/sha512_generic.c
@@ -239,7 +239,7 @@
sha512_final(desc, D);
memcpy(hash, D, 48);
- memset(D, 0, 64);
+ memzero_explicit(D, 64);
return 0;
}
diff --git a/crypto/tgr192.c b/crypto/tgr192.c
index 8740355..3c7af0d 100644
--- a/crypto/tgr192.c
+++ b/crypto/tgr192.c
@@ -612,7 +612,7 @@
tgr192_final(desc, D);
memcpy(out, D, TGR160_DIGEST_SIZE);
- memset(D, 0, TGR192_DIGEST_SIZE);
+ memzero_explicit(D, TGR192_DIGEST_SIZE);
return 0;
}
@@ -623,7 +623,7 @@
tgr192_final(desc, D);
memcpy(out, D, TGR128_DIGEST_SIZE);
- memset(D, 0, TGR192_DIGEST_SIZE);
+ memzero_explicit(D, TGR192_DIGEST_SIZE);
return 0;
}
diff --git a/crypto/vmac.c b/crypto/vmac.c
index 2eb11a3..d84c24b 100644
--- a/crypto/vmac.c
+++ b/crypto/vmac.c
@@ -613,7 +613,7 @@
}
mac = vmac(ctx->partial, ctx->partial_size, nonce, NULL, ctx);
memcpy(out, &mac, sizeof(vmac_t));
- memset(&mac, 0, sizeof(vmac_t));
+ memzero_explicit(&mac, sizeof(vmac_t));
memset(&ctx->__vmac_ctx, 0, sizeof(struct vmac_ctx));
ctx->partial_size = 0;
return 0;
diff --git a/crypto/wp512.c b/crypto/wp512.c
index 180f1d6..ec64e77 100644
--- a/crypto/wp512.c
+++ b/crypto/wp512.c
@@ -1102,8 +1102,8 @@
u8 D[64];
wp512_final(desc, D);
- memcpy (out, D, WP384_DIGEST_SIZE);
- memset (D, 0, WP512_DIGEST_SIZE);
+ memcpy(out, D, WP384_DIGEST_SIZE);
+ memzero_explicit(D, WP512_DIGEST_SIZE);
return 0;
}
@@ -1113,8 +1113,8 @@
u8 D[64];
wp512_final(desc, D);
- memcpy (out, D, WP256_DIGEST_SIZE);
- memset (D, 0, WP512_DIGEST_SIZE);
+ memcpy(out, D, WP256_DIGEST_SIZE);
+ memzero_explicit(D, WP512_DIGEST_SIZE);
return 0;
}
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index d0f3265..b23fe37 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -144,7 +144,7 @@
config ACPI_FAN
tristate "Fan"
- select THERMAL
+ depends on THERMAL
default y
help
This driver supports ACPI fan devices, allowing user-mode
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 505d4d7..c3b2fcb 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -43,6 +43,7 @@
acpi-y += acpi_lpss.o
acpi-y += acpi_platform.o
acpi-y += acpi_pnp.o
+acpi-y += int340x_thermal.o
acpi-y += power.o
acpi-y += event.o
acpi-y += sysfs.o
diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c
index 2bf9082..6ba8beb 100644
--- a/drivers/acpi/acpi_platform.c
+++ b/drivers/acpi/acpi_platform.c
@@ -16,6 +16,7 @@
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/dma-mapping.h>
#include <linux/platform_device.h>
#include "internal.h"
@@ -102,6 +103,7 @@
pdevinfo.res = resources;
pdevinfo.num_res = count;
pdevinfo.acpi_node.companion = adev;
+ pdevinfo.dma_mask = DMA_BIT_MASK(32);
pdev = platform_device_register_full(&pdevinfo);
if (IS_ERR(pdev))
dev_err(&adev->dev, "platform device creation failed: %ld\n",
@@ -113,3 +115,4 @@
kfree(resources);
return pdev;
}
+EXPORT_SYMBOL_GPL(acpi_create_platform_device);
diff --git a/drivers/acpi/acpica/achware.h b/drivers/acpi/acpica/achware.h
index 2ad2351..c318d3e 100644
--- a/drivers/acpi/acpica/achware.h
+++ b/drivers/acpi/acpica/achware.h
@@ -127,7 +127,7 @@
acpi_status
acpi_hw_get_gpe_status(struct acpi_gpe_event_info *gpe_event_info,
- acpi_event_status * event_status);
+ acpi_event_status *event_status);
acpi_status acpi_hw_disable_all_gpes(void);
diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h
index 2747279..c00e7e4 100644
--- a/drivers/acpi/acpica/aclocal.h
+++ b/drivers/acpi/acpica/aclocal.h
@@ -413,8 +413,8 @@
acpi_gpe_handler address; /* Address of handler, if any */
void *context; /* Context to be passed to handler */
struct acpi_namespace_node *method_node; /* Method node for this GPE level (saved) */
- u8 original_flags; /* Original (pre-handler) GPE info */
- u8 originally_enabled; /* True if GPE was originally enabled */
+ u8 original_flags; /* Original (pre-handler) GPE info */
+ u8 originally_enabled; /* True if GPE was originally enabled */
};
/* Notify info for implicit notify, multiple device objects */
diff --git a/drivers/acpi/acpica/actables.h b/drivers/acpi/acpica/actables.h
index f148827..1afe46e 100644
--- a/drivers/acpi/acpica/actables.h
+++ b/drivers/acpi/acpica/actables.h
@@ -49,6 +49,8 @@
/*
* tbxfroot - Root pointer utilities
*/
+u32 acpi_tb_get_rsdp_length(struct acpi_table_rsdp *rsdp);
+
acpi_status acpi_tb_validate_rsdp(struct acpi_table_rsdp *rsdp);
u8 *acpi_tb_scan_memory_for_rsdp(u8 *start_address, u32 length);
diff --git a/drivers/acpi/acpica/amlresrc.h b/drivers/acpi/acpica/amlresrc.h
index f3f8344..3a0beeb 100644
--- a/drivers/acpi/acpica/amlresrc.h
+++ b/drivers/acpi/acpica/amlresrc.h
@@ -117,6 +117,12 @@
struct asl_resource_node *next;
};
+struct asl_resource_info {
+ union acpi_parse_object *descriptor_type_op; /* Resource descriptor parse node */
+ union acpi_parse_object *mapping_op; /* Used for mapfile support */
+ u32 current_byte_offset; /* Offset in resource template */
+};
+
/* Macros used to generate AML resource length fields */
#define ACPI_AML_SIZE_LARGE(r) (sizeof (r) - sizeof (struct aml_resource_large_header))
@@ -449,4 +455,32 @@
u8 byte_item;
};
+/* Interfaces used by both the disassembler and compiler */
+
+void
+mp_save_gpio_info(union acpi_parse_object *op,
+ union aml_resource *resource,
+ u32 pin_count, u16 *pin_list, char *device_name);
+
+void
+mp_save_serial_info(union acpi_parse_object *op,
+ union aml_resource *resource, char *device_name);
+
+char *mp_get_hid_from_parse_tree(struct acpi_namespace_node *hid_node);
+
+char *mp_get_hid_via_namestring(char *device_name);
+
+char *mp_get_connection_info(union acpi_parse_object *op,
+ u32 pin_index,
+ struct acpi_namespace_node **target_node,
+ char **target_name);
+
+char *mp_get_parent_device_hid(union acpi_parse_object *op,
+ struct acpi_namespace_node **target_node,
+ char **parent_device_name);
+
+char *mp_get_ddn_value(char *device_name);
+
+char *mp_get_hid_value(struct acpi_namespace_node *device_node);
+
#endif
diff --git a/drivers/acpi/acpica/evgpe.c b/drivers/acpi/acpica/evgpe.c
index e4ba4de..2095dfb 100644
--- a/drivers/acpi/acpica/evgpe.c
+++ b/drivers/acpi/acpica/evgpe.c
@@ -100,13 +100,14 @@
*
* FUNCTION: acpi_ev_enable_gpe
*
- * PARAMETERS: gpe_event_info - GPE to enable
+ * PARAMETERS: gpe_event_info - GPE to enable
*
* RETURN: Status
*
* DESCRIPTION: Clear a GPE of stale events and enable it.
*
******************************************************************************/
+
acpi_status acpi_ev_enable_gpe(struct acpi_gpe_event_info *gpe_event_info)
{
acpi_status status;
@@ -125,6 +126,7 @@
}
/* Clear the GPE (of stale events) */
+
status = acpi_hw_clear_gpe(gpe_event_info);
if (ACPI_FAILURE(status)) {
return_ACPI_STATUS(status);
@@ -136,7 +138,6 @@
return_ACPI_STATUS(status);
}
-
/*******************************************************************************
*
* FUNCTION: acpi_ev_add_gpe_reference
@@ -212,7 +213,7 @@
if (ACPI_SUCCESS(status)) {
status =
acpi_hw_low_set_gpe(gpe_event_info,
- ACPI_GPE_DISABLE);
+ ACPI_GPE_DISABLE);
}
if (ACPI_FAILURE(status)) {
@@ -334,7 +335,7 @@
*
******************************************************************************/
-u32 acpi_ev_gpe_detect(struct acpi_gpe_xrupt_info * gpe_xrupt_list)
+u32 acpi_ev_gpe_detect(struct acpi_gpe_xrupt_info *gpe_xrupt_list)
{
acpi_status status;
struct acpi_gpe_block_info *gpe_block;
@@ -427,7 +428,7 @@
/* Check if there is anything active at all in this register */
- enabled_status_byte = (u8) (status_reg & enable_reg);
+ enabled_status_byte = (u8)(status_reg & enable_reg);
if (!enabled_status_byte) {
/* No active GPEs in this register, move on */
@@ -450,7 +451,7 @@
acpi_ev_gpe_dispatch(gpe_block->
node,
&gpe_block->
- event_info[((acpi_size) i * ACPI_GPE_REGISTER_WIDTH) + j], j + gpe_register_info->base_gpe_number);
+ event_info[((acpi_size) i * ACPI_GPE_REGISTER_WIDTH) + j], j + gpe_register_info->base_gpe_number);
}
}
}
@@ -636,7 +637,7 @@
*
******************************************************************************/
-acpi_status acpi_ev_finish_gpe(struct acpi_gpe_event_info *gpe_event_info)
+acpi_status acpi_ev_finish_gpe(struct acpi_gpe_event_info * gpe_event_info)
{
acpi_status status;
@@ -666,9 +667,9 @@
*
* FUNCTION: acpi_ev_gpe_dispatch
*
- * PARAMETERS: gpe_device - Device node. NULL for GPE0/GPE1
- * gpe_event_info - Info for this GPE
- * gpe_number - Number relative to the parent GPE block
+ * PARAMETERS: gpe_device - Device node. NULL for GPE0/GPE1
+ * gpe_event_info - Info for this GPE
+ * gpe_number - Number relative to the parent GPE block
*
* RETURN: INTERRUPT_HANDLED or INTERRUPT_NOT_HANDLED
*
@@ -681,7 +682,7 @@
u32
acpi_ev_gpe_dispatch(struct acpi_namespace_node *gpe_device,
- struct acpi_gpe_event_info *gpe_event_info, u32 gpe_number)
+ struct acpi_gpe_event_info *gpe_event_info, u32 gpe_number)
{
acpi_status status;
u32 return_value;
diff --git a/drivers/acpi/acpica/evgpeinit.c b/drivers/acpi/acpica/evgpeinit.c
index 49fc7ef..7be9283 100644
--- a/drivers/acpi/acpica/evgpeinit.c
+++ b/drivers/acpi/acpica/evgpeinit.c
@@ -424,6 +424,7 @@
}
/* Disable the GPE in case it's been enabled already. */
+
(void)acpi_hw_low_set_gpe(gpe_event_info, ACPI_GPE_DISABLE);
/*
diff --git a/drivers/acpi/acpica/evxface.c b/drivers/acpi/acpica/evxface.c
index 11e5803..55a58f3 100644
--- a/drivers/acpi/acpica/evxface.c
+++ b/drivers/acpi/acpica/evxface.c
@@ -786,18 +786,26 @@
handler->method_node = gpe_event_info->dispatch.method_node;
handler->original_flags = (u8)(gpe_event_info->flags &
(ACPI_GPE_XRUPT_TYPE_MASK |
- ACPI_GPE_DISPATCH_MASK));
+ ACPI_GPE_DISPATCH_MASK));
/*
* If the GPE is associated with a method, it may have been enabled
* automatically during initialization, in which case it has to be
* disabled now to avoid spurious execution of the handler.
*/
-
- if ((handler->original_flags & ACPI_GPE_DISPATCH_METHOD)
- && gpe_event_info->runtime_count) {
- handler->originally_enabled = 1;
+ if (((handler->original_flags & ACPI_GPE_DISPATCH_METHOD) ||
+ (handler->original_flags & ACPI_GPE_DISPATCH_NOTIFY)) &&
+ gpe_event_info->runtime_count) {
+ handler->originally_enabled = TRUE;
(void)acpi_ev_remove_gpe_reference(gpe_event_info);
+
+ /* Sanity check of original type against new type */
+
+ if (type !=
+ (u32)(gpe_event_info->flags & ACPI_GPE_XRUPT_TYPE_MASK)) {
+ ACPI_WARNING((AE_INFO,
+ "GPE type mismatch (level/edge)"));
+ }
}
/* Install the handler */
@@ -808,7 +816,7 @@
gpe_event_info->flags &=
~(ACPI_GPE_XRUPT_TYPE_MASK | ACPI_GPE_DISPATCH_MASK);
- gpe_event_info->flags |= (u8) (type | ACPI_GPE_DISPATCH_HANDLER);
+ gpe_event_info->flags |= (u8)(type | ACPI_GPE_DISPATCH_HANDLER);
acpi_os_release_lock(acpi_gbl_gpe_lock, flags);
@@ -893,7 +901,7 @@
gpe_event_info->dispatch.method_node = handler->method_node;
gpe_event_info->flags &=
- ~(ACPI_GPE_XRUPT_TYPE_MASK | ACPI_GPE_DISPATCH_MASK);
+ ~(ACPI_GPE_XRUPT_TYPE_MASK | ACPI_GPE_DISPATCH_MASK);
gpe_event_info->flags |= handler->original_flags;
/*
@@ -901,7 +909,8 @@
* enabled, it should be enabled at this point to restore the
* post-initialization configuration.
*/
- if ((handler->original_flags & ACPI_GPE_DISPATCH_METHOD) &&
+ if (((handler->original_flags & ACPI_GPE_DISPATCH_METHOD) ||
+ (handler->original_flags & ACPI_GPE_DISPATCH_NOTIFY)) &&
handler->originally_enabled) {
(void)acpi_ev_add_gpe_reference(gpe_event_info);
}
@@ -946,7 +955,7 @@
* handle is returned.
*
******************************************************************************/
-acpi_status acpi_acquire_global_lock(u16 timeout, u32 * handle)
+acpi_status acpi_acquire_global_lock(u16 timeout, u32 *handle)
{
acpi_status status;
diff --git a/drivers/acpi/acpica/evxfevnt.c b/drivers/acpi/acpica/evxfevnt.c
index e286640..bb8cbf5 100644
--- a/drivers/acpi/acpica/evxfevnt.c
+++ b/drivers/acpi/acpica/evxfevnt.c
@@ -324,8 +324,9 @@
******************************************************************************/
acpi_status acpi_get_event_status(u32 event, acpi_event_status * event_status)
{
- acpi_status status = AE_OK;
- u32 value;
+ acpi_status status;
+ acpi_event_status local_event_status = 0;
+ u32 in_byte;
ACPI_FUNCTION_TRACE(acpi_get_event_status);
@@ -339,29 +340,40 @@
return_ACPI_STATUS(AE_BAD_PARAMETER);
}
- /* Get the status of the requested fixed event */
+ /* Fixed event currently can be dispatched? */
+
+ if (acpi_gbl_fixed_event_handlers[event].handler) {
+ local_event_status |= ACPI_EVENT_FLAG_HAS_HANDLER;
+ }
+
+ /* Fixed event currently enabled? */
status =
acpi_read_bit_register(acpi_gbl_fixed_event_info[event].
- enable_register_id, &value);
- if (ACPI_FAILURE(status))
+ enable_register_id, &in_byte);
+ if (ACPI_FAILURE(status)) {
return_ACPI_STATUS(status);
+ }
- *event_status = value;
+ if (in_byte) {
+ local_event_status |= ACPI_EVENT_FLAG_ENABLED;
+ }
+
+ /* Fixed event currently active? */
status =
acpi_read_bit_register(acpi_gbl_fixed_event_info[event].
- status_register_id, &value);
- if (ACPI_FAILURE(status))
+ status_register_id, &in_byte);
+ if (ACPI_FAILURE(status)) {
return_ACPI_STATUS(status);
+ }
- if (value)
- *event_status |= ACPI_EVENT_FLAG_SET;
+ if (in_byte) {
+ local_event_status |= ACPI_EVENT_FLAG_SET;
+ }
- if (acpi_gbl_fixed_event_handlers[event].handler)
- *event_status |= ACPI_EVENT_FLAG_HANDLE;
-
- return_ACPI_STATUS(status);
+ (*event_status) = local_event_status;
+ return_ACPI_STATUS(AE_OK);
}
ACPI_EXPORT_SYMBOL(acpi_get_event_status)
diff --git a/drivers/acpi/acpica/evxfgpe.c b/drivers/acpi/acpica/evxfgpe.c
index 56710a0..e889a53 100644
--- a/drivers/acpi/acpica/evxfgpe.c
+++ b/drivers/acpi/acpica/evxfgpe.c
@@ -106,8 +106,8 @@
*
* FUNCTION: acpi_enable_gpe
*
- * PARAMETERS: gpe_device - Parent GPE Device. NULL for GPE0/GPE1
- * gpe_number - GPE level within the GPE block
+ * PARAMETERS: gpe_device - Parent GPE Device. NULL for GPE0/GPE1
+ * gpe_number - GPE level within the GPE block
*
* RETURN: Status
*
@@ -115,7 +115,6 @@
* hardware-enabled.
*
******************************************************************************/
-
acpi_status acpi_enable_gpe(acpi_handle gpe_device, u32 gpe_number)
{
acpi_status status = AE_BAD_PARAMETER;
@@ -490,8 +489,8 @@
*
* FUNCTION: acpi_get_gpe_status
*
- * PARAMETERS: gpe_device - Parent GPE Device. NULL for GPE0/GPE1
- * gpe_number - GPE level within the GPE block
+ * PARAMETERS: gpe_device - Parent GPE Device. NULL for GPE0/GPE1
+ * gpe_number - GPE level within the GPE block
* event_status - Where the current status of the event
* will be returned
*
@@ -524,9 +523,6 @@
status = acpi_hw_get_gpe_status(gpe_event_info, event_status);
- if (gpe_event_info->flags & ACPI_GPE_DISPATCH_MASK)
- *event_status |= ACPI_EVENT_FLAG_HANDLE;
-
unlock_and_exit:
acpi_os_release_lock(acpi_gbl_gpe_lock, flags);
return_ACPI_STATUS(status);
diff --git a/drivers/acpi/acpica/hwgpe.c b/drivers/acpi/acpica/hwgpe.c
index ea62d40..48ac7b7 100644
--- a/drivers/acpi/acpica/hwgpe.c
+++ b/drivers/acpi/acpica/hwgpe.c
@@ -202,7 +202,7 @@
acpi_status
acpi_hw_get_gpe_status(struct acpi_gpe_event_info * gpe_event_info,
- acpi_event_status * event_status)
+ acpi_event_status *event_status)
{
u32 in_byte;
u32 register_bit;
@@ -216,6 +216,13 @@
return (AE_BAD_PARAMETER);
}
+ /* GPE currently handled? */
+
+ if ((gpe_event_info->flags & ACPI_GPE_DISPATCH_MASK) !=
+ ACPI_GPE_DISPATCH_NONE) {
+ local_event_status |= ACPI_EVENT_FLAG_HAS_HANDLER;
+ }
+
/* Get the info block for the entire GPE register */
gpe_register_info = gpe_event_info->register_info;
diff --git a/drivers/acpi/acpica/tbxfroot.c b/drivers/acpi/acpica/tbxfroot.c
index 65ab8fe..43a54af 100644
--- a/drivers/acpi/acpica/tbxfroot.c
+++ b/drivers/acpi/acpica/tbxfroot.c
@@ -50,6 +50,36 @@
/*******************************************************************************
*
+ * FUNCTION: acpi_tb_get_rsdp_length
+ *
+ * PARAMETERS: rsdp - Pointer to RSDP
+ *
+ * RETURN: Table length
+ *
+ * DESCRIPTION: Get the length of the RSDP
+ *
+ ******************************************************************************/
+u32 acpi_tb_get_rsdp_length(struct acpi_table_rsdp *rsdp)
+{
+
+ if (!ACPI_VALIDATE_RSDP_SIG(rsdp->signature)) {
+
+ /* BAD Signature */
+
+ return (0);
+ }
+
+ /* "Length" field is available if table version >= 2 */
+
+ if (rsdp->revision >= 2) {
+ return (rsdp->length);
+ } else {
+ return (ACPI_RSDP_CHECKSUM_LENGTH);
+ }
+}
+
+/*******************************************************************************
+ *
* FUNCTION: acpi_tb_validate_rsdp
*
* PARAMETERS: rsdp - Pointer to unvalidated RSDP
@@ -59,7 +89,8 @@
* DESCRIPTION: Validate the RSDP (ptr)
*
******************************************************************************/
-acpi_status acpi_tb_validate_rsdp(struct acpi_table_rsdp *rsdp)
+
+acpi_status acpi_tb_validate_rsdp(struct acpi_table_rsdp * rsdp)
{
/*
diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index bea6896..143ec6e 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -343,6 +343,7 @@
return 0;
}
+EXPORT_SYMBOL_GPL(acpi_device_update_power);
int acpi_bus_update_power(acpi_handle handle, int *state_p)
{
@@ -710,7 +711,7 @@
return -ENODEV;
}
- return acpi_device_wakeup(adev, enable, ACPI_STATE_S0);
+ return acpi_device_wakeup(adev, ACPI_STATE_S0, enable);
}
EXPORT_SYMBOL(acpi_pm_device_run_wake);
#endif /* CONFIG_PM_RUNTIME */
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index cb6066c..3d304ff 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -128,12 +128,13 @@
static int EC_FLAGS_CLEAR_ON_RESUME; /* Needs acpi_ec_clear() on boot/resume */
/* --------------------------------------------------------------------------
- Transaction Management
- -------------------------------------------------------------------------- */
+ * Transaction Management
+ * -------------------------------------------------------------------------- */
static inline u8 acpi_ec_read_status(struct acpi_ec *ec)
{
u8 x = inb(ec->command_addr);
+
pr_debug("EC_SC(R) = 0x%2.2x "
"SCI_EVT=%d BURST=%d CMD=%d IBF=%d OBF=%d\n",
x,
@@ -148,6 +149,7 @@
static inline u8 acpi_ec_read_data(struct acpi_ec *ec)
{
u8 x = inb(ec->data_addr);
+
pr_debug("EC_DATA(R) = 0x%2.2x\n", x);
return x;
}
@@ -164,10 +166,32 @@
outb(data, ec->data_addr);
}
+#ifdef DEBUG
+static const char *acpi_ec_cmd_string(u8 cmd)
+{
+ switch (cmd) {
+ case 0x80:
+ return "RD_EC";
+ case 0x81:
+ return "WR_EC";
+ case 0x82:
+ return "BE_EC";
+ case 0x83:
+ return "BD_EC";
+ case 0x84:
+ return "QR_EC";
+ }
+ return "UNKNOWN";
+}
+#else
+#define acpi_ec_cmd_string(cmd) "UNDEF"
+#endif
+
static int ec_transaction_completed(struct acpi_ec *ec)
{
unsigned long flags;
int ret = 0;
+
spin_lock_irqsave(&ec->lock, flags);
if (ec->curr && (ec->curr->flags & ACPI_EC_COMMAND_COMPLETE))
ret = 1;
@@ -181,7 +205,8 @@
u8 status;
bool wakeup = false;
- pr_debug("===== %s =====\n", in_interrupt() ? "IRQ" : "TASK");
+ pr_debug("===== %s (%d) =====\n",
+ in_interrupt() ? "IRQ" : "TASK", smp_processor_id());
status = acpi_ec_read_status(ec);
t = ec->curr;
if (!t)
@@ -198,7 +223,8 @@
if (t->rlen == t->ri) {
t->flags |= ACPI_EC_COMMAND_COMPLETE;
if (t->command == ACPI_EC_COMMAND_QUERY)
- pr_debug("hardware QR_EC completion\n");
+ pr_debug("***** Command(%s) hardware completion *****\n",
+ acpi_ec_cmd_string(t->command));
wakeup = true;
}
} else
@@ -221,7 +247,8 @@
t->flags |= ACPI_EC_COMMAND_POLL;
t->rdata[t->ri++] = 0x00;
t->flags |= ACPI_EC_COMMAND_COMPLETE;
- pr_debug("software QR_EC completion\n");
+ pr_debug("***** Command(%s) software completion *****\n",
+ acpi_ec_cmd_string(t->command));
wakeup = true;
} else if ((status & ACPI_EC_FLAG_IBF) == 0) {
acpi_ec_write_cmd(ec, t->command);
@@ -264,6 +291,7 @@
{
unsigned long flags;
int repeat = 5; /* number of command restarts */
+
while (repeat--) {
unsigned long delay = jiffies +
msecs_to_jiffies(ec_delay);
@@ -296,18 +324,25 @@
{
unsigned long tmp;
int ret = 0;
+
if (EC_FLAGS_MSI)
udelay(ACPI_EC_MSI_UDELAY);
/* start transaction */
spin_lock_irqsave(&ec->lock, tmp);
/* following two actions should be kept atomic */
ec->curr = t;
+ pr_debug("***** Command(%s) started *****\n",
+ acpi_ec_cmd_string(t->command));
start_transaction(ec);
spin_unlock_irqrestore(&ec->lock, tmp);
ret = ec_poll(ec);
spin_lock_irqsave(&ec->lock, tmp);
- if (ec->curr->command == ACPI_EC_COMMAND_QUERY)
+ if (ec->curr->command == ACPI_EC_COMMAND_QUERY) {
clear_bit(EC_FLAGS_QUERY_PENDING, &ec->flags);
+ pr_debug("***** Event stopped *****\n");
+ }
+ pr_debug("***** Command(%s) stopped *****\n",
+ acpi_ec_cmd_string(t->command));
ec->curr = NULL;
spin_unlock_irqrestore(&ec->lock, tmp);
return ret;
@@ -317,6 +352,7 @@
{
int status;
u32 glk;
+
if (!ec || (!t) || (t->wlen && !t->wdata) || (t->rlen && !t->rdata))
return -EINVAL;
if (t->rdata)
@@ -333,8 +369,6 @@
goto unlock;
}
}
- pr_debug("transaction start (cmd=0x%02x, addr=0x%02x)\n",
- t->command, t->wdata ? t->wdata[0] : 0);
/* disable GPE during transaction if storm is detected */
if (test_bit(EC_FLAGS_GPE_STORM, &ec->flags)) {
/* It has to be disabled, so that it doesn't trigger. */
@@ -355,7 +389,6 @@
t->irq_count);
set_bit(EC_FLAGS_GPE_STORM, &ec->flags);
}
- pr_debug("transaction end\n");
if (ec->global_lock)
acpi_release_global_lock(glk);
unlock:
@@ -383,7 +416,7 @@
acpi_ec_transaction(ec, &t) : 0;
}
-static int acpi_ec_read(struct acpi_ec *ec, u8 address, u8 * data)
+static int acpi_ec_read(struct acpi_ec *ec, u8 address, u8 *data)
{
int result;
u8 d;
@@ -419,10 +452,9 @@
if (!err) {
*val = temp_data;
return 0;
- } else
- return err;
+ }
+ return err;
}
-
EXPORT_SYMBOL(ec_read);
int ec_write(u8 addr, u8 val)
@@ -436,22 +468,21 @@
return err;
}
-
EXPORT_SYMBOL(ec_write);
int ec_transaction(u8 command,
- const u8 * wdata, unsigned wdata_len,
- u8 * rdata, unsigned rdata_len)
+ const u8 *wdata, unsigned wdata_len,
+ u8 *rdata, unsigned rdata_len)
{
struct transaction t = {.command = command,
.wdata = wdata, .rdata = rdata,
.wlen = wdata_len, .rlen = rdata_len};
+
if (!first_ec)
return -ENODEV;
return acpi_ec_transaction(first_ec, &t);
}
-
EXPORT_SYMBOL(ec_transaction);
/* Get the handle to the EC device */
@@ -461,7 +492,6 @@
return NULL;
return first_ec->handle;
}
-
EXPORT_SYMBOL(ec_get_handle);
/*
@@ -525,13 +555,14 @@
clear_bit(EC_FLAGS_BLOCKED, &first_ec->flags);
}
-static int acpi_ec_query_unlocked(struct acpi_ec *ec, u8 * data)
+static int acpi_ec_query_unlocked(struct acpi_ec *ec, u8 *data)
{
int result;
u8 d;
struct transaction t = {.command = ACPI_EC_COMMAND_QUERY,
.wdata = NULL, .rdata = &d,
.wlen = 0, .rlen = 1};
+
if (!ec || !data)
return -EINVAL;
/*
@@ -557,6 +588,7 @@
{
struct acpi_ec_query_handler *handler =
kzalloc(sizeof(struct acpi_ec_query_handler), GFP_KERNEL);
+
if (!handler)
return -ENOMEM;
@@ -569,12 +601,12 @@
mutex_unlock(&ec->mutex);
return 0;
}
-
EXPORT_SYMBOL_GPL(acpi_ec_add_query_handler);
void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit)
{
struct acpi_ec_query_handler *handler, *tmp;
+
mutex_lock(&ec->mutex);
list_for_each_entry_safe(handler, tmp, &ec->list, node) {
if (query_bit == handler->query_bit) {
@@ -584,20 +616,20 @@
}
mutex_unlock(&ec->mutex);
}
-
EXPORT_SYMBOL_GPL(acpi_ec_remove_query_handler);
static void acpi_ec_run(void *cxt)
{
struct acpi_ec_query_handler *handler = cxt;
+
if (!handler)
return;
- pr_debug("start query execution\n");
+ pr_debug("##### Query(0x%02x) started #####\n", handler->query_bit);
if (handler->func)
handler->func(handler->data);
else if (handler->handle)
acpi_evaluate_object(handler->handle, NULL, NULL, NULL);
- pr_debug("stop query execution\n");
+ pr_debug("##### Query(0x%02x) stopped #####\n", handler->query_bit);
kfree(handler);
}
@@ -620,8 +652,8 @@
if (!copy)
return -ENOMEM;
memcpy(copy, handler, sizeof(*copy));
- pr_debug("push query execution (0x%2x) on queue\n",
- value);
+ pr_debug("##### Query(0x%02x) scheduled #####\n",
+ handler->query_bit);
return acpi_os_execute((copy->func) ?
OSL_NOTIFY_HANDLER : OSL_GPE_HANDLER,
acpi_ec_run, copy);
@@ -633,6 +665,7 @@
static void acpi_ec_gpe_query(void *ec_cxt)
{
struct acpi_ec *ec = ec_cxt;
+
if (!ec)
return;
mutex_lock(&ec->mutex);
@@ -644,7 +677,7 @@
{
if (state & ACPI_EC_FLAG_SCI) {
if (!test_and_set_bit(EC_FLAGS_QUERY_PENDING, &ec->flags)) {
- pr_debug("push gpe query to the queue\n");
+ pr_debug("***** Event started *****\n");
return acpi_os_execute(OSL_NOTIFY_HANDLER,
acpi_ec_gpe_query, ec);
}
@@ -667,8 +700,8 @@
}
/* --------------------------------------------------------------------------
- Address Space Management
- -------------------------------------------------------------------------- */
+ * Address Space Management
+ * -------------------------------------------------------------------------- */
static acpi_status
acpi_ec_space_handler(u32 function, acpi_physical_address address,
@@ -699,27 +732,26 @@
switch (result) {
case -EINVAL:
return AE_BAD_PARAMETER;
- break;
case -ENODEV:
return AE_NOT_FOUND;
- break;
case -ETIME:
return AE_TIME;
- break;
default:
return AE_OK;
}
}
/* --------------------------------------------------------------------------
- Driver Interface
- -------------------------------------------------------------------------- */
+ * Driver Interface
+ * -------------------------------------------------------------------------- */
+
static acpi_status
ec_parse_io_ports(struct acpi_resource *resource, void *context);
static struct acpi_ec *make_acpi_ec(void)
{
struct acpi_ec *ec = kzalloc(sizeof(struct acpi_ec), GFP_KERNEL);
+
if (!ec)
return NULL;
ec->flags = 1 << EC_FLAGS_QUERY_PENDING;
@@ -742,9 +774,8 @@
status = acpi_get_name(handle, ACPI_SINGLE_NAME, &buffer);
- if (ACPI_SUCCESS(status) && sscanf(node_name, "_Q%x", &value) == 1) {
+ if (ACPI_SUCCESS(status) && sscanf(node_name, "_Q%x", &value) == 1)
acpi_ec_add_query_handler(ec, value, handle, NULL, NULL);
- }
return AE_OK;
}
@@ -753,7 +784,6 @@
{
acpi_status status;
unsigned long long tmp = 0;
-
struct acpi_ec *ec = context;
/* clear addr values, ec_parse_io_ports depend on it */
@@ -781,6 +811,7 @@
static int ec_install_handlers(struct acpi_ec *ec)
{
acpi_status status;
+
if (test_bit(EC_FLAGS_HANDLERS_INSTALLED, &ec->flags))
return 0;
status = acpi_install_gpe_handler(NULL, ec->gpe,
@@ -1078,7 +1109,8 @@
boot_ec->data_addr = ecdt_ptr->data.address;
boot_ec->gpe = ecdt_ptr->gpe;
boot_ec->handle = ACPI_ROOT_OBJECT;
- acpi_get_handle(ACPI_ROOT_OBJECT, ecdt_ptr->id, &boot_ec->handle);
+ acpi_get_handle(ACPI_ROOT_OBJECT, ecdt_ptr->id,
+ &boot_ec->handle);
/* Don't trust ECDT, which comes from ASUSTek */
if (!EC_FLAGS_VALIDATE_ECDT)
goto install;
@@ -1162,6 +1194,5 @@
{
acpi_bus_unregister_driver(&acpi_ec_driver);
- return;
}
#endif /* 0 */
diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c
index 5328b10..caf9b76 100644
--- a/drivers/acpi/fan.c
+++ b/drivers/acpi/fan.c
@@ -30,22 +30,19 @@
#include <linux/uaccess.h>
#include <linux/thermal.h>
#include <linux/acpi.h>
-
-#define ACPI_FAN_CLASS "fan"
-#define ACPI_FAN_FILE_STATE "state"
-
-#define _COMPONENT ACPI_FAN_COMPONENT
-ACPI_MODULE_NAME("fan");
+#include <linux/platform_device.h>
+#include <linux/sort.h>
MODULE_AUTHOR("Paul Diefenbaugh");
MODULE_DESCRIPTION("ACPI Fan Driver");
MODULE_LICENSE("GPL");
-static int acpi_fan_add(struct acpi_device *device);
-static int acpi_fan_remove(struct acpi_device *device);
+static int acpi_fan_probe(struct platform_device *pdev);
+static int acpi_fan_remove(struct platform_device *pdev);
static const struct acpi_device_id fan_device_ids[] = {
{"PNP0C0B", 0},
+ {"INT3404", 0},
{"", 0},
};
MODULE_DEVICE_TABLE(acpi, fan_device_ids);
@@ -64,37 +61,100 @@
#define FAN_PM_OPS_PTR NULL
#endif
-static struct acpi_driver acpi_fan_driver = {
- .name = "fan",
- .class = ACPI_FAN_CLASS,
- .ids = fan_device_ids,
- .ops = {
- .add = acpi_fan_add,
- .remove = acpi_fan_remove,
- },
- .drv.pm = FAN_PM_OPS_PTR,
+struct acpi_fan_fps {
+ u64 control;
+ u64 trip_point;
+ u64 speed;
+ u64 noise_level;
+ u64 power;
+};
+
+struct acpi_fan_fif {
+ u64 revision;
+ u64 fine_grain_ctrl;
+ u64 step_size;
+ u64 low_speed_notification;
+};
+
+struct acpi_fan {
+ bool acpi4;
+ struct acpi_fan_fif fif;
+ struct acpi_fan_fps *fps;
+ int fps_count;
+ struct thermal_cooling_device *cdev;
+};
+
+static struct platform_driver acpi_fan_driver = {
+ .probe = acpi_fan_probe,
+ .remove = acpi_fan_remove,
+ .driver = {
+ .name = "acpi-fan",
+ .acpi_match_table = fan_device_ids,
+ .pm = FAN_PM_OPS_PTR,
+ },
};
/* thermal cooling device callbacks */
static int fan_get_max_state(struct thermal_cooling_device *cdev, unsigned long
*state)
{
- /* ACPI fan device only support two states: ON/OFF */
- *state = 1;
+ struct acpi_device *device = cdev->devdata;
+ struct acpi_fan *fan = acpi_driver_data(device);
+
+ if (fan->acpi4)
+ *state = fan->fps_count - 1;
+ else
+ *state = 1;
return 0;
}
-static int fan_get_cur_state(struct thermal_cooling_device *cdev, unsigned long
- *state)
+static int fan_get_state_acpi4(struct acpi_device *device, unsigned long *state)
{
- struct acpi_device *device = cdev->devdata;
+ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+ struct acpi_fan *fan = acpi_driver_data(device);
+ union acpi_object *obj;
+ acpi_status status;
+ int control, i;
+
+ status = acpi_evaluate_object(device->handle, "_FST", NULL, &buffer);
+ if (ACPI_FAILURE(status)) {
+ dev_err(&device->dev, "Get fan state failed\n");
+ return status;
+ }
+
+ obj = buffer.pointer;
+ if (!obj || obj->type != ACPI_TYPE_PACKAGE ||
+ obj->package.count != 3 ||
+ obj->package.elements[1].type != ACPI_TYPE_INTEGER) {
+ dev_err(&device->dev, "Invalid _FST data\n");
+ status = -EINVAL;
+ goto err;
+ }
+
+ control = obj->package.elements[1].integer.value;
+ for (i = 0; i < fan->fps_count; i++) {
+ if (control == fan->fps[i].control)
+ break;
+ }
+ if (i == fan->fps_count) {
+ dev_dbg(&device->dev, "Invalid control value returned\n");
+ status = -EINVAL;
+ goto err;
+ }
+
+ *state = i;
+
+err:
+ kfree(obj);
+ return status;
+}
+
+static int fan_get_state(struct acpi_device *device, unsigned long *state)
+{
int result;
int acpi_state = ACPI_STATE_D0;
- if (!device)
- return -EINVAL;
-
- result = acpi_bus_update_power(device->handle, &acpi_state);
+ result = acpi_device_update_power(device, &acpi_state);
if (result)
return result;
@@ -103,20 +163,56 @@
return 0;
}
+static int fan_get_cur_state(struct thermal_cooling_device *cdev, unsigned long
+ *state)
+{
+ struct acpi_device *device = cdev->devdata;
+ struct acpi_fan *fan = acpi_driver_data(device);
+
+ if (fan->acpi4)
+ return fan_get_state_acpi4(device, state);
+ else
+ return fan_get_state(device, state);
+}
+
+static int fan_set_state(struct acpi_device *device, unsigned long state)
+{
+ if (state != 0 && state != 1)
+ return -EINVAL;
+
+ return acpi_device_set_power(device,
+ state ? ACPI_STATE_D0 : ACPI_STATE_D3_COLD);
+}
+
+static int fan_set_state_acpi4(struct acpi_device *device, unsigned long state)
+{
+ struct acpi_fan *fan = acpi_driver_data(device);
+ acpi_status status;
+
+ if (state >= fan->fps_count)
+ return -EINVAL;
+
+ status = acpi_execute_simple_method(device->handle, "_FSL",
+ fan->fps[state].control);
+ if (ACPI_FAILURE(status)) {
+ dev_dbg(&device->dev, "Failed to set state by _FSL\n");
+ return status;
+ }
+
+ return 0;
+}
+
static int
fan_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state)
{
struct acpi_device *device = cdev->devdata;
- int result;
+ struct acpi_fan *fan = acpi_driver_data(device);
- if (!device || (state != 0 && state != 1))
- return -EINVAL;
-
- result = acpi_bus_set_power(device->handle,
- state ? ACPI_STATE_D0 : ACPI_STATE_D3_COLD);
-
- return result;
-}
+ if (fan->acpi4)
+ return fan_set_state_acpi4(device, state);
+ else
+ return fan_set_state(device, state);
+ }
static const struct thermal_cooling_device_ops fan_cooling_ops = {
.get_max_state = fan_get_max_state,
@@ -129,21 +225,125 @@
* --------------------------------------------------------------------------
*/
-static int acpi_fan_add(struct acpi_device *device)
+static bool acpi_fan_is_acpi4(struct acpi_device *device)
+{
+ return acpi_has_method(device->handle, "_FIF") &&
+ acpi_has_method(device->handle, "_FPS") &&
+ acpi_has_method(device->handle, "_FSL") &&
+ acpi_has_method(device->handle, "_FST");
+}
+
+static int acpi_fan_get_fif(struct acpi_device *device)
+{
+ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+ struct acpi_fan *fan = acpi_driver_data(device);
+ struct acpi_buffer format = { sizeof("NNNN"), "NNNN" };
+ struct acpi_buffer fif = { sizeof(fan->fif), &fan->fif };
+ union acpi_object *obj;
+ acpi_status status;
+
+ status = acpi_evaluate_object(device->handle, "_FIF", NULL, &buffer);
+ if (ACPI_FAILURE(status))
+ return status;
+
+ obj = buffer.pointer;
+ if (!obj || obj->type != ACPI_TYPE_PACKAGE) {
+ dev_err(&device->dev, "Invalid _FIF data\n");
+ status = -EINVAL;
+ goto err;
+ }
+
+ status = acpi_extract_package(obj, &format, &fif);
+ if (ACPI_FAILURE(status)) {
+ dev_err(&device->dev, "Invalid _FIF element\n");
+ status = -EINVAL;
+ }
+
+err:
+ kfree(obj);
+ return status;
+}
+
+static int acpi_fan_speed_cmp(const void *a, const void *b)
+{
+ const struct acpi_fan_fps *fps1 = a;
+ const struct acpi_fan_fps *fps2 = b;
+ return fps1->speed - fps2->speed;
+}
+
+static int acpi_fan_get_fps(struct acpi_device *device)
+{
+ struct acpi_fan *fan = acpi_driver_data(device);
+ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+ union acpi_object *obj;
+ acpi_status status;
+ int i;
+
+ status = acpi_evaluate_object(device->handle, "_FPS", NULL, &buffer);
+ if (ACPI_FAILURE(status))
+ return status;
+
+ obj = buffer.pointer;
+ if (!obj || obj->type != ACPI_TYPE_PACKAGE || obj->package.count < 2) {
+ dev_err(&device->dev, "Invalid _FPS data\n");
+ status = -EINVAL;
+ goto err;
+ }
+
+ fan->fps_count = obj->package.count - 1; /* minus revision field */
+ fan->fps = devm_kzalloc(&device->dev,
+ fan->fps_count * sizeof(struct acpi_fan_fps),
+ GFP_KERNEL);
+ if (!fan->fps) {
+ dev_err(&device->dev, "Not enough memory\n");
+ status = -ENOMEM;
+ goto err;
+ }
+ for (i = 0; i < fan->fps_count; i++) {
+ struct acpi_buffer format = { sizeof("NNNNN"), "NNNNN" };
+ struct acpi_buffer fps = { sizeof(fan->fps[i]), &fan->fps[i] };
+ status = acpi_extract_package(&obj->package.elements[i + 1],
+ &format, &fps);
+ if (ACPI_FAILURE(status)) {
+ dev_err(&device->dev, "Invalid _FPS element\n");
+ break;
+ }
+ }
+
+ /* sort the state array according to fan speed in increase order */
+ sort(fan->fps, fan->fps_count, sizeof(*fan->fps),
+ acpi_fan_speed_cmp, NULL);
+
+err:
+ kfree(obj);
+ return status;
+}
+
+static int acpi_fan_probe(struct platform_device *pdev)
{
int result = 0;
struct thermal_cooling_device *cdev;
+ struct acpi_fan *fan;
+ struct acpi_device *device = ACPI_COMPANION(&pdev->dev);
- if (!device)
- return -EINVAL;
+ fan = devm_kzalloc(&pdev->dev, sizeof(*fan), GFP_KERNEL);
+ if (!fan) {
+ dev_err(&device->dev, "No memory for fan\n");
+ return -ENOMEM;
+ }
+ device->driver_data = fan;
+ platform_set_drvdata(pdev, fan);
- strcpy(acpi_device_name(device), "Fan");
- strcpy(acpi_device_class(device), ACPI_FAN_CLASS);
-
- result = acpi_bus_update_power(device->handle, NULL);
- if (result) {
- dev_err(&device->dev, "Setting initial power state\n");
- goto end;
+ if (acpi_fan_is_acpi4(device)) {
+ if (acpi_fan_get_fif(device) || acpi_fan_get_fps(device))
+ goto end;
+ fan->acpi4 = true;
+ } else {
+ result = acpi_device_update_power(device, NULL);
+ if (result) {
+ dev_err(&device->dev, "Setting initial power state\n");
+ goto end;
+ }
}
cdev = thermal_cooling_device_register("Fan", device,
@@ -153,44 +353,32 @@
goto end;
}
- dev_dbg(&device->dev, "registered as cooling_device%d\n", cdev->id);
+ dev_dbg(&pdev->dev, "registered as cooling_device%d\n", cdev->id);
- device->driver_data = cdev;
- result = sysfs_create_link(&device->dev.kobj,
+ fan->cdev = cdev;
+ result = sysfs_create_link(&pdev->dev.kobj,
&cdev->device.kobj,
"thermal_cooling");
if (result)
- dev_err(&device->dev, "Failed to create sysfs link "
- "'thermal_cooling'\n");
+ dev_err(&pdev->dev, "Failed to create sysfs link 'thermal_cooling'\n");
result = sysfs_create_link(&cdev->device.kobj,
- &device->dev.kobj,
+ &pdev->dev.kobj,
"device");
if (result)
- dev_err(&device->dev, "Failed to create sysfs link 'device'\n");
-
- dev_info(&device->dev, "ACPI: %s [%s] (%s)\n",
- acpi_device_name(device), acpi_device_bid(device),
- !device->power.state ? "on" : "off");
+ dev_err(&pdev->dev, "Failed to create sysfs link 'device'\n");
end:
return result;
}
-static int acpi_fan_remove(struct acpi_device *device)
+static int acpi_fan_remove(struct platform_device *pdev)
{
- struct thermal_cooling_device *cdev;
+ struct acpi_fan *fan = platform_get_drvdata(pdev);
- if (!device)
- return -EINVAL;
-
- cdev = acpi_driver_data(device);
- if (!cdev)
- return -EINVAL;
-
- sysfs_remove_link(&device->dev.kobj, "thermal_cooling");
- sysfs_remove_link(&cdev->device.kobj, "device");
- thermal_cooling_device_unregister(cdev);
+ sysfs_remove_link(&pdev->dev.kobj, "thermal_cooling");
+ sysfs_remove_link(&fan->cdev->device.kobj, "device");
+ thermal_cooling_device_unregister(fan->cdev);
return 0;
}
@@ -198,10 +386,11 @@
#ifdef CONFIG_PM_SLEEP
static int acpi_fan_suspend(struct device *dev)
{
- if (!dev)
- return -EINVAL;
+ struct acpi_fan *fan = dev_get_drvdata(dev);
+ if (fan->acpi4)
+ return 0;
- acpi_bus_set_power(to_acpi_device(dev)->handle, ACPI_STATE_D0);
+ acpi_device_set_power(ACPI_COMPANION(dev), ACPI_STATE_D0);
return AE_OK;
}
@@ -209,11 +398,12 @@
static int acpi_fan_resume(struct device *dev)
{
int result;
+ struct acpi_fan *fan = dev_get_drvdata(dev);
- if (!dev)
- return -EINVAL;
+ if (fan->acpi4)
+ return 0;
- result = acpi_bus_update_power(to_acpi_device(dev)->handle, NULL);
+ result = acpi_device_update_power(ACPI_COMPANION(dev), NULL);
if (result)
dev_err(dev, "Error updating fan power state\n");
@@ -221,4 +411,4 @@
}
#endif
-module_acpi_driver(acpi_fan_driver);
+module_platform_driver(acpi_fan_driver);
diff --git a/drivers/acpi/int340x_thermal.c b/drivers/acpi/int340x_thermal.c
new file mode 100644
index 0000000..a27d31d
--- /dev/null
+++ b/drivers/acpi/int340x_thermal.c
@@ -0,0 +1,51 @@
+/*
+ * ACPI support for int340x thermal drivers
+ *
+ * Copyright (C) 2014, Intel Corporation
+ * Authors: Zhang Rui <rui.zhang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/acpi.h>
+#include <linux/module.h>
+
+#include "internal.h"
+
+#define DO_ENUMERATION 0x01
+static const struct acpi_device_id int340x_thermal_device_ids[] = {
+ {"INT3400", DO_ENUMERATION },
+ {"INT3401"},
+ {"INT3402"},
+ {"INT3403"},
+ {"INT3404"},
+ {"INT3406"},
+ {"INT3407"},
+ {"INT3408"},
+ {"INT3409"},
+ {"INT340A"},
+ {"INT340B"},
+ {""},
+};
+
+static int int340x_thermal_handler_attach(struct acpi_device *adev,
+ const struct acpi_device_id *id)
+{
+#if defined(CONFIG_INT340X_THERMAL) || defined(CONFIG_INT340X_THERMAL_MODULE)
+ if (id->driver_data == DO_ENUMERATION)
+ acpi_create_platform_device(adev);
+#endif
+ return 1;
+}
+
+static struct acpi_scan_handler int340x_thermal_handler = {
+ .ids = int340x_thermal_device_ids,
+ .attach = int340x_thermal_handler_attach,
+};
+
+void __init acpi_int340x_thermal_init(void)
+{
+ acpi_scan_add_handler(&int340x_thermal_handler);
+}
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 4c5cf77..447f6d6 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -31,6 +31,7 @@
void acpi_processor_init(void);
void acpi_platform_init(void);
void acpi_pnp_init(void);
+void acpi_int340x_thermal_init(void);
int acpi_sysfs_init(void);
void acpi_container_init(void);
void acpi_memory_hotplug_init(void);
@@ -103,8 +104,6 @@
int acpi_power_on_resources(struct acpi_device *device, int state);
int acpi_power_transition(struct acpi_device *device, int state);
-int acpi_device_update_power(struct acpi_device *device, int *state_p);
-
int acpi_wakeup_device_init(void);
#ifdef CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC
@@ -168,13 +167,6 @@
#endif
/*--------------------------------------------------------------------------
- Platform bus support
- -------------------------------------------------------------------------- */
-struct platform_device;
-
-struct platform_device *acpi_create_platform_device(struct acpi_device *adev);
-
-/*--------------------------------------------------------------------------
Video
-------------------------------------------------------------------------- */
#if defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE)
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index ae44d86..d670158 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1470,7 +1470,7 @@
if (ACPI_FAILURE(status))
return;
- wakeup->flags.run_wake = !!(event_status & ACPI_EVENT_FLAG_HANDLE);
+ wakeup->flags.run_wake = !!(event_status & ACPI_EVENT_FLAG_HAS_HANDLER);
}
static void acpi_bus_get_wakeup_device_flags(struct acpi_device *device)
@@ -2315,6 +2315,7 @@
acpi_container_init();
acpi_memory_hotplug_init();
acpi_pnp_init();
+ acpi_int340x_thermal_init();
mutex_lock(&acpi_scan_lock);
/*
diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
index 38cb978..13e577c 100644
--- a/drivers/acpi/sysfs.c
+++ b/drivers/acpi/sysfs.c
@@ -537,7 +537,7 @@
if (result)
goto end;
- if (!(status & ACPI_EVENT_FLAG_HANDLE))
+ if (!(status & ACPI_EVENT_FLAG_HAS_HANDLER))
size += sprintf(buf + size, " invalid");
else if (status & ACPI_EVENT_FLAG_ENABLED)
size += sprintf(buf + size, " enabled");
@@ -581,7 +581,7 @@
if (result)
goto end;
- if (!(status & ACPI_EVENT_FLAG_HANDLE)) {
+ if (!(status & ACPI_EVENT_FLAG_HAS_HANDLER)) {
printk(KERN_WARNING PREFIX
"Can not change Invalid GPE/Fixed Event status\n");
return -EINVAL;
diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index 112817e..d24fa19 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -528,7 +528,6 @@
}
/* sys I/F for generic thermal sysfs support */
-#define KELVIN_TO_MILLICELSIUS(t, off) (((t) - (off)) * 100)
static int thermal_get_temp(struct thermal_zone_device *thermal,
unsigned long *temp)
@@ -543,7 +542,8 @@
if (result)
return result;
- *temp = KELVIN_TO_MILLICELSIUS(tz->temperature, tz->kelvin_offset);
+ *temp = DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(tz->temperature,
+ tz->kelvin_offset);
return 0;
}
@@ -647,7 +647,7 @@
if (tz->trips.critical.flags.valid) {
if (!trip) {
- *temp = KELVIN_TO_MILLICELSIUS(
+ *temp = DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(
tz->trips.critical.temperature,
tz->kelvin_offset);
return 0;
@@ -657,7 +657,7 @@
if (tz->trips.hot.flags.valid) {
if (!trip) {
- *temp = KELVIN_TO_MILLICELSIUS(
+ *temp = DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(
tz->trips.hot.temperature,
tz->kelvin_offset);
return 0;
@@ -667,7 +667,7 @@
if (tz->trips.passive.flags.valid) {
if (!trip) {
- *temp = KELVIN_TO_MILLICELSIUS(
+ *temp = DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(
tz->trips.passive.temperature,
tz->kelvin_offset);
return 0;
@@ -678,7 +678,7 @@
for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE &&
tz->trips.active[i].flags.valid; i++) {
if (!trip) {
- *temp = KELVIN_TO_MILLICELSIUS(
+ *temp = DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(
tz->trips.active[i].temperature,
tz->kelvin_offset);
return 0;
@@ -694,7 +694,7 @@
struct acpi_thermal *tz = thermal->devdata;
if (tz->trips.critical.flags.valid) {
- *temperature = KELVIN_TO_MILLICELSIUS(
+ *temperature = DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(
tz->trips.critical.temperature,
tz->kelvin_offset);
return 0;
@@ -714,8 +714,8 @@
if (type == THERMAL_TRIP_ACTIVE) {
unsigned long trip_temp;
- unsigned long temp = KELVIN_TO_MILLICELSIUS(tz->temperature,
- tz->kelvin_offset);
+ unsigned long temp = DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(
+ tz->temperature, tz->kelvin_offset);
if (thermal_get_trip_temp(thermal, trip, &trip_temp))
return -EINVAL;
diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c
index 834f35c..371ac12 100644
--- a/drivers/acpi/utils.c
+++ b/drivers/acpi/utils.c
@@ -149,6 +149,21 @@
break;
}
break;
+ case ACPI_TYPE_LOCAL_REFERENCE:
+ switch (format_string[i]) {
+ case 'R':
+ size_required += sizeof(void *);
+ tail_offset += sizeof(void *);
+ break;
+ default:
+ printk(KERN_WARNING PREFIX "Invalid package element"
+ " [%d] got reference,"
+ " expecting [%c]\n",
+ i, format_string[i]);
+ return AE_BAD_DATA;
+ break;
+ }
+ break;
case ACPI_TYPE_PACKAGE:
default:
@@ -247,7 +262,18 @@
break;
}
break;
-
+ case ACPI_TYPE_LOCAL_REFERENCE:
+ switch (format_string[i]) {
+ case 'R':
+ *(void **)head =
+ (void *)element->reference.handle;
+ head += sizeof(void *);
+ break;
+ default:
+ /* Should never get here */
+ break;
+ }
+ break;
case ACPI_TYPE_PACKAGE:
/* TBD: handle nested packages... */
default:
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 82759ce..04645c0 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1106,7 +1106,7 @@
__mix_pool_bytes(r, hash.w, sizeof(hash.w));
spin_unlock_irqrestore(&r->lock, flags);
- memset(workspace, 0, sizeof(workspace));
+ memzero_explicit(workspace, sizeof(workspace));
/*
* In case the hash function has some recognizable output
@@ -1118,7 +1118,7 @@
hash.w[2] ^= rol32(hash.w[2], 16);
memcpy(out, &hash, EXTRACT_SIZE);
- memset(&hash, 0, sizeof(hash));
+ memzero_explicit(&hash, sizeof(hash));
}
/*
@@ -1175,7 +1175,7 @@
}
/* Wipe data just returned from memory */
- memset(tmp, 0, sizeof(tmp));
+ memzero_explicit(tmp, sizeof(tmp));
return ret;
}
@@ -1218,7 +1218,7 @@
}
/* Wipe data just returned from memory */
- memset(tmp, 0, sizeof(tmp));
+ memzero_explicit(tmp, sizeof(tmp));
return ret;
}
diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index 6bbb8b9..92c162a 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -18,6 +18,7 @@
#include <linux/cpu.h>
#include <linux/cpu_cooling.h>
#include <linux/cpufreq.h>
+#include <linux/cpufreq-dt.h>
#include <linux/cpumask.h>
#include <linux/err.h>
#include <linux/module.h>
@@ -146,8 +147,8 @@
goto try_again;
}
- dev_warn(cpu_dev, "failed to get cpu%d regulator: %ld\n",
- cpu, PTR_ERR(cpu_reg));
+ dev_dbg(cpu_dev, "no regulator for cpu%d: %ld\n",
+ cpu, PTR_ERR(cpu_reg));
}
cpu_clk = clk_get(cpu_dev, NULL);
@@ -178,6 +179,7 @@
static int cpufreq_init(struct cpufreq_policy *policy)
{
+ struct cpufreq_dt_platform_data *pd;
struct cpufreq_frequency_table *freq_table;
struct thermal_cooling_device *cdev;
struct device_node *np;
@@ -265,9 +267,18 @@
policy->driver_data = priv;
policy->clk = cpu_clk;
- ret = cpufreq_generic_init(policy, freq_table, transition_latency);
- if (ret)
+ ret = cpufreq_table_validate_and_show(policy, freq_table);
+ if (ret) {
+ dev_err(cpu_dev, "%s: invalid frequency table: %d\n", __func__,
+ ret);
goto out_cooling_unregister;
+ }
+
+ policy->cpuinfo.transition_latency = transition_latency;
+
+ pd = cpufreq_get_driver_data();
+ if (pd && !pd->independent_clocks)
+ cpumask_setall(policy->cpus);
of_node_put(np);
@@ -335,6 +346,8 @@
if (!IS_ERR(cpu_reg))
regulator_put(cpu_reg);
+ dt_cpufreq_driver.driver_data = dev_get_platdata(&pdev->dev);
+
ret = cpufreq_register_driver(&dt_cpufreq_driver);
if (ret)
dev_err(cpu_dev, "failed register driver: %d\n", ret);
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 24bf76f..644b54e 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -512,7 +512,18 @@
show_one(cpuinfo_transition_latency, cpuinfo.transition_latency);
show_one(scaling_min_freq, min);
show_one(scaling_max_freq, max);
-show_one(scaling_cur_freq, cur);
+
+static ssize_t show_scaling_cur_freq(
+ struct cpufreq_policy *policy, char *buf)
+{
+ ssize_t ret;
+
+ if (cpufreq_driver && cpufreq_driver->setpolicy && cpufreq_driver->get)
+ ret = sprintf(buf, "%u\n", cpufreq_driver->get(policy->cpu));
+ else
+ ret = sprintf(buf, "%u\n", policy->cur);
+ return ret;
+}
static int cpufreq_set_policy(struct cpufreq_policy *policy,
struct cpufreq_policy *new_policy);
@@ -906,11 +917,11 @@
if (ret)
goto err_out_kobj_put;
}
- if (has_target()) {
- ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
- if (ret)
- goto err_out_kobj_put;
- }
+
+ ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
+ if (ret)
+ goto err_out_kobj_put;
+
if (cpufreq_driver->bios_limit) {
ret = sysfs_create_file(&policy->kobj, &bios_limit.attr);
if (ret)
@@ -1731,6 +1742,21 @@
}
EXPORT_SYMBOL_GPL(cpufreq_get_current_driver);
+/**
+ * cpufreq_get_driver_data - return current driver data
+ *
+ * Return the private data of the currently loaded cpufreq
+ * driver, or NULL if no cpufreq driver is loaded.
+ */
+void *cpufreq_get_driver_data(void)
+{
+ if (cpufreq_driver)
+ return cpufreq_driver->driver_data;
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(cpufreq_get_driver_data);
+
/*********************************************************************
* NOTIFIER LISTS INTERFACE *
*********************************************************************/
diff --git a/drivers/cpufreq/highbank-cpufreq.c b/drivers/cpufreq/highbank-cpufreq.c
index ec399ad..1608f71 100644
--- a/drivers/cpufreq/highbank-cpufreq.c
+++ b/drivers/cpufreq/highbank-cpufreq.c
@@ -19,7 +19,7 @@
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/of.h>
-#include <linux/mailbox.h>
+#include <linux/pl320-ipc.h>
#include <linux/platform_device.h>
#define HB_CPUFREQ_CHANGE_NOTE 0x80000001
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 0668b38..27bb6d3 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -52,6 +52,17 @@
return div_s64((int64_t)x << FRAC_BITS, y);
}
+static inline int ceiling_fp(int32_t x)
+{
+ int mask, ret;
+
+ ret = fp_toint(x);
+ mask = (1 << FRAC_BITS) - 1;
+ if (x & mask)
+ ret += 1;
+ return ret;
+}
+
struct sample {
int32_t core_pct_busy;
u64 aperf;
@@ -64,6 +75,7 @@
int current_pstate;
int min_pstate;
int max_pstate;
+ int scaling;
int turbo_pstate;
};
@@ -113,6 +125,7 @@
int (*get_max)(void);
int (*get_min)(void);
int (*get_turbo)(void);
+ int (*get_scaling)(void);
void (*set)(struct cpudata*, int pstate);
void (*get_vid)(struct cpudata *);
};
@@ -138,6 +151,7 @@
static struct perf_limits limits = {
.no_turbo = 0,
+ .turbo_disabled = 0,
.max_perf_pct = 100,
.max_perf = int_tofp(1),
.min_perf_pct = 0,
@@ -218,6 +232,18 @@
}
}
+static inline void update_turbo_state(void)
+{
+ u64 misc_en;
+ struct cpudata *cpu;
+
+ cpu = all_cpu_data[0];
+ rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
+ limits.turbo_disabled =
+ (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
+ cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
+}
+
/************************** debugfs begin ************************/
static int pid_param_set(void *data, u64 val)
{
@@ -274,6 +300,20 @@
return sprintf(buf, "%u\n", limits.object); \
}
+static ssize_t show_no_turbo(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ ssize_t ret;
+
+ update_turbo_state();
+ if (limits.turbo_disabled)
+ ret = sprintf(buf, "%u\n", limits.turbo_disabled);
+ else
+ ret = sprintf(buf, "%u\n", limits.no_turbo);
+
+ return ret;
+}
+
static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
const char *buf, size_t count)
{
@@ -283,11 +323,14 @@
ret = sscanf(buf, "%u", &input);
if (ret != 1)
return -EINVAL;
- limits.no_turbo = clamp_t(int, input, 0 , 1);
+
+ update_turbo_state();
if (limits.turbo_disabled) {
pr_warn("Turbo disabled by BIOS or unavailable on processor\n");
- limits.no_turbo = limits.turbo_disabled;
+ return -EPERM;
}
+ limits.no_turbo = clamp_t(int, input, 0, 1);
+
return count;
}
@@ -323,7 +366,6 @@
return count;
}
-show_one(no_turbo, no_turbo);
show_one(max_perf_pct, max_perf_pct);
show_one(min_perf_pct, min_perf_pct);
@@ -394,7 +436,7 @@
cpudata->vid.ratio);
vid_fp = clamp_t(int32_t, vid_fp, cpudata->vid.min, cpudata->vid.max);
- vid = fp_toint(vid_fp);
+ vid = ceiling_fp(vid_fp);
if (pstate > cpudata->pstate.max_pstate)
vid = cpudata->vid.turbo;
@@ -404,6 +446,22 @@
wrmsrl(MSR_IA32_PERF_CTL, val);
}
+#define BYT_BCLK_FREQS 5
+static int byt_freq_table[BYT_BCLK_FREQS] = { 833, 1000, 1333, 1167, 800};
+/* Scaling factor: byt_freq_table[] entry picked by MSR_FSB_FREQ, x100 */
+static int byt_get_scaling(void)
+{
+ u64 value;
+ int i;
+
+ rdmsrl(MSR_FSB_FREQ, value);
+ i = value & 0x3; /* NOTE(review): can never select entry 4; confirm mask */
+
+ BUG_ON(i >= BYT_BCLK_FREQS); /* valid indices: 0..BYT_BCLK_FREQS-1 */
+
+ return byt_freq_table[i] * 100;
+}
+
static void byt_get_vid(struct cpudata *cpudata)
{
u64 value;
@@ -449,6 +507,11 @@
return ret;
}
+static inline int core_get_scaling(void)
+{
+ return 100000;
+}
+
static void core_set_pstate(struct cpudata *cpudata, int pstate)
{
u64 val;
@@ -473,6 +536,7 @@
.get_max = core_get_max_pstate,
.get_min = core_get_min_pstate,
.get_turbo = core_get_turbo_pstate,
+ .get_scaling = core_get_scaling,
.set = core_set_pstate,
},
};
@@ -491,6 +555,7 @@
.get_min = byt_get_min_pstate,
.get_turbo = byt_get_turbo_pstate,
.set = byt_set_pstate,
+ .get_scaling = byt_get_scaling,
.get_vid = byt_get_vid,
},
};
@@ -501,7 +566,7 @@
int max_perf_adj;
int min_perf;
- if (limits.no_turbo)
+ if (limits.no_turbo || limits.turbo_disabled)
max_perf = cpu->pstate.max_pstate;
max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf));
@@ -516,6 +581,8 @@
{
int max_perf, min_perf;
+ update_turbo_state();
+
intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
pstate = clamp_t(int, pstate, min_perf, max_perf);
@@ -523,7 +590,7 @@
if (pstate == cpu->pstate.current_pstate)
return;
- trace_cpu_frequency(pstate * 100000, cpu->cpu);
+ trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
cpu->pstate.current_pstate = pstate;
@@ -535,6 +602,7 @@
cpu->pstate.min_pstate = pstate_funcs.get_min();
cpu->pstate.max_pstate = pstate_funcs.get_max();
cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
+ cpu->pstate.scaling = pstate_funcs.get_scaling();
if (pstate_funcs.get_vid)
pstate_funcs.get_vid(cpu);
@@ -550,7 +618,9 @@
core_pct = div64_u64(core_pct, int_tofp(sample->mperf));
sample->freq = fp_toint(
- mul_fp(int_tofp(cpu->pstate.max_pstate * 1000), core_pct));
+ mul_fp(int_tofp(
+ cpu->pstate.max_pstate * cpu->pstate.scaling / 100),
+ core_pct));
sample->core_pct_busy = (int32_t)core_pct;
}
@@ -671,7 +741,9 @@
{
struct cpudata *cpu;
- all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata), GFP_KERNEL);
+ if (!all_cpu_data[cpunum])
+ all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata),
+ GFP_KERNEL);
if (!all_cpu_data[cpunum])
return -ENOMEM;
@@ -714,9 +786,10 @@
if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
limits.min_perf_pct = 100;
limits.min_perf = int_tofp(1);
+ limits.max_policy_pct = 100;
limits.max_perf_pct = 100;
limits.max_perf = int_tofp(1);
- limits.no_turbo = limits.turbo_disabled;
+ limits.no_turbo = 0;
return 0;
}
limits.min_perf_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
@@ -751,15 +824,12 @@
del_timer_sync(&all_cpu_data[cpu_num]->timer);
intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate);
- kfree(all_cpu_data[cpu_num]);
- all_cpu_data[cpu_num] = NULL;
}
static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
{
struct cpudata *cpu;
int rc;
- u64 misc_en;
rc = intel_pstate_init_cpu(policy->cpu);
if (rc)
@@ -767,23 +837,18 @@
cpu = all_cpu_data[policy->cpu];
- rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
- if (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
- cpu->pstate.max_pstate == cpu->pstate.turbo_pstate) {
- limits.turbo_disabled = 1;
- limits.no_turbo = 1;
- }
if (limits.min_perf_pct == 100 && limits.max_perf_pct == 100)
policy->policy = CPUFREQ_POLICY_PERFORMANCE;
else
policy->policy = CPUFREQ_POLICY_POWERSAVE;
- policy->min = cpu->pstate.min_pstate * 100000;
- policy->max = cpu->pstate.turbo_pstate * 100000;
+ policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
+ policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
/* cpuinfo and default policy values */
- policy->cpuinfo.min_freq = cpu->pstate.min_pstate * 100000;
- policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate * 100000;
+ policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
+ policy->cpuinfo.max_freq =
+ cpu->pstate.turbo_pstate * cpu->pstate.scaling;
policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
cpumask_set_cpu(policy->cpu, policy->cpus);
@@ -841,6 +906,7 @@
pstate_funcs.get_max = funcs->get_max;
pstate_funcs.get_min = funcs->get_min;
pstate_funcs.get_turbo = funcs->get_turbo;
+ pstate_funcs.get_scaling = funcs->get_scaling;
pstate_funcs.set = funcs->set;
pstate_funcs.get_vid = funcs->get_vid;
}
diff --git a/drivers/cpuidle/Kconfig.mips b/drivers/cpuidle/Kconfig.mips
index 0e70ee2..4102be0 100644
--- a/drivers/cpuidle/Kconfig.mips
+++ b/drivers/cpuidle/Kconfig.mips
@@ -3,7 +3,7 @@
#
config MIPS_CPS_CPUIDLE
bool "CPU Idle driver for MIPS CPS platforms"
- depends on CPU_IDLE
+ depends on CPU_IDLE && MIPS_CPS
depends on SYS_SUPPORTS_MIPS_CPS
select ARCH_NEEDS_CPU_IDLE_COUPLED if MIPS_MT
select GENERIC_CLOCKEVENTS_BROADCAST if SMP
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index a64be57..7d3a349 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -163,7 +163,8 @@
int nr_idle_states = 1; /* Snooze */
int dt_idle_states;
const __be32 *idle_state_flags;
- u32 len_flags, flags;
+ const __be32 *idle_state_latency;
+ u32 len_flags, flags, latency_ns;
int i;
/* Currently we have snooze statically defined */
@@ -180,18 +181,32 @@
return nr_idle_states;
}
+ idle_state_latency = of_get_property(power_mgt,
+ "ibm,cpu-idle-state-latencies-ns", NULL);
+ if (!idle_state_latency) {
+ pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-latencies-ns\n");
+ return nr_idle_states;
+ }
+
dt_idle_states = len_flags / sizeof(u32);
for (i = 0; i < dt_idle_states; i++) {
flags = be32_to_cpu(idle_state_flags[i]);
+
+ /* Cpuidle accepts exit_latency in us and we estimate
+ * target residency to be 10x exit_latency
+ */
+ latency_ns = be32_to_cpu(idle_state_latency[i]);
if (flags & IDLE_USE_INST_NAP) {
/* Add NAP state */
strcpy(powernv_states[nr_idle_states].name, "Nap");
strcpy(powernv_states[nr_idle_states].desc, "Nap");
powernv_states[nr_idle_states].flags = CPUIDLE_FLAG_TIME_VALID;
- powernv_states[nr_idle_states].exit_latency = 10;
- powernv_states[nr_idle_states].target_residency = 100;
+ powernv_states[nr_idle_states].exit_latency =
+ ((unsigned int)latency_ns) / 1000;
+ powernv_states[nr_idle_states].target_residency =
+ ((unsigned int)latency_ns / 100);
powernv_states[nr_idle_states].enter = &nap_loop;
nr_idle_states++;
}
@@ -202,8 +217,10 @@
strcpy(powernv_states[nr_idle_states].desc, "FastSleep");
powernv_states[nr_idle_states].flags =
CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TIMER_STOP;
- powernv_states[nr_idle_states].exit_latency = 300;
- powernv_states[nr_idle_states].target_residency = 1000000;
+ powernv_states[nr_idle_states].exit_latency =
+ ((unsigned int)latency_ns) / 1000;
+ powernv_states[nr_idle_states].target_residency =
+ ((unsigned int)latency_ns / 100);
powernv_states[nr_idle_states].enter = &fastsleep_loop;
nr_idle_states++;
}
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 64ecbb5..8590099 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -41,6 +41,28 @@
};
EXPORT_SYMBOL(efi);
+static bool disable_runtime;
+static int __init setup_noefi(char *arg)
+{
+ disable_runtime = true;
+ return 0;
+}
+early_param("noefi", setup_noefi);
+
+bool efi_runtime_disabled(void)
+{
+ return disable_runtime;
+}
+
+static int __init parse_efi_cmdline(char *str)
+{
+ if (parse_option_str(str, "noruntime"))
+ disable_runtime = true;
+
+ return 0;
+}
+early_param("efi", parse_efi_cmdline);
+
static struct kobject *efi_kobj;
static struct kobject *efivars_kobj;
@@ -423,3 +445,60 @@
return ret;
}
#endif /* CONFIG_EFI_PARAMS_FROM_FDT */
+
+static __initdata char memory_type_name[][20] = {
+ "Reserved",
+ "Loader Code",
+ "Loader Data",
+ "Boot Code",
+ "Boot Data",
+ "Runtime Code",
+ "Runtime Data",
+ "Conventional Memory",
+ "Unusable Memory",
+ "ACPI Reclaim Memory",
+ "ACPI Memory NVS",
+ "Memory Mapped I/O",
+ "MMIO Port Space",
+ "PAL Code"
+};
+
+char * __init efi_md_typeattr_format(char *buf, size_t size,
+ const efi_memory_desc_t *md)
+{
+ char *pos;
+ int type_len;
+ u64 attr;
+
+ pos = buf;
+ if (md->type >= ARRAY_SIZE(memory_type_name))
+ type_len = snprintf(pos, size, "[type=%u", md->type);
+ else
+ type_len = snprintf(pos, size, "[%-*s",
+ (int)(sizeof(memory_type_name[0]) - 1),
+ memory_type_name[md->type]);
+ if (type_len >= size)
+ return buf;
+
+ pos += type_len;
+ size -= type_len;
+
+ attr = md->attribute;
+ if (attr & ~(EFI_MEMORY_UC | EFI_MEMORY_WC | EFI_MEMORY_WT |
+ EFI_MEMORY_WB | EFI_MEMORY_UCE | EFI_MEMORY_WP |
+ EFI_MEMORY_RP | EFI_MEMORY_XP | EFI_MEMORY_RUNTIME))
+ snprintf(pos, size, "|attr=0x%016llx]",
+ (unsigned long long)attr);
+ else
+ snprintf(pos, size, "|%3s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]",
+ attr & EFI_MEMORY_RUNTIME ? "RUN" : "",
+ attr & EFI_MEMORY_XP ? "XP" : "",
+ attr & EFI_MEMORY_RP ? "RP" : "",
+ attr & EFI_MEMORY_WP ? "WP" : "",
+ attr & EFI_MEMORY_UCE ? "UCE" : "",
+ attr & EFI_MEMORY_WB ? "WB" : "",
+ attr & EFI_MEMORY_WT ? "WT" : "",
+ attr & EFI_MEMORY_WC ? "WC" : "",
+ attr & EFI_MEMORY_UC ? "UC" : "");
+ return buf;
+}
diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c
index 480339b..75ee059 100644
--- a/drivers/firmware/efi/libstub/arm-stub.c
+++ b/drivers/firmware/efi/libstub/arm-stub.c
@@ -226,6 +226,10 @@
goto fail_free_image;
}
+ status = efi_parse_options(cmdline_ptr);
+ if (status != EFI_SUCCESS)
+ pr_efi_err(sys_table, "Failed to parse EFI cmdline options\n");
+
/*
* Unauthenticated device tree data is a security hazard, so
* ignore 'dtb=' unless UEFI Secure Boot is disabled.
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index 32d5cca..a920fec 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -15,8 +15,23 @@
#include "efistub.h"
+/*
+ * Some firmware implementations have problems reading files in one go.
+ * A read chunk size of 1MB seems to work for most platforms.
+ *
+ * Unfortunately, reading files in chunks triggers *other* bugs on some
+ * platforms, so we provide a way to disable this workaround, which can
+ * be done by passing "efi=nochunk" on the EFI boot stub command line.
+ *
+ * If you experience issues with initrd images being corrupt it's worth
+ * trying efi=nochunk, but chunking is enabled by default because there
+ * are far more machines that require the workaround than those that
+ * break with it enabled.
+ */
#define EFI_READ_CHUNK_SIZE (1024 * 1024)
+static unsigned long __chunk_size = EFI_READ_CHUNK_SIZE;
+
struct file_info {
efi_file_handle_t *handle;
u64 size;
@@ -281,6 +296,49 @@
efi_call_early(free_pages, addr, nr_pages);
}
+/*
+ * Parse the ASCII string 'cmdline' for EFI options, denoted by the efi=
+ * option, e.g. efi=nochunk.
+ *
+ * It should be noted that efi= is parsed in two very different
+ * environments, first in the early boot environment of the EFI boot
+ * stub, and subsequently during the kernel boot.
+ */
+efi_status_t efi_parse_options(char *cmdline)
+{
+ char *str;
+
+ /*
+ * If no EFI parameters were specified on the cmdline we've got
+ * nothing to do.
+ */
+ str = strstr(cmdline, "efi=");
+ if (!str)
+ return EFI_SUCCESS;
+
+ /* Skip ahead to first argument */
+ str += strlen("efi=");
+
+ /*
+ * efi= is also parsed by the kernel proper, so skip options we don't
+ * understand; a space ends the efi= value, so stop scanning there.
+ */
+ while (*str && *str != ' ') {
+ if (!strncmp(str, "nochunk", 7)) {
+ str += strlen("nochunk");
+ __chunk_size = -1UL;
+ }
+
+ /* Group words together, delimited by "," (or the closing space) */
+ while (*str && *str != ' ' && *str != ',')
+ str++;
+
+ if (*str == ',')
+ str++;
+ }
+
+ return EFI_SUCCESS;
+}
/*
* Check the cmdline for a LILO-style file= arguments.
@@ -423,8 +481,8 @@
size = files[j].size;
while (size) {
unsigned long chunksize;
- if (size > EFI_READ_CHUNK_SIZE)
- chunksize = EFI_READ_CHUNK_SIZE;
+ if (size > __chunk_size)
+ chunksize = __chunk_size;
else
chunksize = size;
diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
index 10daa4b..228bbf9 100644
--- a/drivers/firmware/efi/runtime-wrappers.c
+++ b/drivers/firmware/efi/runtime-wrappers.c
@@ -14,11 +14,80 @@
* This file is released under the GPLv2.
*/
+#include <linux/bug.h>
#include <linux/efi.h>
-#include <linux/spinlock.h> /* spinlock_t */
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
#include <asm/efi.h>
/*
+ * According to section 7.1 of the UEFI spec, Runtime Services are not fully
+ * reentrant, and there are particular combinations of calls that need to be
+ * serialized. (source: UEFI Specification v2.4A)
+ *
+ * Table 31. Rules for Reentry Into Runtime Services
+ * +------------------------------------+-------------------------------+
+ * | If previous call is busy in | Forbidden to call |
+ * +------------------------------------+-------------------------------+
+ * | Any | SetVirtualAddressMap() |
+ * +------------------------------------+-------------------------------+
+ * | ConvertPointer() | ConvertPointer() |
+ * +------------------------------------+-------------------------------+
+ * | SetVariable() | ResetSystem() |
+ * | UpdateCapsule() | |
+ * | SetTime() | |
+ * | SetWakeupTime() | |
+ * | GetNextHighMonotonicCount() | |
+ * +------------------------------------+-------------------------------+
+ * | GetVariable() | GetVariable() |
+ * | GetNextVariableName() | GetNextVariableName() |
+ * | SetVariable() | SetVariable() |
+ * | QueryVariableInfo() | QueryVariableInfo() |
+ * | UpdateCapsule() | UpdateCapsule() |
+ * | QueryCapsuleCapabilities() | QueryCapsuleCapabilities() |
+ * | GetNextHighMonotonicCount() | GetNextHighMonotonicCount() |
+ * +------------------------------------+-------------------------------+
+ * | GetTime() | GetTime() |
+ * | SetTime() | SetTime() |
+ * | GetWakeupTime() | GetWakeupTime() |
+ * | SetWakeupTime() | SetWakeupTime() |
+ * +------------------------------------+-------------------------------+
+ *
+ * Due to the fact that the EFI pstore may write to the variable store in
+ * interrupt context, we need to use a spinlock for at least the groups that
+ * contain SetVariable() and QueryVariableInfo(). That leaves little else, as
+ * none of the remaining functions are actually ever called at runtime.
+ * So let's just use a single spinlock to serialize all Runtime Services calls.
+ */
+static DEFINE_SPINLOCK(efi_runtime_lock);
+
+/*
+ * Some runtime services calls can be reentrant under NMI, even if the table
+ * above says they are not. (source: UEFI Specification v2.4A)
+ *
+ * Table 32. Functions that may be called after Machine Check, INIT and NMI
+ * +----------------------------+------------------------------------------+
+ * | Function | Called after Machine Check, INIT and NMI |
+ * +----------------------------+------------------------------------------+
+ * | GetTime() | Yes, even if previously busy. |
+ * | GetVariable() | Yes, even if previously busy |
+ * | GetNextVariableName() | Yes, even if previously busy |
+ * | QueryVariableInfo() | Yes, even if previously busy |
+ * | SetVariable() | Yes, even if previously busy |
+ * | UpdateCapsule() | Yes, even if previously busy |
+ * | QueryCapsuleCapabilities() | Yes, even if previously busy |
+ * | ResetSystem() | Yes, even if previously busy |
+ * +----------------------------+------------------------------------------+
+ *
+ * In order to prevent deadlocks under NMI, the wrappers for these functions
+ * may only grab the efi_runtime_lock or rtc_lock spinlocks if !efi_in_nmi().
+ * However, not all of the services listed are reachable through NMI code paths,
+ * so the special handling as suggested by the UEFI spec is only implemented
+ * for QueryVariableInfo() and SetVariable(), as these can be reached in NMI
+ * context through efi_pstore_write().
+ */
+
+/*
* As per commit ef68c8f87ed1 ("x86: Serialize EFI time accesses on rtc_lock"),
* the EFI specification requires that callers of the time related runtime
* functions serialize with other CMOS accesses in the kernel, as the EFI time
@@ -32,7 +101,9 @@
efi_status_t status;
spin_lock_irqsave(&rtc_lock, flags);
+ spin_lock(&efi_runtime_lock);
status = efi_call_virt(get_time, tm, tc);
+ spin_unlock(&efi_runtime_lock);
spin_unlock_irqrestore(&rtc_lock, flags);
return status;
}
@@ -43,7 +114,9 @@
efi_status_t status;
spin_lock_irqsave(&rtc_lock, flags);
+ spin_lock(&efi_runtime_lock);
status = efi_call_virt(set_time, tm);
+ spin_unlock(&efi_runtime_lock);
spin_unlock_irqrestore(&rtc_lock, flags);
return status;
}
@@ -56,7 +129,9 @@
efi_status_t status;
spin_lock_irqsave(&rtc_lock, flags);
+ spin_lock(&efi_runtime_lock);
status = efi_call_virt(get_wakeup_time, enabled, pending, tm);
+ spin_unlock(&efi_runtime_lock);
spin_unlock_irqrestore(&rtc_lock, flags);
return status;
}
@@ -67,7 +142,9 @@
efi_status_t status;
spin_lock_irqsave(&rtc_lock, flags);
+ spin_lock(&efi_runtime_lock);
status = efi_call_virt(set_wakeup_time, enabled, tm);
+ spin_unlock(&efi_runtime_lock);
spin_unlock_irqrestore(&rtc_lock, flags);
return status;
}
@@ -78,14 +155,27 @@
unsigned long *data_size,
void *data)
{
- return efi_call_virt(get_variable, name, vendor, attr, data_size, data);
+ unsigned long flags;
+ efi_status_t status;
+
+ spin_lock_irqsave(&efi_runtime_lock, flags);
+ status = efi_call_virt(get_variable, name, vendor, attr, data_size,
+ data);
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
+ return status;
}
static efi_status_t virt_efi_get_next_variable(unsigned long *name_size,
efi_char16_t *name,
efi_guid_t *vendor)
{
- return efi_call_virt(get_next_variable, name_size, name, vendor);
+ unsigned long flags;
+ efi_status_t status;
+
+ spin_lock_irqsave(&efi_runtime_lock, flags);
+ status = efi_call_virt(get_next_variable, name_size, name, vendor);
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
+ return status;
}
static efi_status_t virt_efi_set_variable(efi_char16_t *name,
@@ -94,24 +184,61 @@
unsigned long data_size,
void *data)
{
- return efi_call_virt(set_variable, name, vendor, attr, data_size, data);
+ unsigned long flags;
+ efi_status_t status;
+
+ spin_lock_irqsave(&efi_runtime_lock, flags);
+ status = efi_call_virt(set_variable, name, vendor, attr, data_size,
+ data);
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
+ return status;
}
+static efi_status_t
+virt_efi_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor,
+ u32 attr, unsigned long data_size,
+ void *data)
+{
+ unsigned long flags;
+ efi_status_t status;
+
+ if (!spin_trylock_irqsave(&efi_runtime_lock, flags))
+ return EFI_NOT_READY;
+
+ status = efi_call_virt(set_variable, name, vendor, attr, data_size,
+ data);
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
+ return status;
+}
+
+
static efi_status_t virt_efi_query_variable_info(u32 attr,
u64 *storage_space,
u64 *remaining_space,
u64 *max_variable_size)
{
+ unsigned long flags;
+ efi_status_t status;
+
if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
return EFI_UNSUPPORTED;
- return efi_call_virt(query_variable_info, attr, storage_space,
- remaining_space, max_variable_size);
+ spin_lock_irqsave(&efi_runtime_lock, flags);
+ status = efi_call_virt(query_variable_info, attr, storage_space,
+ remaining_space, max_variable_size);
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
+ return status;
}
static efi_status_t virt_efi_get_next_high_mono_count(u32 *count)
{
- return efi_call_virt(get_next_high_mono_count, count);
+ unsigned long flags;
+ efi_status_t status;
+
+ spin_lock_irqsave(&efi_runtime_lock, flags);
+ status = efi_call_virt(get_next_high_mono_count, count);
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
+ return status;
}
static void virt_efi_reset_system(int reset_type,
@@ -119,17 +246,27 @@
unsigned long data_size,
efi_char16_t *data)
{
+ unsigned long flags;
+
+ spin_lock_irqsave(&efi_runtime_lock, flags);
__efi_call_virt(reset_system, reset_type, status, data_size, data);
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
}
static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules,
unsigned long count,
unsigned long sg_list)
{
+ unsigned long flags;
+ efi_status_t status;
+
if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
return EFI_UNSUPPORTED;
- return efi_call_virt(update_capsule, capsules, count, sg_list);
+ spin_lock_irqsave(&efi_runtime_lock, flags);
+ status = efi_call_virt(update_capsule, capsules, count, sg_list);
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
+ return status;
}
static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules,
@@ -137,11 +274,17 @@
u64 *max_size,
int *reset_type)
{
+ unsigned long flags;
+ efi_status_t status;
+
if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
return EFI_UNSUPPORTED;
- return efi_call_virt(query_capsule_caps, capsules, count, max_size,
- reset_type);
+ spin_lock_irqsave(&efi_runtime_lock, flags);
+ status = efi_call_virt(query_capsule_caps, capsules, count, max_size,
+ reset_type);
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
+ return status;
}
void efi_native_runtime_setup(void)
@@ -153,6 +296,7 @@
efi.get_variable = virt_efi_get_variable;
efi.get_next_variable = virt_efi_get_next_variable;
efi.set_variable = virt_efi_set_variable;
+ efi.set_variable_nonblocking = virt_efi_set_variable_nonblocking;
efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count;
efi.reset_system = virt_efi_reset_system;
efi.query_variable_info = virt_efi_query_variable_info;
diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c
index 5abe943..70a0fb1 100644
--- a/drivers/firmware/efi/vars.c
+++ b/drivers/firmware/efi/vars.c
@@ -321,11 +321,11 @@
* Print a warning when duplicate EFI variables are encountered and
* disable the sysfs workqueue since the firmware is buggy.
*/
-static void dup_variable_bug(efi_char16_t *s16, efi_guid_t *vendor_guid,
+static void dup_variable_bug(efi_char16_t *str16, efi_guid_t *vendor_guid,
unsigned long len16)
{
size_t i, len8 = len16 / sizeof(efi_char16_t);
- char *s8;
+ char *str8;
/*
* Disable the workqueue since the algorithm it uses for
@@ -334,16 +334,16 @@
*/
efivar_wq_enabled = false;
- s8 = kzalloc(len8, GFP_KERNEL);
- if (!s8)
+ str8 = kzalloc(len8, GFP_KERNEL);
+ if (!str8)
return;
for (i = 0; i < len8; i++)
- s8[i] = s16[i];
+ str8[i] = str16[i];
printk(KERN_WARNING "efivars: duplicate variable: %s-%pUl\n",
- s8, vendor_guid);
- kfree(s8);
+ str8, vendor_guid);
+ kfree(str8);
}
/**
@@ -595,6 +595,39 @@
}
EXPORT_SYMBOL_GPL(efivar_entry_set);
+/*
+ * efivar_entry_set_nonblocking - call set_variable_nonblocking()
+ *
+ * This function is guaranteed to not block and is suitable for calling
+ * from crash/panic handlers.
+ *
+ * Crucially, this function will not block if it cannot acquire
+ * __efivars->lock. Instead, it returns -EBUSY.
+ */
+static int
+efivar_entry_set_nonblocking(efi_char16_t *name, efi_guid_t vendor,
+ u32 attributes, unsigned long size, void *data)
+{
+ const struct efivar_operations *ops = __efivars->ops;
+ unsigned long flags;
+ efi_status_t status;
+
+ if (!spin_trylock_irqsave(&__efivars->lock, flags))
+ return -EBUSY;
+
+ status = check_var_size(attributes, size + ucs2_strsize(name, 1024));
+ if (status != EFI_SUCCESS) {
+ spin_unlock_irqrestore(&__efivars->lock, flags);
+ return -ENOSPC;
+ }
+
+ status = ops->set_variable_nonblocking(name, &vendor, attributes,
+ size, data);
+
+ spin_unlock_irqrestore(&__efivars->lock, flags);
+ return efi_status_to_err(status);
+}
+
/**
* efivar_entry_set_safe - call set_variable() if enough space in firmware
* @name: buffer containing the variable name
@@ -622,6 +655,20 @@
if (!ops->query_variable_store)
return -ENOSYS;
+ /*
+ * If the EFI variable backend provides a non-blocking
+ * ->set_variable() operation and we're in a context where we
+ * cannot block, then we need to use it to avoid live-locks,
+ * since the implication is that the regular ->set_variable()
+ * will block.
+ *
+ * If no ->set_variable_nonblocking() is provided then
+ * ->set_variable() is assumed to be non-blocking.
+ */
+ if (!block && ops->set_variable_nonblocking)
+ return efivar_entry_set_nonblocking(name, vendor, attributes,
+ size, data);
+
if (!block) {
if (!spin_trylock_irqsave(&__efivars->lock, flags))
return -EBUSY;
diff --git a/drivers/gpu/drm/cirrus/cirrus_drv.c b/drivers/gpu/drm/cirrus/cirrus_drv.c
index e705335..c2a1cba 100644
--- a/drivers/gpu/drm/cirrus/cirrus_drv.c
+++ b/drivers/gpu/drm/cirrus/cirrus_drv.c
@@ -32,6 +32,8 @@
static const struct pci_device_id pciidlist[] = {
{ PCI_VENDOR_ID_CIRRUS, PCI_DEVICE_ID_CIRRUS_5446, 0x1af4, 0x1100, 0,
0, 0 },
+ { PCI_VENDOR_ID_CIRRUS, PCI_DEVICE_ID_CIRRUS_5446, PCI_VENDOR_ID_XEN,
+ 0x0001, 0, 0, 0 },
{0,}
};
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 3201986..f66392b 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1711,7 +1711,7 @@
#define HPD_STORM_DETECT_PERIOD 1000
#define HPD_STORM_THRESHOLD 5
-static int ilk_port_to_hotplug_shift(enum port port)
+static int pch_port_to_hotplug_shift(enum port port)
{
switch (port) {
case PORT_A:
@@ -1727,7 +1727,7 @@
}
}
-static int g4x_port_to_hotplug_shift(enum port port)
+static int i915_port_to_hotplug_shift(enum port port)
{
switch (port) {
case PORT_A:
@@ -1785,12 +1785,12 @@
if (port && dev_priv->hpd_irq_port[port]) {
bool long_hpd;
- if (IS_G4X(dev)) {
- dig_shift = g4x_port_to_hotplug_shift(port);
- long_hpd = (hotplug_trigger >> dig_shift) & PORTB_HOTPLUG_LONG_DETECT;
- } else {
- dig_shift = ilk_port_to_hotplug_shift(port);
+ if (HAS_PCH_SPLIT(dev)) {
+ dig_shift = pch_port_to_hotplug_shift(port);
long_hpd = (dig_hotplug_reg >> dig_shift) & PORTB_HOTPLUG_LONG_DETECT;
+ } else {
+ dig_shift = i915_port_to_hotplug_shift(port);
+ long_hpd = (hotplug_trigger >> dig_shift) & PORTB_HOTPLUG_LONG_DETECT;
}
DRM_DEBUG_DRIVER("digital hpd port %c - %s\n",
@@ -3458,12 +3458,13 @@
void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv)
{
unsigned long irqflags;
+ uint32_t extra_ier = GEN8_PIPE_VBLANK | GEN8_PIPE_FIFO_UNDERRUN;
spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
GEN8_IRQ_INIT_NDX(DE_PIPE, PIPE_B, dev_priv->de_irq_mask[PIPE_B],
- ~dev_priv->de_irq_mask[PIPE_B]);
+ ~dev_priv->de_irq_mask[PIPE_B] | extra_ier);
GEN8_IRQ_INIT_NDX(DE_PIPE, PIPE_C, dev_priv->de_irq_mask[PIPE_C],
- ~dev_priv->de_irq_mask[PIPE_C]);
+ ~dev_priv->de_irq_mask[PIPE_C] | extra_ier);
spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
}
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 5073705..c9e2209 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -73,9 +73,6 @@
DRM_FORMAT_ARGB8888,
};
-#define DIV_ROUND_CLOSEST_ULL(ll, d) \
-({ unsigned long long _tmp = (ll)+(d)/2; do_div(_tmp, d); _tmp; })
-
static void intel_increase_pllclock(struct drm_device *dev,
enum pipe pipe);
static void intel_crtc_update_cursor(struct drm_crtc *crtc, bool on);
@@ -12357,27 +12354,36 @@
if (I915_READ(PCH_DP_D) & DP_DETECTED)
intel_dp_init(dev, PCH_DP_D, PORT_D);
} else if (IS_VALLEYVIEW(dev)) {
- if (I915_READ(VLV_DISPLAY_BASE + GEN4_HDMIB) & SDVO_DETECTED) {
+ /*
+ * The DP_DETECTED bit is the latched state of the DDC
+ * SDA pin at boot. However since eDP doesn't require DDC
+ * (no way to plug in a DP->HDMI dongle) the DDC pins for
+ * eDP ports may have been muxed to an alternate function.
+ * Thus we can't rely on the DP_DETECTED bit alone to detect
+ * eDP ports. Consult the VBT as well as DP_DETECTED to
+ * detect eDP ports.
+ */
+ if (I915_READ(VLV_DISPLAY_BASE + GEN4_HDMIB) & SDVO_DETECTED)
intel_hdmi_init(dev, VLV_DISPLAY_BASE + GEN4_HDMIB,
PORT_B);
- if (I915_READ(VLV_DISPLAY_BASE + DP_B) & DP_DETECTED)
- intel_dp_init(dev, VLV_DISPLAY_BASE + DP_B, PORT_B);
- }
+ if (I915_READ(VLV_DISPLAY_BASE + DP_B) & DP_DETECTED ||
+ intel_dp_is_edp(dev, PORT_B))
+ intel_dp_init(dev, VLV_DISPLAY_BASE + DP_B, PORT_B);
- if (I915_READ(VLV_DISPLAY_BASE + GEN4_HDMIC) & SDVO_DETECTED) {
+ if (I915_READ(VLV_DISPLAY_BASE + GEN4_HDMIC) & SDVO_DETECTED)
intel_hdmi_init(dev, VLV_DISPLAY_BASE + GEN4_HDMIC,
PORT_C);
- if (I915_READ(VLV_DISPLAY_BASE + DP_C) & DP_DETECTED)
- intel_dp_init(dev, VLV_DISPLAY_BASE + DP_C, PORT_C);
- }
+ if (I915_READ(VLV_DISPLAY_BASE + DP_C) & DP_DETECTED ||
+ intel_dp_is_edp(dev, PORT_C))
+ intel_dp_init(dev, VLV_DISPLAY_BASE + DP_C, PORT_C);
if (IS_CHERRYVIEW(dev)) {
- if (I915_READ(VLV_DISPLAY_BASE + CHV_HDMID) & SDVO_DETECTED) {
+ if (I915_READ(VLV_DISPLAY_BASE + CHV_HDMID) & SDVO_DETECTED)
intel_hdmi_init(dev, VLV_DISPLAY_BASE + CHV_HDMID,
PORT_D);
- if (I915_READ(VLV_DISPLAY_BASE + DP_D) & DP_DETECTED)
- intel_dp_init(dev, VLV_DISPLAY_BASE + DP_D, PORT_D);
- }
+ /* eDP not supported on port D, so don't check VBT */
+ if (I915_READ(VLV_DISPLAY_BASE + DP_D) & DP_DETECTED)
+ intel_dp_init(dev, VLV_DISPLAY_BASE + DP_D, PORT_D);
}
intel_dsi_init(dev);
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 07ce046..ba71522 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -35,6 +35,9 @@
#include <drm/drm_fb_helper.h>
#include <drm/drm_dp_mst_helper.h>
+#define DIV_ROUND_CLOSEST_ULL(ll, d) \
+({ unsigned long long _tmp = (ll)+(d)/2; do_div(_tmp, d); _tmp; })
+
/**
* _wait_for - magic (register) wait macro
*
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index 1878447..0e018cb 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -419,9 +419,8 @@
source_val = clamp(source_val, source_min, source_max);
/* avoid overflows */
- target_val = (uint64_t)(source_val - source_min) *
- (target_max - target_min);
- do_div(target_val, source_max - source_min);
+ target_val = DIV_ROUND_CLOSEST_ULL((uint64_t)(source_val - source_min) *
+ (target_max - target_min), source_max - source_min);
target_val += target_min;
return target_val;
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/ctxnv50.c b/drivers/gpu/drm/nouveau/core/engine/graph/ctxnv50.c
index 552fdbd..1d0e33f 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/ctxnv50.c
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/ctxnv50.c
@@ -113,6 +113,8 @@
#define IS_NVA3F(x) (((x) > 0xa0 && (x) < 0xaa) || (x) == 0xaf)
#define IS_NVAAF(x) ((x) >= 0xaa && (x) <= 0xac)
+#include <subdev/fb.h>
+
/*
* This code deals with PGRAPH contexts on NV50 family cards. Like NV40, it's
* the GPU itself that does context-switching, but it needs a special
@@ -569,8 +571,12 @@
gr_def(ctx, 0x407d08, 0x00010040);
else if (device->chipset < 0xa0)
gr_def(ctx, 0x407d08, 0x00390040);
- else
- gr_def(ctx, 0x407d08, 0x003d0040);
+ else {
+ if (nouveau_fb(device)->ram->type != NV_MEM_TYPE_GDDR5)
+ gr_def(ctx, 0x407d08, 0x003d0040);
+ else
+ gr_def(ctx, 0x407d08, 0x003c0040);
+ }
gr_def(ctx, 0x407d0c, 0x00000022);
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c
index 589dbb5..fd3dbd5 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.c
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
@@ -400,15 +400,20 @@
struct nouveau_channel **pchan)
{
struct nouveau_cli *cli = (void *)nvif_client(&device->base);
+ bool super;
int ret;
+ /* hack until fencenv50 is fixed, and agp access relaxed */
+ super = cli->base.super;
+ cli->base.super = true;
+
ret = nouveau_channel_ind(drm, device, handle, arg0, pchan);
if (ret) {
NV_PRINTK(debug, cli, "ib channel create, %d\n", ret);
ret = nouveau_channel_dma(drm, device, handle, pchan);
if (ret) {
NV_PRINTK(debug, cli, "dma channel create, %d\n", ret);
- return ret;
+ goto done;
}
}
@@ -416,8 +421,9 @@
if (ret) {
NV_PRINTK(error, cli, "channel failed to initialise, %d\n", ret);
nouveau_channel_del(pchan);
- return ret;
}
- return 0;
+done:
+ cli->base.super = super;
+ return ret;
}
diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index af9e785..0d13962 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -572,7 +572,6 @@
struct qxl_framebuffer *qfb;
struct qxl_bo *bo, *old_bo = NULL;
struct qxl_crtc *qcrtc = to_qxl_crtc(crtc);
- uint32_t width, height, base_offset;
bool recreate_primary = false;
int ret;
int surf_id;
@@ -602,9 +601,10 @@
if (qcrtc->index == 0)
recreate_primary = true;
- width = mode->hdisplay;
- height = mode->vdisplay;
- base_offset = 0;
+ if (bo->surf.stride * bo->surf.height > qdev->vram_size) {
+ DRM_ERROR("Mode doesn't fit in vram size (vgamem)");
+ return -EINVAL;
+ }
ret = qxl_bo_reserve(bo, false);
if (ret != 0)
@@ -618,10 +618,10 @@
if (recreate_primary) {
qxl_io_destroy_primary(qdev);
qxl_io_log(qdev,
- "recreate primary: %dx%d (was %dx%d,%d,%d)\n",
- width, height, bo->surf.width,
- bo->surf.height, bo->surf.stride, bo->surf.format);
- qxl_io_create_primary(qdev, base_offset, bo);
+ "recreate primary: %dx%d,%d,%d\n",
+ bo->surf.width, bo->surf.height,
+ bo->surf.stride, bo->surf.format);
+ qxl_io_create_primary(qdev, 0, bo);
bo->is_primary = true;
}
diff --git a/drivers/gpu/drm/radeon/btc_dpm.c b/drivers/gpu/drm/radeon/btc_dpm.c
index 300d971..0b2929d 100644
--- a/drivers/gpu/drm/radeon/btc_dpm.c
+++ b/drivers/gpu/drm/radeon/btc_dpm.c
@@ -24,6 +24,7 @@
#include "drmP.h"
#include "radeon.h"
+#include "radeon_asic.h"
#include "btcd.h"
#include "r600_dpm.h"
#include "cypress_dpm.h"
@@ -1170,6 +1171,23 @@
{ 25000, 30000, RADEON_SCLK_UP }
};
+void btc_get_max_clock_from_voltage_dependency_table(struct radeon_clock_voltage_dependency_table *table,
+ u32 *max_clock)
+{
+ u32 i, clock = 0; /* clock holds the running maximum; 0 is the "nothing found" result */
+ /*
+ * Store the largest entries[].clk value found in @table into *@max_clock.
+ * A NULL or empty table yields 0.
+ */
+ if ((table == NULL) || (table->count == 0)) {
+ *max_clock = clock;
+ return;
+ }
+
+ for (i = 0; i < table->count; i++) {
+ if (clock < table->entries[i].clk)
+ clock = table->entries[i].clk;
+ }
+ *max_clock = clock;
+}
+
void btc_apply_voltage_dependency_rules(struct radeon_clock_voltage_dependency_table *table,
u32 clock, u16 max_voltage, u16 *voltage)
{
diff --git a/drivers/gpu/drm/radeon/btc_dpm.h b/drivers/gpu/drm/radeon/btc_dpm.h
index 1a15e0e..3b6f12b 100644
--- a/drivers/gpu/drm/radeon/btc_dpm.h
+++ b/drivers/gpu/drm/radeon/btc_dpm.h
@@ -46,6 +46,8 @@
struct rv7xx_pl *pl);
void btc_apply_voltage_dependency_rules(struct radeon_clock_voltage_dependency_table *table,
u32 clock, u16 max_voltage, u16 *voltage);
+void btc_get_max_clock_from_voltage_dependency_table(struct radeon_clock_voltage_dependency_table *table,
+ u32 *max_clock);
void btc_apply_voltage_delta_rules(struct radeon_device *rdev,
u16 max_vddc, u16 max_vddci,
u16 *vddc, u16 *vddci);
diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c
index f5c8c04..11a55e9 100644
--- a/drivers/gpu/drm/radeon/ci_dpm.c
+++ b/drivers/gpu/drm/radeon/ci_dpm.c
@@ -24,6 +24,7 @@
#include <linux/firmware.h>
#include "drmP.h"
#include "radeon.h"
+#include "radeon_asic.h"
#include "radeon_ucode.h"
#include "cikd.h"
#include "r600_dpm.h"
diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c
index c77dad1..4e8432d 100644
--- a/drivers/gpu/drm/radeon/cik_sdma.c
+++ b/drivers/gpu/drm/radeon/cik_sdma.c
@@ -611,16 +611,19 @@
{
unsigned i;
int r;
- void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
+ unsigned index;
u32 tmp;
+ u64 gpu_addr;
- if (!ptr) {
- DRM_ERROR("invalid vram scratch pointer\n");
- return -EINVAL;
- }
+ if (ring->idx == R600_RING_TYPE_DMA_INDEX)
+ index = R600_WB_DMA_RING_TEST_OFFSET;
+ else
+ index = CAYMAN_WB_DMA1_RING_TEST_OFFSET;
+
+ gpu_addr = rdev->wb.gpu_addr + index;
tmp = 0xCAFEDEAD;
- writel(tmp, ptr);
+ rdev->wb.wb[index/4] = cpu_to_le32(tmp);
r = radeon_ring_lock(rdev, ring, 5);
if (r) {
@@ -628,14 +631,14 @@
return r;
}
radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
- radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
- radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr));
+ radeon_ring_write(ring, lower_32_bits(gpu_addr));
+ radeon_ring_write(ring, upper_32_bits(gpu_addr));
radeon_ring_write(ring, 1); /* number of DWs to follow */
radeon_ring_write(ring, 0xDEADBEEF);
radeon_ring_unlock_commit(rdev, ring, false);
for (i = 0; i < rdev->usec_timeout; i++) {
- tmp = readl(ptr);
+ tmp = le32_to_cpu(rdev->wb.wb[index/4]);
if (tmp == 0xDEADBEEF)
break;
DRM_UDELAY(1);
diff --git a/drivers/gpu/drm/radeon/cypress_dpm.c b/drivers/gpu/drm/radeon/cypress_dpm.c
index 47d31e9..9aad032 100644
--- a/drivers/gpu/drm/radeon/cypress_dpm.c
+++ b/drivers/gpu/drm/radeon/cypress_dpm.c
@@ -24,6 +24,7 @@
#include "drmP.h"
#include "radeon.h"
+#include "radeon_asic.h"
#include "evergreend.h"
#include "r600_dpm.h"
#include "cypress_dpm.h"
diff --git a/drivers/gpu/drm/radeon/dce3_1_afmt.c b/drivers/gpu/drm/radeon/dce3_1_afmt.c
index 950af15..2fe8cfc 100644
--- a/drivers/gpu/drm/radeon/dce3_1_afmt.c
+++ b/drivers/gpu/drm/radeon/dce3_1_afmt.c
@@ -32,7 +32,7 @@
struct drm_connector *connector;
struct radeon_connector *radeon_connector = NULL;
u32 tmp;
- u8 *sadb;
+ u8 *sadb = NULL;
int sad_count;
list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
@@ -49,8 +49,8 @@
sad_count = drm_edid_to_speaker_allocation(radeon_connector->edid, &sadb);
if (sad_count < 0) {
- DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
- return;
+ DRM_DEBUG("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
+ sad_count = 0;
}
/* program the speaker allocation */
diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c
index c0bbf68..f312edf 100644
--- a/drivers/gpu/drm/radeon/dce6_afmt.c
+++ b/drivers/gpu/drm/radeon/dce6_afmt.c
@@ -155,7 +155,7 @@
struct drm_connector *connector;
struct radeon_connector *radeon_connector = NULL;
u32 offset, tmp;
- u8 *sadb;
+ u8 *sadb = NULL;
int sad_count;
if (!dig || !dig->afmt || !dig->afmt->pin)
@@ -176,9 +176,9 @@
}
sad_count = drm_edid_to_speaker_allocation(radeon_connector_edid(connector), &sadb);
- if (sad_count <= 0) {
- DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
- return;
+ if (sad_count < 0) {
+ DRM_DEBUG("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
+ sad_count = 0;
}
/* program the speaker allocation */
diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c
index 2514d65..53abd9b 100644
--- a/drivers/gpu/drm/radeon/evergreen_hdmi.c
+++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c
@@ -133,7 +133,7 @@
struct drm_connector *connector;
struct radeon_connector *radeon_connector = NULL;
u32 tmp;
- u8 *sadb;
+ u8 *sadb = NULL;
int sad_count;
list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
@@ -149,9 +149,9 @@
}
sad_count = drm_edid_to_speaker_allocation(radeon_connector_edid(connector), &sadb);
- if (sad_count <= 0) {
- DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
- return;
+ if (sad_count < 0) {
+ DRM_DEBUG("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
+ sad_count = 0;
}
/* program the speaker allocation */
diff --git a/drivers/gpu/drm/radeon/ni_dpm.c b/drivers/gpu/drm/radeon/ni_dpm.c
index 715b181..6d2f16c 100644
--- a/drivers/gpu/drm/radeon/ni_dpm.c
+++ b/drivers/gpu/drm/radeon/ni_dpm.c
@@ -23,6 +23,7 @@
#include "drmP.h"
#include "radeon.h"
+#include "radeon_asic.h"
#include "nid.h"
#include "r600_dpm.h"
#include "ni_dpm.h"
diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c
index 100189e..aabc343 100644
--- a/drivers/gpu/drm/radeon/r600_dma.c
+++ b/drivers/gpu/drm/radeon/r600_dma.c
@@ -232,16 +232,19 @@
{
unsigned i;
int r;
- void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
+ unsigned index;
u32 tmp;
+ u64 gpu_addr;
- if (!ptr) {
- DRM_ERROR("invalid vram scratch pointer\n");
- return -EINVAL;
- }
+ if (ring->idx == R600_RING_TYPE_DMA_INDEX)
+ index = R600_WB_DMA_RING_TEST_OFFSET;
+ else
+ index = CAYMAN_WB_DMA1_RING_TEST_OFFSET;
+
+ gpu_addr = rdev->wb.gpu_addr + index;
tmp = 0xCAFEDEAD;
- writel(tmp, ptr);
+ rdev->wb.wb[index/4] = cpu_to_le32(tmp);
r = radeon_ring_lock(rdev, ring, 4);
if (r) {
@@ -249,13 +252,13 @@
return r;
}
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
- radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
- radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff);
+ radeon_ring_write(ring, lower_32_bits(gpu_addr));
+ radeon_ring_write(ring, upper_32_bits(gpu_addr) & 0xff);
radeon_ring_write(ring, 0xDEADBEEF);
radeon_ring_unlock_commit(rdev, ring, false);
for (i = 0; i < rdev->usec_timeout; i++) {
- tmp = readl(ptr);
+ tmp = le32_to_cpu(rdev->wb.wb[index/4]);
if (tmp == 0xDEADBEEF)
break;
DRM_UDELAY(1);
diff --git a/drivers/gpu/drm/radeon/r600_dpm.c b/drivers/gpu/drm/radeon/r600_dpm.c
index 9c61b74..f6309bd 100644
--- a/drivers/gpu/drm/radeon/r600_dpm.c
+++ b/drivers/gpu/drm/radeon/r600_dpm.c
@@ -24,6 +24,7 @@
#include "drmP.h"
#include "radeon.h"
+#include "radeon_asic.h"
#include "r600d.h"
#include "r600_dpm.h"
#include "atom.h"
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index f7c4b22..a9717b3 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1133,6 +1133,8 @@
#define R600_WB_EVENT_OFFSET 3072
#define CIK_WB_CP1_WPTR_OFFSET 3328
#define CIK_WB_CP2_WPTR_OFFSET 3584
+#define R600_WB_DMA_RING_TEST_OFFSET 3588
+#define CAYMAN_WB_DMA1_RING_TEST_OFFSET 3592
/**
* struct radeon_pm - power management datas
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index f41cc15..ea26769 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1130,7 +1130,7 @@
if (radeon_vm_block_size == -1) {
/* Total bits covered by PD + PTs */
- unsigned bits = ilog2(radeon_vm_size) + 17;
+ unsigned bits = ilog2(radeon_vm_size) + 18;
/* Make sure the PD is 4K in size up to 8GB address space.
Above that split equal between PD and PTs */
diff --git a/drivers/gpu/drm/radeon/rs780_dpm.c b/drivers/gpu/drm/radeon/rs780_dpm.c
index 02f7710..9031f4b 100644
--- a/drivers/gpu/drm/radeon/rs780_dpm.c
+++ b/drivers/gpu/drm/radeon/rs780_dpm.c
@@ -24,6 +24,7 @@
#include "drmP.h"
#include "radeon.h"
+#include "radeon_asic.h"
#include "rs780d.h"
#include "r600_dpm.h"
#include "rs780_dpm.h"
diff --git a/drivers/gpu/drm/radeon/rv6xx_dpm.c b/drivers/gpu/drm/radeon/rv6xx_dpm.c
index e7045b0..6a5c233 100644
--- a/drivers/gpu/drm/radeon/rv6xx_dpm.c
+++ b/drivers/gpu/drm/radeon/rv6xx_dpm.c
@@ -24,6 +24,7 @@
#include "drmP.h"
#include "radeon.h"
+#include "radeon_asic.h"
#include "rv6xxd.h"
#include "r600_dpm.h"
#include "rv6xx_dpm.h"
diff --git a/drivers/gpu/drm/radeon/rv770_dpm.c b/drivers/gpu/drm/radeon/rv770_dpm.c
index 3c76e1d..755a8f9 100644
--- a/drivers/gpu/drm/radeon/rv770_dpm.c
+++ b/drivers/gpu/drm/radeon/rv770_dpm.c
@@ -24,6 +24,7 @@
#include "drmP.h"
#include "radeon.h"
+#include "radeon_asic.h"
#include "rv770d.h"
#include "r600_dpm.h"
#include "rv770_dpm.h"
diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index 9e4d5d7..a53c2e7 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -23,6 +23,7 @@
#include "drmP.h"
#include "radeon.h"
+#include "radeon_asic.h"
#include "sid.h"
#include "r600_dpm.h"
#include "si_dpm.h"
@@ -2916,6 +2917,7 @@
bool disable_sclk_switching = false;
u32 mclk, sclk;
u16 vddc, vddci;
+ u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc;
int i;
if ((rdev->pm.dpm.new_active_crtc_count > 1) ||
@@ -2949,6 +2951,29 @@
}
}
+ /* limit clocks to max supported clocks based on voltage dependency tables */
+ btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk,
+ &max_sclk_vddc);
+ btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk,
+ &max_mclk_vddci);
+ btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk,
+ &max_mclk_vddc);
+
+ for (i = 0; i < ps->performance_level_count; i++) {
+ if (max_sclk_vddc) {
+ if (ps->performance_levels[i].sclk > max_sclk_vddc)
+ ps->performance_levels[i].sclk = max_sclk_vddc;
+ }
+ if (max_mclk_vddci) {
+ if (ps->performance_levels[i].mclk > max_mclk_vddci)
+ ps->performance_levels[i].mclk = max_mclk_vddci;
+ }
+ if (max_mclk_vddc) {
+ if (ps->performance_levels[i].mclk > max_mclk_vddc)
+ ps->performance_levels[i].mclk = max_mclk_vddc;
+ }
+ }
+
/* XXX validate the min clocks required for display */
if (disable_mclk_switching) {
diff --git a/drivers/gpu/drm/radeon/sumo_dpm.c b/drivers/gpu/drm/radeon/sumo_dpm.c
index 3f0e8d7..1f8a883 100644
--- a/drivers/gpu/drm/radeon/sumo_dpm.c
+++ b/drivers/gpu/drm/radeon/sumo_dpm.c
@@ -23,6 +23,7 @@
#include "drmP.h"
#include "radeon.h"
+#include "radeon_asic.h"
#include "sumod.h"
#include "r600_dpm.h"
#include "cypress_dpm.h"
diff --git a/drivers/gpu/drm/radeon/trinity_dpm.c b/drivers/gpu/drm/radeon/trinity_dpm.c
index 57f7800..b4ec5c4 100644
--- a/drivers/gpu/drm/radeon/trinity_dpm.c
+++ b/drivers/gpu/drm/radeon/trinity_dpm.c
@@ -23,6 +23,7 @@
#include "drmP.h"
#include "radeon.h"
+#include "radeon_asic.h"
#include "trinityd.h"
#include "r600_dpm.h"
#include "trinity_dpm.h"
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 8f5cec6..d395b0b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -709,6 +709,7 @@
static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
uint32_t mem_type,
+ const struct ttm_place *place,
bool interruptible,
bool no_wait_gpu)
{
@@ -720,8 +721,21 @@
spin_lock(&glob->lru_lock);
list_for_each_entry(bo, &man->lru, lru) {
ret = __ttm_bo_reserve(bo, false, true, false, NULL);
- if (!ret)
+ if (!ret) {
+ if (place && (place->fpfn || place->lpfn)) {
+ /* Don't evict this BO if it's outside of the
+ * requested placement range
+ */
+ if (place->fpfn >= (bo->mem.start + bo->mem.size) ||
+ (place->lpfn && place->lpfn <= bo->mem.start)) {
+ __ttm_bo_unreserve(bo);
+ ret = -EBUSY;
+ continue;
+ }
+ }
+
break;
+ }
}
if (ret) {
@@ -782,7 +796,7 @@
return ret;
if (mem->mm_node)
break;
- ret = ttm_mem_evict_first(bdev, mem_type,
+ ret = ttm_mem_evict_first(bdev, mem_type, place,
interruptible, no_wait_gpu);
if (unlikely(ret != 0))
return ret;
@@ -994,9 +1008,9 @@
for (i = 0; i < placement->num_placement; i++) {
const struct ttm_place *heap = &placement->placement[i];
- if (mem->mm_node && heap->lpfn != 0 &&
+ if (mem->mm_node &&
(mem->start < heap->fpfn ||
- mem->start + mem->num_pages > heap->lpfn))
+ (heap->lpfn != 0 && (mem->start + mem->num_pages) > heap->lpfn)))
continue;
*new_flags = heap->flags;
@@ -1007,9 +1021,9 @@
for (i = 0; i < placement->num_busy_placement; i++) {
const struct ttm_place *heap = &placement->busy_placement[i];
- if (mem->mm_node && heap->lpfn != 0 &&
+ if (mem->mm_node &&
(mem->start < heap->fpfn ||
- mem->start + mem->num_pages > heap->lpfn))
+ (heap->lpfn != 0 && (mem->start + mem->num_pages) > heap->lpfn)))
continue;
*new_flags = heap->flags;
@@ -1233,7 +1247,7 @@
spin_lock(&glob->lru_lock);
while (!list_empty(&man->lru)) {
spin_unlock(&glob->lru_lock);
- ret = ttm_mem_evict_first(bdev, mem_type, false, false);
+ ret = ttm_mem_evict_first(bdev, mem_type, NULL, false, false);
if (ret) {
if (allow_errors) {
return ret;
diff --git a/drivers/hwmon/menf21bmc_hwmon.c b/drivers/hwmon/menf21bmc_hwmon.c
index c92229d..afc6b58 100644
--- a/drivers/hwmon/menf21bmc_hwmon.c
+++ b/drivers/hwmon/menf21bmc_hwmon.c
@@ -21,6 +21,7 @@
#include <linux/jiffies.h>
#include <linux/slab.h>
#include <linux/i2c.h>
+#include <linux/err.h>
#define DRV_NAME "menf21bmc_hwmon"
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 0bea5776..3effa93 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -2185,7 +2185,7 @@
isert_cmd->tx_desc.num_sge = 2;
}
- isert_init_send_wr(isert_conn, isert_cmd, send_wr, true);
+ isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);
pr_debug("Posting SCSI Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
@@ -2871,7 +2871,7 @@
&isert_cmd->tx_desc.iscsi_header);
isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
isert_init_send_wr(isert_conn, isert_cmd,
- &isert_cmd->tx_desc.send_wr, true);
+ &isert_cmd->tx_desc.send_wr, false);
isert_cmd->rdma_wr.s_send_wr.next = &isert_cmd->tx_desc.send_wr;
wr->send_wr_num += 1;
}
@@ -3140,7 +3140,7 @@
accept_wait:
ret = down_interruptible(&isert_np->np_sem);
- if (max_accept > 5)
+ if (ret || max_accept > 5)
return -ENODEV;
spin_lock_bh(&np->np_thread_lock);
diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c
index aa29198..7440c58 100644
--- a/drivers/leds/led-class.c
+++ b/drivers/leds/led-class.c
@@ -9,26 +9,21 @@
* published by the Free Software Foundation.
*/
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <linux/device.h>
-#include <linux/timer.h>
-#include <linux/err.h>
#include <linux/ctype.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
#include <linux/leds.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/timer.h>
#include "leds.h"
static struct class *leds_class;
-static void led_update_brightness(struct led_classdev *led_cdev)
-{
- if (led_cdev->brightness_get)
- led_cdev->brightness = led_cdev->brightness_get(led_cdev);
-}
-
static ssize_t brightness_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -59,14 +54,14 @@
}
static DEVICE_ATTR_RW(brightness);
-static ssize_t led_max_brightness_show(struct device *dev,
+static ssize_t max_brightness_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct led_classdev *led_cdev = dev_get_drvdata(dev);
return sprintf(buf, "%u\n", led_cdev->max_brightness);
}
-static DEVICE_ATTR(max_brightness, 0444, led_max_brightness_show, NULL);
+static DEVICE_ATTR_RO(max_brightness);
#ifdef CONFIG_LEDS_TRIGGERS
static DEVICE_ATTR(trigger, 0644, led_trigger_show, led_trigger_store);
diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c
index 71b40d3..aaa8eba 100644
--- a/drivers/leds/led-core.c
+++ b/drivers/leds/led-core.c
@@ -12,10 +12,11 @@
*/
#include <linux/kernel.h>
+#include <linux/leds.h>
#include <linux/list.h>
#include <linux/module.h>
+#include <linux/mutex.h>
#include <linux/rwsem.h>
-#include <linux/leds.h>
#include "leds.h"
DECLARE_RWSEM(leds_list_lock);
@@ -126,3 +127,19 @@
__led_set_brightness(led_cdev, brightness);
}
EXPORT_SYMBOL(led_set_brightness);
+
+int led_update_brightness(struct led_classdev *led_cdev)
+{
+ int ret = 0; /* stays 0 when the LED has no brightness_get hook */
+ /*
+ * Refresh led_cdev->brightness from the brightness_get hook, if one is set.
+ * Returns 0 when the cached value was updated or no hook exists; a
+ * negative hook result is returned as-is and the cached value is kept.
+ */
+ if (led_cdev->brightness_get) {
+ ret = led_cdev->brightness_get(led_cdev);
+ if (ret >= 0) {
+ led_cdev->brightness = ret;
+ return 0;
+ }
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(led_update_brightness);
diff --git a/drivers/leds/leds-gpio-register.c b/drivers/leds/leds-gpio-register.c
index 1c4ed55..75717ba 100644
--- a/drivers/leds/leds-gpio-register.c
+++ b/drivers/leds/leds-gpio-register.c
@@ -7,9 +7,9 @@
* Free Software Foundation.
*/
#include <linux/err.h>
+#include <linux/leds.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
-#include <linux/leds.h>
/**
* gpio_led_register_device - register a gpio-led device
@@ -28,6 +28,9 @@
struct platform_device *ret;
struct gpio_led_platform_data _pdata = *pdata;
+ if (!pdata->num_leds)
+ return ERR_PTR(-EINVAL);
+
_pdata.leds = kmemdup(pdata->leds,
pdata->num_leds * sizeof(*pdata->leds), GFP_KERNEL);
if (!_pdata.leds)
diff --git a/drivers/leds/leds-gpio.c b/drivers/leds/leds-gpio.c
index 57ff20f..b4518c8 100644
--- a/drivers/leds/leds-gpio.c
+++ b/drivers/leds/leds-gpio.c
@@ -10,17 +10,17 @@
* published by the Free Software Foundation.
*
*/
-#include <linux/kernel.h>
-#include <linux/platform_device.h>
+#include <linux/err.h>
#include <linux/gpio.h>
+#include <linux/kernel.h>
#include <linux/leds.h>
+#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_platform.h>
#include <linux/of_gpio.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
-#include <linux/module.h>
-#include <linux/err.h>
struct gpio_led_data {
struct led_classdev cdev;
@@ -36,7 +36,7 @@
static void gpio_led_work(struct work_struct *work)
{
- struct gpio_led_data *led_dat =
+ struct gpio_led_data *led_dat =
container_of(work, struct gpio_led_data, work);
if (led_dat->blinking) {
@@ -235,14 +235,12 @@
}
#endif /* CONFIG_OF_GPIO */
-
static int gpio_led_probe(struct platform_device *pdev)
{
struct gpio_led_platform_data *pdata = dev_get_platdata(&pdev->dev);
struct gpio_leds_priv *priv;
int i, ret = 0;
-
if (pdata && pdata->num_leds) {
priv = devm_kzalloc(&pdev->dev,
sizeof_gpio_leds_priv(pdata->num_leds),
diff --git a/drivers/leds/leds-lp3944.c b/drivers/leds/leds-lp3944.c
index 8e1abdc..53144fb 100644
--- a/drivers/leds/leds-lp3944.c
+++ b/drivers/leds/leds-lp3944.c
@@ -335,7 +335,8 @@
}
/* to expose the default value to userspace */
- led->ldev.brightness = led->status;
+ led->ldev.brightness =
+ (enum led_brightness) led->status;
/* Set the default led status */
err = lp3944_led_set(led, led->status);
diff --git a/drivers/leds/trigger/ledtrig-gpio.c b/drivers/leds/trigger/ledtrig-gpio.c
index 35812e3..c86c418 100644
--- a/drivers/leds/trigger/ledtrig-gpio.c
+++ b/drivers/leds/trigger/ledtrig-gpio.c
@@ -48,7 +48,7 @@
if (!gpio_data->gpio)
return;
- tmp = gpio_get_value(gpio_data->gpio);
+ tmp = gpio_get_value_cansleep(gpio_data->gpio);
if (gpio_data->inverted)
tmp = !tmp;
diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile
index 6d184db..94ed7ce 100644
--- a/drivers/mailbox/Makefile
+++ b/drivers/mailbox/Makefile
@@ -1,3 +1,7 @@
+# Generic MAILBOX API
+
+obj-$(CONFIG_MAILBOX) += mailbox.o
+
obj-$(CONFIG_PL320_MBOX) += pl320-ipc.o
obj-$(CONFIG_OMAP2PLUS_MBOX) += omap-mailbox.o
diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c
new file mode 100644
index 0000000..afcb430
--- /dev/null
+++ b/drivers/mailbox/mailbox.c
@@ -0,0 +1,465 @@
+/*
+ * Mailbox: Common code for Mailbox controllers and users
+ *
+ * Copyright (C) 2013-2014 Linaro Ltd.
+ * Author: Jassi Brar <jassisinghbrar@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/bitops.h>
+#include <linux/mailbox_client.h>
+#include <linux/mailbox_controller.h>
+
+#define TXDONE_BY_IRQ BIT(0) /* controller has remote RTR irq */
+#define TXDONE_BY_POLL BIT(1) /* controller can read status of last TX */
+#define TXDONE_BY_ACK BIT(2) /* S/W ACK received by Client ticks the TX */
+
+static LIST_HEAD(mbox_cons);
+static DEFINE_MUTEX(con_mutex);
+/* Queue mssg in chan's TX ring; returns its slot index, or -ENOBUFS if full */
+static int add_to_rbuf(struct mbox_chan *chan, void *mssg)
+{
+ int idx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&chan->lock, flags);
+
+ /* See if there is any space left */
+ if (chan->msg_count == MBOX_TX_QUEUE_LEN) {
+ spin_unlock_irqrestore(&chan->lock, flags);
+ return -ENOBUFS;
+ }
+
+ idx = chan->msg_free;
+ chan->msg_data[idx] = mssg;
+ chan->msg_count++;
+ /* Advance the next-free index, wrapping at the end of the ring */
+ if (idx == MBOX_TX_QUEUE_LEN - 1)
+ chan->msg_free = 0;
+ else
+ chan->msg_free++;
+
+ spin_unlock_irqrestore(&chan->lock, flags);
+
+ return idx;
+}
+/* Hand the oldest queued message to the controller unless a TX is in flight */
+static void msg_submit(struct mbox_chan *chan)
+{
+ unsigned count, idx;
+ unsigned long flags;
+ void *data;
+ int err;
+
+ spin_lock_irqsave(&chan->lock, flags);
+
+ if (!chan->msg_count || chan->active_req)
+ goto exit;
+ /* The oldest entry sits msg_count slots behind msg_free, modulo ring size */
+ count = chan->msg_count;
+ idx = chan->msg_free;
+ if (idx >= count)
+ idx -= count;
+ else
+ idx += MBOX_TX_QUEUE_LEN - count;
+
+ data = chan->msg_data[idx];
+
+ /* Try to submit a message to the MBOX controller */
+ err = chan->mbox->ops->send_data(chan, data);
+ if (!err) { /* on failure the message stays queued and nothing is active */
+ chan->active_req = data;
+ chan->msg_count--;
+ }
+exit:
+ spin_unlock_irqrestore(&chan->lock, flags);
+}
+/* Retire the in-flight message with status r and start the next queued one */
+static void tx_tick(struct mbox_chan *chan, int r)
+{
+ unsigned long flags;
+ void *mssg;
+
+ spin_lock_irqsave(&chan->lock, flags);
+ mssg = chan->active_req;
+ chan->active_req = NULL;
+ spin_unlock_irqrestore(&chan->lock, flags);
+
+ /* Submit next message */
+ msg_submit(chan);
+
+ /* Notify the client */
+ if (mssg && chan->cl->tx_done)
+ chan->cl->tx_done(chan->cl, mssg, r);
+ /* Signal tx_complete, which a blocking mbox_send_message() waits on */
+ if (chan->cl->tx_block)
+ complete(&chan->tx_complete);
+}
+/* Polling pass: tick each channel whose in-flight TX is reported done */
+static void poll_txdone(unsigned long data)
+{
+ struct mbox_controller *mbox = (struct mbox_controller *)data;
+ bool txdone, resched = false;
+ int i;
+
+ for (i = 0; i < mbox->num_chans; i++) {
+ struct mbox_chan *chan = &mbox->chans[i];
+
+ if (chan->active_req && chan->cl) {
+ resched = true;
+ txdone = chan->mbox->ops->last_tx_done(chan);
+ if (txdone)
+ tx_tick(chan, 0);
+ }
+ }
+ /* A channel was busy when scanned: poll again in txpoll_period ms */
+ if (resched)
+ mod_timer(&mbox->poll, jiffies +
+ msecs_to_jiffies(mbox->txpoll_period));
+}
+
+/**
+ * mbox_chan_received_data - A way for controller driver to push data
+ * received from remote to the upper layer.
+ * @chan: Pointer to the mailbox channel on which RX happened.
+ * @mssg: Client specific message typecasted as void *
+ *
+ * After startup and before shutdown any data received on the chan
+ * is passed on to the API via atomic mbox_chan_received_data().
+ * The controller should ACK the RX only after this call returns.
+ */
+void mbox_chan_received_data(struct mbox_chan *chan, void *mssg)
+{
+ /* Nothing is buffered: pass the data to the client's rx_callback, if set */
+ if (chan->cl->rx_callback)
+ chan->cl->rx_callback(chan->cl, mssg);
+}
+EXPORT_SYMBOL_GPL(mbox_chan_received_data);
+
+/**
+ * mbox_chan_txdone - A way for controller driver to notify the
+ * framework that the last TX has completed.
+ * @chan: Pointer to the mailbox chan on which TX happened.
+ * @r: Status of last TX - OK or ERROR
+ *
+ * The controller that has IRQ for TX ACK calls this atomic API
+ * to tick the TX state machine. It works only if txdone_irq
+ * is set by the controller.
+ */
+void mbox_chan_txdone(struct mbox_chan *chan, int r)
+{
+	/* Only a controller registered with a TX-done IRQ may tick here */
+	if (likely(chan->txdone_method & TXDONE_BY_IRQ)) {
+		tx_tick(chan, r);
+		return;
+	}
+
+	dev_err(chan->mbox->dev, "Controller can't run the TX ticker\n");
+}
+EXPORT_SYMBOL_GPL(mbox_chan_txdone);
+
+/**
+ * mbox_client_txdone - The way for a client to run the TX state machine.
+ * @chan: Mailbox channel assigned to this client.
+ * @r: Success status of last transmission.
+ *
+ * The client/protocol had received some 'ACK' packet and it notifies
+ * the API that the last packet was sent successfully. This only works
+ * if the controller can't sense TX-Done.
+ */
+void mbox_client_txdone(struct mbox_chan *chan, int r)
+{
+	if (likely(chan->txdone_method & TXDONE_BY_ACK)) {
+		tx_tick(chan, r);
+		return;
+	}
+	/* The channel's txdone_method lacks TXDONE_BY_ACK: refuse to tick */
+	dev_err(chan->mbox->dev, "Client can't run the TX ticker\n");
+}
+EXPORT_SYMBOL_GPL(mbox_client_txdone);
+
+/**
+ * mbox_client_peek_data - A way for client driver to pull data
+ * received from remote by the controller.
+ * @chan: Mailbox channel assigned to this client.
+ *
+ * A poke to controller driver for any received data.
+ * The data is actually passed onto client via the
+ * mbox_chan_received_data()
+ * The call can be made from atomic context, so the controller's
+ * implementation of peek_data() must not sleep.
+ *
+ * Return: True, if controller has, and is going to push after this,
+ * some data.
+ * False, if controller doesn't have any data to be read.
+ */
+bool mbox_client_peek_data(struct mbox_chan *chan)
+{
+	/* peek_data is an optional controller op: no op means no data */
+	if (!chan->mbox->ops->peek_data)
+		return false;
+	return chan->mbox->ops->peek_data(chan);
+}
+EXPORT_SYMBOL_GPL(mbox_client_peek_data);
+
+/**
+ * mbox_send_message - For client to submit a message to be
+ *				sent to the remote.
+ * @chan: Mailbox channel assigned to this client.
+ * @mssg: Client specific message typecasted.
+ *
+ * For client to submit data to the controller destined for a remote
+ * processor. If the client had set 'tx_block', the call will return
+ * either when the remote receives the data or when 'tx_tout' millisecs
+ * run out (a 'tx_tout' of 0 waits for up to one hour).
+ * In non-blocking mode, the requests are buffered by the API and a
+ * non-negative token is returned for each queued request. If the request
+ * is not queued, a negative token is returned. Upon failure or successful
+ * TX, the API calls 'tx_done' from atomic context, from which the client
+ * could submit yet another request.
+ * The pointer to message should be preserved until it is sent
+ * over the chan, i.e, tx_done() is made.
+ * In non-blocking mode this function can be called from atomic context,
+ * as it simply queues the data and returns a token against the request.
+ *
+ * Return: Non-negative integer for successful submission (non-blocking mode)
+ *	or transmission over chan (blocking mode).
+ *	Negative value denotes failure; -EIO if a blocking send timed out.
+ */
+int mbox_send_message(struct mbox_chan *chan, void *mssg)
+{
+	int t;
+
+	if (!chan || !chan->cl)
+		return -EINVAL;
+
+	t = add_to_rbuf(chan, mssg);
+	if (t < 0) {
+		dev_err(chan->mbox->dev, "Try increasing MBOX_TX_QUEUE_LEN\n");
+		return t;
+	}
+
+	msg_submit(chan);
+
+	if (chan->txdone_method == TXDONE_BY_POLL)
+		poll_txdone((unsigned long)chan->mbox);
+
+	/*
+	 * Always wait: an unlocked peek at active_req races with tx_tick()
+	 * and would strand the completion of an already finished TX.
+	 */
+	if (chan->cl->tx_block) {
+		unsigned int ms = chan->cl->tx_tout ?: 3600000; /* 0: ~forever */
+		unsigned long wait = msecs_to_jiffies(ms);
+
+		if (!wait_for_completion_timeout(&chan->tx_complete, wait)) {
+			t = -EIO;
+			tx_tick(chan, -EIO);
+			/* Drop the completion tx_tick() has just raised for us */
+			try_wait_for_completion(&chan->tx_complete);
+		}
+	}
+
+	return t;
+}
+EXPORT_SYMBOL_GPL(mbox_send_message);
+
+/**
+ * mbox_request_channel - Request a mailbox channel.
+ * @cl: Identity of the client requesting the channel.
+ * @index: Index of mailbox specifier in 'mboxes' property.
+ *
+ * The Client specifies its requirements and capabilities while asking for
+ * a mailbox channel. It can't be called from atomic context.
+ * The channel is exclusively allocated and can't be used by another
+ * client before the owner calls mbox_free_channel.
+ * After assignment, any packet received on this channel will be
+ * handed over to the client via the 'rx_callback'.
+ * The framework holds reference to the client, so the mbox_client
+ * structure shouldn't be modified until the mbox_free_channel returns.
+ *
+ * Return: Pointer to the channel assigned to the client if successful.
+ * ERR_PTR for request failure.
+ */
+struct mbox_chan *mbox_request_channel(struct mbox_client *cl, int index)
+{
+ struct device *dev = cl->dev;
+ struct mbox_controller *mbox;
+ struct of_phandle_args spec;
+ struct mbox_chan *chan;
+ unsigned long flags;
+ int ret;
+
+ if (!dev || !dev->of_node) {
+ pr_debug("%s: No owner device node\n", __func__);
+ return ERR_PTR(-ENODEV);
+ }
+
+ mutex_lock(&con_mutex); /* also held when mbox_cons is modified */
+
+ if (of_parse_phandle_with_args(dev->of_node, "mboxes",
+ "#mbox-cells", index, &spec)) {
+ dev_dbg(dev, "%s: can't parse \"mboxes\" property\n", __func__);
+ mutex_unlock(&con_mutex);
+ return ERR_PTR(-ENODEV);
+ }
+ /* Find the registered controller behind the phandle; it picks the channel */
+ chan = NULL;
+ list_for_each_entry(mbox, &mbox_cons, node)
+ if (mbox->dev->of_node == spec.np) {
+ chan = mbox->of_xlate(mbox, &spec);
+ break;
+ }
+
+ of_node_put(spec.np);
+ /* Reject unknown or already-owned channels; otherwise pin the driver module */
+ if (!chan || chan->cl || !try_module_get(mbox->dev->driver->owner)) {
+ dev_dbg(dev, "%s: mailbox not free\n", __func__);
+ mutex_unlock(&con_mutex);
+ return ERR_PTR(-EBUSY);
+ }
+
+ spin_lock_irqsave(&chan->lock, flags);
+ chan->msg_free = 0;
+ chan->msg_count = 0;
+ chan->active_req = NULL;
+ chan->cl = cl;
+ init_completion(&chan->tx_complete);
+ /* A client that knows when TX is done may also tick a polled channel */
+ if (chan->txdone_method == TXDONE_BY_POLL && cl->knows_txdone)
+ chan->txdone_method |= TXDONE_BY_ACK;
+
+ spin_unlock_irqrestore(&chan->lock, flags);
+ /* Let the controller start the channel; undo the claim if that fails */
+ ret = chan->mbox->ops->startup(chan);
+ if (ret) {
+ dev_err(dev, "Unable to startup the chan (%d)\n", ret);
+ mbox_free_channel(chan);
+ chan = ERR_PTR(ret);
+ }
+
+ mutex_unlock(&con_mutex);
+ return chan;
+}
+EXPORT_SYMBOL_GPL(mbox_request_channel);
+
+/**
+ * mbox_free_channel - The client relinquishes control of a mailbox
+ * channel by this call.
+ * @chan: The mailbox channel to be freed.
+ */
+void mbox_free_channel(struct mbox_chan *chan)
+{
+ unsigned long flags;
+
+ if (!chan || !chan->cl)
+ return;
+
+ chan->mbox->ops->shutdown(chan); /* runs while the client is still attached */
+
+ /* The queued TX requests are simply aborted, no callbacks are made */
+ spin_lock_irqsave(&chan->lock, flags);
+ chan->cl = NULL;
+ chan->active_req = NULL;
+ if (chan->txdone_method == (TXDONE_BY_POLL | TXDONE_BY_ACK))
+ chan->txdone_method = TXDONE_BY_POLL; /* undo the request-time ACK upgrade */
+ /* Drop the module reference taken by mbox_request_channel() */
+ module_put(chan->mbox->dev->driver->owner);
+ spin_unlock_irqrestore(&chan->lock, flags);
+}
+EXPORT_SYMBOL_GPL(mbox_free_channel);
+/* Default of_xlate: specifier cell 0 is the channel index; NULL if invalid */
+static struct mbox_chan *
+of_mbox_index_xlate(struct mbox_controller *mbox,
+		    const struct of_phandle_args *sp)
+{
+	int ind = sp->args[0];
+
+	/* args[] is unsigned: a huge cell wraps negative and must be rejected */
+	if (ind < 0 || ind >= mbox->num_chans)
+		return NULL;
+	return &mbox->chans[ind];
+}
+
+/**
+ * mbox_controller_register - Register the mailbox controller
+ * @mbox: Pointer to the mailbox controller.
+ *
+ * The controller driver registers its communication channels
+ */
+int mbox_controller_register(struct mbox_controller *mbox)
+{
+ int i, txdone;
+
+ /* Sanity check */
+ if (!mbox || !mbox->dev || !mbox->ops || !mbox->num_chans)
+ return -EINVAL;
+ /* Choose how TX completion is detected: IRQ first, then poll, else ACK */
+ if (mbox->txdone_irq)
+ txdone = TXDONE_BY_IRQ;
+ else if (mbox->txdone_poll)
+ txdone = TXDONE_BY_POLL;
+ else /* It has to be ACK then */
+ txdone = TXDONE_BY_ACK;
+ /* Polling uses a timer whose handler is poll_txdone(); it is not armed here */
+ if (txdone == TXDONE_BY_POLL) {
+ mbox->poll.function = &poll_txdone;
+ mbox->poll.data = (unsigned long)mbox;
+ init_timer(&mbox->poll);
+ }
+
+ for (i = 0; i < mbox->num_chans; i++) {
+ struct mbox_chan *chan = &mbox->chans[i];
+
+ chan->cl = NULL;
+ chan->mbox = mbox;
+ chan->txdone_method = txdone;
+ spin_lock_init(&chan->lock);
+ }
+ /* Fall back to the index-based translator when the driver supplies none */
+ if (!mbox->of_xlate)
+ mbox->of_xlate = of_mbox_index_xlate;
+ /* Publish the controller so mbox_request_channel() can find it */
+ mutex_lock(&con_mutex);
+ list_add_tail(&mbox->node, &mbox_cons);
+ mutex_unlock(&con_mutex);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mbox_controller_register);
+
+/**
+ * mbox_controller_unregister - Unregister the mailbox controller
+ * @mbox: Pointer to the mailbox controller.
+ */
+void mbox_controller_unregister(struct mbox_controller *mbox)
+{
+ int i;
+
+ if (!mbox)
+ return;
+
+ mutex_lock(&con_mutex);
+ /* Unlink first; mbox_request_channel() walks this list under con_mutex */
+ list_del(&mbox->node);
+ /* Channels without a client are skipped inside mbox_free_channel() */
+ for (i = 0; i < mbox->num_chans; i++)
+ mbox_free_channel(&mbox->chans[i]);
+ /* NOTE(review): timer is only initialised if txdone_irq is unset — confirm */
+ if (mbox->txdone_poll)
+ del_timer_sync(&mbox->poll);
+
+ mutex_unlock(&con_mutex);
+}
+EXPORT_SYMBOL_GPL(mbox_controller_unregister);
diff --git a/drivers/mailbox/pl320-ipc.c b/drivers/mailbox/pl320-ipc.c
index d873cba..f3755e0 100644
--- a/drivers/mailbox/pl320-ipc.c
+++ b/drivers/mailbox/pl320-ipc.c
@@ -26,7 +26,7 @@
#include <linux/device.h>
#include <linux/amba/bus.h>
-#include <linux/mailbox.h>
+#include <linux/pl320-ipc.h>
#define IPCMxSOURCE(m) ((m) * 0x40)
#define IPCMxDSET(m) (((m) * 0x40) + 0x004)
diff --git a/drivers/pci/pcie/pme.c b/drivers/pci/pcie/pme.c
index a9f9c46e..63fc639 100644
--- a/drivers/pci/pcie/pme.c
+++ b/drivers/pci/pcie/pme.c
@@ -397,6 +397,7 @@
struct pcie_pme_service_data *data = get_service_data(srv);
struct pci_dev *port = srv->port;
bool wakeup;
+ int ret;
if (device_may_wakeup(&port->dev)) {
wakeup = true;
@@ -407,9 +408,10 @@
}
spin_lock_irq(&data->lock);
if (wakeup) {
- enable_irq_wake(srv->irq);
+ ret = enable_irq_wake(srv->irq);
data->suspend_level = PME_SUSPEND_WAKEUP;
- } else {
+ }
+ if (!wakeup || ret) {
struct pci_dev *port = srv->port;
pcie_pme_interrupt_enable(port, false);
diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig
index b800783..ef2dd2e 100644
--- a/drivers/pwm/Kconfig
+++ b/drivers/pwm/Kconfig
@@ -83,6 +83,7 @@
config PWM_CLPS711X
tristate "CLPS711X PWM support"
depends on ARCH_CLPS711X || COMPILE_TEST
+ depends on HAS_IOMEM
help
Generic PWM framework driver for Cirrus Logic CLPS711X.
@@ -101,6 +102,7 @@
config PWM_FSL_FTM
tristate "Freescale FlexTimer Module (FTM) PWM support"
depends on OF
+ select REGMAP_MMIO
help
Generic FTM PWM framework driver for Freescale VF610 and
Layerscape LS-1 SoCs.
@@ -149,7 +151,7 @@
config PWM_LPSS
tristate "Intel LPSS PWM support"
- depends on ACPI
+ depends on X86
help
Generic PWM framework driver for Intel Low Power Subsystem PWM
controller.
@@ -157,6 +159,24 @@
To compile this driver as a module, choose M here: the module
will be called pwm-lpss.
+config PWM_LPSS_PCI
+ tristate "Intel LPSS PWM PCI driver"
+ depends on PWM_LPSS && PCI
+ help
+ The PCI driver for Intel Low Power Subsystem PWM controller.
+
+ To compile this driver as a module, choose M here: the module
+ will be called pwm-lpss-pci.
+
+config PWM_LPSS_PLATFORM
+ tristate "Intel LPSS PWM platform driver"
+ depends on PWM_LPSS && ACPI
+ help
+ The platform driver for Intel Low Power Subsystem PWM controller.
+
+ To compile this driver as a module, choose M here: the module
+ will be called pwm-lpss-platform.
+
config PWM_MXS
tristate "Freescale MXS PWM support"
depends on ARCH_MXS && OF
diff --git a/drivers/pwm/Makefile b/drivers/pwm/Makefile
index f8c577d..c458606 100644
--- a/drivers/pwm/Makefile
+++ b/drivers/pwm/Makefile
@@ -13,6 +13,8 @@
obj-$(CONFIG_PWM_LP3943) += pwm-lp3943.o
obj-$(CONFIG_PWM_LPC32XX) += pwm-lpc32xx.o
obj-$(CONFIG_PWM_LPSS) += pwm-lpss.o
+obj-$(CONFIG_PWM_LPSS_PCI) += pwm-lpss-pci.o
+obj-$(CONFIG_PWM_LPSS_PLATFORM) += pwm-lpss-platform.o
obj-$(CONFIG_PWM_MXS) += pwm-mxs.o
obj-$(CONFIG_PWM_PCA9685) += pwm-pca9685.o
obj-$(CONFIG_PWM_PUV3) += pwm-puv3.o
diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index d2c3592..966497d 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -236,7 +236,7 @@
int ret;
if (!chip || !chip->dev || !chip->ops || !chip->ops->config ||
- !chip->ops->enable || !chip->ops->disable)
+ !chip->ops->enable || !chip->ops->disable || !chip->npwm)
return -EINVAL;
mutex_lock(&pwm_lock);
@@ -602,12 +602,9 @@
struct pwm_device *pwm = ERR_PTR(-EPROBE_DEFER);
const char *dev_id = dev ? dev_name(dev) : NULL;
struct pwm_chip *chip = NULL;
- unsigned int index = 0;
unsigned int best = 0;
- struct pwm_lookup *p;
+ struct pwm_lookup *p, *chosen = NULL;
unsigned int match;
- unsigned int period;
- enum pwm_polarity polarity;
/* look up via DT first */
if (IS_ENABLED(CONFIG_OF) && dev && dev->of_node)
@@ -653,10 +650,7 @@
}
if (match > best) {
- chip = pwmchip_find_by_name(p->provider);
- index = p->index;
- period = p->period;
- polarity = p->polarity;
+ chosen = p;
if (match != 3)
best = match;
@@ -665,17 +659,22 @@
}
}
- mutex_unlock(&pwm_lookup_lock);
+ if (!chosen)
+ goto out;
- if (chip)
- pwm = pwm_request_from_chip(chip, index, con_id ?: dev_id);
+ chip = pwmchip_find_by_name(chosen->provider);
+ if (!chip)
+ goto out;
+
+ pwm = pwm_request_from_chip(chip, chosen->index, con_id ?: dev_id);
if (IS_ERR(pwm))
- return pwm;
+ goto out;
- pwm_set_period(pwm, period);
- pwm_set_polarity(pwm, polarity);
+ pwm_set_period(pwm, chosen->period);
+ pwm_set_polarity(pwm, chosen->polarity);
-
+out:
+ mutex_unlock(&pwm_lookup_lock);
return pwm;
}
EXPORT_SYMBOL_GPL(pwm_get);
diff --git a/drivers/pwm/pwm-atmel.c b/drivers/pwm/pwm-atmel.c
index 6e700a5..d3c22de 100644
--- a/drivers/pwm/pwm-atmel.c
+++ b/drivers/pwm/pwm-atmel.c
@@ -102,7 +102,7 @@
int duty_ns, int period_ns)
{
struct atmel_pwm_chip *atmel_pwm = to_atmel_pwm_chip(chip);
- unsigned long clk_rate, prd, dty;
+ unsigned long prd, dty;
unsigned long long div;
unsigned int pres = 0;
u32 val;
@@ -113,20 +113,18 @@
return -EBUSY;
}
- clk_rate = clk_get_rate(atmel_pwm->clk);
- div = clk_rate;
+ /* Calculate the period cycles and prescale value */
+ div = (unsigned long long)clk_get_rate(atmel_pwm->clk) * period_ns;
+ do_div(div, NSEC_PER_SEC);
- /* Calculate the period cycles */
while (div > PWM_MAX_PRD) {
- div = clk_rate / (1 << pres);
- div = div * period_ns;
- /* 1/Hz = 100000000 ns */
- do_div(div, 1000000000);
+ div >>= 1;
+ pres++;
+ }
- if (pres++ > PRD_MAX_PRES) {
- dev_err(chip->dev, "pres exceeds the maximum value\n");
- return -EINVAL;
- }
+ if (pres > PRD_MAX_PRES) {
+ dev_err(chip->dev, "pres exceeds the maximum value\n");
+ return -EINVAL;
}
/* Calculate the duty cycles */
diff --git a/drivers/pwm/pwm-fsl-ftm.c b/drivers/pwm/pwm-fsl-ftm.c
index a18bc8f..0f2cc7e 100644
--- a/drivers/pwm/pwm-fsl-ftm.c
+++ b/drivers/pwm/pwm-fsl-ftm.c
@@ -18,14 +18,14 @@
#include <linux/of_address.h>
#include <linux/platform_device.h>
#include <linux/pwm.h>
+#include <linux/regmap.h>
#include <linux/slab.h>
#define FTM_SC 0x00
-#define FTM_SC_CLK_MASK 0x3
-#define FTM_SC_CLK_SHIFT 3
-#define FTM_SC_CLK(c) (((c) + 1) << FTM_SC_CLK_SHIFT)
+#define FTM_SC_CLK_MASK_SHIFT 3
+#define FTM_SC_CLK_MASK (3 << FTM_SC_CLK_MASK_SHIFT)
+#define FTM_SC_CLK(c) (((c) + 1) << FTM_SC_CLK_MASK_SHIFT)
#define FTM_SC_PS_MASK 0x7
-#define FTM_SC_PS_SHIFT 0
#define FTM_CNT 0x04
#define FTM_MOD 0x08
@@ -83,7 +83,7 @@
unsigned int cnt_select;
unsigned int clk_ps;
- void __iomem *base;
+ struct regmap *regmap;
int period_ns;
@@ -219,10 +219,11 @@
unsigned long period_ns,
unsigned long duty_ns)
{
- unsigned long long val, duty;
+ unsigned long long duty;
+ u32 val;
- val = readl(fpc->base + FTM_MOD);
- duty = duty_ns * (val + 1);
+ regmap_read(fpc->regmap, FTM_MOD, &val);
+ duty = (unsigned long long)duty_ns * (val + 1);
do_div(duty, period_ns);
return (unsigned long)duty;
@@ -232,7 +233,7 @@
int duty_ns, int period_ns)
{
struct fsl_pwm_chip *fpc = to_fsl_chip(chip);
- u32 val, period, duty;
+ u32 period, duty;
mutex_lock(&fpc->lock);
@@ -257,11 +258,9 @@
return -EINVAL;
}
- val = readl(fpc->base + FTM_SC);
- val &= ~(FTM_SC_PS_MASK << FTM_SC_PS_SHIFT);
- val |= fpc->clk_ps;
- writel(val, fpc->base + FTM_SC);
- writel(period - 1, fpc->base + FTM_MOD);
+ regmap_update_bits(fpc->regmap, FTM_SC, FTM_SC_PS_MASK,
+ fpc->clk_ps);
+ regmap_write(fpc->regmap, FTM_MOD, period - 1);
fpc->period_ns = period_ns;
}
@@ -270,8 +269,9 @@
duty = fsl_pwm_calculate_duty(fpc, period_ns, duty_ns);
- writel(FTM_CSC_MSB | FTM_CSC_ELSB, fpc->base + FTM_CSC(pwm->hwpwm));
- writel(duty, fpc->base + FTM_CV(pwm->hwpwm));
+ regmap_write(fpc->regmap, FTM_CSC(pwm->hwpwm),
+ FTM_CSC_MSB | FTM_CSC_ELSB);
+ regmap_write(fpc->regmap, FTM_CV(pwm->hwpwm), duty);
return 0;
}
@@ -283,31 +283,28 @@
struct fsl_pwm_chip *fpc = to_fsl_chip(chip);
u32 val;
- val = readl(fpc->base + FTM_POL);
+ regmap_read(fpc->regmap, FTM_POL, &val);
if (polarity == PWM_POLARITY_INVERSED)
val |= BIT(pwm->hwpwm);
else
val &= ~BIT(pwm->hwpwm);
- writel(val, fpc->base + FTM_POL);
+ regmap_write(fpc->regmap, FTM_POL, val);
return 0;
}
static int fsl_counter_clock_enable(struct fsl_pwm_chip *fpc)
{
- u32 val;
int ret;
if (fpc->use_count != 0)
return 0;
/* select counter clock source */
- val = readl(fpc->base + FTM_SC);
- val &= ~(FTM_SC_CLK_MASK << FTM_SC_CLK_SHIFT);
- val |= FTM_SC_CLK(fpc->cnt_select);
- writel(val, fpc->base + FTM_SC);
+ regmap_update_bits(fpc->regmap, FTM_SC, FTM_SC_CLK_MASK,
+ FTM_SC_CLK(fpc->cnt_select));
ret = clk_prepare_enable(fpc->clk[fpc->cnt_select]);
if (ret)
@@ -327,13 +324,10 @@
static int fsl_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
{
struct fsl_pwm_chip *fpc = to_fsl_chip(chip);
- u32 val;
int ret;
mutex_lock(&fpc->lock);
- val = readl(fpc->base + FTM_OUTMASK);
- val &= ~BIT(pwm->hwpwm);
- writel(val, fpc->base + FTM_OUTMASK);
+ regmap_update_bits(fpc->regmap, FTM_OUTMASK, BIT(pwm->hwpwm), 0);
ret = fsl_counter_clock_enable(fpc);
mutex_unlock(&fpc->lock);
@@ -343,8 +337,6 @@
static void fsl_counter_clock_disable(struct fsl_pwm_chip *fpc)
{
- u32 val;
-
/*
* already disabled, do nothing
*/
@@ -356,9 +348,7 @@
return;
/* no users left, disable PWM counter clock */
- val = readl(fpc->base + FTM_SC);
- val &= ~(FTM_SC_CLK_MASK << FTM_SC_CLK_SHIFT);
- writel(val, fpc->base + FTM_SC);
+ regmap_update_bits(fpc->regmap, FTM_SC, FTM_SC_CLK_MASK, 0);
clk_disable_unprepare(fpc->clk[FSL_PWM_CLK_CNTEN]);
clk_disable_unprepare(fpc->clk[fpc->cnt_select]);
@@ -370,14 +360,12 @@
u32 val;
mutex_lock(&fpc->lock);
- val = readl(fpc->base + FTM_OUTMASK);
- val |= BIT(pwm->hwpwm);
- writel(val, fpc->base + FTM_OUTMASK);
+ regmap_update_bits(fpc->regmap, FTM_OUTMASK, BIT(pwm->hwpwm),
+ BIT(pwm->hwpwm));
fsl_counter_clock_disable(fpc);
- val = readl(fpc->base + FTM_OUTMASK);
-
+ regmap_read(fpc->regmap, FTM_OUTMASK, &val);
if ((val & 0xFF) == 0xFF)
fpc->period_ns = 0;
@@ -402,19 +390,28 @@
if (ret)
return ret;
- writel(0x00, fpc->base + FTM_CNTIN);
- writel(0x00, fpc->base + FTM_OUTINIT);
- writel(0xFF, fpc->base + FTM_OUTMASK);
+ regmap_write(fpc->regmap, FTM_CNTIN, 0x00);
+ regmap_write(fpc->regmap, FTM_OUTINIT, 0x00);
+ regmap_write(fpc->regmap, FTM_OUTMASK, 0xFF);
clk_disable_unprepare(fpc->clk[FSL_PWM_CLK_SYS]);
return 0;
}
+static const struct regmap_config fsl_pwm_regmap_config = {
+ .reg_bits = 32,
+ .reg_stride = 4,
+ .val_bits = 32,
+
+ .max_register = FTM_PWMLOAD,
+};
+
static int fsl_pwm_probe(struct platform_device *pdev)
{
struct fsl_pwm_chip *fpc;
struct resource *res;
+ void __iomem *base;
int ret;
fpc = devm_kzalloc(&pdev->dev, sizeof(*fpc), GFP_KERNEL);
@@ -426,9 +423,16 @@
fpc->chip.dev = &pdev->dev;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- fpc->base = devm_ioremap_resource(&pdev->dev, res);
- if (IS_ERR(fpc->base))
- return PTR_ERR(fpc->base);
+ base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(base))
+ return PTR_ERR(base);
+
+ fpc->regmap = devm_regmap_init_mmio_clk(&pdev->dev, NULL, base,
+ &fsl_pwm_regmap_config);
+ if (IS_ERR(fpc->regmap)) {
+ dev_err(&pdev->dev, "regmap init failed\n");
+ return PTR_ERR(fpc->regmap);
+ }
fpc->clk[FSL_PWM_CLK_SYS] = devm_clk_get(&pdev->dev, "ftm_sys");
if (IS_ERR(fpc->clk[FSL_PWM_CLK_SYS])) {
diff --git a/drivers/pwm/pwm-imx.c b/drivers/pwm/pwm-imx.c
index 5449d91..f8b5f10 100644
--- a/drivers/pwm/pwm-imx.c
+++ b/drivers/pwm/pwm-imx.c
@@ -14,6 +14,7 @@
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/clk.h>
+#include <linux/delay.h>
#include <linux/io.h>
#include <linux/pwm.h>
#include <linux/of.h>
@@ -21,24 +22,30 @@
/* i.MX1 and i.MX21 share the same PWM function block: */
-#define MX1_PWMC 0x00 /* PWM Control Register */
-#define MX1_PWMS 0x04 /* PWM Sample Register */
-#define MX1_PWMP 0x08 /* PWM Period Register */
+#define MX1_PWMC 0x00 /* PWM Control Register */
+#define MX1_PWMS 0x04 /* PWM Sample Register */
+#define MX1_PWMP 0x08 /* PWM Period Register */
-#define MX1_PWMC_EN (1 << 4)
+#define MX1_PWMC_EN (1 << 4)
/* i.MX27, i.MX31, i.MX35 share the same PWM function block: */
-#define MX3_PWMCR 0x00 /* PWM Control Register */
-#define MX3_PWMSAR 0x0C /* PWM Sample Register */
-#define MX3_PWMPR 0x10 /* PWM Period Register */
-#define MX3_PWMCR_PRESCALER(x) (((x - 1) & 0xFFF) << 4)
-#define MX3_PWMCR_DOZEEN (1 << 24)
-#define MX3_PWMCR_WAITEN (1 << 23)
+#define MX3_PWMCR 0x00 /* PWM Control Register */
+#define MX3_PWMSR 0x04 /* PWM Status Register */
+#define MX3_PWMSAR 0x0C /* PWM Sample Register */
+#define MX3_PWMPR 0x10 /* PWM Period Register */
+#define MX3_PWMCR_PRESCALER(x) ((((x) - 1) & 0xFFF) << 4)
+#define MX3_PWMCR_DOZEEN (1 << 24)
+#define MX3_PWMCR_WAITEN (1 << 23)
#define MX3_PWMCR_DBGEN (1 << 22)
-#define MX3_PWMCR_CLKSRC_IPG_HIGH (2 << 16)
-#define MX3_PWMCR_CLKSRC_IPG (1 << 16)
-#define MX3_PWMCR_EN (1 << 0)
+#define MX3_PWMCR_CLKSRC_IPG_HIGH (2 << 16)
+#define MX3_PWMCR_CLKSRC_IPG (1 << 16)
+#define MX3_PWMCR_SWR (1 << 3)
+#define MX3_PWMCR_EN (1 << 0)
+#define MX3_PWMSR_FIFOAV_4WORDS 0x4
+#define MX3_PWMSR_FIFOAV_MASK 0x7
+
+#define MX3_PWM_SWR_LOOP 5
struct imx_chip {
struct clk *clk_per;
@@ -103,9 +110,43 @@
struct pwm_device *pwm, int duty_ns, int period_ns)
{
struct imx_chip *imx = to_imx_chip(chip);
+ struct device *dev = chip->dev;
unsigned long long c;
unsigned long period_cycles, duty_cycles, prescale;
- u32 cr;
+ unsigned int period_ms;
+ bool enable = test_bit(PWMF_ENABLED, &pwm->flags);
+ int wait_count = 0, fifoav;
+ u32 cr, sr;
+
+ /*
+ * i.MX PWMv2 has a 4-word sample FIFO.
+ * In order to avoid FIFO overflow issue, we do software reset
+ * to clear all sample FIFO if the controller is disabled or
+ * wait for a full PWM cycle to get a relinquished FIFO slot
+ * when the controller is enabled and the FIFO is fully loaded.
+ */
+ if (enable) {
+ sr = readl(imx->mmio_base + MX3_PWMSR);
+ fifoav = sr & MX3_PWMSR_FIFOAV_MASK;
+ if (fifoav == MX3_PWMSR_FIFOAV_4WORDS) {
+ period_ms = DIV_ROUND_UP(pwm->period, NSEC_PER_MSEC);
+ msleep(period_ms);
+
+ sr = readl(imx->mmio_base + MX3_PWMSR);
+ if (fifoav == (sr & MX3_PWMSR_FIFOAV_MASK))
+ dev_warn(dev, "there is no free FIFO slot\n");
+ }
+ } else {
+ writel(MX3_PWMCR_SWR, imx->mmio_base + MX3_PWMCR);
+ do {
+ usleep_range(200, 1000);
+ cr = readl(imx->mmio_base + MX3_PWMCR);
+ } while ((cr & MX3_PWMCR_SWR) &&
+ (wait_count++ < MX3_PWM_SWR_LOOP));
+
+ if (cr & MX3_PWMCR_SWR)
+ dev_warn(dev, "software reset timeout\n");
+ }
c = clk_get_rate(imx->clk_per);
c = c * period_ns;
@@ -135,7 +176,7 @@
MX3_PWMCR_DOZEEN | MX3_PWMCR_WAITEN |
MX3_PWMCR_DBGEN | MX3_PWMCR_CLKSRC_IPG_HIGH;
- if (test_bit(PWMF_ENABLED, &pwm->flags))
+ if (enable)
cr |= MX3_PWMCR_EN;
writel(cr, imx->mmio_base + MX3_PWMCR);
diff --git a/drivers/pwm/pwm-lpss-pci.c b/drivers/pwm/pwm-lpss-pci.c
new file mode 100644
index 0000000..cf20d2b
--- /dev/null
+++ b/drivers/pwm/pwm-lpss-pci.c
@@ -0,0 +1,64 @@
+/*
+ * Intel Low Power Subsystem PWM controller PCI driver
+ *
+ * Copyright (C) 2014, Intel Corporation
+ *
+ * Derived from the original pwm-lpss.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "pwm-lpss.h"
+
+static int pwm_lpss_probe_pci(struct pci_dev *pdev,
+			      const struct pci_device_id *id)
+{
+	const struct pwm_lpss_boardinfo *info;
+	struct pwm_lpss_chip *lpwm;
+	int err;
+
+	err = pcim_enable_device(pdev);
+	if (err < 0)
+		return err;
+	/* driver_data holds the board info set in pwm_lpss_pci_ids[]; keep const */
+	info = (const struct pwm_lpss_boardinfo *)id->driver_data;
+	lpwm = pwm_lpss_probe(&pdev->dev, &pdev->resource[0], info);
+	if (IS_ERR(lpwm))
+		return PTR_ERR(lpwm);
+
+	pci_set_drvdata(pdev, lpwm);
+	return 0;
+}
+
+static void pwm_lpss_remove_pci(struct pci_dev *pdev)
+{
+ struct pwm_lpss_chip *lpwm = pci_get_drvdata(pdev);
+ /* No pci_disable_device(): probe used the managed pcim_enable_device() */
+ pwm_lpss_remove(lpwm);
+}
+
+static const struct pci_device_id pwm_lpss_pci_ids[] = {
+ { PCI_VDEVICE(INTEL, 0x0f08), (unsigned long)&pwm_lpss_byt_info},
+ { PCI_VDEVICE(INTEL, 0x0f09), (unsigned long)&pwm_lpss_byt_info},
+ { PCI_VDEVICE(INTEL, 0x2288), (unsigned long)&pwm_lpss_bsw_info},
+ { PCI_VDEVICE(INTEL, 0x2289), (unsigned long)&pwm_lpss_bsw_info},
+ { },
+};
+MODULE_DEVICE_TABLE(pci, pwm_lpss_pci_ids);
+
+static struct pci_driver pwm_lpss_driver_pci = {
+ .name = "pwm-lpss",
+ .id_table = pwm_lpss_pci_ids,
+ .probe = pwm_lpss_probe_pci,
+ .remove = pwm_lpss_remove_pci,
+};
+module_pci_driver(pwm_lpss_driver_pci);
+
+MODULE_DESCRIPTION("PWM PCI driver for Intel LPSS");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pwm/pwm-lpss-platform.c b/drivers/pwm/pwm-lpss-platform.c
new file mode 100644
index 0000000..18a9c88
--- /dev/null
+++ b/drivers/pwm/pwm-lpss-platform.c
@@ -0,0 +1,68 @@
+/*
+ * Intel Low Power Subsystem PWM controller driver
+ *
+ * Copyright (C) 2014, Intel Corporation
+ *
+ * Derived from the original pwm-lpss.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/acpi.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+#include "pwm-lpss.h"
+
+static int pwm_lpss_probe_platform(struct platform_device *pdev)
+{
+ const struct pwm_lpss_boardinfo *info;
+ const struct acpi_device_id *id;
+ struct pwm_lpss_chip *lpwm;
+ struct resource *r;
+
+ id = acpi_match_device(pdev->dev.driver->acpi_match_table, &pdev->dev);
+ if (!id)
+ return -ENODEV;
+ /* driver_data of the matched ACPI ID points at the per-SoC board info */
+ info = (const struct pwm_lpss_boardinfo *)id->driver_data;
+ r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ /* NOTE(review): r is passed on unchecked — confirm pwm_lpss_probe() copes with NULL */
+ lpwm = pwm_lpss_probe(&pdev->dev, r, info);
+ if (IS_ERR(lpwm))
+ return PTR_ERR(lpwm);
+
+ platform_set_drvdata(pdev, lpwm);
+ return 0;
+}
+
+static int pwm_lpss_remove_platform(struct platform_device *pdev)
+{
+ struct pwm_lpss_chip *lpwm = platform_get_drvdata(pdev);
+ /* lpwm was stored by the probe; propagate pwm_lpss_remove()'s status */
+ return pwm_lpss_remove(lpwm);
+}
+
+static const struct acpi_device_id pwm_lpss_acpi_match[] = {
+ { "80860F09", (unsigned long)&pwm_lpss_byt_info },
+ { "80862288", (unsigned long)&pwm_lpss_bsw_info },
+ { },
+};
+MODULE_DEVICE_TABLE(acpi, pwm_lpss_acpi_match);
+
+static struct platform_driver pwm_lpss_driver_platform = {
+ .driver = {
+ .name = "pwm-lpss",
+ .acpi_match_table = pwm_lpss_acpi_match,
+ },
+ .probe = pwm_lpss_probe_platform,
+ .remove = pwm_lpss_remove_platform,
+};
+module_platform_driver(pwm_lpss_driver_platform);
+
+MODULE_DESCRIPTION("PWM platform driver for Intel LPSS");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:pwm-lpss");
diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c
index 4df994f..e979825 100644
--- a/drivers/pwm/pwm-lpss.c
+++ b/drivers/pwm/pwm-lpss.c
@@ -13,15 +13,11 @@
* published by the Free Software Foundation.
*/
-#include <linux/acpi.h>
-#include <linux/device.h>
+#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/pwm.h>
-#include <linux/platform_device.h>
-#include <linux/pci.h>
-static int pci_drv, plat_drv; /* So we know which drivers registered */
+#include "pwm-lpss.h"
#define PWM 0x00000000
#define PWM_ENABLE BIT(31)
@@ -39,14 +35,17 @@
unsigned long clk_rate;
};
-struct pwm_lpss_boardinfo {
- unsigned long clk_rate;
-};
-
/* BayTrail */
-static const struct pwm_lpss_boardinfo byt_info = {
- 25000000
+const struct pwm_lpss_boardinfo pwm_lpss_byt_info = {
+ .clk_rate = 25000000
};
+EXPORT_SYMBOL_GPL(pwm_lpss_byt_info);
+
+/* Braswell */
+const struct pwm_lpss_boardinfo pwm_lpss_bsw_info = {
+ .clk_rate = 19200000
+};
+EXPORT_SYMBOL_GPL(pwm_lpss_bsw_info);
static inline struct pwm_lpss_chip *to_lpwm(struct pwm_chip *chip)
{
@@ -118,9 +117,8 @@
.owner = THIS_MODULE,
};
-static struct pwm_lpss_chip *pwm_lpss_probe(struct device *dev,
- struct resource *r,
- const struct pwm_lpss_boardinfo *info)
+struct pwm_lpss_chip *pwm_lpss_probe(struct device *dev, struct resource *r,
+ const struct pwm_lpss_boardinfo *info)
{
struct pwm_lpss_chip *lpwm;
int ret;
@@ -147,8 +145,9 @@
return lpwm;
}
+EXPORT_SYMBOL_GPL(pwm_lpss_probe);
-static int pwm_lpss_remove(struct pwm_lpss_chip *lpwm)
+int pwm_lpss_remove(struct pwm_lpss_chip *lpwm)
{
u32 ctrl;
@@ -157,114 +156,8 @@
return pwmchip_remove(&lpwm->chip);
}
-
-static int pwm_lpss_probe_pci(struct pci_dev *pdev,
- const struct pci_device_id *id)
-{
- const struct pwm_lpss_boardinfo *info;
- struct pwm_lpss_chip *lpwm;
- int err;
-
- err = pci_enable_device(pdev);
- if (err < 0)
- return err;
-
- info = (struct pwm_lpss_boardinfo *)id->driver_data;
- lpwm = pwm_lpss_probe(&pdev->dev, &pdev->resource[0], info);
- if (IS_ERR(lpwm))
- return PTR_ERR(lpwm);
-
- pci_set_drvdata(pdev, lpwm);
- return 0;
-}
-
-static void pwm_lpss_remove_pci(struct pci_dev *pdev)
-{
- struct pwm_lpss_chip *lpwm = pci_get_drvdata(pdev);
-
- pwm_lpss_remove(lpwm);
- pci_disable_device(pdev);
-}
-
-static struct pci_device_id pwm_lpss_pci_ids[] = {
- { PCI_VDEVICE(INTEL, 0x0f08), (unsigned long)&byt_info},
- { PCI_VDEVICE(INTEL, 0x0f09), (unsigned long)&byt_info},
- { },
-};
-MODULE_DEVICE_TABLE(pci, pwm_lpss_pci_ids);
-
-static struct pci_driver pwm_lpss_driver_pci = {
- .name = "pwm-lpss",
- .id_table = pwm_lpss_pci_ids,
- .probe = pwm_lpss_probe_pci,
- .remove = pwm_lpss_remove_pci,
-};
-
-static int pwm_lpss_probe_platform(struct platform_device *pdev)
-{
- const struct pwm_lpss_boardinfo *info;
- const struct acpi_device_id *id;
- struct pwm_lpss_chip *lpwm;
- struct resource *r;
-
- id = acpi_match_device(pdev->dev.driver->acpi_match_table, &pdev->dev);
- if (!id)
- return -ENODEV;
-
- r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-
- info = (struct pwm_lpss_boardinfo *)id->driver_data;
- lpwm = pwm_lpss_probe(&pdev->dev, r, info);
- if (IS_ERR(lpwm))
- return PTR_ERR(lpwm);
-
- platform_set_drvdata(pdev, lpwm);
- return 0;
-}
-
-static int pwm_lpss_remove_platform(struct platform_device *pdev)
-{
- struct pwm_lpss_chip *lpwm = platform_get_drvdata(pdev);
-
- return pwm_lpss_remove(lpwm);
-}
-
-static const struct acpi_device_id pwm_lpss_acpi_match[] = {
- { "80860F09", (unsigned long)&byt_info },
- { },
-};
-MODULE_DEVICE_TABLE(acpi, pwm_lpss_acpi_match);
-
-static struct platform_driver pwm_lpss_driver_platform = {
- .driver = {
- .name = "pwm-lpss",
- .acpi_match_table = pwm_lpss_acpi_match,
- },
- .probe = pwm_lpss_probe_platform,
- .remove = pwm_lpss_remove_platform,
-};
-
-static int __init pwm_init(void)
-{
- pci_drv = pci_register_driver(&pwm_lpss_driver_pci);
- plat_drv = platform_driver_register(&pwm_lpss_driver_platform);
- if (pci_drv && plat_drv)
- return pci_drv;
-
- return 0;
-}
-module_init(pwm_init);
-
-static void __exit pwm_exit(void)
-{
- if (!pci_drv)
- pci_unregister_driver(&pwm_lpss_driver_pci);
- if (!plat_drv)
- platform_driver_unregister(&pwm_lpss_driver_platform);
-}
-module_exit(pwm_exit);
+EXPORT_SYMBOL_GPL(pwm_lpss_remove);
MODULE_DESCRIPTION("PWM driver for Intel LPSS");
MODULE_AUTHOR("Mika Westerberg <mika.westerberg@linux.intel.com>");
MODULE_LICENSE("GPL v2");
-MODULE_ALIAS("platform:pwm-lpss");
diff --git a/drivers/pwm/pwm-lpss.h b/drivers/pwm/pwm-lpss.h
new file mode 100644
index 0000000..aa041bb
--- /dev/null
+++ b/drivers/pwm/pwm-lpss.h
@@ -0,0 +1,32 @@
+/*
+ * Intel Low Power Subsystem PWM controller driver
+ *
+ * Copyright (C) 2014, Intel Corporation
+ *
+ * Derived from the original pwm-lpss.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __PWM_LPSS_H
+#define __PWM_LPSS_H
+
+#include <linux/device.h>
+#include <linux/pwm.h>
+
+struct pwm_lpss_chip;
+
+struct pwm_lpss_boardinfo {
+ unsigned long clk_rate;
+};
+
+extern const struct pwm_lpss_boardinfo pwm_lpss_byt_info;
+extern const struct pwm_lpss_boardinfo pwm_lpss_bsw_info;
+
+struct pwm_lpss_chip *pwm_lpss_probe(struct device *dev, struct resource *r,
+ const struct pwm_lpss_boardinfo *info);
+int pwm_lpss_remove(struct pwm_lpss_chip *lpwm);
+
+#endif /* __PWM_LPSS_H */
diff --git a/drivers/pwm/pwm-rockchip.c b/drivers/pwm/pwm-rockchip.c
index bdd8644..9442df2 100644
--- a/drivers/pwm/pwm-rockchip.c
+++ b/drivers/pwm/pwm-rockchip.c
@@ -24,7 +24,9 @@
#define PWM_ENABLE (1 << 0)
#define PWM_CONTINUOUS (1 << 1)
#define PWM_DUTY_POSITIVE (1 << 3)
+#define PWM_DUTY_NEGATIVE (0 << 3)
#define PWM_INACTIVE_NEGATIVE (0 << 4)
+#define PWM_INACTIVE_POSITIVE (1 << 4)
#define PWM_OUTPUT_LEFT (0 << 5)
#define PWM_LP_DISABLE (0 << 8)
@@ -45,8 +47,10 @@
struct rockchip_pwm_data {
struct rockchip_pwm_regs regs;
unsigned int prescaler;
+ const struct pwm_ops *ops;
- void (*set_enable)(struct pwm_chip *chip, bool enable);
+ void (*set_enable)(struct pwm_chip *chip,
+ struct pwm_device *pwm, bool enable);
};
static inline struct rockchip_pwm_chip *to_rockchip_pwm_chip(struct pwm_chip *c)
@@ -54,7 +58,8 @@
return container_of(c, struct rockchip_pwm_chip, chip);
}
-static void rockchip_pwm_set_enable_v1(struct pwm_chip *chip, bool enable)
+static void rockchip_pwm_set_enable_v1(struct pwm_chip *chip,
+ struct pwm_device *pwm, bool enable)
{
struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip);
u32 enable_conf = PWM_CTRL_OUTPUT_EN | PWM_CTRL_TIMER_EN;
@@ -70,14 +75,19 @@
writel_relaxed(val, pc->base + pc->data->regs.ctrl);
}
-static void rockchip_pwm_set_enable_v2(struct pwm_chip *chip, bool enable)
+static void rockchip_pwm_set_enable_v2(struct pwm_chip *chip,
+ struct pwm_device *pwm, bool enable)
{
struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip);
u32 enable_conf = PWM_OUTPUT_LEFT | PWM_LP_DISABLE | PWM_ENABLE |
- PWM_CONTINUOUS | PWM_DUTY_POSITIVE |
- PWM_INACTIVE_NEGATIVE;
+ PWM_CONTINUOUS;
u32 val;
+ if (pwm->polarity == PWM_POLARITY_INVERSED)
+ enable_conf |= PWM_DUTY_NEGATIVE | PWM_INACTIVE_POSITIVE;
+ else
+ enable_conf |= PWM_DUTY_POSITIVE | PWM_INACTIVE_NEGATIVE;
+
val = readl_relaxed(pc->base + pc->data->regs.ctrl);
if (enable)
@@ -124,6 +134,19 @@
return 0;
}
+static int rockchip_pwm_set_polarity(struct pwm_chip *chip,
+ struct pwm_device *pwm,
+ enum pwm_polarity polarity)
+{
+ /*
+ * No action needed here because pwm->polarity will be set by the core
+ * and the core will only change polarity when the PWM is not enabled.
+ * We'll handle things in set_enable().
+ */
+
+ return 0;
+}
+
static int rockchip_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
{
struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip);
@@ -133,7 +156,7 @@
if (ret)
return ret;
- pc->data->set_enable(chip, true);
+ pc->data->set_enable(chip, pwm, true);
return 0;
}
@@ -142,18 +165,26 @@
{
struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip);
- pc->data->set_enable(chip, false);
+ pc->data->set_enable(chip, pwm, false);
clk_disable(pc->clk);
}
-static const struct pwm_ops rockchip_pwm_ops = {
+static const struct pwm_ops rockchip_pwm_ops_v1 = {
.config = rockchip_pwm_config,
.enable = rockchip_pwm_enable,
.disable = rockchip_pwm_disable,
.owner = THIS_MODULE,
};
+static const struct pwm_ops rockchip_pwm_ops_v2 = {
+ .config = rockchip_pwm_config,
+ .set_polarity = rockchip_pwm_set_polarity,
+ .enable = rockchip_pwm_enable,
+ .disable = rockchip_pwm_disable,
+ .owner = THIS_MODULE,
+};
+
static const struct rockchip_pwm_data pwm_data_v1 = {
.regs = {
.duty = 0x04,
@@ -162,6 +193,7 @@
.ctrl = 0x0c,
},
.prescaler = 2,
+ .ops = &rockchip_pwm_ops_v1,
.set_enable = rockchip_pwm_set_enable_v1,
};
@@ -173,6 +205,7 @@
.ctrl = 0x0c,
},
.prescaler = 1,
+ .ops = &rockchip_pwm_ops_v2,
.set_enable = rockchip_pwm_set_enable_v2,
};
@@ -184,6 +217,7 @@
.ctrl = 0x00,
},
.prescaler = 1,
+ .ops = &rockchip_pwm_ops_v2,
.set_enable = rockchip_pwm_set_enable_v2,
};
@@ -227,10 +261,15 @@
pc->data = id->data;
pc->chip.dev = &pdev->dev;
- pc->chip.ops = &rockchip_pwm_ops;
+ pc->chip.ops = pc->data->ops;
pc->chip.base = -1;
pc->chip.npwm = 1;
+ if (pc->data->ops->set_polarity) {
+ pc->chip.of_xlate = of_pwm_xlate_with_flags;
+ pc->chip.of_pwm_n_cells = 3;
+ }
+
ret = pwmchip_add(&pc->chip);
if (ret < 0) {
clk_unprepare(pc->clk);
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 8cd0bee..94ae179 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -830,7 +830,7 @@
config RTC_DRV_EFI
tristate "EFI RTC"
- depends on EFI
+ depends on EFI && !X86
help
If you say yes here you will get support for the EFI
Real Time Clock.
diff --git a/drivers/rtc/rtc-efi.c b/drivers/rtc/rtc-efi.c
index c384fec..53b589d 100644
--- a/drivers/rtc/rtc-efi.c
+++ b/drivers/rtc/rtc-efi.c
@@ -236,3 +236,4 @@
MODULE_AUTHOR("dann frazier <dannf@hp.com>");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("EFI RTC driver");
+MODULE_ALIAS("platform:rtc-efi");
diff --git a/drivers/s390/char/Kconfig b/drivers/s390/char/Kconfig
index dc24ecf..db2cb1f 100644
--- a/drivers/s390/char/Kconfig
+++ b/drivers/s390/char/Kconfig
@@ -105,7 +105,7 @@
config HMC_DRV
def_tristate m
prompt "Support for file transfers from HMC drive CD/DVD-ROM"
- depends on 64BIT
+ depends on S390 && 64BIT
select CRC16
help
This option enables support for file transfers from a Hardware
diff --git a/drivers/scsi/osd/Kbuild b/drivers/scsi/osd/Kbuild
index 5fd73d77..58cecd4 100644
--- a/drivers/scsi/osd/Kbuild
+++ b/drivers/scsi/osd/Kbuild
@@ -4,7 +4,7 @@
# Copyright (C) 2008 Panasas Inc. All rights reserved.
#
# Authors:
-# Boaz Harrosh <bharrosh@panasas.com>
+# Boaz Harrosh <ooo@electrozaur.com>
# Benny Halevy <bhalevy@panasas.com>
#
# This program is free software; you can redistribute it and/or modify
diff --git a/drivers/scsi/osd/Kconfig b/drivers/scsi/osd/Kconfig
index a070351..347cc5e 100644
--- a/drivers/scsi/osd/Kconfig
+++ b/drivers/scsi/osd/Kconfig
@@ -4,7 +4,7 @@
# Copyright (C) 2008 Panasas Inc. All rights reserved.
#
# Authors:
-# Boaz Harrosh <bharrosh@panasas.com>
+# Boaz Harrosh <ooo@electrozaur.com>
# Benny Halevy <bhalevy@panasas.com>
#
# This program is free software; you can redistribute it and/or modify
diff --git a/drivers/scsi/osd/osd_debug.h b/drivers/scsi/osd/osd_debug.h
index 579e491..2634126 100644
--- a/drivers/scsi/osd/osd_debug.h
+++ b/drivers/scsi/osd/osd_debug.h
@@ -4,7 +4,7 @@
* Copyright (C) 2008 Panasas Inc. All rights reserved.
*
* Authors:
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
* Benny Halevy <bhalevy@panasas.com>
*
* This program is free software; you can redistribute it and/or modify
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index fd19fd8..488c392 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -7,7 +7,7 @@
* Copyright (C) 2008 Panasas Inc. All rights reserved.
*
* Authors:
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
* Benny Halevy <bhalevy@panasas.com>
*
* This program is free software; you can redistribute it and/or modify
@@ -57,7 +57,7 @@
enum { OSD_REQ_RETRIES = 1 };
-MODULE_AUTHOR("Boaz Harrosh <bharrosh@panasas.com>");
+MODULE_AUTHOR("Boaz Harrosh <ooo@electrozaur.com>");
MODULE_DESCRIPTION("open-osd initiator library libosd.ko");
MODULE_LICENSE("GPL");
diff --git a/drivers/scsi/osd/osd_uld.c b/drivers/scsi/osd/osd_uld.c
index e1d9a4c..92cdd4b 100644
--- a/drivers/scsi/osd/osd_uld.c
+++ b/drivers/scsi/osd/osd_uld.c
@@ -10,7 +10,7 @@
* Copyright (C) 2008 Panasas Inc. All rights reserved.
*
* Authors:
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
* Benny Halevy <bhalevy@panasas.com>
*
* This program is free software; you can redistribute it and/or modify
@@ -74,7 +74,7 @@
static const char osd_name[] = "osd";
static const char *osd_version_string = "open-osd 0.2.1";
-MODULE_AUTHOR("Boaz Harrosh <bharrosh@panasas.com>");
+MODULE_AUTHOR("Boaz Harrosh <ooo@electrozaur.com>");
MODULE_DESCRIPTION("open-osd Upper-Layer-Driver osd.ko");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CHARDEV_MAJOR(SCSI_OSD_MAJOR);
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 829752c..a902fa1 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -112,6 +112,7 @@
struct qla_tgt_cmd *cmd);
static void qlt_alloc_qfull_cmd(struct scsi_qla_host *vha,
struct atio_from_isp *atio, uint16_t status, int qfull);
+static void qlt_disable_vha(struct scsi_qla_host *vha);
/*
* Global Variables
*/
@@ -210,7 +211,7 @@
spin_unlock_irqrestore(&vha->hw->tgt.q_full_lock, flags);
}
-void qlt_24xx_atio_pkt_all_vps(struct scsi_qla_host *vha,
+static void qlt_24xx_atio_pkt_all_vps(struct scsi_qla_host *vha,
struct atio_from_isp *atio)
{
ql_dbg(ql_dbg_tgt, vha, 0xe072,
@@ -433,7 +434,7 @@
#if 0 /* FIXME: Re-enable Global event handling.. */
/* Global event */
atomic_inc(&ha->tgt.qla_tgt->tgt_global_resets_count);
- qlt_clear_tgt_db(ha->tgt.qla_tgt, 1);
+ qlt_clear_tgt_db(ha->tgt.qla_tgt);
if (!list_empty(&ha->tgt.qla_tgt->sess_list)) {
sess = list_entry(ha->tgt.qla_tgt->sess_list.next,
typeof(*sess), sess_list_entry);
@@ -515,7 +516,7 @@
}
/* ha->hardware_lock supposed to be held on entry */
-static void qlt_clear_tgt_db(struct qla_tgt *tgt, bool local_only)
+static void qlt_clear_tgt_db(struct qla_tgt *tgt)
{
struct qla_tgt_sess *sess;
@@ -867,7 +868,7 @@
mutex_lock(&vha->vha_tgt.tgt_mutex);
spin_lock_irqsave(&ha->hardware_lock, flags);
tgt->tgt_stop = 1;
- qlt_clear_tgt_db(tgt, true);
+ qlt_clear_tgt_db(tgt);
spin_unlock_irqrestore(&ha->hardware_lock, flags);
mutex_unlock(&vha->vha_tgt.tgt_mutex);
mutex_unlock(&qla_tgt_mutex);
@@ -1462,12 +1463,13 @@
return -1;
}
-static inline void qlt_unmap_sg(struct scsi_qla_host *vha,
- struct qla_tgt_cmd *cmd)
+static void qlt_unmap_sg(struct scsi_qla_host *vha, struct qla_tgt_cmd *cmd)
{
struct qla_hw_data *ha = vha->hw;
- BUG_ON(!cmd->sg_mapped);
+ if (!cmd->sg_mapped)
+ return;
+
pci_unmap_sg(ha->pdev, cmd->sg, cmd->sg_cnt, cmd->dma_data_direction);
cmd->sg_mapped = 0;
@@ -2428,8 +2430,7 @@
return 0;
out_unmap_unlock:
- if (cmd->sg_mapped)
- qlt_unmap_sg(vha, cmd);
+ qlt_unmap_sg(vha, cmd);
spin_unlock_irqrestore(&ha->hardware_lock, flags);
return res;
@@ -2506,8 +2507,7 @@
return res;
out_unlock_free_unmap:
- if (cmd->sg_mapped)
- qlt_unmap_sg(vha, cmd);
+ qlt_unmap_sg(vha, cmd);
spin_unlock_irqrestore(&ha->hardware_lock, flags);
return res;
@@ -2741,8 +2741,7 @@
if (!ha_locked && !in_interrupt())
msleep(250); /* just in case */
- if (cmd->sg_mapped)
- qlt_unmap_sg(vha, cmd);
+ qlt_unmap_sg(vha, cmd);
vha->hw->tgt.tgt_ops->free_cmd(cmd);
}
return;
@@ -3087,8 +3086,7 @@
tfo = se_cmd->se_tfo;
cmd->cmd_sent_to_fw = 0;
- if (cmd->sg_mapped)
- qlt_unmap_sg(vha, cmd);
+ qlt_unmap_sg(vha, cmd);
if (unlikely(status != CTIO_SUCCESS)) {
switch (status & 0xFFFF) {
@@ -5343,7 +5341,7 @@
EXPORT_SYMBOL(qlt_lport_deregister);
/* Must be called under HW lock */
-void qlt_set_mode(struct scsi_qla_host *vha)
+static void qlt_set_mode(struct scsi_qla_host *vha)
{
struct qla_hw_data *ha = vha->hw;
@@ -5364,7 +5362,7 @@
}
/* Must be called under HW lock */
-void qlt_clear_mode(struct scsi_qla_host *vha)
+static void qlt_clear_mode(struct scsi_qla_host *vha)
{
struct qla_hw_data *ha = vha->hw;
@@ -5428,8 +5426,7 @@
*
* Disable Target Mode and reset the adapter
*/
-void
-qlt_disable_vha(struct scsi_qla_host *vha)
+static void qlt_disable_vha(struct scsi_qla_host *vha)
{
struct qla_hw_data *ha = vha->hw;
struct qla_tgt *tgt = vha->vha_tgt.qla_tgt;
diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h
index 8ff330f..3320867 100644
--- a/drivers/scsi/qla2xxx/qla_target.h
+++ b/drivers/scsi/qla2xxx/qla_target.h
@@ -1001,11 +1001,11 @@
struct qla_tgt *tgt;
void *pkt;
struct scatterlist *sg; /* cmd data buffer SG vector */
+ unsigned char *sense_buffer;
int seg_cnt;
int req_cnt;
uint16_t rq_result;
uint16_t scsi_status;
- unsigned char *sense_buffer;
int sense_buffer_len;
int residual;
int add_status_pkt;
@@ -1033,10 +1033,6 @@
extern struct qla_tgt_data qla_target;
-/*
- * Internal function prototypes
- */
-void qlt_disable_vha(struct scsi_qla_host *);
/*
* Function prototypes for qla_target.c logic used by qla2xxx LLD code.
@@ -1049,8 +1045,6 @@
extern void qlt_unreg_sess(struct qla_tgt_sess *);
extern void qlt_fc_port_added(struct scsi_qla_host *, fc_port_t *);
extern void qlt_fc_port_deleted(struct scsi_qla_host *, fc_port_t *);
-extern void qlt_set_mode(struct scsi_qla_host *ha);
-extern void qlt_clear_mode(struct scsi_qla_host *ha);
extern int __init qlt_init(void);
extern void qlt_exit(void);
extern void qlt_update_vp_map(struct scsi_qla_host *, int);
@@ -1083,13 +1077,9 @@
/*
* Exported symbols from qla_target.c LLD logic used by qla2xxx code..
*/
-extern void qlt_24xx_atio_pkt_all_vps(struct scsi_qla_host *,
- struct atio_from_isp *);
extern void qlt_response_pkt_all_vps(struct scsi_qla_host *, response_t *);
extern int qlt_rdy_to_xfer(struct qla_tgt_cmd *);
extern int qlt_xmit_response(struct qla_tgt_cmd *, int, uint8_t);
-extern int qlt_rdy_to_xfer_dif(struct qla_tgt_cmd *);
-extern int qlt_xmit_response_dif(struct qla_tgt_cmd *, int, uint8_t);
extern void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *);
extern void qlt_free_mcmd(struct qla_tgt_mgmt_cmd *);
extern void qlt_free_cmd(struct qla_tgt_cmd *cmd);
diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
index 031b296..73f9fee 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
@@ -786,7 +786,16 @@
pr_debug("fc_rport domain: port_id 0x%06x\n", nacl->nport_id);
node = btree_remove32(&lport->lport_fcport_map, nacl->nport_id);
- WARN_ON(node && (node != se_nacl));
+ if (WARN_ON(node && (node != se_nacl))) {
+ /*
+ * The nacl no longer matches what we think it should be.
+ * Most likely a new dynamic acl has been added while
+ * someone dropped the hardware lock. That is clearly a bug
+ * elsewhere, but re-inserting the entry can't make it worse.
+ */
+ btree_insert32(&lport->lport_fcport_map, nacl->nport_id,
+ node, GFP_ATOMIC);
+ }
pr_debug("Removed from fcport_map: %p for WWNN: 0x%016LX, port_id: 0x%06x\n",
se_nacl, nacl->nport_wwnn, nacl->nport_id);
diff --git a/drivers/target/Kconfig b/drivers/target/Kconfig
index dc2d84a..81d44c4 100644
--- a/drivers/target/Kconfig
+++ b/drivers/target/Kconfig
@@ -31,6 +31,13 @@
Say Y here to enable the TCM/pSCSI subsystem plugin for non-buffered
passthrough access to Linux/SCSI device
+config TCM_USER
+ tristate "TCM/USER Subsystem Plugin for Linux"
+ depends on UIO && NET
+ help
+ Say Y here to enable the TCM/USER subsystem plugin for a userspace
+ process to handle requests.
+
source "drivers/target/loopback/Kconfig"
source "drivers/target/tcm_fc/Kconfig"
source "drivers/target/iscsi/Kconfig"
diff --git a/drivers/target/Makefile b/drivers/target/Makefile
index 85b012d..bbb4a7d 100644
--- a/drivers/target/Makefile
+++ b/drivers/target/Makefile
@@ -22,6 +22,7 @@
obj-$(CONFIG_TCM_IBLOCK) += target_core_iblock.o
obj-$(CONFIG_TCM_FILEIO) += target_core_file.o
obj-$(CONFIG_TCM_PSCSI) += target_core_pscsi.o
+obj-$(CONFIG_TCM_USER) += target_core_user.o
# Fabric modules
obj-$(CONFIG_LOOPBACK_TARGET) += loopback/
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 260c3e1..b19e432 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -3709,7 +3709,6 @@
struct task_struct *p,
int mode)
{
- char buf[128];
/*
* mode == 1 signals iscsi_target_tx_thread() usage.
* mode == 0 signals iscsi_target_rx_thread() usage.
@@ -3728,8 +3727,6 @@
* both TX and RX kthreads are scheduled to run on the
* same CPU.
*/
- memset(buf, 0, 128);
- cpumask_scnprintf(buf, 128, conn->conn_cpumask);
set_cpus_allowed_ptr(p, conn->conn_cpumask);
}
@@ -4326,8 +4323,7 @@
if (conn->conn_tx_hash.tfm)
crypto_free_hash(conn->conn_tx_hash.tfm);
- if (conn->conn_cpumask)
- free_cpumask_var(conn->conn_cpumask);
+ free_cpumask_var(conn->conn_cpumask);
kfree(conn->conn_ops);
conn->conn_ops = NULL;
diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c
index ae03f3e..9059c1e 100644
--- a/drivers/target/iscsi/iscsi_target_configfs.c
+++ b/drivers/target/iscsi/iscsi_target_configfs.c
@@ -669,12 +669,10 @@
} else {
sess = se_sess->fabric_sess_ptr;
- if (sess->sess_ops->InitiatorName)
- rb += sprintf(page+rb, "InitiatorName: %s\n",
- sess->sess_ops->InitiatorName);
- if (sess->sess_ops->InitiatorAlias)
- rb += sprintf(page+rb, "InitiatorAlias: %s\n",
- sess->sess_ops->InitiatorAlias);
+ rb += sprintf(page+rb, "InitiatorName: %s\n",
+ sess->sess_ops->InitiatorName);
+ rb += sprintf(page+rb, "InitiatorAlias: %s\n",
+ sess->sess_ops->InitiatorAlias);
rb += sprintf(page+rb, "LIO Session ID: %u "
"ISID: 0x%02x %02x %02x %02x %02x %02x "
diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c
index 0d1e6ee..a0ae5fc 100644
--- a/drivers/target/iscsi/iscsi_target_erl0.c
+++ b/drivers/target/iscsi/iscsi_target_erl0.c
@@ -345,7 +345,6 @@
struct iscsi_cmd *cmd,
unsigned char *buf)
{
- int dump = 0, recovery = 0;
u32 data_sn = 0;
struct iscsi_conn *conn = cmd->conn;
struct iscsi_data *hdr = (struct iscsi_data *) buf;
@@ -370,13 +369,11 @@
pr_err("Command ITT: 0x%08x, received DataSN: 0x%08x"
" higher than expected 0x%08x.\n", cmd->init_task_tag,
be32_to_cpu(hdr->datasn), data_sn);
- recovery = 1;
goto recover;
} else if (be32_to_cpu(hdr->datasn) < data_sn) {
pr_err("Command ITT: 0x%08x, received DataSN: 0x%08x"
" lower than expected 0x%08x, discarding payload.\n",
cmd->init_task_tag, be32_to_cpu(hdr->datasn), data_sn);
- dump = 1;
goto dump;
}
@@ -392,8 +389,7 @@
if (iscsit_dump_data_payload(conn, payload_length, 1) < 0)
return DATAOUT_CANNOT_RECOVER;
- return (recovery || dump) ? DATAOUT_WITHIN_COMMAND_RECOVERY :
- DATAOUT_NORMAL;
+ return DATAOUT_WITHIN_COMMAND_RECOVERY;
}
static int iscsit_dataout_pre_datapduinorder_yes(
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index 5e71ac6..480f2e0 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -978,8 +978,7 @@
return 0;
fail:
np->np_socket = NULL;
- if (sock)
- sock_release(sock);
+ sock_release(sock);
return ret;
}
@@ -1190,8 +1189,7 @@
if (!IS_ERR(conn->conn_tx_hash.tfm))
crypto_free_hash(conn->conn_tx_hash.tfm);
- if (conn->conn_cpumask)
- free_cpumask_var(conn->conn_cpumask);
+ free_cpumask_var(conn->conn_cpumask);
kfree(conn->conn_ops);
@@ -1268,8 +1266,6 @@
iscsit_put_transport(conn->conn_transport);
kfree(conn);
conn = NULL;
- if (ret == -ENODEV)
- goto out;
/* Get another socket */
return 1;
}
diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c
index 73355f4..ce87ce9 100644
--- a/drivers/target/iscsi/iscsi_target_util.c
+++ b/drivers/target/iscsi/iscsi_target_util.c
@@ -1481,8 +1481,9 @@
if (conn->param_list)
intrname = iscsi_find_param_from_key(INITIATORNAME,
conn->param_list);
- strcpy(ls->last_intr_fail_name,
- (intrname ? intrname->value : "Unknown"));
+ strlcpy(ls->last_intr_fail_name,
+ (intrname ? intrname->value : "Unknown"),
+ sizeof(ls->last_intr_fail_name));
ls->last_intr_fail_ip_family = conn->login_family;
diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c
index 340de9d..ab3ab27 100644
--- a/drivers/target/loopback/tcm_loop.c
+++ b/drivers/target/loopback/tcm_loop.c
@@ -153,18 +153,11 @@
/*
* Locate the SAM Task Attr from struct scsi_cmnd *
*/
-static int tcm_loop_sam_attr(struct scsi_cmnd *sc)
+static int tcm_loop_sam_attr(struct scsi_cmnd *sc, int tag)
{
- if (sc->device->tagged_supported) {
- switch (sc->tag) {
- case HEAD_OF_QUEUE_TAG:
- return MSG_HEAD_TAG;
- case ORDERED_QUEUE_TAG:
- return MSG_ORDERED_TAG;
- default:
- break;
- }
- }
+ if (sc->device->tagged_supported &&
+ sc->device->ordered_tags && tag >= 0)
+ return MSG_ORDERED_TAG;
return MSG_SIMPLE_TAG;
}
@@ -227,7 +220,7 @@
rc = target_submit_cmd_map_sgls(se_cmd, tl_nexus->se_sess, sc->cmnd,
&tl_cmd->tl_sense_buf[0], tl_cmd->sc->device->lun,
- transfer_length, tcm_loop_sam_attr(sc),
+ transfer_length, tcm_loop_sam_attr(sc, tl_cmd->sc_cmd_tag),
sc->sc_data_direction, 0,
scsi_sglist(sc), scsi_sg_count(sc),
sgl_bidi, sgl_bidi_count,
@@ -266,7 +259,7 @@
}
tl_cmd->sc = sc;
- tl_cmd->sc_cmd_tag = sc->tag;
+ tl_cmd->sc_cmd_tag = sc->request->tag;
INIT_WORK(&tl_cmd->work, tcm_loop_submission_work);
queue_work(tcm_loop_workqueue, &tl_cmd->work);
return 0;
@@ -370,7 +363,7 @@
*/
tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id];
ret = tcm_loop_issue_tmr(tl_tpg, tl_nexus, sc->device->lun,
- sc->tag, TMR_ABORT_TASK);
+ sc->request->tag, TMR_ABORT_TASK);
return (ret == TMR_FUNCTION_COMPLETE) ? SUCCESS : FAILED;
}
@@ -960,8 +953,7 @@
struct tcm_loop_tpg, tl_se_tpg);
struct tcm_loop_hba *tl_hba = tl_tpg->tl_hba;
- atomic_inc(&tl_tpg->tl_tpg_port_count);
- smp_mb__after_atomic();
+ atomic_inc_mb(&tl_tpg->tl_tpg_port_count);
/*
* Add Linux/SCSI struct scsi_device by HCTL
*/
@@ -995,8 +987,7 @@
scsi_remove_device(sd);
scsi_device_put(sd);
- atomic_dec(&tl_tpg->tl_tpg_port_count);
- smp_mb__after_atomic();
+ atomic_dec_mb(&tl_tpg->tl_tpg_port_count);
pr_debug("TCM_Loop_ConfigFS: Port Unlink Successful\n");
}
diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c
index fbc5ebb..fb87780 100644
--- a/drivers/target/target_core_alua.c
+++ b/drivers/target/target_core_alua.c
@@ -392,8 +392,7 @@
if (tg_pt_id != tg_pt_gp->tg_pt_gp_id)
continue;
- atomic_inc(&tg_pt_gp->tg_pt_gp_ref_cnt);
- smp_mb__after_atomic();
+ atomic_inc_mb(&tg_pt_gp->tg_pt_gp_ref_cnt);
spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
@@ -403,8 +402,7 @@
found = true;
spin_lock(&dev->t10_alua.tg_pt_gps_lock);
- atomic_dec(&tg_pt_gp->tg_pt_gp_ref_cnt);
- smp_mb__after_atomic();
+ atomic_dec_mb(&tg_pt_gp->tg_pt_gp_ref_cnt);
break;
}
spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
@@ -998,8 +996,7 @@
* every I_T nexus other than the I_T nexus on which the SET
* TARGET PORT GROUPS command
*/
- atomic_inc(&mem->tg_pt_gp_mem_ref_cnt);
- smp_mb__after_atomic();
+ atomic_inc_mb(&mem->tg_pt_gp_mem_ref_cnt);
spin_unlock(&tg_pt_gp->tg_pt_gp_lock);
spin_lock_bh(&port->sep_alua_lock);
@@ -1028,8 +1025,7 @@
spin_unlock_bh(&port->sep_alua_lock);
spin_lock(&tg_pt_gp->tg_pt_gp_lock);
- atomic_dec(&mem->tg_pt_gp_mem_ref_cnt);
- smp_mb__after_atomic();
+ atomic_dec_mb(&mem->tg_pt_gp_mem_ref_cnt);
}
spin_unlock(&tg_pt_gp->tg_pt_gp_lock);
/*
@@ -1063,7 +1059,6 @@
core_alua_dump_state(tg_pt_gp->tg_pt_gp_alua_pending_state));
spin_lock(&dev->t10_alua.tg_pt_gps_lock);
atomic_dec(&tg_pt_gp->tg_pt_gp_ref_cnt);
- smp_mb__after_atomic();
spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
if (tg_pt_gp->tg_pt_gp_transition_complete)
@@ -1125,7 +1120,6 @@
*/
spin_lock(&dev->t10_alua.tg_pt_gps_lock);
atomic_inc(&tg_pt_gp->tg_pt_gp_ref_cnt);
- smp_mb__after_atomic();
spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
if (!explicit && tg_pt_gp->tg_pt_gp_implicit_trans_secs) {
@@ -1168,7 +1162,6 @@
spin_lock(&local_lu_gp_mem->lu_gp_mem_lock);
lu_gp = local_lu_gp_mem->lu_gp;
atomic_inc(&lu_gp->lu_gp_ref_cnt);
- smp_mb__after_atomic();
spin_unlock(&local_lu_gp_mem->lu_gp_mem_lock);
/*
* For storage objects that are members of the 'default_lu_gp',
@@ -1184,8 +1177,7 @@
l_tg_pt_gp->tg_pt_gp_alua_nacl = l_nacl;
rc = core_alua_do_transition_tg_pt(l_tg_pt_gp,
new_state, explicit);
- atomic_dec(&lu_gp->lu_gp_ref_cnt);
- smp_mb__after_atomic();
+ atomic_dec_mb(&lu_gp->lu_gp_ref_cnt);
return rc;
}
/*
@@ -1198,8 +1190,7 @@
lu_gp_mem_list) {
dev = lu_gp_mem->lu_gp_mem_dev;
- atomic_inc(&lu_gp_mem->lu_gp_mem_ref_cnt);
- smp_mb__after_atomic();
+ atomic_inc_mb(&lu_gp_mem->lu_gp_mem_ref_cnt);
spin_unlock(&lu_gp->lu_gp_lock);
spin_lock(&dev->t10_alua.tg_pt_gps_lock);
@@ -1227,8 +1218,7 @@
tg_pt_gp->tg_pt_gp_alua_port = NULL;
tg_pt_gp->tg_pt_gp_alua_nacl = NULL;
}
- atomic_inc(&tg_pt_gp->tg_pt_gp_ref_cnt);
- smp_mb__after_atomic();
+ atomic_inc_mb(&tg_pt_gp->tg_pt_gp_ref_cnt);
spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
/*
* core_alua_do_transition_tg_pt() will always return
@@ -1238,16 +1228,14 @@
new_state, explicit);
spin_lock(&dev->t10_alua.tg_pt_gps_lock);
- atomic_dec(&tg_pt_gp->tg_pt_gp_ref_cnt);
- smp_mb__after_atomic();
+ atomic_dec_mb(&tg_pt_gp->tg_pt_gp_ref_cnt);
if (rc)
break;
}
spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
spin_lock(&lu_gp->lu_gp_lock);
- atomic_dec(&lu_gp_mem->lu_gp_mem_ref_cnt);
- smp_mb__after_atomic();
+ atomic_dec_mb(&lu_gp_mem->lu_gp_mem_ref_cnt);
}
spin_unlock(&lu_gp->lu_gp_lock);
@@ -1260,8 +1248,7 @@
core_alua_dump_state(new_state));
}
- atomic_dec(&lu_gp->lu_gp_ref_cnt);
- smp_mb__after_atomic();
+ atomic_dec_mb(&lu_gp->lu_gp_ref_cnt);
return rc;
}
diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index 756def3..79f9296 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -665,6 +665,9 @@
DEF_DEV_ATTRIB(emulate_rest_reord);
SE_DEV_ATTR(emulate_rest_reord, S_IRUGO | S_IWUSR);
+DEF_DEV_ATTRIB(force_pr_aptpl);
+SE_DEV_ATTR(force_pr_aptpl, S_IRUGO | S_IWUSR);
+
DEF_DEV_ATTRIB_RO(hw_block_size);
SE_DEV_ATTR_RO(hw_block_size);
@@ -719,6 +722,7 @@
&target_core_dev_attrib_hw_pi_prot_type.attr,
&target_core_dev_attrib_pi_prot_format.attr,
&target_core_dev_attrib_enforce_pr_isids.attr,
+ &target_core_dev_attrib_force_pr_aptpl.attr,
&target_core_dev_attrib_is_nonrot.attr,
&target_core_dev_attrib_emulate_rest_reord.attr,
&target_core_dev_attrib_hw_block_size.attr,
@@ -1263,7 +1267,7 @@
{
unsigned char *i_fabric = NULL, *i_port = NULL, *isid = NULL;
unsigned char *t_fabric = NULL, *t_port = NULL;
- char *orig, *ptr, *arg_p, *opts;
+ char *orig, *ptr, *opts;
substring_t args[MAX_OPT_ARGS];
unsigned long long tmp_ll;
u64 sa_res_key = 0;
@@ -1295,14 +1299,14 @@
token = match_token(ptr, tokens, args);
switch (token) {
case Opt_initiator_fabric:
- i_fabric = match_strdup(&args[0]);
+ i_fabric = match_strdup(args);
if (!i_fabric) {
ret = -ENOMEM;
goto out;
}
break;
case Opt_initiator_node:
- i_port = match_strdup(&args[0]);
+ i_port = match_strdup(args);
if (!i_port) {
ret = -ENOMEM;
goto out;
@@ -1316,7 +1320,7 @@
}
break;
case Opt_initiator_sid:
- isid = match_strdup(&args[0]);
+ isid = match_strdup(args);
if (!isid) {
ret = -ENOMEM;
goto out;
@@ -1330,15 +1334,9 @@
}
break;
case Opt_sa_res_key:
- arg_p = match_strdup(&args[0]);
- if (!arg_p) {
- ret = -ENOMEM;
- goto out;
- }
- ret = kstrtoull(arg_p, 0, &tmp_ll);
+ ret = kstrtoull(args->from, 0, &tmp_ll);
if (ret < 0) {
- pr_err("kstrtoull() failed for"
- " sa_res_key=\n");
+ pr_err("kstrtoull() failed for sa_res_key=\n");
goto out;
}
sa_res_key = (u64)tmp_ll;
@@ -1370,14 +1368,14 @@
* PR APTPL Metadata for Target Port
*/
case Opt_target_fabric:
- t_fabric = match_strdup(&args[0]);
+ t_fabric = match_strdup(args);
if (!t_fabric) {
ret = -ENOMEM;
goto out;
}
break;
case Opt_target_node:
- t_port = match_strdup(&args[0]);
+ t_port = match_strdup(args);
if (!t_port) {
ret = -ENOMEM;
goto out;
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index 98da901..c45f9e9 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -224,8 +224,7 @@
if (port->sep_rtpi != rtpi)
continue;
- atomic_inc(&deve->pr_ref_count);
- smp_mb__after_atomic();
+ atomic_inc_mb(&deve->pr_ref_count);
spin_unlock_irq(&nacl->device_list_lock);
return deve;
@@ -1019,6 +1018,23 @@
return 0;
}
+int se_dev_set_force_pr_aptpl(struct se_device *dev, int flag)
+{
+ if ((flag != 0) && (flag != 1)) {
+ printk(KERN_ERR "Illegal value %d\n", flag);
+ return -EINVAL;
+ }
+ if (dev->export_count) {
+ pr_err("dev[%p]: Unable to set force_pr_aptpl while"
+ " export_count is %d\n", dev, dev->export_count);
+ return -EINVAL;
+ }
+
+ dev->dev_attrib.force_pr_aptpl = flag;
+ pr_debug("dev[%p]: SE Device force_pr_aptpl: %d\n", dev, flag);
+ return 0;
+}
+
int se_dev_set_is_nonrot(struct se_device *dev, int flag)
{
if ((flag != 0) && (flag != 1)) {
@@ -1250,24 +1266,16 @@
*
*
*/
-int core_dev_del_lun(
+void core_dev_del_lun(
struct se_portal_group *tpg,
- u32 unpacked_lun)
+ struct se_lun *lun)
{
- struct se_lun *lun;
-
- lun = core_tpg_pre_dellun(tpg, unpacked_lun);
- if (IS_ERR(lun))
- return PTR_ERR(lun);
-
- core_tpg_post_dellun(tpg, lun);
-
- pr_debug("%s_TPG[%u]_LUN[%u] - Deactivated %s Logical Unit from"
+ pr_debug("%s_TPG[%u]_LUN[%u] - Deactivating %s Logical Unit from"
" device object\n", tpg->se_tpg_tfo->get_fabric_name(),
- tpg->se_tpg_tfo->tpg_get_tag(tpg), unpacked_lun,
+ tpg->se_tpg_tfo->tpg_get_tag(tpg), lun->unpacked_lun,
tpg->se_tpg_tfo->get_fabric_name());
- return 0;
+ core_tpg_remove_lun(tpg, lun);
}
struct se_lun *core_get_lun_from_tpg(struct se_portal_group *tpg, u32 unpacked_lun)
@@ -1396,8 +1404,7 @@
spin_lock(&lun->lun_acl_lock);
list_add_tail(&lacl->lacl_list, &lun->lun_acl_list);
- atomic_inc(&lun->lun_acl_count);
- smp_mb__after_atomic();
+ atomic_inc_mb(&lun->lun_acl_count);
spin_unlock(&lun->lun_acl_lock);
pr_debug("%s_TPG[%hu]_LUN[%u->%u] - Added %s ACL for "
@@ -1409,7 +1416,8 @@
* Check to see if there are any existing persistent reservation APTPL
* pre-registrations that need to be enabled for this LUN ACL..
*/
- core_scsi3_check_aptpl_registration(lun->lun_se_dev, tpg, lun, lacl);
+ core_scsi3_check_aptpl_registration(lun->lun_se_dev, tpg, lun, nacl,
+ lacl->mapped_lun);
return 0;
}
@@ -1430,8 +1438,7 @@
spin_lock(&lun->lun_acl_lock);
list_del(&lacl->lacl_list);
- atomic_dec(&lun->lun_acl_count);
- smp_mb__after_atomic();
+ atomic_dec_mb(&lun->lun_acl_count);
spin_unlock(&lun->lun_acl_lock);
core_disable_device_list_for_node(lun, NULL, lacl->mapped_lun,
@@ -1554,6 +1561,7 @@
dev->dev_attrib.emulate_3pc = DA_EMULATE_3PC;
dev->dev_attrib.pi_prot_type = TARGET_DIF_TYPE0_PROT;
dev->dev_attrib.enforce_pr_isids = DA_ENFORCE_PR_ISIDS;
+ dev->dev_attrib.force_pr_aptpl = DA_FORCE_PR_APTPL;
dev->dev_attrib.is_nonrot = DA_IS_NONROT;
dev->dev_attrib.emulate_rest_reord = DA_EMULATE_REST_REORD;
dev->dev_attrib.max_unmap_lba_count = DA_MAX_UNMAP_LBA_COUNT;
diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c
index 7de9f04..0c3f901 100644
--- a/drivers/target/target_core_fabric_configfs.c
+++ b/drivers/target/target_core_fabric_configfs.c
@@ -320,7 +320,7 @@
struct se_node_acl, acl_group);
struct se_portal_group *se_tpg = se_nacl->se_tpg;
struct target_fabric_configfs *tf = se_tpg->se_tpg_wwn->wwn_tf;
- struct se_lun_acl *lacl;
+ struct se_lun_acl *lacl = NULL;
struct config_item *acl_ci;
struct config_group *lacl_cg = NULL, *ml_stat_grp = NULL;
char *buf;
@@ -406,6 +406,7 @@
out:
if (lacl_cg)
kfree(lacl_cg->default_groups);
+ kfree(lacl);
kfree(buf);
return ERR_PTR(ret);
}
@@ -821,7 +822,7 @@
tf->tf_ops.fabric_pre_unlink(se_tpg, lun);
}
- core_dev_del_lun(se_tpg, lun->unpacked_lun);
+ core_dev_del_lun(se_tpg, lun);
return 0;
}
@@ -910,16 +911,12 @@
GFP_KERNEL);
if (!port_stat_grp->default_groups) {
pr_err("Unable to allocate port_stat_grp->default_groups\n");
- errno = -ENOMEM;
- goto out;
+ kfree(lun_cg->default_groups);
+ return ERR_PTR(-ENOMEM);
}
target_stat_setup_port_default_groups(lun);
return &lun->lun_group;
-out:
- if (lun_cg)
- kfree(lun_cg->default_groups);
- return ERR_PTR(errno);
}
static void target_fabric_drop_lun(
diff --git a/drivers/target/target_core_fabric_lib.c b/drivers/target/target_core_fabric_lib.c
index 0d1cf8b..35bfe77 100644
--- a/drivers/target/target_core_fabric_lib.c
+++ b/drivers/target/target_core_fabric_lib.c
@@ -394,9 +394,9 @@
* If the caller wants the TransportID Length, we set that value for the
* entire iSCSI Tarnsport ID now.
*/
- if (out_tid_len != NULL) {
- add_len = ((buf[2] >> 8) & 0xff);
- add_len |= (buf[3] & 0xff);
+ if (out_tid_len) {
+ /* The shift works thanks to integer promotion rules */
+ add_len = (buf[2] << 8) | buf[3];
tid_len = strlen(&buf[4]);
tid_len += 4; /* Add four bytes for iSCSI Transport ID header */
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 7d6cdda..72c83d9 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -415,7 +415,7 @@
} else {
start = cmd->t_task_lba * dev->dev_attrib.block_size;
if (cmd->data_length)
- end = start + cmd->data_length;
+ end = start + cmd->data_length - 1;
else
end = LLONG_MAX;
}
@@ -680,7 +680,12 @@
struct fd_dev *fd_dev = FD_DEV(dev);
loff_t start = cmd->t_task_lba *
dev->dev_attrib.block_size;
- loff_t end = start + cmd->data_length;
+ loff_t end;
+
+ if (cmd->data_length)
+ end = start + cmd->data_length - 1;
+ else
+ end = LLONG_MAX;
vfs_fsync_range(fd_dev->fd_file, start, end, 1);
}
@@ -762,7 +767,9 @@
fd_dev->fbd_flags |= FBDF_HAS_SIZE;
break;
case Opt_fd_buffered_io:
- match_int(args, &arg);
+ ret = match_int(args, &arg);
+ if (ret)
+ goto out;
if (arg != 1) {
pr_err("bogus fd_buffered_io=%d value\n", arg);
ret = -EINVAL;
diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h
index de9cab7..e31f42f 100644
--- a/drivers/target/target_core_internal.h
+++ b/drivers/target/target_core_internal.h
@@ -38,6 +38,7 @@
int se_dev_set_pi_prot_type(struct se_device *, int);
int se_dev_set_pi_prot_format(struct se_device *, int);
int se_dev_set_enforce_pr_isids(struct se_device *, int);
+int se_dev_set_force_pr_aptpl(struct se_device *, int);
int se_dev_set_is_nonrot(struct se_device *, int);
int se_dev_set_emulate_rest_reord(struct se_device *dev, int);
int se_dev_set_queue_depth(struct se_device *, u32);
@@ -46,7 +47,7 @@
int se_dev_set_optimal_sectors(struct se_device *, u32);
int se_dev_set_block_size(struct se_device *, u32);
struct se_lun *core_dev_add_lun(struct se_portal_group *, struct se_device *, u32);
-int core_dev_del_lun(struct se_portal_group *, u32);
+void core_dev_del_lun(struct se_portal_group *, struct se_lun *);
struct se_lun *core_get_lun_from_tpg(struct se_portal_group *, u32);
struct se_lun_acl *core_dev_init_initiator_node_lun_acl(struct se_portal_group *,
struct se_node_acl *, u32, int *);
@@ -82,8 +83,7 @@
struct se_lun *core_tpg_alloc_lun(struct se_portal_group *, u32);
int core_tpg_add_lun(struct se_portal_group *, struct se_lun *,
u32, struct se_device *);
-struct se_lun *core_tpg_pre_dellun(struct se_portal_group *, u32 unpacked_lun);
-int core_tpg_post_dellun(struct se_portal_group *, struct se_lun *);
+void core_tpg_remove_lun(struct se_portal_group *, struct se_lun *);
/* target_core_transport.c */
extern struct kmem_cache *se_tmr_req_cache;
diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c
index df35786..8c60a1a 100644
--- a/drivers/target/target_core_pr.c
+++ b/drivers/target/target_core_pr.c
@@ -674,8 +674,7 @@
*/
spin_lock(&dev->se_port_lock);
list_for_each_entry_safe(port, port_tmp, &dev->dev_sep_list, sep_list) {
- atomic_inc(&port->sep_tg_pt_ref_cnt);
- smp_mb__after_atomic();
+ atomic_inc_mb(&port->sep_tg_pt_ref_cnt);
spin_unlock(&dev->se_port_lock);
spin_lock_bh(&port->sep_alua_lock);
@@ -709,8 +708,7 @@
if (strcmp(nacl->initiatorname, nacl_tmp->initiatorname))
continue;
- atomic_inc(&deve_tmp->pr_ref_count);
- smp_mb__after_atomic();
+ atomic_inc_mb(&deve_tmp->pr_ref_count);
spin_unlock_bh(&port->sep_alua_lock);
/*
* Grab a configfs group dependency that is released
@@ -722,10 +720,8 @@
if (ret < 0) {
pr_err("core_scsi3_lunacl_depend"
"_item() failed\n");
- atomic_dec(&port->sep_tg_pt_ref_cnt);
- smp_mb__after_atomic();
- atomic_dec(&deve_tmp->pr_ref_count);
- smp_mb__after_atomic();
+ atomic_dec_mb(&port->sep_tg_pt_ref_cnt);
+ atomic_dec_mb(&deve_tmp->pr_ref_count);
goto out;
}
/*
@@ -739,10 +735,8 @@
nacl_tmp, deve_tmp, NULL,
sa_res_key, all_tg_pt, aptpl);
if (!pr_reg_atp) {
- atomic_dec(&port->sep_tg_pt_ref_cnt);
- smp_mb__after_atomic();
- atomic_dec(&deve_tmp->pr_ref_count);
- smp_mb__after_atomic();
+ atomic_dec_mb(&port->sep_tg_pt_ref_cnt);
+ atomic_dec_mb(&deve_tmp->pr_ref_count);
core_scsi3_lunacl_undepend_item(deve_tmp);
goto out;
}
@@ -754,8 +748,7 @@
spin_unlock_bh(&port->sep_alua_lock);
spin_lock(&dev->se_port_lock);
- atomic_dec(&port->sep_tg_pt_ref_cnt);
- smp_mb__after_atomic();
+ atomic_dec_mb(&port->sep_tg_pt_ref_cnt);
}
spin_unlock(&dev->se_port_lock);
@@ -902,6 +895,7 @@
spin_lock(&pr_tmpl->aptpl_reg_lock);
list_for_each_entry_safe(pr_reg, pr_reg_tmp, &pr_tmpl->aptpl_reg_list,
pr_reg_aptpl_list) {
+
if (!strcmp(pr_reg->pr_iport, i_port) &&
(pr_reg->pr_res_mapped_lun == deve->mapped_lun) &&
!(strcmp(pr_reg->pr_tport, t_port)) &&
@@ -944,10 +938,10 @@
struct se_device *dev,
struct se_portal_group *tpg,
struct se_lun *lun,
- struct se_lun_acl *lun_acl)
+ struct se_node_acl *nacl,
+ u32 mapped_lun)
{
- struct se_node_acl *nacl = lun_acl->se_lun_nacl;
- struct se_dev_entry *deve = nacl->device_list[lun_acl->mapped_lun];
+ struct se_dev_entry *deve = nacl->device_list[mapped_lun];
if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS)
return 0;
@@ -1109,8 +1103,7 @@
if (dev->dev_attrib.enforce_pr_isids)
continue;
}
- atomic_inc(&pr_reg->pr_res_holders);
- smp_mb__after_atomic();
+ atomic_inc_mb(&pr_reg->pr_res_holders);
spin_unlock(&pr_tmpl->registration_lock);
return pr_reg;
}
@@ -1124,8 +1117,7 @@
if (strcmp(isid, pr_reg->pr_reg_isid))
continue;
- atomic_inc(&pr_reg->pr_res_holders);
- smp_mb__after_atomic();
+ atomic_inc_mb(&pr_reg->pr_res_holders);
spin_unlock(&pr_tmpl->registration_lock);
return pr_reg;
}
@@ -1154,8 +1146,7 @@
static void core_scsi3_put_pr_reg(struct t10_pr_registration *pr_reg)
{
- atomic_dec(&pr_reg->pr_res_holders);
- smp_mb__after_atomic();
+ atomic_dec_mb(&pr_reg->pr_res_holders);
}
static int core_scsi3_check_implicit_release(
@@ -1348,8 +1339,7 @@
configfs_undepend_item(tpg->se_tpg_tfo->tf_subsys,
&tpg->tpg_group.cg_item);
- atomic_dec(&tpg->tpg_pr_ref_count);
- smp_mb__after_atomic();
+ atomic_dec_mb(&tpg->tpg_pr_ref_count);
}
static int core_scsi3_nodeacl_depend_item(struct se_node_acl *nacl)
@@ -1368,16 +1358,14 @@
struct se_portal_group *tpg = nacl->se_tpg;
if (nacl->dynamic_node_acl) {
- atomic_dec(&nacl->acl_pr_ref_count);
- smp_mb__after_atomic();
+ atomic_dec_mb(&nacl->acl_pr_ref_count);
return;
}
configfs_undepend_item(tpg->se_tpg_tfo->tf_subsys,
&nacl->acl_group.cg_item);
- atomic_dec(&nacl->acl_pr_ref_count);
- smp_mb__after_atomic();
+ atomic_dec_mb(&nacl->acl_pr_ref_count);
}
static int core_scsi3_lunacl_depend_item(struct se_dev_entry *se_deve)
@@ -1407,8 +1395,7 @@
* For nacl->dynamic_node_acl=1
*/
if (!lun_acl) {
- atomic_dec(&se_deve->pr_ref_count);
- smp_mb__after_atomic();
+ atomic_dec_mb(&se_deve->pr_ref_count);
return;
}
nacl = lun_acl->se_lun_nacl;
@@ -1417,8 +1404,7 @@
configfs_undepend_item(tpg->se_tpg_tfo->tf_subsys,
&lun_acl->se_lun_group.cg_item);
- atomic_dec(&se_deve->pr_ref_count);
- smp_mb__after_atomic();
+ atomic_dec_mb(&se_deve->pr_ref_count);
}
static sense_reason_t
@@ -1551,15 +1537,13 @@
if (!i_str)
continue;
- atomic_inc(&tmp_tpg->tpg_pr_ref_count);
- smp_mb__after_atomic();
+ atomic_inc_mb(&tmp_tpg->tpg_pr_ref_count);
spin_unlock(&dev->se_port_lock);
if (core_scsi3_tpg_depend_item(tmp_tpg)) {
pr_err(" core_scsi3_tpg_depend_item()"
" for tmp_tpg\n");
- atomic_dec(&tmp_tpg->tpg_pr_ref_count);
- smp_mb__after_atomic();
+ atomic_dec_mb(&tmp_tpg->tpg_pr_ref_count);
ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
goto out_unmap;
}
@@ -1571,10 +1555,8 @@
spin_lock_irq(&tmp_tpg->acl_node_lock);
dest_node_acl = __core_tpg_get_initiator_node_acl(
tmp_tpg, i_str);
- if (dest_node_acl) {
- atomic_inc(&dest_node_acl->acl_pr_ref_count);
- smp_mb__after_atomic();
- }
+ if (dest_node_acl)
+ atomic_inc_mb(&dest_node_acl->acl_pr_ref_count);
spin_unlock_irq(&tmp_tpg->acl_node_lock);
if (!dest_node_acl) {
@@ -1586,8 +1568,7 @@
if (core_scsi3_nodeacl_depend_item(dest_node_acl)) {
pr_err("configfs_depend_item() failed"
" for dest_node_acl->acl_group\n");
- atomic_dec(&dest_node_acl->acl_pr_ref_count);
- smp_mb__after_atomic();
+ atomic_dec_mb(&dest_node_acl->acl_pr_ref_count);
core_scsi3_tpg_undepend_item(tmp_tpg);
ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
goto out_unmap;
@@ -1646,8 +1627,7 @@
if (core_scsi3_lunacl_depend_item(dest_se_deve)) {
pr_err("core_scsi3_lunacl_depend_item()"
" failed\n");
- atomic_dec(&dest_se_deve->pr_ref_count);
- smp_mb__after_atomic();
+ atomic_dec_mb(&dest_se_deve->pr_ref_count);
core_scsi3_nodeacl_undepend_item(dest_node_acl);
core_scsi3_tpg_undepend_item(dest_tpg);
ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
@@ -3167,15 +3147,13 @@
if (!dest_tf_ops)
continue;
- atomic_inc(&dest_se_tpg->tpg_pr_ref_count);
- smp_mb__after_atomic();
+ atomic_inc_mb(&dest_se_tpg->tpg_pr_ref_count);
spin_unlock(&dev->se_port_lock);
if (core_scsi3_tpg_depend_item(dest_se_tpg)) {
pr_err("core_scsi3_tpg_depend_item() failed"
" for dest_se_tpg\n");
- atomic_dec(&dest_se_tpg->tpg_pr_ref_count);
- smp_mb__after_atomic();
+ atomic_dec_mb(&dest_se_tpg->tpg_pr_ref_count);
ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
goto out_put_pr_reg;
}
@@ -3271,10 +3249,8 @@
spin_lock_irq(&dest_se_tpg->acl_node_lock);
dest_node_acl = __core_tpg_get_initiator_node_acl(dest_se_tpg,
initiator_str);
- if (dest_node_acl) {
- atomic_inc(&dest_node_acl->acl_pr_ref_count);
- smp_mb__after_atomic();
- }
+ if (dest_node_acl)
+ atomic_inc_mb(&dest_node_acl->acl_pr_ref_count);
spin_unlock_irq(&dest_se_tpg->acl_node_lock);
if (!dest_node_acl) {
@@ -3288,8 +3264,7 @@
if (core_scsi3_nodeacl_depend_item(dest_node_acl)) {
pr_err("core_scsi3_nodeacl_depend_item() for"
" dest_node_acl\n");
- atomic_dec(&dest_node_acl->acl_pr_ref_count);
- smp_mb__after_atomic();
+ atomic_dec_mb(&dest_node_acl->acl_pr_ref_count);
dest_node_acl = NULL;
ret = TCM_INVALID_PARAMETER_LIST;
goto out;
@@ -3313,8 +3288,7 @@
if (core_scsi3_lunacl_depend_item(dest_se_deve)) {
pr_err("core_scsi3_lunacl_depend_item() failed\n");
- atomic_dec(&dest_se_deve->pr_ref_count);
- smp_mb__after_atomic();
+ atomic_dec_mb(&dest_se_deve->pr_ref_count);
dest_se_deve = NULL;
ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
goto out;
@@ -3497,6 +3471,7 @@
sense_reason_t
target_scsi3_emulate_pr_out(struct se_cmd *cmd)
{
+ struct se_device *dev = cmd->se_dev;
unsigned char *cdb = &cmd->t_task_cdb[0];
unsigned char *buf;
u64 res_key, sa_res_key;
@@ -3561,6 +3536,13 @@
aptpl = (buf[17] & 0x01);
unreg = (buf[17] & 0x02);
}
+ /*
+ * If the backend device has been configured to force APTPL metadata
+ * write-out, go ahead and propagate aptpl=1 down now.
+ */
+ if (dev->dev_attrib.force_pr_aptpl)
+ aptpl = 1;
+
transport_kunmap_data_sg(cmd);
buf = NULL;
@@ -3803,7 +3785,7 @@
if (!buf)
return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
- buf[0] = ((add_len << 8) & 0xff);
+ buf[0] = ((add_len >> 8) & 0xff);
buf[1] = (add_len & 0xff);
buf[2] |= 0x10; /* CRH: Compatible Reservation Hanlding bit. */
buf[2] |= 0x08; /* SIP_C: Specify Initiator Ports Capable bit */
@@ -3879,8 +3861,7 @@
se_tpg = pr_reg->pr_reg_nacl->se_tpg;
add_desc_len = 0;
- atomic_inc(&pr_reg->pr_res_holders);
- smp_mb__after_atomic();
+ atomic_inc_mb(&pr_reg->pr_res_holders);
spin_unlock(&pr_tmpl->registration_lock);
/*
* Determine expected length of $FABRIC_MOD specific
@@ -3893,8 +3874,7 @@
pr_warn("SPC-3 PRIN READ_FULL_STATUS ran"
" out of buffer: %d\n", cmd->data_length);
spin_lock(&pr_tmpl->registration_lock);
- atomic_dec(&pr_reg->pr_res_holders);
- smp_mb__after_atomic();
+ atomic_dec_mb(&pr_reg->pr_res_holders);
break;
}
/*
@@ -3955,8 +3935,7 @@
se_nacl, pr_reg, &format_code, &buf[off+4]);
spin_lock(&pr_tmpl->registration_lock);
- atomic_dec(&pr_reg->pr_res_holders);
- smp_mb__after_atomic();
+ atomic_dec_mb(&pr_reg->pr_res_holders);
/*
* Set the ADDITIONAL DESCRIPTOR LENGTH
*/
diff --git a/drivers/target/target_core_pr.h b/drivers/target/target_core_pr.h
index 2ee2936..749fd7b 100644
--- a/drivers/target/target_core_pr.h
+++ b/drivers/target/target_core_pr.h
@@ -60,7 +60,7 @@
unsigned char *, u16, u32, int, int, u8);
extern int core_scsi3_check_aptpl_registration(struct se_device *,
struct se_portal_group *, struct se_lun *,
- struct se_lun_acl *);
+ struct se_node_acl *, u32);
extern void core_scsi3_free_pr_reg_from_nacl(struct se_device *,
struct se_node_acl *);
extern void core_scsi3_free_all_registrations(struct se_device *);
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 70d9f6d..7c8291f 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -749,14 +749,18 @@
ret = -EINVAL;
goto out;
}
- match_int(args, &arg);
+ ret = match_int(args, &arg);
+ if (ret)
+ goto out;
pdv->pdv_host_id = arg;
pr_debug("PSCSI[%d]: Referencing SCSI Host ID:"
" %d\n", phv->phv_host_id, pdv->pdv_host_id);
pdv->pdv_flags |= PDF_HAS_VIRT_HOST_ID;
break;
case Opt_scsi_channel_id:
- match_int(args, &arg);
+ ret = match_int(args, &arg);
+ if (ret)
+ goto out;
pdv->pdv_channel_id = arg;
pr_debug("PSCSI[%d]: Referencing SCSI Channel"
" ID: %d\n", phv->phv_host_id,
@@ -764,7 +768,9 @@
pdv->pdv_flags |= PDF_HAS_CHANNEL_ID;
break;
case Opt_scsi_target_id:
- match_int(args, &arg);
+ ret = match_int(args, &arg);
+ if (ret)
+ goto out;
pdv->pdv_target_id = arg;
pr_debug("PSCSI[%d]: Referencing SCSI Target"
" ID: %d\n", phv->phv_host_id,
@@ -772,7 +778,9 @@
pdv->pdv_flags |= PDF_HAS_TARGET_ID;
break;
case Opt_scsi_lun_id:
- match_int(args, &arg);
+ ret = match_int(args, &arg);
+ if (ret)
+ goto out;
pdv->pdv_lun_id = arg;
pr_debug("PSCSI[%d]: Referencing SCSI LUN ID:"
" %d\n", phv->phv_host_id, pdv->pdv_lun_id);
diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index bd78d92..ebe62af 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -948,7 +948,7 @@
}
/* reject any command that we don't have a handler for */
- if (!(cmd->se_cmd_flags & SCF_SCSI_DATA_CDB) && !cmd->execute_cmd)
+ if (!cmd->execute_cmd)
return TCM_UNSUPPORTED_SCSI_OPCODE;
if (cmd->se_cmd_flags & SCF_SCSI_DATA_CDB) {
diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index f7cd95e..fa5e157 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -64,21 +64,17 @@
}
EXPORT_SYMBOL(core_tmr_alloc_req);
-void core_tmr_release_req(
- struct se_tmr_req *tmr)
+void core_tmr_release_req(struct se_tmr_req *tmr)
{
struct se_device *dev = tmr->tmr_dev;
unsigned long flags;
- if (!dev) {
- kfree(tmr);
- return;
+ if (dev) {
+ spin_lock_irqsave(&dev->se_tmr_lock, flags);
+ list_del(&tmr->tmr_list);
+ spin_unlock_irqrestore(&dev->se_tmr_lock, flags);
}
- spin_lock_irqsave(&dev->se_tmr_lock, flags);
- list_del(&tmr->tmr_list);
- spin_unlock_irqrestore(&dev->se_tmr_lock, flags);
-
kfree(tmr);
}
@@ -90,9 +86,8 @@
bool remove = true;
/*
* TASK ABORTED status (TAS) bit support
- */
- if ((tmr_nacl &&
- (tmr_nacl != cmd->se_sess->se_node_acl)) && tas) {
+ */
+ if ((tmr_nacl && (tmr_nacl != cmd->se_sess->se_node_acl)) && tas) {
remove = false;
transport_send_task_abort(cmd);
}
@@ -120,13 +115,12 @@
struct se_tmr_req *tmr,
struct se_session *se_sess)
{
- struct se_cmd *se_cmd, *tmp_cmd;
+ struct se_cmd *se_cmd;
unsigned long flags;
int ref_tag;
spin_lock_irqsave(&se_sess->sess_cmd_lock, flags);
- list_for_each_entry_safe(se_cmd, tmp_cmd,
- &se_sess->sess_cmd_list, se_cmd_list) {
+ list_for_each_entry(se_cmd, &se_sess->sess_cmd_list, se_cmd_list) {
if (dev != se_cmd->se_dev)
continue;
diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c
index be783f7..0696de9 100644
--- a/drivers/target/target_core_tpg.c
+++ b/drivers/target/target_core_tpg.c
@@ -40,6 +40,7 @@
#include <target/target_core_fabric.h>
#include "target_core_internal.h"
+#include "target_core_pr.h"
extern struct se_device *g_lun0_dev;
@@ -166,6 +167,13 @@
core_enable_device_list_for_node(lun, NULL, lun->unpacked_lun,
lun_access, acl, tpg);
+ /*
+ * Check to see if there are any existing persistent reservation
+ * APTPL pre-registrations that need to be enabled for this dynamic
+ * LUN ACL now..
+ */
+ core_scsi3_check_aptpl_registration(dev, tpg, lun, acl,
+ lun->unpacked_lun);
spin_lock(&tpg->tpg_lun_lock);
}
spin_unlock(&tpg->tpg_lun_lock);
@@ -335,7 +343,7 @@
continue;
spin_unlock(&tpg->tpg_lun_lock);
- core_dev_del_lun(tpg, lun->unpacked_lun);
+ core_dev_del_lun(tpg, lun);
spin_lock(&tpg->tpg_lun_lock);
}
spin_unlock(&tpg->tpg_lun_lock);
@@ -663,13 +671,6 @@
return 0;
}
-static void core_tpg_release_virtual_lun0(struct se_portal_group *se_tpg)
-{
- struct se_lun *lun = &se_tpg->tpg_virt_lun0;
-
- core_tpg_post_dellun(se_tpg, lun);
-}
-
int core_tpg_register(
struct target_core_fabric_ops *tfo,
struct se_wwn *se_wwn,
@@ -773,7 +774,7 @@
spin_unlock_irq(&se_tpg->acl_node_lock);
if (se_tpg->se_tpg_type == TRANSPORT_TPG_TYPE_NORMAL)
- core_tpg_release_virtual_lun0(se_tpg);
+ core_tpg_remove_lun(se_tpg, &se_tpg->tpg_virt_lun0);
se_tpg->se_tpg_fabric_ptr = NULL;
array_free(se_tpg->tpg_lun_list, TRANSPORT_MAX_LUNS_PER_TPG);
@@ -838,37 +839,7 @@
return 0;
}
-struct se_lun *core_tpg_pre_dellun(
- struct se_portal_group *tpg,
- u32 unpacked_lun)
-{
- struct se_lun *lun;
-
- if (unpacked_lun > (TRANSPORT_MAX_LUNS_PER_TPG-1)) {
- pr_err("%s LUN: %u exceeds TRANSPORT_MAX_LUNS_PER_TPG"
- "-1: %u for Target Portal Group: %u\n",
- tpg->se_tpg_tfo->get_fabric_name(), unpacked_lun,
- TRANSPORT_MAX_LUNS_PER_TPG-1,
- tpg->se_tpg_tfo->tpg_get_tag(tpg));
- return ERR_PTR(-EOVERFLOW);
- }
-
- spin_lock(&tpg->tpg_lun_lock);
- lun = tpg->tpg_lun_list[unpacked_lun];
- if (lun->lun_status != TRANSPORT_LUN_STATUS_ACTIVE) {
- pr_err("%s Logical Unit Number: %u is not active on"
- " Target Portal Group: %u, ignoring request.\n",
- tpg->se_tpg_tfo->get_fabric_name(), unpacked_lun,
- tpg->se_tpg_tfo->tpg_get_tag(tpg));
- spin_unlock(&tpg->tpg_lun_lock);
- return ERR_PTR(-ENODEV);
- }
- spin_unlock(&tpg->tpg_lun_lock);
-
- return lun;
-}
-
-int core_tpg_post_dellun(
+void core_tpg_remove_lun(
struct se_portal_group *tpg,
struct se_lun *lun)
{
@@ -882,6 +853,4 @@
spin_unlock(&tpg->tpg_lun_lock);
percpu_ref_exit(&lun->lun_ref);
-
- return 0;
}
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 7fa62fc..9ea0d5f 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -232,6 +232,10 @@
if (ret != 0)
pr_err("Unable to load target_core_pscsi\n");
+ ret = request_module("target_core_user");
+ if (ret != 0)
+ pr_err("Unable to load target_core_user\n");
+
sub_api_initialized = 1;
}
@@ -752,8 +756,7 @@
list_for_each_entry_safe(cmd, cmd_tmp, &qf_cmd_list, se_qf_node) {
list_del(&cmd->se_qf_node);
- atomic_dec(&dev->dev_qf_count);
- smp_mb__after_atomic();
+ atomic_dec_mb(&dev->dev_qf_count);
pr_debug("Processing %s cmd: %p QUEUE_FULL in work queue"
" context: %s\n", cmd->se_tfo->get_fabric_name(), cmd,
@@ -1166,7 +1169,6 @@
* Dormant to Active status.
*/
cmd->se_ordered_id = atomic_inc_return(&dev->dev_ordered_id);
- smp_mb__after_atomic();
pr_debug("Allocated se_ordered_id: %u for Task Attr: 0x%02x on %s\n",
cmd->se_ordered_id, cmd->sam_task_attr,
dev->transport->name);
@@ -1722,8 +1724,7 @@
cmd->t_task_cdb[0], cmd->se_ordered_id);
return false;
case MSG_ORDERED_TAG:
- atomic_inc(&dev->dev_ordered_sync);
- smp_mb__after_atomic();
+ atomic_inc_mb(&dev->dev_ordered_sync);
pr_debug("Added ORDERED for CDB: 0x%02x to ordered list, "
" se_ordered_id: %u\n",
@@ -1740,8 +1741,7 @@
/*
* For SIMPLE and UNTAGGED Task Attribute commands
*/
- atomic_inc(&dev->simple_cmds);
- smp_mb__after_atomic();
+ atomic_inc_mb(&dev->simple_cmds);
break;
}
@@ -1845,8 +1845,7 @@
return;
if (cmd->sam_task_attr == MSG_SIMPLE_TAG) {
- atomic_dec(&dev->simple_cmds);
- smp_mb__after_atomic();
+ atomic_dec_mb(&dev->simple_cmds);
dev->dev_cur_ordered_id++;
pr_debug("Incremented dev->dev_cur_ordered_id: %u for"
" SIMPLE: %u\n", dev->dev_cur_ordered_id,
@@ -1857,8 +1856,7 @@
" HEAD_OF_QUEUE: %u\n", dev->dev_cur_ordered_id,
cmd->se_ordered_id);
} else if (cmd->sam_task_attr == MSG_ORDERED_TAG) {
- atomic_dec(&dev->dev_ordered_sync);
- smp_mb__after_atomic();
+ atomic_dec_mb(&dev->dev_ordered_sync);
dev->dev_cur_ordered_id++;
pr_debug("Incremented dev_cur_ordered_id: %u for ORDERED:"
@@ -1877,8 +1875,7 @@
if (cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) {
trace_target_cmd_complete(cmd);
ret = cmd->se_tfo->queue_status(cmd);
- if (ret)
- goto out;
+ goto out;
}
switch (cmd->data_direction) {
@@ -1916,8 +1913,7 @@
{
spin_lock_irq(&dev->qf_cmd_lock);
list_add_tail(&cmd->se_qf_node, &cmd->se_dev->qf_cmd_list);
- atomic_inc(&dev->dev_qf_count);
- smp_mb__after_atomic();
+ atomic_inc_mb(&dev->dev_qf_count);
spin_unlock_irq(&cmd->se_dev->qf_cmd_lock);
schedule_work(&cmd->se_dev->qf_work_queue);
@@ -2896,7 +2892,6 @@
if (cmd->se_tfo->write_pending_status(cmd) != 0) {
cmd->transport_state |= CMD_T_ABORTED;
cmd->se_cmd_flags |= SCF_SEND_DELAYED_TAS;
- smp_mb__after_atomic();
return;
}
}
diff --git a/drivers/target/target_core_ua.c b/drivers/target/target_core_ua.c
index 101858e..1738b16 100644
--- a/drivers/target/target_core_ua.c
+++ b/drivers/target/target_core_ua.c
@@ -161,8 +161,7 @@
spin_unlock(&deve->ua_lock);
spin_unlock_irq(&nacl->device_list_lock);
- atomic_inc(&deve->ua_count);
- smp_mb__after_atomic();
+ atomic_inc_mb(&deve->ua_count);
return 0;
}
list_add_tail(&ua->ua_nacl_list, &deve->ua_list);
@@ -174,8 +173,7 @@
nacl->se_tpg->se_tpg_tfo->get_fabric_name(), unpacked_lun,
asc, ascq);
- atomic_inc(&deve->ua_count);
- smp_mb__after_atomic();
+ atomic_inc_mb(&deve->ua_count);
return 0;
}
@@ -189,8 +187,7 @@
list_del(&ua->ua_nacl_list);
kmem_cache_free(se_ua_cache, ua);
- atomic_dec(&deve->ua_count);
- smp_mb__after_atomic();
+ atomic_dec_mb(&deve->ua_count);
}
spin_unlock(&deve->ua_lock);
}
@@ -250,8 +247,7 @@
list_del(&ua->ua_nacl_list);
kmem_cache_free(se_ua_cache, ua);
- atomic_dec(&deve->ua_count);
- smp_mb__after_atomic();
+ atomic_dec_mb(&deve->ua_count);
}
spin_unlock(&deve->ua_lock);
spin_unlock_irq(&nacl->device_list_lock);
@@ -309,8 +305,7 @@
list_del(&ua->ua_nacl_list);
kmem_cache_free(se_ua_cache, ua);
- atomic_dec(&deve->ua_count);
- smp_mb__after_atomic();
+ atomic_dec_mb(&deve->ua_count);
}
spin_unlock(&deve->ua_lock);
spin_unlock_irq(&nacl->device_list_lock);
diff --git a/drivers/target/target_core_ua.h b/drivers/target/target_core_ua.h
index be912b3..a6b56b3 100644
--- a/drivers/target/target_core_ua.h
+++ b/drivers/target/target_core_ua.h
@@ -1,4 +1,5 @@
#ifndef TARGET_CORE_UA_H
+#define TARGET_CORE_UA_H
/*
* From spc4r17, Table D.1: ASC and ASCQ Assignement
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
new file mode 100644
index 0000000..9a1b314
--- /dev/null
+++ b/drivers/target/target_core_user.c
@@ -0,0 +1,1167 @@
+/*
+ * Copyright (C) 2013 Shaohua Li <shli@kernel.org>
+ * Copyright (C) 2014 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/idr.h>
+#include <linux/timer.h>
+#include <linux/parser.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_host.h>
+#include <linux/uio_driver.h>
+#include <net/genetlink.h>
+#include <target/target_core_base.h>
+#include <target/target_core_fabric.h>
+#include <target/target_core_backend.h>
+#include <linux/target_core_user.h>
+
+/*
+ * Define a shared-memory interface for LIO to pass SCSI commands and
+ * data to userspace for processing. This is to allow backends that
+ * are too complex for in-kernel support to be possible.
+ *
+ * It uses the UIO framework to do a lot of the device-creation and
+ * introspection work for us.
+ *
+ * See the .h file for how the ring is laid out. Note that while the
+ * command ring is defined, the particulars of the data area are
+ * not. Offset values in the command entry point to other locations
+ * internal to the mmap()ed area. There is separate space outside the
+ * command ring for data buffers. This leaves maximum flexibility for
+ * moving buffer allocations, or even page flipping or other
+ * allocation techniques, without altering the command ring layout.
+ *
+ * SECURITY:
+ * The user process must be assumed to be malicious. There's no way to
+ * prevent it breaking the command ring protocol if it wants, but in
+ * order to prevent other issues we must only ever read *data* from
+ * the shared memory area, not offsets or sizes. This applies to
+ * command ring entries as well as the mailbox. Extra code needed for
+ * this may have a 'UAM' comment.
+ */
+
+
+/* Timeout in milliseconds: used for each command's deadline and for the ring-space wait. */
+#define TCMU_TIME_OUT (30 * MSEC_PER_SEC)
+
+/* Bytes at the start of the mapping that hold the mailbox followed by the command ring. */
+#define CMDR_SIZE (16 * 4096)
+/* Bytes of data area that follow the command ring region. */
+#define DATA_SIZE (257 * 4096)
+
+/* Total size of the region that is vzalloc()ed and that userspace must mmap(). */
+#define TCMU_RING_SIZE (CMDR_SIZE + DATA_SIZE)
+
+static struct device *tcmu_root_device;
+
+/* Per-HBA private data; tcmu_attach_hba() stores it in se_hba->hba_ptr. */
+struct tcmu_hba {
+ u32 host_id;
+};
+
+/*
+ * User wants all cmds or just some.
+ *
+ * TCMU_PASS_ALL: tcmu_parse_cdb() hands every CDB except REPORT LUNS to userspace.
+ * TCMU_PASS_IO: CDBs go through sbc_parse_cdb() with tcmu_sbc_ops.
+ * TCMU_PASS_INVALID: upper bound used when validating the pass_level= option.
+ */
+enum passthru_level {
+ TCMU_PASS_ALL = 0,
+ TCMU_PASS_IO,
+ TCMU_PASS_INVALID,
+};
+
+#define TCMU_CONFIG_LEN 256
+
+/*
+ * Per-device state for one userspace-backed device. The generic se_device is
+ * embedded so that TCMU_DEV() can recover this structure with container_of().
+ */
+struct tcmu_dev {
+ struct se_device se_dev;
+
+ /* kstrdup()ed copy of the name passed to tcmu_alloc_device() */
+ char *name;
+ struct se_hba *hba;
+
+#define TCMU_DEV_BIT_OPEN 0
+#define TCMU_DEV_BIT_BROKEN 1
+ /* Bit field indexed by the TCMU_DEV_BIT_* values above */
+ unsigned long flags;
+ enum passthru_level pass_level;
+
+ struct uio_info uio_info;
+
+ /* Start of the vzalloc()ed shared region; the mailbox lives at offset 0 */
+ struct tcmu_mailbox *mb_addr;
+ /* Parsed from the dev_size= configfs option */
+ size_t dev_size;
+ /* Bytes in the command ring (CMDR_SIZE minus the mailbox) */
+ u32 cmdr_size;
+ /* Command ring offset up to which completions have been consumed */
+ u32 cmdr_last_cleaned;
+ /* Offset of data ring from start of mb */
+ size_t data_off;
+ size_t data_size;
+ /* Ring head + tail values. */
+ /* Must add data_off and mb_addr to get the address */
+ size_t data_head;
+ size_t data_tail;
+
+ /* Submitters sleep here for ring space; tcmu_handle_completions() wakes it */
+ wait_queue_head_t wait_cmdr;
+ /* TODO should this be a mutex? */
+ spinlock_t cmdr_lock;
+
+ /* Maps cmd_id to the struct tcmu_cmd handed to userspace */
+ struct idr commands;
+ /* Taken around every access to the commands idr */
+ spinlock_t commands_lock;
+
+ /* Runs tcmu_device_timedout() when it fires */
+ struct timer_list timeout;
+
+ /* Copied from the dev_config= configfs option */
+ char dev_config[TCMU_CONFIG_LEN];
+};
+
+#define TCMU_DEV(_se_dev) container_of(_se_dev, struct tcmu_dev, se_dev)
+
+#define CMDR_OFF sizeof(struct tcmu_mailbox)
+
+/* Driver-side bookkeeping for one command that has been handed to userspace. */
+struct tcmu_cmd {
+ struct se_cmd *se_cmd;
+ struct tcmu_dev *tcmu_dev;
+
+ /* Key under which this command is stored in tcmu_dev->commands */
+ uint16_t cmd_id;
+
+ /* Can't use se_cmd->data_length when cleaning up expired cmds, because if
+ cmd has been completed then accessing se_cmd is off limits */
+ size_t data_length;
+
+ /* Set at allocation to jiffies + TCMU_TIME_OUT (converted to jiffies) */
+ unsigned long deadline;
+
+#define TCMU_CMD_BIT_EXPIRED 0
+ /* Bit field indexed by TCMU_CMD_BIT_* */
+ unsigned long flags;
+};
+
+static struct kmem_cache *tcmu_cmd_cache;
+
+/* multicast group */
+enum tcmu_multicast_groups {
+ TCMU_MCGRP_CONFIG,
+};
+
+/* Groups of the family below; tcmu_netlink_event() multicasts to "config". */
+static const struct genl_multicast_group tcmu_mcgrps[] = {
+ [TCMU_MCGRP_CONFIG] = { .name = "config", },
+};
+
+/* Our generic netlink family */
+static struct genl_family tcmu_genl_family = {
+ .id = GENL_ID_GENERATE,
+ .hdrsize = 0,
+ .name = "TCM-USER",
+ .version = 1,
+ .maxattr = TCMU_ATTR_MAX,
+ .mcgrps = tcmu_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(tcmu_mcgrps),
+};
+
+/*
+ * Allocate the tracking structure for @se_cmd and register it in the
+ * device's command idr.
+ *
+ * Returns the new tcmu_cmd, or NULL when either the slab allocation or the
+ * id allocation fails.
+ */
+static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd)
+{
+	struct tcmu_dev *udev = TCMU_DEV(se_cmd->se_dev);
+	struct tcmu_cmd *cmd;
+	int id;
+
+	cmd = kmem_cache_zalloc(tcmu_cmd_cache, GFP_KERNEL);
+	if (!cmd)
+		return NULL;
+
+	cmd->se_cmd = se_cmd;
+	cmd->tcmu_dev = udev;
+	cmd->data_length = se_cmd->data_length;
+	cmd->deadline = jiffies + msecs_to_jiffies(TCMU_TIME_OUT);
+
+	/* Preload outside the spinlock so the locked idr_alloc() need not sleep */
+	idr_preload(GFP_KERNEL);
+	spin_lock_irq(&udev->commands_lock);
+	id = idr_alloc(&udev->commands, cmd, 0, USHRT_MAX, GFP_NOWAIT);
+	spin_unlock_irq(&udev->commands_lock);
+	idr_preload_end();
+
+	if (id >= 0) {
+		cmd->cmd_id = id;
+		return cmd;
+	}
+
+	kmem_cache_free(tcmu_cmd_cache, cmd);
+	return NULL;
+}
+
+/*
+ * Flush the data cache for every page overlapped by [vaddr, vaddr + size).
+ *
+ * @vaddr: kernel address inside the shared region; it need not be page
+ *         aligned.
+ * @size:  number of bytes starting at @vaddr.
+ *
+ * Every caller in this file passes an address inside udev->mb_addr, which is
+ * obtained from vzalloc(), so the backing page is looked up with
+ * vmalloc_to_page() rather than virt_to_page().
+ */
+static inline void tcmu_flush_dcache_range(void *vaddr, size_t size)
+{
+	unsigned long offset = (unsigned long) vaddr & ~PAGE_MASK;
+
+	/* Widen the range to whole pages, starting at the containing page */
+	size = round_up(size + offset, PAGE_SIZE);
+	vaddr -= offset;
+
+	while (size) {
+		flush_dcache_page(vmalloc_to_page(vaddr));
+		/* Step to the next page; without this only the first is flushed */
+		vaddr += PAGE_SIZE;
+		size -= PAGE_SIZE;
+	}
+}
+
+/*
+ * Ring helpers. The ring size is not assumed to be a power of 2, which
+ * rules out the circ_buf.h macros.
+ *
+ * spc_used() returns the number of bytes between @tail and @head, taking
+ * wrap-around at @size into account.
+ */
+static inline size_t spc_used(size_t head, size_t tail, size_t size)
+{
+	int delta = head - tail;
+
+	/* A negative distance means head has wrapped past the end of the ring */
+	return (delta < 0) ? size + delta : delta;
+}
+
+/* Bytes that can still be written to a ring of @size bytes. */
+static inline size_t spc_free(size_t head, size_t tail, size_t size)
+{
+	size_t in_use = spc_used(head, tail, size);
+
+	/* One byte is always left unused so that full and empty stay distinct */
+	return size - in_use - 1;
+}
+
+/* Contiguous bytes from @head up to the end of a ring of @size bytes. */
+static inline size_t head_to_end(size_t head, size_t size)
+{
+	size_t remaining = size - head;
+
+	return remaining;
+}
+
+/*
+ * Advance ring index @head by @used bytes, wrapping modulo @size, and store
+ * the result with smp_store_release().
+ *
+ * @head must be an lvalue and is evaluated twice; do not pass an expression
+ * with side effects. Every argument is parenthesized so that compound
+ * expressions expand with the intended precedence.
+ */
+#define UPDATE_HEAD(head, used, size) \
+	smp_store_release(&(head), (((head) % (size)) + (used)) % (size))
+
+/*
+ * A command can only be queued when there is room on the command ring *and*
+ * room in the data area.
+ *
+ * @cmd_size:    bytes the command entry needs on the command ring.
+ * @data_needed: bytes the command needs in the data area.
+ *
+ * Returns true when both fit, false otherwise.
+ *
+ * Called with ring lock held.
+ */
+static bool is_ring_space_avail(struct tcmu_dev *udev, size_t cmd_size, size_t data_needed)
+{
+	struct tcmu_mailbox *mb = udev->mb_addr;
+	size_t tail_room, cmd_needed, cmd_free, data_free;
+	u32 cmd_head;
+
+	tcmu_flush_dcache_range(mb, sizeof(*mb));
+
+	cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
+	tail_room = head_to_end(cmd_head, udev->cmdr_size);
+
+	/*
+	 * Entries are internally contiguous. When the space left before the
+	 * end of the ring is too small, that space is consumed as well, on top
+	 * of the command itself.
+	 */
+	cmd_needed = cmd_size;
+	if (tail_room < cmd_size)
+		cmd_needed += tail_room;
+
+	cmd_free = spc_free(cmd_head, udev->cmdr_last_cleaned, udev->cmdr_size);
+	if (cmd_free < cmd_needed) {
+		pr_debug("no cmd space: %u %u %u\n", cmd_head,
+			 udev->cmdr_last_cleaned, udev->cmdr_size);
+		return false;
+	}
+
+	data_free = spc_free(udev->data_head, udev->data_tail, udev->data_size);
+	if (data_free < data_needed) {
+		pr_debug("no data space: %zu %zu %zu\n", udev->data_head,
+			 udev->data_tail, udev->data_size);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Place one command on the shared command ring, stage its data in the data
+ * area and notify userspace through UIO.
+ *
+ * Returns 0 on success, -EINVAL when the device is flagged broken, or
+ * -ETIMEDOUT when the wait for ring space timed out.
+ */
+static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
+{
+ struct tcmu_dev *udev = tcmu_cmd->tcmu_dev;
+ struct se_cmd *se_cmd = tcmu_cmd->se_cmd;
+ size_t base_command_size, command_size;
+ struct tcmu_mailbox *mb;
+ struct tcmu_cmd_entry *entry;
+ int i;
+ struct scatterlist *sg;
+ struct iovec *iov;
+ int iov_cnt = 0;
+ uint32_t cmd_head;
+ uint64_t cdb_off;
+
+ if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags))
+ return -EINVAL;
+
+ /*
+ * Must be a certain minimum size for response sense info, but
+ * also may be larger if the iov array is large.
+ *
+ * iovs = sgl_nents+1, for end-of-ring case, plus another 1
+ * b/c size == offsetof one-past-element.
+ */
+ base_command_size = max(offsetof(struct tcmu_cmd_entry,
+ req.iov[se_cmd->t_data_nents + 2]),
+ sizeof(struct tcmu_cmd_entry));
+ /* The CDB is stored right after the entry, padded to the op alignment */
+ command_size = base_command_size
+ + round_up(scsi_command_size(se_cmd->t_task_cdb), TCMU_OP_ALIGN_SIZE);
+
+ WARN_ON(command_size & (TCMU_OP_ALIGN_SIZE-1));
+
+ spin_lock_irq(&udev->cmdr_lock);
+
+ mb = udev->mb_addr;
+ cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
+ /* This only warns; the request is still attempted below */
+ if ((command_size > (udev->cmdr_size / 2))
+ || tcmu_cmd->data_length > (udev->data_size - 1))
+ pr_warn("TCMU: Request of size %zu/%zu may be too big for %u/%zu "
+ "cmd/data ring buffers\n", command_size, tcmu_cmd->data_length,
+ udev->cmdr_size, udev->data_size);
+
+ /*
+ * Sleep until both rings have room. cmdr_lock is dropped across the
+ * sleep; tcmu_handle_completions() wakes wait_cmdr after reclaiming.
+ */
+ while (!is_ring_space_avail(udev, command_size, tcmu_cmd->data_length)) {
+ int ret;
+ DEFINE_WAIT(__wait);
+
+ prepare_to_wait(&udev->wait_cmdr, &__wait, TASK_INTERRUPTIBLE);
+
+ pr_debug("sleeping for ring space\n");
+ spin_unlock_irq(&udev->cmdr_lock);
+ ret = schedule_timeout(msecs_to_jiffies(TCMU_TIME_OUT));
+ finish_wait(&udev->wait_cmdr, &__wait);
+ /* A zero return is treated as "the full timeout elapsed" */
+ if (!ret) {
+ pr_warn("tcmu: command timed out\n");
+ return -ETIMEDOUT;
+ }
+
+ spin_lock_irq(&udev->cmdr_lock);
+
+ /* We dropped cmdr_lock, cmd_head is stale */
+ cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
+ }
+
+ /* Insert a PAD if end-of-ring space is too small */
+ if (head_to_end(cmd_head, udev->cmdr_size) < command_size) {
+ size_t pad_size = head_to_end(cmd_head, udev->cmdr_size);
+
+ entry = (void *) mb + CMDR_OFF + cmd_head;
+ tcmu_flush_dcache_range(entry, sizeof(*entry));
+ tcmu_hdr_set_op(&entry->hdr, TCMU_OP_PAD);
+ tcmu_hdr_set_len(&entry->hdr, pad_size);
+
+ UPDATE_HEAD(mb->cmd_head, pad_size, udev->cmdr_size);
+
+ /* The PAD fills the ring to its end, so the head must now be 0 */
+ cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
+ WARN_ON(cmd_head != 0);
+ }
+
+ entry = (void *) mb + CMDR_OFF + cmd_head;
+ tcmu_flush_dcache_range(entry, sizeof(*entry));
+ tcmu_hdr_set_op(&entry->hdr, TCMU_OP_CMD);
+ tcmu_hdr_set_len(&entry->hdr, command_size);
+ entry->cmd_id = tcmu_cmd->cmd_id;
+
+ /*
+ * Fix up iovecs, and handle if allocation in data ring wrapped.
+ */
+ iov = &entry->req.iov[0];
+ for_each_sg(se_cmd->t_data_sg, sg, se_cmd->t_data_nents, i) {
+ size_t copy_bytes = min((size_t)sg->length,
+ head_to_end(udev->data_head, udev->data_size));
+ void *from = kmap_atomic(sg_page(sg)) + sg->offset;
+ void *to = (void *) mb + udev->data_off + udev->data_head;
+
+ /* Payload is only copied in for writes; space is reserved either way */
+ if (tcmu_cmd->se_cmd->data_direction == DMA_TO_DEVICE) {
+ memcpy(to, from, copy_bytes);
+ tcmu_flush_dcache_range(to, copy_bytes);
+ }
+
+ /* Even iov_base is relative to mb_addr */
+ iov->iov_len = copy_bytes;
+ iov->iov_base = (void *) udev->data_off + udev->data_head;
+ iov_cnt++;
+ iov++;
+
+ UPDATE_HEAD(udev->data_head, copy_bytes, udev->data_size);
+
+ /* Uh oh, we wrapped the buffer. Must split sg across 2 iovs. */
+ if (sg->length != copy_bytes) {
+ from += copy_bytes;
+ copy_bytes = sg->length - copy_bytes;
+
+ iov->iov_len = copy_bytes;
+ iov->iov_base = (void *) udev->data_off + udev->data_head;
+
+ if (se_cmd->data_direction == DMA_TO_DEVICE) {
+ to = (void *) mb + udev->data_off + udev->data_head;
+ memcpy(to, from, copy_bytes);
+ tcmu_flush_dcache_range(to, copy_bytes);
+ }
+
+ iov_cnt++;
+ iov++;
+
+ UPDATE_HEAD(udev->data_head, copy_bytes, udev->data_size);
+ }
+
+ /*
+ * NOTE(review): 'from' includes sg->offset and may have been advanced
+ * by the wrap case above, so it is not the address kmap_atomic()
+ * returned - confirm kunmap_atomic() tolerates this on all arches.
+ */
+ kunmap_atomic(from);
+ }
+ entry->req.iov_cnt = iov_cnt;
+
+ /* All offsets relative to mb_addr, not start of entry! */
+ cdb_off = CMDR_OFF + cmd_head + base_command_size;
+ memcpy((void *) mb + cdb_off, se_cmd->t_task_cdb, scsi_command_size(se_cmd->t_task_cdb));
+ entry->req.cdb_off = cdb_off;
+ tcmu_flush_dcache_range(entry, sizeof(*entry));
+
+ /* Publish the entry only after it has been completely filled in */
+ UPDATE_HEAD(mb->cmd_head, command_size, udev->cmdr_size);
+ tcmu_flush_dcache_range(mb, sizeof(*mb));
+
+ spin_unlock_irq(&udev->cmdr_lock);
+
+ /* TODO: only if FLUSH and FUA? */
+ uio_event_notify(&udev->uio_info);
+
+ /* (Re)arm the device timer that runs tcmu_device_timedout() */
+ mod_timer(&udev->timeout,
+ round_jiffies_up(jiffies + msecs_to_jiffies(TCMU_TIME_OUT)));
+
+ return 0;
+}
+
+/*
+ * Allocate tracking state for @se_cmd and put the command on the ring.
+ *
+ * Returns 0 on success, -ENOMEM when no tcmu_cmd could be allocated, or the
+ * negative error from tcmu_queue_cmd_ring(). On a ring failure the tracking
+ * state is removed from the idr and freed again.
+ */
+static int tcmu_queue_cmd(struct se_cmd *se_cmd)
+{
+	struct tcmu_dev *udev = TCMU_DEV(se_cmd->se_dev);
+	struct tcmu_cmd *cmd = tcmu_alloc_cmd(se_cmd);
+	int rc;
+
+	if (!cmd)
+		return -ENOMEM;
+
+	rc = tcmu_queue_cmd_ring(cmd);
+	if (rc >= 0)
+		return rc;
+
+	pr_err("TCMU: Could not queue command\n");
+
+	spin_lock_irq(&udev->commands_lock);
+	idr_remove(&udev->commands, cmd->cmd_id);
+	spin_unlock_irq(&udev->commands_lock);
+
+	kmem_cache_free(tcmu_cmd_cache, cmd);
+	return rc;
+}
+
+/*
+ * Finish one command whose ring entry userspace has handed back.
+ *
+ * @cmd:   tracking structure that has already been removed from the idr.
+ * @entry: the command's ring entry; only response data is read from it.
+ *
+ * The data area space of the command is reclaimed, read data or sense data
+ * is copied back, the se_cmd is completed and @cmd is freed. For a command
+ * that the timeout handler already completed, only the data area space is
+ * reclaimed and @cmd is freed.
+ */
+static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry *entry)
+{
+	struct se_cmd *se_cmd = cmd->se_cmd;
+	struct tcmu_dev *udev = cmd->tcmu_dev;
+
+	if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) {
+		/*
+		 * cmd has been completed already from timeout, just reclaim
+		 * data ring space. The timeout handler leaves the structure
+		 * allocated so that this lookup is safe; free it here.
+		 */
+		UPDATE_HEAD(udev->data_tail, cmd->data_length, udev->data_size);
+		kmem_cache_free(tcmu_cmd_cache, cmd);
+		return;
+	}
+
+	if (entry->rsp.scsi_status == SAM_STAT_CHECK_CONDITION) {
+		memcpy(se_cmd->sense_buffer, entry->rsp.sense_buffer,
+		       se_cmd->scsi_sense_length);
+
+		UPDATE_HEAD(udev->data_tail, cmd->data_length, udev->data_size);
+	} else if (se_cmd->data_direction == DMA_FROM_DEVICE) {
+		struct scatterlist *sg;
+		int i;
+
+		/* It'd be easier to look at entry's iovec again, but UAM */
+		for_each_sg(se_cmd->t_data_sg, sg, se_cmd->t_data_nents, i) {
+			size_t copy_bytes;
+			void *to;
+			void *from;
+
+			copy_bytes = min((size_t)sg->length,
+					 head_to_end(udev->data_tail, udev->data_size));
+
+			to = kmap_atomic(sg_page(sg)) + sg->offset;
+			WARN_ON(sg->length + sg->offset > PAGE_SIZE);
+			from = (void *) udev->mb_addr + udev->data_off + udev->data_tail;
+			tcmu_flush_dcache_range(from, copy_bytes);
+			memcpy(to, from, copy_bytes);
+
+			UPDATE_HEAD(udev->data_tail, copy_bytes, udev->data_size);
+
+			/* Uh oh, wrapped the data buffer for this sg's data */
+			if (sg->length != copy_bytes) {
+				from = (void *) udev->mb_addr + udev->data_off + udev->data_tail;
+				WARN_ON(udev->data_tail);
+				to += copy_bytes;
+				copy_bytes = sg->length - copy_bytes;
+				tcmu_flush_dcache_range(from, copy_bytes);
+				memcpy(to, from, copy_bytes);
+
+				UPDATE_HEAD(udev->data_tail, copy_bytes, udev->data_size);
+			}
+
+			kunmap_atomic(to);
+		}
+
+	} else if (se_cmd->data_direction == DMA_TO_DEVICE) {
+		UPDATE_HEAD(udev->data_tail, cmd->data_length, udev->data_size);
+	} else {
+		pr_warn("TCMU: data direction was %d!\n", se_cmd->data_direction);
+	}
+
+	target_complete_cmd(cmd->se_cmd, entry->rsp.scsi_status);
+	cmd->se_cmd = NULL;
+
+	kmem_cache_free(tcmu_cmd_cache, cmd);
+}
+
+/*
+ * Walk the command ring from cmdr_last_cleaned up to the tail published by
+ * userspace and complete every entry found, then wake any submitter that
+ * is waiting for ring space.
+ *
+ * Returns the number of command entries handled (PAD entries are skipped
+ * and not counted). Returns 0 without touching the ring when the device
+ * is flagged broken.
+ */
+static unsigned int tcmu_handle_completions(struct tcmu_dev *udev)
+{
+	struct tcmu_mailbox *mb;
+	unsigned long flags;
+	int handled = 0;
+
+	if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags)) {
+		pr_err("ring broken, not handling completions\n");
+		return 0;
+	}
+
+	spin_lock_irqsave(&udev->cmdr_lock, flags);
+
+	mb = udev->mb_addr;
+	tcmu_flush_dcache_range(mb, sizeof(*mb));
+
+	while (udev->cmdr_last_cleaned != ACCESS_ONCE(mb->cmd_tail)) {
+
+		struct tcmu_cmd_entry *entry = (void *) mb + CMDR_OFF + udev->cmdr_last_cleaned;
+		struct tcmu_cmd *cmd;
+
+		tcmu_flush_dcache_range(entry, sizeof(*entry));
+
+		if (tcmu_hdr_get_op(&entry->hdr) == TCMU_OP_PAD) {
+			UPDATE_HEAD(udev->cmdr_last_cleaned, tcmu_hdr_get_len(&entry->hdr), udev->cmdr_size);
+			continue;
+		}
+		WARN_ON(tcmu_hdr_get_op(&entry->hdr) != TCMU_OP_CMD);
+
+		/* Look up and unhash in one critical section */
+		spin_lock(&udev->commands_lock);
+		cmd = idr_find(&udev->commands, entry->cmd_id);
+		if (cmd)
+			idr_remove(&udev->commands, cmd->cmd_id);
+		spin_unlock(&udev->commands_lock);
+
+		if (!cmd) {
+			pr_err("cmd_id not found, ring is broken\n");
+			set_bit(TCMU_DEV_BIT_BROKEN, &udev->flags);
+			break;
+		}
+
+		/* cmd is freed by the callee; only entry may be used afterwards */
+		tcmu_handle_completion(cmd, entry);
+
+		UPDATE_HEAD(udev->cmdr_last_cleaned, tcmu_hdr_get_len(&entry->hdr), udev->cmdr_size);
+
+		handled++;
+	}
+
+	if (mb->cmd_tail == mb->cmd_head)
+		del_timer(&udev->timeout); /* no more pending cmds */
+
+	spin_unlock_irqrestore(&udev->cmdr_lock, flags);
+
+	wake_up(&udev->wait_cmdr);
+
+	return handled;
+}
+
+/*
+ * idr_for_each() callback: fail a command whose deadline has passed.
+ *
+ * The se_cmd is completed with CHECK CONDITION and the command is marked
+ * expired. The tcmu_cmd itself stays in the idr and stays allocated:
+ * tcmu_handle_completions() still looks it up by id when userspace finally
+ * returns the ring entry, and tcmu_handle_completion() frees it then.
+ *
+ * NOTE(review): an expired command that userspace never returns is not
+ * freed by this path - confirm whether device teardown should reap those.
+ *
+ * Always returns 0 so that the iteration visits every command.
+ */
+static int tcmu_check_expired_cmd(int id, void *p, void *data)
+{
+	struct tcmu_cmd *cmd = p;
+
+	if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags))
+		return 0;
+
+	/* Leave commands alone until "now" is past their deadline */
+	if (!time_after(jiffies, cmd->deadline))
+		return 0;
+
+	set_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags);
+	target_complete_cmd(cmd->se_cmd, SAM_STAT_CHECK_CONDITION);
+	cmd->se_cmd = NULL;
+
+	return 0;
+}
+
+/*
+ * Timer callback for udev->timeout; @data carries the struct tcmu_dev
+ * pointer. Handles whatever completions are already on the ring, then runs
+ * tcmu_check_expired_cmd() over every command still in the idr.
+ */
+static void tcmu_device_timedout(unsigned long data)
+{
+	struct tcmu_dev *udev = (struct tcmu_dev *)data;
+	unsigned long irq_flags;
+	int nr_done = tcmu_handle_completions(udev);
+
+	pr_warn("%d completions handled from timeout\n", nr_done);
+
+	spin_lock_irqsave(&udev->commands_lock, irq_flags);
+	idr_for_each(&udev->commands, tcmu_check_expired_cmd, NULL);
+	spin_unlock_irqrestore(&udev->commands_lock, irq_flags);
+
+	/*
+	 * Threads sleeping on wait_cmdr are not woken here: each of them
+	 * sleeps with its own timeout.
+	 */
+}
+
+/*
+ * Allocate the per-HBA private data, record @host_id in it and hang it off
+ * @hba. Returns 0 on success or -ENOMEM.
+ */
+static int tcmu_attach_hba(struct se_hba *hba, u32 host_id)
+{
+	struct tcmu_hba *priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+
+	if (!priv)
+		return -ENOMEM;
+
+	priv->host_id = host_id;
+	hba->hba_ptr = priv;
+	return 0;
+}
+
+/* Free the private data hanging off @hba and clear the pointer. */
+static void tcmu_detach_hba(struct se_hba *hba)
+{
+	void *priv = hba->hba_ptr;
+
+	kfree(priv);
+	hba->hba_ptr = NULL;
+}
+
+/*
+ * Allocate and initialise a tcmu_dev for @hba. The device starts out with
+ * pass_level TCMU_PASS_ALL and a private copy of @name.
+ *
+ * Returns the embedded se_device, or NULL when an allocation fails.
+ */
+static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name)
+{
+	struct tcmu_dev *udev = kzalloc(sizeof(*udev), GFP_KERNEL);
+
+	if (!udev)
+		return NULL;
+
+	udev->name = kstrdup(name, GFP_KERNEL);
+	if (!udev->name)
+		goto free_udev;
+
+	udev->hba = hba;
+	udev->pass_level = TCMU_PASS_ALL;
+
+	init_waitqueue_head(&udev->wait_cmdr);
+	spin_lock_init(&udev->cmdr_lock);
+	spin_lock_init(&udev->commands_lock);
+	idr_init(&udev->commands);
+
+	setup_timer(&udev->timeout, tcmu_device_timedout, (unsigned long)udev);
+
+	return &udev->se_dev;
+
+free_udev:
+	kfree(udev);
+	return NULL;
+}
+
+/*
+ * uio_info.irqcontrol hook. @irq_on is ignored: every call simply processes
+ * the completions currently on the ring. Always returns 0.
+ */
+static int tcmu_irqcontrol(struct uio_info *info, s32 irq_on)
+{
+	struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);
+
+	(void)tcmu_handle_completions(udev);
+	return 0;
+}
+
+/*
+ * The mmap helpers below are copied from uio.c: this driver hooks mmap()
+ * itself, so the supporting code has to come along.
+ *
+ * tcmu_find_mem_index() returns the index of the uio_info memory region
+ * selected by the vma's page offset, or -1 when the offset is out of range
+ * or the selected region has size 0.
+ */
+static int tcmu_find_mem_index(struct vm_area_struct *vma)
+{
+	struct tcmu_dev *udev = vma->vm_private_data;
+	struct uio_info *info = &udev->uio_info;
+
+	if (vma->vm_pgoff >= MAX_UIO_MAPS)
+		return -1;
+
+	if (info->mem[vma->vm_pgoff].size == 0)
+		return -1;
+
+	return (int)vma->vm_pgoff;
+}
+
+/*
+ * Page fault handler for the mapping: look up the page backing the faulting
+ * offset, take a reference on it and hand it back in vmf->page.
+ *
+ * Returns 0 on success or VM_FAULT_SIGBUS when the vma does not select a
+ * valid memory region.
+ */
+static int tcmu_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct tcmu_dev *udev = vma->vm_private_data;
+	struct uio_info *info = &udev->uio_info;
+	int map_idx = tcmu_find_mem_index(vma);
+	unsigned long byte_off;
+	struct page *pg;
+	void *kaddr;
+
+	if (map_idx < 0)
+		return VM_FAULT_SIGBUS;
+
+	/*
+	 * Userspace selects mem[N] with offset = N*PAGE_SIZE, so the region
+	 * index has to be subtracted from the faulting page offset.
+	 */
+	byte_off = (vmf->pgoff - map_idx) << PAGE_SHIFT;
+	kaddr = (void *)(unsigned long)info->mem[map_idx].addr + byte_off;
+
+	pg = (info->mem[map_idx].memtype == UIO_MEM_LOGICAL) ?
+		virt_to_page(kaddr) : vmalloc_to_page(kaddr);
+
+	get_page(pg);
+	vmf->page = pg;
+	return 0;
+}
+
+/* VM operations installed on the vma by tcmu_mmap(); only faults are handled. */
+static const struct vm_operations_struct tcmu_vm_ops = {
+ .fault = tcmu_vma_fault,
+};
+
+/*
+ * uio_info.mmap hook: install the fault handler and private data on @vma.
+ *
+ * Returns 0 when the mapping covers exactly TCMU_RING_SIZE bytes worth of
+ * pages, -EINVAL otherwise. The vma fields are set up before the size check.
+ */
+static int tcmu_mmap(struct uio_info *info, struct vm_area_struct *vma)
+{
+	struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);
+
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+	vma->vm_ops = &tcmu_vm_ops;
+	vma->vm_private_data = udev;
+
+	/* Partial or oversized mappings of the ring are refused */
+	if (vma_pages(vma) == (TCMU_RING_SIZE >> PAGE_SHIFT))
+		return 0;
+
+	return -EINVAL;
+}
+
+/*
+ * uio_info.open hook. Only one opener at a time is allowed: the OPEN bit
+ * stands in for O_EXCL, which char devices do not support.
+ *
+ * Returns 0 on success or -EBUSY when the device is already open.
+ */
+static int tcmu_open(struct uio_info *info, struct inode *inode)
+{
+	struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);
+	bool already_open = test_and_set_bit(TCMU_DEV_BIT_OPEN, &udev->flags);
+
+	if (already_open)
+		return -EBUSY;
+
+	pr_debug("open\n");
+	return 0;
+}
+
+/* uio_info.release hook: clear the OPEN bit so the device can be opened again. */
+static int tcmu_release(struct uio_info *info, struct inode *inode)
+{
+	struct tcmu_dev *tdev = container_of(info, struct tcmu_dev, uio_info);
+
+	clear_bit(TCMU_DEV_BIT_OPEN, &tdev->flags);
+	pr_debug("close\n");
+
+	return 0;
+}
+
+/*
+ * Multicast a generic netlink message of type @cmd carrying the device
+ * @name and UIO @minor to the "config" group.
+ *
+ * Returns 0 on success (including when nobody is listening) or a negative
+ * errno. The message is freed here on every failure before the multicast.
+ */
+static int tcmu_netlink_event(enum tcmu_genl_cmd cmd, const char *name, int minor)
+{
+	struct sk_buff *msg;
+	void *hdr;
+	int rc;
+
+	msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = genlmsg_put(msg, 0, 0, &tcmu_genl_family, 0, cmd);
+	if (!hdr) {
+		rc = -ENOMEM;
+		goto drop_msg;
+	}
+
+	/* Each step only runs when the previous one succeeded */
+	rc = nla_put_string(msg, TCMU_ATTR_DEVICE, name);
+	if (rc >= 0)
+		rc = nla_put_u32(msg, TCMU_ATTR_MINOR, minor);
+	if (rc >= 0)
+		rc = genlmsg_end(msg, hdr);
+	if (rc < 0)
+		goto drop_msg;
+
+	rc = genlmsg_multicast(&tcmu_genl_family, msg, 0,
+			       TCMU_MCGRP_CONFIG, GFP_KERNEL);
+
+	/* Having no listener is not an error */
+	return (rc == -ESRCH) ? 0 : rc;
+
+drop_msg:
+	nlmsg_free(msg);
+	return rc;
+}
+
+/*
+ * Bring a device to life: build its UIO name, allocate the shared
+ * mailbox/command/data region, register the UIO device and announce the new
+ * device over netlink.
+ *
+ * Returns 0 on success or a negative errno. On failure everything set up
+ * here is released again and the freed pointers are reset to NULL, so that
+ * a later tcmu_free_device() (which vfree()s udev->mb_addr unconditionally)
+ * cannot free them a second time.
+ */
+static int tcmu_configure_device(struct se_device *dev)
+{
+	struct tcmu_dev *udev = TCMU_DEV(dev);
+	struct tcmu_hba *hba = udev->hba->hba_ptr;
+	struct uio_info *info;
+	struct tcmu_mailbox *mb;
+	size_t size;
+	size_t used;
+	int ret = 0;
+	char *str;
+
+	info = &udev->uio_info;
+
+	/* Size the name for the longest form, including the dev_config part */
+	size = snprintf(NULL, 0, "tcm-user/%u/%s/%s", hba->host_id, udev->name,
+			udev->dev_config);
+	size += 1; /* for \0 */
+	str = kmalloc(size, GFP_KERNEL);
+	if (!str)
+		return -ENOMEM;
+
+	used = snprintf(str, size, "tcm-user/%u/%s", hba->host_id, udev->name);
+
+	if (udev->dev_config[0])
+		snprintf(str + used, size - used, "/%s", udev->dev_config);
+
+	info->name = str;
+
+	udev->mb_addr = vzalloc(TCMU_RING_SIZE);
+	if (!udev->mb_addr) {
+		ret = -ENOMEM;
+		goto err_vzalloc;
+	}
+
+	/* mailbox fits in first part of CMDR space */
+	udev->cmdr_size = CMDR_SIZE - CMDR_OFF;
+	udev->data_off = CMDR_SIZE;
+	udev->data_size = TCMU_RING_SIZE - CMDR_SIZE;
+
+	mb = udev->mb_addr;
+	mb->version = 1;
+	mb->cmdr_off = CMDR_OFF;
+	mb->cmdr_size = udev->cmdr_size;
+
+	WARN_ON(!PAGE_ALIGNED(udev->data_off));
+	WARN_ON(udev->data_size % PAGE_SIZE);
+
+	info->version = "1";
+
+	info->mem[0].name = "tcm-user command & data buffer";
+	info->mem[0].addr = (phys_addr_t) udev->mb_addr;
+	info->mem[0].size = TCMU_RING_SIZE;
+	info->mem[0].memtype = UIO_MEM_VIRTUAL;
+
+	info->irqcontrol = tcmu_irqcontrol;
+	info->irq = UIO_IRQ_CUSTOM;
+
+	info->mmap = tcmu_mmap;
+	info->open = tcmu_open;
+	info->release = tcmu_release;
+
+	ret = uio_register_device(tcmu_root_device, info);
+	if (ret)
+		goto err_register;
+
+	/* Other attributes can be configured in userspace */
+	dev->dev_attrib.hw_block_size = 512;
+	dev->dev_attrib.hw_max_sectors = 128;
+	dev->dev_attrib.hw_queue_depth = 128;
+
+	ret = tcmu_netlink_event(TCMU_CMD_ADDED_DEVICE, udev->uio_info.name,
+				 udev->uio_info.uio_dev->minor);
+	if (ret)
+		goto err_netlink;
+
+	return 0;
+
+err_netlink:
+	uio_unregister_device(&udev->uio_info);
+err_register:
+	vfree(udev->mb_addr);
+	udev->mb_addr = NULL;
+err_vzalloc:
+	kfree(info->name);
+	info->name = NULL;
+
+	return ret;
+}
+
+/*
+ * idr_for_each() callback: returns 0 for a command already marked expired
+ * and -EINVAL for any other command, which stops the iteration.
+ */
+static int tcmu_check_pending_cmd(int id, void *p, void *data)
+{
+	struct tcmu_cmd *pending = p;
+
+	return test_bit(TCMU_CMD_BIT_EXPIRED, &pending->flags) ? 0 : -EINVAL;
+}
+
+/*
+ * Tear down a device created by tcmu_alloc_device().
+ *
+ * Stops the timeout timer, frees the shared region, destroys the command
+ * idr (warning when a non-expired command is still present) and, when the
+ * device had been configured, announces the removal and unregisters the UIO
+ * device. udev->name is allocated in tcmu_alloc_device() regardless of
+ * configuration, so it is freed unconditionally.
+ */
+static void tcmu_free_device(struct se_device *dev)
+{
+	struct tcmu_dev *udev = TCMU_DEV(dev);
+	int i;
+
+	del_timer_sync(&udev->timeout);
+
+	vfree(udev->mb_addr);
+
+	/* Upper layer should drain all requests before calling this */
+	spin_lock_irq(&udev->commands_lock);
+	i = idr_for_each(&udev->commands, tcmu_check_pending_cmd, NULL);
+	idr_destroy(&udev->commands);
+	spin_unlock_irq(&udev->commands_lock);
+	WARN_ON(i);
+
+	/* Device was configured */
+	if (udev->uio_info.uio_dev) {
+		tcmu_netlink_event(TCMU_CMD_REMOVED_DEVICE, udev->uio_info.name,
+				   udev->uio_info.uio_dev->minor);
+
+		uio_unregister_device(&udev->uio_info);
+		kfree(udev->uio_info.name);
+	}
+
+	kfree(udev->name);
+	kfree(udev);
+}
+
+/* Option identifiers that match_token() yields for the configfs control string. */
+enum {
+ Opt_dev_config, Opt_dev_size, Opt_err, Opt_pass_level,
+};
+
+/* Option patterns understood by tcmu_set_configfs_dev_params(). */
+static match_table_t tokens = {
+ {Opt_dev_config, "dev_config=%s"},
+ {Opt_dev_size, "dev_size=%u"},
+ {Opt_pass_level, "pass_level=%u"},
+ {Opt_err, NULL}
+};
+
+/*
+ * Parse the comma/newline separated option string written to the device's
+ * configfs control file.
+ *
+ * Recognised options are dev_config=, dev_size= and pass_level=; unknown
+ * options are ignored. An out-of-range pass_level is reported with a
+ * warning and otherwise ignored; a pass_level that cannot be parsed as an
+ * integer is reported as an error.
+ *
+ * Returns @count on success or a negative errno.
+ */
+static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev,
+		const char *page, ssize_t count)
+{
+	struct tcmu_dev *udev = TCMU_DEV(dev);
+	char *orig, *ptr, *opts, *arg_p;
+	substring_t args[MAX_OPT_ARGS];
+	int ret = 0, token;
+	int arg;
+	int err;
+
+	opts = kstrdup(page, GFP_KERNEL);
+	if (!opts)
+		return -ENOMEM;
+
+	/* strsep() advances opts, so keep the original pointer for kfree() */
+	orig = opts;
+
+	while ((ptr = strsep(&opts, ",\n")) != NULL) {
+		if (!*ptr)
+			continue;
+
+		token = match_token(ptr, tokens, args);
+		switch (token) {
+		case Opt_dev_config:
+			if (match_strlcpy(udev->dev_config, &args[0],
+					  TCMU_CONFIG_LEN) == 0) {
+				ret = -EINVAL;
+				break;
+			}
+			pr_debug("TCMU: Referencing Path: %s\n", udev->dev_config);
+			break;
+		case Opt_dev_size:
+			arg_p = match_strdup(&args[0]);
+			if (!arg_p) {
+				ret = -ENOMEM;
+				break;
+			}
+			ret = kstrtoul(arg_p, 0, (unsigned long *) &udev->dev_size);
+			kfree(arg_p);
+			if (ret < 0)
+				pr_err("kstrtoul() failed for dev_size=\n");
+			break;
+		case Opt_pass_level:
+			/* arg is only meaningful when match_int() succeeded */
+			err = match_int(args, &arg);
+			if (err < 0) {
+				pr_err("TCMU: match_int() failed for pass_level=\n");
+				ret = err;
+				break;
+			}
+			if (arg < 0 || arg >= TCMU_PASS_INVALID) {
+				pr_warn("TCMU: Invalid pass_level: %d\n", arg);
+				break;
+			}
+
+			pr_debug("TCMU: Setting pass_level to %d\n", arg);
+			udev->pass_level = arg;
+			break;
+		default:
+			break;
+		}
+	}
+
+	kfree(orig);
+	return (!ret) ? count : ret;
+}
+
+/*
+ * Format the device's configuration (config string, size, pass level) into
+ * @b and return the number of bytes written. An empty config string is
+ * shown as "NULL".
+ */
+static ssize_t tcmu_show_configfs_dev_params(struct se_device *dev, char *b)
+{
+	struct tcmu_dev *udev = TCMU_DEV(dev);
+	const char *cfg = udev->dev_config[0] ? udev->dev_config : "NULL";
+	ssize_t len;
+
+	len = sprintf(b, "Config: %s ", cfg);
+	len += sprintf(b + len, "Size: %zu PassLevel: %u\n",
+		       udev->dev_size, udev->pass_level);
+
+	return len;
+}
+
+/*
+ * Report the block count for the device: (dev_size - block_size) divided
+ * by block_size.
+ */
+static sector_t tcmu_get_blocks(struct se_device *dev)
+{
+	struct tcmu_dev *udev = TCMU_DEV(dev);
+	u64 span = udev->dev_size - dev->dev_attrib.block_size;
+
+	return div_u64(span, dev->dev_attrib.block_size);
+}
+
+/*
+ * sbc_ops.execute_rw hook: queue the command to userspace. The scatterlist
+ * and direction arguments are unused here; the ring code reads them from
+ * @se_cmd itself.
+ */
+static sense_reason_t
+tcmu_execute_rw(struct se_cmd *se_cmd, struct scatterlist *sgl, u32 sgl_nents,
+		enum dma_data_direction data_direction)
+{
+	if (tcmu_queue_cmd(se_cmd) != 0)
+		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+
+	return TCM_NO_SENSE;
+}
+
+/*
+ * Generic execute hook: queue @se_cmd to userspace and map any queueing
+ * failure to a LOGICAL UNIT COMMUNICATION FAILURE sense reason.
+ */
+static sense_reason_t
+tcmu_pass_op(struct se_cmd *se_cmd)
+{
+	int rc = tcmu_queue_cmd(se_cmd);
+
+	if (rc == 0)
+		return TCM_NO_SENSE;
+
+	return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+}
+
+/* SBC callbacks passed to sbc_parse_cdb() in TCMU_PASS_IO mode; all queue to userspace. */
+static struct sbc_ops tcmu_sbc_ops = {
+ .execute_rw = tcmu_execute_rw,
+ .execute_sync_cache = tcmu_pass_op,
+ .execute_write_same = tcmu_pass_op,
+ .execute_write_same_unmap = tcmu_pass_op,
+ .execute_unmap = tcmu_pass_op,
+};
+
+/*
+ * Decide how @cmd is executed, based on the device's pass level.
+ *
+ * TCMU_PASS_IO: defer to sbc_parse_cdb() with tcmu_sbc_ops.
+ * TCMU_PASS_ALL: behave like pscsi - REPORT LUNS is emulated, everything
+ * else is passed up to userspace; READ/WRITE opcodes are additionally
+ * flagged as data CDBs.
+ * Any other level is rejected with TCM_CHECK_CONDITION_ABORT_CMD.
+ */
+static sense_reason_t
+tcmu_parse_cdb(struct se_cmd *cmd)
+{
+	struct tcmu_dev *udev = TCMU_DEV(cmd->se_dev);
+	unsigned char *cdb = cmd->t_task_cdb;
+
+	if (udev->pass_level == TCMU_PASS_IO)
+		return sbc_parse_cdb(cmd, &tcmu_sbc_ops);
+
+	if (udev->pass_level != TCMU_PASS_ALL) {
+		pr_err("Unknown tcm-user pass level %d\n", udev->pass_level);
+		return TCM_CHECK_CONDITION_ABORT_CMD;
+	}
+
+	/* The REPORT LUNS response always has to be emulated */
+	if (cdb[0] == REPORT_LUNS) {
+		cmd->execute_cmd = spc_emulate_report_luns;
+		return TCM_NO_SENSE;
+	}
+
+	switch (cdb[0]) {
+	case READ_6:
+	case READ_10:
+	case READ_12:
+	case READ_16:
+	case WRITE_6:
+	case WRITE_10:
+	case WRITE_12:
+	case WRITE_16:
+	case WRITE_VERIFY:
+		cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
+		break;
+	default:
+		break;
+	}
+
+	cmd->execute_cmd = tcmu_pass_op;
+	return TCM_NO_SENSE;
+}
+
+/* Backend description registered with the target core in tcmu_module_init(). */
+static struct se_subsystem_api tcmu_template = {
+ .name = "user",
+ .inquiry_prod = "USER",
+ .inquiry_rev = TCMU_VERSION,
+ .owner = THIS_MODULE,
+ .transport_type = TRANSPORT_PLUGIN_VHBA_PDEV,
+ .attach_hba = tcmu_attach_hba,
+ .detach_hba = tcmu_detach_hba,
+ .alloc_device = tcmu_alloc_device,
+ .configure_device = tcmu_configure_device,
+ .free_device = tcmu_free_device,
+ .parse_cdb = tcmu_parse_cdb,
+ .set_configfs_dev_params = tcmu_set_configfs_dev_params,
+ .show_configfs_dev_params = tcmu_show_configfs_dev_params,
+ .get_device_type = sbc_get_device_type,
+ .get_blocks = tcmu_get_blocks,
+};
+
+/*
+ * Module load: create the tcmu_cmd slab cache, register the root device,
+ * the generic netlink family and finally the backend itself. Each failure
+ * unwinds the steps that already succeeded.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int __init tcmu_module_init(void)
+{
+	int rc;
+
+	/* Ring entries must be a multiple of the op alignment */
+	BUILD_BUG_ON((sizeof(struct tcmu_cmd_entry) % TCMU_OP_ALIGN_SIZE) != 0);
+
+	tcmu_cmd_cache = kmem_cache_create("tcmu_cmd_cache",
+					   sizeof(struct tcmu_cmd),
+					   __alignof__(struct tcmu_cmd),
+					   0, NULL);
+	if (!tcmu_cmd_cache)
+		return -ENOMEM;
+
+	tcmu_root_device = root_device_register("tcm_user");
+	if (IS_ERR(tcmu_root_device)) {
+		rc = PTR_ERR(tcmu_root_device);
+		goto destroy_cache;
+	}
+
+	rc = genl_register_family(&tcmu_genl_family);
+	if (rc < 0)
+		goto unregister_root;
+
+	rc = transport_subsystem_register(&tcmu_template);
+	if (!rc)
+		return 0;
+
+	genl_unregister_family(&tcmu_genl_family);
+unregister_root:
+	root_device_unregister(tcmu_root_device);
+destroy_cache:
+	kmem_cache_destroy(tcmu_cmd_cache);
+
+	return rc;
+}
+
+/* Module unload: undo the registrations of tcmu_module_init() in reverse order. */
+static void __exit tcmu_module_exit(void)
+{
+ transport_subsystem_release(&tcmu_template);
+ genl_unregister_family(&tcmu_genl_family);
+ root_device_unregister(tcmu_root_device);
+ kmem_cache_destroy(tcmu_cmd_cache);
+}
+
+MODULE_DESCRIPTION("TCM USER subsystem plugin");
+MODULE_AUTHOR("Shaohua Li <shli@kernel.org>");
+MODULE_AUTHOR("Andy Grover <agrover@redhat.com>");
+MODULE_LICENSE("GPL");
+
+module_init(tcmu_module_init);
+module_exit(tcmu_module_exit);
diff --git a/drivers/target/tcm_fc/tfc_sess.c b/drivers/target/tcm_fc/tfc_sess.c
index 21ce508..ccee7e3 100644
--- a/drivers/target/tcm_fc/tfc_sess.c
+++ b/drivers/target/tcm_fc/tfc_sess.c
@@ -98,7 +98,7 @@
ft_sess_delete_all(tport);
lport = tport->lport;
BUG_ON(tport != lport->prov[FC_TYPE_FCP]);
- rcu_assign_pointer(lport->prov[FC_TYPE_FCP], NULL);
+ RCU_INIT_POINTER(lport->prov[FC_TYPE_FCP], NULL);
tpg = tport->tpg;
if (tpg) {
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index ef5587f..f554d25 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -84,6 +84,16 @@
Enable this to manage platform thermals using a simple linear
governor.
+config THERMAL_GOV_BANG_BANG
+ bool "Bang Bang thermal governor"
+ default n
+ help
+ Enable this to manage platform thermals using bang bang governor.
+
+ Say 'Y' here if you want to use two point temperature regulation
+ used for fans without throttling. Some fan drivers depend on this
+ governor to be enabled (e.g. acerhdf).
+
config THERMAL_GOV_USER_SPACE
bool "User_space thermal governor"
help
@@ -207,21 +217,6 @@
two trip points which can be set by user to get notifications via thermal
notification methods.
-config ACPI_INT3403_THERMAL
- tristate "ACPI INT3403 thermal driver"
- depends on X86 && ACPI
- help
- Newer laptops and tablets that use ACPI may have thermal sensors
- outside the core CPU/SOC for thermal safety reasons. These
- temperature sensors are also exposed for the OS to use via the so
- called INT3403 ACPI object. This driver will, on devices that have
- such sensors, expose the temperature information from these sensors
- to userspace via the normal thermal framework. This means that a wide
- range of applications and GUI widgets can show this information to
- the user or use this information for making decisions. For example,
- the Intel Thermal Daemon can use this information to allow the user
- to select his laptop to run without turning on the fans.
-
config INTEL_SOC_DTS_THERMAL
tristate "Intel SoCs DTS thermal driver"
depends on X86 && IOSF_MBI
@@ -234,6 +229,30 @@
notification methods.The other trip is a critical trip point, which
was set by the driver based on the TJ MAX temperature.
+config INT340X_THERMAL
+ tristate "ACPI INT340X thermal drivers"
+ depends on X86 && ACPI
+ select THERMAL_GOV_USER_SPACE
+ select ACPI_THERMAL_REL
+ select ACPI_FAN
+ help
+ Newer laptops and tablets that use ACPI may have thermal sensors and
+ other devices with thermal control capabilities outside the core
+ CPU/SOC, for thermal safety reasons.
+ They are exposed for the OS to use via the INT3400 ACPI device object
+ as the master, and INT3401~INT340B ACPI device objects as the slaves.
+ Enable this to expose the temperature information and cooling ability
+ from these objects to userspace via the normal thermal framework.
+ This means that a wide range of applications and GUI widgets can show
+ the information to the user or use this information for making
+ decisions. For example, the Intel Thermal Daemon can use this
+ information to allow the user to select his laptop to run without
+ turning on the fans.
+
+config ACPI_THERMAL_REL
+ tristate
+ depends on ACPI
+
menu "Texas Instruments thermal drivers"
source "drivers/thermal/ti-soc-thermal/Kconfig"
endmenu
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index 31e232f..39c4fe8 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -11,6 +11,7 @@
# governors
thermal_sys-$(CONFIG_THERMAL_GOV_FAIR_SHARE) += fair_share.o
+thermal_sys-$(CONFIG_THERMAL_GOV_BANG_BANG) += gov_bang_bang.o
thermal_sys-$(CONFIG_THERMAL_GOV_STEP_WISE) += step_wise.o
thermal_sys-$(CONFIG_THERMAL_GOV_USER_SPACE) += user_space.o
@@ -31,5 +32,5 @@
obj-$(CONFIG_X86_PKG_TEMP_THERMAL) += x86_pkg_temp_thermal.o
obj-$(CONFIG_INTEL_SOC_DTS_THERMAL) += intel_soc_dts_thermal.o
obj-$(CONFIG_TI_SOC_THERMAL) += ti-soc-thermal/
-obj-$(CONFIG_ACPI_INT3403_THERMAL) += int3403_thermal.o
+obj-$(CONFIG_INT340X_THERMAL) += int340x_thermal/
obj-$(CONFIG_ST_THERMAL) += st/
diff --git a/drivers/thermal/fair_share.c b/drivers/thermal/fair_share.c
index 944ba2f..6e0a3fb 100644
--- a/drivers/thermal/fair_share.c
+++ b/drivers/thermal/fair_share.c
@@ -23,6 +23,7 @@
*/
#include <linux/thermal.h>
+#include <trace/events/thermal.h>
#include "thermal_core.h"
@@ -34,6 +35,7 @@
{
int count = 0;
unsigned long trip_temp;
+ enum thermal_trip_type trip_type;
if (tz->trips == 0 || !tz->ops->get_trip_temp)
return 0;
@@ -43,6 +45,16 @@
if (tz->temperature < trip_temp)
break;
}
+
+ /*
+ * count > 0 only if temperature is greater than first trip
+ * point, in which case, trip_point = count - 1
+ */
+ if (count > 0) {
+ tz->ops->get_trip_type(tz, count - 1, &trip_type);
+ trace_thermal_zone_trip(tz, count - 1, trip_type);
+ }
+
return count;
}
diff --git a/drivers/thermal/gov_bang_bang.c b/drivers/thermal/gov_bang_bang.c
new file mode 100644
index 0000000..c5dd76b
--- /dev/null
+++ b/drivers/thermal/gov_bang_bang.c
@@ -0,0 +1,131 @@
+/*
+ * gov_bang_bang.c - A simple thermal throttling governor using hysteresis
+ *
+ * Copyright (C) 2014 Peter Feuerer <peter@piie.net>
+ *
+ * Based on step_wise.c with following Copyrights:
+ * Copyright (C) 2012 Intel Corp
+ * Copyright (C) 2012 Durgadoss R <durgadoss.r@intel.com>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ */
+
+#include <linux/thermal.h>
+
+#include "thermal_core.h"
+
+/* Two-point (on/off) regulation of every cooling instance bound to @trip. */
+static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
+{
+ long trip_temp;
+ unsigned long trip_hyst;
+ struct thermal_instance *instance;
+
+ tz->ops->get_trip_temp(tz, trip, &trip_temp);
+ tz->ops->get_trip_hyst(tz, trip, &trip_hyst);
+
+ dev_dbg(&tz->device, "Trip%d[temp=%ld]:temp=%d:hyst=%lu\n",
+ trip, trip_temp, tz->temperature, trip_hyst);
+
+ mutex_lock(&tz->lock);
+
+ list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
+ if (instance->trip != trip)
+ continue;
+
+ /* in case fan is in initial state, switch the fan off */
+ if (instance->target == THERMAL_NO_TARGET)
+ instance->target = 0;
+
+ /* in case fan is neither on nor off set the fan to active */
+ if (instance->target != 0 && instance->target != 1) {
+ pr_warn("Thermal instance %s controlled by bang-bang has unexpected state: %ld\n",
+ instance->name, instance->target);
+ instance->target = 1;
+ }
+
+ /*
+ * fan on at/above trip_temp, off below trip_temp - hyst; hyst is
+ * cast so the compare stays signed and works below 0 degrees
+ */
+ if (instance->target == 0 && tz->temperature >= trip_temp)
+ instance->target = 1;
+ else if (instance->target == 1 &&
+ tz->temperature < trip_temp - (long)trip_hyst)
+ instance->target = 0;
+
+ dev_dbg(&instance->cdev->device, "target=%d\n",
+ (int)instance->target);
+
+ instance->cdev->updated = false; /* cdev needs update */
+ }
+
+ mutex_unlock(&tz->lock);
+}
+
+/**
+ * bang_bang_control - controls devices associated with the given zone
+ * @tz: thermal_zone_device
+ * @trip: the trip point
+ *
+ * Regulation Logic: a two point regulation, deliver cooling state depending
+ * on the previous state shown in this diagram:
+ *
+ * Fan: OFF ON
+ *
+ * |
+ * |
+ * trip_temp: +---->+
+ * | | ^
+ * | | |
+ * | | Temperature
+ * (trip_temp - hyst): +<----+
+ * |
+ * |
+ * |
+ *
+ * * If the fan is not running and temperature exceeds trip_temp, the fan
+ * gets turned on.
+ * * In case the fan is running, temperature must fall below
+ * (trip_temp - hyst) so that the fan gets turned off again.
+ *
+ */
+static int bang_bang_control(struct thermal_zone_device *tz, int trip)
+{
+ struct thermal_instance *instance;
+
+ thermal_zone_trip_update(tz, trip);
+
+ mutex_lock(&tz->lock);
+
+ list_for_each_entry(instance, &tz->thermal_instances, tz_node)
+ thermal_cdev_update(instance->cdev);
+
+ mutex_unlock(&tz->lock);
+
+ return 0;
+}
+
+static struct thermal_governor thermal_gov_bang_bang = {
+ .name = "bang_bang",
+ .throttle = bang_bang_control,
+};
+
+int thermal_gov_bang_bang_register(void)
+{
+ return thermal_register_governor(&thermal_gov_bang_bang);
+}
+
+void thermal_gov_bang_bang_unregister(void)
+{
+ thermal_unregister_governor(&thermal_gov_bang_bang);
+}
diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
index 2c516f2..461bf3d 100644
--- a/drivers/thermal/imx_thermal.c
+++ b/drivers/thermal/imx_thermal.c
@@ -19,6 +19,7 @@
#include <linux/mfd/syscon.h>
#include <linux/module.h>
#include <linux/of.h>
+#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/regmap.h>
#include <linux/slab.h>
@@ -31,6 +32,11 @@
#define MISC0 0x0150
#define MISC0_REFTOP_SELBIASOFF (1 << 3)
+#define MISC1 0x0160
+#define MISC1_IRQ_TEMPHIGH (1 << 29)
+/* Below LOW and PANIC bits are only for TEMPMON_IMX6SX */
+#define MISC1_IRQ_TEMPLOW (1 << 28)
+#define MISC1_IRQ_TEMPPANIC (1 << 27)
#define TEMPSENSE0 0x0180
#define TEMPSENSE0_ALARM_VALUE_SHIFT 20
@@ -43,6 +49,12 @@
#define TEMPSENSE1 0x0190
#define TEMPSENSE1_MEASURE_FREQ 0xffff
+/* Below TEMPSENSE2 is only for TEMPMON_IMX6SX */
+#define TEMPSENSE2 0x0290
+#define TEMPSENSE2_LOW_VALUE_SHIFT 0
+#define TEMPSENSE2_LOW_VALUE_MASK 0xfff
+#define TEMPSENSE2_PANIC_VALUE_SHIFT 16
+#define TEMPSENSE2_PANIC_VALUE_MASK 0xfff0000
#define OCOTP_ANA1 0x04e0
@@ -66,6 +78,21 @@
#define FACTOR1 15976
#define FACTOR2 4297157
+#define TEMPMON_IMX6Q 1
+#define TEMPMON_IMX6SX 2
+
+struct thermal_soc_data {
+ u32 version;
+};
+
+static struct thermal_soc_data thermal_imx6q_data = {
+ .version = TEMPMON_IMX6Q,
+};
+
+static struct thermal_soc_data thermal_imx6sx_data = {
+ .version = TEMPMON_IMX6SX,
+};
+
struct imx_thermal_data {
struct thermal_zone_device *tz;
struct thermal_cooling_device *cdev;
@@ -79,8 +106,21 @@
bool irq_enabled;
int irq;
struct clk *thermal_clk;
+ const struct thermal_soc_data *socdata;
};
+static void imx_set_panic_temp(struct imx_thermal_data *data,
+ signed long panic_temp)
+{
+ struct regmap *map = data->tempmon;
+ int critical_value;
+
+ critical_value = (data->c2 - panic_temp) / data->c1;
+ regmap_write(map, TEMPSENSE2 + REG_CLR, TEMPSENSE2_PANIC_VALUE_MASK);
+ regmap_write(map, TEMPSENSE2 + REG_SET, critical_value <<
+ TEMPSENSE2_PANIC_VALUE_SHIFT);
+}
+
static void imx_set_alarm_temp(struct imx_thermal_data *data,
signed long alarm_temp)
{
@@ -142,13 +182,17 @@
/* See imx_get_sensor_data() for formula derivation */
*temp = data->c2 - n_meas * data->c1;
- /* Update alarm value to next higher trip point */
- if (data->alarm_temp == data->temp_passive && *temp >= data->temp_passive)
- imx_set_alarm_temp(data, data->temp_critical);
- if (data->alarm_temp == data->temp_critical && *temp < data->temp_passive) {
- imx_set_alarm_temp(data, data->temp_passive);
- dev_dbg(&tz->device, "thermal alarm off: T < %lu\n",
- data->alarm_temp / 1000);
+ /* Update alarm value to next higher trip point for TEMPMON_IMX6Q */
+ if (data->socdata->version == TEMPMON_IMX6Q) {
+ if (data->alarm_temp == data->temp_passive &&
+ *temp >= data->temp_passive)
+ imx_set_alarm_temp(data, data->temp_critical);
+ if (data->alarm_temp == data->temp_critical &&
+ *temp < data->temp_passive) {
+ imx_set_alarm_temp(data, data->temp_passive);
+ dev_dbg(&tz->device, "thermal alarm off: T < %lu\n",
+ data->alarm_temp / 1000);
+ }
}
if (*temp != data->last_temp) {
@@ -398,8 +442,17 @@
return IRQ_HANDLED;
}
+static const struct of_device_id of_imx_thermal_match[] = {
+ { .compatible = "fsl,imx6q-tempmon", .data = &thermal_imx6q_data, },
+ { .compatible = "fsl,imx6sx-tempmon", .data = &thermal_imx6sx_data, },
+ { /* end */ }
+};
+MODULE_DEVICE_TABLE(of, of_imx_thermal_match);
+
static int imx_thermal_probe(struct platform_device *pdev)
{
+ const struct of_device_id *of_id =
+ of_match_device(of_imx_thermal_match, &pdev->dev);
struct imx_thermal_data *data;
struct cpumask clip_cpus;
struct regmap *map;
@@ -418,6 +471,20 @@
}
data->tempmon = map;
+ data->socdata = of_id->data;
+
+ /* make sure the IRQ flag is clear before enabling irq on i.MX6SX */
+ if (data->socdata->version == TEMPMON_IMX6SX) {
+ regmap_write(map, MISC1 + REG_CLR, MISC1_IRQ_TEMPHIGH |
+ MISC1_IRQ_TEMPLOW | MISC1_IRQ_TEMPPANIC);
+ /*
+ * reset value of LOW ALARM is incorrect, set it to lowest
+ * value to avoid false trigger of low alarm.
+ */
+ regmap_write(map, TEMPSENSE2 + REG_SET,
+ TEMPSENSE2_LOW_VALUE_MASK);
+ }
+
data->irq = platform_get_irq(pdev, 0);
if (data->irq < 0)
return data->irq;
@@ -489,6 +556,10 @@
measure_freq = DIV_ROUND_UP(32768, 10); /* 10 Hz */
regmap_write(map, TEMPSENSE1 + REG_SET, measure_freq);
imx_set_alarm_temp(data, data->temp_passive);
+
+ if (data->socdata->version == TEMPMON_IMX6SX)
+ imx_set_panic_temp(data, data->temp_critical);
+
regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_POWER_DOWN);
regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_MEASURE_TEMP);
@@ -550,12 +621,6 @@
static SIMPLE_DEV_PM_OPS(imx_thermal_pm_ops,
imx_thermal_suspend, imx_thermal_resume);
-static const struct of_device_id of_imx_thermal_match[] = {
- { .compatible = "fsl,imx6q-tempmon", },
- { /* end */ }
-};
-MODULE_DEVICE_TABLE(of, of_imx_thermal_match);
-
static struct platform_driver imx_thermal = {
.driver = {
.name = "imx_thermal",
diff --git a/drivers/thermal/int3403_thermal.c b/drivers/thermal/int3403_thermal.c
deleted file mode 100644
index 17554ee..0000000
--- a/drivers/thermal/int3403_thermal.c
+++ /dev/null
@@ -1,296 +0,0 @@
-/*
- * ACPI INT3403 thermal driver
- * Copyright (c) 2013, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/acpi.h>
-#include <linux/thermal.h>
-
-#define INT3403_TYPE_SENSOR 0x03
-#define INT3403_PERF_CHANGED_EVENT 0x80
-#define INT3403_THERMAL_EVENT 0x90
-
-#define DECI_KELVIN_TO_MILLI_CELSIUS(t, off) (((t) - (off)) * 100)
-#define KELVIN_OFFSET 2732
-#define MILLI_CELSIUS_TO_DECI_KELVIN(t, off) (((t) / 100) + (off))
-
-#define ACPI_INT3403_CLASS "int3403"
-#define ACPI_INT3403_FILE_STATE "state"
-
-struct int3403_sensor {
- struct thermal_zone_device *tzone;
- unsigned long *thresholds;
- unsigned long crit_temp;
- int crit_trip_id;
- unsigned long psv_temp;
- int psv_trip_id;
-};
-
-static int sys_get_curr_temp(struct thermal_zone_device *tzone,
- unsigned long *temp)
-{
- struct acpi_device *device = tzone->devdata;
- unsigned long long tmp;
- acpi_status status;
-
- status = acpi_evaluate_integer(device->handle, "_TMP", NULL, &tmp);
- if (ACPI_FAILURE(status))
- return -EIO;
-
- *temp = DECI_KELVIN_TO_MILLI_CELSIUS(tmp, KELVIN_OFFSET);
-
- return 0;
-}
-
-static int sys_get_trip_hyst(struct thermal_zone_device *tzone,
- int trip, unsigned long *temp)
-{
- struct acpi_device *device = tzone->devdata;
- unsigned long long hyst;
- acpi_status status;
-
- status = acpi_evaluate_integer(device->handle, "GTSH", NULL, &hyst);
- if (ACPI_FAILURE(status))
- return -EIO;
-
- /*
- * Thermal hysteresis represents a temperature difference.
- * Kelvin and Celsius have same degree size. So the
- * conversion here between tenths of degree Kelvin unit
- * and Milli-Celsius unit is just to multiply 100.
- */
- *temp = hyst * 100;
-
- return 0;
-}
-
-static int sys_get_trip_temp(struct thermal_zone_device *tzone,
- int trip, unsigned long *temp)
-{
- struct acpi_device *device = tzone->devdata;
- struct int3403_sensor *obj = acpi_driver_data(device);
-
- if (trip == obj->crit_trip_id)
- *temp = obj->crit_temp;
- else if (trip == obj->psv_trip_id)
- *temp = obj->psv_temp;
- else {
- /*
- * get_trip_temp is a mandatory callback but
- * PATx method doesn't return any value, so return
- * cached value, which was last set from user space.
- */
- *temp = obj->thresholds[trip];
- }
-
- return 0;
-}
-
-static int sys_get_trip_type(struct thermal_zone_device *thermal,
- int trip, enum thermal_trip_type *type)
-{
- struct acpi_device *device = thermal->devdata;
- struct int3403_sensor *obj = acpi_driver_data(device);
-
- /* Mandatory callback, may not mean much here */
- if (trip == obj->crit_trip_id)
- *type = THERMAL_TRIP_CRITICAL;
- else
- *type = THERMAL_TRIP_PASSIVE;
-
- return 0;
-}
-
-int sys_set_trip_temp(struct thermal_zone_device *tzone, int trip,
- unsigned long temp)
-{
- struct acpi_device *device = tzone->devdata;
- acpi_status status;
- char name[10];
- int ret = 0;
- struct int3403_sensor *obj = acpi_driver_data(device);
-
- snprintf(name, sizeof(name), "PAT%d", trip);
- if (acpi_has_method(device->handle, name)) {
- status = acpi_execute_simple_method(device->handle, name,
- MILLI_CELSIUS_TO_DECI_KELVIN(temp,
- KELVIN_OFFSET));
- if (ACPI_FAILURE(status))
- ret = -EIO;
- else
- obj->thresholds[trip] = temp;
- } else {
- ret = -EIO;
- dev_err(&device->dev, "sys_set_trip_temp: method not found\n");
- }
-
- return ret;
-}
-
-static struct thermal_zone_device_ops tzone_ops = {
- .get_temp = sys_get_curr_temp,
- .get_trip_temp = sys_get_trip_temp,
- .get_trip_type = sys_get_trip_type,
- .set_trip_temp = sys_set_trip_temp,
- .get_trip_hyst = sys_get_trip_hyst,
-};
-
-static void acpi_thermal_notify(struct acpi_device *device, u32 event)
-{
- struct int3403_sensor *obj;
-
- if (!device)
- return;
-
- obj = acpi_driver_data(device);
- if (!obj)
- return;
-
- switch (event) {
- case INT3403_PERF_CHANGED_EVENT:
- break;
- case INT3403_THERMAL_EVENT:
- thermal_zone_device_update(obj->tzone);
- break;
- default:
- dev_err(&device->dev, "Unsupported event [0x%x]\n", event);
- break;
- }
-}
-
-static int sys_get_trip_crt(struct acpi_device *device, unsigned long *temp)
-{
- unsigned long long crt;
- acpi_status status;
-
- status = acpi_evaluate_integer(device->handle, "_CRT", NULL, &crt);
- if (ACPI_FAILURE(status))
- return -EIO;
-
- *temp = DECI_KELVIN_TO_MILLI_CELSIUS(crt, KELVIN_OFFSET);
-
- return 0;
-}
-
-static int sys_get_trip_psv(struct acpi_device *device, unsigned long *temp)
-{
- unsigned long long psv;
- acpi_status status;
-
- status = acpi_evaluate_integer(device->handle, "_PSV", NULL, &psv);
- if (ACPI_FAILURE(status))
- return -EIO;
-
- *temp = DECI_KELVIN_TO_MILLI_CELSIUS(psv, KELVIN_OFFSET);
-
- return 0;
-}
-
-static int acpi_int3403_add(struct acpi_device *device)
-{
- int result = 0;
- unsigned long long ptyp;
- acpi_status status;
- struct int3403_sensor *obj;
- unsigned long long trip_cnt;
- int trip_mask = 0;
-
- if (!device)
- return -EINVAL;
-
- status = acpi_evaluate_integer(device->handle, "PTYP", NULL, &ptyp);
- if (ACPI_FAILURE(status))
- return -EINVAL;
-
- if (ptyp != INT3403_TYPE_SENSOR)
- return -EINVAL;
-
- obj = devm_kzalloc(&device->dev, sizeof(*obj), GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
-
- device->driver_data = obj;
-
- status = acpi_evaluate_integer(device->handle, "PATC", NULL,
- &trip_cnt);
- if (ACPI_FAILURE(status))
- trip_cnt = 0;
-
- if (trip_cnt) {
- /* We have to cache, thresholds can't be readback */
- obj->thresholds = devm_kzalloc(&device->dev,
- sizeof(*obj->thresholds) * trip_cnt,
- GFP_KERNEL);
- if (!obj->thresholds)
- return -ENOMEM;
- trip_mask = BIT(trip_cnt) - 1;
- }
-
- obj->psv_trip_id = -1;
- if (!sys_get_trip_psv(device, &obj->psv_temp))
- obj->psv_trip_id = trip_cnt++;
-
- obj->crit_trip_id = -1;
- if (!sys_get_trip_crt(device, &obj->crit_temp))
- obj->crit_trip_id = trip_cnt++;
-
- obj->tzone = thermal_zone_device_register(acpi_device_bid(device),
- trip_cnt, trip_mask, device, &tzone_ops,
- NULL, 0, 0);
- if (IS_ERR(obj->tzone)) {
- result = PTR_ERR(obj->tzone);
- return result;
- }
-
- strcpy(acpi_device_name(device), "INT3403");
- strcpy(acpi_device_class(device), ACPI_INT3403_CLASS);
-
- return 0;
-}
-
-static int acpi_int3403_remove(struct acpi_device *device)
-{
- struct int3403_sensor *obj;
-
- obj = acpi_driver_data(device);
- thermal_zone_device_unregister(obj->tzone);
-
- return 0;
-}
-
-ACPI_MODULE_NAME("int3403");
-static const struct acpi_device_id int3403_device_ids[] = {
- {"INT3403", 0},
- {"", 0},
-};
-MODULE_DEVICE_TABLE(acpi, int3403_device_ids);
-
-static struct acpi_driver acpi_int3403_driver = {
- .name = "INT3403",
- .class = ACPI_INT3403_CLASS,
- .ids = int3403_device_ids,
- .ops = {
- .add = acpi_int3403_add,
- .remove = acpi_int3403_remove,
- .notify = acpi_thermal_notify,
- },
-};
-
-module_acpi_driver(acpi_int3403_driver);
-
-MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
-MODULE_LICENSE("GPL v2");
-MODULE_DESCRIPTION("ACPI INT3403 thermal driver");
diff --git a/drivers/thermal/int340x_thermal/Makefile b/drivers/thermal/int340x_thermal/Makefile
new file mode 100644
index 0000000..ffe40bf
--- /dev/null
+++ b/drivers/thermal/int340x_thermal/Makefile
@@ -0,0 +1,4 @@
+obj-$(CONFIG_INT340X_THERMAL) += int3400_thermal.o
+obj-$(CONFIG_INT340X_THERMAL) += int3402_thermal.o
+obj-$(CONFIG_INT340X_THERMAL) += int3403_thermal.o
+obj-$(CONFIG_ACPI_THERMAL_REL) += acpi_thermal_rel.o
diff --git a/drivers/thermal/int340x_thermal/acpi_thermal_rel.c b/drivers/thermal/int340x_thermal/acpi_thermal_rel.c
new file mode 100644
index 0000000..0d8db80
--- /dev/null
+++ b/drivers/thermal/int340x_thermal/acpi_thermal_rel.c
@@ -0,0 +1,400 @@
+/* acpi_thermal_rel.c driver for exporting ACPI thermal relationship
+ *
+ * Copyright (c) 2014 Intel Corp
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ */
+
+/*
+ * Two functionalities included:
+ * 1. Export _TRT, _ART, via misc device interface to the userspace.
+ * 2. Provide parsing result to kernel drivers
+ *
+ */
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/acpi.h>
+#include <linux/uaccess.h>
+#include <linux/miscdevice.h>
+#include "acpi_thermal_rel.h"
+
+static acpi_handle acpi_thermal_rel_handle;
+static DEFINE_SPINLOCK(acpi_thermal_rel_chrdev_lock);
+static int acpi_thermal_rel_chrdev_count; /* #times opened */
+static int acpi_thermal_rel_chrdev_exclu; /* already open exclusive? */
+
+static int acpi_thermal_rel_open(struct inode *inode, struct file *file)
+{
+ spin_lock(&acpi_thermal_rel_chrdev_lock);
+ if (acpi_thermal_rel_chrdev_exclu ||
+ (acpi_thermal_rel_chrdev_count && (file->f_flags & O_EXCL))) {
+ spin_unlock(&acpi_thermal_rel_chrdev_lock);
+ return -EBUSY;
+ }
+
+ if (file->f_flags & O_EXCL)
+ acpi_thermal_rel_chrdev_exclu = 1;
+ acpi_thermal_rel_chrdev_count++;
+
+ spin_unlock(&acpi_thermal_rel_chrdev_lock);
+
+ return nonseekable_open(inode, file);
+}
+
+static int acpi_thermal_rel_release(struct inode *inode, struct file *file)
+{
+ spin_lock(&acpi_thermal_rel_chrdev_lock);
+ acpi_thermal_rel_chrdev_count--;
+ acpi_thermal_rel_chrdev_exclu = 0;
+ spin_unlock(&acpi_thermal_rel_chrdev_lock);
+
+ return 0;
+}
+
+/**
+ * acpi_parse_trt - Thermal Relationship Table _TRT for passive cooling
+ * @handle: ACPI handle of the device that contains _TRT
+ * @trt_count: the number of valid entries resulted from parsing _TRT
+ * @trtp: pointer to pointer of array of trt entries in parsing result
+ * @create_dev: whether to create platform devices for target and source
+ *
+ * Return: 0 on success or if _TRT is absent (outputs untouched), else -errno.
+ */
+int acpi_parse_trt(acpi_handle handle, int *trt_count, struct trt **trtp,
+ bool create_dev)
+{
+ acpi_status status;
+ int result = 0;
+ int i;
+ int nr_bad_entries = 0;
+ struct trt *trts;
+ struct acpi_device *adev;
+ union acpi_object *p;
+ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+ struct acpi_buffer element = { 0, NULL };
+ struct acpi_buffer trt_format = { sizeof("RRNNNNNN"), "RRNNNNNN" };
+
+ if (!acpi_has_method(handle, "_TRT"))
+ return 0;
+
+ status = acpi_evaluate_object(handle, "_TRT", NULL, &buffer);
+ if (ACPI_FAILURE(status))
+ return -ENODEV;
+
+ p = buffer.pointer;
+ if (!p || (p->type != ACPI_TYPE_PACKAGE)) {
+ pr_err("Invalid _TRT data\n");
+ result = -EFAULT;
+ goto end;
+ }
+
+ *trt_count = p->package.count;
+ trts = kzalloc(*trt_count * sizeof(struct trt), GFP_KERNEL);
+ if (!trts) {
+ result = -ENOMEM;
+ goto end;
+ }
+
+ for (i = 0; i < *trt_count; i++) {
+ struct trt *trt = &trts[i - nr_bad_entries];
+
+ element.length = sizeof(struct trt);
+ element.pointer = trt;
+
+ status = acpi_extract_package(&(p->package.elements[i]),
+ &trt_format, &element);
+ if (ACPI_FAILURE(status)) {
+ nr_bad_entries++;
+ pr_warn("_TRT package %d is invalid, ignored\n", i);
+ continue;
+ }
+ if (!create_dev)
+ continue;
+
+ /* A failed lookup is only warned about: keeping it out of
+ * result stops a parsed table being returned as an error. */
+ if (!acpi_bus_get_device(trt->source, &adev))
+ acpi_create_platform_device(adev);
+ else
+ pr_warn("Failed to get source ACPI device\n");
+
+ if (!acpi_bus_get_device(trt->target, &adev))
+ acpi_create_platform_device(adev);
+ else
+ pr_warn("Failed to get target ACPI device\n");
+ }
+
+ *trtp = trts;
+ /* don't count bad entries */
+ *trt_count -= nr_bad_entries;
+end:
+ kfree(buffer.pointer);
+ return result;
+}
+EXPORT_SYMBOL(acpi_parse_trt);
+
+/**
+ * acpi_parse_art - Parse Active Relationship Table _ART
+ * @handle: ACPI handle of the device that contains _ART
+ * @art_count: the number of valid entries resulted from parsing _ART
+ * @artp: pointer to pointer of array of art entries in parsing result
+ * @create_dev: whether to create platform devices for target and source
+ *
+ * Return: 0 on success or if _ART is absent (outputs untouched), else -errno.
+ */
+int acpi_parse_art(acpi_handle handle, int *art_count, struct art **artp,
+ bool create_dev)
+{
+ acpi_status status;
+ int result = 0;
+ int i;
+ int nr_bad_entries = 0;
+ struct art *arts;
+ struct acpi_device *adev;
+ union acpi_object *p;
+ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+ struct acpi_buffer element = { 0, NULL };
+ struct acpi_buffer art_format = {
+ sizeof("RRNNNNNNNNNNN"), "RRNNNNNNNNNNN" };
+
+ if (!acpi_has_method(handle, "_ART"))
+ return 0;
+
+ status = acpi_evaluate_object(handle, "_ART", NULL, &buffer);
+ if (ACPI_FAILURE(status))
+ return -ENODEV;
+
+ p = buffer.pointer;
+ if (!p || (p->type != ACPI_TYPE_PACKAGE)) {
+ pr_err("Invalid _ART data\n");
+ result = -EFAULT;
+ goto end;
+ }
+
+ /* ignore p->package.elements[0], as this is _ART Revision field */
+ *art_count = p->package.count - 1;
+ arts = kzalloc(*art_count * sizeof(struct art), GFP_KERNEL);
+ if (!arts) {
+ result = -ENOMEM;
+ goto end;
+ }
+
+ for (i = 0; i < *art_count; i++) {
+ struct art *art = &arts[i - nr_bad_entries];
+
+ element.length = sizeof(struct art);
+ element.pointer = art;
+
+ status = acpi_extract_package(&(p->package.elements[i + 1]),
+ &art_format, &element);
+ if (ACPI_FAILURE(status)) {
+ pr_warn("_ART package %d is invalid, ignored\n", i);
+ nr_bad_entries++;
+ continue;
+ }
+ if (!create_dev)
+ continue;
+
+ /* A failed lookup is only warned about: keeping it out of
+ * result stops a parsed table being returned as an error. */
+ if (art->source) {
+ if (!acpi_bus_get_device(art->source, &adev))
+ acpi_create_platform_device(adev);
+ else
+ pr_warn("Failed to get source ACPI device\n");
+ }
+ if (art->target) {
+ if (!acpi_bus_get_device(art->target, &adev))
+ acpi_create_platform_device(adev);
+ else
+ pr_warn("Failed to get target ACPI device\n");
+ }
+ }
+
+ *artp = arts;
+ /* don't count bad entries */
+ *art_count -= nr_bad_entries;
+end:
+ kfree(buffer.pointer);
+ return result;
+}
+EXPORT_SYMBOL(acpi_parse_art);
+
+
+/* get device name from acpi handle */
+static void get_single_name(acpi_handle handle, char *name)
+{
+ struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER};
+
+ if (ACPI_FAILURE(acpi_get_name(handle, ACPI_SINGLE_NAME, &buffer)))
+ pr_warn("Failed get name from handle\n");
+ else {
+ memcpy(name, buffer.pointer, ACPI_NAME_SIZE);
+ kfree(buffer.pointer);
+ }
+}
+
+/* Copy the parsed _ART table to @ubuf as an array of union art_object. */
+static int fill_art(char __user *ubuf)
+{
+ int i, ret;
+ int count = 0; /* not set by acpi_parse_art() if there is no _ART */
+ int art_len;
+ struct art *arts = NULL;
+ union art_object *art_user;
+
+ ret = acpi_parse_art(acpi_thermal_rel_handle, &count, &arts, false);
+ if (ret)
+ goto free_art;
+ art_len = count * sizeof(union art_object);
+ art_user = kzalloc(art_len, GFP_KERNEL);
+ if (!art_user) {
+ ret = -ENOMEM;
+ goto free_art;
+ }
+ /* now fill in user art data */
+ for (i = 0; i < count; i++) {
+ /* userspace art needs device name instead of acpi reference */
+ get_single_name(arts[i].source, art_user[i].source_device);
+ get_single_name(arts[i].target, art_user[i].target_device);
+ /* copy the rest int data in addition to source and target */
+ memcpy(&art_user[i].weight, &arts[i].weight,
+ sizeof(u64) * (ACPI_NR_ART_ELEMENTS - 2));
+ }
+
+ if (copy_to_user(ubuf, art_user, art_len))
+ ret = -EFAULT;
+ kfree(art_user);
+free_art:
+ kfree(arts);
+ return ret;
+}
+
+/* Copy the parsed _TRT table to @ubuf as an array of union trt_object. */
+static int fill_trt(char __user *ubuf)
+{
+ int i, ret;
+ int count = 0; /* not set by acpi_parse_trt() if there is no _TRT */
+ int trt_len;
+ struct trt *trts = NULL;
+ union trt_object *trt_user;
+
+ ret = acpi_parse_trt(acpi_thermal_rel_handle, &count, &trts, false);
+ if (ret)
+ goto free_trt;
+ trt_len = count * sizeof(union trt_object);
+ trt_user = kzalloc(trt_len, GFP_KERNEL);
+ if (!trt_user) {
+ ret = -ENOMEM;
+ goto free_trt;
+ }
+ /* now fill in user trt data */
+ for (i = 0; i < count; i++) {
+ /* userspace trt needs device name instead of acpi reference */
+ get_single_name(trts[i].source, trt_user[i].source_device);
+ get_single_name(trts[i].target, trt_user[i].target_device);
+ trt_user[i].sample_period = trts[i].sample_period;
+ trt_user[i].influence = trts[i].influence;
+ }
+
+ if (copy_to_user(ubuf, trt_user, trt_len))
+ ret = -EFAULT;
+ kfree(trt_user);
+free_trt:
+ kfree(trts);
+ return ret;
+}
+
+/* ioctl entry: report _TRT/_ART entry count, byte length, or the table itself. */
+static long acpi_thermal_rel_ioctl(struct file *f, unsigned int cmd,
+ unsigned long __arg)
+{
+ int ret = 0, count = 0;
+ unsigned long length = 0;
+ char __user *arg = (void __user *)__arg;
+ struct trt *trts = NULL; /* parsers may return without setting these, */
+ struct art *arts = NULL; /* so kfree() must see NULL, not stack junk */
+
+ switch (cmd) {
+ case ACPI_THERMAL_GET_TRT_COUNT:
+ ret = acpi_parse_trt(acpi_thermal_rel_handle, &count,
+ &trts, false);
+ kfree(trts);
+ if (!ret)
+ return put_user(count, (unsigned long __user *)__arg);
+ return ret;
+ case ACPI_THERMAL_GET_TRT_LEN:
+ ret = acpi_parse_trt(acpi_thermal_rel_handle, &count,
+ &trts, false);
+ kfree(trts);
+ length = count * sizeof(union trt_object);
+ if (!ret)
+ return put_user(length, (unsigned long __user *)__arg);
+ return ret;
+ case ACPI_THERMAL_GET_TRT:
+ return fill_trt(arg);
+ case ACPI_THERMAL_GET_ART_COUNT:
+ ret = acpi_parse_art(acpi_thermal_rel_handle, &count,
+ &arts, false);
+ kfree(arts);
+ if (!ret)
+ return put_user(count, (unsigned long __user *)__arg);
+ return ret;
+ case ACPI_THERMAL_GET_ART_LEN:
+ ret = acpi_parse_art(acpi_thermal_rel_handle, &count,
+ &arts, false);
+ kfree(arts);
+ length = count * sizeof(union art_object);
+ if (!ret)
+ return put_user(length, (unsigned long __user *)__arg);
+ return ret;
+
+ case ACPI_THERMAL_GET_ART:
+ return fill_art(arg);
+
+ default:
+ return -ENOTTY;
+ }
+}
+
+static const struct file_operations acpi_thermal_rel_fops = {
+ .owner = THIS_MODULE,
+ .open = acpi_thermal_rel_open,
+ .release = acpi_thermal_rel_release,
+ .unlocked_ioctl = acpi_thermal_rel_ioctl,
+ .llseek = no_llseek,
+};
+
+static struct miscdevice acpi_thermal_rel_misc_device = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "acpi_thermal_rel", /* designated: no reliance on field order */
+ .fops = &acpi_thermal_rel_fops,
+};
+
+int acpi_thermal_rel_misc_device_add(acpi_handle handle)
+{
+ acpi_thermal_rel_handle = handle;
+
+ return misc_register(&acpi_thermal_rel_misc_device);
+}
+EXPORT_SYMBOL(acpi_thermal_rel_misc_device_add);
+
+int acpi_thermal_rel_misc_device_remove(acpi_handle handle)
+{
+ misc_deregister(&acpi_thermal_rel_misc_device);
+
+ return 0;
+}
+EXPORT_SYMBOL(acpi_thermal_rel_misc_device_remove);
+
+MODULE_AUTHOR("Zhang Rui <rui.zhang@intel.com>");
+MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@intel.com>");
+MODULE_DESCRIPTION("Intel acpi thermal rel misc dev driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/thermal/int340x_thermal/acpi_thermal_rel.h b/drivers/thermal/int340x_thermal/acpi_thermal_rel.h
new file mode 100644
index 0000000..f00700b
--- /dev/null
+++ b/drivers/thermal/int340x_thermal/acpi_thermal_rel.h
@@ -0,0 +1,84 @@
+#ifndef __ACPI_ACPI_THERMAL_H
+#define __ACPI_ACPI_THERMAL_H
+
+#include <asm/ioctl.h>
+
+#define ACPI_THERMAL_MAGIC 's'
+
+#define ACPI_THERMAL_GET_TRT_LEN _IOR(ACPI_THERMAL_MAGIC, 1, unsigned long)
+#define ACPI_THERMAL_GET_ART_LEN _IOR(ACPI_THERMAL_MAGIC, 2, unsigned long)
+#define ACPI_THERMAL_GET_TRT_COUNT _IOR(ACPI_THERMAL_MAGIC, 3, unsigned long)
+#define ACPI_THERMAL_GET_ART_COUNT _IOR(ACPI_THERMAL_MAGIC, 4, unsigned long)
+
+#define ACPI_THERMAL_GET_TRT _IOR(ACPI_THERMAL_MAGIC, 5, unsigned long)
+#define ACPI_THERMAL_GET_ART _IOR(ACPI_THERMAL_MAGIC, 6, unsigned long)
+
+struct art {
+ acpi_handle source;
+ acpi_handle target;
+ u64 weight;
+ u64 ac0_max;
+ u64 ac1_max;
+ u64 ac2_max;
+ u64 ac3_max;
+ u64 ac4_max;
+ u64 ac5_max;
+ u64 ac6_max;
+ u64 ac7_max;
+ u64 ac8_max;
+ u64 ac9_max;
+} __packed;
+
+struct trt {
+ acpi_handle source;
+ acpi_handle target;
+ u64 influence;
+ u64 sample_period;
+ u64 reverved1;
+ u64 reverved2;
+ u64 reverved3;
+ u64 reverved4;
+} __packed;
+
+#define ACPI_NR_ART_ELEMENTS 13
+/* for userspace */
+union art_object {
+ struct {
+ char source_device[8]; /* ACPI single name */
+ char target_device[8]; /* ACPI single name */
+ u64 weight;
+ u64 ac0_max_level;
+ u64 ac1_max_level;
+ u64 ac2_max_level;
+ u64 ac3_max_level;
+ u64 ac4_max_level;
+ u64 ac5_max_level;
+ u64 ac6_max_level;
+ u64 ac7_max_level;
+ u64 ac8_max_level;
+ u64 ac9_max_level;
+ };
+ u64 __data[ACPI_NR_ART_ELEMENTS];
+};
+
+union trt_object {
+ struct {
+ char source_device[8]; /* ACPI single name */
+ char target_device[8]; /* ACPI single name */
+ u64 influence;
+ u64 sample_period;
+ u64 reserved[4];
+ };
+ u64 __data[8];
+};
+
+#ifdef __KERNEL__
+int acpi_thermal_rel_misc_device_add(acpi_handle handle);
+int acpi_thermal_rel_misc_device_remove(acpi_handle handle);
+int acpi_parse_art(acpi_handle handle, int *art_count, struct art **arts,
+ bool create_dev);
+int acpi_parse_trt(acpi_handle handle, int *trt_count, struct trt **trts,
+ bool create_dev);
+#endif
+
+#endif /* __ACPI_ACPI_THERMAL_H */
diff --git a/drivers/thermal/int340x_thermal/int3400_thermal.c b/drivers/thermal/int340x_thermal/int3400_thermal.c
new file mode 100644
index 0000000..edc1cce
--- /dev/null
+++ b/drivers/thermal/int340x_thermal/int3400_thermal.c
@@ -0,0 +1,271 @@
+/*
+ * INT3400 thermal driver
+ *
+ * Copyright (C) 2014, Intel Corporation
+ * Authors: Zhang Rui <rui.zhang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/acpi.h>
+#include <linux/thermal.h>
+#include "acpi_thermal_rel.h"
+
+enum int3400_thermal_uuid {
+ INT3400_THERMAL_PASSIVE_1,
+ INT3400_THERMAL_PASSIVE_2,
+ INT3400_THERMAL_ACTIVE,
+ INT3400_THERMAL_CRITICAL,
+ INT3400_THERMAL_COOLING_MODE,
+ INT3400_THERMAL_MAXIMUM_UUID,
+};
+
+static u8 *int3400_thermal_uuids[INT3400_THERMAL_MAXIMUM_UUID] = {
+ "42A441D6-AE6A-462b-A84B-4A8CE79027D3",
+ "9E04115A-AE87-4D1C-9500-0F3E340BFE75",
+ "3A95C389-E4B8-4629-A526-C52C88626BAE",
+ "97C68AE7-15FA-499c-B8C9-5DA81D606E0A",
+ "16CAF1B7-DD38-40ed-B1C1-1B8A1913D531",
+};
+
+struct int3400_thermal_priv {
+ struct acpi_device *adev;
+ struct thermal_zone_device *thermal;
+ int mode;
+ int art_count;
+ struct art *arts;
+ int trt_count;
+ struct trt *trts;
+ u8 uuid_bitmap;
+ int rel_misc_dev_res;
+};
+
+/*
+ * Evaluate the "IDSP" method and record which of the known policy UUIDs
+ * the firmware reports.
+ *
+ * IDSP must return a package of 16-byte buffers.  Each buffer is compared
+ * against the entries of int3400_thermal_uuids[]; on a match the bit with
+ * the same index (enum int3400_thermal_uuid) is set in priv->uuid_bitmap.
+ * Buffers that match no known UUID are ignored.
+ *
+ * Returns 0 on success, -ENODEV if IDSP cannot be evaluated, or -EINVAL if
+ * the returned object is missing or malformed.
+ */
+static int int3400_thermal_get_uuids(struct int3400_thermal_priv *priv)
+{
+	struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL};
+	union acpi_object *obja, *objb;
+	int i, j;
+	int result = 0;
+	acpi_status status;
+
+	status = acpi_evaluate_object(priv->adev->handle, "IDSP", NULL, &buf);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	obja = (union acpi_object *)buf.pointer;
+	/* the method may succeed without handing back any object */
+	if (!obja || obja->type != ACPI_TYPE_PACKAGE) {
+		result = -EINVAL;
+		goto end;
+	}
+
+	for (i = 0; i < obja->package.count; i++) {
+		objb = &obja->package.elements[i];
+		if (objb->type != ACPI_TYPE_BUFFER) {
+			result = -EINVAL;
+			goto end;
+		}
+
+		/* UUID must be 16 bytes */
+		if (objb->buffer.length != 16) {
+			result = -EINVAL;
+			goto end;
+		}
+
+		for (j = 0; j < INT3400_THERMAL_MAXIMUM_UUID; j++) {
+			u8 uuid[16];
+
+			acpi_str_to_uuid(int3400_thermal_uuids[j], uuid);
+			/*
+			 * UUIDs are raw binary and may contain zero bytes,
+			 * so all 16 bytes must be compared; a string
+			 * compare would stop at the first NUL.
+			 */
+			if (!memcmp(uuid, objb->buffer.pointer, 16)) {
+				priv->uuid_bitmap |= (1 << j);
+				break;
+			}
+		}
+	}
+
+end:
+	kfree(buf.pointer);
+	return result;
+}
+
+/*
+ * Run _OSC on @handle for the policy selected by @uuid, with the support
+ * dword set to @enable and the query dword cleared.
+ *
+ * Returns 0 when _OSC succeeds and the second returned dword equals
+ * @enable, -EPERM otherwise.
+ */
+static int int3400_thermal_run_osc(acpi_handle handle,
+				enum int3400_thermal_uuid uuid, bool enable)
+{
+	u32 caps[2];
+	u32 granted;
+	int err = -EPERM;
+	struct acpi_osc_context osc = {
+		.uuid_str = int3400_thermal_uuids[uuid],
+		.rev = 1,
+		.cap.length = 8,
+	};
+
+	caps[OSC_QUERY_DWORD] = 0;
+	caps[OSC_SUPPORT_DWORD] = enable;
+	osc.cap.pointer = caps;
+
+	if (ACPI_SUCCESS(acpi_run_osc(handle, &osc))) {
+		/* second dword of the returned capabilities buffer */
+		granted = *((u32 *)(osc.ret.pointer + 4));
+		if (granted == enable)
+			err = 0;
+	}
+
+	kfree(osc.ret.pointer);
+	return err;
+}
+
+/* get_temp callback: the zone has no real sensor, always report 20C. */
+static int int3400_thermal_get_temp(struct thermal_zone_device *thermal,
+			unsigned long *temp)
+{
+	const unsigned long fake_temp = 20 * 1000; /* faked temp sensor with 20C */
+
+	*temp = fake_temp;
+	return 0;
+}
+
+/*
+ * get_mode callback: report the mode cached in the driver private data.
+ * Returns -EINVAL when the zone carries no private data.
+ */
+static int int3400_thermal_get_mode(struct thermal_zone_device *thermal,
+				enum thermal_device_mode *mode)
+{
+	struct int3400_thermal_priv *data = thermal->devdata;
+
+	if (data) {
+		*mode = data->mode;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * set_mode callback: cache the requested mode and, when it differs from
+ * the cached one, forward it to firmware through _OSC.
+ *
+ * Returns 0 if nothing changed or _OSC accepted the request, -EINVAL for
+ * a missing private pointer or an unknown mode, or the _OSC error.
+ */
+static int int3400_thermal_set_mode(struct thermal_zone_device *thermal,
+				enum thermal_device_mode mode)
+{
+	struct int3400_thermal_priv *data = thermal->devdata;
+	bool want_enabled;
+
+	if (!data)
+		return -EINVAL;
+
+	switch (mode) {
+	case THERMAL_DEVICE_ENABLED:
+		want_enabled = true;
+		break;
+	case THERMAL_DEVICE_DISABLED:
+		want_enabled = false;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (want_enabled == data->mode)
+		return 0;
+
+	data->mode = want_enabled;
+	/* currently, only PASSIVE COOLING is supported */
+	return int3400_thermal_run_osc(data->adev->handle,
+				       INT3400_THERMAL_PASSIVE_1,
+				       want_enabled);
+}
+
+static struct thermal_zone_device_ops int3400_thermal_ops = {
+ .get_temp = int3400_thermal_get_temp,
+};
+
+static struct thermal_zone_params int3400_thermal_params = {
+ .governor_name = "user_space",
+ .no_hwmon = true,
+};
+
+/*
+ * Probe an INT3400 platform device.
+ *
+ * Allocates the private data, reads the supported UUIDs, parses the ART
+ * and TRT tables, registers a trip-less thermal zone and finally tries to
+ * add the thermal-relationship misc device.
+ *
+ * Returns 0 on success or a negative errno; on failure everything
+ * allocated so far is released through the labels at the end.
+ */
+static int int3400_thermal_probe(struct platform_device *pdev)
+{
+ struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
+ struct int3400_thermal_priv *priv;
+ int result;
+
+ if (!adev)
+ return -ENODEV;
+
+ priv = kzalloc(sizeof(struct int3400_thermal_priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->adev = adev;
+
+ result = int3400_thermal_get_uuids(priv);
+ if (result)
+ goto free_priv;
+
+ result = acpi_parse_art(priv->adev->handle, &priv->art_count,
+ &priv->arts, true);
+ if (result)
+ goto free_priv;
+
+
+ result = acpi_parse_trt(priv->adev->handle, &priv->trt_count,
+ &priv->trts, true);
+ if (result)
+ goto free_art;
+
+ platform_set_drvdata(pdev, priv);
+
+ /* mode callbacks are only offered when the first passive UUID is present */
+ if (priv->uuid_bitmap & 1 << INT3400_THERMAL_PASSIVE_1) {
+ int3400_thermal_ops.get_mode = int3400_thermal_get_mode;
+ int3400_thermal_ops.set_mode = int3400_thermal_set_mode;
+ }
+ priv->thermal = thermal_zone_device_register("INT3400 Thermal", 0, 0,
+ priv, &int3400_thermal_ops,
+ &int3400_thermal_params, 0, 0);
+ if (IS_ERR(priv->thermal)) {
+ result = PTR_ERR(priv->thermal);
+ goto free_trt;
+ }
+
+ /*
+ * A failure here does not fail the probe; the result is kept so that
+ * remove() knows whether the misc device has to be removed.
+ */
+ priv->rel_misc_dev_res = acpi_thermal_rel_misc_device_add(
+ priv->adev->handle);
+
+ return 0;
+free_trt:
+ kfree(priv->trts);
+free_art:
+ kfree(priv->arts);
+free_priv:
+ kfree(priv);
+ return result;
+}
+
+/*
+ * Undo int3400_thermal_probe(): drop the misc device if it was added
+ * successfully, unregister the zone and free the parsed tables and the
+ * private data.
+ */
+static int int3400_thermal_remove(struct platform_device *pdev)
+{
+	struct int3400_thermal_priv *data = platform_get_drvdata(pdev);
+
+	/* a zero result means the misc device was added at probe time */
+	if (data->rel_misc_dev_res == 0)
+		acpi_thermal_rel_misc_device_remove(data->adev->handle);
+
+	thermal_zone_device_unregister(data->thermal);
+
+	kfree(data->trts);
+	kfree(data->arts);
+	kfree(data);
+
+	return 0;
+}
+
+static const struct acpi_device_id int3400_thermal_match[] = {
+ {"INT3400", 0},
+ {}
+};
+
+MODULE_DEVICE_TABLE(acpi, int3400_thermal_match);
+
+static struct platform_driver int3400_thermal_driver = {
+ .probe = int3400_thermal_probe,
+ .remove = int3400_thermal_remove,
+ .driver = {
+ .name = "int3400 thermal",
+ .owner = THIS_MODULE,
+ .acpi_match_table = ACPI_PTR(int3400_thermal_match),
+ },
+};
+
+module_platform_driver(int3400_thermal_driver);
+
+MODULE_DESCRIPTION("INT3400 Thermal driver");
+MODULE_AUTHOR("Zhang Rui <rui.zhang@intel.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/thermal/int340x_thermal/int3402_thermal.c b/drivers/thermal/int340x_thermal/int3402_thermal.c
new file mode 100644
index 0000000..a5d08c1
--- /dev/null
+++ b/drivers/thermal/int340x_thermal/int3402_thermal.c
@@ -0,0 +1,242 @@
+/*
+ * INT3402 thermal driver for memory temperature reporting
+ *
+ * Copyright (C) 2014, Intel Corporation
+ * Authors: Aaron Lu <aaron.lu@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/acpi.h>
+#include <linux/thermal.h>
+
+#define ACPI_ACTIVE_COOLING_MAX_NR 10
+
+struct active_trip {
+ unsigned long temp;
+ int id;
+ bool valid;
+};
+
+/*
+ * Per-zone state of an INT3402 device.
+ *
+ * @aux_trips:   cached values of the writable trips, indexed by trip
+ * @aux_trip_nr: number of entries in @aux_trips
+ * @psv_temp, @crt_temp, @hot_temp: trip temperatures read at probe time
+ * @psv_trip_id, @crt_trip_id, @hot_trip_id: trip index of each of those
+ *               trips, or -1 when the trip is absent
+ * @act_trips:   active cooling trips, one slot per _ACx method
+ * @handle:      ACPI handle the methods are evaluated on
+ */
+struct int3402_thermal_data {
+	unsigned long *aux_trips;
+	int aux_trip_nr;
+	unsigned long psv_temp;
+	int psv_trip_id;
+	unsigned long crt_temp;
+	int crt_trip_id;
+	unsigned long hot_temp;
+	int hot_trip_id;
+	struct active_trip act_trips[ACPI_ACTIVE_COOLING_MAX_NR];
+	/* an acpi_handle is stored and passed on as-is, not a pointer to one */
+	acpi_handle handle;
+};
+
+/*
+ * get_temp callback: evaluate _TMP and convert the result to
+ * millicelsius.  Returns -ENODEV when _TMP cannot be evaluated.
+ */
+static int int3402_thermal_get_zone_temp(struct thermal_zone_device *zone,
+					 unsigned long *temp)
+{
+	struct int3402_thermal_data *data = zone->devdata;
+	unsigned long long deci_kelvin;
+
+	if (ACPI_FAILURE(acpi_evaluate_integer(data->handle, "_TMP", NULL,
+					       &deci_kelvin)))
+		return -ENODEV;
+
+	/* _TMP returns the temperature in tenths of degrees Kelvin */
+	*temp = DECI_KELVIN_TO_MILLICELSIUS(deci_kelvin);
+	return 0;
+}
+
+/*
+ * get_trip_temp callback: look the trip up among the cached aux trips,
+ * the critical/passive/hot trips and finally the active trips.
+ * Returns -EINVAL when the index matches none of them.
+ */
+static int int3402_thermal_get_trip_temp(struct thermal_zone_device *zone,
+					 int trip, unsigned long *temp)
+{
+	struct int3402_thermal_data *data = zone->devdata;
+	int idx;
+
+	if (trip < data->aux_trip_nr) {
+		*temp = data->aux_trips[trip];
+		return 0;
+	}
+	if (trip == data->crt_trip_id) {
+		*temp = data->crt_temp;
+		return 0;
+	}
+	if (trip == data->psv_trip_id) {
+		*temp = data->psv_temp;
+		return 0;
+	}
+	if (trip == data->hot_trip_id) {
+		*temp = data->hot_temp;
+		return 0;
+	}
+
+	for (idx = 0; idx < ACPI_ACTIVE_COOLING_MAX_NR; idx++) {
+		const struct active_trip *act = &data->act_trips[idx];
+
+		if (act->valid && act->id == trip) {
+			*temp = act->temp;
+			return 0;
+		}
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * get_trip_type callback: aux trips and the _PSV trip are passive, the
+ * others map to critical, hot or active.  Returns -EINVAL when the index
+ * matches no known trip.
+ */
+static int int3402_thermal_get_trip_type(struct thermal_zone_device *zone,
+					 int trip, enum thermal_trip_type *type)
+{
+	struct int3402_thermal_data *data = zone->devdata;
+	int idx;
+
+	if (trip < data->aux_trip_nr) {
+		*type = THERMAL_TRIP_PASSIVE;
+		return 0;
+	}
+	if (trip == data->crt_trip_id) {
+		*type = THERMAL_TRIP_CRITICAL;
+		return 0;
+	}
+	if (trip == data->hot_trip_id) {
+		*type = THERMAL_TRIP_HOT;
+		return 0;
+	}
+	if (trip == data->psv_trip_id) {
+		*type = THERMAL_TRIP_PASSIVE;
+		return 0;
+	}
+
+	for (idx = 0; idx < ACPI_ACTIVE_COOLING_MAX_NR; idx++) {
+		const struct active_trip *act = &data->act_trips[idx];
+
+		if (act->valid && act->id == trip) {
+			*type = THERMAL_TRIP_ACTIVE;
+			return 0;
+		}
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * set_trip_temp callback: program the trip through the PAT<trip> method
+ * and, on success, remember the value in the aux trip cache.
+ * Returns -EIO when the method fails.
+ */
+static int int3402_thermal_set_trip_temp(struct thermal_zone_device *zone, int trip,
+					 unsigned long temp)
+{
+	struct int3402_thermal_data *data = zone->devdata;
+	char method[10];
+
+	snprintf(method, sizeof(method), "PAT%d", trip);
+
+	if (ACPI_FAILURE(acpi_execute_simple_method(data->handle, method,
+				MILLICELSIUS_TO_DECI_KELVIN(temp))))
+		return -EIO;
+
+	data->aux_trips[trip] = temp;
+	return 0;
+}
+
+static struct thermal_zone_device_ops int3402_thermal_zone_ops = {
+ .get_temp = int3402_thermal_get_zone_temp,
+ .get_trip_temp = int3402_thermal_get_trip_temp,
+ .get_trip_type = int3402_thermal_get_trip_type,
+ .set_trip_temp = int3402_thermal_set_trip_temp,
+};
+
+static struct thermal_zone_params int3402_thermal_params = {
+ .governor_name = "user_space",
+ .no_hwmon = true,
+};
+
+/*
+ * Evaluate the integer method @name on @handle and store the result,
+ * converted to millicelsius, in @temp.  Returns -EIO on failure.
+ */
+static int int3402_thermal_get_temp(acpi_handle handle, char *name,
+				    unsigned long *temp)
+{
+	unsigned long long deci_kelvin;
+
+	if (ACPI_FAILURE(acpi_evaluate_integer(handle, name, NULL,
+					       &deci_kelvin)))
+		return -EIO;
+
+	*temp = DECI_KELVIN_TO_MILLICELSIUS(deci_kelvin);
+	return 0;
+}
+
+/*
+ * Probe an INT3402 platform device and register a thermal zone for it.
+ *
+ * The device must provide _TMP.  PATC, when present, gives the number of
+ * writable auxiliary trips; these occupy the first trip indices.  _CRT,
+ * _HOT, _PSV and _AC0.._AC9 are then probed in that order and each one
+ * that evaluates is appended as a further trip.
+ *
+ * Returns 0 on success, -ENODEV without an ACPI companion or _TMP,
+ * -ENOMEM on allocation failure, or the zone registration error.
+ */
+static int int3402_thermal_probe(struct platform_device *pdev)
+{
+	struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
+	struct int3402_thermal_data *d;
+	struct thermal_zone_device *zone;
+	acpi_status status;
+	unsigned long long trip_cnt;
+	int trip_mask = 0, i;
+
+	if (!adev)
+		return -ENODEV;
+
+	if (!acpi_has_method(adev->handle, "_TMP"))
+		return -ENODEV;
+
+	d = devm_kzalloc(&pdev->dev, sizeof(*d), GFP_KERNEL);
+	if (!d)
+		return -ENOMEM;
+
+	/* the zone callbacks need the handle whether or not PATC exists */
+	d->handle = adev->handle;
+
+	status = acpi_evaluate_integer(adev->handle, "PATC", NULL, &trip_cnt);
+	if (ACPI_FAILURE(status))
+		trip_cnt = 0;
+	else {
+		d->aux_trips = devm_kzalloc(&pdev->dev,
+				sizeof(*d->aux_trips) * trip_cnt, GFP_KERNEL);
+		if (!d->aux_trips)
+			return -ENOMEM;
+		/* one writable bit per auxiliary trip: bits 0..trip_cnt-1 */
+		trip_mask = BIT(trip_cnt) - 1;
+		d->aux_trip_nr = trip_cnt;
+	}
+
+	d->crt_trip_id = -1;
+	if (!int3402_thermal_get_temp(adev->handle, "_CRT", &d->crt_temp))
+		d->crt_trip_id = trip_cnt++;
+	d->hot_trip_id = -1;
+	if (!int3402_thermal_get_temp(adev->handle, "_HOT", &d->hot_temp))
+		d->hot_trip_id = trip_cnt++;
+	d->psv_trip_id = -1;
+	if (!int3402_thermal_get_temp(adev->handle, "_PSV", &d->psv_temp))
+		d->psv_trip_id = trip_cnt++;
+	/* stop at the first _ACx that does not evaluate */
+	for (i = 0; i < ACPI_ACTIVE_COOLING_MAX_NR; i++) {
+		char name[5] = { '_', 'A', 'C', '0' + i, '\0' };
+		if (int3402_thermal_get_temp(adev->handle, name,
+					     &d->act_trips[i].temp))
+			break;
+		d->act_trips[i].id = trip_cnt++;
+		d->act_trips[i].valid = true;
+	}
+
+	zone = thermal_zone_device_register(acpi_device_bid(adev), trip_cnt,
+					    trip_mask, d,
+					    &int3402_thermal_zone_ops,
+					    &int3402_thermal_params,
+					    0, 0);
+	if (IS_ERR(zone))
+		return PTR_ERR(zone);
+	platform_set_drvdata(pdev, zone);
+
+	return 0;
+}
+
+/* Unregister the thermal zone stored as driver data at probe time. */
+static int int3402_thermal_remove(struct platform_device *pdev)
+{
+	thermal_zone_device_unregister(platform_get_drvdata(pdev));
+
+	return 0;
+}
+
+static const struct acpi_device_id int3402_thermal_match[] = {
+ {"INT3402", 0},
+ {}
+};
+
+MODULE_DEVICE_TABLE(acpi, int3402_thermal_match);
+
+static struct platform_driver int3402_thermal_driver = {
+ .probe = int3402_thermal_probe,
+ .remove = int3402_thermal_remove,
+ .driver = {
+ .name = "int3402 thermal",
+ .owner = THIS_MODULE,
+ .acpi_match_table = int3402_thermal_match,
+ },
+};
+
+module_platform_driver(int3402_thermal_driver);
+
+MODULE_DESCRIPTION("INT3402 Thermal driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/thermal/int340x_thermal/int3403_thermal.c b/drivers/thermal/int340x_thermal/int3403_thermal.c
new file mode 100644
index 0000000..d20dba9
--- /dev/null
+++ b/drivers/thermal/int340x_thermal/int3403_thermal.c
@@ -0,0 +1,477 @@
+/*
+ * ACPI INT3403 thermal driver
+ * Copyright (c) 2013, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/acpi.h>
+#include <linux/thermal.h>
+#include <linux/platform_device.h>
+
+#define INT3403_TYPE_SENSOR 0x03
+#define INT3403_TYPE_CHARGER 0x0B
+#define INT3403_TYPE_BATTERY 0x0C
+#define INT3403_PERF_CHANGED_EVENT 0x80
+#define INT3403_THERMAL_EVENT 0x90
+
+#define DECI_KELVIN_TO_MILLI_CELSIUS(t, off) (((t) - (off)) * 100)
+#define KELVIN_OFFSET 2732
+#define MILLI_CELSIUS_TO_DECI_KELVIN(t, off) (((t) / 100) + (off))
+
+struct int3403_sensor {
+ struct thermal_zone_device *tzone;
+ unsigned long *thresholds;
+ unsigned long crit_temp;
+ int crit_trip_id;
+ unsigned long psv_temp;
+ int psv_trip_id;
+
+};
+
+struct int3403_performance_state {
+ u64 performance;
+ u64 power;
+ u64 latency;
+ u64 linear;
+ u64 control;
+ u64 raw_performace;
+ char *raw_unit;
+ int reserved;
+};
+
+struct int3403_cdev {
+ struct thermal_cooling_device *cdev;
+ unsigned long max_state;
+};
+
+struct int3403_priv {
+ struct platform_device *pdev;
+ struct acpi_device *adev;
+ unsigned long long type;
+ void *priv;
+};
+
+/*
+ * get_temp callback: evaluate _TMP on the device and convert the result
+ * to millicelsius.  Returns -EIO when _TMP cannot be evaluated.
+ */
+static int sys_get_curr_temp(struct thermal_zone_device *tzone,
+				unsigned long *temp)
+{
+	struct int3403_priv *priv = tzone->devdata;
+	unsigned long long raw;
+
+	if (ACPI_FAILURE(acpi_evaluate_integer(priv->adev->handle, "_TMP",
+					       NULL, &raw)))
+		return -EIO;
+
+	*temp = DECI_KELVIN_TO_MILLI_CELSIUS(raw, KELVIN_OFFSET);
+	return 0;
+}
+
+/*
+ * get_trip_hyst callback: evaluate GTSH and report the hysteresis in
+ * millicelsius.  The same value is reported for every trip.
+ * Returns -EIO when GTSH cannot be evaluated.
+ */
+static int sys_get_trip_hyst(struct thermal_zone_device *tzone,
+		int trip, unsigned long *temp)
+{
+	struct int3403_priv *priv = tzone->devdata;
+	struct acpi_device *device = priv->adev;
+	unsigned long long hyst;
+	acpi_status status;
+
+	status = acpi_evaluate_integer(device->handle, "GTSH", NULL, &hyst);
+	if (ACPI_FAILURE(status))
+		return -EIO;
+
+	/*
+	 * Hysteresis is a temperature difference, not an absolute
+	 * temperature.  Kelvin and Celsius share the same degree size, so
+	 * converting tenths of a degree to millidegrees is only a scale by
+	 * 100; subtracting the Kelvin offset would underflow.
+	 */
+	*temp = hyst * 100;
+
+	return 0;
+}
+
+/*
+ * get_trip_temp callback.  Critical and passive trips report the values
+ * read at probe time; any other index reports the cached threshold.
+ * Returns -EINVAL when the device is not a sensor or has no sensor data.
+ */
+static int sys_get_trip_temp(struct thermal_zone_device *tzone,
+		int trip, unsigned long *temp)
+{
+	struct int3403_priv *priv = tzone->devdata;
+	struct int3403_sensor *sensor = priv->priv;
+
+	if (priv->type != INT3403_TYPE_SENSOR || !sensor)
+		return -EINVAL;
+
+	if (trip == sensor->crit_trip_id) {
+		*temp = sensor->crit_temp;
+		return 0;
+	}
+
+	if (trip == sensor->psv_trip_id) {
+		*temp = sensor->psv_temp;
+		return 0;
+	}
+
+	/*
+	 * get_trip_temp is a mandatory callback but
+	 * PATx method doesn't return any value, so return
+	 * cached value, which was last set from user space
+	 */
+	*temp = sensor->thresholds[trip];
+	return 0;
+}
+
+/*
+ * get_trip_type callback: the critical trip is reported as critical,
+ * every other trip as passive.
+ */
+static int sys_get_trip_type(struct thermal_zone_device *thermal,
+		int trip, enum thermal_trip_type *type)
+{
+	struct int3403_priv *priv = thermal->devdata;
+	struct int3403_sensor *sensor = priv->priv;
+
+	/* Mandatory callback, may not mean much here */
+	*type = (trip == sensor->crit_trip_id) ? THERMAL_TRIP_CRITICAL
+					       : THERMAL_TRIP_PASSIVE;
+
+	return 0;
+}
+
+/*
+ * set_trip_temp callback: program the trip through the PAT<trip> method
+ * and cache the value on success, since it cannot be read back.
+ * Returns 0 on success, -EIO when the method is missing or fails.
+ *
+ * Only referenced through tzone_ops in this file, hence static.
+ */
+static int sys_set_trip_temp(struct thermal_zone_device *tzone, int trip,
+			     unsigned long temp)
+{
+	struct int3403_priv *priv = tzone->devdata;
+	struct acpi_device *device = priv->adev;
+	struct int3403_sensor *obj = priv->priv;
+	acpi_status status;
+	char name[10];
+	int ret = 0;
+
+	snprintf(name, sizeof(name), "PAT%d", trip);
+	if (acpi_has_method(device->handle, name)) {
+		status = acpi_execute_simple_method(device->handle, name,
+				MILLI_CELSIUS_TO_DECI_KELVIN(temp,
+							KELVIN_OFFSET));
+		if (ACPI_FAILURE(status))
+			ret = -EIO;
+		else
+			obj->thresholds[trip] = temp;
+	} else {
+		ret = -EIO;
+		dev_err(&device->dev, "sys_set_trip_temp: method not found\n");
+	}
+
+	return ret;
+}
+
+static struct thermal_zone_device_ops tzone_ops = {
+ .get_temp = sys_get_curr_temp,
+ .get_trip_temp = sys_get_trip_temp,
+ .get_trip_type = sys_get_trip_type,
+ .set_trip_temp = sys_set_trip_temp,
+ .get_trip_hyst = sys_get_trip_hyst,
+};
+
+static struct thermal_zone_params int3403_thermal_params = {
+ .governor_name = "user_space",
+ .no_hwmon = true,
+};
+
+/*
+ * ACPI notify handler.  Only sensor devices are handled: a thermal event
+ * triggers a zone update, a performance-changed event is ignored and any
+ * other event is logged as unsupported.
+ */
+static void int3403_notify(acpi_handle handle,
+		u32 event, void *data)
+{
+	struct int3403_priv *priv = data;
+	struct int3403_sensor *sensor;
+
+	if (!priv)
+		return;
+
+	sensor = priv->priv;
+	if (!sensor || priv->type != INT3403_TYPE_SENSOR)
+		return;
+
+	if (event == INT3403_PERF_CHANGED_EVENT)
+		return;
+
+	if (event == INT3403_THERMAL_EVENT) {
+		thermal_zone_device_update(sensor->tzone);
+		return;
+	}
+
+	dev_err(&priv->pdev->dev, "Unsupported event [0x%x]\n", event);
+}
+
+/*
+ * Evaluate _CRT and store the critical temperature in millicelsius.
+ * Returns -EIO when _CRT cannot be evaluated.
+ */
+static int sys_get_trip_crt(struct acpi_device *device, unsigned long *temp)
+{
+	unsigned long long raw;
+
+	if (ACPI_FAILURE(acpi_evaluate_integer(device->handle, "_CRT", NULL,
+					       &raw)))
+		return -EIO;
+
+	*temp = DECI_KELVIN_TO_MILLI_CELSIUS(raw, KELVIN_OFFSET);
+	return 0;
+}
+
+/*
+ * Evaluate _PSV and store the passive temperature in millicelsius.
+ * Returns -EIO when _PSV cannot be evaluated.
+ */
+static int sys_get_trip_psv(struct acpi_device *device, unsigned long *temp)
+{
+	unsigned long long raw;
+
+	if (ACPI_FAILURE(acpi_evaluate_integer(device->handle, "_PSV", NULL,
+					       &raw)))
+		return -EIO;
+
+	*temp = DECI_KELVIN_TO_MILLI_CELSIUS(raw, KELVIN_OFFSET);
+	return 0;
+}
+
+/*
+ * Set up an INT3403 sensor: allocate the sensor data, size the threshold
+ * cache from PATC, append the _PSV and _CRT trips when they evaluate,
+ * register the thermal zone and install the ACPI notify handler.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, the zone
+ * registration error, or -ENODEV when the notify handler cannot be
+ * installed (the zone is unregistered again in that case).
+ */
+static int int3403_sensor_add(struct int3403_priv *priv)
+{
+	int result = 0;
+	acpi_status status;
+	struct int3403_sensor *obj;
+	unsigned long long trip_cnt;
+	int trip_mask = 0;
+
+	obj = devm_kzalloc(&priv->pdev->dev, sizeof(*obj), GFP_KERNEL);
+	if (!obj)
+		return -ENOMEM;
+
+	priv->priv = obj;
+
+	status = acpi_evaluate_integer(priv->adev->handle, "PATC", NULL,
+						&trip_cnt);
+	if (ACPI_FAILURE(status))
+		trip_cnt = 0;
+
+	if (trip_cnt) {
+		/* We have to cache, thresholds can't be readback */
+		obj->thresholds = devm_kzalloc(&priv->pdev->dev,
+					sizeof(*obj->thresholds) * trip_cnt,
+					GFP_KERNEL);
+		if (!obj->thresholds) {
+			result = -ENOMEM;
+			goto err_free_obj;
+		}
+		trip_mask = BIT(trip_cnt) - 1;
+	}
+
+	obj->psv_trip_id = -1;
+	if (!sys_get_trip_psv(priv->adev, &obj->psv_temp))
+		obj->psv_trip_id = trip_cnt++;
+
+	obj->crit_trip_id = -1;
+	if (!sys_get_trip_crt(priv->adev, &obj->crit_temp))
+		obj->crit_trip_id = trip_cnt++;
+
+	obj->tzone = thermal_zone_device_register(acpi_device_bid(priv->adev),
+				trip_cnt, trip_mask, priv, &tzone_ops,
+				&int3403_thermal_params, 0, 0);
+	if (IS_ERR(obj->tzone)) {
+		result = PTR_ERR(obj->tzone);
+		obj->tzone = NULL;
+		goto err_free_obj;
+	}
+
+	/*
+	 * acpi_install_notify_handler() returns an acpi_status, which must
+	 * not leak out of this function as if it were an errno.
+	 */
+	status = acpi_install_notify_handler(priv->adev->handle,
+			ACPI_DEVICE_NOTIFY, int3403_notify,
+			(void *)priv);
+	if (ACPI_FAILURE(status)) {
+		result = -ENODEV;
+		goto err_free_obj;
+	}
+
+	return 0;
+
+err_free_obj:
+	/* obj and its thresholds are devm-managed; only the zone is undone */
+	if (obj->tzone)
+		thermal_zone_device_unregister(obj->tzone);
+	return result;
+}
+
+/*
+ * Tear down a sensor set up by int3403_sensor_add().
+ *
+ * The notify handler is removed first: it was installed with priv as its
+ * context and updates obj->tzone, so it must not run once the zone has
+ * been unregistered and the devm-managed data is released.
+ */
+static int int3403_sensor_remove(struct int3403_priv *priv)
+{
+	struct int3403_sensor *obj = priv->priv;
+
+	acpi_remove_notify_handler(priv->adev->handle,
+				   ACPI_DEVICE_NOTIFY, int3403_notify);
+	thermal_zone_device_unregister(obj->tzone);
+	return 0;
+}
+
+/* INT3403 Cooling devices */
+/* get_max_state callback: report the max_state stored by int3403_cdev_add(). */
+static int int3403_get_max_state(struct thermal_cooling_device *cdev,
+				 unsigned long *state)
+{
+	const struct int3403_priv *priv = cdev->devdata;
+	const struct int3403_cdev *cooler = priv->priv;
+
+	*state = cooler->max_state;
+
+	return 0;
+}
+
+/*
+ * get_cur_state callback: report the value returned by the PPPC method.
+ * Returns -EINVAL when PPPC cannot be evaluated.
+ */
+static int int3403_get_cur_state(struct thermal_cooling_device *cdev,
+				 unsigned long *state)
+{
+	struct int3403_priv *priv = cdev->devdata;
+	unsigned long long level;
+
+	if (ACPI_FAILURE(acpi_evaluate_integer(priv->adev->handle, "PPPC",
+					       NULL, &level)))
+		return -EINVAL;
+
+	*state = level;
+	return 0;
+}
+
+/*
+ * set_cur_state callback: pass the requested state to the SPPC method.
+ * Returns -EINVAL when the method fails.
+ */
+static int
+int3403_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state)
+{
+	struct int3403_priv *priv = cdev->devdata;
+
+	if (ACPI_FAILURE(acpi_execute_simple_method(priv->adev->handle,
+						    "SPPC", state)))
+		return -EINVAL;
+
+	return 0;
+}
+
+static const struct thermal_cooling_device_ops int3403_cooling_ops = {
+ .get_max_state = int3403_get_max_state,
+ .get_cur_state = int3403_get_cur_state,
+ .set_cur_state = int3403_set_cur_state,
+};
+
+/*
+ * Set up an INT3403 cooling device: evaluate PPSS to learn the number of
+ * states (max_state is the package element count minus one) and register
+ * a thermal cooling device.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, -ENODEV when PPSS
+ * cannot be evaluated, -EFAULT when it does not return a package, or the
+ * cooling device registration error.
+ */
+static int int3403_cdev_add(struct int3403_priv *priv)
+{
+	int result = 0;
+	acpi_status status;
+	struct int3403_cdev *obj;
+	struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
+	union acpi_object *p;
+
+	obj = devm_kzalloc(&priv->pdev->dev, sizeof(*obj), GFP_KERNEL);
+	if (!obj)
+		return -ENOMEM;
+
+	status = acpi_evaluate_object(priv->adev->handle, "PPSS", NULL, &buf);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	p = buf.pointer;
+	if (!p || (p->type != ACPI_TYPE_PACKAGE)) {
+		printk(KERN_WARNING "Invalid PPSS data\n");
+		result = -EFAULT;
+		goto out_free_buf;
+	}
+
+	obj->max_state = p->package.count - 1;
+	obj->cdev =
+		thermal_cooling_device_register(acpi_device_bid(priv->adev),
+				priv, &int3403_cooling_ops);
+	if (IS_ERR(obj->cdev))
+		result = PTR_ERR(obj->cdev);
+
+	priv->priv = obj;
+
+	/* TODO: add ACPI notification support */
+
+out_free_buf:
+	/* ACPI_ALLOCATE_BUFFER makes the caller own the returned object */
+	kfree(buf.pointer);
+	return result;
+}
+
+/* Unregister the cooling device registered by int3403_cdev_add(). */
+static int int3403_cdev_remove(struct int3403_priv *priv)
+{
+	struct int3403_cdev *cooler = priv->priv;
+
+	thermal_cooling_device_unregister(cooler->cdev);
+
+	return 0;
+}
+
+/*
+ * Probe an INT3403 platform device.  PTYP selects the role: sensors get
+ * a thermal zone, chargers and batteries get a cooling device.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, -EINVAL without
+ * an ACPI companion, without PTYP or for an unknown type, or the error
+ * of the role-specific setup.
+ */
+static int int3403_add(struct platform_device *pdev)
+{
+	struct int3403_priv *priv;
+	acpi_status status;
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(struct int3403_priv),
+			    GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->pdev = pdev;
+	priv->adev = ACPI_COMPANION(&(pdev->dev));
+	if (!priv->adev)
+		return -EINVAL;
+
+	status = acpi_evaluate_integer(priv->adev->handle, "PTYP",
+				       NULL, &priv->type);
+	if (ACPI_FAILURE(status))
+		return -EINVAL;
+
+	platform_set_drvdata(pdev, priv);
+
+	switch (priv->type) {
+	case INT3403_TYPE_SENSOR:
+		return int3403_sensor_add(priv);
+	case INT3403_TYPE_CHARGER:
+	case INT3403_TYPE_BATTERY:
+		return int3403_cdev_add(priv);
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * Remove an INT3403 platform device by undoing the role-specific setup
+ * chosen at probe time.  Unknown types need no teardown.
+ */
+static int int3403_remove(struct platform_device *pdev)
+{
+	struct int3403_priv *priv = platform_get_drvdata(pdev);
+
+	if (priv->type == INT3403_TYPE_SENSOR)
+		int3403_sensor_remove(priv);
+	else if (priv->type == INT3403_TYPE_CHARGER ||
+		 priv->type == INT3403_TYPE_BATTERY)
+		int3403_cdev_remove(priv);
+
+	return 0;
+}
+
+static const struct acpi_device_id int3403_device_ids[] = {
+ {"INT3403", 0},
+ {"", 0},
+};
+MODULE_DEVICE_TABLE(acpi, int3403_device_ids);
+
+static struct platform_driver int3403_driver = {
+ .probe = int3403_add,
+ .remove = int3403_remove,
+ .driver = {
+ .name = "int3403 thermal",
+ .owner = THIS_MODULE,
+ .acpi_match_table = int3403_device_ids,
+ },
+};
+
+module_platform_driver(int3403_driver);
+
+MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("ACPI INT3403 thermal driver");
diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c
index 4b2b999..f8eb625 100644
--- a/drivers/thermal/of-thermal.c
+++ b/drivers/thermal/of-thermal.c
@@ -401,6 +401,10 @@
struct of_phandle_args sensor_specs;
int ret, id;
+ /* Check whether child is enabled or not */
+ if (!of_device_is_available(child))
+ continue;
+
/* For now, thermal framework supports only 1 sensor per zone */
ret = of_parse_phandle_with_args(child, "thermal-sensors",
"#thermal-sensor-cells",
@@ -771,6 +775,10 @@
struct thermal_zone_device *zone;
struct thermal_zone_params *tzp;
+ /* Check whether child is enabled or not */
+ if (!of_device_is_available(child))
+ continue;
+
tz = thermal_of_build_thermal_zone(child);
if (IS_ERR(tz)) {
pr_err("failed to build thermal zone %s: %ld\n",
@@ -838,6 +846,10 @@
for_each_child_of_node(np, child) {
struct thermal_zone_device *zone;
+ /* Check whether child is enabled or not */
+ if (!of_device_is_available(child))
+ continue;
+
zone = thermal_zone_get_zone_by_name(child->name);
if (IS_ERR(zone))
continue;
diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c
index f251521..fdd1f52 100644
--- a/drivers/thermal/step_wise.c
+++ b/drivers/thermal/step_wise.c
@@ -23,6 +23,7 @@
*/
#include <linux/thermal.h>
+#include <trace/events/thermal.h>
#include "thermal_core.h"
@@ -76,7 +77,7 @@
next_target = instance->upper;
break;
case THERMAL_TREND_DROPPING:
- if (cur_state == instance->lower) {
+ if (cur_state <= instance->lower) {
if (!throttle)
next_target = THERMAL_NO_TARGET;
} else {
@@ -129,8 +130,10 @@
trend = get_tz_trend(tz, trip);
- if (tz->temperature >= trip_temp)
+ if (tz->temperature >= trip_temp) {
throttle = true;
+ trace_thermal_zone_trip(tz, trip, trip_type);
+ }
dev_dbg(&tz->device, "Trip%d[type=%d,temp=%ld]:trend=%d,throttle=%d\n",
trip, trip_type, trip_temp, trend, throttle);
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 1e23f4f..9bf10aa 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -38,6 +38,9 @@
#include <net/netlink.h>
#include <net/genetlink.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/thermal.h>
+
#include "thermal_core.h"
#include "thermal_hwmon.h"
@@ -368,6 +371,8 @@
if (tz->temperature < trip_temp)
return;
+ trace_thermal_zone_trip(tz, trip, trip_type);
+
if (tz->ops->notify)
tz->ops->notify(tz, trip, trip_type);
@@ -463,6 +468,7 @@
tz->temperature = temp;
mutex_unlock(&tz->lock);
+ trace_thermal_temperature(tz);
dev_dbg(&tz->device, "last_temperature=%d, current_temperature=%d\n",
tz->last_temperature, tz->temperature);
}
@@ -1287,6 +1293,7 @@
mutex_unlock(&cdev->lock);
cdev->ops->set_cur_state(cdev, target);
cdev->updated = true;
+ trace_cdev_update(cdev, target);
dev_dbg(&cdev->device, "set to state %lu\n", target);
}
EXPORT_SYMBOL(thermal_cdev_update);
@@ -1790,6 +1797,10 @@
if (result)
return result;
+ result = thermal_gov_bang_bang_register();
+ if (result)
+ return result;
+
return thermal_gov_user_space_register();
}
@@ -1797,6 +1808,7 @@
{
thermal_gov_step_wise_unregister();
thermal_gov_fair_share_unregister();
+ thermal_gov_bang_bang_unregister();
thermal_gov_user_space_unregister();
}
diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
index 3db339f..d15d243 100644
--- a/drivers/thermal/thermal_core.h
+++ b/drivers/thermal/thermal_core.h
@@ -69,6 +69,14 @@
static inline void thermal_gov_fair_share_unregister(void) {}
#endif /* CONFIG_THERMAL_GOV_FAIR_SHARE */
+#ifdef CONFIG_THERMAL_GOV_BANG_BANG
+int thermal_gov_bang_bang_register(void);
+void thermal_gov_bang_bang_unregister(void);
+#else
+static inline int thermal_gov_bang_bang_register(void) { return 0; }
+static inline void thermal_gov_bang_bang_unregister(void) {}
+#endif /* CONFIG_THERMAL_GOV_BANG_BANG */
+
#ifdef CONFIG_THERMAL_GOV_USER_SPACE
int thermal_gov_user_space_register(void);
void thermal_gov_user_space_unregister(void);
diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index a673e5b..60fa627 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -28,18 +28,6 @@
#define UIO_MAX_DEVICES (1U << MINORBITS)
-struct uio_device {
- struct module *owner;
- struct device *dev;
- int minor;
- atomic_t event;
- struct fasync_struct *async_queue;
- wait_queue_head_t wait;
- struct uio_info *info;
- struct kobject *map_dir;
- struct kobject *portio_dir;
-};
-
static int uio_major;
static struct cdev *uio_cdev;
static DEFINE_IDR(uio_idr);
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index e3d5bf0..d0107d4 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -87,6 +87,15 @@
This driver can also be built as a module. If so, the module
will be called da9055_wdt.
+config DA9063_WATCHDOG
+ tristate "Dialog DA9063 Watchdog"
+ depends on MFD_DA9063
+ select WATCHDOG_CORE
+ help
+ Support for the watchdog in the DA9063 PMIC.
+
+ This driver can be built as a module. The module name is da9063_wdt.
+
config GPIO_WATCHDOG
tristate "Watchdog device controlled through GPIO-line"
depends on OF_GPIO
@@ -123,6 +132,7 @@
config XILINX_WATCHDOG
tristate "Xilinx Watchdog timer"
+ depends on HAS_IOMEM
select WATCHDOG_CORE
help
Watchdog driver for the xps_timebase_wdt ip core.
@@ -157,6 +167,14 @@
Watchdog timer embedded into AT91SAM9X and AT91CAP9 chips. This will
reboot your system when the timeout is reached.
+config CADENCE_WATCHDOG
+ tristate "Cadence Watchdog Timer"
+ depends on ARM
+ select WATCHDOG_CORE
+ help
+ Say Y here if you want to include support for the watchdog
+ timer in the Xilinx Zynq.
+
config 21285_WATCHDOG
tristate "DC21285 watchdog"
depends on FOOTBRIDGE
@@ -319,6 +337,17 @@
To compile this driver as a module, choose M here: the
module will be called orion_wdt.
+config RN5T618_WATCHDOG
+ tristate "Ricoh RN5T618 watchdog"
+ depends on MFD_RN5T618
+ select WATCHDOG_CORE
+ help
+ If you say yes here you get support for watchdog on the Ricoh
+ RN5T618 PMIC.
+
+ This driver can also be built as a module. If so, the module
+ will be called rn5t618_wdt.
+
config SUNXI_WATCHDOG
tristate "Allwinner SoCs watchdog support"
depends on ARCH_SUNXI
@@ -444,7 +473,7 @@
config TEGRA_WATCHDOG
tristate "Tegra watchdog"
- depends on ARCH_TEGRA || COMPILE_TEST
+ depends on (ARCH_TEGRA || COMPILE_TEST) && HAS_IOMEM
select WATCHDOG_CORE
help
Say Y here to include support for the watchdog timer
@@ -453,6 +482,29 @@
To compile this driver as a module, choose M here: the
module will be called tegra_wdt.
+config QCOM_WDT
+ tristate "QCOM watchdog"
+ depends on HAS_IOMEM
+ depends on ARCH_QCOM
+ select WATCHDOG_CORE
+ help
+ Say Y here to include Watchdog timer support for the watchdog found
+ on QCOM chipsets. Currently supported targets are the MSM8960,
+ APQ8064, and IPQ8064.
+
+ To compile this driver as a module, choose M here: the
+ module will be called qcom_wdt.
+
+config MESON_WATCHDOG
+ tristate "Amlogic Meson SoCs watchdog support"
+ depends on ARCH_MESON
+ select WATCHDOG_CORE
+ help
+ Say Y here to include support for the watchdog timer
+ in Amlogic Meson SoCs.
+ To compile this driver as a module, choose M here: the
+ module will be called meson_wdt.
+
# AVR32 Architecture
config AT32AP700X_WDT
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index de17014..c569ec8 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -32,6 +32,7 @@
obj-$(CONFIG_ARM_SP805_WATCHDOG) += sp805_wdt.o
obj-$(CONFIG_AT91RM9200_WATCHDOG) += at91rm9200_wdt.o
obj-$(CONFIG_AT91SAM9X_WATCHDOG) += at91sam9_wdt.o
+obj-$(CONFIG_CADENCE_WATCHDOG) += cadence_wdt.o
obj-$(CONFIG_OMAP_WATCHDOG) += omap_wdt.o
obj-$(CONFIG_TWL4030_WATCHDOG) += twl4030_wdt.o
obj-$(CONFIG_21285_WATCHDOG) += wdt285.o
@@ -47,6 +48,7 @@
obj-$(CONFIG_DAVINCI_WATCHDOG) += davinci_wdt.o
obj-$(CONFIG_ORION_WATCHDOG) += orion_wdt.o
obj-$(CONFIG_SUNXI_WATCHDOG) += sunxi_wdt.o
+obj-$(CONFIG_RN5T618_WATCHDOG) += rn5t618_wdt.o
obj-$(CONFIG_COH901327_WATCHDOG) += coh901327_wdt.o
obj-$(CONFIG_STMP3XXX_RTC_WATCHDOG) += stmp3xxx_rtc_wdt.o
obj-$(CONFIG_NUC900_WATCHDOG) += nuc900_wdt.o
@@ -57,8 +59,10 @@
obj-$(CONFIG_BCM2835_WDT) += bcm2835_wdt.o
obj-$(CONFIG_MOXART_WDT) += moxart_wdt.o
obj-$(CONFIG_SIRFSOC_WATCHDOG) += sirfsoc_wdt.o
+obj-$(CONFIG_QCOM_WDT) += qcom-wdt.o
obj-$(CONFIG_BCM_KONA_WDT) += bcm_kona_wdt.o
obj-$(CONFIG_TEGRA_WATCHDOG) += tegra_wdt.o
+obj-$(CONFIG_MESON_WATCHDOG) += meson_wdt.o
# AVR32 Architecture
obj-$(CONFIG_AT32AP700X_WDT) += at32ap700x_wdt.o
@@ -173,6 +177,7 @@
# Architecture Independent
obj-$(CONFIG_DA9052_WATCHDOG) += da9052_wdt.o
obj-$(CONFIG_DA9055_WATCHDOG) += da9055_wdt.o
+obj-$(CONFIG_DA9063_WATCHDOG) += da9063_wdt.o
obj-$(CONFIG_GPIO_WATCHDOG) += gpio_wdt.o
obj-$(CONFIG_WM831X_WATCHDOG) += wm831x_wdt.o
obj-$(CONFIG_WM8350_WATCHDOG) += wm8350_wdt.o
diff --git a/drivers/watchdog/booke_wdt.c b/drivers/watchdog/booke_wdt.c
index 08a7853..e96b09b 100644
--- a/drivers/watchdog/booke_wdt.c
+++ b/drivers/watchdog/booke_wdt.c
@@ -30,8 +30,6 @@
* occur, and the final time the board will reset.
*/
-u32 booke_wdt_enabled;
-u32 booke_wdt_period = CONFIG_BOOKE_WDT_DEFAULT_TIMEOUT;
#ifdef CONFIG_PPC_FSL_BOOK3E
#define WDTP(x) ((((x)&0x3)<<30)|(((x)&0x3c)<<15))
@@ -41,27 +39,10 @@
#define WDTP_MASK (TCR_WP_MASK)
#endif
-/* Checks wdt=x and wdt_period=xx command-line option */
-notrace int __init early_parse_wdt(char *p)
-{
- if (p && strncmp(p, "0", 1) != 0)
- booke_wdt_enabled = 1;
-
- return 0;
-}
-early_param("wdt", early_parse_wdt);
-
-int __init early_parse_wdt_period(char *p)
-{
- unsigned long ret;
- if (p) {
- if (!kstrtol(p, 0, &ret))
- booke_wdt_period = ret;
- }
-
- return 0;
-}
-early_param("wdt_period", early_parse_wdt_period);
+static bool booke_wdt_enabled;
+module_param(booke_wdt_enabled, bool, 0);
+static int booke_wdt_period = CONFIG_BOOKE_WDT_DEFAULT_TIMEOUT;
+module_param(booke_wdt_period, int, 0);
#ifdef CONFIG_PPC_FSL_BOOK3E
@@ -259,5 +240,6 @@
module_init(booke_wdt_init);
module_exit(booke_wdt_exit);
+MODULE_ALIAS("booke_wdt");
MODULE_DESCRIPTION("PowerPC Book-E watchdog driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/watchdog/cadence_wdt.c b/drivers/watchdog/cadence_wdt.c
new file mode 100644
index 0000000..5927c0a
--- /dev/null
+++ b/drivers/watchdog/cadence_wdt.c
@@ -0,0 +1,516 @@
+/*
+ * Cadence WDT driver - Used by Xilinx Zynq
+ *
+ * Copyright (C) 2010 - 2014 Xilinx, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/clk.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/reboot.h>
+#include <linux/watchdog.h>
+
+#define CDNS_WDT_DEFAULT_TIMEOUT 10
+/* Supports 1 - 516 sec */
+#define CDNS_WDT_MIN_TIMEOUT 1
+#define CDNS_WDT_MAX_TIMEOUT 516
+
+/* Restart key */
+#define CDNS_WDT_RESTART_KEY 0x00001999
+
+/* Counter register access key */
+#define CDNS_WDT_REGISTER_ACCESS_KEY 0x00920000
+
+/* Counter value divisor */
+#define CDNS_WDT_COUNTER_VALUE_DIVISOR 0x1000
+
+/* Clock prescaler value and selection */
+#define CDNS_WDT_PRESCALE_64 64
+#define CDNS_WDT_PRESCALE_512 512
+#define CDNS_WDT_PRESCALE_4096 4096
+#define CDNS_WDT_PRESCALE_SELECT_64 1
+#define CDNS_WDT_PRESCALE_SELECT_512 2
+#define CDNS_WDT_PRESCALE_SELECT_4096 3
+
+/* Input clock frequency */
+#define CDNS_WDT_CLK_10MHZ 10000000
+#define CDNS_WDT_CLK_75MHZ 75000000
+
+/* Counter maximum value */
+#define CDNS_WDT_COUNTER_MAX 0xFFF
+
+static int wdt_timeout = CDNS_WDT_DEFAULT_TIMEOUT;
+static int nowayout = WATCHDOG_NOWAYOUT;
+
+module_param(wdt_timeout, int, 0);
+MODULE_PARM_DESC(wdt_timeout,
+ "Watchdog time in seconds. (default="
+ __MODULE_STRING(CDNS_WDT_DEFAULT_TIMEOUT) ")");
+
+module_param(nowayout, int, 0);
+MODULE_PARM_DESC(nowayout,
+ "Watchdog cannot be stopped once started (default="
+ __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
+/**
+ * struct cdns_wdt - Watchdog device structure
+ * @regs: ioremapped base address of the watchdog register block
+ * @rst: true when the "reset-on-timeout" device tree property is present;
+ *       selects the reset output instead of the interrupt on expiry
+ * @clk: struct clk * of the input clock source
+ * @prescaler: clock divider (512 or 4096) picked at probe time from the
+ *             input clock rate
+ * @ctrl_clksel: prescaler selection value written to the counter control
+ *               register; always matches @prescaler
+ * @io_lock: spinlock serialising register writes
+ * @cdns_wdt_device: watchdog core device structure
+ * @cdns_wdt_notifier: reboot notifier block; its callback stops the
+ *                     watchdog on SYS_DOWN and SYS_HALT
+ *
+ * Structure containing parameters specific to cadence watchdog.
+ */
+struct cdns_wdt {
+ void __iomem *regs;
+ bool rst;
+ struct clk *clk;
+ u32 prescaler;
+ u32 ctrl_clksel;
+ spinlock_t io_lock;
+ struct watchdog_device cdns_wdt_device;
+ struct notifier_block cdns_wdt_notifier;
+};
+
+/* Store @val in the watchdog register found @offset bytes past the base. */
+static inline void cdns_wdt_writereg(struct cdns_wdt *wdt, u32 offset, u32 val)
+{
+	void __iomem *reg = wdt->regs + offset;
+
+	writel_relaxed(val, reg);
+}
+
+/*************************Register Map**************************************/
+
+/* Register Offsets for the WDT */
+#define CDNS_WDT_ZMR_OFFSET 0x0 /* Zero Mode Register */
+#define CDNS_WDT_CCR_OFFSET 0x4 /* Counter Control Register */
+#define CDNS_WDT_RESTART_OFFSET 0x8 /* Restart Register */
+#define CDNS_WDT_SR_OFFSET 0xC /* Status Register */
+
+/*
+ * Zero Mode Register - This register controls how the time out is indicated
+ * and also contains the access code to allow writes to the register (0xABC).
+ */
+#define CDNS_WDT_ZMR_WDEN_MASK 0x00000001 /* Enable the WDT */
+#define CDNS_WDT_ZMR_RSTEN_MASK 0x00000002 /* Enable the reset output */
+#define CDNS_WDT_ZMR_IRQEN_MASK 0x00000004 /* Enable IRQ output */
+#define CDNS_WDT_ZMR_RSTLEN_16 0x00000030 /* Reset pulse of 16 pclk cycles */
+#define CDNS_WDT_ZMR_ZKEY_VAL 0x00ABC000 /* Access key, 0xABC << 12 */
+/*
+ * Counter Control register - This register controls how fast the timer runs
+ * and the reset value and also contains the access code to allow writes to
+ * the register.
+ */
+#define CDNS_WDT_CCR_CRV_MASK 0x00003FFC /* Counter reset value */
+
+/**
+ * cdns_wdt_stop - Stop the watchdog.
+ *
+ * @wdd: watchdog device
+ *
+ * Write the access key to the ZMR register with the WDEN bit cleared.
+ * The register is not read back first: the value written consists of the
+ * access key only, so every other control bit is cleared as well.
+ * The write is done under the register lock.
+ *
+ * Return: always 0
+ */
+static int cdns_wdt_stop(struct watchdog_device *wdd)
+{
+ struct cdns_wdt *wdt = watchdog_get_drvdata(wdd);
+
+ spin_lock(&wdt->io_lock);
+ cdns_wdt_writereg(wdt, CDNS_WDT_ZMR_OFFSET,
+ CDNS_WDT_ZMR_ZKEY_VAL & (~CDNS_WDT_ZMR_WDEN_MASK));
+ spin_unlock(&wdt->io_lock);
+
+ return 0;
+}
+
+/**
+ * cdns_wdt_reload - Pat the watchdog so that it starts counting afresh.
+ *
+ * @wdd: watchdog device
+ *
+ * Writes CDNS_WDT_RESTART_KEY (0x00001999) to the restart register while
+ * holding the register lock.
+ *
+ * Return: always 0
+ */
+static int cdns_wdt_reload(struct watchdog_device *wdd)
+{
+	struct cdns_wdt *priv = watchdog_get_drvdata(wdd);
+	const u32 key = CDNS_WDT_RESTART_KEY;
+
+	spin_lock(&priv->io_lock);
+	cdns_wdt_writereg(priv, CDNS_WDT_RESTART_OFFSET, key);
+	spin_unlock(&priv->io_lock);
+
+	return 0;
+}
+
+/**
+ * cdns_wdt_start - Enable and start the watchdog.
+ *
+ * @wdd: watchdog device
+ *
+ * The counter reset value is derived from the configured timeout:
+ *   count = (timeout * (clock / prescaler)) / 0x1000 + 1
+ * and is clamped to CDNS_WDT_COUNTER_MAX.
+ *
+ * The sequence performed under the register lock is:
+ *  1. write the ZMR access key alone (the same value cdns_wdt_stop() writes),
+ *  2. write the counter reset value, the prescaler selection chosen at
+ *     probe time and the access key to the CCR register,
+ *  3. write WDEN, the reset pulse length, the access key and either RSTEN
+ *     (reset-on-timeout) or IRQEN to the ZMR register,
+ *  4. write the restart key so that counting begins from the new value.
+ *
+ * Return: always 0
+ */
+static int cdns_wdt_start(struct watchdog_device *wdd)
+{
+	struct cdns_wdt *wdt = watchdog_get_drvdata(wdd);
+	unsigned long clock_f = clk_get_rate(wdt->clk);
+	unsigned long count;
+	u32 data;
+
+	/*
+	 * Keep the intermediate result in a full-width variable: narrowing
+	 * it before the range check would let an oversized value wrap around
+	 * and slip past the clamp below.
+	 */
+	count = (wdd->timeout * (clock_f / wdt->prescaler)) /
+		CDNS_WDT_COUNTER_VALUE_DIVISOR + 1;
+	if (count > CDNS_WDT_COUNTER_MAX)
+		count = CDNS_WDT_COUNTER_MAX;
+
+	spin_lock(&wdt->io_lock);
+
+	/* Key only, WDEN clear: same value that cdns_wdt_stop() writes. */
+	cdns_wdt_writereg(wdt, CDNS_WDT_ZMR_OFFSET, CDNS_WDT_ZMR_ZKEY_VAL);
+
+	/* The CCR write is only accepted together with its access key. */
+	data = ((u32)count << 2) & CDNS_WDT_CCR_CRV_MASK;
+	data |= CDNS_WDT_REGISTER_ACCESS_KEY | wdt->ctrl_clksel;
+	cdns_wdt_writereg(wdt, CDNS_WDT_CCR_OFFSET, data);
+
+	data = CDNS_WDT_ZMR_WDEN_MASK | CDNS_WDT_ZMR_RSTLEN_16 |
+	       CDNS_WDT_ZMR_ZKEY_VAL;
+
+	/* Reset on timeout if specified in device tree, interrupt otherwise. */
+	if (wdt->rst)
+		data |= CDNS_WDT_ZMR_RSTEN_MASK;
+	else
+		data |= CDNS_WDT_ZMR_IRQEN_MASK;
+
+	cdns_wdt_writereg(wdt, CDNS_WDT_ZMR_OFFSET, data);
+	cdns_wdt_writereg(wdt, CDNS_WDT_RESTART_OFFSET,
+			  CDNS_WDT_RESTART_KEY);
+	spin_unlock(&wdt->io_lock);
+
+	return 0;
+}
+
+/**
+ * cdns_wdt_settimeout - Set a new timeout value for the watchdog device.
+ *
+ * @wdd: watchdog device
+ * @new_time: new timeout value, in seconds
+ * Return: the result of cdns_wdt_start()
+ *
+ * Records the new timeout in the watchdog_device and immediately calls
+ * cdns_wdt_start() so that the hardware is reprogrammed with it.
+ */
+static int cdns_wdt_settimeout(struct watchdog_device *wdd,
+			       unsigned int new_time)
+{
+	int status;
+
+	wdd->timeout = new_time;
+	status = cdns_wdt_start(wdd);
+
+	return status;
+}
+
+/**
+ * cdns_wdt_irq_handler - Report a watchdog timeout.
+ *
+ * @irq: interrupt number
+ * @dev_id: the platform device pointer passed at request time
+ * Return: IRQ_HANDLED
+ *
+ * Only registered when reset-on-timeout is not requested; it just logs
+ * that the watchdog expired.
+ */
+static irqreturn_t cdns_wdt_irq_handler(int irq, void *dev_id)
+{
+	struct device *dev = &((struct platform_device *)dev_id)->dev;
+
+	dev_info(dev, "Watchdog timed out. Internal reset not enabled\n");
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Info structure used to indicate the features supported by the device
+ * to the upper layers. This is defined in watchdog.h header file.
+ */
+static struct watchdog_info cdns_wdt_info = {
+ .identity = "cdns_wdt watchdog",
+ .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING |
+ WDIOF_MAGICCLOSE,
+};
+
+/* Watchdog Core Ops */
+static struct watchdog_ops cdns_wdt_ops = {
+ .owner = THIS_MODULE,
+ .start = cdns_wdt_start,
+ .stop = cdns_wdt_stop,
+ .ping = cdns_wdt_reload,
+ .set_timeout = cdns_wdt_settimeout,
+};
+
+/**
+ * cdns_wdt_notify_sys - Reboot/shutdown notifier callback.
+ *
+ * @this: notifier block embedded in struct cdns_wdt
+ * @code: event code from the reboot notifier chain
+ * @unused: unused
+ * Return: NOTIFY_DONE
+ *
+ * Stops the watchdog on SYS_DOWN and SYS_HALT so that it is not left
+ * running while the system goes down; every other event is ignored.
+ */
+static int cdns_wdt_notify_sys(struct notifier_block *this, unsigned long code,
+			       void *unused)
+{
+	struct cdns_wdt *priv;
+
+	switch (code) {
+	case SYS_DOWN:
+	case SYS_HALT:
+		priv = container_of(this, struct cdns_wdt, cdns_wdt_notifier);
+		cdns_wdt_stop(&priv->cdns_wdt_device);
+		break;
+	default:
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+/************************Platform Operations*****************************/
+/**
+ * cdns_wdt_probe - Probe call for the device.
+ *
+ * @pdev: handle to the platform device structure.
+ * Return: 0 on success, negative error otherwise.
+ *
+ * Allocates the driver state, maps the registers, optionally requests the
+ * timeout interrupt (only when "reset-on-timeout" is absent), enables the
+ * input clock, picks the prescaler from the clock rate and registers the
+ * reboot notifier and the watchdog device. Everything acquired without
+ * devm_* helpers is released again on the error paths.
+ */
+static int cdns_wdt_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	int ret, irq;
+	unsigned long clock_f;
+	struct cdns_wdt *wdt;
+	struct watchdog_device *cdns_wdt_device;
+
+	wdt = devm_kzalloc(&pdev->dev, sizeof(*wdt), GFP_KERNEL);
+	if (!wdt)
+		return -ENOMEM;
+
+	cdns_wdt_device = &wdt->cdns_wdt_device;
+	cdns_wdt_device->info = &cdns_wdt_info;
+	cdns_wdt_device->ops = &cdns_wdt_ops;
+	cdns_wdt_device->timeout = CDNS_WDT_DEFAULT_TIMEOUT;
+	cdns_wdt_device->min_timeout = CDNS_WDT_MIN_TIMEOUT;
+	cdns_wdt_device->max_timeout = CDNS_WDT_MAX_TIMEOUT;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	wdt->regs = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(wdt->regs))
+		return PTR_ERR(wdt->regs);
+
+	/* Register the interrupt */
+	wdt->rst = of_property_read_bool(pdev->dev.of_node, "reset-on-timeout");
+	irq = platform_get_irq(pdev, 0);
+	if (!wdt->rst && irq >= 0) {
+		ret = devm_request_irq(&pdev->dev, irq, cdns_wdt_irq_handler, 0,
+				       pdev->name, pdev);
+		if (ret) {
+			dev_err(&pdev->dev,
+				"cannot register interrupt handler err=%d\n",
+				ret);
+			return ret;
+		}
+	}
+
+	/* Initialize the members of cdns_wdt structure */
+	cdns_wdt_device->parent = &pdev->dev;
+
+	ret = watchdog_init_timeout(cdns_wdt_device, wdt_timeout, &pdev->dev);
+	if (ret) {
+		dev_err(&pdev->dev, "unable to set timeout value\n");
+		return ret;
+	}
+
+	watchdog_set_nowayout(cdns_wdt_device, nowayout);
+	watchdog_set_drvdata(cdns_wdt_device, wdt);
+
+	wdt->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(wdt->clk)) {
+		dev_err(&pdev->dev, "input clock not found\n");
+		ret = PTR_ERR(wdt->clk);
+		return ret;
+	}
+
+	ret = clk_prepare_enable(wdt->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "unable to enable clock\n");
+		return ret;
+	}
+
+	/* Slower input clocks get the smaller divider. */
+	clock_f = clk_get_rate(wdt->clk);
+	if (clock_f <= CDNS_WDT_CLK_75MHZ) {
+		wdt->prescaler = CDNS_WDT_PRESCALE_512;
+		wdt->ctrl_clksel = CDNS_WDT_PRESCALE_SELECT_512;
+	} else {
+		wdt->prescaler = CDNS_WDT_PRESCALE_4096;
+		wdt->ctrl_clksel = CDNS_WDT_PRESCALE_SELECT_4096;
+	}
+
+	spin_lock_init(&wdt->io_lock);
+
+	wdt->cdns_wdt_notifier.notifier_call = &cdns_wdt_notify_sys;
+	ret = register_reboot_notifier(&wdt->cdns_wdt_notifier);
+	if (ret != 0) {
+		dev_err(&pdev->dev, "cannot register reboot notifier err=%d\n",
+			ret);
+		goto err_clk_disable;
+	}
+
+	ret = watchdog_register_device(cdns_wdt_device);
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to register wdt device\n");
+		goto err_unregister_notifier;
+	}
+	platform_set_drvdata(pdev, wdt);
+
+	dev_dbg(&pdev->dev, "Xilinx Watchdog Timer at %p with timeout %ds%s\n",
+		wdt->regs, cdns_wdt_device->timeout,
+		nowayout ? ", nowayout" : "");
+
+	return 0;
+
+err_unregister_notifier:
+	/*
+	 * The notifier block lives in devm-allocated memory that is freed
+	 * once probe fails, so it must not stay on the reboot chain.
+	 */
+	unregister_reboot_notifier(&wdt->cdns_wdt_notifier);
+err_clk_disable:
+	clk_disable_unprepare(wdt->clk);
+
+	return ret;
+}
+
+/**
+ * cdns_wdt_remove - Remove call for the device.
+ *
+ * @pdev: handle to the platform device structure.
+ * Return: always 0.
+ *
+ * Stops the watchdog, unregisters the watchdog device and the reboot
+ * notifier, then disables the input clock enabled at probe time.
+ */
+static int cdns_wdt_remove(struct platform_device *pdev)
+{
+ struct cdns_wdt *wdt = platform_get_drvdata(pdev);
+
+ cdns_wdt_stop(&wdt->cdns_wdt_device);
+ watchdog_unregister_device(&wdt->cdns_wdt_device);
+ unregister_reboot_notifier(&wdt->cdns_wdt_notifier);
+ clk_disable_unprepare(wdt->clk);
+
+ return 0;
+}
+
+/**
+ * cdns_wdt_shutdown - Stop the watchdog and gate its clock at shutdown.
+ *
+ * @pdev: handle to the platform structure.
+ */
+static void cdns_wdt_shutdown(struct platform_device *pdev)
+{
+	struct cdns_wdt *priv = platform_get_drvdata(pdev);
+	struct watchdog_device *wdd = &priv->cdns_wdt_device;
+
+	cdns_wdt_stop(wdd);
+	clk_disable_unprepare(priv->clk);
+}
+
+/**
+ * cdns_wdt_suspend - Stop the watchdog and gate its clock for suspend.
+ *
+ * @dev: handle to the device structure.
+ * Return: 0 always.
+ */
+static int __maybe_unused cdns_wdt_suspend(struct device *dev)
+{
+	/* Same pointer platform_get_drvdata() returns for the owning pdev. */
+	struct cdns_wdt *priv = dev_get_drvdata(dev);
+	struct watchdog_device *wdd = &priv->cdns_wdt_device;
+
+	cdns_wdt_stop(wdd);
+	clk_disable_unprepare(priv->clk);
+
+	return 0;
+}
+
+/**
+ * cdns_wdt_resume - Resume the device.
+ *
+ * @dev: handle to the device structure.
+ * Return: 0 on success, errno otherwise.
+ *
+ * Re-enables the input clock that suspend disabled. The watchdog itself is
+ * only restarted when the watchdog core considers it active; otherwise a
+ * watchdog that was never started through the core would be armed here.
+ */
+static int __maybe_unused cdns_wdt_resume(struct device *dev)
+{
+	int ret;
+	struct platform_device *pdev = container_of(dev,
+			struct platform_device, dev);
+	struct cdns_wdt *wdt = platform_get_drvdata(pdev);
+
+	ret = clk_prepare_enable(wdt->clk);
+	if (ret) {
+		dev_err(dev, "unable to enable clock\n");
+		return ret;
+	}
+
+	/* Do not arm a watchdog that was idle before suspend. */
+	if (watchdog_active(&wdt->cdns_wdt_device))
+		cdns_wdt_start(&wdt->cdns_wdt_device);
+
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(cdns_wdt_pm_ops, cdns_wdt_suspend, cdns_wdt_resume);
+
+static struct of_device_id cdns_wdt_of_match[] = {
+ { .compatible = "cdns,wdt-r1p2", },
+ { /* end of table */ }
+};
+MODULE_DEVICE_TABLE(of, cdns_wdt_of_match);
+
+/* Driver Structure */
+static struct platform_driver cdns_wdt_driver = {
+ .probe = cdns_wdt_probe,
+ .remove = cdns_wdt_remove,
+ .shutdown = cdns_wdt_shutdown,
+ .driver = {
+ .name = "cdns-wdt",
+ .owner = THIS_MODULE,
+ .of_match_table = cdns_wdt_of_match,
+ .pm = &cdns_wdt_pm_ops,
+ },
+};
+
+module_platform_driver(cdns_wdt_driver);
+
+MODULE_AUTHOR("Xilinx, Inc.");
+MODULE_DESCRIPTION("Watchdog driver for Cadence WDT");
+MODULE_LICENSE("GPL");
diff --git a/drivers/watchdog/da9063_wdt.c b/drivers/watchdog/da9063_wdt.c
new file mode 100644
index 0000000..2cd6b2c
--- /dev/null
+++ b/drivers/watchdog/da9063_wdt.c
@@ -0,0 +1,191 @@
+/*
+ * Watchdog driver for DA9063 PMICs.
+ *
+ * Copyright(c) 2012 Dialog Semiconductor Ltd.
+ *
+ * Author: Mariusz Wojtasik <mariusz.wojtasik@diasemi.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/watchdog.h>
+#include <linux/platform_device.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/mfd/da9063/registers.h>
+#include <linux/mfd/da9063/core.h>
+#include <linux/regmap.h>
+
+/*
+ * Watchdog selector to timeout in seconds.
+ * 0: WDT disabled;
+ * others: timeout = 2048 ms * 2^(TWDSCALE-1).
+ */
+static const unsigned int wdt_timeout[] = { 0, 2, 4, 8, 16, 32, 65, 131 };
+#define DA9063_TWDSCALE_DISABLE 0
+#define DA9063_TWDSCALE_MIN 1
+#define DA9063_TWDSCALE_MAX (ARRAY_SIZE(wdt_timeout) - 1)
+#define DA9063_WDT_MIN_TIMEOUT wdt_timeout[DA9063_TWDSCALE_MIN]
+#define DA9063_WDT_MAX_TIMEOUT wdt_timeout[DA9063_TWDSCALE_MAX]
+#define DA9063_WDG_TIMEOUT wdt_timeout[3]
+
+/*
+ * Per-device driver state: the parent DA9063 MFD handle (its regmap and
+ * device are used for register access and logging) and the watchdog core
+ * device embedded alongside it.
+ */
+struct da9063_watchdog {
+ struct da9063 *da9063;
+ struct watchdog_device wdtdev;
+};
+
+/*
+ * Map a timeout in seconds to the smallest TWDSCALE selector whose period
+ * is at least that long; requests beyond the table fall back to the
+ * largest selector.
+ */
+static unsigned int da9063_wdt_timeout_to_sel(unsigned int secs)
+{
+	unsigned int sel = DA9063_TWDSCALE_MIN;
+
+	while (sel < DA9063_TWDSCALE_MAX && wdt_timeout[sel] < secs)
+		sel++;
+
+	return sel;
+}
+
+/* Program the TWDSCALE field of CONTROL_D with @regval. */
+static int _da9063_wdt_set_timeout(struct da9063 *da9063, unsigned int regval)
+{
+	struct regmap *map = da9063->regmap;
+
+	return regmap_update_bits(map, DA9063_REG_CONTROL_D,
+				  DA9063_TWDSCALE_MASK, regval);
+}
+
+/*
+ * Start the watchdog by programming the selector that corresponds to the
+ * currently configured timeout. Returns the regmap result.
+ */
+static int da9063_wdt_start(struct watchdog_device *wdd)
+{
+	struct da9063_watchdog *priv = watchdog_get_drvdata(wdd);
+	unsigned int sel = da9063_wdt_timeout_to_sel(priv->wdtdev.timeout);
+	int err = _da9063_wdt_set_timeout(priv->da9063, sel);
+
+	if (err)
+		dev_err(priv->da9063->dev, "Watchdog failed to start (err = %d)\n",
+			err);
+
+	return err;
+}
+
+/*
+ * Stop the watchdog by writing the "disabled" selector to TWDSCALE.
+ * Returns the regmap result.
+ */
+static int da9063_wdt_stop(struct watchdog_device *wdd)
+{
+	struct da9063_watchdog *priv = watchdog_get_drvdata(wdd);
+	int err;
+
+	err = _da9063_wdt_set_timeout(priv->da9063, DA9063_TWDSCALE_DISABLE);
+	if (!err)
+		return 0;
+
+	dev_alert(priv->da9063->dev, "Watchdog failed to stop (err = %d)\n",
+		  err);
+
+	return err;
+}
+
+/* Kick the watchdog by writing DA9063_WATCHDOG to CONTROL_F. */
+static int da9063_wdt_ping(struct watchdog_device *wdd)
+{
+	struct da9063_watchdog *priv = watchdog_get_drvdata(wdd);
+	struct da9063 *chip = priv->da9063;
+	int err;
+
+	err = regmap_write(chip->regmap, DA9063_REG_CONTROL_F,
+			   DA9063_WATCHDOG);
+	if (!err)
+		return 0;
+
+	dev_alert(chip->dev, "Failed to ping the watchdog (err = %d)\n",
+		  err);
+
+	return err;
+}
+
+/*
+ * Set a new timeout, rounded up to the nearest value the hardware supports.
+ *
+ * Writing a non-zero TWDSCALE selector is exactly what da9063_wdt_start()
+ * does, so the register is only touched while the watchdog is active.
+ * For a stopped watchdog the value is just recorded in wdd->timeout and
+ * programmed by the next start; otherwise setting a timeout would arm the
+ * watchdog as a side effect.
+ *
+ * Returns 0 on success or the regmap error code.
+ */
+static int da9063_wdt_set_timeout(struct watchdog_device *wdd,
+				  unsigned int timeout)
+{
+	struct da9063_watchdog *wdt = watchdog_get_drvdata(wdd);
+	unsigned int selector;
+	int ret;
+
+	selector = da9063_wdt_timeout_to_sel(timeout);
+
+	if (watchdog_active(wdd)) {
+		ret = _da9063_wdt_set_timeout(wdt->da9063, selector);
+		if (ret) {
+			dev_err(wdt->da9063->dev, "Failed to set watchdog timeout (err = %d)\n",
+				ret);
+			return ret;
+		}
+	}
+
+	/* Report the effective (rounded) timeout back to the core. */
+	wdd->timeout = wdt_timeout[selector];
+
+	return 0;
+}
+
+static const struct watchdog_info da9063_watchdog_info = {
+ .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING,
+ .identity = "DA9063 Watchdog",
+};
+
+static const struct watchdog_ops da9063_watchdog_ops = {
+ .owner = THIS_MODULE,
+ .start = da9063_wdt_start,
+ .stop = da9063_wdt_stop,
+ .ping = da9063_wdt_ping,
+ .set_timeout = da9063_wdt_set_timeout,
+};
+
+/*
+ * Bind to the watchdog cell of a DA9063 MFD: fetch the parent's driver
+ * data, allocate the driver state, fill in the watchdog core device and
+ * register it. Returns 0 on success or a negative errno.
+ */
+static int da9063_wdt_probe(struct platform_device *pdev)
+{
+	struct da9063 *da9063;
+	struct da9063_watchdog *wdt;
+
+	if (!pdev->dev.parent)
+		return -EINVAL;
+
+	da9063 = dev_get_drvdata(pdev->dev.parent);
+	if (!da9063)
+		return -EINVAL;
+
+	wdt = devm_kzalloc(&pdev->dev, sizeof(*wdt), GFP_KERNEL);
+	if (!wdt)
+		return -ENOMEM;
+
+	wdt->da9063 = da9063;
+
+	wdt->wdtdev.info = &da9063_watchdog_info;
+	wdt->wdtdev.ops = &da9063_watchdog_ops;
+	wdt->wdtdev.min_timeout = DA9063_WDT_MIN_TIMEOUT;
+	wdt->wdtdev.max_timeout = DA9063_WDT_MAX_TIMEOUT;
+	wdt->wdtdev.timeout = DA9063_WDG_TIMEOUT;
+	/* Link the watchdog to its platform device, as sibling drivers do. */
+	wdt->wdtdev.parent = &pdev->dev;
+
+	wdt->wdtdev.status = WATCHDOG_NOWAYOUT_INIT_STATUS;
+
+	watchdog_set_drvdata(&wdt->wdtdev, wdt);
+	dev_set_drvdata(&pdev->dev, wdt);
+
+	return watchdog_register_device(&wdt->wdtdev);
+}
+
+/* Unregister the watchdog device registered by probe. Always returns 0. */
+static int da9063_wdt_remove(struct platform_device *pdev)
+{
+	struct da9063_watchdog *priv = platform_get_drvdata(pdev);
+
+	watchdog_unregister_device(&priv->wdtdev);
+
+	return 0;
+}
+
+static struct platform_driver da9063_wdt_driver = {
+ .probe = da9063_wdt_probe,
+ .remove = da9063_wdt_remove,
+ .driver = {
+ .name = DA9063_DRVNAME_WATCHDOG,
+ },
+};
+module_platform_driver(da9063_wdt_driver);
+
+MODULE_AUTHOR("Mariusz Wojtasik <mariusz.wojtasik@diasemi.com>");
+MODULE_DESCRIPTION("Watchdog driver for Dialog DA9063");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:" DA9063_DRVNAME_WATCHDOG);
diff --git a/drivers/watchdog/dw_wdt.c b/drivers/watchdog/dw_wdt.c
index 9f21029..9e577a6 100644
--- a/drivers/watchdog/dw_wdt.c
+++ b/drivers/watchdog/dw_wdt.c
@@ -21,6 +21,7 @@
#include <linux/bitops.h>
#include <linux/clk.h>
+#include <linux/delay.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/fs.h>
@@ -29,9 +30,11 @@
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
+#include <linux/notifier.h>
#include <linux/of.h>
#include <linux/pm.h>
#include <linux/platform_device.h>
+#include <linux/reboot.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/uaccess.h>
@@ -40,6 +43,7 @@
#define WDOG_CONTROL_REG_OFFSET 0x00
#define WDOG_CONTROL_REG_WDT_EN_MASK 0x01
#define WDOG_TIMEOUT_RANGE_REG_OFFSET 0x04
+#define WDOG_TIMEOUT_RANGE_TOPINIT_SHIFT 4
#define WDOG_CURRENT_COUNT_REG_OFFSET 0x08
#define WDOG_COUNTER_RESTART_REG_OFFSET 0x0c
#define WDOG_COUNTER_RESTART_KICK_VALUE 0x76
@@ -62,6 +66,7 @@
unsigned long next_heartbeat;
struct timer_list timer;
int expect_close;
+ struct notifier_block restart_handler;
} dw_wdt;
static inline int dw_wdt_is_enabled(void)
@@ -106,7 +111,8 @@
}
/* Set the new value in the watchdog. */
- writel(top_val, dw_wdt.regs + WDOG_TIMEOUT_RANGE_REG_OFFSET);
+ writel(top_val | top_val << WDOG_TIMEOUT_RANGE_TOPINIT_SHIFT,
+ dw_wdt.regs + WDOG_TIMEOUT_RANGE_REG_OFFSET);
dw_wdt_set_next_heartbeat();
@@ -119,6 +125,26 @@
WDOG_COUNTER_RESTART_REG_OFFSET);
}
+/*
+ * Restart handler: use the watchdog to reset the machine.
+ *
+ * Writes 0 to the timeout range register (presumably the shortest
+ * period -- confirm against the DesignWare WDT databook), then either
+ * kicks the counter if the watchdog is already enabled or enables it if
+ * it is not. Waits up to 500 ms for the reset to take effect and returns
+ * NOTIFY_DONE if it did not.
+ */
+static int dw_wdt_restart_handle(struct notifier_block *this,
+ unsigned long mode, void *cmd)
+{
+ u32 val;
+
+ writel(0, dw_wdt.regs + WDOG_TIMEOUT_RANGE_REG_OFFSET);
+ val = readl(dw_wdt.regs + WDOG_CONTROL_REG_OFFSET);
+ /* Already running: restart the counter; otherwise switch it on. */
+ if (val & WDOG_CONTROL_REG_WDT_EN_MASK)
+ writel(WDOG_COUNTER_RESTART_KICK_VALUE, dw_wdt.regs +
+ WDOG_COUNTER_RESTART_REG_OFFSET);
+ else
+ writel(WDOG_CONTROL_REG_WDT_EN_MASK,
+ dw_wdt.regs + WDOG_CONTROL_REG_OFFSET);
+
+ /* wait for reset to assert... */
+ mdelay(500);
+
+ return NOTIFY_DONE;
+}
+
static void dw_wdt_ping(unsigned long data)
{
if (time_before(jiffies, dw_wdt.next_heartbeat) ||
@@ -314,6 +340,12 @@
if (ret)
goto out_disable_clk;
+ dw_wdt.restart_handler.notifier_call = dw_wdt_restart_handle;
+ dw_wdt.restart_handler.priority = 128;
+ ret = register_restart_handler(&dw_wdt.restart_handler);
+ if (ret)
+ pr_warn("cannot register restart handler\n");
+
dw_wdt_set_next_heartbeat();
setup_timer(&dw_wdt.timer, dw_wdt_ping, 0);
mod_timer(&dw_wdt.timer, jiffies + WDT_TIMEOUT);
@@ -328,6 +360,8 @@
static int dw_wdt_drv_remove(struct platform_device *pdev)
{
+ unregister_restart_handler(&dw_wdt.restart_handler);
+
misc_deregister(&dw_wdt_miscdev);
clk_disable_unprepare(dw_wdt.clk);
diff --git a/drivers/watchdog/imx2_wdt.c b/drivers/watchdog/imx2_wdt.c
index 68c3d37..7e12f88 100644
--- a/drivers/watchdog/imx2_wdt.c
+++ b/drivers/watchdog/imx2_wdt.c
@@ -22,14 +22,17 @@
*/
#include <linux/clk.h>
+#include <linux/delay.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
+#include <linux/notifier.h>
#include <linux/of_address.h>
#include <linux/platform_device.h>
+#include <linux/reboot.h>
#include <linux/regmap.h>
#include <linux/timer.h>
#include <linux/watchdog.h>
@@ -59,6 +62,7 @@
struct regmap *regmap;
struct timer_list timer; /* Pings the watchdog when closed */
struct watchdog_device wdog;
+ struct notifier_block restart_handler;
};
static bool nowayout = WATCHDOG_NOWAYOUT;
@@ -77,6 +81,31 @@
.options = WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT | WDIOF_MAGICCLOSE,
};
+/*
+ * Restart handler: reset the SoC through the watchdog.
+ *
+ * Writes IMX2_WDT_WCR_WDE alone to register offset 0 three times in a row
+ * (see the errata note below), waits up to 500 ms for the reset to take
+ * effect and returns NOTIFY_DONE if it did not.
+ */
+static int imx2_restart_handler(struct notifier_block *this, unsigned long mode,
+ void *cmd)
+{
+ unsigned int wcr_enable = IMX2_WDT_WCR_WDE;
+ struct imx2_wdt_device *wdev = container_of(this,
+ struct imx2_wdt_device,
+ restart_handler);
+ /* Assert SRS signal */
+ regmap_write(wdev->regmap, 0, wcr_enable);
+ /*
+ * Due to imx6q errata ERR004346 (WDOG: WDOG SRS bit requires to be
+ * written twice), two more writes are issued so that at least two of
+ * them fall within the same 32kHz clock period. The platform is not
+ * checked here, since the extra writes are not a burden for other
+ * platforms.
+ */
+ regmap_write(wdev->regmap, 0, wcr_enable);
+ regmap_write(wdev->regmap, 0, wcr_enable);
+
+ /* wait for reset to assert... */
+ mdelay(500);
+
+ return NOTIFY_DONE;
+}
+
static inline void imx2_wdt_setup(struct watchdog_device *wdog)
{
struct imx2_wdt_device *wdev = watchdog_get_drvdata(wdog);
@@ -191,12 +220,10 @@
static int __init imx2_wdt_probe(struct platform_device *pdev)
{
- struct device_node *np = pdev->dev.of_node;
struct imx2_wdt_device *wdev;
struct watchdog_device *wdog;
struct resource *res;
void __iomem *base;
- bool big_endian;
int ret;
u32 val;
@@ -204,10 +231,6 @@
if (!wdev)
return -ENOMEM;
- big_endian = of_property_read_bool(np, "big-endian");
- if (big_endian)
- imx2_wdt_regmap_config.val_format_endian = REGMAP_ENDIAN_BIG;
-
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
base = devm_ioremap_resource(&pdev->dev, res);
if (IS_ERR(base))
@@ -257,6 +280,12 @@
return ret;
}
+ wdev->restart_handler.notifier_call = imx2_restart_handler;
+ wdev->restart_handler.priority = 128;
+ ret = register_restart_handler(&wdev->restart_handler);
+ if (ret)
+ dev_err(&pdev->dev, "cannot register restart handler\n");
+
dev_info(&pdev->dev, "timeout %d sec (nowayout=%d)\n",
wdog->timeout, nowayout);
@@ -268,6 +297,8 @@
struct watchdog_device *wdog = platform_get_drvdata(pdev);
struct imx2_wdt_device *wdev = watchdog_get_drvdata(wdog);
+ unregister_restart_handler(&wdev->restart_handler);
+
watchdog_unregister_device(wdog);
if (imx2_wdt_is_running(wdev)) {
diff --git a/drivers/watchdog/meson_wdt.c b/drivers/watchdog/meson_wdt.c
new file mode 100644
index 0000000..ef6a298
--- /dev/null
+++ b/drivers/watchdog/meson_wdt.c
@@ -0,0 +1,236 @@
+/*
+ * Meson Watchdog Driver
+ *
+ * Copyright (c) 2014 Carlo Caione
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/notifier.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/reboot.h>
+#include <linux/types.h>
+#include <linux/watchdog.h>
+
+#define DRV_NAME "meson_wdt"
+
+#define MESON_WDT_TC 0x00
+#define MESON_WDT_TC_EN BIT(22)
+#define MESON_WDT_TC_TM_MASK 0x3fffff
+#define MESON_WDT_DC_RESET (3 << 24)
+
+#define MESON_WDT_RESET 0x04
+
+#define MESON_WDT_TIMEOUT 30
+#define MESON_WDT_MIN_TIMEOUT 1
+#define MESON_WDT_MAX_TIMEOUT (MESON_WDT_TC_TM_MASK / 100000)
+
+#define MESON_SEC_TO_TC(s) ((s) * 100000)
+
+static bool nowayout = WATCHDOG_NOWAYOUT;
+static unsigned int timeout = MESON_WDT_TIMEOUT;
+
+/*
+ * Per-device driver state: the watchdog core device, the ioremapped
+ * register base and the notifier block registered as a restart handler.
+ */
+struct meson_wdt_dev {
+ struct watchdog_device wdt_dev;
+ void __iomem *wdt_base;
+ struct notifier_block restart_handler;
+};
+
+/*
+ * Restart handler: keep writing the reset/enable pattern to the TC
+ * register every 5 ms until the machine resets. The loop never exits, so
+ * the trailing return only satisfies the notifier prototype.
+ */
+static int meson_restart_handle(struct notifier_block *this, unsigned long mode,
+				void *cmd)
+{
+	struct meson_wdt_dev *priv =
+		container_of(this, struct meson_wdt_dev, restart_handler);
+	const u32 reboot_val = MESON_WDT_DC_RESET | MESON_WDT_TC_EN;
+
+	for (;;) {
+		writel(reboot_val, priv->wdt_base + MESON_WDT_TC);
+		mdelay(5);
+	}
+
+	return NOTIFY_DONE;
+}
+
+/* Kick the watchdog by writing 0 to the RESET register. Always returns 0. */
+static int meson_wdt_ping(struct watchdog_device *wdt_dev)
+{
+	struct meson_wdt_dev *priv = watchdog_get_drvdata(wdt_dev);
+	void __iomem *reset_reg = priv->wdt_base + MESON_WDT_RESET;
+
+	writel(0, reset_reg);
+
+	return 0;
+}
+
+/*
+ * Replace the timeout field of the TC register with @timeout converted by
+ * MESON_SEC_TO_TC(), leaving the remaining bits untouched.
+ */
+static void meson_wdt_change_timeout(struct watchdog_device *wdt_dev,
+				     unsigned int timeout)
+{
+	struct meson_wdt_dev *priv = watchdog_get_drvdata(wdt_dev);
+	void __iomem *tc = priv->wdt_base + MESON_WDT_TC;
+	u32 val = readl(tc);
+
+	val &= ~MESON_WDT_TC_TM_MASK;
+	val |= MESON_SEC_TO_TC(timeout);
+	writel(val, tc);
+}
+
+/*
+ * Record the new timeout, program it into the hardware and kick the
+ * watchdog. Always returns 0.
+ */
+static int meson_wdt_set_timeout(struct watchdog_device *wdt_dev,
+				 unsigned int timeout)
+{
+	wdt_dev->timeout = timeout;
+	meson_wdt_change_timeout(wdt_dev, timeout);
+
+	meson_wdt_ping(wdt_dev);
+	return 0;
+}
+
+/* Clear the enable bit in the TC register. Always returns 0. */
+static int meson_wdt_stop(struct watchdog_device *wdt_dev)
+{
+	struct meson_wdt_dev *priv = watchdog_get_drvdata(wdt_dev);
+	void __iomem *tc = priv->wdt_base + MESON_WDT_TC;
+	u32 val = readl(tc);
+
+	val &= ~MESON_WDT_TC_EN;
+	writel(val, tc);
+
+	return 0;
+}
+
+/*
+ * Program the configured timeout, kick the watchdog, then set the enable
+ * bit in the TC register. Always returns 0.
+ */
+static int meson_wdt_start(struct watchdog_device *wdt_dev)
+{
+	struct meson_wdt_dev *priv = watchdog_get_drvdata(wdt_dev);
+	void __iomem *tc = priv->wdt_base + MESON_WDT_TC;
+	u32 val;
+
+	meson_wdt_change_timeout(wdt_dev, priv->wdt_dev.timeout);
+	meson_wdt_ping(wdt_dev);
+
+	val = readl(tc);
+	val |= MESON_WDT_TC_EN;
+	writel(val, tc);
+
+	return 0;
+}
+
+static const struct watchdog_info meson_wdt_info = {
+ .identity = DRV_NAME,
+ .options = WDIOF_SETTIMEOUT |
+ WDIOF_KEEPALIVEPING |
+ WDIOF_MAGICCLOSE,
+};
+
+static const struct watchdog_ops meson_wdt_ops = {
+ .owner = THIS_MODULE,
+ .start = meson_wdt_start,
+ .stop = meson_wdt_stop,
+ .ping = meson_wdt_ping,
+ .set_timeout = meson_wdt_set_timeout,
+};
+
+/*
+ * Probe: allocate the driver state, map the registers, set up and register
+ * the watchdog device (stopping the hardware first), then register the
+ * restart handler. A failure to register the restart handler is logged
+ * but does not fail the probe. Returns 0 on success or a negative errno.
+ */
+static int meson_wdt_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	struct meson_wdt_dev *meson_wdt;
+	int err;
+
+	meson_wdt = devm_kzalloc(&pdev->dev, sizeof(*meson_wdt), GFP_KERNEL);
+	if (!meson_wdt)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	meson_wdt->wdt_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(meson_wdt->wdt_base))
+		return PTR_ERR(meson_wdt->wdt_base);
+
+	meson_wdt->wdt_dev.parent = &pdev->dev;
+	meson_wdt->wdt_dev.info = &meson_wdt_info;
+	meson_wdt->wdt_dev.ops = &meson_wdt_ops;
+	meson_wdt->wdt_dev.timeout = MESON_WDT_TIMEOUT;
+	meson_wdt->wdt_dev.max_timeout = MESON_WDT_MAX_TIMEOUT;
+	meson_wdt->wdt_dev.min_timeout = MESON_WDT_MIN_TIMEOUT;
+
+	watchdog_set_drvdata(&meson_wdt->wdt_dev, meson_wdt);
+
+	/* Return value ignored: the default set above stays in place. */
+	watchdog_init_timeout(&meson_wdt->wdt_dev, timeout, &pdev->dev);
+	watchdog_set_nowayout(&meson_wdt->wdt_dev, nowayout);
+
+	meson_wdt_stop(&meson_wdt->wdt_dev);
+
+	err = watchdog_register_device(&meson_wdt->wdt_dev);
+	if (err)
+		return err;
+
+	platform_set_drvdata(pdev, meson_wdt);
+
+	meson_wdt->restart_handler.notifier_call = meson_restart_handle;
+	meson_wdt->restart_handler.priority = 128;
+	err = register_restart_handler(&meson_wdt->restart_handler);
+	if (err)
+		dev_err(&pdev->dev,
+			"cannot register restart handler (err=%d)\n", err);
+
+	/* Terminate the message so it is not merged with the next log line. */
+	dev_info(&pdev->dev, "Watchdog enabled (timeout=%u sec, nowayout=%d)\n",
+		 meson_wdt->wdt_dev.timeout, nowayout);
+
+	return 0;
+}
+
+/*
+ * Undo probe: drop the restart handler first, then unregister the
+ * watchdog device. Always returns 0.
+ */
+static int meson_wdt_remove(struct platform_device *pdev)
+{
+	struct meson_wdt_dev *priv = platform_get_drvdata(pdev);
+
+	unregister_restart_handler(&priv->restart_handler);
+	watchdog_unregister_device(&priv->wdt_dev);
+
+	return 0;
+}
+
+/* Stop the watchdog when the system shuts down. */
+static void meson_wdt_shutdown(struct platform_device *pdev)
+{
+	struct meson_wdt_dev *priv = platform_get_drvdata(pdev);
+	struct watchdog_device *wdd = &priv->wdt_dev;
+
+	meson_wdt_stop(wdd);
+}
+
+static const struct of_device_id meson_wdt_dt_ids[] = {
+ { .compatible = "amlogic,meson6-wdt" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, meson_wdt_dt_ids);
+
+static struct platform_driver meson_wdt_driver = {
+ .probe = meson_wdt_probe,
+ .remove = meson_wdt_remove,
+ .shutdown = meson_wdt_shutdown,
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = DRV_NAME,
+ .of_match_table = meson_wdt_dt_ids,
+ },
+};
+
+module_platform_driver(meson_wdt_driver);
+
+module_param(timeout, uint, 0);
+MODULE_PARM_DESC(timeout, "Watchdog heartbeat in seconds");
+
+module_param(nowayout, bool, 0);
+MODULE_PARM_DESC(nowayout,
+ "Watchdog cannot be stopped once started (default="
+ __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Carlo Caione <carlo@caione.org>");
+MODULE_DESCRIPTION("Meson Watchdog Timer Driver");
diff --git a/drivers/watchdog/of_xilinx_wdt.c b/drivers/watchdog/of_xilinx_wdt.c
index 1e6e28d..b2e1b4c 100644
--- a/drivers/watchdog/of_xilinx_wdt.c
+++ b/drivers/watchdog/of_xilinx_wdt.c
@@ -236,7 +236,6 @@
.probe = xwdt_probe,
.remove = xwdt_remove,
.driver = {
- .owner = THIS_MODULE,
.name = WATCHDOG_NAME,
.of_match_table = xwdt_of_match,
},
diff --git a/drivers/watchdog/qcom-wdt.c b/drivers/watchdog/qcom-wdt.c
new file mode 100644
index 0000000..aa85618
--- /dev/null
+++ b/drivers/watchdog/qcom-wdt.c
@@ -0,0 +1,224 @@
+/* Copyright (c) 2014, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/reboot.h>
+#include <linux/watchdog.h>
+
+#define WDT_RST 0x0
+#define WDT_EN 0x8
+#define WDT_BITE_TIME 0x24
+
+/* Per-device driver state; embeds the watchdog core device. */
+struct qcom_wdt {
+ struct watchdog_device wdd; /* recovered via to_qcom_wdt() */
+ struct clk *clk; /* input clock, enabled in probe */
+ unsigned long rate; /* clk_get_rate() of clk */
+ struct notifier_block restart_nb; /* see qcom_wdt_restart() */
+ void __iomem *base; /* mapped watchdog registers */
+};
+
+/* Map a watchdog core device back to the qcom_wdt that embeds it. */
+static inline
+struct qcom_wdt *to_qcom_wdt(struct watchdog_device *wdd)
+{
+ return container_of(wdd, struct qcom_wdt, wdd);
+}
+
+/*
+ * Watchdog core .start op: disable the timer, write 1 to WDT_RST (the same
+ * write qcom_wdt_ping() performs), program WDT_BITE_TIME with
+ * timeout * clock rate, then enable the timer again. Always returns 0.
+ */
+static int qcom_wdt_start(struct watchdog_device *wdd)
+{
+ struct qcom_wdt *wdt = to_qcom_wdt(wdd);
+
+ writel(0, wdt->base + WDT_EN);
+ writel(1, wdt->base + WDT_RST);
+ writel(wdd->timeout * wdt->rate, wdt->base + WDT_BITE_TIME);
+ writel(1, wdt->base + WDT_EN);
+ return 0;
+}
+
+/* Watchdog core .stop op: writes 0 to WDT_EN. Always returns 0. */
+static int qcom_wdt_stop(struct watchdog_device *wdd)
+{
+ struct qcom_wdt *wdt = to_qcom_wdt(wdd);
+
+ writel(0, wdt->base + WDT_EN);
+ return 0;
+}
+
+/* Watchdog core .ping op: writes 1 to WDT_RST. Always returns 0. */
+static int qcom_wdt_ping(struct watchdog_device *wdd)
+{
+ struct qcom_wdt *wdt = to_qcom_wdt(wdd);
+
+ writel(1, wdt->base + WDT_RST);
+ return 0;
+}
+
+/*
+ * Watchdog core .set_timeout op: stores the new timeout and reprograms the
+ * hardware by calling qcom_wdt_start(), which also leaves the timer enabled.
+ */
+static int qcom_wdt_set_timeout(struct watchdog_device *wdd,
+ unsigned int timeout)
+{
+ wdd->timeout = timeout;
+ return qcom_wdt_start(wdd);
+}
+
+/* Operations handed to the watchdog core for this device. */
+static const struct watchdog_ops qcom_wdt_ops = {
+ .owner = THIS_MODULE,
+ .start = qcom_wdt_start,
+ .stop = qcom_wdt_stop,
+ .ping = qcom_wdt_ping,
+ .set_timeout = qcom_wdt_set_timeout,
+};
+
+/* Identity and capability flags reported through the watchdog core. */
+static const struct watchdog_info qcom_wdt_info = {
+ .identity = KBUILD_MODNAME,
+ .options = WDIOF_SETTIMEOUT | WDIOF_MAGICCLOSE | WDIOF_KEEPALIVEPING,
+};
+
+/*
+ * Restart handler (registered in probe via register_restart_handler()):
+ * forces a reset by arming the watchdog with a very short bite time and
+ * then waiting for it to fire.
+ *
+ * Restart handlers are invoked from an atomic notifier chain, so this
+ * function must not sleep; the wait is a busy-wait (mdelay()), not msleep().
+ *
+ * Returns NOTIFY_DONE, which is only reached if the reset did not happen
+ * within the wait.
+ */
+static int qcom_wdt_restart(struct notifier_block *nb, unsigned long action,
+ void *data)
+{
+ struct qcom_wdt *wdt = container_of(nb, struct qcom_wdt, restart_nb);
+ u32 timeout;
+
+ /*
+ * Trigger watchdog bite:
+ * Setup BITE_TIME to be 128ms, and enable WDT.
+ */
+ timeout = 128 * wdt->rate / 1000;
+
+ writel(0, wdt->base + WDT_EN);
+ writel(1, wdt->base + WDT_RST);
+ writel(timeout, wdt->base + WDT_BITE_TIME);
+ writel(1, wdt->base + WDT_EN);
+
+ /*
+ * Actually make sure the above sequence hits hardware before waiting.
+ */
+ wmb();
+
+ /* Busy-wait past the 128ms bite time; sleeping is not allowed here. */
+ mdelay(150);
+ return NOTIFY_DONE;
+}
+
+/*
+ * Probe: map the registers, enable the input clock, validate its rate,
+ * then register the watchdog device and a restart handler.
+ *
+ * Returns 0 on success or a negative errno. The clock is disabled again on
+ * every failure that happens after clk_prepare_enable(). A failure to
+ * register the restart handler is only logged and does not fail the probe.
+ */
+static int qcom_wdt_probe(struct platform_device *pdev)
+{
+ struct qcom_wdt *wdt;
+ struct resource *res;
+ int ret;
+
+ wdt = devm_kzalloc(&pdev->dev, sizeof(*wdt), GFP_KERNEL);
+ if (!wdt)
+ return -ENOMEM;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ wdt->base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(wdt->base))
+ return PTR_ERR(wdt->base);
+
+ wdt->clk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(wdt->clk)) {
+ dev_err(&pdev->dev, "failed to get input clock\n");
+ return PTR_ERR(wdt->clk);
+ }
+
+ ret = clk_prepare_enable(wdt->clk);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to setup clock\n");
+ return ret;
+ }
+
+ /*
+ * We use the clock rate to calculate the max timeout, so ensure it's
+ * not zero to avoid a divide-by-zero exception.
+ *
+ * WATCHDOG_CORE assumes units of seconds, if the WDT is clocked such
+ * that it would bite before a second elapses its usefulness is
+ * limited. Bail if this is the case.
+ */
+ wdt->rate = clk_get_rate(wdt->clk);
+ if (wdt->rate == 0 ||
+ wdt->rate > 0x10000000U) {
+ dev_err(&pdev->dev, "invalid clock rate\n");
+ ret = -EINVAL;
+ goto err_clk_unprepare;
+ }
+
+ /*
+ * Only the parent is ours to set; the watchdog core assigns wdd.dev
+ * itself when the device is registered.
+ */
+ wdt->wdd.parent = &pdev->dev;
+ wdt->wdd.info = &qcom_wdt_info;
+ wdt->wdd.ops = &qcom_wdt_ops;
+ wdt->wdd.min_timeout = 1;
+ wdt->wdd.max_timeout = 0x10000000U / wdt->rate;
+
+ /*
+ * If 'timeout-sec' unspecified in devicetree, assume a 30 second
+ * default, unless the max timeout is less than 30 seconds, then use
+ * the max instead.
+ */
+ wdt->wdd.timeout = min(wdt->wdd.max_timeout, 30U);
+ watchdog_init_timeout(&wdt->wdd, 0, &pdev->dev);
+
+ ret = watchdog_register_device(&wdt->wdd);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to register watchdog\n");
+ goto err_clk_unprepare;
+ }
+
+ /*
+ * WDT restart notifier has priority 0 (use as a last resort)
+ */
+ wdt->restart_nb.notifier_call = qcom_wdt_restart;
+ ret = register_restart_handler(&wdt->restart_nb);
+ if (ret)
+ dev_err(&pdev->dev, "failed to setup restart handler\n");
+
+ platform_set_drvdata(pdev, wdt);
+ return 0;
+
+err_clk_unprepare:
+ clk_disable_unprepare(wdt->clk);
+ return ret;
+}
+
+/*
+ * Platform remove callback: unregisters the restart handler and the
+ * watchdog device, then disables the input clock. Always returns 0.
+ */
+static int qcom_wdt_remove(struct platform_device *pdev)
+{
+ struct qcom_wdt *wdt = platform_get_drvdata(pdev);
+
+ unregister_restart_handler(&wdt->restart_nb);
+ watchdog_unregister_device(&wdt->wdd);
+ clk_disable_unprepare(wdt->clk);
+ return 0;
+}
+
+static const struct of_device_id qcom_wdt_of_table[] = {
+ { .compatible = "qcom,kpss-wdt-msm8960", },
+ { .compatible = "qcom,kpss-wdt-apq8064", },
+ { .compatible = "qcom,kpss-wdt-ipq8064", },
+ { },
+};
+MODULE_DEVICE_TABLE(of, qcom_wdt_of_table);
+
+/* Platform driver glue; devices are matched through qcom_wdt_of_table. */
+static struct platform_driver qcom_watchdog_driver = {
+ .probe = qcom_wdt_probe,
+ .remove = qcom_wdt_remove,
+ .driver = {
+ .name = KBUILD_MODNAME,
+ .of_match_table = qcom_wdt_of_table,
+ },
+};
+module_platform_driver(qcom_watchdog_driver);
+
+MODULE_DESCRIPTION("QCOM KPSS Watchdog Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/watchdog/rn5t618_wdt.c b/drivers/watchdog/rn5t618_wdt.c
new file mode 100644
index 0000000..d1c1227
--- /dev/null
+++ b/drivers/watchdog/rn5t618_wdt.c
@@ -0,0 +1,198 @@
+/*
+ * Watchdog driver for Ricoh RN5T618 PMIC
+ *
+ * Copyright (C) 2014 Beniamino Galvani <b.galvani@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/device.h>
+#include <linux/mfd/rn5t618.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/watchdog.h>
+
+#define DRIVER_NAME "rn5t618-wdt"
+
+static bool nowayout = WATCHDOG_NOWAYOUT;
+static unsigned int timeout;
+
+module_param(timeout, uint, 0);
+MODULE_PARM_DESC(timeout, "Initial watchdog timeout in seconds");
+
+module_param(nowayout, bool, 0);
+MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
+ __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
+/* Per-device driver state. */
+struct rn5t618_wdt {
+ struct watchdog_device wdt_dev; /* device registered with the core */
+ struct rn5t618 *rn5t618; /* parent's drvdata; provides the regmap */
+};
+
+/*
+ * This array encodes the values of WDOGTIM field for the supported
+ * watchdog expiration times. If the watchdog is not accessed before
+ * the timer expiration, the PMU generates an interrupt and if the CPU
+ * doesn't clear it within one second the system is restarted.
+ */
+static const struct {
+ u8 reg_val; /* value written to the WDOGTIM field */
+ unsigned int time; /* matching expiration time; also used as timeout */
+} rn5t618_wdt_map[] = {
+ /* Entries are in ascending time order; set_timeout takes the first fit. */
+ { 0, 1 },
+ { 1, 8 },
+ { 2, 32 },
+ { 3, 128 },
+};
+
+/*
+ * Watchdog core .set_timeout op.
+ *
+ * Picks the first rn5t618_wdt_map entry whose time + 1 is >= @t, writes its
+ * reg_val into the WDOGTIM field and, if that write succeeded, records the
+ * entry's time as the device timeout (so the stored value can differ from
+ * @t). Returns -EINVAL if no entry is large enough, otherwise the result of
+ * regmap_update_bits().
+ */
+static int rn5t618_wdt_set_timeout(struct watchdog_device *wdt_dev,
+ unsigned int t)
+{
+ struct rn5t618_wdt *wdt = watchdog_get_drvdata(wdt_dev);
+ int ret, i;
+
+ /*
+ * NOTE(review): the "+ 1" presumably accounts for the extra second
+ * between the watchdog interrupt and the actual restart - confirm.
+ */
+ for (i = 0; i < ARRAY_SIZE(rn5t618_wdt_map); i++) {
+ if (rn5t618_wdt_map[i].time + 1 >= t)
+ break;
+ }
+
+ if (i == ARRAY_SIZE(rn5t618_wdt_map))
+ return -EINVAL;
+
+ ret = regmap_update_bits(wdt->rn5t618->regmap, RN5T618_WATCHDOG,
+ RN5T618_WATCHDOG_WDOGTIM_M,
+ rn5t618_wdt_map[i].reg_val);
+ if (!ret)
+ wdt_dev->timeout = rn5t618_wdt_map[i].time;
+
+ return ret;
+}
+
+/*
+ * Watchdog core .start op: programs the current timeout, then sets the
+ * repower-on bit, the watchdog enable bit and the watchdog interrupt enable
+ * bit, in that order. Stops at and returns the first regmap error.
+ */
+static int rn5t618_wdt_start(struct watchdog_device *wdt_dev)
+{
+ struct rn5t618_wdt *wdt = watchdog_get_drvdata(wdt_dev);
+ int ret;
+
+ ret = rn5t618_wdt_set_timeout(wdt_dev, wdt_dev->timeout);
+ if (ret)
+ return ret;
+
+ /* enable repower-on */
+ ret = regmap_update_bits(wdt->rn5t618->regmap, RN5T618_REPCNT,
+ RN5T618_REPCNT_REPWRON,
+ RN5T618_REPCNT_REPWRON);
+ if (ret)
+ return ret;
+
+ /* enable watchdog */
+ ret = regmap_update_bits(wdt->rn5t618->regmap, RN5T618_WATCHDOG,
+ RN5T618_WATCHDOG_WDOGEN,
+ RN5T618_WATCHDOG_WDOGEN);
+ if (ret)
+ return ret;
+
+ /* enable watchdog interrupt */
+ return regmap_update_bits(wdt->rn5t618->regmap, RN5T618_PWRIREN,
+ RN5T618_PWRIRQ_IR_WDOG,
+ RN5T618_PWRIRQ_IR_WDOG);
+}
+
+/*
+ * Watchdog core .stop op: clears the WDOGEN bit in the watchdog register
+ * and returns the regmap_update_bits() result.
+ */
+static int rn5t618_wdt_stop(struct watchdog_device *wdt_dev)
+{
+ struct rn5t618_wdt *wdt = watchdog_get_drvdata(wdt_dev);
+
+ return regmap_update_bits(wdt->rn5t618->regmap, RN5T618_WATCHDOG,
+ RN5T618_WATCHDOG_WDOGEN, 0);
+}
+
+/*
+ * Watchdog core .ping op: reads the watchdog register and writes the same
+ * value back, then clears the watchdog bit in the PWRIRQ register.
+ * Returns the first regmap error, or 0.
+ */
+static int rn5t618_wdt_ping(struct watchdog_device *wdt_dev)
+{
+ struct rn5t618_wdt *wdt = watchdog_get_drvdata(wdt_dev);
+ unsigned int val;
+ int ret;
+
+ /* The counter is restarted after a R/W access to watchdog register */
+ ret = regmap_read(wdt->rn5t618->regmap, RN5T618_WATCHDOG, &val);
+ if (ret)
+ return ret;
+
+ ret = regmap_write(wdt->rn5t618->regmap, RN5T618_WATCHDOG, val);
+ if (ret)
+ return ret;
+
+ /* Clear pending watchdog interrupt */
+ return regmap_update_bits(wdt->rn5t618->regmap, RN5T618_PWRIRQ,
+ RN5T618_PWRIRQ_IR_WDOG, 0);
+}
+
+/*
+ * Identity and capability flags reported through the watchdog core.
+ * Never modified after initialisation, hence const.
+ */
+static const struct watchdog_info rn5t618_wdt_info = {
+ .options = WDIOF_SETTIMEOUT | WDIOF_MAGICCLOSE |
+ WDIOF_KEEPALIVEPING,
+ .identity = DRIVER_NAME,
+};
+
+/*
+ * Operations handed to the watchdog core for this device.
+ * Never modified after initialisation, hence const.
+ */
+static const struct watchdog_ops rn5t618_wdt_ops = {
+ .owner = THIS_MODULE,
+ .start = rn5t618_wdt_start,
+ .stop = rn5t618_wdt_stop,
+ .ping = rn5t618_wdt_ping,
+ .set_timeout = rn5t618_wdt_set_timeout,
+};
+
+/*
+ * Probe: allocates the driver state, takes the rn5t618 handle from the
+ * parent device's drvdata, and registers a watchdog whose timeout limits
+ * are the first and last entries of rn5t618_wdt_map.
+ *
+ * The default timeout is the maximum; watchdog_init_timeout() is then given
+ * the "timeout" module parameter. Returns -ENOMEM on allocation failure,
+ * otherwise the result of watchdog_register_device().
+ */
+static int rn5t618_wdt_probe(struct platform_device *pdev)
+{
+ struct rn5t618 *rn5t618 = dev_get_drvdata(pdev->dev.parent);
+ struct rn5t618_wdt *wdt;
+ int min_timeout, max_timeout;
+
+ wdt = devm_kzalloc(&pdev->dev, sizeof(struct rn5t618_wdt), GFP_KERNEL);
+ if (!wdt)
+ return -ENOMEM;
+
+ min_timeout = rn5t618_wdt_map[0].time;
+ max_timeout = rn5t618_wdt_map[ARRAY_SIZE(rn5t618_wdt_map) - 1].time;
+
+ wdt->rn5t618 = rn5t618;
+ wdt->wdt_dev.info = &rn5t618_wdt_info;
+ wdt->wdt_dev.ops = &rn5t618_wdt_ops;
+ wdt->wdt_dev.min_timeout = min_timeout;
+ wdt->wdt_dev.max_timeout = max_timeout;
+ wdt->wdt_dev.timeout = max_timeout;
+ wdt->wdt_dev.parent = &pdev->dev;
+
+ watchdog_set_drvdata(&wdt->wdt_dev, wdt);
+ watchdog_init_timeout(&wdt->wdt_dev, timeout, &pdev->dev);
+ watchdog_set_nowayout(&wdt->wdt_dev, nowayout);
+
+ platform_set_drvdata(pdev, wdt);
+
+ return watchdog_register_device(&wdt->wdt_dev);
+}
+
+/* Platform remove callback: unregisters the watchdog device. Returns 0. */
+static int rn5t618_wdt_remove(struct platform_device *pdev)
+{
+ struct rn5t618_wdt *wdt = platform_get_drvdata(pdev);
+
+ watchdog_unregister_device(&wdt->wdt_dev);
+
+ return 0;
+}
+
+static struct platform_driver rn5t618_wdt_driver = {
+ .probe = rn5t618_wdt_probe,
+ .remove = rn5t618_wdt_remove,
+ .driver = {
+ .name = DRIVER_NAME,
+ },
+};
+
+module_platform_driver(rn5t618_wdt_driver);
+
+MODULE_AUTHOR("Beniamino Galvani <b.galvani@gmail.com>");
+MODULE_DESCRIPTION("RN5T618 watchdog driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c
index 7c6ccd0..8532c3e 100644
--- a/drivers/watchdog/s3c2410_wdt.c
+++ b/drivers/watchdog/s3c2410_wdt.c
@@ -41,6 +41,8 @@
#include <linux/of.h>
#include <linux/mfd/syscon.h>
#include <linux/regmap.h>
+#include <linux/reboot.h>
+#include <linux/delay.h>
#define S3C2410_WTCON 0x00
#define S3C2410_WTDAT 0x04
@@ -128,6 +130,7 @@
unsigned long wtdat_save;
struct watchdog_device wdt_device;
struct notifier_block freq_transition;
+ struct notifier_block restart_handler;
struct s3c2410_wdt_variant *drv_data;
struct regmap *pmureg;
};
@@ -155,6 +158,15 @@
.quirks = QUIRK_HAS_PMU_CONFIG | QUIRK_HAS_RST_STAT,
};
+/*
+ * Variant data selected by the "samsung,exynos7-wdt" compatible. It reuses
+ * the EXYNOS5 register offsets, with mask bit 0 and reset-status bit 23.
+ */
+static const struct s3c2410_wdt_variant drv_data_exynos7 = {
+ .disable_reg = EXYNOS5_WDT_DISABLE_REG_OFFSET,
+ .mask_reset_reg = EXYNOS5_WDT_MASK_RESET_REG_OFFSET,
+ .mask_bit = 0,
+ .rst_stat_reg = EXYNOS5_RST_STAT_REG_OFFSET,
+ .rst_stat_bit = 23, /* A57 WDTRESET */
+ .quirks = QUIRK_HAS_PMU_CONFIG | QUIRK_HAS_RST_STAT,
+};
+
static const struct of_device_id s3c2410_wdt_match[] = {
{ .compatible = "samsung,s3c2410-wdt",
.data = &drv_data_s3c2410 },
@@ -162,6 +174,8 @@
.data = &drv_data_exynos5250 },
{ .compatible = "samsung,exynos5420-wdt",
.data = &drv_data_exynos5420 },
+ { .compatible = "samsung,exynos7-wdt",
+ .data = &drv_data_exynos7 },
{},
};
MODULE_DEVICE_TABLE(of, s3c2410_wdt_match);
@@ -438,6 +452,31 @@
}
#endif
+/*
+ * Restart handler: reprograms the watchdog with a count of 0x80, reset
+ * enabled, and busy-waits 500ms for the reset to take effect.
+ * Returns NOTIFY_DONE if execution continues past the wait.
+ */
+static int s3c2410wdt_restart(struct notifier_block *this,
+ unsigned long mode, void *cmd)
+{
+ struct s3c2410_wdt *wdt = container_of(this, struct s3c2410_wdt,
+ restart_handler);
+ void __iomem *wdt_base = wdt->reg_base;
+
+ /* disable watchdog, to be safe */
+ writel(0, wdt_base + S3C2410_WTCON);
+
+ /* put initial values into count and data */
+ writel(0x80, wdt_base + S3C2410_WTCNT);
+ writel(0x80, wdt_base + S3C2410_WTDAT);
+
+ /* set the watchdog to go and reset... */
+ writel(S3C2410_WTCON_ENABLE | S3C2410_WTCON_DIV16 |
+ S3C2410_WTCON_RSTEN | S3C2410_WTCON_PRESCALE(0x20),
+ wdt_base + S3C2410_WTCON);
+
+ /* wait for reset to assert... */
+ mdelay(500);
+
+ return NOTIFY_DONE;
+}
+
static inline unsigned int s3c2410wdt_get_bootstatus(struct s3c2410_wdt *wdt)
{
unsigned int rst_stat;
@@ -592,6 +631,12 @@
platform_set_drvdata(pdev, wdt);
+ wdt->restart_handler.notifier_call = s3c2410wdt_restart;
+ wdt->restart_handler.priority = 128;
+ ret = register_restart_handler(&wdt->restart_handler);
+ if (ret)
+ pr_err("cannot register restart handler, %d\n", ret);
+
/* print out a statement of readiness */
wtcon = readl(wdt->reg_base + S3C2410_WTCON);
@@ -621,6 +666,8 @@
int ret;
struct s3c2410_wdt *wdt = platform_get_drvdata(dev);
+ unregister_restart_handler(&wdt->restart_handler);
+
ret = s3c2410wdt_mask_and_disable_reset(wdt, true);
if (ret < 0)
return ret;
diff --git a/drivers/watchdog/stmp3xxx_rtc_wdt.c b/drivers/watchdog/stmp3xxx_rtc_wdt.c
index 3804d5e9..a62b1b6 100644
--- a/drivers/watchdog/stmp3xxx_rtc_wdt.c
+++ b/drivers/watchdog/stmp3xxx_rtc_wdt.c
@@ -94,9 +94,33 @@
return 0;
}
+/*
+ * PM suspend callback: if the watchdog core reports the device as active,
+ * stop it via wdt_stop() and return its result; otherwise return 0.
+ */
+static int __maybe_unused stmp3xxx_wdt_suspend(struct device *dev)
+{
+ struct watchdog_device *wdd = &stmp3xxx_wdd;
+
+ if (watchdog_active(wdd))
+ return wdt_stop(wdd);
+
+ return 0;
+}
+
+/*
+ * PM resume callback: if the watchdog core reports the device as active,
+ * start it again via wdt_start() and return its result; otherwise return 0.
+ */
+static int __maybe_unused stmp3xxx_wdt_resume(struct device *dev)
+{
+ struct watchdog_device *wdd = &stmp3xxx_wdd;
+
+ if (watchdog_active(wdd))
+ return wdt_start(wdd);
+
+ return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(stmp3xxx_wdt_pm_ops,
+ stmp3xxx_wdt_suspend, stmp3xxx_wdt_resume);
+
static struct platform_driver stmp3xxx_wdt_driver = {
.driver = {
.name = "stmp3xxx_rtc_wdt",
+ .pm = &stmp3xxx_wdt_pm_ops,
},
.probe = stmp3xxx_wdt_probe,
.remove = stmp3xxx_wdt_remove,
diff --git a/drivers/watchdog/sunxi_wdt.c b/drivers/watchdog/sunxi_wdt.c
index 480bb55..b62301e 100644
--- a/drivers/watchdog/sunxi_wdt.c
+++ b/drivers/watchdog/sunxi_wdt.c
@@ -23,6 +23,7 @@
#include <linux/moduleparam.h>
#include <linux/notifier.h>
#include <linux/of.h>
+#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/reboot.h>
#include <linux/types.h>
@@ -30,15 +31,11 @@
#define WDT_MAX_TIMEOUT 16
#define WDT_MIN_TIMEOUT 1
-#define WDT_MODE_TIMEOUT(n) ((n) << 3)
-#define WDT_TIMEOUT_MASK WDT_MODE_TIMEOUT(0x0F)
+#define WDT_TIMEOUT_MASK 0x0F
-#define WDT_CTRL 0x00
#define WDT_CTRL_RELOAD ((1 << 0) | (0x0a57 << 1))
-#define WDT_MODE 0x04
#define WDT_MODE_EN (1 << 0)
-#define WDT_MODE_RST_EN (1 << 1)
#define DRV_NAME "sunxi-wdt"
#define DRV_VERSION "1.0"
@@ -46,15 +43,29 @@
static bool nowayout = WATCHDOG_NOWAYOUT;
static unsigned int timeout = WDT_MAX_TIMEOUT;
+/*
+ * This structure stores the register offsets for different variants
+ * of Allwinner's watchdog hardware.
+ */
+struct sunxi_wdt_reg {
+ u8 wdt_ctrl; /* offset of the register written with WDT_CTRL_RELOAD */
+ u8 wdt_cfg; /* offset of the register holding the reset function */
+ u8 wdt_mode; /* offset of the register with WDT_MODE_EN and timeout */
+ u8 wdt_timeout_shift; /* bit position of the timeout field in wdt_mode */
+ u8 wdt_reset_mask; /* bits of the reset function field in wdt_cfg */
+ u8 wdt_reset_val; /* field value that selects system reset */
+};
+
struct sunxi_wdt_dev {
struct watchdog_device wdt_dev;
void __iomem *wdt_base;
+ const struct sunxi_wdt_reg *wdt_regs;
struct notifier_block restart_handler;
};
/*
* wdt_timeout_map maps the watchdog timer interval value in seconds to
- * the value of the register WDT_MODE bit 3:6
+ * the value of the register WDT_MODE at bits .wdt_timeout_shift ~ +3
*
* [timeout seconds] = register value
*
@@ -82,19 +93,32 @@
struct sunxi_wdt_dev,
restart_handler);
void __iomem *wdt_base = sunxi_wdt->wdt_base;
+ const struct sunxi_wdt_reg *regs = sunxi_wdt->wdt_regs;
+ u32 val;
- /* Enable timer and set reset bit in the watchdog */
- writel(WDT_MODE_EN | WDT_MODE_RST_EN, wdt_base + WDT_MODE);
+ /* Set system reset function */
+ val = readl(wdt_base + regs->wdt_cfg);
+ val &= ~(regs->wdt_reset_mask);
+ val |= regs->wdt_reset_val;
+ writel(val, wdt_base + regs->wdt_cfg);
+
+ /* Set lowest timeout and enable watchdog */
+ val = readl(wdt_base + regs->wdt_mode);
+ val &= ~(WDT_TIMEOUT_MASK << regs->wdt_timeout_shift);
+ val |= WDT_MODE_EN;
+ writel(val, wdt_base + regs->wdt_mode);
/*
* Restart the watchdog. The default (and lowest) interval
* value for the watchdog is 0.5s.
*/
- writel(WDT_CTRL_RELOAD, wdt_base + WDT_CTRL);
+ writel(WDT_CTRL_RELOAD, wdt_base + regs->wdt_ctrl);
while (1) {
mdelay(5);
- writel(WDT_MODE_EN | WDT_MODE_RST_EN, wdt_base + WDT_MODE);
+ val = readl(wdt_base + regs->wdt_mode);
+ val |= WDT_MODE_EN;
+ writel(val, wdt_base + regs->wdt_mode);
}
return NOTIFY_DONE;
}
@@ -103,8 +127,9 @@
{
struct sunxi_wdt_dev *sunxi_wdt = watchdog_get_drvdata(wdt_dev);
void __iomem *wdt_base = sunxi_wdt->wdt_base;
+ const struct sunxi_wdt_reg *regs = sunxi_wdt->wdt_regs;
- iowrite32(WDT_CTRL_RELOAD, wdt_base + WDT_CTRL);
+ writel(WDT_CTRL_RELOAD, wdt_base + regs->wdt_ctrl);
return 0;
}
@@ -114,6 +139,7 @@
{
struct sunxi_wdt_dev *sunxi_wdt = watchdog_get_drvdata(wdt_dev);
void __iomem *wdt_base = sunxi_wdt->wdt_base;
+ const struct sunxi_wdt_reg *regs = sunxi_wdt->wdt_regs;
u32 reg;
if (wdt_timeout_map[timeout] == 0)
@@ -121,10 +147,10 @@
sunxi_wdt->wdt_dev.timeout = timeout;
- reg = ioread32(wdt_base + WDT_MODE);
- reg &= ~WDT_TIMEOUT_MASK;
- reg |= WDT_MODE_TIMEOUT(wdt_timeout_map[timeout]);
- iowrite32(reg, wdt_base + WDT_MODE);
+ reg = readl(wdt_base + regs->wdt_mode);
+ reg &= ~(WDT_TIMEOUT_MASK << regs->wdt_timeout_shift);
+ reg |= wdt_timeout_map[timeout] << regs->wdt_timeout_shift;
+ writel(reg, wdt_base + regs->wdt_mode);
sunxi_wdt_ping(wdt_dev);
@@ -135,8 +161,9 @@
{
struct sunxi_wdt_dev *sunxi_wdt = watchdog_get_drvdata(wdt_dev);
void __iomem *wdt_base = sunxi_wdt->wdt_base;
+ const struct sunxi_wdt_reg *regs = sunxi_wdt->wdt_regs;
- iowrite32(0, wdt_base + WDT_MODE);
+ writel(0, wdt_base + regs->wdt_mode);
return 0;
}
@@ -146,6 +173,7 @@
u32 reg;
struct sunxi_wdt_dev *sunxi_wdt = watchdog_get_drvdata(wdt_dev);
void __iomem *wdt_base = sunxi_wdt->wdt_base;
+ const struct sunxi_wdt_reg *regs = sunxi_wdt->wdt_regs;
int ret;
ret = sunxi_wdt_set_timeout(&sunxi_wdt->wdt_dev,
@@ -153,9 +181,16 @@
if (ret < 0)
return ret;
- reg = ioread32(wdt_base + WDT_MODE);
- reg |= (WDT_MODE_RST_EN | WDT_MODE_EN);
- iowrite32(reg, wdt_base + WDT_MODE);
+ /*
+ * Set system reset function: clear the reset field, then OR in the
+ * variant's reset value. The value itself must be used, not its
+ * complement, which would set every other bit of the cfg register.
+ */
+ reg = readl(wdt_base + regs->wdt_cfg);
+ reg &= ~(regs->wdt_reset_mask);
+ reg |= regs->wdt_reset_val;
+ writel(reg, wdt_base + regs->wdt_cfg);
+
+ /* Enable watchdog */
+ reg = readl(wdt_base + regs->wdt_mode);
+ reg |= WDT_MODE_EN;
+ writel(reg, wdt_base + regs->wdt_mode);
return 0;
}
@@ -175,9 +210,35 @@
.set_timeout = sunxi_wdt_set_timeout,
};
+/*
+ * Register layout for "allwinner,sun4i-a10-wdt". Note that wdt_cfg and
+ * wdt_mode are the same register (0x04) on this variant.
+ */
+static const struct sunxi_wdt_reg sun4i_wdt_reg = {
+ .wdt_ctrl = 0x00,
+ .wdt_cfg = 0x04,
+ .wdt_mode = 0x04,
+ .wdt_timeout_shift = 3,
+ .wdt_reset_mask = 0x02,
+ .wdt_reset_val = 0x02,
+};
+
+/*
+ * Register layout for "allwinner,sun6i-a31-wdt": separate ctrl, cfg and
+ * mode registers, with a two-bit reset function field in cfg.
+ */
+static const struct sunxi_wdt_reg sun6i_wdt_reg = {
+ .wdt_ctrl = 0x10,
+ .wdt_cfg = 0x14,
+ .wdt_mode = 0x18,
+ .wdt_timeout_shift = 4,
+ .wdt_reset_mask = 0x03,
+ .wdt_reset_val = 0x01,
+};
+
+static const struct of_device_id sunxi_wdt_dt_ids[] = {
+ { .compatible = "allwinner,sun4i-a10-wdt", .data = &sun4i_wdt_reg },
+ { .compatible = "allwinner,sun6i-a31-wdt", .data = &sun6i_wdt_reg },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, sunxi_wdt_dt_ids);
+
static int sunxi_wdt_probe(struct platform_device *pdev)
{
struct sunxi_wdt_dev *sunxi_wdt;
+ const struct of_device_id *device;
struct resource *res;
int err;
@@ -187,6 +248,12 @@
platform_set_drvdata(pdev, sunxi_wdt);
+ device = of_match_device(sunxi_wdt_dt_ids, &pdev->dev);
+ if (!device)
+ return -ENODEV;
+
+ sunxi_wdt->wdt_regs = device->data;
+
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
sunxi_wdt->wdt_base = devm_ioremap_resource(&pdev->dev, res);
if (IS_ERR(sunxi_wdt->wdt_base))
@@ -242,12 +309,6 @@
sunxi_wdt_stop(&sunxi_wdt->wdt_dev);
}
-static const struct of_device_id sunxi_wdt_dt_ids[] = {
- { .compatible = "allwinner,sun4i-a10-wdt" },
- { /* sentinel */ }
-};
-MODULE_DEVICE_TABLE(of, sunxi_wdt_dt_ids);
-
static struct platform_driver sunxi_wdt_driver = {
.probe = sunxi_wdt_probe,
.remove = sunxi_wdt_remove,
diff --git a/drivers/watchdog/ts72xx_wdt.c b/drivers/watchdog/ts72xx_wdt.c
index afa9d6e..dee9c6c 100644
--- a/drivers/watchdog/ts72xx_wdt.c
+++ b/drivers/watchdog/ts72xx_wdt.c
@@ -428,11 +428,7 @@
static int ts72xx_wdt_remove(struct platform_device *pdev)
{
- int error;
-
- error = misc_deregister(&ts72xx_wdt_miscdev);
-
- return error;
+ return misc_deregister(&ts72xx_wdt_miscdev);
}
static struct platform_driver ts72xx_wdt_driver = {
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 1e0a317d..3860d02 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -167,6 +167,9 @@
static enum bp_state update_schedule(enum bp_state state)
{
+ if (state == BP_ECANCELED)
+ return BP_ECANCELED;
+
if (state == BP_DONE) {
balloon_stats.schedule_delay = 1;
balloon_stats.retry_count = 1;
diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c
index dd9c249..95ee430 100644
--- a/drivers/xen/pci.c
+++ b/drivers/xen/pci.c
@@ -41,24 +41,29 @@
#endif
if (pci_seg_supported) {
- struct physdev_pci_device_add add = {
- .seg = pci_domain_nr(pci_dev->bus),
- .bus = pci_dev->bus->number,
- .devfn = pci_dev->devfn
+ struct {
+ struct physdev_pci_device_add add;
+ uint32_t pxm;
+ } add_ext = {
+ .add.seg = pci_domain_nr(pci_dev->bus),
+ .add.bus = pci_dev->bus->number,
+ .add.devfn = pci_dev->devfn
};
+ struct physdev_pci_device_add *add = &add_ext.add;
+
#ifdef CONFIG_ACPI
acpi_handle handle;
#endif
#ifdef CONFIG_PCI_IOV
if (pci_dev->is_virtfn) {
- add.flags = XEN_PCI_DEV_VIRTFN;
- add.physfn.bus = physfn->bus->number;
- add.physfn.devfn = physfn->devfn;
+ add->flags = XEN_PCI_DEV_VIRTFN;
+ add->physfn.bus = physfn->bus->number;
+ add->physfn.devfn = physfn->devfn;
} else
#endif
if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn))
- add.flags = XEN_PCI_DEV_EXTFN;
+ add->flags = XEN_PCI_DEV_EXTFN;
#ifdef CONFIG_ACPI
handle = ACPI_HANDLE(&pci_dev->dev);
@@ -77,8 +82,8 @@
status = acpi_evaluate_integer(handle, "_PXM",
NULL, &pxm);
if (ACPI_SUCCESS(status)) {
- add.optarr[0] = pxm;
- add.flags |= XEN_PCI_DEV_PXM;
+ add->optarr[0] = pxm;
+ add->flags |= XEN_PCI_DEV_PXM;
break;
}
status = acpi_get_parent(handle, &handle);
@@ -86,7 +91,7 @@
}
#endif /* CONFIG_ACPI */
- r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_add, &add);
+ r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_add, add);
if (r != -ENOSYS)
return r;
pci_seg_supported = false;
diff --git a/fs/Kconfig b/fs/Kconfig
index db5dc15..664991a 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -67,6 +67,7 @@
source "fs/autofs4/Kconfig"
source "fs/fuse/Kconfig"
+source "fs/overlayfs/Kconfig"
menu "Caches"
diff --git a/fs/Makefile b/fs/Makefile
index 90c8852..34a1b9de 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -104,6 +104,7 @@
obj-$(CONFIG_AUTOFS4_FS) += autofs4/
obj-$(CONFIG_ADFS_FS) += adfs/
obj-$(CONFIG_FUSE_FS) += fuse/
+obj-$(CONFIG_OVERLAYFS_FS) += overlayfs/
obj-$(CONFIG_UDF_FS) += udf/
obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/
obj-$(CONFIG_OMFS_FS) += omfs/
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 8d2b76e..4399f0c 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -765,23 +765,6 @@
return ret;
}
-/* copy of check_sticky in fs/namei.c()
-* It's inline, so penalty for filesystems that don't use sticky bit is
-* minimal.
-*/
-static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode)
-{
- kuid_t fsuid = current_fsuid();
-
- if (!(dir->i_mode & S_ISVTX))
- return 0;
- if (uid_eq(inode->i_uid, fsuid))
- return 0;
- if (uid_eq(dir->i_uid, fsuid))
- return 0;
- return !capable(CAP_FOWNER);
-}
-
/* copy of may_delete in fs/namei.c()
* Check whether we can remove a link victim from directory dir, check
* whether the type of victim is right.
@@ -817,8 +800,7 @@
return error;
if (IS_APPEND(dir))
return -EPERM;
- if (btrfs_check_sticky(dir, victim->d_inode)||
- IS_APPEND(victim->d_inode)||
+ if (check_sticky(dir, victim->d_inode) || IS_APPEND(victim->d_inode) ||
IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
return -EPERM;
if (isdir) {
diff --git a/fs/buffer.c b/fs/buffer.c
index 9614adc..6c48f20e 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -993,7 +993,7 @@
*/
static int
grow_dev_page(struct block_device *bdev, sector_t block,
- pgoff_t index, int size, int sizebits)
+ pgoff_t index, int size, int sizebits, gfp_t gfp)
{
struct inode *inode = bdev->bd_inode;
struct page *page;
@@ -1002,8 +1002,8 @@
int ret = 0; /* Will call free_more_memory() */
gfp_t gfp_mask;
- gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS;
- gfp_mask |= __GFP_MOVABLE;
+ gfp_mask = (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS) | gfp;
+
/*
* XXX: __getblk_slow() can not really deal with failure and
* will endlessly loop on improvised global reclaim. Prefer
@@ -1060,7 +1060,7 @@
* that page was dirty, the buffers are set dirty also.
*/
static int
-grow_buffers(struct block_device *bdev, sector_t block, int size)
+grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
{
pgoff_t index;
int sizebits;
@@ -1087,11 +1087,12 @@
}
/* Create a page with the proper size buffers.. */
- return grow_dev_page(bdev, block, index, size, sizebits);
+ return grow_dev_page(bdev, block, index, size, sizebits, gfp);
}
-static struct buffer_head *
-__getblk_slow(struct block_device *bdev, sector_t block, int size)
+struct buffer_head *
+__getblk_slow(struct block_device *bdev, sector_t block,
+ unsigned size, gfp_t gfp)
{
/* Size must be multiple of hard sectorsize */
if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
@@ -1113,13 +1114,14 @@
if (bh)
return bh;
- ret = grow_buffers(bdev, block, size);
+ ret = grow_buffers(bdev, block, size, gfp);
if (ret < 0)
return NULL;
if (ret == 0)
free_more_memory();
}
}
+EXPORT_SYMBOL(__getblk_slow);
/*
* The relationship between dirty buffers and dirty pages:
@@ -1373,24 +1375,25 @@
EXPORT_SYMBOL(__find_get_block);
/*
- * __getblk will locate (and, if necessary, create) the buffer_head
+ * __getblk_gfp() will locate (and, if necessary, create) the buffer_head
* which corresponds to the passed block_device, block and size. The
* returned buffer has its reference count incremented.
*
- * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers()
- * attempt is failing. FIXME, perhaps?
+ * __getblk_gfp() will lock up the machine if grow_dev_page's
+ * try_to_free_buffers() attempt is failing. FIXME, perhaps?
*/
struct buffer_head *
-__getblk(struct block_device *bdev, sector_t block, unsigned size)
+__getblk_gfp(struct block_device *bdev, sector_t block,
+ unsigned size, gfp_t gfp)
{
struct buffer_head *bh = __find_get_block(bdev, block, size);
might_sleep();
if (bh == NULL)
- bh = __getblk_slow(bdev, block, size);
+ bh = __getblk_slow(bdev, block, size, gfp);
return bh;
}
-EXPORT_SYMBOL(__getblk);
+EXPORT_SYMBOL(__getblk_gfp);
/*
* Do async read-ahead on a buffer..
@@ -1406,24 +1409,28 @@
EXPORT_SYMBOL(__breadahead);
/**
- * __bread() - reads a specified block and returns the bh
+ * __bread_gfp() - reads a specified block and returns the bh
* @bdev: the block_device to read from
* @block: number of block
* @size: size (in bytes) to read
- *
+ * @gfp: page allocation flag
+ *
* Reads a specified block, and returns buffer head that contains it.
+ * The page cache can be allocated from non-movable area
+ * not to prevent page migration if you set gfp to zero.
* It returns NULL if the block was unreadable.
*/
struct buffer_head *
-__bread(struct block_device *bdev, sector_t block, unsigned size)
+__bread_gfp(struct block_device *bdev, sector_t block,
+ unsigned size, gfp_t gfp)
{
- struct buffer_head *bh = __getblk(bdev, block, size);
+ struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
if (likely(bh) && !buffer_uptodate(bh))
bh = __bread_slow(bh);
return bh;
}
-EXPORT_SYMBOL(__bread);
+EXPORT_SYMBOL(__bread_gfp);
/*
* invalidate_bh_lrus() is called rarely - but not only at unmount.
@@ -2082,6 +2089,7 @@
struct page *page, void *fsdata)
{
struct inode *inode = mapping->host;
+ loff_t old_size = inode->i_size;
int i_size_changed = 0;
copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
@@ -2101,6 +2109,8 @@
unlock_page(page);
page_cache_release(page);
+ if (old_size < pos)
+ pagecache_isize_extended(inode, old_size, pos);
/*
* Don't mark the inode dirty under page lock. First, it unnecessarily
* makes the holding time of page lock longer. Second, it forces lock
diff --git a/fs/dcache.c b/fs/dcache.c
index d5a23fd..3ffef7f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2673,11 +2673,13 @@
if (!IS_ROOT(new)) {
spin_unlock(&inode->i_lock);
dput(new);
+ iput(inode);
return ERR_PTR(-EIO);
}
if (d_ancestor(new, dentry)) {
spin_unlock(&inode->i_lock);
dput(new);
+ iput(inode);
return ERR_PTR(-EIO);
}
write_seqlock(&rename_lock);
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 1b119d3..c4cd1fd 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -566,6 +566,13 @@
s->s_maxbytes = path.dentry->d_sb->s_maxbytes;
s->s_blocksize = path.dentry->d_sb->s_blocksize;
s->s_magic = ECRYPTFS_SUPER_MAGIC;
+ s->s_stack_depth = path.dentry->d_sb->s_stack_depth + 1;
+
+ rc = -EINVAL;
+ if (s->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
+ pr_err("eCryptfs: maximum fs stacking depth exceeded\n");
+ goto out_free;
+ }
inode = ecryptfs_get_inode(path.dentry->d_inode, s);
rc = PTR_ERR(inode);
diff --git a/fs/exofs/Kbuild b/fs/exofs/Kbuild
index 389ba83..b47c7b8 100644
--- a/fs/exofs/Kbuild
+++ b/fs/exofs/Kbuild
@@ -4,7 +4,7 @@
# Copyright (C) 2008 Panasas Inc. All rights reserved.
#
# Authors:
-# Boaz Harrosh <bharrosh@panasas.com>
+# Boaz Harrosh <ooo@electrozaur.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2
diff --git a/fs/exofs/common.h b/fs/exofs/common.h
index 3bbd469..7d88ef5 100644
--- a/fs/exofs/common.h
+++ b/fs/exofs/common.h
@@ -4,7 +4,7 @@
* Copyright (C) 2005, 2006
* Avishay Traeger (avishay@gmail.com)
* Copyright (C) 2008, 2009
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
*
* Copyrights for code taken from ext2:
* Copyright (C) 1992, 1993, 1994, 1995
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index 49f51ab..d7defd5 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -2,7 +2,7 @@
* Copyright (C) 2005, 2006
* Avishay Traeger (avishay@gmail.com)
* Copyright (C) 2008, 2009
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
*
* Copyrights for code taken from ext2:
* Copyright (C) 1992, 1993, 1994, 1995
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index fffe86f..ad9cac6 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -2,7 +2,7 @@
* Copyright (C) 2005, 2006
* Avishay Traeger (avishay@gmail.com)
* Copyright (C) 2008, 2009
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
*
* Copyrights for code taken from ext2:
* Copyright (C) 1992, 1993, 1994, 1995
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index 71bf8e4..1a376b4 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -2,7 +2,7 @@
* Copyright (C) 2005, 2006
* Avishay Traeger (avishay@gmail.com)
* Copyright (C) 2008, 2009
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
*
* Copyrights for code taken from ext2:
* Copyright (C) 1992, 1993, 1994, 1995
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 3f9cafd..f1d3d4e 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -2,7 +2,7 @@
* Copyright (C) 2005, 2006
* Avishay Traeger (avishay@gmail.com)
* Copyright (C) 2008, 2009
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
*
* Copyrights for code taken from ext2:
* Copyright (C) 1992, 1993, 1994, 1995
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 4731fd9..2890746 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -2,7 +2,7 @@
* Copyright (C) 2005, 2006
* Avishay Traeger (avishay@gmail.com)
* Copyright (C) 2008, 2009
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
*
* Copyrights for code taken from ext2:
* Copyright (C) 1992, 1993, 1994, 1995
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index cfc0205..7bd8ac8 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -2,7 +2,7 @@
* Copyright (C) 2005, 2006
* Avishay Traeger (avishay@gmail.com)
* Copyright (C) 2008, 2009
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
*
* This file is part of exofs.
*
@@ -29,7 +29,7 @@
#include "ore_raid.h"
-MODULE_AUTHOR("Boaz Harrosh <bharrosh@panasas.com>");
+MODULE_AUTHOR("Boaz Harrosh <ooo@electrozaur.com>");
MODULE_DESCRIPTION("Objects Raid Engine ore.ko");
MODULE_LICENSE("GPL");
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index 84529b8..27cbdb6 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2011
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
*
* This file is part of the objects raid engine (ore).
*
diff --git a/fs/exofs/ore_raid.h b/fs/exofs/ore_raid.h
index cf6375d..a6e7467 100644
--- a/fs/exofs/ore_raid.h
+++ b/fs/exofs/ore_raid.h
@@ -1,6 +1,6 @@
/*
* Copyright (C) from 2011
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
*
* This file is part of the objects raid engine (ore).
*
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index ed73ed8..9596550 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -2,7 +2,7 @@
* Copyright (C) 2005, 2006
* Avishay Traeger (avishay@gmail.com)
* Copyright (C) 2008, 2009
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
*
* Copyrights for code taken from ext2:
* Copyright (C) 1992, 1993, 1994, 1995
diff --git a/fs/exofs/symlink.c b/fs/exofs/symlink.c
index 4dd687c..832e262 100644
--- a/fs/exofs/symlink.c
+++ b/fs/exofs/symlink.c
@@ -2,7 +2,7 @@
* Copyright (C) 2005, 2006
* Avishay Traeger (avishay@gmail.com)
* Copyright (C) 2008, 2009
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
*
* Copyrights for code taken from ext2:
* Copyright (C) 1992, 1993, 1994, 1995
diff --git a/fs/exofs/sys.c b/fs/exofs/sys.c
index 1b4f2f9..5e6a2c0 100644
--- a/fs/exofs/sys.c
+++ b/fs/exofs/sys.c
@@ -1,7 +1,7 @@
/*
* Copyright (C) 2012
* Sachin Bhamare <sbhamare@panasas.com>
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
*
* This file is part of exofs.
*
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 581ef40..83a6f49 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -176,7 +176,7 @@
}
/* Initializes an uninitialized block bitmap */
-static void ext4_init_block_bitmap(struct super_block *sb,
+static int ext4_init_block_bitmap(struct super_block *sb,
struct buffer_head *bh,
ext4_group_t block_group,
struct ext4_group_desc *gdp)
@@ -192,7 +192,6 @@
/* If checksum is bad mark all blocks used to prevent allocation
* essentially implementing a per-group read-only flag. */
if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
- ext4_error(sb, "Checksum bad for group %u", block_group);
grp = ext4_get_group_info(sb, block_group);
if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
percpu_counter_sub(&sbi->s_freeclusters_counter,
@@ -205,7 +204,7 @@
count);
}
set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
- return;
+ return -EIO;
}
memset(bh->b_data, 0, sb->s_blocksize);
@@ -243,6 +242,7 @@
sb->s_blocksize * 8, bh->b_data);
ext4_block_bitmap_csum_set(sb, block_group, gdp, bh);
ext4_group_desc_csum_set(sb, block_group, gdp);
+ return 0;
}
/* Return the number of free blocks in a block group. It is used when
@@ -438,11 +438,15 @@
}
ext4_lock_group(sb, block_group);
if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
- ext4_init_block_bitmap(sb, bh, block_group, desc);
+ int err;
+
+ err = ext4_init_block_bitmap(sb, bh, block_group, desc);
set_bitmap_uptodate(bh);
set_buffer_uptodate(bh);
ext4_unlock_group(sb, block_group);
unlock_buffer(bh);
+ if (err)
+ ext4_error(sb, "Checksum bad for grp %u", block_group);
return bh;
}
ext4_unlock_group(sb, block_group);
@@ -636,8 +640,7 @@
* Account for the allocated meta blocks. We will never
* fail EDQUOT for metdata, but we do account for it.
*/
- if (!(*errp) &&
- ext4_test_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED)) {
+ if (!(*errp) && (flags & EXT4_MB_DELALLOC_RESERVED)) {
spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
dquot_alloc_block_nofail(inode,
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index 3285aa5..b610779 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -24,8 +24,7 @@
__u32 provided, calculated;
struct ext4_sb_info *sbi = EXT4_SB(sb);
- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(sb))
return 1;
provided = le16_to_cpu(gdp->bg_inode_bitmap_csum_lo);
@@ -46,8 +45,7 @@
__u32 csum;
struct ext4_sb_info *sbi = EXT4_SB(sb);
- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(sb))
return;
csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
@@ -65,8 +63,7 @@
struct ext4_sb_info *sbi = EXT4_SB(sb);
int sz = EXT4_CLUSTERS_PER_GROUP(sb) / 8;
- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(sb))
return 1;
provided = le16_to_cpu(gdp->bg_block_bitmap_csum_lo);
@@ -91,8 +88,7 @@
__u32 csum;
struct ext4_sb_info *sbi = EXT4_SB(sb);
- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(sb))
return;
csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 0bb3f9e..c24143e 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -151,13 +151,11 @@
&file->f_ra, file,
index, 1);
file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
- bh = ext4_bread(NULL, inode, map.m_lblk, 0, &err);
+ bh = ext4_bread(NULL, inode, map.m_lblk, 0);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
}
- /*
- * We ignore I/O errors on directories so users have a chance
- * of recovering data when there's a bad sector
- */
if (!bh) {
if (!dir_has_error) {
EXT4_ERROR_FILE(file, 0,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index b0c225c..c55a1fa 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -572,15 +572,15 @@
/*
* The bit position of these flags must not overlap with any of the
- * EXT4_GET_BLOCKS_*. They are used by ext4_ext_find_extent(),
+ * EXT4_GET_BLOCKS_*. They are used by ext4_find_extent(),
* read_extent_tree_block(), ext4_split_extent_at(),
* ext4_ext_insert_extent(), and ext4_ext_create_new_leaf().
* EXT4_EX_NOCACHE is used to indicate that the we shouldn't be
* caching the extents when reading from the extent tree while a
* truncate or punch hole operation is in progress.
*/
-#define EXT4_EX_NOCACHE 0x0400
-#define EXT4_EX_FORCE_CACHE 0x0800
+#define EXT4_EX_NOCACHE 0x40000000
+#define EXT4_EX_FORCE_CACHE 0x20000000
/*
* Flags used by ext4_free_blocks
@@ -890,6 +890,7 @@
struct ext4_es_tree i_es_tree;
rwlock_t i_es_lock;
struct list_head i_es_lru;
+ unsigned int i_es_all_nr; /* protected by i_es_lock */
unsigned int i_es_lru_nr; /* protected by i_es_lock */
unsigned long i_touch_when; /* jiffies of last accessing */
@@ -1174,6 +1175,9 @@
#define EXT4_MF_MNTDIR_SAMPLED 0x0001
#define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */
+/* Number of quota types we support */
+#define EXT4_MAXQUOTAS 2
+
/*
* fourth extended-fs super-block data in memory
*/
@@ -1237,7 +1241,7 @@
u32 s_min_batch_time;
struct block_device *journal_bdev;
#ifdef CONFIG_QUOTA
- char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
+ char *s_qf_names[EXT4_MAXQUOTAS]; /* Names of quota files with journalled quota */
int s_jquota_fmt; /* Format of quota to use */
#endif
unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
@@ -1330,8 +1334,7 @@
/* Reclaim extents from extent status tree */
struct shrinker s_es_shrinker;
struct list_head s_es_lru;
- unsigned long s_es_last_sorted;
- struct percpu_counter s_extent_cache_cnt;
+ struct ext4_es_stats s_es_stats;
struct mb_cache *s_mb_cache;
spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp;
@@ -1399,7 +1402,6 @@
EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */
EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/
EXT4_STATE_NEWENTRY, /* File just added to dir */
- EXT4_STATE_DELALLOC_RESERVED, /* blks already reserved for delalloc */
EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read
nolocking */
EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */
@@ -2086,10 +2088,8 @@
extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
/* inode.c */
-struct buffer_head *ext4_getblk(handle_t *, struct inode *,
- ext4_lblk_t, int, int *);
-struct buffer_head *ext4_bread(handle_t *, struct inode *,
- ext4_lblk_t, int, int *);
+struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
+struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
int ext4_get_block_write(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
int ext4_get_block(struct inode *inode, sector_t iblock,
@@ -2109,6 +2109,7 @@
#define CONVERT_INLINE_DATA 2
extern struct inode *ext4_iget(struct super_block *, unsigned long);
+extern struct inode *ext4_iget_normal(struct super_block *, unsigned long);
extern int ext4_write_inode(struct inode *, struct writeback_control *);
extern int ext4_setattr(struct dentry *, struct iattr *);
extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
@@ -2332,10 +2333,18 @@
static inline int ext4_has_group_desc_csum(struct super_block *sb)
{
return EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_GDT_CSUM |
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM);
+ EXT4_FEATURE_RO_COMPAT_GDT_CSUM) ||
+ (EXT4_SB(sb)->s_chksum_driver != NULL);
}
+static inline int ext4_has_metadata_csum(struct super_block *sb)
+{
+ WARN_ON_ONCE(EXT4_HAS_RO_COMPAT_FEATURE(sb,
+ EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
+ !EXT4_SB(sb)->s_chksum_driver);
+
+ return (EXT4_SB(sb)->s_chksum_driver != NULL);
+}
static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
{
return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) |
@@ -2731,21 +2740,26 @@
struct ext4_extent *ex1,
struct ext4_extent *ex2);
extern int ext4_ext_insert_extent(handle_t *, struct inode *,
- struct ext4_ext_path *,
+ struct ext4_ext_path **,
struct ext4_extent *, int);
-extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
- struct ext4_ext_path *,
- int flags);
+extern struct ext4_ext_path *ext4_find_extent(struct inode *, ext4_lblk_t,
+ struct ext4_ext_path **,
+ int flags);
extern void ext4_ext_drop_refs(struct ext4_ext_path *);
extern int ext4_ext_check_inode(struct inode *inode);
extern int ext4_find_delalloc_range(struct inode *inode,
ext4_lblk_t lblk_start,
ext4_lblk_t lblk_end);
extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
+extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path);
extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len);
extern int ext4_ext_precache(struct inode *inode);
extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
+extern int ext4_swap_extents(handle_t *handle, struct inode *inode1,
+ struct inode *inode2, ext4_lblk_t lblk1,
+ ext4_lblk_t lblk2, ext4_lblk_t count,
+ int mark_unwritten,int *err);
/* move_extent.c */
extern void ext4_double_down_write_data_sem(struct inode *first,
@@ -2755,8 +2769,6 @@
extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
__u64 start_orig, __u64 start_donor,
__u64 len, __u64 *moved_len);
-extern int mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
- struct ext4_extent **extent);
/* page-io.c */
extern int __init ext4_init_pageio(void);
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index a867f5c..3c93815 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -123,6 +123,7 @@
struct ext4_ext_path {
ext4_fsblk_t p_block;
__u16 p_depth;
+ __u16 p_maxdepth;
struct ext4_extent *p_ext;
struct ext4_extent_idx *p_idx;
struct ext4_extent_header *p_hdr;
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 0074e0d..3445035 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -256,8 +256,8 @@
set_buffer_prio(bh);
if (ext4_handle_valid(handle)) {
err = jbd2_journal_dirty_metadata(handle, bh);
- /* Errors can only happen if there is a bug */
- if (WARN_ON_ONCE(err)) {
+ /* Errors can only happen due to aborted journal or a nasty bug */
+ if (!is_handle_aborted(handle) && WARN_ON_ONCE(err)) {
ext4_journal_abort_handle(where, line, __func__, bh,
handle, err);
if (inode == NULL) {
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 17c00ff..9c5b49f 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -102,9 +102,9 @@
#define EXT4_QUOTA_INIT_BLOCKS(sb) 0
#define EXT4_QUOTA_DEL_BLOCKS(sb) 0
#endif
-#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb))
-#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
-#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
+#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb))
+#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
+#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
static inline int ext4_jbd2_credits_xattr(struct inode *inode)
{
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 74292a7..37043d0 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -73,8 +73,7 @@
{
struct ext4_extent_tail *et;
- if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(inode->i_sb))
return 1;
et = find_ext4_extent_tail(eh);
@@ -88,8 +87,7 @@
{
struct ext4_extent_tail *et;
- if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(inode->i_sb))
return;
et = find_ext4_extent_tail(eh);
@@ -98,14 +96,14 @@
static int ext4_split_extent(handle_t *handle,
struct inode *inode,
- struct ext4_ext_path *path,
+ struct ext4_ext_path **ppath,
struct ext4_map_blocks *map,
int split_flag,
int flags);
static int ext4_split_extent_at(handle_t *handle,
struct inode *inode,
- struct ext4_ext_path *path,
+ struct ext4_ext_path **ppath,
ext4_lblk_t split,
int split_flag,
int flags);
@@ -291,6 +289,20 @@
return size;
}
+static inline int
+ext4_force_split_extent_at(handle_t *handle, struct inode *inode,
+ struct ext4_ext_path **ppath, ext4_lblk_t lblk,
+ int nofail)
+{
+ struct ext4_ext_path *path = *ppath;
+ int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext);
+
+ return ext4_split_extent_at(handle, inode, ppath, lblk, unwritten ?
+ EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0,
+ EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO |
+ (nofail ? EXT4_GET_BLOCKS_METADATA_NOFAIL:0));
+}
+
/*
* Calculate the number of metadata blocks needed
* to allocate @blocks
@@ -695,9 +707,11 @@
void ext4_ext_drop_refs(struct ext4_ext_path *path)
{
- int depth = path->p_depth;
- int i;
+ int depth, i;
+ if (!path)
+ return;
+ depth = path->p_depth;
for (i = 0; i <= depth; i++, path++)
if (path->p_bh) {
brelse(path->p_bh);
@@ -841,24 +855,32 @@
}
struct ext4_ext_path *
-ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
- struct ext4_ext_path *path, int flags)
+ext4_find_extent(struct inode *inode, ext4_lblk_t block,
+ struct ext4_ext_path **orig_path, int flags)
{
struct ext4_extent_header *eh;
struct buffer_head *bh;
- short int depth, i, ppos = 0, alloc = 0;
+ struct ext4_ext_path *path = orig_path ? *orig_path : NULL;
+ short int depth, i, ppos = 0;
int ret;
eh = ext_inode_hdr(inode);
depth = ext_depth(inode);
- /* account possible depth increase */
+ if (path) {
+ ext4_ext_drop_refs(path);
+ if (depth > path[0].p_maxdepth) {
+ kfree(path);
+ *orig_path = path = NULL;
+ }
+ }
if (!path) {
+ /* account possible depth increase */
path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2),
GFP_NOFS);
- if (!path)
+ if (unlikely(!path))
return ERR_PTR(-ENOMEM);
- alloc = 1;
+ path[0].p_maxdepth = depth + 1;
}
path[0].p_hdr = eh;
path[0].p_bh = NULL;
@@ -876,7 +898,7 @@
bh = read_extent_tree_block(inode, path[ppos].p_block, --i,
flags);
- if (IS_ERR(bh)) {
+ if (unlikely(IS_ERR(bh))) {
ret = PTR_ERR(bh);
goto err;
}
@@ -910,8 +932,9 @@
err:
ext4_ext_drop_refs(path);
- if (alloc)
- kfree(path);
+ kfree(path);
+ if (orig_path)
+ *orig_path = NULL;
return ERR_PTR(ret);
}
@@ -1238,16 +1261,24 @@
* just created block
*/
static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
- unsigned int flags,
- struct ext4_extent *newext)
+ unsigned int flags)
{
struct ext4_extent_header *neh;
struct buffer_head *bh;
- ext4_fsblk_t newblock;
+ ext4_fsblk_t newblock, goal = 0;
+ struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
int err = 0;
- newblock = ext4_ext_new_meta_block(handle, inode, NULL,
- newext, &err, flags);
+ /* Try to prepend new index to old one */
+ if (ext_depth(inode))
+ goal = ext4_idx_pblock(EXT_FIRST_INDEX(ext_inode_hdr(inode)));
+ if (goal > le32_to_cpu(es->s_first_data_block)) {
+ flags |= EXT4_MB_HINT_TRY_GOAL;
+ goal--;
+ } else
+ goal = ext4_inode_to_goal_block(inode);
+ newblock = ext4_new_meta_blocks(handle, inode, goal, flags,
+ NULL, &err);
if (newblock == 0)
return err;
@@ -1314,9 +1345,10 @@
static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
unsigned int mb_flags,
unsigned int gb_flags,
- struct ext4_ext_path *path,
+ struct ext4_ext_path **ppath,
struct ext4_extent *newext)
{
+ struct ext4_ext_path *path = *ppath;
struct ext4_ext_path *curp;
int depth, i, err = 0;
@@ -1340,23 +1372,21 @@
goto out;
/* refill path */
- ext4_ext_drop_refs(path);
- path = ext4_ext_find_extent(inode,
+ path = ext4_find_extent(inode,
(ext4_lblk_t)le32_to_cpu(newext->ee_block),
- path, gb_flags);
+ ppath, gb_flags);
if (IS_ERR(path))
err = PTR_ERR(path);
} else {
/* tree is full, time to grow in depth */
- err = ext4_ext_grow_indepth(handle, inode, mb_flags, newext);
+ err = ext4_ext_grow_indepth(handle, inode, mb_flags);
if (err)
goto out;
/* refill path */
- ext4_ext_drop_refs(path);
- path = ext4_ext_find_extent(inode,
+ path = ext4_find_extent(inode,
(ext4_lblk_t)le32_to_cpu(newext->ee_block),
- path, gb_flags);
+ ppath, gb_flags);
if (IS_ERR(path)) {
err = PTR_ERR(path);
goto out;
@@ -1559,7 +1589,7 @@
* allocated block. Thus, index entries have to be consistent
* with leaves.
*/
-static ext4_lblk_t
+ext4_lblk_t
ext4_ext_next_allocated_block(struct ext4_ext_path *path)
{
int depth;
@@ -1802,6 +1832,7 @@
sizeof(struct ext4_extent_idx);
s += sizeof(struct ext4_extent_header);
+ path[1].p_maxdepth = path[0].p_maxdepth;
memcpy(path[0].p_hdr, path[1].p_hdr, s);
path[0].p_depth = 0;
path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) +
@@ -1896,9 +1927,10 @@
* creating new leaf in the no-space case.
*/
int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
- struct ext4_ext_path *path,
+ struct ext4_ext_path **ppath,
struct ext4_extent *newext, int gb_flags)
{
+ struct ext4_ext_path *path = *ppath;
struct ext4_extent_header *eh;
struct ext4_extent *ex, *fex;
struct ext4_extent *nearex; /* nearest extent */
@@ -1907,6 +1939,8 @@
ext4_lblk_t next;
int mb_flags = 0, unwritten;
+ if (gb_flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+ mb_flags |= EXT4_MB_DELALLOC_RESERVED;
if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
return -EIO;
@@ -1925,7 +1959,7 @@
/*
* Try to see whether we should rather test the extent on
* right from ex, or from the left of ex. This is because
- * ext4_ext_find_extent() can return either extent on the
+ * ext4_find_extent() can return either extent on the
* left, or on the right from the searched position. This
* will make merging more effective.
*/
@@ -2008,7 +2042,7 @@
if (next != EXT_MAX_BLOCKS) {
ext_debug("next leaf block - %u\n", next);
BUG_ON(npath != NULL);
- npath = ext4_ext_find_extent(inode, next, NULL, 0);
+ npath = ext4_find_extent(inode, next, NULL, 0);
if (IS_ERR(npath))
return PTR_ERR(npath);
BUG_ON(npath->p_depth != path->p_depth);
@@ -2028,9 +2062,9 @@
* We're gonna add a new leaf in the tree.
*/
if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
- mb_flags = EXT4_MB_USE_RESERVED;
+ mb_flags |= EXT4_MB_USE_RESERVED;
err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags,
- path, newext);
+ ppath, newext);
if (err)
goto cleanup;
depth = ext_depth(inode);
@@ -2108,10 +2142,8 @@
err = ext4_ext_dirty(handle, inode, path + path->p_depth);
cleanup:
- if (npath) {
- ext4_ext_drop_refs(npath);
- kfree(npath);
- }
+ ext4_ext_drop_refs(npath);
+ kfree(npath);
return err;
}
@@ -2133,13 +2165,7 @@
/* find extent for this block */
down_read(&EXT4_I(inode)->i_data_sem);
- if (path && ext_depth(inode) != depth) {
- /* depth was changed. we have to realloc path */
- kfree(path);
- path = NULL;
- }
-
- path = ext4_ext_find_extent(inode, block, path, 0);
+ path = ext4_find_extent(inode, block, &path, 0);
if (IS_ERR(path)) {
up_read(&EXT4_I(inode)->i_data_sem);
err = PTR_ERR(path);
@@ -2156,7 +2182,6 @@
}
ex = path[depth].p_ext;
next = ext4_ext_next_allocated_block(path);
- ext4_ext_drop_refs(path);
flags = 0;
exists = 0;
@@ -2266,11 +2291,8 @@
block = es.es_lblk + es.es_len;
}
- if (path) {
- ext4_ext_drop_refs(path);
- kfree(path);
- }
-
+ ext4_ext_drop_refs(path);
+ kfree(path);
return err;
}
@@ -2826,7 +2848,7 @@
ext4_lblk_t ee_block;
/* find extent for this block */
- path = ext4_ext_find_extent(inode, end, NULL, EXT4_EX_NOCACHE);
+ path = ext4_find_extent(inode, end, NULL, EXT4_EX_NOCACHE);
if (IS_ERR(path)) {
ext4_journal_stop(handle);
return PTR_ERR(path);
@@ -2854,24 +2876,14 @@
*/
if (end >= ee_block &&
end < ee_block + ext4_ext_get_actual_len(ex) - 1) {
- int split_flag = 0;
-
- if (ext4_ext_is_unwritten(ex))
- split_flag = EXT4_EXT_MARK_UNWRIT1 |
- EXT4_EXT_MARK_UNWRIT2;
-
/*
* Split the extent in two so that 'end' is the last
* block in the first new extent. Also we should not
* fail removing space due to ENOSPC so try to use
* reserved block if that happens.
*/
- err = ext4_split_extent_at(handle, inode, path,
- end + 1, split_flag,
- EXT4_EX_NOCACHE |
- EXT4_GET_BLOCKS_PRE_IO |
- EXT4_GET_BLOCKS_METADATA_NOFAIL);
-
+ err = ext4_force_split_extent_at(handle, inode, &path,
+ end + 1, 1);
if (err < 0)
goto out;
}
@@ -2893,7 +2905,7 @@
ext4_journal_stop(handle);
return -ENOMEM;
}
- path[0].p_depth = depth;
+ path[0].p_maxdepth = path[0].p_depth = depth;
path[0].p_hdr = ext_inode_hdr(inode);
i = 0;
@@ -3013,10 +3025,9 @@
out:
ext4_ext_drop_refs(path);
kfree(path);
- if (err == -EAGAIN) {
- path = NULL;
+ path = NULL;
+ if (err == -EAGAIN)
goto again;
- }
ext4_journal_stop(handle);
return err;
@@ -3130,11 +3141,12 @@
*/
static int ext4_split_extent_at(handle_t *handle,
struct inode *inode,
- struct ext4_ext_path *path,
+ struct ext4_ext_path **ppath,
ext4_lblk_t split,
int split_flag,
int flags)
{
+ struct ext4_ext_path *path = *ppath;
ext4_fsblk_t newblock;
ext4_lblk_t ee_block;
struct ext4_extent *ex, newex, orig_ex, zero_ex;
@@ -3205,7 +3217,7 @@
if (split_flag & EXT4_EXT_MARK_UNWRIT2)
ext4_ext_mark_unwritten(ex2);
- err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
+ err = ext4_ext_insert_extent(handle, inode, ppath, &newex, flags);
if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
if (split_flag & EXT4_EXT_DATA_VALID1) {
@@ -3271,11 +3283,12 @@
*/
static int ext4_split_extent(handle_t *handle,
struct inode *inode,
- struct ext4_ext_path *path,
+ struct ext4_ext_path **ppath,
struct ext4_map_blocks *map,
int split_flag,
int flags)
{
+ struct ext4_ext_path *path = *ppath;
ext4_lblk_t ee_block;
struct ext4_extent *ex;
unsigned int ee_len, depth;
@@ -3298,7 +3311,7 @@
EXT4_EXT_MARK_UNWRIT2;
if (split_flag & EXT4_EXT_DATA_VALID2)
split_flag1 |= EXT4_EXT_DATA_VALID1;
- err = ext4_split_extent_at(handle, inode, path,
+ err = ext4_split_extent_at(handle, inode, ppath,
map->m_lblk + map->m_len, split_flag1, flags1);
if (err)
goto out;
@@ -3309,8 +3322,7 @@
* Update path is required because previous ext4_split_extent_at() may
* result in split of original leaf or extent zeroout.
*/
- ext4_ext_drop_refs(path);
- path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
+ path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
if (IS_ERR(path))
return PTR_ERR(path);
depth = ext_depth(inode);
@@ -3330,7 +3342,7 @@
split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT |
EXT4_EXT_MARK_UNWRIT2);
}
- err = ext4_split_extent_at(handle, inode, path,
+ err = ext4_split_extent_at(handle, inode, ppath,
map->m_lblk, split_flag1, flags);
if (err)
goto out;
@@ -3364,9 +3376,10 @@
static int ext4_ext_convert_to_initialized(handle_t *handle,
struct inode *inode,
struct ext4_map_blocks *map,
- struct ext4_ext_path *path,
+ struct ext4_ext_path **ppath,
int flags)
{
+ struct ext4_ext_path *path = *ppath;
struct ext4_sb_info *sbi;
struct ext4_extent_header *eh;
struct ext4_map_blocks split_map;
@@ -3590,7 +3603,7 @@
}
}
- allocated = ext4_split_extent(handle, inode, path,
+ allocated = ext4_split_extent(handle, inode, ppath,
&split_map, split_flag, flags);
if (allocated < 0)
err = allocated;
@@ -3629,9 +3642,10 @@
static int ext4_split_convert_extents(handle_t *handle,
struct inode *inode,
struct ext4_map_blocks *map,
- struct ext4_ext_path *path,
+ struct ext4_ext_path **ppath,
int flags)
{
+ struct ext4_ext_path *path = *ppath;
ext4_lblk_t eof_block;
ext4_lblk_t ee_block;
struct ext4_extent *ex;
@@ -3665,74 +3679,15 @@
split_flag |= (EXT4_EXT_MARK_UNWRIT2 | EXT4_EXT_DATA_VALID2);
}
flags |= EXT4_GET_BLOCKS_PRE_IO;
- return ext4_split_extent(handle, inode, path, map, split_flag, flags);
+ return ext4_split_extent(handle, inode, ppath, map, split_flag, flags);
}
-static int ext4_convert_initialized_extents(handle_t *handle,
- struct inode *inode,
- struct ext4_map_blocks *map,
- struct ext4_ext_path *path)
-{
- struct ext4_extent *ex;
- ext4_lblk_t ee_block;
- unsigned int ee_len;
- int depth;
- int err = 0;
-
- depth = ext_depth(inode);
- ex = path[depth].p_ext;
- ee_block = le32_to_cpu(ex->ee_block);
- ee_len = ext4_ext_get_actual_len(ex);
-
- ext_debug("%s: inode %lu, logical"
- "block %llu, max_blocks %u\n", __func__, inode->i_ino,
- (unsigned long long)ee_block, ee_len);
-
- if (ee_block != map->m_lblk || ee_len > map->m_len) {
- err = ext4_split_convert_extents(handle, inode, map, path,
- EXT4_GET_BLOCKS_CONVERT_UNWRITTEN);
- if (err < 0)
- goto out;
- ext4_ext_drop_refs(path);
- path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
- if (IS_ERR(path)) {
- err = PTR_ERR(path);
- goto out;
- }
- depth = ext_depth(inode);
- ex = path[depth].p_ext;
- if (!ex) {
- EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
- (unsigned long) map->m_lblk);
- err = -EIO;
- goto out;
- }
- }
-
- err = ext4_ext_get_access(handle, inode, path + depth);
- if (err)
- goto out;
- /* first mark the extent as unwritten */
- ext4_ext_mark_unwritten(ex);
-
- /* note: ext4_ext_correct_indexes() isn't needed here because
- * borders are not changed
- */
- ext4_ext_try_to_merge(handle, inode, path, ex);
-
- /* Mark modified extent as dirty */
- err = ext4_ext_dirty(handle, inode, path + path->p_depth);
-out:
- ext4_ext_show_leaf(inode, path);
- return err;
-}
-
-
static int ext4_convert_unwritten_extents_endio(handle_t *handle,
struct inode *inode,
struct ext4_map_blocks *map,
- struct ext4_ext_path *path)
+ struct ext4_ext_path **ppath)
{
+ struct ext4_ext_path *path = *ppath;
struct ext4_extent *ex;
ext4_lblk_t ee_block;
unsigned int ee_len;
@@ -3761,16 +3716,13 @@
inode->i_ino, (unsigned long long)ee_block, ee_len,
(unsigned long long)map->m_lblk, map->m_len);
#endif
- err = ext4_split_convert_extents(handle, inode, map, path,
+ err = ext4_split_convert_extents(handle, inode, map, ppath,
EXT4_GET_BLOCKS_CONVERT);
if (err < 0)
- goto out;
- ext4_ext_drop_refs(path);
- path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
- if (IS_ERR(path)) {
- err = PTR_ERR(path);
- goto out;
- }
+ return err;
+ path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
+ if (IS_ERR(path))
+ return PTR_ERR(path);
depth = ext_depth(inode);
ex = path[depth].p_ext;
}
@@ -3963,12 +3915,16 @@
}
static int
-ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode,
- struct ext4_map_blocks *map,
- struct ext4_ext_path *path, int flags,
- unsigned int allocated, ext4_fsblk_t newblock)
+convert_initialized_extent(handle_t *handle, struct inode *inode,
+ struct ext4_map_blocks *map,
+ struct ext4_ext_path **ppath, int flags,
+ unsigned int allocated, ext4_fsblk_t newblock)
{
- int ret = 0;
+ struct ext4_ext_path *path = *ppath;
+ struct ext4_extent *ex;
+ ext4_lblk_t ee_block;
+ unsigned int ee_len;
+ int depth;
int err = 0;
/*
@@ -3978,28 +3934,67 @@
if (map->m_len > EXT_UNWRITTEN_MAX_LEN)
map->m_len = EXT_UNWRITTEN_MAX_LEN / 2;
- ret = ext4_convert_initialized_extents(handle, inode, map,
- path);
- if (ret >= 0) {
- ext4_update_inode_fsync_trans(handle, inode, 1);
- err = check_eofblocks_fl(handle, inode, map->m_lblk,
- path, map->m_len);
- } else
- err = ret;
+ depth = ext_depth(inode);
+ ex = path[depth].p_ext;
+ ee_block = le32_to_cpu(ex->ee_block);
+ ee_len = ext4_ext_get_actual_len(ex);
+
+ ext_debug("%s: inode %lu, logical"
+ "block %llu, max_blocks %u\n", __func__, inode->i_ino,
+ (unsigned long long)ee_block, ee_len);
+
+ if (ee_block != map->m_lblk || ee_len > map->m_len) {
+ err = ext4_split_convert_extents(handle, inode, map, ppath,
+ EXT4_GET_BLOCKS_CONVERT_UNWRITTEN);
+ if (err < 0)
+ return err;
+ path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
+ if (IS_ERR(path))
+ return PTR_ERR(path);
+ depth = ext_depth(inode);
+ ex = path[depth].p_ext;
+ if (!ex) {
+ EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
+ (unsigned long) map->m_lblk);
+ return -EIO;
+ }
+ }
+
+ err = ext4_ext_get_access(handle, inode, path + depth);
+ if (err)
+ return err;
+ /* first mark the extent as unwritten */
+ ext4_ext_mark_unwritten(ex);
+
+ /* note: ext4_ext_correct_indexes() isn't needed here because
+ * borders are not changed
+ */
+ ext4_ext_try_to_merge(handle, inode, path, ex);
+
+ /* Mark modified extent as dirty */
+ err = ext4_ext_dirty(handle, inode, path + path->p_depth);
+ if (err)
+ return err;
+ ext4_ext_show_leaf(inode, path);
+
+ ext4_update_inode_fsync_trans(handle, inode, 1);
+ err = check_eofblocks_fl(handle, inode, map->m_lblk, path, map->m_len);
+ if (err)
+ return err;
map->m_flags |= EXT4_MAP_UNWRITTEN;
if (allocated > map->m_len)
allocated = map->m_len;
map->m_len = allocated;
-
- return err ? err : allocated;
+ return allocated;
}
static int
ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map,
- struct ext4_ext_path *path, int flags,
+ struct ext4_ext_path **ppath, int flags,
unsigned int allocated, ext4_fsblk_t newblock)
{
+ struct ext4_ext_path *path = *ppath;
int ret = 0;
int err = 0;
ext4_io_end_t *io = ext4_inode_aio(inode);
@@ -4021,8 +4016,8 @@
/* get_block() before submit the IO, split the extent */
if (flags & EXT4_GET_BLOCKS_PRE_IO) {
- ret = ext4_split_convert_extents(handle, inode, map,
- path, flags | EXT4_GET_BLOCKS_CONVERT);
+ ret = ext4_split_convert_extents(handle, inode, map, ppath,
+ flags | EXT4_GET_BLOCKS_CONVERT);
if (ret <= 0)
goto out;
/*
@@ -4040,7 +4035,7 @@
/* IO end_io complete, convert the filled extent to written */
if (flags & EXT4_GET_BLOCKS_CONVERT) {
ret = ext4_convert_unwritten_extents_endio(handle, inode, map,
- path);
+ ppath);
if (ret >= 0) {
ext4_update_inode_fsync_trans(handle, inode, 1);
err = check_eofblocks_fl(handle, inode, map->m_lblk,
@@ -4078,7 +4073,7 @@
}
/* buffered write, writepage time, convert*/
- ret = ext4_ext_convert_to_initialized(handle, inode, map, path, flags);
+ ret = ext4_ext_convert_to_initialized(handle, inode, map, ppath, flags);
if (ret >= 0)
ext4_update_inode_fsync_trans(handle, inode, 1);
out:
@@ -4279,7 +4274,7 @@
trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
/* find extent for this block */
- path = ext4_ext_find_extent(inode, map->m_lblk, NULL, 0);
+ path = ext4_find_extent(inode, map->m_lblk, NULL, 0);
if (IS_ERR(path)) {
err = PTR_ERR(path);
path = NULL;
@@ -4291,7 +4286,7 @@
/*
* consistent leaf must not be empty;
* this situation is possible, though, _during_ tree modification;
- * this is why assert can't be put in ext4_ext_find_extent()
+ * this is why assert can't be put in ext4_find_extent()
*/
if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
EXT4_ERROR_INODE(inode, "bad extent address "
@@ -4331,15 +4326,15 @@
*/
if ((!ext4_ext_is_unwritten(ex)) &&
(flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
- allocated = ext4_ext_convert_initialized_extent(
- handle, inode, map, path, flags,
- allocated, newblock);
+ allocated = convert_initialized_extent(
+ handle, inode, map, &path,
+ flags, allocated, newblock);
goto out2;
} else if (!ext4_ext_is_unwritten(ex))
goto out;
ret = ext4_ext_handle_unwritten_extents(
- handle, inode, map, path, flags,
+ handle, inode, map, &path, flags,
allocated, newblock);
if (ret < 0)
err = ret;
@@ -4376,7 +4371,7 @@
/*
* If we are doing bigalloc, check to see if the extent returned
- * by ext4_ext_find_extent() implies a cluster we can use.
+ * by ext4_find_extent() implies a cluster we can use.
*/
if (cluster_offset && ex &&
get_implied_cluster_alloc(inode->i_sb, map, ex, path)) {
@@ -4451,6 +4446,8 @@
ar.flags = 0;
if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE)
ar.flags |= EXT4_MB_HINT_NOPREALLOC;
+ if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+ ar.flags |= EXT4_MB_DELALLOC_RESERVED;
newblock = ext4_mb_new_blocks(handle, &ar, &err);
if (!newblock)
goto out2;
@@ -4486,7 +4483,7 @@
err = check_eofblocks_fl(handle, inode, map->m_lblk,
path, ar.len);
if (!err)
- err = ext4_ext_insert_extent(handle, inode, path,
+ err = ext4_ext_insert_extent(handle, inode, &path,
&newex, flags);
if (!err && set_unwritten) {
@@ -4619,10 +4616,8 @@
map->m_pblk = newblock;
map->m_len = allocated;
out2:
- if (path) {
- ext4_ext_drop_refs(path);
- kfree(path);
- }
+ ext4_ext_drop_refs(path);
+ kfree(path);
trace_ext4_ext_map_blocks_exit(inode, flags, map,
err ? err : allocated);
@@ -4799,7 +4794,8 @@
max_blocks -= lblk;
flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT |
- EXT4_GET_BLOCKS_CONVERT_UNWRITTEN;
+ EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
+ EXT4_EX_NOCACHE;
if (mode & FALLOC_FL_KEEP_SIZE)
flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
@@ -4837,17 +4833,23 @@
ext4_inode_block_unlocked_dio(inode);
inode_dio_wait(inode);
- /*
- * Remove entire range from the extent status tree.
- */
- ret = ext4_es_remove_extent(inode, lblk, max_blocks);
- if (ret)
- goto out_dio;
-
ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
flags, mode);
if (ret)
goto out_dio;
+ /*
+ * Remove entire range from the extent status tree.
+ *
+ * ext4_es_remove_extent(inode, lblk, max_blocks) is
+ * NOT sufficient. I'm not sure why this is the case,
+ * but let's be conservative and remove the extent
+ * status tree for the entire inode. There should be
+ * no outstanding delalloc extents thanks to the
+ * filemap_write_and_wait_range() call above.
+ */
+ ret = ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
+ if (ret)
+ goto out_dio;
}
if (!partial_begin && !partial_end)
goto out_dio;
@@ -5304,36 +5306,31 @@
struct ext4_ext_path *path;
int ret = 0, depth;
struct ext4_extent *extent;
- ext4_lblk_t stop_block, current_block;
+ ext4_lblk_t stop_block;
ext4_lblk_t ex_start, ex_end;
/* Let path point to the last extent */
- path = ext4_ext_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
+ path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
if (IS_ERR(path))
return PTR_ERR(path);
depth = path->p_depth;
extent = path[depth].p_ext;
- if (!extent) {
- ext4_ext_drop_refs(path);
- kfree(path);
- return ret;
- }
+ if (!extent)
+ goto out;
stop_block = le32_to_cpu(extent->ee_block) +
ext4_ext_get_actual_len(extent);
- ext4_ext_drop_refs(path);
- kfree(path);
/* Nothing to shift, if hole is at the end of file */
if (start >= stop_block)
- return ret;
+ goto out;
/*
* Don't start shifting extents until we make sure the hole is big
* enough to accomodate the shift.
*/
- path = ext4_ext_find_extent(inode, start - 1, NULL, 0);
+ path = ext4_find_extent(inode, start - 1, &path, 0);
if (IS_ERR(path))
return PTR_ERR(path);
depth = path->p_depth;
@@ -5346,8 +5343,6 @@
ex_start = 0;
ex_end = 0;
}
- ext4_ext_drop_refs(path);
- kfree(path);
if ((start == ex_start && shift > ex_start) ||
(shift > start - ex_end))
@@ -5355,7 +5350,7 @@
/* Its safe to start updating extents */
while (start < stop_block) {
- path = ext4_ext_find_extent(inode, start, NULL, 0);
+ path = ext4_find_extent(inode, start, &path, 0);
if (IS_ERR(path))
return PTR_ERR(path);
depth = path->p_depth;
@@ -5365,27 +5360,23 @@
(unsigned long) start);
return -EIO;
}
-
- current_block = le32_to_cpu(extent->ee_block);
- if (start > current_block) {
+ if (start > le32_to_cpu(extent->ee_block)) {
/* Hole, move to the next extent */
- ret = mext_next_extent(inode, path, &extent);
- if (ret != 0) {
- ext4_ext_drop_refs(path);
- kfree(path);
- if (ret == 1)
- ret = 0;
- break;
+ if (extent < EXT_LAST_EXTENT(path[depth].p_hdr)) {
+ path[depth].p_ext++;
+ } else {
+ start = ext4_ext_next_allocated_block(path);
+ continue;
}
}
ret = ext4_ext_shift_path_extents(path, shift, inode,
handle, &start);
- ext4_ext_drop_refs(path);
- kfree(path);
if (ret)
break;
}
-
+out:
+ ext4_ext_drop_refs(path);
+ kfree(path);
return ret;
}
@@ -5508,3 +5499,199 @@
mutex_unlock(&inode->i_mutex);
return ret;
}
+
+/**
+ * ext4_swap_extents - Swap extents between two inodes
+ *
+ * @inode1: First inode
+ * @inode2: Second inode
+ * @lblk1: Start block for first inode
+ * @lblk2: Start block for second inode
+ * @count: Number of blocks to swap
+ * @unwritten: Mark second inode's extents as unwritten after swap
+ * @erp: Pointer to save error value
+ *
+ * This helper routine does exactly what its name promises: swap extents. All
+ * other work, such as page-cache locking consistency, bh mapping consistency or
+ * copying of the extents' data, must be performed by the caller.
+ * Locking:
+ * i_mutex is held for both inodes
+ * i_data_sem is locked for write for both inodes
+ * Assumptions:
+ * All pages from requested range are locked for both inodes
+ */
+int
+ext4_swap_extents(handle_t *handle, struct inode *inode1,
+ struct inode *inode2, ext4_lblk_t lblk1, ext4_lblk_t lblk2,
+ ext4_lblk_t count, int unwritten, int *erp)
+{
+ struct ext4_ext_path *path1 = NULL;
+ struct ext4_ext_path *path2 = NULL;
+ int replaced_count = 0;
+
+ BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem));
+ BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem));
+ BUG_ON(!mutex_is_locked(&inode1->i_mutex));
+ BUG_ON(!mutex_is_locked(&inode1->i_mutex));
+
+ *erp = ext4_es_remove_extent(inode1, lblk1, count);
+ if (unlikely(*erp))
+ return 0;
+ *erp = ext4_es_remove_extent(inode2, lblk2, count);
+ if (unlikely(*erp))
+ return 0;
+
+ while (count) {
+ struct ext4_extent *ex1, *ex2, tmp_ex;
+ ext4_lblk_t e1_blk, e2_blk;
+ int e1_len, e2_len, len;
+ int split = 0;
+
+ path1 = ext4_find_extent(inode1, lblk1, NULL, EXT4_EX_NOCACHE);
+ if (unlikely(IS_ERR(path1))) {
+ *erp = PTR_ERR(path1);
+ path1 = NULL;
+ finish:
+ count = 0;
+ goto repeat;
+ }
+ path2 = ext4_find_extent(inode2, lblk2, NULL, EXT4_EX_NOCACHE);
+ if (unlikely(IS_ERR(path2))) {
+ *erp = PTR_ERR(path2);
+ path2 = NULL;
+ goto finish;
+ }
+ ex1 = path1[path1->p_depth].p_ext;
+ ex2 = path2[path2->p_depth].p_ext;
+ /* Do we have something to swap? */
+ if (unlikely(!ex2 || !ex1))
+ goto finish;
+
+ e1_blk = le32_to_cpu(ex1->ee_block);
+ e2_blk = le32_to_cpu(ex2->ee_block);
+ e1_len = ext4_ext_get_actual_len(ex1);
+ e2_len = ext4_ext_get_actual_len(ex2);
+
+ /* Hole handling */
+ if (!in_range(lblk1, e1_blk, e1_len) ||
+ !in_range(lblk2, e2_blk, e2_len)) {
+ ext4_lblk_t next1, next2;
+
+ /* if hole after extent, then go to next extent */
+ next1 = ext4_ext_next_allocated_block(path1);
+ next2 = ext4_ext_next_allocated_block(path2);
+ /* If hole before extent, then shift to that extent */
+ if (e1_blk > lblk1)
+ next1 = e1_blk;
+ if (e2_blk > lblk2)
+ next2 = e1_blk;
+ /* Do we have something to swap */
+ if (next1 == EXT_MAX_BLOCKS || next2 == EXT_MAX_BLOCKS)
+ goto finish;
+ /* Move to the rightmost boundary */
+ len = next1 - lblk1;
+ if (len < next2 - lblk2)
+ len = next2 - lblk2;
+ if (len > count)
+ len = count;
+ lblk1 += len;
+ lblk2 += len;
+ count -= len;
+ goto repeat;
+ }
+
+ /* Prepare left boundary */
+ if (e1_blk < lblk1) {
+ split = 1;
+ *erp = ext4_force_split_extent_at(handle, inode1,
+ &path1, lblk1, 0);
+ if (unlikely(*erp))
+ goto finish;
+ }
+ if (e2_blk < lblk2) {
+ split = 1;
+ *erp = ext4_force_split_extent_at(handle, inode2,
+ &path2, lblk2, 0);
+ if (unlikely(*erp))
+ goto finish;
+ }
+ /* ext4_split_extent_at() may result in leaf extent split,
+ * so the path must be revalidated. */
+ if (split)
+ goto repeat;
+
+ /* Prepare right boundary */
+ len = count;
+ if (len > e1_blk + e1_len - lblk1)
+ len = e1_blk + e1_len - lblk1;
+ if (len > e2_blk + e2_len - lblk2)
+ len = e2_blk + e2_len - lblk2;
+
+ if (len != e1_len) {
+ split = 1;
+ *erp = ext4_force_split_extent_at(handle, inode1,
+ &path1, lblk1 + len, 0);
+ if (unlikely(*erp))
+ goto finish;
+ }
+ if (len != e2_len) {
+ split = 1;
+ *erp = ext4_force_split_extent_at(handle, inode2,
+ &path2, lblk2 + len, 0);
+ if (*erp)
+ goto finish;
+ }
+ /* ext4_split_extent_at() may result in leaf extent split,
+ * so the path must be revalidated. */
+ if (split)
+ goto repeat;
+
+ BUG_ON(e2_len != e1_len);
+ *erp = ext4_ext_get_access(handle, inode1, path1 + path1->p_depth);
+ if (unlikely(*erp))
+ goto finish;
+ *erp = ext4_ext_get_access(handle, inode2, path2 + path2->p_depth);
+ if (unlikely(*erp))
+ goto finish;
+
+ /* Both extents are fully inside boundaries. Swap them now */
+ tmp_ex = *ex1;
+ ext4_ext_store_pblock(ex1, ext4_ext_pblock(ex2));
+ ext4_ext_store_pblock(ex2, ext4_ext_pblock(&tmp_ex));
+ ex1->ee_len = cpu_to_le16(e2_len);
+ ex2->ee_len = cpu_to_le16(e1_len);
+ if (unwritten)
+ ext4_ext_mark_unwritten(ex2);
+ if (ext4_ext_is_unwritten(&tmp_ex))
+ ext4_ext_mark_unwritten(ex1);
+
+ ext4_ext_try_to_merge(handle, inode2, path2, ex2);
+ ext4_ext_try_to_merge(handle, inode1, path1, ex1);
+ *erp = ext4_ext_dirty(handle, inode2, path2 +
+ path2->p_depth);
+ if (unlikely(*erp))
+ goto finish;
+ *erp = ext4_ext_dirty(handle, inode1, path1 +
+ path1->p_depth);
+ /*
+ * Looks scary, eh? The second inode already points to the new blocks
+ * and was successfully dirtied. Luckily, an error here can only be
+ * caused by a journal error, so the whole transaction will be aborted
+ * anyway.
+ */
+ if (unlikely(*erp))
+ goto finish;
+ lblk1 += len;
+ lblk2 += len;
+ replaced_count += len;
+ count -= len;
+
+ repeat:
+ ext4_ext_drop_refs(path1);
+ kfree(path1);
+ ext4_ext_drop_refs(path2);
+ kfree(path2);
+ path1 = path2 = NULL;
+ }
+ return replaced_count;
+}
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 0b7e28e..94e7855 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -11,6 +11,8 @@
*/
#include <linux/rbtree.h>
#include <linux/list_sort.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
#include "ext4.h"
#include "extents_status.h"
@@ -313,19 +315,27 @@
*/
if (!ext4_es_is_delayed(es)) {
EXT4_I(inode)->i_es_lru_nr++;
- percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt);
+ percpu_counter_inc(&EXT4_SB(inode->i_sb)->
+ s_es_stats.es_stats_lru_cnt);
}
+ EXT4_I(inode)->i_es_all_nr++;
+ percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
+
return es;
}
static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
{
+ EXT4_I(inode)->i_es_all_nr--;
+ percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
+
/* Decrease the lru counter when this es is not delayed */
if (!ext4_es_is_delayed(es)) {
BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0);
EXT4_I(inode)->i_es_lru_nr--;
- percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt);
+ percpu_counter_dec(&EXT4_SB(inode->i_sb)->
+ s_es_stats.es_stats_lru_cnt);
}
kmem_cache_free(ext4_es_cachep, es);
@@ -426,7 +436,7 @@
unsigned short ee_len;
int depth, ee_status, es_status;
- path = ext4_ext_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE);
+ path = ext4_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE);
if (IS_ERR(path))
return;
@@ -499,10 +509,8 @@
}
}
out:
- if (path) {
- ext4_ext_drop_refs(path);
- kfree(path);
- }
+ ext4_ext_drop_refs(path);
+ kfree(path);
}
static void ext4_es_insert_extent_ind_check(struct inode *inode,
@@ -731,6 +739,7 @@
struct extent_status *es)
{
struct ext4_es_tree *tree;
+ struct ext4_es_stats *stats;
struct extent_status *es1 = NULL;
struct rb_node *node;
int found = 0;
@@ -767,11 +776,15 @@
}
out:
+ stats = &EXT4_SB(inode->i_sb)->s_es_stats;
if (found) {
BUG_ON(!es1);
es->es_lblk = es1->es_lblk;
es->es_len = es1->es_len;
es->es_pblk = es1->es_pblk;
+ stats->es_stats_cache_hits++;
+ } else {
+ stats->es_stats_cache_misses++;
}
read_unlock(&EXT4_I(inode)->i_es_lock);
@@ -933,11 +946,16 @@
struct ext4_inode_info *locked_ei)
{
struct ext4_inode_info *ei;
+ struct ext4_es_stats *es_stats;
struct list_head *cur, *tmp;
LIST_HEAD(skipped);
+ ktime_t start_time;
+ u64 scan_time;
int nr_shrunk = 0;
int retried = 0, skip_precached = 1, nr_skipped = 0;
+ es_stats = &sbi->s_es_stats;
+ start_time = ktime_get();
spin_lock(&sbi->s_es_lru_lock);
retry:
@@ -948,7 +966,8 @@
* If we have already reclaimed all extents from extent
* status tree, just stop the loop immediately.
*/
- if (percpu_counter_read_positive(&sbi->s_extent_cache_cnt) == 0)
+ if (percpu_counter_read_positive(
+ &es_stats->es_stats_lru_cnt) == 0)
break;
ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
@@ -958,7 +977,7 @@
* time. Normally we try hard to avoid shrinking
* precached inodes, but we will as a last resort.
*/
- if ((sbi->s_es_last_sorted < ei->i_touch_when) ||
+ if ((es_stats->es_stats_last_sorted < ei->i_touch_when) ||
(skip_precached && ext4_test_inode_state(&ei->vfs_inode,
EXT4_STATE_EXT_PRECACHED))) {
nr_skipped++;
@@ -992,7 +1011,7 @@
if ((nr_shrunk == 0) && nr_skipped && !retried) {
retried++;
list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp);
- sbi->s_es_last_sorted = jiffies;
+ es_stats->es_stats_last_sorted = jiffies;
ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info,
i_es_lru);
/*
@@ -1010,6 +1029,22 @@
if (locked_ei && nr_shrunk == 0)
nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan);
+ scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
+ if (likely(es_stats->es_stats_scan_time))
+ es_stats->es_stats_scan_time = (scan_time +
+ es_stats->es_stats_scan_time*3) / 4;
+ else
+ es_stats->es_stats_scan_time = scan_time;
+ if (scan_time > es_stats->es_stats_max_scan_time)
+ es_stats->es_stats_max_scan_time = scan_time;
+ if (likely(es_stats->es_stats_shrunk))
+ es_stats->es_stats_shrunk = (nr_shrunk +
+ es_stats->es_stats_shrunk*3) / 4;
+ else
+ es_stats->es_stats_shrunk = nr_shrunk;
+
+ trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time, skip_precached,
+ nr_skipped, retried);
return nr_shrunk;
}
@@ -1020,8 +1055,8 @@
struct ext4_sb_info *sbi;
sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker);
- nr = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
- trace_ext4_es_shrink_enter(sbi->s_sb, sc->nr_to_scan, nr);
+ nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt);
+ trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr);
return nr;
}
@@ -1033,31 +1068,160 @@
int nr_to_scan = sc->nr_to_scan;
int ret, nr_shrunk;
- ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
- trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret);
+ ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt);
+ trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret);
if (!nr_to_scan)
return ret;
nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL);
- trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret);
+ trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret);
return nr_shrunk;
}
-void ext4_es_register_shrinker(struct ext4_sb_info *sbi)
+static void *ext4_es_seq_shrinker_info_start(struct seq_file *seq, loff_t *pos)
{
+ return *pos ? NULL : SEQ_START_TOKEN;
+}
+
+static void *
+ext4_es_seq_shrinker_info_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ return NULL;
+}
+
+static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v)
+{
+ struct ext4_sb_info *sbi = seq->private;
+ struct ext4_es_stats *es_stats = &sbi->s_es_stats;
+ struct ext4_inode_info *ei, *max = NULL;
+ unsigned int inode_cnt = 0;
+
+ if (v != SEQ_START_TOKEN)
+ return 0;
+
+ /* here we just find an inode that has the max nr. of objects */
+ spin_lock(&sbi->s_es_lru_lock);
+ list_for_each_entry(ei, &sbi->s_es_lru, i_es_lru) {
+ inode_cnt++;
+ if (max && max->i_es_all_nr < ei->i_es_all_nr)
+ max = ei;
+ else if (!max)
+ max = ei;
+ }
+ spin_unlock(&sbi->s_es_lru_lock);
+
+ seq_printf(seq, "stats:\n %lld objects\n %lld reclaimable objects\n",
+ percpu_counter_sum_positive(&es_stats->es_stats_all_cnt),
+ percpu_counter_sum_positive(&es_stats->es_stats_lru_cnt));
+ seq_printf(seq, " %lu/%lu cache hits/misses\n",
+ es_stats->es_stats_cache_hits,
+ es_stats->es_stats_cache_misses);
+ if (es_stats->es_stats_last_sorted != 0)
+ seq_printf(seq, " %u ms last sorted interval\n",
+ jiffies_to_msecs(jiffies -
+ es_stats->es_stats_last_sorted));
+ if (inode_cnt)
+ seq_printf(seq, " %d inodes on lru list\n", inode_cnt);
+
+ seq_printf(seq, "average:\n %llu us scan time\n",
+ div_u64(es_stats->es_stats_scan_time, 1000));
+ seq_printf(seq, " %lu shrunk objects\n", es_stats->es_stats_shrunk);
+ if (inode_cnt)
+ seq_printf(seq,
+ "maximum:\n %lu inode (%u objects, %u reclaimable)\n"
+ " %llu us max scan time\n",
+ max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_lru_nr,
+ div_u64(es_stats->es_stats_max_scan_time, 1000));
+
+ return 0;
+}
+
+static void ext4_es_seq_shrinker_info_stop(struct seq_file *seq, void *v)
+{
+}
+
+static const struct seq_operations ext4_es_seq_shrinker_info_ops = {
+ .start = ext4_es_seq_shrinker_info_start,
+ .next = ext4_es_seq_shrinker_info_next,
+ .stop = ext4_es_seq_shrinker_info_stop,
+ .show = ext4_es_seq_shrinker_info_show,
+};
+
+static int
+ext4_es_seq_shrinker_info_open(struct inode *inode, struct file *file)
+{
+ int ret;
+
+ ret = seq_open(file, &ext4_es_seq_shrinker_info_ops);
+ if (!ret) {
+ struct seq_file *m = file->private_data;
+ m->private = PDE_DATA(inode);
+ }
+
+ return ret;
+}
+
+static int
+ext4_es_seq_shrinker_info_release(struct inode *inode, struct file *file)
+{
+ return seq_release(inode, file);
+}
+
+static const struct file_operations ext4_es_seq_shrinker_info_fops = {
+ .owner = THIS_MODULE,
+ .open = ext4_es_seq_shrinker_info_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = ext4_es_seq_shrinker_info_release,
+};
+
+int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
+{
+ int err;
+
INIT_LIST_HEAD(&sbi->s_es_lru);
spin_lock_init(&sbi->s_es_lru_lock);
- sbi->s_es_last_sorted = 0;
+ sbi->s_es_stats.es_stats_last_sorted = 0;
+ sbi->s_es_stats.es_stats_shrunk = 0;
+ sbi->s_es_stats.es_stats_cache_hits = 0;
+ sbi->s_es_stats.es_stats_cache_misses = 0;
+ sbi->s_es_stats.es_stats_scan_time = 0;
+ sbi->s_es_stats.es_stats_max_scan_time = 0;
+ err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL);
+ if (err)
+ return err;
+ err = percpu_counter_init(&sbi->s_es_stats.es_stats_lru_cnt, 0, GFP_KERNEL);
+ if (err)
+ goto err1;
+
sbi->s_es_shrinker.scan_objects = ext4_es_scan;
sbi->s_es_shrinker.count_objects = ext4_es_count;
sbi->s_es_shrinker.seeks = DEFAULT_SEEKS;
- register_shrinker(&sbi->s_es_shrinker);
+ err = register_shrinker(&sbi->s_es_shrinker);
+ if (err)
+ goto err2;
+
+ if (sbi->s_proc)
+ proc_create_data("es_shrinker_info", S_IRUGO, sbi->s_proc,
+ &ext4_es_seq_shrinker_info_fops, sbi);
+
+ return 0;
+
+err2:
+ percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt);
+err1:
+ percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
+ return err;
}
void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
{
+ if (sbi->s_proc)
+ remove_proc_entry("es_shrinker_info", sbi->s_proc);
+ percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
+ percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt);
unregister_shrinker(&sbi->s_es_shrinker);
}
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index f1b62a4..efd5f97 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -64,6 +64,17 @@
struct extent_status *cache_es; /* recently accessed extent */
};
+struct ext4_es_stats {
+ unsigned long es_stats_last_sorted;
+ unsigned long es_stats_shrunk;
+ unsigned long es_stats_cache_hits;
+ unsigned long es_stats_cache_misses;
+ u64 es_stats_scan_time;
+ u64 es_stats_max_scan_time;
+ struct percpu_counter es_stats_all_cnt;
+ struct percpu_counter es_stats_lru_cnt;
+};
+
extern int __init ext4_init_es(void);
extern void ext4_exit_es(void);
extern void ext4_es_init_tree(struct ext4_es_tree *tree);
@@ -138,7 +149,7 @@
(pb & ~ES_MASK));
}
-extern void ext4_es_register_shrinker(struct ext4_sb_info *sbi);
+extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi);
extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi);
extern void ext4_es_lru_add(struct inode *inode);
extern void ext4_es_lru_del(struct inode *inode);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 5b87fc3..8012a5d 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1011,8 +1011,7 @@
spin_unlock(&sbi->s_next_gen_lock);
/* Precompute checksum seed for inode metadata */
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
+ if (ext4_has_metadata_csum(sb)) {
__u32 csum;
__le32 inum = cpu_to_le32(inode->i_ino);
__le32 gen = cpu_to_le32(inode->i_generation);
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index e75f840..36b3696 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -318,34 +318,24 @@
* ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain
* as described above and return 0.
*/
-static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
- ext4_lblk_t iblock, int indirect_blks,
- int *blks, ext4_fsblk_t goal,
- ext4_lblk_t *offsets, Indirect *branch)
+static int ext4_alloc_branch(handle_t *handle,
+ struct ext4_allocation_request *ar,
+ int indirect_blks, ext4_lblk_t *offsets,
+ Indirect *branch)
{
- struct ext4_allocation_request ar;
struct buffer_head * bh;
ext4_fsblk_t b, new_blocks[4];
__le32 *p;
int i, j, err, len = 1;
- /*
- * Set up for the direct block allocation
- */
- memset(&ar, 0, sizeof(ar));
- ar.inode = inode;
- ar.len = *blks;
- ar.logical = iblock;
- if (S_ISREG(inode->i_mode))
- ar.flags = EXT4_MB_HINT_DATA;
-
for (i = 0; i <= indirect_blks; i++) {
if (i == indirect_blks) {
- ar.goal = goal;
- new_blocks[i] = ext4_mb_new_blocks(handle, &ar, &err);
+ new_blocks[i] = ext4_mb_new_blocks(handle, ar, &err);
} else
- goal = new_blocks[i] = ext4_new_meta_blocks(handle, inode,
- goal, 0, NULL, &err);
+ ar->goal = new_blocks[i] = ext4_new_meta_blocks(handle,
+ ar->inode, ar->goal,
+ ar->flags & EXT4_MB_DELALLOC_RESERVED,
+ NULL, &err);
if (err) {
i--;
goto failed;
@@ -354,7 +344,7 @@
if (i == 0)
continue;
- bh = branch[i].bh = sb_getblk(inode->i_sb, new_blocks[i-1]);
+ bh = branch[i].bh = sb_getblk(ar->inode->i_sb, new_blocks[i-1]);
if (unlikely(!bh)) {
err = -ENOMEM;
goto failed;
@@ -372,7 +362,7 @@
b = new_blocks[i];
if (i == indirect_blks)
- len = ar.len;
+ len = ar->len;
for (j = 0; j < len; j++)
*p++ = cpu_to_le32(b++);
@@ -381,11 +371,10 @@
unlock_buffer(bh);
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_metadata(handle, inode, bh);
+ err = ext4_handle_dirty_metadata(handle, ar->inode, bh);
if (err)
goto failed;
}
- *blks = ar.len;
return 0;
failed:
for (; i >= 0; i--) {
@@ -396,10 +385,10 @@
* existing before ext4_alloc_branch() was called.
*/
if (i > 0 && i != indirect_blks && branch[i].bh)
- ext4_forget(handle, 1, inode, branch[i].bh,
+ ext4_forget(handle, 1, ar->inode, branch[i].bh,
branch[i].bh->b_blocknr);
- ext4_free_blocks(handle, inode, NULL, new_blocks[i],
- (i == indirect_blks) ? ar.len : 1, 0);
+ ext4_free_blocks(handle, ar->inode, NULL, new_blocks[i],
+ (i == indirect_blks) ? ar->len : 1, 0);
}
return err;
}
@@ -419,9 +408,9 @@
* inode (->i_blocks, etc.). In case of success we end up with the full
* chain to new block and return 0.
*/
-static int ext4_splice_branch(handle_t *handle, struct inode *inode,
- ext4_lblk_t block, Indirect *where, int num,
- int blks)
+static int ext4_splice_branch(handle_t *handle,
+ struct ext4_allocation_request *ar,
+ Indirect *where, int num)
{
int i;
int err = 0;
@@ -446,9 +435,9 @@
* Update the host buffer_head or inode to point to more just allocated
* direct blocks blocks
*/
- if (num == 0 && blks > 1) {
+ if (num == 0 && ar->len > 1) {
current_block = le32_to_cpu(where->key) + 1;
- for (i = 1; i < blks; i++)
+ for (i = 1; i < ar->len; i++)
*(where->p + i) = cpu_to_le32(current_block++);
}
@@ -465,14 +454,14 @@
*/
jbd_debug(5, "splicing indirect only\n");
BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_metadata(handle, inode, where->bh);
+ err = ext4_handle_dirty_metadata(handle, ar->inode, where->bh);
if (err)
goto err_out;
} else {
/*
* OK, we spliced it into the inode itself on a direct block.
*/
- ext4_mark_inode_dirty(handle, inode);
+ ext4_mark_inode_dirty(handle, ar->inode);
jbd_debug(5, "splicing direct\n");
}
return err;
@@ -484,11 +473,11 @@
* need to revoke the block, which is why we don't
* need to set EXT4_FREE_BLOCKS_METADATA.
*/
- ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
+ ext4_free_blocks(handle, ar->inode, where[i].bh, 0, 1,
EXT4_FREE_BLOCKS_FORGET);
}
- ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key),
- blks, 0);
+ ext4_free_blocks(handle, ar->inode, NULL, le32_to_cpu(where[num].key),
+ ar->len, 0);
return err;
}
@@ -525,11 +514,11 @@
struct ext4_map_blocks *map,
int flags)
{
+ struct ext4_allocation_request ar;
int err = -EIO;
ext4_lblk_t offsets[4];
Indirect chain[4];
Indirect *partial;
- ext4_fsblk_t goal;
int indirect_blks;
int blocks_to_boundary = 0;
int depth;
@@ -579,7 +568,16 @@
return -ENOSPC;
}
- goal = ext4_find_goal(inode, map->m_lblk, partial);
+ /* Set up for the direct block allocation */
+ memset(&ar, 0, sizeof(ar));
+ ar.inode = inode;
+ ar.logical = map->m_lblk;
+ if (S_ISREG(inode->i_mode))
+ ar.flags = EXT4_MB_HINT_DATA;
+ if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+ ar.flags |= EXT4_MB_DELALLOC_RESERVED;
+
+ ar.goal = ext4_find_goal(inode, map->m_lblk, partial);
/* the number of blocks need to allocate for [d,t]indirect blocks */
indirect_blks = (chain + depth) - partial - 1;
@@ -588,13 +586,13 @@
* Next look up the indirect map to count the totoal number of
* direct blocks to allocate for this branch.
*/
- count = ext4_blks_to_allocate(partial, indirect_blks,
- map->m_len, blocks_to_boundary);
+ ar.len = ext4_blks_to_allocate(partial, indirect_blks,
+ map->m_len, blocks_to_boundary);
+
/*
* Block out ext4_truncate while we alter the tree
*/
- err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks,
- &count, goal,
+ err = ext4_alloc_branch(handle, &ar, indirect_blks,
offsets + (partial - chain), partial);
/*
@@ -605,14 +603,14 @@
* may need to return -EAGAIN upwards in the worst case. --sct
*/
if (!err)
- err = ext4_splice_branch(handle, inode, map->m_lblk,
- partial, indirect_blks, count);
+ err = ext4_splice_branch(handle, &ar, partial, indirect_blks);
if (err)
goto cleanup;
map->m_flags |= EXT4_MAP_NEW;
ext4_update_inode_fsync_trans(handle, inode, 1);
+ count = ar.len;
got_it:
map->m_flags |= EXT4_MAP_MAPPED;
map->m_pblk = le32_to_cpu(chain[depth-1].key);
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index bea662b..3ea6269 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -594,6 +594,7 @@
if (ret) {
unlock_page(page);
page_cache_release(page);
+ page = NULL;
ext4_orphan_add(handle, inode);
up_write(&EXT4_I(inode)->xattr_sem);
sem_held = 0;
@@ -613,7 +614,8 @@
if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry;
- block_commit_write(page, from, to);
+ if (page)
+ block_commit_write(page, from, to);
out:
if (page) {
unlock_page(page);
@@ -1126,8 +1128,7 @@
memcpy((void *)de, buf + EXT4_INLINE_DOTDOT_SIZE,
inline_size - EXT4_INLINE_DOTDOT_SIZE);
- if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (ext4_has_metadata_csum(inode->i_sb))
csum_size = sizeof(struct ext4_dir_entry_tail);
inode->i_size = inode->i_sb->s_blocksize;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 3aa26e9..e9777f9 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -83,8 +83,7 @@
if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
cpu_to_le32(EXT4_OS_LINUX) ||
- !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ !ext4_has_metadata_csum(inode->i_sb))
return 1;
provided = le16_to_cpu(raw->i_checksum_lo);
@@ -105,8 +104,7 @@
if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
cpu_to_le32(EXT4_OS_LINUX) ||
- !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ !ext4_has_metadata_csum(inode->i_sb))
return;
csum = ext4_inode_csum(inode, raw, ei);
@@ -224,16 +222,15 @@
goto no_delete;
}
- if (!is_bad_inode(inode))
- dquot_initialize(inode);
+ if (is_bad_inode(inode))
+ goto no_delete;
+ dquot_initialize(inode);
if (ext4_should_order_data(inode))
ext4_begin_ordered_truncate(inode, 0);
truncate_inode_pages_final(&inode->i_data);
WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count));
- if (is_bad_inode(inode))
- goto no_delete;
/*
* Protect us against freezing - iput() caller didn't have to have any
@@ -590,20 +587,12 @@
/*
* New blocks allocate and/or writing to unwritten extent
* will possibly result in updating i_data, so we take
- * the write lock of i_data_sem, and call get_blocks()
+ * the write lock of i_data_sem, and call get_block()
* with create == 1 flag.
*/
down_write(&EXT4_I(inode)->i_data_sem);
/*
- * if the caller is from delayed allocation writeout path
- * we have already reserved fs blocks for allocation
- * let the underlying get_block() function know to
- * avoid double accounting
- */
- if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
- ext4_set_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
- /*
* We need to check for EXT4 here because migrate
* could have changed the inode type in between
*/
@@ -631,8 +620,6 @@
(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
ext4_da_update_reserve_space(inode, retval, 1);
}
- if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
- ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
if (retval > 0) {
unsigned int status;
@@ -734,11 +721,11 @@
* `handle' can be NULL if create is zero
*/
struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
- ext4_lblk_t block, int create, int *errp)
+ ext4_lblk_t block, int create)
{
struct ext4_map_blocks map;
struct buffer_head *bh;
- int fatal = 0, err;
+ int err;
J_ASSERT(handle != NULL || create == 0);
@@ -747,21 +734,14 @@
err = ext4_map_blocks(handle, inode, &map,
create ? EXT4_GET_BLOCKS_CREATE : 0);
- /* ensure we send some value back into *errp */
- *errp = 0;
-
- if (create && err == 0)
- err = -ENOSPC; /* should never happen */
+ if (err == 0)
+ return create ? ERR_PTR(-ENOSPC) : NULL;
if (err < 0)
- *errp = err;
- if (err <= 0)
- return NULL;
+ return ERR_PTR(err);
bh = sb_getblk(inode->i_sb, map.m_pblk);
- if (unlikely(!bh)) {
- *errp = -ENOMEM;
- return NULL;
- }
+ if (unlikely(!bh))
+ return ERR_PTR(-ENOMEM);
if (map.m_flags & EXT4_MAP_NEW) {
J_ASSERT(create != 0);
J_ASSERT(handle != NULL);
@@ -775,44 +755,44 @@
*/
lock_buffer(bh);
BUFFER_TRACE(bh, "call get_create_access");
- fatal = ext4_journal_get_create_access(handle, bh);
- if (!fatal && !buffer_uptodate(bh)) {
+ err = ext4_journal_get_create_access(handle, bh);
+ if (unlikely(err)) {
+ unlock_buffer(bh);
+ goto errout;
+ }
+ if (!buffer_uptodate(bh)) {
memset(bh->b_data, 0, inode->i_sb->s_blocksize);
set_buffer_uptodate(bh);
}
unlock_buffer(bh);
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
err = ext4_handle_dirty_metadata(handle, inode, bh);
- if (!fatal)
- fatal = err;
- } else {
+ if (unlikely(err))
+ goto errout;
+ } else
BUFFER_TRACE(bh, "not a new buffer");
- }
- if (fatal) {
- *errp = fatal;
- brelse(bh);
- bh = NULL;
- }
return bh;
+errout:
+ brelse(bh);
+ return ERR_PTR(err);
}
struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
- ext4_lblk_t block, int create, int *err)
+ ext4_lblk_t block, int create)
{
struct buffer_head *bh;
- bh = ext4_getblk(handle, inode, block, create, err);
- if (!bh)
+ bh = ext4_getblk(handle, inode, block, create);
+ if (IS_ERR(bh))
return bh;
- if (buffer_uptodate(bh))
+ if (!bh || buffer_uptodate(bh))
return bh;
ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
wait_on_buffer(bh);
if (buffer_uptodate(bh))
return bh;
put_bh(bh);
- *err = -EIO;
- return NULL;
+ return ERR_PTR(-EIO);
}
int ext4_walk_page_buffers(handle_t *handle,
@@ -1536,7 +1516,7 @@
}
/*
- * This is a special get_blocks_t callback which is used by
+ * This is a special get_block_t callback which is used by
* ext4_da_write_begin(). It will either return mapped block or
* reserve space for a single block.
*
@@ -2011,12 +1991,10 @@
* in data loss. So use reserved blocks to allocate metadata if
* possible.
*
- * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if the blocks
- * in question are delalloc blocks. This affects functions in many
- * different parts of the allocation call path. This flag exists
- * primarily because we don't want to change *many* call functions, so
- * ext4_map_blocks() will set the EXT4_STATE_DELALLOC_RESERVED flag
- * once the inode's allocation semaphore is taken.
+ * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if
+ * the blocks in question are delalloc blocks. This indicates
+ * that the blocks and quotas have already been checked when
+ * the data was copied into the page cache.
*/
get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
EXT4_GET_BLOCKS_METADATA_NOFAIL;
@@ -2515,6 +2493,20 @@
return 0;
}
+/* We always reserve for an inode update; the superblock could be there too */
+static int ext4_da_write_credits(struct inode *inode, loff_t pos, unsigned len)
+{
+ if (likely(EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+ EXT4_FEATURE_RO_COMPAT_LARGE_FILE)))
+ return 1;
+
+ if (pos + len <= 0x7fffffffULL)
+ return 1;
+
+ /* We might need to update the superblock to set LARGE_FILE */
+ return 2;
+}
+
static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
@@ -2565,7 +2557,8 @@
* of file which has an already mapped buffer.
*/
retry_journal:
- handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, 1);
+ handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
+ ext4_da_write_credits(inode, pos, len));
if (IS_ERR(handle)) {
page_cache_release(page);
return PTR_ERR(handle);
@@ -2658,10 +2651,7 @@
if (copied && new_i_size > EXT4_I(inode)->i_disksize) {
if (ext4_has_inline_data(inode) ||
ext4_da_should_update_i_disksize(page, end)) {
- down_write(&EXT4_I(inode)->i_data_sem);
- if (new_i_size > EXT4_I(inode)->i_disksize)
- EXT4_I(inode)->i_disksize = new_i_size;
- up_write(&EXT4_I(inode)->i_data_sem);
+ ext4_update_i_disksize(inode, new_i_size);
/* We need to mark inode dirty even if
* new_i_size is less that inode->i_size
* bu greater than i_disksize.(hint delalloc)
@@ -3936,8 +3926,7 @@
ei->i_extra_isize = 0;
/* Precompute checksum seed for inode metadata */
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
+ if (ext4_has_metadata_csum(sb)) {
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
__u32 csum;
__le32 inum = cpu_to_le32(inode->i_ino);
@@ -4127,6 +4116,13 @@
return ERR_PTR(ret);
}
+struct inode *ext4_iget_normal(struct super_block *sb, unsigned long ino)
+{
+ if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
+ return ERR_PTR(-EIO);
+ return ext4_iget(sb, ino);
+}
+
static int ext4_inode_blocks_set(handle_t *handle,
struct ext4_inode *raw_inode,
struct ext4_inode_info *ei)
@@ -4226,7 +4222,8 @@
EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);
- if (ext4_inode_blocks_set(handle, raw_inode, ei)) {
+ err = ext4_inode_blocks_set(handle, raw_inode, ei);
+ if (err) {
spin_unlock(&ei->i_raw_lock);
goto out_brelse;
}
@@ -4536,8 +4533,12 @@
ext4_orphan_del(NULL, inode);
goto err_out;
}
- } else
+ } else {
+ loff_t oldsize = inode->i_size;
+
i_size_write(inode, attr->ia_size);
+ pagecache_isize_extended(inode, oldsize, inode->i_size);
+ }
/*
* Blocks are going to be removed from the inode. Wait
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 0f2252e..bfda18a 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -331,8 +331,7 @@
if (!inode_owner_or_capable(inode))
return -EPERM;
- if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
+ if (ext4_has_metadata_csum(inode->i_sb)) {
ext4_warning(sb, "Setting inode version is not "
"supported with metadata_csum enabled.");
return -ENOTTY;
@@ -532,9 +531,17 @@
}
case EXT4_IOC_SWAP_BOOT:
+ {
+ int err;
if (!(filp->f_mode & FMODE_WRITE))
return -EBADF;
- return swap_inode_boot_loader(sb, inode);
+ err = mnt_want_write_file(filp);
+ if (err)
+ return err;
+ err = swap_inode_boot_loader(sb, inode);
+ mnt_drop_write_file(filp);
+ return err;
+ }
case EXT4_IOC_RESIZE_FS: {
ext4_fsblk_t n_blocks_count;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 748c913..dbfe15c 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3155,9 +3155,8 @@
"start %lu, size %lu, fe_logical %lu",
(unsigned long) start, (unsigned long) size,
(unsigned long) ac->ac_o_ex.fe_logical);
+ BUG();
}
- BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
- start > ac->ac_o_ex.fe_logical);
BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
/* now prepare goal request */
@@ -4410,14 +4409,7 @@
if (IS_NOQUOTA(ar->inode))
ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
- /*
- * For delayed allocation, we could skip the ENOSPC and
- * EDQUOT check, as blocks and quotas have been already
- * reserved when data being copied into pagecache.
- */
- if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED))
- ar->flags |= EXT4_MB_DELALLOC_RESERVED;
- else {
+ if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) {
/* Without delayed allocation we need to verify
* there is enough free blocks to do block allocation
* and verify allocation doesn't exceed the quota limits.
@@ -4528,8 +4520,7 @@
if (inquota && ar->len < inquota)
dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
if (!ar->len) {
- if (!ext4_test_inode_state(ar->inode,
- EXT4_STATE_DELALLOC_RESERVED))
+ if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0)
/* release all the reserved blocks if non delalloc */
percpu_counter_sub(&sbi->s_dirtyclusters_counter,
reserv_clstrs);
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index d3567f2..a432634 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -41,8 +41,7 @@
ext4_ext_store_pblock(&newext, lb->first_pblock);
/* Locking only for convinience since we are operating on temp inode */
down_write(&EXT4_I(inode)->i_data_sem);
- path = ext4_ext_find_extent(inode, lb->first_block, NULL, 0);
-
+ path = ext4_find_extent(inode, lb->first_block, NULL, 0);
if (IS_ERR(path)) {
retval = PTR_ERR(path);
path = NULL;
@@ -81,13 +80,11 @@
goto err_out;
}
}
- retval = ext4_ext_insert_extent(handle, inode, path, &newext, 0);
+ retval = ext4_ext_insert_extent(handle, inode, &path, &newext, 0);
err_out:
up_write((&EXT4_I(inode)->i_data_sem));
- if (path) {
- ext4_ext_drop_refs(path);
- kfree(path);
- }
+ ext4_ext_drop_refs(path);
+ kfree(path);
lb->first_pblock = 0;
return retval;
}
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 32bce84..8313ca3 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -20,8 +20,7 @@
static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
{
- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(sb))
return 1;
return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp);
@@ -29,8 +28,7 @@
static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
{
- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(sb))
return;
mmp->mmp_checksum = ext4_mmp_csum(sb, mmp);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 671a74b..9f2311b 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -27,120 +27,26 @@
* @lblock: logical block number to find an extent path
* @path: pointer to an extent path pointer (for output)
*
- * ext4_ext_find_extent wrapper. Return 0 on success, or a negative error value
+ * ext4_find_extent wrapper. Return 0 on success, or a negative error value
* on failure.
*/
static inline int
get_ext_path(struct inode *inode, ext4_lblk_t lblock,
- struct ext4_ext_path **orig_path)
+ struct ext4_ext_path **ppath)
{
- int ret = 0;
struct ext4_ext_path *path;
- path = ext4_ext_find_extent(inode, lblock, *orig_path, EXT4_EX_NOCACHE);
+ path = ext4_find_extent(inode, lblock, ppath, EXT4_EX_NOCACHE);
if (IS_ERR(path))
- ret = PTR_ERR(path);
- else if (path[ext_depth(inode)].p_ext == NULL)
- ret = -ENODATA;
- else
- *orig_path = path;
-
- return ret;
-}
-
-/**
- * copy_extent_status - Copy the extent's initialization status
- *
- * @src: an extent for getting initialize status
- * @dest: an extent to be set the status
- */
-static void
-copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest)
-{
- if (ext4_ext_is_unwritten(src))
- ext4_ext_mark_unwritten(dest);
- else
- dest->ee_len = cpu_to_le16(ext4_ext_get_actual_len(dest));
-}
-
-/**
- * mext_next_extent - Search for the next extent and set it to "extent"
- *
- * @inode: inode which is searched
- * @path: this will obtain data for the next extent
- * @extent: pointer to the next extent we have just gotten
- *
- * Search the next extent in the array of ext4_ext_path structure (@path)
- * and set it to ext4_extent structure (@extent). In addition, the member of
- * @path (->p_ext) also points the next extent. Return 0 on success, 1 if
- * ext4_ext_path structure refers to the last extent, or a negative error
- * value on failure.
- */
-int
-mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
- struct ext4_extent **extent)
-{
- struct ext4_extent_header *eh;
- int ppos, leaf_ppos = path->p_depth;
-
- ppos = leaf_ppos;
- if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
- /* leaf block */
- *extent = ++path[ppos].p_ext;
- path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);
- return 0;
+ return PTR_ERR(path);
+ if (path[ext_depth(inode)].p_ext == NULL) {
+ ext4_ext_drop_refs(path);
+ kfree(path);
+ *ppath = NULL;
+ return -ENODATA;
}
-
- while (--ppos >= 0) {
- if (EXT_LAST_INDEX(path[ppos].p_hdr) >
- path[ppos].p_idx) {
- int cur_ppos = ppos;
-
- /* index block */
- path[ppos].p_idx++;
- path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
- if (path[ppos+1].p_bh)
- brelse(path[ppos+1].p_bh);
- path[ppos+1].p_bh =
- sb_bread(inode->i_sb, path[ppos].p_block);
- if (!path[ppos+1].p_bh)
- return -EIO;
- path[ppos+1].p_hdr =
- ext_block_hdr(path[ppos+1].p_bh);
-
- /* Halfway index block */
- while (++cur_ppos < leaf_ppos) {
- path[cur_ppos].p_idx =
- EXT_FIRST_INDEX(path[cur_ppos].p_hdr);
- path[cur_ppos].p_block =
- ext4_idx_pblock(path[cur_ppos].p_idx);
- if (path[cur_ppos+1].p_bh)
- brelse(path[cur_ppos+1].p_bh);
- path[cur_ppos+1].p_bh = sb_bread(inode->i_sb,
- path[cur_ppos].p_block);
- if (!path[cur_ppos+1].p_bh)
- return -EIO;
- path[cur_ppos+1].p_hdr =
- ext_block_hdr(path[cur_ppos+1].p_bh);
- }
-
- path[leaf_ppos].p_ext = *extent = NULL;
-
- eh = path[leaf_ppos].p_hdr;
- if (le16_to_cpu(eh->eh_entries) == 0)
- /* empty leaf is found */
- return -ENODATA;
-
- /* leaf block */
- path[leaf_ppos].p_ext = *extent =
- EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
- path[leaf_ppos].p_block =
- ext4_ext_pblock(path[leaf_ppos].p_ext);
- return 0;
- }
- }
- /* We found the last extent */
- return 1;
+ *ppath = path;
+ return 0;
}
/**
@@ -178,417 +84,6 @@
}
/**
- * mext_insert_across_blocks - Insert extents across leaf block
- *
- * @handle: journal handle
- * @orig_inode: original inode
- * @o_start: first original extent to be changed
- * @o_end: last original extent to be changed
- * @start_ext: first new extent to be inserted
- * @new_ext: middle of new extent to be inserted
- * @end_ext: last new extent to be inserted
- *
- * Allocate a new leaf block and insert extents into it. Return 0 on success,
- * or a negative error value on failure.
- */
-static int
-mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
- struct ext4_extent *o_start, struct ext4_extent *o_end,
- struct ext4_extent *start_ext, struct ext4_extent *new_ext,
- struct ext4_extent *end_ext)
-{
- struct ext4_ext_path *orig_path = NULL;
- ext4_lblk_t eblock = 0;
- int new_flag = 0;
- int end_flag = 0;
- int err = 0;
-
- if (start_ext->ee_len && new_ext->ee_len && end_ext->ee_len) {
- if (o_start == o_end) {
-
- /* start_ext new_ext end_ext
- * donor |---------|-----------|--------|
- * orig |------------------------------|
- */
- end_flag = 1;
- } else {
-
- /* start_ext new_ext end_ext
- * donor |---------|----------|---------|
- * orig |---------------|--------------|
- */
- o_end->ee_block = end_ext->ee_block;
- o_end->ee_len = end_ext->ee_len;
- ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
- }
-
- o_start->ee_len = start_ext->ee_len;
- eblock = le32_to_cpu(start_ext->ee_block);
- new_flag = 1;
-
- } else if (start_ext->ee_len && new_ext->ee_len &&
- !end_ext->ee_len && o_start == o_end) {
-
- /* start_ext new_ext
- * donor |--------------|---------------|
- * orig |------------------------------|
- */
- o_start->ee_len = start_ext->ee_len;
- eblock = le32_to_cpu(start_ext->ee_block);
- new_flag = 1;
-
- } else if (!start_ext->ee_len && new_ext->ee_len &&
- end_ext->ee_len && o_start == o_end) {
-
- /* new_ext end_ext
- * donor |--------------|---------------|
- * orig |------------------------------|
- */
- o_end->ee_block = end_ext->ee_block;
- o_end->ee_len = end_ext->ee_len;
- ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
-
- /*
- * Set 0 to the extent block if new_ext was
- * the first block.
- */
- if (new_ext->ee_block)
- eblock = le32_to_cpu(new_ext->ee_block);
-
- new_flag = 1;
- } else {
- ext4_debug("ext4 move extent: Unexpected insert case\n");
- return -EIO;
- }
-
- if (new_flag) {
- err = get_ext_path(orig_inode, eblock, &orig_path);
- if (err)
- goto out;
-
- if (ext4_ext_insert_extent(handle, orig_inode,
- orig_path, new_ext, 0))
- goto out;
- }
-
- if (end_flag) {
- err = get_ext_path(orig_inode,
- le32_to_cpu(end_ext->ee_block) - 1, &orig_path);
- if (err)
- goto out;
-
- if (ext4_ext_insert_extent(handle, orig_inode,
- orig_path, end_ext, 0))
- goto out;
- }
-out:
- if (orig_path) {
- ext4_ext_drop_refs(orig_path);
- kfree(orig_path);
- }
-
- return err;
-
-}
-
-/**
- * mext_insert_inside_block - Insert new extent to the extent block
- *
- * @o_start: first original extent to be moved
- * @o_end: last original extent to be moved
- * @start_ext: first new extent to be inserted
- * @new_ext: middle of new extent to be inserted
- * @end_ext: last new extent to be inserted
- * @eh: extent header of target leaf block
- * @range_to_move: used to decide how to insert extent
- *
- * Insert extents into the leaf block. The extent (@o_start) is overwritten
- * by inserted extents.
- */
-static void
-mext_insert_inside_block(struct ext4_extent *o_start,
- struct ext4_extent *o_end,
- struct ext4_extent *start_ext,
- struct ext4_extent *new_ext,
- struct ext4_extent *end_ext,
- struct ext4_extent_header *eh,
- int range_to_move)
-{
- int i = 0;
- unsigned long len;
-
- /* Move the existing extents */
- if (range_to_move && o_end < EXT_LAST_EXTENT(eh)) {
- len = (unsigned long)(EXT_LAST_EXTENT(eh) + 1) -
- (unsigned long)(o_end + 1);
- memmove(o_end + 1 + range_to_move, o_end + 1, len);
- }
-
- /* Insert start entry */
- if (start_ext->ee_len)
- o_start[i++].ee_len = start_ext->ee_len;
-
- /* Insert new entry */
- if (new_ext->ee_len) {
- o_start[i] = *new_ext;
- ext4_ext_store_pblock(&o_start[i++], ext4_ext_pblock(new_ext));
- }
-
- /* Insert end entry */
- if (end_ext->ee_len)
- o_start[i] = *end_ext;
-
- /* Increment the total entries counter on the extent block */
- le16_add_cpu(&eh->eh_entries, range_to_move);
-}
-
-/**
- * mext_insert_extents - Insert new extent
- *
- * @handle: journal handle
- * @orig_inode: original inode
- * @orig_path: path indicates first extent to be changed
- * @o_start: first original extent to be changed
- * @o_end: last original extent to be changed
- * @start_ext: first new extent to be inserted
- * @new_ext: middle of new extent to be inserted
- * @end_ext: last new extent to be inserted
- *
- * Call the function to insert extents. If we cannot add more extents into
- * the leaf block, we call mext_insert_across_blocks() to create a
- * new leaf block. Otherwise call mext_insert_inside_block(). Return 0
- * on success, or a negative error value on failure.
- */
-static int
-mext_insert_extents(handle_t *handle, struct inode *orig_inode,
- struct ext4_ext_path *orig_path,
- struct ext4_extent *o_start,
- struct ext4_extent *o_end,
- struct ext4_extent *start_ext,
- struct ext4_extent *new_ext,
- struct ext4_extent *end_ext)
-{
- struct ext4_extent_header *eh;
- unsigned long need_slots, slots_range;
- int range_to_move, depth, ret;
-
- /*
- * The extents need to be inserted
- * start_extent + new_extent + end_extent.
- */
- need_slots = (start_ext->ee_len ? 1 : 0) + (end_ext->ee_len ? 1 : 0) +
- (new_ext->ee_len ? 1 : 0);
-
- /* The number of slots between start and end */
- slots_range = ((unsigned long)(o_end + 1) - (unsigned long)o_start + 1)
- / sizeof(struct ext4_extent);
-
- /* Range to move the end of extent */
- range_to_move = need_slots - slots_range;
- depth = orig_path->p_depth;
- orig_path += depth;
- eh = orig_path->p_hdr;
-
- if (depth) {
- /* Register to journal */
- BUFFER_TRACE(orig_path->p_bh, "get_write_access");
- ret = ext4_journal_get_write_access(handle, orig_path->p_bh);
- if (ret)
- return ret;
- }
-
- /* Expansion */
- if (range_to_move > 0 &&
- (range_to_move > le16_to_cpu(eh->eh_max)
- - le16_to_cpu(eh->eh_entries))) {
-
- ret = mext_insert_across_blocks(handle, orig_inode, o_start,
- o_end, start_ext, new_ext, end_ext);
- if (ret < 0)
- return ret;
- } else
- mext_insert_inside_block(o_start, o_end, start_ext, new_ext,
- end_ext, eh, range_to_move);
-
- return ext4_ext_dirty(handle, orig_inode, orig_path);
-}
-
-/**
- * mext_leaf_block - Move one leaf extent block into the inode.
- *
- * @handle: journal handle
- * @orig_inode: original inode
- * @orig_path: path indicates first extent to be changed
- * @dext: donor extent
- * @from: start offset on the target file
- *
- * In order to insert extents into the leaf block, we must divide the extent
- * in the leaf block into three extents. The one is located to be inserted
- * extents, and the others are located around it.
- *
- * Therefore, this function creates structures to save extents of the leaf
- * block, and inserts extents by calling mext_insert_extents() with
- * created extents. Return 0 on success, or a negative error value on failure.
- */
-static int
-mext_leaf_block(handle_t *handle, struct inode *orig_inode,
- struct ext4_ext_path *orig_path, struct ext4_extent *dext,
- ext4_lblk_t *from)
-{
- struct ext4_extent *oext, *o_start, *o_end, *prev_ext;
- struct ext4_extent new_ext, start_ext, end_ext;
- ext4_lblk_t new_ext_end;
- int oext_alen, new_ext_alen, end_ext_alen;
- int depth = ext_depth(orig_inode);
- int ret;
-
- start_ext.ee_block = end_ext.ee_block = 0;
- o_start = o_end = oext = orig_path[depth].p_ext;
- oext_alen = ext4_ext_get_actual_len(oext);
- start_ext.ee_len = end_ext.ee_len = 0;
-
- new_ext.ee_block = cpu_to_le32(*from);
- ext4_ext_store_pblock(&new_ext, ext4_ext_pblock(dext));
- new_ext.ee_len = dext->ee_len;
- new_ext_alen = ext4_ext_get_actual_len(&new_ext);
- new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1;
-
- /*
- * Case: original extent is first
- * oext |--------|
- * new_ext |--|
- * start_ext |--|
- */
- if (le32_to_cpu(oext->ee_block) < le32_to_cpu(new_ext.ee_block) &&
- le32_to_cpu(new_ext.ee_block) <
- le32_to_cpu(oext->ee_block) + oext_alen) {
- start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) -
- le32_to_cpu(oext->ee_block));
- start_ext.ee_block = oext->ee_block;
- copy_extent_status(oext, &start_ext);
- } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) {
- prev_ext = oext - 1;
- /*
- * We can merge new_ext into previous extent,
- * if these are contiguous and same extent type.
- */
- if (ext4_can_extents_be_merged(orig_inode, prev_ext,
- &new_ext)) {
- o_start = prev_ext;
- start_ext.ee_len = cpu_to_le16(
- ext4_ext_get_actual_len(prev_ext) +
- new_ext_alen);
- start_ext.ee_block = oext->ee_block;
- copy_extent_status(prev_ext, &start_ext);
- new_ext.ee_len = 0;
- }
- }
-
- /*
- * Case: new_ext_end must be less than oext
- * oext |-----------|
- * new_ext |-------|
- */
- if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) {
- EXT4_ERROR_INODE(orig_inode,
- "new_ext_end(%u) should be less than or equal to "
- "oext->ee_block(%u) + oext_alen(%d) - 1",
- new_ext_end, le32_to_cpu(oext->ee_block),
- oext_alen);
- ret = -EIO;
- goto out;
- }
-
- /*
- * Case: new_ext is smaller than original extent
- * oext |---------------|
- * new_ext |-----------|
- * end_ext |---|
- */
- if (le32_to_cpu(oext->ee_block) <= new_ext_end &&
- new_ext_end < le32_to_cpu(oext->ee_block) + oext_alen - 1) {
- end_ext.ee_len =
- cpu_to_le16(le32_to_cpu(oext->ee_block) +
- oext_alen - 1 - new_ext_end);
- copy_extent_status(oext, &end_ext);
- end_ext_alen = ext4_ext_get_actual_len(&end_ext);
- ext4_ext_store_pblock(&end_ext,
- (ext4_ext_pblock(o_end) + oext_alen - end_ext_alen));
- end_ext.ee_block =
- cpu_to_le32(le32_to_cpu(o_end->ee_block) +
- oext_alen - end_ext_alen);
- }
-
- ret = mext_insert_extents(handle, orig_inode, orig_path, o_start,
- o_end, &start_ext, &new_ext, &end_ext);
-out:
- return ret;
-}
-
-/**
- * mext_calc_swap_extents - Calculate extents for extent swapping.
- *
- * @tmp_dext: the extent that will belong to the original inode
- * @tmp_oext: the extent that will belong to the donor inode
- * @orig_off: block offset of original inode
- * @donor_off: block offset of donor inode
- * @max_count: the maximum length of extents
- *
- * Return 0 on success, or a negative error value on failure.
- */
-static int
-mext_calc_swap_extents(struct ext4_extent *tmp_dext,
- struct ext4_extent *tmp_oext,
- ext4_lblk_t orig_off, ext4_lblk_t donor_off,
- ext4_lblk_t max_count)
-{
- ext4_lblk_t diff, orig_diff;
- struct ext4_extent dext_old, oext_old;
-
- BUG_ON(orig_off != donor_off);
-
- /* original and donor extents have to cover the same block offset */
- if (orig_off < le32_to_cpu(tmp_oext->ee_block) ||
- le32_to_cpu(tmp_oext->ee_block) +
- ext4_ext_get_actual_len(tmp_oext) - 1 < orig_off)
- return -ENODATA;
-
- if (orig_off < le32_to_cpu(tmp_dext->ee_block) ||
- le32_to_cpu(tmp_dext->ee_block) +
- ext4_ext_get_actual_len(tmp_dext) - 1 < orig_off)
- return -ENODATA;
-
- dext_old = *tmp_dext;
- oext_old = *tmp_oext;
-
- /* When tmp_dext is too large, pick up the target range. */
- diff = donor_off - le32_to_cpu(tmp_dext->ee_block);
-
- ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff);
- le32_add_cpu(&tmp_dext->ee_block, diff);
- le16_add_cpu(&tmp_dext->ee_len, -diff);
-
- if (max_count < ext4_ext_get_actual_len(tmp_dext))
- tmp_dext->ee_len = cpu_to_le16(max_count);
-
- orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block);
- ext4_ext_store_pblock(tmp_oext, ext4_ext_pblock(tmp_oext) + orig_diff);
-
- /* Adjust extent length if donor extent is larger than orig */
- if (ext4_ext_get_actual_len(tmp_dext) >
- ext4_ext_get_actual_len(tmp_oext) - orig_diff)
- tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_oext->ee_len) -
- orig_diff);
-
- tmp_oext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(tmp_dext));
-
- copy_extent_status(&oext_old, tmp_dext);
- copy_extent_status(&dext_old, tmp_oext);
-
- return 0;
-}
-
-/**
* mext_check_coverage - Check that all extents in range has the same type
*
* @inode: inode in question
@@ -619,171 +114,25 @@
}
ret = 1;
out:
- if (path) {
- ext4_ext_drop_refs(path);
- kfree(path);
- }
+ ext4_ext_drop_refs(path);
+ kfree(path);
return ret;
}
/**
- * mext_replace_branches - Replace original extents with new extents
- *
- * @handle: journal handle
- * @orig_inode: original inode
- * @donor_inode: donor inode
- * @from: block offset of orig_inode
- * @count: block count to be replaced
- * @err: pointer to save return value
- *
- * Replace original inode extents and donor inode extents page by page.
- * We implement this replacement in the following three steps:
- * 1. Save the block information of original and donor inodes into
- * dummy extents.
- * 2. Change the block information of original inode to point at the
- * donor inode blocks.
- * 3. Change the block information of donor inode to point at the saved
- * original inode blocks in the dummy extents.
- *
- * Return replaced block count.
- */
-static int
-mext_replace_branches(handle_t *handle, struct inode *orig_inode,
- struct inode *donor_inode, ext4_lblk_t from,
- ext4_lblk_t count, int *err)
-{
- struct ext4_ext_path *orig_path = NULL;
- struct ext4_ext_path *donor_path = NULL;
- struct ext4_extent *oext, *dext;
- struct ext4_extent tmp_dext, tmp_oext;
- ext4_lblk_t orig_off = from, donor_off = from;
- int depth;
- int replaced_count = 0;
- int dext_alen;
-
- *err = ext4_es_remove_extent(orig_inode, from, count);
- if (*err)
- goto out;
-
- *err = ext4_es_remove_extent(donor_inode, from, count);
- if (*err)
- goto out;
-
- /* Get the original extent for the block "orig_off" */
- *err = get_ext_path(orig_inode, orig_off, &orig_path);
- if (*err)
- goto out;
-
- /* Get the donor extent for the head */
- *err = get_ext_path(donor_inode, donor_off, &donor_path);
- if (*err)
- goto out;
- depth = ext_depth(orig_inode);
- oext = orig_path[depth].p_ext;
- tmp_oext = *oext;
-
- depth = ext_depth(donor_inode);
- dext = donor_path[depth].p_ext;
- if (unlikely(!dext))
- goto missing_donor_extent;
- tmp_dext = *dext;
-
- *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
- donor_off, count);
- if (*err)
- goto out;
-
- /* Loop for the donor extents */
- while (1) {
- /* The extent for donor must be found. */
- if (unlikely(!dext)) {
- missing_donor_extent:
- EXT4_ERROR_INODE(donor_inode,
- "The extent for donor must be found");
- *err = -EIO;
- goto out;
- } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) {
- EXT4_ERROR_INODE(donor_inode,
- "Donor offset(%u) and the first block of donor "
- "extent(%u) should be equal",
- donor_off,
- le32_to_cpu(tmp_dext.ee_block));
- *err = -EIO;
- goto out;
- }
-
- /* Set donor extent to orig extent */
- *err = mext_leaf_block(handle, orig_inode,
- orig_path, &tmp_dext, &orig_off);
- if (*err)
- goto out;
-
- /* Set orig extent to donor extent */
- *err = mext_leaf_block(handle, donor_inode,
- donor_path, &tmp_oext, &donor_off);
- if (*err)
- goto out;
-
- dext_alen = ext4_ext_get_actual_len(&tmp_dext);
- replaced_count += dext_alen;
- donor_off += dext_alen;
- orig_off += dext_alen;
-
- BUG_ON(replaced_count > count);
- /* Already moved the expected blocks */
- if (replaced_count >= count)
- break;
-
- if (orig_path)
- ext4_ext_drop_refs(orig_path);
- *err = get_ext_path(orig_inode, orig_off, &orig_path);
- if (*err)
- goto out;
- depth = ext_depth(orig_inode);
- oext = orig_path[depth].p_ext;
- tmp_oext = *oext;
-
- if (donor_path)
- ext4_ext_drop_refs(donor_path);
- *err = get_ext_path(donor_inode, donor_off, &donor_path);
- if (*err)
- goto out;
- depth = ext_depth(donor_inode);
- dext = donor_path[depth].p_ext;
- tmp_dext = *dext;
-
- *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
- donor_off, count - replaced_count);
- if (*err)
- goto out;
- }
-
-out:
- if (orig_path) {
- ext4_ext_drop_refs(orig_path);
- kfree(orig_path);
- }
- if (donor_path) {
- ext4_ext_drop_refs(donor_path);
- kfree(donor_path);
- }
-
- return replaced_count;
-}
-
-/**
* mext_page_double_lock - Grab and lock pages on both @inode1 and @inode2
*
* @inode1: the inode structure
* @inode2: the inode structure
- * @index: page index
+ * @index1: page index in @inode1
+ * @index2: page index in @inode2
* @page: result page vector
*
* Grab two locked pages for inode's by inode order
*/
static int
mext_page_double_lock(struct inode *inode1, struct inode *inode2,
- pgoff_t index, struct page *page[2])
+ pgoff_t index1, pgoff_t index2, struct page *page[2])
{
struct address_space *mapping[2];
unsigned fl = AOP_FLAG_NOFS;
@@ -793,15 +142,18 @@
mapping[0] = inode1->i_mapping;
mapping[1] = inode2->i_mapping;
} else {
+ pgoff_t tmp = index1;
+ index1 = index2;
+ index2 = tmp;
mapping[0] = inode2->i_mapping;
mapping[1] = inode1->i_mapping;
}
- page[0] = grab_cache_page_write_begin(mapping[0], index, fl);
+ page[0] = grab_cache_page_write_begin(mapping[0], index1, fl);
if (!page[0])
return -ENOMEM;
- page[1] = grab_cache_page_write_begin(mapping[1], index, fl);
+ page[1] = grab_cache_page_write_begin(mapping[1], index2, fl);
if (!page[1]) {
unlock_page(page[0]);
page_cache_release(page[0]);
@@ -893,25 +245,27 @@
* @o_filp: file structure of original file
* @donor_inode: donor inode
* @orig_page_offset: page index on original file
+ * @donor_page_offset: page index on donor file
* @data_offset_in_page: block index where data swapping starts
* @block_len_in_page: the number of blocks to be swapped
* @unwritten: orig extent is unwritten or not
* @err: pointer to save return value
*
* Save the data in original inode blocks and replace original inode extents
- * with donor inode extents by calling mext_replace_branches().
+ * with donor inode extents by calling ext4_swap_extents().
* Finally, write out the saved data in new original inode blocks. Return
* replaced block count.
*/
static int
move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
- pgoff_t orig_page_offset, int data_offset_in_page,
- int block_len_in_page, int unwritten, int *err)
+ pgoff_t orig_page_offset, pgoff_t donor_page_offset,
+ int data_offset_in_page,
+ int block_len_in_page, int unwritten, int *err)
{
struct inode *orig_inode = file_inode(o_filp);
struct page *pagep[2] = {NULL, NULL};
handle_t *handle;
- ext4_lblk_t orig_blk_offset;
+ ext4_lblk_t orig_blk_offset, donor_blk_offset;
unsigned long blocksize = orig_inode->i_sb->s_blocksize;
unsigned int w_flags = 0;
unsigned int tmp_data_size, data_size, replaced_size;
@@ -939,6 +293,9 @@
orig_blk_offset = orig_page_offset * blocks_per_page +
data_offset_in_page;
+ donor_blk_offset = donor_page_offset * blocks_per_page +
+ data_offset_in_page;
+
/* Calculate data_size */
if ((orig_blk_offset + block_len_in_page - 1) ==
((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
@@ -959,7 +316,7 @@
replaced_size = data_size;
*err = mext_page_double_lock(orig_inode, donor_inode, orig_page_offset,
- pagep);
+ donor_page_offset, pagep);
if (unlikely(*err < 0))
goto stop_journal;
/*
@@ -978,7 +335,7 @@
if (*err)
goto drop_data_sem;
- unwritten &= mext_check_coverage(donor_inode, orig_blk_offset,
+ unwritten &= mext_check_coverage(donor_inode, donor_blk_offset,
block_len_in_page, 1, err);
if (*err)
goto drop_data_sem;
@@ -994,9 +351,10 @@
*err = -EBUSY;
goto drop_data_sem;
}
- replaced_count = mext_replace_branches(handle, orig_inode,
- donor_inode, orig_blk_offset,
- block_len_in_page, err);
+ replaced_count = ext4_swap_extents(handle, orig_inode,
+ donor_inode, orig_blk_offset,
+ donor_blk_offset,
+ block_len_in_page, 1, err);
drop_data_sem:
ext4_double_up_write_data_sem(orig_inode, donor_inode);
goto unlock_pages;
@@ -1014,9 +372,9 @@
goto unlock_pages;
}
ext4_double_down_write_data_sem(orig_inode, donor_inode);
- replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
- orig_blk_offset,
- block_len_in_page, err);
+ replaced_count = ext4_swap_extents(handle, orig_inode, donor_inode,
+ orig_blk_offset, donor_blk_offset,
+ block_len_in_page, 1, err);
ext4_double_up_write_data_sem(orig_inode, donor_inode);
if (*err) {
if (replaced_count) {
@@ -1061,9 +419,9 @@
* Try to swap extents to it's original places
*/
ext4_double_down_write_data_sem(orig_inode, donor_inode);
- replaced_count = mext_replace_branches(handle, donor_inode, orig_inode,
- orig_blk_offset,
- block_len_in_page, &err2);
+ replaced_count = ext4_swap_extents(handle, donor_inode, orig_inode,
+ orig_blk_offset, donor_blk_offset,
+ block_len_in_page, 0, &err2);
ext4_double_up_write_data_sem(orig_inode, donor_inode);
if (replaced_count != block_len_in_page) {
EXT4_ERROR_INODE_BLOCK(orig_inode, (sector_t)(orig_blk_offset),
@@ -1093,10 +451,14 @@
struct inode *donor_inode, __u64 orig_start,
__u64 donor_start, __u64 *len)
{
- ext4_lblk_t orig_blocks, donor_blocks;
+ __u64 orig_eof, donor_eof;
unsigned int blkbits = orig_inode->i_blkbits;
unsigned int blocksize = 1 << blkbits;
+ orig_eof = (i_size_read(orig_inode) + blocksize - 1) >> blkbits;
+ donor_eof = (i_size_read(donor_inode) + blocksize - 1) >> blkbits;
+
+
if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
ext4_debug("ext4 move extent: suid or sgid is set"
" to donor file [ino:orig %lu, donor %lu]\n",
@@ -1112,7 +474,7 @@
ext4_debug("ext4 move extent: The argument files should "
"not be swapfile [ino:orig %lu, donor %lu]\n",
orig_inode->i_ino, donor_inode->i_ino);
- return -EINVAL;
+ return -EBUSY;
}
/* Ext4 move extent supports only extent based file */
@@ -1132,67 +494,28 @@
}
/* Start offset should be same */
- if (orig_start != donor_start) {
+ if ((orig_start & ~(PAGE_MASK >> orig_inode->i_blkbits)) !=
+ (donor_start & ~(PAGE_MASK >> orig_inode->i_blkbits))) {
ext4_debug("ext4 move extent: orig and donor's start "
- "offset are not same [ino:orig %lu, donor %lu]\n",
+ "offset are not alligned [ino:orig %lu, donor %lu]\n",
orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL;
}
if ((orig_start >= EXT_MAX_BLOCKS) ||
+ (donor_start >= EXT_MAX_BLOCKS) ||
(*len > EXT_MAX_BLOCKS) ||
+ (donor_start + *len >= EXT_MAX_BLOCKS) ||
(orig_start + *len >= EXT_MAX_BLOCKS)) {
ext4_debug("ext4 move extent: Can't handle over [%u] blocks "
"[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS,
orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL;
}
-
- if (orig_inode->i_size > donor_inode->i_size) {
- donor_blocks = (donor_inode->i_size + blocksize - 1) >> blkbits;
- /* TODO: eliminate this artificial restriction */
- if (orig_start >= donor_blocks) {
- ext4_debug("ext4 move extent: orig start offset "
- "[%llu] should be less than donor file blocks "
- "[%u] [ino:orig %lu, donor %lu]\n",
- orig_start, donor_blocks,
- orig_inode->i_ino, donor_inode->i_ino);
- return -EINVAL;
- }
-
- /* TODO: eliminate this artificial restriction */
- if (orig_start + *len > donor_blocks) {
- ext4_debug("ext4 move extent: End offset [%llu] should "
- "be less than donor file blocks [%u]."
- "So adjust length from %llu to %llu "
- "[ino:orig %lu, donor %lu]\n",
- orig_start + *len, donor_blocks,
- *len, donor_blocks - orig_start,
- orig_inode->i_ino, donor_inode->i_ino);
- *len = donor_blocks - orig_start;
- }
- } else {
- orig_blocks = (orig_inode->i_size + blocksize - 1) >> blkbits;
- if (orig_start >= orig_blocks) {
- ext4_debug("ext4 move extent: start offset [%llu] "
- "should be less than original file blocks "
- "[%u] [ino:orig %lu, donor %lu]\n",
- orig_start, orig_blocks,
- orig_inode->i_ino, donor_inode->i_ino);
- return -EINVAL;
- }
-
- if (orig_start + *len > orig_blocks) {
- ext4_debug("ext4 move extent: Adjust length "
- "from %llu to %llu. Because it should be "
- "less than original file blocks "
- "[ino:orig %lu, donor %lu]\n",
- *len, orig_blocks - orig_start,
- orig_inode->i_ino, donor_inode->i_ino);
- *len = orig_blocks - orig_start;
- }
- }
-
+ if (orig_eof < orig_start + *len - 1)
+ *len = orig_eof - orig_start;
+ if (donor_eof < donor_start + *len - 1)
+ *len = donor_eof - donor_start;
if (!*len) {
ext4_debug("ext4 move extent: len should not be 0 "
"[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
@@ -1208,60 +531,26 @@
*
* @o_filp: file structure of the original file
* @d_filp: file structure of the donor file
- * @orig_start: start offset in block for orig
- * @donor_start: start offset in block for donor
+ * @orig_blk: start offset in block for orig
+ * @donor_blk: start offset in block for donor
* @len: the number of blocks to be moved
* @moved_len: moved block length
*
* This function returns 0 and moved block length is set in moved_len
* if succeed, otherwise returns error value.
*
- * Note: ext4_move_extents() proceeds the following order.
- * 1:ext4_move_extents() calculates the last block number of moving extent
- * function by the start block number (orig_start) and the number of blocks
- * to be moved (len) specified as arguments.
- * If the {orig, donor}_start points a hole, the extent's start offset
- * pointed by ext_cur (current extent), holecheck_path, orig_path are set
- * after hole behind.
- * 2:Continue step 3 to step 5, until the holecheck_path points to last_extent
- * or the ext_cur exceeds the block_end which is last logical block number.
- * 3:To get the length of continues area, call mext_next_extent()
- * specified with the ext_cur (initial value is holecheck_path) re-cursive,
- * until find un-continuous extent, the start logical block number exceeds
- * the block_end or the extent points to the last extent.
- * 4:Exchange the original inode data with donor inode data
- * from orig_page_offset to seq_end_page.
- * The start indexes of data are specified as arguments.
- * That of the original inode is orig_page_offset,
- * and the donor inode is also orig_page_offset
- * (To easily handle blocksize != pagesize case, the offset for the
- * donor inode is block unit).
- * 5:Update holecheck_path and orig_path to points a next proceeding extent,
- * then returns to step 2.
- * 6:Release holecheck_path, orig_path and set the len to moved_len
- * which shows the number of moved blocks.
- * The moved_len is useful for the command to calculate the file offset
- * for starting next move extent ioctl.
- * 7:Return 0 on success, or a negative error value on failure.
*/
int
-ext4_move_extents(struct file *o_filp, struct file *d_filp,
- __u64 orig_start, __u64 donor_start, __u64 len,
- __u64 *moved_len)
+ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
+ __u64 donor_blk, __u64 len, __u64 *moved_len)
{
struct inode *orig_inode = file_inode(o_filp);
struct inode *donor_inode = file_inode(d_filp);
- struct ext4_ext_path *orig_path = NULL, *holecheck_path = NULL;
- struct ext4_extent *ext_prev, *ext_cur, *ext_dummy;
- ext4_lblk_t block_start = orig_start;
- ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0;
- ext4_lblk_t rest_blocks;
- pgoff_t orig_page_offset = 0, seq_end_page;
- int ret, depth, last_extent = 0;
+ struct ext4_ext_path *path = NULL;
int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
- int data_offset_in_page;
- int block_len_in_page;
- int unwritten;
+ ext4_lblk_t o_end, o_start = orig_blk;
+ ext4_lblk_t d_start = donor_blk;
+ int ret;
if (orig_inode->i_sb != donor_inode->i_sb) {
ext4_debug("ext4 move extent: The argument files "
@@ -1303,121 +592,58 @@
/* Protect extent tree against block allocations via delalloc */
ext4_double_down_write_data_sem(orig_inode, donor_inode);
/* Check the filesystem environment whether move_extent can be done */
- ret = mext_check_arguments(orig_inode, donor_inode, orig_start,
- donor_start, &len);
+ ret = mext_check_arguments(orig_inode, donor_inode, orig_blk,
+ donor_blk, &len);
if (ret)
goto out;
+ o_end = o_start + len;
- file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits;
- block_end = block_start + len - 1;
- if (file_end < block_end)
- len -= block_end - file_end;
+ while (o_start < o_end) {
+ struct ext4_extent *ex;
+ ext4_lblk_t cur_blk, next_blk;
+ pgoff_t orig_page_index, donor_page_index;
+ int offset_in_page;
+ int unwritten, cur_len;
- ret = get_ext_path(orig_inode, block_start, &orig_path);
- if (ret)
- goto out;
-
- /* Get path structure to check the hole */
- ret = get_ext_path(orig_inode, block_start, &holecheck_path);
- if (ret)
- goto out;
-
- depth = ext_depth(orig_inode);
- ext_cur = holecheck_path[depth].p_ext;
-
- /*
- * Get proper starting location of block replacement if block_start was
- * within the hole.
- */
- if (le32_to_cpu(ext_cur->ee_block) +
- ext4_ext_get_actual_len(ext_cur) - 1 < block_start) {
- /*
- * The hole exists between extents or the tail of
- * original file.
- */
- last_extent = mext_next_extent(orig_inode,
- holecheck_path, &ext_cur);
- if (last_extent < 0) {
- ret = last_extent;
+ ret = get_ext_path(orig_inode, o_start, &path);
+ if (ret)
goto out;
- }
- last_extent = mext_next_extent(orig_inode, orig_path,
- &ext_dummy);
- if (last_extent < 0) {
- ret = last_extent;
- goto out;
- }
- seq_start = le32_to_cpu(ext_cur->ee_block);
- } else if (le32_to_cpu(ext_cur->ee_block) > block_start)
- /* The hole exists at the beginning of original file. */
- seq_start = le32_to_cpu(ext_cur->ee_block);
- else
- seq_start = block_start;
-
- /* No blocks within the specified range. */
- if (le32_to_cpu(ext_cur->ee_block) > block_end) {
- ext4_debug("ext4 move extent: The specified range of file "
- "may be the hole\n");
- ret = -EINVAL;
- goto out;
- }
-
- /* Adjust start blocks */
- add_blocks = min(le32_to_cpu(ext_cur->ee_block) +
- ext4_ext_get_actual_len(ext_cur), block_end + 1) -
- max(le32_to_cpu(ext_cur->ee_block), block_start);
-
- while (!last_extent && le32_to_cpu(ext_cur->ee_block) <= block_end) {
- seq_blocks += add_blocks;
-
- /* Adjust tail blocks */
- if (seq_start + seq_blocks - 1 > block_end)
- seq_blocks = block_end - seq_start + 1;
-
- ext_prev = ext_cur;
- last_extent = mext_next_extent(orig_inode, holecheck_path,
- &ext_cur);
- if (last_extent < 0) {
- ret = last_extent;
- break;
- }
- add_blocks = ext4_ext_get_actual_len(ext_cur);
-
- /*
- * Extend the length of contiguous block (seq_blocks)
- * if extents are contiguous.
- */
- if (ext4_can_extents_be_merged(orig_inode,
- ext_prev, ext_cur) &&
- block_end >= le32_to_cpu(ext_cur->ee_block) &&
- !last_extent)
+ ex = path[path->p_depth].p_ext;
+ next_blk = ext4_ext_next_allocated_block(path);
+ cur_blk = le32_to_cpu(ex->ee_block);
+ cur_len = ext4_ext_get_actual_len(ex);
+ /* Check hole before the start pos */
+ if (cur_blk + cur_len - 1 < o_start) {
+ if (next_blk == EXT_MAX_BLOCKS) {
+ o_start = o_end;
+ ret = -ENODATA;
+ goto out;
+ }
+ d_start += next_blk - o_start;
+ o_start = next_blk;
continue;
-
- /* Is original extent is unwritten */
- unwritten = ext4_ext_is_unwritten(ext_prev);
-
- data_offset_in_page = seq_start % blocks_per_page;
-
- /*
- * Calculate data blocks count that should be swapped
- * at the first page.
- */
- if (data_offset_in_page + seq_blocks > blocks_per_page) {
- /* Swapped blocks are across pages */
- block_len_in_page =
- blocks_per_page - data_offset_in_page;
- } else {
- /* Swapped blocks are in a page */
- block_len_in_page = seq_blocks;
+ /* Check hole after the start pos */
+ } else if (cur_blk > o_start) {
+ /* Skip hole */
+ d_start += cur_blk - o_start;
+ o_start = cur_blk;
+ /* Does the extent start inside the requested range? */
+ if (cur_blk >= o_end)
+ goto out;
+ } else { /* in_range(o_start, cur_blk, cur_len) */
+ cur_len += cur_blk - o_start;
}
+ unwritten = ext4_ext_is_unwritten(ex);
+ if (o_end - o_start < cur_len)
+ cur_len = o_end - o_start;
- orig_page_offset = seq_start >>
- (PAGE_CACHE_SHIFT - orig_inode->i_blkbits);
- seq_end_page = (seq_start + seq_blocks - 1) >>
- (PAGE_CACHE_SHIFT - orig_inode->i_blkbits);
- seq_start = le32_to_cpu(ext_cur->ee_block);
- rest_blocks = seq_blocks;
-
+ orig_page_index = o_start >> (PAGE_CACHE_SHIFT -
+ orig_inode->i_blkbits);
+ donor_page_index = d_start >> (PAGE_CACHE_SHIFT -
+ donor_inode->i_blkbits);
+ offset_in_page = o_start % blocks_per_page;
+ if (cur_len > blocks_per_page- offset_in_page)
+ cur_len = blocks_per_page - offset_in_page;
/*
* Up semaphore to avoid following problems:
* a. transaction deadlock among ext4_journal_start,
@@ -1426,77 +652,29 @@
* in move_extent_per_page
*/
ext4_double_up_write_data_sem(orig_inode, donor_inode);
-
- while (orig_page_offset <= seq_end_page) {
-
- /* Swap original branches with new branches */
- block_len_in_page = move_extent_per_page(
- o_filp, donor_inode,
- orig_page_offset,
- data_offset_in_page,
- block_len_in_page,
- unwritten, &ret);
-
- /* Count how many blocks we have exchanged */
- *moved_len += block_len_in_page;
- if (ret < 0)
- break;
- if (*moved_len > len) {
- EXT4_ERROR_INODE(orig_inode,
- "We replaced blocks too much! "
- "sum of replaced: %llu requested: %llu",
- *moved_len, len);
- ret = -EIO;
- break;
- }
-
- orig_page_offset++;
- data_offset_in_page = 0;
- rest_blocks -= block_len_in_page;
- if (rest_blocks > blocks_per_page)
- block_len_in_page = blocks_per_page;
- else
- block_len_in_page = rest_blocks;
- }
-
+ /* Swap original branches with new branches */
+ move_extent_per_page(o_filp, donor_inode,
+ orig_page_index, donor_page_index,
+ offset_in_page, cur_len,
+ unwritten, &ret);
ext4_double_down_write_data_sem(orig_inode, donor_inode);
if (ret < 0)
break;
-
- /* Decrease buffer counter */
- if (holecheck_path)
- ext4_ext_drop_refs(holecheck_path);
- ret = get_ext_path(orig_inode, seq_start, &holecheck_path);
- if (ret)
- break;
- depth = holecheck_path->p_depth;
-
- /* Decrease buffer counter */
- if (orig_path)
- ext4_ext_drop_refs(orig_path);
- ret = get_ext_path(orig_inode, seq_start, &orig_path);
- if (ret)
- break;
-
- ext_cur = holecheck_path[depth].p_ext;
- add_blocks = ext4_ext_get_actual_len(ext_cur);
- seq_blocks = 0;
-
+ o_start += cur_len;
+ d_start += cur_len;
}
+ *moved_len = o_start - orig_blk;
+ if (*moved_len > len)
+ *moved_len = len;
+
out:
if (*moved_len) {
ext4_discard_preallocations(orig_inode);
ext4_discard_preallocations(donor_inode);
}
- if (orig_path) {
- ext4_ext_drop_refs(orig_path);
- kfree(orig_path);
- }
- if (holecheck_path) {
- ext4_ext_drop_refs(holecheck_path);
- kfree(holecheck_path);
- }
+ ext4_ext_drop_refs(path);
+ kfree(path);
ext4_double_up_write_data_sem(orig_inode, donor_inode);
ext4_inode_resume_unlocked_dio(orig_inode);
ext4_inode_resume_unlocked_dio(donor_inode);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 603e4eb..123798c 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -53,7 +53,7 @@
ext4_lblk_t *block)
{
struct buffer_head *bh;
- int err = 0;
+ int err;
if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb &&
((inode->i_size >> 10) >=
@@ -62,9 +62,9 @@
*block = inode->i_size >> inode->i_sb->s_blocksize_bits;
- bh = ext4_bread(handle, inode, *block, 1, &err);
- if (!bh)
- return ERR_PTR(err);
+ bh = ext4_bread(handle, inode, *block, 1);
+ if (IS_ERR(bh))
+ return bh;
inode->i_size += inode->i_sb->s_blocksize;
EXT4_I(inode)->i_disksize = inode->i_size;
BUFFER_TRACE(bh, "get_write_access");
@@ -94,20 +94,20 @@
{
struct buffer_head *bh;
struct ext4_dir_entry *dirent;
- int err = 0, is_dx_block = 0;
+ int is_dx_block = 0;
- bh = ext4_bread(NULL, inode, block, 0, &err);
- if (!bh) {
- if (err == 0) {
- ext4_error_inode(inode, __func__, line, block,
- "Directory hole found");
- return ERR_PTR(-EIO);
- }
+ bh = ext4_bread(NULL, inode, block, 0);
+ if (IS_ERR(bh)) {
__ext4_warning(inode->i_sb, __func__, line,
- "error reading directory block "
- "(ino %lu, block %lu)", inode->i_ino,
+ "error %ld reading directory block "
+ "(ino %lu, block %lu)", PTR_ERR(bh), inode->i_ino,
(unsigned long) block);
- return ERR_PTR(err);
+
+ return bh;
+ }
+ if (!bh) {
+ ext4_error_inode(inode, __func__, line, block, "Directory hole found");
+ return ERR_PTR(-EIO);
}
dirent = (struct ext4_dir_entry *) bh->b_data;
/* Determine whether or not we have an index block */
@@ -124,8 +124,7 @@
"directory leaf block found instead of index block");
return ERR_PTR(-EIO);
}
- if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) ||
+ if (!ext4_has_metadata_csum(inode->i_sb) ||
buffer_verified(bh))
return bh;
@@ -253,8 +252,7 @@
static struct dx_frame *dx_probe(const struct qstr *d_name,
struct inode *dir,
struct dx_hash_info *hinfo,
- struct dx_frame *frame,
- int *err);
+ struct dx_frame *frame);
static void dx_release(struct dx_frame *frames);
static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
struct dx_hash_info *hinfo, struct dx_map_entry map[]);
@@ -270,8 +268,7 @@
__u32 *start_hash);
static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
const struct qstr *d_name,
- struct ext4_dir_entry_2 **res_dir,
- int *err);
+ struct ext4_dir_entry_2 **res_dir);
static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
struct inode *inode);
@@ -340,8 +337,7 @@
{
struct ext4_dir_entry_tail *t;
- if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(inode->i_sb))
return 1;
t = get_dirent_tail(inode, dirent);
@@ -362,8 +358,7 @@
{
struct ext4_dir_entry_tail *t;
- if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(inode->i_sb))
return;
t = get_dirent_tail(inode, dirent);
@@ -438,8 +433,7 @@
struct dx_tail *t;
int count_offset, limit, count;
- if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(inode->i_sb))
return 1;
c = get_dx_countlimit(inode, dirent, &count_offset);
@@ -468,8 +462,7 @@
struct dx_tail *t;
int count_offset, limit, count;
- if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(inode->i_sb))
return;
c = get_dx_countlimit(inode, dirent, &count_offset);
@@ -557,8 +550,7 @@
unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
EXT4_DIR_REC_LEN(2) - infosize;
- if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (ext4_has_metadata_csum(dir->i_sb))
entry_space -= sizeof(struct dx_tail);
return entry_space / sizeof(struct dx_entry);
}
@@ -567,8 +559,7 @@
{
unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
- if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (ext4_has_metadata_csum(dir->i_sb))
entry_space -= sizeof(struct dx_tail);
return entry_space / sizeof(struct dx_entry);
}
@@ -641,7 +632,9 @@
u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
struct stats stats;
printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range);
- if (!(bh = ext4_bread (NULL,dir, block, 0,&err))) continue;
+ bh = ext4_bread(NULL,dir, block, 0);
+ if (!bh || IS_ERR(bh))
+ continue;
stats = levels?
dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
dx_show_leaf(hinfo, (struct ext4_dir_entry_2 *) bh->b_data, blocksize, 0);
@@ -669,29 +662,25 @@
*/
static struct dx_frame *
dx_probe(const struct qstr *d_name, struct inode *dir,
- struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
+ struct dx_hash_info *hinfo, struct dx_frame *frame_in)
{
unsigned count, indirect;
struct dx_entry *at, *entries, *p, *q, *m;
struct dx_root *root;
- struct buffer_head *bh;
struct dx_frame *frame = frame_in;
+ struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR);
u32 hash;
- frame->bh = NULL;
- bh = ext4_read_dirblock(dir, 0, INDEX);
- if (IS_ERR(bh)) {
- *err = PTR_ERR(bh);
- goto fail;
- }
- root = (struct dx_root *) bh->b_data;
+ frame->bh = ext4_read_dirblock(dir, 0, INDEX);
+ if (IS_ERR(frame->bh))
+ return (struct dx_frame *) frame->bh;
+
+ root = (struct dx_root *) frame->bh->b_data;
if (root->info.hash_version != DX_HASH_TEA &&
root->info.hash_version != DX_HASH_HALF_MD4 &&
root->info.hash_version != DX_HASH_LEGACY) {
ext4_warning(dir->i_sb, "Unrecognised inode hash code %d",
root->info.hash_version);
- brelse(bh);
- *err = ERR_BAD_DX_DIR;
goto fail;
}
hinfo->hash_version = root->info.hash_version;
@@ -705,16 +694,12 @@
if (root->info.unused_flags & 1) {
ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x",
root->info.unused_flags);
- brelse(bh);
- *err = ERR_BAD_DX_DIR;
goto fail;
}
if ((indirect = root->info.indirect_levels) > 1) {
ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x",
root->info.indirect_levels);
- brelse(bh);
- *err = ERR_BAD_DX_DIR;
goto fail;
}
@@ -724,27 +709,21 @@
if (dx_get_limit(entries) != dx_root_limit(dir,
root->info.info_length)) {
ext4_warning(dir->i_sb, "dx entry: limit != root limit");
- brelse(bh);
- *err = ERR_BAD_DX_DIR;
goto fail;
}
dxtrace(printk("Look up %x", hash));
- while (1)
- {
+ while (1) {
count = dx_get_count(entries);
if (!count || count > dx_get_limit(entries)) {
ext4_warning(dir->i_sb,
"dx entry: no count or count > limit");
- brelse(bh);
- *err = ERR_BAD_DX_DIR;
- goto fail2;
+ goto fail;
}
p = entries + 1;
q = entries + count - 1;
- while (p <= q)
- {
+ while (p <= q) {
m = p + (q - p)/2;
dxtrace(printk("."));
if (dx_get_hash(m) > hash)
@@ -753,8 +732,7 @@
p = m + 1;
}
- if (0) // linear search cross check
- {
+ if (0) { // linear search cross check
unsigned n = count - 1;
at = entries;
while (n--)
@@ -771,38 +749,35 @@
at = p - 1;
dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
- frame->bh = bh;
frame->entries = entries;
frame->at = at;
- if (!indirect--) return frame;
- bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX);
- if (IS_ERR(bh)) {
- *err = PTR_ERR(bh);
- goto fail2;
+ if (!indirect--)
+ return frame;
+ frame++;
+ frame->bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX);
+ if (IS_ERR(frame->bh)) {
+ ret_err = (struct dx_frame *) frame->bh;
+ frame->bh = NULL;
+ goto fail;
}
- entries = ((struct dx_node *) bh->b_data)->entries;
+ entries = ((struct dx_node *) frame->bh->b_data)->entries;
if (dx_get_limit(entries) != dx_node_limit (dir)) {
ext4_warning(dir->i_sb,
"dx entry: limit != node limit");
- brelse(bh);
- *err = ERR_BAD_DX_DIR;
- goto fail2;
+ goto fail;
}
- frame++;
- frame->bh = NULL;
}
-fail2:
+fail:
while (frame >= frame_in) {
brelse(frame->bh);
frame--;
}
-fail:
- if (*err == ERR_BAD_DX_DIR)
+ if (ret_err == ERR_PTR(ERR_BAD_DX_DIR))
ext4_warning(dir->i_sb,
"Corrupt dir inode %lu, running e2fsck is "
"recommended.", dir->i_ino);
- return NULL;
+ return ret_err;
}
static void dx_release (struct dx_frame *frames)
@@ -988,9 +963,9 @@
}
hinfo.hash = start_hash;
hinfo.minor_hash = 0;
- frame = dx_probe(NULL, dir, &hinfo, frames, &err);
- if (!frame)
- return err;
+ frame = dx_probe(NULL, dir, &hinfo, frames);
+ if (IS_ERR(frame))
+ return PTR_ERR(frame);
/* Add '.' and '..' from the htree header */
if (!start_hash && !start_minor_hash) {
@@ -1227,8 +1202,7 @@
buffer */
int num = 0;
ext4_lblk_t nblocks;
- int i, err = 0;
- int namelen;
+ int i, namelen;
*res_dir = NULL;
sb = dir->i_sb;
@@ -1258,17 +1232,13 @@
goto restart;
}
if (is_dx(dir)) {
- bh = ext4_dx_find_entry(dir, d_name, res_dir, &err);
+ bh = ext4_dx_find_entry(dir, d_name, res_dir);
/*
* On success, or if the error was file not found,
* return. Otherwise, fall back to doing a search the
* old fashioned way.
*/
- if (err == -ENOENT)
- return NULL;
- if (err && err != ERR_BAD_DX_DIR)
- return ERR_PTR(err);
- if (bh)
+ if (!IS_ERR(bh) || PTR_ERR(bh) != ERR_BAD_DX_DIR)
return bh;
dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
"falling back\n"));
@@ -1298,10 +1268,10 @@
break;
}
num++;
- bh = ext4_getblk(NULL, dir, b++, 0, &err);
- if (unlikely(err)) {
+ bh = ext4_getblk(NULL, dir, b++, 0);
+ if (unlikely(IS_ERR(bh))) {
if (ra_max == 0)
- return ERR_PTR(err);
+ return bh;
break;
}
bh_use[ra_max] = bh;
@@ -1366,7 +1336,7 @@
}
static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
- struct ext4_dir_entry_2 **res_dir, int *err)
+ struct ext4_dir_entry_2 **res_dir)
{
struct super_block * sb = dir->i_sb;
struct dx_hash_info hinfo;
@@ -1375,25 +1345,23 @@
ext4_lblk_t block;
int retval;
- if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
- return NULL;
+ frame = dx_probe(d_name, dir, &hinfo, frames);
+ if (IS_ERR(frame))
+ return (struct buffer_head *) frame;
do {
block = dx_get_block(frame->at);
bh = ext4_read_dirblock(dir, block, DIRENT);
- if (IS_ERR(bh)) {
- *err = PTR_ERR(bh);
+ if (IS_ERR(bh))
goto errout;
- }
+
retval = search_dirblock(bh, dir, d_name,
block << EXT4_BLOCK_SIZE_BITS(sb),
res_dir);
- if (retval == 1) { /* Success! */
- dx_release(frames);
- return bh;
- }
+ if (retval == 1)
+ goto success;
brelse(bh);
if (retval == -1) {
- *err = ERR_BAD_DX_DIR;
+ bh = ERR_PTR(ERR_BAD_DX_DIR);
goto errout;
}
@@ -1402,18 +1370,19 @@
frames, NULL);
if (retval < 0) {
ext4_warning(sb,
- "error reading index page in directory #%lu",
- dir->i_ino);
- *err = retval;
+ "error %d reading index page in directory #%lu",
+ retval, dir->i_ino);
+ bh = ERR_PTR(retval);
goto errout;
}
} while (retval == 1);
- *err = -ENOENT;
+ bh = NULL;
errout:
dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name));
- dx_release (frames);
- return NULL;
+success:
+ dx_release(frames);
+ return bh;
}
static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
@@ -1441,7 +1410,7 @@
dentry);
return ERR_PTR(-EIO);
}
- inode = ext4_iget(dir->i_sb, ino);
+ inode = ext4_iget_normal(dir->i_sb, ino);
if (inode == ERR_PTR(-ESTALE)) {
EXT4_ERROR_INODE(dir,
"deleted inode referenced: %u",
@@ -1474,7 +1443,7 @@
return ERR_PTR(-EIO);
}
- return d_obtain_alias(ext4_iget(child->d_inode->i_sb, ino));
+ return d_obtain_alias(ext4_iget_normal(child->d_inode->i_sb, ino));
}
/*
@@ -1533,7 +1502,7 @@
*/
static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
struct buffer_head **bh,struct dx_frame *frame,
- struct dx_hash_info *hinfo, int *error)
+ struct dx_hash_info *hinfo)
{
unsigned blocksize = dir->i_sb->s_blocksize;
unsigned count, continued;
@@ -1548,16 +1517,14 @@
int csum_size = 0;
int err = 0, i;
- if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (ext4_has_metadata_csum(dir->i_sb))
csum_size = sizeof(struct ext4_dir_entry_tail);
bh2 = ext4_append(handle, dir, &newblock);
if (IS_ERR(bh2)) {
brelse(*bh);
*bh = NULL;
- *error = PTR_ERR(bh2);
- return NULL;
+ return (struct ext4_dir_entry_2 *) bh2;
}
BUFFER_TRACE(*bh, "get_write_access");
@@ -1617,8 +1584,7 @@
dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1));
/* Which block gets the new entry? */
- if (hinfo->hash >= hash2)
- {
+ if (hinfo->hash >= hash2) {
swap(*bh, bh2);
de = de2;
}
@@ -1638,8 +1604,7 @@
brelse(bh2);
*bh = NULL;
ext4_std_error(dir->i_sb, err);
- *error = err;
- return NULL;
+ return ERR_PTR(err);
}
int ext4_find_dest_de(struct inode *dir, struct inode *inode,
@@ -1718,8 +1683,7 @@
int csum_size = 0;
int err;
- if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (ext4_has_metadata_csum(inode->i_sb))
csum_size = sizeof(struct ext4_dir_entry_tail);
if (!de) {
@@ -1786,8 +1750,7 @@
struct fake_dirent *fde;
int csum_size = 0;
- if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (ext4_has_metadata_csum(inode->i_sb))
csum_size = sizeof(struct ext4_dir_entry_tail);
blocksize = dir->i_sb->s_blocksize;
@@ -1862,8 +1825,8 @@
ext4_handle_dirty_dx_node(handle, dir, frame->bh);
ext4_handle_dirty_dirent_node(handle, dir, bh);
- de = do_split(handle,dir, &bh, frame, &hinfo, &retval);
- if (!de) {
+ de = do_split(handle,dir, &bh, frame, &hinfo);
+ if (IS_ERR(de)) {
/*
* Even if the block split failed, we have to properly write
* out all the changes we did so far. Otherwise we can end up
@@ -1871,7 +1834,7 @@
*/
ext4_mark_inode_dirty(handle, dir);
dx_release(frames);
- return retval;
+ return PTR_ERR(de);
}
dx_release(frames);
@@ -1904,8 +1867,7 @@
ext4_lblk_t block, blocks;
int csum_size = 0;
- if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (ext4_has_metadata_csum(inode->i_sb))
csum_size = sizeof(struct ext4_dir_entry_tail);
sb = dir->i_sb;
@@ -1982,9 +1944,9 @@
struct ext4_dir_entry_2 *de;
int err;
- frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err);
- if (!frame)
- return err;
+ frame = dx_probe(&dentry->d_name, dir, &hinfo, frames);
+ if (IS_ERR(frame))
+ return PTR_ERR(frame);
entries = frame->entries;
at = frame->at;
bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT);
@@ -2095,9 +2057,11 @@
goto cleanup;
}
}
- de = do_split(handle, dir, &bh, frame, &hinfo, &err);
- if (!de)
+ de = do_split(handle, dir, &bh, frame, &hinfo);
+ if (IS_ERR(de)) {
+ err = PTR_ERR(de);
goto cleanup;
+ }
err = add_dirent_to_buf(handle, dentry, inode, de, bh);
goto cleanup;
@@ -2167,8 +2131,7 @@
return err;
}
- if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (ext4_has_metadata_csum(dir->i_sb))
csum_size = sizeof(struct ext4_dir_entry_tail);
BUFFER_TRACE(bh, "get_write_access");
@@ -2387,8 +2350,7 @@
int csum_size = 0;
int err;
- if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (ext4_has_metadata_csum(dir->i_sb))
csum_size = sizeof(struct ext4_dir_entry_tail);
if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
@@ -2403,10 +2365,6 @@
dir_block = ext4_append(handle, inode, &block);
if (IS_ERR(dir_block))
return PTR_ERR(dir_block);
- BUFFER_TRACE(dir_block, "get_write_access");
- err = ext4_journal_get_write_access(handle, dir_block);
- if (err)
- goto out;
de = (struct ext4_dir_entry_2 *)dir_block->b_data;
ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0);
set_nlink(inode, 2);
@@ -2573,7 +2531,7 @@
int err = 0, rc;
bool dirty = false;
- if (!sbi->s_journal)
+ if (!sbi->s_journal || is_bad_inode(inode))
return 0;
WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
@@ -3190,6 +3148,39 @@
}
}
+static struct inode *ext4_whiteout_for_rename(struct ext4_renament *ent,
+ int credits, handle_t **h)
+{
+ struct inode *wh;
+ handle_t *handle;
+ int retries = 0;
+
+ /*
+ * Additional credits needed to allocate the whiteout inode:
+ * inode block, sb block, group summaries, and inode bitmap.
+ */
+ credits += (EXT4_MAXQUOTAS_TRANS_BLOCKS(ent->dir->i_sb) +
+ EXT4_XATTR_TRANS_BLOCKS + 4);
+retry:
+ wh = ext4_new_inode_start_handle(ent->dir, S_IFCHR | WHITEOUT_MODE,
+ &ent->dentry->d_name, 0, NULL,
+ EXT4_HT_DIR, credits);
+
+ handle = ext4_journal_current_handle();
+ if (IS_ERR(wh)) {
+ if (handle)
+ ext4_journal_stop(handle);
+ if (PTR_ERR(wh) == -ENOSPC &&
+ ext4_should_retry_alloc(ent->dir->i_sb, &retries))
+ goto retry;
+ } else {
+ *h = handle;
+ init_special_inode(wh, wh->i_mode, WHITEOUT_DEV);
+ wh->i_op = &ext4_special_inode_operations;
+ }
+ return wh;
+}
+
/*
* Anybody can rename anything with this: the permission checks are left to the
* higher-level routines.
@@ -3199,7 +3190,8 @@
* This comes from rename(const char *oldpath, const char *newpath)
*/
static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry)
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
{
handle_t *handle = NULL;
struct ext4_renament old = {
@@ -3214,6 +3206,9 @@
};
int force_reread;
int retval;
+ struct inode *whiteout = NULL;
+ int credits;
+ u8 old_file_type;
dquot_initialize(old.dir);
dquot_initialize(new.dir);
@@ -3252,11 +3247,17 @@
if (new.inode && !test_opt(new.dir->i_sb, NO_AUTO_DA_ALLOC))
ext4_alloc_da_blocks(old.inode);
- handle = ext4_journal_start(old.dir, EXT4_HT_DIR,
- (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) +
- EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2));
- if (IS_ERR(handle))
- return PTR_ERR(handle);
+ credits = (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) +
+ EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2);
+ if (!(flags & RENAME_WHITEOUT)) {
+ handle = ext4_journal_start(old.dir, EXT4_HT_DIR, credits);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+ } else {
+ whiteout = ext4_whiteout_for_rename(&old, credits, &handle);
+ if (IS_ERR(whiteout))
+ return PTR_ERR(whiteout);
+ }
if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir))
ext4_handle_sync(handle);
@@ -3284,13 +3285,26 @@
*/
force_reread = (new.dir->i_ino == old.dir->i_ino &&
ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA));
+
+ old_file_type = old.de->file_type;
+ if (whiteout) {
+ /*
+ * Do this before adding a new entry, so that old.de is sure
+ * to still point to the valid old entry.
+ */
+ retval = ext4_setent(handle, &old, whiteout->i_ino,
+ EXT4_FT_CHRDEV);
+ if (retval)
+ goto end_rename;
+ ext4_mark_inode_dirty(handle, whiteout);
+ }
if (!new.bh) {
retval = ext4_add_entry(handle, new.dentry, old.inode);
if (retval)
goto end_rename;
} else {
retval = ext4_setent(handle, &new,
- old.inode->i_ino, old.de->file_type);
+ old.inode->i_ino, old_file_type);
if (retval)
goto end_rename;
}
@@ -3305,10 +3319,12 @@
old.inode->i_ctime = ext4_current_time(old.inode);
ext4_mark_inode_dirty(handle, old.inode);
- /*
- * ok, that's it
- */
- ext4_rename_delete(handle, &old, force_reread);
+ if (!whiteout) {
+ /*
+ * ok, that's it
+ */
+ ext4_rename_delete(handle, &old, force_reread);
+ }
if (new.inode) {
ext4_dec_count(handle, new.inode);
@@ -3344,6 +3360,12 @@
brelse(old.dir_bh);
brelse(old.bh);
brelse(new.bh);
+ if (whiteout) {
+ if (retval)
+ drop_nlink(whiteout);
+ unlock_new_inode(whiteout);
+ iput(whiteout);
+ }
if (handle)
ext4_journal_stop(handle);
return retval;
@@ -3476,18 +3498,15 @@
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
{
- if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
+ if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
return -EINVAL;
if (flags & RENAME_EXCHANGE) {
return ext4_cross_rename(old_dir, old_dentry,
new_dir, new_dentry);
}
- /*
- * Existence checking was done by the VFS, otherwise "RENAME_NOREPLACE"
- * is equivalent to regular rename.
- */
- return ext4_rename(old_dir, old_dentry, new_dir, new_dentry);
+
+ return ext4_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
}
/*
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 1e43b90..f298c60 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1212,8 +1212,7 @@
{
struct buffer_head *bh;
- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(sb))
return 0;
bh = ext4_get_bitmap(sb, group_data->inode_bitmap);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 05c1592..1eda6ab 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -70,7 +70,6 @@
static void ext4_clear_journal_err(struct super_block *sb,
struct ext4_super_block *es);
static int ext4_sync_fs(struct super_block *sb, int wait);
-static int ext4_sync_fs_nojournal(struct super_block *sb, int wait);
static int ext4_remount(struct super_block *sb, int *flags, char *data);
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
static int ext4_unfreeze(struct super_block *sb);
@@ -141,8 +140,7 @@
static int ext4_superblock_csum_verify(struct super_block *sb,
struct ext4_super_block *es)
{
- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(sb))
return 1;
return es->s_checksum == ext4_superblock_csum(sb, es);
@@ -152,8 +150,7 @@
{
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(sb))
return;
es->s_checksum = ext4_superblock_csum(sb, es);
@@ -820,10 +817,9 @@
percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter);
percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
- percpu_counter_destroy(&sbi->s_extent_cache_cnt);
brelse(sbi->s_sbh);
#ifdef CONFIG_QUOTA
- for (i = 0; i < MAXQUOTAS; i++)
+ for (i = 0; i < EXT4_MAXQUOTAS; i++)
kfree(sbi->s_qf_names[i]);
#endif
@@ -885,6 +881,7 @@
ext4_es_init_tree(&ei->i_es_tree);
rwlock_init(&ei->i_es_lock);
INIT_LIST_HEAD(&ei->i_es_lru);
+ ei->i_es_all_nr = 0;
ei->i_es_lru_nr = 0;
ei->i_touch_when = 0;
ei->i_reserved_data_blocks = 0;
@@ -1002,7 +999,7 @@
* Currently we don't know the generation for parent directory, so
* a generation of 0 means "accept any"
*/
- inode = ext4_iget(sb, ino);
+ inode = ext4_iget_normal(sb, ino);
if (IS_ERR(inode))
return ERR_CAST(inode);
if (generation && inode->i_generation != generation) {
@@ -1124,25 +1121,6 @@
.bdev_try_to_free_page = bdev_try_to_free_page,
};
-static const struct super_operations ext4_nojournal_sops = {
- .alloc_inode = ext4_alloc_inode,
- .destroy_inode = ext4_destroy_inode,
- .write_inode = ext4_write_inode,
- .dirty_inode = ext4_dirty_inode,
- .drop_inode = ext4_drop_inode,
- .evict_inode = ext4_evict_inode,
- .sync_fs = ext4_sync_fs_nojournal,
- .put_super = ext4_put_super,
- .statfs = ext4_statfs,
- .remount_fs = ext4_remount,
- .show_options = ext4_show_options,
-#ifdef CONFIG_QUOTA
- .quota_read = ext4_quota_read,
- .quota_write = ext4_quota_write,
-#endif
- .bdev_try_to_free_page = bdev_try_to_free_page,
-};
-
static const struct export_operations ext4_export_ops = {
.fh_to_dentry = ext4_fh_to_dentry,
.fh_to_parent = ext4_fh_to_parent,
@@ -1712,13 +1690,6 @@
"not specified");
return 0;
}
- } else {
- if (sbi->s_jquota_fmt) {
- ext4_msg(sb, KERN_ERR, "journaled quota format "
- "specified with no journaling "
- "enabled");
- return 0;
- }
}
#endif
if (test_opt(sb, DIOREAD_NOLOCK)) {
@@ -2016,8 +1987,7 @@
__u16 crc = 0;
__le32 le_group = cpu_to_le32(block_group);
- if ((sbi->s_es->s_feature_ro_compat &
- cpu_to_le32(EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))) {
+ if (ext4_has_metadata_csum(sbi->s_sb)) {
/* Use new metadata_csum algorithm */
__le16 save_csum;
__u32 csum32;
@@ -2035,6 +2005,10 @@
}
/* old crc16 code */
+ if (!(sbi->s_es->s_feature_ro_compat &
+ cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)))
+ return 0;
+
offset = offsetof(struct ext4_group_desc, bg_checksum);
crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
@@ -2191,7 +2165,7 @@
if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
/* don't clear list on RO mount w/ errors */
if (es->s_last_orphan && !(s_flags & MS_RDONLY)) {
- jbd_debug(1, "Errors on filesystem, "
+ ext4_msg(sb, KERN_INFO, "Errors on filesystem, "
"clearing orphan list.\n");
es->s_last_orphan = 0;
}
@@ -2207,7 +2181,7 @@
/* Needed for iput() to work correctly and not trash data */
sb->s_flags |= MS_ACTIVE;
/* Turn on quotas so that they are updated correctly */
- for (i = 0; i < MAXQUOTAS; i++) {
+ for (i = 0; i < EXT4_MAXQUOTAS; i++) {
if (EXT4_SB(sb)->s_qf_names[i]) {
int ret = ext4_quota_on_mount(sb, i);
if (ret < 0)
@@ -2263,7 +2237,7 @@
PLURAL(nr_truncates));
#ifdef CONFIG_QUOTA
/* Turn quotas off */
- for (i = 0; i < MAXQUOTAS; i++) {
+ for (i = 0; i < EXT4_MAXQUOTAS; i++) {
if (sb_dqopt(sb)->files[i])
dquot_quota_off(sb, i);
}
@@ -2548,6 +2522,16 @@
return count;
}
+static ssize_t es_ui_show(struct ext4_attr *a,
+ struct ext4_sb_info *sbi, char *buf)
+{
+
+ unsigned int *ui = (unsigned int *) (((char *) sbi->s_es) +
+ a->u.offset);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
+}
+
static ssize_t reserved_clusters_show(struct ext4_attr *a,
struct ext4_sb_info *sbi, char *buf)
{
@@ -2601,14 +2585,29 @@
.offset = offsetof(struct ext4_sb_info, _elname),\
}, \
}
+
+#define EXT4_ATTR_OFFSET_ES(_name,_mode,_show,_store,_elname) \
+static struct ext4_attr ext4_attr_##_name = { \
+ .attr = {.name = __stringify(_name), .mode = _mode }, \
+ .show = _show, \
+ .store = _store, \
+ .u = { \
+ .offset = offsetof(struct ext4_super_block, _elname), \
+ }, \
+}
+
#define EXT4_ATTR(name, mode, show, store) \
static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)
#define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL)
#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL)
#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store)
+
+#define EXT4_RO_ATTR_ES_UI(name, elname) \
+ EXT4_ATTR_OFFSET_ES(name, 0444, es_ui_show, NULL, elname)
#define EXT4_RW_ATTR_SBI_UI(name, elname) \
EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname)
+
#define ATTR_LIST(name) &ext4_attr_##name.attr
#define EXT4_DEPRECATED_ATTR(_name, _val) \
static struct ext4_attr ext4_attr_##_name = { \
@@ -2641,6 +2640,9 @@
EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst);
EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval);
EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst);
+EXT4_RO_ATTR_ES_UI(errors_count, s_error_count);
+EXT4_RO_ATTR_ES_UI(first_error_time, s_first_error_time);
+EXT4_RO_ATTR_ES_UI(last_error_time, s_last_error_time);
static struct attribute *ext4_attrs[] = {
ATTR_LIST(delayed_allocation_blocks),
@@ -2664,6 +2666,9 @@
ATTR_LIST(warning_ratelimit_burst),
ATTR_LIST(msg_ratelimit_interval_ms),
ATTR_LIST(msg_ratelimit_burst),
+ ATTR_LIST(errors_count),
+ ATTR_LIST(first_error_time),
+ ATTR_LIST(last_error_time),
NULL,
};
@@ -2723,9 +2728,25 @@
complete(&ext4_feat->f_kobj_unregister);
}
+static ssize_t ext4_feat_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "supported\n");
+}
+
+/*
+ * We cannot use ext4_attr_show/store because they rely on the kobject
+ * being embedded in the ext4_sb_info structure, which is definitely not
+ * true in this case.
+ */
+static const struct sysfs_ops ext4_feat_ops = {
+ .show = ext4_feat_show,
+ .store = NULL,
+};
+
static struct kobj_type ext4_feat_ktype = {
.default_attrs = ext4_feat_attrs,
- .sysfs_ops = &ext4_attr_ops,
+ .sysfs_ops = &ext4_feat_ops,
.release = ext4_feat_release,
};
@@ -3179,8 +3200,7 @@
int compat, incompat;
struct ext4_sb_info *sbi = EXT4_SB(sb);
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
+ if (ext4_has_metadata_csum(sb)) {
/* journal checksum v3 */
compat = 0;
incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
@@ -3190,6 +3210,10 @@
incompat = 0;
}
+ jbd2_journal_clear_features(sbi->s_journal,
+ JBD2_FEATURE_COMPAT_CHECKSUM, 0,
+ JBD2_FEATURE_INCOMPAT_CSUM_V3 |
+ JBD2_FEATURE_INCOMPAT_CSUM_V2);
if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
ret = jbd2_journal_set_features(sbi->s_journal,
compat, 0,
@@ -3202,11 +3226,8 @@
jbd2_journal_clear_features(sbi->s_journal, 0, 0,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
} else {
- jbd2_journal_clear_features(sbi->s_journal,
- JBD2_FEATURE_COMPAT_CHECKSUM, 0,
- JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
- JBD2_FEATURE_INCOMPAT_CSUM_V3 |
- JBD2_FEATURE_INCOMPAT_CSUM_V2);
+ jbd2_journal_clear_features(sbi->s_journal, 0, 0,
+ JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
}
return ret;
@@ -3436,7 +3457,7 @@
logical_sb_block = sb_block;
}
- if (!(bh = sb_bread(sb, logical_sb_block))) {
+ if (!(bh = sb_bread_unmovable(sb, logical_sb_block))) {
ext4_msg(sb, KERN_ERR, "unable to read superblock");
goto out_fail;
}
@@ -3487,8 +3508,7 @@
}
/* Precompute checksum seed for all metadata */
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (ext4_has_metadata_csum(sb))
sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
sizeof(es->s_uuid));
@@ -3519,8 +3539,8 @@
set_opt(sb, ERRORS_CONT);
else
set_opt(sb, ERRORS_RO);
- if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)
- set_opt(sb, BLOCK_VALIDITY);
+ /* block_validity enabled by default; disable with noblock_validity */
+ set_opt(sb, BLOCK_VALIDITY);
if (def_mount_opts & EXT4_DEFM_DISCARD)
set_opt(sb, DISCARD);
@@ -3646,7 +3666,7 @@
brelse(bh);
logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
offset = do_div(logical_sb_block, blocksize);
- bh = sb_bread(sb, logical_sb_block);
+ bh = sb_bread_unmovable(sb, logical_sb_block);
if (!bh) {
ext4_msg(sb, KERN_ERR,
"Can't read superblock on 2nd try");
@@ -3868,7 +3888,7 @@
for (i = 0; i < db_count; i++) {
block = descriptor_loc(sb, logical_sb_block, i);
- sbi->s_group_desc[i] = sb_bread(sb, block);
+ sbi->s_group_desc[i] = sb_bread_unmovable(sb, block);
if (!sbi->s_group_desc[i]) {
ext4_msg(sb, KERN_ERR,
"can't read group descriptor %d", i);
@@ -3890,13 +3910,8 @@
sbi->s_err_report.data = (unsigned long) sb;
/* Register extent status tree shrinker */
- ext4_es_register_shrinker(sbi);
-
- err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0, GFP_KERNEL);
- if (err) {
- ext4_msg(sb, KERN_ERR, "insufficient memory");
+ if (ext4_es_register_shrinker(sbi))
goto failed_mount3;
- }
sbi->s_stripe = ext4_get_stripe_size(sbi);
sbi->s_extent_max_zeroout_kb = 32;
@@ -3904,11 +3919,7 @@
/*
* set up enough so that it can read an inode
*/
- if (!test_opt(sb, NOLOAD) &&
- EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
- sb->s_op = &ext4_sops;
- else
- sb->s_op = &ext4_nojournal_sops;
+ sb->s_op = &ext4_sops;
sb->s_export_op = &ext4_export_ops;
sb->s_xattr = ext4_xattr_handlers;
#ifdef CONFIG_QUOTA
@@ -4229,10 +4240,9 @@
jbd2_journal_destroy(sbi->s_journal);
sbi->s_journal = NULL;
}
-failed_mount3:
ext4_es_unregister_shrinker(sbi);
+failed_mount3:
del_timer_sync(&sbi->s_err_report);
- percpu_counter_destroy(&sbi->s_extent_cache_cnt);
if (sbi->s_mmp_tsk)
kthread_stop(sbi->s_mmp_tsk);
failed_mount2:
@@ -4247,7 +4257,7 @@
remove_proc_entry(sb->s_id, ext4_proc_root);
}
#ifdef CONFIG_QUOTA
- for (i = 0; i < MAXQUOTAS; i++)
+ for (i = 0; i < EXT4_MAXQUOTAS; i++)
kfree(sbi->s_qf_names[i]);
#endif
ext4_blkdev_remove(sbi);
@@ -4375,6 +4385,15 @@
goto out_bdev;
}
+ if ((le32_to_cpu(es->s_feature_ro_compat) &
+ EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
+ es->s_checksum != ext4_superblock_csum(sb, es)) {
+ ext4_msg(sb, KERN_ERR, "external journal has "
+ "corrupt superblock");
+ brelse(bh);
+ goto out_bdev;
+ }
+
if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
ext4_msg(sb, KERN_ERR, "journal UUID does not match");
brelse(bh);
@@ -4677,15 +4696,19 @@
* being sent at the end of the function. But we can skip it if
* transaction_commit will do it for us.
*/
- target = jbd2_get_latest_transaction(sbi->s_journal);
- if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
- !jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
- needs_barrier = true;
+ if (sbi->s_journal) {
+ target = jbd2_get_latest_transaction(sbi->s_journal);
+ if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
+ !jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
+ needs_barrier = true;
- if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
- if (wait)
- ret = jbd2_log_wait_commit(sbi->s_journal, target);
- }
+ if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
+ if (wait)
+ ret = jbd2_log_wait_commit(sbi->s_journal,
+ target);
+ }
+ } else if (wait && test_opt(sb, BARRIER))
+ needs_barrier = true;
if (needs_barrier) {
int err;
err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
@@ -4696,19 +4719,6 @@
return ret;
}
-static int ext4_sync_fs_nojournal(struct super_block *sb, int wait)
-{
- int ret = 0;
-
- trace_ext4_sync_fs(sb, wait);
- flush_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
- dquot_writeback_dquots(sb, -1);
- if (wait && test_opt(sb, BARRIER))
- ret = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
-
- return ret;
-}
-
/*
* LVM calls this function before a (read-only) snapshot is created. This
* gives us a chance to flush the journal completely and mark the fs clean.
@@ -4727,23 +4737,26 @@
journal = EXT4_SB(sb)->s_journal;
- /* Now we set up the journal barrier. */
- jbd2_journal_lock_updates(journal);
+ if (journal) {
+ /* Now we set up the journal barrier. */
+ jbd2_journal_lock_updates(journal);
- /*
- * Don't clear the needs_recovery flag if we failed to flush
- * the journal.
- */
- error = jbd2_journal_flush(journal);
- if (error < 0)
- goto out;
+ /*
+ * Don't clear the needs_recovery flag if we failed to
+ * flush the journal.
+ */
+ error = jbd2_journal_flush(journal);
+ if (error < 0)
+ goto out;
+ }
/* Journal blocked and flushed, clear needs_recovery flag. */
EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
error = ext4_commit_super(sb, 1);
out:
- /* we rely on upper layer to stop further updates */
- jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+ if (journal)
+ /* we rely on upper layer to stop further updates */
+ jbd2_journal_unlock_updates(journal);
return error;
}
@@ -4774,7 +4787,7 @@
u32 s_min_batch_time, s_max_batch_time;
#ifdef CONFIG_QUOTA
int s_jquota_fmt;
- char *s_qf_names[MAXQUOTAS];
+ char *s_qf_names[EXT4_MAXQUOTAS];
#endif
};
@@ -4804,7 +4817,7 @@
old_opts.s_max_batch_time = sbi->s_max_batch_time;
#ifdef CONFIG_QUOTA
old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
- for (i = 0; i < MAXQUOTAS; i++)
+ for (i = 0; i < EXT4_MAXQUOTAS; i++)
if (sbi->s_qf_names[i]) {
old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i],
GFP_KERNEL);
@@ -4965,7 +4978,7 @@
#ifdef CONFIG_QUOTA
/* Release old quota file names */
- for (i = 0; i < MAXQUOTAS; i++)
+ for (i = 0; i < EXT4_MAXQUOTAS; i++)
kfree(old_opts.s_qf_names[i]);
if (enable_quota) {
if (sb_any_quota_suspended(sb))
@@ -4994,7 +5007,7 @@
sbi->s_max_batch_time = old_opts.s_max_batch_time;
#ifdef CONFIG_QUOTA
sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
- for (i = 0; i < MAXQUOTAS; i++) {
+ for (i = 0; i < EXT4_MAXQUOTAS; i++) {
kfree(sbi->s_qf_names[i]);
sbi->s_qf_names[i] = old_opts.s_qf_names[i];
}
@@ -5197,7 +5210,7 @@
{
int err;
struct inode *qf_inode;
- unsigned long qf_inums[MAXQUOTAS] = {
+ unsigned long qf_inums[EXT4_MAXQUOTAS] = {
le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
};
@@ -5225,13 +5238,13 @@
static int ext4_enable_quotas(struct super_block *sb)
{
int type, err = 0;
- unsigned long qf_inums[MAXQUOTAS] = {
+ unsigned long qf_inums[EXT4_MAXQUOTAS] = {
le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
};
sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
- for (type = 0; type < MAXQUOTAS; type++) {
+ for (type = 0; type < EXT4_MAXQUOTAS; type++) {
if (qf_inums[type]) {
err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
DQUOT_USAGE_ENABLED);
@@ -5309,7 +5322,6 @@
{
struct inode *inode = sb_dqopt(sb)->files[type];
ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
- int err = 0;
int offset = off & (sb->s_blocksize - 1);
int tocopy;
size_t toread;
@@ -5324,9 +5336,9 @@
while (toread > 0) {
tocopy = sb->s_blocksize - offset < toread ?
sb->s_blocksize - offset : toread;
- bh = ext4_bread(NULL, inode, blk, 0, &err);
- if (err)
- return err;
+ bh = ext4_bread(NULL, inode, blk, 0);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
if (!bh) /* A hole? */
memset(data, 0, tocopy);
else
@@ -5347,8 +5359,7 @@
{
struct inode *inode = sb_dqopt(sb)->files[type];
ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
- int err = 0;
- int offset = off & (sb->s_blocksize - 1);
+ int err, offset = off & (sb->s_blocksize - 1);
struct buffer_head *bh;
handle_t *handle = journal_current_handle();
@@ -5369,14 +5380,16 @@
return -EIO;
}
- bh = ext4_bread(handle, inode, blk, 1, &err);
+ bh = ext4_bread(handle, inode, blk, 1);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
if (!bh)
goto out;
BUFFER_TRACE(bh, "get write access");
err = ext4_journal_get_write_access(handle, bh);
if (err) {
brelse(bh);
- goto out;
+ return err;
}
lock_buffer(bh);
memcpy(bh->b_data+offset, data, len);
@@ -5385,8 +5398,6 @@
err = ext4_handle_dirty_metadata(handle, NULL, bh);
brelse(bh);
out:
- if (err)
- return err;
if (inode->i_size < off + len) {
i_size_write(inode, off + len);
EXT4_I(inode)->i_disksize = inode->i_size;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index e738733..1e09fc7 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -142,8 +142,7 @@
sector_t block_nr,
struct ext4_xattr_header *hdr)
{
- if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
+ if (ext4_has_metadata_csum(inode->i_sb) &&
(hdr->h_checksum != ext4_xattr_block_csum(inode, block_nr, hdr)))
return 0;
return 1;
@@ -153,8 +152,7 @@
sector_t block_nr,
struct ext4_xattr_header *hdr)
{
- if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(inode->i_sb))
return;
hdr->h_checksum = ext4_xattr_block_csum(inode, block_nr, hdr);
@@ -190,14 +188,28 @@
}
static int
-ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end)
+ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end,
+ void *value_start)
{
- while (!IS_LAST_ENTRY(entry)) {
- struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(entry);
+ struct ext4_xattr_entry *e = entry;
+
+ while (!IS_LAST_ENTRY(e)) {
+ struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e);
if ((void *)next >= end)
return -EIO;
- entry = next;
+ e = next;
}
+
+ while (!IS_LAST_ENTRY(entry)) {
+ if (entry->e_value_size != 0 &&
+ (value_start + le16_to_cpu(entry->e_value_offs) <
+ (void *)e + sizeof(__u32) ||
+ value_start + le16_to_cpu(entry->e_value_offs) +
+ le32_to_cpu(entry->e_value_size) > end))
+ return -EIO;
+ entry = EXT4_XATTR_NEXT(entry);
+ }
+
return 0;
}
@@ -214,7 +226,8 @@
return -EIO;
if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh)))
return -EIO;
- error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size);
+ error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size,
+ bh->b_data);
if (!error)
set_buffer_verified(bh);
return error;
@@ -331,7 +344,7 @@
header = IHDR(inode, raw_inode);
entry = IFIRST(header);
end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
- error = ext4_xattr_check_names(entry, end);
+ error = ext4_xattr_check_names(entry, end, entry);
if (error)
goto cleanup;
error = ext4_xattr_find_entry(&entry, name_index, name,
@@ -463,7 +476,7 @@
raw_inode = ext4_raw_inode(&iloc);
header = IHDR(inode, raw_inode);
end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
- error = ext4_xattr_check_names(IFIRST(header), end);
+ error = ext4_xattr_check_names(IFIRST(header), end, IFIRST(header));
if (error)
goto cleanup;
error = ext4_xattr_list_entries(dentry, IFIRST(header),
@@ -899,14 +912,8 @@
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
- /*
- * take i_data_sem because we will test
- * i_delalloc_reserved_flag in ext4_mb_new_blocks
- */
- down_read(&EXT4_I(inode)->i_data_sem);
block = ext4_new_meta_blocks(handle, inode, goal, 0,
NULL, &error);
- up_read((&EXT4_I(inode)->i_data_sem));
if (error)
goto cleanup;
@@ -986,7 +993,8 @@
is->s.here = is->s.first;
is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
- error = ext4_xattr_check_names(IFIRST(header), is->s.end);
+ error = ext4_xattr_check_names(IFIRST(header), is->s.end,
+ IFIRST(header));
if (error)
return error;
/* Find the named attribute. */
diff --git a/fs/internal.h b/fs/internal.h
index 9477f8f..757ba2ab 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -47,7 +47,6 @@
/*
* namei.c
*/
-extern int __inode_permission(struct inode *, int);
extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *);
extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
const char *, unsigned int, struct path *);
@@ -139,12 +138,6 @@
extern int rw_verify_area(int, struct file *, const loff_t *, size_t);
/*
- * splice.c
- */
-extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
- loff_t *opos, size_t len, unsigned int flags);
-
-/*
* pipe.c
*/
extern const struct file_operations pipefifo_fops;
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 06fe11e..aab8549 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -886,7 +886,7 @@
goto out_err;
}
- bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
+ bh = getblk_unmovable(journal->j_dev, blocknr, journal->j_blocksize);
if (!bh) {
printk(KERN_ERR
"%s: Cannot get buffer for journal superblock\n",
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 7f34f47..988b32e 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -96,15 +96,8 @@
if (jh->b_transaction == NULL && !buffer_locked(bh) &&
!buffer_dirty(bh) && !buffer_write_io_error(bh)) {
- /*
- * Get our reference so that bh cannot be freed before
- * we unlock it
- */
- get_bh(bh);
JBUFFER_TRACE(jh, "remove from checkpoint list");
ret = __jbd2_journal_remove_checkpoint(jh) + 1;
- BUFFER_TRACE(bh, "release");
- __brelse(bh);
}
return ret;
}
@@ -122,8 +115,6 @@
nblocks = jbd2_space_needed(journal);
while (jbd2_log_space_left(journal) < nblocks) {
- if (journal->j_flags & JBD2_ABORT)
- return;
write_unlock(&journal->j_state_lock);
mutex_lock(&journal->j_checkpoint_mutex);
@@ -139,6 +130,10 @@
* trace for forensic evidence.
*/
write_lock(&journal->j_state_lock);
+ if (journal->j_flags & JBD2_ABORT) {
+ mutex_unlock(&journal->j_checkpoint_mutex);
+ return;
+ }
spin_lock(&journal->j_list_lock);
nblocks = jbd2_space_needed(journal);
space_left = jbd2_log_space_left(journal);
@@ -183,58 +178,6 @@
}
}
-/*
- * Clean up transaction's list of buffers submitted for io.
- * We wait for any pending IO to complete and remove any clean
- * buffers. Note that we take the buffers in the opposite ordering
- * from the one in which they were submitted for IO.
- *
- * Return 0 on success, and return <0 if some buffers have failed
- * to be written out.
- *
- * Called with j_list_lock held.
- */
-static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
-{
- struct journal_head *jh;
- struct buffer_head *bh;
- tid_t this_tid;
- int released = 0;
- int ret = 0;
-
- this_tid = transaction->t_tid;
-restart:
- /* Did somebody clean up the transaction in the meanwhile? */
- if (journal->j_checkpoint_transactions != transaction ||
- transaction->t_tid != this_tid)
- return ret;
- while (!released && transaction->t_checkpoint_io_list) {
- jh = transaction->t_checkpoint_io_list;
- bh = jh2bh(jh);
- get_bh(bh);
- if (buffer_locked(bh)) {
- spin_unlock(&journal->j_list_lock);
- wait_on_buffer(bh);
- /* the journal_head may have gone by now */
- BUFFER_TRACE(bh, "brelse");
- __brelse(bh);
- spin_lock(&journal->j_list_lock);
- goto restart;
- }
- if (unlikely(buffer_write_io_error(bh)))
- ret = -EIO;
-
- /*
- * Now in whatever state the buffer currently is, we know that
- * it has been written out and so we can drop it from the list
- */
- released = __jbd2_journal_remove_checkpoint(jh);
- __brelse(bh);
- }
-
- return ret;
-}
-
static void
__flush_batch(journal_t *journal, int *batch_count)
{
@@ -255,81 +198,6 @@
}
/*
- * Try to flush one buffer from the checkpoint list to disk.
- *
- * Return 1 if something happened which requires us to abort the current
- * scan of the checkpoint list. Return <0 if the buffer has failed to
- * be written out.
- *
- * Called with j_list_lock held and drops it if 1 is returned
- */
-static int __process_buffer(journal_t *journal, struct journal_head *jh,
- int *batch_count, transaction_t *transaction)
-{
- struct buffer_head *bh = jh2bh(jh);
- int ret = 0;
-
- if (buffer_locked(bh)) {
- get_bh(bh);
- spin_unlock(&journal->j_list_lock);
- wait_on_buffer(bh);
- /* the journal_head may have gone by now */
- BUFFER_TRACE(bh, "brelse");
- __brelse(bh);
- ret = 1;
- } else if (jh->b_transaction != NULL) {
- transaction_t *t = jh->b_transaction;
- tid_t tid = t->t_tid;
-
- transaction->t_chp_stats.cs_forced_to_close++;
- spin_unlock(&journal->j_list_lock);
- if (unlikely(journal->j_flags & JBD2_UNMOUNT))
- /*
- * The journal thread is dead; so starting and
- * waiting for a commit to finish will cause
- * us to wait for a _very_ long time.
- */
- printk(KERN_ERR "JBD2: %s: "
- "Waiting for Godot: block %llu\n",
- journal->j_devname,
- (unsigned long long) bh->b_blocknr);
- jbd2_log_start_commit(journal, tid);
- jbd2_log_wait_commit(journal, tid);
- ret = 1;
- } else if (!buffer_dirty(bh)) {
- ret = 1;
- if (unlikely(buffer_write_io_error(bh)))
- ret = -EIO;
- get_bh(bh);
- BUFFER_TRACE(bh, "remove from checkpoint");
- __jbd2_journal_remove_checkpoint(jh);
- spin_unlock(&journal->j_list_lock);
- __brelse(bh);
- } else {
- /*
- * Important: we are about to write the buffer, and
- * possibly block, while still holding the journal lock.
- * We cannot afford to let the transaction logic start
- * messing around with this buffer before we write it to
- * disk, as that would break recoverability.
- */
- BUFFER_TRACE(bh, "queue");
- get_bh(bh);
- J_ASSERT_BH(bh, !buffer_jwrite(bh));
- journal->j_chkpt_bhs[*batch_count] = bh;
- __buffer_relink_io(jh);
- transaction->t_chp_stats.cs_written++;
- (*batch_count)++;
- if (*batch_count == JBD2_NR_BATCH) {
- spin_unlock(&journal->j_list_lock);
- __flush_batch(journal, batch_count);
- ret = 1;
- }
- }
- return ret;
-}
-
-/*
* Perform an actual checkpoint. We take the first transaction on the
* list of transactions to be checkpointed and send all its buffers
* to disk. We submit larger chunks of data at once.
@@ -339,9 +207,11 @@
*/
int jbd2_log_do_checkpoint(journal_t *journal)
{
- transaction_t *transaction;
- tid_t this_tid;
- int result;
+ struct journal_head *jh;
+ struct buffer_head *bh;
+ transaction_t *transaction;
+ tid_t this_tid;
+ int result, batch_count = 0;
jbd_debug(1, "Start checkpoint\n");
@@ -374,45 +244,117 @@
* done (maybe it's a new transaction, but it fell at the same
* address).
*/
- if (journal->j_checkpoint_transactions == transaction &&
- transaction->t_tid == this_tid) {
- int batch_count = 0;
- struct journal_head *jh;
- int retry = 0, err;
+ if (journal->j_checkpoint_transactions != transaction ||
+ transaction->t_tid != this_tid)
+ goto out;
- while (!retry && transaction->t_checkpoint_list) {
- jh = transaction->t_checkpoint_list;
- retry = __process_buffer(journal, jh, &batch_count,
- transaction);
- if (retry < 0 && !result)
- result = retry;
- if (!retry && (need_resched() ||
- spin_needbreak(&journal->j_list_lock))) {
- spin_unlock(&journal->j_list_lock);
- retry = 1;
- break;
- }
+ /* checkpoint all of the transaction's buffers */
+ while (transaction->t_checkpoint_list) {
+ jh = transaction->t_checkpoint_list;
+ bh = jh2bh(jh);
+
+ if (buffer_locked(bh)) {
+ spin_unlock(&journal->j_list_lock);
+ get_bh(bh);
+ wait_on_buffer(bh);
+ /* the journal_head may have gone by now */
+ BUFFER_TRACE(bh, "brelse");
+ __brelse(bh);
+ goto retry;
}
+ if (jh->b_transaction != NULL) {
+ transaction_t *t = jh->b_transaction;
+ tid_t tid = t->t_tid;
- if (batch_count) {
- if (!retry) {
- spin_unlock(&journal->j_list_lock);
- retry = 1;
- }
- __flush_batch(journal, &batch_count);
+ transaction->t_chp_stats.cs_forced_to_close++;
+ spin_unlock(&journal->j_list_lock);
+ if (unlikely(journal->j_flags & JBD2_UNMOUNT))
+ /*
+ * The journal thread is dead; so
+ * starting and waiting for a commit
+ * to finish will cause us to wait for
+ * a _very_ long time.
+ */
+ printk(KERN_ERR
+ "JBD2: %s: Waiting for Godot: block %llu\n",
+ journal->j_devname, (unsigned long long) bh->b_blocknr);
+
+ jbd2_log_start_commit(journal, tid);
+ jbd2_log_wait_commit(journal, tid);
+ goto retry;
}
-
- if (retry) {
- spin_lock(&journal->j_list_lock);
- goto restart;
+ if (!buffer_dirty(bh)) {
+ if (unlikely(buffer_write_io_error(bh)) && !result)
+ result = -EIO;
+ BUFFER_TRACE(bh, "remove from checkpoint");
+ if (__jbd2_journal_remove_checkpoint(jh))
+ /* The transaction was released; we're done */
+ goto out;
+ continue;
}
/*
- * Now we have cleaned up the first transaction's checkpoint
- * list. Let's clean up the second one
+ * Important: we are about to write the buffer, and
+ * possibly block, while still holding the journal
+ * lock. We cannot afford to let the transaction
+ * logic start messing around with this buffer before
+ * we write it to disk, as that would break
+ * recoverability.
*/
- err = __wait_cp_io(journal, transaction);
- if (!result)
- result = err;
+ BUFFER_TRACE(bh, "queue");
+ get_bh(bh);
+ J_ASSERT_BH(bh, !buffer_jwrite(bh));
+ journal->j_chkpt_bhs[batch_count++] = bh;
+ __buffer_relink_io(jh);
+ transaction->t_chp_stats.cs_written++;
+ if ((batch_count == JBD2_NR_BATCH) ||
+ need_resched() ||
+ spin_needbreak(&journal->j_list_lock))
+ goto unlock_and_flush;
+ }
+
+ if (batch_count) {
+ unlock_and_flush:
+ spin_unlock(&journal->j_list_lock);
+ retry:
+ if (batch_count)
+ __flush_batch(journal, &batch_count);
+ spin_lock(&journal->j_list_lock);
+ goto restart;
+ }
+
+ /*
+ * Now we issued all of the transaction's buffers, let's deal
+ * with the buffers that are out for I/O.
+ */
+restart2:
+ /* Did somebody clean up the transaction in the meanwhile? */
+ if (journal->j_checkpoint_transactions != transaction ||
+ transaction->t_tid != this_tid)
+ goto out;
+
+ while (transaction->t_checkpoint_io_list) {
+ jh = transaction->t_checkpoint_io_list;
+ bh = jh2bh(jh);
+ if (buffer_locked(bh)) {
+ spin_unlock(&journal->j_list_lock);
+ get_bh(bh);
+ wait_on_buffer(bh);
+ /* the journal_head may have gone by now */
+ BUFFER_TRACE(bh, "brelse");
+ __brelse(bh);
+ spin_lock(&journal->j_list_lock);
+ goto restart2;
+ }
+ if (unlikely(buffer_write_io_error(bh)) && !result)
+ result = -EIO;
+
+ /*
+ * Now in whatever state the buffer currently is, we
+ * know that it has been written out and so we can
+ * drop it from the list
+ */
+ if (__jbd2_journal_remove_checkpoint(jh))
+ break;
}
out:
spin_unlock(&journal->j_list_lock);
@@ -478,18 +420,16 @@
* Find all the written-back checkpoint buffers in the given list and
* release them.
*
- * Called with the journal locked.
* Called with j_list_lock held.
- * Returns number of buffers reaped (for debug)
+ * Returns 1 if we freed the transaction, 0 otherwise.
*/
-
-static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
+static int journal_clean_one_cp_list(struct journal_head *jh)
{
struct journal_head *last_jh;
struct journal_head *next_jh = jh;
- int ret, freed = 0;
+ int ret;
+ int freed = 0;
- *released = 0;
if (!jh)
return 0;
@@ -498,13 +438,11 @@
jh = next_jh;
next_jh = jh->b_cpnext;
ret = __try_to_free_cp_buf(jh);
- if (ret) {
- freed++;
- if (ret == 2) {
- *released = 1;
- return freed;
- }
- }
+ if (!ret)
+ return freed;
+ if (ret == 2)
+ return 1;
+ freed = 1;
/*
* This function only frees up some memory
* if possible so we dont have an obligation
@@ -523,49 +461,49 @@
*
* Find all the written-back checkpoint buffers in the journal and release them.
*
- * Called with the journal locked.
* Called with j_list_lock held.
- * Returns number of buffers reaped (for debug)
*/
-
-int __jbd2_journal_clean_checkpoint_list(journal_t *journal)
+void __jbd2_journal_clean_checkpoint_list(journal_t *journal)
{
transaction_t *transaction, *last_transaction, *next_transaction;
- int ret = 0;
- int released;
+ int ret;
transaction = journal->j_checkpoint_transactions;
if (!transaction)
- goto out;
+ return;
last_transaction = transaction->t_cpprev;
next_transaction = transaction;
do {
transaction = next_transaction;
next_transaction = transaction->t_cpnext;
- ret += journal_clean_one_cp_list(transaction->
- t_checkpoint_list, &released);
+ ret = journal_clean_one_cp_list(transaction->t_checkpoint_list);
/*
* This function only frees up some memory if possible so we
* dont have an obligation to finish processing. Bail out if
* preemption requested:
*/
if (need_resched())
- goto out;
- if (released)
+ return;
+ if (ret)
continue;
/*
* It is essential that we are as careful as in the case of
* t_checkpoint_list with removing the buffer from the list as
* we can possibly see not yet submitted buffers on io_list
*/
- ret += journal_clean_one_cp_list(transaction->
- t_checkpoint_io_list, &released);
+ ret = journal_clean_one_cp_list(transaction->
+ t_checkpoint_io_list);
if (need_resched())
- goto out;
+ return;
+ /*
+ * Stop scanning if we couldn't free the transaction. This
+ * avoids pointless scanning of transactions which still
+ * weren't checkpointed.
+ */
+ if (!ret)
+ return;
} while (transaction != last_transaction);
-out:
- return ret;
}
/*
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 19d74d8..e4dc747 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1237,7 +1237,7 @@
goto out_err;
}
- bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
+ bh = getblk_unmovable(journal->j_dev, blocknr, journal->j_blocksize);
if (!bh) {
printk(KERN_ERR
"%s: Cannot get buffer for journal superblock\n",
@@ -1522,14 +1522,6 @@
goto out;
}
- if (jbd2_journal_has_csum_v2or3(journal) &&
- JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM)) {
- /* Can't have checksum v1 and v2 on at the same time! */
- printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 "
- "at the same time!\n");
- goto out;
- }
-
if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) &&
JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
/* Can't have checksum v2 and v3 at the same time! */
@@ -1538,6 +1530,14 @@
goto out;
}
+ if (jbd2_journal_has_csum_v2or3(journal) &&
+ JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM)) {
+ /* Can't have checksum v1 and v2 on at the same time! */
+ printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 "
+ "at the same time!\n");
+ goto out;
+ }
+
if (!jbd2_verify_csum_type(journal, sb)) {
printk(KERN_ERR "JBD2: Unknown checksum type\n");
goto out;
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 9b329b5..bcbef08 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -525,6 +525,7 @@
!jbd2_descr_block_csum_verify(journal,
bh->b_data)) {
err = -EIO;
+ brelse(bh);
goto failed;
}
diff --git a/fs/namei.c b/fs/namei.c
index 43927d1..42df664 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -416,6 +416,7 @@
return security_inode_permission(inode, mask);
}
+EXPORT_SYMBOL(__inode_permission);
/**
* sb_permission - Check superblock-level permissions
@@ -2383,22 +2384,17 @@
}
EXPORT_SYMBOL(kern_path_mountpoint);
-/*
- * It's inline, so penalty for filesystems that don't use sticky bit is
- * minimal.
- */
-static inline int check_sticky(struct inode *dir, struct inode *inode)
+int __check_sticky(struct inode *dir, struct inode *inode)
{
kuid_t fsuid = current_fsuid();
- if (!(dir->i_mode & S_ISVTX))
- return 0;
if (uid_eq(inode->i_uid, fsuid))
return 0;
if (uid_eq(dir->i_uid, fsuid))
return 0;
return !capable_wrt_inode_uidgid(inode, CAP_FOWNER);
}
+EXPORT_SYMBOL(__check_sticky);
/*
* Check whether we can remove a link victim from directory dir, check
@@ -3064,9 +3060,12 @@
error = may_open(&nd->path, acc_mode, open_flag);
if (error)
goto out;
- file->f_path.mnt = nd->path.mnt;
- error = finish_open(file, nd->path.dentry, NULL, opened);
- if (error) {
+
+ BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
+ error = vfs_open(&nd->path, file, current_cred());
+ if (!error) {
+ *opened |= FILE_OPENED;
+ } else {
if (error == -EOPENSTALE)
goto stale_open;
goto out;
@@ -4210,12 +4209,16 @@
bool should_retry = false;
int error;
- if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
+ if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
return -EINVAL;
- if ((flags & RENAME_NOREPLACE) && (flags & RENAME_EXCHANGE))
+ if ((flags & (RENAME_NOREPLACE | RENAME_WHITEOUT)) &&
+ (flags & RENAME_EXCHANGE))
return -EINVAL;
+ if ((flags & RENAME_WHITEOUT) && !capable(CAP_MKNOD))
+ return -EPERM;
+
retry:
from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags);
if (IS_ERR(from)) {
@@ -4347,6 +4350,20 @@
return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
}
+int vfs_whiteout(struct inode *dir, struct dentry *dentry)
+{
+ int error = may_create(dir, dentry);
+ if (error)
+ return error;
+
+ if (!dir->i_op->mknod)
+ return -EPERM;
+
+ return dir->i_op->mknod(dir, dentry,
+ S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
+}
+EXPORT_SYMBOL(vfs_whiteout);
+
int readlink_copy(char __user *buffer, int buflen, const char *link)
{
int len = PTR_ERR(link);
diff --git a/fs/namespace.c b/fs/namespace.c
index fbba8b1..5b66b2b 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1686,6 +1686,33 @@
namespace_unlock();
}
+/**
+ * clone_private_mount - create a private clone of a path
+ *
+ * This creates a new vfsmount, which will be the clone of @path. The new
+ * mount will not be attached anywhere in the namespace and will be private
+ * (i.e. changes to the originating mount won't be propagated into it).
+ *
+ * Release with mntput().
+ */
+struct vfsmount *clone_private_mount(struct path *path)
+{
+ struct mount *old_mnt = real_mount(path->mnt);
+ struct mount *new_mnt;
+
+ if (IS_MNT_UNBINDABLE(old_mnt))
+ return ERR_PTR(-EINVAL);
+
+ down_read(&namespace_sem);
+ new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
+ up_read(&namespace_sem);
+ if (IS_ERR(new_mnt))
+ return ERR_CAST(new_mnt);
+
+ return &new_mnt->mnt;
+}
+EXPORT_SYMBOL_GPL(clone_private_mount);
+
int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
struct vfsmount *root)
{
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index c6e4bda..9e5bc42 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -5,7 +5,7 @@
* All rights reserved.
*
* Benny Halevy <bhalevy@panasas.com>
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index c89357c..919efd4 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -5,7 +5,7 @@
* All rights reserved.
*
* Benny Halevy <bhalevy@panasas.com>
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h
index 3a0828d..2641dba 100644
--- a/fs/nfs/objlayout/objlayout.h
+++ b/fs/nfs/objlayout/objlayout.h
@@ -6,7 +6,7 @@
* All rights reserved.
*
* Benny Halevy <bhalevy@panasas.com>
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2
diff --git a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
index b3918f7..f093c7e 100644
--- a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
+++ b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
@@ -5,7 +5,7 @@
* All rights reserved.
*
* Benny Halevy <bhalevy@panasas.com>
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2
diff --git a/fs/open.c b/fs/open.c
index d6fd3ac..de92c13 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -823,8 +823,7 @@
f = get_empty_filp();
if (!IS_ERR(f)) {
f->f_flags = flags;
- f->f_path = *path;
- error = do_dentry_open(f, NULL, cred);
+ error = vfs_open(path, f, cred);
if (!error) {
/* from now on we need fput() to dispose of f */
error = open_check_o_direct(f);
@@ -841,6 +840,26 @@
}
EXPORT_SYMBOL(dentry_open);
+/**
+ * vfs_open - open the file at the given path
+ * @path: path to open
+ * @filp: newly allocated file with f_flags initialized
+ * @cred: credentials to use
+ */
+int vfs_open(const struct path *path, struct file *filp,
+ const struct cred *cred)
+{
+ struct inode *inode = path->dentry->d_inode;
+
+ if (inode->i_op->dentry_open)
+ return inode->i_op->dentry_open(path->dentry, filp, cred);
+ else {
+ filp->f_path = *path;
+ return do_dentry_open(filp, NULL, cred);
+ }
+}
+EXPORT_SYMBOL(vfs_open);
+
static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op)
{
int lookup_flags = 0;
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
new file mode 100644
index 0000000..e601259
--- /dev/null
+++ b/fs/overlayfs/Kconfig
@@ -0,0 +1,10 @@
+config OVERLAYFS_FS
+ tristate "Overlay filesystem support"
+ help
+ An overlay filesystem combines two filesystems - an 'upper' filesystem
+ and a 'lower' filesystem. When a name exists in both filesystems, the
+ object in the 'upper' filesystem is visible while the object in the
+ 'lower' filesystem is either hidden or, in the case of directories,
+ merged with the 'upper' object.
+
+ For more information see Documentation/filesystems/overlayfs.txt
diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile
new file mode 100644
index 0000000..8f91889
--- /dev/null
+++ b/fs/overlayfs/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the overlay filesystem.
+#
+
+obj-$(CONFIG_OVERLAYFS_FS) += overlayfs.o
+
+overlayfs-objs := super.o inode.o dir.o readdir.o copy_up.o
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
new file mode 100644
index 0000000..ea10a87
--- /dev/null
+++ b/fs/overlayfs/copy_up.c
@@ -0,0 +1,414 @@
+/*
+ *
+ * Copyright (C) 2011 Novell Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/splice.h>
+#include <linux/xattr.h>
+#include <linux/security.h>
+#include <linux/uaccess.h>
+#include <linux/sched.h>
+#include <linux/namei.h>
+#include "overlayfs.h"
+
+#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
+
+/* Copy every xattr of @old onto @new; returns 0 or a negative errno. */
+int ovl_copy_xattr(struct dentry *old, struct dentry *new)
+{
+ ssize_t list_size, size;
+ char *buf, *name, *value;
+ int error;
+
+ if (!old->d_inode->i_op->getxattr || !new->d_inode->i_op->getxattr)
+ return 0;
+
+ list_size = vfs_listxattr(old, NULL, 0);
+ if (list_size <= 0) {
+ if (list_size == -EOPNOTSUPP)
+ return 0;
+ return list_size;
+ }
+
+ buf = kzalloc(list_size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ error = -ENOMEM;
+ value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL);
+ if (!value)
+ goto out;
+
+ list_size = vfs_listxattr(old, buf, list_size);
+ if (list_size <= 0) {
+ error = list_size;
+ goto out_free_value;
+ }
+
+ for (name = buf; name < (buf + list_size); name += strlen(name) + 1) {
+ size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX);
+ if (size < 0) { /* size 0 is a valid, empty value: copy it */
+ error = size;
+ goto out_free_value;
+ }
+ error = vfs_setxattr(new, name, value, size, 0);
+ if (error)
+ goto out_free_value;
+ }
+
+out_free_value:
+ kfree(value);
+out:
+ kfree(buf);
+ return error;
+}
+
+static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
+{
+ struct file *old_file;
+ struct file *new_file;
+ loff_t old_pos = 0;
+ loff_t new_pos = 0;
+ int error = 0;
+
+ if (len == 0)
+ return 0;
+
+ old_file = ovl_path_open(old, O_RDONLY);
+ if (IS_ERR(old_file))
+ return PTR_ERR(old_file);
+
+ new_file = ovl_path_open(new, O_WRONLY);
+ if (IS_ERR(new_file)) {
+ error = PTR_ERR(new_file);
+ goto out_fput;
+ }
+
+ /* FIXME: copy up sparse files efficiently */
+ while (len) {
+ size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
+ long bytes;
+
+ if (len < this_len)
+ this_len = len;
+
+ if (signal_pending_state(TASK_KILLABLE, current)) {
+ error = -EINTR;
+ break;
+ }
+
+ bytes = do_splice_direct(old_file, &old_pos,
+ new_file, &new_pos,
+ this_len, SPLICE_F_MOVE);
+ if (bytes <= 0) {
+ error = bytes;
+ break;
+ }
+ WARN_ON(old_pos != new_pos);
+
+ len -= bytes;
+ }
+
+ fput(new_file);
+out_fput:
+ fput(old_file);
+ return error;
+}
+
+static char *ovl_read_symlink(struct dentry *realdentry)
+{
+ int res;
+ char *buf;
+ struct inode *inode = realdentry->d_inode;
+ mm_segment_t old_fs;
+
+ res = -EINVAL;
+ if (!inode->i_op->readlink)
+ goto err;
+
+ res = -ENOMEM;
+ buf = (char *) __get_free_page(GFP_KERNEL);
+ if (!buf)
+ goto err;
+
+ old_fs = get_fs();
+ set_fs(get_ds());
+ /* The cast to a user pointer is valid due to the set_fs() */
+ res = inode->i_op->readlink(realdentry,
+ (char __user *)buf, PAGE_SIZE - 1);
+ set_fs(old_fs);
+ if (res < 0) {
+ free_page((unsigned long) buf);
+ goto err;
+ }
+ buf[res] = '\0';
+
+ return buf;
+
+err:
+ return ERR_PTR(res);
+}
+
+static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
+{
+ struct iattr attr = {
+ .ia_valid =
+ ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET,
+ .ia_atime = stat->atime,
+ .ia_mtime = stat->mtime,
+ };
+
+ return notify_change(upperdentry, &attr, NULL);
+}
+
+int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
+{
+ int err = 0;
+
+ if (!S_ISLNK(stat->mode)) {
+ struct iattr attr = {
+ .ia_valid = ATTR_MODE,
+ .ia_mode = stat->mode,
+ };
+ err = notify_change(upperdentry, &attr, NULL);
+ }
+ if (!err) {
+ struct iattr attr = {
+ .ia_valid = ATTR_UID | ATTR_GID,
+ .ia_uid = stat->uid,
+ .ia_gid = stat->gid,
+ };
+ err = notify_change(upperdentry, &attr, NULL);
+ }
+ if (!err)
+ ovl_set_timestamps(upperdentry, stat);
+
+ return err;
+
+}
+
+/* Create the copy under a temp name in workdir, then rename it to upperdir. */
+static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
+ struct dentry *dentry, struct path *lowerpath,
+ struct kstat *stat, struct iattr *attr,
+ const char *link)
+{
+ struct inode *wdir = workdir->d_inode;
+ struct inode *udir = upperdir->d_inode;
+ struct dentry *newdentry = NULL;
+ struct dentry *upper = NULL;
+ umode_t mode = stat->mode;
+ int err;
+
+ newdentry = ovl_lookup_temp(workdir, dentry);
+ err = PTR_ERR(newdentry);
+ if (IS_ERR(newdentry))
+ goto out;
+
+ upper = lookup_one_len(dentry->d_name.name, upperdir,
+ dentry->d_name.len);
+ err = PTR_ERR(upper);
+ if (IS_ERR(upper))
+ goto out1;
+
+ /* Can't properly set mode on creation because of the umask */
+ stat->mode &= S_IFMT;
+ err = ovl_create_real(wdir, newdentry, stat, link, NULL, true);
+ stat->mode = mode;
+ if (err)
+ goto out2;
+
+ if (S_ISREG(stat->mode)) {
+ struct path upperpath;
+ ovl_path_upper(dentry, &upperpath);
+ BUG_ON(upperpath.dentry != NULL);
+ upperpath.dentry = newdentry;
+
+ err = ovl_copy_up_data(lowerpath, &upperpath, stat->size);
+ if (err)
+ goto out_cleanup;
+ }
+
+ err = ovl_copy_xattr(lowerpath->dentry, newdentry);
+ if (err)
+ goto out_cleanup;
+
+ mutex_lock(&newdentry->d_inode->i_mutex);
+ err = ovl_set_attr(newdentry, stat);
+ if (!err && attr)
+ err = notify_change(newdentry, attr, NULL);
+ mutex_unlock(&newdentry->d_inode->i_mutex);
+ if (err)
+ goto out_cleanup;
+
+ err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
+ if (err)
+ goto out_cleanup;
+
+ ovl_dentry_update(dentry, newdentry);
+ newdentry = NULL;
+
+ /* Non-directories become opaque when copied up. */
+ if (!S_ISDIR(stat->mode))
+ ovl_dentry_set_opaque(dentry, true);
+out2:
+ dput(upper);
+out1:
+ dput(newdentry);
+out:
+ return err;
+
+out_cleanup:
+ ovl_cleanup(wdir, newdentry);
+ /* ovl_cleanup() keeps our refs: drop upper and newdentry through out2 */
+ goto out2;
+}
+
+/*
+ * Copy up a single dentry
+ *
+ * Directory renames only allowed on "pure upper" (already created on
+ * upper filesystem, never copied up). Directories which are on lower or
+ * are merged may not be renamed. For these -EXDEV is returned and
+ * userspace has to deal with it. This means, when copying up a
+ * directory we can rely on it and ancestors being stable.
+ *
+ * Non-directory renames start with copy up of source if necessary. The
+ * actual rename will only proceed once the copy up was successful. Copy
+ * up uses upper parent i_mutex for exclusion. Since rename can change
+ * d_parent it is possible that the copy up will lock the old parent. At
+ * that point the file will have already been copied up anyway.
+ */
+int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
+ struct path *lowerpath, struct kstat *stat,
+ struct iattr *attr)
+{
+ struct dentry *workdir = ovl_workdir(dentry);
+ int err;
+ struct kstat pstat;
+ struct path parentpath;
+ struct dentry *upperdir;
+ struct dentry *upperdentry;
+ const struct cred *old_cred;
+ struct cred *override_cred;
+ char *link = NULL;
+
+ ovl_path_upper(parent, &parentpath);
+ upperdir = parentpath.dentry;
+
+ err = vfs_getattr(&parentpath, &pstat);
+ if (err)
+ return err;
+
+ if (S_ISLNK(stat->mode)) {
+ link = ovl_read_symlink(lowerpath->dentry);
+ if (IS_ERR(link))
+ return PTR_ERR(link);
+ }
+
+ err = -ENOMEM;
+ override_cred = prepare_creds();
+ if (!override_cred)
+ goto out_free_link;
+
+ override_cred->fsuid = stat->uid;
+ override_cred->fsgid = stat->gid;
+ /*
+ * CAP_SYS_ADMIN for copying up extended attributes
+ * CAP_DAC_OVERRIDE for create
+ * CAP_FOWNER for chmod, timestamp update
+ * CAP_FSETID for chmod
+ * CAP_CHOWN for chown
+ * CAP_MKNOD for mknod
+ */
+ cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
+ cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
+ cap_raise(override_cred->cap_effective, CAP_FOWNER);
+ cap_raise(override_cred->cap_effective, CAP_FSETID);
+ cap_raise(override_cred->cap_effective, CAP_CHOWN);
+ cap_raise(override_cred->cap_effective, CAP_MKNOD);
+ old_cred = override_creds(override_cred);
+
+ err = -EIO;
+ if (lock_rename(workdir, upperdir) != NULL) {
+ pr_err("overlayfs: failed to lock workdir+upperdir\n");
+ goto out_unlock;
+ }
+ upperdentry = ovl_dentry_upper(dentry);
+ if (upperdentry) {
+ unlock_rename(workdir, upperdir);
+ err = 0;
+ /* Raced with another copy-up? Do the setattr here */
+ if (attr) {
+ mutex_lock(&upperdentry->d_inode->i_mutex);
+ err = notify_change(upperdentry, attr, NULL);
+ mutex_unlock(&upperdentry->d_inode->i_mutex);
+ }
+ goto out_put_cred;
+ }
+
+ err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath,
+ stat, attr, link);
+ if (!err) {
+ /* Restore timestamps on parent (best effort) */
+ ovl_set_timestamps(upperdir, &pstat);
+ }
+out_unlock:
+ unlock_rename(workdir, upperdir);
+out_put_cred:
+ revert_creds(old_cred);
+ put_cred(override_cred);
+
+out_free_link:
+ if (link)
+ free_page((unsigned long) link);
+
+ return err;
+}
+
+int ovl_copy_up(struct dentry *dentry)
+{
+ int err;
+
+ err = 0;
+ while (!err) {
+ struct dentry *next;
+ struct dentry *parent;
+ struct path lowerpath;
+ struct kstat stat;
+ enum ovl_path_type type = ovl_path_type(dentry);
+
+ if (type != OVL_PATH_LOWER)
+ break;
+
+ next = dget(dentry);
+ /* find the topmost dentry not yet copied up */
+ for (;;) {
+ parent = dget_parent(next);
+
+ type = ovl_path_type(parent);
+ if (type != OVL_PATH_LOWER)
+ break;
+
+ dput(next);
+ next = parent;
+ }
+
+ ovl_path_lower(next, &lowerpath);
+ err = vfs_getattr(&lowerpath, &stat);
+ if (!err)
+ err = ovl_copy_up_one(parent, next, &lowerpath, &stat, NULL);
+
+ dput(parent);
+ dput(next);
+ }
+
+ return err;
+}
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
new file mode 100644
index 0000000..15cd91a
--- /dev/null
+++ b/fs/overlayfs/dir.c
@@ -0,0 +1,921 @@
+/*
+ *
+ * Copyright (C) 2011 Novell Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/xattr.h>
+#include <linux/security.h>
+#include <linux/cred.h>
+#include "overlayfs.h"
+
+void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
+{
+ int err;
+
+ dget(wdentry);
+ if (S_ISDIR(wdentry->d_inode->i_mode))
+ err = ovl_do_rmdir(wdir, wdentry);
+ else
+ err = ovl_do_unlink(wdir, wdentry);
+ dput(wdentry);
+
+ if (err) {
+ pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n",
+ wdentry, err);
+ }
+}
+
+struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry)
+{
+ struct dentry *temp;
+ char name[20];
+
+ snprintf(name, sizeof(name), "#%lx", (unsigned long) dentry);
+
+ temp = lookup_one_len(name, workdir, strlen(name));
+ if (!IS_ERR(temp) && temp->d_inode) {
+ pr_err("overlayfs: workdir/%s already exists\n", name);
+ dput(temp);
+ temp = ERR_PTR(-EIO);
+ }
+
+ return temp;
+}
+
+/* caller holds i_mutex on workdir */
+static struct dentry *ovl_whiteout(struct dentry *workdir,
+ struct dentry *dentry)
+{
+ int err;
+ struct dentry *whiteout;
+ struct inode *wdir = workdir->d_inode;
+
+ whiteout = ovl_lookup_temp(workdir, dentry);
+ if (IS_ERR(whiteout))
+ return whiteout;
+
+ err = ovl_do_whiteout(wdir, whiteout);
+ if (err) {
+ dput(whiteout);
+ whiteout = ERR_PTR(err);
+ }
+
+ return whiteout;
+}
+
+int ovl_create_real(struct inode *dir, struct dentry *newdentry,
+ struct kstat *stat, const char *link,
+ struct dentry *hardlink, bool debug)
+{
+ int err;
+
+ if (newdentry->d_inode)
+ return -ESTALE;
+
+ if (hardlink) {
+ err = ovl_do_link(hardlink, dir, newdentry, debug);
+ } else {
+ switch (stat->mode & S_IFMT) {
+ case S_IFREG:
+ err = ovl_do_create(dir, newdentry, stat->mode, debug);
+ break;
+
+ case S_IFDIR:
+ err = ovl_do_mkdir(dir, newdentry, stat->mode, debug);
+ break;
+
+ case S_IFCHR:
+ case S_IFBLK:
+ case S_IFIFO:
+ case S_IFSOCK:
+ err = ovl_do_mknod(dir, newdentry,
+ stat->mode, stat->rdev, debug);
+ break;
+
+ case S_IFLNK:
+ err = ovl_do_symlink(dir, newdentry, link, debug);
+ break;
+
+ default:
+ err = -EPERM;
+ }
+ }
+ if (!err && WARN_ON(!newdentry->d_inode)) {
+ /*
+ * Not quite sure if non-instantiated dentry is legal or not.
+ * VFS doesn't seem to care so check and warn here.
+ */
+ err = -ENOENT;
+ }
+ return err;
+}
+
+static int ovl_set_opaque(struct dentry *upperdentry)
+{
+ return ovl_do_setxattr(upperdentry, ovl_opaque_xattr, "y", 1, 0);
+}
+
+static void ovl_remove_opaque(struct dentry *upperdentry)
+{
+ int err;
+
+ err = ovl_do_removexattr(upperdentry, ovl_opaque_xattr);
+ if (err) {
+ pr_warn("overlayfs: failed to remove opaque from '%s' (%i)\n",
+ upperdentry->d_name.name, err);
+ }
+}
+
+static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry,
+ struct kstat *stat)
+{
+ int err;
+ enum ovl_path_type type;
+ struct path realpath;
+
+ type = ovl_path_real(dentry, &realpath);
+ err = vfs_getattr(&realpath, stat);
+ if (err)
+ return err;
+
+ stat->dev = dentry->d_sb->s_dev;
+ stat->ino = dentry->d_inode->i_ino;
+
+ /*
+ * It's probably not worth it to count subdirs to get the
+ * correct link count. nlink=1 seems to pacify 'find' and
+ * other utilities.
+ */
+ if (type == OVL_PATH_MERGE)
+ stat->nlink = 1;
+
+ return 0;
+}
+
+static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
+ struct kstat *stat, const char *link,
+ struct dentry *hardlink)
+{
+ struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
+ struct inode *udir = upperdir->d_inode;
+ struct dentry *newdentry;
+ int err;
+
+ mutex_lock_nested(&udir->i_mutex, I_MUTEX_PARENT);
+ newdentry = lookup_one_len(dentry->d_name.name, upperdir,
+ dentry->d_name.len);
+ err = PTR_ERR(newdentry);
+ if (IS_ERR(newdentry))
+ goto out_unlock;
+ err = ovl_create_real(udir, newdentry, stat, link, hardlink, false);
+ if (err)
+ goto out_dput;
+
+ ovl_dentry_version_inc(dentry->d_parent);
+ ovl_dentry_update(dentry, newdentry);
+ ovl_copyattr(newdentry->d_inode, inode);
+ d_instantiate(dentry, inode);
+ newdentry = NULL;
+out_dput:
+ dput(newdentry);
+out_unlock:
+ mutex_unlock(&udir->i_mutex);
+ return err;
+}
+
+static int ovl_lock_rename_workdir(struct dentry *workdir,
+ struct dentry *upperdir)
+{
+ /* Workdir should not be the same as upperdir */
+ if (workdir == upperdir)
+ goto err;
+
+ /* Workdir should not be subdir of upperdir and vice versa */
+ if (lock_rename(workdir, upperdir) != NULL)
+ goto err_unlock;
+
+ return 0;
+
+err_unlock:
+ unlock_rename(workdir, upperdir);
+err:
+ pr_err("overlayfs: failed to lock workdir+upperdir\n");
+ return -EIO;
+}
+
+static struct dentry *ovl_clear_empty(struct dentry *dentry,
+ struct list_head *list)
+{
+ struct dentry *workdir = ovl_workdir(dentry);
+ struct inode *wdir = workdir->d_inode;
+ struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
+ struct inode *udir = upperdir->d_inode;
+ struct path upperpath;
+ struct dentry *upper;
+ struct dentry *opaquedir;
+ struct kstat stat;
+ int err;
+
+ err = ovl_lock_rename_workdir(workdir, upperdir);
+ if (err)
+ goto out;
+
+ ovl_path_upper(dentry, &upperpath);
+ err = vfs_getattr(&upperpath, &stat);
+ if (err)
+ goto out_unlock;
+
+ err = -ESTALE;
+ if (!S_ISDIR(stat.mode))
+ goto out_unlock;
+ upper = upperpath.dentry;
+ if (upper->d_parent->d_inode != udir)
+ goto out_unlock;
+
+ opaquedir = ovl_lookup_temp(workdir, dentry);
+ err = PTR_ERR(opaquedir);
+ if (IS_ERR(opaquedir))
+ goto out_unlock;
+
+ err = ovl_create_real(wdir, opaquedir, &stat, NULL, NULL, true);
+ if (err)
+ goto out_dput;
+
+ err = ovl_copy_xattr(upper, opaquedir);
+ if (err)
+ goto out_cleanup;
+
+ err = ovl_set_opaque(opaquedir);
+ if (err)
+ goto out_cleanup;
+
+ mutex_lock(&opaquedir->d_inode->i_mutex);
+ err = ovl_set_attr(opaquedir, &stat);
+ mutex_unlock(&opaquedir->d_inode->i_mutex);
+ if (err)
+ goto out_cleanup;
+
+ err = ovl_do_rename(wdir, opaquedir, udir, upper, RENAME_EXCHANGE);
+ if (err)
+ goto out_cleanup;
+
+ ovl_cleanup_whiteouts(upper, list);
+ ovl_cleanup(wdir, upper);
+ unlock_rename(workdir, upperdir);
+
+ /* dentry's upper doesn't match now, get rid of it */
+ d_drop(dentry);
+
+ return opaquedir;
+
+out_cleanup:
+ ovl_cleanup(wdir, opaquedir);
+out_dput:
+ dput(opaquedir);
+out_unlock:
+ unlock_rename(workdir, upperdir);
+out:
+ return ERR_PTR(err);
+}
+
+/*
+ * Check that the directory @dentry is empty and, if @type is
+ * OVL_PATH_MERGE, replace its upper directory via ovl_clear_empty().
+ *
+ * Returns an ERR_PTR() if the emptiness check or the replacement fails,
+ * the dentry returned by ovl_clear_empty() for merged directories, and
+ * NULL otherwise.
+ */
+static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry,
+						enum ovl_path_type type)
+{
+	LIST_HEAD(entries);
+	struct dentry *result;
+	int rc = ovl_check_empty_dir(dentry, &entries);
+
+	if (rc)
+		result = ERR_PTR(rc);
+	else if (type != OVL_PATH_MERGE)
+		result = NULL;
+	else
+		result = ovl_clear_empty(dentry, &entries);
+
+	/* The list is released on every path, success or failure */
+	ovl_cache_free(&entries);
+
+	return result;
+}
+
+/*
+ * Create a new object for @dentry on top of an existing entry of the same
+ * name in the upper directory (a whiteout, going by the function name).
+ *
+ * The object is first created under a temporary name in the work
+ * directory. A directory is marked opaque and exchanged with the existing
+ * upper entry, which is then removed from the work directory; any other
+ * object is simply renamed over the upper entry. On success the parent's
+ * version is bumped and @dentry is instantiated with @inode.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
+ struct kstat *stat, const char *link,
+ struct dentry *hardlink)
+{
+ struct dentry *workdir = ovl_workdir(dentry);
+ struct inode *wdir = workdir->d_inode;
+ struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
+ struct inode *udir = upperdir->d_inode;
+ struct dentry *upper;
+ struct dentry *newdentry;
+ int err;
+
+ err = ovl_lock_rename_workdir(workdir, upperdir);
+ if (err)
+ goto out;
+
+ newdentry = ovl_lookup_temp(workdir, dentry);
+ err = PTR_ERR(newdentry);
+ if (IS_ERR(newdentry))
+ goto out_unlock;
+
+ upper = lookup_one_len(dentry->d_name.name, upperdir,
+ dentry->d_name.len);
+ err = PTR_ERR(upper);
+ if (IS_ERR(upper))
+ goto out_dput;
+
+ err = ovl_create_real(wdir, newdentry, stat, link, hardlink, true);
+ if (err)
+ goto out_dput2;
+
+ if (S_ISDIR(stat->mode)) {
+ err = ovl_set_opaque(newdentry);
+ if (err)
+ goto out_cleanup;
+
+ err = ovl_do_rename(wdir, newdentry, udir, upper,
+ RENAME_EXCHANGE);
+ if (err)
+ goto out_cleanup;
+
+ /* After the exchange the old upper entry sits in the work directory */
+ ovl_cleanup(wdir, upper);
+ } else {
+ err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
+ if (err)
+ goto out_cleanup;
+ }
+ ovl_dentry_version_inc(dentry->d_parent);
+ ovl_dentry_update(dentry, newdentry);
+ ovl_copyattr(newdentry->d_inode, inode);
+ d_instantiate(dentry, inode);
+ /* newdentry was handed to ovl_dentry_update(); skip the dput() below */
+ newdentry = NULL;
+out_dput2:
+ dput(upper);
+out_dput:
+ dput(newdentry);
+out_unlock:
+ unlock_rename(workdir, upperdir);
+out:
+ return err;
+
+out_cleanup:
+ ovl_cleanup(wdir, newdentry);
+ goto out_dput2;
+}
+
+/*
+ * Common worker for create, mkdir, mknod, symlink and link.
+ *
+ * Allocates the overlay inode, copies up the parent directory and then
+ * creates the object either directly in the upper directory or, when
+ * @dentry is opaque, via ovl_create_over_whiteout() with temporarily
+ * raised capabilities. @hardlink is the upper dentry to link to, or NULL.
+ *
+ * Returns 0 on success or a negative errno; the new inode is released
+ * again on failure.
+ */
+static int ovl_create_or_link(struct dentry *dentry, int mode, dev_t rdev,
+ const char *link, struct dentry *hardlink)
+{
+ int err;
+ struct inode *inode;
+ struct kstat stat = {
+ .mode = mode,
+ .rdev = rdev,
+ };
+
+ err = -ENOMEM;
+ inode = ovl_new_inode(dentry->d_sb, mode, dentry->d_fsdata);
+ if (!inode)
+ goto out;
+
+ err = ovl_copy_up(dentry->d_parent);
+ if (err)
+ goto out_iput;
+
+ if (!ovl_dentry_is_opaque(dentry)) {
+ err = ovl_create_upper(dentry, inode, &stat, link, hardlink);
+ } else {
+ const struct cred *old_cred;
+ struct cred *override_cred;
+
+ err = -ENOMEM;
+ override_cred = prepare_creds();
+ if (!override_cred)
+ goto out_iput;
+
+ /*
+ * CAP_SYS_ADMIN for setting opaque xattr
+ * CAP_DAC_OVERRIDE for create in workdir, rename
+ * CAP_FOWNER for removing whiteout from sticky dir
+ */
+ cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
+ cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
+ cap_raise(override_cred->cap_effective, CAP_FOWNER);
+ old_cred = override_creds(override_cred);
+
+ err = ovl_create_over_whiteout(dentry, inode, &stat, link,
+ hardlink);
+
+ revert_creds(old_cred);
+ put_cred(override_cred);
+ }
+
+ /* On success the inode is in use by the dentry; iput(NULL) is a no-op */
+ if (!err)
+ inode = NULL;
+out_iput:
+ iput(inode);
+out:
+ return err;
+}
+
+/*
+ * Create a new (non-hardlink) object for @dentry while holding write
+ * access obtained through ovl_want_write().
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
+			     const char *link)
+{
+	int ret = ovl_want_write(dentry);
+
+	if (ret)
+		return ret;
+
+	ret = ovl_create_or_link(dentry, mode, rdev, link, NULL);
+	ovl_drop_write(dentry);
+
+	return ret;
+}
+
+/* create inode operation: make a regular file; @dir and @excl are unused. */
+static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode,
+		      bool excl)
+{
+	int file_mode = (mode & 07777) | S_IFREG;
+
+	return ovl_create_object(dentry, file_mode, 0, NULL);
+}
+
+/* mkdir inode operation: make a directory; @dir is unused. */
+static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+	int dir_mode = (mode & 07777) | S_IFDIR;
+
+	return ovl_create_object(dentry, dir_mode, 0, NULL);
+}
+
+/*
+ * mknod inode operation. A character device with number WHITEOUT_DEV is
+ * refused with -EPERM so that no "whiteout" can be created on the overlay.
+ */
+static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
+		     dev_t rdev)
+{
+	bool is_whiteout = S_ISCHR(mode) && rdev == WHITEOUT_DEV;
+
+	if (is_whiteout)
+		return -EPERM;
+
+	return ovl_create_object(dentry, mode, rdev, NULL);
+}
+
+/* symlink inode operation: make a symlink pointing at @link; @dir is unused. */
+static int ovl_symlink(struct inode *dir, struct dentry *dentry,
+		       const char *link)
+{
+	const int link_mode = S_IFLNK;
+
+	return ovl_create_object(dentry, link_mode, 0, link);
+}
+
+/*
+ * link inode operation: copy up @old and create @new as a hard link to
+ * its upper dentry. @newdir is unused.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int ovl_link(struct dentry *old, struct inode *newdir,
+		    struct dentry *new)
+{
+	struct dentry *upper;
+	int err = ovl_want_write(old);
+
+	if (err)
+		return err;
+
+	err = ovl_copy_up(old);
+	if (!err) {
+		upper = ovl_dentry_upper(old);
+		err = ovl_create_or_link(new, upper->d_inode->i_mode, 0, NULL,
+					 upper);
+	}
+
+	ovl_drop_write(old);
+	return err;
+}
+
+/*
+ * Remove @dentry by putting a whiteout in its place in the upper directory.
+ *
+ * For directories the emptiness check (and, for merged directories, the
+ * replacement by an opaque directory) is done first. A whiteout is then
+ * obtained from ovl_whiteout() in the work directory and renamed over the
+ * upper name (OVL_PATH_LOWER), renamed over the upper object, or exchanged
+ * with the upper directory, which is subsequently cleaned up.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int ovl_remove_and_whiteout(struct dentry *dentry,
+ enum ovl_path_type type, bool is_dir)
+{
+ struct dentry *workdir = ovl_workdir(dentry);
+ struct inode *wdir = workdir->d_inode;
+ struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
+ struct inode *udir = upperdir->d_inode;
+ struct dentry *whiteout;
+ struct dentry *upper;
+ struct dentry *opaquedir = NULL;
+ int err;
+
+ if (is_dir) {
+ opaquedir = ovl_check_empty_and_clear(dentry, type);
+ err = PTR_ERR(opaquedir);
+ if (IS_ERR(opaquedir))
+ goto out;
+ }
+
+ err = ovl_lock_rename_workdir(workdir, upperdir);
+ if (err)
+ goto out_dput;
+
+ whiteout = ovl_whiteout(workdir, dentry);
+ err = PTR_ERR(whiteout);
+ if (IS_ERR(whiteout))
+ goto out_unlock;
+
+ if (type == OVL_PATH_LOWER) {
+ /* No upper object is used here: look the name up in the upper dir */
+ upper = lookup_one_len(dentry->d_name.name, upperdir,
+ dentry->d_name.len);
+ err = PTR_ERR(upper);
+ if (IS_ERR(upper))
+ goto kill_whiteout;
+
+ err = ovl_do_rename(wdir, whiteout, udir, upper, 0);
+ dput(upper);
+ if (err)
+ goto kill_whiteout;
+ } else {
+ int flags = 0;
+
+ /* Prefer the opaque directory created above, if there is one */
+ upper = ovl_dentry_upper(dentry);
+ if (opaquedir)
+ upper = opaquedir;
+ err = -ESTALE;
+ if (upper->d_parent != upperdir)
+ goto kill_whiteout;
+
+ if (is_dir)
+ flags |= RENAME_EXCHANGE;
+
+ err = ovl_do_rename(wdir, whiteout, udir, upper, flags);
+ if (err)
+ goto kill_whiteout;
+
+ if (is_dir)
+ ovl_cleanup(wdir, upper);
+ }
+ ovl_dentry_version_inc(dentry->d_parent);
+out_d_drop:
+ d_drop(dentry);
+ dput(whiteout);
+out_unlock:
+ unlock_rename(workdir, upperdir);
+out_dput:
+ dput(opaquedir);
+out:
+ return err;
+
+kill_whiteout:
+ ovl_cleanup(wdir, whiteout);
+ goto out_d_drop;
+}
+
+/*
+ * Remove the upper object of @dentry with vfs_rmdir() or vfs_unlink(),
+ * depending on @is_dir, while holding the upper parent's i_mutex.
+ *
+ * Returns -ESTALE if the upper dentry is no longer a child of the parent's
+ * upper directory, otherwise the result of the VFS call. The overlay
+ * dentry is unhashed in either case.
+ */
+static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
+{
+ struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
+ struct inode *dir = upperdir->d_inode;
+ struct dentry *upper = ovl_dentry_upper(dentry);
+ int err;
+
+ mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ err = -ESTALE;
+ if (upper->d_parent == upperdir) {
+ /* Don't let d_delete() think it can reset d_inode */
+ dget(upper);
+ if (is_dir)
+ err = vfs_rmdir(dir, upper);
+ else
+ err = vfs_unlink(dir, upper, NULL);
+ dput(upper);
+ ovl_dentry_version_inc(dentry->d_parent);
+ }
+
+ /*
+ * Keeping this dentry hashed would mean having to release
+ * upperpath/lowerpath, which could only be done if we are the
+ * sole user of this dentry. Too tricky... Just unhash for
+ * now.
+ */
+ d_drop(dentry);
+ mutex_unlock(&dir->i_mutex);
+
+ return err;
+}
+
+/*
+ * Apply check_sticky() to the real inodes behind @dentry and its parent.
+ * Returns -EPERM if the check trips, 0 otherwise.
+ */
+static inline int ovl_check_sticky(struct dentry *dentry)
+{
+	struct dentry *real_parent = ovl_dentry_real(dentry->d_parent);
+	struct dentry *real = ovl_dentry_real(dentry);
+
+	return check_sticky(real_parent->d_inode, real->d_inode) ? -EPERM : 0;
+}
+
+/*
+ * Common worker for unlink and rmdir.
+ *
+ * After the sticky-bit check the parent is copied up. A dentry of type
+ * OVL_PATH_PURE_UPPER is removed directly from the upper directory; every
+ * other type is replaced by a whiteout, which is done with temporarily
+ * raised capabilities.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int ovl_do_remove(struct dentry *dentry, bool is_dir)
+{
+ enum ovl_path_type type;
+ int err;
+
+ err = ovl_check_sticky(dentry);
+ if (err)
+ goto out;
+
+ err = ovl_want_write(dentry);
+ if (err)
+ goto out;
+
+ err = ovl_copy_up(dentry->d_parent);
+ if (err)
+ goto out_drop_write;
+
+ type = ovl_path_type(dentry);
+ if (type == OVL_PATH_PURE_UPPER) {
+ err = ovl_remove_upper(dentry, is_dir);
+ } else {
+ const struct cred *old_cred;
+ struct cred *override_cred;
+
+ err = -ENOMEM;
+ override_cred = prepare_creds();
+ if (!override_cred)
+ goto out_drop_write;
+
+ /*
+ * CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir
+ * CAP_DAC_OVERRIDE for create in workdir, rename
+ * CAP_FOWNER for removing whiteout from sticky dir
+ * CAP_FSETID for chmod of opaque dir
+ * CAP_CHOWN for chown of opaque dir
+ */
+ cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
+ cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
+ cap_raise(override_cred->cap_effective, CAP_FOWNER);
+ cap_raise(override_cred->cap_effective, CAP_FSETID);
+ cap_raise(override_cred->cap_effective, CAP_CHOWN);
+ old_cred = override_creds(override_cred);
+
+ err = ovl_remove_and_whiteout(dentry, type, is_dir);
+
+ revert_creds(old_cred);
+ put_cred(override_cred);
+ }
+out_drop_write:
+ ovl_drop_write(dentry);
+out:
+ return err;
+}
+
+/* unlink inode operation: remove a non-directory; @dir is unused. */
+static int ovl_unlink(struct inode *dir, struct dentry *dentry)
+{
+	const bool is_dir = false;
+
+	return ovl_do_remove(dentry, is_dir);
+}
+
+/* rmdir inode operation: remove a directory; @dir is unused. */
+static int ovl_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	const bool is_dir = true;
+
+	return ovl_do_remove(dentry, is_dir);
+}
+
+/*
+ * rename2 inode operation for overlay directories.
+ *
+ * Only RENAME_EXCHANGE and RENAME_NOREPLACE are accepted, and
+ * RENAME_NOREPLACE is cleared before the flags reach the upper filesystem.
+ * A source directory of type OVL_PATH_LOWER or OVL_PATH_MERGE is refused
+ * with -EXDEV, as is such a target directory when exchanging, because
+ * directory trees are not copied up.
+ *
+ * The rename itself is carried out between the upper dentries of @old and
+ * @new; whiteout and exchange flags are added and the opaque state of the
+ * dentries is adjusted depending on the path types involved.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int ovl_rename2(struct inode *olddir, struct dentry *old,
+ struct inode *newdir, struct dentry *new,
+ unsigned int flags)
+{
+ int err;
+ enum ovl_path_type old_type;
+ enum ovl_path_type new_type;
+ struct dentry *old_upperdir;
+ struct dentry *new_upperdir;
+ struct dentry *olddentry;
+ struct dentry *newdentry;
+ struct dentry *trap;
+ bool old_opaque;
+ bool new_opaque;
+ bool new_create = false;
+ bool cleanup_whiteout = false;
+ bool overwrite = !(flags & RENAME_EXCHANGE);
+ bool is_dir = S_ISDIR(old->d_inode->i_mode);
+ bool new_is_dir = false;
+ struct dentry *opaquedir = NULL;
+ const struct cred *old_cred = NULL;
+ struct cred *override_cred = NULL;
+
+ err = -EINVAL;
+ if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE))
+ goto out;
+
+ flags &= ~RENAME_NOREPLACE;
+
+ err = ovl_check_sticky(old);
+ if (err)
+ goto out;
+
+ /* Don't copy up directory trees */
+ old_type = ovl_path_type(old);
+ err = -EXDEV;
+ if ((old_type == OVL_PATH_LOWER || old_type == OVL_PATH_MERGE) && is_dir)
+ goto out;
+
+ if (new->d_inode) {
+ err = ovl_check_sticky(new);
+ if (err)
+ goto out;
+
+ if (S_ISDIR(new->d_inode->i_mode))
+ new_is_dir = true;
+
+ new_type = ovl_path_type(new);
+ err = -EXDEV;
+ if (!overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir)
+ goto out;
+
+ /* Source and target backed by the same real inode: succeed without doing anything */
+ err = 0;
+ if (new_type == OVL_PATH_LOWER && old_type == OVL_PATH_LOWER) {
+ if (ovl_dentry_lower(old)->d_inode ==
+ ovl_dentry_lower(new)->d_inode)
+ goto out;
+ }
+ if (new_type != OVL_PATH_LOWER && old_type != OVL_PATH_LOWER) {
+ if (ovl_dentry_upper(old)->d_inode ==
+ ovl_dentry_upper(new)->d_inode)
+ goto out;
+ }
+ } else {
+ /* Negative target: its type depends only on the opaque flag */
+ if (ovl_dentry_is_opaque(new))
+ new_type = OVL_PATH_UPPER;
+ else
+ new_type = OVL_PATH_PURE_UPPER;
+ }
+
+ err = ovl_want_write(old);
+ if (err)
+ goto out;
+
+ err = ovl_copy_up(old);
+ if (err)
+ goto out_drop_write;
+
+ err = ovl_copy_up(new->d_parent);
+ if (err)
+ goto out_drop_write;
+ if (!overwrite) {
+ err = ovl_copy_up(new);
+ if (err)
+ goto out_drop_write;
+ }
+
+ /* Anything that is not OVL_PATH_PURE_UPPER is treated as opaque below */
+ old_opaque = old_type != OVL_PATH_PURE_UPPER;
+ new_opaque = new_type != OVL_PATH_PURE_UPPER;
+
+ if (old_opaque || new_opaque) {
+ err = -ENOMEM;
+ override_cred = prepare_creds();
+ if (!override_cred)
+ goto out_drop_write;
+
+ /*
+ * CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir
+ * CAP_DAC_OVERRIDE for create in workdir
+ * CAP_FOWNER for removing whiteout from sticky dir
+ * CAP_FSETID for chmod of opaque dir
+ * CAP_CHOWN for chown of opaque dir
+ */
+ cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
+ cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
+ cap_raise(override_cred->cap_effective, CAP_FOWNER);
+ cap_raise(override_cred->cap_effective, CAP_FSETID);
+ cap_raise(override_cred->cap_effective, CAP_CHOWN);
+ old_cred = override_creds(override_cred);
+ }
+
+ /* Overwriting a lower/merged directory: it must be empty */
+ if (overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir) {
+ opaquedir = ovl_check_empty_and_clear(new, new_type);
+ err = PTR_ERR(opaquedir);
+ if (IS_ERR(opaquedir)) {
+ opaquedir = NULL;
+ goto out_revert_creds;
+ }
+ }
+
+ if (overwrite) {
+ if (old_opaque) {
+ if (new->d_inode || !new_opaque) {
+ /* Whiteout source */
+ flags |= RENAME_WHITEOUT;
+ } else {
+ /* Switch whiteouts */
+ flags |= RENAME_EXCHANGE;
+ }
+ } else if (is_dir && !new->d_inode && new_opaque) {
+ flags |= RENAME_EXCHANGE;
+ cleanup_whiteout = true;
+ }
+ }
+
+ old_upperdir = ovl_dentry_upper(old->d_parent);
+ new_upperdir = ovl_dentry_upper(new->d_parent);
+
+ trap = lock_rename(new_upperdir, old_upperdir);
+
+ olddentry = ovl_dentry_upper(old);
+ newdentry = ovl_dentry_upper(new);
+ if (newdentry) {
+ if (opaquedir) {
+ /* The reference on opaquedir moves to newdentry */
+ newdentry = opaquedir;
+ opaquedir = NULL;
+ } else {
+ dget(newdentry);
+ }
+ } else {
+ new_create = true;
+ newdentry = lookup_one_len(new->d_name.name, new_upperdir,
+ new->d_name.len);
+ err = PTR_ERR(newdentry);
+ if (IS_ERR(newdentry))
+ goto out_unlock;
+ }
+
+ /* Re-validate parents under the rename lock and reject the trap dentry */
+ err = -ESTALE;
+ if (olddentry->d_parent != old_upperdir)
+ goto out_dput;
+ if (newdentry->d_parent != new_upperdir)
+ goto out_dput;
+ if (olddentry == trap)
+ goto out_dput;
+ if (newdentry == trap)
+ goto out_dput;
+
+ if (is_dir && !old_opaque && new_opaque) {
+ err = ovl_set_opaque(olddentry);
+ if (err)
+ goto out_dput;
+ }
+ if (!overwrite && new_is_dir && old_opaque && !new_opaque) {
+ err = ovl_set_opaque(newdentry);
+ if (err)
+ goto out_dput;
+ }
+
+ if (old_opaque || new_opaque) {
+ err = ovl_do_rename(old_upperdir->d_inode, olddentry,
+ new_upperdir->d_inode, newdentry,
+ flags);
+ } else {
+ /* No debug for the plain case */
+ BUG_ON(flags & ~RENAME_EXCHANGE);
+ err = vfs_rename(old_upperdir->d_inode, olddentry,
+ new_upperdir->d_inode, newdentry,
+ NULL, flags);
+ }
+
+ if (err) {
+ /* Undo the opaque marking applied before the failed rename */
+ if (is_dir && !old_opaque && new_opaque)
+ ovl_remove_opaque(olddentry);
+ if (!overwrite && new_is_dir && old_opaque && !new_opaque)
+ ovl_remove_opaque(newdentry);
+ goto out_dput;
+ }
+
+ if (is_dir && old_opaque && !new_opaque)
+ ovl_remove_opaque(olddentry);
+ if (!overwrite && new_is_dir && !old_opaque && new_opaque)
+ ovl_remove_opaque(newdentry);
+
+ if (old_opaque != new_opaque) {
+ ovl_dentry_set_opaque(old, new_opaque);
+ if (!overwrite)
+ ovl_dentry_set_opaque(new, old_opaque);
+ }
+
+ if (cleanup_whiteout)
+ ovl_cleanup(old_upperdir->d_inode, newdentry);
+
+ ovl_dentry_version_inc(old->d_parent);
+ ovl_dentry_version_inc(new->d_parent);
+
+out_dput:
+ dput(newdentry);
+out_unlock:
+ unlock_rename(new_upperdir, old_upperdir);
+out_revert_creds:
+ if (old_opaque || new_opaque) {
+ revert_creds(old_cred);
+ put_cred(override_cred);
+ }
+out_drop_write:
+ ovl_drop_write(old);
+out:
+ dput(opaquedir);
+ return err;
+}
+
+/*
+ * Inode operations for overlay directories; ovl_new_inode() installs this
+ * table for S_IFDIR inodes.
+ */
+const struct inode_operations ovl_dir_inode_operations = {
+ .lookup = ovl_lookup,
+ .mkdir = ovl_mkdir,
+ .symlink = ovl_symlink,
+ .unlink = ovl_unlink,
+ .rmdir = ovl_rmdir,
+ .rename2 = ovl_rename2,
+ .link = ovl_link,
+ .setattr = ovl_setattr,
+ .create = ovl_create,
+ .mknod = ovl_mknod,
+ .permission = ovl_permission,
+ .getattr = ovl_dir_getattr,
+ .setxattr = ovl_setxattr,
+ .getxattr = ovl_getxattr,
+ .listxattr = ovl_listxattr,
+ .removexattr = ovl_removexattr,
+};
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
new file mode 100644
index 0000000..af2d18c
--- /dev/null
+++ b/fs/overlayfs/inode.c
@@ -0,0 +1,425 @@
+/*
+ *
+ * Copyright (C) 2011 Novell Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/xattr.h>
+#include "overlayfs.h"
+
+/*
+ * Copy up @dentry after making sure its parent has been copied up.
+ *
+ * The stat of the lower object is passed to ovl_copy_up_one() together
+ * with @attr; when @no_data is set the size in that stat is zeroed first.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int ovl_copy_up_last(struct dentry *dentry, struct iattr *attr,
+			    bool no_data)
+{
+	struct path lowerpath;
+	struct kstat stat;
+	struct dentry *parent = dget_parent(dentry);
+	int err = ovl_copy_up(parent);
+
+	if (!err) {
+		ovl_path_lower(dentry, &lowerpath);
+		err = vfs_getattr(&lowerpath, &stat);
+	}
+
+	if (!err) {
+		if (no_data)
+			stat.size = 0;
+
+		err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat, attr);
+	}
+
+	dput(parent);
+	return err;
+}
+
+/*
+ * setattr inode operation.
+ *
+ * If an upper dentry exists the change is applied to it with
+ * notify_change() under its i_mutex; otherwise the object is copied up
+ * with @attr handed to the copy-up code.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int ovl_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	struct dentry *upperdentry;
+	int err = ovl_want_write(dentry);
+
+	if (err)
+		return err;
+
+	upperdentry = ovl_dentry_upper(dentry);
+	if (!upperdentry) {
+		err = ovl_copy_up_last(dentry, attr, false);
+	} else {
+		mutex_lock(&upperdentry->d_inode->i_mutex);
+		err = notify_change(upperdentry, attr, NULL);
+		mutex_unlock(&upperdentry->d_inode->i_mutex);
+	}
+
+	ovl_drop_write(dentry);
+	return err;
+}
+
+/* getattr inode operation: report the attributes of the real path; @mnt is unused. */
+static int ovl_getattr(struct vfsmount *mnt, struct dentry *dentry,
+		       struct kstat *stat)
+{
+	struct path real;
+
+	ovl_path_real(dentry, &real);
+
+	return vfs_getattr(&real, stat);
+}
+
+/*
+ * permission inode operation: check @mask against the real inode.
+ *
+ * Directories carry their ovl_entry in i_private. For other inodes an
+ * alias dentry has to be looked up, so -ECHILD is returned when
+ * MAY_NOT_BLOCK is set in @mask.
+ *
+ * Returns 0 if access is allowed or a negative errno.
+ */
+int ovl_permission(struct inode *inode, int mask)
+{
+ struct ovl_entry *oe;
+ struct dentry *alias = NULL;
+ struct inode *realinode;
+ struct dentry *realdentry;
+ bool is_upper;
+ int err;
+
+ if (S_ISDIR(inode->i_mode)) {
+ oe = inode->i_private;
+ } else if (mask & MAY_NOT_BLOCK) {
+ return -ECHILD;
+ } else {
+ /*
+ * For non-directories find an alias and get the info
+ * from there.
+ */
+ alias = d_find_any_alias(inode);
+ if (WARN_ON(!alias))
+ return -ENOENT;
+
+ oe = alias->d_fsdata;
+ }
+
+ realdentry = ovl_entry_real(oe, &is_upper);
+
+ /* Careful in RCU walk mode */
+ realinode = ACCESS_ONCE(realdentry->d_inode);
+ if (!realinode) {
+ /* A missing real inode is only expected when MAY_NOT_BLOCK is set */
+ WARN_ON(!(mask & MAY_NOT_BLOCK));
+ err = -ENOENT;
+ goto out_dput;
+ }
+
+ if (mask & MAY_WRITE) {
+ umode_t mode = realinode->i_mode;
+
+ /*
+ * Writes will always be redirected to upper layer, so
+ * ignore lower layer being read-only.
+ *
+ * If the overlay itself is read-only then proceed
+ * with the permission check, don't return EROFS.
+ * This will only happen if this is the lower layer of
+ * another overlayfs.
+ *
+ * If upper fs becomes read-only after the overlay was
+ * constructed return EROFS to prevent modification of
+ * upper layer.
+ */
+ err = -EROFS;
+ if (is_upper && !IS_RDONLY(inode) && IS_RDONLY(realinode) &&
+ (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
+ goto out_dput;
+ }
+
+ err = __inode_permission(realinode, mask);
+out_dput:
+ /* alias is NULL for directories; dput(NULL) is harmless */
+ dput(alias);
+ return err;
+}
+
+
+/*
+ * Cookie returned by ovl_follow_link() and consumed by ovl_put_link():
+ * records the real dentry and the cookie returned by its follow_link().
+ */
+struct ovl_link_data {
+ struct dentry *realdentry;
+ void *cookie;
+};
+
+/*
+ * follow_link inode operation: forward to the real inode's follow_link().
+ *
+ * If the real inode also implements put_link(), the real dentry and the
+ * cookie it returned are saved in a freshly allocated ovl_link_data that
+ * becomes this function's cookie; otherwise NULL is returned. Errors are
+ * reported as ERR_PTR().
+ */
+static void *ovl_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+	struct dentry *realdentry = ovl_dentry_real(dentry);
+	struct inode *realinode = realdentry->d_inode;
+	struct ovl_link_data *data;
+	void *cookie;
+
+	if (WARN_ON(!realinode->i_op->follow_link))
+		return ERR_PTR(-EPERM);
+
+	cookie = realinode->i_op->follow_link(realdentry, nd);
+	if (IS_ERR(cookie))
+		return cookie;
+
+	/* Without a real put_link() there is nothing to remember */
+	if (!realinode->i_op->put_link)
+		return NULL;
+
+	data = kmalloc(sizeof(struct ovl_link_data), GFP_KERNEL);
+	if (!data) {
+		realinode->i_op->put_link(realdentry, nd, cookie);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	data->realdentry = realdentry;
+	data->cookie = cookie;
+
+	return data;
+}
+
+/*
+ * put_link inode operation: hand the saved cookie back to the real inode's
+ * put_link() and free the ovl_link_data. A NULL cookie is ignored.
+ */
+static void ovl_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
+{
+	struct ovl_link_data *data = c;
+
+	if (data) {
+		struct inode *realinode = data->realdentry->d_inode;
+
+		realinode->i_op->put_link(data->realdentry, nd, data->cookie);
+		kfree(data);
+	}
+}
+
+/*
+ * readlink inode operation: forward to the real inode's readlink() after
+ * touching the access time of the real path. Returns -EINVAL if the real
+ * inode has no readlink operation.
+ */
+static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
+{
+	struct path real;
+	const struct inode_operations *real_ops;
+
+	ovl_path_real(dentry, &real);
+	real_ops = real.dentry->d_inode->i_op;
+
+	if (!real_ops->readlink)
+		return -EINVAL;
+
+	touch_atime(&real);
+
+	return real_ops->readlink(real.dentry, buf, bufsiz);
+}
+
+
+/*
+ * Return true if @name lies in the overlay's private xattr namespace,
+ * i.e. starts with "trusted.overlay.".
+ *
+ * The prefix length is derived from the literal itself so that the whole
+ * prefix, including the trailing dot, takes part in the comparison.
+ */
+static bool ovl_is_private_xattr(const char *name)
+{
+	static const char prefix[] = "trusted.overlay.";
+
+	return strncmp(name, prefix, sizeof(prefix) - 1) == 0;
+}
+
+/*
+ * setxattr inode operation.
+ *
+ * Setting an overlay-private xattr is refused with -EPERM. Otherwise the
+ * object is copied up and the attribute is set on its upper dentry.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int ovl_setxattr(struct dentry *dentry, const char *name,
+		 const void *value, size_t size, int flags)
+{
+	int err = ovl_want_write(dentry);
+
+	if (err)
+		return err;
+
+	if (ovl_is_private_xattr(name)) {
+		err = -EPERM;
+	} else {
+		err = ovl_copy_up(dentry);
+		if (!err)
+			err = vfs_setxattr(ovl_dentry_upper(dentry), name,
+					   value, size, flags);
+	}
+
+	ovl_drop_write(dentry);
+	return err;
+}
+
+/*
+ * getxattr inode operation: read @name from the real dentry.
+ *
+ * Overlay-private xattrs are hidden (-ENODATA) when the parent of @dentry
+ * is of type OVL_PATH_MERGE.
+ */
+ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
+		     void *value, size_t size)
+{
+	bool hidden = ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE &&
+		      ovl_is_private_xattr(name);
+
+	if (hidden)
+		return -ENODATA;
+
+	return vfs_getxattr(ovl_dentry_real(dentry), name, value, size);
+}
+
+/*
+ * listxattr inode operation: list the xattrs of the real dentry.
+ *
+ * When the parent of @dentry is of type OVL_PATH_MERGE, overlay-private
+ * names are cut out of the returned list in place and the result is
+ * shortened accordingly. A size query (@size == 0) or an error/empty
+ * result from the real filesystem is returned unchanged.
+ */
+ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
+{
+	ssize_t res = vfs_listxattr(ovl_dentry_real(dentry), list, size);
+	int off = 0;
+
+	if (res <= 0 || size == 0)
+		return res;
+
+	if (ovl_path_type(dentry->d_parent) != OVL_PATH_MERGE)
+		return res;
+
+	/* filter out private xattrs */
+	while (off < res) {
+		char *s = list + off;
+		size_t slen = strlen(s) + 1;
+
+		BUG_ON(off + slen > res);
+
+		if (!ovl_is_private_xattr(s)) {
+			off += slen;
+			continue;
+		}
+
+		/* Close the gap left by the removed name */
+		res -= slen;
+		memmove(s, s + slen, res - off);
+	}
+
+	return res;
+}
+
+/*
+ * removexattr inode operation.
+ *
+ * Overlay-private xattrs are hidden when the parent of @dentry is of type
+ * OVL_PATH_MERGE, so an attempt to remove one fails with -ENODATA. For an
+ * object that only exists on the lower layer the attribute is looked up
+ * first, so that a missing attribute does not trigger a copy up; the
+ * object is then copied up and the attribute removed from the upper
+ * dentry.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int ovl_removexattr(struct dentry *dentry, const char *name)
+{
+	int err;
+	struct path realpath;
+	enum ovl_path_type type;
+
+	err = ovl_want_write(dentry);
+	if (err)
+		goto out;
+
+	/*
+	 * ovl_getxattr() reports hidden private xattrs as -ENODATA; do the
+	 * same here rather than returning the 0 left in err above.
+	 */
+	err = -ENODATA;
+	if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE &&
+	    ovl_is_private_xattr(name))
+		goto out_drop_write;
+
+	type = ovl_path_real(dentry, &realpath);
+	if (type == OVL_PATH_LOWER) {
+		/* Size query only: fails if the attribute does not exist */
+		err = vfs_getxattr(realpath.dentry, name, NULL, 0);
+		if (err < 0)
+			goto out_drop_write;
+
+		err = ovl_copy_up(dentry);
+		if (err)
+			goto out_drop_write;
+
+		ovl_path_upper(dentry, &realpath);
+	}
+
+	err = vfs_removexattr(realpath.dentry, name);
+out_drop_write:
+	ovl_drop_write(dentry);
+out:
+	return err;
+}
+
+/*
+ * Decide whether opening with @flags requires a copy up: only for objects
+ * of type OVL_PATH_LOWER that are not special files and that are opened
+ * for writing or with O_TRUNC.
+ */
+static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type,
+				  struct dentry *realdentry)
+{
+	bool modifies;
+
+	if (type != OVL_PATH_LOWER)
+		return false;
+
+	if (special_file(realdentry->d_inode->i_mode))
+		return false;
+
+	modifies = (OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC);
+
+	return modifies;
+}
+
+/*
+ * dentry_open inode operation: open the real path behind @dentry.
+ *
+ * If the open requires it, the object is copied up first (without data
+ * when O_TRUNC is given) and the upper path is opened instead.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int ovl_dentry_open(struct dentry *dentry, struct file *file,
+			   const struct cred *cred)
+{
+	struct path realpath;
+	enum ovl_path_type type = ovl_path_real(dentry, &realpath);
+	int err;
+
+	if (!ovl_open_need_copy_up(file->f_flags, type, realpath.dentry))
+		return vfs_open(&realpath, file, cred);
+
+	err = ovl_want_write(dentry);
+	if (err)
+		return err;
+
+	if (file->f_flags & O_TRUNC)
+		err = ovl_copy_up_last(dentry, NULL, true);
+	else
+		err = ovl_copy_up(dentry);
+
+	if (!err) {
+		ovl_path_upper(dentry, &realpath);
+		err = vfs_open(&realpath, file, cred);
+	}
+
+	ovl_drop_write(dentry);
+	return err;
+}
+
+/*
+ * Inode operations that ovl_new_inode() installs for regular files,
+ * sockets, block and character devices and FIFOs.
+ */
+static const struct inode_operations ovl_file_inode_operations = {
+ .setattr = ovl_setattr,
+ .permission = ovl_permission,
+ .getattr = ovl_getattr,
+ .setxattr = ovl_setxattr,
+ .getxattr = ovl_getxattr,
+ .listxattr = ovl_listxattr,
+ .removexattr = ovl_removexattr,
+ .dentry_open = ovl_dentry_open,
+};
+
+/* Inode operations that ovl_new_inode() installs for symlinks. */
+static const struct inode_operations ovl_symlink_inode_operations = {
+ .setattr = ovl_setattr,
+ .follow_link = ovl_follow_link,
+ .put_link = ovl_put_link,
+ .readlink = ovl_readlink,
+ .getattr = ovl_getattr,
+ .setxattr = ovl_setxattr,
+ .getxattr = ovl_getxattr,
+ .listxattr = ovl_listxattr,
+ .removexattr = ovl_removexattr,
+};
+
+/*
+ * Allocate an overlay inode on @sb.
+ *
+ * Only the file type bits of @mode are stored in i_mode. The operation
+ * tables are chosen by file type; directories additionally keep @oe in
+ * i_private.
+ *
+ * Returns the new inode, or NULL if allocation fails or the file type is
+ * not one of the supported ones (which also triggers a warning).
+ */
+struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
+			    struct ovl_entry *oe)
+{
+	struct inode *inode = new_inode(sb);
+
+	if (!inode)
+		return NULL;
+
+	mode &= S_IFMT;
+
+	inode->i_ino = get_next_ino();
+	inode->i_mode = mode;
+	inode->i_flags |= S_NOATIME | S_NOCMTIME;
+
+	if (mode == S_IFDIR) {
+		inode->i_private = oe;
+		inode->i_op = &ovl_dir_inode_operations;
+		inode->i_fop = &ovl_dir_operations;
+	} else if (mode == S_IFLNK) {
+		inode->i_op = &ovl_symlink_inode_operations;
+	} else if (mode == S_IFREG || mode == S_IFSOCK || mode == S_IFBLK ||
+		   mode == S_IFCHR || mode == S_IFIFO) {
+		inode->i_op = &ovl_file_inode_operations;
+	} else {
+		WARN(1, "illegal file type: %i\n", mode);
+		iput(inode);
+		inode = NULL;
+	}
+
+	return inode;
+
+}
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
new file mode 100644
index 0000000..814bed3
--- /dev/null
+++ b/fs/overlayfs/overlayfs.h
@@ -0,0 +1,191 @@
+/*
+ *
+ * Copyright (C) 2011 Novell Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+
+struct ovl_entry;
+
+/*
+ * Classification of an overlay dentry as returned by ovl_path_type() and
+ * ovl_path_real().
+ * NOTE(review): the exact meaning of each value is defined where
+ * ovl_path_type() is implemented; the notes below reflect usage visible
+ * in dir.c and inode.c only.
+ */
+enum ovl_path_type {
+ OVL_PATH_PURE_UPPER, /* removed directly from upper, no whiteout needed */
+ OVL_PATH_UPPER,
+ OVL_PATH_MERGE, /* private xattrs of children are filtered */
+ OVL_PATH_LOWER, /* needs copy up before modification */
+};
+
+/* Name of the xattr used to mark opaque directories (presumably; defined elsewhere) */
+extern const char *ovl_opaque_xattr;
+
+/* vfs_rmdir() with the result logged via pr_debug(). */
+static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	int ret;
+
+	ret = vfs_rmdir(dir, dentry);
+	pr_debug("rmdir(%pd2) = %i\n", dentry, ret);
+
+	return ret;
+}
+
+/* vfs_unlink() with the result logged via pr_debug(). */
+static inline int ovl_do_unlink(struct inode *dir, struct dentry *dentry)
+{
+	int ret;
+
+	ret = vfs_unlink(dir, dentry, NULL);
+	pr_debug("unlink(%pd2) = %i\n", dentry, ret);
+
+	return ret;
+}
+
+/* vfs_link(); the result is logged via pr_debug() only when @debug is set. */
+static inline int ovl_do_link(struct dentry *old_dentry, struct inode *dir,
+			      struct dentry *new_dentry, bool debug)
+{
+	int ret;
+
+	ret = vfs_link(old_dentry, dir, new_dentry, NULL);
+	if (!debug)
+		return ret;
+
+	pr_debug("link(%pd2, %pd2) = %i\n",
+		 old_dentry, new_dentry, ret);
+
+	return ret;
+}
+
+/*
+ * vfs_create() (always exclusive); the result is logged via pr_debug()
+ * only when @debug is set.
+ */
+static inline int ovl_do_create(struct inode *dir, struct dentry *dentry,
+				umode_t mode, bool debug)
+{
+	int ret;
+
+	ret = vfs_create(dir, dentry, mode, true);
+	if (debug)
+		pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, ret);
+
+	return ret;
+}
+
+/* vfs_mkdir(); the result is logged via pr_debug() only when @debug is set. */
+static inline int ovl_do_mkdir(struct inode *dir, struct dentry *dentry,
+			       umode_t mode, bool debug)
+{
+	int ret;
+
+	ret = vfs_mkdir(dir, dentry, mode);
+	if (debug)
+		pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, ret);
+
+	return ret;
+}
+
+/* vfs_mknod(); the result is logged via pr_debug() only when @debug is set. */
+static inline int ovl_do_mknod(struct inode *dir, struct dentry *dentry,
+			       umode_t mode, dev_t dev, bool debug)
+{
+	int ret;
+
+	ret = vfs_mknod(dir, dentry, mode, dev);
+	if (!debug)
+		return ret;
+
+	pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n",
+		 dentry, mode, dev, ret);
+
+	return ret;
+}
+
+/* vfs_symlink(); the result is logged via pr_debug() only when @debug is set. */
+static inline int ovl_do_symlink(struct inode *dir, struct dentry *dentry,
+				 const char *oldname, bool debug)
+{
+	int ret;
+
+	ret = vfs_symlink(dir, dentry, oldname);
+	if (debug)
+		pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, ret);
+
+	return ret;
+}
+
+/*
+ * vfs_setxattr() with the call and its result logged via pr_debug().
+ *
+ * @value is a buffer of @size bytes and need not be NUL-terminated, so it
+ * is printed with a precision ("%.*s") that stops the read after @size
+ * bytes.
+ */
+static inline int ovl_do_setxattr(struct dentry *dentry, const char *name,
+				  const void *value, size_t size, int flags)
+{
+	int err = vfs_setxattr(dentry, name, value, size, flags);
+	pr_debug("setxattr(%pd2, \"%s\", \"%.*s\", 0x%x) = %i\n",
+		 dentry, name, (int) size, (char *) value, flags, err);
+	return err;
+}
+
+/* vfs_removexattr() with the result logged via pr_debug(). */
+static inline int ovl_do_removexattr(struct dentry *dentry, const char *name)
+{
+	int ret;
+
+	ret = vfs_removexattr(dentry, name);
+	pr_debug("removexattr(%pd2, \"%s\") = %i\n", dentry, name, ret);
+
+	return ret;
+}
+
+/*
+ * vfs_rename() with debug logging: the request is always logged, the
+ * result only when the rename fails.
+ */
+static inline int ovl_do_rename(struct inode *olddir, struct dentry *olddentry,
+				struct inode *newdir, struct dentry *newdentry,
+				unsigned int flags)
+{
+	int ret;
+
+	pr_debug("rename2(%pd2, %pd2, 0x%x)\n",
+		 olddentry, newdentry, flags);
+
+	ret = vfs_rename(olddir, olddentry, newdir, newdentry, NULL, flags);
+	if (!ret)
+		return 0;
+
+	pr_debug("...rename2(%pd2, %pd2, ...) = %i\n",
+		 olddentry, newdentry, ret);
+
+	return ret;
+}
+
+/* vfs_whiteout() with the result logged via pr_debug(). */
+static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry)
+{
+	int ret;
+
+	ret = vfs_whiteout(dir, dentry);
+	pr_debug("whiteout(%pd2) = %i\n", dentry, ret);
+
+	return ret;
+}
+
+/*
+ * Core helpers operating on overlay dentries and their ovl_entry.
+ * NOTE(review): this group carries no file label, unlike the groups
+ * below; presumably these live in super.c -- confirm.
+ */
+enum ovl_path_type ovl_path_type(struct dentry *dentry);
+u64 ovl_dentry_version_get(struct dentry *dentry);
+void ovl_dentry_version_inc(struct dentry *dentry);
+void ovl_path_upper(struct dentry *dentry, struct path *path);
+void ovl_path_lower(struct dentry *dentry, struct path *path);
+enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
+struct dentry *ovl_dentry_upper(struct dentry *dentry);
+struct dentry *ovl_dentry_lower(struct dentry *dentry);
+struct dentry *ovl_dentry_real(struct dentry *dentry);
+struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper);
+struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry);
+void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache);
+struct dentry *ovl_workdir(struct dentry *dentry);
+/* ovl_want_write() and ovl_drop_write() are used as a bracketing pair */
+int ovl_want_write(struct dentry *dentry);
+void ovl_drop_write(struct dentry *dentry);
+bool ovl_dentry_is_opaque(struct dentry *dentry);
+void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque);
+bool ovl_is_whiteout(struct dentry *dentry);
+void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry);
+struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
+ unsigned int flags);
+struct file *ovl_path_open(struct path *path, int flags);
+
+struct dentry *ovl_upper_create(struct dentry *upperdir, struct dentry *dentry,
+ struct kstat *stat, const char *link);
+
+/* readdir.c */
+extern const struct file_operations ovl_dir_operations;
+int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list);
+void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list);
+void ovl_cache_free(struct list_head *list);
+
+/* inode.c */
+int ovl_setattr(struct dentry *dentry, struct iattr *attr);
+int ovl_permission(struct inode *inode, int mask);
+int ovl_setxattr(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags);
+ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
+ void *value, size_t size);
+ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
+int ovl_removexattr(struct dentry *dentry, const char *name);
+
+/* Returns NULL on allocation failure or for an unsupported file type */
+struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
+ struct ovl_entry *oe);
+/* Copy the owner (uid and gid) from inode @from to inode @to. */
+static inline void ovl_copyattr(struct inode *from, struct inode *to)
+{
+	to->i_gid = from->i_gid;
+	to->i_uid = from->i_uid;
+}
+
+/* dir.c */
+extern const struct inode_operations ovl_dir_inode_operations;
+/* Callers in dir.c check the result of ovl_lookup_temp() with IS_ERR() */
+struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry);
+int ovl_create_real(struct inode *dir, struct dentry *newdentry,
+ struct kstat *stat, const char *link,
+ struct dentry *hardlink, bool debug);
+void ovl_cleanup(struct inode *dir, struct dentry *dentry);
+
+/* copy_up.c */
+int ovl_copy_up(struct dentry *dentry);
+int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
+ struct path *lowerpath, struct kstat *stat,
+ struct iattr *attr);
+int ovl_copy_xattr(struct dentry *old, struct dentry *new);
+/* ovl_clear_empty() calls ovl_set_attr() with the upper inode's i_mutex held */
+int ovl_set_attr(struct dentry *upper, struct kstat *stat);
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
new file mode 100644
index 0000000..910553f
--- /dev/null
+++ b/fs/overlayfs/readdir.c
@@ -0,0 +1,590 @@
+/*
+ *
+ * Copyright (C) 2011 Novell Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/namei.h>
+#include <linux/file.h>
+#include <linux/xattr.h>
+#include <linux/rbtree.h>
+#include <linux/security.h>
+#include <linux/cred.h>
+#include "overlayfs.h"
+
+struct ovl_cache_entry { /* one name in a cached directory listing */
+ unsigned int len; /* length of name[], not counting the terminating NUL */
+ unsigned int type; /* d_type value handed to the fill callback */
+ u64 ino; /* inode number handed to the fill callback */
+ bool is_whiteout; /* set by ovl_dir_mark_whiteouts(); such entries are not emitted */
+ struct list_head l_node; /* link in the ordered entry list */
+ struct rb_node node; /* link in the name-sorted rb-tree used while merging */
+ char name[]; /* NUL-terminated copy of the name; an array, so never NULL */
+};
+
+struct ovl_dir_cache { /* merged listing attached to a directory dentry */
+ long refcount; /* holders; ovl_cache_put() frees the cache when it drops to 0 */
+ u64 version; /* ovl_dentry_version_get() value the listing was built for */
+ struct list_head entries; /* ovl_cache_entry.l_node list; open files link their cursor here */
+};
+
+struct ovl_readdir_data { /* state shared with the ovl_fill_merge() actor */
+ struct dir_context ctx; /* context passed to iterate_dir() */
+ bool is_merge; /* false while reading upper, true while reading lower */
+ struct rb_root root; /* names collected while is_merge was false */
+ struct list_head *list; /* destination list for the entries */
+ struct list_head middle; /* temporary head; lower entries are queued before it */
+ int count; /* entries seen during the current iterate_dir() call */
+ int err; /* error recorded by ovl_fill_lower() */
+};
+
+struct ovl_dir_file { /* per-open state stored in file->private_data */
+ bool is_real; /* iterate/seek go straight to realfile (not a merged dir) */
+ bool is_upper; /* path type was not OVL_PATH_LOWER at open time */
+ struct ovl_dir_cache *cache; /* merged listing, NULL until first needed */
+ struct ovl_cache_entry cursor; /* position marker; NOTE(review): only l_node is initialised, name[] has no storage here */
+ struct file *realfile; /* real directory opened in ovl_dir_open() */
+ struct file *upperfile; /* upper directory opened lazily by ovl_dir_fsync() */
+};
+
+/* Map an rb-tree link back to the cache entry that embeds it. */
+static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
+{
+	return rb_entry(n, struct ovl_cache_entry, node);
+}
+
+/*
+ * Look up the entry called @name (@len bytes, not necessarily NUL
+ * terminated) in the rb-tree at @root.  Returns the entry or NULL.
+ */
+static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
+						    const char *name, int len)
+{
+	struct rb_node *cur;
+
+	for (cur = root->rb_node; cur; ) {
+		struct ovl_cache_entry *entry = ovl_cache_entry_from_node(cur);
+		int order = strncmp(name, entry->name, len);
+
+		/* Equal prefix and the stored name is not longer: a match. */
+		if (order == 0 && len >= entry->len)
+			return entry;
+
+		cur = (order > 0) ? cur->rb_right : cur->rb_left;
+	}
+
+	return NULL;
+}
+
+/*
+ * Allocate a cache entry holding a NUL-terminated copy of @name (@len
+ * bytes) together with @ino and @d_type.  The list and rb-tree links are
+ * left for the caller to set up.  Returns NULL if the allocation fails.
+ */
+static struct ovl_cache_entry *ovl_cache_entry_new(const char *name, int len,
+						   u64 ino, unsigned int d_type)
+{
+	size_t size = offsetof(struct ovl_cache_entry, name[len + 1]);
+	struct ovl_cache_entry *entry = kmalloc(size, GFP_KERNEL);
+
+	if (!entry)
+		return NULL;
+
+	entry->len = len;
+	entry->type = d_type;
+	entry->ino = ino;
+	entry->is_whiteout = false;
+	memcpy(entry->name, name, len);
+	entry->name[len] = '\0';
+
+	return entry;
+}
+
+/*
+ * Insert @name into both the rb-tree and the tail of rdd->list, unless an
+ * entry with that name is already in the tree.  Returns 0 on success (or
+ * duplicate) and -ENOMEM if the new entry cannot be allocated.
+ */
+static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
+				  const char *name, int len, u64 ino,
+				  unsigned int d_type)
+{
+	struct rb_node **link = &rdd->root.rb_node;
+	struct rb_node *parent = NULL;
+	struct ovl_cache_entry *entry;
+
+	while (*link) {
+		struct ovl_cache_entry *cur = ovl_cache_entry_from_node(*link);
+		int order = strncmp(name, cur->name, len);
+
+		parent = *link;
+		if (order == 0 && len >= cur->len)
+			return 0;	/* name already recorded */
+
+		link = (order > 0) ? &parent->rb_right : &parent->rb_left;
+	}
+
+	entry = ovl_cache_entry_new(name, len, ino, d_type);
+	if (!entry)
+		return -ENOMEM;
+
+	rb_link_node(&entry->node, parent, link);
+	rb_insert_color(&entry->node, &rdd->root);
+	list_add_tail(&entry->l_node, rdd->list);
+
+	return 0;
+}
+
+/*
+ * Handle one name read while rdd->is_merge is set.  A name already in the
+ * rb-tree has its existing entry moved in front of rdd->middle; otherwise
+ * a new entry is queued there.  Returns rdd->err, which becomes -ENOMEM
+ * when the allocation fails.
+ */
+static int ovl_fill_lower(struct ovl_readdir_data *rdd,
+			  const char *name, int namelen,
+			  loff_t offset, u64 ino, unsigned int d_type)
+{
+	struct ovl_cache_entry *entry;
+
+	entry = ovl_cache_entry_find(&rdd->root, name, namelen);
+	if (entry) {
+		list_move_tail(&entry->l_node, &rdd->middle);
+		return rdd->err;
+	}
+
+	entry = ovl_cache_entry_new(name, namelen, ino, d_type);
+	if (entry)
+		list_add_tail(&entry->l_node, &rdd->middle);
+	else
+		rdd->err = -ENOMEM;
+
+	return rdd->err;
+}
+
+/* Free every entry linked on @list and leave @list as an empty list. */
+void ovl_cache_free(struct list_head *list)
+{
+	while (!list_empty(list)) {
+		struct ovl_cache_entry *entry;
+
+		entry = list_first_entry(list, struct ovl_cache_entry, l_node);
+		list_del(&entry->l_node);
+		kfree(entry);
+	}
+
+	INIT_LIST_HEAD(list);
+}
+
+/*
+ * Drop @od's reference on its cache: unlink the file's cursor and, when
+ * this was the last reference, detach the cache from @dentry (if it is
+ * still the one attached) and free it.
+ */
+static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry)
+{
+	struct ovl_dir_cache *cache = od->cache;
+
+	list_del(&od->cursor.l_node);
+	WARN_ON(cache->refcount <= 0);
+	if (--cache->refcount)
+		return;
+
+	if (ovl_dir_cache(dentry) == cache)
+		ovl_set_dir_cache(dentry, NULL);
+
+	ovl_cache_free(&cache->entries);
+	kfree(cache);
+}
+
+/*
+ * Directory actor used for both passes of a merged read.  Counts the entry
+ * and hands it to ovl_fill_lower() when rdd->is_merge is set, or to
+ * ovl_cache_entry_add_rb() otherwise.
+ */
+static int ovl_fill_merge(void *buf, const char *name, int namelen,
+			  loff_t offset, u64 ino, unsigned int d_type)
+{
+	struct ovl_readdir_data *rdd = buf;
+
+	rdd->count++;
+	if (rdd->is_merge)
+		return ovl_fill_lower(rdd, name, namelen, offset, ino, d_type);
+
+	return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
+}
+
+/*
+ * Open the directory at @realpath and feed all of its entries to the actor
+ * in @rdd.  iterate_dir() is called repeatedly until a call yields no
+ * entries or an error is seen.  Returns 0 or a negative errno.
+ */
+static inline int ovl_dir_read(struct path *realpath,
+			       struct ovl_readdir_data *rdd)
+{
+	struct file *dir = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY);
+	int ret;
+
+	if (IS_ERR(dir))
+		return PTR_ERR(dir);
+
+	rdd->ctx.pos = 0;
+	for (;;) {
+		rdd->count = 0;
+		rdd->err = 0;
+		ret = iterate_dir(dir, &rdd->ctx);
+		if (ret >= 0)
+			ret = rdd->err;
+		if (ret || !rdd->count)
+			break;
+	}
+	fput(dir);
+
+	return ret;
+}
+
+/*
+ * Called when the file position is 0: drop a cache whose version no longer
+ * matches the dentry, and switch a "real" file over to merged mode if the
+ * directory has become a merge directory since it was opened.
+ */
+static void ovl_dir_reset(struct file *file)
+{
+	struct ovl_dir_file *od = file->private_data;
+	struct ovl_dir_cache *cache = od->cache;
+	struct dentry *dentry = file->f_path.dentry;
+	bool is_merge = (ovl_path_type(dentry) == OVL_PATH_MERGE);
+
+	if (cache && ovl_dentry_version_get(dentry) != cache->version) {
+		ovl_cache_put(od, dentry);
+		od->cache = NULL;
+	}
+
+	WARN_ON(!od->is_real && !is_merge);
+	if (is_merge && od->is_real)
+		od->is_real = false;
+}
+
+/*
+ * Flag the whiteouts among the entries on rdd->list.
+ *
+ * Every entry of type DT_CHR is looked up in @dir, with CAP_DAC_OVERRIDE
+ * raised and @dir's i_mutex held, and the result of ovl_is_whiteout() is
+ * stored in its is_whiteout field.  Entries whose lookup fails are left
+ * untouched.
+ *
+ * Returns 0, or -ENOMEM if the override credentials cannot be prepared; in
+ * that case rdd->list has already been freed.
+ */
+static int ovl_dir_mark_whiteouts(struct dentry *dir,
+				  struct ovl_readdir_data *rdd)
+{
+	struct ovl_cache_entry *p;
+	struct dentry *dentry;
+	const struct cred *old_cred;
+	struct cred *override_cred;
+
+	override_cred = prepare_creds();
+	if (!override_cred) {
+		ovl_cache_free(rdd->list);
+		return -ENOMEM;
+	}
+
+	/*
+	 * CAP_DAC_OVERRIDE for lookup
+	 */
+	cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
+	old_cred = override_creds(override_cred);
+
+	mutex_lock(&dir->d_inode->i_mutex);
+	list_for_each_entry(p, rdd->list, l_node) {
+		/*
+		 * Skip cursors.  ->name is an array, so testing it against
+		 * NULL can never be true.  A cursor lives in a kzalloc()ed
+		 * struct ovl_dir_file and is never given a name, so its len
+		 * stays 0; real entries are assumed to have non-empty names.
+		 */
+		if (!p->len)
+			continue;
+
+		if (p->type != DT_CHR)
+			continue;
+
+		dentry = lookup_one_len(p->name, dir, p->len);
+		if (IS_ERR(dentry))
+			continue;
+
+		p->is_whiteout = ovl_is_whiteout(dentry);
+		dput(dentry);
+	}
+	mutex_unlock(&dir->d_inode->i_mutex);
+
+	revert_creds(old_cred);
+	put_cred(override_cred);
+
+	return 0;
+}
+
+/*
+ * Build the merged listing of a directory on @list.
+ *
+ * The upper directory (if any) is read first and its names are indexed in
+ * an rb-tree; when a lower directory exists too, whiteouts among the upper
+ * entries are marked.  The lower directory is then read with is_merge set,
+ * so that its entries end up in front of the upper-only ones.
+ *
+ * Returns 0 on success or a negative errno.  If neither path has a dentry
+ * nothing is read and 0 is returned.
+ */
+static inline int ovl_dir_read_merged(struct path *upperpath,
+				      struct path *lowerpath,
+				      struct list_head *list)
+{
+	/* Must start at 0: neither branch below runs if both dentries are NULL */
+	int err = 0;
+	struct ovl_readdir_data rdd = {
+		.ctx.actor = ovl_fill_merge,
+		.list = list,
+		.root = RB_ROOT,
+		.is_merge = false,
+	};
+
+	if (upperpath->dentry) {
+		err = ovl_dir_read(upperpath, &rdd);
+		if (err)
+			goto out;
+
+		if (lowerpath->dentry) {
+			err = ovl_dir_mark_whiteouts(upperpath->dentry, &rdd);
+			if (err)
+				goto out;
+		}
+	}
+	if (lowerpath->dentry) {
+		/*
+		 * Insert lowerpath entries before upperpath ones, this allows
+		 * offsets to be reasonably constant
+		 */
+		list_add(&rdd.middle, rdd.list);
+		rdd.is_merge = true;
+		err = ovl_dir_read(lowerpath, &rdd);
+		list_del(&rdd.middle);
+	}
+out:
+	return err;
+}
+
+/*
+ * Move @od's cursor so that it sits immediately before the entry at offset
+ * @pos of the cached listing, or at the end of the list if @pos is past
+ * the last entry.  Offsets count real entries only, never cursors.
+ */
+static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
+{
+	struct ovl_cache_entry *p;
+	loff_t off = 0;
+
+	list_for_each_entry(p, &od->cache->entries, l_node) {
+		/*
+		 * Skip cursors, including our own.  ->name is an array, so
+		 * "!p->name" was always false: cursors were counted and the
+		 * walk could stop on od->cursor itself, which the
+		 * list_move_tail() below would then move relative to itself.
+		 * A cursor sits in a kzalloc()ed struct ovl_dir_file and is
+		 * never given a name, so its len stays 0; real entries are
+		 * assumed to have non-empty names.
+		 */
+		if (!p->len)
+			continue;
+		if (off >= pos)
+			break;
+		off++;
+	}
+	list_move_tail(&od->cursor.l_node, &p->l_node);
+}
+
+/*
+ * Return a referenced readdir cache for @dentry.  A cache already attached
+ * to the dentry is reused when its version still matches; otherwise a new
+ * one is built from the upper and lower directories and attached.
+ * Returns an ERR_PTR() on allocation or read failure.
+ */
+static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
+{
+	struct ovl_dir_cache *cache = ovl_dir_cache(dentry);
+	struct path upperpath;
+	struct path lowerpath;
+	int err;
+
+	if (cache && ovl_dentry_version_get(dentry) == cache->version) {
+		cache->refcount++;
+		return cache;
+	}
+	ovl_set_dir_cache(dentry, NULL);
+
+	cache = kzalloc(sizeof(*cache), GFP_KERNEL);
+	if (!cache)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&cache->entries);
+	cache->refcount = 1;
+
+	ovl_path_lower(dentry, &lowerpath);
+	ovl_path_upper(dentry, &upperpath);
+
+	err = ovl_dir_read_merged(&upperpath, &lowerpath, &cache->entries);
+	if (err) {
+		ovl_cache_free(&cache->entries);
+		kfree(cache);
+		return ERR_PTR(err);
+	}
+
+	cache->version = ovl_dentry_version_get(dentry);
+	ovl_set_dir_cache(dentry, cache);
+
+	return cache;
+}
+
+/*
+ * ->iterate for overlay directories.
+ *
+ * Non-merged directories are passed straight to the real directory file.
+ * For merged directories the cached listing is obtained on first use and
+ * walked from the file's cursor: whiteouts are skipped but still advance
+ * ctx->pos, and the cursor is moved past every entry that was consumed.
+ *
+ * Returns 0, the result of iterate_dir() on the real file, or the error
+ * from ovl_cache_get().
+ */
+static int ovl_iterate(struct file *file, struct dir_context *ctx)
+{
+	struct ovl_dir_file *od = file->private_data;
+	struct dentry *dentry = file->f_path.dentry;
+
+	if (!ctx->pos)
+		ovl_dir_reset(file);
+
+	if (od->is_real)
+		return iterate_dir(od->realfile, ctx);
+
+	if (!od->cache) {
+		struct ovl_dir_cache *cache;
+
+		cache = ovl_cache_get(dentry);
+		if (IS_ERR(cache))
+			return PTR_ERR(cache);
+
+		od->cache = cache;
+		ovl_seek_cursor(od, ctx->pos);
+	}
+
+	while (od->cursor.l_node.next != &od->cache->entries) {
+		struct ovl_cache_entry *p;
+
+		p = list_entry(od->cursor.l_node.next, struct ovl_cache_entry, l_node);
+		/*
+		 * Skip cursors of other open files.  ->name is an array, so
+		 * "p->name" was always true and cursors were emitted as if
+		 * they were entries.  A cursor sits in a kzalloc()ed struct
+		 * ovl_dir_file and is never given a name, so its len stays 0;
+		 * real entries are assumed to have non-empty names.
+		 */
+		if (p->len) {
+			if (!p->is_whiteout) {
+				if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
+					break;
+			}
+			ctx->pos++;
+		}
+		list_move(&od->cursor.l_node, &p->l_node);
+	}
+	return 0;
+}
+
+/*
+ * ->llseek for overlay directories, run under the inode's i_mutex.
+ *
+ * Real directories forward the seek to the real file and copy its position
+ * back.  Merged directories accept only SEEK_SET and SEEK_CUR with a
+ * non-negative result; the cursor is repositioned if a cache exists.
+ * Returns the new offset or -EINVAL.
+ */
+static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
+{
+	struct ovl_dir_file *od = file->private_data;
+	struct inode *inode = file_inode(file);
+	loff_t ret;
+
+	mutex_lock(&inode->i_mutex);
+	if (!file->f_pos)
+		ovl_dir_reset(file);
+
+	if (od->is_real) {
+		ret = vfs_llseek(od->realfile, offset, origin);
+		file->f_pos = od->realfile->f_pos;
+		goto unlock;
+	}
+
+	ret = -EINVAL;
+	if (origin == SEEK_CUR)
+		offset += file->f_pos;
+	else if (origin != SEEK_SET)
+		goto unlock;
+
+	if (offset < 0)
+		goto unlock;
+
+	if (offset != file->f_pos) {
+		file->f_pos = offset;
+		if (od->cache)
+			ovl_seek_cursor(od, offset);
+	}
+	ret = offset;
+
+unlock:
+	mutex_unlock(&inode->i_mutex);
+
+	return ret;
+}
+
+/*
+ * ->fsync for overlay directories.
+ *
+ * Normally the real file opened at open time is synced.  If the file was
+ * not opened on the upper layer but the directory is now a merge
+ * directory, the upper directory is opened on demand, remembered in
+ * od->upperfile (under i_mutex, tolerating a concurrent opener) and synced
+ * instead.
+ */
+static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
+			 int datasync)
+{
+	struct ovl_dir_file *od = file->private_data;
+	struct dentry *dentry = file->f_path.dentry;
+	struct inode *inode = file_inode(file);
+	struct file *target = od->realfile;
+	struct file *opened;
+	struct path upperpath;
+
+	/*
+	 * Need to check if we started out being a lower dir, but got copied up
+	 */
+	if (od->is_upper || ovl_path_type(dentry) != OVL_PATH_MERGE)
+		goto sync;
+
+	target = od->upperfile;
+	if (target)
+		goto sync;
+
+	ovl_path_upper(dentry, &upperpath);
+	opened = ovl_path_open(&upperpath, O_RDONLY);
+
+	mutex_lock(&inode->i_mutex);
+	if (od->upperfile) {
+		/* somebody has beaten us to it */
+		if (!IS_ERR(opened))
+			fput(opened);
+	} else if (IS_ERR(opened)) {
+		mutex_unlock(&inode->i_mutex);
+		return PTR_ERR(opened);
+	} else {
+		od->upperfile = opened;
+	}
+	target = od->upperfile;
+	mutex_unlock(&inode->i_mutex);
+
+sync:
+	return vfs_fsync_range(target, start, end, datasync);
+}
+
+/*
+ * ->release: drop the cache reference (under i_mutex), close the real file
+ * and the lazily opened upper file, and free the per-open state.
+ */
+static int ovl_dir_release(struct inode *inode, struct file *file)
+{
+	struct ovl_dir_file *od = file->private_data;
+	struct dentry *dentry = file->f_path.dentry;
+
+	if (od->cache) {
+		mutex_lock(&inode->i_mutex);
+		ovl_cache_put(od, dentry);
+		mutex_unlock(&inode->i_mutex);
+	}
+
+	fput(od->realfile);
+	if (od->upperfile)
+		fput(od->upperfile);
+
+	kfree(od);
+
+	return 0;
+}
+
+/*
+ * ->open: allocate the per-open state, open the real directory with the
+ * caller's flags and record whether the directory is merged and whether it
+ * lives on the upper layer.  Returns 0 or a negative errno.
+ */
+static int ovl_dir_open(struct inode *inode, struct file *file)
+{
+	struct ovl_dir_file *od;
+	struct file *real;
+	struct path realpath;
+	enum ovl_path_type type;
+
+	od = kzalloc(sizeof(*od), GFP_KERNEL);
+	if (!od)
+		return -ENOMEM;
+
+	type = ovl_path_real(file->f_path.dentry, &realpath);
+	real = ovl_path_open(&realpath, file->f_flags);
+	if (IS_ERR(real)) {
+		kfree(od);
+		return PTR_ERR(real);
+	}
+
+	od->realfile = real;
+	od->is_upper = (type != OVL_PATH_LOWER);
+	od->is_real = (type != OVL_PATH_MERGE);
+	INIT_LIST_HEAD(&od->cursor.l_node);
+	file->private_data = od;
+
+	return 0;
+}
+
+const struct file_operations ovl_dir_operations = { /* file operations for overlay directories */
+ .read = generic_read_dir,
+ .open = ovl_dir_open, /* allocates the struct ovl_dir_file kept in private_data */
+ .iterate = ovl_iterate, /* real directory pass-through or cached merged listing */
+ .llseek = ovl_dir_llseek,
+ .fsync = ovl_dir_fsync,
+ .release = ovl_dir_release, /* frees what ovl_dir_open() set up */
+};
+
+/*
+ * Read the merged contents of @dentry onto @list and check that nothing
+ * but whiteouts, "." and ".." is present.  Returns 0 if so, -ENOTEMPTY if
+ * any other entry exists, or the error from reading the directories.
+ * The entries stay on @list either way.
+ */
+int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
+{
+	struct ovl_cache_entry *p;
+	struct path upperpath;
+	struct path lowerpath;
+	int err;
+
+	ovl_path_upper(dentry, &upperpath);
+	ovl_path_lower(dentry, &lowerpath);
+
+	err = ovl_dir_read_merged(&upperpath, &lowerpath, list);
+	if (err)
+		return err;
+
+	list_for_each_entry(p, list, l_node) {
+		if (p->is_whiteout)
+			continue;
+
+		/* "." and ".." do not count as content */
+		if (p->name[0] == '.' &&
+		    (p->len == 1 || (p->len == 2 && p->name[1] == '.')))
+			continue;
+
+		return -ENOTEMPTY;
+	}
+
+	return 0;
+}
+
+/*
+ * Remove from the directory @upper every entry on @list that is flagged as
+ * a whiteout.  Runs with @upper's i_mutex held (I_MUTEX_PARENT class); a
+ * failed lookup is logged and skipped.
+ */
+void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
+{
+	struct inode *dir = upper->d_inode;
+	struct ovl_cache_entry *p;
+
+	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+	list_for_each_entry(p, list, l_node) {
+		struct dentry *victim;
+
+		if (!p->is_whiteout)
+			continue;
+
+		victim = lookup_one_len(p->name, upper, p->len);
+		if (IS_ERR(victim)) {
+			pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n",
+			       upper->d_name.name, p->len, p->name,
+			       (int) PTR_ERR(victim));
+			continue;
+		}
+		ovl_cleanup(dir, victim);
+		dput(victim);
+	}
+	mutex_unlock(&dir->i_mutex);
+}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
new file mode 100644
index 0000000..08b704c
--- /dev/null
+++ b/fs/overlayfs/super.c
@@ -0,0 +1,796 @@
+/*
+ *
+ * Copyright (C) 2011 Novell Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/xattr.h>
+#include <linux/security.h>
+#include <linux/mount.h>
+#include <linux/slab.h>
+#include <linux/parser.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/statfs.h>
+#include <linux/seq_file.h>
+#include "overlayfs.h"
+
+MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
+MODULE_DESCRIPTION("Overlay filesystem");
+MODULE_LICENSE("GPL");
+
+#define OVERLAYFS_SUPER_MAGIC 0x794c764f
+
+struct ovl_config { /* mount option strings, each a match_strdup() copy or NULL */
+ char *lowerdir; /* value of lowerdir= */
+ char *upperdir; /* value of upperdir= */
+ char *workdir; /* value of workdir= */
+};
+
+/* private information held for overlayfs's superblock */
+struct ovl_fs {
+ struct vfsmount *upper_mnt; /* clone_private_mount() of the upperdir path */
+ struct vfsmount *lower_mnt; /* clone_private_mount() of the lowerdir path, flagged MNT_READONLY */
+ struct dentry *workdir; /* directory returned by ovl_workdir_create() */
+ long lower_namelen; /* f_namelen reported by statfs on the lower path */
+ /* pathnames of lower and upper dirs, for show_options */
+ struct ovl_config config;
+};
+
+struct ovl_dir_cache;
+
+/* private information held for every overlayfs dentry */
+struct ovl_entry {
+ struct dentry *__upperdentry; /* read via ovl_upperdentry_dereference(), published by ovl_dentry_update() */
+ struct dentry *lowerdentry; /* dentry on the lower layer, or NULL */
+ struct ovl_dir_cache *cache; /* readdir cache, see ovl_dir_cache()/ovl_set_dir_cache() */
+ union {
+ struct {
+ u64 version; /* bumped by ovl_dentry_version_inc() */
+ bool opaque; /* set during lookup when the lower layer is not consulted */
+ };
+ struct rcu_head rcu; /* used by kfree_rcu() in ovl_dentry_release() */
+ };
+};
+
+const char *ovl_opaque_xattr = "trusted.overlay.opaque";
+
+
+/*
+ * Classify @dentry by which layers back it:
+ *  - no upper dentry:                 OVL_PATH_LOWER
+ *  - upper and lower, a directory:    OVL_PATH_MERGE
+ *  - upper and lower, anything else:  OVL_PATH_UPPER
+ *  - upper only, marked opaque:       OVL_PATH_UPPER
+ *  - upper only, not opaque:          OVL_PATH_PURE_UPPER
+ */
+enum ovl_path_type ovl_path_type(struct dentry *dentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	if (!oe->__upperdentry)
+		return OVL_PATH_LOWER;
+
+	if (oe->lowerdentry)
+		return S_ISDIR(dentry->d_inode->i_mode) ? OVL_PATH_MERGE
+							: OVL_PATH_UPPER;
+
+	return oe->opaque ? OVL_PATH_UPPER : OVL_PATH_PURE_UPPER;
+}
+
+/*
+ * Fetch the upper dentry of @oe with a single read, followed by a
+ * dependency barrier.
+ */
+static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe)
+{
+	struct dentry *upper = ACCESS_ONCE(oe->__upperdentry);
+
+	/* Order reads through "upper" against the store in ovl_dentry_update() */
+	smp_read_barrier_depends();
+
+	return upper;
+}
+
+/*
+ * Fill @path with the upper mount and @dentry's upper dentry (which may be
+ * NULL).  No references are taken.
+ */
+void ovl_path_upper(struct dentry *dentry, struct path *path)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+
+	path->dentry = ovl_upperdentry_dereference(oe);
+	path->mnt = ofs->upper_mnt;
+}
+
+/*
+ * Fill @path with the lower path for OVL_PATH_LOWER dentries and with the
+ * upper path for everything else.  Returns the dentry's path type.
+ */
+enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)
+{
+	enum ovl_path_type type = ovl_path_type(dentry);
+
+	if (type != OVL_PATH_LOWER)
+		ovl_path_upper(dentry, path);
+	else
+		ovl_path_lower(dentry, path);
+
+	return type;
+}
+
+/* Return @dentry's upper dentry, or NULL if it has none. */
+struct dentry *ovl_dentry_upper(struct dentry *dentry)
+{
+	return ovl_upperdentry_dereference(dentry->d_fsdata);
+}
+
+/* Return @dentry's lower dentry, or NULL if it has none. */
+struct dentry *ovl_dentry_lower(struct dentry *dentry)
+{
+	struct ovl_entry *entry = dentry->d_fsdata;
+	struct dentry *lower = entry->lowerdentry;
+
+	return lower;
+}
+
+/* Return the upper dentry if there is one, else the lower dentry. */
+struct dentry *ovl_dentry_real(struct dentry *dentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+	struct dentry *upper = ovl_upperdentry_dereference(oe);
+
+	return upper ? upper : oe->lowerdentry;
+}
+
+/*
+ * Like ovl_dentry_real(), but works on the entry itself and reports through
+ * @is_upper which layer the returned dentry belongs to.
+ */
+struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper)
+{
+	struct dentry *upper = ovl_upperdentry_dereference(oe);
+
+	if (upper) {
+		*is_upper = true;
+		return upper;
+	}
+
+	*is_upper = false;
+	return oe->lowerdentry;
+}
+
+/* Return the readdir cache attached to @dentry, or NULL. */
+struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry)
+{
+	struct ovl_entry *entry = dentry->d_fsdata;
+	struct ovl_dir_cache *attached = entry->cache;
+
+	return attached;
+}
+
+/* Attach @cache (may be NULL) to @dentry as its readdir cache. */
+void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache)
+{
+	struct ovl_entry *entry = dentry->d_fsdata;
+
+	entry->cache = cache;
+}
+
+/*
+ * Fill @path with the lower mount and @dentry's lower dentry (which may be
+ * NULL).  No references are taken.
+ */
+void ovl_path_lower(struct dentry *dentry, struct path *path)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+
+	path->dentry = oe->lowerdentry;
+	path->mnt = ofs->lower_mnt;
+}
+
+/* Get write access to the upper mount; returns mnt_want_write()'s result. */
+int ovl_want_write(struct dentry *dentry)
+{
+	struct ovl_fs *fs_info = dentry->d_sb->s_fs_info;
+	struct vfsmount *upper = fs_info->upper_mnt;
+
+	return mnt_want_write(upper);
+}
+
+/* Release the write access obtained with ovl_want_write(). */
+void ovl_drop_write(struct dentry *dentry)
+{
+	struct ovl_fs *fs_info = dentry->d_sb->s_fs_info;
+	struct vfsmount *upper = fs_info->upper_mnt;
+
+	mnt_drop_write(upper);
+}
+
+/* Return the work directory of the overlay that @dentry belongs to. */
+struct dentry *ovl_workdir(struct dentry *dentry)
+{
+	struct ovl_fs *fs_info = dentry->d_sb->s_fs_info;
+	struct dentry *work = fs_info->workdir;
+
+	return work;
+}
+
+/* Report the opaque flag stored in @dentry's overlay entry. */
+bool ovl_dentry_is_opaque(struct dentry *dentry)
+{
+	struct ovl_entry *entry = dentry->d_fsdata;
+	bool opaque = entry->opaque;
+
+	return opaque;
+}
+
+/* Store @opaque as the opaque flag of @dentry's overlay entry. */
+void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque)
+{
+	struct ovl_entry *entry = dentry->d_fsdata;
+
+	entry->opaque = opaque;
+}
+
+/*
+ * Publish @upperdentry as the upper dentry of @dentry.  Warns if the upper
+ * parent's i_mutex is not held or an upper dentry is already set, and BUGs
+ * on a negative @upperdentry.
+ */
+void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+	struct inode *upper_parent = upperdentry->d_parent->d_inode;
+
+	WARN_ON(!mutex_is_locked(&upper_parent->i_mutex));
+	WARN_ON(oe->__upperdentry);
+	BUG_ON(!upperdentry->d_inode);
+
+	/*
+	 * The write barrier makes upperdentry consistent before the store
+	 * below exposes it to ovl_upperdentry_dereference().
+	 */
+	smp_wmb();
+	oe->__upperdentry = upperdentry;
+}
+
+/* Bump @dentry's version; warns if the inode's i_mutex is not held. */
+void ovl_dentry_version_inc(struct dentry *dentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+	struct inode *inode = dentry->d_inode;
+
+	WARN_ON(!mutex_is_locked(&inode->i_mutex));
+	oe->version++;
+}
+
+/* Read @dentry's version; warns if the inode's i_mutex is not held. */
+u64 ovl_dentry_version_get(struct dentry *dentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+	struct inode *inode = dentry->d_inode;
+
+	WARN_ON(!mutex_is_locked(&inode->i_mutex));
+	return oe->version;
+}
+
+/* True if @dentry is positive and its inode satisfies IS_WHITEOUT(). */
+bool ovl_is_whiteout(struct dentry *dentry)
+{
+	struct inode *inode = dentry->d_inode;
+
+	if (!inode)
+		return false;
+
+	return IS_WHITEOUT(inode);
+}
+
+/*
+ * True if @dentry is a directory whose ovl_opaque_xattr attribute is the
+ * single byte 'y'.  Inodes without a getxattr operation are never opaque.
+ */
+static bool ovl_is_opaquedir(struct dentry *dentry)
+{
+	struct inode *inode = dentry->d_inode;
+	char val;
+	int res;
+
+	if (!S_ISDIR(inode->i_mode))
+		return false;
+	if (!inode->i_op->getxattr)
+		return false;
+
+	res = inode->i_op->getxattr(dentry, ovl_opaque_xattr, &val, 1);
+
+	return res == 1 && val == 'y';
+}
+
+/*
+ * ->d_release: drop the references on the upper and lower dentries and
+ * free the overlay entry via kfree_rcu().  A dentry without an entry is
+ * left alone.
+ */
+static void ovl_dentry_release(struct dentry *dentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	if (!oe)
+		return;
+
+	dput(oe->__upperdentry);
+	dput(oe->lowerdentry);
+	kfree_rcu(oe, rcu);
+}
+
+static const struct dentry_operations ovl_dentry_operations = { /* installed as sb->s_d_op in ovl_fill_super() */
+ .d_release = ovl_dentry_release, /* frees the struct ovl_entry in d_fsdata */
+};
+
+/* Allocate a zero-filled overlay entry; returns NULL on failure. */
+static struct ovl_entry *ovl_alloc_entry(void)
+{
+	struct ovl_entry *oe = kzalloc(sizeof(*oe), GFP_KERNEL);
+
+	return oe;
+}
+
+/*
+ * Look @name up in the real directory @dir under its i_mutex.
+ *
+ * Returns a referenced positive dentry, NULL if the name does not exist
+ * (-ENOENT or a negative dentry), or an ERR_PTR() for any other failure.
+ */
+static inline struct dentry *ovl_lookup_real(struct dentry *dir,
+					     struct qstr *name)
+{
+	struct inode *dir_inode = dir->d_inode;
+	struct dentry *found;
+
+	mutex_lock(&dir_inode->i_mutex);
+	found = lookup_one_len(name->name, dir, name->len);
+	mutex_unlock(&dir_inode->i_mutex);
+
+	if (IS_ERR(found))
+		return PTR_ERR(found) == -ENOENT ? NULL : found;
+
+	if (found->d_inode)
+		return found;
+
+	dput(found);
+	return NULL;
+}
+
+/*
+ * ->lookup for overlay directories.
+ *
+ * The name is looked up in the parent's upper directory first.  With a
+ * lower parent present, an upper whiteout is dropped and an upper whiteout
+ * or opaque directory marks the entry opaque.  The lower directory is only
+ * consulted when the entry is not opaque, and a lower dentry is kept next
+ * to an upper one only if both are directories.  If either layer produced a
+ * dentry, an overlay inode is created from the upper one when present, else
+ * the lower one; otherwise the dentry is added negative.
+ *
+ * Returns NULL on success or an ERR_PTR() on failure.
+ */
+struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
+			  unsigned int flags)
+{
+	struct ovl_entry *oe;
+	struct dentry *upperdir;
+	struct dentry *lowerdir;
+	struct dentry *upper = NULL;
+	struct dentry *lower = NULL;
+	struct inode *inode = NULL;
+	int err;
+
+	oe = ovl_alloc_entry();
+	if (!oe)
+		return ERR_PTR(-ENOMEM);
+
+	upperdir = ovl_dentry_upper(dentry->d_parent);
+	lowerdir = ovl_dentry_lower(dentry->d_parent);
+
+	if (upperdir) {
+		upper = ovl_lookup_real(upperdir, &dentry->d_name);
+		if (IS_ERR(upper)) {
+			err = PTR_ERR(upper);
+			goto out_free;
+		}
+
+		if (lowerdir && upper) {
+			if (ovl_is_whiteout(upper)) {
+				dput(upper);
+				upper = NULL;
+				oe->opaque = true;
+			} else if (ovl_is_opaquedir(upper)) {
+				oe->opaque = true;
+			}
+		}
+	}
+
+	if (lowerdir && !oe->opaque) {
+		lower = ovl_lookup_real(lowerdir, &dentry->d_name);
+		if (IS_ERR(lower)) {
+			err = PTR_ERR(lower);
+			goto out_put_upper;
+		}
+	}
+
+	/* Only directory-over-directory keeps both layers. */
+	if (lower && upper) {
+		bool both_dirs = S_ISDIR(upper->d_inode->i_mode) &&
+				 S_ISDIR(lower->d_inode->i_mode);
+
+		if (!both_dirs) {
+			dput(lower);
+			lower = NULL;
+			oe->opaque = true;
+		}
+	}
+
+	if (lower || upper) {
+		struct dentry *real = upper ? upper : lower;
+
+		inode = ovl_new_inode(dentry->d_sb, real->d_inode->i_mode, oe);
+		if (!inode) {
+			err = -ENOMEM;
+			goto out_put_lower;
+		}
+		ovl_copyattr(real->d_inode, inode);
+	}
+
+	oe->__upperdentry = upper;
+	oe->lowerdentry = lower;
+
+	dentry->d_fsdata = oe;
+	d_add(dentry, inode);
+
+	return NULL;
+
+out_put_lower:
+	dput(lower);
+out_put_upper:
+	dput(upper);
+out_free:
+	kfree(oe);
+	return ERR_PTR(err);
+}
+
+/* Open the real @path with @flags using the current task's credentials. */
+struct file *ovl_path_open(struct path *path, int flags)
+{
+	const struct cred *cred = current_cred();
+
+	return dentry_open(path, flags, cred);
+}
+
+/*
+ * ->put_super: drop the work directory and both private mounts, then free
+ * the option strings and the ovl_fs itself.
+ */
+static void ovl_put_super(struct super_block *sb)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+	struct ovl_config *config = &ofs->config;
+
+	dput(ofs->workdir);
+	mntput(ofs->upper_mnt);
+	mntput(ofs->lower_mnt);
+
+	kfree(config->lowerdir);
+	kfree(config->upperdir);
+	kfree(config->workdir);
+	kfree(ofs);
+}
+
+/**
+ * ovl_statfs
+ * @dentry: A dentry on the overlayfs super block being queried
+ * @buf: The struct kstatfs to fill in with stats
+ *
+ * Get the filesystem statistics. As writes always target the upper layer
+ * filesystem pass the statfs to the same filesystem.
+ *
+ * On success f_namelen is raised to the lower filesystem's limit if that is
+ * larger, and f_type is set to OVERLAYFS_SUPER_MAGIC. Returns the result of
+ * vfs_statfs() on the upper path of the root dentry.
+ */
+static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+ struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+ struct dentry *root_dentry = dentry->d_sb->s_root;
+ struct path path;
+ int err;
+
+ ovl_path_upper(root_dentry, &path);
+
+ err = vfs_statfs(&path, buf);
+ if (!err) {
+ buf->f_namelen = max(buf->f_namelen, ofs->lower_namelen);
+ buf->f_type = OVERLAYFS_SUPER_MAGIC;
+ }
+
+ return err;
+}
+
+/**
+ * ovl_show_options
+ * @m: seq_file the options are written to
+ * @dentry: a dentry on the overlayfs super block
+ *
+ * Emits the lowerdir, upperdir and workdir mount options recorded at mount
+ * time. Always returns zero.
+ */
+static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
+{
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+	const struct ovl_config *config = &ofs->config;
+
+	seq_printf(m, ",lowerdir=%s", config->lowerdir);
+	seq_printf(m, ",upperdir=%s", config->upperdir);
+	seq_printf(m, ",workdir=%s", config->workdir);
+
+	return 0;
+}
+
+static const struct super_operations ovl_super_operations = { /* installed as sb->s_op in ovl_fill_super() */
+ .put_super = ovl_put_super, /* releases what ovl_fill_super() acquired */
+ .statfs = ovl_statfs, /* statistics of the upper filesystem */
+ .show_options = ovl_show_options,
+};
+
+enum { /* token ids returned by match_token() for ovl_tokens */
+ OPT_LOWERDIR,
+ OPT_UPPERDIR,
+ OPT_WORKDIR,
+ OPT_ERR, /* table terminator; handled by the default case in ovl_parse_opt() */
+};
+
+static const match_table_t ovl_tokens = { /* mount option patterns used by ovl_parse_opt() */
+ {OPT_LOWERDIR, "lowerdir=%s"},
+ {OPT_UPPERDIR, "upperdir=%s"},
+ {OPT_WORKDIR, "workdir=%s"},
+ {OPT_ERR, NULL} /* end of table */
+};
+
+/*
+ * Parse the comma separated mount option string @opt into @config.
+ *
+ * Empty options are ignored and a repeated option replaces the earlier
+ * value.  Returns 0 on success, -EINVAL for an unrecognised option and
+ * -ENOMEM if a value cannot be duplicated.  @opt is modified by strsep().
+ */
+static int ovl_parse_opt(char *opt, struct ovl_config *config)
+{
+	char *p;
+
+	while ((p = strsep(&opt, ",")) != NULL) {
+		substring_t args[MAX_OPT_ARGS];
+		char **slot;
+
+		if (!*p)
+			continue;
+
+		/* Pick the config field this option is stored in. */
+		switch (match_token(p, ovl_tokens, args)) {
+		case OPT_UPPERDIR:
+			slot = &config->upperdir;
+			break;
+
+		case OPT_LOWERDIR:
+			slot = &config->lowerdir;
+			break;
+
+		case OPT_WORKDIR:
+			slot = &config->workdir;
+			break;
+
+		default:
+			return -EINVAL;
+		}
+
+		kfree(*slot);
+		*slot = match_strdup(&args[0]);
+		if (!*slot)
+			return -ENOMEM;
+	}
+	return 0;
+}
+
+#define OVL_WORKDIR_NAME "work"
+
+/*
+ * Create the OVL_WORKDIR_NAME directory inside @dentry on mount @mnt.
+ *
+ * An existing entry of that name is removed with ovl_cleanup() once and the
+ * lookup repeated; if it is still there the result is -EEXIST.  Runs with
+ * write access to @mnt and the parent's i_mutex held.  Returns the new
+ * directory's dentry or an ERR_PTR().
+ */
+static struct dentry *ovl_workdir_create(struct vfsmount *mnt,
+					 struct dentry *dentry)
+{
+	struct inode *dir = dentry->d_inode;
+	struct kstat stat = {
+		.mode = S_IFDIR | 0,
+	};
+	struct dentry *work;
+	int attempt;
+	int err;
+
+	err = mnt_want_write(mnt);
+	if (err)
+		return ERR_PTR(err);
+
+	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+	for (attempt = 0; ; attempt++) {
+		work = lookup_one_len(OVL_WORKDIR_NAME, dentry,
+				      strlen(OVL_WORKDIR_NAME));
+		if (IS_ERR(work))
+			break;
+
+		if (!work->d_inode) {
+			err = ovl_create_real(dir, work, &stat, NULL, NULL, true);
+			if (err) {
+				dput(work);
+				work = ERR_PTR(err);
+			}
+			break;
+		}
+
+		/* Something is in the way: clean it up, but only once. */
+		if (attempt) {
+			dput(work);
+			work = ERR_PTR(-EEXIST);
+			break;
+		}
+		ovl_cleanup(dir, work);
+		dput(work);
+	}
+	mutex_unlock(&dir->i_mutex);
+	mnt_drop_write(mnt);
+
+	return work;
+}
+
+/*
+ * Resolve the option path @name, following symlinks, into @path.  Any
+ * resolution failure is logged and reported as -EINVAL.
+ */
+static int ovl_mount_dir(const char *name, struct path *path)
+{
+	int err = kern_path(name, LOOKUP_FOLLOW, path);
+
+	if (!err)
+		return 0;
+
+	pr_err("overlayfs: failed to resolve '%s': %i\n", name, err);
+	return -EINVAL;
+}
+
+/*
+ * Decide from the dentry operations of @root whether its filesystem can be
+ * used as a layer.  Not supported:
+ *  - automount filesystems
+ *  - filesystems with revalidate (FIXME for lower layer)
+ *  - filesystems with case insensitive names
+ */
+static bool ovl_is_allowed_fs_type(struct dentry *root)
+{
+	const struct dentry_operations *dop = root->d_op;
+
+	if (!dop)
+		return true;
+
+	return !(dop->d_manage || dop->d_automount ||
+		 dop->d_revalidate || dop->d_weak_revalidate ||
+		 dop->d_compare || dop->d_hash);
+}
+
+/*
+ * Workdir and upperdir must be different directories, and lock_rename() on
+ * the pair must return NULL (neither is a subdir of the other).
+ */
+static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
+{
+	bool separate;
+
+	if (workdir == upperdir)
+		return false;
+
+	separate = (lock_rename(workdir, upperdir) == NULL);
+	unlock_rename(workdir, upperdir);
+
+	return separate;
+}
+
+static int ovl_fill_super(struct super_block *sb, void *data, int silent)
+{ /*
+ * Superblock fill callback passed to mount_nodev() by ovl_mount().
+ * Parses the lowerdir=/upperdir=/workdir= options in @data, resolves and
+ * validates the three directories, clones private mounts for the upper
+ * and lower paths, creates the work directory and sets up the root dentry.
+ * Returns 0 on success or a negative errno; on failure everything acquired
+ * so far is released through the label chain at the end.
+ */
+ struct path lowerpath;
+ struct path upperpath;
+ struct path workpath;
+ struct inode *root_inode;
+ struct dentry *root_dentry;
+ struct ovl_entry *oe;
+ struct ovl_fs *ufs;
+ struct kstatfs statfs;
+ int err;
+
+ err = -ENOMEM;
+ ufs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
+ if (!ufs)
+ goto out;
+
+ err = ovl_parse_opt((char *) data, &ufs->config);
+ if (err)
+ goto out_free_config;
+
+ /* FIXME: workdir is not needed for a R/O mount */
+ err = -EINVAL;
+ if (!ufs->config.upperdir || !ufs->config.lowerdir ||
+ !ufs->config.workdir) {
+ pr_err("overlayfs: missing upperdir or lowerdir or workdir\n");
+ goto out_free_config;
+ }
+
+ err = -ENOMEM;
+ oe = ovl_alloc_entry(); /* overlay entry for the root dentry */
+ if (oe == NULL)
+ goto out_free_config;
+
+ err = ovl_mount_dir(ufs->config.upperdir, &upperpath);
+ if (err)
+ goto out_free_oe;
+
+ err = ovl_mount_dir(ufs->config.lowerdir, &lowerpath);
+ if (err)
+ goto out_put_upperpath;
+
+ err = ovl_mount_dir(ufs->config.workdir, &workpath);
+ if (err)
+ goto out_put_lowerpath;
+
+ err = -EINVAL; /* result of every validation failure below */
+ if (!S_ISDIR(upperpath.dentry->d_inode->i_mode) ||
+ !S_ISDIR(lowerpath.dentry->d_inode->i_mode) ||
+ !S_ISDIR(workpath.dentry->d_inode->i_mode)) {
+ pr_err("overlayfs: upperdir or lowerdir or workdir not a directory\n");
+ goto out_put_workpath;
+ }
+
+ if (upperpath.mnt != workpath.mnt) {
+ pr_err("overlayfs: workdir and upperdir must reside under the same mount\n");
+ goto out_put_workpath;
+ }
+ if (!ovl_workdir_ok(workpath.dentry, upperpath.dentry)) {
+ pr_err("overlayfs: workdir and upperdir must be separate subtrees\n");
+ goto out_put_workpath;
+ }
+
+ if (!ovl_is_allowed_fs_type(upperpath.dentry)) {
+ pr_err("overlayfs: filesystem of upperdir is not supported\n");
+ goto out_put_workpath;
+ }
+
+ if (!ovl_is_allowed_fs_type(lowerpath.dentry)) {
+ pr_err("overlayfs: filesystem of lowerdir is not supported\n");
+ goto out_put_workpath;
+ }
+
+ err = vfs_statfs(&lowerpath, &statfs);
+ if (err) {
+ pr_err("overlayfs: statfs failed on lowerpath\n");
+ goto out_put_workpath;
+ }
+ ufs->lower_namelen = statfs.f_namelen; /* consulted by ovl_statfs() */
+
+ sb->s_stack_depth = max(upperpath.mnt->mnt_sb->s_stack_depth,
+ lowerpath.mnt->mnt_sb->s_stack_depth) + 1;
+
+ err = -EINVAL;
+ if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
+ pr_err("overlayfs: maximum fs stacking depth exceeded\n");
+ goto out_put_workpath;
+ }
+
+ ufs->upper_mnt = clone_private_mount(&upperpath);
+ err = PTR_ERR(ufs->upper_mnt);
+ if (IS_ERR(ufs->upper_mnt)) {
+ pr_err("overlayfs: failed to clone upperpath\n");
+ goto out_put_workpath;
+ }
+
+ ufs->lower_mnt = clone_private_mount(&lowerpath);
+ err = PTR_ERR(ufs->lower_mnt);
+ if (IS_ERR(ufs->lower_mnt)) {
+ pr_err("overlayfs: failed to clone lowerpath\n");
+ goto out_put_upper_mnt;
+ }
+
+ ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry);
+ err = PTR_ERR(ufs->workdir);
+ if (IS_ERR(ufs->workdir)) {
+ pr_err("overlayfs: failed to create directory %s/%s\n",
+ ufs->config.workdir, OVL_WORKDIR_NAME);
+ goto out_put_lower_mnt;
+ }
+
+ /*
+ * Make lower_mnt R/O. That way fchmod/fchown on lower file
+ * will fail instead of modifying lower fs.
+ */
+ ufs->lower_mnt->mnt_flags |= MNT_READONLY;
+
+ /* If the upper fs is r/o, we mark overlayfs r/o too */
+ if (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY)
+ sb->s_flags |= MS_RDONLY;
+
+ sb->s_d_op = &ovl_dentry_operations;
+
+ err = -ENOMEM;
+ root_inode = ovl_new_inode(sb, S_IFDIR, oe);
+ if (!root_inode)
+ goto out_put_workdir;
+
+ root_dentry = d_make_root(root_inode);
+ if (!root_dentry)
+ goto out_put_workdir;
+
+ mntput(upperpath.mnt); /* only the mount halves are dropped here ... */
+ mntput(lowerpath.mnt);
+ path_put(&workpath);
+
+ oe->__upperdentry = upperpath.dentry; /* ... the dentries are stored in the root's entry */
+ oe->lowerdentry = lowerpath.dentry;
+
+ root_dentry->d_fsdata = oe;
+
+ sb->s_magic = OVERLAYFS_SUPER_MAGIC;
+ sb->s_op = &ovl_super_operations;
+ sb->s_root = root_dentry;
+ sb->s_fs_info = ufs;
+
+ return 0;
+
+out_put_workdir: /* labels release in reverse order of acquisition */
+ dput(ufs->workdir);
+out_put_lower_mnt:
+ mntput(ufs->lower_mnt);
+out_put_upper_mnt:
+ mntput(ufs->upper_mnt);
+out_put_workpath:
+ path_put(&workpath);
+out_put_lowerpath:
+ path_put(&lowerpath);
+out_put_upperpath:
+ path_put(&upperpath);
+out_free_oe:
+ kfree(oe);
+out_free_config:
+ kfree(ufs->config.lowerdir);
+ kfree(ufs->config.upperdir);
+ kfree(ufs->config.workdir);
+ kfree(ufs);
+out:
+ return err;
+}
+
+/*
+ * ->mount: set up the superblock through mount_nodev() with
+ * ovl_fill_super(); @dev_name is not used.
+ */
+static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
+				const char *dev_name, void *raw_data)
+{
+	struct dentry *root;
+
+	root = mount_nodev(fs_type, flags, raw_data, ovl_fill_super);
+
+	return root;
+}
+
+static struct file_system_type ovl_fs_type = { /* registered by ovl_init(), unregistered by ovl_exit() */
+ .owner = THIS_MODULE,
+ .name = "overlayfs", /* filesystem type name */
+ .mount = ovl_mount,
+ .kill_sb = kill_anon_super,
+};
+MODULE_ALIAS_FS("overlayfs");
+
+/* Module entry point: register the overlayfs filesystem type. */
+static int __init ovl_init(void)
+{
+	int err = register_filesystem(&ovl_fs_type);
+
+	return err;
+}
+
+/* Module exit point: unregister the overlayfs filesystem type. */
+static void __exit ovl_exit(void)
+{
+	struct file_system_type *type = &ovl_fs_type;
+
+	unregister_filesystem(type);
+}
+
+module_init(ovl_init);
+module_exit(ovl_exit);
diff --git a/fs/splice.c b/fs/splice.c
index f5cb9ba..75c6058 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1330,6 +1330,7 @@
return ret;
}
+EXPORT_SYMBOL(do_splice_direct);
static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
struct pipe_inode_info *opipe,
diff --git a/include/acpi/acnames.h b/include/acpi/acnames.h
index f97804b..7461327 100644
--- a/include/acpi/acnames.h
+++ b/include/acpi/acnames.h
@@ -52,6 +52,7 @@
#define METHOD_NAME__CBA "_CBA"
#define METHOD_NAME__CID "_CID"
#define METHOD_NAME__CRS "_CRS"
+#define METHOD_NAME__DDN "_DDN"
#define METHOD_NAME__HID "_HID"
#define METHOD_NAME__INI "_INI"
#define METHOD_NAME__PLD "_PLD"
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 57ee052..f34a083 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -433,6 +433,7 @@
int acpi_bus_init_power(struct acpi_device *device);
int acpi_device_fix_up_power(struct acpi_device *device);
int acpi_bus_update_power(acpi_handle handle, int *state_p);
+int acpi_device_update_power(struct acpi_device *device, int *state_p);
bool acpi_bus_power_manageable(acpi_handle handle);
#ifdef CONFIG_PM
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 9fc1d71..ab2acf6 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -46,7 +46,7 @@
/* Current ACPICA subsystem version in YYYYMMDD format */
-#define ACPI_CA_VERSION 0x20140828
+#define ACPI_CA_VERSION 0x20140926
#include <acpi/acconfig.h>
#include <acpi/actypes.h>
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index ac03ec8..7000e66 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -721,7 +721,7 @@
* | | | +--- Enabled for wake?
* | | +----- Set?
* | +------- Has a handler?
- * +----------- <Reserved>
+ * +------------- <Reserved>
*/
typedef u32 acpi_event_status;
@@ -729,7 +729,7 @@
#define ACPI_EVENT_FLAG_ENABLED (acpi_event_status) 0x01
#define ACPI_EVENT_FLAG_WAKE_ENABLED (acpi_event_status) 0x02
#define ACPI_EVENT_FLAG_SET (acpi_event_status) 0x04
-#define ACPI_EVENT_FLAG_HANDLE (acpi_event_status) 0x08
+#define ACPI_EVENT_FLAG_HAS_HANDLER (acpi_event_status) 0x08
/* Actions for acpi_set_gpe, acpi_gpe_wakeup, acpi_hw_low_set_gpe */
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index b7926bb..407a12f 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -432,6 +432,7 @@
int acpi_device_uevent_modalias(struct device *, struct kobj_uevent_env *);
int acpi_device_modalias(struct device *, char *, int);
+struct platform_device *acpi_create_platform_device(struct acpi_device *);
#define ACPI_PTR(_ptr) (_ptr)
#else /* !CONFIG_ACPI */
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 36dffec..e58fe7d 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -90,7 +90,7 @@
extern unsigned compat_chattr_class[];
extern unsigned compat_signal_class[];
-extern int __weak audit_classify_compat_syscall(int abi, unsigned syscall);
+extern int audit_classify_compat_syscall(int abi, unsigned syscall);
/* audit_names->type values */
#define AUDIT_TYPE_UNKNOWN 0 /* we don't know yet */
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 324329c..73b4522 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -175,12 +175,13 @@
wait_queue_head_t *bh_waitq_head(struct buffer_head *bh);
struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block,
unsigned size);
-struct buffer_head *__getblk(struct block_device *bdev, sector_t block,
- unsigned size);
+struct buffer_head *__getblk_gfp(struct block_device *bdev, sector_t block,
+ unsigned size, gfp_t gfp);
void __brelse(struct buffer_head *);
void __bforget(struct buffer_head *);
void __breadahead(struct block_device *, sector_t block, unsigned int size);
-struct buffer_head *__bread(struct block_device *, sector_t block, unsigned size);
+struct buffer_head *__bread_gfp(struct block_device *,
+ sector_t block, unsigned size, gfp_t gfp);
void invalidate_bh_lrus(void);
struct buffer_head *alloc_buffer_head(gfp_t gfp_flags);
void free_buffer_head(struct buffer_head * bh);
@@ -295,7 +296,13 @@
static inline struct buffer_head *
sb_bread(struct super_block *sb, sector_t block)
{
- return __bread(sb->s_bdev, block, sb->s_blocksize);
+ return __bread_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE);
+}
+
+static inline struct buffer_head *
+sb_bread_unmovable(struct super_block *sb, sector_t block)
+{
+ return __bread_gfp(sb->s_bdev, block, sb->s_blocksize, 0);
}
static inline void
@@ -307,7 +314,7 @@
static inline struct buffer_head *
sb_getblk(struct super_block *sb, sector_t block)
{
- return __getblk(sb->s_bdev, block, sb->s_blocksize);
+ return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE);
}
static inline struct buffer_head *
@@ -344,6 +351,36 @@
__lock_buffer(bh);
}
+/**
+ * getblk_unmovable() - __getblk_gfp() without __GFP_MOVABLE
+ * @bdev: the block_device the block belongs to
+ * @block: block number
+ * @size: size (in bytes) of the block
+ *
+ * Calls __getblk_gfp() with no additional gfp flags (0) and returns its
+ * result.
+ */
+static inline struct buffer_head *getblk_unmovable(struct block_device *bdev,
+ sector_t block,
+ unsigned size)
+{
+ return __getblk_gfp(bdev, block, size, 0);
+}
+
+/**
+ * __getblk() - __getblk_gfp() with __GFP_MOVABLE
+ * @bdev: the block_device the block belongs to
+ * @block: block number
+ * @size: size (in bytes) of the block
+ *
+ * Calls __getblk_gfp() with __GFP_MOVABLE as the additional gfp flag and
+ * returns its result.
+ */
+static inline struct buffer_head *__getblk(struct block_device *bdev,
+ sector_t block,
+ unsigned size)
+{
+ return __getblk_gfp(bdev, block, size, __GFP_MOVABLE);
+}
+
+/**
+ * __bread() - read a specified block and return its buffer head
+ * @bdev: the block_device to read from
+ * @block: block number
+ * @size: size (in bytes) to read
+ *
+ * Reads the specified block and returns the buffer head that contains it.
+ * The page cache is allocated from the movable area so that it can be migrated.
+ * Returns NULL if the block was unreadable.
+ */
+static inline struct buffer_head *
+__bread(struct block_device *bdev, sector_t block, unsigned size)
+{
+ return __bread_gfp(bdev, block, size, __GFP_MOVABLE);
+}
+
extern int __set_page_dirty_buffers(struct page *page);
#else /* CONFIG_BLOCK */
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 653f0e2..abcafaa 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -287,7 +287,7 @@
extern void clocksource_change_rating(struct clocksource *cs, int rating);
extern void clocksource_suspend(void);
extern void clocksource_resume(void);
-extern struct clocksource * __init __weak clocksource_default_clock(void);
+extern struct clocksource * __init clocksource_default_clock(void);
extern void clocksource_mark_unstable(struct clocksource *cs);
extern u64
diff --git a/include/linux/cpufreq-dt.h b/include/linux/cpufreq-dt.h
new file mode 100644
index 0000000..0414009
--- /dev/null
+++ b/include/linux/cpufreq-dt.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2014 Marvell
+ * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __CPUFREQ_DT_H__
+#define __CPUFREQ_DT_H__
+
+#include <linux/types.h>
+
+/* Platform data carrying the per-platform clock topology flag below. */
+struct cpufreq_dt_platform_data {
+	/*
+	 * True when each CPU has its own clock to control its
+	 * frequency, false when all CPUs are controlled by a single
+	 * clock.
+	 */
+	bool independent_clocks;
+};
+
+#endif /* __CPUFREQ_DT_H__ */
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 138336b..503b085 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -219,6 +219,7 @@
struct cpufreq_driver {
char name[CPUFREQ_NAME_LEN];
u8 flags;
+ void *driver_data;
/* needed by all drivers */
int (*init) (struct cpufreq_policy *policy);
@@ -312,6 +313,7 @@
int cpufreq_unregister_driver(struct cpufreq_driver *driver_data);
const char *cpufreq_get_current_driver(void);
+void *cpufreq_get_driver_data(void);
static inline void cpufreq_verify_within_limits(struct cpufreq_policy *policy,
unsigned int min, unsigned int max)
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 72ab536..3849fce 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -14,14 +14,13 @@
extern unsigned long long elfcorehdr_addr;
extern unsigned long long elfcorehdr_size;
-extern int __weak elfcorehdr_alloc(unsigned long long *addr,
- unsigned long long *size);
-extern void __weak elfcorehdr_free(unsigned long long addr);
-extern ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos);
-extern ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos);
-extern int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma,
- unsigned long from, unsigned long pfn,
- unsigned long size, pgprot_t prot);
+extern int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size);
+extern void elfcorehdr_free(unsigned long long addr);
+extern ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos);
+extern ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos);
+extern int remap_oldmem_pfn_range(struct vm_area_struct *vma,
+ unsigned long from, unsigned long pfn,
+ unsigned long size, pgprot_t prot);
extern ssize_t copy_oldmem_page(unsigned long, char *, size_t,
unsigned long, int);
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 45cb4ff..0949f9c 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -92,6 +92,7 @@
#define EFI_MEMORY_WC ((u64)0x0000000000000002ULL) /* write-coalescing */
#define EFI_MEMORY_WT ((u64)0x0000000000000004ULL) /* write-through */
#define EFI_MEMORY_WB ((u64)0x0000000000000008ULL) /* write-back */
+#define EFI_MEMORY_UCE ((u64)0x0000000000000010ULL) /* uncached, exported */
#define EFI_MEMORY_WP ((u64)0x0000000000001000ULL) /* write-protect */
#define EFI_MEMORY_RP ((u64)0x0000000000002000ULL) /* read-protect */
#define EFI_MEMORY_XP ((u64)0x0000000000004000ULL) /* execute-protect */
@@ -502,6 +503,10 @@
typedef efi_status_t efi_set_variable_t (efi_char16_t *name, efi_guid_t *vendor,
u32 attr, unsigned long data_size,
void *data);
+typedef efi_status_t
+efi_set_variable_nonblocking_t(efi_char16_t *name, efi_guid_t *vendor,
+ u32 attr, unsigned long data_size, void *data);
+
typedef efi_status_t efi_get_next_high_mono_count_t (u32 *count);
typedef void efi_reset_system_t (int reset_type, efi_status_t status,
unsigned long data_size, efi_char16_t *data);
@@ -821,6 +826,7 @@
efi_get_variable_t *get_variable;
efi_get_next_variable_t *get_next_variable;
efi_set_variable_t *set_variable;
+ efi_set_variable_nonblocking_t *set_variable_nonblocking;
efi_query_variable_info_t *query_variable_info;
efi_update_capsule_t *update_capsule;
efi_query_capsule_caps_t *query_capsule_caps;
@@ -886,6 +892,13 @@
(md) <= (efi_memory_desc_t *)((m)->map_end - (m)->desc_size); \
(md) = (void *)(md) + (m)->desc_size)
+/*
+ * Format an EFI memory descriptor's type and attributes to a user-provided
+ * character buffer, as per snprintf(), and return the buffer.
+ */
+char * __init efi_md_typeattr_format(char *buf, size_t size,
+ const efi_memory_desc_t *md);
+
/**
* efi_range_is_wc - check the WC bit on an address range
* @start: starting kvirt address
@@ -1034,6 +1047,7 @@
efi_get_variable_t *get_variable;
efi_get_next_variable_t *get_next_variable;
efi_set_variable_t *set_variable;
+ efi_set_variable_nonblocking_t *set_variable_nonblocking;
efi_query_variable_store_t *query_variable_store;
};
@@ -1227,4 +1241,7 @@
unsigned long *load_addr,
unsigned long *load_size);
+efi_status_t efi_parse_options(char *cmdline);
+
+bool efi_runtime_disabled(void);
#endif /* _LINUX_EFI_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a957d43..4e41a4a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -223,6 +223,13 @@
#define ATTR_TIMES_SET (1 << 16)
/*
+ * Whiteout is represented by a char device. The following constants define the
+ * mode and device number to use.
+ */
+#define WHITEOUT_MODE 0
+#define WHITEOUT_DEV 0
+
+/*
* This is the Inode Attributes structure, used for notify_change(). It
* uses the above definitions as flags, to know which values have changed.
* Also, in this manner, a Filesystem can look at only the values it cares
@@ -254,6 +261,12 @@
*/
#include <linux/quota.h>
+/*
+ * Maximum number of layers in a filesystem stack. This needs to be
+ * limited to prevent kernel stack overflow.
+ */
+#define FILESYSTEM_MAX_STACK_DEPTH 2
+
/**
* enum positive_aop_returns - aop return codes with specific semantics
*
@@ -1266,6 +1279,11 @@
struct list_lru s_dentry_lru ____cacheline_aligned_in_smp;
struct list_lru s_inode_lru ____cacheline_aligned_in_smp;
struct rcu_head rcu;
+
+ /*
+ * Indicates how deep in a filesystem stack this SB is
+ */
+ int s_stack_depth;
};
extern struct timespec current_fs_time(struct super_block *sb);
@@ -1398,6 +1416,7 @@
extern int vfs_rmdir(struct inode *, struct dentry *);
extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int);
+extern int vfs_whiteout(struct inode *, struct dentry *);
/*
* VFS dentry helper functions.
@@ -1528,6 +1547,9 @@
umode_t create_mode, int *opened);
int (*tmpfile) (struct inode *, struct dentry *, umode_t);
int (*set_acl)(struct inode *, struct posix_acl *, int);
+
+ /* WARNING: probably going away soon, do not use! */
+ int (*dentry_open)(struct dentry *, struct file *, const struct cred *);
} ____cacheline_aligned;
ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
@@ -1625,6 +1647,9 @@
#define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT)
#define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC)
+/* True if @inode is a char device whose device number is WHITEOUT_DEV. */
+#define IS_WHITEOUT(inode)	(S_ISCHR((inode)->i_mode) && \
+				 (inode)->i_rdev == WHITEOUT_DEV)
+
/*
* Inode state bits. Protected by inode->i_lock
*
@@ -2040,6 +2065,7 @@
extern struct file *filp_open(const char *, int, umode_t);
extern struct file *file_open_root(struct dentry *, struct vfsmount *,
const char *, int);
+extern int vfs_open(const struct path *, struct file *, const struct cred *);
extern struct file * dentry_open(const struct path *, int, const struct cred *);
extern int filp_close(struct file *, fl_owner_t id);
@@ -2253,7 +2279,9 @@
#endif
extern int notify_change(struct dentry *, struct iattr *, struct inode **);
extern int inode_permission(struct inode *, int);
+extern int __inode_permission(struct inode *, int);
extern int generic_permission(struct inode *, int);
+extern int __check_sticky(struct inode *dir, struct inode *inode);
static inline bool execute_ok(struct inode *inode)
{
@@ -2452,6 +2480,9 @@
struct file *, loff_t *, size_t, unsigned int);
extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
struct file *out, loff_t *, size_t len, unsigned int flags);
+extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
+ loff_t *opos, size_t len, unsigned int flags);
+
extern void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
@@ -2737,6 +2768,14 @@
return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP));
}
+/*
+ * Sticky-bit check: yields 0 when @dir does not have S_ISVTX set in its
+ * mode, otherwise whatever __check_sticky(@dir, @inode) returns.
+ */
+static inline int check_sticky(struct inode *dir, struct inode *inode)
+{
+	return (dir->i_mode & S_ISVTX) ? __check_sticky(dir, inode) : 0;
+}
+
static inline void inode_has_no_xattr(struct inode *inode)
{
if (!is_sxid(inode->i_mode) && (inode->i_sb->s_flags & MS_NOSEC))
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 0dae71e..704b9a5 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1042,7 +1042,7 @@
extern void jbd2_journal_commit_transaction(journal_t *);
/* Checkpoint list management */
-int __jbd2_journal_clean_checkpoint_list(journal_t *journal);
+void __jbd2_journal_clean_checkpoint_list(journal_t *journal);
int __jbd2_journal_remove_checkpoint(struct journal_head *);
void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *);
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 40728cf..3d770f55 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -403,6 +403,7 @@
extern int get_option(char **str, int *pint);
extern char *get_options(const char *str, int nints, int *ints);
extern unsigned long long memparse(const char *ptr, char **retptr);
+extern bool parse_option_str(const char *str, const char *option);
extern int core_kernel_text(unsigned long addr);
extern int core_kernel_data(unsigned long addr);
diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index 6b06d37..e465bb1 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -283,7 +283,7 @@
extern struct kgdb_arch arch_kgdb_ops;
-extern unsigned long __weak kgdb_arch_pc(int exception, struct pt_regs *regs);
+extern unsigned long kgdb_arch_pc(int exception, struct pt_regs *regs);
#ifdef CONFIG_SERIAL_KGDB_NMI
extern int kgdb_register_nmi_console(void);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 28be31f..ea53b04 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1080,6 +1080,7 @@
void kvm_device_put(struct kvm_device *dev);
struct kvm_device *kvm_device_from_filp(struct file *filp);
int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
+void kvm_unregister_device_ops(u32 type);
extern struct kvm_device_ops kvm_mpic_ops;
extern struct kvm_device_ops kvm_xics_ops;
diff --git a/include/linux/leds.h b/include/linux/leds.h
index e436864..a57611d 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -13,8 +13,8 @@
#define __LINUX_LEDS_H_INCLUDED
#include <linux/list.h>
-#include <linux/spinlock.h>
#include <linux/rwsem.h>
+#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
@@ -31,8 +31,8 @@
struct led_classdev {
const char *name;
- int brightness;
- int max_brightness;
+ enum led_brightness brightness;
+ enum led_brightness max_brightness;
int flags;
/* Lower 16 bits reflect status */
@@ -140,6 +140,16 @@
*/
extern void led_set_brightness(struct led_classdev *led_cdev,
enum led_brightness brightness);
+/**
+ * led_update_brightness - update LED brightness
+ * @led_cdev: the LED to query
+ *
+ * Get an LED's current brightness and update led_cdev->brightness
+ * member with the obtained value.
+ *
+ * Returns: 0 on success or negative error value on failure
+ */
+extern int led_update_brightness(struct led_classdev *led_cdev);
/*
* LED Triggers
diff --git a/include/linux/mailbox_client.h b/include/linux/mailbox_client.h
new file mode 100644
index 0000000..307d9ca
--- /dev/null
+++ b/include/linux/mailbox_client.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2013-2014 Linaro Ltd.
+ * Author: Jassi Brar <jassisinghbrar@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __MAILBOX_CLIENT_H
+#define __MAILBOX_CLIENT_H
+
+#include <linux/of.h>
+#include <linux/device.h>
+
+struct mbox_chan;
+
+/**
+ * struct mbox_client - User of a mailbox
+ * @dev: The client device
+ * @tx_block: Whether mbox_send_message() should block until the data
+ * is transmitted.
+ * @tx_tout: Max blocking period in ms before TX is assumed to have failed
+ * @knows_txdone: If the client could run the TX state machine. Usually
+ * if the client receives some ACK packet for transmission.
+ * Unused if the controller already has TX_Done/RTR IRQ.
+ * @rx_callback: Atomic callback to hand the received data to the client
+ * @tx_done: Atomic callback to notify the client of data transmission
+ */
+struct mbox_client {
+ struct device *dev;
+ bool tx_block;
+ unsigned long tx_tout;
+ bool knows_txdone;
+
+ void (*rx_callback)(struct mbox_client *cl, void *mssg);
+ void (*tx_done)(struct mbox_client *cl, void *mssg, int r);
+};
+
+struct mbox_chan *mbox_request_channel(struct mbox_client *cl, int index);
+int mbox_send_message(struct mbox_chan *chan, void *mssg);
+void mbox_client_txdone(struct mbox_chan *chan, int r); /* atomic */
+bool mbox_client_peek_data(struct mbox_chan *chan); /* atomic */
+void mbox_free_channel(struct mbox_chan *chan); /* may sleep */
+
+#endif /* __MAILBOX_CLIENT_H */
diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h
new file mode 100644
index 0000000..d4cf96f
--- /dev/null
+++ b/include/linux/mailbox_controller.h
@@ -0,0 +1,133 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __MAILBOX_CONTROLLER_H
+#define __MAILBOX_CONTROLLER_H
+
+#include <linux/of.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/device.h>
+#include <linux/completion.h>
+
+struct mbox_chan;
+
+/**
+ * struct mbox_chan_ops - methods to control mailbox channels
+ * @send_data: The API asks the MBOX controller driver, in atomic
+ * context, to try to transmit a message on the bus. Returns 0 if
+ * data is accepted for transmission, -EBUSY while rejecting
+ * if the remote hasn't yet read the last data sent. Actual
+ * transmission of data is reported by the controller via
+ * mbox_chan_txdone (if it has some TX ACK irq). It must not
+ * sleep.
+ * @startup: Called when a client requests the chan. The controller
+ * could ask clients for additional parameters of communication
+ * to be provided via client's chan_data. This call may
+ * block. After this call the controller must forward any
+ * data received on the chan by calling mbox_chan_received_data.
+ * The controller may do things that need to sleep.
+ * @shutdown: Called when a client relinquishes control of a chan.
+ * This call may block too. The controller must not forward
+ * any received data anymore.
+ * The controller may do things that need to sleep.
+ * @last_tx_done: If the controller sets 'txdone_poll', the API calls
+ * this to poll the status of the last TX. The controller must
+ * give priority to the IRQ method over polling and never
+ * set both txdone_poll and txdone_irq. Only in polling
+ * mode is 'send_data' expected to return -EBUSY.
+ * The controller may do things that need to sleep/block.
+ * Used only if txdone_poll:=true && txdone_irq:=false
+ * @peek_data: Atomic check for any received data. Returns true if the
+ * controller has some data to push to the client, false otherwise.
+ */
+struct mbox_chan_ops {
+ int (*send_data)(struct mbox_chan *chan, void *data);
+ int (*startup)(struct mbox_chan *chan);
+ void (*shutdown)(struct mbox_chan *chan);
+ bool (*last_tx_done)(struct mbox_chan *chan);
+ bool (*peek_data)(struct mbox_chan *chan);
+};
+
+/**
+ * struct mbox_controller - Controller of a class of communication channels
+ * @dev: Device backing this controller
+ * @ops: Operations that work on each communication chan
+ * @chans: Array of channels
+ * @num_chans: Number of channels in the 'chans' array.
+ * @txdone_irq: Indicates if the controller can report to the API when
+ * the last transmitted data was read by the remote.
+ * E.g. if it has some TX ACK irq.
+ * @txdone_poll: If the controller can read but not report the TX
+ * done. E.g. some register shows the TX status but
+ * no interrupt is raised. Ignored if 'txdone_irq' is set.
+ * @txpoll_period: If 'txdone_poll' is in effect, the API polls for
+ * the last TX's status after this many milliseconds
+ * @of_xlate: Controller driver specific mapping of channel via DT
+ * @poll: API private. Used to poll for TXDONE on all channels.
+ * @node: API private. To hook into list of controllers.
+ */
+struct mbox_controller {
+ struct device *dev;
+ struct mbox_chan_ops *ops;
+ struct mbox_chan *chans;
+ int num_chans;
+ bool txdone_irq;
+ bool txdone_poll;
+ unsigned txpoll_period;
+ struct mbox_chan *(*of_xlate)(struct mbox_controller *mbox,
+ const struct of_phandle_args *sp);
+ /* Internal to API */
+ struct timer_list poll;
+ struct list_head node;
+};
+
+/*
+ * The length of circular buffer for queuing messages from a client.
+ * 'msg_count' tracks the number of buffered messages while 'msg_free'
+ * is the index where the next message would be buffered.
+ * We shouldn't need it too big because every transfer is interrupt
+ * triggered and if we have lots of data to transfer, the interrupt
+ * latencies are going to be the bottleneck, not the buffer length.
+ * Besides, mbox_send_message could be called from atomic context and
+ * the client could also queue another message from the notifier 'tx_done'
+ * of the last transfer done.
+ * REVISIT: If too many platforms see the "Try increasing MBOX_TX_QUEUE_LEN"
+ * print, it needs to be taken from config option or somesuch.
+ */
+#define MBOX_TX_QUEUE_LEN 20
+
+/**
+ * struct mbox_chan - s/w representation of a communication chan
+ * @mbox: Pointer to the parent/provider of this channel
+ * @txdone_method: Way to detect TXDone chosen by the API
+ * @cl: Pointer to the current owner of this channel
+ * @tx_complete: Transmission completion
+ * @active_req: Currently active request hook
+ * @msg_count: Number of messages currently queued
+ * @msg_free: Index of the next available message slot
+ * @msg_data: Hook for data packet
+ * @lock: Serialise access to the channel
+ * @con_priv: Hook for controller driver to attach private data
+ */
+struct mbox_chan {
+ struct mbox_controller *mbox;
+ unsigned txdone_method;
+ struct mbox_client *cl;
+ struct completion tx_complete;
+ void *active_req;
+ unsigned msg_count, msg_free;
+ void *msg_data[MBOX_TX_QUEUE_LEN];
+ spinlock_t lock; /* Serialise access to the channel */
+ void *con_priv;
+};
+
+int mbox_controller_register(struct mbox_controller *mbox); /* can sleep */
+void mbox_controller_unregister(struct mbox_controller *mbox); /* can sleep */
+void mbox_chan_received_data(struct mbox_chan *chan, void *data); /* atomic */
+void mbox_chan_txdone(struct mbox_chan *chan, int r); /* atomic */
+
+#endif /* __MAILBOX_CONTROLLER_H */
diff --git a/include/linux/memory.h b/include/linux/memory.h
index bb7384e..8b8d8d1 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -35,7 +35,7 @@
};
int arch_get_memory_phys_device(unsigned long start_pfn);
-unsigned long __weak memory_block_size_bytes(void);
+unsigned long memory_block_size_bytes(void);
/* These states are exposed to userspace as text strings in sysfs */
#define MEM_ONLINE (1<<0) /* exposed to userspace */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 02d11ee..27eb1bf 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1176,6 +1176,7 @@
extern void truncate_pagecache(struct inode *inode, loff_t new);
extern void truncate_setsize(struct inode *inode, loff_t newsize);
+void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to);
void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end);
int truncate_inode_page(struct address_space *mapping, struct page *page);
int generic_error_remove_page(struct address_space *mapping, struct page *page);
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 9262e4b..c2c561d 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -81,6 +81,9 @@
extern struct vfsmount *mnt_clone_internal(struct path *path);
extern int __mnt_is_readonly(struct vfsmount *mnt);
+struct path;
+extern struct vfsmount *clone_private_mount(struct path *path);
+
struct file_system_type;
extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
int flags, const char *name,
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 647395a..e8d6e10 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -50,6 +50,9 @@
extern unsigned long oom_badness(struct task_struct *p,
struct mem_cgroup *memcg, const nodemask_t *nodemask,
unsigned long totalpages);
+
+extern int oom_kills_count(void);
+extern void note_oom_kill(void);
extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
unsigned int points, unsigned long totalpages,
struct mem_cgroup *memcg, nodemask_t *nodemask,
diff --git a/include/linux/mailbox.h b/include/linux/pl320-ipc.h
similarity index 100%
rename from include/linux/mailbox.h
rename to include/linux/pl320-ipc.h
diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h
index 9ab4bf7c..636e828 100644
--- a/include/linux/pm_qos.h
+++ b/include/linux/pm_qos.h
@@ -15,6 +15,7 @@
PM_QOS_CPU_DMA_LATENCY,
PM_QOS_NETWORK_LATENCY,
PM_QOS_NETWORK_THROUGHPUT,
+ PM_QOS_MEMORY_BANDWIDTH,
/* insert new class ID */
PM_QOS_NUM_CLASSES,
@@ -32,6 +33,7 @@
#define PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC)
#define PM_QOS_NETWORK_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC)
#define PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE 0
+#define PM_QOS_MEMORY_BANDWIDTH_DEFAULT_VALUE 0
#define PM_QOS_RESUME_LATENCY_DEFAULT_VALUE 0
#define PM_QOS_LATENCY_TOLERANCE_DEFAULT_VALUE 0
#define PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT (-1)
@@ -69,7 +71,8 @@
enum pm_qos_type {
PM_QOS_UNITIALIZED,
PM_QOS_MAX, /* return the largest value */
- PM_QOS_MIN /* return the smallest value */
+ PM_QOS_MIN, /* return the smallest value */
+ PM_QOS_SUM /* return the sum */
};
/*
diff --git a/include/linux/pnfs_osd_xdr.h b/include/linux/pnfs_osd_xdr.h
index fe25876..17d7d0d 100644
--- a/include/linux/pnfs_osd_xdr.h
+++ b/include/linux/pnfs_osd_xdr.h
@@ -5,7 +5,7 @@
* All rights reserved.
*
* Benny Halevy <bhalevy@panasas.com>
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2
diff --git a/include/linux/string.h b/include/linux/string.h
index e6edfe5..2e22a2e 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -132,7 +132,7 @@
#endif
extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
- const void *from, size_t available);
+ const void *from, size_t available);
/**
* strstarts - does @str start with @prefix?
@@ -144,7 +144,8 @@
return strncmp(str, prefix, strlen(prefix)) == 0;
}
-extern size_t memweight(const void *ptr, size_t bytes);
+size_t memweight(const void *ptr, size_t bytes);
+void memzero_explicit(void *s, size_t count);
/**
* kbasename - return the last part of a pathname.
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 0305cde..ef90838 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -44,6 +44,10 @@
#define KELVIN_TO_CELSIUS(t) (long)(((long)t-2732 >= 0) ? \
((long)t-2732+5)/10 : ((long)t-2732-5)/10)
#define CELSIUS_TO_KELVIN(t) ((t)*10+2732)
+#define DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(t, off) (((t) - (off)) * 100)
+#define DECI_KELVIN_TO_MILLICELSIUS(t) DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(t, 2732)
+#define MILLICELSIUS_TO_DECI_KELVIN_WITH_OFFSET(t, off) (((t) / 100) + (off))
+#define MILLICELSIUS_TO_DECI_KELVIN(t) MILLICELSIUS_TO_DECI_KELVIN_WITH_OFFSET(t, 2732)
/* Adding event notification support elements */
#define THERMAL_GENL_FAMILY_NAME "thermal_event"
diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h
index 1ad4724..baa8171 100644
--- a/include/linux/uio_driver.h
+++ b/include/linux/uio_driver.h
@@ -63,7 +63,17 @@
#define MAX_UIO_PORT_REGIONS 5
-struct uio_device;
+struct uio_device {
+ struct module *owner;
+ struct device *dev;
+ int minor;
+ atomic_t event;
+ struct fasync_struct *async_queue;
+ wait_queue_head_t wait;
+ struct uio_info *info;
+ struct kobject *map_dir;
+ struct kobject *portio_dir;
+};
/**
* struct uio_info - UIO device capabilities
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 4f844c6..60beb5d 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -98,11 +98,11 @@
struct xol_area *xol_area;
};
-extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
-extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
-extern bool __weak is_swbp_insn(uprobe_opcode_t *insn);
-extern bool __weak is_trap_insn(uprobe_opcode_t *insn);
-extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs);
+extern int set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
+extern int set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
+extern bool is_swbp_insn(uprobe_opcode_t *insn);
+extern bool is_trap_insn(uprobe_opcode_t *insn);
+extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs);
extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs);
extern int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t);
extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
@@ -128,8 +128,8 @@
extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);
-extern bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs);
-extern void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
+extern bool arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs);
+extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
void *src, unsigned long len);
#else /* !CONFIG_UPROBES */
struct uprobes_state {
diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h
index 2a3038e..395b70e 100644
--- a/include/linux/watchdog.h
+++ b/include/linux/watchdog.h
@@ -97,13 +97,8 @@
#define WDOG_UNREGISTERED 4 /* Has the device been unregistered */
};
-#ifdef CONFIG_WATCHDOG_NOWAYOUT
-#define WATCHDOG_NOWAYOUT 1
-#define WATCHDOG_NOWAYOUT_INIT_STATUS (1 << WDOG_NO_WAY_OUT)
-#else
-#define WATCHDOG_NOWAYOUT 0
-#define WATCHDOG_NOWAYOUT_INIT_STATUS 0
-#endif
+#define WATCHDOG_NOWAYOUT IS_BUILTIN(CONFIG_WATCHDOG_NOWAYOUT)
+#define WATCHDOG_NOWAYOUT_INIT_STATUS (WATCHDOG_NOWAYOUT << WDOG_NO_WAY_OUT)
/* Use the following function to check whether or not the watchdog is active */
static inline bool watchdog_active(struct watchdog_device *wdd)
diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h
index b2e85fd..a09cca8 100644
--- a/include/scsi/osd_initiator.h
+++ b/include/scsi/osd_initiator.h
@@ -4,7 +4,7 @@
* Copyright (C) 2008 Panasas Inc. All rights reserved.
*
* Authors:
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
* Benny Halevy <bhalevy@panasas.com>
*
* This program is free software; you can redistribute it and/or modify
diff --git a/include/scsi/osd_ore.h b/include/scsi/osd_ore.h
index 6ca3265..7a8d2cd 100644
--- a/include/scsi/osd_ore.h
+++ b/include/scsi/osd_ore.h
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2011
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
*
* Public Declarations of the ORE API
*
diff --git a/include/scsi/osd_protocol.h b/include/scsi/osd_protocol.h
index a2594af..e0ca835 100644
--- a/include/scsi/osd_protocol.h
+++ b/include/scsi/osd_protocol.h
@@ -4,7 +4,7 @@
* Copyright (C) 2008 Panasas Inc. All rights reserved.
*
* Authors:
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
* Benny Halevy <bhalevy@panasas.com>
*
* This program is free software; you can redistribute it and/or modify
@@ -496,7 +496,7 @@
*/
struct osd_key_identifier {
- u8 id[7]; /* if you know why 7 please email bharrosh@panasas.com */
+ u8 id[7]; /* if you know why 7 please email ooo@electrozaur.com */
} __packed;
/* for osd_capability.format */
diff --git a/include/scsi/osd_sec.h b/include/scsi/osd_sec.h
index f96151c..7abeb0f 100644
--- a/include/scsi/osd_sec.h
+++ b/include/scsi/osd_sec.h
@@ -4,7 +4,7 @@
* Copyright (C) 2008 Panasas Inc. All rights reserved.
*
* Authors:
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
* Benny Halevy <bhalevy@panasas.com>
*
* This program is free software; you can redistribute it and/or modify
diff --git a/include/scsi/osd_sense.h b/include/scsi/osd_sense.h
index 91db543..d52aa93 100644
--- a/include/scsi/osd_sense.h
+++ b/include/scsi/osd_sense.h
@@ -4,7 +4,7 @@
* Copyright (C) 2008 Panasas Inc. All rights reserved.
*
* Authors:
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
* Benny Halevy <bhalevy@panasas.com>
*
* This program is free software; you can redistribute it and/or modify
diff --git a/include/scsi/osd_types.h b/include/scsi/osd_types.h
index bd0be7e..48e8a16 100644
--- a/include/scsi/osd_types.h
+++ b/include/scsi/osd_types.h
@@ -4,7 +4,7 @@
* Copyright (C) 2008 Panasas Inc. All rights reserved.
*
* Authors:
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
* Benny Halevy <bhalevy@panasas.com>
*
* This program is free software; you can redistribute it and/or modify
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 9ec9864..23c518a 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -108,6 +108,8 @@
#define DA_EMULATE_ALUA 0
/* Enforce SCSI Initiator Port TransportID with 'ISID' for PR */
#define DA_ENFORCE_PR_ISIDS 1
+/* Force SPC-3 PR Activate Persistence across Target Power Loss */
+#define DA_FORCE_PR_APTPL 0
#define DA_STATUS_MAX_SECTORS_MIN 16
#define DA_STATUS_MAX_SECTORS_MAX 8192
/* By default don't report non-rotating (solid state) medium */
@@ -680,6 +682,7 @@
enum target_prot_type pi_prot_type;
enum target_prot_type hw_pi_prot_type;
int enforce_pr_isids;
+ int force_pr_aptpl;
int is_nonrot;
int emulate_rest_reord;
u32 hw_block_size;
@@ -903,4 +906,18 @@
struct config_group fabric_stat_group;
};
+static inline void atomic_inc_mb(atomic_t *v) /* atomic_inc() with a barrier on each side */
+{
+ smp_mb__before_atomic();
+ atomic_inc(v);
+ smp_mb__after_atomic();
+}
+
+static inline void atomic_dec_mb(atomic_t *v) /* atomic_dec() with a barrier on each side */
+{
+ smp_mb__before_atomic();
+ atomic_dec(v);
+ smp_mb__after_atomic();
+}
+
#endif /* TARGET_CORE_BASE_H */
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index d4f70a7..ff4bd1b 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -2369,7 +2369,7 @@
show_extent_status(__entry->found ? __entry->status : 0))
);
-TRACE_EVENT(ext4_es_shrink_enter,
+DECLARE_EVENT_CLASS(ext4__es_shrink_enter,
TP_PROTO(struct super_block *sb, int nr_to_scan, int cache_cnt),
TP_ARGS(sb, nr_to_scan, cache_cnt),
@@ -2391,26 +2391,38 @@
__entry->nr_to_scan, __entry->cache_cnt)
);
-TRACE_EVENT(ext4_es_shrink_exit,
- TP_PROTO(struct super_block *sb, int shrunk_nr, int cache_cnt),
+DEFINE_EVENT(ext4__es_shrink_enter, ext4_es_shrink_count,
+ TP_PROTO(struct super_block *sb, int nr_to_scan, int cache_cnt),
- TP_ARGS(sb, shrunk_nr, cache_cnt),
+ TP_ARGS(sb, nr_to_scan, cache_cnt)
+);
+
+DEFINE_EVENT(ext4__es_shrink_enter, ext4_es_shrink_scan_enter,
+ TP_PROTO(struct super_block *sb, int nr_to_scan, int cache_cnt),
+
+ TP_ARGS(sb, nr_to_scan, cache_cnt)
+);
+
+TRACE_EVENT(ext4_es_shrink_scan_exit,
+ TP_PROTO(struct super_block *sb, int nr_shrunk, int cache_cnt),
+
+ TP_ARGS(sb, nr_shrunk, cache_cnt),
TP_STRUCT__entry(
__field( dev_t, dev )
- __field( int, shrunk_nr )
+ __field( int, nr_shrunk )
__field( int, cache_cnt )
),
TP_fast_assign(
__entry->dev = sb->s_dev;
- __entry->shrunk_nr = shrunk_nr;
+ __entry->nr_shrunk = nr_shrunk;
__entry->cache_cnt = cache_cnt;
),
- TP_printk("dev %d,%d shrunk_nr %d cache_cnt %d",
+ TP_printk("dev %d,%d nr_shrunk %d cache_cnt %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->shrunk_nr, __entry->cache_cnt)
+ __entry->nr_shrunk, __entry->cache_cnt)
);
TRACE_EVENT(ext4_collapse_range,
@@ -2438,6 +2450,37 @@
__entry->offset, __entry->len)
);
+TRACE_EVENT(ext4_es_shrink, /* records the outcome of one extent-status shrink pass */
+ TP_PROTO(struct super_block *sb, int nr_shrunk, u64 scan_time,
+ int skip_precached, int nr_skipped, int retried),
+
+ TP_ARGS(sb, nr_shrunk, scan_time, skip_precached, nr_skipped, retried),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( int, nr_shrunk )
+ __field( unsigned long long, scan_time )
+ __field( int, skip_precached )
+ __field( int, nr_skipped )
+ __field( int, retried )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sb->s_dev;
+ __entry->nr_shrunk = nr_shrunk;
+ __entry->scan_time = div_u64(scan_time, 1000); /* NOTE(review): stored value is scan_time / 1000 -- presumably ns to us; confirm units with caller */
+ __entry->skip_precached = skip_precached;
+ __entry->nr_skipped = nr_skipped;
+ __entry->retried = retried;
+ ),
+
+ TP_printk("dev %d,%d nr_shrunk %d, scan_time %llu skip_precached %d "
+ "nr_skipped %d retried %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nr_shrunk,
+ __entry->scan_time, __entry->skip_precached,
+ __entry->nr_skipped, __entry->retried)
+);
+
#endif /* _TRACE_EXT4_H */
/* This part must be outside protection */
diff --git a/include/trace/events/thermal.h b/include/trace/events/thermal.h
new file mode 100644
index 0000000..0f4f95d
--- /dev/null
+++ b/include/trace/events/thermal.h
@@ -0,0 +1,83 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM thermal
+
+#if !defined(_TRACE_THERMAL_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_THERMAL_H
+
+#include <linux/thermal.h>
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(thermal_temperature,
+
+ TP_PROTO(struct thermal_zone_device *tz),
+
+ TP_ARGS(tz),
+
+ TP_STRUCT__entry(
+ __string(thermal_zone, tz->type)
+ __field(int, id)
+ __field(int, temp_prev)
+ __field(int, temp)
+ ),
+
+ TP_fast_assign(
+ __assign_str(thermal_zone, tz->type);
+ __entry->id = tz->id;
+ __entry->temp_prev = tz->last_temperature;
+ __entry->temp = tz->temperature;
+ ),
+
+ TP_printk("thermal_zone=%s id=%d temp_prev=%d temp=%d",
+ __get_str(thermal_zone), __entry->id, __entry->temp_prev,
+ __entry->temp)
+);
+
+TRACE_EVENT(cdev_update,
+
+ TP_PROTO(struct thermal_cooling_device *cdev, unsigned long target),
+
+ TP_ARGS(cdev, target),
+
+ TP_STRUCT__entry(
+ __string(type, cdev->type)
+ __field(unsigned long, target)
+ ),
+
+ TP_fast_assign(
+ __assign_str(type, cdev->type);
+ __entry->target = target;
+ ),
+
+ TP_printk("type=%s target=%lu", __get_str(type), __entry->target)
+);
+
+TRACE_EVENT(thermal_zone_trip,
+
+ TP_PROTO(struct thermal_zone_device *tz, int trip,
+ enum thermal_trip_type trip_type),
+
+ TP_ARGS(tz, trip, trip_type),
+
+ TP_STRUCT__entry(
+ __string(thermal_zone, tz->type)
+ __field(int, id)
+ __field(int, trip)
+ __field(enum thermal_trip_type, trip_type)
+ ),
+
+ TP_fast_assign(
+ __assign_str(thermal_zone, tz->type);
+ __entry->id = tz->id;
+ __entry->trip = trip;
+ __entry->trip_type = trip_type;
+ ),
+
+ TP_printk("thermal_zone=%s id=%d trip=%d trip_type=%d",
+ __get_str(thermal_zone), __entry->id, __entry->trip,
+ __entry->trip_type)
+);
+
+#endif /* _TRACE_THERMAL_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 6cad974..b70237e 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -374,6 +374,7 @@
header-y += synclink.h
header-y += sysctl.h
header-y += sysinfo.h
+header-y += target_core_user.h
header-y += taskstats.h
header-y += tcp.h
header-y += tcp_metrics.h
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index ca1a11b..3735fa0a 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -37,6 +37,7 @@
#define RENAME_NOREPLACE (1 << 0) /* Don't overwrite target */
#define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */
+#define RENAME_WHITEOUT (1 << 2) /* Whiteout source */
struct fstrim_range {
__u64 start;
diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h
new file mode 100644
index 0000000..7dcfbe6
--- /dev/null
+++ b/include/uapi/linux/target_core_user.h
@@ -0,0 +1,142 @@
+#ifndef __TARGET_CORE_USER_H
+#define __TARGET_CORE_USER_H
+
+/* This header is also used by userspace applications */
+
+#include <linux/types.h>
+#include <linux/uio.h>
+
+#ifndef __packed
+#define __packed __attribute__((packed))
+#endif
+
+#define TCMU_VERSION "1.0"
+
+/*
+ * Ring Design
+ * -----------
+ *
+ * The mmaped area is divided into three parts:
+ * 1) The mailbox (struct tcmu_mailbox, below)
+ * 2) The command ring
+ * 3) Everything beyond the command ring (data)
+ *
+ * The mailbox tells userspace the offset of the command ring from the
+ * start of the shared memory region, and how big the command ring is.
+ *
+ * The kernel passes SCSI commands to userspace by putting a struct
+ * tcmu_cmd_entry in the ring, updating mailbox->cmd_head, and poking
+ * userspace via uio's interrupt mechanism.
+ *
+ * tcmu_cmd_entry contains a header. If the header's opcode is PAD,
+ * userspace should skip tcmu_hdr_get_len(hdr) bytes (mod cmdr_size) to
+ * find the next cmd_entry.
+ *
+ * Otherwise, the entry will contain offsets into the mmaped area that
+ * contain the cdb and data buffers -- the latter accessible via the
+ * iov array. iov addresses are also offsets into the shared area.
+ *
+ * When userspace has finished handling the command, it should set
+ * entry->rsp.scsi_status, fill in rsp.sense_buffer if appropriate,
+ * and advance mailbox->cmd_tail to the old cmd_tail plus
+ * tcmu_hdr_get_len(hdr), mod cmdr_size. If cmd_tail still doesn't equal
+ * cmd_head, it should process the next entry the same way, and so on.
+ */
+
+#define TCMU_MAILBOX_VERSION 1
+#define ALIGN_SIZE 64 /* Should be enough for most CPUs */
+
+struct tcmu_mailbox {
+ __u16 version;
+ __u16 flags;
+ __u32 cmdr_off;
+ __u32 cmdr_size;
+
+ __u32 cmd_head;
+
+ /* Updated by user. On its own cacheline */
+ __u32 cmd_tail __attribute__((__aligned__(ALIGN_SIZE)));
+
+} __packed;
+
+enum tcmu_opcode {
+ TCMU_OP_PAD = 0,
+ TCMU_OP_CMD,
+};
+
+/*
+ * Only a few opcodes, and length is 8-byte aligned, so use low bits for opcode.
+ */
+struct tcmu_cmd_entry_hdr {
+ __u32 len_op;
+} __packed;
+
+#define TCMU_OP_MASK 0x7
+
+static inline enum tcmu_opcode tcmu_hdr_get_op(struct tcmu_cmd_entry_hdr *hdr)
+{
+ return hdr->len_op & TCMU_OP_MASK; /* opcode is kept in the low TCMU_OP_MASK bits */
+}
+
+static inline void tcmu_hdr_set_op(struct tcmu_cmd_entry_hdr *hdr, enum tcmu_opcode op)
+{
+ hdr->len_op &= ~TCMU_OP_MASK; /* clear the old opcode, keep the length bits */
+ hdr->len_op |= (op & TCMU_OP_MASK);
+}
+
+static inline __u32 tcmu_hdr_get_len(struct tcmu_cmd_entry_hdr *hdr)
+{
+ return hdr->len_op & ~TCMU_OP_MASK; /* length is everything above the opcode bits */
+}
+
+static inline void tcmu_hdr_set_len(struct tcmu_cmd_entry_hdr *hdr, __u32 len)
+{
+ hdr->len_op &= TCMU_OP_MASK; /* drop the old length, keep the opcode */
+ hdr->len_op |= len; /* NOTE(review): len is OR-ed in unmasked; relies on len having its low TCMU_OP_MASK bits clear */
+}
+
+/* Currently the same as SCSI_SENSE_BUFFERSIZE */
+#define TCMU_SENSE_BUFFERSIZE 96
+
+struct tcmu_cmd_entry { /* one command-ring entry; __u* types match the rest of this UAPI header */
+ struct tcmu_cmd_entry_hdr hdr;
+
+ __u16 cmd_id;
+ __u16 __pad1;
+
+ union {
+ struct {
+ __u64 cdb_off; /* offset of the cdb within the mmaped area */
+ __u64 iov_cnt;
+ struct iovec iov[0]; /* iov addresses are offsets into the shared area */
+ } req;
+ struct {
+ __u8 scsi_status; /* set by userspace once the command is handled */
+ __u8 __pad1;
+ __u16 __pad2;
+ __u32 __pad3;
+ char sense_buffer[TCMU_SENSE_BUFFERSIZE]; /* filled in by userspace if appropriate */
+ } rsp;
+ };
+
+} __packed;
+
+#define TCMU_OP_ALIGN_SIZE sizeof(__u64)
+
+enum tcmu_genl_cmd {
+ TCMU_CMD_UNSPEC,
+ TCMU_CMD_ADDED_DEVICE,
+ TCMU_CMD_REMOVED_DEVICE,
+ __TCMU_CMD_MAX,
+};
+#define TCMU_CMD_MAX (__TCMU_CMD_MAX - 1)
+
+enum tcmu_genl_attr {
+ TCMU_ATTR_UNSPEC,
+ TCMU_ATTR_DEVICE,
+ TCMU_ATTR_MINOR,
+ __TCMU_ATTR_MAX,
+};
+#define TCMU_ATTR_MAX (__TCMU_ATTR_MAX - 1)
+
+#endif
diff --git a/kernel/freezer.c b/kernel/freezer.c
index aa6a8aa..a8900a3 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -42,6 +42,9 @@
if (p->flags & (PF_NOFREEZE | PF_SUSPEND_TASK))
return false;
+ if (test_thread_flag(TIF_MEMDIE))
+ return false;
+
if (pm_nosig_freezing || cgroup_freezing(p))
return true;
@@ -147,12 +150,6 @@
{
unsigned long flags;
- /*
- * Clear freezing and kick @p if FROZEN. Clearing is guaranteed to
- * be visible to @p as waking up implies wmb. Waking up inside
- * freezer_lock also prevents wakeups from leaking outside
- * refrigerator.
- */
spin_lock_irqsave(&freezer_lock, flags);
if (frozen(p))
wake_up_process(p);
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 7b32322..5a6ec86 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -46,13 +46,13 @@
while (true) {
todo = 0;
read_lock(&tasklist_lock);
- do_each_thread(g, p) {
+ for_each_process_thread(g, p) {
if (p == current || !freeze_task(p))
continue;
if (!freezer_should_skip(p))
todo++;
- } while_each_thread(g, p);
+ }
read_unlock(&tasklist_lock);
if (!user_only) {
@@ -93,11 +93,11 @@
if (!wakeup) {
read_lock(&tasklist_lock);
- do_each_thread(g, p) {
+ for_each_process_thread(g, p) {
if (p != current && !freezer_should_skip(p)
&& freezing(p) && !frozen(p))
sched_show_task(p);
- } while_each_thread(g, p);
+ }
read_unlock(&tasklist_lock);
}
} else {
@@ -108,6 +108,30 @@
return todo ? -EBUSY : 0;
}
+static bool __check_frozen_processes(void) /* lockless walk; check_frozen_processes() wraps it in tasklist_lock */
+{
+ struct task_struct *g, *p;
+
+ for_each_process_thread(g, p)
+ if (p != current && !freezer_should_skip(p) && !frozen(p))
+ return false; /* found a non-skipped task other than current that is not frozen */
+
+ return true;
+}
+
+/*
+ * Returns true if all freezable tasks (except for current) are already
+ * frozen. Takes tasklist_lock for reading around the task walk.
+ */
+static bool check_frozen_processes(void)
+{
+ bool ret;
+
+ read_lock(&tasklist_lock);
+ ret = __check_frozen_processes();
+ read_unlock(&tasklist_lock);
+ return ret;
+}
+
/**
* freeze_processes - Signal user space processes to enter the refrigerator.
* The current thread will not be frozen. The same process that calls
@@ -118,6 +142,7 @@
int freeze_processes(void)
{
int error;
+ int oom_kills_saved;
error = __usermodehelper_disable(UMH_FREEZING);
if (error)
@@ -132,11 +157,25 @@
pm_wakeup_clear();
printk("Freezing user space processes ... ");
pm_freezing = true;
+ oom_kills_saved = oom_kills_count();
error = try_to_freeze_tasks(true);
if (!error) {
- printk("done.");
__usermodehelper_set_disable_depth(UMH_DISABLED);
oom_killer_disable();
+
+ /*
+ * There might have been an OOM kill while we were
+ * freezing tasks and the killed task might be still
+ * on the way out so we have to double check for race.
+ */
+ if (oom_kills_count() != oom_kills_saved &&
+ !check_frozen_processes()) {
+ __usermodehelper_set_disable_depth(UMH_ENABLED);
+ printk("OOM in progress.");
+ error = -EBUSY;
+ } else {
+ printk("done.");
+ }
}
printk("\n");
BUG_ON(in_atomic());
@@ -191,11 +230,11 @@
thaw_workqueues();
read_lock(&tasklist_lock);
- do_each_thread(g, p) {
+ for_each_process_thread(g, p) {
/* No other threads should have PF_SUSPEND_TASK set */
WARN_ON((p != curr) && (p->flags & PF_SUSPEND_TASK));
__thaw_task(p);
- } while_each_thread(g, p);
+ }
read_unlock(&tasklist_lock);
WARN_ON(!(curr->flags & PF_SUSPEND_TASK));
@@ -218,10 +257,10 @@
thaw_workqueues();
read_lock(&tasklist_lock);
- do_each_thread(g, p) {
+ for_each_process_thread(g, p) {
if (p->flags & (PF_KTHREAD | PF_WQ_WORKER))
__thaw_task(p);
- } while_each_thread(g, p);
+ }
read_unlock(&tasklist_lock);
schedule();
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index 884b770..5f4c006 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -105,11 +105,27 @@
};
+static BLOCKING_NOTIFIER_HEAD(memory_bandwidth_notifier);
+static struct pm_qos_constraints memory_bw_constraints = {
+ .list = PLIST_HEAD_INIT(memory_bw_constraints.list),
+ .target_value = PM_QOS_MEMORY_BANDWIDTH_DEFAULT_VALUE,
+ .default_value = PM_QOS_MEMORY_BANDWIDTH_DEFAULT_VALUE,
+ .no_constraint_value = PM_QOS_MEMORY_BANDWIDTH_DEFAULT_VALUE,
+ .type = PM_QOS_SUM,
+ .notifiers = &memory_bandwidth_notifier,
+};
+static struct pm_qos_object memory_bandwidth_pm_qos = {
+ .constraints = &memory_bw_constraints,
+ .name = "memory_bandwidth",
+};
+
+
static struct pm_qos_object *pm_qos_array[] = {
&null_pm_qos,
&cpu_dma_pm_qos,
&network_lat_pm_qos,
- &network_throughput_pm_qos
+ &network_throughput_pm_qos,
+ &memory_bandwidth_pm_qos,
};
static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
@@ -130,6 +146,9 @@
/* unlocked internal variant */
static inline int pm_qos_get_value(struct pm_qos_constraints *c)
{
+ struct plist_node *node;
+ int total_value = 0;
+
if (plist_head_empty(&c->list))
return c->no_constraint_value;
@@ -140,6 +159,12 @@
case PM_QOS_MAX:
return plist_last(&c->list)->prio;
+ case PM_QOS_SUM:
+ plist_for_each(node, &c->list)
+ total_value += node->prio;
+
+ return total_value;
+
default:
/* runtime check for not using enum */
BUG();
diff --git a/lib/cmdline.c b/lib/cmdline.c
index 76a712e..8f13cf7 100644
--- a/lib/cmdline.c
+++ b/lib/cmdline.c
@@ -160,3 +160,32 @@
return ret;
}
EXPORT_SYMBOL(memparse);
+
+/**
+ * parse_option_str - Check whether an option appears in an option string
+ * @str: Comma-separated option string to search, e.g. "a=b,c"
+ * @option: Option name to look for
+ *
+ * Walks @str one comma-separated element at a time and compares each
+ * element against @option; a match must cover the whole element.
+ *
+ * Return: true if @option matches a whole element of @str, else false.
+ */
+bool parse_option_str(const char *str, const char *option)
+{
+ while (*str) {
+ if (!strncmp(str, option, strlen(option))) {
+ str += strlen(option);
+ if (!*str || *str == ',') /* element ends right after the match */
+ return true;
+ }
+
+ while (*str && *str != ',') /* skip the rest of this element */
+ str++;
+
+ if (*str == ',') /* step over the separator */
+ str++;
+ }
+
+ return false;
+}
diff --git a/lib/string.c b/lib/string.c
index 2fc20aa..1006330 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -598,6 +598,22 @@
EXPORT_SYMBOL(memset);
#endif
+/**
+ * memzero_explicit - Fill a region of memory (e.g. sensitive
+ * keying data) with 0s.
+ * @s: Pointer to the start of the area.
+ * @count: The size of the area in bytes.
+ *
+ * memzero_explicit() doesn't need an arch-specific version: it simply
+ * calls memset(), so any arch-specific memset() is used automatically.
+ */
+void memzero_explicit(void *s, size_t count)
+{
+ memset(s, 0, count);
+ OPTIMIZER_HIDE_VAR(s); /* NOTE(review): presumably stops the compiler from eliding the memset() -- confirm against the macro */
+}
+EXPORT_SYMBOL(memzero_explicit);
+
#ifndef __HAVE_ARCH_MEMCPY
/**
* memcpy - Copy one area of memory to another
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index bbf405a..5340f6b 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -404,6 +404,23 @@
dump_tasks(memcg, nodemask);
}
+/*
+ * Number of OOM killer invocations (including memcg OOM killer).
+ * Primarily used by the PM freezer to check for potential races with
+ * an OOM-killed frozen task.
+ */
+static atomic_t oom_kills = ATOMIC_INIT(0);
+
+int oom_kills_count(void) /* returns the current value of the oom_kills counter */
+{
+ return atomic_read(&oom_kills);
+}
+
+void note_oom_kill(void) /* bumps the oom_kills counter by one */
+{
+ atomic_inc(&oom_kills);
+}
+
#define K(x) ((x) << (PAGE_SHIFT-10))
/*
* Must be called while holding a reference to p, which will be released upon
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 736d8e1..9cd36b8 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2252,6 +2252,14 @@
}
/*
+ * PM-freezer should be notified that there might be an OOM killer on
+ * its way to kill and wake somebody up. This is too early and we might
+ * end up not killing anything but false positives are acceptable.
+ * See freeze_processes.
+ */
+ note_oom_kill();
+
+ /*
* Go through the zonelist yet one more time, keep very high watermark
* here, this is only to catch a parallel oom killing, we must fail if
* we're still under heavy pressure.
diff --git a/mm/shmem.c b/mm/shmem.c
index cd6fc75..185836b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2345,6 +2345,32 @@
return 0;
}
+static int shmem_whiteout(struct inode *old_dir, struct dentry *old_dentry)
+{
+ struct dentry *whiteout;
+ int error;
+
+ whiteout = d_alloc(old_dentry->d_parent, &old_dentry->d_name); /* new dentry with the same parent and name as the source */
+ if (!whiteout)
+ return -ENOMEM;
+
+ error = shmem_mknod(old_dir, whiteout,
+ S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
+ dput(whiteout); /* NOTE(review): whiteout is still passed to d_rehash() below; presumably shmem_mknod() took its own reference on success -- confirm */
+ if (error)
+ return error;
+
+ /*
+ * Cheat and hash the whiteout while the old dentry is still in
+ * place, instead of playing games with FS_RENAME_DOES_D_MOVE.
+ *
+ * d_lookup() will consistently find one of them at this point,
+ * not sure which one, but that isn't even important.
+ */
+ d_rehash(whiteout);
+ return 0;
+}
+
/*
* The VFS layer already does all the dentry stuff for rename,
* we just have to decrement the usage count for the target if
@@ -2356,7 +2382,7 @@
struct inode *inode = old_dentry->d_inode;
int they_are_dirs = S_ISDIR(inode->i_mode);
- if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
+ if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
return -EINVAL;
if (flags & RENAME_EXCHANGE)
@@ -2365,6 +2391,14 @@
if (!simple_empty(new_dentry))
return -ENOTEMPTY;
+ if (flags & RENAME_WHITEOUT) {
+ int error;
+
+ error = shmem_whiteout(old_dir, old_dentry);
+ if (error)
+ return error;
+ }
+
if (new_dentry->d_inode) {
(void) shmem_unlink(new_dir, new_dentry);
if (they_are_dirs) {
diff --git a/mm/truncate.c b/mm/truncate.c
index 96d1673..261eaf6 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -20,6 +20,7 @@
#include <linux/buffer_head.h> /* grr. try_to_release_page,
do_invalidatepage */
#include <linux/cleancache.h>
+#include <linux/rmap.h>
#include "internal.h"
static void clear_exceptional_entry(struct address_space *mapping,
@@ -719,12 +720,68 @@
*/
void truncate_setsize(struct inode *inode, loff_t newsize)
{
+ loff_t oldsize = inode->i_size;
+
i_size_write(inode, newsize);
+ if (newsize > oldsize)
+ pagecache_isize_extended(inode, oldsize, newsize);
truncate_pagecache(inode, newsize);
}
EXPORT_SYMBOL(truncate_setsize);
/**
+ * pagecache_isize_extended - update pagecache after extension of i_size
+ * @inode: inode for which i_size was extended
+ * @from: original inode size
+ * @to: new inode size
+ *
+ * Handle extension of inode size either caused by extending truncate or by
+ * write starting after current i_size. We mark the page straddling current
+ * i_size RO so that page_mkwrite() is called on the nearest write access to
+ * the page. This way filesystem can be sure that page_mkwrite() is called on
+ * the page before user writes to the page via mmap after the i_size has been
+ * changed.
+ *
+ * The function must be called after i_size is updated so that page fault
+ * coming after we unlock the page will already see the new i_size.
+ * The function must be called while we still hold i_mutex - this not only
+ * makes sure i_size is stable but also that userspace cannot observe new
+ * i_size value before we are prepared to store mmap writes at new inode size.
+ */
+void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to)
+{
+ int bsize = 1 << inode->i_blkbits; /* block size derived from i_blkbits */
+ loff_t rounded_from;
+ struct page *page;
+ pgoff_t index;
+
+ WARN_ON(!mutex_is_locked(&inode->i_mutex));
+ WARN_ON(to > inode->i_size);
+
+ if (from >= to || bsize == PAGE_CACHE_SIZE) /* not an extension, or blocks are page-sized */
+ return;
+ /* Page straddling @from will not have any hole block created? */
+ rounded_from = round_up(from, bsize);
+ if (to <= rounded_from || !(rounded_from & (PAGE_CACHE_SIZE - 1))) /* new size stays in the same block, or the next block starts on a page boundary */
+ return;
+
+ index = from >> PAGE_CACHE_SHIFT; /* index of the page containing @from */
+ page = find_lock_page(inode->i_mapping, index);
+ /* Page not cached? Nothing to do */
+ if (!page)
+ return;
+ /*
+ * See clear_page_dirty_for_io() for details why set_page_dirty()
+ * is needed.
+ */
+ if (page_mkclean(page))
+ set_page_dirty(page);
+ unlock_page(page);
+ page_cache_release(page);
+}
+EXPORT_SYMBOL(pagecache_isize_extended);
+
+/**
* truncate_pagecache_range - unmap and remove pagecache that is hole-punched
* @inode: inode
* @lstart: offset of beginning of hole
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index bfe1cf6..166d59c 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -781,16 +781,15 @@
{
struct snd_pcm_substream *s = NULL;
struct snd_pcm_substream *s1;
- int res = 0;
+ int res = 0, depth = 1;
snd_pcm_group_for_each_entry(s, substream) {
if (do_lock && s != substream) {
if (s->pcm->nonatomic)
- mutex_lock_nested(&s->self_group.mutex,
- SINGLE_DEPTH_NESTING);
+ mutex_lock_nested(&s->self_group.mutex, depth);
else
- spin_lock_nested(&s->self_group.lock,
- SINGLE_DEPTH_NESTING);
+ spin_lock_nested(&s->self_group.lock, depth);
+ depth++;
}
res = ops->pre_action(s, state);
if (res < 0)
@@ -906,8 +905,7 @@
down_read(&snd_pcm_link_rwsem);
if (snd_pcm_stream_linked(substream)) {
mutex_lock(&substream->group->mutex);
- mutex_lock_nested(&substream->self_group.mutex,
- SINGLE_DEPTH_NESTING);
+ mutex_lock(&substream->self_group.mutex);
res = snd_pcm_action_group(ops, substream, state, 1);
mutex_unlock(&substream->self_group.mutex);
mutex_unlock(&substream->group->mutex);
@@ -3311,7 +3309,7 @@
#ifndef ARCH_HAS_DMA_MMAP_COHERENT
/* This should be defined / handled globally! */
-#ifdef CONFIG_ARM
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
#define ARCH_HAS_DMA_MMAP_COHERENT
#endif
#endif
diff --git a/sound/pci/hda/hda_local.h b/sound/pci/hda/hda_local.h
index 7eb44e7..62658f2 100644
--- a/sound/pci/hda/hda_local.h
+++ b/sound/pci/hda/hda_local.h
@@ -419,7 +419,7 @@
.subvendor = _subvendor,\
.name = _name,\
.value = _value,\
- .pins = (const struct hda_pintbl[]) { _pins } \
+ .pins = (const struct hda_pintbl[]) { _pins, {0, 0}} \
}
#else
@@ -427,7 +427,7 @@
{ .codec = _codec,\
.subvendor = _subvendor,\
.value = _value,\
- .pins = (const struct hda_pintbl[]) { _pins } \
+ .pins = (const struct hda_pintbl[]) { _pins, {0, 0}} \
}
#endif
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 39862e9..9dc9cf8 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -1583,19 +1583,22 @@
}
}
- if (pin_eld->eld_valid && !eld->eld_valid) {
- update_eld = true;
+ if (pin_eld->eld_valid != eld->eld_valid)
eld_changed = true;
- }
+
+ if (pin_eld->eld_valid && !eld->eld_valid)
+ update_eld = true;
+
if (update_eld) {
bool old_eld_valid = pin_eld->eld_valid;
pin_eld->eld_valid = eld->eld_valid;
- eld_changed = pin_eld->eld_size != eld->eld_size ||
+ if (pin_eld->eld_size != eld->eld_size ||
memcmp(pin_eld->eld_buffer, eld->eld_buffer,
- eld->eld_size) != 0;
- if (eld_changed)
+ eld->eld_size) != 0) {
memcpy(pin_eld->eld_buffer, eld->eld_buffer,
eld->eld_size);
+ eld_changed = true;
+ }
pin_eld->eld_size = eld->eld_size;
pin_eld->info = eld->info;
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index bc86c36..34b7bdb 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -2884,6 +2884,9 @@
alc_write_coef_idx(codec, 0x43, 0x9004);
+ /*depop hp during suspend*/
+ alc_write_coef_idx(codec, 0x06, 0x2100);
+
snd_hda_codec_write(codec, hp_pin, 0,
AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE);
@@ -5610,9 +5613,9 @@
unsigned int oldval = spec->gpio_led;
if (enabled)
- spec->gpio_led &= ~0x01;
- else
spec->gpio_led |= 0x01;
+ else
+ spec->gpio_led &= ~0x01;
if (spec->gpio_led != oldval)
snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DATA,
spec->gpio_led);
diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h
index 223c47b..c657752 100644
--- a/sound/usb/quirks-table.h
+++ b/sound/usb/quirks-table.h
@@ -385,6 +385,36 @@
}
},
{
+ USB_DEVICE(0x0499, 0x1509),
+ .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {
+ /* .vendor_name = "Yamaha", */
+ /* .product_name = "Steinberg UR22", */
+ .ifnum = QUIRK_ANY_INTERFACE,
+ .type = QUIRK_COMPOSITE,
+ .data = (const struct snd_usb_audio_quirk[]) {
+ {
+ .ifnum = 1,
+ .type = QUIRK_AUDIO_STANDARD_INTERFACE
+ },
+ {
+ .ifnum = 2,
+ .type = QUIRK_AUDIO_STANDARD_INTERFACE
+ },
+ {
+ .ifnum = 3,
+ .type = QUIRK_MIDI_YAMAHA
+ },
+ {
+ .ifnum = 4,
+ .type = QUIRK_IGNORE_INTERFACE
+ },
+ {
+ .ifnum = -1
+ }
+ }
+ }
+},
+{
USB_DEVICE(0x0499, 0x150a),
.driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {
/* .vendor_name = "Yamaha", */
diff --git a/tools/power/acpi/os_specific/service_layers/osunixxf.c b/tools/power/acpi/os_specific/service_layers/osunixxf.c
index 60b58cd..7ccb073 100644
--- a/tools/power/acpi/os_specific/service_layers/osunixxf.c
+++ b/tools/power/acpi/os_specific/service_layers/osunixxf.c
@@ -122,6 +122,14 @@
{
struct termios local_term_attributes;
+ term_attributes_were_set = 0;
+
+ /* STDIN must be a terminal */
+
+ if (!isatty(STDIN_FILENO)) {
+ return;
+ }
+
/* Get and keep the original attributes */
if (tcgetattr(STDIN_FILENO, &original_term_attributes)) {
diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c
index 53cee78..24d32968 100644
--- a/tools/power/acpi/tools/acpidump/apdump.c
+++ b/tools/power/acpi/tools/acpidump/apdump.c
@@ -146,7 +146,7 @@
if (ACPI_VALIDATE_RSDP_SIG(table->signature)) {
rsdp = ACPI_CAST_PTR(struct acpi_table_rsdp, table);
- return (rsdp->length);
+ return (acpi_tb_get_rsdp_length(rsdp));
}
/* Normal ACPI table */
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index e51d9f9..c1e6ae9 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -43,13 +43,13 @@
gfn_t base_gfn, unsigned long npages);
static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
- unsigned long size)
+ unsigned long npages)
{
gfn_t end_gfn;
pfn_t pfn;
pfn = gfn_to_pfn_memslot(slot, gfn);
- end_gfn = gfn + (size >> PAGE_SHIFT);
+ end_gfn = gfn + npages;
gfn += 1;
if (is_error_noslot_pfn(pfn))
@@ -119,7 +119,7 @@
* Pin all pages we are about to map in memory. This is
* important because we unmap and unpin in 4kb steps later.
*/
- pfn = kvm_pin_pages(slot, gfn, page_size);
+ pfn = kvm_pin_pages(slot, gfn, page_size >> PAGE_SHIFT);
if (is_error_noslot_pfn(pfn)) {
gfn += 1;
continue;
@@ -131,7 +131,7 @@
if (r) {
printk(KERN_ERR "kvm_iommu_map_address:"
"iommu failed to map pfn=%llx\n", pfn);
- kvm_unpin_pages(kvm, pfn, page_size);
+ kvm_unpin_pages(kvm, pfn, page_size >> PAGE_SHIFT);
goto unmap_pages;
}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 384eaa7..25ffac9 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2354,6 +2354,12 @@
return 0;
}
+void kvm_unregister_device_ops(u32 type) /* clears the kvm_device_ops_table slot for @type */
+{
+ if (kvm_device_ops_table[type] != NULL) /* NOTE(review): type indexes the table without a range check here */
+ kvm_device_ops_table[type] = NULL;
+}
+
static int kvm_ioctl_create_device(struct kvm *kvm,
struct kvm_create_device *cd)
{
@@ -3328,5 +3334,6 @@
kvm_arch_exit();
kvm_irqfd_exit();
free_cpumask_var(cpus_hardware_enabled);
+ kvm_vfio_ops_exit();
}
EXPORT_SYMBOL_GPL(kvm_exit);
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index 281e7cf..620e37f 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -283,3 +283,8 @@
{
return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO);
}
+
+void kvm_vfio_ops_exit(void)
+{
+ kvm_unregister_device_ops(KVM_DEV_TYPE_VFIO);
+}
diff --git a/virt/kvm/vfio.h b/virt/kvm/vfio.h
index 92eac75..ab88c7d 100644
--- a/virt/kvm/vfio.h
+++ b/virt/kvm/vfio.h
@@ -3,11 +3,15 @@
#ifdef CONFIG_KVM_VFIO
int kvm_vfio_ops_init(void);
+void kvm_vfio_ops_exit(void);
#else
static inline int kvm_vfio_ops_init(void)
{
return 0;
}
+static inline void kvm_vfio_ops_exit(void)
+{
+}
#endif
#endif