fs/pstore/zone.c - linux - Git at Google

 // SPDX-License-Identifier: GPL-2.0
 /*
  * Provide a pstore intermediate backend, organized into kernel memory
  * allocated zones that are then mapped and flushed into a single
  * contiguous region on a storage backend of some kind (block, mtd, etc).
  */

 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/mount.h>
 #include <linux/printk.h>
 #include <linux/fs.h>
 #include <linux/pstore_zone.h>
 #include <linux/kdev_t.h>
 #include <linux/device.h>
 #include <linux/namei.h>
 #include <linux/fcntl.h>
 #include <linux/uio.h>
 #include <linux/writeback.h>
 #include "internal.h"

 /**
  * struct psz_buffer - header of zone to flush to storage
  *
  * @sig: signature to indicate header (PSZ_SIG xor PSZONE-type value)
  * @datalen: length of data in @data
  * @start: offset into @data where the beginning of the stored bytes begin
  * @data: zone data.
  *
  * This header sits at the very beginning of every zone, both in memory and
  * on storage; the payload follows it directly in @data.
  */
 struct psz_buffer {
 #define PSZ_SIG (0x43474244) /* DBGC */
 	uint32_t sig;
 	atomic_t datalen;
 	atomic_t start;
 	uint8_t data[];
 };

 /**
  * struct psz_kmsg_header - kmsg dump-specific header to flush to storage
  *
  * @magic: magic num for kmsg dump header
  * @time: kmsg dump trigger time
  * @compressed: whether the dump data is compressed
  * @counter: kmsg dump counter
  * @reason: the kmsg dump reason (e.g. oops, panic, etc)
  * @data: pointer to log data
  *
  * This is a sub-header for a kmsg dump, trailing after &psz_buffer.
  */
 struct psz_kmsg_header {
 #define PSTORE_KMSG_HEADER_MAGIC 0x4dfc3ae5 /* Just a random number */
 	uint32_t magic;
 	struct timespec64 time;
 	bool compressed;
 	uint32_t counter;
 	enum kmsg_dump_reason reason;
 	uint8_t data[];
 };

 /**
  * struct pstore_zone - single stored buffer
  *
  * @off: zone offset of storage
  * @name: front-end name for this zone
  * @type: front-end type for this zone
  * @buffer: pointer to data buffer managed by this zone
  * @oldbuf: pointer to old data buffer
  * @buffer_size: bytes in @buffer->data
  * @should_recover: whether this zone should recover from storage
  * @dirty: whether the data in @buffer is dirty, i.e. still has to be
  *	flushed to storage
  *
  * zone structure in memory.
  */
 struct pstore_zone {
 	loff_t off;
 	const char *name;
 	enum pstore_type_id type;

 	struct psz_buffer *buffer;
 	struct psz_buffer *oldbuf;
 	size_t buffer_size;
 	bool should_recover;
 	atomic_t dirty;
 };

 /**
  * struct psz_context - all about running state of pstore/zone
  *
  * @kpszs: kmsg dump storage zones
  * @ppsz: pmsg storage zone
  * @cpsz: console storage zone
  * @fpszs: ftrace storage zones
  * @kmsg_max_cnt: max count of @kpszs
  * @kmsg_read_cnt: counter of total read kmsg dumps
  * @kmsg_write_cnt: index into @kpszs of the next kmsg zone to write; wraps
  *	around at @kmsg_max_cnt
  * @pmsg_read_cnt: counter of total read pmsg zone
  * @console_read_cnt: counter of total read console zone
  * @ftrace_max_cnt: max count of @fpszs
  * @ftrace_read_cnt: counter of total read ftrace zones
  * @oops_counter: counter of oops dumps
  * @panic_counter: counter of panic dumps
  * @recovered: whether finished recovering data from storage
  * @on_panic: whether panic is happening
  * @pstore_zone_info_lock: lock to @pstore_zone_info
  * @pstore_zone_info: information from backend
  * @pstore: structure for pstore
  */
 struct psz_context {
 	struct pstore_zone **kpszs;
 	struct pstore_zone *ppsz;
 	struct pstore_zone *cpsz;
 	struct pstore_zone **fpszs;
 	unsigned int kmsg_max_cnt;
 	unsigned int kmsg_read_cnt;
 	unsigned int kmsg_write_cnt;
 	unsigned int pmsg_read_cnt;
 	unsigned int console_read_cnt;
 	unsigned int ftrace_max_cnt;
 	unsigned int ftrace_read_cnt;
 	/*
 	 * These counters should be calculated during recovery.
 	 * It records the oops/panic times after crashes rather than boots.
 	 */
 	unsigned int oops_counter;
 	unsigned int panic_counter;
 	atomic_t recovered;
 	atomic_t on_panic;

 	/*
 	 * pstore_zone_info_lock protects this entire structure during calls
 	 * to register_pstore_zone()/unregister_pstore_zone().
 	 */
 	struct mutex pstore_zone_info_lock;
 	struct pstore_zone_info *pstore_zone_info;
 	struct pstore_info pstore;
 };
 /* Forward declaration; the initialized definition appears later in the file. */
 static struct psz_context pstore_zone_cxt;

 /*
  * Delayed work that retries flushing of dirty zones; it is scheduled when a
  * backend write reports -EBUSY or when a flush pass leaves zones dirty.
  */
 static void psz_flush_all_dirty_zones(struct work_struct *);
 static DECLARE_DELAYED_WORK(psz_cleaner, psz_flush_all_dirty_zones);

 /**
  * enum psz_flush_mode - flush mode for psz_zone_write()
  *
  * @FLUSH_NONE: do not flush to storage but update data on memory
  * @FLUSH_PART: just flush part of data including meta data to storage
  * @FLUSH_META: just flush meta data of zone to storage
  * @FLUSH_ALL: flush all of zone
  */
 enum psz_flush_mode {
 	FLUSH_NONE = 0,	/* memory only */
 	FLUSH_PART,	/* written data range, then the zone header */
 	FLUSH_META,	/* zone header only */
 	FLUSH_ALL,	/* zone header plus the whole data area */
 };

 static inline int buffer_datalen(struct pstore_zone *zone)
 {
 	return atomic_read(&zone->buffer->datalen);
 }

 static inline int buffer_start(struct pstore_zone *zone)
 {
 	return atomic_read(&zone->buffer->start);
 }

 /* Tell whether the global context has been flagged as handling a panic. */
 static inline bool is_on_panic(void)
 {
 	return atomic_read(&pstore_zone_cxt.on_panic) != 0;
 }

 /*
  * Copy up to @len bytes from the live buffer of @zone, starting at data
  * offset @off, into @buf. The copy is clamped to the end of the data area.
  *
  * Return: number of bytes copied, or -EINVAL on a missing argument/buffer or
  * an offset beyond the data area.
  */
 static ssize_t psz_zone_read_buffer(struct pstore_zone *zone, char *buf,
 		size_t len, unsigned long off)
 {
 	size_t avail;

 	if (!buf || !zone || !zone->buffer)
 		return -EINVAL;
 	if (off > zone->buffer_size)
 		return -EINVAL;

 	avail = zone->buffer_size - off;
 	if (len > avail)
 		len = avail;

 	memcpy(buf, zone->buffer->data + off, len);
 	return len;
 }

 /*
  * Copy up to @len bytes from the old (recovered) buffer of @zone, starting at
  * data offset @off, into @buf. The copy is clamped against the zone's
  * buffer_size.
  *
  * Return: 0 on success, or -EINVAL on a missing argument/buffer or an offset
  * beyond buffer_size.
  */
 static int psz_zone_read_oldbuf(struct pstore_zone *zone, char *buf,
 		size_t len, unsigned long off)
 {
 	size_t avail;

 	if (!buf || !zone || !zone->oldbuf)
 		return -EINVAL;
 	if (off > zone->buffer_size)
 		return -EINVAL;

 	avail = zone->buffer_size - off;
 	if (len > avail)
 		len = avail;

 	memcpy(buf, zone->oldbuf->data + off, len);
 	return 0;
 }

 /*
  * psz_zone_write() - update a zone's in-memory buffer and optionally flush it
  * @zone: zone to update
  * @flush_mode: how much of the zone to push to the backend
  * @buf: new data to copy into the zone, or NULL to only flush
  * @len: number of bytes in @buf
  * @off: offset into the zone's data area to copy to / flush from
  *
  * Return: 0 on success, -EINVAL if @off lies beyond the zone's data area,
  * -ENOMSG if the backend write returned -ENOMSG, otherwise -EBUSY after the
  * zone has been marked dirty.
  */
 static int psz_zone_write(struct pstore_zone *zone,
 		enum psz_flush_mode flush_mode, const char *buf,
 		size_t len, unsigned long off)
 {
 	struct pstore_zone_info *info = pstore_zone_cxt.pstore_zone_info;
 	ssize_t wcnt = 0;
 	ssize_t (*writeop)(const char *buf, size_t bytes, loff_t pos);
 	size_t wlen;

 	if (off > zone->buffer_size)
 		return -EINVAL;

 	/* clamp the copy to the end of the data area and record the new length */
 	wlen = min_t(size_t, len, zone->buffer_size - off);
 	if (buf && wlen) {
 		memcpy(zone->buffer->data + off, buf, wlen);
 		atomic_set(&zone->buffer->datalen, wlen + off);
 	}

 	/* avoid to damage old records */
 	if (!is_on_panic() && !atomic_read(&pstore_zone_cxt.recovered))
 		goto dirty;

 	/* on panic the backend's dedicated panic_write hook is used instead */
 	writeop = is_on_panic() ? info->panic_write : info->write;
 	if (!writeop)
 		goto dirty;

 	switch (flush_mode) {
 	case FLUSH_NONE:
 		/* memory was changed but nothing is flushed: remember that */
 		if (unlikely(buf && wlen))
 			goto dirty;
 		return 0;
 	case FLUSH_PART:
 		/* write only the touched data range, located after the header */
 		wcnt = writeop((const char *)zone->buffer->data + off, wlen,
 				zone->off + sizeof(*zone->buffer) + off);
 		if (wcnt != wlen)
 			goto dirty;
 		fallthrough;
 	case FLUSH_META:
 		/* write the zone header only */
 		wlen = sizeof(struct psz_buffer);
 		wcnt = writeop((const char *)zone->buffer, wlen, zone->off);
 		if (wcnt != wlen)
 			goto dirty;
 		break;
 	case FLUSH_ALL:
 		/* write the header together with the whole data area */
 		wlen = zone->buffer_size + sizeof(*zone->buffer);
 		wcnt = writeop((const char *)zone->buffer, wlen, zone->off);
 		if (wcnt != wlen)
 			goto dirty;
 		break;
 	}

 	return 0;
 dirty:
 	/* no need to mark dirty if going to try next zone */
 	if (wcnt == -ENOMSG)
 		return -ENOMSG;
 	atomic_set(&zone->dirty, true);
 	/* flush dirty zones nicely */
 	if (wcnt == -EBUSY && !is_on_panic())
 		schedule_delayed_work(&psz_cleaner, msecs_to_jiffies(500));
 	return -EBUSY;
 }

 /*
  * Flush @zone completely to storage if it is marked dirty.
  *
  * Return: 0 if the zone was clean or has been flushed, -EINVAL for a NULL
  * zone, -EBUSY while recovery has not finished, or the psz_zone_write()
  * error (in which case the zone is marked dirty again).
  */
 static int psz_flush_dirty_zone(struct pstore_zone *zone)
 {
 	int err;

 	if (unlikely(!zone))
 		return -EINVAL;

 	if (unlikely(!atomic_read(&pstore_zone_cxt.recovered)))
 		return -EBUSY;

 	/* take over the dirty flag; a clean zone needs no work */
 	if (atomic_xchg(&zone->dirty, false) == 0)
 		return 0;

 	err = psz_zone_write(zone, FLUSH_ALL, NULL, 0, 0);
 	if (err != 0)
 		atomic_set(&zone->dirty, true);

 	return err;
 }

 /*
  * Flush every dirty zone in the array @zones of @cnt entries, stopping at the
  * first failure.
  *
  * Return: 0 on success, -EINVAL for a NULL array or NULL entry, or the first
  * error reported by psz_flush_dirty_zone().
  */
 static int psz_flush_dirty_zones(struct pstore_zone **zones, unsigned int cnt)
 {
 	unsigned int idx;

 	if (!zones)
 		return -EINVAL;

 	for (idx = 0; idx < cnt; idx++) {
 		int err;

 		if (!zones[idx])
 			return -EINVAL;

 		err = psz_flush_dirty_zone(zones[idx]);
 		if (err)
 			return err;
 	}

 	return 0;
 }

 /*
  * Write the data held by zone @old into zone @new and flush @new entirely.
  * On success @old is emptied; on failure @new is reset to empty and clean.
  *
  * Return: 0 on success, otherwise the psz_zone_write() error.
  */
 static int psz_move_zone(struct pstore_zone *old, struct pstore_zone *new)
 {
 	int err;

 	err = psz_zone_write(new, FLUSH_ALL, (const char *)old->buffer->data,
 			buffer_datalen(old), 0);
 	if (!err) {
 		atomic_set(&old->buffer->datalen, 0);
 		return 0;
 	}

 	atomic_set(&new->buffer->datalen, 0);
 	atomic_set(&new->dirty, false);
 	return err;
 }

 static void psz_flush_all_dirty_zones(struct work_struct *work)
 {
 	struct psz_context *cxt = &pstore_zone_cxt;
 	int ret = 0;

 	if (cxt->ppsz)
 		ret |= psz_flush_dirty_zone(cxt->ppsz);
 	if (cxt->cpsz)
 		ret |= psz_flush_dirty_zone(cxt->cpsz);
 	if (cxt->kpszs)
 		ret |= psz_flush_dirty_zones(cxt->kpszs, cxt->kmsg_max_cnt);
 	if (cxt->fpszs)
 		ret |= psz_flush_dirty_zones(cxt->fpszs, cxt->ftrace_max_cnt);
 	if (ret && cxt->pstore_zone_info)
 		schedule_delayed_work(&psz_cleaner, msecs_to_jiffies(1000));
 }

 static int psz_kmsg_recover_data(struct psz_context *cxt)
 {
 	struct pstore_zone_info *info = cxt->pstore_zone_info;
 	struct pstore_zone *zone = NULL;
 	struct psz_buffer *buf;
 	unsigned long i;
 	ssize_t rcnt;

 	if (!info->read)
 		return -EINVAL;

 	for (i = 0; i < cxt->kmsg_max_cnt; i++) {
 		zone = cxt->kpszs[i];
 		if (unlikely(!zone))
 			return -EINVAL;
 		if (atomic_read(&zone->dirty)) {
 			unsigned int wcnt = cxt->kmsg_write_cnt;
 			struct pstore_zone *new = cxt->kpszs[wcnt];
 			int ret;

 			ret = psz_move_zone(zone, new);
 			if (ret) {
 				pr_err("move zone from %lu to %d failed\n",
 						i, wcnt);
 				return ret;
 			}
 			cxt->kmsg_write_cnt = (wcnt + 1) % cxt->kmsg_max_cnt;
 		}
 		if (!zone->should_recover)
 			continue;
 		buf = zone->buffer;
 		rcnt = info->read((char *)buf, zone->buffer_size + sizeof(*buf),
 				zone->off);
 		if (rcnt != zone->buffer_size + sizeof(*buf))
 			return (int)rcnt < 0 ? (int)rcnt : -EIO;
 	}
 	return 0;
 }

 /*
  * First recovery pass over the kmsg zones: read only the zone header and the
  * kmsg sub-header of each zone from storage, validate them, and derive the
  * next write position, the oops/panic counters and the should_recover flags.
  *
  * Return: 0 on success, -EINVAL without a backend read hook or on a NULL
  * zone, or the read error (-EIO for a short read).
  */
 static int psz_kmsg_recover_meta(struct psz_context *cxt)
 {
 	struct pstore_zone_info *info = cxt->pstore_zone_info;
 	struct pstore_zone *zone;
 	size_t rcnt, len;
 	struct psz_buffer *buf;
 	struct psz_kmsg_header *hdr;
 	struct timespec64 time = { };
 	unsigned long i;
 	/*
 	 * Recover may on panic, we can't allocate any memory by kmalloc.
 	 * So, we use local array instead.
 	 */
 	char buffer_header[sizeof(*buf) + sizeof(*hdr)] = {0};

 	if (!info->read)
 		return -EINVAL;

 	len = sizeof(*buf) + sizeof(*hdr);
 	buf = (struct psz_buffer *)buffer_header;
 	for (i = 0; i < cxt->kmsg_max_cnt; i++) {
 		zone = cxt->kpszs[i];
 		if (unlikely(!zone))
 			return -EINVAL;

 		/* -ENOMSG from the backend only skips this zone */
 		rcnt = info->read((char *)buf, len, zone->off);
 		if (rcnt == -ENOMSG) {
 			pr_debug("%s with id %lu may be broken, skip\n",
 					zone->name, i);
 			continue;
 		} else if (rcnt != len) {
 			pr_err("read %s with id %lu failed\n", zone->name, i);
 			return (int)rcnt < 0 ? (int)rcnt : -EIO;
 		}

 		/* the stored signature must match the one set up for this zone */
 		if (buf->sig != zone->buffer->sig) {
 			pr_debug("no valid data in kmsg dump zone %lu\n", i);
 			continue;
 		}

 		/* a recorded length larger than the data area cannot be valid */
 		if (zone->buffer_size < atomic_read(&buf->datalen)) {
 			pr_info("found overtop zone: %s: id %lu, off %lld, size %zu\n",
 					zone->name, i, zone->off,
 					zone->buffer_size);
 			continue;
 		}

 		hdr = (struct psz_kmsg_header *)buf->data;
 		if (hdr->magic != PSTORE_KMSG_HEADER_MAGIC) {
 			pr_info("found invalid zone: %s: id %lu, off %lld, size %zu\n",
 					zone->name, i, zone->off,
 					zone->buffer_size);
 			continue;
 		}

 		/*
 		 * we get the newest zone, and the next one must be the oldest
 		 * or unused zone, because we do write one by one like a circle.
 		 */
 		if (hdr->time.tv_sec >= time.tv_sec) {
 			time.tv_sec = hdr->time.tv_sec;
 			cxt->kmsg_write_cnt = (i + 1) % cxt->kmsg_max_cnt;
 		}

 		/* keep the highest counter seen per dump reason */
 		if (hdr->reason == KMSG_DUMP_OOPS)
 			cxt->oops_counter =
 				max(cxt->oops_counter, hdr->counter);
 		else if (hdr->reason == KMSG_DUMP_PANIC)
 			cxt->panic_counter =
 				max(cxt->panic_counter, hdr->counter);

 		if (!atomic_read(&buf->datalen)) {
 			pr_debug("found erased zone: %s: id %lu, off %lld, size %zu, datalen %d\n",
 					zone->name, i, zone->off,
 					zone->buffer_size,
 					atomic_read(&buf->datalen));
 			continue;
 		}

 		/* data is only scheduled for read-back when not in a panic */
 		if (!is_on_panic())
 			zone->should_recover = true;
 		pr_debug("found nice zone: %s: id %lu, off %lld, size %zu, datalen %d\n",
 				zone->name, i, zone->off,
 				zone->buffer_size, atomic_read(&buf->datalen));
 	}

 	return 0;
 }

 /*
  * Recover the kmsg zones: metadata pass first, then the data pass.
  *
  * Return: 0 on success or when there are no kmsg zones, otherwise the error
  * of the failing pass.
  */
 static int psz_kmsg_recover(struct psz_context *cxt)
 {
 	int ret;

 	if (!cxt->kpszs)
 		return 0;

 	ret = psz_kmsg_recover_meta(cxt);
 	if (!ret)
 		ret = psz_kmsg_recover_data(cxt);

 	if (ret)
 		pr_debug("psz_recover_kmsg failed\n");
 	return ret;
 }

 /*
  * psz_recover_zone() - recover the old content of a non-kmsg zone
  * @cxt: the context of pstore/zone
  * @zone: zone to recover, may be NULL
  *
  * Reads the zone header from storage and, if it describes valid data, reads
  * the data back into a freshly allocated zone->oldbuf. The stored data is
  * un-rotated while reading: the bytes from the stored start offset up to
  * datalen come first, followed by the bytes in front of the start offset.
  * During a panic nothing is read; the zone is only flushed.
  *
  * Return: 0 on success or when there is nothing to recover, -EINVAL without
  * a backend read hook, -ENOMEM on allocation failure, or the read error
  * (-EIO for a short read).
  */
 static int psz_recover_zone(struct psz_context *cxt, struct pstore_zone *zone)
 {
 	struct pstore_zone_info *info = cxt->pstore_zone_info;
 	struct psz_buffer *oldbuf, tmpbuf;
 	int ret = 0;
 	char *buf;
 	ssize_t rcnt, len, start, off;

 	if (!zone || zone->oldbuf)
 		return 0;

 	if (is_on_panic()) {
 		/* save data as much as possible */
 		psz_flush_dirty_zone(zone);
 		return 0;
 	}

 	if (unlikely(!info->read))
 		return -EINVAL;

 	len = sizeof(struct psz_buffer);
 	rcnt = info->read((char *)&tmpbuf, len, zone->off);
 	if (rcnt != len) {
 		pr_debug("read zone %s failed\n", zone->name);
 		return (int)rcnt < 0 ? (int)rcnt : -EIO;
 	}

 	if (tmpbuf.sig != zone->buffer->sig) {
 		pr_debug("no valid data in zone %s\n", zone->name);
 		return 0;
 	}

 	if (zone->buffer_size < atomic_read(&tmpbuf.datalen) ||
 		zone->buffer_size < atomic_read(&tmpbuf.start)) {
 		pr_info("found overtop zone: %s: off %lld, size %zu\n",
 				zone->name, zone->off, zone->buffer_size);
 		/* just keep going */
 		return 0;
 	}

 	if (!atomic_read(&tmpbuf.datalen)) {
 		pr_debug("found erased zone: %s: off %lld, size %zu, datalen %d\n",
 				zone->name, zone->off, zone->buffer_size,
 				atomic_read(&tmpbuf.datalen));
 		return 0;
 	}

 	/*
 	 * The start offset must lie inside the stored data. Otherwise
 	 * "datalen - start" below would be negative and be handed to the
 	 * backend as a huge read length, overrunning oldbuf.
 	 */
 	if (atomic_read(&tmpbuf.datalen) < atomic_read(&tmpbuf.start)) {
 		pr_info("found invalid zone: %s: off %lld, size %zu\n",
 				zone->name, zone->off, zone->buffer_size);
 		/* just keep going */
 		return 0;
 	}

 	pr_debug("found nice zone: %s: off %lld, size %zu, datalen %d\n",
 			zone->name, zone->off, zone->buffer_size,
 			atomic_read(&tmpbuf.datalen));

 	len = atomic_read(&tmpbuf.datalen) + sizeof(*oldbuf);
 	oldbuf = kzalloc(len, GFP_KERNEL);
 	if (!oldbuf)
 		return -ENOMEM;

 	memcpy(oldbuf, &tmpbuf, sizeof(*oldbuf));
 	buf = (char *)oldbuf + sizeof(*oldbuf);
 	len = atomic_read(&oldbuf->datalen);
 	start = atomic_read(&oldbuf->start);
 	off = zone->off + sizeof(*oldbuf);

 	/* get part of data */
 	rcnt = info->read(buf, len - start, off + start);
 	if (rcnt != len - start) {
 		pr_err("read zone %s failed\n", zone->name);
 		ret = (int)rcnt < 0 ? (int)rcnt : -EIO;
 		goto free_oldbuf;
 	}

 	/* get the rest of data */
 	rcnt = info->read(buf + len - start, start, off);
 	if (rcnt != start) {
 		pr_err("read zone %s failed\n", zone->name);
 		ret = (int)rcnt < 0 ? (int)rcnt : -EIO;
 		goto free_oldbuf;
 	}

 	zone->oldbuf = oldbuf;
 	/* best effort: a failed flush leaves the zone dirty for a later retry */
 	psz_flush_dirty_zone(zone);
 	return 0;

 free_oldbuf:
 	kfree(oldbuf);
 	return ret;
 }

 /*
  * Run psz_recover_zone() on each non-NULL entry of @zones (@cnt entries),
  * stopping at the first failure.
  *
  * Return: 0 on success or for a NULL array, otherwise the first error.
  */
 static int psz_recover_zones(struct psz_context *cxt,
 		struct pstore_zone **zones, unsigned int cnt)
 {
 	unsigned int i;

 	if (!zones)
 		return 0;

 	for (i = 0; i < cnt; i++) {
 		struct pstore_zone *zone = zones[i];
 		int ret;

 		if (unlikely(!zone))
 			continue;

 		ret = psz_recover_zone(cxt, zone);
 		if (ret) {
 			pr_debug("recover %s[%u] failed\n", zone->name, i);
 			return ret;
 		}
 	}

 	return 0;
 }

 /**
  * psz_recovery() - recover data from storage
  * @cxt: the context of pstore/zone
  *
  * Recovery reads back what a previous boot left on storage. The kmsg zones
  * are handled first, then pmsg, console and finally the ftrace zones; the
  * first failing step aborts the sequence. Once everything succeeded the
  * context is flagged as recovered and later calls return immediately.
  *
  * Return: 0 on success, others on failure.
  */
 static inline int psz_recovery(struct psz_context *cxt)
 {
 	int ret;

 	if (atomic_read(&cxt->recovered))
 		return 0;

 	ret = psz_kmsg_recover(cxt);
 	if (!ret)
 		ret = psz_recover_zone(cxt, cxt->ppsz);
 	if (!ret)
 		ret = psz_recover_zone(cxt, cxt->cpsz);
 	if (!ret)
 		ret = psz_recover_zones(cxt, cxt->fpszs, cxt->ftrace_max_cnt);

 	if (unlikely(ret)) {
 		pr_err("recover failed\n");
 		return ret;
 	}

 	pr_debug("recover end!\n");
 	atomic_set(&cxt->recovered, 1);
 	return ret;
 }

 /* pstore ->open hook: rewind all read cursors so reading starts over. */
 static int psz_pstore_open(struct pstore_info *psi)
 {
 	struct psz_context *ctx = psi->data;

 	ctx->ftrace_read_cnt = 0;
 	ctx->console_read_cnt = 0;
 	ctx->pmsg_read_cnt = 0;
 	ctx->kmsg_read_cnt = 0;

 	return 0;
 }

 /* True when @zone exists and its old buffer holds a non-zero data length. */
 static inline bool psz_old_ok(struct pstore_zone *zone)
 {
 	return zone && zone->oldbuf && atomic_read(&zone->oldbuf->datalen);
 }

 /* True when @zone exists and its live buffer holds a non-zero data length. */
 static inline bool psz_ok(struct pstore_zone *zone)
 {
 	return zone && zone->buffer && buffer_datalen(zone);
 }

 /*
  * Erase the kmsg dump held by @zone, provided it is still the dump described
  * by @record (same counter). The in-memory length is zeroed, then either the
  * backend erase hook is called or, without one, the zone header is rewritten.
  *
  * Return: 0 if nothing had to be erased, otherwise the result of the backend
  * erase hook or of psz_zone_write().
  */
 static inline int psz_kmsg_erase(struct psz_context *cxt,
 		struct pstore_zone *zone, struct pstore_record *record)
 {
 	struct psz_kmsg_header *hdr =
 		(struct psz_kmsg_header *)zone->buffer->data;
 	size_t erase_len;

 	if (unlikely(!psz_ok(zone)))
 		return 0;

 	/* this zone is already updated, no need to erase */
 	if (record->count != hdr->counter)
 		return 0;

 	erase_len = buffer_datalen(zone) + sizeof(*zone->buffer);
 	atomic_set(&zone->buffer->datalen, 0);

 	if (!cxt->pstore_zone_info->erase)
 		return psz_zone_write(zone, FLUSH_META, NULL, 0, 0);

 	return cxt->pstore_zone_info->erase(erase_len, zone->off);
 }

 /*
  * Drop the recovered (old) data of a pmsg/console/ftrace zone.
  *
  * Return: 0 if there was no old data or new data already exists, otherwise
  * the result of rewriting the zone header.
  */
 static inline int psz_record_erase(struct psz_context *cxt,
 		struct pstore_zone *zone)
 {
 	if (unlikely(!psz_old_ok(zone)))
 		return 0;

 	kfree(zone->oldbuf);
 	zone->oldbuf = NULL;

 	/*
 	 * New data in the zone buffer means the old data on storage is
 	 * already invalid, so flushing the zone is enough. Only an empty
 	 * buffer requires writing a zero length (erase) to storage.
 	 */
 	if (buffer_datalen(zone)) {
 		psz_flush_dirty_zone(zone);
 		return 0;
 	}

 	return psz_zone_write(zone, FLUSH_META, NULL, 0, 0);
 }

 /*
  * pstore ->erase hook: pick the zone that backs @record and erase it.
  *
  * Return: -EINVAL for an unsupported record type or an out-of-range id,
  * otherwise the result of the type specific erase helper.
  */
 static int psz_pstore_erase(struct pstore_record *record)
 {
 	struct psz_context *cxt = record->psi->data;
 	struct pstore_zone *zone;

 	switch (record->type) {
 	case PSTORE_TYPE_DMESG:
 		if (record->id >= cxt->kmsg_max_cnt)
 			return -EINVAL;
 		return psz_kmsg_erase(cxt, cxt->kpszs[record->id], record);
 	case PSTORE_TYPE_PMSG:
 		zone = cxt->ppsz;
 		break;
 	case PSTORE_TYPE_CONSOLE:
 		zone = cxt->cpsz;
 		break;
 	case PSTORE_TYPE_FTRACE:
 		if (record->id >= cxt->ftrace_max_cnt)
 			return -EINVAL;
 		zone = cxt->fpszs[record->id];
 		break;
 	default:
 		return -EINVAL;
 	}

 	return psz_record_erase(cxt, zone);
 }

 /*
  * Fill the kmsg sub-header at the start of @zone's data area from @record.
  * Oops and panic dumps get the next value of their respective counter in the
  * context; every other reason gets counter 0.
  */
 static void psz_write_kmsg_hdr(struct pstore_zone *zone,
 		struct pstore_record *record)
 {
 	struct psz_context *cxt = record->psi->data;
 	struct psz_kmsg_header *hdr =
 		(struct psz_kmsg_header *)zone->buffer->data;

 	hdr->magic = PSTORE_KMSG_HEADER_MAGIC;
 	hdr->compressed = record->compressed;
 	hdr->time.tv_sec = record->time.tv_sec;
 	hdr->time.tv_nsec = record->time.tv_nsec;
 	hdr->reason = record->reason;

 	switch (hdr->reason) {
 	case KMSG_DUMP_OOPS:
 		hdr->counter = ++cxt->oops_counter;
 		break;
 	case KMSG_DUMP_PANIC:
 		hdr->counter = ++cxt->panic_counter;
 		break;
 	default:
 		hdr->counter = 0;
 		break;
 	}
 }

 /*
  * psz_kmsg_write_record() - store one kmsg dump in the next usable zone
  * @cxt: the context of pstore/zone
  * @record: the dump to store
  *
  * In case zone is broken, which may occur to MTD device, we try each zones,
  * start at cxt->kmsg_write_cnt. A zone counts as broken when writing it
  * returns -ENOMSG; any other result ends the search and advances
  * cxt->kmsg_write_cnt past the zone that was used.
  *
  * Return: the psz_zone_write() result of the zone that was used, -ENOSPC on
  * a NULL zone, -ENOMEM on allocation failure, or -EBUSY if every zone was
  * reported broken.
  */
 static inline int notrace psz_kmsg_write_record(struct psz_context *cxt,
 		struct pstore_record *record)
 {
 	size_t size, hlen;
 	struct pstore_zone *zone;
 	unsigned int i;

 	for (i = 0; i < cxt->kmsg_max_cnt; i++) {
 		unsigned int zonenum, len;
 		int ret;

 		zonenum = (cxt->kmsg_write_cnt + i) % cxt->kmsg_max_cnt;
 		zone = cxt->kpszs[zonenum];
 		if (unlikely(!zone))
 			return -ENOSPC;

 		/*
 		 * avoid destroying old data, allocate a new one.
 		 * Dumps are written from oops/panic paths where sleeping is
 		 * not allowed, so the allocation must be atomic.
 		 */
 		len = zone->buffer_size + sizeof(*zone->buffer);
 		zone->oldbuf = zone->buffer;
 		zone->buffer = kzalloc(len, GFP_ATOMIC);
 		if (!zone->buffer) {
 			zone->buffer = zone->oldbuf;
 			return -ENOMEM;
 		}
 		zone->buffer->sig = zone->oldbuf->sig;

 		pr_debug("write %s to zone id %d\n", zone->name, zonenum);
 		psz_write_kmsg_hdr(zone, record);
 		/* the dump text goes right behind the kmsg sub-header */
 		hlen = sizeof(struct psz_kmsg_header);
 		size = min_t(size_t, record->size, zone->buffer_size - hlen);
 		ret = psz_zone_write(zone, FLUSH_ALL, record->buf, size, hlen);
 		if (likely(!ret || ret != -ENOMSG)) {
 			cxt->kmsg_write_cnt = zonenum + 1;
 			cxt->kmsg_write_cnt %= cxt->kmsg_max_cnt;
 			/* no need to try next zone, free last zone buffer */
 			kfree(zone->oldbuf);
 			zone->oldbuf = NULL;
 			return ret;
 		}

 		pr_debug("zone %u may be broken, try next dmesg zone\n",
 				zonenum);
 		/* restore the previous buffer of the zone we could not use */
 		kfree(zone->buffer);
 		zone->buffer = zone->oldbuf;
 		zone->oldbuf = NULL;
 	}

 	return -EBUSY;
 }

 /*
  * Store a kmsg dump record. Only part 1 of a dump is accepted. When the dump
  * was stored during a panic, all other dirty zones are flushed as well.
  *
  * Return: -ENOSPC for later parts or when there are no kmsg zones, else 0.
  */
 static int notrace psz_kmsg_write(struct psz_context *cxt,
 		struct pstore_record *record)
 {
 	/*
 	 * Explicitly only take the first part of any new crash.
 	 * If our buffer is larger than kmsg_bytes, this can never happen,
 	 * and if our buffer is smaller than kmsg_bytes, we don't want the
 	 * report split across multiple records.
 	 */
 	if (record->part != 1 || !cxt->kpszs)
 		return -ENOSPC;

 	if (psz_kmsg_write_record(cxt, record) == 0 && is_on_panic()) {
 		/* ensure all data are flushed to storage when panic */
 		pr_debug("try to flush other dirty zones\n");
 		psz_flush_all_dirty_zones(NULL);
 	}

 	/* always return 0 as we had handled it on buffer */
 	return 0;
 }

 /*
  * Append @record to @zone, treating the zone's data area as a ring: writing
  * continues at the stored start offset and wraps to the beginning when the
  * end of the data area is reached. A record larger than the data area keeps
  * only its trailing bytes.
  *
  * Return: -ENOSPC for a NULL zone or record, otherwise 0 (results of the
  * individual psz_zone_write() calls are not propagated).
  */
 static int notrace psz_record_write(struct pstore_zone *zone,
 		struct pstore_record *record)
 {
 	size_t start, rem;
 	bool is_full_data = false;
 	char *buf;
 	int cnt;

 	if (!zone || !record)
 		return -ENOSPC;

 	/* the data area was already completely filled before this write */
 	if (atomic_read(&zone->buffer->datalen) >= zone->buffer_size)
 		is_full_data = true;

 	cnt = record->size;
 	buf = record->buf;
 	/* an oversized record: keep only the last buffer_size bytes */
 	if (unlikely(cnt > zone->buffer_size)) {
 		buf += cnt - zone->buffer_size;
 		cnt = zone->buffer_size;
 	}

 	start = buffer_start(zone);
 	rem = zone->buffer_size - start;
 	/* not enough room up to the end: fill the tail, then wrap around */
 	if (unlikely(rem < cnt)) {
 		psz_zone_write(zone, FLUSH_PART, buf, rem, start);
 		buf += rem;
 		cnt -= rem;
 		start = 0;
 		is_full_data = true;
 	}

 	/* record the new start offset before the write flushes the header */
 	atomic_set(&zone->buffer->start, cnt + start);
 	psz_zone_write(zone, FLUSH_PART, buf, cnt, start);

 	/*
 	 * psz_zone_write will set datalen as start + cnt.
 	 * It work if actual data length lesser than buffer size.
 	 * If data length greater than buffer size, pmsg will rewrite to
 	 * beginning of zone, which make buffer->datalen wrongly.
 	 * So we should reset datalen as buffer size once actual data length
 	 * greater than buffer size.
 	 */
 	if (is_full_data) {
 		atomic_set(&zone->buffer->datalen, zone->buffer_size);
 		psz_zone_write(zone, FLUSH_META, NULL, 0, 0);
 	}
 	return 0;
 }

 /*
  * pstore ->write hook: dispatch @record to the writer of its front-end type.
  * A panic kmsg dump switches the context into panic mode; from then on only
  * kmsg records are accepted.
  *
  * Return: -EBUSY for non-kmsg records during a panic, -EINVAL for an
  * unsupported type, -ENOSPC when there are no ftrace zones, otherwise the
  * result of the type specific writer.
  */
 static int notrace psz_pstore_write(struct pstore_record *record)
 {
 	struct psz_context *cxt = record->psi->data;
 	const bool is_dmesg = record->type == PSTORE_TYPE_DMESG;

 	if (is_dmesg && record->reason == KMSG_DUMP_PANIC)
 		atomic_set(&cxt->on_panic, 1);

 	/*
 	 * if on panic, do not write except panic records
 	 * Fix case that panic_write prints log which wakes up console backend.
 	 */
 	if (!is_dmesg && is_on_panic())
 		return -EBUSY;

 	switch (record->type) {
 	case PSTORE_TYPE_DMESG:
 		return psz_kmsg_write(cxt, record);
 	case PSTORE_TYPE_CONSOLE:
 		return psz_record_write(cxt->cpsz, record);
 	case PSTORE_TYPE_PMSG:
 		return psz_record_write(cxt->ppsz, record);
 	case PSTORE_TYPE_FTRACE: {
 		/* ftrace zones are indexed by the id of the writing CPU */
 		int cpu = smp_processor_id();

 		if (!cxt->fpszs)
 			return -ENOSPC;
 		return psz_record_write(cxt->fpszs[cpu], record);
 	}
 	default:
 		return -EINVAL;
 	}
 }

 /*
  * Advance the read cursors and hand out the next zone to read: first kmsg
  * zones with data, then every ftrace zone, then pmsg and console once each
  * if they carry old data.
  *
  * Return: the next zone, or NULL when everything has been visited.
  */
 static struct pstore_zone *psz_read_next_zone(struct psz_context *cxt)
 {
 	struct pstore_zone *candidate;

 	while (cxt->kmsg_read_cnt < cxt->kmsg_max_cnt) {
 		candidate = cxt->kpszs[cxt->kmsg_read_cnt++];
 		if (psz_ok(candidate))
 			return candidate;
 	}

 	/*
 	 * No need psz_old_ok(). Let psz_ftrace_read() do so for
 	 * combination. psz_ftrace_read() should traverse over
 	 * all zones in case of some zone without data.
 	 */
 	if (cxt->ftrace_read_cnt < cxt->ftrace_max_cnt)
 		return cxt->fpszs[cxt->ftrace_read_cnt++];

 	if (!cxt->pmsg_read_cnt) {
 		cxt->pmsg_read_cnt++;
 		candidate = cxt->ppsz;
 		if (psz_old_ok(candidate))
 			return candidate;
 	}

 	if (!cxt->console_read_cnt) {
 		cxt->console_read_cnt++;
 		candidate = cxt->cpsz;
 		if (psz_old_ok(candidate))
 			return candidate;
 	}

 	return NULL;
 }

 /*
  * Copy the kmsg sub-header fields of @zone into @record.
  *
  * Return: 0 on success, -EINVAL if the sub-header magic does not match.
  */
 static int psz_kmsg_read_hdr(struct pstore_zone *zone,
 		struct pstore_record *record)
 {
 	struct psz_kmsg_header *hdr =
 		(struct psz_kmsg_header *)zone->buffer->data;

 	if (hdr->magic != PSTORE_KMSG_HEADER_MAGIC)
 		return -EINVAL;

 	record->count = hdr->counter;
 	record->reason = hdr->reason;
 	record->compressed = hdr->compressed;
 	record->time.tv_sec = hdr->time.tv_sec;
 	record->time.tv_nsec = hdr->time.tv_nsec;

 	return 0;
 }

 static ssize_t psz_kmsg_read(struct pstore_zone *zone,
 		struct pstore_record *record)
 {
 	ssize_t size, hlen = 0;

 	size = buffer_datalen(zone);
 	/* Clear and skip this kmsg dump record if it has no valid header */
 	if (psz_kmsg_read_hdr(zone, record)) {
 		atomic_set(&zone->buffer->datalen, 0);
 		atomic_set(&zone->dirty, 0);
 		return -ENOMSG;
 	}
 	size -= sizeof(struct psz_kmsg_header);

 	if (!record->compressed) {
 		char *buf = kasprintf(GFP_KERNEL, "%s: Total %d times\n",
 				      kmsg_dump_reason_str(record->reason),
 				      record->count);
 		hlen = strlen(buf);
 		record->buf = krealloc(buf, hlen + size, GFP_KERNEL);
 		if (!record->buf) {
 			kfree(buf);
 			return -ENOMEM;
 		}
 	} else {
 		record->buf = kmalloc(size, GFP_KERNEL);
 		if (!record->buf)
 			return -ENOMEM;
 	}

 	size = psz_zone_read_buffer(zone, record->buf + hlen, size,
 			sizeof(struct psz_kmsg_header));
 	if (unlikely(size < 0)) {
 		kfree(record->buf);
 		return -ENOMSG;
 	}

 	return size + hlen;
 }

 /*
  * Try to combine all ftrace zones: the old data of @zone, if any, is merged
  * into record->buf. -ENOMSG is returned until the last ftrace zone has been
  * handed out, so that the caller keeps iterating; only then is the combined
  * size reported.
  *
  * Return: combined size once all ftrace zones were visited, -ENOMSG to move
  * on to the next zone (or when nothing was collected), -ENOSPC on a NULL
  * argument, or the error of pstore_ftrace_combine_log().
  */
 static ssize_t psz_ftrace_read(struct pstore_zone *zone,
 		struct pstore_record *record)
 {
 	struct psz_context *cxt;

 	if (!zone || !record)
 		return -ENOSPC;

 	if (psz_old_ok(zone)) {
 		struct psz_buffer *old = (struct psz_buffer *)zone->oldbuf;
 		int ret;

 		if (!old)
 			return -ENOMSG;

 		ret = pstore_ftrace_combine_log(&record->buf, &record->size,
 				(char *)old->data, atomic_read(&old->datalen));
 		if (unlikely(ret))
 			return ret;
 	}

 	cxt = record->psi->data;
 	/* then, read next ftrace zone */
 	if (cxt->ftrace_read_cnt < cxt->ftrace_max_cnt)
 		return -ENOMSG;

 	record->id = 0;
 	if (!record->size)
 		return -ENOMSG;
 	return record->size;
 }

 /*
  * Read the old (recovered) data of a pmsg/console zone into a freshly
  * allocated record->buf.
  *
  * Return: number of bytes read, -ENOSPC on a NULL argument, -ENOMSG when the
  * zone has no old buffer or copying failed, -ENOMEM on allocation failure.
  */
 static ssize_t psz_record_read(struct pstore_zone *zone,
 		struct pstore_record *record)
 {
 	struct psz_buffer *old;
 	size_t nbytes;

 	if (!zone || !record)
 		return -ENOSPC;

 	old = (struct psz_buffer *)zone->oldbuf;
 	if (!old)
 		return -ENOMSG;

 	nbytes = atomic_read(&old->datalen);
 	record->buf = kmalloc(nbytes, GFP_KERNEL);
 	if (!record->buf)
 		return -ENOMEM;

 	if (unlikely(psz_zone_read_oldbuf(zone, record->buf, nbytes, 0) != 0)) {
 		kfree(record->buf);
 		return -ENOMSG;
 	}

 	return nbytes;
 }

 /*
  * pstore ->read hook: make sure recovery has run, then walk the zones until
  * one yields a record.
  *
  * Return: size of the record read, 0 when no zone is left, or a negative
  * error from recovery or from the type specific reader.
  */
 static ssize_t psz_pstore_read(struct pstore_record *record)
 {
 	struct psz_context *cxt = record->psi->data;
 	ssize_t (*readop)(struct pstore_zone *zone,
 			struct pstore_record *record);
 	struct pstore_zone *zone;
 	ssize_t ret;

 	/* before read, we must recover from storage */
 	ret = psz_recovery(cxt);
 	if (ret)
 		return ret;

 	for (;;) {
 		zone = psz_read_next_zone(cxt);
 		if (!zone)
 			return 0;

 		record->type = zone->type;
 		switch (record->type) {
 		case PSTORE_TYPE_DMESG:
 			readop = psz_kmsg_read;
 			/* the cursor was already advanced past this zone */
 			record->id = cxt->kmsg_read_cnt - 1;
 			break;
 		case PSTORE_TYPE_FTRACE:
 			readop = psz_ftrace_read;
 			break;
 		case PSTORE_TYPE_CONSOLE:
 		case PSTORE_TYPE_PMSG:
 			readop = psz_record_read;
 			break;
 		default:
 			/* unknown zone type: skip it */
 			continue;
 		}

 		ret = readop(zone, record);
 		/* -ENOMSG means "nothing here", so try the next zone */
 		if (ret != -ENOMSG)
 			return ret;
 	}
 }

 /*
  * The single pstore/zone context. It starts out not recovered and not in
  * panic mode, and wires the pstore callbacks to the psz_pstore_*() handlers
  * defined above.
  */
 static struct psz_context pstore_zone_cxt = {
 	.pstore_zone_info_lock =
 		__MUTEX_INITIALIZER(pstore_zone_cxt.pstore_zone_info_lock),
 	.recovered = ATOMIC_INIT(0),
 	.on_panic = ATOMIC_INIT(0),
 	.pstore = {
 		.owner = THIS_MODULE,
 		.open = psz_pstore_open,
 		.read = psz_pstore_read,
 		.write = psz_pstore_write,
 		.erase = psz_pstore_erase,
 	},
 };

 /*
  * psz_free_zone() - release one zone and clear the caller's pointer
  * @pszone: address of the zone pointer; *pszone may be NULL
  *
  * Frees the live buffer, the recovered old buffer (if recovery allocated one
  * and it was never erased) and the zone itself, then sets *pszone to NULL.
  */
 static void psz_free_zone(struct pstore_zone **pszone)
 {
 	struct pstore_zone *zone = *pszone;

 	if (!zone)
 		return;

 	/* kfree(NULL) is a no-op, so an absent old buffer is fine */
 	kfree(zone->oldbuf);
 	kfree(zone->buffer);
 	kfree(zone);
 	*pszone = NULL;
 }

 /*
  * Release an array of zones. Zones are freed from the last entry down to the
  * first while *cnt is counted down to 0; finally the array itself is freed
  * and *pszones is cleared. A NULL array is left alone (and so is *cnt).
  */
 static void psz_free_zones(struct pstore_zone ***pszones, unsigned int *cnt)
 {
 	struct pstore_zone **array = *pszones;

 	if (!array)
 		return;

 	while (*cnt > 0) {
 		unsigned int last = --(*cnt);

 		psz_free_zone(&array[last]);
 	}

 	kfree(array);
 	*pszones = NULL;
 }

 /*
  * Release every zone owned by @cxt. The helpers return early on NULL
  * pointers, so no checks are needed here.
  */
 static void psz_free_all_zones(struct psz_context *cxt)
 {
 	psz_free_zones(&cxt->kpszs, &cxt->kmsg_max_cnt);
 	psz_free_zone(&cxt->ppsz);
 	psz_free_zone(&cxt->cpsz);
 	psz_free_zones(&cxt->fpszs, &cxt->ftrace_max_cnt);
 }

 /*
  * psz_init_zone() - allocate and set up one zone
  * @type: front-end type the zone is for
  * @off: in/out storage offset; advanced by @size on success
  * @size: total size of the zone, header included
  *
  * The zone buffer starts out filled with 0xFF, with the signature set and
  * both the data length and the start offset zeroed.
  *
  * Return: the new zone, NULL if @size is 0, ERR_PTR(-EINVAL) if @size cannot
  * even hold the zone header, or ERR_PTR(-ENOMEM) if the zone does not fit
  * into the storage or memory allocation fails.
  */
 static struct pstore_zone *psz_init_zone(enum pstore_type_id type,
 		loff_t *off, size_t size)
 {
 	struct pstore_zone_info *info = pstore_zone_cxt.pstore_zone_info;
 	struct pstore_zone *zone;
 	const char *name = pstore_type_to_name(type);

 	if (!size)
 		return NULL;

 	/*
 	 * The header must fit: otherwise buffer_size below would wrap around
 	 * and the header fields would be written past the allocation.
 	 */
 	if (size < sizeof(struct psz_buffer)) {
 		pr_err("%s size 0x%zx is too small for the zone header\n",
 			name, size);
 		return ERR_PTR(-EINVAL);
 	}

 	if (*off + size > info->total_size) {
 		pr_err("no room for %s (0x%zx@0x%llx over 0x%lx)\n",
 			name, size, *off, info->total_size);
 		return ERR_PTR(-ENOMEM);
 	}

 	zone = kzalloc(sizeof(struct pstore_zone), GFP_KERNEL);
 	if (!zone)
 		return ERR_PTR(-ENOMEM);

 	zone->buffer = kmalloc(size, GFP_KERNEL);
 	if (!zone->buffer) {
 		kfree(zone);
 		return ERR_PTR(-ENOMEM);
 	}
 	memset(zone->buffer, 0xFF, size);
 	zone->off = *off;
 	zone->name = name;
 	zone->type = type;
 	zone->buffer_size = size - sizeof(struct psz_buffer);
 	zone->buffer->sig = type ^ PSZ_SIG;
 	zone->oldbuf = NULL;
 	atomic_set(&zone->dirty, 0);
 	atomic_set(&zone->buffer->datalen, 0);
 	atomic_set(&zone->buffer->start, 0);

 	*off += size;

 	pr_debug("pszone %s: off 0x%llx, %zu header, %zu data\n", zone->name,
 			zone->off, sizeof(*zone->buffer), zone->buffer_size);
 	return zone;
 }

 /*
  * psz_init_zones() - allocate an array of equally sized zones
  * @type: front-end type the zones are for
  * @off: in/out storage offset; advanced by every zone that gets created
  * @total_size: storage area available for all zones together
  * @record_size: size of a single zone, header included
  * @cnt: out, number of zones created (0 on failure or when nothing is set up)
  *
  * Return: the zone array, NULL if @total_size or @record_size is 0,
  * ERR_PTR(-EINVAL) if the area is too small for even one zone,
  * ERR_PTR(-ENOMEM) if the area does not fit into the storage or allocation
  * fails, or the result of the psz_init_zone() call that failed.
  */
 static struct pstore_zone **psz_init_zones(enum pstore_type_id type,
 	loff_t *off, size_t total_size, ssize_t record_size,
 	unsigned int *cnt)
 {
 	struct pstore_zone_info *info = pstore_zone_cxt.pstore_zone_info;
 	struct pstore_zone **zones, *zone;
 	const char *name = pstore_type_to_name(type);
 	unsigned int c, i;

 	*cnt = 0;
 	if (!total_size || !record_size)
 		return NULL;

 	if (*off + total_size > info->total_size) {
 		pr_err("no room for zones %s (0x%zx@0x%llx over 0x%lx)\n",
 			name, total_size, *off, info->total_size);
 		return ERR_PTR(-ENOMEM);
 	}

 	c = total_size / record_size;
 	/*
 	 * With no room for a single zone, kcalloc(0, ...) would return a
 	 * zero-size pointer that passes the NULL checks of the callers but
 	 * must never be dereferenced.
 	 */
 	if (unlikely(!c)) {
 		pr_err("zone %s total_size too small\n", name);
 		return ERR_PTR(-EINVAL);
 	}

 	/* kcalloc() already hands back zeroed memory */
 	zones = kcalloc(c, sizeof(*zones), GFP_KERNEL);
 	if (!zones) {
 		pr_err("allocate for zones %s failed\n", name);
 		return ERR_PTR(-ENOMEM);
 	}

 	for (i = 0; i < c; i++) {
 		zone = psz_init_zone(type, off, record_size);
 		if (!zone || IS_ERR(zone)) {
 			pr_err("initialize zones %s failed\n", name);
 			/* frees the i zones created so far and the array */
 			psz_free_zones(&zones, &i);
 			return (void *)zone;
 		}
 		zones[i] = zone;
 	}

 	*cnt = c;
 	return zones;
 }

 /*
  * Lay out all zones on the storage, in this order: the pmsg zone, the
  * console zone, the ftrace zones (each ftrace_size / nr_cpu_ids bytes) and
  * finally the kmsg zones, which get whatever the other front-ends left of
  * total_size. On failure every zone created so far is released again.
  *
  * Return: 0 on success, otherwise the error of the failing zone setup.
  */
 static int psz_alloc_zones(struct psz_context *cxt)
 {
 	struct pstore_zone_info *info = cxt->pstore_zone_info;
 	size_t reserved = 0;	/* bytes claimed by the non-kmsg front-ends */
 	loff_t pos = 0;		/* running offset into the storage */
 	int err;

 	/* pmsg */
 	reserved += info->pmsg_size;
 	cxt->ppsz = psz_init_zone(PSTORE_TYPE_PMSG, &pos, info->pmsg_size);
 	if (IS_ERR(cxt->ppsz)) {
 		err = PTR_ERR(cxt->ppsz);
 		cxt->ppsz = NULL;
 		goto free_out;
 	}

 	/* console */
 	reserved += info->console_size;
 	cxt->cpsz = psz_init_zone(PSTORE_TYPE_CONSOLE, &pos,
 			info->console_size);
 	if (IS_ERR(cxt->cpsz)) {
 		err = PTR_ERR(cxt->cpsz);
 		cxt->cpsz = NULL;
 		goto free_out;
 	}

 	/* ftrace */
 	reserved += info->ftrace_size;
 	cxt->fpszs = psz_init_zones(PSTORE_TYPE_FTRACE, &pos,
 			info->ftrace_size,
 			info->ftrace_size / nr_cpu_ids,
 			&cxt->ftrace_max_cnt);
 	if (IS_ERR(cxt->fpszs)) {
 		err = PTR_ERR(cxt->fpszs);
 		cxt->fpszs = NULL;
 		goto free_out;
 	}

 	/* kmsg dumps take the remainder of the storage */
 	cxt->kpszs = psz_init_zones(PSTORE_TYPE_DMESG, &pos,
 			info->total_size - reserved,
 			info->kmsg_size, &cxt->kmsg_max_cnt);
 	if (IS_ERR(cxt->kpszs)) {
 		err = PTR_ERR(cxt->kpszs);
 		cxt->kpszs = NULL;
 		goto free_out;
 	}

 	return 0;

 free_out:
 	psz_free_all_zones(cxt);
 	return err;
 }

 /**
  * register_pstore_zone() - register to pstore/zone
  *
  * @info: back-end driver information. See &struct pstore_zone_info.
  *
  * Validates the sizes, name and read/write callbacks in @info, allocates
  * the zones and registers the result with pstore. A total_size above
  * 128MiB is capped in place in @info.
  *
  * Only one back-end at one time.
  *
  * Return: 0 on success, -EINVAL if @info is invalid, -EBUSY if a back-end
  * is already registered, -ENOMEM if no kmsg record fits or the kmsg buffer
  * cannot be allocated, otherwise the error reported by zone allocation or
  * by pstore_register().
  */
 int register_pstore_zone(struct pstore_zone_info *info)
 {
 	int err = -EINVAL;
 	struct psz_context *cxt = &pstore_zone_cxt;

 	if (info->total_size < 4096) {
 		pr_warn("total_size must be >= 4096\n");
 		return -EINVAL;
 	}
 	if (info->total_size > SZ_128M) {
 		pr_warn("capping size to 128MiB\n");
 		info->total_size = SZ_128M;
 	}

 	if (!info->kmsg_size && !info->pmsg_size && !info->console_size &&
 	    !info->ftrace_size) {
 		pr_warn("at least one record size must be non-zero\n");
 		return -EINVAL;
 	}

 	if (!info->name || !info->name[0])
 		return -EINVAL;

 /*
  * A size of zero is accepted (front-end disabled); any other value must be
  * at least @size and have none of the low bits below @size set.
  */
 #define check_size(name, size) do {					\
 		if (info->name > 0 && info->name < (size)) {		\
 			pr_err(#name " must be over %d\n", (size));	\
 			return -EINVAL;					\
 		}							\
 		if (info->name & ((size) - 1)) {			\
 			pr_err(#name " must be a multiple of %d\n",	\
 					(size));			\
 			return -EINVAL;					\
 		}							\
 	} while (0)

 	check_size(total_size, 4096);
 	check_size(kmsg_size, SECTOR_SIZE);
 	check_size(pmsg_size, SECTOR_SIZE);
 	check_size(console_size, SECTOR_SIZE);
 	check_size(ftrace_size, SECTOR_SIZE);

 #undef check_size

 	/*
 	 * Both @read and @write are mandatory: without @read, mounting
 	 * pstore may fail; without @write, record files cannot be removed.
 	 */
 	if (!info->read || !info->write) {
 		pr_err("no valid general read/write interface\n");
 		return -EINVAL;
 	}

 	mutex_lock(&cxt->pstore_zone_info_lock);
 	if (cxt->pstore_zone_info) {
 		pr_warn("'%s' already loaded: ignoring '%s'\n",
 				cxt->pstore_zone_info->name, info->name);
 		mutex_unlock(&cxt->pstore_zone_info_lock);
 		return -EBUSY;
 	}
 	cxt->pstore_zone_info = info;

 	pr_debug("register %s with properties:\n", info->name);
 	pr_debug("\ttotal size : %ld Bytes\n", info->total_size);
 	pr_debug("\tkmsg size : %ld Bytes\n", info->kmsg_size);
 	pr_debug("\tpmsg size : %ld Bytes\n", info->pmsg_size);
 	pr_debug("\tconsole size : %ld Bytes\n", info->console_size);
 	pr_debug("\tftrace size : %ld Bytes\n", info->ftrace_size);

 	err = psz_alloc_zones(cxt);
 	if (err) {
 		pr_err("alloc zones failed\n");
 		goto fail_out;
 	}

 	if (info->kmsg_size) {
 		/*
 		 * Zone allocation succeeds without creating any dmesg zone
 		 * when the space left over for dmesg is zero or smaller than
 		 * one kmsg record. kpszs[0] must not be touched then.
 		 */
 		if (!cxt->kmsg_max_cnt) {
 			pr_err("no room for a kmsg record\n");
 			err = -ENOMEM;
 			goto fail_free;
 		}

 		cxt->pstore.bufsize = cxt->kpszs[0]->buffer_size -
 			sizeof(struct psz_kmsg_header);
 		cxt->pstore.buf = kzalloc(cxt->pstore.bufsize, GFP_KERNEL);
 		if (!cxt->pstore.buf) {
 			err = -ENOMEM;
 			goto fail_free;
 		}
 	}
 	cxt->pstore.data = cxt;

 	/*
 	 * Advertise the enabled front-ends to pstore and log them on a
 	 * single line.
 	 *
 	 * NOTE(review): the flags are only OR-ed in here and are not cleared
 	 * anywhere in this function; confirm stale bits cannot survive a
 	 * failed or repeated registration.
 	 */
 	pr_info("registered %s as backend for", info->name);
 	cxt->pstore.max_reason = info->max_reason;
 	cxt->pstore.name = info->name;
 	if (info->kmsg_size) {
 		cxt->pstore.flags |= PSTORE_FLAGS_DMESG;
 		pr_cont(" kmsg(%s",
 			kmsg_dump_reason_str(cxt->pstore.max_reason));
 		if (cxt->pstore_zone_info->panic_write)
 			pr_cont(",panic_write");
 		pr_cont(")");
 	}
 	if (info->pmsg_size) {
 		cxt->pstore.flags |= PSTORE_FLAGS_PMSG;
 		pr_cont(" pmsg");
 	}
 	if (info->console_size) {
 		cxt->pstore.flags |= PSTORE_FLAGS_CONSOLE;
 		pr_cont(" console");
 	}
 	if (info->ftrace_size) {
 		cxt->pstore.flags |= PSTORE_FLAGS_FTRACE;
 		pr_cont(" ftrace");
 	}
 	pr_cont("\n");

 	err = pstore_register(&cxt->pstore);
 	if (err) {
 		pr_err("registering with pstore failed\n");
 		goto fail_free;
 	}
 	mutex_unlock(&cxt->pstore_zone_info_lock);

 	return 0;

 fail_free:
 	/* kfree(NULL) is a no-op, so this is safe before buf is allocated. */
 	kfree(cxt->pstore.buf);
 	cxt->pstore.buf = NULL;
 	cxt->pstore.bufsize = 0;
 	psz_free_all_zones(cxt);
 fail_out:
 	cxt->pstore_zone_info = NULL;
 	mutex_unlock(&cxt->pstore_zone_info_lock);
 	return err;
 }
 EXPORT_SYMBOL_GPL(register_pstore_zone);

 /**
  * unregister_pstore_zone() - unregister to pstore/zone
  *
  * @info: back-end driver information. See struct pstore_zone_info.
  *
  * Does nothing when no back-end is currently registered. Otherwise the
  * pstore registration is dropped, pending writes are flushed, and the
  * buffer, zones and counters held in the context are released or reset.
  */
 void unregister_pstore_zone(struct pstore_zone_info *info)
 {
 	struct psz_context *cxt = &pstore_zone_cxt;

 	mutex_lock(&cxt->pstore_zone_info_lock);

 	/* No back-end registered: nothing to tear down. */
 	if (!cxt->pstore_zone_info)
 		goto unlock;

 	/* Cut off pstore first so no new writes arrive. */
 	pstore_unregister(&cxt->pstore);

 	/* Push out whatever is still dirty. */
 	psz_flush_all_dirty_zones(NULL);
 	flush_delayed_work(&psz_cleaner);

 	/* Release the record buffer and forget the back-end. */
 	kfree(cxt->pstore.buf);
 	cxt->pstore.bufsize = 0;
 	cxt->pstore.buf = NULL;
 	cxt->pstore_zone_info = NULL;

 	psz_free_all_zones(cxt);

 	/* Return counters and state flags to their initial values. */
 	atomic_set(&cxt->on_panic, 0);
 	atomic_set(&cxt->recovered, 0);
 	cxt->panic_counter = 0;
 	cxt->oops_counter = 0;

 unlock:
 	mutex_unlock(&cxt->pstore_zone_info_lock);
 }
 EXPORT_SYMBOL_GPL(unregister_pstore_zone);

 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("WeiXiong Liao <liaoweixiong@allwinnertech.com>");
 MODULE_AUTHOR("Kees Cook <keescook@chromium.org>");
 MODULE_DESCRIPTION("Storage Manager for pstore/blk");