---
 drivers/md/dm-iostats.c |  488 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 488 insertions(+)

Index: linux/drivers/md/dm-iostats.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux/drivers/md/dm-iostats.c	2007-06-06 20:40:10.000000000 +0100
@@ -0,0 +1,488 @@
+/*
+ * Copyright (C) 2007 Red Hat GmbH
+ *
+ * Module Author: Heinz Mauelshagen (Mauelshagen@RedHat.com)
+ *
+ * Gather I/O statistics.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+
+#include <linux/ctype.h>
+
+#define	DM_MSG_PREFIX	"dm-iostats" /* Presumably prefixes DMERR()/DMINFO(). */
+
+static const char *version = "v1.0"; /* Logged on module init and exit. */
+
+/* Jiffies cut down to their low 32 bits (as is where long has 32 bits). */
+#if BITS_PER_LONG <= 32
+#define	JIFFIES_32 jiffies
+#else
+#define	JIFFIES_32 (jiffies & 0xFFFFFFFF)
+#endif
+
+/* Feature flags; passed as bit numbers to set_bit()/test_bit(), not masks. */
+enum feature_flags {
+	IOF_LATENCY	= 1,	/* Record IO latency. */
+	IOF_SIZE	= 2,	/* Record IO size sums. */
+	IOF_ERROR	= 4,	/* Count IO errors. */
+};
+
+/* IO statistics context; iostats_ctr() allocates only the part needed. */
+struct iostats_c {
+	unsigned long flags;
+	struct dm_dev *dev;
+
+	atomic_t ios[2];	  /* IO counters, indexed by READ/WRITE. */
+
+	/* This field is present in case we count IO errors. */
+	atomic_t errors[2];	  /* Number of IO errors. */
+
+	/*
+	 * These fields are only present if we are recording the IO latency.
+	 * The lock is taken by iostats_map(), iostats_end_io() and status.
+	 */
+	spinlock_t lock;
+	unsigned long last_jiffies;		/* To detect jiffies overrun. */
+	unsigned long long start[2];		/* Sum of IO start jiffies. */
+	unsigned long long start_inflight[2];	/* Same, in flight IOs only. */
+	unsigned long long end[2];		/* Sum of IO end jiffies. */
+	atomic_t ios_inflight[2];		/* Counter of IOs in flight. */
+
+	/*
+	 * This field is only present if we are recording the IO sizes sums;
+	 * member order matters, as f_types[] sizes allocations by offset.
+	 */
+	unsigned long long size[2];		/* Sum of IO sizes in bytes. */
+};
+
+/* Zero the start/end jiffies sums and the IO counter of one direction. */
+static void reset_latency(struct iostats_c *ic, int rw)
+{
+	atomic_set(&ic->ios[rw], 0);
+	ic->end[rw] = ic->start[rw] = 0;
+}
+
+/* Zero the counters/sums of each enabled feature; used on init and resume. */
+static void reset_all(struct iostats_c *ic)
+{
+	if (test_bit(IOF_ERROR, &ic->flags)) {
+		atomic_set(&ic->errors[READ], 0);
+		atomic_set(&ic->errors[WRITE], 0);
+	}
+
+	if (test_bit(IOF_SIZE, &ic->flags))
+		ic->size[WRITE] = ic->size[READ] = 0;
+
+	if (test_bit(IOF_LATENCY, &ic->flags)) {
+		reset_latency(ic, WRITE);
+		reset_latency(ic, READ);
+	}
+}
+
+/*
+ * Construct an IO statistics mapping:
+ *
+ *	<dev_path> [<type>...]
+ *
+ *	available types: latency, size, error ('size' requires 'latency')
+ */
+/* Names of the iostats <type> constructor parameters. */
+#define	STR_LATENCY	"latency"
+#define	STR_SIZE	"size"
+#define	STR_ERROR	"error"
+/* Structure offset macro for the context sizes in f_types[] below. */
+#define	OFFSET(member)	((size_t) &((struct iostats_c*) NULL)->member)
+
+/* Feature <type> specs: name, flag and context size to allocate for it. */
+static const struct f_type {
+	const char *name;	 /* <type> */
+	size_t len;		 /* String length of name. */
+	enum feature_flags flag; /* Feature flag to set. */
+	size_t size;		 /* Size of structure to allocate. */
+} f_types[] = {
+	{ STR_LATENCY, sizeof(STR_LATENCY) - 1, IOF_LATENCY, OFFSET(size) },
+	{ STR_SIZE, sizeof(STR_SIZE) - 1, IOF_SIZE, sizeof(struct iostats_c) },
+	{ STR_ERROR, sizeof(STR_ERROR) - 1, IOF_ERROR, OFFSET(lock) },
+};
+
+/* Iterate ft over all entries of f_types[]. */
+#define	for_each_ft(ft) for ((ft) = f_types; (ft) < ARRAY_END(f_types); (ft)++)
+
+/* Constructor: "<dev_path> [<type>...]"; <dev_path> is mandatory. */
+static int iostats_ctr(struct dm_target *ti, unsigned argc, char **argv)
+{
+	unsigned i;
+	unsigned long flags = 0;
+	size_t size = OFFSET(errors); /* Smallest possible structure size. */
+	struct iostats_c *ic;
+
+	if (!argc || argc > ARRAY_SIZE(f_types) + 1) {
+		ti->error = "dm-iostats: incorrect number of arguments";
+		return -EINVAL;
+	}
+
+	/* Check constructor <type> arguments. */
+	for (i = 1; i < argc; i++) {
+		const struct f_type *ft;
+
+		for_each_ft(ft) {
+			/* Exact match only: nothing may trail the name. */
+			if (strncmp(argv[i], ft->name, ft->len) ||
+			    argv[i][ft->len])
+				continue;
+
+			set_bit(ft->flag, &flags);
+			if (ft->size > size)
+				size = ft->size;
+
+			break;
+		}
+
+		if (ft == ARRAY_END(f_types)) {
+			ti->error = "dm-iostats: invalid iostats <type>";
+			return -EINVAL;
+		}
+	}
+
+	/* IO sizes are only summed on the latency path, so require it. */
+	if (test_bit(IOF_SIZE, &flags) && !test_bit(IOF_LATENCY, &flags)) {
+		ti->error = "dm-iostats: mandatory type 'latency' with 'size'";
+		return -EINVAL;
+	}
+
+	ic = kzalloc(size, GFP_KERNEL);
+	if (!ic) {
+		ti->error = "dm-iostats: cannot allocate iostats context";
+		return -ENOMEM;
+	}
+
+	if (dm_get_device(ti, *argv, ti->begin, ti->len,
+			  dm_table_get_mode(ti->table), &ic->dev)) {
+		ti->error = "dm-iostats: device lookup failed";
+		kfree(ic);
+		return -ENXIO;
+	}
+
+	ic->flags = flags;
+	if (test_bit(IOF_LATENCY, &flags)) {
+		spin_lock_init(&ic->lock);
+		atomic_set(ic->ios_inflight + READ, 0);
+		atomic_set(ic->ios_inflight + WRITE, 0);
+		reset_all(ic);
+	}
+
+	ti->private = ic;
+
+	return 0;
+}
+
+/* Destructor: drop the underlying device reference, free the context. */
+static void iostats_dtr(struct dm_target *ti)
+{
+	struct iostats_c *ctx;
+
+	ctx = ti->private;
+	/* ctx->dev is read here, so the context itself is freed last. */
+	dm_put_device(ti, ctx->dev);
+	kfree(ctx);
+}
+
+/*
+ * iostats_map() and iostats_end_io() support functions.
+ */
+/* Add now to *sum, zeroing it on overrun; returns 0 if the sum is 0. */
+static inline int calc_sum(unsigned long long *sum, unsigned long now)
+{
+	unsigned long long s = *sum + now;
+
+	if (unlikely(s < *sum))
+		s = 0;
+
+	return (*sum = s) != 0; /* Never truncate the 64 bit sum to int. */
+}
+
+/* Restart READ or WRITE statistics from the IOs currently in flight. */
+static void set_inflight_latency(struct iostats_c *ic, int rw)
+{
+	ic->end[rw] = 0;
+	ic->start[rw] = ic->start_inflight[rw];
+	atomic_set(&ic->ios[rw], atomic_read(&ic->ios_inflight[rw]));
+}
+
+static void set_inflight_latencies(struct iostats_c *ic)
+{
+	set_inflight_latency(ic, WRITE);	/* The two directions are */
+	set_inflight_latency(ic, READ);		/* handled independently. */
+}
+
+/*
+ * Check for jiffies overrun.
+ *
+ * In case of overrun (now is smaller than on the previous call) ->
+ * set both READ and WRITE latencies to in flight ones.
+ */
+static inline void check_jiffies(struct iostats_c *ic, unsigned long now)
+{
+	const unsigned long previous = ic->last_jiffies;
+
+	ic->last_jiffies = now;
+
+	if (unlikely(previous > now))
+		set_inflight_latencies(ic);
+}
+
+/*
+ * Read/write statistics mapping:
+ *
+ * o checks for jiffies or sum variable overrun.
+ * o sums up read and write counts
+ *
+ * In case of 'latency' <type> configured:
+ *
+ *	o increments IO in flight counters
+ *	o sums up IO start jiffies for better accuracy; see calc_latency()
+ *
+ * In case of 'size' <type> configured:
+ *
+ *	o sums up IO sizes
+ */
+static int iostats_map(struct dm_target *ti, struct bio *bio,
+		       union map_info *map_context)
+{
+	int rw = bio_data_dir(bio);
+	struct iostats_c *ic = ti->private;
+
+	if (likely(test_bit(IOF_LATENCY, &ic->flags))) {
+		unsigned long flags, now;
+
+		spin_lock_irqsave(&ic->lock, flags);
+		now = JIFFIES_32;
+
+		/*
+		 * Check for jiffies overrun before accounting this IO as in
+		 * flight, or the overrun reset would count it twice.
+		 */
+		check_jiffies(ic, now);
+
+		ic->start_inflight[rw] += now;
+		atomic_inc(ic->ios_inflight + rw);
+
+		/* IO counter or start sum overrun -> restart from in flight. */
+		if (unlikely(atomic_inc_and_test(ic->ios + rw) ||
+			     !calc_sum(ic->start + rw, now)))
+			set_inflight_latency(ic, rw);
+
+		if (likely(test_bit(IOF_SIZE, &ic->flags)))
+			ic->size[rw] += bio->bi_size;
+
+		spin_unlock_irqrestore(&ic->lock, flags);
+
+		/* Preserve for subtraction in iostats_end_io(). */
+		map_context->ll = now;
+	} else
+		atomic_inc(ic->ios + rw);
+
+	/* Map to the underlying device. */
+	bio->bi_bdev = ic->dev->bdev;
+
+	return 1;
+}
+
+/*
+ * End IO handler:
+ *
+ * o checks for jiffies or sum variable overrun.
+ *
+ * In case of 'latency' <type> configured:
+ *
+ *	o decrements IO in flight counters
+ *	o sums up IO end jiffies
+ *	o subtracts start jiffies from in flight sums
+ *
+ * In case of 'error' <type> configured:
+ *
+ *	o counts any IO errors
+ */
+static int iostats_end_io(struct dm_target *ti, struct bio *bio,
+			  int error, union map_info *map_context)
+{
+	struct iostats_c *ic = ti->private;
+	const int rw = bio_data_dir(bio);
+
+	if (likely(test_bit(IOF_LATENCY, &ic->flags))) {
+		unsigned long irqflags, now;
+
+		spin_lock_irqsave(&ic->lock, irqflags);
+
+		/* This IO is no longer in flight: take its start time out. */
+		atomic_dec(&ic->ios_inflight[rw]);
+		ic->start_inflight[rw] -= map_context->ll;
+		now = JIFFIES_32;
+
+		/* Check for jiffies overrun. */
+		check_jiffies(ic, now);
+
+		/*
+		 * In case of end sum overrun ->
+		 * restart the rw latency from the in flight IOs.
+		 */
+		if (unlikely(!calc_sum(&ic->end[rw], now)))
+			set_inflight_latency(ic, rw);
+
+		/* Correct IO sizes sum in case of error. */
+		/* FIXME: correct content in bio->bi_size on error ? */
+		if (unlikely(error && test_bit(IOF_SIZE, &ic->flags)))
+			ic->size[rw] -= bio->bi_size;
+
+		spin_unlock_irqrestore(&ic->lock, irqflags);
+	}
+
+	if (unlikely(error) && test_bit(IOF_ERROR, &ic->flags))
+		atomic_inc(&ic->errors[rw]);
+
+	return 0;
+}
+
+/* Calculates the average latency of completed IOs in milliseconds. */
+static unsigned long calc_latency(struct iostats_c *ic, int rw)
+{
+	unsigned done;
+	unsigned long flags;
+	unsigned long long sum;
+
+	/* Quickly grab values in order to do consistent calculation. */
+	spin_lock_irqsave(&ic->lock, flags);
+	done = atomic_read(ic->ios + rw) - atomic_read(ic->ios_inflight + rw);
+	sum = ic->end[rw] - (ic->start[rw] - ic->start_inflight[rw]);
+	spin_unlock_irqrestore(&ic->lock, flags);
+
+	/* IOs still in flight have no end jiffies yet: leave them out. */
+	if (likely(done))
+		return jiffies_to_msecs(sum) / done;
+
+	return 0;
+}
+
+/*
+ * Resume resets the statistics, so that no table reload is needed for that.
+ *
+ * No lock is taken here, because no IOs get queued before we're resumed.
+ */
+static void iostats_resume(struct dm_target *ti)
+{
+	struct iostats_c *ic = ti->private;
+
+	reset_all(ic);
+}
+
+/*
+ * Status.
+ *
+ * In case of in flight ios, the values displayed will be a bit inconsistent
+ * with respect to IO counters, IO latencies, IO size sums and errors being
+ * retrieved non-atomically.
+ */
+static int iostats_status(struct dm_target *ti, status_type_t type,
+			  char *result, unsigned maxlen)
+{
+	struct iostats_c *ic = ti->private;
+	const struct f_type *ft;
+	char devname[16];
+	unsigned sz = 0; /* NOTE: presumably referenced by DMEMIT(). */
+
+	format_dev_t(devname, ic->dev->bdev->bd_dev);
+
+	switch (type) {
+	case STATUSTYPE_TABLE:
+		DMEMIT("%s", devname);
+		for_each_ft(ft)
+			if (test_bit(ft->flag, &ic->flags))
+				DMEMIT(" %s", ft->name);
+
+		break;
+
+	case STATUSTYPE_INFO:
+		DMEMIT("%s r=%u w=%u", devname,
+		       atomic_read(&ic->ios[READ]),
+		       atomic_read(&ic->ios[WRITE]));
+
+		/* Show latency in units of milliseconds. */
+		if (test_bit(IOF_LATENCY, &ic->flags))
+			DMEMIT(" rl=%lu wl=%lu", calc_latency(ic, READ),
+			       calc_latency(ic, WRITE));
+
+		/* Show sizes in units of sectors. */
+		if (test_bit(IOF_SIZE, &ic->flags)) {
+			unsigned long irqflags;
+			unsigned long long rd, wr;
+
+			spin_lock_irqsave(&ic->lock, irqflags);
+			rd = ic->size[READ] >> 9;
+			wr = ic->size[WRITE] >> 9;
+			spin_unlock_irqrestore(&ic->lock, irqflags);
+
+			DMEMIT(" rs=%llu ws=%llu", rd, wr);
+		}
+
+		/* Show number of errors, unless there are none. */
+		if (test_bit(IOF_ERROR, &ic->flags)) {
+			unsigned re = atomic_read(&ic->errors[READ]);
+			unsigned we = atomic_read(&ic->errors[WRITE]);
+
+			if (re || we)
+				DMEMIT(" re=%u we=%u", re, we);
+		}
+
+		break;
+	}
+
+	return 0;
+}
+
+static struct target_type iostats_target = {
+	.name	 = "iostats",
+	.version = { 1, 0, 0 },
+	.module	 = THIS_MODULE,
+	.ctr	 = iostats_ctr,		/* Constructor. */
+	.dtr	 = iostats_dtr,		/* Destructor. */
+	.map	 = iostats_map,		/* Counts IOs on their way down. */
+	.end_io	 = iostats_end_io,	/* Accounts for ended IOs. */
+	.resume	 = iostats_resume,	/* Resets the statistics. */
+	.status	 = iostats_status,	/* Emits statistics or table line. */
+};
+
+/* Module init: register the "iostats" target and log the outcome. */
+static int __init dm_iostats_init(void)
+{
+	const int r = dm_register_target(&iostats_target);
+
+	if (!r)
+		DMINFO("initialized %s", version);
+	else
+		DMERR("Failed to register target [%d]", r);
+
+	return r;
+}
+
+static void __exit dm_iostats_exit(void)
+{
+	const int r = dm_unregister_target(&iostats_target);
+	/* Only log the outcome: this function has nothing to return. */
+	if (!r)
+		DMINFO("exit %s", version);
+	else
+		DMERR("dm-iostats unregister failed %d", r);
+}
+
+/*
+ * Module hooks: init/exit entry points and module information.
+ */
+module_init(dm_iostats_init);
+module_exit(dm_iostats_exit);
+
+MODULE_DESCRIPTION(DM_NAME " iostats target");
+MODULE_AUTHOR("Heinz Mauelshagen <hjm@redhat.com>");
+MODULE_LICENSE("GPL");