---
 drivers/md/dm-iostats.c |  488 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 488 insertions(+)

Index: linux/drivers/md/dm-iostats.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux/drivers/md/dm-iostats.c	2007-06-06 20:40:10.000000000 +0100
@@ -0,0 +1,488 @@
+/*
+ * Copyright (C) 2007 Red Hat GmbH
+ *
+ * Module Author: Heinz Mauelshagen (Mauelshagen@RedHat.com)
+ *
+ * Gather I/O statistics.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+
+#include <linux/ctype.h>
+
+#define DM_MSG_PREFIX "dm-iostats"
+
+static const char *version = "v1.0";
+
+/*
+ * Cook up 32 bit jiffies on 64 bit platform.
+ *
+ * When long is wider than 32 bits, jiffies is masked down to its low
+ * 32 bits; otherwise jiffies is used unchanged.
+ */
+#if BITS_PER_LONG > 32
+#define JIFFIES_32 (jiffies & 0xFFFFFFFF)
+#else
+#define JIFFIES_32 jiffies
+#endif
+
+/*
+ * Feature flags.
+ *
+ * These values are what the rest of the file passes to set_bit() and
+ * test_bit() on the 'flags' word of struct iostats_c.
+ */
+enum feature_flags {
+	IOF_LATENCY = 0x01,	/* IO latency. */
+	IOF_SIZE = 0x02,	/* IO size sums. */
+	IOF_ERROR = 0x04,	/* IO errors. */
+};
+
+/*
+ * IO statistics context.
+ *
+ * The constructor allocates this structure only up to the last member
+ * needed by the configured types (sizes are derived from member offsets in
+ * f_types[] and iostats_ctr()), so the ORDER of the members below is
+ * significant: a member must never be accessed unless the type it belongs
+ * to has been configured.
+ *
+ * All two-element arrays are indexed by READ/WRITE.
+ */
+struct iostats_c {
+	unsigned long flags;	/* Configured types; see enum feature_flags. */
+	struct dm_dev *dev;	/* Underlying device IO is mapped to. */
+
+	atomic_t ios[2];	/* Counter of read/write IOs. */
+
+	/* This field is present in case we count IO errors. */
+	atomic_t errors[2];	/* Number of IO errors. */
+
+	/*
+	 * These fields are optionally only present,
+	 * if we are recording the IO latency.
+	 */
+	spinlock_t lock;	/* Taken around the sums below. */
+	unsigned long last_jiffies;	/* Last jiffies seen; detects overrun. */
+	unsigned long long start[2];	/* Sum start jiffies. */
+	unsigned long long start_inflight[2]; /* Sum in flight IO jiffies.*/
+	unsigned long long end[2];	/* Sum end jiffies. */
+	atomic_t ios_inflight[2];	/* Counter of IOs in flight. */
+
+	/*
+	 * These fields are optionally only present,
+	 * if we are recording the IO sizes sums.
+	 */
+	unsigned long long size[2];	/* Sum of IO sizes. */
+};
+
+/* Reset IO latency vars in case of overrun and preset IO counter.
*/ +static void reset_latency(struct iostats_c *ic, int rw) +{ + ic->start[rw] = ic->end[rw] = 0; + atomic_set(ic->ios + rw, 0); +} + +/* Reset all counters/sums on init or resume. */ +static void reset_all(struct iostats_c *ic) +{ + if (test_bit(IOF_LATENCY, &ic->flags)) { + reset_latency(ic, READ); + reset_latency(ic, WRITE); + } + + if (test_bit(IOF_SIZE, &ic->flags)) + ic->size[READ] = ic->size[WRITE] = 0; + + if (test_bit(IOF_ERROR, &ic->flags)) { + atomic_set(ic->errors + READ, 0); + atomic_set(ic->errors + WRITE, 0); + } +} + +/* + * Construct an IO status mapping: + * + * <dev_path> [<type>...] + * + * available types: latency, size, error + */ +/* iostats <type> parameter definitions. */ +#define STR_LATENCY "latency" +#define STR_SIZE "size" +#define STR_ERROR "error" + +/* Structure offset macro for iostats_size definitions below. */ +#define OFFSET(member) ((size_t) &((struct iostats_c*) NULL)->member) + +/* iostats feature <type> specs array. */ +struct f_type { + char *name; /* <type> */ + size_t len; /* String length of name. */ + enum feature_flags flag; /* Feature flag to set. */ + size_t size; /* Size of structure to allocate. */ +} static const f_types[] = { + { STR_LATENCY, sizeof(STR_LATENCY) - 1, IOF_LATENCY, OFFSET(size) }, + { STR_SIZE, sizeof(STR_SIZE) - 1, IOF_SIZE, sizeof(struct iostats_c) }, + { STR_ERROR, sizeof(STR_ERROR) - 1, IOF_ERROR, OFFSET(lock) }, +}; + +#define for_each_ft(ft) for (ft = f_types; ft < ARRAY_END(f_types); ft++) + +static int iostats_ctr(struct dm_target *ti, unsigned argc, char **argv) +{ + int i; + unsigned long flags = 0; + size_t size = OFFSET(errors); /* Smallest possible structure size. */ + struct iostats_c *ic; + + if (argc > ARRAY_SIZE(f_types) + 1) { + ti->error = "dm-iostats: incorrect number of arguments"; + return -EINVAL; + } + + /* Check constructor <type> arguments. 
*/ + for (i = 1; i < argc; i++) { + const struct f_type *ft; + + for_each_ft(ft) { + if (strncmp(argv[i], ft->name, ft->len)) + continue; + + set_bit(ft->flag, &flags); + if (ft->size > size) + size = ft->size; + + break; + } + + if (ft == ARRAY_END(f_types)) { + ti->error = "dm-iostats: invalid iostats <type>"; + return -EINVAL; + } + } + + /* Check senseful iostats types given. */ + if (!test_bit(IOF_LATENCY, &flags) && + test_bit(IOF_SIZE, &flags)) { + ti->error = "dm-iostats: mandatory type 'latency' with 'size'"; + return -EINVAL; + } + + ic = kmalloc(size, GFP_KERNEL); + if (ic) + memset(ic, 0, size); + else { + ti->error = "dm-iostats: cannot allocate iostats conetext"; + return -ENOMEM; + } + + if (dm_get_device(ti, *argv, ti->begin, ti->len, + dm_table_get_mode(ti->table), &ic->dev)) { + ti->error = "dm-iostats: device lookup failed"; + kfree(ic); + return -ENXIO; + } + + ic->flags = flags; + if (test_bit(IOF_LATENCY, &flags)) { + spin_lock_init(&ic->lock); + atomic_set(ic->ios_inflight + READ, 0); + atomic_set(ic->ios_inflight + WRITE, 0); + reset_all(ic); + } + + ti->private = ic; + + return 0; +} + +/* + * Destruct an iostats mapping. + */ +static void iostats_dtr(struct dm_target *ti) +{ + struct iostats_c *ic = ti->private; + + dm_put_device(ti, ic->dev); + kfree(ic); +} + +/* + * iostats_map() and iostats_end_io() support functions. + */ +/* Summarize jiffies (checking overrun). */ +static inline int calc_sum(unsigned long long *sum, unsigned long now) +{ + unsigned long long s = *sum + now; + + if (unlikely(s < *sum)) + s = 0; + + return (*sum = s); +} + +/* Set latency and IO counter for READ or WRITE to actual in flight IO data. 
*/
+static void set_inflight_latency(struct iostats_c *ic, int rw)
+{
+	atomic_set(ic->ios + rw, atomic_read(ic->ios_inflight + rw));
+	ic->start[rw] = ic->start_inflight[rw];
+	ic->end[rw] = 0;	/* No completion has been summed yet. */
+}
+/* Apply set_inflight_latency() to both the READ and the WRITE direction. */
+static void set_inflight_latencies(struct iostats_c *ic)
+{
+	set_inflight_latency(ic, READ);
+	set_inflight_latency(ic, WRITE);
+}
+
+/*
+ * Check for jiffies overrun.
+ *
+ * Remembers 'now' in ic->last_jiffies and compares it with the previously
+ * remembered value; a smaller 'now' is taken as an overrun.
+ *
+ * In case of overrun ->
+ * set both READ and WRITE latencies to in flight ones.
+ *
+ * Both callers in this file invoke it with ic->lock held.
+ */
+static inline void check_jiffies(struct iostats_c *ic, unsigned long now)
+{
+	unsigned long lj = ic->last_jiffies;
+
+	ic->last_jiffies = now;
+
+	if (unlikely(now < lj))
+		set_inflight_latencies(ic);
+}
+
+/*
+ * Read/write statistics mapping:
+ *
+ * o checks for jiffies or sum variable overrun.
+ * o sums up read and write counts
+ *
+ * In case of 'latency' <type> configured:
+ *
+ * o increments IO in flight counters
+ * o sums up IO start jiffies for better accuracy; see calc_latency()
+ *
+ * In case of 'size' <type> configured:
+ *
+ * o sums up IO sizes
+ *
+ * The start time is stored in map_context->ll for iostats_end_io().
+ * The bio is redirected to the underlying device and 1 is returned
+ * (presumably telling the device-mapper core to submit the remapped
+ * bio -- confirm against dm.h).
+ */
+static int iostats_map(struct dm_target *ti, struct bio *bio,
+		       union map_info *map_context)
+{
+	int rw = bio_data_dir(bio);
+	struct iostats_c *ic = ti->private;
+
+	if (likely(test_bit(IOF_LATENCY, &ic->flags))) {
+		unsigned long flags, now;
+
+		spin_lock_irqsave(&ic->lock, flags);
+
+		now = JIFFIES_32;
+		ic->start_inflight[rw] += now;
+		atomic_inc(ic->ios_inflight + rw);
+
+		/*
+		 * Check for jiffies overrun.
+		 *
+		 * NOTE(review): if this resets the sums, start[rw] and
+		 * ios[rw] are taken from the in flight values, which already
+		 * include this IO; the code below then adds 'now' and
+		 * increments the counter once more. Looks like a double
+		 * count on the overrun path -- confirm.
+		 */
+		check_jiffies(ic, now);
+
+		/*
+		 * In case of IO counter or start sum overrun ->
+		 * set rw latency to in flight one.
+		 */
+		if (unlikely(atomic_inc_and_test(ic->ios + rw)) ||
+		    !calc_sum(ic->start + rw, now))
+			set_inflight_latency(ic, rw);
+
+		if (likely(test_bit(IOF_SIZE, &ic->flags)))
+			ic->size[rw] += bio->bi_size;
+
+		spin_unlock_irqrestore(&ic->lock, flags);
+
+		/* Preserve for subtraction in iostats_end_io(). */
+		map_context->ll = now;
+	} else
+		atomic_inc(ic->ios + rw);	/* Plain IO count only. */
+
+	/* Map to the underlying device. */
+	bio->bi_bdev = ic->dev->bdev;
+
+	return 1;
+}
+
+/*
+ * End IO handler:
+ *
+ * o checks for jiffies or sum variable overrun.
+ *
+ * In case of 'latency' <type> configured:
+ *
+ * o decrements IO in flight counters
+ * o sums up IO end jiffies
+ * o subtracts start jiffies from in flight sums
+ *
+ * In case of 'error' <type> configured:
+ *
+ * o counts any IO errors
+ *
+ * map_context->ll is expected to hold the start time stored by
+ * iostats_map(). Always returns 0.
+ */
+static int iostats_end_io(struct dm_target *ti, struct bio *bio,
+			  int error, union map_info *map_context)
+{
+	int rw = bio_data_dir(bio);
+	struct iostats_c *ic = ti->private;
+
+	if (likely(test_bit(IOF_LATENCY, &ic->flags))) {
+		unsigned long flags, now;
+
+		spin_lock_irqsave(&ic->lock, flags);
+
+		/* Subtract in flight start time and decrement in flight ios.*/
+		ic->start_inflight[rw] -= map_context->ll;
+		atomic_dec(ic->ios_inflight + rw);
+		now = JIFFIES_32;
+
+		/* Check for jiffies overrun. */
+		check_jiffies(ic, now);
+
+		/*
+		 * In case of end sum overrun ->
+		 * set rw latency to in flight one.
+		 */
+		if (unlikely(!calc_sum(ic->end + rw, now)))
+			set_inflight_latency(ic, rw);
+
+		/*
+		 * Correct IO sizes sum in case of error: take back what
+		 * iostats_map() added for this bio.
+		 */
+		/* FIXME: correct content in bio->bi_size on error ? */
+		if (unlikely(error && test_bit(IOF_SIZE, &ic->flags)))
+			ic->size[rw] -= bio->bi_size;
+
+		spin_unlock_irqrestore(&ic->lock, flags);
+	}
+
+	if (unlikely(error) && test_bit(IOF_ERROR, &ic->flags))
+		atomic_inc(ic->errors + rw);
+
+	return 0;
+}
+
+/*
+ * Calculates the average latency in milliseconds.
+ *
+ * The jiffies spent on IO are computed as
+ *
+ *	end - (start - start_inflight)
+ *
+ * i.e. the start times of IOs still in flight are taken out of the start
+ * sum before it is subtracted from the end sum. The result is converted
+ * with jiffies_to_msecs() and divided by ios[rw]; that counter is
+ * incremented in iostats_map(), so it includes IOs still in flight.
+ *
+ * Returns 0 if no IO has been counted.
+ */
+static unsigned long calc_latency(struct iostats_c *ic, int rw)
+{
+	unsigned long flags, ios;
+	unsigned long long start, start_inflight, end;
+
+	/* Quickly grab values in order to do consistent calculation. */
+	spin_lock_irqsave(&ic->lock, flags);
+	ios = atomic_read(ic->ios + rw);
+	start = ic->start[rw];
+	start_inflight = ic->start_inflight[rw];
+	end = ic->end[rw];
+	spin_unlock_irqrestore(&ic->lock, flags);
+
+	if (likely(ios))
+		return jiffies_to_msecs(end - (start - start_inflight)) / ios;
+
+	return 0;
+}
+
+/*
+ * Resume used to reset statistics in order to
+ * avoid a complete table reload for this purpose.
+ *
+ * No need to take out a lock here, because no
+ * IOs will get queued before we're resumed.
+ */
+static void iostats_resume(struct dm_target *ti)
+{
+	reset_all(ti->private);
+}
+
+/*
+ * Status.
+ *
+ * STATUSTYPE_INFO emits
+ *
+ *	<dev> r=<reads> w=<writes> [rl=<ms> wl=<ms>] [rs=<n> ws=<n>]
+ *	[re=<errors> we=<errors>]
+ *
+ * where the bracketed groups depend on the configured types, and
+ * STATUSTYPE_TABLE emits the device followed by the configured type names.
+ * <dev> is whatever format_dev_t() produces for the underlying device.
+ *
+ * In case of in flight ios, the values displayed will be a bit inconsistent
+ * with respect to IO counters, IO latencies, IO size sums and errors being
+ * retrieved non-atomically.
+ *
+ * Always returns 0.
+ */
+static int iostats_status(struct dm_target *ti, status_type_t type,
+			  char *result, unsigned maxlen)
+{
+	unsigned sz = 0;
+	char buffer[16];
+	struct iostats_c *ic = ti->private;
+	const struct f_type *ft;
+
+	format_dev_t(buffer, ic->dev->bdev->bd_dev);
+
+	switch (type) {
+	case STATUSTYPE_INFO:
+		DMEMIT("%s r=%u w=%u", buffer,
+		       atomic_read(ic->ios + READ),
+		       atomic_read(ic->ios + WRITE));
+
+		/* Show latency in units of milliseconds. */
+		if (test_bit(IOF_LATENCY, &ic->flags))
+			DMEMIT(" rl=%lu wl=%lu",
+			       calc_latency(ic, READ),
+			       calc_latency(ic, WRITE));
+
+		/*
+		 * Show sizes in units of sectors: the sums are shifted
+		 * right by 9, i.e. divided by 512.
+		 */
+		if (test_bit(IOF_SIZE, &ic->flags)) {
+			unsigned long flags;
+			unsigned long long sr, sw;
+
+			spin_lock_irqsave(&ic->lock, flags);
+			sr = ic->size[READ] >> 9;
+			sw = ic->size[WRITE] >> 9;
+			spin_unlock_irqrestore(&ic->lock, flags);
+
+			DMEMIT(" rs=%llu ws=%llu", sr, sw);
+		}
+
+		/*
+		 * Show number of errors; the fields are omitted
+		 * while both counters are zero.
+		 */
+		if (test_bit(IOF_ERROR, &ic->flags)) {
+			unsigned re = atomic_read(ic->errors + READ);
+			unsigned we = atomic_read(ic->errors + WRITE);
+
+			if (re || we)
+				DMEMIT(" re=%u we=%u", re, we);
+
+		}
+
+		break;
+
+	case STATUSTYPE_TABLE:
+		DMEMIT("%s", buffer);
+		for_each_ft(ft)
+			if (test_bit(ft->flag, &ic->flags))
+				DMEMIT(" %s", ft->name);
+	}
+
+	return 0;
+}
+/* Target type handed to dm_register_target()/dm_unregister_target(). */
+static struct target_type iostats_target = {
+	.name = "iostats",
+	.version = {1, 0, 0},
+	.module = THIS_MODULE,
+	.ctr = iostats_ctr,
+	.dtr = iostats_dtr,
+	.map = iostats_map,
+	.end_io = iostats_end_io,
+	.resume = iostats_resume,
+	.status = iostats_status,
+};
+/* Module init: register the target, log the outcome, return the result. */
+static int __init dm_iostats_init(void)
+{
+	int r;
+
+	r = dm_register_target(&iostats_target);
+	if (r)
+		DMERR("Failed to register target [%d]", r);
+	else
+		DMINFO("initialized %s", version);
+
+	return r;
+}
+/* Module exit: unregister the target and log the outcome. */
+static void __exit dm_iostats_exit(void)
+{
+	int r = dm_unregister_target(&iostats_target);
+
+	if (r)
+		DMERR("dm-iostats unregister failed %d", r);
+	else
+		DMINFO("exit %s", version);
+}
+
+/*
+ * Module hooks.
+ */
+module_init(dm_iostats_init);
+module_exit(dm_iostats_exit);
+
+MODULE_DESCRIPTION(DM_NAME " iostats target");
+MODULE_AUTHOR("Heinz Mauelshagen <hjm@redhat.com>");
+MODULE_LICENSE("GPL");