/*
 * drivers/misc/logger.c
 *
 * A Logging Subsystem
 *
 * Copyright (C) 2007-2008 Google, Inc.
 *
 * Robert Love <rlove@google.com>
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */
| 19 | |
| 20 | #include <linux/module.h> |
| 21 | #include <linux/fs.h> |
| 22 | #include <linux/miscdevice.h> |
| 23 | #include <linux/uaccess.h> |
| 24 | #include <linux/poll.h> |
| 25 | #include <linux/time.h> |
| 26 | #include "logger.h" |
| 27 | |
| 28 | #include <asm/ioctls.h> |
| 29 | |
| 30 | /* |
| 31 | * struct logger_log - represents a specific log, such as 'main' or 'radio' |
| 32 | * |
| 33 | * This structure lives from module insertion until module removal, so it does |
| 34 | * not need additional reference counting. The structure is protected by the |
| 35 | * mutex 'mutex'. |
| 36 | */ |
| 37 | struct logger_log { |
| 38 | unsigned char * buffer; /* the ring buffer itself */ |
| 39 | struct miscdevice misc; /* misc device representing the log */ |
| 40 | wait_queue_head_t wq; /* wait queue for readers */ |
| 41 | struct list_head readers; /* this log's readers */ |
| 42 | struct mutex mutex; /* mutex protecting buffer */ |
| 43 | size_t w_off; /* current write head offset */ |
| 44 | size_t head; /* new readers start here */ |
| 45 | size_t size; /* size of the log */ |
| 46 | }; |
| 47 | |
| 48 | /* |
| 49 | * struct logger_reader - a logging device open for reading |
| 50 | * |
| 51 | * This object lives from open to release, so we don't need additional |
| 52 | * reference counting. The structure is protected by log->mutex. |
| 53 | */ |
| 54 | struct logger_reader { |
| 55 | struct logger_log * log; /* associated log */ |
| 56 | struct list_head list; /* entry in logger_log's list */ |
| 57 | size_t r_off; /* current read head offset */ |
| 58 | }; |
| 59 | |
/*
 * logger_offset - wrap index 'n' into the ring buffer.
 *
 * Implemented as a mask rather than a modulus, which is only correct when
 * log->size is a power of two. Note that the macro picks up a variable named
 * 'log' from the scope it is expanded in.
 */
#define logger_offset(n)	((log->size - 1) & (n))
| 62 | |
| 63 | /* |
| 64 | * file_get_log - Given a file structure, return the associated log |
| 65 | * |
| 66 | * This isn't aesthetic. We have several goals: |
| 67 | * |
| 68 | * 1) Need to quickly obtain the associated log during an I/O operation |
| 69 | * 2) Readers need to maintain state (logger_reader) |
| 70 | * 3) Writers need to be very fast (open() should be a near no-op) |
| 71 | * |
| 72 | * In the reader case, we can trivially go file->logger_reader->logger_log. |
| 73 | * For a writer, we don't want to maintain a logger_reader, so we just go |
| 74 | * file->logger_log. Thus what file->private_data points at depends on whether |
| 75 | * or not the file was opened for reading. This function hides that dirtiness. |
| 76 | */ |
| 77 | static inline struct logger_log * file_get_log(struct file *file) |
| 78 | { |
| 79 | if (file->f_mode & FMODE_READ) { |
| 80 | struct logger_reader *reader = file->private_data; |
| 81 | return reader->log; |
| 82 | } else |
| 83 | return file->private_data; |
| 84 | } |
| 85 | |
| 86 | /* |
| 87 | * get_entry_len - Grabs the length of the payload of the next entry starting |
| 88 | * from 'off'. |
| 89 | * |
| 90 | * Caller needs to hold log->mutex. |
| 91 | */ |
| 92 | static __u32 get_entry_len(struct logger_log *log, size_t off) |
| 93 | { |
| 94 | __u16 val; |
| 95 | |
| 96 | switch (log->size - off) { |
| 97 | case 1: |
| 98 | memcpy(&val, log->buffer + off, 1); |
| 99 | memcpy(((char *) &val) + 1, log->buffer, 1); |
| 100 | break; |
| 101 | default: |
| 102 | memcpy(&val, log->buffer + off, 2); |
| 103 | } |
| 104 | |
| 105 | return sizeof(struct logger_entry) + val; |
| 106 | } |
| 107 | |
| 108 | /* |
| 109 | * do_read_log_to_user - reads exactly 'count' bytes from 'log' into the |
| 110 | * user-space buffer 'buf'. Returns 'count' on success. |
| 111 | * |
| 112 | * Caller must hold log->mutex. |
| 113 | */ |
| 114 | static ssize_t do_read_log_to_user(struct logger_log *log, |
| 115 | struct logger_reader *reader, |
| 116 | char __user *buf, |
| 117 | size_t count) |
| 118 | { |
| 119 | size_t len; |
| 120 | |
| 121 | /* |
| 122 | * We read from the log in two disjoint operations. First, we read from |
| 123 | * the current read head offset up to 'count' bytes or to the end of |
| 124 | * the log, whichever comes first. |
| 125 | */ |
| 126 | len = min(count, log->size - reader->r_off); |
| 127 | if (copy_to_user(buf, log->buffer + reader->r_off, len)) |
| 128 | return -EFAULT; |
| 129 | |
| 130 | /* |
| 131 | * Second, we read any remaining bytes, starting back at the head of |
| 132 | * the log. |
| 133 | */ |
| 134 | if (count != len) |
| 135 | if (copy_to_user(buf + len, log->buffer, count - len)) |
| 136 | return -EFAULT; |
| 137 | |
| 138 | reader->r_off = logger_offset(reader->r_off + count); |
| 139 | |
| 140 | return count; |
| 141 | } |
| 142 | |
| 143 | /* |
| 144 | * logger_read - our log's read() method |
| 145 | * |
| 146 | * Behavior: |
| 147 | * |
| 148 | * - O_NONBLOCK works |
| 149 | * - If there are no log entries to read, blocks until log is written to |
| 150 | * - Atomically reads exactly one log entry |
| 151 | * |
| 152 | * Optimal read size is LOGGER_ENTRY_MAX_LEN. Will set errno to EINVAL if read |
| 153 | * buffer is insufficient to hold next entry. |
| 154 | */ |
| 155 | static ssize_t logger_read(struct file *file, char __user *buf, |
| 156 | size_t count, loff_t *pos) |
| 157 | { |
| 158 | struct logger_reader *reader = file->private_data; |
| 159 | struct logger_log *log = reader->log; |
| 160 | ssize_t ret; |
| 161 | DEFINE_WAIT(wait); |
| 162 | |
| 163 | start: |
| 164 | while (1) { |
| 165 | prepare_to_wait(&log->wq, &wait, TASK_INTERRUPTIBLE); |
| 166 | |
| 167 | mutex_lock(&log->mutex); |
| 168 | ret = (log->w_off == reader->r_off); |
| 169 | mutex_unlock(&log->mutex); |
| 170 | if (!ret) |
| 171 | break; |
| 172 | |
| 173 | if (file->f_flags & O_NONBLOCK) { |
| 174 | ret = -EAGAIN; |
| 175 | break; |
| 176 | } |
| 177 | |
| 178 | if (signal_pending(current)) { |
| 179 | ret = -EINTR; |
| 180 | break; |
| 181 | } |
| 182 | |
| 183 | schedule(); |
| 184 | } |
| 185 | |
| 186 | finish_wait(&log->wq, &wait); |
| 187 | if (ret) |
| 188 | return ret; |
| 189 | |
| 190 | mutex_lock(&log->mutex); |
| 191 | |
| 192 | /* is there still something to read or did we race? */ |
| 193 | if (unlikely(log->w_off == reader->r_off)) { |
| 194 | mutex_unlock(&log->mutex); |
| 195 | goto start; |
| 196 | } |
| 197 | |
| 198 | /* get the size of the next entry */ |
| 199 | ret = get_entry_len(log, reader->r_off); |
| 200 | if (count < ret) { |
| 201 | ret = -EINVAL; |
| 202 | goto out; |
| 203 | } |
| 204 | |
| 205 | /* get exactly one entry from the log */ |
| 206 | ret = do_read_log_to_user(log, reader, buf, ret); |
| 207 | |
| 208 | out: |
| 209 | mutex_unlock(&log->mutex); |
| 210 | |
| 211 | return ret; |
| 212 | } |
| 213 | |
| 214 | /* |
| 215 | * get_next_entry - return the offset of the first valid entry at least 'len' |
| 216 | * bytes after 'off'. |
| 217 | * |
| 218 | * Caller must hold log->mutex. |
| 219 | */ |
| 220 | static size_t get_next_entry(struct logger_log *log, size_t off, size_t len) |
| 221 | { |
| 222 | size_t count = 0; |
| 223 | |
| 224 | do { |
| 225 | size_t nr = get_entry_len(log, off); |
| 226 | off = logger_offset(off + nr); |
| 227 | count += nr; |
| 228 | } while (count < len); |
| 229 | |
| 230 | return off; |
| 231 | } |
| 232 | |
/*
 * clock_interval - does moving from 'a' to 'b' around the ring pass over 'c'?
 *
 * Returns 1 when a < c <= b, taking wrap-around into account: if b < a the
 * interval is treated as wrapping past the end of the buffer, so 'c' matches
 * when it lies after 'a' or at/before 'b'. Returns 0 otherwise. Note that
 * the start point 'a' itself is excluded while the end point 'b' is included.
 */
static inline int clock_interval(size_t a, size_t b, size_t c)
{
	if (b < a)
		return (a < c || b >= c) ? 1 : 0;	/* wrapped interval */

	return (a < c && b >= c) ? 1 : 0;
}
| 249 | |
| 250 | /* |
| 251 | * fix_up_readers - walk the list of all readers and "fix up" any who were |
| 252 | * lapped by the writer; also do the same for the default "start head". |
| 253 | * We do this by "pulling forward" the readers and start head to the first |
| 254 | * entry after the new write head. |
| 255 | * |
| 256 | * The caller needs to hold log->mutex. |
| 257 | */ |
| 258 | static void fix_up_readers(struct logger_log *log, size_t len) |
| 259 | { |
| 260 | size_t old = log->w_off; |
| 261 | size_t new = logger_offset(old + len); |
| 262 | struct logger_reader *reader; |
| 263 | |
| 264 | if (clock_interval(old, new, log->head)) |
| 265 | log->head = get_next_entry(log, log->head, len); |
| 266 | |
| 267 | list_for_each_entry(reader, &log->readers, list) |
| 268 | if (clock_interval(old, new, reader->r_off)) |
| 269 | reader->r_off = get_next_entry(log, reader->r_off, len); |
| 270 | } |
| 271 | |
| 272 | /* |
| 273 | * do_write_log - writes 'len' bytes from 'buf' to 'log' |
| 274 | * |
| 275 | * The caller needs to hold log->mutex. |
| 276 | */ |
| 277 | static void do_write_log(struct logger_log *log, const void *buf, size_t count) |
| 278 | { |
| 279 | size_t len; |
| 280 | |
| 281 | len = min(count, log->size - log->w_off); |
| 282 | memcpy(log->buffer + log->w_off, buf, len); |
| 283 | |
| 284 | if (count != len) |
| 285 | memcpy(log->buffer, buf + len, count - len); |
| 286 | |
| 287 | log->w_off = logger_offset(log->w_off + count); |
| 288 | |
| 289 | } |
| 290 | |
| 291 | /* |
| 292 | * do_write_log_user - writes 'len' bytes from the user-space buffer 'buf' to |
| 293 | * the log 'log' |
| 294 | * |
| 295 | * The caller needs to hold log->mutex. |
| 296 | * |
| 297 | * Returns 'count' on success, negative error code on failure. |
| 298 | */ |
| 299 | static ssize_t do_write_log_from_user(struct logger_log *log, |
| 300 | const void __user *buf, size_t count) |
| 301 | { |
| 302 | size_t len; |
| 303 | |
| 304 | len = min(count, log->size - log->w_off); |
| 305 | if (len && copy_from_user(log->buffer + log->w_off, buf, len)) |
| 306 | return -EFAULT; |
| 307 | |
| 308 | if (count != len) |
| 309 | if (copy_from_user(log->buffer, buf + len, count - len)) |
| 310 | return -EFAULT; |
| 311 | |
| 312 | log->w_off = logger_offset(log->w_off + count); |
| 313 | |
| 314 | return count; |
| 315 | } |
| 316 | |
| 317 | /* |
| 318 | * logger_aio_write - our write method, implementing support for write(), |
| 319 | * writev(), and aio_write(). Writes are our fast path, and we try to optimize |
| 320 | * them above all else. |
| 321 | */ |
| 322 | ssize_t logger_aio_write(struct kiocb *iocb, const struct iovec *iov, |
| 323 | unsigned long nr_segs, loff_t ppos) |
| 324 | { |
| 325 | struct logger_log *log = file_get_log(iocb->ki_filp); |
| 326 | size_t orig = log->w_off; |
| 327 | struct logger_entry header; |
| 328 | struct timespec now; |
| 329 | ssize_t ret = 0; |
| 330 | |
| 331 | now = current_kernel_time(); |
| 332 | |
| 333 | header.pid = current->tgid; |
| 334 | header.tid = current->pid; |
| 335 | header.sec = now.tv_sec; |
| 336 | header.nsec = now.tv_nsec; |
| 337 | header.len = min_t(size_t, iocb->ki_left, LOGGER_ENTRY_MAX_PAYLOAD); |
| 338 | |
| 339 | /* null writes succeed, return zero */ |
| 340 | if (unlikely(!header.len)) |
| 341 | return 0; |
| 342 | |
| 343 | mutex_lock(&log->mutex); |
| 344 | |
| 345 | /* |
| 346 | * Fix up any readers, pulling them forward to the first readable |
| 347 | * entry after (what will be) the new write offset. We do this now |
| 348 | * because if we partially fail, we can end up with clobbered log |
| 349 | * entries that encroach on readable buffer. |
| 350 | */ |
| 351 | fix_up_readers(log, sizeof(struct logger_entry) + header.len); |
| 352 | |
| 353 | do_write_log(log, &header, sizeof(struct logger_entry)); |
| 354 | |
| 355 | while (nr_segs-- > 0) { |
| 356 | size_t len; |
| 357 | ssize_t nr; |
| 358 | |
| 359 | /* figure out how much of this vector we can keep */ |
| 360 | len = min_t(size_t, iov->iov_len, header.len - ret); |
| 361 | |
| 362 | /* write out this segment's payload */ |
| 363 | nr = do_write_log_from_user(log, iov->iov_base, len); |
| 364 | if (unlikely(nr < 0)) { |
| 365 | log->w_off = orig; |
| 366 | mutex_unlock(&log->mutex); |
| 367 | return nr; |
| 368 | } |
| 369 | |
| 370 | iov++; |
| 371 | ret += nr; |
| 372 | } |
| 373 | |
| 374 | mutex_unlock(&log->mutex); |
| 375 | |
| 376 | /* wake up any blocked readers */ |
| 377 | wake_up_interruptible(&log->wq); |
| 378 | |
| 379 | return ret; |
| 380 | } |
| 381 | |
| 382 | static struct logger_log * get_log_from_minor(int); |
| 383 | |
| 384 | /* |
| 385 | * logger_open - the log's open() file operation |
| 386 | * |
| 387 | * Note how near a no-op this is in the write-only case. Keep it that way! |
| 388 | */ |
| 389 | static int logger_open(struct inode *inode, struct file *file) |
| 390 | { |
| 391 | struct logger_log *log; |
| 392 | int ret; |
| 393 | |
| 394 | ret = nonseekable_open(inode, file); |
| 395 | if (ret) |
| 396 | return ret; |
| 397 | |
| 398 | log = get_log_from_minor(MINOR(inode->i_rdev)); |
| 399 | if (!log) |
| 400 | return -ENODEV; |
| 401 | |
| 402 | if (file->f_mode & FMODE_READ) { |
| 403 | struct logger_reader *reader; |
| 404 | |
| 405 | reader = kmalloc(sizeof(struct logger_reader), GFP_KERNEL); |
| 406 | if (!reader) |
| 407 | return -ENOMEM; |
| 408 | |
| 409 | reader->log = log; |
| 410 | INIT_LIST_HEAD(&reader->list); |
| 411 | |
| 412 | mutex_lock(&log->mutex); |
| 413 | reader->r_off = log->head; |
| 414 | list_add_tail(&reader->list, &log->readers); |
| 415 | mutex_unlock(&log->mutex); |
| 416 | |
| 417 | file->private_data = reader; |
| 418 | } else |
| 419 | file->private_data = log; |
| 420 | |
| 421 | return 0; |
| 422 | } |
| 423 | |
| 424 | /* |
| 425 | * logger_release - the log's release file operation |
| 426 | * |
| 427 | * Note this is a total no-op in the write-only case. Keep it that way! |
| 428 | */ |
| 429 | static int logger_release(struct inode *ignored, struct file *file) |
| 430 | { |
| 431 | if (file->f_mode & FMODE_READ) { |
| 432 | struct logger_reader *reader = file->private_data; |
| 433 | list_del(&reader->list); |
| 434 | kfree(reader); |
| 435 | } |
| 436 | |
| 437 | return 0; |
| 438 | } |
| 439 | |
| 440 | /* |
| 441 | * logger_poll - the log's poll file operation, for poll/select/epoll |
| 442 | * |
| 443 | * Note we always return POLLOUT, because you can always write() to the log. |
| 444 | * Note also that, strictly speaking, a return value of POLLIN does not |
| 445 | * guarantee that the log is readable without blocking, as there is a small |
| 446 | * chance that the writer can lap the reader in the interim between poll() |
| 447 | * returning and the read() request. |
| 448 | */ |
| 449 | static unsigned int logger_poll(struct file *file, poll_table *wait) |
| 450 | { |
| 451 | struct logger_reader *reader; |
| 452 | struct logger_log *log; |
| 453 | unsigned int ret = POLLOUT | POLLWRNORM; |
| 454 | |
| 455 | if (!(file->f_mode & FMODE_READ)) |
| 456 | return ret; |
| 457 | |
| 458 | reader = file->private_data; |
| 459 | log = reader->log; |
| 460 | |
| 461 | poll_wait(file, &log->wq, wait); |
| 462 | |
| 463 | mutex_lock(&log->mutex); |
| 464 | if (log->w_off != reader->r_off) |
| 465 | ret |= POLLIN | POLLRDNORM; |
| 466 | mutex_unlock(&log->mutex); |
| 467 | |
| 468 | return ret; |
| 469 | } |
| 470 | |
| 471 | static long logger_ioctl(struct file *file, unsigned int cmd, unsigned long arg) |
| 472 | { |
| 473 | struct logger_log *log = file_get_log(file); |
| 474 | struct logger_reader *reader; |
| 475 | long ret = -ENOTTY; |
| 476 | |
| 477 | mutex_lock(&log->mutex); |
| 478 | |
| 479 | switch (cmd) { |
| 480 | case LOGGER_GET_LOG_BUF_SIZE: |
| 481 | ret = log->size; |
| 482 | break; |
| 483 | case LOGGER_GET_LOG_LEN: |
| 484 | if (!(file->f_mode & FMODE_READ)) { |
| 485 | ret = -EBADF; |
| 486 | break; |
| 487 | } |
| 488 | reader = file->private_data; |
| 489 | if (log->w_off >= reader->r_off) |
| 490 | ret = log->w_off - reader->r_off; |
| 491 | else |
| 492 | ret = (log->size - reader->r_off) + log->w_off; |
| 493 | break; |
| 494 | case LOGGER_GET_NEXT_ENTRY_LEN: |
| 495 | if (!(file->f_mode & FMODE_READ)) { |
| 496 | ret = -EBADF; |
| 497 | break; |
| 498 | } |
| 499 | reader = file->private_data; |
| 500 | if (log->w_off != reader->r_off) |
| 501 | ret = get_entry_len(log, reader->r_off); |
| 502 | else |
| 503 | ret = 0; |
| 504 | break; |
| 505 | case LOGGER_FLUSH_LOG: |
| 506 | if (!(file->f_mode & FMODE_WRITE)) { |
| 507 | ret = -EBADF; |
| 508 | break; |
| 509 | } |
| 510 | list_for_each_entry(reader, &log->readers, list) |
| 511 | reader->r_off = log->w_off; |
| 512 | log->head = log->w_off; |
| 513 | ret = 0; |
| 514 | break; |
| 515 | } |
| 516 | |
| 517 | mutex_unlock(&log->mutex); |
| 518 | |
| 519 | return ret; |
| 520 | } |
| 521 | |
| 522 | static struct file_operations logger_fops = { |
| 523 | .owner = THIS_MODULE, |
| 524 | .read = logger_read, |
| 525 | .aio_write = logger_aio_write, |
| 526 | .poll = logger_poll, |
| 527 | .unlocked_ioctl = logger_ioctl, |
| 528 | .compat_ioctl = logger_ioctl, |
| 529 | .open = logger_open, |
| 530 | .release = logger_release, |
| 531 | }; |
| 532 | |
/*
 * DEFINE_LOGGER_DEVICE - statically define a log named 'NAME', held in the
 * variable 'VAR', together with its 'SIZE'-byte backing buffer.
 *
 * 'SIZE' must be a power of two, greater than LOGGER_ENTRY_MAX_LEN, and less
 * than LONG_MAX minus LOGGER_ENTRY_MAX_LEN.
 *
 * The expansion already ends in a semicolon, so uses of this macro are not
 * followed by one.
 */
#define DEFINE_LOGGER_DEVICE(VAR, NAME, SIZE) \
static unsigned char _buf_ ## VAR[SIZE]; \
static struct logger_log VAR = { \
	.buffer = _buf_ ## VAR, \
	.size = SIZE, \
	.w_off = 0, \
	.head = 0, \
	.misc = { \
		.name = NAME, \
		.minor = MISC_DYNAMIC_MINOR, \
		.fops = &logger_fops, \
		.parent = NULL, \
	}, \
	.mutex = __MUTEX_INITIALIZER(VAR .mutex), \
	.wq = __WAIT_QUEUE_HEAD_INITIALIZER(VAR .wq), \
	.readers = LIST_HEAD_INIT(VAR .readers), \
};
| 555 | |
| 556 | DEFINE_LOGGER_DEVICE(log_main, LOGGER_LOG_MAIN, 64*1024) |
| 557 | DEFINE_LOGGER_DEVICE(log_events, LOGGER_LOG_EVENTS, 256*1024) |
| 558 | DEFINE_LOGGER_DEVICE(log_radio, LOGGER_LOG_RADIO, 64*1024) |
| 559 | |
| 560 | static struct logger_log * get_log_from_minor(int minor) |
| 561 | { |
| 562 | if (log_main.misc.minor == minor) |
| 563 | return &log_main; |
| 564 | if (log_events.misc.minor == minor) |
| 565 | return &log_events; |
| 566 | if (log_radio.misc.minor == minor) |
| 567 | return &log_radio; |
| 568 | return NULL; |
| 569 | } |
| 570 | |
| 571 | static int __init init_log(struct logger_log *log) |
| 572 | { |
| 573 | int ret; |
| 574 | |
| 575 | ret = misc_register(&log->misc); |
| 576 | if (unlikely(ret)) { |
| 577 | printk(KERN_ERR "logger: failed to register misc " |
| 578 | "device for log '%s'!\n", log->misc.name); |
| 579 | return ret; |
| 580 | } |
| 581 | |
| 582 | printk(KERN_INFO "logger: created %luK log '%s'\n", |
| 583 | (unsigned long) log->size >> 10, log->misc.name); |
| 584 | |
| 585 | return 0; |
| 586 | } |
| 587 | |
| 588 | static int __init logger_init(void) |
| 589 | { |
| 590 | int ret; |
| 591 | |
| 592 | ret = init_log(&log_main); |
| 593 | if (unlikely(ret)) |
| 594 | goto out; |
| 595 | |
| 596 | ret = init_log(&log_events); |
| 597 | if (unlikely(ret)) |
| 598 | goto out; |
| 599 | |
| 600 | ret = init_log(&log_radio); |
| 601 | if (unlikely(ret)) |
| 602 | goto out; |
| 603 | |
| 604 | out: |
| 605 | return ret; |
| 606 | } |
| 607 | device_initcall(logger_init); |