ChangeSet 1.1276.8.8, 2004/01/28 12:40:17-08:00, david-b@pacbell.net [PATCH] USB: ehci update: 3/3, highspeed iso rewrite David Brownell wrote: > This is an updated version of a patch submitted to me from > Michal Sojka , basically providing a > much-needed rewrite of the highspeed ISO support. I updated > the scheduling and made it a closer match to how OHCI works; > and also tested it a bunch. > > So far it seems most of the requests for highspeed ISO support > have been for realtime data collection -- custom apps, nothing > a mainstream kernel would ship with. The USB Video class is > now defined; highspeed video will also need these updates. > > Key changes: > > - Define an "iso_stream" head for iso endpoints. This acts > enough like a QH that endpoint_disable() works. It holds the > queue of ITDs, and the endpoint's current schedule state. > And it's easy to find (spinlocked array access), no search. > > - Uses a temporary "itd_sched" while submitting each URB, with > not-yet-linked ITDs and request-specific metadata. There's > a per-stream cache of ITDs, so resubmitting ISO urbs (to > achieve a "ring" of transfers) is typically cheap. > > - Scheduling for most URBs is almost a NOP: just a sanity > check to make sure there's no need to reschedule, and then > just link into the schedule at the current schedule slot. > (The previous code was a gross hack that didn't even work > reasonably with more than two URBs queued.) > > - Is a reasonable model to use with full speed ISO transfers. > (They need additional TT scheduling hooks, most of which > are already written but not merged.) > > - Handles several cases the previous code didn't, including > high bandwidth transfers (loads up to 24 MByte/sec) > > - Has had more testing than the old code, including 20+ hour > successful IN+OUT runs, more varied transfer intervals and > maxpacket sizes. (Using net2280 and a gadgetfs driver.) > > So it's worth replacing the existing code with this; there > aren't too many rough edges, and it's much more fixable than > the previous version. > > Please merge, this goes on top of the preceding two patches. > > - Dave > > p.s. Many thanks, Michal! > > And here's a version of this for 2.4.24pre. Differences include again usb device refcounting (affects only a debug message here), and handling of what endpoint_disable() does in 2.6 ... basically usb device drivers in 2.4 that don't disconnect() by waiting for all URBs have always been oops-prone. This doesn't try to improve on that by doing more than reporting that driver problem. drivers/usb/host/ehci-dbg.c | 30 + drivers/usb/host/ehci-hcd.c | 15 drivers/usb/host/ehci-sched.c | 870 +++++++++++++++++++++++++++--------------- drivers/usb/host/ehci.h | 71 +++ 4 files changed, 668 insertions(+), 318 deletions(-) diff -Nru a/drivers/usb/host/ehci-dbg.c b/drivers/usb/host/ehci-dbg.c --- a/drivers/usb/host/ehci-dbg.c Wed Jan 28 13:36:17 2004 +++ b/drivers/usb/host/ehci-dbg.c Wed Jan 28 13:36:17 2004 @@ -150,6 +150,36 @@ dbg_qtd ("overlay", ehci, (struct ehci_qtd *) &qh->hw_qtd_next); } +static void __attribute__((__unused__)) +dbg_itd (const char *label, struct ehci_hcd *ehci, struct ehci_itd *itd) +{ + ehci_dbg (ehci, "%s [%d] itd %p, next %08x, urb %p\n", + label, itd->frame, itd, le32_to_cpu(itd->hw_next), itd->urb); + ehci_dbg (ehci, + " trans: %08x %08x %08x %08x %08x %08x %08x %08x\n", + le32_to_cpu(itd->hw_transaction[0]), + le32_to_cpu(itd->hw_transaction[1]), + le32_to_cpu(itd->hw_transaction[2]), + le32_to_cpu(itd->hw_transaction[3]), + le32_to_cpu(itd->hw_transaction[4]), + le32_to_cpu(itd->hw_transaction[5]), + le32_to_cpu(itd->hw_transaction[6]), + le32_to_cpu(itd->hw_transaction[7])); + ehci_dbg (ehci, + " buf: %08x %08x %08x %08x %08x %08x %08x\n", + le32_to_cpu(itd->hw_bufp[0]), + le32_to_cpu(itd->hw_bufp[1]), + le32_to_cpu(itd->hw_bufp[2]), + le32_to_cpu(itd->hw_bufp[3]), + le32_to_cpu(itd->hw_bufp[4]), + le32_to_cpu(itd->hw_bufp[5]), + le32_to_cpu(itd->hw_bufp[6])); + ehci_dbg (ehci, " index: %d %d %d %d %d %d %d %d\n", + itd->index[0], itd->index[1], itd->index[2], + itd->index[3], itd->index[4], itd->index[5], + itd->index[6], itd->index[7]); +} + static int __attribute__((__unused__)) dbg_status_buf (char *buf, unsigned len, char *label, u32 status) { diff -Nru a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c --- a/drivers/usb/host/ehci-hcd.c Wed Jan 28 13:36:17 2004 +++ b/drivers/usb/host/ehci-hcd.c Wed Jan 28 13:36:17 2004 @@ -68,6 +68,9 @@ * * HISTORY: * + * 2003-12-29 Rewritten high speed iso transfer support (by Michal Sojka, + * , updates by DB). + * * 2002-11-29 Correct handling for hw async_next register. * 2002-08-06 Handling for bulk and interrupt transfers is mostly shared; * only scheduling is different, no arbitrary limitations. @@ -91,7 +94,7 @@ * 2001-June Works with usb-storage and NEC EHCI on 2.4 */ -#define DRIVER_VERSION "2003-Jun-19/2.4" +#define DRIVER_VERSION "2003-Dec-29/2.4" #define DRIVER_AUTHOR "David Brownell" #define DRIVER_DESC "USB 2.0 'Enhanced' Host Controller (EHCI) Driver" @@ -897,8 +900,16 @@ struct ehci_qh *qh; char *why; - /* dev->ep never has ITDs or SITDs */ + /* dev->ep is a QH unless info1.maxpacket of zero + * marks an iso stream head. + * FIXME do something smarter here with ISO + */ qh = (struct ehci_qh *) dev->ep [i]; + if (qh->hw_info1 == 0) { + ehci_err (ehci, "no iso cleanup!!\n"); + continue; + } + /* detect/report non-recoverable errors */ if (in_interrupt ()) diff -Nru a/drivers/usb/host/ehci-sched.c b/drivers/usb/host/ehci-sched.c --- a/drivers/usb/host/ehci-sched.c Wed Jan 28 13:36:17 2004 +++ b/drivers/usb/host/ehci-sched.c Wed Jan 28 13:36:17 2004 @@ -1,5 +1,6 @@ /* - * Copyright (c) 2001-2002 by David Brownell + * Copyright (c) 2001-2003 by David Brownell + * Copyright (c) 2003 Michal Sojka, for high-speed iso transfers * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -27,10 +28,10 @@ * Note that for interrupt transfers, the QH/QTD manipulation is shared * with the "asynchronous" transaction support (control/bulk transfers). * The only real difference is in how interrupt transfers are scheduled. - * We get some funky API restrictions from the current URB model, which - * works notably better for reading transfers than for writing. (And - * which accordingly needs to change before it'll work inside devices, - * or with "USB On The Go" additions to USB 2.0 ...) + * + * For ISO, we make an "iso_stream" head to serve the same role as a QH. + * It keeps track of every ITD (or SITD) that's linked, and holds enough + * pre-calculated schedule data to make appending to the queue be quick. */ static int ehci_get_frame (struct usb_hcd *hcd); @@ -126,9 +127,7 @@ q = &q->fstn->fstn_next; break; case Q_TYPE_ITD: - /* NOTE the "one uframe per itd" policy */ - if (q->itd->hw_transaction [uframe] != 0) - usecs += q->itd->usecs; + usecs += q->itd->usecs [uframe]; q = &q->itd->itd_next; break; #ifdef have_split_iso @@ -520,348 +519,565 @@ /*-------------------------------------------------------------------------*/ -static void -itd_free_list (struct ehci_hcd *ehci, struct urb *urb) +static inline struct ehci_iso_stream * +iso_stream_alloc (int mem_flags) { - struct ehci_itd *first_itd = urb->hcpriv; + struct ehci_iso_stream *stream; - while (!list_empty (&first_itd->itd_list)) { - struct ehci_itd *itd; - - itd = list_entry ( - first_itd->itd_list.next, - struct ehci_itd, itd_list); - list_del (&itd->itd_list); - pci_pool_free (ehci->itd_pool, itd, itd->itd_dma); + stream = kmalloc(sizeof *stream, mem_flags); + if (likely (stream != 0)) { + memset (stream, 0, sizeof(*stream)); + INIT_LIST_HEAD(&stream->itd_list); + INIT_LIST_HEAD(&stream->free_itd_list); + stream->next_uframe = -1; + stream->refcount = 1; } - pci_pool_free (ehci->itd_pool, first_itd, first_itd->itd_dma); - urb->hcpriv = 0; + return stream; } -static int -itd_fill ( - struct ehci_hcd *ehci, - struct ehci_itd *itd, - struct urb *urb, - unsigned index, // urb->iso_frame_desc [index] - dma_addr_t dma // mapped transfer buffer -) { - u64 temp; - u32 buf1; - unsigned i, epnum, maxp, multi; - unsigned length; - int is_input; - - itd->hw_next = EHCI_LIST_END; - itd->urb = urb; - itd->index = index; - - /* tell itd about its transfer buffer, max 2 pages */ - length = urb->iso_frame_desc [index].length; - dma += urb->iso_frame_desc [index].offset; - temp = dma & ~0x0fff; - for (i = 0; i < 2; i++) { - itd->hw_bufp [i] = cpu_to_le32 ((u32) temp); - itd->hw_bufp_hi [i] = cpu_to_le32 ((u32)(temp >> 32)); - temp += 0x1000; - } - itd->buf_dma = dma; +static inline void +iso_stream_init ( + struct ehci_iso_stream *stream, + struct usb_device *dev, + int pipe, + unsigned interval +) +{ + u32 buf1; + unsigned epnum, maxp, multi; + int is_input; + long bandwidth; /* * this might be a "high bandwidth" highspeed endpoint, - * as encoded in the ep descriptor's maxpacket field + * as encoded in the ep descriptor's wMaxPacket field */ - epnum = usb_pipeendpoint (urb->pipe); - is_input = usb_pipein (urb->pipe); + epnum = usb_pipeendpoint (pipe); + is_input = usb_pipein (pipe) ? USB_DIR_IN : 0; if (is_input) { - maxp = urb->dev->epmaxpacketin [epnum]; + maxp = dev->epmaxpacketin [epnum]; buf1 = (1 << 11); } else { - maxp = urb->dev->epmaxpacketout [epnum]; + maxp = dev->epmaxpacketout [epnum]; buf1 = 0; } - buf1 |= (maxp & 0x03ff); - multi = 1; - multi += (maxp >> 11) & 0x03; - maxp &= 0x03ff; + + multi = hb_mult(maxp); + maxp = max_packet(maxp); + buf1 |= maxp; maxp *= multi; - /* transfer can't fit in any uframe? */ - if (length < 0 || maxp < length) { - dbg ("BAD iso packet: %d bytes, max %d, urb %p [%d] (of %d)", - length, maxp, urb, index, - urb->iso_frame_desc [index].length); - return -ENOSPC; - } - itd->usecs = usb_calc_bus_time (USB_SPEED_HIGH, is_input, 1, length); - - /* "plus" info in low order bits of buffer pointers */ - itd->hw_bufp [0] |= cpu_to_le32 ((epnum << 8) | urb->dev->devnum); - itd->hw_bufp [1] |= cpu_to_le32 (buf1); - itd->hw_bufp [2] |= cpu_to_le32 (multi); - - /* figure hw_transaction[] value (it's scheduled later) */ - itd->transaction = EHCI_ISOC_ACTIVE; - itd->transaction |= dma & 0x0fff; /* offset; buffer=0 */ - if ((index + 1) == urb->number_of_packets) - itd->transaction |= EHCI_ITD_IOC; /* end-of-urb irq */ - itd->transaction |= length << 16; - cpu_to_le32s (&itd->transaction); + stream->dev = (struct hcd_dev *)dev->hcpriv; + + stream->bEndpointAddress = is_input | epnum; + stream->interval = interval; + stream->maxp = maxp; + + stream->buf0 = cpu_to_le32 ((epnum << 8) | dev->devnum); + stream->buf1 = cpu_to_le32 (buf1); + stream->buf2 = cpu_to_le32 (multi); + + /* usbfs wants to report the average usecs per frame tied up + * when transfers on this endpoint are scheduled ... + */ + stream->usecs = HS_USECS_ISO (maxp); + bandwidth = stream->usecs * 8; + bandwidth /= 1 << (interval - 1); + stream->bandwidth = bandwidth; +} + +static void +iso_stream_put(struct ehci_hcd *ehci, struct ehci_iso_stream *stream) +{ + stream->refcount--; + + /* free whenever just a dev->ep reference remains. + * not like a QH -- no persistent state (toggle, halt) + */ + if (stream->refcount == 1) { + int is_in; + + // BUG_ON (!list_empty(&stream->itd_list)); + + while (!list_empty (&stream->free_itd_list)) { + struct ehci_itd *itd; + + itd = list_entry (stream->free_itd_list.next, + struct ehci_itd, itd_list); + list_del (&itd->itd_list); + pci_pool_free (ehci->itd_pool, itd, itd->itd_dma); + } + + is_in = (stream->bEndpointAddress & USB_DIR_IN) ? 0x10 : 0; + stream->bEndpointAddress &= 0x0f; + stream->dev->ep [is_in + stream->bEndpointAddress] = 0; + + if (stream->rescheduled) { + ehci_info (ehci, "ep%d%s-iso rescheduled " + "%lu times in %lu seconds\n", + stream->bEndpointAddress, is_in ? "in" : "out", + stream->rescheduled, + ((jiffies - stream->start)/HZ) + ); + } + + kfree(stream); + } +} + +static inline struct ehci_iso_stream * +iso_stream_get (struct ehci_iso_stream *stream) +{ + if (likely (stream != 0)) + stream->refcount++; + return stream; +} + +static struct ehci_iso_stream * +iso_stream_find (struct ehci_hcd *ehci, struct urb *urb) +{ + unsigned epnum; + struct hcd_dev *dev; + struct ehci_iso_stream *stream; + unsigned long flags; + + epnum = usb_pipeendpoint (urb->pipe); + if (usb_pipein(urb->pipe)) + epnum += 0x10; + + spin_lock_irqsave (&ehci->lock, flags); + + dev = (struct hcd_dev *)urb->dev->hcpriv; + stream = dev->ep [epnum]; + + if (unlikely (stream == 0)) { + stream = iso_stream_alloc(GFP_ATOMIC); + if (likely (stream != 0)) { + /* dev->ep owns the initial refcount */ + dev->ep[epnum] = stream; + iso_stream_init(stream, urb->dev, urb->pipe, + urb->interval); + } + + /* if dev->ep [epnum] is a QH, info1.maxpacket is nonzero */ + } else if (unlikely (stream->hw_info1 != 0)) { + ehci_dbg (ehci, "dev %s ep%d%s, not iso??\n", + urb->dev->devpath, epnum & 0x0f, + (epnum & 0x10) ? "in" : "out"); + stream = 0; + } + + /* caller guarantees an eventual matching iso_stream_put */ + stream = iso_stream_get (stream); + + spin_unlock_irqrestore (&ehci->lock, flags); + return stream; +} + +/*-------------------------------------------------------------------------*/ + +static inline struct ehci_itd_sched * +itd_sched_alloc (unsigned packets, int mem_flags) +{ + struct ehci_itd_sched *itd_sched; + int size = sizeof *itd_sched; + + size += packets * sizeof (struct ehci_iso_uframe); + itd_sched = kmalloc (size, mem_flags); + if (likely (itd_sched != 0)) { + memset(itd_sched, 0, size); + INIT_LIST_HEAD (&itd_sched->itd_list); + } + return itd_sched; +} +static int +itd_sched_init ( + struct ehci_itd_sched *itd_sched, + struct ehci_iso_stream *stream, + struct urb *urb +) +{ + unsigned i; + dma_addr_t dma = urb->transfer_dma; + + /* how many uframes are needed for these transfers */ + itd_sched->span = urb->number_of_packets * stream->interval; + + /* figure out per-uframe itd fields that we'll need later + * when we fit new itds into the schedule. + */ + for (i = 0; i < urb->number_of_packets; i++) { + struct ehci_iso_uframe *uframe = &itd_sched->packet [i]; + unsigned length; + dma_addr_t buf; + u32 trans; + + length = urb->iso_frame_desc [i].length; + buf = dma + urb->iso_frame_desc [i].offset; + + trans = EHCI_ISOC_ACTIVE; + trans |= buf & 0x0fff; + if (unlikely ((i + 1) == urb->number_of_packets)) + trans |= EHCI_ITD_IOC; + trans |= length << 16; + uframe->transaction = cpu_to_le32 (trans); + + /* might need to cross a buffer page within a td */ + uframe->bufp = (buf & ~(u64)0x0fff); + buf += length; + if (unlikely ((uframe->bufp != (buf & ~(u64)0x0fff)))) + uframe->cross = 1; + } return 0; } +static void +itd_sched_free ( + struct ehci_iso_stream *stream, + struct ehci_itd_sched *itd_sched +) +{ + list_splice (&itd_sched->itd_list, &stream->free_itd_list); + kfree (itd_sched); +} + static int itd_urb_transaction ( + struct ehci_iso_stream *stream, struct ehci_hcd *ehci, struct urb *urb, int mem_flags -) { - int frame_index; - struct ehci_itd *first_itd, *itd; +) +{ + struct ehci_itd *itd; int status; dma_addr_t itd_dma; + int i; + unsigned num_itds; + struct ehci_itd_sched *itd_sched; + + itd_sched = itd_sched_alloc (urb->number_of_packets, mem_flags); + if (unlikely (itd_sched == 0)) + return -ENOMEM; + + status = itd_sched_init (itd_sched, stream, urb); + if (unlikely (status != 0)) { + itd_sched_free (stream, itd_sched); + return status; + } + + if (urb->interval < 8) + num_itds = 1 + (itd_sched->span + 7) / 8; + else + num_itds = urb->number_of_packets; /* allocate/init ITDs */ - for (frame_index = 0, first_itd = 0; - frame_index < urb->number_of_packets; - frame_index++) { - itd = pci_pool_alloc (ehci->itd_pool, mem_flags, &itd_dma); - if (!itd) { - status = -ENOMEM; - goto fail; + for (i = 0; i < num_itds; i++) { + + /* free_itd_list.next might be cache-hot ... but maybe + * the HC caches it too. avoid that issue for now. + */ + + /* prefer previously-allocated itds */ + if (likely (!list_empty(&stream->free_itd_list))) { + itd = list_entry (stream->free_itd_list.prev, + struct ehci_itd, itd_list); + list_del (&itd->itd_list); + itd_dma = itd->itd_dma; + } else + itd = pci_pool_alloc (ehci->itd_pool, mem_flags, + &itd_dma); + + if (unlikely (0 == itd)) { + itd_sched_free (stream, itd_sched); + return -ENOMEM; } memset (itd, 0, sizeof *itd); itd->itd_dma = itd_dma; - - status = itd_fill (ehci, itd, urb, frame_index, - urb->transfer_dma); - if (status != 0) - goto fail; - - if (first_itd) - list_add_tail (&itd->itd_list, - &first_itd->itd_list); - else { - INIT_LIST_HEAD (&itd->itd_list); - urb->hcpriv = first_itd = itd; - } + list_add (&itd->itd_list, &itd_sched->itd_list); } + + /* temporarily store schedule info in hcpriv */ + urb->hcpriv = itd_sched; urb->error_count = 0; return 0; - -fail: - if (urb->hcpriv) - itd_free_list (ehci, urb); - return status; -} - -/*-------------------------------------------------------------------------*/ - -static inline void -itd_link (struct ehci_hcd *ehci, unsigned frame, struct ehci_itd *itd) -{ - /* always prepend ITD/SITD ... only QH tree is order-sensitive */ - itd->itd_next = ehci->pshadow [frame]; - itd->hw_next = ehci->periodic [frame]; - ehci->pshadow [frame].itd = itd; - ehci->periodic [frame] = cpu_to_le32 (itd->itd_dma) | Q_TYPE_ITD; } /* - * return zero on success, else -errno - * - start holds first uframe to start scheduling into - * - max is the first uframe it's NOT (!) OK to start scheduling into - * math to be done modulo "mod" (ehci->periodic_size << 3) + * This scheduler plans almost as far into the future as it has actual + * periodic schedule slots. (Affected by TUNE_FLS, which defaults to + * "as small as possible" to be cache-friendlier.) That limits the size + * transfers you can stream reliably; avoid more than 64 msec per urb. + * Also avoid queue depths of less than the system's worst irq latency. */ -static int get_iso_range ( + +#define SCHEDULE_SLOP 10 /* frames */ + +static int +itd_stream_schedule ( struct ehci_hcd *ehci, struct urb *urb, - unsigned *start, - unsigned *max, - unsigned mod -) { - struct list_head *lh; - struct hcd_dev *dev = urb->dev->hcpriv; - int last = -1; - unsigned now, span, end; - - span = urb->interval * urb->number_of_packets; - - /* first see if we know when the next transfer SHOULD happen */ - list_for_each (lh, &dev->urb_list) { - struct urb *u; - struct ehci_itd *itd; - unsigned s; + struct ehci_iso_stream *stream +) +{ + u32 now, start, end, max; + int status; + unsigned mod = ehci->periodic_size << 3; + struct ehci_itd_sched *itd_sched = urb->hcpriv; - u = list_entry (lh, struct urb, urb_list); - if (u == urb || u->pipe != urb->pipe) - continue; - if (u->interval != urb->interval) { /* must not change! */ - dbg ("urb %p interval %d ... != %p interval %d", - u, u->interval, urb, urb->interval); - return -EINVAL; - } - - /* URB for this endpoint... covers through when? */ - itd = urb->hcpriv; - s = itd->uframe + u->interval * u->number_of_packets; - if (last < 0) - last = s; - else { - /* - * So far we can only queue two ISO URBs... - * - * FIXME do interval math, figure out whether - * this URB is "before" or not ... also, handle - * the case where the URB might have completed, - * but hasn't yet been processed. - */ - dbg ("NYET: queue >2 URBs per ISO endpoint"); - return -EDOM; - } + if (unlikely (itd_sched->span > (mod - 8 * SCHEDULE_SLOP))) { + ehci_dbg (ehci, "iso request %p too long\n", urb); + status = -EFBIG; + goto fail; } - /* calculate the legal range [start,max) */ - now = readl (&ehci->regs->frame_index) + 1; /* next uframe */ - if (!ehci->periodic_sched) - now += 8; /* startup delay */ - now %= mod; - end = now + mod; - if (last < 0) { - *start = now + ehci->i_thresh + /* paranoia */ 1; - *max = end - span; - if (*max < *start + 1) - *max = *start + 1; - } else { - *start = last % mod; - *max = (last + 1) % mod; - } + now = readl (&ehci->regs->frame_index) % mod; - /* explicit start frame? */ - if (!(urb->transfer_flags & URB_ISO_ASAP)) { - unsigned temp; - - /* sanity check: must be in range */ - urb->start_frame %= ehci->periodic_size; - temp = urb->start_frame << 3; - if (temp < *start) - temp += mod; - if (temp > *max) - return -EDOM; - - /* use that explicit start frame */ - *start = urb->start_frame << 3; - temp += 8; - if (temp < *max) - *max = temp; + /* when's the last uframe this urb could start? */ + max = now + mod; + max -= itd_sched->span; + max -= 8 * SCHEDULE_SLOP; + + /* typical case: reuse current schedule. stream is still active, + * and no gaps from host falling behind (irq delays etc) + */ + if (likely (!list_empty (&stream->itd_list))) { + + start = stream->next_uframe; + if (start < now) + start += mod; + if (likely (start < max)) + goto ready; + + /* two cases: + * (a) we missed some uframes ... can reschedule + * (b) trying to overcommit the schedule + * FIXME (b) should be a hard failure + */ } - // FIXME minimize wraparound to "now" ... insist max+span - // (and start+span) remains a few frames short of "end" + /* need to schedule; when's the next (u)frame we could start? + * this is bigger than ehci->i_thresh allows; scheduling itself + * isn't free, the slop should handle reasonably slow cpus. it + * can also help high bandwidth if the dma and irq loads don't + * jump until after the queue is primed. + */ + start = SCHEDULE_SLOP * 8 + (now & ~0x07); + end = start; - *max %= ehci->periodic_size; - if ((*start + span) < end) - return 0; - return -EFBIG; -} + ehci_vdbg (ehci, "%s schedule from %d (%d..%d), was %d\n", + __FUNCTION__, now, start, max, + stream->next_uframe); -static int -itd_schedule (struct ehci_hcd *ehci, struct urb *urb) -{ - unsigned start, max, i; - int status; - unsigned mod = ehci->periodic_size << 3; + /* NOTE: assumes URB_ISO_ASAP, to limit complexity/bugs */ - for (i = 0; i < urb->number_of_packets; i++) { - urb->iso_frame_desc [i].status = -EINPROGRESS; - urb->iso_frame_desc [i].actual_length = 0; - } + if (likely (max > (start + urb->interval))) + max = start + urb->interval; - if ((status = get_iso_range (ehci, urb, &start, &max, mod)) != 0) - return status; + /* hack: account for itds already scheduled to this endpoint */ + if (unlikely (list_empty (&stream->itd_list))) + end = max; + /* within [start..max] find a uframe slot with enough bandwidth */ + end %= mod; do { unsigned uframe; - unsigned usecs; - struct ehci_itd *itd; + int enough_space = 1; /* check schedule: enough space? */ - itd = urb->hcpriv; uframe = start; - for (i = 0, uframe = start; - i < urb->number_of_packets; - i++, uframe += urb->interval) { + do { uframe %= mod; /* can't commit more than 80% periodic == 100 usec */ if (periodic_usecs (ehci, uframe >> 3, uframe & 0x7) - > (100 - itd->usecs)) { - itd = 0; + > (100 - stream->usecs)) { + enough_space = 0; break; } - itd = list_entry (itd->itd_list.next, - struct ehci_itd, itd_list); - } - if (!itd) - continue; - - /* that's where we'll schedule this! */ - itd = urb->hcpriv; - urb->start_frame = start >> 3; - vdbg ("ISO urb %p (%d packets period %d) starting %d.%d", - urb, urb->number_of_packets, urb->interval, - urb->start_frame, start & 0x7); - for (i = 0, uframe = start, usecs = 0; - i < urb->number_of_packets; - i++, uframe += urb->interval) { - uframe %= mod; - - itd->uframe = uframe; - itd->hw_transaction [uframe & 0x07] = itd->transaction; - itd_link (ehci, (uframe >> 3) % ehci->periodic_size, - itd); - wmb (); - usecs += itd->usecs; - itd = list_entry (itd->itd_list.next, - struct ehci_itd, itd_list); - } - - /* update bandwidth utilization records (for usbfs) - * - * FIXME This claims each URB queued to an endpoint, as if - * transfers were concurrent, not sequential. So bandwidth - * typically gets double-billed ... comes from tying it to - * URBs rather than endpoints in the schedule. Luckily we - * don't use this usbfs data for serious decision making. - */ - usecs /= urb->number_of_packets; - usecs /= urb->interval; - usecs >>= 3; - if (usecs < 1) - usecs = 1; - usb_claim_bandwidth (urb->dev, urb, usecs, 1); - - /* maybe enable periodic schedule processing */ - if (!ehci->periodic_sched++) { - if ((status = enable_periodic (ehci)) != 0) { - // FIXME deschedule right away - err ("itd_schedule, enable = %d", status); + /* we know urb->interval is 2^N uframes */ + uframe += urb->interval; + } while (uframe != end); + + /* (re)schedule it here if there's enough bandwidth */ + if (enough_space) { + start %= mod; + if (unlikely (!list_empty (&stream->itd_list))) { + /* host fell behind ... maybe irq latencies + * delayed this request queue for too long. + */ + stream->rescheduled++; + pr_debug ("ehci %s devpath %d " + "iso%d%s %d.%d skip %d.%d\n", + ehci->pdev->slot_name, + urb->dev->devpath, + stream->bEndpointAddress & 0x0f, + (stream->bEndpointAddress & USB_DIR_IN) + ? "in" : "out", + stream->next_uframe >> 3, + stream->next_uframe & 0x7, + start >> 3, start & 0x7); } + stream->next_uframe = start; + goto ready; } - return 0; - - } while ((start = ++start % mod) != max); + } while (++start < max); /* no room in the schedule */ - dbg ("urb %p, CAN'T SCHEDULE", urb); - return -ENOSPC; + ehci_dbg (ehci, "iso %ssched full %p (now %d end %d max %d)\n", + list_empty (&stream->itd_list) ? "" : "re", + urb, now, end, max); + status = -ENOSPC; + +fail: + itd_sched_free (stream, itd_sched); + urb->hcpriv = 0; + return status; + +ready: + urb->start_frame = stream->next_uframe; + return 0; } /*-------------------------------------------------------------------------*/ +static inline void +itd_init (struct ehci_iso_stream *stream, struct ehci_itd *itd) +{ + int i; + + itd->hw_next = EHCI_LIST_END; + itd->hw_bufp [0] = stream->buf0; + itd->hw_bufp [1] = stream->buf1; + itd->hw_bufp [2] = stream->buf2; + + for (i = 0; i < 8; i++) + itd->index[i] = -1; + + /* All other fields are filled when scheduling */ +} + +static inline void +itd_patch ( + struct ehci_itd *itd, + struct ehci_itd_sched *itd_sched, + unsigned index, + u16 uframe, + int first +) +{ + struct ehci_iso_uframe *uf = &itd_sched->packet [index]; + unsigned pg = itd->pg; + + // BUG_ON (pg == 6 && uf->cross); + + uframe &= 0x07; + itd->index [uframe] = index; + + itd->hw_transaction [uframe] = uf->transaction; + itd->hw_transaction [uframe] |= cpu_to_le32 (pg << 12); + itd->hw_bufp [pg] |= cpu_to_le32 (uf->bufp & ~(u32)0); + itd->hw_bufp_hi [pg] |= cpu_to_le32 ((u32)(uf->bufp >> 32)); + + /* iso_frame_desc[].offset must be strictly increasing */ + if (unlikely (!first && uf->cross)) { + u64 bufp = uf->bufp + 4096; + itd->pg = ++pg; + itd->hw_bufp [pg] |= cpu_to_le32 (bufp & ~(u32)0); + itd->hw_bufp_hi [pg] |= cpu_to_le32 ((u32)(bufp >> 32)); + } +} + +static inline void +itd_link (struct ehci_hcd *ehci, unsigned frame, struct ehci_itd *itd) +{ + /* always prepend ITD/SITD ... only QH tree is order-sensitive */ + itd->itd_next = ehci->pshadow [frame]; + itd->hw_next = ehci->periodic [frame]; + ehci->pshadow [frame].itd = itd; + itd->frame = frame; + wmb (); + ehci->periodic [frame] = cpu_to_le32 (itd->itd_dma) | Q_TYPE_ITD; +} + +/* fit urb's itds into the selected schedule slot; activate as needed */ +static int +itd_link_urb ( + struct ehci_hcd *ehci, + struct urb *urb, + unsigned mod, + struct ehci_iso_stream *stream +) +{ + int packet, first = 1; + unsigned next_uframe, uframe, frame; + struct ehci_itd_sched *itd_sched = urb->hcpriv; + struct ehci_itd *itd; + + next_uframe = stream->next_uframe % mod; + + if (unlikely (list_empty(&stream->itd_list))) { + hcd_to_bus (&ehci->hcd)->bandwidth_allocated + += stream->bandwidth; + ehci_vdbg (ehci, + "schedule devp %s ep%d%s-iso period %d start %d.%d\n", + urb->dev->devpath, stream->bEndpointAddress & 0x0f, + (stream->bEndpointAddress & USB_DIR_IN) ? "in" : "out", + urb->interval, + next_uframe >> 3, next_uframe & 0x7); + stream->start = jiffies; + } + hcd_to_bus (&ehci->hcd)->bandwidth_isoc_reqs++; + + /* fill iTDs uframe by uframe */ + for (packet = 0, itd = 0; packet < urb->number_of_packets; ) { + if (itd == 0) { + /* ASSERT: we have all necessary itds */ + // BUG_ON (list_empty (&itd_sched->itd_list)); + + /* ASSERT: no itds for this endpoint in this uframe */ + + itd = list_entry (itd_sched->itd_list.next, + struct ehci_itd, itd_list); + list_move_tail (&itd->itd_list, &stream->itd_list); + itd->stream = iso_stream_get (stream); + itd->urb = usb_get_urb (urb); + first = 1; + itd_init (stream, itd); + } + + uframe = next_uframe & 0x07; + frame = next_uframe >> 3; + + itd->usecs [uframe] = stream->usecs; + itd_patch (itd, itd_sched, packet, uframe, first); + first = 0; + + next_uframe += stream->interval; + next_uframe %= mod; + packet++; + + /* link completed itds into the schedule */ + if (((next_uframe >> 3) != frame) + || packet == urb->number_of_packets) { + itd_link (ehci, frame % ehci->periodic_size, itd); + itd = 0; + } + } + stream->next_uframe = next_uframe; + + /* don't need that schedule data any more */ + itd_sched_free (stream, itd_sched); + urb->hcpriv = 0; + + if (unlikely (!ehci->periodic_sched++)) + return enable_periodic (ehci); + return 0; +} + #define ISO_ERRS (EHCI_ISOC_BUF_ERR | EHCI_ISOC_BABBLE | EHCI_ISOC_XACTERR) static unsigned @@ -875,12 +1091,14 @@ u32 t; unsigned uframe; int urb_index = -1; + struct ehci_iso_stream *stream = itd->stream; + struct usb_device *dev; /* for each uframe with a packet */ for (uframe = 0; uframe < 8; uframe++) { - if (itd->hw_transaction [uframe] == 0) + if (likely (itd->index[uframe] == -1)) continue; - urb_index = itd->index; + urb_index = itd->index[uframe]; desc = &urb->iso_frame_desc [urb_index]; t = le32_to_cpup (&itd->hw_transaction [uframe]); @@ -907,36 +1125,43 @@ } } - vdbg ("itd %p urb %p packet %d/%d trans %x status %d len %d", - itd, urb, itd->index + 1, urb->number_of_packets, - t, desc->status, desc->actual_length); + usb_put_urb (urb); + itd->urb = 0; + itd->stream = 0; + list_move (&itd->itd_list, &stream->free_itd_list); + iso_stream_put (ehci, stream); /* handle completion now? */ if (likely ((urb_index + 1) != urb->number_of_packets)) return 0; - /* - * Always give the urb back to the driver ... expect it to submit - * a new urb (or resubmit this), and to have another already queued - * when un-interrupted transfers are needed. - * - * NOTE that for now we don't accelerate ISO unlinks; they just - * happen according to the current schedule. Means a delay of - * up to about a second (max). - */ - itd_free_list (ehci, urb); - if (urb->status == -EINPROGRESS) - urb->status = 0; - - /* complete() can reenter this HCD */ - spin_unlock (&ehci->lock); - usb_hcd_giveback_urb (&ehci->hcd, urb, regs); - spin_lock (&ehci->lock); + /* ASSERT: it's really the last itd for this urb + list_for_each_entry (itd, &stream->itd_list, itd_list) + BUG_ON (itd->urb == urb); + */ + + /* give urb back to the driver ... can be out-of-order */ + //dev = usb_get_dev (urb->dev); + dev = urb->dev; + ehci_urb_done (ehci, urb, regs); + urb = 0; /* defer stopping schedule; completion can submit */ ehci->periodic_sched--; - if (!ehci->periodic_sched) + if (unlikely (!ehci->periodic_sched)) (void) disable_periodic (ehci); + hcd_to_bus (&ehci->hcd)->bandwidth_isoc_reqs--; + + if (unlikely (list_empty (&stream->itd_list))) { + hcd_to_bus (&ehci->hcd)->bandwidth_allocated + -= stream->bandwidth; + ehci_vdbg (ehci, + "deschedule devp %s ep%d%s-iso\n", + dev->devpath, stream->bEndpointAddress & 0x0f, + (stream->bEndpointAddress & USB_DIR_IN) ? "in" : "out"); + } + iso_stream_put (ehci, stream); + //usb_put_dev (dev); return 1; } @@ -945,23 +1170,50 @@ static int itd_submit (struct ehci_hcd *ehci, struct urb *urb, int mem_flags) { - int status; - unsigned long flags; + int status = -EINVAL; + unsigned long flags; + struct ehci_iso_stream *stream; - dbg ("itd_submit urb %p", urb); + /* Get iso_stream head */ + stream = iso_stream_find (ehci, urb); + if (unlikely (stream == 0)) { + ehci_dbg (ehci, "can't get iso stream\n"); + return -ENOMEM; + } + if (unlikely (urb->interval != stream->interval)) { + ehci_dbg (ehci, "can't change iso interval %d --> %d\n", + stream->interval, urb->interval); + goto done; + } + +#ifdef EHCI_URB_TRACE + ehci_dbg (ehci, + "%s %s urb %p ep%d%s len %d, %d pkts %d uframes [%p]\n", + __FUNCTION__, urb->dev->devpath, urb, + usb_pipeendpoint (urb->pipe), + usb_pipein (urb->pipe) ? "in" : "out", + urb->transfer_buffer_length, + urb->number_of_packets, urb->interval, + stream); +#endif /* allocate ITDs w/o locking anything */ - status = itd_urb_transaction (ehci, urb, mem_flags); - if (status < 0) - return status; + status = itd_urb_transaction (stream, ehci, urb, mem_flags); + if (unlikely (status < 0)) { + ehci_dbg (ehci, "can't init itds\n"); + goto done; + } /* schedule ... need to lock */ spin_lock_irqsave (&ehci->lock, flags); - status = itd_schedule (ehci, urb); + status = itd_stream_schedule (ehci, urb, stream); + if (likely (status == 0)) + itd_link_urb (ehci, urb, ehci->periodic_size << 3, stream); spin_unlock_irqrestore (&ehci->lock, flags); - if (status < 0) - itd_free_list (ehci, urb); +done: + if (unlikely (status < 0)) + iso_stream_put (ehci, stream); return status; } diff -Nru a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h --- a/drivers/usb/host/ehci.h Wed Jan 28 13:36:17 2004 +++ b/drivers/usb/host/ehci.h Wed Jan 28 13:36:17 2004 @@ -386,6 +386,63 @@ /*-------------------------------------------------------------------------*/ +/* description of one iso highspeed transaction (up to 3 KB data) */ +struct ehci_iso_uframe { + /* These will be copied to iTD when scheduling */ + u64 bufp; /* itd->hw_bufp{,_hi}[pg] |= */ + u32 transaction; /* itd->hw_transaction[i] |= */ + u8 cross; /* buf crosses pages */ +}; + +/* temporary schedule data for highspeed packets from iso urbs + * each packet is one uframe's usb transactions, in some itd, + * beginning at stream->next_uframe + */ +struct ehci_itd_sched { + struct list_head itd_list; + unsigned span; + struct ehci_iso_uframe packet [0]; +}; + +/* + * ehci_iso_stream - groups all (s)itds for this endpoint. + * acts like a qh would, if EHCI had them for ISO. + */ +struct ehci_iso_stream { + /* first two fields match QH, but info1 == 0 */ + u32 hw_next; + u32 hw_info1; + + u32 refcount; + u8 bEndpointAddress; + struct list_head itd_list; /* queued itds */ + struct list_head free_itd_list; /* list of unused itds */ + struct hcd_dev *dev; + + /* output of (re)scheduling */ + unsigned long start; /* jiffies */ + unsigned long rescheduled; + int next_uframe; + + /* the rest is derived from the endpoint descriptor, + * trusting urb->interval == (1 << (epdesc->bInterval - 1)), + * including the extra info for hw_bufp[0..2] + */ + u8 interval; + u8 usecs; + u16 maxp; + unsigned bandwidth; + + /* This is used to initialize iTD's hw_bufp fields */ + u32 buf0; + u32 buf1; + u32 buf2; + + /* ... sITD won't use buf[012], and needs TT access ... */ +}; + +/*-------------------------------------------------------------------------*/ + /* * EHCI Specification 0.95 Section 3.3 * Fig 3-4 "Isochronous Transaction Descriptor (iTD)" @@ -413,14 +470,14 @@ union ehci_shadow itd_next; /* ptr to periodic q entry */ struct urb *urb; - struct list_head itd_list; /* list of urb frames' itds */ - dma_addr_t buf_dma; /* frame's buffer address */ + struct ehci_iso_stream *stream; /* endpoint's queue */ + struct list_head itd_list; /* list of stream's itds */ - /* for now, only one hw_transaction per itd */ - u32 transaction; - u16 index; /* in urb->iso_frame_desc */ - u16 uframe; /* in periodic schedule */ - u16 usecs; + /* any/all hw_transactions here may be used by that urb */ + unsigned frame; /* where scheduled */ + unsigned pg; + unsigned index[8]; /* in urb->iso_frame_desc */ + u8 usecs[8]; } __attribute__ ((aligned (32))); /*-------------------------------------------------------------------------*/