***************
*** 142,188 ****
  
  static void shrink_buffers(struct stripe_head *sh, int num)
  {
- 	struct buffer_head *bh;
  	int i;
  
  	for (i=0; i<num ; i++) {
- 		bh = sh->bh_cache[i];
- 		if (!bh)
- 			return;
- 		sh->bh_cache[i] = NULL;
- 		free_page((unsigned long) bh->b_data);
- 		kfree(bh);
  	}
  }
  
- static int grow_buffers(struct stripe_head *sh, int num, int b_size, int priority)
  {
- 	struct buffer_head *bh;
  	int i;
  
  	for (i=0; i<num; i++) {
  		struct page *page;
- 		bh = kmalloc(sizeof(struct buffer_head), priority);
- 		if (!bh)
- 			return 1;
- 		memset(bh, 0, sizeof (struct buffer_head));
- 		if ((page = alloc_page(priority)))
- 			bh->b_data = page_address(page);
- 		else {
- 			kfree(bh);
  			return 1;
  		}
- 		atomic_set(&bh->b_count, 0);
- 		bh->b_page = page;
- 		sh->bh_cache[i] = bh;
- 
  	}
  	return 0;
  }
  
- static struct buffer_head *raid5_build_block (struct stripe_head *sh, int i);
  
- static inline void init_stripe(struct stripe_head *sh, unsigned long sector)
  {
  	raid5_conf_t *conf = sh->raid_conf;
  	int disks = conf->raid_disks, i;
--- 142,177 ----
  
  static void shrink_buffers(struct stripe_head *sh, int num)
  {
+ 	struct page *p;
  	int i;
  
  	for (i=0; i<num ; i++) {
+ 		p = sh->dev[i].page;
+ 		if (!p)
+ 			continue;
+ 		sh->dev[i].page = NULL;
+ 		page_cache_release(p);
  	}
  }
  
+ static int grow_buffers(struct stripe_head *sh, int num)
  {
  	int i;
  
  	for (i=0; i<num; i++) {
  		struct page *page;
+ 
+ 		if (!(page = alloc_page(GFP_KERNEL))) {
  			return 1;
  		}
+ 		sh->dev[i].page = page;
  	}
  	return 0;
  }
  
+ static void raid5_build_block (struct stripe_head *sh, int i);
  
+ static inline void init_stripe(struct stripe_head *sh, unsigned long sector, int pd_idx)
  {
  	raid5_conf_t *conf = sh->raid_conf;
  	int disks = conf->raid_disks, i;
***************
*** 198,237 ****
  	remove_hash(sh);
  	
  	sh->sector = sector;
- 	sh->size = conf->buffer_size;
  	sh->state = 0;
  
  	for (i=disks; i--; ) {
- 		if (sh->bh_read[i] || sh->bh_write[i] || sh->bh_written[i] ||
- 		    buffer_locked(sh->bh_cache[i])) {
  			printk("sector=%lx i=%d %p %p %p %d\n",
- 			       sh->sector, i, sh->bh_read[i],
- 			       sh->bh_write[i], sh->bh_written[i],
- 			       buffer_locked(sh->bh_cache[i]));
  			BUG();
  		}
- 		clear_buffer_uptodate(sh->bh_cache[i]);
  		raid5_build_block(sh, i);
  	}
  	insert_hash(conf, sh);
  }
  
- /* the buffer size has changed, so unhash all stripes
-  * as active stripes complete, they will go onto inactive list
-  */
- static void shrink_stripe_cache(raid5_conf_t *conf)
- {
- 	int i;
- 	CHECK_DEVLOCK();
- 	if (atomic_read(&conf->active_stripes))
- 		BUG();
- 	for (i=0; i < NR_HASH; i++) {
- 		struct stripe_head *sh;
- 		while ((sh = conf->stripe_hashtbl[i])) 
- 			remove_hash(sh);
- 	}
- }
- 
  static struct stripe_head *__find_stripe(raid5_conf_t *conf, unsigned long sector)
  {
  	struct stripe_head *sh;
--- 187,212 ----
  	remove_hash(sh);
  	
  	sh->sector = sector;
+ 	sh->pd_idx = pd_idx;
  	sh->state = 0;
  
  	for (i=disks; i--; ) {
+ 		struct r5dev *dev = &sh->dev[i];
+ 
+ 		if (dev->toread || dev->towrite || dev->written ||
+ 		    test_bit(R5_LOCKED, &dev->flags)) {
  			printk("sector=%lx i=%d %p %p %p %d\n",
+ 			       sh->sector, i, dev->toread,
+ 			       dev->towrite, dev->written,
+ 			       test_bit(R5_LOCKED, &dev->flags));
  			BUG();
  		}
+ 		dev->flags = 0;
  		raid5_build_block(sh, i);
  	}
  	insert_hash(conf, sh);
  }
  
  static struct stripe_head *__find_stripe(raid5_conf_t *conf, unsigned long sector)
  {
  	struct stripe_head *sh;
***************
*** 410,447 ****
  		} else
  			buffer = NULL;
  		spin_unlock_irqrestore(&conf->device_lock, flags);
- 		if (sh->bh_page[i]==NULL)
  			set_buffer_uptodate(bh);
  		if (buffer) {
  			if (buffer->b_page != bh->b_page)
  				memcpy(buffer->b_data, bh->b_data, bh->b_size);
  			buffer->b_end_io(buffer, 1);
  		}
  	} else {
- 		md_error(conf->mddev, bh->b_bdev);
- 		clear_buffer_uptodate(bh);
  	}
  	/* must restore b_page before unlocking buffer... */
- 	if (sh->bh_page[i]) {
  		bh->b_page = sh->bh_page[i];
  		bh->b_data = page_address(bh->b_page);
- 		sh->bh_page[i] = NULL;
  		clear_buffer_uptodate(bh);
  	}
- 	clear_buffer_locked(bh);
  	set_bit(STRIPE_HANDLE, &sh->state);
  	release_stripe(sh);
  }
  
- static void raid5_end_write_request (struct buffer_head *bh, int uptodate)
  {
-  	struct stripe_head *sh = bh->b_private;
  	raid5_conf_t *conf = sh->raid_conf;
  	int disks = conf->raid_disks, i;
  	unsigned long flags;
  
  	for (i=0 ; i<disks; i++)
- 		if (bh == sh->bh_cache[i])
  			break;
  
  	PRINTK("end_write_request %lu/%d, count %d, uptodate: %d.\n", sh->sector, i, atomic_read(&sh->count), uptodate);
--- 361,403 ----
  		} else
  			buffer = NULL;
  		spin_unlock_irqrestore(&conf->device_lock, flags);
+ 		if (sh->bh_page[i]==bh->b_page)
  			set_buffer_uptodate(bh);
  		if (buffer) {
  			if (buffer->b_page != bh->b_page)
  				memcpy(buffer->b_data, bh->b_data, bh->b_size);
  			buffer->b_end_io(buffer, 1);
  		}
+ #else
+ 		set_bit(R5_UPTODATE, &sh->dev[i].flags);
+ #endif		
  	} else {
+ 		md_error(conf->mddev, bi->bi_bdev);
+ 		clear_bit(R5_UPTODATE, &sh->dev[i].flags);
  	}
+ #if 0
  	/* must restore b_page before unlocking buffer... */
+ 	if (sh->bh_page[i] != bh->b_page) {
  		bh->b_page = sh->bh_page[i];
  		bh->b_data = page_address(bh->b_page);
  		clear_buffer_uptodate(bh);
  	}
+ #endif
+ 	clear_bit(R5_LOCKED, &sh->dev[i].flags);
  	set_bit(STRIPE_HANDLE, &sh->state);
  	release_stripe(sh);
  }
  
+ static void raid5_end_write_request (struct bio *bi)
  {
+  	struct stripe_head *sh = bi->bi_private;
  	raid5_conf_t *conf = sh->raid_conf;
  	int disks = conf->raid_disks, i;
  	unsigned long flags;
+ 	int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
  
  	for (i=0 ; i<disks; i++)
+ 		if (bi == &sh->dev[i].req)
  			break;
  
  	PRINTK("end_write_request %lu/%d, count %d, uptodate: %d.\n", sh->sector, i, atomic_read(&sh->count), uptodate);
***************
*** 452,480 ****
  
  	spin_lock_irqsave(&conf->device_lock, flags);
  	if (!uptodate)
- 		md_error(conf->mddev, bh->b_bdev);
- 	clear_buffer_locked(bh);
  	set_bit(STRIPE_HANDLE, &sh->state);
  	__release_stripe(conf, sh);
  	spin_unlock_irqrestore(&conf->device_lock, flags);
  }
- 	
  
  
- static struct buffer_head *raid5_build_block (struct stripe_head *sh, int i)
  {
  	raid5_conf_t *conf = sh->raid_conf;
- 	struct buffer_head *bh = sh->bh_cache[i];
- 	unsigned long block = sh->sector / (sh->size >> 9);
  
- 	init_buffer(bh, raid5_end_read_request, sh);
- 	bh->b_dev       = conf->disks[i].dev;
- 	/* FIXME - later we will need bdev here */
- 	bh->b_blocknr   = block;
- 
- 	bh->b_state	= (1 << BH_Req) | (1 << BH_Mapped);
- 	bh->b_size	= sh->size;
- 	return bh;
  }
  
  static int error (mddev_t *mddev, kdev_t dev)
--- 408,443 ----
  
  	spin_lock_irqsave(&conf->device_lock, flags);
  	if (!uptodate)
+ 		md_error(conf->mddev, bi->bi_bdev);
+ 	
+ 	clear_bit(R5_LOCKED, &sh->dev[i].flags);
  	set_bit(STRIPE_HANDLE, &sh->state);
  	__release_stripe(conf, sh);
  	spin_unlock_irqrestore(&conf->device_lock, flags);
  }
  
  
+ static unsigned long compute_blocknr(struct stripe_head *sh, int i);
+ 	
+ static void raid5_build_block (struct stripe_head *sh, int i)
  {
  	raid5_conf_t *conf = sh->raid_conf;
+ 	struct r5dev *dev = &sh->dev[i];
  
+ 	bio_init(&dev->req);
+ 	dev->req.bi_io_vec = &dev->vec;
+ 	dev->req.bi_vcnt++;
+ 	dev->vec.bv_page = dev->page;
+ 	dev->vec.bv_len = STRIPE_SIZE;
+ 	dev->vec.bv_offset = 0;
+ 
+ 	dev->req.bi_bdev = conf->disks[i].bdev;
+ 	dev->req.bi_sector = sh->sector;
+ 	dev->req.bi_private = sh;
+ 
+ 	dev->flags = 0;
+ 	if (i != sh->pd_idx)
+ 		dev->sector = compute_blocknr(sh, i);
  }
  
  static int error (mddev_t *mddev, kdev_t dev)
***************
*** 661,748 ****
  {
  	raid5_conf_t *conf = sh->raid_conf;
  	int i, count, disks = conf->raid_disks;
- 	struct buffer_head *bh_ptr[MAX_XOR_BLOCKS], *bh;
  
  	PRINTK("compute_block, stripe %lu, idx %d\n", sh->sector, dd_idx);
  
- 
- 	memset(sh->bh_cache[dd_idx]->b_data, 0, sh->size);
- 	bh_ptr[0] = sh->bh_cache[dd_idx];
  	count = 1;
  	for (i = disks ; i--; ) {
  		if (i == dd_idx)
  			continue;
- 		bh = sh->bh_cache[i];
- 		if (buffer_uptodate(bh))
- 			bh_ptr[count++] = bh;
  		else
  			printk("compute_block() %d, stripe %lu, %d not present\n", dd_idx, sh->sector, i);
  
  		check_xor();
  	}
  	if (count != 1)
- 		xor_block(count, bh_ptr);
- 	set_buffer_uptodate(sh->bh_cache[dd_idx]);
  }
  
  static void compute_parity(struct stripe_head *sh, int method)
  {
  	raid5_conf_t *conf = sh->raid_conf;
  	int i, pd_idx = sh->pd_idx, disks = conf->raid_disks, count;
- 	struct buffer_head *bh_ptr[MAX_XOR_BLOCKS];
- 	struct buffer_head *chosen[MD_SB_DISKS];
  
  	PRINTK("compute_parity, stripe %lu, method %d\n", sh->sector, method);
  	memset(chosen, 0, sizeof(chosen));
  
  	count = 1;
- 	bh_ptr[0] = sh->bh_cache[pd_idx];
  	switch(method) {
  	case READ_MODIFY_WRITE:
- 		if (!buffer_uptodate(sh->bh_cache[pd_idx]))
  			BUG();
  		for (i=disks ; i-- ;) {
  			if (i==pd_idx)
  				continue;
- 			if (sh->bh_write[i] &&
- 			    buffer_uptodate(sh->bh_cache[i])) {
- 				bh_ptr[count++] = sh->bh_cache[i];
- 				chosen[i] = sh->bh_write[i];
- 				sh->bh_write[i] = sh->bh_write[i]->b_reqnext;
- 				chosen[i]->b_reqnext = sh->bh_written[i];
- 				sh->bh_written[i] = chosen[i];
  				check_xor();
  			}
  		}
  		break;
  	case RECONSTRUCT_WRITE:
- 		memset(sh->bh_cache[pd_idx]->b_data, 0, sh->size);
  		for (i= disks; i-- ;)
- 			if (i!=pd_idx && sh->bh_write[i]) {
- 				chosen[i] = sh->bh_write[i];
- 				sh->bh_write[i] = sh->bh_write[i]->b_reqnext;
- 				chosen[i]->b_reqnext = sh->bh_written[i];
- 				sh->bh_written[i] = chosen[i];
  			}
  		break;
  	case CHECK_PARITY:
  		break;
  	}
  	if (count>1) {
- 		xor_block(count, bh_ptr);
  		count = 1;
  	}
  	
  	for (i = disks; i--;)
  		if (chosen[i]) {
- 			struct buffer_head *bh = sh->bh_cache[i];
- 			char *bdata;
- 			bdata = bh_kmap(chosen[i]);
- 			memcpy(bh->b_data,
- 			       bdata,sh->size);
- 			bh_kunmap(chosen[i]);
- 			set_buffer_locked(bh);
- 			set_buffer_uptodate(bh);
  		}
  
  	switch(method) {
--- 674,757 ----
  {
  	raid5_conf_t *conf = sh->raid_conf;
  	int i, count, disks = conf->raid_disks;
+ 	void *ptr[MAX_XOR_BLOCKS], *p;
  
  	PRINTK("compute_block, stripe %lu, idx %d\n", sh->sector, dd_idx);
  
+ 	ptr[0] = page_address(sh->dev[dd_idx].page);
+ 	memset(ptr[0], 0, STRIPE_SIZE);
  	count = 1;
  	for (i = disks ; i--; ) {
  		if (i == dd_idx)
  			continue;
+ 		p = page_address(sh->dev[i].page);
+ 		if (test_bit(R5_UPTODATE, &sh->dev[i].flags))
+ 			ptr[count++] = p;
  		else
  			printk("compute_block() %d, stripe %lu, %d not present\n", dd_idx, sh->sector, i);
  
  		check_xor();
  	}
  	if (count != 1)
+ 		xor_block(count, STRIPE_SIZE, ptr);
+ 	set_bit(R5_UPTODATE, &sh->dev[i].flags);
  }
  
  static void compute_parity(struct stripe_head *sh, int method)
  {
  	raid5_conf_t *conf = sh->raid_conf;
  	int i, pd_idx = sh->pd_idx, disks = conf->raid_disks, count;
+ 	void *ptr[MAX_XOR_BLOCKS];
+ 	struct bio *chosen[MD_SB_DISKS];
  
  	PRINTK("compute_parity, stripe %lu, method %d\n", sh->sector, method);
  	memset(chosen, 0, sizeof(chosen));
  
  	count = 1;
+ 	ptr[0] = page_address(sh->dev[pd_idx].page);
  	switch(method) {
  	case READ_MODIFY_WRITE:
+ 		if (!test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags))
  			BUG();
  		for (i=disks ; i-- ;) {
  			if (i==pd_idx)
  				continue;
+ 			if (sh->dev[i].towrite &&
+ 			    test_bit(R5_UPTODATE, &sh->dev[i].flags)) {
+ 				ptr[count++] = page_address(sh->dev[i].page);
+ 				chosen[i] = sh->dev[i].towrite;
+ 				sh->dev[i].towrite = NULL;
+ 				if (sh->dev[i].written) BUG();
+ 				sh->dev[i].written = chosen[i];
  				check_xor();
  			}
  		}
  		break;
  	case RECONSTRUCT_WRITE:
+ 		memset(ptr[0], 0, STRIPE_SIZE);
  		for (i= disks; i-- ;)
+ 			if (i!=pd_idx && sh->dev[i].towrite) {
+ 				chosen[i] = sh->dev[i].towrite;
+ 				sh->dev[i].towrite = NULL;
+ 				if (sh->dev[i].written) BUG();
+ 				sh->dev[i].written = chosen[i];
  			}
  		break;
  	case CHECK_PARITY:
  		break;
  	}
  	if (count>1) {
+ 		xor_block(count, STRIPE_SIZE, ptr);
  		count = 1;
  	}
  	
  	for (i = disks; i--;)
  		if (chosen[i]) {
+ 			sector_t sector = sh->dev[i].sector;
+ 			copy_data(1, chosen[i], sh->dev[i].page, sector);
+ 
+ 			set_bit(R5_LOCKED, &sh->dev[i].flags);
+ 			set_bit(R5_UPTODATE, &sh->dev[i].flags);
  		}
  
  	switch(method) {
***************
*** 750,804 ****
  	case CHECK_PARITY:
  		for (i=disks; i--;)
  			if (i != pd_idx) {
- 				bh_ptr[count++] = sh->bh_cache[i];
  				check_xor();
  			}
  		break;
  	case READ_MODIFY_WRITE:
  		for (i = disks; i--;)
  			if (chosen[i]) {
- 				bh_ptr[count++] = sh->bh_cache[i];
  				check_xor();
  			}
  	}
  	if (count != 1)
- 		xor_block(count, bh_ptr);
  	
  	if (method != CHECK_PARITY) {
- 		set_buffer_uptodate(sh->bh_cache[pd_idx]);
- 		set_buffer_locked(sh->bh_cache[pd_idx]);
  	} else
- 		clear_buffer_uptodate(sh->bh_cache[pd_idx]);
  }
  
- static void add_stripe_bh (struct stripe_head *sh, struct buffer_head *bh, int dd_idx, int rw)
  {
- 	struct buffer_head **bhp;
  	raid5_conf_t *conf = sh->raid_conf;
  
- 	PRINTK("adding bh b#%lu to stripe s#%lu\n", bh->b_blocknr, sh->sector);
  
  
  	spin_lock(&sh->lock);
  	spin_lock_irq(&conf->device_lock);
- 	bh->b_reqnext = NULL;
- 	if (rw == READ)
- 		bhp = &sh->bh_read[dd_idx];
  	else
- 		bhp = &sh->bh_write[dd_idx];
- 	while (*bhp) {
- 		printk(KERN_NOTICE "raid5: multiple %d requests for sector %ld\n", rw, sh->sector);
- 		bhp = & (*bhp)->b_reqnext;
- 	}
- 	*bhp = bh;
  	spin_unlock_irq(&conf->device_lock);
  	spin_unlock(&sh->lock);
  
- 	PRINTK("added bh b#%lu to stripe s#%lu, disk %d.\n", bh->b_blocknr, sh->sector, dd_idx);
- }
- 
- 
  
  
  
  /*
--- 759,832 ----
  	case CHECK_PARITY:
  		for (i=disks; i--;)
  			if (i != pd_idx) {
+ 				ptr[count++] = page_address(sh->dev[i].page);
  				check_xor();
  			}
  		break;
  	case READ_MODIFY_WRITE:
  		for (i = disks; i--;)
  			if (chosen[i]) {
+ 				ptr[count++] = page_address(sh->dev[i].page);
  				check_xor();
  			}
  	}
  	if (count != 1)
+ 		xor_block(count, STRIPE_SIZE, ptr);
  	
  	if (method != CHECK_PARITY) {
+ 		set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
+ 		set_bit(R5_LOCKED,   &sh->dev[pd_idx].flags);
  	} else
+ 		clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
  }
  
+ /*
+  * Each stripe/dev can have one or more bion attached.
+  * toread/towrite point to the first in a chain. 
+  * The bi_next chain must be in order.
+  */
+ static void add_stripe_bio (struct stripe_head *sh, struct bio *bi, int dd_idx, int forwrite)
  {
+ 	struct bio **bip;
  	raid5_conf_t *conf = sh->raid_conf;
  
+ 	PRINTK("adding bh b#%lu to stripe s#%lu\n", bi->bi_sector, sh->sector);
  
  
  	spin_lock(&sh->lock);
  	spin_lock_irq(&conf->device_lock);
+ 	if (forwrite)
+ 		bip = &sh->dev[dd_idx].towrite;
  	else
+ 		bip = &sh->dev[dd_idx].toread;
+ 	while (*bip && (*bip)->bi_sector < bi->bi_sector)
+ 		bip = & (*bip)->bi_next;
+ /* FIXME do I need to worry about overlapping bion */
+ 	if (*bip && bi->bi_next && (*bip) != bi->bi_next)
+ 		BUG();
+ 	if (*bip)
+ 		bi->bi_next = *bip;
+ 	*bip = bi;
+ 	bi->bi_phys_segments ++;
  	spin_unlock_irq(&conf->device_lock);
  	spin_unlock(&sh->lock);
  
+ 	if (forwrite) {
+ 		/* check if page is coverred */
+ 		sector_t sector = sh->dev[dd_idx].sector;
+ 		for (bi=sh->dev[dd_idx].towrite;
+ 		     sector < sh->dev[dd_idx].sector + STRIPE_SECTORS &&
+ 			     bi && bi->bi_sector <= sector;
+ 		     bi = bi->bi_next) {
+ 			if (bi->bi_sector + (bi->bi_size>>9) >= sector)
+ 				sector = bi->bi_sector + (bi->bi_size>>9);
+ 		}
+ 		if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS)
+ 			set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags);
+ 	}
  
+ 	PRINTK("added bi b#%lu to stripe s#%lu, disk %d.\n", bi->bi_sector, sh->sector, dd_idx);
+ }
  
  
  /*
***************
*** 955,975 ****
  					compute_block(sh, i);
  					uptodate++;
  				} else if (conf->disks[i].operational) {
- 					set_buffer_locked(bh);
  					action[i] = READ+1;
  					/* if I am just reading this block and we don't have
  					   a failed drive, or any pending writes then sidestep the cache */
- 					if (sh->bh_page[i]) BUG();
  					if (sh->bh_read[i] && !sh->bh_read[i]->b_reqnext &&
  					    ! syncing && !failed && !to_write) {
- 						sh->bh_page[i] = sh->bh_cache[i]->b_page;
  						sh->bh_cache[i]->b_page =  sh->bh_read[i]->b_page;
  						sh->bh_cache[i]->b_data =  sh->bh_read[i]->b_data;
  					}
  					locked++;
  					PRINTK("Reading block %d (sync=%d)\n", i, syncing);
  					if (syncing)
- 						md_sync_acct(conf->disks[i].dev, bh->b_size>>9);
  				}
  			}
  		}
--- 1002,1022 ----
  					compute_block(sh, i);
  					uptodate++;
  				} else if (conf->disks[i].operational) {
+ 					set_bit(R5_LOCKED, &dev->flags);
  					action[i] = READ+1;
+ #if 0
  					/* if I am just reading this block and we don't have
  					   a failed drive, or any pending writes then sidestep the cache */
  					if (sh->bh_read[i] && !sh->bh_read[i]->b_reqnext &&
  					    ! syncing && !failed && !to_write) {
  						sh->bh_cache[i]->b_page =  sh->bh_read[i]->b_page;
  						sh->bh_cache[i]->b_data =  sh->bh_read[i]->b_data;
  					}
+ #endif
  					locked++;
  					PRINTK("Reading block %d (sync=%d)\n", i, syncing);
  					if (syncing)
+ 						md_sync_acct(conf->disks[i].dev, STRIPE_SECTORS);
  				}
  			}
  		}
***************
*** 1004,1017 ****
  		if (rmw < rcw && rmw > 0)
  			/* prefer read-modify-write, but need to get some data */
  			for (i=disks; i--;) {
- 				bh = sh->bh_cache[i];
- 				if ((sh->bh_write[i] || i == sh->pd_idx) &&
- 				    !buffer_locked(bh) && !buffer_uptodate(bh) &&
  				    conf->disks[i].operational) {
  					if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
  					{
  						PRINTK("Read_old block %d for r-m-w\n", i);
- 						set_buffer_locked(bh);
  						action[i] = READ+1;
  						locked++;
  					} else {
--- 1059,1072 ----
  		if (rmw < rcw && rmw > 0)
  			/* prefer read-modify-write, but need to get some data */
  			for (i=disks; i--;) {
+ 				dev = &sh->dev[i];
+ 				if ((dev->towrite || i == sh->pd_idx) &&
+ 				    !test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
  				    conf->disks[i].operational) {
  					if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
  					{
  						PRINTK("Read_old block %d for r-m-w\n", i);
+ 						set_bit(R5_LOCKED, &dev->flags);
  						action[i] = READ+1;
  						locked++;
  					} else {
***************
*** 1023,1036 ****
  		if (rcw <= rmw && rcw > 0)
  			/* want reconstruct write, but need to get some data */
  			for (i=disks; i--;) {
- 				bh = sh->bh_cache[i];
- 				if (!sh->bh_write[i]  && i != sh->pd_idx &&
- 				    !buffer_locked(bh) && !buffer_uptodate(bh) &&
  				    conf->disks[i].operational) {
  					if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
  					{
  						PRINTK("Read_old block %d for Reconstruct\n", i);
- 						set_buffer_locked(bh);
  						action[i] = READ+1;
  						locked++;
  					} else {
--- 1078,1091 ----
  		if (rcw <= rmw && rcw > 0)
  			/* want reconstruct write, but need to get some data */
  			for (i=disks; i--;) {
+ 				dev = &sh->dev[i];
+ 				if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx &&
+ 				    !test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
  				    conf->disks[i].operational) {
  					if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
  					{
  						PRINTK("Read_old block %d for Reconstruct\n", i);
+ 						set_bit(R5_LOCKED, &dev->flags);
  						action[i] = READ+1;
  						locked++;
  					} else {
***************
*** 1093,1152 ****
  			}
  			if (uptodate != disks)
  				BUG();
- 			bh = sh->bh_cache[failed_num];
- 			set_buffer_locked(bh);
  			action[failed_num] = WRITE+1;
  			locked++;
  			set_bit(STRIPE_INSYNC, &sh->state);
  			if (conf->disks[failed_num].operational)
- 				md_sync_acct(conf->disks[failed_num].dev, bh->b_size>>9);
  			else if ((spare=conf->spare))
- 				md_sync_acct(spare->dev, bh->b_size>>9);
  
  		}
  	}
  	if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
- 		md_done_sync(conf->mddev, (sh->size>>9) - sh->sync_redone,1);
  		clear_bit(STRIPE_SYNCING, &sh->state);
  	}
  	
- 	
  	spin_unlock(&sh->lock);
  
- 	while ((bh=return_ok)) {
- 		return_ok = bh->b_reqnext;
- 		bh->b_reqnext = NULL;
- 		bh->b_end_io(bh, 1);
- 	}
- 	while ((bh=return_fail)) {
- 		return_fail = bh->b_reqnext;
- 		bh->b_reqnext = NULL;
- 		bh->b_end_io(bh, 0);
  	}
  	for (i=disks; i-- ;) 
  		if (action[i]) {
- 			struct buffer_head *bh = sh->bh_cache[i];
  			struct disk_info *spare = conf->spare;
  			int skip = 0;
  			if (action[i] == READ+1)
- 				bh->b_end_io = raid5_end_read_request;
  			else
- 				bh->b_end_io = raid5_end_write_request;
  			if (conf->disks[i].operational)
- 				bh->b_dev = conf->disks[i].dev;
  			else if (spare && action[i] == WRITE+1)
- 				bh->b_dev = spare->dev;
  			else skip=1;
- 			/* FIXME - later we will need bdev here */
  			if (!skip) {
  				PRINTK("for %ld schedule op %d on disc %d\n", sh->sector, action[i]-1, i);
  				atomic_inc(&sh->count);
- 				bh->b_rdev = bh->b_dev;
- 				bh->b_rsector = bh->b_blocknr * (bh->b_size>>9);
- 				generic_make_request(action[i]-1, bh);
  			} else {
  				PRINTK("skip op %d on disc %d for sector %ld\n", action[i]-1, i, sh->sector);
- 				clear_buffer_locked(bh);
  				set_bit(STRIPE_HANDLE, &sh->state);
  			}
  		}
--- 1149,1210 ----
  			}
  			if (uptodate != disks)
  				BUG();
+ 			dev = &sh->dev[failed_num];
+ 			set_bit(R5_LOCKED, &dev->flags);
  			action[failed_num] = WRITE+1;
  			locked++;
  			set_bit(STRIPE_INSYNC, &sh->state);
  			if (conf->disks[failed_num].operational)
+ 				md_sync_acct(conf->disks[failed_num].dev, STRIPE_SECTORS);
  			else if ((spare=conf->spare))
+ 				md_sync_acct(spare->dev, STRIPE_SECTORS);
  
  		}
  	}
  	if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
+ 		md_done_sync(conf->mddev, STRIPE_SECTORS,1);
  		clear_bit(STRIPE_SYNCING, &sh->state);
  	}
  	
  	spin_unlock(&sh->lock);
  
+ 	while ((bi=return_bi)) {
+ 		return_bi = bi->bi_next;
+ 		bi->bi_next = NULL;
+ 		bi->bi_end_io(bi);
  	}
  	for (i=disks; i-- ;) 
  		if (action[i]) {
+ 			struct bio *bi = &sh->dev[i].req;
  			struct disk_info *spare = conf->spare;
  			int skip = 0;
  			if (action[i] == READ+1)
+ 				bi->bi_end_io = raid5_end_read_request;
  			else
+ 				bi->bi_end_io = raid5_end_write_request;
  			if (conf->disks[i].operational)
+ 				bi->bi_bdev = conf->disks[i].bdev;
  			else if (spare && action[i] == WRITE+1)
+ 				bi->bi_bdev = spare->bdev;
  			else skip=1;
  			if (!skip) {
  				PRINTK("for %ld schedule op %d on disc %d\n", sh->sector, action[i]-1, i);
  				atomic_inc(&sh->count);
+ 				bi->bi_sector = sh->sector;
+ 				if (action[i] == READ+1) 
+ 					bi->bi_rw = 0;
+ 				else
+ 					bi->bi_rw = 1;
+ 				bi->bi_flags = 0;
+ 				bi->bi_vcnt = 1;	
+ 				bi->bi_idx = 0;
+ 				bi->bi_io_vec = &sh->dev[i].vec;
+ 				bi->bi_size = STRIPE_SIZE;
+ 				bi->bi_next = NULL;
+ 				generic_make_request(bi);
  			} else {
  				PRINTK("skip op %d on disc %d for sector %ld\n", action[i]-1, i, sh->sector);
+ 				clear_bit(R5_LOCKED, &dev->flags);
  				set_bit(STRIPE_HANDLE, &sh->state);
  			}
  		}
***************
*** 1208,1232 ****
  		read_ahead=1;
  	}
  
- 	new_sector = raid5_compute_sector(bh->b_rsector,
- 			raid_disks, data_disks, &dd_idx, &pd_idx, conf);
  
- 	PRINTK("raid5: make_request, sector %lu\n", new_sector);
- 	sh = get_active_stripe(conf, new_sector, bh->b_size, read_ahead);
- 	if (sh) {
- 		sh->pd_idx = pd_idx;
  
- 		add_stripe_bh(sh, bh, dd_idx, rw);
  
- 		raid5_plug_device(conf);
- 		handle_stripe(sh);
- 		release_stripe(sh);
- 	} else
- 		bh->b_end_io(bh, buffer_uptodate(bh));
  	return 0;
  }
  
- static int sync_request (mddev_t *mddev, unsigned long sector_nr)
  {
  	raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
  	struct stripe_head *sh;
--- 1267,1305 ----
  		read_ahead=1;
  	}
  
+ 	logical_sector = bi->bi_sector & ~(STRIPE_SECTORS-1);
+ 	last_sector = bi->bi_sector + (bi->bi_size>>9);
  
+ 	bi->bi_next = NULL;
+ 	set_bit(BIO_UPTODATE, &bi->bi_flags); /* will be cleared if error detected */
+ 	bi->bi_phys_segments = 1;	/* over-loaded to count active stripes */
+ 	for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
+ 		
+ 		new_sector = raid5_compute_sector(logical_sector,
+ 						  raid_disks, data_disks, &dd_idx, &pd_idx, conf);
  
+ 		PRINTK("raid5: make_request, sector %ul logical %ul\n", 
+ 		       new_sector, logical_sector);
  
+ 		sh = get_active_stripe(conf, new_sector, pd_idx, read_ahead);
+ 		if (sh) {
+ 
+ 			add_stripe_bio(sh, bi, dd_idx, rw);
+ 
+ 			raid5_plug_device(conf);
+ 			handle_stripe(sh);
+ 			release_stripe(sh);
+ 		}
+ 	}
+ 	spin_lock_irq(&conf->device_lock);
+ 	if (--bi->bi_phys_segments == 0) 
+ 		bi->bi_end_io(bi);
+ 	spin_unlock_irq(&conf->device_lock);
  	return 0;
  }
  
+ /* FIXME go_faster isn't used */
+ static int sync_request (mddev_t *mddev, sector_t sector_nr, int go_faster)
  {
  	raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
  	struct stripe_head *sh;
***************
*** 1476,1481 ****
  			disk->number = desc->number;
  			disk->raid_disk = raid_disk;
  			disk->dev = NODEV;
  
  			disk->operational = 0;
  			disk->write_only = 0;
--- 1545,1551 ----
  			disk->number = desc->number;
  			disk->raid_disk = raid_disk;
  			disk->dev = NODEV;
+ 			disk->bdev = NULL;
  
  			disk->operational = 0;
  			disk->write_only = 0;
***************
*** 1963,1968 ****
  			goto abort;
  		}
  		rdisk->dev = NODEV;
  		rdisk->used_slot = 0;
  
  		break;
--- 2032,2038 ----
  			goto abort;
  		}
  		rdisk->dev = NODEV;
+ 		rdisk->bdev = NULL;
  		rdisk->used_slot = 0;
  
  		break;
***************
*** 1980,1985 ****
  		adisk->number = added_desc->number;
  		adisk->raid_disk = added_desc->raid_disk;
  		adisk->dev = mk_kdev(added_desc->major,added_desc->minor);
  
  		adisk->operational = 0;
  		adisk->write_only = 0;
--- 2050,2057 ----
  		adisk->number = added_desc->number;
  		adisk->raid_disk = added_desc->raid_disk;
  		adisk->dev = mk_kdev(added_desc->major,added_desc->minor);
+ 		/* it will be held open by rdev */
+ 		adisk->bdev = bdget(kdev_t_to_nr(adisk->dev));
  
  		adisk->operational = 0;
  		adisk->write_only = 0;
