/*
 * (C) Copyright IBM Corp. 2001, 2003
 *
 * This program is free software;  you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY;  without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 * the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program;  if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Module: plugins/bbr_seg/bbr_seg.c
 *
 * Bad Block Relocation (BBR) Segment Manager
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <uuid/uuid.h>
#include <fcntl.h>
#include <unistd.h>
#include <plugin.h>

#include "bbr_seg.h"
#include "options.h"

/* Prototype for bbr_write(); its definition is not in this part of the file. */
static int bbr_write(storage_object_t *segment, lsn_t lsn,
		     sector_count_t count, void *buffer);

/* This plugin's record. my_plugin_record points at it and is what
 * i_can_modify_object() compares against a segment's plugin field.
 */
static plugin_record_t bbr_plugin_record;
plugin_record_t *my_plugin_record = &bbr_plugin_record;
/* Table of engine services used throughout this file; starts out NULL. */
engine_functions_t *EngFncs = NULL;

/**
 * kernel_bbr_sector_io
 *
 * If the BBR segment is active, I/O has to go to the kernel device rather
 * than through engine-I/O. This routine opens the segment with the engine's
 * open_object service, performs a single read_object or write_object call
 * for the requested sector range, and closes the object again.
 *
 * rw: SECTOR_IO_WRITE selects a write; any other value selects a read.
 *
 * Returns: 0 if exactly (count << EVMS_VSECTOR_SIZE_SHIFT) bytes were
 *          transferred, EIO otherwise (including a failed open).
 **/
static int kernel_bbr_sector_io(storage_object_t *segment,
				lsn_t lsn, sector_count_t count,
				void *buffer, int rw)
{
	sector_count_t length = count << EVMS_VSECTOR_SIZE_SHIFT;
	lsn_t offset = lsn << EVMS_VSECTOR_SIZE_SHIFT;
	u_int64_t transferred = 0;
	int fd;
	int rc = EIO;

	LOG_ENTRY();
	LOG_DEBUG("%s segment %s: sector %"PRIu64", count %"PRIu64".\n",
		  rw ? "Writing to" : "Reading from", segment->name, lsn, count);

	fd = EngFncs->open_object(segment, O_RDWR|O_SYNC);
	if (fd < 0) {
		/* Could not open the device: report an I/O error. */
		LOG_EXIT_INT(rc);
		return rc;
	}

	if (rw == SECTOR_IO_WRITE) {
		transferred = EngFncs->write_object(segment, fd, buffer,
						    length, offset);
	} else {
		transferred = EngFncs->read_object(segment, fd, buffer,
						   length, offset);
	}

	/* Anything short of a full transfer counts as a failure. */
	rc = (transferred == length) ? 0 : EIO;

	EngFncs->close_object(segment, fd);

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * get_bad_block_count
 *
 * Add up the in_use_cnt field of every sector of a bbr mapping table.
 * The table is expected to be in cpu format.
 *
 * Returns:  total number of remapped sectors recorded in the table,
 *           or 0 if there is no table or it has no sectors
 **/
static sector_count_t get_bad_block_count(evms_bbr_table_t *bbr_table,
					  sector_count_t sector_count)
{
	sector_count_t total = 0;
	sector_count_t idx;

	LOG_ENTRY();

	if (bbr_table != NULL) {
		/* A zero sector_count simply leaves the total at 0. */
		for (idx = 0; idx < sector_count; idx++) {
			total += bbr_table[idx].in_use_cnt;
		}
	}

	LOG_EXIT_U64(total);
	return total;
}

/**
 * get_kernel_bbr_remap_sector_count
 *
 * Called to get a count of bad blocks being remapped to good
 * sectors by the kernel bbr feature.
 *
 * Returns: count of remapped sectors if successful
 *          otherwise ... 0
 **/
static sector_count_t get_kernel_bbr_remap_sector_count(storage_object_t *segment)
{
	bbr_private_data_t *pdata = segment->private_data;
	storage_object_t *child = pdata->child;
	sector_count_t sector_count = pdata->bbr_table_size_in_sectors;
	sector_count_t bad_blocks1 = 0, bad_blocks2 = 0;
	sector_count_t bad_blocks;
	evms_bbr_table_t *table;
	int rc;

	LOG_ENTRY();
	LOG_DEBUG("Getting number of remapped sectors for segment %s.\n",
		  segment->name);

	if (sector_count) {
		table = EngFncs->engine_alloc(sector_count <<
					      EVMS_VSECTOR_SIZE_SHIFT);
		if (table) {
			rc = READ(child, pdata->bbr_table_lsn1,
				  sector_count, table);
			if (!rc) {
				bad_blocks1 = get_bad_block_count(table, sector_count);
			}

			rc = READ(child, pdata->bbr_table_lsn2,
				  sector_count, table);
			if (!rc) {
				bad_blocks2 = get_bad_block_count(table, sector_count);
			}

			EngFncs->engine_free(table);
		}
	}

	bad_blocks = max(bad_blocks1, bad_blocks2);

	LOG_EXIT_U64(bad_blocks);
	return bad_blocks;
}

/**
 * get_engine_remap_sector_count
 *
 * Report how many bad blocks are currently remapped according to the
 * in-memory bbr mapping table held in the segment's private data.
 *
 * Returns:  count of remapped sectors if successful
 *           otherwise ... 0
 **/
static sector_count_t get_engine_remap_sector_count(storage_object_t *segment)
{
	bbr_private_data_t *pdata = segment->private_data;
	sector_count_t remapped;

	LOG_ENTRY();
	LOG_DEBUG("Getting number of remapped sectors for segment %s.\n",
		  segment->name);

	/* The in-memory table is already in cpu format. */
	remapped = get_bad_block_count(pdata->bbr_table,
				       pdata->bbr_table_size_in_sectors);

	LOG_EXIT_U64(remapped);
	return remapped;
}

/**
 * get_lsn
 *
 * Look an LSN up in the in-memory bbr mapping table.
 *
 * Returns: the replacement sector recorded for the LSN if the table holds
 *          an entry for it with a non-zero replacement sector
 *          otherwise ... the caller's LSN, unchanged
 **/
static lsn_t get_lsn(bbr_private_data_t *pdata, lsn_t lsn)
{
	evms_bbr_table_t *sector = pdata->bbr_table;
	lsn_t result = lsn;
	int s, e;

	LOG_ENTRY();

	for (s = 0; s < pdata->bbr_table_size_in_sectors; s++, sector++) {
		/* Table sectors with no entries in use can't hold a match. */
		if (!(sector->in_use_cnt > 0)) {
			continue;
		}

		for (e = 0; e < EVMS_BBR_ENTRIES_PER_SECT; e++) {
			if (sector->entries[e].bad_sect != lsn) {
				continue;
			}
			if (sector->entries[e].replacement_sect == 0) {
				continue;
			}

			result = sector->entries[e].replacement_sect;
			goto out;
		}
	}

out:
	LOG_EXIT_U64(result);
	return result;
}

/**
 * get_next_avail_repl_block_lsn
 *
 * Work out the LSN of the next unused replacement block. Replacement
 * blocks are handed out in order, so the next free one sits N sectors past
 * the start of the replacement area, where N is the total of the
 * in_use_cnt fields in the bbr table.
 *
 * Returns: success: LSN of the replacement block
 *          failure: returns invalid LSN ( LSN==0 )
 **/
static lsn_t get_next_avail_repl_block_lsn(bbr_private_data_t *pdata)
{
	int used = 0;
	int idx;
	lsn_t next;

	LOG_ENTRY();

	/* Total up the replacement sectors already handed out. */
	for (idx = 0; idx < pdata->bbr_table_size_in_sectors; idx++) {
		used += pdata->bbr_table[idx].in_use_cnt;
	}

	if (used == 0) {
		/* Nothing remapped yet, so the very first block is free. */
		next = pdata->replacement_blocks_lsn;
	} else if (used < pdata->replacement_blocks_size_in_sectors) {
		/* Blocks 0..used-1 are taken; the next one is still free. */
		next = pdata->replacement_blocks_lsn + used;
	} else {
		/* Every replacement block is already in use. */
		LOG_ERROR("Error: Unable to provide remap because all "
			  "replacement blocks are used.\n");
		next = 0;
	}

	LOG_EXIT_U64(next);
	return next;
}

/**
 * cpu_bbr_table_to_disk_table
 *
 * Convert a bbr table, in place, from cpu format to disk format. The
 * table is walked in steps of EVMS_VSECTOR_SIZE bytes and every header
 * field and mapping entry of each sector is byte-order converted.
 **/
static void cpu_bbr_table_to_disk_table(evms_bbr_table_t *bbr_table,
					sector_count_t sector_count)
{
	char *cursor = (char *)bbr_table;
	evms_bbr_table_t *sector;
	sector_count_t n;
	int e;

	LOG_ENTRY();

	for (n = 0; n < sector_count; n++, cursor += EVMS_VSECTOR_SIZE) {
		sector = (evms_bbr_table_t *)cursor;

		/* Per-sector header fields. */
		sector->signature       = CPU_TO_DISK32(sector->signature);
		sector->crc             = CPU_TO_DISK32(sector->crc);
		sector->sequence_number = CPU_TO_DISK32(sector->sequence_number);
		sector->in_use_cnt      = CPU_TO_DISK32(sector->in_use_cnt);

		/* Bad sector -> replacement sector mappings. */
		for (e = 0; e < EVMS_BBR_ENTRIES_PER_SECT; e++) {
			sector->entries[e].bad_sect =
				CPU_TO_DISK64(sector->entries[e].bad_sect);
			sector->entries[e].replacement_sect =
				CPU_TO_DISK64(sector->entries[e].replacement_sect);
		}
	}

	LOG_EXIT_VOID();
}

/**
 * write_bbr_table
 *
 * Write one copy of the BBR mapping table at the given LSN of the child
 * object. The caller's table is left untouched: it is copied into a
 * scratch buffer, converted to disk format and given a fresh per-sector
 * CRC there. When backup is set the buffer is handed to the engine's
 * save_metadata service instead of being written with WRITE().
 *
 * Assumes: evms_bbr_metadata_t and evms_bbr_table_t are padded to 512 bytes
 *
 * Returns: 0 if BBR Table is successfully written to disk
 *          ENOMEM if the scratch buffer can't be allocated
 *          otherwise the error from save_metadata or WRITE
 **/
static int write_bbr_table(storage_object_t *segment,
			   storage_object_t *child,
			   u_int64_t lsn,
			   u_int64_t sector_count,
			   evms_bbr_table_t *bbr_table,
			   boolean backup)
{
	evms_bbr_table_t *sector;
	char *image;
	u_int32_t crc;
	u_int64_t idx;
	int rc;

	LOG_ENTRY();
	LOG_DEBUG("Writing BBR mapping table on segment %s at "
		  "lsn %"PRIu64".\n", child->name, lsn);

	/* Scratch copy that will hold the endian-neutral table. */
	image = EngFncs->engine_alloc(sector_count * EVMS_VSECTOR_SIZE);
	if (!image) {
		LOG_EXIT_INT(ENOMEM);
		return ENOMEM;
	}

	memcpy(image, bbr_table, sector_count * EVMS_VSECTOR_SIZE);
	cpu_bbr_table_to_disk_table((evms_bbr_table_t *)image, sector_count);

	/* Each sector carries its own CRC, calculated over the whole
	 * sector with the crc field zeroed.
	 */
	for (idx = 0; idx < sector_count; idx++) {
		sector = (evms_bbr_table_t *)(image + idx * EVMS_VSECTOR_SIZE);

		sector->crc = 0;
		crc = EngFncs->calculate_CRC(EVMS_INITIAL_CRC, sector,
					     EVMS_VSECTOR_SIZE);
		sector->crc = CPU_TO_DISK32(crc);
	}

	rc = backup ? EngFncs->save_metadata(segment->name, child->name,
					     lsn, sector_count, image)
		    : WRITE(child, lsn, sector_count, image);

	EngFncs->engine_free(image);
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * remap_lsn
 *
 * Called to remap a bad block LSN to a replacement block LSN. The first
 * free entry in the in-memory bbr table (both bad_sect and
 * replacement_sect zero) is filled in with the next available replacement
 * block. Unless the segment is flagged SOFLAG_NEW, both on-disk copies of
 * the table are then rewritten; a failed write is logged but does not
 * change the return value, because the in-memory remap is already in place.
 *
 * Returns: Success: LSN of the replacement block
 *          Failure: Invalid LSN (LSN==0)
 **/
static lsn_t remap_lsn(storage_object_t *segment,
		       bbr_private_data_t *pdata,
		       lsn_t lsn)
{
	evms_bbr_table_t *table = pdata->bbr_table;
	lsn_t replacement_sect;
	int i, j, rc;

	LOG_ENTRY();

	for (i = 0; i < pdata->bbr_table_size_in_sectors; i++, table++) {
		/* Skip table sectors that have no free entries. */
		if (table->in_use_cnt >= EVMS_BBR_ENTRIES_PER_SECT) {
			continue;
		}

		for (j = 0; j < EVMS_BBR_ENTRIES_PER_SECT; j++) {
			/* Skip entries that are already in use. */
			if (table->entries[j].bad_sect != 0 ||
			    table->entries[j].replacement_sect != 0) {
				continue;
			}

			replacement_sect = get_next_avail_repl_block_lsn(pdata);
			if (replacement_sect) {
				/* Update bbr table. */
				table->entries[j].bad_sect = lsn;
				table->entries[j].replacement_sect = replacement_sect;
				table->in_use_cnt++;

				/* If not a new bbr segment then we need to
				 * sync the bbr table with the on-disk copy.
				 */
				if (!(segment->flags & SOFLAG_NEW)) {
					rc = write_bbr_table(segment, pdata->child,
							     pdata->bbr_table_lsn1,
							     pdata->bbr_table_size_in_sectors,
							     pdata->bbr_table, FALSE);
					if (rc) {
						LOG_ERROR("Error %d writing 1st copy of BBR "
							  "table for segment %s.\n",
							  rc, segment->name);
					}

					rc = write_bbr_table(segment, pdata->child,
							     pdata->bbr_table_lsn2,
							     pdata->bbr_table_size_in_sectors,
							     pdata->bbr_table, FALSE);
					if (rc) {
						LOG_ERROR("Error %d writing 2nd copy of BBR "
							  "table for segment %s.\n",
							  rc, segment->name);
					}
				}
			}

			/* The first free entry decides the outcome: either it
			 * was filled in, or no replacement block was left.
			 */
			LOG_EXIT_U64(replacement_sect);
			return replacement_sect;
		}
	}

	LOG_ERROR("Error: No replacement blocks available.\n");
	LOG_EXIT_INT(0);
	return 0;
}

/**
 * i_can_modify_object
 *
 * Test whether the specified segment belongs to this plugin: it must be
 * non-NULL, carry our plugin record, have private data, and that private
 * data must hold the BBR signature. This is also an opportunity to place
 * code here to further inspect an object prior to making any changes to it.
 **/
static boolean i_can_modify_object(storage_object_t *segment)
{
	bbr_private_data_t *pdata;
	boolean owned = FALSE;

	LOG_ENTRY();

	if (segment != NULL &&
	    segment->plugin == my_plugin_record &&
	    segment->private_data != NULL) {
		pdata = segment->private_data;
		if (pdata->signature == EVMS_BBR_SIGNATURE) {
			owned = TRUE;
		}
	}

	LOG_EXIT_BOOL(owned);
	return owned;
}

/**
 * get_child_useable_size
 *
 * Get the useable area of a BBR child object.  Round the useable area
 * down to a block boundary to prevent higher layers from laying down
 * an evms feature header on an unaccessible sector because the i/o is
 * done through the VFS in block size amounts.
 *
 * The metadata area is taken to end where the replacement blocks end
 * (replacement_blocks_lsn + replacement_blocks_size_in_sectors).
 *
 * Returns: number of sectors left for data. 0 if the metadata area
 *          covers the whole child. If the child's block size is smaller
 *          than one vsector the size is returned without rounding.
 **/
static sector_count_t get_child_useable_size(storage_object_t *parent,
					     storage_object_t *child)
{
	bbr_private_data_t *pdata = parent->private_data;
	sector_count_t child_useable_size = 0;
	sector_count_t metadata_sectors;
	sector_count_t blocksize;

	LOG_ENTRY();

	/* Bytes to vsectors. */
	blocksize = child->geometry.block_size >> EVMS_VSECTOR_SIZE_SHIFT;
	metadata_sectors = pdata->replacement_blocks_lsn +
			   pdata->replacement_blocks_size_in_sectors;

	/* Guard the unsigned subtraction against wrapping around. */
	if (child->size > metadata_sectors) {
		child_useable_size = child->size - metadata_sectors;

		/* Round down to block boundary; a zero blocksize would
		 * otherwise be a modulo by zero.
		 */
		if (blocksize) {
			child_useable_size -= child_useable_size % blocksize;
		}
	}

	LOG_EXIT_U64(child_useable_size);
	return child_useable_size;
}

/**
 * validate_sectors
 *
 * Called to validate that a run of disk sectors are Ok. Just a simple read
 * test, one sector at a time, on the specified run of sectors on the
 * storage object. The test stops at the first read error.
 *
 * Status is reported through the engine's progress service if the first
 * progress call succeeds, otherwise through user messages.
 *
 * good_sector_count: set to the number of sectors read successfully,
 *                    prior to an error or successful completion.
 *
 * Returns: 0 if every sector was read, otherwise the error from the
 *          failing READ
 **/
static int validate_sectors(storage_object_t *object, u_int64_t start,
			    u_int64_t count, u_int64_t *good_sector_count)
{
	char buffer[EVMS_VSECTOR_SIZE];
	int use_progress, rc;
	u_int64_t lsn = start;
	u_int64_t i;	/* Same width as count, so the loop can't overflow. */
	progress_t progress;

	LOG_ENTRY();

	*good_sector_count = 0;

	/* Use the progress service if the UI supports it. Otherwise,
	 * use the user message service to provide some status.
	 */
	memset(&progress, 0, sizeof(progress));
	progress.title = "Performing I/O tests on replacements blocks...";
	progress.description = "";
	progress.type = DISPLAY_PERCENT;
	progress.total_count = count;
	progress.plugin_private_data = NULL;
	progress.ui_private_data = NULL;

	rc = EngFncs->progress(&progress);
	use_progress = (rc == 0);
	if (!use_progress) {
		/* NOTE(review): "Performaing" is a typo; left as is because
		 * the string is passed through _() and may be a catalog key.
		 */
		MESSAGE(_("Performaing I/O tests on replacement blocks for "
			  "object %s. This will take a moment or two.\n"),
			object->name );
	}

	for (i = 0, rc = 0; i < count && rc == 0; i++, lsn++) {
		/* Refresh the progress indicator every 100 sectors. */
		if (use_progress && !(i % 100)) {
			progress.count = i;
			EngFncs->progress(&progress);
		}

		rc = READ(object, lsn, 1, buffer);
		if (!rc) {
			(*good_sector_count)++;
		}
	}

	if (use_progress) {
		progress.title = "Finished testing replacement blocks.";
		progress.count = count;

		EngFncs->progress(&progress);
	} else {
		MESSAGE(_("Finished testing replacement blocks.\n"));
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * initialize_bbr_table
 *
 * Prepare a new bbr mapping table that doesn't map any replacement
 * sectors. Only the signature of each table sector is written here; no
 * other field is touched.
 **/
static void initialize_bbr_table(evms_bbr_table_t *bbr_table,
				 u_int64_t sector_count)
{
	int idx = 0;

	LOG_ENTRY();
	LOG_DEBUG("Table addr = %p, sector count = %"PRIu64"\n",
		  bbr_table, sector_count);

	/* Stamp every sector of the table with the table signature. */
	while (idx < sector_count) {
		bbr_table[idx].signature = EVMS_BBR_TABLE_SIGNATURE;
		idx++;
	}

	LOG_EXIT_VOID();
}

/**
 * roundup_to_hardsect_boundary
 *
 * Round an LSN up so that (lsn + child->start) falls on a multiple of the
 * child's hard sector size, expressed in vsectors. An LSN that is already
 * on such a boundary is returned unchanged.
 **/
static lsn_t roundup_to_hardsect_boundary(storage_object_t *child, lsn_t lsn)
{
	int vsects_per_hardsect = child->geometry.bytes_per_sector >>
				  EVMS_VSECTOR_SIZE_SHIFT;
	lsn_t aligned = lsn;
	lsn_t excess;

	LOG_ENTRY();

	/* How far past the previous hard sector boundary are we? */
	excess = (lsn + child->start) % vsects_per_hardsect;
	if (excess) {
		aligned = lsn - excess + vsects_per_hardsect;
	}

	LOG_EXIT_U64(aligned);
	return aligned;
}

/**
 * create_bbr_metadata
 *
 * Called to fill in BBR metadata info for a BBR segment. This routine is
 * called from the create API code. The goal is to reserve no more than 1%
 * of the region for replacement blocks. The minimum amount we'll reserve
 * is 63 sectors ... about a track of disk space in most cases.
 *
 * The layout fields of the parent's private data are filled in, the
 * replacement sectors are read-tested, and a fresh mapping table is
 * allocated and initialized.
 *
 * Returns: 0 on success
 *          EINVAL if the child's hard sector size is not 1, 2, 4, 8 or 16
 *                 vsectors
 *          ENOMEM if the mapping table can't be allocated
 *          otherwise the error from validate_sectors
 **/
static int create_bbr_metadata(storage_object_t *parent,
			       storage_object_t *child)
{
	bbr_private_data_t *pdata = parent->private_data;
	int vsects_per_hardsect;
	u_int64_t good_sectors;
	int rc;

	LOG_ENTRY();

	vsects_per_hardsect = child->geometry.bytes_per_sector >>
			      EVMS_VSECTOR_SIZE_SHIFT;

	switch (vsects_per_hardsect) {
	case 1:
	case 2:
	case 4:
	case 8:
	case 16:
		break;
	default:
		/* Not a power-of-2. */
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}

	/* Size of replacement blocks. */
	pdata->block_size = EVMS_VSECTOR_SIZE;

	/* Start from the default of 1% of the storage region, then apply
	 * the MINIMUM, MAXIMUM and HARD SECTOR MULTIPLE rules. Only a value
	 * that is within the limits gets rounded up to a hard sector
	 * multiple; the two limits are used as they are.
	 */
	pdata->replacement_blocks_needed = child->size / 100;
	if (pdata->replacement_blocks_needed < BBR_MIN_REPLACEMENT_BLOCKS) {
		pdata->replacement_blocks_needed = BBR_MIN_REPLACEMENT_BLOCKS;
	} else if (pdata->replacement_blocks_needed > BBR_MAX_REPLACEMENT_BLOCKS) {
		pdata->replacement_blocks_needed = BBR_MAX_REPLACEMENT_BLOCKS;
	} else if (pdata->replacement_blocks_needed % vsects_per_hardsect) {
		pdata->replacement_blocks_needed =
			pdata->replacement_blocks_needed -
			(pdata->replacement_blocks_needed % vsects_per_hardsect) +
			vsects_per_hardsect;
	}

	/* One sector per replacement block. */
	pdata->replacement_blocks_size_in_sectors = pdata->replacement_blocks_needed;

	/* Table sectors needed to hold one entry per replacement block,
	 * rounding a partial sector up.
	 */
	pdata->bbr_table_size_in_sectors =
		pdata->replacement_blocks_needed / EVMS_BBR_ENTRIES_PER_SECT +
		((pdata->replacement_blocks_needed % EVMS_BBR_ENTRIES_PER_SECT) ? 1 : 0);

	/* Force mapping tables to be a multiple of the hardsector size to
	 * prevent partial sector i/o requirements in the kernel plug-in.
	 */
	if (pdata->bbr_table_size_in_sectors % vsects_per_hardsect) {
		pdata->bbr_table_size_in_sectors =
			pdata->bbr_table_size_in_sectors -
			(pdata->bbr_table_size_in_sectors % vsects_per_hardsect) +
			vsects_per_hardsect;
	}

	/* Lay the pieces out back to back: skip the boot sector and the 2
	 * metadata sectors, then table 1, table 2, replacement sectors.
	 */
	pdata->bbr_table_lsn1 = roundup_to_hardsect_boundary(child, 3);
	pdata->bbr_table_lsn2 = pdata->bbr_table_lsn1 +
				pdata->bbr_table_size_in_sectors;
	pdata->replacement_blocks_lsn = pdata->bbr_table_lsn2 +
					pdata->bbr_table_size_in_sectors;

	/* Validate that we are not using BAD replacement sectors. */
	rc = validate_sectors(child, pdata->replacement_blocks_lsn,
			      pdata->replacement_blocks_size_in_sectors,
			      &good_sectors);
	if (rc != 0) {
		LOG_ERROR("Unable to lay down requested number of replacement "
			  "sectors, only first %"PRIu64" sectors were Ok.\n",
			  good_sectors);
		LOG_EXIT_INT(rc);
		return rc;
	}

	/* Alloc a bbr table. */
	pdata->bbr_table = EngFncs->engine_alloc(pdata->bbr_table_size_in_sectors *
						 EVMS_VSECTOR_SIZE);
	if (pdata->bbr_table == NULL) {
		LOG_ERROR("Unable to allocate a new BBR mapping table of %"
			  PRIu64" sectors.\n", pdata->bbr_table_size_in_sectors);
		LOG_EXIT_INT(ENOMEM);
		return ENOMEM;
	}

	initialize_bbr_table(pdata->bbr_table, pdata->bbr_table_size_in_sectors);

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * build_metadata
 *
 * Fill a BBR metadata structure from the segment and its BBR private
 * data. The result is a copy of the metadata that can be layed down
 * at location ... 1 and 2
 *
 * The second table location can be figured by adding the size of the table
 * to table 1's lsn.
 *
 * Layout:
 *   _____________________________________________________________________
 *  |  Boot  | Metadata | Metadata | Table | Table | Replacement | Data ...
 *  | Sector |    1     |    2     |   1   |   2   |   Sectors   |
 *  |________|__________|__________|_______|_______|____________ |_______
 **/
static void build_metadata(storage_object_t *segment,
			   evms_bbr_metadata_t *metadata)
{
	bbr_private_data_t *pdata = segment->private_data;

	LOG_ENTRY();
	LOG_DEBUG("Constructing metadata for segment %s.\n", segment->name);

	/* Start from an all-zero structure. */
	memset(metadata, 0, sizeof(*metadata));

	metadata->signature = EVMS_BBR_SIGNATURE;
	metadata->block_size = pdata->block_size;

	/* Where the mapping table and the replacement sectors live. */
	metadata->start_sect_bbr_table = pdata->bbr_table_lsn1;
	metadata->nr_sects_bbr_table = pdata->bbr_table_size_in_sectors;
	metadata->start_replacement_sect = pdata->replacement_blocks_lsn;
	metadata->nr_replacement_blks = pdata->replacement_blocks_needed;

	/* Segment identity, size and start. */
	strncpy(metadata->uuid, segment->uuid, EVMS_NAME_SIZE);
	metadata->data_size = segment->size;
	metadata->data_start = segment->start;

	/* Enabled or disabled setup could be changing: when a state change
	 * is pending, record the requested state, otherwise the current one.
	 */
	if (!(pdata->bbr_state & BBR_CHANGE_STATE)) {
		metadata->bbr_active = pdata->bbr_state;
	} else if (pdata->bbr_state & BBR_ACTIVATE) {
		metadata->bbr_active |= BBR_ENABLED;
	} else {
		metadata->bbr_active &= ~BBR_ENABLED;
	}

	LOG_EXIT_VOID();
}

/**
 * disk_bbr_table_to_cpu_table
 *
 * Convert a bbr table, in place, from disk format to cpu format. Every
 * header field and mapping entry of each table sector is byte-order
 * converted.
 **/
static void disk_bbr_table_to_cpu_table(evms_bbr_table_t *bbr_table,
					sector_count_t sector_count)
{
	evms_bbr_table_t *sector;
	sector_count_t n;
	int e;

	LOG_ENTRY();

	for (n = 0; n < sector_count; n++) {
		sector = &bbr_table[n];

		/* Per-sector header fields. */
		sector->signature       = DISK_TO_CPU32(sector->signature);
		sector->crc             = DISK_TO_CPU32(sector->crc);
		sector->sequence_number = DISK_TO_CPU32(sector->sequence_number);
		sector->in_use_cnt      = DISK_TO_CPU32(sector->in_use_cnt);

		/* Bad sector -> replacement sector mappings. */
		for (e = 0; e < EVMS_BBR_ENTRIES_PER_SECT; e++) {
			sector->entries[e].bad_sect =
				DISK_TO_CPU64(sector->entries[e].bad_sect);
			sector->entries[e].replacement_sect =
				DISK_TO_CPU64(sector->entries[e].replacement_sect);
		}
	}

	LOG_EXIT_VOID();
}

/**
 * read_bbr_table
 *
 *  Read one copy of the BBR table from the given LSN into the supplied
 *  buffer and verify it. Every sector must carry the table signature and a
 *  matching CRC; checking stops at the first sector that fails. Only a
 *  fully valid table is converted to cpu format.
 *
 *  Returns: BBR Table in the supplied buffer if RC=0
 *           the READ error if the sectors can't be read
 *           ENODATA on a bad signature or CRC
 **/
static int read_bbr_table(storage_object_t *object, void *bbr_table,
			  u_int64_t lsn, u_int64_t count)
{
	evms_bbr_table_t *sector;
	u_int32_t stored_crc, computed_crc;
	u_int64_t n;
	int rc;

	LOG_ENTRY();
	LOG_DEBUG("Reading BBR table at lsn %"PRIu64", nr_sects %"PRIu64"\n",
		  lsn, count);

	/* Read the sectors off of the disk. */
	rc = READ(object, lsn, count, bbr_table);
	if (rc) {
		LOG_EXIT_INT(rc);
		return rc;
	}

	for (n = 0; n < count; n++) {
		sector = (evms_bbr_table_t *)bbr_table + n;

		if (DISK_TO_CPU32(sector->signature) != EVMS_BBR_TABLE_SIGNATURE) {
			LOG_ERROR("Not our signature\n");
			rc = ENODATA;
			break;
		}

		/* The CRC covers the sector with its crc field zeroed, so
		 * blank the field for the calculation and put it back after.
		 */
		stored_crc = DISK_TO_CPU32(sector->crc);
		sector->crc = 0;
		computed_crc = EngFncs->calculate_CRC(EVMS_INITIAL_CRC,
						      sector, EVMS_VSECTOR_SIZE);
		sector->crc = CPU_TO_DISK32(stored_crc);

		if (stored_crc != computed_crc) {
			LOG_ERROR("CRC failed on bbr_table[%"PRIu64"]: "
				  "expected %X, calculated %X.\n",
				  n, stored_crc, computed_crc);
			rc = ENODATA;
			break;
		}
	}

	if (rc == 0) {
		disk_bbr_table_to_cpu_table(bbr_table, count);
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * get_bbr_table
 *
 * Called to get a copy of the BBR table. Both on-disk copies are read
 * (the second only if its LSN is non-zero and differs from the first) and
 * the result is placed in the caller's buffer: a per-sector merge when
 * both copies are good, otherwise whichever single copy is good.
 *
 * BBR Table memory is allocated by caller!!
 *
 * Returns: BBR Table in the supplied buffer if RC=0
 *          ENODATA if neither copy could be used
 **/
static int get_bbr_table(storage_object_t *child,
			 void *bbr_table,
			 sector_count_t count,
			 lba_t bbr_table_lsn1,
			 lba_t bbr_table_lsn2)
{
	evms_bbr_table_t *table1, *table2 = NULL;
	evms_bbr_table_t *chosen = NULL;
	int rc1 = ENOMEM, rc2 = ENODATA;
	int rc = 0;
	int i;

	LOG_ENTRY();

	/* Read table 1. */
	table1 = EngFncs->engine_alloc(count * EVMS_VSECTOR_SIZE);
	if (table1) {
		rc1 = read_bbr_table(child, table1, bbr_table_lsn1, count);
	}

	/* Read table 2 ... if we have a second copy. */
	if (bbr_table_lsn2 != 0 && bbr_table_lsn1 != bbr_table_lsn2) {
		table2 = EngFncs->engine_alloc(count * EVMS_VSECTOR_SIZE);
		rc2 = table2 ? read_bbr_table(child, table2, bbr_table_lsn2, count)
			     : ENOMEM;
	}

	if (!rc1 && !rc2) {
		/* Combine the two BBR mapping tables into one.
		 * Merge 2 good tables into 1, using the most recent copy of
		 * each sector in the BBR table by inspecting the sequence
		 * numbers and sector used counters. The most recent copy of
		 * a BBR table sector should have a higher sequence number but
		 * if the kernel didn't update this field yet ... then the
		 * in_use counter should tell us which is the most recent sector.
		 */
		for (i = 0; i < count; i++) {
			if (table2[i].sequence_number > table1[i].sequence_number ||
			    table2[i].in_use_cnt > table1[i].in_use_cnt) {
				memcpy(&table1[i], &table2[i], sizeof(table1[i]));
			}
		}
		chosen = table1;
	} else if (!rc1) {
		chosen = table1;
	} else if (!rc2) {
		chosen = table2;
	} else {
		rc = ENODATA;
	}

	if (rc == 0) {
		memcpy(bbr_table, chosen, count * EVMS_VSECTOR_SIZE);
	}

	EngFncs->engine_free(table1);
	EngFncs->engine_free(table2);

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * disk_metadata_to_cpu
 *
 * Convert the numeric fields of a BBR metadata structure, in place, from
 * disk byte order to cpu byte order.
 **/
static void disk_metadata_to_cpu(evms_bbr_metadata_t *metadata)
{
	LOG_ENTRY();

	/* 32-bit fields. */
	metadata->signature  = DISK_TO_CPU32(metadata->signature);
	metadata->crc        = DISK_TO_CPU32(metadata->crc);
	metadata->block_size = DISK_TO_CPU32(metadata->block_size);
	metadata->flags      = DISK_TO_CPU32(metadata->flags);
	metadata->bbr_active = DISK_TO_CPU32(metadata->bbr_active);

	/* 64-bit fields. */
	metadata->sequence_number =
		DISK_TO_CPU64(metadata->sequence_number);
	metadata->start_sect_bbr_table =
		DISK_TO_CPU64(metadata->start_sect_bbr_table);
	metadata->nr_sects_bbr_table =
		DISK_TO_CPU64(metadata->nr_sects_bbr_table);
	metadata->start_replacement_sect =
		DISK_TO_CPU64(metadata->start_replacement_sect);
	metadata->nr_replacement_blks =
		DISK_TO_CPU64(metadata->nr_replacement_blks);
	metadata->data_start = DISK_TO_CPU64(metadata->data_start);
	metadata->data_size  = DISK_TO_CPU64(metadata->data_size);

	LOG_EXIT_VOID();
}

/**
 * cpu_metadata_to_disk
 *
 * Convert the numeric fields of a BBR metadata structure, in place, from
 * cpu byte order to disk byte order.
 **/
static void cpu_metadata_to_disk(evms_bbr_metadata_t *metadata)
{
	LOG_ENTRY();

	/* 32-bit fields. */
	metadata->signature  = CPU_TO_DISK32(metadata->signature);
	metadata->crc        = CPU_TO_DISK32(metadata->crc);
	metadata->block_size = CPU_TO_DISK32(metadata->block_size);
	metadata->flags      = CPU_TO_DISK32(metadata->flags);
	metadata->bbr_active = CPU_TO_DISK32(metadata->bbr_active);

	/* 64-bit fields. */
	metadata->sequence_number =
		CPU_TO_DISK64(metadata->sequence_number);
	metadata->start_sect_bbr_table =
		CPU_TO_DISK64(metadata->start_sect_bbr_table);
	metadata->nr_sects_bbr_table =
		CPU_TO_DISK64(metadata->nr_sects_bbr_table);
	metadata->start_replacement_sect =
		CPU_TO_DISK64(metadata->start_replacement_sect);
	metadata->nr_replacement_blks =
		CPU_TO_DISK64(metadata->nr_replacement_blks);
	metadata->data_start = CPU_TO_DISK64(metadata->data_start);
	metadata->data_size  = CPU_TO_DISK64(metadata->data_size);

	LOG_EXIT_VOID();
}

/**
 * check_metadata_copy
 *
 * Validate one raw (disk format) copy of the BBR metadata: it must carry
 * the BBR signature and its stored CRC must either be zero or match the
 * CRC calculated over the structure with the crc field zeroed. A copy
 * that passes is converted to cpu order in place.
 *
 * Returns: 0 if the copy is good, ENODATA otherwise
 **/
static int check_metadata_copy(evms_bbr_metadata_t *copy)
{
	u_int32_t stored_crc, computed_crc;

	if (DISK_TO_CPU32(copy->signature) != EVMS_BBR_SIGNATURE) {
		return ENODATA;
	}

	stored_crc = DISK_TO_CPU32(copy->crc);
	copy->crc = 0;
	computed_crc = EngFncs->calculate_CRC(EVMS_INITIAL_CRC, copy,
					      sizeof(*copy));
	copy->crc = CPU_TO_DISK32(stored_crc);

	if (stored_crc != 0 && stored_crc != computed_crc) {
		return ENODATA;
	}

	disk_metadata_to_cpu(copy);
	return 0;
}

/**
 * read_metadata
 *
 * Called to read the BBR metadata into the specified buffer. Both copies
 * (sectors 1 and 2 of the object) are read and validated, and the better
 * one ends up in the caller's buffer in cpu order.
 *
 * Returns: 0 if a good copy was found
 *          ENOMEM if the I/O buffer can't be allocated
 *          ENODATA if neither copy is usable
 **/
static int read_metadata(storage_object_t *object,
			 evms_bbr_metadata_t *metadata)
{
	evms_bbr_metadata_t second;
	char *sector;
	int rc1, rc2;
	int rc = 0;

	LOG_ENTRY();

	/* Allocate an I/O buffer, since the metadata structure
	 * isn't a full sector.
	 */
	sector = EngFncs->engine_alloc(EVMS_VSECTOR_SIZE);
	if (sector == NULL) {
		LOG_EXIT_INT(ENOMEM);
		return ENOMEM;
	}

	/* Read the metadata from sectors 1 and 2. Sector 0 is skipped. */
	rc1 = READ(object, 1, 1, sector);
	memcpy(metadata, sector, sizeof(*metadata));

	rc2 = READ(object, 2, 1, sector);
	memcpy(&second, sector, sizeof(second));

	EngFncs->engine_free(sector);

	LOG_DEBUG("Read metadata. 1st copy rc = %d. 2nd copy rc = %d\n",
		  rc1, rc2);

	/* A copy that was read successfully still has to pass the
	 * signature and CRC checks.
	 */
	if (rc1 == 0) {
		rc1 = check_metadata_copy(metadata);
	}
	if (rc2 == 0) {
		rc2 = check_metadata_copy(&second);
	}

	if (rc1 != 0 && rc2 != 0) {
		/* Neither copy is any good. */
		LOG_DEBUG("No BBR metadata found, or both copies "
			  "of metadata are bad.\n");
		rc = ENODATA;
	} else if (rc2 != 0) {
		/* Only the first copy is good, and it is already in place. */
		LOG_DEBUG("Using 1st copy of metadata. 2nd is missing or bad.\n");
	} else if (rc1 != 0) {
		/* Only the second copy is good, so hand that one back. */
		LOG_DEBUG("Using 2nd copy of metadata. 1st is missing or bad.\n");
		memcpy(metadata, &second, sizeof(*metadata));
	} else {
		/* Both copies are good. The sequence number is incremented
		 * every time the kernel changes the metadata, so the higher
		 * number marks the most recent version. Normally the two
		 * are the same, in which case the first copy is kept.
		 */
		LOG_DEBUG("Both copies of metadata are Ok.\n");
		LOG_DEBUG("  seq_number_1 = %"PRIu64" seq_number_2 = %"PRIu64"\n",
			  metadata->sequence_number, second.sequence_number);

		if (metadata->sequence_number < second.sequence_number) {
			LOG_DEBUG("Using 2nd copy of metadata.\n");
			memcpy(metadata, &second, sizeof(*metadata));
		} else {
			LOG_DEBUG("Using 1st copy. Sequence numbers are same or 1st is > 2nd\n");
		}
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * write_metadata
 *
 * Called to write BBR metadata out to disk. Each commit phase writes one
 * copy of the metadata together with the matching copy of the mapping
 * table:
 *   FIRST_METADATA_WRITE  -> metadata at LSN 1, table at bbr_table_lsn1
 *   SECOND_METADATA_WRITE -> metadata at LSN 2, table at bbr_table_lsn2
 * Any other phase writes nothing.
 *
 * The caller's metadata structure is modified: start_sect_bbr_table is
 * pointed at the table copy for this phase, the structure is converted to
 * disk format in place and its crc field is filled in.
 *
 * The CRC is calculated over sizeof(*metadata) bytes, the same length
 * read_metadata() verifies, so it never reads past the caller's structure.
 *
 * When backup is set, the engine's save_metadata service is used instead
 * of writing to the child object.
 *
 * Returns: 0 on success
 *          ENOMEM if the sector buffer can't be allocated
 *          otherwise the first error from the metadata or table write
 **/
static int write_metadata(storage_object_t *segment,
			  evms_bbr_metadata_t *metadata,
			  commit_phase_t commit_phase,
			  boolean backup)
{
	bbr_private_data_t *pdata = segment->private_data;
	storage_object_t *child = pdata->child;
	u_int64_t table_lsn;
	char *buffer;
	u_int32_t crc;
	int copy;	/* Copy number; the Nth copy of the metadata lives at LSN N. */
	int table_rc;
	int rc = 0;

	LOG_ENTRY();
	LOG_DEBUG("Writing metadata for segment %s.\n", segment->name);

	buffer = EngFncs->engine_alloc(EVMS_VSECTOR_SIZE);
	if (!buffer) {
		LOG_EXIT_INT(ENOMEM);
		return ENOMEM;
	}

	if (commit_phase == FIRST_METADATA_WRITE) {
		copy = 1;
		table_lsn = pdata->bbr_table_lsn1;
	} else if (commit_phase == SECOND_METADATA_WRITE) {
		copy = 2;
		table_lsn = pdata->bbr_table_lsn2;
	} else {
		/* Nothing to write in any other phase. */
		goto out;
	}

	/* Each copy of the metadata points to its own copy of the
	 * BBR mapping table.
	 */
	metadata->start_sect_bbr_table = table_lsn;

	/* Convert metadata to disk format. */
	cpu_metadata_to_disk(metadata);

	/* CRC this copy of the metadata with the crc field zeroed. */
	metadata->crc = 0;
	crc = EngFncs->calculate_CRC(EVMS_INITIAL_CRC, metadata,
				     sizeof(*metadata));
	metadata->crc = CPU_TO_DISK32(crc);

	LOG_DEBUG("Phase %d. Writing metadata to LSN %d.\n", copy, copy);

	/* The structure isn't necessarily a full sector, so the write goes
	 * through a sector-sized buffer.
	 */
	memcpy(buffer, metadata, sizeof(*metadata));
	if (backup) {
		rc = EngFncs->save_metadata(segment->name, child->name,
					    copy, 1, buffer);
	} else {
		rc = WRITE(child, copy, 1, buffer);
	}

	/* Write out the matching copy of the bbr mapping table. This is
	 * attempted even if the metadata write failed, but the first error
	 * is the one reported.
	 */
	if (pdata->bbr_table) {
		table_rc = write_bbr_table(segment, child, table_lsn,
					   pdata->bbr_table_size_in_sectors,
					   pdata->bbr_table, backup);
		if (!rc) {
			rc = table_rc;
		}
	}

out:
	EngFncs->engine_free(buffer);
	LOG_EXIT_INT(rc);
	return rc;
}



/**
 * free_bbr_segment
 *
 * Release a BBR segment: unregister its UUID if it has one, free the
 * mapping table and the private data, then hand the segment itself back
 * to the engine. A NULL segment is ignored.
 **/
static void free_bbr_segment(storage_object_t *segment)
{
	bbr_private_data_t *pdata;

	LOG_ENTRY();

	if (segment != NULL) {
		/* Only a non-empty UUID has a name to unregister. */
		if (segment->uuid[0] != '\0') {
			EngFncs->unregister_name(segment->uuid);
		}

		pdata = segment->private_data;
		if (pdata != NULL) {
			EngFncs->engine_free(pdata->bbr_table);
			EngFncs->engine_free(pdata);
			segment->private_data = NULL;
		}

		EngFncs->free_segment(segment);
	}

	LOG_EXIT_VOID();
}

/**
 * malloc_bbr_segment
 *
 * Allocate a new segment from the engine together with the BBR private
 * data that hangs off it. On success the segment is tagged as belonging to
 * this plugin, typed as a data SEGMENT, and its private data carries the
 * BBR signature.
 *
 * Returns: pointer to the new BBR segment, or NULL if either allocation
 *          failed (a segment obtained before the private-data failure is
 *          returned to the engine).
 **/
static storage_object_t *malloc_bbr_segment(void)
{
	storage_object_t *new_seg = NULL;
	bbr_private_data_t *priv;

	LOG_ENTRY();

	if (EngFncs->allocate_segment(NULL, &new_seg) != 0) {
		LOG_EXIT_PTR(new_seg);
		return new_seg;
	}

	priv = EngFncs->engine_alloc(sizeof(*priv));
	if (!priv) {
		/* Without private data the segment is useless; give it back. */
		EngFncs->free_segment(new_seg);
		new_seg = NULL;
	} else {
		priv->signature = EVMS_BBR_SIGNATURE;
		new_seg->private_data = priv;
		new_seg->plugin = my_plugin_record;
		new_seg->object_type = SEGMENT;
		new_seg->data_type = DATA_TYPE;
	}

	LOG_EXIT_PTR(new_seg);
	return new_seg;
}

/**
 * kill_sectors
 *
 * Process the kill-sectors list of a segment: every queued request is
 * carried out by writing a scratch buffer over the requested range with
 * bbr_write(), after which the request is freed.
 *
 * Returns: 0 on success, ENOMEM if the scratch buffer cannot be allocated,
 *          or the error returned by bbr_write(). On failure the requests
 *          that were not yet attempted are put back on the segment's list
 *          (instead of being leaked) so a later call can retry them; the
 *          request whose write failed is dropped.
 **/
static int kill_sectors(storage_object_t *segment)
{
	bbr_private_data_t *pdata = segment->private_data;
	kill_sectors_t *ks_prev, *ks = pdata->kill_sector_list_head;
	sector_count_t buffer_size = 0;
	char *buffer = NULL;
	int rc = 0;

	LOG_ENTRY();
	LOG_DEBUG("Processing kill sectors for segment %s.\n", segment->name);

	/* Copy the kill sector list head and NULL out the global variable.
	 * This function uses bbr_write() to write out the kill sectors,
	 * but bbr_write() has a check to write kill sectors before it does
	 * any writing.  We could end up in infinite recursion between
	 * kill_sectors() and bbr_write(). By having this function remove
	 * the kill sectors from the list the recursion is stopped.
	 */
	pdata->kill_sector_list_head = NULL;

	while (ks) {
		/* Grow the scratch buffer only when this request is larger
		 * than anything seen so far. The buffer is written as
		 * allocated - presumably engine_alloc() returns zeroed
		 * memory; TODO confirm.
		 */
		if (buffer_size < ks->count) {
			EngFncs->engine_free(buffer);
			buffer = EngFncs->engine_alloc(ks->count *
						       EVMS_VSECTOR_SIZE);
			if (!buffer) {
				buffer_size = 0;
				rc = ENOMEM;
				break;
			}
			buffer_size = ks->count;
		}

		LOG_DEBUG("Killing %"PRIu64" sectors at sector "
			  "offset %"PRIu64".\n", ks->count, ks->lsn);
		rc = bbr_write(segment, ks->lsn, ks->count, buffer);

		ks_prev = ks;
		ks = ks->next;
		EngFncs->engine_free(ks_prev);

		if (rc) {
			break;
		}
	}

	if (ks) {
		/* We stopped early. Re-queue the unprocessed requests in
		 * front of whatever is on the list now, so they are neither
		 * leaked nor silently forgotten.
		 */
		for (ks_prev = ks; ks_prev->next; ks_prev = ks_prev->next) {
			;
		}
		ks_prev->next = pdata->kill_sector_list_head;
		pdata->kill_sector_list_head = ks;
	}

	EngFncs->engine_free(buffer);
	LOG_EXIT_INT(rc);
	return(rc);
}

/**
 * commit_bbr_segment
 *
 * Write the metadata for the given commit phase to the specified BBR
 * segment. Nothing is done (and 0 is returned) unless the segment is
 * marked dirty. Pending kill-sector requests are processed first; the
 * result of that step is not checked. Once the second metadata copy has
 * been written successfully the dirty/new flags and the pending
 * state-change bits are cleared.
 *
 * Returns: 0 on success or when there is nothing to commit, otherwise the
 *          error from write_metadata().
 **/
static int commit_bbr_segment(storage_object_t *segment,
			      commit_phase_t commit_phase)
{
	bbr_private_data_t *priv = segment->private_data;
	evms_bbr_metadata_t md;
	int rc = 0;

	LOG_ENTRY();

	if (segment->flags & SOFLAG_DIRTY) {
		if (priv->kill_sector_list_head != NULL) {
			kill_sectors(segment);
		}

		/* Build a fresh metadata image and write the copy that
		 * belongs to this phase.
		 */
		memset(&md, 0, sizeof(md));
		build_metadata(segment, &md);
		rc = write_metadata(segment, &md, commit_phase, FALSE);

		/* Only after the last copy is safely written is the
		 * segment considered clean.
		 */
		if (rc == 0 && commit_phase == SECOND_METADATA_WRITE) {
			segment->flags &= ~(SOFLAG_DIRTY | SOFLAG_NEW);
			priv->bbr_state &= ~(BBR_CHANGE_STATE | BBR_ACTIVATE);
		}
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * bbr_get_devmap_info
 *
 * Called to test if the BBR segment has an active device mapper
 * node in the kernel and set the object info accordingly.
 *
 * The engine is asked to refresh the segment's status. If the segment is
 * active, its kernel target list is fetched and compared against what the
 * engine expects: a single target covering the whole segment, of type BBR
 * (when BBR is enabled) or linear (when it is not), with matching device
 * and table parameters. Any mismatch - including a failure to get the
 * target list at all - marks the segment SOFLAG_NEEDS_ACTIVATE.
 **/
static void bbr_get_devmap_info(storage_object_t *segment)
{
	bbr_private_data_t *pdata = segment->private_data;
	storage_object_t *child = pdata->child;
	/* Must start out NULL: if dm_get_targets() fails it may leave this
	 * untouched, and it is examined again when cleaning up below.
	 */
	dm_target_t *trgts = NULL;
	int rc;

	LOG_ENTRY();

	/* Ask the kernel if this segment is active. */
	EngFncs->dm_update_status(segment);

	if (!(segment->flags & SOFLAG_ACTIVE)) {
		LOG_EXIT_VOID();
		return;
	}

	/* If the segment is active, check that the mapping is correct. */
	rc = EngFncs->dm_get_targets(segment, &trgts);
	if (rc ||
	    !trgts ||
	    trgts->next != NULL ||
	    trgts->start != 0 ||
	    trgts->length != segment->size) {
		LOG_DEBUG("%s: The basic table info in the kernel is "
			  "incorrect\n", segment->name);
		segment->flags |= SOFLAG_NEEDS_ACTIVATE;

	} else if (pdata->bbr_state & BBR_ENABLED) {
		if (trgts->type != DM_TARGET_BBR ||
		    trgts->data.bbr->device.major != child->dev_major ||
		    trgts->data.bbr->device.minor != child->dev_minor ||
		    trgts->data.bbr->device.start != segment->start ||
		    trgts->data.bbr->table1_lba != pdata->bbr_table_lsn1 ||
		    trgts->data.bbr->table2_lba != pdata->bbr_table_lsn2 ||
		    trgts->data.bbr->replacement_blocks_lba != pdata->replacement_blocks_lsn ||
		    trgts->data.bbr->table_size != pdata->bbr_table_size_in_sectors ||
		    trgts->data.bbr->num_replacement_blocks != pdata->replacement_blocks_needed ||
		    trgts->data.bbr->block_size != pdata->block_size) {
			LOG_DEBUG("%s: Some BBR-specific table info in the "
				  "kernel is incorrect\n", segment->name);
			segment->flags |= SOFLAG_NEEDS_ACTIVATE;
		}
	} else if (trgts->type != DM_TARGET_LINEAR ||
		   trgts->data.linear->major != child->dev_major ||
		   trgts->data.linear->minor != child->dev_minor ||
		   trgts->data.linear->start != segment->start) {
		LOG_DEBUG("%s: Some Linear-specific table info in the kernel "
			  "is incorrect\n", segment->name);
		segment->flags |= SOFLAG_NEEDS_ACTIVATE;
	}

	/* Only release a list that was actually handed to us. */
	if (trgts) {
		EngFncs->dm_deallocate_targets(trgts);
	}

	LOG_EXIT_VOID();
}

/**
 * consume_storage_object
 *
 * This is how a feature applies itself to a storage object, either
 * during discovery or during create. It links the two objects in both
 * directions: the parent (the BBR segment) is inserted into the child's
 * parent_objects list and the child is inserted into the parent's
 * child_objects list. The results of the two insert calls are not checked.
 **/
static void consume_storage_object(storage_object_t *parent,
				   storage_object_t *child)
{
	LOG_ENTRY();

	/* New BBR segment is inserted into the child's parent list. */
	EngFncs->insert_thing(child->parent_objects, parent,
			      INSERT_BEFORE, NULL);

	/* Consumed object is inserted into the BBR segment's child list. */
	EngFncs->insert_thing(parent->child_objects, child,
			      INSERT_BEFORE, NULL);

	LOG_EXIT_VOID();
}

/**
 * set_bbr_segment_size
 *
 * Take the size and starting offset of the BBR segment from the metadata.
 * These fields were added to the metadata for EVMS 2.4.1; earlier versions
 * do not have them. When the size read from the metadata is zero, the size
 * and start are calculated with the previous method instead, and the user
 * is asked whether the metadata should be updated to add the fields. If
 * the user agrees, the segment is marked dirty.
 *
 * Returns: always 0.
 **/
static int set_bbr_segment_size(storage_object_t *segment,
				storage_object_t *child,
				evms_bbr_metadata_t *metadata)
{
	char *choices[] = {"Don't Update", "Update", NULL};
	int answer = 0;

	LOG_ENTRY();

	segment->size = metadata->data_size;
	segment->start = metadata->data_start;

	if (segment->size != 0) {
		/* Current-format metadata; nothing else to do. */
		LOG_EXIT_INT(0);
		return 0;
	}

	/* We've got old metadata that doesn't contain the new size and
	 * start fields. Use the old method to calculate the size.
	 */
	segment->size = get_child_useable_size(segment, child);
	segment->start = child->size - segment->size;

	QUESTION(&answer, choices,
		 _("BBR metadata update required for %s!\n\n"

		   "As of EVMS 2.4.1, an update was made to the "
		   "metadata for BBR segments. This update is "
		   "necessary to fix differences in how the size of "
		   "BBR segments are calculated on 2.6 kernels as "
		   "compared to 2.4 kernels.\n\n"

		   "Only update the BBR metadata if all your volumes "
		   "have been discovered correctly. If necessary, skip "
		   "the update right now, check your volume "
		   "configuration, and restart the EVMS UI to complete "
		   "the update.\n\n"

		   "If you notice any problems with your volume "
		   "configuration, please revert back to a known "
		   "working version of both EVMS and the Linux kernel. "
		   "Once you are back to a working configuration, "
		   "update to the latest version of EVMS without "
		   "changing kernel versions. Then update the metadata "
		   "for your BBR segments.\n\n"

		   "Should EVMS update the metadata for segment %s?"),
		   segment->name, segment->name);

	/* A non-zero answer selects the second choice, "Update". */
	if (answer != 0) {
		segment->flags |= SOFLAG_DIRTY;
	}

	LOG_EXIT_INT(0);
	return 0;
}

/**
 * build_bbr_segment
 *
 * This routine expects to be called only during discovery with a child
 * storage object that has BBR installed on top of it. Meaning that the
 * child object has a feature_header that names BBR as the current top
 * most feature and that BBR metadata is laid down on the disk.
 *
 * This routine is called to create a BBR segment by consuming a child
 * storage object that contains BBR metadata and by filling in the BBR
 * private data from the feature header and feature metadata.
 *
 * @child:            object carrying the BBR metadata.
 * @metadata:         metadata already read from @child.
 * @new_bbr_segments: list that receives the new segment on success.
 *
 * Returns: 0 on success; ENOMEM if the segment or its mapping table cannot
 *          be allocated; otherwise the error from name/UUID registration
 *          or from reading the BBR table. On failure the partially built
 *          segment is freed and @child is left unconsumed.
 **/
static int build_bbr_segment(storage_object_t *child,
			     evms_bbr_metadata_t *metadata,
			     list_anchor_t new_bbr_segments)
{
	storage_object_t *parent;
	bbr_private_data_t *pdata;
	sector_count_t bad_blocks;
	int rc;

	LOG_ENTRY();

	/* Allocate memory for the new storage object. */
	parent = malloc_bbr_segment();
	if (!parent) {
		rc = ENOMEM;
		goto out;
	}

	/* Create the name and uuid for this segment
	 * and register them with the engine.
	 */
	snprintf(parent->name, EVMS_NAME_SIZE, "%s_bbr", child->name);
	strncpy(parent->uuid, metadata->uuid, EVMS_NAME_SIZE);
	rc = EngFncs->register_name(parent->name);
	if (!rc) {
		rc = EngFncs->register_name(parent->uuid);
	}
	if (rc) {
		LOG_ERROR("Error registering name (%s) and uuid (%s).\n",
			  parent->name, parent->uuid);
		/* The UUID is not registered on behalf of this segment.
		 * Blank it so free_bbr_segment() does not unregister an
		 * entry that may belong to another object.
		 */
		parent->uuid[0] = '\0';
		free_bbr_segment(parent);
		goto out;
	}

	/* Copy the metadata to the private-data. */
	pdata = parent->private_data;
	pdata->child = child;
	pdata->replacement_blocks_lsn = metadata->start_replacement_sect;
	pdata->replacement_blocks_needed = metadata->nr_replacement_blks;
	pdata->replacement_blocks_size_in_sectors = metadata->nr_replacement_blks *
						    (metadata->block_size >>
						     EVMS_VSECTOR_SIZE_SHIFT);
	pdata->bbr_table_lsn1 = metadata->start_sect_bbr_table;
	pdata->bbr_table_size_in_sectors = metadata->nr_sects_bbr_table;
	/* The second table copy directly follows the first one. */
	pdata->bbr_table_lsn2 = pdata->bbr_table_lsn1 +
				pdata->bbr_table_size_in_sectors;
	pdata->sequence_number = metadata->sequence_number;
	pdata->block_size = metadata->block_size;
	pdata->bbr_state = metadata->bbr_active;

	LOG_DEBUG("Segment %s is in state %d\n",
		  parent->name, pdata->bbr_state);

	/* Copy the geometry from the child object. */
	memcpy(&parent->geometry, &child->geometry, sizeof(parent->geometry));

	/* Allocate the BBR table. */
	pdata->bbr_table = EngFncs->engine_alloc(pdata->bbr_table_size_in_sectors *
						 EVMS_VSECTOR_SIZE);
	if (!pdata->bbr_table) {
		LOG_ERROR("Unable to allocate a new BBR mapping table for "
			  "segment %s.\n", parent->name);
		free_bbr_segment(parent);
		rc = ENOMEM;
		goto out;
	}

	/* Read the BBR table from disk. */
	rc = get_bbr_table(child, pdata->bbr_table,
			   pdata->bbr_table_size_in_sectors,
			   pdata->bbr_table_lsn1, pdata->bbr_table_lsn2);
	if (rc) {
		LOG_ERROR("Failed to read a good BBR mapping table for "
			  "segment %s.\n", parent->name);
		free_bbr_segment(parent);
		goto out;
	}

	set_bbr_segment_size(parent, child, metadata);

	consume_storage_object(parent, child);

	/* Insert the new BBR segment into a discovery list. */
	EngFncs->insert_thing(new_bbr_segments, parent, INSERT_BEFORE, NULL);

	/* Check if this BBR segment is active. */
	bbr_get_devmap_info(parent);

	/* See if the segment has remapped sectors and warn the user.
	 * An active segment is asked through the kernel, an inactive one
	 * through the engine's copy of the table.
	 */
	if (parent->flags & SOFLAG_ACTIVE) {
		bad_blocks = get_kernel_bbr_remap_sector_count(parent);
	} else {
		bad_blocks = get_engine_remap_sector_count(parent);
	}

	if (bad_blocks > 0) {
		char number_buffer[25];
		sprintf(number_buffer, "%"PRIu64, bad_blocks);
		MESSAGE(_("Warning: The kernel BBR segment manager is reporting "
			  "that %s bad sector(s) were discovered on segment %s. "
			  "Though replacement sectors are being used to remedy "
			  "the problem, you would be well advised to take "
			  "corrective actions by replacing the storage object.\n"),
			number_buffer, parent->name);
	}

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * make_bbr
 *
 * Called from Create or Assign to create a new BBR segment on top of the
 * specified object. The new segment gets the child's geometry, the name
 * "<child>_bbr" and a freshly generated random UUID, is created in the
 * BBR-enabled state, has its metadata created, and finally consumes the
 * child.
 *
 * Returns: 0 on success; ENOMEM if the segment cannot be allocated;
 *          otherwise the first error from name/UUID registration or from
 *          create_bbr_metadata(). On any failure the partially built
 *          segment is freed.
 **/
static int make_bbr(storage_object_t *child)
{
	storage_object_t *parent;
	bbr_private_data_t *pdata;
	guid_t id;
	int rc;

	LOG_ENTRY();
	LOG_DEBUG("Creating new BBR segment on object %s.\n", child->name);

	/* Allocate the BBR segment. */
	parent = malloc_bbr_segment();
	if (!parent) {
		rc = ENOMEM;
		goto out;
	}

	/* Copy the geometry from the child object. */
	memcpy(&parent->geometry, &child->geometry, sizeof(parent->geometry));

	/* Create a name and UUID for the new segment. */
	snprintf(parent->name, EVMS_NAME_SIZE, "%s_bbr", child->name);
	uuid_generate_random((unsigned char*)&id);
	sprintf(parent->uuid,
		"%08X-%04X-%04X-%02X%02X-%02X%02X%02X%02X%02X%02X",
		id.time_low, id.time_mid, id.time_high, id.clock_seq_high,
		id.clock_seq_low, id.node[0], id.node[1], id.node[2],
		id.node[3], id.node[4], id.node[5]);

	/* Register the name first and the UUID only if that worked, so the
	 * value returned is a real error code rather than a sum of two.
	 */
	rc = EngFncs->register_name(parent->name);
	if (!rc) {
		rc = EngFncs->register_name(parent->uuid);
	}
	if (rc) {
		LOG_ERROR("Error registering name (%s) and/or uuid (%s).\n",
			  parent->name, parent->uuid);
		/* The UUID is not registered on behalf of this segment.
		 * Blank it so free_bbr_segment() does not unregister an
		 * entry that may belong to another object.
		 */
		parent->uuid[0] = '\0';
		goto out;
	}

	pdata = parent->private_data;
	pdata->child = child;
	pdata->bbr_state = BBR_ENABLED;

	rc = create_bbr_metadata(parent, child);
	if (rc) {
		LOG_ERROR("Error creating BBR metadata for %s.\n",
			  parent->name);
		goto out;
	}

	/* Set size and start for the new segment. */
	parent->size = get_child_useable_size(parent, child);
	parent->start = child->size - parent->size;

	consume_storage_object(parent, child);

out:
	if (rc) {
		/* free_bbr_segment() tolerates a NULL segment. */
		free_bbr_segment(parent);
	}
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * forward_kill_sectors
 *
 * The BBR segment is being deleted. Forward any saved kill-sector requests
 * to the child object, one sector at a time, translating each sector
 * through get_lsn().
 *
 * Forwarding stops at the first error, but the list is always drained:
 * the caller frees the segment's private data right afterwards, so any
 * request left on the list would be leaked.
 *
 * Returns: 0 on success, otherwise the first error from KILL_SECTORS().
 **/
static int forward_kill_sectors(storage_object_t *segment)
{
	bbr_private_data_t *pdata = segment->private_data;
	kill_sectors_t *ks = pdata->kill_sector_list_head;
	storage_object_t *child = pdata->child;
	lsn_t bbr_lsn;
	sector_count_t i;
	int rc = 0;

	LOG_ENTRY();

	while (ks) {
		if (!rc) {
			/* Shift from segment-relative to child-relative. */
			ks->lsn += segment->start;

			for (i = 0; i < ks->count && !rc; i++) {
				bbr_lsn = get_lsn(pdata, ks->lsn + i);
				rc = KILL_SECTORS(child, bbr_lsn, 1);
			}
		}

		/* Unlink and free this request whether or not it was
		 * forwarded.
		 */
		pdata->kill_sector_list_head = ks->next;
		EngFncs->engine_free(ks);
		ks = pdata->kill_sector_list_head;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * unmake_bbr
 *
 * Called from delete() and unassign() to remove BBR from this child object.
 *
 * The first parent of @child must be a segment this plugin may modify.
 * If so, the parent link is removed from the child, pending kill-sector
 * requests are forwarded to the child, the on-disk metadata and tables are
 * killed when @destroy is set, and the BBR segment is freed.
 *
 * Returns: 0 on success, EINVAL if @child has no parent or the parent is
 *          not ours - in which case nothing is modified.
 **/
static int unmake_bbr(storage_object_t *child, boolean destroy)
{
	list_element_t itr = NULL;
	storage_object_t *segment;
	bbr_private_data_t *pdata;
	int rc = EINVAL;

	LOG_ENTRY();
	LOG_DEBUG("Unmaking BBR segment on top of object %s.\n", child->name);

	segment = EngFncs->first_thing(child->parent_objects, &itr);

	if (segment && i_can_modify_object(segment)) {
		/* Only drop the parent link once we know the parent is a
		 * BBR segment; otherwise the child would lose a link to an
		 * object we then refuse to touch.
		 */
		EngFncs->delete_element(itr);

		pdata = segment->private_data;
		if (pdata->kill_sector_list_head) {
			forward_kill_sectors(segment);
		}

		if (destroy) {
			/* Erase the metadata and BBR tables: both metadata
			 * sectors and the first sector of each table copy.
			 */
			KILL_SECTORS(child, 1, 2);
			KILL_SECTORS(child, pdata->bbr_table_lsn1, 1);
			KILL_SECTORS(child, pdata->bbr_table_lsn2, 1);
		}

		free_bbr_segment(segment);

		rc = 0;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * bbr_setup_plugin
 *
 * Record the engine's function table in the file-wide EngFncs pointer so
 * the rest of the plugin can call engine services.
 *
 * Returns: always 0.
 **/
static int bbr_setup_plugin(engine_functions_t *engine_functions)
{
	/* Set before LOG_ENTRY(): every other routine in this file reaches
	 * the engine through EngFncs.
	 */
	EngFncs = engine_functions;
	LOG_ENTRY();
	LOG_EXIT_INT(0);
	return 0;
}

/**
 * bbr_cleanup_plugin
 *
 * Nothing to release here; the function only logs its entry and exit.
 **/
static void bbr_cleanup_plugin(void)
{
	LOG_ENTRY();
	LOG_EXIT_VOID();
}

/**
 * bbr_can_delete
 *
 * A segment can be deleted as long as i_can_modify_object() accepts it.
 *
 * Returns: 0 if the segment may be deleted, EINVAL otherwise.
 **/
static int bbr_can_delete(storage_object_t *segment)
{
	int rc;

	LOG_ENTRY();
	LOG_DEBUG("Checking if segment %s can be deleted.\n", segment->name);

	rc = i_can_modify_object(segment) ? 0 : EINVAL;

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * bbr_can_unassign
 *
 * BBR can be unassigned from @child when the first object on the child's
 * parent list exists and is accepted by i_can_modify_object().
 *
 * Returns: 0 if BBR can be unassigned, EINVAL otherwise.
 **/
static int bbr_can_unassign(storage_object_t *child)
{
	storage_object_t *top;
	int rc;

	LOG_ENTRY();
	LOG_DEBUG("Checking if BBR-seg can be unassigned from segment %s.\n",
		  child->name);

	top = EngFncs->first_thing(child->parent_objects, NULL);
	if (!top || !i_can_modify_object(top)) {
		rc = EINVAL;
	} else {
		rc = 0;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * bbr_can_expand
 *
 * This segment can be expanded if the child object can be expanded: once
 * the segment passes the i_can_modify_object() check, the question is
 * handed unchanged to the child's plugin.
 *
 * Returns: EINVAL if the segment is not ours, otherwise whatever the
 *          child plugin's can_expand() returns.
 **/
static int bbr_can_expand(storage_object_t *segment,
			  sector_count_t expand_limit,
			  list_anchor_t expansion_points)
{
	bbr_private_data_t *priv = segment->private_data;
	storage_object_t *below = priv->child;
	int rc;

	LOG_ENTRY();
	LOG_DEBUG("Checking if segment %s can be expanded.\n", segment->name);

	if (!i_can_modify_object(segment)) {
		rc = EINVAL;
	} else {
		/* Let the object underneath decide. */
		rc = below->plugin->functions.plugin->can_expand(below,
								 expand_limit,
								 expansion_points);
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * bbr_can_expand_by
 *
 * Any expansion amount is accepted for a segment that passes the
 * i_can_modify_object() check; *size is only logged, never changed.
 *
 * Returns: 0 if the segment is ours, EINVAL otherwise.
 **/
static int bbr_can_expand_by(storage_object_t *segment, sector_count_t *size)
{
	int rc;

	LOG_ENTRY();
	LOG_DEBUG("Checking if segment %s can be expanded by "
		  "%"PRIu64" sectors.\n", segment->name, *size);

	rc = i_can_modify_object(segment) ? 0 : EINVAL;

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * bbr_can_shrink
 *
 * This segment can shrink if the child object can shrink: once the
 * segment passes the i_can_modify_object() check, the question is handed
 * unchanged to the child's plugin.
 *
 * Returns: EINVAL if the segment is not ours, otherwise whatever the
 *          child plugin's can_shrink() returns.
 **/
static int bbr_can_shrink(storage_object_t *segment,
			  sector_count_t shrink_limit,
			  list_anchor_t shrink_points)
{
	bbr_private_data_t *priv = segment->private_data;
	storage_object_t *below = priv->child;
	int rc;

	LOG_ENTRY();
	LOG_DEBUG("Checking if segment %s can be shrunk.\n", segment->name);

	if (!i_can_modify_object(segment)) {
		rc = EINVAL;
	} else {
		/* Let the object underneath decide. */
		rc = below->plugin->functions.plugin->can_shrink(below,
								 shrink_limit,
								 shrink_points);
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * bbr_can_shrink_by
 *
 * This segment can shrink by the desired amount if it passes the
 * i_can_modify_object() check and the delta-size is strictly less than
 * the size of the segment.
 *
 * Returns: 0 if the shrink is acceptable, EINVAL otherwise.
 **/
static int bbr_can_shrink_by(storage_object_t *segment, sector_count_t *size)
{
	int rc;

	LOG_ENTRY();
	LOG_DEBUG("Checking if segment %s can be shrunk by "
		  "%"PRIu64" sectors.\n", segment->name, *size);

	if (i_can_modify_object(segment) && *size < segment->size) {
		rc = 0;
	} else {
		rc = EINVAL;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * bbr_can_set_volume
 *
 * This segment can become a volume as long as it passes the
 * i_can_modify_object() check. The flag argument is not consulted.
 *
 * Returns: 0 if the segment is ours, EINVAL otherwise.
 **/
static int bbr_can_set_volume(storage_object_t *segment, boolean flag)
{
	int rc;

	LOG_ENTRY();

	rc = i_can_modify_object(segment) ? 0 : EINVAL;

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * bbr_discover
 *
 * Called by the engine with a list of storage objects that it believes
 * may be BBR segments. Each object is examined in turn:
 *
 * - Objects that already belong to this plugin, or that are not of
 *   DATA_TYPE, are passed straight through to the output list.
 *
 * - For the others we try to read BBR metadata. If that fails, or if a
 *   BBR segment cannot be built from the metadata, the object is passed
 *   through unchanged.
 *
 * - Otherwise build_bbr_segment() has consumed the object and put a new
 *   BBR segment on a private list, which is merged into the output list
 *   once every input object has been looked at.
 *
 * If the private list cannot be allocated, all input objects are moved to
 * the output list untouched. The final_call argument is not consulted.
 *
 * Returns: the number of BBR segments that were created (0 if none).
 **/
static int bbr_discover(list_anchor_t input_objects,
			list_anchor_t output_objects,
			boolean final_call)
{
	list_anchor_t new_segments;
	list_element_t iter;
	storage_object_t *candidate;
	evms_bbr_metadata_t metadata;
	int found, consumed;

	LOG_ENTRY();

	new_segments = EngFncs->allocate_list();
	if (!new_segments) {
		EngFncs->concatenate_lists(output_objects, input_objects);
		LOG_EXIT_INT(0);
		return 0;
	}

	/* Validate storage objects found in our discovery list.
	 * Any valid BBR child object is kept off the output list.
	 * New BBR segments are collected in new_segments.
	 */
	LIST_FOR_EACH(input_objects, iter, candidate) {
		LOG_DEBUG("Examining object %s\n", candidate->name);

		consumed = 0;

		if (candidate->plugin != my_plugin_record &&
		    candidate->data_type == DATA_TYPE) {
			/* Read and validate the BBR metadata. */
			if (read_metadata(candidate, &metadata) != 0) {
				LOG_DEBUG("BBR metadata not found on object %s\n",
					  candidate->name);
			} else if (build_bbr_segment(candidate, &metadata,
						     new_segments) == 0) {
				consumed = 1;
			}
		}

		if (!consumed) {
			/* Not ours (or not usable): hand it back as-is. */
			EngFncs->insert_thing(output_objects, candidate,
					      INSERT_AFTER, NULL);
		}
	}

	found = EngFncs->list_count(new_segments);
	EngFncs->merge_lists(output_objects, new_segments, NULL, NULL);
	EngFncs->destroy_list(new_segments);

	LOG_EXIT_INT(found);
	return found;
}

/**
 * bbr_create
 *
 * Create a new BBR segment on the first object of the input_objects list
 * and place the new segment in the output_objects list. The options
 * argument is not consulted.
 *
 * Returns: 0 on success, EINVAL if the input list is empty, otherwise the
 *          error from make_bbr().
 **/
static int bbr_create(list_anchor_t input_objects,
		      option_array_t *options,
		      list_anchor_t output_objects)
{
	storage_object_t *new_seg, *child;
	int rc;

	LOG_ENTRY();

	/* Get the first object from the input_objects list. */
	child = EngFncs->first_thing(input_objects, NULL);
	if (!child) {
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}

	rc = make_bbr(child);
	if (rc == 0) {
		/* The child's first parent is taken to be the segment
		 * that make_bbr() just created.
		 */
		new_seg = EngFncs->first_thing(child->parent_objects, NULL);
		EngFncs->insert_thing(output_objects, new_seg,
				      INSERT_BEFORE, NULL);
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * bbr_assign
 *
 * Put a new BBR segment on top of @child by calling make_bbr(). The
 * options argument is not consulted.
 *
 * Returns: the result of make_bbr().
 **/
static int bbr_assign(storage_object_t *child, option_array_t *options)
{
	int rc;

	LOG_ENTRY();

	rc = make_bbr(child);

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * bbr_w_delete
 *
 * Worker shared by the delete and discard entry points. Removes BBR from
 * the segment's child via unmake_bbr() and, on success, adds the child to
 * child_objects when a list was supplied.
 *
 * @segment:       BBR segment to remove.
 * @child_objects: optional list that receives the child; may be NULL.
 * @destroy:       passed through to unmake_bbr().
 *
 * Returns: the result of unmake_bbr().
 **/
static int bbr_w_delete(storage_object_t *segment,
			list_anchor_t child_objects,
			boolean destroy)
{
	bbr_private_data_t *priv = segment->private_data;
	/* Remember the child now: unmake_bbr() frees the segment and its
	 * private data on success.
	 */
	storage_object_t *below = priv->child;
	int rc;

	LOG_ENTRY();
	LOG_DEBUG("Deleting segment %s.\n", segment->name);

	rc = unmake_bbr(below, destroy);
	if (rc == 0 && child_objects) {
		EngFncs->insert_thing(child_objects, below,
				      INSERT_BEFORE, NULL);
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * bbr_discard
 *
 * This function is similar to delete. For every segment on the list it
 * calls bbr_w_delete() with no child list and with destroy set to FALSE.
 * The result of each call is ignored.
 *
 * Returns: always 0.
 **/
int bbr_discard(list_anchor_t segments)
{
	storage_object_t *segment;
	list_element_t le;

	LOG_ENTRY();

	LIST_FOR_EACH(segments, le, segment) {
		/* Best effort: a failure on one segment does not stop the
		 * loop and is not reported to the caller.
		 */
		bbr_w_delete(segment, NULL, FALSE);
	}

	LOG_EXIT_INT(0);
	return 0;
}

/**
 * bbr_delete
 *
 * Delete this BBR segment by calling bbr_w_delete() with destroy set to
 * TRUE. As the code reached from there shows, that:
 * - Frees any privately allocated data.
 * - Removes the parent pointer from the child object.
 * - Puts the BBR child onto the list provided in the second parameter.
 *
 * Returns: the result of bbr_w_delete().
 **/
static int bbr_delete(storage_object_t *segment,
		      list_anchor_t child_objects)
{
	int rc;

	LOG_ENTRY();

	rc = bbr_w_delete(segment, child_objects, TRUE);

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * bbr_unassign
 *
 * Delete the BBR segment from on top of this child object by calling
 * unmake_bbr() with destroy set to TRUE.
 *
 * Returns: the result of unmake_bbr().
 **/
static int bbr_unassign(storage_object_t *child)
{
	int rc;

	LOG_ENTRY();

	rc = unmake_bbr(child, TRUE);

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * bbr_expand
 *
 * Expand a BBR segment by passing the request down to the child object's
 * plugin. If the child reports success and really did grow, the segment's
 * size is recalculated from the child, the segment is marked dirty, and -
 * if it is currently active - it is flagged for re-activation.
 *
 * The child's size from before the expand is remembered in old_size,
 * unless an earlier resize already recorded one.
 * NOTE(review): the I/O paths compare old_size against segment-relative
 * sector numbers while the value stored here is the child's size -
 * confirm this is intended.
 *
 * Returns: EINVAL if the segment is not ours, otherwise the result of the
 *          child plugin's expand().
 **/
static int bbr_expand(storage_object_t *segment,
		      storage_object_t *expand_object,
		      list_anchor_t objects,
		      option_array_t *options)
{
	bbr_private_data_t *priv = segment->private_data;
	storage_object_t *child = priv->child;
	u_int64_t size_before;
	int rc = EINVAL;

	LOG_ENTRY();
	LOG_DEBUG("Expanding segment %s.\n", segment->name);

	if (!i_can_modify_object(segment)) {
		goto out;
	}

	size_before = child->size;

	/* Pass expand down the feature stack. */
	rc = child->plugin->functions.plugin->expand(child, expand_object,
						     objects, options);

	/* Nothing more to do on failure or if the child did not grow. */
	if (rc != 0 || child->size <= size_before) {
		goto out;
	}

	/* Keep the earliest recorded size across repeated resizes. */
	if (priv->old_size == 0) {
		priv->old_size = size_before;
	}
	segment->size = get_child_useable_size(segment, child);

	segment->flags |= SOFLAG_DIRTY;
	if (segment->flags & SOFLAG_ACTIVE) {
		segment->flags |= SOFLAG_NEEDS_ACTIVATE;
	}

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * bbr_shrink
 *
 * Shrink a BBR segment by passing the request down to the child object's
 * plugin. If the child reports success and really did get smaller, the
 * segment's size is recalculated from the child, the segment is marked
 * dirty, and - if it is currently active - it is flagged for
 * re-activation.
 *
 * The child's size from before the shrink is remembered in old_size,
 * unless an earlier resize already recorded one.
 *
 * Returns: EINVAL if the segment is not ours, otherwise the result of the
 *          child plugin's shrink().
 **/
static int bbr_shrink(storage_object_t *segment,
		      storage_object_t *shrink_object,
		      list_anchor_t objects,
		      option_array_t *options)
{
	bbr_private_data_t *priv = segment->private_data;
	storage_object_t *child = priv->child;
	u_int64_t size_before;
	int rc = EINVAL;

	LOG_ENTRY();
	LOG_DEBUG("Shrinking segment %s.\n", segment->name);

	if (!i_can_modify_object(segment)) {
		goto out;
	}

	size_before = child->size;

	/* Pass the shrink cmd to the child object. */
	rc = child->plugin->functions.plugin->shrink(child, shrink_object,
						     objects, options);

	/* Nothing more to do on failure or if the child did not shrink. */
	if (rc != 0 || child->size >= size_before) {
		goto out;
	}

	/* Keep the earliest recorded size across repeated resizes. */
	if (priv->old_size == 0) {
		priv->old_size = size_before;
	}
	segment->size = get_child_useable_size(segment, child);

	segment->flags |= SOFLAG_DIRTY;
	if (segment->flags & SOFLAG_ACTIVE) {
		segment->flags |= SOFLAG_NEEDS_ACTIVATE;
	}

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * bbr_set_volume
 *
 * No action is taken for either argument; the function only logs its
 * entry and exit.
 **/
static void bbr_set_volume(storage_object_t *segment, boolean flag)
{
	LOG_ENTRY();
	LOG_EXIT_VOID();
}

/**
 * bbr_add_sectors_to_kill_list
 *
 * Queue a kill-sectors request on the segment. The request is not carried
 * out here; it is pushed onto the front of the segment's kill-sector list
 * and the segment is marked dirty so that a later commit processes it.
 *
 * Check that:
 * - We own the object
 * - The logical sectors fall on the useable area of the BBR object
 *
 * Returns: 0 on success, EINVAL if the segment is not ours or the range
 *          does not fit inside the segment, ENOMEM if the request cannot
 *          be allocated.
 **/
static int bbr_add_sectors_to_kill_list(storage_object_t *segment,
					lsn_t lsn,
					sector_count_t count)
{
	bbr_private_data_t *pdata = segment->private_data;
	kill_sectors_t *ks;
	int rc = 0;

	LOG_ENTRY();
	LOG_DEBUG("Killing sectors on segment %s: sector %"PRIu64", count %"
		  PRIu64".\n", segment->name, lsn, count);

	/* The range test is written so that it cannot wrap around: a plain
	 * "lsn + count <= size" would accept huge values whose sum
	 * overflows.
	 */
	if (i_can_modify_object(segment) &&
	    lsn <= segment->size &&
	    count <= segment->size - lsn) {
		ks = EngFncs->engine_alloc(sizeof(*ks));
		if (ks) {
			ks->lsn = lsn;
			ks->count = count;
			ks->next = pdata->kill_sector_list_head;
			pdata->kill_sector_list_head = ks;

			/* Mark the region dirty so that it will
			 * get called to commit the kill sectors.
			 */
			segment->flags |= SOFLAG_DIRTY;
		} else {
			rc = ENOMEM;
		}
	} else {
		rc = EINVAL;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * bbr_commit_changes
 *
 * Called to commit changes to a BBR storage object. Work is only done
 * during the two metadata phases:
 *   Phase 1: commit 1st copy of metadata & 1st bbr table
 *   Phase 2: commit 2nd copy of metadata & 2nd bbr table
 * Every other phase is a no-op.
 *
 * Returns: the result of commit_bbr_segment() for the metadata phases,
 *          0 for all other phases.
 **/
static int bbr_commit_changes(storage_object_t *segment,
			      commit_phase_t commit_phase)
{
	int rc = 0;

	LOG_ENTRY();
	LOG_DEBUG("Committing changes to segment: %s (phase %d).\n",
		  segment->name, commit_phase);

	if (commit_phase == FIRST_METADATA_WRITE ||
	    commit_phase == SECOND_METADATA_WRITE) {
		rc = commit_bbr_segment(segment, commit_phase);
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * bbr_read
 *
 * Perform an engine read on the specified BBR segment. If the segment is
 * active, we need to go directly to the kernel device so the bad-blocks are
 * tracked correctly. If the segment is not active, we can use normal
 * engine-I/O. However, if the segment is active, but has been expanded and
 * not yet reactivated, we might be trying to read past the end of the kernel
 * device. If so, just use engine-I/O.
 *
 * With engine-I/O the request is read one sector at a time, each sector
 * being translated through get_lsn() before it is read from the child.
 *
 * Returns: 0 on success, EINVAL if the segment is not ours or the range
 *          does not fit inside the segment, otherwise the first I/O error.
 **/
static int bbr_read(storage_object_t *segment, lsn_t lsn,
		    sector_count_t count, void *buffer)
{
	bbr_private_data_t *pdata = segment->private_data;
	storage_object_t *child = pdata->child;
	/* Byte pointer: stepping a void pointer is not standard C. */
	char *sector_ptr = buffer;
	lsn_t bbr_lsn;
	/* Same width as count, so large requests cannot overflow it. */
	sector_count_t i;
	int rc = EINVAL;

	LOG_ENTRY();
	LOG_DEBUG("Reading from segment %s: sector %"PRIu64", count %"
		  PRIu64".\n", segment->name, lsn, count);

	/* Range test written so that lsn + count cannot wrap around. */
	if (lsn <= segment->size &&
	    count <= segment->size - lsn &&
	    i_can_modify_object(segment)) {
		if (segment->flags & SOFLAG_ACTIVE &&
		    (!pdata->old_size || lsn < pdata->old_size)) {
			rc = kernel_bbr_sector_io(segment, lsn, count,
						  buffer, SECTOR_IO_READ);
		} else {
			/* Shift from segment-relative to child-relative. */
			lsn += segment->start;

			for (i = 0, rc = 0; i < count && !rc; i++) {
				bbr_lsn = get_lsn(pdata, lsn + i);

				rc = READ(child, bbr_lsn, 1, sector_ptr);

				sector_ptr += EVMS_VSECTOR_SIZE;
			}
		}
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * bbr_write
 *
 * Perform an engine write on the specified BBR segment. If the segment is
 * active, we need to go directly to the kernel device so the bad-blocks are
 * tracked correctly. If the segment is not active, we can use normal
 * engine-I/O. However, if the segment is active, but has been expanded and
 * not yet reactivated, we might be trying to write past the end of the kernel
 * device. If so, just use engine-I/O.
 *
 * Pending kill-sector requests are processed before the write itself.
 * With engine-I/O the request is written one sector at a time; a sector
 * whose write fails is remapped with remap_lsn() and retried until the
 * write succeeds or no replacement is returned.
 *
 * Returns: 0 on success, EINVAL if the segment is not ours or the range
 *          does not fit inside the segment (the same answer bbr_read()
 *          gives, instead of reporting success for a write that never
 *          happened), otherwise the error from kill_sectors() or the
 *          first I/O error.
 **/
static int bbr_write(storage_object_t *segment, lsn_t lsn,
		     sector_count_t count, void *buffer)
{
	bbr_private_data_t *pdata = segment->private_data;
	storage_object_t *child = pdata->child;
	/* Byte pointer: stepping a void pointer is not standard C. */
	char *sector_ptr = buffer;
	lsn_t bbr_lsn;
	/* Same width as count, so large requests cannot overflow it. */
	sector_count_t i;
	int rc = EINVAL;

	LOG_ENTRY();
	LOG_DEBUG("Writing to segment %s: sector %"PRIu64", count %"
		  PRIu64".\n", segment->name, lsn, count);

	/* Range test written so that lsn + count cannot wrap around. */
	if (lsn <= segment->size &&
	    count <= segment->size - lsn &&
	    i_can_modify_object(segment)) {

		rc = 0;

		if (pdata->kill_sector_list_head != NULL) {
			rc = kill_sectors(segment);
			if (rc) {
				LOG_EXIT_INT(rc);
				return rc;
			}
		}

		if (segment->flags & SOFLAG_ACTIVE &&
		    (!pdata->old_size || lsn < pdata->old_size)) {
			rc = kernel_bbr_sector_io(segment, lsn, count,
						  buffer, SECTOR_IO_WRITE);
		} else {
			/* Shift from segment-relative to child-relative. */
			lsn += segment->start;

			for (i = 0, rc = 0; i < count && !rc; i++) {
				bbr_lsn = get_lsn(pdata, lsn + i);
				rc = WRITE(child, bbr_lsn, 1, sector_ptr);
				while (rc) {
					/* The write failed: ask for a
					 * replacement sector and retry. A
					 * zero result means none was
					 * provided, so give up with the
					 * last error.
					 */
					bbr_lsn = remap_lsn(segment, pdata, bbr_lsn);
					if (bbr_lsn) {
						rc = WRITE(child, bbr_lsn, 1, sector_ptr);
					} else {
						break;
					}
				}

				sector_ptr += EVMS_VSECTOR_SIZE;
			}
		}
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * bbr_can_activate
 *
 * No checks are made on the segment.
 *
 * Returns: always 0.
 **/
static int bbr_can_activate(storage_object_t *segment)
{
	LOG_ENTRY();
	LOG_EXIT_INT(0);
	return 0;
}

/**
 * bbr_activate
 *
 * Tell Device-Mapper to activate the specified BBR segment. A single
 * target covering the whole segment is built: a BBR target when BBR is
 * enabled for the segment, a plain linear target when it is disabled.
 * For a BBR target on a 2.4 kernel the target length is rounded down to
 * an even number of sectors.
 *
 * If the activation succeeds, the remembered old size is cleared (the
 * kernel device now has the same size as the engine segment) and the
 * needs-activate flag is dropped.
 *
 * Returns: 0 on success, ENOMEM if the target cannot be allocated,
 *          otherwise the error from dm_activate().
 **/
static int bbr_activate(storage_object_t *segment)
{
	bbr_private_data_t *pdata = segment->private_data;
	storage_object_t *child = pdata->child;
	dm_target_bbr_t *bbr;
	dm_target_t *trgt;
	dm_device_t *lin;
	int use_bbr, rc;

	LOG_ENTRY();

	use_bbr = (pdata->bbr_state & BBR_ENABLED) != 0;

	if (use_bbr) {
		LOG_DEBUG("Activating Enabled BBR object(%s), type = BBR\n",
			  segment->name);
	} else {
		LOG_DEBUG("Activating Disabled BBR object(%s), type = Linear\n",
			  segment->name);
	}

	/* Both kinds of target start at 0 and span the whole segment. */
	trgt = EngFncs->dm_allocate_target(use_bbr ? DM_TARGET_BBR
						   : DM_TARGET_LINEAR,
					   0, segment->size, 0, 0);
	if (!trgt) {
		LOG_EXIT_INT(ENOMEM);
		return ENOMEM;
	}

	if (use_bbr) {
		bbr = trgt->data.bbr;
		bbr->device.major = child->dev_major;
		bbr->device.minor = child->dev_minor;
		bbr->device.start = segment->start;
		bbr->table1_lba = pdata->bbr_table_lsn1;
		bbr->table2_lba = pdata->bbr_table_lsn2;
		bbr->replacement_blocks_lba = pdata->replacement_blocks_lsn;
		bbr->table_size =  pdata->bbr_table_size_in_sectors;
		bbr->num_replacement_blocks = pdata->replacement_blocks_needed;
		bbr->block_size = pdata->block_size;

		/* Clear the lowest bit of the length on 2.4 kernels. */
		if (EngFncs->is_2_4_kernel()) {
			trgt->length &= ~1;
		}
	} else {
		lin = trgt->data.linear;
		lin->major = child->dev_major;
		lin->minor = child->dev_minor;
		lin->start = segment->start;
	}

	rc = EngFncs->dm_activate(segment, trgt);
	EngFncs->dm_deallocate_targets(trgt);
	if (rc == 0) {
		pdata->old_size = 0;
		segment->flags &= ~SOFLAG_NEEDS_ACTIVATE;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * bbr_can_deactivate
 *
 * Unconditionally reports success (0) for the given segment; the segment
 * itself is not inspected.
 **/
static int bbr_can_deactivate(storage_object_t *segment)
{
	const int rc = 0;

	LOG_ENTRY();
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * bbr_deactivate
 *
 * Ask the engine's Device-Mapper service to deactivate the segment. When
 * that succeeds, forget any remembered pre-resize size (if the segment has
 * private data) and drop the needs-deactivate flag.
 *
 * Returns 0 on success or the error code from dm_deactivate.
 **/
static int bbr_deactivate(storage_object_t *segment)
{
	bbr_private_data_t *pdata = segment->private_data;
	int rc;

	LOG_ENTRY();
	LOG_DEBUG("Deactivating segment %s.\n", segment->name);

	rc = EngFncs->dm_deactivate(segment);
	if (rc != 0) {
		/* Leave the segment state untouched on failure. */
		LOG_EXIT_INT(rc);
		return rc;
	}

	if (pdata != NULL) {
		pdata->old_size = 0;
	}
	segment->flags &= ~SOFLAG_NEEDS_DEACTIVATE;

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * bbr_get_option_count
 *
 * Report how many options the given task has: BBR_OPTION_COUNT for the
 * assign, create, expand and shrink tasks, and -1 for any other action.
 **/
static int bbr_get_option_count(task_context_t *task)
{
	/* Start from the "unsupported action" answer. */
	int count = -1;

	LOG_ENTRY();

	switch (task->action) {
	case EVMS_Task_Assign_Plugin:
	case EVMS_Task_Create:
	case EVMS_Task_Expand:
	case EVMS_Task_Shrink:
		count = BBR_OPTION_COUNT;
		break;

	default:
		break;
	}

	LOG_EXIT_INT(count);
	return count;
}

/**
 * bbr_init_task
 *
 * Prepare a task context for the requested action.
 *
 *  - Assign/Create: fill in the acceptable objects and require exactly one
 *    selected object.
 *  - Expand/Shrink: ask the child object's plugin whether it can expand
 *    (limit -1) or shrink (limit segment size - 1); no objects are
 *    selectable.
 *  - Enable/Disable plugin functions: nothing to select, always succeeds.
 *
 * Returns 0 on success, ENOMEM if a scratch list cannot be allocated,
 * EINVAL for unsupported actions or for a segment this plugin may not
 * modify, or whatever the delegated call returns.
 **/
static int bbr_init_task(task_context_t *context)
{
	storage_object_t *segment;
	storage_object_t *child;
	bbr_private_data_t *pdata;
	list_anchor_t scratch;
	int rc = EINVAL;

	LOG_ENTRY();

	switch (context->action) {

	case EVMS_Task_Assign_Plugin:
	case EVMS_Task_Create:
		rc = get_acceptable_create_objects(context);
		context->min_selected_objects = 1;
		context->max_selected_objects = 1;
		break;

	case EVMS_Task_Expand:
	case EVMS_Task_Shrink:
		segment = context->object;
		pdata = segment->private_data;
		child = pdata->child;

		/* The child's can_expand/can_shrink need a list to fill in;
		 * it is only scratch space here and is discarded afterwards.
		 */
		scratch = EngFncs->allocate_list();
		if (!scratch) {
			rc = ENOMEM;
			break;
		}

		/* rc stays EINVAL when the segment is not ours to modify. */
		if (i_can_modify_object(segment)) {
			if (context->action == EVMS_Task_Expand) {
				rc = child->plugin->functions.plugin->can_expand(child, -1,
										 scratch);
			} else {
				rc = child->plugin->functions.plugin->can_shrink(child,
										 segment->size - 1,
										 scratch);
			}
		}
		EngFncs->destroy_list(scratch);
		context->min_selected_objects = 0;
		context->max_selected_objects = 0;
		break;

	case BBR_FUNCTION_ENABLE:
	case BBR_FUNCTION_DISABLE:
		/* Neither objects nor options apply to these functions. */
		context->min_selected_objects = 0;
		context->max_selected_objects = 0;
		rc = 0;
		break;

	default:
		LOG_ERROR("Action %d is unsupported.\n", context->action);
		rc = EINVAL;
		break;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * bbr_set_objects
 *
 * Validate the selection held in the task context.
 *
 * For the assign and create tasks, the first entry of the selected_objects
 * list is accepted only if it is a disk or a segment that does not already
 * belong to this plugin. An empty selection is rejected. The expand, shrink,
 * enable and disable tasks take no selected objects and always succeed.
 *
 * The declined_objects list and the effect output are not modified by this
 * implementation.
 *
 * Returns 0 if the selection is acceptable, EINVAL if it is not or if the
 * action is unsupported.
 **/
static int bbr_set_objects(task_context_t *context,
			   list_anchor_t declined_objects,
			   task_effect_t *effect)
{
	storage_object_t *object;
	int rc;

	LOG_ENTRY();

	switch (context->action) {

	case EVMS_Task_Assign_Plugin:
	case EVMS_Task_Create:
		/* The selected object is acceptable if it's a disk or a
		 * segment and doesn't belong to BBR-seg. Guard against the
		 * list yielding no object at all before dereferencing it.
		 */
		object = EngFncs->first_thing(context->selected_objects, NULL);
		if (object != NULL &&
		    (object->object_type == DISK ||
		     object->object_type == SEGMENT) &&
		    object->plugin != my_plugin_record) {
			rc = 0;
		} else {
			rc = EINVAL;
		}
		break;

	case EVMS_Task_Expand:
	case EVMS_Task_Shrink:
	case BBR_FUNCTION_ENABLE:
	case BBR_FUNCTION_DISABLE:
		/* No selected objects for expand, shrink, enable or disable. */
		rc = 0;
		break;

	default:
		LOG_ERROR("Action %d is unsupported.\n", context->action);
		rc = EINVAL;
		break;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * bbr_set_option
 *
 * Accept an option change for a task. None of the supported tasks carries
 * options, so the index, value and info arguments are not examined: every
 * supported action returns 0 and anything else returns EINVAL.
 **/
int bbr_set_option(task_context_t *context,
		   u_int32_t index,
		   value_t *value,
		   u_int32_t *info)
{
	int rc = 0;

	LOG_ENTRY();

	switch (context->action) {
	case EVMS_Task_Assign_Plugin:
	case EVMS_Task_Create:
	case EVMS_Task_Expand:
	case EVMS_Task_Shrink:
	case BBR_FUNCTION_ENABLE:
	case BBR_FUNCTION_DISABLE:
		/* Supported task, but there is nothing to set. */
		break;

	default:
		LOG_ERROR("Action %d is unsupported.\n", context->action);
		rc = EINVAL;
		break;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * bbr_get_plugin_functions
 *
 * Produce the one-entry list of plugin functions available for a segment:
 * "enable" when BBR is currently disabled, "disable" when it is enabled.
 *
 * *functions is set to NULL first and only replaced on success.
 *
 * Returns:
 *   0       - *functions points at a newly allocated one-entry array.
 *   ENOSYS  - no segment was given.
 *   EINVAL  - a state change is already pending for the segment, or BBR is
 *             enabled and the segment has remapped sectors.
 *   ENOMEM  - the function-info array could not be allocated.
 **/
static int bbr_get_plugin_functions(storage_object_t *segment,
				    function_info_array_t **functions)
{
	bbr_private_data_t *pdata;
	function_info_array_t *array;
	function_info_t *entry;
	sector_count_t remapped;
	int rc = 0;

	LOG_ENTRY();

	*functions = NULL;

	if (segment == NULL) {
		rc = ENOSYS;
		goto out;
	}

	LOG_DEBUG("Getting plugin functions for segment %s.\n", segment->name);
	pdata = segment->private_data;
	if (pdata->bbr_state & BBR_CHANGE_STATE) {
		LOG_DEBUG("State of segment %s has already been changed.\n",
			  segment->name);
		rc = EINVAL;
		goto out;
	}

	/* An enabled segment that already has remapped sectors offers no
	 * functions. The count comes from the kernel for an active segment
	 * and from the engine's own tables otherwise.
	 */
	if (pdata->bbr_state & BBR_ENABLED) {
		remapped = (segment->flags & SOFLAG_ACTIVE)
			? get_kernel_bbr_remap_sector_count(segment)
			: get_engine_remap_sector_count(segment);

		if (remapped) {
			LOG_DEBUG("Segment %s has %"PRIu64" remapped sectors - "
				  "cannot be disabled.\n", segment->name, remapped);
			rc = EINVAL;
			goto out;
		}
	}

	/* Room for the array header plus one function-info entry. */
	array = EngFncs->engine_alloc(sizeof(function_info_array_t) +
				      sizeof(function_info_t));
	if (array == NULL) {
		LOG_ERROR("Error allocating memory for function-info array.\n");
		rc = ENOMEM;
		goto out;
	}

	entry = &array->info[0];
	if (pdata->bbr_state & BBR_ENABLED) {
		entry->function = BBR_FUNCTION_DISABLE;
		entry->name = EngFncs->engine_strdup("disable");
		entry->title = EngFncs->engine_strdup(_("Disable"));
		entry->verb = EngFncs->engine_strdup(_("Disable"));
		entry->help = EngFncs->engine_strdup(_("Stop performing Bad Block Relocation on this device"));
	} else {
		entry->function = BBR_FUNCTION_ENABLE;
		entry->name = EngFncs->engine_strdup("enable");
		entry->title = EngFncs->engine_strdup(_("Enable"));
		entry->verb = EngFncs->engine_strdup(_("Enable"));
		entry->help = EngFncs->engine_strdup(_("Bad Block Relocation has been disabled on this device. This will re-enable it."));
	}
	array->count = 1;
	*functions = array;

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * bbr_plugin_function
 *
 * Carry out the "enable" or "disable" plugin function on a segment. Either
 * one records a pending state change in the segment's private data, marks
 * the segment dirty and, if the segment is currently active, flags it for
 * (re)activation. The objects and options arguments are not used.
 *
 * Returns 0 on success or EINVAL for any other action code.
 **/
static int bbr_plugin_function(storage_object_t *segment,
			       task_action_t action,
			       list_anchor_t objects,
			       option_array_t *options)
{
	bbr_private_data_t *pdata = segment->private_data;
	int rc = 0;

	LOG_ENTRY();

	switch (action) {

	case BBR_FUNCTION_ENABLE:
		LOG_DEBUG("Enabling BBR on segment %s.\n", segment->name);
		pdata->bbr_state |= BBR_CHANGE_STATE | BBR_ACTIVATE;
		break;

	case BBR_FUNCTION_DISABLE:
		/* NOTE: There is a timing hole here. If a remap happens
		 * between now and the next activation, the remapped sector
		 * will not be reachable, so the earlier "no bad blocks"
		 * check does not cover every case. Since BBR is being
		 * disabled anyway, this is accepted.
		 */
		LOG_DEBUG("Disabling BBR on segment %s.\n", segment->name);
		pdata->bbr_state |= BBR_CHANGE_STATE;
		pdata->bbr_state &= ~BBR_ACTIVATE;
		break;

	default:
		LOG_ERROR("Action code %u not supported.\n", action);
		rc = EINVAL;
		break;
	}

	/* Common bookkeeping for both supported functions. */
	if (rc == 0) {
		segment->flags |= SOFLAG_DIRTY;
		if (segment->flags & SOFLAG_ACTIVE) {
			segment->flags |= SOFLAG_NEEDS_ACTIVATE;
		}
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * bbr_backup_metadata
 *
 * Build the segment's metadata in a zeroed local buffer and write it out
 * twice via write_metadata(): first as FIRST_METADATA_WRITE and, only if
 * that succeeds, as SECOND_METADATA_WRITE. Both calls pass TRUE as the last
 * argument (presumably selecting backup mode - confirm against
 * write_metadata()).
 *
 * Returns 0 on success or the error from the first failing write.
 **/
static int bbr_backup_metadata(storage_object_t *segment)
{
	evms_bbr_metadata_t md;
	int rc;

	LOG_ENTRY();
	LOG_DEBUG("Backing up metadata for segment %s.\n", segment->name);

	/* Zero the whole structure before it is filled in. */
	memset(&md, 0, sizeof(md));
	build_metadata(segment, &md);

	rc = write_metadata(segment, &md, FIRST_METADATA_WRITE, TRUE);
	if (rc == 0) {
		rc = write_metadata(segment, &md, SECOND_METADATA_WRITE, TRUE);
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * Function table for BBR segment manager.
 **/
static plugin_functions_t bbr_functions = {
	.setup_evms_plugin		= bbr_setup_plugin,
	.cleanup_evms_plugin		= bbr_cleanup_plugin,
	.can_delete			= bbr_can_delete,
	.can_unassign			= bbr_can_unassign,
	.can_expand			= bbr_can_expand,
	.can_expand_by			= bbr_can_expand_by,
	.can_shrink			= bbr_can_shrink,
	.can_shrink_by			= bbr_can_shrink_by,
	.can_set_volume			= bbr_can_set_volume,
	.discover			= bbr_discover,
	.create				= bbr_create,
	.assign				= bbr_assign,
	.discard			= bbr_discard,
	.delete				= bbr_delete,
	.unassign			= bbr_unassign,
	.expand				= bbr_expand,
	.shrink				= bbr_shrink,
	.set_volume			= bbr_set_volume,
	.add_sectors_to_kill_list	= bbr_add_sectors_to_kill_list,
	.commit_changes			= bbr_commit_changes,
	.can_activate			= bbr_can_activate,
	.activate			= bbr_activate,
	.can_deactivate			= bbr_can_deactivate,
	.deactivate			= bbr_deactivate,
	.get_option_count		= bbr_get_option_count,
	.init_task			= bbr_init_task,
	.set_option			= bbr_set_option,
	.set_objects			= bbr_set_objects,
	.get_info			= bbr_get_info,
	.get_plugin_info		= bbr_get_plugin_info,
	.read				= bbr_read,
	.write				= bbr_write,
	.get_plugin_functions		= bbr_get_plugin_functions,
	.plugin_function		= bbr_plugin_function,
	.backup_metadata		= bbr_backup_metadata,
};

/**
 * Plugin record for BBR segment manager.
 **/
static plugin_record_t bbr_plugin_record = {
	.id = EVMS_BBR_SEG_PLUGIN_ID,
	.version = {
		.major = MAJOR_VERSION,
		.minor = MINOR_VERSION,
		.patchlevel = PATCH_LEVEL
	},
	.required_engine_api_version = {
		.major = 15,
		.minor = 0,
		.patchlevel = 0
	},
	.required_plugin_api_version = {
		.plugin = {
			.major = 13,
			.minor = 1,
			.patchlevel = 0
		}
	},
	.short_name = EVMS_BBR_SEG_PLUGIN_SHORT_NAME,
	.long_name = EVMS_BBR_SEG_PLUGIN_LONG_NAME,
	.oem_name = EVMS_IBM_OEM_NAME,
	.functions = {
		.plugin = &bbr_functions
	}
};

/* NULL-terminated list of the plugin records this module exports for the
 * EVMS Engine; it holds the single BBR segment-manager record.
 */
plugin_record_t *evms_plugin_records[] = { &bbr_plugin_record, NULL };

