/*
 *   Copyright (c) International Business Machines  Corp., 2001
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or 
 *   (at your option) any later version.
 * 
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software 
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Module: lvmregmgr
 * File: lvm_pv.c
 *
 * Description: This file contains all functions related to the discovery,
 *              creation and management of physical volumes in the LVM region
 *              manager.
 */


#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/utsname.h>
#include <sys/ioctl.h>
#include <time.h>
#include <plugin.h>
#include "lvmregmgr.h"


/*** Physical Volume Memory Allocation Functions ***/


/* Function: lvm_allocate_pe_map
 *
 *	Work out how many sectors the PE map occupies for this PV (pe_total
 *	entries of pe_disk_t, converted with bytes_to_sectors) and allocate a
 *	buffer of that many sectors to hold it. The pv_entry must already
 *	point at a filled-in PV structure.
 *
 *	Returns 0 on success, including the case where pe_total is still zero
 *	and no map is allocated. Returns ENOMEM if the allocation fails.
 */
static int lvm_allocate_pe_map( lvm_physical_volume_t * pv_entry )
{
	int rc = 0;

	LOG_ENTRY;

	if ( pv_entry->pv->pe_total != 0 ) {
		pv_entry->pe_map_sectors = bytes_to_sectors(pv_entry->pv->pe_total * sizeof(pe_disk_t));
		pv_entry->pe_map = lvm_engine->engine_alloc(sectors_to_bytes(pv_entry->pe_map_sectors));
		if ( pv_entry->pe_map == NULL ) {
			LOG_CRITICAL("Memory error creating PE map for PV %s.\n", pv_entry->segment->name);
			rc = ENOMEM;
		}
	} else {
		// pe_total is not always known yet. Leave the map empty.
		pv_entry->pe_map_sectors = 0;
		pv_entry->pe_map = NULL;
	}

	RETURN(rc);
}


/* Function: lvm_deallocate_pe_map
 *
 *	Free this PV's PE map, if it has one, and reset the map pointer and
 *	sector count. Always returns 0.
 */
static int lvm_deallocate_pe_map( lvm_physical_volume_t * pv_entry )
{
	LOG_ENTRY;

	pv_entry->pe_map_sectors = 0;

	if ( pv_entry->pe_map != NULL ) {
		lvm_engine->engine_free(pv_entry->pe_map);
		pv_entry->pe_map = NULL;
	}

	RETURN(0);
}


/* Function: lvm_allocate_physical_volume
 *
 *	Allocate a new lvm_physical_volume structure for the given segment.
 *	The entry stores the pv pointer that is passed in (no copy is made),
 *	and a PE map is allocated based on the number of PEs in that PV.
 *
 *	The pv structure passed into this function must already be filled in
 *	with the appropriate information.
 *
 *	Returns the new entry, or NULL on a memory error. On failure the pv
 *	structure is freed, either directly or through
 *	lvm_deallocate_physical_volume, so the caller must not use it again.
 */
lvm_physical_volume_t * lvm_allocate_physical_volume(	storage_object_t	* segment,
							pv_disk_t		* pv )
{
	lvm_physical_volume_t * pv_entry;

	LOG_ENTRY;

	// The physical volume entry itself.
	pv_entry = lvm_engine->engine_alloc(sizeof(*pv_entry));
	if ( pv_entry == NULL ) {
		LOG_CRITICAL("Memory error creating physical volume %s\n", segment->name);
		lvm_engine->engine_free(pv);
		RETURN(NULL);
	}

	// Tie the entry to its segment and metadata.
	pv_entry->segment	= segment;
	pv_entry->pv		= pv;
	pv_entry->flags		= 0;
	pv_entry->number	= pv->pv_number;

	// The PE map. The deallocate call below also releases pv.
	if ( lvm_allocate_pe_map(pv_entry) != 0 ) {
		LOG_CRITICAL("Memory error creating PE map for physical volume %s\n", segment->name);
		lvm_deallocate_physical_volume(pv_entry);
		RETURN(NULL);
	}

	RETURN(pv_entry);
}


/* Function: lvm_deallocate_physical_volume
 *
 *	Unhook this physical volume from its group (if it occupies its slot
 *	in the group's PV list) and from its container, then free the PE map,
 *	the PV metadata and the entry itself. The segment is not freed.
 *	Always returns 0.
 */
int lvm_deallocate_physical_volume( lvm_physical_volume_t * pv_entry )
{
	lvm_volume_group_t * owner;

	LOG_ENTRY;

	// Drop this PV from the owning group's list, if it is really there.
	owner = pv_entry->group;
	if ( owner != NULL && owner->pv_list[pv_entry->number] == pv_entry ) {
		owner->pv_count--;
		owner->pv_list[pv_entry->number] = NULL;
	}

	if ( pv_entry->segment != NULL ) {
		lvm_remove_segment_from_container(pv_entry->segment);
	}

	// Release the PE map.
	if ( pv_entry->pe_map != NULL ) {
		lvm_engine->engine_free(pv_entry->pe_map);
		pv_entry->pe_map = NULL;
	}

	// Release the PV metadata.
	if ( pv_entry->pv != NULL ) {
		lvm_engine->engine_free(pv_entry->pv);
		pv_entry->pv = NULL;
	}

	// The segment is only forgotten here. Depending on who asked for the
	// deallocate, it may still need to be placed on another list.
	pv_entry->number		= 0;
	pv_entry->pe_map_sectors	= 0;
	pv_entry->segment		= NULL;

	lvm_engine->engine_free(pv_entry);

	RETURN(0);
}



/*** Physical Volume Creation Functions ***/


/* Function: lvm_find_free_pv_number
 *
 *	Scan slots 1 through MAX_PV of the group's PV list and return the
 *	index of the first empty one. Returns -1 if every slot is in use.
 */
int lvm_find_free_pv_number( lvm_volume_group_t * group )
{
	int pv_number = 1;

	LOG_ENTRY;

	// Skip over every slot that is already taken.
	while ( pv_number <= MAX_PV && group->pv_list[pv_number] ) {
		pv_number++;
	}

	if ( pv_number > MAX_PV ) {
		LOG_ERROR("Container %s has maximum number of objects.\n", group->container->name);
		RETURN(-1);
	}

	RETURN(pv_number);
}


/* Function: lvm_set_system_id
 *
 *	Fill in the system_id field of the PV struct with the node name
 *	reported by uname() followed by the current time in seconds. The
 *	field is zeroed first and the result is truncated to fit NAME_LEN.
 *	This function is based on the "system_id_set" function from the LVM
 *	tools library, from Heinz Mauelshagen and Sistina Software
 *	(www.sistina.com).
 *
 *	Returns 0 on success, or EINVAL if uname() fails.
 */
static int lvm_set_system_id( pv_disk_t * pv )
{
	struct utsname uts;

	LOG_ENTRY;

	if ( uname(&uts) ) {
		LOG_ERROR("Error from uname()\n");
		RETURN(EINVAL);
	}
	memset(pv->system_id, 0, NAME_LEN);

	// time_t is not guaranteed to be unsigned long, so convert explicitly
	// to match the %lu conversion.
	snprintf(pv->system_id, NAME_LEN, "%s%lu", uts.nodename, (unsigned long)time(NULL));

	RETURN(0);
}


/* Function: lvm_calculate_vgda_info
 *
 *	Fill in the "base" and "size" fields for the on-disk metadata areas
 *	of a PV, using the maximum defaults for all sizes. The LV array and
 *	the PE map each start at the end of the previous area, rounded up to
 *	LVM_VGDA_ALIGN. pe_on_disk.size is not set here, since that needs to
 *	be done when calculating the number of PEs (see
 *	lvm_calculate_pe_total).
 *
 *	The bases and sizes follow the layout used in the LVM 0.9.1beta8
 *	release, which aligns each metadata structure on a 4k boundary.
 *
 *	Always returns 0.
 */
static int lvm_calculate_vgda_info( pv_disk_t * pv )
{
	LOG_ENTRY;

	// PV header, VG header and PV UUID list: bases come from constants.
	pv->pv_on_disk.size		= LVM_PV_DISK_SIZE;
	pv->pv_on_disk.base		= LVM_PV_DISK_BASE;

	pv->vg_on_disk.size		= LVM_VG_DISK_SIZE;
	pv->vg_on_disk.base		= LVM_VG_DISK_BASE;

	pv->pv_uuidlist_on_disk.size	= (MAX_PV + 1) * NAME_LEN;
	pv->pv_uuidlist_on_disk.base	= LVM_PV_UUIDLIST_DISK_BASE;

	// The LV array follows the UUID list, on the next aligned boundary.
	pv->lv_on_disk.size		= (MAX_LV + 1) * sizeof(lv_disk_t);
	pv->lv_on_disk.base		= round_up(pv->pv_uuidlist_on_disk.base
						+ pv->pv_uuidlist_on_disk.size, LVM_VGDA_ALIGN);

	// The PE map follows the LV array, on the next aligned boundary.
	pv->pe_on_disk.base		= round_up(pv->lv_on_disk.base
						+ pv->lv_on_disk.size, LVM_VGDA_ALIGN);

	RETURN(0);
}


/* Function: lvm_calculate_pe_total
 *
 *	Determine how many PEs are going to be on this PV, and fill in
 *	pe_total, pe_on_disk.size and pe_start accordingly. The rest of the
 *	PV structure (pv_size, pe_size, pe_on_disk.base) must be initialized
 *	before calling this function.
 *
 *	This function emulates the behavior of the LVM 0.9.1beta8 release,
 *	which aligns the first PE on a 64k boundary, and leaves one full PE
 *	open between the end of the VGDA and the start of the first data PE.
 *	The code is based on the "setup_pe_table" function from the LVM tools
 *	library, from Heinz Mauelshagen and Sistina Software
 *	(www.sistina.com).
 *
 *	Returns 0 on success. Returns EINVAL if pe_size is zero, if the PV is
 *	no larger than the metadata that precedes the PE map, or if no PE
 *	fits on the PV. pe_total is zero on the latter two failures.
 */
static int lvm_calculate_pe_total( pv_disk_t * pv )
{
	u_int32_t vgda_sectors;
	u_int32_t pe_map_size = 0;
	u_int32_t data_size;
	u_int32_t rest;

	LOG_ENTRY;

	// A PE size of zero would cause a division by zero below.
	if ( pv->pe_size == 0 ) {
		LOG_ERROR("PE size is zero. Cannot calculate number of PEs\n");
		RETURN(EINVAL);
	}

	// Sectors in front of the PE map. If the PV is not even that big,
	// the unsigned subtraction below would wrap to a huge value.
	vgda_sectors = bytes_to_sectors(pv->pe_on_disk.base);
	if ( pv->pv_size <= vgda_sectors ) {
		pv->pe_total = 0;
		LOG_ERROR("Not enough space on object for any PEs\n");
		RETURN(EINVAL);
	}

	// First guess at number of PEs on the PV
	rest = pv->pv_size - vgda_sectors;
	pv->pe_total = rest / pv->pe_size;

	// Eliminate PEs from the total until there is enough space to fit the
	// PE maps in. There should be a full PE of space (or more) between the
	// PE maps and the first real PE.
	for ( ; pv->pe_total; pv->pe_total-- ) {
		pe_map_size = round_up(bytes_to_sectors(pv->pe_total * sizeof(pe_disk_t)),LVM_PE_ALIGN);
		data_size = pv->pe_total * pv->pe_size;

		// Sum in 64 bits so a nearly-full 32-bit sector count cannot
		// wrap around and look like it fits.
		if ( ((u_int64_t)pe_map_size + data_size + pv->pe_size) <= rest ) {
			break;
		}
	}

	if ( ! pv->pe_total ) {
		LOG_ERROR("Not enough space on object for any PEs\n");
		RETURN(EINVAL);
	}

	// The PE map area includes the one-PE gap, so pe_start lands right
	// after it.
	pv->pe_on_disk.size = sectors_to_bytes(pe_map_size + pv->pe_size);
	pv->pe_start = bytes_to_sectors(pv->pe_on_disk.base + pv->pe_on_disk.size);

/* 0.9.1beta7 version of calculating PE map size.
   Leaving this code in here for historical purposes.
	new_pv_size	= pv->pv_size & ~(PE_SIZE_ALIGNMENT_SECTORS-1);
	rest		= new_pv_size - bytes_to_sectors(pv->pe_on_disk.base);
	pv->pe_total	= rest / pv->pe_size;
	rest		-= (pv->pe_total * pv->pe_size);
	while ( (rest * SECTOR_SIZE / sizeof(pe_disk_t)) < pv->pe_total ) {
		rest += pv->pe_size;
		pv->pe_total--;
	}
	pv->pe_on_disk.size = (new_pv_size - pv->pe_size * pv->pe_total)
				* SECTOR_SIZE - pv->pe_on_disk.base;
*/

	RETURN(0);
}


/* Function: lvm_initialize_new_pv
 *
 *	Zero a PV disk structure and give it the initial values for a brand
 *	new PV on the given segment: the "HM" id, metadata version, size,
 *	allocatable flag, VGDA layout, a fresh UUID and the system ID. Fields
 *	that depend on a group are not set here.
 *
 *	Returns 0 on success, or the error code from lvm_create_uuid or
 *	lvm_set_system_id.
 */
static int lvm_initialize_new_pv(pv_disk_t		* pv,
				storage_object_t	* segment )
{
	int	rc;

	LOG_ENTRY;

	memset(pv, 0, sizeof(*pv));

	pv->id[0]		= 'H';
	pv->id[1]		= 'M';
	pv->version		= LVM_METADATA_VERSION;
	pv->pv_allocatable	= PV_ALLOCATABLE;
	pv->pv_size		= segment->size;

	// The major number is a hard-coded placeholder (3, for IDE). What
	// the right value should be is still an open question.
	pv->pv_major		= 3;

	lvm_calculate_vgda_info(pv);

	// A UUID for the PV, then the system ID. Stop at the first failure.
	memset(pv->pv_uuid, 0, NAME_LEN);
	rc = lvm_create_uuid(pv->pv_uuid);
	if ( rc == 0 ) {
		rc = lvm_set_system_id(pv);
	}

	RETURN(rc);
}


/* Function: lvm_create_pv_from_segment
 *
 *	Create a new PV out of the specified disk segment: allocate and
 *	initialize a pv_disk_t, then wrap it in a new physical volume entry.
 *
 *	Returns the new physical volume, or NULL on failure. No memory
 *	allocated here is left behind on any failure path.
 */
lvm_physical_volume_t * lvm_create_pv_from_segment( storage_object_t * segment )
{
	lvm_physical_volume_t	* pv_entry;
	pv_disk_t		* pv;

	LOG_ENTRY;

	// Allocate a new pv_disk_t
	pv = lvm_engine->engine_alloc(sizeof(pv_disk_t));
	if ( ! pv ) {
		LOG_CRITICAL("Memory error creating new PV metadata for object %s.\n", segment->name);
		RETURN(NULL);
	}

	// Fill in the pv_disk_t structure first. We still own it at this
	// point, so it must be released here if initialization fails.
	if ( lvm_initialize_new_pv(pv, segment) ) {
		lvm_engine->engine_free(pv);
		RETURN(NULL);
	}

	// Create a new physical volume. lvm_allocate_physical_volume frees
	// pv itself when it fails, so do not free it again here.
	pv_entry = lvm_allocate_physical_volume(segment, pv);
	if ( ! pv_entry ) {
		LOG_CRITICAL("Memory error creating PV for object %s\n", segment->name);
		RETURN(NULL);
	}

	RETURN(pv_entry);
}


/* Function: lvm_check_segment_for_pe_size
 *
 *	Before we can create a new group, we have to make sure every segment
 *	going into that group is large enough for the specified PE size.
 *
 *	Returns 0 if the segment is large enough. Returns ENOSPC if it is
 *	not; in that case *pe_size is replaced with a suggested size derived
 *	from the segment size and passed through lvm_check_pe_size. Returns
 *	EINVAL, leaving *pe_size unchanged, if *pe_size is zero.
 */
int lvm_check_segment_for_pe_size(	storage_object_t	* segment,
					u_int32_t		* pe_size )
{
	LOG_ENTRY;

	// A PE size of zero would cause a division by zero below.
	if ( *pe_size == 0 ) {
		LOG_ERROR("PE size of zero is not valid for object %s\n", segment->name);
		RETURN(EINVAL);
	}

	// The ratio of the PV size to the PE size must be at least
	// LVM_PE_SIZE_PV_SIZE_REL for the segment to be allowed into the
	// group.
	if ( segment->size / *pe_size < LVM_PE_SIZE_PV_SIZE_REL ) {
		// Sizes are unsigned. They are converted explicitly so the
		// arguments match the conversions whatever width the fields
		// have, and so the product below is computed in 64 bits.
		LOG_WARNING("Object %s not large enough for PE size %u\n", segment->name, (unsigned int)*pe_size);
		LOG_WARNING("Object %s is %llu sectors in size\n", segment->name, (unsigned long long)segment->size);
		LOG_WARNING("Target PE size requires objects of %llu or more sectors\n",
			(unsigned long long)(LVM_PE_SIZE_PV_SIZE_REL) * *pe_size);

		// Hand back a PE size that would work for this segment.
		*pe_size = segment->size / LVM_PE_SIZE_PV_SIZE_REL;
		lvm_check_pe_size(pe_size);
		RETURN(ENOSPC);
	}

	RETURN(0);
}


/* Function: lvm_get_pv_for_segment
 *
 *	Look up the physical volume that represents this segment. The segment
 *	must be consumed by a container owned by the LVM plugin; that
 *	container's group is searched for a PV pointing at the segment.
 *	Returns NULL if the segment is not in such a container or no PV
 *	matches.
 */
lvm_physical_volume_t * lvm_get_pv_for_segment( storage_object_t * segment )
{
	lvm_volume_group_t * group;
	int pv_number;

	LOG_ENTRY;

	// Only segments consumed by one of our containers can be PVs.
	if ( ! segment->consuming_container ||
	     segment->consuming_container->plugin != lvm_plugin ) {
		RETURN(NULL);
	}

	group = segment->consuming_container->private_data;
	for ( pv_number = 1; pv_number <= MAX_PV; pv_number++ ) {
		lvm_physical_volume_t * candidate = group->pv_list[pv_number];

		if ( candidate && candidate->segment == segment ) {
			RETURN(candidate);
		}
	}

	RETURN(NULL);
}


/* Function: lvm_get_pv_for_name
 *
 *	Look through the group's PV list for a physical volume whose segment
 *	name matches the given name (compared over at most NAME_LEN
 *	characters). Returns the first match, or NULL if there is none.
 */
lvm_physical_volume_t * lvm_get_pv_for_name(	char			* name,
						lvm_volume_group_t	* group )
{
	int pv_number;

	LOG_ENTRY;

	for ( pv_number = 1; pv_number <= MAX_PV; pv_number++ ) {
		lvm_physical_volume_t * candidate = group->pv_list[pv_number];

		// Empty slot.
		if ( ! candidate ) {
			continue;
		}
		if ( strncmp(name, candidate->segment->name, NAME_LEN) == 0 ) {
			RETURN(candidate);
		}
	}

	RETURN(NULL);
}


/* Function: lvm_update_pv_for_group
 *
 *	When a PV is moved into a group, several fields must be updated
 *	according to the new group's information: status, PE size, PV number,
 *	VG name, VGDA layout, PE count and the PE map.
 *
 *	This function does not itself place pv_entry in the group's PV list.
 *
 *	Returns 0 on success, ENOSPC if the group has no free PV number, or
 *	the error from lvm_calculate_pe_total or lvm_allocate_pe_map. When no
 *	PV number is available, pv_entry->number is left unchanged.
 */
int lvm_update_pv_for_group(	lvm_physical_volume_t	* pv_entry,
				lvm_volume_group_t	* group )
{
	pv_disk_t	* pv = pv_entry->pv;
	int		pv_number;
	int		rc;

	LOG_ENTRY;

	// Simple fields.
	pv->lv_cur		= 0;
	pv->pe_allocated	= 0;

	pv->pv_status		= PV_ACTIVE;
	pv->pe_size		= group->vg->pe_size;

	// New PV number. Check the result in a local first, so a failure
	// (-1) is never stored in the entry, where it could later be used as
	// an index into a group's pv_list.
	pv_number = lvm_find_free_pv_number(group);
	if ( pv_number <= 0 ) {
		RETURN(ENOSPC);
	}
	pv_entry->number	= pv_number;
	pv->pv_number		= pv_number;

	// Copy the group name
	memset(pv->vg_name, 0, NAME_LEN);
	lvm_translate_container_name_to_vg_name(group->container->name, pv->vg_name);

	// Reset all the VGDA information
	lvm_calculate_vgda_info(pv);

	// Calculate the number of PEs on this PV.
	if ( (rc = lvm_calculate_pe_total(pv)) ) {
		RETURN(rc);
	}

	// Allocate new PE maps, sized for the new PE count.
	lvm_deallocate_pe_map(pv_entry);
	if ( (rc = lvm_allocate_pe_map(pv_entry)) ) {
		RETURN(rc);
	}

	RETURN(0);
}


/* Function: lvm_update_pv_for_no_group
 *
 *	The counterpart of lvm_update_pv_for_group. Called after a PV has
 *	been removed from a group; clears every group-dependent field of the
 *	PV metadata, detaches the entry from its group and frees its PE map.
 *	Always returns 0.
 */
int lvm_update_pv_for_no_group( lvm_physical_volume_t * pv_entry )
{
	pv_disk_t * pv = pv_entry->pv;

	LOG_ENTRY;

	// The group name.
	memset(pv->vg_name, 0, NAME_LEN);

	// Locations of the group-level metadata areas.
	pv->vg_on_disk.base		= 0;
	pv->vg_on_disk.size		= 0;
	pv->pv_uuidlist_on_disk.base	= 0;
	pv->pv_uuidlist_on_disk.size	= 0;
	pv->lv_on_disk.base		= 0;
	pv->lv_on_disk.size		= 0;
	pv->pe_on_disk.base		= 0;
	pv->pe_on_disk.size		= 0;

	// Identity and status within the group.
	pv->pv_number			= 0;
	pv->pv_status			= 0;
	pv->lv_cur			= 0;

	// PE accounting.
	pv->pe_size			= 0;
	pv->pe_total			= 0;
	pv->pe_allocated		= 0;
	pv->pe_start			= 0;

	// Finally the in-memory entry.
	pv_entry->number		= 0;
	pv_entry->group			= NULL;
	lvm_deallocate_pe_map(pv_entry);

	RETURN(0);
}


/* Function: lvm_get_pe_start
 *
 *	Return the starting sector of the first data PE on the specified PV.
 *	Along the way, clean up the metadata mess created by LVM's temporary
 *	switch to version 2 PVs.
 *
 *	Simple rules:
 *	- If the PV is version 2, set it back to version 1 and take the
 *	  stored pe_start as the first PE sector.
 *	- Otherwise, or if that pe_start is zero, use the equation
 *	  pe_start = pe_on_disk.base + pe_on_disk.size
 *	- If pe_on_disk.base + .size != pe_start, reset .size and pe_start
 *	  so the equation holds for the sector being returned.
 *	- Whenever something is changed, mark the group's container dirty
 *	  and tell the engine that changes are pending, so all of this will
 *	  get written to disk on the next commit.
 */
u_int64_t lvm_get_pe_start(	pv_disk_t		* pv,
				lvm_volume_group_t	* group )
{
	u_int64_t	map_end_sector;
	u_int64_t	first_pe_sector = 0;

	LOG_ENTRY;

	// First sector past the PE map area, according to pe_on_disk. The
	// fixes below only touch pe_on_disk after this value has been used.
	map_end_sector = bytes_to_sectors(pv->pe_on_disk.base + pv->pe_on_disk.size);

	if ( pv->version == 2 ) {
		LOG_DETAILS("Detected version 2 metadata on PV %d, container %s.\n",
			pv->pv_number, group->container->name);
		LOG_DETAILS("Reverting to version 1.\n");
		first_pe_sector = pv->pe_start;
		pv->version = 1;
		group->container->flags |= SCFLAG_DIRTY;
		lvm_engine->set_changes_pending();
	}

	// No usable pe_start taken above: fall back on the PE map location.
	if ( first_pe_sector == 0 ) {
		first_pe_sector = map_end_sector;
	}

	if ( pv->pe_start != map_end_sector ) {
		LOG_DETAILS("Detected pe_start/pe_on_disk.size inconsistency on PV %d, container %s. Fixing.\n",
			pv->pv_number, group->container->name);
		pv->pe_on_disk.size = sectors_to_bytes(first_pe_sector) - pv->pe_on_disk.base;
		pv->pe_start = first_pe_sector;
		group->container->flags |= SCFLAG_DIRTY;
		lvm_engine->set_changes_pending();
	}

	RETURN(first_pe_sector);
}


/* Function: lvm_remove_pvs_from_kernel
 *
 *	When a PV is removed from a group in the Engine, this information
 *	needs to be communicated directly to the kernel. Otherwise, on the
 *	next kernel rediscover, that group in the kernel will believe it
 *	still owns that PV. This function walks the PV removal list and sends
 *	one ioctl per entry to LVM in the kernel, telling it which PV to
 *	remove from which VG.
 *
 *	Every entry is freed and the list is emptied whether or not its ioctl
 *	succeeded. Failures are logged only; the function always returns 0.
 */
int lvm_remove_pvs_from_kernel( void )
{
	lvm_pv_remove_ioctl_t	* entry;
	lvm_pv_remove_ioctl_t	* next_entry;
	evms_plugin_ioctl_t	arg;
	int			rc = 0;

	LOG_ENTRY;

	arg.status = 0;
	arg.feature_command = EVMS_LVM_PV_REMOVE_IOCTL;
	arg.feature_id = lvm_plugin->id;

	entry = lvm_pv_remove_list;
	while ( entry != NULL ) {
		// Remember the successor now; this entry is freed below.
		next_entry = entry->next;

		arg.feature_ioctl_data = entry;
		rc = lvm_engine->ioctl_evms_kernel(EVMS_PLUGIN_IOCTL, &arg);
		if ( rc == 0 ) {
			// The ioctl went through. See what the plugin reported.
			rc = arg.status;
		}
		if ( rc != 0 ) {
			LOG_SERIOUS("Ioctl error (%d).\n", rc);
			LOG_SERIOUS("Kernel could not remove PV %d from its VG (%s)\n", entry->pv_number, entry->vg_uuid);
			arg.status = 0;
		}

		lvm_engine->engine_free(entry);
		entry = next_entry;
	}

	lvm_pv_remove_list = NULL;

	RETURN(0);
}


/* Function: lvm_add_pv_to_removal_list
 *
 *	Allocate a new entry for the PV removal list, record the group's VG
 *	UUID and the PV's number in it, and push it on the front of the list.
 *	Returns 0 on success, or ENOMEM if the entry cannot be allocated.
 */
int lvm_add_pv_to_removal_list( lvm_physical_volume_t	* pv_entry,
				lvm_volume_group_t	* group )
{
	lvm_pv_remove_ioctl_t	* new_item;

	LOG_ENTRY;

	new_item = lvm_engine->engine_alloc(sizeof(*new_item));
	if ( new_item == NULL ) {
		LOG_CRITICAL("Memory error creating new PV removal entry\n");
		LOG_SERIOUS("Error adding object %s in container %s to kernel removal list\n", pv_entry->segment->name, group->container->name);
		RETURN(ENOMEM);
	}

	// Which PV, in which VG.
	new_item->pv_number = pv_entry->number;
	memcpy(new_item->vg_uuid, group->vg->vg_uuid, UUID_LEN);

	// Push on the head of the removal list.
	new_item->next		= lvm_pv_remove_list;
	lvm_pv_remove_list	= new_item;

	RETURN(0);
}


/* Function: lvm_get_available_stripes
 *
 *	Count the PVs in this group whose total and allocated PE counts
 *	differ, i.e. the PVs that still have extents which can be allocated
 *	to new LVs. Returns that count.
 */
int lvm_get_available_stripes( lvm_volume_group_t * group )
{
	int available = 0;
	int pv_number;

	LOG_ENTRY;

	for ( pv_number = 1; pv_number <= MAX_PV; pv_number++ ) {
		lvm_physical_volume_t * pv_entry = group->pv_list[pv_number];

		// Empty slot.
		if ( ! pv_entry ) {
			continue;
		}
		if ( (pv_entry->pv->pe_total - pv_entry->pv->pe_allocated) != 0 ) {
			available++;
		}
	}

	RETURN(available);
}


