/* GNU polyxmass - the massist's program.
   -------------------------------------- 
   Copyright (C) 2000,2001,2002,2003,2004 Filippo Rusconi

   http://www.polyxmass.org

   This file is part of the "GNU polyxmass" project.
   
   The "GNU polyxmass" project is an official GNU project package (see
   www.gnu.org) released ---in its entirety--- under the GNU General
   Public License and was started at the Centre National de la
   Recherche Scientifique (FRANCE), that granted me the formal
   authorization to publish it under this Free Software License.

   This software is free software; you can redistribute it and/or
   modify it under the terms of the GNU  General Public
   License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.
   
   This software is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.
   
   You should have received a copy of the GNU  General Public
   License along with this software; if not, write to the
   Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.
*/

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include "pxmchem-cleavespec.h"
#include "pxmchem-plugin.h"
#include "pxmchem-formula.h"
#include "pxmchem-cleave.h"



/* NEW'ING FUNCTIONS, DUPLICATING FUNCTIONS, INITING FUNCTIONS ...
 */
PxmCleaveSpec *
pxmchem_cleavespec_new (void)
{
  PxmCleaveSpec *cls = g_malloc0 (sizeof (PxmCleaveSpec));
  
  cls->clmGPA = g_ptr_array_new ();
  cls->clrGPA = g_ptr_array_new ();
  
  return cls;
}


/* Allocate a zero-filled PxmCleaveRule; every member starts out as
   NULL. The caller owns the returned instance.
 */
PxmCleaveRule *
pxmchem_cleaverule_new (void)
{
  return g_malloc0 (sizeof (PxmCleaveRule));
}


PxmCleavemotif *
pxmchem_cleavemotif_new (void)
{
  PxmCleavemotif *clm = g_malloc0 (sizeof (PxmCleavemotif));
  
  clm->motifGPA = g_ptr_array_new ();
  
  clm->offset = -1;
  
  clm->cleave = TRUE;

  return clm;
}


/* Look in GPA for the first cleavespec whose name equals 'name' and
   return a freshly allocated duplicate of it (made with
   pxmchem_cleavespec_dup ()). Returns NULL if no cleavespec by that
   name is found. Both parameters must be non-NULL.
 */
PxmCleaveSpec *
pxmchem_cleavespec_new_by_name (gchar *name, GPtrArray *GPA)
{
  PxmCleaveSpec *candidate = NULL;
  guint idx = 0;

  g_assert (GPA != NULL);
  g_assert (name != NULL);

  while (idx < GPA->len)
    {
      candidate = g_ptr_array_index (GPA, idx);

      if (strcmp (candidate->name, name) == 0)
        return pxmchem_cleavespec_dup (candidate);

      idx++;
    }

  return NULL;
}


/* Look in GPA for the first cleavespec whose pattern equals 'pattern'
   and return a freshly allocated duplicate of it (made with
   pxmchem_cleavespec_dup ()). Returns NULL if no cleavespec has that
   pattern. Both parameters must be non-NULL.
 */
PxmCleaveSpec *
pxmchem_cleavespec_new_by_pattern (gchar *pattern, GPtrArray *GPA)
{
  PxmCleaveSpec *candidate = NULL;
  guint idx = 0;

  g_assert (GPA != NULL);
  g_assert (pattern != NULL);

  while (idx < GPA->len)
    {
      candidate = g_ptr_array_index (GPA, idx);

      if (strcmp (candidate->pattern, pattern) == 0)
        return pxmchem_cleavespec_dup (candidate);

      idx++;
    }

  return NULL;
}


PxmCleaveSpec *
pxmchem_cleavespec_dup (const PxmCleaveSpec *cls)
{
  gint iter = 0;
  
  PxmCleaveSpec *cls_new = NULL;

  PxmCleaveRule *clr = NULL;
  PxmCleaveRule *clr_new = NULL;
  PxmCleavemotif *clm = NULL;
  PxmCleavemotif *clm_new = NULL;
  
  
  g_assert (cls != NULL);
  
  cls_new = pxmchem_cleavespec_new ();
  
  /* The template instance is assumed to be well formed,
     which is without NULL members!
  */
  g_assert (cls->name != NULL);
  cls_new->name = g_strdup (cls->name);
    
  g_assert (cls->pattern != NULL);
  cls_new->pattern = g_strdup (cls->pattern);
    
  /* Duplicate the array of leftrighrule instances.
   */
  g_assert (cls->clrGPA != NULL);

  for (iter = 0; iter < cls->clrGPA->len; iter++)
    {
      clr = g_ptr_array_index (cls->clrGPA, iter);
      g_assert (clr != NULL);
      
      clr_new = pxmchem_cleaverule_dup (clr);
      g_ptr_array_add (cls_new->clrGPA, clr_new);
    }

  /* Duplicate the array of cleavemotif instances.
   */
  g_assert (cls->clmGPA != NULL);

  for (iter = 0; iter < cls->clmGPA->len; iter++)
    {
      clm = g_ptr_array_index (cls->clmGPA, iter);
      g_assert (clm != NULL);
      
      clm_new = pxmchem_cleavemotif_dup (clm);
      g_ptr_array_add (cls_new->clmGPA, clm_new);
    }
  
  return cls_new;
}


/* Duplicate a cleaverule. The left (resp. right) pair of strings is
   copied only if the left (resp. right) code is non-NULL, in which
   case the matching actform must be non-NULL as well (asserted). An
   actform without its code is not copied. The caller owns the copy.
 */
PxmCleaveRule *
pxmchem_cleaverule_dup (const PxmCleaveRule *clr)
{
  PxmCleaveRule *copy = NULL;

  g_assert (clr != NULL);

  copy = g_malloc0 (sizeof (PxmCleaveRule));

  /* Left end data go by pair.
   */
  if (NULL != clr->left_code)
    {
      g_assert (clr->left_actform != NULL);

      copy->left_code = g_strdup (clr->left_code);
      copy->left_actform = g_strdup (clr->left_actform);
    }

  /* Right end data go by pair.
   */
  if (NULL != clr->right_code)
    {
      g_assert (clr->right_actform != NULL);

      copy->right_code = g_strdup (clr->right_code);
      copy->right_actform = g_strdup (clr->right_actform);
    }

  return copy;
}


PxmCleavemotif *
pxmchem_cleavemotif_dup (const PxmCleavemotif *clm)
{
  gint iter = 0;
  
  gchar *help = NULL;
  
  PxmCleavemotif *clm_new = NULL;
  
  g_assert (clm != NULL);
  
  clm_new = pxmchem_cleavemotif_new ();
  
  /* The template instance is assumed to be well formed,
     which is without NULL members!
  */
  g_assert (clm->motif != NULL);
  clm_new->motif = g_strdup (clm->motif);
  
  /* Duplicate the array of strings, motifGPA.
   */
  g_assert (clm->motifGPA != NULL);

  for (iter = 0; iter < clm->motifGPA->len; iter ++)
    {
      help = g_ptr_array_index (clm->motifGPA, iter);
      g_assert (help != NULL);
      
      g_ptr_array_add (clm_new->motifGPA, g_strdup (help));
    }
  
  clm_new->offset = clm->offset;
  
  clm_new->cleave = clm->cleave;
  
  return clm_new;
}


/* Replace the name of 'cls' with a copy of 'name'. Both parameters
   must be non-NULL. The previous name, if any, is freed. Always
   returns TRUE.
 */
gboolean
pxmchem_cleavespec_set_name (PxmCleaveSpec *cls,
                             gchar *name)
{
  gchar *copy = NULL;

  g_assert (cls != NULL && name != NULL);

  /* Duplicate BEFORE freeing the old value: if the caller hands us
     cls->name itself, freeing first would make g_strdup () read
     freed memory. The member may be NULL (right after
     pxmchem_cleavespec_new ()), which g_free () accepts.
  */
  copy = g_strdup (name);

  g_free (cls->name);

  cls->name = copy;

  return TRUE;
}


/* Replace the pattern of 'cls' with a copy of 'pattern'. Both
   parameters must be non-NULL. The previous pattern, if any, is
   freed. Always returns TRUE.
 */
gboolean
pxmchem_cleavespec_set_pattern (PxmCleaveSpec *cls,
                                gchar *pattern)
{
  gchar *copy = NULL;

  g_assert (cls != NULL && pattern != NULL);

  /* Duplicate BEFORE freeing the old value: if the caller hands us
     cls->pattern itself, freeing first would make g_strdup () read
     freed memory. The member may be NULL (right after
     pxmchem_cleavespec_new ()), which g_free () accepts.
  */
  copy = g_strdup (pattern);

  g_free (cls->pattern);

  cls->pattern = copy;

  return TRUE;
}

/* Replace the left end monomer code of 'clr' with a copy of
   'left_code'. Both parameters must be non-NULL. The previous value,
   if any, is freed. Always returns TRUE.
 */
gboolean
pxmchem_cleaverule_set_left_code (PxmCleaveRule *clr,
                                  gchar *left_code)
{
  gchar *copy = NULL;

  g_assert (clr != NULL && left_code != NULL);

  /* Duplicate BEFORE freeing the old value, so that passing
     clr->left_code itself does not read freed memory. The member may
     be NULL (right after pxmchem_cleaverule_new ()), which g_free ()
     accepts.
  */
  copy = g_strdup (left_code);

  g_free (clr->left_code);

  clr->left_code = copy;

  return TRUE;
}


/* Replace the right end monomer code of 'clr' with a copy of
   'right_code'. Both parameters must be non-NULL. The previous value,
   if any, is freed. Always returns TRUE.
 */
gboolean
pxmchem_cleaverule_set_right_code (PxmCleaveRule *clr,
                                   gchar *right_code)
{
  gchar *copy = NULL;

  g_assert (clr != NULL && right_code != NULL);

  /* Duplicate BEFORE freeing the old value, so that passing
     clr->right_code itself does not read freed memory. The member may
     be NULL (right after pxmchem_cleaverule_new ()), which g_free ()
     accepts.
  */
  copy = g_strdup (right_code);

  g_free (clr->right_code);

  clr->right_code = copy;

  return TRUE;
}


/* Replace the left end actform of 'clr' with a copy of
   'left_actform'. Both parameters must be non-NULL. The previous
   value, if any, is freed. Always returns TRUE.
 */
gboolean
pxmchem_cleaverule_set_left_actform (PxmCleaveRule *clr,
                                     gchar *left_actform)
{
  gchar *copy = NULL;

  g_assert (clr != NULL && left_actform != NULL);

  /* Duplicate BEFORE freeing the old value, so that passing
     clr->left_actform itself does not read freed memory. The member
     may be NULL (right after pxmchem_cleaverule_new ()), which
     g_free () accepts.
  */
  copy = g_strdup (left_actform);

  g_free (clr->left_actform);

  clr->left_actform = copy;

  return TRUE;
}


/* Replace the right end actform of 'clr' with a copy of
   'right_actform'. Both parameters must be non-NULL. The previous
   value, if any, is freed. Always returns TRUE.
 */
gboolean
pxmchem_cleaverule_set_right_actform (PxmCleaveRule *clr,
                                      gchar *right_actform)
{
  gchar *copy = NULL;

  g_assert (clr != NULL && right_actform != NULL);

  /* Duplicate BEFORE freeing the old value, so that passing
     clr->right_actform itself does not read freed memory. The member
     may be NULL (right after pxmchem_cleaverule_new ()), which
     g_free () accepts.
  */
  copy = g_strdup (right_actform);

  g_free (clr->right_actform);

  clr->right_actform = copy;

  return TRUE;
}


/* Replace the motif string of 'clm' with a copy of 'motif'. Only
   'clm' is asserted non-NULL: a NULL 'motif' is accepted, in which
   case g_strdup () yields NULL and the member is reset to NULL. The
   previous value, if any, is freed. Always returns TRUE.
 */
gboolean
pxmchem_cleavemotif_set_motif (PxmCleavemotif *clm,
                               gchar *motif)
{
  gchar *copy = NULL;

  g_assert (clm != NULL);

  /* Duplicate BEFORE freeing the old value, so that passing
     clm->motif itself does not read freed memory. The member may be
     NULL (right after pxmchem_cleavemotif_new ()), which g_free ()
     accepts.
  */
  copy = g_strdup (motif);

  g_free (clm->motif);

  clm->motif = copy;

  return TRUE;
}

  
/* Store 'offset' in the cleavemotif. 'clm' must be non-NULL. Always
   returns TRUE.
 */
gboolean
pxmchem_cleavemotif_set_offset (PxmCleavemotif *clm,
                                gint offset)
{
  g_assert (NULL != clm);

  clm->offset = offset;

  return TRUE;
}

  
/* Store the 'cleave' boolean in the cleavemotif. 'clm' must be
   non-NULL. Always returns TRUE.
 */
gboolean
pxmchem_cleavemotif_set_cleave (PxmCleavemotif *clm,
                                gboolean cleave)
{
  g_assert (NULL != clm);

  clm->cleave = cleave;

  return TRUE;
}



/* INTEGRITY CHECKING FUNCTIONS
 */
/* Validate a cleavespec: its name, its pattern and each of its
   cleaverules.

   cleavespec:  the instance to validate (non-NULL).
   delim_codes: string of delimited monomer codes, passed on to the
                pattern syntax check and to the cleaverule validation
                (non-NULL).
   codelen:     passed on to the pattern syntax check.
   atom_refGPA: array of reference atoms, passed on to the cleaverule
                validation (non-NULL).
   valid:       address of a string pointer that MUST point to NULL on
                entry. On failure it receives a newly allocated string
                listing all the errors found, which the caller must
                free with g_free (). It is left untouched on success.

   Returns TRUE if no error was found, FALSE otherwise.
 */
gboolean
pxmchem_cleavespec_validate (PxmCleaveSpec *cleavespec, gchar *delim_codes,
                             gint codelen, GPtrArray *atom_refGPA,
                             gchar **valid)
{
  GString *gs = NULL;

  gchar *help = NULL;

  guint iter = 0;

  PxmCleaveRule *cleaverule = NULL;


  g_assert (cleavespec != NULL);
  g_assert (delim_codes != NULL);
  g_assert (atom_refGPA != NULL);

  /* Note that for integrity reasons, *valid MUST BE NULL to ensure
   * that it is empty.
   */
  g_assert (valid != NULL);
  g_assert (*valid == NULL);

  /* All the error messages get accumulated in this string.
   */
  gs = g_string_new ("");

  /* This is the DTD for the cleavespec node.
     <!ELEMENT cls (name,pattern,clr*)>
  */

  /* The name: it must be non-NULL and be longer than 0 chars.
   */
  if (cleavespec->name == NULL)
    {
      g_string_append_printf (gs,
                              _("cleavespec has a NULL name\n"));
    }
  else
    {
      /* Make a copy of the string, so that we can strip it of its
       * spaces and next check its "real" length.
       */
      help = g_strdup (cleavespec->name);
      help = g_strstrip (help);

      if (strlen (help) == 0)
        {
          g_string_append_printf (gs,
                                  _("cleavespec has an invalid name:"
                                    " '%s'\n"), cleavespec->name);
        }

      g_free (help);
    }

  /* The pattern: it must be non-NULL and be a valid pattern.
   */
  if (cleavespec->pattern == NULL)
    {
      g_string_append_printf (gs,
                              _("cleavespec has a NULL pattern\n"));
    }
  else if (cleavespec->name != NULL)
    {
      /* Check that the pattern is OK from a syntactical point of
       * view. The check works on a copy of the cleavespec made with
       * pxmchem_cleavespec_dup (), which asserts that the name is
       * non-NULL. It is thus only attempted when there is a name: a
       * NULL name has already been reported above and must not abort
       * the program here.
       */
      if (FALSE == pxmchem_cleavespec_check_syntax (cleavespec,
                                                    delim_codes,
                                                    codelen))
        g_string_append_printf (gs,
                                _("cleavespec has an invalid pattern:"
                                  " '%s'\n"), cleavespec->pattern);
    }

  /* The array of cleaverules must not be NULL, however,
   * cleaverules are optional.
   */
  g_assert (cleavespec->clrGPA != NULL);

  /* The help pointer must be NULL for the function
   * pxmchem_cleaverule_validate () call below.
   */
  help = NULL;

  for (iter = 0; iter < cleavespec->clrGPA->len; iter++)
    {
      cleaverule = g_ptr_array_index (cleavespec->clrGPA, iter);
      g_assert (cleaverule != NULL);

      if (FALSE ==
          pxmchem_cleaverule_validate (cleaverule, delim_codes,
                                       atom_refGPA, &help))
        {
          g_assert (help != NULL);

          g_string_append_printf (gs,
                                  _("cleavespec has an invalid"
                                    " cleaverule:\n%s"),
                                  help);
          g_free (help);
          help = NULL;
        }
    }

  /* Finally the validation is finished.
   */
  if (gs->len > 0)
    {
      /* String is not empty, which is there were errors. Freeing the
       * GString without its character data hands that data over to
       * the caller.
       */
      *valid = g_string_free (gs, FALSE);

      return FALSE;
    }

  g_string_free (gs, TRUE);

  return TRUE;
}


/* Validate a cleaverule.

   clr:         the instance to validate (non-NULL).
   delim_codes: string in which each valid monomer code is expected
                to appear surrounded by the libpolyxmass_globals_delim
                character (non-NULL).
   atom_refGPA: array of reference atoms handed to
                pxmchem_actform_check () (non-NULL).
   valid:       address of a string pointer that MUST point to NULL on
                entry. On failure it receives a newly allocated string
                listing all the errors found, which the caller must
                free with g_free (). It is left untouched on success.

   A code or an actform is considered "set" if it is non-NULL and not
   empty. On each end (left, right) code and actform go by pair: either
   both are set or none is.

   Returns TRUE if no error was found, FALSE otherwise.
 */
gboolean
pxmchem_cleaverule_validate (PxmCleaveRule *clr, gchar *delim_codes,
                             GPtrArray *atom_refGPA, gchar **valid)
{
  GString *gs = NULL;

  gboolean left_code_set = FALSE;
  gboolean left_actform_set = FALSE;
  gboolean right_code_set = FALSE;
  gboolean right_actform_set = FALSE;

  gchar *delim = NULL;


  /* clr is dereferenced below, so it gets the same guard as the
   * other parameters.
   */
  g_assert (clr != NULL);
  g_assert (delim_codes != NULL);
  g_assert (atom_refGPA != NULL);

  /* Note that for integrity reasons, *valid MUST BE NULL to ensure
   * that it is empty.
   */
  g_assert (valid != NULL);
  g_assert (*valid == NULL);

  /* All the error messages get accumulated in this string.
   */
  gs = g_string_new ("");

  /* This is the DTD for the cleaverule node.
     <!ELEMENT clr ((le-mnm-code,le-actform)?,(re-mnm-code,re-actform)?)>
   */
  /* None of the cleaverule sub-nodes are required, but they go by
     pairs. If one code is set, the actform HAS to be set and valid.
   */

  /* The left end monomer code: if set, it must be found, surrounded
   * by delimiters, in the delim_codes string.
   */
  if (clr->left_code != NULL && strlen (clr->left_code) > 0)
    {
      delim = g_strdup_printf ("%c%s%c",
                               libpolyxmass_globals_delim,
                               clr->left_code,
                               libpolyxmass_globals_delim);

      if (NULL == strstr (delim_codes, delim))
        {
          g_string_append_printf (gs,
                                  _("cleaverule has an invalid"
                                    " 'left_code': '%s'\n"),
                                  clr->left_code);
        }

      g_free (delim);

      left_code_set = TRUE;
    }

  /* The left end actform: if set, it must pass the actform check.
   */
  if (clr->left_actform != NULL && strlen (clr->left_actform) > 0)
    {
      if (FALSE == pxmchem_actform_check (clr->left_actform, atom_refGPA))
        {
          g_string_append_printf (gs,
                                  _("cleaverule has an invalid"
                                    " actform: '%s'\n"),
                                  clr->left_actform);
        }

      left_actform_set = TRUE;
    }

  /* One member of the left pair without the other is an error.
   */
  if (left_code_set != left_actform_set)
    {
      g_string_append_printf (gs,
                              _("cleaverule has an invalid pair of"
                                " left_code and left_actform\n"));
    }

  /* The right end monomer code: if set, it must be found, surrounded
   * by delimiters, in the delim_codes string.
   */
  if (clr->right_code != NULL && strlen (clr->right_code) > 0)
    {
      delim = g_strdup_printf ("%c%s%c",
                               libpolyxmass_globals_delim,
                               clr->right_code,
                               libpolyxmass_globals_delim);

      if (NULL == strstr (delim_codes, delim))
        {
          g_string_append_printf (gs,
                                  _("cleaverule has an invalid"
                                    " 'right_code': '%s'\n"),
                                  clr->right_code);
        }

      g_free (delim);

      right_code_set = TRUE;
    }

  /* The right end actform: if set, it must pass the actform check.
   */
  if (clr->right_actform != NULL && strlen (clr->right_actform) > 0)
    {
      if (FALSE == pxmchem_actform_check (clr->right_actform, atom_refGPA))
        {
          g_string_append_printf (gs,
                                  _("cleaverule has an invalid"
                                    " actform: '%s'\n"),
                                  clr->right_actform);
        }

      right_actform_set = TRUE;
    }

  /* One member of the right pair without the other is an error.
   */
  if (right_code_set != right_actform_set)
    {
      g_string_append_printf (gs,
                              _("cleaverule has an invalid pair of"
                                " right_code and right_actform\n"));
    }

  /* Finally the validation is finished.
   */
  if (gs->len > 0)
    {
      /* String is not empty, which is there were errors. Freeing the
       * GString without its character data hands that data over to
       * the caller.
       */
      *valid = g_string_free (gs, FALSE);

      return FALSE;
    }

  g_string_free (gs, TRUE);

  return TRUE;
}


/* Tell if the name of 'cleavespec' occurs at most once in GPA: the
   array is searched for that name from the top and from the bottom,
   and the two indexes found are compared. Returns TRUE if they are
   identical, which is also the case when the name is not found at
   all (both searches then return -1). All pointers, including
   cleavespec->name, must be non-NULL.
 */
gboolean
pxmchem_cleavespec_unique_by_name (PxmCleaveSpec *cleavespec,
                                   GPtrArray *GPA)
{
  gint first = -1;
  gint last = -1;

  g_assert (cleavespec != NULL);
  g_assert (cleavespec->name != NULL);
  g_assert (GPA != NULL);

  first = pxmchem_cleavespec_get_index_top_by_name (cleavespec->name, GPA);
  last = pxmchem_cleavespec_get_index_bottom_by_name (cleavespec->name, GPA);

  return (first == last);
}




/* Check the syntax of the pattern of 'cleavespec'. The check consists
   in having the pattern parsed by
   pxmchem_cleave_parse_cleavespec_pattern (). Because the parsing
   allocates memory in the cleavespec it works on, it is performed on
   a throw-away duplicate that is freed right after, so that
   'cleavespec' itself is left unmodified.

   Returns FALSE if the parsing returned -1, TRUE otherwise.
 */
gboolean
pxmchem_cleavespec_check_syntax (PxmCleaveSpec *cleavespec,
                                 gchar *delim_codes,
                                 gint codelen)
{
  PxmCleaveSpec *scratch = NULL;
  gint parsed = -1;

  g_assert (cleavespec != NULL);
  g_assert (delim_codes != NULL);

  scratch = pxmchem_cleavespec_dup (cleavespec);

  parsed = pxmchem_cleave_parse_cleavespec_pattern (scratch,
                                                    delim_codes,
                                                    codelen);

  pxmchem_cleavespec_free (scratch);

  return (-1 == parsed) ? FALSE : TRUE;
}



/*  LOCATING FUNCTIONS
 */
/* Return the index in GPA of the first cleavespec named 'name', or -1
   if there is none. This is a plain alias of
   pxmchem_cleavespec_get_index_top_by_name ().
 */
gint
pxmchem_cleavespec_get_index_by_name (gchar *name, GPtrArray *GPA)
{
  gint found = pxmchem_cleavespec_get_index_top_by_name (name, GPA);

  return found;
}


/* Search GPA from its first item onwards for a cleavespec named
   'name'. Returns the index of the first match, or -1 if there is
   none. Both parameters must be non-NULL.
 */
gint
pxmchem_cleavespec_get_index_top_by_name (gchar *name, GPtrArray *GPA)
{
  guint iter = 0;
  PxmCleaveSpec *cls = NULL;


  g_assert (GPA != NULL);
  /* name goes to strcmp (), which must not be handed a NULL pointer;
   * pxmchem_cleavespec_get_ptr_by_name () has the same guard.
   */
  g_assert (name != NULL);

  for (iter = 0; iter < GPA->len; iter++)
    {
      cls = g_ptr_array_index (GPA, iter);

      if (0 == strcmp (cls->name, name))
        return (gint) iter;
    }

  return -1;
}


/* Search GPA from its last item backwards for a cleavespec named
   'name'. Returns the index of the first match encountered (that is
   the highest matching index), or -1 if there is none or if the array
   is empty. Both parameters must be non-NULL.
 */
gint
pxmchem_cleavespec_get_index_bottom_by_name (gchar *name, GPtrArray *GPA)
{
  guint remaining = 0;
  PxmCleaveSpec *cls = NULL;


  g_assert (GPA != NULL);
  /* name goes to strcmp (), which must not be handed a NULL pointer;
   * pxmchem_cleavespec_get_ptr_by_name () has the same guard.
   */
  g_assert (name != NULL);

  /* 'remaining' is the number of items not yet examined, so that the
   * item looked at is at index (remaining - 1). Counting this way
   * keeps the unsigned counter from wrapping below zero and handles
   * the empty array without a special case.
   */
  for (remaining = GPA->len; remaining > 0; remaining--)
    {
      cls = g_ptr_array_index (GPA, remaining - 1);

      if (0 == strcmp (cls->name, name))
        return (gint) (remaining - 1);
    }

  return -1;
}


/* Search GPA from its first item onwards for a cleavespec whose
   pattern equals 'pattern'. Returns the index of the first match, or
   -1 if there is none. Both parameters must be non-NULL.
 */
gint
pxmchem_cleavespec_get_index_by_pattern (gchar *pattern, GPtrArray *GPA)
{
  guint iter = 0;
  PxmCleaveSpec *cls = NULL;


  g_assert (GPA != NULL);
  /* pattern goes to strcmp (), which must not be handed a NULL
   * pointer; pxmchem_cleavespec_get_ptr_by_pattern () has the same
   * guard.
   */
  g_assert (pattern != NULL);

  for (iter = 0; iter < GPA->len; iter++)
    {
      cls = g_ptr_array_index (GPA, iter);

      if (0 == strcmp (cls->pattern, pattern))
        return (gint) iter;
    }

  return -1;
}


/* Return the index at which the pointer 'cls' is stored in GPA, or -1
   if it is not in the array. Pointers are compared, not contents.
   Both parameters must be non-NULL.
 */
gint
pxmchem_cleavespec_get_index_by_ptr (GPtrArray *GPA,
                                     PxmCleaveSpec *cls)
{
  guint pos = 0;

  g_assert (GPA != NULL && cls != NULL);

  while (pos < GPA->len)
    {
      PxmCleaveSpec *current = g_ptr_array_index (GPA, pos);

      if (current == cls)
        return (gint) pos;

      pos++;
    }

  return -1;
}


/* Return the first cleavespec of GPA that is named 'name', or NULL
   if there is none. The pointer returned is the one stored in the
   array, not a copy. Both parameters must be non-NULL.
 */
PxmCleaveSpec *
pxmchem_cleavespec_get_ptr_by_name (gchar *name, GPtrArray *GPA)
{
  guint idx = 0;

  g_assert (GPA != NULL);
  g_assert (name != NULL);

  while (idx < GPA->len)
    {
      PxmCleaveSpec *candidate = g_ptr_array_index (GPA, idx);

      if (strcmp (candidate->name, name) == 0)
        return candidate;

      idx++;
    }

  return NULL;
}


/* Return the first cleavespec of GPA whose pattern equals 'pattern',
   or NULL if there is none. The pointer returned is the one stored
   in the array, not a copy. Both parameters must be non-NULL.
 */
PxmCleaveSpec *
pxmchem_cleavespec_get_ptr_by_pattern (gchar *pattern, GPtrArray *GPA)
{
  guint idx = 0;

  g_assert (GPA != NULL);
  g_assert (pattern != NULL);

  while (idx < GPA->len)
    {
      PxmCleaveSpec *candidate = g_ptr_array_index (GPA, idx);

      if (strcmp (candidate->pattern, pattern) == 0)
        return candidate;

      idx++;
    }

  return NULL;
}


/* UTILITY FUNCTIONS
 */



/* XML-format TRANSACTIONS
 */
/* Format a cleavespec as an XML <cls> element and return it as a
   newly allocated string that the caller must free.

   cls:    the cleavespec to format (non-NULL, with a non-empty name
           and a non-empty pattern, and a non-NULL clrGPA).
   indent: string handed, along with an offset, to
           libpolyxmass_globals_format_string_lead () in order to get
           the lead string put in front of each line (non-NULL).
   offset: offset used for the <cls> opening and closing tags; the
           children elements are formatted with (offset + 1).

   The element produced looks like this:

   <cls>
     <name>CyanogenBromide</name>
     <pattern>M/</pattern>
     <clr>
       <le-mnm-code>M</le-mnm-code>
       <le-actform>-C1H2S1+O1</le-actform>
       <re-mnm-code>M</re-mnm-code>
       <re-actform>-C1H2S1+O1</re-actform>
     </clr>
   </cls>

   with one <clr> element per cleaverule found in cls->clrGPA (there
   may be none).
 */
gchar *
pxmchem_cleavespec_format_xml_string_cls (PxmCleaveSpec *cls,
                                          gchar *indent, gint offset)
{
  guint idx = 0;
  gint child_offset = 0;

  gchar *lead = NULL;
  gchar *rule_xml = NULL;

  GString *out = NULL;

  PxmCleaveRule *rule = NULL;


  g_assert (cls != NULL && indent != NULL);

  out = g_string_new ("");
  g_assert (out != NULL);

  /* The opening tag.
   */
  lead = libpolyxmass_globals_format_string_lead (indent, offset);
  g_string_append_printf (out, "%s<cls>\n", lead);
  g_free (lead);

  /* The children are one level deeper than the <cls> element.
   */
  child_offset = offset + 1;
  lead = libpolyxmass_globals_format_string_lead (indent, child_offset);

  g_assert (cls->name != NULL && strlen (cls->name) > 0);
  g_string_append_printf (out, "%s<name>%s</name>\n",
                          lead, cls->name);

  g_assert (cls->pattern != NULL && strlen (cls->pattern) > 0);
  g_string_append_printf (out, "%s<pattern>%s</pattern>\n",
                          lead, cls->pattern);

  /* The cleaverules, if any, each one formatting its own element.
   */
  g_assert (cls->clrGPA != NULL);

  for (idx = 0; idx < cls->clrGPA->len; idx++)
    {
      rule = g_ptr_array_index (cls->clrGPA, idx);

      rule_xml =
        pxmchem_cleaverule_format_xml_string_clr (rule,
                                                  indent, child_offset);
      if (rule_xml == NULL)
        {
          g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
                 "%s@%d: could not format xml string for one "
                 "clr of cleavespec\n", __FILE__, __LINE__);
          continue;
        }

      g_string_append_printf (out, "%s", rule_xml);

      g_free (rule_xml);
    }

  g_free (lead);

  /* The closing tag.
   */
  lead = libpolyxmass_globals_format_string_lead (indent, offset);
  g_string_append_printf (out, "%s</cls>\n", lead);
  g_free (lead);

  /* Get rid of the GString wrapper only: its character data are what
   * we return.
   */
  return g_string_free (out, FALSE);
}

/* Render a <cls> xml node into a newly allocated PxmCleaveSpec that
   the caller owns.

   xml_doc:   the xml document, handed to xmlNodeListGetString ().
   xml_node:  the <cls> element node (non-NULL, asserted).
   user_data: not used.

   The xml node we are in is structured this way:

   <cls>
     <name>CyanogenBromide</name>
     <pattern>M/</pattern>
     <clr>
       <le-mnm-code>M</le-mnm-code>
       <le-actform>-C1H2S1+O1</le-actform>
       <re-mnm-code>M</re-mnm-code>
       <re-actform>-C1H2S1+O1</re-actform>
     </clr>
   </cls>

   The <name> and <pattern> children are required, in that order, and
   are followed by any number of <clr> elements; any violation of that
   structure makes an assertion fail. A <clr> element that cannot be
   rendered is logged as critical and skipped.

   NOTE(review): the name and pattern strings come straight from
   xmlNodeListGetString (), while pxmchem_cleavespec_free () releases
   them with g_free () -- confirm that libxml2 and GLib share the same
   allocator in this build.
 */
PxmCleaveSpec *
pxmchem_cleavespec_render_xml_node_cls (xmlDocPtr xml_doc,
                                        xmlNodePtr xml_node,
                                        gpointer user_data)
{
  PxmCleaveSpec *cls = NULL;
  PxmCleaveRule *clr = NULL;


  /* Make sure we have parameters pointing bona fide to the right
   * xml element.
   */
  g_assert (xml_node != NULL);
  g_assert (0 == xmlStrcmp (xml_node->name,
                            (const xmlChar *) "cls"));

  /* Now go to the first child of current node: <name>.
   */
  xml_node = xml_node->children;

  /* From a rigorous XML parsing point of view, the blanks found in
   * the XML document are considered to be nodes, and we have to detect
   * these and take proper action: go next sibling (next blank) as long
   * as blanks are encountered.
   */
  while (TRUE == xmlIsBlankNode (xml_node))
    xml_node = xml_node->next;

  /* Check that we have effectively a <name> element here. Running out
   * of siblings leaves xml_node NULL, which is caught before the node
   * gets dereferenced.
   */
  g_assert (xml_node != NULL);
  g_assert (0 == xmlStrcmp (xml_node->name,
                            (const xmlChar *) "name"));

  /* Allocate the cleavespec instance that we'll characterize using
   * allocated strings modelled after xml data.
   */
  cls = pxmchem_cleavespec_new ();

  /* Since we have allocated the cleavespec instance at the line
     above, we know that its member data are NULL, so we can make
     direct assignements, without recoursing to the _set_xxx ().
  */
  cls->name =
    xmlNodeListGetString (xml_doc, xml_node->xmlChildrenNode, 1);
  g_assert (cls->name != NULL);

  /* Now go to the second child: <pattern>, which is the next sibling
   * of the <name> node.
   */
  xml_node = xml_node->next;
  while (TRUE == xmlIsBlankNode (xml_node))
    xml_node = xml_node->next;

  /* Check that we have effectively a <pattern> element here.
   */
  g_assert (xml_node != NULL);
  g_assert (0 == xmlStrcmp (xml_node->name,
                            (const xmlChar *) "pattern"));

  cls->pattern =
    xmlNodeListGetString (xml_doc, xml_node->xmlChildrenNode, 1);
  g_assert (cls->pattern != NULL);

  /* Now go to the third child: <clr>, if any. This one is optional,
   * and there may be one or more also.
   */
  xml_node = xml_node->next;
  while (TRUE == xmlIsBlankNode (xml_node))
    xml_node = xml_node->next;

  while (xml_node != NULL)
    {
      /* Check that we have bona fide a <clr> element.
       */
      g_assert (0 == xmlStrcmp (xml_node->name,
                                (const xmlChar *) "clr"));

      /* Ask that a PxmCleaveRule object be allocated and
       * initialized according to the xml data from the
       * current xml node.
       */
      clr = pxmchem_cleaverule_render_xml_node_clr (xml_doc,
                                                    xml_node,
                                                    NULL);
      if (clr == NULL)
        {
          /* Do NOT store the NULL pointer in the array: all the
           * functions that iterate in clrGPA (duplication,
           * validation, xml formatting, freeing) assert that its
           * items are non-NULL.
           */
          g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
                 "%s@%d: failed rendering clr from xml node\n",
                 __FILE__, __LINE__);
        }
      else
        {
          /* Store the newly allocated PxmCleaveRule instance into
           * the array in the cleavespec object.
           */
          g_ptr_array_add (cls->clrGPA, clr);
        }

      xml_node = xml_node->next;

      /* Skip the blank nodes, as above.
       */
      while (TRUE == xmlIsBlankNode (xml_node))
        xml_node = xml_node->next;
    }

  /* This stage we should have finished rendering the cleavespec
   * instance with any of its clr instances...
   */
  return cls;
}


/* Format a cleaverule as an XML <clr> element and return it as a
   newly allocated string that the caller must free.

   clr:    the cleaverule to format (non-NULL).
   indent: string handed, along with an offset, to
           libpolyxmass_globals_format_string_lead () in order to get
           the lead string put in front of each line (non-NULL).
   offset: offset used for the <clr> opening and closing tags; the
           children elements are formatted with (offset + 1).

   The element produced looks like this:

    <clr>
       <le-mnm-code>M</le-mnm-code>
       <le-actform>-C1H2S1+O1</le-actform>
       <re-mnm-code>M</re-mnm-code>
       <re-actform>-C1H2S1+O1</re-actform>
    </clr>

   The left (resp. right) pair of elements is only written if the
   left (resp. right) code is non-NULL and not empty, in which case
   the matching actform is asserted to be non-NULL and not empty.
 */
gchar *
pxmchem_cleaverule_format_xml_string_clr (PxmCleaveRule *clr,
                                          gchar *indent, gint offset)
{
  gint child_offset = 0;

  gchar *lead = NULL;

  GString *out = NULL;


  g_assert (clr != NULL && indent != NULL);

  out = g_string_new ("");
  g_assert (out != NULL);

  /* The opening tag.
   */
  lead = libpolyxmass_globals_format_string_lead (indent, offset);
  g_string_append_printf (out, "%s<clr>\n", lead);
  g_free (lead);

  /* The children are one level deeper than the <clr> element.
   */
  child_offset = offset + 1;
  lead = libpolyxmass_globals_format_string_lead (indent, child_offset);

  /* The left end pair.
   */
  if (clr->left_code != NULL && strlen (clr->left_code) > 0)
    {
      g_assert (clr->left_actform != NULL
                && strlen (clr->left_actform) > 0);

      g_string_append_printf (out, "%s<le-mnm-code>%s</le-mnm-code>\n",
                              lead, clr->left_code);

      g_string_append_printf (out, "%s<le-actform>%s</le-actform>\n",
                              lead, clr->left_actform);
    }

  /* The right end pair.
   */
  if (clr->right_code != NULL && strlen (clr->right_code) > 0)
    {
      g_assert (clr->right_actform != NULL
                && strlen (clr->right_actform) > 0);

      g_string_append_printf (out, "%s<re-mnm-code>%s</re-mnm-code>\n",
                              lead, clr->right_code);

      g_string_append_printf (out, "%s<re-actform>%s</re-actform>\n",
                              lead, clr->right_actform);
    }

  g_free (lead);

  /* The closing tag.
   */
  lead = libpolyxmass_globals_format_string_lead (indent, offset);
  g_string_append_printf (out, "%s</clr>\n", lead);
  g_free (lead);

  /* Get rid of the GString wrapper only: its character data are what
   * we return.
   */
  return g_string_free (out, FALSE);
}

  

/* Render a <clr> xml node into a newly allocated PxmCleaveRule that
   the caller owns.

   xml_doc:   the xml document, handed to xmlNodeListGetString ().
   xml_node:  the <clr> element node (non-NULL, asserted).
   user_data: not used.

   The xml node we are in is structured this way:

    <clr>
       <le-mnm-code>M</le-mnm-code>
       <le-actform>-C1H2S1+O1</le-actform>
       <re-mnm-code>M</re-mnm-code>
       <re-actform>-C1H2S1+O1</re-actform>
    </clr>

   This is the DTD material:
   <!ELEMENT clr ((le-mnm-code,le-actform)?,
   (re-mnm-code,re-actform)?)>

   that is, each of the left end and right end pairs is optional, but
   a pair is either complete or absent. Returns NULL (after logging a
   critical message and freeing the partially rendered instance) if
   only one member of a pair was found. Children elements with other
   names are ignored.

   NOTE(review): an element that occurs more than once overwrites the
   string stored for its previous occurrence without freeing it.
 */
PxmCleaveRule *
pxmchem_cleaverule_render_xml_node_clr (xmlDocPtr xml_doc,
                                        xmlNodePtr xml_node,
                                        gpointer user_data)
{
  PxmCleaveRule *clr = NULL;

  gboolean le_code_set = FALSE;
  gboolean le_actform_set = FALSE;
  gboolean re_code_set = FALSE;
  gboolean re_actform_set = FALSE;


  /* Make sure we have parameters pointing bona fide to the right
   * xml element.
   */
  g_assert (xml_node != NULL);
  g_assert (0 == xmlStrcmp (xml_node->name,
                            (const xmlChar *) "clr"));

  /* Allocate the cleaverule instance that we'll characterize using
   * allocated strings modelled after xml data.
   */
  clr = pxmchem_cleaverule_new ();

  /* Now go to the first child of current node.
   */
  xml_node = xml_node->children;

  /* From a rigorous XML parsing point of view, the blanks found in
   * the XML document are considered to be nodes, and we have to detect
   * these and take proper action: go next sibling (next blank) as long
   * as blanks are encountered.
   */
  while (TRUE == xmlIsBlankNode (xml_node))
    xml_node = xml_node->next;

  while (xml_node != NULL)
    {
      /* Since we have allocated the cleaverule instance above, we
         know that its member data are NULL, so we can make direct
         assignements, without recoursing to the _set_xxx functions.
      */
      if (0 == xmlStrcmp (xml_node->name,
                          (const xmlChar *) "le-mnm-code"))
        {
          clr->left_code =
            xmlNodeListGetString (xml_doc, xml_node->xmlChildrenNode, 1);
          g_assert (clr->left_code != NULL);

          le_code_set = TRUE;
        }

      else if (0 == xmlStrcmp (xml_node->name,
                               (const xmlChar *) "le-actform"))
        {
          clr->left_actform =
            xmlNodeListGetString (xml_doc, xml_node->xmlChildrenNode, 1);
          g_assert (clr->left_actform != NULL);

          le_actform_set = TRUE;
        }

      else if (0 == xmlStrcmp (xml_node->name,
                               (const xmlChar *) "re-mnm-code"))
        {
          clr->right_code =
            xmlNodeListGetString (xml_doc, xml_node->xmlChildrenNode, 1);
          g_assert (clr->right_code != NULL);

          re_code_set = TRUE;
        }

      else if (0 == xmlStrcmp (xml_node->name,
                               (const xmlChar *) "re-actform"))
        {
          clr->right_actform =
            xmlNodeListGetString (xml_doc, xml_node->xmlChildrenNode, 1);
          g_assert (clr->right_actform != NULL);

          re_actform_set = TRUE;
        }

      xml_node = xml_node->next;

      /* Skip the blank nodes, as above.
       */
      while (TRUE == xmlIsBlankNode (xml_node))
        xml_node = xml_node->next;
    }

  /* Check that we have all the elements according to the DTD logic:
   * on each end the code and the actform go by pair. The check is
   * symmetrical, like the one in pxmchem_cleaverule_validate (): an
   * actform without its code is as wrong as a code without its
   * actform (and would be silently dropped when the cleaverule is
   * duplicated or formatted back to xml).
   */
  if (le_code_set != le_actform_set || re_code_set != re_actform_set)
    {
      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
             "%s@%d: failed rendering clr from xml node\n",
             __FILE__, __LINE__);

      pxmchem_cleaverule_free (clr);

      return NULL;
    }

  /* Finished the rendering of the current <clr> node.
   */
  return clr;
}





/* FREE'ING FUNCTIONS
 */
/* Free a cleavespec along with all that it owns: the name and
   pattern strings, and the cleaverule and cleavemotif arrays with
   their items. 'cls' must be non-NULL; any of its members may be
   NULL. Always returns TRUE.
 */
gboolean
pxmchem_cleavespec_free (PxmCleaveSpec *cls)
{
  g_assert (cls != NULL);

  /* g_free () does nothing when handed a NULL pointer.
   */
  g_free (cls->name);
  g_free (cls->pattern);

  /* The array-freeing functions, on the contrary, assert that the
   * array is non-NULL.
   */
  if (NULL != cls->clrGPA)
    pxmchem_cleaverule_GPA_free (cls->clrGPA);

  if (NULL != cls->clmGPA)
    pxmchem_cleavemotif_GPA_free (cls->clmGPA);

  g_free (cls);

  return TRUE;
}

/* Free a cleaverule along with its four strings. 'clr' must be
   non-NULL; any of its members may be NULL. Always returns TRUE.
 */
gboolean
pxmchem_cleaverule_free (PxmCleaveRule *clr)
{
  g_assert (clr != NULL);

  /* g_free () does nothing when handed a NULL pointer.
   */
  g_free (clr->left_code);
  g_free (clr->left_actform);
  g_free (clr->right_code);
  g_free (clr->right_actform);

  g_free (clr);

  return TRUE;
}

/* Free a cleavemotif along with its motif string and its motifGPA
   array of strings. 'clm' must be non-NULL; its motif and motifGPA
   members may be NULL, but the array may not contain NULL items
   (asserted). Always returns TRUE.
 */
gboolean
pxmchem_cleavemotif_free (PxmCleavemotif *clm)
{
  g_assert (clm != NULL);

  /* g_free () does nothing when handed a NULL pointer.
   */
  g_free (clm->motif);

  if (NULL != clm->motifGPA)
    {
      /* Take the strings out of the array one by one, always the
       * first one, and free them.
       */
      for (; clm->motifGPA->len > 0;)
        {
          gchar *item = g_ptr_array_remove_index (clm->motifGPA, 0);

          g_assert (item != NULL);

          g_free (item);
        }

      g_ptr_array_free (clm->motifGPA, TRUE);
    }

  g_free (clm);

  return TRUE;
}




/* GPtrArray-RELATED FUNCTIONS
 */
/* Free all the cleavespec instances stored in GPA, then the array
   itself. GPA must be non-NULL and may not contain NULL items
   (asserted). Returns the number of cleavespec instances freed.
 */
gint
pxmchem_cleavespec_GPA_free (GPtrArray *GPA)
{
  gint freed = 0;

  g_assert (GPA != NULL);

  /* Take the items out of the array one by one, always the first
   * one, until the array is empty.
   */
  for (freed = 0; GPA->len > 0; freed++)
    {
      PxmCleaveSpec *head = g_ptr_array_remove_index (GPA, 0);

      g_assert (head != NULL);

      pxmchem_cleavespec_free (head);
    }

  g_ptr_array_free (GPA, TRUE);

  return freed;
}


/* Free all the cleaverule instances stored in GPA, then the array
   itself. GPA must be non-NULL and may not contain NULL items
   (asserted). Returns the number of cleaverule instances freed.
 */
gint
pxmchem_cleaverule_GPA_free (GPtrArray *GPA)
{
  gint freed = 0;

  g_assert (GPA != NULL);

  /* Take the items out of the array one by one, always the first
   * one, until the array is empty.
   */
  for (freed = 0; GPA->len > 0; freed++)
    {
      PxmCleaveRule *head = g_ptr_array_remove_index (GPA, 0);

      g_assert (head != NULL);

      pxmchem_cleaverule_free (head);
    }

  g_ptr_array_free (GPA, TRUE);

  return freed;
}


/* Free all the cleavemotif instances stored in GPA, then the array
   itself. GPA must be non-NULL and may not contain NULL items
   (asserted). Returns the number of cleavemotif instances freed.
 */
gint
pxmchem_cleavemotif_GPA_free (GPtrArray *GPA)
{
  gint freed = 0;

  g_assert (GPA != NULL);

  /* Take the items out of the array one by one, always the first
   * one, until the array is empty.
   */
  for (freed = 0; GPA->len > 0; freed++)
    {
      PxmCleavemotif *head = g_ptr_array_remove_index (GPA, 0);

      g_assert (head != NULL);

      pxmchem_cleavemotif_free (head);
    }

  g_ptr_array_free (GPA, TRUE);

  return freed;
}











