///
/// This file is part of Rheolef.
///
/// Copyright (C) 2000-2009 Pierre Saramito <Pierre.Saramito@imag.fr>
///
/// Rheolef is free software; you can redistribute it and/or modify
/// it under the terms of the GNU General Public License as published by
/// the Free Software Foundation; either version 2 of the License, or
/// (at your option) any later version.
///
/// Rheolef is distributed in the hope that it will be useful,
/// but WITHOUT ANY WARRANTY; without even the implied warranty of
/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
/// GNU General Public License for more details.
///
/// You should have received a copy of the GNU General Public License
/// along with Rheolef; if not, write to the Free Software
/// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
///
/// =========================================================================
//
// main assembly algorithm
//
// authors: Pierre.Saramito@imag.fr
//
// date: 13 march 1999
//
# include "rheolef/form.h"
# include "rheolef/point.h"
# include "rheolef/asr.h"
# include "rheolef/banded_level_set.h"
# include "form_assembly.h"
# include "field.h"
# include "geo-connectivity.h"
using namespace std;
namespace rheolef { 

// =============================================================================
// the general assembly procedure
// =============================================================================
template <class ElementIterator>
static
inline
void
assembly (
    ElementIterator          iter_element,
    ElementIterator          last_element,
    const form_element&      form_e, 
    form&                    a)
{
    typedef size_t size_type;
    const space& X = a.get_first_space();
    const space& Y = a.get_second_space();
    form_e.initialize (X, Y);
    //
    // use temporary dynamic matrix structures
    //
    asr<Float> auu(Y.n_unknown(), X.n_unknown());
    asr<Float> aub(Y.n_unknown(), X.n_blocked());
    asr<Float> abu(Y.n_blocked(), X.n_unknown());
    asr<Float> abb(Y.n_blocked(), X.n_blocked());
    ublas::matrix<Float> ak;
    tiny_vector<size_type> idy, jdx;

    for (; iter_element != last_element; iter_element++) {
      const geo_element& K = *iter_element;
      // --------------------------------
      // compute elementary matrix ak
      // --------------------------------
      form_e (K, ak);
      // ------------------------------
      // assembly ak in sparse matrix a
      // ------------------------------
      X.set_global_dof (K, jdx);
      Y.set_global_dof (K, idy);
      size_type ny = ak.size1(); // nrow
      size_type nx = ak.size2(); // ncol
      for (size_type i = 0; i < ny; i++) {
        for (size_type j = 0; j < nx; j++) {

	    if (ak(i,j) == Float(0)) continue;

	    size_type i_dof = Y.domain_dof(idy(i));
	    size_type j_dof = X.domain_dof(jdx(j));

	    size_type ii    = Y.index(i_dof);
	    size_type jj    = X.index(j_dof);
   
	    if   (Y.is_blocked(i_dof))
	      if (X.is_blocked(j_dof))    abb.entry(ii, jj) += ak(i,j);
              else                        abu.entry(ii, jj) += ak(i,j);
	    else 
	      if (X.is_blocked(j_dof))    aub.entry(ii, jj) += ak(i,j);
              else                        auu.entry(ii, jj) += ak(i,j);
        }
      }
    }
    //
    // convert dynamic matrix to fixed-size one
    //
    a.uu = csr<Float>(auu);
    a.ub = csr<Float>(aub);
    a.bu = csr<Float>(abu);
    a.bb = csr<Float>(abb);
}
// =============================================================================
// the case of locked boundaries (when slip on a curved boundary)
// =============================================================================
template <class ElementIterator>
static
inline
void
assembly_locked_boundaries (
    ElementIterator          iter_element,
    ElementIterator          last_element,
    const form_element&      form_e, 
    form&                    a)
{
    typedef size_t size_type;
    const space& X = a.get_first_space();
    const space& Y = a.get_second_space();
    form_e.initialize (X, Y);
    //
    // use temporary dynamic matrix structures
    //
    asr<Float> auu(Y.n_unknown(), X.n_unknown());
    asr<Float> aub(Y.n_unknown(), X.n_blocked());
    asr<Float> abu(Y.n_blocked(), X.n_unknown());
    asr<Float> abb(Y.n_blocked(), X.n_blocked());
    ublas::matrix<Float> ak;
    tiny_vector<size_type> idy, jdx;

    for (; iter_element != last_element; iter_element++) {
      const geo_element& K = *iter_element;
      // --------------------------------
      // compute elementary matrix ak
      // --------------------------------
      form_e (K, ak);
      // ------------------------------
      // assembly ak in sparse matrix a
      // ------------------------------
      X.set_global_dof (K, jdx);
      Y.set_global_dof (K, idy);
      size_type ny = ak.size1(); // nrow
      size_type nx = ak.size2(); // ncol

      for (size_type i = 0; i < ny; i++) {
        for (size_type j = 0; j < nx; j++) {

	    if (ak(i,j) == Float(0)) continue;

	    size_type i_dof = Y.domain_dof(idy(i));
	    size_type j_dof = X.domain_dof(jdx(j));

	    size_type ii    = Y.index(i_dof);
	    size_type jj    = X.index(j_dof);
 
            if (!X.is_locked(j_dof) && !Y.is_locked(i_dof))
	    {
		if   (Y.is_blocked(i_dof))
		  if (X.is_blocked(j_dof))    abb.entry(ii, jj) += ak(i,j);
		  else                        abu.entry(ii, jj) += ak(i,j);
		else 
		  if (X.is_blocked(j_dof))    aub.entry(ii, jj) += ak(i,j);
		  else                        auu.entry(ii, jj) += ak(i,j);
	    }
            else if (X.is_locked(j_dof) && Y.is_locked(i_dof))
	    {
		size_type j2_dof=X.locked_with(j_dof);
		size_type jj2=X.index(j2_dof);
		size_type comp_j=(j_dof<j2_dof)?0:1;
		size_type i2_dof=Y.locked_with(i_dof);
		size_type comp_i=(i_dof<i2_dof)?0:1;
		size_type ii2=X.index(i2_dof);
		Float aiijj,aii2jj,aiijj2,aii2jj2;
	    	if (comp_i==0)
		  if (comp_j==0)  
		  { // ak(i,j) contains A00
		    aiijj 
		    	= Y.unlocked_component(i_dof,comp_i)*X.unlocked_component(j_dof,comp_j)*ak(i,j);
		    aii2jj 
		    	= Y.locked_component(i_dof,comp_i)  *X.unlocked_component(j_dof,comp_j)*ak(i,j);
		    aiijj2 
		    	= Y.unlocked_component(i_dof,comp_i)*X.locked_component(j_dof,comp_j)  *ak(i,j);
		    aii2jj2 
		    	= Y.locked_component(i_dof,comp_i)  *X.locked_component(j_dof,comp_j)  *ak(i,j);
		  }
		  else 
		  { // ak(i,j) contains A01
		    aiijj2 
		    	= Y.unlocked_component(i_dof,comp_i)*X.unlocked_component(j_dof,comp_j)*ak(i,j);
		    aii2jj2 
		    	= Y.locked_component(i_dof,comp_i)  *X.unlocked_component(j_dof,comp_j)*ak(i,j);
		    aiijj 
		    	= Y.unlocked_component(i_dof,comp_i)*X.locked_component(j_dof,comp_j)  *ak(i,j);
		    aii2jj 
		    	= Y.locked_component(i_dof,comp_i)  *X.locked_component(j_dof,comp_j)  *ak(i,j);
		  }
		else 
		  if (comp_j==0)  
		  { // ak(i,j) contains A10
		    aii2jj 
		    	= Y.unlocked_component(i_dof,comp_i)*X.unlocked_component(j_dof,comp_j)*ak(i,j);
		    aiijj 
		    	= Y.locked_component(i_dof,comp_i)  *X.unlocked_component(j_dof,comp_j)*ak(i,j);
		    aii2jj2 
		    	= Y.unlocked_component(i_dof,comp_i)*X.locked_component(j_dof,comp_j)  *ak(i,j);
		    aiijj2 
		    	= Y.locked_component(i_dof,comp_i)  *X.locked_component(j_dof,comp_j)  *ak(i,j);
		  }
		  else 
		  { // ak(i,j) contains A11
		    aii2jj2 
		    	= Y.unlocked_component(i_dof,comp_i)*X.unlocked_component(j_dof,comp_j)*ak(i,j);
		    aiijj2 
		    	= Y.locked_component(i_dof,comp_i)  *X.unlocked_component(j_dof,comp_j)*ak(i,j);
		    aii2jj 
		    	= Y.unlocked_component(i_dof,comp_i)*X.locked_component(j_dof,comp_j)  *ak(i,j);
		    aiijj 
		    	= Y.locked_component(i_dof,comp_i)  *X.locked_component(j_dof,comp_j)  *ak(i,j);
		  }

		if   (Y.is_blocked(i_dof))
		  if (X.is_blocked(j_dof))    abb.entry(ii, jj) += aiijj;
		  else                        abu.entry(ii, jj) += aiijj;
		else 
		  if (X.is_blocked(j_dof))    aub.entry(ii, jj) += aiijj;
		  else                        auu.entry(ii, jj) += aiijj;

		if   (Y.is_blocked(i2_dof))
		  if (X.is_blocked(j_dof))    abb.entry(ii2, jj) += aii2jj;
		  else                        abu.entry(ii2, jj) += aii2jj;
		else 
		  if (X.is_blocked(j_dof))    aub.entry(ii2, jj) += aii2jj;
		  else                        auu.entry(ii2, jj) += aii2jj;

		if   (Y.is_blocked(i_dof))
		  if (X.is_blocked(j2_dof))   abb.entry(ii, jj2) += aiijj2;
		  else                        abu.entry(ii, jj2) += aiijj2;
		else 
		  if (X.is_blocked(j2_dof))   aub.entry(ii, jj2) += aiijj2;
		  else                        auu.entry(ii, jj2) += aiijj2;

		if   (Y.is_blocked(i2_dof))
		  if (X.is_blocked(j2_dof))   abb.entry(ii2, jj2) += aii2jj2;
		  else                        abu.entry(ii2, jj2) += aii2jj2;
		else 
		  if (X.is_blocked(j2_dof))   aub.entry(ii2, jj2) += aii2jj2;
		  else                        auu.entry(ii2, jj2) += aii2jj2;
	    }
            else if (X.is_locked(j_dof) && !Y.is_locked(i_dof))
            {
		size_type j2_dof=X.locked_with(j_dof);
		size_type jj2=X.index(j2_dof);
		size_type comp_j=(j_dof<j2_dof)?0:1;
		Float aiijj,aiijj2;
		if (comp_j==0)  
		  { // ak(i,j) contains A00
		    aiijj 
		    	= X.unlocked_component(j_dof,comp_j)*ak(i,j);
		    aiijj2 
		    	= X.locked_component(j_dof,comp_j)  *ak(i,j);
		  }
		  else 
		  { // ak(i,j) contains A01
		    aiijj2 
		    	= X.unlocked_component(j_dof,comp_j)*ak(i,j);
		    aiijj 
		    	= X.locked_component(j_dof,comp_j)  *ak(i,j);
		  }

		if   (Y.is_blocked(i_dof))
		  if (X.is_blocked(j_dof))    abb.entry(ii, jj) += aiijj;
		  else                        abu.entry(ii, jj) += aiijj;
		else 
		  if (X.is_blocked(j_dof))    aub.entry(ii, jj) += aiijj;
		  else                        auu.entry(ii, jj) += aiijj;

		if   (Y.is_blocked(i_dof))
		  if (X.is_blocked(j2_dof))   abb.entry(ii, jj2) += aiijj2;
		  else                        abu.entry(ii, jj2) += aiijj2;
		else 
		  if (X.is_blocked(j2_dof))   aub.entry(ii, jj2) += aiijj2;
		  else                        auu.entry(ii, jj2) += aiijj2;
	    }
	    else if (!X.is_locked(j_dof) && Y.is_locked(i_dof))
	    {
		size_type i2_dof=Y.locked_with(i_dof);
		size_type ii2=Y.index(i2_dof);
		size_type comp_i=(i_dof<i2_dof)?0:1;
		Float aiijj,aii2jj;
	    	if (comp_i==0)
		  { // ak(i,j) contains A00
		    aiijj 
		    	= Y.unlocked_component(i_dof,comp_i)*ak(i,j);
		    aii2jj 
		    	= Y.locked_component(i_dof,comp_i)  *ak(i,j);
		  }
		else 
		  { // ak(i,j) contains A10
		    aii2jj 
		    	= Y.unlocked_component(i_dof,comp_i)*ak(i,j);
		    aiijj 
		    	= Y.locked_component(i_dof,comp_i)  *ak(i,j);
		  }

		if   (Y.is_blocked(i_dof))
		  if (X.is_blocked(j_dof))    abb.entry(ii, jj) += aiijj;
		  else                        abu.entry(ii, jj) += aiijj;
		else 
		  if (X.is_blocked(j_dof))    aub.entry(ii, jj) += aiijj;
		  else                        auu.entry(ii, jj) += aiijj;

		if   (Y.is_blocked(i2_dof))
		  if (X.is_blocked(j_dof))    abb.entry(ii2, jj) += aii2jj;
		  else                        abu.entry(ii2, jj) += aii2jj;
		else 
		  if (X.is_blocked(j_dof))    aub.entry(ii2, jj) += aii2jj;
		  else                        auu.entry(ii2, jj) += aii2jj;
	    }
        }
      }
    }
    //
    // convert dynamic matrix to fixed-size one
    //
    a.uu = csr<Float>(auu);
    a.ub = csr<Float>(aub);
    a.bu = csr<Float>(abu);
    a.bb = csr<Float>(abb);
}
//! weighted mass forms and such
//
// Assembly procedures
//
// ex: form(V,V,"grad_grad")
void
form_assembly (form& a, const form_element& form_e, bool locked_boundaries)
{
    const space& X = a.get_first_space();
    const geo&   g = X.get_geo();
    if (locked_boundaries)
    	assembly_locked_boundaries (g.begin(), g.end(), form_e, a);
    else
    	assembly (g.begin(), g.end(), form_e, a);
}
// =============================================================================
// form related to a boundary domain
// =============================================================================
// example:
//  domain gamma = omega["boundary"];
//  V = space(omega,"P1");
//  W = space(omega,"P1",gamma);
//  form mb  (W,W,"mass"); 
//  form mtr (V,W,"mass");
void
form_hybrid_assembly (form& a, const form_element& form_e, const domain& d)
{
    assembly (d.begin(), d.end(), form_e, a);
}
// example:
//  form(V,V,"mass",gamma);
void
form_bdr_assembly (form& a, const form_element& form_e, const domain& d)
{
    assembly (d.begin(), d.end(), form_e, a);
}
// =============================================================================
// trace form, i.e. 1 or 0 float values
// =============================================================================
// example:
//  domain gamma = omega["boundary"];
//  V = space(omega,"P1");
//  W = space(omega,"P1",gamma);
//  trace tr(V,W);
// The difference with:
//  form mtr (V,W,"mass"); 
//  is that the trace is not weighted by element areas.

//! extract degrees of freedom on a boundary domain
void
form_trace_assembly (form& a, const domain& d)
{
    typedef geo::size_type size_type;
    //
    // compute dof numbering, if not yet done
    //
    const space& X = a.get_first_space();
    const space& Y = a.get_second_space();
    X.freeze();
    Y.freeze();
    // 
    // step 1 : map d_dof on g_dof (in array "num")
    //
    //  it is assumed that the elements of 
    //   - the domain        : d     (S sides)
    //   - the boundary mesh : Y.geo (K side-elements)
    //  are in the same order
    //  => may bug if a modif in the construction of the boundary mesh
    //     geo::geo(geo,domain) appends.
    //
    vector<size_type> num(Y.size(), numeric_limits<size_type>::max());
    tiny_vector<size_type> S_dof;
    tiny_vector<size_type> K_dof;
    geo::const_iterator iter_K = Y.get_geo().begin();
    for (domain::const_iterator iter_S = d.begin();
        iter_S != d.end(); iter_S++, iter_K++) {

        const geo_element& S = *iter_S;
        const geo_element& K = *iter_K;

        X.set_dof (S, S_dof);
        Y.set_dof (K, K_dof);
        for (size_type i = 0 ; i < S_dof.size(); i++) {
	  num[K_dof(i)] = S_dof(i);
        }
    }
    //
    // step 2 : built uu, ub, bu, bb
    //
    asr<Float> auu(Y.n_unknown(), X.n_unknown()) ;
    asr<Float> aub(Y.n_unknown(), X.n_blocked()) ;
    asr<Float> abu(Y.n_blocked(), X.n_unknown()) ;
    asr<Float> abb(Y.n_blocked(), X.n_blocked()) ;
  
    for (size_type i_dof=0 ; i_dof < Y.size(); i_dof++) {

        size_type j_dof = num[i_dof];

        size_type ii = Y.index(i_dof);
        size_type jj = X.index(j_dof);

	if   (Y.is_blocked(i_dof))
	  if (X.is_blocked(j_dof))    abb.entry(ii, jj) = 1.;
          else                        abu.entry(ii, jj) = 1.;
	else 
	  if (X.is_blocked(j_dof))    aub.entry(ii, jj) = 1.;
          else                        auu.entry(ii, jj) = 1.;
    }
    a.uu = csr<Float>(auu) ;
    a.ub = csr<Float>(aub) ;
    a.bu = csr<Float>(abu) ;
    a.bb = csr<Float>(abb) ;
}
// =============================================================================
// discontinuous galerkin for transport (u.grad)phi term
// =============================================================================
// ibra
// necessary for DG_initialize
georep::size_type
get_first (const set<georep::size_type>& x)
{
    return *(x.begin());
}
//necessary for DG_initialize
georep::size_type
get_second (const set<georep::size_type>& x)
{
    set<georep::size_type>::const_iterator p = x.begin();
    return *(++p);
}

//builds the following arrays:
// neighbours[i*K.size()+j] is the index of the neighbour of the i-th element
// accross its j-th edge (edge 0 is [0,1], ...)
// (neighbours[...]>#elements) when there is no neighbour
// accross the j-th edge.
void DG_initialize(const geo& g, size_t* neighbours) {
  geo::size_type undef = numeric_limits<geo::size_type>::max();
  vector<set<geo::size_type> > ball (g.n_node());
  build_point_to_element_sets (g.begin(), g.end(), ball.begin());
  int ii = 0;
  for (geo::const_iterator it = g.begin() ; it != g.end() ; it++, ii++){
    const geo_element& K = *it;
    vector<geo::size_type> neighbour; 
    // neighbour accross the j-th edge [v0,v1]
    geo::size_type idx = 0;
    for (geo::size_type j0 = 0; j0 < K.size(); j0++) {
      geo::size_type j1 = (j0+1) % K.size();
      geo::size_type v0 = K[j0];
      geo::size_type v1 = K[j1];
      
      // c = all elements containing [v0,v1]
      set<geo::size_type> c;
      set_intersection (ball[v0].begin(), ball[v0].end(), ball[v1].begin(), ball[v1].end(), 
 			insert_iterator<set<geo::size_type> > (c, c.begin()));
      
      switch (c.size()) {
      case 1: 
	// edge [v0,v1] is on the boundary
	//neighbour.push_back (undef);
	neighbours[K.index()*3 + j0] = undef;
	break;
      case 2:
	// edge [v0,v1] is internal: between K and the neighbour k1 accross [v0,v1]
 	if (get_first(c) != K.index()) 
	  //neighbour.push_back(get_first(c));
	  neighbours[K.index()*3 + j0] = get_first(c);
	else                     
	  //neighbour.push_back(get_second(c));
	  neighbours[K.index()*3 + j0] = get_second(c);
 	break;
      default:
	error_macro ("edge contained by " << c.size() << " elements");
      }
    }
  }
}
void
form_assembly_discontinuous_galerkin (
    form& a,
    const form_element& form_e)
{
  geo::size_type undef = numeric_limits<geo::size_type>::max();
  typedef size_t size_type;
  const space& X = a.get_first_space();
  const space& Y = a.get_second_space();
  form_e.initialize (X, Y);
  size_t neighbours[X.get_geo().size()*3];
  DG_initialize(X.get_geo(),neighbours);
  //
  // use temporary dynamic matrix structures
  //
  asr<Float> auu(Y.n_unknown(), X.n_unknown());
  asr<Float> aub(Y.n_unknown(), X.n_blocked());
  asr<Float> abu(Y.n_blocked(), X.n_unknown());
  asr<Float> abb(Y.n_blocked(), X.n_blocked());
  ublas::matrix<Float> ak;
  tiny_vector<size_type> id,idn;
  
  geo::const_iterator iter_K = Y.get_geo().begin();
  geo::const_iterator last_K = Y.get_geo().end();
  for (; iter_K != last_K; iter_K++) {
    const geo_element& K = *iter_K;
    // --------------------------------
    // compute elementary matrix ak
    // --------------------------------
    
    form_e(K, ak);
    
    // ------------------------------
    // assembly ak in sparse matrix a
    // ------------------------------
    X.set_global_dof (K, id);
    size_type n1 = ak.size1();
    //loop over the first 3 degrees of liberty,
    //that belong to the element K
    for (size_type i = 0; i < n1; i++) {
      for (size_type j = 0; j < n1; j++) {
	
	if (ak(i,j) == Float(0)) continue;
	
	size_type i_dof = X.domain_dof(id(i));
	size_type j_dof = X.domain_dof(id(j));
	 
	size_type ii    = X.index(i_dof);
	size_type jj    = X.index(j_dof);
	
	if   (Y.is_blocked(i_dof))
	  if (X.is_blocked(j_dof))    abb.entry(ii, jj) += ak(i,j);
	  else                        abu.entry(ii, jj) += ak(i,j);
	else 
	  if (X.is_blocked(j_dof))    aub.entry(ii, jj) += ak(i,j);
	  else                        auu.entry(ii, jj) += ak(i,j);
      }
    }
    //loop over the edges: edge j involves
    //degrees of liberty #(n1+2*j) and #(n1+2*j+1)
    geo::const_iterator tab_K = Y.get_geo().begin();
    for (size_type j = 0; j < K.size(); j++) {
      //neighbour across the j-th edge
      size_t ng = neighbours[K.index()*K.size()+j];
      if (ng == undef)//no neighbour on this edge
	continue;
      const geo_element& Kj = tab_K[ng];
      X.set_global_dof (Kj, idn);
      //we look for the local number of the degrees of liberty in
      //the neighbour element
      size_t local_n0 = Kj.size();
      size_t local_n1 = Kj.size();
      for (size_t i = 0 ; i<K.size() ; i++) {
	if (Kj[i] == K[(j+1)%K.size()]) {
	  local_n0 = i; local_n1 = (i+1)%Kj.size();
	  break;
	}
      }
      //loop over the vertices of K
      for (size_t i = 0 ; i < K.size() ; i++) {

	if (ak(i,n1+2*j) == Float(0)) continue;
	size_type j_dof = X.domain_dof(idn(local_n0));
	size_type i_dof = X.domain_dof(id(i));
	
	size_type ii    = X.index(i_dof);
	size_type jj    = X.index(j_dof);
	
	if   (Y.is_blocked(i_dof))
	  if (X.is_blocked(j_dof))    abb.entry(ii, jj) += ak(i,n1+2*j);
	  else                        abu.entry(ii, jj) += ak(i,n1+2*j);
	else 
	  if (X.is_blocked(j_dof))    aub.entry(ii, jj) += ak(i,n1+2*j);
	  else                        auu.entry(ii, jj) += ak(i,n1+2*j);

	if (ak(i,n1+2*j+1) == Float(0)) continue;
	j_dof = X.domain_dof(idn(local_n1));
	i_dof = X.domain_dof(id(i));
	
	ii    = X.index(i_dof);
	jj    = X.index(j_dof);
	
	if   (Y.is_blocked(i_dof))
	  if (X.is_blocked(j_dof))    abb.entry(ii, jj) += ak(i,n1+2*j+1);
	  else                        abu.entry(ii, jj) += ak(i,n1+2*j+1);
	else 
	  if (X.is_blocked(j_dof))    aub.entry(ii, jj) += ak(i,n1+2*j+1);
	  else                        auu.entry(ii, jj) += ak(i,n1+2*j+1);
      }
    }
  }
  //
  // convert dynamic matrix to fixed-size one
  //
  a.uu = csr<Float>(auu);
  a.ub = csr<Float>(aub);
  a.bu = csr<Float>(abu);
  a.bb = csr<Float>(abb);
}
void
form_assembly_discontinuous_galerkin_bdr (
    form& a,
    const form_element& form_e)
{
  geo::size_type undef = numeric_limits<geo::size_type>::max();
  typedef size_t size_type;
  const space& X = a.get_first_space();
  const space& Y = a.get_second_space();
  form_e.initialize (X, Y);
  size_t neighbours[X.get_geo().size()*3];
  DG_initialize(X.get_geo(),neighbours);
  //
  // use temporary dynamic matrix structures
  //
  asr<Float> auu(Y.n_unknown(), X.n_unknown());
  asr<Float> aub(Y.n_unknown(), X.n_blocked());
  asr<Float> abu(Y.n_blocked(), X.n_unknown());
  asr<Float> abb(Y.n_blocked(), X.n_blocked());
  ublas::matrix<Float> ak;
  tiny_vector<size_type> id,idn;
  
  geo::const_iterator iter_K = Y.get_geo().begin();
  geo::const_iterator last_K = Y.get_geo().end();
  for (; iter_K != last_K; iter_K++) {
    const geo_element& K = *iter_K;
    // --------------------------------
    // compute elementary matrix ak
    // --------------------------------
    
    form_e(K, ak);
    
    // ------------------------------
    // assembly ak in sparse matrix a
    // ------------------------------
    X.set_global_dof (K, id);
    size_type n1 = ak.size1();
    //loop over the edges: edge j involves
    //degrees of liberty #(n1+2*j) and #(n1+2*j+1)
    geo::const_iterator tab_K = Y.get_geo().begin();
    for (size_type j = 0; j < K.size(); j++) {
      //neighbour across the j-th edge
      size_t ng = neighbours[K.index()*K.size()+j];
      if (ng != undef)//not on the boundary
	continue;
      const geo_element& Kj = K;
      X.set_global_dof (Kj, idn);
      //we look for the local number of the degrees of liberty in
      //the neighbour element
      size_t local_n1 = j;
      size_t local_n0 = (j+1)%K.size();
      //loop over the vertices of K
      for (size_t i = 0 ; i < K.size() ; i++) {

	if (ak(i,n1+2*j) == Float(0)) continue;
	size_type j_dof = X.domain_dof(idn(local_n0));
	size_type i_dof = X.domain_dof(id(i));
	
	size_type ii    = X.index(i_dof);
	size_type jj    = X.index(j_dof);
	
	if   (Y.is_blocked(i_dof))
	  if (X.is_blocked(j_dof))    abb.entry(ii, jj) += ak(i,n1+2*j);
	  else                        abu.entry(ii, jj) += ak(i,n1+2*j);
	else 
	  if (X.is_blocked(j_dof))    aub.entry(ii, jj) += ak(i,n1+2*j);
	  else                        auu.entry(ii, jj) += ak(i,n1+2*j);

	if (ak(i,n1+2*j+1) == Float(0)) continue;
	j_dof = X.domain_dof(idn(local_n1));
	i_dof = X.domain_dof(id(i));
	
	ii    = X.index(i_dof);
	jj    = X.index(j_dof);
	
	if   (Y.is_blocked(i_dof))
	  if (X.is_blocked(j_dof))    abb.entry(ii, jj) += ak(i,n1+2*j+1);
	  else                        abu.entry(ii, jj) += ak(i,n1+2*j+1);
	else 
	  if (X.is_blocked(j_dof))    aub.entry(ii, jj) += ak(i,n1+2*j+1);
	  else                        auu.entry(ii, jj) += ak(i,n1+2*j+1);
      }
    }
  }
  //
  // convert dynamic matrix to fixed-size one
  //
  a.uu = csr<Float>(auu);
  a.ub = csr<Float>(aub);
  a.bu = csr<Float>(abu);
  a.bb = csr<Float>(abb);
}
// ======================================================================================
// assembly on a banded level set: form(Wh,Bh,"mass_s",bh);
//  where Wh is defined on Gamma_h = {x / phi_h(x) = }
//  and   Bh is defined on beta_h  = the band containing Gamma_h
// ======================================================================================
void
form_band_assembly (class form& a, const form_element& form_e, const class band& bh)
{
    typedef size_t size_type;
    const space& X = a.get_first_space();
    const space& Y = a.get_second_space();
    form_e.set_weight (bh.get_bgd_phi());
    form_e.set_use_coordinate_system_weight(true);
    form_e.set_skip_geo_compatibility_check();
    form_e.initialize (X, Y);
    //
    // use temporary dynamic matrix structures
    //
    asr<Float> auu(Y.n_unknown(), X.n_unknown());
    asr<Float> aub(Y.n_unknown(), X.n_blocked());
    asr<Float> abu(Y.n_blocked(), X.n_unknown());
    asr<Float> abb(Y.n_blocked(), X.n_blocked());
    ublas::matrix<Float> ak;
    tiny_vector<size_type> idy, jdx;

    typedef enum {Wh_Wh, Wh_Bh, Bh_Wh, Bh_Bh} XY_type;
    XY_type XY_switch;
    if (a.get_first_space().dimension()  == a.get_first_space().get_geo().map_dimension() +1 &&
        a.get_second_space().dimension() == a.get_second_space().get_geo().map_dimension()) {
      XY_switch = Wh_Bh; // extension
    } else if (a.get_first_space().dimension()  == a.get_first_space().get_geo().map_dimension() &&
               a.get_second_space().dimension() == a.get_second_space().get_geo().map_dimension()+1) {
      XY_switch = Bh_Wh; // prolongement
    } else if (a.get_first_space().dimension()  == a.get_first_space().get_geo().map_dimension() &&
               a.get_second_space().dimension() == a.get_second_space().get_geo().map_dimension()) {
      XY_switch = Bh_Bh; // classic banded level set
    } else {
      error_macro ("form on a banded level set: at least one of the two spaces may be a band");
    }
    space Bh;
    if (XY_switch == Wh_Bh || XY_switch == Bh_Bh) {
      Bh = Y;
    } else { // Bh_Wh
      Bh = X;
    }
    geo::const_iterator iter_element, last_element;
    if (Bh.is_on_boundary_domain()) {
      iter_element = Bh.get_boundary_domain().begin();
      last_element = Bh.get_boundary_domain().end();
    } else {
      iter_element = Bh.get_geo().begin();
      last_element = Bh.get_geo().end();
    }
    size_t idx = 0;
    for (; iter_element != last_element; iter_element++, idx++) {
      const geo_element& K = *iter_element;
      // --------------------------------
      // compute elementary matrix ak
      // --------------------------------
      form_e (K, ak);
      // ------------------------------
      // assembly ak in sparse matrix a
      // ------------------------------Z
      size_t Kx_idx, Ky_idx;
      if (XY_switch == Wh_Bh) {
        Ky_idx = K.index();
	Kx_idx = bh.element2face (idx);
      } else if (XY_switch == Bh_Wh) {
        Kx_idx = K.index();
	Ky_idx = bh.element2face (idx);
      } else { // Bh_Bh
        Kx_idx = Ky_idx = K.index();
      }
      check_macro (Kx_idx < X.get_global_geo().size(),	
	"X element index " << Kx_idx << " out of range [0:"<< X.get_global_geo().size() << "[");
      check_macro (Ky_idx < Y.get_global_geo().size(),
	"Y element index " << Ky_idx << " out of range [0:"<< Y.get_global_geo().size() << "[");
      const geo_element& Kx = X.get_global_geo().element(Kx_idx);
      const geo_element& Ky = Y.get_global_geo().element(Ky_idx);
      X.set_global_dof (Kx, jdx);
      Y.set_global_dof (Ky, idy);
      size_type ny = ak.size1(); // nrow
      size_type nx = ak.size2(); // ncol
      for (size_type i = 0; i < ny; i++) {
        for (size_type j = 0; j < nx; j++) {

	    if (ak(i,j) == Float(0)) continue;

	    size_type i_dof = Y.domain_dof(idy(i));
	    size_type j_dof = X.domain_dof(jdx(j));

	    size_type ii    = Y.index(i_dof);
	    size_type jj    = X.index(j_dof);
   
	    if   (Y.is_blocked(i_dof))
	      if (X.is_blocked(j_dof))    abb.entry(ii, jj) += ak(i,j);
              else                        abu.entry(ii, jj) += ak(i,j);
	    else 
	      if (X.is_blocked(j_dof))    aub.entry(ii, jj) += ak(i,j);
              else                        auu.entry(ii, jj) += ak(i,j);
        }
      }
    }
    //
    // convert dynamic matrix to fixed-size one
    //
    a.uu = csr<Float>(auu);
    a.ub = csr<Float>(aub);
    a.bu = csr<Float>(abu);
    a.bb = csr<Float>(abb);
}

}// namespace rheolef
