/*$Id: baijfact.c,v 1.81 2000/04/09 04:36:19 bsmith Exp bsmith $*/
/*
Factorization code for BAIJ format.
*/
#include "src/mat/impls/baij/seq/baij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/ilu.h"
/*
The symbolic factorization code is identical to that for AIJ format,
except for very small changes since this is now a SeqBAIJ datastructure.
NOT good code reuse.
*/
#undef __FUNC__
#define __FUNC__ /**/"MatLUFactorSymbolic_SeqBAIJ"
int MatLUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,double f,Mat *B)
{
Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b;
IS isicol;
int *r,*ic,ierr,i,n = a->mbs,*ai = a->i,*aj = a->j;
int *ainew,*ajnew,jmax,*fill,*ajtmp,nz,bs = a->bs,bs2=a->bs2;
int *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im;
PetscFunctionBegin;
PetscValidHeaderSpecific(isrow,IS_COOKIE);
PetscValidHeaderSpecific(iscol,IS_COOKIE);
if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,0,"matrix must be square");
if (!isrow) {
ierr = ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&isrow);CHKERRQ(ierr);
}
if (!iscol) {
ierr = ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&iscol);CHKERRQ(ierr);
}
ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
/* get new row pointers */
ainew = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(ainew);
ainew[0] = 0;
/* don't know how many column pointers are needed so estimate */
jmax = (int)(f*ai[n] + 1);
ajnew = (int*)PetscMalloc((jmax)*sizeof(int));CHKPTRQ(ajnew);
/* fill is a linked list of nonzeros in active row */
fill = (int*)PetscMalloc((2*n+1)*sizeof(int));CHKPTRQ(fill);
im = fill + n + 1;
/* idnew is location of diagonal in factor */
idnew = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(idnew);
idnew[0] = 0;
for (i=0; i 0) {
idx = *ajtmp++;
nzbd++;
if (idx == i) im[row] = nzbd;
do {
m = fm;
fm = fill[m];
} while (fm < idx);
if (fm != idx) {
fill[m] = idx;
fill[idx] = fm;
fm = idx;
nnz++;
}
}
row = fill[row];
}
/* copy new filled row into permanent storage */
ainew[i+1] = ainew[i] + nnz;
if (ainew[i+1] > jmax) {
/* estimate how much additional space we will need */
/* use the strategy suggested by David Hysom */
/* just double the memory each time */
int maxadd = jmax;
/* maxadd = (int)((f*(ai[n]+1)*(n-i+5))/n); */
if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
jmax += maxadd;
/* allocate a longer ajnew */
ajtmp = (int*)PetscMalloc(jmax*sizeof(int));CHKPTRQ(ajtmp);
ierr = PetscMemcpy(ajtmp,ajnew,ainew[i]*sizeof(int));CHKERRQ(ierr);
ierr = PetscFree(ajnew);CHKERRQ(ierr);
ajnew = ajtmp;
realloc++; /* count how many times we realloc */
}
ajtmp = ajnew + ainew[i];
fm = fill[n];
nzi = 0;
im[i] = nnz;
while (nnz--) {
if (fm < i) nzi++;
*ajtmp++ = fm;
fm = fill[fm];
}
idnew[i] = ainew[i] + nzi;
}
if (ai[n] != 0) {
PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
PLogInfo(A,"MatLUFactorSymbolic_SeqBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
PLogInfo(A,"MatLUFactorSymbolic_SeqBAIJ:Run with -pc_lu_fill %g or use \n",af);
PLogInfo(A,"MatLUFactorSymbolic_SeqBAIJ:PCLUSetFill(pc,%g);\n",af);
PLogInfo(A,"MatLUFactorSymbolic_SeqBAIJ:for best performance.\n");
} else {
PLogInfo(A,"MatLUFactorSymbolic_SeqBAIJ:Empty matrix.\n");
}
ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
ierr = PetscFree(fill);CHKERRQ(ierr);
/* put together the new matrix */
ierr = MatCreateSeqBAIJ(A->comm,bs,bs*n,bs*n,0,PETSC_NULL,B);CHKERRQ(ierr);
PLogObjectParent(*B,isicol);
b = (Mat_SeqBAIJ*)(*B)->data;
ierr = PetscFree(b->imax);CHKERRQ(ierr);
b->singlemalloc = PETSC_FALSE;
/* the next line frees the default space generated by the Create() */
ierr = PetscFree(b->a);CHKERRQ(ierr);
ierr = PetscFree(b->ilen);CHKERRQ(ierr);
b->a = (MatScalar*)PetscMalloc((ainew[n]+1)*sizeof(MatScalar)*bs2);CHKPTRQ(b->a);
b->j = ajnew;
b->i = ainew;
b->diag = idnew;
b->ilen = 0;
b->imax = 0;
b->row = isrow;
b->col = iscol;
ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
b->icol = isicol;
b->solve_work = (Scalar*)PetscMalloc((bs*n+bs)*sizeof(Scalar));CHKPTRQ(b->solve_work);
/* In b structure: Free imax, ilen, old a, old j.
Allocate idnew, solve_work, new a, new j */
PLogObjectMemory(*B,(ainew[n]-n)*(sizeof(int)+sizeof(MatScalar)));
b->maxnz = b->nz = ainew[n];
(*B)->factor = FACTOR_LU;
(*B)->info.factor_mallocs = realloc;
(*B)->info.fill_ratio_given = f;
if (ai[n] != 0) {
(*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
} else {
(*B)->info.fill_ratio_needed = 0.0;
}
PetscFunctionReturn(0);
}
/* ----------------------------------------------------------- */
#undef __FUNC__
#define __FUNC__ /**/"MatLUFactorNumeric_SeqBAIJ_N"
int MatLUFactorNumeric_SeqBAIJ_N(Mat A,Mat *B)
{
Mat C = *B;
Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
IS isrow = b->row,isicol = b->icol;
int *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
int *ajtmpold,*ajtmp,nz,row,bslog,*ai=a->i,*aj=a->j,k,flg;
int *diag_offset=b->diag,diag,bs=a->bs,bs2 = a->bs2,*v_pivots,*pj;
MatScalar *ba = b->a,*aa = a->a,*pv,*v,*rtmp,*multiplier,*v_work,*pc,*w;
PetscFunctionBegin;
ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
rtmp = (MatScalar*)PetscMalloc(bs2*(n+1)*sizeof(MatScalar));CHKPTRQ(rtmp);
ierr = PetscMemzero(rtmp,bs2*(n+1)*sizeof(MatScalar));CHKERRQ(ierr);
/* generate work space needed by dense LU factorization */
v_work = (MatScalar*)PetscMalloc(bs*sizeof(int) + (bs+bs2)*sizeof(MatScalar));CHKPTRQ(v_work);
multiplier = v_work + bs;
v_pivots = (int*)(multiplier + bs2);
/* flops in while loop */
bslog = 2*bs*bs2;
for (i=0; ia */
pv = ba + bs2*bi[i];
pj = bj + bi[i];
nz = bi[i+1] - bi[i];
for (j=0; jfactor = FACTOR_LU;
C->assembled = PETSC_TRUE;
PLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
PetscFunctionReturn(0);
}
/* ------------------------------------------------------------*/
/*
Version for when blocks are 7 by 7
*/
#undef __FUNC__
#define __FUNC__ /**/"MatLUFactorNumeric_SeqBAIJ_7"
int MatLUFactorNumeric_SeqBAIJ_7(Mat A,Mat *B)
{
Mat C = *B;
Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
IS isrow = b->row,isicol = b->icol;
int *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
int *ajtmpold,*ajtmp,nz,row;
int *diag_offset = b->diag,idx,*ai=a->i,*aj=a->j,*pj;
MatScalar *pv,*v,*rtmp,*pc,*w,*x;
MatScalar p1,p2,p3,p4,m1,m2,m3,m4,m5,m6,m7,m8,m9,x1,x2,x3,x4;
MatScalar p5,p6,p7,p8,p9,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,x16;
MatScalar x17,x18,x19,x20,x21,x22,x23,x24,x25,p10,p11,p12,p13,p14;
MatScalar p15,p16,p17,p18,p19,p20,p21,p22,p23,p24,p25,m10,m11,m12;
MatScalar m13,m14,m15,m16,m17,m18,m19,m20,m21,m22,m23,m24,m25;
MatScalar p26,p27,p28,p29,p30,p31,p32,p33,p34,p35,p36;
MatScalar p37,p38,p39,p40,p41,p42,p43,p44,p45,p46,p47,p48,p49;
MatScalar x26,x27,x28,x29,x30,x31,x32,x33,x34,x35,x36;
MatScalar x37,x38,x39,x40,x41,x42,x43,x44,x45,x46,x47,x48,x49;
MatScalar m26,m27,m28,m29,m30,m31,m32,m33,m34,m35,m36;
MatScalar m37,m38,m39,m40,m41,m42,m43,m44,m45,m46,m47,m48,m49;
MatScalar *ba = b->a,*aa = a->a;
PetscFunctionBegin;
ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
rtmp = (MatScalar*)PetscMalloc(49*(n+1)*sizeof(MatScalar));CHKPTRQ(rtmp);
for (i=0; ia */
pv = ba + 49*bi[i];
pj = bj + bi[i];
nz = bi[i+1] - bi[i];
for (j=0; jfactor = FACTOR_LU;
C->assembled = PETSC_TRUE;
PLogFlops(1.3333*343*b->mbs); /* from inverting diagonal blocks */
PetscFunctionReturn(0);
}
/*
Version for when blocks are 7 by 7 Using natural ordering
*/
#undef __FUNC__
#define __FUNC__ /**/"MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering"
int MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering(Mat A,Mat *B)
{
Mat C = *B;
Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
int ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
int *ajtmpold,*ajtmp,nz,row;
int *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj;
MatScalar *pv,*v,*rtmp,*pc,*w,*x;
MatScalar x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15;
MatScalar x16,x17,x18,x19,x20,x21,x22,x23,x24,x25;
MatScalar p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,p11,p12,p13,p14,p15;
MatScalar p16,p17,p18,p19,p20,p21,p22,p23,p24,p25;
MatScalar m1,m2,m3,m4,m5,m6,m7,m8,m9,m10,m11,m12,m13,m14,m15;
MatScalar m16,m17,m18,m19,m20,m21,m22,m23,m24,m25;
MatScalar p26,p27,p28,p29,p30,p31,p32,p33,p34,p35,p36;
MatScalar p37,p38,p39,p40,p41,p42,p43,p44,p45,p46,p47,p48,p49;
MatScalar x26,x27,x28,x29,x30,x31,x32,x33,x34,x35,x36;
MatScalar x37,x38,x39,x40,x41,x42,x43,x44,x45,x46,x47,x48,x49;
MatScalar m26,m27,m28,m29,m30,m31,m32,m33,m34,m35,m36;
MatScalar m37,m38,m39,m40,m41,m42,m43,m44,m45,m46,m47,m48,m49;
MatScalar *ba = b->a,*aa = a->a;
PetscFunctionBegin;
rtmp = (MatScalar*)PetscMalloc(49*(n+1)*sizeof(MatScalar));CHKPTRQ(rtmp);
for (i=0; ia */
pv = ba + 49*bi[i];
pj = bj + bi[i];
nz = bi[i+1] - bi[i];
for (j=0; jfactor = FACTOR_LU;
C->assembled = PETSC_TRUE;
PLogFlops(1.3333*343*b->mbs); /* from inverting diagonal blocks */
PetscFunctionReturn(0);
}
/* ------------------------------------------------------------*/
/*
Version for when blocks are 6 by 6
*/
#undef __FUNC__
#define __FUNC__ /**/"MatLUFactorNumeric_SeqBAIJ_6"
int MatLUFactorNumeric_SeqBAIJ_6(Mat A,Mat *B)
{
Mat C = *B;
Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
IS isrow = b->row,isicol = b->icol;
int *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
int *ajtmpold,*ajtmp,nz,row;
int *diag_offset = b->diag,idx,*ai=a->i,*aj=a->j,*pj;
MatScalar *pv,*v,*rtmp,*pc,*w,*x;
MatScalar p1,p2,p3,p4,m1,m2,m3,m4,m5,m6,m7,m8,m9,x1,x2,x3,x4;
MatScalar p5,p6,p7,p8,p9,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,x16;
MatScalar x17,x18,x19,x20,x21,x22,x23,x24,x25,p10,p11,p12,p13,p14;
MatScalar p15,p16,p17,p18,p19,p20,p21,p22,p23,p24,p25,m10,m11,m12;
MatScalar m13,m14,m15,m16,m17,m18,m19,m20,m21,m22,m23,m24,m25;
MatScalar p26,p27,p28,p29,p30,p31,p32,p33,p34,p35,p36;
MatScalar x26,x27,x28,x29,x30,x31,x32,x33,x34,x35,x36;
MatScalar m26,m27,m28,m29,m30,m31,m32,m33,m34,m35,m36;
MatScalar *ba = b->a,*aa = a->a;
PetscFunctionBegin;
ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
rtmp = (MatScalar*)PetscMalloc(36*(n+1)*sizeof(MatScalar));CHKPTRQ(rtmp);
for (i=0; ia */
pv = ba + 36*bi[i];
pj = bj + bi[i];
nz = bi[i+1] - bi[i];
for (j=0; jfactor = FACTOR_LU;
C->assembled = PETSC_TRUE;
PLogFlops(1.3333*216*b->mbs); /* from inverting diagonal blocks */
PetscFunctionReturn(0);
}
/*
Version for when blocks are 6 by 6 Using natural ordering
*/
#undef __FUNC__
#define __FUNC__ /**/"MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering"
int MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering(Mat A,Mat *B)
{
Mat C = *B;
Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
int ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
int *ajtmpold,*ajtmp,nz,row;
int *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj;
MatScalar *pv,*v,*rtmp,*pc,*w,*x;
MatScalar x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15;
MatScalar x16,x17,x18,x19,x20,x21,x22,x23,x24,x25;
MatScalar p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,p11,p12,p13,p14,p15;
MatScalar p16,p17,p18,p19,p20,p21,p22,p23,p24,p25;
MatScalar m1,m2,m3,m4,m5,m6,m7,m8,m9,m10,m11,m12,m13,m14,m15;
MatScalar m16,m17,m18,m19,m20,m21,m22,m23,m24,m25;
MatScalar p26,p27,p28,p29,p30,p31,p32,p33,p34,p35,p36;
MatScalar x26,x27,x28,x29,x30,x31,x32,x33,x34,x35,x36;
MatScalar m26,m27,m28,m29,m30,m31,m32,m33,m34,m35,m36;
MatScalar *ba = b->a,*aa = a->a;
PetscFunctionBegin;
rtmp = (MatScalar*)PetscMalloc(36*(n+1)*sizeof(MatScalar));CHKPTRQ(rtmp);
for (i=0; ia */
pv = ba + 36*bi[i];
pj = bj + bi[i];
nz = bi[i+1] - bi[i];
for (j=0; jfactor = FACTOR_LU;
C->assembled = PETSC_TRUE;
PLogFlops(1.3333*216*b->mbs); /* from inverting diagonal blocks */
PetscFunctionReturn(0);
}
/* ------------------------------------------------------------*/
/*
Version for when blocks are 5 by 5
*/
#undef __FUNC__
#define __FUNC__ /**/"MatLUFactorNumeric_SeqBAIJ_5"
int MatLUFactorNumeric_SeqBAIJ_5(Mat A,Mat *B)
{
Mat C = *B;
Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
IS isrow = b->row,isicol = b->icol;
int *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
int *ajtmpold,*ajtmp,nz,row;
int *diag_offset = b->diag,idx,*ai=a->i,*aj=a->j,*pj;
MatScalar *pv,*v,*rtmp,*pc,*w,*x;
MatScalar p1,p2,p3,p4,m1,m2,m3,m4,m5,m6,m7,m8,m9,x1,x2,x3,x4;
MatScalar p5,p6,p7,p8,p9,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,x16;
MatScalar x17,x18,x19,x20,x21,x22,x23,x24,x25,p10,p11,p12,p13,p14;
MatScalar p15,p16,p17,p18,p19,p20,p21,p22,p23,p24,p25,m10,m11,m12;
MatScalar m13,m14,m15,m16,m17,m18,m19,m20,m21,m22,m23,m24,m25;
MatScalar *ba = b->a,*aa = a->a;
PetscFunctionBegin;
ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
rtmp = (MatScalar*)PetscMalloc(25*(n+1)*sizeof(MatScalar));CHKPTRQ(rtmp);
for (i=0; ia */
pv = ba + 25*bi[i];
pj = bj + bi[i];
nz = bi[i+1] - bi[i];
for (j=0; jfactor = FACTOR_LU;
C->assembled = PETSC_TRUE;
PLogFlops(1.3333*125*b->mbs); /* from inverting diagonal blocks */
PetscFunctionReturn(0);
}
/*
Version for when blocks are 5 by 5 Using natural ordering
*/
#undef __FUNC__
#define __FUNC__ /**/"MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering"
int MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering(Mat A,Mat *B)
{
Mat C = *B;
Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
int ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
int *ajtmpold,*ajtmp,nz,row;
int *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj;
MatScalar *pv,*v,*rtmp,*pc,*w,*x;
MatScalar x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15;
MatScalar x16,x17,x18,x19,x20,x21,x22,x23,x24,x25;
MatScalar p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,p11,p12,p13,p14,p15;
MatScalar p16,p17,p18,p19,p20,p21,p22,p23,p24,p25;
MatScalar m1,m2,m3,m4,m5,m6,m7,m8,m9,m10,m11,m12,m13,m14,m15;
MatScalar m16,m17,m18,m19,m20,m21,m22,m23,m24,m25;
MatScalar *ba = b->a,*aa = a->a;
PetscFunctionBegin;
rtmp = (MatScalar*)PetscMalloc(25*(n+1)*sizeof(MatScalar));CHKPTRQ(rtmp);
for (i=0; ia */
pv = ba + 25*bi[i];
pj = bj + bi[i];
nz = bi[i+1] - bi[i];
for (j=0; jfactor = FACTOR_LU;
C->assembled = PETSC_TRUE;
PLogFlops(1.3333*125*b->mbs); /* from inverting diagonal blocks */
PetscFunctionReturn(0);
}
/* ------------------------------------------------------------*/
/*
Version for when blocks are 4 by 4
*/
#undef __FUNC__
#define __FUNC__ /**/"MatLUFactorNumeric_SeqBAIJ_4"
int MatLUFactorNumeric_SeqBAIJ_4(Mat A,Mat *B)
{
Mat C = *B;
Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
IS isrow = b->row,isicol = b->icol;
int *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
int *ajtmpold,*ajtmp,nz,row;
int *diag_offset = b->diag,idx,*ai=a->i,*aj=a->j,*pj;
MatScalar *pv,*v,*rtmp,*pc,*w,*x;
MatScalar p1,p2,p3,p4,m1,m2,m3,m4,m5,m6,m7,m8,m9,x1,x2,x3,x4;
MatScalar p5,p6,p7,p8,p9,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,x16;
MatScalar p10,p11,p12,p13,p14,p15,p16,m10,m11,m12;
MatScalar m13,m14,m15,m16;
MatScalar *ba = b->a,*aa = a->a;
PetscFunctionBegin;
ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
rtmp = (MatScalar*)PetscMalloc(16*(n+1)*sizeof(MatScalar));CHKPTRQ(rtmp);
for (i=0; ia */
pv = ba + 16*bi[i];
pj = bj + bi[i];
nz = bi[i+1] - bi[i];
for (j=0; jfactor = FACTOR_LU;
C->assembled = PETSC_TRUE;
PLogFlops(1.3333*64*b->mbs); /* from inverting diagonal blocks */
PetscFunctionReturn(0);
}
/*
Version for when blocks are 4 by 4 Using natural ordering
*/
#undef __FUNC__
#define __FUNC__ /**/"MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering"
int MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering(Mat A,Mat *B)
{
Mat C = *B;
Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
int ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
int *ajtmpold,*ajtmp,nz,row;
int *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj;
MatScalar *pv,*v,*rtmp,*pc,*w,*x;
MatScalar p1,p2,p3,p4,m1,m2,m3,m4,m5,m6,m7,m8,m9,x1,x2,x3,x4;
MatScalar p5,p6,p7,p8,p9,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,x16;
MatScalar p10,p11,p12,p13,p14,p15,p16,m10,m11,m12;
MatScalar m13,m14,m15,m16;
MatScalar *ba = b->a,*aa = a->a;
PetscFunctionBegin;
rtmp = (MatScalar*)PetscMalloc(16*(n+1)*sizeof(MatScalar));CHKPTRQ(rtmp);
for (i=0; ia */
pv = ba + 16*bi[i];
pj = bj + bi[i];
nz = bi[i+1] - bi[i];
for (j=0; jfactor = FACTOR_LU;
C->assembled = PETSC_TRUE;
PLogFlops(1.3333*64*b->mbs); /* from inverting diagonal blocks */
PetscFunctionReturn(0);
}
/* ------------------------------------------------------------*/
/*
Version for when blocks are 3 by 3
*/
#undef __FUNC__
#define __FUNC__ /**/"MatLUFactorNumeric_SeqBAIJ_3"
int MatLUFactorNumeric_SeqBAIJ_3(Mat A,Mat *B)
{
Mat C = *B;
Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
IS isrow = b->row,isicol = b->icol;
int *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
int *ajtmpold,*ajtmp,nz,row,*ai=a->i,*aj=a->j;
int *diag_offset = b->diag,idx,*pj;
MatScalar *pv,*v,*rtmp,*pc,*w,*x;
MatScalar p1,p2,p3,p4,m1,m2,m3,m4,m5,m6,m7,m8,m9,x1,x2,x3,x4;
MatScalar p5,p6,p7,p8,p9,x5,x6,x7,x8,x9;
MatScalar *ba = b->a,*aa = a->a;
PetscFunctionBegin;
ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
rtmp = (MatScalar*)PetscMalloc(9*(n+1)*sizeof(MatScalar));CHKPTRQ(rtmp);
for (i=0; ia */
pv = ba + 9*bi[i];
pj = bj + bi[i];
nz = bi[i+1] - bi[i];
for (j=0; jfactor = FACTOR_LU;
C->assembled = PETSC_TRUE;
PLogFlops(1.3333*27*b->mbs); /* from inverting diagonal blocks */
PetscFunctionReturn(0);
}
/*
Version for when blocks are 3 by 3 Using natural ordering
*/
#undef __FUNC__
#define __FUNC__ /**/"MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering"
int MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering(Mat A,Mat *B)
{
Mat C = *B;
Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
int ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
int *ajtmpold,*ajtmp,nz,row;
int *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj;
MatScalar *pv,*v,*rtmp,*pc,*w,*x;
MatScalar p1,p2,p3,p4,m1,m2,m3,m4,m5,m6,m7,m8,m9,x1,x2,x3,x4;
MatScalar p5,p6,p7,p8,p9,x5,x6,x7,x8,x9;
MatScalar *ba = b->a,*aa = a->a;
PetscFunctionBegin;
rtmp = (MatScalar*)PetscMalloc(9*(n+1)*sizeof(MatScalar));CHKPTRQ(rtmp);
for (i=0; ia */
pv = ba + 9*bi[i];
pj = bj + bi[i];
nz = bi[i+1] - bi[i];
for (j=0; jfactor = FACTOR_LU;
C->assembled = PETSC_TRUE;
PLogFlops(1.3333*27*b->mbs); /* from inverting diagonal blocks */
PetscFunctionReturn(0);
}
/* ------------------------------------------------------------*/
/*
Version for when blocks are 2 by 2
*/
#undef __FUNC__
#define __FUNC__ /**/"MatLUFactorNumeric_SeqBAIJ_2"
int MatLUFactorNumeric_SeqBAIJ_2(Mat A,Mat *B)
{
Mat C = *B;
Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
IS isrow = b->row,isicol = b->icol;
int *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
int *ajtmpold,*ajtmp,nz,row;
int *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj;
MatScalar *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4;
MatScalar p1,p2,p3,p4;
MatScalar *ba = b->a,*aa = a->a;
PetscFunctionBegin;
ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
rtmp = (MatScalar*)PetscMalloc(4*(n+1)*sizeof(MatScalar));CHKPTRQ(rtmp);
for (i=0; ia */
pv = ba + 4*bi[i];
pj = bj + bi[i];
nz = bi[i+1] - bi[i];
for (j=0; jfactor = FACTOR_LU;
C->assembled = PETSC_TRUE;
PLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
PetscFunctionReturn(0);
}
/*
Version for when blocks are 2 by 2 Using natural ordering
*/
#undef __FUNC__
#define __FUNC__ /**/"MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering"
int MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat A,Mat *B)
{
Mat C = *B;
Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
int ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
int *ajtmpold,*ajtmp,nz,row;
int *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj;
MatScalar *pv,*v,*rtmp,*pc,*w,*x;
MatScalar p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4;
MatScalar *ba = b->a,*aa = a->a;
PetscFunctionBegin;
rtmp = (MatScalar*)PetscMalloc(4*(n+1)*sizeof(MatScalar));CHKPTRQ(rtmp);
for (i=0; ia */
pv = ba + 4*bi[i];
pj = bj + bi[i];
nz = bi[i+1] - bi[i];
for (j=0; jfactor = FACTOR_LU;
C->assembled = PETSC_TRUE;
PLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
PetscFunctionReturn(0);
}
/* ----------------------------------------------------------- */
/*
Version for when blocks are 1 by 1.
*/
#undef __FUNC__
#define __FUNC__ /**/"MatLUFactorNumeric_SeqBAIJ_1"
int MatLUFactorNumeric_SeqBAIJ_1(Mat A,Mat *B)
{
Mat C = *B;
Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
IS isrow = b->row,isicol = b->icol;
int *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
int *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j;
int *diag_offset = b->diag,diag,*pj;
MatScalar *pv,*v,*rtmp,multiplier,*pc;
MatScalar *ba = b->a,*aa = a->a;
PetscFunctionBegin;
ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
rtmp = (MatScalar*)PetscMalloc((n+1)*sizeof(MatScalar));CHKPTRQ(rtmp);
for (i=0; ia */
pv = ba + bi[i];
pj = bj + bi[i];
nz = bi[i+1] - bi[i];
for (j=0; jfactor = FACTOR_LU;
C->assembled = PETSC_TRUE;
PLogFlops(b->n);
PetscFunctionReturn(0);
}
/* ----------------------------------------------------------- */
#undef __FUNC__
#define __FUNC__ /**/"MatLUFactor_SeqBAIJ"
int MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,PetscReal f)
{
Mat_SeqBAIJ *mat = (Mat_SeqBAIJ*)A->data;
int ierr,refct;
Mat C;
PetscOps *Abops;
MatOps Aops;
PetscFunctionBegin;
ierr = MatLUFactorSymbolic(A,row,col,f,&C);CHKERRQ(ierr);
ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr);
/* free all the data structures from mat */
ierr = PetscFree(mat->a);CHKERRQ(ierr);
if (!mat->singlemalloc) {
ierr = PetscFree(mat->i);CHKERRQ(ierr);
ierr = PetscFree(mat->j);CHKERRQ(ierr);
}
if (mat->diag) {ierr = PetscFree(mat->diag);CHKERRQ(ierr);}
if (mat->ilen) {ierr = PetscFree(mat->ilen);CHKERRQ(ierr);}
if (mat->imax) {ierr = PetscFree(mat->imax);CHKERRQ(ierr);}
if (mat->solve_work) {ierr = PetscFree(mat->solve_work);CHKERRQ(ierr);}
if (mat->mult_work) {ierr = PetscFree(mat->mult_work);CHKERRQ(ierr);}
if (mat->icol) {ierr = ISDestroy(mat->icol);CHKERRQ(ierr);}
ierr = PetscFree(mat);CHKERRQ(ierr);
ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
/*
This is horrible,horrible code. We need to keep the
A pointers for the bops and ops but copy everything
else from C.
*/
Abops = A->bops;
Aops = A->ops;
refct = A->refct;
ierr = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr);
mat = (Mat_SeqBAIJ*)A->data;
PLogObjectParent(A,mat->icol);
A->bops = Abops;
A->ops = Aops;
A->qlist = 0;
A->refct = refct;
/* copy over the type_name and name */
ierr = PetscStrallocpy(C->type_name,&A->type_name);CHKERRQ(ierr);
ierr = PetscStrallocpy(C->name,&A->name);CHKERRQ(ierr);
PetscHeaderDestroy(C);
PetscFunctionReturn(0);
}