1be1d678aSKris Buschelman 22593348eSBarry Smith /* 3b6490206SBarry Smith Defines the basic matrix operations for the BAIJ (compressed row) 42593348eSBarry Smith matrix storage format. 52593348eSBarry Smith */ 6c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7c6db04a5SJed Brown #include <petscblaslapack.h> 8af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h> 9af0996ceSBarry Smith #include <petsc/private/kernels/blockmatmult.h> 1043516a2dSKris Buschelman 117ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 127ea3e4caSstefano_zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 137ea3e4caSstefano_zampini #endif 147ea3e4caSstefano_zampini 15b5b72c8aSIrina Sokolova #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16fd9d3c67SJed Brown PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*); 17b5b72c8aSIrina Sokolova #endif 18c9225affSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 19b5b72c8aSIrina Sokolova 20*9463ebdaSPierre Jolivet PetscErrorCode MatGetColumnNorms_SeqBAIJ(Mat A,NormType type,PetscReal *norms) 21*9463ebdaSPierre Jolivet { 22*9463ebdaSPierre Jolivet PetscErrorCode ierr; 23*9463ebdaSPierre Jolivet Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) A->data; 24*9463ebdaSPierre Jolivet PetscInt N,i; 25*9463ebdaSPierre Jolivet PetscInt ib,jb,bs = A->rmap->bs; 26*9463ebdaSPierre Jolivet MatScalar *a_val = a_aij->a; 27*9463ebdaSPierre Jolivet 28*9463ebdaSPierre Jolivet PetscFunctionBegin; 29*9463ebdaSPierre Jolivet ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 30*9463ebdaSPierre Jolivet for (i=0; i<N; i++) norms[i] = 0.0; 31*9463ebdaSPierre Jolivet if (type == NORM_2) { 32*9463ebdaSPierre Jolivet for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 33*9463ebdaSPierre Jolivet for (jb=0; jb<bs; jb++) { 34*9463ebdaSPierre Jolivet for (ib=0; ib<bs; ib++) { 35*9463ebdaSPierre Jolivet norms[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 36*9463ebdaSPierre Jolivet a_val++; 37*9463ebdaSPierre Jolivet } 38*9463ebdaSPierre Jolivet } 39*9463ebdaSPierre Jolivet } 40*9463ebdaSPierre Jolivet } else if (type == NORM_1) { 41*9463ebdaSPierre Jolivet for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 42*9463ebdaSPierre Jolivet for (jb=0; jb<bs; jb++) { 43*9463ebdaSPierre Jolivet for (ib=0; ib<bs; ib++) { 44*9463ebdaSPierre Jolivet norms[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 45*9463ebdaSPierre Jolivet a_val++; 46*9463ebdaSPierre Jolivet } 47*9463ebdaSPierre Jolivet } 48*9463ebdaSPierre Jolivet } 49*9463ebdaSPierre Jolivet } else if (type == NORM_INFINITY) { 50*9463ebdaSPierre Jolivet for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 51*9463ebdaSPierre Jolivet for (jb=0; jb<bs; jb++) { 52*9463ebdaSPierre Jolivet for (ib=0; ib<bs; ib++) { 53*9463ebdaSPierre Jolivet int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 54*9463ebdaSPierre Jolivet norms[col] = PetscMax(PetscAbsScalar(*a_val), norms[col]); 55*9463ebdaSPierre Jolivet a_val++; 56*9463ebdaSPierre Jolivet } 57*9463ebdaSPierre Jolivet } 58*9463ebdaSPierre Jolivet } 59*9463ebdaSPierre Jolivet } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 60*9463ebdaSPierre Jolivet if (type == NORM_2) { 61*9463ebdaSPierre Jolivet for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]); 62*9463ebdaSPierre Jolivet } 63*9463ebdaSPierre Jolivet PetscFunctionReturn(0); 64*9463ebdaSPierre Jolivet } 65*9463ebdaSPierre Jolivet 66713ccfa9SJed Brown PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 67b01c7715SBarry Smith { 68b01c7715SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 696849ba73SBarry Smith PetscErrorCode ierr; 70de80f912SBarry Smith PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 717f0c90edSBarry Smith MatScalar *v = a->a,*odiag,*diag,work[25],*v_work; 7262bba022SBarry Smith PetscReal shift = 0.0; 731a9391e3SHong Zhang PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 74b01c7715SBarry Smith 75b01c7715SBarry Smith PetscFunctionBegin; 76a455e926SHong Zhang allowzeropivot = PetscNot(A->erroriffailure); 77a455e926SHong Zhang 789797317bSBarry Smith if (a->idiagvalid) { 799797317bSBarry Smith if (values) *values = a->idiag; 809797317bSBarry Smith PetscFunctionReturn(0); 819797317bSBarry Smith } 82b01c7715SBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 83b01c7715SBarry Smith diag_offset = a->diag; 84b01c7715SBarry Smith if (!a->idiag) { 857f0c90edSBarry Smith ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr); 867f0c90edSBarry Smith ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 87b01c7715SBarry Smith } 88b01c7715SBarry Smith diag = a->idiag; 89bbead8a2SBarry Smith if (values) *values = a->idiag; 90b01c7715SBarry Smith /* factor and invert each block */ 91521d7252SBarry Smith switch (bs) { 92ab040260SJed Brown case 1: 93ab040260SJed Brown for (i=0; i<mbs; i++) { 94ab040260SJed Brown odiag = v + 1*diag_offset[i]; 95ab040260SJed Brown diag[0] = odiag[0]; 96ec1892c8SHong Zhang 97ec1892c8SHong Zhang if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 98ec1892c8SHong Zhang if (allowzeropivot) { 997b6c816cSBarry Smith A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1007b6c816cSBarry Smith A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 1017b6c816cSBarry Smith A->factorerror_zeropivot_row = i; 102ec1892c8SHong Zhang ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 1037b6c816cSBarry Smith } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 104ec1892c8SHong Zhang } 105ec1892c8SHong Zhang 106d4a378daSJed Brown diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 107ab040260SJed Brown diag += 1; 108ab040260SJed Brown } 109ab040260SJed Brown break; 110b01c7715SBarry Smith case 2: 111b01c7715SBarry Smith for (i=0; i<mbs; i++) { 112b01c7715SBarry Smith odiag = v + 4*diag_offset[i]; 113b01c7715SBarry Smith diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 114a455e926SHong Zhang ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 1157b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 116b01c7715SBarry Smith diag += 4; 117b01c7715SBarry Smith } 118b01c7715SBarry Smith break; 119b01c7715SBarry Smith case 3: 120b01c7715SBarry Smith for (i=0; i<mbs; i++) { 121b01c7715SBarry Smith odiag = v + 9*diag_offset[i]; 122b01c7715SBarry Smith diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 123b01c7715SBarry Smith diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 124b01c7715SBarry Smith diag[8] = odiag[8]; 125a455e926SHong Zhang ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 1267b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 127b01c7715SBarry Smith diag += 9; 128b01c7715SBarry Smith } 129b01c7715SBarry Smith break; 130b01c7715SBarry Smith case 4: 131b01c7715SBarry Smith for (i=0; i<mbs; i++) { 132b01c7715SBarry Smith odiag = v + 16*diag_offset[i]; 133580bdb30SBarry Smith ierr = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr); 134a455e926SHong Zhang ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 1357b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 136b01c7715SBarry Smith diag += 16; 137b01c7715SBarry Smith } 138b01c7715SBarry Smith break; 139b01c7715SBarry Smith case 5: 140b01c7715SBarry Smith for (i=0; i<mbs; i++) { 141b01c7715SBarry Smith odiag = v + 25*diag_offset[i]; 142580bdb30SBarry Smith ierr = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr); 143a455e926SHong Zhang ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 1447b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 145b01c7715SBarry Smith diag += 25; 146b01c7715SBarry Smith } 147b01c7715SBarry Smith break; 148d49b2adcSBarry Smith case 6: 149d49b2adcSBarry Smith for (i=0; i<mbs; i++) { 150d49b2adcSBarry Smith odiag = v + 36*diag_offset[i]; 151580bdb30SBarry Smith ierr = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr); 152a455e926SHong Zhang ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 1537b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 154d49b2adcSBarry Smith diag += 36; 155d49b2adcSBarry Smith } 156d49b2adcSBarry Smith break; 157de80f912SBarry Smith case 7: 158de80f912SBarry Smith for (i=0; i<mbs; i++) { 159de80f912SBarry Smith odiag = v + 49*diag_offset[i]; 160580bdb30SBarry Smith ierr = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr); 161a455e926SHong Zhang ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 1627b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 163de80f912SBarry Smith diag += 49; 164de80f912SBarry Smith } 165de80f912SBarry Smith break; 166b01c7715SBarry Smith default: 167dcca6d9dSJed Brown ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 168de80f912SBarry Smith for (i=0; i<mbs; i++) { 169de80f912SBarry Smith odiag = v + bs2*diag_offset[i]; 170580bdb30SBarry Smith ierr = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr); 1715f8bbccaSHong Zhang ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 1727b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 173de80f912SBarry Smith diag += bs2; 174de80f912SBarry Smith } 175de80f912SBarry Smith ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 176b01c7715SBarry Smith } 177b01c7715SBarry Smith a->idiagvalid = PETSC_TRUE; 178b01c7715SBarry Smith PetscFunctionReturn(0); 179b01c7715SBarry Smith } 180b01c7715SBarry Smith 181e48d15efSToby Isaac PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1826d3beeddSMatthew Knepley { 1836d3beeddSMatthew Knepley Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 184e48d15efSToby Isaac PetscScalar *x,*work,*w,*workt,*t; 185e48d15efSToby Isaac const MatScalar *v,*aa = a->a, *idiag; 186e48d15efSToby Isaac const PetscScalar *b,*xb; 1875455b99fSToby Isaac PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 1886d3beeddSMatthew Knepley PetscErrorCode ierr; 189e48d15efSToby Isaac PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 190c1ac3661SBarry Smith const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 191b01c7715SBarry Smith 192b01c7715SBarry Smith PetscFunctionBegin; 193b01c7715SBarry Smith its = its*lits; 194e32f2f54SBarry Smith if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 195e32f2f54SBarry Smith if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 19652c4f950SSatish Balay if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift"); 19752c4f950SSatish Balay if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-trivial relaxation factor"); 19852c4f950SSatish Balay if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts"); 199b01c7715SBarry Smith 2000298fd71SBarry Smith if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 201b01c7715SBarry Smith 202b2ec919aSToby Isaac if (!m) PetscFunctionReturn(0); 203b01c7715SBarry Smith diag = a->diag; 204b01c7715SBarry Smith idiag = a->idiag; 205de80f912SBarry Smith k = PetscMax(A->rmap->n,A->cmap->n); 206e48d15efSToby Isaac if (!a->mult_work) { 207f361c04dSBarry Smith ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 208de80f912SBarry Smith } 2093475c22fSBarry Smith if (!a->sor_workt) { 210f361c04dSBarry Smith ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 211de80f912SBarry Smith } 212de80f912SBarry Smith if (!a->sor_work) { 213785e854fSJed Brown ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 214de80f912SBarry Smith } 2153475c22fSBarry Smith work = a->mult_work; 2163475c22fSBarry Smith t = a->sor_workt; 217de80f912SBarry Smith w = a->sor_work; 218de80f912SBarry Smith 219de80f912SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 220de80f912SBarry Smith ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 221de80f912SBarry Smith 222de80f912SBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 223de80f912SBarry Smith if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 224e48d15efSToby Isaac switch (bs) { 225e48d15efSToby Isaac case 1: 226e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(x,idiag,b); 227e48d15efSToby Isaac t[0] = b[0]; 228e48d15efSToby Isaac i2 = 1; 229e48d15efSToby Isaac idiag += 1; 230e48d15efSToby Isaac for (i=1; i<m; i++) { 231e48d15efSToby Isaac v = aa + ai[i]; 232e48d15efSToby Isaac vi = aj + ai[i]; 233e48d15efSToby Isaac nz = diag[i] - ai[i]; 234e48d15efSToby Isaac s[0] = b[i2]; 235e48d15efSToby Isaac for (j=0; j<nz; j++) { 236e48d15efSToby Isaac xw[0] = x[vi[j]]; 237e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 238e48d15efSToby Isaac } 239e48d15efSToby Isaac t[i2] = s[0]; 240e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 241e48d15efSToby Isaac x[i2] = xw[0]; 242e48d15efSToby Isaac idiag += 1; 243e48d15efSToby Isaac i2 += 1; 244e48d15efSToby Isaac } 245e48d15efSToby Isaac break; 246e48d15efSToby Isaac case 2: 247e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(x,idiag,b); 248e48d15efSToby Isaac t[0] = b[0]; t[1] = b[1]; 249e48d15efSToby Isaac i2 = 2; 250e48d15efSToby Isaac idiag += 4; 251e48d15efSToby Isaac for (i=1; i<m; i++) { 252e48d15efSToby Isaac v = aa + 4*ai[i]; 253e48d15efSToby Isaac vi = aj + ai[i]; 254e48d15efSToby Isaac nz = diag[i] - ai[i]; 255e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; 256e48d15efSToby Isaac for (j=0; j<nz; j++) { 257e48d15efSToby Isaac idx = 2*vi[j]; 258e48d15efSToby Isaac it = 4*j; 259e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; 260e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 261e48d15efSToby Isaac } 262e48d15efSToby Isaac t[i2] = s[0]; t[i2+1] = s[1]; 263e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 264e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; 265e48d15efSToby Isaac idiag += 4; 266e48d15efSToby Isaac i2 += 2; 267e48d15efSToby Isaac } 268e48d15efSToby Isaac break; 269e48d15efSToby Isaac case 3: 270e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(x,idiag,b); 271e48d15efSToby Isaac t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 272e48d15efSToby Isaac i2 = 3; 273e48d15efSToby Isaac idiag += 9; 274e48d15efSToby Isaac for (i=1; i<m; i++) { 275e48d15efSToby Isaac v = aa + 9*ai[i]; 276e48d15efSToby Isaac vi = aj + ai[i]; 277e48d15efSToby Isaac nz = diag[i] - ai[i]; 278e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 279e48d15efSToby Isaac while (nz--) { 280e48d15efSToby Isaac idx = 3*(*vi++); 281e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 282e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 283e48d15efSToby Isaac v += 9; 284e48d15efSToby Isaac } 285e48d15efSToby Isaac t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 286e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 287e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 288e48d15efSToby Isaac idiag += 9; 289e48d15efSToby Isaac i2 += 3; 290e48d15efSToby Isaac } 291e48d15efSToby Isaac break; 292e48d15efSToby Isaac case 4: 293e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(x,idiag,b); 294e48d15efSToby Isaac t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 295e48d15efSToby Isaac i2 = 4; 296e48d15efSToby Isaac idiag += 16; 297e48d15efSToby Isaac for (i=1; i<m; i++) { 298e48d15efSToby Isaac v = aa + 16*ai[i]; 299e48d15efSToby Isaac vi = aj + ai[i]; 300e48d15efSToby Isaac nz = diag[i] - ai[i]; 301e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 302e48d15efSToby Isaac while (nz--) { 303e48d15efSToby Isaac idx = 4*(*vi++); 304e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 305e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 306e48d15efSToby Isaac v += 16; 307e48d15efSToby Isaac } 308e48d15efSToby Isaac t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 309e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 310e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 311e48d15efSToby Isaac idiag += 16; 312e48d15efSToby Isaac i2 += 4; 313e48d15efSToby Isaac } 314e48d15efSToby Isaac break; 315e48d15efSToby Isaac case 5: 316e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(x,idiag,b); 317e48d15efSToby Isaac t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 318e48d15efSToby Isaac i2 = 5; 319e48d15efSToby Isaac idiag += 25; 320e48d15efSToby Isaac for (i=1; i<m; i++) { 321e48d15efSToby Isaac v = aa + 25*ai[i]; 322e48d15efSToby Isaac vi = aj + ai[i]; 323e48d15efSToby Isaac nz = diag[i] - ai[i]; 324e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 325e48d15efSToby Isaac while (nz--) { 326e48d15efSToby Isaac idx = 5*(*vi++); 327e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 328e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 329e48d15efSToby Isaac v += 25; 330e48d15efSToby Isaac } 331e48d15efSToby Isaac t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 332e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 333e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 334e48d15efSToby Isaac idiag += 25; 335e48d15efSToby Isaac i2 += 5; 336e48d15efSToby Isaac } 337e48d15efSToby Isaac break; 338e48d15efSToby Isaac case 6: 339e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(x,idiag,b); 340e48d15efSToby Isaac t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 341e48d15efSToby Isaac i2 = 6; 342e48d15efSToby Isaac idiag += 36; 343e48d15efSToby Isaac for (i=1; i<m; i++) { 344e48d15efSToby Isaac v = aa + 36*ai[i]; 345e48d15efSToby Isaac vi = aj + ai[i]; 346e48d15efSToby Isaac nz = diag[i] - ai[i]; 347e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 348e48d15efSToby Isaac while (nz--) { 349e48d15efSToby Isaac idx = 6*(*vi++); 350e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 351e48d15efSToby Isaac xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 352e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 353e48d15efSToby Isaac v += 36; 354e48d15efSToby Isaac } 355e48d15efSToby Isaac t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 356e48d15efSToby Isaac t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 357e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 358e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 359e48d15efSToby Isaac idiag += 36; 360e48d15efSToby Isaac i2 += 6; 361e48d15efSToby Isaac } 362e48d15efSToby Isaac break; 363e48d15efSToby Isaac case 7: 364e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(x,idiag,b); 365e48d15efSToby Isaac t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 366e48d15efSToby Isaac t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 367e48d15efSToby Isaac i2 = 7; 368e48d15efSToby Isaac idiag += 49; 369e48d15efSToby Isaac for (i=1; i<m; i++) { 370e48d15efSToby Isaac v = aa + 49*ai[i]; 371e48d15efSToby Isaac vi = aj + ai[i]; 372e48d15efSToby Isaac nz = diag[i] - ai[i]; 373e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 374e48d15efSToby Isaac s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 375e48d15efSToby Isaac while (nz--) { 376e48d15efSToby Isaac idx = 7*(*vi++); 377e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 378e48d15efSToby Isaac xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 379e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 380e48d15efSToby Isaac v += 49; 381e48d15efSToby Isaac } 382e48d15efSToby Isaac t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 383e48d15efSToby Isaac t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 384e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 385e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 386e48d15efSToby Isaac x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 387e48d15efSToby Isaac idiag += 49; 388e48d15efSToby Isaac i2 += 7; 389e48d15efSToby Isaac } 390e48d15efSToby Isaac break; 391e48d15efSToby Isaac default: 39296b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 393580bdb30SBarry Smith ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr); 394de80f912SBarry Smith i2 = bs; 395de80f912SBarry Smith idiag += bs2; 396de80f912SBarry Smith for (i=1; i<m; i++) { 397de80f912SBarry Smith v = aa + bs2*ai[i]; 398de80f912SBarry Smith vi = aj + ai[i]; 399de80f912SBarry Smith nz = diag[i] - ai[i]; 400de80f912SBarry Smith 401580bdb30SBarry Smith ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 402de80f912SBarry Smith /* copy all rows of x that are needed into contiguous space */ 403de80f912SBarry Smith workt = work; 404de80f912SBarry Smith for (j=0; j<nz; j++) { 405580bdb30SBarry Smith ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 406de80f912SBarry Smith workt += bs; 407de80f912SBarry Smith } 40896b95a6bSBarry Smith PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 409580bdb30SBarry Smith ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr); 41096b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 411de80f912SBarry Smith 412de80f912SBarry Smith idiag += bs2; 413de80f912SBarry Smith i2 += bs; 414de80f912SBarry Smith } 415e48d15efSToby Isaac break; 416e48d15efSToby Isaac } 417de80f912SBarry Smith /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 418e48d15efSToby Isaac ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 419e48d15efSToby Isaac xb = t; 420de80f912SBarry Smith } 421e48d15efSToby Isaac else xb = b; 422de80f912SBarry Smith if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 423e48d15efSToby Isaac idiag = a->idiag+bs2*(a->mbs-1); 424e48d15efSToby Isaac i2 = bs * (m-1); 425e48d15efSToby Isaac switch (bs) { 426e48d15efSToby Isaac case 1: 427e48d15efSToby Isaac s[0] = xb[i2]; 428e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 429e48d15efSToby Isaac x[i2] = xw[0]; 430e48d15efSToby Isaac i2 -= 1; 431e48d15efSToby Isaac for (i=m-2; i>=0; i--) { 432e48d15efSToby Isaac v = aa + (diag[i]+1); 433e48d15efSToby Isaac vi = aj + diag[i] + 1; 434e48d15efSToby Isaac nz = ai[i+1] - diag[i] - 1; 435e48d15efSToby Isaac s[0] = xb[i2]; 436e48d15efSToby Isaac for (j=0; j<nz; j++) { 437e48d15efSToby Isaac xw[0] = x[vi[j]]; 438e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 439e48d15efSToby Isaac } 440e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 441e48d15efSToby Isaac x[i2] = xw[0]; 442e48d15efSToby Isaac idiag -= 1; 443e48d15efSToby Isaac i2 -= 1; 444e48d15efSToby Isaac } 445e48d15efSToby Isaac break; 446e48d15efSToby Isaac case 2: 447e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; 448e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 449e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; 450e48d15efSToby Isaac i2 -= 2; 451e48d15efSToby Isaac idiag -= 4; 452e48d15efSToby Isaac for (i=m-2; i>=0; i--) { 453e48d15efSToby Isaac v = aa + 4*(diag[i] + 1); 454e48d15efSToby Isaac vi = aj + diag[i] + 1; 455e48d15efSToby Isaac nz = ai[i+1] - diag[i] - 1; 456e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; 457e48d15efSToby Isaac for (j=0; j<nz; j++) { 458e48d15efSToby Isaac idx = 2*vi[j]; 459e48d15efSToby Isaac it = 4*j; 460e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; 461e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 462e48d15efSToby Isaac } 463e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 464e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; 465e48d15efSToby Isaac idiag -= 4; 466e48d15efSToby Isaac i2 -= 2; 467e48d15efSToby Isaac } 468e48d15efSToby Isaac break; 469e48d15efSToby Isaac case 3: 470e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 471e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 472e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 473e48d15efSToby Isaac i2 -= 3; 474e48d15efSToby Isaac idiag -= 9; 475e48d15efSToby Isaac for (i=m-2; i>=0; i--) { 476e48d15efSToby Isaac v = aa + 9*(diag[i]+1); 477e48d15efSToby Isaac vi = aj + diag[i] + 1; 478e48d15efSToby Isaac nz = ai[i+1] - diag[i] - 1; 479e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 480e48d15efSToby Isaac while (nz--) { 481e48d15efSToby Isaac idx = 3*(*vi++); 482e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 483e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 484e48d15efSToby Isaac v += 9; 485e48d15efSToby Isaac } 486e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 487e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 488e48d15efSToby Isaac idiag -= 9; 489e48d15efSToby Isaac i2 -= 3; 490e48d15efSToby Isaac } 491e48d15efSToby Isaac break; 492e48d15efSToby Isaac case 4: 493e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 494e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 495e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 496e48d15efSToby Isaac i2 -= 4; 497e48d15efSToby Isaac idiag -= 16; 498e48d15efSToby Isaac for (i=m-2; i>=0; i--) { 499e48d15efSToby Isaac v = aa + 16*(diag[i]+1); 500e48d15efSToby Isaac vi = aj + diag[i] + 1; 501e48d15efSToby Isaac nz = ai[i+1] - diag[i] - 1; 502e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 503e48d15efSToby Isaac while (nz--) { 504e48d15efSToby Isaac idx = 4*(*vi++); 505e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 506e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 507e48d15efSToby Isaac v += 16; 508e48d15efSToby Isaac } 509e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 510e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 511e48d15efSToby Isaac idiag -= 16; 512e48d15efSToby Isaac i2 -= 4; 513e48d15efSToby Isaac } 514e48d15efSToby Isaac break; 515e48d15efSToby Isaac case 5: 516e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 517e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 518e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 519e48d15efSToby Isaac i2 -= 5; 520e48d15efSToby Isaac idiag -= 25; 521e48d15efSToby Isaac for (i=m-2; i>=0; i--) { 522e48d15efSToby Isaac v = aa + 25*(diag[i]+1); 523e48d15efSToby Isaac vi = aj + diag[i] + 1; 524e48d15efSToby Isaac nz = ai[i+1] - diag[i] - 1; 525e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 526e48d15efSToby Isaac while (nz--) { 527e48d15efSToby Isaac idx = 5*(*vi++); 528e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 529e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 530e48d15efSToby Isaac v += 25; 531e48d15efSToby Isaac } 532e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 533e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 534e48d15efSToby Isaac idiag -= 25; 535e48d15efSToby Isaac i2 -= 5; 536e48d15efSToby Isaac } 537e48d15efSToby Isaac break; 538e48d15efSToby Isaac case 6: 539e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 540e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 541e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 542e48d15efSToby Isaac i2 -= 6; 543e48d15efSToby Isaac idiag -= 36; 544e48d15efSToby Isaac for (i=m-2; i>=0; i--) { 545e48d15efSToby Isaac v = aa + 36*(diag[i]+1); 546e48d15efSToby Isaac vi = aj + diag[i] + 1; 547e48d15efSToby Isaac nz = ai[i+1] - diag[i] - 1; 548e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 549e48d15efSToby Isaac while (nz--) { 550e48d15efSToby Isaac idx = 6*(*vi++); 551e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 552e48d15efSToby Isaac xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 553e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 554e48d15efSToby Isaac v += 36; 555e48d15efSToby Isaac } 556e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 557e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 558e48d15efSToby Isaac idiag -= 36; 559e48d15efSToby Isaac i2 -= 6; 560e48d15efSToby Isaac } 561e48d15efSToby Isaac break; 562e48d15efSToby Isaac case 7: 563e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 564e48d15efSToby Isaac s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 565e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(x,idiag,b); 566e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 567e48d15efSToby Isaac x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 568e48d15efSToby Isaac i2 -= 7; 569e48d15efSToby Isaac idiag -= 49; 570e48d15efSToby Isaac for (i=m-2; i>=0; i--) { 571e48d15efSToby Isaac v = aa + 49*(diag[i]+1); 572e48d15efSToby Isaac vi = aj + diag[i] + 1; 573e48d15efSToby Isaac nz = ai[i+1] - diag[i] - 1; 574e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 575e48d15efSToby Isaac s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 576e48d15efSToby Isaac while (nz--) { 577e48d15efSToby Isaac idx = 7*(*vi++); 578e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 579e48d15efSToby Isaac xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 580e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 581e48d15efSToby Isaac v += 49; 582e48d15efSToby Isaac } 583e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 584e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 585e48d15efSToby Isaac x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 586e48d15efSToby Isaac idiag -= 49; 587e48d15efSToby Isaac i2 -= 7; 588e48d15efSToby Isaac } 589e48d15efSToby Isaac break; 590e48d15efSToby Isaac default: 591580bdb30SBarry Smith ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr); 59296b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 593de80f912SBarry Smith i2 -= bs; 594e48d15efSToby Isaac idiag -= bs2; 595de80f912SBarry Smith for (i=m-2; i>=0; i--) { 596de80f912SBarry Smith v = aa + bs2*(diag[i]+1); 597de80f912SBarry Smith vi = aj + diag[i] + 1; 598de80f912SBarry Smith nz = ai[i+1] - diag[i] - 1; 599de80f912SBarry Smith 600580bdb30SBarry Smith ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr); 601de80f912SBarry Smith /* copy all rows of x that are needed into contiguous space */ 602de80f912SBarry Smith workt = work; 603de80f912SBarry Smith for (j=0; j<nz; j++) { 604580bdb30SBarry Smith ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 605de80f912SBarry Smith workt += bs; 606de80f912SBarry Smith } 60796b95a6bSBarry Smith PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 60896b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 609e48d15efSToby Isaac 610de80f912SBarry Smith idiag -= bs2; 611de80f912SBarry Smith i2 -= bs; 612de80f912SBarry Smith } 613e48d15efSToby Isaac break; 614e48d15efSToby Isaac } 615de80f912SBarry Smith ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 616de80f912SBarry Smith } 617e48d15efSToby Isaac its--; 618e48d15efSToby Isaac } 619e48d15efSToby Isaac while (its--) { 620e48d15efSToby Isaac if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 621e48d15efSToby Isaac idiag = a->idiag; 622e48d15efSToby Isaac i2 = 0; 623e48d15efSToby Isaac switch (bs) { 624e48d15efSToby Isaac case 1: 625e48d15efSToby Isaac for (i=0; i<m; i++) { 626e48d15efSToby Isaac v = aa + ai[i]; 627e48d15efSToby Isaac vi = aj + ai[i]; 628e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 629e48d15efSToby Isaac s[0] = b[i2]; 630e48d15efSToby Isaac for (j=0; j<nz; j++) { 631e48d15efSToby Isaac xw[0] = x[vi[j]]; 632e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 633e48d15efSToby Isaac } 634e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 635e48d15efSToby Isaac x[i2] += xw[0]; 636e48d15efSToby Isaac idiag += 1; 637e48d15efSToby Isaac i2 += 1; 638e48d15efSToby Isaac } 639e48d15efSToby Isaac break; 640e48d15efSToby Isaac case 2: 641e48d15efSToby Isaac for (i=0; i<m; i++) { 642e48d15efSToby Isaac v = aa + 4*ai[i]; 643e48d15efSToby Isaac vi = aj + ai[i]; 644e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 645e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; 646e48d15efSToby Isaac for (j=0; j<nz; j++) { 647e48d15efSToby Isaac idx = 2*vi[j]; 648e48d15efSToby Isaac it = 4*j; 649e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; 650e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 651e48d15efSToby Isaac } 652e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 653e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; 654e48d15efSToby Isaac idiag += 4; 655e48d15efSToby Isaac i2 += 2; 656e48d15efSToby Isaac } 657e48d15efSToby Isaac break; 658e48d15efSToby Isaac case 3: 659e48d15efSToby Isaac for (i=0; i<m; i++) { 660e48d15efSToby Isaac v = aa + 9*ai[i]; 661e48d15efSToby Isaac vi = aj + ai[i]; 662e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 663e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 664e48d15efSToby Isaac while (nz--) { 665e48d15efSToby Isaac idx = 3*(*vi++); 666e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 667e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 668e48d15efSToby Isaac v += 9; 669e48d15efSToby Isaac } 670e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 671e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 672e48d15efSToby Isaac idiag += 9; 673e48d15efSToby Isaac i2 += 3; 674e48d15efSToby Isaac } 675e48d15efSToby Isaac break; 676e48d15efSToby Isaac case 4: 677e48d15efSToby Isaac for (i=0; i<m; i++) { 678e48d15efSToby Isaac v = aa + 16*ai[i]; 679e48d15efSToby Isaac vi = aj + ai[i]; 680e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 681e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 682e48d15efSToby Isaac while (nz--) { 683e48d15efSToby Isaac idx = 4*(*vi++); 684e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 685e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 686e48d15efSToby Isaac v += 16; 687e48d15efSToby Isaac } 688e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 689e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 690e48d15efSToby Isaac idiag += 16; 691e48d15efSToby Isaac i2 += 4; 692e48d15efSToby Isaac } 693e48d15efSToby Isaac break; 694e48d15efSToby Isaac case 5: 695e48d15efSToby Isaac for (i=0; i<m; i++) { 696e48d15efSToby Isaac v = aa + 25*ai[i]; 697e48d15efSToby Isaac vi = aj + ai[i]; 698e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 699e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 700e48d15efSToby Isaac while (nz--) { 701e48d15efSToby Isaac idx = 5*(*vi++); 702e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 703e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 704e48d15efSToby Isaac v += 25; 705e48d15efSToby Isaac } 706e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 707e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 708e48d15efSToby Isaac idiag += 25; 709e48d15efSToby Isaac i2 += 5; 710e48d15efSToby Isaac } 711e48d15efSToby Isaac break; 712e48d15efSToby Isaac case 6: 713e48d15efSToby Isaac for (i=0; i<m; i++) { 714e48d15efSToby Isaac v = aa + 36*ai[i]; 715e48d15efSToby Isaac vi = aj + ai[i]; 716e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 717e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 718e48d15efSToby Isaac while (nz--) { 719e48d15efSToby Isaac idx = 6*(*vi++); 720e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 721e48d15efSToby Isaac xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 722e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 723e48d15efSToby Isaac v += 36; 724e48d15efSToby Isaac } 725e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 726e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 727e48d15efSToby Isaac x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 728e48d15efSToby Isaac idiag += 36; 729e48d15efSToby Isaac i2 += 6; 730e48d15efSToby Isaac } 731e48d15efSToby Isaac break; 732e48d15efSToby Isaac case 7: 733e48d15efSToby Isaac for (i=0; i<m; i++) { 734e48d15efSToby Isaac v = aa + 49*ai[i]; 735e48d15efSToby Isaac vi = aj + ai[i]; 736e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 737e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 738e48d15efSToby Isaac s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 739e48d15efSToby Isaac while (nz--) { 740e48d15efSToby Isaac idx = 7*(*vi++); 741e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 742e48d15efSToby Isaac xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 743e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 744e48d15efSToby Isaac v += 49; 745e48d15efSToby Isaac } 746e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 747e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 748e48d15efSToby Isaac x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 749e48d15efSToby Isaac idiag += 49; 750e48d15efSToby Isaac i2 += 7; 751e48d15efSToby Isaac } 752e48d15efSToby Isaac break; 753e48d15efSToby Isaac default: 754e48d15efSToby Isaac for (i=0; i<m; i++) { 755e48d15efSToby Isaac v = aa + bs2*ai[i]; 756e48d15efSToby Isaac vi = aj + ai[i]; 757e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 758e48d15efSToby Isaac 759580bdb30SBarry Smith ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 760e48d15efSToby Isaac /* copy all rows of x that are needed into contiguous space */ 761e48d15efSToby Isaac workt = work; 762e48d15efSToby Isaac for (j=0; j<nz; j++) { 763580bdb30SBarry Smith ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 764e48d15efSToby Isaac workt += bs; 765e48d15efSToby Isaac } 766e48d15efSToby Isaac PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 767e48d15efSToby Isaac PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 768e48d15efSToby Isaac 769e48d15efSToby Isaac idiag += bs2; 770e48d15efSToby Isaac i2 += bs; 771e48d15efSToby Isaac } 772e48d15efSToby Isaac break; 773e48d15efSToby Isaac } 774e48d15efSToby Isaac ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 775e48d15efSToby Isaac } 776e48d15efSToby Isaac if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 777e48d15efSToby Isaac idiag = a->idiag+bs2*(a->mbs-1); 778e48d15efSToby Isaac i2 = bs * (m-1); 779e48d15efSToby Isaac switch (bs) { 780e48d15efSToby Isaac case 1: 781e48d15efSToby Isaac for (i=m-1; i>=0; i--) { 782e48d15efSToby Isaac v = aa + ai[i]; 783e48d15efSToby Isaac vi = aj + ai[i]; 784e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 785e48d15efSToby Isaac s[0] = b[i2]; 786e48d15efSToby Isaac for (j=0; j<nz; j++) { 787e48d15efSToby Isaac xw[0] = x[vi[j]]; 788e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 789e48d15efSToby Isaac } 790e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 791e48d15efSToby Isaac x[i2] += xw[0]; 792e48d15efSToby Isaac idiag -= 1; 793e48d15efSToby Isaac i2 -= 1; 794e48d15efSToby Isaac } 795e48d15efSToby Isaac break; 796e48d15efSToby Isaac case 2: 797e48d15efSToby Isaac for (i=m-1; i>=0; i--) { 798e48d15efSToby Isaac v = aa + 4*ai[i]; 799e48d15efSToby Isaac vi = aj + ai[i]; 800e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 801e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; 802e48d15efSToby Isaac for (j=0; j<nz; j++) { 803e48d15efSToby Isaac idx = 2*vi[j]; 804e48d15efSToby Isaac it = 4*j; 805e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; 806e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 807e48d15efSToby Isaac } 808e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 809e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; 810e48d15efSToby Isaac idiag -= 4; 811e48d15efSToby Isaac i2 -= 2; 812e48d15efSToby Isaac } 813e48d15efSToby Isaac break; 814e48d15efSToby Isaac case 3: 815e48d15efSToby Isaac for (i=m-1; i>=0; i--) { 816e48d15efSToby Isaac v = aa + 9*ai[i]; 817e48d15efSToby Isaac vi = aj + ai[i]; 818e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 819e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 820e48d15efSToby Isaac while (nz--) { 821e48d15efSToby Isaac idx = 3*(*vi++); 822e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 823e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 824e48d15efSToby Isaac v += 9; 825e48d15efSToby Isaac } 826e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 827e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 828e48d15efSToby Isaac idiag -= 9; 829e48d15efSToby Isaac i2 -= 3; 830e48d15efSToby Isaac } 831e48d15efSToby Isaac break; 832e48d15efSToby Isaac case 4: 833e48d15efSToby Isaac for (i=m-1; i>=0; i--) { 834e48d15efSToby Isaac v = aa + 16*ai[i]; 835e48d15efSToby Isaac vi = aj + ai[i]; 836e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 837e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 838e48d15efSToby Isaac while (nz--) { 839e48d15efSToby Isaac idx = 4*(*vi++); 840e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 841e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 842e48d15efSToby Isaac v += 16; 843e48d15efSToby Isaac } 844e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 845e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 846e48d15efSToby Isaac idiag -= 16; 847e48d15efSToby Isaac i2 -= 4; 848e48d15efSToby Isaac } 849e48d15efSToby Isaac break; 850e48d15efSToby Isaac case 5: 851e48d15efSToby Isaac for (i=m-1; i>=0; i--) { 852e48d15efSToby Isaac v = aa + 25*ai[i]; 853e48d15efSToby Isaac vi = aj + ai[i]; 854e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 855e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 856e48d15efSToby Isaac while (nz--) { 857e48d15efSToby Isaac idx = 5*(*vi++); 858e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 859e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 860e48d15efSToby Isaac v += 25; 861e48d15efSToby Isaac } 862e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 863e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 864e48d15efSToby Isaac idiag -= 25; 865e48d15efSToby Isaac i2 -= 5; 866e48d15efSToby Isaac } 867e48d15efSToby Isaac break; 868e48d15efSToby Isaac case 6: 869e48d15efSToby Isaac for (i=m-1; i>=0; i--) { 870e48d15efSToby Isaac v = aa + 36*ai[i]; 871e48d15efSToby Isaac vi = aj + ai[i]; 872e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 873e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 874e48d15efSToby Isaac while (nz--) { 875e48d15efSToby Isaac idx = 6*(*vi++); 876e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 877e48d15efSToby Isaac xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 878e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 879e48d15efSToby Isaac v += 36; 880e48d15efSToby Isaac } 881e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 882e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 883e48d15efSToby Isaac x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 884e48d15efSToby Isaac idiag -= 36; 885e48d15efSToby Isaac i2 -= 6; 886e48d15efSToby Isaac } 887e48d15efSToby Isaac break; 888e48d15efSToby Isaac case 7: 889e48d15efSToby Isaac for (i=m-1; i>=0; i--) { 890e48d15efSToby Isaac v = aa + 49*ai[i]; 891e48d15efSToby Isaac vi = aj + ai[i]; 892e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 893e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 894e48d15efSToby Isaac s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 895e48d15efSToby Isaac while (nz--) { 896e48d15efSToby Isaac idx = 7*(*vi++); 897e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 898e48d15efSToby Isaac xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 899e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 900e48d15efSToby Isaac v += 49; 901e48d15efSToby Isaac } 902e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 903e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 904e48d15efSToby Isaac x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 905e48d15efSToby Isaac idiag -= 49; 906e48d15efSToby Isaac i2 -= 7; 907e48d15efSToby Isaac } 908e48d15efSToby Isaac break; 909e48d15efSToby Isaac default: 910e48d15efSToby Isaac for (i=m-1; i>=0; i--) { 911e48d15efSToby Isaac v = aa + bs2*ai[i]; 912e48d15efSToby Isaac vi = aj + ai[i]; 913e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 914e48d15efSToby Isaac 915580bdb30SBarry Smith ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 916e48d15efSToby Isaac /* copy all rows of x that are needed into contiguous space */ 917e48d15efSToby Isaac workt = work; 918e48d15efSToby Isaac for (j=0; j<nz; j++) { 919580bdb30SBarry Smith ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 920e48d15efSToby Isaac workt += bs; 921e48d15efSToby Isaac } 922e48d15efSToby Isaac PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 923e48d15efSToby Isaac PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 924e48d15efSToby Isaac 925e48d15efSToby Isaac idiag -= bs2; 926e48d15efSToby Isaac i2 -= bs; 927e48d15efSToby Isaac } 928e48d15efSToby Isaac break; 929e48d15efSToby Isaac } 930e48d15efSToby Isaac ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 931e48d15efSToby Isaac } 932e48d15efSToby Isaac } 933de80f912SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 934de80f912SBarry Smith ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 935de80f912SBarry Smith PetscFunctionReturn(0); 936de80f912SBarry Smith } 937de80f912SBarry Smith 938af674e45SBarry Smith /* 93981824310SBarry Smith Special version for direct calls from Fortran (Used in PETSc-fun3d) 940af674e45SBarry Smith */ 941af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 942af674e45SBarry Smith #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 943af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 944af674e45SBarry Smith #define matsetvaluesblocked4_ matsetvaluesblocked4 945af674e45SBarry Smith #endif 946af674e45SBarry Smith 9478cc058d9SJed Brown PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 948af674e45SBarry Smith { 949af674e45SBarry Smith Mat A = *AA; 950af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 951c1ac3661SBarry Smith PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 952c1ac3661SBarry Smith PetscInt *ai =a->i,*ailen=a->ilen; 95317ec6a02SBarry Smith PetscInt *aj =a->j,stepval,lastcol = -1; 954f15d580aSBarry Smith const PetscScalar *value = v; 9554bb09213Spetsc MatScalar *ap,*aa = a->a,*bap; 95670990e77SSatish Balay PetscErrorCode ierr; 957af674e45SBarry Smith 958af674e45SBarry Smith PetscFunctionBegin; 959ce94432eSBarry Smith if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 960af674e45SBarry Smith stepval = (n-1)*4; 961af674e45SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 962af674e45SBarry Smith row = im[k]; 963af674e45SBarry Smith rp = aj + ai[row]; 964af674e45SBarry Smith ap = aa + 16*ai[row]; 965af674e45SBarry Smith nrow = ailen[row]; 966af674e45SBarry Smith low = 0; 96717ec6a02SBarry Smith high = nrow; 968af674e45SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 969af674e45SBarry Smith col = in[l]; 970db4deed7SKarl Rupp if (col <= lastcol) low = 0; 971db4deed7SKarl Rupp else high = nrow; 97217ec6a02SBarry Smith lastcol = col; 9731e3347e8SBarry Smith value = v + k*(stepval+4 + l)*4; 974af674e45SBarry Smith while (high-low > 7) { 975af674e45SBarry Smith t = (low+high)/2; 976af674e45SBarry Smith if (rp[t] > col) high = t; 977af674e45SBarry Smith else low = t; 978af674e45SBarry Smith } 979af674e45SBarry Smith for (i=low; i<high; i++) { 980af674e45SBarry Smith if (rp[i] > col) break; 981af674e45SBarry Smith if (rp[i] == col) { 982af674e45SBarry Smith bap = ap + 16*i; 983af674e45SBarry Smith for (ii=0; ii<4; ii++,value+=stepval) { 984af674e45SBarry Smith for (jj=ii; jj<16; jj+=4) { 985af674e45SBarry Smith bap[jj] += *value++; 986af674e45SBarry Smith } 987af674e45SBarry Smith } 988af674e45SBarry Smith goto noinsert2; 989af674e45SBarry Smith } 990af674e45SBarry Smith } 991af674e45SBarry Smith N = nrow++ - 1; 99217ec6a02SBarry Smith high++; /* added new column index thus must search to one higher than before */ 993af674e45SBarry Smith /* shift up all the later entries in this row */ 994af674e45SBarry Smith for (ii=N; ii>=i; ii--) { 995af674e45SBarry Smith rp[ii+1] = rp[ii]; 99670990e77SSatish Balay ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr); 997af674e45SBarry Smith } 998af674e45SBarry Smith if (N >= i) { 99970990e77SSatish Balay ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr); 1000af674e45SBarry Smith } 1001af674e45SBarry Smith rp[i] = col; 1002af674e45SBarry Smith bap = ap + 16*i; 1003af674e45SBarry Smith for (ii=0; ii<4; ii++,value+=stepval) { 1004af674e45SBarry Smith for (jj=ii; jj<16; jj+=4) { 1005af674e45SBarry Smith bap[jj] = *value++; 1006af674e45SBarry Smith } 1007af674e45SBarry Smith } 1008af674e45SBarry Smith noinsert2:; 1009af674e45SBarry Smith low = i; 1010af674e45SBarry Smith } 1011af674e45SBarry Smith ailen[row] = nrow; 1012af674e45SBarry Smith } 1013be1d678aSKris Buschelman PetscFunctionReturnVoid(); 1014af674e45SBarry Smith } 1015af674e45SBarry Smith 1016af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 1017af674e45SBarry Smith #define matsetvalues4_ MATSETVALUES4 1018af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1019af674e45SBarry Smith #define matsetvalues4_ matsetvalues4 1020af674e45SBarry Smith #endif 1021af674e45SBarry Smith 10228cc058d9SJed Brown PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1023af674e45SBarry Smith { 1024af674e45SBarry Smith Mat A = *AA; 1025af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1026580bdb30SBarry Smith PetscInt *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm; 1027c1ac3661SBarry Smith PetscInt *ai=a->i,*ailen=a->ilen; 1028c1ac3661SBarry Smith PetscInt *aj=a->j,brow,bcol; 102917ec6a02SBarry Smith PetscInt ridx,cidx,lastcol = -1; 1030af674e45SBarry Smith MatScalar *ap,value,*aa=a->a,*bap; 103170990e77SSatish Balay PetscErrorCode ierr; 1032af674e45SBarry Smith 1033af674e45SBarry Smith PetscFunctionBegin; 1034af674e45SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 1035af674e45SBarry Smith row = im[k]; brow = row/4; 1036af674e45SBarry Smith rp = aj + ai[brow]; 1037af674e45SBarry Smith ap = aa + 16*ai[brow]; 1038af674e45SBarry Smith nrow = ailen[brow]; 1039af674e45SBarry Smith low = 0; 104017ec6a02SBarry Smith high = nrow; 1041af674e45SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 1042af674e45SBarry Smith col = in[l]; bcol = col/4; 1043af674e45SBarry Smith ridx = row % 4; cidx = col % 4; 1044af674e45SBarry Smith value = v[l + k*n]; 1045db4deed7SKarl Rupp if (col <= lastcol) low = 0; 1046db4deed7SKarl Rupp else high = nrow; 104717ec6a02SBarry Smith lastcol = col; 1048af674e45SBarry Smith while (high-low > 7) { 1049af674e45SBarry Smith t = (low+high)/2; 1050af674e45SBarry Smith if (rp[t] > bcol) high = t; 1051af674e45SBarry Smith else low = t; 1052af674e45SBarry Smith } 1053af674e45SBarry Smith for (i=low; i<high; i++) { 1054af674e45SBarry Smith if (rp[i] > bcol) break; 1055af674e45SBarry Smith if (rp[i] == bcol) { 1056af674e45SBarry Smith bap = ap + 16*i + 4*cidx + ridx; 1057af674e45SBarry Smith *bap += value; 1058af674e45SBarry Smith goto noinsert1; 1059af674e45SBarry Smith } 1060af674e45SBarry Smith } 1061af674e45SBarry Smith N = nrow++ - 1; 106217ec6a02SBarry Smith high++; /* added new column thus must search to one higher than before */ 1063af674e45SBarry Smith /* shift up all the later entries in this row */ 106470990e77SSatish Balay ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr); 106570990e77SSatish Balay ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr); 106670990e77SSatish Balay ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr); 1067af674e45SBarry Smith rp[i] = bcol; 1068af674e45SBarry Smith ap[16*i + 4*cidx + ridx] = value; 1069af674e45SBarry Smith noinsert1:; 1070af674e45SBarry Smith low = i; 1071af674e45SBarry Smith } 1072af674e45SBarry Smith ailen[brow] = nrow; 1073af674e45SBarry Smith } 1074be1d678aSKris Buschelman PetscFunctionReturnVoid(); 1075af674e45SBarry Smith } 1076af674e45SBarry Smith 1077be5855fcSBarry Smith /* 1078be5855fcSBarry Smith Checks for missing diagonals 1079be5855fcSBarry Smith */ 1080ace3abfcSBarry Smith PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1081be5855fcSBarry Smith { 1082be5855fcSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 10836849ba73SBarry Smith PetscErrorCode ierr; 10847734d3b5SMatthew G. Knepley PetscInt *diag,*ii = a->i,i; 1085be5855fcSBarry Smith 1086be5855fcSBarry Smith PetscFunctionBegin; 1087c4992f7dSBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 10882af78befSBarry Smith *missing = PETSC_FALSE; 10897734d3b5SMatthew G. Knepley if (A->rmap->n > 0 && !ii) { 10902efa7f71SHong Zhang *missing = PETSC_TRUE; 10912efa7f71SHong Zhang if (d) *d = 0; 1092994fe344SLisandro Dalcin ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr); 10932efa7f71SHong Zhang } else { 109401445905SHong Zhang PetscInt n; 109501445905SHong Zhang n = PetscMin(a->mbs, a->nbs); 1096883fce79SBarry Smith diag = a->diag; 109701445905SHong Zhang for (i=0; i<n; i++) { 10987734d3b5SMatthew G. Knepley if (diag[i] >= ii[i+1]) { 10992af78befSBarry Smith *missing = PETSC_TRUE; 11002af78befSBarry Smith if (d) *d = i; 1101994fe344SLisandro Dalcin ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr); 1102358d2f5dSShri Abhyankar break; 11032efa7f71SHong Zhang } 1104be5855fcSBarry Smith } 1105be5855fcSBarry Smith } 1106be5855fcSBarry Smith PetscFunctionReturn(0); 1107be5855fcSBarry Smith } 1108be5855fcSBarry Smith 1109dfbe8321SBarry Smith PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1110de6a44a3SBarry Smith { 1111de6a44a3SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 11126849ba73SBarry Smith PetscErrorCode ierr; 111309f38230SBarry Smith PetscInt i,j,m = a->mbs; 1114de6a44a3SBarry Smith 11153a40ed3dSBarry Smith PetscFunctionBegin; 111609f38230SBarry Smith if (!a->diag) { 1117785e854fSJed Brown ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 11183bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 11194fd072dbSBarry Smith a->free_diag = PETSC_TRUE; 112009f38230SBarry Smith } 11217fc0212eSBarry Smith for (i=0; i<m; i++) { 112209f38230SBarry Smith a->diag[i] = a->i[i+1]; 1123de6a44a3SBarry Smith for (j=a->i[i]; j<a->i[i+1]; j++) { 1124de6a44a3SBarry Smith if (a->j[j] == i) { 112509f38230SBarry Smith a->diag[i] = j; 1126de6a44a3SBarry Smith break; 1127de6a44a3SBarry Smith } 1128de6a44a3SBarry Smith } 1129de6a44a3SBarry Smith } 11303a40ed3dSBarry Smith PetscFunctionReturn(0); 1131de6a44a3SBarry Smith } 11322593348eSBarry Smith 11331a83f524SJed Brown static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 11343b2fbd54SBarry Smith { 11353b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1136dfbe8321SBarry Smith PetscErrorCode ierr; 11371a83f524SJed Brown PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 11381a83f524SJed Brown PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 11393b2fbd54SBarry Smith 11403a40ed3dSBarry Smith PetscFunctionBegin; 11413b2fbd54SBarry Smith *nn = n; 11423a40ed3dSBarry Smith if (!ia) PetscFunctionReturn(0); 11433b2fbd54SBarry Smith if (symmetric) { 11442462f5fdSStefano Zampini ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1145553b3c51SBarry Smith nz = tia[n]; 11463b2fbd54SBarry Smith } else { 11478f7157efSSatish Balay tia = a->i; tja = a->j; 11483b2fbd54SBarry Smith } 11493b2fbd54SBarry Smith 1150ecc77c7aSBarry Smith if (!blockcompressed && bs > 1) { 1151ecc77c7aSBarry Smith (*nn) *= bs; 11528f7157efSSatish Balay /* malloc & create the natural set of indices */ 1153785e854fSJed Brown ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 11549985e31cSBarry Smith if (n) { 11552462f5fdSStefano Zampini (*ia)[0] = oshift; 1156ecc77c7aSBarry Smith for (j=1; j<bs; j++) { 1157ecc77c7aSBarry Smith (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1158ecc77c7aSBarry Smith } 11599985e31cSBarry Smith } 1160ecc77c7aSBarry Smith 1161ecc77c7aSBarry Smith for (i=1; i<n; i++) { 1162ecc77c7aSBarry Smith (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1163ecc77c7aSBarry Smith for (j=1; j<bs; j++) { 1164ecc77c7aSBarry Smith (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 11658f7157efSSatish Balay } 11668f7157efSSatish Balay } 11679985e31cSBarry Smith if (n) { 1168ecc77c7aSBarry Smith (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 11699985e31cSBarry Smith } 1170ecc77c7aSBarry Smith 11711a83f524SJed Brown if (inja) { 1172785e854fSJed Brown ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 11739985e31cSBarry Smith cnt = 0; 11749985e31cSBarry Smith for (i=0; i<n; i++) { 11759985e31cSBarry Smith for (j=0; j<bs; j++) { 11769985e31cSBarry Smith for (k=tia[i]; k<tia[i+1]; k++) { 11779985e31cSBarry Smith for (l=0; l<bs; l++) { 11789985e31cSBarry Smith (*ja)[cnt++] = bs*tja[k] + l; 11799985e31cSBarry Smith } 11809985e31cSBarry Smith } 11819985e31cSBarry Smith } 11829985e31cSBarry Smith } 11839985e31cSBarry Smith } 11849985e31cSBarry Smith 11858f7157efSSatish Balay if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 11868f7157efSSatish Balay ierr = PetscFree(tia);CHKERRQ(ierr); 11878f7157efSSatish Balay ierr = PetscFree(tja);CHKERRQ(ierr); 11888f7157efSSatish Balay } 1189f6d58c54SBarry Smith } else if (oshift == 1) { 1190715a17b5SBarry Smith if (symmetric) { 1191a2ea699eSBarry Smith nz = tia[A->rmap->n/bs]; 1192715a17b5SBarry Smith /* add 1 to i and j indices */ 1193715a17b5SBarry Smith for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1194715a17b5SBarry Smith *ia = tia; 1195715a17b5SBarry Smith if (ja) { 1196715a17b5SBarry Smith for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1197715a17b5SBarry Smith *ja = tja; 1198715a17b5SBarry Smith } 1199715a17b5SBarry Smith } else { 1200a2ea699eSBarry Smith nz = a->i[A->rmap->n/bs]; 1201f6d58c54SBarry Smith /* malloc space and add 1 to i and j indices */ 1202854ce69bSBarry Smith ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1203f6d58c54SBarry Smith for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1204f6d58c54SBarry Smith if (ja) { 1205785e854fSJed Brown ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1206f6d58c54SBarry Smith for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1207f6d58c54SBarry Smith } 1208715a17b5SBarry Smith } 12098f7157efSSatish Balay } else { 12108f7157efSSatish Balay *ia = tia; 1211ecc77c7aSBarry Smith if (ja) *ja = tja; 12128f7157efSSatish Balay } 12133a40ed3dSBarry Smith PetscFunctionReturn(0); 12143b2fbd54SBarry Smith } 12153b2fbd54SBarry Smith 12161a83f524SJed Brown static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 12173b2fbd54SBarry Smith { 12186849ba73SBarry Smith PetscErrorCode ierr; 12193b2fbd54SBarry Smith 12203a40ed3dSBarry Smith PetscFunctionBegin; 12213a40ed3dSBarry Smith if (!ia) PetscFunctionReturn(0); 1222715a17b5SBarry Smith if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1223606d414cSSatish Balay ierr = PetscFree(*ia);CHKERRQ(ierr); 12249985e31cSBarry Smith if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 12253b2fbd54SBarry Smith } 12263a40ed3dSBarry Smith PetscFunctionReturn(0); 12273b2fbd54SBarry Smith } 12283b2fbd54SBarry Smith 1229dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 12302d61bbb3SSatish Balay { 12312d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1232dfbe8321SBarry Smith PetscErrorCode ierr; 12332d61bbb3SSatish Balay 1234433994e6SBarry Smith PetscFunctionBegin; 1235aa482453SBarry Smith #if defined(PETSC_USE_LOG) 1236d0f46423SBarry Smith PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 12372d61bbb3SSatish Balay #endif 1238e6b907acSBarry Smith ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 12396bf464f9SBarry Smith ierr = ISDestroy(&a->row);CHKERRQ(ierr); 12406bf464f9SBarry Smith ierr = ISDestroy(&a->col);CHKERRQ(ierr); 12414fd072dbSBarry Smith if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 124205b42c5fSBarry Smith ierr = PetscFree(a->idiag);CHKERRQ(ierr); 12434fd072dbSBarry Smith if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 124405b42c5fSBarry Smith ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 124505b42c5fSBarry Smith ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1246f361c04dSBarry Smith ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1247de80f912SBarry Smith ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 12486bf464f9SBarry Smith ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 124905b42c5fSBarry Smith ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1250cd6b891eSBarry Smith ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1251c4319e64SHong Zhang 12526bf464f9SBarry Smith ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 12536bf464f9SBarry Smith ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1254bf0cc555SLisandro Dalcin ierr = PetscFree(A->data);CHKERRQ(ierr); 1255901853e0SKris Buschelman 1256f4259b30SLisandro Dalcin ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr); 1257cda14afcSprj- ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);CHKERRQ(ierr); 1258cda14afcSprj- ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);CHKERRQ(ierr); 1259bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1260bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1261bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1262bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1263bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1264bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1265bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1266bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1267bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1268bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 12697ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 1270c9225affSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr); 12717ea3e4caSstefano_zampini #endif 1272c9225affSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr); 12732d61bbb3SSatish Balay PetscFunctionReturn(0); 12742d61bbb3SSatish Balay } 12752d61bbb3SSatish Balay 1276ace3abfcSBarry Smith PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 12772d61bbb3SSatish Balay { 12782d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 127963ba0a88SBarry Smith PetscErrorCode ierr; 12802d61bbb3SSatish Balay 12812d61bbb3SSatish Balay PetscFunctionBegin; 1282aa275fccSKris Buschelman switch (op) { 1283aa275fccSKris Buschelman case MAT_ROW_ORIENTED: 12844e0d8c25SBarry Smith a->roworiented = flg; 1285aa275fccSKris Buschelman break; 1286a9817697SBarry Smith case MAT_KEEP_NONZERO_PATTERN: 1287a9817697SBarry Smith a->keepnonzeropattern = flg; 1288aa275fccSKris Buschelman break; 1289512a5fc5SBarry Smith case MAT_NEW_NONZERO_LOCATIONS: 1290512a5fc5SBarry Smith a->nonew = (flg ? 0 : 1); 1291aa275fccSKris Buschelman break; 1292aa275fccSKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 12934e0d8c25SBarry Smith a->nonew = (flg ? -1 : 0); 1294aa275fccSKris Buschelman break; 1295aa275fccSKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 12964e0d8c25SBarry Smith a->nonew = (flg ? -2 : 0); 1297aa275fccSKris Buschelman break; 129828b2fa4aSMatthew Knepley case MAT_UNUSED_NONZERO_LOCATION_ERR: 129928b2fa4aSMatthew Knepley a->nounused = (flg ? -1 : 0); 130028b2fa4aSMatthew Knepley break; 13018c78258cSHong Zhang case MAT_FORCE_DIAGONAL_ENTRIES: 1302aa275fccSKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 1303aa275fccSKris Buschelman case MAT_USE_HASH_TABLE: 1304071fcb05SBarry Smith case MAT_SORTED_FULL: 1305290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1306aa275fccSKris Buschelman break; 13075021d80fSJed Brown case MAT_SPD: 130877e54ba9SKris Buschelman case MAT_SYMMETRIC: 130977e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 13109a4540c5SBarry Smith case MAT_HERMITIAN: 13119a4540c5SBarry Smith case MAT_SYMMETRY_ETERNAL: 1312c10200c1SHong Zhang case MAT_SUBMAT_SINGLEIS: 1313672ba085SHong Zhang case MAT_STRUCTURE_ONLY: 13145021d80fSJed Brown /* These options are handled directly by MatSetOption() */ 131577e54ba9SKris Buschelman break; 1316aa275fccSKris Buschelman default: 1317e32f2f54SBarry Smith SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 13182d61bbb3SSatish Balay } 13192d61bbb3SSatish Balay PetscFunctionReturn(0); 13202d61bbb3SSatish Balay } 13212d61bbb3SSatish Balay 132252768537SHong Zhang /* used for both SeqBAIJ and SeqSBAIJ matrices */ 132352768537SHong Zhang PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 13242d61bbb3SSatish Balay { 13256849ba73SBarry Smith PetscErrorCode ierr; 132652768537SHong Zhang PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 132752768537SHong Zhang MatScalar *aa_i; 132887828ca2SBarry Smith PetscScalar *v_i; 13292d61bbb3SSatish Balay 13302d61bbb3SSatish Balay PetscFunctionBegin; 1331d0f46423SBarry Smith bs = A->rmap->bs; 133252768537SHong Zhang bs2 = bs*bs; 1333e32f2f54SBarry Smith if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 13342d61bbb3SSatish Balay 13352d61bbb3SSatish Balay bn = row/bs; /* Block number */ 13362d61bbb3SSatish Balay bp = row % bs; /* Block Position */ 13372d61bbb3SSatish Balay M = ai[bn+1] - ai[bn]; 13382d61bbb3SSatish Balay *nz = bs*M; 13392d61bbb3SSatish Balay 13402d61bbb3SSatish Balay if (v) { 1341f4259b30SLisandro Dalcin *v = NULL; 13422d61bbb3SSatish Balay if (*nz) { 1343854ce69bSBarry Smith ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 13442d61bbb3SSatish Balay for (i=0; i<M; i++) { /* for each block in the block row */ 13452d61bbb3SSatish Balay v_i = *v + i*bs; 13462d61bbb3SSatish Balay aa_i = aa + bs2*(ai[bn] + i); 134726fbe8dcSKarl Rupp for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 13482d61bbb3SSatish Balay } 13492d61bbb3SSatish Balay } 13502d61bbb3SSatish Balay } 13512d61bbb3SSatish Balay 13522d61bbb3SSatish Balay if (idx) { 1353f4259b30SLisandro Dalcin *idx = NULL; 13542d61bbb3SSatish Balay if (*nz) { 1355854ce69bSBarry Smith ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 13562d61bbb3SSatish Balay for (i=0; i<M; i++) { /* for each block in the block row */ 13572d61bbb3SSatish Balay idx_i = *idx + i*bs; 13582d61bbb3SSatish Balay itmp = bs*aj[ai[bn] + i]; 135926fbe8dcSKarl Rupp for (j=0; j<bs; j++) idx_i[j] = itmp++; 13602d61bbb3SSatish Balay } 13612d61bbb3SSatish Balay } 13622d61bbb3SSatish Balay } 13632d61bbb3SSatish Balay PetscFunctionReturn(0); 13642d61bbb3SSatish Balay } 13652d61bbb3SSatish Balay 136652768537SHong Zhang PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 136752768537SHong Zhang { 136852768537SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 136952768537SHong Zhang PetscErrorCode ierr; 137052768537SHong Zhang 137152768537SHong Zhang PetscFunctionBegin; 137252768537SHong Zhang ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 137352768537SHong Zhang PetscFunctionReturn(0); 137452768537SHong Zhang } 137552768537SHong Zhang 1376c1ac3661SBarry Smith PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 13772d61bbb3SSatish Balay { 1378dfbe8321SBarry Smith PetscErrorCode ierr; 1379606d414cSSatish Balay 13802d61bbb3SSatish Balay PetscFunctionBegin; 1381cb4a9cd9SHong Zhang if (nz) *nz = 0; 138205b42c5fSBarry Smith if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 138305b42c5fSBarry Smith if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 13842d61bbb3SSatish Balay PetscFunctionReturn(0); 13852d61bbb3SSatish Balay } 13862d61bbb3SSatish Balay 1387fc4dec0aSBarry Smith PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 13882d61bbb3SSatish Balay { 138920e84f26SHong Zhang Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*at; 13902d61bbb3SSatish Balay Mat C; 13916849ba73SBarry Smith PetscErrorCode ierr; 139220e84f26SHong Zhang PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill; 139320e84f26SHong Zhang PetscInt bs2=a->bs2,*ati,*atj,anzj,kr; 139420e84f26SHong Zhang MatScalar *ata,*aa=a->a; 13952d61bbb3SSatish Balay 13962d61bbb3SSatish Balay PetscFunctionBegin; 139720e84f26SHong Zhang ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr); 1398cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 139920e84f26SHong Zhang for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 14002d61bbb3SSatish Balay 1401ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1402d0f46423SBarry Smith ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 14037adad957SLisandro Dalcin ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 140420e84f26SHong Zhang ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr); 140520e84f26SHong Zhang 140620e84f26SHong Zhang at = (Mat_SeqBAIJ*)C->data; 140720e84f26SHong Zhang ati = at->i; 140820e84f26SHong Zhang for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i]; 1409fc4dec0aSBarry Smith } else { 1410fc4dec0aSBarry Smith C = *B; 141120e84f26SHong Zhang at = (Mat_SeqBAIJ*)C->data; 141220e84f26SHong Zhang ati = at->i; 1413fc4dec0aSBarry Smith } 1414fc4dec0aSBarry Smith 141520e84f26SHong Zhang atj = at->j; 141620e84f26SHong Zhang ata = at->a; 141720e84f26SHong Zhang 141820e84f26SHong Zhang /* Copy ati into atfill so we have locations of the next free space in atj */ 1419580bdb30SBarry Smith ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr); 142020e84f26SHong Zhang 142120e84f26SHong Zhang /* Walk through A row-wise and mark nonzero entries of A^T. */ 14222d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 142320e84f26SHong Zhang anzj = ai[i+1] - ai[i]; 142420e84f26SHong Zhang for (j=0; j<anzj; j++) { 142520e84f26SHong Zhang atj[atfill[*aj]] = i; 142620e84f26SHong Zhang for (kr=0; kr<bs; kr++) { 142720e84f26SHong Zhang for (k=0; k<bs; k++) { 142820e84f26SHong Zhang ata[bs2*atfill[*aj]+k*bs+kr] = *aa++; 14292d61bbb3SSatish Balay } 14302d61bbb3SSatish Balay } 143120e84f26SHong Zhang atfill[*aj++] += 1; 143220e84f26SHong Zhang } 143320e84f26SHong Zhang } 14342d61bbb3SSatish Balay ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 14352d61bbb3SSatish Balay ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 14362d61bbb3SSatish Balay 143720e84f26SHong Zhang /* Clean up temporary space and complete requests. */ 143820e84f26SHong Zhang ierr = PetscFree(atfill);CHKERRQ(ierr); 143920e84f26SHong Zhang 1440cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 144120e84f26SHong Zhang ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 14422d61bbb3SSatish Balay *B = C; 14432d61bbb3SSatish Balay } else { 144428be2f97SBarry Smith ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 14452d61bbb3SSatish Balay } 14462d61bbb3SSatish Balay PetscFunctionReturn(0); 14472d61bbb3SSatish Balay } 14482d61bbb3SSatish Balay 1449453d3561SHong Zhang PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1450453d3561SHong Zhang { 1451453d3561SHong Zhang PetscErrorCode ierr; 1452453d3561SHong Zhang Mat Btrans; 1453453d3561SHong Zhang 1454453d3561SHong Zhang PetscFunctionBegin; 1455453d3561SHong Zhang *f = PETSC_FALSE; 1456453d3561SHong Zhang ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1457453d3561SHong Zhang ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1458453d3561SHong Zhang ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1459453d3561SHong Zhang PetscFunctionReturn(0); 1460453d3561SHong Zhang } 1461453d3561SHong Zhang 1462618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1463b51a4376SLisandro Dalcin PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer) 14642593348eSBarry Smith { 1465b51a4376SLisandro Dalcin Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)mat->data; 1466b51a4376SLisandro Dalcin PetscInt header[4],M,N,m,bs,nz,cnt,i,j,k,l; 1467b51a4376SLisandro Dalcin PetscInt *rowlens,*colidxs; 1468b51a4376SLisandro Dalcin PetscScalar *matvals; 14696849ba73SBarry Smith PetscErrorCode ierr; 14702593348eSBarry Smith 14713a40ed3dSBarry Smith PetscFunctionBegin; 1472b51a4376SLisandro Dalcin ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 14733b2fbd54SBarry Smith 1474b51a4376SLisandro Dalcin M = mat->rmap->N; 1475b51a4376SLisandro Dalcin N = mat->cmap->N; 1476b51a4376SLisandro Dalcin m = mat->rmap->n; 1477b51a4376SLisandro Dalcin bs = mat->rmap->bs; 1478b51a4376SLisandro Dalcin nz = bs*bs*A->nz; 14792593348eSBarry Smith 1480b51a4376SLisandro Dalcin /* write matrix header */ 1481b51a4376SLisandro Dalcin header[0] = MAT_FILE_CLASSID; 1482b51a4376SLisandro Dalcin header[1] = M; header[2] = N; header[3] = nz; 1483b51a4376SLisandro Dalcin ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 14842593348eSBarry Smith 1485b51a4376SLisandro Dalcin /* store row lengths */ 1486b51a4376SLisandro Dalcin ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1487b51a4376SLisandro Dalcin for (cnt=0, i=0; i<A->mbs; i++) 1488b51a4376SLisandro Dalcin for (j=0; j<bs; j++) 1489b51a4376SLisandro Dalcin rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]); 1490b51a4376SLisandro Dalcin ierr = PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT);CHKERRQ(ierr); 1491b51a4376SLisandro Dalcin ierr = PetscFree(rowlens);CHKERRQ(ierr); 1492b51a4376SLisandro Dalcin 1493b51a4376SLisandro Dalcin /* store column indices */ 1494b51a4376SLisandro Dalcin ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1495b51a4376SLisandro Dalcin for (cnt=0, i=0; i<A->mbs; i++) 1496b51a4376SLisandro Dalcin for (k=0; k<bs; k++) 1497b51a4376SLisandro Dalcin for (j=A->i[i]; j<A->i[i+1]; j++) 1498b51a4376SLisandro Dalcin for (l=0; l<bs; l++) 1499b51a4376SLisandro Dalcin colidxs[cnt++] = bs*A->j[j] + l; 1500b51a4376SLisandro Dalcin if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1501b51a4376SLisandro Dalcin ierr = PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT);CHKERRQ(ierr); 1502b51a4376SLisandro Dalcin ierr = PetscFree(colidxs);CHKERRQ(ierr); 15032593348eSBarry Smith 15042593348eSBarry Smith /* store nonzero values */ 1505b51a4376SLisandro Dalcin ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1506b51a4376SLisandro Dalcin for (cnt=0, i=0; i<A->mbs; i++) 1507b51a4376SLisandro Dalcin for (k=0; k<bs; k++) 1508b51a4376SLisandro Dalcin for (j=A->i[i]; j<A->i[i+1]; j++) 1509b51a4376SLisandro Dalcin for (l=0; l<bs; l++) 1510b51a4376SLisandro Dalcin matvals[cnt++] = A->a[bs*(bs*j + l) + k]; 1511b51a4376SLisandro Dalcin if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1512b51a4376SLisandro Dalcin ierr = PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1513b51a4376SLisandro Dalcin ierr = PetscFree(matvals);CHKERRQ(ierr); 1514ce6f0cecSBarry Smith 1515b51a4376SLisandro Dalcin /* write block size option to the viewer's .info file */ 1516b51a4376SLisandro Dalcin ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 15173a40ed3dSBarry Smith PetscFunctionReturn(0); 15182593348eSBarry Smith } 15192593348eSBarry Smith 15207dc0baabSHong Zhang static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer) 15217dc0baabSHong Zhang { 15227dc0baabSHong Zhang PetscErrorCode ierr; 15237dc0baabSHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 15247dc0baabSHong Zhang PetscInt i,bs = A->rmap->bs,k; 15257dc0baabSHong Zhang 15267dc0baabSHong Zhang PetscFunctionBegin; 15277dc0baabSHong Zhang ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 15287dc0baabSHong Zhang for (i=0; i<a->mbs; i++) { 15297dc0baabSHong Zhang ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr); 15307dc0baabSHong Zhang for (k=a->i[i]; k<a->i[i+1]; k++) { 15317dc0baabSHong Zhang ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr); 15327dc0baabSHong Zhang } 15337dc0baabSHong Zhang ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 15347dc0baabSHong Zhang } 15357dc0baabSHong Zhang ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 15367dc0baabSHong Zhang PetscFunctionReturn(0); 15377dc0baabSHong Zhang } 15387dc0baabSHong Zhang 15396849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 15402593348eSBarry Smith { 1541b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1542dfbe8321SBarry Smith PetscErrorCode ierr; 1543d0f46423SBarry Smith PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1544f3ef73ceSBarry Smith PetscViewerFormat format; 15452593348eSBarry Smith 15463a40ed3dSBarry Smith PetscFunctionBegin; 15477dc0baabSHong Zhang if (A->structure_only) { 15487dc0baabSHong Zhang ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr); 15497dc0baabSHong Zhang PetscFunctionReturn(0); 15507dc0baabSHong Zhang } 15517dc0baabSHong Zhang 1552b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1553456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 155477431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1555fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1556ade3a672SBarry Smith const char *matname; 1557bcd9e38bSBarry Smith Mat aij; 1558ceb03754SKris Buschelman ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1559ade3a672SBarry Smith ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1560ade3a672SBarry Smith ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1561bcd9e38bSBarry Smith ierr = MatView(aij,viewer);CHKERRQ(ierr); 15626bf464f9SBarry Smith ierr = MatDestroy(&aij);CHKERRQ(ierr); 156304929863SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 156404929863SHong Zhang PetscFunctionReturn(0); 1565fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1566d00279f6SBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 156744cd7ae7SLois Curfman McInnes for (i=0; i<a->mbs; i++) { 156844cd7ae7SLois Curfman McInnes for (j=0; j<bs; j++) { 156977431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 157044cd7ae7SLois Curfman McInnes for (k=a->i[i]; k<a->i[i+1]; k++) { 157144cd7ae7SLois Curfman McInnes for (l=0; l<bs; l++) { 1572aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 15730e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 157457622a8eSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 157557622a8eSBarry Smith (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 15760e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 157757622a8eSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 157857622a8eSBarry Smith (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 15790e6d2581SBarry Smith } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 158057622a8eSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 15810ef38995SBarry Smith } 158244cd7ae7SLois Curfman McInnes #else 15830ef38995SBarry Smith if (a->a[bs2*k + l*bs + j] != 0.0) { 158457622a8eSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 15850ef38995SBarry Smith } 158644cd7ae7SLois Curfman McInnes #endif 158744cd7ae7SLois Curfman McInnes } 158844cd7ae7SLois Curfman McInnes } 1589b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 159044cd7ae7SLois Curfman McInnes } 159144cd7ae7SLois Curfman McInnes } 1592d00279f6SBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 15930ef38995SBarry Smith } else { 1594d00279f6SBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1595b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 1596b6490206SBarry Smith for (j=0; j<bs; j++) { 159777431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1598b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 1599b6490206SBarry Smith for (l=0; l<bs; l++) { 1600aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 16010e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 160257622a8eSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 160357622a8eSBarry Smith (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 16040e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 160557622a8eSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 160657622a8eSBarry Smith (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 16070ef38995SBarry Smith } else { 160857622a8eSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 160988685aaeSLois Curfman McInnes } 161088685aaeSLois Curfman McInnes #else 161157622a8eSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 161288685aaeSLois Curfman McInnes #endif 16132593348eSBarry Smith } 16142593348eSBarry Smith } 1615b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 16162593348eSBarry Smith } 16172593348eSBarry Smith } 1618d00279f6SBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1619b6490206SBarry Smith } 1620b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 16213a40ed3dSBarry Smith PetscFunctionReturn(0); 16222593348eSBarry Smith } 16232593348eSBarry Smith 16249804daf3SBarry Smith #include <petscdraw.h> 16256849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 16263270192aSSatish Balay { 162777ed5343SBarry Smith Mat A = (Mat) Aa; 16283270192aSSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 16296849ba73SBarry Smith PetscErrorCode ierr; 1630d0f46423SBarry Smith PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 16310e6d2581SBarry Smith PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 16323f1db9ecSBarry Smith MatScalar *aa; 1633b0a32e0cSBarry Smith PetscViewer viewer; 1634b3e7f47fSJed Brown PetscViewerFormat format; 16353270192aSSatish Balay 16363a40ed3dSBarry Smith PetscFunctionBegin; 163777ed5343SBarry Smith ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1638b3e7f47fSJed Brown ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1639b0a32e0cSBarry Smith ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 164077ed5343SBarry Smith 16413270192aSSatish Balay /* loop over matrix elements drawing boxes */ 1642b3e7f47fSJed Brown 1643b3e7f47fSJed Brown if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1644383922c3SLisandro Dalcin ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1645383922c3SLisandro Dalcin /* Blue for negative, Cyan for zero and Red for positive */ 1646b0a32e0cSBarry Smith color = PETSC_DRAW_BLUE; 16473270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 16483270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 1649d0f46423SBarry Smith y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 16503270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 16513270192aSSatish Balay aa = a->a + j*bs2; 16523270192aSSatish Balay for (k=0; k<bs; k++) { 16533270192aSSatish Balay for (l=0; l<bs; l++) { 16540e6d2581SBarry Smith if (PetscRealPart(*aa++) >= 0.) continue; 1655b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 16563270192aSSatish Balay } 16573270192aSSatish Balay } 16583270192aSSatish Balay } 16593270192aSSatish Balay } 1660b0a32e0cSBarry Smith color = PETSC_DRAW_CYAN; 16613270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 16623270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 1663d0f46423SBarry Smith y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 16643270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 16653270192aSSatish Balay aa = a->a + j*bs2; 16663270192aSSatish Balay for (k=0; k<bs; k++) { 16673270192aSSatish Balay for (l=0; l<bs; l++) { 16680e6d2581SBarry Smith if (PetscRealPart(*aa++) != 0.) continue; 1669b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 16703270192aSSatish Balay } 16713270192aSSatish Balay } 16723270192aSSatish Balay } 16733270192aSSatish Balay } 1674b0a32e0cSBarry Smith color = PETSC_DRAW_RED; 16753270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 16763270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 1677d0f46423SBarry Smith y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 16783270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 16793270192aSSatish Balay aa = a->a + j*bs2; 16803270192aSSatish Balay for (k=0; k<bs; k++) { 16813270192aSSatish Balay for (l=0; l<bs; l++) { 16820e6d2581SBarry Smith if (PetscRealPart(*aa++) <= 0.) continue; 1683b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 16843270192aSSatish Balay } 16853270192aSSatish Balay } 16863270192aSSatish Balay } 16873270192aSSatish Balay } 1688383922c3SLisandro Dalcin ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1689b3e7f47fSJed Brown } else { 1690b3e7f47fSJed Brown /* use contour shading to indicate magnitude of values */ 1691b3e7f47fSJed Brown /* first determine max of all nonzero values */ 1692b05fc000SLisandro Dalcin PetscReal minv = 0.0, maxv = 0.0; 1693b3e7f47fSJed Brown PetscDraw popup; 1694b3e7f47fSJed Brown 1695b3e7f47fSJed Brown for (i=0; i<a->nz*a->bs2; i++) { 1696b3e7f47fSJed Brown if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1697b3e7f47fSJed Brown } 1698383922c3SLisandro Dalcin if (minv >= maxv) maxv = minv + PETSC_SMALL; 1699b3e7f47fSJed Brown ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 170045f3bb6eSLisandro Dalcin ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1701383922c3SLisandro Dalcin 1702383922c3SLisandro Dalcin ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1703b3e7f47fSJed Brown for (i=0,row=0; i<mbs; i++,row+=bs) { 1704b3e7f47fSJed Brown for (j=a->i[i]; j<a->i[i+1]; j++) { 1705b3e7f47fSJed Brown y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1706b3e7f47fSJed Brown x_l = a->j[j]*bs; x_r = x_l + 1.0; 1707b3e7f47fSJed Brown aa = a->a + j*bs2; 1708b3e7f47fSJed Brown for (k=0; k<bs; k++) { 1709b3e7f47fSJed Brown for (l=0; l<bs; l++) { 1710383922c3SLisandro Dalcin MatScalar v = *aa++; 1711383922c3SLisandro Dalcin color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1712b3e7f47fSJed Brown ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1713b3e7f47fSJed Brown } 1714b3e7f47fSJed Brown } 1715b3e7f47fSJed Brown } 1716b3e7f47fSJed Brown } 1717383922c3SLisandro Dalcin ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1718b3e7f47fSJed Brown } 171977ed5343SBarry Smith PetscFunctionReturn(0); 172077ed5343SBarry Smith } 17213270192aSSatish Balay 17226849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 172377ed5343SBarry Smith { 1724dfbe8321SBarry Smith PetscErrorCode ierr; 17250e6d2581SBarry Smith PetscReal xl,yl,xr,yr,w,h; 1726b0a32e0cSBarry Smith PetscDraw draw; 1727ace3abfcSBarry Smith PetscBool isnull; 17283270192aSSatish Balay 172977ed5343SBarry Smith PetscFunctionBegin; 1730b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 173145f3bb6eSLisandro Dalcin ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 173245f3bb6eSLisandro Dalcin if (isnull) PetscFunctionReturn(0); 173377ed5343SBarry Smith 1734d0f46423SBarry Smith xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 173577ed5343SBarry Smith xr += w; yr += h; xl = -w; yl = -h; 1736b0a32e0cSBarry Smith ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1737832b7cebSLisandro Dalcin ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1738b0a32e0cSBarry Smith ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 17390298fd71SBarry Smith ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1740832b7cebSLisandro Dalcin ierr = PetscDrawSave(draw);CHKERRQ(ierr); 17413a40ed3dSBarry Smith PetscFunctionReturn(0); 17423270192aSSatish Balay } 17433270192aSSatish Balay 1744dfbe8321SBarry Smith PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 17452593348eSBarry Smith { 1746dfbe8321SBarry Smith PetscErrorCode ierr; 1747ace3abfcSBarry Smith PetscBool iascii,isbinary,isdraw; 17482593348eSBarry Smith 17493a40ed3dSBarry Smith PetscFunctionBegin; 1750251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1751251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1752251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 175332077d6dSBarry Smith if (iascii) { 17543a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 17550f5bd95cSBarry Smith } else if (isbinary) { 17563a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 17570f5bd95cSBarry Smith } else if (isdraw) { 17583a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 17595cd90555SBarry Smith } else { 1760a5e6ed63SBarry Smith Mat B; 1761ceb03754SKris Buschelman ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1762a5e6ed63SBarry Smith ierr = MatView(B,viewer);CHKERRQ(ierr); 17636bf464f9SBarry Smith ierr = MatDestroy(&B);CHKERRQ(ierr); 17642593348eSBarry Smith } 17653a40ed3dSBarry Smith PetscFunctionReturn(0); 17662593348eSBarry Smith } 1767b6490206SBarry Smith 1768c1ac3661SBarry Smith PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1769cd0e1443SSatish Balay { 1770cd0e1443SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1771c1ac3661SBarry Smith PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1772c1ac3661SBarry Smith PetscInt *ai = a->i,*ailen = a->ilen; 1773d0f46423SBarry Smith PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 177497e567efSBarry Smith MatScalar *ap,*aa = a->a; 1775cd0e1443SSatish Balay 17763a40ed3dSBarry Smith PetscFunctionBegin; 17772d61bbb3SSatish Balay for (k=0; k<m; k++) { /* loop over rows */ 1778cd0e1443SSatish Balay row = im[k]; brow = row/bs; 1779e32f2f54SBarry Smith if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1780e32f2f54SBarry Smith if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 17812d61bbb3SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 17822c3acbe9SBarry Smith nrow = ailen[brow]; 17832d61bbb3SSatish Balay for (l=0; l<n; l++) { /* loop over columns */ 1784e32f2f54SBarry Smith if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1785e32f2f54SBarry Smith if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 17862d61bbb3SSatish Balay col = in[l]; 17872d61bbb3SSatish Balay bcol = col/bs; 17882d61bbb3SSatish Balay cidx = col%bs; 17892d61bbb3SSatish Balay ridx = row%bs; 17902d61bbb3SSatish Balay high = nrow; 17912d61bbb3SSatish Balay low = 0; /* assume unsorted */ 17922d61bbb3SSatish Balay while (high-low > 5) { 1793cd0e1443SSatish Balay t = (low+high)/2; 1794cd0e1443SSatish Balay if (rp[t] > bcol) high = t; 1795cd0e1443SSatish Balay else low = t; 1796cd0e1443SSatish Balay } 1797cd0e1443SSatish Balay for (i=low; i<high; i++) { 1798cd0e1443SSatish Balay if (rp[i] > bcol) break; 1799cd0e1443SSatish Balay if (rp[i] == bcol) { 18002d61bbb3SSatish Balay *v++ = ap[bs2*i+bs*cidx+ridx]; 18012d61bbb3SSatish Balay goto finished; 1802cd0e1443SSatish Balay } 1803cd0e1443SSatish Balay } 180497e567efSBarry Smith *v++ = 0.0; 18052d61bbb3SSatish Balay finished:; 1806cd0e1443SSatish Balay } 1807cd0e1443SSatish Balay } 18083a40ed3dSBarry Smith PetscFunctionReturn(0); 1809cd0e1443SSatish Balay } 1810cd0e1443SSatish Balay 1811dd6ea824SBarry Smith PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 181292c4ed94SBarry Smith { 181392c4ed94SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1814e2ee6c50SBarry Smith PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1815c1ac3661SBarry Smith PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 18166849ba73SBarry Smith PetscErrorCode ierr; 1817d0f46423SBarry Smith PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1818ace3abfcSBarry Smith PetscBool roworiented=a->roworiented; 1819dd6ea824SBarry Smith const PetscScalar *value = v; 18209d243f67SHong Zhang MatScalar *ap=NULL,*aa = a->a,*bap; 182192c4ed94SBarry Smith 18223a40ed3dSBarry Smith PetscFunctionBegin; 18230e324ae4SSatish Balay if (roworiented) { 18240e324ae4SSatish Balay stepval = (n-1)*bs; 18250e324ae4SSatish Balay } else { 18260e324ae4SSatish Balay stepval = (m-1)*bs; 18270e324ae4SSatish Balay } 182892c4ed94SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 182992c4ed94SBarry Smith row = im[k]; 18305ef9f2a5SBarry Smith if (row < 0) continue; 1831cf9c20a2SJed Brown if (PetscUnlikelyDebug(row >= a->mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 183292c4ed94SBarry Smith rp = aj + ai[row]; 18337dc0baabSHong Zhang if (!A->structure_only) ap = aa + bs2*ai[row]; 183492c4ed94SBarry Smith rmax = imax[row]; 183592c4ed94SBarry Smith nrow = ailen[row]; 183692c4ed94SBarry Smith low = 0; 1837c71e6ed7SBarry Smith high = nrow; 183892c4ed94SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 18395ef9f2a5SBarry Smith if (in[l] < 0) continue; 1840cf9c20a2SJed Brown if (PetscUnlikelyDebug(in[l] >= a->nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 184192c4ed94SBarry Smith col = in[l]; 18427dc0baabSHong Zhang if (!A->structure_only) { 184392c4ed94SBarry Smith if (roworiented) { 184453ef36baSBarry Smith value = v + (k*(stepval+bs) + l)*bs; 18450e324ae4SSatish Balay } else { 184653ef36baSBarry Smith value = v + (l*(stepval+bs) + k)*bs; 184792c4ed94SBarry Smith } 18487dc0baabSHong Zhang } 184926fbe8dcSKarl Rupp if (col <= lastcol) low = 0; 185026fbe8dcSKarl Rupp else high = nrow; 1851e2ee6c50SBarry Smith lastcol = col; 185292c4ed94SBarry Smith while (high-low > 7) { 185392c4ed94SBarry Smith t = (low+high)/2; 185492c4ed94SBarry Smith if (rp[t] > col) high = t; 185592c4ed94SBarry Smith else low = t; 185692c4ed94SBarry Smith } 185792c4ed94SBarry Smith for (i=low; i<high; i++) { 185892c4ed94SBarry Smith if (rp[i] > col) break; 185992c4ed94SBarry Smith if (rp[i] == col) { 18607dc0baabSHong Zhang if (A->structure_only) goto noinsert2; 18618a84c255SSatish Balay bap = ap + bs2*i; 18620e324ae4SSatish Balay if (roworiented) { 18638a84c255SSatish Balay if (is == ADD_VALUES) { 1864dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1865dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 18668a84c255SSatish Balay bap[jj] += *value++; 1867dd9472c6SBarry Smith } 1868dd9472c6SBarry Smith } 18690e324ae4SSatish Balay } else { 1870dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1871dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 18720e324ae4SSatish Balay bap[jj] = *value++; 18738a84c255SSatish Balay } 1874dd9472c6SBarry Smith } 1875dd9472c6SBarry Smith } 18760e324ae4SSatish Balay } else { 18770e324ae4SSatish Balay if (is == ADD_VALUES) { 187853ef36baSBarry Smith for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1879dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 188053ef36baSBarry Smith bap[jj] += value[jj]; 1881dd9472c6SBarry Smith } 188253ef36baSBarry Smith bap += bs; 1883dd9472c6SBarry Smith } 18840e324ae4SSatish Balay } else { 188553ef36baSBarry Smith for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1886dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 188753ef36baSBarry Smith bap[jj] = value[jj]; 18880e324ae4SSatish Balay } 188953ef36baSBarry Smith bap += bs; 18908a84c255SSatish Balay } 1891dd9472c6SBarry Smith } 1892dd9472c6SBarry Smith } 1893f1241b54SBarry Smith goto noinsert2; 189492c4ed94SBarry Smith } 189592c4ed94SBarry Smith } 189689280ab3SLois Curfman McInnes if (nonew == 1) goto noinsert2; 18972f7d4af7SBarry Smith if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 18987dc0baabSHong Zhang if (A->structure_only) { 18997dc0baabSHong Zhang MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar); 19007dc0baabSHong Zhang } else { 1901fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 19027dc0baabSHong Zhang } 1903c03d1d03SSatish Balay N = nrow++ - 1; high++; 190492c4ed94SBarry Smith /* shift up all the later entries in this row */ 1905580bdb30SBarry Smith ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 190692c4ed94SBarry Smith rp[i] = col; 19077dc0baabSHong Zhang if (!A->structure_only) { 1908580bdb30SBarry Smith ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 19098a84c255SSatish Balay bap = ap + bs2*i; 19100e324ae4SSatish Balay if (roworiented) { 1911dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1912dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 19130e324ae4SSatish Balay bap[jj] = *value++; 1914dd9472c6SBarry Smith } 1915dd9472c6SBarry Smith } 19160e324ae4SSatish Balay } else { 1917dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1918dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 19190e324ae4SSatish Balay *bap++ = *value++; 19200e324ae4SSatish Balay } 1921dd9472c6SBarry Smith } 1922dd9472c6SBarry Smith } 19237dc0baabSHong Zhang } 1924f1241b54SBarry Smith noinsert2:; 192592c4ed94SBarry Smith low = i; 192692c4ed94SBarry Smith } 192792c4ed94SBarry Smith ailen[row] = nrow; 192892c4ed94SBarry Smith } 19293a40ed3dSBarry Smith PetscFunctionReturn(0); 193092c4ed94SBarry Smith } 193126e093fcSHong Zhang 1932dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1933584200bdSSatish Balay { 1934584200bdSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1935580bdb30SBarry Smith PetscInt fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax; 1936d0f46423SBarry Smith PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 19376849ba73SBarry Smith PetscErrorCode ierr; 1938c1ac3661SBarry Smith PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 19393f1db9ecSBarry Smith MatScalar *aa = a->a,*ap; 19403447b6efSHong Zhang PetscReal ratio=0.6; 1941584200bdSSatish Balay 19423a40ed3dSBarry Smith PetscFunctionBegin; 19433a40ed3dSBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1944584200bdSSatish Balay 194543ee02c3SBarry Smith if (m) rmax = ailen[0]; 1946584200bdSSatish Balay for (i=1; i<mbs; i++) { 1947584200bdSSatish Balay /* move each row back by the amount of empty slots (fshift) before it*/ 1948584200bdSSatish Balay fshift += imax[i-1] - ailen[i-1]; 1949d402145bSBarry Smith rmax = PetscMax(rmax,ailen[i]); 1950584200bdSSatish Balay if (fshift) { 1951580bdb30SBarry Smith ip = aj + ai[i]; 1952580bdb30SBarry Smith ap = aa + bs2*ai[i]; 1953584200bdSSatish Balay N = ailen[i]; 1954580bdb30SBarry Smith ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr); 1955672ba085SHong Zhang if (!A->structure_only) { 1956580bdb30SBarry Smith ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr); 1957584200bdSSatish Balay } 1958672ba085SHong Zhang } 1959584200bdSSatish Balay ai[i] = ai[i-1] + ailen[i-1]; 1960584200bdSSatish Balay } 1961584200bdSSatish Balay if (mbs) { 1962584200bdSSatish Balay fshift += imax[mbs-1] - ailen[mbs-1]; 1963584200bdSSatish Balay ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1964584200bdSSatish Balay } 19657c565772SBarry Smith 1966584200bdSSatish Balay /* reset ilen and imax for each row */ 19677c565772SBarry Smith a->nonzerorowcnt = 0; 1968672ba085SHong Zhang if (A->structure_only) { 1969672ba085SHong Zhang ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1970672ba085SHong Zhang } else { /* !A->structure_only */ 1971584200bdSSatish Balay for (i=0; i<mbs; i++) { 1972584200bdSSatish Balay ailen[i] = imax[i] = ai[i+1] - ai[i]; 19737c565772SBarry Smith a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1974584200bdSSatish Balay } 1975672ba085SHong Zhang } 1976a7c10996SSatish Balay a->nz = ai[mbs]; 1977584200bdSSatish Balay 1978584200bdSSatish Balay /* diagonals may have moved, so kill the diagonal pointers */ 1979b01c7715SBarry Smith a->idiagvalid = PETSC_FALSE; 1980584200bdSSatish Balay if (fshift && a->diag) { 1981606d414cSSatish Balay ierr = PetscFree(a->diag);CHKERRQ(ierr); 19823bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1983f4259b30SLisandro Dalcin a->diag = NULL; 1984584200bdSSatish Balay } 198565e19b50SBarry Smith if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1986d0f46423SBarry Smith ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1987ae15b995SBarry Smith ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1988ae15b995SBarry Smith ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 198926fbe8dcSKarl Rupp 19908e58a170SBarry Smith A->info.mallocs += a->reallocs; 1991e2f3b5e9SSatish Balay a->reallocs = 0; 19920e6d2581SBarry Smith A->info.nz_unneeded = (PetscReal)fshift*bs2; 1993647a6520SHong Zhang a->rmax = rmax; 1994cf4441caSHong Zhang 1995672ba085SHong Zhang if (!A->structure_only) { 199611e456e1SBarry Smith ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1997672ba085SHong Zhang } 19983a40ed3dSBarry Smith PetscFunctionReturn(0); 1999584200bdSSatish Balay } 2000584200bdSSatish Balay 2001bea157c4SSatish Balay /* 2002bea157c4SSatish Balay This function returns an array of flags which indicate the locations of contiguous 2003bea157c4SSatish Balay blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2004bea157c4SSatish Balay then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2005bea157c4SSatish Balay Assume: sizes should be long enough to hold all the values. 2006bea157c4SSatish Balay */ 2007c1ac3661SBarry Smith static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2008d9b7c43dSSatish Balay { 2009c1ac3661SBarry Smith PetscInt i,j,k,row; 2010ace3abfcSBarry Smith PetscBool flg; 20113a40ed3dSBarry Smith 2012433994e6SBarry Smith PetscFunctionBegin; 2013bea157c4SSatish Balay for (i=0,j=0; i<n; j++) { 2014bea157c4SSatish Balay row = idx[i]; 2015bea157c4SSatish Balay if (row%bs!=0) { /* Not the begining of a block */ 2016bea157c4SSatish Balay sizes[j] = 1; 2017bea157c4SSatish Balay i++; 2018e4fda26cSSatish Balay } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2019bea157c4SSatish Balay sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2020bea157c4SSatish Balay i++; 2021bea157c4SSatish Balay } else { /* Begining of the block, so check if the complete block exists */ 2022bea157c4SSatish Balay flg = PETSC_TRUE; 2023bea157c4SSatish Balay for (k=1; k<bs; k++) { 2024bea157c4SSatish Balay if (row+k != idx[i+k]) { /* break in the block */ 2025bea157c4SSatish Balay flg = PETSC_FALSE; 2026bea157c4SSatish Balay break; 2027d9b7c43dSSatish Balay } 2028bea157c4SSatish Balay } 2029abc0a331SBarry Smith if (flg) { /* No break in the bs */ 2030bea157c4SSatish Balay sizes[j] = bs; 2031bea157c4SSatish Balay i += bs; 2032bea157c4SSatish Balay } else { 2033bea157c4SSatish Balay sizes[j] = 1; 2034bea157c4SSatish Balay i++; 2035bea157c4SSatish Balay } 2036bea157c4SSatish Balay } 2037bea157c4SSatish Balay } 2038bea157c4SSatish Balay *bs_max = j; 20393a40ed3dSBarry Smith PetscFunctionReturn(0); 2040d9b7c43dSSatish Balay } 2041d9b7c43dSSatish Balay 20422b40b63fSBarry Smith PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2043d9b7c43dSSatish Balay { 2044d9b7c43dSSatish Balay Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2045dfbe8321SBarry Smith PetscErrorCode ierr; 2046f4df32b1SMatthew Knepley PetscInt i,j,k,count,*rows; 2047d0f46423SBarry Smith PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 204887828ca2SBarry Smith PetscScalar zero = 0.0; 20493f1db9ecSBarry Smith MatScalar *aa; 205097b48c8fSBarry Smith const PetscScalar *xx; 205197b48c8fSBarry Smith PetscScalar *bb; 2052d9b7c43dSSatish Balay 20533a40ed3dSBarry Smith PetscFunctionBegin; 205497b48c8fSBarry Smith /* fix right hand side if needed */ 205597b48c8fSBarry Smith if (x && b) { 205697b48c8fSBarry Smith ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 205797b48c8fSBarry Smith ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 205897b48c8fSBarry Smith for (i=0; i<is_n; i++) { 205997b48c8fSBarry Smith bb[is_idx[i]] = diag*xx[is_idx[i]]; 206097b48c8fSBarry Smith } 206197b48c8fSBarry Smith ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 206297b48c8fSBarry Smith ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 206397b48c8fSBarry Smith } 206497b48c8fSBarry Smith 2065d9b7c43dSSatish Balay /* Make a copy of the IS and sort it */ 2066bea157c4SSatish Balay /* allocate memory for rows,sizes */ 2067dcca6d9dSJed Brown ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2068bea157c4SSatish Balay 2069563b5814SBarry Smith /* copy IS values to rows, and sort them */ 207026fbe8dcSKarl Rupp for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2071bea157c4SSatish Balay ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 207297b48c8fSBarry Smith 2073a9817697SBarry Smith if (baij->keepnonzeropattern) { 207426fbe8dcSKarl Rupp for (i=0; i<is_n; i++) sizes[i] = 1; 2075dffd3267SBarry Smith bs_max = is_n; 2076dffd3267SBarry Smith } else { 2077bea157c4SSatish Balay ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2078e56f5c9eSBarry Smith A->nonzerostate++; 2079dffd3267SBarry Smith } 2080bea157c4SSatish Balay 2081bea157c4SSatish Balay for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2082bea157c4SSatish Balay row = rows[j]; 2083e32f2f54SBarry Smith if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2084bea157c4SSatish Balay count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2085b31fbe3bSSatish Balay aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2086a9817697SBarry Smith if (sizes[i] == bs && !baij->keepnonzeropattern) { 2087d4a378daSJed Brown if (diag != (PetscScalar)0.0) { 2088bea157c4SSatish Balay if (baij->ilen[row/bs] > 0) { 2089bea157c4SSatish Balay baij->ilen[row/bs] = 1; 2090bea157c4SSatish Balay baij->j[baij->i[row/bs]] = row/bs; 209126fbe8dcSKarl Rupp 2092580bdb30SBarry Smith ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr); 2093a07cd24cSSatish Balay } 2094563b5814SBarry Smith /* Now insert all the diagonal values for this bs */ 2095bea157c4SSatish Balay for (k=0; k<bs; k++) { 2096f4df32b1SMatthew Knepley ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2097bea157c4SSatish Balay } 2098f4df32b1SMatthew Knepley } else { /* (diag == 0.0) */ 2099bea157c4SSatish Balay baij->ilen[row/bs] = 0; 2100f4df32b1SMatthew Knepley } /* end (diag == 0.0) */ 2101bea157c4SSatish Balay } else { /* (sizes[i] != bs) */ 2102cf9c20a2SJed Brown if (PetscUnlikelyDebug(sizes[i] != 1)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2103bea157c4SSatish Balay for (k=0; k<count; k++) { 2104d9b7c43dSSatish Balay aa[0] = zero; 2105d9b7c43dSSatish Balay aa += bs; 2106d9b7c43dSSatish Balay } 2107d4a378daSJed Brown if (diag != (PetscScalar)0.0) { 2108f4df32b1SMatthew Knepley ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2109d9b7c43dSSatish Balay } 2110d9b7c43dSSatish Balay } 2111bea157c4SSatish Balay } 2112bea157c4SSatish Balay 2113fca92195SBarry Smith ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 21149a8dea36SBarry Smith ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 21153a40ed3dSBarry Smith PetscFunctionReturn(0); 2116d9b7c43dSSatish Balay } 21171c351548SSatish Balay 211897b48c8fSBarry Smith PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 211997b48c8fSBarry Smith { 212097b48c8fSBarry Smith Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 212197b48c8fSBarry Smith PetscErrorCode ierr; 212297b48c8fSBarry Smith PetscInt i,j,k,count; 212397b48c8fSBarry Smith PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 212497b48c8fSBarry Smith PetscScalar zero = 0.0; 212597b48c8fSBarry Smith MatScalar *aa; 212697b48c8fSBarry Smith const PetscScalar *xx; 212797b48c8fSBarry Smith PetscScalar *bb; 212856777dd2SBarry Smith PetscBool *zeroed,vecs = PETSC_FALSE; 212997b48c8fSBarry Smith 213097b48c8fSBarry Smith PetscFunctionBegin; 213197b48c8fSBarry Smith /* fix right hand side if needed */ 213297b48c8fSBarry Smith if (x && b) { 213397b48c8fSBarry Smith ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 213497b48c8fSBarry Smith ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 213556777dd2SBarry Smith vecs = PETSC_TRUE; 213697b48c8fSBarry Smith } 213797b48c8fSBarry Smith 213897b48c8fSBarry Smith /* zero the columns */ 21391795a4d1SJed Brown ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 214097b48c8fSBarry Smith for (i=0; i<is_n; i++) { 214197b48c8fSBarry Smith if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 214297b48c8fSBarry Smith zeroed[is_idx[i]] = PETSC_TRUE; 214397b48c8fSBarry Smith } 214497b48c8fSBarry Smith for (i=0; i<A->rmap->N; i++) { 214597b48c8fSBarry Smith if (!zeroed[i]) { 214697b48c8fSBarry Smith row = i/bs; 214797b48c8fSBarry Smith for (j=baij->i[row]; j<baij->i[row+1]; j++) { 214897b48c8fSBarry Smith for (k=0; k<bs; k++) { 214997b48c8fSBarry Smith col = bs*baij->j[j] + k; 215097b48c8fSBarry Smith if (zeroed[col]) { 215197b48c8fSBarry Smith aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 215256777dd2SBarry Smith if (vecs) bb[i] -= aa[0]*xx[col]; 215397b48c8fSBarry Smith aa[0] = 0.0; 215497b48c8fSBarry Smith } 215597b48c8fSBarry Smith } 215697b48c8fSBarry Smith } 215756777dd2SBarry Smith } else if (vecs) bb[i] = diag*xx[i]; 215897b48c8fSBarry Smith } 215997b48c8fSBarry Smith ierr = PetscFree(zeroed);CHKERRQ(ierr); 216056777dd2SBarry Smith if (vecs) { 216156777dd2SBarry Smith ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 216256777dd2SBarry Smith ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 216356777dd2SBarry Smith } 216497b48c8fSBarry Smith 216597b48c8fSBarry Smith /* zero the rows */ 216697b48c8fSBarry Smith for (i=0; i<is_n; i++) { 216797b48c8fSBarry Smith row = is_idx[i]; 216897b48c8fSBarry Smith count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 216997b48c8fSBarry Smith aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 217097b48c8fSBarry Smith for (k=0; k<count; k++) { 217197b48c8fSBarry Smith aa[0] = zero; 217297b48c8fSBarry Smith aa += bs; 217397b48c8fSBarry Smith } 2174d4a378daSJed Brown if (diag != (PetscScalar)0.0) { 217597b48c8fSBarry Smith ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 217697b48c8fSBarry Smith } 217797b48c8fSBarry Smith } 217897b48c8fSBarry Smith ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 217997b48c8fSBarry Smith PetscFunctionReturn(0); 218097b48c8fSBarry Smith } 218197b48c8fSBarry Smith 2182c1ac3661SBarry Smith PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 21832d61bbb3SSatish Balay { 21842d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2185e2ee6c50SBarry Smith PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2186c1ac3661SBarry Smith PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2187d0f46423SBarry Smith PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 21886849ba73SBarry Smith PetscErrorCode ierr; 2189c1ac3661SBarry Smith PetscInt ridx,cidx,bs2=a->bs2; 2190ace3abfcSBarry Smith PetscBool roworiented=a->roworiented; 2191d8cdefa3SHong Zhang MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap; 21922d61bbb3SSatish Balay 21932d61bbb3SSatish Balay PetscFunctionBegin; 21942d61bbb3SSatish Balay for (k=0; k<m; k++) { /* loop over added rows */ 2195085a36d4SBarry Smith row = im[k]; 2196085a36d4SBarry Smith brow = row/bs; 21975ef9f2a5SBarry Smith if (row < 0) continue; 2198cf9c20a2SJed Brown if (PetscUnlikelyDebug(row >= A->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 21992d61bbb3SSatish Balay rp = aj + ai[brow]; 2200672ba085SHong Zhang if (!A->structure_only) ap = aa + bs2*ai[brow]; 22012d61bbb3SSatish Balay rmax = imax[brow]; 22022d61bbb3SSatish Balay nrow = ailen[brow]; 22032d61bbb3SSatish Balay low = 0; 2204c71e6ed7SBarry Smith high = nrow; 22052d61bbb3SSatish Balay for (l=0; l<n; l++) { /* loop over added columns */ 22065ef9f2a5SBarry Smith if (in[l] < 0) continue; 2207cf9c20a2SJed Brown if (PetscUnlikelyDebug(in[l] >= A->cmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 22082d61bbb3SSatish Balay col = in[l]; bcol = col/bs; 22092d61bbb3SSatish Balay ridx = row % bs; cidx = col % bs; 2210672ba085SHong Zhang if (!A->structure_only) { 22112d61bbb3SSatish Balay if (roworiented) { 22125ef9f2a5SBarry Smith value = v[l + k*n]; 22132d61bbb3SSatish Balay } else { 22142d61bbb3SSatish Balay value = v[k + l*m]; 22152d61bbb3SSatish Balay } 2216672ba085SHong Zhang } 22177cd84e04SBarry Smith if (col <= lastcol) low = 0; else high = nrow; 2218e2ee6c50SBarry Smith lastcol = col; 22192d61bbb3SSatish Balay while (high-low > 7) { 22202d61bbb3SSatish Balay t = (low+high)/2; 22212d61bbb3SSatish Balay if (rp[t] > bcol) high = t; 22222d61bbb3SSatish Balay else low = t; 22232d61bbb3SSatish Balay } 22242d61bbb3SSatish Balay for (i=low; i<high; i++) { 22252d61bbb3SSatish Balay if (rp[i] > bcol) break; 22262d61bbb3SSatish Balay if (rp[i] == bcol) { 22272d61bbb3SSatish Balay bap = ap + bs2*i + bs*cidx + ridx; 2228672ba085SHong Zhang if (!A->structure_only) { 22292d61bbb3SSatish Balay if (is == ADD_VALUES) *bap += value; 22302d61bbb3SSatish Balay else *bap = value; 2231672ba085SHong Zhang } 22322d61bbb3SSatish Balay goto noinsert1; 22332d61bbb3SSatish Balay } 22342d61bbb3SSatish Balay } 22352d61bbb3SSatish Balay if (nonew == 1) goto noinsert1; 2236e32f2f54SBarry Smith if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2237672ba085SHong Zhang if (A->structure_only) { 2238672ba085SHong Zhang MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar); 2239672ba085SHong Zhang } else { 2240fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2241672ba085SHong Zhang } 2242c03d1d03SSatish Balay N = nrow++ - 1; high++; 22432d61bbb3SSatish Balay /* shift up all the later entries in this row */ 2244580bdb30SBarry Smith ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 22452d61bbb3SSatish Balay rp[i] = bcol; 2246580bdb30SBarry Smith if (!A->structure_only) { 2247580bdb30SBarry Smith ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 2248580bdb30SBarry Smith ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr); 2249580bdb30SBarry Smith ap[bs2*i + bs*cidx + ridx] = value; 2250580bdb30SBarry Smith } 2251085a36d4SBarry Smith a->nz++; 2252e56f5c9eSBarry Smith A->nonzerostate++; 22532d61bbb3SSatish Balay noinsert1:; 22542d61bbb3SSatish Balay low = i; 22552d61bbb3SSatish Balay } 22562d61bbb3SSatish Balay ailen[brow] = nrow; 22572d61bbb3SSatish Balay } 22582d61bbb3SSatish Balay PetscFunctionReturn(0); 22592d61bbb3SSatish Balay } 22602d61bbb3SSatish Balay 22610481f469SBarry Smith PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 22622d61bbb3SSatish Balay { 22632d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 22642d61bbb3SSatish Balay Mat outA; 2265dfbe8321SBarry Smith PetscErrorCode ierr; 2266ace3abfcSBarry Smith PetscBool row_identity,col_identity; 22672d61bbb3SSatish Balay 22682d61bbb3SSatish Balay PetscFunctionBegin; 2269e32f2f54SBarry Smith if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2270667159a5SBarry Smith ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2271667159a5SBarry Smith ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2272f23aa3ddSBarry Smith if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 22732d61bbb3SSatish Balay 22742d61bbb3SSatish Balay outA = inA; 2275d5f3da31SBarry Smith inA->factortype = MAT_FACTOR_LU; 2276f6224b95SHong Zhang ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2277f6224b95SHong Zhang ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 22782d61bbb3SSatish Balay 2279c4992f7dSBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2280cf242676SKris Buschelman 2281c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 22826bf464f9SBarry Smith ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2283c3122656SLisandro Dalcin a->row = row; 2284c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 22856bf464f9SBarry Smith ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2286c3122656SLisandro Dalcin a->col = col; 2287c38d4ed2SBarry Smith 2288c38d4ed2SBarry Smith /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 22896bf464f9SBarry Smith ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 22904c49b128SBarry Smith ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 22913bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2292c38d4ed2SBarry Smith 2293ace3abfcSBarry Smith ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2294c38d4ed2SBarry Smith if (!a->solve_work) { 2295854ce69bSBarry Smith ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 22963bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2297c38d4ed2SBarry Smith } 2298719d5645SBarry Smith ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 22992d61bbb3SSatish Balay PetscFunctionReturn(0); 23002d61bbb3SSatish Balay } 2301d9b7c43dSSatish Balay 23027087cfbeSBarry Smith PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 230327a8da17SBarry Smith { 230427a8da17SBarry Smith Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2305bdb1c0e1SJed Brown PetscInt i,nz,mbs; 230627a8da17SBarry Smith 230727a8da17SBarry Smith PetscFunctionBegin; 2308b32cb4a7SJed Brown nz = baij->maxnz; 2309bdb1c0e1SJed Brown mbs = baij->mbs; 231027a8da17SBarry Smith for (i=0; i<nz; i++) { 231127a8da17SBarry Smith baij->j[i] = indices[i]; 231227a8da17SBarry Smith } 231327a8da17SBarry Smith baij->nz = nz; 2314bdb1c0e1SJed Brown for (i=0; i<mbs; i++) { 231527a8da17SBarry Smith baij->ilen[i] = baij->imax[i]; 231627a8da17SBarry Smith } 231727a8da17SBarry Smith PetscFunctionReturn(0); 231827a8da17SBarry Smith } 231927a8da17SBarry Smith 232027a8da17SBarry Smith /*@ 232127a8da17SBarry Smith MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 232227a8da17SBarry Smith in the matrix. 232327a8da17SBarry Smith 232427a8da17SBarry Smith Input Parameters: 232527a8da17SBarry Smith + mat - the SeqBAIJ matrix 232627a8da17SBarry Smith - indices - the column indices 232727a8da17SBarry Smith 232815091d37SBarry Smith Level: advanced 232915091d37SBarry Smith 233027a8da17SBarry Smith Notes: 233127a8da17SBarry Smith This can be called if you have precomputed the nonzero structure of the 233227a8da17SBarry Smith matrix and want to provide it to the matrix object to improve the performance 233327a8da17SBarry Smith of the MatSetValues() operation. 233427a8da17SBarry Smith 233527a8da17SBarry Smith You MUST have set the correct numbers of nonzeros per row in the call to 2336d1be2dadSMatthew Knepley MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 233727a8da17SBarry Smith 233827a8da17SBarry Smith MUST be called before any calls to MatSetValues(); 233927a8da17SBarry Smith 234027a8da17SBarry Smith @*/ 23417087cfbeSBarry Smith PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 234227a8da17SBarry Smith { 23434ac538c5SBarry Smith PetscErrorCode ierr; 234427a8da17SBarry Smith 234527a8da17SBarry Smith PetscFunctionBegin; 23460700a824SBarry Smith PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 23474482741eSBarry Smith PetscValidPointer(indices,2); 23484ac538c5SBarry Smith ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 234927a8da17SBarry Smith PetscFunctionReturn(0); 235027a8da17SBarry Smith } 235127a8da17SBarry Smith 2352985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2353273d9f13SBarry Smith { 2354273d9f13SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2355dfbe8321SBarry Smith PetscErrorCode ierr; 2356c1ac3661SBarry Smith PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2357273d9f13SBarry Smith PetscReal atmp; 235887828ca2SBarry Smith PetscScalar *x,zero = 0.0; 2359273d9f13SBarry Smith MatScalar *aa; 2360c1ac3661SBarry Smith PetscInt ncols,brow,krow,kcol; 2361273d9f13SBarry Smith 2362273d9f13SBarry Smith PetscFunctionBegin; 2363e32f2f54SBarry Smith if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2364d0f46423SBarry Smith bs = A->rmap->bs; 2365273d9f13SBarry Smith aa = a->a; 2366273d9f13SBarry Smith ai = a->i; 2367273d9f13SBarry Smith aj = a->j; 2368273d9f13SBarry Smith mbs = a->mbs; 2369273d9f13SBarry Smith 23702dcb1b2aSMatthew Knepley ierr = VecSet(v,zero);CHKERRQ(ierr); 23711ebc52fbSHong Zhang ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2372273d9f13SBarry Smith ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2373e32f2f54SBarry Smith if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2374273d9f13SBarry Smith for (i=0; i<mbs; i++) { 2375273d9f13SBarry Smith ncols = ai[1] - ai[0]; ai++; 2376273d9f13SBarry Smith brow = bs*i; 2377273d9f13SBarry Smith for (j=0; j<ncols; j++) { 2378273d9f13SBarry Smith for (kcol=0; kcol<bs; kcol++) { 2379273d9f13SBarry Smith for (krow=0; krow<bs; krow++) { 2380273d9f13SBarry Smith atmp = PetscAbsScalar(*aa);aa++; 2381273d9f13SBarry Smith row = brow + krow; /* row index */ 2382985db425SBarry Smith if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2383273d9f13SBarry Smith } 2384273d9f13SBarry Smith } 2385273d9f13SBarry Smith aj++; 2386273d9f13SBarry Smith } 2387273d9f13SBarry Smith } 23881ebc52fbSHong Zhang ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2389273d9f13SBarry Smith PetscFunctionReturn(0); 2390273d9f13SBarry Smith } 2391273d9f13SBarry Smith 23923c896bc6SHong Zhang PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 23933c896bc6SHong Zhang { 23943c896bc6SHong Zhang PetscErrorCode ierr; 23953c896bc6SHong Zhang 23963c896bc6SHong Zhang PetscFunctionBegin; 23973c896bc6SHong Zhang /* If the two matrices have the same copy implementation, use fast copy. */ 23983c896bc6SHong Zhang if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 23993c896bc6SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 24003c896bc6SHong Zhang Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2401d88c0aacSHong Zhang PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 24023c896bc6SHong Zhang 2403d88c0aacSHong Zhang if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2404d88c0aacSHong Zhang if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2405580bdb30SBarry Smith ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr); 2406cdc753b6SBarry Smith ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 24073c896bc6SHong Zhang } else { 24083c896bc6SHong Zhang ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 24093c896bc6SHong Zhang } 24103c896bc6SHong Zhang PetscFunctionReturn(0); 24113c896bc6SHong Zhang } 24123c896bc6SHong Zhang 24134994cf47SJed Brown PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2414273d9f13SBarry Smith { 2415dfbe8321SBarry Smith PetscErrorCode ierr; 2416273d9f13SBarry Smith 2417273d9f13SBarry Smith PetscFunctionBegin; 2418f4259b30SLisandro Dalcin ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2419273d9f13SBarry Smith PetscFunctionReturn(0); 2420273d9f13SBarry Smith } 2421273d9f13SBarry Smith 2422cda14afcSprj- static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2423f2a5309cSSatish Balay { 2424f2a5309cSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 24256e111a19SKarl Rupp 2426f2a5309cSSatish Balay PetscFunctionBegin; 2427f2a5309cSSatish Balay *array = a->a; 2428f2a5309cSSatish Balay PetscFunctionReturn(0); 2429f2a5309cSSatish Balay } 2430f2a5309cSSatish Balay 2431cda14afcSprj- static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2432f2a5309cSSatish Balay { 2433f2a5309cSSatish Balay PetscFunctionBegin; 2434cda14afcSprj- *array = NULL; 2435f2a5309cSSatish Balay PetscFunctionReturn(0); 2436f2a5309cSSatish Balay } 2437f2a5309cSSatish Balay 243852768537SHong Zhang PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 243952768537SHong Zhang { 2440b264fe52SHong Zhang PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 244152768537SHong Zhang Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 244252768537SHong Zhang Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2443b264fe52SHong Zhang PetscErrorCode ierr; 244452768537SHong Zhang 244552768537SHong Zhang PetscFunctionBegin; 244652768537SHong Zhang /* Set the number of nonzeros in the new matrix */ 2447b264fe52SHong Zhang ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 244852768537SHong Zhang PetscFunctionReturn(0); 244952768537SHong Zhang } 245052768537SHong Zhang 2451f4df32b1SMatthew Knepley PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 245242ee4b1aSHong Zhang { 245342ee4b1aSHong Zhang Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2454dfbe8321SBarry Smith PetscErrorCode ierr; 245531ce2d13SHong Zhang PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2456e838b9e7SJed Brown PetscBLASInt one=1; 245742ee4b1aSHong Zhang 245842ee4b1aSHong Zhang PetscFunctionBegin; 245942ee4b1aSHong Zhang if (str == SAME_NONZERO_PATTERN) { 2460f4df32b1SMatthew Knepley PetscScalar alpha = a; 2461c5df96a5SBarry Smith PetscBLASInt bnz; 2462c5df96a5SBarry Smith ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 24638b83055fSJed Brown PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2464a3fa217bSJose E. Roman ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2465ab784542SHong Zhang } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2466ab784542SHong Zhang ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 246742ee4b1aSHong Zhang } else { 246852768537SHong Zhang Mat B; 246952768537SHong Zhang PetscInt *nnz; 247052768537SHong Zhang if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 247152768537SHong Zhang ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 247252768537SHong Zhang ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 247352768537SHong Zhang ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 247452768537SHong Zhang ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 247552768537SHong Zhang ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 247652768537SHong Zhang ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 247752768537SHong Zhang ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 247852768537SHong Zhang ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 247952768537SHong Zhang ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 248028be2f97SBarry Smith ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 248152768537SHong Zhang ierr = PetscFree(nnz);CHKERRQ(ierr); 248242ee4b1aSHong Zhang } 248342ee4b1aSHong Zhang PetscFunctionReturn(0); 248442ee4b1aSHong Zhang } 248542ee4b1aSHong Zhang 24862726fb6dSPierre Jolivet PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 24872726fb6dSPierre Jolivet { 24882726fb6dSPierre Jolivet #if defined(PETSC_USE_COMPLEX) 24892726fb6dSPierre Jolivet Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 24902726fb6dSPierre Jolivet PetscInt i,nz = a->bs2*a->i[a->mbs]; 24912726fb6dSPierre Jolivet MatScalar *aa = a->a; 24922726fb6dSPierre Jolivet 24932726fb6dSPierre Jolivet PetscFunctionBegin; 24942726fb6dSPierre Jolivet for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]); 24952726fb6dSPierre Jolivet #else 24962726fb6dSPierre Jolivet PetscFunctionBegin; 24972726fb6dSPierre Jolivet #endif 24982726fb6dSPierre Jolivet PetscFunctionReturn(0); 24992726fb6dSPierre Jolivet } 25002726fb6dSPierre Jolivet 250199cafbc1SBarry Smith PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 250299cafbc1SBarry Smith { 250399cafbc1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 250499cafbc1SBarry Smith PetscInt i,nz = a->bs2*a->i[a->mbs]; 2505dd6ea824SBarry Smith MatScalar *aa = a->a; 250699cafbc1SBarry Smith 250799cafbc1SBarry Smith PetscFunctionBegin; 250899cafbc1SBarry Smith for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 250999cafbc1SBarry Smith PetscFunctionReturn(0); 251099cafbc1SBarry Smith } 251199cafbc1SBarry Smith 251299cafbc1SBarry Smith PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 251399cafbc1SBarry Smith { 251499cafbc1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 251599cafbc1SBarry Smith PetscInt i,nz = a->bs2*a->i[a->mbs]; 2516dd6ea824SBarry Smith MatScalar *aa = a->a; 251799cafbc1SBarry Smith 251899cafbc1SBarry Smith PetscFunctionBegin; 251999cafbc1SBarry Smith for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 252099cafbc1SBarry Smith PetscFunctionReturn(0); 252199cafbc1SBarry Smith } 252299cafbc1SBarry Smith 25233acb8795SBarry Smith /* 25242479783cSJose E. Roman Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 25253acb8795SBarry Smith */ 25261a83f524SJed Brown PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 25273acb8795SBarry Smith { 25283acb8795SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 25293acb8795SBarry Smith PetscErrorCode ierr; 25303acb8795SBarry Smith PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 25313acb8795SBarry Smith PetscInt nz = a->i[m],row,*jj,mr,col; 25323acb8795SBarry Smith 25333acb8795SBarry Smith PetscFunctionBegin; 25343acb8795SBarry Smith *nn = n; 25353acb8795SBarry Smith if (!ia) PetscFunctionReturn(0); 2536e7e72b3dSBarry Smith if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2537e7e72b3dSBarry Smith else { 2538b9e7e5c1SBarry Smith ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr); 2539854ce69bSBarry Smith ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2540b9e7e5c1SBarry Smith ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr); 25413acb8795SBarry Smith jj = a->j; 25423acb8795SBarry Smith for (i=0; i<nz; i++) { 25433acb8795SBarry Smith collengths[jj[i]]++; 25443acb8795SBarry Smith } 25453acb8795SBarry Smith cia[0] = oshift; 25463acb8795SBarry Smith for (i=0; i<n; i++) { 25473acb8795SBarry Smith cia[i+1] = cia[i] + collengths[i]; 25483acb8795SBarry Smith } 2549580bdb30SBarry Smith ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr); 25503acb8795SBarry Smith jj = a->j; 25513acb8795SBarry Smith for (row=0; row<m; row++) { 25523acb8795SBarry Smith mr = a->i[row+1] - a->i[row]; 25533acb8795SBarry Smith for (i=0; i<mr; i++) { 25543acb8795SBarry Smith col = *jj++; 255526fbe8dcSKarl Rupp 25563acb8795SBarry Smith cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 25573acb8795SBarry Smith } 25583acb8795SBarry Smith } 25593acb8795SBarry Smith ierr = PetscFree(collengths);CHKERRQ(ierr); 25603acb8795SBarry Smith *ia = cia; *ja = cja; 25613acb8795SBarry Smith } 25623acb8795SBarry Smith PetscFunctionReturn(0); 25633acb8795SBarry Smith } 25643acb8795SBarry Smith 25651a83f524SJed Brown PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 25663acb8795SBarry Smith { 25673acb8795SBarry Smith PetscErrorCode ierr; 25683acb8795SBarry Smith 25693acb8795SBarry Smith PetscFunctionBegin; 25703acb8795SBarry Smith if (!ia) PetscFunctionReturn(0); 25713acb8795SBarry Smith ierr = PetscFree(*ia);CHKERRQ(ierr); 25723acb8795SBarry Smith ierr = PetscFree(*ja);CHKERRQ(ierr); 25733acb8795SBarry Smith PetscFunctionReturn(0); 25743acb8795SBarry Smith } 25753acb8795SBarry Smith 2576525d23c0SHong Zhang /* 2577525d23c0SHong Zhang MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2578525d23c0SHong Zhang MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2579040ebd07SHong Zhang spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2580525d23c0SHong Zhang */ 2581525d23c0SHong Zhang PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2582f6d58c54SBarry Smith { 2583525d23c0SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2584f6d58c54SBarry Smith PetscErrorCode ierr; 2585c0349474SHong Zhang PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2586525d23c0SHong Zhang PetscInt nz = a->i[m],row,*jj,mr,col; 2587525d23c0SHong Zhang PetscInt *cspidx; 2588f6d58c54SBarry Smith 2589f6d58c54SBarry Smith PetscFunctionBegin; 2590525d23c0SHong Zhang *nn = n; 2591525d23c0SHong Zhang if (!ia) PetscFunctionReturn(0); 2592f6d58c54SBarry Smith 2593b9e7e5c1SBarry Smith ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr); 2594854ce69bSBarry Smith ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2595b9e7e5c1SBarry Smith ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr); 2596b9e7e5c1SBarry Smith ierr = PetscMalloc1(nz,&cspidx);CHKERRQ(ierr); 2597525d23c0SHong Zhang jj = a->j; 2598525d23c0SHong Zhang for (i=0; i<nz; i++) { 2599525d23c0SHong Zhang collengths[jj[i]]++; 2600f6d58c54SBarry Smith } 2601525d23c0SHong Zhang cia[0] = oshift; 2602525d23c0SHong Zhang for (i=0; i<n; i++) { 2603525d23c0SHong Zhang cia[i+1] = cia[i] + collengths[i]; 2604525d23c0SHong Zhang } 2605580bdb30SBarry Smith ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr); 2606525d23c0SHong Zhang jj = a->j; 2607525d23c0SHong Zhang for (row=0; row<m; row++) { 2608525d23c0SHong Zhang mr = a->i[row+1] - a->i[row]; 2609525d23c0SHong Zhang for (i=0; i<mr; i++) { 2610525d23c0SHong Zhang col = *jj++; 2611525d23c0SHong Zhang cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2612525d23c0SHong Zhang cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2613525d23c0SHong Zhang } 2614525d23c0SHong Zhang } 2615525d23c0SHong Zhang ierr = PetscFree(collengths);CHKERRQ(ierr); 2616071fcb05SBarry Smith *ia = cia; 2617071fcb05SBarry Smith *ja = cja; 2618525d23c0SHong Zhang *spidx = cspidx; 2619525d23c0SHong Zhang PetscFunctionReturn(0); 2620f6d58c54SBarry Smith } 2621f6d58c54SBarry Smith 2622525d23c0SHong Zhang PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2623525d23c0SHong Zhang { 2624525d23c0SHong Zhang PetscErrorCode ierr; 2625f6d58c54SBarry Smith 2626525d23c0SHong Zhang PetscFunctionBegin; 2627525d23c0SHong Zhang ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2628525d23c0SHong Zhang ierr = PetscFree(*spidx);CHKERRQ(ierr); 2629f6d58c54SBarry Smith PetscFunctionReturn(0); 2630f6d58c54SBarry Smith } 263199cafbc1SBarry Smith 26327d68702bSBarry Smith PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 26337d68702bSBarry Smith { 26347d68702bSBarry Smith PetscErrorCode ierr; 26357d68702bSBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 26367d68702bSBarry Smith 26377d68702bSBarry Smith PetscFunctionBegin; 26386f33a894SBarry Smith if (!Y->preallocated || !aij->nz) { 26397d68702bSBarry Smith ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 26407d68702bSBarry Smith } 26417d68702bSBarry Smith ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 26427d68702bSBarry Smith PetscFunctionReturn(0); 26437d68702bSBarry Smith } 26447d68702bSBarry Smith 26452593348eSBarry Smith /* -------------------------------------------------------------------*/ 26463964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2647cc2dc46cSBarry Smith MatGetRow_SeqBAIJ, 2648cc2dc46cSBarry Smith MatRestoreRow_SeqBAIJ, 2649cc2dc46cSBarry Smith MatMult_SeqBAIJ_N, 265097304618SKris Buschelman /* 4*/ MatMultAdd_SeqBAIJ_N, 26517c922b88SBarry Smith MatMultTranspose_SeqBAIJ, 26527c922b88SBarry Smith MatMultTransposeAdd_SeqBAIJ, 2653f4259b30SLisandro Dalcin NULL, 2654f4259b30SLisandro Dalcin NULL, 2655f4259b30SLisandro Dalcin NULL, 2656f4259b30SLisandro Dalcin /* 10*/ NULL, 2657cc2dc46cSBarry Smith MatLUFactor_SeqBAIJ, 2658f4259b30SLisandro Dalcin NULL, 2659f4259b30SLisandro Dalcin NULL, 2660f2501298SSatish Balay MatTranspose_SeqBAIJ, 266197304618SKris Buschelman /* 15*/ MatGetInfo_SeqBAIJ, 2662cc2dc46cSBarry Smith MatEqual_SeqBAIJ, 2663cc2dc46cSBarry Smith MatGetDiagonal_SeqBAIJ, 2664cc2dc46cSBarry Smith MatDiagonalScale_SeqBAIJ, 2665cc2dc46cSBarry Smith MatNorm_SeqBAIJ, 2666f4259b30SLisandro Dalcin /* 20*/ NULL, 2667cc2dc46cSBarry Smith MatAssemblyEnd_SeqBAIJ, 2668cc2dc46cSBarry Smith MatSetOption_SeqBAIJ, 2669cc2dc46cSBarry Smith MatZeroEntries_SeqBAIJ, 2670d519adbfSMatthew Knepley /* 24*/ MatZeroRows_SeqBAIJ, 2671f4259b30SLisandro Dalcin NULL, 2672f4259b30SLisandro Dalcin NULL, 2673f4259b30SLisandro Dalcin NULL, 2674f4259b30SLisandro Dalcin NULL, 26754994cf47SJed Brown /* 29*/ MatSetUp_SeqBAIJ, 2676f4259b30SLisandro Dalcin NULL, 2677f4259b30SLisandro Dalcin NULL, 2678f4259b30SLisandro Dalcin NULL, 2679f4259b30SLisandro Dalcin NULL, 2680d519adbfSMatthew Knepley /* 34*/ MatDuplicate_SeqBAIJ, 2681f4259b30SLisandro Dalcin NULL, 2682f4259b30SLisandro Dalcin NULL, 2683cc2dc46cSBarry Smith MatILUFactor_SeqBAIJ, 2684f4259b30SLisandro Dalcin NULL, 2685d519adbfSMatthew Knepley /* 39*/ MatAXPY_SeqBAIJ, 26867dae84e0SHong Zhang MatCreateSubMatrices_SeqBAIJ, 2687cc2dc46cSBarry Smith MatIncreaseOverlap_SeqBAIJ, 2688cc2dc46cSBarry Smith MatGetValues_SeqBAIJ, 26893c896bc6SHong Zhang MatCopy_SeqBAIJ, 2690f4259b30SLisandro Dalcin /* 44*/ NULL, 2691cc2dc46cSBarry Smith MatScale_SeqBAIJ, 26927d68702bSBarry Smith MatShift_SeqBAIJ, 2693f4259b30SLisandro Dalcin NULL, 269497b48c8fSBarry Smith MatZeroRowsColumns_SeqBAIJ, 2695f4259b30SLisandro Dalcin /* 49*/ NULL, 26963b2fbd54SBarry Smith MatGetRowIJ_SeqBAIJ, 269792c4ed94SBarry Smith MatRestoreRowIJ_SeqBAIJ, 26983acb8795SBarry Smith MatGetColumnIJ_SeqBAIJ, 26993acb8795SBarry Smith MatRestoreColumnIJ_SeqBAIJ, 270093dfae19SHong Zhang /* 54*/ MatFDColoringCreate_SeqXAIJ, 2701f4259b30SLisandro Dalcin NULL, 2702f4259b30SLisandro Dalcin NULL, 2703090001bdSToby Isaac NULL, 2704d3825aa8SBarry Smith MatSetValuesBlocked_SeqBAIJ, 27057dae84e0SHong Zhang /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2706b9b97703SBarry Smith MatDestroy_SeqBAIJ, 2707b9b97703SBarry Smith MatView_SeqBAIJ, 2708f4259b30SLisandro Dalcin NULL, 2709f4259b30SLisandro Dalcin NULL, 2710f4259b30SLisandro Dalcin /* 64*/ NULL, 2711f4259b30SLisandro Dalcin NULL, 2712f4259b30SLisandro Dalcin NULL, 2713f4259b30SLisandro Dalcin NULL, 2714f4259b30SLisandro Dalcin NULL, 2715d519adbfSMatthew Knepley /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2716f4259b30SLisandro Dalcin NULL, 2717c87e5d42SMatthew Knepley MatConvert_Basic, 2718f4259b30SLisandro Dalcin NULL, 2719f4259b30SLisandro Dalcin NULL, 2720f4259b30SLisandro Dalcin /* 74*/ NULL, 2721f6d58c54SBarry Smith MatFDColoringApply_BAIJ, 2722f4259b30SLisandro Dalcin NULL, 2723f4259b30SLisandro Dalcin NULL, 2724f4259b30SLisandro Dalcin NULL, 2725f4259b30SLisandro Dalcin /* 79*/ NULL, 2726f4259b30SLisandro Dalcin NULL, 2727f4259b30SLisandro Dalcin NULL, 2728f4259b30SLisandro Dalcin NULL, 27295bba2384SShri Abhyankar MatLoad_SeqBAIJ, 2730f4259b30SLisandro Dalcin /* 84*/ NULL, 2731f4259b30SLisandro Dalcin NULL, 2732f4259b30SLisandro Dalcin NULL, 2733f4259b30SLisandro Dalcin NULL, 2734f4259b30SLisandro Dalcin NULL, 2735f4259b30SLisandro Dalcin /* 89*/ NULL, 2736f4259b30SLisandro Dalcin NULL, 2737f4259b30SLisandro Dalcin NULL, 2738f4259b30SLisandro Dalcin NULL, 2739f4259b30SLisandro Dalcin NULL, 2740f4259b30SLisandro Dalcin /* 94*/ NULL, 2741f4259b30SLisandro Dalcin NULL, 2742f4259b30SLisandro Dalcin NULL, 2743f4259b30SLisandro Dalcin NULL, 2744f4259b30SLisandro Dalcin NULL, 2745f4259b30SLisandro Dalcin /* 99*/ NULL, 2746f4259b30SLisandro Dalcin NULL, 2747f4259b30SLisandro Dalcin NULL, 27482726fb6dSPierre Jolivet MatConjugate_SeqBAIJ, 2749f4259b30SLisandro Dalcin NULL, 2750f4259b30SLisandro Dalcin /*104*/ NULL, 275199cafbc1SBarry Smith MatRealPart_SeqBAIJ, 27522af78befSBarry Smith MatImaginaryPart_SeqBAIJ, 2753f4259b30SLisandro Dalcin NULL, 2754f4259b30SLisandro Dalcin NULL, 2755f4259b30SLisandro Dalcin /*109*/ NULL, 2756f4259b30SLisandro Dalcin NULL, 2757f4259b30SLisandro Dalcin NULL, 2758f4259b30SLisandro Dalcin NULL, 2759547795f9SHong Zhang MatMissingDiagonal_SeqBAIJ, 2760f4259b30SLisandro Dalcin /*114*/ NULL, 2761f4259b30SLisandro Dalcin NULL, 2762f4259b30SLisandro Dalcin NULL, 2763f4259b30SLisandro Dalcin NULL, 2764f4259b30SLisandro Dalcin NULL, 2765f4259b30SLisandro Dalcin /*119*/ NULL, 2766f4259b30SLisandro Dalcin NULL, 2767547795f9SHong Zhang MatMultHermitianTranspose_SeqBAIJ, 2768d6037b41SHong Zhang MatMultHermitianTransposeAdd_SeqBAIJ, 2769f4259b30SLisandro Dalcin NULL, 2770f4259b30SLisandro Dalcin /*124*/ NULL, 2771*9463ebdaSPierre Jolivet MatGetColumnNorms_SeqBAIJ, 27723964eb88SJed Brown MatInvertBlockDiagonal_SeqBAIJ, 2773f4259b30SLisandro Dalcin NULL, 2774f4259b30SLisandro Dalcin NULL, 2775f4259b30SLisandro Dalcin /*129*/ NULL, 2776f4259b30SLisandro Dalcin NULL, 2777f4259b30SLisandro Dalcin NULL, 2778f4259b30SLisandro Dalcin NULL, 2779f4259b30SLisandro Dalcin NULL, 2780f4259b30SLisandro Dalcin /*134*/ NULL, 2781f4259b30SLisandro Dalcin NULL, 2782f4259b30SLisandro Dalcin NULL, 2783f4259b30SLisandro Dalcin NULL, 2784f4259b30SLisandro Dalcin NULL, 278546533700Sstefano_zampini /*139*/ MatSetBlockSizes_Default, 2786f4259b30SLisandro Dalcin NULL, 2787f4259b30SLisandro Dalcin NULL, 2788bdf6f3fcSHong Zhang MatFDColoringSetUp_SeqXAIJ, 2789f4259b30SLisandro Dalcin NULL, 279086e85357SHong Zhang /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 279186e85357SHong Zhang MatDestroySubMatrices_SeqBAIJ 279299cafbc1SBarry Smith }; 27932593348eSBarry Smith 27947087cfbeSBarry Smith PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 27953e90b805SBarry Smith { 27963e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 27978ece6314SShri Abhyankar PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2798dfbe8321SBarry Smith PetscErrorCode ierr; 27993e90b805SBarry Smith 28003e90b805SBarry Smith PetscFunctionBegin; 2801e7e72b3dSBarry Smith if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 28023e90b805SBarry Smith 28033e90b805SBarry Smith /* allocate space for values if not already there */ 28043e90b805SBarry Smith if (!aij->saved_values) { 2805854ce69bSBarry Smith ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 28063bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 28073e90b805SBarry Smith } 28083e90b805SBarry Smith 28093e90b805SBarry Smith /* copy values over */ 2810580bdb30SBarry Smith ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr); 28113e90b805SBarry Smith PetscFunctionReturn(0); 28123e90b805SBarry Smith } 28133e90b805SBarry Smith 28147087cfbeSBarry Smith PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 28153e90b805SBarry Smith { 28163e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 28176849ba73SBarry Smith PetscErrorCode ierr; 28188ece6314SShri Abhyankar PetscInt nz = aij->i[aij->mbs]*aij->bs2; 28193e90b805SBarry Smith 28203e90b805SBarry Smith PetscFunctionBegin; 2821e7e72b3dSBarry Smith if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2822e7e72b3dSBarry Smith if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 28233e90b805SBarry Smith 28243e90b805SBarry Smith /* copy values over */ 2825580bdb30SBarry Smith ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr); 28263e90b805SBarry Smith PetscFunctionReturn(0); 28273e90b805SBarry Smith } 28283e90b805SBarry Smith 2829cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2830cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2831273d9f13SBarry Smith 2832b5b72c8aSIrina Sokolova PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2833a23d5eceSKris Buschelman { 2834a23d5eceSKris Buschelman Mat_SeqBAIJ *b; 28356849ba73SBarry Smith PetscErrorCode ierr; 2836535b19f3SBarry Smith PetscInt i,mbs,nbs,bs2; 28378afaa268SBarry Smith PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2838a23d5eceSKris Buschelman 2839a23d5eceSKris Buschelman PetscFunctionBegin; 28402576faa2SJed Brown if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2841ab93d7beSBarry Smith if (nz == MAT_SKIP_ALLOCATION) { 2842ab93d7beSBarry Smith skipallocation = PETSC_TRUE; 2843ab93d7beSBarry Smith nz = 0; 2844ab93d7beSBarry Smith } 28458c07d4e3SBarry Smith 284633d57670SJed Brown ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 284726283091SBarry Smith ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 284826283091SBarry Smith ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2849e02043d6SBarry Smith ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2850899cda47SBarry Smith 2851899cda47SBarry Smith B->preallocated = PETSC_TRUE; 2852899cda47SBarry Smith 2853d0f46423SBarry Smith mbs = B->rmap->n/bs; 2854d0f46423SBarry Smith nbs = B->cmap->n/bs; 2855a23d5eceSKris Buschelman bs2 = bs*bs; 2856a23d5eceSKris Buschelman 285765e19b50SBarry Smith if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2858a23d5eceSKris Buschelman 2859a23d5eceSKris Buschelman if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2860e32f2f54SBarry Smith if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2861a23d5eceSKris Buschelman if (nnz) { 2862a23d5eceSKris Buschelman for (i=0; i<mbs; i++) { 2863e32f2f54SBarry Smith if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2864e32f2f54SBarry Smith if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2865a23d5eceSKris Buschelman } 2866a23d5eceSKris Buschelman } 2867a23d5eceSKris Buschelman 2868a23d5eceSKris Buschelman b = (Mat_SeqBAIJ*)B->data; 2869ce94432eSBarry Smith ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 28708afaa268SBarry Smith ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 28718c07d4e3SBarry Smith ierr = PetscOptionsEnd();CHKERRQ(ierr); 28728c07d4e3SBarry Smith 2873a23d5eceSKris Buschelman if (!flg) { 2874a23d5eceSKris Buschelman switch (bs) { 2875a23d5eceSKris Buschelman case 1: 2876a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_1; 2877a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2878a23d5eceSKris Buschelman break; 2879a23d5eceSKris Buschelman case 2: 2880a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_2; 2881a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2882a23d5eceSKris Buschelman break; 2883a23d5eceSKris Buschelman case 3: 2884a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_3; 2885a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2886a23d5eceSKris Buschelman break; 2887a23d5eceSKris Buschelman case 4: 2888a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_4; 2889a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2890a23d5eceSKris Buschelman break; 2891a23d5eceSKris Buschelman case 5: 2892a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_5; 2893a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2894a23d5eceSKris Buschelman break; 2895a23d5eceSKris Buschelman case 6: 2896a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_6; 2897a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2898a23d5eceSKris Buschelman break; 2899a23d5eceSKris Buschelman case 7: 2900a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_7; 2901a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2902a23d5eceSKris Buschelman break; 290396e086a2SDaniel Kokron case 9: 29046679dcc1SBarry Smith { 29056679dcc1SBarry Smith PetscInt version = 1; 29066679dcc1SBarry Smith ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr); 29076679dcc1SBarry Smith switch (version) { 29085f70456aSHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 29096679dcc1SBarry Smith case 1: 291096e086a2SDaniel Kokron B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 291196e086a2SDaniel Kokron B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 29121e1ea65dSPierre Jolivet ierr = PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 29136679dcc1SBarry Smith break; 29146679dcc1SBarry Smith #endif 29156679dcc1SBarry Smith default: 291696e086a2SDaniel Kokron B->ops->mult = MatMult_SeqBAIJ_N; 291796e086a2SDaniel Kokron B->ops->multadd = MatMultAdd_SeqBAIJ_N; 29181e1ea65dSPierre Jolivet ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 291996e086a2SDaniel Kokron break; 29206679dcc1SBarry Smith } 29216679dcc1SBarry Smith break; 29226679dcc1SBarry Smith } 2923ebada01fSBarry Smith case 11: 2924ebada01fSBarry Smith B->ops->mult = MatMult_SeqBAIJ_11; 2925ebada01fSBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2926ebada01fSBarry Smith break; 29276679dcc1SBarry Smith case 12: 29286679dcc1SBarry Smith { 29296679dcc1SBarry Smith PetscInt version = 1; 29306679dcc1SBarry Smith ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr); 29316679dcc1SBarry Smith switch (version) { 29326679dcc1SBarry Smith case 1: 29336679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_12_ver1; 29346679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 29351e1ea65dSPierre Jolivet ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 29368ab949d8SShri Abhyankar break; 29376679dcc1SBarry Smith case 2: 29386679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_12_ver2; 29396679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 29401e1ea65dSPierre Jolivet ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 29416679dcc1SBarry Smith break; 29426679dcc1SBarry Smith #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 29436679dcc1SBarry Smith case 3: 29446679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 29456679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 29461e1ea65dSPierre Jolivet ierr = PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 29476679dcc1SBarry Smith break; 29486679dcc1SBarry Smith #endif 2949a23d5eceSKris Buschelman default: 2950a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_N; 2951a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_N; 29521e1ea65dSPierre Jolivet ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 29536679dcc1SBarry Smith break; 29546679dcc1SBarry Smith } 29556679dcc1SBarry Smith break; 29566679dcc1SBarry Smith } 29576679dcc1SBarry Smith case 15: 29586679dcc1SBarry Smith { 29596679dcc1SBarry Smith PetscInt version = 1; 29606679dcc1SBarry Smith ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr); 29616679dcc1SBarry Smith switch (version) { 29626679dcc1SBarry Smith case 1: 29636679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver1; 29641e1ea65dSPierre Jolivet ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 29656679dcc1SBarry Smith break; 29666679dcc1SBarry Smith case 2: 29676679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver2; 29681e1ea65dSPierre Jolivet ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 29696679dcc1SBarry Smith break; 29706679dcc1SBarry Smith case 3: 29716679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver3; 29721e1ea65dSPierre Jolivet ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 29736679dcc1SBarry Smith break; 29746679dcc1SBarry Smith case 4: 29756679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver4; 29761e1ea65dSPierre Jolivet ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 29776679dcc1SBarry Smith break; 29786679dcc1SBarry Smith default: 29796679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_N; 29801e1ea65dSPierre Jolivet ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 29816679dcc1SBarry Smith break; 29826679dcc1SBarry Smith } 29836679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_N; 29846679dcc1SBarry Smith break; 29856679dcc1SBarry Smith } 29866679dcc1SBarry Smith default: 29876679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_N; 29886679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_N; 29891e1ea65dSPierre Jolivet ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 2990a23d5eceSKris Buschelman break; 2991a23d5eceSKris Buschelman } 2992a23d5eceSKris Buschelman } 2993e48d15efSToby Isaac B->ops->sor = MatSOR_SeqBAIJ; 2994a23d5eceSKris Buschelman b->mbs = mbs; 2995a23d5eceSKris Buschelman b->nbs = nbs; 2996ab93d7beSBarry Smith if (!skipallocation) { 29972ee49352SLisandro Dalcin if (!b->imax) { 2998dcca6d9dSJed Brown ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 29993bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 300026fbe8dcSKarl Rupp 30014fd072dbSBarry Smith b->free_imax_ilen = PETSC_TRUE; 30022ee49352SLisandro Dalcin } 3003ab93d7beSBarry Smith /* b->ilen will count nonzeros in each block row so far. */ 300426fbe8dcSKarl Rupp for (i=0; i<mbs; i++) b->ilen[i] = 0; 3005a23d5eceSKris Buschelman if (!nnz) { 3006a23d5eceSKris Buschelman if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3007c62bd62aSJed Brown else if (nz < 0) nz = 1; 30085d2a9ed1SStefano Zampini nz = PetscMin(nz,nbs); 3009a23d5eceSKris Buschelman for (i=0; i<mbs; i++) b->imax[i] = nz; 301061778c46SBarry Smith ierr = PetscIntMultError(nz,mbs,&nz);CHKERRQ(ierr); 3011a23d5eceSKris Buschelman } else { 3012c73702f5SBarry Smith PetscInt64 nz64 = 0; 3013c73702f5SBarry Smith for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];} 3014c73702f5SBarry Smith ierr = PetscIntCast(nz64,&nz);CHKERRQ(ierr); 3015a23d5eceSKris Buschelman } 3016a23d5eceSKris Buschelman 3017a23d5eceSKris Buschelman /* allocate the matrix space */ 30182ee49352SLisandro Dalcin ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3019672ba085SHong Zhang if (B->structure_only) { 3020672ba085SHong Zhang ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr); 3021672ba085SHong Zhang ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr); 3022672ba085SHong Zhang ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr); 3023672ba085SHong Zhang } else { 30246679dcc1SBarry Smith PetscInt nzbs2 = 0; 30256679dcc1SBarry Smith ierr = PetscIntMultError(nz,bs2,&nzbs2);CHKERRQ(ierr); 30266679dcc1SBarry Smith ierr = PetscMalloc3(nzbs2,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 30273bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3028580bdb30SBarry Smith ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr); 3029672ba085SHong Zhang } 3030580bdb30SBarry Smith ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr); 303126fbe8dcSKarl Rupp 3032672ba085SHong Zhang if (B->structure_only) { 3033672ba085SHong Zhang b->singlemalloc = PETSC_FALSE; 3034672ba085SHong Zhang b->free_a = PETSC_FALSE; 3035672ba085SHong Zhang } else { 3036a23d5eceSKris Buschelman b->singlemalloc = PETSC_TRUE; 3037672ba085SHong Zhang b->free_a = PETSC_TRUE; 3038672ba085SHong Zhang } 3039672ba085SHong Zhang b->free_ij = PETSC_TRUE; 3040672ba085SHong Zhang 3041a23d5eceSKris Buschelman b->i[0] = 0; 3042a23d5eceSKris Buschelman for (i=1; i<mbs+1; i++) { 3043a23d5eceSKris Buschelman b->i[i] = b->i[i-1] + b->imax[i-1]; 3044a23d5eceSKris Buschelman } 3045672ba085SHong Zhang 3046e811da20SHong Zhang } else { 3047e6b907acSBarry Smith b->free_a = PETSC_FALSE; 3048e6b907acSBarry Smith b->free_ij = PETSC_FALSE; 3049ab93d7beSBarry Smith } 3050a23d5eceSKris Buschelman 3051a23d5eceSKris Buschelman b->bs2 = bs2; 3052a23d5eceSKris Buschelman b->mbs = mbs; 3053a23d5eceSKris Buschelman b->nz = 0; 3054b32cb4a7SJed Brown b->maxnz = nz; 3055b32cb4a7SJed Brown B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3056cb7b82ddSBarry Smith B->was_assembled = PETSC_FALSE; 3057cb7b82ddSBarry Smith B->assembled = PETSC_FALSE; 30582576faa2SJed Brown if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 3059a23d5eceSKris Buschelman PetscFunctionReturn(0); 3060a23d5eceSKris Buschelman } 3061a23d5eceSKris Buschelman 3062cf12db73SBarry Smith PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3063725b52f3SLisandro Dalcin { 3064725b52f3SLisandro Dalcin PetscInt i,m,nz,nz_max=0,*nnz; 3065f4259b30SLisandro Dalcin PetscScalar *values=NULL; 3066d47bf9aaSJed Brown PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 3067725b52f3SLisandro Dalcin PetscErrorCode ierr; 3068725b52f3SLisandro Dalcin 3069725b52f3SLisandro Dalcin PetscFunctionBegin; 3070e32f2f54SBarry Smith if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 307126283091SBarry Smith ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 307226283091SBarry Smith ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 307326283091SBarry Smith ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 307426283091SBarry Smith ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3075e02043d6SBarry Smith ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3076d0f46423SBarry Smith m = B->rmap->n/bs; 3077725b52f3SLisandro Dalcin 307826fbe8dcSKarl Rupp if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 3079854ce69bSBarry Smith ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 3080725b52f3SLisandro Dalcin for (i=0; i<m; i++) { 3081cf12db73SBarry Smith nz = ii[i+1]- ii[i]; 308226fbe8dcSKarl Rupp if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 3083725b52f3SLisandro Dalcin nz_max = PetscMax(nz_max, nz); 3084725b52f3SLisandro Dalcin nnz[i] = nz; 3085725b52f3SLisandro Dalcin } 3086725b52f3SLisandro Dalcin ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3087725b52f3SLisandro Dalcin ierr = PetscFree(nnz);CHKERRQ(ierr); 3088725b52f3SLisandro Dalcin 3089725b52f3SLisandro Dalcin values = (PetscScalar*)V; 3090725b52f3SLisandro Dalcin if (!values) { 30911795a4d1SJed Brown ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 3092725b52f3SLisandro Dalcin } 3093725b52f3SLisandro Dalcin for (i=0; i<m; i++) { 3094cf12db73SBarry Smith PetscInt ncols = ii[i+1] - ii[i]; 3095cf12db73SBarry Smith const PetscInt *icols = jj + ii[i]; 3096bb80cfbbSStefano Zampini if (bs == 1 || !roworiented) { 3097cf12db73SBarry Smith const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3098725b52f3SLisandro Dalcin ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 30993adadaf3SJed Brown } else { 31003adadaf3SJed Brown PetscInt j; 31013adadaf3SJed Brown for (j=0; j<ncols; j++) { 31023adadaf3SJed Brown const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 3103d47bf9aaSJed Brown ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 31043adadaf3SJed Brown } 31053adadaf3SJed Brown } 3106725b52f3SLisandro Dalcin } 3107725b52f3SLisandro Dalcin if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3108725b52f3SLisandro Dalcin ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3109725b52f3SLisandro Dalcin ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 31107827cd58SJed Brown ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3111725b52f3SLisandro Dalcin PetscFunctionReturn(0); 3112725b52f3SLisandro Dalcin } 3113725b52f3SLisandro Dalcin 3114cda14afcSprj- /*@C 3115cda14afcSprj- MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored 3116cda14afcSprj- 3117cda14afcSprj- Not Collective 3118cda14afcSprj- 3119cda14afcSprj- Input Parameter: 3120cda14afcSprj- . mat - a MATSEQBAIJ matrix 3121cda14afcSprj- 3122cda14afcSprj- Output Parameter: 3123cda14afcSprj- . array - pointer to the data 3124cda14afcSprj- 3125cda14afcSprj- Level: intermediate 3126cda14afcSprj- 3127cda14afcSprj- .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray() 3128cda14afcSprj- @*/ 3129cda14afcSprj- PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array) 3130cda14afcSprj- { 3131cda14afcSprj- PetscErrorCode ierr; 3132cda14afcSprj- 3133cda14afcSprj- PetscFunctionBegin; 3134cda14afcSprj- ierr = PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 3135cda14afcSprj- PetscFunctionReturn(0); 3136cda14afcSprj- } 3137cda14afcSprj- 3138cda14afcSprj- /*@C 3139cda14afcSprj- MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray() 3140cda14afcSprj- 3141cda14afcSprj- Not Collective 3142cda14afcSprj- 3143cda14afcSprj- Input Parameters: 3144cda14afcSprj- + mat - a MATSEQBAIJ matrix 3145cda14afcSprj- - array - pointer to the data 3146cda14afcSprj- 3147cda14afcSprj- Level: intermediate 3148cda14afcSprj- 3149cda14afcSprj- .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray() 3150cda14afcSprj- @*/ 3151cda14afcSprj- PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array) 3152cda14afcSprj- { 3153cda14afcSprj- PetscErrorCode ierr; 3154cda14afcSprj- 3155cda14afcSprj- PetscFunctionBegin; 3156cda14afcSprj- ierr = PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 3157cda14afcSprj- PetscFunctionReturn(0); 3158cda14afcSprj- } 3159cda14afcSprj- 31600bad9183SKris Buschelman /*MC 3161fafad747SKris Buschelman MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 31620bad9183SKris Buschelman block sparse compressed row format. 31630bad9183SKris Buschelman 31640bad9183SKris Buschelman Options Database Keys: 31656679dcc1SBarry Smith + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 31666679dcc1SBarry Smith - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 31670bad9183SKris Buschelman 31680bad9183SKris Buschelman Level: beginner 31690cd7f59aSBarry Smith 31700cd7f59aSBarry Smith Notes: 31710cd7f59aSBarry Smith MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no 31720cd7f59aSBarry Smith space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 31730bad9183SKris Buschelman 31746679dcc1SBarry Smith Run with -info to see what version of the matrix-vector product is being used 31756679dcc1SBarry Smith 3176f0c06035SSatish Balay .seealso: MatCreateSeqBAIJ() 31770bad9183SKris Buschelman M*/ 31780bad9183SKris Buschelman 3179cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3180b24902e0SBarry Smith 31818cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 31822593348eSBarry Smith { 3183dfbe8321SBarry Smith PetscErrorCode ierr; 3184c1ac3661SBarry Smith PetscMPIInt size; 3185b6490206SBarry Smith Mat_SeqBAIJ *b; 31863b2fbd54SBarry Smith 31873a40ed3dSBarry Smith PetscFunctionBegin; 3188ffc4695bSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 3189e32f2f54SBarry Smith if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3190b6490206SBarry Smith 3191b00a9115SJed Brown ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3192b0a32e0cSBarry Smith B->data = (void*)b; 3193549d3d68SSatish Balay ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 319426fbe8dcSKarl Rupp 3195f4259b30SLisandro Dalcin b->row = NULL; 3196f4259b30SLisandro Dalcin b->col = NULL; 3197f4259b30SLisandro Dalcin b->icol = NULL; 31982593348eSBarry Smith b->reallocs = 0; 3199f4259b30SLisandro Dalcin b->saved_values = NULL; 32002593348eSBarry Smith 3201c4992f7dSBarry Smith b->roworiented = PETSC_TRUE; 32022593348eSBarry Smith b->nonew = 0; 3203f4259b30SLisandro Dalcin b->diag = NULL; 3204f4259b30SLisandro Dalcin B->spptr = NULL; 3205b32cb4a7SJed Brown B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3206a9817697SBarry Smith b->keepnonzeropattern = PETSC_FALSE; 32074e220ebcSLois Curfman McInnes 3208cda14afcSprj- ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);CHKERRQ(ierr); 3209cda14afcSprj- ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);CHKERRQ(ierr); 3210bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3211bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3212bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3213bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3214bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3215bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3216bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3217bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3218bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 32197ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 3220c9225affSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 32217ea3e4caSstefano_zampini #endif 3222c9225affSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 322317667f90SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 32243a40ed3dSBarry Smith PetscFunctionReturn(0); 32252593348eSBarry Smith } 32262593348eSBarry Smith 3227ace3abfcSBarry Smith PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 32282593348eSBarry Smith { 3229b24902e0SBarry Smith Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 32306849ba73SBarry Smith PetscErrorCode ierr; 3231a96a251dSBarry Smith PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3232de6a44a3SBarry Smith 32333a40ed3dSBarry Smith PetscFunctionBegin; 3234e32f2f54SBarry Smith if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 32352593348eSBarry Smith 32364fd072dbSBarry Smith if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 32374fd072dbSBarry Smith c->imax = a->imax; 32384fd072dbSBarry Smith c->ilen = a->ilen; 32394fd072dbSBarry Smith c->free_imax_ilen = PETSC_FALSE; 32404fd072dbSBarry Smith } else { 3241dcca6d9dSJed Brown ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 32423bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3243b6490206SBarry Smith for (i=0; i<mbs; i++) { 32442593348eSBarry Smith c->imax[i] = a->imax[i]; 32452593348eSBarry Smith c->ilen[i] = a->ilen[i]; 32462593348eSBarry Smith } 32474fd072dbSBarry Smith c->free_imax_ilen = PETSC_TRUE; 32484fd072dbSBarry Smith } 32492593348eSBarry Smith 32502593348eSBarry Smith /* allocate the matrix space */ 325116a2bf60SHong Zhang if (mallocmatspace) { 32524fd072dbSBarry Smith if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 32531795a4d1SJed Brown ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 32543bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 325526fbe8dcSKarl Rupp 32564fd072dbSBarry Smith c->i = a->i; 32574fd072dbSBarry Smith c->j = a->j; 3258379be0ddSLisandro Dalcin c->singlemalloc = PETSC_FALSE; 3259379be0ddSLisandro Dalcin c->free_a = PETSC_TRUE; 3260379be0ddSLisandro Dalcin c->free_ij = PETSC_FALSE; 32614fd072dbSBarry Smith c->parent = A; 32621e40a84eSLisandro Dalcin C->preallocated = PETSC_TRUE; 32631e40a84eSLisandro Dalcin C->assembled = PETSC_TRUE; 326426fbe8dcSKarl Rupp 32654fd072dbSBarry Smith ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 32664fd072dbSBarry Smith ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 32674fd072dbSBarry Smith ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 32684fd072dbSBarry Smith } else { 3269dcca6d9dSJed Brown ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 32703bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 327126fbe8dcSKarl Rupp 3272c4992f7dSBarry Smith c->singlemalloc = PETSC_TRUE; 3273379be0ddSLisandro Dalcin c->free_a = PETSC_TRUE; 32744fd072dbSBarry Smith c->free_ij = PETSC_TRUE; 327526fbe8dcSKarl Rupp 3276580bdb30SBarry Smith ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr); 3277b6490206SBarry Smith if (mbs > 0) { 3278580bdb30SBarry Smith ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr); 32792e8a6d31SBarry Smith if (cpvalues == MAT_COPY_VALUES) { 3280580bdb30SBarry Smith ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr); 32812e8a6d31SBarry Smith } else { 3282580bdb30SBarry Smith ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr); 32832593348eSBarry Smith } 32842593348eSBarry Smith } 32851e40a84eSLisandro Dalcin C->preallocated = PETSC_TRUE; 32861e40a84eSLisandro Dalcin C->assembled = PETSC_TRUE; 328716a2bf60SHong Zhang } 32884fd072dbSBarry Smith } 328916a2bf60SHong Zhang 32902593348eSBarry Smith c->roworiented = a->roworiented; 32912593348eSBarry Smith c->nonew = a->nonew; 329226fbe8dcSKarl Rupp 32931e1e43feSBarry Smith ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 32941e1e43feSBarry Smith ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 329526fbe8dcSKarl Rupp 32965c9eb25fSBarry Smith c->bs2 = a->bs2; 32975c9eb25fSBarry Smith c->mbs = a->mbs; 32985c9eb25fSBarry Smith c->nbs = a->nbs; 32992593348eSBarry Smith 33002593348eSBarry Smith if (a->diag) { 33014fd072dbSBarry Smith if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 33024fd072dbSBarry Smith c->diag = a->diag; 33034fd072dbSBarry Smith c->free_diag = PETSC_FALSE; 33044fd072dbSBarry Smith } else { 3305854ce69bSBarry Smith ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 33063bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 330726fbe8dcSKarl Rupp for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 33084fd072dbSBarry Smith c->free_diag = PETSC_TRUE; 33094fd072dbSBarry Smith } 3310f4259b30SLisandro Dalcin } else c->diag = NULL; 331126fbe8dcSKarl Rupp 33122593348eSBarry Smith c->nz = a->nz; 3313f2cbd3d5SJed Brown c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3314f361c04dSBarry Smith c->solve_work = NULL; 3315f361c04dSBarry Smith c->mult_work = NULL; 3316f361c04dSBarry Smith c->sor_workt = NULL; 3317f361c04dSBarry Smith c->sor_work = NULL; 331888e51ccdSHong Zhang 331988e51ccdSHong Zhang c->compressedrow.use = a->compressedrow.use; 332088e51ccdSHong Zhang c->compressedrow.nrows = a->compressedrow.nrows; 3321cd6b891eSBarry Smith if (a->compressedrow.use) { 332288e51ccdSHong Zhang i = a->compressedrow.nrows; 3323dcca6d9dSJed Brown ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 33243bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3325580bdb30SBarry Smith ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr); 3326580bdb30SBarry Smith ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr); 332788e51ccdSHong Zhang } else { 332888e51ccdSHong Zhang c->compressedrow.use = PETSC_FALSE; 33290298fd71SBarry Smith c->compressedrow.i = NULL; 33300298fd71SBarry Smith c->compressedrow.rindex = NULL; 333188e51ccdSHong Zhang } 3332e56f5c9eSBarry Smith C->nonzerostate = A->nonzerostate; 333326fbe8dcSKarl Rupp 3334140e18c1SBarry Smith ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 33353a40ed3dSBarry Smith PetscFunctionReturn(0); 33362593348eSBarry Smith } 33372593348eSBarry Smith 3338b24902e0SBarry Smith PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3339b24902e0SBarry Smith { 3340b24902e0SBarry Smith PetscErrorCode ierr; 3341b24902e0SBarry Smith 3342b24902e0SBarry Smith PetscFunctionBegin; 3343ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3344d0f46423SBarry Smith ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 33455c9eb25fSBarry Smith ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 334698ad0f72SJed Brown ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3347b24902e0SBarry Smith PetscFunctionReturn(0); 3348b24902e0SBarry Smith } 3349b24902e0SBarry Smith 3350618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3351b51a4376SLisandro Dalcin PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer) 3352f501eaabSShri Abhyankar { 3353b51a4376SLisandro Dalcin PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k; 3354b51a4376SLisandro Dalcin PetscInt *rowidxs,*colidxs; 3355b51a4376SLisandro Dalcin PetscScalar *matvals; 3356f501eaabSShri Abhyankar PetscErrorCode ierr; 3357b51a4376SLisandro Dalcin 3358b51a4376SLisandro Dalcin PetscFunctionBegin; 3359b51a4376SLisandro Dalcin ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3360b51a4376SLisandro Dalcin 3361b51a4376SLisandro Dalcin /* read matrix header */ 3362b51a4376SLisandro Dalcin ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3363b51a4376SLisandro Dalcin if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3364b51a4376SLisandro Dalcin M = header[1]; N = header[2]; nz = header[3]; 3365b51a4376SLisandro Dalcin if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3366b51a4376SLisandro Dalcin if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 3367b51a4376SLisandro Dalcin if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3368b51a4376SLisandro Dalcin 3369b51a4376SLisandro Dalcin /* set block sizes from the viewer's .info file */ 3370b51a4376SLisandro Dalcin ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3371b51a4376SLisandro Dalcin /* set local and global sizes if not set already */ 3372b51a4376SLisandro Dalcin if (mat->rmap->n < 0) mat->rmap->n = M; 3373b51a4376SLisandro Dalcin if (mat->cmap->n < 0) mat->cmap->n = N; 3374b51a4376SLisandro Dalcin if (mat->rmap->N < 0) mat->rmap->N = M; 3375b51a4376SLisandro Dalcin if (mat->cmap->N < 0) mat->cmap->N = N; 3376b51a4376SLisandro Dalcin ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3377b51a4376SLisandro Dalcin ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3378b51a4376SLisandro Dalcin 3379b51a4376SLisandro Dalcin /* check if the matrix sizes are correct */ 3380b51a4376SLisandro Dalcin ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3381b51a4376SLisandro Dalcin if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3382b51a4376SLisandro Dalcin ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 3383b51a4376SLisandro Dalcin ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 3384b51a4376SLisandro Dalcin mbs = m/bs; nbs = n/bs; 3385b51a4376SLisandro Dalcin 3386b51a4376SLisandro Dalcin /* read in row lengths, column indices and nonzero values */ 3387b51a4376SLisandro Dalcin ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3388b51a4376SLisandro Dalcin ierr = PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT);CHKERRQ(ierr); 3389b51a4376SLisandro Dalcin rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3390b51a4376SLisandro Dalcin sum = rowidxs[m]; 3391b51a4376SLisandro Dalcin if (sum != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3392b51a4376SLisandro Dalcin 3393b51a4376SLisandro Dalcin /* read in column indices and nonzero values */ 3394b51a4376SLisandro Dalcin ierr = PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals);CHKERRQ(ierr); 3395b51a4376SLisandro Dalcin ierr = PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT);CHKERRQ(ierr); 3396b51a4376SLisandro Dalcin ierr = PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR);CHKERRQ(ierr); 3397b51a4376SLisandro Dalcin 3398b51a4376SLisandro Dalcin { /* preallocate matrix storage */ 3399b51a4376SLisandro Dalcin PetscBT bt; /* helper bit set to count nonzeros */ 3400b51a4376SLisandro Dalcin PetscInt *nnz; 3401618cc2edSLisandro Dalcin PetscBool sbaij; 3402b51a4376SLisandro Dalcin 3403b51a4376SLisandro Dalcin ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr); 3404b51a4376SLisandro Dalcin ierr = PetscCalloc1(mbs,&nnz);CHKERRQ(ierr); 3405618cc2edSLisandro Dalcin ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij);CHKERRQ(ierr); 3406b51a4376SLisandro Dalcin for (i=0; i<mbs; i++) { 3407b51a4376SLisandro Dalcin ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr); 3408618cc2edSLisandro Dalcin for (k=0; k<bs; k++) { 3409618cc2edSLisandro Dalcin PetscInt row = bs*i + k; 3410618cc2edSLisandro Dalcin for (j=rowidxs[row]; j<rowidxs[row+1]; j++) { 3411618cc2edSLisandro Dalcin PetscInt col = colidxs[j]; 3412618cc2edSLisandro Dalcin if (!sbaij || col >= row) 3413618cc2edSLisandro Dalcin if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++; 3414618cc2edSLisandro Dalcin } 3415618cc2edSLisandro Dalcin } 3416b51a4376SLisandro Dalcin } 3417b51a4376SLisandro Dalcin ierr = PetscBTDestroy(&bt);CHKERRQ(ierr); 3418b51a4376SLisandro Dalcin ierr = MatSeqBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr); 3419618cc2edSLisandro Dalcin ierr = MatSeqSBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr); 3420b51a4376SLisandro Dalcin ierr = PetscFree(nnz);CHKERRQ(ierr); 3421b51a4376SLisandro Dalcin } 3422b51a4376SLisandro Dalcin 3423b51a4376SLisandro Dalcin /* store matrix values */ 3424b51a4376SLisandro Dalcin for (i=0; i<m; i++) { 3425b51a4376SLisandro Dalcin PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1]; 3426618cc2edSLisandro Dalcin ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr); 3427b51a4376SLisandro Dalcin } 3428b51a4376SLisandro Dalcin 3429b51a4376SLisandro Dalcin ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3430b51a4376SLisandro Dalcin ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3431b51a4376SLisandro Dalcin ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3432b51a4376SLisandro Dalcin ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3433b51a4376SLisandro Dalcin PetscFunctionReturn(0); 3434b51a4376SLisandro Dalcin } 3435b51a4376SLisandro Dalcin 3436b51a4376SLisandro Dalcin PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer) 3437b51a4376SLisandro Dalcin { 3438b51a4376SLisandro Dalcin PetscErrorCode ierr; 34397f489da9SVaclav Hapla PetscBool isbinary; 3440f501eaabSShri Abhyankar 3441f501eaabSShri Abhyankar PetscFunctionBegin; 34427f489da9SVaclav Hapla ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3443b51a4376SLisandro Dalcin if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 3444b51a4376SLisandro Dalcin ierr = MatLoad_SeqBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 3445f501eaabSShri Abhyankar PetscFunctionReturn(0); 3446f501eaabSShri Abhyankar } 3447f501eaabSShri Abhyankar 3448273d9f13SBarry Smith /*@C 3449273d9f13SBarry Smith MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3450273d9f13SBarry Smith compressed row) format. For good matrix assembly performance the 3451273d9f13SBarry Smith user should preallocate the matrix storage by setting the parameter nz 3452273d9f13SBarry Smith (or the array nnz). By setting these parameters accurately, performance 3453273d9f13SBarry Smith during matrix assembly can be increased by more than a factor of 50. 34542593348eSBarry Smith 3455d083f849SBarry Smith Collective 3456273d9f13SBarry Smith 3457273d9f13SBarry Smith Input Parameters: 3458273d9f13SBarry Smith + comm - MPI communicator, set to PETSC_COMM_SELF 3459bb7ae925SBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3460bb7ae925SBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3461273d9f13SBarry Smith . m - number of rows 3462273d9f13SBarry Smith . n - number of columns 346335d8aa7fSBarry Smith . nz - number of nonzero blocks per block row (same for all rows) 346435d8aa7fSBarry Smith - nnz - array containing the number of nonzero blocks in the various block rows 34650298fd71SBarry Smith (possibly different for each block row) or NULL 3466273d9f13SBarry Smith 3467273d9f13SBarry Smith Output Parameter: 3468273d9f13SBarry Smith . A - the matrix 3469273d9f13SBarry Smith 3470175b88e8SBarry Smith It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3471f6f02116SRichard Tran Mills MatXXXXSetPreallocation() paradigm instead of this routine directly. 3472175b88e8SBarry Smith [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3473175b88e8SBarry Smith 3474273d9f13SBarry Smith Options Database Keys: 3475a2b725a8SWilliam Gropp + -mat_no_unroll - uses code that does not unroll the loops in the 3476273d9f13SBarry Smith block calculations (much slower) 3477a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use 3478273d9f13SBarry Smith 3479273d9f13SBarry Smith Level: intermediate 3480273d9f13SBarry Smith 3481273d9f13SBarry Smith Notes: 3482d1be2dadSMatthew Knepley The number of rows and columns must be divisible by blocksize. 3483d1be2dadSMatthew Knepley 348449a6f317SBarry Smith If the nnz parameter is given then the nz parameter is ignored 348549a6f317SBarry Smith 348635d8aa7fSBarry Smith A nonzero block is any block that as 1 or more nonzeros in it 348735d8aa7fSBarry Smith 3488273d9f13SBarry Smith The block AIJ format is fully compatible with standard Fortran 77 3489273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 3490273d9f13SBarry Smith either one (as in Fortran) or zero. See the users' manual for details. 3491273d9f13SBarry Smith 3492273d9f13SBarry Smith Specify the preallocated storage with either nz or nnz (not both). 34930298fd71SBarry Smith Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3494a7f22e61SSatish Balay allocation. See Users-Manual: ch_mat for details. 3495273d9f13SBarry Smith matrices. 3496273d9f13SBarry Smith 349769b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3498273d9f13SBarry Smith @*/ 34997087cfbeSBarry Smith PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3500273d9f13SBarry Smith { 3501dfbe8321SBarry Smith PetscErrorCode ierr; 3502273d9f13SBarry Smith 3503273d9f13SBarry Smith PetscFunctionBegin; 3504f69a0ea3SMatthew Knepley ierr = MatCreate(comm,A);CHKERRQ(ierr); 3505f69a0ea3SMatthew Knepley ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3506273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3507367daffbSBarry Smith ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3508273d9f13SBarry Smith PetscFunctionReturn(0); 3509273d9f13SBarry Smith } 3510273d9f13SBarry Smith 3511273d9f13SBarry Smith /*@C 3512273d9f13SBarry Smith MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3513273d9f13SBarry Smith per row in the matrix. For good matrix assembly performance the 3514273d9f13SBarry Smith user should preallocate the matrix storage by setting the parameter nz 3515273d9f13SBarry Smith (or the array nnz). By setting these parameters accurately, performance 3516273d9f13SBarry Smith during matrix assembly can be increased by more than a factor of 50. 3517273d9f13SBarry Smith 3518d083f849SBarry Smith Collective 3519273d9f13SBarry Smith 3520273d9f13SBarry Smith Input Parameters: 35211c4f3114SJed Brown + B - the matrix 3522bb7ae925SBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3523bb7ae925SBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3524273d9f13SBarry Smith . nz - number of block nonzeros per block row (same for all rows) 3525273d9f13SBarry Smith - nnz - array containing the number of block nonzeros in the various block rows 35260298fd71SBarry Smith (possibly different for each block row) or NULL 3527273d9f13SBarry Smith 3528273d9f13SBarry Smith Options Database Keys: 3529a2b725a8SWilliam Gropp + -mat_no_unroll - uses code that does not unroll the loops in the 3530273d9f13SBarry Smith block calculations (much slower) 3531a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use 3532273d9f13SBarry Smith 3533273d9f13SBarry Smith Level: intermediate 3534273d9f13SBarry Smith 3535273d9f13SBarry Smith Notes: 353649a6f317SBarry Smith If the nnz parameter is given then the nz parameter is ignored 353749a6f317SBarry Smith 3538aa95bbe8SBarry Smith You can call MatGetInfo() to get information on how effective the preallocation was; 3539aa95bbe8SBarry Smith for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3540aa95bbe8SBarry Smith You can also run with the option -info and look for messages with the string 3541aa95bbe8SBarry Smith malloc in them to see if additional memory allocation was needed. 3542aa95bbe8SBarry Smith 3543273d9f13SBarry Smith The block AIJ format is fully compatible with standard Fortran 77 3544273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 3545273d9f13SBarry Smith either one (as in Fortran) or zero. See the users' manual for details. 3546273d9f13SBarry Smith 3547273d9f13SBarry Smith Specify the preallocated storage with either nz or nnz (not both). 35480298fd71SBarry Smith Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3549a7f22e61SSatish Balay allocation. See Users-Manual: ch_mat for details. 3550273d9f13SBarry Smith 355169b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3552273d9f13SBarry Smith @*/ 35537087cfbeSBarry Smith PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3554273d9f13SBarry Smith { 35554ac538c5SBarry Smith PetscErrorCode ierr; 3556273d9f13SBarry Smith 3557273d9f13SBarry Smith PetscFunctionBegin; 35586ba663aaSJed Brown PetscValidHeaderSpecific(B,MAT_CLASSID,1); 35596ba663aaSJed Brown PetscValidType(B,1); 35606ba663aaSJed Brown PetscValidLogicalCollectiveInt(B,bs,2); 35614ac538c5SBarry Smith ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3562273d9f13SBarry Smith PetscFunctionReturn(0); 3563273d9f13SBarry Smith } 3564a1d92eedSBarry Smith 3565725b52f3SLisandro Dalcin /*@C 3566664954b6SBarry Smith MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 3567725b52f3SLisandro Dalcin 3568d083f849SBarry Smith Collective 3569725b52f3SLisandro Dalcin 3570725b52f3SLisandro Dalcin Input Parameters: 35711c4f3114SJed Brown + B - the matrix 3572725b52f3SLisandro Dalcin . i - the indices into j for the start of each local row (starts with zero) 3573725b52f3SLisandro Dalcin . j - the column indices for each local row (starts with zero) these must be sorted for each row 3574725b52f3SLisandro Dalcin - v - optional values in the matrix 3575725b52f3SLisandro Dalcin 3576664954b6SBarry Smith Level: advanced 3577725b52f3SLisandro Dalcin 35783adadaf3SJed Brown Notes: 35793adadaf3SJed Brown The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 35803adadaf3SJed Brown may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 35813adadaf3SJed Brown over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 35823adadaf3SJed Brown MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 35833adadaf3SJed Brown block column and the second index is over columns within a block. 35843adadaf3SJed Brown 3585664954b6SBarry Smith Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3586664954b6SBarry Smith 3587725b52f3SLisandro Dalcin .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3588725b52f3SLisandro Dalcin @*/ 35897087cfbeSBarry Smith PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3590725b52f3SLisandro Dalcin { 35914ac538c5SBarry Smith PetscErrorCode ierr; 3592725b52f3SLisandro Dalcin 3593725b52f3SLisandro Dalcin PetscFunctionBegin; 35946ba663aaSJed Brown PetscValidHeaderSpecific(B,MAT_CLASSID,1); 35956ba663aaSJed Brown PetscValidType(B,1); 35966ba663aaSJed Brown PetscValidLogicalCollectiveInt(B,bs,2); 35974ac538c5SBarry Smith ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3598725b52f3SLisandro Dalcin PetscFunctionReturn(0); 3599725b52f3SLisandro Dalcin } 3600725b52f3SLisandro Dalcin 3601c75a6043SHong Zhang /*@ 3602dfb205c3SBarry Smith MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3603c75a6043SHong Zhang 3604d083f849SBarry Smith Collective 3605c75a6043SHong Zhang 3606c75a6043SHong Zhang Input Parameters: 3607c75a6043SHong Zhang + comm - must be an MPI communicator of size 1 3608c75a6043SHong Zhang . bs - size of block 3609c75a6043SHong Zhang . m - number of rows 3610c75a6043SHong Zhang . n - number of columns 3611483a2f95SBarry Smith . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3612c75a6043SHong Zhang . j - column indices 3613c75a6043SHong Zhang - a - matrix values 3614c75a6043SHong Zhang 3615c75a6043SHong Zhang Output Parameter: 3616c75a6043SHong Zhang . mat - the matrix 3617c75a6043SHong Zhang 3618dfb205c3SBarry Smith Level: advanced 3619c75a6043SHong Zhang 3620c75a6043SHong Zhang Notes: 3621c75a6043SHong Zhang The i, j, and a arrays are not copied by this routine, the user must free these arrays 3622c75a6043SHong Zhang once the matrix is destroyed 3623c75a6043SHong Zhang 3624c75a6043SHong Zhang You cannot set new nonzero locations into this matrix, that will generate an error. 3625c75a6043SHong Zhang 3626c75a6043SHong Zhang The i and j indices are 0 based 3627c75a6043SHong Zhang 3628dfb205c3SBarry Smith When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3629dfb205c3SBarry Smith 36303adadaf3SJed Brown The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 36313adadaf3SJed Brown the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 36323adadaf3SJed Brown block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 36333adadaf3SJed Brown with column-major ordering within blocks. 3634dfb205c3SBarry Smith 363569b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3636c75a6043SHong Zhang 3637c75a6043SHong Zhang @*/ 3638c3c607ccSBarry Smith PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3639c75a6043SHong Zhang { 3640c75a6043SHong Zhang PetscErrorCode ierr; 3641c75a6043SHong Zhang PetscInt ii; 3642c75a6043SHong Zhang Mat_SeqBAIJ *baij; 3643c75a6043SHong Zhang 3644c75a6043SHong Zhang PetscFunctionBegin; 3645e32f2f54SBarry Smith if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 364641096f02SStefano Zampini if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3647c75a6043SHong Zhang 3648c75a6043SHong Zhang ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3649c75a6043SHong Zhang ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3650c75a6043SHong Zhang ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3651f4259b30SLisandro Dalcin ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 3652c75a6043SHong Zhang baij = (Mat_SeqBAIJ*)(*mat)->data; 3653dcca6d9dSJed Brown ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 36543bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3655c75a6043SHong Zhang 3656c75a6043SHong Zhang baij->i = i; 3657c75a6043SHong Zhang baij->j = j; 3658c75a6043SHong Zhang baij->a = a; 365926fbe8dcSKarl Rupp 3660c75a6043SHong Zhang baij->singlemalloc = PETSC_FALSE; 3661c75a6043SHong Zhang baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3662e6b907acSBarry Smith baij->free_a = PETSC_FALSE; 3663e6b907acSBarry Smith baij->free_ij = PETSC_FALSE; 3664c75a6043SHong Zhang 3665c75a6043SHong Zhang for (ii=0; ii<m; ii++) { 3666c75a6043SHong Zhang baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3667cf9c20a2SJed Brown if (PetscUnlikelyDebug(i[ii+1] - i[ii] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3668c75a6043SHong Zhang } 366976bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { 3670c75a6043SHong Zhang for (ii=0; ii<baij->i[m]; ii++) { 3671e32f2f54SBarry Smith if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3672e32f2f54SBarry Smith if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3673c75a6043SHong Zhang } 367476bd3646SJed Brown } 3675c75a6043SHong Zhang 3676c75a6043SHong Zhang ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3677c75a6043SHong Zhang ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3678c75a6043SHong Zhang PetscFunctionReturn(0); 3679c75a6043SHong Zhang } 3680bdf6f3fcSHong Zhang 3681bdf6f3fcSHong Zhang PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3682bdf6f3fcSHong Zhang { 3683bdf6f3fcSHong Zhang PetscErrorCode ierr; 36848761c3d6SHong Zhang PetscMPIInt size; 3685bdf6f3fcSHong Zhang 3686bdf6f3fcSHong Zhang PetscFunctionBegin; 3687ffc4695bSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 36888761c3d6SHong Zhang if (size == 1 && scall == MAT_REUSE_MATRIX) { 36898761c3d6SHong Zhang ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 36908761c3d6SHong Zhang } else { 3691bdf6f3fcSHong Zhang ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 36928761c3d6SHong Zhang } 3693bdf6f3fcSHong Zhang PetscFunctionReturn(0); 3694bdf6f3fcSHong Zhang } 3695