1be1d678aSKris Buschelman 22593348eSBarry Smith /* 3b6490206SBarry Smith Defines the basic matrix operations for the BAIJ (compressed row) 42593348eSBarry Smith matrix storage format. 52593348eSBarry Smith */ 6c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7c6db04a5SJed Brown #include <petscblaslapack.h> 8af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h> 9af0996ceSBarry Smith #include <petsc/private/kernels/blockmatmult.h> 1043516a2dSKris Buschelman 117ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 127ea3e4caSstefano_zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 137ea3e4caSstefano_zampini #endif 147ea3e4caSstefano_zampini 15b5b72c8aSIrina Sokolova #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16fd9d3c67SJed Brown PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*); 17b5b72c8aSIrina Sokolova #endif 18c9225affSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 19b5b72c8aSIrina Sokolova 20857cbf51SRichard Tran Mills PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A,PetscInt type,PetscReal *reductions) 219463ebdaSPierre Jolivet { 229463ebdaSPierre Jolivet PetscErrorCode ierr; 239463ebdaSPierre Jolivet Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) A->data; 24857cbf51SRichard Tran Mills PetscInt m,n,i; 259463ebdaSPierre Jolivet PetscInt ib,jb,bs = A->rmap->bs; 269463ebdaSPierre Jolivet MatScalar *a_val = a_aij->a; 279463ebdaSPierre Jolivet 289463ebdaSPierre Jolivet PetscFunctionBegin; 29857cbf51SRichard Tran Mills ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 30857cbf51SRichard Tran Mills for (i=0; i<n; i++) reductions[i] = 0.0; 319463ebdaSPierre Jolivet if (type == NORM_2) { 329463ebdaSPierre Jolivet for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 339463ebdaSPierre Jolivet for (jb=0; jb<bs; jb++) { 349463ebdaSPierre Jolivet for (ib=0; ib<bs; ib++) { 35857cbf51SRichard Tran Mills reductions[A->cmap->rstart + 
a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 369463ebdaSPierre Jolivet a_val++; 379463ebdaSPierre Jolivet } 389463ebdaSPierre Jolivet } 399463ebdaSPierre Jolivet } 409463ebdaSPierre Jolivet } else if (type == NORM_1) { 419463ebdaSPierre Jolivet for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 429463ebdaSPierre Jolivet for (jb=0; jb<bs; jb++) { 439463ebdaSPierre Jolivet for (ib=0; ib<bs; ib++) { 44857cbf51SRichard Tran Mills reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 459463ebdaSPierre Jolivet a_val++; 469463ebdaSPierre Jolivet } 479463ebdaSPierre Jolivet } 489463ebdaSPierre Jolivet } 499463ebdaSPierre Jolivet } else if (type == NORM_INFINITY) { 509463ebdaSPierre Jolivet for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 519463ebdaSPierre Jolivet for (jb=0; jb<bs; jb++) { 529463ebdaSPierre Jolivet for (ib=0; ib<bs; ib++) { 539463ebdaSPierre Jolivet int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 54857cbf51SRichard Tran Mills reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]); 559463ebdaSPierre Jolivet a_val++; 569463ebdaSPierre Jolivet } 579463ebdaSPierre Jolivet } 589463ebdaSPierre Jolivet } 59857cbf51SRichard Tran Mills } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 60857cbf51SRichard Tran Mills for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 61857cbf51SRichard Tran Mills for (jb=0; jb<bs; jb++) { 62857cbf51SRichard Tran Mills for (ib=0; ib<bs; ib++) { 63857cbf51SRichard Tran Mills reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 64857cbf51SRichard Tran Mills a_val++; 65857cbf51SRichard Tran Mills } 66857cbf51SRichard Tran Mills } 67857cbf51SRichard Tran Mills } 68857cbf51SRichard Tran Mills } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 69857cbf51SRichard Tran Mills for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 70857cbf51SRichard Tran Mills for (jb=0; jb<bs; jb++) { 
71857cbf51SRichard Tran Mills for (ib=0; ib<bs; ib++) { 72857cbf51SRichard Tran Mills reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 73857cbf51SRichard Tran Mills a_val++; 74857cbf51SRichard Tran Mills } 75857cbf51SRichard Tran Mills } 76857cbf51SRichard Tran Mills } 77857cbf51SRichard Tran Mills } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 789463ebdaSPierre Jolivet if (type == NORM_2) { 79857cbf51SRichard Tran Mills for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 80857cbf51SRichard Tran Mills } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 81857cbf51SRichard Tran Mills for (i=0; i<n; i++) reductions[i] /= m; 829463ebdaSPierre Jolivet } 839463ebdaSPierre Jolivet PetscFunctionReturn(0); 849463ebdaSPierre Jolivet } 859463ebdaSPierre Jolivet 86713ccfa9SJed Brown PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 87b01c7715SBarry Smith { 88b01c7715SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 896849ba73SBarry Smith PetscErrorCode ierr; 90de80f912SBarry Smith PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 917f0c90edSBarry Smith MatScalar *v = a->a,*odiag,*diag,work[25],*v_work; 9262bba022SBarry Smith PetscReal shift = 0.0; 931a9391e3SHong Zhang PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 94b01c7715SBarry Smith 95b01c7715SBarry Smith PetscFunctionBegin; 96a455e926SHong Zhang allowzeropivot = PetscNot(A->erroriffailure); 97a455e926SHong Zhang 989797317bSBarry Smith if (a->idiagvalid) { 999797317bSBarry Smith if (values) *values = a->idiag; 1009797317bSBarry Smith PetscFunctionReturn(0); 1019797317bSBarry Smith } 102b01c7715SBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 103b01c7715SBarry Smith diag_offset = a->diag; 104b01c7715SBarry Smith if (!a->idiag) { 1057f0c90edSBarry Smith ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr); 
1067f0c90edSBarry Smith ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 107b01c7715SBarry Smith } 108b01c7715SBarry Smith diag = a->idiag; 109bbead8a2SBarry Smith if (values) *values = a->idiag; 110b01c7715SBarry Smith /* factor and invert each block */ 111521d7252SBarry Smith switch (bs) { 112ab040260SJed Brown case 1: 113ab040260SJed Brown for (i=0; i<mbs; i++) { 114ab040260SJed Brown odiag = v + 1*diag_offset[i]; 115ab040260SJed Brown diag[0] = odiag[0]; 116ec1892c8SHong Zhang 117ec1892c8SHong Zhang if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 118ec1892c8SHong Zhang if (allowzeropivot) { 1197b6c816cSBarry Smith A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1207b6c816cSBarry Smith A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 1217b6c816cSBarry Smith A->factorerror_zeropivot_row = i; 1227d3de750SJacob Faibussowitsch ierr = PetscInfo(A,"Zero pivot, row %" PetscInt_FMT "\n",i);CHKERRQ(ierr); 12398921bdaSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 124ec1892c8SHong Zhang } 125ec1892c8SHong Zhang 126d4a378daSJed Brown diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 127ab040260SJed Brown diag += 1; 128ab040260SJed Brown } 129ab040260SJed Brown break; 130b01c7715SBarry Smith case 2: 131b01c7715SBarry Smith for (i=0; i<mbs; i++) { 132b01c7715SBarry Smith odiag = v + 4*diag_offset[i]; 133b01c7715SBarry Smith diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 134a455e926SHong Zhang ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 1357b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 136b01c7715SBarry Smith diag += 4; 137b01c7715SBarry Smith } 138b01c7715SBarry Smith break; 139b01c7715SBarry Smith case 3: 140b01c7715SBarry 
Smith for (i=0; i<mbs; i++) { 141b01c7715SBarry Smith odiag = v + 9*diag_offset[i]; 142b01c7715SBarry Smith diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 143b01c7715SBarry Smith diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 144b01c7715SBarry Smith diag[8] = odiag[8]; 145a455e926SHong Zhang ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 1467b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 147b01c7715SBarry Smith diag += 9; 148b01c7715SBarry Smith } 149b01c7715SBarry Smith break; 150b01c7715SBarry Smith case 4: 151b01c7715SBarry Smith for (i=0; i<mbs; i++) { 152b01c7715SBarry Smith odiag = v + 16*diag_offset[i]; 153580bdb30SBarry Smith ierr = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr); 154a455e926SHong Zhang ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 1557b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 156b01c7715SBarry Smith diag += 16; 157b01c7715SBarry Smith } 158b01c7715SBarry Smith break; 159b01c7715SBarry Smith case 5: 160b01c7715SBarry Smith for (i=0; i<mbs; i++) { 161b01c7715SBarry Smith odiag = v + 25*diag_offset[i]; 162580bdb30SBarry Smith ierr = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr); 163a455e926SHong Zhang ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 1647b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 165b01c7715SBarry Smith diag += 25; 166b01c7715SBarry Smith } 167b01c7715SBarry Smith break; 168d49b2adcSBarry Smith case 6: 169d49b2adcSBarry Smith for (i=0; i<mbs; i++) { 170d49b2adcSBarry Smith odiag = v + 36*diag_offset[i]; 171580bdb30SBarry Smith ierr = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr); 172a455e926SHong Zhang ierr = 
PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 1737b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 174d49b2adcSBarry Smith diag += 36; 175d49b2adcSBarry Smith } 176d49b2adcSBarry Smith break; 177de80f912SBarry Smith case 7: 178de80f912SBarry Smith for (i=0; i<mbs; i++) { 179de80f912SBarry Smith odiag = v + 49*diag_offset[i]; 180580bdb30SBarry Smith ierr = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr); 181a455e926SHong Zhang ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 1827b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 183de80f912SBarry Smith diag += 49; 184de80f912SBarry Smith } 185de80f912SBarry Smith break; 186b01c7715SBarry Smith default: 187dcca6d9dSJed Brown ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 188de80f912SBarry Smith for (i=0; i<mbs; i++) { 189de80f912SBarry Smith odiag = v + bs2*diag_offset[i]; 190580bdb30SBarry Smith ierr = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr); 1915f8bbccaSHong Zhang ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 1927b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 193de80f912SBarry Smith diag += bs2; 194de80f912SBarry Smith } 195de80f912SBarry Smith ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 196b01c7715SBarry Smith } 197b01c7715SBarry Smith a->idiagvalid = PETSC_TRUE; 198b01c7715SBarry Smith PetscFunctionReturn(0); 199b01c7715SBarry Smith } 200b01c7715SBarry Smith 201e48d15efSToby Isaac PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2026d3beeddSMatthew Knepley { 2036d3beeddSMatthew Knepley Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 204e48d15efSToby Isaac PetscScalar *x,*work,*w,*workt,*t; 205e48d15efSToby Isaac const MatScalar *v,*aa = 
a->a, *idiag; 206e48d15efSToby Isaac const PetscScalar *b,*xb; 2075455b99fSToby Isaac PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 2086d3beeddSMatthew Knepley PetscErrorCode ierr; 209e48d15efSToby Isaac PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 210c1ac3661SBarry Smith const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 211b01c7715SBarry Smith 212b01c7715SBarry Smith PetscFunctionBegin; 213b01c7715SBarry Smith its = its*lits; 2142c71b3e2SJacob Faibussowitsch PetscCheckFalse(flag & SOR_EISENSTAT,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 2152c71b3e2SJacob Faibussowitsch PetscCheckFalse(its <= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits); 2162c71b3e2SJacob Faibussowitsch PetscCheckFalse(fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift"); 2172c71b3e2SJacob Faibussowitsch PetscCheckFalse(omega != 1.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-trivial relaxation factor"); 2182c71b3e2SJacob Faibussowitsch PetscCheckFalse((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts"); 219b01c7715SBarry Smith 2200298fd71SBarry Smith if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 221b01c7715SBarry Smith 222b2ec919aSToby Isaac if (!m) PetscFunctionReturn(0); 223b01c7715SBarry Smith diag = a->diag; 224b01c7715SBarry Smith idiag = a->idiag; 225de80f912SBarry Smith k = PetscMax(A->rmap->n,A->cmap->n); 226e48d15efSToby Isaac if (!a->mult_work) { 227f361c04dSBarry Smith ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 228de80f912SBarry Smith } 2293475c22fSBarry Smith if (!a->sor_workt) { 230f361c04dSBarry Smith ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 231de80f912SBarry Smith } 232de80f912SBarry Smith if (!a->sor_work) { 233785e854fSJed Brown ierr = 
PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 234de80f912SBarry Smith } 2353475c22fSBarry Smith work = a->mult_work; 2363475c22fSBarry Smith t = a->sor_workt; 237de80f912SBarry Smith w = a->sor_work; 238de80f912SBarry Smith 239de80f912SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 240de80f912SBarry Smith ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 241de80f912SBarry Smith 242de80f912SBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 243de80f912SBarry Smith if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 244e48d15efSToby Isaac switch (bs) { 245e48d15efSToby Isaac case 1: 246e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(x,idiag,b); 247e48d15efSToby Isaac t[0] = b[0]; 248e48d15efSToby Isaac i2 = 1; 249e48d15efSToby Isaac idiag += 1; 250e48d15efSToby Isaac for (i=1; i<m; i++) { 251e48d15efSToby Isaac v = aa + ai[i]; 252e48d15efSToby Isaac vi = aj + ai[i]; 253e48d15efSToby Isaac nz = diag[i] - ai[i]; 254e48d15efSToby Isaac s[0] = b[i2]; 255e48d15efSToby Isaac for (j=0; j<nz; j++) { 256e48d15efSToby Isaac xw[0] = x[vi[j]]; 257e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 258e48d15efSToby Isaac } 259e48d15efSToby Isaac t[i2] = s[0]; 260e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 261e48d15efSToby Isaac x[i2] = xw[0]; 262e48d15efSToby Isaac idiag += 1; 263e48d15efSToby Isaac i2 += 1; 264e48d15efSToby Isaac } 265e48d15efSToby Isaac break; 266e48d15efSToby Isaac case 2: 267e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(x,idiag,b); 268e48d15efSToby Isaac t[0] = b[0]; t[1] = b[1]; 269e48d15efSToby Isaac i2 = 2; 270e48d15efSToby Isaac idiag += 4; 271e48d15efSToby Isaac for (i=1; i<m; i++) { 272e48d15efSToby Isaac v = aa + 4*ai[i]; 273e48d15efSToby Isaac vi = aj + ai[i]; 274e48d15efSToby Isaac nz = diag[i] - ai[i]; 275e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; 276e48d15efSToby Isaac for (j=0; j<nz; j++) { 277e48d15efSToby Isaac idx = 2*vi[j]; 278e48d15efSToby Isaac it = 4*j; 279e48d15efSToby 
Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; 280e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 281e48d15efSToby Isaac } 282e48d15efSToby Isaac t[i2] = s[0]; t[i2+1] = s[1]; 283e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 284e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; 285e48d15efSToby Isaac idiag += 4; 286e48d15efSToby Isaac i2 += 2; 287e48d15efSToby Isaac } 288e48d15efSToby Isaac break; 289e48d15efSToby Isaac case 3: 290e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(x,idiag,b); 291e48d15efSToby Isaac t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 292e48d15efSToby Isaac i2 = 3; 293e48d15efSToby Isaac idiag += 9; 294e48d15efSToby Isaac for (i=1; i<m; i++) { 295e48d15efSToby Isaac v = aa + 9*ai[i]; 296e48d15efSToby Isaac vi = aj + ai[i]; 297e48d15efSToby Isaac nz = diag[i] - ai[i]; 298e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 299e48d15efSToby Isaac while (nz--) { 300e48d15efSToby Isaac idx = 3*(*vi++); 301e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 302e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 303e48d15efSToby Isaac v += 9; 304e48d15efSToby Isaac } 305e48d15efSToby Isaac t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 306e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 307e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 308e48d15efSToby Isaac idiag += 9; 309e48d15efSToby Isaac i2 += 3; 310e48d15efSToby Isaac } 311e48d15efSToby Isaac break; 312e48d15efSToby Isaac case 4: 313e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(x,idiag,b); 314e48d15efSToby Isaac t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 315e48d15efSToby Isaac i2 = 4; 316e48d15efSToby Isaac idiag += 16; 317e48d15efSToby Isaac for (i=1; i<m; i++) { 318e48d15efSToby Isaac v = aa + 16*ai[i]; 319e48d15efSToby Isaac vi = aj + ai[i]; 320e48d15efSToby Isaac nz = diag[i] - ai[i]; 321e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = 
b[i2+3]; 322e48d15efSToby Isaac while (nz--) { 323e48d15efSToby Isaac idx = 4*(*vi++); 324e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 325e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 326e48d15efSToby Isaac v += 16; 327e48d15efSToby Isaac } 328e48d15efSToby Isaac t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 329e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 330e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 331e48d15efSToby Isaac idiag += 16; 332e48d15efSToby Isaac i2 += 4; 333e48d15efSToby Isaac } 334e48d15efSToby Isaac break; 335e48d15efSToby Isaac case 5: 336e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(x,idiag,b); 337e48d15efSToby Isaac t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 338e48d15efSToby Isaac i2 = 5; 339e48d15efSToby Isaac idiag += 25; 340e48d15efSToby Isaac for (i=1; i<m; i++) { 341e48d15efSToby Isaac v = aa + 25*ai[i]; 342e48d15efSToby Isaac vi = aj + ai[i]; 343e48d15efSToby Isaac nz = diag[i] - ai[i]; 344e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 345e48d15efSToby Isaac while (nz--) { 346e48d15efSToby Isaac idx = 5*(*vi++); 347e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 348e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 349e48d15efSToby Isaac v += 25; 350e48d15efSToby Isaac } 351e48d15efSToby Isaac t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 352e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 353e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 354e48d15efSToby Isaac idiag += 25; 355e48d15efSToby Isaac i2 += 5; 356e48d15efSToby Isaac } 357e48d15efSToby Isaac break; 358e48d15efSToby Isaac case 6: 359e48d15efSToby Isaac 
PetscKernel_v_gets_A_times_w_6(x,idiag,b); 360e48d15efSToby Isaac t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 361e48d15efSToby Isaac i2 = 6; 362e48d15efSToby Isaac idiag += 36; 363e48d15efSToby Isaac for (i=1; i<m; i++) { 364e48d15efSToby Isaac v = aa + 36*ai[i]; 365e48d15efSToby Isaac vi = aj + ai[i]; 366e48d15efSToby Isaac nz = diag[i] - ai[i]; 367e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 368e48d15efSToby Isaac while (nz--) { 369e48d15efSToby Isaac idx = 6*(*vi++); 370e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 371e48d15efSToby Isaac xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 372e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 373e48d15efSToby Isaac v += 36; 374e48d15efSToby Isaac } 375e48d15efSToby Isaac t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 376e48d15efSToby Isaac t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 377e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 378e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 379e48d15efSToby Isaac idiag += 36; 380e48d15efSToby Isaac i2 += 6; 381e48d15efSToby Isaac } 382e48d15efSToby Isaac break; 383e48d15efSToby Isaac case 7: 384e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(x,idiag,b); 385e48d15efSToby Isaac t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 386e48d15efSToby Isaac t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 387e48d15efSToby Isaac i2 = 7; 388e48d15efSToby Isaac idiag += 49; 389e48d15efSToby Isaac for (i=1; i<m; i++) { 390e48d15efSToby Isaac v = aa + 49*ai[i]; 391e48d15efSToby Isaac vi = aj + ai[i]; 392e48d15efSToby Isaac nz = diag[i] - ai[i]; 393e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 394e48d15efSToby Isaac s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 395e48d15efSToby Isaac while (nz--) { 396e48d15efSToby 
Isaac idx = 7*(*vi++); 397e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 398e48d15efSToby Isaac xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 399e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 400e48d15efSToby Isaac v += 49; 401e48d15efSToby Isaac } 402e48d15efSToby Isaac t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 403e48d15efSToby Isaac t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 404e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 405e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 406e48d15efSToby Isaac x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 407e48d15efSToby Isaac idiag += 49; 408e48d15efSToby Isaac i2 += 7; 409e48d15efSToby Isaac } 410e48d15efSToby Isaac break; 411e48d15efSToby Isaac default: 41296b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 413580bdb30SBarry Smith ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr); 414de80f912SBarry Smith i2 = bs; 415de80f912SBarry Smith idiag += bs2; 416de80f912SBarry Smith for (i=1; i<m; i++) { 417de80f912SBarry Smith v = aa + bs2*ai[i]; 418de80f912SBarry Smith vi = aj + ai[i]; 419de80f912SBarry Smith nz = diag[i] - ai[i]; 420de80f912SBarry Smith 421580bdb30SBarry Smith ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 422de80f912SBarry Smith /* copy all rows of x that are needed into contiguous space */ 423de80f912SBarry Smith workt = work; 424de80f912SBarry Smith for (j=0; j<nz; j++) { 425580bdb30SBarry Smith ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 426de80f912SBarry Smith workt += bs; 427de80f912SBarry Smith } 42896b95a6bSBarry Smith PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 429580bdb30SBarry Smith ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr); 43096b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 431de80f912SBarry Smith 432de80f912SBarry Smith idiag += bs2; 433de80f912SBarry Smith i2 += bs; 
434de80f912SBarry Smith } 435e48d15efSToby Isaac break; 436e48d15efSToby Isaac } 437de80f912SBarry Smith /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 438e48d15efSToby Isaac ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 439e48d15efSToby Isaac xb = t; 440de80f912SBarry Smith } 441e48d15efSToby Isaac else xb = b; 442de80f912SBarry Smith if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 443e48d15efSToby Isaac idiag = a->idiag+bs2*(a->mbs-1); 444e48d15efSToby Isaac i2 = bs * (m-1); 445e48d15efSToby Isaac switch (bs) { 446e48d15efSToby Isaac case 1: 447e48d15efSToby Isaac s[0] = xb[i2]; 448e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 449e48d15efSToby Isaac x[i2] = xw[0]; 450e48d15efSToby Isaac i2 -= 1; 451e48d15efSToby Isaac for (i=m-2; i>=0; i--) { 452e48d15efSToby Isaac v = aa + (diag[i]+1); 453e48d15efSToby Isaac vi = aj + diag[i] + 1; 454e48d15efSToby Isaac nz = ai[i+1] - diag[i] - 1; 455e48d15efSToby Isaac s[0] = xb[i2]; 456e48d15efSToby Isaac for (j=0; j<nz; j++) { 457e48d15efSToby Isaac xw[0] = x[vi[j]]; 458e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 459e48d15efSToby Isaac } 460e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 461e48d15efSToby Isaac x[i2] = xw[0]; 462e48d15efSToby Isaac idiag -= 1; 463e48d15efSToby Isaac i2 -= 1; 464e48d15efSToby Isaac } 465e48d15efSToby Isaac break; 466e48d15efSToby Isaac case 2: 467e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; 468e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 469e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; 470e48d15efSToby Isaac i2 -= 2; 471e48d15efSToby Isaac idiag -= 4; 472e48d15efSToby Isaac for (i=m-2; i>=0; i--) { 473e48d15efSToby Isaac v = aa + 4*(diag[i] + 1); 474e48d15efSToby Isaac vi = aj + diag[i] + 1; 475e48d15efSToby Isaac nz = ai[i+1] - diag[i] - 1; 476e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; 477e48d15efSToby Isaac for (j=0; j<nz; j++) { 
478e48d15efSToby Isaac idx = 2*vi[j]; 479e48d15efSToby Isaac it = 4*j; 480e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; 481e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 482e48d15efSToby Isaac } 483e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 484e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; 485e48d15efSToby Isaac idiag -= 4; 486e48d15efSToby Isaac i2 -= 2; 487e48d15efSToby Isaac } 488e48d15efSToby Isaac break; 489e48d15efSToby Isaac case 3: 490e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 491e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 492e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 493e48d15efSToby Isaac i2 -= 3; 494e48d15efSToby Isaac idiag -= 9; 495e48d15efSToby Isaac for (i=m-2; i>=0; i--) { 496e48d15efSToby Isaac v = aa + 9*(diag[i]+1); 497e48d15efSToby Isaac vi = aj + diag[i] + 1; 498e48d15efSToby Isaac nz = ai[i+1] - diag[i] - 1; 499e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 500e48d15efSToby Isaac while (nz--) { 501e48d15efSToby Isaac idx = 3*(*vi++); 502e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 503e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 504e48d15efSToby Isaac v += 9; 505e48d15efSToby Isaac } 506e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 507e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 508e48d15efSToby Isaac idiag -= 9; 509e48d15efSToby Isaac i2 -= 3; 510e48d15efSToby Isaac } 511e48d15efSToby Isaac break; 512e48d15efSToby Isaac case 4: 513e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 514e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 515e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 516e48d15efSToby Isaac i2 -= 4; 517e48d15efSToby Isaac idiag -= 16; 518e48d15efSToby Isaac for (i=m-2; i>=0; i--) { 519e48d15efSToby 
Isaac v = aa + 16*(diag[i]+1); 520e48d15efSToby Isaac vi = aj + diag[i] + 1; 521e48d15efSToby Isaac nz = ai[i+1] - diag[i] - 1; 522e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 523e48d15efSToby Isaac while (nz--) { 524e48d15efSToby Isaac idx = 4*(*vi++); 525e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 526e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 527e48d15efSToby Isaac v += 16; 528e48d15efSToby Isaac } 529e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 530e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 531e48d15efSToby Isaac idiag -= 16; 532e48d15efSToby Isaac i2 -= 4; 533e48d15efSToby Isaac } 534e48d15efSToby Isaac break; 535e48d15efSToby Isaac case 5: 536e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 537e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 538e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 539e48d15efSToby Isaac i2 -= 5; 540e48d15efSToby Isaac idiag -= 25; 541e48d15efSToby Isaac for (i=m-2; i>=0; i--) { 542e48d15efSToby Isaac v = aa + 25*(diag[i]+1); 543e48d15efSToby Isaac vi = aj + diag[i] + 1; 544e48d15efSToby Isaac nz = ai[i+1] - diag[i] - 1; 545e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 546e48d15efSToby Isaac while (nz--) { 547e48d15efSToby Isaac idx = 5*(*vi++); 548e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 549e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 550e48d15efSToby Isaac v += 25; 551e48d15efSToby Isaac } 552e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 553e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 554e48d15efSToby Isaac idiag -= 25; 
555e48d15efSToby Isaac i2 -= 5; 556e48d15efSToby Isaac } 557e48d15efSToby Isaac break; 558e48d15efSToby Isaac case 6: 559e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 560e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 561e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 562e48d15efSToby Isaac i2 -= 6; 563e48d15efSToby Isaac idiag -= 36; 564e48d15efSToby Isaac for (i=m-2; i>=0; i--) { 565e48d15efSToby Isaac v = aa + 36*(diag[i]+1); 566e48d15efSToby Isaac vi = aj + diag[i] + 1; 567e48d15efSToby Isaac nz = ai[i+1] - diag[i] - 1; 568e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 569e48d15efSToby Isaac while (nz--) { 570e48d15efSToby Isaac idx = 6*(*vi++); 571e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 572e48d15efSToby Isaac xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 573e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 574e48d15efSToby Isaac v += 36; 575e48d15efSToby Isaac } 576e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 577e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 578e48d15efSToby Isaac idiag -= 36; 579e48d15efSToby Isaac i2 -= 6; 580e48d15efSToby Isaac } 581e48d15efSToby Isaac break; 582e48d15efSToby Isaac case 7: 583e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 584e48d15efSToby Isaac s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 585e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(x,idiag,b); 586e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 587e48d15efSToby Isaac x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 588e48d15efSToby Isaac i2 -= 7; 589e48d15efSToby Isaac idiag -= 49; 590e48d15efSToby Isaac 
for (i=m-2; i>=0; i--) { 591e48d15efSToby Isaac v = aa + 49*(diag[i]+1); 592e48d15efSToby Isaac vi = aj + diag[i] + 1; 593e48d15efSToby Isaac nz = ai[i+1] - diag[i] - 1; 594e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 595e48d15efSToby Isaac s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 596e48d15efSToby Isaac while (nz--) { 597e48d15efSToby Isaac idx = 7*(*vi++); 598e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 599e48d15efSToby Isaac xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 600e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 601e48d15efSToby Isaac v += 49; 602e48d15efSToby Isaac } 603e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 604e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 605e48d15efSToby Isaac x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 606e48d15efSToby Isaac idiag -= 49; 607e48d15efSToby Isaac i2 -= 7; 608e48d15efSToby Isaac } 609e48d15efSToby Isaac break; 610e48d15efSToby Isaac default: 611580bdb30SBarry Smith ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr); 61296b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 613de80f912SBarry Smith i2 -= bs; 614e48d15efSToby Isaac idiag -= bs2; 615de80f912SBarry Smith for (i=m-2; i>=0; i--) { 616de80f912SBarry Smith v = aa + bs2*(diag[i]+1); 617de80f912SBarry Smith vi = aj + diag[i] + 1; 618de80f912SBarry Smith nz = ai[i+1] - diag[i] - 1; 619de80f912SBarry Smith 620580bdb30SBarry Smith ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr); 621de80f912SBarry Smith /* copy all rows of x that are needed into contiguous space */ 622de80f912SBarry Smith workt = work; 623de80f912SBarry Smith for (j=0; j<nz; j++) { 624580bdb30SBarry Smith ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 625de80f912SBarry Smith workt += bs; 626de80f912SBarry Smith } 62796b95a6bSBarry Smith 
PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 62896b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 629e48d15efSToby Isaac 630de80f912SBarry Smith idiag -= bs2; 631de80f912SBarry Smith i2 -= bs; 632de80f912SBarry Smith } 633e48d15efSToby Isaac break; 634e48d15efSToby Isaac } 635de80f912SBarry Smith ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 636de80f912SBarry Smith } 637e48d15efSToby Isaac its--; 638e48d15efSToby Isaac } 639e48d15efSToby Isaac while (its--) { 640e48d15efSToby Isaac if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 641e48d15efSToby Isaac idiag = a->idiag; 642e48d15efSToby Isaac i2 = 0; 643e48d15efSToby Isaac switch (bs) { 644e48d15efSToby Isaac case 1: 645e48d15efSToby Isaac for (i=0; i<m; i++) { 646e48d15efSToby Isaac v = aa + ai[i]; 647e48d15efSToby Isaac vi = aj + ai[i]; 648e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 649e48d15efSToby Isaac s[0] = b[i2]; 650e48d15efSToby Isaac for (j=0; j<nz; j++) { 651e48d15efSToby Isaac xw[0] = x[vi[j]]; 652e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 653e48d15efSToby Isaac } 654e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 655e48d15efSToby Isaac x[i2] += xw[0]; 656e48d15efSToby Isaac idiag += 1; 657e48d15efSToby Isaac i2 += 1; 658e48d15efSToby Isaac } 659e48d15efSToby Isaac break; 660e48d15efSToby Isaac case 2: 661e48d15efSToby Isaac for (i=0; i<m; i++) { 662e48d15efSToby Isaac v = aa + 4*ai[i]; 663e48d15efSToby Isaac vi = aj + ai[i]; 664e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 665e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; 666e48d15efSToby Isaac for (j=0; j<nz; j++) { 667e48d15efSToby Isaac idx = 2*vi[j]; 668e48d15efSToby Isaac it = 4*j; 669e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; 670e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 671e48d15efSToby Isaac } 672e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 673e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] 
+= xw[1]; 674e48d15efSToby Isaac idiag += 4; 675e48d15efSToby Isaac i2 += 2; 676e48d15efSToby Isaac } 677e48d15efSToby Isaac break; 678e48d15efSToby Isaac case 3: 679e48d15efSToby Isaac for (i=0; i<m; i++) { 680e48d15efSToby Isaac v = aa + 9*ai[i]; 681e48d15efSToby Isaac vi = aj + ai[i]; 682e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 683e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 684e48d15efSToby Isaac while (nz--) { 685e48d15efSToby Isaac idx = 3*(*vi++); 686e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 687e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 688e48d15efSToby Isaac v += 9; 689e48d15efSToby Isaac } 690e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 691e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 692e48d15efSToby Isaac idiag += 9; 693e48d15efSToby Isaac i2 += 3; 694e48d15efSToby Isaac } 695e48d15efSToby Isaac break; 696e48d15efSToby Isaac case 4: 697e48d15efSToby Isaac for (i=0; i<m; i++) { 698e48d15efSToby Isaac v = aa + 16*ai[i]; 699e48d15efSToby Isaac vi = aj + ai[i]; 700e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 701e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 702e48d15efSToby Isaac while (nz--) { 703e48d15efSToby Isaac idx = 4*(*vi++); 704e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 705e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 706e48d15efSToby Isaac v += 16; 707e48d15efSToby Isaac } 708e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 709e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 710e48d15efSToby Isaac idiag += 16; 711e48d15efSToby Isaac i2 += 4; 712e48d15efSToby Isaac } 713e48d15efSToby Isaac break; 714e48d15efSToby Isaac case 5: 715e48d15efSToby Isaac for (i=0; i<m; i++) { 716e48d15efSToby Isaac v = aa + 25*ai[i]; 717e48d15efSToby Isaac vi = aj + ai[i]; 718e48d15efSToby Isaac 
nz = ai[i+1] - ai[i]; 719e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 720e48d15efSToby Isaac while (nz--) { 721e48d15efSToby Isaac idx = 5*(*vi++); 722e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 723e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 724e48d15efSToby Isaac v += 25; 725e48d15efSToby Isaac } 726e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 727e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 728e48d15efSToby Isaac idiag += 25; 729e48d15efSToby Isaac i2 += 5; 730e48d15efSToby Isaac } 731e48d15efSToby Isaac break; 732e48d15efSToby Isaac case 6: 733e48d15efSToby Isaac for (i=0; i<m; i++) { 734e48d15efSToby Isaac v = aa + 36*ai[i]; 735e48d15efSToby Isaac vi = aj + ai[i]; 736e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 737e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 738e48d15efSToby Isaac while (nz--) { 739e48d15efSToby Isaac idx = 6*(*vi++); 740e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 741e48d15efSToby Isaac xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 742e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 743e48d15efSToby Isaac v += 36; 744e48d15efSToby Isaac } 745e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 746e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 747e48d15efSToby Isaac x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 748e48d15efSToby Isaac idiag += 36; 749e48d15efSToby Isaac i2 += 6; 750e48d15efSToby Isaac } 751e48d15efSToby Isaac break; 752e48d15efSToby Isaac case 7: 753e48d15efSToby Isaac for (i=0; i<m; i++) { 754e48d15efSToby Isaac v = aa + 49*ai[i]; 755e48d15efSToby Isaac vi = aj + ai[i]; 756e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 757e48d15efSToby Isaac s[0] = 
b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 758e48d15efSToby Isaac s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 759e48d15efSToby Isaac while (nz--) { 760e48d15efSToby Isaac idx = 7*(*vi++); 761e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 762e48d15efSToby Isaac xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 763e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 764e48d15efSToby Isaac v += 49; 765e48d15efSToby Isaac } 766e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 767e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 768e48d15efSToby Isaac x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 769e48d15efSToby Isaac idiag += 49; 770e48d15efSToby Isaac i2 += 7; 771e48d15efSToby Isaac } 772e48d15efSToby Isaac break; 773e48d15efSToby Isaac default: 774e48d15efSToby Isaac for (i=0; i<m; i++) { 775e48d15efSToby Isaac v = aa + bs2*ai[i]; 776e48d15efSToby Isaac vi = aj + ai[i]; 777e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 778e48d15efSToby Isaac 779580bdb30SBarry Smith ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 780e48d15efSToby Isaac /* copy all rows of x that are needed into contiguous space */ 781e48d15efSToby Isaac workt = work; 782e48d15efSToby Isaac for (j=0; j<nz; j++) { 783580bdb30SBarry Smith ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 784e48d15efSToby Isaac workt += bs; 785e48d15efSToby Isaac } 786e48d15efSToby Isaac PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 787e48d15efSToby Isaac PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 788e48d15efSToby Isaac 789e48d15efSToby Isaac idiag += bs2; 790e48d15efSToby Isaac i2 += bs; 791e48d15efSToby Isaac } 792e48d15efSToby Isaac break; 793e48d15efSToby Isaac } 794e48d15efSToby Isaac ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 795e48d15efSToby Isaac } 796e48d15efSToby Isaac if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) 
{ 797e48d15efSToby Isaac idiag = a->idiag+bs2*(a->mbs-1); 798e48d15efSToby Isaac i2 = bs * (m-1); 799e48d15efSToby Isaac switch (bs) { 800e48d15efSToby Isaac case 1: 801e48d15efSToby Isaac for (i=m-1; i>=0; i--) { 802e48d15efSToby Isaac v = aa + ai[i]; 803e48d15efSToby Isaac vi = aj + ai[i]; 804e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 805e48d15efSToby Isaac s[0] = b[i2]; 806e48d15efSToby Isaac for (j=0; j<nz; j++) { 807e48d15efSToby Isaac xw[0] = x[vi[j]]; 808e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 809e48d15efSToby Isaac } 810e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 811e48d15efSToby Isaac x[i2] += xw[0]; 812e48d15efSToby Isaac idiag -= 1; 813e48d15efSToby Isaac i2 -= 1; 814e48d15efSToby Isaac } 815e48d15efSToby Isaac break; 816e48d15efSToby Isaac case 2: 817e48d15efSToby Isaac for (i=m-1; i>=0; i--) { 818e48d15efSToby Isaac v = aa + 4*ai[i]; 819e48d15efSToby Isaac vi = aj + ai[i]; 820e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 821e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; 822e48d15efSToby Isaac for (j=0; j<nz; j++) { 823e48d15efSToby Isaac idx = 2*vi[j]; 824e48d15efSToby Isaac it = 4*j; 825e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; 826e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 827e48d15efSToby Isaac } 828e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 829e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; 830e48d15efSToby Isaac idiag -= 4; 831e48d15efSToby Isaac i2 -= 2; 832e48d15efSToby Isaac } 833e48d15efSToby Isaac break; 834e48d15efSToby Isaac case 3: 835e48d15efSToby Isaac for (i=m-1; i>=0; i--) { 836e48d15efSToby Isaac v = aa + 9*ai[i]; 837e48d15efSToby Isaac vi = aj + ai[i]; 838e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 839e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 840e48d15efSToby Isaac while (nz--) { 841e48d15efSToby Isaac idx = 3*(*vi++); 842e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 
843e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 844e48d15efSToby Isaac v += 9; 845e48d15efSToby Isaac } 846e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 847e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 848e48d15efSToby Isaac idiag -= 9; 849e48d15efSToby Isaac i2 -= 3; 850e48d15efSToby Isaac } 851e48d15efSToby Isaac break; 852e48d15efSToby Isaac case 4: 853e48d15efSToby Isaac for (i=m-1; i>=0; i--) { 854e48d15efSToby Isaac v = aa + 16*ai[i]; 855e48d15efSToby Isaac vi = aj + ai[i]; 856e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 857e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 858e48d15efSToby Isaac while (nz--) { 859e48d15efSToby Isaac idx = 4*(*vi++); 860e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 861e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 862e48d15efSToby Isaac v += 16; 863e48d15efSToby Isaac } 864e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 865e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 866e48d15efSToby Isaac idiag -= 16; 867e48d15efSToby Isaac i2 -= 4; 868e48d15efSToby Isaac } 869e48d15efSToby Isaac break; 870e48d15efSToby Isaac case 5: 871e48d15efSToby Isaac for (i=m-1; i>=0; i--) { 872e48d15efSToby Isaac v = aa + 25*ai[i]; 873e48d15efSToby Isaac vi = aj + ai[i]; 874e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 875e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 876e48d15efSToby Isaac while (nz--) { 877e48d15efSToby Isaac idx = 5*(*vi++); 878e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 879e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 880e48d15efSToby Isaac v += 25; 881e48d15efSToby Isaac } 882e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 883e48d15efSToby Isaac x[i2] += xw[0]; 
x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 884e48d15efSToby Isaac idiag -= 25; 885e48d15efSToby Isaac i2 -= 5; 886e48d15efSToby Isaac } 887e48d15efSToby Isaac break; 888e48d15efSToby Isaac case 6: 889e48d15efSToby Isaac for (i=m-1; i>=0; i--) { 890e48d15efSToby Isaac v = aa + 36*ai[i]; 891e48d15efSToby Isaac vi = aj + ai[i]; 892e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 893e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 894e48d15efSToby Isaac while (nz--) { 895e48d15efSToby Isaac idx = 6*(*vi++); 896e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 897e48d15efSToby Isaac xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 898e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 899e48d15efSToby Isaac v += 36; 900e48d15efSToby Isaac } 901e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 902e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 903e48d15efSToby Isaac x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 904e48d15efSToby Isaac idiag -= 36; 905e48d15efSToby Isaac i2 -= 6; 906e48d15efSToby Isaac } 907e48d15efSToby Isaac break; 908e48d15efSToby Isaac case 7: 909e48d15efSToby Isaac for (i=m-1; i>=0; i--) { 910e48d15efSToby Isaac v = aa + 49*ai[i]; 911e48d15efSToby Isaac vi = aj + ai[i]; 912e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 913e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 914e48d15efSToby Isaac s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 915e48d15efSToby Isaac while (nz--) { 916e48d15efSToby Isaac idx = 7*(*vi++); 917e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 918e48d15efSToby Isaac xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 919e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 920e48d15efSToby Isaac v += 49; 921e48d15efSToby Isaac } 922e48d15efSToby Isaac 
PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 923e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 924e48d15efSToby Isaac x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 925e48d15efSToby Isaac idiag -= 49; 926e48d15efSToby Isaac i2 -= 7; 927e48d15efSToby Isaac } 928e48d15efSToby Isaac break; 929e48d15efSToby Isaac default: 930e48d15efSToby Isaac for (i=m-1; i>=0; i--) { 931e48d15efSToby Isaac v = aa + bs2*ai[i]; 932e48d15efSToby Isaac vi = aj + ai[i]; 933e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 934e48d15efSToby Isaac 935580bdb30SBarry Smith ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 936e48d15efSToby Isaac /* copy all rows of x that are needed into contiguous space */ 937e48d15efSToby Isaac workt = work; 938e48d15efSToby Isaac for (j=0; j<nz; j++) { 939580bdb30SBarry Smith ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 940e48d15efSToby Isaac workt += bs; 941e48d15efSToby Isaac } 942e48d15efSToby Isaac PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 943e48d15efSToby Isaac PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 944e48d15efSToby Isaac 945e48d15efSToby Isaac idiag -= bs2; 946e48d15efSToby Isaac i2 -= bs; 947e48d15efSToby Isaac } 948e48d15efSToby Isaac break; 949e48d15efSToby Isaac } 950e48d15efSToby Isaac ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 951e48d15efSToby Isaac } 952e48d15efSToby Isaac } 953de80f912SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 954de80f912SBarry Smith ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 955de80f912SBarry Smith PetscFunctionReturn(0); 956de80f912SBarry Smith } 957de80f912SBarry Smith 958af674e45SBarry Smith /* 95981824310SBarry Smith Special version for direct calls from Fortran (Used in PETSc-fun3d) 960af674e45SBarry Smith */ 961af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 962af674e45SBarry Smith #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 963af674e45SBarry Smith #elif 
!defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 964af674e45SBarry Smith #define matsetvaluesblocked4_ matsetvaluesblocked4 965af674e45SBarry Smith #endif 966af674e45SBarry Smith 9678cc058d9SJed Brown PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 968af674e45SBarry Smith { 969af674e45SBarry Smith Mat A = *AA; 970af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 971c1ac3661SBarry Smith PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 972c1ac3661SBarry Smith PetscInt *ai =a->i,*ailen=a->ilen; 97317ec6a02SBarry Smith PetscInt *aj =a->j,stepval,lastcol = -1; 974f15d580aSBarry Smith const PetscScalar *value = v; 9754bb09213Spetsc MatScalar *ap,*aa = a->a,*bap; 97670990e77SSatish Balay PetscErrorCode ierr; 977af674e45SBarry Smith 978af674e45SBarry Smith PetscFunctionBegin; 979ce94432eSBarry Smith if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 980af674e45SBarry Smith stepval = (n-1)*4; 981af674e45SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 982af674e45SBarry Smith row = im[k]; 983af674e45SBarry Smith rp = aj + ai[row]; 984af674e45SBarry Smith ap = aa + 16*ai[row]; 985af674e45SBarry Smith nrow = ailen[row]; 986af674e45SBarry Smith low = 0; 98717ec6a02SBarry Smith high = nrow; 988af674e45SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 989af674e45SBarry Smith col = in[l]; 990db4deed7SKarl Rupp if (col <= lastcol) low = 0; 991db4deed7SKarl Rupp else high = nrow; 99217ec6a02SBarry Smith lastcol = col; 9931e3347e8SBarry Smith value = v + k*(stepval+4 + l)*4; 994af674e45SBarry Smith while (high-low > 7) { 995af674e45SBarry Smith t = (low+high)/2; 996af674e45SBarry Smith if (rp[t] > col) high = t; 997af674e45SBarry Smith else low = t; 998af674e45SBarry Smith } 999af674e45SBarry Smith for (i=low; i<high; i++) { 1000af674e45SBarry Smith if (rp[i] > col) 
break; 1001af674e45SBarry Smith if (rp[i] == col) { 1002af674e45SBarry Smith bap = ap + 16*i; 1003af674e45SBarry Smith for (ii=0; ii<4; ii++,value+=stepval) { 1004af674e45SBarry Smith for (jj=ii; jj<16; jj+=4) { 1005af674e45SBarry Smith bap[jj] += *value++; 1006af674e45SBarry Smith } 1007af674e45SBarry Smith } 1008af674e45SBarry Smith goto noinsert2; 1009af674e45SBarry Smith } 1010af674e45SBarry Smith } 1011af674e45SBarry Smith N = nrow++ - 1; 101217ec6a02SBarry Smith high++; /* added new column index thus must search to one higher than before */ 1013af674e45SBarry Smith /* shift up all the later entries in this row */ 1014af674e45SBarry Smith for (ii=N; ii>=i; ii--) { 1015af674e45SBarry Smith rp[ii+1] = rp[ii]; 101670990e77SSatish Balay ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr); 1017af674e45SBarry Smith } 1018af674e45SBarry Smith if (N >= i) { 101970990e77SSatish Balay ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr); 1020af674e45SBarry Smith } 1021af674e45SBarry Smith rp[i] = col; 1022af674e45SBarry Smith bap = ap + 16*i; 1023af674e45SBarry Smith for (ii=0; ii<4; ii++,value+=stepval) { 1024af674e45SBarry Smith for (jj=ii; jj<16; jj+=4) { 1025af674e45SBarry Smith bap[jj] = *value++; 1026af674e45SBarry Smith } 1027af674e45SBarry Smith } 1028af674e45SBarry Smith noinsert2:; 1029af674e45SBarry Smith low = i; 1030af674e45SBarry Smith } 1031af674e45SBarry Smith ailen[row] = nrow; 1032af674e45SBarry Smith } 1033be1d678aSKris Buschelman PetscFunctionReturnVoid(); 1034af674e45SBarry Smith } 1035af674e45SBarry Smith 1036af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 1037af674e45SBarry Smith #define matsetvalues4_ MATSETVALUES4 1038af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1039af674e45SBarry Smith #define matsetvalues4_ matsetvalues4 1040af674e45SBarry Smith #endif 1041af674e45SBarry Smith 10428cc058d9SJed Brown PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 
1043af674e45SBarry Smith { 1044af674e45SBarry Smith Mat A = *AA; 1045af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1046580bdb30SBarry Smith PetscInt *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm; 1047c1ac3661SBarry Smith PetscInt *ai=a->i,*ailen=a->ilen; 1048c1ac3661SBarry Smith PetscInt *aj=a->j,brow,bcol; 104917ec6a02SBarry Smith PetscInt ridx,cidx,lastcol = -1; 1050af674e45SBarry Smith MatScalar *ap,value,*aa=a->a,*bap; 105170990e77SSatish Balay PetscErrorCode ierr; 1052af674e45SBarry Smith 1053af674e45SBarry Smith PetscFunctionBegin; 1054af674e45SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 1055af674e45SBarry Smith row = im[k]; brow = row/4; 1056af674e45SBarry Smith rp = aj + ai[brow]; 1057af674e45SBarry Smith ap = aa + 16*ai[brow]; 1058af674e45SBarry Smith nrow = ailen[brow]; 1059af674e45SBarry Smith low = 0; 106017ec6a02SBarry Smith high = nrow; 1061af674e45SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 1062af674e45SBarry Smith col = in[l]; bcol = col/4; 1063af674e45SBarry Smith ridx = row % 4; cidx = col % 4; 1064af674e45SBarry Smith value = v[l + k*n]; 1065db4deed7SKarl Rupp if (col <= lastcol) low = 0; 1066db4deed7SKarl Rupp else high = nrow; 106717ec6a02SBarry Smith lastcol = col; 1068af674e45SBarry Smith while (high-low > 7) { 1069af674e45SBarry Smith t = (low+high)/2; 1070af674e45SBarry Smith if (rp[t] > bcol) high = t; 1071af674e45SBarry Smith else low = t; 1072af674e45SBarry Smith } 1073af674e45SBarry Smith for (i=low; i<high; i++) { 1074af674e45SBarry Smith if (rp[i] > bcol) break; 1075af674e45SBarry Smith if (rp[i] == bcol) { 1076af674e45SBarry Smith bap = ap + 16*i + 4*cidx + ridx; 1077af674e45SBarry Smith *bap += value; 1078af674e45SBarry Smith goto noinsert1; 1079af674e45SBarry Smith } 1080af674e45SBarry Smith } 1081af674e45SBarry Smith N = nrow++ - 1; 108217ec6a02SBarry Smith high++; /* added new column thus must search to one higher than before */ 1083af674e45SBarry Smith /* shift up all 
the later entries in this row */ 108470990e77SSatish Balay ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr); 108570990e77SSatish Balay ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr); 108670990e77SSatish Balay ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr); 1087af674e45SBarry Smith rp[i] = bcol; 1088af674e45SBarry Smith ap[16*i + 4*cidx + ridx] = value; 1089af674e45SBarry Smith noinsert1:; 1090af674e45SBarry Smith low = i; 1091af674e45SBarry Smith } 1092af674e45SBarry Smith ailen[brow] = nrow; 1093af674e45SBarry Smith } 1094be1d678aSKris Buschelman PetscFunctionReturnVoid(); 1095af674e45SBarry Smith } 1096af674e45SBarry Smith 1097be5855fcSBarry Smith /* 1098be5855fcSBarry Smith Checks for missing diagonals 1099be5855fcSBarry Smith */ 1100ace3abfcSBarry Smith PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1101be5855fcSBarry Smith { 1102be5855fcSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 11036849ba73SBarry Smith PetscErrorCode ierr; 11047734d3b5SMatthew G. Knepley PetscInt *diag,*ii = a->i,i; 1105be5855fcSBarry Smith 1106be5855fcSBarry Smith PetscFunctionBegin; 1107c4992f7dSBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 11082af78befSBarry Smith *missing = PETSC_FALSE; 11097734d3b5SMatthew G. Knepley if (A->rmap->n > 0 && !ii) { 11102efa7f71SHong Zhang *missing = PETSC_TRUE; 11112efa7f71SHong Zhang if (d) *d = 0; 1112994fe344SLisandro Dalcin ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr); 11132efa7f71SHong Zhang } else { 111401445905SHong Zhang PetscInt n; 111501445905SHong Zhang n = PetscMin(a->mbs, a->nbs); 1116883fce79SBarry Smith diag = a->diag; 111701445905SHong Zhang for (i=0; i<n; i++) { 11187734d3b5SMatthew G. 
Knepley if (diag[i] >= ii[i+1]) { 11192af78befSBarry Smith *missing = PETSC_TRUE; 11202af78befSBarry Smith if (d) *d = i; 11217d3de750SJacob Faibussowitsch ierr = PetscInfo(A,"Matrix is missing block diagonal number %" PetscInt_FMT "\n",i);CHKERRQ(ierr); 1122358d2f5dSShri Abhyankar break; 11232efa7f71SHong Zhang } 1124be5855fcSBarry Smith } 1125be5855fcSBarry Smith } 1126be5855fcSBarry Smith PetscFunctionReturn(0); 1127be5855fcSBarry Smith } 1128be5855fcSBarry Smith 1129dfbe8321SBarry Smith PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1130de6a44a3SBarry Smith { 1131de6a44a3SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 11326849ba73SBarry Smith PetscErrorCode ierr; 113309f38230SBarry Smith PetscInt i,j,m = a->mbs; 1134de6a44a3SBarry Smith 11353a40ed3dSBarry Smith PetscFunctionBegin; 113609f38230SBarry Smith if (!a->diag) { 1137785e854fSJed Brown ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 11383bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 11394fd072dbSBarry Smith a->free_diag = PETSC_TRUE; 114009f38230SBarry Smith } 11417fc0212eSBarry Smith for (i=0; i<m; i++) { 114209f38230SBarry Smith a->diag[i] = a->i[i+1]; 1143de6a44a3SBarry Smith for (j=a->i[i]; j<a->i[i+1]; j++) { 1144de6a44a3SBarry Smith if (a->j[j] == i) { 114509f38230SBarry Smith a->diag[i] = j; 1146de6a44a3SBarry Smith break; 1147de6a44a3SBarry Smith } 1148de6a44a3SBarry Smith } 1149de6a44a3SBarry Smith } 11503a40ed3dSBarry Smith PetscFunctionReturn(0); 1151de6a44a3SBarry Smith } 11522593348eSBarry Smith 11531a83f524SJed Brown static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 11543b2fbd54SBarry Smith { 11553b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1156dfbe8321SBarry Smith PetscErrorCode ierr; 11571a83f524SJed Brown PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 
11581a83f524SJed Brown PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 11593b2fbd54SBarry Smith 11603a40ed3dSBarry Smith PetscFunctionBegin; 11613b2fbd54SBarry Smith *nn = n; 11623a40ed3dSBarry Smith if (!ia) PetscFunctionReturn(0); 11633b2fbd54SBarry Smith if (symmetric) { 11642462f5fdSStefano Zampini ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1165553b3c51SBarry Smith nz = tia[n]; 11663b2fbd54SBarry Smith } else { 11678f7157efSSatish Balay tia = a->i; tja = a->j; 11683b2fbd54SBarry Smith } 11693b2fbd54SBarry Smith 1170ecc77c7aSBarry Smith if (!blockcompressed && bs > 1) { 1171ecc77c7aSBarry Smith (*nn) *= bs; 11728f7157efSSatish Balay /* malloc & create the natural set of indices */ 1173785e854fSJed Brown ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 11749985e31cSBarry Smith if (n) { 11752462f5fdSStefano Zampini (*ia)[0] = oshift; 1176ecc77c7aSBarry Smith for (j=1; j<bs; j++) { 1177ecc77c7aSBarry Smith (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1178ecc77c7aSBarry Smith } 11799985e31cSBarry Smith } 1180ecc77c7aSBarry Smith 1181ecc77c7aSBarry Smith for (i=1; i<n; i++) { 1182ecc77c7aSBarry Smith (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1183ecc77c7aSBarry Smith for (j=1; j<bs; j++) { 1184ecc77c7aSBarry Smith (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 11858f7157efSSatish Balay } 11868f7157efSSatish Balay } 11879985e31cSBarry Smith if (n) { 1188ecc77c7aSBarry Smith (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 11899985e31cSBarry Smith } 1190ecc77c7aSBarry Smith 11911a83f524SJed Brown if (inja) { 1192785e854fSJed Brown ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 11939985e31cSBarry Smith cnt = 0; 11949985e31cSBarry Smith for (i=0; i<n; i++) { 11959985e31cSBarry Smith for (j=0; j<bs; j++) { 11969985e31cSBarry Smith for (k=tia[i]; k<tia[i+1]; k++) { 11979985e31cSBarry Smith for (l=0; l<bs; l++) { 11989985e31cSBarry Smith (*ja)[cnt++] = bs*tja[k] + l; 11999985e31cSBarry Smith } 
12009985e31cSBarry Smith } 12019985e31cSBarry Smith } 12029985e31cSBarry Smith } 12039985e31cSBarry Smith } 12049985e31cSBarry Smith 12058f7157efSSatish Balay if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 12068f7157efSSatish Balay ierr = PetscFree(tia);CHKERRQ(ierr); 12078f7157efSSatish Balay ierr = PetscFree(tja);CHKERRQ(ierr); 12088f7157efSSatish Balay } 1209f6d58c54SBarry Smith } else if (oshift == 1) { 1210715a17b5SBarry Smith if (symmetric) { 1211a2ea699eSBarry Smith nz = tia[A->rmap->n/bs]; 1212715a17b5SBarry Smith /* add 1 to i and j indices */ 1213715a17b5SBarry Smith for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1214715a17b5SBarry Smith *ia = tia; 1215715a17b5SBarry Smith if (ja) { 1216715a17b5SBarry Smith for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1217715a17b5SBarry Smith *ja = tja; 1218715a17b5SBarry Smith } 1219715a17b5SBarry Smith } else { 1220a2ea699eSBarry Smith nz = a->i[A->rmap->n/bs]; 1221f6d58c54SBarry Smith /* malloc space and add 1 to i and j indices */ 1222854ce69bSBarry Smith ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1223f6d58c54SBarry Smith for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1224f6d58c54SBarry Smith if (ja) { 1225785e854fSJed Brown ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1226f6d58c54SBarry Smith for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1227f6d58c54SBarry Smith } 1228715a17b5SBarry Smith } 12298f7157efSSatish Balay } else { 12308f7157efSSatish Balay *ia = tia; 1231ecc77c7aSBarry Smith if (ja) *ja = tja; 12328f7157efSSatish Balay } 12333a40ed3dSBarry Smith PetscFunctionReturn(0); 12343b2fbd54SBarry Smith } 12353b2fbd54SBarry Smith 12361a83f524SJed Brown static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 12373b2fbd54SBarry Smith { 12386849ba73SBarry Smith PetscErrorCode ierr; 12393b2fbd54SBarry Smith 12403a40ed3dSBarry Smith 
PetscFunctionBegin; 12413a40ed3dSBarry Smith if (!ia) PetscFunctionReturn(0); 1242715a17b5SBarry Smith if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1243606d414cSSatish Balay ierr = PetscFree(*ia);CHKERRQ(ierr); 12449985e31cSBarry Smith if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 12453b2fbd54SBarry Smith } 12463a40ed3dSBarry Smith PetscFunctionReturn(0); 12473b2fbd54SBarry Smith } 12483b2fbd54SBarry Smith 1249dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 12502d61bbb3SSatish Balay { 12512d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1252dfbe8321SBarry Smith PetscErrorCode ierr; 12532d61bbb3SSatish Balay 1254433994e6SBarry Smith PetscFunctionBegin; 1255aa482453SBarry Smith #if defined(PETSC_USE_LOG) 1256c0aa6a63SJacob Faibussowitsch PetscLogObjectState((PetscObject)A,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT,A->rmap->N,A->cmap->n,a->nz); 12572d61bbb3SSatish Balay #endif 1258e6b907acSBarry Smith ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 12596bf464f9SBarry Smith ierr = ISDestroy(&a->row);CHKERRQ(ierr); 12606bf464f9SBarry Smith ierr = ISDestroy(&a->col);CHKERRQ(ierr); 12614fd072dbSBarry Smith if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 126205b42c5fSBarry Smith ierr = PetscFree(a->idiag);CHKERRQ(ierr); 12634fd072dbSBarry Smith if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 126405b42c5fSBarry Smith ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 126505b42c5fSBarry Smith ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1266f361c04dSBarry Smith ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1267de80f912SBarry Smith ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 12686bf464f9SBarry Smith ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 126905b42c5fSBarry Smith ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1270cd6b891eSBarry Smith ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1271c4319e64SHong Zhang 
12726bf464f9SBarry Smith ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 12736bf464f9SBarry Smith ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1274bf0cc555SLisandro Dalcin ierr = PetscFree(A->data);CHKERRQ(ierr); 1275901853e0SKris Buschelman 1276f4259b30SLisandro Dalcin ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr); 1277cda14afcSprj- ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);CHKERRQ(ierr); 1278cda14afcSprj- ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);CHKERRQ(ierr); 1279bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1280bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1281bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1282bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1283bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1284bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1285bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1286bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1287bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 12887ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 1289c9225affSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr); 12907ea3e4caSstefano_zampini #endif 1291c9225affSStefano Zampini ierr = 
PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr); 12922d61bbb3SSatish Balay PetscFunctionReturn(0); 12932d61bbb3SSatish Balay } 12942d61bbb3SSatish Balay 1295ace3abfcSBarry Smith PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 12962d61bbb3SSatish Balay { 12972d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 129863ba0a88SBarry Smith PetscErrorCode ierr; 12992d61bbb3SSatish Balay 13002d61bbb3SSatish Balay PetscFunctionBegin; 1301aa275fccSKris Buschelman switch (op) { 1302aa275fccSKris Buschelman case MAT_ROW_ORIENTED: 13034e0d8c25SBarry Smith a->roworiented = flg; 1304aa275fccSKris Buschelman break; 1305a9817697SBarry Smith case MAT_KEEP_NONZERO_PATTERN: 1306a9817697SBarry Smith a->keepnonzeropattern = flg; 1307aa275fccSKris Buschelman break; 1308512a5fc5SBarry Smith case MAT_NEW_NONZERO_LOCATIONS: 1309512a5fc5SBarry Smith a->nonew = (flg ? 0 : 1); 1310aa275fccSKris Buschelman break; 1311aa275fccSKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 13124e0d8c25SBarry Smith a->nonew = (flg ? -1 : 0); 1313aa275fccSKris Buschelman break; 1314aa275fccSKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 13154e0d8c25SBarry Smith a->nonew = (flg ? -2 : 0); 1316aa275fccSKris Buschelman break; 131728b2fa4aSMatthew Knepley case MAT_UNUSED_NONZERO_LOCATION_ERR: 131828b2fa4aSMatthew Knepley a->nounused = (flg ? 
-1 : 0); 131928b2fa4aSMatthew Knepley break; 13208c78258cSHong Zhang case MAT_FORCE_DIAGONAL_ENTRIES: 1321aa275fccSKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 1322aa275fccSKris Buschelman case MAT_USE_HASH_TABLE: 1323071fcb05SBarry Smith case MAT_SORTED_FULL: 13247d3de750SJacob Faibussowitsch ierr = PetscInfo(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1325aa275fccSKris Buschelman break; 13265021d80fSJed Brown case MAT_SPD: 132777e54ba9SKris Buschelman case MAT_SYMMETRIC: 132877e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 13299a4540c5SBarry Smith case MAT_HERMITIAN: 13309a4540c5SBarry Smith case MAT_SYMMETRY_ETERNAL: 1331c10200c1SHong Zhang case MAT_SUBMAT_SINGLEIS: 1332672ba085SHong Zhang case MAT_STRUCTURE_ONLY: 13335021d80fSJed Brown /* These options are handled directly by MatSetOption() */ 133477e54ba9SKris Buschelman break; 1335aa275fccSKris Buschelman default: 133698921bdaSJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 13372d61bbb3SSatish Balay } 13382d61bbb3SSatish Balay PetscFunctionReturn(0); 13392d61bbb3SSatish Balay } 13402d61bbb3SSatish Balay 134152768537SHong Zhang /* used for both SeqBAIJ and SeqSBAIJ matrices */ 134252768537SHong Zhang PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 13432d61bbb3SSatish Balay { 13446849ba73SBarry Smith PetscErrorCode ierr; 134552768537SHong Zhang PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 134652768537SHong Zhang MatScalar *aa_i; 134787828ca2SBarry Smith PetscScalar *v_i; 13482d61bbb3SSatish Balay 13492d61bbb3SSatish Balay PetscFunctionBegin; 1350d0f46423SBarry Smith bs = A->rmap->bs; 135152768537SHong Zhang bs2 = bs*bs; 13522c71b3e2SJacob Faibussowitsch PetscCheckFalse(row < 0 || row >= A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range", row); 13532d61bbb3SSatish Balay 13542d61bbb3SSatish Balay bn = row/bs; /* Block 
number */ 13552d61bbb3SSatish Balay bp = row % bs; /* Block Position */ 13562d61bbb3SSatish Balay M = ai[bn+1] - ai[bn]; 13572d61bbb3SSatish Balay *nz = bs*M; 13582d61bbb3SSatish Balay 13592d61bbb3SSatish Balay if (v) { 1360f4259b30SLisandro Dalcin *v = NULL; 13612d61bbb3SSatish Balay if (*nz) { 1362854ce69bSBarry Smith ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 13632d61bbb3SSatish Balay for (i=0; i<M; i++) { /* for each block in the block row */ 13642d61bbb3SSatish Balay v_i = *v + i*bs; 13652d61bbb3SSatish Balay aa_i = aa + bs2*(ai[bn] + i); 136626fbe8dcSKarl Rupp for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 13672d61bbb3SSatish Balay } 13682d61bbb3SSatish Balay } 13692d61bbb3SSatish Balay } 13702d61bbb3SSatish Balay 13712d61bbb3SSatish Balay if (idx) { 1372f4259b30SLisandro Dalcin *idx = NULL; 13732d61bbb3SSatish Balay if (*nz) { 1374854ce69bSBarry Smith ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 13752d61bbb3SSatish Balay for (i=0; i<M; i++) { /* for each block in the block row */ 13762d61bbb3SSatish Balay idx_i = *idx + i*bs; 13772d61bbb3SSatish Balay itmp = bs*aj[ai[bn] + i]; 137826fbe8dcSKarl Rupp for (j=0; j<bs; j++) idx_i[j] = itmp++; 13792d61bbb3SSatish Balay } 13802d61bbb3SSatish Balay } 13812d61bbb3SSatish Balay } 13822d61bbb3SSatish Balay PetscFunctionReturn(0); 13832d61bbb3SSatish Balay } 13842d61bbb3SSatish Balay 138552768537SHong Zhang PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 138652768537SHong Zhang { 138752768537SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 138852768537SHong Zhang PetscErrorCode ierr; 138952768537SHong Zhang 139052768537SHong Zhang PetscFunctionBegin; 139152768537SHong Zhang ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 139252768537SHong Zhang PetscFunctionReturn(0); 139352768537SHong Zhang } 139452768537SHong Zhang 1395c1ac3661SBarry Smith PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar 
**v) 13962d61bbb3SSatish Balay { 1397dfbe8321SBarry Smith PetscErrorCode ierr; 1398606d414cSSatish Balay 13992d61bbb3SSatish Balay PetscFunctionBegin; 1400cb4a9cd9SHong Zhang if (nz) *nz = 0; 140105b42c5fSBarry Smith if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 140205b42c5fSBarry Smith if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 14032d61bbb3SSatish Balay PetscFunctionReturn(0); 14042d61bbb3SSatish Balay } 14052d61bbb3SSatish Balay 1406fc4dec0aSBarry Smith PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 14072d61bbb3SSatish Balay { 140820e84f26SHong Zhang Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*at; 14092d61bbb3SSatish Balay Mat C; 14106849ba73SBarry Smith PetscErrorCode ierr; 141120e84f26SHong Zhang PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill; 141220e84f26SHong Zhang PetscInt bs2=a->bs2,*ati,*atj,anzj,kr; 141320e84f26SHong Zhang MatScalar *ata,*aa=a->a; 14142d61bbb3SSatish Balay 14152d61bbb3SSatish Balay PetscFunctionBegin; 141620e84f26SHong Zhang ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr); 1417cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 141820e84f26SHong Zhang for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 14192d61bbb3SSatish Balay 1420ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1421d0f46423SBarry Smith ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 14227adad957SLisandro Dalcin ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 142320e84f26SHong Zhang ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr); 142420e84f26SHong Zhang 142520e84f26SHong Zhang at = (Mat_SeqBAIJ*)C->data; 142620e84f26SHong Zhang ati = at->i; 142720e84f26SHong Zhang for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i]; 1428fc4dec0aSBarry Smith } else { 1429fc4dec0aSBarry Smith C = *B; 143020e84f26SHong Zhang at = (Mat_SeqBAIJ*)C->data; 
143120e84f26SHong Zhang ati = at->i; 1432fc4dec0aSBarry Smith } 1433fc4dec0aSBarry Smith 143420e84f26SHong Zhang atj = at->j; 143520e84f26SHong Zhang ata = at->a; 143620e84f26SHong Zhang 143720e84f26SHong Zhang /* Copy ati into atfill so we have locations of the next free space in atj */ 1438580bdb30SBarry Smith ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr); 143920e84f26SHong Zhang 144020e84f26SHong Zhang /* Walk through A row-wise and mark nonzero entries of A^T. */ 14412d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 144220e84f26SHong Zhang anzj = ai[i+1] - ai[i]; 144320e84f26SHong Zhang for (j=0; j<anzj; j++) { 144420e84f26SHong Zhang atj[atfill[*aj]] = i; 144520e84f26SHong Zhang for (kr=0; kr<bs; kr++) { 144620e84f26SHong Zhang for (k=0; k<bs; k++) { 144720e84f26SHong Zhang ata[bs2*atfill[*aj]+k*bs+kr] = *aa++; 14482d61bbb3SSatish Balay } 14492d61bbb3SSatish Balay } 145020e84f26SHong Zhang atfill[*aj++] += 1; 145120e84f26SHong Zhang } 145220e84f26SHong Zhang } 14532d61bbb3SSatish Balay ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 14542d61bbb3SSatish Balay ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 14552d61bbb3SSatish Balay 145620e84f26SHong Zhang /* Clean up temporary space and complete requests. 
*/ 145720e84f26SHong Zhang ierr = PetscFree(atfill);CHKERRQ(ierr); 145820e84f26SHong Zhang 1459cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 146020e84f26SHong Zhang ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 14612d61bbb3SSatish Balay *B = C; 14622d61bbb3SSatish Balay } else { 146328be2f97SBarry Smith ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 14642d61bbb3SSatish Balay } 14652d61bbb3SSatish Balay PetscFunctionReturn(0); 14662d61bbb3SSatish Balay } 14672d61bbb3SSatish Balay 1468453d3561SHong Zhang PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1469453d3561SHong Zhang { 1470453d3561SHong Zhang PetscErrorCode ierr; 1471453d3561SHong Zhang Mat Btrans; 1472453d3561SHong Zhang 1473453d3561SHong Zhang PetscFunctionBegin; 1474453d3561SHong Zhang *f = PETSC_FALSE; 1475453d3561SHong Zhang ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1476453d3561SHong Zhang ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1477453d3561SHong Zhang ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1478453d3561SHong Zhang PetscFunctionReturn(0); 1479453d3561SHong Zhang } 1480453d3561SHong Zhang 1481618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1482b51a4376SLisandro Dalcin PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer) 14832593348eSBarry Smith { 1484b51a4376SLisandro Dalcin Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)mat->data; 1485b51a4376SLisandro Dalcin PetscInt header[4],M,N,m,bs,nz,cnt,i,j,k,l; 1486b51a4376SLisandro Dalcin PetscInt *rowlens,*colidxs; 1487b51a4376SLisandro Dalcin PetscScalar *matvals; 14886849ba73SBarry Smith PetscErrorCode ierr; 14892593348eSBarry Smith 14903a40ed3dSBarry Smith PetscFunctionBegin; 1491b51a4376SLisandro Dalcin ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 14923b2fbd54SBarry Smith 1493b51a4376SLisandro Dalcin M = mat->rmap->N; 1494b51a4376SLisandro Dalcin N = mat->cmap->N; 1495b51a4376SLisandro 
Dalcin m = mat->rmap->n; 1496b51a4376SLisandro Dalcin bs = mat->rmap->bs; 1497b51a4376SLisandro Dalcin nz = bs*bs*A->nz; 14982593348eSBarry Smith 1499b51a4376SLisandro Dalcin /* write matrix header */ 1500b51a4376SLisandro Dalcin header[0] = MAT_FILE_CLASSID; 1501b51a4376SLisandro Dalcin header[1] = M; header[2] = N; header[3] = nz; 1502b51a4376SLisandro Dalcin ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 15032593348eSBarry Smith 1504b51a4376SLisandro Dalcin /* store row lengths */ 1505b51a4376SLisandro Dalcin ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1506b51a4376SLisandro Dalcin for (cnt=0, i=0; i<A->mbs; i++) 1507b51a4376SLisandro Dalcin for (j=0; j<bs; j++) 1508b51a4376SLisandro Dalcin rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]); 1509b51a4376SLisandro Dalcin ierr = PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT);CHKERRQ(ierr); 1510b51a4376SLisandro Dalcin ierr = PetscFree(rowlens);CHKERRQ(ierr); 1511b51a4376SLisandro Dalcin 1512b51a4376SLisandro Dalcin /* store column indices */ 1513b51a4376SLisandro Dalcin ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1514b51a4376SLisandro Dalcin for (cnt=0, i=0; i<A->mbs; i++) 1515b51a4376SLisandro Dalcin for (k=0; k<bs; k++) 1516b51a4376SLisandro Dalcin for (j=A->i[i]; j<A->i[i+1]; j++) 1517b51a4376SLisandro Dalcin for (l=0; l<bs; l++) 1518b51a4376SLisandro Dalcin colidxs[cnt++] = bs*A->j[j] + l; 15192c71b3e2SJacob Faibussowitsch PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1520b51a4376SLisandro Dalcin ierr = PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT);CHKERRQ(ierr); 1521b51a4376SLisandro Dalcin ierr = PetscFree(colidxs);CHKERRQ(ierr); 15222593348eSBarry Smith 15232593348eSBarry Smith /* store nonzero values */ 1524b51a4376SLisandro Dalcin ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1525b51a4376SLisandro Dalcin for (cnt=0, i=0; i<A->mbs; i++) 1526b51a4376SLisandro Dalcin for (k=0; k<bs; k++) 
1527b51a4376SLisandro Dalcin for (j=A->i[i]; j<A->i[i+1]; j++) 1528b51a4376SLisandro Dalcin for (l=0; l<bs; l++) 1529b51a4376SLisandro Dalcin matvals[cnt++] = A->a[bs*(bs*j + l) + k]; 15302c71b3e2SJacob Faibussowitsch PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1531b51a4376SLisandro Dalcin ierr = PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1532b51a4376SLisandro Dalcin ierr = PetscFree(matvals);CHKERRQ(ierr); 1533ce6f0cecSBarry Smith 1534b51a4376SLisandro Dalcin /* write block size option to the viewer's .info file */ 1535b51a4376SLisandro Dalcin ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 15363a40ed3dSBarry Smith PetscFunctionReturn(0); 15372593348eSBarry Smith } 15382593348eSBarry Smith 15397dc0baabSHong Zhang static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer) 15407dc0baabSHong Zhang { 15417dc0baabSHong Zhang PetscErrorCode ierr; 15427dc0baabSHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 15437dc0baabSHong Zhang PetscInt i,bs = A->rmap->bs,k; 15447dc0baabSHong Zhang 15457dc0baabSHong Zhang PetscFunctionBegin; 15467dc0baabSHong Zhang ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 15477dc0baabSHong Zhang for (i=0; i<a->mbs; i++) { 1548c0aa6a63SJacob Faibussowitsch ierr = PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT "-%" PetscInt_FMT ":",i*bs,i*bs+bs-1);CHKERRQ(ierr); 15497dc0baabSHong Zhang for (k=a->i[i]; k<a->i[i+1]; k++) { 1550c0aa6a63SJacob Faibussowitsch ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT "-%" PetscInt_FMT ") ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr); 15517dc0baabSHong Zhang } 15527dc0baabSHong Zhang ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 15537dc0baabSHong Zhang } 15547dc0baabSHong Zhang ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 15557dc0baabSHong Zhang PetscFunctionReturn(0); 15567dc0baabSHong Zhang } 
15577dc0baabSHong Zhang 15586849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 15592593348eSBarry Smith { 1560b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1561dfbe8321SBarry Smith PetscErrorCode ierr; 1562d0f46423SBarry Smith PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1563f3ef73ceSBarry Smith PetscViewerFormat format; 15642593348eSBarry Smith 15653a40ed3dSBarry Smith PetscFunctionBegin; 15667dc0baabSHong Zhang if (A->structure_only) { 15677dc0baabSHong Zhang ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr); 15687dc0baabSHong Zhang PetscFunctionReturn(0); 15697dc0baabSHong Zhang } 15707dc0baabSHong Zhang 1571b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1572456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1573c0aa6a63SJacob Faibussowitsch ierr = PetscViewerASCIIPrintf(viewer," block size is %" PetscInt_FMT "\n",bs);CHKERRQ(ierr); 1574fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1575ade3a672SBarry Smith const char *matname; 1576bcd9e38bSBarry Smith Mat aij; 1577ceb03754SKris Buschelman ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1578ade3a672SBarry Smith ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1579ade3a672SBarry Smith ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1580bcd9e38bSBarry Smith ierr = MatView(aij,viewer);CHKERRQ(ierr); 15816bf464f9SBarry Smith ierr = MatDestroy(&aij);CHKERRQ(ierr); 158204929863SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 158304929863SHong Zhang PetscFunctionReturn(0); 1584fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1585d00279f6SBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 158644cd7ae7SLois Curfman McInnes for (i=0; i<a->mbs; i++) { 158744cd7ae7SLois Curfman McInnes for (j=0; j<bs; j++) { 
1588c0aa6a63SJacob Faibussowitsch ierr = PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i*bs+j);CHKERRQ(ierr); 158944cd7ae7SLois Curfman McInnes for (k=a->i[i]; k<a->i[i+1]; k++) { 159044cd7ae7SLois Curfman McInnes for (l=0; l<bs; l++) { 1591aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 15920e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1593c0aa6a63SJacob Faibussowitsch ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %gi) ",bs*a->j[k]+l, 159457622a8eSBarry Smith (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 15950e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1596c0aa6a63SJacob Faibussowitsch ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %gi) ",bs*a->j[k]+l, 159757622a8eSBarry Smith (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 15980e6d2581SBarry Smith } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1599c0aa6a63SJacob Faibussowitsch ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 16000ef38995SBarry Smith } 160144cd7ae7SLois Curfman McInnes #else 16020ef38995SBarry Smith if (a->a[bs2*k + l*bs + j] != 0.0) { 1603c0aa6a63SJacob Faibussowitsch ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 16040ef38995SBarry Smith } 160544cd7ae7SLois Curfman McInnes #endif 160644cd7ae7SLois Curfman McInnes } 160744cd7ae7SLois Curfman McInnes } 1608b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 160944cd7ae7SLois Curfman McInnes } 161044cd7ae7SLois Curfman McInnes } 1611d00279f6SBarry Smith ierr = 
PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 16120ef38995SBarry Smith } else { 1613d00279f6SBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1614b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 1615b6490206SBarry Smith for (j=0; j<bs; j++) { 1616c0aa6a63SJacob Faibussowitsch ierr = PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i*bs+j);CHKERRQ(ierr); 1617b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 1618b6490206SBarry Smith for (l=0; l<bs; l++) { 1619aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 16200e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1621c0aa6a63SJacob Faibussowitsch ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %g i) ",bs*a->j[k]+l, 162257622a8eSBarry Smith (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 16230e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1624c0aa6a63SJacob Faibussowitsch ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %g i) ",bs*a->j[k]+l, 162557622a8eSBarry Smith (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 16260ef38995SBarry Smith } else { 1627c0aa6a63SJacob Faibussowitsch ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 162888685aaeSLois Curfman McInnes } 162988685aaeSLois Curfman McInnes #else 1630c0aa6a63SJacob Faibussowitsch ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 163188685aaeSLois Curfman McInnes #endif 16322593348eSBarry Smith } 16332593348eSBarry Smith } 1634b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 16352593348eSBarry Smith } 16362593348eSBarry Smith } 1637d00279f6SBarry Smith ierr = 
PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1638b6490206SBarry Smith } 1639b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 16403a40ed3dSBarry Smith PetscFunctionReturn(0); 16412593348eSBarry Smith } 16422593348eSBarry Smith 16439804daf3SBarry Smith #include <petscdraw.h> 16446849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 16453270192aSSatish Balay { 164677ed5343SBarry Smith Mat A = (Mat) Aa; 16473270192aSSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 16486849ba73SBarry Smith PetscErrorCode ierr; 1649d0f46423SBarry Smith PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 16500e6d2581SBarry Smith PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 16513f1db9ecSBarry Smith MatScalar *aa; 1652b0a32e0cSBarry Smith PetscViewer viewer; 1653b3e7f47fSJed Brown PetscViewerFormat format; 16543270192aSSatish Balay 16553a40ed3dSBarry Smith PetscFunctionBegin; 165677ed5343SBarry Smith ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1657b3e7f47fSJed Brown ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1658b0a32e0cSBarry Smith ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 165977ed5343SBarry Smith 16603270192aSSatish Balay /* loop over matrix elements drawing boxes */ 1661b3e7f47fSJed Brown 1662b3e7f47fSJed Brown if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1663383922c3SLisandro Dalcin ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1664383922c3SLisandro Dalcin /* Blue for negative, Cyan for zero and Red for positive */ 1665b0a32e0cSBarry Smith color = PETSC_DRAW_BLUE; 16663270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 16673270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 1668d0f46423SBarry Smith y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 16693270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 16703270192aSSatish Balay aa = a->a + j*bs2; 16713270192aSSatish Balay for (k=0; k<bs; k++) { 
16723270192aSSatish Balay for (l=0; l<bs; l++) { 16730e6d2581SBarry Smith if (PetscRealPart(*aa++) >= 0.) continue; 1674b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 16753270192aSSatish Balay } 16763270192aSSatish Balay } 16773270192aSSatish Balay } 16783270192aSSatish Balay } 1679b0a32e0cSBarry Smith color = PETSC_DRAW_CYAN; 16803270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 16813270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 1682d0f46423SBarry Smith y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 16833270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 16843270192aSSatish Balay aa = a->a + j*bs2; 16853270192aSSatish Balay for (k=0; k<bs; k++) { 16863270192aSSatish Balay for (l=0; l<bs; l++) { 16870e6d2581SBarry Smith if (PetscRealPart(*aa++) != 0.) continue; 1688b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 16893270192aSSatish Balay } 16903270192aSSatish Balay } 16913270192aSSatish Balay } 16923270192aSSatish Balay } 1693b0a32e0cSBarry Smith color = PETSC_DRAW_RED; 16943270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 16953270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 1696d0f46423SBarry Smith y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 16973270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 16983270192aSSatish Balay aa = a->a + j*bs2; 16993270192aSSatish Balay for (k=0; k<bs; k++) { 17003270192aSSatish Balay for (l=0; l<bs; l++) { 17010e6d2581SBarry Smith if (PetscRealPart(*aa++) <= 0.) 
continue; 1702b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 17033270192aSSatish Balay } 17043270192aSSatish Balay } 17053270192aSSatish Balay } 17063270192aSSatish Balay } 1707383922c3SLisandro Dalcin ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1708b3e7f47fSJed Brown } else { 1709b3e7f47fSJed Brown /* use contour shading to indicate magnitude of values */ 1710b3e7f47fSJed Brown /* first determine max of all nonzero values */ 1711b05fc000SLisandro Dalcin PetscReal minv = 0.0, maxv = 0.0; 1712b3e7f47fSJed Brown PetscDraw popup; 1713b3e7f47fSJed Brown 1714b3e7f47fSJed Brown for (i=0; i<a->nz*a->bs2; i++) { 1715b3e7f47fSJed Brown if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1716b3e7f47fSJed Brown } 1717383922c3SLisandro Dalcin if (minv >= maxv) maxv = minv + PETSC_SMALL; 1718b3e7f47fSJed Brown ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 171945f3bb6eSLisandro Dalcin ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1720383922c3SLisandro Dalcin 1721383922c3SLisandro Dalcin ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1722b3e7f47fSJed Brown for (i=0,row=0; i<mbs; i++,row+=bs) { 1723b3e7f47fSJed Brown for (j=a->i[i]; j<a->i[i+1]; j++) { 1724b3e7f47fSJed Brown y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1725b3e7f47fSJed Brown x_l = a->j[j]*bs; x_r = x_l + 1.0; 1726b3e7f47fSJed Brown aa = a->a + j*bs2; 1727b3e7f47fSJed Brown for (k=0; k<bs; k++) { 1728b3e7f47fSJed Brown for (l=0; l<bs; l++) { 1729383922c3SLisandro Dalcin MatScalar v = *aa++; 1730383922c3SLisandro Dalcin color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1731b3e7f47fSJed Brown ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1732b3e7f47fSJed Brown } 1733b3e7f47fSJed Brown } 1734b3e7f47fSJed Brown } 1735b3e7f47fSJed Brown } 1736383922c3SLisandro Dalcin ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1737b3e7f47fSJed Brown } 
173877ed5343SBarry Smith PetscFunctionReturn(0); 173977ed5343SBarry Smith } 17403270192aSSatish Balay 17416849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 174277ed5343SBarry Smith { 1743dfbe8321SBarry Smith PetscErrorCode ierr; 17440e6d2581SBarry Smith PetscReal xl,yl,xr,yr,w,h; 1745b0a32e0cSBarry Smith PetscDraw draw; 1746ace3abfcSBarry Smith PetscBool isnull; 17473270192aSSatish Balay 174877ed5343SBarry Smith PetscFunctionBegin; 1749b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 175045f3bb6eSLisandro Dalcin ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 175145f3bb6eSLisandro Dalcin if (isnull) PetscFunctionReturn(0); 175277ed5343SBarry Smith 1753d0f46423SBarry Smith xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 175477ed5343SBarry Smith xr += w; yr += h; xl = -w; yl = -h; 1755b0a32e0cSBarry Smith ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1756832b7cebSLisandro Dalcin ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1757b0a32e0cSBarry Smith ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 17580298fd71SBarry Smith ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1759832b7cebSLisandro Dalcin ierr = PetscDrawSave(draw);CHKERRQ(ierr); 17603a40ed3dSBarry Smith PetscFunctionReturn(0); 17613270192aSSatish Balay } 17623270192aSSatish Balay 1763dfbe8321SBarry Smith PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 17642593348eSBarry Smith { 1765dfbe8321SBarry Smith PetscErrorCode ierr; 1766ace3abfcSBarry Smith PetscBool iascii,isbinary,isdraw; 17672593348eSBarry Smith 17683a40ed3dSBarry Smith PetscFunctionBegin; 1769251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1770251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 
1771251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 177232077d6dSBarry Smith if (iascii) { 17733a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 17740f5bd95cSBarry Smith } else if (isbinary) { 17753a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 17760f5bd95cSBarry Smith } else if (isdraw) { 17773a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 17785cd90555SBarry Smith } else { 1779a5e6ed63SBarry Smith Mat B; 1780ceb03754SKris Buschelman ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1781a5e6ed63SBarry Smith ierr = MatView(B,viewer);CHKERRQ(ierr); 17826bf464f9SBarry Smith ierr = MatDestroy(&B);CHKERRQ(ierr); 17832593348eSBarry Smith } 17843a40ed3dSBarry Smith PetscFunctionReturn(0); 17852593348eSBarry Smith } 1786b6490206SBarry Smith 1787c1ac3661SBarry Smith PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1788cd0e1443SSatish Balay { 1789cd0e1443SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1790c1ac3661SBarry Smith PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1791c1ac3661SBarry Smith PetscInt *ai = a->i,*ailen = a->ilen; 1792d0f46423SBarry Smith PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 179397e567efSBarry Smith MatScalar *ap,*aa = a->a; 1794cd0e1443SSatish Balay 17953a40ed3dSBarry Smith PetscFunctionBegin; 17962d61bbb3SSatish Balay for (k=0; k<m; k++) { /* loop over rows */ 1797cd0e1443SSatish Balay row = im[k]; brow = row/bs; 179854c59aa7SJacob Faibussowitsch if (row < 0) {v += n; continue;} /* negative row */ 179954c59aa7SJacob Faibussowitsch PetscCheck(row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " too large", row); 18002d61bbb3SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 18012c3acbe9SBarry Smith nrow = ailen[brow]; 18022d61bbb3SSatish Balay for (l=0; 
l<n; l++) { /* loop over columns */ 180354c59aa7SJacob Faibussowitsch if (in[l] < 0) {v++; continue;} /* negative column */ 180454c59aa7SJacob Faibussowitsch PetscCheck(in[l] < A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %" PetscInt_FMT " too large", in[l]); 18052d61bbb3SSatish Balay col = in[l]; 18062d61bbb3SSatish Balay bcol = col/bs; 18072d61bbb3SSatish Balay cidx = col%bs; 18082d61bbb3SSatish Balay ridx = row%bs; 18092d61bbb3SSatish Balay high = nrow; 18102d61bbb3SSatish Balay low = 0; /* assume unsorted */ 18112d61bbb3SSatish Balay while (high-low > 5) { 1812cd0e1443SSatish Balay t = (low+high)/2; 1813cd0e1443SSatish Balay if (rp[t] > bcol) high = t; 1814cd0e1443SSatish Balay else low = t; 1815cd0e1443SSatish Balay } 1816cd0e1443SSatish Balay for (i=low; i<high; i++) { 1817cd0e1443SSatish Balay if (rp[i] > bcol) break; 1818cd0e1443SSatish Balay if (rp[i] == bcol) { 18192d61bbb3SSatish Balay *v++ = ap[bs2*i+bs*cidx+ridx]; 18202d61bbb3SSatish Balay goto finished; 1821cd0e1443SSatish Balay } 1822cd0e1443SSatish Balay } 182397e567efSBarry Smith *v++ = 0.0; 18242d61bbb3SSatish Balay finished:; 1825cd0e1443SSatish Balay } 1826cd0e1443SSatish Balay } 18273a40ed3dSBarry Smith PetscFunctionReturn(0); 1828cd0e1443SSatish Balay } 1829cd0e1443SSatish Balay 1830dd6ea824SBarry Smith PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 183192c4ed94SBarry Smith { 183292c4ed94SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1833e2ee6c50SBarry Smith PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1834c1ac3661SBarry Smith PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 18356849ba73SBarry Smith PetscErrorCode ierr; 1836d0f46423SBarry Smith PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1837ace3abfcSBarry Smith PetscBool roworiented=a->roworiented; 1838dd6ea824SBarry Smith const PetscScalar *value = v; 18399d243f67SHong 
Zhang MatScalar *ap=NULL,*aa = a->a,*bap; 184092c4ed94SBarry Smith 18413a40ed3dSBarry Smith PetscFunctionBegin; 18420e324ae4SSatish Balay if (roworiented) { 18430e324ae4SSatish Balay stepval = (n-1)*bs; 18440e324ae4SSatish Balay } else { 18450e324ae4SSatish Balay stepval = (m-1)*bs; 18460e324ae4SSatish Balay } 184792c4ed94SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 184892c4ed94SBarry Smith row = im[k]; 18495ef9f2a5SBarry Smith if (row < 0) continue; 18506bdcaf15SBarry Smith PetscCheck(row < a->mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT,row,a->mbs-1); 185192c4ed94SBarry Smith rp = aj + ai[row]; 18527dc0baabSHong Zhang if (!A->structure_only) ap = aa + bs2*ai[row]; 185392c4ed94SBarry Smith rmax = imax[row]; 185492c4ed94SBarry Smith nrow = ailen[row]; 185592c4ed94SBarry Smith low = 0; 1856c71e6ed7SBarry Smith high = nrow; 185792c4ed94SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 18585ef9f2a5SBarry Smith if (in[l] < 0) continue; 18596bdcaf15SBarry Smith PetscCheck(in[l] < a->nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT,in[l],a->nbs-1); 186092c4ed94SBarry Smith col = in[l]; 18617dc0baabSHong Zhang if (!A->structure_only) { 186292c4ed94SBarry Smith if (roworiented) { 186353ef36baSBarry Smith value = v + (k*(stepval+bs) + l)*bs; 18640e324ae4SSatish Balay } else { 186553ef36baSBarry Smith value = v + (l*(stepval+bs) + k)*bs; 186692c4ed94SBarry Smith } 18677dc0baabSHong Zhang } 186826fbe8dcSKarl Rupp if (col <= lastcol) low = 0; 186926fbe8dcSKarl Rupp else high = nrow; 1870e2ee6c50SBarry Smith lastcol = col; 187192c4ed94SBarry Smith while (high-low > 7) { 187292c4ed94SBarry Smith t = (low+high)/2; 187392c4ed94SBarry Smith if (rp[t] > col) high = t; 187492c4ed94SBarry Smith else low = t; 187592c4ed94SBarry Smith } 187692c4ed94SBarry Smith for (i=low; i<high; i++) { 187792c4ed94SBarry Smith if (rp[i] > col) 
break; 187892c4ed94SBarry Smith if (rp[i] == col) { 18797dc0baabSHong Zhang if (A->structure_only) goto noinsert2; 18808a84c255SSatish Balay bap = ap + bs2*i; 18810e324ae4SSatish Balay if (roworiented) { 18828a84c255SSatish Balay if (is == ADD_VALUES) { 1883dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1884dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 18858a84c255SSatish Balay bap[jj] += *value++; 1886dd9472c6SBarry Smith } 1887dd9472c6SBarry Smith } 18880e324ae4SSatish Balay } else { 1889dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1890dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 18910e324ae4SSatish Balay bap[jj] = *value++; 18928a84c255SSatish Balay } 1893dd9472c6SBarry Smith } 1894dd9472c6SBarry Smith } 18950e324ae4SSatish Balay } else { 18960e324ae4SSatish Balay if (is == ADD_VALUES) { 189753ef36baSBarry Smith for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1898dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 189953ef36baSBarry Smith bap[jj] += value[jj]; 1900dd9472c6SBarry Smith } 190153ef36baSBarry Smith bap += bs; 1902dd9472c6SBarry Smith } 19030e324ae4SSatish Balay } else { 190453ef36baSBarry Smith for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1905dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 190653ef36baSBarry Smith bap[jj] = value[jj]; 19070e324ae4SSatish Balay } 190853ef36baSBarry Smith bap += bs; 19098a84c255SSatish Balay } 1910dd9472c6SBarry Smith } 1911dd9472c6SBarry Smith } 1912f1241b54SBarry Smith goto noinsert2; 191392c4ed94SBarry Smith } 191492c4ed94SBarry Smith } 191589280ab3SLois Curfman McInnes if (nonew == 1) goto noinsert2; 19162c71b3e2SJacob Faibussowitsch PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 19177dc0baabSHong Zhang if (A->structure_only) { 19187dc0baabSHong Zhang 
MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar); 19197dc0baabSHong Zhang } else { 1920fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 19217dc0baabSHong Zhang } 1922c03d1d03SSatish Balay N = nrow++ - 1; high++; 192392c4ed94SBarry Smith /* shift up all the later entries in this row */ 1924580bdb30SBarry Smith ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 192592c4ed94SBarry Smith rp[i] = col; 19267dc0baabSHong Zhang if (!A->structure_only) { 1927580bdb30SBarry Smith ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 19288a84c255SSatish Balay bap = ap + bs2*i; 19290e324ae4SSatish Balay if (roworiented) { 1930dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1931dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 19320e324ae4SSatish Balay bap[jj] = *value++; 1933dd9472c6SBarry Smith } 1934dd9472c6SBarry Smith } 19350e324ae4SSatish Balay } else { 1936dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1937dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 19380e324ae4SSatish Balay *bap++ = *value++; 19390e324ae4SSatish Balay } 1940dd9472c6SBarry Smith } 1941dd9472c6SBarry Smith } 19427dc0baabSHong Zhang } 1943f1241b54SBarry Smith noinsert2:; 194492c4ed94SBarry Smith low = i; 194592c4ed94SBarry Smith } 194692c4ed94SBarry Smith ailen[row] = nrow; 194792c4ed94SBarry Smith } 19483a40ed3dSBarry Smith PetscFunctionReturn(0); 194992c4ed94SBarry Smith } 195026e093fcSHong Zhang 1951dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1952584200bdSSatish Balay { 1953584200bdSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1954580bdb30SBarry Smith PetscInt fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax; 1955d0f46423SBarry Smith PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 19566849ba73SBarry Smith PetscErrorCode ierr; 1957c1ac3661SBarry Smith PetscInt mbs = a->mbs,bs2 = 
a->bs2,rmax = 0; 19583f1db9ecSBarry Smith MatScalar *aa = a->a,*ap; 19593447b6efSHong Zhang PetscReal ratio=0.6; 1960584200bdSSatish Balay 19613a40ed3dSBarry Smith PetscFunctionBegin; 19623a40ed3dSBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1963584200bdSSatish Balay 196443ee02c3SBarry Smith if (m) rmax = ailen[0]; 1965584200bdSSatish Balay for (i=1; i<mbs; i++) { 1966584200bdSSatish Balay /* move each row back by the amount of empty slots (fshift) before it*/ 1967584200bdSSatish Balay fshift += imax[i-1] - ailen[i-1]; 1968d402145bSBarry Smith rmax = PetscMax(rmax,ailen[i]); 1969584200bdSSatish Balay if (fshift) { 1970580bdb30SBarry Smith ip = aj + ai[i]; 1971580bdb30SBarry Smith ap = aa + bs2*ai[i]; 1972584200bdSSatish Balay N = ailen[i]; 1973580bdb30SBarry Smith ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr); 1974672ba085SHong Zhang if (!A->structure_only) { 1975580bdb30SBarry Smith ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr); 1976584200bdSSatish Balay } 1977672ba085SHong Zhang } 1978584200bdSSatish Balay ai[i] = ai[i-1] + ailen[i-1]; 1979584200bdSSatish Balay } 1980584200bdSSatish Balay if (mbs) { 1981584200bdSSatish Balay fshift += imax[mbs-1] - ailen[mbs-1]; 1982584200bdSSatish Balay ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1983584200bdSSatish Balay } 19847c565772SBarry Smith 1985584200bdSSatish Balay /* reset ilen and imax for each row */ 19867c565772SBarry Smith a->nonzerorowcnt = 0; 1987672ba085SHong Zhang if (A->structure_only) { 1988672ba085SHong Zhang ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1989672ba085SHong Zhang } else { /* !A->structure_only */ 1990584200bdSSatish Balay for (i=0; i<mbs; i++) { 1991584200bdSSatish Balay ailen[i] = imax[i] = ai[i+1] - ai[i]; 19927c565772SBarry Smith a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1993584200bdSSatish Balay } 1994672ba085SHong Zhang } 1995a7c10996SSatish Balay a->nz = ai[mbs]; 1996584200bdSSatish Balay 1997584200bdSSatish Balay /* diagonals may have moved, so 
kill the diagonal pointers */ 1998b01c7715SBarry Smith a->idiagvalid = PETSC_FALSE; 1999584200bdSSatish Balay if (fshift && a->diag) { 2000606d414cSSatish Balay ierr = PetscFree(a->diag);CHKERRQ(ierr); 20013bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2002f4259b30SLisandro Dalcin a->diag = NULL; 2003584200bdSSatish Balay } 20042c71b3e2SJacob Faibussowitsch PetscCheckFalse(fshift && a->nounused == -1,PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 20057d3de750SJacob Faibussowitsch ierr = PetscInfo(A,"Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 20067d3de750SJacob Faibussowitsch ierr = PetscInfo(A,"Number of mallocs during MatSetValues is %" PetscInt_FMT "\n",a->reallocs);CHKERRQ(ierr); 20077d3de750SJacob Faibussowitsch ierr = PetscInfo(A,"Most nonzeros blocks in any row is %" PetscInt_FMT "\n",rmax);CHKERRQ(ierr); 200826fbe8dcSKarl Rupp 20098e58a170SBarry Smith A->info.mallocs += a->reallocs; 2010e2f3b5e9SSatish Balay a->reallocs = 0; 20110e6d2581SBarry Smith A->info.nz_unneeded = (PetscReal)fshift*bs2; 2012647a6520SHong Zhang a->rmax = rmax; 2013cf4441caSHong Zhang 2014672ba085SHong Zhang if (!A->structure_only) { 201511e456e1SBarry Smith ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 2016672ba085SHong Zhang } 20173a40ed3dSBarry Smith PetscFunctionReturn(0); 2018584200bdSSatish Balay } 2019584200bdSSatish Balay 2020bea157c4SSatish Balay /* 2021bea157c4SSatish Balay This function returns an array of flags which indicate the locations of contiguous 2022bea157c4SSatish Balay blocks that should be zeroed. 
for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2023a5b23f4aSJose E. Roman then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2024bea157c4SSatish Balay Assume: sizes should be long enough to hold all the values. 2025bea157c4SSatish Balay */ 2026c1ac3661SBarry Smith static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2027d9b7c43dSSatish Balay { 2028c1ac3661SBarry Smith PetscInt i,j,k,row; 2029ace3abfcSBarry Smith PetscBool flg; 20303a40ed3dSBarry Smith 2031433994e6SBarry Smith PetscFunctionBegin; 2032bea157c4SSatish Balay for (i=0,j=0; i<n; j++) { 2033bea157c4SSatish Balay row = idx[i]; 2034a5b23f4aSJose E. Roman if (row%bs!=0) { /* Not the beginning of a block */ 2035bea157c4SSatish Balay sizes[j] = 1; 2036bea157c4SSatish Balay i++; 2037e4fda26cSSatish Balay } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2038bea157c4SSatish Balay sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2039bea157c4SSatish Balay i++; 2040bea157c4SSatish Balay } else { /* Begining of the block, so check if the complete block exists */ 2041bea157c4SSatish Balay flg = PETSC_TRUE; 2042bea157c4SSatish Balay for (k=1; k<bs; k++) { 2043bea157c4SSatish Balay if (row+k != idx[i+k]) { /* break in the block */ 2044bea157c4SSatish Balay flg = PETSC_FALSE; 2045bea157c4SSatish Balay break; 2046d9b7c43dSSatish Balay } 2047bea157c4SSatish Balay } 2048abc0a331SBarry Smith if (flg) { /* No break in the bs */ 2049bea157c4SSatish Balay sizes[j] = bs; 2050bea157c4SSatish Balay i += bs; 2051bea157c4SSatish Balay } else { 2052bea157c4SSatish Balay sizes[j] = 1; 2053bea157c4SSatish Balay i++; 2054bea157c4SSatish Balay } 2055bea157c4SSatish Balay } 2056bea157c4SSatish Balay } 2057bea157c4SSatish Balay *bs_max = j; 20583a40ed3dSBarry Smith PetscFunctionReturn(0); 2059d9b7c43dSSatish Balay } 2060d9b7c43dSSatish Balay 20612b40b63fSBarry Smith 
PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2062d9b7c43dSSatish Balay { 2063d9b7c43dSSatish Balay Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2064dfbe8321SBarry Smith PetscErrorCode ierr; 2065f4df32b1SMatthew Knepley PetscInt i,j,k,count,*rows; 2066d0f46423SBarry Smith PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 206787828ca2SBarry Smith PetscScalar zero = 0.0; 20683f1db9ecSBarry Smith MatScalar *aa; 206997b48c8fSBarry Smith const PetscScalar *xx; 207097b48c8fSBarry Smith PetscScalar *bb; 2071d9b7c43dSSatish Balay 20723a40ed3dSBarry Smith PetscFunctionBegin; 207397b48c8fSBarry Smith /* fix right hand side if needed */ 207497b48c8fSBarry Smith if (x && b) { 207597b48c8fSBarry Smith ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 207697b48c8fSBarry Smith ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 207797b48c8fSBarry Smith for (i=0; i<is_n; i++) { 207897b48c8fSBarry Smith bb[is_idx[i]] = diag*xx[is_idx[i]]; 207997b48c8fSBarry Smith } 208097b48c8fSBarry Smith ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 208197b48c8fSBarry Smith ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 208297b48c8fSBarry Smith } 208397b48c8fSBarry Smith 2084d9b7c43dSSatish Balay /* Make a copy of the IS and sort it */ 2085bea157c4SSatish Balay /* allocate memory for rows,sizes */ 2086dcca6d9dSJed Brown ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2087bea157c4SSatish Balay 2088563b5814SBarry Smith /* copy IS values to rows, and sort them */ 208926fbe8dcSKarl Rupp for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2090bea157c4SSatish Balay ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 209197b48c8fSBarry Smith 2092a9817697SBarry Smith if (baij->keepnonzeropattern) { 209326fbe8dcSKarl Rupp for (i=0; i<is_n; i++) sizes[i] = 1; 2094dffd3267SBarry Smith bs_max = is_n; 2095dffd3267SBarry Smith } else { 2096bea157c4SSatish Balay ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 
2097e56f5c9eSBarry Smith A->nonzerostate++; 2098dffd3267SBarry Smith } 2099bea157c4SSatish Balay 2100bea157c4SSatish Balay for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2101bea157c4SSatish Balay row = rows[j]; 21022c71b3e2SJacob Faibussowitsch PetscCheckFalse(row < 0 || row > A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %" PetscInt_FMT " out of range",row); 2103bea157c4SSatish Balay count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2104b31fbe3bSSatish Balay aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2105a9817697SBarry Smith if (sizes[i] == bs && !baij->keepnonzeropattern) { 2106d4a378daSJed Brown if (diag != (PetscScalar)0.0) { 2107bea157c4SSatish Balay if (baij->ilen[row/bs] > 0) { 2108bea157c4SSatish Balay baij->ilen[row/bs] = 1; 2109bea157c4SSatish Balay baij->j[baij->i[row/bs]] = row/bs; 211026fbe8dcSKarl Rupp 2111580bdb30SBarry Smith ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr); 2112a07cd24cSSatish Balay } 2113563b5814SBarry Smith /* Now insert all the diagonal values for this bs */ 2114bea157c4SSatish Balay for (k=0; k<bs; k++) { 2115f4df32b1SMatthew Knepley ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2116bea157c4SSatish Balay } 2117f4df32b1SMatthew Knepley } else { /* (diag == 0.0) */ 2118bea157c4SSatish Balay baij->ilen[row/bs] = 0; 2119f4df32b1SMatthew Knepley } /* end (diag == 0.0) */ 2120bea157c4SSatish Balay } else { /* (sizes[i] != bs) */ 21216bdcaf15SBarry Smith PetscAssert(sizes[i] == 1,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. 
Value should be 1"); 2122bea157c4SSatish Balay for (k=0; k<count; k++) { 2123d9b7c43dSSatish Balay aa[0] = zero; 2124d9b7c43dSSatish Balay aa += bs; 2125d9b7c43dSSatish Balay } 2126d4a378daSJed Brown if (diag != (PetscScalar)0.0) { 2127f4df32b1SMatthew Knepley ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2128d9b7c43dSSatish Balay } 2129d9b7c43dSSatish Balay } 2130bea157c4SSatish Balay } 2131bea157c4SSatish Balay 2132fca92195SBarry Smith ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 21339a8dea36SBarry Smith ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 21343a40ed3dSBarry Smith PetscFunctionReturn(0); 2135d9b7c43dSSatish Balay } 21361c351548SSatish Balay 213797b48c8fSBarry Smith PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 213897b48c8fSBarry Smith { 213997b48c8fSBarry Smith Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 214097b48c8fSBarry Smith PetscErrorCode ierr; 214197b48c8fSBarry Smith PetscInt i,j,k,count; 214297b48c8fSBarry Smith PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 214397b48c8fSBarry Smith PetscScalar zero = 0.0; 214497b48c8fSBarry Smith MatScalar *aa; 214597b48c8fSBarry Smith const PetscScalar *xx; 214697b48c8fSBarry Smith PetscScalar *bb; 214756777dd2SBarry Smith PetscBool *zeroed,vecs = PETSC_FALSE; 214897b48c8fSBarry Smith 214997b48c8fSBarry Smith PetscFunctionBegin; 215097b48c8fSBarry Smith /* fix right hand side if needed */ 215197b48c8fSBarry Smith if (x && b) { 215297b48c8fSBarry Smith ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 215397b48c8fSBarry Smith ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 215456777dd2SBarry Smith vecs = PETSC_TRUE; 215597b48c8fSBarry Smith } 215697b48c8fSBarry Smith 215797b48c8fSBarry Smith /* zero the columns */ 21581795a4d1SJed Brown ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 215997b48c8fSBarry Smith for (i=0; i<is_n; i++) { 21602c71b3e2SJacob Faibussowitsch 
PetscCheckFalse(is_idx[i] < 0 || is_idx[i] >= A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %" PetscInt_FMT " out of range",is_idx[i]); 216197b48c8fSBarry Smith zeroed[is_idx[i]] = PETSC_TRUE; 216297b48c8fSBarry Smith } 216397b48c8fSBarry Smith for (i=0; i<A->rmap->N; i++) { 216497b48c8fSBarry Smith if (!zeroed[i]) { 216597b48c8fSBarry Smith row = i/bs; 216697b48c8fSBarry Smith for (j=baij->i[row]; j<baij->i[row+1]; j++) { 216797b48c8fSBarry Smith for (k=0; k<bs; k++) { 216897b48c8fSBarry Smith col = bs*baij->j[j] + k; 216997b48c8fSBarry Smith if (zeroed[col]) { 217097b48c8fSBarry Smith aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 217156777dd2SBarry Smith if (vecs) bb[i] -= aa[0]*xx[col]; 217297b48c8fSBarry Smith aa[0] = 0.0; 217397b48c8fSBarry Smith } 217497b48c8fSBarry Smith } 217597b48c8fSBarry Smith } 217656777dd2SBarry Smith } else if (vecs) bb[i] = diag*xx[i]; 217797b48c8fSBarry Smith } 217897b48c8fSBarry Smith ierr = PetscFree(zeroed);CHKERRQ(ierr); 217956777dd2SBarry Smith if (vecs) { 218056777dd2SBarry Smith ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 218156777dd2SBarry Smith ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 218256777dd2SBarry Smith } 218397b48c8fSBarry Smith 218497b48c8fSBarry Smith /* zero the rows */ 218597b48c8fSBarry Smith for (i=0; i<is_n; i++) { 218697b48c8fSBarry Smith row = is_idx[i]; 218797b48c8fSBarry Smith count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 218897b48c8fSBarry Smith aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 218997b48c8fSBarry Smith for (k=0; k<count; k++) { 219097b48c8fSBarry Smith aa[0] = zero; 219197b48c8fSBarry Smith aa += bs; 219297b48c8fSBarry Smith } 2193d4a378daSJed Brown if (diag != (PetscScalar)0.0) { 219497b48c8fSBarry Smith ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 219597b48c8fSBarry Smith } 219697b48c8fSBarry Smith } 219797b48c8fSBarry Smith ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 
219897b48c8fSBarry Smith PetscFunctionReturn(0); 219997b48c8fSBarry Smith } 220097b48c8fSBarry Smith 2201c1ac3661SBarry Smith PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 22022d61bbb3SSatish Balay { 22032d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2204e2ee6c50SBarry Smith PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2205c1ac3661SBarry Smith PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2206d0f46423SBarry Smith PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 22076849ba73SBarry Smith PetscErrorCode ierr; 2208c1ac3661SBarry Smith PetscInt ridx,cidx,bs2=a->bs2; 2209ace3abfcSBarry Smith PetscBool roworiented=a->roworiented; 2210d8cdefa3SHong Zhang MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap; 22112d61bbb3SSatish Balay 22122d61bbb3SSatish Balay PetscFunctionBegin; 22132d61bbb3SSatish Balay for (k=0; k<m; k++) { /* loop over added rows */ 2214085a36d4SBarry Smith row = im[k]; 2215085a36d4SBarry Smith brow = row/bs; 22165ef9f2a5SBarry Smith if (row < 0) continue; 22176bdcaf15SBarry Smith PetscCheck(row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,row,A->rmap->N-1); 22182d61bbb3SSatish Balay rp = aj + ai[brow]; 2219672ba085SHong Zhang if (!A->structure_only) ap = aa + bs2*ai[brow]; 22202d61bbb3SSatish Balay rmax = imax[brow]; 22212d61bbb3SSatish Balay nrow = ailen[brow]; 22222d61bbb3SSatish Balay low = 0; 2223c71e6ed7SBarry Smith high = nrow; 22242d61bbb3SSatish Balay for (l=0; l<n; l++) { /* loop over added columns */ 22255ef9f2a5SBarry Smith if (in[l] < 0) continue; 22266bdcaf15SBarry Smith PetscCheck(in[l] < A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[l],A->cmap->n-1); 22272d61bbb3SSatish Balay col = in[l]; bcol = col/bs; 22282d61bbb3SSatish Balay ridx = row % bs; cidx = col % bs; 
2229672ba085SHong Zhang if (!A->structure_only) { 22302d61bbb3SSatish Balay if (roworiented) { 22315ef9f2a5SBarry Smith value = v[l + k*n]; 22322d61bbb3SSatish Balay } else { 22332d61bbb3SSatish Balay value = v[k + l*m]; 22342d61bbb3SSatish Balay } 2235672ba085SHong Zhang } 22367cd84e04SBarry Smith if (col <= lastcol) low = 0; else high = nrow; 2237e2ee6c50SBarry Smith lastcol = col; 22382d61bbb3SSatish Balay while (high-low > 7) { 22392d61bbb3SSatish Balay t = (low+high)/2; 22402d61bbb3SSatish Balay if (rp[t] > bcol) high = t; 22412d61bbb3SSatish Balay else low = t; 22422d61bbb3SSatish Balay } 22432d61bbb3SSatish Balay for (i=low; i<high; i++) { 22442d61bbb3SSatish Balay if (rp[i] > bcol) break; 22452d61bbb3SSatish Balay if (rp[i] == bcol) { 22462d61bbb3SSatish Balay bap = ap + bs2*i + bs*cidx + ridx; 2247672ba085SHong Zhang if (!A->structure_only) { 22482d61bbb3SSatish Balay if (is == ADD_VALUES) *bap += value; 22492d61bbb3SSatish Balay else *bap = value; 2250672ba085SHong Zhang } 22512d61bbb3SSatish Balay goto noinsert1; 22522d61bbb3SSatish Balay } 22532d61bbb3SSatish Balay } 22542d61bbb3SSatish Balay if (nonew == 1) goto noinsert1; 22552c71b3e2SJacob Faibussowitsch PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2256672ba085SHong Zhang if (A->structure_only) { 2257672ba085SHong Zhang MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar); 2258672ba085SHong Zhang } else { 2259fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2260672ba085SHong Zhang } 2261c03d1d03SSatish Balay N = nrow++ - 1; high++; 22622d61bbb3SSatish Balay /* shift up all the later entries in this row */ 2263580bdb30SBarry Smith ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 22642d61bbb3SSatish Balay rp[i] = bcol; 2265580bdb30SBarry Smith if (!A->structure_only) 
{ 2266580bdb30SBarry Smith ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 2267580bdb30SBarry Smith ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr); 2268580bdb30SBarry Smith ap[bs2*i + bs*cidx + ridx] = value; 2269580bdb30SBarry Smith } 2270085a36d4SBarry Smith a->nz++; 2271e56f5c9eSBarry Smith A->nonzerostate++; 22722d61bbb3SSatish Balay noinsert1:; 22732d61bbb3SSatish Balay low = i; 22742d61bbb3SSatish Balay } 22752d61bbb3SSatish Balay ailen[brow] = nrow; 22762d61bbb3SSatish Balay } 22772d61bbb3SSatish Balay PetscFunctionReturn(0); 22782d61bbb3SSatish Balay } 22792d61bbb3SSatish Balay 22800481f469SBarry Smith PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 22812d61bbb3SSatish Balay { 22822d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 22832d61bbb3SSatish Balay Mat outA; 2284dfbe8321SBarry Smith PetscErrorCode ierr; 2285ace3abfcSBarry Smith PetscBool row_identity,col_identity; 22862d61bbb3SSatish Balay 22872d61bbb3SSatish Balay PetscFunctionBegin; 22882c71b3e2SJacob Faibussowitsch PetscCheckFalse(info->levels != 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2289667159a5SBarry Smith ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2290667159a5SBarry Smith ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 22912c71b3e2SJacob Faibussowitsch PetscCheckFalse(!row_identity || !col_identity,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 22922d61bbb3SSatish Balay 22932d61bbb3SSatish Balay outA = inA; 2294d5f3da31SBarry Smith inA->factortype = MAT_FACTOR_LU; 2295f6224b95SHong Zhang ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2296f6224b95SHong Zhang ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 22972d61bbb3SSatish Balay 2298c4992f7dSBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2299cf242676SKris Buschelman 2300c38d4ed2SBarry Smith ierr = 
PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 23016bf464f9SBarry Smith ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2302c3122656SLisandro Dalcin a->row = row; 2303c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 23046bf464f9SBarry Smith ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2305c3122656SLisandro Dalcin a->col = col; 2306c38d4ed2SBarry Smith 2307c38d4ed2SBarry Smith /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 23086bf464f9SBarry Smith ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 23094c49b128SBarry Smith ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 23103bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2311c38d4ed2SBarry Smith 2312ace3abfcSBarry Smith ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2313c38d4ed2SBarry Smith if (!a->solve_work) { 2314854ce69bSBarry Smith ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 23153bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2316c38d4ed2SBarry Smith } 2317719d5645SBarry Smith ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 23182d61bbb3SSatish Balay PetscFunctionReturn(0); 23192d61bbb3SSatish Balay } 2320d9b7c43dSSatish Balay 23217087cfbeSBarry Smith PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 232227a8da17SBarry Smith { 232327a8da17SBarry Smith Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2324bdb1c0e1SJed Brown PetscInt i,nz,mbs; 232527a8da17SBarry Smith 232627a8da17SBarry Smith PetscFunctionBegin; 2327b32cb4a7SJed Brown nz = baij->maxnz; 2328bdb1c0e1SJed Brown mbs = baij->mbs; 232927a8da17SBarry Smith for (i=0; i<nz; i++) { 233027a8da17SBarry Smith baij->j[i] = indices[i]; 233127a8da17SBarry Smith } 233227a8da17SBarry Smith baij->nz = nz; 2333bdb1c0e1SJed Brown 
for (i=0; i<mbs; i++) { 233427a8da17SBarry Smith baij->ilen[i] = baij->imax[i]; 233527a8da17SBarry Smith } 233627a8da17SBarry Smith PetscFunctionReturn(0); 233727a8da17SBarry Smith } 233827a8da17SBarry Smith 233927a8da17SBarry Smith /*@ 234027a8da17SBarry Smith MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 234127a8da17SBarry Smith in the matrix. 234227a8da17SBarry Smith 234327a8da17SBarry Smith Input Parameters: 234427a8da17SBarry Smith + mat - the SeqBAIJ matrix 234527a8da17SBarry Smith - indices - the column indices 234627a8da17SBarry Smith 234715091d37SBarry Smith Level: advanced 234815091d37SBarry Smith 234927a8da17SBarry Smith Notes: 235027a8da17SBarry Smith This can be called if you have precomputed the nonzero structure of the 235127a8da17SBarry Smith matrix and want to provide it to the matrix object to improve the performance 235227a8da17SBarry Smith of the MatSetValues() operation. 235327a8da17SBarry Smith 235427a8da17SBarry Smith You MUST have set the correct numbers of nonzeros per row in the call to 2355d1be2dadSMatthew Knepley MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 
235627a8da17SBarry Smith 235727a8da17SBarry Smith MUST be called before any calls to MatSetValues(); 235827a8da17SBarry Smith 235927a8da17SBarry Smith @*/ 23607087cfbeSBarry Smith PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 236127a8da17SBarry Smith { 23624ac538c5SBarry Smith PetscErrorCode ierr; 236327a8da17SBarry Smith 236427a8da17SBarry Smith PetscFunctionBegin; 23650700a824SBarry Smith PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 23664482741eSBarry Smith PetscValidPointer(indices,2); 23674ac538c5SBarry Smith ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 236827a8da17SBarry Smith PetscFunctionReturn(0); 236927a8da17SBarry Smith } 237027a8da17SBarry Smith 2371985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2372273d9f13SBarry Smith { 2373273d9f13SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2374dfbe8321SBarry Smith PetscErrorCode ierr; 2375c1ac3661SBarry Smith PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2376273d9f13SBarry Smith PetscReal atmp; 237787828ca2SBarry Smith PetscScalar *x,zero = 0.0; 2378273d9f13SBarry Smith MatScalar *aa; 2379c1ac3661SBarry Smith PetscInt ncols,brow,krow,kcol; 2380273d9f13SBarry Smith 2381273d9f13SBarry Smith PetscFunctionBegin; 23822c71b3e2SJacob Faibussowitsch PetscCheckFalse(A->factortype,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2383d0f46423SBarry Smith bs = A->rmap->bs; 2384273d9f13SBarry Smith aa = a->a; 2385273d9f13SBarry Smith ai = a->i; 2386273d9f13SBarry Smith aj = a->j; 2387273d9f13SBarry Smith mbs = a->mbs; 2388273d9f13SBarry Smith 23892dcb1b2aSMatthew Knepley ierr = VecSet(v,zero);CHKERRQ(ierr); 23901ebc52fbSHong Zhang ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2391273d9f13SBarry Smith ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 23922c71b3e2SJacob Faibussowitsch PetscCheckFalse(n != A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2393273d9f13SBarry 
Smith for (i=0; i<mbs; i++) { 2394273d9f13SBarry Smith ncols = ai[1] - ai[0]; ai++; 2395273d9f13SBarry Smith brow = bs*i; 2396273d9f13SBarry Smith for (j=0; j<ncols; j++) { 2397273d9f13SBarry Smith for (kcol=0; kcol<bs; kcol++) { 2398273d9f13SBarry Smith for (krow=0; krow<bs; krow++) { 2399273d9f13SBarry Smith atmp = PetscAbsScalar(*aa);aa++; 2400273d9f13SBarry Smith row = brow + krow; /* row index */ 2401985db425SBarry Smith if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2402273d9f13SBarry Smith } 2403273d9f13SBarry Smith } 2404273d9f13SBarry Smith aj++; 2405273d9f13SBarry Smith } 2406273d9f13SBarry Smith } 24071ebc52fbSHong Zhang ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2408273d9f13SBarry Smith PetscFunctionReturn(0); 2409273d9f13SBarry Smith } 2410273d9f13SBarry Smith 24113c896bc6SHong Zhang PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 24123c896bc6SHong Zhang { 24133c896bc6SHong Zhang PetscErrorCode ierr; 24143c896bc6SHong Zhang 24153c896bc6SHong Zhang PetscFunctionBegin; 24163c896bc6SHong Zhang /* If the two matrices have the same copy implementation, use fast copy. 
*/ 24173c896bc6SHong Zhang if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 24183c896bc6SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 24193c896bc6SHong Zhang Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2420d88c0aacSHong Zhang PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 24213c896bc6SHong Zhang 24222c71b3e2SJacob Faibussowitsch PetscCheckFalse(a->i[ambs] != b->i[bmbs],PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different",a->i[ambs],b->i[bmbs]); 24232c71b3e2SJacob Faibussowitsch PetscCheckFalse(abs != bbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different",abs,bbs); 2424580bdb30SBarry Smith ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr); 2425cdc753b6SBarry Smith ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 24263c896bc6SHong Zhang } else { 24273c896bc6SHong Zhang ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 24283c896bc6SHong Zhang } 24293c896bc6SHong Zhang PetscFunctionReturn(0); 24303c896bc6SHong Zhang } 24313c896bc6SHong Zhang 24324994cf47SJed Brown PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2433273d9f13SBarry Smith { 2434dfbe8321SBarry Smith PetscErrorCode ierr; 2435273d9f13SBarry Smith 2436273d9f13SBarry Smith PetscFunctionBegin; 2437f4259b30SLisandro Dalcin ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2438273d9f13SBarry Smith PetscFunctionReturn(0); 2439273d9f13SBarry Smith } 2440273d9f13SBarry Smith 2441cda14afcSprj- static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2442f2a5309cSSatish Balay { 2443f2a5309cSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 24446e111a19SKarl Rupp 2445f2a5309cSSatish Balay PetscFunctionBegin; 2446f2a5309cSSatish Balay *array = a->a; 2447f2a5309cSSatish Balay PetscFunctionReturn(0); 2448f2a5309cSSatish Balay } 2449f2a5309cSSatish Balay 
2450cda14afcSprj- static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2451f2a5309cSSatish Balay { 2452f2a5309cSSatish Balay PetscFunctionBegin; 2453cda14afcSprj- *array = NULL; 2454f2a5309cSSatish Balay PetscFunctionReturn(0); 2455f2a5309cSSatish Balay } 2456f2a5309cSSatish Balay 245752768537SHong Zhang PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 245852768537SHong Zhang { 2459b264fe52SHong Zhang PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 246052768537SHong Zhang Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 246152768537SHong Zhang Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2462b264fe52SHong Zhang PetscErrorCode ierr; 246352768537SHong Zhang 246452768537SHong Zhang PetscFunctionBegin; 246552768537SHong Zhang /* Set the number of nonzeros in the new matrix */ 2466b264fe52SHong Zhang ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 246752768537SHong Zhang PetscFunctionReturn(0); 246852768537SHong Zhang } 246952768537SHong Zhang 2470f4df32b1SMatthew Knepley PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 247142ee4b1aSHong Zhang { 247242ee4b1aSHong Zhang Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2473dfbe8321SBarry Smith PetscErrorCode ierr; 247431ce2d13SHong Zhang PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2475e838b9e7SJed Brown PetscBLASInt one=1; 247642ee4b1aSHong Zhang 247742ee4b1aSHong Zhang PetscFunctionBegin; 2478134adf20SPierre Jolivet if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) { 2479134adf20SPierre Jolivet PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? 
PETSC_TRUE : PETSC_FALSE; 2480134adf20SPierre Jolivet if (e) { 2481134adf20SPierre Jolivet ierr = PetscArraycmp(x->i,y->i,x->mbs+1,&e);CHKERRQ(ierr); 2482134adf20SPierre Jolivet if (e) { 2483134adf20SPierre Jolivet ierr = PetscArraycmp(x->j,y->j,x->i[x->mbs],&e);CHKERRQ(ierr); 2484134adf20SPierre Jolivet if (e) str = SAME_NONZERO_PATTERN; 2485134adf20SPierre Jolivet } 2486134adf20SPierre Jolivet } 248754c59aa7SJacob Faibussowitsch if (!e) PetscCheck(str != SAME_NONZERO_PATTERN,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"MatStructure is not SAME_NONZERO_PATTERN"); 2488134adf20SPierre Jolivet } 248942ee4b1aSHong Zhang if (str == SAME_NONZERO_PATTERN) { 2490f4df32b1SMatthew Knepley PetscScalar alpha = a; 2491c5df96a5SBarry Smith PetscBLASInt bnz; 2492c5df96a5SBarry Smith ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 24938b83055fSJed Brown PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2494a3fa217bSJose E. Roman ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2495ab784542SHong Zhang } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2496ab784542SHong Zhang ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 249742ee4b1aSHong Zhang } else { 249852768537SHong Zhang Mat B; 249952768537SHong Zhang PetscInt *nnz; 250054c59aa7SJacob Faibussowitsch PetscCheck(bs == X->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 250152768537SHong Zhang ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 250252768537SHong Zhang ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 250352768537SHong Zhang ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 250452768537SHong Zhang ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 250552768537SHong Zhang ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 250652768537SHong Zhang ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 
250752768537SHong Zhang ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 250852768537SHong Zhang ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 250952768537SHong Zhang ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 251079c2fd05SStefano Zampini ierr = MatHeaderMerge(Y,&B);CHKERRQ(ierr); 251152768537SHong Zhang ierr = PetscFree(nnz);CHKERRQ(ierr); 251242ee4b1aSHong Zhang } 251342ee4b1aSHong Zhang PetscFunctionReturn(0); 251442ee4b1aSHong Zhang } 251542ee4b1aSHong Zhang 25162726fb6dSPierre Jolivet PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 25172726fb6dSPierre Jolivet { 25182726fb6dSPierre Jolivet #if defined(PETSC_USE_COMPLEX) 25192726fb6dSPierre Jolivet Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 25202726fb6dSPierre Jolivet PetscInt i,nz = a->bs2*a->i[a->mbs]; 25212726fb6dSPierre Jolivet MatScalar *aa = a->a; 25222726fb6dSPierre Jolivet 25232726fb6dSPierre Jolivet PetscFunctionBegin; 25242726fb6dSPierre Jolivet for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]); 25252726fb6dSPierre Jolivet #else 25262726fb6dSPierre Jolivet PetscFunctionBegin; 25272726fb6dSPierre Jolivet #endif 25282726fb6dSPierre Jolivet PetscFunctionReturn(0); 25292726fb6dSPierre Jolivet } 25302726fb6dSPierre Jolivet 253199cafbc1SBarry Smith PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 253299cafbc1SBarry Smith { 253399cafbc1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 253499cafbc1SBarry Smith PetscInt i,nz = a->bs2*a->i[a->mbs]; 2535dd6ea824SBarry Smith MatScalar *aa = a->a; 253699cafbc1SBarry Smith 253799cafbc1SBarry Smith PetscFunctionBegin; 253899cafbc1SBarry Smith for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 253999cafbc1SBarry Smith PetscFunctionReturn(0); 254099cafbc1SBarry Smith } 254199cafbc1SBarry Smith 254299cafbc1SBarry Smith PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 254399cafbc1SBarry Smith { 254499cafbc1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 254599cafbc1SBarry Smith PetscInt i,nz = a->bs2*a->i[a->mbs]; 
2546dd6ea824SBarry Smith MatScalar *aa = a->a; 254799cafbc1SBarry Smith 254899cafbc1SBarry Smith PetscFunctionBegin; 254999cafbc1SBarry Smith for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 255099cafbc1SBarry Smith PetscFunctionReturn(0); 255199cafbc1SBarry Smith } 255299cafbc1SBarry Smith 25533acb8795SBarry Smith /* 25542479783cSJose E. Roman Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 25553acb8795SBarry Smith */ 25561a83f524SJed Brown PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 25573acb8795SBarry Smith { 25583acb8795SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 25593acb8795SBarry Smith PetscErrorCode ierr; 25603acb8795SBarry Smith PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 25613acb8795SBarry Smith PetscInt nz = a->i[m],row,*jj,mr,col; 25623acb8795SBarry Smith 25633acb8795SBarry Smith PetscFunctionBegin; 25643acb8795SBarry Smith *nn = n; 25653acb8795SBarry Smith if (!ia) PetscFunctionReturn(0); 25662c71b3e2SJacob Faibussowitsch PetscCheckFalse(symmetric,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2567e7e72b3dSBarry Smith else { 2568b9e7e5c1SBarry Smith ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr); 2569854ce69bSBarry Smith ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2570b9e7e5c1SBarry Smith ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr); 25713acb8795SBarry Smith jj = a->j; 25723acb8795SBarry Smith for (i=0; i<nz; i++) { 25733acb8795SBarry Smith collengths[jj[i]]++; 25743acb8795SBarry Smith } 25753acb8795SBarry Smith cia[0] = oshift; 25763acb8795SBarry Smith for (i=0; i<n; i++) { 25773acb8795SBarry Smith cia[i+1] = cia[i] + collengths[i]; 25783acb8795SBarry Smith } 2579580bdb30SBarry Smith ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr); 25803acb8795SBarry Smith jj = a->j; 25813acb8795SBarry Smith for (row=0; row<m; row++) { 
25823acb8795SBarry Smith mr = a->i[row+1] - a->i[row]; 25833acb8795SBarry Smith for (i=0; i<mr; i++) { 25843acb8795SBarry Smith col = *jj++; 258526fbe8dcSKarl Rupp 25863acb8795SBarry Smith cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 25873acb8795SBarry Smith } 25883acb8795SBarry Smith } 25893acb8795SBarry Smith ierr = PetscFree(collengths);CHKERRQ(ierr); 25903acb8795SBarry Smith *ia = cia; *ja = cja; 25913acb8795SBarry Smith } 25923acb8795SBarry Smith PetscFunctionReturn(0); 25933acb8795SBarry Smith } 25943acb8795SBarry Smith 25951a83f524SJed Brown PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 25963acb8795SBarry Smith { 25973acb8795SBarry Smith PetscErrorCode ierr; 25983acb8795SBarry Smith 25993acb8795SBarry Smith PetscFunctionBegin; 26003acb8795SBarry Smith if (!ia) PetscFunctionReturn(0); 26013acb8795SBarry Smith ierr = PetscFree(*ia);CHKERRQ(ierr); 26023acb8795SBarry Smith ierr = PetscFree(*ja);CHKERRQ(ierr); 26033acb8795SBarry Smith PetscFunctionReturn(0); 26043acb8795SBarry Smith } 26053acb8795SBarry Smith 2606525d23c0SHong Zhang /* 2607525d23c0SHong Zhang MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2608525d23c0SHong Zhang MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2609040ebd07SHong Zhang spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2610525d23c0SHong Zhang */ 2611525d23c0SHong Zhang PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2612f6d58c54SBarry Smith { 2613525d23c0SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2614f6d58c54SBarry Smith PetscErrorCode ierr; 2615c0349474SHong Zhang PetscInt 
i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2616525d23c0SHong Zhang PetscInt nz = a->i[m],row,*jj,mr,col; 2617525d23c0SHong Zhang PetscInt *cspidx; 2618f6d58c54SBarry Smith 2619f6d58c54SBarry Smith PetscFunctionBegin; 2620525d23c0SHong Zhang *nn = n; 2621525d23c0SHong Zhang if (!ia) PetscFunctionReturn(0); 2622f6d58c54SBarry Smith 2623b9e7e5c1SBarry Smith ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr); 2624854ce69bSBarry Smith ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2625b9e7e5c1SBarry Smith ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr); 2626b9e7e5c1SBarry Smith ierr = PetscMalloc1(nz,&cspidx);CHKERRQ(ierr); 2627525d23c0SHong Zhang jj = a->j; 2628525d23c0SHong Zhang for (i=0; i<nz; i++) { 2629525d23c0SHong Zhang collengths[jj[i]]++; 2630f6d58c54SBarry Smith } 2631525d23c0SHong Zhang cia[0] = oshift; 2632525d23c0SHong Zhang for (i=0; i<n; i++) { 2633525d23c0SHong Zhang cia[i+1] = cia[i] + collengths[i]; 2634525d23c0SHong Zhang } 2635580bdb30SBarry Smith ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr); 2636525d23c0SHong Zhang jj = a->j; 2637525d23c0SHong Zhang for (row=0; row<m; row++) { 2638525d23c0SHong Zhang mr = a->i[row+1] - a->i[row]; 2639525d23c0SHong Zhang for (i=0; i<mr; i++) { 2640525d23c0SHong Zhang col = *jj++; 2641525d23c0SHong Zhang cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2642525d23c0SHong Zhang cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2643525d23c0SHong Zhang } 2644525d23c0SHong Zhang } 2645525d23c0SHong Zhang ierr = PetscFree(collengths);CHKERRQ(ierr); 2646071fcb05SBarry Smith *ia = cia; 2647071fcb05SBarry Smith *ja = cja; 2648525d23c0SHong Zhang *spidx = cspidx; 2649525d23c0SHong Zhang PetscFunctionReturn(0); 2650f6d58c54SBarry Smith } 2651f6d58c54SBarry Smith 2652525d23c0SHong Zhang PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 
2653525d23c0SHong Zhang { 2654525d23c0SHong Zhang PetscErrorCode ierr; 2655f6d58c54SBarry Smith 2656525d23c0SHong Zhang PetscFunctionBegin; 2657525d23c0SHong Zhang ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2658525d23c0SHong Zhang ierr = PetscFree(*spidx);CHKERRQ(ierr); 2659f6d58c54SBarry Smith PetscFunctionReturn(0); 2660f6d58c54SBarry Smith } 266199cafbc1SBarry Smith 26627d68702bSBarry Smith PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 26637d68702bSBarry Smith { 26647d68702bSBarry Smith PetscErrorCode ierr; 26657d68702bSBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 26667d68702bSBarry Smith 26677d68702bSBarry Smith PetscFunctionBegin; 26686f33a894SBarry Smith if (!Y->preallocated || !aij->nz) { 26697d68702bSBarry Smith ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 26707d68702bSBarry Smith } 26717d68702bSBarry Smith ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 26727d68702bSBarry Smith PetscFunctionReturn(0); 26737d68702bSBarry Smith } 26747d68702bSBarry Smith 26752593348eSBarry Smith /* -------------------------------------------------------------------*/ 26763964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2677cc2dc46cSBarry Smith MatGetRow_SeqBAIJ, 2678cc2dc46cSBarry Smith MatRestoreRow_SeqBAIJ, 2679cc2dc46cSBarry Smith MatMult_SeqBAIJ_N, 268097304618SKris Buschelman /* 4*/ MatMultAdd_SeqBAIJ_N, 26817c922b88SBarry Smith MatMultTranspose_SeqBAIJ, 26827c922b88SBarry Smith MatMultTransposeAdd_SeqBAIJ, 2683f4259b30SLisandro Dalcin NULL, 2684f4259b30SLisandro Dalcin NULL, 2685f4259b30SLisandro Dalcin NULL, 2686f4259b30SLisandro Dalcin /* 10*/ NULL, 2687cc2dc46cSBarry Smith MatLUFactor_SeqBAIJ, 2688f4259b30SLisandro Dalcin NULL, 2689f4259b30SLisandro Dalcin NULL, 2690f2501298SSatish Balay MatTranspose_SeqBAIJ, 269197304618SKris Buschelman /* 15*/ MatGetInfo_SeqBAIJ, 2692cc2dc46cSBarry Smith MatEqual_SeqBAIJ, 2693cc2dc46cSBarry Smith 
MatGetDiagonal_SeqBAIJ, 2694cc2dc46cSBarry Smith MatDiagonalScale_SeqBAIJ, 2695cc2dc46cSBarry Smith MatNorm_SeqBAIJ, 2696f4259b30SLisandro Dalcin /* 20*/ NULL, 2697cc2dc46cSBarry Smith MatAssemblyEnd_SeqBAIJ, 2698cc2dc46cSBarry Smith MatSetOption_SeqBAIJ, 2699cc2dc46cSBarry Smith MatZeroEntries_SeqBAIJ, 2700d519adbfSMatthew Knepley /* 24*/ MatZeroRows_SeqBAIJ, 2701f4259b30SLisandro Dalcin NULL, 2702f4259b30SLisandro Dalcin NULL, 2703f4259b30SLisandro Dalcin NULL, 2704f4259b30SLisandro Dalcin NULL, 27054994cf47SJed Brown /* 29*/ MatSetUp_SeqBAIJ, 2706f4259b30SLisandro Dalcin NULL, 2707f4259b30SLisandro Dalcin NULL, 2708f4259b30SLisandro Dalcin NULL, 2709f4259b30SLisandro Dalcin NULL, 2710d519adbfSMatthew Knepley /* 34*/ MatDuplicate_SeqBAIJ, 2711f4259b30SLisandro Dalcin NULL, 2712f4259b30SLisandro Dalcin NULL, 2713cc2dc46cSBarry Smith MatILUFactor_SeqBAIJ, 2714f4259b30SLisandro Dalcin NULL, 2715d519adbfSMatthew Knepley /* 39*/ MatAXPY_SeqBAIJ, 27167dae84e0SHong Zhang MatCreateSubMatrices_SeqBAIJ, 2717cc2dc46cSBarry Smith MatIncreaseOverlap_SeqBAIJ, 2718cc2dc46cSBarry Smith MatGetValues_SeqBAIJ, 27193c896bc6SHong Zhang MatCopy_SeqBAIJ, 2720f4259b30SLisandro Dalcin /* 44*/ NULL, 2721cc2dc46cSBarry Smith MatScale_SeqBAIJ, 27227d68702bSBarry Smith MatShift_SeqBAIJ, 2723f4259b30SLisandro Dalcin NULL, 272497b48c8fSBarry Smith MatZeroRowsColumns_SeqBAIJ, 2725f4259b30SLisandro Dalcin /* 49*/ NULL, 27263b2fbd54SBarry Smith MatGetRowIJ_SeqBAIJ, 272792c4ed94SBarry Smith MatRestoreRowIJ_SeqBAIJ, 27283acb8795SBarry Smith MatGetColumnIJ_SeqBAIJ, 27293acb8795SBarry Smith MatRestoreColumnIJ_SeqBAIJ, 273093dfae19SHong Zhang /* 54*/ MatFDColoringCreate_SeqXAIJ, 2731f4259b30SLisandro Dalcin NULL, 2732f4259b30SLisandro Dalcin NULL, 2733090001bdSToby Isaac NULL, 2734d3825aa8SBarry Smith MatSetValuesBlocked_SeqBAIJ, 27357dae84e0SHong Zhang /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2736b9b97703SBarry Smith MatDestroy_SeqBAIJ, 2737b9b97703SBarry Smith MatView_SeqBAIJ, 2738f4259b30SLisandro 
Dalcin NULL, 2739f4259b30SLisandro Dalcin NULL, 2740f4259b30SLisandro Dalcin /* 64*/ NULL, 2741f4259b30SLisandro Dalcin NULL, 2742f4259b30SLisandro Dalcin NULL, 2743f4259b30SLisandro Dalcin NULL, 2744f4259b30SLisandro Dalcin NULL, 2745d519adbfSMatthew Knepley /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2746f4259b30SLisandro Dalcin NULL, 2747c87e5d42SMatthew Knepley MatConvert_Basic, 2748f4259b30SLisandro Dalcin NULL, 2749f4259b30SLisandro Dalcin NULL, 2750f4259b30SLisandro Dalcin /* 74*/ NULL, 2751f6d58c54SBarry Smith MatFDColoringApply_BAIJ, 2752f4259b30SLisandro Dalcin NULL, 2753f4259b30SLisandro Dalcin NULL, 2754f4259b30SLisandro Dalcin NULL, 2755f4259b30SLisandro Dalcin /* 79*/ NULL, 2756f4259b30SLisandro Dalcin NULL, 2757f4259b30SLisandro Dalcin NULL, 2758f4259b30SLisandro Dalcin NULL, 27595bba2384SShri Abhyankar MatLoad_SeqBAIJ, 2760f4259b30SLisandro Dalcin /* 84*/ NULL, 2761f4259b30SLisandro Dalcin NULL, 2762f4259b30SLisandro Dalcin NULL, 2763f4259b30SLisandro Dalcin NULL, 2764f4259b30SLisandro Dalcin NULL, 2765f4259b30SLisandro Dalcin /* 89*/ NULL, 2766f4259b30SLisandro Dalcin NULL, 2767f4259b30SLisandro Dalcin NULL, 2768f4259b30SLisandro Dalcin NULL, 2769f4259b30SLisandro Dalcin NULL, 2770f4259b30SLisandro Dalcin /* 94*/ NULL, 2771f4259b30SLisandro Dalcin NULL, 2772f4259b30SLisandro Dalcin NULL, 2773f4259b30SLisandro Dalcin NULL, 2774f4259b30SLisandro Dalcin NULL, 2775f4259b30SLisandro Dalcin /* 99*/ NULL, 2776f4259b30SLisandro Dalcin NULL, 2777f4259b30SLisandro Dalcin NULL, 27782726fb6dSPierre Jolivet MatConjugate_SeqBAIJ, 2779f4259b30SLisandro Dalcin NULL, 2780f4259b30SLisandro Dalcin /*104*/ NULL, 278199cafbc1SBarry Smith MatRealPart_SeqBAIJ, 27822af78befSBarry Smith MatImaginaryPart_SeqBAIJ, 2783f4259b30SLisandro Dalcin NULL, 2784f4259b30SLisandro Dalcin NULL, 2785f4259b30SLisandro Dalcin /*109*/ NULL, 2786f4259b30SLisandro Dalcin NULL, 2787f4259b30SLisandro Dalcin NULL, 2788f4259b30SLisandro Dalcin NULL, 2789547795f9SHong Zhang MatMissingDiagonal_SeqBAIJ, 
2790f4259b30SLisandro Dalcin /*114*/ NULL, 2791f4259b30SLisandro Dalcin NULL, 2792f4259b30SLisandro Dalcin NULL, 2793f4259b30SLisandro Dalcin NULL, 2794f4259b30SLisandro Dalcin NULL, 2795f4259b30SLisandro Dalcin /*119*/ NULL, 2796f4259b30SLisandro Dalcin NULL, 2797547795f9SHong Zhang MatMultHermitianTranspose_SeqBAIJ, 2798d6037b41SHong Zhang MatMultHermitianTransposeAdd_SeqBAIJ, 2799f4259b30SLisandro Dalcin NULL, 2800f4259b30SLisandro Dalcin /*124*/ NULL, 2801857cbf51SRichard Tran Mills MatGetColumnReductions_SeqBAIJ, 28023964eb88SJed Brown MatInvertBlockDiagonal_SeqBAIJ, 2803f4259b30SLisandro Dalcin NULL, 2804f4259b30SLisandro Dalcin NULL, 2805f4259b30SLisandro Dalcin /*129*/ NULL, 2806f4259b30SLisandro Dalcin NULL, 2807f4259b30SLisandro Dalcin NULL, 2808f4259b30SLisandro Dalcin NULL, 2809f4259b30SLisandro Dalcin NULL, 2810f4259b30SLisandro Dalcin /*134*/ NULL, 2811f4259b30SLisandro Dalcin NULL, 2812f4259b30SLisandro Dalcin NULL, 2813f4259b30SLisandro Dalcin NULL, 2814f4259b30SLisandro Dalcin NULL, 281546533700Sstefano_zampini /*139*/ MatSetBlockSizes_Default, 2816f4259b30SLisandro Dalcin NULL, 2817f4259b30SLisandro Dalcin NULL, 2818bdf6f3fcSHong Zhang MatFDColoringSetUp_SeqXAIJ, 2819f4259b30SLisandro Dalcin NULL, 282086e85357SHong Zhang /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 2821*d70f29a3SPierre Jolivet MatDestroySubMatrices_SeqBAIJ, 2822*d70f29a3SPierre Jolivet NULL, 2823*d70f29a3SPierre Jolivet NULL 282499cafbc1SBarry Smith }; 28252593348eSBarry Smith 28267087cfbeSBarry Smith PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 28273e90b805SBarry Smith { 28283e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 28298ece6314SShri Abhyankar PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2830dfbe8321SBarry Smith PetscErrorCode ierr; 28313e90b805SBarry Smith 28323e90b805SBarry Smith PetscFunctionBegin; 28332c71b3e2SJacob Faibussowitsch PetscCheckFalse(aij->nonew != 1,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call 
MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 28343e90b805SBarry Smith 28353e90b805SBarry Smith /* allocate space for values if not already there */ 28363e90b805SBarry Smith if (!aij->saved_values) { 2837854ce69bSBarry Smith ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 28383bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 28393e90b805SBarry Smith } 28403e90b805SBarry Smith 28413e90b805SBarry Smith /* copy values over */ 2842580bdb30SBarry Smith ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr); 28433e90b805SBarry Smith PetscFunctionReturn(0); 28443e90b805SBarry Smith } 28453e90b805SBarry Smith 28467087cfbeSBarry Smith PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 28473e90b805SBarry Smith { 28483e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 28496849ba73SBarry Smith PetscErrorCode ierr; 28508ece6314SShri Abhyankar PetscInt nz = aij->i[aij->mbs]*aij->bs2; 28513e90b805SBarry Smith 28523e90b805SBarry Smith PetscFunctionBegin; 28532c71b3e2SJacob Faibussowitsch PetscCheckFalse(aij->nonew != 1,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 28542c71b3e2SJacob Faibussowitsch PetscCheckFalse(!aij->saved_values,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 28553e90b805SBarry Smith 28563e90b805SBarry Smith /* copy values over */ 2857580bdb30SBarry Smith ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr); 28583e90b805SBarry Smith PetscFunctionReturn(0); 28593e90b805SBarry Smith } 28603e90b805SBarry Smith 2861cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2862cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2863273d9f13SBarry Smith 2864b5b72c8aSIrina Sokolova PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt 
*nnz) 2865a23d5eceSKris Buschelman { 2866a23d5eceSKris Buschelman Mat_SeqBAIJ *b; 28676849ba73SBarry Smith PetscErrorCode ierr; 2868535b19f3SBarry Smith PetscInt i,mbs,nbs,bs2; 28698afaa268SBarry Smith PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2870a23d5eceSKris Buschelman 2871a23d5eceSKris Buschelman PetscFunctionBegin; 28722576faa2SJed Brown if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2873ab93d7beSBarry Smith if (nz == MAT_SKIP_ALLOCATION) { 2874ab93d7beSBarry Smith skipallocation = PETSC_TRUE; 2875ab93d7beSBarry Smith nz = 0; 2876ab93d7beSBarry Smith } 28778c07d4e3SBarry Smith 287833d57670SJed Brown ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 287926283091SBarry Smith ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 288026283091SBarry Smith ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2881e02043d6SBarry Smith ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2882899cda47SBarry Smith 2883899cda47SBarry Smith B->preallocated = PETSC_TRUE; 2884899cda47SBarry Smith 2885d0f46423SBarry Smith mbs = B->rmap->n/bs; 2886d0f46423SBarry Smith nbs = B->cmap->n/bs; 2887a23d5eceSKris Buschelman bs2 = bs*bs; 2888a23d5eceSKris Buschelman 28892c71b3e2SJacob Faibussowitsch PetscCheckFalse(mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT,B->rmap->N,B->cmap->n,bs); 2890a23d5eceSKris Buschelman 2891a23d5eceSKris Buschelman if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 28922c71b3e2SJacob Faibussowitsch PetscCheckFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %" PetscInt_FMT,nz); 2893a23d5eceSKris Buschelman if (nnz) { 2894a23d5eceSKris Buschelman for (i=0; i<mbs; i++) { 28952c71b3e2SJacob Faibussowitsch PetscCheckFalse(nnz[i] < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %" PetscInt_FMT " value %" 
PetscInt_FMT,i,nnz[i]); 28962c71b3e2SJacob Faibussowitsch PetscCheckFalse(nnz[i] > nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT,i,nnz[i],nbs); 2897a23d5eceSKris Buschelman } 2898a23d5eceSKris Buschelman } 2899a23d5eceSKris Buschelman 2900a23d5eceSKris Buschelman b = (Mat_SeqBAIJ*)B->data; 2901ce94432eSBarry Smith ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 29028afaa268SBarry Smith ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 29038c07d4e3SBarry Smith ierr = PetscOptionsEnd();CHKERRQ(ierr); 29048c07d4e3SBarry Smith 2905a23d5eceSKris Buschelman if (!flg) { 2906a23d5eceSKris Buschelman switch (bs) { 2907a23d5eceSKris Buschelman case 1: 2908a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_1; 2909a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2910a23d5eceSKris Buschelman break; 2911a23d5eceSKris Buschelman case 2: 2912a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_2; 2913a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2914a23d5eceSKris Buschelman break; 2915a23d5eceSKris Buschelman case 3: 2916a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_3; 2917a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2918a23d5eceSKris Buschelman break; 2919a23d5eceSKris Buschelman case 4: 2920a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_4; 2921a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2922a23d5eceSKris Buschelman break; 2923a23d5eceSKris Buschelman case 5: 2924a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_5; 2925a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2926a23d5eceSKris Buschelman break; 2927a23d5eceSKris Buschelman case 6: 2928a23d5eceSKris Buschelman B->ops->mult = 
MatMult_SeqBAIJ_6; 2929a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2930a23d5eceSKris Buschelman break; 2931a23d5eceSKris Buschelman case 7: 2932a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_7; 2933a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2934a23d5eceSKris Buschelman break; 293596e086a2SDaniel Kokron case 9: 29366679dcc1SBarry Smith { 29376679dcc1SBarry Smith PetscInt version = 1; 29386679dcc1SBarry Smith ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr); 29396679dcc1SBarry Smith switch (version) { 29405f70456aSHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 29416679dcc1SBarry Smith case 1: 294296e086a2SDaniel Kokron B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 294396e086a2SDaniel Kokron B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 29447d3de750SJacob Faibussowitsch ierr = PetscInfo((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs);CHKERRQ(ierr); 29456679dcc1SBarry Smith break; 29466679dcc1SBarry Smith #endif 29476679dcc1SBarry Smith default: 294896e086a2SDaniel Kokron B->ops->mult = MatMult_SeqBAIJ_N; 294996e086a2SDaniel Kokron B->ops->multadd = MatMultAdd_SeqBAIJ_N; 29507d3de750SJacob Faibussowitsch ierr = PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs);CHKERRQ(ierr); 295196e086a2SDaniel Kokron break; 29526679dcc1SBarry Smith } 29536679dcc1SBarry Smith break; 29546679dcc1SBarry Smith } 2955ebada01fSBarry Smith case 11: 2956ebada01fSBarry Smith B->ops->mult = MatMult_SeqBAIJ_11; 2957ebada01fSBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2958ebada01fSBarry Smith break; 29596679dcc1SBarry Smith case 12: 29606679dcc1SBarry Smith { 29616679dcc1SBarry Smith PetscInt version = 1; 29626679dcc1SBarry Smith ierr = 
PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr); 29636679dcc1SBarry Smith switch (version) { 29646679dcc1SBarry Smith case 1: 29656679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_12_ver1; 29666679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 29677d3de750SJacob Faibussowitsch ierr = PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs);CHKERRQ(ierr); 29688ab949d8SShri Abhyankar break; 29696679dcc1SBarry Smith case 2: 29706679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_12_ver2; 29716679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 29727d3de750SJacob Faibussowitsch ierr = PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs);CHKERRQ(ierr); 29736679dcc1SBarry Smith break; 29746679dcc1SBarry Smith #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 29756679dcc1SBarry Smith case 3: 29766679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 29776679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 29787d3de750SJacob Faibussowitsch ierr = PetscInfo((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs);CHKERRQ(ierr); 29796679dcc1SBarry Smith break; 29806679dcc1SBarry Smith #endif 2981a23d5eceSKris Buschelman default: 2982a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_N; 2983a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_N; 29847d3de750SJacob Faibussowitsch ierr = PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs);CHKERRQ(ierr); 29856679dcc1SBarry Smith break; 29866679dcc1SBarry Smith } 29876679dcc1SBarry Smith break; 29886679dcc1SBarry Smith } 29896679dcc1SBarry Smith case 15: 29906679dcc1SBarry 
Smith { 29916679dcc1SBarry Smith PetscInt version = 1; 29926679dcc1SBarry Smith ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr); 29936679dcc1SBarry Smith switch (version) { 29946679dcc1SBarry Smith case 1: 29956679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver1; 29967d3de750SJacob Faibussowitsch ierr = PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs);CHKERRQ(ierr); 29976679dcc1SBarry Smith break; 29986679dcc1SBarry Smith case 2: 29996679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver2; 30007d3de750SJacob Faibussowitsch ierr = PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs);CHKERRQ(ierr); 30016679dcc1SBarry Smith break; 30026679dcc1SBarry Smith case 3: 30036679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver3; 30047d3de750SJacob Faibussowitsch ierr = PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs);CHKERRQ(ierr); 30056679dcc1SBarry Smith break; 30066679dcc1SBarry Smith case 4: 30076679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver4; 30087d3de750SJacob Faibussowitsch ierr = PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs);CHKERRQ(ierr); 30096679dcc1SBarry Smith break; 30106679dcc1SBarry Smith default: 30116679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_N; 30127d3de750SJacob Faibussowitsch ierr = PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs);CHKERRQ(ierr); 30136679dcc1SBarry Smith break; 30146679dcc1SBarry Smith } 30156679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_N; 30166679dcc1SBarry Smith break; 30176679dcc1SBarry Smith } 30186679dcc1SBarry Smith default: 30196679dcc1SBarry Smith B->ops->mult = 
MatMult_SeqBAIJ_N; 30206679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_N; 30217d3de750SJacob Faibussowitsch ierr = PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs);CHKERRQ(ierr); 3022a23d5eceSKris Buschelman break; 3023a23d5eceSKris Buschelman } 3024a23d5eceSKris Buschelman } 3025e48d15efSToby Isaac B->ops->sor = MatSOR_SeqBAIJ; 3026a23d5eceSKris Buschelman b->mbs = mbs; 3027a23d5eceSKris Buschelman b->nbs = nbs; 3028ab93d7beSBarry Smith if (!skipallocation) { 30292ee49352SLisandro Dalcin if (!b->imax) { 3030dcca6d9dSJed Brown ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 30313bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 303226fbe8dcSKarl Rupp 30334fd072dbSBarry Smith b->free_imax_ilen = PETSC_TRUE; 30342ee49352SLisandro Dalcin } 3035ab93d7beSBarry Smith /* b->ilen will count nonzeros in each block row so far. */ 303626fbe8dcSKarl Rupp for (i=0; i<mbs; i++) b->ilen[i] = 0; 3037a23d5eceSKris Buschelman if (!nnz) { 3038a23d5eceSKris Buschelman if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3039c62bd62aSJed Brown else if (nz < 0) nz = 1; 30405d2a9ed1SStefano Zampini nz = PetscMin(nz,nbs); 3041a23d5eceSKris Buschelman for (i=0; i<mbs; i++) b->imax[i] = nz; 304261778c46SBarry Smith ierr = PetscIntMultError(nz,mbs,&nz);CHKERRQ(ierr); 3043a23d5eceSKris Buschelman } else { 3044c73702f5SBarry Smith PetscInt64 nz64 = 0; 3045c73702f5SBarry Smith for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];} 3046c73702f5SBarry Smith ierr = PetscIntCast(nz64,&nz);CHKERRQ(ierr); 3047a23d5eceSKris Buschelman } 3048a23d5eceSKris Buschelman 3049a23d5eceSKris Buschelman /* allocate the matrix space */ 30502ee49352SLisandro Dalcin ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3051672ba085SHong Zhang if (B->structure_only) { 3052672ba085SHong Zhang ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr); 3053672ba085SHong Zhang ierr = 
PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr); 3054672ba085SHong Zhang ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr); 3055672ba085SHong Zhang } else { 30566679dcc1SBarry Smith PetscInt nzbs2 = 0; 30576679dcc1SBarry Smith ierr = PetscIntMultError(nz,bs2,&nzbs2);CHKERRQ(ierr); 30586679dcc1SBarry Smith ierr = PetscMalloc3(nzbs2,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 30593bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3060580bdb30SBarry Smith ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr); 3061672ba085SHong Zhang } 3062580bdb30SBarry Smith ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr); 306326fbe8dcSKarl Rupp 3064672ba085SHong Zhang if (B->structure_only) { 3065672ba085SHong Zhang b->singlemalloc = PETSC_FALSE; 3066672ba085SHong Zhang b->free_a = PETSC_FALSE; 3067672ba085SHong Zhang } else { 3068a23d5eceSKris Buschelman b->singlemalloc = PETSC_TRUE; 3069672ba085SHong Zhang b->free_a = PETSC_TRUE; 3070672ba085SHong Zhang } 3071672ba085SHong Zhang b->free_ij = PETSC_TRUE; 3072672ba085SHong Zhang 3073a23d5eceSKris Buschelman b->i[0] = 0; 3074a23d5eceSKris Buschelman for (i=1; i<mbs+1; i++) { 3075a23d5eceSKris Buschelman b->i[i] = b->i[i-1] + b->imax[i-1]; 3076a23d5eceSKris Buschelman } 3077672ba085SHong Zhang 3078e811da20SHong Zhang } else { 3079e6b907acSBarry Smith b->free_a = PETSC_FALSE; 3080e6b907acSBarry Smith b->free_ij = PETSC_FALSE; 3081ab93d7beSBarry Smith } 3082a23d5eceSKris Buschelman 3083a23d5eceSKris Buschelman b->bs2 = bs2; 3084a23d5eceSKris Buschelman b->mbs = mbs; 3085a23d5eceSKris Buschelman b->nz = 0; 3086b32cb4a7SJed Brown b->maxnz = nz; 3087b32cb4a7SJed Brown B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3088cb7b82ddSBarry Smith B->was_assembled = PETSC_FALSE; 3089cb7b82ddSBarry Smith B->assembled = PETSC_FALSE; 30902576faa2SJed Brown if (realalloc) {ierr = 
MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 3091a23d5eceSKris Buschelman PetscFunctionReturn(0); 3092a23d5eceSKris Buschelman } 3093a23d5eceSKris Buschelman 3094cf12db73SBarry Smith PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3095725b52f3SLisandro Dalcin { 3096725b52f3SLisandro Dalcin PetscInt i,m,nz,nz_max=0,*nnz; 3097f4259b30SLisandro Dalcin PetscScalar *values=NULL; 3098d47bf9aaSJed Brown PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 3099725b52f3SLisandro Dalcin PetscErrorCode ierr; 3100725b52f3SLisandro Dalcin 3101725b52f3SLisandro Dalcin PetscFunctionBegin; 31022c71b3e2SJacob Faibussowitsch PetscCheckFalse(bs < 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %" PetscInt_FMT,bs); 310326283091SBarry Smith ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 310426283091SBarry Smith ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 310526283091SBarry Smith ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 310626283091SBarry Smith ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3107e02043d6SBarry Smith ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3108d0f46423SBarry Smith m = B->rmap->n/bs; 3109725b52f3SLisandro Dalcin 31102c71b3e2SJacob Faibussowitsch PetscCheckFalse(ii[0] != 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]); 3111854ce69bSBarry Smith ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 3112725b52f3SLisandro Dalcin for (i=0; i<m; i++) { 3113cf12db73SBarry Smith nz = ii[i+1]- ii[i]; 31142c71b3e2SJacob Faibussowitsch PetscCheckFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz); 3115725b52f3SLisandro Dalcin nz_max = PetscMax(nz_max, nz); 3116725b52f3SLisandro Dalcin nnz[i] = nz; 3117725b52f3SLisandro Dalcin } 
3118725b52f3SLisandro Dalcin ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3119725b52f3SLisandro Dalcin ierr = PetscFree(nnz);CHKERRQ(ierr); 3120725b52f3SLisandro Dalcin 3121725b52f3SLisandro Dalcin values = (PetscScalar*)V; 3122725b52f3SLisandro Dalcin if (!values) { 31231795a4d1SJed Brown ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 3124725b52f3SLisandro Dalcin } 3125725b52f3SLisandro Dalcin for (i=0; i<m; i++) { 3126cf12db73SBarry Smith PetscInt ncols = ii[i+1] - ii[i]; 3127cf12db73SBarry Smith const PetscInt *icols = jj + ii[i]; 3128bb80cfbbSStefano Zampini if (bs == 1 || !roworiented) { 3129cf12db73SBarry Smith const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3130725b52f3SLisandro Dalcin ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 31313adadaf3SJed Brown } else { 31323adadaf3SJed Brown PetscInt j; 31333adadaf3SJed Brown for (j=0; j<ncols; j++) { 31343adadaf3SJed Brown const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 3135d47bf9aaSJed Brown ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 31363adadaf3SJed Brown } 31373adadaf3SJed Brown } 3138725b52f3SLisandro Dalcin } 3139725b52f3SLisandro Dalcin if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3140725b52f3SLisandro Dalcin ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3141725b52f3SLisandro Dalcin ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 31427827cd58SJed Brown ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3143725b52f3SLisandro Dalcin PetscFunctionReturn(0); 3144725b52f3SLisandro Dalcin } 3145725b52f3SLisandro Dalcin 3146cda14afcSprj- /*@C 3147cda14afcSprj- MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored 3148cda14afcSprj- 3149cda14afcSprj- Not Collective 3150cda14afcSprj- 3151cda14afcSprj- Input Parameter: 3152cda14afcSprj- . 
mat - a MATSEQBAIJ matrix 3153cda14afcSprj- 3154cda14afcSprj- Output Parameter: 3155cda14afcSprj- . array - pointer to the data 3156cda14afcSprj- 3157cda14afcSprj- Level: intermediate 3158cda14afcSprj- 3159cda14afcSprj- .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray() 3160cda14afcSprj- @*/ 3161cda14afcSprj- PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array) 3162cda14afcSprj- { 3163cda14afcSprj- PetscErrorCode ierr; 3164cda14afcSprj- 3165cda14afcSprj- PetscFunctionBegin; 3166cda14afcSprj- ierr = PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 3167cda14afcSprj- PetscFunctionReturn(0); 3168cda14afcSprj- } 3169cda14afcSprj- 3170cda14afcSprj- /*@C 3171cda14afcSprj- MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray() 3172cda14afcSprj- 3173cda14afcSprj- Not Collective 3174cda14afcSprj- 3175cda14afcSprj- Input Parameters: 3176cda14afcSprj- + mat - a MATSEQBAIJ matrix 3177cda14afcSprj- - array - pointer to the data 3178cda14afcSprj- 3179cda14afcSprj- Level: intermediate 3180cda14afcSprj- 3181cda14afcSprj- .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray() 3182cda14afcSprj- @*/ 3183cda14afcSprj- PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array) 3184cda14afcSprj- { 3185cda14afcSprj- PetscErrorCode ierr; 3186cda14afcSprj- 3187cda14afcSprj- PetscFunctionBegin; 3188cda14afcSprj- ierr = PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 3189cda14afcSprj- PetscFunctionReturn(0); 3190cda14afcSprj- } 3191cda14afcSprj- 31920bad9183SKris Buschelman /*MC 3193fafad747SKris Buschelman MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 31940bad9183SKris Buschelman block sparse compressed row format. 
31950bad9183SKris Buschelman 31960bad9183SKris Buschelman Options Database Keys: 31976679dcc1SBarry Smith + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 31986679dcc1SBarry Smith - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 31990bad9183SKris Buschelman 32000bad9183SKris Buschelman Level: beginner 32010cd7f59aSBarry Smith 32020cd7f59aSBarry Smith Notes: 32030cd7f59aSBarry Smith MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no 32040cd7f59aSBarry Smith space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 32050bad9183SKris Buschelman 32066679dcc1SBarry Smith Run with -info to see what version of the matrix-vector product is being used 32076679dcc1SBarry Smith 3208f0c06035SSatish Balay .seealso: MatCreateSeqBAIJ() 32090bad9183SKris Buschelman M*/ 32100bad9183SKris Buschelman 3211cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3212b24902e0SBarry Smith 32138cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 32142593348eSBarry Smith { 3215dfbe8321SBarry Smith PetscErrorCode ierr; 3216c1ac3661SBarry Smith PetscMPIInt size; 3217b6490206SBarry Smith Mat_SeqBAIJ *b; 32183b2fbd54SBarry Smith 32193a40ed3dSBarry Smith PetscFunctionBegin; 3220ffc4695bSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 32212c71b3e2SJacob Faibussowitsch PetscCheckFalse(size > 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3222b6490206SBarry Smith 3223b00a9115SJed Brown ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3224b0a32e0cSBarry Smith B->data = (void*)b; 3225549d3d68SSatish Balay ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 322626fbe8dcSKarl Rupp 3227f4259b30SLisandro Dalcin b->row = NULL; 3228f4259b30SLisandro Dalcin b->col = 
NULL; 3229f4259b30SLisandro Dalcin b->icol = NULL; 32302593348eSBarry Smith b->reallocs = 0; 3231f4259b30SLisandro Dalcin b->saved_values = NULL; 32322593348eSBarry Smith 3233c4992f7dSBarry Smith b->roworiented = PETSC_TRUE; 32342593348eSBarry Smith b->nonew = 0; 3235f4259b30SLisandro Dalcin b->diag = NULL; 3236f4259b30SLisandro Dalcin B->spptr = NULL; 3237b32cb4a7SJed Brown B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3238a9817697SBarry Smith b->keepnonzeropattern = PETSC_FALSE; 32394e220ebcSLois Curfman McInnes 3240cda14afcSprj- ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);CHKERRQ(ierr); 3241cda14afcSprj- ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);CHKERRQ(ierr); 3242bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3243bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3244bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3245bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3246bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3247bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3248bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3249bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 32507ea3e4caSstefano_zampini 
#if defined(PETSC_HAVE_HYPRE) 3251c9225affSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 32527ea3e4caSstefano_zampini #endif 3253c9225affSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 325417667f90SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 32553a40ed3dSBarry Smith PetscFunctionReturn(0); 32562593348eSBarry Smith } 32572593348eSBarry Smith 3258ace3abfcSBarry Smith PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 32592593348eSBarry Smith { 3260b24902e0SBarry Smith Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 32616849ba73SBarry Smith PetscErrorCode ierr; 3262a96a251dSBarry Smith PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3263de6a44a3SBarry Smith 32643a40ed3dSBarry Smith PetscFunctionBegin; 32652c71b3e2SJacob Faibussowitsch PetscCheckFalse(a->i[mbs] != nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 32662593348eSBarry Smith 32674fd072dbSBarry Smith if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 32684fd072dbSBarry Smith c->imax = a->imax; 32694fd072dbSBarry Smith c->ilen = a->ilen; 32704fd072dbSBarry Smith c->free_imax_ilen = PETSC_FALSE; 32714fd072dbSBarry Smith } else { 3272dcca6d9dSJed Brown ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 32733bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3274b6490206SBarry Smith for (i=0; i<mbs; i++) { 32752593348eSBarry Smith c->imax[i] = a->imax[i]; 32762593348eSBarry Smith c->ilen[i] = a->ilen[i]; 32772593348eSBarry Smith } 32784fd072dbSBarry Smith c->free_imax_ilen = PETSC_TRUE; 32794fd072dbSBarry Smith } 32802593348eSBarry Smith 32812593348eSBarry Smith /* allocate the matrix space */ 328216a2bf60SHong Zhang if (mallocmatspace) { 32834fd072dbSBarry Smith if 
(cpvalues == MAT_SHARE_NONZERO_PATTERN) { 32841795a4d1SJed Brown ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 32853bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 328626fbe8dcSKarl Rupp 32874fd072dbSBarry Smith c->i = a->i; 32884fd072dbSBarry Smith c->j = a->j; 3289379be0ddSLisandro Dalcin c->singlemalloc = PETSC_FALSE; 3290379be0ddSLisandro Dalcin c->free_a = PETSC_TRUE; 3291379be0ddSLisandro Dalcin c->free_ij = PETSC_FALSE; 32924fd072dbSBarry Smith c->parent = A; 32931e40a84eSLisandro Dalcin C->preallocated = PETSC_TRUE; 32941e40a84eSLisandro Dalcin C->assembled = PETSC_TRUE; 329526fbe8dcSKarl Rupp 32964fd072dbSBarry Smith ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 32974fd072dbSBarry Smith ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 32984fd072dbSBarry Smith ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 32994fd072dbSBarry Smith } else { 3300dcca6d9dSJed Brown ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 33013bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 330226fbe8dcSKarl Rupp 3303c4992f7dSBarry Smith c->singlemalloc = PETSC_TRUE; 3304379be0ddSLisandro Dalcin c->free_a = PETSC_TRUE; 33054fd072dbSBarry Smith c->free_ij = PETSC_TRUE; 330626fbe8dcSKarl Rupp 3307580bdb30SBarry Smith ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr); 3308b6490206SBarry Smith if (mbs > 0) { 3309580bdb30SBarry Smith ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr); 33102e8a6d31SBarry Smith if (cpvalues == MAT_COPY_VALUES) { 3311580bdb30SBarry Smith ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr); 33122e8a6d31SBarry Smith } else { 3313580bdb30SBarry Smith ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr); 33142593348eSBarry Smith } 33152593348eSBarry Smith } 33161e40a84eSLisandro Dalcin C->preallocated 
= PETSC_TRUE; 33171e40a84eSLisandro Dalcin C->assembled = PETSC_TRUE; 331816a2bf60SHong Zhang } 33194fd072dbSBarry Smith } 332016a2bf60SHong Zhang 33212593348eSBarry Smith c->roworiented = a->roworiented; 33222593348eSBarry Smith c->nonew = a->nonew; 332326fbe8dcSKarl Rupp 33241e1e43feSBarry Smith ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 33251e1e43feSBarry Smith ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 332626fbe8dcSKarl Rupp 33275c9eb25fSBarry Smith c->bs2 = a->bs2; 33285c9eb25fSBarry Smith c->mbs = a->mbs; 33295c9eb25fSBarry Smith c->nbs = a->nbs; 33302593348eSBarry Smith 33312593348eSBarry Smith if (a->diag) { 33324fd072dbSBarry Smith if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 33334fd072dbSBarry Smith c->diag = a->diag; 33344fd072dbSBarry Smith c->free_diag = PETSC_FALSE; 33354fd072dbSBarry Smith } else { 3336854ce69bSBarry Smith ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 33373bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 333826fbe8dcSKarl Rupp for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 33394fd072dbSBarry Smith c->free_diag = PETSC_TRUE; 33404fd072dbSBarry Smith } 3341f4259b30SLisandro Dalcin } else c->diag = NULL; 334226fbe8dcSKarl Rupp 33432593348eSBarry Smith c->nz = a->nz; 3344f2cbd3d5SJed Brown c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3345f361c04dSBarry Smith c->solve_work = NULL; 3346f361c04dSBarry Smith c->mult_work = NULL; 3347f361c04dSBarry Smith c->sor_workt = NULL; 3348f361c04dSBarry Smith c->sor_work = NULL; 334988e51ccdSHong Zhang 335088e51ccdSHong Zhang c->compressedrow.use = a->compressedrow.use; 335188e51ccdSHong Zhang c->compressedrow.nrows = a->compressedrow.nrows; 3352cd6b891eSBarry Smith if (a->compressedrow.use) { 335388e51ccdSHong Zhang i = a->compressedrow.nrows; 3354dcca6d9dSJed Brown ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 33553bb1ff40SBarry Smith ierr = 
PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3356580bdb30SBarry Smith ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr); 3357580bdb30SBarry Smith ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr); 335888e51ccdSHong Zhang } else { 335988e51ccdSHong Zhang c->compressedrow.use = PETSC_FALSE; 33600298fd71SBarry Smith c->compressedrow.i = NULL; 33610298fd71SBarry Smith c->compressedrow.rindex = NULL; 336288e51ccdSHong Zhang } 3363e56f5c9eSBarry Smith C->nonzerostate = A->nonzerostate; 336426fbe8dcSKarl Rupp 3365140e18c1SBarry Smith ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 33663a40ed3dSBarry Smith PetscFunctionReturn(0); 33672593348eSBarry Smith } 33682593348eSBarry Smith 3369b24902e0SBarry Smith PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3370b24902e0SBarry Smith { 3371b24902e0SBarry Smith PetscErrorCode ierr; 3372b24902e0SBarry Smith 3373b24902e0SBarry Smith PetscFunctionBegin; 3374ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3375d0f46423SBarry Smith ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 33765c9eb25fSBarry Smith ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 337798ad0f72SJed Brown ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3378b24902e0SBarry Smith PetscFunctionReturn(0); 3379b24902e0SBarry Smith } 3380b24902e0SBarry Smith 3381618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3382b51a4376SLisandro Dalcin PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer) 3383f501eaabSShri Abhyankar { 3384b51a4376SLisandro Dalcin PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k; 3385b51a4376SLisandro Dalcin PetscInt *rowidxs,*colidxs; 3386b51a4376SLisandro Dalcin PetscScalar *matvals; 3387f501eaabSShri Abhyankar PetscErrorCode ierr; 
3388b51a4376SLisandro Dalcin 3389b51a4376SLisandro Dalcin PetscFunctionBegin; 3390b51a4376SLisandro Dalcin ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3391b51a4376SLisandro Dalcin 3392b51a4376SLisandro Dalcin /* read matrix header */ 3393b51a4376SLisandro Dalcin ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 33942c71b3e2SJacob Faibussowitsch PetscCheckFalse(header[0] != MAT_FILE_CLASSID,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3395b51a4376SLisandro Dalcin M = header[1]; N = header[2]; nz = header[3]; 33962c71b3e2SJacob Faibussowitsch PetscCheckFalse(M < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M); 33972c71b3e2SJacob Faibussowitsch PetscCheckFalse(N < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N); 33982c71b3e2SJacob Faibussowitsch PetscCheckFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3399b51a4376SLisandro Dalcin 3400b51a4376SLisandro Dalcin /* set block sizes from the viewer's .info file */ 3401b51a4376SLisandro Dalcin ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3402b51a4376SLisandro Dalcin /* set local and global sizes if not set already */ 3403b51a4376SLisandro Dalcin if (mat->rmap->n < 0) mat->rmap->n = M; 3404b51a4376SLisandro Dalcin if (mat->cmap->n < 0) mat->cmap->n = N; 3405b51a4376SLisandro Dalcin if (mat->rmap->N < 0) mat->rmap->N = M; 3406b51a4376SLisandro Dalcin if (mat->cmap->N < 0) mat->cmap->N = N; 3407b51a4376SLisandro Dalcin ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3408b51a4376SLisandro Dalcin ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3409b51a4376SLisandro Dalcin 3410b51a4376SLisandro Dalcin /* check if the matrix sizes are correct */ 3411b51a4376SLisandro Dalcin ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 
34122c71b3e2SJacob Faibussowitsch PetscCheckFalse(M != rows || N != cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols); 3413b51a4376SLisandro Dalcin ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 3414b51a4376SLisandro Dalcin ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 3415b51a4376SLisandro Dalcin mbs = m/bs; nbs = n/bs; 3416b51a4376SLisandro Dalcin 3417b51a4376SLisandro Dalcin /* read in row lengths, column indices and nonzero values */ 3418b51a4376SLisandro Dalcin ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3419b51a4376SLisandro Dalcin ierr = PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT);CHKERRQ(ierr); 3420b51a4376SLisandro Dalcin rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3421b51a4376SLisandro Dalcin sum = rowidxs[m]; 34222c71b3e2SJacob Faibussowitsch PetscCheckFalse(sum != nz,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum); 3423b51a4376SLisandro Dalcin 3424b51a4376SLisandro Dalcin /* read in column indices and nonzero values */ 3425b51a4376SLisandro Dalcin ierr = PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals);CHKERRQ(ierr); 3426b51a4376SLisandro Dalcin ierr = PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT);CHKERRQ(ierr); 3427b51a4376SLisandro Dalcin ierr = PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR);CHKERRQ(ierr); 3428b51a4376SLisandro Dalcin 3429b51a4376SLisandro Dalcin { /* preallocate matrix storage */ 3430b51a4376SLisandro Dalcin PetscBT bt; /* helper bit set to count nonzeros */ 3431b51a4376SLisandro Dalcin PetscInt *nnz; 3432618cc2edSLisandro Dalcin PetscBool sbaij; 3433b51a4376SLisandro Dalcin 3434b51a4376SLisandro Dalcin ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr); 3435b51a4376SLisandro Dalcin ierr = 
PetscCalloc1(mbs,&nnz);CHKERRQ(ierr); 3436618cc2edSLisandro Dalcin ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij);CHKERRQ(ierr); 3437b51a4376SLisandro Dalcin for (i=0; i<mbs; i++) { 3438b51a4376SLisandro Dalcin ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr); 3439618cc2edSLisandro Dalcin for (k=0; k<bs; k++) { 3440618cc2edSLisandro Dalcin PetscInt row = bs*i + k; 3441618cc2edSLisandro Dalcin for (j=rowidxs[row]; j<rowidxs[row+1]; j++) { 3442618cc2edSLisandro Dalcin PetscInt col = colidxs[j]; 3443618cc2edSLisandro Dalcin if (!sbaij || col >= row) 3444618cc2edSLisandro Dalcin if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++; 3445618cc2edSLisandro Dalcin } 3446618cc2edSLisandro Dalcin } 3447b51a4376SLisandro Dalcin } 3448b51a4376SLisandro Dalcin ierr = PetscBTDestroy(&bt);CHKERRQ(ierr); 3449b51a4376SLisandro Dalcin ierr = MatSeqBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr); 3450618cc2edSLisandro Dalcin ierr = MatSeqSBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr); 3451b51a4376SLisandro Dalcin ierr = PetscFree(nnz);CHKERRQ(ierr); 3452b51a4376SLisandro Dalcin } 3453b51a4376SLisandro Dalcin 3454b51a4376SLisandro Dalcin /* store matrix values */ 3455b51a4376SLisandro Dalcin for (i=0; i<m; i++) { 3456b51a4376SLisandro Dalcin PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1]; 3457618cc2edSLisandro Dalcin ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr); 3458b51a4376SLisandro Dalcin } 3459b51a4376SLisandro Dalcin 3460b51a4376SLisandro Dalcin ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3461b51a4376SLisandro Dalcin ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3462b51a4376SLisandro Dalcin ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3463b51a4376SLisandro Dalcin ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3464b51a4376SLisandro Dalcin PetscFunctionReturn(0); 3465b51a4376SLisandro Dalcin } 3466b51a4376SLisandro Dalcin 3467b51a4376SLisandro Dalcin PetscErrorCode MatLoad_SeqBAIJ(Mat 
mat,PetscViewer viewer) 3468b51a4376SLisandro Dalcin { 3469b51a4376SLisandro Dalcin PetscErrorCode ierr; 34707f489da9SVaclav Hapla PetscBool isbinary; 3471f501eaabSShri Abhyankar 3472f501eaabSShri Abhyankar PetscFunctionBegin; 34737f489da9SVaclav Hapla ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 34742c71b3e2SJacob Faibussowitsch PetscCheckFalse(!isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 3475b51a4376SLisandro Dalcin ierr = MatLoad_SeqBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 3476f501eaabSShri Abhyankar PetscFunctionReturn(0); 3477f501eaabSShri Abhyankar } 3478f501eaabSShri Abhyankar 3479273d9f13SBarry Smith /*@C 3480273d9f13SBarry Smith MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3481273d9f13SBarry Smith compressed row) format. For good matrix assembly performance the 3482273d9f13SBarry Smith user should preallocate the matrix storage by setting the parameter nz 3483273d9f13SBarry Smith (or the array nnz). By setting these parameters accurately, performance 3484273d9f13SBarry Smith during matrix assembly can be increased by more than a factor of 50. 34852593348eSBarry Smith 3486d083f849SBarry Smith Collective 3487273d9f13SBarry Smith 3488273d9f13SBarry Smith Input Parameters: 3489273d9f13SBarry Smith + comm - MPI communicator, set to PETSC_COMM_SELF 3490bb7ae925SBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3491bb7ae925SBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3492273d9f13SBarry Smith . m - number of rows 3493273d9f13SBarry Smith . n - number of columns 349435d8aa7fSBarry Smith . 
nz - number of nonzero blocks per block row (same for all rows) 349535d8aa7fSBarry Smith - nnz - array containing the number of nonzero blocks in the various block rows 34960298fd71SBarry Smith (possibly different for each block row) or NULL 3497273d9f13SBarry Smith 3498273d9f13SBarry Smith Output Parameter: 3499273d9f13SBarry Smith . A - the matrix 3500273d9f13SBarry Smith 3501175b88e8SBarry Smith It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3502f6f02116SRichard Tran Mills MatXXXXSetPreallocation() paradigm instead of this routine directly. 3503175b88e8SBarry Smith [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3504175b88e8SBarry Smith 3505273d9f13SBarry Smith Options Database Keys: 3506a2b725a8SWilliam Gropp + -mat_no_unroll - uses code that does not unroll the loops in the 3507273d9f13SBarry Smith block calculations (much slower) 3508a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use 3509273d9f13SBarry Smith 3510273d9f13SBarry Smith Level: intermediate 3511273d9f13SBarry Smith 3512273d9f13SBarry Smith Notes: 3513d1be2dadSMatthew Knepley The number of rows and columns must be divisible by blocksize. 3514d1be2dadSMatthew Knepley 351549a6f317SBarry Smith If the nnz parameter is given then the nz parameter is ignored 351649a6f317SBarry Smith 351735d8aa7fSBarry Smith A nonzero block is any block that has 1 or more nonzeros in it 351835d8aa7fSBarry Smith 3519273d9f13SBarry Smith The block AIJ format is fully compatible with standard Fortran 77 3520273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 3521273d9f13SBarry Smith either one (as in Fortran) or zero. See the users' manual for details. 3522273d9f13SBarry Smith 3523273d9f13SBarry Smith Specify the preallocated storage with either nz or nnz (not both). 35240298fd71SBarry Smith Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3525a7f22e61SSatish Balay allocation. 
See Users-Manual: ch_mat for details. 3526273d9f13SBarry Smith 3527273d9f13SBarry Smith 352869b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3529273d9f13SBarry Smith @*/ 35307087cfbeSBarry Smith PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3531273d9f13SBarry Smith { 3532dfbe8321SBarry Smith PetscErrorCode ierr; 3533273d9f13SBarry Smith 3534273d9f13SBarry Smith PetscFunctionBegin; 3535f69a0ea3SMatthew Knepley ierr = MatCreate(comm,A);CHKERRQ(ierr); 3536f69a0ea3SMatthew Knepley ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3537273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3538367daffbSBarry Smith ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3539273d9f13SBarry Smith PetscFunctionReturn(0); 3540273d9f13SBarry Smith } 3541273d9f13SBarry Smith 3542273d9f13SBarry Smith /*@C 3543273d9f13SBarry Smith MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3544273d9f13SBarry Smith per row in the matrix. For good matrix assembly performance the 3545273d9f13SBarry Smith user should preallocate the matrix storage by setting the parameter nz 3546273d9f13SBarry Smith (or the array nnz). By setting these parameters accurately, performance 3547273d9f13SBarry Smith during matrix assembly can be increased by more than a factor of 50. 3548273d9f13SBarry Smith 3549d083f849SBarry Smith Collective 3550273d9f13SBarry Smith 3551273d9f13SBarry Smith Input Parameters: 35521c4f3114SJed Brown + B - the matrix 3553bb7ae925SBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3554bb7ae925SBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3555273d9f13SBarry Smith . 
nz - number of block nonzeros per block row (same for all rows) 3556273d9f13SBarry Smith - nnz - array containing the number of block nonzeros in the various block rows 35570298fd71SBarry Smith (possibly different for each block row) or NULL 3558273d9f13SBarry Smith 3559273d9f13SBarry Smith Options Database Keys: 3560a2b725a8SWilliam Gropp + -mat_no_unroll - uses code that does not unroll the loops in the 3561273d9f13SBarry Smith block calculations (much slower) 3562a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use 3563273d9f13SBarry Smith 3564273d9f13SBarry Smith Level: intermediate 3565273d9f13SBarry Smith 3566273d9f13SBarry Smith Notes: 356749a6f317SBarry Smith If the nnz parameter is given then the nz parameter is ignored 356849a6f317SBarry Smith 3569aa95bbe8SBarry Smith You can call MatGetInfo() to get information on how effective the preallocation was; 3570aa95bbe8SBarry Smith for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3571aa95bbe8SBarry Smith You can also run with the option -info and look for messages with the string 3572aa95bbe8SBarry Smith malloc in them to see if additional memory allocation was needed. 3573aa95bbe8SBarry Smith 3574273d9f13SBarry Smith The block AIJ format is fully compatible with standard Fortran 77 3575273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 3576273d9f13SBarry Smith either one (as in Fortran) or zero. See the users' manual for details. 3577273d9f13SBarry Smith 3578273d9f13SBarry Smith Specify the preallocated storage with either nz or nnz (not both). 35790298fd71SBarry Smith Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3580a7f22e61SSatish Balay allocation. See Users-Manual: ch_mat for details. 
3581273d9f13SBarry Smith 358269b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3583273d9f13SBarry Smith @*/ 35847087cfbeSBarry Smith PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3585273d9f13SBarry Smith { 35864ac538c5SBarry Smith PetscErrorCode ierr; 3587273d9f13SBarry Smith 3588273d9f13SBarry Smith PetscFunctionBegin; 35896ba663aaSJed Brown PetscValidHeaderSpecific(B,MAT_CLASSID,1); 35906ba663aaSJed Brown PetscValidType(B,1); 35916ba663aaSJed Brown PetscValidLogicalCollectiveInt(B,bs,2); 35924ac538c5SBarry Smith ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3593273d9f13SBarry Smith PetscFunctionReturn(0); 3594273d9f13SBarry Smith } 3595a1d92eedSBarry Smith 3596725b52f3SLisandro Dalcin /*@C 3597664954b6SBarry Smith MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 3598725b52f3SLisandro Dalcin 3599d083f849SBarry Smith Collective 3600725b52f3SLisandro Dalcin 3601725b52f3SLisandro Dalcin Input Parameters: 36021c4f3114SJed Brown + B - the matrix 3603725b52f3SLisandro Dalcin . i - the indices into j for the start of each local row (starts with zero) 3604725b52f3SLisandro Dalcin . j - the column indices for each local row (starts with zero) these must be sorted for each row 3605725b52f3SLisandro Dalcin - v - optional values in the matrix 3606725b52f3SLisandro Dalcin 3607664954b6SBarry Smith Level: advanced 3608725b52f3SLisandro Dalcin 36093adadaf3SJed Brown Notes: 36103adadaf3SJed Brown The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. 
For example, C programs 36113adadaf3SJed Brown may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 36123adadaf3SJed Brown over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 36133adadaf3SJed Brown MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 36143adadaf3SJed Brown block column and the second index is over columns within a block. 36153adadaf3SJed Brown 3616664954b6SBarry Smith Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3617664954b6SBarry Smith 3618725b52f3SLisandro Dalcin .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3619725b52f3SLisandro Dalcin @*/ 36207087cfbeSBarry Smith PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3621725b52f3SLisandro Dalcin { 36224ac538c5SBarry Smith PetscErrorCode ierr; 3623725b52f3SLisandro Dalcin 3624725b52f3SLisandro Dalcin PetscFunctionBegin; 36256ba663aaSJed Brown PetscValidHeaderSpecific(B,MAT_CLASSID,1); 36266ba663aaSJed Brown PetscValidType(B,1); 36276ba663aaSJed Brown PetscValidLogicalCollectiveInt(B,bs,2); 36284ac538c5SBarry Smith ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3629725b52f3SLisandro Dalcin PetscFunctionReturn(0); 3630725b52f3SLisandro Dalcin } 3631725b52f3SLisandro Dalcin 3632c75a6043SHong Zhang /*@ 3633dfb205c3SBarry Smith MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 
   Collective

   Input Parameters:
+  comm - must be an MPI communicator of size 1
.  bs - size of block
.  m - number of rows
.  n - number of columns
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of blocks in the previous block row (row-1) of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays are not copied by this routine; the user must free these arrays
    once the matrix is destroyed

       You cannot set new nonzero locations into this matrix; doing so will generate an error.

       The i and j indices are 0 based

       When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).

      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
      with column-major ordering within blocks.
3665dfb205c3SBarry Smith 366669b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3667c75a6043SHong Zhang 3668c75a6043SHong Zhang @*/ 3669c3c607ccSBarry Smith PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3670c75a6043SHong Zhang { 3671c75a6043SHong Zhang PetscErrorCode ierr; 3672c75a6043SHong Zhang PetscInt ii; 3673c75a6043SHong Zhang Mat_SeqBAIJ *baij; 3674c75a6043SHong Zhang 3675c75a6043SHong Zhang PetscFunctionBegin; 36762c71b3e2SJacob Faibussowitsch PetscCheckFalse(bs != 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %" PetscInt_FMT " > 1 is not supported yet",bs); 36772c71b3e2SJacob Faibussowitsch PetscCheckFalse(m > 0 && i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3678c75a6043SHong Zhang 3679c75a6043SHong Zhang ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3680c75a6043SHong Zhang ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3681c75a6043SHong Zhang ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3682f4259b30SLisandro Dalcin ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 3683c75a6043SHong Zhang baij = (Mat_SeqBAIJ*)(*mat)->data; 3684dcca6d9dSJed Brown ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 36853bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3686c75a6043SHong Zhang 3687c75a6043SHong Zhang baij->i = i; 3688c75a6043SHong Zhang baij->j = j; 3689c75a6043SHong Zhang baij->a = a; 369026fbe8dcSKarl Rupp 3691c75a6043SHong Zhang baij->singlemalloc = PETSC_FALSE; 3692c75a6043SHong Zhang baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3693e6b907acSBarry Smith baij->free_a = PETSC_FALSE; 3694e6b907acSBarry Smith baij->free_ij = PETSC_FALSE; 3695c75a6043SHong Zhang 3696c75a6043SHong Zhang for (ii=0; ii<m; ii++) { 3697c75a6043SHong 
Zhang baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 36986bdcaf15SBarry Smith PetscCheck(i[ii+1] - i[ii] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT,ii,i[ii+1] - i[ii]); 3699c75a6043SHong Zhang } 370076bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { 3701c75a6043SHong Zhang for (ii=0; ii<baij->i[m]; ii++) { 37026bdcaf15SBarry Smith PetscCheck(j[ii] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT,ii,j[ii]); 37036bdcaf15SBarry Smith PetscCheck(j[ii] <= n - 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT,ii,j[ii]); 3704c75a6043SHong Zhang } 370576bd3646SJed Brown } 3706c75a6043SHong Zhang 3707c75a6043SHong Zhang ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3708c75a6043SHong Zhang ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3709c75a6043SHong Zhang PetscFunctionReturn(0); 3710c75a6043SHong Zhang } 3711bdf6f3fcSHong Zhang 3712bdf6f3fcSHong Zhang PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3713bdf6f3fcSHong Zhang { 3714bdf6f3fcSHong Zhang PetscErrorCode ierr; 37158761c3d6SHong Zhang PetscMPIInt size; 3716bdf6f3fcSHong Zhang 3717bdf6f3fcSHong Zhang PetscFunctionBegin; 3718ffc4695bSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 37198761c3d6SHong Zhang if (size == 1 && scall == MAT_REUSE_MATRIX) { 37208761c3d6SHong Zhang ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 37218761c3d6SHong Zhang } else { 3722bdf6f3fcSHong Zhang ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 37238761c3d6SHong Zhang } 3724bdf6f3fcSHong Zhang PetscFunctionReturn(0); 3725bdf6f3fcSHong Zhang } 3726