1be1d678aSKris Buschelman 22593348eSBarry Smith /* 3b6490206SBarry Smith Defines the basic matrix operations for the BAIJ (compressed row) 42593348eSBarry Smith matrix storage format. 52593348eSBarry Smith */ 6c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7c6db04a5SJed Brown #include <petscblaslapack.h> 8af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h> 9af0996ceSBarry Smith #include <petsc/private/kernels/blockmatmult.h> 1043516a2dSKris Buschelman 117ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 127ea3e4caSstefano_zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 137ea3e4caSstefano_zampini #endif 147ea3e4caSstefano_zampini 15b5b72c8aSIrina Sokolova #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16fd9d3c67SJed Brown PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*); 17b5b72c8aSIrina Sokolova #endif 18c9225affSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 19b5b72c8aSIrina Sokolova 20713ccfa9SJed Brown PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 21b01c7715SBarry Smith { 22b01c7715SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 236849ba73SBarry Smith PetscErrorCode ierr; 24de80f912SBarry Smith PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 257f0c90edSBarry Smith MatScalar *v = a->a,*odiag,*diag,work[25],*v_work; 2662bba022SBarry Smith PetscReal shift = 0.0; 271a9391e3SHong Zhang PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 28b01c7715SBarry Smith 29b01c7715SBarry Smith PetscFunctionBegin; 30a455e926SHong Zhang allowzeropivot = PetscNot(A->erroriffailure); 31a455e926SHong Zhang 329797317bSBarry Smith if (a->idiagvalid) { 339797317bSBarry Smith if (values) *values = a->idiag; 349797317bSBarry Smith PetscFunctionReturn(0); 359797317bSBarry Smith } 36b01c7715SBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 37b01c7715SBarry Smith diag_offset = a->diag; 38b01c7715SBarry Smith if (!a->idiag) { 397f0c90edSBarry Smith ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr); 407f0c90edSBarry Smith ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 41b01c7715SBarry Smith } 42b01c7715SBarry Smith diag = a->idiag; 43bbead8a2SBarry Smith if (values) *values = a->idiag; 44b01c7715SBarry Smith /* factor and invert each block */ 45521d7252SBarry Smith switch (bs) { 46ab040260SJed Brown case 1: 47ab040260SJed Brown for (i=0; i<mbs; i++) { 48ab040260SJed Brown odiag = v + 1*diag_offset[i]; 49ab040260SJed Brown diag[0] = odiag[0]; 50ec1892c8SHong Zhang 51ec1892c8SHong Zhang if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 52ec1892c8SHong Zhang if (allowzeropivot) { 537b6c816cSBarry Smith A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 547b6c816cSBarry Smith A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 557b6c816cSBarry Smith A->factorerror_zeropivot_row = i; 56ec1892c8SHong Zhang ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 577b6c816cSBarry Smith } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 58ec1892c8SHong Zhang } 59ec1892c8SHong Zhang 60d4a378daSJed Brown diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 61ab040260SJed Brown diag += 1; 62ab040260SJed Brown } 63ab040260SJed Brown break; 64b01c7715SBarry Smith case 2: 65b01c7715SBarry Smith for (i=0; i<mbs; i++) { 66b01c7715SBarry Smith odiag = v + 4*diag_offset[i]; 67b01c7715SBarry Smith diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68a455e926SHong Zhang ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 697b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 70b01c7715SBarry Smith diag += 4; 71b01c7715SBarry Smith } 72b01c7715SBarry Smith break; 73b01c7715SBarry Smith case 3: 74b01c7715SBarry Smith for (i=0; i<mbs; i++) { 75b01c7715SBarry Smith odiag = v + 9*diag_offset[i]; 76b01c7715SBarry Smith diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 77b01c7715SBarry Smith diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 78b01c7715SBarry Smith diag[8] = odiag[8]; 79a455e926SHong Zhang ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 807b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 81b01c7715SBarry Smith diag += 9; 82b01c7715SBarry Smith } 83b01c7715SBarry Smith break; 84b01c7715SBarry Smith case 4: 85b01c7715SBarry Smith for (i=0; i<mbs; i++) { 86b01c7715SBarry Smith odiag = v + 16*diag_offset[i]; 87580bdb30SBarry Smith ierr = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr); 88a455e926SHong Zhang ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 897b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 90b01c7715SBarry Smith diag += 16; 91b01c7715SBarry Smith } 92b01c7715SBarry Smith break; 93b01c7715SBarry Smith case 5: 94b01c7715SBarry Smith for (i=0; i<mbs; i++) { 95b01c7715SBarry Smith odiag = v + 25*diag_offset[i]; 96580bdb30SBarry Smith ierr = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr); 97a455e926SHong Zhang ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 987b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 99b01c7715SBarry Smith diag += 25; 100b01c7715SBarry Smith } 101b01c7715SBarry Smith break; 102d49b2adcSBarry Smith case 6: 103d49b2adcSBarry Smith for (i=0; i<mbs; i++) { 104d49b2adcSBarry Smith odiag = v + 36*diag_offset[i]; 105580bdb30SBarry Smith ierr = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr); 106a455e926SHong Zhang ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 1077b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 108d49b2adcSBarry Smith diag += 36; 109d49b2adcSBarry Smith } 110d49b2adcSBarry Smith break; 111de80f912SBarry Smith case 7: 112de80f912SBarry Smith for (i=0; i<mbs; i++) { 113de80f912SBarry Smith odiag = v + 49*diag_offset[i]; 114580bdb30SBarry Smith ierr = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr); 115a455e926SHong Zhang ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 1167b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 117de80f912SBarry Smith diag += 49; 118de80f912SBarry Smith } 119de80f912SBarry Smith break; 120b01c7715SBarry Smith default: 121dcca6d9dSJed Brown ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 122de80f912SBarry Smith for (i=0; i<mbs; i++) { 123de80f912SBarry Smith odiag = v + bs2*diag_offset[i]; 124580bdb30SBarry Smith ierr = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr); 1255f8bbccaSHong Zhang ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 1267b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 127de80f912SBarry Smith diag += bs2; 128de80f912SBarry Smith } 129de80f912SBarry Smith ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 130b01c7715SBarry Smith } 131b01c7715SBarry Smith a->idiagvalid = PETSC_TRUE; 132b01c7715SBarry Smith PetscFunctionReturn(0); 133b01c7715SBarry Smith } 134b01c7715SBarry Smith 135e48d15efSToby Isaac PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1366d3beeddSMatthew Knepley { 1376d3beeddSMatthew Knepley Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 138e48d15efSToby Isaac PetscScalar *x,*work,*w,*workt,*t; 139e48d15efSToby Isaac const MatScalar *v,*aa = a->a, *idiag; 140e48d15efSToby Isaac const PetscScalar *b,*xb; 1415455b99fSToby Isaac PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 1426d3beeddSMatthew Knepley PetscErrorCode ierr; 143e48d15efSToby Isaac PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 144c1ac3661SBarry Smith const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 145b01c7715SBarry Smith 146b01c7715SBarry Smith PetscFunctionBegin; 147b01c7715SBarry Smith its = its*lits; 148e32f2f54SBarry Smith if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 149e32f2f54SBarry Smith if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 15052c4f950SSatish Balay if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift"); 15152c4f950SSatish Balay if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-trivial relaxation factor"); 15252c4f950SSatish Balay if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts"); 153b01c7715SBarry Smith 1540298fd71SBarry Smith if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 155b01c7715SBarry Smith 156b2ec919aSToby Isaac if (!m) PetscFunctionReturn(0); 157b01c7715SBarry Smith diag = a->diag; 158b01c7715SBarry Smith idiag = a->idiag; 159de80f912SBarry Smith k = PetscMax(A->rmap->n,A->cmap->n); 160e48d15efSToby Isaac if (!a->mult_work) { 161f361c04dSBarry Smith ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 162de80f912SBarry Smith } 1633475c22fSBarry Smith if (!a->sor_workt) { 164f361c04dSBarry Smith ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 165de80f912SBarry Smith } 166de80f912SBarry Smith if (!a->sor_work) { 167785e854fSJed Brown ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 168de80f912SBarry Smith } 1693475c22fSBarry Smith work = a->mult_work; 1703475c22fSBarry Smith t = a->sor_workt; 171de80f912SBarry Smith w = a->sor_work; 172de80f912SBarry Smith 173de80f912SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 174de80f912SBarry Smith ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 175de80f912SBarry Smith 176de80f912SBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 177de80f912SBarry Smith if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 178e48d15efSToby Isaac switch (bs) { 179e48d15efSToby Isaac case 1: 180e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(x,idiag,b); 181e48d15efSToby Isaac t[0] = b[0]; 182e48d15efSToby Isaac i2 = 1; 183e48d15efSToby Isaac idiag += 1; 184e48d15efSToby Isaac for (i=1; i<m; i++) { 185e48d15efSToby Isaac v = aa + ai[i]; 186e48d15efSToby Isaac vi = aj + ai[i]; 187e48d15efSToby Isaac nz = diag[i] - ai[i]; 188e48d15efSToby Isaac s[0] = b[i2]; 189e48d15efSToby Isaac for (j=0; j<nz; j++) { 190e48d15efSToby Isaac xw[0] = x[vi[j]]; 191e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 192e48d15efSToby Isaac } 193e48d15efSToby Isaac t[i2] = s[0]; 194e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 195e48d15efSToby Isaac x[i2] = xw[0]; 196e48d15efSToby Isaac idiag += 1; 197e48d15efSToby Isaac i2 += 1; 198e48d15efSToby Isaac } 199e48d15efSToby Isaac break; 200e48d15efSToby Isaac case 2: 201e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(x,idiag,b); 202e48d15efSToby Isaac t[0] = b[0]; t[1] = b[1]; 203e48d15efSToby Isaac i2 = 2; 204e48d15efSToby Isaac idiag += 4; 205e48d15efSToby Isaac for (i=1; i<m; i++) { 206e48d15efSToby Isaac v = aa + 4*ai[i]; 207e48d15efSToby Isaac vi = aj + ai[i]; 208e48d15efSToby Isaac nz = diag[i] - ai[i]; 209e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; 210e48d15efSToby Isaac for (j=0; j<nz; j++) { 211e48d15efSToby Isaac idx = 2*vi[j]; 212e48d15efSToby Isaac it = 4*j; 213e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; 214e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 215e48d15efSToby Isaac } 216e48d15efSToby Isaac t[i2] = s[0]; t[i2+1] = s[1]; 217e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 218e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; 219e48d15efSToby Isaac idiag += 4; 220e48d15efSToby Isaac i2 += 2; 221e48d15efSToby Isaac } 222e48d15efSToby Isaac break; 223e48d15efSToby Isaac case 3: 224e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(x,idiag,b); 225e48d15efSToby Isaac t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 226e48d15efSToby Isaac i2 = 3; 227e48d15efSToby Isaac idiag += 9; 228e48d15efSToby Isaac for (i=1; i<m; i++) { 229e48d15efSToby Isaac v = aa + 9*ai[i]; 230e48d15efSToby Isaac vi = aj + ai[i]; 231e48d15efSToby Isaac nz = diag[i] - ai[i]; 232e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 233e48d15efSToby Isaac while (nz--) { 234e48d15efSToby Isaac idx = 3*(*vi++); 235e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 236e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 237e48d15efSToby Isaac v += 9; 238e48d15efSToby Isaac } 239e48d15efSToby Isaac t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 240e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 241e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 242e48d15efSToby Isaac idiag += 9; 243e48d15efSToby Isaac i2 += 3; 244e48d15efSToby Isaac } 245e48d15efSToby Isaac break; 246e48d15efSToby Isaac case 4: 247e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(x,idiag,b); 248e48d15efSToby Isaac t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 249e48d15efSToby Isaac i2 = 4; 250e48d15efSToby Isaac idiag += 16; 251e48d15efSToby Isaac for (i=1; i<m; i++) { 252e48d15efSToby Isaac v = aa + 16*ai[i]; 253e48d15efSToby Isaac vi = aj + ai[i]; 254e48d15efSToby Isaac nz = diag[i] - ai[i]; 255e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 256e48d15efSToby Isaac while (nz--) { 257e48d15efSToby Isaac idx = 4*(*vi++); 258e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 259e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 260e48d15efSToby Isaac v += 16; 261e48d15efSToby Isaac } 262e48d15efSToby Isaac t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 263e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 264e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 265e48d15efSToby Isaac idiag += 16; 266e48d15efSToby Isaac i2 += 4; 267e48d15efSToby Isaac } 268e48d15efSToby Isaac break; 269e48d15efSToby Isaac case 5: 270e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(x,idiag,b); 271e48d15efSToby Isaac t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 272e48d15efSToby Isaac i2 = 5; 273e48d15efSToby Isaac idiag += 25; 274e48d15efSToby Isaac for (i=1; i<m; i++) { 275e48d15efSToby Isaac v = aa + 25*ai[i]; 276e48d15efSToby Isaac vi = aj + ai[i]; 277e48d15efSToby Isaac nz = diag[i] - ai[i]; 278e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 279e48d15efSToby Isaac while (nz--) { 280e48d15efSToby Isaac idx = 5*(*vi++); 281e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 282e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 283e48d15efSToby Isaac v += 25; 284e48d15efSToby Isaac } 285e48d15efSToby Isaac t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 286e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 287e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 288e48d15efSToby Isaac idiag += 25; 289e48d15efSToby Isaac i2 += 5; 290e48d15efSToby Isaac } 291e48d15efSToby Isaac break; 292e48d15efSToby Isaac case 6: 293e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(x,idiag,b); 294e48d15efSToby Isaac t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 295e48d15efSToby Isaac i2 = 6; 296e48d15efSToby Isaac idiag += 36; 297e48d15efSToby Isaac for (i=1; i<m; i++) { 298e48d15efSToby Isaac v = aa + 36*ai[i]; 299e48d15efSToby Isaac vi = aj + ai[i]; 300e48d15efSToby Isaac nz = diag[i] - ai[i]; 301e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 302e48d15efSToby Isaac while (nz--) { 303e48d15efSToby Isaac idx = 6*(*vi++); 304e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 305e48d15efSToby Isaac xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 306e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 307e48d15efSToby Isaac v += 36; 308e48d15efSToby Isaac } 309e48d15efSToby Isaac t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 310e48d15efSToby Isaac t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 311e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 312e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 313e48d15efSToby Isaac idiag += 36; 314e48d15efSToby Isaac i2 += 6; 315e48d15efSToby Isaac } 316e48d15efSToby Isaac break; 317e48d15efSToby Isaac case 7: 318e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(x,idiag,b); 319e48d15efSToby Isaac t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 320e48d15efSToby Isaac t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 321e48d15efSToby Isaac i2 = 7; 322e48d15efSToby Isaac idiag += 49; 323e48d15efSToby Isaac for (i=1; i<m; i++) { 324e48d15efSToby Isaac v = aa + 49*ai[i]; 325e48d15efSToby Isaac vi = aj + ai[i]; 326e48d15efSToby Isaac nz = diag[i] - ai[i]; 327e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 328e48d15efSToby Isaac s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 329e48d15efSToby Isaac while (nz--) { 330e48d15efSToby Isaac idx = 7*(*vi++); 331e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 332e48d15efSToby Isaac xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 333e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 334e48d15efSToby Isaac v += 49; 335e48d15efSToby Isaac } 336e48d15efSToby Isaac t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 337e48d15efSToby Isaac t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 338e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 339e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 340e48d15efSToby Isaac x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 341e48d15efSToby Isaac idiag += 49; 342e48d15efSToby Isaac i2 += 7; 343e48d15efSToby Isaac } 344e48d15efSToby Isaac break; 345e48d15efSToby Isaac default: 34696b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 347580bdb30SBarry Smith ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr); 348de80f912SBarry Smith i2 = bs; 349de80f912SBarry Smith idiag += bs2; 350de80f912SBarry Smith for (i=1; i<m; i++) { 351de80f912SBarry Smith v = aa + bs2*ai[i]; 352de80f912SBarry Smith vi = aj + ai[i]; 353de80f912SBarry Smith nz = diag[i] - ai[i]; 354de80f912SBarry Smith 355580bdb30SBarry Smith ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 356de80f912SBarry Smith /* copy all rows of x that are needed into contiguous space */ 357de80f912SBarry Smith workt = work; 358de80f912SBarry Smith for (j=0; j<nz; j++) { 359580bdb30SBarry Smith ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 360de80f912SBarry Smith workt += bs; 361de80f912SBarry Smith } 36296b95a6bSBarry Smith PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 363580bdb30SBarry Smith ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr); 36496b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 365de80f912SBarry Smith 366de80f912SBarry Smith idiag += bs2; 367de80f912SBarry Smith i2 += bs; 368de80f912SBarry Smith } 369e48d15efSToby Isaac break; 370e48d15efSToby Isaac } 371de80f912SBarry Smith /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 372e48d15efSToby Isaac ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 373e48d15efSToby Isaac xb = t; 374de80f912SBarry Smith } 375e48d15efSToby Isaac else xb = b; 376de80f912SBarry Smith if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 377e48d15efSToby Isaac idiag = a->idiag+bs2*(a->mbs-1); 378e48d15efSToby Isaac i2 = bs * (m-1); 379e48d15efSToby Isaac switch (bs) { 380e48d15efSToby Isaac case 1: 381e48d15efSToby Isaac s[0] = xb[i2]; 382e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 383e48d15efSToby Isaac x[i2] = xw[0]; 384e48d15efSToby Isaac i2 -= 1; 385e48d15efSToby Isaac for (i=m-2; i>=0; i--) { 386e48d15efSToby Isaac v = aa + (diag[i]+1); 387e48d15efSToby Isaac vi = aj + diag[i] + 1; 388e48d15efSToby Isaac nz = ai[i+1] - diag[i] - 1; 389e48d15efSToby Isaac s[0] = xb[i2]; 390e48d15efSToby Isaac for (j=0; j<nz; j++) { 391e48d15efSToby Isaac xw[0] = x[vi[j]]; 392e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 393e48d15efSToby Isaac } 394e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 395e48d15efSToby Isaac x[i2] = xw[0]; 396e48d15efSToby Isaac idiag -= 1; 397e48d15efSToby Isaac i2 -= 1; 398e48d15efSToby Isaac } 399e48d15efSToby Isaac break; 400e48d15efSToby Isaac case 2: 401e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; 402e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 403e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; 404e48d15efSToby Isaac i2 -= 2; 405e48d15efSToby Isaac idiag -= 4; 406e48d15efSToby Isaac for (i=m-2; i>=0; i--) { 407e48d15efSToby Isaac v = aa + 4*(diag[i] + 1); 408e48d15efSToby Isaac vi = aj + diag[i] + 1; 409e48d15efSToby Isaac nz = ai[i+1] - diag[i] - 1; 410e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; 411e48d15efSToby Isaac for (j=0; j<nz; j++) { 412e48d15efSToby Isaac idx = 2*vi[j]; 413e48d15efSToby Isaac it = 4*j; 414e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; 415e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 416e48d15efSToby Isaac } 417e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 418e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; 419e48d15efSToby Isaac idiag -= 4; 420e48d15efSToby Isaac i2 -= 2; 421e48d15efSToby Isaac } 422e48d15efSToby Isaac break; 423e48d15efSToby Isaac case 3: 424e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 425e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 426e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 427e48d15efSToby Isaac i2 -= 3; 428e48d15efSToby Isaac idiag -= 9; 429e48d15efSToby Isaac for (i=m-2; i>=0; i--) { 430e48d15efSToby Isaac v = aa + 9*(diag[i]+1); 431e48d15efSToby Isaac vi = aj + diag[i] + 1; 432e48d15efSToby Isaac nz = ai[i+1] - diag[i] - 1; 433e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 434e48d15efSToby Isaac while (nz--) { 435e48d15efSToby Isaac idx = 3*(*vi++); 436e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 437e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 438e48d15efSToby Isaac v += 9; 439e48d15efSToby Isaac } 440e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 441e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 442e48d15efSToby Isaac idiag -= 9; 443e48d15efSToby Isaac i2 -= 3; 444e48d15efSToby Isaac } 445e48d15efSToby Isaac break; 446e48d15efSToby Isaac case 4: 447e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 448e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 449e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 450e48d15efSToby Isaac i2 -= 4; 451e48d15efSToby Isaac idiag -= 16; 452e48d15efSToby Isaac for (i=m-2; i>=0; i--) { 453e48d15efSToby Isaac v = aa + 16*(diag[i]+1); 454e48d15efSToby Isaac vi = aj + diag[i] + 1; 455e48d15efSToby Isaac nz = ai[i+1] - diag[i] - 1; 456e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 457e48d15efSToby Isaac while (nz--) { 458e48d15efSToby Isaac idx = 4*(*vi++); 459e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 460e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 461e48d15efSToby Isaac v += 16; 462e48d15efSToby Isaac } 463e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 464e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 465e48d15efSToby Isaac idiag -= 16; 466e48d15efSToby Isaac i2 -= 4; 467e48d15efSToby Isaac } 468e48d15efSToby Isaac break; 469e48d15efSToby Isaac case 5: 470e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 471e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 472e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 473e48d15efSToby Isaac i2 -= 5; 474e48d15efSToby Isaac idiag -= 25; 475e48d15efSToby Isaac for (i=m-2; i>=0; i--) { 476e48d15efSToby Isaac v = aa + 25*(diag[i]+1); 477e48d15efSToby Isaac vi = aj + diag[i] + 1; 478e48d15efSToby Isaac nz = ai[i+1] - diag[i] - 1; 479e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 480e48d15efSToby Isaac while (nz--) { 481e48d15efSToby Isaac idx = 5*(*vi++); 482e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 483e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 484e48d15efSToby Isaac v += 25; 485e48d15efSToby Isaac } 486e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 487e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 488e48d15efSToby Isaac idiag -= 25; 489e48d15efSToby Isaac i2 -= 5; 490e48d15efSToby Isaac } 491e48d15efSToby Isaac break; 492e48d15efSToby Isaac case 6: 493e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 494e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 495e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 496e48d15efSToby Isaac i2 -= 6; 497e48d15efSToby Isaac idiag -= 36; 498e48d15efSToby Isaac for (i=m-2; i>=0; i--) { 499e48d15efSToby Isaac v = aa + 36*(diag[i]+1); 500e48d15efSToby Isaac vi = aj + diag[i] + 1; 501e48d15efSToby Isaac nz = ai[i+1] - diag[i] - 1; 502e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 503e48d15efSToby Isaac while (nz--) { 504e48d15efSToby Isaac idx = 6*(*vi++); 505e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 506e48d15efSToby Isaac xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 507e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 508e48d15efSToby Isaac v += 36; 509e48d15efSToby Isaac } 510e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 511e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 512e48d15efSToby Isaac idiag -= 36; 513e48d15efSToby Isaac i2 -= 6; 514e48d15efSToby Isaac } 515e48d15efSToby Isaac break; 516e48d15efSToby Isaac case 7: 517e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 518e48d15efSToby Isaac s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 519e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(x,idiag,b); 520e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 521e48d15efSToby Isaac x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 522e48d15efSToby Isaac i2 -= 7; 523e48d15efSToby Isaac idiag -= 49; 524e48d15efSToby Isaac for (i=m-2; i>=0; i--) { 525e48d15efSToby Isaac v = aa + 49*(diag[i]+1); 526e48d15efSToby Isaac vi = aj + diag[i] + 1; 527e48d15efSToby Isaac nz = ai[i+1] - diag[i] - 1; 528e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 529e48d15efSToby Isaac s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 530e48d15efSToby Isaac while (nz--) { 531e48d15efSToby Isaac idx = 7*(*vi++); 532e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 533e48d15efSToby Isaac xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 534e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 535e48d15efSToby Isaac v += 49; 536e48d15efSToby Isaac } 537e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 538e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 539e48d15efSToby Isaac x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 540e48d15efSToby Isaac idiag -= 49; 541e48d15efSToby Isaac i2 -= 7; 542e48d15efSToby Isaac } 543e48d15efSToby Isaac break; 544e48d15efSToby Isaac default: 545580bdb30SBarry Smith ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr); 54696b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 547de80f912SBarry Smith i2 -= bs; 548e48d15efSToby Isaac idiag -= bs2; 549de80f912SBarry Smith for (i=m-2; i>=0; i--) { 550de80f912SBarry Smith v = aa + bs2*(diag[i]+1); 551de80f912SBarry Smith vi = aj + diag[i] + 1; 552de80f912SBarry Smith nz = ai[i+1] - diag[i] - 1; 553de80f912SBarry Smith 554580bdb30SBarry Smith ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr); 555de80f912SBarry Smith /* copy all rows of x that are needed into contiguous space */ 556de80f912SBarry Smith workt = work; 557de80f912SBarry Smith for (j=0; j<nz; j++) { 558580bdb30SBarry Smith ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 559de80f912SBarry Smith workt += bs; 560de80f912SBarry Smith } 56196b95a6bSBarry Smith PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 56296b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 563e48d15efSToby Isaac 564de80f912SBarry Smith idiag -= bs2; 565de80f912SBarry Smith i2 -= bs; 566de80f912SBarry Smith } 567e48d15efSToby Isaac break; 568e48d15efSToby Isaac } 569de80f912SBarry Smith ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 570de80f912SBarry Smith } 571e48d15efSToby Isaac its--; 572e48d15efSToby Isaac } 573e48d15efSToby Isaac while (its--) { 574e48d15efSToby Isaac if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 575e48d15efSToby Isaac idiag = a->idiag; 576e48d15efSToby Isaac i2 = 0; 577e48d15efSToby Isaac switch (bs) { 578e48d15efSToby Isaac case 1: 579e48d15efSToby Isaac for (i=0; i<m; i++) { 580e48d15efSToby Isaac v = aa + ai[i]; 581e48d15efSToby Isaac vi = aj + ai[i]; 582e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 583e48d15efSToby Isaac s[0] = b[i2]; 584e48d15efSToby Isaac for (j=0; j<nz; j++) { 585e48d15efSToby Isaac xw[0] = x[vi[j]]; 586e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 587e48d15efSToby Isaac } 588e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 589e48d15efSToby Isaac x[i2] += xw[0]; 590e48d15efSToby Isaac idiag += 1; 591e48d15efSToby Isaac i2 += 1; 592e48d15efSToby Isaac } 593e48d15efSToby Isaac break; 594e48d15efSToby Isaac case 2: 595e48d15efSToby Isaac for (i=0; i<m; i++) { 596e48d15efSToby Isaac v = aa + 4*ai[i]; 597e48d15efSToby Isaac vi = aj + ai[i]; 598e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 599e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; 600e48d15efSToby Isaac for (j=0; j<nz; j++) { 601e48d15efSToby Isaac idx = 2*vi[j]; 602e48d15efSToby Isaac it = 4*j; 603e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; 604e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 605e48d15efSToby Isaac } 606e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 607e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; 608e48d15efSToby Isaac idiag += 4; 609e48d15efSToby Isaac i2 += 2; 610e48d15efSToby Isaac } 611e48d15efSToby Isaac break; 612e48d15efSToby Isaac case 3: 613e48d15efSToby Isaac for (i=0; i<m; i++) { 614e48d15efSToby Isaac v = aa + 9*ai[i]; 615e48d15efSToby Isaac vi = aj + ai[i]; 616e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 617e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 618e48d15efSToby Isaac while (nz--) { 619e48d15efSToby Isaac idx = 3*(*vi++); 620e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 621e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 622e48d15efSToby Isaac v += 9; 623e48d15efSToby Isaac } 624e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 625e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 626e48d15efSToby Isaac idiag += 9; 627e48d15efSToby Isaac i2 += 3; 628e48d15efSToby Isaac } 629e48d15efSToby Isaac break; 630e48d15efSToby Isaac case 4: 631e48d15efSToby Isaac for (i=0; i<m; i++) { 632e48d15efSToby Isaac v = aa + 16*ai[i]; 633e48d15efSToby Isaac vi = aj + ai[i]; 634e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 635e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 636e48d15efSToby Isaac while (nz--) { 637e48d15efSToby Isaac idx = 4*(*vi++); 638e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 639e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 640e48d15efSToby Isaac v += 16; 641e48d15efSToby Isaac } 642e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 643e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 644e48d15efSToby Isaac idiag += 16; 645e48d15efSToby Isaac i2 += 4; 646e48d15efSToby Isaac } 647e48d15efSToby Isaac break; 648e48d15efSToby Isaac case 5: 649e48d15efSToby Isaac for (i=0; i<m; i++) { 650e48d15efSToby Isaac v = aa + 25*ai[i]; 651e48d15efSToby Isaac vi = aj + ai[i]; 652e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 653e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 654e48d15efSToby Isaac while (nz--) { 655e48d15efSToby Isaac idx = 5*(*vi++); 656e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 657e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 658e48d15efSToby Isaac v += 25; 659e48d15efSToby Isaac } 660e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 661e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 662e48d15efSToby Isaac idiag += 25; 663e48d15efSToby Isaac i2 += 5; 664e48d15efSToby Isaac } 665e48d15efSToby Isaac break; 666e48d15efSToby Isaac case 6: 667e48d15efSToby Isaac for (i=0; i<m; i++) { 668e48d15efSToby Isaac v = aa + 36*ai[i]; 669e48d15efSToby Isaac vi = aj + ai[i]; 670e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 671e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 672e48d15efSToby Isaac while (nz--) { 673e48d15efSToby Isaac idx = 6*(*vi++); 674e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 675e48d15efSToby Isaac xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 676e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 677e48d15efSToby Isaac v += 36; 678e48d15efSToby Isaac } 679e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 680e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 681e48d15efSToby Isaac x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 682e48d15efSToby Isaac idiag += 36; 683e48d15efSToby Isaac i2 += 6; 684e48d15efSToby Isaac } 685e48d15efSToby Isaac break; 686e48d15efSToby Isaac case 7: 687e48d15efSToby Isaac for (i=0; i<m; i++) { 688e48d15efSToby Isaac v = aa + 49*ai[i]; 689e48d15efSToby Isaac vi = aj + ai[i]; 690e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 691e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 692e48d15efSToby Isaac s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 693e48d15efSToby Isaac while (nz--) { 694e48d15efSToby Isaac idx = 7*(*vi++); 695e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 696e48d15efSToby Isaac xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 697e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 698e48d15efSToby Isaac v += 49; 699e48d15efSToby Isaac } 700e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 701e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 702e48d15efSToby Isaac x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 703e48d15efSToby Isaac idiag += 49; 704e48d15efSToby Isaac i2 += 7; 705e48d15efSToby Isaac } 706e48d15efSToby Isaac break; 707e48d15efSToby Isaac default: 708e48d15efSToby Isaac for (i=0; i<m; i++) { 709e48d15efSToby Isaac v = aa + bs2*ai[i]; 710e48d15efSToby Isaac vi = aj + ai[i]; 711e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 712e48d15efSToby Isaac 713580bdb30SBarry Smith ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 714e48d15efSToby Isaac /* copy all rows of x that are needed into contiguous space */ 715e48d15efSToby Isaac workt = work; 716e48d15efSToby Isaac for (j=0; j<nz; j++) { 717580bdb30SBarry Smith ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 718e48d15efSToby Isaac workt += bs; 719e48d15efSToby Isaac } 720e48d15efSToby Isaac PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 721e48d15efSToby Isaac PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 722e48d15efSToby Isaac 723e48d15efSToby Isaac idiag += bs2; 724e48d15efSToby Isaac i2 += bs; 725e48d15efSToby Isaac } 726e48d15efSToby Isaac break; 727e48d15efSToby Isaac } 728e48d15efSToby Isaac ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 729e48d15efSToby Isaac } 730e48d15efSToby Isaac if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 731e48d15efSToby Isaac idiag = a->idiag+bs2*(a->mbs-1); 732e48d15efSToby Isaac i2 = bs * (m-1); 733e48d15efSToby Isaac switch (bs) { 734e48d15efSToby Isaac case 1: 735e48d15efSToby Isaac for (i=m-1; i>=0; i--) { 736e48d15efSToby Isaac v = aa + ai[i]; 737e48d15efSToby Isaac vi = aj + ai[i]; 738e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 739e48d15efSToby Isaac s[0] = b[i2]; 740e48d15efSToby Isaac for (j=0; j<nz; j++) { 741e48d15efSToby Isaac xw[0] = x[vi[j]]; 742e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 743e48d15efSToby Isaac } 744e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 745e48d15efSToby Isaac x[i2] += xw[0]; 746e48d15efSToby Isaac idiag -= 1; 747e48d15efSToby Isaac i2 -= 1; 748e48d15efSToby Isaac } 749e48d15efSToby Isaac break; 750e48d15efSToby Isaac case 2: 751e48d15efSToby Isaac for (i=m-1; i>=0; i--) { 752e48d15efSToby Isaac v = aa + 4*ai[i]; 753e48d15efSToby Isaac vi = aj + ai[i]; 754e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 755e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; 756e48d15efSToby Isaac for (j=0; j<nz; j++) { 757e48d15efSToby Isaac idx = 2*vi[j]; 758e48d15efSToby Isaac it = 4*j; 759e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; 760e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 761e48d15efSToby Isaac } 762e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 763e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; 764e48d15efSToby Isaac idiag -= 4; 765e48d15efSToby Isaac i2 -= 2; 766e48d15efSToby Isaac } 767e48d15efSToby Isaac break; 768e48d15efSToby Isaac case 3: 769e48d15efSToby Isaac for (i=m-1; i>=0; i--) { 770e48d15efSToby Isaac v = aa + 9*ai[i]; 771e48d15efSToby Isaac vi = aj + ai[i]; 772e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 773e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 774e48d15efSToby Isaac while (nz--) { 775e48d15efSToby Isaac idx = 3*(*vi++); 776e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 777e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 778e48d15efSToby Isaac v += 9; 779e48d15efSToby Isaac } 780e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 781e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 782e48d15efSToby Isaac idiag -= 9; 783e48d15efSToby Isaac i2 -= 3; 784e48d15efSToby Isaac } 785e48d15efSToby Isaac break; 786e48d15efSToby Isaac case 4: 787e48d15efSToby Isaac for (i=m-1; i>=0; i--) { 788e48d15efSToby Isaac v = aa + 16*ai[i]; 789e48d15efSToby Isaac vi = aj + ai[i]; 790e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 791e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 792e48d15efSToby Isaac while (nz--) { 793e48d15efSToby Isaac idx = 4*(*vi++); 794e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 795e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 796e48d15efSToby Isaac v += 16; 797e48d15efSToby Isaac } 798e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 799e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 800e48d15efSToby Isaac idiag -= 16; 801e48d15efSToby Isaac i2 -= 4; 802e48d15efSToby Isaac } 803e48d15efSToby Isaac break; 804e48d15efSToby Isaac case 5: 805e48d15efSToby Isaac for (i=m-1; i>=0; i--) { 806e48d15efSToby Isaac v = aa + 25*ai[i]; 807e48d15efSToby Isaac vi = aj + ai[i]; 808e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 809e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 810e48d15efSToby Isaac while (nz--) { 811e48d15efSToby Isaac idx = 5*(*vi++); 812e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 813e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 814e48d15efSToby Isaac v += 25; 815e48d15efSToby Isaac } 816e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 817e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 818e48d15efSToby Isaac idiag -= 25; 819e48d15efSToby Isaac i2 -= 5; 820e48d15efSToby Isaac } 821e48d15efSToby Isaac break; 822e48d15efSToby Isaac case 6: 823e48d15efSToby Isaac for (i=m-1; i>=0; i--) { 824e48d15efSToby Isaac v = aa + 36*ai[i]; 825e48d15efSToby Isaac vi = aj + ai[i]; 826e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 827e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 828e48d15efSToby Isaac while (nz--) { 829e48d15efSToby Isaac idx = 6*(*vi++); 830e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 831e48d15efSToby Isaac xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 832e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 833e48d15efSToby Isaac v += 36; 834e48d15efSToby Isaac } 835e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 836e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 837e48d15efSToby Isaac x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 838e48d15efSToby Isaac idiag -= 36; 839e48d15efSToby Isaac i2 -= 6; 840e48d15efSToby Isaac } 841e48d15efSToby Isaac break; 842e48d15efSToby Isaac case 7: 843e48d15efSToby Isaac for (i=m-1; i>=0; i--) { 844e48d15efSToby Isaac v = aa + 49*ai[i]; 845e48d15efSToby Isaac vi = aj + ai[i]; 846e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 847e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 848e48d15efSToby Isaac s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 849e48d15efSToby Isaac while (nz--) { 850e48d15efSToby Isaac idx = 7*(*vi++); 851e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 852e48d15efSToby Isaac xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 853e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 854e48d15efSToby Isaac v += 49; 855e48d15efSToby Isaac } 856e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 857e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 858e48d15efSToby Isaac x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 859e48d15efSToby Isaac idiag -= 49; 860e48d15efSToby Isaac i2 -= 7; 861e48d15efSToby Isaac } 862e48d15efSToby Isaac break; 863e48d15efSToby Isaac default: 864e48d15efSToby Isaac for (i=m-1; i>=0; i--) { 865e48d15efSToby Isaac v = aa + bs2*ai[i]; 866e48d15efSToby Isaac vi = aj + ai[i]; 867e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 868e48d15efSToby Isaac 869580bdb30SBarry Smith ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 870e48d15efSToby Isaac /* copy all rows of x that are needed into contiguous space */ 871e48d15efSToby Isaac workt = work; 872e48d15efSToby Isaac for (j=0; j<nz; j++) { 873580bdb30SBarry Smith ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 874e48d15efSToby Isaac workt += bs; 875e48d15efSToby Isaac } 876e48d15efSToby Isaac PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 877e48d15efSToby Isaac PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 878e48d15efSToby Isaac 879e48d15efSToby Isaac idiag -= bs2; 880e48d15efSToby Isaac i2 -= bs; 881e48d15efSToby Isaac } 882e48d15efSToby Isaac break; 883e48d15efSToby Isaac } 884e48d15efSToby Isaac ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 885e48d15efSToby Isaac } 886e48d15efSToby Isaac } 887de80f912SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 888de80f912SBarry Smith ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 889de80f912SBarry Smith PetscFunctionReturn(0); 890de80f912SBarry Smith } 891de80f912SBarry Smith 892af674e45SBarry Smith /* 89381824310SBarry Smith Special version for direct calls from Fortran (Used in PETSc-fun3d) 894af674e45SBarry Smith */ 895af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 896af674e45SBarry Smith #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 897af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 898af674e45SBarry Smith #define matsetvaluesblocked4_ matsetvaluesblocked4 899af674e45SBarry Smith #endif 900af674e45SBarry Smith 9018cc058d9SJed Brown PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 902af674e45SBarry Smith { 903af674e45SBarry Smith Mat A = *AA; 904af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 905c1ac3661SBarry Smith PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 906c1ac3661SBarry Smith PetscInt *ai =a->i,*ailen=a->ilen; 90717ec6a02SBarry Smith PetscInt *aj =a->j,stepval,lastcol = -1; 908f15d580aSBarry Smith const PetscScalar *value = v; 9094bb09213Spetsc MatScalar *ap,*aa = a->a,*bap; 91070990e77SSatish Balay PetscErrorCode ierr; 911af674e45SBarry Smith 912af674e45SBarry Smith PetscFunctionBegin; 913ce94432eSBarry Smith if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 914af674e45SBarry Smith stepval = (n-1)*4; 915af674e45SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 916af674e45SBarry Smith row = im[k]; 917af674e45SBarry Smith rp = aj + ai[row]; 918af674e45SBarry Smith ap = aa + 16*ai[row]; 919af674e45SBarry Smith nrow = ailen[row]; 920af674e45SBarry Smith low = 0; 92117ec6a02SBarry Smith high = nrow; 922af674e45SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 923af674e45SBarry Smith col = in[l]; 924db4deed7SKarl Rupp if (col <= lastcol) low = 0; 925db4deed7SKarl Rupp else high = nrow; 92617ec6a02SBarry Smith lastcol = col; 9271e3347e8SBarry Smith value = v + k*(stepval+4 + l)*4; 928af674e45SBarry Smith while (high-low > 7) { 929af674e45SBarry Smith t = (low+high)/2; 930af674e45SBarry Smith if (rp[t] > col) high = t; 931af674e45SBarry Smith else low = t; 932af674e45SBarry Smith } 933af674e45SBarry Smith for (i=low; i<high; i++) { 934af674e45SBarry Smith if (rp[i] > col) break; 935af674e45SBarry Smith if (rp[i] == col) { 936af674e45SBarry Smith bap = ap + 16*i; 937af674e45SBarry Smith for (ii=0; ii<4; ii++,value+=stepval) { 938af674e45SBarry Smith for (jj=ii; jj<16; jj+=4) { 939af674e45SBarry Smith bap[jj] += *value++; 940af674e45SBarry Smith } 941af674e45SBarry Smith } 942af674e45SBarry Smith goto noinsert2; 943af674e45SBarry Smith } 944af674e45SBarry Smith } 945af674e45SBarry Smith N = nrow++ - 1; 94617ec6a02SBarry Smith high++; /* added new column index thus must search to one higher than before */ 947af674e45SBarry Smith /* shift up all the later entries in this row */ 948af674e45SBarry Smith for (ii=N; ii>=i; ii--) { 949af674e45SBarry Smith rp[ii+1] = rp[ii]; 95070990e77SSatish Balay ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr); 951af674e45SBarry Smith } 952af674e45SBarry Smith if (N >= i) { 95370990e77SSatish Balay ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr); 954af674e45SBarry Smith } 955af674e45SBarry Smith rp[i] = col; 956af674e45SBarry Smith bap = ap + 16*i; 957af674e45SBarry Smith for (ii=0; ii<4; ii++,value+=stepval) { 958af674e45SBarry Smith for (jj=ii; jj<16; jj+=4) { 959af674e45SBarry Smith bap[jj] = *value++; 960af674e45SBarry Smith } 961af674e45SBarry Smith } 962af674e45SBarry Smith noinsert2:; 963af674e45SBarry Smith low = i; 964af674e45SBarry Smith } 965af674e45SBarry Smith ailen[row] = nrow; 966af674e45SBarry Smith } 967be1d678aSKris Buschelman PetscFunctionReturnVoid(); 968af674e45SBarry Smith } 969af674e45SBarry Smith 970af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 971af674e45SBarry Smith #define matsetvalues4_ MATSETVALUES4 972af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 973af674e45SBarry Smith #define matsetvalues4_ matsetvalues4 974af674e45SBarry Smith #endif 975af674e45SBarry Smith 9768cc058d9SJed Brown PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 977af674e45SBarry Smith { 978af674e45SBarry Smith Mat A = *AA; 979af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 980580bdb30SBarry Smith PetscInt *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm; 981c1ac3661SBarry Smith PetscInt *ai=a->i,*ailen=a->ilen; 982c1ac3661SBarry Smith PetscInt *aj=a->j,brow,bcol; 98317ec6a02SBarry Smith PetscInt ridx,cidx,lastcol = -1; 984af674e45SBarry Smith MatScalar *ap,value,*aa=a->a,*bap; 98570990e77SSatish Balay PetscErrorCode ierr; 986af674e45SBarry Smith 987af674e45SBarry Smith PetscFunctionBegin; 988af674e45SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 989af674e45SBarry Smith row = im[k]; brow = row/4; 990af674e45SBarry Smith rp = aj + ai[brow]; 991af674e45SBarry Smith ap = aa + 16*ai[brow]; 992af674e45SBarry Smith nrow = ailen[brow]; 993af674e45SBarry Smith low = 0; 99417ec6a02SBarry Smith high = nrow; 995af674e45SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 996af674e45SBarry Smith col = in[l]; bcol = col/4; 997af674e45SBarry Smith ridx = row % 4; cidx = col % 4; 998af674e45SBarry Smith value = v[l + k*n]; 999db4deed7SKarl Rupp if (col <= lastcol) low = 0; 1000db4deed7SKarl Rupp else high = nrow; 100117ec6a02SBarry Smith lastcol = col; 1002af674e45SBarry Smith while (high-low > 7) { 1003af674e45SBarry Smith t = (low+high)/2; 1004af674e45SBarry Smith if (rp[t] > bcol) high = t; 1005af674e45SBarry Smith else low = t; 1006af674e45SBarry Smith } 1007af674e45SBarry Smith for (i=low; i<high; i++) { 1008af674e45SBarry Smith if (rp[i] > bcol) break; 1009af674e45SBarry Smith if (rp[i] == bcol) { 1010af674e45SBarry Smith bap = ap + 16*i + 4*cidx + ridx; 1011af674e45SBarry Smith *bap += value; 1012af674e45SBarry Smith goto noinsert1; 1013af674e45SBarry Smith } 1014af674e45SBarry Smith } 1015af674e45SBarry Smith N = nrow++ - 1; 101617ec6a02SBarry Smith high++; /* added new column thus must search to one higher than before */ 1017af674e45SBarry Smith /* shift up all the later entries in this row */ 101870990e77SSatish Balay ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr); 101970990e77SSatish Balay ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr); 102070990e77SSatish Balay ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr); 1021af674e45SBarry Smith rp[i] = bcol; 1022af674e45SBarry Smith ap[16*i + 4*cidx + ridx] = value; 1023af674e45SBarry Smith noinsert1:; 1024af674e45SBarry Smith low = i; 1025af674e45SBarry Smith } 1026af674e45SBarry Smith ailen[brow] = nrow; 1027af674e45SBarry Smith } 1028be1d678aSKris Buschelman PetscFunctionReturnVoid(); 1029af674e45SBarry Smith } 1030af674e45SBarry Smith 1031be5855fcSBarry Smith /* 1032be5855fcSBarry Smith Checks for missing diagonals 1033be5855fcSBarry Smith */ 1034ace3abfcSBarry Smith PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1035be5855fcSBarry Smith { 1036be5855fcSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 10376849ba73SBarry Smith PetscErrorCode ierr; 10387734d3b5SMatthew G. Knepley PetscInt *diag,*ii = a->i,i; 1039be5855fcSBarry Smith 1040be5855fcSBarry Smith PetscFunctionBegin; 1041c4992f7dSBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 10422af78befSBarry Smith *missing = PETSC_FALSE; 10437734d3b5SMatthew G. Knepley if (A->rmap->n > 0 && !ii) { 10442efa7f71SHong Zhang *missing = PETSC_TRUE; 10452efa7f71SHong Zhang if (d) *d = 0; 1046994fe344SLisandro Dalcin ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr); 10472efa7f71SHong Zhang } else { 104801445905SHong Zhang PetscInt n; 104901445905SHong Zhang n = PetscMin(a->mbs, a->nbs); 1050883fce79SBarry Smith diag = a->diag; 105101445905SHong Zhang for (i=0; i<n; i++) { 10527734d3b5SMatthew G. Knepley if (diag[i] >= ii[i+1]) { 10532af78befSBarry Smith *missing = PETSC_TRUE; 10542af78befSBarry Smith if (d) *d = i; 1055994fe344SLisandro Dalcin ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr); 1056358d2f5dSShri Abhyankar break; 10572efa7f71SHong Zhang } 1058be5855fcSBarry Smith } 1059be5855fcSBarry Smith } 1060be5855fcSBarry Smith PetscFunctionReturn(0); 1061be5855fcSBarry Smith } 1062be5855fcSBarry Smith 1063dfbe8321SBarry Smith PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1064de6a44a3SBarry Smith { 1065de6a44a3SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 10666849ba73SBarry Smith PetscErrorCode ierr; 106709f38230SBarry Smith PetscInt i,j,m = a->mbs; 1068de6a44a3SBarry Smith 10693a40ed3dSBarry Smith PetscFunctionBegin; 107009f38230SBarry Smith if (!a->diag) { 1071785e854fSJed Brown ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 10723bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 10734fd072dbSBarry Smith a->free_diag = PETSC_TRUE; 107409f38230SBarry Smith } 10757fc0212eSBarry Smith for (i=0; i<m; i++) { 107609f38230SBarry Smith a->diag[i] = a->i[i+1]; 1077de6a44a3SBarry Smith for (j=a->i[i]; j<a->i[i+1]; j++) { 1078de6a44a3SBarry Smith if (a->j[j] == i) { 107909f38230SBarry Smith a->diag[i] = j; 1080de6a44a3SBarry Smith break; 1081de6a44a3SBarry Smith } 1082de6a44a3SBarry Smith } 1083de6a44a3SBarry Smith } 10843a40ed3dSBarry Smith PetscFunctionReturn(0); 1085de6a44a3SBarry Smith } 10862593348eSBarry Smith 10871a83f524SJed Brown static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 10883b2fbd54SBarry Smith { 10893b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1090dfbe8321SBarry Smith PetscErrorCode ierr; 10911a83f524SJed Brown PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 10921a83f524SJed Brown PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 10933b2fbd54SBarry Smith 10943a40ed3dSBarry Smith PetscFunctionBegin; 10953b2fbd54SBarry Smith *nn = n; 10963a40ed3dSBarry Smith if (!ia) PetscFunctionReturn(0); 10973b2fbd54SBarry Smith if (symmetric) { 10982462f5fdSStefano Zampini ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1099553b3c51SBarry Smith nz = tia[n]; 11003b2fbd54SBarry Smith } else { 11018f7157efSSatish Balay tia = a->i; tja = a->j; 11023b2fbd54SBarry Smith } 11033b2fbd54SBarry Smith 1104ecc77c7aSBarry Smith if (!blockcompressed && bs > 1) { 1105ecc77c7aSBarry Smith (*nn) *= bs; 11068f7157efSSatish Balay /* malloc & create the natural set of indices */ 1107785e854fSJed Brown ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 11089985e31cSBarry Smith if (n) { 11092462f5fdSStefano Zampini (*ia)[0] = oshift; 1110ecc77c7aSBarry Smith for (j=1; j<bs; j++) { 1111ecc77c7aSBarry Smith (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1112ecc77c7aSBarry Smith } 11139985e31cSBarry Smith } 1114ecc77c7aSBarry Smith 1115ecc77c7aSBarry Smith for (i=1; i<n; i++) { 1116ecc77c7aSBarry Smith (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1117ecc77c7aSBarry Smith for (j=1; j<bs; j++) { 1118ecc77c7aSBarry Smith (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 11198f7157efSSatish Balay } 11208f7157efSSatish Balay } 11219985e31cSBarry Smith if (n) { 1122ecc77c7aSBarry Smith (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 11239985e31cSBarry Smith } 1124ecc77c7aSBarry Smith 11251a83f524SJed Brown if (inja) { 1126785e854fSJed Brown ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 11279985e31cSBarry Smith cnt = 0; 11289985e31cSBarry Smith for (i=0; i<n; i++) { 11299985e31cSBarry Smith for (j=0; j<bs; j++) { 11309985e31cSBarry Smith for (k=tia[i]; k<tia[i+1]; k++) { 11319985e31cSBarry Smith for (l=0; l<bs; l++) { 11329985e31cSBarry Smith (*ja)[cnt++] = bs*tja[k] + l; 11339985e31cSBarry Smith } 11349985e31cSBarry Smith } 11359985e31cSBarry Smith } 11369985e31cSBarry Smith } 11379985e31cSBarry Smith } 11389985e31cSBarry Smith 11398f7157efSSatish Balay if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 11408f7157efSSatish Balay ierr = PetscFree(tia);CHKERRQ(ierr); 11418f7157efSSatish Balay ierr = PetscFree(tja);CHKERRQ(ierr); 11428f7157efSSatish Balay } 1143f6d58c54SBarry Smith } else if (oshift == 1) { 1144715a17b5SBarry Smith if (symmetric) { 1145a2ea699eSBarry Smith nz = tia[A->rmap->n/bs]; 1146715a17b5SBarry Smith /* add 1 to i and j indices */ 1147715a17b5SBarry Smith for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1148715a17b5SBarry Smith *ia = tia; 1149715a17b5SBarry Smith if (ja) { 1150715a17b5SBarry Smith for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1151715a17b5SBarry Smith *ja = tja; 1152715a17b5SBarry Smith } 1153715a17b5SBarry Smith } else { 1154a2ea699eSBarry Smith nz = a->i[A->rmap->n/bs]; 1155f6d58c54SBarry Smith /* malloc space and add 1 to i and j indices */ 1156854ce69bSBarry Smith ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1157f6d58c54SBarry Smith for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1158f6d58c54SBarry Smith if (ja) { 1159785e854fSJed Brown ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1160f6d58c54SBarry Smith for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1161f6d58c54SBarry Smith } 1162715a17b5SBarry Smith } 11638f7157efSSatish Balay } else { 11648f7157efSSatish Balay *ia = tia; 1165ecc77c7aSBarry Smith if (ja) *ja = tja; 11668f7157efSSatish Balay } 11673a40ed3dSBarry Smith PetscFunctionReturn(0); 11683b2fbd54SBarry Smith } 11693b2fbd54SBarry Smith 11701a83f524SJed Brown static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 11713b2fbd54SBarry Smith { 11726849ba73SBarry Smith PetscErrorCode ierr; 11733b2fbd54SBarry Smith 11743a40ed3dSBarry Smith PetscFunctionBegin; 11753a40ed3dSBarry Smith if (!ia) PetscFunctionReturn(0); 1176715a17b5SBarry Smith if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1177606d414cSSatish Balay ierr = PetscFree(*ia);CHKERRQ(ierr); 11789985e31cSBarry Smith if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 11793b2fbd54SBarry Smith } 11803a40ed3dSBarry Smith PetscFunctionReturn(0); 11813b2fbd54SBarry Smith } 11823b2fbd54SBarry Smith 1183dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 11842d61bbb3SSatish Balay { 11852d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1186dfbe8321SBarry Smith PetscErrorCode ierr; 11872d61bbb3SSatish Balay 1188433994e6SBarry Smith PetscFunctionBegin; 1189aa482453SBarry Smith #if defined(PETSC_USE_LOG) 1190d0f46423SBarry Smith PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 11912d61bbb3SSatish Balay #endif 1192e6b907acSBarry Smith ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 11936bf464f9SBarry Smith ierr = ISDestroy(&a->row);CHKERRQ(ierr); 11946bf464f9SBarry Smith ierr = ISDestroy(&a->col);CHKERRQ(ierr); 11954fd072dbSBarry Smith if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 119605b42c5fSBarry Smith ierr = PetscFree(a->idiag);CHKERRQ(ierr); 11974fd072dbSBarry Smith if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 119805b42c5fSBarry Smith ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 119905b42c5fSBarry Smith ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1200f361c04dSBarry Smith ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1201de80f912SBarry Smith ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 12026bf464f9SBarry Smith ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 120305b42c5fSBarry Smith ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1204cd6b891eSBarry Smith ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1205c4319e64SHong Zhang 12066bf464f9SBarry Smith ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 12076bf464f9SBarry Smith ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1208bf0cc555SLisandro Dalcin ierr = PetscFree(A->data);CHKERRQ(ierr); 1209901853e0SKris Buschelman 1210f4259b30SLisandro Dalcin ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr); 1211cda14afcSprj- ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);CHKERRQ(ierr); 1212cda14afcSprj- ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);CHKERRQ(ierr); 1213bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1214bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1215bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1216bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1217bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1218bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1219bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1220bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1221bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1222bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 12237ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 1224c9225affSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr); 12257ea3e4caSstefano_zampini #endif 1226c9225affSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr); 12272d61bbb3SSatish Balay PetscFunctionReturn(0); 12282d61bbb3SSatish Balay } 12292d61bbb3SSatish Balay 1230ace3abfcSBarry Smith PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 12312d61bbb3SSatish Balay { 12322d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 123363ba0a88SBarry Smith PetscErrorCode ierr; 12342d61bbb3SSatish Balay 12352d61bbb3SSatish Balay PetscFunctionBegin; 1236aa275fccSKris Buschelman switch (op) { 1237aa275fccSKris Buschelman case MAT_ROW_ORIENTED: 12384e0d8c25SBarry Smith a->roworiented = flg; 1239aa275fccSKris Buschelman break; 1240a9817697SBarry Smith case MAT_KEEP_NONZERO_PATTERN: 1241a9817697SBarry Smith a->keepnonzeropattern = flg; 1242aa275fccSKris Buschelman break; 1243512a5fc5SBarry Smith case MAT_NEW_NONZERO_LOCATIONS: 1244512a5fc5SBarry Smith a->nonew = (flg ? 0 : 1); 1245aa275fccSKris Buschelman break; 1246aa275fccSKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 12474e0d8c25SBarry Smith a->nonew = (flg ? -1 : 0); 1248aa275fccSKris Buschelman break; 1249aa275fccSKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 12504e0d8c25SBarry Smith a->nonew = (flg ? -2 : 0); 1251aa275fccSKris Buschelman break; 125228b2fa4aSMatthew Knepley case MAT_UNUSED_NONZERO_LOCATION_ERR: 125328b2fa4aSMatthew Knepley a->nounused = (flg ? -1 : 0); 125428b2fa4aSMatthew Knepley break; 12558c78258cSHong Zhang case MAT_FORCE_DIAGONAL_ENTRIES: 1256aa275fccSKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 1257aa275fccSKris Buschelman case MAT_USE_HASH_TABLE: 1258071fcb05SBarry Smith case MAT_SORTED_FULL: 1259290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1260aa275fccSKris Buschelman break; 12615021d80fSJed Brown case MAT_SPD: 126277e54ba9SKris Buschelman case MAT_SYMMETRIC: 126377e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 12649a4540c5SBarry Smith case MAT_HERMITIAN: 12659a4540c5SBarry Smith case MAT_SYMMETRY_ETERNAL: 1266c10200c1SHong Zhang case MAT_SUBMAT_SINGLEIS: 1267672ba085SHong Zhang case MAT_STRUCTURE_ONLY: 12685021d80fSJed Brown /* These options are handled directly by MatSetOption() */ 126977e54ba9SKris Buschelman break; 1270aa275fccSKris Buschelman default: 1271e32f2f54SBarry Smith SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 12722d61bbb3SSatish Balay } 12732d61bbb3SSatish Balay PetscFunctionReturn(0); 12742d61bbb3SSatish Balay } 12752d61bbb3SSatish Balay 127652768537SHong Zhang /* used for both SeqBAIJ and SeqSBAIJ matrices */ 127752768537SHong Zhang PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 12782d61bbb3SSatish Balay { 12796849ba73SBarry Smith PetscErrorCode ierr; 128052768537SHong Zhang PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 128152768537SHong Zhang MatScalar *aa_i; 128287828ca2SBarry Smith PetscScalar *v_i; 12832d61bbb3SSatish Balay 12842d61bbb3SSatish Balay PetscFunctionBegin; 1285d0f46423SBarry Smith bs = A->rmap->bs; 128652768537SHong Zhang bs2 = bs*bs; 1287e32f2f54SBarry Smith if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 12882d61bbb3SSatish Balay 12892d61bbb3SSatish Balay bn = row/bs; /* Block number */ 12902d61bbb3SSatish Balay bp = row % bs; /* Block Position */ 12912d61bbb3SSatish Balay M = ai[bn+1] - ai[bn]; 12922d61bbb3SSatish Balay *nz = bs*M; 12932d61bbb3SSatish Balay 12942d61bbb3SSatish Balay if (v) { 1295f4259b30SLisandro Dalcin *v = NULL; 12962d61bbb3SSatish Balay if (*nz) { 1297854ce69bSBarry Smith ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 12982d61bbb3SSatish Balay for (i=0; i<M; i++) { /* for each block in the block row */ 12992d61bbb3SSatish Balay v_i = *v + i*bs; 13002d61bbb3SSatish Balay aa_i = aa + bs2*(ai[bn] + i); 130126fbe8dcSKarl Rupp for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 13022d61bbb3SSatish Balay } 13032d61bbb3SSatish Balay } 13042d61bbb3SSatish Balay } 13052d61bbb3SSatish Balay 13062d61bbb3SSatish Balay if (idx) { 1307f4259b30SLisandro Dalcin *idx = NULL; 13082d61bbb3SSatish Balay if (*nz) { 1309854ce69bSBarry Smith ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 13102d61bbb3SSatish Balay for (i=0; i<M; i++) { /* for each block in the block row */ 13112d61bbb3SSatish Balay idx_i = *idx + i*bs; 13122d61bbb3SSatish Balay itmp = bs*aj[ai[bn] + i]; 131326fbe8dcSKarl Rupp for (j=0; j<bs; j++) idx_i[j] = itmp++; 13142d61bbb3SSatish Balay } 13152d61bbb3SSatish Balay } 13162d61bbb3SSatish Balay } 13172d61bbb3SSatish Balay PetscFunctionReturn(0); 13182d61bbb3SSatish Balay } 13192d61bbb3SSatish Balay 132052768537SHong Zhang PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 132152768537SHong Zhang { 132252768537SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 132352768537SHong Zhang PetscErrorCode ierr; 132452768537SHong Zhang 132552768537SHong Zhang PetscFunctionBegin; 132652768537SHong Zhang ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 132752768537SHong Zhang PetscFunctionReturn(0); 132852768537SHong Zhang } 132952768537SHong Zhang 1330c1ac3661SBarry Smith PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 13312d61bbb3SSatish Balay { 1332dfbe8321SBarry Smith PetscErrorCode ierr; 1333606d414cSSatish Balay 13342d61bbb3SSatish Balay PetscFunctionBegin; 1335cb4a9cd9SHong Zhang if (nz) *nz = 0; 133605b42c5fSBarry Smith if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 133705b42c5fSBarry Smith if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 13382d61bbb3SSatish Balay PetscFunctionReturn(0); 13392d61bbb3SSatish Balay } 13402d61bbb3SSatish Balay 1341fc4dec0aSBarry Smith PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 13422d61bbb3SSatish Balay { 134320e84f26SHong Zhang Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*at; 13442d61bbb3SSatish Balay Mat C; 13456849ba73SBarry Smith PetscErrorCode ierr; 134620e84f26SHong Zhang PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill; 134720e84f26SHong Zhang PetscInt bs2=a->bs2,*ati,*atj,anzj,kr; 134820e84f26SHong Zhang MatScalar *ata,*aa=a->a; 13492d61bbb3SSatish Balay 13502d61bbb3SSatish Balay PetscFunctionBegin; 135120e84f26SHong Zhang ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr); 1352cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 135320e84f26SHong Zhang for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 13542d61bbb3SSatish Balay 1355ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1356d0f46423SBarry Smith ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 13577adad957SLisandro Dalcin ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 135820e84f26SHong Zhang ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr); 135920e84f26SHong Zhang 136020e84f26SHong Zhang at = (Mat_SeqBAIJ*)C->data; 136120e84f26SHong Zhang ati = at->i; 136220e84f26SHong Zhang for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i]; 1363fc4dec0aSBarry Smith } else { 1364fc4dec0aSBarry Smith C = *B; 136520e84f26SHong Zhang at = (Mat_SeqBAIJ*)C->data; 136620e84f26SHong Zhang ati = at->i; 1367fc4dec0aSBarry Smith } 1368fc4dec0aSBarry Smith 136920e84f26SHong Zhang atj = at->j; 137020e84f26SHong Zhang ata = at->a; 137120e84f26SHong Zhang 137220e84f26SHong Zhang /* Copy ati into atfill so we have locations of the next free space in atj */ 1373580bdb30SBarry Smith ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr); 137420e84f26SHong Zhang 137520e84f26SHong Zhang /* Walk through A row-wise and mark nonzero entries of A^T. */ 13762d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 137720e84f26SHong Zhang anzj = ai[i+1] - ai[i]; 137820e84f26SHong Zhang for (j=0; j<anzj; j++) { 137920e84f26SHong Zhang atj[atfill[*aj]] = i; 138020e84f26SHong Zhang for (kr=0; kr<bs; kr++) { 138120e84f26SHong Zhang for (k=0; k<bs; k++) { 138220e84f26SHong Zhang ata[bs2*atfill[*aj]+k*bs+kr] = *aa++; 13832d61bbb3SSatish Balay } 13842d61bbb3SSatish Balay } 138520e84f26SHong Zhang atfill[*aj++] += 1; 138620e84f26SHong Zhang } 138720e84f26SHong Zhang } 13882d61bbb3SSatish Balay ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 13892d61bbb3SSatish Balay ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 13902d61bbb3SSatish Balay 139120e84f26SHong Zhang /* Clean up temporary space and complete requests. */ 139220e84f26SHong Zhang ierr = PetscFree(atfill);CHKERRQ(ierr); 139320e84f26SHong Zhang 1394cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 139520e84f26SHong Zhang ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 13962d61bbb3SSatish Balay *B = C; 13972d61bbb3SSatish Balay } else { 139828be2f97SBarry Smith ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 13992d61bbb3SSatish Balay } 14002d61bbb3SSatish Balay PetscFunctionReturn(0); 14012d61bbb3SSatish Balay } 14022d61bbb3SSatish Balay 1403453d3561SHong Zhang PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1404453d3561SHong Zhang { 1405453d3561SHong Zhang PetscErrorCode ierr; 1406453d3561SHong Zhang Mat Btrans; 1407453d3561SHong Zhang 1408453d3561SHong Zhang PetscFunctionBegin; 1409453d3561SHong Zhang *f = PETSC_FALSE; 1410453d3561SHong Zhang ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1411453d3561SHong Zhang ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1412453d3561SHong Zhang ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1413453d3561SHong Zhang PetscFunctionReturn(0); 1414453d3561SHong Zhang } 1415453d3561SHong Zhang 1416618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1417b51a4376SLisandro Dalcin PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer) 14182593348eSBarry Smith { 1419b51a4376SLisandro Dalcin Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)mat->data; 1420b51a4376SLisandro Dalcin PetscInt header[4],M,N,m,bs,nz,cnt,i,j,k,l; 1421b51a4376SLisandro Dalcin PetscInt *rowlens,*colidxs; 1422b51a4376SLisandro Dalcin PetscScalar *matvals; 14236849ba73SBarry Smith PetscErrorCode ierr; 14242593348eSBarry Smith 14253a40ed3dSBarry Smith PetscFunctionBegin; 1426b51a4376SLisandro Dalcin ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 14273b2fbd54SBarry Smith 1428b51a4376SLisandro Dalcin M = mat->rmap->N; 1429b51a4376SLisandro Dalcin N = mat->cmap->N; 1430b51a4376SLisandro Dalcin m = mat->rmap->n; 1431b51a4376SLisandro Dalcin bs = mat->rmap->bs; 1432b51a4376SLisandro Dalcin nz = bs*bs*A->nz; 14332593348eSBarry Smith 1434b51a4376SLisandro Dalcin /* write matrix header */ 1435b51a4376SLisandro Dalcin header[0] = MAT_FILE_CLASSID; 1436b51a4376SLisandro Dalcin header[1] = M; header[2] = N; header[3] = nz; 1437b51a4376SLisandro Dalcin ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 14382593348eSBarry Smith 1439b51a4376SLisandro Dalcin /* store row lengths */ 1440b51a4376SLisandro Dalcin ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1441b51a4376SLisandro Dalcin for (cnt=0, i=0; i<A->mbs; i++) 1442b51a4376SLisandro Dalcin for (j=0; j<bs; j++) 1443b51a4376SLisandro Dalcin rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]); 1444b51a4376SLisandro Dalcin ierr = PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT);CHKERRQ(ierr); 1445b51a4376SLisandro Dalcin ierr = PetscFree(rowlens);CHKERRQ(ierr); 1446b51a4376SLisandro Dalcin 1447b51a4376SLisandro Dalcin /* store column indices */ 1448b51a4376SLisandro Dalcin ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1449b51a4376SLisandro Dalcin for (cnt=0, i=0; i<A->mbs; i++) 1450b51a4376SLisandro Dalcin for (k=0; k<bs; k++) 1451b51a4376SLisandro Dalcin for (j=A->i[i]; j<A->i[i+1]; j++) 1452b51a4376SLisandro Dalcin for (l=0; l<bs; l++) 1453b51a4376SLisandro Dalcin colidxs[cnt++] = bs*A->j[j] + l; 1454b51a4376SLisandro Dalcin if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1455b51a4376SLisandro Dalcin ierr = PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT);CHKERRQ(ierr); 1456b51a4376SLisandro Dalcin ierr = PetscFree(colidxs);CHKERRQ(ierr); 14572593348eSBarry Smith 14582593348eSBarry Smith /* store nonzero values */ 1459b51a4376SLisandro Dalcin ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1460b51a4376SLisandro Dalcin for (cnt=0, i=0; i<A->mbs; i++) 1461b51a4376SLisandro Dalcin for (k=0; k<bs; k++) 1462b51a4376SLisandro Dalcin for (j=A->i[i]; j<A->i[i+1]; j++) 1463b51a4376SLisandro Dalcin for (l=0; l<bs; l++) 1464b51a4376SLisandro Dalcin matvals[cnt++] = A->a[bs*(bs*j + l) + k]; 1465b51a4376SLisandro Dalcin if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1466b51a4376SLisandro Dalcin ierr = PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1467b51a4376SLisandro Dalcin ierr = PetscFree(matvals);CHKERRQ(ierr); 1468ce6f0cecSBarry Smith 1469b51a4376SLisandro Dalcin /* write block size option to the viewer's .info file */ 1470b51a4376SLisandro Dalcin ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 14713a40ed3dSBarry Smith PetscFunctionReturn(0); 14722593348eSBarry Smith } 14732593348eSBarry Smith 14747dc0baabSHong Zhang static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer) 14757dc0baabSHong Zhang { 14767dc0baabSHong Zhang PetscErrorCode ierr; 14777dc0baabSHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 14787dc0baabSHong Zhang PetscInt i,bs = A->rmap->bs,k; 14797dc0baabSHong Zhang 14807dc0baabSHong Zhang PetscFunctionBegin; 14817dc0baabSHong Zhang ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 14827dc0baabSHong Zhang for (i=0; i<a->mbs; i++) { 14837dc0baabSHong Zhang ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr); 14847dc0baabSHong Zhang for (k=a->i[i]; k<a->i[i+1]; k++) { 14857dc0baabSHong Zhang ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr); 14867dc0baabSHong Zhang } 14877dc0baabSHong Zhang ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 14887dc0baabSHong Zhang } 14897dc0baabSHong Zhang ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 14907dc0baabSHong Zhang PetscFunctionReturn(0); 14917dc0baabSHong Zhang } 14927dc0baabSHong Zhang 14936849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 14942593348eSBarry Smith { 1495b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1496dfbe8321SBarry Smith PetscErrorCode ierr; 1497d0f46423SBarry Smith PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1498f3ef73ceSBarry Smith PetscViewerFormat format; 14992593348eSBarry Smith 15003a40ed3dSBarry Smith PetscFunctionBegin; 15017dc0baabSHong Zhang if (A->structure_only) { 15027dc0baabSHong Zhang ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr); 15037dc0baabSHong Zhang PetscFunctionReturn(0); 15047dc0baabSHong Zhang } 15057dc0baabSHong Zhang 1506b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1507456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 150877431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1509fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1510ade3a672SBarry Smith const char *matname; 1511bcd9e38bSBarry Smith Mat aij; 1512ceb03754SKris Buschelman ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1513ade3a672SBarry Smith ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1514ade3a672SBarry Smith ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1515bcd9e38bSBarry Smith ierr = MatView(aij,viewer);CHKERRQ(ierr); 15166bf464f9SBarry Smith ierr = MatDestroy(&aij);CHKERRQ(ierr); 151704929863SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 151804929863SHong Zhang PetscFunctionReturn(0); 1519fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1520d00279f6SBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 152144cd7ae7SLois Curfman McInnes for (i=0; i<a->mbs; i++) { 152244cd7ae7SLois Curfman McInnes for (j=0; j<bs; j++) { 152377431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 152444cd7ae7SLois Curfman McInnes for (k=a->i[i]; k<a->i[i+1]; k++) { 152544cd7ae7SLois Curfman McInnes for (l=0; l<bs; l++) { 1526aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 15270e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 152857622a8eSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 152957622a8eSBarry Smith (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 15300e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 153157622a8eSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 153257622a8eSBarry Smith (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 15330e6d2581SBarry Smith } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 153457622a8eSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 15350ef38995SBarry Smith } 153644cd7ae7SLois Curfman McInnes #else 15370ef38995SBarry Smith if (a->a[bs2*k + l*bs + j] != 0.0) { 153857622a8eSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 15390ef38995SBarry Smith } 154044cd7ae7SLois Curfman McInnes #endif 154144cd7ae7SLois Curfman McInnes } 154244cd7ae7SLois Curfman McInnes } 1543b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 154444cd7ae7SLois Curfman McInnes } 154544cd7ae7SLois Curfman McInnes } 1546d00279f6SBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 15470ef38995SBarry Smith } else { 1548d00279f6SBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1549b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 1550b6490206SBarry Smith for (j=0; j<bs; j++) { 155177431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1552b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 1553b6490206SBarry Smith for (l=0; l<bs; l++) { 1554aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 15550e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 155657622a8eSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 155757622a8eSBarry Smith (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 15580e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 155957622a8eSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 156057622a8eSBarry Smith (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 15610ef38995SBarry Smith } else { 156257622a8eSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 156388685aaeSLois Curfman McInnes } 156488685aaeSLois Curfman McInnes #else 156557622a8eSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 156688685aaeSLois Curfman McInnes #endif 15672593348eSBarry Smith } 15682593348eSBarry Smith } 1569b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 15702593348eSBarry Smith } 15712593348eSBarry Smith } 1572d00279f6SBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1573b6490206SBarry Smith } 1574b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 15753a40ed3dSBarry Smith PetscFunctionReturn(0); 15762593348eSBarry Smith } 15772593348eSBarry Smith 15789804daf3SBarry Smith #include <petscdraw.h> 15796849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 15803270192aSSatish Balay { 158177ed5343SBarry Smith Mat A = (Mat) Aa; 15823270192aSSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 15836849ba73SBarry Smith PetscErrorCode ierr; 1584d0f46423SBarry Smith PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 15850e6d2581SBarry Smith PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 15863f1db9ecSBarry Smith MatScalar *aa; 1587b0a32e0cSBarry Smith PetscViewer viewer; 1588b3e7f47fSJed Brown PetscViewerFormat format; 15893270192aSSatish Balay 15903a40ed3dSBarry Smith PetscFunctionBegin; 159177ed5343SBarry Smith ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1592b3e7f47fSJed Brown ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1593b0a32e0cSBarry Smith ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 159477ed5343SBarry Smith 15953270192aSSatish Balay /* loop over matrix elements drawing boxes */ 1596b3e7f47fSJed Brown 1597b3e7f47fSJed Brown if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1598383922c3SLisandro Dalcin ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1599383922c3SLisandro Dalcin /* Blue for negative, Cyan for zero and Red for positive */ 1600b0a32e0cSBarry Smith color = PETSC_DRAW_BLUE; 16013270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 16023270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 1603d0f46423SBarry Smith y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 16043270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 16053270192aSSatish Balay aa = a->a + j*bs2; 16063270192aSSatish Balay for (k=0; k<bs; k++) { 16073270192aSSatish Balay for (l=0; l<bs; l++) { 16080e6d2581SBarry Smith if (PetscRealPart(*aa++) >= 0.) continue; 1609b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 16103270192aSSatish Balay } 16113270192aSSatish Balay } 16123270192aSSatish Balay } 16133270192aSSatish Balay } 1614b0a32e0cSBarry Smith color = PETSC_DRAW_CYAN; 16153270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 16163270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 1617d0f46423SBarry Smith y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 16183270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 16193270192aSSatish Balay aa = a->a + j*bs2; 16203270192aSSatish Balay for (k=0; k<bs; k++) { 16213270192aSSatish Balay for (l=0; l<bs; l++) { 16220e6d2581SBarry Smith if (PetscRealPart(*aa++) != 0.) continue; 1623b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 16243270192aSSatish Balay } 16253270192aSSatish Balay } 16263270192aSSatish Balay } 16273270192aSSatish Balay } 1628b0a32e0cSBarry Smith color = PETSC_DRAW_RED; 16293270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 16303270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 1631d0f46423SBarry Smith y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 16323270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 16333270192aSSatish Balay aa = a->a + j*bs2; 16343270192aSSatish Balay for (k=0; k<bs; k++) { 16353270192aSSatish Balay for (l=0; l<bs; l++) { 16360e6d2581SBarry Smith if (PetscRealPart(*aa++) <= 0.) continue; 1637b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 16383270192aSSatish Balay } 16393270192aSSatish Balay } 16403270192aSSatish Balay } 16413270192aSSatish Balay } 1642383922c3SLisandro Dalcin ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1643b3e7f47fSJed Brown } else { 1644b3e7f47fSJed Brown /* use contour shading to indicate magnitude of values */ 1645b3e7f47fSJed Brown /* first determine max of all nonzero values */ 1646b05fc000SLisandro Dalcin PetscReal minv = 0.0, maxv = 0.0; 1647b3e7f47fSJed Brown PetscDraw popup; 1648b3e7f47fSJed Brown 1649b3e7f47fSJed Brown for (i=0; i<a->nz*a->bs2; i++) { 1650b3e7f47fSJed Brown if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1651b3e7f47fSJed Brown } 1652383922c3SLisandro Dalcin if (minv >= maxv) maxv = minv + PETSC_SMALL; 1653b3e7f47fSJed Brown ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 165445f3bb6eSLisandro Dalcin ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1655383922c3SLisandro Dalcin 1656383922c3SLisandro Dalcin ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1657b3e7f47fSJed Brown for (i=0,row=0; i<mbs; i++,row+=bs) { 1658b3e7f47fSJed Brown for (j=a->i[i]; j<a->i[i+1]; j++) { 1659b3e7f47fSJed Brown y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1660b3e7f47fSJed Brown x_l = a->j[j]*bs; x_r = x_l + 1.0; 1661b3e7f47fSJed Brown aa = a->a + j*bs2; 1662b3e7f47fSJed Brown for (k=0; k<bs; k++) { 1663b3e7f47fSJed Brown for (l=0; l<bs; l++) { 1664383922c3SLisandro Dalcin MatScalar v = *aa++; 1665383922c3SLisandro Dalcin color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1666b3e7f47fSJed Brown ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1667b3e7f47fSJed Brown } 1668b3e7f47fSJed Brown } 1669b3e7f47fSJed Brown } 1670b3e7f47fSJed Brown } 1671383922c3SLisandro Dalcin ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1672b3e7f47fSJed Brown } 167377ed5343SBarry Smith PetscFunctionReturn(0); 167477ed5343SBarry Smith } 16753270192aSSatish Balay 16766849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 167777ed5343SBarry Smith { 1678dfbe8321SBarry Smith PetscErrorCode ierr; 16790e6d2581SBarry Smith PetscReal xl,yl,xr,yr,w,h; 1680b0a32e0cSBarry Smith PetscDraw draw; 1681ace3abfcSBarry Smith PetscBool isnull; 16823270192aSSatish Balay 168377ed5343SBarry Smith PetscFunctionBegin; 1684b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 168545f3bb6eSLisandro Dalcin ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 168645f3bb6eSLisandro Dalcin if (isnull) PetscFunctionReturn(0); 168777ed5343SBarry Smith 1688d0f46423SBarry Smith xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 168977ed5343SBarry Smith xr += w; yr += h; xl = -w; yl = -h; 1690b0a32e0cSBarry Smith ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1691832b7cebSLisandro Dalcin ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1692b0a32e0cSBarry Smith ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 16930298fd71SBarry Smith ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1694832b7cebSLisandro Dalcin ierr = PetscDrawSave(draw);CHKERRQ(ierr); 16953a40ed3dSBarry Smith PetscFunctionReturn(0); 16963270192aSSatish Balay } 16973270192aSSatish Balay 1698dfbe8321SBarry Smith PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 16992593348eSBarry Smith { 1700dfbe8321SBarry Smith PetscErrorCode ierr; 1701ace3abfcSBarry Smith PetscBool iascii,isbinary,isdraw; 17022593348eSBarry Smith 17033a40ed3dSBarry Smith PetscFunctionBegin; 1704251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1705251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1706251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 170732077d6dSBarry Smith if (iascii) { 17083a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 17090f5bd95cSBarry Smith } else if (isbinary) { 17103a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 17110f5bd95cSBarry Smith } else if (isdraw) { 17123a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 17135cd90555SBarry Smith } else { 1714a5e6ed63SBarry Smith Mat B; 1715ceb03754SKris Buschelman ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1716a5e6ed63SBarry Smith ierr = MatView(B,viewer);CHKERRQ(ierr); 17176bf464f9SBarry Smith ierr = MatDestroy(&B);CHKERRQ(ierr); 17182593348eSBarry Smith } 17193a40ed3dSBarry Smith PetscFunctionReturn(0); 17202593348eSBarry Smith } 1721b6490206SBarry Smith 1722c1ac3661SBarry Smith PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1723cd0e1443SSatish Balay { 1724cd0e1443SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1725c1ac3661SBarry Smith PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1726c1ac3661SBarry Smith PetscInt *ai = a->i,*ailen = a->ilen; 1727d0f46423SBarry Smith PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 172897e567efSBarry Smith MatScalar *ap,*aa = a->a; 1729cd0e1443SSatish Balay 17303a40ed3dSBarry Smith PetscFunctionBegin; 17312d61bbb3SSatish Balay for (k=0; k<m; k++) { /* loop over rows */ 1732cd0e1443SSatish Balay row = im[k]; brow = row/bs; 1733e32f2f54SBarry Smith if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1734e32f2f54SBarry Smith if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 17352d61bbb3SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 17362c3acbe9SBarry Smith nrow = ailen[brow]; 17372d61bbb3SSatish Balay for (l=0; l<n; l++) { /* loop over columns */ 1738e32f2f54SBarry Smith if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1739e32f2f54SBarry Smith if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 17402d61bbb3SSatish Balay col = in[l]; 17412d61bbb3SSatish Balay bcol = col/bs; 17422d61bbb3SSatish Balay cidx = col%bs; 17432d61bbb3SSatish Balay ridx = row%bs; 17442d61bbb3SSatish Balay high = nrow; 17452d61bbb3SSatish Balay low = 0; /* assume unsorted */ 17462d61bbb3SSatish Balay while (high-low > 5) { 1747cd0e1443SSatish Balay t = (low+high)/2; 1748cd0e1443SSatish Balay if (rp[t] > bcol) high = t; 1749cd0e1443SSatish Balay else low = t; 1750cd0e1443SSatish Balay } 1751cd0e1443SSatish Balay for (i=low; i<high; i++) { 1752cd0e1443SSatish Balay if (rp[i] > bcol) break; 1753cd0e1443SSatish Balay if (rp[i] == bcol) { 17542d61bbb3SSatish Balay *v++ = ap[bs2*i+bs*cidx+ridx]; 17552d61bbb3SSatish Balay goto finished; 1756cd0e1443SSatish Balay } 1757cd0e1443SSatish Balay } 175897e567efSBarry Smith *v++ = 0.0; 17592d61bbb3SSatish Balay finished:; 1760cd0e1443SSatish Balay } 1761cd0e1443SSatish Balay } 17623a40ed3dSBarry Smith PetscFunctionReturn(0); 1763cd0e1443SSatish Balay } 1764cd0e1443SSatish Balay 1765dd6ea824SBarry Smith PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 176692c4ed94SBarry Smith { 176792c4ed94SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1768e2ee6c50SBarry Smith PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1769c1ac3661SBarry Smith PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 17706849ba73SBarry Smith PetscErrorCode ierr; 1771d0f46423SBarry Smith PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1772ace3abfcSBarry Smith PetscBool roworiented=a->roworiented; 1773dd6ea824SBarry Smith const PetscScalar *value = v; 17749d243f67SHong Zhang MatScalar *ap=NULL,*aa = a->a,*bap; 177592c4ed94SBarry Smith 17763a40ed3dSBarry Smith PetscFunctionBegin; 17770e324ae4SSatish Balay if (roworiented) { 17780e324ae4SSatish Balay stepval = (n-1)*bs; 17790e324ae4SSatish Balay } else { 17800e324ae4SSatish Balay stepval = (m-1)*bs; 17810e324ae4SSatish Balay } 178292c4ed94SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 178392c4ed94SBarry Smith row = im[k]; 17845ef9f2a5SBarry Smith if (row < 0) continue; 1785cf9c20a2SJed Brown if (PetscUnlikelyDebug(row >= a->mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 178692c4ed94SBarry Smith rp = aj + ai[row]; 17877dc0baabSHong Zhang if (!A->structure_only) ap = aa + bs2*ai[row]; 178892c4ed94SBarry Smith rmax = imax[row]; 178992c4ed94SBarry Smith nrow = ailen[row]; 179092c4ed94SBarry Smith low = 0; 1791c71e6ed7SBarry Smith high = nrow; 179292c4ed94SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 17935ef9f2a5SBarry Smith if (in[l] < 0) continue; 1794cf9c20a2SJed Brown if (PetscUnlikelyDebug(in[l] >= a->nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 179592c4ed94SBarry Smith col = in[l]; 17967dc0baabSHong Zhang if (!A->structure_only) { 179792c4ed94SBarry Smith if (roworiented) { 179853ef36baSBarry Smith value = v + (k*(stepval+bs) + l)*bs; 17990e324ae4SSatish Balay } else { 180053ef36baSBarry Smith value = v + (l*(stepval+bs) + k)*bs; 180192c4ed94SBarry Smith } 18027dc0baabSHong Zhang } 180326fbe8dcSKarl Rupp if (col <= lastcol) low = 0; 180426fbe8dcSKarl Rupp else high = nrow; 1805e2ee6c50SBarry Smith lastcol = col; 180692c4ed94SBarry Smith while (high-low > 7) { 180792c4ed94SBarry Smith t = (low+high)/2; 180892c4ed94SBarry Smith if (rp[t] > col) high = t; 180992c4ed94SBarry Smith else low = t; 181092c4ed94SBarry Smith } 181192c4ed94SBarry Smith for (i=low; i<high; i++) { 181292c4ed94SBarry Smith if (rp[i] > col) break; 181392c4ed94SBarry Smith if (rp[i] == col) { 18147dc0baabSHong Zhang if (A->structure_only) goto noinsert2; 18158a84c255SSatish Balay bap = ap + bs2*i; 18160e324ae4SSatish Balay if (roworiented) { 18178a84c255SSatish Balay if (is == ADD_VALUES) { 1818dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1819dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 18208a84c255SSatish Balay bap[jj] += *value++; 1821dd9472c6SBarry Smith } 1822dd9472c6SBarry Smith } 18230e324ae4SSatish Balay } else { 1824dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1825dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 18260e324ae4SSatish Balay bap[jj] = *value++; 18278a84c255SSatish Balay } 1828dd9472c6SBarry Smith } 1829dd9472c6SBarry Smith } 18300e324ae4SSatish Balay } else { 18310e324ae4SSatish Balay if (is == ADD_VALUES) { 183253ef36baSBarry Smith for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1833dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 183453ef36baSBarry Smith bap[jj] += value[jj]; 1835dd9472c6SBarry Smith } 183653ef36baSBarry Smith bap += bs; 1837dd9472c6SBarry Smith } 18380e324ae4SSatish Balay } else { 183953ef36baSBarry Smith for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1840dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 184153ef36baSBarry Smith bap[jj] = value[jj]; 18420e324ae4SSatish Balay } 184353ef36baSBarry Smith bap += bs; 18448a84c255SSatish Balay } 1845dd9472c6SBarry Smith } 1846dd9472c6SBarry Smith } 1847f1241b54SBarry Smith goto noinsert2; 184892c4ed94SBarry Smith } 184992c4ed94SBarry Smith } 185089280ab3SLois Curfman McInnes if (nonew == 1) goto noinsert2; 18512f7d4af7SBarry Smith if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 18527dc0baabSHong Zhang if (A->structure_only) { 18537dc0baabSHong Zhang MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar); 18547dc0baabSHong Zhang } else { 1855fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 18567dc0baabSHong Zhang } 1857c03d1d03SSatish Balay N = nrow++ - 1; high++; 185892c4ed94SBarry Smith /* shift up all the later entries in this row */ 1859580bdb30SBarry Smith ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 186092c4ed94SBarry Smith rp[i] = col; 18617dc0baabSHong Zhang if (!A->structure_only) { 1862580bdb30SBarry Smith ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 18638a84c255SSatish Balay bap = ap + bs2*i; 18640e324ae4SSatish Balay if (roworiented) { 1865dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1866dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 18670e324ae4SSatish Balay bap[jj] = *value++; 1868dd9472c6SBarry Smith } 1869dd9472c6SBarry Smith } 18700e324ae4SSatish Balay } else { 1871dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1872dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 18730e324ae4SSatish Balay *bap++ = *value++; 18740e324ae4SSatish Balay } 1875dd9472c6SBarry Smith } 1876dd9472c6SBarry Smith } 18777dc0baabSHong Zhang } 1878f1241b54SBarry Smith noinsert2:; 187992c4ed94SBarry Smith low = i; 188092c4ed94SBarry Smith } 188192c4ed94SBarry Smith ailen[row] = nrow; 188292c4ed94SBarry Smith } 18833a40ed3dSBarry Smith PetscFunctionReturn(0); 188492c4ed94SBarry Smith } 188526e093fcSHong Zhang 1886dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1887584200bdSSatish Balay { 1888584200bdSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1889580bdb30SBarry Smith PetscInt fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax; 1890d0f46423SBarry Smith PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 18916849ba73SBarry Smith PetscErrorCode ierr; 1892c1ac3661SBarry Smith PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 18933f1db9ecSBarry Smith MatScalar *aa = a->a,*ap; 18943447b6efSHong Zhang PetscReal ratio=0.6; 1895584200bdSSatish Balay 18963a40ed3dSBarry Smith PetscFunctionBegin; 18973a40ed3dSBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1898584200bdSSatish Balay 189943ee02c3SBarry Smith if (m) rmax = ailen[0]; 1900584200bdSSatish Balay for (i=1; i<mbs; i++) { 1901584200bdSSatish Balay /* move each row back by the amount of empty slots (fshift) before it*/ 1902584200bdSSatish Balay fshift += imax[i-1] - ailen[i-1]; 1903d402145bSBarry Smith rmax = PetscMax(rmax,ailen[i]); 1904584200bdSSatish Balay if (fshift) { 1905580bdb30SBarry Smith ip = aj + ai[i]; 1906580bdb30SBarry Smith ap = aa + bs2*ai[i]; 1907584200bdSSatish Balay N = ailen[i]; 1908580bdb30SBarry Smith ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr); 1909672ba085SHong Zhang if (!A->structure_only) { 1910580bdb30SBarry Smith ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr); 1911584200bdSSatish Balay } 1912672ba085SHong Zhang } 1913584200bdSSatish Balay ai[i] = ai[i-1] + ailen[i-1]; 1914584200bdSSatish Balay } 1915584200bdSSatish Balay if (mbs) { 1916584200bdSSatish Balay fshift += imax[mbs-1] - ailen[mbs-1]; 1917584200bdSSatish Balay ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1918584200bdSSatish Balay } 19197c565772SBarry Smith 1920584200bdSSatish Balay /* reset ilen and imax for each row */ 19217c565772SBarry Smith a->nonzerorowcnt = 0; 1922672ba085SHong Zhang if (A->structure_only) { 1923672ba085SHong Zhang ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1924672ba085SHong Zhang } else { /* !A->structure_only */ 1925584200bdSSatish Balay for (i=0; i<mbs; i++) { 1926584200bdSSatish Balay ailen[i] = imax[i] = ai[i+1] - ai[i]; 19277c565772SBarry Smith a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1928584200bdSSatish Balay } 1929672ba085SHong Zhang } 1930a7c10996SSatish Balay a->nz = ai[mbs]; 1931584200bdSSatish Balay 1932584200bdSSatish Balay /* diagonals may have moved, so kill the diagonal pointers */ 1933b01c7715SBarry Smith a->idiagvalid = PETSC_FALSE; 1934584200bdSSatish Balay if (fshift && a->diag) { 1935606d414cSSatish Balay ierr = PetscFree(a->diag);CHKERRQ(ierr); 19363bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1937f4259b30SLisandro Dalcin a->diag = NULL; 1938584200bdSSatish Balay } 193965e19b50SBarry Smith if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1940d0f46423SBarry Smith ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1941ae15b995SBarry Smith ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1942ae15b995SBarry Smith ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 194326fbe8dcSKarl Rupp 19448e58a170SBarry Smith A->info.mallocs += a->reallocs; 1945e2f3b5e9SSatish Balay a->reallocs = 0; 19460e6d2581SBarry Smith A->info.nz_unneeded = (PetscReal)fshift*bs2; 1947647a6520SHong Zhang a->rmax = rmax; 1948cf4441caSHong Zhang 1949672ba085SHong Zhang if (!A->structure_only) { 195011e456e1SBarry Smith ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1951672ba085SHong Zhang } 19523a40ed3dSBarry Smith PetscFunctionReturn(0); 1953584200bdSSatish Balay } 1954584200bdSSatish Balay 1955bea157c4SSatish Balay /* 1956bea157c4SSatish Balay This function returns an array of flags which indicate the locations of contiguous 1957bea157c4SSatish Balay blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1958bea157c4SSatish Balay then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1959bea157c4SSatish Balay Assume: sizes should be long enough to hold all the values. 1960bea157c4SSatish Balay */ 1961c1ac3661SBarry Smith static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1962d9b7c43dSSatish Balay { 1963c1ac3661SBarry Smith PetscInt i,j,k,row; 1964ace3abfcSBarry Smith PetscBool flg; 19653a40ed3dSBarry Smith 1966433994e6SBarry Smith PetscFunctionBegin; 1967bea157c4SSatish Balay for (i=0,j=0; i<n; j++) { 1968bea157c4SSatish Balay row = idx[i]; 1969bea157c4SSatish Balay if (row%bs!=0) { /* Not the begining of a block */ 1970bea157c4SSatish Balay sizes[j] = 1; 1971bea157c4SSatish Balay i++; 1972e4fda26cSSatish Balay } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1973bea157c4SSatish Balay sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1974bea157c4SSatish Balay i++; 1975bea157c4SSatish Balay } else { /* Begining of the block, so check if the complete block exists */ 1976bea157c4SSatish Balay flg = PETSC_TRUE; 1977bea157c4SSatish Balay for (k=1; k<bs; k++) { 1978bea157c4SSatish Balay if (row+k != idx[i+k]) { /* break in the block */ 1979bea157c4SSatish Balay flg = PETSC_FALSE; 1980bea157c4SSatish Balay break; 1981d9b7c43dSSatish Balay } 1982bea157c4SSatish Balay } 1983abc0a331SBarry Smith if (flg) { /* No break in the bs */ 1984bea157c4SSatish Balay sizes[j] = bs; 1985bea157c4SSatish Balay i += bs; 1986bea157c4SSatish Balay } else { 1987bea157c4SSatish Balay sizes[j] = 1; 1988bea157c4SSatish Balay i++; 1989bea157c4SSatish Balay } 1990bea157c4SSatish Balay } 1991bea157c4SSatish Balay } 1992bea157c4SSatish Balay *bs_max = j; 19933a40ed3dSBarry Smith PetscFunctionReturn(0); 1994d9b7c43dSSatish Balay } 1995d9b7c43dSSatish Balay 19962b40b63fSBarry Smith PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 1997d9b7c43dSSatish Balay { 1998d9b7c43dSSatish Balay Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1999dfbe8321SBarry Smith PetscErrorCode ierr; 2000f4df32b1SMatthew Knepley PetscInt i,j,k,count,*rows; 2001d0f46423SBarry Smith PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 200287828ca2SBarry Smith PetscScalar zero = 0.0; 20033f1db9ecSBarry Smith MatScalar *aa; 200497b48c8fSBarry Smith const PetscScalar *xx; 200597b48c8fSBarry Smith PetscScalar *bb; 2006d9b7c43dSSatish Balay 20073a40ed3dSBarry Smith PetscFunctionBegin; 200897b48c8fSBarry Smith /* fix right hand side if needed */ 200997b48c8fSBarry Smith if (x && b) { 201097b48c8fSBarry Smith ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 201197b48c8fSBarry Smith ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 201297b48c8fSBarry Smith for (i=0; i<is_n; i++) { 201397b48c8fSBarry Smith bb[is_idx[i]] = diag*xx[is_idx[i]]; 201497b48c8fSBarry Smith } 201597b48c8fSBarry Smith ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 201697b48c8fSBarry Smith ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 201797b48c8fSBarry Smith } 201897b48c8fSBarry Smith 2019d9b7c43dSSatish Balay /* Make a copy of the IS and sort it */ 2020bea157c4SSatish Balay /* allocate memory for rows,sizes */ 2021dcca6d9dSJed Brown ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2022bea157c4SSatish Balay 2023563b5814SBarry Smith /* copy IS values to rows, and sort them */ 202426fbe8dcSKarl Rupp for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2025bea157c4SSatish Balay ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 202697b48c8fSBarry Smith 2027a9817697SBarry Smith if (baij->keepnonzeropattern) { 202826fbe8dcSKarl Rupp for (i=0; i<is_n; i++) sizes[i] = 1; 2029dffd3267SBarry Smith bs_max = is_n; 2030dffd3267SBarry Smith } else { 2031bea157c4SSatish Balay ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2032e56f5c9eSBarry Smith A->nonzerostate++; 2033dffd3267SBarry Smith } 2034bea157c4SSatish Balay 2035bea157c4SSatish Balay for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2036bea157c4SSatish Balay row = rows[j]; 2037e32f2f54SBarry Smith if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2038bea157c4SSatish Balay count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2039b31fbe3bSSatish Balay aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2040a9817697SBarry Smith if (sizes[i] == bs && !baij->keepnonzeropattern) { 2041d4a378daSJed Brown if (diag != (PetscScalar)0.0) { 2042bea157c4SSatish Balay if (baij->ilen[row/bs] > 0) { 2043bea157c4SSatish Balay baij->ilen[row/bs] = 1; 2044bea157c4SSatish Balay baij->j[baij->i[row/bs]] = row/bs; 204526fbe8dcSKarl Rupp 2046580bdb30SBarry Smith ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr); 2047a07cd24cSSatish Balay } 2048563b5814SBarry Smith /* Now insert all the diagonal values for this bs */ 2049bea157c4SSatish Balay for (k=0; k<bs; k++) { 2050f4df32b1SMatthew Knepley ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2051bea157c4SSatish Balay } 2052f4df32b1SMatthew Knepley } else { /* (diag == 0.0) */ 2053bea157c4SSatish Balay baij->ilen[row/bs] = 0; 2054f4df32b1SMatthew Knepley } /* end (diag == 0.0) */ 2055bea157c4SSatish Balay } else { /* (sizes[i] != bs) */ 2056cf9c20a2SJed Brown if (PetscUnlikelyDebug(sizes[i] != 1)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2057bea157c4SSatish Balay for (k=0; k<count; k++) { 2058d9b7c43dSSatish Balay aa[0] = zero; 2059d9b7c43dSSatish Balay aa += bs; 2060d9b7c43dSSatish Balay } 2061d4a378daSJed Brown if (diag != (PetscScalar)0.0) { 2062f4df32b1SMatthew Knepley ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2063d9b7c43dSSatish Balay } 2064d9b7c43dSSatish Balay } 2065bea157c4SSatish Balay } 2066bea157c4SSatish Balay 2067fca92195SBarry Smith ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 20689a8dea36SBarry Smith ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 20693a40ed3dSBarry Smith PetscFunctionReturn(0); 2070d9b7c43dSSatish Balay } 20711c351548SSatish Balay 207297b48c8fSBarry Smith PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 207397b48c8fSBarry Smith { 207497b48c8fSBarry Smith Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 207597b48c8fSBarry Smith PetscErrorCode ierr; 207697b48c8fSBarry Smith PetscInt i,j,k,count; 207797b48c8fSBarry Smith PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 207897b48c8fSBarry Smith PetscScalar zero = 0.0; 207997b48c8fSBarry Smith MatScalar *aa; 208097b48c8fSBarry Smith const PetscScalar *xx; 208197b48c8fSBarry Smith PetscScalar *bb; 208256777dd2SBarry Smith PetscBool *zeroed,vecs = PETSC_FALSE; 208397b48c8fSBarry Smith 208497b48c8fSBarry Smith PetscFunctionBegin; 208597b48c8fSBarry Smith /* fix right hand side if needed */ 208697b48c8fSBarry Smith if (x && b) { 208797b48c8fSBarry Smith ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 208897b48c8fSBarry Smith ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 208956777dd2SBarry Smith vecs = PETSC_TRUE; 209097b48c8fSBarry Smith } 209197b48c8fSBarry Smith 209297b48c8fSBarry Smith /* zero the columns */ 20931795a4d1SJed Brown ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 209497b48c8fSBarry Smith for (i=0; i<is_n; i++) { 209597b48c8fSBarry Smith if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 209697b48c8fSBarry Smith zeroed[is_idx[i]] = PETSC_TRUE; 209797b48c8fSBarry Smith } 209897b48c8fSBarry Smith for (i=0; i<A->rmap->N; i++) { 209997b48c8fSBarry Smith if (!zeroed[i]) { 210097b48c8fSBarry Smith row = i/bs; 210197b48c8fSBarry Smith for (j=baij->i[row]; j<baij->i[row+1]; j++) { 210297b48c8fSBarry Smith for (k=0; k<bs; k++) { 210397b48c8fSBarry Smith col = bs*baij->j[j] + k; 210497b48c8fSBarry Smith if (zeroed[col]) { 210597b48c8fSBarry Smith aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 210656777dd2SBarry Smith if (vecs) bb[i] -= aa[0]*xx[col]; 210797b48c8fSBarry Smith aa[0] = 0.0; 210897b48c8fSBarry Smith } 210997b48c8fSBarry Smith } 211097b48c8fSBarry Smith } 211156777dd2SBarry Smith } else if (vecs) bb[i] = diag*xx[i]; 211297b48c8fSBarry Smith } 211397b48c8fSBarry Smith ierr = PetscFree(zeroed);CHKERRQ(ierr); 211456777dd2SBarry Smith if (vecs) { 211556777dd2SBarry Smith ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 211656777dd2SBarry Smith ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 211756777dd2SBarry Smith } 211897b48c8fSBarry Smith 211997b48c8fSBarry Smith /* zero the rows */ 212097b48c8fSBarry Smith for (i=0; i<is_n; i++) { 212197b48c8fSBarry Smith row = is_idx[i]; 212297b48c8fSBarry Smith count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 212397b48c8fSBarry Smith aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 212497b48c8fSBarry Smith for (k=0; k<count; k++) { 212597b48c8fSBarry Smith aa[0] = zero; 212697b48c8fSBarry Smith aa += bs; 212797b48c8fSBarry Smith } 2128d4a378daSJed Brown if (diag != (PetscScalar)0.0) { 212997b48c8fSBarry Smith ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 213097b48c8fSBarry Smith } 213197b48c8fSBarry Smith } 213297b48c8fSBarry Smith ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 213397b48c8fSBarry Smith PetscFunctionReturn(0); 213497b48c8fSBarry Smith } 213597b48c8fSBarry Smith 2136c1ac3661SBarry Smith PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 21372d61bbb3SSatish Balay { 21382d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2139e2ee6c50SBarry Smith PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2140c1ac3661SBarry Smith PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2141d0f46423SBarry Smith PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 21426849ba73SBarry Smith PetscErrorCode ierr; 2143c1ac3661SBarry Smith PetscInt ridx,cidx,bs2=a->bs2; 2144ace3abfcSBarry Smith PetscBool roworiented=a->roworiented; 2145d8cdefa3SHong Zhang MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap; 21462d61bbb3SSatish Balay 21472d61bbb3SSatish Balay PetscFunctionBegin; 21482d61bbb3SSatish Balay for (k=0; k<m; k++) { /* loop over added rows */ 2149085a36d4SBarry Smith row = im[k]; 2150085a36d4SBarry Smith brow = row/bs; 21515ef9f2a5SBarry Smith if (row < 0) continue; 2152cf9c20a2SJed Brown if (PetscUnlikelyDebug(row >= A->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 21532d61bbb3SSatish Balay rp = aj + ai[brow]; 2154672ba085SHong Zhang if (!A->structure_only) ap = aa + bs2*ai[brow]; 21552d61bbb3SSatish Balay rmax = imax[brow]; 21562d61bbb3SSatish Balay nrow = ailen[brow]; 21572d61bbb3SSatish Balay low = 0; 2158c71e6ed7SBarry Smith high = nrow; 21592d61bbb3SSatish Balay for (l=0; l<n; l++) { /* loop over added columns */ 21605ef9f2a5SBarry Smith if (in[l] < 0) continue; 2161cf9c20a2SJed Brown if (PetscUnlikelyDebug(in[l] >= A->cmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 21622d61bbb3SSatish Balay col = in[l]; bcol = col/bs; 21632d61bbb3SSatish Balay ridx = row % bs; cidx = col % bs; 2164672ba085SHong Zhang if (!A->structure_only) { 21652d61bbb3SSatish Balay if (roworiented) { 21665ef9f2a5SBarry Smith value = v[l + k*n]; 21672d61bbb3SSatish Balay } else { 21682d61bbb3SSatish Balay value = v[k + l*m]; 21692d61bbb3SSatish Balay } 2170672ba085SHong Zhang } 21717cd84e04SBarry Smith if (col <= lastcol) low = 0; else high = nrow; 2172e2ee6c50SBarry Smith lastcol = col; 21732d61bbb3SSatish Balay while (high-low > 7) { 21742d61bbb3SSatish Balay t = (low+high)/2; 21752d61bbb3SSatish Balay if (rp[t] > bcol) high = t; 21762d61bbb3SSatish Balay else low = t; 21772d61bbb3SSatish Balay } 21782d61bbb3SSatish Balay for (i=low; i<high; i++) { 21792d61bbb3SSatish Balay if (rp[i] > bcol) break; 21802d61bbb3SSatish Balay if (rp[i] == bcol) { 21812d61bbb3SSatish Balay bap = ap + bs2*i + bs*cidx + ridx; 2182672ba085SHong Zhang if (!A->structure_only) { 21832d61bbb3SSatish Balay if (is == ADD_VALUES) *bap += value; 21842d61bbb3SSatish Balay else *bap = value; 2185672ba085SHong Zhang } 21862d61bbb3SSatish Balay goto noinsert1; 21872d61bbb3SSatish Balay } 21882d61bbb3SSatish Balay } 21892d61bbb3SSatish Balay if (nonew == 1) goto noinsert1; 2190e32f2f54SBarry Smith if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2191672ba085SHong Zhang if (A->structure_only) { 2192672ba085SHong Zhang MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar); 2193672ba085SHong Zhang } else { 2194fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2195672ba085SHong Zhang } 2196c03d1d03SSatish Balay N = nrow++ - 1; high++; 21972d61bbb3SSatish Balay /* shift up all the later entries in this row */ 2198580bdb30SBarry Smith ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 21992d61bbb3SSatish Balay rp[i] = bcol; 2200580bdb30SBarry Smith if (!A->structure_only) { 2201580bdb30SBarry Smith ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 2202580bdb30SBarry Smith ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr); 2203580bdb30SBarry Smith ap[bs2*i + bs*cidx + ridx] = value; 2204580bdb30SBarry Smith } 2205085a36d4SBarry Smith a->nz++; 2206e56f5c9eSBarry Smith A->nonzerostate++; 22072d61bbb3SSatish Balay noinsert1:; 22082d61bbb3SSatish Balay low = i; 22092d61bbb3SSatish Balay } 22102d61bbb3SSatish Balay ailen[brow] = nrow; 22112d61bbb3SSatish Balay } 22122d61bbb3SSatish Balay PetscFunctionReturn(0); 22132d61bbb3SSatish Balay } 22142d61bbb3SSatish Balay 22150481f469SBarry Smith PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 22162d61bbb3SSatish Balay { 22172d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 22182d61bbb3SSatish Balay Mat outA; 2219dfbe8321SBarry Smith PetscErrorCode ierr; 2220ace3abfcSBarry Smith PetscBool row_identity,col_identity; 22212d61bbb3SSatish Balay 22222d61bbb3SSatish Balay PetscFunctionBegin; 2223e32f2f54SBarry Smith if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2224667159a5SBarry Smith ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2225667159a5SBarry Smith ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2226f23aa3ddSBarry Smith if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 22272d61bbb3SSatish Balay 22282d61bbb3SSatish Balay outA = inA; 2229d5f3da31SBarry Smith inA->factortype = MAT_FACTOR_LU; 2230f6224b95SHong Zhang ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2231f6224b95SHong Zhang ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 22322d61bbb3SSatish Balay 2233c4992f7dSBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2234cf242676SKris Buschelman 2235c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 22366bf464f9SBarry Smith ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2237c3122656SLisandro Dalcin a->row = row; 2238c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 22396bf464f9SBarry Smith ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2240c3122656SLisandro Dalcin a->col = col; 2241c38d4ed2SBarry Smith 2242c38d4ed2SBarry Smith /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 22436bf464f9SBarry Smith ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 22444c49b128SBarry Smith ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 22453bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2246c38d4ed2SBarry Smith 2247ace3abfcSBarry Smith ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2248c38d4ed2SBarry Smith if (!a->solve_work) { 2249854ce69bSBarry Smith ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 22503bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2251c38d4ed2SBarry Smith } 2252719d5645SBarry Smith ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 22532d61bbb3SSatish Balay PetscFunctionReturn(0); 22542d61bbb3SSatish Balay } 2255d9b7c43dSSatish Balay 22567087cfbeSBarry Smith PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 225727a8da17SBarry Smith { 225827a8da17SBarry Smith Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2259bdb1c0e1SJed Brown PetscInt i,nz,mbs; 226027a8da17SBarry Smith 226127a8da17SBarry Smith PetscFunctionBegin; 2262b32cb4a7SJed Brown nz = baij->maxnz; 2263bdb1c0e1SJed Brown mbs = baij->mbs; 226427a8da17SBarry Smith for (i=0; i<nz; i++) { 226527a8da17SBarry Smith baij->j[i] = indices[i]; 226627a8da17SBarry Smith } 226727a8da17SBarry Smith baij->nz = nz; 2268bdb1c0e1SJed Brown for (i=0; i<mbs; i++) { 226927a8da17SBarry Smith baij->ilen[i] = baij->imax[i]; 227027a8da17SBarry Smith } 227127a8da17SBarry Smith PetscFunctionReturn(0); 227227a8da17SBarry Smith } 227327a8da17SBarry Smith 227427a8da17SBarry Smith /*@ 227527a8da17SBarry Smith MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 227627a8da17SBarry Smith in the matrix. 227727a8da17SBarry Smith 227827a8da17SBarry Smith Input Parameters: 227927a8da17SBarry Smith + mat - the SeqBAIJ matrix 228027a8da17SBarry Smith - indices - the column indices 228127a8da17SBarry Smith 228215091d37SBarry Smith Level: advanced 228315091d37SBarry Smith 228427a8da17SBarry Smith Notes: 228527a8da17SBarry Smith This can be called if you have precomputed the nonzero structure of the 228627a8da17SBarry Smith matrix and want to provide it to the matrix object to improve the performance 228727a8da17SBarry Smith of the MatSetValues() operation. 228827a8da17SBarry Smith 228927a8da17SBarry Smith You MUST have set the correct numbers of nonzeros per row in the call to 2290d1be2dadSMatthew Knepley MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 229127a8da17SBarry Smith 229227a8da17SBarry Smith MUST be called before any calls to MatSetValues(); 229327a8da17SBarry Smith 229427a8da17SBarry Smith @*/ 22957087cfbeSBarry Smith PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 229627a8da17SBarry Smith { 22974ac538c5SBarry Smith PetscErrorCode ierr; 229827a8da17SBarry Smith 229927a8da17SBarry Smith PetscFunctionBegin; 23000700a824SBarry Smith PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 23014482741eSBarry Smith PetscValidPointer(indices,2); 23024ac538c5SBarry Smith ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 230327a8da17SBarry Smith PetscFunctionReturn(0); 230427a8da17SBarry Smith } 230527a8da17SBarry Smith 2306985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2307273d9f13SBarry Smith { 2308273d9f13SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2309dfbe8321SBarry Smith PetscErrorCode ierr; 2310c1ac3661SBarry Smith PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2311273d9f13SBarry Smith PetscReal atmp; 231287828ca2SBarry Smith PetscScalar *x,zero = 0.0; 2313273d9f13SBarry Smith MatScalar *aa; 2314c1ac3661SBarry Smith PetscInt ncols,brow,krow,kcol; 2315273d9f13SBarry Smith 2316273d9f13SBarry Smith PetscFunctionBegin; 2317e32f2f54SBarry Smith if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2318d0f46423SBarry Smith bs = A->rmap->bs; 2319273d9f13SBarry Smith aa = a->a; 2320273d9f13SBarry Smith ai = a->i; 2321273d9f13SBarry Smith aj = a->j; 2322273d9f13SBarry Smith mbs = a->mbs; 2323273d9f13SBarry Smith 23242dcb1b2aSMatthew Knepley ierr = VecSet(v,zero);CHKERRQ(ierr); 23251ebc52fbSHong Zhang ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2326273d9f13SBarry Smith ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2327e32f2f54SBarry Smith if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2328273d9f13SBarry Smith for (i=0; i<mbs; i++) { 2329273d9f13SBarry Smith ncols = ai[1] - ai[0]; ai++; 2330273d9f13SBarry Smith brow = bs*i; 2331273d9f13SBarry Smith for (j=0; j<ncols; j++) { 2332273d9f13SBarry Smith for (kcol=0; kcol<bs; kcol++) { 2333273d9f13SBarry Smith for (krow=0; krow<bs; krow++) { 2334273d9f13SBarry Smith atmp = PetscAbsScalar(*aa);aa++; 2335273d9f13SBarry Smith row = brow + krow; /* row index */ 2336985db425SBarry Smith if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2337273d9f13SBarry Smith } 2338273d9f13SBarry Smith } 2339273d9f13SBarry Smith aj++; 2340273d9f13SBarry Smith } 2341273d9f13SBarry Smith } 23421ebc52fbSHong Zhang ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2343273d9f13SBarry Smith PetscFunctionReturn(0); 2344273d9f13SBarry Smith } 2345273d9f13SBarry Smith 23463c896bc6SHong Zhang PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 23473c896bc6SHong Zhang { 23483c896bc6SHong Zhang PetscErrorCode ierr; 23493c896bc6SHong Zhang 23503c896bc6SHong Zhang PetscFunctionBegin; 23513c896bc6SHong Zhang /* If the two matrices have the same copy implementation, use fast copy. */ 23523c896bc6SHong Zhang if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 23533c896bc6SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 23543c896bc6SHong Zhang Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2355d88c0aacSHong Zhang PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 23563c896bc6SHong Zhang 2357d88c0aacSHong Zhang if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2358d88c0aacSHong Zhang if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2359580bdb30SBarry Smith ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr); 2360cdc753b6SBarry Smith ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 23613c896bc6SHong Zhang } else { 23623c896bc6SHong Zhang ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 23633c896bc6SHong Zhang } 23643c896bc6SHong Zhang PetscFunctionReturn(0); 23653c896bc6SHong Zhang } 23663c896bc6SHong Zhang 23674994cf47SJed Brown PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2368273d9f13SBarry Smith { 2369dfbe8321SBarry Smith PetscErrorCode ierr; 2370273d9f13SBarry Smith 2371273d9f13SBarry Smith PetscFunctionBegin; 2372f4259b30SLisandro Dalcin ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2373273d9f13SBarry Smith PetscFunctionReturn(0); 2374273d9f13SBarry Smith } 2375273d9f13SBarry Smith 2376cda14afcSprj- static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2377f2a5309cSSatish Balay { 2378f2a5309cSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 23796e111a19SKarl Rupp 2380f2a5309cSSatish Balay PetscFunctionBegin; 2381f2a5309cSSatish Balay *array = a->a; 2382f2a5309cSSatish Balay PetscFunctionReturn(0); 2383f2a5309cSSatish Balay } 2384f2a5309cSSatish Balay 2385cda14afcSprj- static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2386f2a5309cSSatish Balay { 2387f2a5309cSSatish Balay PetscFunctionBegin; 2388cda14afcSprj- *array = NULL; 2389f2a5309cSSatish Balay PetscFunctionReturn(0); 2390f2a5309cSSatish Balay } 2391f2a5309cSSatish Balay 239252768537SHong Zhang PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 239352768537SHong Zhang { 2394b264fe52SHong Zhang PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 239552768537SHong Zhang Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 239652768537SHong Zhang Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2397b264fe52SHong Zhang PetscErrorCode ierr; 239852768537SHong Zhang 239952768537SHong Zhang PetscFunctionBegin; 240052768537SHong Zhang /* Set the number of nonzeros in the new matrix */ 2401b264fe52SHong Zhang ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 240252768537SHong Zhang PetscFunctionReturn(0); 240352768537SHong Zhang } 240452768537SHong Zhang 2405f4df32b1SMatthew Knepley PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 240642ee4b1aSHong Zhang { 240742ee4b1aSHong Zhang Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2408dfbe8321SBarry Smith PetscErrorCode ierr; 240931ce2d13SHong Zhang PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2410e838b9e7SJed Brown PetscBLASInt one=1; 241142ee4b1aSHong Zhang 241242ee4b1aSHong Zhang PetscFunctionBegin; 241342ee4b1aSHong Zhang if (str == SAME_NONZERO_PATTERN) { 2414f4df32b1SMatthew Knepley PetscScalar alpha = a; 2415c5df96a5SBarry Smith PetscBLASInt bnz; 2416c5df96a5SBarry Smith ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 24178b83055fSJed Brown PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2418a3fa217bSJose E. Roman ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2419ab784542SHong Zhang } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2420ab784542SHong Zhang ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 242142ee4b1aSHong Zhang } else { 242252768537SHong Zhang Mat B; 242352768537SHong Zhang PetscInt *nnz; 242452768537SHong Zhang if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 242552768537SHong Zhang ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 242652768537SHong Zhang ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 242752768537SHong Zhang ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 242852768537SHong Zhang ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 242952768537SHong Zhang ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 243052768537SHong Zhang ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 243152768537SHong Zhang ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 243252768537SHong Zhang ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 243352768537SHong Zhang ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 243428be2f97SBarry Smith ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 243552768537SHong Zhang ierr = PetscFree(nnz);CHKERRQ(ierr); 243642ee4b1aSHong Zhang } 243742ee4b1aSHong Zhang PetscFunctionReturn(0); 243842ee4b1aSHong Zhang } 243942ee4b1aSHong Zhang 24402726fb6dSPierre Jolivet PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 24412726fb6dSPierre Jolivet { 24422726fb6dSPierre Jolivet #if defined(PETSC_USE_COMPLEX) 24432726fb6dSPierre Jolivet Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 24442726fb6dSPierre Jolivet PetscInt i,nz = a->bs2*a->i[a->mbs]; 24452726fb6dSPierre Jolivet MatScalar *aa = a->a; 24462726fb6dSPierre Jolivet 24472726fb6dSPierre Jolivet PetscFunctionBegin; 24482726fb6dSPierre Jolivet for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]); 24492726fb6dSPierre Jolivet #else 24502726fb6dSPierre Jolivet PetscFunctionBegin; 24512726fb6dSPierre Jolivet #endif 24522726fb6dSPierre Jolivet PetscFunctionReturn(0); 24532726fb6dSPierre Jolivet } 24542726fb6dSPierre Jolivet 245599cafbc1SBarry Smith PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 245699cafbc1SBarry Smith { 245799cafbc1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 245899cafbc1SBarry Smith PetscInt i,nz = a->bs2*a->i[a->mbs]; 2459dd6ea824SBarry Smith MatScalar *aa = a->a; 246099cafbc1SBarry Smith 246199cafbc1SBarry Smith PetscFunctionBegin; 246299cafbc1SBarry Smith for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 246399cafbc1SBarry Smith PetscFunctionReturn(0); 246499cafbc1SBarry Smith } 246599cafbc1SBarry Smith 246699cafbc1SBarry Smith PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 246799cafbc1SBarry Smith { 246899cafbc1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 246999cafbc1SBarry Smith PetscInt i,nz = a->bs2*a->i[a->mbs]; 2470dd6ea824SBarry Smith MatScalar *aa = a->a; 247199cafbc1SBarry Smith 247299cafbc1SBarry Smith PetscFunctionBegin; 247399cafbc1SBarry Smith for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 247499cafbc1SBarry Smith PetscFunctionReturn(0); 247599cafbc1SBarry Smith } 247699cafbc1SBarry Smith 24773acb8795SBarry Smith /* 24782479783cSJose E. Roman Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 24793acb8795SBarry Smith */ 24801a83f524SJed Brown PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 24813acb8795SBarry Smith { 24823acb8795SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 24833acb8795SBarry Smith PetscErrorCode ierr; 24843acb8795SBarry Smith PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 24853acb8795SBarry Smith PetscInt nz = a->i[m],row,*jj,mr,col; 24863acb8795SBarry Smith 24873acb8795SBarry Smith PetscFunctionBegin; 24883acb8795SBarry Smith *nn = n; 24893acb8795SBarry Smith if (!ia) PetscFunctionReturn(0); 2490e7e72b3dSBarry Smith if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2491e7e72b3dSBarry Smith else { 2492b9e7e5c1SBarry Smith ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr); 2493854ce69bSBarry Smith ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2494b9e7e5c1SBarry Smith ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr); 24953acb8795SBarry Smith jj = a->j; 24963acb8795SBarry Smith for (i=0; i<nz; i++) { 24973acb8795SBarry Smith collengths[jj[i]]++; 24983acb8795SBarry Smith } 24993acb8795SBarry Smith cia[0] = oshift; 25003acb8795SBarry Smith for (i=0; i<n; i++) { 25013acb8795SBarry Smith cia[i+1] = cia[i] + collengths[i]; 25023acb8795SBarry Smith } 2503580bdb30SBarry Smith ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr); 25043acb8795SBarry Smith jj = a->j; 25053acb8795SBarry Smith for (row=0; row<m; row++) { 25063acb8795SBarry Smith mr = a->i[row+1] - a->i[row]; 25073acb8795SBarry Smith for (i=0; i<mr; i++) { 25083acb8795SBarry Smith col = *jj++; 250926fbe8dcSKarl Rupp 25103acb8795SBarry Smith cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 25113acb8795SBarry Smith } 25123acb8795SBarry Smith } 25133acb8795SBarry Smith ierr = PetscFree(collengths);CHKERRQ(ierr); 25143acb8795SBarry Smith *ia = cia; *ja = cja; 25153acb8795SBarry Smith } 25163acb8795SBarry Smith PetscFunctionReturn(0); 25173acb8795SBarry Smith } 25183acb8795SBarry Smith 25191a83f524SJed Brown PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 25203acb8795SBarry Smith { 25213acb8795SBarry Smith PetscErrorCode ierr; 25223acb8795SBarry Smith 25233acb8795SBarry Smith PetscFunctionBegin; 25243acb8795SBarry Smith if (!ia) PetscFunctionReturn(0); 25253acb8795SBarry Smith ierr = PetscFree(*ia);CHKERRQ(ierr); 25263acb8795SBarry Smith ierr = PetscFree(*ja);CHKERRQ(ierr); 25273acb8795SBarry Smith PetscFunctionReturn(0); 25283acb8795SBarry Smith } 25293acb8795SBarry Smith 2530525d23c0SHong Zhang /* 2531525d23c0SHong Zhang MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2532525d23c0SHong Zhang MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2533040ebd07SHong Zhang spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2534525d23c0SHong Zhang */ 2535525d23c0SHong Zhang PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2536f6d58c54SBarry Smith { 2537525d23c0SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2538f6d58c54SBarry Smith PetscErrorCode ierr; 2539c0349474SHong Zhang PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2540525d23c0SHong Zhang PetscInt nz = a->i[m],row,*jj,mr,col; 2541525d23c0SHong Zhang PetscInt *cspidx; 2542f6d58c54SBarry Smith 2543f6d58c54SBarry Smith PetscFunctionBegin; 2544525d23c0SHong Zhang *nn = n; 2545525d23c0SHong Zhang if (!ia) PetscFunctionReturn(0); 2546f6d58c54SBarry Smith 2547b9e7e5c1SBarry Smith ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr); 2548854ce69bSBarry Smith ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2549b9e7e5c1SBarry Smith ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr); 2550b9e7e5c1SBarry Smith ierr = PetscMalloc1(nz,&cspidx);CHKERRQ(ierr); 2551525d23c0SHong Zhang jj = a->j; 2552525d23c0SHong Zhang for (i=0; i<nz; i++) { 2553525d23c0SHong Zhang collengths[jj[i]]++; 2554f6d58c54SBarry Smith } 2555525d23c0SHong Zhang cia[0] = oshift; 2556525d23c0SHong Zhang for (i=0; i<n; i++) { 2557525d23c0SHong Zhang cia[i+1] = cia[i] + collengths[i]; 2558525d23c0SHong Zhang } 2559580bdb30SBarry Smith ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr); 2560525d23c0SHong Zhang jj = a->j; 2561525d23c0SHong Zhang for (row=0; row<m; row++) { 2562525d23c0SHong Zhang mr = a->i[row+1] - a->i[row]; 2563525d23c0SHong Zhang for (i=0; i<mr; i++) { 2564525d23c0SHong Zhang col = *jj++; 2565525d23c0SHong Zhang cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2566525d23c0SHong Zhang cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2567525d23c0SHong Zhang } 2568525d23c0SHong Zhang } 2569525d23c0SHong Zhang ierr = PetscFree(collengths);CHKERRQ(ierr); 2570071fcb05SBarry Smith *ia = cia; 2571071fcb05SBarry Smith *ja = cja; 2572525d23c0SHong Zhang *spidx = cspidx; 2573525d23c0SHong Zhang PetscFunctionReturn(0); 2574f6d58c54SBarry Smith } 2575f6d58c54SBarry Smith 2576525d23c0SHong Zhang PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2577525d23c0SHong Zhang { 2578525d23c0SHong Zhang PetscErrorCode ierr; 2579f6d58c54SBarry Smith 2580525d23c0SHong Zhang PetscFunctionBegin; 2581525d23c0SHong Zhang ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2582525d23c0SHong Zhang ierr = PetscFree(*spidx);CHKERRQ(ierr); 2583f6d58c54SBarry Smith PetscFunctionReturn(0); 2584f6d58c54SBarry Smith } 258599cafbc1SBarry Smith 25867d68702bSBarry Smith PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 25877d68702bSBarry Smith { 25887d68702bSBarry Smith PetscErrorCode ierr; 25897d68702bSBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 25907d68702bSBarry Smith 25917d68702bSBarry Smith PetscFunctionBegin; 25926f33a894SBarry Smith if (!Y->preallocated || !aij->nz) { 25937d68702bSBarry Smith ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 25947d68702bSBarry Smith } 25957d68702bSBarry Smith ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 25967d68702bSBarry Smith PetscFunctionReturn(0); 25977d68702bSBarry Smith } 25987d68702bSBarry Smith 25992593348eSBarry Smith /* -------------------------------------------------------------------*/ 26003964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2601cc2dc46cSBarry Smith MatGetRow_SeqBAIJ, 2602cc2dc46cSBarry Smith MatRestoreRow_SeqBAIJ, 2603cc2dc46cSBarry Smith MatMult_SeqBAIJ_N, 260497304618SKris Buschelman /* 4*/ MatMultAdd_SeqBAIJ_N, 26057c922b88SBarry Smith MatMultTranspose_SeqBAIJ, 26067c922b88SBarry Smith MatMultTransposeAdd_SeqBAIJ, 2607f4259b30SLisandro Dalcin NULL, 2608f4259b30SLisandro Dalcin NULL, 2609f4259b30SLisandro Dalcin NULL, 2610f4259b30SLisandro Dalcin /* 10*/ NULL, 2611cc2dc46cSBarry Smith MatLUFactor_SeqBAIJ, 2612f4259b30SLisandro Dalcin NULL, 2613f4259b30SLisandro Dalcin NULL, 2614f2501298SSatish Balay MatTranspose_SeqBAIJ, 261597304618SKris Buschelman /* 15*/ MatGetInfo_SeqBAIJ, 2616cc2dc46cSBarry Smith MatEqual_SeqBAIJ, 2617cc2dc46cSBarry Smith MatGetDiagonal_SeqBAIJ, 2618cc2dc46cSBarry Smith MatDiagonalScale_SeqBAIJ, 2619cc2dc46cSBarry Smith MatNorm_SeqBAIJ, 2620f4259b30SLisandro Dalcin /* 20*/ NULL, 2621cc2dc46cSBarry Smith MatAssemblyEnd_SeqBAIJ, 2622cc2dc46cSBarry Smith MatSetOption_SeqBAIJ, 2623cc2dc46cSBarry Smith MatZeroEntries_SeqBAIJ, 2624d519adbfSMatthew Knepley /* 24*/ MatZeroRows_SeqBAIJ, 2625f4259b30SLisandro Dalcin NULL, 2626f4259b30SLisandro Dalcin NULL, 2627f4259b30SLisandro Dalcin NULL, 2628f4259b30SLisandro Dalcin NULL, 26294994cf47SJed Brown /* 29*/ MatSetUp_SeqBAIJ, 2630f4259b30SLisandro Dalcin NULL, 2631f4259b30SLisandro Dalcin NULL, 2632f4259b30SLisandro Dalcin NULL, 2633f4259b30SLisandro Dalcin NULL, 2634d519adbfSMatthew Knepley /* 34*/ MatDuplicate_SeqBAIJ, 2635f4259b30SLisandro Dalcin NULL, 2636f4259b30SLisandro Dalcin NULL, 2637cc2dc46cSBarry Smith MatILUFactor_SeqBAIJ, 2638f4259b30SLisandro Dalcin NULL, 2639d519adbfSMatthew Knepley /* 39*/ MatAXPY_SeqBAIJ, 26407dae84e0SHong Zhang MatCreateSubMatrices_SeqBAIJ, 2641cc2dc46cSBarry Smith MatIncreaseOverlap_SeqBAIJ, 2642cc2dc46cSBarry Smith MatGetValues_SeqBAIJ, 26433c896bc6SHong Zhang MatCopy_SeqBAIJ, 2644f4259b30SLisandro Dalcin /* 44*/ NULL, 2645cc2dc46cSBarry Smith MatScale_SeqBAIJ, 26467d68702bSBarry Smith MatShift_SeqBAIJ, 2647f4259b30SLisandro Dalcin NULL, 264897b48c8fSBarry Smith MatZeroRowsColumns_SeqBAIJ, 2649f4259b30SLisandro Dalcin /* 49*/ NULL, 26503b2fbd54SBarry Smith MatGetRowIJ_SeqBAIJ, 265192c4ed94SBarry Smith MatRestoreRowIJ_SeqBAIJ, 26523acb8795SBarry Smith MatGetColumnIJ_SeqBAIJ, 26533acb8795SBarry Smith MatRestoreColumnIJ_SeqBAIJ, 265493dfae19SHong Zhang /* 54*/ MatFDColoringCreate_SeqXAIJ, 2655f4259b30SLisandro Dalcin NULL, 2656f4259b30SLisandro Dalcin NULL, 2657090001bdSToby Isaac NULL, 2658d3825aa8SBarry Smith MatSetValuesBlocked_SeqBAIJ, 26597dae84e0SHong Zhang /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2660b9b97703SBarry Smith MatDestroy_SeqBAIJ, 2661b9b97703SBarry Smith MatView_SeqBAIJ, 2662f4259b30SLisandro Dalcin NULL, 2663f4259b30SLisandro Dalcin NULL, 2664f4259b30SLisandro Dalcin /* 64*/ NULL, 2665f4259b30SLisandro Dalcin NULL, 2666f4259b30SLisandro Dalcin NULL, 2667f4259b30SLisandro Dalcin NULL, 2668f4259b30SLisandro Dalcin NULL, 2669d519adbfSMatthew Knepley /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2670f4259b30SLisandro Dalcin NULL, 2671c87e5d42SMatthew Knepley MatConvert_Basic, 2672f4259b30SLisandro Dalcin NULL, 2673f4259b30SLisandro Dalcin NULL, 2674f4259b30SLisandro Dalcin /* 74*/ NULL, 2675f6d58c54SBarry Smith MatFDColoringApply_BAIJ, 2676f4259b30SLisandro Dalcin NULL, 2677f4259b30SLisandro Dalcin NULL, 2678f4259b30SLisandro Dalcin NULL, 2679f4259b30SLisandro Dalcin /* 79*/ NULL, 2680f4259b30SLisandro Dalcin NULL, 2681f4259b30SLisandro Dalcin NULL, 2682f4259b30SLisandro Dalcin NULL, 26835bba2384SShri Abhyankar MatLoad_SeqBAIJ, 2684f4259b30SLisandro Dalcin /* 84*/ NULL, 2685f4259b30SLisandro Dalcin NULL, 2686f4259b30SLisandro Dalcin NULL, 2687f4259b30SLisandro Dalcin NULL, 2688f4259b30SLisandro Dalcin NULL, 2689f4259b30SLisandro Dalcin /* 89*/ NULL, 2690f4259b30SLisandro Dalcin NULL, 2691f4259b30SLisandro Dalcin NULL, 2692f4259b30SLisandro Dalcin NULL, 2693f4259b30SLisandro Dalcin NULL, 2694f4259b30SLisandro Dalcin /* 94*/ NULL, 2695f4259b30SLisandro Dalcin NULL, 2696f4259b30SLisandro Dalcin NULL, 2697f4259b30SLisandro Dalcin NULL, 2698f4259b30SLisandro Dalcin NULL, 2699f4259b30SLisandro Dalcin /* 99*/ NULL, 2700f4259b30SLisandro Dalcin NULL, 2701f4259b30SLisandro Dalcin NULL, 27022726fb6dSPierre Jolivet MatConjugate_SeqBAIJ, 2703f4259b30SLisandro Dalcin NULL, 2704f4259b30SLisandro Dalcin /*104*/ NULL, 270599cafbc1SBarry Smith MatRealPart_SeqBAIJ, 27062af78befSBarry Smith MatImaginaryPart_SeqBAIJ, 2707f4259b30SLisandro Dalcin NULL, 2708f4259b30SLisandro Dalcin NULL, 2709f4259b30SLisandro Dalcin /*109*/ NULL, 2710f4259b30SLisandro Dalcin NULL, 2711f4259b30SLisandro Dalcin NULL, 2712f4259b30SLisandro Dalcin NULL, 2713547795f9SHong Zhang MatMissingDiagonal_SeqBAIJ, 2714f4259b30SLisandro Dalcin /*114*/ NULL, 2715f4259b30SLisandro Dalcin NULL, 2716f4259b30SLisandro Dalcin NULL, 2717f4259b30SLisandro Dalcin NULL, 2718f4259b30SLisandro Dalcin NULL, 2719f4259b30SLisandro Dalcin /*119*/ NULL, 2720f4259b30SLisandro Dalcin NULL, 2721547795f9SHong Zhang MatMultHermitianTranspose_SeqBAIJ, 2722d6037b41SHong Zhang MatMultHermitianTransposeAdd_SeqBAIJ, 2723f4259b30SLisandro Dalcin NULL, 2724f4259b30SLisandro Dalcin /*124*/ NULL, 2725f4259b30SLisandro Dalcin NULL, 27263964eb88SJed Brown MatInvertBlockDiagonal_SeqBAIJ, 2727f4259b30SLisandro Dalcin NULL, 2728f4259b30SLisandro Dalcin NULL, 2729f4259b30SLisandro Dalcin /*129*/ NULL, 2730f4259b30SLisandro Dalcin NULL, 2731f4259b30SLisandro Dalcin NULL, 2732f4259b30SLisandro Dalcin NULL, 2733f4259b30SLisandro Dalcin NULL, 2734f4259b30SLisandro Dalcin /*134*/ NULL, 2735f4259b30SLisandro Dalcin NULL, 2736f4259b30SLisandro Dalcin NULL, 2737f4259b30SLisandro Dalcin NULL, 2738f4259b30SLisandro Dalcin NULL, 273946533700Sstefano_zampini /*139*/ MatSetBlockSizes_Default, 2740f4259b30SLisandro Dalcin NULL, 2741f4259b30SLisandro Dalcin NULL, 2742bdf6f3fcSHong Zhang MatFDColoringSetUp_SeqXAIJ, 2743f4259b30SLisandro Dalcin NULL, 274486e85357SHong Zhang /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 274586e85357SHong Zhang MatDestroySubMatrices_SeqBAIJ 274699cafbc1SBarry Smith }; 27472593348eSBarry Smith 27487087cfbeSBarry Smith PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 27493e90b805SBarry Smith { 27503e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 27518ece6314SShri Abhyankar PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2752dfbe8321SBarry Smith PetscErrorCode ierr; 27533e90b805SBarry Smith 27543e90b805SBarry Smith PetscFunctionBegin; 2755e7e72b3dSBarry Smith if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 27563e90b805SBarry Smith 27573e90b805SBarry Smith /* allocate space for values if not already there */ 27583e90b805SBarry Smith if (!aij->saved_values) { 2759854ce69bSBarry Smith ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 27603bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 27613e90b805SBarry Smith } 27623e90b805SBarry Smith 27633e90b805SBarry Smith /* copy values over */ 2764580bdb30SBarry Smith ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr); 27653e90b805SBarry Smith PetscFunctionReturn(0); 27663e90b805SBarry Smith } 27673e90b805SBarry Smith 27687087cfbeSBarry Smith PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 27693e90b805SBarry Smith { 27703e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 27716849ba73SBarry Smith PetscErrorCode ierr; 27728ece6314SShri Abhyankar PetscInt nz = aij->i[aij->mbs]*aij->bs2; 27733e90b805SBarry Smith 27743e90b805SBarry Smith PetscFunctionBegin; 2775e7e72b3dSBarry Smith if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2776e7e72b3dSBarry Smith if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 27773e90b805SBarry Smith 27783e90b805SBarry Smith /* copy values over */ 2779580bdb30SBarry Smith ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr); 27803e90b805SBarry Smith PetscFunctionReturn(0); 27813e90b805SBarry Smith } 27823e90b805SBarry Smith 2783cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2784cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2785273d9f13SBarry Smith 2786b5b72c8aSIrina Sokolova PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2787a23d5eceSKris Buschelman { 2788a23d5eceSKris Buschelman Mat_SeqBAIJ *b; 27896849ba73SBarry Smith PetscErrorCode ierr; 2790535b19f3SBarry Smith PetscInt i,mbs,nbs,bs2; 27918afaa268SBarry Smith PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2792a23d5eceSKris Buschelman 2793a23d5eceSKris Buschelman PetscFunctionBegin; 27942576faa2SJed Brown if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2795ab93d7beSBarry Smith if (nz == MAT_SKIP_ALLOCATION) { 2796ab93d7beSBarry Smith skipallocation = PETSC_TRUE; 2797ab93d7beSBarry Smith nz = 0; 2798ab93d7beSBarry Smith } 27998c07d4e3SBarry Smith 280033d57670SJed Brown ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 280126283091SBarry Smith ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 280226283091SBarry Smith ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2803e02043d6SBarry Smith ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2804899cda47SBarry Smith 2805899cda47SBarry Smith B->preallocated = PETSC_TRUE; 2806899cda47SBarry Smith 2807d0f46423SBarry Smith mbs = B->rmap->n/bs; 2808d0f46423SBarry Smith nbs = B->cmap->n/bs; 2809a23d5eceSKris Buschelman bs2 = bs*bs; 2810a23d5eceSKris Buschelman 281165e19b50SBarry Smith if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2812a23d5eceSKris Buschelman 2813a23d5eceSKris Buschelman if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2814e32f2f54SBarry Smith if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2815a23d5eceSKris Buschelman if (nnz) { 2816a23d5eceSKris Buschelman for (i=0; i<mbs; i++) { 2817e32f2f54SBarry Smith if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2818e32f2f54SBarry Smith if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2819a23d5eceSKris Buschelman } 2820a23d5eceSKris Buschelman } 2821a23d5eceSKris Buschelman 2822a23d5eceSKris Buschelman b = (Mat_SeqBAIJ*)B->data; 2823ce94432eSBarry Smith ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 28248afaa268SBarry Smith ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 28258c07d4e3SBarry Smith ierr = PetscOptionsEnd();CHKERRQ(ierr); 28268c07d4e3SBarry Smith 2827a23d5eceSKris Buschelman if (!flg) { 2828a23d5eceSKris Buschelman switch (bs) { 2829a23d5eceSKris Buschelman case 1: 2830a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_1; 2831a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2832a23d5eceSKris Buschelman break; 2833a23d5eceSKris Buschelman case 2: 2834a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_2; 2835a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2836a23d5eceSKris Buschelman break; 2837a23d5eceSKris Buschelman case 3: 2838a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_3; 2839a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2840a23d5eceSKris Buschelman break; 2841a23d5eceSKris Buschelman case 4: 2842a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_4; 2843a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2844a23d5eceSKris Buschelman break; 2845a23d5eceSKris Buschelman case 5: 2846a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_5; 2847a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2848a23d5eceSKris Buschelman break; 2849a23d5eceSKris Buschelman case 6: 2850a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_6; 2851a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2852a23d5eceSKris Buschelman break; 2853a23d5eceSKris Buschelman case 7: 2854a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_7; 2855a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2856a23d5eceSKris Buschelman break; 285796e086a2SDaniel Kokron case 9: 28586679dcc1SBarry Smith { 28596679dcc1SBarry Smith PetscInt version = 1; 28606679dcc1SBarry Smith ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr); 28616679dcc1SBarry Smith switch (version) { 28625f70456aSHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 28636679dcc1SBarry Smith case 1: 286496e086a2SDaniel Kokron B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 286596e086a2SDaniel Kokron B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 2866*1e1ea65dSPierre Jolivet ierr = PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 28676679dcc1SBarry Smith break; 28686679dcc1SBarry Smith #endif 28696679dcc1SBarry Smith default: 287096e086a2SDaniel Kokron B->ops->mult = MatMult_SeqBAIJ_N; 287196e086a2SDaniel Kokron B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2872*1e1ea65dSPierre Jolivet ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 287396e086a2SDaniel Kokron break; 28746679dcc1SBarry Smith } 28756679dcc1SBarry Smith break; 28766679dcc1SBarry Smith } 2877ebada01fSBarry Smith case 11: 2878ebada01fSBarry Smith B->ops->mult = MatMult_SeqBAIJ_11; 2879ebada01fSBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2880ebada01fSBarry Smith break; 28816679dcc1SBarry Smith case 12: 28826679dcc1SBarry Smith { 28836679dcc1SBarry Smith PetscInt version = 1; 28846679dcc1SBarry Smith ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr); 28856679dcc1SBarry Smith switch (version) { 28866679dcc1SBarry Smith case 1: 28876679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_12_ver1; 28886679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 2889*1e1ea65dSPierre Jolivet ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 28908ab949d8SShri Abhyankar break; 28916679dcc1SBarry Smith case 2: 28926679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_12_ver2; 28936679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 2894*1e1ea65dSPierre Jolivet ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 28956679dcc1SBarry Smith break; 28966679dcc1SBarry Smith #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 28976679dcc1SBarry Smith case 3: 28986679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 28996679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 2900*1e1ea65dSPierre Jolivet ierr = PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 29016679dcc1SBarry Smith break; 29026679dcc1SBarry Smith #endif 2903a23d5eceSKris Buschelman default: 2904a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_N; 2905a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2906*1e1ea65dSPierre Jolivet ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 29076679dcc1SBarry Smith break; 29086679dcc1SBarry Smith } 29096679dcc1SBarry Smith break; 29106679dcc1SBarry Smith } 29116679dcc1SBarry Smith case 15: 29126679dcc1SBarry Smith { 29136679dcc1SBarry Smith PetscInt version = 1; 29146679dcc1SBarry Smith ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr); 29156679dcc1SBarry Smith switch (version) { 29166679dcc1SBarry Smith case 1: 29176679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2918*1e1ea65dSPierre Jolivet ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 29196679dcc1SBarry Smith break; 29206679dcc1SBarry Smith case 2: 29216679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver2; 2922*1e1ea65dSPierre Jolivet ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 29236679dcc1SBarry Smith break; 29246679dcc1SBarry Smith case 3: 29256679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver3; 2926*1e1ea65dSPierre Jolivet ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 29276679dcc1SBarry Smith break; 29286679dcc1SBarry Smith case 4: 29296679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver4; 2930*1e1ea65dSPierre Jolivet ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 29316679dcc1SBarry Smith break; 29326679dcc1SBarry Smith default: 29336679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_N; 2934*1e1ea65dSPierre Jolivet ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 29356679dcc1SBarry Smith break; 29366679dcc1SBarry Smith } 29376679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_N; 29386679dcc1SBarry Smith break; 29396679dcc1SBarry Smith } 29406679dcc1SBarry Smith default: 29416679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_N; 29426679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2943*1e1ea65dSPierre Jolivet ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 2944a23d5eceSKris Buschelman break; 2945a23d5eceSKris Buschelman } 2946a23d5eceSKris Buschelman } 2947e48d15efSToby Isaac B->ops->sor = MatSOR_SeqBAIJ; 2948a23d5eceSKris Buschelman b->mbs = mbs; 2949a23d5eceSKris Buschelman b->nbs = nbs; 2950ab93d7beSBarry Smith if (!skipallocation) { 29512ee49352SLisandro Dalcin if (!b->imax) { 2952dcca6d9dSJed Brown ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 29533bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 295426fbe8dcSKarl Rupp 29554fd072dbSBarry Smith b->free_imax_ilen = PETSC_TRUE; 29562ee49352SLisandro Dalcin } 2957ab93d7beSBarry Smith /* b->ilen will count nonzeros in each block row so far. */ 295826fbe8dcSKarl Rupp for (i=0; i<mbs; i++) b->ilen[i] = 0; 2959a23d5eceSKris Buschelman if (!nnz) { 2960a23d5eceSKris Buschelman if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2961c62bd62aSJed Brown else if (nz < 0) nz = 1; 29625d2a9ed1SStefano Zampini nz = PetscMin(nz,nbs); 2963a23d5eceSKris Buschelman for (i=0; i<mbs; i++) b->imax[i] = nz; 296461778c46SBarry Smith ierr = PetscIntMultError(nz,mbs,&nz);CHKERRQ(ierr); 2965a23d5eceSKris Buschelman } else { 2966c73702f5SBarry Smith PetscInt64 nz64 = 0; 2967c73702f5SBarry Smith for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];} 2968c73702f5SBarry Smith ierr = PetscIntCast(nz64,&nz);CHKERRQ(ierr); 2969a23d5eceSKris Buschelman } 2970a23d5eceSKris Buschelman 2971a23d5eceSKris Buschelman /* allocate the matrix space */ 29722ee49352SLisandro Dalcin ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2973672ba085SHong Zhang if (B->structure_only) { 2974672ba085SHong Zhang ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr); 2975672ba085SHong Zhang ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr); 2976672ba085SHong Zhang ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr); 2977672ba085SHong Zhang } else { 29786679dcc1SBarry Smith PetscInt nzbs2 = 0; 29796679dcc1SBarry Smith ierr = PetscIntMultError(nz,bs2,&nzbs2);CHKERRQ(ierr); 29806679dcc1SBarry Smith ierr = PetscMalloc3(nzbs2,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 29813bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2982580bdb30SBarry Smith ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr); 2983672ba085SHong Zhang } 2984580bdb30SBarry Smith ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr); 298526fbe8dcSKarl Rupp 2986672ba085SHong Zhang if (B->structure_only) { 2987672ba085SHong Zhang b->singlemalloc = PETSC_FALSE; 2988672ba085SHong Zhang b->free_a = PETSC_FALSE; 2989672ba085SHong Zhang } else { 2990a23d5eceSKris Buschelman b->singlemalloc = PETSC_TRUE; 2991672ba085SHong Zhang b->free_a = PETSC_TRUE; 2992672ba085SHong Zhang } 2993672ba085SHong Zhang b->free_ij = PETSC_TRUE; 2994672ba085SHong Zhang 2995a23d5eceSKris Buschelman b->i[0] = 0; 2996a23d5eceSKris Buschelman for (i=1; i<mbs+1; i++) { 2997a23d5eceSKris Buschelman b->i[i] = b->i[i-1] + b->imax[i-1]; 2998a23d5eceSKris Buschelman } 2999672ba085SHong Zhang 3000e811da20SHong Zhang } else { 3001e6b907acSBarry Smith b->free_a = PETSC_FALSE; 3002e6b907acSBarry Smith b->free_ij = PETSC_FALSE; 3003ab93d7beSBarry Smith } 3004a23d5eceSKris Buschelman 3005a23d5eceSKris Buschelman b->bs2 = bs2; 3006a23d5eceSKris Buschelman b->mbs = mbs; 3007a23d5eceSKris Buschelman b->nz = 0; 3008b32cb4a7SJed Brown b->maxnz = nz; 3009b32cb4a7SJed Brown B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3010cb7b82ddSBarry Smith B->was_assembled = PETSC_FALSE; 3011cb7b82ddSBarry Smith B->assembled = PETSC_FALSE; 30122576faa2SJed Brown if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 3013a23d5eceSKris Buschelman PetscFunctionReturn(0); 3014a23d5eceSKris Buschelman } 3015a23d5eceSKris Buschelman 3016cf12db73SBarry Smith PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3017725b52f3SLisandro Dalcin { 3018725b52f3SLisandro Dalcin PetscInt i,m,nz,nz_max=0,*nnz; 3019f4259b30SLisandro Dalcin PetscScalar *values=NULL; 3020d47bf9aaSJed Brown PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 3021725b52f3SLisandro Dalcin PetscErrorCode ierr; 3022725b52f3SLisandro Dalcin 3023725b52f3SLisandro Dalcin PetscFunctionBegin; 3024e32f2f54SBarry Smith if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 302526283091SBarry Smith ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 302626283091SBarry Smith ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 302726283091SBarry Smith ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 302826283091SBarry Smith ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3029e02043d6SBarry Smith ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3030d0f46423SBarry Smith m = B->rmap->n/bs; 3031725b52f3SLisandro Dalcin 303226fbe8dcSKarl Rupp if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 3033854ce69bSBarry Smith ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 3034725b52f3SLisandro Dalcin for (i=0; i<m; i++) { 3035cf12db73SBarry Smith nz = ii[i+1]- ii[i]; 303626fbe8dcSKarl Rupp if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 3037725b52f3SLisandro Dalcin nz_max = PetscMax(nz_max, nz); 3038725b52f3SLisandro Dalcin nnz[i] = nz; 3039725b52f3SLisandro Dalcin } 3040725b52f3SLisandro Dalcin ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3041725b52f3SLisandro Dalcin ierr = PetscFree(nnz);CHKERRQ(ierr); 3042725b52f3SLisandro Dalcin 3043725b52f3SLisandro Dalcin values = (PetscScalar*)V; 3044725b52f3SLisandro Dalcin if (!values) { 30451795a4d1SJed Brown ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 3046725b52f3SLisandro Dalcin } 3047725b52f3SLisandro Dalcin for (i=0; i<m; i++) { 3048cf12db73SBarry Smith PetscInt ncols = ii[i+1] - ii[i]; 3049cf12db73SBarry Smith const PetscInt *icols = jj + ii[i]; 3050bb80cfbbSStefano Zampini if (bs == 1 || !roworiented) { 3051cf12db73SBarry Smith const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3052725b52f3SLisandro Dalcin ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 30533adadaf3SJed Brown } else { 30543adadaf3SJed Brown PetscInt j; 30553adadaf3SJed Brown for (j=0; j<ncols; j++) { 30563adadaf3SJed Brown const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 3057d47bf9aaSJed Brown ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 30583adadaf3SJed Brown } 30593adadaf3SJed Brown } 3060725b52f3SLisandro Dalcin } 3061725b52f3SLisandro Dalcin if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3062725b52f3SLisandro Dalcin ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3063725b52f3SLisandro Dalcin ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 30647827cd58SJed Brown ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3065725b52f3SLisandro Dalcin PetscFunctionReturn(0); 3066725b52f3SLisandro Dalcin } 3067725b52f3SLisandro Dalcin 3068cda14afcSprj- /*@C 3069cda14afcSprj- MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored 3070cda14afcSprj- 3071cda14afcSprj- Not Collective 3072cda14afcSprj- 3073cda14afcSprj- Input Parameter: 3074cda14afcSprj- . mat - a MATSEQBAIJ matrix 3075cda14afcSprj- 3076cda14afcSprj- Output Parameter: 3077cda14afcSprj- . array - pointer to the data 3078cda14afcSprj- 3079cda14afcSprj- Level: intermediate 3080cda14afcSprj- 3081cda14afcSprj- .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray() 3082cda14afcSprj- @*/ 3083cda14afcSprj- PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array) 3084cda14afcSprj- { 3085cda14afcSprj- PetscErrorCode ierr; 3086cda14afcSprj- 3087cda14afcSprj- PetscFunctionBegin; 3088cda14afcSprj- ierr = PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 3089cda14afcSprj- PetscFunctionReturn(0); 3090cda14afcSprj- } 3091cda14afcSprj- 3092cda14afcSprj- /*@C 3093cda14afcSprj- MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray() 3094cda14afcSprj- 3095cda14afcSprj- Not Collective 3096cda14afcSprj- 3097cda14afcSprj- Input Parameters: 3098cda14afcSprj- + mat - a MATSEQBAIJ matrix 3099cda14afcSprj- - array - pointer to the data 3100cda14afcSprj- 3101cda14afcSprj- Level: intermediate 3102cda14afcSprj- 3103cda14afcSprj- .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray() 3104cda14afcSprj- @*/ 3105cda14afcSprj- PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array) 3106cda14afcSprj- { 3107cda14afcSprj- PetscErrorCode ierr; 3108cda14afcSprj- 3109cda14afcSprj- PetscFunctionBegin; 3110cda14afcSprj- ierr = PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 3111cda14afcSprj- PetscFunctionReturn(0); 3112cda14afcSprj- } 3113cda14afcSprj- 31140bad9183SKris Buschelman /*MC 3115fafad747SKris Buschelman MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 31160bad9183SKris Buschelman block sparse compressed row format. 31170bad9183SKris Buschelman 31180bad9183SKris Buschelman Options Database Keys: 31196679dcc1SBarry Smith + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 31206679dcc1SBarry Smith - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 31210bad9183SKris Buschelman 31220bad9183SKris Buschelman Level: beginner 31230cd7f59aSBarry Smith 31240cd7f59aSBarry Smith Notes: 31250cd7f59aSBarry Smith MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no 31260cd7f59aSBarry Smith space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 31270bad9183SKris Buschelman 31286679dcc1SBarry Smith Run with -info to see what version of the matrix-vector product is being used 31296679dcc1SBarry Smith 3130f0c06035SSatish Balay .seealso: MatCreateSeqBAIJ() 31310bad9183SKris Buschelman M*/ 31320bad9183SKris Buschelman 3133cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3134b24902e0SBarry Smith 31358cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 31362593348eSBarry Smith { 3137dfbe8321SBarry Smith PetscErrorCode ierr; 3138c1ac3661SBarry Smith PetscMPIInt size; 3139b6490206SBarry Smith Mat_SeqBAIJ *b; 31403b2fbd54SBarry Smith 31413a40ed3dSBarry Smith PetscFunctionBegin; 3142ffc4695bSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 3143e32f2f54SBarry Smith if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3144b6490206SBarry Smith 3145b00a9115SJed Brown ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3146b0a32e0cSBarry Smith B->data = (void*)b; 3147549d3d68SSatish Balay ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 314826fbe8dcSKarl Rupp 3149f4259b30SLisandro Dalcin b->row = NULL; 3150f4259b30SLisandro Dalcin b->col = NULL; 3151f4259b30SLisandro Dalcin b->icol = NULL; 31522593348eSBarry Smith b->reallocs = 0; 3153f4259b30SLisandro Dalcin b->saved_values = NULL; 31542593348eSBarry Smith 3155c4992f7dSBarry Smith b->roworiented = PETSC_TRUE; 31562593348eSBarry Smith b->nonew = 0; 3157f4259b30SLisandro Dalcin b->diag = NULL; 3158f4259b30SLisandro Dalcin B->spptr = NULL; 3159b32cb4a7SJed Brown B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3160a9817697SBarry Smith b->keepnonzeropattern = PETSC_FALSE; 31614e220ebcSLois Curfman McInnes 3162cda14afcSprj- ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);CHKERRQ(ierr); 3163cda14afcSprj- ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);CHKERRQ(ierr); 3164bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3165bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3166bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3167bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3168bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3169bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3170bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3171bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3172bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 31737ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 3174c9225affSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 31757ea3e4caSstefano_zampini #endif 3176c9225affSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 317717667f90SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 31783a40ed3dSBarry Smith PetscFunctionReturn(0); 31792593348eSBarry Smith } 31802593348eSBarry Smith 3181ace3abfcSBarry Smith PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 31822593348eSBarry Smith { 3183b24902e0SBarry Smith Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 31846849ba73SBarry Smith PetscErrorCode ierr; 3185a96a251dSBarry Smith PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3186de6a44a3SBarry Smith 31873a40ed3dSBarry Smith PetscFunctionBegin; 3188e32f2f54SBarry Smith if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 31892593348eSBarry Smith 31904fd072dbSBarry Smith if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 31914fd072dbSBarry Smith c->imax = a->imax; 31924fd072dbSBarry Smith c->ilen = a->ilen; 31934fd072dbSBarry Smith c->free_imax_ilen = PETSC_FALSE; 31944fd072dbSBarry Smith } else { 3195dcca6d9dSJed Brown ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 31963bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3197b6490206SBarry Smith for (i=0; i<mbs; i++) { 31982593348eSBarry Smith c->imax[i] = a->imax[i]; 31992593348eSBarry Smith c->ilen[i] = a->ilen[i]; 32002593348eSBarry Smith } 32014fd072dbSBarry Smith c->free_imax_ilen = PETSC_TRUE; 32024fd072dbSBarry Smith } 32032593348eSBarry Smith 32042593348eSBarry Smith /* allocate the matrix space */ 320516a2bf60SHong Zhang if (mallocmatspace) { 32064fd072dbSBarry Smith if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 32071795a4d1SJed Brown ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 32083bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 320926fbe8dcSKarl Rupp 32104fd072dbSBarry Smith c->i = a->i; 32114fd072dbSBarry Smith c->j = a->j; 3212379be0ddSLisandro Dalcin c->singlemalloc = PETSC_FALSE; 3213379be0ddSLisandro Dalcin c->free_a = PETSC_TRUE; 3214379be0ddSLisandro Dalcin c->free_ij = PETSC_FALSE; 32154fd072dbSBarry Smith c->parent = A; 32161e40a84eSLisandro Dalcin C->preallocated = PETSC_TRUE; 32171e40a84eSLisandro Dalcin C->assembled = PETSC_TRUE; 321826fbe8dcSKarl Rupp 32194fd072dbSBarry Smith ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 32204fd072dbSBarry Smith ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 32214fd072dbSBarry Smith ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 32224fd072dbSBarry Smith } else { 3223dcca6d9dSJed Brown ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 32243bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 322526fbe8dcSKarl Rupp 3226c4992f7dSBarry Smith c->singlemalloc = PETSC_TRUE; 3227379be0ddSLisandro Dalcin c->free_a = PETSC_TRUE; 32284fd072dbSBarry Smith c->free_ij = PETSC_TRUE; 322926fbe8dcSKarl Rupp 3230580bdb30SBarry Smith ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr); 3231b6490206SBarry Smith if (mbs > 0) { 3232580bdb30SBarry Smith ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr); 32332e8a6d31SBarry Smith if (cpvalues == MAT_COPY_VALUES) { 3234580bdb30SBarry Smith ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr); 32352e8a6d31SBarry Smith } else { 3236580bdb30SBarry Smith ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr); 32372593348eSBarry Smith } 32382593348eSBarry Smith } 32391e40a84eSLisandro Dalcin C->preallocated = PETSC_TRUE; 32401e40a84eSLisandro Dalcin C->assembled = PETSC_TRUE; 324116a2bf60SHong Zhang } 32424fd072dbSBarry Smith } 324316a2bf60SHong Zhang 32442593348eSBarry Smith c->roworiented = a->roworiented; 32452593348eSBarry Smith c->nonew = a->nonew; 324626fbe8dcSKarl Rupp 32471e1e43feSBarry Smith ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 32481e1e43feSBarry Smith ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 324926fbe8dcSKarl Rupp 32505c9eb25fSBarry Smith c->bs2 = a->bs2; 32515c9eb25fSBarry Smith c->mbs = a->mbs; 32525c9eb25fSBarry Smith c->nbs = a->nbs; 32532593348eSBarry Smith 32542593348eSBarry Smith if (a->diag) { 32554fd072dbSBarry Smith if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 32564fd072dbSBarry Smith c->diag = a->diag; 32574fd072dbSBarry Smith c->free_diag = PETSC_FALSE; 32584fd072dbSBarry Smith } else { 3259854ce69bSBarry Smith ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 32603bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 326126fbe8dcSKarl Rupp for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 32624fd072dbSBarry Smith c->free_diag = PETSC_TRUE; 32634fd072dbSBarry Smith } 3264f4259b30SLisandro Dalcin } else c->diag = NULL; 326526fbe8dcSKarl Rupp 32662593348eSBarry Smith c->nz = a->nz; 3267f2cbd3d5SJed Brown c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3268f361c04dSBarry Smith c->solve_work = NULL; 3269f361c04dSBarry Smith c->mult_work = NULL; 3270f361c04dSBarry Smith c->sor_workt = NULL; 3271f361c04dSBarry Smith c->sor_work = NULL; 327288e51ccdSHong Zhang 327388e51ccdSHong Zhang c->compressedrow.use = a->compressedrow.use; 327488e51ccdSHong Zhang c->compressedrow.nrows = a->compressedrow.nrows; 3275cd6b891eSBarry Smith if (a->compressedrow.use) { 327688e51ccdSHong Zhang i = a->compressedrow.nrows; 3277dcca6d9dSJed Brown ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 32783bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3279580bdb30SBarry Smith ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr); 3280580bdb30SBarry Smith ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr); 328188e51ccdSHong Zhang } else { 328288e51ccdSHong Zhang c->compressedrow.use = PETSC_FALSE; 32830298fd71SBarry Smith c->compressedrow.i = NULL; 32840298fd71SBarry Smith c->compressedrow.rindex = NULL; 328588e51ccdSHong Zhang } 3286e56f5c9eSBarry Smith C->nonzerostate = A->nonzerostate; 328726fbe8dcSKarl Rupp 3288140e18c1SBarry Smith ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 32893a40ed3dSBarry Smith PetscFunctionReturn(0); 32902593348eSBarry Smith } 32912593348eSBarry Smith 3292b24902e0SBarry Smith PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3293b24902e0SBarry Smith { 3294b24902e0SBarry Smith PetscErrorCode ierr; 3295b24902e0SBarry Smith 3296b24902e0SBarry Smith PetscFunctionBegin; 3297ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3298d0f46423SBarry Smith ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 32995c9eb25fSBarry Smith ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 330098ad0f72SJed Brown ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3301b24902e0SBarry Smith PetscFunctionReturn(0); 3302b24902e0SBarry Smith } 3303b24902e0SBarry Smith 3304618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3305b51a4376SLisandro Dalcin PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer) 3306f501eaabSShri Abhyankar { 3307b51a4376SLisandro Dalcin PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k; 3308b51a4376SLisandro Dalcin PetscInt *rowidxs,*colidxs; 3309b51a4376SLisandro Dalcin PetscScalar *matvals; 3310f501eaabSShri Abhyankar PetscErrorCode ierr; 3311b51a4376SLisandro Dalcin 3312b51a4376SLisandro Dalcin PetscFunctionBegin; 3313b51a4376SLisandro Dalcin ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3314b51a4376SLisandro Dalcin 3315b51a4376SLisandro Dalcin /* read matrix header */ 3316b51a4376SLisandro Dalcin ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3317b51a4376SLisandro Dalcin if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3318b51a4376SLisandro Dalcin M = header[1]; N = header[2]; nz = header[3]; 3319b51a4376SLisandro Dalcin if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3320b51a4376SLisandro Dalcin if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 3321b51a4376SLisandro Dalcin if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3322b51a4376SLisandro Dalcin 3323b51a4376SLisandro Dalcin /* set block sizes from the viewer's .info file */ 3324b51a4376SLisandro Dalcin ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3325b51a4376SLisandro Dalcin /* set local and global sizes if not set already */ 3326b51a4376SLisandro Dalcin if (mat->rmap->n < 0) mat->rmap->n = M; 3327b51a4376SLisandro Dalcin if (mat->cmap->n < 0) mat->cmap->n = N; 3328b51a4376SLisandro Dalcin if (mat->rmap->N < 0) mat->rmap->N = M; 3329b51a4376SLisandro Dalcin if (mat->cmap->N < 0) mat->cmap->N = N; 3330b51a4376SLisandro Dalcin ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3331b51a4376SLisandro Dalcin ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3332b51a4376SLisandro Dalcin 3333b51a4376SLisandro Dalcin /* check if the matrix sizes are correct */ 3334b51a4376SLisandro Dalcin ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3335b51a4376SLisandro Dalcin if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3336b51a4376SLisandro Dalcin ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 3337b51a4376SLisandro Dalcin ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 3338b51a4376SLisandro Dalcin mbs = m/bs; nbs = n/bs; 3339b51a4376SLisandro Dalcin 3340b51a4376SLisandro Dalcin /* read in row lengths, column indices and nonzero values */ 3341b51a4376SLisandro Dalcin ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3342b51a4376SLisandro Dalcin ierr = PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT);CHKERRQ(ierr); 3343b51a4376SLisandro Dalcin rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3344b51a4376SLisandro Dalcin sum = rowidxs[m]; 3345b51a4376SLisandro Dalcin if (sum != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3346b51a4376SLisandro Dalcin 3347b51a4376SLisandro Dalcin /* read in column indices and nonzero values */ 3348b51a4376SLisandro Dalcin ierr = PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals);CHKERRQ(ierr); 3349b51a4376SLisandro Dalcin ierr = PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT);CHKERRQ(ierr); 3350b51a4376SLisandro Dalcin ierr = PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR);CHKERRQ(ierr); 3351b51a4376SLisandro Dalcin 3352b51a4376SLisandro Dalcin { /* preallocate matrix storage */ 3353b51a4376SLisandro Dalcin PetscBT bt; /* helper bit set to count nonzeros */ 3354b51a4376SLisandro Dalcin PetscInt *nnz; 3355618cc2edSLisandro Dalcin PetscBool sbaij; 3356b51a4376SLisandro Dalcin 3357b51a4376SLisandro Dalcin ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr); 3358b51a4376SLisandro Dalcin ierr = PetscCalloc1(mbs,&nnz);CHKERRQ(ierr); 3359618cc2edSLisandro Dalcin ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij);CHKERRQ(ierr); 3360b51a4376SLisandro Dalcin for (i=0; i<mbs; i++) { 3361b51a4376SLisandro Dalcin ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr); 3362618cc2edSLisandro Dalcin for (k=0; k<bs; k++) { 3363618cc2edSLisandro Dalcin PetscInt row = bs*i + k; 3364618cc2edSLisandro Dalcin for (j=rowidxs[row]; j<rowidxs[row+1]; j++) { 3365618cc2edSLisandro Dalcin PetscInt col = colidxs[j]; 3366618cc2edSLisandro Dalcin if (!sbaij || col >= row) 3367618cc2edSLisandro Dalcin if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++; 3368618cc2edSLisandro Dalcin } 3369618cc2edSLisandro Dalcin } 3370b51a4376SLisandro Dalcin } 3371b51a4376SLisandro Dalcin ierr = PetscBTDestroy(&bt);CHKERRQ(ierr); 3372b51a4376SLisandro Dalcin ierr = MatSeqBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr); 3373618cc2edSLisandro Dalcin ierr = MatSeqSBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr); 3374b51a4376SLisandro Dalcin ierr = PetscFree(nnz);CHKERRQ(ierr); 3375b51a4376SLisandro Dalcin } 3376b51a4376SLisandro Dalcin 3377b51a4376SLisandro Dalcin /* store matrix values */ 3378b51a4376SLisandro Dalcin for (i=0; i<m; i++) { 3379b51a4376SLisandro Dalcin PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1]; 3380618cc2edSLisandro Dalcin ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr); 3381b51a4376SLisandro Dalcin } 3382b51a4376SLisandro Dalcin 3383b51a4376SLisandro Dalcin ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3384b51a4376SLisandro Dalcin ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3385b51a4376SLisandro Dalcin ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3386b51a4376SLisandro Dalcin ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3387b51a4376SLisandro Dalcin PetscFunctionReturn(0); 3388b51a4376SLisandro Dalcin } 3389b51a4376SLisandro Dalcin 3390b51a4376SLisandro Dalcin PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer) 3391b51a4376SLisandro Dalcin { 3392b51a4376SLisandro Dalcin PetscErrorCode ierr; 33937f489da9SVaclav Hapla PetscBool isbinary; 3394f501eaabSShri Abhyankar 3395f501eaabSShri Abhyankar PetscFunctionBegin; 33967f489da9SVaclav Hapla ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3397b51a4376SLisandro Dalcin if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 3398b51a4376SLisandro Dalcin ierr = MatLoad_SeqBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 3399f501eaabSShri Abhyankar PetscFunctionReturn(0); 3400f501eaabSShri Abhyankar } 3401f501eaabSShri Abhyankar 3402273d9f13SBarry Smith /*@C 3403273d9f13SBarry Smith MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3404273d9f13SBarry Smith compressed row) format. For good matrix assembly performance the 3405273d9f13SBarry Smith user should preallocate the matrix storage by setting the parameter nz 3406273d9f13SBarry Smith (or the array nnz). By setting these parameters accurately, performance 3407273d9f13SBarry Smith during matrix assembly can be increased by more than a factor of 50. 34082593348eSBarry Smith 3409d083f849SBarry Smith Collective 3410273d9f13SBarry Smith 3411273d9f13SBarry Smith Input Parameters: 3412273d9f13SBarry Smith + comm - MPI communicator, set to PETSC_COMM_SELF 3413bb7ae925SBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3414bb7ae925SBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3415273d9f13SBarry Smith . m - number of rows 3416273d9f13SBarry Smith . n - number of columns 341735d8aa7fSBarry Smith . nz - number of nonzero blocks per block row (same for all rows) 341835d8aa7fSBarry Smith - nnz - array containing the number of nonzero blocks in the various block rows 34190298fd71SBarry Smith (possibly different for each block row) or NULL 3420273d9f13SBarry Smith 3421273d9f13SBarry Smith Output Parameter: 3422273d9f13SBarry Smith . A - the matrix 3423273d9f13SBarry Smith 3424175b88e8SBarry Smith It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3425f6f02116SRichard Tran Mills MatXXXXSetPreallocation() paradigm instead of this routine directly. 3426175b88e8SBarry Smith [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3427175b88e8SBarry Smith 3428273d9f13SBarry Smith Options Database Keys: 3429a2b725a8SWilliam Gropp + -mat_no_unroll - uses code that does not unroll the loops in the 3430273d9f13SBarry Smith block calculations (much slower) 3431a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use 3432273d9f13SBarry Smith 3433273d9f13SBarry Smith Level: intermediate 3434273d9f13SBarry Smith 3435273d9f13SBarry Smith Notes: 3436d1be2dadSMatthew Knepley The number of rows and columns must be divisible by blocksize. 3437d1be2dadSMatthew Knepley 343849a6f317SBarry Smith If the nnz parameter is given then the nz parameter is ignored 343949a6f317SBarry Smith 344035d8aa7fSBarry Smith A nonzero block is any block that as 1 or more nonzeros in it 344135d8aa7fSBarry Smith 3442273d9f13SBarry Smith The block AIJ format is fully compatible with standard Fortran 77 3443273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 3444273d9f13SBarry Smith either one (as in Fortran) or zero. See the users' manual for details. 3445273d9f13SBarry Smith 3446273d9f13SBarry Smith Specify the preallocated storage with either nz or nnz (not both). 34470298fd71SBarry Smith Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3448a7f22e61SSatish Balay allocation. See Users-Manual: ch_mat for details. 3449273d9f13SBarry Smith matrices. 3450273d9f13SBarry Smith 345169b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3452273d9f13SBarry Smith @*/ 34537087cfbeSBarry Smith PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3454273d9f13SBarry Smith { 3455dfbe8321SBarry Smith PetscErrorCode ierr; 3456273d9f13SBarry Smith 3457273d9f13SBarry Smith PetscFunctionBegin; 3458f69a0ea3SMatthew Knepley ierr = MatCreate(comm,A);CHKERRQ(ierr); 3459f69a0ea3SMatthew Knepley ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3460273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3461367daffbSBarry Smith ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3462273d9f13SBarry Smith PetscFunctionReturn(0); 3463273d9f13SBarry Smith } 3464273d9f13SBarry Smith 3465273d9f13SBarry Smith /*@C 3466273d9f13SBarry Smith MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3467273d9f13SBarry Smith per row in the matrix. For good matrix assembly performance the 3468273d9f13SBarry Smith user should preallocate the matrix storage by setting the parameter nz 3469273d9f13SBarry Smith (or the array nnz). By setting these parameters accurately, performance 3470273d9f13SBarry Smith during matrix assembly can be increased by more than a factor of 50. 3471273d9f13SBarry Smith 3472d083f849SBarry Smith Collective 3473273d9f13SBarry Smith 3474273d9f13SBarry Smith Input Parameters: 34751c4f3114SJed Brown + B - the matrix 3476bb7ae925SBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3477bb7ae925SBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3478273d9f13SBarry Smith . nz - number of block nonzeros per block row (same for all rows) 3479273d9f13SBarry Smith - nnz - array containing the number of block nonzeros in the various block rows 34800298fd71SBarry Smith (possibly different for each block row) or NULL 3481273d9f13SBarry Smith 3482273d9f13SBarry Smith Options Database Keys: 3483a2b725a8SWilliam Gropp + -mat_no_unroll - uses code that does not unroll the loops in the 3484273d9f13SBarry Smith block calculations (much slower) 3485a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use 3486273d9f13SBarry Smith 3487273d9f13SBarry Smith Level: intermediate 3488273d9f13SBarry Smith 3489273d9f13SBarry Smith Notes: 349049a6f317SBarry Smith If the nnz parameter is given then the nz parameter is ignored 349149a6f317SBarry Smith 3492aa95bbe8SBarry Smith You can call MatGetInfo() to get information on how effective the preallocation was; 3493aa95bbe8SBarry Smith for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3494aa95bbe8SBarry Smith You can also run with the option -info and look for messages with the string 3495aa95bbe8SBarry Smith malloc in them to see if additional memory allocation was needed. 3496aa95bbe8SBarry Smith 3497273d9f13SBarry Smith The block AIJ format is fully compatible with standard Fortran 77 3498273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 3499273d9f13SBarry Smith either one (as in Fortran) or zero. See the users' manual for details. 3500273d9f13SBarry Smith 3501273d9f13SBarry Smith Specify the preallocated storage with either nz or nnz (not both). 35020298fd71SBarry Smith Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3503a7f22e61SSatish Balay allocation. See Users-Manual: ch_mat for details. 3504273d9f13SBarry Smith 350569b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3506273d9f13SBarry Smith @*/ 35077087cfbeSBarry Smith PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3508273d9f13SBarry Smith { 35094ac538c5SBarry Smith PetscErrorCode ierr; 3510273d9f13SBarry Smith 3511273d9f13SBarry Smith PetscFunctionBegin; 35126ba663aaSJed Brown PetscValidHeaderSpecific(B,MAT_CLASSID,1); 35136ba663aaSJed Brown PetscValidType(B,1); 35146ba663aaSJed Brown PetscValidLogicalCollectiveInt(B,bs,2); 35154ac538c5SBarry Smith ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3516273d9f13SBarry Smith PetscFunctionReturn(0); 3517273d9f13SBarry Smith } 3518a1d92eedSBarry Smith 3519725b52f3SLisandro Dalcin /*@C 3520664954b6SBarry Smith MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 3521725b52f3SLisandro Dalcin 3522d083f849SBarry Smith Collective 3523725b52f3SLisandro Dalcin 3524725b52f3SLisandro Dalcin Input Parameters: 35251c4f3114SJed Brown + B - the matrix 3526725b52f3SLisandro Dalcin . i - the indices into j for the start of each local row (starts with zero) 3527725b52f3SLisandro Dalcin . j - the column indices for each local row (starts with zero) these must be sorted for each row 3528725b52f3SLisandro Dalcin - v - optional values in the matrix 3529725b52f3SLisandro Dalcin 3530664954b6SBarry Smith Level: advanced 3531725b52f3SLisandro Dalcin 35323adadaf3SJed Brown Notes: 35333adadaf3SJed Brown The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 35343adadaf3SJed Brown may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 35353adadaf3SJed Brown over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 35363adadaf3SJed Brown MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 35373adadaf3SJed Brown block column and the second index is over columns within a block. 35383adadaf3SJed Brown 3539664954b6SBarry Smith Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3540664954b6SBarry Smith 3541725b52f3SLisandro Dalcin .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3542725b52f3SLisandro Dalcin @*/ 35437087cfbeSBarry Smith PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3544725b52f3SLisandro Dalcin { 35454ac538c5SBarry Smith PetscErrorCode ierr; 3546725b52f3SLisandro Dalcin 3547725b52f3SLisandro Dalcin PetscFunctionBegin; 35486ba663aaSJed Brown PetscValidHeaderSpecific(B,MAT_CLASSID,1); 35496ba663aaSJed Brown PetscValidType(B,1); 35506ba663aaSJed Brown PetscValidLogicalCollectiveInt(B,bs,2); 35514ac538c5SBarry Smith ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3552725b52f3SLisandro Dalcin PetscFunctionReturn(0); 3553725b52f3SLisandro Dalcin } 3554725b52f3SLisandro Dalcin 3555c75a6043SHong Zhang /*@ 3556dfb205c3SBarry Smith MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3557c75a6043SHong Zhang 3558d083f849SBarry Smith Collective 3559c75a6043SHong Zhang 3560c75a6043SHong Zhang Input Parameters: 3561c75a6043SHong Zhang + comm - must be an MPI communicator of size 1 3562c75a6043SHong Zhang . bs - size of block 3563c75a6043SHong Zhang . m - number of rows 3564c75a6043SHong Zhang . n - number of columns 3565483a2f95SBarry Smith . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3566c75a6043SHong Zhang . j - column indices 3567c75a6043SHong Zhang - a - matrix values 3568c75a6043SHong Zhang 3569c75a6043SHong Zhang Output Parameter: 3570c75a6043SHong Zhang . mat - the matrix 3571c75a6043SHong Zhang 3572dfb205c3SBarry Smith Level: advanced 3573c75a6043SHong Zhang 3574c75a6043SHong Zhang Notes: 3575c75a6043SHong Zhang The i, j, and a arrays are not copied by this routine, the user must free these arrays 3576c75a6043SHong Zhang once the matrix is destroyed 3577c75a6043SHong Zhang 3578c75a6043SHong Zhang You cannot set new nonzero locations into this matrix, that will generate an error. 3579c75a6043SHong Zhang 3580c75a6043SHong Zhang The i and j indices are 0 based 3581c75a6043SHong Zhang 3582dfb205c3SBarry Smith When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3583dfb205c3SBarry Smith 35843adadaf3SJed Brown The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 35853adadaf3SJed Brown the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 35863adadaf3SJed Brown block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 35873adadaf3SJed Brown with column-major ordering within blocks. 3588dfb205c3SBarry Smith 358969b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3590c75a6043SHong Zhang 3591c75a6043SHong Zhang @*/ 3592c3c607ccSBarry Smith PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3593c75a6043SHong Zhang { 3594c75a6043SHong Zhang PetscErrorCode ierr; 3595c75a6043SHong Zhang PetscInt ii; 3596c75a6043SHong Zhang Mat_SeqBAIJ *baij; 3597c75a6043SHong Zhang 3598c75a6043SHong Zhang PetscFunctionBegin; 3599e32f2f54SBarry Smith if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 360041096f02SStefano Zampini if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3601c75a6043SHong Zhang 3602c75a6043SHong Zhang ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3603c75a6043SHong Zhang ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3604c75a6043SHong Zhang ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3605f4259b30SLisandro Dalcin ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 3606c75a6043SHong Zhang baij = (Mat_SeqBAIJ*)(*mat)->data; 3607dcca6d9dSJed Brown ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 36083bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3609c75a6043SHong Zhang 3610c75a6043SHong Zhang baij->i = i; 3611c75a6043SHong Zhang baij->j = j; 3612c75a6043SHong Zhang baij->a = a; 361326fbe8dcSKarl Rupp 3614c75a6043SHong Zhang baij->singlemalloc = PETSC_FALSE; 3615c75a6043SHong Zhang baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3616e6b907acSBarry Smith baij->free_a = PETSC_FALSE; 3617e6b907acSBarry Smith baij->free_ij = PETSC_FALSE; 3618c75a6043SHong Zhang 3619c75a6043SHong Zhang for (ii=0; ii<m; ii++) { 3620c75a6043SHong Zhang baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3621cf9c20a2SJed Brown if (PetscUnlikelyDebug(i[ii+1] - i[ii] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3622c75a6043SHong Zhang } 362376bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { 3624c75a6043SHong Zhang for (ii=0; ii<baij->i[m]; ii++) { 3625e32f2f54SBarry Smith if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3626e32f2f54SBarry Smith if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3627c75a6043SHong Zhang } 362876bd3646SJed Brown } 3629c75a6043SHong Zhang 3630c75a6043SHong Zhang ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3631c75a6043SHong Zhang ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3632c75a6043SHong Zhang PetscFunctionReturn(0); 3633c75a6043SHong Zhang } 3634bdf6f3fcSHong Zhang 3635bdf6f3fcSHong Zhang PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3636bdf6f3fcSHong Zhang { 3637bdf6f3fcSHong Zhang PetscErrorCode ierr; 36388761c3d6SHong Zhang PetscMPIInt size; 3639bdf6f3fcSHong Zhang 3640bdf6f3fcSHong Zhang PetscFunctionBegin; 3641ffc4695bSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 36428761c3d6SHong Zhang if (size == 1 && scall == MAT_REUSE_MATRIX) { 36438761c3d6SHong Zhang ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 36448761c3d6SHong Zhang } else { 3645bdf6f3fcSHong Zhang ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 36468761c3d6SHong Zhang } 3647bdf6f3fcSHong Zhang PetscFunctionReturn(0); 3648bdf6f3fcSHong Zhang } 3649