1be1d678aSKris Buschelman 22593348eSBarry Smith /* 3b6490206SBarry Smith Defines the basic matrix operations for the BAIJ (compressed row) 42593348eSBarry Smith matrix storage format. 52593348eSBarry Smith */ 6c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7c6db04a5SJed Brown #include <petscblaslapack.h> 806873bf2SBarry Smith #include <petsc-private/kernels/blockinvert.h> 9b01c7715SBarry Smith 1043516a2dSKris Buschelman 1143516a2dSKris Buschelman #undef __FUNCT__ 12b01c7715SBarry Smith #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13713ccfa9SJed Brown PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14b01c7715SBarry Smith { 15b01c7715SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 166849ba73SBarry Smith PetscErrorCode ierr; 17de80f912SBarry Smith PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18de80f912SBarry Smith MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 1962bba022SBarry Smith PetscReal shift = 0.0; 20b01c7715SBarry Smith 21b01c7715SBarry Smith PetscFunctionBegin; 229797317bSBarry Smith if (a->idiagvalid) { 239797317bSBarry Smith if (values) *values = a->idiag; 249797317bSBarry Smith PetscFunctionReturn(0); 259797317bSBarry Smith } 26b01c7715SBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 27b01c7715SBarry Smith diag_offset = a->diag; 28b01c7715SBarry Smith if (!a->idiag) { 29de80f912SBarry Smith ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 30de80f912SBarry Smith ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 31b01c7715SBarry Smith } 32b01c7715SBarry Smith diag = a->idiag; 33de80f912SBarry Smith mdiag = a->idiag+bs2*mbs; 34bbead8a2SBarry Smith if (values) *values = a->idiag; 35b01c7715SBarry Smith /* factor and invert each block */ 36521d7252SBarry Smith switch (bs) { 37ab040260SJed Brown case 1: 38ab040260SJed Brown for (i=0; i<mbs; i++) { 39ab040260SJed Brown 
odiag = v + 1*diag_offset[i]; 40ab040260SJed Brown diag[0] = odiag[0]; 41ab040260SJed Brown mdiag[0] = odiag[0]; 42d4a378daSJed Brown diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 43ab040260SJed Brown diag += 1; 44ab040260SJed Brown mdiag += 1; 45ab040260SJed Brown } 46ab040260SJed Brown break; 47b01c7715SBarry Smith case 2: 48b01c7715SBarry Smith for (i=0; i<mbs; i++) { 49b01c7715SBarry Smith odiag = v + 4*diag_offset[i]; 50b01c7715SBarry Smith diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 51b01c7715SBarry Smith mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 5296b95a6bSBarry Smith ierr = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 53b01c7715SBarry Smith diag += 4; 54b01c7715SBarry Smith mdiag += 4; 55b01c7715SBarry Smith } 56b01c7715SBarry Smith break; 57b01c7715SBarry Smith case 3: 58b01c7715SBarry Smith for (i=0; i<mbs; i++) { 59b01c7715SBarry Smith odiag = v + 9*diag_offset[i]; 60b01c7715SBarry Smith diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 61b01c7715SBarry Smith diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 62b01c7715SBarry Smith diag[8] = odiag[8]; 63b01c7715SBarry Smith mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 64b01c7715SBarry Smith mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 65b01c7715SBarry Smith mdiag[8] = odiag[8]; 6696b95a6bSBarry Smith ierr = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 67b01c7715SBarry Smith diag += 9; 68b01c7715SBarry Smith mdiag += 9; 69b01c7715SBarry Smith } 70b01c7715SBarry Smith break; 71b01c7715SBarry Smith case 4: 72b01c7715SBarry Smith for (i=0; i<mbs; i++) { 73b01c7715SBarry Smith odiag = v + 16*diag_offset[i]; 74b01c7715SBarry Smith ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 75b01c7715SBarry Smith ierr = 
PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 7696b95a6bSBarry Smith ierr = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 77b01c7715SBarry Smith diag += 16; 78b01c7715SBarry Smith mdiag += 16; 79b01c7715SBarry Smith } 80b01c7715SBarry Smith break; 81b01c7715SBarry Smith case 5: 82b01c7715SBarry Smith for (i=0; i<mbs; i++) { 83b01c7715SBarry Smith odiag = v + 25*diag_offset[i]; 84b01c7715SBarry Smith ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 85b01c7715SBarry Smith ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 8696b95a6bSBarry Smith ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 87b01c7715SBarry Smith diag += 25; 88b01c7715SBarry Smith mdiag += 25; 89b01c7715SBarry Smith } 90b01c7715SBarry Smith break; 91d49b2adcSBarry Smith case 6: 92d49b2adcSBarry Smith for (i=0; i<mbs; i++) { 93d49b2adcSBarry Smith odiag = v + 36*diag_offset[i]; 94d49b2adcSBarry Smith ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 95d49b2adcSBarry Smith ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 9696b95a6bSBarry Smith ierr = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 97d49b2adcSBarry Smith diag += 36; 98d49b2adcSBarry Smith mdiag += 36; 99d49b2adcSBarry Smith } 100d49b2adcSBarry Smith break; 101de80f912SBarry Smith case 7: 102de80f912SBarry Smith for (i=0; i<mbs; i++) { 103de80f912SBarry Smith odiag = v + 49*diag_offset[i]; 104de80f912SBarry Smith ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 105de80f912SBarry Smith ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 10696b95a6bSBarry Smith ierr = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 107de80f912SBarry Smith diag += 49; 108de80f912SBarry Smith mdiag += 49; 109de80f912SBarry Smith } 110de80f912SBarry Smith break; 111b01c7715SBarry Smith default: 112de80f912SBarry Smith ierr = 
PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr); 113de80f912SBarry Smith for (i=0; i<mbs; i++) { 114de80f912SBarry Smith odiag = v + bs2*diag_offset[i]; 115de80f912SBarry Smith ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 116de80f912SBarry Smith ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 11796b95a6bSBarry Smith ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 118de80f912SBarry Smith diag += bs2; 119de80f912SBarry Smith mdiag += bs2; 120de80f912SBarry Smith } 121de80f912SBarry Smith ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 122b01c7715SBarry Smith } 123b01c7715SBarry Smith a->idiagvalid = PETSC_TRUE; 124b01c7715SBarry Smith PetscFunctionReturn(0); 125b01c7715SBarry Smith } 126b01c7715SBarry Smith 127*e48d15efSToby Isaac #define PetscKernel_v_gets_A_times_w_1_exp(v,A,w,exp) \ 128*e48d15efSToby Isaac do { \ 129*e48d15efSToby Isaac v[0] exp A[0]*w[0]; \ 130*e48d15efSToby Isaac } while (0) 1316d3beeddSMatthew Knepley 132*e48d15efSToby Isaac #define PetscKernel_v_gets_A_times_w_2_exp(v,A,w,exp) \ 133*e48d15efSToby Isaac do { \ 134*e48d15efSToby Isaac v[0] exp A[0]*w[0] + A[2]*w[1]; \ 135*e48d15efSToby Isaac v[1] exp A[1]*w[0] + A[3]*w[1]; \ 136*e48d15efSToby Isaac } while (0) 1376d3beeddSMatthew Knepley 138*e48d15efSToby Isaac #define PetscKernel_v_gets_A_times_w_3_exp(v,A,w,exp) \ 139*e48d15efSToby Isaac do { \ 140*e48d15efSToby Isaac v[0] exp A[0]*w[0] + A[3]*w[1] + A[6]*w[2]; \ 141*e48d15efSToby Isaac v[1] exp A[1]*w[0] + A[4]*w[1] + A[7]*w[2]; \ 142*e48d15efSToby Isaac v[2] exp A[2]*w[0] + A[5]*w[1] + A[8]*w[2]; \ 143*e48d15efSToby Isaac } while (0) 1446d3beeddSMatthew Knepley 145*e48d15efSToby Isaac #define PetscKernel_v_gets_A_times_w_4_exp(v,A,w,exp) \ 146*e48d15efSToby Isaac do { \ 147*e48d15efSToby Isaac v[0] exp A[0]*w[0] + A[4]*w[1] + A[8] *w[2] + A[12]*w[3]; \ 148*e48d15efSToby Isaac v[1] exp A[1]*w[0] + A[5]*w[1] + A[9] *w[2] + A[13]*w[3]; \ 
149*e48d15efSToby Isaac v[2] exp A[2]*w[0] + A[6]*w[1] + A[10]*w[2] + A[14]*w[3]; \ 150*e48d15efSToby Isaac v[3] exp A[3]*w[0] + A[7]*w[1] + A[11]*w[2] + A[15]*w[3]; \ 151*e48d15efSToby Isaac } while (0) 1526d3beeddSMatthew Knepley 153*e48d15efSToby Isaac #define PetscKernel_v_gets_A_times_w_4_exp(v,A,w,exp) \ 154*e48d15efSToby Isaac do { \ 155*e48d15efSToby Isaac v[0] exp A[0]*w[0] + A[4]*w[1] + A[8] *w[2] + A[12]*w[3]; \ 156*e48d15efSToby Isaac v[1] exp A[1]*w[0] + A[5]*w[1] + A[9] *w[2] + A[13]*w[3]; \ 157*e48d15efSToby Isaac v[2] exp A[2]*w[0] + A[6]*w[1] + A[10]*w[2] + A[14]*w[3]; \ 158*e48d15efSToby Isaac v[3] exp A[3]*w[0] + A[7]*w[1] + A[11]*w[2] + A[15]*w[3]; \ 159*e48d15efSToby Isaac } while (0) 160*e48d15efSToby Isaac 161*e48d15efSToby Isaac #define PetscKernel_v_gets_A_times_w_5_exp(v,A,w,exp) \ 162*e48d15efSToby Isaac do { \ 163*e48d15efSToby Isaac v[0] exp A[0]*w[0] + A[5]*w[1] + A[10]*w[2] + A[15]*w[3] + A[20]*w[4]; \ 164*e48d15efSToby Isaac v[1] exp A[1]*w[0] + A[6]*w[1] + A[11]*w[2] + A[16]*w[3] + A[21]*w[4]; \ 165*e48d15efSToby Isaac v[2] exp A[2]*w[0] + A[7]*w[1] + A[12]*w[2] + A[17]*w[3] + A[22]*w[4]; \ 166*e48d15efSToby Isaac v[3] exp A[3]*w[0] + A[8]*w[1] + A[13]*w[2] + A[18]*w[3] + A[23]*w[4]; \ 167*e48d15efSToby Isaac v[4] exp A[4]*w[0] + A[9]*w[1] + A[14]*w[2] + A[19]*w[3] + A[24]*w[4]; \ 168*e48d15efSToby Isaac } while (0) 169*e48d15efSToby Isaac 170*e48d15efSToby Isaac #define PetscKernel_v_gets_A_times_w_6_exp(v,A,w,exp) \ 171*e48d15efSToby Isaac do { \ 172*e48d15efSToby Isaac v[0] exp A[0]*w[0] + A[6] *w[1] + A[12]*w[2] + A[18]*w[3] + A[24]*w[4] + A[30]*w[5]; \ 173*e48d15efSToby Isaac v[1] exp A[1]*w[0] + A[7] *w[1] + A[13]*w[2] + A[19]*w[3] + A[25]*w[4] + A[31]*w[5]; \ 174*e48d15efSToby Isaac v[2] exp A[2]*w[0] + A[8] *w[1] + A[14]*w[2] + A[20]*w[3] + A[26]*w[4] + A[32]*w[5]; \ 175*e48d15efSToby Isaac v[3] exp A[3]*w[0] + A[9] *w[1] + A[15]*w[2] + A[21]*w[3] + A[27]*w[4] + A[33]*w[5]; \ 176*e48d15efSToby Isaac v[4] exp A[4]*w[0] + 
A[10]*w[1] + A[16]*w[2] + A[22]*w[3] + A[28]*w[4] + A[34]*w[5]; \ 177*e48d15efSToby Isaac v[5] exp A[5]*w[0] + A[11]*w[1] + A[17]*w[2] + A[23]*w[3] + A[29]*w[4] + A[35]*w[5]; \ 178*e48d15efSToby Isaac } while (0) 179*e48d15efSToby Isaac 180*e48d15efSToby Isaac #define PetscKernel_v_gets_A_times_w_7_exp(v,A,w,exp) \ 181*e48d15efSToby Isaac do { \ 182*e48d15efSToby Isaac v[0] exp A[0]*w[0] + A[7] *w[1] + A[14]*w[2] + A[21]*w[3] + A[28]*w[4] + A[35]*w[5] + A[42]*w[6]; \ 183*e48d15efSToby Isaac v[1] exp A[1]*w[0] + A[8] *w[1] + A[15]*w[2] + A[22]*w[3] + A[29]*w[4] + A[36]*w[5] + A[43]*w[6]; \ 184*e48d15efSToby Isaac v[2] exp A[2]*w[0] + A[9] *w[1] + A[16]*w[2] + A[23]*w[3] + A[30]*w[4] + A[37]*w[5] + A[44]*w[6]; \ 185*e48d15efSToby Isaac v[3] exp A[3]*w[0] + A[10]*w[1] + A[17]*w[2] + A[24]*w[3] + A[31]*w[4] + A[38]*w[5] + A[45]*w[6]; \ 186*e48d15efSToby Isaac v[4] exp A[4]*w[0] + A[11]*w[1] + A[18]*w[2] + A[25]*w[3] + A[32]*w[4] + A[39]*w[5] + A[46]*w[6]; \ 187*e48d15efSToby Isaac v[5] exp A[5]*w[0] + A[12]*w[1] + A[19]*w[2] + A[26]*w[3] + A[33]*w[4] + A[40]*w[5] + A[47]*w[6]; \ 188*e48d15efSToby Isaac v[6] exp A[6]*w[0] + A[13]*w[1] + A[20]*w[2] + A[27]*w[3] + A[34]*w[4] + A[41]*w[5] + A[48]*w[6]; \ 189*e48d15efSToby Isaac } while (0) 190*e48d15efSToby Isaac 191*e48d15efSToby Isaac #define PetscKernel_v_gets_A_times_w_1(v,A,w) PetscKernel_v_gets_A_times_w_1_exp(v,A,w,=) 192*e48d15efSToby Isaac #define PetscKernel_v_gets_A_times_w_2(v,A,w) PetscKernel_v_gets_A_times_w_2_exp(v,A,w,=) 193*e48d15efSToby Isaac #define PetscKernel_v_gets_A_times_w_3(v,A,w) PetscKernel_v_gets_A_times_w_3_exp(v,A,w,=) 194*e48d15efSToby Isaac #define PetscKernel_v_gets_A_times_w_4(v,A,w) PetscKernel_v_gets_A_times_w_4_exp(v,A,w,=) 195*e48d15efSToby Isaac #define PetscKernel_v_gets_A_times_w_5(v,A,w) PetscKernel_v_gets_A_times_w_5_exp(v,A,w,=) 196*e48d15efSToby Isaac #define PetscKernel_v_gets_A_times_w_6(v,A,w) PetscKernel_v_gets_A_times_w_6_exp(v,A,w,=) 197*e48d15efSToby Isaac #define 
PetscKernel_v_gets_A_times_w_7(v,A,w) PetscKernel_v_gets_A_times_w_7_exp(v,A,w,=) 198*e48d15efSToby Isaac #define PetscKernel_v_gets_v_plus_A_times_w_1(v,A,w) PetscKernel_v_gets_A_times_w_1_exp(v,A,w,+=) 199*e48d15efSToby Isaac #define PetscKernel_v_gets_v_plus_A_times_w_2(v,A,w) PetscKernel_v_gets_A_times_w_2_exp(v,A,w,+=) 200*e48d15efSToby Isaac #define PetscKernel_v_gets_v_plus_A_times_w_3(v,A,w) PetscKernel_v_gets_A_times_w_3_exp(v,A,w,+=) 201*e48d15efSToby Isaac #define PetscKernel_v_gets_v_plus_A_times_w_4(v,A,w) PetscKernel_v_gets_A_times_w_4_exp(v,A,w,+=) 202*e48d15efSToby Isaac #define PetscKernel_v_gets_v_plus_A_times_w_5(v,A,w) PetscKernel_v_gets_A_times_w_5_exp(v,A,w,+=) 203*e48d15efSToby Isaac #define PetscKernel_v_gets_v_plus_A_times_w_6(v,A,w) PetscKernel_v_gets_A_times_w_6_exp(v,A,w,+=) 204*e48d15efSToby Isaac #define PetscKernel_v_gets_v_plus_A_times_w_7(v,A,w) PetscKernel_v_gets_A_times_w_7_exp(v,A,w,+=) 205*e48d15efSToby Isaac #define PetscKernel_v_gets_v_minus_A_times_w_1(v,A,w) PetscKernel_v_gets_A_times_w_1_exp(v,A,w,-=) 206*e48d15efSToby Isaac #define PetscKernel_v_gets_v_minus_A_times_w_2(v,A,w) PetscKernel_v_gets_A_times_w_2_exp(v,A,w,-=) 207*e48d15efSToby Isaac #define PetscKernel_v_gets_v_minus_A_times_w_3(v,A,w) PetscKernel_v_gets_A_times_w_3_exp(v,A,w,-=) 208*e48d15efSToby Isaac #define PetscKernel_v_gets_v_minus_A_times_w_4(v,A,w) PetscKernel_v_gets_A_times_w_4_exp(v,A,w,-=) 209*e48d15efSToby Isaac #define PetscKernel_v_gets_v_minus_A_times_w_5(v,A,w) PetscKernel_v_gets_A_times_w_5_exp(v,A,w,-=) 210*e48d15efSToby Isaac #define PetscKernel_v_gets_v_minus_A_times_w_6(v,A,w) PetscKernel_v_gets_A_times_w_6_exp(v,A,w,-=) 211*e48d15efSToby Isaac #define PetscKernel_v_gets_v_minus_A_times_w_7(v,A,w) PetscKernel_v_gets_A_times_w_7_exp(v,A,w,-=) 2126d3beeddSMatthew Knepley 2136d3beeddSMatthew Knepley #undef __FUNCT__ 214*e48d15efSToby Isaac #define __FUNCT__ "MatSOR_SeqBAIJ" 215*e48d15efSToby Isaac PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec 
bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 216b01c7715SBarry Smith { 217b01c7715SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 218*e48d15efSToby Isaac PetscScalar *x,*work,*w,*workt,*t; 219*e48d15efSToby Isaac const MatScalar *v,*aa = a->a, *idiag; 220*e48d15efSToby Isaac const PetscScalar *b,*xb; 221*e48d15efSToby Isaac PetscScalar s[7], xw[7]; 222dfbe8321SBarry Smith PetscErrorCode ierr; 223*e48d15efSToby Isaac PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 224c1ac3661SBarry Smith const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 225b01c7715SBarry Smith 226b01c7715SBarry Smith PetscFunctionBegin; 227b01c7715SBarry Smith its = its*lits; 228e32f2f54SBarry Smith if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 229e32f2f54SBarry Smith if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 230e32f2f54SBarry Smith if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 231e32f2f54SBarry Smith if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 232e32f2f54SBarry Smith if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 233b01c7715SBarry Smith 2340298fd71SBarry Smith if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 235b01c7715SBarry Smith 236b2ec919aSToby Isaac if (!m) PetscFunctionReturn(0); 237b01c7715SBarry Smith diag = a->diag; 238b01c7715SBarry Smith idiag = a->idiag; 239de80f912SBarry Smith k = PetscMax(A->rmap->n,A->cmap->n); 240*e48d15efSToby Isaac if (!a->mult_work) { 241*e48d15efSToby Isaac ierr = PetscMalloc((2*k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 242de80f912SBarry Smith } 243de80f912SBarry Smith work = a->mult_work; 
244*e48d15efSToby Isaac t = work + k+1; 245de80f912SBarry Smith if (!a->sor_work) { 246de80f912SBarry Smith ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr); 247de80f912SBarry Smith } 248de80f912SBarry Smith w = a->sor_work; 249de80f912SBarry Smith 250de80f912SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 251de80f912SBarry Smith ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 252de80f912SBarry Smith 253de80f912SBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 254de80f912SBarry Smith if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 255*e48d15efSToby Isaac switch (bs) { 256*e48d15efSToby Isaac case 1: 257*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(x,idiag,b); 258*e48d15efSToby Isaac t[0] = b[0]; 259*e48d15efSToby Isaac i2 = 1; 260*e48d15efSToby Isaac idiag += 1; 261*e48d15efSToby Isaac for (i=1; i<m; i++) { 262*e48d15efSToby Isaac v = aa + ai[i]; 263*e48d15efSToby Isaac vi = aj + ai[i]; 264*e48d15efSToby Isaac nz = diag[i] - ai[i]; 265*e48d15efSToby Isaac s[0] = b[i2]; 266*e48d15efSToby Isaac for (j=0; j<nz; j++) { 267*e48d15efSToby Isaac xw[0] = x[vi[j]]; 268*e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 269*e48d15efSToby Isaac } 270*e48d15efSToby Isaac t[i2] = s[0]; 271*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 272*e48d15efSToby Isaac x[i2] = xw[0]; 273*e48d15efSToby Isaac idiag += 1; 274*e48d15efSToby Isaac i2 += 1; 275*e48d15efSToby Isaac } 276*e48d15efSToby Isaac break; 277*e48d15efSToby Isaac case 2: 278*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(x,idiag,b); 279*e48d15efSToby Isaac t[0] = b[0]; t[1] = b[1]; 280*e48d15efSToby Isaac i2 = 2; 281*e48d15efSToby Isaac idiag += 4; 282*e48d15efSToby Isaac for (i=1; i<m; i++) { 283*e48d15efSToby Isaac v = aa + 4*ai[i]; 284*e48d15efSToby Isaac vi = aj + ai[i]; 285*e48d15efSToby Isaac nz = diag[i] - ai[i]; 286*e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; 287*e48d15efSToby Isaac for (j=0; j<nz; j++) { 
288*e48d15efSToby Isaac idx = 2*vi[j]; 289*e48d15efSToby Isaac it = 4*j; 290*e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; 291*e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 292*e48d15efSToby Isaac } 293*e48d15efSToby Isaac t[i2] = s[0]; t[i2+1] = s[1]; 294*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 295*e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; 296*e48d15efSToby Isaac idiag += 4; 297*e48d15efSToby Isaac i2 += 2; 298*e48d15efSToby Isaac } 299*e48d15efSToby Isaac break; 300*e48d15efSToby Isaac case 3: 301*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(x,idiag,b); 302*e48d15efSToby Isaac t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 303*e48d15efSToby Isaac i2 = 3; 304*e48d15efSToby Isaac idiag += 9; 305*e48d15efSToby Isaac for (i=1; i<m; i++) { 306*e48d15efSToby Isaac v = aa + 9*ai[i]; 307*e48d15efSToby Isaac vi = aj + ai[i]; 308*e48d15efSToby Isaac nz = diag[i] - ai[i]; 309*e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 310*e48d15efSToby Isaac while (nz--) { 311*e48d15efSToby Isaac idx = 3*(*vi++); 312*e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 313*e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 314*e48d15efSToby Isaac v += 9; 315*e48d15efSToby Isaac } 316*e48d15efSToby Isaac t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 317*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 318*e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 319*e48d15efSToby Isaac idiag += 9; 320*e48d15efSToby Isaac i2 += 3; 321*e48d15efSToby Isaac } 322*e48d15efSToby Isaac break; 323*e48d15efSToby Isaac case 4: 324*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(x,idiag,b); 325*e48d15efSToby Isaac t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 326*e48d15efSToby Isaac i2 = 4; 327*e48d15efSToby Isaac idiag += 16; 328*e48d15efSToby Isaac for (i=1; i<m; i++) { 329*e48d15efSToby Isaac v = aa + 16*ai[i]; 330*e48d15efSToby Isaac vi = 
aj + ai[i]; 331*e48d15efSToby Isaac nz = diag[i] - ai[i]; 332*e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 333*e48d15efSToby Isaac while (nz--) { 334*e48d15efSToby Isaac idx = 4*(*vi++); 335*e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 336*e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 337*e48d15efSToby Isaac v += 16; 338*e48d15efSToby Isaac } 339*e48d15efSToby Isaac t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 340*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 341*e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 342*e48d15efSToby Isaac idiag += 16; 343*e48d15efSToby Isaac i2 += 4; 344*e48d15efSToby Isaac } 345*e48d15efSToby Isaac break; 346*e48d15efSToby Isaac case 5: 347*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(x,idiag,b); 348*e48d15efSToby Isaac t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 349*e48d15efSToby Isaac i2 = 5; 350*e48d15efSToby Isaac idiag += 25; 351*e48d15efSToby Isaac for (i=1; i<m; i++) { 352*e48d15efSToby Isaac v = aa + 25*ai[i]; 353*e48d15efSToby Isaac vi = aj + ai[i]; 354*e48d15efSToby Isaac nz = diag[i] - ai[i]; 355*e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 356*e48d15efSToby Isaac while (nz--) { 357*e48d15efSToby Isaac idx = 5*(*vi++); 358*e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 359*e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 360*e48d15efSToby Isaac v += 25; 361*e48d15efSToby Isaac } 362*e48d15efSToby Isaac t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 363*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 364*e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 365*e48d15efSToby Isaac idiag += 25; 
366*e48d15efSToby Isaac i2 += 5; 367*e48d15efSToby Isaac } 368*e48d15efSToby Isaac break; 369*e48d15efSToby Isaac case 6: 370*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(x,idiag,b); 371*e48d15efSToby Isaac t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 372*e48d15efSToby Isaac i2 = 6; 373*e48d15efSToby Isaac idiag += 36; 374*e48d15efSToby Isaac for (i=1; i<m; i++) { 375*e48d15efSToby Isaac v = aa + 36*ai[i]; 376*e48d15efSToby Isaac vi = aj + ai[i]; 377*e48d15efSToby Isaac nz = diag[i] - ai[i]; 378*e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 379*e48d15efSToby Isaac while (nz--) { 380*e48d15efSToby Isaac idx = 6*(*vi++); 381*e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 382*e48d15efSToby Isaac xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 383*e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 384*e48d15efSToby Isaac v += 36; 385*e48d15efSToby Isaac } 386*e48d15efSToby Isaac t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 387*e48d15efSToby Isaac t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 388*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 389*e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 390*e48d15efSToby Isaac idiag += 36; 391*e48d15efSToby Isaac i2 += 6; 392*e48d15efSToby Isaac } 393*e48d15efSToby Isaac break; 394*e48d15efSToby Isaac case 7: 395*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(x,idiag,b); 396*e48d15efSToby Isaac t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 397*e48d15efSToby Isaac t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 398*e48d15efSToby Isaac i2 = 7; 399*e48d15efSToby Isaac idiag += 49; 400*e48d15efSToby Isaac for (i=1; i<m; i++) { 401*e48d15efSToby Isaac v = aa + 49*ai[i]; 402*e48d15efSToby Isaac vi = aj + ai[i]; 403*e48d15efSToby Isaac nz = diag[i] - ai[i]; 404*e48d15efSToby Isaac s[0] = 
b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 405*e48d15efSToby Isaac s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 406*e48d15efSToby Isaac while (nz--) { 407*e48d15efSToby Isaac idx = 7*(*vi++); 408*e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 409*e48d15efSToby Isaac xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 410*e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 411*e48d15efSToby Isaac v += 49; 412*e48d15efSToby Isaac } 413*e48d15efSToby Isaac t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 414*e48d15efSToby Isaac t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 415*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 416*e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 417*e48d15efSToby Isaac x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 418*e48d15efSToby Isaac idiag += 49; 419*e48d15efSToby Isaac i2 += 7; 420*e48d15efSToby Isaac } 421*e48d15efSToby Isaac break; 422*e48d15efSToby Isaac default: 42396b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 424*e48d15efSToby Isaac ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 425de80f912SBarry Smith i2 = bs; 426de80f912SBarry Smith idiag += bs2; 427de80f912SBarry Smith for (i=1; i<m; i++) { 428de80f912SBarry Smith v = aa + bs2*ai[i]; 429de80f912SBarry Smith vi = aj + ai[i]; 430de80f912SBarry Smith nz = diag[i] - ai[i]; 431de80f912SBarry Smith 432de80f912SBarry Smith ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 433de80f912SBarry Smith /* copy all rows of x that are needed into contiguous space */ 434de80f912SBarry Smith workt = work; 435de80f912SBarry Smith for (j=0; j<nz; j++) { 436de80f912SBarry Smith ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 437de80f912SBarry Smith workt += bs; 438de80f912SBarry Smith } 43996b95a6bSBarry Smith PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 
440*e48d15efSToby Isaac ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 44196b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 442de80f912SBarry Smith 443de80f912SBarry Smith idiag += bs2; 444de80f912SBarry Smith i2 += bs; 445de80f912SBarry Smith } 446*e48d15efSToby Isaac break; 447*e48d15efSToby Isaac } 448de80f912SBarry Smith /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 449*e48d15efSToby Isaac ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 450*e48d15efSToby Isaac xb = t; 451de80f912SBarry Smith } 452*e48d15efSToby Isaac else xb = b; 453de80f912SBarry Smith if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 454*e48d15efSToby Isaac idiag = a->idiag+bs2*(a->mbs-1); 455*e48d15efSToby Isaac i2 = bs * (m-1); 456*e48d15efSToby Isaac switch (bs) { 457*e48d15efSToby Isaac case 1: 458*e48d15efSToby Isaac s[0] = xb[i2]; 459*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 460*e48d15efSToby Isaac x[i2] = xw[0]; 461*e48d15efSToby Isaac i2 -= 1; 462*e48d15efSToby Isaac for (i=m-2; i>=0; i--) { 463*e48d15efSToby Isaac v = aa + (diag[i]+1); 464*e48d15efSToby Isaac vi = aj + diag[i] + 1; 465*e48d15efSToby Isaac nz = ai[i+1] - diag[i] - 1; 466*e48d15efSToby Isaac s[0] = xb[i2]; 467*e48d15efSToby Isaac for (j=0; j<nz; j++) { 468*e48d15efSToby Isaac xw[0] = x[vi[j]]; 469*e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 470*e48d15efSToby Isaac } 471*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 472*e48d15efSToby Isaac x[i2] = xw[0]; 473*e48d15efSToby Isaac idiag -= 1; 474*e48d15efSToby Isaac i2 -= 1; 475*e48d15efSToby Isaac } 476*e48d15efSToby Isaac break; 477*e48d15efSToby Isaac case 2: 478*e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; 479*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 480*e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; 481*e48d15efSToby Isaac i2 -= 2; 482*e48d15efSToby Isaac idiag -= 4; 
483*e48d15efSToby Isaac for (i=m-2; i>=0; i--) { 484*e48d15efSToby Isaac v = aa + 4*(diag[i] + 1); 485*e48d15efSToby Isaac vi = aj + diag[i] + 1; 486*e48d15efSToby Isaac nz = ai[i+1] - diag[i] - 1; 487*e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; 488*e48d15efSToby Isaac for (j=0; j<nz; j++) { 489*e48d15efSToby Isaac idx = 2*vi[j]; 490*e48d15efSToby Isaac it = 4*j; 491*e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; 492*e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 493*e48d15efSToby Isaac } 494*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 495*e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; 496*e48d15efSToby Isaac idiag -= 4; 497*e48d15efSToby Isaac i2 -= 2; 498*e48d15efSToby Isaac } 499*e48d15efSToby Isaac break; 500*e48d15efSToby Isaac case 3: 501*e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 502*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 503*e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 504*e48d15efSToby Isaac i2 -= 3; 505*e48d15efSToby Isaac idiag -= 9; 506*e48d15efSToby Isaac for (i=m-2; i>=0; i--) { 507*e48d15efSToby Isaac v = aa + 9*(diag[i]+1); 508*e48d15efSToby Isaac vi = aj + diag[i] + 1; 509*e48d15efSToby Isaac nz = ai[i+1] - diag[i] - 1; 510*e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 511*e48d15efSToby Isaac while (nz--) { 512*e48d15efSToby Isaac idx = 3*(*vi++); 513*e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 514*e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 515*e48d15efSToby Isaac v += 9; 516*e48d15efSToby Isaac } 517*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 518*e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 519*e48d15efSToby Isaac idiag -= 9; 520*e48d15efSToby Isaac i2 -= 3; 521*e48d15efSToby Isaac } 522*e48d15efSToby Isaac break; 523*e48d15efSToby Isaac case 4: 524*e48d15efSToby Isaac s[0] = xb[i2]; s[1] = 
xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 525*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 526*e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 527*e48d15efSToby Isaac i2 -= 4; 528*e48d15efSToby Isaac idiag -= 16; 529*e48d15efSToby Isaac for (i=m-2; i>=0; i--) { 530*e48d15efSToby Isaac v = aa + 16*(diag[i]+1); 531*e48d15efSToby Isaac vi = aj + diag[i] + 1; 532*e48d15efSToby Isaac nz = ai[i+1] - diag[i] - 1; 533*e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 534*e48d15efSToby Isaac while (nz--) { 535*e48d15efSToby Isaac idx = 4*(*vi++); 536*e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 537*e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 538*e48d15efSToby Isaac v += 16; 539*e48d15efSToby Isaac } 540*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 541*e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 542*e48d15efSToby Isaac idiag -= 16; 543*e48d15efSToby Isaac i2 -= 4; 544*e48d15efSToby Isaac } 545*e48d15efSToby Isaac break; 546*e48d15efSToby Isaac case 5: 547*e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 548*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 549*e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 550*e48d15efSToby Isaac i2 -= 5; 551*e48d15efSToby Isaac idiag -= 25; 552*e48d15efSToby Isaac for (i=m-2; i>=0; i--) { 553*e48d15efSToby Isaac v = aa + 25*(diag[i]+1); 554*e48d15efSToby Isaac vi = aj + diag[i] + 1; 555*e48d15efSToby Isaac nz = ai[i+1] - diag[i] - 1; 556*e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 557*e48d15efSToby Isaac while (nz--) { 558*e48d15efSToby Isaac idx = 5*(*vi++); 559*e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] 
= x[3+idx]; xw[4] = x[4+idx]; 560*e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 561*e48d15efSToby Isaac v += 25; 562*e48d15efSToby Isaac } 563*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 564*e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 565*e48d15efSToby Isaac idiag -= 25; 566*e48d15efSToby Isaac i2 -= 5; 567*e48d15efSToby Isaac } 568*e48d15efSToby Isaac break; 569*e48d15efSToby Isaac case 6: 570*e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 571*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 572*e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 573*e48d15efSToby Isaac i2 -= 6; 574*e48d15efSToby Isaac idiag -= 36; 575*e48d15efSToby Isaac for (i=m-2; i>=0; i--) { 576*e48d15efSToby Isaac v = aa + 36*(diag[i]+1); 577*e48d15efSToby Isaac vi = aj + diag[i] + 1; 578*e48d15efSToby Isaac nz = ai[i+1] - diag[i] - 1; 579*e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 580*e48d15efSToby Isaac while (nz--) { 581*e48d15efSToby Isaac idx = 6*(*vi++); 582*e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 583*e48d15efSToby Isaac xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 584*e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 585*e48d15efSToby Isaac v += 36; 586*e48d15efSToby Isaac } 587*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 588*e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 589*e48d15efSToby Isaac idiag -= 36; 590*e48d15efSToby Isaac i2 -= 6; 591*e48d15efSToby Isaac } 592*e48d15efSToby Isaac break; 593*e48d15efSToby Isaac case 7: 594*e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 
595*e48d15efSToby Isaac s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 596*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(x,idiag,b); 597*e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 598*e48d15efSToby Isaac x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 599*e48d15efSToby Isaac i2 -= 7; 600*e48d15efSToby Isaac idiag -= 49; 601*e48d15efSToby Isaac for (i=m-2; i>=0; i--) { 602*e48d15efSToby Isaac v = aa + 49*(diag[i]+1); 603*e48d15efSToby Isaac vi = aj + diag[i] + 1; 604*e48d15efSToby Isaac nz = ai[i+1] - diag[i] - 1; 605*e48d15efSToby Isaac s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 606*e48d15efSToby Isaac s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 607*e48d15efSToby Isaac while (nz--) { 608*e48d15efSToby Isaac idx = 7*(*vi++); 609*e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 610*e48d15efSToby Isaac xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 611*e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 612*e48d15efSToby Isaac v += 49; 613*e48d15efSToby Isaac } 614*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 615*e48d15efSToby Isaac x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 616*e48d15efSToby Isaac x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 617*e48d15efSToby Isaac idiag -= 49; 618*e48d15efSToby Isaac i2 -= 7; 619*e48d15efSToby Isaac } 620*e48d15efSToby Isaac break; 621*e48d15efSToby Isaac default: 622*e48d15efSToby Isaac ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 62396b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 624de80f912SBarry Smith i2 -= bs; 625*e48d15efSToby Isaac idiag -= bs2; 626de80f912SBarry Smith for (i=m-2; i>=0; i--) { 627de80f912SBarry Smith v = aa + bs2*(diag[i]+1); 628de80f912SBarry Smith vi = aj + diag[i] + 1; 629de80f912SBarry Smith nz = ai[i+1] - diag[i] - 1; 630de80f912SBarry Smith 
631*e48d15efSToby Isaac ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 632de80f912SBarry Smith /* copy all rows of x that are needed into contiguous space */ 633de80f912SBarry Smith workt = work; 634de80f912SBarry Smith for (j=0; j<nz; j++) { 635de80f912SBarry Smith ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 636de80f912SBarry Smith workt += bs; 637de80f912SBarry Smith } 63896b95a6bSBarry Smith PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 63996b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 640*e48d15efSToby Isaac 641de80f912SBarry Smith idiag -= bs2; 642de80f912SBarry Smith i2 -= bs; 643de80f912SBarry Smith } 644*e48d15efSToby Isaac break; 645*e48d15efSToby Isaac } 646de80f912SBarry Smith ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 647de80f912SBarry Smith } 648*e48d15efSToby Isaac its--; 649*e48d15efSToby Isaac } 650*e48d15efSToby Isaac while (its--) { 651*e48d15efSToby Isaac if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 652*e48d15efSToby Isaac idiag = a->idiag; 653*e48d15efSToby Isaac i2 = 0; 654*e48d15efSToby Isaac switch (bs) { 655*e48d15efSToby Isaac case 1: 656*e48d15efSToby Isaac for (i=0; i<m; i++) { 657*e48d15efSToby Isaac v = aa + ai[i]; 658*e48d15efSToby Isaac vi = aj + ai[i]; 659*e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 660*e48d15efSToby Isaac s[0] = b[i2]; 661*e48d15efSToby Isaac for (j=0; j<nz; j++) { 662*e48d15efSToby Isaac xw[0] = x[vi[j]]; 663*e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 664*e48d15efSToby Isaac } 665*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 666*e48d15efSToby Isaac x[i2] += xw[0]; 667*e48d15efSToby Isaac idiag += 1; 668*e48d15efSToby Isaac i2 += 1; 669*e48d15efSToby Isaac } 670*e48d15efSToby Isaac break; 671*e48d15efSToby Isaac case 2: 672*e48d15efSToby Isaac for (i=0; i<m; i++) { 673*e48d15efSToby Isaac v = aa + 4*ai[i]; 674*e48d15efSToby Isaac vi = aj + ai[i]; 
675*e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 676*e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; 677*e48d15efSToby Isaac for (j=0; j<nz; j++) { 678*e48d15efSToby Isaac idx = 2*vi[j]; 679*e48d15efSToby Isaac it = 4*j; 680*e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; 681*e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 682*e48d15efSToby Isaac } 683*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 684*e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; 685*e48d15efSToby Isaac idiag += 4; 686*e48d15efSToby Isaac i2 += 2; 687*e48d15efSToby Isaac } 688*e48d15efSToby Isaac break; 689*e48d15efSToby Isaac case 3: 690*e48d15efSToby Isaac for (i=0; i<m; i++) { 691*e48d15efSToby Isaac v = aa + 9*ai[i]; 692*e48d15efSToby Isaac vi = aj + ai[i]; 693*e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 694*e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 695*e48d15efSToby Isaac while (nz--) { 696*e48d15efSToby Isaac idx = 3*(*vi++); 697*e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 698*e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 699*e48d15efSToby Isaac v += 9; 700*e48d15efSToby Isaac } 701*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 702*e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 703*e48d15efSToby Isaac idiag += 9; 704*e48d15efSToby Isaac i2 += 3; 705*e48d15efSToby Isaac } 706*e48d15efSToby Isaac break; 707*e48d15efSToby Isaac case 4: 708*e48d15efSToby Isaac for (i=0; i<m; i++) { 709*e48d15efSToby Isaac v = aa + 16*ai[i]; 710*e48d15efSToby Isaac vi = aj + ai[i]; 711*e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 712*e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 713*e48d15efSToby Isaac while (nz--) { 714*e48d15efSToby Isaac idx = 4*(*vi++); 715*e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 716*e48d15efSToby Isaac 
PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 717*e48d15efSToby Isaac v += 16; 718*e48d15efSToby Isaac } 719*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 720*e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 721*e48d15efSToby Isaac idiag += 16; 722*e48d15efSToby Isaac i2 += 4; 723*e48d15efSToby Isaac } 724*e48d15efSToby Isaac break; 725*e48d15efSToby Isaac case 5: 726*e48d15efSToby Isaac for (i=0; i<m; i++) { 727*e48d15efSToby Isaac v = aa + 25*ai[i]; 728*e48d15efSToby Isaac vi = aj + ai[i]; 729*e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 730*e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 731*e48d15efSToby Isaac while (nz--) { 732*e48d15efSToby Isaac idx = 5*(*vi++); 733*e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 734*e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 735*e48d15efSToby Isaac v += 25; 736*e48d15efSToby Isaac } 737*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 738*e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 739*e48d15efSToby Isaac idiag += 25; 740*e48d15efSToby Isaac i2 += 5; 741*e48d15efSToby Isaac } 742*e48d15efSToby Isaac break; 743*e48d15efSToby Isaac case 6: 744*e48d15efSToby Isaac for (i=0; i<m; i++) { 745*e48d15efSToby Isaac v = aa + 36*ai[i]; 746*e48d15efSToby Isaac vi = aj + ai[i]; 747*e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 748*e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 749*e48d15efSToby Isaac while (nz--) { 750*e48d15efSToby Isaac idx = 6*(*vi++); 751*e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 752*e48d15efSToby Isaac xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 753*e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 754*e48d15efSToby Isaac v += 
36; 755*e48d15efSToby Isaac } 756*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 757*e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 758*e48d15efSToby Isaac x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 759*e48d15efSToby Isaac idiag += 36; 760*e48d15efSToby Isaac i2 += 6; 761*e48d15efSToby Isaac } 762*e48d15efSToby Isaac break; 763*e48d15efSToby Isaac case 7: 764*e48d15efSToby Isaac for (i=0; i<m; i++) { 765*e48d15efSToby Isaac v = aa + 49*ai[i]; 766*e48d15efSToby Isaac vi = aj + ai[i]; 767*e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 768*e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 769*e48d15efSToby Isaac s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 770*e48d15efSToby Isaac while (nz--) { 771*e48d15efSToby Isaac idx = 7*(*vi++); 772*e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 773*e48d15efSToby Isaac xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 774*e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 775*e48d15efSToby Isaac v += 49; 776*e48d15efSToby Isaac } 777*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 778*e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 779*e48d15efSToby Isaac x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 780*e48d15efSToby Isaac idiag += 49; 781*e48d15efSToby Isaac i2 += 7; 782*e48d15efSToby Isaac } 783*e48d15efSToby Isaac break; 784*e48d15efSToby Isaac default: 785*e48d15efSToby Isaac for (i=0; i<m; i++) { 786*e48d15efSToby Isaac v = aa + bs2*ai[i]; 787*e48d15efSToby Isaac vi = aj + ai[i]; 788*e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 789*e48d15efSToby Isaac 790*e48d15efSToby Isaac ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 791*e48d15efSToby Isaac /* copy all rows of x that are needed into contiguous space */ 792*e48d15efSToby Isaac workt = work; 793*e48d15efSToby Isaac for (j=0; j<nz; j++) { 
794*e48d15efSToby Isaac ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 795*e48d15efSToby Isaac workt += bs; 796*e48d15efSToby Isaac } 797*e48d15efSToby Isaac PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 798*e48d15efSToby Isaac PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 799*e48d15efSToby Isaac 800*e48d15efSToby Isaac idiag += bs2; 801*e48d15efSToby Isaac i2 += bs; 802*e48d15efSToby Isaac } 803*e48d15efSToby Isaac break; 804*e48d15efSToby Isaac } 805*e48d15efSToby Isaac ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 806*e48d15efSToby Isaac } 807*e48d15efSToby Isaac if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 808*e48d15efSToby Isaac idiag = a->idiag+bs2*(a->mbs-1); 809*e48d15efSToby Isaac i2 = bs * (m-1); 810*e48d15efSToby Isaac switch (bs) { 811*e48d15efSToby Isaac case 1: 812*e48d15efSToby Isaac for (i=m-1; i>=0; i--) { 813*e48d15efSToby Isaac v = aa + ai[i]; 814*e48d15efSToby Isaac vi = aj + ai[i]; 815*e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 816*e48d15efSToby Isaac s[0] = b[i2]; 817*e48d15efSToby Isaac for (j=0; j<nz; j++) { 818*e48d15efSToby Isaac xw[0] = x[vi[j]]; 819*e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 820*e48d15efSToby Isaac } 821*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 822*e48d15efSToby Isaac x[i2] += xw[0]; 823*e48d15efSToby Isaac idiag -= 1; 824*e48d15efSToby Isaac i2 -= 1; 825*e48d15efSToby Isaac } 826*e48d15efSToby Isaac break; 827*e48d15efSToby Isaac case 2: 828*e48d15efSToby Isaac for (i=m-1; i>=0; i--) { 829*e48d15efSToby Isaac v = aa + 4*ai[i]; 830*e48d15efSToby Isaac vi = aj + ai[i]; 831*e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 832*e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; 833*e48d15efSToby Isaac for (j=0; j<nz; j++) { 834*e48d15efSToby Isaac idx = 2*vi[j]; 835*e48d15efSToby Isaac it = 4*j; 836*e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; 837*e48d15efSToby Isaac 
PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 838*e48d15efSToby Isaac } 839*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 840*e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; 841*e48d15efSToby Isaac idiag -= 4; 842*e48d15efSToby Isaac i2 -= 2; 843*e48d15efSToby Isaac } 844*e48d15efSToby Isaac break; 845*e48d15efSToby Isaac case 3: 846*e48d15efSToby Isaac for (i=m-1; i>=0; i--) { 847*e48d15efSToby Isaac v = aa + 9*ai[i]; 848*e48d15efSToby Isaac vi = aj + ai[i]; 849*e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 850*e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 851*e48d15efSToby Isaac while (nz--) { 852*e48d15efSToby Isaac idx = 3*(*vi++); 853*e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 854*e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 855*e48d15efSToby Isaac v += 9; 856*e48d15efSToby Isaac } 857*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 858*e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 859*e48d15efSToby Isaac idiag -= 9; 860*e48d15efSToby Isaac i2 -= 3; 861*e48d15efSToby Isaac } 862*e48d15efSToby Isaac break; 863*e48d15efSToby Isaac case 4: 864*e48d15efSToby Isaac for (i=m-1; i>=0; i--) { 865*e48d15efSToby Isaac v = aa + 16*ai[i]; 866*e48d15efSToby Isaac vi = aj + ai[i]; 867*e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 868*e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 869*e48d15efSToby Isaac while (nz--) { 870*e48d15efSToby Isaac idx = 4*(*vi++); 871*e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 872*e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 873*e48d15efSToby Isaac v += 16; 874*e48d15efSToby Isaac } 875*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 876*e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 877*e48d15efSToby Isaac idiag -= 16; 878*e48d15efSToby Isaac i2 -= 
4; 879*e48d15efSToby Isaac } 880*e48d15efSToby Isaac break; 881*e48d15efSToby Isaac case 5: 882*e48d15efSToby Isaac for (i=m-1; i>=0; i--) { 883*e48d15efSToby Isaac v = aa + 25*ai[i]; 884*e48d15efSToby Isaac vi = aj + ai[i]; 885*e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 886*e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 887*e48d15efSToby Isaac while (nz--) { 888*e48d15efSToby Isaac idx = 5*(*vi++); 889*e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 890*e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 891*e48d15efSToby Isaac v += 25; 892*e48d15efSToby Isaac } 893*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 894*e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 895*e48d15efSToby Isaac idiag -= 25; 896*e48d15efSToby Isaac i2 -= 5; 897*e48d15efSToby Isaac } 898*e48d15efSToby Isaac break; 899*e48d15efSToby Isaac case 6: 900*e48d15efSToby Isaac for (i=m-1; i>=0; i--) { 901*e48d15efSToby Isaac v = aa + 36*ai[i]; 902*e48d15efSToby Isaac vi = aj + ai[i]; 903*e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 904*e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 905*e48d15efSToby Isaac while (nz--) { 906*e48d15efSToby Isaac idx = 6*(*vi++); 907*e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 908*e48d15efSToby Isaac xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 909*e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 910*e48d15efSToby Isaac v += 36; 911*e48d15efSToby Isaac } 912*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 913*e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 914*e48d15efSToby Isaac x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 915*e48d15efSToby Isaac idiag -= 36; 916*e48d15efSToby Isaac i2 -= 6; 
917*e48d15efSToby Isaac } 918*e48d15efSToby Isaac break; 919*e48d15efSToby Isaac case 7: 920*e48d15efSToby Isaac for (i=m-1; i>=0; i--) { 921*e48d15efSToby Isaac v = aa + 49*ai[i]; 922*e48d15efSToby Isaac vi = aj + ai[i]; 923*e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 924*e48d15efSToby Isaac s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 925*e48d15efSToby Isaac s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 926*e48d15efSToby Isaac while (nz--) { 927*e48d15efSToby Isaac idx = 7*(*vi++); 928*e48d15efSToby Isaac xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 929*e48d15efSToby Isaac xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 930*e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 931*e48d15efSToby Isaac v += 49; 932*e48d15efSToby Isaac } 933*e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 934*e48d15efSToby Isaac x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 935*e48d15efSToby Isaac x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 936*e48d15efSToby Isaac idiag -= 49; 937*e48d15efSToby Isaac i2 -= 7; 938*e48d15efSToby Isaac } 939*e48d15efSToby Isaac break; 940*e48d15efSToby Isaac default: 941*e48d15efSToby Isaac for (i=m-1; i>=0; i--) { 942*e48d15efSToby Isaac v = aa + bs2*ai[i]; 943*e48d15efSToby Isaac vi = aj + ai[i]; 944*e48d15efSToby Isaac nz = ai[i+1] - ai[i]; 945*e48d15efSToby Isaac 946*e48d15efSToby Isaac ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 947*e48d15efSToby Isaac /* copy all rows of x that are needed into contiguous space */ 948*e48d15efSToby Isaac workt = work; 949*e48d15efSToby Isaac for (j=0; j<nz; j++) { 950*e48d15efSToby Isaac ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 951*e48d15efSToby Isaac workt += bs; 952*e48d15efSToby Isaac } 953*e48d15efSToby Isaac PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 954*e48d15efSToby Isaac 
PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 955*e48d15efSToby Isaac 956*e48d15efSToby Isaac idiag -= bs2; 957*e48d15efSToby Isaac i2 -= bs; 958*e48d15efSToby Isaac } 959*e48d15efSToby Isaac break; 960*e48d15efSToby Isaac } 961*e48d15efSToby Isaac ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 962*e48d15efSToby Isaac } 963*e48d15efSToby Isaac } 964de80f912SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 965de80f912SBarry Smith ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 966de80f912SBarry Smith PetscFunctionReturn(0); 967de80f912SBarry Smith } 968de80f912SBarry Smith 969*e48d15efSToby Isaac
/*
    Special version for direct calls from Fortran (Used in PETSc-fun3d)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesblocked4_ matsetvaluesblocked4
#endif

#undef __FUNCT__
#define __FUNCT__ "matsetvaluesblocked4_"
/*
   matsetvaluesblocked4_ - Adds 4x4 blocks of values into a SeqBAIJ matrix.

   Input Parameters:
+  AA - pointer to the matrix; its row block size must be 4 (otherwise the call aborts)
.  mm - pointer to the number of block rows being added
.  im - block row indices, length *mm
.  nn - pointer to the number of block columns being added
.  in - block column indices, length *nn
-  v  - the values, read as a dense row-major array with 4*(*mm) rows and 4*(*nn) columns

   Notes:
   A block that is already present in the row is added to (+=); a block that is not
   present is inserted in sorted position and assigned (=). Stored blocks are column
   major: entry (r,c) of a block lives at offset r + 4*c.

   No check is made here that the row has room for a newly inserted block.
   NOTE(review): presumably the caller guarantees sufficient preallocation - confirm.

   The return codes of PetscMemcpy()/PetscMemzero() are not examined because this
   routine returns void.
*/
PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
{
  Mat               A  = *AA;
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
  PetscInt          *ai = a->i,*ailen = a->ilen;
  PetscInt          *aj = a->j,stepval,lastcol = -1;
  const PetscScalar *value = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
  /* after reading the 4 values of one block row, skip this many entries to reach the next row of the same block */
  stepval = (n-1)*4;
  for (k=0; k<m; k++) { /* loop over added rows */
    row  = im[k];
    rp   = aj + ai[row];
    ap   = aa + 16*ai[row];
    nrow = ailen[row];
    low  = 0;
    high = nrow;
    for (l=0; l<n; l++) { /* loop over added columns */
      col = in[l];
      /* reuse the previous search window only while the requested columns are increasing */
      if (col <= lastcol) low = 0;
      else                high = nrow;
      lastcol = col;
      /*
         One row of v holds stepval+4 = 4*n scalars, so block (k,l) starts
         4*k rows down and 4*l entries across: (k*(stepval+4) + l)*4.
         (The former expression k*(stepval+4 + l)*4 agreed with this only
         when k == 1 or l == 0.)
      */
      value = v + (k*(stepval+4) + l)*4;
      /* bisection down to a short window, then a linear scan */
      while (high-low > 7) {
        t = (low+high)/2;
        if (rp[t] > col) high = t;
        else             low  = t;
      }
      for (i=low; i<high; i++) {
        if (rp[i] > col) break;
        if (rp[i] == col) {
          /* block already present: accumulate, transposing row-major input into column-major storage */
          bap = ap + 16*i;
          for (ii=0; ii<4; ii++,value+=stepval) {
            for (jj=ii; jj<16; jj+=4) {
              bap[jj] += *value++;
            }
          }
          goto noinsert2;
        }
      }
      N = nrow++ - 1;
      high++; /* added new column index thus must search to one higher than before */
      /* shift up all the later entries in this row */
      for (ii=N; ii>=i; ii--) {
        rp[ii+1] = rp[ii];
        PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
      }
      if (N >= i) {
        PetscMemzero(ap+16*i,16*sizeof(MatScalar));
      }
      rp[i] = col;
      bap   = ap + 16*i;
      for (ii=0; ii<4; ii++,value+=stepval) {
        for (jj=ii; jj<16; jj+=4) {
          bap[jj] = *value++;
        }
      }
noinsert2:;
      low = i;
    }
    ailen[row] = nrow;
  }
  PetscFunctionReturnVoid();
}

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvalues4_ MATSETVALUES4
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvalues4_ matsetvalues4
#endif

#undef __FUNCT__
#define __FUNCT__ "matsetvalues4_"
/*
   matsetvalues4_ - Adds individual (point) values into a SeqBAIJ matrix stored with 4x4 blocks.

   Input Parameters:
+  AA - pointer to the matrix
.  mm - pointer to the number of point rows being added
.  im - point row indices, length *mm
.  nn - pointer to the number of point columns being added
.  in - point column indices, length *nn
-  v  - the values, read row-major: v[l + k*(*nn)] belongs to (im[k],in[l])

   Notes:
   Point indices are mapped to a block (index/4) and a position inside the block (index%4).
   If the block exists the value is added to it; otherwise a new block is inserted in
   sorted position and only the addressed entry is assigned. A block that was opened up
   by shifting later blocks is zeroed first; a block appended at the end of the row is not.
   NOTE(review): presumably the appended storage is already zero - confirm.

   Unlike matsetvaluesblocked4_() this routine does not verify the block size.
*/
PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
{
  Mat         A  = *AA;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
  PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
  PetscInt    *ai = a->i,*ailen = a->ilen;
  PetscInt    *aj = a->j,brow,bcol;
  PetscInt    ridx,cidx,lastcol = -1;
  MatScalar   *ap,value,*aa = a->a,*bap;

  PetscFunctionBegin;
  for (k=0; k<m; k++) { /* loop over added rows */
    row  = im[k]; brow = row/4;
    rp   = aj + ai[brow];
    ap   = aa + 16*ai[brow];
    nrow = ailen[brow];
    low  = 0;
    high = nrow;
    for (l=0; l<n; l++) { /* loop over added columns */
      col   = in[l]; bcol = col/4;
      ridx  = row % 4; cidx = col % 4;
      value = v[l + k*n];
      /* reuse the previous search window only while the requested columns are increasing */
      if (col <= lastcol) low = 0;
      else                high = nrow;
      lastcol = col;
      /* bisection down to a short window, then a linear scan */
      while (high-low > 7) {
        t = (low+high)/2;
        if (rp[t] > bcol) high = t;
        else              low  = t;
      }
      for (i=low; i<high; i++) {
        if (rp[i] > bcol) break;
        if (rp[i] == bcol) {
          /* column-major position of (ridx,cidx) inside the existing block */
          bap   = ap + 16*i + 4*cidx + ridx;
          *bap += value;
          goto noinsert1;
        }
      }
      N = nrow++ - 1;
      high++; /* added new column thus must search to one higher than before */
      /* shift up all the later entries in this row */
      for (ii=N; ii>=i; ii--) {
        rp[ii+1] = rp[ii];
        PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
      }
      if (N>=i) {
        PetscMemzero(ap+16*i,16*sizeof(MatScalar));
      }
      rp[i]                    = bcol;
      ap[16*i + 4*cidx + ridx] = value;
noinsert1:;
      low = i;
    }
    ailen[brow] = nrow;
  }
  PetscFunctionReturnVoid();
}

/*
     Checks for missing diagonals
*/
#undef __FUNCT__
#define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
/*
   MatMissingDiagonal_SeqBAIJ - Reports whether some block row has no stored diagonal block.

   Input Parameter:
.  A - the matrix

   Output Parameters:
+  missing - PETSC_TRUE if a diagonal block is absent, PETSC_FALSE otherwise
-  d       - (optional, may be NULL) index of the first block row without a diagonal;
             set to 0 when the matrix has rows but no column index array at all

   Notes:
   MatMarkDiagonal_SeqBAIJ() stores a->i[i+1] in a->diag[i] when block row i has no
   diagonal, so that sentinel is what is tested here. Looking at a->j[a->diag[i]]
   instead would read the first column of the following row (which may legitimately
   equal i) or, for the last row, one entry past the end of a->j.
*/
PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d)
{
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *diag,*jj = a->j,i;

  PetscFunctionBegin;
  ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
  *missing = PETSC_FALSE;
  if (A->rmap->n > 0 && !jj) {
    *missing = PETSC_TRUE;
    if (d) *d = 0;
    PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
  } else {
    diag = a->diag;
    for (i=0; i<a->mbs; i++) {
      if (diag[i] >= a->i[i+1]) { /* sentinel: no diagonal block was found in row i */
        *missing = PETSC_TRUE;
        if (d) *d = i;
        PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
        break;
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
/*
   MatMarkDiagonal_SeqBAIJ - Records, for every block row, where its diagonal block is stored.

   Input Parameter:
.  A - the matrix

   Notes:
   a->diag is allocated on first use (and flagged with a->free_diag so that
   MatDestroy_SeqBAIJ() releases it) and is recomputed on every call.
   a->diag[i] is the position in a->j of the entry equal to i within block row i,
   or a->i[i+1] (the start of the next row) when the row has no diagonal block.
*/
PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
{
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,m = a->mbs;

  PetscFunctionBegin;
  if (!a->diag) {
    ierr         = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
    ierr         = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
    a->free_diag = PETSC_TRUE;
  }
  for (i=0; i<m; i++) {
    a->diag[i] = a->i[i+1]; /* "not found" marker, overwritten below if the diagonal exists */
    for (j=a->i[i]; j<a->i[i+1]; j++) {
      if (a->j[j] == i) {
        a->diag[i] = j;
        break;
      }
    }
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
/*
   MatGetRowIJ_SeqBAIJ - Returns the row offsets and column indices of the nonzero structure.

   Input Parameters:
+  A               - the matrix
.  oshift          - 1 to obtain 1-based indices in the block-compressed case; it is not
                     applied in the expanded (point) case
.  symmetric       - build the structure with MatToSymmetricIJ_SeqAIJ() instead of using a->i/a->j
-  blockcompressed - PETSC_TRUE for one row/column per block, PETSC_FALSE to expand every
                     block into bs point rows and columns (only differs when bs > 1)

   Output Parameters:
+  nn   - number of rows described: a->mbs, or a->mbs*bs in the expanded case
.  inia - row offsets; if NULL only nn is set
.  inja - (optional) column indices
-  done - not modified by this routine

   Notes:
   Depending on the options the returned arrays are either freshly allocated or are
   the matrix's own a->i/a->j; MatRestoreRowIJ_SeqBAIJ() must be called with the same
   options so that it frees exactly the allocated ones.

   When symmetric is set and no column indices are requested, the column array
   produced by MatToSymmetricIJ_SeqAIJ() is released here, since the caller never
   receives a pointer to it.
*/
static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done)
{
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
  PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;

  PetscFunctionBegin;
  *nn = n;
  if (!ia) PetscFunctionReturn(0);
  if (symmetric) {
    ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
    nz   = tia[n];
  } else {
    tia = a->i; tja = a->j;
  }

  if (!blockcompressed && bs > 1) {
    (*nn) *= bs;
    /* malloc & create the natural set of indices */
    ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
    if (n) {
      (*ia)[0] = 0;
      for (j=1; j<bs; j++) {
        (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
      }
    }

    /* each point row of block row i holds bs*(number of blocks in row i) entries */
    for (i=1; i<n; i++) {
      (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
      for (j=1; j<bs; j++) {
        (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
      }
    }
    if (n) {
      (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
    }

    if (inja) {
      ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
      cnt  = 0;
      for (i=0; i<n; i++) {
        for (j=0; j<bs; j++) {
          for (k=tia[i]; k<tia[i+1]; k++) {
            for (l=0; l<bs; l++) {
              (*ja)[cnt++] = bs*tja[k] + l;
            }
          }
        }
      }
    }

    if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
      ierr = PetscFree(tia);CHKERRQ(ierr);
      ierr = PetscFree(tja);CHKERRQ(ierr);
    }
  } else if (oshift == 1) {
    if (symmetric) {
      nz = tia[A->rmap->n/bs];
      /*  add 1 to i and j indices */
      for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
      *ia = tia;
      if (ja) {
        for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
        *ja = tja;
      } else {
        /* nobody will receive tja, so it has to be released here */
        ierr = PetscFree(tja);CHKERRQ(ierr);
      }
    } else {
      nz = a->i[A->rmap->n/bs];
      /* malloc space and  add 1 to i and j indices */
      ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
      for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
      if (ja) {
        ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
        for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
      }
    }
  } else {
    *ia = tia;
    if (ja) *ja = tja;
    else if (symmetric) {
      /* nobody will receive tja, so it has to be released here */
      ierr = PetscFree(tja);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
/*
   MatRestoreRowIJ_SeqBAIJ - Releases the arrays handed out by MatGetRowIJ_SeqBAIJ().

   Input Parameters:
+  A, oshift, symmetric, blockcompressed - the same values that were passed to MatGetRowIJ_SeqBAIJ()
.  ia - the row offsets that were returned; if NULL nothing is done
-  ja - (optional) the column indices that were returned

   Notes:
   The arrays are freed only for the option combinations in which
   MatGetRowIJ_SeqBAIJ() returned allocated copies (expanded blocks, symmetric
   structure, or 1-based indices); otherwise they belong to the matrix and are
   left alone. nn and done are not used.
*/
static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!ia) PetscFunctionReturn(0);
  if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
    ierr = PetscFree(*ia);CHKERRQ(ierr);
    if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_SeqBAIJ"
/*
   MatDestroy_SeqBAIJ - Frees everything owned by the SeqBAIJ implementation of a matrix.

   Input Parameter:
.  A - the matrix

   Notes:
   Releases the value/index arrays, the index sets, the cached diagonal data, the
   work arrays and the attached sbaijMat/parent matrices, then the Mat_SeqBAIJ
   structure itself. a->diag and a->imax/a->ilen are freed only when the
   corresponding free_* flag says this matrix owns them. Finally the type name is
   cleared and the composed type-specific functions are removed from the object.
*/
PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
{
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
#endif
  ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
  ierr = ISDestroy(&a->row);CHKERRQ(ierr);
  ierr = ISDestroy(&a->col);CHKERRQ(ierr);
  if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
  ierr = PetscFree(a->idiag);CHKERRQ(ierr);
  if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
  ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
  ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
  ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
  ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
  ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
  ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
  ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);

  ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
  ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
  ierr = PetscFree(A->data);CHKERRQ(ierr);

  /* a is dangling from here on; only A itself is touched below */
  ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

13174a2ae208SSatish Balay #undef __FUNCT__ 13184a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_SeqBAIJ" 1319ace3abfcSBarry Smith PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 13202d61bbb3SSatish Balay { 13212d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 132263ba0a88SBarry Smith PetscErrorCode ierr; 13232d61bbb3SSatish Balay 13242d61bbb3SSatish Balay PetscFunctionBegin; 1325aa275fccSKris Buschelman switch (op) { 1326aa275fccSKris Buschelman case MAT_ROW_ORIENTED: 13274e0d8c25SBarry Smith a->roworiented = flg; 1328aa275fccSKris Buschelman break; 1329a9817697SBarry Smith case MAT_KEEP_NONZERO_PATTERN: 1330a9817697SBarry Smith a->keepnonzeropattern = flg; 1331aa275fccSKris Buschelman break; 1332512a5fc5SBarry Smith case
MAT_NEW_NONZERO_LOCATIONS: 1333512a5fc5SBarry Smith a->nonew = (flg ? 0 : 1); 1334aa275fccSKris Buschelman break; 1335aa275fccSKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 13364e0d8c25SBarry Smith a->nonew = (flg ? -1 : 0); 1337aa275fccSKris Buschelman break; 1338aa275fccSKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 13394e0d8c25SBarry Smith a->nonew = (flg ? -2 : 0); 1340aa275fccSKris Buschelman break; 134128b2fa4aSMatthew Knepley case MAT_UNUSED_NONZERO_LOCATION_ERR: 134228b2fa4aSMatthew Knepley a->nounused = (flg ? -1 : 0); 134328b2fa4aSMatthew Knepley break; 1344cd6b891eSBarry Smith case MAT_CHECK_COMPRESSED_ROW: 1345cd6b891eSBarry Smith a->compressedrow.check = flg; 1346cd6b891eSBarry Smith break; 13474e0d8c25SBarry Smith case MAT_NEW_DIAGONALS: 1348aa275fccSKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 1349aa275fccSKris Buschelman case MAT_USE_HASH_TABLE: 1350290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1351aa275fccSKris Buschelman break; 13525021d80fSJed Brown case MAT_SPD: 135377e54ba9SKris Buschelman case MAT_SYMMETRIC: 135477e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 13559a4540c5SBarry Smith case MAT_HERMITIAN: 13569a4540c5SBarry Smith case MAT_SYMMETRY_ETERNAL: 13575021d80fSJed Brown /* These options are handled directly by MatSetOption() */ 135877e54ba9SKris Buschelman break; 1359aa275fccSKris Buschelman default: 1360e32f2f54SBarry Smith SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 13612d61bbb3SSatish Balay } 13622d61bbb3SSatish Balay PetscFunctionReturn(0); 13632d61bbb3SSatish Balay } 13642d61bbb3SSatish Balay 13654a2ae208SSatish Balay #undef __FUNCT__ 13664a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_SeqBAIJ" 1367c1ac3661SBarry Smith PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 13682d61bbb3SSatish Balay { 13692d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 13706849ba73SBarry Smith 
PetscErrorCode ierr; 1371c1ac3661SBarry Smith PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 13723f1db9ecSBarry Smith MatScalar *aa,*aa_i; 137387828ca2SBarry Smith PetscScalar *v_i; 13742d61bbb3SSatish Balay 13752d61bbb3SSatish Balay PetscFunctionBegin; 1376d0f46423SBarry Smith bs = A->rmap->bs; 13772d61bbb3SSatish Balay ai = a->i; 13782d61bbb3SSatish Balay aj = a->j; 13792d61bbb3SSatish Balay aa = a->a; 13802d61bbb3SSatish Balay bs2 = a->bs2; 13812d61bbb3SSatish Balay 1382e32f2f54SBarry Smith if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 13832d61bbb3SSatish Balay 13842d61bbb3SSatish Balay bn = row/bs; /* Block number */ 13852d61bbb3SSatish Balay bp = row % bs; /* Block Position */ 13862d61bbb3SSatish Balay M = ai[bn+1] - ai[bn]; 13872d61bbb3SSatish Balay *nz = bs*M; 13882d61bbb3SSatish Balay 13892d61bbb3SSatish Balay if (v) { 13902d61bbb3SSatish Balay *v = 0; 13912d61bbb3SSatish Balay if (*nz) { 139287828ca2SBarry Smith ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 13932d61bbb3SSatish Balay for (i=0; i<M; i++) { /* for each block in the block row */ 13942d61bbb3SSatish Balay v_i = *v + i*bs; 13952d61bbb3SSatish Balay aa_i = aa + bs2*(ai[bn] + i); 139626fbe8dcSKarl Rupp for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 13972d61bbb3SSatish Balay } 13982d61bbb3SSatish Balay } 13992d61bbb3SSatish Balay } 14002d61bbb3SSatish Balay 14012d61bbb3SSatish Balay if (idx) { 14022d61bbb3SSatish Balay *idx = 0; 14032d61bbb3SSatish Balay if (*nz) { 1404c1ac3661SBarry Smith ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 14052d61bbb3SSatish Balay for (i=0; i<M; i++) { /* for each block in the block row */ 14062d61bbb3SSatish Balay idx_i = *idx + i*bs; 14072d61bbb3SSatish Balay itmp = bs*aj[ai[bn] + i]; 140826fbe8dcSKarl Rupp for (j=0; j<bs; j++) idx_i[j] = itmp++; 14092d61bbb3SSatish Balay } 14102d61bbb3SSatish Balay } 14112d61bbb3SSatish Balay } 14122d61bbb3SSatish Balay 
PetscFunctionReturn(0); 14132d61bbb3SSatish Balay } 14142d61bbb3SSatish Balay 14154a2ae208SSatish Balay #undef __FUNCT__ 14164a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1417c1ac3661SBarry Smith PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 14182d61bbb3SSatish Balay { 1419dfbe8321SBarry Smith PetscErrorCode ierr; 1420606d414cSSatish Balay 14212d61bbb3SSatish Balay PetscFunctionBegin; 142205b42c5fSBarry Smith if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 142305b42c5fSBarry Smith if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 14242d61bbb3SSatish Balay PetscFunctionReturn(0); 14252d61bbb3SSatish Balay } 14262d61bbb3SSatish Balay 1427fca92195SBarry Smith extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1428fca92195SBarry Smith 14294a2ae208SSatish Balay #undef __FUNCT__ 14304a2ae208SSatish Balay #define __FUNCT__ "MatTranspose_SeqBAIJ" 1431fc4dec0aSBarry Smith PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 14322d61bbb3SSatish Balay { 14332d61bbb3SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 14342d61bbb3SSatish Balay Mat C; 14356849ba73SBarry Smith PetscErrorCode ierr; 1436d0f46423SBarry Smith PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1437c1ac3661SBarry Smith PetscInt *rows,*cols,bs2=a->bs2; 1438dd6ea824SBarry Smith MatScalar *array; 14392d61bbb3SSatish Balay 14402d61bbb3SSatish Balay PetscFunctionBegin; 1441e32f2f54SBarry Smith if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1442fc4dec0aSBarry Smith if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1443c1ac3661SBarry Smith ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1444c1ac3661SBarry Smith ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 14452d61bbb3SSatish Balay 14462d61bbb3SSatish 
Balay for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1447ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1448d0f46423SBarry Smith ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 14497adad957SLisandro Dalcin ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1450ecd8bba6SJed Brown ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1451606d414cSSatish Balay ierr = PetscFree(col);CHKERRQ(ierr); 1452fc4dec0aSBarry Smith } else { 1453fc4dec0aSBarry Smith C = *B; 1454fc4dec0aSBarry Smith } 1455fc4dec0aSBarry Smith 1456fc4dec0aSBarry Smith array = a->a; 1457fca92195SBarry Smith ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 14582d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 14592d61bbb3SSatish Balay cols[0] = i*bs; 14602d61bbb3SSatish Balay for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 14612d61bbb3SSatish Balay len = ai[i+1] - ai[i]; 14622d61bbb3SSatish Balay for (j=0; j<len; j++) { 14632d61bbb3SSatish Balay rows[0] = (*aj++)*bs; 14642d61bbb3SSatish Balay for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1465fca92195SBarry Smith ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 14662d61bbb3SSatish Balay array += bs2; 14672d61bbb3SSatish Balay } 14682d61bbb3SSatish Balay } 1469fca92195SBarry Smith ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 14702d61bbb3SSatish Balay 14712d61bbb3SSatish Balay ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 14722d61bbb3SSatish Balay ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 14732d61bbb3SSatish Balay 1474815cbec1SBarry Smith if (reuse == MAT_INITIAL_MATRIX || *B != A) { 14752d61bbb3SSatish Balay *B = C; 14762d61bbb3SSatish Balay } else { 1477eb6b5d47SBarry Smith ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 14782d61bbb3SSatish Balay } 14792d61bbb3SSatish Balay PetscFunctionReturn(0); 14802d61bbb3SSatish Balay } 14812d61bbb3SSatish Balay 1482453d3561SHong Zhang #undef 
__FUNCT__ 1483453d3561SHong Zhang #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1484453d3561SHong Zhang PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1485453d3561SHong Zhang { 1486453d3561SHong Zhang PetscErrorCode ierr; 1487453d3561SHong Zhang Mat Btrans; 1488453d3561SHong Zhang 1489453d3561SHong Zhang PetscFunctionBegin; 1490453d3561SHong Zhang *f = PETSC_FALSE; 1491453d3561SHong Zhang ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1492453d3561SHong Zhang ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1493453d3561SHong Zhang ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1494453d3561SHong Zhang PetscFunctionReturn(0); 1495453d3561SHong Zhang } 1496453d3561SHong Zhang 14974a2ae208SSatish Balay #undef __FUNCT__ 14984a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Binary" 14996849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 15002593348eSBarry Smith { 1501b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 15026849ba73SBarry Smith PetscErrorCode ierr; 1503d0f46423SBarry Smith PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1504b24ad042SBarry Smith int fd; 150587828ca2SBarry Smith PetscScalar *aa; 1506ce6f0cecSBarry Smith FILE *file; 15072593348eSBarry Smith 15083a40ed3dSBarry Smith PetscFunctionBegin; 1509b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1510d0f46423SBarry Smith ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 15110700a824SBarry Smith col_lens[0] = MAT_FILE_CLASSID; 15123b2fbd54SBarry Smith 1513d0f46423SBarry Smith col_lens[1] = A->rmap->N; 1514d0f46423SBarry Smith col_lens[2] = A->cmap->n; 15157e67e3f9SSatish Balay col_lens[3] = a->nz*bs2; 15162593348eSBarry Smith 15172593348eSBarry Smith /* store lengths of each row and write (including header) to file */ 1518b6490206SBarry Smith count = 0; 1519b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 
1520b6490206SBarry Smith for (j=0; j<bs; j++) { 1521b6490206SBarry Smith col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1522b6490206SBarry Smith } 15232593348eSBarry Smith } 1524d0f46423SBarry Smith ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1525606d414cSSatish Balay ierr = PetscFree(col_lens);CHKERRQ(ierr); 15262593348eSBarry Smith 15272593348eSBarry Smith /* store column indices (zero start index) */ 1528c1ac3661SBarry Smith ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1529b6490206SBarry Smith count = 0; 1530b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 1531b6490206SBarry Smith for (j=0; j<bs; j++) { 1532b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 1533b6490206SBarry Smith for (l=0; l<bs; l++) { 1534b6490206SBarry Smith jj[count++] = bs*a->j[k] + l; 15352593348eSBarry Smith } 15362593348eSBarry Smith } 1537b6490206SBarry Smith } 1538b6490206SBarry Smith } 15396f69ff64SBarry Smith ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1540606d414cSSatish Balay ierr = PetscFree(jj);CHKERRQ(ierr); 15412593348eSBarry Smith 15422593348eSBarry Smith /* store nonzero values */ 154387828ca2SBarry Smith ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1544b6490206SBarry Smith count = 0; 1545b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 1546b6490206SBarry Smith for (j=0; j<bs; j++) { 1547b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 1548b6490206SBarry Smith for (l=0; l<bs; l++) { 15497e67e3f9SSatish Balay aa[count++] = a->a[bs2*k + l*bs + j]; 1550b6490206SBarry Smith } 1551b6490206SBarry Smith } 1552b6490206SBarry Smith } 1553b6490206SBarry Smith } 15546f69ff64SBarry Smith ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1555606d414cSSatish Balay ierr = PetscFree(aa);CHKERRQ(ierr); 1556ce6f0cecSBarry Smith 1557b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 
1558ce6f0cecSBarry Smith if (file) { 1559d0f46423SBarry Smith fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1560ce6f0cecSBarry Smith } 15613a40ed3dSBarry Smith PetscFunctionReturn(0); 15622593348eSBarry Smith } 15632593348eSBarry Smith 15644a2ae208SSatish Balay #undef __FUNCT__ 15654a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 15666849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 15672593348eSBarry Smith { 1568b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1569dfbe8321SBarry Smith PetscErrorCode ierr; 1570d0f46423SBarry Smith PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1571f3ef73ceSBarry Smith PetscViewerFormat format; 15722593348eSBarry Smith 15733a40ed3dSBarry Smith PetscFunctionBegin; 1574b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1575456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 157677431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1577fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1578bcd9e38bSBarry Smith Mat aij; 1579ceb03754SKris Buschelman ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1580bcd9e38bSBarry Smith ierr = MatView(aij,viewer);CHKERRQ(ierr); 15816bf464f9SBarry Smith ierr = MatDestroy(&aij);CHKERRQ(ierr); 158204929863SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 158304929863SHong Zhang PetscFunctionReturn(0); 1584fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1585d00279f6SBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 15867566de4bSShri Abhyankar ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 158744cd7ae7SLois Curfman McInnes for (i=0; i<a->mbs; i++) { 158844cd7ae7SLois Curfman McInnes for (j=0; j<bs; j++) { 158977431f27SBarry Smith ierr = 
PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 159044cd7ae7SLois Curfman McInnes for (k=a->i[i]; k<a->i[i+1]; k++) { 159144cd7ae7SLois Curfman McInnes for (l=0; l<bs; l++) { 1592aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 15930e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1594a83599f4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 15950e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 15960e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1597a83599f4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 15980e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 15990e6d2581SBarry Smith } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1600a83599f4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 16010ef38995SBarry Smith } 160244cd7ae7SLois Curfman McInnes #else 16030ef38995SBarry Smith if (a->a[bs2*k + l*bs + j] != 0.0) { 1604a83599f4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 16050ef38995SBarry Smith } 160644cd7ae7SLois Curfman McInnes #endif 160744cd7ae7SLois Curfman McInnes } 160844cd7ae7SLois Curfman McInnes } 1609b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 161044cd7ae7SLois Curfman McInnes } 161144cd7ae7SLois Curfman McInnes } 1612d00279f6SBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 16130ef38995SBarry Smith } else { 1614d00279f6SBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 16157566de4bSShri Abhyankar ierr = 
PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1616b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 1617b6490206SBarry Smith for (j=0; j<bs; j++) { 161877431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1619b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 1620b6490206SBarry Smith for (l=0; l<bs; l++) { 1621aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 16220e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1623a83599f4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 16240e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 16250e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1626a83599f4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 16270e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 16280ef38995SBarry Smith } else { 1629a83599f4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 163088685aaeSLois Curfman McInnes } 163188685aaeSLois Curfman McInnes #else 1632a83599f4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 163388685aaeSLois Curfman McInnes #endif 16342593348eSBarry Smith } 16352593348eSBarry Smith } 1636b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 16372593348eSBarry Smith } 16382593348eSBarry Smith } 1639d00279f6SBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1640b6490206SBarry Smith } 1641b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 16423a40ed3dSBarry Smith PetscFunctionReturn(0); 16432593348eSBarry Smith } 16442593348eSBarry Smith 16459804daf3SBarry Smith #include 
<petscdraw.h> 16464a2ae208SSatish Balay #undef __FUNCT__ 16474a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 16486849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 16493270192aSSatish Balay { 165077ed5343SBarry Smith Mat A = (Mat) Aa; 16513270192aSSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 16526849ba73SBarry Smith PetscErrorCode ierr; 1653d0f46423SBarry Smith PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 16540e6d2581SBarry Smith PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 16553f1db9ecSBarry Smith MatScalar *aa; 1656b0a32e0cSBarry Smith PetscViewer viewer; 1657b3e7f47fSJed Brown PetscViewerFormat format; 16583270192aSSatish Balay 16593a40ed3dSBarry Smith PetscFunctionBegin; 166077ed5343SBarry Smith ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1661b3e7f47fSJed Brown ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 166277ed5343SBarry Smith 1663b0a32e0cSBarry Smith ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 166477ed5343SBarry Smith 16653270192aSSatish Balay /* loop over matrix elements drawing boxes */ 1666b3e7f47fSJed Brown 1667b3e7f47fSJed Brown if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1668b0a32e0cSBarry Smith color = PETSC_DRAW_BLUE; 16693270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 16703270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 1671d0f46423SBarry Smith y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 16723270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 16733270192aSSatish Balay aa = a->a + j*bs2; 16743270192aSSatish Balay for (k=0; k<bs; k++) { 16753270192aSSatish Balay for (l=0; l<bs; l++) { 16760e6d2581SBarry Smith if (PetscRealPart(*aa++) >= 0.) 
continue; 1677b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 16783270192aSSatish Balay } 16793270192aSSatish Balay } 16803270192aSSatish Balay } 16813270192aSSatish Balay } 1682b0a32e0cSBarry Smith color = PETSC_DRAW_CYAN; 16833270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 16843270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 1685d0f46423SBarry Smith y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 16863270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 16873270192aSSatish Balay aa = a->a + j*bs2; 16883270192aSSatish Balay for (k=0; k<bs; k++) { 16893270192aSSatish Balay for (l=0; l<bs; l++) { 16900e6d2581SBarry Smith if (PetscRealPart(*aa++) != 0.) continue; 1691b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 16923270192aSSatish Balay } 16933270192aSSatish Balay } 16943270192aSSatish Balay } 16953270192aSSatish Balay } 1696b0a32e0cSBarry Smith color = PETSC_DRAW_RED; 16973270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 16983270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 1699d0f46423SBarry Smith y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 17003270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 17013270192aSSatish Balay aa = a->a + j*bs2; 17023270192aSSatish Balay for (k=0; k<bs; k++) { 17033270192aSSatish Balay for (l=0; l<bs; l++) { 17040e6d2581SBarry Smith if (PetscRealPart(*aa++) <= 0.) 
continue; 1705b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 17063270192aSSatish Balay } 17073270192aSSatish Balay } 17083270192aSSatish Balay } 17093270192aSSatish Balay } 1710b3e7f47fSJed Brown } else { 1711b3e7f47fSJed Brown /* use contour shading to indicate magnitude of values */ 1712b3e7f47fSJed Brown /* first determine max of all nonzero values */ 1713b3e7f47fSJed Brown PetscDraw popup; 1714b3e7f47fSJed Brown PetscReal scale,maxv = 0.0; 1715b3e7f47fSJed Brown 1716b3e7f47fSJed Brown for (i=0; i<a->nz*a->bs2; i++) { 1717b3e7f47fSJed Brown if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1718b3e7f47fSJed Brown } 1719b3e7f47fSJed Brown scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1720b3e7f47fSJed Brown ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 172126fbe8dcSKarl Rupp if (popup) { 172226fbe8dcSKarl Rupp ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 172326fbe8dcSKarl Rupp } 1724b3e7f47fSJed Brown for (i=0,row=0; i<mbs; i++,row+=bs) { 1725b3e7f47fSJed Brown for (j=a->i[i]; j<a->i[i+1]; j++) { 1726b3e7f47fSJed Brown y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1727b3e7f47fSJed Brown x_l = a->j[j]*bs; x_r = x_l + 1.0; 1728b3e7f47fSJed Brown aa = a->a + j*bs2; 1729b3e7f47fSJed Brown for (k=0; k<bs; k++) { 1730b3e7f47fSJed Brown for (l=0; l<bs; l++) { 1731b3e7f47fSJed Brown color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1732b3e7f47fSJed Brown ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1733b3e7f47fSJed Brown } 1734b3e7f47fSJed Brown } 1735b3e7f47fSJed Brown } 1736b3e7f47fSJed Brown } 1737b3e7f47fSJed Brown } 173877ed5343SBarry Smith PetscFunctionReturn(0); 173977ed5343SBarry Smith } 17403270192aSSatish Balay 17414a2ae208SSatish Balay #undef __FUNCT__ 17424a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Draw" 17436849ba73SBarry Smith static PetscErrorCode 
MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 174477ed5343SBarry Smith { 1745dfbe8321SBarry Smith PetscErrorCode ierr; 17460e6d2581SBarry Smith PetscReal xl,yl,xr,yr,w,h; 1747b0a32e0cSBarry Smith PetscDraw draw; 1748ace3abfcSBarry Smith PetscBool isnull; 17493270192aSSatish Balay 175077ed5343SBarry Smith PetscFunctionBegin; 1751b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1752b0a32e0cSBarry Smith ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 175377ed5343SBarry Smith 175477ed5343SBarry Smith ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1755d0f46423SBarry Smith xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 175677ed5343SBarry Smith xr += w; yr += h; xl = -w; yl = -h; 1757b0a32e0cSBarry Smith ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1758b0a32e0cSBarry Smith ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 17590298fd71SBarry Smith ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 17603a40ed3dSBarry Smith PetscFunctionReturn(0); 17613270192aSSatish Balay } 17623270192aSSatish Balay 17634a2ae208SSatish Balay #undef __FUNCT__ 17644a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ" 1765dfbe8321SBarry Smith PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 17662593348eSBarry Smith { 1767dfbe8321SBarry Smith PetscErrorCode ierr; 1768ace3abfcSBarry Smith PetscBool iascii,isbinary,isdraw; 17692593348eSBarry Smith 17703a40ed3dSBarry Smith PetscFunctionBegin; 1771251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1772251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1773251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 177432077d6dSBarry Smith if (iascii) { 
17753a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 17760f5bd95cSBarry Smith } else if (isbinary) { 17773a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 17780f5bd95cSBarry Smith } else if (isdraw) { 17793a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 17805cd90555SBarry Smith } else { 1781a5e6ed63SBarry Smith Mat B; 1782ceb03754SKris Buschelman ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1783a5e6ed63SBarry Smith ierr = MatView(B,viewer);CHKERRQ(ierr); 17846bf464f9SBarry Smith ierr = MatDestroy(&B);CHKERRQ(ierr); 17852593348eSBarry Smith } 17863a40ed3dSBarry Smith PetscFunctionReturn(0); 17872593348eSBarry Smith } 1788b6490206SBarry Smith 1789cd0e1443SSatish Balay 17904a2ae208SSatish Balay #undef __FUNCT__ 17914a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_SeqBAIJ" 1792c1ac3661SBarry Smith PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1793cd0e1443SSatish Balay { 1794cd0e1443SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1795c1ac3661SBarry Smith PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1796c1ac3661SBarry Smith PetscInt *ai = a->i,*ailen = a->ilen; 1797d0f46423SBarry Smith PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 179897e567efSBarry Smith MatScalar *ap,*aa = a->a; 1799cd0e1443SSatish Balay 18003a40ed3dSBarry Smith PetscFunctionBegin; 18012d61bbb3SSatish Balay for (k=0; k<m; k++) { /* loop over rows */ 1802cd0e1443SSatish Balay row = im[k]; brow = row/bs; 1803e32f2f54SBarry Smith if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1804e32f2f54SBarry Smith if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 18052d61bbb3SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 18062c3acbe9SBarry Smith nrow = ailen[brow]; 18072d61bbb3SSatish Balay for (l=0; 
l<n; l++) { /* loop over columns */ 1808e32f2f54SBarry Smith if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1809e32f2f54SBarry Smith if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 18102d61bbb3SSatish Balay col = in[l]; 18112d61bbb3SSatish Balay bcol = col/bs; 18122d61bbb3SSatish Balay cidx = col%bs; 18132d61bbb3SSatish Balay ridx = row%bs; 18142d61bbb3SSatish Balay high = nrow; 18152d61bbb3SSatish Balay low = 0; /* assume unsorted */ 18162d61bbb3SSatish Balay while (high-low > 5) { 1817cd0e1443SSatish Balay t = (low+high)/2; 1818cd0e1443SSatish Balay if (rp[t] > bcol) high = t; 1819cd0e1443SSatish Balay else low = t; 1820cd0e1443SSatish Balay } 1821cd0e1443SSatish Balay for (i=low; i<high; i++) { 1822cd0e1443SSatish Balay if (rp[i] > bcol) break; 1823cd0e1443SSatish Balay if (rp[i] == bcol) { 18242d61bbb3SSatish Balay *v++ = ap[bs2*i+bs*cidx+ridx]; 18252d61bbb3SSatish Balay goto finished; 1826cd0e1443SSatish Balay } 1827cd0e1443SSatish Balay } 182897e567efSBarry Smith *v++ = 0.0; 18292d61bbb3SSatish Balay finished:; 1830cd0e1443SSatish Balay } 1831cd0e1443SSatish Balay } 18323a40ed3dSBarry Smith PetscFunctionReturn(0); 1833cd0e1443SSatish Balay } 1834cd0e1443SSatish Balay 18354a2ae208SSatish Balay #undef __FUNCT__ 18364a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1837dd6ea824SBarry Smith PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 183892c4ed94SBarry Smith { 183992c4ed94SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1840e2ee6c50SBarry Smith PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1841c1ac3661SBarry Smith PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 18426849ba73SBarry Smith PetscErrorCode ierr; 1843d0f46423SBarry Smith PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 
1844ace3abfcSBarry Smith PetscBool roworiented=a->roworiented; 1845dd6ea824SBarry Smith const PetscScalar *value = v; 1846f15d580aSBarry Smith MatScalar *ap,*aa = a->a,*bap; 184792c4ed94SBarry Smith 18483a40ed3dSBarry Smith PetscFunctionBegin; 18490e324ae4SSatish Balay if (roworiented) { 18500e324ae4SSatish Balay stepval = (n-1)*bs; 18510e324ae4SSatish Balay } else { 18520e324ae4SSatish Balay stepval = (m-1)*bs; 18530e324ae4SSatish Balay } 185492c4ed94SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 185592c4ed94SBarry Smith row = im[k]; 18565ef9f2a5SBarry Smith if (row < 0) continue; 18572515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 1858e32f2f54SBarry Smith if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 185992c4ed94SBarry Smith #endif 186092c4ed94SBarry Smith rp = aj + ai[row]; 186192c4ed94SBarry Smith ap = aa + bs2*ai[row]; 186292c4ed94SBarry Smith rmax = imax[row]; 186392c4ed94SBarry Smith nrow = ailen[row]; 186492c4ed94SBarry Smith low = 0; 1865c71e6ed7SBarry Smith high = nrow; 186692c4ed94SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 18675ef9f2a5SBarry Smith if (in[l] < 0) continue; 18682515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 1869e32f2f54SBarry Smith if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 187092c4ed94SBarry Smith #endif 187192c4ed94SBarry Smith col = in[l]; 187292c4ed94SBarry Smith if (roworiented) { 187353ef36baSBarry Smith value = v + (k*(stepval+bs) + l)*bs; 18740e324ae4SSatish Balay } else { 187553ef36baSBarry Smith value = v + (l*(stepval+bs) + k)*bs; 187692c4ed94SBarry Smith } 187726fbe8dcSKarl Rupp if (col <= lastcol) low = 0; 187826fbe8dcSKarl Rupp else high = nrow; 1879e2ee6c50SBarry Smith lastcol = col; 188092c4ed94SBarry Smith while (high-low > 7) { 188192c4ed94SBarry Smith t = (low+high)/2; 188292c4ed94SBarry Smith if (rp[t] > col) high = t; 
188392c4ed94SBarry Smith else low = t; 188492c4ed94SBarry Smith } 188592c4ed94SBarry Smith for (i=low; i<high; i++) { 188692c4ed94SBarry Smith if (rp[i] > col) break; 188792c4ed94SBarry Smith if (rp[i] == col) { 18888a84c255SSatish Balay bap = ap + bs2*i; 18890e324ae4SSatish Balay if (roworiented) { 18908a84c255SSatish Balay if (is == ADD_VALUES) { 1891dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1892dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 18938a84c255SSatish Balay bap[jj] += *value++; 1894dd9472c6SBarry Smith } 1895dd9472c6SBarry Smith } 18960e324ae4SSatish Balay } else { 1897dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1898dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 18990e324ae4SSatish Balay bap[jj] = *value++; 19008a84c255SSatish Balay } 1901dd9472c6SBarry Smith } 1902dd9472c6SBarry Smith } 19030e324ae4SSatish Balay } else { 19040e324ae4SSatish Balay if (is == ADD_VALUES) { 190553ef36baSBarry Smith for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1906dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 190753ef36baSBarry Smith bap[jj] += value[jj]; 1908dd9472c6SBarry Smith } 190953ef36baSBarry Smith bap += bs; 1910dd9472c6SBarry Smith } 19110e324ae4SSatish Balay } else { 191253ef36baSBarry Smith for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1913dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 191453ef36baSBarry Smith bap[jj] = value[jj]; 19150e324ae4SSatish Balay } 191653ef36baSBarry Smith bap += bs; 19178a84c255SSatish Balay } 1918dd9472c6SBarry Smith } 1919dd9472c6SBarry Smith } 1920f1241b54SBarry Smith goto noinsert2; 192192c4ed94SBarry Smith } 192292c4ed94SBarry Smith } 192389280ab3SLois Curfman McInnes if (nonew == 1) goto noinsert2; 1924e32f2f54SBarry Smith if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1925fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1926c03d1d03SSatish 
Balay N = nrow++ - 1; high++; 192792c4ed94SBarry Smith /* shift up all the later entries in this row */ 192892c4ed94SBarry Smith for (ii=N; ii>=i; ii--) { 192992c4ed94SBarry Smith rp[ii+1] = rp[ii]; 1930549d3d68SSatish Balay ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 193192c4ed94SBarry Smith } 1932549d3d68SSatish Balay if (N >= i) { 1933549d3d68SSatish Balay ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1934549d3d68SSatish Balay } 193592c4ed94SBarry Smith rp[i] = col; 19368a84c255SSatish Balay bap = ap + bs2*i; 19370e324ae4SSatish Balay if (roworiented) { 1938dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1939dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 19400e324ae4SSatish Balay bap[jj] = *value++; 1941dd9472c6SBarry Smith } 1942dd9472c6SBarry Smith } 19430e324ae4SSatish Balay } else { 1944dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1945dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 19460e324ae4SSatish Balay *bap++ = *value++; 19470e324ae4SSatish Balay } 1948dd9472c6SBarry Smith } 1949dd9472c6SBarry Smith } 1950f1241b54SBarry Smith noinsert2:; 195192c4ed94SBarry Smith low = i; 195292c4ed94SBarry Smith } 195392c4ed94SBarry Smith ailen[row] = nrow; 195492c4ed94SBarry Smith } 19553a40ed3dSBarry Smith PetscFunctionReturn(0); 195692c4ed94SBarry Smith } 195726e093fcSHong Zhang 19584a2ae208SSatish Balay #undef __FUNCT__ 19594a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1960dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1961584200bdSSatish Balay { 1962584200bdSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1963c1ac3661SBarry Smith PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1964d0f46423SBarry Smith PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 19656849ba73SBarry Smith PetscErrorCode ierr; 1966c1ac3661SBarry Smith PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 19673f1db9ecSBarry Smith 
MatScalar *aa = a->a,*ap; 19683447b6efSHong Zhang PetscReal ratio=0.6; 1969584200bdSSatish Balay 19703a40ed3dSBarry Smith PetscFunctionBegin; 19713a40ed3dSBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1972584200bdSSatish Balay 197343ee02c3SBarry Smith if (m) rmax = ailen[0]; 1974584200bdSSatish Balay for (i=1; i<mbs; i++) { 1975584200bdSSatish Balay /* move each row back by the amount of empty slots (fshift) before it*/ 1976584200bdSSatish Balay fshift += imax[i-1] - ailen[i-1]; 1977d402145bSBarry Smith rmax = PetscMax(rmax,ailen[i]); 1978584200bdSSatish Balay if (fshift) { 1979a7c10996SSatish Balay ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1980584200bdSSatish Balay N = ailen[i]; 1981584200bdSSatish Balay for (j=0; j<N; j++) { 1982584200bdSSatish Balay ip[j-fshift] = ip[j]; 198326fbe8dcSKarl Rupp 1984549d3d68SSatish Balay ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1985584200bdSSatish Balay } 1986584200bdSSatish Balay } 1987584200bdSSatish Balay ai[i] = ai[i-1] + ailen[i-1]; 1988584200bdSSatish Balay } 1989584200bdSSatish Balay if (mbs) { 1990584200bdSSatish Balay fshift += imax[mbs-1] - ailen[mbs-1]; 1991584200bdSSatish Balay ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1992584200bdSSatish Balay } 1993584200bdSSatish Balay /* reset ilen and imax for each row */ 1994584200bdSSatish Balay for (i=0; i<mbs; i++) { 1995584200bdSSatish Balay ailen[i] = imax[i] = ai[i+1] - ai[i]; 1996584200bdSSatish Balay } 1997a7c10996SSatish Balay a->nz = ai[mbs]; 1998584200bdSSatish Balay 1999584200bdSSatish Balay /* diagonals may have moved, so kill the diagonal pointers */ 2000b01c7715SBarry Smith a->idiagvalid = PETSC_FALSE; 2001584200bdSSatish Balay if (fshift && a->diag) { 2002606d414cSSatish Balay ierr = PetscFree(a->diag);CHKERRQ(ierr); 200352e6d16bSBarry Smith ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2004584200bdSSatish Balay a->diag = 0; 2005584200bdSSatish Balay } 200665e19b50SBarry Smith if 
(fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 2007d0f46423SBarry Smith ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 2008ae15b995SBarry Smith ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 2009ae15b995SBarry Smith ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 201026fbe8dcSKarl Rupp 20118e58a170SBarry Smith A->info.mallocs += a->reallocs; 2012e2f3b5e9SSatish Balay a->reallocs = 0; 20130e6d2581SBarry Smith A->info.nz_unneeded = (PetscReal)fshift*bs2; 2014cf4441caSHong Zhang 2015cd6b891eSBarry Smith ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 201626fbe8dcSKarl Rupp 201788e51ccdSHong Zhang A->same_nonzero = PETSC_TRUE; 20183a40ed3dSBarry Smith PetscFunctionReturn(0); 2019584200bdSSatish Balay } 2020584200bdSSatish Balay 2021bea157c4SSatish Balay /* 2022bea157c4SSatish Balay This function fills an array of sizes which give the lengths of the contiguous 2023bea157c4SSatish Balay runs of rows that should be zeroed. For example: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2024bea157c4SSatish Balay then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2025bea157c4SSatish Balay Assume: sizes should be long enough to hold all the values. 
2026bea157c4SSatish Balay */ 20274a2ae208SSatish Balay #undef __FUNCT__ 20284a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 2029c1ac3661SBarry Smith static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2030d9b7c43dSSatish Balay { 2031c1ac3661SBarry Smith PetscInt i,j,k,row; 2032ace3abfcSBarry Smith PetscBool flg; 20333a40ed3dSBarry Smith 2034433994e6SBarry Smith PetscFunctionBegin; 2035bea157c4SSatish Balay for (i=0,j=0; i<n; j++) { 2036bea157c4SSatish Balay row = idx[i]; 2037bea157c4SSatish Balay if (row%bs!=0) { /* Not the beginning of a block */ 2038bea157c4SSatish Balay sizes[j] = 1; 2039bea157c4SSatish Balay i++; 2040e4fda26cSSatish Balay } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2041bea157c4SSatish Balay sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */ 2042bea157c4SSatish Balay i++; 2043bea157c4SSatish Balay } else { /* Beginning of the block, so check if the complete block exists */ 2044bea157c4SSatish Balay flg = PETSC_TRUE; 2045bea157c4SSatish Balay for (k=1; k<bs; k++) { 2046bea157c4SSatish Balay if (row+k != idx[i+k]) { /* break in the block */ 2047bea157c4SSatish Balay flg = PETSC_FALSE; 2048bea157c4SSatish Balay break; 2049d9b7c43dSSatish Balay } 2050bea157c4SSatish Balay } 2051abc0a331SBarry Smith if (flg) { /* No break in the bs */ 2052bea157c4SSatish Balay sizes[j] = bs; 2053bea157c4SSatish Balay i += bs; 2054bea157c4SSatish Balay } else { 2055bea157c4SSatish Balay sizes[j] = 1; 2056bea157c4SSatish Balay i++; 2057bea157c4SSatish Balay } 2058bea157c4SSatish Balay } 2059bea157c4SSatish Balay } 2060bea157c4SSatish Balay *bs_max = j; 20613a40ed3dSBarry Smith PetscFunctionReturn(0); 2062d9b7c43dSSatish Balay } 2063d9b7c43dSSatish Balay 20644a2ae208SSatish Balay #undef __FUNCT__ 20654a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_SeqBAIJ" 20662b40b63fSBarry Smith PetscErrorCode 
MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2067d9b7c43dSSatish Balay { 2068d9b7c43dSSatish Balay Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2069dfbe8321SBarry Smith PetscErrorCode ierr; 2070f4df32b1SMatthew Knepley PetscInt i,j,k,count,*rows; 2071d0f46423SBarry Smith PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 207287828ca2SBarry Smith PetscScalar zero = 0.0; 20733f1db9ecSBarry Smith MatScalar *aa; 207497b48c8fSBarry Smith const PetscScalar *xx; 207597b48c8fSBarry Smith PetscScalar *bb; 2076d9b7c43dSSatish Balay 20773a40ed3dSBarry Smith PetscFunctionBegin; 207897b48c8fSBarry Smith /* fix right hand side if needed */ 207997b48c8fSBarry Smith if (x && b) { 208097b48c8fSBarry Smith ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 208197b48c8fSBarry Smith ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 208297b48c8fSBarry Smith for (i=0; i<is_n; i++) { 208397b48c8fSBarry Smith bb[is_idx[i]] = diag*xx[is_idx[i]]; 208497b48c8fSBarry Smith } 208597b48c8fSBarry Smith ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 208697b48c8fSBarry Smith ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 208797b48c8fSBarry Smith } 208897b48c8fSBarry Smith 2089d9b7c43dSSatish Balay /* Make a copy of the IS and sort it */ 2090bea157c4SSatish Balay /* allocate memory for rows,sizes */ 2091fca92195SBarry Smith ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2092bea157c4SSatish Balay 2093563b5814SBarry Smith /* copy IS values to rows, and sort them */ 209426fbe8dcSKarl Rupp for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2095bea157c4SSatish Balay ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 209697b48c8fSBarry Smith 2097a9817697SBarry Smith if (baij->keepnonzeropattern) { 209826fbe8dcSKarl Rupp for (i=0; i<is_n; i++) sizes[i] = 1; 2099dffd3267SBarry Smith bs_max = is_n; 210088e51ccdSHong Zhang A->same_nonzero = PETSC_TRUE; 2101dffd3267SBarry Smith } else { 2102bea157c4SSatish Balay ierr = 
MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 210326fbe8dcSKarl Rupp 210488e51ccdSHong Zhang A->same_nonzero = PETSC_FALSE; 2105dffd3267SBarry Smith } 2106bea157c4SSatish Balay 2107bea157c4SSatish Balay for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2108bea157c4SSatish Balay row = rows[j]; 2109e32f2f54SBarry Smith if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2110bea157c4SSatish Balay count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2111b31fbe3bSSatish Balay aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2112a9817697SBarry Smith if (sizes[i] == bs && !baij->keepnonzeropattern) { 2113d4a378daSJed Brown if (diag != (PetscScalar)0.0) { 2114bea157c4SSatish Balay if (baij->ilen[row/bs] > 0) { 2115bea157c4SSatish Balay baij->ilen[row/bs] = 1; 2116bea157c4SSatish Balay baij->j[baij->i[row/bs]] = row/bs; 211726fbe8dcSKarl Rupp 2118bea157c4SSatish Balay ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2119a07cd24cSSatish Balay } 2120563b5814SBarry Smith /* Now insert all the diagonal values for this bs */ 2121bea157c4SSatish Balay for (k=0; k<bs; k++) { 2122f4df32b1SMatthew Knepley ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2123bea157c4SSatish Balay } 2124f4df32b1SMatthew Knepley } else { /* (diag == 0.0) */ 2125bea157c4SSatish Balay baij->ilen[row/bs] = 0; 2126f4df32b1SMatthew Knepley } /* end (diag == 0.0) */ 2127bea157c4SSatish Balay } else { /* (sizes[i] != bs) */ 2128aa482453SBarry Smith #if defined(PETSC_USE_DEBUG) 2129e32f2f54SBarry Smith if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. 
Value should be 1"); 2130bea157c4SSatish Balay #endif 2131bea157c4SSatish Balay for (k=0; k<count; k++) { 2132d9b7c43dSSatish Balay aa[0] = zero; 2133d9b7c43dSSatish Balay aa += bs; 2134d9b7c43dSSatish Balay } 2135d4a378daSJed Brown if (diag != (PetscScalar)0.0) { 2136f4df32b1SMatthew Knepley ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2137d9b7c43dSSatish Balay } 2138d9b7c43dSSatish Balay } 2139bea157c4SSatish Balay } 2140bea157c4SSatish Balay 2141fca92195SBarry Smith ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 21429a8dea36SBarry Smith ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 21433a40ed3dSBarry Smith PetscFunctionReturn(0); 2144d9b7c43dSSatish Balay } 21451c351548SSatish Balay 21464a2ae208SSatish Balay #undef __FUNCT__ 214797b48c8fSBarry Smith #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 214897b48c8fSBarry Smith PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 214997b48c8fSBarry Smith { 215097b48c8fSBarry Smith Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 215197b48c8fSBarry Smith PetscErrorCode ierr; 215297b48c8fSBarry Smith PetscInt i,j,k,count; 215397b48c8fSBarry Smith PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 215497b48c8fSBarry Smith PetscScalar zero = 0.0; 215597b48c8fSBarry Smith MatScalar *aa; 215697b48c8fSBarry Smith const PetscScalar *xx; 215797b48c8fSBarry Smith PetscScalar *bb; 215856777dd2SBarry Smith PetscBool *zeroed,vecs = PETSC_FALSE; 215997b48c8fSBarry Smith 216097b48c8fSBarry Smith PetscFunctionBegin; 216197b48c8fSBarry Smith /* fix right hand side if needed */ 216297b48c8fSBarry Smith if (x && b) { 216397b48c8fSBarry Smith ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 216497b48c8fSBarry Smith ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 216556777dd2SBarry Smith vecs = PETSC_TRUE; 216697b48c8fSBarry Smith } 216797b48c8fSBarry Smith A->same_nonzero = PETSC_TRUE; 216897b48c8fSBarry Smith 216997b48c8fSBarry Smith /* 
zero the columns */ 217097b48c8fSBarry Smith ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr); 217197b48c8fSBarry Smith ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 217297b48c8fSBarry Smith for (i=0; i<is_n; i++) { 217397b48c8fSBarry Smith if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 217497b48c8fSBarry Smith zeroed[is_idx[i]] = PETSC_TRUE; 217597b48c8fSBarry Smith } 217697b48c8fSBarry Smith for (i=0; i<A->rmap->N; i++) { 217797b48c8fSBarry Smith if (!zeroed[i]) { 217897b48c8fSBarry Smith row = i/bs; 217997b48c8fSBarry Smith for (j=baij->i[row]; j<baij->i[row+1]; j++) { 218097b48c8fSBarry Smith for (k=0; k<bs; k++) { 218197b48c8fSBarry Smith col = bs*baij->j[j] + k; 218297b48c8fSBarry Smith if (zeroed[col]) { 218397b48c8fSBarry Smith aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 218456777dd2SBarry Smith if (vecs) bb[i] -= aa[0]*xx[col]; 218597b48c8fSBarry Smith aa[0] = 0.0; 218697b48c8fSBarry Smith } 218797b48c8fSBarry Smith } 218897b48c8fSBarry Smith } 218956777dd2SBarry Smith } else if (vecs) bb[i] = diag*xx[i]; 219097b48c8fSBarry Smith } 219197b48c8fSBarry Smith ierr = PetscFree(zeroed);CHKERRQ(ierr); 219256777dd2SBarry Smith if (vecs) { 219356777dd2SBarry Smith ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 219456777dd2SBarry Smith ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 219556777dd2SBarry Smith } 219697b48c8fSBarry Smith 219797b48c8fSBarry Smith /* zero the rows */ 219897b48c8fSBarry Smith for (i=0; i<is_n; i++) { 219997b48c8fSBarry Smith row = is_idx[i]; 220097b48c8fSBarry Smith count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 220197b48c8fSBarry Smith aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 220297b48c8fSBarry Smith for (k=0; k<count; k++) { 220397b48c8fSBarry Smith aa[0] = zero; 220497b48c8fSBarry Smith aa += bs; 220597b48c8fSBarry Smith } 2206d4a378daSJed Brown if (diag != 
(PetscScalar)0.0) { 220797b48c8fSBarry Smith ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 220897b48c8fSBarry Smith } 220997b48c8fSBarry Smith } 221097b48c8fSBarry Smith ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 221197b48c8fSBarry Smith PetscFunctionReturn(0); 221297b48c8fSBarry Smith } 221397b48c8fSBarry Smith 221497b48c8fSBarry Smith #undef __FUNCT__ 22154a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_SeqBAIJ" 2216c1ac3661SBarry Smith PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 22172d61bbb3SSatish Balay { 22182d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2219e2ee6c50SBarry Smith PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2220c1ac3661SBarry Smith PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2221d0f46423SBarry Smith PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 22226849ba73SBarry Smith PetscErrorCode ierr; 2223c1ac3661SBarry Smith PetscInt ridx,cidx,bs2=a->bs2; 2224ace3abfcSBarry Smith PetscBool roworiented=a->roworiented; 22253f1db9ecSBarry Smith MatScalar *ap,value,*aa=a->a,*bap; 22262d61bbb3SSatish Balay 22272d61bbb3SSatish Balay PetscFunctionBegin; 222871fd2e92SBarry Smith if (v) PetscValidScalarPointer(v,6); 22292d61bbb3SSatish Balay for (k=0; k<m; k++) { /* loop over added rows */ 2230085a36d4SBarry Smith row = im[k]; 2231085a36d4SBarry Smith brow = row/bs; 22325ef9f2a5SBarry Smith if (row < 0) continue; 22332515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 2234e32f2f54SBarry Smith if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 22352d61bbb3SSatish Balay #endif 22362d61bbb3SSatish Balay rp = aj + ai[brow]; 22372d61bbb3SSatish Balay ap = aa + bs2*ai[brow]; 22382d61bbb3SSatish Balay rmax = imax[brow]; 22392d61bbb3SSatish Balay nrow = ailen[brow]; 22402d61bbb3SSatish Balay 
low = 0; 2241c71e6ed7SBarry Smith high = nrow; 22422d61bbb3SSatish Balay for (l=0; l<n; l++) { /* loop over added columns */ 22435ef9f2a5SBarry Smith if (in[l] < 0) continue; 22442515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 2245e32f2f54SBarry Smith if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 22462d61bbb3SSatish Balay #endif 22472d61bbb3SSatish Balay col = in[l]; bcol = col/bs; 22482d61bbb3SSatish Balay ridx = row % bs; cidx = col % bs; 22492d61bbb3SSatish Balay if (roworiented) { 22505ef9f2a5SBarry Smith value = v[l + k*n]; 22512d61bbb3SSatish Balay } else { 22522d61bbb3SSatish Balay value = v[k + l*m]; 22532d61bbb3SSatish Balay } 22547cd84e04SBarry Smith if (col <= lastcol) low = 0; else high = nrow; 2255e2ee6c50SBarry Smith lastcol = col; 22562d61bbb3SSatish Balay while (high-low > 7) { 22572d61bbb3SSatish Balay t = (low+high)/2; 22582d61bbb3SSatish Balay if (rp[t] > bcol) high = t; 22592d61bbb3SSatish Balay else low = t; 22602d61bbb3SSatish Balay } 22612d61bbb3SSatish Balay for (i=low; i<high; i++) { 22622d61bbb3SSatish Balay if (rp[i] > bcol) break; 22632d61bbb3SSatish Balay if (rp[i] == bcol) { 22642d61bbb3SSatish Balay bap = ap + bs2*i + bs*cidx + ridx; 22652d61bbb3SSatish Balay if (is == ADD_VALUES) *bap += value; 22662d61bbb3SSatish Balay else *bap = value; 22672d61bbb3SSatish Balay goto noinsert1; 22682d61bbb3SSatish Balay } 22692d61bbb3SSatish Balay } 22702d61bbb3SSatish Balay if (nonew == 1) goto noinsert1; 2271e32f2f54SBarry Smith if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2272fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2273c03d1d03SSatish Balay N = nrow++ - 1; high++; 22742d61bbb3SSatish Balay /* shift up all the later entries in this row */ 22752d61bbb3SSatish Balay for (ii=N; ii>=i; ii--) { 
22762d61bbb3SSatish Balay rp[ii+1] = rp[ii]; 2277549d3d68SSatish Balay ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 22782d61bbb3SSatish Balay } 2279549d3d68SSatish Balay if (N>=i) { 2280549d3d68SSatish Balay ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2281549d3d68SSatish Balay } 22822d61bbb3SSatish Balay rp[i] = bcol; 22832d61bbb3SSatish Balay ap[bs2*i + bs*cidx + ridx] = value; 2284085a36d4SBarry Smith a->nz++; 22852d61bbb3SSatish Balay noinsert1:; 22862d61bbb3SSatish Balay low = i; 22872d61bbb3SSatish Balay } 22882d61bbb3SSatish Balay ailen[brow] = nrow; 22892d61bbb3SSatish Balay } 229088e51ccdSHong Zhang A->same_nonzero = PETSC_FALSE; 22912d61bbb3SSatish Balay PetscFunctionReturn(0); 22922d61bbb3SSatish Balay } 22932d61bbb3SSatish Balay 22944a2ae208SSatish Balay #undef __FUNCT__ 22954a2ae208SSatish Balay #define __FUNCT__ "MatILUFactor_SeqBAIJ" 22960481f469SBarry Smith PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 22972d61bbb3SSatish Balay { 22982d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 22992d61bbb3SSatish Balay Mat outA; 2300dfbe8321SBarry Smith PetscErrorCode ierr; 2301ace3abfcSBarry Smith PetscBool row_identity,col_identity; 23022d61bbb3SSatish Balay 23032d61bbb3SSatish Balay PetscFunctionBegin; 2304e32f2f54SBarry Smith if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2305667159a5SBarry Smith ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2306667159a5SBarry Smith ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2307f23aa3ddSBarry Smith if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 23082d61bbb3SSatish Balay 23092d61bbb3SSatish Balay outA = inA; 2310d5f3da31SBarry Smith inA->factortype = MAT_FACTOR_LU; 23112d61bbb3SSatish Balay 2312c4992f7dSBarry Smith ierr = 
MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2313cf242676SKris Buschelman 2314c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 23156bf464f9SBarry Smith ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2316c3122656SLisandro Dalcin a->row = row; 2317c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 23186bf464f9SBarry Smith ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2319c3122656SLisandro Dalcin a->col = col; 2320c38d4ed2SBarry Smith 2321c38d4ed2SBarry Smith /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 23226bf464f9SBarry Smith ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 23234c49b128SBarry Smith ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 232452e6d16bSBarry Smith ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2325c38d4ed2SBarry Smith 2326ace3abfcSBarry Smith ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2327c38d4ed2SBarry Smith if (!a->solve_work) { 2328d0f46423SBarry Smith ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2329d0f46423SBarry Smith ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2330c38d4ed2SBarry Smith } 2331719d5645SBarry Smith ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 23322d61bbb3SSatish Balay PetscFunctionReturn(0); 23332d61bbb3SSatish Balay } 2334d9b7c43dSSatish Balay 23354a2ae208SSatish Balay #undef __FUNCT__ 23364a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 23377087cfbeSBarry Smith PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 233827a8da17SBarry Smith { 233927a8da17SBarry Smith Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2340bdb1c0e1SJed Brown PetscInt i,nz,mbs; 234127a8da17SBarry Smith 234227a8da17SBarry Smith PetscFunctionBegin; 2343b32cb4a7SJed Brown nz = baij->maxnz; 2344bdb1c0e1SJed 
Brown mbs = baij->mbs; 234527a8da17SBarry Smith for (i=0; i<nz; i++) { 234627a8da17SBarry Smith baij->j[i] = indices[i]; 234727a8da17SBarry Smith } 234827a8da17SBarry Smith baij->nz = nz; 2349bdb1c0e1SJed Brown for (i=0; i<mbs; i++) { 235027a8da17SBarry Smith baij->ilen[i] = baij->imax[i]; 235127a8da17SBarry Smith } 235227a8da17SBarry Smith PetscFunctionReturn(0); 235327a8da17SBarry Smith } 235427a8da17SBarry Smith 23554a2ae208SSatish Balay #undef __FUNCT__ 23564a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 235727a8da17SBarry Smith /*@ 235827a8da17SBarry Smith MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 235927a8da17SBarry Smith in the matrix. 236027a8da17SBarry Smith 236127a8da17SBarry Smith Input Parameters: 236227a8da17SBarry Smith + mat - the SeqBAIJ matrix 236327a8da17SBarry Smith - indices - the column indices 236427a8da17SBarry Smith 236515091d37SBarry Smith Level: advanced 236615091d37SBarry Smith 236727a8da17SBarry Smith Notes: 236827a8da17SBarry Smith This can be called if you have precomputed the nonzero structure of the 236927a8da17SBarry Smith matrix and want to provide it to the matrix object to improve the performance 237027a8da17SBarry Smith of the MatSetValues() operation. 237127a8da17SBarry Smith 237227a8da17SBarry Smith You MUST have set the correct numbers of nonzeros per row in the call to 2373d1be2dadSMatthew Knepley MatCreateSeqBAIJ(), and the column indices MUST be sorted. 
237427a8da17SBarry Smith 237527a8da17SBarry Smith MUST be called before any calls to MatSetValues(); 237627a8da17SBarry Smith 237727a8da17SBarry Smith @*/ 23787087cfbeSBarry Smith PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 237927a8da17SBarry Smith { 23804ac538c5SBarry Smith PetscErrorCode ierr; 238127a8da17SBarry Smith 238227a8da17SBarry Smith PetscFunctionBegin; 23830700a824SBarry Smith PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 23844482741eSBarry Smith PetscValidPointer(indices,2); 23854ac538c5SBarry Smith ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 238627a8da17SBarry Smith PetscFunctionReturn(0); 238727a8da17SBarry Smith } 238827a8da17SBarry Smith 23894a2ae208SSatish Balay #undef __FUNCT__ 2390985db425SBarry Smith #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2391985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2392273d9f13SBarry Smith { 2393273d9f13SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2394dfbe8321SBarry Smith PetscErrorCode ierr; 2395c1ac3661SBarry Smith PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2396273d9f13SBarry Smith PetscReal atmp; 239787828ca2SBarry Smith PetscScalar *x,zero = 0.0; 2398273d9f13SBarry Smith MatScalar *aa; 2399c1ac3661SBarry Smith PetscInt ncols,brow,krow,kcol; 2400273d9f13SBarry Smith 2401273d9f13SBarry Smith PetscFunctionBegin; 2402e32f2f54SBarry Smith if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2403d0f46423SBarry Smith bs = A->rmap->bs; 2404273d9f13SBarry Smith aa = a->a; 2405273d9f13SBarry Smith ai = a->i; 2406273d9f13SBarry Smith aj = a->j; 2407273d9f13SBarry Smith mbs = a->mbs; 2408273d9f13SBarry Smith 24092dcb1b2aSMatthew Knepley ierr = VecSet(v,zero);CHKERRQ(ierr); 24101ebc52fbSHong Zhang ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2411273d9f13SBarry Smith ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2412e32f2f54SBarry Smith if (n != A->rmap->N) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2413273d9f13SBarry Smith for (i=0; i<mbs; i++) { 2414273d9f13SBarry Smith ncols = ai[1] - ai[0]; ai++; 2415273d9f13SBarry Smith brow = bs*i; 2416273d9f13SBarry Smith for (j=0; j<ncols; j++) { 2417273d9f13SBarry Smith for (kcol=0; kcol<bs; kcol++) { 2418273d9f13SBarry Smith for (krow=0; krow<bs; krow++) { 2419273d9f13SBarry Smith atmp = PetscAbsScalar(*aa);aa++; 2420273d9f13SBarry Smith row = brow + krow; /* row index */ 2421a83599f4SBarry Smith /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2422985db425SBarry Smith if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2423273d9f13SBarry Smith } 2424273d9f13SBarry Smith } 2425273d9f13SBarry Smith aj++; 2426273d9f13SBarry Smith } 2427273d9f13SBarry Smith } 24281ebc52fbSHong Zhang ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2429273d9f13SBarry Smith PetscFunctionReturn(0); 2430273d9f13SBarry Smith } 2431273d9f13SBarry Smith 24324a2ae208SSatish Balay #undef __FUNCT__ 24333c896bc6SHong Zhang #define __FUNCT__ "MatCopy_SeqBAIJ" 24343c896bc6SHong Zhang PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 24353c896bc6SHong Zhang { 24363c896bc6SHong Zhang PetscErrorCode ierr; 24373c896bc6SHong Zhang 24383c896bc6SHong Zhang PetscFunctionBegin; 24393c896bc6SHong Zhang /* If the two matrices have the same copy implementation, use fast copy. 
*/ 24403c896bc6SHong Zhang if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 24413c896bc6SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 24423c896bc6SHong Zhang Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2443d88c0aacSHong Zhang PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 24443c896bc6SHong Zhang 2445d88c0aacSHong Zhang if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2446d88c0aacSHong Zhang if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2447d88c0aacSHong Zhang ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 24483c896bc6SHong Zhang } else { 24493c896bc6SHong Zhang ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 24503c896bc6SHong Zhang } 24513c896bc6SHong Zhang PetscFunctionReturn(0); 24523c896bc6SHong Zhang } 24533c896bc6SHong Zhang 24543c896bc6SHong Zhang #undef __FUNCT__ 24554994cf47SJed Brown #define __FUNCT__ "MatSetUp_SeqBAIJ" 24564994cf47SJed Brown PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2457273d9f13SBarry Smith { 2458dfbe8321SBarry Smith PetscErrorCode ierr; 2459273d9f13SBarry Smith 2460273d9f13SBarry Smith PetscFunctionBegin; 2461535b19f3SBarry Smith ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2462273d9f13SBarry Smith PetscFunctionReturn(0); 2463273d9f13SBarry Smith } 2464273d9f13SBarry Smith 24654a2ae208SSatish Balay #undef __FUNCT__ 24668c778c55SBarry Smith #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 24678c778c55SBarry Smith PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2468f2a5309cSSatish Balay { 2469f2a5309cSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 24706e111a19SKarl Rupp 2471f2a5309cSSatish Balay PetscFunctionBegin; 2472f2a5309cSSatish Balay *array = a->a; 2473f2a5309cSSatish Balay PetscFunctionReturn(0); 
2474f2a5309cSSatish Balay } 2475f2a5309cSSatish Balay 24764a2ae208SSatish Balay #undef __FUNCT__ 24778c778c55SBarry Smith #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 24788c778c55SBarry Smith PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2479f2a5309cSSatish Balay { 2480f2a5309cSSatish Balay PetscFunctionBegin; 2481f2a5309cSSatish Balay PetscFunctionReturn(0); 2482f2a5309cSSatish Balay } 2483f2a5309cSSatish Balay 248442ee4b1aSHong Zhang #undef __FUNCT__ 248542ee4b1aSHong Zhang #define __FUNCT__ "MatAXPY_SeqBAIJ" 2486f4df32b1SMatthew Knepley PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 248742ee4b1aSHong Zhang { 248842ee4b1aSHong Zhang Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2489dfbe8321SBarry Smith PetscErrorCode ierr; 2490e838b9e7SJed Brown PetscInt i,bs=Y->rmap->bs,j,bs2=bs*bs; 2491e838b9e7SJed Brown PetscBLASInt one=1; 249242ee4b1aSHong Zhang 249342ee4b1aSHong Zhang PetscFunctionBegin; 249442ee4b1aSHong Zhang if (str == SAME_NONZERO_PATTERN) { 2495f4df32b1SMatthew Knepley PetscScalar alpha = a; 2496c5df96a5SBarry Smith PetscBLASInt bnz; 2497c5df96a5SBarry Smith ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2498a83cb05cSBarry Smith PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2499c537a176SHong Zhang } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2500c4319e64SHong Zhang if (y->xtoy && y->XtoY != X) { 2501c4319e64SHong Zhang ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 25026bf464f9SBarry Smith ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2503c537a176SHong Zhang } 2504c4319e64SHong Zhang if (!y->xtoy) { /* get xtoy */ 25050298fd71SBarry Smith ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,NULL, y->i,y->j,NULL, &y->xtoy);CHKERRQ(ierr); 2506c4319e64SHong Zhang y->XtoY = X; 2507c009d632SSatish Balay ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2508c537a176SHong Zhang } 2509c537a176SHong Zhang for (i=0; 
i<x->nz; i++) { 2510c4319e64SHong Zhang j = 0; 2511c4319e64SHong Zhang while (j < bs2) { 2512f4df32b1SMatthew Knepley y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2513c4319e64SHong Zhang j++; 2514c537a176SHong Zhang } 2515c4319e64SHong Zhang } 25161e2582c4SBarry Smith ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 251742ee4b1aSHong Zhang } else { 2518f4df32b1SMatthew Knepley ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 251942ee4b1aSHong Zhang } 252042ee4b1aSHong Zhang PetscFunctionReturn(0); 252142ee4b1aSHong Zhang } 252242ee4b1aSHong Zhang 252399cafbc1SBarry Smith #undef __FUNCT__ 252499cafbc1SBarry Smith #define __FUNCT__ "MatRealPart_SeqBAIJ" 252599cafbc1SBarry Smith PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 252699cafbc1SBarry Smith { 252799cafbc1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 252899cafbc1SBarry Smith PetscInt i,nz = a->bs2*a->i[a->mbs]; 2529dd6ea824SBarry Smith MatScalar *aa = a->a; 253099cafbc1SBarry Smith 253199cafbc1SBarry Smith PetscFunctionBegin; 253299cafbc1SBarry Smith for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 253399cafbc1SBarry Smith PetscFunctionReturn(0); 253499cafbc1SBarry Smith } 253599cafbc1SBarry Smith 253699cafbc1SBarry Smith #undef __FUNCT__ 253799cafbc1SBarry Smith #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 253899cafbc1SBarry Smith PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 253999cafbc1SBarry Smith { 254099cafbc1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 254199cafbc1SBarry Smith PetscInt i,nz = a->bs2*a->i[a->mbs]; 2542dd6ea824SBarry Smith MatScalar *aa = a->a; 254399cafbc1SBarry Smith 254499cafbc1SBarry Smith PetscFunctionBegin; 254599cafbc1SBarry Smith for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 254699cafbc1SBarry Smith PetscFunctionReturn(0); 254799cafbc1SBarry Smith } 254899cafbc1SBarry Smith 25493acb8795SBarry Smith extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 
25503acb8795SBarry Smith 25513acb8795SBarry Smith #undef __FUNCT__ 25523acb8795SBarry Smith #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 25533acb8795SBarry Smith /* 25543acb8795SBarry Smith Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 25553acb8795SBarry Smith */ 25561a83f524SJed Brown PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 25573acb8795SBarry Smith { 25583acb8795SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 25593acb8795SBarry Smith PetscErrorCode ierr; 25603acb8795SBarry Smith PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 25613acb8795SBarry Smith PetscInt nz = a->i[m],row,*jj,mr,col; 25623acb8795SBarry Smith 25633acb8795SBarry Smith PetscFunctionBegin; 25643acb8795SBarry Smith *nn = n; 25653acb8795SBarry Smith if (!ia) PetscFunctionReturn(0); 2566e7e72b3dSBarry Smith if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2567e7e72b3dSBarry Smith else { 25683acb8795SBarry Smith ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 25693acb8795SBarry Smith ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 25703acb8795SBarry Smith ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 25713acb8795SBarry Smith ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 25723acb8795SBarry Smith jj = a->j; 25733acb8795SBarry Smith for (i=0; i<nz; i++) { 25743acb8795SBarry Smith collengths[jj[i]]++; 25753acb8795SBarry Smith } 25763acb8795SBarry Smith cia[0] = oshift; 25773acb8795SBarry Smith for (i=0; i<n; i++) { 25783acb8795SBarry Smith cia[i+1] = cia[i] + collengths[i]; 25793acb8795SBarry Smith } 25803acb8795SBarry Smith ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 25813acb8795SBarry Smith jj = a->j; 25823acb8795SBarry Smith for (row=0; row<m; row++) { 25833acb8795SBarry 
Smith mr = a->i[row+1] - a->i[row]; 25843acb8795SBarry Smith for (i=0; i<mr; i++) { 25853acb8795SBarry Smith col = *jj++; 258626fbe8dcSKarl Rupp 25873acb8795SBarry Smith cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 25883acb8795SBarry Smith } 25893acb8795SBarry Smith } 25903acb8795SBarry Smith ierr = PetscFree(collengths);CHKERRQ(ierr); 25913acb8795SBarry Smith *ia = cia; *ja = cja; 25923acb8795SBarry Smith } 25933acb8795SBarry Smith PetscFunctionReturn(0); 25943acb8795SBarry Smith } 25953acb8795SBarry Smith 25963acb8795SBarry Smith #undef __FUNCT__ 25973acb8795SBarry Smith #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 25981a83f524SJed Brown PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 25993acb8795SBarry Smith { 26003acb8795SBarry Smith PetscErrorCode ierr; 26013acb8795SBarry Smith 26023acb8795SBarry Smith PetscFunctionBegin; 26033acb8795SBarry Smith if (!ia) PetscFunctionReturn(0); 26043acb8795SBarry Smith ierr = PetscFree(*ia);CHKERRQ(ierr); 26053acb8795SBarry Smith ierr = PetscFree(*ja);CHKERRQ(ierr); 26063acb8795SBarry Smith PetscFunctionReturn(0); 26073acb8795SBarry Smith } 26083acb8795SBarry Smith 2609f6d58c54SBarry Smith #undef __FUNCT__ 2610f6d58c54SBarry Smith #define __FUNCT__ "MatFDColoringApply_BAIJ" 26117087cfbeSBarry Smith PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2612f6d58c54SBarry Smith { 2613f6d58c54SBarry Smith PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 2614f6d58c54SBarry Smith PetscErrorCode ierr; 26154e269d77SPeter Brune PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow; 2616f6d58c54SBarry Smith PetscScalar dx,*y,*xx,*w3_array; 2617f6d58c54SBarry Smith PetscScalar *vscale_array; 2618f6d58c54SBarry Smith PetscReal epsilon = coloring->error_rel,umin = 
coloring->umin,unorm; 2619f6d58c54SBarry Smith Vec w1 = coloring->w1,w2=coloring->w2,w3; 2620f6d58c54SBarry Smith void *fctx = coloring->fctx; 2621ace3abfcSBarry Smith PetscBool flg = PETSC_FALSE; 2622f6d58c54SBarry Smith PetscInt ctype = coloring->ctype,N,col_start=0,col_end=0; 2623f6d58c54SBarry Smith Vec x1_tmp; 2624f6d58c54SBarry Smith 2625f6d58c54SBarry Smith PetscFunctionBegin; 26260700a824SBarry Smith PetscValidHeaderSpecific(J,MAT_CLASSID,1); 26270700a824SBarry Smith PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 26280700a824SBarry Smith PetscValidHeaderSpecific(x1,VEC_CLASSID,3); 2629e32f2f54SBarry Smith if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2630f6d58c54SBarry Smith 2631f6d58c54SBarry Smith ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2632f6d58c54SBarry Smith ierr = MatSetUnfactored(J);CHKERRQ(ierr); 26330298fd71SBarry Smith ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 2634f6d58c54SBarry Smith if (flg) { 2635f6d58c54SBarry Smith ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2636f6d58c54SBarry Smith } else { 2637ace3abfcSBarry Smith PetscBool assembled; 2638f6d58c54SBarry Smith ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2639f6d58c54SBarry Smith if (assembled) { 2640f6d58c54SBarry Smith ierr = MatZeroEntries(J);CHKERRQ(ierr); 2641f6d58c54SBarry Smith } 2642f6d58c54SBarry Smith } 2643f6d58c54SBarry Smith 2644f6d58c54SBarry Smith x1_tmp = x1; 2645f6d58c54SBarry Smith if (!coloring->vscale) { 2646f6d58c54SBarry Smith ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2647f6d58c54SBarry Smith } 2648f6d58c54SBarry Smith 2649f6d58c54SBarry Smith if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2650f6d58c54SBarry Smith ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2651f6d58c54SBarry Smith } 
2652f6d58c54SBarry Smith ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2653f6d58c54SBarry Smith 2654f6d58c54SBarry Smith /* Set w1 = F(x1) */ 26554e269d77SPeter Brune if (!coloring->fset) { 2656f6d58c54SBarry Smith ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2657f6d58c54SBarry Smith ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2658f6d58c54SBarry Smith ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 26594e269d77SPeter Brune } else { 26604e269d77SPeter Brune coloring->fset = PETSC_FALSE; 2661f6d58c54SBarry Smith } 2662f6d58c54SBarry Smith 2663f6d58c54SBarry Smith if (!coloring->w3) { 2664f6d58c54SBarry Smith ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2665f6d58c54SBarry Smith ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2666f6d58c54SBarry Smith } 2667f6d58c54SBarry Smith w3 = coloring->w3; 2668f6d58c54SBarry Smith 2669f6d58c54SBarry Smith /* Compute all the local scale factors, including ghost points */ 2670f6d58c54SBarry Smith ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2671f6d58c54SBarry Smith ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2672f6d58c54SBarry Smith ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2673f6d58c54SBarry Smith if (ctype == IS_COLORING_GHOSTED) { 2674f6d58c54SBarry Smith col_start = 0; col_end = N; 2675f6d58c54SBarry Smith } else if (ctype == IS_COLORING_GLOBAL) { 2676f6d58c54SBarry Smith xx = xx - start; 2677f6d58c54SBarry Smith vscale_array = vscale_array - start; 2678f6d58c54SBarry Smith col_start = start; col_end = N + start; 267926fbe8dcSKarl Rupp } 2680f6d58c54SBarry Smith for (col=col_start; col<col_end; col++) { 2681f6d58c54SBarry Smith /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2682f6d58c54SBarry Smith if (coloring->htype[0] == 'w') { 2683f6d58c54SBarry Smith dx = 1.0 + unorm; 2684f6d58c54SBarry Smith } else { 2685f6d58c54SBarry Smith dx = 
xx[col]; 2686f6d58c54SBarry Smith } 2687d4a378daSJed Brown if (dx == (PetscScalar)0.0) dx = 1.0; 2688f6d58c54SBarry Smith if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2689f6d58c54SBarry Smith else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2690f6d58c54SBarry Smith dx *= epsilon; 2691d4a378daSJed Brown vscale_array[col] = (PetscScalar)1.0/dx; 2692365a8a9eSBarry Smith } 2693f6d58c54SBarry Smith if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2694f6d58c54SBarry Smith ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2695f6d58c54SBarry Smith if (ctype == IS_COLORING_GLOBAL) { 2696f6d58c54SBarry Smith ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2697f6d58c54SBarry Smith ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2698f6d58c54SBarry Smith } 2699f6d58c54SBarry Smith if (coloring->vscaleforrow) { 2700f6d58c54SBarry Smith vscaleforrow = coloring->vscaleforrow; 2701e7e72b3dSBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2702f6d58c54SBarry Smith 2703f6d58c54SBarry Smith ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2704f6d58c54SBarry Smith /* 2705f6d58c54SBarry Smith Loop over each color 2706f6d58c54SBarry Smith */ 2707f6d58c54SBarry Smith ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2708f6d58c54SBarry Smith for (k=0; k<coloring->ncolors; k++) { 2709f6d58c54SBarry Smith coloring->currentcolor = k; 2710f6d58c54SBarry Smith for (i=0; i<bs; i++) { 2711f6d58c54SBarry Smith ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2712f6d58c54SBarry Smith ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2713f6d58c54SBarry Smith if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2714f6d58c54SBarry Smith /* 2715f6d58c54SBarry Smith Loop over each column associated with color 2716f6d58c54SBarry Smith adding the 
perturbation to the vector w3. 2717f6d58c54SBarry Smith */ 2718f6d58c54SBarry Smith for (l=0; l<coloring->ncolumns[k]; l++) { 2719f6d58c54SBarry Smith col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2720f6d58c54SBarry Smith if (coloring->htype[0] == 'w') { 2721f6d58c54SBarry Smith dx = 1.0 + unorm; 2722f6d58c54SBarry Smith } else { 2723f6d58c54SBarry Smith dx = xx[col]; 2724f6d58c54SBarry Smith } 2725d4a378daSJed Brown if (dx == (PetscScalar)0.0) dx = 1.0; 2726f6d58c54SBarry Smith if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2727f6d58c54SBarry Smith else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2728f6d58c54SBarry Smith dx *= epsilon; 2729e32f2f54SBarry Smith if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2730f6d58c54SBarry Smith w3_array[col] += dx; 2731f6d58c54SBarry Smith } 2732f6d58c54SBarry Smith if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2733f6d58c54SBarry Smith ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2734f6d58c54SBarry Smith 2735f6d58c54SBarry Smith /* 2736f6d58c54SBarry Smith Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2737f6d58c54SBarry Smith w2 = F(x1 + dx) - F(x1) 2738f6d58c54SBarry Smith */ 2739f6d58c54SBarry Smith ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2740f6d58c54SBarry Smith ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2741f6d58c54SBarry Smith ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2742f6d58c54SBarry Smith ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2743f6d58c54SBarry Smith 2744f6d58c54SBarry Smith /* 2745f6d58c54SBarry Smith Loop over rows of vector, putting results into Jacobian matrix 2746f6d58c54SBarry Smith */ 2747f6d58c54SBarry Smith ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2748f6d58c54SBarry Smith for (l=0; l<coloring->nrows[k]; l++) { 2749f6d58c54SBarry Smith row = 
bs*coloring->rows[k][l]; /* local row index */ 2750f6d58c54SBarry Smith col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2751f6d58c54SBarry Smith for (j=0; j<bs; j++) { 2752f6d58c54SBarry Smith y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2753f6d58c54SBarry Smith srows[j] = row + start + j; 2754f6d58c54SBarry Smith } 2755f6d58c54SBarry Smith ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2756f6d58c54SBarry Smith } 2757f6d58c54SBarry Smith ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2758f6d58c54SBarry Smith } 2759f6d58c54SBarry Smith } /* endof for each color */ 2760f6d58c54SBarry Smith if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2761f6d58c54SBarry Smith ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2762f6d58c54SBarry Smith ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2763f6d58c54SBarry Smith ierr = PetscFree(srows);CHKERRQ(ierr); 2764f6d58c54SBarry Smith 2765f6d58c54SBarry Smith coloring->currentcolor = -1; 276626fbe8dcSKarl Rupp 2767f6d58c54SBarry Smith ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2768f6d58c54SBarry Smith ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2769f6d58c54SBarry Smith ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2770f6d58c54SBarry Smith PetscFunctionReturn(0); 2771f6d58c54SBarry Smith } 277299cafbc1SBarry Smith 27732593348eSBarry Smith /* -------------------------------------------------------------------*/ 27743964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2775cc2dc46cSBarry Smith MatGetRow_SeqBAIJ, 2776cc2dc46cSBarry Smith MatRestoreRow_SeqBAIJ, 2777cc2dc46cSBarry Smith MatMult_SeqBAIJ_N, 277897304618SKris Buschelman /* 4*/ MatMultAdd_SeqBAIJ_N, 27797c922b88SBarry Smith MatMultTranspose_SeqBAIJ, 27807c922b88SBarry Smith MatMultTransposeAdd_SeqBAIJ, 2781db4efbfdSBarry Smith 0, 2782cc2dc46cSBarry Smith 0, 2783cc2dc46cSBarry Smith 0, 278497304618SKris Buschelman 
/* 10*/ 0, 2785cc2dc46cSBarry Smith MatLUFactor_SeqBAIJ, 2786cc2dc46cSBarry Smith 0, 2787b6490206SBarry Smith 0, 2788f2501298SSatish Balay MatTranspose_SeqBAIJ, 278997304618SKris Buschelman /* 15*/ MatGetInfo_SeqBAIJ, 2790cc2dc46cSBarry Smith MatEqual_SeqBAIJ, 2791cc2dc46cSBarry Smith MatGetDiagonal_SeqBAIJ, 2792cc2dc46cSBarry Smith MatDiagonalScale_SeqBAIJ, 2793cc2dc46cSBarry Smith MatNorm_SeqBAIJ, 279497304618SKris Buschelman /* 20*/ 0, 2795cc2dc46cSBarry Smith MatAssemblyEnd_SeqBAIJ, 2796cc2dc46cSBarry Smith MatSetOption_SeqBAIJ, 2797cc2dc46cSBarry Smith MatZeroEntries_SeqBAIJ, 2798d519adbfSMatthew Knepley /* 24*/ MatZeroRows_SeqBAIJ, 2799db4efbfdSBarry Smith 0, 2800db4efbfdSBarry Smith 0, 2801db4efbfdSBarry Smith 0, 2802db4efbfdSBarry Smith 0, 28034994cf47SJed Brown /* 29*/ MatSetUp_SeqBAIJ, 2804db4efbfdSBarry Smith 0, 2805db4efbfdSBarry Smith 0, 28068c778c55SBarry Smith 0, 28078c778c55SBarry Smith 0, 2808d519adbfSMatthew Knepley /* 34*/ MatDuplicate_SeqBAIJ, 2809cc2dc46cSBarry Smith 0, 2810cc2dc46cSBarry Smith 0, 2811cc2dc46cSBarry Smith MatILUFactor_SeqBAIJ, 2812cc2dc46cSBarry Smith 0, 2813d519adbfSMatthew Knepley /* 39*/ MatAXPY_SeqBAIJ, 2814cc2dc46cSBarry Smith MatGetSubMatrices_SeqBAIJ, 2815cc2dc46cSBarry Smith MatIncreaseOverlap_SeqBAIJ, 2816cc2dc46cSBarry Smith MatGetValues_SeqBAIJ, 28173c896bc6SHong Zhang MatCopy_SeqBAIJ, 2818d519adbfSMatthew Knepley /* 44*/ 0, 2819cc2dc46cSBarry Smith MatScale_SeqBAIJ, 2820cc2dc46cSBarry Smith 0, 2821cc2dc46cSBarry Smith 0, 282297b48c8fSBarry Smith MatZeroRowsColumns_SeqBAIJ, 2823f73d5cc4SBarry Smith /* 49*/ 0, 28243b2fbd54SBarry Smith MatGetRowIJ_SeqBAIJ, 282592c4ed94SBarry Smith MatRestoreRowIJ_SeqBAIJ, 28263acb8795SBarry Smith MatGetColumnIJ_SeqBAIJ, 28273acb8795SBarry Smith MatRestoreColumnIJ_SeqBAIJ, 28283acb8795SBarry Smith /* 54*/ MatFDColoringCreate_SeqAIJ, 2829cc2dc46cSBarry Smith 0, 2830cc2dc46cSBarry Smith 0, 2831cc2dc46cSBarry Smith 0, 2832d3825aa8SBarry Smith MatSetValuesBlocked_SeqBAIJ, 
2833d519adbfSMatthew Knepley /* 59*/ MatGetSubMatrix_SeqBAIJ, 2834b9b97703SBarry Smith MatDestroy_SeqBAIJ, 2835b9b97703SBarry Smith MatView_SeqBAIJ, 2836357abbc8SBarry Smith 0, 2837273d9f13SBarry Smith 0, 2838d519adbfSMatthew Knepley /* 64*/ 0, 2839273d9f13SBarry Smith 0, 2840273d9f13SBarry Smith 0, 2841273d9f13SBarry Smith 0, 2842273d9f13SBarry Smith 0, 2843d519adbfSMatthew Knepley /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2844273d9f13SBarry Smith 0, 2845c87e5d42SMatthew Knepley MatConvert_Basic, 284697304618SKris Buschelman 0, 284797304618SKris Buschelman 0, 2848d519adbfSMatthew Knepley /* 74*/ 0, 2849f6d58c54SBarry Smith MatFDColoringApply_BAIJ, 285097304618SKris Buschelman 0, 285197304618SKris Buschelman 0, 285297304618SKris Buschelman 0, 2853d519adbfSMatthew Knepley /* 79*/ 0, 285497304618SKris Buschelman 0, 285597304618SKris Buschelman 0, 285697304618SKris Buschelman 0, 28575bba2384SShri Abhyankar MatLoad_SeqBAIJ, 2858d519adbfSMatthew Knepley /* 84*/ 0, 2859b01c7715SBarry Smith 0, 2860b01c7715SBarry Smith 0, 2861b01c7715SBarry Smith 0, 2862865e5f61SKris Buschelman 0, 2863d519adbfSMatthew Knepley /* 89*/ 0, 2864865e5f61SKris Buschelman 0, 2865865e5f61SKris Buschelman 0, 2866865e5f61SKris Buschelman 0, 2867865e5f61SKris Buschelman 0, 2868d519adbfSMatthew Knepley /* 94*/ 0, 2869865e5f61SKris Buschelman 0, 2870865e5f61SKris Buschelman 0, 287199cafbc1SBarry Smith 0, 287299cafbc1SBarry Smith 0, 2873d519adbfSMatthew Knepley /* 99*/ 0, 287499cafbc1SBarry Smith 0, 287599cafbc1SBarry Smith 0, 287699cafbc1SBarry Smith 0, 287799cafbc1SBarry Smith 0, 2878d519adbfSMatthew Knepley /*104*/ 0, 287999cafbc1SBarry Smith MatRealPart_SeqBAIJ, 28802af78befSBarry Smith MatImaginaryPart_SeqBAIJ, 28812af78befSBarry Smith 0, 28822af78befSBarry Smith 0, 2883d519adbfSMatthew Knepley /*109*/ 0, 28842af78befSBarry Smith 0, 28852af78befSBarry Smith 0, 28862af78befSBarry Smith 0, 2887547795f9SHong Zhang MatMissingDiagonal_SeqBAIJ, 2888547795f9SHong Zhang /*114*/ 0, 2889547795f9SHong Zhang 0, 
2890547795f9SHong Zhang 0, 2891547795f9SHong Zhang 0, 2892547795f9SHong Zhang 0, 2893547795f9SHong Zhang /*119*/ 0, 2894547795f9SHong Zhang 0, 2895547795f9SHong Zhang MatMultHermitianTranspose_SeqBAIJ, 2896d6037b41SHong Zhang MatMultHermitianTransposeAdd_SeqBAIJ, 2897d6037b41SHong Zhang 0, 2898bbead8a2SBarry Smith /*124*/ 0, 2899bbead8a2SBarry Smith 0, 29003964eb88SJed Brown MatInvertBlockDiagonal_SeqBAIJ, 29013964eb88SJed Brown 0, 29023964eb88SJed Brown 0, 29033964eb88SJed Brown /*129*/ 0, 29043964eb88SJed Brown 0, 29053964eb88SJed Brown 0, 29063964eb88SJed Brown 0, 29073964eb88SJed Brown 0, 29083964eb88SJed Brown /*134*/ 0, 29093964eb88SJed Brown 0, 29103964eb88SJed Brown 0, 29113964eb88SJed Brown 0, 29123964eb88SJed Brown 0, 29133964eb88SJed Brown /*139*/ 0, 29143964eb88SJed Brown 0 291599cafbc1SBarry Smith }; 29162593348eSBarry Smith 29174a2ae208SSatish Balay #undef __FUNCT__ 29184a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_SeqBAIJ" 29197087cfbeSBarry Smith PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 29203e90b805SBarry Smith { 29213e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 29228ece6314SShri Abhyankar PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2923dfbe8321SBarry Smith PetscErrorCode ierr; 29243e90b805SBarry Smith 29253e90b805SBarry Smith PetscFunctionBegin; 2926e7e72b3dSBarry Smith if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 29273e90b805SBarry Smith 29283e90b805SBarry Smith /* allocate space for values if not already there */ 29293e90b805SBarry Smith if (!aij->saved_values) { 293087828ca2SBarry Smith ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 29311784c0f5SBarry Smith ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 29323e90b805SBarry Smith } 29333e90b805SBarry Smith 29343e90b805SBarry Smith /* copy values over */ 293587828ca2SBarry Smith ierr = 
PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 29363e90b805SBarry Smith PetscFunctionReturn(0); 29373e90b805SBarry Smith } 29383e90b805SBarry Smith 29394a2ae208SSatish Balay #undef __FUNCT__ 29404a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 29417087cfbeSBarry Smith PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 29423e90b805SBarry Smith { 29433e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 29446849ba73SBarry Smith PetscErrorCode ierr; 29458ece6314SShri Abhyankar PetscInt nz = aij->i[aij->mbs]*aij->bs2; 29463e90b805SBarry Smith 29473e90b805SBarry Smith PetscFunctionBegin; 2948e7e72b3dSBarry Smith if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2949e7e72b3dSBarry Smith if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 29503e90b805SBarry Smith 29513e90b805SBarry Smith /* copy values over */ 295287828ca2SBarry Smith ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 29533e90b805SBarry Smith PetscFunctionReturn(0); 29543e90b805SBarry Smith } 29553e90b805SBarry Smith 29568cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 29578cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2958273d9f13SBarry Smith 29594a2ae208SSatish Balay #undef __FUNCT__ 2960a23d5eceSKris Buschelman #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 29617087cfbeSBarry Smith PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2962a23d5eceSKris Buschelman { 2963a23d5eceSKris Buschelman Mat_SeqBAIJ *b; 29646849ba73SBarry Smith PetscErrorCode ierr; 2965535b19f3SBarry Smith PetscInt i,mbs,nbs,bs2; 29662576faa2SJed Brown PetscBool flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2967a23d5eceSKris Buschelman 
2968a23d5eceSKris Buschelman PetscFunctionBegin; 29692576faa2SJed Brown if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2970ab93d7beSBarry Smith if (nz == MAT_SKIP_ALLOCATION) { 2971ab93d7beSBarry Smith skipallocation = PETSC_TRUE; 2972ab93d7beSBarry Smith nz = 0; 2973ab93d7beSBarry Smith } 29748c07d4e3SBarry Smith 297526283091SBarry Smith ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 297626283091SBarry Smith ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 297726283091SBarry Smith ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 297826283091SBarry Smith ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2979e02043d6SBarry Smith ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2980899cda47SBarry Smith 2981899cda47SBarry Smith B->preallocated = PETSC_TRUE; 2982899cda47SBarry Smith 2983d0f46423SBarry Smith mbs = B->rmap->n/bs; 2984d0f46423SBarry Smith nbs = B->cmap->n/bs; 2985a23d5eceSKris Buschelman bs2 = bs*bs; 2986a23d5eceSKris Buschelman 298765e19b50SBarry Smith if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2988a23d5eceSKris Buschelman 2989a23d5eceSKris Buschelman if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2990e32f2f54SBarry Smith if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2991a23d5eceSKris Buschelman if (nnz) { 2992a23d5eceSKris Buschelman for (i=0; i<mbs; i++) { 2993e32f2f54SBarry Smith if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2994e32f2f54SBarry Smith if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2995a23d5eceSKris Buschelman } 2996a23d5eceSKris Buschelman } 2997a23d5eceSKris Buschelman 2998a23d5eceSKris Buschelman b = 
(Mat_SeqBAIJ*)B->data; 2999ce94432eSBarry Smith ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 30000298fd71SBarry Smith ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,PETSC_FALSE,&flg,NULL);CHKERRQ(ierr); 30018c07d4e3SBarry Smith ierr = PetscOptionsEnd();CHKERRQ(ierr); 30028c07d4e3SBarry Smith 3003a23d5eceSKris Buschelman if (!flg) { 3004a23d5eceSKris Buschelman switch (bs) { 3005a23d5eceSKris Buschelman case 1: 3006a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_1; 3007a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3008a23d5eceSKris Buschelman break; 3009a23d5eceSKris Buschelman case 2: 3010a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_2; 3011a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3012a23d5eceSKris Buschelman break; 3013a23d5eceSKris Buschelman case 3: 3014a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_3; 3015a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3016a23d5eceSKris Buschelman break; 3017a23d5eceSKris Buschelman case 4: 3018a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_4; 3019a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3020a23d5eceSKris Buschelman break; 3021a23d5eceSKris Buschelman case 5: 3022a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_5; 3023a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3024a23d5eceSKris Buschelman break; 3025a23d5eceSKris Buschelman case 6: 3026a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_6; 3027a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3028a23d5eceSKris Buschelman break; 3029a23d5eceSKris Buschelman case 7: 3030a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_7; 3031a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3032a23d5eceSKris Buschelman break; 30338ab949d8SShri Abhyankar case 15: 3034832cc040SShri Abhyankar 
B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3035de80f912SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_N; 30368ab949d8SShri Abhyankar break; 3037a23d5eceSKris Buschelman default: 3038a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_N; 3039a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3040a23d5eceSKris Buschelman break; 3041a23d5eceSKris Buschelman } 3042a23d5eceSKris Buschelman } 3043*e48d15efSToby Isaac B->ops->sor = MatSOR_SeqBAIJ; 3044a23d5eceSKris Buschelman b->mbs = mbs; 3045a23d5eceSKris Buschelman b->nbs = nbs; 3046ab93d7beSBarry Smith if (!skipallocation) { 30472ee49352SLisandro Dalcin if (!b->imax) { 3048ab93d7beSBarry Smith ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 3049a2ea699eSBarry Smith ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 305026fbe8dcSKarl Rupp 30514fd072dbSBarry Smith b->free_imax_ilen = PETSC_TRUE; 30522ee49352SLisandro Dalcin } 3053ab93d7beSBarry Smith /* b->ilen will count nonzeros in each block row so far. 
*/ 305426fbe8dcSKarl Rupp for (i=0; i<mbs; i++) b->ilen[i] = 0; 3055a23d5eceSKris Buschelman if (!nnz) { 3056a23d5eceSKris Buschelman if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3057c62bd62aSJed Brown else if (nz < 0) nz = 1; 3058a23d5eceSKris Buschelman for (i=0; i<mbs; i++) b->imax[i] = nz; 3059a23d5eceSKris Buschelman nz = nz*mbs; 3060a23d5eceSKris Buschelman } else { 3061a23d5eceSKris Buschelman nz = 0; 3062a23d5eceSKris Buschelman for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 3063a23d5eceSKris Buschelman } 3064a23d5eceSKris Buschelman 3065a23d5eceSKris Buschelman /* allocate the matrix space */ 30662ee49352SLisandro Dalcin ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3067d0f46423SBarry Smith ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 3068d0f46423SBarry Smith ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3069a23d5eceSKris Buschelman ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 3070c1ac3661SBarry Smith ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 307126fbe8dcSKarl Rupp 3072a23d5eceSKris Buschelman b->singlemalloc = PETSC_TRUE; 3073a23d5eceSKris Buschelman b->i[0] = 0; 3074a23d5eceSKris Buschelman for (i=1; i<mbs+1; i++) { 3075a23d5eceSKris Buschelman b->i[i] = b->i[i-1] + b->imax[i-1]; 3076a23d5eceSKris Buschelman } 3077e6b907acSBarry Smith b->free_a = PETSC_TRUE; 3078e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 3079e811da20SHong Zhang } else { 3080e6b907acSBarry Smith b->free_a = PETSC_FALSE; 3081e6b907acSBarry Smith b->free_ij = PETSC_FALSE; 3082ab93d7beSBarry Smith } 3083a23d5eceSKris Buschelman 3084a23d5eceSKris Buschelman b->bs2 = bs2; 3085a23d5eceSKris Buschelman b->mbs = mbs; 3086a23d5eceSKris Buschelman b->nz = 0; 3087b32cb4a7SJed Brown b->maxnz = nz; 3088b32cb4a7SJed Brown B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 30892576faa2SJed 
Brown if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 3090a23d5eceSKris Buschelman PetscFunctionReturn(0); 3091a23d5eceSKris Buschelman } 3092a23d5eceSKris Buschelman 3093725b52f3SLisandro Dalcin #undef __FUNCT__ 3094725b52f3SLisandro Dalcin #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3095cf12db73SBarry Smith PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3096725b52f3SLisandro Dalcin { 3097725b52f3SLisandro Dalcin PetscInt i,m,nz,nz_max=0,*nnz; 3098725b52f3SLisandro Dalcin PetscScalar *values=0; 3099725b52f3SLisandro Dalcin PetscErrorCode ierr; 3100725b52f3SLisandro Dalcin 3101725b52f3SLisandro Dalcin PetscFunctionBegin; 3102e32f2f54SBarry Smith if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 310326283091SBarry Smith ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 310426283091SBarry Smith ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 310526283091SBarry Smith ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 310626283091SBarry Smith ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3107e02043d6SBarry Smith ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3108d0f46423SBarry Smith m = B->rmap->n/bs; 3109725b52f3SLisandro Dalcin 311026fbe8dcSKarl Rupp if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 3111725b52f3SLisandro Dalcin ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3112725b52f3SLisandro Dalcin for (i=0; i<m; i++) { 3113cf12db73SBarry Smith nz = ii[i+1]- ii[i]; 311426fbe8dcSKarl Rupp if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 3115725b52f3SLisandro Dalcin nz_max = PetscMax(nz_max, nz); 3116725b52f3SLisandro Dalcin nnz[i] = nz; 3117725b52f3SLisandro Dalcin } 
3118725b52f3SLisandro Dalcin ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3119725b52f3SLisandro Dalcin ierr = PetscFree(nnz);CHKERRQ(ierr); 3120725b52f3SLisandro Dalcin 3121725b52f3SLisandro Dalcin values = (PetscScalar*)V; 3122725b52f3SLisandro Dalcin if (!values) { 3123725b52f3SLisandro Dalcin ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3124725b52f3SLisandro Dalcin ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3125725b52f3SLisandro Dalcin } 3126725b52f3SLisandro Dalcin for (i=0; i<m; i++) { 3127cf12db73SBarry Smith PetscInt ncols = ii[i+1] - ii[i]; 3128cf12db73SBarry Smith const PetscInt *icols = jj + ii[i]; 3129cf12db73SBarry Smith const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3130725b52f3SLisandro Dalcin ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3131725b52f3SLisandro Dalcin } 3132725b52f3SLisandro Dalcin if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3133725b52f3SLisandro Dalcin ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3134725b52f3SLisandro Dalcin ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 31357827cd58SJed Brown ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3136725b52f3SLisandro Dalcin PetscFunctionReturn(0); 3137725b52f3SLisandro Dalcin } 3138725b52f3SLisandro Dalcin 31398cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 31408cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*); 314167877ebaSShri Abhyankar #if defined(PETSC_HAVE_MUMPS) 31428cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 314367877ebaSShri Abhyankar #endif 3144b2573a8aSBarry Smith extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,PetscBool*); 3145b24902e0SBarry Smith 31460bad9183SKris Buschelman /*MC 3147fafad747SKris 
Buschelman MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 31480bad9183SKris Buschelman block sparse compressed row format. 31490bad9183SKris Buschelman 31500bad9183SKris Buschelman Options Database Keys: 31510bad9183SKris Buschelman . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 31520bad9183SKris Buschelman 31530bad9183SKris Buschelman Level: beginner 31540bad9183SKris Buschelman 3155f0c06035SSatish Balay .seealso: MatCreateSeqBAIJ() 31560bad9183SKris Buschelman M*/ 31570bad9183SKris Buschelman 31588cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3159b24902e0SBarry Smith 3160a23d5eceSKris Buschelman #undef __FUNCT__ 31614a2ae208SSatish Balay #define __FUNCT__ "MatCreate_SeqBAIJ" 31628cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 31632593348eSBarry Smith { 3164dfbe8321SBarry Smith PetscErrorCode ierr; 3165c1ac3661SBarry Smith PetscMPIInt size; 3166b6490206SBarry Smith Mat_SeqBAIJ *b; 31673b2fbd54SBarry Smith 31683a40ed3dSBarry Smith PetscFunctionBegin; 3169ce94432eSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3170e32f2f54SBarry Smith if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3171b6490206SBarry Smith 317238f2d2fdSLisandro Dalcin ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3173b0a32e0cSBarry Smith B->data = (void*)b; 3174549d3d68SSatish Balay ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 317526fbe8dcSKarl Rupp 31762593348eSBarry Smith b->row = 0; 31772593348eSBarry Smith b->col = 0; 3178e51c0b9cSSatish Balay b->icol = 0; 31792593348eSBarry Smith b->reallocs = 0; 31803e90b805SBarry Smith b->saved_values = 0; 31812593348eSBarry Smith 3182c4992f7dSBarry Smith b->roworiented = PETSC_TRUE; 31832593348eSBarry Smith b->nonew = 0; 31842593348eSBarry Smith b->diag = 
0; 31852593348eSBarry Smith b->solve_work = 0; 3186de6a44a3SBarry Smith b->mult_work = 0; 31872a1b7f2aSHong Zhang B->spptr = 0; 3188b32cb4a7SJed Brown B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3189a9817697SBarry Smith b->keepnonzeropattern = PETSC_FALSE; 3190c4319e64SHong Zhang b->xtoy = 0; 3191c4319e64SHong Zhang b->XtoY = 0; 319288e51ccdSHong Zhang B->same_nonzero = PETSC_FALSE; 31934e220ebcSLois Curfman McInnes 319400de8ff0SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactorAvailable_petsc_C","MatGetFactorAvailable_seqbaij_petsc",MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 319500de8ff0SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_petsc_C","MatGetFactor_seqbaij_petsc",MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 319600de8ff0SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_bstrm_C","MatGetFactor_seqbaij_bstrm",MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr); 319767877ebaSShri Abhyankar #if defined(PETSC_HAVE_MUMPS) 319800de8ff0SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr); 319967877ebaSShri Abhyankar #endif 320000de8ff0SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C","MatInvertBlockDiagonal_SeqBAIJ",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 320100de8ff0SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C","MatStoreValues_SeqBAIJ",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 320200de8ff0SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C","MatRetrieveValues_SeqBAIJ",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 320300de8ff0SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C","MatSeqBAIJSetColumnIndices_SeqBAIJ",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 320400de8ff0SBarry Smith ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C","MatConvert_SeqBAIJ_SeqAIJ",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 320500de8ff0SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C","MatConvert_SeqBAIJ_SeqSBAIJ",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 320600de8ff0SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C","MatSeqBAIJSetPreallocation_SeqBAIJ",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 320700de8ff0SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C","MatSeqBAIJSetPreallocationCSR_SeqBAIJ",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 320800de8ff0SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C","MatConvert_SeqBAIJ_SeqBSTRM",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 320900de8ff0SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C","MatIsTranspose_SeqBAIJ",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 321017667f90SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 32113a40ed3dSBarry Smith PetscFunctionReturn(0); 32122593348eSBarry Smith } 32132593348eSBarry Smith 32144a2ae208SSatish Balay #undef __FUNCT__ 3215b24902e0SBarry Smith #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3216ace3abfcSBarry Smith PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 32172593348eSBarry Smith { 3218b24902e0SBarry Smith Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 32196849ba73SBarry Smith PetscErrorCode ierr; 3220a96a251dSBarry Smith PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3221de6a44a3SBarry Smith 32223a40ed3dSBarry Smith PetscFunctionBegin; 3223e32f2f54SBarry Smith if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 32242593348eSBarry Smith 32254fd072dbSBarry Smith if (cpvalues == 
MAT_SHARE_NONZERO_PATTERN) { 32264fd072dbSBarry Smith c->imax = a->imax; 32274fd072dbSBarry Smith c->ilen = a->ilen; 32284fd072dbSBarry Smith c->free_imax_ilen = PETSC_FALSE; 32294fd072dbSBarry Smith } else { 323033b91e9fSSatish Balay ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 32314fd072dbSBarry Smith ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3232b6490206SBarry Smith for (i=0; i<mbs; i++) { 32332593348eSBarry Smith c->imax[i] = a->imax[i]; 32342593348eSBarry Smith c->ilen[i] = a->ilen[i]; 32352593348eSBarry Smith } 32364fd072dbSBarry Smith c->free_imax_ilen = PETSC_TRUE; 32374fd072dbSBarry Smith } 32382593348eSBarry Smith 32392593348eSBarry Smith /* allocate the matrix space */ 324016a2bf60SHong Zhang if (mallocmatspace) { 32414fd072dbSBarry Smith if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 32424fd072dbSBarry Smith ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 32434fd072dbSBarry Smith ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3244379be0ddSLisandro Dalcin ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr); 324526fbe8dcSKarl Rupp 32464fd072dbSBarry Smith c->i = a->i; 32474fd072dbSBarry Smith c->j = a->j; 3248379be0ddSLisandro Dalcin c->singlemalloc = PETSC_FALSE; 3249379be0ddSLisandro Dalcin c->free_a = PETSC_TRUE; 3250379be0ddSLisandro Dalcin c->free_ij = PETSC_FALSE; 32514fd072dbSBarry Smith c->parent = A; 32521e40a84eSLisandro Dalcin C->preallocated = PETSC_TRUE; 32531e40a84eSLisandro Dalcin C->assembled = PETSC_TRUE; 325426fbe8dcSKarl Rupp 32554fd072dbSBarry Smith ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 32564fd072dbSBarry Smith ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 32574fd072dbSBarry Smith ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 32584fd072dbSBarry Smith } else { 3259a96a251dSBarry Smith ierr = 
PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 326016a2bf60SHong Zhang ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 326126fbe8dcSKarl Rupp 3262c4992f7dSBarry Smith c->singlemalloc = PETSC_TRUE; 3263379be0ddSLisandro Dalcin c->free_a = PETSC_TRUE; 32644fd072dbSBarry Smith c->free_ij = PETSC_TRUE; 326526fbe8dcSKarl Rupp 3266c1ac3661SBarry Smith ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3267b6490206SBarry Smith if (mbs > 0) { 3268c1ac3661SBarry Smith ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 32692e8a6d31SBarry Smith if (cpvalues == MAT_COPY_VALUES) { 3270549d3d68SSatish Balay ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 32712e8a6d31SBarry Smith } else { 3272549d3d68SSatish Balay ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 32732593348eSBarry Smith } 32742593348eSBarry Smith } 32751e40a84eSLisandro Dalcin C->preallocated = PETSC_TRUE; 32761e40a84eSLisandro Dalcin C->assembled = PETSC_TRUE; 327716a2bf60SHong Zhang } 32784fd072dbSBarry Smith } 327916a2bf60SHong Zhang 32802593348eSBarry Smith c->roworiented = a->roworiented; 32812593348eSBarry Smith c->nonew = a->nonew; 328226fbe8dcSKarl Rupp 32831e1e43feSBarry Smith ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 32841e1e43feSBarry Smith ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 328526fbe8dcSKarl Rupp 32865c9eb25fSBarry Smith c->bs2 = a->bs2; 32875c9eb25fSBarry Smith c->mbs = a->mbs; 32885c9eb25fSBarry Smith c->nbs = a->nbs; 32892593348eSBarry Smith 32902593348eSBarry Smith if (a->diag) { 32914fd072dbSBarry Smith if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 32924fd072dbSBarry Smith c->diag = a->diag; 32934fd072dbSBarry Smith c->free_diag = PETSC_FALSE; 32944fd072dbSBarry Smith } else { 3295c1ac3661SBarry Smith ierr = 
PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 329652e6d16bSBarry Smith ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 329726fbe8dcSKarl Rupp for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 32984fd072dbSBarry Smith c->free_diag = PETSC_TRUE; 32994fd072dbSBarry Smith } 330098305bb5SBarry Smith } else c->diag = 0; 330126fbe8dcSKarl Rupp 33022593348eSBarry Smith c->nz = a->nz; 3303f2cbd3d5SJed Brown c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 33042593348eSBarry Smith c->solve_work = 0; 33057fc0212eSBarry Smith c->mult_work = 0; 330688e51ccdSHong Zhang 330788e51ccdSHong Zhang c->compressedrow.use = a->compressedrow.use; 330888e51ccdSHong Zhang c->compressedrow.nrows = a->compressedrow.nrows; 3309cd6b891eSBarry Smith c->compressedrow.check = a->compressedrow.check; 3310cd6b891eSBarry Smith if (a->compressedrow.use) { 331188e51ccdSHong Zhang i = a->compressedrow.nrows; 33120e83c824SBarry Smith ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 33134fd072dbSBarry Smith ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 331488e51ccdSHong Zhang ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 331588e51ccdSHong Zhang ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 331688e51ccdSHong Zhang } else { 331788e51ccdSHong Zhang c->compressedrow.use = PETSC_FALSE; 33180298fd71SBarry Smith c->compressedrow.i = NULL; 33190298fd71SBarry Smith c->compressedrow.rindex = NULL; 332088e51ccdSHong Zhang } 332188e51ccdSHong Zhang C->same_nonzero = A->same_nonzero; 332226fbe8dcSKarl Rupp 3323140e18c1SBarry Smith ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 33245d5aaa0eSBarry Smith ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 33253a40ed3dSBarry Smith PetscFunctionReturn(0); 
33262593348eSBarry Smith } 33272593348eSBarry Smith 33284a2ae208SSatish Balay #undef __FUNCT__ 3329b24902e0SBarry Smith #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3330b24902e0SBarry Smith PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3331b24902e0SBarry Smith { 3332b24902e0SBarry Smith PetscErrorCode ierr; 3333b24902e0SBarry Smith 3334b24902e0SBarry Smith PetscFunctionBegin; 3335ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3336d0f46423SBarry Smith ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 33375c9eb25fSBarry Smith ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 333898ad0f72SJed Brown ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3339b24902e0SBarry Smith PetscFunctionReturn(0); 3340b24902e0SBarry Smith } 3341b24902e0SBarry Smith 3342b24902e0SBarry Smith #undef __FUNCT__ 33435bba2384SShri Abhyankar #define __FUNCT__ "MatLoad_SeqBAIJ" 3344112444f4SShri Abhyankar PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3345f501eaabSShri Abhyankar { 3346f501eaabSShri Abhyankar Mat_SeqBAIJ *a; 3347f501eaabSShri Abhyankar PetscErrorCode ierr; 3348f501eaabSShri Abhyankar PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3349f501eaabSShri Abhyankar PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3350f501eaabSShri Abhyankar PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3351f501eaabSShri Abhyankar PetscInt *masked,nmask,tmp,bs2,ishift; 3352f501eaabSShri Abhyankar PetscMPIInt size; 3353f501eaabSShri Abhyankar int fd; 3354f501eaabSShri Abhyankar PetscScalar *aa; 3355ce94432eSBarry Smith MPI_Comm comm; 3356f501eaabSShri Abhyankar 3357f501eaabSShri Abhyankar PetscFunctionBegin; 3358ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 33590298fd71SBarry Smith ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 
33600298fd71SBarry Smith ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3361f501eaabSShri Abhyankar ierr = PetscOptionsEnd();CHKERRQ(ierr); 3362f501eaabSShri Abhyankar bs2 = bs*bs; 3363f501eaabSShri Abhyankar 3364f501eaabSShri Abhyankar ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3365f501eaabSShri Abhyankar if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3366f501eaabSShri Abhyankar ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3367f501eaabSShri Abhyankar ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3368f501eaabSShri Abhyankar if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3369f501eaabSShri Abhyankar M = header[1]; N = header[2]; nz = header[3]; 3370f501eaabSShri Abhyankar 3371f501eaabSShri Abhyankar if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3372f501eaabSShri Abhyankar if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3373f501eaabSShri Abhyankar 3374f501eaabSShri Abhyankar /* 3375f501eaabSShri Abhyankar This code adds extra rows to make sure the number of rows is 3376f501eaabSShri Abhyankar divisible by the blocksize 3377f501eaabSShri Abhyankar */ 3378f501eaabSShri Abhyankar mbs = M/bs; 3379f501eaabSShri Abhyankar extra_rows = bs - M + bs*(mbs); 3380f501eaabSShri Abhyankar if (extra_rows == bs) extra_rows = 0; 3381f501eaabSShri Abhyankar else mbs++; 3382f501eaabSShri Abhyankar if (extra_rows) { 3383f501eaabSShri Abhyankar ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3384f501eaabSShri Abhyankar } 3385f501eaabSShri Abhyankar 3386f501eaabSShri Abhyankar /* Set global sizes if not already set */ 3387f501eaabSShri Abhyankar if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && 
newmat->cmap->N < 0) { 3388f501eaabSShri Abhyankar ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3389f501eaabSShri Abhyankar } else { /* Check if the matrix global sizes are correct */ 3390f501eaabSShri Abhyankar ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 33914c5b953cSHong Zhang if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 33924c5b953cSHong Zhang ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 33934c5b953cSHong Zhang } 3394f501eaabSShri Abhyankar if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3395f501eaabSShri Abhyankar } 3396f501eaabSShri Abhyankar 3397f501eaabSShri Abhyankar /* read in row lengths */ 3398f501eaabSShri Abhyankar ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3399f501eaabSShri Abhyankar ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3400f501eaabSShri Abhyankar for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3401f501eaabSShri Abhyankar 3402f501eaabSShri Abhyankar /* read in column indices */ 3403f501eaabSShri Abhyankar ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3404f501eaabSShri Abhyankar ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3405f501eaabSShri Abhyankar for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3406f501eaabSShri Abhyankar 3407f501eaabSShri Abhyankar /* loop over row lengths determining block row lengths */ 3408f501eaabSShri Abhyankar ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3409f501eaabSShri Abhyankar ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3410f501eaabSShri Abhyankar ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3411f501eaabSShri Abhyankar ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3412f501eaabSShri Abhyankar 
rowcount = 0; 3413f501eaabSShri Abhyankar nzcount = 0; 3414f501eaabSShri Abhyankar for (i=0; i<mbs; i++) { 3415f501eaabSShri Abhyankar nmask = 0; 3416f501eaabSShri Abhyankar for (j=0; j<bs; j++) { 3417f501eaabSShri Abhyankar kmax = rowlengths[rowcount]; 3418f501eaabSShri Abhyankar for (k=0; k<kmax; k++) { 3419f501eaabSShri Abhyankar tmp = jj[nzcount++]/bs; 3420f501eaabSShri Abhyankar if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3421f501eaabSShri Abhyankar } 3422f501eaabSShri Abhyankar rowcount++; 3423f501eaabSShri Abhyankar } 3424f501eaabSShri Abhyankar browlengths[i] += nmask; 3425f501eaabSShri Abhyankar /* zero out the mask elements we set */ 3426f501eaabSShri Abhyankar for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3427f501eaabSShri Abhyankar } 3428f501eaabSShri Abhyankar 34292f480046SShri Abhyankar /* Do preallocation */ 3430f501eaabSShri Abhyankar ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3431f501eaabSShri Abhyankar a = (Mat_SeqBAIJ*)newmat->data; 3432f501eaabSShri Abhyankar 3433f501eaabSShri Abhyankar /* set matrix "i" values */ 3434f501eaabSShri Abhyankar a->i[0] = 0; 3435f501eaabSShri Abhyankar for (i=1; i<= mbs; i++) { 3436f501eaabSShri Abhyankar a->i[i] = a->i[i-1] + browlengths[i-1]; 3437f501eaabSShri Abhyankar a->ilen[i-1] = browlengths[i-1]; 3438f501eaabSShri Abhyankar } 3439f501eaabSShri Abhyankar a->nz = 0; 3440f501eaabSShri Abhyankar for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3441f501eaabSShri Abhyankar 3442f501eaabSShri Abhyankar /* read in nonzero values */ 3443f501eaabSShri Abhyankar ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3444f501eaabSShri Abhyankar ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3445f501eaabSShri Abhyankar for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3446f501eaabSShri Abhyankar 3447f501eaabSShri Abhyankar /* set "a" and "j" values into matrix */ 3448f501eaabSShri Abhyankar nzcount = 0; jcount = 0; 3449f501eaabSShri Abhyankar for 
(i=0; i<mbs; i++) { 3450f501eaabSShri Abhyankar nzcountb = nzcount; 3451f501eaabSShri Abhyankar nmask = 0; 3452f501eaabSShri Abhyankar for (j=0; j<bs; j++) { 3453f501eaabSShri Abhyankar kmax = rowlengths[i*bs+j]; 3454f501eaabSShri Abhyankar for (k=0; k<kmax; k++) { 3455f501eaabSShri Abhyankar tmp = jj[nzcount++]/bs; 3456f501eaabSShri Abhyankar if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3457f501eaabSShri Abhyankar } 3458f501eaabSShri Abhyankar } 3459f501eaabSShri Abhyankar /* sort the masked values */ 3460f501eaabSShri Abhyankar ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3461f501eaabSShri Abhyankar 3462f501eaabSShri Abhyankar /* set "j" values into matrix */ 3463f501eaabSShri Abhyankar maskcount = 1; 3464f501eaabSShri Abhyankar for (j=0; j<nmask; j++) { 3465f501eaabSShri Abhyankar a->j[jcount++] = masked[j]; 3466f501eaabSShri Abhyankar mask[masked[j]] = maskcount++; 3467f501eaabSShri Abhyankar } 3468f501eaabSShri Abhyankar /* set "a" values into matrix */ 3469f501eaabSShri Abhyankar ishift = bs2*a->i[i]; 3470f501eaabSShri Abhyankar for (j=0; j<bs; j++) { 3471f501eaabSShri Abhyankar kmax = rowlengths[i*bs+j]; 3472f501eaabSShri Abhyankar for (k=0; k<kmax; k++) { 3473f501eaabSShri Abhyankar tmp = jj[nzcountb]/bs; 3474f501eaabSShri Abhyankar block = mask[tmp] - 1; 3475f501eaabSShri Abhyankar point = jj[nzcountb] - bs*tmp; 3476f501eaabSShri Abhyankar idx = ishift + bs2*block + j + bs*point; 3477f501eaabSShri Abhyankar a->a[idx] = (MatScalar)aa[nzcountb++]; 3478f501eaabSShri Abhyankar } 3479f501eaabSShri Abhyankar } 3480f501eaabSShri Abhyankar /* zero out the mask elements we set */ 3481f501eaabSShri Abhyankar for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3482f501eaabSShri Abhyankar } 3483f501eaabSShri Abhyankar if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3484f501eaabSShri Abhyankar 3485f501eaabSShri Abhyankar ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3486f501eaabSShri Abhyankar ierr = 
PetscFree(browlengths);CHKERRQ(ierr); 3487f501eaabSShri Abhyankar ierr = PetscFree(aa);CHKERRQ(ierr); 3488f501eaabSShri Abhyankar ierr = PetscFree(jj);CHKERRQ(ierr); 3489f501eaabSShri Abhyankar ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3490f501eaabSShri Abhyankar 3491f501eaabSShri Abhyankar ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3492f501eaabSShri Abhyankar ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3493f501eaabSShri Abhyankar PetscFunctionReturn(0); 3494f501eaabSShri Abhyankar } 3495f501eaabSShri Abhyankar 3496f501eaabSShri Abhyankar #undef __FUNCT__ 34974a2ae208SSatish Balay #define __FUNCT__ "MatCreateSeqBAIJ" 3498273d9f13SBarry Smith /*@C 3499273d9f13SBarry Smith MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3500273d9f13SBarry Smith compressed row) format. For good matrix assembly performance the 3501273d9f13SBarry Smith user should preallocate the matrix storage by setting the parameter nz 3502273d9f13SBarry Smith (or the array nnz). By setting these parameters accurately, performance 3503273d9f13SBarry Smith during matrix assembly can be increased by more than a factor of 50. 35042593348eSBarry Smith 3505273d9f13SBarry Smith Collective on MPI_Comm 3506273d9f13SBarry Smith 3507273d9f13SBarry Smith Input Parameters: 3508273d9f13SBarry Smith + comm - MPI communicator, set to PETSC_COMM_SELF 3509273d9f13SBarry Smith . bs - size of block 3510273d9f13SBarry Smith . m - number of rows 3511273d9f13SBarry Smith . n - number of columns 351235d8aa7fSBarry Smith . nz - number of nonzero blocks per block row (same for all rows) 351335d8aa7fSBarry Smith - nnz - array containing the number of nonzero blocks in the various block rows 35140298fd71SBarry Smith (possibly different for each block row) or NULL 3515273d9f13SBarry Smith 3516273d9f13SBarry Smith Output Parameter: 3517273d9f13SBarry Smith . 
A - the matrix 3518273d9f13SBarry Smith 3519175b88e8SBarry Smith It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3520ae1d86c5SBarry Smith MatXXXXSetPreallocation() paradgm instead of this routine directly. 3521175b88e8SBarry Smith [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3522175b88e8SBarry Smith 3523273d9f13SBarry Smith Options Database Keys: 3524273d9f13SBarry Smith . -mat_no_unroll - uses code that does not unroll the loops in the 3525273d9f13SBarry Smith block calculations (much slower) 3526273d9f13SBarry Smith . -mat_block_size - size of the blocks to use 3527273d9f13SBarry Smith 3528273d9f13SBarry Smith Level: intermediate 3529273d9f13SBarry Smith 3530273d9f13SBarry Smith Notes: 3531d1be2dadSMatthew Knepley The number of rows and columns must be divisible by blocksize. 3532d1be2dadSMatthew Knepley 353349a6f317SBarry Smith If the nnz parameter is given then the nz parameter is ignored 353449a6f317SBarry Smith 353535d8aa7fSBarry Smith A nonzero block is any block that as 1 or more nonzeros in it 353635d8aa7fSBarry Smith 3537273d9f13SBarry Smith The block AIJ format is fully compatible with standard Fortran 77 3538273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 3539273d9f13SBarry Smith either one (as in Fortran) or zero. See the users' manual for details. 3540273d9f13SBarry Smith 3541273d9f13SBarry Smith Specify the preallocated storage with either nz or nnz (not both). 35420298fd71SBarry Smith Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 35430598bfebSBarry Smith allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3544273d9f13SBarry Smith matrices. 
3545273d9f13SBarry Smith 354669b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3547273d9f13SBarry Smith @*/ 35487087cfbeSBarry Smith PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3549273d9f13SBarry Smith { 3550dfbe8321SBarry Smith PetscErrorCode ierr; 3551273d9f13SBarry Smith 3552273d9f13SBarry Smith PetscFunctionBegin; 3553f69a0ea3SMatthew Knepley ierr = MatCreate(comm,A);CHKERRQ(ierr); 3554f69a0ea3SMatthew Knepley ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3555273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3556ab93d7beSBarry Smith ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3557273d9f13SBarry Smith PetscFunctionReturn(0); 3558273d9f13SBarry Smith } 3559273d9f13SBarry Smith 35604a2ae208SSatish Balay #undef __FUNCT__ 35614a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3562273d9f13SBarry Smith /*@C 3563273d9f13SBarry Smith MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3564273d9f13SBarry Smith per row in the matrix. For good matrix assembly performance the 3565273d9f13SBarry Smith user should preallocate the matrix storage by setting the parameter nz 3566273d9f13SBarry Smith (or the array nnz). By setting these parameters accurately, performance 3567273d9f13SBarry Smith during matrix assembly can be increased by more than a factor of 50. 3568273d9f13SBarry Smith 3569273d9f13SBarry Smith Collective on MPI_Comm 3570273d9f13SBarry Smith 3571273d9f13SBarry Smith Input Parameters: 3572273d9f13SBarry Smith + A - the matrix 3573273d9f13SBarry Smith . bs - size of block 3574273d9f13SBarry Smith . 
nz - number of block nonzeros per block row (same for all rows) 3575273d9f13SBarry Smith - nnz - array containing the number of block nonzeros in the various block rows 35760298fd71SBarry Smith (possibly different for each block row) or NULL 3577273d9f13SBarry Smith 3578273d9f13SBarry Smith Options Database Keys: 3579273d9f13SBarry Smith . -mat_no_unroll - uses code that does not unroll the loops in the 3580273d9f13SBarry Smith block calculations (much slower) 3581273d9f13SBarry Smith . -mat_block_size - size of the blocks to use 3582273d9f13SBarry Smith 3583273d9f13SBarry Smith Level: intermediate 3584273d9f13SBarry Smith 3585273d9f13SBarry Smith Notes: 358649a6f317SBarry Smith If the nnz parameter is given then the nz parameter is ignored 358749a6f317SBarry Smith 3588aa95bbe8SBarry Smith You can call MatGetInfo() to get information on how effective the preallocation was; 3589aa95bbe8SBarry Smith for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3590aa95bbe8SBarry Smith You can also run with the option -info and look for messages with the string 3591aa95bbe8SBarry Smith malloc in them to see if additional memory allocation was needed. 3592aa95bbe8SBarry Smith 3593273d9f13SBarry Smith The block AIJ format is fully compatible with standard Fortran 77 3594273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 3595273d9f13SBarry Smith either one (as in Fortran) or zero. See the users' manual for details. 3596273d9f13SBarry Smith 3597273d9f13SBarry Smith Specify the preallocated storage with either nz or nnz (not both). 35980298fd71SBarry Smith Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 35990598bfebSBarry Smith allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 
3600273d9f13SBarry Smith 360169b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3602273d9f13SBarry Smith @*/ 36037087cfbeSBarry Smith PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3604273d9f13SBarry Smith { 36054ac538c5SBarry Smith PetscErrorCode ierr; 3606273d9f13SBarry Smith 3607273d9f13SBarry Smith PetscFunctionBegin; 36086ba663aaSJed Brown PetscValidHeaderSpecific(B,MAT_CLASSID,1); 36096ba663aaSJed Brown PetscValidType(B,1); 36106ba663aaSJed Brown PetscValidLogicalCollectiveInt(B,bs,2); 36114ac538c5SBarry Smith ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3612273d9f13SBarry Smith PetscFunctionReturn(0); 3613273d9f13SBarry Smith } 3614a1d92eedSBarry Smith 3615c75a6043SHong Zhang #undef __FUNCT__ 3616725b52f3SLisandro Dalcin #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3617725b52f3SLisandro Dalcin /*@C 3618725b52f3SLisandro Dalcin MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3619725b52f3SLisandro Dalcin (the default sequential PETSc format). 3620725b52f3SLisandro Dalcin 3621725b52f3SLisandro Dalcin Collective on MPI_Comm 3622725b52f3SLisandro Dalcin 3623725b52f3SLisandro Dalcin Input Parameters: 3624725b52f3SLisandro Dalcin + A - the matrix 3625725b52f3SLisandro Dalcin . i - the indices into j for the start of each local row (starts with zero) 3626725b52f3SLisandro Dalcin . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 3627725b52f3SLisandro Dalcin - v - optional values in the matrix 3628725b52f3SLisandro Dalcin 3629725b52f3SLisandro Dalcin Level: developer 3630725b52f3SLisandro Dalcin 3631725b52f3SLisandro Dalcin .keywords: matrix, aij, compressed row, sparse 3632725b52f3SLisandro Dalcin 3633725b52f3SLisandro Dalcin .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3634725b52f3SLisandro Dalcin @*/ 36357087cfbeSBarry Smith PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3636725b52f3SLisandro Dalcin { 36374ac538c5SBarry Smith PetscErrorCode ierr; 3638725b52f3SLisandro Dalcin 3639725b52f3SLisandro Dalcin PetscFunctionBegin; 36406ba663aaSJed Brown PetscValidHeaderSpecific(B,MAT_CLASSID,1); 36416ba663aaSJed Brown PetscValidType(B,1); 36426ba663aaSJed Brown PetscValidLogicalCollectiveInt(B,bs,2); 36434ac538c5SBarry Smith ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3644725b52f3SLisandro Dalcin PetscFunctionReturn(0); 3645725b52f3SLisandro Dalcin } 3646725b52f3SLisandro Dalcin 3647725b52f3SLisandro Dalcin 3648725b52f3SLisandro Dalcin #undef __FUNCT__ 3649c75a6043SHong Zhang #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3650c75a6043SHong Zhang /*@ 3651dfb205c3SBarry Smith MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3652c75a6043SHong Zhang 3653c75a6043SHong Zhang Collective on MPI_Comm 3654c75a6043SHong Zhang 3655c75a6043SHong Zhang Input Parameters: 3656c75a6043SHong Zhang + comm - must be an MPI communicator of size 1 3657c75a6043SHong Zhang . bs - size of block 3658c75a6043SHong Zhang . m - number of rows 3659c75a6043SHong Zhang . n - number of columns 3660c75a6043SHong Zhang . 
i - row indices 3661c75a6043SHong Zhang . j - column indices 3662c75a6043SHong Zhang - a - matrix values 3663c75a6043SHong Zhang 3664c75a6043SHong Zhang Output Parameter: 3665c75a6043SHong Zhang . mat - the matrix 3666c75a6043SHong Zhang 3667dfb205c3SBarry Smith Level: advanced 3668c75a6043SHong Zhang 3669c75a6043SHong Zhang Notes: 3670c75a6043SHong Zhang The i, j, and a arrays are not copied by this routine, the user must free these arrays 3671c75a6043SHong Zhang once the matrix is destroyed 3672c75a6043SHong Zhang 3673c75a6043SHong Zhang You cannot set new nonzero locations into this matrix, that will generate an error. 3674c75a6043SHong Zhang 3675c75a6043SHong Zhang The i and j indices are 0 based 3676c75a6043SHong Zhang 3677dfb205c3SBarry Smith When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3678dfb205c3SBarry Smith 3679dfb205c3SBarry Smith 368069b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3681c75a6043SHong Zhang 3682c75a6043SHong Zhang @*/ 36837087cfbeSBarry Smith PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat) 3684c75a6043SHong Zhang { 3685c75a6043SHong Zhang PetscErrorCode ierr; 3686c75a6043SHong Zhang PetscInt ii; 3687c75a6043SHong Zhang Mat_SeqBAIJ *baij; 3688c75a6043SHong Zhang 3689c75a6043SHong Zhang PetscFunctionBegin; 3690e32f2f54SBarry Smith if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3691e32f2f54SBarry Smith if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3692c75a6043SHong Zhang 3693c75a6043SHong Zhang ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3694c75a6043SHong Zhang ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3695c75a6043SHong Zhang ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3696c75a6043SHong Zhang ierr = 
MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3697c75a6043SHong Zhang baij = (Mat_SeqBAIJ*)(*mat)->data; 3698c75a6043SHong Zhang ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 36991784c0f5SBarry Smith ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3700c75a6043SHong Zhang 3701c75a6043SHong Zhang baij->i = i; 3702c75a6043SHong Zhang baij->j = j; 3703c75a6043SHong Zhang baij->a = a; 370426fbe8dcSKarl Rupp 3705c75a6043SHong Zhang baij->singlemalloc = PETSC_FALSE; 3706c75a6043SHong Zhang baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3707e6b907acSBarry Smith baij->free_a = PETSC_FALSE; 3708e6b907acSBarry Smith baij->free_ij = PETSC_FALSE; 3709c75a6043SHong Zhang 3710c75a6043SHong Zhang for (ii=0; ii<m; ii++) { 3711c75a6043SHong Zhang baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3712c75a6043SHong Zhang #if defined(PETSC_USE_DEBUG) 3713e32f2f54SBarry Smith if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3714c75a6043SHong Zhang #endif 3715c75a6043SHong Zhang } 3716c75a6043SHong Zhang #if defined(PETSC_USE_DEBUG) 3717c75a6043SHong Zhang for (ii=0; ii<baij->i[m]; ii++) { 3718e32f2f54SBarry Smith if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3719e32f2f54SBarry Smith if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3720c75a6043SHong Zhang } 3721c75a6043SHong Zhang #endif 3722c75a6043SHong Zhang 3723c75a6043SHong Zhang ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3724c75a6043SHong Zhang ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3725c75a6043SHong Zhang PetscFunctionReturn(0); 3726c75a6043SHong Zhang } 
3727